fix: pod not-ready causes webhook call failure
Signed-off-by: tao.yang <[email protected]>
ty-dc committed Nov 11, 2024
1 parent 364fb54 commit 5e2cb0d
Showing 1 changed file with 73 additions and 64 deletions: test/e2e/reclaim/reclaim_test.go
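The diff below makes this reclaim e2e case tolerant of a not-ready node: the worker-node lookup, the spiderpool-controller env toggle, and the cleanup that restarts kubelet and waits for the controllers to recover are moved into BeforeEach/DeferCleanup, and the recovery wait now checks every node and every controller webhook instead of only the restarted worker, so the webhook calls made later in the suite no longer fail. As a rough, self-contained sketch of that recovery wait (not the project's helper): the function below polls until all nodes are Ready and a webhook health probe succeeds on each node. It uses client-go and a plain HTTPS probe; the function name, the /readyz path, and the InsecureSkipVerify probe are illustrative assumptions, whereas the actual test calls openapi.WebhookHealthyCheck against common.WebhookPort.

package e2eutil

import (
	"context"
	"crypto/tls"
	"fmt"
	"net/http"
	"time"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/kubernetes"
)

// WaitAllWebhooksReady (hypothetical helper) polls until every node is Ready and the
// controller webhook on each node answers a health probe, then returns nil.
func WaitAllWebhooksReady(ctx context.Context, cs kubernetes.Interface, webhookPort int) error {
	httpClient := &http.Client{
		Timeout:   5 * time.Second,
		Transport: &http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: true}}, // probe only, no cert check
	}

	return wait.PollUntilContextTimeout(ctx, 10*time.Second, 4*time.Minute, true,
		func(ctx context.Context) (bool, error) {
			nodes, err := cs.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
			if err != nil {
				return false, nil // transient API errors: keep polling
			}
			for _, node := range nodes.Items {
				if !nodeReady(&node) {
					return false, nil
				}
				ip := internalIP(&node)
				if ip == "" {
					return false, nil
				}
				// Health path is an assumption; the real test uses openapi.WebhookHealthyCheck.
				resp, err := httpClient.Get(fmt.Sprintf("https://%s:%d/readyz", ip, webhookPort))
				if err != nil {
					return false, nil
				}
				resp.Body.Close()
				if resp.StatusCode != http.StatusOK {
					return false, nil
				}
			}
			return true, nil
		})
}

func nodeReady(node *corev1.Node) bool {
	for _, cond := range node.Status.Conditions {
		if cond.Type == corev1.NodeReady {
			return cond.Status == corev1.ConditionTrue
		}
	}
	return false
}

func internalIP(node *corev1.Node) string {
	for _, addr := range node.Status.Addresses {
		if addr.Type == corev1.NodeInternalIP {
			return addr.Address
		}
	}
	return ""
}

Waiting on every node, rather than only the node whose kubelet was stopped, is the safer condition here because a webhook request may be served by any spiderpool-controller replica.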
@@ -623,11 +623,11 @@ var _ = Describe("test ip with reclaim ip case", Label("reclaim"), func() {
})

Context("choose to release conflicted ip of stateless workload with node not ready", Serial, func() {
ctx := context.TODO()
var workerNodeName string
const SPIDERPOOL_GC_STATELESS_TERMINATING_POD_ON_NOT_READY_NODE_ENABLED = "SPIDERPOOL_GC_STATELESS_TERMINATING_POD_ON_NOT_READY_NODE_ENABLED"

It("stateless workload IP could be released with node not ready", Label("G00009"), func() {
ctx := context.TODO()

BeforeEach(func() {
// 0. change the spiderpool-controller env
trueStr := strconv.FormatBool(true)
deployment, err := frame.GetDeployment(constant.SpiderpoolController, "kube-system")
@@ -661,11 +661,11 @@ var _ = Describe("test ip with reclaim ip case", Label("reclaim"), func() {
deployment.Spec.Strategy.RollingUpdate.MaxSurge = &zeroMaxSurge
}

// When maxSurge is 0, spiderpool-controller Pods will be restarted one by one
err = frame.KClient.Patch(ctx, deployment, client.MergeFrom(oldDeploy))
Expect(err).NotTo(HaveOccurred())

// 1. get worker node name
var workerNodeName string
// Get the name of the worker node for restarting kubelet
nodeList, err := frame.GetNodeList()
Expect(err).NotTo(HaveOccurred())
for _, tmpNode := range nodeList.Items {
@@ -677,46 +677,11 @@ var _ = Describe("test ip with reclaim ip case", Label("reclaim"), func() {
}
Expect(workerNodeName).NotTo(Equal(""))

// 2.create a pod
podYaml := common.GenerateExamplePodYaml(podName, namespace)
podIppoolAnnoStr := common.GeneratePodIPPoolAnnotations(frame, common.NIC1, globalDefaultV4IPPoolList, globalDefaultV6IPPoolList)
podYaml.Annotations = map[string]string{constant.AnnoPodIPPool: podIppoolAnnoStr}
podYaml.Spec.NodeName = workerNodeName
Expect(podYaml).NotTo(BeNil())
GinkgoWriter.Printf("try to create Pod %v/%v \n", namespace, podName)
err = frame.CreatePod(podYaml)
Expect(err).NotTo(HaveOccurred(), "failed to create Pod %v/%v \n", namespace, podName)
ctxWithTimeout, _ := context.WithTimeout(ctx, time.Minute*3)
podYaml, err = frame.WaitPodStarted(podName, namespace, ctxWithTimeout)
Expect(err).NotTo(HaveOccurred())

// 3. record the pod IPs
var podV4IP, podV6IP string
var spiderEndpoint spiderpool.SpiderEndpoint
err = frame.KClient.Get(ctx, types.NamespacedName{
Namespace: podYaml.Namespace,
Name: podYaml.Name,
}, &spiderEndpoint)
Expect(err).NotTo(HaveOccurred())
Expect(spiderEndpoint.Status.Current.IPs).To(HaveLen(1))
if frame.Info.IpV4Enabled {
Expect(spiderEndpoint.Status.Current.IPs[0].IPv4).NotTo(BeNil())
podV4IP = strings.Split(*spiderEndpoint.Status.Current.IPs[0].IPv4, "/")[0]
}
if frame.Info.IpV6Enabled {
Expect(spiderEndpoint.Status.Current.IPs[0].IPv6).NotTo(BeNil())
podV6IP = strings.Split(*spiderEndpoint.Status.Current.IPs[0].IPv6, "/")[0]
}
GinkgoWriter.Printf("Pod '%s/%s' has IP '%v'", podYaml.Namespace, podYaml.Name, podYaml.Status.PodIPs)

// 4. set "spider-worker" kubelet down
commandStr := "systemctl stop kubelet"
output, err := frame.DockerExecCommand(ctx, workerNodeName, commandStr)
Expect(err).NotTo(HaveOccurred(), "Failed exec '%s' in docker container '%s', error is: %v,log: %v.", commandStr, workerNodeName, err, string(output))

// In the current case, kubelet is stopped and all spiderpool-controllers are restarted.
// After the use case is completed, check whether the environment returns to normal.
DeferCleanup(func() {
commandStr = "systemctl start kubelet"
output, err = frame.DockerExecCommand(ctx, workerNodeName, commandStr)
commandStr := "systemctl start kubelet"
output, err := frame.DockerExecCommand(ctx, workerNodeName, commandStr)
Expect(err).NotTo(HaveOccurred(), "Failed exec '%s' in docker container '%s', error is: %v,log: %v.", commandStr, workerNodeName, err, string(output))
Eventually(func() error {
checkCommandStr := "systemctl is-active kubelet"
@@ -736,12 +701,13 @@ var _ = Describe("test ip with reclaim ip case", Label("reclaim"), func() {
var deletePodList *corev1.PodList
needDelete := false
for _, spiderpoolControllerPod := range podList.Items {
if spiderpoolControllerPod.Spec.NodeName == workerNodeName && !podutils.IsPodReady(&spiderpoolControllerPod) && spiderpoolControllerPod.DeletionTimestamp != nil {
if !podutils.IsPodReady(&spiderpoolControllerPod) && spiderpoolControllerPod.DeletionTimestamp != nil {
needDelete = true
deletePodList = &corev1.PodList{Items: []corev1.Pod{spiderpoolControllerPod}}
}
}
if needDelete {
GinkgoWriter.Printf("delete spiderpoolcontroller pod %v/%v\n", deletePodList.Items[0].Namespace, deletePodList.Items[0].Name)
Expect(frame.DeletePodList(deletePodList)).NotTo(HaveOccurred())
Eventually(func() error {
newPodList, err := frame.GetPodListByLabel(map[string]string{"app.kubernetes.io/component": constant.SpiderpoolController})
@@ -752,50 +718,92 @@ var _ = Describe("test ip with reclaim ip case", Label("reclaim"), func() {
return fmt.Errorf("The number of Spiderpool controllers does not meet expectations. Expected %d, but got %d.", len(frame.Info.KindNodeList), len(newPodList.Items))
}
for _, newPod := range newPodList.Items {
if newPod.Spec.NodeName == workerNodeName && !podutils.IsPodReady(&newPod) {
if !podutils.IsPodReady(&newPod) {
return fmt.Errorf("Pod %s/%s on node '%s' is not running yet", newPod.Namespace, newPod.Name, workerNodeName)
}
}
return nil
}).WithTimeout(common.PodReStartTimeout).WithPolling(10 * time.Second).Should(BeNil())
}

// wait for Node spider-worker to be ready
// wait for Node to be ready
webhookHealthCheckClient := openapi.NewWebhookHealthCheckClient()
Eventually(func() error {
workerNode, err := frame.GetNode(workerNodeName)
nodeList, err := frame.GetNodeList()
if nil != err {
return err
}
isNodeReady := nodemanager.IsNodeReady(workerNode)
if !isNodeReady {
return fmt.Errorf("node '%s' is still not ready", workerNodeName)
}
for _, node := range nodeList.Items {
isNodeReady := nodemanager.IsNodeReady(&node)
if !isNodeReady {
return fmt.Errorf("node '%s' is still not ready", node.Name)
}

var nodeIP string
for _, nodeAddress := range workerNode.Status.Addresses {
if nodeAddress.Type == corev1.NodeInternalIP {
nodeIP = nodeAddress.Address
var nodeIP string
for _, nodeAddress := range node.Status.Addresses {
if nodeAddress.Type == corev1.NodeInternalIP {
nodeIP = nodeAddress.Address
}
}
Expect(nodeIP).NotTo(BeEmpty())
err = openapi.WebhookHealthyCheck(webhookHealthCheckClient, common.WebhookPort, &nodeIP)
if nil != err {
return fmt.Errorf("node '%s' spiderpool-controller is still not ready with webhook", node.Name)
}
}
Expect(nodeIP).NotTo(BeEmpty())
err = openapi.WebhookHealthyCheck(webhookHealthCheckClient, common.WebhookPort, &nodeIP)
if nil != err {
return fmt.Errorf("node '%s' spiderpool-controller is still not ready with webhook", workerNodeName)
}

return nil
}).WithTimeout(4 * time.Minute).WithPolling(10 * time.Second).Should(BeNil())
})
})

It("stateless workload IP could be released with node not ready", Label("G00009"), func() {

// 1.create a pod
podYaml := common.GenerateExamplePodYaml(podName, namespace)
podIppoolAnnoStr := common.GeneratePodIPPoolAnnotations(frame, common.NIC1, globalDefaultV4IPPoolList, globalDefaultV6IPPoolList)
podYaml.Annotations = map[string]string{constant.AnnoPodIPPool: podIppoolAnnoStr}
podYaml.Spec.NodeName = workerNodeName
Expect(podYaml).NotTo(BeNil())
GinkgoWriter.Printf("try to create Pod %v/%v \n", namespace, podName)
err = frame.CreatePod(podYaml)
Expect(err).NotTo(HaveOccurred(), "failed to create Pod %v/%v \n", namespace, podName)
ctxWithTimeout, _ := context.WithTimeout(ctx, time.Minute*3)
podYaml, err = frame.WaitPodStarted(podName, namespace, ctxWithTimeout)
Expect(err).NotTo(HaveOccurred())

// 2. record the pod IPs
var podV4IP, podV6IP string
var spiderEndpoint spiderpool.SpiderEndpoint
err = frame.KClient.Get(ctx, types.NamespacedName{
Namespace: podYaml.Namespace,
Name: podYaml.Name,
}, &spiderEndpoint)
Expect(err).NotTo(HaveOccurred())
Expect(spiderEndpoint.Status.Current.IPs).To(HaveLen(1))
if frame.Info.IpV4Enabled {
Expect(spiderEndpoint.Status.Current.IPs[0].IPv4).NotTo(BeNil())
podV4IP = strings.Split(*spiderEndpoint.Status.Current.IPs[0].IPv4, "/")[0]
}
if frame.Info.IpV6Enabled {
Expect(spiderEndpoint.Status.Current.IPs[0].IPv6).NotTo(BeNil())
podV6IP = strings.Split(*spiderEndpoint.Status.Current.IPs[0].IPv6, "/")[0]
}
GinkgoWriter.Printf("Pod '%s/%s' has IP '%v' \n", podYaml.Namespace, podYaml.Name, podYaml.Status.PodIPs)

// 3. set "spider-worker" kubelet down
commandStr := "systemctl stop kubelet"
output, err := frame.DockerExecCommand(ctx, workerNodeName, commandStr)
Expect(err).NotTo(HaveOccurred(), "Failed exec '%s' in docker container '%s', error is: %v,log: %v.", commandStr, workerNodeName, err, string(output))

// 5. wait for the Node to be 'NotReady'
// 4. wait for the Node to be 'NotReady'
tick := time.Tick(time.Minute * 3)
END:
for {
select {
case <-tick:
Skip(fmt.Sprintf("timeout to wait for the Pod '%s/%s' to be Terminating, skip this case", namespace, podName))
default:
GinkgoWriter.Printf("test here 1")
workerNode, err := frame.GetNode(workerNodeName)
if nil != err {
if errors.IsNotFound(err) {
Expand All @@ -814,8 +822,9 @@ var _ = Describe("test ip with reclaim ip case", Label("reclaim"), func() {
}
}

// 6. wait for the IPs to be released
// 5. wait for the IPs to be released
Eventually(func() error {
GinkgoWriter.Printf("test here 2")
if frame.Info.IpV4Enabled {
defaultV4pool, err := common.GetIppoolByName(frame, common.SpiderPoolIPv4PoolDefault)
if nil != err {

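For reference, the environment toggle the new BeforeEach relies on (setting SPIDERPOOL_GC_STATELESS_TERMINATING_POD_ON_NOT_READY_NODE_ENABLED=true on the controller Deployment) can be sketched outside the e2e framework roughly as follows. This is an assumption-laden illustration: the helper name is made up, the Deployment is assumed to be spiderpool-controller in kube-system (matching the frame.GetDeployment call in the diff), and the real env handling sits in the collapsed portion of the hunk.

package e2eutil

import (
	"context"

	appsv1 "k8s.io/api/apps/v1"
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/types"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

const notReadyNodeGCEnv = "SPIDERPOOL_GC_STATELESS_TERMINATING_POD_ON_NOT_READY_NODE_ENABLED"

// enableNotReadyNodeGC (hypothetical helper) flips the GC switch on the spiderpool-controller
// Deployment with a merge patch, the same mechanism the test uses via frame.KClient.Patch.
func enableNotReadyNodeGC(ctx context.Context, cl client.Client) error {
	var deploy appsv1.Deployment
	key := types.NamespacedName{Namespace: "kube-system", Name: "spiderpool-controller"} // assumed name
	if err := cl.Get(ctx, key, &deploy); err != nil {
		return err
	}
	old := deploy.DeepCopy()

	// Set or append the env var on every container; the real Deployment has a single controller container.
	for i := range deploy.Spec.Template.Spec.Containers {
		ctr := &deploy.Spec.Template.Spec.Containers[i]
		updated := false
		for j := range ctr.Env {
			if ctr.Env[j].Name == notReadyNodeGCEnv {
				ctr.Env[j].Value = "true"
				updated = true
			}
		}
		if !updated {
			ctr.Env = append(ctr.Env, corev1.EnvVar{Name: notReadyNodeGCEnv, Value: "true"})
		}
	}

	return cl.Patch(ctx, &deploy, client.MergeFrom(old))
}

Because the test also forces MaxSurge to 0, such a patch rolls the controller Pods one at a time, which is why the DeferCleanup afterwards has to wait for every replica's webhook to come back.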