From 5e2cb0d03fecff685fc357694ddec5d2e7867e17 Mon Sep 17 00:00:00 2001 From: "tao.yang" Date: Mon, 11 Nov 2024 18:58:29 +0800 Subject: [PATCH] fix: pod not-ready causes webhook call failure Signed-off-by: tao.yang --- test/e2e/reclaim/reclaim_test.go | 137 ++++++++++++++++--------------- 1 file changed, 73 insertions(+), 64 deletions(-) diff --git a/test/e2e/reclaim/reclaim_test.go b/test/e2e/reclaim/reclaim_test.go index 587a88b23..665d82240 100644 --- a/test/e2e/reclaim/reclaim_test.go +++ b/test/e2e/reclaim/reclaim_test.go @@ -623,11 +623,11 @@ var _ = Describe("test ip with reclaim ip case", Label("reclaim"), func() { }) Context("choose to release conflicted ip of stateless workload with node not ready", Serial, func() { + ctx := context.TODO() + var workerNodeName string const SPIDERPOOL_GC_STATELESS_TERMINATING_POD_ON_NOT_READY_NODE_ENABLED = "SPIDERPOOL_GC_STATELESS_TERMINATING_POD_ON_NOT_READY_NODE_ENABLED" - It("stateless workload IP could be released with node not ready", Label("G00009"), func() { - ctx := context.TODO() - + BeforeEach(func() { // 0. change the spiderpool-controller env trueStr := strconv.FormatBool(true) deployment, err := frame.GetDeployment(constant.SpiderpoolController, "kube-system") @@ -661,11 +661,11 @@ var _ = Describe("test ip with reclaim ip case", Label("reclaim"), func() { deployment.Spec.Strategy.RollingUpdate.MaxSurge = &zeroMaxSurge } + // When maxSurge is 0, spiderpool-controller Pods will be restarted one by one err = frame.KClient.Patch(ctx, deployment, client.MergeFrom(oldDeploy)) Expect(err).NotTo(HaveOccurred()) - // 1. get worker node name - var workerNodeName string + // Get the name of the work-node for restarting kubelet nodeList, err := frame.GetNodeList() Expect(err).NotTo(HaveOccurred()) for _, tmpNode := range nodeList.Items { @@ -677,46 +677,11 @@ var _ = Describe("test ip with reclaim ip case", Label("reclaim"), func() { } Expect(workerNodeName).NotTo(Equal("")) - // 2.create a pod - podYaml := common.GenerateExamplePodYaml(podName, namespace) - podIppoolAnnoStr := common.GeneratePodIPPoolAnnotations(frame, common.NIC1, globalDefaultV4IPPoolList, globalDefaultV6IPPoolList) - podYaml.Annotations = map[string]string{constant.AnnoPodIPPool: podIppoolAnnoStr} - podYaml.Spec.NodeName = workerNodeName - Expect(podYaml).NotTo(BeNil()) - GinkgoWriter.Printf("try to create Pod %v/%v \n", namespace, podName) - err = frame.CreatePod(podYaml) - Expect(err).NotTo(HaveOccurred(), "failed to create Pod %v/%v \n", namespace, podName) - ctxWithTimeout, _ := context.WithTimeout(ctx, time.Minute*3) - podYaml, err = frame.WaitPodStarted(podName, namespace, ctxWithTimeout) - Expect(err).NotTo(HaveOccurred()) - - // 3. record the pod IPs - var podV4IP, podV6IP string - var spiderEndpoint spiderpool.SpiderEndpoint - err = frame.KClient.Get(ctx, types.NamespacedName{ - Namespace: podYaml.Namespace, - Name: podYaml.Name, - }, &spiderEndpoint) - Expect(err).NotTo(HaveOccurred()) - Expect(spiderEndpoint.Status.Current.IPs).To(HaveLen(1)) - if frame.Info.IpV4Enabled { - Expect(spiderEndpoint.Status.Current.IPs[0].IPv4).NotTo(BeNil()) - podV4IP = strings.Split(*spiderEndpoint.Status.Current.IPs[0].IPv4, "/")[0] - } - if frame.Info.IpV6Enabled { - Expect(spiderEndpoint.Status.Current.IPs[0].IPv6).NotTo(BeNil()) - podV6IP = strings.Split(*spiderEndpoint.Status.Current.IPs[0].IPv6, "/")[0] - } - GinkgoWriter.Printf("Pod '%s/%s' has IP '%v'", podYaml.Namespace, podYaml.Name, podYaml.Status.PodIPs) - - // 4. set "spider-worker" kubelet down - commandStr := "systemctl stop kubelet" - output, err := frame.DockerExecCommand(ctx, workerNodeName, commandStr) - Expect(err).NotTo(HaveOccurred(), "Failed exec '%s' in docker container '%s', error is: %v,log: %v.", commandStr, workerNodeName, err, string(output)) - + // In the current case, kubelet is stopped and all spiderpool-controllers are restarted. + // After the use case is completed, check whether the environment returns to normal. DeferCleanup(func() { - commandStr = "systemctl start kubelet" - output, err = frame.DockerExecCommand(ctx, workerNodeName, commandStr) + commandStr := "systemctl start kubelet" + output, err := frame.DockerExecCommand(ctx, workerNodeName, commandStr) Expect(err).NotTo(HaveOccurred(), "Failed exec '%s' in docker container '%s', error is: %v,log: %v.", commandStr, workerNodeName, err, string(output)) Eventually(func() error { checkCommandStr := "systemctl is-active kubelet" @@ -736,12 +701,13 @@ var _ = Describe("test ip with reclaim ip case", Label("reclaim"), func() { var deletePodList *corev1.PodList needDelete := false for _, spiderpoolControllerPod := range podList.Items { - if spiderpoolControllerPod.Spec.NodeName == workerNodeName && !podutils.IsPodReady(&spiderpoolControllerPod) && spiderpoolControllerPod.DeletionTimestamp != nil { + if !podutils.IsPodReady(&spiderpoolControllerPod) && spiderpoolControllerPod.DeletionTimestamp != nil { needDelete = true deletePodList = &corev1.PodList{Items: []corev1.Pod{spiderpoolControllerPod}} } } if needDelete { + GinkgoWriter.Printf("delete spiderpoolcontroller pod %v/%v\n", deletePodList.Items[0].Namespace, deletePodList.Items[0].Name) Expect(frame.DeletePodList(deletePodList)).NotTo(HaveOccurred()) Eventually(func() error { newPodList, err := frame.GetPodListByLabel(map[string]string{"app.kubernetes.io/component": constant.SpiderpoolController}) @@ -752,7 +718,7 @@ var _ = Describe("test ip with reclaim ip case", Label("reclaim"), func() { return fmt.Errorf("The number of Spiderpool controllers does not meet expectations. Expected %d, but got %d.", len(frame.Info.KindNodeList), len(newPodList.Items)) } for _, newPod := range newPodList.Items { - if newPod.Spec.NodeName == workerNodeName && !podutils.IsPodReady(&newPod) { + if !podutils.IsPodReady(&newPod) { return fmt.Errorf("Pod %s/%s on node '%s' is not running yet", newPod.Namespace, newPod.Name, workerNodeName) } } @@ -760,35 +726,76 @@ var _ = Describe("test ip with reclaim ip case", Label("reclaim"), func() { }).WithTimeout(common.PodReStartTimeout).WithPolling(10 * time.Second).Should(BeNil()) } - // wait for Node spider-worker to be ready + // wait for Node to be ready webhookHealthCheckClient := openapi.NewWebhookHealthCheckClient() Eventually(func() error { - workerNode, err := frame.GetNode(workerNodeName) + nodeList, err := frame.GetNodeList() if nil != err { return err } - isNodeReady := nodemanager.IsNodeReady(workerNode) - if !isNodeReady { - return fmt.Errorf("node '%s' is still not ready", workerNodeName) - } + for _, node := range nodeList.Items { + isNodeReady := nodemanager.IsNodeReady(&node) + if !isNodeReady { + return fmt.Errorf("node '%s' is still not ready", node.Name) + } - var nodeIP string - for _, nodeAddress := range workerNode.Status.Addresses { - if nodeAddress.Type == corev1.NodeInternalIP { - nodeIP = nodeAddress.Address + var nodeIP string + for _, nodeAddress := range node.Status.Addresses { + if nodeAddress.Type == corev1.NodeInternalIP { + nodeIP = nodeAddress.Address + } + } + Expect(nodeIP).NotTo(BeEmpty()) + err = openapi.WebhookHealthyCheck(webhookHealthCheckClient, common.WebhookPort, &nodeIP) + if nil != err { + return fmt.Errorf("node '%s' spiderpool-controller is still not ready with webhook", node.Name) } } - Expect(nodeIP).NotTo(BeEmpty()) - err = openapi.WebhookHealthyCheck(webhookHealthCheckClient, common.WebhookPort, &nodeIP) - if nil != err { - return fmt.Errorf("node '%s' spiderpool-controller is still not ready with webhook", workerNodeName) - } - return nil }).WithTimeout(4 * time.Minute).WithPolling(10 * time.Second).Should(BeNil()) }) + }) + + It("stateless workload IP could be released with node not ready", Label("G00009"), func() { + + // 1.create a pod + podYaml := common.GenerateExamplePodYaml(podName, namespace) + podIppoolAnnoStr := common.GeneratePodIPPoolAnnotations(frame, common.NIC1, globalDefaultV4IPPoolList, globalDefaultV6IPPoolList) + podYaml.Annotations = map[string]string{constant.AnnoPodIPPool: podIppoolAnnoStr} + podYaml.Spec.NodeName = workerNodeName + Expect(podYaml).NotTo(BeNil()) + GinkgoWriter.Printf("try to create Pod %v/%v \n", namespace, podName) + err = frame.CreatePod(podYaml) + Expect(err).NotTo(HaveOccurred(), "failed to create Pod %v/%v \n", namespace, podName) + ctxWithTimeout, _ := context.WithTimeout(ctx, time.Minute*3) + podYaml, err = frame.WaitPodStarted(podName, namespace, ctxWithTimeout) + Expect(err).NotTo(HaveOccurred()) + + // 2. record the pod IPs + var podV4IP, podV6IP string + var spiderEndpoint spiderpool.SpiderEndpoint + err = frame.KClient.Get(ctx, types.NamespacedName{ + Namespace: podYaml.Namespace, + Name: podYaml.Name, + }, &spiderEndpoint) + Expect(err).NotTo(HaveOccurred()) + Expect(spiderEndpoint.Status.Current.IPs).To(HaveLen(1)) + if frame.Info.IpV4Enabled { + Expect(spiderEndpoint.Status.Current.IPs[0].IPv4).NotTo(BeNil()) + podV4IP = strings.Split(*spiderEndpoint.Status.Current.IPs[0].IPv4, "/")[0] + } + if frame.Info.IpV6Enabled { + Expect(spiderEndpoint.Status.Current.IPs[0].IPv6).NotTo(BeNil()) + podV6IP = strings.Split(*spiderEndpoint.Status.Current.IPs[0].IPv6, "/")[0] + } + GinkgoWriter.Printf("Pod '%s/%s' has IP '%v' \n", podYaml.Namespace, podYaml.Name, podYaml.Status.PodIPs) + + // 3. set "spider-worker" kubelet down + commandStr := "systemctl stop kubelet" + output, err := frame.DockerExecCommand(ctx, workerNodeName, commandStr) + Expect(err).NotTo(HaveOccurred(), "Failed exec '%s' in docker container '%s', error is: %v,log: %v.", commandStr, workerNodeName, err, string(output)) - // 5. wait for the Node to be 'NotReady' + // 4. wait for the Node to be 'NotReady' tick := time.Tick(time.Minute * 3) END: for { @@ -796,6 +803,7 @@ var _ = Describe("test ip with reclaim ip case", Label("reclaim"), func() { case <-tick: Skip(fmt.Sprintf("timeout to wait for the Pod '%s/%s' to be Terminating, skip this case", namespace, podName)) default: + GinkgoWriter.Printf("test here 1") workerNode, err := frame.GetNode(workerNodeName) if nil != err { if errors.IsNotFound(err) { @@ -814,8 +822,9 @@ var _ = Describe("test ip with reclaim ip case", Label("reclaim"), func() { } } - // 6. wait for the IPs to be released + // 5. wait for the IPs to be released Eventually(func() error { + GinkgoWriter.Printf("test here 2") if frame.Info.IpV4Enabled { defaultV4pool, err := common.GetIppoolByName(frame, common.SpiderPoolIPv4PoolDefault) if nil != err {