From 79833845c8d1e55bf68102929ab02c5e48413f0d Mon Sep 17 00:00:00 2001 From: Michael Burman Date: Fri, 13 Dec 2024 18:16:11 +0200 Subject: [PATCH] Modify Datacenter deletion process to scaledown StatefulSets first and only then delete the CassandraDatacenter --- cmd/main.go | 2 +- internal/controllers/cassandra/suite_test.go | 18 ++++++- internal/envtest/statefulset_controller.go | 4 +- pkg/reconciliation/handler_test.go | 49 +++++++++++++++++--- pkg/reconciliation/reconcile_datacenter.go | 24 ++++++++++ pkg/reconciliation/reconcile_racks.go | 1 + 6 files changed, 87 insertions(+), 11 deletions(-) diff --git a/cmd/main.go b/cmd/main.go index d8495c88..efbab670 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -147,7 +147,7 @@ func main() { os.Exit(1) } - if err := mgr.GetFieldIndexer().IndexField(ctx, &corev1.Pod{}, "spec.volumes.persistentVolumeClaim.claimName", func(obj client.Object) []string { + if err := mgr.GetCache().IndexField(ctx, &corev1.Pod{}, "spec.volumes.persistentVolumeClaim.claimName", func(obj client.Object) []string { pod, ok := obj.(*corev1.Pod) if !ok { return nil diff --git a/internal/controllers/cassandra/suite_test.go b/internal/controllers/cassandra/suite_test.go index 711464e8..71125aac 100644 --- a/internal/controllers/cassandra/suite_test.go +++ b/internal/controllers/cassandra/suite_test.go @@ -28,6 +28,7 @@ import ( . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" "go.uber.org/zap/zapcore" + corev1 "k8s.io/api/core/v1" "k8s.io/client-go/kubernetes/scheme" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -97,7 +98,7 @@ var _ = BeforeSuite(func() { Expect(err).ToNot(HaveOccurred()) err = (&CassandraDatacenterReconciler{ - Client: k8sClient, + Client: k8sManager.GetClient(), Log: ctrl.Log.WithName("controllers").WithName("CassandraDatacenter"), Scheme: k8sManager.GetScheme(), Recorder: k8sManager.GetEventRecorderFor("cass-operator"), @@ -116,6 +117,21 @@ var _ = BeforeSuite(func() { }).SetupWithManager(k8sManager) Expect(err).ToNot(HaveOccurred()) + Expect(k8sManager.GetCache().IndexField(ctx, &corev1.Pod{}, "spec.volumes.persistentVolumeClaim.claimName", func(obj client.Object) []string { + pod, ok := obj.(*corev1.Pod) + if !ok { + return nil + } + + var pvcNames []string + for _, volume := range pod.Spec.Volumes { + if volume.PersistentVolumeClaim != nil { + pvcNames = append(pvcNames, volume.PersistentVolumeClaim.ClaimName) + } + } + return pvcNames + })).ToNot(HaveOccurred()) + // Reduce the polling times and sleeps to speed up the tests cooldownPeriod = 1 * time.Millisecond minimumRequeueTime = 10 * time.Millisecond diff --git a/internal/envtest/statefulset_controller.go b/internal/envtest/statefulset_controller.go index af6977b2..f1185ec6 100644 --- a/internal/envtest/statefulset_controller.go +++ b/internal/envtest/statefulset_controller.go @@ -80,7 +80,7 @@ func (r *StatefulSetReconciler) Reconcile(ctx context.Context, req ctrl.Request) // TODO Get existing pods and modify them . 
podList := &corev1.PodList{} - if err := r.Client.List(ctx, podList, client.MatchingLabels(sts.Spec.Template.Labels), client.InNamespace(req.Namespace)); err != nil { + if err := r.Client.List(ctx, podList, client.MatchingLabels(sts.Labels), client.InNamespace(req.Namespace)); err != nil { logger.Error(err, "Failed to list the pods belonging to this StatefulSet") return ctrl.Result{}, err } @@ -94,7 +94,7 @@ func (r *StatefulSetReconciler) Reconcile(ctx context.Context, req ctrl.Request) if len(stsPods) > intendedReplicas { // We need to delete the pods.. - for i := len(stsPods) - 1; i > intendedReplicas; i-- { + for i := len(stsPods) - 1; i >= intendedReplicas; i-- { pod := stsPods[i] if err := r.Client.Delete(ctx, pod); err != nil { logger.Error(err, "Failed to delete extra pod from this StS") diff --git a/pkg/reconciliation/handler_test.go b/pkg/reconciliation/handler_test.go index f8bcb60c..fdd6dc34 100644 --- a/pkg/reconciliation/handler_test.go +++ b/pkg/reconciliation/handler_test.go @@ -5,16 +5,19 @@ package reconciliation import ( "fmt" + "strings" "testing" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" - v1 "k8s.io/api/core/v1" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/reconcile" @@ -91,16 +94,33 @@ func TestProcessDeletion_FailedDelete(t *testing.T) { mockClient := mocks.NewClient(t) rc.Client = mockClient + rc.Datacenter.Spec.Size = 0 k8sMockClientList(mockClient, nil). 
Run(func(args mock.Arguments) { - arg := args.Get(1).(*v1.PersistentVolumeClaimList) - arg.Items = []v1.PersistentVolumeClaim{{ + _, ok := args.Get(1).(*corev1.PodList) + if ok { + if strings.HasPrefix(args.Get(2).(*client.ListOptions).FieldSelector.String(), "spec.volumes.persistentVolumeClaim.claimName") { + arg := args.Get(1).(*corev1.PodList) + arg.Items = []corev1.Pod{} + } else { + t.Fail() + } + return + } + arg := args.Get(1).(*corev1.PersistentVolumeClaimList) + arg.Items = []corev1.PersistentVolumeClaim{{ ObjectMeta: metav1.ObjectMeta{ Name: "pvc-1", }, }} - }) + }).Twice() + + k8sMockClientGet(mockClient, nil). + Run(func(args mock.Arguments) { + arg := args.Get(2).(*appsv1.StatefulSet) + arg.Spec.Replicas = ptr.To[int32](0) + }).Once() k8sMockClientDelete(mockClient, fmt.Errorf("")) @@ -131,16 +151,31 @@ func TestProcessDeletion(t *testing.T) { k8sMockClientList(mockClient, nil). Run(func(args mock.Arguments) { - arg := args.Get(1).(*v1.PersistentVolumeClaimList) - arg.Items = []v1.PersistentVolumeClaim{{ + _, ok := args.Get(1).(*corev1.PodList) + if ok { + if strings.HasPrefix(args.Get(2).(*client.ListOptions).FieldSelector.String(), "spec.volumes.persistentVolumeClaim.claimName") { + arg := args.Get(1).(*corev1.PodList) + arg.Items = []corev1.Pod{} + } else { + t.Fail() + } + return + } + arg := args.Get(1).(*corev1.PersistentVolumeClaimList) + arg.Items = []corev1.PersistentVolumeClaim{{ ObjectMeta: metav1.ObjectMeta{ Name: "pvc-1", }, }} - }) // ListPods + }).Twice() // ListPods k8sMockClientDelete(mockClient, nil) // Delete PVC k8sMockClientUpdate(mockClient, nil) // Remove dc finalizer + k8sMockClientGet(mockClient, nil). 
+ Run(func(args mock.Arguments) { + arg := args.Get(2).(*appsv1.StatefulSet) + arg.Spec.Replicas = ptr.To[int32](0) + }).Once() emptySecretWatcher(t, rc) diff --git a/pkg/reconciliation/reconcile_datacenter.go b/pkg/reconciliation/reconcile_datacenter.go index 3c45a0d7..cf065100 100644 --- a/pkg/reconciliation/reconcile_datacenter.go +++ b/pkg/reconciliation/reconcile_datacenter.go @@ -77,6 +77,30 @@ func (rc *ReconciliationContext) ProcessDeletion() result.ReconcileResult { // How could we have pods if we've decommissioned everything? return result.RequeueSoon(5) } + } else { + // This is a small mini-reconcile that scales every StatefulSet down to 0 replicas before we finish deletion, without running a decommission in Cassandra + rc.ReqLogger.Info("Proceeding with deletion, setting all StatefulSets to 0 replicas") + if err := rc.CalculateRackInformation(); err != nil { + return result.Error(err) + } + + if res := rc.CheckRackCreation(); res.Completed() { + return res + } + + waitingForRackScale := false + for _, sts := range rc.statefulSets { + currentReplicas := int(*sts.Spec.Replicas) + if currentReplicas > 0 { + waitingForRackScale = true + if err := rc.UpdateRackNodeCount(sts, 0); err != nil { + return result.Error(err) + } + } + } + if waitingForRackScale { + return result.RequeueSoon(5) + } } // Clean up annotation litter on the user Secrets diff --git a/pkg/reconciliation/reconcile_racks.go b/pkg/reconciliation/reconcile_racks.go index 963be49f..5ca83058 100644 --- a/pkg/reconciliation/reconcile_racks.go +++ b/pkg/reconciliation/reconcile_racks.go @@ -586,6 +586,7 @@ func (rc *ReconciliationContext) CheckRackLabels() result.ReconcileResult { func (rc *ReconciliationContext) CheckRackStoppedState() result.ReconcileResult { logger := rc.ReqLogger + logger.Info("reconcile_racks::CheckRackStoppedState") emittedStoppingEvent := false racksUpdated := false