diff --git a/charts/mantle/templates/deployment.yaml b/charts/mantle/templates/deployment.yaml index bb5fc49a..ef4eb440 100644 --- a/charts/mantle/templates/deployment.yaml +++ b/charts/mantle/templates/deployment.yaml @@ -85,6 +85,9 @@ spec: {{- with .Values.controller.exportDataStorageClass }} - --export-data-storage-class={{ . }} {{- end }} + {{- with .Values.controller.gcInterval }} + - --gc-interval={{ . }} + {{- end }} env: - name: POD_NAME valueFrom: diff --git a/cmd/controller/main.go b/cmd/controller/main.go index 815d8b85..81851fe6 100644 --- a/cmd/controller/main.go +++ b/cmd/controller/main.go @@ -56,6 +56,7 @@ var ( objectStorageEndpoint string caCertConfigMapSrc string caCertKeySrc string + gcInterval string scheme = runtime.NewScheme() setupLog = ctrl.Log.WithName("setup") @@ -95,6 +96,7 @@ func init() { flags.StringVar(&caCertKeySrc, "ca-cert-key", "ca.crt", "The key of the ConfigMap specified by --ca-cert-config-map that contains the intermediate certificate. "+ "The default value is ca.crt. This option is just ignored if --ca-cert-configmap isn't specified.") + flags.StringVar(&gcInterval, "gc-interval", "1h", "The time period between each garbage collection.") goflags := flag.NewFlagSet("goflags", flag.ExitOnError) zapOpts.Development = true @@ -204,6 +206,16 @@ func setupReconcilers(mgr manager.Manager, primarySettings *controller.PrimarySe //+kubebuilder:scaffold:builder + parsedGCInterval, err := time.ParseDuration(gcInterval) + if err != nil { + setupLog.Error(err, "faield to parse gc interval", "gcInterval", gcInterval) + return err + } + if err := mgr.Add(controller.NewGarbageCollectorRunner(mgr.GetClient(), parsedGCInterval)); err != nil { + setupLog.Error(err, "unable to create runner", "runner", "GarbageCollectorRunner") + return err + } + return nil } diff --git a/internal/controller/garbage_collector_runner.go b/internal/controller/garbage_collector_runner.go new file mode 100644 index 00000000..efde9256 --- /dev/null +++ b/internal/controller/garbage_collector_runner.go @@ -0,0 +1,124 @@ +package controller + +import ( + "context" + "fmt" + "time" + + mantlev1 "github.com/cybozu-go/mantle/api/v1" + corev1 "k8s.io/api/core/v1" + aerrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/selection" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" +) + +type GarbageCollectorRunner struct { + client client.Client + interval time.Duration +} + +func NewGarbageCollectorRunner( + client client.Client, + interval time.Duration, +) *GarbageCollectorRunner { + return &GarbageCollectorRunner{ + client: client, + interval: interval, + } +} + +func (r *GarbageCollectorRunner) Start(ctx context.Context) error { + logger := log.FromContext(ctx) + + for { + ctxSleep, cancelSleep := context.WithTimeout(ctx, r.interval) + <-ctxSleep.Done() + cancelSleep() + if ctx.Err() != nil { + break + } + + logger.Info("garbage collection started") + + if err := r.deleteOrphanPVs(ctx); err != nil { + logger.Error(err, "failed to delete orphan PVs", "error", err) + } + + logger.Info("garbage collection finished") + } + + return nil +} + +func (r *GarbageCollectorRunner) deleteOrphanPVs(ctx context.Context) error { + logger := log.FromContext(ctx) + + requirement, err := labels.NewRequirement(labelRestoringPVKey, selection.Exists, []string{}) + if err != nil { + return fmt.Errorf("failed to create a new labels requirement: %w", err) + } + selector := labels.ValidatedSetSelector{}.Add(*requirement) + + var pvList corev1.PersistentVolumeList + if err := r.client.List(ctx, &pvList, &client.ListOptions{ + LabelSelector: selector, + }); err != nil { + return fmt.Errorf("failed to list PVs: %w", err) + } + + for _, pv := range pvList.Items { + shouldDelete, err := r.isMantleRestoreAlreadyDeleted(ctx, &pv) + if err != nil { + return fmt.Errorf("failed to check if a PV should be deleted: %w", err) + } + if !shouldDelete { + continue + } + if err := r.client.Delete(ctx, &pv, &client.DeleteOptions{ + Preconditions: &metav1.Preconditions{UID: &pv.ObjectMeta.UID, ResourceVersion: &pv.ObjectMeta.ResourceVersion}, + }); err != nil { + return fmt.Errorf("failed to delete PV: %w", err) + } + logger.Info("an orphan PV is removed", "name", pv.GetName()) + } + + return nil +} + +func (r *GarbageCollectorRunner) isMantleRestoreAlreadyDeleted(ctx context.Context, pv *corev1.PersistentVolume) (bool, error) { + restoreUID, ok := pv.GetAnnotations()[PVAnnotationRestoredBy] + if !ok { + return false, fmt.Errorf("failed to find annotation: %s", PVAnnotationRestoredBy) + } + restoreName, ok := pv.GetAnnotations()[PVAnnotationRestoredByName] + if !ok { + return false, fmt.Errorf("failed to find annotation: %s", PVAnnotationRestoredByName) + } + restoreNamespace, ok := pv.GetAnnotations()[PVAnnotationRestoredByNamespace] + if !ok { + return false, fmt.Errorf("failed to find annotation: %s", PVAnnotationRestoredByNamespace) + } + + var restore mantlev1.MantleRestore + if err := r.client.Get( + ctx, + types.NamespacedName{Name: restoreName, Namespace: restoreNamespace}, + &restore, + ); err != nil { + if aerrors.IsNotFound(err) { + return true, nil + } else { + return false, fmt.Errorf("failed to get MantleRestore: %w", err) + } + } + + if string(restore.GetUID()) != restoreUID { + return true, nil + } + + return false, nil +} diff --git a/internal/controller/mantlerestore_controller.go b/internal/controller/mantlerestore_controller.go index 13d0212d..d4b52d08 100644 --- a/internal/controller/mantlerestore_controller.go +++ b/internal/controller/mantlerestore_controller.go @@ -29,10 +29,14 @@ type MantleRestoreReconciler struct { } const ( - MantleRestoreFinalizerName = "mantlerestore.mantle.cybozu.io/finalizer" - RestoringPVFinalizerName = "mantle.cybozu.io/restoring-pv-finalizer" - PVAnnotationRestoredBy = "mantle.cybozu.io/restored-by" - PVCAnnotationRestoredBy = "mantle.cybozu.io/restored-by" + MantleRestoreFinalizerName = "mantlerestore.mantle.cybozu.io/finalizer" + RestoringPVFinalizerName = "mantle.cybozu.io/restoring-pv-finalizer" + PVAnnotationRestoredBy = "mantle.cybozu.io/restored-by" + PVAnnotationRestoredByName = "mantle.cybozu.io/restored-by-name" + PVAnnotationRestoredByNamespace = "mantle.cybozu.io/restored-by-namespace" + PVCAnnotationRestoredBy = "mantle.cybozu.io/restored-by" + labelRestoringPVKey = "mantle.cybozu.io/restoring-pv" + labelRestoringPVValue = "true" ) // +kubebuilder:rbac:groups=mantle.cybozu.io,resources=mantlerbackup,verbs=get;list;watch @@ -259,6 +263,8 @@ func (r *MantleRestoreReconciler) createOrUpdateRestoringPV(ctx context.Context, pvName, pv.Annotations[PVAnnotationRestoredBy]) } pv.Annotations[PVAnnotationRestoredBy] = restoredBy + pv.Annotations[PVAnnotationRestoredByName] = restore.GetName() + pv.Annotations[PVAnnotationRestoredByNamespace] = restore.GetNamespace() // get the source PV from the backup srcPV := corev1.PersistentVolume{} @@ -268,6 +274,11 @@ func (r *MantleRestoreReconciler) createOrUpdateRestoringPV(ctx context.Context, controllerutil.AddFinalizer(&pv, RestoringPVFinalizerName) + if pv.Labels == nil { + pv.Labels = map[string]string{} + } + pv.Labels[labelRestoringPVKey] = labelRestoringPVValue + pv.Spec = *srcPV.Spec.DeepCopy() pv.Spec.ClaimRef = nil pv.Spec.CSI.VolumeAttributes = map[string]string{ @@ -383,6 +394,9 @@ func (r *MantleRestoreReconciler) deleteRestoringPV(ctx context.Context, restore pv := corev1.PersistentVolume{} if err := r.client.Get(ctx, client.ObjectKey{Name: r.restoringPVName(restore)}, &pv); err != nil { if errors.IsNotFound(err) { + // NOTE: Since the cache of the client may be stale, we may look + // over some PVs that should be removed here. Such PVs will be + // removed by GarbageCollectorRunner. return nil } return fmt.Errorf("failed to get PV: %v", err) diff --git a/test/e2e/testdata/values-mantle-primary-template.yaml b/test/e2e/testdata/values-mantle-primary-template.yaml index 838aa7b5..cbcee13f 100644 --- a/test/e2e/testdata/values-mantle-primary-template.yaml +++ b/test/e2e/testdata/values-mantle-primary-template.yaml @@ -5,3 +5,4 @@ controller: objectStorageEndpoint: {OBJECT_STORAGE_ENDPOINT} envSecret: export-data exportDataStorageClass: rook-ceph-block + gcInterval: 1s diff --git a/test/e2e/testdata/values-mantle-secondary-template.yaml b/test/e2e/testdata/values-mantle-secondary-template.yaml index 560d8a6c..cc2c857d 100644 --- a/test/e2e/testdata/values-mantle-secondary-template.yaml +++ b/test/e2e/testdata/values-mantle-secondary-template.yaml @@ -6,6 +6,7 @@ controller: objectStorageBucketName: {OBJECT_STORAGE_BUCKET_NAME} objectStorageEndpoint: {OBJECT_STORAGE_ENDPOINT} envSecret: export-data + gcInterval: 1s secondaryService: type: NodePort diff --git a/test/e2e/testdata/values-mantle1.yaml b/test/e2e/testdata/values-mantle1.yaml index 8ab3f446..97bc36de 100644 --- a/test/e2e/testdata/values-mantle1.yaml +++ b/test/e2e/testdata/values-mantle1.yaml @@ -1,2 +1,3 @@ controller: overwriteMBCSchedule: "* * * * *" + gcInterval: 1s diff --git a/test/e2e/testdata/values-mantle2.yaml b/test/e2e/testdata/values-mantle2.yaml index e69de29b..755d6e90 100644 --- a/test/e2e/testdata/values-mantle2.yaml +++ b/test/e2e/testdata/values-mantle2.yaml @@ -0,0 +1,2 @@ +controller: + gcInterval: 1s