neonvm-controller: Remove support for DIMM slots

This is part 1 of 2 for removing DIMM slots support and completing our
transition to using virtio-mem instead (ref #1060).

With this change, neonvm-controller will always assume that VMs are
using virtio-mem. The only remaining checks sit at the top of the
reconcile functions, making sure that this is indeed the case.

The fields still exist in the CRD (although they're marked as
deprecated), so that rollback of this change is still sound.

neonvm-runner also retains support for both memory providers, so that
custom neonvm-runner images continue to work for at least one version
across the change.
sharnoff committed Sep 9, 2024
1 parent 3b7f11f commit 78ae0eb
Showing 15 changed files with 48 additions and 364 deletions.
19 changes: 7 additions & 12 deletions neonvm/apis/neonvm/v1/virtualmachine_types.go
@@ -185,6 +185,7 @@ type Guest struct {
MemorySlotSize resource.Quantity `json:"memorySlotSize"`
// +optional
MemorySlots MemorySlots `json:"memorySlots"`
// Deprecated: MemoryProvider is ignored and always interpreted as equal to VirtioMem.
// +optional
MemoryProvider *MemoryProvider `json:"memoryProvider,omitempty"`
// +optional
@@ -212,18 +213,11 @@ type Guest struct {

const virtioMemBlockSizeBytes = 8 * 1024 * 1024 // 8 MiB

// ValidateForMemoryProvider returns an error iff the guest memory settings are invalid for the
// MemoryProvider.
//
// This is used in two places. First, to validate VirtualMachine object creation. Second, to handle
// the defaulting behavior for VirtualMachines that would be switching from DIMMSlots to VirtioMem
// on restart. We place more restrictions on VirtioMem because we use 8MiB block sizes, so changing
// to a new default can only happen if the memory slot size is a multiple of 8MiB.
func (g Guest) ValidateForMemoryProvider(p MemoryProvider) error {
if p == MemoryProviderVirtioMem {
if g.MemorySlotSize.Value()%virtioMemBlockSizeBytes != 0 {
return fmt.Errorf("memorySlotSize invalid for memoryProvider VirtioMem: must be a multiple of 8Mi")
}
// ValidateMemorySize returns an error iff the memory settings are invalid for use with virtio-mem
// (the backing memory provider that we use)
func (g Guest) ValidateMemorySize() error {
if g.MemorySlotSize.Value()%virtioMemBlockSizeBytes != 0 {
return fmt.Errorf("memorySlotSize invalid for use with virtio-mem: must be a multiple of 8Mi")
}
return nil
}
@@ -603,6 +597,7 @@ type VirtualMachineStatus struct {
CPUs *MilliCPU `json:"cpus,omitempty"`
// +optional
MemorySize *resource.Quantity `json:"memorySize,omitempty"`
// Deprecated: MemoryProvider is ignored and always interpreted as equal to VirtioMem.
// +optional
MemoryProvider *MemoryProvider `json:"memoryProvider,omitempty"`
// +optional
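
For reference, here is a minimal sketch of how the new Guest.ValidateMemorySize check behaves for a few slot sizes, assuming only the method and the 8 MiB block size shown in the virtualmachine_types.go hunks above. The standalone main package and the example sizes are illustrative, not part of the repository.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"

	vmv1 "github.com/neondatabase/autoscaling/neonvm/apis/neonvm/v1"
)

func main() {
	// Slot sizes that are multiples of the 8 MiB virtio-mem block size pass;
	// anything else is rejected.
	for _, size := range []string{"1Gi", "32Mi", "100Mi"} {
		g := vmv1.Guest{MemorySlotSize: resource.MustParse(size)}
		fmt.Printf("memorySlotSize=%s -> %v\n", size, g.ValidateMemorySize())
	}
	// Expected output, roughly:
	//   memorySlotSize=1Gi -> <nil>
	//   memorySlotSize=32Mi -> <nil>
	//   memorySlotSize=100Mi -> memorySlotSize invalid for use with virtio-mem: must be a multiple of 8Mi
}
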
12 changes: 7 additions & 5 deletions neonvm/apis/neonvm/v1/virtualmachine_webhook.go
@@ -59,11 +59,13 @@ func (r *VirtualMachine) ValidateCreate() (admission.Warnings, error) {
r.Spec.Guest.CPUs.Max)
}

// validate .spec.guest.memorySlotSize w.r.t. .spec.guest.memoryProvider
if r.Spec.Guest.MemoryProvider != nil {
if err := r.Spec.Guest.ValidateForMemoryProvider(*r.Spec.Guest.MemoryProvider); err != nil {
return nil, fmt.Errorf(".spec.guest: %w", err)
}
// DIMMSlots memory provider is deprecated, and assumed to never be used.
if r.Spec.Guest.MemoryProvider != nil && *r.Spec.Guest.MemoryProvider == MemoryProviderDIMMSlots {
return nil, errors.New("DIMMSlots memory provider is deprecated and disabled")
}

if err := r.Spec.Guest.ValidateMemorySize(); err != nil {
return nil, fmt.Errorf(".spec.guest: %w", err)
}

// validate .spec.guest.memorySlots.use and .spec.guest.memorySlots.max
1 change: 0 additions & 1 deletion neonvm/config/controller/deployment.yaml
@@ -60,7 +60,6 @@ spec:
# * cache.direct=on - use O_DIRECT (don't abuse host's page cache!)
# * cache.no-flush=on - ignores disk flush operations (not needed; our disks are ephemeral)
- "--qemu-disk-cache-settings=cache.writeback=on,cache.direct=on,cache.no-flush=on"
- "--default-memory-provider=DIMMSlots"
- "--memhp-auto-movable-ratio=801" # for virtio-mem, set memory_hotplug.auto_movable_ratio=801
- "--failure-pending-period=1m"
- "--failing-refresh-interval=15s"
4 changes: 4 additions & 0 deletions neonvm/config/crd/bases/vm.neon.tech_virtualmachines.yaml
@@ -2415,6 +2415,8 @@ spec:
kernelImage:
type: string
memoryProvider:
description: 'Deprecated: MemoryProvider is ignored and always
interpreted as equal to VirtioMem.'
enum:
- DIMMSlots
- VirtioMem
@@ -2869,6 +2871,8 @@ spec:
extraNetMask:
type: string
memoryProvider:
description: 'Deprecated: MemoryProvider is ignored and always interpreted
as equal to VirtioMem.'
enum:
- DIMMSlots
- VirtioMem
6 changes: 0 additions & 6 deletions neonvm/controllers/config.go
@@ -4,8 +4,6 @@ import (
"time"

"k8s.io/apimachinery/pkg/types"

vmv1 "github.com/neondatabase/autoscaling/neonvm/apis/neonvm/v1"
)

// ReconcilerConfig stores shared configuration for VirtualMachineReconciler and
@@ -43,10 +41,6 @@ type ReconcilerConfig struct {
// used in setting up the VM disks via QEMU's `-drive` flag.
QEMUDiskCacheSettings string

// DefaultMemoryProvider is the memory provider (dimm slots or virtio-mem) that will be used for
// new VMs (or, when old ones restart) if nothing is explicitly set.
DefaultMemoryProvider vmv1.MemoryProvider

// MemhpAutoMovableRatio specifies the value that new neonvm-runners will set as the
// kernel's 'memory_hotplug.auto_movable_ratio', iff the memory provider is virtio-mem.
//
107 changes: 18 additions & 89 deletions neonvm/controllers/vm_controller.go
@@ -24,6 +24,7 @@ import (
"encoding/base64"
"encoding/json"
"encoding/pem"
"errors"
"fmt"
"io"
"net/http"
@@ -330,11 +331,10 @@ func (r *VMReconciler) doReconcile(ctx context.Context, vm *vmv1.VirtualMachine)
vm.Status.SSHSecretName = fmt.Sprintf("ssh-neonvm-%s", vm.Name)
}

// Set memory provider for old VMs that don't have it in the Status.
if vm.Status.PodName != "" && vm.Status.MemoryProvider == nil {
oldMemProvider := vmv1.MemoryProviderDIMMSlots
log.Error(nil, "Setting default MemoryProvider for VM", "MemoryProvider", oldMemProvider)
vm.Status.MemoryProvider = lo.ToPtr(oldMemProvider)
// DIMMSlots memory provider is deprecated, and assumed to never be used.
//nolint:staticcheck // We know it's deprecated. That's why we're checking it.
if vm.Status.MemoryProvider != nil && *vm.Status.MemoryProvider == vmv1.MemoryProviderDIMMSlots {
return errors.New("DIMMSlots memory provider is deprecated and disabled")
}

switch vm.Status.Phase {
@@ -374,12 +374,15 @@ func (r *VMReconciler) doReconcile(ctx context.Context, vm *vmv1.VirtualMachine)
vm.Status.Phase = vmv1.VmPending
case vmv1.VmPending:
// Generate runner pod name and set desired memory provider.
// Together with Status.MemoryProvider set for PodName != "" above,
// It is now guaranteed to have Status.MemoryProvider != nil
if len(vm.Status.PodName) == 0 {
vm.Status.PodName = names.SimpleNameGenerator.GenerateName(fmt.Sprintf("%s-", vm.Name))
//nolint:staticcheck // We know it's deprecated. That's why we're checking it.
if vm.Status.MemoryProvider == nil {
vm.Status.MemoryProvider = lo.ToPtr(pickMemoryProvider(r.Config, vm))
if err := vm.Spec.Guest.ValidateMemorySize(); err != nil {
return fmt.Errorf("Failed to set memoryProvider for VM: %w", err)
}
//nolint:staticcheck // We know it's deprecated. That's why we're checking it.
vm.Status.MemoryProvider = lo.ToPtr(vmv1.MemoryProviderVirtioMem)
}
// Update the .Status on API Server to avoid creating multiple pods for a single VM
// See https://github.com/neondatabase/autoscaling/issues/794 for the context
@@ -388,8 +391,6 @@
}
}

memoryProvider := *vm.Status.MemoryProvider

// Check if the runner pod already exists, if not create a new one
vmRunner := &corev1.Pod{}
err := r.Get(ctx, types.NamespacedName{Name: vm.Status.PodName, Namespace: vm.Namespace}, vmRunner)
@@ -423,7 +424,7 @@ func (r *VMReconciler) doReconcile(ctx context.Context, vm *vmv1.VirtualMachine)
}

// Define a new pod
pod, err := r.podForVirtualMachine(vm, memoryProvider, sshSecret)
pod, err := r.podForVirtualMachine(vm, sshSecret)
if err != nil {
log.Error(err, "Failed to define new Pod resource for VirtualMachine")
return err
@@ -740,19 +741,9 @@ func (r *VMReconciler) doReconcile(ctx context.Context, vm *vmv1.VirtualMachine)
r.updateVMStatusCPU(ctx, vm, vmRunner, pluggedCPU, cgroupUsage)

// do hotplug/unplug Memory
switch *vm.Status.MemoryProvider {
case vmv1.MemoryProviderVirtioMem:
ramScaled, err = r.doVirtioMemScaling(vm)
if err != nil {
return err
}
case vmv1.MemoryProviderDIMMSlots:
ramScaled, err = r.doDIMMSlotsScaling(ctx, vm)
if err != nil {
return err
}
default:
panic(fmt.Errorf("unexpected vm.status.memoryProvider %q", *vm.Status.MemoryProvider))
ramScaled, err = r.doVirtioMemScaling(vm)
if err != nil {
return err
}

// set VM phase to running if everything scaled
@@ -828,23 +819,6 @@ func propagateRevision(vm *vmv1.VirtualMachine) {
vm.Status.CurrentRevision = &rev
}

func pickMemoryProvider(config *ReconcilerConfig, vm *vmv1.VirtualMachine) vmv1.MemoryProvider {
if p := vm.Spec.Guest.MemoryProvider; p != nil {
return *p
}
if p := vm.Status.MemoryProvider; p != nil {
return *p
}

// Not all configurations are valid for virtio-mem. Only switch to the default as long as it
// won't be invalid:
if err := vm.Spec.Guest.ValidateForMemoryProvider(config.DefaultMemoryProvider); err != nil {
return vmv1.MemoryProviderDIMMSlots
}

return config.DefaultMemoryProvider
}

func (r *VMReconciler) doVirtioMemScaling(vm *vmv1.VirtualMachine) (done bool, _ error) {
targetSlotCount := int(vm.Spec.Guest.MemorySlots.Use - vm.Spec.Guest.MemorySlots.Min)

@@ -881,47 +855,6 @@ func (r *VMReconciler) doVirtioMemScaling(vm *vmv1.VirtualMachine) (done bool, _
return done, nil
}

func (r *VMReconciler) doDIMMSlotsScaling(ctx context.Context, vm *vmv1.VirtualMachine) (done bool, _ error) {
log := log.FromContext(ctx)

memSlotsMin := vm.Spec.Guest.MemorySlots.Min
targetSlotCount := int(vm.Spec.Guest.MemorySlots.Use - memSlotsMin)

realSlots, err := QmpSetMemorySlots(ctx, vm, targetSlotCount, r.Recorder)
if realSlots < 0 {
return false, err
}

if realSlots != int(targetSlotCount) {
log.Info("Couldn't achieve desired memory slot count, will modify .spec.guest.memorySlots.use instead", "details", err)
// firstly re-fetch VM
if err := r.Get(ctx, types.NamespacedName{Name: vm.Name, Namespace: vm.Namespace}, vm); err != nil {
log.Error(err, "Unable to re-fetch VirtualMachine")
return false, err
}
memorySlotsUseInSpec := vm.Spec.Guest.MemorySlots.Use
memoryPluggedSlots := memSlotsMin + int32(realSlots)
vm.Spec.Guest.MemorySlots.Use = memoryPluggedSlots
if err := r.tryUpdateVM(ctx, vm); err != nil {
log.Error(err, "Failed to update .spec.guest.memorySlots.use",
"old value", memorySlotsUseInSpec,
"new value", memoryPluggedSlots)
return false, err
}
} else {
done = true
}
// get Memory details from hypervisor and update VM status
memorySize, err := QmpGetMemorySize(QmpAddr(vm))
if err != nil {
log.Error(err, "Failed to get Memory details from VirtualMachine", "VirtualMachine", vm.Name)
return false, err
}
// update status by memory sizes used in the VM
r.updateVMStatusMemory(vm, memorySize)
return done, nil
}

type runnerStatusKind string

const (
@@ -1193,10 +1126,9 @@ func extractVirtualMachineResourcesJSON(spec vmv1.VirtualMachineSpec) string {
// podForVirtualMachine returns a VirtualMachine Pod object
func (r *VMReconciler) podForVirtualMachine(
vm *vmv1.VirtualMachine,
memoryProvider vmv1.MemoryProvider,
sshSecret *corev1.Secret,
) (*corev1.Pod, error) {
pod, err := podSpec(vm, memoryProvider, sshSecret, r.Config)
pod, err := podSpec(vm, sshSecret, r.Config)
if err != nil {
return nil, err
}
@@ -1395,7 +1327,6 @@ func imageForVmRunner() (string, error) {

func podSpec(
vm *vmv1.VirtualMachine,
memoryProvider vmv1.MemoryProvider,
sshSecret *corev1.Secret,
config *ReconcilerConfig,
) (*corev1.Pod, error) {
@@ -1497,11 +1428,9 @@ func podSpec(
cmd = append(
cmd,
"-qemu-disk-cache-settings", config.QEMUDiskCacheSettings,
"-memory-provider", string(memoryProvider),
"-memory-provider", string(vmv1.MemoryProviderVirtioMem),
"-memhp-auto-movable-ratio", config.MemhpAutoMovableRatio,
)
if memoryProvider == vmv1.MemoryProviderVirtioMem {
cmd = append(cmd, "-memhp-auto-movable-ratio", config.MemhpAutoMovableRatio)
}
// put these last, so that the earlier args are easier to see (because these
// can get quite large)
cmd = append(
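
With DIMM-slot scaling removed, every memory resize in vm_controller.go now goes through doVirtioMemScaling. The sketch below illustrates the slot arithmetic it builds on, assuming (per the targetSlotCount computation above) that slots up to MemorySlots.Min are boot memory and that each slot beyond that contributes MemorySlotSize bytes via virtio-mem. The targetVirtioMemSize helper is hypothetical and not part of the repository.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"

	vmv1 "github.com/neondatabase/autoscaling/neonvm/apis/neonvm/v1"
)

// targetVirtioMemSize mirrors the arithmetic behind doVirtioMemScaling: the
// requested virtio-mem size is the number of hot-pluggable slots in use
// (Use - Min) multiplied by the per-slot size.
func targetVirtioMemSize(g vmv1.Guest) int64 {
	hotplugSlots := int64(g.MemorySlots.Use - g.MemorySlots.Min)
	return hotplugSlots * g.MemorySlotSize.Value()
}

func main() {
	g := vmv1.Guest{
		MemorySlotSize: resource.MustParse("1Gi"),
		MemorySlots:    vmv1.MemorySlots{Min: 2, Use: 5},
	}
	// (5 - 2) slots * 1Gi requested from virtio-mem, on top of 2Gi of boot memory.
	fmt.Println(targetVirtioMemSize(g)) // 3221225472 (3Gi)
}
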
41 changes: 12 additions & 29 deletions neonvm/controllers/vmmigration_controller.go
@@ -197,6 +197,14 @@ func (r *VirtualMachineMigrationReconciler) Reconcile(ctx context.Context, req c
return r.updateMigrationStatus(ctx, migration)
}

// DIMMSlots memory provider is deprecated, and assumed to never be used.
//nolint:staticcheck // We know it's deprecated. That's why we're checking it.
if vm.Status.MemoryProvider != nil && *vm.Status.MemoryProvider == vmv1.MemoryProviderDIMMSlots {
err := errors.New("DIMMSlots memory provider is deprecated and disabled")
log.Error(err, "Cannot reconcile the migration")
return ctrl.Result{}, err
}

switch migration.Status.Phase {

case "":
@@ -314,24 +322,6 @@ func (r *VirtualMachineMigrationReconciler) Reconcile(ctx context.Context, req c
}
log.Info("CPUs in Target runner synced", "TargetPod.Name", migration.Status.TargetPodName)

// do hotplug Memory in targetRunner -- only needed for dimm slots; virtio-mem Just Works™
switch *vm.Status.MemoryProvider {
case vmv1.MemoryProviderVirtioMem:
// ref "Migration works out of the box" - https://lwn.net/Articles/755423/
log.Info(
"No need to sync memory in Target runner because MemoryProvider is VirtioMem",
"TargetPod.Name", migration.Status.TargetPodName,
)
case vmv1.MemoryProviderDIMMSlots:
log.Info("Syncing Memory in Target runner", "TargetPod.Name", migration.Status.TargetPodName)
if err := QmpSyncMemoryToTarget(vm, migration); err != nil {
return ctrl.Result{}, err
}
log.Info("Memory in Target runner synced", "TargetPod.Name", migration.Status.TargetPodName)
default:
panic(fmt.Errorf("unexpected vm.status.memoryProvider %q", *vm.Status.MemoryProvider))
}

// Migrate only running VMs to target with plugged devices
if vm.Status.Phase == vmv1.VmPreMigrating {
// update VM status
@@ -703,18 +693,11 @@ func (r *VirtualMachineMigrationReconciler) targetPodForVirtualMachine(
migration *vmv1.VirtualMachineMigration,
sshSecret *corev1.Secret,
) (*corev1.Pod, error) {
if vm.Status.MemoryProvider == nil {
return nil, errors.New("cannot create target pod because vm.status.memoryProvider is not set")
if err := vm.Spec.Guest.ValidateMemorySize(); err != nil {
return nil, fmt.Errorf("cannot create target pod because memory is invalid: %w", err)
}
// TODO: this is technically racy because target pod creation happens before we set the
// migration source pod, so in between reading this and starting the migration, it's
// *technically* possible that we create a target pod with a different memory provider than a
// newer source pod.
// Given that this requires (a) restart *during* initial live migration, and (b) that restart to
// change the memory provider, this is low enough risk that it's ok to leave to a follow-up.
memoryProvider := *vm.Status.MemoryProvider

pod, err := podSpec(vm, memoryProvider, sshSecret, r.Config)

pod, err := podSpec(vm, sshSecret, r.Config)
if err != nil {
return nil, err
}
7 changes: 0 additions & 7 deletions neonvm/main.go
@@ -103,7 +103,6 @@ func main() {
var enableContainerMgr bool
var disableRunnerCgroup bool
var qemuDiskCacheSettings string
var defaultMemoryProvider vmv1.MemoryProvider
var memhpAutoMovableRatio string
var failurePendingPeriod time.Duration
var failingRefreshInterval time.Duration
@@ -143,18 +142,13 @@
flag.BoolVar(&enableContainerMgr, "enable-container-mgr", false, "Enable crictl-based container-mgr alongside each VM")
flag.BoolVar(&disableRunnerCgroup, "disable-runner-cgroup", false, "Disable creation of a cgroup in neonvm-runner for fractional CPU limiting")
flag.StringVar(&qemuDiskCacheSettings, "qemu-disk-cache-settings", "cache=none", "Set neonvm-runner's QEMU disk cache settings")
flag.Func("default-memory-provider", "Set default memory provider to use for new VMs", defaultMemoryProvider.FlagFunc)
flag.StringVar(&memhpAutoMovableRatio, "memhp-auto-movable-ratio", "301", "For virtio-mem, set VM kernel's memory_hotplug.auto_movable_ratio")
flag.DurationVar(&failurePendingPeriod, "failure-pending-period", 1*time.Minute,
"the period for the propagation of reconciliation failures to the observability instruments")
flag.DurationVar(&failingRefreshInterval, "failing-refresh-interval", 1*time.Minute,
"the interval between consecutive updates of metrics and logs, related to failing reconciliations")
flag.Parse()

if defaultMemoryProvider == "" {
fmt.Fprintln(os.Stderr, "missing required flag '-default-memory-provider'")
os.Exit(1)
}
if disableRunnerCgroup && enableContainerMgr {
fmt.Fprintln(os.Stderr, "Cannot have both '-enable-container-mgr' and '-disable-runner-cgroup'")
os.Exit(1)
@@ -216,7 +210,6 @@ func main() {
MaxConcurrentReconciles: concurrencyLimit,
SkipUpdateValidationFor: skipUpdateValidationFor,
QEMUDiskCacheSettings: qemuDiskCacheSettings,
DefaultMemoryProvider: defaultMemoryProvider,
MemhpAutoMovableRatio: memhpAutoMovableRatio,
FailurePendingPeriod: failurePendingPeriod,
FailingRefreshInterval: failingRefreshInterval,
1 change: 0 additions & 1 deletion tests/e2e/autoscaling.default/00-assert.yaml
@@ -14,4 +14,3 @@ status:
status: "True"
cpus: 250m
memorySize: 1Gi
memoryProvider: DIMMSlots