neonvm-controller: Remove support for DIMM slots

This is part 1 of 2 for removing DIMM slots support and completing our
transition to using virtio-mem instead (ref #1060).

With this change, neonvm-controller will always assume that VMs are
using virtio-mem. The only remaining checks sit at the top of the
reconcile functions, making sure that this is indeed the case.

The fields still exist in the CRD (although they're marked as
deprecated), so that rollback of this change is still sound.

neonvm-runner also retains support for both memory providers, so that
custom neonvm-runner images continue to work for at least one version
across the change.
sharnoff committed Sep 9, 2024
1 parent 3b7f11f commit 78ae0eb
Showing 15 changed files with 48 additions and 364 deletions.
19 changes: 7 additions & 12 deletions neonvm/apis/neonvm/v1/virtualmachine_types.go
@@ -185,6 +185,7 @@ type Guest struct {
MemorySlotSize resource.Quantity `json:"memorySlotSize"`
// +optional
MemorySlots MemorySlots `json:"memorySlots"`
// Deprecated: MemoryProvider is ignored and always interpreted as equal to VirtioMem.
// +optional
MemoryProvider *MemoryProvider `json:"memoryProvider,omitempty"`
// +optional
@@ -212,18 +213,11 @@ type Guest struct {

const virtioMemBlockSizeBytes = 8 * 1024 * 1024 // 8 MiB

// ValidateForMemoryProvider returns an error iff the guest memory settings are invalid for the
// MemoryProvider.
//
// This is used in two places. First, to validate VirtualMachine object creation. Second, to handle
// the defaulting behavior for VirtualMachines that would be switching from DIMMSlots to VirtioMem
// on restart. We place more restrictions on VirtioMem because we use 8MiB block sizes, so changing
// to a new default can only happen if the memory slot size is a multiple of 8MiB.
func (g Guest) ValidateForMemoryProvider(p MemoryProvider) error {
if p == MemoryProviderVirtioMem {
if g.MemorySlotSize.Value()%virtioMemBlockSizeBytes != 0 {
return fmt.Errorf("memorySlotSize invalid for memoryProvider VirtioMem: must be a multiple of 8Mi")
}
// ValidateMemorySize returns an error iff the memory settings are invalid for use with virtio-mem
// (the backing memory provider that we use)
func (g Guest) ValidateMemorySize() error {
if g.MemorySlotSize.Value()%virtioMemBlockSizeBytes != 0 {
return fmt.Errorf("memorySlotSize invalid for use with virtio-mem: must be a multiple of 8Mi")
}
return nil
}
@@ -603,6 +597,7 @@ type VirtualMachineStatus struct {
CPUs *MilliCPU `json:"cpus,omitempty"`
// +optional
MemorySize *resource.Quantity `json:"memorySize,omitempty"`
// Deprecated: MemoryProvider is ignored and always interpreted as equal to VirtioMem.
// +optional
MemoryProvider *MemoryProvider `json:"memoryProvider,omitempty"`
// +optional
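
For reference, here is a minimal sketch of how the new Guest.ValidateMemorySize check behaves for a few slot sizes, assuming only the method and the 8 MiB block size shown in the virtualmachine_types.go hunks above. The standalone main package and the example sizes are illustrative, not part of the repository.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"

	vmv1 "github.com/neondatabase/autoscaling/neonvm/apis/neonvm/v1"
)

func main() {
	// Slot sizes that are multiples of the 8 MiB virtio-mem block size pass;
	// anything else is rejected.
	for _, size := range []string{"1Gi", "32Mi", "100Mi"} {
		g := vmv1.Guest{MemorySlotSize: resource.MustParse(size)}
		fmt.Printf("memorySlotSize=%s -> %v\n", size, g.ValidateMemorySize())
	}
	// Expected output, roughly:
	//   memorySlotSize=1Gi -> <nil>
	//   memorySlotSize=32Mi -> <nil>
	//   memorySlotSize=100Mi -> memorySlotSize invalid for use with virtio-mem: must be a multiple of 8Mi
}
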
12 changes: 7 additions & 5 deletions neonvm/apis/neonvm/v1/virtualmachine_webhook.go
@@ -59,11 +59,13 @@ func (r *VirtualMachine) ValidateCreate() (admission.Warnings, error) {
r.Spec.Guest.CPUs.Max)
}

// validate .spec.guest.memorySlotSize w.r.t. .spec.guest.memoryProvider
if r.Spec.Guest.MemoryProvider != nil {
if err := r.Spec.Guest.ValidateForMemoryProvider(*r.Spec.Guest.MemoryProvider); err != nil {
return nil, fmt.Errorf(".spec.guest: %w", err)
}
// DIMMSlots memory provider is deprecated, and assumed to never be used.
if r.Spec.Guest.MemoryProvider != nil && *r.Spec.Guest.MemoryProvider == MemoryProviderDIMMSlots {
return nil, errors.New("DIMMSlots memory provider is deprecated and disabled")
}

if err := r.Spec.Guest.ValidateMemorySize(); err != nil {
return nil, fmt.Errorf(".spec.guest: %w", err)
}

// validate .spec.guest.memorySlots.use and .spec.guest.memorySlots.max
1 change: 0 additions & 1 deletion neonvm/config/controller/deployment.yaml
@@ -60,7 +60,6 @@ spec:
# * cache.direct=on - use O_DIRECT (don't abuse host's page cache!)
# * cache.no-flush=on - ignores disk flush operations (not needed; our disks are ephemeral)
- "--qemu-disk-cache-settings=cache.writeback=on,cache.direct=on,cache.no-flush=on"
- "--default-memory-provider=DIMMSlots"
- "--memhp-auto-movable-ratio=801" # for virtio-mem, set memory_hotplug.auto_movable_ratio=801
- "--failure-pending-period=1m"
- "--failing-refresh-interval=15s"
4 changes: 4 additions & 0 deletions neonvm/config/crd/bases/vm.neon.tech_virtualmachines.yaml
@@ -2415,6 +2415,8 @@ spec:
kernelImage:
type: string
memoryProvider:
description: 'Deprecated: MemoryProvider is ignored and always
interpreted as equal to VirtioMem.'
enum:
- DIMMSlots
- VirtioMem
@@ -2869,6 +2871,8 @@ spec:
extraNetMask:
type: string
memoryProvider:
description: 'Deprecated: MemoryProvider is ignored and always interpreted
as equal to VirtioMem.'
enum:
- DIMMSlots
- VirtioMem
6 changes: 0 additions & 6 deletions neonvm/controllers/config.go
@@ -4,8 +4,6 @@ import (
"time"

"k8s.io/apimachinery/pkg/types"

vmv1 "github.com/neondatabase/autoscaling/neonvm/apis/neonvm/v1"
)

// ReconcilerConfig stores shared configuration for VirtualMachineReconciler and
@@ -43,10 +41,6 @@ type ReconcilerConfig struct {
// used in setting up the VM disks via QEMU's `-drive` flag.
QEMUDiskCacheSettings string

// DefaultMemoryProvider is the memory provider (dimm slots or virtio-mem) that will be used for
// new VMs (or, when old ones restart) if nothing is explicitly set.
DefaultMemoryProvider vmv1.MemoryProvider

// MemhpAutoMovableRatio specifies the value that new neonvm-runners will set as the
// kernel's 'memory_hotplug.auto_movable_ratio', iff the memory provider is virtio-mem.
//
107 changes: 18 additions & 89 deletions neonvm/controllers/vm_controller.go
@@ -24,6 +24,7 @@ import (
"encoding/base64"
"encoding/json"
"encoding/pem"
"errors"
"fmt"
"io"
"net/http"
@@ -330,11 +331,10 @@ func (r *VMReconciler) doReconcile(ctx context.Context, vm *vmv1.VirtualMachine)
vm.Status.SSHSecretName = fmt.Sprintf("ssh-neonvm-%s", vm.Name)
}

// Set memory provider for old VMs that don't have it in the Status.
if vm.Status.PodName != "" && vm.Status.MemoryProvider == nil {
oldMemProvider := vmv1.MemoryProviderDIMMSlots
log.Error(nil, "Setting default MemoryProvider for VM", "MemoryProvider", oldMemProvider)
vm.Status.MemoryProvider = lo.ToPtr(oldMemProvider)
// DIMMSlots memory provider is deprecated, and assumed to never be used.
//nolint:staticcheck // We know it's deprecated. That's why we're checking it.
if vm.Status.MemoryProvider != nil && *vm.Status.MemoryProvider == vmv1.MemoryProviderDIMMSlots {
return errors.New("DIMMSlots memory provider is deprecated and disabled")
}

switch vm.Status.Phase {
@@ -374,12 +374,15 @@ func (r *VMReconciler) doReconcile(ctx context.Context, vm *vmv1.VirtualMachine)
vm.Status.Phase = vmv1.VmPending
case vmv1.VmPending:
// Generate runner pod name and set desired memory provider.
// Together with Status.MemoryProvider set for PodName != "" above,
// It is now guaranteed to have Status.MemoryProvider != nil
if len(vm.Status.PodName) == 0 {
vm.Status.PodName = names.SimpleNameGenerator.GenerateName(fmt.Sprintf("%s-", vm.Name))
//nolint:staticcheck // We know it's deprecated. That's why we're checking it.
if vm.Status.MemoryProvider == nil {
vm.Status.MemoryProvider = lo.ToPtr(pickMemoryProvider(r.Config, vm))
if err := vm.Spec.Guest.ValidateMemorySize(); err != nil {
return fmt.Errorf("Failed to set memoryProvider for VM: %w", err)
}
//nolint:staticcheck // We know it's deprecated. That's why we're checking it.
vm.Status.MemoryProvider = lo.ToPtr(vmv1.MemoryProviderVirtioMem)
}
// Update the .Status on API Server to avoid creating multiple pods for a single VM
// See https://github.com/neondatabase/autoscaling/issues/794 for the context
@@ -388,8 +391,6 @@
}
}

memoryProvider := *vm.Status.MemoryProvider

// Check if the runner pod already exists, if not create a new one
vmRunner := &corev1.Pod{}
err := r.Get(ctx, types.NamespacedName{Name: vm.Status.PodName, Namespace: vm.Namespace}, vmRunner)
@@ -423,7 +424,7 @@ func (r *VMReconciler) doReconcile(ctx context.Context, vm *vmv1.VirtualMachine)
}

// Define a new pod
pod, err := r.podForVirtualMachine(vm, memoryProvider, sshSecret)
pod, err := r.podForVirtualMachine(vm, sshSecret)
if err != nil {
log.Error(err, "Failed to define new Pod resource for VirtualMachine")
return err
@@ -740,19 +741,9 @@ func (r *VMReconciler) doReconcile(ctx context.Context, vm *vmv1.VirtualMachine)
r.updateVMStatusCPU(ctx, vm, vmRunner, pluggedCPU, cgroupUsage)

// do hotplug/unplug Memory
switch *vm.Status.MemoryProvider {
case vmv1.MemoryProviderVirtioMem:
ramScaled, err = r.doVirtioMemScaling(vm)
if err != nil {
return err
}
case vmv1.MemoryProviderDIMMSlots:
ramScaled, err = r.doDIMMSlotsScaling(ctx, vm)
if err != nil {
return err
}
default:
panic(fmt.Errorf("unexpected vm.status.memoryProvider %q", *vm.Status.MemoryProvider))
ramScaled, err = r.doVirtioMemScaling(vm)
if err != nil {
return err
}

// set VM phase to running if everything scaled
@@ -828,23 +819,6 @@ func propagateRevision(vm *vmv1.VirtualMachine) {
vm.Status.CurrentRevision = &rev
}

func pickMemoryProvider(config *ReconcilerConfig, vm *vmv1.VirtualMachine) vmv1.MemoryProvider {
if p := vm.Spec.Guest.MemoryProvider; p != nil {
return *p
}
if p := vm.Status.MemoryProvider; p != nil {
return *p
}

// Not all configurations are valid for virtio-mem. Only switch to the default as long as it
// won't be invalid:
if err := vm.Spec.Guest.ValidateForMemoryProvider(config.DefaultMemoryProvider); err != nil {
return vmv1.MemoryProviderDIMMSlots
}

return config.DefaultMemoryProvider
}

func (r *VMReconciler) doVirtioMemScaling(vm *vmv1.VirtualMachine) (done bool, _ error) {
targetSlotCount := int(vm.Spec.Guest.MemorySlots.Use - vm.Spec.Guest.MemorySlots.Min)

@@ -881,47 +855,6 @@ func (r *VMReconciler) doVirtioMemScaling(vm *vmv1.VirtualMachine) (done bool, _
return done, nil
}

func (r *VMReconciler) doDIMMSlotsScaling(ctx context.Context, vm *vmv1.VirtualMachine) (done bool, _ error) {
log := log.FromContext(ctx)

memSlotsMin := vm.Spec.Guest.MemorySlots.Min
targetSlotCount := int(vm.Spec.Guest.MemorySlots.Use - memSlotsMin)

realSlots, err := QmpSetMemorySlots(ctx, vm, targetSlotCount, r.Recorder)
if realSlots < 0 {
return false, err
}

if realSlots != int(targetSlotCount) {
log.Info("Couldn't achieve desired memory slot count, will modify .spec.guest.memorySlots.use instead", "details", err)
// firstly re-fetch VM
if err := r.Get(ctx, types.NamespacedName{Name: vm.Name, Namespace: vm.Namespace}, vm); err != nil {
log.Error(err, "Unable to re-fetch VirtualMachine")
return false, err
}
memorySlotsUseInSpec := vm.Spec.Guest.MemorySlots.Use
memoryPluggedSlots := memSlotsMin + int32(realSlots)
vm.Spec.Guest.MemorySlots.Use = memoryPluggedSlots
if err := r.tryUpdateVM(ctx, vm); err != nil {
log.Error(err, "Failed to update .spec.guest.memorySlots.use",
"old value", memorySlotsUseInSpec,
"new value", memoryPluggedSlots)
return false, err
}
} else {
done = true
}
// get Memory details from hypervisor and update VM status
memorySize, err := QmpGetMemorySize(QmpAddr(vm))
if err != nil {
log.Error(err, "Failed to get Memory details from VirtualMachine", "VirtualMachine", vm.Name)
return false, err
}
// update status by memory sizes used in the VM
r.updateVMStatusMemory(vm, memorySize)
return done, nil
}

type runnerStatusKind string

const (
@@ -1193,10 +1126,9 @@ func extractVirtualMachineResourcesJSON(spec vmv1.VirtualMachineSpec) string {
// podForVirtualMachine returns a VirtualMachine Pod object
func (r *VMReconciler) podForVirtualMachine(
vm *vmv1.VirtualMachine,
memoryProvider vmv1.MemoryProvider,
sshSecret *corev1.Secret,
) (*corev1.Pod, error) {
pod, err := podSpec(vm, memoryProvider, sshSecret, r.Config)
pod, err := podSpec(vm, sshSecret, r.Config)
if err != nil {
return nil, err
}
@@ -1395,7 +1327,6 @@ func imageForVmRunner() (string, error) {

func podSpec(
vm *vmv1.VirtualMachine,
memoryProvider vmv1.MemoryProvider,
sshSecret *corev1.Secret,
config *ReconcilerConfig,
) (*corev1.Pod, error) {
@@ -1497,11 +1428,9 @@ func podSpec(
cmd = append(
cmd,
"-qemu-disk-cache-settings", config.QEMUDiskCacheSettings,
"-memory-provider", string(memoryProvider),
"-memory-provider", string(vmv1.MemoryProviderVirtioMem),
"-memhp-auto-movable-ratio", config.MemhpAutoMovableRatio,
)
if memoryProvider == vmv1.MemoryProviderVirtioMem {
cmd = append(cmd, "-memhp-auto-movable-ratio", config.MemhpAutoMovableRatio)
}
// put these last, so that the earlier args are easier to see (because these
// can get quite large)
cmd = append(
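
With DIMM-slot scaling removed, every memory resize in vm_controller.go now goes through doVirtioMemScaling. The sketch below illustrates the slot arithmetic it builds on, assuming (per the targetSlotCount computation above) that slots up to MemorySlots.Min are boot memory and that each slot beyond that contributes MemorySlotSize bytes via virtio-mem. The targetVirtioMemSize helper is hypothetical and not part of the repository.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"

	vmv1 "github.com/neondatabase/autoscaling/neonvm/apis/neonvm/v1"
)

// targetVirtioMemSize mirrors the arithmetic behind doVirtioMemScaling: the
// requested virtio-mem size is the number of hot-pluggable slots in use
// (Use - Min) multiplied by the per-slot size.
func targetVirtioMemSize(g vmv1.Guest) int64 {
	hotplugSlots := int64(g.MemorySlots.Use - g.MemorySlots.Min)
	return hotplugSlots * g.MemorySlotSize.Value()
}

func main() {
	g := vmv1.Guest{
		MemorySlotSize: resource.MustParse("1Gi"),
		MemorySlots:    vmv1.MemorySlots{Min: 2, Use: 5},
	}
	// (5 - 2) slots * 1Gi requested from virtio-mem, on top of 2Gi of boot memory.
	fmt.Println(targetVirtioMemSize(g)) // 3221225472 (3Gi)
}
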
41 changes: 12 additions & 29 deletions neonvm/controllers/vmmigration_controller.go
@@ -197,6 +197,14 @@ func (r *VirtualMachineMigrationReconciler) Reconcile(ctx context.Context, req c
return r.updateMigrationStatus(ctx, migration)
}

// DIMMSlots memory provider is deprecated, and assumed to never be used.
//nolint:staticcheck // We know it's deprecated. That's why we're checking it.
if vm.Status.MemoryProvider != nil && *vm.Status.MemoryProvider == vmv1.MemoryProviderDIMMSlots {
err := errors.New("DIMMSlots memory provider is deprecated and disabled")
log.Error(err, "Cannot reconcile the migration")
return ctrl.Result{}, err
}

switch migration.Status.Phase {

case "":
@@ -314,24 +322,6 @@ func (r *VirtualMachineMigrationReconciler) Reconcile(ctx context.Context, req c
}
log.Info("CPUs in Target runner synced", "TargetPod.Name", migration.Status.TargetPodName)

// do hotplug Memory in targetRunner -- only needed for dimm slots; virtio-mem Just Works™
switch *vm.Status.MemoryProvider {
case vmv1.MemoryProviderVirtioMem:
// ref "Migration works out of the box" - https://lwn.net/Articles/755423/
log.Info(
"No need to sync memory in Target runner because MemoryProvider is VirtioMem",
"TargetPod.Name", migration.Status.TargetPodName,
)
case vmv1.MemoryProviderDIMMSlots:
log.Info("Syncing Memory in Target runner", "TargetPod.Name", migration.Status.TargetPodName)
if err := QmpSyncMemoryToTarget(vm, migration); err != nil {
return ctrl.Result{}, err
}
log.Info("Memory in Target runner synced", "TargetPod.Name", migration.Status.TargetPodName)
default:
panic(fmt.Errorf("unexpected vm.status.memoryProvider %q", *vm.Status.MemoryProvider))
}

// Migrate only running VMs to target with plugged devices
if vm.Status.Phase == vmv1.VmPreMigrating {
// update VM status
@@ -703,18 +693,11 @@ func (r *VirtualMachineMigrationReconciler) targetPodForVirtualMachine(
migration *vmv1.VirtualMachineMigration,
sshSecret *corev1.Secret,
) (*corev1.Pod, error) {
if vm.Status.MemoryProvider == nil {
return nil, errors.New("cannot create target pod because vm.status.memoryProvider is not set")
if err := vm.Spec.Guest.ValidateMemorySize(); err != nil {
return nil, fmt.Errorf("cannot create target pod because memory is invalid: %w", err)
}
// TODO: this is technically racy because target pod creation happens before we set the
// migration source pod, so in between reading this and starting the migration, it's
// *technically* possible that we create a target pod with a different memory provider than a
// newer source pod.
// Given that this requires (a) restart *during* initial live migration, and (b) that restart to
// change the memory provider, this is low enough risk that it's ok to leave to a follow-up.
memoryProvider := *vm.Status.MemoryProvider

pod, err := podSpec(vm, memoryProvider, sshSecret, r.Config)

pod, err := podSpec(vm, sshSecret, r.Config)
if err != nil {
return nil, err
}
7 changes: 0 additions & 7 deletions neonvm/main.go
@@ -103,7 +103,6 @@ func main() {
var enableContainerMgr bool
var disableRunnerCgroup bool
var qemuDiskCacheSettings string
var defaultMemoryProvider vmv1.MemoryProvider
var memhpAutoMovableRatio string
var failurePendingPeriod time.Duration
var failingRefreshInterval time.Duration
@@ -143,18 +142,13 @@
flag.BoolVar(&enableContainerMgr, "enable-container-mgr", false, "Enable crictl-based container-mgr alongside each VM")
flag.BoolVar(&disableRunnerCgroup, "disable-runner-cgroup", false, "Disable creation of a cgroup in neonvm-runner for fractional CPU limiting")
flag.StringVar(&qemuDiskCacheSettings, "qemu-disk-cache-settings", "cache=none", "Set neonvm-runner's QEMU disk cache settings")
flag.Func("default-memory-provider", "Set default memory provider to use for new VMs", defaultMemoryProvider.FlagFunc)
flag.StringVar(&memhpAutoMovableRatio, "memhp-auto-movable-ratio", "301", "For virtio-mem, set VM kernel's memory_hotplug.auto_movable_ratio")
flag.DurationVar(&failurePendingPeriod, "failure-pending-period", 1*time.Minute,
"the period for the propagation of reconciliation failures to the observability instruments")
flag.DurationVar(&failingRefreshInterval, "failing-refresh-interval", 1*time.Minute,
"the interval between consecutive updates of metrics and logs, related to failing reconciliations")
flag.Parse()

if defaultMemoryProvider == "" {
fmt.Fprintln(os.Stderr, "missing required flag '-default-memory-provider'")
os.Exit(1)
}
if disableRunnerCgroup && enableContainerMgr {
fmt.Fprintln(os.Stderr, "Cannot have both '-enable-container-mgr' and '-disable-runner-cgroup'")
os.Exit(1)
@@ -216,7 +210,6 @@ func main() {
MaxConcurrentReconciles: concurrencyLimit,
SkipUpdateValidationFor: skipUpdateValidationFor,
QEMUDiskCacheSettings: qemuDiskCacheSettings,
DefaultMemoryProvider: defaultMemoryProvider,
MemhpAutoMovableRatio: memhpAutoMovableRatio,
FailurePendingPeriod: failurePendingPeriod,
FailingRefreshInterval: failingRefreshInterval,
1 change: 0 additions & 1 deletion tests/e2e/autoscaling.default/00-assert.yaml
@@ -14,4 +14,3 @@ status:
status: "True"
cpus: 250m
memorySize: 1Gi
memoryProvider: DIMMSlots