diff --git a/neonvm/config/controller/deployment.yaml b/neonvm/config/controller/deployment.yaml index e1d489e78..5501e091a 100644 --- a/neonvm/config/controller/deployment.yaml +++ b/neonvm/config/controller/deployment.yaml @@ -60,6 +60,7 @@ spec: # * cache.no-flush=on - ignores disk flush operations (not needed; our disks are ephemeral) - "--qemu-disk-cache-settings=cache.writeback=on,cache.direct=on,cache.no-flush=on" - "--default-memory-provider=DIMMSlots" + - "--memhp-auto-movable-ratio=801" # for virtio-mem, set memory_hotplug.auto_movable_ratio=801 - "--failure-pending-period=1m" - "--failing-refresh-interval=15s" env: diff --git a/neonvm/controllers/config.go b/neonvm/controllers/config.go index 2b50a2c34..e4358e752 100644 --- a/neonvm/controllers/config.go +++ b/neonvm/controllers/config.go @@ -33,6 +33,14 @@ type ReconcilerConfig struct { // new VMs (or, when old ones restart) if nothing is explicitly set. DefaultMemoryProvider vmv1.MemoryProvider + // MemhpAutoMovableRatio specifies the value that new neonvm-runners will set as the + // kernel's 'memory_hotplug.auto_movable_ratio', iff the memory provider is virtio-mem. + // + // This value is passed directly to neonvm-runner as the '-memhp-auto-movable-ratio' flag. + // We've confirmed sensible values are from 301 to 801 (i.e. 3.01:1 through 8.01:1). + // The range of sensible values may extend further, but we have not tested that. + MemhpAutoMovableRatio string + // FailurePendingPeriod is the period for the propagation of // reconciliation failures to the observability instruments FailurePendingPeriod time.Duration diff --git a/neonvm/controllers/functests/vm_controller_test.go b/neonvm/controllers/functests/vm_controller_test.go index 987a4b6e5..94832d641 100644 --- a/neonvm/controllers/functests/vm_controller_test.go +++ b/neonvm/controllers/functests/vm_controller_test.go @@ -112,6 +112,7 @@ var _ = Describe("VirtualMachine controller", func() { MaxConcurrentReconciles: 1, QEMUDiskCacheSettings: "cache=none", DefaultMemoryProvider: vmv1.MemoryProviderDIMMSlots, + MemhpAutoMovableRatio: "301", FailurePendingPeriod: 1 * time.Minute, FailingRefreshInterval: 1 * time.Minute, }, diff --git a/neonvm/controllers/vm_controller.go b/neonvm/controllers/vm_controller.go index 688d14d16..fc2bb3d65 100644 --- a/neonvm/controllers/vm_controller.go +++ b/neonvm/controllers/vm_controller.go @@ -1461,8 +1461,6 @@ func podSpec( }}, Command: func() []string { cmd := []string{"runner"} - // intentionally add this first, so it's easier to see among the very long - // args that follow. if config.UseContainerMgr { cmd = append(cmd, "-skip-cgroup-management") } @@ -1470,6 +1468,14 @@ func podSpec( cmd, "-qemu-disk-cache-settings", config.QEMUDiskCacheSettings, "-memory-provider", string(memoryProvider), + ) + if memoryProvider == vmv1.MemoryProviderVirtioMem { + cmd = append(cmd, "-memhp-auto-movable-ratio", config.MemhpAutoMovableRatio) + } + // put these last, so that the earlier args are easier to see (because these + // can get quite large) + cmd = append( + cmd, "-vmspec", base64.StdEncoding.EncodeToString(vmSpecJson), "-vmstatus", base64.StdEncoding.EncodeToString(vmStatusJson), ) diff --git a/neonvm/controllers/vm_controller_unit_test.go b/neonvm/controllers/vm_controller_unit_test.go index 8afb9ede1..1478485dc 100644 --- a/neonvm/controllers/vm_controller_unit_test.go +++ b/neonvm/controllers/vm_controller_unit_test.go @@ -117,6 +117,7 @@ func newTestParams(t *testing.T) *testParams { MaxConcurrentReconciles: 10, QEMUDiskCacheSettings: "", DefaultMemoryProvider: vmv1.MemoryProviderDIMMSlots, + MemhpAutoMovableRatio: "301", FailurePendingPeriod: time.Minute, FailingRefreshInterval: time.Minute, }, diff --git a/neonvm/main.go b/neonvm/main.go index 3fc96bce1..58d798886 100644 --- a/neonvm/main.go +++ b/neonvm/main.go @@ -99,6 +99,7 @@ func main() { var enableContainerMgr bool var qemuDiskCacheSettings string var defaultMemoryProvider vmv1.MemoryProvider + var memhpAutoMovableRatio string var failurePendingPeriod time.Duration var failingRefreshInterval time.Duration flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.") @@ -110,6 +111,7 @@ func main() { flag.BoolVar(&enableContainerMgr, "enable-container-mgr", false, "Enable crictl-based container-mgr alongside each VM") flag.StringVar(&qemuDiskCacheSettings, "qemu-disk-cache-settings", "cache=none", "Set neonvm-runner's QEMU disk cache settings") flag.Func("default-memory-provider", "Set default memory provider to use for new VMs", defaultMemoryProvider.FlagFunc) + flag.StringVar(&memhpAutoMovableRatio, "memhp-auto-movable-ratio", "301", "For virtio-mem, set VM kernel's memory_hotplug.auto_movable_ratio") flag.DurationVar(&failurePendingPeriod, "failure-pending-period", 1*time.Minute, "the period for the propagation of reconciliation failures to the observability instruments") flag.DurationVar(&failingRefreshInterval, "failing-refresh-interval", 1*time.Minute, @@ -175,6 +177,7 @@ func main() { MaxConcurrentReconciles: concurrencyLimit, QEMUDiskCacheSettings: qemuDiskCacheSettings, DefaultMemoryProvider: defaultMemoryProvider, + MemhpAutoMovableRatio: memhpAutoMovableRatio, FailurePendingPeriod: failurePendingPeriod, FailingRefreshInterval: failingRefreshInterval, } diff --git a/neonvm/runner/main.go b/neonvm/runner/main.go index 8a8d02a63..abd950069 100644 --- a/neonvm/runner/main.go +++ b/neonvm/runner/main.go @@ -596,6 +596,7 @@ type Config struct { skipCgroupManagement bool diskCacheSettings string memoryProvider vmv1.MemoryProvider + autoMovableRatio string } func newConfig(logger *zap.Logger) *Config { @@ -607,6 +608,7 @@ func newConfig(logger *zap.Logger) *Config { skipCgroupManagement: false, diskCacheSettings: "cache=none", memoryProvider: "", // Require that this is explicitly set. We'll check later. + autoMovableRatio: "", // Require that this is explicitly set IFF memoryProvider is VirtioMem. We'll check later. } flag.StringVar(&cfg.vmSpecDump, "vmspec", cfg.vmSpecDump, "Base64 encoded VirtualMachine json specification") @@ -622,11 +624,17 @@ func newConfig(logger *zap.Logger) *Config { flag.StringVar(&cfg.diskCacheSettings, "qemu-disk-cache-settings", cfg.diskCacheSettings, "Cache settings to add to -drive args for VM disks") flag.Func("memory-provider", "Set provider for memory hotplug", cfg.memoryProvider.FlagFunc) + flag.StringVar(&cfg.autoMovableRatio, "memhp-auto-movable-ratio", + cfg.autoMovableRatio, "Set value of kernel's memory_hotplug.auto_movable_ratio [virtio-mem only]") + flag.Parse() if cfg.memoryProvider == "" { logger.Fatal("missing required flag '-memory-provider'") } + if cfg.memoryProvider == vmv1.MemoryProviderVirtioMem && cfg.autoMovableRatio == "" { + logger.Fatal("missing required flag '-memhp-auto-movable-ratio'") + } return cfg } @@ -911,12 +919,23 @@ func buildQEMUCmd( } const ( - baseKernelCmdline = "panic=-1 init=/neonvm/bin/init memhp_default_state=online_movable console=ttyS1 loglevel=7 root=/dev/vda rw" + baseKernelCmdline = "panic=-1 init=/neonvm/bin/init console=ttyS1 loglevel=7 root=/dev/vda rw" + kernelCmdlineDIMMSlots = "memhp_default_state=online_movable" + kernelCmdlineVirtioMemTmpl = "memhp_default_state=online memory_hotplug.online_policy=auto-movable memory_hotplug.auto_movable_ratio=%s" ) func makeKernelCmdline(cfg *Config, vmSpec *vmv1.VirtualMachineSpec, vmStatus *vmv1.VirtualMachineStatus) string { cmdlineParts := []string{baseKernelCmdline} + switch cfg.memoryProvider { + case vmv1.MemoryProviderDIMMSlots: + cmdlineParts = append(cmdlineParts, kernelCmdlineDIMMSlots) + case vmv1.MemoryProviderVirtioMem: + cmdlineParts = append(cmdlineParts, fmt.Sprintf(kernelCmdlineVirtioMemTmpl, cfg.autoMovableRatio)) + default: + panic(fmt.Errorf("unknown memory provider %s", cfg.memoryProvider)) + } + if vmSpec.ExtraNetwork != nil && vmSpec.ExtraNetwork.Enable { netDetails := fmt.Sprintf("ip=%s:::%s:%s:eth1:off", vmStatus.ExtraNetIP, vmStatus.ExtraNetMask, vmStatus.PodName) cmdlineParts = append(cmdlineParts, netDetails)