From d5e6dc605001f1750bd6006972f02a53a4039340 Mon Sep 17 00:00:00 2001 From: Babis Chalios Date: Tue, 22 Oct 2024 13:08:48 +0200 Subject: [PATCH 1/6] ci: add a test that ensures that KVM_CLOCK_CTRL works Launch a script in the guest that continuously calls `ls -R /` and on the host side, continuously pause and resume the microVM trying to cause an RCU soft lockup. Signed-off-by: Babis Chalios --- .../functional/test_pause_resume.py | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tests/integration_tests/functional/test_pause_resume.py b/tests/integration_tests/functional/test_pause_resume.py index 4210e3a0d7f..3d0ac124c11 100644 --- a/tests/integration_tests/functional/test_pause_resume.py +++ b/tests/integration_tests/functional/test_pause_resume.py @@ -2,6 +2,9 @@ # SPDX-License-Identifier: Apache-2.0 """Basic tests scenarios for snapshot save/restore.""" +import platform +import time + import pytest @@ -127,3 +130,38 @@ def test_pause_resume_preboot(uvm_nano): # Try to resume microvm when not running, it must fail. with pytest.raises(RuntimeError, match=expected_err): basevm.api.vm.patch(state="Resumed") + + +@pytest.mark.skipif( + platform.machine() != "x86_64", reason="Only x86_64 supports pvclocks." +) +def test_kvmclock_ctrl(uvm_plain_any): + """ + Test that pausing vCPUs does not trigger a soft lock-up + """ + + microvm = uvm_plain_any + microvm.help.enable_console() + microvm.spawn() + microvm.basic_config() + microvm.add_net_iface() + microvm.start() + + # Launch reproducer in host + # This launches `ls -R /` in a loop inside the guest. The command writes its output in the + # console. This detail is important as it writing in the console seems to increase the probability + # that we will pause the execution inside the kernel and cause a lock up. Setting KVM_CLOCK_CTRL + # bit that informs the guest we're pausing the vCPUs, should avoid that lock up. + microvm.ssh.check_output( + "timeout 60 sh -c 'while true; do ls -R /; done' > /dev/ttyS0 2>&1 < /dev/null &" + ) + + for _ in range(12): + microvm.api.vm.patch(state="Paused") + time.sleep(5) + microvm.api.vm.patch(state="Resumed") + + dmesg = microvm.ssh.check_output("dmesg").stdout + assert "rcu_sched self-detected stall on CPU" not in dmesg + assert "rcu_preempt detected stalls on CPUs/tasks" not in dmesg + assert "BUG: soft lockup -" not in dmesg From 8caefa5baefd8c58ce8c03e6ba8f5a01bece9614 Mon Sep 17 00:00:00 2001 From: Nikita Kalyazin Date: Fri, 16 Feb 2024 17:53:22 +0000 Subject: [PATCH 2/6] fix(vmm): call KVMCLOCK_CTRL when pausing This is to avoid guest kernel panic on resume path due to softlockup detection. Signed-off-by: Nikita Kalyazin Signed-off-by: Babis Chalios --- src/vmm/src/vstate/vcpu/mod.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/vmm/src/vstate/vcpu/mod.rs b/src/vmm/src/vstate/vcpu/mod.rs index 43a0946931e..83362aa37a4 100644 --- a/src/vmm/src/vstate/vcpu/mod.rs +++ b/src/vmm/src/vstate/vcpu/mod.rs @@ -336,8 +336,15 @@ impl Vcpu { .send(VcpuResponse::Paused) .expect("vcpu channel unexpectedly closed"); - // TODO: we should call `KVM_KVMCLOCK_CTRL` here to make sure - // TODO continued: the guest soft lockup watchdog does not panic on Resume. + // Calling `KVM_KVMCLOCK_CTRL` to make sure the guest softlockup watchdog + // does not panic on resume, see https://docs.kernel.org/virt/kvm/api.html . + // We do not want to fail if the call is not successful, because depending + // that may be acceptable depending on the workload. + // TODO: add a metric + #[cfg(target_arch = "x86_64")] + if let Err(err) = self.kvm_vcpu.fd.kvmclock_ctrl() { + warn!("KVM_KVMCLOCK_CTRL call failed {}", err); + } // Move to 'paused' state. state = StateMachine::next(Self::paused); From e4b66adb109afe2f1663cfd4255307563c673676 Mon Sep 17 00:00:00 2001 From: Nikita Kalyazin Date: Mon, 19 Feb 2024 15:48:55 +0000 Subject: [PATCH 3/6] fix(seccomp): add KVMCLOCK_CTRL ioctl on x86_64 This is to be able to call KVMCLOCK_CTRL ioctl in a vCPU thread. Signed-off-by: Nikita Kalyazin Signed-off-by: Babis Chalios --- resources/seccomp/x86_64-unknown-linux-musl.json | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/resources/seccomp/x86_64-unknown-linux-musl.json b/resources/seccomp/x86_64-unknown-linux-musl.json index 75c9afa02f0..861b69c6b44 100644 --- a/resources/seccomp/x86_64-unknown-linux-musl.json +++ b/resources/seccomp/x86_64-unknown-linux-musl.json @@ -1254,6 +1254,18 @@ } ] }, + { + "syscall": "ioctl", + "args": [ + { + "index": 1, + "type": "dword", + "op": "eq", + "val": 44717, + "comment": "KVM_KVMCLOCK_CTRL. We call this after pausing vCPUs to avoid soft lockups in the guest." + } + ] + }, { "syscall": "sched_yield", "comment": "Used by the rust standard library in std::sync::mpmc. Firecracker uses mpsc channels from this module for inter-thread communication" From badc4ccd42815a298b1fa1efff5aee15af33e32c Mon Sep 17 00:00:00 2001 From: Babis Chalios Date: Wed, 23 Oct 2024 11:32:59 +0200 Subject: [PATCH 4/6] metrics: add metric for KVM_KVMCLOCK_CTRL ioctl failures Add a metric that counts KVM_KVMCLOCK_CTRL ioctl call failures. These failures might happen because the guest simply doesn't use the KVM clock. In these cases, the metric will increase expectedly. Otherwise, non-zero values will indicate something actually going wrong. Signed-off-by: Babis Chalios --- src/vmm/src/logger/metrics.rs | 3 +++ src/vmm/src/vstate/vcpu/mod.rs | 2 +- tests/host_tools/fcmetrics.py | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/vmm/src/logger/metrics.rs b/src/vmm/src/logger/metrics.rs index 083881645a5..bcde569115e 100644 --- a/src/vmm/src/logger/metrics.rs +++ b/src/vmm/src/logger/metrics.rs @@ -779,6 +779,8 @@ pub struct VcpuMetrics { pub exit_mmio_write: SharedIncMetric, /// Number of errors during this VCPU's run. pub failures: SharedIncMetric, + /// Number of times that the `KVM_KVMCLOCK_CTRL` ioctl failed. + pub kvmclock_ctrl_fails: SharedIncMetric, /// Provides Min/max/sum for KVM exits handling input IO. pub exit_io_in_agg: LatencyAggregateMetrics, /// Provides Min/max/sum for KVM exits handling output IO. @@ -797,6 +799,7 @@ impl VcpuMetrics { exit_mmio_read: SharedIncMetric::new(), exit_mmio_write: SharedIncMetric::new(), failures: SharedIncMetric::new(), + kvmclock_ctrl_fails: SharedIncMetric::new(), exit_io_in_agg: LatencyAggregateMetrics::new(), exit_io_out_agg: LatencyAggregateMetrics::new(), exit_mmio_read_agg: LatencyAggregateMetrics::new(), diff --git a/src/vmm/src/vstate/vcpu/mod.rs b/src/vmm/src/vstate/vcpu/mod.rs index 83362aa37a4..cb63afa4579 100644 --- a/src/vmm/src/vstate/vcpu/mod.rs +++ b/src/vmm/src/vstate/vcpu/mod.rs @@ -340,9 +340,9 @@ impl Vcpu { // does not panic on resume, see https://docs.kernel.org/virt/kvm/api.html . // We do not want to fail if the call is not successful, because depending // that may be acceptable depending on the workload. - // TODO: add a metric #[cfg(target_arch = "x86_64")] if let Err(err) = self.kvm_vcpu.fd.kvmclock_ctrl() { + METRICS.vcpu.kvmclock_ctrl_fails.inc(); warn!("KVM_KVMCLOCK_CTRL call failed {}", err); } diff --git a/tests/host_tools/fcmetrics.py b/tests/host_tools/fcmetrics.py index 457b3c65aa2..e33e89089dc 100644 --- a/tests/host_tools/fcmetrics.py +++ b/tests/host_tools/fcmetrics.py @@ -234,6 +234,7 @@ def validate_fc_metrics(metrics): "exit_mmio_read", "exit_mmio_write", "failures", + "kvmclock_ctrl_fails", {"exit_io_in_agg": latency_agg_metrics_fields}, {"exit_io_out_agg": latency_agg_metrics_fields}, {"exit_mmio_read_agg": latency_agg_metrics_fields}, From 2d5f1414b654572fdc0dbd4cd6e69e1a7450536f Mon Sep 17 00:00:00 2001 From: Babis Chalios Date: Wed, 23 Oct 2024 11:54:39 +0200 Subject: [PATCH 5/6] doc: add CHANGELOG entry for KVM_KVMCLOCK_CTRL ioctl Mention that we now call KVM_KVMCLOCK_CTRL ioctl on x86_64 after pausing vCPUs. Clarify that failures to call this ioctl are not fatal and that we log the failure and increase a metric when these happen. Signed-off-by: Babis Chalios --- CHANGELOG.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f7f50678de4..c29a94cbdd2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,15 @@ and this project adheres to `VIRTIO_NET_F_RX_MRGBUF` support to the `virtio-net` device. When this feature is negotiated, guest `virtio-net` driver can perform more efficient memory management which in turn improves RX and TX performance. +- [#4460](https://github.com/firecracker-microvm/firecracker/pull/4460): Add a + call to + [`KVM_KVMCLOCK_CTRL`](https://docs.kernel.org/virt/kvm/api.html#kvm-kvmclock-ctrl) + after pausing vCPUs on x86_64 architectures. This ioctl sets a flag in the KVM + state of the vCPU indicating that it has been paused by the host userspace. In + guests that use kvmclock, the soft lockup watchdog checks this flag. If it is + set, it won't trigger the lockup condition. Calling the ioctl for guests that + don't use kvmclock will fail. These failures are not fatal. We log the failure + and increase the `vcpu.kvmclock_ctrl_fails` metric. ### Changed From bc6417ca8eb360ec9dba833223fb9a83a7ca4c46 Mon Sep 17 00:00:00 2001 From: Babis Chalios Date: Wed, 23 Oct 2024 12:01:14 +0200 Subject: [PATCH 6/6] fix(ci): condition for a non-existent process `pidfd_open` will fail if there is not a process with the requested PID. According to `man pidfd_open(2)`, it will return EINVAL when `PID` is not valid and `ESRCH` when the `PID` does not exist. Right now, we were checking only for the latter condition. Change the logic to also care for the former, which materializes as an OSError exception with errno == EINVAL. Signed-off-by: Babis Chalios --- tests/framework/utils.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tests/framework/utils.py b/tests/framework/utils.py index 88e520fa5f3..dbae40016ea 100644 --- a/tests/framework/utils.py +++ b/tests/framework/utils.py @@ -1,6 +1,7 @@ # Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Generic utility functions that are used in the framework.""" +import errno import functools import json import logging @@ -453,7 +454,9 @@ def get_process_pidfd(pid): """Get a pidfd file descriptor for the process with PID `pid` Will return a pid file descriptor for the process with PID `pid` if it is - still alive. If the process has already exited it will return `None`. + still alive. If the process has already exited we will receive either a + `ProcessLookupError` exception or and an `OSError` exception with errno `EINVAL`. + In these cases, we will return `None`. Any other error while calling the system call, will raise an OSError exception. @@ -462,6 +465,11 @@ def get_process_pidfd(pid): pidfd = os.pidfd_open(pid) except ProcessLookupError: return None + except OSError as err: + if err.errno == errno.EINVAL: + return None + + raise return pidfd