Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

vCPU Hotplug Performance Tests #4722

Draft
wants to merge 2 commits into
base: feature/vcpu-hotplug
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/vmm/src/devices/pseudo/boot_timer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ const MAGIC_VALUE_SIGNAL_GUEST_BOOT_COMPLETE: u8 = 123;
/// Pseudo device to record the kernel boot time.
#[derive(Debug)]
pub struct BootTimer {
start_ts: TimestampUs,
pub start_ts: TimestampUs,
}

impl BootTimer {
Expand Down
7 changes: 7 additions & 0 deletions src/vmm/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -683,6 +683,13 @@

self.acpi_device_manager.notify_cpu_container()?;

if let Some(devices::BusDevice::BootTimer(timer)) =

Check warning on line 686 in src/vmm/src/lib.rs

View check run for this annotation

Codecov / codecov/patch

src/vmm/src/lib.rs#L686

Added line #L686 was not covered by tests
self.get_bus_device(DeviceType::BootTimer, "BootTimer")
{
let mut locked_timer = timer.lock().expect("Poisoned lock");
locked_timer.start_ts = utils::time::TimestampUs::default();

Check warning on line 690 in src/vmm/src/lib.rs

View check run for this annotation

Codecov / codecov/patch

src/vmm/src/lib.rs#L688-L690

Added lines #L688 - L690 were not covered by tests
}

Ok(new_machine_config)
}

Expand Down
3 changes: 3 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,9 @@ def rootfs_fxt(request, record_property):
guest_kernel_linux_5_10 = pytest.fixture(
guest_kernel_fxt, params=kernel_params("vmlinux-5.10*")
)
# Kernel fixture restricted to the "vmlinux-5.10.219" selector instead of the
# "vmlinux-5.10*" wildcard used by guest_kernel_linux_5_10.
# NOTE(review): the name suggests this particular build is ACPI-only; confirm
# and document why the hotplug tests need it.
guest_kernel_linux_acpi_only = pytest.fixture(
    guest_kernel_fxt, params=kernel_params("vmlinux-5.10.219")
)
# Use the unfiltered selector, since we don't officially support 6.1 yet.
# TODO: switch to default selector once we add full 6.1 support.
guest_kernel_linux_6_1 = pytest.fixture(
Expand Down
1 change: 1 addition & 0 deletions tests/framework/http_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,3 +123,4 @@ def __init__(self, api_usocket_full_name):
self.snapshot_load = Resource(self, "/snapshot/load")
self.cpu_config = Resource(self, "/cpu-config")
self.entropy = Resource(self, "/entropy")
self.hotplug = Resource(self, "/hotplug")
1 change: 1 addition & 0 deletions tests/host_tools/1-cpu-hotplug.rules
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# On every "add" event in the cpu subsystem, if the device's "online" sysfs
# attribute is not already "1", write "1" to it.
SUBSYSTEM=="cpu", ACTION=="add", ATTR{online}!="1", ATTR{online}="1"
17 changes: 17 additions & 0 deletions tests/host_tools/hotplug.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/bash
# Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

# Usage: hotplug.sh <N>
#
# Waits for cpu<N> to appear in sysfs, writes 1 to the "online" attribute of
# cpu1..cpu<N>, waits until nproc reports N + 1 CPUs, then runs
# /home/hotplug_time.o.

# Reject a missing or non-numeric argument up front: without it the sysfs
# check below would match the bare ".../cpu" directory and the arithmetic
# expansion in the final wait would fail on every iteration of an endless loop.
if [[ ! $1 =~ ^[0-9]+$ ]]; then
    echo "usage: $0 <number-of-hotplugged-vcpus>" >&2
    exit 1
fi

# Force base 10 so that a zero-padded argument is not parsed as octal.
added_vcpus=$((10#$1))

# NOTE(review): both waits spin without sleeping, presumably so that polling
# granularity does not add to the measured latency -- keep them tight.
until [[ -d "/sys/devices/system/cpu/cpu${added_vcpus}" ]]; do
    :
done

for ((i = 1; i <= added_vcpus; i++)); do
    echo 1 >"/sys/devices/system/cpu/cpu${i}/online"
done

until [[ $(nproc) == "$((1 + added_vcpus))" ]]; do
    :
done

/home/hotplug_time.o
33 changes: 33 additions & 0 deletions tests/host_tools/hotplug_time.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

// Boot timer helper for hotplug timing. It writes the magic value to the boot timer MMIO address and exits; it does not exec /sbin/init.

#include <fcntl.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <unistd.h>

// Base address values are defined in arch/src/lib.rs as arch::MMIO_MEM_START.
// Values are computed in arch/src/<arch>/mod.rs from the architecture layouts.
// Position on the bus is defined by MMIO_LEN increments, where MMIO_LEN is
// defined as 0x1000 in vmm/src/device_manager/mmio.rs.
#ifdef __x86_64__
#define MAGIC_MMIO_SIGNAL_GUEST_BOOT_COMPLETE 0xd0000000
#endif
#ifdef __aarch64__
#define MAGIC_MMIO_SIGNAL_GUEST_BOOT_COMPLETE 0x40000000
#endif

#define MAGIC_VALUE_SIGNAL_GUEST_BOOT_COMPLETE 123

// Write MAGIC_VALUE_SIGNAL_GUEST_BOOT_COMPLETE to the first byte of the page
// at physical address MAGIC_MMIO_SIGNAL_GUEST_BOOT_COMPLETE, mapped through
// /dev/mem.
//
// Exit status: 0 on success, 1 if /dev/mem cannot be opened, 2 if the page
// cannot be mapped.
int main(void) {
    int fd = open("/dev/mem", (O_RDWR | O_SYNC | O_CLOEXEC));
    if (fd < 0) {
        return 1;
    }

    int mapped_size = getpagesize();

    char *map_base = mmap(NULL, mapped_size, PROT_WRITE, MAP_SHARED, fd,
                          MAGIC_MMIO_SIGNAL_GUEST_BOOT_COMPLETE);
    // mmap reports failure with MAP_FAILED, not NULL; writing through it
    // would crash instead of producing a diagnosable exit status.
    if (map_base == MAP_FAILED) {
        close(fd);
        return 2;
    }

    *map_base = MAGIC_VALUE_SIGNAL_GUEST_BOOT_COMPLETE;
    msync(map_base, mapped_size, MS_ASYNC);

    munmap(map_base, mapped_size);
    close(fd);
    return 0;
}
9 changes: 9 additions & 0 deletions tests/host_tools/hotplug_udev.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash
# Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

# Usage: hotplug_udev.sh <N>
#
# Waits until nproc reports N + 1 CPUs, then runs /home/hotplug_time.o.
# This script never onlines CPUs itself.
# NOTE(review): presumably the 1-cpu-hotplug.rules udev rule does the
# onlining -- confirm against the test that installs it.

# Reject a missing or non-numeric argument up front: otherwise the arithmetic
# expansion below would fail on every iteration of an endless loop.
if [[ ! $1 =~ ^[0-9]+$ ]]; then
    echo "usage: $0 <number-of-hotplugged-vcpus>" >&2
    exit 1
fi

# Force base 10 so that a zero-padded argument is not parsed as octal.
expected_online=$((1 + 10#$1))

# NOTE(review): spins without sleeping, presumably so that polling
# granularity does not add to the measured latency -- keep it tight.
until [[ $(nproc) == "$expected_online" ]]; do
    :
done

/home/hotplug_time.o
195 changes: 195 additions & 0 deletions tests/integration_tests/performance/test_vcpu_hotplug.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
# Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

"""Testing hotplug performance"""

import os
import platform
import re
import time
from pathlib import Path

import pandas
import pytest

from framework.utils_cpuid import check_guest_cpuid_output
from host_tools.cargo_build import gcc_compile


@pytest.mark.nonci
@pytest.mark.skipif(
    platform.machine() != "x86_64", reason="Hotplug only enabled on x86_64."
)
@pytest.mark.parametrize(
    "vcpu_count", [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
)
def test_custom_udev_rule_latency(
    microvm_factory,
    guest_kernel_linux_6_1,
    rootfs_rw,
    vcpu_count,
    results_dir,
    test_fc_session_root_path,
):
    """Record vCPU hotplug latency with the custom udev rule installed.

    Boots 20 single-vCPU microVMs in turn. Each one gets the compiled
    hotplug_time helper, the hotplug_udev.sh waiter script and the
    1-cpu-hotplug.rules udev rule, and is then asked to hotplug
    ``vcpu_count`` vCPUs. One row per VM is written to
    ``hotplug-<vcpu_count>.csv`` in ``results_dir``:

    * ``api`` - last "Total previous API call duration" in the log, in ms;
    * ``onlining`` - first "Guest-boot-time" in the log, in ms, or empty
      when the log contains no such line.
    """
    api_duration_pattern = r"Total previous API call duration: (\d+) us\."
    boot_time_pattern = r"Guest-boot-time\s+\=\s+(\d+)\s+us"

    timer_binary = os.path.join(test_fc_session_root_path, "hotplug_time.o")
    gcc_compile(Path("./host_tools/hotplug_time.c"), timer_binary)

    samples = []
    for _ in range(20):
        vm = microvm_factory.build(guest_kernel_linux_6_1, rootfs_rw)
        vm.jailer.extra_args.update({"boot-timer": None, "no-seccomp": None})
        vm.help.enable_console()
        vm.spawn()
        vm.basic_config(vcpu_count=1, mem_size_mib=128)
        vm.add_net_iface()
        vm.start()

        # (host path, guest path) pairs, copied in this order.
        uploads = (
            (Path("./host_tools/hotplug_udev.sh"), Path("/home/hotplug_udev.sh")),
            (timer_binary, Path("/home/hotplug_time.o")),
            (
                Path("./host_tools/1-cpu-hotplug.rules"),
                Path("/usr/lib/udev/rules.d/1-cpu-hotplug.rules"),
            ),
        )
        for host_path, guest_path in uploads:
            vm.ssh.scp_put(host_path, guest_path)

        vm.ssh.run(
            f"udevadm control --reload-rules && tmux new-session -d /bin/bash /home/hotplug_udev.sh {vcpu_count}"
        )

        vm.api.hotplug.put(Vcpu={"add": vcpu_count})
        time.sleep(5)

        # The log reports microseconds; the CSV stores milliseconds.
        last_api_us = re.findall(api_duration_pattern, vm.log_data)[-1]
        row = {"vcpus": vcpu_count, "api": float(last_api_us) / 1000, "onlining": None}

        try:
            first_boot_us = re.findall(boot_time_pattern, vm.log_data)[0]
        except IndexError:
            # No boot-timer line was logged: keep the row with an empty
            # "onlining" value and move on to the next VM.
            vm.kill()
            samples.append(row)
            continue

        row["onlining"] = float(first_boot_us) / 1000
        samples.append(row)

        expected_lscpu = {
            "CPU(s)": str(1 + vcpu_count),
            "On-line CPU(s) list": f"0-{vcpu_count}",
        }
        check_guest_cpuid_output(vm, "lscpu", None, ":", expected_lscpu)
        vm.kill()

    csv_path = results_dir / f"hotplug-{vcpu_count}.csv"
    frame = pandas.DataFrame.from_dict(samples)
    csv_path.write_text(frame.to_csv(index=False, float_format="%.3f"))


@pytest.mark.nonci
@pytest.mark.skipif(
    platform.machine() != "x86_64", reason="Hotplug only enabled on x86_64."
)
@pytest.mark.parametrize(
    "vcpu_count", [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
)
def test_manual_latency(
    microvm_factory,
    guest_kernel_linux_6_1,
    rootfs_rw,
    vcpu_count,
    results_dir,
    test_fc_session_root_path,
):
    """Record vCPU hotplug latency when a guest script onlines the CPUs.

    Boots 20 single-vCPU microVMs in turn. Each one gets the compiled
    hotplug_time helper and the hotplug.sh script, and is then asked to
    hotplug ``vcpu_count`` vCPUs. One row per VM is written to
    ``hotplug-<vcpu_count>.csv`` in ``results_dir``:

    * ``api`` - last "Total previous API call duration" in the log, in ms;
    * ``onlining`` - first "Guest-boot-time" in the log, in ms, or empty
      when the log contains no such line.
    """
    api_duration_pattern = r"Total previous API call duration: (\d+) us\."
    boot_time_pattern = r"Guest-boot-time\s+\=\s+(\d+)\s+us"

    timer_binary = os.path.join(test_fc_session_root_path, "hotplug_time.o")
    gcc_compile(Path("./host_tools/hotplug_time.c"), timer_binary)

    samples = []
    for _ in range(20):
        vm = microvm_factory.build(guest_kernel_linux_6_1, rootfs_rw)
        vm.jailer.extra_args.update({"boot-timer": None, "no-seccomp": None})
        vm.help.enable_console()
        vm.spawn()
        vm.basic_config(vcpu_count=1, mem_size_mib=128)
        vm.add_net_iface()
        vm.start()

        # (host path, guest path) pairs, copied in this order.
        uploads = (
            (Path("./host_tools/hotplug.sh"), Path("/home/hotplug.sh")),
            (timer_binary, Path("/home/hotplug_time.o")),
        )
        for host_path, guest_path in uploads:
            vm.ssh.scp_put(host_path, guest_path)

        vm.ssh.run(f"tmux new-session -d /bin/bash /home/hotplug.sh {vcpu_count}")

        vm.api.hotplug.put(Vcpu={"add": vcpu_count})
        time.sleep(5)

        # The log reports microseconds; the CSV stores milliseconds.
        last_api_us = re.findall(api_duration_pattern, vm.log_data)[-1]
        row = {"vcpus": vcpu_count, "api": float(last_api_us) / 1000, "onlining": None}

        try:
            first_boot_us = re.findall(boot_time_pattern, vm.log_data)[0]
        except IndexError:
            # No boot-timer line was logged: keep the row with an empty
            # "onlining" value and move on to the next VM.
            samples.append(row)
            vm.kill()
            continue

        row["onlining"] = float(first_boot_us) / 1000
        samples.append(row)

        expected_lscpu = {
            "CPU(s)": str(1 + vcpu_count),
            "On-line CPU(s) list": f"0-{vcpu_count}",
        }
        check_guest_cpuid_output(vm, "lscpu", None, ":", expected_lscpu)
        vm.kill()

    csv_path = results_dir / f"hotplug-{vcpu_count}.csv"
    frame = pandas.DataFrame.from_dict(samples)
    csv_path.write_text(frame.to_csv(index=False, float_format="%.3f"))
Loading