Skip to content

Commit

Permalink
Merge branch 'main' into dependabot/cargo/firecracker-35cb359f3a
Browse files Browse the repository at this point in the history
  • Loading branch information
wearyzen authored Sep 30, 2023
2 parents a8dfaf3 + 3d2a61f commit 6d29d1d
Show file tree
Hide file tree
Showing 8 changed files with 466 additions and 63 deletions.
12 changes: 12 additions & 0 deletions .buildkite/pipeline_ab.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,18 @@
from common import COMMON_PARSER, group, overlay_dict, pipeline_to_json

perf_test = {
"block": {
"label": "🖴 Block Performance",
"test_path": "integration_tests/performance/test_block_ab.py",
"devtool_opts": "-c 1-10 -m 0",
"timeout_in_minutes": 40,
},
"network-latency": {
"label": "🖧 Network Latency",
"test_path": "integration_tests/performance/test_network_ab.py",
"devtool_opts": "-c 1-10 -m 0",
"timeout_in_minutes": 30,
},
"snapshot-latency": {
"label": "📸 Snapshot Latency",
"test_path": "integration_tests/performance/test_snapshot_ab.py",
Expand Down
13 changes: 13 additions & 0 deletions tests/framework/microvm.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,19 @@ def pid_in_new_ns(self):
# Read the PID stored inside the file.
return int(pid_file_path.read_text(encoding="ascii"))

@property
def dimensions(self):
    """Default set of CloudWatch dimensions describing this microvm's configuration."""
    # Kernel name with its first two characters dropped
    # (NOTE(review): presumably strips a fixed 2-char prefix from the kernel
    # file stem — confirm against the kernel naming scheme).
    guest_kernel = self.kernel_file.stem[2:]
    # Guest memory expressed in MiB; float division is intentional so the
    # rendered value matches the original formatting (e.g. "1024.0MB").
    guest_memory = f"{self.mem_size_bytes / (1024 * 1024)}MB"
    dims = {
        "instance": global_props.instance,
        "cpu_model": global_props.cpu_model,
        "host_kernel": f"linux-{global_props.host_linux_version}",
        "guest_kernel": guest_kernel,
        "rootfs": self.rootfs_file.name,
        "vcpus": str(self.vcpus_count),
        "guest_memory": guest_memory,
    }
    return dims

def flush_metrics(self):
"""Flush the microvm metrics and get the latest datapoint"""
self.api.actions.put(action_type="FlushMetrics")
Expand Down
4 changes: 3 additions & 1 deletion tests/framework/properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"""
Metadata we want to attach to tests for further analysis and troubleshooting
"""

import os
import platform
import re
import subprocess
Expand Down Expand Up @@ -57,6 +57,8 @@ def __init__(self):
self.git_branch = run_cmd("git show -s --pretty=%D HEAD")
self.git_origin_url = run_cmd("git config --get remote.origin.url")
self.rust_version = run_cmd("rustc --version |awk '{print $2}'")
self.buildkite_pipeline_slug = os.environ.get("BUILDKITE_PIPELINE_SLUG")
self.buildkite_build_number = os.environ.get("BUILDKITE_BUILD_NUMBER")

self.environment = self._detect_environment()
if self.is_ec2:
Expand Down
86 changes: 86 additions & 0 deletions tests/host_tools/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,3 +110,89 @@ def emit_raw_emf(emf_msg: dict):
(json.dumps(emf_msg) + "\n").encode("utf-8"),
(emf_endpoint.hostname, emf_endpoint.port),
)


# Maps each unit to the next-larger unit in its family (factor-1000 steps).
# Every value here must also be a key in UNIT_SHORTHANDS so that reduced
# values can be pretty-printed.
UNIT_REDUCTIONS = {
    "Microseconds": "Milliseconds",
    "Milliseconds": "Seconds",
    "Bytes": "Kilobytes",
    "Kilobytes": "Megabytes",
    "Megabytes": "Gigabytes",
    "Gigabytes": "Terabytes",
    "Bits": "Kilobits",
    "Kilobits": "Megabits",
    "Megabits": "Gigabits",
    # Fixed: was "Terabit" (missing trailing "s"), which has no entry in
    # UNIT_SHORTHANDS and so could never be rendered as a shorthand.
    "Gigabits": "Terabits",
    "Bytes/Second": "Kilobytes/Second",
    "Kilobytes/Second": "Megabytes/Second",
    "Megabytes/Second": "Gigabytes/Second",
    "Gigabytes/Second": "Terabytes/Second",
    "Bits/Second": "Kilobits/Second",
    "Kilobits/Second": "Megabits/Second",
    "Megabits/Second": "Gigabits/Second",
    "Gigabits/Second": "Terabits/Second",
}
# Inverse mapping: each unit to the next-smaller unit in its family.
INV_UNIT_REDUCTIONS = {v: k for k, v in UNIT_REDUCTIONS.items()}


# Shorthand rendering for every unit format_with_reduced_unit may encounter.
# The time/percent/count entries are irregular and listed literally; the
# byte/bit families (and their rates) follow a regular K/M/G/T pattern and
# are generated below.
UNIT_SHORTHANDS = {
    "Seconds": "s",
    "Microseconds": "μs",
    "Milliseconds": "ms",
    "Percent": "%",
    "Count": "",
    "Count/Second": "Hz",
    "None": "",
}
for _prefix, _abbrev in (("", ""), ("Kilo", "K"), ("Mega", "M"), ("Giga", "G"), ("Tera", "T")):
    _byte_unit = f"{_prefix}bytes" if _prefix else "Bytes"
    _bit_unit = f"{_prefix}bits" if _prefix else "Bits"
    UNIT_SHORTHANDS[_byte_unit] = f"{_abbrev}B"
    UNIT_SHORTHANDS[_bit_unit] = f"{_abbrev}Bit"
    UNIT_SHORTHANDS[f"{_byte_unit}/Second"] = f"{_abbrev}B/s"
    UNIT_SHORTHANDS[f"{_bit_unit}/Second"] = f"{_abbrev}Bit/s"


def reduce_value(value, unit):
    """
    Express `value` in the largest unit in which it would still be >= 1.

    For example, `reduce_value(1_000_000, "Bytes")` returns `(1, "Megabytes")`.
    Relies on the module-level UNIT_REDUCTIONS / INV_UNIT_REDUCTIONS tables.
    """
    # Two bounded loops instead of recursion: each iteration moves exactly one
    # link along a finite unit chain, so termination is guaranteed even if
    # floating-point rounding would otherwise make a recursive formulation
    # oscillate between "< 1" and ">= 1000".
    scaled, current = value, unit
    # Too small for the current unit: step down to smaller units.
    while abs(scaled) < 1 and current in INV_UNIT_REDUCTIONS:
        scaled, current = scaled * 1000, INV_UNIT_REDUCTIONS[current]
    # Too large for the current unit: step up to larger units.
    while abs(scaled) >= 1000 and current in UNIT_REDUCTIONS:
        scaled, current = scaled / 1000, UNIT_REDUCTIONS[current]

    return scaled, current


def format_with_reduced_unit(value, unit):
    """
    Pretty-print `value` using the largest unit in which it stays >= 1,
    rendered with that unit's shorthand (two decimal places).

    For example, `format_with_reduced_unit(1_000_000, "Bytes")` returns "1.00MB".
    """
    reduced, reduced_unit = reduce_value(value, unit)
    # Fall back to the full unit name when no shorthand is registered.
    return f"{reduced:.2f}{UNIT_SHORTHANDS.get(reduced_unit, reduced_unit)}"
176 changes: 176 additions & 0 deletions tests/integration_tests/performance/test_block_ab.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
# Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Performance benchmark for block device emulation."""

import concurrent
import concurrent.futures
import os
import shutil
from pathlib import Path

import pytest

import host_tools.drive as drive_tools
from framework.utils import CmdBuilder, get_cpu_percent, run_cmd

# size of the block device used in the test, in MB
BLOCK_DEVICE_SIZE_MB = 2048

# Time (in seconds) for which fio "warms up"
WARMUP_SEC = 10

# Time (in seconds) for which fio runs after warmup is done
RUNTIME_SEC = 30

# VM guest memory size, in MiB
GUEST_MEM_MIB = 1024


def prepare_microvm_for_test(microvm):
    """Prepares the microvm for running a fio-based performance test by tweaking
    various performance related parameters."""
    guest_commands = [
        # Disable the guest I/O scheduler so fio measures the device itself.
        "echo 'none' > /sys/block/vdb/queue/scheduler",
        # First, flush all guest cached data to host, then drop guest FS caches.
        "sync",
        "echo 3 > /proc/sys/vm/drop_caches",
    ]
    for command in guest_commands:
        rc, _, stderr = microvm.ssh.run(command)
        assert rc == 0, stderr
        assert stderr == ""

    # Then, flush all host cached data to hardware, also drop host FS caches.
    run_cmd("sync")
    run_cmd("echo 3 > /proc/sys/vm/drop_caches")


def run_fio(microvm, mode, block_size):
    """Run a fio test in the specified mode with block size bs.

    :param microvm: the booted microvm whose /dev/vdb device is benchmarked
    :param mode: fio rw mode (e.g. "randread", "randwrite")
    :param block_size: fio block size in bytes
    :return: tuple of (path to directory holding the fio bandwidth logs,
             CPU load samples collected while fio was running)
    """
    cmd = (
        CmdBuilder("fio")
        .with_arg(f"--name={mode}-{block_size}")
        .with_arg(f"--rw={mode}")
        .with_arg(f"--bs={block_size}")
        .with_arg("--filename=/dev/vdb")
        # BUGFIX: fio's boolean option is `time_based`; the previous
        # `--time_base=1` is not a recognized fio parameter. With
        # `time_based` set, fio runs for the full --runtime even if the
        # file/device is fully read or written earlier.
        .with_arg("--time_based=1")
        .with_arg(f"--size={BLOCK_DEVICE_SIZE_MB}M")
        .with_arg("--direct=1")
        .with_arg("--ioengine=libaio")
        .with_arg("--iodepth=32")
        .with_arg(f"--ramp_time={WARMUP_SEC}")
        .with_arg(f"--numjobs={microvm.vcpus_count}")
        # Set affinity of the entire fio process to a set of vCPUs equal in size to number of workers
        .with_arg(
            f"--cpus_allowed={','.join(str(i) for i in range(microvm.vcpus_count))}"
        )
        # Instruct fio to pin one worker per vcpu
        .with_arg("--cpus_allowed_policy=split")
        .with_arg("--randrepeat=0")
        .with_arg(f"--runtime={RUNTIME_SEC}")
        .with_arg(f"--write_bw_log={mode}")
        .with_arg("--log_avg_msec=1000")
        .with_arg("--output-format=json+")
        .build()
    )

    logs_path = Path(microvm.jailer.chroot_base_with_id()) / "fio_output"

    # Start from a clean log directory so stale logs from a previous run
    # cannot leak into this run's results.
    if logs_path.is_dir():
        shutil.rmtree(logs_path)

    logs_path.mkdir()

    prepare_microvm_for_test(microvm)

    # Start the CPU load monitor in a background thread; it samples the
    # firecracker process for RUNTIME_SEC seconds, skipping the warmup window.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        cpu_load_future = executor.submit(
            get_cpu_percent,
            microvm.jailer_clone_pid,
            RUNTIME_SEC,
            omit=WARMUP_SEC,
        )

        # Print the fio command in the log and run it
        rc, _, stderr = microvm.ssh.run(f"cd /tmp; {cmd}")
        assert rc == 0, stderr
        assert stderr == ""

        # Pull the per-worker bandwidth logs out of the guest, then clean up
        # so a later fio invocation starts fresh.
        microvm.ssh.scp_get("/tmp/*.log", logs_path)
        rc, _, stderr = microvm.ssh.run("rm /tmp/*.log")
        assert rc == 0, stderr

        return logs_path, cpu_load_future.result()


def process_fio_logs(vm, fio_mode, logs_dir, metrics):
    """Parses the fio logs in `{logs_dir}/{fio_mode}_bw.*.log and emits their contents as CloudWatch metrics"""
    # fio log direction codes, see
    # https://fio.readthedocs.io/en/latest/fio_doc.html#log-file-formats
    metric_by_direction = {"0": "bw_read", "1": "bw_write"}

    # fio numbers its per-worker logs starting from 1.
    for worker in range(1, vm.vcpus_count + 1):
        log_file = Path(logs_dir) / f"{fio_mode}_bw.{worker}.log"

        for line in log_file.read_text("UTF-8").splitlines():
            # Log line format: time, value, direction, <rest>
            _, raw_value, direction, _ = line.split(",", maxsplit=3)
            direction = direction.strip()
            assert direction in metric_by_direction
            metrics.put_metric(
                metric_by_direction[direction],
                int(raw_value.strip()),
                "Kilobytes/Second",
            )


@pytest.mark.nonci
@pytest.mark.timeout(RUNTIME_SEC * 1000)  # 30_000 s; NOTE(review): the old "1.40 hours" note did not match this value — confirm the intended budget
@pytest.mark.parametrize("vcpus", [1, 2], ids=["1vcpu", "2vcpu"])
@pytest.mark.parametrize("fio_mode", ["randread", "randwrite"])
@pytest.mark.parametrize("fio_block_size", [4096], ids=["bs4096"])
def test_block_performance(
    microvm_factory,
    guest_kernel,
    rootfs,
    vcpus,
    fio_mode,
    fio_block_size,
    io_engine,
    metrics,
):
    """
    Execute block device emulation benchmarking scenarios.

    Boots a microvm with a dedicated scratch block device, pins its threads
    to host cores, runs fio against the device, then emits the bandwidth and
    VMM CPU-utilization samples as metrics tagged with the test's dimensions.
    """
    vm = microvm_factory.build(guest_kernel, rootfs, monitor_memory=False)
    vm.spawn(log_level="Info")
    vm.basic_config(vcpu_count=vcpus, mem_size_mib=GUEST_MEM_MIB)
    vm.add_net_iface()
    # Add a secondary block device for benchmark tests.
    fs = drive_tools.FilesystemFile(
        os.path.join(vm.fsfiles, "scratch"), BLOCK_DEVICE_SIZE_MB
    )
    vm.add_drive("scratch", fs.path, io_engine=io_engine)
    vm.start()

    # Pin uVM threads to physical cores: VMM on core 0, API thread on core 1,
    # vCPUs on cores 2 and up — keeps host scheduler noise out of the results.
    assert vm.pin_vmm(0), "Failed to pin firecracker thread."
    assert vm.pin_api(1), "Failed to pin fc_api thread."
    for i in range(vm.vcpus_count):
        assert vm.pin_vcpu(i, i + 2), f"Failed to pin fc_vcpu {i} thread."

    logs_dir, cpu_load = run_fio(vm, fio_mode, fio_block_size)

    process_fio_logs(vm, fio_mode, logs_dir, metrics)

    # Emit each CPU-utilization sample of the first "firecracker" entry.
    # NOTE(review): assumes cpu_load["firecracker"] has exactly one
    # thread/series — confirm against get_cpu_percent's return shape.
    for cpu_util_data_point in list(cpu_load["firecracker"].values())[0]:
        metrics.put_metric("cpu_utilization_vmm", cpu_util_data_point, "Percent")

    # Tag all metrics emitted above with this test's configuration plus the
    # microvm's default dimension set.
    metrics.set_dimensions(
        {
            "performance_test": "test_block_performance",
            "io_engine": io_engine,
            "fio_mode": fio_mode,
            "fio_block_size": str(fio_block_size),
            **vm.dimensions,
        }
    )
Loading

0 comments on commit 6d29d1d

Please sign in to comment.