Merge branch 'main' into prepare-1-5

firecracker-microvm · Oct 9, 2023 · d67ef18 · d67ef18
2 parents 7c289ee + 9232614
commit d67ef18
Show file tree

Hide file tree

Showing 8 changed files with 182 additions and 90 deletions.
diff --git a/.github/workflows/trigger_ab_tests.yml b/.github/workflows/trigger_ab_tests.yml
@@ -2,6 +2,7 @@ on:
   push:
     branches:
       - main
+      - firecracker-v*
 
 jobs:
   trigger_ab_test:

diff --git a/tests/framework/ab_test.py b/tests/framework/ab_test.py
@@ -100,7 +100,9 @@ def git_ab_test(
         return result_a, result_b, comparison
 
 
-def check_regression(a_samples: List[float], b_samples: List[float]):
+def check_regression(
+    a_samples: List[float], b_samples: List[float], *, n_resamples: int = 9999
+):
     """Checks for a regression by performing a permutation test. A permutation test is a non-parametric test that takes
     three parameters: Two populations (sets of samples) and a function computing a "statistic" based on two populations.
     First, the test computes the statistic for the initial populations. It then randomly
@@ -120,6 +122,7 @@ def check_regression(a_samples: List[float], b_samples: List[float]):
         # Compute the difference of means, such that a positive difference indicates potential for regression.
         lambda x, y: statistics.mean(y) - statistics.mean(x),
         vectorized=False,
+        n_resamples=n_resamples,
     )
 
 

diff --git a/tests/framework/utils_iperf.py b/tests/framework/utils_iperf.py
@@ -57,11 +57,13 @@ def run_test(self, first_free_cpu):
         assert self._num_clients < CpuMap.len() - self._microvm.vcpus_count - 2
 
         for server_idx in range(self._num_clients):
-            cmd = self.host_command(server_idx).build()
             assigned_cpu = CpuMap(first_free_cpu)
-            utils.run_cmd(
-                f"taskset --cpu-list {assigned_cpu} {self._microvm.jailer.netns_cmd_prefix()} {cmd}"
+            cmd = (
+                self.host_command(server_idx)
+                .with_arg("--affinity", assigned_cpu)
+                .build()
             )
+            utils.run_cmd(f"{self._microvm.jailer.netns_cmd_prefix()} {cmd}")
             first_free_cpu += 1
 
         time.sleep(SERVER_STARTUP_TIME_SEC)
@@ -105,12 +107,14 @@ def spawn_iperf3_client(self, client_idx):
         mode = MODE_MAP[self._mode][client_idx % len(MODE_MAP[self._mode])]
 
         # Add the port where the iperf3 client is going to send/receive.
-        cmd = self.guest_command(client_idx).with_arg(mode).build()
-
-        pinned_cmd = (
-            f"taskset --cpu-list {client_idx % self._microvm.vcpus_count} {cmd}"
+        cmd = (
+            self.guest_command(client_idx)
+            .with_arg(mode)
+            .with_arg("--affinity", client_idx % self._microvm.vcpus_count)
+            .build()
         )
-        rc, stdout, stderr = self._microvm.ssh.run(pinned_cmd)
+
+        rc, stdout, stderr = self._microvm.ssh.run(cmd)
 
         assert rc == 0, stderr
 
@@ -176,18 +180,24 @@ def emit_iperf3_metrics(metrics, iperf_result, omit):
     )[0]:
         metrics.put_metric("cpu_utilization_vmm", cpu_util_data_point, "Percent")
 
-    for time_series in iperf_result["g2h"]:
-        for interval in time_series["intervals"][omit:]:
-            metrics.put_metric(
-                "throughput_guest_to_host",
-                interval["sum"]["bits_per_second"],
-                "Bits/Second",
-            )
+    data_points = zip(
+        *[time_series["intervals"][omit:] for time_series in iperf_result["g2h"]]
+    )
 
-    for time_series in iperf_result["h2g"]:
-        for interval in time_series["intervals"][omit:]:
-            metrics.put_metric(
-                "throughput_host_to_guest",
-                interval["sum"]["bits_per_second"],
-                "Bits/Second",
-            )
+    for point_in_time in data_points:
+        metrics.put_metric(
+            "throughput_guest_to_host",
+            sum(interval["sum"]["bits_per_second"] for interval in point_in_time),
+            "Bits/Second",
+        )
+
+    data_points = zip(
+        *[time_series["intervals"][omit:] for time_series in iperf_result["h2g"]]
+    )
+
+    for point_in_time in data_points:
+        metrics.put_metric(
+            "throughput_host_to_guest",
+            sum(interval["sum"]["bits_per_second"] for interval in point_in_time),
+            "Bits/Second",
+        )
diff --git a/tests/host_tools/metrics.py b/tests/host_tools/metrics.py
@@ -105,7 +105,8 @@ def emit_raw_emf(emf_msg: dict):
         "AWS_EMF_LOG_GROUP_NAME", f"{namespace}-metrics"
     )
     emf_msg["_aws"]["LogStreamName"] = os.environ.get("AWS_EMF_LOG_STREAM_NAME", "")
-    emf_msg["_aws"]["Namespace"] = namespace
+    for metrics in emf_msg["_aws"]["CloudWatchMetrics"]:
+        metrics["Namespace"] = namespace
 
     emf_endpoint = urlparse(os.environ["AWS_EMF_AGENT_ENDPOINT"])
     with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as sock:

diff --git a/tests/integration_tests/performance/test_block_ab.py b/tests/integration_tests/performance/test_block_ab.py
@@ -106,25 +106,38 @@ def run_fio(microvm, mode, block_size):
 
 def process_fio_logs(vm, fio_mode, logs_dir, metrics):
     """Parses the fio logs in `{logs_dir}/{fio_mode}_bw.*.log and emits their contents as CloudWatch metrics"""
-    for job_id in range(vm.vcpus_count):
-        data = Path(f"{logs_dir}/{fio_mode}_bw.{job_id + 1}.log").read_text("UTF-8")
 
-        for line in data.splitlines():
+    data = [
+        Path(f"{logs_dir}/{fio_mode}_bw.{job_id + 1}.log")
+        .read_text("UTF-8")
+        .splitlines()
+        for job_id in range(vm.vcpus_count)
+    ]
+
+    for tup in zip(*data):
+        bw_read = 0
+        bw_write = 0
+
+        for line in tup:
             _, value, direction, _ = line.split(",", maxsplit=3)
             value = int(value.strip())
 
             # See https://fio.readthedocs.io/en/latest/fio_doc.html#log-file-formats
             match direction.strip():
                 case "0":
-                    metrics.put_metric("bw_read", value, "Kilobytes/Second")
+                    bw_read += value
                 case "1":
-                    metrics.put_metric("bw_write", value, "Kilobytes/Second")
+                    bw_write += value
                 case _:
                     assert False
 
+        if bw_read:
+            metrics.put_metric("bw_read", bw_read, "Kilobytes/Second")
+        if bw_write:
+            metrics.put_metric("bw_write", bw_write, "Kilobytes/Second")
+
 
 @pytest.mark.nonci
-@pytest.mark.timeout(RUNTIME_SEC * 1000)  # 1.40 hours
 @pytest.mark.parametrize("vcpus", [1, 2], ids=["1vcpu", "2vcpu"])
 @pytest.mark.parametrize("fio_mode", ["randread", "randwrite"])
 @pytest.mark.parametrize("fio_block_size", [4096], ids=["bs4096"])

diff --git a/tests/integration_tests/performance/test_network_ab.py b/tests/integration_tests/performance/test_network_ab.py
@@ -6,7 +6,6 @@
 
 import pytest
 
-from framework.utils import CpuMap
 from framework.utils_iperf import IPerf3Test, emit_iperf3_metrics
 
 # each iteration is 30 * 0.2s = 6s
@@ -46,50 +45,57 @@ def consume_ping_output(ping_putput):
         yield float(time[0])
 
 
-@pytest.mark.nonci
-@pytest.mark.timeout(3600)
-def test_network_latency(microvm_factory, guest_kernel, rootfs, metrics):
-    """
-    Test network latency for multiple vm configurations.
-
-    Send a ping from the guest to the host.
-    """
+@pytest.fixture
+def network_microvm(request, microvm_factory, guest_kernel, rootfs):
+    """Creates a microvm with the networking setup used by the performance tests in this file.
 
+    This fixture receives its vcpu count via indirect parameterization"""
     vm = microvm_factory.build(guest_kernel, rootfs, monitor_memory=False)
     vm.spawn(log_level="Info")
-    vm.basic_config(vcpu_count=GUEST_VCPUS, mem_size_mib=GUEST_MEM_MIB)
-    iface = vm.add_net_iface()
+    vm.basic_config(vcpu_count=request.param, mem_size_mib=GUEST_MEM_MIB)
+    vm.add_net_iface()
     vm.start()
 
-    # Check if the needed CPU cores are available. We have the API thread, VMM
-    # thread and then one thread for each configured vCPU.
-    assert CpuMap.len() >= 2 + vm.vcpus_count
-
     # Pin uVM threads to physical cores.
     assert vm.pin_vmm(0), "Failed to pin firecracker thread."
     assert vm.pin_api(1), "Failed to pin fc_api thread."
     for i in range(vm.vcpus_count):
         assert vm.pin_vcpu(i, i + 2), f"Failed to pin fc_vcpu {i} thread."
 
+    return vm
+
+
+@pytest.mark.nonci
+@pytest.mark.parametrize("network_microvm", [1], indirect=True)
+def test_network_latency(
+    network_microvm, metrics
+):  # pylint:disable=redefined-outer-name
+    """
+    Test network latency for multiple vm configurations.
+
+    Send a ping from the guest to the host.
+    """
+
     samples = []
+    host_ip = network_microvm.iface["eth0"]["iface"].host_ip
 
     for _ in range(ITERATIONS):
-        rc, ping_output, stderr = vm.ssh.run(
-            f"ping -c {REQUEST_PER_ITERATION} -i {DELAY} {iface.host_ip}"
+        rc, ping_output, stderr = network_microvm.ssh.run(
+            f"ping -c {REQUEST_PER_ITERATION} -i {DELAY} {host_ip}"
         )
         assert rc == 0, stderr
 
         samples.extend(consume_ping_output(ping_output))
 
     metrics.set_dimensions(
-        {"performance_test": "test_network_latency", **vm.dimensions}
+        {"performance_test": "test_network_latency", **network_microvm.dimensions}
     )
 
     for sample in samples:
         metrics.put_metric("ping_latency", sample, "Milliseconds")
 
 
-class TCPIPerf3Test(IPerf3Test):
+class TcpIPerf3Test(IPerf3Test):
     """IPerf3 runner for the TCP throughput performance test"""
 
     BASE_PORT = 5000
@@ -120,55 +126,40 @@ def __init__(self, microvm, mode, host_ip, payload_length):
 
 @pytest.mark.nonci
 @pytest.mark.timeout(3600)
-@pytest.mark.parametrize("vcpus", [1, 2])
+@pytest.mark.parametrize("network_microvm", [1, 2], indirect=True)
 @pytest.mark.parametrize("payload_length", ["128K", "1024K"], ids=["p128K", "p1024K"])
 @pytest.mark.parametrize("mode", ["g2h", "h2g", "bd"])
 def test_network_tcp_throughput(
-    microvm_factory,
-    guest_kernel,
-    rootfs,
-    vcpus,
+    network_microvm,
     payload_length,
     mode,
     metrics,
-):
+):  # pylint:disable=redefined-outer-name
     """
     Iperf between guest and host in both directions for TCP workload.
     """
 
     # We run bi-directional tests only on uVMs with at least 2 vCPUs
     # because we need to pin one iperf3/direction per vCPU, and since we
     # have two directions, we need at least two vCPUs.
-    if mode == "bd" and vcpus < 2:
+    if mode == "bd" and network_microvm.vcpus_count < 2:
         pytest.skip("bidrectional test only done with at least 2 vcpus")
 
-    vm = microvm_factory.build(guest_kernel, rootfs, monitor_memory=False)
-    vm.spawn(log_level="Info")
-    vm.basic_config(vcpu_count=vcpus, mem_size_mib=GUEST_MEM_MIB)
-    iface = vm.add_net_iface()
-    vm.start()
-
-    # Check if the needed CPU cores are available. We have the API thread, VMM
-    # thread and then one thread for each configured vCPU. Lastly, we need one for
-    # the iperf server on the host.
-    assert CpuMap.len() > 2 + vm.vcpus_count
-
-    # Pin uVM threads to physical cores.
-    assert vm.pin_vmm(0), "Failed to pin firecracker thread."
-    assert vm.pin_api(1), "Failed to pin fc_api thread."
-    for i in range(vm.vcpus_count):
-        assert vm.pin_vcpu(i, i + 2), f"Failed to pin fc_vcpu {i} thread."
-
-    test = TCPIPerf3Test(vm, mode, iface.host_ip, payload_length)
-    data = test.run_test(vm.vcpus_count + 2)
+    test = TcpIPerf3Test(
+        network_microvm,
+        mode,
+        network_microvm.iface["eth0"]["iface"].host_ip,
+        payload_length,
+    )
+    data = test.run_test(network_microvm.vcpus_count + 2)
 
     metrics.set_dimensions(
         {
             "performance_test": "test_network_tcp_throughput",
             "payload_length": payload_length,
             "mode": mode,
-            **vm.dimensions,
+            **network_microvm.dimensions,
         }
     )
 
-    emit_iperf3_metrics(metrics, data, TCPIPerf3Test.WARMUP_SEC)
+    emit_iperf3_metrics(metrics, data, TcpIPerf3Test.WARMUP_SEC)
diff --git a/tests/integration_tests/performance/test_vsock_ab.py b/tests/integration_tests/performance/test_vsock_ab.py
@@ -6,7 +6,6 @@
 
 import pytest
 
-from framework.utils import CpuMap
 from framework.utils_iperf import IPerf3Test, emit_iperf3_metrics
 from framework.utils_vsock import VSOCK_UDS_PATH, make_host_port_path
 
@@ -93,11 +92,6 @@ def test_vsock_throughput(
     vm.api.vsock.put(vsock_id="vsock0", guest_cid=3, uds_path="/" + VSOCK_UDS_PATH)
     vm.start()
 
-    # Check if the needed CPU cores are available. We have the API thread, VMM
-    # thread and then one thread for each configured vCPU. Lastly, we need one for
-    # the iperf server on the host.
-    assert CpuMap.len() > 2 + vm.vcpus_count
-
     # Pin uVM threads to physical cores.
     assert vm.pin_vmm(0), "Failed to pin firecracker thread."
     assert vm.pin_api(1), "Failed to pin fc_api thread."
-Original file line number
+Diff line change
@@ Expand Up / @@ -2,6 +2,7 @@ on: @@
       push:
         branches:
           - main
+          - firecracker-v*
     jobs:
       trigger_ab_test:
@@ Expand Down @@