A/B-based snapshot restore latency test #4089

Merged: 5 commits, Sep 22, 2023
17 changes: 15 additions & 2 deletions tests/conftest.py
@@ -67,6 +67,11 @@ def pytest_addoption(parser):
action="store_true",
help="fail the test if the baseline does not match",
)
parser.addoption(
"--binary-dir",
action="store",
help="use firecracker/jailer binaries from this directory instead of compiling from source",
)


@pytest.fixture(scope="function", autouse=True)
@@ -271,7 +276,7 @@ def fc_tmp_path(test_fc_session_root_path):


@pytest.fixture()
def microvm_factory(fc_tmp_path, bin_cloner_path):
def microvm_factory(fc_tmp_path, bin_cloner_path, request):
"""Fixture to create microvms simply.

In order to avoid running out of space when instantiating many microvms,
@@ -280,7 +285,15 @@ def microvm_factory(fc_tmp_path, bin_cloner_path):
One can comment the removal line, if it helps with debugging.
"""

uvm_factory = MicroVMFactory(fc_tmp_path, bin_cloner_path)
if binary_dir := request.config.getoption("--binary-dir"):
fc_binary_path = Path(binary_dir) / "firecracker"
jailer_binary_path = Path(binary_dir) / "jailer"
else:
fc_binary_path, jailer_binary_path = build_tools.get_firecracker_binaries()

uvm_factory = MicroVMFactory(
fc_tmp_path, bin_cloner_path, fc_binary_path, jailer_binary_path
)
yield uvm_factory
uvm_factory.kill()
shutil.rmtree(fc_tmp_path)
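With this change the test suite can be pointed at prebuilt binaries instead of compiling Firecracker from source. A hypothetical invocation (assuming pytest is driven directly from the tests directory and that `firecracker` and `jailer` executables were already built into `/path/to/binaries`) might look like:

    pytest integration_tests/performance --binary-dir=/path/to/binaries

When the flag is omitted, the factory falls back to `build_tools.get_firecracker_binaries()` and builds the binaries from source, as before.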
59 changes: 43 additions & 16 deletions tests/framework/ab_test.py
@@ -23,8 +23,12 @@
"""
import contextlib
import os
import statistics
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Callable, Optional, TypeVar
from typing import Callable, List, Optional, TypeVar

import scipy

from framework import utils

@@ -47,7 +51,7 @@ def default_comparator(ah: T, be: T) -> bool:


def git_ab_test(
test_runner: Callable[[bool], T],
test_runner: Callable[[Path, bool], T],
comparator: Callable[[T, T], U] = default_comparator,
*,
a_revision: str = DEFAULT_A_REVISION,
@@ -57,13 +61,13 @@
Performs an A/B-test using the given test runner between the specified revision, and the currently checked out revision.
The specified revisions will be checked out in temporary directories, with `test_runner` getting executed in the
repository root. If the test depends on firecracker binaries built from the requested revision, care has to be taken
that they are built from the sources in the temporary directory (which can be ensured via the `workspace` parameter
to `cargo_build.get_binary`).
that they are built from the sources in the temporary directory.

Note that there are no guarantees on the order in which the two tests are run.

:param test_runner: A callable which when executed runs the test in the context of the current working directory. Its
parameter is `true` if and only if it is currently running the "A" test.
first parameter is a temporary directory in which firecracker is checked out at some revision.
The second parameter is `true` if and only if the checked out revision is the "A" revision.
:param comparator: A callable taking two outputs from `test_runner` and comparing them. Should return some value
indicating whether the test should pass or not, which will be returned by the `ab_test` functions,
and on which the caller can then do an assertion.
@@ -79,26 +83,46 @@
# uncommitted changes. In the CI this will not work because multiple tests will run in parallel, and thus switching
# branches will cause random failures in other tests.
with temporary_checkout(a_revision) as a_tmp:
with chdir(a_tmp):
result_a = test_runner(True)
result_a = test_runner(a_tmp, True)

if b_revision:
with temporary_checkout(b_revision) as b_tmp:
with chdir(b_tmp):
result_b = test_runner(False)
result_b = test_runner(b_tmp, False)
# Have to call comparator here to make sure both temporary directories exist (as the comparator
# might rely on some files that were created during test running, see the benchmark test)
comparison = comparator(result_a, result_b)
else:
# By default, pytest execution happens inside the `tests` subdirectory. Change to the repository root, as
# By default, pytest execution happens inside the `tests` subdirectory. Pass the repository root, as
# documented.
with chdir(".."):
result_b = test_runner(False)
result_b = test_runner(Path.cwd().parent, False)
comparison = comparator(result_a, result_b)

return result_a, result_b, comparison


def check_regression(a_samples: List[float], b_samples: List[float]):
"""Checks for a regression by performing a permutation test. A permutation test is a non-parametric test that takes
three parameters: Two populations (sets of samples) and a function computing a "statistic" based on two populations.
First, the test computes the statistic for the initial populations. It then randomly
permutes the two populations (e.g. merges them and then randomly splits them again). For each such permuted
population, the statistic is computed. Then, all the statistics are sorted, and the percentile of the statistic for the
initial populations is computed. We then look at the fraction of statistics that are larger/smaller than that of the
initial populations. The minimum of these two fractions will then become the p-value.

The idea is that if the two populations are indeed drawn from the same distribution (e.g. if performance did not
change), then permuting will not affect the statistic (indeed, it should be approximately normal-distributed, and
the statistic for the initial populations will be somewhere "in the middle").

Useful for performance tests.
"""
return scipy.stats.permutation_test(
(a_samples, b_samples),
# Compute the difference of means, such that a positive difference indicates potential for regression.
lambda x, y: statistics.mean(y) - statistics.mean(x),
vectorized=False,
)


@contextlib.contextmanager
def temporary_checkout(revision: str):
"""
@@ -108,14 +132,17 @@ def temporary_checkout(revision: str):
happen along the way.
"""
with TemporaryDirectory() as tmp_dir:
utils.run_cmd(
f"git clone https://github.com/firecracker-microvm/firecracker {tmp_dir}"
)
# `git clone` can take a path instead of a URL, which causes it to create a copy of the
# repository at the given path. However, that path needs to point to the root of a repository,
# it cannot be some arbitrary subdirectory. Therefore:
_, git_root, _ = utils.run_cmd("git rev-parse --show-toplevel")
# split off the '\n' at the end of the stdout
utils.run_cmd(f"git clone {git_root.strip()} {tmp_dir}")

with chdir(tmp_dir):
utils.run_cmd(f"git checkout {revision}")

yield tmp_dir
yield Path(tmp_dir)


# Once we upgrade to python 3.11, this will be in contextlib:
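Taken together, the new `test_runner` signature and `check_regression` are meant to be composed through `git_ab_test`. A minimal sketch of a caller follows; the `collect_samples` body and the 0.01 threshold are illustrative, not part of this change:

    from pathlib import Path
    from typing import List

    from framework.ab_test import check_regression, git_ab_test

    def collect_samples(firecracker_checkout: Path, is_a: bool) -> List[float]:
        # Hypothetical runner: build firecracker from `firecracker_checkout`,
        # restore a snapshot repeatedly, and return the measured latencies.
        ...

    def test_no_snapshot_restore_regression():
        _, _, result = git_ab_test(collect_samples, check_regression)
        # `result` is a scipy permutation test result; a small p-value means the
        # B samples are significantly slower on average than the A samples.
        assert result.pvalue > 0.01, f"Possible regression (p={result.pvalue})"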
17 changes: 9 additions & 8 deletions tests/framework/microvm.py
@@ -150,8 +150,8 @@ class Microvm:
def __init__(
self,
resource_path,
fc_binary_path=None,
jailer_binary_path=None,
fc_binary_path,
jailer_binary_path,
microvm_id=None,
bin_cloner_path=None,
monitor_memory=True,
@@ -172,11 +172,6 @@ def __init__(
self.initrd_file = None
self.boot_args = None

# The binaries this microvm will use to start.
if fc_binary_path is None:
fc_binary_path, _ = build_tools.get_firecracker_binaries()
if jailer_binary_path is None:
_, jailer_binary_path = build_tools.get_firecracker_binaries()
self._fc_binary_path = str(fc_binary_path)
assert fc_binary_path.exists()
self._jailer_binary_path = str(jailer_binary_path)
@@ -812,17 +807,23 @@ def ssh(self):
class MicroVMFactory:
"""MicroVM factory"""

def __init__(self, base_path, bin_cloner):
def __init__(self, base_path, bin_cloner, fc_binary_path, jailer_binary_path):
self.base_path = Path(base_path)
self.bin_cloner_path = bin_cloner
self.vms = []
self.fc_binary_path = fc_binary_path
self.jailer_binary_path = jailer_binary_path

def build(self, kernel=None, rootfs=None, microvm_id=None, **kwargs):
"""Build a microvm"""
vm = Microvm(
resource_path=self.base_path,
microvm_id=microvm_id or str(uuid.uuid4()),
bin_cloner_path=self.bin_cloner_path,
fc_binary_path=kwargs.pop("fc_binary_path", self.fc_binary_path),
jailer_binary_path=kwargs.pop(
"jailer_binary_path", self.jailer_binary_path
),
**kwargs,
)
self.vms.append(vm)
21 changes: 21 additions & 0 deletions tests/host_tools/metrics.py
@@ -43,7 +43,10 @@
"""

import asyncio
import json
import os
import socket
from urllib.parse import urlparse

from aws_embedded_metrics.logger.metrics_logger_factory import create_metrics_logger

@@ -89,3 +92,21 @@ def get_metrics_logger():
logger = create_metrics_logger()
logger.reset_dimensions(False)
return MetricsWrapper(logger)


def emit_raw_emf(emf_msg: dict):
"""Emites a raw EMF log message to the local cloudwatch agent"""
if "AWS_EMF_AGENT_ENDPOINT" not in os.environ:
return

emf_msg["_aws"]["LogGroupName"] = os.environ.get(
"AWS_EMF_LOG_GROUP_NAME", f"{os.environ['AWS_EMF_NAMESPACE']}-metrics"
)
emf_msg["_aws"]["LogStreamName"] = ""

emf_endpoint = urlparse(os.environ["AWS_EMF_AGENT_ENDPOINT"])
with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as sock:
sock.sendto(
(json.dumps(emf_msg) + "\n").encode("utf-8"),
(emf_endpoint.hostname, emf_endpoint.port),
)
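For context, the helper expects a complete EMF document; a minimal, hypothetical payload (the namespace, metric name, and dimension values below are made up for illustration) could be emitted like this:

    import time

    emit_raw_emf(
        {
            "_aws": {
                "Timestamp": int(time.time() * 1000),
                "CloudWatchMetrics": [
                    {
                        "Namespace": "snapshot-ab-test",  # illustrative namespace
                        "Dimensions": [["instance"]],
                        "Metrics": [
                            {"Name": "restore_latency", "Unit": "Milliseconds"}
                        ],
                    }
                ],
            },
            "instance": "m5d.metal",  # dimension value
            "restore_latency": 42.0,  # the measured sample
        }
    )

The helper fills in the log group and stream names itself and forwards the JSON document over UDP to the agent configured in `AWS_EMF_AGENT_ENDPOINT`; if that variable is unset, it returns without sending anything.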
4 changes: 3 additions & 1 deletion tests/integration_tests/functional/test_mmds.py
@@ -17,6 +17,7 @@
populate_data_store,
run_guest_cmd,
)
from host_tools.cargo_build import get_firecracker_binaries

# Minimum lifetime of token.
MIN_TOKEN_TTL_SECONDS = 1
@@ -639,7 +640,8 @@ def test_mmds_older_snapshot(
microvm,
microvm_factory,
mmds_version,
target_fc_version=firecracker_release.snapshot_version,
firecracker_release.snapshot_version,
*get_firecracker_binaries(),
)


46 changes: 23 additions & 23 deletions tests/integration_tests/performance/test_benchmarks.py
@@ -3,15 +3,14 @@
"""Optional benchmarks-do-not-regress test"""
import json
import logging
import os
import platform
import shutil
from pathlib import Path

import pytest

from framework import utils
from framework.ab_test import git_ab_test
from framework.ab_test import chdir, git_ab_test
from host_tools.cargo_build import cargo

LOGGER = logging.getLogger(__name__)
@@ -27,34 +26,35 @@ def test_no_regression_relative_to_target_branch():
git_ab_test(run_criterion, compare_results)


def run_criterion(is_a: bool) -> Path:
def run_criterion(firecracker_checkout: Path, is_a: bool) -> Path:
"""
Executes all benchmarks by running "cargo bench --no-run", finding the executables, and running them pinned to some CPU
"""
baseline_name = "a_baseline" if is_a else "b_baseline"

# Passing --message-format json to cargo tells it to print its log in a json format. At the end, instead of the
# usual "placed executable <...> at <...>" we'll get a json object with an 'executable' key, from which we
# extract the path to the compiled benchmark binary.
_, stdout, _ = cargo(
"bench",
f"--all --quiet --target {platform.machine()}-unknown-linux-musl --message-format json --no-run",
)

executables = []
for line in stdout.split("\n"):
if line:
msg = json.loads(line)
executable = msg.get("executable")
if executable:
executables.append(executable)

for executable in executables:
utils.run_cmd(
f"CARGO_TARGET_DIR=build/cargo_target taskset -c 1 {executable} --bench --save-baseline {baseline_name}"
with chdir(firecracker_checkout):
# Passing --message-format json to cargo tells it to print its log in a json format. At the end, instead of the
# usual "placed executable <...> at <...>" we'll get a json object with an 'executable' key, from which we
# extract the path to the compiled benchmark binary.
_, stdout, _ = cargo(
"bench",
f"--all --quiet --target {platform.machine()}-unknown-linux-musl --message-format json --no-run",
)

return Path(os.getcwd()) / "build" / "cargo_target" / "criterion"
executables = []
for line in stdout.split("\n"):
if line:
msg = json.loads(line)
executable = msg.get("executable")
if executable:
executables.append(executable)

for executable in executables:
utils.run_cmd(
f"CARGO_TARGET_DIR=build/cargo_target taskset -c 1 {executable} --bench --save-baseline {baseline_name}"
)

return firecracker_checkout / "build" / "cargo_target" / "criterion"


def compare_results(location_a_baselines: Path, location_b_baselines: Path):