From c723745c6487e1956218259ff494c2b7686d023b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Barb=C3=A1chano?= Date: Mon, 9 Oct 2023 09:56:11 +0200 Subject: [PATCH 01/15] fix: restore the id parameter default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Running Firecracker without parameters causes a panic. ``` [anonymous-instance:main] Running Firecracker v1.5.0-dev [anonymous-instance:main] Firecracker panicked at 'called `Option::unwrap()` on a `None` value', src/firecracker/src/main.rs:291:52 [1] 980007 IOT instruction ./firecracker ``` Fixes: 332f2184ae29265fbae1648469e073fc7f987378 Signed-off-by: Pablo Barbáchano --- src/firecracker/src/main.rs | 9 +++------ .../functional/test_cmd_line_parameters.py | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/src/firecracker/src/main.rs b/src/firecracker/src/main.rs index 5b237b02bae..cf95ff1f995 100644 --- a/src/firecracker/src/main.rs +++ b/src/firecracker/src/main.rs @@ -144,6 +144,7 @@ fn main_exec() -> Result<(), MainError> { .arg( Argument::new("id") .takes_value(true) + .default_value(vmm::logger::DEFAULT_INSTANCE_ID) .help("MicroVM unique identifier."), ) .arg( @@ -262,8 +263,6 @@ fn main_exec() -> Result<(), MainError> { return Ok(()); } - info!("Running Firecracker v{FIRECRACKER_VERSION}"); - register_signal_handlers().map_err(MainError::RegisterSignalHandlers)?; #[cfg(target_arch = "aarch64")] @@ -298,11 +297,9 @@ fn main_exec() -> Result<(), MainError> { app_name: "Firecracker".to_string(), }; - let id = arguments - .single_value("id") - .map(|s| s.as_str()) - .unwrap_or(vmm::logger::DEFAULT_INSTANCE_ID); + let id = arguments.single_value("id").map(|s| s.as_str()).unwrap(); vmm::logger::INSTANCE_ID.set(String::from(id)).unwrap(); + info!("Running Firecracker v{FIRECRACKER_VERSION}"); // Apply the logger configuration. let log_path = arguments.single_value("log-path").map(PathBuf::from); diff --git a/tests/integration_tests/functional/test_cmd_line_parameters.py b/tests/integration_tests/functional/test_cmd_line_parameters.py index 4b3bcb00703..55985ddea0f 100644 --- a/tests/integration_tests/functional/test_cmd_line_parameters.py +++ b/tests/integration_tests/functional/test_cmd_line_parameters.py @@ -3,6 +3,7 @@ """Tests that ensure the correctness of the command line parameters.""" import platform +import subprocess from pathlib import Path import pytest @@ -130,3 +131,18 @@ def test_cli_metrics_if_resume_no_metrics(uvm_plain, microvm_factory): # Then: the old metrics configuration does not exist metrics2 = Path(uvm2.jailer.chroot_path()) / metrics_path.name assert not metrics2.exists() + + +def test_cli_no_params(): + """ + Test running firecracker with no parameters should work + """ + + fc_binary, _ = get_firecracker_binaries() + process = subprocess.Popen(fc_binary) + try: + process.communicate(timeout=3) + assert process.returncode is None + except subprocess.TimeoutExpired: + # The good case + process.kill() From 0643a52fff006baeaf6e930a31ce8eb4588b3206 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Mon, 2 Oct 2023 12:32:37 +0100 Subject: [PATCH 02/15] test(ab): Ensure we do enough permutations for chosen p-threshold The minimal p-value that a permutation test can return is 1/#permutations. Should a p-value smaller than this be chosen, the test will never fail. Therefore, always pick 100 times as many permutations as needed. 
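As a rough illustration of the rule above (not part of the change itself; 0.01 is simply the default significance threshold used by tools/ab_test.py):

```
# Minimal sketch: derive the resample count from the p-value threshold.
# The smallest p-value a permutation test can report is roughly 1/n_resamples,
# so n_resamples must sit well above 1/p_thresh for the test to ever fail.
p_thresh = 0.01
n_resamples = int(100 / p_thresh)  # 10_000 permutations for the 0.01 default

assert 1 / n_resamples == p_thresh / 100
print(f"{n_resamples} resamples, smallest reportable p-value ~ {1 / n_resamples}")
```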
Signed-off-by: Patrick Roy --- tests/framework/ab_test.py | 5 ++++- tools/ab_test.py | 8 +++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/tests/framework/ab_test.py b/tests/framework/ab_test.py index 316ecc5704b..8b7204baf20 100644 --- a/tests/framework/ab_test.py +++ b/tests/framework/ab_test.py @@ -100,7 +100,9 @@ def git_ab_test( return result_a, result_b, comparison -def check_regression(a_samples: List[float], b_samples: List[float]): +def check_regression( + a_samples: List[float], b_samples: List[float], *, n_resamples: int = 9999 +): """Checks for a regression by performing a permutation test. A permutation test is a non-parametric test that takes three parameters: Two populations (sets of samples) and a function computing a "statistic" based on two populations. First, the test computes the statistic for the initial populations. It then randomly @@ -120,6 +122,7 @@ def check_regression(a_samples: List[float], b_samples: List[float]): # Compute the difference of means, such that a positive different indicates potential for regression. lambda x, y: statistics.mean(y) - statistics.mean(x), vectorized=False, + n_resamples=n_resamples, ) diff --git a/tools/ab_test.py b/tools/ab_test.py index 0efd89b2fae..ad98ca3d8be 100755 --- a/tools/ab_test.py +++ b/tools/ab_test.py @@ -145,7 +145,7 @@ def collect_data(firecracker_checkout: Path, test: str): return load_data_series(revision) -def analyze_data(processed_emf_a, processed_emf_b): +def analyze_data(processed_emf_a, processed_emf_b, *, n_resamples: int = 9999): """ Analyzes the A/B-test data produced by `collect_data`, by performing regression tests as described this script's doc-comment. @@ -175,7 +175,9 @@ def analyze_data(processed_emf_a, processed_emf_b): print( f"Doing A/B-test for dimensions {dimension_set} and property {metric}" ) - result = check_regression(values_a, metrics_b[metric][0]) + result = check_regression( + values_a, metrics_b[metric][0], n_resamples=n_resamples + ) metrics_logger.set_dimensions({"metric": metric, **dict(dimension_set)}) metrics_logger.put_metric("p_value", float(result.pvalue), "None") @@ -201,7 +203,7 @@ def ab_performance_test(a_revision, b_revision, test, p_thresh, strength_thresh) processed_emf_a, processed_emf_b, results = git_ab_test( lambda checkout, _: collect_data(checkout, test), - analyze_data, + lambda ah, be: analyze_data(ah, be, n_resamples=int(100 / p_thresh)), a_revision=a_revision, b_revision=b_revision, ) From 562ae2f1677325cc9e3217107079712b77111cbd Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Mon, 2 Oct 2023 14:06:20 +0100 Subject: [PATCH 03/15] test(network-ab): Factor out microvm setup into fixture Both tests were using identical microvm setups, so we factor it out into a shared fixture (with indirect parameterization for the number of vcpus). Signed-off-by: Patrick Roy --- .../performance/test_network_ab.py | 81 +++++++++---------- 1 file changed, 39 insertions(+), 42 deletions(-) diff --git a/tests/integration_tests/performance/test_network_ab.py b/tests/integration_tests/performance/test_network_ab.py index 55769348ce2..706de24ee84 100644 --- a/tests/integration_tests/performance/test_network_ab.py +++ b/tests/integration_tests/performance/test_network_ab.py @@ -46,19 +46,15 @@ def consume_ping_output(ping_putput): yield float(time[0]) -@pytest.mark.nonci -@pytest.mark.timeout(3600) -def test_network_latency(microvm_factory, guest_kernel, rootfs, metrics): - """ - Test network latency for multiple vm configurations. 
- - Send a ping from the guest to the host. - """ +@pytest.fixture +def network_microvm(request, microvm_factory, guest_kernel, rootfs): + """Creates a microvm with the networking setup used by the performance tests in this file. + This fixture receives its vcpu count via indirect parameterization""" vm = microvm_factory.build(guest_kernel, rootfs, monitor_memory=False) vm.spawn(log_level="Info") - vm.basic_config(vcpu_count=GUEST_VCPUS, mem_size_mib=GUEST_MEM_MIB) - iface = vm.add_net_iface() + vm.basic_config(vcpu_count=request.param, mem_size_mib=GUEST_MEM_MIB) + vm.add_net_iface() vm.start() # Check if the needed CPU cores are available. We have the API thread, VMM @@ -71,25 +67,41 @@ def test_network_latency(microvm_factory, guest_kernel, rootfs, metrics): for i in range(vm.vcpus_count): assert vm.pin_vcpu(i, i + 2), f"Failed to pin fc_vcpu {i} thread." + return vm + + +@pytest.mark.nonci +@pytest.mark.timeout(3600) +@pytest.mark.parametrize("network_microvm", [1], indirect=True) +def test_network_latency( + network_microvm, metrics +): # pylint:disable=redefined-outer-name + """ + Test network latency for multiple vm configurations. + + Send a ping from the guest to the host. + """ + samples = [] + host_ip = network_microvm.iface["eth0"]["iface"].host_ip for _ in range(ITERATIONS): - rc, ping_output, stderr = vm.ssh.run( - f"ping -c {REQUEST_PER_ITERATION} -i {DELAY} {iface.host_ip}" + rc, ping_output, stderr = network_microvm.ssh.run( + f"ping -c {REQUEST_PER_ITERATION} -i {DELAY} {host_ip}" ) assert rc == 0, stderr samples.extend(consume_ping_output(ping_output)) metrics.set_dimensions( - {"performance_test": "test_network_latency", **vm.dimensions} + {"performance_test": "test_network_latency", **network_microvm.dimensions} ) for sample in samples: metrics.put_metric("ping_latency", sample, "Milliseconds") -class TCPIPerf3Test(IPerf3Test): +class TcpIPerf3Test(IPerf3Test): """IPerf3 runner for the TCP throughput performance test""" BASE_PORT = 5000 @@ -120,18 +132,15 @@ def __init__(self, microvm, mode, host_ip, payload_length): @pytest.mark.nonci @pytest.mark.timeout(3600) -@pytest.mark.parametrize("vcpus", [1, 2]) +@pytest.mark.parametrize("network_microvm", [1, 2], indirect=True) @pytest.mark.parametrize("payload_length", ["128K", "1024K"], ids=["p128K", "p1024K"]) @pytest.mark.parametrize("mode", ["g2h", "h2g", "bd"]) def test_network_tcp_throughput( - microvm_factory, - guest_kernel, - rootfs, - vcpus, + network_microvm, payload_length, mode, metrics, -): +): # pylint:disable=redefined-outer-name """ Iperf between guest and host in both directions for TCP workload. """ @@ -139,36 +148,24 @@ def test_network_tcp_throughput( # We run bi-directional tests only on uVM with more than 2 vCPus # because we need to pin one iperf3/direction per vCPU, and since we # have two directions, we need at least two vCPUs. - if mode == "bd" and vcpus < 2: + if mode == "bd" and network_microvm.vcpus_count < 2: pytest.skip("bidrectional test only done with at least 2 vcpus") - vm = microvm_factory.build(guest_kernel, rootfs, monitor_memory=False) - vm.spawn(log_level="Info") - vm.basic_config(vcpu_count=vcpus, mem_size_mib=GUEST_MEM_MIB) - iface = vm.add_net_iface() - vm.start() - - # Check if the needed CPU cores are available. We have the API thread, VMM - # thread and then one thread for each configured vCPU. Lastly, we need one for - # the iperf server on the host. - assert CpuMap.len() > 2 + vm.vcpus_count - - # Pin uVM threads to physical cores. 
- assert vm.pin_vmm(0), "Failed to pin firecracker thread." - assert vm.pin_api(1), "Failed to pin fc_api thread." - for i in range(vm.vcpus_count): - assert vm.pin_vcpu(i, i + 2), f"Failed to pin fc_vcpu {i} thread." - - test = TCPIPerf3Test(vm, mode, iface.host_ip, payload_length) - data = test.run_test(vm.vcpus_count + 2) + test = TcpIPerf3Test( + network_microvm, + mode, + network_microvm.iface["eth0"]["iface"].host_ip, + payload_length, + ) + data = test.run_test(network_microvm.vcpus_count + 2) metrics.set_dimensions( { "performance_test": "test_network_tcp_throughput", "payload_length": payload_length, "mode": mode, - **vm.dimensions, + **network_microvm.dimensions, } ) - emit_iperf3_metrics(metrics, data, TCPIPerf3Test.WARMUP_SEC) + emit_iperf3_metrics(metrics, data, TcpIPerf3Test.WARMUP_SEC) From a1980048e88b3191fed03103cd20b908a86a86fa Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Thu, 28 Sep 2023 10:37:12 +0100 Subject: [PATCH 04/15] test: Declare high-variance tests Some tests are inexplicably unstable, where even two consecutive runs on the same machine give wildly different results. Until we find out why these are this unstable, there is no real point in running them, so do not fail the pipeline if these tests fail. Signed-off-by: Patrick Roy --- tools/ab_test.py | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/tools/ab_test.py b/tools/ab_test.py index ad98ca3d8be..b8db04e04a9 100755 --- a/tools/ab_test.py +++ b/tools/ab_test.py @@ -40,6 +40,30 @@ get_metrics_logger, ) +# Performance tests that are known to be unstable and exhibit variances of up to 60% of the mean +IGNORED = [ + # Network throughput on m6a.metal + {"instance": "m6a.metal", "performance_test": "test_network_tcp_throughput"}, + # Block throughput for 1 vcpu on m6g.metal/5.10 + { + "performance_test": "test_block_performance", + "instance": "m6g.metal", + "host_kernel": "linux-5.10", + "vcpus": "1", + }, +] + + +def is_ignored(dimensions) -> bool: + """Checks whether the given dimensions match a entry in the IGNORED dictionary above""" + for high_variance in IGNORED: + matching = {key: dimensions[key] for key in high_variance} + + if matching == high_variance: + return True + + return False + def extract_dimensions(emf): """Extracts the cloudwatch dimensions from an EMF log message""" @@ -210,11 +234,14 @@ def ab_performance_test(a_revision, b_revision, test, p_thresh, strength_thresh) failures = [] for (dimension_set, metric), (result, unit) in results.items(): + if is_ignored(dict(dimension_set)): + continue + values_a = processed_emf_a[dimension_set][metric][0] - if ( - result.pvalue < p_thresh - and abs(result.statistic) > abs(statistics.mean(values_a)) * strength_thresh - ): + + if result.pvalue < p_thresh and abs(result.statistic) > abs( + statistics.mean(values_a) + ) * (strength_thresh): failures.append((dimension_set, metric, result, unit)) failure_report = "\n".join( From bbda8f9144447ecb1e607c845d799a839b2e3551 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Tue, 3 Oct 2023 11:35:39 +0100 Subject: [PATCH 05/15] test(iperf3): Use --affinity instead of taskset Instead of using taskset to pin iperf3 worker to specific cores, use iperf3's built-in --affinity argument to do so. 
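For illustration only (a minimal sketch, not the test framework's actual helper code; it assumes iperf3 is installed and a server is already listening on the placeholder address and port below):

```
import subprocess

guest_ip, port, cpu = "172.16.0.2", 5000, 4  # placeholder values

# Before: CPU pinning was applied externally via taskset, e.g.
#   taskset --cpu-list 4 iperf3 -c 172.16.0.2 -p 5000
# After: iperf3 pins itself through its built-in affinity flag.
subprocess.run(
    ["iperf3", "-c", guest_ip, "-p", str(port), "--affinity", str(cpu)],
    check=True,
)
```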
Signed-off-by: Patrick Roy --- tests/framework/utils_iperf.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/tests/framework/utils_iperf.py b/tests/framework/utils_iperf.py index ab7117e405c..1b7f043e26b 100644 --- a/tests/framework/utils_iperf.py +++ b/tests/framework/utils_iperf.py @@ -57,11 +57,13 @@ def run_test(self, first_free_cpu): assert self._num_clients < CpuMap.len() - self._microvm.vcpus_count - 2 for server_idx in range(self._num_clients): - cmd = self.host_command(server_idx).build() assigned_cpu = CpuMap(first_free_cpu) - utils.run_cmd( - f"taskset --cpu-list {assigned_cpu} {self._microvm.jailer.netns_cmd_prefix()} {cmd}" + cmd = ( + self.host_command(server_idx) + .with_arg("--affinity", assigned_cpu) + .build() ) + utils.run_cmd(f"{self._microvm.jailer.netns_cmd_prefix()} {cmd}") first_free_cpu += 1 time.sleep(SERVER_STARTUP_TIME_SEC) @@ -105,12 +107,14 @@ def spawn_iperf3_client(self, client_idx): mode = MODE_MAP[self._mode][client_idx % len(MODE_MAP[self._mode])] # Add the port where the iperf3 client is going to send/receive. - cmd = self.guest_command(client_idx).with_arg(mode).build() - - pinned_cmd = ( - f"taskset --cpu-list {client_idx % self._microvm.vcpus_count} {cmd}" + cmd = ( + self.guest_command(client_idx) + .with_arg(mode) + .with_arg("--affinity", client_idx % self._microvm.vcpus_count) + .build() ) - rc, stdout, stderr = self._microvm.ssh.run(pinned_cmd) + + rc, stdout, stderr = self._microvm.ssh.run(cmd) assert rc == 0, stderr From 6cc7270c24bd780812622e312f17451b36dbb918 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Tue, 3 Oct 2023 13:20:02 +0100 Subject: [PATCH 06/15] test(perf): Remove unneeded asserts on cpu count These asserts are already done in CpuMap when pinning threads to CPUs, so no need to repeat them here. Signed-off-by: Patrick Roy --- tests/integration_tests/performance/test_network_ab.py | 5 ----- tests/integration_tests/performance/test_vsock_ab.py | 6 ------ 2 files changed, 11 deletions(-) diff --git a/tests/integration_tests/performance/test_network_ab.py b/tests/integration_tests/performance/test_network_ab.py index 706de24ee84..ec306d24fc6 100644 --- a/tests/integration_tests/performance/test_network_ab.py +++ b/tests/integration_tests/performance/test_network_ab.py @@ -6,7 +6,6 @@ import pytest -from framework.utils import CpuMap from framework.utils_iperf import IPerf3Test, emit_iperf3_metrics # each iteration is 30 * 0.2s = 6s @@ -57,10 +56,6 @@ def network_microvm(request, microvm_factory, guest_kernel, rootfs): vm.add_net_iface() vm.start() - # Check if the needed CPU cores are available. We have the API thread, VMM - # thread and then one thread for each configured vCPU. - assert CpuMap.len() >= 2 + vm.vcpus_count - # Pin uVM threads to physical cores. assert vm.pin_vmm(0), "Failed to pin firecracker thread." assert vm.pin_api(1), "Failed to pin fc_api thread." 
diff --git a/tests/integration_tests/performance/test_vsock_ab.py b/tests/integration_tests/performance/test_vsock_ab.py index 5d3f2b64560..9a2f068535c 100644 --- a/tests/integration_tests/performance/test_vsock_ab.py +++ b/tests/integration_tests/performance/test_vsock_ab.py @@ -6,7 +6,6 @@ import pytest -from framework.utils import CpuMap from framework.utils_iperf import IPerf3Test, emit_iperf3_metrics from framework.utils_vsock import VSOCK_UDS_PATH, make_host_port_path @@ -93,11 +92,6 @@ def test_vsock_throughput( vm.api.vsock.put(vsock_id="vsock0", guest_cid=3, uds_path="/" + VSOCK_UDS_PATH) vm.start() - # Check if the needed CPU cores are available. We have the API thread, VMM - # thread and then one thread for each configured vCPU. Lastly, we need one for - # the iperf server on the host. - assert CpuMap.len() > 2 + vm.vcpus_count - # Pin uVM threads to physical cores. assert vm.pin_vmm(0), "Failed to pin firecracker thread." assert vm.pin_api(1), "Failed to pin fc_api thread." From 7bb409db4f2143104e4b4b0db13811f5e4bfbe4d Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Tue, 3 Oct 2023 15:49:41 +0100 Subject: [PATCH 07/15] test(perf): Overlay time series instead of concatenating Previously, we concatenated time series for tests that produced data from multiple vcpus. Now, we instead overlay them, to accurately record the net throughput for each second interval, across all workers. This does not make a difference for permutation tests, but does allow us to exploit some properties of averages of time series across different parameterizations. Signed-off-by: Patrick Roy --- tests/framework/utils_iperf.py | 34 +++++++++++-------- .../performance/test_block_ab.py | 24 ++++++++++--- 2 files changed, 39 insertions(+), 19 deletions(-) diff --git a/tests/framework/utils_iperf.py b/tests/framework/utils_iperf.py index 1b7f043e26b..4b1b5691e41 100644 --- a/tests/framework/utils_iperf.py +++ b/tests/framework/utils_iperf.py @@ -180,18 +180,24 @@ def emit_iperf3_metrics(metrics, iperf_result, omit): )[0]: metrics.put_metric("cpu_utilization_vmm", cpu_util_data_point, "Percent") - for time_series in iperf_result["g2h"]: - for interval in time_series["intervals"][omit:]: - metrics.put_metric( - "throughput_guest_to_host", - interval["sum"]["bits_per_second"], - "Bits/Second", - ) + data_points = zip( + *[time_series["intervals"][omit:] for time_series in iperf_result["g2h"]] + ) + + for point_in_time in data_points: + metrics.put_metric( + "throughput_guest_to_host", + sum(interval["sum"]["bits_per_second"] for interval in point_in_time), + "Bits/Second", + ) - for time_series in iperf_result["h2g"]: - for interval in time_series["intervals"][omit:]: - metrics.put_metric( - "throughput_host_to_guest", - interval["sum"]["bits_per_second"], - "Bits/Second", - ) + data_points = zip( + *[time_series["intervals"][omit:] for time_series in iperf_result["h2g"]] + ) + + for point_in_time in data_points: + metrics.put_metric( + "throughput_host_to_guest", + sum(interval["sum"]["bits_per_second"] for interval in point_in_time), + "Bits/Second", + ) diff --git a/tests/integration_tests/performance/test_block_ab.py b/tests/integration_tests/performance/test_block_ab.py index 75729aee465..c7b6b34b7f6 100644 --- a/tests/integration_tests/performance/test_block_ab.py +++ b/tests/integration_tests/performance/test_block_ab.py @@ -106,22 +106,36 @@ def run_fio(microvm, mode, block_size): def process_fio_logs(vm, fio_mode, logs_dir, metrics): """Parses the fio logs in `{logs_dir}/{fio_mode}_bw.*.log and emits 
their contents as CloudWatch metrics""" - for job_id in range(vm.vcpus_count): - data = Path(f"{logs_dir}/{fio_mode}_bw.{job_id + 1}.log").read_text("UTF-8") - for line in data.splitlines(): + data = [ + Path(f"{logs_dir}/{fio_mode}_bw.{job_id + 1}.log") + .read_text("UTF-8") + .splitlines() + for job_id in range(vm.vcpus_count) + ] + + for tup in zip(*data): + bw_read = 0 + bw_write = 0 + + for line in tup: _, value, direction, _ = line.split(",", maxsplit=3) value = int(value.strip()) # See https://fio.readthedocs.io/en/latest/fio_doc.html#log-file-formats match direction.strip(): case "0": - metrics.put_metric("bw_read", value, "Kilobytes/Second") + bw_read += value case "1": - metrics.put_metric("bw_write", value, "Kilobytes/Second") + bw_write += value case _: assert False + if bw_read: + metrics.put_metric("bw_read", bw_read, "Kilobytes/Second") + if bw_write: + metrics.put_metric("bw_write", bw_write, "Kilobytes/Second") + @pytest.mark.nonci @pytest.mark.timeout(RUNTIME_SEC * 1000) # 1.40 hours From d304a01218d6c091e5edd3fd232c67cc03670012 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Tue, 3 Oct 2023 17:05:27 +0100 Subject: [PATCH 08/15] test(ab): Add noise threshold The idea here is the following: Intuitively, different parameterization of the same metric (e.g. the metric "snapshot restore latency" across different microvm configurations) are not completely independent. Therefore, we would generally expect impact to be reflected across multiple tested configurations (and particularly, if different configurations show mean changes in different directions, e.g. 1vcpu gives +5ms latency, but 2vcpu gives -5ms latency, then this is a further hint at a result being noise). To counteract this, we consider the average of the reported mean regressions, and only report failures for a metric whose average regression is at lesat 5%. Mathematically, this has the following justification: By the central limit theorem, the means of samples are normal distributed. Denote by A and B the distributions of the mean of samples from the 'A' and 'B' tests respectively. Under our null hypothesis, the distributions of the 'A' and 'B' samples are identical (although we dont know what the exact distributions are), meaning so are A and B, say A ~ B ~ N(mu, sigma^2). The difference of two normal distributions is also normal distributed, with the means being subtracted and the variances being added. Therefore, A - B ~ N(0, 2sigma^2). However, each parameterization (e.g. 1vcpu, 2vcpu, and so on) will potentially have different variances sigma^2. Here, we do the following assumption: For all parameterizations of the same metric, we have sigma^2/mu^2 = const. I have no mathematical justification for this (as it is a property of our tests), but empirically it works out. This means that (A-B)/mu ~ N(0, c), with c being a constant that is identical across all parameterizations of a metric. This means that we can combine the relative means across different parameterizations, and get a distributions whose expected value is 0, provided our null hypothesis was true. This is exactly what the code added in this commit verifies: The empirical average of our samples of this distribution only negligibly deviates from 0. Only if it deviates significantly (here by more than 0.05), we actually allow failures in that metric. For all tests but the snapshot restore test on x86_64/{5.10,6.1}, this allows us to completely drop the per-test noise threshold to 0. 
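A toy illustration of this sanity check (made-up numbers, not data from a real A/B run; 0.05 is the default of the new --noise-threshold argument):

```
import statistics

# Per metric: (mean(B) - mean(A)) / mean(A), one entry per parameterization.
relative_changes = {
    "restore_latency": [+0.04, -0.03, +0.02, -0.05],  # scatters around 0 -> noise
    "throughput_guest_to_host": [-0.09, -0.11, -0.08, -0.10],  # consistent direction -> real change
}

NOISE_THRESHOLD = 0.05
for metric, changes in relative_changes.items():
    mean_change = statistics.mean(changes)
    verdict = "report failures" if abs(mean_change) > NOISE_THRESHOLD else "treat as noise"
    print(f"{metric}: average relative change {mean_change:+.3f} -> {verdict}")
```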
Signed-off-by: Patrick Roy --- tools/ab_test.py | 62 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 56 insertions(+), 6 deletions(-) diff --git a/tools/ab_test.py b/tools/ab_test.py index b8db04e04a9..3084f5a51e6 100755 --- a/tools/ab_test.py +++ b/tools/ab_test.py @@ -215,7 +215,9 @@ def analyze_data(processed_emf_a, processed_emf_b, *, n_resamples: int = 9999): return results -def ab_performance_test(a_revision, b_revision, test, p_thresh, strength_thresh): +def ab_performance_test( + a_revision, b_revision, test, p_thresh, strength_thresh, noise_threshold +): """Does an A/B-test of the specified test across the given revisions""" _, commit_list, _ = utils.run_cmd( f"git --no-pager log --oneline {a_revision}..{b_revision}" @@ -232,16 +234,58 @@ def ab_performance_test(a_revision, b_revision, test, p_thresh, strength_thresh) b_revision=b_revision, ) + # We sort our A/B-Testing results keyed by metric here. The resulting lists of values + # will be approximately normal distributed, and we will use this property as a means of error correction. + # The idea behind this is that testing the same metric (say, restore_latency) across different scenarios (e.g. + # different vcpu counts) will be related in some unknown way (meaning most scenarios will show a change in the same + # direction). In particular, if one scenario yields a slight improvement and the next yields a + # slight degradation, we take this as evidence towards both being mere noise that cancels out. + # + # Empirical evidence for this assumption is that + # 1. Historically, a true performance change has never shown up in just a single test, it always showed up + # across most (if not all) tests for a specific metric. + # 2. Analyzing data collected from historical runs shows that across different parameterizations of the same + # metric, the collected samples approximately follow mean / variance = const, with the constant independent + # of the parameterization. + # + # Mathematically, this has the following justification: By the central + # limit theorem, the means of samples are (approximately) normal distributed. Denote by A + # and B the distributions of the mean of samples from the 'A' and 'B' + # tests respectively. Under our null hypothesis, the distributions of the + # 'A' and 'B' samples are identical (although we dont know what the exact + # distributions are), meaning so are A and B, say A ~ B ~ N(mu, sigma^2). + # The difference of two normal distributions is also normal distributed, + # with the means being subtracted and the variances being added. + # Therefore, A - B ~ N(0, 2sigma^2). If we now normalize this distribution by mu (which + # corresponds to considering the distribution of relative regressions instead), we get (A-B)/mu ~ N(0, c), with c + # being the constant from point 2. above. This means that we can combine the relative means across + # different parameterizations, and get a distributions whose expected + # value is 0, provided our null hypothesis was true. It is exactly this distribution + # for which we collect samples in the dictionary below. Therefore, a sanity check + # on the average of the average of the performance changes for a single metric + # is a good candidates for a sanity check against false-positives. + # + # Note that with this approach, for performance changes to "cancel out", we would need essentially a perfect split + # between scenarios that improve performance and scenarios that degrade performance, something we have not + # ever observed to actually happen. 
+ relative_changes_by_metric = {} + failures = [] for (dimension_set, metric), (result, unit) in results.items(): if is_ignored(dict(dimension_set)): continue values_a = processed_emf_a[dimension_set][metric][0] + baseline_mean = statistics.mean(values_a) + + if metric not in relative_changes_by_metric: + relative_changes_by_metric[metric] = [] + relative_changes_by_metric[metric].append(result.statistic / baseline_mean) - if result.pvalue < p_thresh and abs(result.statistic) > abs( - statistics.mean(values_a) - ) * (strength_thresh): + if ( + result.pvalue < p_thresh + and abs(result.statistic) > baseline_mean * strength_thresh + ): failures.append((dimension_set, metric, result, unit)) failure_report = "\n".join( @@ -254,8 +298,10 @@ def ab_performance_test(a_revision, b_revision, test, p_thresh, strength_thresh) f"characteristics did not change across the tested commits, has a probability of {result.pvalue:.2%}. " f"Tested Dimensions:\n{json.dumps(dict(dimension_set), indent=2)}" for (dimension_set, metric, result, unit) in failures + # Sanity check as described above + if abs(statistics.mean(relative_changes_by_metric[metric])) > noise_threshold ) - assert not failures, "\n" + failure_report + assert not failure_report, "\n" + failure_report print("No regressions detected!") @@ -280,13 +326,16 @@ def canonicalize_revision(revision): parser.add_argument( "--significance", help="The p-value threshold that needs to be crossed for a test result to be considered significant", + type=float, default=0.01, ) parser.add_argument( "--relative-strength", help="The minimal delta required before a regression will be considered valid", - default=0.2, + type=float, + default=0.0, ) + parser.add_argument("--noise-threshold", type=float, default=0.05) args = parser.parse_args() ab_performance_test( @@ -296,4 +345,5 @@ def canonicalize_revision(revision): args.test, args.significance, args.relative_strength, + args.noise_threshold, ) From 04d5f85006d902560d5e919a7b350d47cd22696a Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Fri, 6 Oct 2023 09:30:14 +0100 Subject: [PATCH 09/15] test(perf): Drop pytest timeouts They are remnant of when the performance tests were run without pytest parameterization, meaning all test scenarios were run inside of the same pytest test. This required large timeouts. Now with pytest parameterization, the runtime of each test is at most 30s, which conformtably fits into pytest's default timeout of 6min. 
Signed-off-by: Patrick Roy --- tests/integration_tests/performance/test_block_ab.py | 1 - tests/integration_tests/performance/test_network_ab.py | 1 - 2 files changed, 2 deletions(-) diff --git a/tests/integration_tests/performance/test_block_ab.py b/tests/integration_tests/performance/test_block_ab.py index c7b6b34b7f6..f0946d4a996 100644 --- a/tests/integration_tests/performance/test_block_ab.py +++ b/tests/integration_tests/performance/test_block_ab.py @@ -138,7 +138,6 @@ def process_fio_logs(vm, fio_mode, logs_dir, metrics): @pytest.mark.nonci -@pytest.mark.timeout(RUNTIME_SEC * 1000) # 1.40 hours @pytest.mark.parametrize("vcpus", [1, 2], ids=["1vcpu", "2vcpu"]) @pytest.mark.parametrize("fio_mode", ["randread", "randwrite"]) @pytest.mark.parametrize("fio_block_size", [4096], ids=["bs4096"]) diff --git a/tests/integration_tests/performance/test_network_ab.py b/tests/integration_tests/performance/test_network_ab.py index ec306d24fc6..46bf9516014 100644 --- a/tests/integration_tests/performance/test_network_ab.py +++ b/tests/integration_tests/performance/test_network_ab.py @@ -66,7 +66,6 @@ def network_microvm(request, microvm_factory, guest_kernel, rootfs): @pytest.mark.nonci -@pytest.mark.timeout(3600) @pytest.mark.parametrize("network_microvm", [1], indirect=True) def test_network_latency( network_microvm, metrics From 98ec7dd4a95b89ff17fc86a9472abd48bdc87553 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Fri, 6 Oct 2023 13:21:55 +0100 Subject: [PATCH 10/15] test: Run Performance A/B-Tests on release branches Since we no longer need to maintain separate sets of baselines for each release branch, it becomes feasible to run performance tests on release branches. These only trigger when PRs are merged. Signed-off-by: Patrick Roy --- .github/workflows/trigger_ab_tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/trigger_ab_tests.yml b/.github/workflows/trigger_ab_tests.yml index bed66f08ddf..bee5f4af191 100644 --- a/.github/workflows/trigger_ab_tests.yml +++ b/.github/workflows/trigger_ab_tests.yml @@ -2,6 +2,7 @@ on: push: branches: - main + - firecracker-v* jobs: trigger_ab_test: From 9232614d552d814cee4afe9519cf061ee0e08f98 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Mon, 9 Oct 2023 12:44:51 +0100 Subject: [PATCH 11/15] fix: Set EMF metrics key correctly It lives in the "CloudWatchMetrics" subdictionary, not in the "_aws" root dictionary. This was preventing metrics from showing up in the correct namespace in the cloudwatch web interface (they were listed under "local" instead of "PerfTests"). Signed-off-by: Patrick Roy --- tests/host_tools/metrics.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/host_tools/metrics.py b/tests/host_tools/metrics.py index ef636373295..7a5d15f1bfa 100644 --- a/tests/host_tools/metrics.py +++ b/tests/host_tools/metrics.py @@ -105,7 +105,8 @@ def emit_raw_emf(emf_msg: dict): "AWS_EMF_LOG_GROUP_NAME", f"{namespace}-metrics" ) emf_msg["_aws"]["LogStreamName"] = os.environ.get("AWS_EMF_LOG_STREAM_NAME", "") - emf_msg["_aws"]["Namespace"] = namespace + for metrics in emf_msg["_aws"]["CloudWatchMetrics"]: + metrics["Namespace"] = namespace emf_endpoint = urlparse(os.environ["AWS_EMF_AGENT_ENDPOINT"]) with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as sock: From e6db719fc79f561a01bdb18d28a1cf90d2c19614 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Fri, 6 Oct 2023 16:17:23 +0100 Subject: [PATCH 12/15] release: Add --reruns 2 to cmd_test invocation To prevent flaky tests from failing the release.
Signed-off-by: Patrick Roy --- tools/devtool | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/devtool b/tools/devtool index 4f82640238b..40193b7a9fe 100755 --- a/tools/devtool +++ b/tools/devtool @@ -486,7 +486,7 @@ cmd_build() { } function cmd_make_release { - cmd_test || die "Tests failed!" + cmd_test -- --reruns 2 || die "Tests failed!" run_devctr \ --user "$(id -u):$(id -g)" \ From 2e22cf2128dda5beb1ac8f0bb2471199ae97db5d Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Fri, 6 Oct 2023 15:58:49 +0100 Subject: [PATCH 13/15] chore: Update RELEASE_POLICY.md Release of 1.5 pushes 1.3 out of support. Signed-off-by: Patrick Roy --- docs/RELEASE_POLICY.md | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/docs/RELEASE_POLICY.md b/docs/RELEASE_POLICY.md index 235d932e2c5..5982d09f696 100644 --- a/docs/RELEASE_POLICY.md +++ b/docs/RELEASE_POLICY.md @@ -85,13 +85,14 @@ also be specifying the supported kernel versions. ## Release Status | Release | Release Date | Latest Patch | Min. end of support | Official end of Support | -| ------: | -----------: | -----------: | ------------------: | :----------------------------- | -| v1.4 | 2023-07-20 | v1.4.1 | 2024-01-20 | Supported | -| v1.3 | 2023-03-02 | v1.3.3 | 2023-09-02 | Supported | -| v1.2 | 2022-11-30 | v1.2.1 | 2023-05-30 | 2023-07-20 (v1.4 released) | -| v1.1 | 2022-05-06 | v1.1.4 | 2022-11-06 | 2023-03-02 (v1.3 released) | -| v1.0 | 2022-01-31 | v1.0.2 | 2022-07-31 | 2022-11-30 (v1.2 released) | -| v0.25 | 2021-03-13 | v0.25.2 | 2021-09-13 | 2022-03-13 (end of 1y support) | +|--------:|-------------:|-------------:|--------------------:|:-------------------------------| +| v1.5 | 2023-10-09 | v1.5.0 | 2024-04-09 | Supported | +| v1.4 | 2023-07-20 | v1.4.1 | 2024-01-20 | Supported | +| v1.3 | 2023-03-02 | v1.3.3 | 2023-09-02 | 2023-10-09 (v1.5 released) | +| v1.2 | 2022-11-30 | v1.2.1 | 2023-05-30 | 2023-07-20 (v1.4 released) | +| v1.1 | 2022-05-06 | v1.1.4 | 2022-11-06 | 2023-03-02 (v1.3 released) | +| v1.0 | 2022-01-31 | v1.0.2 | 2022-07-31 | 2022-11-30 (v1.2 released) | +| v0.25 | 2021-03-13 | v0.25.2 | 2021-09-13 | 2022-03-13 (end of 1y support) | ## API support From 40b96e2564c515865aca0d333683589b181917ee Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Fri, 6 Oct 2023 16:07:07 +0100 Subject: [PATCH 14/15] chore: bump version to 1.6.0-dev We are starting 1.6.0 development once the 1.5.0 release is done. 
Signed-off-by: Patrick Roy --- Cargo.lock | 100 +++++++++++++----------- docs/getting-started.md | 6 +- src/api_server/swagger/firecracker.yaml | 2 +- src/cpu-template-helper/Cargo.toml | 2 +- src/firecracker/Cargo.toml | 2 +- src/jailer/Cargo.toml | 2 +- src/rebase-snap/Cargo.toml | 2 +- src/seccompiler/Cargo.toml | 2 +- src/vmm/src/version_map.rs | 6 ++ tools/devtool | 2 +- 10 files changed, 71 insertions(+), 55 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ba2ea6c69e2..2575f349a0b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -168,7 +168,7 @@ version = "0.66.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2b84e06fc203107bfbad243f4aba2af864eb7db3b1cf46ea0a023b0b433d2a7" dependencies = [ - "bitflags 2.3.3", + "bitflags 2.4.0", "cexpr", "clang-sys", "lazy_static", @@ -181,7 +181,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn 2.0.28", + "syn 2.0.38", "which", ] @@ -191,7 +191,7 @@ version = "0.68.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "726e4313eb6ec35d2730258ad4e15b547ee75d6afaa1361a922e78e59b7d8078" dependencies = [ - "bitflags 2.3.3", + "bitflags 2.4.0", "cexpr", "clang-sys", "lazy_static", @@ -202,7 +202,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn 2.0.28", + "syn 2.0.38", ] [[package]] @@ -213,9 +213,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.3.3" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42" +checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635" [[package]] name = "cargo_toml" @@ -345,7 +345,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.28", + "syn 2.0.38", ] [[package]] @@ -371,7 +371,7 @@ checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" [[package]] name = "cpu-template-helper" -version = "1.5.0-dev" +version = "1.6.0-dev" dependencies = [ "clap", "displaydoc", @@ -476,7 +476,7 @@ checksum = "487585f4d0c6655fe74905e2504d8ad6908e4db67f744eb140876906c2f3175d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.28", + "syn 2.0.38", ] [[package]] @@ -530,7 +530,7 @@ dependencies = [ [[package]] name = "firecracker" -version = "1.5.0-dev" +version = "1.6.0-dev" dependencies = [ "api_server", "bincode", @@ -611,6 +611,15 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7" +[[package]] +name = "home" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5444c27eef6923071f7ebcc33e3444508466a76f7a2b93da00ed6e19f30c1ddb" +dependencies = [ + "windows-sys", +] + [[package]] name = "indexmap" version = "2.0.2" @@ -648,7 +657,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" dependencies = [ "hermit-abi", - "rustix 0.38.4", + "rustix 0.38.17", "windows-sys", ] @@ -669,7 +678,7 @@ checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" [[package]] name = "jailer" -version = "1.5.0-dev" +version = "1.6.0-dev" dependencies = [ "libc", "nix", @@ -729,9 +738,9 @@ dependencies = [ [[package]] name = "libm" -version = "0.2.7" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" [[package]] name = "linux-loader" @@ -750,9 +759,9 @@ checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" [[package]] name = "linux-raw-sys" -version = "0.4.3" +version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09fc20d2ca12cb9f044c93e3bd6d32d523e6e2ec3db4f7b2939cd99026ecd3f0" +checksum = "3852614a3bd9ca9804678ba6be5e3b8ce76dfc902cae004e3e0c44051b6e88db" [[package]] name = "log" @@ -772,7 +781,7 @@ checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" [[package]] name = "micro_http" version = "0.1.0" -source = "git+https://github.com/firecracker-microvm/micro-http#0d0fdcd50ea10c1b4777f9a958873fc848a5b7bb" +source = "git+https://github.com/firecracker-microvm/micro-http#a4d632f2c5ea45712c0d2002dc909a63879e85c3" dependencies = [ "libc", "vmm-sys-util", @@ -800,7 +809,7 @@ version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2eb04e9c688eff1c89d72b407f168cf79bb9e867a9d3323ed6c01519eb9cc053" dependencies = [ - "bitflags 2.3.3", + "bitflags 2.4.0", "cfg-if", "libc", ] @@ -875,19 +884,19 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "prettyplease" -version = "0.2.12" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c64d9ba0963cdcea2e1b2230fbae2bab30eb25a174be395c41e764bfb65dd62" +checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d" dependencies = [ "proc-macro2", - "syn 2.0.28", + "syn 2.0.38", ] [[package]] name = "proc-macro2" -version = "1.0.67" +version = "1.0.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d433d9f1a3e8c1263d9456598b16fec66f4acc9a74dacffd35c7bb09b3a1328" +checksum = "5b1106fec09662ec6dd98ccac0f81cef56984d0b49f75c92d8cbad76e20c005c" dependencies = [ "unicode-ident", ] @@ -898,7 +907,7 @@ version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c003ac8c77cb07bb74f5f198bce836a689bcd5a42574612bf14d17bfd08c20e" dependencies = [ - "bitflags 2.3.3", + "bitflags 2.4.0", "lazy_static", "num-traits", "rand", @@ -958,7 +967,7 @@ dependencies = [ [[package]] name = "rebase-snap" -version = "1.5.0-dev" +version = "1.6.0-dev" dependencies = [ "displaydoc", "libc", @@ -1017,14 +1026,14 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.4" +version = "0.38.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a962918ea88d644592894bc6dc55acc6c0956488adcebbfb6e273506b7fd6e5" +checksum = "f25469e9ae0f3d0047ca8b93fc56843f38e6774f0914a107ff8b41be8be8e0b7" dependencies = [ - "bitflags 2.3.3", + "bitflags 2.4.0", "errno", "libc", - "linux-raw-sys 0.4.3", + "linux-raw-sys 0.4.8", "windows-sys", ] @@ -1045,7 +1054,7 @@ dependencies = [ [[package]] name = "seccompiler" -version = "1.5.0-dev" +version = "1.6.0-dev" dependencies = [ "bincode", "displaydoc", @@ -1082,7 +1091,7 @@ checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.28", + "syn 2.0.38", ] [[package]] @@ -1162,9 +1171,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.28" +version = "2.0.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04361975b3f5e348b2189d8dc55bc942f278b2d482a6a0365de5bdd62d351567" 
+checksum = "e96b79aaa137db8f61e26363a0c9b47d8b4ec75da28b7d1d614c2303e232408b" dependencies = [ "proc-macro2", "quote", @@ -1188,7 +1197,7 @@ checksum = "10712f02019e9288794769fba95cd6847df9874d49d871d062172f9dd41bc4cc" dependencies = [ "proc-macro2", "quote", - "syn 2.0.28", + "syn 2.0.38", ] [[package]] @@ -1212,9 +1221,9 @@ dependencies = [ [[package]] name = "toml" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bc1433177506450fe920e46a4f9812d0c211f5dd556da10e731a0a3dfa151f0" +checksum = "185d8ab0dfbb35cf1399a6344d8484209c088f75f8f68230da55d48d95d43e3d" dependencies = [ "serde", "serde_spanned", @@ -1233,9 +1242,9 @@ dependencies = [ [[package]] name = "toml_edit" -version = "0.20.1" +version = "0.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca676d9ba1a322c1b64eb8045a5ec5c0cfb0c9d08e15e9ff622589ad5221c8fe" +checksum = "396e4d48bbb2b7554c944bde63101b5ae446cff6ec4a24227428f15eb72ef338" dependencies = [ "indexmap", "serde", @@ -1284,7 +1293,7 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6a38c7e24af201e66f02659492f86ccd5efa9fd62f2c851fff3961a60e81966" dependencies = [ - "bitflags 2.3.3", + "bitflags 2.4.0", "cfg-if", "libc", "nix", @@ -1405,7 +1414,7 @@ dependencies = [ "aws-lc-rs", "base64", "bincode", - "bitflags 2.3.3", + "bitflags 2.4.0", "criterion", "derive_more", "device_tree", @@ -1465,13 +1474,14 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "which" -version = "4.4.0" +version = "4.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2441c784c52b289a054b7201fc93253e288f094e2f4be9058343127c4226a269" +checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" dependencies = [ "either", - "libc", + "home", "once_cell", + "rustix 0.38.17", ] [[package]] @@ -1573,9 +1583,9 @@ checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" [[package]] name = "winnow" -version = "0.5.15" +version = "0.5.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c2e3184b9c4e92ad5167ca73039d0c42476302ab603e2fec4487511f38ccefc" +checksum = "037711d82167854aff2018dfd193aa0fef5370f456732f0d5a0c59b0f1b4b907" dependencies = [ "memchr", ] diff --git a/docs/getting-started.md b/docs/getting-started.md index 9616201a2e8..e7987f5a177 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -76,13 +76,13 @@ with a Ubuntu 22.04 rootfs from our CI: ARCH="$(uname -m)" # Download a linux kernel binary -wget https://s3.amazonaws.com/spec.ccfc.min/firecracker-ci/v1.5/${ARCH}/vmlinux-5.10.186 +wget https://s3.amazonaws.com/spec.ccfc.min/firecracker-ci/v1.6/${ARCH}/vmlinux-5.10.186 # Download a rootfs -wget https://s3.amazonaws.com/spec.ccfc.min/firecracker-ci/v1.5/${ARCH}/ubuntu-22.04.ext4 +wget https://s3.amazonaws.com/spec.ccfc.min/firecracker-ci/v1.6/${ARCH}/ubuntu-22.04.ext4 # Download the ssh key for the rootfs -wget https://s3.amazonaws.com/spec.ccfc.min/firecracker-ci/v1.5/${ARCH}/ubuntu-22.04.id_rsa +wget https://s3.amazonaws.com/spec.ccfc.min/firecracker-ci/v1.6/${ARCH}/ubuntu-22.04.id_rsa # Set user read permission on the ssh key chmod 400 ./ubuntu-22.04.id_rsa diff --git a/src/api_server/swagger/firecracker.yaml b/src/api_server/swagger/firecracker.yaml index 9be57c05017..9519f710936 100644 --- a/src/api_server/swagger/firecracker.yaml +++ b/src/api_server/swagger/firecracker.yaml @@ 
-5,7 +5,7 @@ info: The API is accessible through HTTP calls on specific URLs carrying JSON modeled data. The transport medium is a Unix Domain Socket. - version: 1.5.0-dev + version: 1.6.0-dev termsOfService: "" contact: email: "compute-capsule@amazon.com" diff --git a/src/cpu-template-helper/Cargo.toml b/src/cpu-template-helper/Cargo.toml index f04f1fbbc9f..1d010652895 100644 --- a/src/cpu-template-helper/Cargo.toml +++ b/src/cpu-template-helper/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cpu-template-helper" -version = "1.5.0-dev" +version = "1.6.0-dev" authors = ["Amazon Firecracker team "] edition = "2021" license = "Apache-2.0" diff --git a/src/firecracker/Cargo.toml b/src/firecracker/Cargo.toml index 105ff1b2f95..b1baa2fb412 100644 --- a/src/firecracker/Cargo.toml +++ b/src/firecracker/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "firecracker" -version = "1.5.0-dev" +version = "1.6.0-dev" authors = ["Amazon Firecracker team "] edition = "2021" build = "build.rs" diff --git a/src/jailer/Cargo.toml b/src/jailer/Cargo.toml index f0055157e51..16a12e788e9 100644 --- a/src/jailer/Cargo.toml +++ b/src/jailer/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "jailer" -version = "1.5.0-dev" +version = "1.6.0-dev" authors = ["Amazon Firecracker team "] edition = "2021" description = "Process for starting Firecracker in production scenarios; applies a cgroup/namespace isolation barrier and then drops privileges." diff --git a/src/rebase-snap/Cargo.toml b/src/rebase-snap/Cargo.toml index 955dbcb42c9..c66cec73d5f 100644 --- a/src/rebase-snap/Cargo.toml +++ b/src/rebase-snap/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "rebase-snap" -version = "1.5.0-dev" +version = "1.6.0-dev" authors = ["Amazon Firecracker team "] edition = "2021" license = "Apache-2.0" diff --git a/src/seccompiler/Cargo.toml b/src/seccompiler/Cargo.toml index 876940e3339..99e52f2fc94 100644 --- a/src/seccompiler/Cargo.toml +++ b/src/seccompiler/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "seccompiler" -version = "1.5.0-dev" +version = "1.6.0-dev" authors = ["Amazon Firecracker team "] edition = "2021" description = "Program that compiles multi-threaded seccomp-bpf filters expressed as JSON into raw BPF programs, serializing them and outputting them to a file." diff --git a/src/vmm/src/version_map.rs b/src/vmm/src/version_map.rs index c433ddebbc9..30fd913647d 100644 --- a/src/vmm/src/version_map.rs +++ b/src/vmm/src/version_map.rs @@ -36,6 +36,8 @@ pub const FC_V1_3_SNAP_VERSION: u16 = 7; pub const FC_V1_4_SNAP_VERSION: u16 = 8; /// Snap version for Firecracker v1.5 pub const FC_V1_5_SNAP_VERSION: u16 = 9; +/// Snap version for Firecracker v1.6 +pub const FC_V1_6_SNAP_VERSION: u16 = 10; lazy_static! { // Note: until we have a better design, this needs to be updated when the version changes. @@ -80,6 +82,9 @@ lazy_static! { version_map.set_type_version(VmState::type_id(), 2); + // v1.6 state change mappings + version_map.new_version(); + version_map }; @@ -107,6 +112,7 @@ lazy_static! 
{ mapping.insert(Version::new(1, 3, 0), FC_V1_3_SNAP_VERSION); mapping.insert(Version::new(1, 4, 0), FC_V1_4_SNAP_VERSION); mapping.insert(Version::new(1, 5, 0), FC_V1_5_SNAP_VERSION); + mapping.insert(Version::new(1, 6, 0), FC_V1_6_SNAP_VERSION); mapping }; diff --git a/tools/devtool b/tools/devtool index 40193b7a9fe..fdcc615a6d3 100755 --- a/tools/devtool +++ b/tools/devtool @@ -520,7 +520,7 @@ ensure_ci_artifacts() { # Fetch all the artifacts so they are local say "Fetching CI artifacts from S3" - S3_URL=s3://spec.ccfc.min/firecracker-ci/v1.5/$(uname -m) + S3_URL=s3://spec.ccfc.min/firecracker-ci/v1.6/$(uname -m) ARTIFACTS=$MICROVM_IMAGES_DIR/$(uname -m) if [ ! -d "$ARTIFACTS" ]; then mkdir -pv $ARTIFACTS From 9dc52b714dc55d319bc8210e07a634b76700015b Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Mon, 9 Oct 2023 16:25:10 +0100 Subject: [PATCH 15/15] Remove redundant min-version check from artifacts.py With the new CI artifacts system, we no longer keep all firecracker releases in a single S3 bucket, and instead keep a folder of supported firecracker releases per artifacts folder. This means that in the 1.6 artifacts folder, only 1.4 and 1.5 release binaries exist. Therefore, the additional check in firecracker_artifacts() is no longer needed. We remove it, because it specifies the minimal version as a tuple, and is thus easily missed when doing a release (as the runbook only hints that sometimes min-versions are specified as strings). Signed-off-by: Patrick Roy --- tests/framework/artifacts.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/framework/artifacts.py b/tests/framework/artifacts.py index a2e35da39fe..be91a0421ce 100644 --- a/tests/framework/artifacts.py +++ b/tests/framework/artifacts.py @@ -123,7 +123,6 @@ def firecracker_artifacts(): cargo_version = get_firecracker_version_from_toml() # until the next minor version (but *not* including) max_version = (cargo_version.major, cargo_version.minor + 1, 0) - min_version = (1, 3, 0) prefix = "firecracker/firecracker-*" for firecracker in sorted(ARTIFACT_DIR.glob(prefix)): match = re.match(r"firecracker-v(\d+)\.(\d+)\.(\d+)", firecracker.name) if not match: continue fc = FirecrackerArtifact(firecracker) version = fc.version_tuple - if version < min_version: - continue if version >= max_version: continue yield pytest.param(fc, id=fc.name)