
Tweaks to reduce false positive rate of A/B-Tests #4147

Merged · 10 commits · Oct 9, 2023
1 change: 1 addition & 0 deletions .github/workflows/trigger_ab_tests.yml
@@ -2,6 +2,7 @@ on:
   push:
     branches:
       - main
+      - firecracker-v*
 
 jobs:
   trigger_ab_test:
5 changes: 4 additions & 1 deletion tests/framework/ab_test.py
@@ -100,7 +100,9 @@ def git_ab_test(
     return result_a, result_b, comparison
 
 
-def check_regression(a_samples: List[float], b_samples: List[float]):
+def check_regression(
+    a_samples: List[float], b_samples: List[float], *, n_resamples: int = 9999
+):
     """Checks for a regression by performing a permutation test. A permutation test is a non-parametric test that takes
     three parameters: Two populations (sets of samples) and a function computing a "statistic" based on two populations.
     First, the test computes the statistic for the initial populations. It then randomly
@@ -120,6 +122,7 @@ def check_regression(a_samples: List[float], b_samples: List[float]):
         # Compute the difference of means, such that a positive difference indicates potential for regression.
         lambda x, y: statistics.mean(y) - statistics.mean(x),
         vectorized=False,
+        n_resamples=n_resamples,
     )
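The `vectorized=False` and `n_resamples=` arguments suggest the underlying call is scipy.stats.permutation_test. Under that assumption, a minimal self-contained sketch of the test that check_regression performs (sample values hypothetical):

import statistics

from scipy.stats import permutation_test

a_samples = [10.1, 9.9, 10.0, 10.2]  # hypothetical baseline measurements
b_samples = [11.0, 10.8, 11.2, 10.9]  # hypothetical candidate measurements

result = permutation_test(
    (a_samples, b_samples),
    # Difference of means; a positive statistic points at a potential regression.
    lambda x, y: statistics.mean(y) - statistics.mean(x),
    vectorized=False,
    n_resamples=9999,
)
print(result.statistic, result.pvalue)  # low p-value: difference unlikely to be chance

Lowering n_resamples trades p-value precision for runtime, which is presumably why the PR makes it configurable per call site.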
54 changes: 32 additions & 22 deletions tests/framework/utils_iperf.py
@@ -57,11 +57,13 @@ def run_test(self, first_free_cpu):
         assert self._num_clients < CpuMap.len() - self._microvm.vcpus_count - 2
 
         for server_idx in range(self._num_clients):
-            cmd = self.host_command(server_idx).build()
             assigned_cpu = CpuMap(first_free_cpu)
-            utils.run_cmd(
-                f"taskset --cpu-list {assigned_cpu} {self._microvm.jailer.netns_cmd_prefix()} {cmd}"
-            )
+            cmd = (
+                self.host_command(server_idx)
+                .with_arg("--affinity", assigned_cpu)
+                .build()
+            )
+            utils.run_cmd(f"{self._microvm.jailer.netns_cmd_prefix()} {cmd}")
             first_free_cpu += 1
 
         time.sleep(SERVER_STARTUP_TIME_SEC)
@@ -105,12 +107,14 @@ def spawn_iperf3_client(self, client_idx):
         mode = MODE_MAP[self._mode][client_idx % len(MODE_MAP[self._mode])]
 
         # Add the port where the iperf3 client is going to send/receive.
-        cmd = self.guest_command(client_idx).with_arg(mode).build()
-
-        pinned_cmd = (
-            f"taskset --cpu-list {client_idx % self._microvm.vcpus_count} {cmd}"
-        )
-        rc, stdout, stderr = self._microvm.ssh.run(pinned_cmd)
+        cmd = (
+            self.guest_command(client_idx)
+            .with_arg(mode)
+            .with_arg("--affinity", client_idx % self._microvm.vcpus_count)
+            .build()
+        )
+
+        rc, stdout, stderr = self._microvm.ssh.run(cmd)
 
         assert rc == 0, stderr
 
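Both hunks replace the external taskset wrapper with iperf3's own -A/--affinity option, so the CPU pinning travels with the iperf3 command itself instead of being spliced around the network-namespace prefix. Roughly, with hypothetical CPU, namespace, and port values (not the literal strings built above):

# Hypothetical before/after of the command strings built by this class.
# Before: taskset wraps the whole invocation, netns prefix included.
old_cmd = "taskset --cpu-list 9 ip netns exec fc_netns iperf3 -s -p 5000"
# After: iperf3 pins itself via its own affinity flag.
new_cmd = "ip netns exec fc_netns iperf3 -s -p 5000 --affinity 9"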
@@ -176,18 +180,24 @@ def emit_iperf3_metrics(metrics, iperf_result, omit):
     )[0]:
         metrics.put_metric("cpu_utilization_vmm", cpu_util_data_point, "Percent")
 
-    for time_series in iperf_result["g2h"]:
-        for interval in time_series["intervals"][omit:]:
-            metrics.put_metric(
-                "throughput_guest_to_host",
-                interval["sum"]["bits_per_second"],
-                "Bits/Second",
-            )
+    data_points = zip(
+        *[time_series["intervals"][omit:] for time_series in iperf_result["g2h"]]
+    )
 
-    for time_series in iperf_result["h2g"]:
-        for interval in time_series["intervals"][omit:]:
-            metrics.put_metric(
-                "throughput_host_to_guest",
-                interval["sum"]["bits_per_second"],
-                "Bits/Second",
-            )
+    for point_in_time in data_points:
+        metrics.put_metric(
+            "throughput_guest_to_host",
+            sum(interval["sum"]["bits_per_second"] for interval in point_in_time),
+            "Bits/Second",
+        )
+
+    data_points = zip(
+        *[time_series["intervals"][omit:] for time_series in iperf_result["h2g"]]
+    )
+
+    for point_in_time in data_points:
+        metrics.put_metric(
+            "throughput_host_to_guest",
+            sum(interval["sum"]["bits_per_second"] for interval in point_in_time),
+            "Bits/Second",
+        )
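Instead of emitting one metric sample per stream per interval, the reworked loop transposes the per-stream time series with zip(*...) and emits the sum across all parallel streams at each point in time, i.e. one aggregate throughput sample per interval. A toy illustration of the transposition (hypothetical numbers):

# Two parallel streams, two sampling intervals each (bits/second).
streams = [
    [{"sum": {"bits_per_second": 100}}, {"sum": {"bits_per_second": 110}}],
    [{"sum": {"bits_per_second": 200}}, {"sum": {"bits_per_second": 190}}],
]

# zip(*streams) turns "per stream, per interval" into "per interval, per stream".
for point_in_time in zip(*streams):
    print(sum(interval["sum"]["bits_per_second"] for interval in point_in_time))
# Prints 300, then 300: one aggregate sample per point in time.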
3 changes: 2 additions & 1 deletion tests/host_tools/metrics.py
@@ -105,7 +105,8 @@ def emit_raw_emf(emf_msg: dict):
         "AWS_EMF_LOG_GROUP_NAME", f"{namespace}-metrics"
     )
     emf_msg["_aws"]["LogStreamName"] = os.environ.get("AWS_EMF_LOG_STREAM_NAME", "")
-    emf_msg["_aws"]["Namespace"] = namespace
+    for metrics in emf_msg["_aws"]["CloudWatchMetrics"]:
+        metrics["Namespace"] = namespace
 
     emf_endpoint = urlparse(os.environ["AWS_EMF_AGENT_ENDPOINT"])
     with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as sock:
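In the CloudWatch Embedded Metric Format, Namespace is a field of each directive inside the CloudWatchMetrics array, not of the top-level _aws object, which is what this fix corrects. A minimal sketch of a well-formed EMF message as a Python dict (metric and dimension names hypothetical):

emf_msg = {
    "_aws": {
        "Timestamp": 1696809600000,  # milliseconds since the epoch
        "CloudWatchMetrics": [
            {
                "Namespace": "my-namespace",  # one namespace per directive
                "Dimensions": [["performance_test"]],
                "Metrics": [{"Name": "ping_latency", "Unit": "Milliseconds"}],
            }
        ],
    },
    "performance_test": "test_network_latency",
    "ping_latency": 0.223,
}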
25 changes: 19 additions & 6 deletions tests/integration_tests/performance/test_block_ab.py
@@ -106,25 +106,38 @@ def run_fio(microvm, mode, block_size):
 
 def process_fio_logs(vm, fio_mode, logs_dir, metrics):
     """Parses the fio logs in `{logs_dir}/{fio_mode}_bw.*.log` and emits their contents as CloudWatch metrics"""
-    for job_id in range(vm.vcpus_count):
-        data = Path(f"{logs_dir}/{fio_mode}_bw.{job_id + 1}.log").read_text("UTF-8")
-
-        for line in data.splitlines():
+    data = [
+        Path(f"{logs_dir}/{fio_mode}_bw.{job_id + 1}.log")
+        .read_text("UTF-8")
+        .splitlines()
+        for job_id in range(vm.vcpus_count)
+    ]
+
+    for tup in zip(*data):
+        bw_read = 0
+        bw_write = 0
+
+        for line in tup:
             _, value, direction, _ = line.split(",", maxsplit=3)
             value = int(value.strip())
 
             # See https://fio.readthedocs.io/en/latest/fio_doc.html#log-file-formats
             match direction.strip():
                 case "0":
-                    metrics.put_metric("bw_read", value, "Kilobytes/Second")
+                    bw_read += value
                 case "1":
-                    metrics.put_metric("bw_write", value, "Kilobytes/Second")
+                    bw_write += value
                 case _:
                     assert False
 
+        if bw_read:
+            metrics.put_metric("bw_read", bw_read, "Kilobytes/Second")
+        if bw_write:
+            metrics.put_metric("bw_write", bw_write, "Kilobytes/Second")
+
 
 @pytest.mark.nonci
 @pytest.mark.timeout(RUNTIME_SEC * 1000)  # 1.40 hours
 @pytest.mark.parametrize("vcpus", [1, 2], ids=["1vcpu", "2vcpu"])
 @pytest.mark.parametrize("fio_mode", ["randread", "randwrite"])
 @pytest.mark.parametrize("fio_block_size", [4096], ids=["bs4096"])
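For reference, each line of a fio bandwidth log has the shape "time_msec, value, direction, block_size[, ...]", with direction 0 for reads and 1 for writes (see the fio documentation linked in the code). A hypothetical sample line run through the parse used above:

line = "1000, 204800, 0, 4096"  # hypothetical: at t=1000ms, 204800 KiB/s of reads

_, value, direction, _ = line.split(",", maxsplit=3)
assert int(value.strip()) == 204800
assert direction.strip() == "0"  # 0 = read, 1 = write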
85 changes: 38 additions & 47 deletions tests/integration_tests/performance/test_network_ab.py
@@ -6,7 +6,6 @@
 
 import pytest
 
-from framework.utils import CpuMap
 from framework.utils_iperf import IPerf3Test, emit_iperf3_metrics
 
 # each iteration is 30 * 0.2s = 6s
@@ -46,50 +45,57 @@ def consume_ping_output(ping_putput):
         yield float(time[0])
 
 
-@pytest.mark.nonci
-@pytest.mark.timeout(3600)
-def test_network_latency(microvm_factory, guest_kernel, rootfs, metrics):
-    """
-    Test network latency for multiple vm configurations.
-
-    Send a ping from the guest to the host.
-    """
+@pytest.fixture
+def network_microvm(request, microvm_factory, guest_kernel, rootfs):
+    """Creates a microvm with the networking setup used by the performance tests in this file.
+
+    This fixture receives its vcpu count via indirect parameterization"""
     vm = microvm_factory.build(guest_kernel, rootfs, monitor_memory=False)
     vm.spawn(log_level="Info")
-    vm.basic_config(vcpu_count=GUEST_VCPUS, mem_size_mib=GUEST_MEM_MIB)
-    iface = vm.add_net_iface()
+    vm.basic_config(vcpu_count=request.param, mem_size_mib=GUEST_MEM_MIB)
+    vm.add_net_iface()
     vm.start()
 
-    # Check if the needed CPU cores are available. We have the API thread, VMM
-    # thread and then one thread for each configured vCPU.
-    assert CpuMap.len() >= 2 + vm.vcpus_count
-
     # Pin uVM threads to physical cores.
     assert vm.pin_vmm(0), "Failed to pin firecracker thread."
     assert vm.pin_api(1), "Failed to pin fc_api thread."
     for i in range(vm.vcpus_count):
         assert vm.pin_vcpu(i, i + 2), f"Failed to pin fc_vcpu {i} thread."
 
+    return vm
+
+
+@pytest.mark.nonci
+@pytest.mark.parametrize("network_microvm", [1], indirect=True)
+def test_network_latency(
+    network_microvm, metrics
+):  # pylint:disable=redefined-outer-name
+    """
+    Test network latency for multiple vm configurations.
+
+    Send a ping from the guest to the host.
+    """
+
     samples = []
+    host_ip = network_microvm.iface["eth0"]["iface"].host_ip
+
     for _ in range(ITERATIONS):
-        rc, ping_output, stderr = vm.ssh.run(
-            f"ping -c {REQUEST_PER_ITERATION} -i {DELAY} {iface.host_ip}"
+        rc, ping_output, stderr = network_microvm.ssh.run(
+            f"ping -c {REQUEST_PER_ITERATION} -i {DELAY} {host_ip}"
         )
         assert rc == 0, stderr
 
         samples.extend(consume_ping_output(ping_output))
 
     metrics.set_dimensions(
-        {"performance_test": "test_network_latency", **vm.dimensions}
+        {"performance_test": "test_network_latency", **network_microvm.dimensions}
     )
 
     for sample in samples:
         metrics.put_metric("ping_latency", sample, "Milliseconds")
 
 
-class TCPIPerf3Test(IPerf3Test):
+class TcpIPerf3Test(IPerf3Test):
     """IPerf3 runner for the TCP throughput performance test"""
 
     BASE_PORT = 5000
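The fixture's vcpu count arrives through pytest's indirect parameterization: parametrizing the fixture name with indirect=True routes each parameter to the fixture's request.param instead of passing it straight to the test. A standalone toy example (hypothetical names, unrelated to the code above):

import pytest

@pytest.fixture
def network_microvm(request):
    # request.param carries the value from the parametrize marker below.
    return f"vm with {request.param} vcpu(s)"

@pytest.mark.parametrize("network_microvm", [1, 2], indirect=True)
def test_example(network_microvm):
    assert network_microvm.endswith("vcpu(s)")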
@@ -120,55 +126,40 @@ def __init__(self, microvm, mode, host_ip, payload_length):
 
 @pytest.mark.nonci
 @pytest.mark.timeout(3600)
-@pytest.mark.parametrize("vcpus", [1, 2])
+@pytest.mark.parametrize("network_microvm", [1, 2], indirect=True)
 @pytest.mark.parametrize("payload_length", ["128K", "1024K"], ids=["p128K", "p1024K"])
 @pytest.mark.parametrize("mode", ["g2h", "h2g", "bd"])
 def test_network_tcp_throughput(
-    microvm_factory,
-    guest_kernel,
-    rootfs,
-    vcpus,
+    network_microvm,
     payload_length,
     mode,
     metrics,
-):
+):  # pylint:disable=redefined-outer-name
     """
     Iperf between guest and host in both directions for TCP workload.
     """
 
    # We run bi-directional tests only on uVMs with at least 2 vCPUs, because
    # we need to pin one iperf3 per direction to its own vCPU, and there are
    # two directions.
-    if mode == "bd" and vcpus < 2:
+    if mode == "bd" and network_microvm.vcpus_count < 2:
         pytest.skip("bidirectional test only done with at least 2 vcpus")
 
-    vm = microvm_factory.build(guest_kernel, rootfs, monitor_memory=False)
-    vm.spawn(log_level="Info")
-    vm.basic_config(vcpu_count=vcpus, mem_size_mib=GUEST_MEM_MIB)
-    iface = vm.add_net_iface()
-    vm.start()
-
-    # Check if the needed CPU cores are available. We have the API thread, VMM
-    # thread and then one thread for each configured vCPU. Lastly, we need one for
-    # the iperf server on the host.
-    assert CpuMap.len() > 2 + vm.vcpus_count
-
-    # Pin uVM threads to physical cores.
-    assert vm.pin_vmm(0), "Failed to pin firecracker thread."
-    assert vm.pin_api(1), "Failed to pin fc_api thread."
-    for i in range(vm.vcpus_count):
-        assert vm.pin_vcpu(i, i + 2), f"Failed to pin fc_vcpu {i} thread."
-
-    test = TCPIPerf3Test(vm, mode, iface.host_ip, payload_length)
-    data = test.run_test(vm.vcpus_count + 2)
+    test = TcpIPerf3Test(
+        network_microvm,
+        mode,
+        network_microvm.iface["eth0"]["iface"].host_ip,
+        payload_length,
+    )
+    data = test.run_test(network_microvm.vcpus_count + 2)
 
     metrics.set_dimensions(
         {
             "performance_test": "test_network_tcp_throughput",
             "payload_length": payload_length,
             "mode": mode,
-            **vm.dimensions,
+            **network_microvm.dimensions,
         }
     )
 
-    emit_iperf3_metrics(metrics, data, TCPIPerf3Test.WARMUP_SEC)
+    emit_iperf3_metrics(metrics, data, TcpIPerf3Test.WARMUP_SEC)
6 changes: 0 additions & 6 deletions tests/integration_tests/performance/test_vsock_ab.py
@@ -6,7 +6,6 @@
 
 import pytest
 
-from framework.utils import CpuMap
 from framework.utils_iperf import IPerf3Test, emit_iperf3_metrics
 from framework.utils_vsock import VSOCK_UDS_PATH, make_host_port_path
 
@@ -93,11 +92,6 @@ def test_vsock_throughput(
     vm.api.vsock.put(vsock_id="vsock0", guest_cid=3, uds_path="/" + VSOCK_UDS_PATH)
     vm.start()
 
-    # Check if the needed CPU cores are available. We have the API thread, VMM
-    # thread and then one thread for each configured vCPU. Lastly, we need one for
-    # the iperf server on the host.
-    assert CpuMap.len() > 2 + vm.vcpus_count
-
     # Pin uVM threads to physical cores.
     assert vm.pin_vmm(0), "Failed to pin firecracker thread."
     assert vm.pin_api(1), "Failed to pin fc_api thread."