Skip to content

Commit

Permalink
Merge branch 'main' into prepare-1-5
Browse files Browse the repository at this point in the history
  • Loading branch information
roypat authored Oct 9, 2023
2 parents 7c289ee + 9232614 commit d67ef18
Show file tree
Hide file tree
Showing 8 changed files with 182 additions and 90 deletions.
1 change: 1 addition & 0 deletions .github/workflows/trigger_ab_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ on:
push:
branches:
- main
- firecracker-v*

jobs:
trigger_ab_test:
Expand Down
5 changes: 4 additions & 1 deletion tests/framework/ab_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,9 @@ def git_ab_test(
return result_a, result_b, comparison


def check_regression(a_samples: List[float], b_samples: List[float]):
def check_regression(
a_samples: List[float], b_samples: List[float], *, n_resamples: int = 9999
):
"""Checks for a regression by performing a permutation test. A permutation test is a non-parametric test that takes
three parameters: Two populations (sets of samples) and a function computing a "statistic" based on two populations.
First, the test computes the statistic for the initial populations. It then randomly
Expand All @@ -120,6 +122,7 @@ def check_regression(a_samples: List[float], b_samples: List[float]):
# Compute the difference of means, such that a positive difference indicates potential for regression.
lambda x, y: statistics.mean(y) - statistics.mean(x),
vectorized=False,
n_resamples=n_resamples,
)


Expand Down
54 changes: 32 additions & 22 deletions tests/framework/utils_iperf.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,13 @@ def run_test(self, first_free_cpu):
assert self._num_clients < CpuMap.len() - self._microvm.vcpus_count - 2

for server_idx in range(self._num_clients):
cmd = self.host_command(server_idx).build()
assigned_cpu = CpuMap(first_free_cpu)
utils.run_cmd(
f"taskset --cpu-list {assigned_cpu} {self._microvm.jailer.netns_cmd_prefix()} {cmd}"
cmd = (
self.host_command(server_idx)
.with_arg("--affinity", assigned_cpu)
.build()
)
utils.run_cmd(f"{self._microvm.jailer.netns_cmd_prefix()} {cmd}")
first_free_cpu += 1

time.sleep(SERVER_STARTUP_TIME_SEC)
Expand Down Expand Up @@ -105,12 +107,14 @@ def spawn_iperf3_client(self, client_idx):
mode = MODE_MAP[self._mode][client_idx % len(MODE_MAP[self._mode])]

# Add the port where the iperf3 client is going to send/receive.
cmd = self.guest_command(client_idx).with_arg(mode).build()

pinned_cmd = (
f"taskset --cpu-list {client_idx % self._microvm.vcpus_count} {cmd}"
cmd = (
self.guest_command(client_idx)
.with_arg(mode)
.with_arg("--affinity", client_idx % self._microvm.vcpus_count)
.build()
)
rc, stdout, stderr = self._microvm.ssh.run(pinned_cmd)

rc, stdout, stderr = self._microvm.ssh.run(cmd)

assert rc == 0, stderr

Expand Down Expand Up @@ -176,18 +180,24 @@ def emit_iperf3_metrics(metrics, iperf_result, omit):
)[0]:
metrics.put_metric("cpu_utilization_vmm", cpu_util_data_point, "Percent")

for time_series in iperf_result["g2h"]:
for interval in time_series["intervals"][omit:]:
metrics.put_metric(
"throughput_guest_to_host",
interval["sum"]["bits_per_second"],
"Bits/Second",
)
data_points = zip(
*[time_series["intervals"][omit:] for time_series in iperf_result["g2h"]]
)

for time_series in iperf_result["h2g"]:
for interval in time_series["intervals"][omit:]:
metrics.put_metric(
"throughput_host_to_guest",
interval["sum"]["bits_per_second"],
"Bits/Second",
)
for point_in_time in data_points:
metrics.put_metric(
"throughput_guest_to_host",
sum(interval["sum"]["bits_per_second"] for interval in point_in_time),
"Bits/Second",
)

data_points = zip(
*[time_series["intervals"][omit:] for time_series in iperf_result["h2g"]]
)

for point_in_time in data_points:
metrics.put_metric(
"throughput_host_to_guest",
sum(interval["sum"]["bits_per_second"] for interval in point_in_time),
"Bits/Second",
)
3 changes: 2 additions & 1 deletion tests/host_tools/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,8 @@ def emit_raw_emf(emf_msg: dict):
"AWS_EMF_LOG_GROUP_NAME", f"{namespace}-metrics"
)
emf_msg["_aws"]["LogStreamName"] = os.environ.get("AWS_EMF_LOG_STREAM_NAME", "")
emf_msg["_aws"]["Namespace"] = namespace
for metrics in emf_msg["_aws"]["CloudWatchMetrics"]:
metrics["Namespace"] = namespace

emf_endpoint = urlparse(os.environ["AWS_EMF_AGENT_ENDPOINT"])
with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as sock:
Expand Down
25 changes: 19 additions & 6 deletions tests/integration_tests/performance/test_block_ab.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,25 +106,38 @@ def run_fio(microvm, mode, block_size):

def process_fio_logs(vm, fio_mode, logs_dir, metrics):
"""Parses the fio logs in `{logs_dir}/{fio_mode}_bw.*.log` and emits their contents as CloudWatch metrics"""
for job_id in range(vm.vcpus_count):
data = Path(f"{logs_dir}/{fio_mode}_bw.{job_id + 1}.log").read_text("UTF-8")

for line in data.splitlines():
data = [
Path(f"{logs_dir}/{fio_mode}_bw.{job_id + 1}.log")
.read_text("UTF-8")
.splitlines()
for job_id in range(vm.vcpus_count)
]

for tup in zip(*data):
bw_read = 0
bw_write = 0

for line in tup:
_, value, direction, _ = line.split(",", maxsplit=3)
value = int(value.strip())

# See https://fio.readthedocs.io/en/latest/fio_doc.html#log-file-formats
match direction.strip():
case "0":
metrics.put_metric("bw_read", value, "Kilobytes/Second")
bw_read += value
case "1":
metrics.put_metric("bw_write", value, "Kilobytes/Second")
bw_write += value
case _:
assert False

if bw_read:
metrics.put_metric("bw_read", bw_read, "Kilobytes/Second")
if bw_write:
metrics.put_metric("bw_write", bw_write, "Kilobytes/Second")


@pytest.mark.nonci
@pytest.mark.timeout(RUNTIME_SEC * 1000) # 1.40 hours
@pytest.mark.parametrize("vcpus", [1, 2], ids=["1vcpu", "2vcpu"])
@pytest.mark.parametrize("fio_mode", ["randread", "randwrite"])
@pytest.mark.parametrize("fio_block_size", [4096], ids=["bs4096"])
Expand Down
85 changes: 38 additions & 47 deletions tests/integration_tests/performance/test_network_ab.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

import pytest

from framework.utils import CpuMap
from framework.utils_iperf import IPerf3Test, emit_iperf3_metrics

# each iteration is 30 * 0.2s = 6s
Expand Down Expand Up @@ -46,50 +45,57 @@ def consume_ping_output(ping_putput):
yield float(time[0])


@pytest.mark.nonci
@pytest.mark.timeout(3600)
def test_network_latency(microvm_factory, guest_kernel, rootfs, metrics):
"""
Test network latency for multiple vm configurations.
Send a ping from the guest to the host.
"""
@pytest.fixture
def network_microvm(request, microvm_factory, guest_kernel, rootfs):
"""Creates a microvm with the networking setup used by the performance tests in this file.
This fixture receives its vcpu count via indirect parameterization"""
vm = microvm_factory.build(guest_kernel, rootfs, monitor_memory=False)
vm.spawn(log_level="Info")
vm.basic_config(vcpu_count=GUEST_VCPUS, mem_size_mib=GUEST_MEM_MIB)
iface = vm.add_net_iface()
vm.basic_config(vcpu_count=request.param, mem_size_mib=GUEST_MEM_MIB)
vm.add_net_iface()
vm.start()

# Check if the needed CPU cores are available. We have the API thread, VMM
# thread and then one thread for each configured vCPU.
assert CpuMap.len() >= 2 + vm.vcpus_count

# Pin uVM threads to physical cores.
assert vm.pin_vmm(0), "Failed to pin firecracker thread."
assert vm.pin_api(1), "Failed to pin fc_api thread."
for i in range(vm.vcpus_count):
assert vm.pin_vcpu(i, i + 2), f"Failed to pin fc_vcpu {i} thread."

return vm


@pytest.mark.nonci
@pytest.mark.parametrize("network_microvm", [1], indirect=True)
def test_network_latency(
network_microvm, metrics
): # pylint:disable=redefined-outer-name
"""
Test network latency for multiple vm configurations.
Send a ping from the guest to the host.
"""

samples = []
host_ip = network_microvm.iface["eth0"]["iface"].host_ip

for _ in range(ITERATIONS):
rc, ping_output, stderr = vm.ssh.run(
f"ping -c {REQUEST_PER_ITERATION} -i {DELAY} {iface.host_ip}"
rc, ping_output, stderr = network_microvm.ssh.run(
f"ping -c {REQUEST_PER_ITERATION} -i {DELAY} {host_ip}"
)
assert rc == 0, stderr

samples.extend(consume_ping_output(ping_output))

metrics.set_dimensions(
{"performance_test": "test_network_latency", **vm.dimensions}
{"performance_test": "test_network_latency", **network_microvm.dimensions}
)

for sample in samples:
metrics.put_metric("ping_latency", sample, "Milliseconds")


class TCPIPerf3Test(IPerf3Test):
class TcpIPerf3Test(IPerf3Test):
"""IPerf3 runner for the TCP throughput performance test"""

BASE_PORT = 5000
Expand Down Expand Up @@ -120,55 +126,40 @@ def __init__(self, microvm, mode, host_ip, payload_length):

@pytest.mark.nonci
@pytest.mark.timeout(3600)
@pytest.mark.parametrize("vcpus", [1, 2])
@pytest.mark.parametrize("network_microvm", [1, 2], indirect=True)
@pytest.mark.parametrize("payload_length", ["128K", "1024K"], ids=["p128K", "p1024K"])
@pytest.mark.parametrize("mode", ["g2h", "h2g", "bd"])
def test_network_tcp_throughput(
microvm_factory,
guest_kernel,
rootfs,
vcpus,
network_microvm,
payload_length,
mode,
metrics,
):
): # pylint:disable=redefined-outer-name
"""
Iperf between guest and host in both directions for TCP workload.
"""

# We run bi-directional tests only on uVMs with at least 2 vCPUs
# because we need to pin one iperf3/direction per vCPU, and since we
# have two directions, we need at least two vCPUs.
if mode == "bd" and vcpus < 2:
if mode == "bd" and network_microvm.vcpus_count < 2:
pytest.skip("bidrectional test only done with at least 2 vcpus")

vm = microvm_factory.build(guest_kernel, rootfs, monitor_memory=False)
vm.spawn(log_level="Info")
vm.basic_config(vcpu_count=vcpus, mem_size_mib=GUEST_MEM_MIB)
iface = vm.add_net_iface()
vm.start()

# Check if the needed CPU cores are available. We have the API thread, VMM
# thread and then one thread for each configured vCPU. Lastly, we need one for
# the iperf server on the host.
assert CpuMap.len() > 2 + vm.vcpus_count

# Pin uVM threads to physical cores.
assert vm.pin_vmm(0), "Failed to pin firecracker thread."
assert vm.pin_api(1), "Failed to pin fc_api thread."
for i in range(vm.vcpus_count):
assert vm.pin_vcpu(i, i + 2), f"Failed to pin fc_vcpu {i} thread."

test = TCPIPerf3Test(vm, mode, iface.host_ip, payload_length)
data = test.run_test(vm.vcpus_count + 2)
test = TcpIPerf3Test(
network_microvm,
mode,
network_microvm.iface["eth0"]["iface"].host_ip,
payload_length,
)
data = test.run_test(network_microvm.vcpus_count + 2)

metrics.set_dimensions(
{
"performance_test": "test_network_tcp_throughput",
"payload_length": payload_length,
"mode": mode,
**vm.dimensions,
**network_microvm.dimensions,
}
)

emit_iperf3_metrics(metrics, data, TCPIPerf3Test.WARMUP_SEC)
emit_iperf3_metrics(metrics, data, TcpIPerf3Test.WARMUP_SEC)
6 changes: 0 additions & 6 deletions tests/integration_tests/performance/test_vsock_ab.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

import pytest

from framework.utils import CpuMap
from framework.utils_iperf import IPerf3Test, emit_iperf3_metrics
from framework.utils_vsock import VSOCK_UDS_PATH, make_host_port_path

Expand Down Expand Up @@ -93,11 +92,6 @@ def test_vsock_throughput(
vm.api.vsock.put(vsock_id="vsock0", guest_cid=3, uds_path="/" + VSOCK_UDS_PATH)
vm.start()

# Check if the needed CPU cores are available. We have the API thread, VMM
# thread and then one thread for each configured vCPU. Lastly, we need one for
# the iperf server on the host.
assert CpuMap.len() > 2 + vm.vcpus_count

# Pin uVM threads to physical cores.
assert vm.pin_vmm(0), "Failed to pin firecracker thread."
assert vm.pin_api(1), "Failed to pin fc_api thread."
Expand Down
Loading

0 comments on commit d67ef18

Please sign in to comment.