diff --git a/tests/README.md b/tests/README.md index 97885fa456c1..49986fcab4de 100644 --- a/tests/README.md +++ b/tests/README.md @@ -192,31 +192,23 @@ source tree. This directory is bind-mounted in the container and used as a local image cache. `Q5:` -*Is there a way to speed up integration tests execution time?* -`A5:` -You can speed up tests execution time with any of these: - -`Q6:` *How can I get live logger output from the tests?* -`A6:` +`A5:` Accessing **pytest.ini** will allow you to modify logger settings. -1. Run the tests from inside the container and set the environment variable - `KEEP_TEST_SESSION` to a non-empty value. - Each **Testrun** begins by building the firecracker and unit tests binaries, - and ends by deleting all the built artifacts. - If you run the tests [from inside the container](#running), you can prevent - the binaries from being deleted exporting the `KEEP_TEST_SESSION` variable. - This way, all the following **Testrun** will be significantly faster as they - will not need to rebuild everything. - If any Rust source file is changed, the build is done incrementally. +`Q6:` +*Is there a way to speed up integration tests execution time?* + +`A6:` +You can narrow down the test selection as described in the **Running** +section, or in the **Troubleshooting Tests** section. For example: 1. Pass the `-k substring` option to Pytest to only run a subset of tests by specifying a part of their name. -1. Only run the tests contained in a file or directory, as specified in the - **Running** section. +1. Only run the tests contained in a file or directory. + ## Implementation Goals @@ -241,7 +233,6 @@ Pytest was chosen because: ### Features -- A fixture for interacting with microvms via SSH. - Use the Firecracker Open API spec to populate Microvm API resource URLs. - Do the testrun in a container for better insulation. - Event-based monitoring of microvm socket file creation to avoid while spins. 
@@ -261,3 +252,95 @@ Pytest was chosen because: ## Further Reading Contributing to this testing system requires a dive deep on `pytest`. + +## Troubleshooting tests + +### How to select tests + +When troubleshooting tests, it is important to narrow the selection down to the +ones that are of interest. `pytest` offers several features to do that: + +#### single file + +```sh +./tools/devtool -y test -- integration_tests/performance/test_boottime.py +``` + +#### single test + +```sh +./tools/devtool -y test -- integration_tests/performance/test_boottime.py::test_boottime +``` + +#### single test + parameter(s) + +Use the `-k` parameter to match part of the test (including the parameters!): + +```sh +./tools/devtool -y test -- -k 1024 integration_tests/performance/test_boottime.py::test_boottime +``` + +#### --last-failed + +One can use the `--last-failed` parameter to only run the tests that failed in +the previous run. Useful when several tests fail after making large changes. + +### Run tests from within the container + +To avoid having to enter/exit Docker every test run, you can run the tests +directly within a Docker session: + +```sh +./tools/devtool -y shell --privileged +./tools/test.sh integration_tests/functional/test_api.py +``` + +### How to use the Python debugger (pdb) for debugging + +Just append `--pdb`, and when a test fails it will drop you in pdb, where you +can examine local variables and the stack, and can use the normal Python REPL. 
+ +```sh +./tools/devtool -y test -- -k 1024 integration_tests/performance/test_boottime.py::test_boottime --pdb +``` + +### How to use ipython's ipdb instead of pdb + +```sh +./tools/devtool -y shell --privileged +pip3 install ipython +export PYTEST_ADDOPTS=--pdbcls=IPython.terminal.debugger:TerminalPdb +./tools/test.sh -k 1024 integration_tests/performance/test_boottime.py::test_boottime +``` + +### How to debug tests + +```sh +./tools/devtool -y test_debug -k 1024 integration_tests/performance/test_boottime.py::test_boottime +``` + +### How to reproduce intermittent (aka flaky) tests + +Just run the test in a loop, and make it drop you into pdb when it fails. + +```sh +while true; do + ./tools/devtool -y test -- integration_tests/functional/test_balloon.py::test_deflate_on_oom -k False --pdb +done +``` + +### How to run tests in parallel with `-n` + +We can run the tests in parallel via `pytest-xdist`. Not all tests can run in +parallel (the ones in `build` and `performance` are not supposed to run in +parallel). + +By default, the tests run sequentially. One can use the `-n` option to control +the parallelism. `-n auto` will run as many workers as CPUs, which may be too many. +As a rough heuristic, use half the available CPUs. I use `-n4` for my 8 CPU +(HT-enabled) laptop. On metal instances 8 is a good number; more than that just +gives diminishing returns. 
+ +```sh +./tools/devtool -y test -- integration_tests/functional -n$(expr $(nproc) / 2) --dist worksteal +``` diff --git a/tests/framework/microvm.py b/tests/framework/microvm.py index d592293bc193..55b557c24248 100644 --- a/tests/framework/microvm.py +++ b/tests/framework/microvm.py @@ -35,6 +35,7 @@ from framework.defs import FC_PID_FILE_NAME, MAX_API_CALL_DURATION_MS from framework.http_api import Api from framework.jailer import JailerContext +from framework.microvm_helpers import MicrovmHelpers from framework.properties import global_props from host_tools.memory import MemoryMonitor @@ -169,6 +170,7 @@ def __init__( self.rootfs_file = None self.ssh_key = None self.initrd_file = None + self.boot_args = None # The binaries this microvm will use to start. if fc_binary_path is None: @@ -221,6 +223,8 @@ def __init__( # MMDS content from file self.metadata_file = None + self.help = MicrovmHelpers(self) + def __repr__(self): return f"" @@ -576,9 +580,11 @@ def basic_config( if self.memory_monitor: self.memory_monitor.start() + if boot_args is not None: + self.boot_args = boot_args boot_source_args = { "kernel_image_path": self.create_jailed_resource(self.kernel_file), - "boot_args": boot_args, + "boot_args": self.boot_args, } if use_initrd and self.initrd_file is not None: diff --git a/tests/framework/microvm_helpers.py b/tests/framework/microvm_helpers.py new file mode 100644 index 000000000000..fdc821376223 --- /dev/null +++ b/tests/framework/microvm_helpers.py @@ -0,0 +1,162 @@ +# Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0

"""Microvm helper functions for interactive use"""

import os
import platform
import subprocess


class DockerInfo:
    """Class to extract information from the Docker environment"""

    @property
    def ip(self):
        """Return this container's IP address

        The address is read from the `eth0` interface using `ip -j` piped
        through `jq`, so both tools must be available in the container.
        """
        return (
            subprocess.check_output(
                "ip -j address show eth0 |jq -r '.[].addr_info[].local'",
                shell=True,
            )
            .decode("ascii")
            .strip()
        )

    @property
    def id(self):
        """Return this container's id

        This is the node (host) name reported by `platform.node()`.
        NOTE(review): presumably Docker sets the hostname to the container
        id by default; confirm for containers started with a custom hostname.
        """
        return platform.node()


DOCKER = DockerInfo()


class MicrovmHelpers:
    """Microvm helper functions for interactive use

    Wraps a microvm object (`vm`) and offers convenience methods meant to be
    called by hand, e.g. from a debugger prompt.
    """

    # Kernel arguments appended by `enable_console`.
    CONSOLE_BOOT_ARGS = "console=ttyS0 reboot=k panic=1"

    def __init__(self, vm):
        self.vm = vm

    def print_log(self):
        """Print Firecracker's log"""
        print(self.vm.log_data)

    def resize_disk(self, disk, size: int = 2**30):
        """Resize a filesystem

        The backing file `disk` is truncated/extended to `size` bytes
        (1 GiB by default) and then `resize2fs` is run on it.

        The filesystem should be unmounted for this to work
        """
        os.truncate(disk, size)
        subprocess.check_output(["resize2fs", disk])

    def gdbserver(self, port=2000):
        """Attach gdbserver to the FC process

        gdbserver is started in the background (it is not waited for) and a
        hint on how to connect gdb to it is printed.

        See https://sourceware.org/gdb/current/onlinedocs/gdb.html/Remote-Debugging.html#Remote-Debugging
        """
        comm = f"localhost:{port}"
        subprocess.Popen(["gdbserver", "--attach", comm, str(self.vm.jailer_clone_pid)])
        print(f"Connect gdb with:\n\tgdb --ex 'target remote {DOCKER.ip}:{port}'")

    def lldbserver(self, port=2001):
        """Attach lldb-server to the FC process

        See https://lldb.llvm.org/use/remote.html

        TBD does not work. Fails with
            error: attach failed: lost connection
        """
        # Unlike gdbserver, lldb-server is not a separate package, but is part
        # of lldb and it's about ~400MB to install, so we don't include it in
        # the devctr
        subprocess.run("apt update && apt install lldb", shell=True, check=True)
        subprocess.Popen(["lldb-server", "p", "--listen", f"*:{port}", "--server"])
        print(
            f"Connect lldb with\n\tlldb -o 'platform select remote-linux' -o 'platform connect connect://{DOCKER.ip}:{port}' -o 'attach {self.vm.jailer_clone_pid}'"
        )

    def tmux_neww(self, cmd: str):
        """Open a window in the local tmux running `cmd`

        Returns the `subprocess.CompletedProcess` of the `tmux neww` call;
        raises `subprocess.CalledProcessError` if tmux exits non-zero.
        """
        return subprocess.run(["tmux", "neww", cmd], check=True)

    def how_to_ssh(self):
        """Return the command line to SSH into the microvm

        This may be useful for example to get a terminal
        """
        ip = self.vm.iface["eth0"]["iface"].guest_ip
        return f"ip netns exec {self.vm.jailer.netns} ssh -o StrictHostKeyChecking=no -i {self.vm.ssh_key} root@{ip}"

    def tmux_ssh(self):
        """Open a tmux window with an SSH session to the VM"""
        return self.tmux_neww(self.how_to_ssh())

    def enable_console(self):
        """Helper method to attach a console, before the machine boots

        Appends the serial console kernel arguments to the VM's boot args and
        disables jailer daemonization.

        Raises:
            RuntimeError: if the VM's API object already exists, i.e. the VM
                was already spawned.
        """
        if self.vm.api is not None:
            raise RuntimeError(".spawn already called, too late to enable the console")
        if self.vm.boot_args:
            # Kernel arguments are whitespace-separated: without the space the
            # last existing argument would be fused with "console=ttyS0".
            self.vm.boot_args = f"{self.vm.boot_args} {self.CONSOLE_BOOT_ARGS}"
        else:
            self.vm.boot_args = self.CONSOLE_BOOT_ARGS
        self.vm.jailer.daemonize = False

    def how_to_console(self):
        """Return the command line to connect to the VM console"""
        return f"screen -dR {self.vm.screen_session}"

    def tmux_console(self):
        """Open a tmux window with the console"""
        return self.tmux_neww(self.how_to_console())

    def how_to_docker(self):
        """How to get into this container from outside

        Returns only the `docker exec -it <id>` prefix; no command to run in
        the container is included in the returned string.
        """
        return f"docker exec -it {DOCKER.id}"

    def enable_ip_forwarding(self):
        """
        Enables IP forwarding

        Creates a veth pair between the host side and the VM's network
        namespace, adds forwarding/masquerading iptables rules on both sides,
        and finally configures a default route and a nameserver inside the
        guest over SSH.

        TBD this only works for a single microvm. allow several microvms.
        we need to make the veth network smaller and **allocate** them
        accordingly
        """
        netns = self.vm.jailer.netns
        vethhost = "vethhost0"
        vethhost_ip = "10.0.0.1"
        veth_net = "10.0.0.0/255.255.255.0"
        tap_net = "192.168.0.0/255.255.255.0"
        tap_host_ip = self.vm.iface["eth0"]["iface"].host_ip

        def run(cmd):
            return subprocess.run(cmd, shell=True, check=True)

        def run_in_netns(cmd):
            return run(f"ip netns exec {netns} " + cmd)

        # outside netns
        # iptables -L -v -n
        run(f"ip link add name {vethhost} type veth peer name vethvpn0 netns {netns}")
        run(f"ip addr add {vethhost_ip}/24 dev {vethhost}")
        run_in_netns("ip addr add 10.0.0.2/24 dev vethvpn0")
        run(f"ip link set {vethhost} up")
        run_in_netns("ip link set vethvpn0 up")

        run("iptables -P FORWARD DROP")
        # iptables -L FORWARD
        # iptables -t nat -L
        run(f"iptables -t nat -A POSTROUTING -s {veth_net} -o eth0 -j MASQUERADE")
        run(f"iptables -A FORWARD -i eth0 -o {vethhost} -j ACCEPT")
        run(f"iptables -A FORWARD -i {vethhost} -o eth0 -j ACCEPT")

        # in the netns
        run_in_netns(f"ip route add default via {vethhost_ip}")
        # tap_ip = ipaddress.ip_network("192.168.0.1/30", False)
        run_in_netns("iptables -A FORWARD -i tap0 -o vethvpn0 -j ACCEPT")
        run_in_netns("iptables -A FORWARD -i vethvpn0 -o tap0 -j ACCEPT")
        run_in_netns(
            f"iptables -t nat -A POSTROUTING -s {tap_net} -o vethvpn0 -j MASQUERADE"
        )

        self.vm.ssh.run(f"ip route add default via {tap_host_ip}")
        self.vm.ssh.run("echo nameserver 8.8.8.8 >/etc/resolv.conf")
# Run the tests through ./tools/test.sh with `--pdb`, inside a new tmux
# session started via cmd_sh.
# Arguments: extra arguments appended to the ./tools/test.sh command line.
cmd_test_debug() {
    # Use "$*" rather than "$@": the arguments are embedded in one command
    # string, and "$@" inside a larger quoted string would split that string
    # into several separate words.
    cmd_sh "tmux new ./tools/test.sh --pdb $*"
}