diff --git a/tests/framework/utils.py b/tests/framework/utils.py index a6d79613513..802e002b12c 100644 --- a/tests/framework/utils.py +++ b/tests/framework/utils.py @@ -448,7 +448,7 @@ def get_free_mem_ssh(ssh_connection): raise Exception("Available memory not found in `/proc/meminfo") -def run_cmd_sync(cmd, ignore_return_code=False, no_shell=False, cwd=None): +def run_cmd_sync(cmd, ignore_return_code=False, no_shell=False, cwd=None, timeout=None): """ Execute a given command. @@ -469,7 +469,7 @@ def run_cmd_sync(cmd, ignore_return_code=False, no_shell=False, cwd=None): ) # Capture stdout/stderr - stdout, stderr = proc.communicate() + stdout, stderr = proc.communicate(timeout=timeout) output_message = f"\n[{proc.pid}] Command:\n{cmd}" # Append stdout/stderr to the output message @@ -493,18 +493,14 @@ def run_cmd_sync(cmd, ignore_return_code=False, no_shell=False, cwd=None): return CommandReturn(proc.returncode, stdout.decode(), stderr.decode()) -def run_cmd(cmd, ignore_return_code=False, no_shell=False, cwd=None): +def run_cmd(cmd, **kwargs): """ Run a command using the sync function that logs the output. 
:param cmd: command to run - :param ignore_return_code: whether a non-zero return code should be ignored - :param noshell: don't run the command in a sub-shell :returns: tuple of (return code, stdout, stderr) """ - return run_cmd_sync( - cmd=cmd, ignore_return_code=ignore_return_code, no_shell=no_shell, cwd=cwd - ) + return run_cmd_sync(cmd, **kwargs) def eager_map(func, iterable): diff --git a/tests/host_tools/network.py b/tests/host_tools/network.py index 3ef2332e748..60f1624fb90 100644 --- a/tests/host_tools/network.py +++ b/tests/host_tools/network.py @@ -89,7 +89,7 @@ def _init_connection(self): if ecode != 0: raise ConnectionError - def run(self, cmd_string): + def run(self, cmd_string, timeout=None): """Execute the command passed as a string in the ssh context.""" return self._exec( [ @@ -97,10 +97,11 @@ def run(self, cmd_string): *self.options, f"{self.user}@{self.host}", cmd_string, - ] + ], + timeout, ) - def _exec(self, cmd): + def _exec(self, cmd, timeout=None): """Private function that handles the ssh client invocation.""" # TODO: If a microvm runs in a particular network namespace, we have to @@ -111,7 +112,7 @@ def _exec(self, cmd): if self.netns_file_path is not None: ctx = Namespace(self.netns_file_path, "net") with ctx: - return utils.run_cmd(cmd, ignore_return_code=True) + return utils.run_cmd(cmd, ignore_return_code=True, timeout=timeout) def mac_from_ip(ip_address): diff --git a/tests/integration_tests/functional/test_balloon.py b/tests/integration_tests/functional/test_balloon.py index 785f4c52a81..040dae5ea0c 100644 --- a/tests/integration_tests/functional/test_balloon.py +++ b/tests/integration_tests/functional/test_balloon.py @@ -4,6 +4,7 @@ import logging import time +from subprocess import TimeoutExpired import pytest from retry import retry @@ -42,7 +43,7 @@ def lower_ssh_oom_chance(ssh_connection): """Lure OOM away from ssh process""" logger = logging.getLogger("lower_ssh_oom_chance") - cmd = "pidof sshd" + cmd = "cat /run/sshd.pid" 
exit_code, stdout, stderr = ssh_connection.run(cmd) # add something to the logs for troubleshooting if exit_code != 0: @@ -52,39 +53,36 @@ def lower_ssh_oom_chance(ssh_connection): for pid in stdout.split(" "): cmd = f"choom -n -1000 -p {pid}" - ssh_connection.run(cmd) + exit_code, stdout, stderr = ssh_connection.run(cmd) + if exit_code != 0: + logger.error("while running: %s", cmd) + logger.error("stdout: %s", stdout) + logger.error("stderr: %s", stderr) -def make_guest_dirty_memory(ssh_connection, should_oom=False, amount_mib=32): +def make_guest_dirty_memory(ssh_connection, amount_mib=32): """Tell the guest, over ssh, to dirty `amount` pages of memory.""" logger = logging.getLogger("make_guest_dirty_memory") lower_ssh_oom_chance(ssh_connection) - # Aim OOM at fillmem process - cmd = f"choom -n 1000 -- /usr/local/bin/fillmem {amount_mib}" - exit_code, stdout, stderr = ssh_connection.run(cmd) - # add something to the logs for troubleshooting - if exit_code != 0: - logger.error("while running: %s", cmd) - logger.error("stdout: %s", stdout) - logger.error("stderr: %s", stderr) + cmd = f"/usr/local/bin/fillmem {amount_mib}" + try: + exit_code, stdout, stderr = ssh_connection.run(cmd, timeout=1.0) + # add something to the logs for troubleshooting + if exit_code != 0: + logger.error("while running: %s", cmd) + logger.error("stdout: %s", stdout) + logger.error("stderr: %s", stderr) + + cmd = "cat /tmp/fillmem_output.txt" + except TimeoutExpired: + # It's ok if this expires. Sometimes the SSH connection + # gets killed by the OOM killer *after* the fillmem program + # started. As a result, we can ignore timeouts here. 
+ pass - cmd = "cat /tmp/fillmem_output.txt" - tries = 3 - while tries > 0: - # it may take a bit of time to dirty the memory and the OOM to kick-in - time.sleep(0.5) - _, stdout, _ = ssh_connection.run(cmd) - if stdout != "": - break - tries -= 1 - - if should_oom: - assert "OOM Killer stopped the program with signal 9, exit code 0" in stdout - else: - assert exit_code == 0, stderr - assert "Memory filling was successful" in stdout, stdout + time.sleep(5) def _test_rss_memory_lower(test_microvm, stable_delta=1): @@ -181,11 +179,11 @@ def test_deflate_on_oom(test_microvm_with_api, deflate_on_oom): deflate_on_oom=True - should not result in an OOM kill + should result in balloon_stats['actual_mib'] being reduced deflate_on_oom=False - should result in an OOM kill + should result in balloon_stats['actual_mib'] remaining the same """ test_microvm = test_microvm_with_api test_microvm.spawn() @@ -194,7 +192,7 @@ def test_deflate_on_oom(test_microvm_with_api, deflate_on_oom): # Add a deflated memory balloon. test_microvm.api.balloon.put( - amount_mib=0, deflate_on_oom=deflate_on_oom, stats_polling_interval_s=0 + amount_mib=0, deflate_on_oom=deflate_on_oom, stats_polling_interval_s=1 ) # Start the microvm. @@ -213,8 +211,17 @@ def test_deflate_on_oom(test_microvm_with_api, deflate_on_oom): # This call will internally wait for rss to become stable. _ = get_stable_rss_mem_by_pid(firecracker_pid) - # Check that using memory leads an out of memory error (or not). - make_guest_dirty_memory(test_microvm.ssh, should_oom=not deflate_on_oom) + # Check that using memory leads to the balloon device automatically + # deflating (or not). 
+ balloon_size_before = test_microvm.api.balloon_stats.get().json()["actual_mib"] + make_guest_dirty_memory(test_microvm.ssh) + + balloon_size_after = test_microvm.api.balloon_stats.get().json()["actual_mib"] + print(f"size before: {balloon_size_before} size after: {balloon_size_after}") + if deflate_on_oom: + assert balloon_size_after < balloon_size_before, "Balloon did not deflate" + else: + assert balloon_size_after >= balloon_size_before, "Balloon deflated" # pylint: disable=C0103