From b4871b1bdbf7cd984029f5a272f6dfda6752ec2e Mon Sep 17 00:00:00 2001 From: Babis Chalios Date: Thu, 5 Oct 2023 14:31:18 +0000 Subject: [PATCH] feat(test): add optional timeout in SSH connections We have a mechanism that allows us to run a command inside a microVM. This mechanism ends up using Popen.communicate to retrieve the output of the SSH command. Popen.communicate comes with a timeout variable that we were not using. However, it is useful in cases where we don't want to wait for the result of the command we execute in the microVM. This commit extends our SSH mechanism to accept the timeout argument. Then, it uses the timeout in the test_balloon.py::test_deflate_on_oom when launching the fillmem process. fillmem drains the memory of the VM, which sometimes results in the SSH connection hanging. Signed-off-by: Babis Chalios --- tests/framework/utils.py | 12 ++++------- tests/host_tools/network.py | 9 ++++---- .../functional/test_balloon.py | 21 +++++++++++++------ 3 files changed, 24 insertions(+), 18 deletions(-) diff --git a/tests/framework/utils.py b/tests/framework/utils.py index a6d79613513..802e002b12c 100644 --- a/tests/framework/utils.py +++ b/tests/framework/utils.py @@ -448,7 +448,7 @@ def get_free_mem_ssh(ssh_connection): raise Exception("Available memory not found in `/proc/meminfo") -def run_cmd_sync(cmd, ignore_return_code=False, no_shell=False, cwd=None): +def run_cmd_sync(cmd, ignore_return_code=False, no_shell=False, cwd=None, timeout=None): """ Execute a given command. @@ -469,7 +469,7 @@ def run_cmd_sync(cmd, ignore_return_code=False, no_shell=False, cwd=None): ) # Capture stdout/stderr - stdout, stderr = proc.communicate() + stdout, stderr = proc.communicate(timeout=timeout) output_message = f"\n[{proc.pid}] Command:\n{cmd}" # Append stdout/stderr to the output message @@ -493,18 +493,14 @@ def run_cmd_sync(cmd, ignore_return_code=False, no_shell=False, cwd=None): return CommandReturn(proc.returncode, stdout.decode(), stderr.decode()) -def run_cmd(cmd, ignore_return_code=False, no_shell=False, cwd=None): +def run_cmd(cmd, **kwargs): """ Run a command using the sync function that logs the output. :param cmd: command to run - :param ignore_return_code: whether a non-zero return code should be ignored - :param noshell: don't run the command in a sub-shell :returns: tuple of (return code, stdout, stderr) """ - return run_cmd_sync( - cmd=cmd, ignore_return_code=ignore_return_code, no_shell=no_shell, cwd=cwd - ) + return run_cmd_sync(cmd, **kwargs) def eager_map(func, iterable): diff --git a/tests/host_tools/network.py b/tests/host_tools/network.py index 3ef2332e748..60f1624fb90 100644 --- a/tests/host_tools/network.py +++ b/tests/host_tools/network.py @@ -89,7 +89,7 @@ def _init_connection(self): if ecode != 0: raise ConnectionError - def run(self, cmd_string): + def run(self, cmd_string, timeout=None): """Execute the command passed as a string in the ssh context.""" return self._exec( [ @@ -97,10 +97,11 @@ def run(self, cmd_string): *self.options, f"{self.user}@{self.host}", cmd_string, - ] + ], + timeout, ) - def _exec(self, cmd): + def _exec(self, cmd, timeout=None): """Private function that handles the ssh client invocation.""" # TODO: If a microvm runs in a particular network namespace, we have to @@ -111,7 +112,7 @@ def _exec(self, cmd): if self.netns_file_path is not None: ctx = Namespace(self.netns_file_path, "net") with ctx: - return utils.run_cmd(cmd, ignore_return_code=True) + return utils.run_cmd(cmd, ignore_return_code=True, timeout=timeout) def mac_from_ip(ip_address): diff --git a/tests/integration_tests/functional/test_balloon.py b/tests/integration_tests/functional/test_balloon.py index 8ec591b5836..040dae5ea0c 100644 --- a/tests/integration_tests/functional/test_balloon.py +++ b/tests/integration_tests/functional/test_balloon.py @@ -4,6 +4,7 @@ import logging import time +from subprocess import TimeoutExpired import pytest from retry import retry @@ -66,12 +67,20 @@ def make_guest_dirty_memory(ssh_connection, amount_mib=32): lower_ssh_oom_chance(ssh_connection) cmd = f"/usr/local/bin/fillmem {amount_mib}" - exit_code, stdout, stderr = ssh_connection.run(cmd) - # add something to the logs for troubleshooting - if exit_code != 0: - logger.error("while running: %s", cmd) - logger.error("stdout: %s", stdout) - logger.error("stderr: %s", stderr) + try: + exit_code, stdout, stderr = ssh_connection.run(cmd, timeout=1.0) + # add something to the logs for troubleshooting + if exit_code != 0: + logger.error("while running: %s", cmd) + logger.error("stdout: %s", stdout) + logger.error("stderr: %s", stderr) + + cmd = "cat /tmp/fillmem_output.txt" + except TimeoutExpired: + # It's ok if this expires. Some times the SSH connection + # gets killed by the OOM killer *after* the fillmem program + # started. As a result, we can ignore timeouts here. + pass time.sleep(5)