Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use balloon statistics in the test that checks balloon deflates on OOM #4150

Merged
merged 4 commits into from
Oct 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 4 additions & 8 deletions tests/framework/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,7 +448,7 @@ def get_free_mem_ssh(ssh_connection):
raise Exception("Available memory not found in `/proc/meminfo")


def run_cmd_sync(cmd, ignore_return_code=False, no_shell=False, cwd=None):
def run_cmd_sync(cmd, ignore_return_code=False, no_shell=False, cwd=None, timeout=None):
"""
Execute a given command.

Expand All @@ -469,7 +469,7 @@ def run_cmd_sync(cmd, ignore_return_code=False, no_shell=False, cwd=None):
)

# Capture stdout/stderr
stdout, stderr = proc.communicate()
stdout, stderr = proc.communicate(timeout=timeout)

output_message = f"\n[{proc.pid}] Command:\n{cmd}"
# Append stdout/stderr to the output message
Expand All @@ -493,18 +493,14 @@ def run_cmd_sync(cmd, ignore_return_code=False, no_shell=False, cwd=None):
return CommandReturn(proc.returncode, stdout.decode(), stderr.decode())


def run_cmd(cmd, ignore_return_code=False, no_shell=False, cwd=None):
def run_cmd(cmd, **kwargs):
"""
Run a command using the sync function that logs the output.

:param cmd: command to run
:param ignore_return_code: whether a non-zero return code should be ignored
:param no_shell: don't run the command in a sub-shell
:returns: tuple of (return code, stdout, stderr)
"""
return run_cmd_sync(
cmd=cmd, ignore_return_code=ignore_return_code, no_shell=no_shell, cwd=cwd
)
return run_cmd_sync(cmd, **kwargs)


def eager_map(func, iterable):
Expand Down
9 changes: 5 additions & 4 deletions tests/host_tools/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,18 +89,19 @@ def _init_connection(self):
if ecode != 0:
raise ConnectionError

def run(self, cmd_string):
def run(self, cmd_string, timeout=None):
"""Execute the command passed as a string in the ssh context."""
return self._exec(
[
"ssh",
*self.options,
f"{self.user}@{self.host}",
cmd_string,
]
],
timeout,
)

def _exec(self, cmd):
def _exec(self, cmd, timeout=None):
"""Private function that handles the ssh client invocation."""

# TODO: If a microvm runs in a particular network namespace, we have to
Expand All @@ -111,7 +112,7 @@ def _exec(self, cmd):
if self.netns_file_path is not None:
ctx = Namespace(self.netns_file_path, "net")
with ctx:
return utils.run_cmd(cmd, ignore_return_code=True)
return utils.run_cmd(cmd, ignore_return_code=True, timeout=timeout)


def mac_from_ip(ip_address):
Expand Down
69 changes: 38 additions & 31 deletions tests/integration_tests/functional/test_balloon.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import logging
import time
from subprocess import TimeoutExpired

import pytest
from retry import retry
Expand Down Expand Up @@ -42,7 +43,7 @@ def lower_ssh_oom_chance(ssh_connection):
"""Lure OOM away from ssh process"""
logger = logging.getLogger("lower_ssh_oom_chance")

cmd = "pidof sshd"
cmd = "cat /run/sshd.pid"
exit_code, stdout, stderr = ssh_connection.run(cmd)
# add something to the logs for troubleshooting
if exit_code != 0:
Expand All @@ -52,39 +53,36 @@ def lower_ssh_oom_chance(ssh_connection):

for pid in stdout.split(" "):
cmd = f"choom -n -1000 -p {pid}"
ssh_connection.run(cmd)
exit_code, stdout, stderr = ssh_connection.run(cmd)
if exit_code != 0:
logger.error("while running: %s", cmd)
logger.error("stdout: %s", stdout)
logger.error("stderr: %s", stderr)


def make_guest_dirty_memory(ssh_connection, should_oom=False, amount_mib=32):
def make_guest_dirty_memory(ssh_connection, amount_mib=32):
"""Tell the guest, over ssh, to dirty `amount` pages of memory."""
logger = logging.getLogger("make_guest_dirty_memory")

lower_ssh_oom_chance(ssh_connection)

# Aim OOM at fillmem process
cmd = f"choom -n 1000 -- /usr/local/bin/fillmem {amount_mib}"
exit_code, stdout, stderr = ssh_connection.run(cmd)
# add something to the logs for troubleshooting
if exit_code != 0:
logger.error("while running: %s", cmd)
logger.error("stdout: %s", stdout)
logger.error("stderr: %s", stderr)
cmd = f"/usr/local/bin/fillmem {amount_mib}"
try:
exit_code, stdout, stderr = ssh_connection.run(cmd, timeout=1.0)
# add something to the logs for troubleshooting
if exit_code != 0:
logger.error("while running: %s", cmd)
logger.error("stdout: %s", stdout)
logger.error("stderr: %s", stderr)

cmd = "cat /tmp/fillmem_output.txt"
except TimeoutExpired:
# It's ok if this expires. Sometimes the SSH connection
# gets killed by the OOM killer *after* the fillmem program
# started. As a result, we can ignore timeouts here.
roypat marked this conversation as resolved.
Show resolved Hide resolved
pass

cmd = "cat /tmp/fillmem_output.txt"
tries = 3
while tries > 0:
# it may take a bit of time to dirty the memory and the OOM to kick-in
time.sleep(0.5)
_, stdout, _ = ssh_connection.run(cmd)
if stdout != "":
break
tries -= 1

if should_oom:
assert "OOM Killer stopped the program with signal 9, exit code 0" in stdout
else:
assert exit_code == 0, stderr
assert "Memory filling was successful" in stdout, stdout
time.sleep(5)


def _test_rss_memory_lower(test_microvm, stable_delta=1):
Expand Down Expand Up @@ -181,11 +179,11 @@ def test_deflate_on_oom(test_microvm_with_api, deflate_on_oom):

deflate_on_oom=True

should not result in an OOM kill
should result in balloon_stats['actual_mib'] being reduced

deflate_on_oom=False

should result in an OOM kill
should result in balloon_stats['actual_mib'] remaining the same
"""
test_microvm = test_microvm_with_api
test_microvm.spawn()
Expand All @@ -194,7 +192,7 @@ def test_deflate_on_oom(test_microvm_with_api, deflate_on_oom):

# Add a deflated memory balloon.
test_microvm.api.balloon.put(
amount_mib=0, deflate_on_oom=deflate_on_oom, stats_polling_interval_s=0
amount_mib=0, deflate_on_oom=deflate_on_oom, stats_polling_interval_s=1
)

# Start the microvm.
Expand All @@ -213,8 +211,17 @@ def test_deflate_on_oom(test_microvm_with_api, deflate_on_oom):
# This call will internally wait for rss to become stable.
_ = get_stable_rss_mem_by_pid(firecracker_pid)

# Check that using memory leads an out of memory error (or not).
make_guest_dirty_memory(test_microvm.ssh, should_oom=not deflate_on_oom)
# Check that using memory leads to the balloon device automatically
# deflating (or not).
balloon_size_before = test_microvm.api.balloon_stats.get().json()["actual_mib"]
make_guest_dirty_memory(test_microvm.ssh)

balloon_size_after = test_microvm.api.balloon_stats.get().json()["actual_mib"]
print(f"size before: {balloon_size_before} size after: {balloon_size_after}")
if deflate_on_oom:
assert balloon_size_after < balloon_size_before, "Balloon did not deflate"
else:
assert balloon_size_after >= balloon_size_before, "Balloon deflated"


# pylint: disable=C0103
Expand Down