From 5fc3de55837be1849b33b41490e56dd8e3835794 Mon Sep 17 00:00:00 2001 From: Ed Santiago <santiago@redhat.com> Date: Tue, 17 Sep 2024 10:06:34 -0600 Subject: [PATCH] registry: lock start attempts When running parallel, multiple tests could be trying to start the registry at once. Make this parallel-safe. Also, use a safer port range for the registry: something outside of /proc/sys/net/ipv4/ip_local_port_range. Sorry, I'm including a FIXME section that I haven't investigated deeply enough. Signed-off-by: Ed Santiago <santiago@redhat.com> --- test/system/helpers.registry.bash | 39 +++++++++++++++++++++++++------ test/system/setup_suite.bash | 2 +- 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/test/system/helpers.registry.bash b/test/system/helpers.registry.bash index 643a0e5151..ff57ad8b87 100644 --- a/test/system/helpers.registry.bash +++ b/test/system/helpers.registry.bash @@ -17,8 +17,26 @@ unset REGISTRY_AUTH_FILE # Start a local registry. Only needed on demand (e.g. by 150-login.bats) # and then only once: if we start, leave it running until final teardown. function start_registry() { - if [[ -d "$PODMAN_LOGIN_WORKDIR/auth" ]]; then - # Already started + AUTHDIR=${PODMAN_LOGIN_WORKDIR}/auth + + local startflag=${PODMAN_LOGIN_WORKDIR}/OK + + if ! mkdir $AUTHDIR; then + # *Possibly* already started. Or, possibly (when running + # parallel tests) another process is trying to start it. + # Give it some time. 
+ local timeout=30 + while [[ $timeout -gt 0 ]]; do + if [[ -e $startflag ]]; then + echo "Registry has already been started by another process" + return + fi + + sleep 1 + timeout=$((timeout - 1)) + done + + die "Internal error: timed out waiting for another process to start registry" # Fixes very obscure corner case in root system tests: # 1) we run 150-login tests, starting a registry; then @@ -26,11 +44,15 @@ function start_registry() { # 3) run 700-play, the "private" test, which needs the # already-started registry, but its port is now DROPped, # so the test times out trying to talk to registry - run_podman --storage-driver vfs $(podman_isolation_opts ${PODMAN_LOGIN_WORKDIR}) network reload --all + + ###### FIXME FIXME FIXME TEMPORARY! + ###### Trying to understand flake #23725. What happens if we stop + ###### doing the network reload? + ###### FIXME FIXME FIXME, should we do it in stop_registry?? + ###### run_podman --storage-driver vfs $(podman_isolation_opts ${PODMAN_LOGIN_WORKDIR}) network reload --all return fi - AUTHDIR=${PODMAN_LOGIN_WORKDIR}/auth mkdir -p $AUTHDIR # Registry image; copy of docker.io, but on our own registry @@ -79,6 +101,9 @@ function start_registry() { wait_for_port 127.0.0.1 ${PODMAN_LOGIN_REGISTRY_PORT} # ...so we look in container logs for confirmation that registry is running. 
_PODMAN_TEST_OPTS="${PODMAN_LOGIN_ARGS}" wait_for_output "listening on .::.:5000" $cid + + touch $startflag + echo "I have started the registry" } function stop_registry() { @@ -103,10 +128,10 @@ function stop_registry() { mount | grep ${PODMAN_LOGIN_WORKDIR} | awk '{print $3}' | xargs --no-run-if-empty umount if [[ $(id -u) -eq 0 ]]; then - rm -rf ${PODMAN_LOGIN_WORKDIR} + rm -rf ${PODMAN_LOGIN_WORKDIR}/* else # rootless image data is owned by a subuid - run_podman unshare rm -rf ${PODMAN_LOGIN_WORKDIR} + run_podman unshare rm -rf ${PODMAN_LOGIN_WORKDIR}/* fi fi @@ -119,7 +144,7 @@ function stop_registry() { echo "" echo "lsof -i -P" lsof -i -P - die "Socket still seems open" + die "Socket $PODMAN_LOGIN_REGISTRY_PORT still seems open" fi } diff --git a/test/system/setup_suite.bash b/test/system/setup_suite.bash index 8a3f910839..fa7169a127 100644 --- a/test/system/setup_suite.bash +++ b/test/system/setup_suite.bash @@ -25,7 +25,7 @@ function setup_suite() { # FIXME: racy! It could be many minutes between now and when we start it. # To mitigate, we use a range not used anywhere else in system tests. - export PODMAN_LOGIN_REGISTRY_PORT=$(random_free_port 42000-42999) + export PODMAN_LOGIN_REGISTRY_PORT=$(random_free_port 27000-27999) # The above does not handle errors. Do a final confirmation. assert "$PODMAN_LOGIN_REGISTRY_PORT" != "" \