From 75c8852a8c8df1962d93aa0445ac0d1e5b85711d Mon Sep 17 00:00:00 2001 From: Ed Santiago Date: Mon, 18 Nov 2024 09:04:16 -0700 Subject: [PATCH] Tests: make _prefetch() parallel-safe The _prefetch helper, introduced in #2036, is not parallel-safe: two or more parallel jobs fetching the same image can step on each other and produce garbage images. Although we still can't run buildah tests in parallel (see #5552), we can at least set up the scaffolding for that to happen. This commit reworks _prefetch() such that the image work is wrapped inside flock. It has been working fine for months in #5552, and is IMO safe for production. This can then make it much easier to flip the parallelization switch once the final zstd bug is squashed. Signed-off-by: Ed Santiago --- tests/helpers.bash | 47 +++++++++++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/tests/helpers.bash b/tests/helpers.bash index f8ab624a831..ed5de994e7d 100644 --- a/tests/helpers.bash +++ b/tests/helpers.bash @@ -172,9 +172,11 @@ function normalize_image_name() { function _prefetch() { if [ -z "${_BUILDAH_IMAGE_CACHEDIR}" ]; then - _pgid=$(sed -ne 's/^NSpgid:\s*//p' /proc/$$/status) - export _BUILDAH_IMAGE_CACHEDIR=${BATS_TMPDIR}/buildah-image-cache.$_pgid + export _BUILDAH_IMAGE_CACHEDIR=${BATS_SUITE_TMPDIR}/buildah-image-cache mkdir -p ${_BUILDAH_IMAGE_CACHEDIR} + + # It's 700 by default; that prevents 'unshare' from reading cached images + chmod 711 ${BATS_SUITE_TMPDIR:?is unset} ${BATS_SUITE_TMPDIR}/.. fi local storage= @@ -186,24 +188,35 @@ function _prefetch() { img=$(normalize_image_name "$img") echo "# [checking for: $img]" >&2 fname=$(tr -c a-zA-Z0-9.- - <<< "$img") - if [ -d $_BUILDAH_IMAGE_CACHEDIR/$fname ]; then - echo "# [restoring from cache: $_BUILDAH_IMAGE_CACHEDIR / $img]" >&2 - copy dir:$_BUILDAH_IMAGE_CACHEDIR/$fname containers-storage:"$storage""$img" - else - rm -fr $_BUILDAH_IMAGE_CACHEDIR/$fname - echo "# [copy docker://$img dir:$_BUILDAH_IMAGE_CACHEDIR/$fname]" >&2 - for attempt in $(seq 3) ; do - if copy $COPY_REGISTRY_OPTS docker://"$img" dir:$_BUILDAH_IMAGE_CACHEDIR/$fname ; then - break - fi - sleep 5 - done - echo "# [copy dir:$_BUILDAH_IMAGE_CACHEDIR/$fname containers-storage:$storage$img]" >&2 - copy dir:$_BUILDAH_IMAGE_CACHEDIR/$fname containers-storage:"$storage""$img" - fi + ( flock --timeout 300 9 || die "Could not flock"; _prefetch_locksafe $img $fname ) 9> $_BUILDAH_IMAGE_CACHEDIR/$fname.lock done } +# DO NOT CALL THIS. EVER. This must only be called from _prefetch(). +function _prefetch_locksafe() { + local img="$1" + local fname="$2" + + if [ -d $_BUILDAH_IMAGE_CACHEDIR/$fname ]; then + echo "# [restoring from cache: $_BUILDAH_IMAGE_CACHEDIR / $img]" >&2 + copy dir:$_BUILDAH_IMAGE_CACHEDIR/$fname containers-storage:"$storage""$img" + else + rm -fr ${_BUILDAH_IMAGE_CACHEDIR:?THIS CAN NEVER HAPPEN}/$fname + echo "# [copy docker://$img dir:$_BUILDAH_IMAGE_CACHEDIR/$fname]" >&2 + for attempt in $(seq 3) ; do + if copy $COPY_REGISTRY_OPTS docker://"$img" dir:$_BUILDAH_IMAGE_CACHEDIR/$fname ; then + break + else + # Failed. Clean up, so we don't leave incomplete remnants + rm -fr ${_BUILDAH_IMAGE_CACHEDIR:?THIS CAN NEVER HAPPEN EITHER}/$fname + fi + sleep 5 + done + echo "# [copy dir:$_BUILDAH_IMAGE_CACHEDIR/$fname containers-storage:$storage$img]" >&2 + copy dir:$_BUILDAH_IMAGE_CACHEDIR/$fname containers-storage:"$storage""$img" + fi +} + function createrandom() { dd if=/dev/urandom bs=1 count=${2:-256} of=${1:-${BATS_TMPDIR}/randomfile} status=none }