From 91fd5c2826a2480fea22575e086692b284fe8d20 Mon Sep 17 00:00:00 2001 From: Shizun Ge Date: Thu, 5 Dec 2024 21:42:01 -0800 Subject: [PATCH] [tests] add check_timeout function. --- src/lib-common.sh | 32 +++++++++---- tests/spec_gantry_test_helper.sh | 79 ++++++++++++++++---------------- 2 files changed, 62 insertions(+), 49 deletions(-) diff --git a/src/lib-common.sh b/src/lib-common.sh index 27ef46d..0ea4b57 100755 --- a/src/lib-common.sh +++ b/src/lib-common.sh @@ -349,6 +349,21 @@ time_elapsed_since() { _time_elapsed_between "$(date +%s)" "${START_TIME}" } +# Return 0 if not timeout +# Return 1 if timeout +# Return 2 if error +_check_timeout() { + local TIMEOUT_SECONDS="${1}" + local START_TIME="${2}" + [ -z "${TIMEOUT_SECONDS}" ] && return 0 + is_number "${TIMEOUT_SECONDS}" || return 2 + is_number "${START_TIME}" || return 2 + local SECONDS_ELAPSED= + SECONDS_ELAPSED=$(first_minus_second "$(date +%s)" "${START_TIME}") + echo "${SECONDS_ELAPSED}" + [ "${SECONDS_ELAPSED}" -lt "${TIMEOUT_SECONDS}" ] +} + add_unique_to_list() { local OLD_LIST="${1}" local NEW_ITEM="${2}" @@ -618,9 +633,7 @@ wait_service_state() { DOCKER_CMD_ERROR=0 RETURN_VALUE=$(_all_tasks_reach_state "${WANT_STATE}" "${CHECK_FAILURES}" "${STATES}") && break local SECONDS_ELAPSED= - if is_number "${TIMEOUT_SECONDS}" \ - && SECONDS_ELAPSED=$(first_minus_second "$(date +%s)" "${START_TIME}") \ - && [ "${SECONDS_ELAPSED}" -ge "${TIMEOUT_SECONDS}" ]; then + if ! SECONDS_ELAPSED=$(_check_timeout "${TIMEOUT_SECONDS}" "${START_TIME}"); then log ERROR "wait_service_state ${SERVICE_NAME} ${WANT_STATE} timeout after ${SECONDS_ELAPSED}s." RETURN_VALUE=2 break @@ -776,16 +789,17 @@ docker_remove() { } docker_run() { - local RETRIES=0 - local MAX_RETRIES=5 - local SLEEP_SECONDS=10 + local SLEEP_SECONDS=1 + local TIMEOUT_SECONDS=10; + local START_TIME= + START_TIME=$(date +%s) local LOG= while ! LOG=$(run_cmd docker container run "${@}"); do - if [ ${RETRIES} -ge ${MAX_RETRIES} ]; then - log ERROR "Failed to run docker. Reached the max retries ${MAX_RETRIES}. ${LOG}" + local SECONDS_ELAPSED= + if ! SECONDS_ELAPSED=$(_check_timeout "${TIMEOUT_SECONDS}" "${START_TIME}"); then + log ERROR "Failed to run docker after ${SECONDS_ELAPSED}s. ${LOG}" return 1 fi - RETRIES=$((RETRIES + 1)) sleep ${SLEEP_SECONDS} log WARN "Retry docker container run (${RETRIES}). ${LOG}" done diff --git a/tests/spec_gantry_test_helper.sh b/tests/spec_gantry_test_helper.sh index cd2611e..959c445 100644 --- a/tests/spec_gantry_test_helper.sh +++ b/tests/spec_gantry_test_helper.sh @@ -16,7 +16,7 @@ # # Constant strings for checks. -# START_WITHOUT_A_SQUARE_BRACKET ignores color codes. Use test_log not to trigger this check. +# START_WITHOUT_A_SQUARE_BRACKET ignores color codes. Use _test_log not to trigger this check in tests. export START_WITHOUT_A_SQUARE_BRACKET="^(?!(?:\x1b\[[0-9;]*[mG])?\[)" export GANTRY_AUTH_CONFIG_LABEL="gantry.auth.config" export MUST_BE_A_NUMBER="must be a number" @@ -75,7 +75,7 @@ export SLEEP_SECONDS_BEFORE_NEXT_UPDATE="Sleep [0-9]+ seconds before next update export GANTRY_TEST_TEMP_DIR="gantry-test-tmp" -test_log() { +_test_log() { echo "${GANTRY_LOG_LEVEL}" | grep -q -i "^NONE$" && return 0; echo "${GANTRY_LOG_LEVEL}" | grep -q -i "^ERROR$" && return 0; echo "${GANTRY_LOG_LEVEL}" | grep -q -i "^WARN$" && return 0; @@ -83,6 +83,25 @@ test_log() { echo "[$(date -Iseconds)] Test: ${*}" >&2 } +# Return 0 if not timeout +# Return 1 if timeout +# Return 2 if error +_test_check_timeout() { + local TIMEOUT_SECONDS="${1}" + local START_TIME="${2}" + local MESSAGE="${3:-_test_check_timeout}" + [ -z "${TIMEOUT_SECONDS}" ] && echo "TIMEOUT_SECONDS is empty." 1>&2 && return 0 + ! is_number "${TIMEOUT_SECONDS}" && echo "TIMEOUT_SECONDS is not a number." 1>&2 && return 2 + ! is_number "${START_TIME}" && echo "START_TIME is not a number." 1>&2 && return 2 + local SECONDS_ELAPSED= + SECONDS_ELAPSED=$(first_minus_second "$(date +%s)" "${START_TIME}") + if [ "${SECONDS_ELAPSED}" -ge "${TIMEOUT_SECONDS}" ]; then + echo "${MESSAGE} timeout after ${SECONDS_ELAPSED}s" 1>&2 + return 1 + fi + return 0 +} + display_output() { echo "${display_output:-""}" } @@ -337,8 +356,8 @@ _start_registry() { export TEST_USERNAME="gantry" export TEST_PASSWORD="gantry" local REGISTRY_IMAGE="docker.io/registry" - local TRIES=0 - local MAX_RETRIES=50 + local START_TIME= + START_TIME=$(date +%s) local PORT_LIMIT=500 pull_image_if_not_exist "${REGISTRY_IMAGE}" while true; do @@ -369,26 +388,19 @@ _start_registry() { local STATUS= while [ "${STATUS}" != "running" ]; do STATUS=$(docker container inspect "${CID}" --format '{{.State.Status}}') + _test_check_timeout "60" "${START_TIME}" "_start_registry" || return 1 + sleep 1 done break; fi echo "docker container run: ${CID}"; - if [ "${TRIES}" -ge "${MAX_RETRIES}" ]; then - echo "_start_registry Reach MAX_RETRIES ${MAX_RETRIES}" >&2 - return 1 - fi REGISTRY_PORT=$((REGISTRY_PORT+1)) - TRIES=$((TRIES+1)) + _test_check_timeout "60" "${START_TIME}" "_start_registry" || return 1 sleep 1 done _store_test_registry "${SUITE_NAME}" "${TEST_REGISTRY}" || return 1; - TRIES=0 while ! _login_test_registry "${ENFORCE_LOGIN}" "${TEST_REGISTRY}" "${TEST_USERNAME}" "${TEST_PASSWORD}"; do - if [ "${TRIES}" -ge "${MAX_RETRIES}" ]; then - echo "_login_test_registry Reach MAX_RETRIES ${MAX_RETRIES}" >&2 - return 1 - fi - TRIES=$((TRIES+1)) + _test_check_timeout "60" "${START_TIME}" "_start_registry" || return 1 sleep 1 done } @@ -646,14 +658,10 @@ build_test_image() { EXIT_CMD="sleep ${EXIT_SECONDS};" fi local RETURN_VALUE=1 - local TRIES=0 - local MAX_RETRIES=60 + local START_TIME= + START_TIME=$(date +%s) while [ "${RETURN_VALUE}" != "0" ]; do - if [ "${TRIES}" -ge "${MAX_RETRIES}" ]; then - echo "build_test_image Reach MAX_RETRIES ${MAX_RETRIES}" >&2 - return 1 - fi - TRIES=$((TRIES+1)) + _test_check_timeout "60" "${START_TIME}" "build_test_image" || return 1 local FILE= FILE=$(make_test_temp_file) echo "FROM $(_get_test_service_image)" > "${FILE}" @@ -692,17 +700,12 @@ docker_service_update() { wait_zero_running_tasks() { local SERVICE_NAME="${1}" local TIMEOUT_SECONDS="${2}" - local NUM_RUNS=1 - local REPLICAS= - local USED_SECONDS=0 - local TRIES=0 - local MAX_RETRIES=60 + local START_TIME= + START_TIME=$(date +%s) echo "Wait until ${SERVICE_NAME} has zero running tasks." - while [ "${NUM_RUNS}" -ne 0 ]; do - if [ -n "${TIMEOUT_SECONDS}" ] && [ "${USED_SECONDS}" -ge "${TIMEOUT_SECONDS}" ]; then - _handle_failure "Services ${SERVICE_NAME} does not stop after ${TIMEOUT_SECONDS} seconds." - return 1 - fi + while true; do + _test_check_timeout "60" "${START_TIME}" "wait_zero_running_tasks" || return 1 + local REPLICAS= if ! REPLICAS=$(docker service ls --filter "name=${SERVICE_NAME}" --format '{{.Replicas}} {{.Name}}' 2>&1); then _handle_failure "Failed to obtain task states of service ${SERVICE_NAME}: ${REPLICAS}" return 1 @@ -712,17 +715,13 @@ wait_zero_running_tasks() { # It does not do the exact match of the name. See https://github.com/moby/moby/issues/32985 # We do an extra step to to perform the exact match. REPLICAS=$(echo "${REPLICAS}" | sed -n -E "s/(.*) ${SERVICE_NAME}$/\1/p") - if [ "${TRIES}" -ge "${MAX_RETRIES}" ]; then - echo "wait_zero_running_tasks Reach MAX_RETRIES ${MAX_RETRIES}" >&2 - return 1 - fi - TRIES=$((TRIES+1)) # https://docs.docker.com/engine/reference/commandline/service_ls/#examples # The REPLICAS is like "5/5" or "1/1 (3/5 completed)" # Get the number before the first "/". + local NUM_RUNS= NUM_RUNS=$(echo "${REPLICAS}/" | cut -d '/' -f 1) + [ "${NUM_RUNS}" = 0 ] && return 0 sleep 1 - USED_SECONDS=$((USED_SECONDS+1)) done } @@ -1013,8 +1012,8 @@ _run_gantry_container() { MOUNT_OPTIONS=$(_add_file_to_mount_options "${MOUNT_OPTIONS}" "${GANTRY_REGISTRY_HOST_FILE}") MOUNT_OPTIONS=$(_add_file_to_mount_options "${MOUNT_OPTIONS}" "${GANTRY_REGISTRY_PASSWORD_FILE}") MOUNT_OPTIONS=$(_add_file_to_mount_options "${MOUNT_OPTIONS}" "${GANTRY_REGISTRY_USER_FILE}") - test_log "Starting SUT service ${SERVICE_NAME} with image ${SUT_REPO_TAG}." - test_log "MOUNT_OPTIONS=${MOUNT_OPTIONS}" + _test_log "Starting SUT service ${SERVICE_NAME} with image ${SUT_REPO_TAG}." + _test_log "MOUNT_OPTIONS=${MOUNT_OPTIONS}" local RETURN_VALUE=0 local CMD_OUTPUT= # SC2086 (info): Double quote to prevent globbing and word splitting.