Skip to content

Commit

Permalink
[tests] allow more tests to run with a container.
Browse files Browse the repository at this point in the history
Use busybox timeout explicitly for docker service update.
  • Loading branch information
shizunge committed Nov 26, 2024
1 parent d6bf10a commit 63337e9
Show file tree
Hide file tree
Showing 19 changed files with 332 additions and 220 deletions.
56 changes: 35 additions & 21 deletions src/lib-common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,12 @@ _random_string() {

_pipe_name() {
local BASE_NAME="${1:-pipe-base-name}"
local RANDOM_STR=
RANDOM_STR=$(_random_string)
local PID=$$
local TIMESTAMP=
TIMESTAMP=$(date +%s)
local PIPE_NAME="/tmp/${BASE_NAME}-$$-${TIMESTAMP}-${RANDOM_STR}"
local RANDOM_STR=
RANDOM_STR=$(_random_string)
local PIPE_NAME="/tmp/${BASE_NAME}-${PID}-${TIMESTAMP}-${RANDOM_STR}"
echo "${PIPE_NAME}"
}

Expand Down Expand Up @@ -79,6 +80,7 @@ extract_string() {
# All lower or all upper. No mix.
_log_level_to_upper() {
local LEVEL="${1}";
# tr is slow.
case "${LEVEL}" in
"debug") echo "DEBUG"; ;;
"info") echo "INFO"; ;;
Expand Down Expand Up @@ -177,6 +179,8 @@ _color_iso_time() {
local TIME_STR=
TIME_STR=$(busybox date -d "@${EPOCH}" +"\033[1;30m%Y-%m-%dT\033[0;37m%H:%M:%S\033[1;30m%z")
# +0000 -> +00:00
# SC2028 (info): echo may not expand escape sequences. Use printf.
# shellcheck disable=SC2028
echo "${TIME_STR:0:-2}:${TIME_STR:0-2}\033[0m"
}

Expand All @@ -202,7 +206,14 @@ _log_formatter() {
SCOPE_STR="${DGRAY}${SCOPE}:${NO_COLOR} "
fi
local MSG_STR=
MSG_STR=$(echo "${*}" | tr '\n' ' ')
# echo without quotes collapses newlines, tabs and multiple spaces into single spaces.
# SC2116 (style): Useless echo? Instead of 'cmd $(echo foo)', just use 'cmd foo'.
# SC2048 (warning): Use "$@" (with quotes) to prevent whitespace problems.
# SC2086 (info): Double quote to prevent globbing and word splitting.
# shellcheck disable=SC2048,SC2086,2116
MSG_STR=$(echo ${*})
# tr is slow.
# MSG_STR=$(echo "${*}" | tr '\n' ' ')
echo -e "${TIME_STR}${LOC_STR}${LEVEL_STR}${SCOPE_STR}${MSG_STR}" >&2
}

Expand Down Expand Up @@ -441,15 +452,10 @@ _get_docker_command_name_arg() {
}

_get_docker_command_detach() {
if echo "${@}" | grep_q "--detach=false"; then
echo "false"
elif echo "${@}" | grep_q "--detach"; then
# assume we find --detach or --detach=true.
echo "true"
else
echo "false"
fi
return 0
echo "${@}" | grep_q "--detach=false" && return 1;
# assume we find --detach or --detach=true.
echo "${@}" | grep_q "--detach" && return 0;
return 1;
}

docker_service_logs() {
Expand Down Expand Up @@ -557,25 +563,35 @@ _all_tasks_reach_state() {
return 0
}

# Usage: wait_service_state <SERVICE_NAME> <WANT_STATE>
# Usage: wait_service_state <SERVICE_NAME> <WANT_STATE> [timeout in seconds]
# Wait for the service, usually a global job or a replicated job,
# to reach either running or complete state.
# Valid WANT_STATE includes "Running" and "Complete"
# When the WANT_STATE is complete, the function returns immediately
# when any of the tasks of the service fails.
# In case of task failing, the function returns a non-zero value.
# When the WANT_STATE is complete, the function returns immediately when any of the tasks of the service fails.
# In case of task failing, the function returns the first failing task's return value.
wait_service_state() {
local SERVICE_NAME="${1}";
local WANT_STATE="${2}";
local TIMEOUT_SECONDS="${3}";
local CHECK_FAILURES=false
[ "${WANT_STATE}" = "Complete" ] && CHECK_FAILURES=true
local SLEEP_SECONDS=1
local DOCKER_CMD_ERROR=1
local START_TIME=
START_TIME=$(date +%s)
local RETURN_VALUE=0
local DOCKER_CMD_ERROR=1
local STATES=
while STATES=$(_docker_service_task_states "${SERVICE_NAME}" 2>&1); do
DOCKER_CMD_ERROR=0
RETURN_VALUE=$(_all_tasks_reach_state "${WANT_STATE}" "${CHECK_FAILURES}" "${STATES}") && break
local SECONDS_ELAPSED=
if is_number "${TIMEOUT_SECONDS}" \
&& SECONDS_ELAPSED=$(first_minus_second "$(date +%s)" "${START_TIME}") \
&& [ "${SECONDS_ELAPSED}" -ge "${TIMEOUT_SECONDS}" ]; then
log ERROR "wait_service_state ${SERVICE_NAME} ${WANT_STATE} timeout after ${SECONDS_ELAPSED}s."
RETURN_VALUE=2
break
fi
sleep "${SLEEP_SECONDS}"
DOCKER_CMD_ERROR=1
done
Expand Down Expand Up @@ -636,9 +652,7 @@ docker_global_job() {
# A job could fail when using docker_replicated_job.
docker_replicated_job() {
local SERVICE_NAME=
local IS_DETACH=
SERVICE_NAME=$(_get_docker_command_name_arg "${@}")
IS_DETACH=$(_get_docker_command_detach "${@}")
# Add "--detach" to work around https://github.com/docker/cli/issues/2979
# The Docker CLI does not exit on failures.
log INFO "Starting replicated-job ${SERVICE_NAME}."
Expand All @@ -648,7 +662,7 @@ docker_replicated_job() {
return 1
fi
# If the command line does not contain '--detach', the function does not return until the replicated job is complete.
if ! "${IS_DETACH}"; then
if ! _get_docker_command_detach "${@}"; then
wait_service_state "${SERVICE_NAME}" "Complete" || return $?
fi
return 0
Expand Down
11 changes: 7 additions & 4 deletions src/lib-gantry.sh
Original file line number Diff line number Diff line change
Expand Up @@ -978,7 +978,7 @@ _get_timeout_command() {
fi
local TIMEOUT_COMMAND=""
if [ "${UPDATE_TIMEOUT_SECONDS}" != "0" ]; then
TIMEOUT_COMMAND="timeout ${UPDATE_TIMEOUT_SECONDS}"
TIMEOUT_COMMAND="busybox timeout ${UPDATE_TIMEOUT_SECONDS}"
log DEBUG "Set timeout to ${UPDATE_TIMEOUT_SECONDS} for updating ${SERVICE_NAME}."
fi
echo "${TIMEOUT_COMMAND}"
Expand Down Expand Up @@ -1013,14 +1013,17 @@ _update_single_service() {
local UPDATE_COMMAND="${TIMEOUT_COMMAND} docker ${AUTH_CONFIG} service update"
local UPDATE_RETURN_VALUE=0
local UPDATE_MSG=
# Add "2>/dev/null" outside the $(cmd) to suppress the "Terminated" message from "busybox timeout".
# Add "--quiet" to suppress progress output.
# SC2086: Double quote to prevent globbing and word splitting.
# shellcheck disable=SC2086
UPDATE_MSG=$(${UPDATE_COMMAND} --quiet ${AUTOMATIC_OPTIONS} ${UPDATE_OPTIONS} --image="${IMAGE}" "${SERVICE_NAME}" 2>&1);
UPDATE_MSG=$(${UPDATE_COMMAND} --quiet ${AUTOMATIC_OPTIONS} ${UPDATE_OPTIONS} --image="${IMAGE}" "${SERVICE_NAME}" 2>&1) 2>/dev/null;
UPDATE_RETURN_VALUE=$?
if [ "${UPDATE_RETURN_VALUE}" != 0 ]; then
# https://git.savannah.gnu.org/cgit/coreutils.git/tree/src/timeout.c
local TIMEOUT_RETURN_CODE=124
# When there is a timeout:
# * coreutils timeout returns 124: https://git.savannah.gnu.org/cgit/coreutils.git/tree/src/timeout.c
# * busybox timeout returns 143
local TIMEOUT_RETURN_CODE=143
local TIMEOUT_MSG=""
if [ -n "${TIMEOUT_COMMAND}" ] && [ "${UPDATE_RETURN_VALUE}" = "${TIMEOUT_RETURN_CODE}" ]; then
TIMEOUT_MSG="The return value ${UPDATE_RETURN_VALUE} indicates the job timed out."
Expand Down
7 changes: 2 additions & 5 deletions tests/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,11 @@ bash shellspec --jobs 50

To generate coverage (require [kcov](https://github.com/SimonKagstrom/kcov) installed):
```
bash shellspec --kcov --tag coverage:true
bash shellspec --kcov --tag "coverage:true"
```

If you want to test a container image of *Gantry*, you need to specify the image of *Gantry* via the environment variable `GANTRY_TEST_CONTAINER_REPO_TAG`.
```
export GANTRY_TEST_CONTAINER_REPO_TAG=<gantry image>:<tag>
bash shellspec --tag "container_test:true" "coverage:true"
bash shellspec --jobs 50
```

> NOTE: Negative tests will hang when testing a *Gantry* container, which may be due to a bug in shellspec. So when testing *Gantry* images, we should run only tests with tag `container_test:true`.
8 changes: 4 additions & 4 deletions tests/gantry_cleanup_images_spec.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ Describe 'cleanup-images'
SUITE_NAME="cleanup-images"
BeforeAll "initialize_all_tests ${SUITE_NAME}"
AfterAll "finish_all_tests ${SUITE_NAME}"
Describe "test_CLEANUP_IMAGES_false" "container_test:true" "coverage:true"
Describe "test_CLEANUP_IMAGES_false" "coverage:true"
TEST_NAME="test_CLEANUP_IMAGES_false"
IMAGE_WITH_TAG=$(get_image_with_tag "${SUITE_NAME}")
SERVICE_NAME=$(get_test_service_name "${TEST_NAME}")
Expand Down Expand Up @@ -63,7 +63,7 @@ Describe 'cleanup-images'
The stderr should satisfy spec_expect_no_message "${DONE_REMOVING_IMAGES}"
End
End
Describe "test_CLEANUP_IMAGES_OPTIONS_bad" "container_test:true" "coverage:true"
Describe "test_CLEANUP_IMAGES_OPTIONS_bad" "coverage:true"
TEST_NAME="test_CLEANUP_IMAGES_OPTIONS_bad"
IMAGE_WITH_TAG=$(get_image_with_tag "${SUITE_NAME}")
SERVICE_NAME=$(get_test_service_name "${TEST_NAME}")
Expand Down Expand Up @@ -110,7 +110,7 @@ Describe 'cleanup-images'
The stderr should satisfy spec_expect_no_message "${DONE_REMOVING_IMAGES}"
End
End
Describe "test_CLEANUP_IMAGES_OPTIONS_good" "container_test:true" "coverage:true"
Describe "test_CLEANUP_IMAGES_OPTIONS_good" "coverage:true"
TEST_NAME="test_CLEANUP_IMAGES_OPTIONS_good"
IMAGE_WITH_TAG=$(get_image_with_tag "${SUITE_NAME}")
SERVICE_NAME=$(get_test_service_name "${TEST_NAME}")
Expand Down Expand Up @@ -156,7 +156,7 @@ Describe 'cleanup-images'
The stderr should satisfy spec_expect_message "${DONE_REMOVING_IMAGES}"
End
End
Describe "test_IMAGES_TO_REMOVE_none_empty" "container_test:true" "coverage:true"
Describe "test_IMAGES_TO_REMOVE_none_empty" "coverage:true"
# Test the remove image entrypoint. To improve coverage.
TEST_NAME="test_IMAGES_TO_REMOVE_none_empty"
IMAGE_WITH_TAG=$(get_image_with_tag "${SUITE_NAME}")
Expand Down
27 changes: 11 additions & 16 deletions tests/gantry_common_options_spec.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,16 @@ Describe 'common-options'
SUITE_NAME="common-options"
BeforeAll "initialize_all_tests ${SUITE_NAME}"
AfterAll "finish_all_tests ${SUITE_NAME}"
Describe "test_common_DOCKER_HOST_not_swarm_manager" "container_test:false" "coverage:true"
Describe "test_common_DOCKER_HOST_not_swarm_manager" "coverage:true"
TEST_NAME="test_common_DOCKER_HOST_not_swarm_manager"
IMAGE_WITH_TAG=$(get_image_with_tag "${SUITE_NAME}")
SERVICE_NAME=$(get_test_service_name "${TEST_NAME}")
test_common_DOCKER_HOST_not_swarm_manager() {
local TEST_NAME="${1}"
local SERVICE_NAME="${2}"
reset_gantry_env "${SERVICE_NAME}"
export DOCKER_HOST="8.8.8.8:53"
local RETURN_VALUE=0
export GANTRY_TEST_DOCKER_HOST="8.8.8.8:53"
run_gantry "${TEST_NAME}"
RETURN_VALUE="${?}"
export DOCKER_HOST=
return "${RETURN_VALUE}"
}
BeforeEach "common_setup_new_image ${TEST_NAME} ${IMAGE_WITH_TAG} ${SERVICE_NAME}"
AfterEach "common_cleanup ${TEST_NAME} ${IMAGE_WITH_TAG} ${SERVICE_NAME}"
Expand Down Expand Up @@ -69,7 +65,7 @@ Describe 'common-options'
The stderr should satisfy spec_expect_no_message "${SLEEP_SECONDS_BEFORE_NEXT_UPDATE}"
End
End
Describe "test_common_LOG_LEVEL_none" "container_test:true" "coverage:true"
Describe "test_common_LOG_LEVEL_none" "coverage:true"
TEST_NAME="test_common_LOG_LEVEL_none"
IMAGE_WITH_TAG=$(get_image_with_tag "${SUITE_NAME}")
SERVICE_NAME=$(get_test_service_name "${TEST_NAME}")
Expand All @@ -93,10 +89,12 @@ Describe 'common-options'
End
End
# Do not run test_common_no_new_env with kcov, which alters the environment variables.
Describe "test_common_no_new_env" "container_test:false" "coverage:false"
Describe "test_common_no_new_env" "coverage:false"
# Check there is no new variable set,
# to avoid errors like https://github.com/shizunge/gantry/issues/64#issuecomment-2475499085
# We don't need to run this test using containers because we check env on the host, while the container test sets env inside the container.
#
# It makes no sense to run this test using containers because we check env on the host, while the container test sets env inside the container.
# But it should not fail with a container. We are just testing GANTRY_LOG_LEVEL=WARN.
TEST_NAME="test_common_no_new_env"
IMAGE_WITH_TAG=$(get_image_with_tag "${SUITE_NAME}")
SERVICE_NAME=$(get_test_service_name "${TEST_NAME}")
Expand Down Expand Up @@ -135,7 +133,7 @@ Describe 'common-options'
The stderr should satisfy spec_expect_no_message ".+"
End
End
Describe "test_common_PRE_POST_RUN_CMD" "container_test:true" "coverage:true"
Describe "test_common_PRE_POST_RUN_CMD" "coverage:true"
TEST_NAME="test_common_PRE_POST_RUN_CMD"
IMAGE_WITH_TAG=$(get_image_with_tag "${SUITE_NAME}")
SERVICE_NAME=$(get_test_service_name "${TEST_NAME}")
Expand Down Expand Up @@ -195,10 +193,7 @@ Describe 'common-options'
The stderr should satisfy spec_expect_no_message "${SLEEP_SECONDS_BEFORE_NEXT_UPDATE}"
End
End
# run_gantry prints logs after gantry exits, while testing a container.
# In this test, gantry never exits, but will be killed, thus there is no log.
# Therefore we disable the container test for this test.
Describe "test_common_SLEEP_SECONDS" "container_test:false" "coverage:true"
Describe "test_common_SLEEP_SECONDS" "coverage:true"
TEST_NAME="test_common_SLEEP_SECONDS"
IMAGE_WITH_TAG=$(get_image_with_tag "${SUITE_NAME}")
SERVICE_NAME=$(get_test_service_name "${TEST_NAME}")
Expand All @@ -210,7 +205,7 @@ Describe 'common-options'
run_gantry "${TEST_NAME}" &
local PID="${!}"
sleep $((GANTRY_SLEEP_SECONDS*3+1))
kill "${PID}"
stop_gantry "${TEST_NAME}" "${PID}"
}
BeforeEach "common_setup_no_new_image ${TEST_NAME} ${IMAGE_WITH_TAG} ${SERVICE_NAME}"
AfterEach "common_cleanup ${TEST_NAME} ${IMAGE_WITH_TAG} ${SERVICE_NAME}"
Expand Down Expand Up @@ -249,7 +244,7 @@ Describe 'common-options'
The stderr should satisfy spec_expect_message "${SLEEP_SECONDS_BEFORE_NEXT_UPDATE}"
End
End
Describe "test_common_SLEEP_SECONDS_not_a_number" "container_test:false" "coverage:true"
Describe "test_common_SLEEP_SECONDS_not_a_number" "coverage:true"
TEST_NAME="test_common_SLEEP_SECONDS_not_a_number"
IMAGE_WITH_TAG=$(get_image_with_tag "${SUITE_NAME}")
SERVICE_NAME=$(get_test_service_name "${TEST_NAME}")
Expand Down
8 changes: 4 additions & 4 deletions tests/gantry_filters_spec.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ Describe 'filters'
SUITE_NAME="filters"
BeforeAll "initialize_all_tests ${SUITE_NAME}"
AfterAll "finish_all_tests ${SUITE_NAME}"
Describe "test_SERVICES_FILTERS_bad" "container_test:false" "coverage:true"
Describe "test_SERVICES_FILTERS_bad" "coverage:true"
TEST_NAME="test_SERVICES_FILTERS_bad"
IMAGE_WITH_TAG=$(get_image_with_tag "${SUITE_NAME}")
SERVICE_NAME=$(get_test_service_name "${TEST_NAME}")
Expand Down Expand Up @@ -64,7 +64,7 @@ Describe 'filters'
The stderr should satisfy spec_expect_no_message "${DONE_REMOVING_IMAGES}"
End
End
Describe "test_SERVICES_EXCLUDED_multiple_services" "container_test:true" "coverage:true"
Describe "test_SERVICES_EXCLUDED_multiple_services" "coverage:true"
TEST_NAME="test_SERVICES_EXCLUDED_multiple_services"
IMAGE_WITH_TAG=$(get_image_with_tag "${SUITE_NAME}")
SERVICE_NAME=$(get_test_service_name "${TEST_NAME}")
Expand Down Expand Up @@ -122,7 +122,7 @@ Describe 'filters'
The stderr should satisfy spec_expect_no_message "${DONE_REMOVING_IMAGES}"
End
End
Describe "test_SERVICES_EXCLUDED_FILTERS_default" "container_test:true" "coverage:true"
Describe "test_SERVICES_EXCLUDED_FILTERS_default" "coverage:true"
TEST_NAME="test_SERVICES_EXCLUDED_FILTERS_default"
IMAGE_WITH_TAG=$(get_image_with_tag "${SUITE_NAME}")
SERVICE_NAME=$(get_test_service_name "${TEST_NAME}")
Expand Down Expand Up @@ -173,7 +173,7 @@ Describe 'filters'
The stderr should satisfy spec_expect_no_message "${DONE_REMOVING_IMAGES}"
End
End
Describe "test_SERVICES_EXCLUDED_FILTERS_bad" "container_test:false" "coverage:true"
Describe "test_SERVICES_EXCLUDED_FILTERS_bad" "coverage:true"
TEST_NAME="test_SERVICES_EXCLUDED_FILTERS_bad"
IMAGE_WITH_TAG=$(get_image_with_tag "${SUITE_NAME}")
SERVICE_NAME=$(get_test_service_name "${TEST_NAME}")
Expand Down
14 changes: 7 additions & 7 deletions tests/gantry_jobs_spec.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#

Describe 'update-jobs'
SUITE_NAME="update-jobs"
Describe 'jobs'
SUITE_NAME="jobs"
BeforeAll "initialize_all_tests ${SUITE_NAME}"
AfterAll "finish_all_tests ${SUITE_NAME}"
Describe "test_jobs_skipping" "container_test:true" "coverage:true"
Describe "test_jobs_skipping" "coverage:true"
# For `docker service ls --filter`, the name filter matches on all or the prefix of a service's name
# See https://docs.docker.com/engine/reference/commandline/service_ls/#name
# It does not do the exact match of the name. See https://github.com/moby/moby/issues/32985
Expand Down Expand Up @@ -86,7 +86,7 @@ Describe 'update-jobs'
The stderr should satisfy spec_expect_no_message "${DONE_REMOVING_IMAGES}"
End
End
Describe "test_jobs_UPDATE_JOBS_true" "container_test:true" "coverage:true"
Describe "test_jobs_UPDATE_JOBS_true" "coverage:true"
TEST_NAME="test_jobs_UPDATE_JOBS_true"
IMAGE_WITH_TAG=$(get_image_with_tag "${SUITE_NAME}")
SERVICE_NAME=$(get_test_service_name "${TEST_NAME}")
Expand Down Expand Up @@ -137,7 +137,7 @@ Describe 'update-jobs'
The stderr should satisfy spec_expect_message "${DONE_REMOVING_IMAGES}"
End
End
Describe "test_jobs_label_UPDATE_JOBS_true" "container_test:true" "coverage:true"
Describe "test_jobs_label_UPDATE_JOBS_true" "coverage:true"
TEST_NAME="test_jobs_label_UPDATE_JOBS_true"
IMAGE_WITH_TAG=$(get_image_with_tag "${SUITE_NAME}")
SERVICE_NAME=$(get_test_service_name "${TEST_NAME}")
Expand Down Expand Up @@ -194,7 +194,7 @@ Describe 'update-jobs'
The stderr should satisfy spec_expect_message "${DONE_REMOVING_IMAGES}"
End
End
Describe "test_jobs_no_running_tasks" "container_test:true" "coverage:true"
Describe "test_jobs_no_running_tasks" "coverage:true"
TEST_NAME="test_jobs_no_running_tasks"
IMAGE_WITH_TAG=$(get_image_with_tag "${SUITE_NAME}")
SERVICE_NAME=$(get_test_service_name "${TEST_NAME}")
Expand Down Expand Up @@ -253,4 +253,4 @@ Describe 'update-jobs'
The stderr should satisfy spec_expect_message "${DONE_REMOVING_IMAGES}"
End
End
End # Describe 'update-jobs'
End # Describe 'jobs'
Loading

0 comments on commit 63337e9

Please sign in to comment.