Skip to content

Commit

Permalink
[tests] allow more tests to run with a container.
Browse files Browse the repository at this point in the history
Use busybox time explicitly for docker service update.
  • Loading branch information
shizunge committed Nov 26, 2024
1 parent d6bf10a commit 0e4a60e
Show file tree
Hide file tree
Showing 21 changed files with 374 additions and 263 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ jobs:
run: |
export DOCKERHUB_PASSWORD=${{ secrets.DOCKERHUB_PASSWORD }}
export DOCKERHUB_USERNAME=${{ secrets.DOCKERHUB_USERNAME }}
bash shellspec --kcov --jobs 50 --tag "coverage:true"
bash shellspec --kcov --jobs 50
- name: Upload reports
uses: actions/upload-artifact@v4
with:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/on-push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -162,4 +162,4 @@ jobs:
export DOCKERHUB_USERNAME=${{ secrets.DOCKERHUB_USERNAME }}
export GANTRY_TEST_CONTAINER_REPO_TAG=$(cat tag.txt)
echo "GANTRY_TEST_CONTAINER_REPO_TAG=${GANTRY_TEST_CONTAINER_REPO_TAG}"
bash shellspec --jobs 50 --tag "container_test:true"
bash shellspec --jobs 50
56 changes: 35 additions & 21 deletions src/lib-common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,12 @@ _random_string() {

# Build a unique path under /tmp that can be used as a named pipe.
# The path is only composed and printed; nothing is created on disk here.
# Arguments:
#   $1 - base name for the pipe (defaults to "pipe-base-name").
# Outputs:
#   Writes "/tmp/<base>-<pid>-<epoch seconds>-<random string>" to stdout.
_pipe_name() {
  local BASE_NAME="${1:-pipe-base-name}"
  # $$ is the PID of the main shell, also when called inside $(...).
  local PID=$$
  local TIMESTAMP=
  TIMESTAMP=$(date +%s)
  # Declare and assign separately so a failure of the command substitution
  # is not masked by the exit status of 'local'.
  local RANDOM_STR=
  RANDOM_STR=$(_random_string)
  local PIPE_NAME="/tmp/${BASE_NAME}-${PID}-${TIMESTAMP}-${RANDOM_STR}"
  echo "${PIPE_NAME}"
}

Expand Down Expand Up @@ -79,6 +80,7 @@ extract_string() {
# All lower or all upper. No mix.
_log_level_to_upper() {
local LEVEL="${1}";
# tr is slow.
case "${LEVEL}" in
"debug") echo "DEBUG"; ;;
"info") echo "INFO"; ;;
Expand Down Expand Up @@ -177,6 +179,8 @@ _color_iso_time() {
local TIME_STR=
TIME_STR=$(busybox date -d "@${EPOCH}" +"\033[1;30m%Y-%m-%dT\033[0;37m%H:%M:%S\033[1;30m%z")
# +0000 -> +00:00
# SC2028 (info): echo may not expand escape sequences. Use printf.
# shellcheck disable=SC2028
echo "${TIME_STR:0:-2}:${TIME_STR:0-2}\033[0m"
}

Expand All @@ -202,7 +206,14 @@ _log_formatter() {
SCOPE_STR="${DGRAY}${SCOPE}:${NO_COLOR} "
fi
local MSG_STR=
MSG_STR=$(echo "${*}" | tr '\n' ' ')
# echo without quotes collapses newlines, tabs and multiple spaces into single spaces.
# SC2116 (style): Useless echo? Instead of 'cmd $(echo foo)', just use 'cmd foo'.
# SC2048 (warning): Use "$@" (with quotes) to prevent whitespace problems.
# SC2086 (info): Double quote to prevent globbing and word splitting.
# shellcheck disable=SC2048,SC2086,2116
MSG_STR=$(echo ${*})
# tr is slow.
# MSG_STR=$(echo "${*}" | tr '\n' ' ')
echo -e "${TIME_STR}${LOC_STR}${LEVEL_STR}${SCOPE_STR}${MSG_STR}" >&2
}

Expand Down Expand Up @@ -441,15 +452,10 @@ _get_docker_command_name_arg() {
}

# Report whether the given docker command line asks for detached mode.
# Arguments: the docker command-line arguments to inspect.
# Outputs: prints "false" when grep_q matches "--detach=false" in the
#   arguments, "true" when it otherwise matches "--detach", and "false" when
#   neither matches.
# Returns: 0 (the first variant below ends with an unconditional "return 0").
# NOTE(review): grep_q is defined elsewhere; presumably a quiet grep — confirm.
# NOTE(review): this span carries two variants of the body back to back (it
#   reads like an unmarked diff). As written, everything after "return 0" is
#   unreachable; the second variant signals the result through the exit status
#   (0 = detach, 1 = no detach) instead of stdout. Confirm which one is intended.
_get_docker_command_detach() {
if echo "${@}" | grep_q "--detach=false"; then
echo "false"
elif echo "${@}" | grep_q "--detach"; then
# assume we find --detach or --detach=true.
echo "true"
else
echo "false"
fi
return 0
# Unreachable as written: status-code variant of the same check.
echo "${@}" | grep_q "--detach=false" && return 1;
# assume we find --detach or --detach=true.
echo "${@}" | grep_q "--detach" && return 0;
return 1;
}

docker_service_logs() {
Expand Down Expand Up @@ -557,25 +563,35 @@ _all_tasks_reach_state() {
return 0
}

# Usage: wait_service_state <SERVICE_NAME> <WANT_STATE>
# Usage: wait_service_state <SERVICE_NAME> <WANT_STATE> [timeout in seconds]
# Wait for the service, usually a global job or a replicated job,
# to reach either running or complete state.
# Valid WANT_STATE includes "Running" and "Complete"
# When the WANT_STATE is complete, the function returns immediately
# when any of the tasks of the service fails.
# In case of task failing, the function returns a non-zero value.
# When the WANT_STATE is complete, the function returns immediately when any of the tasks of the service fails.
# In case of task failing, the function returns the first failing task's return value.
wait_service_state() {
local SERVICE_NAME="${1}";
local WANT_STATE="${2}";
local TIMEOUT_SECONDS="${3}";
local CHECK_FAILURES=false
[ "${WANT_STATE}" = "Complete" ] && CHECK_FAILURES=true
local SLEEP_SECONDS=1
local DOCKER_CMD_ERROR=1
local START_TIME=
START_TIME=$(date +%s)
local RETURN_VALUE=0
local DOCKER_CMD_ERROR=1
local STATES=
while STATES=$(_docker_service_task_states "${SERVICE_NAME}" 2>&1); do
DOCKER_CMD_ERROR=0
RETURN_VALUE=$(_all_tasks_reach_state "${WANT_STATE}" "${CHECK_FAILURES}" "${STATES}") && break
local SECONDS_ELAPSED=
if is_number "${TIMEOUT_SECONDS}" \
&& SECONDS_ELAPSED=$(first_minus_second "$(date +%s)" "${START_TIME}") \
&& [ "${SECONDS_ELAPSED}" -ge "${TIMEOUT_SECONDS}" ]; then
log ERROR "wait_service_state ${SERVICE_NAME} ${WANT_STATE} timeout after ${SECONDS_ELAPSED}s."
RETURN_VALUE=2
break
fi
sleep "${SLEEP_SECONDS}"
DOCKER_CMD_ERROR=1
done
Expand Down Expand Up @@ -636,9 +652,7 @@ docker_global_job() {
# A job could fail when using docker_replicated_job.
docker_replicated_job() {
local SERVICE_NAME=
local IS_DETACH=
SERVICE_NAME=$(_get_docker_command_name_arg "${@}")
IS_DETACH=$(_get_docker_command_detach "${@}")
# Add "--detach" to work around https://github.com/docker/cli/issues/2979
# The Docker CLI does not exit on failures.
log INFO "Starting replicated-job ${SERVICE_NAME}."
Expand All @@ -648,7 +662,7 @@ docker_replicated_job() {
return 1
fi
# If the command line does not contain '--detach', the function does not return until the replicated job is complete.
if ! "${IS_DETACH}"; then
if ! _get_docker_command_detach "${@}"; then
wait_service_state "${SERVICE_NAME}" "Complete" || return $?
fi
return 0
Expand Down
8 changes: 6 additions & 2 deletions src/lib-gantry.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1013,14 +1013,18 @@ _update_single_service() {
local UPDATE_COMMAND="${TIMEOUT_COMMAND} docker ${AUTH_CONFIG} service update"
local UPDATE_RETURN_VALUE=0
local UPDATE_MSG=
# Add "2>/dev/null" outside the $(cmd) to suppress the "Terminated" message from "busybox timeout".
# Add "--quiet" to suppress progress output.
# SC2086: Double quote to prevent globbing and word splitting.
# shellcheck disable=SC2086
UPDATE_MSG=$(${UPDATE_COMMAND} --quiet ${AUTOMATIC_OPTIONS} ${UPDATE_OPTIONS} --image="${IMAGE}" "${SERVICE_NAME}" 2>&1);
UPDATE_MSG=$(${UPDATE_COMMAND} --quiet ${AUTOMATIC_OPTIONS} ${UPDATE_OPTIONS} --image="${IMAGE}" "${SERVICE_NAME}" 2>&1) 2>/dev/null;
UPDATE_RETURN_VALUE=$?
if [ "${UPDATE_RETURN_VALUE}" != 0 ]; then
# https://git.savannah.gnu.org/cgit/coreutils.git/tree/src/timeout.c
# When there is a timeout:
# * coreutils timeout returns 124: https://git.savannah.gnu.org/cgit/coreutils.git/tree/src/timeout.c
# * busybox timeout returns 143
local TIMEOUT_RETURN_CODE=124
timeout --help 2>&1 | grep_q_i BusyBox && TIMEOUT_RETURN_CODE=143
local TIMEOUT_MSG=""
if [ -n "${TIMEOUT_COMMAND}" ] && [ "${UPDATE_RETURN_VALUE}" = "${TIMEOUT_RETURN_CODE}" ]; then
TIMEOUT_MSG="The return value ${UPDATE_RETURN_VALUE} indicates the job timed out."
Expand Down
7 changes: 2 additions & 5 deletions tests/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,11 @@ bash shellspec --jobs 50

To generate coverage (require [kcov](https://github.com/SimonKagstrom/kcov) installed):
```
bash shellspec --kcov --tag coverage:true
bash shellspec --kcov
```

If you want to test a container image of *Gantry*, you need to specify the image of *Gantry* via the environment variable `GANTRY_TEST_CONTAINER_REPO_TAG`.
```
export GANTRY_TEST_CONTAINER_REPO_TAG=<gantry image>:<tag>
bash shellspec --tag "container_test:true" "coverage:true"
bash shellspec --jobs 50
```

> NOTE: Negative tests will hang when testing a *Gantry* container, which may be due to a bug in shellspec. So when testing *Gantry* images, we should run only tests with tag `container_test:true`.
8 changes: 4 additions & 4 deletions tests/gantry_cleanup_images_spec.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ Describe 'cleanup-images'
SUITE_NAME="cleanup-images"
BeforeAll "initialize_all_tests ${SUITE_NAME}"
AfterAll "finish_all_tests ${SUITE_NAME}"
Describe "test_CLEANUP_IMAGES_false" "container_test:true" "coverage:true"
Describe "test_CLEANUP_IMAGES_false"
TEST_NAME="test_CLEANUP_IMAGES_false"
IMAGE_WITH_TAG=$(get_image_with_tag "${SUITE_NAME}")
SERVICE_NAME=$(get_test_service_name "${TEST_NAME}")
Expand Down Expand Up @@ -63,7 +63,7 @@ Describe 'cleanup-images'
The stderr should satisfy spec_expect_no_message "${DONE_REMOVING_IMAGES}"
End
End
Describe "test_CLEANUP_IMAGES_OPTIONS_bad" "container_test:true" "coverage:true"
Describe "test_CLEANUP_IMAGES_OPTIONS_bad"
TEST_NAME="test_CLEANUP_IMAGES_OPTIONS_bad"
IMAGE_WITH_TAG=$(get_image_with_tag "${SUITE_NAME}")
SERVICE_NAME=$(get_test_service_name "${TEST_NAME}")
Expand Down Expand Up @@ -110,7 +110,7 @@ Describe 'cleanup-images'
The stderr should satisfy spec_expect_no_message "${DONE_REMOVING_IMAGES}"
End
End
Describe "test_CLEANUP_IMAGES_OPTIONS_good" "container_test:true" "coverage:true"
Describe "test_CLEANUP_IMAGES_OPTIONS_good"
TEST_NAME="test_CLEANUP_IMAGES_OPTIONS_good"
IMAGE_WITH_TAG=$(get_image_with_tag "${SUITE_NAME}")
SERVICE_NAME=$(get_test_service_name "${TEST_NAME}")
Expand Down Expand Up @@ -156,7 +156,7 @@ Describe 'cleanup-images'
The stderr should satisfy spec_expect_message "${DONE_REMOVING_IMAGES}"
End
End
Describe "test_IMAGES_TO_REMOVE_none_empty" "container_test:true" "coverage:true"
Describe "test_IMAGES_TO_REMOVE_none_empty"
# Test the remove image entrypoint. To improve coverage.
TEST_NAME="test_IMAGES_TO_REMOVE_none_empty"
IMAGE_WITH_TAG=$(get_image_with_tag "${SUITE_NAME}")
Expand Down
61 changes: 29 additions & 32 deletions tests/gantry_common_options_spec.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,16 @@ Describe 'common-options'
SUITE_NAME="common-options"
BeforeAll "initialize_all_tests ${SUITE_NAME}"
AfterAll "finish_all_tests ${SUITE_NAME}"
Describe "test_common_DOCKER_HOST_not_swarm_manager" "container_test:false" "coverage:true"
Describe "test_common_DOCKER_HOST_not_swarm_manager"
TEST_NAME="test_common_DOCKER_HOST_not_swarm_manager"
IMAGE_WITH_TAG=$(get_image_with_tag "${SUITE_NAME}")
SERVICE_NAME=$(get_test_service_name "${TEST_NAME}")
# Run gantry while the Docker host is pointed at 8.8.8.8:53, an address that is
# presumably not a swarm manager (per the test name).
# Arguments:
#   $1 - test name, passed to run_gantry.
#   $2 - service name, passed to reset_gantry_env.
# Returns: the exit status of run_gantry.
# NOTE(review): both DOCKER_HOST and GANTRY_TEST_DOCKER_HOST are exported here;
#   this looks like two variants of the setup interleaved — confirm which one
#   run_gantry actually consumes.
test_common_DOCKER_HOST_not_swarm_manager() {
local TEST_NAME="${1}"
local SERVICE_NAME="${2}"
reset_gantry_env "${SERVICE_NAME}"
export DOCKER_HOST="8.8.8.8:53"
local RETURN_VALUE=0
export GANTRY_TEST_DOCKER_HOST="8.8.8.8:53"
run_gantry "${TEST_NAME}"
# Save the status before the cleanup below overwrites $?.
RETURN_VALUE="${?}"
# Clear DOCKER_HOST so it does not stay set for later commands in this shell.
export DOCKER_HOST=
return "${RETURN_VALUE}"
}
BeforeEach "common_setup_new_image ${TEST_NAME} ${IMAGE_WITH_TAG} ${SERVICE_NAME}"
AfterEach "common_cleanup ${TEST_NAME} ${IMAGE_WITH_TAG} ${SERVICE_NAME}"
Expand Down Expand Up @@ -69,7 +65,7 @@ Describe 'common-options'
The stderr should satisfy spec_expect_no_message "${SLEEP_SECONDS_BEFORE_NEXT_UPDATE}"
End
End
Describe "test_common_LOG_LEVEL_none" "container_test:true" "coverage:true"
Describe "test_common_LOG_LEVEL_none"
TEST_NAME="test_common_LOG_LEVEL_none"
IMAGE_WITH_TAG=$(get_image_with_tag "${SUITE_NAME}")
SERVICE_NAME=$(get_test_service_name "${TEST_NAME}")
Expand All @@ -93,10 +89,12 @@ Describe 'common-options'
End
End
# Do not run test_common_no_new_env with the kcov, which alters the environment variables.
Describe "test_common_no_new_env" "container_test:false" "coverage:false"
Describe "test_common_no_new_env"
# Check there is no new variable set,
# to avoid errors like https://github.com/shizunge/gantry/issues/64#issuecomment-2475499085
# We don't need to run this test using containers because we check env on the host, while the container test set env inside the container.
#
# It makes no sense to run this test using containers because we check env on the host, while the container test sets env inside the container.
# But it should not fail with a container. We are just testing GANTRY_LOG_LEVEL=WARN.
TEST_NAME="test_common_no_new_env"
IMAGE_WITH_TAG=$(get_image_with_tag "${SUITE_NAME}")
SERVICE_NAME=$(get_test_service_name "${TEST_NAME}")
Expand All @@ -111,16 +109,16 @@ Describe 'common-options'
reset_gantry_env "${SERVICE_NAME}"
# There should be no warnings or errors. So it should work the same as LOG_LEVEL=NONE.
export GANTRY_LOG_LEVEL=WARN

# Allow the following 3 mismatches used in log() function.
unset LOG_LEVEL NODE_NAME LOG_SCOPE
# _ contains the last command. declare is a bash builtin.
unset _; declare -p > "${ENV_BEFORE_RUN}"
declare -p > "${ENV_BEFORE_RUN}"
run_gantry "${TEST_NAME}"
# Allow the following 3 mismatches used in log() function.
unset LOG_LEVEL NODE_NAME LOG_SCOPE
unset _; declare -p > "${ENV_AFTER_RUN}"
diff "${ENV_BEFORE_RUN}" "${ENV_AFTER_RUN}"
declare -p > "${ENV_AFTER_RUN}"
# Allow the 3 mismatches LOG_LEVEL NODE_NAME LOG_SCOPE used in log() function.
# Allow the 2 mismatches LINENO _ for kcov coverage.
for ALLOWED in LOG_LEVEL NODE_NAME LOG_SCOPE LINENO _; do
sed -i "s/^declare .* ${ALLOWED}=.*//" "${ENV_BEFORE_RUN}"
sed -i "s/^declare .* ${ALLOWED}=.*//" "${ENV_AFTER_RUN}"
done
diff --ignore-blank-lines "${ENV_BEFORE_RUN}" "${ENV_AFTER_RUN}"
rm "${ENV_BEFORE_RUN}"
rm "${ENV_AFTER_RUN}"
}
Expand All @@ -135,7 +133,7 @@ Describe 'common-options'
The stderr should satisfy spec_expect_no_message ".+"
End
End
Describe "test_common_PRE_POST_RUN_CMD" "container_test:true" "coverage:true"
Describe "test_common_PRE_POST_RUN_CMD"
TEST_NAME="test_common_PRE_POST_RUN_CMD"
IMAGE_WITH_TAG=$(get_image_with_tag "${SUITE_NAME}")
SERVICE_NAME=$(get_test_service_name "${TEST_NAME}")
Expand All @@ -148,7 +146,7 @@ Describe 'common-options'
# Test that pre-run command can change the global configurations.
export GANTRY_PRE_RUN_CMD="echo \"Pre update\"; GANTRY_UPDATE_OPTIONS=--detach=true; GANTRY_CLEANUP_IMAGES=false;"
# This command outputs multiple lines.
local POST_CMD="for I in \$(seq 3 5); do echo \"OUTPUT_LINE=\$I\"; done"
local POST_CMD="for I in \$(seq 3 5); do echo \"TEST_OUTPUT_MULTIPLE_LINES=\$I\"; done"
# Test that the command returns a non-zero value.
export GANTRY_POST_RUN_CMD="echo \"Post update\"; ${POST_CMD}; false;"
run_gantry "${TEST_NAME}"
Expand All @@ -162,8 +160,8 @@ Describe 'common-options'
The stdout should satisfy spec_expect_no_message ".+"
The stderr should satisfy display_output
The stderr should satisfy spec_expect_no_message "${START_WITHOUT_A_SQUARE_BRACKET}"
The stderr should satisfy spec_expect_message "Pre update"
The stderr should satisfy spec_expect_message "Finish pre-run command."
The stderr should satisfy spec_expect_message "Pre update$"
The stderr should satisfy spec_expect_message "Finish pre-run command.$"
The stderr should satisfy spec_expect_no_message "${SKIP_UPDATING}.*${SERVICE_NAME}"
The stderr should satisfy spec_expect_message "${PERFORM_UPDATING}.*${SERVICE_NAME}.*${PERFORM_REASON_HAS_NEWER_IMAGE}"
The stderr should satisfy spec_expect_no_message "${NUM_SERVICES_SKIP_JOBS}"
Expand All @@ -186,19 +184,16 @@ Describe 'common-options'
The stderr should satisfy spec_expect_no_message "${REMOVED_IMAGE}.*"
The stderr should satisfy spec_expect_no_message "${FAILED_TO_REMOVE_IMAGE}.*"
The stderr should satisfy spec_expect_no_message "${DONE_REMOVING_IMAGES}"
The stderr should satisfy spec_expect_message "Post update"
The stderr should satisfy spec_expect_message "OUTPUT_LINE=3"
The stderr should satisfy spec_expect_message "OUTPUT_LINE=4"
The stderr should satisfy spec_expect_message "OUTPUT_LINE=5"
The stderr should satisfy spec_expect_message "Finish post-run command with a non-zero return value 1."
The stderr should satisfy spec_expect_message "Post update$"
The stderr should satisfy spec_expect_message "TEST_OUTPUT_MULTIPLE_LINES=3$"
The stderr should satisfy spec_expect_message "TEST_OUTPUT_MULTIPLE_LINES=4$"
The stderr should satisfy spec_expect_message "TEST_OUTPUT_MULTIPLE_LINES=5$"
The stderr should satisfy spec_expect_message "Finish post-run command with a non-zero return value 1.$"
The stderr should satisfy spec_expect_no_message "${SCHEDULE_NEXT_UPDATE_AT}"
The stderr should satisfy spec_expect_no_message "${SLEEP_SECONDS_BEFORE_NEXT_UPDATE}"
End
End
# run_gantry prints logs after gantry exits, while testing a container.
# In this test, gantry never exits, but will be killed, thus there is no log.
# Therefore we disable the container test for this test.
Describe "test_common_SLEEP_SECONDS" "container_test:false" "coverage:true"
Describe "test_common_SLEEP_SECONDS"
TEST_NAME="test_common_SLEEP_SECONDS"
IMAGE_WITH_TAG=$(get_image_with_tag "${SUITE_NAME}")
SERVICE_NAME=$(get_test_service_name "${TEST_NAME}")
Expand All @@ -207,9 +202,11 @@ Describe 'common-options'
local SERVICE_NAME="${2}"
reset_gantry_env "${SERVICE_NAME}"
export GANTRY_SLEEP_SECONDS="7"
# Run run_gantry in background.
run_gantry "${TEST_NAME}" &
local PID="${!}"
sleep $((GANTRY_SLEEP_SECONDS*3+1))
stop_gantry_container "${TEST_NAME}"
kill "${PID}"
}
BeforeEach "common_setup_no_new_image ${TEST_NAME} ${IMAGE_WITH_TAG} ${SERVICE_NAME}"
Expand Down Expand Up @@ -249,7 +246,7 @@ Describe 'common-options'
The stderr should satisfy spec_expect_message "${SLEEP_SECONDS_BEFORE_NEXT_UPDATE}"
End
End
Describe "test_common_SLEEP_SECONDS_not_a_number" "container_test:false" "coverage:true"
Describe "test_common_SLEEP_SECONDS_not_a_number"
TEST_NAME="test_common_SLEEP_SECONDS_not_a_number"
IMAGE_WITH_TAG=$(get_image_with_tag "${SUITE_NAME}")
SERVICE_NAME=$(get_test_service_name "${TEST_NAME}")
Expand Down
Loading

0 comments on commit 0e4a60e

Please sign in to comment.