Skip to content

Commit

Permalink
[gantry][tests] Handle warnings from docker commands better. Do not us…
Browse files Browse the repository at this point in the history
…e /tmp folder for tests.

Sometimes there could be a problem reading files from /tmp.
  • Loading branch information
shizunge committed Nov 29, 2024
1 parent 8b6fe71 commit 97d7c8d
Show file tree
Hide file tree
Showing 10 changed files with 210 additions and 159 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
.shellspec-quick.log
coverage
gantry-test-tmp
3 changes: 2 additions & 1 deletion src/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ load_libraries() {

_run_on_node() {
local HOST_NAME=
if ! HOST_NAME=$(docker node inspect self --format "{{.Description.Hostname}}" 2>&1); then
if ! HOST_NAME=$(run_docker_cmd node inspect self --format "{{.Description.Hostname}}"); then
log DEBUG "Failed to run \"docker node inspect self\": ${HOST_NAME}"
return 1
fi
Expand Down Expand Up @@ -107,6 +107,7 @@ gantry() {
START_TIME=$(date +%s)

[ -n "${DOCKER_HOST}" ] && log DEBUG "DOCKER_HOST=${DOCKER_HOST}"
[ -n "${DOCKER_CONFIG}" ] && log DEBUG "DOCKER_CONFIG=${DOCKER_CONFIG}"
local RUN_ON_NODE=
if ! RUN_ON_NODE=$(_run_on_node); then
local HOST_STRING="${DOCKER_HOST:-"the current node"}"
Expand Down
72 changes: 47 additions & 25 deletions src/lib-common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -434,13 +434,37 @@ eval_cmd() {
return "${RETURN_VALUE}"
}

# To replace "docker" command
# All arguments are passed to "docker" unchanged.
# The stdout of the docker command is always passed through to stdout.
# When the docker command returns 0:
#   Log stderr (if any) as a warning. Return 0.
# When the docker command returns non-zero:
#   Echo stderr to stdout, after the command's own stdout.
#   Return the same value from the docker command.
run_docker_cmd() {
  local STDERR_STR=
  local RETURN_VALUE=
  # Use "3>&2 2>&1 1>&3" to swap stdout and stderr, so that the command
  # substitution captures stderr while stdout flows to the caller.
  # "3>&-" closes the temporary descriptor so it is not leaked to docker.
  { STDERR_STR=$(docker "${@}" 3>&2 2>&1 1>&3 3>&-); } 2>&1
  RETURN_VALUE=$?

  if [ -n "${STDERR_STR}" ]; then
    if [ "${RETURN_VALUE}" = 0 ]; then
      # The command succeeded, anything on stderr is only a warning.
      log WARN "docker ${*}: ${STDERR_STR}"
    else
      # printf does not interpret the captured text as options or escapes.
      printf '%s\n' "${STDERR_STR}"
    fi
  fi
  return "${RETURN_VALUE}"
}

swarm_network_arguments() {
if [ -z "${NETWORK_NAME}" ]; then
echo ""
return 0
fi
NETWORK_NAME=$(docker network ls --filter "name=${NETWORK_NAME}" --format '{{.Name}}')
if [ -z "${NETWORK_NAME}" ]; then
local RETURN_VALUE=
NETWORK_NAME=$(run_docker_cmd network ls --filter "name=${NETWORK_NAME}" --format '{{.Name}}')
RETURN_VALUE=$?
if [ "${RETURN_VALUE}" != "0" ] || [ -z "${NETWORK_NAME}" ]; then
echo ""
return 0
fi
Expand Down Expand Up @@ -513,8 +537,8 @@ _docker_service_task_states() {
local SERVICE_NAME="${1}"
# We won't get the return value of the command via $? if we use "local STATES=$(command)".
local STATES=
if ! STATES=$(docker service ps --no-trunc --format '[{{.Name}}][{{.Node}}] {{.CurrentState}} {{.Error}}' "${SERVICE_NAME}" 2>&1); then
echo "${STATES}" >&2
if ! STATES=$(run_docker_cmd service ps --no-trunc --format '[{{.Name}}][{{.Node}}] {{.CurrentState}} {{.Error}}' "${SERVICE_NAME}"); then
log ERROR "${STATES}"
return 1
fi
local NAME_LIST=
Expand Down Expand Up @@ -591,7 +615,7 @@ wait_service_state() {
local RETURN_VALUE=0
local DOCKER_CMD_ERROR=1
local STATES=
while STATES=$(_docker_service_task_states "${SERVICE_NAME}" 2>&1); do
while STATES=$(_docker_service_task_states "${SERVICE_NAME}"); do
DOCKER_CMD_ERROR=0
RETURN_VALUE=$(_all_tasks_reach_state "${WANT_STATE}" "${CHECK_FAILURES}" "${STATES}") && break
local SECONDS_ELAPSED=
Expand All @@ -606,7 +630,7 @@ wait_service_state() {
DOCKER_CMD_ERROR=1
done
if [ "${DOCKER_CMD_ERROR}" != "0" ]; then
log ERROR "Failed to obtain task states of service ${SERVICE_NAME}: ${STATES}"
log ERROR "Failed to obtain task states of service ${SERVICE_NAME}."
return 1
fi
local LINE=
Expand All @@ -621,12 +645,10 @@ docker_service_remove() {
local POST_COMMAND="${2}"
! _docker_service_exists "${SERVICE_NAME}" && return 0
log DEBUG "Removing service ${SERVICE_NAME}."
local RETURN_VALUE=0
local LOG=
if ! LOG=$(docker service rm "${SERVICE_NAME}" 2>&1); then
RETURN_VALUE=$?
if ! LOG=$(run_docker_cmd service rm "${SERVICE_NAME}"); then
log ERROR "Failed to remove docker service ${SERVICE_NAME}: ${LOG}"
return "${RETURN_VALUE}"
return 1
fi
if [ -n "${POST_COMMAND}" ]; then
eval "${POST_COMMAND}"
Expand Down Expand Up @@ -654,7 +676,7 @@ docker_global_job() {
SERVICE_NAME=$(_get_docker_command_name_arg "${@}")
log INFO "Starting global-job ${SERVICE_NAME}."
local LOG=
if ! LOG=$(docker service create --mode global-job "${@}" 2>&1); then
if ! LOG=$(run_docker_cmd service create --mode global-job "${@}"); then
log ERROR "Failed to create global-job ${SERVICE_NAME}: ${LOG}"
return 1
fi
Expand All @@ -669,7 +691,7 @@ docker_replicated_job() {
# The Docker CLI does not exit on failures.
log INFO "Starting replicated-job ${SERVICE_NAME}."
local LOG=
if ! LOG=$(docker service create --mode replicated-job --detach "${@}" 2>&1); then
if ! LOG=$(run_docker_cmd service create --mode replicated-job --detach "${@}"); then
log ERROR "Failed to create replicated-job ${SERVICE_NAME}: ${LOG}"
return 1
fi
Expand All @@ -682,10 +704,10 @@ docker_replicated_job() {

docker_version() {
local cver capi sver sapi
if ! cver=$(docker version --format '{{.Client.Version}}' 2>&1); then log ERROR "${cver}"; cver="error"; fi
if ! capi=$(docker version --format '{{.Client.APIVersion}}' 2>&1); then log ERROR "${capi}"; capi="error"; fi
if ! sver=$(docker version --format '{{.Server.Version}}' 2>&1); then log ERROR "${sver}"; sver="error"; fi
if ! sapi=$(docker version --format '{{.Server.APIVersion}}' 2>&1); then log ERROR "${sapi}"; sapi="error"; fi
if ! cver=$(run_docker_cmd version --format '{{.Client.Version}}'); then log ERROR "${cver}"; cver="error"; fi
if ! capi=$(run_docker_cmd version --format '{{.Client.APIVersion}}'); then log ERROR "${capi}"; capi="error"; fi
if ! sver=$(run_docker_cmd version --format '{{.Server.Version}}'); then log ERROR "${sver}"; sver="error"; fi
if ! sapi=$(run_docker_cmd version --format '{{.Server.APIVersion}}'); then log ERROR "${sapi}"; sapi="error"; fi
echo "Docker version client ${cver} (API ${capi}) server ${sver} (API ${sapi})"
}

Expand All @@ -694,24 +716,24 @@ docker_version() {
# return 1 when there is an error.
docker_current_container_name() {
local ALL_NETWORKS=
ALL_NETWORKS=$(docker network ls --format '{{.ID}}') || return 1;
ALL_NETWORKS=$(run_docker_cmd network ls --format '{{.ID}}') || return 1;
[ -z "${ALL_NETWORKS}" ] && return 0;
local IPS=;
# Get the string after "src":
# 172.17.0.0/16 dev docker0 proto kernel scope link src 172.17.0.1 linkdown
IPS=$(ip route | grep src | sed -n -E "s/.* src (\S+).*$/\1/p");
[ -z "${IPS}" ] && return 0;
local GWBRIDGE_NETWORK HOST_NETWORK;
GWBRIDGE_NETWORK=$(docker network ls --format '{{.ID}}' --filter 'name=^docker_gwbridge$') || return 1;
HOST_NETWORK=$(docker network ls --format '{{.ID}}' --filter 'name=^host$') || return 1;
GWBRIDGE_NETWORK=$(run_docker_cmd network ls --format '{{.ID}}' --filter 'name=^docker_gwbridge$') || return 1;
HOST_NETWORK=$(run_docker_cmd network ls --format '{{.ID}}' --filter 'name=^host$') || return 1;
local NID=;
for NID in ${ALL_NETWORKS}; do
# The output of gwbridge does not contain the container name. It looks like gateway_8f55496ce4f1/172.18.0.5/16.
[ "${NID}" = "${GWBRIDGE_NETWORK}" ] && continue;
# The output of host does not contain an IP.
[ "${NID}" = "${HOST_NETWORK}" ] && continue;
local ALL_LOCAL_NAME_AND_IP=;
ALL_LOCAL_NAME_AND_IP=$(docker network inspect "${NID}" --format "{{range .Containers}}{{.Name}}/{{println .IPv4Address}}{{end}}") || return 1;
ALL_LOCAL_NAME_AND_IP=$(run_docker_cmd network inspect "${NID}" --format "{{range .Containers}}{{.Name}}/{{println .IPv4Address}}{{end}}") || return 1;
local NAME_AND_IP=;
for NAME_AND_IP in ${ALL_LOCAL_NAME_AND_IP}; do
[ -z "${NAME_AND_IP}" ] && continue;
Expand Down Expand Up @@ -758,15 +780,15 @@ docker_run() {
local RETRIES=0
local MAX_RETRIES=5
local SLEEP_SECONDS=10
local MSG=
while ! MSG=$(docker run "${@}" 2>&1); do
local LOG=
while ! LOG=$(run_docker_cmd run "${@}"); do
if [ ${RETRIES} -ge ${MAX_RETRIES} ]; then
log ERROR "Failed to run docker. Reached the max retries ${MAX_RETRIES}. ${MSG}"
log ERROR "Failed to run docker. Reached the max retries ${MAX_RETRIES}. ${LOG}"
return 1
fi
RETRIES=$((RETRIES + 1))
sleep ${SLEEP_SECONDS}
log WARN "Retry docker run (${RETRIES}). ${MSG}"
log WARN "Retry docker run (${RETRIES}). ${LOG}"
done
echo "${MSG}"
echo "${LOG}"
}
51 changes: 31 additions & 20 deletions src/lib-gantry.sh
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ _get_label_from_service() {
local SERVICE_NAME="${1}"
local LABEL="${2}"
local VALUE=
if ! VALUE=$(docker service inspect -f "{{index .Spec.Labels \"${LABEL}\"}}" "${SERVICE_NAME}" 2>&1); then
if ! VALUE=$(run_docker_cmd service inspect -f "{{index .Spec.Labels \"${LABEL}\"}}" "${SERVICE_NAME}"); then
log ERROR "Failed to obtain the value of label ${LABEL} from service ${SERVICE_NAME}. ${VALUE}"
return 1
fi
Expand Down Expand Up @@ -134,7 +134,7 @@ _login_registry() {
local LOGIN_MSG=
# SC2086: Double quote to prevent globbing and word splitting.
# shellcheck disable=SC2086
if ! LOGIN_MSG=$(echo "${PASSWORD}" | docker ${AUTH_CONFIG} login --username="${USER}" --password-stdin "${HOST}" 2>&1); then
if ! LOGIN_MSG=$(echo "${PASSWORD}" | run_docker_cmd ${AUTH_CONFIG} login --username="${USER}" --password-stdin "${HOST}"); then
log ERROR "Failed to login to ${REGISTRY_CONFIG_MESSAGE}. ${LOGIN_MSG}"
return 1
fi
Expand Down Expand Up @@ -364,15 +364,15 @@ _remove_container() {
local IMAGE="${1}";
local STATUS="${2}";
local CIDS=
if ! CIDS=$(docker container ls --all --filter "ancestor=${IMAGE}" --filter "status=${STATUS}" --format '{{.ID}}' 2>&1); then
if ! CIDS=$(run_docker_cmd container ls --all --filter "ancestor=${IMAGE}" --filter "status=${STATUS}" --format '{{.ID}}'); then
log ERROR "Failed to list ${STATUS} containers with image ${IMAGE}.";
echo "${CIDS}" | log_lines ERROR
return 1;
fi
local CID CNAME CRM_MSG
for CID in ${CIDS}; do
CNAME=$(docker container inspect --format '{{.Name}}' "${CID}");
if ! CRM_MSG=$(docker container rm "${CID}" 2>&1); then
CNAME=$(run_docker_cmd container inspect --format '{{.Name}}' "${CID}");
if ! CRM_MSG=$(run_docker_cmd container rm "${CID}"); then
log ERROR "Failed to remove ${STATUS} container ${CNAME}, which is using image ${IMAGE}.";
echo "${CRM_MSG}" | log_lines ERROR
continue;
Expand All @@ -387,13 +387,13 @@ gantry_remove_images() {
log DEBUG "$(docker_version)"
local IMAGE=
for IMAGE in ${IMAGES_TO_REMOVE}; do
if ! docker image inspect "${IMAGE}" 1>/dev/null 2>&1 ; then
if ! run_docker_cmd image inspect "${IMAGE}" 1>/dev/null; then
log DEBUG "There is no image ${IMAGE} on the node.";
continue;
fi
_remove_container "${IMAGE}" exited;
_remove_container "${IMAGE}" dead;
if ! RMI_MSG=$(docker rmi "${IMAGE}" 2>&1); then
if ! RMI_MSG=$(run_docker_cmd rmi "${IMAGE}"); then
log ERROR "Failed to remove image ${IMAGE}.";
echo "${RMI_MSG}" | log_lines ERROR
continue;
Expand Down Expand Up @@ -616,7 +616,9 @@ gantry_current_service_name() {
CNAME=$(_current_container_name) || return 1
[ -z "${CNAME}" ] && return 0
local SNAME=
SNAME=$(docker container inspect "${CNAME}" --format '{{range $key,$value := .Config.Labels}}{{$key}}={{println $value}}{{end}}' \
# SC2016 (info): Expressions don't expand in single quotes, use double quotes for that.
# shellcheck disable=SC2016
SNAME=$(run_docker_cmd container inspect "${CNAME}" --format '{{range $key,$value := .Config.Labels}}{{$key}}={{println $value}}{{end}}' \
| grep "com.docker.swarm.service.name" \
| sed -n -E "s/com.docker.swarm.service.name=(.*)$/\1/p") || return 1
_static_variable_add_unique_to_list STATIC_VAR_CURRENT_SERVICE_NAME "${SNAME}"
Expand All @@ -643,19 +645,31 @@ _service_is_self() {
_get_service_image() {
local SERVICE_NAME="${1}"
[ -z "${SERVICE_NAME}" ] && return 1
docker service inspect -f '{{.Spec.TaskTemplate.ContainerSpec.Image}}' "${SERVICE_NAME}"
local RETURN_VALUE=
local IMAGE_WITH_DIGEST=
IMAGE_WITH_DIGEST=$(run_docker_cmd service inspect -f '{{.Spec.TaskTemplate.ContainerSpec.Image}}' "${SERVICE_NAME}")
RETURN_VALUE=$?
[ "${RETURN_VALUE}" != "0" ] && log ERROR "Failed to obtain image from service ${SERVICE_NAME}. ${IMAGE_WITH_DIGEST}"
echo "${IMAGE_WITH_DIGEST}"
return "${RETURN_VALUE}"
}

_get_service_previous_image() {
local SERVICE_NAME="${1}"
[ -z "${SERVICE_NAME}" ] && return 1
docker service inspect -f '{{.PreviousSpec.TaskTemplate.ContainerSpec.Image}}' "${SERVICE_NAME}"
local RETURN_VALUE=
local IMAGE_WITH_DIGEST=
IMAGE_WITH_DIGEST=$(run_docker_cmd service inspect -f '{{.PreviousSpec.TaskTemplate.ContainerSpec.Image}}' "${SERVICE_NAME}")
RETURN_VALUE=$?
[ "${RETURN_VALUE}" != "0" ] && log ERROR "Failed to obtain previous image from service ${SERVICE_NAME}. ${IMAGE_WITH_DIGEST}"
echo "${IMAGE_WITH_DIGEST}"
return "${RETURN_VALUE}"
}

_get_service_mode() {
local SERVICE_NAME="${1}"
local MODE=
if ! MODE=$(docker service ls --filter "name=${SERVICE_NAME}" --format '{{.Mode}} {{.Name}}' 2>&1); then
if ! MODE=$(run_docker_cmd service ls --filter "name=${SERVICE_NAME}" --format '{{.Mode}} {{.Name}}'); then
log ERROR "Failed to obtain the mode of the service ${SERVICE_NAME}: ${MODE}"
return 1
fi
Expand Down Expand Up @@ -751,13 +765,13 @@ _get_image_info() {
[ -n "${MANIFEST_OPTIONS}" ] && log INFO "Adding options \"${MANIFEST_OPTIONS}\" to the command \"docker buildx imagetools inspect\"."
# SC2086: Double quote to prevent globbing and word splitting.
# shellcheck disable=SC2086
MSG=$(docker ${AUTH_CONFIG} buildx imagetools inspect ${MANIFEST_OPTIONS} "${IMAGE}" 2>&1);
MSG=$(run_docker_cmd ${AUTH_CONFIG} buildx imagetools inspect ${MANIFEST_OPTIONS} "${IMAGE}");
RETURN_VALUE=$?
elif echo "${MANIFEST_CMD}" | grep_q_i "manifest"; then
[ -n "${MANIFEST_OPTIONS}" ] && log INFO "Adding options \"${MANIFEST_OPTIONS}\" to the command \"docker manifest inspect\"."
# SC2086: Double quote to prevent globbing and word splitting.
# shellcheck disable=SC2086
MSG=$(docker ${AUTH_CONFIG} manifest inspect ${MANIFEST_OPTIONS} "${IMAGE}" 2>&1);
MSG=$(run_docker_cmd ${AUTH_CONFIG} manifest inspect ${MANIFEST_OPTIONS} "${IMAGE}");
RETURN_VALUE=$?
elif echo "${MANIFEST_CMD}" | grep_q_i "none"; then
# We should never reach here, the "none" command is already checked inside the function _inspect_image.
Expand All @@ -782,10 +796,7 @@ _inspect_image() {
local MANIFEST_CMD=
MANIFEST_CMD=$(_read_env_or_label "${SERVICE_NAME}" "GANTRY_MANIFEST_CMD" "gantry.manifest.cmd" "buildx")
local IMAGE_WITH_DIGEST=
if ! IMAGE_WITH_DIGEST=$(_get_service_image "${SERVICE_NAME}" 2>&1); then
log ERROR "Failed to obtain image from service ${SERVICE_NAME}. ${IMAGE_WITH_DIGEST}"
return 1
fi
IMAGE_WITH_DIGEST=$(_get_service_image "${SERVICE_NAME}") || return $?
local IMAGE=
local DIGEST=
IMAGE=$(extract_string "${IMAGE_WITH_DIGEST}" '@' 1)
Expand Down Expand Up @@ -869,7 +880,7 @@ _inspect_service() {
_get_number_of_running_tasks() {
local SERVICE_NAME="${1}"
local REPLICAS=
if ! REPLICAS=$(docker service ls --filter "name=${SERVICE_NAME}" --format '{{.Replicas}} {{.Name}}' 2>&1); then
if ! REPLICAS=$(run_docker_cmd service ls --filter "name=${SERVICE_NAME}" --format '{{.Replicas}} {{.Name}}'); then
log ERROR "Failed to obtain task states of service ${SERVICE_NAME}: ${REPLICAS}"
return 1
fi
Expand Down Expand Up @@ -956,7 +967,7 @@ _rollback_service() {
# Add "--quiet" to suppress progress output.
# SC2086: Double quote to prevent globbing and word splitting.
# shellcheck disable=SC2086
if ! ROLLBACK_MSG=$(docker ${AUTH_CONFIG} service update --quiet ${AUTOMATIC_OPTIONS} ${ROLLBACK_OPTIONS} --rollback "${SERVICE_NAME}" 2>&1); then
if ! ROLLBACK_MSG=$(run_docker_cmd ${AUTH_CONFIG} service update --quiet ${AUTOMATIC_OPTIONS} ${ROLLBACK_OPTIONS} --rollback "${SERVICE_NAME}"); then
log ERROR "Failed to roll back ${SERVICE_NAME}. ${ROLLBACK_MSG}"
return 1
fi
Expand Down Expand Up @@ -1098,7 +1109,7 @@ _get_services_filted() {
done
# SC2086: Double quote to prevent globbing and word splitting.
# shellcheck disable=SC2086
if ! SERVICES=$(docker service ls --quiet ${FILTERS} --format '{{.Name}}' 2>&1); then
if ! SERVICES=$(run_docker_cmd service ls --quiet ${FILTERS} --format '{{.Name}}'); then
log ERROR "Failed to obtain services list with \"${FILTERS}\". ${SERVICES}"
return 1
fi
Expand Down
4 changes: 2 additions & 2 deletions tests/gantry_common_options_spec.sh
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,9 @@ Describe 'common-options'
local TEST_NAME="${1}"
local SERVICE_NAME="${2}"
local ENV_BEFORE_RUN=
ENV_BEFORE_RUN=$(mktemp)
ENV_BEFORE_RUN=$(make_test_temp_file)
local ENV_AFTER_RUN=
ENV_AFTER_RUN=$(mktemp)
ENV_AFTER_RUN=$(make_test_temp_file)

reset_gantry_env "${SUITE_NAME}" "${SERVICE_NAME}"
# There should be no warnings or errors. So it should work the same as LOG_LEVEL=NONE.
Expand Down
Loading

0 comments on commit 97d7c8d

Please sign in to comment.