From bbdcf305d0aec6cf5d671925c61466eda567cd06 Mon Sep 17 00:00:00 2001
From: Marketen
Date: Thu, 12 Dec 2024 14:36:55 +0100
Subject: [PATCH] add healthchecks to chain & keyper

---
Notes (ignored by git am / git apply):
  - Line breaks and indentation of this patch were reconstructed; a 4-space
    indent is assumed for context and removed lines. If they do not match the
    tree, apply with `git apply --ignore-whitespace`.
  - The second health check now queries the beacon API (data.is_syncing)
    instead of repeating eth_syncing against the execution API.
  - The fixed /tmp/sync_response.json file is no longer used.
  - The `index` lines of the two scripts were dropped because their post-image
    blob ids no longer correspond to the content below.

 shutter/Dockerfile            |  2 +-
 shutter/scripts/run_chain.sh  | 94 +++++++++++++++++++++++++++++++++--
 shutter/scripts/run_keyper.sh | 99 ++++++++++++++++++++++++++++++++----
 3 files changed, 181 insertions(+), 14 deletions(-)

diff --git a/shutter/Dockerfile b/shutter/Dockerfile
index c7784c9..056baa0 100644
--- a/shutter/Dockerfile
+++ b/shutter/Dockerfile
@@ -16,7 +16,7 @@ ARG CHAIN_PORT
 ARG STAKER_SCRIPTS_VERSION
 
 RUN apt-get update && \
-    apt-get --yes --no-install-recommends install supervisor postgresql-client && \
+    apt-get --yes --no-install-recommends install supervisor postgresql-client jq && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
diff --git a/shutter/scripts/run_chain.sh b/shutter/scripts/run_chain.sh
--- a/shutter/scripts/run_chain.sh
+++ b/shutter/scripts/run_chain.sh
@@ -1,10 +1,96 @@
 #!/bin/bash
 
-run_chain() {
-
-    echo "[INFO | chain] Starting chain..."
-
+# To use staker scripts
+# shellcheck disable=SC1091
+. /etc/profile
+
+SUPPORTED_NETWORKS="gnosis"
+# Seconds to wait between health check attempts (can be overridden via env).
+HEALTHCHECK_RETRY_SECONDS="${HEALTHCHECK_RETRY_SECONDS:-30}"
+
+# Set environment variables before running the health checks.
+echo "[INFO | chain] Setting environment variables for beacon API and execution API..."
+# Assign first, export second: "export VAR=$(cmd)" hides the exit status (SC2155).
+SHUTTER_BEACONAPIURL=$(get_beacon_api_url_from_global_env "$NETWORK" "$SUPPORTED_NETWORKS")
+export SHUTTER_BEACONAPIURL
+export SHUTTER_GNOSIS_NODE_CONTRACTSURL=http://execution.gnosis.dncore.dappnode:8545
+
+# Returns 0 only when the execution client answers eth_syncing with "false".
+# An empty answer is reported as an error instead of as "syncing".
+execution_api_is_synced() {
+    local sync_status
+
+    echo "[INFO | chain] Checking execution API syncing status at: $SHUTTER_GNOSIS_NODE_CONTRACTSURL"
+    sync_status=$(curl -s -X POST "$SHUTTER_GNOSIS_NODE_CONTRACTSURL" \
+        -H "Content-Type: application/json" \
+        -d '{"jsonrpc":"2.0","method":"eth_syncing","params":[],"id":1}' | jq -r '.result')
+
+    if [ -z "$sync_status" ]; then
+        echo "[ERROR | chain] No response or error from execution API. Not healthy"
+        return 1
+    fi
+
+    if [ "$sync_status" != "false" ]; then
+        echo "[WARN | chain] Execution API is syncing. Sync status: $sync_status."
+        return 1
+    fi
+
+    echo "[INFO | chain] Execution API is not syncing."
+}
+
+# Returns 0 only when the beacon node answers GET /eth/v1/node/syncing with
+# HTTP 200 and "data.is_syncing" set to false.
+beacon_api_is_synced() {
+    local newline=$'\n'
+    local response http_code response_body syncing_status
+
+    echo "[INFO | chain] Checking beacon API syncing status at: $SHUTTER_BEACONAPIURL/eth/v1/node/syncing"
+    # The status code is appended on its own line, so no fixed-name file in
+    # /tmp has to be shared with other scripts doing the same check.
+    response=$(curl -s -w '\n%{http_code}' "$SHUTTER_BEACONAPIURL/eth/v1/node/syncing")
+    http_code="${response##*"$newline"}"
+    response_body="${response%"$newline"*}"
+
+    if [ -z "$response_body" ] || [ "$http_code" != "200" ]; then
+        echo "[ERROR | chain] No response or error from beacon API. Not healthy"
+        return 1
+    fi
+
+    syncing_status=$(printf '%s' "$response_body" | jq -r '.data.is_syncing')
+
+    # Fail closed: anything other than an explicit "false" counts as not synced.
+    if [ "$syncing_status" != "false" ]; then
+        echo "[WARN | chain] Beacon API is syncing. Sync status: $syncing_status."
+        return 1
+    fi
+
+    echo "[INFO | chain] Beacon API is healthy."
+}
+
+# Blocks until both the execution API and the beacon API report synced.
+perform_node_healthcheck() {
+    echo "[INFO | chain] Starting health check for beacon API and execution API..."
+
+    while true; do
+        if execution_api_is_synced && beacon_api_is_synced; then
+            echo "[INFO | chain] All services are healthy. Exiting health check loop."
+            break
+        fi
+
+        echo "[INFO | chain] Retrying in $HEALTHCHECK_RETRY_SECONDS seconds..."
+        sleep "$HEALTHCHECK_RETRY_SECONDS"
+    done
+}
+
+# Starts the chain process with the configured binary and config file.
+run_chain() {
+    echo "[INFO | chain] Starting chain process..."
+    echo "[DEBUG | chain] Command: $SHUTTER_BIN chain --config \"$SHUTTER_CHAIN_CONFIG_FILE\""
     $SHUTTER_BIN chain --config "$SHUTTER_CHAIN_CONFIG_FILE"
 }
 
+# Run health checks first
+perform_node_healthcheck
+
+# If everything is healthy, run the chain
 run_chain
diff --git a/shutter/scripts/run_keyper.sh b/shutter/scripts/run_keyper.sh
--- a/shutter/scripts/run_keyper.sh
+++ b/shutter/scripts/run_keyper.sh
@@ -1,26 +1,107 @@
 #!/bin/bash
 
+# To use staker scripts
+# shellcheck disable=SC1091
+. /etc/profile
+
+SUPPORTED_NETWORKS="gnosis"
+# Seconds to wait between health check attempts (can be overridden via env).
+HEALTHCHECK_RETRY_SECONDS="${HEALTHCHECK_RETRY_SECONDS:-30}"
+
+# Set environment variables before running the health checks.
+echo "[INFO | keyper] Setting environment variables for beacon API and execution API..."
+# Assign first, export second: "export VAR=$(cmd)" hides the exit status (SC2155).
+SHUTTER_BEACONAPIURL=$(get_beacon_api_url_from_global_env "$NETWORK" "$SUPPORTED_NETWORKS")
+export SHUTTER_BEACONAPIURL
+export SHUTTER_GNOSIS_NODE_CONTRACTSURL=http://execution.gnosis.dncore.dappnode:8545
+
+# Returns 0 only when the local chain status endpoint answers successfully.
+chain_is_healthy() {
+    echo "[INFO | keyper] Checking chain health at: http://localhost:26657/status"
+    if ! curl -sf http://localhost:26657/status >/dev/null; then
+        echo "[WARN | keyper] Chain service is not healthy yet."
+        return 1
+    fi
+
+    echo "[INFO | keyper] Chain service is healthy."
+}
+
+# Returns 0 only when the execution client answers eth_syncing with "false".
+# An empty answer is reported as an error instead of as "syncing".
+execution_api_is_synced() {
+    local sync_status
+
+    echo "[INFO | keyper] Checking execution API syncing status at: $SHUTTER_GNOSIS_NODE_CONTRACTSURL"
+    sync_status=$(curl -s -X POST "$SHUTTER_GNOSIS_NODE_CONTRACTSURL" \
+        -H "Content-Type: application/json" \
+        -d '{"jsonrpc":"2.0","method":"eth_syncing","params":[],"id":1}' | jq -r '.result')
+
+    if [ -z "$sync_status" ]; then
+        echo "[ERROR | keyper] No response or error from execution API. Not healthy"
+        return 1
+    fi
+
+    if [ "$sync_status" != "false" ]; then
+        echo "[WARN | keyper] Execution API is syncing. Sync status: $sync_status."
+        return 1
+    fi
+
+    echo "[INFO | keyper] Execution API is not syncing."
+}
+
+# Returns 0 only when the beacon node answers GET /eth/v1/node/syncing with
+# HTTP 200 and "data.is_syncing" set to false.
+beacon_api_is_synced() {
+    local newline=$'\n'
+    local response http_code response_body syncing_status
+
+    echo "[INFO | keyper] Checking beacon API syncing status at: $SHUTTER_BEACONAPIURL/eth/v1/node/syncing"
+    # The status code is appended on its own line, so no fixed-name file in
+    # /tmp has to be shared with other scripts doing the same check.
+    response=$(curl -s -w '\n%{http_code}' "$SHUTTER_BEACONAPIURL/eth/v1/node/syncing")
+    http_code="${response##*"$newline"}"
+    response_body="${response%"$newline"*}"
+
+    if [ -z "$response_body" ] || [ "$http_code" != "200" ]; then
+        echo "[ERROR | keyper] No response or error from beacon API. Not healthy"
+        return 1
+    fi
+
+    syncing_status=$(printf '%s' "$response_body" | jq -r '.data.is_syncing')
+
+    # Fail closed: anything other than an explicit "false" counts as not synced.
+    if [ "$syncing_status" != "false" ]; then
+        echo "[WARN | keyper] Beacon API is syncing. Sync status: $syncing_status."
+        return 1
+    fi
+
+    echo "[INFO | keyper] Beacon API is healthy."
+}
+
+# Blocks until the chain, the execution API and the beacon API are all healthy.
 perform_chain_healthcheck() {
-    echo "[INFO | keyper] Waiting for chain to be healthy..."
+    echo "[INFO | keyper] Starting health check for chain, beacon API, and execution API..."
 
     while true; do
-        # Perform the health check
-        if curl -sf http://localhost:26657/status >/dev/null; then
-            echo "[INFO | keyper] Service is healthy. Exiting health check loop."
-            break # Exit the loop if the service is healthy
-        else
-            echo "[INFO | keyper] Service is not healthy yet. Retrying in 30 seconds..."
+        if chain_is_healthy && execution_api_is_synced && beacon_api_is_synced; then
+            echo "[INFO | keyper] All services are healthy. Exiting health check loop."
+            break
         fi
 
-        # Wait for the next interval (30 seconds)
-        sleep 30
+        echo "[INFO | keyper] Retrying in $HEALTHCHECK_RETRY_SECONDS seconds..."
+        sleep "$HEALTHCHECK_RETRY_SECONDS"
     done
 }
 
+# Starts the keyper process with the configured binary and config file.
 run_keyper() {
+    echo "[INFO | keyper] Starting gnosiskeyper..."
+    echo "[DEBUG | keyper] Command: $SHUTTER_BIN gnosiskeyper --config \"$KEYPER_CONFIG_FILE\""
     $SHUTTER_BIN gnosiskeyper --config "$KEYPER_CONFIG_FILE"
 }
 
+# Run health checks first
 perform_chain_healthcheck
 
+# If everything is healthy, run keyper
 run_keyper