Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into HEAD
Browse files Browse the repository at this point in the history
  • Loading branch information
sellout committed Sep 29, 2024
2 parents 8449878 + ac1242a commit 0438575
Show file tree
Hide file tree
Showing 70 changed files with 8,296 additions and 507 deletions.
107 changes: 69 additions & 38 deletions .github/workflows/cd-deploy-nodes-gcp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,27 +42,26 @@ on:
type: boolean
default: false

# TODO: Temporarily disabled to reduce network load, see #6894.
#push:
# # Skip main branch updates where Rust code and dependencies aren't modified.
# branches:
# - main
# paths:
# # code and tests
# - '**/*.rs'
# # hard-coded checkpoints and proptest regressions
# - '**/*.txt'
# # dependencies
# - '**/Cargo.toml'
# - '**/Cargo.lock'
# # configuration files
# - '.cargo/config.toml'
# - '**/clippy.toml'
# # workflow definitions
# - 'docker/**'
# - '.dockerignore'
# - '.github/workflows/cd-deploy-nodes-gcp.yml'
# - '.github/workflows/sub-build-docker-image.yml'
push:
# Skip main branch updates where Rust code and dependencies aren't modified.
branches:
- main
paths:
# code and tests
- '**/*.rs'
# hard-coded checkpoints and proptest regressions
- '**/*.txt'
# dependencies
- '**/Cargo.toml'
- '**/Cargo.lock'
# configuration files
- '.cargo/config.toml'
- '**/clippy.toml'
# workflow definitions
- 'docker/**'
- '.dockerignore'
- '.github/workflows/cd-deploy-nodes-gcp.yml'
- '.github/workflows/sub-build-docker-image.yml'

# Only runs the Docker image tests, doesn't deploy any instances
pull_request:
Expand Down Expand Up @@ -176,6 +175,19 @@ jobs:
test_variables: '-e NETWORK -e ZEBRA_CONF_PATH="zebrad/tests/common/configs/v1.0.0-rc.2.toml"'
network: ${{ inputs.network || vars.ZCASH_NETWORK }}

# Finds a `tip` cached state disk for zebra from the main branch
#
# Passes the disk name to subsequent jobs using `cached_disk_name` output
#
get-disk-name:
name: Get disk name
uses: ./.github/workflows/sub-find-cached-disks.yml
with:
network: ${{ inputs.network || vars.ZCASH_NETWORK }}
disk_prefix: zebrad-cache
disk_suffix: tip
prefer_main_cached_state: true

# Deploy Managed Instance Groups (MiGs) for Mainnet and Testnet,
# with one node in the configured GCP region.
#
Expand All @@ -196,9 +208,11 @@ jobs:
matrix:
network: [Mainnet, Testnet]
name: Deploy ${{ matrix.network }} nodes
needs: [ build, versioning, test-configuration-file, test-zebra-conf-path ]
needs: [ build, versioning, test-configuration-file, test-zebra-conf-path, get-disk-name ]
runs-on: ubuntu-latest
timeout-minutes: 60
env:
CACHED_DISK_NAME: ${{ needs.get-disk-name.outputs.cached_disk_name }}
permissions:
contents: 'read'
id-token: 'write'
Expand Down Expand Up @@ -240,24 +254,31 @@ jobs:
# but the implementation is failing as it's requiring the disk names, contrary to what is stated in the official documentation
- name: Create instance template for ${{ matrix.network }}
run: |
NAME="zebrad-cache-${{ env.GITHUB_HEAD_REF_SLUG_URL || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-${NETWORK}"
DISK_PARAMS="name=${NAME},device-name=${NAME},size=400GB,type=pd-ssd"
if [ -n "${{ env.CACHED_DISK_NAME }}" ]; then
DISK_PARAMS+=",image=${{ env.CACHED_DISK_NAME }}"
else
echo "No cached disk found for ${{ matrix.network }} in main branch"
exit 1
fi
gcloud compute instance-templates create-with-container zebrad-${{ needs.versioning.outputs.major_version || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-${NETWORK} \
--boot-disk-size 300GB \
--machine-type ${{ vars.GCP_SMALL_MACHINE }} \
--boot-disk-size 50GB \
--boot-disk-type=pd-ssd \
--image-project=cos-cloud \
--image-family=cos-stable \
--user-output-enabled \
--metadata google-logging-enabled=true,google-logging-use-fluentbit=true,google-monitoring-enabled=true \
--network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \
--create-disk="${DISK_PARAMS}" \
--container-mount-disk=mount-path='/var/cache/zebrad-cache',name=${NAME},mode=rw \
--container-stdin \
--container-tty \
--container-image ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} \
--container-env "NETWORK=${{ matrix.network }},LOG_FILE=${{ vars.CD_LOG_FILE }},LOG_COLOR=false,SENTRY_DSN=${{ vars.SENTRY_DSN }}" \
--create-disk=name=zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${NETWORK},device-name=zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${NETWORK},auto-delete=yes,size=300GB,type=pd-ssd,mode=rw \
--container-mount-disk=mount-path='/var/cache/zebrad-cache',name=zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${NETWORK},mode=rw \
--machine-type ${{ vars.GCP_SMALL_MACHINE }} \
--network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \
--service-account ${{ vars.GCP_DEPLOYMENTS_SA }} \
--scopes cloud-platform \
--labels=app=zebrad,environment=prod,network=${NETWORK},github_ref=${{ env.GITHUB_REF_SLUG_URL }} \
--metadata google-logging-enabled=true,google-logging-use-fluentbit=true,google-monitoring-enabled=true \
--labels=app=zebrad,environment=staging,network=${NETWORK},github_ref=${{ env.GITHUB_REF_SLUG_URL }} \
--tags zebrad
# Check if our destination instance group exists already
Expand Down Expand Up @@ -297,9 +318,11 @@ jobs:
# Note: this instances are not automatically replaced or deleted
deploy-instance:
name: Deploy single ${{ inputs.network }} instance
needs: [ build, test-configuration-file, test-zebra-conf-path ]
needs: [ build, test-configuration-file, test-zebra-conf-path, get-disk-name ]
runs-on: ubuntu-latest
timeout-minutes: 30
env:
CACHED_DISK_NAME: ${{ needs.get-disk-name.outputs.cached_disk_name }}
permissions:
contents: 'read'
id-token: 'write'
Expand Down Expand Up @@ -340,22 +363,30 @@ jobs:
# Create instance template from container image
- name: Manual deploy of a single ${{ inputs.network }} instance running zebrad
run: |
NAME="zebrad-cache-${{ env.GITHUB_HEAD_REF_SLUG_URL || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-${NETWORK}"
DISK_PARAMS="name=${NAME},device-name=${NAME},size=400GB,type=pd-ssd"
if [ -n "${{ env.CACHED_DISK_NAME }}" ]; then
DISK_PARAMS+=",image=${{ env.CACHED_DISK_NAME }}"
else
echo "No cached disk found for ${{ matrix.network }} in main branch"
exit 1
fi
gcloud compute instances create-with-container "zebrad-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-${NETWORK}" \
--boot-disk-size 300GB \
--machine-type ${{ vars.GCP_SMALL_MACHINE }} \
--boot-disk-size 50GB \
--boot-disk-type=pd-ssd \
--image-project=cos-cloud \
--image-family=cos-stable \
--user-output-enabled \
--metadata google-logging-enabled=true,google-logging-use-fluentbit=true,google-monitoring-enabled=true \
--network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \
--create-disk="${DISK_PARAMS}" \
--container-mount-disk=mount-path='/var/cache/zebrad-cache',name=${NAME},mode=rw \
--container-stdin \
--container-tty \
--container-image ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} \
--container-env "NETWORK=${{ inputs.network }},LOG_FILE=${{ inputs.log_file }},LOG_COLOR=false,SENTRY_DSN=${{ vars.SENTRY_DSN }}" \
--create-disk=name=zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${NETWORK},device-name=zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${NETWORK},auto-delete=yes,size=300GB,type=pd-ssd,mode=rw \
--container-mount-disk=mount-path='/var/cache/zebrad-cache',name=zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${NETWORK},mode=rw \
--machine-type ${{ vars.GCP_SMALL_MACHINE }} \
--network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \
--service-account ${{ vars.GCP_DEPLOYMENTS_SA }} \
--scopes cloud-platform \
--metadata google-logging-enabled=true,google-monitoring-enabled=true \
--labels=app=zebrad,environment=qa,network=${NETWORK},github_ref=${{ env.GITHUB_REF_SLUG_URL }} \
--tags zebrad \
--zone ${{ vars.GCP_ZONE }}
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/ci-lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ jobs:

- name: Rust files
id: changed-files-rust
uses: tj-actions/[email protected].0
uses: tj-actions/[email protected].2
with:
files: |
**/*.rs
Expand All @@ -56,7 +56,7 @@ jobs:
- name: Workflow files
id: changed-files-workflows
uses: tj-actions/[email protected].0
uses: tj-actions/[email protected].2
with:
files: |
.github/workflows/*.yml
Expand Down
42 changes: 0 additions & 42 deletions .github/workflows/scripts/gcp-get-available-disks.sh

This file was deleted.

114 changes: 76 additions & 38 deletions .github/workflows/scripts/gcp-get-cached-disks.sh
Original file line number Diff line number Diff line change
@@ -1,20 +1,33 @@
#!/usr/bin/env bash

# Description:
# This script finds a cached Google Cloud Compute image based on specific criteria.
# It prioritizes images from the current commit, falls back to the main branch,
# and finally checks other branches if needed. The selected image is used for
# setting up the environment in a CI/CD pipeline.
#
# If there are multiple disks:
# - prefer images generated from the same commit, then
# - if prefer_main_cached_state is true, prefer images from the `main` branch, then
# - use any images from any other branch or commit.
#
# Within each of these categories:
# - prefer newer images to older images
#
# The selected image is used for setting up the environment in a CI/CD pipeline.
# It also checks if specific disk types are available for subsequent jobs.

set -eo pipefail

# Function to find and report a cached disk image
# Extract local state version
echo "Extracting local state version..."
LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" "${GITHUB_WORKSPACE}/zebra-state/src/constants.rs" | grep -oE "[0-9]+" | tail -n1)
echo "STATE_VERSION: ${LOCAL_STATE_VERSION}"

# Function to find a cached disk image based on the git pattern (commit, main, or any branch)
find_cached_disk_image() {
local search_pattern="${1}"
local git_pattern="${1}"
local git_source="${2}"
local disk_name
local disk_search_pattern="${DISK_PREFIX}-${git_pattern}-v${LOCAL_STATE_VERSION}-${NETWORK}-${DISK_SUFFIX}"

disk_name=$(gcloud compute images list --filter="status=READY AND name~${search_pattern}" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1)
disk_name=$(gcloud compute images list --filter="status=READY AND name~${disk_search_pattern}" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1)

# Use >&2 to redirect to stderr and avoid sending wrong assignments to stdout
if [[ -n "${disk_name}" ]]; then
Expand All @@ -27,46 +40,71 @@ find_cached_disk_image() {
fi
}

# Extract local state version
echo "Extracting local state version..."
LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" "${GITHUB_WORKSPACE}/zebra-state/src/constants.rs" | grep -oE "[0-9]+" | tail -n1)
echo "STATE_VERSION: ${LOCAL_STATE_VERSION}"
# Check if both $DISK_PREFIX and $DISK_SUFFIX are set, as they are required to find a cached disk image
if [[ -n "${DISK_PREFIX}" && -n "${DISK_SUFFIX}" ]]; then
# Find the most suitable cached disk image
echo "Finding the most suitable cached disk image..."
CACHED_DISK_NAME=""

# First, try to find a cached disk image from the current commit
CACHED_DISK_NAME=$(find_cached_disk_image ".+-${GITHUB_SHA_SHORT}" "commit")

# Define DISK_PREFIX based on the requiring state directory
if [[ "${NEEDS_LWD_STATE}" == "true" ]]; then
DISK_PREFIX="${LWD_STATE_DIR}"
# If no cached disk image is found
if [[ -z "${CACHED_DISK_NAME}" ]]; then
# Check if main branch images are preferred
if [[ "${PREFER_MAIN_CACHED_STATE}" == "true" ]]; then
CACHED_DISK_NAME=$(find_cached_disk_image "main-[0-9a-f]+" "main branch")
# Else, try to find one from any branch
else
CACHED_DISK_NAME=$(find_cached_disk_image ".+-[0-9a-f]+" "any branch")
fi
fi

# Handle case where no suitable disk image is found
if [[ -z "${CACHED_DISK_NAME}" ]]; then
echo "No suitable cached state disk available."
echo "Cached state test jobs must depend on the cached state rebuild job."
exit 1
fi

echo "Selected Disk: ${CACHED_DISK_NAME}"
else
DISK_PREFIX="${ZEBRA_STATE_DIR:-${DISK_PREFIX}}"
echo "DISK_PREFIX or DISK_SUFFIX is not set. Skipping disk image search."
fi

# Find the most suitable cached disk image
echo "Finding the most suitable cached disk image..."
if [[ -z "${CACHED_DISK_NAME}" ]]; then
# Try to find a cached disk image from the current commit
COMMIT_DISK_PREFIX="${DISK_PREFIX}-.+-${GITHUB_SHA_SHORT}-v${LOCAL_STATE_VERSION}-${NETWORK}-${DISK_SUFFIX}"
CACHED_DISK_NAME=$(find_cached_disk_image "${COMMIT_DISK_PREFIX}" "commit")
# If no cached disk image is found, try to find one from the main branch
if [[ "${PREFER_MAIN_CACHED_STATE}" == "true" ]]; then
MAIN_DISK_PREFIX="${DISK_PREFIX}-main-[0-9a-f]+-v${LOCAL_STATE_VERSION}-${NETWORK}-${DISK_SUFFIX}"
CACHED_DISK_NAME=$(find_cached_disk_image "${MAIN_DISK_PREFIX}" "main branch")
# Else, try to find one from any branch
# Function to find and output available disk image types (e.g., lwd_tip_disk, zebra_tip_disk, zebra_checkpoint_disk)
find_available_disk_type() {
local base_name="${1}"
local disk_type="${2}"
local disk_pattern="${base_name}-cache"
local output_var="${base_name}_${disk_type}_disk"
local disk_name

disk_name=$(gcloud compute images list --filter="status=READY AND name~${disk_pattern}-.+-[0-9a-f]+-v${LOCAL_STATE_VERSION}-${NETWORK}-${disk_type}" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1)

# Use >&2 to redirect to stderr and avoid sending wrong assignments to stdout
if [[ -n "${disk_name}" ]]; then
echo "Found ${disk_type^^} disk: ${disk_name} for ${base_name^^} on network: ${NETWORK}" >&2
disk_description=$(gcloud compute images describe "${disk_name}" --format="value(DESCRIPTION)")
echo "Description: ${disk_description}" >&2
echo "true" # This is the actual return value when a disk is found
else
ANY_DISK_PREFIX="${DISK_PREFIX}-.+-[0-9a-f]+-v${LOCAL_STATE_VERSION}-${NETWORK}-${DISK_SUFFIX}"
CACHED_DISK_NAME=$(find_cached_disk_image "${ANY_DISK_PREFIX}" "any branch")
echo "No ${disk_type^^} disk found for ${base_name^^} on network: ${NETWORK}" >&2
echo "false" # This is the actual return value when no disk is found
fi
}
if [[ -n "${NETWORK}" ]]; then
# Check for specific disk images (lwd_tip_disk, zebra_tip_disk, zebra_checkpoint_disk)
echo "Checking for specific disk images..."
LWD_TIP_DISK=$(find_available_disk_type "lwd" "tip")
ZEBRA_TIP_DISK=$(find_available_disk_type "zebrad" "tip")
ZEBRA_CHECKPOINT_DISK=$(find_available_disk_type "zebrad" "checkpoint")
fi

# Handle case where no suitable disk image is found
if [[ -z "${CACHED_DISK_NAME}" ]]; then
echo "No suitable cached state disk available."
echo "Expected pattern: ${COMMIT_DISK_PREFIX}"
echo "Cached state test jobs must depend on the cached state rebuild job."
exit 1
fi

echo "Selected Disk: ${CACHED_DISK_NAME}"

# Exporting variables for subsequent steps
echo "Exporting variables for subsequent steps..."
export CACHED_DISK_NAME="${CACHED_DISK_NAME}"
export LOCAL_STATE_VERSION="${LOCAL_STATE_VERSION}"
export LWD_TIP_DISK="${LWD_TIP_DISK}"
export ZEBRA_TIP_DISK="${ZEBRA_TIP_DISK}"
export ZEBRA_CHECKPOINT_DISK="${ZEBRA_CHECKPOINT_DISK}"
Loading

0 comments on commit 0438575

Please sign in to comment.