Skip to content

Commit

Permalink
use runs-on for replay-verify workflows
Browse files Browse the repository at this point in the history
  • Loading branch information
aluon committed Oct 4, 2024
1 parent 41e82f7 commit 6684481
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 43 deletions.
6 changes: 0 additions & 6 deletions .github/workflows/module-verify.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,6 @@ jobs:
BUCKET: aptos-testnet-backup-2223d95b
SUB_DIR: e1
BACKUP_CONFIG_TEMPLATE_PATH: terraform/helm/fullnode/files/backup/s3-public.yaml
# workflow config
RUNS_ON: high-perf-docker-with-local-ssd
TIMEOUT_MINUTES: 20

verify-modules-mainnet:
Expand All @@ -51,8 +49,6 @@ jobs:
BUCKET: aptos-mainnet-backup-backup-831a69a8
SUB_DIR: e1
BACKUP_CONFIG_TEMPLATE_PATH: terraform/helm/fullnode/files/backup/s3-public.yaml
# workflow config
RUNS_ON: high-perf-docker-with-local-ssd
TIMEOUT_MINUTES: 20

test-verify-modules:
Expand All @@ -64,6 +60,4 @@ jobs:
BUCKET: aptos-testnet-backup-2223d95b
SUB_DIR: e1
BACKUP_CONFIG_TEMPLATE_PATH: terraform/helm/fullnode/files/backup/s3-public.yaml
# workflow config
RUNS_ON: "high-perf-docker-with-local-ssd"
TIMEOUT_MINUTES: 20
9 changes: 3 additions & 6 deletions .github/workflows/replay-verify.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ on:
schedule:
- cron: "0 22 * * 0,2,4" # The main branch cadence. This runs every Sun, Tue, and Thu at 22:00 UTC.

permissions:
id-token: write

# cancel redundant builds
concurrency:
# cancel redundant builds on PRs (only on PR, not on branches)
Expand Down Expand Up @@ -70,8 +73,6 @@ jobs:
# 1195000000-1220000000: https://github.com/aptos-labs/aptos-core/pull/13832
RANGES_TO_SKIP: "1195000000-1220000000"
BACKUP_CONFIG_TEMPLATE_PATH: terraform/helm/fullnode/files/backup/gcs.yaml
# workflow config
RUNS_ON: "high-perf-docker-with-local-ssd"
TIMEOUT_MINUTES: 180
MAX_VERSIONS_PER_RANGE: 2000000

Expand All @@ -94,8 +95,6 @@ jobs:
# 1197378568-1198492648: https://github.com/aptos-labs/aptos-core/pull/13832
RANGES_TO_SKIP: "1197378568-1198492648"
BACKUP_CONFIG_TEMPLATE_PATH: terraform/helm/fullnode/files/backup/gcs.yaml
# workflow config
RUNS_ON: "high-perf-docker-with-local-ssd"
TIMEOUT_MINUTES: 180
MAX_VERSIONS_PER_RANGE: 800000

Expand All @@ -115,7 +114,5 @@ jobs:
# 1195000000-1220000000: https://github.com/aptos-labs/aptos-core/pull/13832
RANGES_TO_SKIP: "1195000000-1220000000"
BACKUP_CONFIG_TEMPLATE_PATH: terraform/helm/fullnode/files/backup/gcs.yaml
# workflow config
RUNS_ON: "high-perf-docker-with-local-ssd"
TIMEOUT_MINUTES: 120 # increase test replay timeout to capture more flaky errors
MAX_VERSIONS_PER_RANGE: 2000000
59 changes: 50 additions & 9 deletions .github/workflows/workflow-run-module-verify.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,57 @@ on:
description: "The path to the backup config template to use."
type: string
required: true
# GHA job config
RUNS_ON:
description: "The runner to use for the job."

on:
# This allows the workflow to be triggered from another workflow
workflow_call:
inputs:
GIT_SHA:
required: true
type: string
description: The git SHA1 to test.
# replay-verify config
BUCKET:
required: true
type: string
description: The bucket to use for the backup. If not specified, it will use the default bucket.
SUB_DIR:
required: true
type: string
description: The subdirectory to use for the backup. If not specified, it will use the default subdirectory.
HISTORY_START:
required: true
type: string
description: The history start to use for the backup. If not specified, it will use the default history start.
TXNS_TO_SKIP:
required: false
type: string
description: The list of transaction versions to skip. If not specified, it will use the default list.
RANGES_TO_SKIP:
required: false
type: string
description: The optional list of transaction ranges to skip.
BACKUP_CONFIG_TEMPLATE_PATH:
description: "The path to the backup config template to use."
type: string
required: true
TIMEOUT_MINUTES:
description: "Github job timeout in minutes"
type: number
required: true
default: 180
MAX_VERSIONS_PER_RANGE:
description: "The maximum number of versions to process in a single job."
type: number
required: true
default: "high-perf-docker-with-local-ssd"
# This allows the workflow to be triggered manually from the Github UI or CLI
# NOTE: because the "number" type is not supported, we default to 720 minute timeout
workflow_dispatch:
inputs:
GIT_SHA:
required: true
type: string
description: The git SHA1 to test.
TIMEOUT_MINUTES:
description: "Github job timeout in minutes"
type: number
Expand All @@ -36,16 +81,12 @@ jobs:
module-verify:
# if we're running on a PR, it's only for testing purposes, so we can set a shorter timeout
timeout-minutes: ${{ inputs.TIMEOUT_MINUTES }}
runs-on: ${{ inputs.RUNS_ON }}
runs-on: runs-on,cpu=96,family=c5d+c5ad,image=aptos-ubuntu-x64,run-id=${{ github.run_id }},spot=false
steps:
- uses: actions/checkout@v4
with:
ref: ${{ inputs.GIT_SHA }}

- uses: aptos-labs/aptos-core/.github/actions/rust-setup@main
with:
GIT_CREDENTIALS: ${{ secrets.GIT_CREDENTIALS }}

- name: Install AWS CLI
shell: bash
run: |
Expand Down
32 changes: 10 additions & 22 deletions .github/workflows/workflow-run-replay-verify.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,6 @@ on:
description: "The path to the backup config template to use."
type: string
required: true
# GHA job config
RUNS_ON:
description: "The runner to use for the job."
type: string
required: true
default: "high-perf-docker-with-local-ssd"
TIMEOUT_MINUTES:
description: "Github job timeout in minutes"
type: number
Expand Down Expand Up @@ -81,19 +75,13 @@ on:
description: "The path to the backup config template to use."
type: string
required: true
# GHA job config
RUNS_ON:
description: "The runner to use for the job."
type: string
required: true
default: "high-perf-docker-with-local-ssd"
MAX_VERSIONS_PER_RANGE:
description: "The maximum number of versions to process in a single job."
type: number
required: true
jobs:
prepare:
runs-on: ${{ inputs.RUNS_ON }}
runs-on: runs-on,cpu=96,family=c5d+c5ad,image=aptos-ubuntu-x64,run-id=${{ github.run_id }},spot=false
outputs:
job_ids: ${{ steps.gen-jobs.outputs.job_ids }}
steps:
Expand Down Expand Up @@ -126,11 +114,11 @@ jobs:
strip -s target/release/aptos-debugger
cp target/release/aptos-debugger .
- name: Install GCloud SDK
uses: "google-github-actions/setup-gcloud@v2"
- name: Authenticate with Google Cloud
uses: "google-github-actions/auth@v2"
with:
version: ">= 418.0.0"
install_components: "kubectl,gke-gcloud-auth-plugin"
workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}
service_account: ${{ secrets.GCP_SERVICE_ACCOUNT_EMAIL }}

- name: get timestamp to use in cache key
id: get-timestamp
Expand Down Expand Up @@ -178,7 +166,7 @@ jobs:
replay-verify:
needs: prepare
timeout-minutes: ${{ inputs.TIMEOUT_MINUTES || 180 }}
runs-on: ${{ inputs.RUNS_ON }}
runs-on: runs-on,cpu=96,family=c5d+c5ad,image=aptos-ubuntu-x64,run-id=${{ github.run_id }},spot=false
strategy:
fail-fast: false
matrix:
Expand Down Expand Up @@ -208,11 +196,11 @@ jobs:
key: backup-config-${{ inputs.BUCKET }}/${{ inputs.SUB_DIR }}-${{ github.run_id }}
fail-on-cache-miss: true

- name: Install GCloud SDK
uses: "google-github-actions/setup-gcloud@v2"
- name: Authenticate with Google Cloud
uses: "google-github-actions/auth@v2"
with:
version: ">= 418.0.0"
install_components: "kubectl,gke-gcloud-auth-plugin"
workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}
service_account: ${{ secrets.GCP_SERVICE_ACCOUNT_EMAIL }}

- name: Run replay-verify in parallel
env:
Expand Down

0 comments on commit 6684481

Please sign in to comment.