diff --git a/.github/workflows/module-verify.yaml b/.github/workflows/module-verify.yaml index 17b99be413f9db..f9fc022ee6c717 100644 --- a/.github/workflows/module-verify.yaml +++ b/.github/workflows/module-verify.yaml @@ -38,8 +38,6 @@ jobs: BUCKET: aptos-testnet-backup-2223d95b SUB_DIR: e1 BACKUP_CONFIG_TEMPLATE_PATH: terraform/helm/fullnode/files/backup/s3-public.yaml - # workflow config - RUNS_ON: high-perf-docker-with-local-ssd TIMEOUT_MINUTES: 20 verify-modules-mainnet: @@ -51,8 +49,6 @@ jobs: BUCKET: aptos-mainnet-backup-backup-831a69a8 SUB_DIR: e1 BACKUP_CONFIG_TEMPLATE_PATH: terraform/helm/fullnode/files/backup/s3-public.yaml - # workflow config - RUNS_ON: high-perf-docker-with-local-ssd TIMEOUT_MINUTES: 20 test-verify-modules: @@ -64,6 +60,4 @@ jobs: BUCKET: aptos-testnet-backup-2223d95b SUB_DIR: e1 BACKUP_CONFIG_TEMPLATE_PATH: terraform/helm/fullnode/files/backup/s3-public.yaml - # workflow config - RUNS_ON: "high-perf-docker-with-local-ssd" TIMEOUT_MINUTES: 20 diff --git a/.github/workflows/replay-verify.yaml b/.github/workflows/replay-verify.yaml index ac498ad123fc49..710a54b38a1df5 100644 --- a/.github/workflows/replay-verify.yaml +++ b/.github/workflows/replay-verify.yaml @@ -35,6 +35,9 @@ on: schedule: - cron: "0 22 * * 0,2,4" # The main branch cadence. 
This runs every Sun,Tues,Thurs +permissions: + id-token: write + # cancel redundant builds concurrency: # cancel redundant builds on PRs (only on PR, not on branches) @@ -70,8 +73,6 @@ jobs: # 1195000000-122000000: https://github.com/aptos-labs/aptos-core/pull/13832 RANGES_TO_SKIP: "1195000000-1220000000" BACKUP_CONFIG_TEMPLATE_PATH: terraform/helm/fullnode/files/backup/gcs.yaml - # workflow config - RUNS_ON: "high-perf-docker-with-local-ssd" TIMEOUT_MINUTES: 180 MAX_VERSIONS_PER_RANGE: 2000000 @@ -94,8 +95,6 @@ jobs: # 1197378568-1198492648: https://github.com/aptos-labs/aptos-core/pull/13832 RANGES_TO_SKIP: "1197378568-1198492648" BACKUP_CONFIG_TEMPLATE_PATH: terraform/helm/fullnode/files/backup/gcs.yaml - # workflow config - RUNS_ON: "high-perf-docker-with-local-ssd" TIMEOUT_MINUTES: 180 MAX_VERSIONS_PER_RANGE: 800000 @@ -115,7 +114,5 @@ jobs: # 1195000000-1220000000: https://github.com/aptos-labs/aptos-core/pull/13832 RANGES_TO_SKIP: "1195000000-1220000000" BACKUP_CONFIG_TEMPLATE_PATH: terraform/helm/fullnode/files/backup/gcs.yaml - # workflow config - RUNS_ON: "high-perf-docker-with-local-ssd" TIMEOUT_MINUTES: 120 # increase test replay timeout to capture more flaky errors MAX_VERSIONS_PER_RANGE: 2000000 diff --git a/.github/workflows/workflow-run-module-verify.yaml b/.github/workflows/workflow-run-module-verify.yaml index de16bbade2ffe5..f22a06c03894f3 100644 --- a/.github/workflows/workflow-run-module-verify.yaml +++ b/.github/workflows/workflow-run-module-verify.yaml @@ -20,12 +20,57 @@ on: description: "The path to the backup config template to use." type: string required: true - # GHA job config - RUNS_ON: - description: "The runner to use for the job." + +on: + # This allows the workflow to be triggered from another workflow + workflow_call: + inputs: + GIT_SHA: + required: true + type: string + description: The git SHA1 to test. + # replay-verify config + BUCKET: + required: true + type: string + description: The bucket to use for the backup. 
If not specified, it will use the default bucket. + SUB_DIR: + required: true + type: string + description: The subdirectory to use for the backup. If not specified, it will use the default subdirectory. + HISTORY_START: + required: true + type: string + description: The history start to use for the backup. If not specified, it will use the default history start. + TXNS_TO_SKIP: + required: false + type: string + description: The list of transaction versions to skip. If not specified, it will use the default list. + RANGES_TO_SKIP: + required: false type: string + description: The optional list of transaction ranges to skip. + BACKUP_CONFIG_TEMPLATE_PATH: + description: "The path to the backup config template to use." + type: string + required: true + TIMEOUT_MINUTES: + description: "Github job timeout in minutes" + type: number + required: true + default: 180 + MAX_VERSIONS_PER_RANGE: + description: "The maximum number of versions to process in a single job." + type: number required: true - default: "high-perf-docker-with-local-ssd" + # This allows the workflow to be triggered manually from the GitHub UI or CLI + # NOTE: because the "number" type is not supported, we default to a 720-minute timeout + workflow_dispatch: + inputs: + GIT_SHA: + required: true + type: string + description: The git SHA1 to test. 
TIMEOUT_MINUTES: description: "Github job timeout in minutes" type: number @@ -36,16 +81,12 @@ jobs: module-verify: # if we're running on a PR, it's only for testing purposes, so we can set a shorter timeout timeout-minutes: ${{ inputs.TIMEOUT_MINUTES }} - runs-on: ${{ inputs.RUNS_ON }} + runs-on: runs-on,cpu=96,family=c5d+c5ad,image=aptos-ubuntu-x64,run-id=${{ github.run_id }},spot=false steps: - uses: actions/checkout@v4 with: ref: ${{ inputs.GIT_SHA }} - - uses: aptos-labs/aptos-core/.github/actions/rust-setup@main - with: - GIT_CREDENTIALS: ${{ secrets.GIT_CREDENTIALS }} - - name: Install AWS CLI shell: bash run: | diff --git a/.github/workflows/workflow-run-replay-verify.yaml b/.github/workflows/workflow-run-replay-verify.yaml index 40788a9fa0e281..af3496df0ecc97 100644 --- a/.github/workflows/workflow-run-replay-verify.yaml +++ b/.github/workflows/workflow-run-replay-verify.yaml @@ -33,12 +33,6 @@ on: description: "The path to the backup config template to use." type: string required: true - # GHA job config - RUNS_ON: - description: "The runner to use for the job." - type: string - required: true - default: "high-perf-docker-with-local-ssd" TIMEOUT_MINUTES: description: "Github job timeout in minutes" type: number @@ -81,19 +75,13 @@ on: description: "The path to the backup config template to use." type: string required: true - # GHA job config - RUNS_ON: - description: "The runner to use for the job." - type: string - required: true - default: "high-perf-docker-with-local-ssd" MAX_VERSIONS_PER_RANGE: description: "The maximum number of versions to process in a single job." type: number required: true jobs: prepare: - runs-on: ${{ inputs.RUNS_ON }} + runs-on: runs-on,cpu=96,family=c5d+c5ad,image=aptos-ubuntu-x64,run-id=${{ github.run_id }},spot=false outputs: job_ids: ${{ steps.gen-jobs.outputs.job_ids }} steps: @@ -126,11 +114,11 @@ jobs: strip -s target/release/aptos-debugger cp target/release/aptos-debugger . 
- - name: Install GCloud SDK - uses: "google-github-actions/setup-gcloud@v2" + - name: Authenticate with Google Cloud + uses: "google-github-actions/auth@v2" with: - version: ">= 418.0.0" - install_components: "kubectl,gke-gcloud-auth-plugin" + workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }} + service_account: ${{ secrets.GCP_SERVICE_ACCOUNT_EMAIL }} - name: get timestamp to use in cache key id: get-timestamp @@ -178,7 +166,7 @@ jobs: replay-verify: needs: prepare timeout-minutes: ${{ inputs.TIMEOUT_MINUTES || 180 }} - runs-on: ${{ inputs.RUNS_ON }} + runs-on: runs-on,cpu=96,family=c5d+c5ad,image=aptos-ubuntu-x64,run-id=${{ github.run_id }},spot=false strategy: fail-fast: false matrix: @@ -208,11 +196,11 @@ jobs: key: backup-config-${{ inputs.BUCKET }}/${{ inputs.SUB_DIR }}-${{ github.run_id }} fail-on-cache-miss: true - - name: Install GCloud SDK - uses: "google-github-actions/setup-gcloud@v2" + - name: Authenticate with Google Cloud + uses: "google-github-actions/auth@v2" with: - version: ">= 418.0.0" - install_components: "kubectl,gke-gcloud-auth-plugin" + workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }} + service_account: ${{ secrets.GCP_SERVICE_ACCOUNT_EMAIL }} - name: Run replay-verify in parallel env: