[gha] Refactor Forge Stable to run individual tests #1469

Workflow file for this run

.github/workflows/forge-stable.yaml at b4e9a55

	# Continuously run stable forge tests against the latest main branch.
	name: Continuous Forge Tests - Stable

	# Token scopes granted to every job in this workflow.
	permissions:
	  issues: write
	  pull-requests: write
	  contents: read
	  id-token: write
	  actions: write  # required for workflow cancellation via check-aptos-core

	# One run per ref at a time: a newer run on the same ref cancels the one in progress.
	concurrency:
	  group: forge-stable-${{ github.ref_name }}
	  cancel-in-progress: true

	# Triggers: manual dispatch (optionally restricted to one job), a cron
	# schedule, and pull requests that touch this workflow or the image finder.
	on:
	  # Allow triggering manually
	  workflow_dispatch:
	    inputs:
	      IMAGE_TAG:
	        required: false
	        type: string
	        description: The docker image tag to test. This may be a git SHA1, or a tag like "<branch>_<git SHA1>". If not specified, Forge will find the latest build based on the git history (starting from GIT_SHA input)
	      GIT_SHA:
	        required: false
	        type: string
	        description: The git SHA1 to checkout. This affects the Forge test runner that is used. If not specified, the latest main will be used
	      JOB_NAME:
	        required: false
	        type: choice
	        description: The job to run. If all, all jobs will be run
	        default: all
	        # Every option other than 'all' must equal a TEST_NAME in the 'run' job matrix
	        options:
	          - all
	          - framework-upgrade-test
	          - realistic-env-load-sweep
	          - realistic-env-workload-sweep
	          - realistic-env-graceful-overload
	          - realistic-env-graceful-workload-sweep
	          - realistic-env-fairness-workload-sweep
	          - realistic-network-tuned-for-throughput
	          - consensus-stress-test
	          - workload-mix-test
	          - single-vfn-perf
	          - fullnode-reboot-stress-test
	          - compat
	          - changing-working-quorum-test
	          - changing-working-quorum-test-high-load
	          - pfn-const-tps-realistic-env
	          - realistic-env-max-load-long

	  # NOTE: to support testing different branches on different schedules, you need to specify the cron schedule in the 'determine-test-branch' step as well below
	  # Reference: https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#schedule
	  schedule:
	    - cron: "0 22 * * 0,2,4"  # The main branch cadence. This runs every Sun,Tues,Thurs
	  pull_request:
	    paths:
	      - ".github/workflows/forge-stable.yaml"
	      - "testsuite/find_latest_image.py"

	# Workflow-wide environment shared by every job and step.
	env:
	  AWS_ACCOUNT_NUM: ${{ secrets.ENV_ECR_AWS_ACCOUNT_NUM }}
	  AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
	  AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
	  IMAGE_TAG: ${{ inputs.IMAGE_TAG }}  # this is only used for workflow_dispatch, otherwise defaults to empty
	  AWS_REGION: us-west-2

	jobs:
	  # This job determines the image tag and branch to test, and passes them to the other jobs
	  # NOTE: this may be better as a separate workflow as the logic is quite complex but generalizable
	  determine-test-metadata:
	    runs-on: ubuntu-latest
	    outputs:
	      IMAGE_TAG: ${{ steps.get-docker-image-tag.outputs.IMAGE_TAG }}
	      IMAGE_TAG_FOR_COMPAT_TEST: ${{ steps.get-last-released-image-tag-for-compat-test.outputs.IMAGE_TAG }}
	      BRANCH: ${{ steps.determine-test-branch.outputs.BRANCH }}
	      BRANCH_HASH: ${{ steps.hash-branch.outputs.BRANCH_HASH }}
	    steps:
	      - uses: actions/checkout@v4

	      - name: Determine branch based on cadence
	        id: determine-test-branch
	        # NOTE: the schedule cron MUST match the one in the 'on.schedule.cron' section above
	        # Context values are handed to the script through env instead of being
	        # interpolated into it, so a crafted branch name or input is treated as
	        # data and can never be parsed as shell code.
	        env:
	          EVENT_NAME: ${{ github.event_name }}
	          EVENT_SCHEDULE: ${{ github.event.schedule }}
	          REF_NAME: ${{ github.ref_name }}
	          HEAD_REF: ${{ github.head_ref }}
	          GIT_SHA_INPUT: ${{ inputs.GIT_SHA }}
	        run: |
	          if [[ "${EVENT_NAME}" == "schedule" ]]; then
	            if [[ "${EVENT_SCHEDULE}" == "0 22 * * 0,2,4" ]]; then
	              echo "Branch: main"
	              echo "BRANCH=main" >> "$GITHUB_OUTPUT"
	            else
	              echo "Unknown schedule: ${EVENT_SCHEDULE}"
	              exit 1
	            fi
	          # NOTE(review): this workflow declares no 'push' trigger, so this branch is currently unreachable
	          elif [[ "${EVENT_NAME}" == "push" ]]; then
	            echo "Branch: ${REF_NAME}"
	            echo "BRANCH=${REF_NAME}" >> "$GITHUB_OUTPUT"
	          # on workflow_dispatch, this will simply use the inputs.GIT_SHA given (or the default)
	          elif [[ -n "${GIT_SHA_INPUT}" ]]; then
	            echo "BRANCH=${GIT_SHA_INPUT}" >> "$GITHUB_OUTPUT"
	          # if GIT_SHA not provided, use the PR head branch; github.head_ref is only
	          # populated for pull_request events, so BRANCH is empty otherwise
	          else
	            echo "BRANCH=${HEAD_REF}" >> "$GITHUB_OUTPUT"
	          fi

	      # Use the branch hash instead of the full branch name to stay under kubernetes namespace length limit
	      - name: Hash the branch
	        id: hash-branch
	        env:
	          BRANCH: ${{ steps.determine-test-branch.outputs.BRANCH }}
	        run: |
	          # Hashing the branch name: first 10 hex characters of its SHA-256
	          echo "BRANCH_HASH=$(printf '%s' "${BRANCH}" | sha256sum | cut -c1-10)" >> "$GITHUB_OUTPUT"

	      - uses: aptos-labs/aptos-core/.github/actions/check-aptos-core@main
	        with:
	          cancel-workflow: ${{ github.event_name == 'schedule' }}  # Cancel the workflow if it is scheduled on a fork

	      # actions/get-latest-docker-image-tag requires docker utilities and having authenticated to internal docker image registries
	      - uses: aptos-labs/aptos-core/.github/actions/docker-setup@main
	        id: docker-setup
	        with:
	          GCP_WORKLOAD_IDENTITY_PROVIDER: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}
	          GCP_SERVICE_ACCOUNT_EMAIL: ${{ secrets.GCP_SERVICE_ACCOUNT_EMAIL }}
	          EXPORT_GCP_PROJECT_VARIABLES: "false"
	          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
	          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
	          AWS_DOCKER_ARTIFACT_REPO: ${{ secrets.AWS_DOCKER_ARTIFACT_REPO }}
	          GIT_CREDENTIALS: ${{ secrets.GIT_CREDENTIALS }}

	      - uses: aptos-labs/aptos-core/.github/actions/get-latest-docker-image-tag@main
	        id: get-docker-image-tag
	        with:
	          branch: ${{ steps.determine-test-branch.outputs.BRANCH }}
	          variants: "failpoints performance"

	      - uses: aptos-labs/aptos-core/.github/actions/determine-or-use-target-branch-and-get-last-released-image@main
	        id: get-last-released-image-tag-for-compat-test
	        with:
	          base-branch: ${{ steps.determine-test-branch.outputs.BRANCH }}
	          variants: "failpoints performance"

	      - name: Write summary
	        env:
	          IMAGE_TAG: ${{ steps.get-docker-image-tag.outputs.IMAGE_TAG }}
	          BRANCH: ${{ steps.determine-test-branch.outputs.BRANCH }}
	        run: |
	          if [ -n "${BRANCH}" ]; then
	            echo "BRANCH: [${BRANCH}](https://github.com/${{ github.repository }}/tree/${BRANCH})" >> "$GITHUB_STEP_SUMMARY"
	          fi
	          echo "IMAGE_TAG: [${IMAGE_TAG}](https://github.com/${{ github.repository }}/commit/${IMAGE_TAG})" >> "$GITHUB_STEP_SUMMARY"
	          # The backticks are escaped so bash prints them literally (markdown code
	          # span) instead of treating them as command substitution.
	          echo "To cancel this job, do \`pnpm infra ci cancel-workflow ${{ github.run_id }}\` from internal-ops" >> "$GITHUB_STEP_SUMMARY"

	  # Runs each Forge test suite through the reusable workflow-run-forge workflow,
	  # one matrix entry at a time (max-parallel: 1); a failing entry does not
	  # cancel the remaining ones (fail-fast: false).
	  # NOTE(review): no 'secrets:' mapping is passed to the reusable workflow here - confirm it does not need one
	  run:
	    needs: [determine-test-metadata]
	    strategy:
	      fail-fast: false
	      max-parallel: 1
	      matrix:
	        # Each TEST_NAME must equal one of the workflow_dispatch JOB_NAME options
	        include:
	          - TEST_NAME: framework-upgrade-test
	            FORGE_RUNNER_DURATION_SECS: 7200
	            FORGE_TEST_SUITE: framework_upgrade
	          - TEST_NAME: realistic-env-load-sweep
	            FORGE_RUNNER_DURATION_SECS: 1800
	            FORGE_TEST_SUITE: realistic_env_load_sweep
	          - TEST_NAME: realistic-env-workload-sweep
	            FORGE_RUNNER_DURATION_SECS: 2000
	            FORGE_TEST_SUITE: realistic_env_workload_sweep
	          - TEST_NAME: realistic-env-graceful-overload
	            FORGE_RUNNER_DURATION_SECS: 1200
	            FORGE_TEST_SUITE: realistic_env_graceful_overload
	          - TEST_NAME: realistic-env-graceful-workload-sweep
	            FORGE_RUNNER_DURATION_SECS: 2100
	            FORGE_TEST_SUITE: realistic_env_graceful_workload_sweep
	          - TEST_NAME: realistic-env-fairness-workload-sweep
	            FORGE_RUNNER_DURATION_SECS: 900
	            FORGE_TEST_SUITE: realistic_env_fairness_workload_sweep
	          - TEST_NAME: realistic-network-tuned-for-throughput
	            FORGE_RUNNER_DURATION_SECS: 900
	            FORGE_TEST_SUITE: realistic_network_tuned_for_throughput
	            FORGE_ENABLE_PERFORMANCE: true
	          - TEST_NAME: consensus-stress-test
	            FORGE_RUNNER_DURATION_SECS: 2400
	            FORGE_TEST_SUITE: consensus_stress_test
	          - TEST_NAME: workload-mix-test
	            FORGE_RUNNER_DURATION_SECS: 900
	            FORGE_TEST_SUITE: workload_mix
	          - TEST_NAME: single-vfn-perf
	            FORGE_RUNNER_DURATION_SECS: 480
	            FORGE_TEST_SUITE: single_vfn_perf
	          - TEST_NAME: fullnode-reboot-stress-test
	            FORGE_RUNNER_DURATION_SECS: 1800
	            FORGE_TEST_SUITE: fullnode_reboot_stress_test
	          - TEST_NAME: compat
	            FORGE_RUNNER_DURATION_SECS: 300
	            FORGE_TEST_SUITE: compat
	          - TEST_NAME: changing-working-quorum-test
	            FORGE_RUNNER_DURATION_SECS: 1200
	            FORGE_TEST_SUITE: changing_working_quorum_test
	            FORGE_ENABLE_FAILPOINTS: true
	          - TEST_NAME: changing-working-quorum-test-high-load
	            FORGE_RUNNER_DURATION_SECS: 900
	            FORGE_TEST_SUITE: changing_working_quorum_test_high_load
	            FORGE_ENABLE_FAILPOINTS: true
	          - TEST_NAME: pfn-const-tps-realistic-env
	            FORGE_RUNNER_DURATION_SECS: 900
	            FORGE_TEST_SUITE: pfn_const_tps_with_realistic_env
	          - TEST_NAME: realistic-env-max-load-long
	            FORGE_RUNNER_DURATION_SECS: 7200
	            FORGE_TEST_SUITE: realistic_env_max_load_large
	    uses: aptos-labs/aptos-core/.github/workflows/workflow-run-forge.yaml@main
	    with:
	      # The compat suite runs against the last released image; every other suite uses the latest image
	      IMAGE_TAG: ${{ ((matrix.FORGE_TEST_SUITE == 'compat' && needs.determine-test-metadata.outputs.IMAGE_TAG_FOR_COMPAT_TEST) || needs.determine-test-metadata.outputs.IMAGE_TAG) }}
	      FORGE_NAMESPACE: forge-${{ matrix.TEST_NAME }}-${{ needs.determine-test-metadata.outputs.BRANCH_HASH }}
	      FORGE_TEST_SUITE: ${{ matrix.FORGE_TEST_SUITE }}
	      FORGE_RUNNER_DURATION_SECS: ${{ matrix.FORGE_RUNNER_DURATION_SECS }}
	      FORGE_ENABLE_PERFORMANCE: ${{ matrix.FORGE_ENABLE_PERFORMANCE || false }}
	      FORGE_ENABLE_FAILPOINTS: ${{ matrix.FORGE_ENABLE_FAILPOINTS || false }}
	      # Skip only on a manual dispatch that selected one specific job which is not
	      # this matrix entry. Scheduled and pull_request runs carry no JOB_NAME input
	      # and must run every entry, hence the explicit event check.
	      SKIP_JOB: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.JOB_NAME != 'all' && matrix.TEST_NAME != github.event.inputs.JOB_NAME }}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[gha] Refactor Forge Stable to run individual tests #1469

Workflow file

[gha] Refactor Forge Stable to run individual tests #1469

Jobs

Run details

Workflow file for this run