Merge branch 'Significant-Gravitas:master' into master #2

Workflow file for this run

.github/workflows/benchmark-ci.yml at ee4ded3

	# CI for the benchmark package.
	# Triggers on pushes to master, development and ci-test* branches, and on
	# pull requests targeting master, development and release-* branches, but
	# only when something under benchmark/ (other than benchmark/reports/) or
	# this workflow file itself changes.
	name: AGBenchmark CI

	on:
	  push:
	    branches: [master, development, 'ci-test*']
	    paths:
	      - 'benchmark/**'
	      - .github/workflows/benchmark-ci.yml
	      - '!benchmark/reports/**'
	  pull_request:
	    branches: [master, development, 'release-*']
	    paths:
	      - 'benchmark/**'
	      - '!benchmark/reports/**'
	      - .github/workflows/benchmark-ci.yml

	# Pull-request runs share one group per event name + PR number; all other
	# runs (github.head_ref is empty) get one group per commit SHA.
	# In-progress runs are cancelled only for pull_request* events.
	concurrency:
	  group: ${{ format('benchmark-ci-{0}', github.head_ref && format('{0}-{1}', github.event_name, github.event.pull_request.number) || github.sha) }}
	  cancel-in-progress: ${{ startsWith(github.event_name, 'pull_request') }}

	# Workflow-wide default: every `run:` step uses bash unless a job or step
	# sets its own shell.
	defaults:
	  run:
	    shell: bash

	# Workflow-wide environment. The value is quoted so that it stays the
	# string "3.10" instead of being parsed as the float 3.1.
	env:
	  min-python-version: '3.10'

	jobs:
	  # Runs the benchmark's pytest suite with coverage on every OS in the matrix
	  # and uploads the coverage report to Codecov.
	  test:
	    permissions:
	      contents: read
	    timeout-minutes: 30
	    strategy:
	      fail-fast: false
	      matrix:
	        python-version: ["3.10"]
	        platform-os: [ubuntu, macos, macos-arm64, windows]
	    # macos-arm64 maps to the macos-14 runner label; every other matrix entry
	    # maps to "<platform-os>-latest".
	    runs-on: ${{ matrix.platform-os != 'macos-arm64' && format('{0}-latest', matrix.platform-os) || 'macos-14' }}
	    defaults:
	      run:
	        shell: bash
	        working-directory: benchmark
	    steps:
	      - name: Checkout repository
	        uses: actions/checkout@v4
	        with:
	          fetch-depth: 0
	          submodules: true

	      - name: Set up Python ${{ matrix.python-version }}
	        uses: actions/setup-python@v5
	        with:
	          python-version: ${{ matrix.python-version }}

	      - name: Set up Python dependency cache
	        # On Windows, unpacking cached dependencies takes longer than just installing them
	        if: runner.os != 'Windows'
	        uses: actions/cache@v4
	        with:
	          path: ${{ runner.os == 'macOS' && '~/Library/Caches/pypoetry' || '~/.cache/pypoetry' }}
	          key: poetry-${{ runner.os }}-${{ hashFiles('benchmark/poetry.lock') }}

	      - name: Install Poetry (Unix)
	        if: runner.os != 'Windows'
	        run: |
	          curl -sSL https://install.python-poetry.org | python3 -

	          # On macOS the installer's bin directory is added to PATH for this
	          # step and, via GITHUB_PATH, for all following steps.
	          if [ "${{ runner.os }}" = "macOS" ]; then
	            PATH="$HOME/.local/bin:$PATH"
	            echo "$HOME/.local/bin" >> "$GITHUB_PATH"
	          fi

	      - name: Install Poetry (Windows)
	        if: runner.os == 'Windows'
	        shell: pwsh
	        run: |
	          (Invoke-WebRequest -Uri https://install.python-poetry.org -UseBasicParsing).Content | python -

	          # Expose the installer's Scripts directory to this and later steps.
	          $env:PATH += ";$env:APPDATA\Python\Scripts"
	          echo "$env:APPDATA\Python\Scripts" >> $env:GITHUB_PATH

	      - name: Install Python dependencies
	        run: poetry install

	      - name: Run pytest with coverage
	        run: |
	          poetry run pytest -vv \
	            --cov=agbenchmark --cov-branch --cov-report term-missing --cov-report xml \
	            --durations=10 \
	            tests
	        env:
	          CI: true
	          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

	      - name: Upload coverage reports to Codecov
	        uses: codecov/codecov-action@v4
	        with:
	          token: ${{ secrets.CODECOV_TOKEN }}
	          flags: agbenchmark,${{ runner.os }}

	  # Starts each agent in the matrix and runs agbenchmark against it in
	  # several modes, then verifies that the run left no uncommitted changes
	  # in the challenge / frontend asset files.
	  self-test-with-agent:
	    runs-on: ubuntu-latest
	    strategy:
	      matrix:
	        agent-name: [forge]
	      fail-fast: false
	    timeout-minutes: 20
	    steps:
	      - name: Checkout repository
	        uses: actions/checkout@v4
	        with:
	          fetch-depth: 0
	          submodules: true

	      - name: Set up Python ${{ env.min-python-version }}
	        uses: actions/setup-python@v5
	        with:
	          python-version: ${{ env.min-python-version }}

	      - name: Install Poetry
	        run: |
	          curl -sSL https://install.python-poetry.org | python -

	      - name: Run regression tests
	        working-directory: .
	        run: |
	          ./run agent start ${{ matrix.agent-name }}
	          cd ${{ matrix.agent-name }}

	          set +e # Ignore non-zero exit codes and continue execution
	          echo "Running the following command: poetry run agbenchmark --maintain --mock"
	          poetry run agbenchmark --maintain --mock
	          EXIT_CODE=$?
	          set -e # Stop ignoring non-zero exit codes
	          # Exit code 5 is tolerated (reported as an empty regression_tests.json).
	          # Any other non-zero exit code fails the job instead of being
	          # silently discarded.
	          if [ "$EXIT_CODE" -eq 5 ]; then
	            echo "regression_tests.json is empty."
	          elif [ "$EXIT_CODE" -ne 0 ]; then
	            echo "poetry run agbenchmark --maintain --mock failed with exit code $EXIT_CODE"
	            exit "$EXIT_CODE"
	          fi

	          echo "Running the following command: poetry run agbenchmark --mock"
	          poetry run agbenchmark --mock

	          echo "Running the following command: poetry run agbenchmark --mock --category=data"
	          poetry run agbenchmark --mock --category=data

	          echo "Running the following command: poetry run agbenchmark --mock --category=coding"
	          poetry run agbenchmark --mock --category=coding

	          echo "Running the following command: poetry run agbenchmark --test=WriteFile"
	          poetry run agbenchmark --test=WriteFile
	          cd ../benchmark
	          poetry install
	          echo "Adding the BUILD_SKILL_TREE environment variable. This will attempt to add new elements in the skill tree. If new elements are added, the CI fails because they should have been pushed"
	          export BUILD_SKILL_TREE=true

	          poetry run agbenchmark --mock

	          # `git diff --name-only` prints paths relative to the repository
	          # root (not the current directory), so the pattern must not start
	          # with "../".
	          CHANGED=$(git diff --name-only | grep -E '(agbenchmark/challenges)|(frontend/assets)') || echo "No diffs"
	          if [ -n "$CHANGED" ]; then
	            echo "There are unstaged changes please run agbenchmark and commit those changes since they are needed."
	            echo "$CHANGED"
	            exit 1
	          else
	            echo "No unstaged changes."
	          fi
	        env:
	          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
	          TELEMETRY_ENVIRONMENT: autogpt-benchmark-ci
	          # Evaluates to true only for runs whose ref name is master.
	          TELEMETRY_OPT_IN: ${{ github.ref_name == 'master' }}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Merge branch 'Significant-Gravitas:master' into master #2

Workflow file

Merge branch 'Significant-Gravitas:master' into master #2

Jobs

Run details

Workflow file for this run