Skip to content

AWS Scheduled Cleanup of test regions #266

AWS Scheduled Cleanup of test regions

AWS Scheduled Cleanup of test regions #266

---
name: 'AWS Scheduled Cleanup of test regions'

# Three triggers: a nightly cron, a manual dispatch with overridable
# region/age, and pull requests that touch this workflow or its scripts.
on:
  schedule:
    # One run per day at 05:00 (GitHub evaluates cron schedules in UTC)
    - cron: '0 5 * * *'
  workflow_dispatch:
    inputs:
      # Region cleaned by a manual run (replaces the matrix region)
      region:
        description: 'AWS Region to clean up'
        default: 'eu-west-2'
      # Age threshold handed to cloud-nuke via --older-than
      cleanup_older_than:
        description: 'Minimum age of the resources to cleanup'
        default: '12h'
  pull_request:
    # Only changes to the workflow itself or its helper scripts trigger a run
    paths:
      - '.github/workflows/aws_nightly_cleanup.yml'
      - '.github/workflows/scripts/aws_global_cleanup.sh'
      - '.github/workflows/scripts/aws_regional_cleanup.sh'
# Workflow-wide environment, visible to every job and step
env:
  # Named AWS CLI profile that the credentials step writes to and configures
  AWS_PROFILE: infex
  # cloud-nuke release tag to download; keep the renovate marker directly above the value
  # renovate: datasource=github-tags depName=gruntwork-io/cloud-nuke
  CLOUD_NUKE_VERSION: v0.37.2
# At most one run per workflow and ref: starting a newer run cancels the
# one that is still in progress for the same group.
concurrency:
  cancel-in-progress: true
  group: '${{ github.workflow }}-${{ github.ref }}'
jobs:
  # Cleans one AWS region per matrix entry: project cleanup scripts first,
  # then two cloud-nuke passes (best-effort, then strict).
  aws-cleanup:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false # don't propagate failing jobs
      matrix:
        # Define regions and types of cleanup based on day
        # Please also update the README.md of the project
        config:
          - region: eu-west-2
            day: All
            cleanup_older_than: 12h
          - region: eu-west-3
            day: All
            cleanup_older_than: 12h
          - region: eu-north-1
            day: Saturday
            cleanup_older_than: 0h
          - region: us-east-1
            day: Saturday
            cleanup_older_than: 0h
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
      # Manual runs take region/age from the dispatch inputs, every other
      # event takes them from the matrix entry. The values are handed to the
      # script through env instead of being interpolated into it, so a
      # dispatch input cannot change the shell code that gets executed.
      - name: Determine AWS_REGION and CLEANUP_OLDER_THAN
        id: determine-values
        env:
          EVENT_NAME: ${{ github.event_name }}
          DISPATCH_REGION: ${{ github.event.inputs.region }}
          DISPATCH_CLEANUP_OLDER_THAN: ${{ github.event.inputs.cleanup_older_than }}
          MATRIX_REGION: ${{ matrix.config.region }}
          MATRIX_CLEANUP_OLDER_THAN: ${{ matrix.config.cleanup_older_than }}
        run: |
          if [[ "$EVENT_NAME" == "workflow_dispatch" ]]; then
            AWS_REGION="$DISPATCH_REGION"
            CLEANUP_OLDER_THAN="$DISPATCH_CLEANUP_OLDER_THAN"
          else
            AWS_REGION="$MATRIX_REGION"
            CLEANUP_OLDER_THAN="$MATRIX_CLEANUP_OLDER_THAN"
          fi
          # Persist both values for all following steps of this job
          echo "AWS_REGION=$AWS_REGION" | tee -a "$GITHUB_ENV"
          echo "CLEANUP_OLDER_THAN=$CLEANUP_OLDER_THAN" | tee -a "$GITHUB_ENV"
      # Writes should_run=true|false to the job environment; every later
      # step is gated on it.
      - name: Check if job should run based on the day
        id: day-check
        env:
          EVENT_NAME: ${{ github.event_name }}
          MATRIX_REGION: ${{ matrix.config.region }}
          MATRIX_DAY: ${{ matrix.config.day }}
        run: |
          # Initialize a variable to determine if the job should continue
          should_run=true
          # If it's a workflow_dispatch, only allow the first region (eu-west-2) of the matrix to run
          if [[ "$EVENT_NAME" == "workflow_dispatch" ]]; then
            if [[ "$MATRIX_REGION" != "eu-west-2" ]]; then
              echo "Skipping job for this region because only the first region is allowed to run for workflow_dispatch."
              should_run=false
            else
              echo "Running job for the first region only since it's a workflow_dispatch event."
            fi
          else
            # Otherwise, check if the day matches the one in the matrix
            current_day=$(date +%A)
            if [[ "$MATRIX_DAY" != "All" && "$MATRIX_DAY" != "$current_day" ]]; then
              echo "Skipping job for region ${AWS_REGION} as it’s intended for ${MATRIX_DAY} only."
              should_run=false
            else
              echo "Running job for region ${AWS_REGION} on the correct day: $current_day."
            fi
          fi
          echo "should_run=$should_run" | tee -a "$GITHUB_ENV"
      - name: Import Secrets
        id: secrets
        if: ${{ env.should_run == 'true' }}
        uses: hashicorp/vault-action@d1720f055e0635fd932a1d2a48f87a666a57906c # v3
        with:
          url: ${{ secrets.VAULT_ADDR }}
          method: approle
          roleId: ${{ secrets.VAULT_ROLE_ID }}
          secretId: ${{ secrets.VAULT_SECRET_ID }}
          exportEnv: false
          secrets: |
            secret/data/products/infrastructure-experience/ci/common AWS_ACCESS_KEY;
            secret/data/products/infrastructure-experience/ci/common AWS_SECRET_KEY;
      # Official action does not support profiles
      # The keys reach the script as quoted env values, never as script text.
      - name: Add profile credentials to ~/.aws/credentials
        if: ${{ env.should_run == 'true' }}
        env:
          VAULT_AWS_ACCESS_KEY: ${{ steps.secrets.outputs.AWS_ACCESS_KEY }}
          VAULT_AWS_SECRET_KEY: ${{ steps.secrets.outputs.AWS_SECRET_KEY }}
        run: |
          aws configure set aws_access_key_id "$VAULT_AWS_ACCESS_KEY" --profile "$AWS_PROFILE"
          aws configure set aws_secret_access_key "$VAULT_AWS_SECRET_KEY" --profile "$AWS_PROFILE"
          aws configure set region "$AWS_REGION" --profile "$AWS_PROFILE"
      - name: Install Cloud Nuke
        if: ${{ env.should_run == 'true' }}
        run: |
          # --fail turns an HTTP error into a failed step instead of saving
          # the error page under the binary's file name
          curl -LO \
            --fail \
            --retry 5 \
            --max-time 15 \
            --retry-delay 30 \
            "https://github.com/gruntwork-io/cloud-nuke/releases/download/${CLOUD_NUKE_VERSION}/cloud-nuke_linux_amd64"
          chmod +x cloud-nuke_linux_amd64
      - name: Delete additional regional AWS resources
        timeout-minutes: 15
        if: ${{ env.should_run == 'true' }}
        run: .github/workflows/scripts/aws_regional_cleanup.sh "$AWS_REGION"
      - name: Delete additional global AWS resources
        # Tied to eu-north-1, which the matrix runs on Saturday only,
        # so the global cleanup happens once per week
        if: ${{ env.should_run == 'true' && env.AWS_REGION == 'eu-north-1' }}
        timeout-minutes: 15
        run: .github/workflows/scripts/aws_global_cleanup.sh
      # This is likely to fail, therefore we ignore the error
      # We're ignoring ec2_dhcp_option as they couldn't be deleted
      # cloudtrail is managed by IT and can't be deleted either
      - name: Run Cloud Nuke (first pass, errors ignored)
        timeout-minutes: 90
        env:
          DISABLE_TELEMETRY: 'true'
        if: ${{ env.should_run == 'true' }}
        run: |
          ./cloud-nuke_linux_amd64 aws \
            --region "$AWS_REGION" \
            --force \
            --older-than "$CLEANUP_OLDER_THAN" \
            --exclude-resource-type ec2_dhcp_option \
            --exclude-resource-type ec2-keypairs \
            --exclude-resource-type s3 \
            --exclude-resource-type cloudtrail || true
      # The second run should remove the remaining resources (VPCs) and fail if there's anything left
      - name: Run Cloud Nuke (second pass, must succeed)
        timeout-minutes: 90
        env:
          DISABLE_TELEMETRY: 'true'
        if: ${{ env.should_run == 'true' }}
        run: |
          ./cloud-nuke_linux_amd64 aws \
            --region "$AWS_REGION" \
            --force \
            --older-than "$CLEANUP_OLDER_THAN" \
            --exclude-resource-type ec2_dhcp_option \
            --exclude-resource-type cloudtrail \
            --exclude-resource-type ec2-keypairs \
            --exclude-resource-type s3
  # Runs only when aws-cleanup failed; the Slack report itself is further
  # restricted to scheduled runs.
  notify-on-failure:
    runs-on: ubuntu-latest
    if: failure()
    needs:
      - aws-cleanup
    steps:
      # The checkout is needed because the reporting action is referenced by local path
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
      - name: Notify in Slack in case of failure
        id: slack-notification
        if: github.event_name == 'schedule'
        uses: ./.github/actions/report-failure-on-slack
        with:
          vault_addr: ${{ secrets.VAULT_ADDR }}
          vault_role_id: ${{ secrets.VAULT_ROLE_ID }}
          vault_secret_id: ${{ secrets.VAULT_SECRET_ID }}