Skip to content

Commit

Permalink
Dynamically provision self-hosted runner (#264)
Browse files Browse the repository at this point in the history
  • Loading branch information
ohinds authored Sep 12, 2023
1 parent 79bd97d commit 70a088c
Show file tree
Hide file tree
Showing 3 changed files with 95 additions and 69 deletions.
18 changes: 18 additions & 0 deletions .github/workflows/destroy_runner.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/usr/bin/env bash
#
# Stop and uninstall the self-hosted GitHub Actions runner service found in
# the current directory, then deregister the runner from the neuronets
# organization.
#
# Expects in the working directory:
#   gh_token   file whose contents are sent as the Bearer token to the
#              GitHub REST API
#   svc.sh     runner service control script
#   config.sh  runner configuration script
# Requires curl, jq and sudo on PATH.

set -euo pipefail

readonly REMOVE_TOKEN_URL="https://api.github.com/orgs/neuronets/actions/runners/remove-token"

# Print an error message to stderr and abort.
die() {
  printf 'destroy_runner: %s\n' "$*" >&2
  exit 1
}

# Read the API token before touching the service so a missing token file
# aborts the script without leaving the runner half torn down.
[[ -r gh_token ]] || die "cannot read ./gh_token"
GH_TOKEN=$(<gh_token)
[[ -n "${GH_TOKEN}" ]] || die "./gh_token is empty"

# Trace only the service commands. Tracing stays off everywhere else because
# xtrace would print the API token and the removal token into the logs.
set -x
sudo ./svc.sh stop
sudo ./svc.sh uninstall
{ set +x; } 2>/dev/null

# -f turns HTTP errors into a non-zero exit (propagated by pipefail), and
# '// empty' keeps a missing field from becoming the literal string "null".
remove_token=$(
  curl -fsSL \
    -X POST \
    -H "Accept: application/vnd.github+json" \
    -H "Authorization: Bearer ${GH_TOKEN}" \
    -H "X-GitHub-Api-Version: 2022-11-28" \
    "${REMOVE_TOKEN_URL}" \
    | jq -r '.token // empty'
)
[[ -n "${remove_token}" ]] || die "GitHub API did not return a runner remove token"

./config.sh remove --token "${remove_token}"
116 changes: 47 additions & 69 deletions .github/workflows/guide-notebooks-ec2.yml
Original file line number Diff line number Diff line change
@@ -1,63 +1,44 @@
name: Guide Notebooks Regression - EC2
run-name: Guide Notebooks Regression - EC2
run-name: ${{ github.ref_name }} - Guide Notebooks Regression - EC2
on: [push]
jobs:
start_ec2_runner:
start-runner:
name: Start self-hosted EC2 runner
runs-on: ubuntu-latest
env:
AWS_KEY_ID: ${{ secrets.AWS_KEY_ID }}
AWS_KEY_SECRET: ${{ secrets.AWS_KEY_SECRET }}
AWS_INSTANCE_TYPE: ${{ vars.AWS_INSTANCE_TYPE }}
AWS_IMAGE_ID: ${{ vars.AWS_IMAGE_ID }}
AWS_SECURITY_GROUP: ${{ vars.AWS_SECURITY_GROUP }}
outputs:
EC2_INSTANCE_ID: ${{ steps.start_runner.outputs.EC2_INSTANCE_ID }}
label: ${{ steps.start-ec2-runner.outputs.label }}
ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
steps:
- name: configure_aws
run: |
set -xe
sudo apt update
sudo apt install -y awscli jq
mkdir ${HOME}/.aws
echo "[default]" > ${HOME}/.aws/credentials
echo "aws_access_key_id = ${AWS_KEY_ID}" >> ${HOME}/.aws/credentials
echo "aws_secret_access_key = ${AWS_KEY_SECRET}" >> ${HOME}/.aws/credentials
echo "region = us-east-1" >> ${HOME}/.aws/credentials
cat ${HOME}/.aws/credentials
- name: start_runner
id: start_runner
run: |
set -xe
output=$(aws ec2 run-instances \
--instance-type ${AWS_INSTANCE_TYPE} \
--image-id ${AWS_IMAGE_ID} \
--security-group-ids ${AWS_SECURITY_GROUP} \
--count 1)
EC2_INSTANCE_ID=$(echo ${output} | jq -r ".Instances[0].InstanceId")
echo "EC2_INSTANCE_ID=${EC2_INSTANCE_ID}" >> "$GITHUB_OUTPUT"
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v1
with:
aws-access-key-id: ${{ secrets.AWS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_KEY_SECRET }}
aws-region: ${{ vars.AWS_REGION }}
- name: Start EC2 runner
id: start-ec2-runner
uses: machulav/ec2-github-runner@v2
with:
mode: start
github-token: ${{ secrets.GH_TOKEN }}
ec2-image-id: ${{ vars.AWS_IMAGE_ID }}
ec2-instance-type: ${{ vars.AWS_INSTANCE_TYPE }}
subnet-id: ${{ vars.AWS_SUBNET }}
security-group-id: ${{ vars.AWS_SECURITY_GROUP }}

guide_notebooks_regression_ec2:
runs-on: [self-hosted, nobrainer-ci-ec2-gpu]
needs: start_ec2_runner
needs: start-runner # required to start the main job when the runner is ready
runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
steps:
- name: verify-instance
env:
EC2_INSTANCE_ID: ${{ needs.start_ec2_runner.outputs.EC2_INSTANCE_ID }}
run: |
set -xe
if [[ $(ec2metadata --instance-id) != ${EC2_INSTANCE_ID} ]]; then
echo "Running on the wrong instance! Bailing, but try rerunning"
exit 1
fi
- name: clone
uses: actions/checkout@v3
- name: install
run: |
set -xe
cd ${{ github.workspace }}
python -m venv env
source env/bin/activate
pip install --upgrade pip
source /opt/tensorflow/bin/activate
export LD_LIBRARY_PATH=opt/amazon/efa/lib:/opt/amazon/openmpi/lib:/usr/local/cuda/efa/lib:/usr/local/cuda/lib:/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/targets/x86_64-linux/lib:/usr/local/lib:/usr/lib
echo $LD_LIBRARY_PATH
pip install matplotlib nilearn
pip install -e .
nobrainer info
Expand All @@ -77,34 +58,31 @@ jobs:
fi
cd ${{ github.workspace }}
source env/bin/activate
export LD_LIBRARY_PATH=opt/amazon/efa/lib:/opt/amazon/openmpi/lib:/usr/local/cuda/efa/lib:/usr/local/cuda/lib:/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/targets/x86_64-linux/lib:/usr/local/lib:/usr/lib
source /opt/tensorflow/bin/activate
for notebook_script in $(ls nobrainer-book/docs/nobrainer-guides/scripts/*.py); do
echo "running ${notebook_script}"
python ${notebook_script}
done
stop_ec2_runner:
if: always()
stop-runner:
name: Stop self-hosted EC2 runner
needs:
- start-runner # required to get output from the start-runner job
- guide_notebooks_regression_ec2 # required to wait when the main job is done
runs-on: ubuntu-latest
needs: [start_ec2_runner, guide_notebooks_regression_ec2]
env:
AWS_KEY_ID: ${{ secrets.AWS_KEY_ID }}
AWS_KEY_SECRET: ${{ secrets.AWS_KEY_SECRET }}
if: ${{ always() }} # required to stop the runner even if the error happened in the previous jobs
steps:
- name: configure_aws
run: |
set -xe
sudo apt update
sudo apt install -y awscli
mkdir ${HOME}/.aws
echo "[default]" > ${HOME}/.aws/credentials
echo "aws_access_key_id = ${AWS_KEY_ID}" >> ${HOME}/.aws/credentials
echo "aws_secret_access_key = ${AWS_KEY_SECRET}" >> ${HOME}/.aws/credentials
echo "region = us-east-1" >> ${HOME}/.aws/credentials
cat ${HOME}/.aws/credentials
- name: stop_runner
env:
EC2_INSTANCE_ID: ${{ needs.start_ec2_runner.outputs.EC2_INSTANCE_ID }}
run: |
set -xe
aws ec2 terminate-instances --instance-ids ${EC2_INSTANCE_ID}
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v1
with:
aws-access-key-id: ${{ secrets.AWS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_KEY_SECRET }}
aws-region: ${{ vars.AWS_REGION }}
- name: Stop EC2 runner
uses: machulav/ec2-github-runner@v2
with:
mode: stop
github-token: ${{ secrets.GH_TOKEN }}
label: ${{ needs.start-runner.outputs.label }}
ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}
30 changes: 30 additions & 0 deletions .github/workflows/provision_runner.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/usr/bin/env bash
#
# Download the latest GitHub Actions runner for linux-x64, register it with
# the neuronets organization under the EC2 instance id (used as both runner
# name and label), and install/start it as a service.
#
# Expects in the working directory:
#   gh_token   file whose contents are sent as the Bearer token to the
#              GitHub REST API
# Requires curl, jq, tar, sudo and ec2metadata on PATH.
# NOTE(review): config.sh and svc.sh are presumably provided by the extracted
# runner archive - confirm.

set -euo pipefail

readonly RELEASE_URL="https://api.github.com/repos/actions/runner/releases/latest"
readonly REG_TOKEN_URL="https://api.github.com/orgs/neuronets/actions/runners/registration-token"

# Print an error message to stderr and abort.
die() {
  printf 'provision_runner: %s\n' "$*" >&2
  exit 1
}

#######################################
# Call the GitHub REST API and write the response body to stdout.
# Globals:   GH_TOKEN (read)
# Arguments: $1 - HTTP method, $2 - full request URL
# Returns:   curl's exit status; non-zero on HTTP errors because of -f
#######################################
gh_api() {
  local method=$1
  local url=$2
  curl -fsSL \
    -X "${method}" \
    -H "Accept: application/vnd.github+json" \
    -H "Authorization: Bearer ${GH_TOKEN}" \
    -H "X-GitHub-Api-Version: 2022-11-28" \
    "${url}"
}

# Tracing is deliberately left off in this section: xtrace would print the
# API token and the registration token into the logs.
[[ -r gh_token ]] || die "cannot read ./gh_token"
GH_TOKEN=$(<gh_token)
[[ -n "${GH_TOKEN}" ]] || die "./gh_token is empty"

# Ask the releases API for the asset URL instead of scraping the HTML
# releases page, whose markup is not a stable interface. The pattern accepts
# only the plain versioned tarball, not other linux-x64 asset variants.
ar_url=$(
  gh_api GET "${RELEASE_URL}" \
    | jq -r 'first(.assets[]
        | select(.name | test("^actions-runner-linux-x64-[0-9.]+\\.tar\\.gz$"))
        | .browser_download_url)'
)
[[ -n "${ar_url}" ]] || die "could not find a linux-x64 runner archive in the latest release"

curl -fsSL -O "${ar_url}"
tar xzf "${ar_url##*/}"

reg_token=$(gh_api POST "${REG_TOKEN_URL}" | jq -r '.token // empty')
[[ -n "${reg_token}" ]] || die "GitHub API did not return a runner registration token"

runner_id=$(ec2metadata --instance-id)
[[ -n "${runner_id}" ]] || die "could not determine the EC2 instance id"

./config.sh \
  --url https://github.com/neuronets \
  --token "${reg_token}" \
  --name "${runner_id}" \
  --labels "${runner_id}" \
  --unattended

# No secrets are involved past this point, so trace the service commands.
set -x
sudo ./svc.sh install
sudo ./svc.sh start

0 comments on commit 70a088c

Please sign in to comment.