Skip to content

Commit

Permalink
Merge pull request #30 from ROCmSoftwarePlatform/run_amd_push_ci_caller
Browse files Browse the repository at this point in the history
Enable GitHub CI/CD runners
  • Loading branch information
AdrianAbeyta authored Nov 13, 2023
2 parents de9ddd9 + ca05884 commit 914c8b1
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 51 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/self-push-amd-mi210-caller.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ on:
- ".github/**"
- "templates/**"
- "utils/**"
pull_request:
types: [opened, reopened]
branches: ["main"]

jobs:
run_amd_ci:
Expand Down
5 changes: 4 additions & 1 deletion .github/workflows/self-push-amd-mi250-caller.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@ on:
- "tests/**"
- ".github/**"
- "templates/**"
- "utils/**"
- "utils/**"
pull_request:
types: [opened, reopened]
branches: ["main"]

jobs:
run_amd_ci:
Expand Down
73 changes: 23 additions & 50 deletions .github/workflows/self-push-amd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,25 +17,12 @@ env:
RUN_PT_TF_CROSS_TESTS: 1

jobs:
check_runner_status:
name: Check Runner Status
runs-on: ubuntu-latest
steps:
- name: Checkout transformers
uses: actions/checkout@v3
with:
fetch-depth: 2

- name: Check Runner Status
run: python utils/check_self_hosted_runner.py --target_runners amd-mi210-single-gpu-ci-runner-docker --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }}

check_runners:
name: Check Runners
needs: check_runner_status
strategy:
matrix:
machine_type: [single-gpu, multi-gpu]
runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
runs-on: rocm
container:
image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now
options: --device /dev/kfd --device /dev/dri --env HIP_VISIBLE_DEVICES --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
Expand All @@ -54,14 +41,21 @@ jobs:
strategy:
matrix:
machine_type: [single-gpu, multi-gpu]
runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
runs-on: rocm
container:
image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now
options: --device /dev/kfd --device /dev/dri --env HIP_VISIBLE_DEVICES --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
test_map: ${{ steps.set-matrix.outputs.test_map }}
steps:
- name: Remove transformers repository (installed during docker image build)
working-directory: /
shell: bash
run: |
rm -r transformers
git clone https://github.com/ROCmSoftwarePlatform/transformers.git
# Necessary to get the correct branch name and commit SHA for the `workflow_run` event.
# We also take the `push` event into account (we might want to test some changes in a branch).
- name: Prepare custom environment variables
Expand Down Expand Up @@ -152,11 +146,23 @@ jobs:
matrix:
folders: ${{ fromJson(needs.setup_gpu.outputs.matrix) }}
machine_type: [single-gpu, multi-gpu]
runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
runs-on: rocm
container:
image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now
options: --device /dev/kfd --device /dev/dri --env HIP_VISIBLE_DEVICES --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:

- name: Remove transformers repository (installed during docker image build)
working-directory: /
shell: bash
run: |
rm -r transformers
git clone https://github.com/ROCmSoftwarePlatform/transformers.git
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .

# Necessary to get the correct branch name and commit SHA for the `workflow_run` event.
# We also take the `push` event into account (we might want to test some changes in a branch).
- name: Prepare custom environment variables
Expand Down Expand Up @@ -189,10 +195,6 @@ jobs:
git checkout ${{ env.CI_SHA }}
echo "log = $(git log -n 1)"
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .

- name: Echo folder ${{ matrix.folders }}
shell: bash
# For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
Expand Down Expand Up @@ -244,19 +246,15 @@ jobs:
runs-on: ubuntu-latest
if: always()
needs: [
check_runner_status,
check_runners,
setup_gpu,
run_tests_amdgpu,
# run_tests_torch_cuda_extensions_single_gpu,
# run_tests_torch_cuda_extensions_multi_gpu
run_tests_amdgpu
]
steps:
- name: Preliminary job status
shell: bash
# For the meaning of these environment variables, see the job `Setup`
run: |
echo "Runner availability: ${{ needs.check_runner_status.result }}"
echo "Setup status: ${{ needs.setup_gpu.result }}"
echo "Runner status: ${{ needs.check_runners.result }}"
Expand Down Expand Up @@ -297,28 +295,3 @@ jobs:
echo "updated branch = $(git branch --show-current)"
git checkout ${{ env.CI_SHA }}
echo "log = $(git log -n 1)"
- uses: actions/download-artifact@v3
- name: Send message to Slack
env:
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
CI_SLACK_CHANNEL_ID_AMD: ${{ secrets.CI_SLACK_CHANNEL_ID_AMD }}
CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_AMD }}
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
CI_EVENT: Push CI (AMD) - ${{ inputs.gpu_flavor }}
CI_TITLE_PUSH: ${{ github.event.head_commit.message }}
CI_TITLE_WORKFLOW_RUN: ${{ github.event.workflow_run.head_commit.message }}
CI_SHA: ${{ env.CI_SHA }}
RUNNER_STATUS: ${{ needs.check_runner_status.result }}
RUNNER_ENV_STATUS: ${{ needs.check_runners.result }}
SETUP_STATUS: ${{ needs.setup_gpu.result }}

# We pass `needs.setup_gpu.outputs.matrix` as the argument. `notification_service.py` needs to convert folder names such as
# `models/bert` to `models_bert`, because the artifact names use `_` instead of `/`.
run: |
pip install slack_sdk
pip show slack_sdk
python utils/notification_service.py "${{ needs.setup_gpu.outputs.matrix }}"

0 comments on commit 914c8b1

Please sign in to comment.