From aaadad7f0612034d80871bb403d6cb80c3772b1f Mon Sep 17 00:00:00 2001 From: AdrianAbeyta Date: Wed, 1 Nov 2023 20:36:32 +0000 Subject: [PATCH 01/27] Testing CI/CD --- HelloWorld.txt | 1 + 1 file changed, 1 insertion(+) create mode 100644 HelloWorld.txt diff --git a/HelloWorld.txt b/HelloWorld.txt new file mode 100644 index 00000000000000..dc2726c4062d1a --- /dev/null +++ b/HelloWorld.txt @@ -0,0 +1 @@ +File to test ci/cd github runner. From 2f689564337f87ed98ab708e83ea53c0222e789a Mon Sep 17 00:00:00 2001 From: AdrianAbeyta Date: Wed, 1 Nov 2023 20:43:06 +0000 Subject: [PATCH 02/27] Testing CI --- src/test.txt | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 src/test.txt diff --git a/src/test.txt b/src/test.txt new file mode 100644 index 00000000000000..e69de29bb2d1d6 From 878d59b939f4635e0557bc95c9adacf2167ffeb7 Mon Sep 17 00:00:00 2001 From: Adrian Abeyta Date: Mon, 6 Nov 2023 13:57:57 -0600 Subject: [PATCH 03/27] Update self-push-amd.yml --- .github/workflows/self-push-amd.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/self-push-amd.yml b/.github/workflows/self-push-amd.yml index 8751698277685d..fdd59cf1b6b391 100644 --- a/.github/workflows/self-push-amd.yml +++ b/.github/workflows/self-push-amd.yml @@ -19,7 +19,7 @@ env: jobs: check_runner_status: name: Check Runner Status - runs-on: ubuntu-latest + runs-on: rocm steps: - name: Checkout transformers uses: actions/checkout@v3 From 39c3676631afca3548eee3e9979786c1e64843ab Mon Sep 17 00:00:00 2001 From: root Date: Mon, 6 Nov 2023 23:31:13 +0000 Subject: [PATCH 04/27] Remove CI Tests --- .github/workflows/self-push-amd.yml | 36 ++--------------------------- 1 file changed, 2 insertions(+), 34 deletions(-) diff --git a/.github/workflows/self-push-amd.yml b/.github/workflows/self-push-amd.yml index fdd59cf1b6b391..43bcdcf7fe4b40 100644 --- a/.github/workflows/self-push-amd.yml +++ b/.github/workflows/self-push-amd.yml @@ -17,44 +17,12 @@ env: RUN_PT_TF_CROSS_TESTS: 1 jobs: - check_runner_status: - name: Check Runner Status - runs-on: rocm - steps: - - name: Checkout transformers - uses: actions/checkout@v3 - with: - fetch-depth: 2 - - - name: Check Runner Status - run: python utils/check_self_hosted_runner.py --target_runners amd-mi210-single-gpu-ci-runner-docker --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }} - - check_runners: - name: Check Runners - needs: check_runner_status - strategy: - matrix: - machine_type: [single-gpu, multi-gpu] - runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] - container: - image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now - options: --device /dev/kfd --device /dev/dri --env HIP_VISIBLE_DEVICES --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - steps: - - name: ROCM-SMI - run: | - rocminfo | grep "Agent" -A 14 - - name: Show HIP environment - run: | - echo "HIP: $HIP_VISIBLE_DEVICES" - echo "ROCR: $ROCR_VISIBLE_DEVICES" - setup_gpu: name: Setup - needs: check_runners strategy: matrix: machine_type: [single-gpu, multi-gpu] - runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] + runs-on: [rocm ,'${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] container: image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now options: --device /dev/kfd --device /dev/dri --env HIP_VISIBLE_DEVICES --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ @@ -152,7 +120,7 @@ jobs: matrix: folders: ${{ fromJson(needs.setup_gpu.outputs.matrix) }} machine_type: [single-gpu, multi-gpu] - runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] + runs-on: [rocm, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] container: image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now options: --device /dev/kfd --device /dev/dri --env HIP_VISIBLE_DEVICES --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ From df6d2be6815813442bda33293cf2b3b6feb8412d Mon Sep 17 00:00:00 2001 From: AdrianAbeyta Date: Mon, 6 Nov 2023 23:37:44 +0000 Subject: [PATCH 05/27] Added runner check --- .github/workflows/self-push-amd.yml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/.github/workflows/self-push-amd.yml b/.github/workflows/self-push-amd.yml index 43bcdcf7fe4b40..cedf855e36b9ff 100644 --- a/.github/workflows/self-push-amd.yml +++ b/.github/workflows/self-push-amd.yml @@ -17,8 +17,28 @@ env: RUN_PT_TF_CROSS_TESTS: 1 jobs: + check_runners: + name: Check Runners + needs: check_runner_status + strategy: + matrix: + machine_type: [single-gpu, multi-gpu] + runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] + container: + image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now + options: --device /dev/kfd --device /dev/dri --env HIP_VISIBLE_DEVICES --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ + steps: + - name: ROCM-SMI + run: | + rocminfo | grep "Agent" -A 14 + - name: Show HIP environment + run: | + echo "HIP: $HIP_VISIBLE_DEVICES" + echo "ROCR: $ROCR_VISIBLE_DEVICES" + setup_gpu: name: Setup + needs: check_runners strategy: matrix: machine_type: [single-gpu, multi-gpu] From 06deb155174a933c860abb2c85c3ebc3ad42ecfd Mon Sep 17 00:00:00 2001 From: AdrianAbeyta Date: Mon, 6 Nov 2023 23:39:57 +0000 Subject: [PATCH 06/27] Remove dependency on status --- .github/workflows/self-push-amd.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/self-push-amd.yml b/.github/workflows/self-push-amd.yml index cedf855e36b9ff..0de8ea84a3f611 100644 --- a/.github/workflows/self-push-amd.yml +++ b/.github/workflows/self-push-amd.yml @@ -19,7 +19,6 @@ env: jobs: check_runners: name: Check Runners - needs: check_runner_status strategy: matrix: machine_type: [single-gpu, multi-gpu] From 2622d0ad30c83d701135156fde154ee5fb91c735 Mon Sep 17 00:00:00 2001 From: Adrian Abeyta Date: Mon, 6 Nov 2023 17:49:03 -0600 Subject: [PATCH 07/27] Update self-push-amd.yml Removed workload reporting to slack --- .github/workflows/self-push-amd.yml | 27 ++------------------------- 1 file changed, 2 insertions(+), 25 deletions(-) diff --git a/.github/workflows/self-push-amd.yml b/.github/workflows/self-push-amd.yml index 0de8ea84a3f611..ef929dd268006a 100644 --- a/.github/workflows/self-push-amd.yml +++ b/.github/workflows/self-push-amd.yml @@ -231,7 +231,7 @@ jobs: runs-on: ubuntu-latest if: always() needs: [ - check_runner_status, +# check_runner_status, check_runners, setup_gpu, run_tests_amdgpu, @@ -243,7 +243,7 @@ jobs: shell: bash # For the meaning of these environment variables, see the job `Setup` run: | - echo "Runner availability: ${{ needs.check_runner_status.result }}" + # echo "Runner availability: ${{ needs.check_runner_status.result }}" echo "Setup status: ${{ needs.setup_gpu.result }}" echo "Runner status: ${{ needs.check_runners.result }}" @@ -286,26 +286,3 @@ jobs: echo "log = $(git log -n 1)" - uses: actions/download-artifact@v3 - - name: Send message to Slack - env: - CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }} - CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }} - CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }} - CI_SLACK_CHANNEL_ID_AMD: ${{ secrets.CI_SLACK_CHANNEL_ID_AMD }} - CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }} - CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_AMD }} - ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }} - CI_EVENT: Push CI (AMD) - ${{ inputs.gpu_flavor }} - CI_TITLE_PUSH: ${{ github.event.head_commit.message }} - CI_TITLE_WORKFLOW_RUN: ${{ github.event.workflow_run.head_commit.message }} - CI_SHA: ${{ env.CI_SHA }} - RUNNER_STATUS: ${{ needs.check_runner_status.result }} - RUNNER_ENV_STATUS: ${{ needs.check_runners.result }} - SETUP_STATUS: ${{ needs.setup_gpu.result }} - - # We pass `needs.setup_gpu.outputs.matrix` as the argument. A processing in `notification_service.py` to change - # `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`. - run: | - pip install slack_sdk - pip show slack_sdk - python utils/notification_service.py "${{ needs.setup_gpu.outputs.matrix }}" From 865fcab00db661e5d62089458827ea0141a902f5 Mon Sep 17 00:00:00 2001 From: Adrian Abeyta Date: Mon, 6 Nov 2023 17:51:55 -0600 Subject: [PATCH 08/27] Update self-push-amd.yml --- .github/workflows/self-push-amd.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/self-push-amd.yml b/.github/workflows/self-push-amd.yml index ef929dd268006a..7f2d55a6c573d4 100644 --- a/.github/workflows/self-push-amd.yml +++ b/.github/workflows/self-push-amd.yml @@ -284,5 +284,3 @@ jobs: echo "updated branch = $(git branch --show-current)" git checkout ${{ env.CI_SHA }} echo "log = $(git log -n 1)" - - - uses: actions/download-artifact@v3 From 528320ad4656d75fc17cd2e68e7c39c394d22f96 Mon Sep 17 00:00:00 2001 From: Adrian Abeyta Date: Mon, 6 Nov 2023 17:53:17 -0600 Subject: [PATCH 09/27] Update self-push-amd.yml --- .github/workflows/self-push-amd.yml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/workflows/self-push-amd.yml b/.github/workflows/self-push-amd.yml index 7f2d55a6c573d4..7d4dfa7466d997 100644 --- a/.github/workflows/self-push-amd.yml +++ b/.github/workflows/self-push-amd.yml @@ -231,19 +231,15 @@ jobs: runs-on: ubuntu-latest if: always() needs: [ -# check_runner_status, check_runners, setup_gpu, - run_tests_amdgpu, -# run_tests_torch_cuda_extensions_single_gpu, -# run_tests_torch_cuda_extensions_multi_gpu + run_tests_amdgpu ] steps: - name: Preliminary job status shell: bash # For the meaning of these environment variables, see the job `Setup` run: | - # echo "Runner availability: ${{ needs.check_runner_status.result }}" echo "Setup status: ${{ needs.setup_gpu.result }}" echo "Runner status: ${{ needs.check_runners.result }}" From 9dbb2998ce27c01bc23ae57dc8e844eec709c009 Mon Sep 17 00:00:00 2001 From: Adrian Abeyta Date: Mon, 6 Nov 2023 17:58:59 -0600 Subject: [PATCH 10/27] Update self-push-amd.yml --- .github/workflows/self-push-amd.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/self-push-amd.yml b/.github/workflows/self-push-amd.yml index 7d4dfa7466d997..7d548c440f8af8 100644 --- a/.github/workflows/self-push-amd.yml +++ b/.github/workflows/self-push-amd.yml @@ -22,7 +22,7 @@ jobs: strategy: matrix: machine_type: [single-gpu, multi-gpu] - runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] + runs-on: [rocm, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] container: image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now options: --device /dev/kfd --device /dev/dri --env HIP_VISIBLE_DEVICES --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ From af4027799e3122d136c7920df3aca03bdab1e100 Mon Sep 17 00:00:00 2001 From: omkar kakarparthi <75638701+okakarpa@users.noreply.github.com> Date: Tue, 7 Nov 2023 10:21:48 -0600 Subject: [PATCH 11/27] Update self-push-amd-mi250-caller.yml --- .github/workflows/self-push-amd-mi250-caller.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/self-push-amd-mi250-caller.yml b/.github/workflows/self-push-amd-mi250-caller.yml index a55378c4caa54b..96d213e455eb1b 100644 --- a/.github/workflows/self-push-amd-mi250-caller.yml +++ b/.github/workflows/self-push-amd-mi250-caller.yml @@ -16,6 +16,7 @@ on: - "utils/**" jobs: + runs-on: rocm run_amd_ci: name: AMD mi250 if: (cancelled() != true) && ((github.event_name != 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller'))) From 67499a027c322ba04a4ce56b454a1e5786a301ac Mon Sep 17 00:00:00 2001 From: Adrian Abeyta Date: Tue, 7 Nov 2023 13:28:12 -0600 Subject: [PATCH 12/27] Update self-push-amd.yml --- .github/workflows/self-push-amd.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/self-push-amd.yml b/.github/workflows/self-push-amd.yml index 7d548c440f8af8..f08e42283a3472 100644 --- a/.github/workflows/self-push-amd.yml +++ b/.github/workflows/self-push-amd.yml @@ -22,7 +22,7 @@ jobs: strategy: matrix: machine_type: [single-gpu, multi-gpu] - runs-on: [rocm, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] + runs-on: rocm container: image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now options: --device /dev/kfd --device /dev/dri --env HIP_VISIBLE_DEVICES --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ @@ -41,7 +41,7 @@ jobs: strategy: matrix: machine_type: [single-gpu, multi-gpu] - runs-on: [rocm ,'${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] + runs-on: rocm container: image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now options: --device /dev/kfd --device /dev/dri --env HIP_VISIBLE_DEVICES --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ From 705a709fc33b347353c1ba3529822b5a5663a353 Mon Sep 17 00:00:00 2001 From: Adrian Abeyta Date: Tue, 7 Nov 2023 13:49:32 -0600 Subject: [PATCH 13/27] Update self-push-amd-mi250-caller.yml --- .github/workflows/self-push-amd-mi250-caller.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/self-push-amd-mi250-caller.yml b/.github/workflows/self-push-amd-mi250-caller.yml index 96d213e455eb1b..a55378c4caa54b 100644 --- a/.github/workflows/self-push-amd-mi250-caller.yml +++ b/.github/workflows/self-push-amd-mi250-caller.yml @@ -16,7 +16,6 @@ on: - "utils/**" jobs: - runs-on: rocm run_amd_ci: name: AMD mi250 if: (cancelled() != true) && ((github.event_name != 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller'))) From c1e49d2763f7b84b644422053c3a4319cd39c1e4 Mon Sep 17 00:00:00 2001 From: Adrian Abeyta Date: Tue, 7 Nov 2023 14:35:42 -0600 Subject: [PATCH 14/27] Update self-push-amd.yml Update image to use internal transformers. --- .github/workflows/self-push-amd.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/self-push-amd.yml b/.github/workflows/self-push-amd.yml index f08e42283a3472..53da26d95d9df6 100644 --- a/.github/workflows/self-push-amd.yml +++ b/.github/workflows/self-push-amd.yml @@ -49,6 +49,12 @@ jobs: matrix: ${{ steps.set-matrix.outputs.matrix }} test_map: ${{ steps.set-matrix.outputs.test_map }} steps: + - name: Use internal transformers repository + shell: bash + run: | + rm -r transformers + git clone https://github.com/ROCmSoftwarePlatform/transformers.git + # Necessary to get the correct branch name and commit SHA for `workflow_run` event # We also take into account the `push` event (we might want to test some changes in a branch) - name: Prepare custom environment variables From 5a22b23b010795fac4f3dba657ce3be529e820a1 Mon Sep 17 00:00:00 2001 From: Adrian Abeyta Date: Tue, 7 Nov 2023 14:45:05 -0600 Subject: [PATCH 15/27] Update self-push-amd.yml --- .github/workflows/self-push-amd.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/self-push-amd.yml b/.github/workflows/self-push-amd.yml index 53da26d95d9df6..e3e6107cc999fe 100644 --- a/.github/workflows/self-push-amd.yml +++ b/.github/workflows/self-push-amd.yml @@ -52,8 +52,10 @@ jobs: - name: Use internal transformers repository shell: bash run: | + cd / rm -r transformers git clone https://github.com/ROCmSoftwarePlatform/transformers.git + cd - # Necessary to get the correct branch name and commit SHA for `workflow_run` event # We also take into account the `push` event (we might want to test some changes in a branch) From f4fef578e6839f32d82be18548fd1a00bcba1f90 Mon Sep 17 00:00:00 2001 From: Adrian Abeyta Date: Tue, 7 Nov 2023 14:53:35 -0600 Subject: [PATCH 16/27] Update self-push-amd.yml --- .github/workflows/self-push-amd.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/self-push-amd.yml b/.github/workflows/self-push-amd.yml index e3e6107cc999fe..c49986a52f5540 100644 --- a/.github/workflows/self-push-amd.yml +++ b/.github/workflows/self-push-amd.yml @@ -147,7 +147,7 @@ jobs: matrix: folders: ${{ fromJson(needs.setup_gpu.outputs.matrix) }} machine_type: [single-gpu, multi-gpu] - runs-on: [rocm, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] + runs-on: rocm container: image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now options: --device /dev/kfd --device /dev/dri --env HIP_VISIBLE_DEVICES --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ From 228ec6f9ede53a9b4e5a0cd6f56e6d804117223f Mon Sep 17 00:00:00 2001 From: Adrian Abeyta Date: Tue, 7 Nov 2023 15:16:10 -0600 Subject: [PATCH 17/27] Update self-push-amd.yml --- .github/workflows/self-push-amd.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/self-push-amd.yml b/.github/workflows/self-push-amd.yml index c49986a52f5540..b2839307a0cde9 100644 --- a/.github/workflows/self-push-amd.yml +++ b/.github/workflows/self-push-amd.yml @@ -141,12 +141,12 @@ jobs: name: Model tests needs: setup_gpu # `dummy` means there is no test to run - if: contains(fromJson(needs.setup_gpu.outputs.matrix), 'dummy') != true - strategy: - fail-fast: false - matrix: - folders: ${{ fromJson(needs.setup_gpu.outputs.matrix) }} - machine_type: [single-gpu, multi-gpu] + #if: contains(fromJson(needs.setup_gpu.outputs.matrix), 'dummy') != true + #strategy: + # fail-fast: false + # matrix: + # folders: ${{ fromJson(needs.setup_gpu.outputs.matrix) }} + # machine_type: [single-gpu, multi-gpu] runs-on: rocm container: image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now From af55eb32c50585b0f7f3fd42dc391a13ff1741ab Mon Sep 17 00:00:00 2001 From: Adrian Abeyta Date: Tue, 7 Nov 2023 15:31:24 -0600 Subject: [PATCH 18/27] Update self-push-amd.yml --- .github/workflows/self-push-amd.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/self-push-amd.yml b/.github/workflows/self-push-amd.yml index b2839307a0cde9..cc405b7ca40293 100644 --- a/.github/workflows/self-push-amd.yml +++ b/.github/workflows/self-push-amd.yml @@ -152,6 +152,13 @@ jobs: image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now options: --device /dev/kfd --device /dev/dri --env HIP_VISIBLE_DEVICES --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ steps: + - name: Use internal transformers repository + shell: bash + run: | + cd / + rm -r transformers + git clone https://github.com/ROCmSoftwarePlatform/transformers.git + cd - # Necessary to get the correct branch name and commit SHA for `workflow_run` event # We also take into account the `push` event (we might want to test some changes in a branch) - name: Prepare custom environment variables From 257f0a6e96e556ad3f6b431875d876b3d4b15070 Mon Sep 17 00:00:00 2001 From: Adrian Abeyta Date: Tue, 7 Nov 2023 15:52:48 -0600 Subject: [PATCH 19/27] Modified CI to use internal transformers --- .github/workflows/self-push-amd.yml | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/self-push-amd.yml b/.github/workflows/self-push-amd.yml index cc405b7ca40293..9797537f9aeb9b 100644 --- a/.github/workflows/self-push-amd.yml +++ b/.github/workflows/self-push-amd.yml @@ -49,13 +49,12 @@ jobs: matrix: ${{ steps.set-matrix.outputs.matrix }} test_map: ${{ steps.set-matrix.outputs.test_map }} steps: - - name: Use internal transformers repository + - name: Remove transformers repository (installed during docker image build) + working-directory: / shell: bash - run: | - cd / + run: | rm -r transformers git clone https://github.com/ROCmSoftwarePlatform/transformers.git - cd - # Necessary to get the correct branch name and commit SHA for `workflow_run` event # We also take into account the `push` event (we might want to test some changes in a branch) @@ -152,13 +151,18 @@ jobs: image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now options: --device /dev/kfd --device /dev/dri --env HIP_VISIBLE_DEVICES --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ steps: - - name: Use internal transformers repository + + - name: Remove transformers repository (installed during docker image build) + working-directory: / shell: bash - run: | - cd / + run: | rm -r transformers git clone https://github.com/ROCmSoftwarePlatform/transformers.git - cd - + + - name: Reinstall transformers in edit mode (remove the one installed during docker image build) + working-directory: /transformers + run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . + # Necessary to get the correct branch name and commit SHA for `workflow_run` event # We also take into account the `push` event (we might want to test some changes in a branch) - name: Prepare custom environment variables @@ -191,10 +195,6 @@ jobs: git checkout ${{ env.CI_SHA }} echo "log = $(git log -n 1)" - - name: Reinstall transformers in edit mode (remove the one installed during docker image build) - working-directory: /transformers - run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . - - name: Echo folder ${{ matrix.folders }} shell: bash # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to From cb342488b11bf71ec735861be711a6afcaa4c6d9 Mon Sep 17 00:00:00 2001 From: Adrian Abeyta Date: Wed, 8 Nov 2023 15:31:42 -0600 Subject: [PATCH 20/27] Update self-push-amd.yml --- .github/workflows/self-push-amd.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/self-push-amd.yml b/.github/workflows/self-push-amd.yml index 9797537f9aeb9b..313f3b85a63d41 100644 --- a/.github/workflows/self-push-amd.yml +++ b/.github/workflows/self-push-amd.yml @@ -140,12 +140,12 @@ jobs: name: Model tests needs: setup_gpu # `dummy` means there is no test to run - #if: contains(fromJson(needs.setup_gpu.outputs.matrix), 'dummy') != true - #strategy: - # fail-fast: false - # matrix: - # folders: ${{ fromJson(needs.setup_gpu.outputs.matrix) }} - # machine_type: [single-gpu, multi-gpu] + if: contains(fromJson(needs.setup_gpu.outputs.matrix), 'dummy') != true + strategy: + fail-fast: false + matrix: + folders: ${{ fromJson(needs.setup_gpu.outputs.matrix) }} + machine_type: [single-gpu, multi-gpu] runs-on: rocm container: image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now From c12093a99b76474bc293a7fd7baceaad7b9595da Mon Sep 17 00:00:00 2001 From: root Date: Wed, 8 Nov 2023 23:31:15 +0000 Subject: [PATCH 21/27] Change syntax to fit internal transformers runs --- utils/tests_fetcher.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/utils/tests_fetcher.py b/utils/tests_fetcher.py index 2fd866e9d8dad3..d392c57634639a 100644 --- a/utils/tests_fetcher.py +++ b/utils/tests_fetcher.py @@ -19,7 +19,7 @@ This util is designed to fetch tests to run on a PR so that only the tests impacted by the modifications are run, and when too many models are being impacted, only run the tests of a subset of core models. It works like this. -Stage 1: Identify the modified files. For jobs that run on the main branch, it's just the diff with the last commit. +Stage 1: Identify the modified files. For jobs that run on the master branch, it's just the diff with the last commit. On a PR, this takes all the files from the branching point to the current commit (so all modifications in a PR, not just the last commit) but excludes modifications that are on docstrings or comments only. @@ -42,7 +42,7 @@ python utils/tests_fetcher.py ``` -Base use to fetch the tests on a the main branch (with diff from the last commit): +Base use to fetch the tests on a the master branch (with diff from the last commit): ```bash python utils/tests_fetcher.py --diff_with_last_commit @@ -300,7 +300,7 @@ def get_modified_python_files(diff_with_last_commit: bool = False) -> List[str]: """ Return a list of python files that have been modified between: - - the current head and the main branch if `diff_with_last_commit=False` (default) + - the current head and the master branch if `diff_with_last_commit=False` (default) - the current head and its parent commit otherwise. Returns: @@ -311,15 +311,15 @@ def get_modified_python_files(diff_with_last_commit: bool = False) -> List[str]: repo = Repo(PATH_TO_REPO) if not diff_with_last_commit: - print(f"main is at {repo.refs.main.commit}") + print(f"master is at {repo.refs.master.commit}") print(f"Current head is at {repo.head.commit}") - branching_commits = repo.merge_base(repo.refs.main, repo.head) + branching_commits = repo.merge_base(repo.refs.master, repo.head) for commit in branching_commits: print(f"Branching commit: {commit}") return get_diff(repo, repo.head.commit, branching_commits) else: - print(f"main is at {repo.head.commit}") + print(f"master is at {repo.head.commit}") parent_commits = repo.head.commit.parents for commit in parent_commits: print(f"Parent commit: {commit}") @@ -424,7 +424,7 @@ def get_doctest_files(diff_with_last_commit: bool = False) -> List[str]: """ Return a list of python and Markdown files where doc example have been modified between: - - the current head and the main branch if `diff_with_last_commit=False` (default) + - the current head and the master branch if `diff_with_last_commit=False` (default) - the current head and its parent commit otherwise. Returns: @@ -435,15 +435,15 @@ def get_doctest_files(diff_with_last_commit: bool = False) -> List[str]: test_files_to_run = [] # noqa if not diff_with_last_commit: - print(f"main is at {repo.refs.main.commit}") + print(f"master is at {repo.refs.master.commit}") print(f"Current head is at {repo.head.commit}") - branching_commits = repo.merge_base(repo.refs.main, repo.head) + branching_commits = repo.merge_base(repo.refs.master, repo.head) for commit in branching_commits: print(f"Branching commit: {commit}") test_files_to_run = get_diff_for_doctesting(repo, repo.head.commit, branching_commits) else: - print(f"main is at {repo.head.commit}") + print(f"master is at {repo.head.commit}") parent_commits = repo.head.commit.parents for commit in parent_commits: print(f"Parent commit: {commit}") @@ -452,7 +452,7 @@ def get_doctest_files(diff_with_last_commit: bool = False) -> List[str]: all_test_files_to_run = get_all_doctest_files() # Add to the test files to run any removed entry from "utils/not_doctested.txt". - new_test_files = get_new_doctest_files(repo, repo.head.commit, repo.refs.main.commit) + new_test_files = get_new_doctest_files(repo, repo.head.commit, repo.refs.master.commit) test_files_to_run = list(set(test_files_to_run + new_test_files)) # Do not run slow doctest tests on CircleCI @@ -766,8 +766,8 @@ def create_reverse_dependency_map() -> Dict[str, List[str]]: something_changed = False for m in all_modules: for d in direct_deps[m]: - # We stop recursing at an init (cause we always end up in the main init and we don't want to add all - # files which the main init imports) + # We stop recursing at an init (cause we always end up in the master init and we don't want to add all + # files which the master init imports) if d.endswith("__init__.py"): continue if d not in direct_deps: @@ -910,7 +910,7 @@ def infer_tests_to_run( json_output_file: Optional[str] = None, ): """ - The main function called by the test fetcher. Determines the tests to run from the diff. + The master function called by the test fetcher. Determines the tests to run from the diff. Args: output_file (`str`): @@ -922,8 +922,8 @@ def infer_tests_to_run( - doctest_list.txt: The list of doctests to run. diff_with_last_commit (`bool`, *optional*, defaults to `False`): - Whether to analyze the diff with the last commit (for use on the main branch after a PR is merged) or with - the branching point from main (for use on each PR). + Whether to analyze the diff with the last commit (for use on the master branch after a PR is merged) or with + the branching point from master (for use on each PR). filter_models (`bool`, *optional*, defaults to `True`): Whether or not to filter the tests to core models only, when a file modified results in a lot of model tests. @@ -1112,8 +1112,8 @@ def parse_commit_message(commit_message: str) -> Dict[str, bool]: print("Force-launching all tests") diff_with_last_commit = args.diff_with_last_commit - if not diff_with_last_commit and not repo.head.is_detached and repo.head.ref == repo.refs.main: - print("main branch detected, fetching tests against last commit.") + if not diff_with_last_commit and not repo.head.is_detached and repo.head.ref == repo.refs.master: + print("master branch detected, fetching tests against last commit.") diff_with_last_commit = True if not commit_flags["test_all"]: From 6a1a7d5c6502327e047c3e48499d3033a2c20bb2 Mon Sep 17 00:00:00 2001 From: AdrianAbeyta Date: Wed, 8 Nov 2023 23:42:58 +0000 Subject: [PATCH 22/27] Modify file to test ci --- examples/pytorch/language-modeling/run_clm.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/pytorch/language-modeling/run_clm.py b/examples/pytorch/language-modeling/run_clm.py index 15c9261be48c17..24b9e150f0420e 100755 --- a/examples/pytorch/language-modeling/run_clm.py +++ b/examples/pytorch/language-modeling/run_clm.py @@ -57,6 +57,7 @@ # Will error if the minimal version of Transformers is not installed. Remove at your own risks. check_min_version("4.35.0.dev0") +print("Modify file test for CI") require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt") From 4702912a31d5187492bdb54372a9d83fa506dbcb Mon Sep 17 00:00:00 2001 From: AdrianAbeyta Date: Wed, 8 Nov 2023 23:52:51 +0000 Subject: [PATCH 23/27] Modify test file --- examples/pytorch/language-modeling/run_clm.py | 1 - tests/models/ibert/test_modeling_ibert.py | 6 +++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/examples/pytorch/language-modeling/run_clm.py b/examples/pytorch/language-modeling/run_clm.py index 24b9e150f0420e..15c9261be48c17 100755 --- a/examples/pytorch/language-modeling/run_clm.py +++ b/examples/pytorch/language-modeling/run_clm.py @@ -57,7 +57,6 @@ # Will error if the minimal version of Transformers is not installed. Remove at your own risks. check_min_version("4.35.0.dev0") -print("Modify file test for CI") require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt") diff --git a/tests/models/ibert/test_modeling_ibert.py b/tests/models/ibert/test_modeling_ibert.py index 096a55169a0096..9a1b6abf1756e5 100644 --- a/tests/models/ibert/test_modeling_ibert.py +++ b/tests/models/ibert/test_modeling_ibert.py @@ -519,7 +519,7 @@ def test_int_gelu(self): gelu_q = IntGELU(quant_mode=True) gelu_dq = nn.GELU() - x_int = torch.range(-10000, 10000, 1) + x_int = torch.arange(-10000, 10001, 1) x_scaling_factor = torch.tensor(0.001) x = x_int * x_scaling_factor @@ -534,7 +534,7 @@ def test_int_gelu(self): self.assertTrue(torch.allclose(q_int, q_int.round(), atol=1e-4)) def test_force_dequant_gelu(self): - x_int = torch.range(-10000, 10000, 1) + x_int = torch.arange(-10000, 10001, 1) x_scaling_factor = torch.tensor(0.001) x = x_int * x_scaling_factor @@ -565,7 +565,7 @@ def test_int_softmax(self): softmax_q = IntSoftmax(output_bit, quant_mode=True) softmax_dq = nn.Softmax() - # x_int = torch.range(-10000, 10000, 1) + def _test(array): x_int = torch.tensor(array) x_scaling_factor = torch.tensor(0.1) From cbfa4037eba6ac537bfb5800556e9d7b5d8afe4a Mon Sep 17 00:00:00 2001 From: AdrianAbeyta Date: Thu, 9 Nov 2023 20:42:05 +0000 Subject: [PATCH 24/27] Clean up test cases --- HelloWorld.txt | 1 - src/test.txt | 0 tests/models/ibert/test_modeling_ibert.py | 6 +++--- 3 files changed, 3 insertions(+), 4 deletions(-) delete mode 100644 HelloWorld.txt delete mode 100644 src/test.txt diff --git a/HelloWorld.txt b/HelloWorld.txt deleted file mode 100644 index dc2726c4062d1a..00000000000000 --- a/HelloWorld.txt +++ /dev/null @@ -1 +0,0 @@ -File to test ci/cd github runner. diff --git a/src/test.txt b/src/test.txt deleted file mode 100644 index e69de29bb2d1d6..00000000000000 diff --git a/tests/models/ibert/test_modeling_ibert.py b/tests/models/ibert/test_modeling_ibert.py index 9a1b6abf1756e5..096a55169a0096 100644 --- a/tests/models/ibert/test_modeling_ibert.py +++ b/tests/models/ibert/test_modeling_ibert.py @@ -519,7 +519,7 @@ def test_int_gelu(self): gelu_q = IntGELU(quant_mode=True) gelu_dq = nn.GELU() - x_int = torch.arange(-10000, 10001, 1) + x_int = torch.range(-10000, 10000, 1) x_scaling_factor = torch.tensor(0.001) x = x_int * x_scaling_factor @@ -534,7 +534,7 @@ def test_int_gelu(self): self.assertTrue(torch.allclose(q_int, q_int.round(), atol=1e-4)) def test_force_dequant_gelu(self): - x_int = torch.arange(-10000, 10001, 1) + x_int = torch.range(-10000, 10000, 1) x_scaling_factor = torch.tensor(0.001) x = x_int * x_scaling_factor @@ -565,7 +565,7 @@ def test_int_softmax(self): softmax_q = IntSoftmax(output_bit, quant_mode=True) softmax_dq = nn.Softmax() - + # x_int = torch.range(-10000, 10000, 1) def _test(array): x_int = torch.tensor(array) x_scaling_factor = torch.tensor(0.1) From 3b73ca48cf6e15e0411ee3df3da5b72d9e1ab735 Mon Sep 17 00:00:00 2001 From: Adrian Abeyta Date: Thu, 9 Nov 2023 15:34:08 -0600 Subject: [PATCH 25/27] Update MI250 caller to run jobs on PR. --- .github/workflows/self-push-amd-mi250-caller.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/self-push-amd-mi250-caller.yml b/.github/workflows/self-push-amd-mi250-caller.yml index a55378c4caa54b..67ad9c9a4c8e44 100644 --- a/.github/workflows/self-push-amd-mi250-caller.yml +++ b/.github/workflows/self-push-amd-mi250-caller.yml @@ -13,7 +13,10 @@ on: - "tests/**" - ".github/**" - "templates/**" - - "utils/**" + - "utils/**" + pull_request: + types: [opened, reopened] + branches: ["main"] jobs: run_amd_ci: From 64394686b09d8bfdfa9e49259388d85d45ada777 Mon Sep 17 00:00:00 2001 From: Adrian Abeyta Date: Thu, 9 Nov 2023 15:35:11 -0600 Subject: [PATCH 26/27] Update mi210 caller to test on PR. --- .github/workflows/self-push-amd-mi210-caller.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/self-push-amd-mi210-caller.yml b/.github/workflows/self-push-amd-mi210-caller.yml index 5dd010ef66d8fb..86ef67a46d6a6a 100644 --- a/.github/workflows/self-push-amd-mi210-caller.yml +++ b/.github/workflows/self-push-amd-mi210-caller.yml @@ -14,6 +14,9 @@ on: - ".github/**" - "templates/**" - "utils/**" + pull_request: + types: [opened, reopened] + branches: ["main"] jobs: run_amd_ci: From d2a2a60654bd6120d9c3641bca344e1e73d5e9db Mon Sep 17 00:00:00 2001 From: AdrianAbeyta Date: Thu, 9 Nov 2023 21:38:27 +0000 Subject: [PATCH 27/27] Revert to upstream syntax --- utils/tests_fetcher.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/utils/tests_fetcher.py b/utils/tests_fetcher.py index d392c57634639a..2fd866e9d8dad3 100644 --- a/utils/tests_fetcher.py +++ b/utils/tests_fetcher.py @@ -19,7 +19,7 @@ This util is designed to fetch tests to run on a PR so that only the tests impacted by the modifications are run, and when too many models are being impacted, only run the tests of a subset of core models. It works like this. -Stage 1: Identify the modified files. For jobs that run on the master branch, it's just the diff with the last commit. +Stage 1: Identify the modified files. For jobs that run on the main branch, it's just the diff with the last commit. On a PR, this takes all the files from the branching point to the current commit (so all modifications in a PR, not just the last commit) but excludes modifications that are on docstrings or comments only. @@ -42,7 +42,7 @@ python utils/tests_fetcher.py ``` -Base use to fetch the tests on a the master branch (with diff from the last commit): +Base use to fetch the tests on a the main branch (with diff from the last commit): ```bash python utils/tests_fetcher.py --diff_with_last_commit @@ -300,7 +300,7 @@ def get_modified_python_files(diff_with_last_commit: bool = False) -> List[str]: """ Return a list of python files that have been modified between: - - the current head and the master branch if `diff_with_last_commit=False` (default) + - the current head and the main branch if `diff_with_last_commit=False` (default) - the current head and its parent commit otherwise. Returns: @@ -311,15 +311,15 @@ def get_modified_python_files(diff_with_last_commit: bool = False) -> List[str]: repo = Repo(PATH_TO_REPO) if not diff_with_last_commit: - print(f"master is at {repo.refs.master.commit}") + print(f"main is at {repo.refs.main.commit}") print(f"Current head is at {repo.head.commit}") - branching_commits = repo.merge_base(repo.refs.master, repo.head) + branching_commits = repo.merge_base(repo.refs.main, repo.head) for commit in branching_commits: print(f"Branching commit: {commit}") return get_diff(repo, repo.head.commit, branching_commits) else: - print(f"master is at {repo.head.commit}") + print(f"main is at {repo.head.commit}") parent_commits = repo.head.commit.parents for commit in parent_commits: print(f"Parent commit: {commit}") @@ -424,7 +424,7 @@ def get_doctest_files(diff_with_last_commit: bool = False) -> List[str]: """ Return a list of python and Markdown files where doc example have been modified between: - - the current head and the master branch if `diff_with_last_commit=False` (default) + - the current head and the main branch if `diff_with_last_commit=False` (default) - the current head and its parent commit otherwise. Returns: @@ -435,15 +435,15 @@ def get_doctest_files(diff_with_last_commit: bool = False) -> List[str]: test_files_to_run = [] # noqa if not diff_with_last_commit: - print(f"master is at {repo.refs.master.commit}") + print(f"main is at {repo.refs.main.commit}") print(f"Current head is at {repo.head.commit}") - branching_commits = repo.merge_base(repo.refs.master, repo.head) + branching_commits = repo.merge_base(repo.refs.main, repo.head) for commit in branching_commits: print(f"Branching commit: {commit}") test_files_to_run = get_diff_for_doctesting(repo, repo.head.commit, branching_commits) else: - print(f"master is at {repo.head.commit}") + print(f"main is at {repo.head.commit}") parent_commits = repo.head.commit.parents for commit in parent_commits: print(f"Parent commit: {commit}") @@ -452,7 +452,7 @@ def get_doctest_files(diff_with_last_commit: bool = False) -> List[str]: all_test_files_to_run = get_all_doctest_files() # Add to the test files to run any removed entry from "utils/not_doctested.txt". - new_test_files = get_new_doctest_files(repo, repo.head.commit, repo.refs.master.commit) + new_test_files = get_new_doctest_files(repo, repo.head.commit, repo.refs.main.commit) test_files_to_run = list(set(test_files_to_run + new_test_files)) # Do not run slow doctest tests on CircleCI @@ -766,8 +766,8 @@ def create_reverse_dependency_map() -> Dict[str, List[str]]: something_changed = False for m in all_modules: for d in direct_deps[m]: - # We stop recursing at an init (cause we always end up in the master init and we don't want to add all - # files which the master init imports) + # We stop recursing at an init (cause we always end up in the main init and we don't want to add all + # files which the main init imports) if d.endswith("__init__.py"): continue if d not in direct_deps: @@ -910,7 +910,7 @@ def infer_tests_to_run( json_output_file: Optional[str] = None, ): """ - The master function called by the test fetcher. Determines the tests to run from the diff. + The main function called by the test fetcher. Determines the tests to run from the diff. Args: output_file (`str`): @@ -922,8 +922,8 @@ def infer_tests_to_run( - doctest_list.txt: The list of doctests to run. diff_with_last_commit (`bool`, *optional*, defaults to `False`): - Whether to analyze the diff with the last commit (for use on the master branch after a PR is merged) or with - the branching point from master (for use on each PR). + Whether to analyze the diff with the last commit (for use on the main branch after a PR is merged) or with + the branching point from main (for use on each PR). filter_models (`bool`, *optional*, defaults to `True`): Whether or not to filter the tests to core models only, when a file modified results in a lot of model tests. @@ -1112,8 +1112,8 @@ def parse_commit_message(commit_message: str) -> Dict[str, bool]: print("Force-launching all tests") diff_with_last_commit = args.diff_with_last_commit - if not diff_with_last_commit and not repo.head.is_detached and repo.head.ref == repo.refs.master: - print("master branch detected, fetching tests against last commit.") + if not diff_with_last_commit and not repo.head.is_detached and repo.head.ref == repo.refs.main: + print("main branch detected, fetching tests against last commit.") diff_with_last_commit = True if not commit_flags["test_all"]: