Skip to content

Commit

Permalink
Merge branch 'main' into chhwang/code-coverage
Browse files Browse the repository at this point in the history
  • Loading branch information
chhwang authored Mar 27, 2024
2 parents 9745fc3 + 5ba6ce0 commit 6fe900f
Show file tree
Hide file tree
Showing 140 changed files with 5,542 additions and 1,662 deletions.
20 changes: 15 additions & 5 deletions .azure-pipelines/integration-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ jobs:
strategy:
matrix:
cuda11:
containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-cuda11.8
containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda11.8
cuda12:
containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-cuda12.1
containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda12.2

pool:
name: mscclpp
Expand All @@ -30,10 +30,8 @@ jobs:
inputs:
targetType: 'inline'
script: |
curl -L https://github.com/Kitware/CMake/releases/download/v3.26.4/cmake-3.26.4-linux-x86_64.tar.gz -o /tmp/cmake-3.26.4-linux-x86_64.tar.gz
tar xzf /tmp/cmake-3.26.4-linux-x86_64.tar.gz -C /tmp
mkdir build && cd build
MPI_HOME=/usr/local/mpi /tmp/cmake-3.26.4-linux-x86_64/bin/cmake -DCMAKE_BUILD_TYPE=Release ..
cmake -DCMAKE_BUILD_TYPE=Release -DBYPASS_GPU_CHECK=ON -DUSE_CUDA=ON ..
make -j
workingDirectory: '$(System.DefaultWorkingDirectory)'

Expand Down Expand Up @@ -112,3 +110,15 @@ jobs:
set -e
python3 test/mscclpp-test/check_perf_result.py --perf-file output.jsonl --baseline-file test/deploy/perf_ndmv4.jsonl
workingDirectory: '$(System.DefaultWorkingDirectory)'

# Pipeline step: installs the Python package from the working directory with
# pip, then runs the all-reduce benchmark script under mpirun with 8 processes.
- task: Bash@3
  name: PythonAllReduceBenchmark
  displayName: Python Allreduce Benchmark
  inputs:
    targetType: 'inline'
    script: |
      # Abort the step on the first failing command.
      set -e
      # Put the MPI binaries (mpirun) on PATH.
      export PATH=/usr/local/mpi/bin:$PATH
      python3 -m pip install .
      # -x passes MSCCLPP_HOME to the launched processes (presumably Open MPI
      # mpirun semantics - confirm). The macro is quoted so a workspace path
      # containing whitespace is not split into several arguments.
      mpirun -tag-output -x MSCCLPP_HOME="$(System.DefaultWorkingDirectory)" -np 8 \
        python3 ./python/mscclpp_benchmark/allreduce_bench.py
    workingDirectory: '$(System.DefaultWorkingDirectory)'
34 changes: 25 additions & 9 deletions .azure-pipelines/multi-nodes-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ jobs:
strategy:
matrix:
cuda11:
containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-cuda11.8
containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda11.8
cuda12:
containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-cuda12.1
containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda12.2
pool:
name: mscclpp-it
container:
Expand All @@ -25,12 +25,9 @@ jobs:
inputs:
targetType: 'inline'
script: |
curl -L https://github.com/Kitware/CMake/releases/download/v3.26.4/cmake-3.26.4-linux-x86_64.tar.gz -o /tmp/cmake-3.26.4-linux-x86_64.tar.gz
tar xzf /tmp/cmake-3.26.4-linux-x86_64.tar.gz -C /tmp
mkdir build && cd build
MPI_HOME=/usr/local/mpi /tmp/cmake-3.26.4-linux-x86_64/bin/cmake -DCMAKE_BUILD_TYPE=Release -DBYPASS_PEERMEM_CHECK=ON ..
cmake -DCMAKE_BUILD_TYPE=Release -DBYPASS_GPU_CHECK=ON -DUSE_CUDA=ON ..
make -j
make pylib-copy
workingDirectory: '$(System.DefaultWorkingDirectory)'

- task: DownloadSecureFile@1
Expand Down Expand Up @@ -83,7 +80,7 @@ jobs:
tail -f output/mscclit-000000 &
CHILD_PID=$!
parallel-ssh -t 0 -H mscclit-000000 -l azureuser -x "-i ${KeyFilePath}" \
-O $SSH_OPTION -o output 'sudo docker exec -t mscclpp-test bash /root/mscclpp/run_tests.sh mscclpp-test'
-O $SSH_OPTION -o output 'sudo docker exec -t mscclpp-test bash /root/mscclpp/test/deploy/run_tests.sh mscclpp-test'
kill $CHILD_PID
- task: Bash@3
Expand All @@ -102,7 +99,7 @@ jobs:
tail -f output/mscclit-000000 &
CHILD_PID=$!
parallel-ssh -t 0 -H mscclit-000000 -l azureuser -x "-i ${KeyFilePath}" \
-O $SSH_OPTION -o output 'sudo docker exec -t mscclpp-test bash /root/mscclpp/run_tests.sh mp-ut'
-O $SSH_OPTION -o output 'sudo docker exec -t mscclpp-test bash /root/mscclpp/test/deploy/run_tests.sh mp-ut'
kill $CHILD_PID
- task: Bash@3
Expand All @@ -121,7 +118,26 @@ jobs:
tail -f output/mscclit-000000 &
CHILD_PID=$!
parallel-ssh -t 0 -H mscclit-000000 -l azureuser -x "-i ${KeyFilePath}" \
-O $SSH_OPTION -o output 'sudo docker exec -t mscclpp-test bash /root/mscclpp/run_tests.sh pytests'
-O $SSH_OPTION -o output 'sudo docker exec -t mscclpp-test bash /root/mscclpp/test/deploy/run_tests.sh pytests'
kill $CHILD_PID
# Pipeline step: runs test/deploy/run_tests.sh in "py-benchmark" mode inside
# the mscclpp-test container on host mscclit-000000 via parallel-ssh, while
# streaming the per-host output file to the step log.
- task: Bash@3
  name: RunMultiNodePythonBenchmark
  displayName: Run multi-nodes python benchmark
  inputs:
    targetType: 'inline'
    script: |
      # Abort the step on the first failing command.
      set -e
      SSH_OPTION="StrictHostKeyChecking=no"
      KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
      # Start from a clean output directory; parallel-ssh writes one file
      # per host into it (-o output).
      rm -rf output/*
      mkdir -p output
      touch output/mscclit-000000
      # Follow the host's output file in the background so remote output
      # shows up in the step log as it is written.
      tail -f output/mscclit-000000 &
      CHILD_PID=$!
      # Stop the background tail on every exit path. With `set -e`, a failing
      # parallel-ssh would otherwise exit before a trailing `kill` and leave
      # the tail process running.
      trap 'kill "$CHILD_PID" 2>/dev/null || true' EXIT
      parallel-ssh -t 0 -H mscclit-000000 -l azureuser -x "-i ${KeyFilePath}" \
        -O $SSH_OPTION -o output 'sudo docker exec -t mscclpp-test bash /root/mscclpp/test/deploy/run_tests.sh py-benchmark'
- task: AzureCLI@2
Expand Down
16 changes: 4 additions & 12 deletions .azure-pipelines/ut.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ jobs:
strategy:
matrix:
cuda11:
containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-cuda11.8
containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda11.8
cuda12:
containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-cuda12.1
containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda12.2

container:
image: $[ variables['containerImage'] ]
Expand All @@ -30,10 +30,8 @@ jobs:
inputs:
targetType: 'inline'
script: |
curl -L -C- https://github.com/Kitware/CMake/releases/download/v3.26.4/cmake-3.26.4-linux-x86_64.tar.gz -o /tmp/cmake-3.26.4-linux-x86_64.tar.gz
tar xzf /tmp/cmake-3.26.4-linux-x86_64.tar.gz -C /tmp
mkdir build && cd build
MPI_HOME=/usr/local/mpi /tmp/cmake-3.26.4-linux-x86_64/bin/cmake -DCMAKE_BUILD_TYPE=Release ..
cmake -DCMAKE_BUILD_TYPE=Release ..
make -j
workingDirectory: '$(System.DefaultWorkingDirectory)'

Expand Down Expand Up @@ -79,11 +77,5 @@ jobs:
script: |
set -e
export PATH=/usr/local/mpi/bin:$PATH
cd build && make pylib-copy
if [[ '$(containerImage)' == *'cuda11'* ]]; then
pip3 install -r ../python/test/requirements_cu11.txt
else
pip3 install -r ../python/test/requirements_cu12.txt
fi
mpirun -tag-output -np 8 ~/.local/bin/pytest ../python/test/test_mscclpp.py -x
mpirun -tag-output -x MSCCLPP_HOME=$(System.DefaultWorkingDirectory) -np 8 python3 -m pytest ./python/test/test_mscclpp.py -x
workingDirectory: '$(System.DefaultWorkingDirectory)'
10 changes: 10 additions & 0 deletions .github/ISSUE_TEMPLATE/documentation-improvement.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
name: Documentation improvement
about: Enhance or fix documentation
title: "[Doc]"
labels: ''
assignees: ''

---


6 changes: 3 additions & 3 deletions .github/workflows/codeql-analysis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
name: Analyze
runs-on: 'ubuntu-latest'
container:
image: ghcr.io/microsoft/mscclpp/mscclpp:dev-${{ matrix.cuda-version }}
image: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-${{ matrix.cuda-version }}

permissions:
actions: read
Expand All @@ -24,7 +24,7 @@ jobs:
fail-fast: false
matrix:
language: [ 'cpp', 'python' ]
cuda-version: [ 'cuda11.8', 'cuda12.1' ]
cuda-version: [ 'cuda11.8', 'cuda12.2' ]

steps:
- name: Checkout repository
Expand All @@ -45,7 +45,7 @@ jobs:
- name: Build
run: |
MPI_HOME=/usr/local/mpi cmake -DBYPASS_PEERMEM_CHECK=ON .
cmake -DBYPASS_GPU_CHECK=ON -DUSE_CUDA=ON .
make -j
- name: Perform CodeQL Analysis
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/integration-test-backup.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ jobs:
shell: bash
strategy:
matrix:
cuda: [ cuda11.8, cuda12.1 ]
cuda: [ cuda11.8, cuda12.2 ]

container:
image: "ghcr.io/microsoft/mscclpp/mscclpp:dev-${{ matrix.cuda }}"
image: "ghcr.io/microsoft/mscclpp/mscclpp:base-dev-${{ matrix.cuda }}"
options: --privileged --ipc=host --gpus=all --ulimit memlock=-1:-1

steps:
Expand All @@ -23,7 +23,7 @@ jobs:
- name: Build
run: |
mkdir build && cd build
MPI_HOME=/usr/local/mpi cmake -DCMAKE_BUILD_TYPE=Release ..
cmake -DCMAKE_BUILD_TYPE=Release ..
make -j
- name: Lock GPU clock frequency
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
- name: Run cpplint
run: |
CPPSOURCES=$(find ./ -regextype posix-extended -regex '.*\.(c|cpp|h|hpp|cc|cxx|cu)' -not -path "./build/*")
CPPSOURCES=$(find ./src ./include ./python ./test -regextype posix-extended -regex '.*\.(c|cpp|h|hpp|cc|cxx|cu)')
clang-format -style=file --verbose --Werror --dry-run ${CPPSOURCES}
pylint:
Expand Down
12 changes: 6 additions & 6 deletions .github/workflows/ut-backup.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ jobs:
timeout-minutes: 30
strategy:
matrix:
cuda: [ cuda11.8, cuda12.1 ]
cuda: [ cuda11.8, cuda12.2 ]

container:
image: "ghcr.io/microsoft/mscclpp/mscclpp:dev-${{ matrix.cuda }}"
image: "ghcr.io/microsoft/mscclpp/mscclpp:base-dev-${{ matrix.cuda }}"
options: --privileged --ipc=host --gpus=all --ulimit memlock=-1:-1

steps:
Expand All @@ -29,7 +29,7 @@ jobs:
- name: Build
run: |
mkdir build && cd build
MPI_HOME=/usr/local/mpi cmake -DCMAKE_BUILD_TYPE=Release ..
cmake -DCMAKE_BUILD_TYPE=Release ..
make -j
working-directory: ${{ github.workspace }}

Expand All @@ -54,11 +54,11 @@ jobs:
- name: PyTests
run: |
set -e
cd build && make pylib-copy
mpirun --allow-run-as-root -tag-output -np 8 $(which pytest) ../python/test/test_mscclpp.py -x
mpirun --allow-run-as-root -tag-output -np 8 $(which pytest) ./python/test/test_mscclpp.py -x
- name: ReportCoverage
run: |
set -e
cd build
lcov --capture --directory . --output-file coverage.info
lcov --remove coverage.info \
Expand All @@ -68,4 +68,4 @@ jobs:
'*/test/*' \
'*/tools/*' \
--output-file coverage.info
lcov --list coverage.info
lcov --list coverage.info
47 changes: 47 additions & 0 deletions CITATION.cff
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Citation metadata for this project in Citation File Format (CFF).
cff-version: 1.2.0
title: "MSCCL++: A GPU-driven communication stack for scalable AI applications"
# Quoted so the version is always read as a string.
version: "0.4.2"
message: >-
  If you use this project in your research, please cite it as below.
authors:
  - given-names: Peng
    family-names: Cheng
    affiliation: Microsoft Research
  - given-names: Changho
    family-names: Hwang
    affiliation: Microsoft Research
  - given-names: Abhinav
    family-names: Jangda
    affiliation: Microsoft Research
  - given-names: Suriya
    family-names: Kalivardhan
    affiliation: Microsoft Azure
  - given-names: Binyang
    family-names: Li
    affiliation: Microsoft Azure
  - given-names: Shuguang
    family-names: Liu
    affiliation: Microsoft Azure
  - given-names: Saeed
    family-names: Maleki
    affiliation: Microsoft Research
  - given-names: Madan
    family-names: Musuvathi
    affiliation: Microsoft Research
  - given-names: Olli
    family-names: Saarikivi
    affiliation: Microsoft Research
  - given-names: Wei
    family-names: Tsui
    affiliation: Microsoft Research
  - given-names: Ziyue
    family-names: Yang
    affiliation: Microsoft Research

repository-code: 'https://github.com/microsoft/mscclpp'
abstract: >-
  MSCCL++ redefines the interface for inter-GPU communication, thereby
  delivering a highly efficient and customizable communication stack
  tailored for distributed GPU applications.
license: MIT
license-url: https://github.com/microsoft/mscclpp/blob/main/LICENSE
Loading

0 comments on commit 6fe900f

Please sign in to comment.