Skip to content

Commit

Permalink
buildkite: get CUDA working again (JuliaParallel#607)
Browse files Browse the repository at this point in the history
* Use latest Open MPI
* Use --with-cuda when building Open MPI
* Add CUDA group to Buildkite
* Disable ROCm for now
* Disable soft-fail
* Update badge
* Move common test config to test/common.jl
* add synchronize() function
  • Loading branch information
simonbyrne authored May 31, 2022
1 parent 04cf5c4 commit f39af64
Show file tree
Hide file tree
Showing 25 changed files with 202 additions and 363 deletions.
280 changes: 127 additions & 153 deletions .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
@@ -1,163 +1,137 @@
steps:
- label: "Build OpenMPI -- CUDA"
agents:
queue: "juliagpu"
cuda: "11.0"
env:
OPENMPI_VER: "4.0"
OPENMPI_VER_FULL: "4.0.3"
UCX_VER: "1.12.1"
CCACHE_DIR: "/root/ccache"
commands: |
echo "--- Install packages"
apt-get install --yes --no-install-recommends curl ccache
export PATH="/usr/lib/ccache/:$$PATH"
- group: "CUDA"
key: "cuda"
steps:
- label: "Build OpenMPI"
key: "cuda-build-openmpi"
agents:
queue: "juliagpu"
cuda: "11.0"
env:
OPENMPI_VER: "4.0"
OPENMPI_VER_FULL: "4.0.3"
UCX_VER: "1.12.1"
CCACHE_DIR: "/root/ccache"
commands: |
echo "--- Install packages"
apt-get install --yes --no-install-recommends curl ccache
export PATH="/usr/lib/ccache/:$$PATH"
echo "--- Build UCX"
curl -L https://github.com/openucx/ucx/releases/download/v$${UCX_VER}/ucx-$${UCX_VER}.tar.gz --output ucx.tar.gz
tar -zxf ucx.tar.gz
pushd ucx-*
./configure --with-cuda=/usr/local/cuda --enable-mt --prefix=$$(realpath ../mpi-prefix)
make -j
make install
popd
echo "--- Build UCX"
curl -L https://github.com/openucx/ucx/releases/download/v$${UCX_VER}/ucx-$${UCX_VER}.tar.gz --output ucx.tar.gz
tar -zxf ucx.tar.gz
pushd ucx-*
./configure --with-cuda=/usr/local/cuda --enable-mt --prefix=$$(realpath ../mpi-prefix)
make -j
make install
popd
echo "--- Build OpenMPI"
curl -L https://download.open-mpi.org/release/open-mpi/v$${OPENMPI_VER}/openmpi-$${OPENMPI_VER_FULL}.tar.gz --output openmpi.tar.gz
tar -zxf openmpi.tar.gz
pushd openmpi-$${OPENMPI_VER_FULL}
./configure --with-ucx=$$(realpath ../mpi-prefix) --prefix=$$(realpath ../mpi-prefix)
make -j
make install
popd
echo "--- Build OpenMPI"
curl -L https://download.open-mpi.org/release/open-mpi/v$${OPENMPI_VER}/openmpi-$${OPENMPI_VER_FULL}.tar.gz --output openmpi.tar.gz
tar -zxf openmpi.tar.gz
pushd openmpi-$${OPENMPI_VER_FULL}
./configure --with-ucx=$$(realpath ../mpi-prefix) --with-cuda=/usr/local/cuda --prefix=$$(realpath ../mpi-prefix)
make -j
make install
popd
echo "--- Package prefix"
tar -zcf mpi-prefix.tar.gz mpi-prefix/
echo "--- Package prefix"
tar -zcf mpi-prefix.tar.gz mpi-prefix/
echo "--- ccache stats"
ccache -s
artifact_paths:
- "mpi-prefix.tar.gz"
- label: "Build OpenMPI -- ROCM"
agents:
queue: "juliagpu"
rocm: "*" # todo fix ROCM version
env:
OPENMPI_VER: "4.0"
OPENMPI_VER_FULL: "4.0.3"
UCX_VER: "1.12.1"
CCACHE_DIR: "/root/ccache"
commands: |
echo "--- Install packages"
apt-get install --yes --no-install-recommends curl ccache
export PATH="/usr/lib/ccache/:$$PATH"
echo "--- ccache stats"
ccache -s
artifact_paths:
- "mpi-prefix.tar.gz"

echo "--- Build UCX"
curl -L https://github.com/openucx/ucx/releases/download/v$${UCX_VER}/ucx-$${UCX_VER}.tar.gz --output ucx.tar.gz
tar -zxf ucx.tar.gz
pushd ucx-*
./configure --with-rocm --enable-mt --prefix=$$(realpath ../mpi-prefix)
make -j
make install
popd
- wait

- label: "Tests -- Julia 1.6"
plugins:
- JuliaCI/julia#v1:
version: "1.6"
persist_depot_dirs: packages,artifacts,compiled
agents:
queue: "juliagpu"
cuda: "11.0"
if: build.message !~ /\[skip tests\]/
timeout_in_minutes: 60
env:
JULIA_MPI_TEST_ARRAYTYPE: CuArray
JULIA_MPI_TEST_NPROCS: 2
JULIA_MPI_PATH: "${BUILDKITE_BUILD_CHECKOUT_PATH}/openmpi"
OMPI_ALLOW_RUN_AS_ROOT: 1
OMPI_ALLOW_RUN_AS_ROOT_CONFIRM: 1
OMPI_MCA_btl_vader_single_copy_mechanism: 'none' # https://github.com/open-mpi/ompi/issues/4948
OPAL_PREFIX: "${BUILDKITE_BUILD_CHECKOUT_PATH}/openmpi" # Should we set this for the user?
JULIA_CUDA_MEMORY_POOL: "none"
commands: |
echo "--- Configure MPI"
buildkite-agent artifact download --step "cuda-build-openmpi" mpi-prefix.tar.gz .
mkdir -p $${JULIA_MPI_PATH}
tar -zxf mpi-prefix.tar.gz --strip-components 1 -C $${JULIA_MPI_PATH}
export PATH=$${JULIA_MPI_PATH}/bin:$${PATH}
export LD_LIBRARY_PATH=$${JULIA_MPI_PATH}/lib:$${LD_LIBRARY_PATH}
echo "--- Build OpenMPI"
curl -L https://download.open-mpi.org/release/open-mpi/v$${OPENMPI_VER}/openmpi-$${OPENMPI_VER_FULL}.tar.gz --output openmpi.tar.gz
tar -zxf openmpi.tar.gz
pushd openmpi-*
./configure --with-ucx=$$(realpath ../mpi-prefix) --prefix=$$(realpath ../mpi-prefix)
make -j
make install
popd
echo "--- Setup Julia packages"
julia --color=yes --project=. -e '
import Pkg
Pkg.develop(; path = joinpath(pwd(), "lib", "MPIPreferences"))
'
julia --color=yes --project=test -e '
using Pkg
Pkg.develop(path="lib/MPIPreferences")
using MPIPreferences
MPIPreferences.use_system_binary(export_prefs=true)
rm("test/Manifest.toml")
'
echo "--- Package prefix"
tar -zcf mpi-prefix.tar.gz mpi-prefix/
echo "+++ Run tests"
julia --color=yes --project=. -e '
import Pkg
Pkg.test("MPI")
'
echo "--- ccache stats"
ccache -s
artifact_paths:
- "mpi-prefix.tar.gz"
- wait: ~
- label: "CUDA -- 1.6"
plugins:
- JuliaCI/julia#v1:
version: "1.6"
persist_depot_dirs: packages,artifacts,compiled
agents:
queue: "juliagpu"
cuda: "11.0"
if: build.message !~ /\[skip tests\]/
timeout_in_minutes: 60
env:
JULIA_MPI_TEST_ARRAYTYPE: CuArray
JULIA_MPI_TEST_NPROCS: 2
JULIA_MPI_PATH: "${BUILDKITE_BUILD_CHECKOUT_PATH}/openmpi"
OMPI_ALLOW_RUN_AS_ROOT: 1
OMPI_ALLOW_RUN_AS_ROOT_CONFIRM: 1
OMPI_MCA_btl_vader_single_copy_mechanism: 'none' # https://github.com/open-mpi/ompi/issues/4948
OPAL_PREFIX: "${BUILDKITE_BUILD_CHECKOUT_PATH}/openmpi" # Should we set this for the user?
JULIA_CUDA_MEMORY_POOL: "none"
soft_fail: true
commands: |
echo "--- Configure MPI"
buildkite-agent artifact download --step "Build OpenMPI -- CUDA" mpi-prefix.tar.gz .
mkdir -p $${JULIA_MPI_PATH}
tar -zxf mpi-prefix.tar.gz --strip-components 1 -C $${JULIA_MPI_PATH}
export PATH=$${JULIA_MPI_PATH}/bin:$${PATH}
export LD_LIBRARY_PATH=$${JULIA_MPI_PATH}/lib:$${LD_LIBRARY_PATH}
- label: "Tests -- Julia 1.7"
plugins:
- JuliaCI/julia#v1:
version: "1.7"
persist_depot_dirs: packages,artifacts,compiled
agents:
queue: "juliagpu"
cuda: "11.0"
if: build.message !~ /\[skip tests\]/
timeout_in_minutes: 60
env:
JULIA_MPI_TEST_ARRAYTYPE: CuArray
JULIA_MPI_TEST_NPROCS: 2
JULIA_MPI_PATH: "${BUILDKITE_BUILD_CHECKOUT_PATH}/openmpi"
OMPI_ALLOW_RUN_AS_ROOT: 1
OMPI_ALLOW_RUN_AS_ROOT_CONFIRM: 1
OMPI_MCA_btl_vader_single_copy_mechanism: 'none' # https://github.com/open-mpi/ompi/issues/4948
OPAL_PREFIX: "${BUILDKITE_BUILD_CHECKOUT_PATH}/openmpi" # Should we set this for the user?
JULIA_CUDA_MEMORY_POOL: "none"
commands: |
echo "--- Configure MPI"
buildkite-agent artifact download --step "cuda-build-openmpi" mpi-prefix.tar.gz .
mkdir -p $${JULIA_MPI_PATH}
tar -zxf mpi-prefix.tar.gz --strip-components 1 -C $${JULIA_MPI_PATH}
export PATH=$${JULIA_MPI_PATH}/bin:$${PATH}
export LD_LIBRARY_PATH=$${JULIA_MPI_PATH}/lib:$${LD_LIBRARY_PATH}
echo "--- Setup Julia packages"
julia --color=yes --project=test -e '
using Pkg
Pkg.develop(path="lib/MPIPreferences")
using MPIPreferences
MPIPreferences.use_system_binary(export_prefs=true)
rm("test/Manifest.toml")'
julia -e 'import Pkg; Pkg.develop(; path = joinpath(pwd(), "lib", "MPIPreferences"))'
julia -e 'import Pkg; Pkg.develop(; path = pwd())'
julia -e 'import Pkg; Pkg.precompile()'
echo "--- Setup Julia packages"
julia --color=yes --project=. -e '
import Pkg
Pkg.develop(; path = joinpath(pwd(), "lib", "MPIPreferences"))
'
julia --color=yes --project=test -e '
using Pkg
Pkg.develop(path="lib/MPIPreferences")
using MPIPreferences
MPIPreferences.use_system_binary(export_prefs=true)
rm("test/Manifest.toml")
'
echo "+++ Run tests"
julia -e 'import Pkg; Pkg.test("MPI")'
- label: "CUDA -- 1.7"
plugins:
- JuliaCI/julia#v1:
version: "1.7"
persist_depot_dirs: packages,artifacts,compiled
agents:
queue: "juliagpu"
cuda: "11.0"
if: build.message !~ /\[skip tests\]/
timeout_in_minutes: 60
env:
JULIA_MPI_TEST_ARRAYTYPE: CuArray
JULIA_MPI_TEST_NPROCS: 2
JULIA_MPI_PATH: "${BUILDKITE_BUILD_CHECKOUT_PATH}/openmpi"
OMPI_ALLOW_RUN_AS_ROOT: 1
OMPI_ALLOW_RUN_AS_ROOT_CONFIRM: 1
OMPI_MCA_btl_vader_single_copy_mechanism: 'none' # https://github.com/open-mpi/ompi/issues/4948
OPAL_PREFIX: "${BUILDKITE_BUILD_CHECKOUT_PATH}/openmpi" # Should we set this for the user?
JULIA_CUDA_MEMORY_POOL: "none"
soft_fail: true
commands: |
echo "--- Configure MPI"
buildkite-agent artifact download --step "Build OpenMPI -- CUDA" mpi-prefix.tar.gz .
mkdir -p $${JULIA_MPI_PATH}
tar -zxf mpi-prefix.tar.gz --strip-components 1 -C $${JULIA_MPI_PATH}
export PATH=$${JULIA_MPI_PATH}/bin:$${PATH}
export LD_LIBRARY_PATH=$${JULIA_MPI_PATH}/lib:$${LD_LIBRARY_PATH}
echo "--- Setup Julia packages"
julia --color=yes --project=test -e '
using Pkg
Pkg.develop(path="lib/MPIPreferences")
using MPIPreferences
MPIPreferences.use_system_binary(export_prefs=true)
rm("test/Manifest.toml")'
julia -e 'import Pkg; Pkg.develop(; path = joinpath(pwd(), "lib", "MPIPreferences"))'
julia -e 'import Pkg; Pkg.develop(; path = pwd())'
julia -e 'import Pkg; Pkg.precompile()'
echo "+++ Run tests"
julia -e 'import Pkg; Pkg.test("MPI")'
echo "+++ Run tests"
julia --color=yes --project=. -e '
import Pkg
Pkg.test("MPI")
'
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
[![Docs latest](https://img.shields.io/badge/docs-latest-blue.svg)](https://juliaparallel.github.io/MPI.jl/latest/)
[![Docs stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://juliaparallel.github.io/MPI.jl/stable/)
[![Unit tests](https://github.com/JuliaParallel/MPI.jl/workflows/Unit%20Tests/badge.svg?branch=master)](https://github.com/JuliaParallel/MPI.jl/actions?query=workflow%3A%22Unit+Tests%22+branch%3Amaster)
[![GPU tests](https://gitlab.com/JuliaGPU/MPI.jl/badges/master/pipeline.svg)](https://gitlab.com/JuliaGPU/MPI.jl/pipelines)
[![GPU tests](https://badge.buildkite.com/ed813bc4d79f557adbdb821b1c8c8de98999686e697df4a373.svg?branch=master)](https://buildkite.com/julialang/mpi-dot-jl)
[![codecov.io](https://codecov.io/github/JuliaParallel/MPI.jl/coverage.svg?branch=master)](https://codecov.io/github/JuliaParallel/MPI.jl?branch=master)
[![Coverage Status](https://coveralls.io/repos/JuliaParallel/MPI.jl/badge.svg?branch=master&service=github)](https://coveralls.io/github/JuliaParallel/MPI.jl?branch=master)

Expand Down
11 changes: 11 additions & 0 deletions test/common.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
using Test
using MPI

# Pick the array backend for the test suite from the environment.
# Unless JULIA_MPI_TEST_ARRAYTYPE is exactly "CuArray", tests run on plain
# host `Array`s and `synchronize()` is a no-op.
if get(ENV, "JULIA_MPI_TEST_ARRAYTYPE", "") != "CuArray"
    ArrayType = Array
    synchronize() = nothing
else
    # CUDA is only loaded when explicitly requested.
    import CUDA
    ArrayType = CUDA.CuArray
    # Forward to CUDA.synchronize() so callers can wait on the device.
    synchronize() = CUDA.synchronize()
end
19 changes: 7 additions & 12 deletions test/test_allgather.jl
Original file line number Diff line number Diff line change
@@ -1,13 +1,4 @@
using Test
using MPI

if get(ENV,"JULIA_MPI_TEST_ARRAYTYPE","") == "CuArray"
import CUDA
ArrayType = CUDA.CuArray
else
ArrayType = Array
end

include("common.jl")

MPI.Init()

Expand All @@ -18,6 +9,7 @@ rank = MPI.Comm_rank(comm)
for T in Base.uniontypes(MPI.MPIDatatype)
# test vector input
A = ArrayType{T}([rank + 1])
synchronize()
C = MPI.Allgather(A, comm)
@test C isa ArrayType{T,1}
@test C == ArrayType{T,1}(1:size)
Expand All @@ -32,11 +24,12 @@ for T in Base.uniontypes(MPI.MPIDatatype)

# Test passing output buffer with set size
A = ArrayType(T[val])

synchronize()

C = ArrayType{T}(undef, size)
MPI.Allgather!(A, C, comm) # implied size
@test C == ArrayType{T}(1:size)

C = ArrayType{T}(undef, size)
MPI.Allgather!(A, UBuffer(C,1), comm)
@test C == ArrayType{T}(1:size)
Expand All @@ -47,12 +40,14 @@ for T in Base.uniontypes(MPI.MPIDatatype)

# Test explicit IN_PLACE
C = ArrayType{T}([i == rank ? i : size + 1 for i = 0:size-1])
synchronize()
MPI.Allgather!(MPI.IN_PLACE, UBuffer(C, 1), comm)
@test C isa ArrayType{T,1}
@test C == ArrayType{T}(0:size-1)

# Test IN_PLACE
C = ArrayType{T}([i == rank ? i : size + 1 for i = 0:size-1])
synchronize()
MPI.Allgather!(UBuffer(C, 1), comm)
@test C isa ArrayType{T,1}
@test C == ArrayType{T}(0:size-1)
Expand Down
12 changes: 3 additions & 9 deletions test/test_allgatherv.jl
Original file line number Diff line number Diff line change
@@ -1,12 +1,4 @@
using Test
using MPI

if get(ENV,"JULIA_MPI_TEST_ARRAYTYPE","") == "CuArray"
import CUDA
ArrayType = CUDA.CuArray
else
ArrayType = Array
end
include("common.jl")

MPI.Init()

Expand All @@ -19,6 +11,7 @@ check = collect(Iterators.flatten([fill(r, counts[r+1]) for r = 0:size-1]))

for T in Base.uniontypes(MPI.MPIDatatype)
A = ArrayType{T}(fill(T(rank), counts[rank+1]))
synchronize()

# Test passing the output buffer
B = ArrayType{T}(undef, sum(counts))
Expand All @@ -31,6 +24,7 @@ for T in Base.uniontypes(MPI.MPIDatatype)

# Test explicit MPI_IN_PLACE
B = ArrayType(fill(T(rank), sum(counts)))
synchronize()
MPI.Allgatherv!(MPI.IN_PLACE, VBuffer(B, counts), comm)
@test B == ArrayType{T}(check)
end
Expand Down
Loading

0 comments on commit f39af64

Please sign in to comment.