From a11db43cb8277c6d4e5179ff7d353f385a07384c Mon Sep 17 00:00:00 2001
From: Avik Pal
Date: Sun, 25 Feb 2024 15:47:08 -0500
Subject: [PATCH 1/4] Add `has_rocm` for OpenMPI

---
 docs/src/reference/library.md |  1 +
 src/environment.jl            | 29 +++++++++++++++++++++++++++--
 test/test_basic.jl            |  6 ++++++
 3 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/docs/src/reference/library.md b/docs/src/reference/library.md
index 94e3942e0..5ce9b5299 100644
--- a/docs/src/reference/library.md
+++ b/docs/src/reference/library.md
@@ -14,5 +14,6 @@ MPI.MPI_LIBRARY_VERSION_STRING
 ```@docs
 MPI.versioninfo
 MPI.has_cuda
+MPI.has_rocm
 MPI.identify_implementation
 ```
diff --git a/src/environment.jl b/src/environment.jl
index 52597c430..e472d57be 100644
--- a/src/environment.jl
+++ b/src/environment.jl
@@ -320,7 +320,7 @@ Wtime() = API.MPI_Wtime()
 
 Check if the MPI implementation is known to have CUDA support. Currently only Open MPI
 provides a mechanism to check, so it will return `false` with other implementations
-(unless overriden).
+(unless overridden). For IBM Spectrum MPI it will return `true`.
 
 This can be overriden by setting the `JULIA_MPI_HAS_CUDA` environment variable to `true`
 or `false`.
@@ -334,7 +334,7 @@ function has_cuda()
         # Only Open MPI provides a function to check CUDA support
         @static if MPI_LIBRARY == "OpenMPI"
             # int MPIX_Query_cuda_support(void)
-            return 0 != ccall((:MPIX_Query_cuda_support, libmpi), Cint, ())
+            return 0 != @ccall libmpi.MPIX_Query_cuda_support()::Cint
         elseif MPI_LIBRARY == "IBMSpectrumMPI"
             return true
         else
@@ -344,3 +344,28 @@ function has_cuda()
         return parse(Bool, flag)
     end
 end
+
+"""
+    MPI.has_rocm()
+
+Check if the MPI implementation is known to have ROCm support. Currently only Open MPI
+provides a mechanism to check, so it will return `false` with other implementations
+(unless overridden).
+
+This can be overridden by setting the `JULIA_MPI_HAS_ROCM` environment variable to `true`
+or `false`.
+"""
+function has_rocm()
+    flag = get(ENV, "JULIA_MPI_HAS_ROCM", nothing)
+    if flag === nothing
+        # Only Open MPI (v5 or newer) provides a function to check ROCm support
+        @static if MPI_LIBRARY == "OpenMPI" && MPI_LIBRARY_VERSION ≥ v"5"
+            # int MPIX_Query_rocm_support(void)
+            return 0 != @ccall libmpi.MPIX_Query_rocm_support()::Cint
+        else
+            return false
+        end
+    else
+        return parse(Bool, flag)
+    end
+end
diff --git a/test/test_basic.jl b/test/test_basic.jl
index 495c9f83b..57b6a65e5 100644
--- a/test/test_basic.jl
+++ b/test/test_basic.jl
@@ -12,6 +12,12 @@ if get(ENV,"JULIA_MPI_TEST_ARRAYTYPE","") == "CuArray"
     @test MPI.has_cuda()
 end
 
+@test MPI.has_rocm() isa Bool
+
+if get(ENV,"JULIA_MPI_TEST_ARRAYTYPE","") == "ROCArray"
+    @test MPI.has_rocm()
+end
+
 @test !MPI.Finalized()
 MPI.Finalize()
 @test MPI.Finalized()

From cf392d4541c1f8ae0f2df093887807a79875de89 Mon Sep 17 00:00:00 2001
From: Avik Pal
Date: Sat, 22 Jun 2024 09:46:16 -0700
Subject: [PATCH 2/4] Add `has_gpu` check

---
 docs/src/reference/library.md |  1 +
 src/environment.jl            | 18 ++++++++++++++++++
 2 files changed, 19 insertions(+)

diff --git a/docs/src/reference/library.md b/docs/src/reference/library.md
index 5ce9b5299..302015ff6 100644
--- a/docs/src/reference/library.md
+++ b/docs/src/reference/library.md
@@ -15,5 +15,6 @@ MPI.MPI_LIBRARY_VERSION_STRING
 MPI.versioninfo
 MPI.has_cuda
 MPI.has_rocm
+MPI.has_gpu
 MPI.identify_implementation
 ```
diff --git a/src/environment.jl b/src/environment.jl
index e472d57be..5408bddfa 100644
--- a/src/environment.jl
+++ b/src/environment.jl
@@ -327,6 +327,8 @@ or `false`.
 
 !!! note
     For OpenMPI or OpenMPI-based implementations you first need to call [Init()](@ref).
+
+See also [`MPI.has_rocm`](@ref) for ROCm support.
 """
 function has_cuda()
     flag = get(ENV, "JULIA_MPI_HAS_CUDA", nothing)
@@ -354,6 +356,8 @@ provides a mechanism to check, so it will return `false` with other implementati
 
 This can be overridden by setting the `JULIA_MPI_HAS_ROCM` environment variable to `true`
 or `false`.
+
+See also [`MPI.has_cuda`](@ref) for CUDA support.
 """
 function has_rocm()
     flag = get(ENV, "JULIA_MPI_HAS_ROCM", nothing)
@@ -369,3 +373,17 @@ function has_rocm()
         return parse(Bool, flag)
     end
 end
+
+"""
+    MPI.has_gpu()
+
+Check if the MPI implementation is known to have GPU support. Currently this checks for
+the following GPU runtimes:
+
+1. CUDA: via [`MPI.has_cuda`](@ref)
+2. ROCm: via [`MPI.has_rocm`](@ref)
+
+See also [`MPI.has_cuda`](@ref) and [`MPI.has_rocm`](@ref) for more fine-grained
+checks.
+"""
+has_gpu() = has_cuda() || has_rocm()

From 27101327498ee2d91ef4c8b4b626beff19165a5b Mon Sep 17 00:00:00 2001
From: Avik Pal
Date: Sat, 22 Jun 2024 09:58:33 -0700
Subject: [PATCH 3/4] Update ROCm query docs

---
 docs/src/usage.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/src/usage.md b/docs/src/usage.md
index 2b8ebe49d..c57eae1af 100644
--- a/docs/src/usage.md
+++ b/docs/src/usage.md
@@ -98,7 +98,8 @@ should confirm your MPI implementation to have the ROCm support (AMDGPU) enabled
 [alltoall\_test\_rocm\_multigpu.jl](https://gist.github.com/luraess/a47931d7fb668bd4348a2c730d5489f4)
 should confirm your ROCm-aware MPI implementation to use multiple AMD GPUs (one GPU per rank).
 
-The status of ROCm (AMDGPU) support cannot currently be queried.
+If using Open MPI (v5 or newer), the status of ROCm support can be checked via the
+[`MPI.has_rocm()`](@ref) function.
 
 ## Writing MPI tests
 

From 01598f7ca3a33fa77d525bfbac8962da651306e4 Mon Sep 17 00:00:00 2001
From: Avik Pal
Date: Sat, 22 Jun 2024 10:01:03 -0700
Subject: [PATCH 4/4] Test for `has_gpu`

---
 docs/src/knownissues.md |  3 ++-
 test/test_basic.jl      | 10 ++++++++--
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/docs/src/knownissues.md b/docs/src/knownissues.md
index d3e6a2a64..6fc71ed30 100644
--- a/docs/src/knownissues.md
+++ b/docs/src/knownissues.md
@@ -180,7 +180,7 @@ Make sure to:
 ```
 - Then in Julia, upon loading MPI and CUDA modules, you can check
   - CUDA version: `CUDA.versioninfo()`
-  - If MPI has CUDA: `MPI.has_cuda()`
+  - If MPI has CUDA: [`MPI.has_cuda()`](@ref)
   - If you are using correct MPI library: `MPI.libmpi`
 
 After that, it may be preferred to run the Julia MPI script (as suggested [here](https://discourse.julialang.org/t/cuda-aware-mpi-works-on-system-but-not-for-julia/75060/11)) launching it from a shell script (as suggested [here](https://discourse.julialang.org/t/cuda-aware-mpi-works-on-system-but-not-for-julia/75060/4)).
@@ -197,6 +197,7 @@ Make sure to:
 ```
 - Then in Julia, upon loading MPI and CUDA modules, you can check
   - AMDGPU version: `AMDGPU.versioninfo()`
+  - If MPI has ROCm: [`MPI.has_rocm()`](@ref)
   - If you are using correct MPI implementation: `MPI.identify_implementation()`
 After that, [this script](https://gist.github.com/luraess/c228ec08629737888a18c6a1e397643c) can be used to verify if ROCm-aware MPI is functional (modified after the CUDA-aware version from [here](https://discourse.julialang.org/t/cuda-aware-mpi-works-on-system-but-not-for-julia/75060/11)).
 It may be preferred to run the Julia ROCm-aware MPI script launching it from a shell script (as suggested [here](https://discourse.julialang.org/t/cuda-aware-mpi-works-on-system-but-not-for-julia/75060/4)).
diff --git a/test/test_basic.jl b/test/test_basic.jl
index 57b6a65e5..3da013963 100644
--- a/test/test_basic.jl
+++ b/test/test_basic.jl
@@ -8,16 +8,22 @@ MPI.Init()
 
 @test MPI.has_cuda() isa Bool
 
-if get(ENV,"JULIA_MPI_TEST_ARRAYTYPE","") == "CuArray"
+if get(ENV, "JULIA_MPI_TEST_ARRAYTYPE", "") == "CuArray"
     @test MPI.has_cuda()
 end
 
 @test MPI.has_rocm() isa Bool
 
-if get(ENV,"JULIA_MPI_TEST_ARRAYTYPE","") == "ROCArray"
+if get(ENV, "JULIA_MPI_TEST_ARRAYTYPE", "") == "ROCArray"
     @test MPI.has_rocm()
 end
 
+@test MPI.has_gpu() isa Bool
+
+if get(ENV, "JULIA_MPI_TEST_ARRAYTYPE", "") in ("CuArray", "ROCArray")
+    @test MPI.has_gpu()
+end
+
 @test !MPI.Finalized()
 MPI.Finalize()
 @test MPI.Finalized()
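A minimal usage sketch of the queries added in this series, for reviewers. This is not part of the patches: the `JULIA_MPI_TEST_ARRAYTYPE` convention is borrowed from `test/test_basic.jl` above, and the warning messages are illustrative only.

```julia
# Sketch: cross-check the requested device array type against the GPU support
# reported by the MPI library (assumes the patches above are applied).
using MPI

MPI.Init()

# Same environment-variable convention as in test/test_basic.jl.
arraytype = get(ENV, "JULIA_MPI_TEST_ARRAYTYPE", "")

if arraytype == "CuArray" && !MPI.has_cuda()
    @warn "CuArray requested, but the MPI library does not report CUDA support"
elseif arraytype == "ROCArray" && !MPI.has_rocm()
    @warn "ROCArray requested, but the MPI library does not report ROCm support"
end

# Coarse check: true if either CUDA or ROCm support was detected, or if one of
# the JULIA_MPI_HAS_CUDA / JULIA_MPI_HAS_ROCM overrides is set to "true".
@info "GPU-aware MPI" MPI.has_gpu()

MPI.Finalize()
```

Since `has_gpu()` is defined as `has_cuda() || has_rocm()`, the two environment-variable overrides also control its result.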