Add compat helper and fixup scripts
luraess committed Sep 13, 2023
1 parent 747e2f0 commit d398f02
Showing 4 changed files with 47 additions and 38 deletions.
16 changes: 16 additions & 0 deletions .github/workflows/CompatHelper.yml
@@ -0,0 +1,16 @@
name: CompatHelper
on:
  schedule:
    - cron: 0 0 * * *
  workflow_dispatch:
jobs:
  CompatHelper:
    runs-on: ubuntu-latest
    steps:
      - name: Pkg.add("CompatHelper")
        run: julia -e 'using Pkg; Pkg.add("CompatHelper")'
      - name: CompatHelper.main()
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }}
        run: julia -e 'using CompatHelper; CompatHelper.main()'
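
This is the stock CompatHelper setup: a daily cron job installs CompatHelper.jl and runs CompatHelper.main(), which opens pull requests bumping [compat] bounds in Project.toml. GITHUB_TOKEN authorizes the PRs, and COMPATHELPER_PRIV (here reusing the DOCUMENTER_KEY secret) supplies an SSH key so the pushed branches can trigger CI. If only some project environments should be checked, main accepts keyword options; a hedged variant (the subdirs list is an illustration, not something this repo configures):

    run: julia -e 'using CompatHelper; CompatHelper.main(; subdirs=["", "docs"])'
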
47 changes: 24 additions & 23 deletions scripts_future_API/bench3d.jl
@@ -1,7 +1,11 @@
using KernelAbstractions
using MPI
using CUDA
using NVTX

using AMDGPU

# using CUDA
# using NVTX

# using CairoMakie

include("mpi_utils.jl")
@@ -25,7 +29,8 @@ function main(backend=CPU(), T::DataType=Float64, dims=(0, 0, 0))
# numerics
nt = 10
nx, ny, nz = 1024, 1024, 1024
b_width = (16, 8, 4)
# b_width = (16, 8, 4)
b_width = (128, 32, 4)
dims, comm, me, neighbors, coords, device = init_distributed(dims; init_MPI=true)
dx, dy, dz = l ./ (nx, ny, nz)
_dx, _dy, _dz = 1.0 ./ (dx, dy, dz)
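
The boundary width change from (16, 8, 4) to (128, 32, 4) widens the slabs along x and y that are computed separately from the inner block while halo messages are in flight, presumably to give those boundary kernels launch sizes that map better onto the GPU. As an illustration only (split_ranges and its return layout are assumptions, not the repo's mpi_utils API), the split works roughly like this:

    # One inner block plus a low/high slab of width b per dimension.
    function split_ranges(n::NTuple{3,Int}, b::NTuple{3,Int})
        inner = ntuple(i -> (b[i]+1):(n[i]-b[i]), 3)
        lo    = ntuple(i -> 1:b[i], 3)
        hi    = ntuple(i -> (n[i]-b[i]+1):n[i], 3)
        return inner, lo, hi
    end

    inner, lo, hi = split_ranges((1024, 1024, 1024), (128, 32, 4))
    # inner == (129:896, 33:992, 5:1020)
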
@@ -54,32 +59,28 @@ function main(backend=CPU(), T::DataType=Float64, dims=(0, 0, 0))
### to be hidden later

# actions
CUDA.Profile.start()
for it = 1:nt
# copyto!(A, A_new)
NVTX.@range "step $it" begin
NVTX.@range "inner" diffusion_kernel!(backend, 256)(A_new, A, h, _dx, _dy, _dz, first(ranges[end]); ndrange=size(ranges[end]))
for dim in reverse(eachindex(neighbors))
ntuple(Val(2)) do side
rank = neighbors[dim][side]
halo = get_recv_view(Val(side), Val(dim), A_new)
border = get_send_view(Val(side), Val(dim), A_new)
range = ranges[2*(dim-1) + side]
offset, ndrange = first(range), size(range)
start_exchange(exchangers[dim][side], comm, rank, halo, border) do compute_bc
NVTX.@range "borders" diffusion_kernel!(backend, 256)(A_new, A, h, _dx, _dy, _dz, offset; ndrange)
if compute_bc
# apply_bcs!(Val(dim), fields, bcs.velocity)
end
KernelAbstractions.synchronize(backend)
diffusion_kernel!(backend, 256)(A_new, A, h, _dx, _dy, _dz, first(ranges[end]); ndrange=size(ranges[end]))
for dim in reverse(eachindex(neighbors))
ntuple(Val(2)) do side
rank = neighbors[dim][side]
halo = get_recv_view(Val(side), Val(dim), A_new)
border = get_send_view(Val(side), Val(dim), A_new)
range = ranges[2*(dim-1) + side]
offset, ndrange = first(range), size(range)
start_exchange(exchangers[dim][side], comm, rank, halo, border) do compute_bc
diffusion_kernel!(backend, 256)(A_new, A, h, _dx, _dy, _dz, offset; ndrange)
if compute_bc
# apply_bcs!(Val(dim), fields, bcs.velocity)
end
KernelAbstractions.synchronize(backend)
end
wait.(exchangers[dim])
end
KernelAbstractions.synchronize(backend)
wait.(exchangers[dim])
end
KernelAbstractions.synchronize(backend)
end
CUDA.Profile.stop()

# for dim in eachindex(neighbors)
# setdone!.(exchangers[dim])
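
In the loop above the six boundary ranges are stored flat and the inner block sits at ranges[end], so the expression 2*(dim-1) + side picks the slab for a given dimension and side. A worked illustration of that index map (not repo code):

    for dim in 1:3, side in 1:2
        println((dim, side), " => ", 2*(dim-1) + side)
    end
    # (1, 1) => 1, (1, 2) => 2, (2, 1) => 3, (2, 2) => 4, (3, 1) => 5, (3, 2) => 6
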
@@ -97,7 +98,7 @@ function main(backend=CPU(), T::DataType=Float64, dims=(0, 0, 0))
return
end

backend = CUDABackend()
backend = ROCBackend()
T::DataType = Float64
dims = (0, 0, 1)
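
With dims = (0, 0, 1) the decomposition keeps a single rank along z and leaves the zero entries for MPI to fill in from the number of ranks. A typical launch, assuming MPI.jl's mpiexecjl wrapper is installed (rank count and path are placeholders):

    mpiexecjl -n 8 julia --project scripts_future_API/bench3d.jl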

3 changes: 3 additions & 0 deletions scripts_future_API/exchanger2.jl
@@ -1,5 +1,8 @@
using KernelAbstractions
using MPI

# using AMDGPU

using CUDA
using NVTX

19 changes: 4 additions & 15 deletions scripts_future_API/mpi_utils2.jl
@@ -13,9 +13,8 @@ function init_distributed(dims::Tuple=(0, 0, 0); init_MPI=true)
# create communicator for the node and select device
comm_node = MPI.Comm_split_type(comm, MPI.COMM_TYPE_SHARED, me)
dev_id = MPI.Comm_rank(comm_node)
@show device = CUDA.device!(dev_id)
# @show AMDGPU.default_device_id!(dev_id + 1) # DEBUG: why default ???
# @show AMDGPU.device_id!(dev_id + 1)
# @show device = CUDA.device!(dev_id)
@show device = AMDGPU.device_id!(dev_id + 1)
return (dims, comm, me, neighbors, coords, device)
end
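
The mapping from MPI rank to GPU is unchanged; only the vendor call differs. The node-local rank from the shared-memory sub-communicator indexes the device, and the + 1 accounts for AMDGPU.device_id! taking a 1-based index where CUDA.device! takes a 0-based ordinal. A condensed sketch of the pattern:

    using MPI
    MPI.Init()
    comm      = MPI.COMM_WORLD
    me        = MPI.Comm_rank(comm)
    # Ranks sharing a node get consecutive node-local ranks 0, 1, ...
    comm_node = MPI.Comm_split_type(comm, MPI.COMM_TYPE_SHARED, me)
    dev_id    = MPI.Comm_rank(comm_node)
    # CUDA.device!(dev_id)            # 0-based device ordinal
    # AMDGPU.device_id!(dev_id + 1)   # 1-based device index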

@@ -24,15 +23,6 @@ function finalize_distributed(; finalize_MPI=true)
return
end

# TODO: Implement in MPI.jl
# function cooperative_test!(req)
# done = false
# while !done
# done, _ = MPI.Test(req, MPI.Status)
# yield()
# end
# end

# exchanger
mutable struct Exchanger
@atomic done::Bool
@@ -51,7 +41,8 @@ mutable struct Exchanger
send_buf = nothing

this.task = Threads.@spawn begin
CUDA.device!(device)
# CUDA.device!(device)
AMDGPU.device!(device)
KernelAbstractions.priority!(backend, :high)
try
while !(@atomic this.done)
@@ -64,7 +55,6 @@ mutable struct Exchanger
recv_buf = similar(halo)
send_buf = similar(border)
end
NVTX.@mark "after wait(top)"
if has_neighbor
recv = MPI.Irecv!(recv_buf, comm; source=rank)
end
@@ -79,7 +69,6 @@ mutable struct Exchanger
wait(send)
end
notify(bottom)
NVTX.@mark "after notify(bottom)"
end
catch err
@show err
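
The truncated block above is the core of the Exchanger: a long-lived task pinned to the rank's GPU waits for work, posts the non-blocking receive and send for one (dimension, side) pair, and signals completion so the main loop can keep launching kernels. A stripped-down, single-shot sketch of that idea (names simplified; the real type reuses buffers, skips missing neighbors, and uses a high-priority backend queue):

    using MPI

    # One halo exchange with a neighboring rank, done on a helper task so the
    # caller can overlap it with computation. Needs MPI initialized with a
    # thread level that allows MPI calls from tasks, e.g.
    # MPI.Init(; threadlevel = :multiple).
    function start_exchange_sketch(comm, rank, recv_buf, send_buf)
        return Threads.@spawn begin
            recv = MPI.Irecv!(recv_buf, comm; source=rank)
            send = MPI.Isend(send_buf, comm; dest=rank)
            wait(recv)   # halo data has arrived in recv_buf
            wait(send)   # send_buf may now be reused
        end
    end

    # Usage: copy the border into send_buf, spawn the exchange, compute the
    # interior, then wait on the returned task before reading the halo.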
