-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
9 changed files
with
238 additions
and
18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# Extension module that adds AMD GPU (AMDGPU.jl) methods to the
# `FastIce.Architecture` interface.
module AMDGPUExt

using AMDGPU
using KernelAbstractions

using FastIce.Architecture

# `using` only makes the exported names callable. To ADD methods to functions
# owned by `FastIce.Architecture` they have to be imported explicitly;
# otherwise the definitions below would create new, unrelated functions that
# are local to this extension and never seen by `Architecture`.
import FastIce.Architecture: set_device!, heuristic_groupsize

# Make `dev` the active HIP device (qualified to avoid relying on an export).
set_device!(dev::HIPDevice) = AMDGPU.device!(dev)

# Default work-group sizes for the ROCm backend. The tuple length equals the
# `Val` parameter — presumably the kernel dimensionality; confirm with callers.
heuristic_groupsize(::ROCBackend, ::Val{1}) = (256,)
heuristic_groupsize(::ROCBackend, ::Val{2}) = (128, 2)
heuristic_groupsize(::ROCBackend, ::Val{3}) = (128, 2, 1)

end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# Extension module that adds NVIDIA GPU (CUDA.jl) methods to the
# `FastIce.Architecture` interface.
module CUDAExt

using CUDA
using KernelAbstractions

using FastIce.Architecture

# `using` only makes the exported names callable. To ADD methods to functions
# owned by `FastIce.Architecture` they have to be imported explicitly;
# otherwise the definitions below would create new, unrelated functions that
# are local to this extension and never seen by `Architecture`.
import FastIce.Architecture: set_device!, heuristic_groupsize

# Make `dev` the active CUDA device.
set_device!(dev::CuDevice) = CUDA.device!(dev)

# Default work-group sizes for the CUDA backend. The tuple length equals the
# `Val` parameter — presumably the kernel dimensionality; confirm with callers.
heuristic_groupsize(::CUDABackend, ::Val{1}) = (256,)
heuristic_groupsize(::CUDABackend, ::Val{2}) = (32, 8)
heuristic_groupsize(::CUDABackend, ::Val{3}) = (32, 8, 1)

end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
# Distributed (MPI) architecture: wraps a child architecture together with a
# process topology and a set of index ranges.
module Distributed

using FastIce.Architecture
using FastIce.Grids

# `device` is not exported by `Architecture`, and adding methods to functions
# owned by another module requires an explicit import. Without this line the
# definitions below would create module-local `device`/`launch!` functions, and
# `device(arch.child_arch)` would find no method for the child architecture.
import FastIce.Architecture: device, launch!

# Provides `synchronize`, which `launch!` calls below.
using KernelAbstractions

export CartesianTopology

export global_rank, shared_rank, node_name, cartesian_communicator, shared_communicator

export dimensions, global_size, node_size

export global_grid_size, local_grid

export split_ndrange

using MPI

include("topology.jl")

include("split_ndrange.jl")

# Architecture made of a child (per-process) architecture, an MPI topology and
# a collection of ranges; `launch!` uses the LAST entry of `ranges`.
struct DistributedArchitecture{C,T,R} <: AbstractArchitecture
    child_arch::C
    topology::T
    ranges::R
end

# The device of a distributed architecture is the device of its child.
device(arch::DistributedArchitecture) = device(arch.child_arch)

"""
    launch!(arch::DistributedArchitecture, grid::CartesianGrid, kernel::Pair; boundary_conditions=nothing, async=true)

Launch `kernel = fun => args` over the last range stored in `arch.ranges`.

`fun` is called as `fun(backend, groupsize)` and the result is invoked with
`args...`, `ndrange=size(range)` and `offset=first(range)`.

# Keywords
- `boundary_conditions`: when not `nothing`, passed to `apply_boundary_conditions!`
  after the launch.
- `async`: when `false`, `synchronize` is called on the backend before returning.

Returns `nothing`.
"""
function launch!(arch::DistributedArchitecture, grid::CartesianGrid, kernel::Pair{K,Args};
                 boundary_conditions=nothing, async=true) where {K,Args}
    # The first type parameter is left free: `fun => args` has type
    # `Pair{typeof(fun),typeof(args)}`, which never matches the invariant
    # `Pair{Kernel,Args}`.
    fun, args = kernel

    # `DistributedArchitecture` has no `backend` field of its own.
    # NOTE(review): assumes the child architecture exposes a `backend` field,
    # as `SingleDeviceArchitecture` does — confirm for other child types.
    backend = arch.child_arch.backend
    groupsize = heuristic_groupsize(arch.child_arch)

    inner = arch.ranges[end]
    fun(backend, groupsize)(args...; ndrange=size(inner), offset=first(inner))

    isnothing(boundary_conditions) || apply_boundary_conditions!(boundary_conditions)

    async || synchronize(backend)
    return
end

end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
"""
    Exchanger(f, arch, comm, rank, halo, border)

Background worker that, each time it is woken, runs `f` and exchanges boundary
data with the MPI process `rank` over `comm`.

The constructor spawns a task that loops until `done` is set:

1. wait on the `top` event;
2. if `rank != MPI.PROC_NULL`, post a non-blocking receive into a buffer shaped like `halo`;
3. call `f(compute_bc)`, where `compute_bc` is `true` exactly when there is no neighbour;
4. if there is a neighbour, copy `border` into a send buffer, post a non-blocking
   send, wait for the receive, copy the received data into `halo`, and wait for the send;
5. notify the `bottom` event.

An exception thrown inside the loop is shown, stored in `err`, and `done` is set
to `true`.

# Fields
- `done`: atomic flag; `true` once the exchanger has stopped or failed.
- `top`: event the worker waits on before each iteration.
- `bottom`: event the worker notifies after each iteration.
- `err`: atomic slot holding the caught exception, `nothing` until a failure.
- `task`: the spawned worker task.
"""
mutable struct Exchanger
    @atomic done::Bool
    # Holds the `Base.Event` created in the constructor and is read as `exc.top`
    # by `Base.notify(::Exchanger)`; it was previously declared `ch::Channel`,
    # which cannot store an event.
    top::Base.Event
    bottom::Base.Event
    # `err` comes before `task` so that `new(false, top, bottom, nothing)`
    # initialises `err` and leaves only `task` undefined until it is spawned.
    @atomic err
    task::Task

    function Exchanger(f::F, arch::AbstractArchitecture, comm, rank, halo, border) where {F}
        top = Base.Event(true)
        bottom = Base.Event(true)

        # Staging buffers with the same shape/type as the exchanged arrays.
        send_buf = similar(border)
        recv_buf = similar(halo)
        this = new(false, top, bottom, nothing)

        has_neighbor = rank != MPI.PROC_NULL
        compute_bc = !has_neighbor

        this.task = Threads.@spawn begin
            set_device!(device(arch))
            KernelAbstractions.priority!(backend(arch), :high)
            try
                while !(@atomic this.done)
                    wait(top)
                    if has_neighbor
                        recv = MPI.Irecv!(recv_buf, comm; source=rank)
                    end
                    f(compute_bc)
                    if has_neighbor
                        copyto!(send_buf, border)
                        send = MPI.Isend(send_buf, comm; dest=rank)
                        # NOTE(review): `cooperative_test!` is defined elsewhere;
                        # presumably it polls the request while yielding — confirm.
                        cooperative_test!(recv)
                        copyto!(halo, recv_buf)
                        cooperative_test!(send)
                    end
                    notify(bottom)
                end
            catch err
                @show err
                @atomic this.done = true
                @atomic this.err = err
            end
        end
        # Exceptions caught above never fail the task; this only reports errors
        # raised outside the `try` (device/priority setup).
        errormonitor(this.task)
        return this
    end
end
|
||
# Atomically mark the exchanger as finished; evaluates to the stored value (`true`).
function setdone!(exc::Exchanger)
    return (@atomic exc.done = true)
end
|
||
# Atomically read the `done` flag of the exchanger.
function Base.isdone(exc::Exchanger)
    finished = @atomic exc.done
    return finished
end
|
||
# Wake the exchanger by notifying its `top` event.
# Throws an error if the `done` flag is already set.
function Base.notify(exc::Exchanger)
    stopped = @atomic exc.done
    stopped && error("notify: Exchanger is not running")
    return notify(exc.top)
end
# Block until the exchanger notifies its `bottom` event.
# Throws an error if the `done` flag is already set.
function Base.wait(exc::Exchanger)
    stopped = @atomic exc.done
    stopped && error("wait: Exchanger is not running")
    return wait(exc.bottom)
end
|
||
# View of the FIRST slice of `A` along dimension `D` (index 1 in that
# dimension, `:` in every other one). The indexed dimension is dropped.
function get_recv_view(::Val{1}, ::Val{D}, A) where {D}
    inds = ntuple(Val(ndims(A))) do d
        d == D ? 1 : Colon()
    end
    return view(A, inds...)
end

# View of the LAST slice of `A` along dimension `D` (index `size(A, D)`).
function get_recv_view(::Val{2}, ::Val{D}, A) where {D}
    last_idx = size(A, D)
    inds = ntuple(Val(ndims(A))) do d
        d == D ? last_idx : Colon()
    end
    return view(A, inds...)
end
|
||
# View of the SECOND slice of `A` along dimension `D` (index 2 in that
# dimension, `:` in every other one). The indexed dimension is dropped.
function get_send_view(::Val{1}, ::Val{D}, A) where {D}
    inds = ntuple(Val(ndims(A))) do d
        d == D ? 2 : Colon()
    end
    return view(A, inds...)
end

# View of the SECOND-TO-LAST slice of `A` along dimension `D`
# (index `size(A, D) - 1`).
function get_send_view(::Val{2}, ::Val{D}, A) where {D}
    penultimate = size(A, D) - 1
    inds = ntuple(Val(ndims(A))) do d
        d == D ? penultimate : Colon()
    end
    return view(A, inds...)
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# Positions (1-based, relative to `nr`) along dimension `I` for a border of
# width `bw[I]`:
#   Val(1) — the first `bw[I]` positions,
#   Val(2) — the last `bw[I]` positions,
#   Val(3) — everything in between.
@inline subrange(nr, bw, I, ::Val{1}) = 1:bw[I]
@inline subrange(nr, bw, I, ::Val{2}) = (size(nr, I) - bw[I] + 1):size(nr, I)
@inline subrange(nr, bw, I, ::Val{3}) = (bw[I] + 1):(size(nr, I) - bw[I])

"""
    split_ndrange(ndrange, ndwidth)

Split an `N`-dimensional index range into `2N + 1` non-overlapping pieces: two
border slabs of width `ndwidth[d]` for every dimension `d`, followed by the
inner region.

`ndrange` is either a `CartesianIndices{N}` or anything accepted by the
`CartesianIndices` constructor. `ndwidth` is a tuple of `N` integers.

Returns a tuple of `CartesianIndices`. Entries `2d - 1` and `2d` are the low and
high slabs of dimension `d`; in such a slab, dimensions before `d` span their
full extent and dimensions after `d` span only their inner part. The last entry
is the inner region.

# Throws
- `ArgumentError` if `ndwidth` is not a tuple of `N` integers.
- `AssertionError` unless every extent is strictly larger than twice its width.
"""
@inline split_ndrange(ndrange, ndwidth) = split_ndrange(CartesianIndices(ndrange), ndwidth)

# Terminating fallback: without it, a width that does not match the method below
# would bounce through the generic method forever (`CartesianIndices` of a
# `CartesianIndices` is a no-op) and end in a stack overflow.
function split_ndrange(ndrange::CartesianIndices{N}, ndwidth) where {N}
    throw(ArgumentError("ndwidth must be a tuple of $N integers, got $(repr(ndwidth))"))
end

# `NTuple{N,Integer}` (rather than `NTuple{N,<:Integer}`) also accepts widths of
# mixed integer types such as `(1, Int32(2))`.
function split_ndrange(ndrange::CartesianIndices{N}, ndwidth::NTuple{N,Integer}) where {N}
    @assert all(size(ndrange) .> ndwidth .* 2)
    # Ranges selecting the slab on side `J` (1 = low, 2 = high) of dimension `I`.
    @inline function ndsubrange(I, ::Val{J}) where {J}
        return ntuple(Val(N)) do idim
            if idim < I
                1:size(ndrange, idim)
            elseif idim == I
                subrange(ndrange, ndwidth, idim, Val(J))
            else
                subrange(ndrange, ndwidth, idim, Val(3))
            end
        end
    end
    ndinner = ntuple(idim -> subrange(ndrange, ndwidth, idim, Val(3)), Val(N))
    return ntuple(Val(2N + 1)) do i
        if i == 2N + 1
            ndrange[ndinner...]
        else
            # i = 1, 2, 3, 4, ... -> (dimension, side) = (1,1), (1,2), (2,1), (2,2), ...
            idim, idir = divrem(i - 1, 2) .+ 1
            ndrange[ndsubrange(idim, Val(idir))...]
        end
    end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
# Abstraction over the compute resource (backend + device) that kernels are
# launched on.
module Architecture

export AbstractArchitecture

export SingleDeviceArchitecture

export launch!, set_device!, heuristic_groupsize

using FastIce.Grids

using KernelAbstractions
import KernelAbstractions.Kernel

# Supertype of all architectures.
abstract type AbstractArchitecture end

# Activate the device that belongs to `arch`.
set_device!(arch::AbstractArchitecture) = set_device!(device(arch))

# Generic fallback: ask for the group size of the architecture's device.
# NOTE(review): the backend extensions in this change define
# `heuristic_groupsize(backend, Val(N))`; nothing here forwards to that
# two-argument form — confirm the intended call path.
heuristic_groupsize(arch::AbstractArchitecture) = heuristic_groupsize(device(arch))

# Architecture made of one KernelAbstractions backend and one device.
struct SingleDeviceArchitecture{B,D} <: AbstractArchitecture
    backend::B
    device::D
end

# Nothing to select when running on the CPU backend.
set_device!(::SingleDeviceArchitecture{CPU}) = nothing

# Group size used for the CPU backend.
heuristic_groupsize(::SingleDeviceArchitecture{CPU}) = 256

device(arch::SingleDeviceArchitecture) = arch.device

"""
    launch!(arch::SingleDeviceArchitecture, grid::CartesianGrid, kernel::Pair; kwargs...)

Launch `kernel` with a work size of `size(grid, Vertex())`; all keyword
arguments are forwarded to the work-size method of `launch!`.
"""
function launch!(arch::SingleDeviceArchitecture, grid::CartesianGrid, kernel::Pair{K,Args};
                 kwargs...) where {K,Args}
    worksize = size(grid, Vertex())
    launch!(arch, worksize, kernel; kwargs...)
end

"""
    launch!(arch::SingleDeviceArchitecture, worksize::NTuple{N,Int}, kernel::Pair; boundary_conditions=nothing, async=true)

Launch `kernel = fun => args` on `arch.backend` over `worksize`.

`fun` is called as `fun(arch.backend, groupsize, worksize)` and the result is
invoked with `args...`.

# Keywords
- `boundary_conditions`: when not `nothing`, passed to `apply_boundary_conditions!`
  after the launch.
- `async`: when `false`, `synchronize(arch.backend)` is called before returning.

Returns `nothing`.
"""
function launch!(arch::SingleDeviceArchitecture, worksize::NTuple{N,Int}, kernel::Pair{K,Args};
                 boundary_conditions=nothing, async=true) where {N,K,Args}
    # The first type parameter is left free: `fun => args` has type
    # `Pair{typeof(fun),typeof(args)}`, which never matches the invariant
    # `Pair{Kernel,Args}`.
    fun, args = kernel

    # Dispatch on the architecture, not on its device, so that the
    # `SingleDeviceArchitecture{CPU}` method is honoured; every other
    # architecture still falls back to `heuristic_groupsize(device(arch))`.
    groupsize = heuristic_groupsize(arch)

    fun(arch.backend, groupsize, worksize)(args...)
    isnothing(boundary_conditions) || apply_boundary_conditions!(boundary_conditions)

    async || synchronize(arch.backend)
    return
end

end