diff --git a/scripts_future_API/profileme.sh b/scripts_future_API/profileme.sh deleted file mode 100755 index 59b405fe..00000000 --- a/scripts_future_API/profileme.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -source /users/lurass/scratch/setenv_lumi.sh - -# basic -# srun --cpu-bind=map_cpu:49,57,17,25,1,9,33,41 -N1 -n8 --gpus-per-node=8 profileme.sh - -# optimal using only single GCD per MI250x Module -# srun --cpu-bind=map_cpu:49,17,1,33 -N1 -n1 --gpus-per-node=8 profileme.sh -# srun --cpu-bind=map_cpu:49,17,1,33 -N4 -n16 --gpus-per-node=8 profileme.sh -export ROCR_VISIBLE_DEVICES=0,2,4,6 - -# julia --project benchmark_diffusion_3D.jl -julia --project --color=yes tm_stokes_mpi_wip.jl - -# ENABLE_JITPROFILING=1 rocprof --hip-trace --hsa-trace -d ./prof_out${SLURM_PROCID} -o ./prof_out${SLURM_PROCID}/results${SLURM_PROCID}.csv julia --project bench3d.jl diff --git a/scripts_future_API/runme.sh b/scripts_future_API/runme.sh index 45ed56ab..f7c803f1 100755 --- a/scripts_future_API/runme.sh +++ b/scripts_future_API/runme.sh @@ -1,14 +1,23 @@ #!/bin/bash -module load LUMI/22.08 -module load partition/G -module load rocm/5.3.3 +source /users/lurass/scratch/setenv_lumi.sh +# module load LUMI/22.08 +# module load partition/G +# module load rocm/5.3.3 # ROCm-aware MPI set to 1, else 0 export MPICH_GPU_SUPPORT_ENABLED=1 -export IGG_ROCMAWARE_MPI=1 -# Needs to know about location of GTL lib -export LD_PRELOAD=${CRAY_MPICH_ROOTDIR}/gtl/lib/libmpi_gtl_hsa.so +## basic +# srun --cpu-bind=map_cpu:49,57,17,25,1,9,33,41 -N1 -n8 --gpus-per-node=8 runme.sh -julia --project rocmaware.jl \ No newline at end of file +## optimal using only single GCD per MI250x Module +# srun --cpu-bind=map_cpu:49,17,1,33 -N1 -n1 --gpus-per-node=8 runme.sh +# srun --cpu-bind=map_cpu:49,17,1,33 -N4 -n16 --gpus-per-node=8 runme.sh +export ROCR_VISIBLE_DEVICES=0,2,4,6 + +# julia --project benchmark_diffusion_3D.jl +julia --project --color=yes tm_stokes_mpi_wip.jl + +# Profiling +# 
ENABLE_JITPROFILING=1 rocprof --hip-trace --hsa-trace -d ./prof_out${SLURM_PROCID} -o ./prof_out${SLURM_PROCID}/results${SLURM_PROCID}.csv julia --project bench3d.jl diff --git a/scripts_future_API/submit.sh b/scripts_future_API/submit.sh index 1014ee59..77ff13fe 100644 --- a/scripts_future_API/submit.sh +++ b/scripts_future_API/submit.sh @@ -2,14 +2,17 @@ #SBATCH --job-name="FastIce3D" #SBATCH --output=FastIce3D.%j.o #SBATCH --error=FastIce3D.%j.e -#SBATCH --time=00:05:00 -#SBATCH --nodes=16 -#SBATCH --ntasks=64 -# #SBATCH --ntasks-per-node=8 +#SBATCH --time=00:10:00 +#SBATCH --nodes=4 +#SBATCH --ntasks=16 +# #SBATCH --ntasks-per-node=8 # this somehow fails... #SBATCH --gpus-per-node=8 #SBATCH --partition=standard-g #SBATCH --account project_465000557 -# export ROCR_VISIBLE_DEVICES=0,2,4,6 +# CPU_BIND="map_cpu:49,57,17,25,1,9,33,41" -srun --cpu-bind=map_cpu:49,17,1,33 ./profileme.sh +# export ROCR_VISIBLE_DEVICES=0,2,4,6 # -> done in runme.sh +CPU_BIND="map_cpu:49,17,1,33" + +srun --cpu-bind=${CPU_BIND} ./runme.sh diff --git a/scripts_future_API/tm_stokes_mpi_wip.jl b/scripts_future_API/tm_stokes_mpi_wip.jl index 7f1e8ca1..347fe608 100644 --- a/scripts_future_API/tm_stokes_mpi_wip.jl +++ b/scripts_future_API/tm_stokes_mpi_wip.jl @@ -36,7 +36,7 @@ function main(; do_visu=false, do_save=false) MPI.Init() backend = ROCBackend() - dims = (2, 1, 1) + dims = (4, 2, 2) # dims = (4, 2, 2) # dims = (2, 1, 1) topo = CartesianTopology(dims) diff --git a/scripts_future_API/tm_stokes_wip.jl b/scripts_future_API/tm_stokes_wip.jl index 97cc14ff..ca12b24f 100644 --- a/scripts_future_API/tm_stokes_wip.jl +++ b/scripts_future_API/tm_stokes_wip.jl @@ -15,6 +15,7 @@ const SBC = BoundaryCondition{Slip} using LinearAlgebra, Printf using KernelAbstractions # using CUDA +# using AMDGPU using CairoMakie # using GLMakie