From b4f75351877a2da2edcced198874813a6424cdbd Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Wed, 4 Oct 2023 14:37:08 -0400 Subject: [PATCH 01/16] Fix build on Frontier. --- EXPERIMENT.md | 21 ++++++++++++++++++++- build_all.sh | 23 +++++++---------------- get_deps.sh | 40 +++++++++++++++++----------------------- realm/main.cc | 2 +- realm_old/main.cc | 2 +- realm_subgraph/main.cc | 2 +- 6 files changed, 47 insertions(+), 43 deletions(-) diff --git a/EXPERIMENT.md b/EXPERIMENT.md index 661ded7e..56a7f952 100644 --- a/EXPERIMENT.md +++ b/EXPERIMENT.md @@ -6,10 +6,29 @@ with Task Bench.** Corresponding authors: * Elliott Slaughter - * Wei Wu + * Wei Wu ## Instructions for Specific Machines +### Frontier + +Note: This configuration is obsolete and is provided for documentation +purposes only. + +``` +git clone https://github.com/StanfordLegion/task-bench.git +cd task-bench +USE_GASNET=1 LEGION_GASNET_CONDUIT=ofi LEGION_GASNET_SYSTEM=slingshot11 ./get_deps.sh +THREADS=32 ./build_all.sh +cd experiment/frontier_metg_compute +sbatch --nodes 1 metg_legion.sh +``` + +## Deprecated or Obsolete Machines + +Note: These configurations are obsolete and are provided for documentation +purposes only. + ### Cori ``` diff --git a/build_all.sh b/build_all.sh index 5490166e..3dd38d02 100755 --- a/build_all.sh +++ b/build_all.sh @@ -19,11 +19,6 @@ else fi THREADS=${THREADS:-$DEFAULT_THREADS} -# On Cray machines, default to static build. (Cori switched this -# default from static to dynamic in the January 2020 maintenance -# cycle, but we want to stick with static builds.) 
-export CRAYPE_LINK_TYPE=static - make -C core clean make -C core -j$THREADS @@ -106,23 +101,19 @@ if [[ $USE_REGENT -eq 1 ]]; then fi unset LG_RT_DIR if [[ -z $GITHUB_ACTIONS ]]; then + export CONDUIT=$LEGION_GASNET_CONDUIT${LEGION_GASNET_SYSTEM+-}$LEGION_GASNET_SYSTEM ./scripts/setup_env.py -j$THREADS else ./install.py --rdir=auto fi ) popd - ( - if [[ -n $CRAYPE_VERSION ]]; then - export CC=gcc CXX=g++ - fi - SHARD_SIZE=30 make -C regent -j$THREADS & - sleep 1 - SHARD_SIZE=15 make -C regent -j$THREADS & - sleep 1 - SHARD_SIZE=14 make -C regent -j$THREADS & - wait - ) + SHARD_SIZE=30 make -C regent -j$THREADS & + sleep 1 + SHARD_SIZE=15 make -C regent -j$THREADS & + sleep 1 + SHARD_SIZE=14 make -C regent -j$THREADS & + wait fi if [[ $USE_LEGION -eq 1 ]]; then make -C legion -j$THREADS diff --git a/get_deps.sh b/get_deps.sh index 3923a62b..dc84bb1b 100755 --- a/get_deps.sh +++ b/get_deps.sh @@ -18,6 +18,8 @@ TASKBENCH_ROOT_DIR="\$(dirname "\$TASKBENCH_DEPS_DIR")" EOF +# Machine-specific settings. On Cray systems, we can use a common set of +# settings, but other machines require specific configuration. 
if [[ $(hostname --fqdn) = *"summit"* ]]; then cat >>deps/env.sh <>deps/env.sh <>deps/env.sh <>deps/env.sh <>deps/env.sh <>deps/env.sh + git clone https://github.com/StanfordLegion/gasnet.git "$GASNET_DIR" fi @@ -227,7 +221,7 @@ EOF elif [[ $USE_GASNET -eq 1 ]]; then cat >>deps/env.sh < -#include +#include #include #include diff --git a/realm_old/main.cc b/realm_old/main.cc index 15e467be..a75f210e 100644 --- a/realm_old/main.cc +++ b/realm_old/main.cc @@ -16,7 +16,7 @@ #include "main.h" #include -#include +#include #include #include diff --git a/realm_subgraph/main.cc b/realm_subgraph/main.cc index bad67350..ad78febb 100644 --- a/realm_subgraph/main.cc +++ b/realm_subgraph/main.cc @@ -16,7 +16,7 @@ #include "main.h" #include -#include +#include #include #include From 2fcc6e7519812e6d9d537cd21f8fcaf7035ca973 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Thu, 5 Oct 2023 00:32:31 -0400 Subject: [PATCH 02/16] More fixes and adjustments for Frontier. --- .gitignore | 1 + build_all.sh | 12 ++++++------ scripts/chart_util.py | 9 +++++++++ 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index b332b3da..f861040b 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ .DS_Store *.swp *.a +*.d *.o *.so *.dSYM diff --git a/build_all.sh b/build_all.sh index 3dd38d02..78fe6c4f 100755 --- a/build_all.sh +++ b/build_all.sh @@ -78,9 +78,9 @@ if [[ $USE_PYGION -eq 1 ]]; then make -C pygion clean fi if [[ $USE_REGENT -eq 1 ]]; then - SHARD_SIZE=30 make -C regent clean - SHARD_SIZE=15 make -C regent clean - SHARD_SIZE=14 make -C regent clean + SHARD_SIZE=54 make -C regent clean + SHARD_SIZE=26 make -C regent clean + SHARD_SIZE=12 make -C regent clean fi if [[ $USE_REALM -eq 1 ]]; then make -C realm clean @@ -108,11 +108,11 @@ if [[ $USE_REGENT -eq 1 ]]; then fi ) popd - SHARD_SIZE=30 make -C regent -j$THREADS & + SHARD_SIZE=54 make -C regent -j$THREADS & sleep 1 - SHARD_SIZE=15 make -C regent -j$THREADS & + SHARD_SIZE=26 make -C regent 
-j$THREADS & sleep 1 - SHARD_SIZE=14 make -C regent -j$THREADS & + SHARD_SIZE=12 make -C regent -j$THREADS & wait fi if [[ $USE_LEGION -eq 1 ]]; then diff --git a/scripts/chart_util.py b/scripts/chart_util.py index ecc01d2c..1c760922 100644 --- a/scripts/chart_util.py +++ b/scripts/chart_util.py @@ -49,6 +49,15 @@ def get_machine_parameters(machine, processor_kind, resource): assert False else: assert False + elif machine == 'frontier': + assert processor_kind == 'cpu' + if resource == 'flops': + return {'cores': 56, 'peak_flops': 2.341191e+12, 'peak_bytes': None} + elif resource == 'bytes': + assert False # TODO: need to complete experiments + # return {'cores': 56, 'peak_flops': None, 'peak_bytes': 7.902120e+10} + else: + assert False else: assert False From 42943e61a5d6ab49a2638bf16f8427a601793487 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Thu, 5 Oct 2023 00:33:21 -0400 Subject: [PATCH 03/16] More fixes. --- test_all.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test_all.sh b/test_all.sh index 6f65ae0f..1839428b 100755 --- a/test_all.sh +++ b/test_all.sh @@ -131,8 +131,8 @@ fi if [[ $USE_REGENT -eq 1 ]]; then for t in trivial no_comm stencil_1d stencil_1d_periodic nearest "spread -period 2" random_nearest all_to_all; do # FIXME: dom tree fft for k in "${kernels[@]}"; do - ./regent/main.shard15 -steps $steps -type $t $k -ll:io 1 - ./regent/main.shard15 -steps $steps -type $t $k -ll:io 1 -ll:cpu 2 + ./regent/main.shard12 -steps $steps -type $t $k -ll:io 1 + ./regent/main.shard12 -steps $steps -type $t $k -ll:io 1 -ll:cpu 2 # FIXME: Regent doesn't support multiple graphs done done From d9698be32b99f8ee2397ae042f95b0c39cd23b47 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Thu, 5 Oct 2023 02:41:52 -0400 Subject: [PATCH 04/16] Frontier scripts. 
--- .../frontier_metg_compute/metg_legion.sh | 69 +++++++++++++++++++ .../frontier_metg_compute/metg_legion_quad.sh | 69 +++++++++++++++++++ .../metg_legion_socket.sh | 69 +++++++++++++++++++ experiments/frontier_metg_compute/metg_mpi.sh | 43 ++++++++++++ .../frontier_metg_compute/metg_regent.sh | 69 +++++++++++++++++++ .../frontier_metg_compute/metg_regent_quad.sh | 69 +++++++++++++++++++ .../metg_regent_socket.sh | 69 +++++++++++++++++++ 7 files changed, 457 insertions(+) create mode 100644 experiments/frontier_metg_compute/metg_legion.sh create mode 100644 experiments/frontier_metg_compute/metg_legion_quad.sh create mode 100644 experiments/frontier_metg_compute/metg_legion_socket.sh create mode 100644 experiments/frontier_metg_compute/metg_mpi.sh create mode 100644 experiments/frontier_metg_compute/metg_regent.sh create mode 100644 experiments/frontier_metg_compute/metg_regent_quad.sh create mode 100644 experiments/frontier_metg_compute/metg_regent_socket.sh diff --git a/experiments/frontier_metg_compute/metg_legion.sh b/experiments/frontier_metg_compute/metg_legion.sh new file mode 100644 index 00000000..9ede83c2 --- /dev/null +++ b/experiments/frontier_metg_compute/metg_legion.sh @@ -0,0 +1,69 @@ +#!/bin/bash +#SBATCH --account=CHM137 +#SBATCH --partition=batch +#SBATCH --time=01:00:00 +#SBATCH --mail-type=ALL + +total_cores=56 +cores=$(( $total_cores - 2 )) + +function launch_util_0 { + memoize="-dm:memoize -lg:parallel_replay $cores" + srun_flags= + if (( $1 == 1 )); then + srun_flags="--network=single_node_vni" + fi + srun -n $1 -N $1 --cpus-per-task=$(( total_cores )) --cpu_bind none $srun_flags ../../legion/task_bench "${@:2}" -fields 2 -ll:cpu $cores -ll:util 0 $memoize +} + +function launch_util_1 { + memoize="-dm:memoize" + srun_flags= + if (( $1 == 1 )); then + srun_flags="--network=single_node_vni" + fi + srun -n $1 -N $1 --cpus-per-task=$(( total_cores )) --cpu_bind none $srun_flags ../../legion/task_bench "${@:2}" -fields 2 -ll:cpu $cores -ll:util 
1 -ll:pin_util $memoize +} + +function launch_util_2 { + memoize="-dm:memoize" + srun_flags= + if (( $1 == 1 )); then + srun_flags="--network=single_node_vni" + fi + srun -n $1 -N $1 --cpus-per-task=$(( total_cores )) --cpu_bind none $srun_flags ../../legion/task_bench "${@:2}" -fields 2 -ll:cpu $cores -ll:util 2 $memoize +} + +function repeat { + local -n result=$1 + local n=$2 + result=() + for i in $(seq 1 $n); do + result+=("${@:3}") + if (( i < n )); then + result+=("-and") + fi + done +} + +function sweep { + for s in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18; do + for rep in 0 1 2 3 4; do + if [[ $rep -le $s ]]; then + local args + repeat args $3 -kernel compute_bound -iter $(( 1 << (26-s) )) -type $4 -radix ${RADIX:-5} -steps ${STEPS:-1000} -width $(( $2 * cores )) + $1 $2 "${args[@]}" + fi + done + done +} + +for n in $SLURM_JOB_NUM_NODES; do + for g in ${NGRAPHS:-1}; do + for t in ${PATTERN:-stencil_1d}; do + sweep launch_util_0 $n $g $t > legion_util_0_ngraphs_${g}_type_${t}_nodes_${n}.log + # sweep launch_util_1 $n $g $t > legion_util_1_ngraphs_${g}_type_${t}_nodes_${n}.log + # sweep launch_util_2 $n $g $t > legion_util_2_ngraphs_${g}_type_${t}_nodes_${n}.log + done + done +done diff --git a/experiments/frontier_metg_compute/metg_legion_quad.sh b/experiments/frontier_metg_compute/metg_legion_quad.sh new file mode 100644 index 00000000..b8aa4073 --- /dev/null +++ b/experiments/frontier_metg_compute/metg_legion_quad.sh @@ -0,0 +1,69 @@ +#!/bin/bash +#SBATCH --account=CHM137 +#SBATCH --partition=batch +#SBATCH --time=01:00:00 +#SBATCH --mail-type=ALL + +total_cores=56 +cores=$(( $total_cores - 8 )) + +function launch_util_0 { + memoize="-dm:memoize -lg:parallel_replay $(( cores / 4 ))" + srun_flags= + if (( $1 == 1 )); then + srun_flags="--network=single_node_vni" + fi + srun -n $(( $1 * 4 )) -N $1 --cpus-per-task=$(( total_cores / 4 )) --cpu_bind cores $srun_flags ../../legion/task_bench "${@:2}" -ll:cpu $(( cores / 4 )) -ll:io 1 -ll:util 0 
-lg:replay_on_cpus $memoize -lg:window 8192 -fields 2 +} + +function launch_util_1 { + memoize="-dm:memoize" + srun_flags= + if (( $1 == 1 )); then + srun_flags="--network=single_node_vni" + fi + srun -n $(( $1 * 4 )) -N $1 --cpus-per-task=$(( total_cores / 4 )) --cpu_bind cores $srun_flags ../../legion/task_bench "${@:2}" -ll:cpu $(( cores / 4 )) -ll:io 1 -ll:util 1 -ll:pin_util $memoize -lg:window 8192 -fields 2 +} + +function launch_util_2 { + memoize="-dm:memoize -lg:parallel_replay 2" + srun_flags= + if (( $1 == 1 )); then + srun_flags="--network=single_node_vni" + fi + srun -n $(( $1 * 4 )) -N $1 --cpus-per-task=$(( total_cores / 4 )) --cpu_bind cores $srun_flags ../../legion/task_bench "${@:2}" -ll:cpu $(( cores / 4 )) -ll:util 2 $memoize -lg:window 8192 -fields 2 +} + +function repeat { + local -n result=$1 + local n=$2 + result=() + for i in $(seq 1 $n); do + result+=("${@:3}") + if (( i < n )); then + result+=("-and") + fi + done +} + +function sweep { + for s in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18; do + for rep in 0 1 2 3 4; do + if [[ $rep -le $s ]]; then + local args + repeat args $3 -kernel compute_bound -iter $(( 1 << (26-s) )) -type $4 -radix ${RADIX:-5} -steps ${STEPS:-1000} -width $(( $2 * cores )) + $1 $2 "${args[@]}" + fi + done + done +} + +for n in $SLURM_JOB_NUM_NODES; do + for g in ${NGRAPHS:-1}; do + for t in ${PATTERN:-stencil_1d}; do + # sweep launch_util_0 $n $g $t > legion_util_0_quad_ngraphs_${g}_type_${t}_nodes_${n}.log + # sweep launch_util_1 $n $g $t > legion_util_1_quad_ngraphs_${g}_type_${t}_nodes_${n}.log + sweep launch_util_2 $n $g $t > legion_util_2_quad_ngraphs_${g}_type_${t}_nodes_${n}.log + done + done +done diff --git a/experiments/frontier_metg_compute/metg_legion_socket.sh b/experiments/frontier_metg_compute/metg_legion_socket.sh new file mode 100644 index 00000000..428d1aa0 --- /dev/null +++ b/experiments/frontier_metg_compute/metg_legion_socket.sh @@ -0,0 +1,69 @@ +#!/bin/bash +#SBATCH --account=CHM137 
+#SBATCH --partition=batch +#SBATCH --time=01:00:00 +#SBATCH --mail-type=ALL + +total_cores=56 +cores=$(( $total_cores - 4 )) + +function launch_util_0 { + memoize="-dm:memoize -lg:parallel_replay $(( cores / 2 ))" + srun_flags= + if (( $1 == 1 )); then + srun_flags="--network=single_node_vni" + fi + srun -n $(( $1 * 2 )) -N $1 --cpus-per-task=$(( total_cores / 2 )) --cpu_bind cores $srun_flags ../../legion/task_bench "${@:2}" -ll:cpu $(( cores / 2 )) -ll:io 1 -ll:util 0 -lg:replay_on_cpus $memoize -lg:window 8192 -fields 2 +} + +function launch_util_1 { + memoize="-dm:memoize" + srun_flags= + if (( $1 == 1 )); then + srun_flags="--network=single_node_vni" + fi + srun -n $(( $1 * 2 )) -N $1 --cpus-per-task=$(( total_cores / 2 )) --cpu_bind cores $srun_flags ../../legion/task_bench "${@:2}" -ll:cpu $(( cores / 2 )) -ll:io 1 -ll:util 1 -ll:pin_util $memoize -lg:window 8192 -fields 2 +} + +function launch_util_2 { + memoize="-dm:memoize -lg:parallel_replay 2" + srun_flags= + if (( $1 == 1 )); then + srun_flags="--network=single_node_vni" + fi + srun -n $(( $1 * 2 )) -N $1 --cpus-per-task=$(( total_cores / 2 )) --cpu_bind cores $srun_flags ../../legion/task_bench "${@:2}" -ll:cpu $(( cores / 2 )) -ll:util 2 $memoize -lg:window 8192 -fields 2 +} + +function repeat { + local -n result=$1 + local n=$2 + result=() + for i in $(seq 1 $n); do + result+=("${@:3}") + if (( i < n )); then + result+=("-and") + fi + done +} + +function sweep { + for s in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18; do + for rep in 0 1 2 3 4; do + if [[ $rep -le $s ]]; then + local args + repeat args $3 -kernel compute_bound -iter $(( 1 << (26-s) )) -type $4 -radix ${RADIX:-5} -steps ${STEPS:-1000} -width $(( $2 * cores )) + $1 $2 "${args[@]}" + fi + done + done +} + +for n in $SLURM_JOB_NUM_NODES; do + for g in ${NGRAPHS:-1}; do + for t in ${PATTERN:-stencil_1d}; do + # sweep launch_util_0 $n $g $t > legion_util_0_socket_ngraphs_${g}_type_${t}_nodes_${n}.log + # sweep launch_util_1 $n $g $t > 
legion_util_1_socket_ngraphs_${g}_type_${t}_nodes_${n}.log + sweep launch_util_2 $n $g $t > legion_util_2_socket_ngraphs_${g}_type_${t}_nodes_${n}.log + done + done +done diff --git a/experiments/frontier_metg_compute/metg_mpi.sh b/experiments/frontier_metg_compute/metg_mpi.sh new file mode 100644 index 00000000..0219aa31 --- /dev/null +++ b/experiments/frontier_metg_compute/metg_mpi.sh @@ -0,0 +1,43 @@ +#!/bin/bash +#SBATCH --account=CHM137 +#SBATCH --partition=batch +#SBATCH --time=01:00:00 +#SBATCH --mail-type=ALL + +cores=56 + +function launch { + srun -n $(( $1 * cores )) -N $1 --ntasks-per-node=$cores --cpus-per-task=1 --cpu_bind cores ../../mpi/$VARIANT "${@:2}" +} + +function repeat { + local -n result=$1 + local n=$2 + result=() + for i in $(seq 1 $n); do + result+=("${@:3}") + if (( i < n )); then + result+=("-and") + fi + done +} + +function sweep { + for s in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21; do + for rep in 0 1 2 3 4; do + if [[ $rep -le $s ]]; then + local args + repeat args $3 -kernel compute_bound -iter $(( 1 << (26-s) )) -type $4 -radix ${RADIX:-5} -steps ${STEPS:-1000} -width $(( $2 * cores )) + $1 $2 "${args[@]}" + fi + done + done +} + +for n in $SLURM_JOB_NUM_NODES; do + for g in ${NGRAPHS:-1}; do + for t in ${PATTERN:-stencil_1d}; do + sweep launch $n $g $t > mpi_${VARIANT}_ngraphs_${g}_type_${t}_nodes_${n}.log + done + done +done diff --git a/experiments/frontier_metg_compute/metg_regent.sh b/experiments/frontier_metg_compute/metg_regent.sh new file mode 100644 index 00000000..febce3f0 --- /dev/null +++ b/experiments/frontier_metg_compute/metg_regent.sh @@ -0,0 +1,69 @@ +#!/bin/bash +#SBATCH --account=CHM137 +#SBATCH --partition=batch +#SBATCH --time=01:00:00 +#SBATCH --mail-type=ALL + +total_cores=56 +cores=$(( $total_cores - 2 )) + +function launch_util_0 { + memoize="-dm:memoize -lg:parallel_replay $cores" + srun_flags= + if (( $1 == 1 )); then + srun_flags="--network=single_node_vni" + fi + srun -n $1 -N $1 
--cpus-per-task=$(( total_cores )) --cpu_bind none $srun_flags ../../regent${VARIANT+_}$VARIANT/main.shard$cores "${@:2}" -ll:cpu $cores -ll:io 1 -ll:util 0 -lg:replay_on_cpus $memoize # -scratch 64 +} + +function launch_util_1 { + memoize="-dm:memoize" + srun_flags= + if (( $1 == 1 )); then + srun_flags="--network=single_node_vni" + fi + srun -n $1 -N $1 --cpus-per-task=$(( total_cores )) --cpu_bind none $srun_flags ../../regent${VARIANT+_}$VARIANT/main.shard$cores "${@:2}" -ll:cpu $cores -ll:io 1 -ll:util 1 $memoize # -scratch 64 +} + +function launch_util_2 { + memoize="-dm:memoize -lg:parallel_replay 2" + srun_flags= + if (( $1 == 1 )); then + srun_flags="--network=single_node_vni" + fi + srun -n $1 -N $1 --cpus-per-task=$(( total_cores )) --cpu_bind none $srun_flags ../../regent${VARIANT+_}$VARIANT/main.shard$cores "${@:2}" -ll:cpu $cores -ll:io 1 -ll:util 2 $memoize # -scratch 64 +} + +function repeat { + local -n result=$1 + local n=$2 + result=() + for i in $(seq 1 $n); do + result+=("${@:3}") + if (( i < n )); then + result+=("-and") + fi + done +} + +function sweep { + for s in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18; do + for rep in 0 1 2 3 4; do + if [[ $rep -le $s ]]; then + local args + repeat args $3 -kernel compute_bound -iter $(( 1 << (26-s) )) -type $4 -radix ${RADIX:-5} -steps ${STEPS:-1000} -width $(( $2 * cores )) + $1 $2 "${args[@]}" + fi + done + done +} + +for n in $SLURM_JOB_NUM_NODES; do + for g in ${NGRAPHS:-1}; do + for t in ${PATTERN:-stencil_1d}; do + # sweep launch_util_0 $n $g $t > regent${VARIANT+_}${VARIANT}_util_0_ngraphs_${g}_type_${t}_nodes_${n}.log + # sweep launch_util_1 $n $g $t > regent${VARIANT+_}${VARIANT_}_util_1_ngraphs_${g}_type_${t}_nodes_${n}.log + sweep launch_util_2 $n $g $t > regent${VARIANT+_}${VARIANT}_util_2_ngraphs_${g}_type_${t}_nodes_${n}.log + done + done +done diff --git a/experiments/frontier_metg_compute/metg_regent_quad.sh b/experiments/frontier_metg_compute/metg_regent_quad.sh new file mode 
100644 index 00000000..8dd36f17 --- /dev/null +++ b/experiments/frontier_metg_compute/metg_regent_quad.sh @@ -0,0 +1,69 @@ +#!/bin/bash +#SBATCH --account=CHM137 +#SBATCH --partition=batch +#SBATCH --time=01:00:00 +#SBATCH --mail-type=ALL + +total_cores=56 +cores=$(( $total_cores - 8 )) + +function launch_util_0 { + memoize="-dm:memoize -lg:parallel_replay $(( cores / 4 ))" + srun_flags= + if (( $1 == 1 )); then + srun_flags="--network=single_node_vni" + fi + srun -n $(( $1 * 4 )) -N $1 --cpus-per-task=$(( total_cores / 4 )) --cpu_bind cores $srun_flags ../../regent${VARIANT+_}$VARIANT/main.shard$(( cores / 4 )) "${@:2}" -ll:cpu $(( cores / 4 )) -ll:io 1 -ll:util 0 -lg:replay_on_cpus $memoize # -scratch 64 -lg:window 8192 +} + +function launch_util_1 { + memoize="-dm:memoize" + srun_flags= + if (( $1 == 1 )); then + srun_flags="--network=single_node_vni" + fi + srun -n $(( $1 * 4 )) -N $1 --cpus-per-task=$(( total_cores / 4 )) --cpu_bind cores $srun_flags ../../regent${VARIANT+_}$VARIANT/main.shard$(( cores / 4 )) "${@:2}" -ll:cpu $(( cores / 4 )) -ll:io 1 -ll:util 1 $memoize # -scratch 64 -lg:window 8192 +} + +function launch_util_2 { + memoize="-dm:memoize -lg:parallel_replay 2" + srun_flags= + if (( $1 == 1 )); then + srun_flags="--network=single_node_vni" + fi + srun -n $(( $1 * 4 )) -N $1 --cpus-per-task=$(( total_cores / 4 )) --cpu_bind cores $srun_flags ../../regent${VARIANT+_}$VARIANT/main.shard$(( cores / 4 )) "${@:2}" -ll:cpu $(( cores / 4 )) -ll:util 2 $memoize # -scratch 64 -lg:window 8192 +} + +function repeat { + local -n result=$1 + local n=$2 + result=() + for i in $(seq 1 $n); do + result+=("${@:3}") + if (( i < n )); then + result+=("-and") + fi + done +} + +function sweep { + for s in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18; do + for rep in 0 1 2 3 4; do + if [[ $rep -le $s ]]; then + local args + repeat args $3 -kernel compute_bound -iter $(( 1 << (26-s) )) -type $4 -radix ${RADIX:-5} -steps ${STEPS:-1000} -width $(( $2 * cores )) + $1 $2 
"${args[@]}" + fi + done + done +} + +for n in $SLURM_JOB_NUM_NODES; do + for g in ${NGRAPHS:-1}; do + for t in ${PATTERN:-stencil_1d}; do + # sweep launch_util_0 $n $g $t > regent${VARIANT+_}${VARIANT}_util_0_quad_ngraphs_${g}_type_${t}_nodes_${n}.log + # sweep launch_util_1 $n $g $t > regent${VARIANT+_}${VARIANT}_util_1_quad_ngraphs_${g}_type_${t}_nodes_${n}.log + sweep launch_util_2 $n $g $t > regent${VARIANT+_}${VARIANT}_util_2_quad_ngraphs_${g}_type_${t}_nodes_${n}.log + done + done +done diff --git a/experiments/frontier_metg_compute/metg_regent_socket.sh b/experiments/frontier_metg_compute/metg_regent_socket.sh new file mode 100644 index 00000000..1a3d4efd --- /dev/null +++ b/experiments/frontier_metg_compute/metg_regent_socket.sh @@ -0,0 +1,69 @@ +#!/bin/bash +#SBATCH --account=CHM137 +#SBATCH --partition=batch +#SBATCH --time=01:00:00 +#SBATCH --mail-type=ALL + +total_cores=56 +cores=$(( $total_cores - 4 )) + +function launch_util_0 { + memoize="-dm:memoize -lg:parallel_replay $(( cores / 2 ))" + srun_flags= + if (( $1 == 1 )); then + srun_flags="--network=single_node_vni" + fi + srun -n $(( $1 * 2 )) -N $1 --cpus-per-task=$(( total_cores / 2 )) --cpu_bind cores $srun_flags ../../regent${VARIANT+_}$VARIANT/main.shard$(( cores / 2 )) "${@:2}" -ll:cpu $(( cores / 2 )) -ll:io 1 -ll:util 0 -lg:replay_on_cpus $memoize # -scratch 64 -lg:window 8192 +} + +function launch_util_1 { + memoize="-dm:memoize" + srun_flags= + if (( $1 == 1 )); then + srun_flags="--network=single_node_vni" + fi + srun -n $(( $1 * 2 )) -N $1 --cpus-per-task=$(( total_cores / 2 )) --cpu_bind cores $srun_flags ../../regent${VARIANT+_}$VARIANT/main.shard$(( cores / 2 )) "${@:2}" -ll:cpu $(( cores / 2 )) -ll:io 1 -ll:util 1 $memoize # -scratch 64 -lg:window 8192 +} + +function launch_util_2 { + memoize="-dm:memoize -lg:parallel_replay 2" + srun_flags= + if (( $1 == 1 )); then + srun_flags="--network=single_node_vni" + fi + srun -n $(( $1 * 2 )) -N $1 --cpus-per-task=$(( total_cores / 2 )) 
--cpu_bind cores $srun_flags ../../regent${VARIANT+_}$VARIANT/main.shard$(( cores / 2 )) "${@:2}" -ll:cpu $(( cores / 2 )) -ll:util 2 $memoize # -scratch 64 -lg:window 8192 +} + +function repeat { + local -n result=$1 + local n=$2 + result=() + for i in $(seq 1 $n); do + result+=("${@:3}") + if (( i < n )); then + result+=("-and") + fi + done +} + +function sweep { + for s in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18; do + for rep in 0 1 2 3 4; do + if [[ $rep -le $s ]]; then + local args + repeat args $3 -kernel compute_bound -iter $(( 1 << (26-s) )) -type $4 -radix ${RADIX:-5} -steps ${STEPS:-1000} -width $(( $2 * cores )) + $1 $2 "${args[@]}" + fi + done + done +} + +for n in $SLURM_JOB_NUM_NODES; do + for g in ${NGRAPHS:-1}; do + for t in ${PATTERN:-stencil_1d}; do + # sweep launch_util_0 $n $g $t > regent${VARIANT+_}${VARIANT}_util_0_socket_ngraphs_${g}_type_${t}_nodes_${n}.log + # sweep launch_util_1 $n $g $t > regent${VARIANT+_}${VARIANT}_util_1_socket_ngraphs_${g}_type_${t}_nodes_${n}.log + sweep launch_util_2 $n $g $t > regent${VARIANT+_}${VARIANT}_util_2_socket_ngraphs_${g}_type_${t}_nodes_${n}.log + done + done +done From c60681926cbf74e80dcf04e75f1b0365d0e72452 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Thu, 5 Oct 2023 12:53:40 -0400 Subject: [PATCH 05/16] Scripts for Realm. 
--- .../frontier_metg_compute/metg_realm.sh | 48 +++++++++++++++++++ .../frontier_metg_compute/metg_realm_quad.sh | 48 +++++++++++++++++++ .../metg_realm_socket.sh | 48 +++++++++++++++++++ 3 files changed, 144 insertions(+) create mode 100644 experiments/frontier_metg_compute/metg_realm.sh create mode 100644 experiments/frontier_metg_compute/metg_realm_quad.sh create mode 100644 experiments/frontier_metg_compute/metg_realm_socket.sh diff --git a/experiments/frontier_metg_compute/metg_realm.sh b/experiments/frontier_metg_compute/metg_realm.sh new file mode 100644 index 00000000..7034e77b --- /dev/null +++ b/experiments/frontier_metg_compute/metg_realm.sh @@ -0,0 +1,48 @@ +#!/bin/bash +#SBATCH --account=CHM137 +#SBATCH --partition=batch +#SBATCH --time=01:00:00 +#SBATCH --mail-type=ALL + +total_cores=56 +cores=$(( $total_cores - 2 )) + +function launch { + srun_flags= + if (( $1 == 1 )); then + srun_flags="--network=single_node_vni" + fi + srun -n $1 -N $1 --cpus-per-task=$total_cores --cpu_bind none $srun_flags ../../realm${VARIANT+_}$VARIANT/task_bench "${@:2}" -ll:cpu $cores -field 6 -ll:util 0 -ll:rsize 512 +} + +function repeat { + local -n result=$1 + local n=$2 + result=() + for i in $(seq 1 $n); do + result+=("${@:3}") + if (( i < n )); then + result+=("-and") + fi + done +} + +function sweep { + for s in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18; do + for rep in 0 1 2 3 4; do + if [[ $rep -le $s ]]; then + local args + repeat args $3 -kernel compute_bound -iter $(( 1 << (26-s) )) -type $4 -radix ${RADIX:-5} -steps ${STEPS:-1000} -width $(( $2 * cores )) + $1 $2 "${args[@]}" + fi + done + done +} + +for n in $SLURM_JOB_NUM_NODES; do + for g in ${NGRAPHS:-1}; do + for t in ${PATTERN:-stencil_1d}; do + sweep launch $n $g $t > realm${VARIANT+_}${VARIANT}_ngraphs_${g}_type_${t}_nodes_${n}.log + done + done +done diff --git a/experiments/frontier_metg_compute/metg_realm_quad.sh b/experiments/frontier_metg_compute/metg_realm_quad.sh new file mode 100644 index 
00000000..b382b7b1 --- /dev/null +++ b/experiments/frontier_metg_compute/metg_realm_quad.sh @@ -0,0 +1,48 @@ +#!/bin/bash +#SBATCH --account=CHM137 +#SBATCH --partition=batch +#SBATCH --time=01:00:00 +#SBATCH --mail-type=ALL + +total_cores=56 +cores=$(( $total_cores - 4 )) + +function launch { + srun_flags= + if (( $1 == 1 )); then + srun_flags="--network=single_node_vni" + fi + srun -n $(( $1 * 4 )) -N $1 --cpus-per-task=$(( total_cores / 4 )) --cpu_bind cores $srun_flags ../../realm${VARIANT+_}$VARIANT/task_bench "${@:2}" -field 6 -ll:cpu $(( cores / 4 )) -ll:util 0 -ll:rsize 512 +} + +function repeat { + local -n result=$1 + local n=$2 + result=() + for i in $(seq 1 $n); do + result+=("${@:3}") + if (( i < n )); then + result+=("-and") + fi + done +} + +function sweep { + for s in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18; do + for rep in 0 1 2 3 4; do + if [[ $rep -le $s ]]; then + local args + repeat args $3 -kernel compute_bound -iter $(( 1 << (26-s) )) -type $4 -radix ${RADIX:-5} -steps ${STEPS:-1000} -width $(( $2 * cores )) + $1 $2 "${args[@]}" + fi + done + done +} + +for n in $SLURM_JOB_NUM_NODES; do + for g in ${NGRAPHS:-1}; do + for t in ${PATTERN:-stencil_1d}; do + sweep launch $n $g $t > realm${VARIANT+_}${VARIANT}_quad_ngraphs_${g}_type_${t}_nodes_${n}.log + done + done +done diff --git a/experiments/frontier_metg_compute/metg_realm_socket.sh b/experiments/frontier_metg_compute/metg_realm_socket.sh new file mode 100644 index 00000000..e814d0aa --- /dev/null +++ b/experiments/frontier_metg_compute/metg_realm_socket.sh @@ -0,0 +1,48 @@ +#!/bin/bash +#SBATCH --account=CHM137 +#SBATCH --partition=batch +#SBATCH --time=01:00:00 +#SBATCH --mail-type=ALL + +total_cores=56 +cores=$(( $total_cores - 2 )) + +function launch { + srun_flags= + if (( $1 == 1 )); then + srun_flags="--network=single_node_vni" + fi + srun -n $(( $1 * 2 )) -N $1 --cpus-per-task=$(( total_cores / 2 )) --cpu_bind cores $srun_flags ../../realm${VARIANT+_}$VARIANT/task_bench 
"${@:2}" -field 6 -ll:cpu $(( cores / 2 )) -ll:util 0 -ll:rsize 512 +} + +function repeat { + local -n result=$1 + local n=$2 + result=() + for i in $(seq 1 $n); do + result+=("${@:3}") + if (( i < n )); then + result+=("-and") + fi + done +} + +function sweep { + for s in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18; do + for rep in 0 1 2 3 4; do + if [[ $rep -le $s ]]; then + local args + repeat args $3 -kernel compute_bound -iter $(( 1 << (26-s) )) -type $4 -radix ${RADIX:-5} -steps ${STEPS:-1000} -width $(( $2 * cores )) + $1 $2 "${args[@]}" + fi + done + done +} + +for n in $SLURM_JOB_NUM_NODES; do + for g in ${NGRAPHS:-1}; do + for t in ${PATTERN:-stencil_1d}; do + sweep launch $n $g $t > realm${VARIANT+_}${VARIANT}_socket_ngraphs_${g}_type_${t}_nodes_${n}.log + done + done +done From fa6e4bfc69e3c34a875359923e751790df19eed8 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Wed, 4 Oct 2023 21:51:52 -0700 Subject: [PATCH 06/16] Modernize matplotlib code. --- scripts/render_metg.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/scripts/render_metg.py b/scripts/render_metg.py index eb285d8c..bc159e2f 100755 --- a/scripts/render_metg.py +++ b/scripts/render_metg.py @@ -158,12 +158,10 @@ def csv2rec(filename): if args.y_invert: ax.invert_yaxis() -if args.xlog and args.ylog: - plt.loglog(basex=args.xbase) -elif args.xlog: - plt.semilogx(basex=args.xbase) -elif args.ylog: - plt.semilogy() +if args.xlog: + ax.set_xscale("log", base=args.xbase) +if args.ylog: + ax.set_yscale("log", base=10) data = csv2rec(args.filename) nodes = getattr(data, args.xdata) From 2cf6ea28cd76b846fe4e2d59978ea30e34900e19 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Thu, 5 Oct 2023 10:33:37 -0700 Subject: [PATCH 07/16] Update experiment instructions. 
--- EXPERIMENT.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/EXPERIMENT.md b/EXPERIMENT.md index 56a7f952..3865fcc5 100644 --- a/EXPERIMENT.md +++ b/EXPERIMENT.md @@ -12,9 +12,6 @@ Corresponding authors: ### Frontier -Note: This configuration is obsolete and is provided for documentation -purposes only. - ``` git clone https://github.com/StanfordLegion/task-bench.git cd task-bench From 15a7d9dbfac3703ef2af1b43d2753c6cd3d6f3f9 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Thu, 5 Oct 2023 18:46:45 -0400 Subject: [PATCH 08/16] Enable control replication of top-level and work task. --- regent/main.rg | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/regent/main.rg b/regent/main.rg index 90868946..34cdec0c 100644 --- a/regent/main.rg +++ b/regent/main.rg @@ -17,6 +17,20 @@ import "regent" local c = regentlib.c local core = terralib.includec("core_c.h") +-- Hacks to allow control replication +core.app_create.replicable = true +core.app_display.replicable = true +core.app_report_timing.replicable = true +core.app_task_graphs.replicable = true +core.interval_list_destroy.replicable = true +core.interval_list_interval.replicable = true +core.interval_list_num_intervals.replicable = true +core.task_graph_dependencies.replicable = true +core.task_graph_list_num_task_graphs.replicable = true +core.task_graph_list_task_graph.replicable = true +core.task_graph_max_dependence_sets.replicable = true +core.task_graph_timestep_period.replicable = true + do local root_dir = arg[0]:match(".*/") or "./" @@ -324,6 +338,7 @@ terra make_secondary_partition(graph : core.task_graph_t, dset : int, input : in end return c.legion_logical_partition_create(runtime, r, ip) end +make_secondary_partition.replicable = true function gcd(a, b) while b ~= 0 do @@ -510,7 +525,9 @@ local work_task = terralib.memoize(function(n_graphs, n_dsets, max_inputs) end) end actions:insert(rquote - core.app_report_timing(app, double(stop_time - 
start_time)/1e9) + if regentlib.c.legion_context_get_shard_id(__runtime(), __context(), true) == 0 then + core.app_report_timing(app, double(stop_time - start_time)/1e9) + end end) return actions end @@ -526,10 +543,12 @@ local work_task = terralib.memoize(function(n_graphs, n_dsets, max_inputs) local main_loop_actions = generate_main_loop(graphs, primary_partitions, secondary_partitions, pscratch, ptiming) local report_actions = generate_report(app, graphs, timing) - local __demand(__inner) task w() + local __demand(__inner, __replicable) task w() var args = c.legion_runtime_get_input_args() var [app] = core.app_create(args.argc, args.argv) - core.app_display(app) + if regentlib.c.legion_context_get_shard_id(__runtime(), __context(), true) == 0 then + core.app_display(app) + end var [graph_list] = core.app_task_graphs(app); [graph_actions]; @@ -685,7 +704,7 @@ function dispatch_work_task(n_graphs, n_dsets, max_inputs) return actions end -__demand(__inner) +__demand(__inner, __replicable) task main() var args = c.legion_runtime_get_input_args() var app = core.app_create(args.argc, args.argv) From 5365a634157340dd3114bbe924c3e9985b0991a0 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Thu, 5 Oct 2023 18:52:06 -0400 Subject: [PATCH 09/16] Fix Legion control replication code. 
--- legion/main.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/legion/main.cc b/legion/main.cc index a4f8a53e..6505b439 100644 --- a/legion/main.cc +++ b/legion/main.cc @@ -603,8 +603,7 @@ LegionApp::LegionApp(Runtime *runtime, Context ctx) void LegionApp::run() { - // FIXME (Elliott): Do this correctly for control replication - if (runtime->get_executing_processor(ctx).address_space() == 0) { + if (runtime->get_shard_id(ctx, true) == 0) { display(); } @@ -623,7 +622,7 @@ void LegionApp::run() unsigned long long stop = Realm::Clock::current_time_in_nanoseconds(); double elapsed = (stop - start) / 1e9; - if (runtime->get_executing_processor(ctx).address_space() == 0) { + if (runtime->get_shard_id(ctx, true) == 0) { report_timing(elapsed); } } From bf4e3aaa3de68519dad82b46a6f743b10431a073 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Thu, 5 Oct 2023 15:53:24 -0700 Subject: [PATCH 10/16] Update mapper to control replicate two levels. --- regent/mapper.cc | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/regent/mapper.cc b/regent/mapper.cc index 9ea6d883..16f6455d 100644 --- a/regent/mapper.cc +++ b/regent/mapper.cc @@ -29,6 +29,9 @@ class TaskBenchMapper : public DefaultMapper public: TaskBenchMapper(MapperRuntime *rt, Machine machine, Processor local, const char *mapper_name); + virtual void select_task_options(const MapperContext ctx, + const Task &task, + TaskOptions &output); virtual void default_policy_rank_processor_kinds( MapperContext ctx, const Task &task, std::vector &ranking); @@ -40,6 +43,16 @@ TaskBenchMapper::TaskBenchMapper(MapperRuntime *rt, Machine machine, Processor l { } +void TaskBenchMapper::select_task_options(const MapperContext ctx, + const Task &task, + TaskOptions &output) +{ + DefaultMapper::select_task_options(ctx, task, output); + // Replicate top two levels of tasks + if ((total_nodes > 1) && (task.get_depth() <= 1)) + output.replicate = replication_enabled; +} + void 
TaskBenchMapper::default_policy_rank_processor_kinds(MapperContext ctx, const Task &task, std::vector &ranking) { From 50d1f339611d1341d0eacbf615003b4110f33d54 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Thu, 5 Oct 2023 19:21:14 -0400 Subject: [PATCH 11/16] Fix NIC binding in scripts. --- .../{metg_legion.sh => metg_legion_rank1.sh} | 6 +++--- .../{metg_legion_socket.sh => metg_legion_rank2.sh} | 11 ++++++++--- .../{metg_legion_quad.sh => metg_legion_rank4.sh} | 12 +++++++++--- .../{metg_realm.sh => metg_realm_rank1.sh} | 0 .../{metg_realm_quad.sh => metg_realm_rank4.sh} | 6 ++++++ .../{metg_realm_socket.sh => metg_realm_ranks2.sh} | 5 +++++ .../{metg_regent.sh => metg_regent_rank1.sh} | 6 +++--- .../{metg_regent_socket.sh => metg_regent_rank2.sh} | 11 ++++++++--- .../{metg_regent_quad.sh => metg_regent_rank4.sh} | 12 +++++++++--- 9 files changed, 51 insertions(+), 18 deletions(-) rename experiments/frontier_metg_compute/{metg_legion.sh => metg_legion_rank1.sh} (86%) rename experiments/frontier_metg_compute/{metg_legion_socket.sh => metg_legion_rank2.sh} (80%) rename experiments/frontier_metg_compute/{metg_legion_quad.sh => metg_legion_rank4.sh} (81%) rename experiments/frontier_metg_compute/{metg_realm.sh => metg_realm_rank1.sh} (100%) rename experiments/frontier_metg_compute/{metg_realm_quad.sh => metg_realm_rank4.sh} (89%) rename experiments/frontier_metg_compute/{metg_realm_socket.sh => metg_realm_ranks2.sh} (87%) rename experiments/frontier_metg_compute/{metg_regent.sh => metg_regent_rank1.sh} (92%) rename experiments/frontier_metg_compute/{metg_regent_socket.sh => metg_regent_rank2.sh} (86%) rename experiments/frontier_metg_compute/{metg_regent_quad.sh => metg_regent_rank4.sh} (87%) diff --git a/experiments/frontier_metg_compute/metg_legion.sh b/experiments/frontier_metg_compute/metg_legion_rank1.sh similarity index 86% rename from experiments/frontier_metg_compute/metg_legion.sh rename to experiments/frontier_metg_compute/metg_legion_rank1.sh 
index 9ede83c2..de35627e 100644 --- a/experiments/frontier_metg_compute/metg_legion.sh +++ b/experiments/frontier_metg_compute/metg_legion_rank1.sh @@ -61,9 +61,9 @@ function sweep { for n in $SLURM_JOB_NUM_NODES; do for g in ${NGRAPHS:-1}; do for t in ${PATTERN:-stencil_1d}; do - sweep launch_util_0 $n $g $t > legion_util_0_ngraphs_${g}_type_${t}_nodes_${n}.log - # sweep launch_util_1 $n $g $t > legion_util_1_ngraphs_${g}_type_${t}_nodes_${n}.log - # sweep launch_util_2 $n $g $t > legion_util_2_ngraphs_${g}_type_${t}_nodes_${n}.log + sweep launch_util_0 $n $g $t > legion_util_0_rank1_ngraphs_${g}_type_${t}_nodes_${n}.log + # sweep launch_util_1 $n $g $t > legion_util_1_rank1_ngraphs_${g}_type_${t}_nodes_${n}.log + # sweep launch_util_2 $n $g $t > legion_util_2_rank1_ngraphs_${g}_type_${t}_nodes_${n}.log done done done diff --git a/experiments/frontier_metg_compute/metg_legion_socket.sh b/experiments/frontier_metg_compute/metg_legion_rank2.sh similarity index 80% rename from experiments/frontier_metg_compute/metg_legion_socket.sh rename to experiments/frontier_metg_compute/metg_legion_rank2.sh index 428d1aa0..774f31fc 100644 --- a/experiments/frontier_metg_compute/metg_legion_socket.sh +++ b/experiments/frontier_metg_compute/metg_legion_rank2.sh @@ -7,6 +7,11 @@ total_cores=56 cores=$(( $total_cores - 4 )) +export GASNET_OFI_DEVICE_TYPE=Node +export GASNET_OFI_DEVICE_0=dummy # https://gasnet-bugs.lbl.gov/bugzilla/show_bug.cgi?id=4669 +export GASNET_OFI_DEVICE_0_1=cxi1 +export GASNET_OFI_DEVICE_2_3=cxi0 + function launch_util_0 { memoize="-dm:memoize -lg:parallel_replay $(( cores / 2 ))" srun_flags= @@ -61,9 +66,9 @@ function sweep { for n in $SLURM_JOB_NUM_NODES; do for g in ${NGRAPHS:-1}; do for t in ${PATTERN:-stencil_1d}; do - # sweep launch_util_0 $n $g $t > legion_util_0_socket_ngraphs_${g}_type_${t}_nodes_${n}.log - # sweep launch_util_1 $n $g $t > legion_util_1_socket_ngraphs_${g}_type_${t}_nodes_${n}.log - sweep launch_util_2 $n $g $t > 
legion_util_2_socket_ngraphs_${g}_type_${t}_nodes_${n}.log + # sweep launch_util_0 $n $g $t > legion_util_0_rank2_ngraphs_${g}_type_${t}_nodes_${n}.log + # sweep launch_util_1 $n $g $t > legion_util_1_rank2_ngraphs_${g}_type_${t}_nodes_${n}.log + sweep launch_util_2 $n $g $t > legion_util_2_rank2_ngraphs_${g}_type_${t}_nodes_${n}.log done done done diff --git a/experiments/frontier_metg_compute/metg_legion_quad.sh b/experiments/frontier_metg_compute/metg_legion_rank4.sh similarity index 81% rename from experiments/frontier_metg_compute/metg_legion_quad.sh rename to experiments/frontier_metg_compute/metg_legion_rank4.sh index b8aa4073..fc248181 100644 --- a/experiments/frontier_metg_compute/metg_legion_quad.sh +++ b/experiments/frontier_metg_compute/metg_legion_rank4.sh @@ -7,6 +7,12 @@ total_cores=56 cores=$(( $total_cores - 8 )) +export GASNET_OFI_DEVICE_TYPE=Node +export GASNET_OFI_DEVICE_0=cxi2 +export GASNET_OFI_DEVICE_1=cxi1 +export GASNET_OFI_DEVICE_2=cxi3 +export GASNET_OFI_DEVICE_3=cxi0 + function launch_util_0 { memoize="-dm:memoize -lg:parallel_replay $(( cores / 4 ))" srun_flags= @@ -61,9 +67,9 @@ function sweep { for n in $SLURM_JOB_NUM_NODES; do for g in ${NGRAPHS:-1}; do for t in ${PATTERN:-stencil_1d}; do - # sweep launch_util_0 $n $g $t > legion_util_0_quad_ngraphs_${g}_type_${t}_nodes_${n}.log - # sweep launch_util_1 $n $g $t > legion_util_1_quad_ngraphs_${g}_type_${t}_nodes_${n}.log - sweep launch_util_2 $n $g $t > legion_util_2_quad_ngraphs_${g}_type_${t}_nodes_${n}.log + # sweep launch_util_0 $n $g $t > legion_util_0_rank4_ngraphs_${g}_type_${t}_nodes_${n}.log + # sweep launch_util_1 $n $g $t > legion_util_1_rank4_ngraphs_${g}_type_${t}_nodes_${n}.log + sweep launch_util_2 $n $g $t > legion_util_2_rank4_ngraphs_${g}_type_${t}_nodes_${n}.log done done done diff --git a/experiments/frontier_metg_compute/metg_realm.sh b/experiments/frontier_metg_compute/metg_realm_rank1.sh similarity index 100% rename from 
experiments/frontier_metg_compute/metg_realm.sh rename to experiments/frontier_metg_compute/metg_realm_rank1.sh diff --git a/experiments/frontier_metg_compute/metg_realm_quad.sh b/experiments/frontier_metg_compute/metg_realm_rank4.sh similarity index 89% rename from experiments/frontier_metg_compute/metg_realm_quad.sh rename to experiments/frontier_metg_compute/metg_realm_rank4.sh index b382b7b1..b61ad332 100644 --- a/experiments/frontier_metg_compute/metg_realm_quad.sh +++ b/experiments/frontier_metg_compute/metg_realm_rank4.sh @@ -7,6 +7,12 @@ total_cores=56 cores=$(( $total_cores - 4 )) +export GASNET_OFI_DEVICE_TYPE=Node +export GASNET_OFI_DEVICE_0=cxi2 +export GASNET_OFI_DEVICE_1=cxi1 +export GASNET_OFI_DEVICE_2=cxi3 +export GASNET_OFI_DEVICE_3=cxi0 + function launch { srun_flags= if (( $1 == 1 )); then diff --git a/experiments/frontier_metg_compute/metg_realm_socket.sh b/experiments/frontier_metg_compute/metg_realm_ranks2.sh similarity index 87% rename from experiments/frontier_metg_compute/metg_realm_socket.sh rename to experiments/frontier_metg_compute/metg_realm_ranks2.sh index e814d0aa..9bf12f13 100644 --- a/experiments/frontier_metg_compute/metg_realm_socket.sh +++ b/experiments/frontier_metg_compute/metg_realm_ranks2.sh @@ -7,6 +7,11 @@ total_cores=56 cores=$(( $total_cores - 2 )) +export GASNET_OFI_DEVICE_TYPE=Node +export GASNET_OFI_DEVICE_0=dummy # https://gasnet-bugs.lbl.gov/bugzilla/show_bug.cgi?id=4669 +export GASNET_OFI_DEVICE_0_1=cxi1 +export GASNET_OFI_DEVICE_2_3=cxi0 + function launch { srun_flags= if (( $1 == 1 )); then diff --git a/experiments/frontier_metg_compute/metg_regent.sh b/experiments/frontier_metg_compute/metg_regent_rank1.sh similarity index 92% rename from experiments/frontier_metg_compute/metg_regent.sh rename to experiments/frontier_metg_compute/metg_regent_rank1.sh index febce3f0..b2cd5923 100644 --- a/experiments/frontier_metg_compute/metg_regent.sh +++ b/experiments/frontier_metg_compute/metg_regent_rank1.sh @@ -61,9 +61,9 
@@ function sweep { for n in $SLURM_JOB_NUM_NODES; do for g in ${NGRAPHS:-1}; do for t in ${PATTERN:-stencil_1d}; do - # sweep launch_util_0 $n $g $t > regent${VARIANT+_}${VARIANT}_util_0_ngraphs_${g}_type_${t}_nodes_${n}.log - # sweep launch_util_1 $n $g $t > regent${VARIANT+_}${VARIANT_}_util_1_ngraphs_${g}_type_${t}_nodes_${n}.log - sweep launch_util_2 $n $g $t > regent${VARIANT+_}${VARIANT}_util_2_ngraphs_${g}_type_${t}_nodes_${n}.log + # sweep launch_util_0 $n $g $t > regent${VARIANT+_}${VARIANT}_util_0_rank1_ngraphs_${g}_type_${t}_nodes_${n}.log + # sweep launch_util_1 $n $g $t > regent${VARIANT+_}${VARIANT}_util_1_rank1_ngraphs_${g}_type_${t}_nodes_${n}.log + sweep launch_util_2 $n $g $t > regent${VARIANT+_}${VARIANT}_util_2_rank1_ngraphs_${g}_type_${t}_nodes_${n}.log done done done diff --git a/experiments/frontier_metg_compute/metg_regent_socket.sh b/experiments/frontier_metg_compute/metg_regent_rank2.sh similarity index 86% rename from experiments/frontier_metg_compute/metg_regent_socket.sh rename to experiments/frontier_metg_compute/metg_regent_rank2.sh index 1a3d4efd..150c82c0 100644 --- a/experiments/frontier_metg_compute/metg_regent_socket.sh +++ b/experiments/frontier_metg_compute/metg_regent_rank2.sh @@ -7,6 +7,11 @@ total_cores=56 cores=$(( $total_cores - 4 )) +export GASNET_OFI_DEVICE_TYPE=Node +export GASNET_OFI_DEVICE_0=dummy # https://gasnet-bugs.lbl.gov/bugzilla/show_bug.cgi?id=4669 +export GASNET_OFI_DEVICE_0_1=cxi1 +export GASNET_OFI_DEVICE_2_3=cxi0 + function launch_util_0 { memoize="-dm:memoize -lg:parallel_replay $(( cores / 2 ))" srun_flags= @@ -61,9 +66,9 @@ function sweep { for n in $SLURM_JOB_NUM_NODES; do for g in ${NGRAPHS:-1}; do for t in ${PATTERN:-stencil_1d}; do - # sweep launch_util_0 $n $g $t > regent${VARIANT+_}${VARIANT}_util_0_socket_ngraphs_${g}_type_${t}_nodes_${n}.log - # sweep launch_util_1 $n $g $t > regent${VARIANT+_}${VARIANT}_util_1_socket_ngraphs_${g}_type_${t}_nodes_${n}.log - sweep launch_util_2 $n $g $t >
regent${VARIANT+_}${VARIANT}_util_2_socket_ngraphs_${g}_type_${t}_nodes_${n}.log + # sweep launch_util_0 $n $g $t > regent${VARIANT+_}${VARIANT}_util_0_rank2_ngraphs_${g}_type_${t}_nodes_${n}.log + # sweep launch_util_1 $n $g $t > regent${VARIANT+_}${VARIANT}_util_1_rank2_ngraphs_${g}_type_${t}_nodes_${n}.log + sweep launch_util_2 $n $g $t > regent${VARIANT+_}${VARIANT}_util_2_rank2_ngraphs_${g}_type_${t}_nodes_${n}.log done done done diff --git a/experiments/frontier_metg_compute/metg_regent_quad.sh b/experiments/frontier_metg_compute/metg_regent_rank4.sh similarity index 87% rename from experiments/frontier_metg_compute/metg_regent_quad.sh rename to experiments/frontier_metg_compute/metg_regent_rank4.sh index 8dd36f17..17011a7b 100644 --- a/experiments/frontier_metg_compute/metg_regent_quad.sh +++ b/experiments/frontier_metg_compute/metg_regent_rank4.sh @@ -7,6 +7,12 @@ total_cores=56 cores=$(( $total_cores - 8 )) +export GASNET_OFI_DEVICE_TYPE=Node +export GASNET_OFI_DEVICE_0=cxi2 +export GASNET_OFI_DEVICE_1=cxi1 +export GASNET_OFI_DEVICE_2=cxi3 +export GASNET_OFI_DEVICE_3=cxi0 + function launch_util_0 { memoize="-dm:memoize -lg:parallel_replay $(( cores / 4 ))" srun_flags= @@ -61,9 +67,9 @@ function sweep { for n in $SLURM_JOB_NUM_NODES; do for g in ${NGRAPHS:-1}; do for t in ${PATTERN:-stencil_1d}; do - # sweep launch_util_0 $n $g $t > regent${VARIANT+_}${VARIANT}_util_0_quad_ngraphs_${g}_type_${t}_nodes_${n}.log - # sweep launch_util_1 $n $g $t > regent${VARIANT+_}${VARIANT}_util_1_quad_ngraphs_${g}_type_${t}_nodes_${n}.log - sweep launch_util_2 $n $g $t > regent${VARIANT+_}${VARIANT}_util_2_quad_ngraphs_${g}_type_${t}_nodes_${n}.log + # sweep launch_util_0 $n $g $t > regent${VARIANT+_}${VARIANT}_util_0_rank4_ngraphs_${g}_type_${t}_nodes_${n}.log + # sweep launch_util_1 $n $g $t > regent${VARIANT+_}${VARIANT}_util_1_rank4_ngraphs_${g}_type_${t}_nodes_${n}.log + sweep launch_util_2 $n $g $t > 
regent${VARIANT+_}${VARIANT}_util_2_rank4_ngraphs_${g}_type_${t}_nodes_${n}.log done done done From 99e3dfb13c9f705407434e09b0b2156f85e0f53c Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Thu, 5 Oct 2023 19:22:46 -0400 Subject: [PATCH 12/16] Fix script name. --- .../{metg_realm_ranks2.sh => metg_realm_rank2.sh} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename experiments/frontier_metg_compute/{metg_realm_ranks2.sh => metg_realm_rank2.sh} (100%) diff --git a/experiments/frontier_metg_compute/metg_realm_ranks2.sh b/experiments/frontier_metg_compute/metg_realm_rank2.sh similarity index 100% rename from experiments/frontier_metg_compute/metg_realm_ranks2.sh rename to experiments/frontier_metg_compute/metg_realm_rank2.sh From 1eba538266ee707f9d6b68dd265c2ffdcc7645c0 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Thu, 5 Oct 2023 19:26:43 -0400 Subject: [PATCH 13/16] Fix scripts. --- experiments/frontier_metg_compute/metg_realm_rank1.sh | 2 +- experiments/frontier_metg_compute/metg_realm_rank2.sh | 2 +- experiments/frontier_metg_compute/metg_realm_rank4.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/experiments/frontier_metg_compute/metg_realm_rank1.sh b/experiments/frontier_metg_compute/metg_realm_rank1.sh index 7034e77b..df6a8672 100644 --- a/experiments/frontier_metg_compute/metg_realm_rank1.sh +++ b/experiments/frontier_metg_compute/metg_realm_rank1.sh @@ -42,7 +42,7 @@ function sweep { for n in $SLURM_JOB_NUM_NODES; do for g in ${NGRAPHS:-1}; do for t in ${PATTERN:-stencil_1d}; do - sweep launch $n $g $t > realm${VARIANT+_}${VARIANT}_ngraphs_${g}_type_${t}_nodes_${n}.log + sweep launch $n $g $t > realm${VARIANT+_}${VARIANT}_rank1_ngraphs_${g}_type_${t}_nodes_${n}.log done done done diff --git a/experiments/frontier_metg_compute/metg_realm_rank2.sh b/experiments/frontier_metg_compute/metg_realm_rank2.sh index 9bf12f13..7ffceef2 100644 --- a/experiments/frontier_metg_compute/metg_realm_rank2.sh +++ 
b/experiments/frontier_metg_compute/metg_realm_rank2.sh @@ -47,7 +47,7 @@ function sweep { for n in $SLURM_JOB_NUM_NODES; do for g in ${NGRAPHS:-1}; do for t in ${PATTERN:-stencil_1d}; do - sweep launch $n $g $t > realm${VARIANT+_}${VARIANT}_socket_ngraphs_${g}_type_${t}_nodes_${n}.log + sweep launch $n $g $t > realm${VARIANT+_}${VARIANT}_rank2_ngraphs_${g}_type_${t}_nodes_${n}.log done done done diff --git a/experiments/frontier_metg_compute/metg_realm_rank4.sh b/experiments/frontier_metg_compute/metg_realm_rank4.sh index b61ad332..d4757b95 100644 --- a/experiments/frontier_metg_compute/metg_realm_rank4.sh +++ b/experiments/frontier_metg_compute/metg_realm_rank4.sh @@ -48,7 +48,7 @@ function sweep { for n in $SLURM_JOB_NUM_NODES; do for g in ${NGRAPHS:-1}; do for t in ${PATTERN:-stencil_1d}; do - sweep launch $n $g $t > realm${VARIANT+_}${VARIANT}_quad_ngraphs_${g}_type_${t}_nodes_${n}.log + sweep launch $n $g $t > realm${VARIANT+_}${VARIANT}_rank4_ngraphs_${g}_type_${t}_nodes_${n}.log done done done From 76c6385a65c5adc092cf2e02c3b2d588bd59edba Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Fri, 13 Oct 2023 17:43:12 -0400 Subject: [PATCH 14/16] Inline work task for now to simplify DCR setup. 
--- regent/main.rg | 2 +- regent/mapper.cc | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/regent/main.rg b/regent/main.rg index 34cdec0c..ce31229d 100644 --- a/regent/main.rg +++ b/regent/main.rg @@ -543,7 +543,7 @@ local work_task = terralib.memoize(function(n_graphs, n_dsets, max_inputs) local main_loop_actions = generate_main_loop(graphs, primary_partitions, secondary_partitions, pscratch, ptiming) local report_actions = generate_report(app, graphs, timing) - local __demand(__inner, __replicable) task w() + local __demand(__inner, __replicable, __inline) task w() var args = c.legion_runtime_get_input_args() var [app] = core.app_create(args.argc, args.argv) if regentlib.c.legion_context_get_shard_id(__runtime(), __context(), true) == 0 then diff --git a/regent/mapper.cc b/regent/mapper.cc index 16f6455d..acaf4eb8 100644 --- a/regent/mapper.cc +++ b/regent/mapper.cc @@ -48,9 +48,9 @@ void TaskBenchMapper::select_task_options(const MapperContext ctx, TaskOptions &output) { DefaultMapper::select_task_options(ctx, task, output); - // Replicate top two levels of tasks - if ((total_nodes > 1) && (task.get_depth() <= 1)) - output.replicate = replication_enabled; + // // Replicate top two levels of tasks + // if ((total_nodes > 1) && (task.get_depth() <= 1)) + // output.replicate = replication_enabled; } void TaskBenchMapper::default_policy_rank_processor_kinds(MapperContext ctx, From cfb05add33eea3640aace80c911c899c1508850c Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Fri, 13 Oct 2023 15:30:38 -0700 Subject: [PATCH 15/16] Fixes for DCR. 
--- regent/main.rg | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/regent/main.rg b/regent/main.rg index ce31229d..67691832 100644 --- a/regent/main.rg +++ b/regent/main.rg @@ -525,8 +525,9 @@ local work_task = terralib.memoize(function(n_graphs, n_dsets, max_inputs) end) end actions:insert(rquote + var elapsed = double(stop_time - start_time)/1e9 if regentlib.c.legion_context_get_shard_id(__runtime(), __context(), true) == 0 then - core.app_report_timing(app, double(stop_time - start_time)/1e9) + core.app_report_timing(app, elapsed) end end) return actions @@ -543,7 +544,7 @@ local work_task = terralib.memoize(function(n_graphs, n_dsets, max_inputs) local main_loop_actions = generate_main_loop(graphs, primary_partitions, secondary_partitions, pscratch, ptiming) local report_actions = generate_report(app, graphs, timing) - local __demand(__inner, __replicable, __inline) task w() + local __demand(__inner, __replicable, __local) task w() var args = c.legion_runtime_get_input_args() var [app] = core.app_create(args.argc, args.argv) if regentlib.c.legion_context_get_shard_id(__runtime(), __context(), true) == 0 then From 743cc67cc5e251084ff7d4c38a203f12d55c8722 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Fri, 13 Oct 2023 20:11:19 -0400 Subject: [PATCH 16/16] More memory for Realm tests. 
--- experiments/frontier_metg_compute/metg_realm_rank1.sh | 2 +- experiments/frontier_metg_compute/metg_realm_rank2.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/experiments/frontier_metg_compute/metg_realm_rank1.sh b/experiments/frontier_metg_compute/metg_realm_rank1.sh index df6a8672..b1b85166 100644 --- a/experiments/frontier_metg_compute/metg_realm_rank1.sh +++ b/experiments/frontier_metg_compute/metg_realm_rank1.sh @@ -12,7 +12,7 @@ function launch { if (( $1 == 1 )); then srun_flags="--network=single_node_vni" fi - srun -n $1 -N $1 --cpus-per-task=$total_cores --cpu_bind none $srun_flags ../../realm${VARIANT+_}$VARIANT/task_bench "${@:2}" -ll:cpu $cores -field 6 -ll:util 0 -ll:rsize 512 + srun -n $1 -N $1 --cpus-per-task=$total_cores --cpu_bind none $srun_flags ../../realm${VARIANT+_}$VARIANT/task_bench "${@:2}" -ll:cpu $cores -field 6 -ll:util 0 -ll:rsize 2048 } function repeat { diff --git a/experiments/frontier_metg_compute/metg_realm_rank2.sh b/experiments/frontier_metg_compute/metg_realm_rank2.sh index 7ffceef2..0a8d5f0c 100644 --- a/experiments/frontier_metg_compute/metg_realm_rank2.sh +++ b/experiments/frontier_metg_compute/metg_realm_rank2.sh @@ -17,7 +17,7 @@ function launch { if (( $1 == 1 )); then srun_flags="--network=single_node_vni" fi - srun -n $(( $1 * 2 )) -N $1 --cpus-per-task=$(( total_cores / 2 )) --cpu_bind cores $srun_flags ../../realm${VARIANT+_}$VARIANT/task_bench "${@:2}" -field 6 -ll:cpu $(( cores / 2 )) -ll:util 0 -ll:rsize 512 + srun -n $(( $1 * 2 )) -N $1 --cpus-per-task=$(( total_cores / 2 )) --cpu_bind cores $srun_flags ../../realm${VARIANT+_}$VARIANT/task_bench "${@:2}" -field 6 -ll:cpu $(( cores / 2 )) -ll:util 0 -ll:rsize 1024 } function repeat {