Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix support for building on Frontier #96

Open
wants to merge 16 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
.DS_Store
*.swp
*.a
*.d
*.o
*.so
*.dSYM
Expand Down
18 changes: 17 additions & 1 deletion EXPERIMENT.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,26 @@ with Task Bench.**
Corresponding authors:

* Elliott Slaughter <[email protected]>
* Wei Wu <[email protected]>
* Wei Wu <[email protected]>

## Instructions for Specific Machines

### Frontier

```
git clone https://github.com/StanfordLegion/task-bench.git
cd task-bench
USE_GASNET=1 LEGION_GASNET_CONDUIT=ofi LEGION_GASNET_SYSTEM=slingshot11 ./get_deps.sh
THREADS=32 ./build_all.sh
cd experiments/frontier_metg_compute
sbatch --nodes 1 metg_legion.sh
```

## Deprecated or Obsolete Machines

Note: These configurations are obsolete and are provided for documentation
purposes only.

### Cori

```
Expand Down
29 changes: 10 additions & 19 deletions build_all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,6 @@ else
fi
THREADS=${THREADS:-$DEFAULT_THREADS}

# On Cray machines, default to static build. (Cori switched this
# default from static to dynamic in the January 2020 maintenance
# cycle, but we want to stick with static builds.)
export CRAYPE_LINK_TYPE=static

make -C core clean
make -C core -j$THREADS

Expand Down Expand Up @@ -83,9 +78,9 @@ if [[ $USE_PYGION -eq 1 ]]; then
make -C pygion clean
fi
if [[ $USE_REGENT -eq 1 ]]; then
SHARD_SIZE=30 make -C regent clean
SHARD_SIZE=15 make -C regent clean
SHARD_SIZE=14 make -C regent clean
SHARD_SIZE=54 make -C regent clean
SHARD_SIZE=26 make -C regent clean
SHARD_SIZE=12 make -C regent clean
fi
if [[ $USE_REALM -eq 1 ]]; then
make -C realm clean
Expand All @@ -106,23 +101,19 @@ if [[ $USE_REGENT -eq 1 ]]; then
fi
unset LG_RT_DIR
if [[ -z $GITHUB_ACTIONS ]]; then
export CONDUIT=$LEGION_GASNET_CONDUIT${LEGION_GASNET_SYSTEM+-}$LEGION_GASNET_SYSTEM
./scripts/setup_env.py -j$THREADS
else
./install.py --rdir=auto
fi
)
popd
(
if [[ -n $CRAYPE_VERSION ]]; then
export CC=gcc CXX=g++
fi
SHARD_SIZE=30 make -C regent -j$THREADS &
sleep 1
SHARD_SIZE=15 make -C regent -j$THREADS &
sleep 1
SHARD_SIZE=14 make -C regent -j$THREADS &
wait
)
SHARD_SIZE=54 make -C regent -j$THREADS &
sleep 1
SHARD_SIZE=26 make -C regent -j$THREADS &
sleep 1
SHARD_SIZE=12 make -C regent -j$THREADS &
wait
fi
if [[ $USE_LEGION -eq 1 ]]; then
make -C legion -j$THREADS
Expand Down
69 changes: 69 additions & 0 deletions experiments/frontier_metg_compute/metg_legion_rank1.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#!/bin/bash
#SBATCH --account=CHM137
#SBATCH --partition=batch
#SBATCH --time=01:00:00
#SBATCH --mail-type=ALL

# Slurm batch script that runs ../../legion/task_bench with one rank per node
# (the launch_util_* helpers pass the node count to both srun -n and -N).

# CPUs requested per rank through --cpus-per-task.
# NOTE(review): presumably the number of usable cores per node — confirm.
total_cores=56
# Two fewer than total_cores; passed to -ll:cpu and used to scale -width.
cores=$(( $total_cores - 2 ))

# Run task_bench under srun with one rank per node and -ll:util 0.
# Globals:   total_cores (read), cores (read)
# Arguments: $1   - node count (used for both srun -n and -N)
#            $2.. - task_bench arguments, forwarded unchanged
# Returns:   exit status of srun
function launch_util_0 {
  local nodes=$1
  # Arrays (and local) so the flag lists neither leak into the caller's
  # scope nor rely on unquoted word splitting.
  local -a memoize=(-dm:memoize -lg:parallel_replay "$cores")
  local -a srun_flags=()
  # The extra network flag is only added for single-node runs.
  # NOTE(review): presumably required by the site's Slurm setup — confirm.
  if (( nodes == 1 )); then
    srun_flags=(--network=single_node_vni)
  fi
  srun -n "$nodes" -N "$nodes" --cpus-per-task="$(( total_cores ))" --cpu_bind none \
    "${srun_flags[@]}" ../../legion/task_bench "${@:2}" \
    -fields 2 -ll:cpu "$cores" -ll:util 0 "${memoize[@]}"
}

# Run task_bench under srun with one rank per node, -ll:util 1 and
# -ll:pin_util.
# Globals:   total_cores (read), cores (read)
# Arguments: $1   - node count (used for both srun -n and -N)
#            $2.. - task_bench arguments, forwarded unchanged
# Returns:   exit status of srun
function launch_util_1 {
  local nodes=$1
  # Local arrays: no leakage into the caller and no reliance on word splitting.
  local -a memoize=(-dm:memoize)
  local -a srun_flags=()
  # The extra network flag is only added for single-node runs.
  if (( nodes == 1 )); then
    srun_flags=(--network=single_node_vni)
  fi
  srun -n "$nodes" -N "$nodes" --cpus-per-task="$(( total_cores ))" --cpu_bind none \
    "${srun_flags[@]}" ../../legion/task_bench "${@:2}" \
    -fields 2 -ll:cpu "$cores" -ll:util 1 -ll:pin_util "${memoize[@]}"
}

# Run task_bench under srun with one rank per node and -ll:util 2.
# Globals:   total_cores (read), cores (read)
# Arguments: $1   - node count (used for both srun -n and -N)
#            $2.. - task_bench arguments, forwarded unchanged
# Returns:   exit status of srun
function launch_util_2 {
  local nodes=$1
  # Local arrays: no leakage into the caller and no reliance on word splitting.
  local -a memoize=(-dm:memoize)
  local -a srun_flags=()
  # The extra network flag is only added for single-node runs.
  if (( nodes == 1 )); then
    srun_flags=(--network=single_node_vni)
  fi
  srun -n "$nodes" -N "$nodes" --cpus-per-task="$(( total_cores ))" --cpu_bind none \
    "${srun_flags[@]}" ../../legion/task_bench "${@:2}" \
    -fields 2 -ll:cpu "$cores" -ll:util 2 "${memoize[@]}"
}

# Fill an array with N copies of an argument list, separated by "-and".
# Arguments: $1   - name of the caller's array to overwrite (nameref target;
#                   must not be called result, n or i)
#            $2   - number of copies
#            $3.. - the words making up one copy
# Outputs:   none; the result is written into the array named by $1
function repeat {
  local -n result=$1
  local n=$2
  local i  # keep the loop counter out of the caller's scope
  result=()
  for (( i = 1; i <= n; i++ )); do
    result+=("${@:3}")
    # Separator goes between copies only, never after the last one.
    if (( i < n )); then
      result+=("-and")
    fi
  done
}

# Run a launcher over a range of compute_bound kernel sizes.
# For s = 0..18 the iteration count is 2^(26-s); each size is launched
# min(s + 1, 5) times (rep runs 0..4 but only while rep <= s).
# Globals:   cores (read), RADIX (read, default 5), STEPS (read, default 1000)
# Arguments: $1 - launcher function/command, called as: launcher NODES ARGS...
#            $2 - node count (also scales -width as NODES * cores)
#            $3 - number of graphs (copies of the argument list joined by -and)
#            $4 - value for -type
# Outputs:   whatever the launcher writes
function sweep {
  local launcher=$1 nodes=$2 ngraphs=$3 pattern=$4
  local s rep  # loop counters stay out of the caller's scope
  local -a args
  for (( s = 0; s <= 18; s++ )); do
    for (( rep = 0; rep <= 4; rep++ )); do
      if (( rep <= s )); then
        repeat args "$ngraphs" -kernel compute_bound -iter "$(( 1 << (26 - s) ))" \
          -type "$pattern" -radix "${RADIX:-5}" -steps "${STEPS:-1000}" \
          -width "$(( nodes * cores ))"
        "$launcher" "$nodes" "${args[@]}"
      fi
    done
  done
}

# Driver: for each node count, graph count (NGRAPHS, default 1) and pattern
# (PATTERN, default stencil_1d), run one sweep and write its stdout to a
# log file named after that configuration.
for n in $SLURM_JOB_NUM_NODES; do
  for g in ${NGRAPHS:-1}; do
    for t in ${PATTERN:-stencil_1d}; do
      sweep launch_util_0 "$n" "$g" "$t" \
        > "legion_util_0_rank1_ngraphs_${g}_type_${t}_nodes_${n}.log"
      # sweep launch_util_1 $n $g $t > legion_util_1_rank1_ngraphs_${g}_type_${t}_nodes_${n}.log
      # sweep launch_util_2 $n $g $t > legion_util_2_rank1_ngraphs_${g}_type_${t}_nodes_${n}.log
    done
  done
done
74 changes: 74 additions & 0 deletions experiments/frontier_metg_compute/metg_legion_rank2.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#!/bin/bash
#SBATCH --account=CHM137
#SBATCH --partition=batch
#SBATCH --time=01:00:00
#SBATCH --mail-type=ALL

# Slurm batch script that runs ../../legion/task_bench with two ranks per node
# (the launch_util_* helpers pass twice the node count to srun -n).

# Halved by the launch helpers to get --cpus-per-task for each rank.
# NOTE(review): presumably the number of usable cores per node — confirm.
total_cores=56
# Four fewer than total_cores; halved for -ll:cpu and used to scale -width.
cores=$(( $total_cores - 4 ))

# GASNet ofi-conduit device selection, exported to the launched ranks.
# NOTE(review): presumably maps local ranks to CXI network interfaces; the
# "dummy" entry is tied to the bug linked below — confirm against GASNet docs.
export GASNET_OFI_DEVICE_TYPE=Node
export GASNET_OFI_DEVICE_0=dummy # https://gasnet-bugs.lbl.gov/bugzilla/show_bug.cgi?id=4669
export GASNET_OFI_DEVICE_0_1=cxi1
export GASNET_OFI_DEVICE_2_3=cxi0

# Run task_bench under srun with two ranks per node, -ll:io 1, -ll:util 0
# and -lg:replay_on_cpus.
# Globals:   total_cores (read), cores (read)
# Arguments: $1   - node count (srun -N; -n is twice this)
#            $2.. - task_bench arguments, forwarded unchanged
# Returns:   exit status of srun
function launch_util_0 {
  local nodes=$1
  # Local arrays: no leakage into the caller and no reliance on word splitting.
  local -a memoize=(-dm:memoize -lg:parallel_replay "$(( cores / 2 ))")
  local -a srun_flags=()
  # The extra network flag is only added for single-node runs.
  # NOTE(review): presumably required by the site's Slurm setup — confirm.
  if (( nodes == 1 )); then
    srun_flags=(--network=single_node_vni)
  fi
  srun -n "$(( nodes * 2 ))" -N "$nodes" --cpus-per-task="$(( total_cores / 2 ))" --cpu_bind cores \
    "${srun_flags[@]}" ../../legion/task_bench "${@:2}" \
    -ll:cpu "$(( cores / 2 ))" -ll:io 1 -ll:util 0 -lg:replay_on_cpus \
    "${memoize[@]}" -lg:window 8192 -fields 2
}

# Run task_bench under srun with two ranks per node, -ll:io 1, -ll:util 1
# and -ll:pin_util.
# Globals:   total_cores (read), cores (read)
# Arguments: $1   - node count (srun -N; -n is twice this)
#            $2.. - task_bench arguments, forwarded unchanged
# Returns:   exit status of srun
function launch_util_1 {
  local nodes=$1
  # Local arrays: no leakage into the caller and no reliance on word splitting.
  local -a memoize=(-dm:memoize)
  local -a srun_flags=()
  # The extra network flag is only added for single-node runs.
  if (( nodes == 1 )); then
    srun_flags=(--network=single_node_vni)
  fi
  srun -n "$(( nodes * 2 ))" -N "$nodes" --cpus-per-task="$(( total_cores / 2 ))" --cpu_bind cores \
    "${srun_flags[@]}" ../../legion/task_bench "${@:2}" \
    -ll:cpu "$(( cores / 2 ))" -ll:io 1 -ll:util 1 -ll:pin_util \
    "${memoize[@]}" -lg:window 8192 -fields 2
}

# Run task_bench under srun with two ranks per node, -ll:util 2 and
# -lg:parallel_replay 2.
# Globals:   total_cores (read), cores (read)
# Arguments: $1   - node count (srun -N; -n is twice this)
#            $2.. - task_bench arguments, forwarded unchanged
# Returns:   exit status of srun
function launch_util_2 {
  local nodes=$1
  # Local arrays: no leakage into the caller and no reliance on word splitting.
  local -a memoize=(-dm:memoize -lg:parallel_replay 2)
  local -a srun_flags=()
  # The extra network flag is only added for single-node runs.
  if (( nodes == 1 )); then
    srun_flags=(--network=single_node_vni)
  fi
  srun -n "$(( nodes * 2 ))" -N "$nodes" --cpus-per-task="$(( total_cores / 2 ))" --cpu_bind cores \
    "${srun_flags[@]}" ../../legion/task_bench "${@:2}" \
    -ll:cpu "$(( cores / 2 ))" -ll:util 2 \
    "${memoize[@]}" -lg:window 8192 -fields 2
}

# Fill an array with N copies of an argument list, separated by "-and".
# Arguments: $1   - name of the caller's array to overwrite (nameref target;
#                   must not be called result, n or i)
#            $2   - number of copies
#            $3.. - the words making up one copy
# Outputs:   none; the result is written into the array named by $1
function repeat {
  local -n result=$1
  local n=$2
  local i  # keep the loop counter out of the caller's scope
  result=()
  for (( i = 1; i <= n; i++ )); do
    result+=("${@:3}")
    # Separator goes between copies only, never after the last one.
    if (( i < n )); then
      result+=("-and")
    fi
  done
}

# Run a launcher over a range of compute_bound kernel sizes.
# For s = 0..18 the iteration count is 2^(26-s); each size is launched
# min(s + 1, 5) times (rep runs 0..4 but only while rep <= s).
# Globals:   cores (read), RADIX (read, default 5), STEPS (read, default 1000)
# Arguments: $1 - launcher function/command, called as: launcher NODES ARGS...
#            $2 - node count (also scales -width as NODES * cores)
#            $3 - number of graphs (copies of the argument list joined by -and)
#            $4 - value for -type
# Outputs:   whatever the launcher writes
function sweep {
  local launcher=$1 nodes=$2 ngraphs=$3 pattern=$4
  local s rep  # loop counters stay out of the caller's scope
  local -a args
  for (( s = 0; s <= 18; s++ )); do
    for (( rep = 0; rep <= 4; rep++ )); do
      if (( rep <= s )); then
        repeat args "$ngraphs" -kernel compute_bound -iter "$(( 1 << (26 - s) ))" \
          -type "$pattern" -radix "${RADIX:-5}" -steps "${STEPS:-1000}" \
          -width "$(( nodes * cores ))"
        "$launcher" "$nodes" "${args[@]}"
      fi
    done
  done
}

# Driver: for each node count, graph count (NGRAPHS, default 1) and pattern
# (PATTERN, default stencil_1d), run one sweep and write its stdout to a
# log file named after that configuration.
for n in $SLURM_JOB_NUM_NODES; do
  for g in ${NGRAPHS:-1}; do
    for t in ${PATTERN:-stencil_1d}; do
      # sweep launch_util_0 $n $g $t > legion_util_0_rank2_ngraphs_${g}_type_${t}_nodes_${n}.log
      # sweep launch_util_1 $n $g $t > legion_util_1_rank2_ngraphs_${g}_type_${t}_nodes_${n}.log
      sweep launch_util_2 "$n" "$g" "$t" \
        > "legion_util_2_rank2_ngraphs_${g}_type_${t}_nodes_${n}.log"
    done
  done
done
75 changes: 75 additions & 0 deletions experiments/frontier_metg_compute/metg_legion_rank4.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#!/bin/bash
#SBATCH --account=CHM137
#SBATCH --partition=batch
#SBATCH --time=01:00:00
#SBATCH --mail-type=ALL

# Slurm batch script that runs ../../legion/task_bench with four ranks per
# node (the launch_util_* helpers pass four times the node count to srun -n).

# Divided by four by the launch helpers to get --cpus-per-task for each rank.
# NOTE(review): presumably the number of usable cores per node — confirm.
total_cores=56
# Eight fewer than total_cores; quartered for -ll:cpu and used to scale -width.
cores=$(( $total_cores - 8 ))

# GASNet ofi-conduit device selection, exported to the launched ranks.
# NOTE(review): presumably assigns one CXI network interface to each of the
# four local ranks — confirm against GASNet docs.
export GASNET_OFI_DEVICE_TYPE=Node
export GASNET_OFI_DEVICE_0=cxi2
export GASNET_OFI_DEVICE_1=cxi1
export GASNET_OFI_DEVICE_2=cxi3
export GASNET_OFI_DEVICE_3=cxi0

# Run task_bench under srun with four ranks per node, -ll:io 1, -ll:util 0
# and -lg:replay_on_cpus.
# Globals:   total_cores (read), cores (read)
# Arguments: $1   - node count (srun -N; -n is four times this)
#            $2.. - task_bench arguments, forwarded unchanged
# Returns:   exit status of srun
function launch_util_0 {
  local nodes=$1
  # Local arrays: no leakage into the caller and no reliance on word splitting.
  local -a memoize=(-dm:memoize -lg:parallel_replay "$(( cores / 4 ))")
  local -a srun_flags=()
  # The extra network flag is only added for single-node runs.
  # NOTE(review): presumably required by the site's Slurm setup — confirm.
  if (( nodes == 1 )); then
    srun_flags=(--network=single_node_vni)
  fi
  srun -n "$(( nodes * 4 ))" -N "$nodes" --cpus-per-task="$(( total_cores / 4 ))" --cpu_bind cores \
    "${srun_flags[@]}" ../../legion/task_bench "${@:2}" \
    -ll:cpu "$(( cores / 4 ))" -ll:io 1 -ll:util 0 -lg:replay_on_cpus \
    "${memoize[@]}" -lg:window 8192 -fields 2
}

# Run task_bench under srun with four ranks per node, -ll:io 1, -ll:util 1
# and -ll:pin_util.
# Globals:   total_cores (read), cores (read)
# Arguments: $1   - node count (srun -N; -n is four times this)
#            $2.. - task_bench arguments, forwarded unchanged
# Returns:   exit status of srun
function launch_util_1 {
  local nodes=$1
  # Local arrays: no leakage into the caller and no reliance on word splitting.
  local -a memoize=(-dm:memoize)
  local -a srun_flags=()
  # The extra network flag is only added for single-node runs.
  if (( nodes == 1 )); then
    srun_flags=(--network=single_node_vni)
  fi
  srun -n "$(( nodes * 4 ))" -N "$nodes" --cpus-per-task="$(( total_cores / 4 ))" --cpu_bind cores \
    "${srun_flags[@]}" ../../legion/task_bench "${@:2}" \
    -ll:cpu "$(( cores / 4 ))" -ll:io 1 -ll:util 1 -ll:pin_util \
    "${memoize[@]}" -lg:window 8192 -fields 2
}

# Run task_bench under srun with four ranks per node, -ll:util 2 and
# -lg:parallel_replay 2.
# Globals:   total_cores (read), cores (read)
# Arguments: $1   - node count (srun -N; -n is four times this)
#            $2.. - task_bench arguments, forwarded unchanged
# Returns:   exit status of srun
function launch_util_2 {
  local nodes=$1
  # Local arrays: no leakage into the caller and no reliance on word splitting.
  local -a memoize=(-dm:memoize -lg:parallel_replay 2)
  local -a srun_flags=()
  # The extra network flag is only added for single-node runs.
  if (( nodes == 1 )); then
    srun_flags=(--network=single_node_vni)
  fi
  srun -n "$(( nodes * 4 ))" -N "$nodes" --cpus-per-task="$(( total_cores / 4 ))" --cpu_bind cores \
    "${srun_flags[@]}" ../../legion/task_bench "${@:2}" \
    -ll:cpu "$(( cores / 4 ))" -ll:util 2 \
    "${memoize[@]}" -lg:window 8192 -fields 2
}

# Fill an array with N copies of an argument list, separated by "-and".
# Arguments: $1   - name of the caller's array to overwrite (nameref target;
#                   must not be called result, n or i)
#            $2   - number of copies
#            $3.. - the words making up one copy
# Outputs:   none; the result is written into the array named by $1
function repeat {
  local -n result=$1
  local n=$2
  local i  # keep the loop counter out of the caller's scope
  result=()
  for (( i = 1; i <= n; i++ )); do
    result+=("${@:3}")
    # Separator goes between copies only, never after the last one.
    if (( i < n )); then
      result+=("-and")
    fi
  done
}

# Run a launcher over a range of compute_bound kernel sizes.
# For s = 0..18 the iteration count is 2^(26-s); each size is launched
# min(s + 1, 5) times (rep runs 0..4 but only while rep <= s).
# Globals:   cores (read), RADIX (read, default 5), STEPS (read, default 1000)
# Arguments: $1 - launcher function/command, called as: launcher NODES ARGS...
#            $2 - node count (also scales -width as NODES * cores)
#            $3 - number of graphs (copies of the argument list joined by -and)
#            $4 - value for -type
# Outputs:   whatever the launcher writes
function sweep {
  local launcher=$1 nodes=$2 ngraphs=$3 pattern=$4
  local s rep  # loop counters stay out of the caller's scope
  local -a args
  for (( s = 0; s <= 18; s++ )); do
    for (( rep = 0; rep <= 4; rep++ )); do
      if (( rep <= s )); then
        repeat args "$ngraphs" -kernel compute_bound -iter "$(( 1 << (26 - s) ))" \
          -type "$pattern" -radix "${RADIX:-5}" -steps "${STEPS:-1000}" \
          -width "$(( nodes * cores ))"
        "$launcher" "$nodes" "${args[@]}"
      fi
    done
  done
}

# Driver: for each node count, graph count (NGRAPHS, default 1) and pattern
# (PATTERN, default stencil_1d), run one sweep and write its stdout to a
# log file named after that configuration.
for n in $SLURM_JOB_NUM_NODES; do
  for g in ${NGRAPHS:-1}; do
    for t in ${PATTERN:-stencil_1d}; do
      # sweep launch_util_0 $n $g $t > legion_util_0_rank4_ngraphs_${g}_type_${t}_nodes_${n}.log
      # sweep launch_util_1 $n $g $t > legion_util_1_rank4_ngraphs_${g}_type_${t}_nodes_${n}.log
      sweep launch_util_2 "$n" "$g" "$t" \
        > "legion_util_2_rank4_ngraphs_${g}_type_${t}_nodes_${n}.log"
    done
  done
done
43 changes: 43 additions & 0 deletions experiments/frontier_metg_compute/metg_mpi.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/bin/bash
#SBATCH --account=CHM137
#SBATCH --partition=batch
#SBATCH --time=01:00:00
#SBATCH --mail-type=ALL

# Slurm batch script that runs the executable ../../mpi/$VARIANT with one
# single-CPU task per core.

# Tasks per node (--ntasks-per-node); also scales srun -n and -width.
# NOTE(review): presumably the number of usable cores per node — confirm.
cores=56

# Run ../../mpi/$VARIANT under srun with `cores` single-CPU tasks per node.
# Globals:   cores (read), VARIANT (read; executable name under ../../mpi)
# Arguments: $1   - node count
#            $2.. - arguments forwarded unchanged to the executable
# Returns:   exit status of srun, or 1 if VARIANT is unset or empty
function launch {
  local nodes=$1
  # Without VARIANT the path would collapse to the directory ../../mpi/,
  # so fail with a clear message instead of handing that to srun.
  if [[ -z ${VARIANT:-} ]]; then
    printf 'launch: VARIANT must name an executable under ../../mpi\n' >&2
    return 1
  fi
  srun -n "$(( nodes * cores ))" -N "$nodes" --ntasks-per-node="$cores" \
    --cpus-per-task=1 --cpu_bind cores "../../mpi/$VARIANT" "${@:2}"
}

# Fill an array with N copies of an argument list, separated by "-and".
# Arguments: $1   - name of the caller's array to overwrite (nameref target;
#                   must not be called result, n or i)
#            $2   - number of copies
#            $3.. - the words making up one copy
# Outputs:   none; the result is written into the array named by $1
function repeat {
  local -n result=$1
  local n=$2
  local i  # keep the loop counter out of the caller's scope
  result=()
  for (( i = 1; i <= n; i++ )); do
    result+=("${@:3}")
    # Separator goes between copies only, never after the last one.
    if (( i < n )); then
      result+=("-and")
    fi
  done
}

# Run a launcher over a range of compute_bound kernel sizes.
# For s = 0..21 the iteration count is 2^(26-s); each size is launched
# min(s + 1, 5) times (rep runs 0..4 but only while rep <= s).
# Globals:   cores (read), RADIX (read, default 5), STEPS (read, default 1000)
# Arguments: $1 - launcher function/command, called as: launcher NODES ARGS...
#            $2 - node count (also scales -width as NODES * cores)
#            $3 - number of graphs (copies of the argument list joined by -and)
#            $4 - value for -type
# Outputs:   whatever the launcher writes
function sweep {
  local launcher=$1 nodes=$2 ngraphs=$3 pattern=$4
  local s rep  # loop counters stay out of the caller's scope
  local -a args
  for (( s = 0; s <= 21; s++ )); do
    for (( rep = 0; rep <= 4; rep++ )); do
      if (( rep <= s )); then
        repeat args "$ngraphs" -kernel compute_bound -iter "$(( 1 << (26 - s) ))" \
          -type "$pattern" -radix "${RADIX:-5}" -steps "${STEPS:-1000}" \
          -width "$(( nodes * cores ))"
        "$launcher" "$nodes" "${args[@]}"
      fi
    done
  done
}

# Driver: for each node count, graph count (NGRAPHS, default 1) and pattern
# (PATTERN, default stencil_1d), run one sweep and write its stdout to a
# log file named after VARIANT and that configuration.
for n in $SLURM_JOB_NUM_NODES; do
  for g in ${NGRAPHS:-1}; do
    for t in ${PATTERN:-stencil_1d}; do
      sweep launch "$n" "$g" "$t" \
        > "mpi_${VARIANT}_ngraphs_${g}_type_${t}_nodes_${n}.log"
    done
  done
done
Loading
Loading