From 4153cefd464768a4e762fca1045b71296ec3b4ea Mon Sep 17 00:00:00 2001
From: tkoskela <12845296+tkoskela@users.noreply.github.com>
Date: Mon, 25 Mar 2024 16:29:41 +0000
Subject: [PATCH] Cleanup docs for PR #297

 81 files changed, 51115 deletions(-)
BabelStream measures memory transfer rates to/from global device memory on GPUs. This benchmark is similar in spirit to, and based on, the STREAM benchmark [1] for CPUs. Unlike other GPU memory bandwidth benchmarks, it does not include the PCIe transfer time. There are multiple implementations of this benchmark in a variety of programming models. This code was previously called GPU-STREAM.

From the top-level directory of the repository, you can run the benchmarks with

```
reframe -c benchmarks/apps/babelstream -r --tag <TAG> --system=<ENV:PARTITION> -S build_locally=false -S spack_spec='babelstream +tag <extra flags>'
```

The Spack directives for BabelStream can be found here.

You can run individual benchmarks with the `--tag` option:
- `omp` to run the OpenMP benchmark
- `ocl` to run the OpenCL benchmark
- `std` to run the STD benchmark
- `std20` to run the STD20 benchmark
- `hip` to run the HIP benchmark
- `cuda` to run the CUDA benchmark
- `kokkos` to run the Kokkos benchmark
- `sycl` to run the SYCL benchmark
- `sycl2020` to run the SYCL2020 benchmark
- `acc` to run the ACC benchmark
- `raja` to run the RAJA benchmark
- `tbb` to run the TBB benchmark
- `thrust` to run the THRUST benchmark

Examples:
```
reframe -c benchmarks/apps/babelstream -r --tag omp --system=isambard-macs:volta -S build_locally=false -S spack_spec='babelstream%gcc@9.2.0 +omp cuda_arch=70'
reframe -c benchmarks/apps/babelstream -r --tag tbb --system=isambard-macs:cascadelake -S build_locally=false -S spack_spec='babelstream@develop +tbb'
reframe -c benchmarks/apps/babelstream -r --tag cuda --system=isambard-macs:volta -S build_locally=false -S spack_spec='babelstream@develop%gcc@9.2.0 +cuda cuda_arch=70'
```
By default, these benchmarks will use [`num_gpus_per_node`](https://reframe-hpc.readthedocs.io/en/stable/regression_test_api.html#reframe.core.pipeline.RegressionTest.num_gpus_per_node), which defaults to 1 for the benchmarks requiring a GPU (e.g. CUDA, HIP). You can override the value of this variable from the command line with the `--setvar` option, for example

```
reframe -c benchmarks/apps/babelstream -r --tag cuda --system=isambard-macs:volta -S build_locally=false -S spack_spec='babelstream@develop%gcc@9.2.0 +cuda cuda_arch=70' --setvar=num_gpus_per_node=2
```

Note: you're responsible for overriding this variable in a consistent way, so that, for example, `num_gpus_per_node` doesn't exceed the number of GPUs available on each node.
The figure of merit captured by these benchmarks is the bandwidth. For example, if the output of the program is

```
BabelStream
Version: 4.0
Implementation: OpenMP
Running kernels 100 times
Precision: double
Array size: 268.4 MB (=0.3 GB)
Total size: 805.3 MB (=0.8 GB)
Function    MBytes/sec  Min (sec)  Max      Average
Copy        91018.241   0.00590    0.01087  0.00721
Mul         80014.622   0.00671    0.01173  0.00837
Add         92644.967   0.00869    0.01636  0.01121
Triad       93484.396   0.00861    0.01416  0.01142
Dot         114688.364  0.00468    0.01382  0.00707
```

the bandwidth numbers reported for Copy, Mul, Add, Triad and Dot (the `MBytes/sec` column) will be captured.
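As an illustration of how such a figure of merit can be pulled out of the output, here is a minimal, self-contained Python sketch. It is not the suite's own test code; `sample_output` simply stands in for the benchmark's stdout shown above.

```python
import re

# Minimal sketch: collect the MBytes/sec column for each BabelStream kernel.
sample_output = """\
Copy        91018.241   0.00590    0.01087  0.00721
Mul         80014.622   0.00671    0.01173  0.00837
Add         92644.967   0.00869    0.01636  0.01121
Triad       93484.396   0.00861    0.01416  0.01142
Dot         114688.364  0.00468    0.01382  0.00707
"""

bandwidths = {
    m.group(1): float(m.group(2))
    for m in re.finditer(r'^(Copy|Mul|Add|Triad|Dot)\s+(\S+)', sample_output, re.M)
}
print(bandwidths['Triad'])  # 93484.396
```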
CP2K is a quantum chemistry and solid state physics software package. This directory includes the H2O-64, H2O-256, and LiH_HFX CP2K benchmarks, based on the ARCHER2 HPC benchmarks.

From the top-level directory of the repository, you can run the benchmarks with `reframe -c benchmarks/apps/cp2k -r --performance-report`. By default all benchmarks will be run. You can run individual benchmarks with the `--tag` option:

- `h2o-64` to run the H2O-64 benchmark,
- `h2o-256` to run the H2O-256 benchmark,
- `lih-hfx` to run the LiH_HFX benchmark.

Examples:
```
reframe -c benchmarks/apps/cp2k -r --performance-report --tag h2o-64
reframe -c benchmarks/apps/cp2k -r --performance-report --tag h2o-256
reframe -c benchmarks/apps/cp2k -r --performance-report --tag lih-hfx
```
-num_cpus_per_task
:
- 2num_tasks
:
- current_partition.processor.num_cpus // min(1, current_partition.processor.num_cpus_per_core) // num_cpus_per_task
num_tasks_per_node
: current_partition.processor.num_cpus // num_cpus_per_task
You can override the values of these variables from the command line with the
---setvar
-option, for example
reframe -c benchmarks/apps/cp2k -r --performance-report --setvar=num_cpus_per_task=4 --setvar=num_tasks=16
-
Note: you're responsible for overriding these variables in a consistent
-way, so that, for example, num_tasks_per_node
doesn't exceed the number of
-total tasks runnable on each node.
The figure of merit captured by these benchmarks is the maximum total CP2K time. -For example, if the output of the program is
The figure of merit captured by these benchmarks is the maximum total CP2K time. For example, if the output of the program contains

```
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD       SELF TIME        TOTAL TIME
                                              MAXIMUM  AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.178    0.295  200.814  200.816
 qs_energies                          1  2.0    0.000    0.000  200.091  200.093
 scf_env_do_scf                       1  3.0    0.000    0.000  198.017  198.018
 qs_ks_update_qs_env                  8  5.0    0.000    0.000  161.422  161.440
 rebuild_ks_matrix                    7  6.0    0.000    0.000  161.419  161.437
 qs_ks_build_kohn_sham_matrix         7  7.0    0.001    0.001  161.419  161.437
 hfx_ks_matrix                        7  8.0    0.000    0.000  154.464  154.495
```

the number `200.816` will be captured.
-From the top-level directory of the repository, you can run the benchmarks with
- -You can run individual benchmarks with the
---tag
-option. At the moment we have the following tags:
ITT
to run the Benchmark_ITT
application.Examples:
- -There are some options you can set to control the settings of the benchmark. -These are the currently available options, with their default values:
-mpi
: '1.1.1.1'
. This is the string to pass to the benchmarking applications with the
- --mpi
flag. This will also automatically set the ReFrame variable
- num_tasks
num_cpus_per_task
:
- current_partition.processor.num_cpus // min(1, current_partition.processor.num_cpus_per_core)
num_tasks_per_node
:
- current_partition.processor.num_cpus // num_cpus_per_task
shm
: 1024
. This is the size of the shared memory used by the benchmark, in MiB, as an
- integer.You can override the values of these variables from the command line with the
---setvar
-option, for example
reframe -c benchmarks/apps/grid -r --performance-report --setvar=mpi='2.2.1.1' --setvar=num_cpus_per_task=12
```
reframe -c benchmarks/apps/grid -r --performance-report --setvar=mpi='2.2.1.1' --setvar=num_cpus_per_task=12
reframe -c benchmarks/apps/grid -r --performance-report --setvar=mpi='4.4.4.4' --setvar=shm=4096
```

Note: you're responsible for overriding these variables in a consistent way, so that, for example, `num_tasks_per_node` doesn't exceed the number of tasks that can run on each node.
- -the number 143382.7
will be captured as figure of merit.
These are based upon the HPCG Conjugate Gradient solver benchmark. -At the time of writing, there are three benchmarks in the suite: the original implementation, one which solves the same problem with a hard-coded stencil, and one -which solves a different problem with an LFRic stencil and data.
-From the top-level directory of the repository, you can run the benchmarks with
- -You can use the -n/--name
argument to pick HPCG_Original / HPCG_Stencil / HPCG_LFRic
to select a particular benchmark.
-Alternatively, if you want to compare the two implementations of the 27 point stencil problem (Original and Stencil), you can filter by tag -t 27pt_stencil
.
This app is currently intended to be parallelized with MPI, and it is recommended to use the --system
argument to pick up the appropriate hardware details, as well as Spack libraries.
See hpgmg
- - - - - - - - - - - - - -Run Intel optimised HPL tests on one and all nodes.
-This uses the pre-built binaries supplied with Intel's MKL package. -Note: Intel MPI is also required.
-By default the intel-mkl
and intel-mpi
Spack recipes will be used.
-If these packages are already available on the system you are using and the Spack environment knows about them, the system packages will be automatically used, otherwise Spack will download and install them for you.
If you want to use the oneAPI distribution of MKL and MPI, pass --setvar spack_spec="intel-oneapi-mkl ^intel-oneapi-mpi"
as additional argument to the ReFrame invocation (see below).
-As usual, if these packages are available in the system and the Spack environment knows about them, those packages will be used.
HPL.dat
configuration filesAppropriate HPL.dat
configuration files must be generated and placed in <repo_root>/benchmarks/apps/hpl/<sysname>/<number of tasks>
, if not already available.
-ReFrame will copy these files into the staging directories before running a test, so changes made to these files will persist and apply to the next run.
Hints:
-pstree
and top
appear as expected.Note: not all systems have appropriate input data, or not for the number of tasks you want to run, so you may have to create the HPL.DAT
file yourself.
If you want to use an HPL.dat
file in a different directory, you can pass --setvar config_dir=<DIRECTORY>
as additional argument to the ReFrame invocation (see below), where <DIRECTORY>
is the absolute path of the directory where HPL.dat
is.
Run using e.g.:
- -You can set the number of nodes and tasks per node to use by setting the following variables:
-num_tasks_per_node
(default = 1)num_tasks
(default = 1)For example
-reframe -c benchmarks/apps/hpl --run --performance-report --setvar num_tasks=4 # 4 MPI ranks
-reframe -c benchmarks/apps/hpl --run --performance-report --setvar num_tasks=8 --setvar num_tasks_per_node=2 # 8 MPI ranks, 2 for each node (for a total of 4 nodes)
-
The ReFrame performance variable is:
-Gflops
: The performance.https://software.intel.com/en-us/imb-user-guide
-Builds automatically using spack.
-Runs the following MPI1 tests using Intel MPI and OpenMPI:
-The following tags are defined:
-This directory contains the benchmarks currently supported by the project. More can be added by opening a Pull Request following the guidance in contributing.
- - - - - - - - - - - - - -Performance tests of the material property code CASTEP using CASTEP-provided benchmarks: -- Small benchmark: TiN
---a 32-atom TiN surface, with an adsorbed H atom and a vacuum gap. There are 8 k-points, so it should scale well to 8 cores; beyond that, it relies on CASTEP's other parallelisation strategies. -- Medium benchmark: Al3x3 -a 270-atom sapphire surface, with a vacuum gap. There are only 2 k-points, so it is a good test of the performance of CASTEP's other parallelisation strategies. -- Large benchmark: DNA -a 1356-atom simulation of a DNA strand (poly-A) with counter-ions, in a large simulation box. There is only 1 k-point (the gamma-point), so, like the Crambin test, its parallel performance is reliant on CASTEP's other parallelisation strategies.
-
(descriptions from the CASTEP benchmarks page)
-Each benchmark is run on a range of number of nodes, from 1 up to all available. Each run uses as many mpi tasks (processes) per node as there are physical cores.
-The following performance variables are captured:
-- 'total_time' (s): Total time required for the simulation, as reported by CASTEP
-- 'peak_mem' (kB): Peak memory usage, as reported by CASTEP
-- 'parallel_efficiency' (%): Parallel efficiency, as reported by CASTEP
-- 'runtime_real' (s): Wallclock time reported by time
for entire MPI program start to finish (i.e. may include additional setup/teardown time not captured in 'total_time').
Run using e.g.:
-cd hpc-tests
-conda activate hpc-tests
-reframe/bin/reframe -C reframe_config.py -c apps/castep/ --run --performance-report
-
Run a specific test by appending e.g.:
---tag Al3x3
-
Performance tests of the Gromacs molecular dynamics code http://www.gromacs.org/ using benchmarks from HECBioSim: http://www.hecbiosim.ac.uk/benchmarks: - - 61K atom system - 1WDN Glutamine-Binding Protein - - 1.4M atom system - A Pair of hEGFR Dimers of 1IVO and 1NQL - - 3M atom system - A Pair of hEGFR tetramers of 1IVO and 1NQL
-NB: Gromacs 2016 is required due to the benchmark file used.
-Each benchmark is run on a range of number of nodes, from 1 up to all available. Each run uses as many mpi tasks (processes) per node as there are physical cores, and the default Gromacs -ntomp
OpenMP setting, which appears to add threads to use all cores (physical or logical). For further information on Gromacs parallelisation schemes see here and here.
The following performance variables are captured:
-- 'ns_per_day': Nano-seconds of simulation completed per day
-- 'hour_per_ns': Hours required per nano-second of simulation
-- 'core_t' (s): Gromacs-reported CPU time (sum for all processes)
-- 'wall_t' (s): Gromacs-reported wall-clock time
-- 'runtime_real' (s): Wallclock time reported by time
for entire MPI program start to finish (i.e. may include additional setup/teardown time not captured in 'wall_t').
NB: The Gromacs docs recommend using fftw
. While this is available as the OpenHPC package fftw-gnu8-openmpi3-ohpc
the Gromacs docs recommend letting Gromacs install its own.
This assumes e.g.: - - cmake - - gnu8-compilers-ohpc - - openmpi3-gnu8-ohpc
-wget http://ftp.gromacs.org/pub/gromacs/gromacs-2016.4.tar.gz
-tar -xf gromacs-2016.4.tar.gz
-cd gromacs-2016.4
-mkdir build_mpi
-cd build_mpi
-module load gnu8 openmpi3
-cmake ../ -DGMX_MPI=ON -DGMX_OPENMP=ON -DGMX_GPU=OFF -DGMX_X11=OFF -DGMX_DOUBLE=OFF -DGMX_BUILD_OWN_FFTW=ON -DREGRESSIONTEST_DOWNLOAD=ON -DCMAKE_INSTALL_PREFIX=<wherever>
-make
-make check
-make install # to DCMAKE_INSTALL_PREFIX above
-
Install gromacs@2016
with default variants.
See note in main README re. usage of spack with existing mpi library installations.
-Run using e.g.:
-cd hpc-tests
-conda activate hpc-tests
-reframe/bin/reframe -C reframe_config.py -c apps/gromacs/ --run --performance-report
-
Or for example, to run only the 3000k atom case on 1 node only for a single partition:
-reframe/bin/reframe -C reframe_config.py -c apps/gromacs/ --run --performance-report --system alaska:ib-gcc9-openmpi4-ucx --tag 'num_nodes=1$' --tag '3000k-atoms'
-
This runs an Intel version of the High Performance Conjugate Gradient Benchmark optimised for Intel Xeon processors and linked against using Intel's MKL.
-Documentation is here
-It follows recommendations for performance given in Intel docs
-Prebuilt binaries are provided with MKL - tests here use AVX2-specific binaries so assume system is AVX2-capable.
-E.g.:
-spack load patch
-spack load gcc@9
-spack install intel-mpi %gcc@9: # DONE
-spack install intel-mkl %gcc@9: # NB this doesn't have threads enabled here/by default
-
Note that the executables are provided within the MKL installation directory, e.g.:
-$HOME/spack/opt/spack/linux-centos7-broadwell/gcc-9.3.0/intel-mkl-2020.1.217-5tpgp7bze633d4bybvvumfp2nhyg64xf/compilers_and_libraries_2020.1.217/linux/mkl/benchmarks/hpcg/bin/
-
which contains:
-hpcg.dat xhpcg_avx xhpcg_avx2 xhpcg_knl xhpcg_skx
-
The ReFrame environment configuration(s) for this test must include two items:
-Firstly, to add the /bin
directory to $PATH (as the spack-generated MKL module does not do this) include:
['PATH', '$PATH:$MKLROOT/benchmarks/hpcg/bin/']
-
(Note that MKLROOT
is exported by the MKL module).
Secondly, to select the appropriate binary for the system include:
-['XHPCG_BIN', <binary>]
-
where <binary>
is one of the above - note xhpcg_skx
is AVX512 (Skylake).
Appropriate HPCG configuration files named hpcg-single.dat
and hpcg-all.dat
for the single- and all-node cases respectively must be generated and placed in either:
<repo_root>/systems/<sysname>/hpl/
<repo_root>/systems/<sysname>/<partition>/hpl/
Note that an example file is provided in the binary directory described above.
-Requires AVX2
-These applications were part of the original StackHPC benchmark suite, but they are not currently supported in the ExCALIBUR benchmarks due to a lack of resources. This may change at a later date.
- - - - - - - - - - - - - -Performance tests of the computational fluid dynamics package Openfoam https://openfoam.org/ using a larger version of the motorbike tutorial included with OpenFOAM, as documented here.
-The benchmark is run on a range of number of nodes, from 1 up to all available. Each run uses as many processes per node as there are physical cores.
-The following performance variables are captured:
-- 'ExecutionTime' (s): TODO: Clarify what this actually calculates.
-- 'ClockTime' (s): Wallclock time as reported by Openfoam.
-- 'runtime_real' (s): Wallclock time reported by time
for entire MPI program start to finish.
All these timings are for the solver run only, ignoring meshing etc.
-Install package openfoam-org
with default variants.
See note in main README re. usage of spack with existing mpi library installations.
-Run using e.g.:
-reframe/bin/reframe -C reframe_config.py -c apps/openfoam/ --run --performance-report
-
Run on a specific number of nodes by appending:
---tag 'num_nodes=N$'
-
where N must be one of 1, 2, 4, ..., etc.
- - - - - - - - - - - - - -This "application" provides a way to automatically gather and report information about system hardware.
-To use:
-1. In the ReFrame configuration reframe_config.py
, add an environment "sysinfo" for the system/parition(s) of interest (see Alaska example).
-1. Run:
reframe/bin/reframe -C reframe_config.py -c apps/sysinfo/ --run
-
This will run an mpi job, producing output/<system>/<partition>/sysinfo/Sysinfo/sysinfo.json
containing hardware information for every node in the partition. Commit this file.
-1. The apps/sysinfo/sysinfo/sysinfo.ipynb
notebook collates and presents this data: navigate to it in a browser, rerun, save and commit it.
http://mvapich.cse.ohio-state.edu/static/media/mvapich/README-OMB.txt
-The following tests are run (extracted performance variables described in brackets):
-On 2x nodes using 1x process per node:
-osu_bw
- bandwidth (max value over all message sizes)osu_latency
- latency (min value over all message sizes)osu_bibw
- bi-directional bandwidth (max value over all message sizes)On 2x nodes using as many processes per node as there are physical cores:
-osu_allgather
- latency (mean value calculated at each message size over pairs, then min taken over all message sizes)osu_allreduce
- as aboveosu_alltoall
- as aboveThe following tags are defined:
-Run all tests using e.g.:
- -Run only specified benchmark, by choosing the corresponding tag:
- - - - - - - - - - - - - - -OpenMM is high-performance toolkit for molecular simulation. -This directory includes a test based on the 1400k atom benchmark from the HECBioSim suite. -Note: this benchmark can run only on systems with a CUDA GPU.
-From the top-level directory of the repository, you can run the benchmarks with
- -The output of the program looks like
-#"Progress (%)" "Step" "Potential Energy (kJ/mole)" "Kinetic Energy (kJ/mole)" "Total Energy (kJ/mole)" "Temperature (K)" "Speed (ns/day)" "Time Remaining"
-10.0% 1000 -15688785.887127012 3656752.4413931114 -12032033.445733901 301.1644297760901 0 --
-20.0% 2000 -15722326.52227436 3651648.2543405197 -12070678.26793384 300.7440568884525 8.58 2:41
-30.0% 3000 -15748457.618506134 3653282.2518931925 -12095175.366612941 300.8786303793008 8.6 2:20
-40.0% 4000 -15766187.389856085 3650127.3583686342 -12116060.03148745 300.6187982674595 8.6 2:00
-50.0% 5000 -15771978.47168088 3640930.7606806774 -12131047.711000202 299.86138082043146 8.61 1:40
-60.0% 6000 -15779433.041706115 3640669.6428865143 -12138763.398819601 299.8398755660168 8.65 1:19
-70.0% 7000 -15774388.543227583 3646512.6161559885 -12127875.927071594 300.3210937346243 8.67 0:59
-80.0% 8000 -15777731.520400822 3641287.017230322 -12136444.5031705 299.89072155441534 8.68 0:39
-90.0% 9000 -15784781.923775911 3647212.6162459007 -12137569.30753001 300.3787446506489 8.7 0:19
-100.0% 10000 -15794411.8787518 3646944.5551444986 -12147467.323607301 300.3566675562755 8.71 0:00
-
The figure of merit is the speed of the last step, in units of ns/day
.
-In this example, the capture figure of merit is 8.71
.
This code is currently hosted on a private GitHub repo for the benchmarking purposes. If you want to run this benchmark you will -first need to request access. Please speak to the RSE team at Leicester for access.
-This code requires the following input data.
-cosmo3d-IC-256.tar.gz
cosmo3d-IC-322.tar.gz
cosmo3d-IC-406.tar.gz
cosmo3d-IC-512.tar.gz
They are publicly available on zenodo.
-NB They will be automatically downloaded by reframe, but it takes roughly 15 mins at 5MB/s. They will only be downloaded once
-per run, but if you manually re-run tests you may prefer to use the following options
---restore-session
and
---keep-stage-files
.
From the top-level directory of the repository, you can run the benchmarks with
- -By default all benchmarks will be run. You can run individual benchmarks with the
---tag
option:
weak
to run the weak scaling benchmarksstrong
to run the strong scaling benchmarksExamples:
-reframe -c benchmarks/apps/ramses -r --performance-report --tag weak
-reframe -c benchmarks/apps/ramses -r --performance-report --tag strong
-
Currently, only the intel compiler is supported for this program.
- - - - - - - - - - - - - -SOMBRERO is a benchmarking utility -for high performance computing based on lattice field theory applications.
-SOMBRERO is composed of 6 similar benchmarks -that are based on different lattice field theories, -each one with a different arithmetic intensity -and a different compute/communication balance. -Each benchmark consists of a fixed number (50) -of iterations of the Conjugate Gradient algorithm, -using the underlying Lattice Dirac operator -built in the relative theory.
-See the documentation -for more information.
-SOMBRERO uses a pure-mpi parallelisation.
-There are four benchmark cases that can be chosen
-using the --tag=<TAG>
command line option of reframe
:
mini
: A debug run, on a very small lattice, on 2 processes.ITT-sn
: A run on a single node, using all the cores in each node
- (as described here).ITT-64n
: A run on 64 nodes, using all the cores in each node
- (as described here).
- The number of nodes used can be changed by setting the variable num_nodes
,
- for example reframe ... -S num_nodes=48
.scaling
: A large benchmarking campaign, where of the benchmarks is launched
- on a range of number of processes
- (depending on the setup of the machine)
- and 4 different lattice sizes
- (details depend on how the cases are filtered).
-In all these cases, the benchmark for each theory is launched.The following performance variables are captured:
-This code is currently hosted on a private GitHub repo for the benchmarking purposes. If you want to run this benchmark you will -first need to request access. Please speak to the RSE team at Leicester for access.
-The main code is available at https://bitbucket.org/mrbate/sphng/src/master/g but the -spack recipe is not currently set up to work with it. We are working on this and soon we will switch over to the actual version. -This is because we need to be able to verify that the version we run is the same as the one already used for benchmarking.
-From the top-level directory of the repository, you can run the benchmarks with
- -By default all benchmarks will be run. You can run individual benchmarks with the
---tag
option:
single-node
to run benchmarks on a single nodeExamples:
- -Currently, only the intel compiler is supported for this program.
- - - - - - - - - - - - - -See swift
- - - - - - - - - - - - - -This code is currently hosted on a private GitHub repo for the benchmarking purposes. If you want to run this benchmark you will -first need to request access. Please speak to the RSE team at Leicester for access.
-The main code is publicly available at https://github.com/Trovemaster/PDSYEV but the -spack recipe is not currently set up to work with it. We are working on this and soon we will switch over to the public version. -This is because we need to be able to verify that the version we run is the same as the one already used for benchmarking.
-From the top-level directory of the repository, you can run the benchmarks with
- -By default all benchmarks will be run. You can run individual benchmarks with the
---tag
option:
single-node
to run benchmarks on a single nodeExamples:
- -Currently, only the intel compiler is supported for this program.
- - - - - - - - - - - - - -This code is currently hosted on a private GitHub repo for the benchmarking purposes. If you want to run this benchmark you will -first need to request access. Please speak to the RSE team at Leicester for access.
-The main code is publicly available at https://github.com/Trovemaster/TROVE but the -spack recipe is not currently set up to work with it. We are working on this and soon we will switch over to the public version. -This is because we need to be able to verify that the version we run is the same as the one already used for benchmarking.
-From the top-level directory of the repository, you can run the benchmarks with
- -By default all benchmarks will be run. You can run individual benchmarks with the
---tag
option:
12N
to run benchmarks w.r.t. N12.inpExamples:
- -Currently, only the intel compiler is supported for this program.
- - - - - - - - - - - - - -Results from WRF, the Weather Research & Forecasting Model using the WRFV3 benchmarks:
-12km
:--48-hour, 12km resolution case over the Continental U.S. (CONUS) domain October 24, 2001 with a time step of 72 seconds. The benchmark period is hours 25-27 (3 hours), starting from a restart file from the end of hour 24.
-
2.5km
:--Latter 3 hours of a 9-hour, 2.5km resolution case covering the Continental U.S. (CONUS) domain June 4, 2005 with a 15 second time step. The benchmark period is hours 6-9 (3 hours), starting from a restart file from the end of the initial 6 hour period -Descriptions from the above benchmark page.
-
The following performance variables are captured:
-Run using e.g.:
- -A precursor task automatically downloads the required benchmark files. -This may take some time due to the files size.
-You can filter the benchmark to run by filtering by tag:
-# For the 12km data
-reframe/bin/reframe -c benchmarks/apps/wrf/ --run --performance-report --tag '12km'
-# For the 2.5km data
-reframe/bin/reframe -c benchmarks/apps/wrf/ --run --performance-report --tag '2.5km'
-
By default, these benchmarks will use
-num_cpus_per_task
:
- 2num_tasks
:
- current_partition.processor.num_cpus // min(1, current_partition.processor.num_cpus_per_core) // num_cpus_per_task
num_tasks_per_node
: current_partition.processor.num_cpus // num_cpus_per_task
You can override the values of these variables from the command line with the
---setvar
-option, for example
reframe -c benchmarks/apps/wrf -r --performance-report --setvar=num_cpus_per_task=4 --setvar=num_tasks=16
-
Note: you're responsible for overriding these variables in a consistent
-way, so that, for example, num_tasks_per_node
doesn't exceed the number of
-total tasks runnable on each node.
{"use strict";/*!