diff --git a/_static/css/custom.css b/_static/css/custom.css index 286c727..395c504 100644 --- a/_static/css/custom.css +++ b/_static/css/custom.css @@ -22,12 +22,14 @@ --n8-dark-theme-text-color: #F0F6FC; --n8-dark-theme-text-color-muted: #9ca4af; --n8-dark-theme-inline-code-background-color: #1f2733; + --sphinx-tabs-tab-color: var(--pst-color-primary); + --sphinx-tabs-background-color: var(--pst-color-surface); + --sphinx-tabs-border-color: var(--pst-color-primary); } /* ----------------------------------------- Non-theme specific / universal css changes ------------------------------------------*/ - html { --pst-sidebar-font-size: 1.0rem; } @@ -90,6 +92,30 @@ body { max-width:1600px } } +/* Sphinx tabs theming via variables for easier light/dark theming */ +html .sphinx-tabs-tab { + color: var(--sphinx-tabs-tab-color); + background-color: var(--sphinx-tabs-background-color); +} +html .sphinx-tabs-tab[aria-selected="true"] { + border-color: var(--sphinx-tabs-border-color); + border-bottom-color: var(--sphinx-tabs-background-color); + background: var(--sphinx-tabs-background-color); +} +html .sphinx-tabs-panel { + background: var(--sphinx-tabs-background-color); + border-color: var(--sphinx-tabs-border-color); +} +html [role="tablist"] { + border-bottom-color: var(--sphinx-tabs-border-color); +} +html .sphinx-tabs-panel pre { + background-color: var(--color-surface-tabs); +} +/* Fix right border of selected tab from being hidden */ +.sphinx-tabs-tab[aria-selected="true"] { + z-index: 2; +} /* -------------------------------------- Light theme specific formatting changes @@ -100,11 +126,13 @@ html[data-theme="light"] { --pst-color-text-muted: var(--n8-light-theme-text-color-muted); --pst-color-primary: var(--n8-deep-blue-color); --pst-color-secondary: var(--n8-burnt-orange-color-high-contrast-light); + --pst-color-warning: var(--n8-burnt-orange-color-high-contrast-light); --pst-color-accent: var(--n8-burnt-orange-color-high-contrast-light); --pst-color-inline-code: var(--n8-burnt-orange-color-high-contrast-light); --pst-color-link: var(--n8-deep-blue-color); --pst-color-preformatted-background: var(--pygments-default-background-color); --sbt-color-announcement: var(--n8-deep-blue-color); + --color-surface-tabs: #fcfcfc; } /* Override the default sphinx book theme announcement colour */ html[data-theme="light"] .bd-header-announcement a { @@ -126,6 +154,20 @@ html[data-theme="light"] .bd-article-container h4 html[data-theme="light"] .bd-article-container h5 { color: var(--n8-deep-blue-color); } +/* Sphinx-tabs light/dark themed styling tweaks */ +/* html[data-theme="light"] .sphinx-tabs-tab { + color: var(--pst-color-primary); + background-color: #0f0; +} +html[data-theme="light"] .sphinx-tabs-tab[aria-selected="true"] { + border-color: #ff0; + border-bottom-color: #00F; + background: #00f; +} +html[data-theme="light"] .sphinx-tabs-panel { + background: #00cccc; +} */ + /* ------------------------------------- Dark theme specific formatting changes @@ -136,13 +178,12 @@ html[data-theme="dark"] { --pst-color-text-muted: var(--n8-dark-theme-text-color-muted); --pst-color-primary: var(--n8-deep-blue-color-high-contrast-dark); --pst-color-secondary: var(--n8-burnt-orange-color); + --pst-color-warning: var(--n8-burnt-orange-color); --pst-color-accent: var(--n8-burnt-orange-color); --pst-color-surface: var(--n8-dark-theme-inline-code-background-color); /* inline code block background colour */ --pst-color-inline-code: var(--n8-burnt-orange-color); --pst-color-link: 
var(--n8-deep-blue-color-high-contrast-dark); - /* var(--n8-deep-blue-color); */ - /* --pst-color-preformatted-background: var(--pygments-default-background-color); */ - --sbt-color-announcement: var(--n8-deep-blue-color); + --color-surface-tabs: #181e27; } /* Override the default sphinx book theme announcement colour */ html[data-theme="dark"] .bd-header-announcement a { diff --git a/_static/js/custom.js b/_static/js/custom.js index a09d6f7..c97d66b 100644 --- a/_static/js/custom.js +++ b/_static/js/custom.js @@ -4,10 +4,12 @@ so cannot use sphinx :ref: for output destination URIs relative to the current p Instead, find the appropriate link within the current page, to add the anchor to the announcement. This does not currently support linking outside of the documentation website. */ -/* window.onload = function () { - expectedAnnouncementContent = ""; - expectedAnchorContent = ""; - var elements = document.getElementsByClassName("announcement"); + window.onload = function () { + expectedAnnouncementContent = "Using Bede"; + expectedAnchorContent = "Using Bede"; + optionalTargetID = "grace-hopper-pilot"; // null if linking to page + fullAnnouncementAnchor = false; + var elements = document.getElementsByClassName("bd-header-announcement"); for (var i = 0; i < elements.length; i++) { var element = elements.item(i); originalContent = element.innerHTML; @@ -28,10 +30,16 @@ This does not currently support linking outside of the documentation website. // then append the optionalTargetID, which should no longer begin with a hash. targetDestination += optionalTargetID; } - newAnnouncementContent = '' + originalContent + '' - element.innerHTML = newAnnouncementContent; + // Either wrap the full announcement body in the anchor, or just the expected anchor content + if (fullAnnouncementAnchor) { + newAnnouncementContent = '' + originalContent + '' + element.innerHTML = newAnnouncementContent; + } else { + newAnnouncementContent = originalContent.replace(expectedAnchorContent, '' + expectedAnchorContent + '') + element.innerHTML = newAnnouncementContent; + } } } } } -} */ +} diff --git a/common/aarch64-only-sidebar.rst b/common/aarch64-only-sidebar.rst new file mode 100644 index 0000000..ac5e15a --- /dev/null +++ b/common/aarch64-only-sidebar.rst @@ -0,0 +1,4 @@ +.. admonition:: aarch64 partitions only + :class: sidebar warning + + |arch_availabilty_name| is only provided on ``aarch64`` partitions (``gh``, ``ghtest``, ``ghlogin``). diff --git a/common/aarch64-only.rst b/common/aarch64-only.rst new file mode 100644 index 0000000..b028b80 --- /dev/null +++ b/common/aarch64-only.rst @@ -0,0 +1,4 @@ +.. admonition:: aarch64 partitions only + :class: warning + + |arch_availabilty_name| is only provided on ``aarch64`` partitions (``gh``, ``ghtest``, ``ghlogin``). diff --git a/common/ppc64le-only-sidebar.rst b/common/ppc64le-only-sidebar.rst new file mode 100644 index 0000000..e21bbea --- /dev/null +++ b/common/ppc64le-only-sidebar.rst @@ -0,0 +1,4 @@ +.. admonition:: ppc64le partitions only + :class: sidebar warning + + |arch_availabilty_name| is only provided on ``ppc64le`` partitions/nodes (``gpu``, ``infer``, ``test``). diff --git a/common/ppc64le-only.rst b/common/ppc64le-only.rst new file mode 100644 index 0000000..b56450f --- /dev/null +++ b/common/ppc64le-only.rst @@ -0,0 +1,4 @@ +.. admonition:: ppc64le partitions only + :class: warning + + |arch_availabilty_name| is only provided on ``ppc64le`` partitions/nodes (``gpu``, ``infer``, ``test``). 
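These shared admonition snippets are intended to be pulled into individual pages by first defining the ``|arch_availabilty_name|`` substitution and then including the relevant file, as the application and library pages later in this changeset do. A minimal sketch of that pattern, using a hypothetical page for a tool named ``ExampleTool``:

.. code-block:: rst

   .. |arch_availabilty_name| replace:: ExampleTool
   .. include:: /common/aarch64-only-sidebar.rst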
diff --git a/conf.py b/conf.py index efd68f8..2ff33f0 100644 --- a/conf.py +++ b/conf.py @@ -11,7 +11,8 @@ extensions = [ "sphinxext.rediraffe", 'sphinx.ext.mathjax', - 'sphinx_copybutton' + 'sphinx_copybutton', + 'sphinx_tabs.tabs', ] # Add any paths that contain templates here, relative to this directory. @@ -52,6 +53,9 @@ ## Added by CA to get MathJax rendering loaded mathjax_path='https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js' +# Disable sphinx-tabs closing +sphinx_tabs_disable_tab_closing = True + # -- Options for HTML output ---------------------------------------------- html_theme = 'sphinx_book_theme' @@ -81,6 +85,7 @@ "home_page_in_toc": False, "show_navbar_depth": 1, # Sets the depth for expanded content # Control the right hand in-page toc + "navigation_with_keys": False, "toc_title": "Contents", "show_toc_level": 2, "show_prev_next": False, @@ -89,7 +94,7 @@ # Code highlighting theme for dark mode "pygment_dark_style": "github-dark-high-contrast", # Add an announcement bar, visible at the top of each page. - "announcement": "", + "announcement": "3 NVIDIA Grace-Hopper nodes (GH200 480) are now available. See Using Bede for more information.", # Add the traditional footer theme and sphinx acknowledgements "extra_footer": f"

 Built with Sphinx {sphinx.__version__} using a theme by the Executable Book Project.

" } diff --git a/faq/index.rst b/faq/index.rst index b43c089..11d49c0 100644 --- a/faq/index.rst +++ b/faq/index.rst @@ -137,3 +137,26 @@ find the support email address for your institution `on the N8CIR website There is also a `slack workspace `__ that you can join to get further support and contact the Bede user community. To request access, please e-mail: marion.weinzierl@durham.ac.uk. +.. _faq-architecture-specific-eng: + +How do I specialise my bash environment for Power 9 and Grace-Hopper systems? +----------------------------------------------------------------------------- + +If you have modified your Bede user environment (``.bashrc``, or ``.bash_profile``) to make software available by default (i.e. conda), +you may need to modify your environment to set environment variables or source scripts based on the CPU architecture. + +You can check the CPU architecture in bash using the ``uname`` command. + +This allows you to set different environment variables on the ``aarch64`` Grace-Hopper nodes than on the ``ppc64le`` Power 9 nodes: + +.. code-block:: bash + + # Get the CPU architecture + arch=$(uname -i) + if [[ $arch == "aarch64" ]]; then + # Set variables and source scripts for aarch64 + export MYVAR=FOO + elif [[ $arch == "ppc64le" ]]; then + # Set variables and source scripts for ppc64le + export MYVAR=BAR + fi \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index ce2a70c..dc28955 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ sphinx-book-theme==1.1.2 sphinx-autobuild sphinxext-rediraffe sphinx-copybutton +sphinx-tabs==3.4.5 \ No newline at end of file diff --git a/software/applications/amber.rst b/software/applications/amber.rst index 351e695..2fdb415 100644 --- a/software/applications/amber.rst +++ b/software/applications/amber.rst @@ -1,7 +1,10 @@ .. _software-applications-amber: AMBER -------- +----- + +.. |arch_availabilty_name| replace:: AMBER +.. include:: /common/ppc64le-only-sidebar.rst `AMBER `__ is a suite of biomolecular simulation programs. It began in the late 1970's, and is maintained by an active development community. diff --git a/software/applications/conda.rst b/software/applications/conda.rst index b22efa6..18f3fd9 100644 --- a/software/applications/conda.rst +++ b/software/applications/conda.rst @@ -15,29 +15,61 @@ The simplest way to install Conda for use on Bede is through the `miniconda /$USER`` (where ``project`` is the project code for your project) directory rather than your ``home`` directory as it may consume considerable disk space + You may wish to install conda into the ``/nobackup/projects//$USER/`` (where ```` is the project code for your project, and ``>`` is CPU architecture) directory rather than your ``home`` directory as it may consume considerable disk space -.. code-block:: bash +.. tabs:: + + .. code-tab:: bash ppc64le + + export CONDADIR=/nobackup/projects//$USER/ppc64le # Update this with your code. + mkdir -p $CONDADIR + pushd $CONDADIR - export CONDADIR=/nobackup/projects//$USER # Update this with your code. - mkdir -p $CONDADIR - pushd $CONDADIR + # Download the latest miniconda installer for ppc64le + wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-ppc64le.sh + # Validate the file checksum matches is listed on https://docs.conda.io/en/latest/miniconda_hashes.html. + sha256sum Miniconda3-latest-Linux-ppc64le.sh + + sh Miniconda3-latest-Linux-ppc64le.sh -b -p ./miniconda + source miniconda/etc/profile.d/conda.sh + conda update conda -y + + .. 
code-tab:: bash aarch64 - # Download the latest miniconda installer for ppcle64 - wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-ppc64le.sh - # Validate the file checksum matches is listed on https://docs.conda.io/en/latest/miniconda_hashes.html. - sha256sum Miniconda3-latest-Linux-ppc64le.sh + export CONDADIR=/nobackup/projects//$USER/aarch64 # Update this with your code. + mkdir -p $CONDADIR + pushd $CONDADIR - sh Miniconda3-latest-Linux-ppc64le.sh -b -p ./miniconda - source miniconda/etc/profile.d/conda.sh - conda update conda -y + # Download the latest miniconda installer for aarch64 + wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-aarch64.sh + # Validate the file checksum matches is listed on https://docs.conda.io/en/latest/miniconda_hashes.html. + sha256sum Miniconda3-latest-Linux-aarch64.sh + + sh Miniconda3-latest-Linux-aarch64.sh -b -p ./miniconda + source miniconda/etc/profile.d/conda.sh + conda update conda -y On subsequent sessions, or in job scripts you may need to re-source miniconda. Alternatively you could add this to your bash environment. I.e. -.. code-block:: bash +.. tabs:: + + .. code-tab:: bash ppc64le + + arch=$(uname -i) # Get the CPU architecture + if [[ $arch == "ppc64le" ]]; then + # Set variables and source scripts for ppc64le + export CONDADIR=/nobackup/projects//$USER/ppc64le # Update this with your code. + source $CONDADIR/miniconda/etc/profile.d/conda.sh + fi - export CONDADIR=/nobackup/projects//$USER # Update this with your code. - source $CONDADIR/miniconda/etc/profile.d/conda.sh + .. code-tab:: bash aarch64 + + arch=$(uname -i) # Get the CPU architecture + if [[ $arch == "aarch64" ]]; then + # Set variables and source scripts for aarch64 + export CONDADIR=/nobackup/projects//$USER/aarch64 # Update this with your code. + source $CONDADIR/miniconda/etc/profile.d/conda.sh + fi Creating a new Conda Environment ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -58,17 +90,17 @@ Once created, the environment can be activated using ``conda activate``. Alternatively, Conda environments can be created outside of the conda/miniconda install, using the ``-p`` / ``--prefix`` option of ``conda create``. -I.e. if you have installed miniconda to your home directory, but wish to create a conda environment within the ``/project//$USER/`` directory named ``example`` you can use: +I.e. if you have installed miniconda to your home directory, but wish to create a conda environment within the ``/project//$USER//`` directory named ``example`` you can use: .. code-block:: bash - conda create -y --prefix /project//$USER/example python=3.9 + conda create -y --prefix /project//$USER//example python=3.9 This can subsequently be loaded via: .. code-block:: bash - conda activate /project//$USER/example + conda activate /project//$USER//example Listing and Activating existing Conda Environments ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/software/applications/eman2.rst b/software/applications/eman2.rst index f2877cb..e569f35 100644 --- a/software/applications/eman2.rst +++ b/software/applications/eman2.rst @@ -3,6 +3,9 @@ EMAN2 ===== +.. |arch_availabilty_name| replace:: EMAN2 +.. include:: /common/ppc64le-only-sidebar.rst + `EMAN2 `__ is a broadly based greyscale scientific image processing suite with a primary focus on processing data from transmission electron microscopes. On Bede, EMAN2 is provided by the :ref:`IBM Collaboration project `. 
diff --git a/software/applications/grace.rst b/software/applications/grace.rst index 06435bf..95ec9b1 100644 --- a/software/applications/grace.rst +++ b/software/applications/grace.rst @@ -3,6 +3,9 @@ Grace ----- +.. |arch_availabilty_name| replace:: Grace +.. include:: /common/ppc64le-only-sidebar.rst + `Grace `__ Grace is a WYSIWYG 2D plotting tool for the X Window System. On Bede, the batch-printing component of Grace, ``gracebat`` is provided via an environment module: diff --git a/software/applications/gromacs.rst b/software/applications/gromacs.rst index c0afc98..56b43e7 100644 --- a/software/applications/gromacs.rst +++ b/software/applications/gromacs.rst @@ -3,6 +3,9 @@ GROMACS ------- +.. |arch_availabilty_name| replace:: GROMACS +.. include:: /common/ppc64le-only-sidebar.rst + `GROMACS `__ is a versatile package for molecular dynamics simulation. It is primarily designed for biochemical molecules like proteins, lipids and nucleic acids that have a lot of complicated bonded interactions, but since GROMACS is extremely fast at calculating the nonbonded interactions (that usually dominate simulations) many groups are also using it for research on non-biological systems, e.g. polymers. diff --git a/software/applications/index.rst b/software/applications/index.rst index 950b035..7c9427e 100644 --- a/software/applications/index.rst +++ b/software/applications/index.rst @@ -9,6 +9,18 @@ If you notice any omissions, errors or have any suggested changes to the documen .. toctree:: :maxdepth: 1 - :glob: - * \ No newline at end of file + amber.rst + conda.rst + eman2.rst + grace.rst + gromacs.rst + namd.rst + open-ce.rst + openmm.rst + python.rst + pytorch.rst + r.rst + rust.rst + tensorflow.rst + \ No newline at end of file diff --git a/software/applications/namd.rst b/software/applications/namd.rst index 96b37f2..d7eafe7 100644 --- a/software/applications/namd.rst +++ b/software/applications/namd.rst @@ -3,6 +3,9 @@ NAMD ---- +.. |arch_availabilty_name| replace:: NAMD +.. include:: /common/ppc64le-only-sidebar.rst + `NAMD `__ is a parallel molecular dynamics code designed for high-performance simulation of large biomolecular systems. Based on Charm++ parallel objects, NAMD scales to hundreds of cores for typical simulations and beyond 500,000 cores for the largest simulations. diff --git a/software/applications/open-ce.rst b/software/applications/open-ce.rst index dc47895..9feeef3 100644 --- a/software/applications/open-ce.rst +++ b/software/applications/open-ce.rst @@ -3,6 +3,9 @@ Open-CE ======= +.. |arch_availabilty_name| replace:: Open-CE +.. include:: /common/ppc64le-only-sidebar.rst + The `Open Cognitive Environment (Open-CE) `__ is a community driven software distribution for machine learning and deep learning frameworks. Open-CE software is distributed via :ref:`Conda`, with all included packages for a given Open-CE release being installable in to the same conda environment. diff --git a/software/applications/openmm.rst b/software/applications/openmm.rst index a8739fe..ceaba36 100644 --- a/software/applications/openmm.rst +++ b/software/applications/openmm.rst @@ -3,6 +3,9 @@ OpenMM ------ +.. |arch_availabilty_name| replace:: OpenMM +.. include:: /common/ppc64le-only-sidebar.rst + `OpenMM `__ is a high-performance toolkit for molecular simulation. It can be used as an application, a library, or a flexible programming environment and includes extensive language bindings for Python, C, C++, and even Fortran. 
diff --git a/software/applications/python.rst b/software/applications/python.rst index d0caf78..5212613 100644 --- a/software/applications/python.rst +++ b/software/applications/python.rst @@ -5,17 +5,29 @@ Python `Python `__ is an interpreted, interactive, object-oriented programming language with dynamic typing. -Python 3.6 is available by default on Bede, as ``python3``, however, consider using :ref:`Conda ` for your python dependency management. +.. tabs:: -Conda is a cross-platform package and environment management system, which can provide alternate python versions than distributed centrally, and is more-suitable for managing packages which include non-python dependencies. + .. group-tab:: ppc64le -Python 2 is also available, but is no longer an officially supported version of python. -If you are still using python 2, upgrade to python 3 as soon as possible. + Python ``3.6`` is available by default, as ``python3``, however, consider using :ref:`Conda ` for your python dependency management. + Conda is a cross-platform package and environment management system, which can provide alternate python versions than distributed centrally, and is more-suitable for managing packages which include non-python dependencies. + + On the ``ppc64le`` nodes/partitions Python 2 is also available, but is no longer an officially supported version of python. + If you are still using python 2, upgrade to python 3 as soon as possible. + + .. group-tab:: aarch64 + + Python ``3.9`` is available by default on ``aarch64`` nodes, as ``python3`` and ``python``. + Alternate versions of Python can be installed via :ref:`Conda ` + + Conda is a cross-platform package and environment management system, which can provide alternate python versions than distributed centrally, and is more-suitable for managing packages which include non-python dependencies. + + Python 2 is not available on the ``aarch64`` nodes/partitions in Bede. + If you wish to use non-conda python, you should use `virtual environments `__ to isolate your python environment(s) from the system-wide environment. This will allow you to install your own python dependencies via pip. - For instance, to create and install `sphinx` (the python package used to create this documentation) into a python environment in your home directory: .. code-block:: bash @@ -31,7 +43,7 @@ For instance, to create and install `sphinx` (the python package used to create # Use pip to install sphinx into the environment python3 -m pip install sphinx -.. note:: +.. warning:: Python virtual environments can become large if large python packages such as TensorFlow are installed. Consider placing your python virtual environments in your project directories to avoid filling your home directory. @@ -52,4 +64,6 @@ I.e. to delete a python virtual environment located at ``~/.venvs/sphinx`` rm -r ~/.venvs/sphinx/ +Python packages may install architecture dependent binaries, so you should use a separate virtual environments for ``ppc64le`` and ``aarch64`` nodes/partitions. + For further information on please see the `Python Online Documentation `__. diff --git a/software/applications/pytorch.rst b/software/applications/pytorch.rst index 7e5302c..eb4a155 100644 --- a/software/applications/pytorch.rst +++ b/software/applications/pytorch.rst @@ -6,57 +6,149 @@ PyTorch `PyTorch `__ is an end-to-end machine learning framework. PyTorch enables fast, flexible experimentation and efficient production through a user-friendly front-end, distributed training, and ecosystem of tools and libraries. 
-The main method of distribution for PyTorch is via :ref:`Conda `, with :ref:`Open-CE` providing a simple method for installing multiple machine learning frameworks into a single conda environment. +The main method of distribution for PyTorch for ``ppc64le`` is via :ref:`Conda `, with :ref:`Open-CE` providing a simple method for installing multiple machine learning frameworks into a single conda environment. The upstream Conda and pip distributions do not provide ppc64le pytorch packages at this time. Installing via Conda ~~~~~~~~~~~~~~~~~~~~ -With a working Conda installation (see :ref:`Installing Miniconda`) the following instructions can be used to create a Python 3.9 conda environment named ``torch`` with the latest Open-CE provided PyTorch: +.. tabs:: -.. note:: + .. group-tab:: ppc64le - Pytorch installations via conda can be relatively large. Consider installing your miniconda (and therfore your conda environments) to the ``/nobackup`` file store. + With a working Conda installation (see :ref:`Installing Miniconda`) the following instructions can be used to create a Python 3.9 conda environment named ``torch`` with the latest Open-CE provided PyTorch: + .. note:: -.. code-block:: bash + Pytorch installations via conda can be relatively large. Consider installing your miniconda (and therfore your conda environments) to the ``/nobackup`` file store. - # Create a new conda environment named torch within your conda installation - conda create -y --name torch python=3.9 - # Activate the conda environment - conda activate torch + .. code-block:: bash - # Add the OSU Open-CE conda channel to the current environment config - conda config --env --prepend channels https://ftp.osuosl.org/pub/open-ce/current/ + # Create a new conda environment named torch within your conda installation + conda create -y --name torch python=3.9 - # Also use strict channel priority - conda config --env --set channel_priority strict + # Activate the conda environment + conda activate torch - # Install the latest available version of PyTorch - conda install -y pytorch + # Add the OSU Open-CE conda channel to the current environment config + conda config --env --prepend channels https://ftp.osuosl.org/pub/open-ce/current/ -In subsequent interactive sessions, and when submitting batch jobs which use PyTorch, you will then need to re-activate the conda environment. + # Also use strict channel priority + conda config --env --set channel_priority strict -For example, to verify that PyTorch is available and print the version: + # Install the latest available version of PyTorch + conda install -y pytorch -.. code-block:: bash + In subsequent interactive sessions, and when submitting batch jobs which use PyTorch, you will then need to re-activate the conda environment. - # Activate the conda environment - conda activate torch + For example, to verify that PyTorch is available and print the version: - # Invoke python - python3 -c "import torch;print(torch.__version__)" + .. code-block:: bash + # Activate the conda environment + conda activate torch -Installation via the upstream Conda channel is not currently possible, due to the lack of ``ppc64le`` or ``noarch`` distributions. + # Invoke python + python3 -c "import torch;print(torch.__version__)" -.. note:: - - The :ref:`Open-CE` distribution of PyTorch does not include IBM technologies such as DDL or LMS, which were previously available via :ref:`WMLCE`. - WMLCE is no longer supported. 
+ Installation via the upstream Conda channel is not currently possible, due to the lack of ``ppc64le`` or ``noarch`` distributions. + + + .. note:: + + The :ref:`Open-CE` distribution of PyTorch does not include IBM technologies such as DDL or LMS, which were previously available via :ref:`WMLCE`. + WMLCE is no longer supported. + + + .. group-tab:: aarch64 + + .. warning:: + + Conda builds of PyTorch for ``aarch64`` do not include CUDA support as of April 2024. For now, see :ref:`software-applications-pytorch-ngc` or `build from source `__. + +.. _software-applications-pytorch-ngc: + +Using NGC PyTorch Containers +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. tabs:: + + .. group-tab:: ppc64le + + .. warning:: + + NVIDIA do not provide ``ppc64le`` containers for pytorch through NGC. This method should only be used for ``aarch64`` partitions. + + .. group-tab:: aarch64 + + NVIDIA provide docker containers with CUDA-enabled pytorch builds for ``x86_64`` and ``aarch64`` architectures through NGC. + + The `NGC PyTorch `__ containers have included Hopper support since ``22.09``. + + * ``22.09`` and ``22.10`` provide a conda-based install of pytorch. + * ``22.11+`` provide a pip-based install in the default python environment. + + For details of which pytorch version is provided by the each container release, see the `NGC PyTorch container release notes `__. + + :ref:`software-tools-apptainer` can be used to convert and run docker containers, or to build an apptainer container based on a docker container. + These can be built on the ``aarch64`` nodes in Bede using :ref:`software-tools-apptainer-rootless`. + + .. note:: + + PyTorch containers can consume a large amount of disk space. Consider setting :ref:`software-tools-apptainer-cachedir` to an appropriate location in ``/nobackup``, e.g. ``export APPTAINER_CACHEDIR=/nobackup/projects/${SLURM_JOB_ACCOUNT}/${USER}/apptainer-cache``. + + .. note:: + + The following apptainer commands should be executed from an ``aarch64`` node only, i.e. on ``ghlogin``, ``gh`` or ``ghtest``. + + Docker containers can be fetched and converted using ``apptainer pull``, prior to using ``apptainer exec`` to execute code within the container. + + .. code:: bash + + # Pull and convert the docker container. This may take a while. + apptainer pull docker://nvcr.io/nvidia/pytorch:24.03-py3 + # Run a command in the container, i.e. showing the pytorch version + apptainer exec --nv docker://nvcr.io/nvidia/pytorch:24.03-py3 python3 -c "import torch;print(torch.__version__);" + + Alternatively, if you require more than just pytorch within the container you can create an `apptainer definition file `__. + E.g. for a container based on ``pytorch:24.03-py3`` which also installs HuggingFace Transformers ``4.37.0``, the following definition file could be used: + + .. code:: singularity + + Bootstrap: docker + From: nvcr.io/nvidia/pytorch:24.03-py3 + + %post + # Install other python dependencies, e.g. hugging face transformers + python3 -m pip install transformers[torch]==4.37.0 + + %test + # Print the torch version, if CUDA is enabled and which architectures + python3 -c "import torch;print(torch.__version__); print(torch.cuda.is_available());print(torch.cuda.get_arch_list());" + # Print the pytorch transformers version, demonstrating it is available. + python3 -c "import transformers;print(transformers.__version__);" + + Assuming this is named ``pytorch-transformers.def``, a corresponding apptainer image file name ``pytorch-transformers.sif`` can then be created via: + + .. 
code-block:: bash + + apptainer build --nv pytorch-transformers.sif pytorch-transformers.def + + Commands within this container can then be executed using ``apptainer exec``. + I.e. to see the version of transformers installed within the container: + + .. code-block:: bash + + apptainer exec --nv pytorch-transformers.sif python3 -c "import transformers;print(transformers.__version__);" + + Or in this case due to the ``%test`` segment of the container, run the test command. + + .. code-block:: bash + + apptainer test --nv pytorch-transformers.sif Further Information diff --git a/software/applications/r.rst b/software/applications/r.rst index ccfa5da..bf1d4a5 100644 --- a/software/applications/r.rst +++ b/software/applications/r.rst @@ -3,6 +3,9 @@ R - +.. |arch_availabilty_name| replace:: R +.. include:: /common/ppc64le-only-sidebar.rst + `R `__ is a free software environment for statistical computing and graphics. It is provided on the system by the ``r`` module(s), which make ``R`` and ``Rscript`` available for use. diff --git a/software/applications/tensorflow.rst b/software/applications/tensorflow.rst index 93491e0..bdb6d89 100644 --- a/software/applications/tensorflow.rst +++ b/software/applications/tensorflow.rst @@ -7,52 +7,145 @@ TensorFlow TensorFlow can be installed through a number of python package managers such as :ref:`Conda` or ``pip``. -For use on Bede, the simplest method is to install TensorFlow using the :ref:`Open-CE Conda distribution`. +For use on Bede's ``ppc64le`` nodes, the simplest method is to install TensorFlow using the :ref:`Open-CE Conda distribution`. + +For the ``aarch64`` nodes, using a NVIDIA provided `NGC Tensorflow container `__ is likely preferred. Installing via Conda (Open-CE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -With a working Conda installation (see :ref:`Installing Miniconda`) the following instructions can be used to create a Python 3.8 conda environment named ``tf-env`` with the latest Open-CE provided TensorFlow: +.. tabs:: + + .. group-tab:: ppc64le + + With a working Conda installation (see :ref:`Installing Miniconda`) the following instructions can be used to create a Python 3.8 conda environment named ``tf-env`` with the latest Open-CE provided TensorFlow: + + .. note:: + + TensorFlow installations via conda can be relatively large. Consider installing your miniconda (and therfore your conda environments) to the ``/nobackup`` file store. + + + .. code-block:: bash + + # Create a new conda environment named tf-env within your conda installation + conda create -y --name tf-env python=3.8 + + # Activate the conda environment + conda activate tf-env + + # Add the OSU Open-CE conda channel to the current environment config + conda config --env --prepend channels https://ftp.osuosl.org/pub/open-ce/current/ -.. note:: + # Also use strict channel priority + conda config --env --set channel_priority strict - TensorFlow installations via conda can be relatively large. Consider installing your miniconda (and therfore your conda environments) to the ``/nobackup`` file store. + # Install the latest available version of Tensorflow + conda install -y tensorflow + In subsequent interactive sessions, and when submitting batch jobs which use TensorFlow, you will then need to re-activate the conda environment. -.. code-block:: bash + For example, to verify that TensorFlow is available and print the version: - # Create a new conda environment named tf-env within your conda installation - conda create -y --name tf-env python=3.8 + .. 
code-block:: bash - # Activate the conda environment - conda activate tf-env + # Activate the conda environment + conda activate tf-env - # Add the OSU Open-CE conda channel to the current environment config - conda config --env --prepend channels https://ftp.osuosl.org/pub/open-ce/current/ + # Invoke python + python3 -c "import tensorflow;print(tensorflow.__version__)" - # Also use strict channel priority - conda config --env --set channel_priority strict + .. note:: + + The :ref:`Open-CE` distribution of TensorFlow does not include IBM technologies such as DDL or LMS, which were previously available via :ref:`WMLCE`. + WMLCE is no longer supported. - # Install the latest available version of Tensorflow - conda install -y tensorflow + .. group-tab:: aarch64 -In subsequent interactive sessions, and when submitting batch jobs which use TensorFlow, you will then need to re-activate the conda environment. + .. warning:: -For example, to verify that TensorFlow is available and print the version: + Conda and pip builds of TensorFlow for ``aarch64`` do not include CUDA support as of April 2024. For now, see :ref:`software-applications-tensorflow-ngc` or `build from source `__. -.. code-block:: bash +.. _software-applications-tensorflow-ngc: - # Activate the conda environment - conda activate tf-env +Using NGC TensorFlow Containers +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # Invoke python - python3 -c "import tensorflow;print(tensorflow.__version__)" -.. note:: +.. tabs:: + + .. group-tab:: ppc64le + + .. warning:: + + NVIDIA do not provide ``ppc64le`` containers for TensorFlow through NGC. This method should only be used for ``aarch64`` partitions. - The :ref:`Open-CE` distribution of TensorFlow does not include IBM technologies such as DDL or LMS, which were previously available via :ref:`WMLCE`. - WMLCE is no longer supported. + .. group-tab:: aarch64 + + NVIDIA provide docker containers with CUDA-enabled TensorFlow builds for ``x86_64`` and ``aarch64`` architectures through NGC. + + The `NGC Tensorflow `__ containers have included Hopper support since ``22.09``. + + For details of which TensorFlow version is provided by the each container release, see the `NGC TensorFlow container release notes `__. + + + :ref:`software-tools-apptainer` can be used to convert and run docker containers, or to build an apptainer container based on a docker container. + These can be built on the ``aarch64`` nodes in Bede using :ref:`software-tools-apptainer-rootless`. + + .. note:: + + TensorFlow containers can consume a large amount of disk space. Consider setting :ref:`software-tools-apptainer-cachedir` to an appropriate location in ``/nobackup``, e.g. ``export APPTAINER_CACHEDIR=/nobackup/projects/${SLURM_JOB_ACCOUNT}/${USER}/apptainer-cache``. + + .. note:: + + The following apptainer commands should be executed from an ``aarch64`` node only, i.e. on ``ghlogin``, ``gh`` or ``ghtest``. + + Docker containers can be fetched and converted using ``apptainer pull``, prior to using ``apptainer exec`` to execute code within the container. + + .. code:: bash + + # Pull and convert the docker container. This may take a while. + apptainer pull docker://nvcr.io/nvidia/tensorflow:24.03-tf2-py3 + # Run a command in the container, i.e. showing the TensorFlow version + apptainer exec --nv docker://nvcr.io/nvidia/tensorflow:24.03-tf2-py3 python3 -c "import tensorflow; print(tensorflow.__version__);" + + Alternatively, if you require more than just TensorFlow within the container you can create an `apptainer definition file `__. 
+ E.g. for a container based on ``tensorflow:24.03-tf2-py3`` which also installs HuggingFace Transformers ``4.37.0``, the following definition file could be used: + + .. code:: singularity + + Bootstrap: docker + From: nvcr.io/nvidia/tensorflow:24.03-tf2-py3 + + %post + # Install other python dependencies, e.g. hugging face transformers + python3 -m pip install transformers==4.37.0 + + %test + # Print the TensorFlow version and the GPU devices visible to it + python3 -c "import tensorflow; print(tensorflow.__version__); print(tensorflow.config.list_physical_devices('GPU'));" + # Print the Transformers version, demonstrating it is available. + python3 -c "import transformers;print(transformers.__version__);" + + Assuming this is named ``tf-transformers.def``, a corresponding apptainer image file named ``tf-transformers.sif`` can then be created via: + + .. code-block:: bash + + apptainer build --nv tf-transformers.sif tf-transformers.def + + Commands within this container can then be executed using ``apptainer exec``. + I.e. to see the version of transformers installed within the container: + + .. code-block:: bash + + apptainer exec --nv tf-transformers.sif python3 -c "import transformers;print(transformers.__version__);" + + Or in this case due to the ``%test`` segment of the container, run the test command. + + .. code-block:: bash + + apptainer test --nv tf-transformers.sif + Further Information ~~~~~~~~~~~~~~~~~~~ 
diff --git a/software/compilers/ibmxl.rst b/software/compilers/ibmxl.rst index 3448311..635ad11 100644 --- a/software/compilers/ibmxl.rst +++ b/software/compilers/ibmxl.rst @@ -1,6 +1,9 @@ IBM XL ------ +.. |arch_availabilty_name| replace:: The IBM XL Compiler toolchain +.. include:: /common/ppc64le-only-sidebar.rst + The `IBM XL C and C++ compiler family `__ and `IBM XL Fortran compiler family `__ are available on Bede, provided by the ``xl`` module family: .. code-block:: bash diff --git a/software/compilers/llvm.rst b/software/compilers/llvm.rst index 56576a4..2d218fe 100644 --- a/software/compilers/llvm.rst +++ b/software/compilers/llvm.rst @@ -1,6 +1,9 @@ LLVM ---- +.. |arch_availabilty_name| replace:: The LLVM compiler toolchain +.. include:: /common/ppc64le-only-sidebar.rst + LLVM has been provided for use on the system by the ``llvm`` module. It has been built with CUDA GPU offloading support, allowing OpenMP regions to run on a GPU using the ``target`` directive. diff --git a/software/compilers/nvcc.rst b/software/compilers/nvcc.rst index 6ec5651..0978040 100644 --- a/software/compilers/nvcc.rst +++ b/software/compilers/nvcc.rst @@ -1,23 +1,36 @@ .. _software-compilers-nvcc: -CUDA and NVCC -============= +NVCC (CUDA) +=========== `CUDA `__ and the ``nvcc`` CUDA/C++ compiler are provided for use on the system by the `cuda` modules. Unlike other compiler modules, the cuda modules do not set ``CC`` or ``CXX`` environment variables. This is because ``nvcc`` can be used to compile device CUDA code in conjunction with a range of host compilers, such as GCC or LLVM clang. -.. code-block:: bash +.. tabs:: - module load cuda + .. code-tab:: bash ppc64le + + module load cuda + + module load cuda/12.0.1 + module load cuda/11.5.1 + module load cuda/11.4.1 + module load cuda/11.3.1 + module load cuda/11.2.2 + module load cuda/10.2.89 + module load cuda/10.1.243 + + .. code-tab:: bash aarch64 + + module load cuda - module load cuda/12.0.1 - module load cuda/11.5.1 - module load cuda/11.4.1 - module load cuda/11.3.1 - module load cuda/11.2.2 - module load cuda/10.2.89 - module load cuda/10.1.243 + module load cuda/12.3.2 + module load cuda/12.2.2 + module load cuda/12.1.1 + module load cuda/11.8.0 + module load cuda/11.7.0 + module load cuda/11.7.1 For further information please see the `CUDA Toolkit Archive `__. @@ -37,14 +50,8 @@ The C++ dialect used for host and device code can be controlled using the ``--st * ``c++03`` * ``c++11`` * ``c++14`` - -CUDA ``>= 11.0`` also accepts - -* ``c++17`` - -CUDA ``>= 12.0`` also accepts - -* ``c++20`` +* ``c++17`` (CUDA 11+) +* ``c++20`` (CUDA 12+) The default C++ dialect depends on the host compiler, with ``nvcc`` matching the default dialect by the host c++ compiler. @@ -105,22 +112,47 @@ Bede contains NVIDIA Tesla V100 and Tesla T4 GPUs, which are `compute capability To generate optimised code for both GPU models in Bede, the following ``-gencode`` options can be passed to ``nvcc``: -.. code-block:: bash +.. tabs:: + + .. code-tab:: bash ppc64le + + nvcc -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -o main main.cu + + .. code-tab:: bash aarch64 + + # nvcc >= 11.8 + nvcc -gencode=arch=compute_90,code=sm_90 -o main main.cu + # nvcc < 11.8 + nvcc -gencode=arch=compute_80,code=compute_80 -o main main.cu - nvcc -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -o main main.cu Alternatively, to reduce compile time and binary size a single ``-gencode`` option can be passed. 
-If only compute capability ``70`` is selected, code will be optimised for Volta GPUs, but will execute on Volta and Turing GPUs. +.. tabs:: -If only compute capability ``75`` is selected, code will be optimised for Turing GPUs, but it will not be executable on Volta GPUs. + .. group-tab:: ppc64le -.. code-block:: bash + If only compute capability ``70`` is selected, code will be optimised for Volta GPUs, but will execute on Volta and Turing GPUs. + + If only compute capability ``75`` is selected, code will be optimised for Turing GPUs, but it will not be executable on Volta GPUs. + + .. code-block:: bash + + # Optimise for V100 GPUs, executable on T4 GPUs + nvcc -gencode=arch=compute_70,code=sm_70 -o main main.cu + # Optimise for T4 GPUs, not executable on V100 GPUs + nvcc -gencode=arch=compute_75,code=sm_75 -o main main.cu + + .. group-tab:: aarch64 + + ``aarch64`` nodes in Bede only contain Hopper GPUs, so there is only need to provide a single compute capability (``90``, or embedding PTX for compute capability ``80``) + + .. code-block:: bash - # Optimise for V100 GPUs, executable on T4 GPUs - nvcc -gencode=arch=compute_70,code=sm_70 -o main main.cu - # Optimise for T4 GPUs, not executable on V100 GPUs - nvcc -gencode=arch=compute_75,code=sm_75 -o main main.cu + # nvcc >= 11.8 + nvcc -gencode=arch=compute_90,code=sm_90 -o main main.cu + # nvcc < 11.8 + nvcc -gencode=arch=compute_80,code=compute_80 -o main main.cu For more information on the use of ``-gencode``, ``-arch`` and ``-code`` please see the `NVCC Documentation `__. @@ -195,7 +227,7 @@ The automatic use of ``gcc`` / ``g++`` from the path may be overridden using the This option can be used to specify the directory in which the host compiler resides, and optionally may include the binary name itself, if for instance you wish to use ``clang++`` or ``xl`` as your host C++ compiler. -e.g. to use ``xlc++`` as the host compiler for the default CUDA module: +e.g. to use ``xlc++`` as the host compiler for the default CUDA module (on ``ppc64le`` nodes): .. code-block:: bash @@ -208,4 +240,4 @@ e.g. to use ``xlc++`` as the host compiler for the default CUDA module: This behaviour can be prevented using the ``--allow-unsupported-compiler`` / ``-allow-unsupported-compiler`` option (`docs `__), however, this may result in incorrect binaries. Use at your own risk. A list of officially supported host compilers can be found in the `CUDA Installation Guide for Linux `__, for the appropriate CUDA version. -For Bede, refer to the Power 9 section of the table with RHEL for the operating system. \ No newline at end of file +For Bede, refer to the Power 9 and aarch64 sections of the table with RHEL for the operating system. \ No newline at end of file diff --git a/software/compilers/nvhpc.rst b/software/compilers/nvhpc.rst index 7abd09b..9ae2168 100644 --- a/software/compilers/nvhpc.rst +++ b/software/compilers/nvhpc.rst @@ -11,12 +11,20 @@ It provides the ``nvc``, ``nvc++`` and ``nvfortran`` compilers. This module also provides the `NCCL `__ and `NVSHMEM `__ libraries, as well as the suite of math libraries typically included with the CUDA Toolkit, such as ``cublas``, ``cufft`` and ``nvblas``. -.. code-block:: bash +.. tabs:: - module load nvhpc + .. code-tab:: bash ppc64le - module load nvhpc/23.1 - module load nvhpc/22.1 - module load nvhpc/21.5 + module load nvhpc + + module load nvhpc/23.1 + module load nvhpc/22.1 + module load nvhpc/21.5 + + .. 
code-tab:: bash aarch64 + + module load nvhpc + + module load nvhpc/24.1 For further information please see the `NVIDIA HPC SDK Documentation Archive `__. diff --git a/software/environments/cryo-em.rst b/software/environments/cryo-em.rst index 5f67e1e..56cfeb6 100644 --- a/software/environments/cryo-em.rst +++ b/software/environments/cryo-em.rst @@ -3,6 +3,9 @@ Cryo-EM Software Environment ============================ +.. |arch_availabilty_name| replace:: The Cryo-EM Software Environment +.. include:: /common/ppc64le-only-sidebar.rst + Documentation on the the Cryo-EM Software Environment for Life Sciences is available :download:`here `. Note that this document is mainly based on the installation on `Satori `_ and might have some inconsistencies with the Bede installation. diff --git a/software/index.rst b/software/index.rst index 4957dfe..87ae582 100644 --- a/software/index.rst +++ b/software/index.rst @@ -9,7 +9,7 @@ These pages list software available on Bede and/or instructions on how to instal If you notice any omissions, errors or have any suggested changes to the documentation please create an `Issue `__ or open a `Pull Request `__ on GitHub. .. toctree:: - :maxdepth: 3 + :maxdepth: 2 :name: softwaretoc applications/index diff --git a/software/libraries/blas-lapack.rst b/software/libraries/blas-lapack.rst index 8bf41e8..d68eabf 100644 --- a/software/libraries/blas-lapack.rst +++ b/software/libraries/blas-lapack.rst @@ -3,6 +3,7 @@ BLAS/LAPACK =========== + The following numerical libraries provide optimised CPU implementations of BLAS and LAPACK on the system: - `ESSL `__ (IBM Engineering and Scientific Subroutine Library) @@ -10,16 +11,37 @@ The following numerical libraries provide optimised CPU implementations of BLAS The modules for each of these libraries provide some convenience environment variables: ``N8CIR_LINALG_CFLAGS`` contains the compiler arguments to link BLAS and LAPACK to C code; ``N8CIR_LINALG_FFLAGS`` contains the same to link to Fortran. When used with variables such as ``CC``, commands to build software can become entirely independent of what compilers and numerical libraries you have loaded, eg. for ESSL: -.. code-block:: bash +.. tabs:: + + .. code-tab:: bash ppc64le + + module load gcc essl/6.2 + $CC -o myprog myprog.c $N8CIR_LINALG_CFLAGS - module load gcc essl/6.2 - $CC -o myprog myprog.c $N8CIR_LINALG_CFLAGS + .. group-tab:: aarch64 + + .. |arch_availabilty_name| replace:: ESSL + .. include:: /common/ppc64le-only.rst Or for OpenBLAS: -.. code-block:: bash +.. tabs:: + + .. code-tab:: bash ppc64le + + module load gcc openblas/0.3.10 + $CC -o myprog myprog.c $N8CIR_LINALG_CFLAGS + + .. code-tab:: bash aarch64 + + module load gcc openblas/0.3.26 + $CC -o myprog myprog.c $N8CIR_LINALG_CFLAGS + + # or + module load gcc openblas/0.3.26omp + $CC -o myprog myprog.c $N8CIR_LINALG_CFLAGS + + - module load gcc openblas/0.3.10 - $CC -o myprog myprog.c $N8CIR_LINALG_CFLAGS diff --git a/software/libraries/boost.rst b/software/libraries/boost.rst index c8fffa7..d83ea76 100644 --- a/software/libraries/boost.rst +++ b/software/libraries/boost.rst @@ -5,8 +5,17 @@ Boost A centrally-installed version is available via the modules system, which can be loaded as follows: -.. code-block:: bash +.. tabs:: - module load boost - module load boost/1.81.0 - module load boost/1.74.0 + .. code-tab:: bash ppc64le + + module load boost + + module load boost/1.81.0 + module load boost/1.74.0 + + .. 
code-tab:: bash aarch64 + + module load boost + + module load boost/1.84.0 diff --git a/software/libraries/fftw.rst b/software/libraries/fftw.rst index 5d777e4..ea20c13 100644 --- a/software/libraries/fftw.rst +++ b/software/libraries/fftw.rst @@ -7,7 +7,16 @@ FFTW A centrally-installed version of FFTW can be loaded via ``module``: -.. code-block:: bash +.. tabs:: - module load fftw - module load fftw/3.3.8 + .. code-tab:: bash ppc64le + + module load fftw + + module load fftw/3.3.8 + + .. code-tab:: bash aarch64 + + module load fftw + + module load fftw/3.3.10 \ No newline at end of file diff --git a/software/libraries/hdf5.rst b/software/libraries/hdf5.rst index 61b97d0..d0c3c20 100644 --- a/software/libraries/hdf5.rst +++ b/software/libraries/hdf5.rst @@ -7,10 +7,19 @@ When loaded in conjunction with an MPI module such as ``openmpi``, the ``hdf5`` module provides both the serial and parallel versions of the library. -.. code-block:: bash +.. tabs:: + + .. code-tab:: bash ppc64le - module load hdf5 - module load hdf5/1.10.7 + module load hdf5 + + module load hdf5/1.10.7 + + .. code-tab:: bash aarch64 + + module load hdf5 + + module load hdf5/1.10.11 .. _software-libraries-hdf5-known-issues: diff --git a/software/libraries/mpi.rst b/software/libraries/mpi.rst index af134f9..89db0ab 100644 --- a/software/libraries/mpi.rst +++ b/software/libraries/mpi.rst @@ -15,36 +15,72 @@ We commit to the following convention for all MPIs we provide as modules: CUDA-enabled MPI is available through OpenMPI, when a cuda module is loaded alongside ``openmpi``, I.e. -.. code-block:: bash +.. tabs:: - module load gcc cuda openmpi + .. code-tab:: bash ppc64le + + module load gcc cuda openmpi + + .. code-tab:: bash aarch64 + + module load gcc cuda openmpi OpenMPI is provided by the ``openmpi`` module(s): -.. code-block:: bash - module load openmpi - module load openmpi/4.0.5 +.. tabs:: + + .. code-tab:: bash ppc64le + module load openmpi + + module load openmpi/4.0.5 + + .. code-tab:: bash aarch64 + + module load openmpi + + module load openmpi/4.1.6 +.. |arch_availabilty_name| replace:: MVAPICH2 +.. include:: /common/ppc64le-only.rst MVAPICH2 is provided by the `mvapich2` module(s): -.. code-block:: bash +.. tabs:: + + .. code-tab:: bash ppc64le - module load mvapich2/2.3.5-2 + module load mvapich2 + + module load mvapich2/2.3.5-2 + + .. group-tab:: aarch64 + + .. admonition:: ppc64le partitions only + :class: warning + + mvapich2-gdr is only provided on ``ppc64le`` partitions/nodes (``gpu``, ``infer``, ``test``). + However, we plan to provide a ``mvapich-plus`` module in future to provide this functionality. In the meantime, if this is of interest, please contact us. .. note:: There are a number of issues with OpenMPI 4 and the one-sided MPI communication features added by the MPI-3 standard. These features are typically useful when combined with GPUs, due to the asynchronous nature of the CUDA and OpenCL programming models. - For codes that require these features, we currently recommend using the ``mvapich2`` module. + For codes that require these features, we currently recommend using the ``mvapich2`` module on ``ppc64le`` nodes/partitions. -We also offer the ``mvapich2-gdr/2.3.6`` module. This is a version of MVAPICH2 that is specifically designed for machines like Bede, providing optimised communications directly between GPUs - even when housed in different compute nodes. +We also offer the ``mvapich2-gdr/2.3.6`` module on ``ppc64le`` nodes/partitions. 
This is a version of MVAPICH2 that is specifically designed for machines like Bede, providing optimised communications directly between GPUs - even when housed in different compute nodes. Unlike the ``openmpi`` and ``mvapich2`` modules, ``mvapich2-gdr`` does not adapt itself to the currently loaded compiler and CUDA modules. This version of the software was built using GCC 8.4.1 and CUDA 11.3. -.. code-block:: bash - - module load mvapich2-gdr/2.3.6 gcc/8.4.0 cuda/11.3.1 +.. tabs:: + + .. code-tab:: bash ppc64le + + module load mvapich2-gdr/2.3.6 gcc/8.4.0 cuda/11.3.1 + + .. group-tab:: aarch64 + + .. include:: /common/ppc64le-only.rst Further information can be found on the `MVAPICH2-GDR `__ pages. diff --git a/software/libraries/netcdf.rst b/software/libraries/netcdf.rst index 68b899d..94a6fdc 100644 --- a/software/libraries/netcdf.rst +++ b/software/libraries/netcdf.rst @@ -3,6 +3,9 @@ NetCDF ====== +.. |arch_availabilty_name| replace:: NetCDF +.. include:: /common/ppc64le-only-sidebar.rst + `Network Common Data Form (NetCDF) `__ is a set of software libraries and machine independent data formats for array-orientated scientific data. A centrally installed version of NetCDF is provided on Bede by the ``netcdf`` module(s). diff --git a/software/libraries/nvtoolsext.rst b/software/libraries/nvtoolsext.rst index b2e7740..3bf7f48 100644 --- a/software/libraries/nvtoolsext.rst +++ b/software/libraries/nvtoolsext.rst @@ -11,7 +11,7 @@ These markers and ranges can be used to increase the usability of the NVIDIA pro The location of the headers and shared libraries may vary between Operating Systems, and CUDA installation (i.e. CUDA toolkit, PGI compilers or HPC SDK). -On Bede, nvToolsExt is provided by the :ref:`CUDA ` and :ref:`NVHPC ` modules: +On Bede, ``nvToolsExt`` is provided by the :ref:`CUDA ` and :ref:`NVHPC ` modules: .. code-block:: bash diff --git a/software/libraries/plumed.rst b/software/libraries/plumed.rst index 2a0cce2..9e7e9bd 100644 --- a/software/libraries/plumed.rst +++ b/software/libraries/plumed.rst @@ -3,6 +3,9 @@ PLUMED ------ +.. |arch_availabilty_name| replace:: PLUMED +.. include:: /common/ppc64le-only-sidebar.rst + `PLUMED `__, the community-developed PLUgin for MolEcular Dynamics, is a an open-source, community-developed library that provides a wide range of different methods, which include: * enhanced-sampling algorithms diff --git a/software/libraries/vtk.rst b/software/libraries/vtk.rst index ac992b3..6914f28 100644 --- a/software/libraries/vtk.rst +++ b/software/libraries/vtk.rst @@ -3,6 +3,9 @@ VTK --- +.. |arch_availabilty_name| replace:: VTK +.. include:: /common/ppc64le-only-sidebar.rst + `The Visualization Toolkit (VTK) `__ is open source software for manipulating and displaying scientific data. The ``vtk`` module can be loaded by one of the following: diff --git a/software/projects/hecbiosim.rst b/software/projects/hecbiosim.rst index dafc1ad..d35712a 100644 --- a/software/projects/hecbiosim.rst +++ b/software/projects/hecbiosim.rst @@ -3,6 +3,9 @@ HECBioSim ========= +.. |arch_availabilty_name| replace:: The HECBioSim Software environment +.. include:: /common/ppc64le-only-sidebar.rst + The `HEC BioSim consortium `__ focusses on molecular simulations, at a variety of time and length scales but based on well-defined physics to complement experiment. The unique insight they can provide gives molecular level understanding of how biological macromolecules function. 
Simulations are crucial in analysing protein folding, mechanisms of biological catalysis, and how membrane proteins interact with lipid bilayers. diff --git a/software/projects/ibm-collaboration.rst b/software/projects/ibm-collaboration.rst index ecc721a..47afc70 100644 --- a/software/projects/ibm-collaboration.rst +++ b/software/projects/ibm-collaboration.rst @@ -3,6 +3,10 @@ IBM Collaboration ================= +.. |arch_availabilty_name| replace:: The "IBM Collaboration" Software environment +.. include:: /common/ppc64le-only-sidebar.rst + + On Bede, the ``ibm-collaboration`` project provides several software packages which were produced in collaboration with the system vendor `IBM `__. * :ref:`Cryo-EM ` - a collection of software packages for life sciences including: diff --git a/software/tools/apptainer.rst b/software/tools/apptainer.rst new file mode 100644 index 0000000..ab04a2d --- /dev/null +++ b/software/tools/apptainer.rst @@ -0,0 +1,54 @@ +.. _software-tools-apptainer: + +Apptainer +--------- + +`Apptainer `__ is a container platform similar to `Docker `__, previously known as Singularity. +It is a widely used container system for HPC, which allows you to create and run containers that package up pieces of software in a way that is portable and reproducible. + +Container platforms allow users to create and use container images, which are self-contained software stacks. + +.. admonition:: aarch64 partitions only + :class: warning + + Apptainer is only available on ``aarch64`` nodes/partitions within Bede. + + For ``ppc64le`` partitions, please see :ref:`Singularity CE`. + +Apptainer is provided by default on ``aarch64`` nodes/partitions, and can be used without loading any modules. + +.. code-block:: bash + + apptainer --version + +.. note:: + Container images are not portable across CPU architectures. Containers created on ``x86_64`` machines may not be compatible with the ``ppc64le`` and ``aarch64`` nodes in Bede. + + +For more information on how to use Apptainer, please see the `Apptainer Documentation `__. + +Differences from Singularity CE +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Although Apptainer and Singularity share a common history, there are a number of important differences, as `documented by apptainer `_, including: + +* ``SINGULARITY_`` prefixed environment variables may issue warnings, preferring to be prefixed with ``APPTAINER_`` +* The ``singularity`` command/binary is still available, but is just a symlink to ``apptainer`` +* The ``library://`` protocol is not supported by apptainer's default configuration. See `Restoring pre-Apptainer library behaviour `_ for more information. + +.. _software-tools-apptainer-rootless: + +Rootless Container Builds +^^^^^^^^^^^^^^^^^^^^^^^^^ + +The Apptainer installation on Bede's ``aarch64`` nodes supports the creation of container images from Apptainer definition files or Docker containers without the need for root. + +This means it is possible to build your ``aarch64`` containers in ``ghlogin`` interactive sessions, rather than having to create containers on ``aarch64`` machines elsewhere and copy them into Bede. + +.. _software-tools-apptainer-cachedir: + +``APPTAINER_CACHEDIR`` +^^^^^^^^^^^^^^^^^^^^^^ + +Container images for GPU accelerated code are often very large, and if creating containers based on a Docker image, multiple copies of the container will exist in your file stores. +Consider setting the ``APPTAINER_CACHEDIR`` environment variable to a location in ``/nobackup`` or ``/projects`` to avoid filling your home directory.
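+For example, a minimal sketch (the project path shown is a placeholder, not a real Bede project directory):
+
+.. code-block:: bash
+
+    # Store the Apptainer image cache outside the quota-limited home directory.
+    # Replace the path below with a directory you own in /nobackup or /projects.
+    export APPTAINER_CACHEDIR=/nobackup/projects/myproject/$USER/apptainer-cache
+    mkdir -p "$APPTAINER_CACHEDIR"
+
+Adding the ``export`` line to your shell startup file (e.g. ``~/.bashrc``) would make this setting persist across sessions.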
\ No newline at end of file diff --git a/software/tools/cmake.rst b/software/tools/cmake.rst index 8b17a0a..40a10b0 100644 --- a/software/tools/cmake.rst +++ b/software/tools/cmake.rst @@ -10,11 +10,17 @@ The suite of CMake tools were created by `Kitware `__ CMake is part of Kitware’s collection of commercially supported `open-source platforms `__ for software development. -.. code-block:: bash +.. tabs:: - module load cmake - module load cmake/3.18.4 + .. code-tab:: bash ppc64le -Once loaded, the ``cmake``, ``ccmake``, ``cpack`` and ``ctest`` binaries are available for use, to configure, build and test software which uses CMake as the build system. + + module load cmake + module load cmake/3.18.4 + + .. group-tab:: aarch64 + + On ``aarch64`` nodes, CMake ``3.20.2`` is provided by default. + +This provides the ``cmake``, ``ccmake``, ``cpack`` and ``ctest`` binaries, which can be used to configure, build and test software which uses CMake as the build system. For more information, see the `online documentation `__. \ No newline at end of file diff --git a/software/tools/make.rst b/software/tools/make.rst index 2d77206..a5a5910 100644 --- a/software/tools/make.rst +++ b/software/tools/make.rst @@ -8,12 +8,20 @@ Make Make gets its knowledge of how to build your program from a file called the makefile, which lists each of the non-source files and how to compute it from other files. When you write a program, you should write a makefile for it, so that it is possible to use Make to build and install the program. -On Bede, ``make 4.2`` is provided by default. -A more recent version of ``make``, is provided by the ``make`` family of modules. +.. tabs:: + + .. group-tab:: ppc64le + + On Bede's ``ppc64le`` nodes, ``make 4.2`` is provided by default, and + a more recent version of ``make`` is provided by the ``make`` family of modules. + + .. code-block:: bash + + module load make + module load make/4.3 + + .. group-tab:: aarch64 + + On the ``aarch64`` nodes, ``make 4.3`` is provided by default without the need for a ``module load``. For more information on the usage of ``make``, see the `online documentation `__ or run ``man make`` after loading the module. \ No newline at end of file diff --git a/software/tools/nsight-compute.rst b/software/tools/nsight-compute.rst index c8acb1d..dc1cbfd 100644 --- a/software/tools/nsight-compute.rst +++ b/software/tools/nsight-compute.rst @@ -11,21 +11,36 @@ Remote GUI is not available on Bede, but profile data can be generated on Bede v You should use a versions of ``ncu`` that is at least as new as the CUDA toolkit used to compile your application (if appropriate). -.. code-block:: bash +.. tabs:: + + .. code-tab:: bash ppc64le + + module load nsight-compute/2022.4.1 + module load nsight-compute/2022.1.0 + module load nsight-compute/2020.2.1 + + module load cuda/12.0.1 # provides ncu 2022.4.1 + module load cuda/11.5.1 # provides ncu 2021.3.1 + module load cuda/11.4.1 # provides ncu 2021.2.1 + module load cuda/11.3.1 # provides ncu 2021.1.1 + module load cuda/11.2.2 # provides ncu 2020.3.1 + + module load nvhpc/23.1 # provides ncu 2022.4.0 + module load nvhpc/22.1 # provides ncu 2021.3.0 + module load nvhpc/21.5 # provides ncu 2021.1.0 + + ..
code-tab:: bash aarch64 - module load nsight-compute/2022.4.1 # provides ncu 2022.4.1 - module load nsight-compute/2022.1.0 # provides ncu 2022.1.0 - module load nsight-compute/2020.2.1 # provides ncu 2020.2.1 + module load nsight-systems/2023.4.1 - module load cuda/12.0.1 # provides ncu 2022.4.1 - module load cuda/11.5.1 # provides ncu 2021.3.1 - module load cuda/11.4.1 # provides ncu 2021.2.1 - module load cuda/11.3.1 # provides ncu 2021.1.1 - module load cuda/11.2.2 # provides ncu 2020.3.1 + module load cuda/12.3.2 # provides ncu 2023.3.1 + module load cuda/12.2.2 # provides ncu 2023.2.2 + module load cuda/12.1.1 # provides ncu 2023.1.1 + module load cuda/11.8.0 # provides ncu 2022.3.0 + module load cuda/11.7.1 # provides ncu 2022.1.0 + module load cuda/11.7.0 # provides ncu 2022.2.0 - module load nvhpc/23.1 # provides ncu 2022.4.0 - module load nvhpc/22.1 # provides ncu 2021.3.0 - module load nvhpc/21.5 # provides ncu 2021.1.0 + module load nvhpc/24.1 # provides ncu 2023.3.1 Consider compiling your CUDA application using ``nvcc`` with ``-lineinfo`` or ``--generate-line-info`` to generate line-level profile information. diff --git a/software/tools/nsight-systems.rst b/software/tools/nsight-systems.rst index f48e1a9..b086f6e 100644 --- a/software/tools/nsight-systems.rst +++ b/software/tools/nsight-systems.rst @@ -12,21 +12,36 @@ The GUI is not available on Bede. On Bede, Nsight Systems is provided by a number of modules, with differing versions of ``nsys``. You should use a versions of ``nsys`` that is at least as new as the CUDA toolkit used to compile your application (if appropriate). -.. code-block:: bash +.. tabs:: + + .. code-tab:: bash ppc64le + + module load nsight-systems/2023.1.1 + module load nsight-systems/2022.1.1 + module load nsight-systems/2020.3.1 + + module load cuda/12.0.1 # provides nsys 2022.4.2 + module load cuda/11.5.1 # provides nsys 2021.3.3 + module load cuda/11.4.1 # provides nsys 2021.2.4 + module load cuda/11.3.1 # provides nsys 2021.1.3 + module load cuda/11.2.2 # provides nsys 2020.4.3 + + module load nvhpc/23.1 # provides nsys 2022.5.1 + module load nvhpc/22.1 # provides nsys 2021.5.1 + module load nvhpc/21.5 # provides nsys 2021.2.1 + + .. code-tab:: bash aarch64 - module load nsight-systems/2023.1.1 # provides nsys 2023.1.1 - module load nsight-systems/2022.1.1 # provides nsys 2022.1.1 - module load nsight-systems/2020.3.1 # provides nsys 2020.3.1 + module load nsight-systems/2023.4.1 - module load cuda/12.0.1 # provides nsys 2022.4.2 - module load cuda/11.5.1 # provides nsys 2021.3.3 - module load cuda/11.4.1 # provides nsys 2021.2.4 - module load cuda/11.3.1 # provides nsys 2021.1.3 - module load cuda/11.2.2 # provides nsys 2020.4.3 + module load cuda/12.3.2 # provides nsys 2023.3.3 + module load cuda/12.2.2 # provides nsys 2023.2.3 + module load cuda/12.1.1 # provides nsys 2023.1.2 + module load cuda/11.8.0 # provides nsys 2022.4.2 + module load cuda/11.7.1 # provides nsys 2022.1.3 + module load cuda/11.7.0 # provides nsys 2022.1.3 - module load nvhpc/23.1 # provides nsys 2022.5.1 - module load nvhpc/22.1 # provides nsys 2021.5.1 - module load nvhpc/21.5 # provides nsys 2021.2.1 + module load nvhpc/24.1 # provides nsys 2023.4.1 To generate an application timeline with Nsight Systems CLI (``nsys``): diff --git a/software/tools/singularity.rst b/software/tools/singularity.rst index 52252b8..e74beeb 100644 --- a/software/tools/singularity.rst +++ b/software/tools/singularity.rst @@ -1,22 +1,30 @@ .. 
_software-tools-singularity: -Singularity ------------ +Singularity CE +-------------- -`Singularity `__ (and `Apptainer `__) is a container platform similar to `Docker `__. -Singularity is the most widely used container system for HPC. +`Singularity CE `__ (and `Apptainer `__) is a container platform similar to `Docker `__. +Singularity is one of the most widely used container systems for HPC. It allows you to create and run containers that package up pieces of software in a way that is portable and reproducible. Container platforms allow users to create and use container images, which are self-contained software stacks. -.. note:: - As Bede is a Power 9 Architecture (``ppc64le``) machine, containers created on more common ``x86_64`` machines may not be compatible. +.. admonition:: ppc64le partitions only + :class: warning + + Singularity CE is only available on ``ppc64le`` nodes/partitions within Bede. + + For ``aarch64`` partitions, please see :ref:`Apptainer`. -Singularity-ce is provided by default, and can be used without loading any modules. + +Singularity CE is provided by default on ``ppc64le`` nodes/partitions, and can be used without loading any modules. .. code-block::bash singularity --version -For more information on how to use singularity, please see the `Singularity Documentation `__. +.. note:: + Container images are not portable across CPU architectures. Containers created on ``x86_64`` machines may not be compatible with the ``ppc64le`` and ``aarch64`` nodes in Bede. + +For more information on how to use Singularity CE, please see the `Singularity Documentation `__. diff --git a/usage/index.rst b/usage/index.rst index 1440d2b..10bfd7d 100644 --- a/usage/index.rst +++ b/usage/index.rst @@ -24,7 +24,7 @@ Login ----- Bede offers SSH and X2GO services running on host ``bede.dur.ac.uk`` (which -fronts the two login nodes, ``login1.bede.dur.ac.uk`` and +fronts the two ``ppc64le`` login nodes, ``login1.bede.dur.ac.uk`` and ``login2.bede.dur.ac.uk``). SSH or X2GO should be used for all interaction with the machine (including shell access, file transfer and graphics). @@ -183,10 +183,48 @@ the service and provide this information in a more responsive format in the future. +Node Architectures and Partitions +--------------------------------- + +As described on the :ref:`hardware` page, Bede contains a mix of nodes using 2 CPU architectures and 3 models of NVIDIA GPU. Software must be compiled for each CPU architecture, and not all software is available, provided or compatible with each architecture. + +Bede's original nodes contain Power 9 CPUs (``ppc64le``), with NVIDIA Volta and Turing architecture GPUs (``sm_70`` & ``sm_75``). +Jobs in the ``gpu``, ``test`` and ``infer`` partitions will run on ``ppc64le`` architecture nodes. + +The newer Grace Hopper open pilot nodes include `NVIDIA Grace Hopper Superchips `_, which are composed of an ARM CPU (``aarch64``) and an NVIDIA Hopper GPU (``sm_90``). +Jobs in the ``ghlogin``, ``gh`` and ``ghtest`` partitions will run on the ``aarch64`` architecture nodes. + + +.. _usage_connecting_ghlogin: + +Connecting to the ``ghlogin`` node +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To get an interactive login session on a Grace-Hopper node in the ``ghlogin`` partition, you must connect to Bede's regular login nodes as usual via ssh / x2go. + +Once connected, the ``ghlogin`` command can be used to request an interactive session on the ``ghlogin`` node.
+The login environment includes shared (unrestricted) access to the Hopper GPU, and by default will provide 4 CPU cores and 16GB of RAM for 8 hours. +Use additional srun style flags to request a different duration or resources. +You must provide your project account. + + +.. code-block:: bash + + # Request a default login session (4 cores, 16GB, 8 hours) + ghlogin -A + # Request 4 hours with 8 cores and 24GB of memory + ghlogin -A --time 4:00:00 -c 8 --mem 24G + + +This will provide shell access to the login environment, which is a single Grace Hopper superchip. +Access is mediated by Slurm and you'll have a default of 4 cores and 16GB RAM for 8 hours (amend by adding srun style flags to the ``ghlogin`` command). +Access to the GPU in the login environment is currently unrestricted. + + Running Jobs ------------ -Access beyond the two login node systems should only be done through the +Access beyond the login nodes should only be done through the Slurm batch scheduler, by packaging your work into units called jobs. A job consists of a shell script, called a job submission script, @@ -195,75 +233,153 @@ some specially formatted comment lines are added to the file, describing how much time and resources the job needs. Resources are requested in terms of the type of node, the number of GPUs -per node (for each GPU requested, the job receives 25% of the node’s -CPUs and RAM) and the number of nodes required. +per node and the number of nodes required. +The job will then receive the corresponding fraction of the node. + +For example: + +* a job requesting ``2`` GPUs on a ``gpu`` node (containing 4 V100 GPUs) will receive ``2/4`` of the CPU cores (``20`` / ``40`` cores) and memory (``256`` / ``512GB``). +* a job requesting ``1`` GPU on a ``gh`` node (containing ``1`` GH200) will receive the full ``72`` cores and ``480GB`` of memory. -There is a test partition, providing priority access to jobs requesting -up to two nodes (8x V100 GPUs) for 15 minutes to allow experimentation, -especially for jobs trying to make use of Bede's architecture for -multi-GPU, multi-node use. +The ``test`` and ``ghtest`` partitions provide higher priority access for short test jobs requesting up to ``2`` nodes (8x V100 GPUs or 2x GH200 GPUs) for up to ``30`` minutes, to allow experimentation, especially for jobs trying to make use of Bede's architecture for multi-GPU, multi-node use. There are a number of example job submission scripts below. +.. _usage_requesting_resources: + Requesting resources ~~~~~~~~~~~~~~~~~~~~ +Batch jobs for the ``gpu``, ``infer`` and ``test`` partitions should be submitted from the ``ppc64le`` login nodes via ``sbatch`` or ``srun``. + + +To submit a job to the ``gh`` and ``ghtest`` partitions, you can use ``sbatch`` or ``srun`` as normal from within a ``ghlogin`` session. +Alternatively, use the ``ghbatch`` or ``ghrun`` commands from a Bede login node. + + Part of, or an entire node ^^^^^^^^^^^^^^^^^^^^^^^^^^ -Example job script for programs written to take advantage of a GPU or -multiple GPUs on a single computer: +.. tabs:: -.. code-block:: bash + .. group-tab:: ppc64le + + Example job script for programs written to take advantage of a GPU or + multiple GPUs on a single ppc64le node: + + ..
code-block:: bash + + #!/bin/bash + + # Generic options: + + #SBATCH --account= # Run job under project + #SBATCH --time=1:0:0 # Run for a max of 1 hour + + # Node resources: + # (choose between 1-4 gpus per node) + + #SBATCH --partition=test # Choose either "gpu", "test" or "infer" partition type + #SBATCH --nodes=1 # Resources from a single node + #SBATCH --gres=gpu:1 # One GPU per node (plus 25% of node CPU and RAM per GPU) + + # Run commands: + + nvidia-smi # Display available gpu resources + nproc # Display available CPU cores + + # Place other commands here + + echo "end of job" + + + .. group-tab:: aarch64 + + Example job script for programs written to take advantage of single GPU on a single aarch64 node (1 GPU per node): - #!/bin/bash + .. code-block:: bash - # Generic options: + #!/bin/bash - #SBATCH --account= # Run job under project - #SBATCH --time=1:0:0 # Run for a max of 1 hour + # Generic options: - # Node resources: - # (choose between 1-4 gpus per node) + #SBATCH --account= # Run job under project + #SBATCH --time=0:15:0 # Run for a max of 15 minutes - #SBATCH --partition=gpu # Choose either "gpu" or "infer" node type - #SBATCH --nodes=1 # Resources from a single node - #SBATCH --gres=gpu:1 # One GPU per node (plus 25% of node CPU and RAM per GPU) + # Node resources: + # 1 gpu per node - # Run commands: + #SBATCH --partition=ghtest # Choose either "gh" or "ghtest" + #SBATCH --nodes=1 # Resources from a single node + #SBATCH --gres=gpu:1 # One GPU per node (plus 100% of node CPU and RAM per GPU) - nvidia-smi # Display available gpu resources + # Run commands: - # Place other commands here + nvidia-smi # Display available gpu resources + nproc # Display available CPU cores - echo "end of job" + # Place other commands here + + echo "end of job" Multiple nodes (MPI) ^^^^^^^^^^^^^^^^^^^^ Example job script for programs using MPI to take advantage of multiple -CPUs/GPUs across one or more machines: +CPUs/GPUs across one or more machines, via ``bede-mpirun``: + +.. tabs:: + + .. group-tab:: ppc64le + + .. code-block:: bash + + #!/bin/bash + + # Generic options: + + #SBATCH --account= # Run job under project + #SBATCH --time=1:0:0 # Run for a max of 1 hour + + # Node resources: + + #SBATCH --partition=gpu # Choose either "gpu", "test" or "infer" partition type + #SBATCH --nodes=2 # Resources from two nodes + #SBATCH --gres=gpu:4 # Four GPUs per node (plus 100% of node CPU and RAM per node) + + # Run commands: + + bede-mpirun --bede-par 1ppc + + echo "end of job" -.. code-block:: bash - #!/bin/bash + .. group-tab:: aarch64 - # Generic options: + .. code-block:: bash - #SBATCH --account= # Run job under project - #SBATCH --time=1:0:0 # Run for a max of 1 hour + #!/bin/bash - # Node resources: + # Generic options: - #SBATCH --partition=gpu # Choose either "gpu" or "infer" node type - #SBATCH --nodes=2 # Resources from a two nodes - #SBATCH --gres=gpu:4 # Four GPUs per node (plus 100% of node CPU and RAM per node) + #SBATCH --account= # Run job under project + #SBATCH --time=1:0:0 # Run for a max of 1 hour - # Run commands: + # Node resources: - bede-mpirun --bede-par 1ppc + #SBATCH --partition=gh # Choose either "gh" or"ghtest" partition type + #SBATCH --nodes=2 # Resources from two nodes + #SBATCH --gres=gpu:1 # 1 GPU per node (plus 100% of node CPU and RAM per node) - echo "end of job" + # Run commands: + + bede-mpirun --bede-par 1ppc + + echo "end of job" + + .. note:: + + There are only ``2`` ``gh`` nodes currently available for batch jobs in Bede. 
As a result, multi-node Grace-Hopper jobs may queue for a significant time. The ``bede-mpirun`` command takes both ordinary ``mpirun`` arguments and the special ``--bede-par `` option, allowing control over how @@ -298,6 +414,10 @@ Examples: # - One MPI rank per hwthread: bede-mpirun --bede-par 1ppt +.. note:: + + On ``aarch64``, the ``1ppt`` option is synonymous with ``1ppc`` due to the absence of hardware SMT. + .. _usage-maximum-job-runtime: Maximum Job Runtime @@ -360,23 +480,8 @@ Connecting to the ``ghlogin`` node To get an interactive login-session on a Grace-Hopper node in the ``ghlogin`` partition, you must connect to Bede's regular login nodes as usual via ssh / x2go. Once connected, the ``ghlogin`` command can be used to request an interactive session on the ``ghlogin`` node. -The login environment includes shared (unrestricted) access to the Hopper GPU, and by default will provide 4 CPU cores and 16GB of RAM for 8 hours. -Use additional srun style flags to request a different duration or resources. -You must provide your project account. - - -.. code-block:: bash - - # Request a default login session 4 cores, 16GB, 8 hours - ghlogin -A - # Request 4 hours with 8 cores and 24GB of memory - ghlogin -A --time 4:00:00 -c 8 --mem 24G - - -This will provide shell access to the login environment, which is a single Grace Hopper. -Access is mediated by slurm and you'll have a default of 4 cores and 1GB RAM for 8 hours (amend by adding srun style flags to theghlogin command). -Access to the GPU in the login environment is currently unrestricted. +Please see :ref:`usage_connecting_ghlogin` for more information. Batch Jobs ~~~~~~~~~~ @@ -386,36 +491,7 @@ Alternatively, use the ``ghbatch`` or ``ghrun`` commands from a Bede login node. Your job submission scripts should specify the ``--partition=gh`` or ``--partition=ghtest``. -As with the power9 nodes in Bede, resources are allocated based on the proportion of the node's GPUs you have requested. As there is only a single GPU per node, the full nodes resources will be available for your job. - -As there are only 2 Grace-Hopper nodes for batch jobs, queue times may be significant. - -.. code-block:: bash - - #!/bin/bash - - # Generic options: - - #SBATCH --account= # Run job under project - #SBATCH --time=1:0:0 # Run for a max of 1 hour - - # Node resources: - - #SBATCH --partition=gh # Choose either "gh" or "ghtest" node type for grace-hopper - #SBATCH --gres=gpu:1 # Request 1 GPU, and implicitly the full 72 CPUs and 100% of the nodes memory - - # Run commands: - - # Query nvidia-smi - nvidia-smi - - # Print the number of CPUS - nproc - - # List information about the cpu - lscpu - - echo "end of job" +Further details and example batch job submission scripts are provided in the :ref:`usage_requesting_resources` section above. Software availability ~~~~~~~~~~~~~~~~~~~~~ @@ -427,26 +503,22 @@ The Grace-Hopper nodes are also running a newer operating (Rocky 9) system than Use ``module avail`` from the ``ghlogin`` node to list centrally provided software modules for the grace-hopper nodes. -The Bede documentation does not currently contain grace-hopper specific software documentation, this will be added over time. -However, during pilot use the following has been discovered: +Documentation for :ref:`software provided centrally on Bede` now includes Grace-Hopper specific information. -* CUDA +Key differences to be aware of include: - * CUDA 11.8 is the first CUDA version which can target the Hopper GPU architecture ``SM_90``.
- * Older CUDA versions (i.e. 11.7) will require embedding PTX for an older architecture (i.e. ``-gencode=arch=compute_80,code=compute_80``) +* :ref:`CUDA` -* Singularity / Apptainer - - * The Grace-Hopper nodes provide ``apptainer`` rather than ``singularity`` for container support and enables the use of ``--fakeroot`` to build containers on the aarch64 nodes directly. - * Usage is broadly the same, however there are some differences as `documented by apptainer `_ - - * ``SINGULARITY_`` prefixed environment variables may issues warnings, preferring to be prefixed with ``APPTAINER_`` - * The ``singularity`` command/binary is still available, but is just a symlink to ``apptainer`` - * The ``library://`` protocol is not supported by apptainer's default configuration. See `Restoring pre-Apptainer library behaviour `_ for more information. + * Code should be compiled for Compute Capability ``90`` using CUDA 11.8+. + * Code built with CUDA 11.7 (available on ``aarch64`` nodes) should embed PTX for Compute Capability ``80`` (i.e. ``-gencode=arch=compute_80,code=compute_80``). +* :ref:`Singularity` / :ref:`Apptainer` + + * The Grace-Hopper nodes provide :ref:`software-tools-apptainer` rather than :ref:`software-tools-singularity` for container support, which enables the use of ``--fakeroot`` to build containers on the ``aarch64`` nodes directly. + * Usage is broadly the same; however, there are some differences, as `documented by apptainer `_ and on the :ref:`Apptainer page`. * Container files are large, consider setting ``APPTAINER_CACHEDIR`` to avoid filling your home directory quota -* PyTorch +* :ref:`PyTorch` * Current (at least up to ``2.1.0``) builds of pytorch provided via conda or pip for ``aarch64`` do not include cuda support (``torch.cuda.is_available()`` returns ``false``). * NVIDIA provide `NGC Pytorch containers `_ which can be used instead, with pytorch installed into the default python environment. @@ -457,13 +529,10 @@ However, during pilot use the following has been discovered: * Or follow `PyTorch Building from Source instructions `_ -* ``gcc`` / ``g++`` psABI warnings - - * ``g++`` >= 10.1 compiling with ``--std=c++17`` mode may emit psABI warnings for parameter passing of certain types. These can be suppressed via ``--Wno-psabi``. - -* MPI +* Code compiled with :ref:`software-compilers-gcc` using ``--std=c++17`` may emit psABI warnings. These can be suppressed via ``--Wno-psabi``. +* :ref:`MPI` - * The ``openmpi`` module is available, and CUDA support is enabled if you additionally load a CUDA module. The ``mpirun`` command should be used to launch programs, and not the ``bede-mpirun`` command. + * The ``openmpi`` module is available, and CUDA support is enabled if you additionally load a CUDA module. The ``bede-mpirun`` command should be used to launch programs. * The ``ppc64le`` equipment has an MPI with optimised multi-node GPU communications provided by the ``mvapich2-gdr`` module. This is not available for the Grace Hopper equipment; however, we plan to provide a ``mvapich-plus`` module in future to provide this functionality. In the meantime, if this is of interest, please contact us. Bash environment