From d84204aed74d6c44abeb20e35e1980aa17cc3b74 Mon Sep 17 00:00:00 2001 From: bcumming Date: Fri, 28 Jun 2024 10:14:00 +0200 Subject: [PATCH 1/6] update icon build env recipe for todi --- recipes/wcp/icon/v1/gh200/config.yaml | 2 +- recipes/wcp/icon/v1/gh200/environments.yaml | 56 +++++++-------------- scripts/setup-stackinator | 2 +- 3 files changed, 21 insertions(+), 39 deletions(-) diff --git a/recipes/wcp/icon/v1/gh200/config.yaml b/recipes/wcp/icon/v1/gh200/config.yaml index 810d76bc..54f1aecb 100644 --- a/recipes/wcp/icon/v1/gh200/config.yaml +++ b/recipes/wcp/icon/v1/gh200/config.yaml @@ -1,6 +1,6 @@ name: icon-wcp store: /user-environment spack: - commit: releases/v0.21 + commit: releases/v0.22 repo: https://github.com/spack/spack.git description: The tools required to build ICON diff --git a/recipes/wcp/icon/v1/gh200/environments.yaml b/recipes/wcp/icon/v1/gh200/environments.yaml index cf6daa72..28293df7 100644 --- a/recipes/wcp/icon/v1/gh200/environments.yaml +++ b/recipes/wcp/icon/v1/gh200/environments.yaml @@ -9,45 +9,21 @@ nvidia: spec: cray-mpich@8.1.29%nvhpc gpu: cuda specs: - - boost%gcc ~mpi - - python@3.10%gcc - #- eccodes@2.25.0%nvhpc +tools +fortran +aec +openmp jp2k=jasper - - cmake%gcc - - cuda@12.3%gcc - - hdf5%gcc - - hwloc%gcc - - netcdf-c%gcc - - netcdf-cxx4%gcc + - boost ~mpi + - python@3.10 + - cmake + - cuda@12.3 + - hdf5 + - hwloc + - netcdf-c + - netcdf-cxx4 - netcdf-fortran%nvhpc - - numactl%gcc + - numactl - osu-micro-benchmarks@5.9%nvhpc # everything needed for nccl on SS11 - - aws-ofi-nccl@master%gcc - - nccl%gcc - - nccl-tests%gcc - # The following are required to stop spack from using nvhpc to build - # basic dependencies, some of which don't compile with nvc etc. - # Explicitly excluded as modules. 
- - autoconf%gcc - - automake%gcc - - ca-certificates-mozilla%gcc - - diffutils%gcc - - gnuconfig%gcc - - libiconv%gcc - - libxcrypt%gcc - - libxml2%gcc - - m4%gcc - - ncurses%gcc - - openssl%gcc - - perl%gcc - - xz%gcc - - zlib%gcc - - zstd%gcc - - c-blosc%gcc - - libaec%gcc - - jasper%gcc - - patchelf%gcc - - gmake%gcc + - aws-ofi-nccl@master + - nccl + - nccl-tests variants: - cuda_arch=90 - +mpi @@ -55,6 +31,12 @@ nvidia: packages: - curl - gmake + - perl views: - default: + icon: link: roots + uenv: + add_compilers: true + prefix_paths: + LD_LIBRARY_PATH: [lib, lib64] + diff --git a/scripts/setup-stackinator b/scripts/setup-stackinator index 2a1010b0..87cdb30b 100755 --- a/scripts/setup-stackinator +++ b/scripts/setup-stackinator @@ -5,7 +5,7 @@ tool_base_path="$(pwd)" # builds in the same pipeline do not conflict tool_path=`mktemp -d` tool_repo=https://github.com/eth-cscs/stackinator.git -tool_version=master +tool_version=v5.0-rc1 log "installing stackinator from git in '${tool_path}'" From 5b1952e499f5ec78f463381225015f02c573e4be Mon Sep 17 00:00:00 2001 From: bcumming Date: Fri, 28 Jun 2024 10:42:20 +0200 Subject: [PATCH 2/6] add libfyaml, simplify module definitions --- recipes/wcp/icon/v1/gh200/environments.yaml | 1 + recipes/wcp/icon/v1/gh200/modules.yaml | 18 ------------------ 2 files changed, 1 insertion(+), 18 deletions(-) diff --git a/recipes/wcp/icon/v1/gh200/environments.yaml b/recipes/wcp/icon/v1/gh200/environments.yaml index 28293df7..2b337dae 100644 --- a/recipes/wcp/icon/v1/gh200/environments.yaml +++ b/recipes/wcp/icon/v1/gh200/environments.yaml @@ -15,6 +15,7 @@ nvidia: - cuda@12.3 - hdf5 - hwloc + - libfyaml - netcdf-c - netcdf-cxx4 - netcdf-fortran%nvhpc diff --git a/recipes/wcp/icon/v1/gh200/modules.yaml b/recipes/wcp/icon/v1/gh200/modules.yaml index db999335..d13c2474 100644 --- a/recipes/wcp/icon/v1/gh200/modules.yaml +++ b/recipes/wcp/icon/v1/gh200/modules.yaml @@ -21,23 +21,5 @@ modules: exclude: - '%gcc@7.5.0' - 'gcc %gcc@7.5.0' - - 
'autoconf' - - 'automake' - - 'c-blosc' - - 'ca-certificates-mozilla' - - 'diffutils' - - 'gnuconfig' - - 'jasper' - - 'libaec' - - 'libiconv' - - 'libxcrypt' - - 'libxml2' - - 'patchelf' - - 'm4' - - 'ncurses' - - 'openssl' - - 'xz' - - 'zlib' - - 'zstd' projections: all: '{name}/{version}' From 792374281e9636b7e12069643b870ebdeb6007f1 Mon Sep 17 00:00:00 2001 From: bcumming Date: Mon, 1 Jul 2024 14:47:22 +0200 Subject: [PATCH 3/6] use spack v0.21 for c2sm-spack compatibility --- recipes/wcp/icon/v1/gh200/config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes/wcp/icon/v1/gh200/config.yaml b/recipes/wcp/icon/v1/gh200/config.yaml index 54f1aecb..810d76bc 100644 --- a/recipes/wcp/icon/v1/gh200/config.yaml +++ b/recipes/wcp/icon/v1/gh200/config.yaml @@ -1,6 +1,6 @@ name: icon-wcp store: /user-environment spack: - commit: releases/v0.22 + commit: releases/v0.21 repo: https://github.com/spack/spack.git description: The tools required to build ICON From 15f81f61d47e4289496c74e23c877095f765264d Mon Sep 17 00:00:00 2001 From: bcumming Date: Wed, 3 Jul 2024 09:16:31 +0200 Subject: [PATCH 4/6] add libxml2 as a root spec, unify concretisation --- recipes/wcp/icon/v1/gh200/environments.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/recipes/wcp/icon/v1/gh200/environments.yaml b/recipes/wcp/icon/v1/gh200/environments.yaml index 2b337dae..0ae6c960 100644 --- a/recipes/wcp/icon/v1/gh200/environments.yaml +++ b/recipes/wcp/icon/v1/gh200/environments.yaml @@ -4,23 +4,24 @@ nvidia: spec: gcc - toolchain: llvm spec: nvhpc - unify: when_possible + unify: true mpi: spec: cray-mpich@8.1.29%nvhpc gpu: cuda specs: - boost ~mpi - - python@3.10 - cmake - cuda@12.3 - hdf5 - hwloc + - libxml2 - libfyaml - netcdf-c - netcdf-cxx4 - netcdf-fortran%nvhpc - numactl - osu-micro-benchmarks@5.9%nvhpc + - python@3.10 # everything needed for nccl on SS11 - aws-ofi-nccl@master - nccl From 98185eed841cf4c41931df1e785c0604195e2a49 Mon Sep 17 
00:00:00 2001 From: bcumming Date: Fri, 12 Jul 2024 09:25:45 +0200 Subject: [PATCH 5/6] add high level library to hdf5; bump mpi version; add eccodes; post-install=[add image hash, remove la files] --- recipes/wcp/icon/v1/gh200/environments.yaml | 5 ++-- recipes/wcp/icon/v1/gh200/post-install | 29 +++++++++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) create mode 100644 recipes/wcp/icon/v1/gh200/post-install diff --git a/recipes/wcp/icon/v1/gh200/environments.yaml b/recipes/wcp/icon/v1/gh200/environments.yaml index 0ae6c960..b7663a7c 100644 --- a/recipes/wcp/icon/v1/gh200/environments.yaml +++ b/recipes/wcp/icon/v1/gh200/environments.yaml @@ -6,13 +6,14 @@ nvidia: spec: nvhpc unify: true mpi: - spec: cray-mpich@8.1.29%nvhpc + spec: cray-mpich@8.1.30%nvhpc gpu: cuda specs: - boost ~mpi - cmake - cuda@12.3 - - hdf5 + - eccodes%nvhpc +tools +fortran +aec +openmp jp2k=jasper + - hdf5%nvhpc +hl - hwloc - libxml2 - libfyaml diff --git a/recipes/wcp/icon/v1/gh200/post-install b/recipes/wcp/icon/v1/gh200/post-install new file mode 100644 index 00000000..97658030 --- /dev/null +++ b/recipes/wcp/icon/v1/gh200/post-install @@ -0,0 +1,29 @@ +#!/usr/bin/python3 + +import glob +import hashlib +import os +import yaml + + +# Add cuda_arch to packages.yaml +with open("{{ env.mount }}/config/packages.yaml", mode='r') as file: + packages = yaml.safe_load(file) + +packages['packages']['all'] = {'variants': 'cuda_arch=90'} + +with open("{{ env.mount }}/config/packages.yaml", mode='w') as file: + yaml.dump(packages, file) + + +# Generate a hash to make the uenv uniquely identifiable +with open("{{ env.mount }}/.spack-db/index.json", mode='rb') as f: + sha = hashlib.sha256(f.read()).hexdigest() + +with open("{{ env.mount }}/meta/hash", mode='w') as f: + f.write(sha) + +# delete .la files that force linking against unnecessary libraries +for path in ["{{ mount }}/env/icon/lib", "{{ mount }}/env/icon/lib64"]: + for file in glob.glob(os.path.join(path, '*.la')): +
os.remove(file) From 198f7ca2d9c21c158ed2cf64041dad448970bfdc Mon Sep 17 00:00:00 2001 From: bcumming Date: Fri, 12 Jul 2024 09:54:19 +0200 Subject: [PATCH 6/6] bug: post-install was not deleting libtool files --- recipes/wcp/icon/v1/gh200/post-install | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes/wcp/icon/v1/gh200/post-install b/recipes/wcp/icon/v1/gh200/post-install index 97658030..a9843086 100644 --- a/recipes/wcp/icon/v1/gh200/post-install +++ b/recipes/wcp/icon/v1/gh200/post-install @@ -24,6 +24,6 @@ with open("{{ env.mount }}/meta/hash", mode='w') as f: f.write(sha) # delete .la files that force linking against unnecessary libraries -for path in ["{{ mount }}/env/icon/lib", "{{ mount }}/env/icon/lib64"]: +for path in ["{{ env.mount }}/env/icon/lib", "{{ env.mount }}/env/icon/lib64"]: for file in glob.glob(os.path.join(path, '*.la')): os.remove(file)