diff --git a/docker/build_and_push.sh b/docker/scripts/build_and_push.sh
similarity index 100%
rename from docker/build_and_push.sh
rename to docker/scripts/build_and_push.sh
diff --git a/docker/scripts/install_print_test.sh b/docker/scripts/install_print_test.sh
new file mode 100755
index 0000000000..2b38a0ad69
--- /dev/null
+++ b/docker/scripts/install_print_test.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# Script to quickly obtain all relevant information out of a new nvidia pytorch container.
+
+# Abort on the first failing command (and on unset vars / failed pipeline stages),
+# so a failed clone or install never lets the later steps run in a broken state.
+set -euo pipefail
+
+# Container setup
+apt update && DEBIAN_FRONTEND=noninteractive apt install -y build-essential openssh-client python3-dev git && apt clean && rm -rf /var/lib/apt/lists/*
+
+# Setup heat dependencies
+git clone https://github.com/helmholtz-analytics/heat.git
+cd heat
+pip install --upgrade pip
+pip install mpi4py --no-binary :all:
+pip install .[netcdf,hdf5,dev]
+
+# Print environment
+pip list | grep heat
+pip list | grep torch
+python --version
+nvcc --version
+mpirun --version
+
+# Run tests
+HEAT_TEST_USE_DEVICE=gpu mpirun -n 1 pytest heat/
diff --git a/docker/scripts/test_nvidia_image_haicore_enroot.sh b/docker/scripts/test_nvidia_image_haicore_enroot.sh
new file mode 100755
index 0000000000..2e9eef3cea
--- /dev/null
+++ b/docker/scripts/test_nvidia_image_haicore_enroot.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+# Clear environment, else mpi4py will fail to install.
+ml purge
+
+SBATCH_PARAMS=(
+  --partition normal
+  --time 00:10:00
+  --nodes 1
+  --tasks-per-node 1
+  --gres gpu:1
+  --container-image ~/containers/nvidia+pytorch+23.05-py3.sqsh
+  --container-writable
+  --container-mounts /etc/slurm/task_prolog.hk:/etc/slurm/task_prolog.hk,/scratch:/scratch
+  --container-mount-home
+)
+
+sbatch "${SBATCH_PARAMS[@]}" ./install_print_test.sh
diff --git a/docker/test_nvidia_image_haicore_enroot.sh b/docker/test_nvidia_image_haicore_enroot.sh
deleted file mode 100755
index d114a33341..0000000000
--- a/docker/test_nvidia_image_haicore_enroot.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/bin/bash
-#SBATCH --nodes=1
-#SBATCH --ntasks-per-node=1
-#SBATCH --gres=gpu:4
-#SBATCH --partition=normal
-#SBATCH --time=00:10:00
-#SBATCH --container-name np-23.05
-#SBATCH --container-mounts=/etc/slurm/task_prolog.hk:/etc/slurm/task_prolog.hk,/scratch:/scratch
-#SBATCH --container-mount-home
-#SBATCH --container-writable
-
-# Container setup
-RUN apt update && DEBIAN_FRONTEND=noninteractive apt install -y build-essential openssh-client python3-dev git && apt clean && rm -rf /var/lib/apt/lists/*
-
-# Setup heat dependencies
-git clone https://github.com/helmholtz-analytics/heat.git
-cd heat
-pip install --upgrade pip
-pip install mpi4py --no-binary :all:
-pip install .[netcdf,hdf5,dev]
-
-# Print environment
-pip list | grep heat
-pip list | grep torch
-python --version
-nvcc --version
-mpirun --version
-
-# Run tests
-HEAT_TEST_USE_DEVICE=gpu mpirun -n 1 pytest heat/