From 02c29940063e3519cdeb7d3a3fca73922d143bad Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Thu, 16 May 2024 19:38:40 +0000 Subject: [PATCH 01/28] mid-work --- Dockerfile.gpu | 21 ++++++++++++++------- scripts/build_proc.sh | 9 +++++++-- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/Dockerfile.gpu b/Dockerfile.gpu index 9066135..dedcd23 100644 --- a/Dockerfile.gpu +++ b/Dockerfile.gpu @@ -1,4 +1,4 @@ -FROM nvidia/cuda:12.4.1-devel-ubuntu20.04 +FROM nvidia/cuda:12.4.1-devel-ubuntu22.04 # For opencontainers label definitions, see: # https://github.com/opencontainers/image-spec/blob/master/annotations.md @@ -61,19 +61,26 @@ SHELL ["/bin/bash", "-l", "-c"] USER ${CONDA_UID} WORKDIR /home/conda/ -RUN curl -sL https://github.com/ASFHyP3/back-projection/archive/refs/tags/v${BACK_PROJECTION_TAG}.tar.gz > ./back-projection.tar.gz && \ - mkdir -p ./back-projection && \ - tar -xvf ./back-projection.tar.gz -C ./back-projection/ --strip=1 && \ - rm ./back-projection.tar.gz && \ - rm -rf ./back-projection/fft +# RUN curl -sL https://github.com/ASFHyP3/back-projection/archive/refs/tags/v${BACK_PROJECTION_TAG}.tar.gz > ./back-projection.tar.gz && \ +# mkdir -p ./back-projection && \ +# tar -xvf ./back-projection.tar.gz -C ./back-projection/ --strip=1 && \ +# rm ./back-projection.tar.gz && \ +# rm -rf ./back-projection/fft + +RUN git clone -b remove_files https://github.com/ASFHyP3/back-projection.git COPY --chown=${CONDA_UID}:${CONDA_GID} ./scripts/build_proc.sh ./back-projection RUN cd /home/conda/back-projection && \ chmod +x ./build_proc.sh && \ - ./build_proc.sh && \ find $PROC_HOME -type f -name "*.py" -exec chmod +x {} + && \ cd /home/conda/ +# COPY --chown=${CONDA_UID}:${CONDA_GID} ./scripts/build_proc.sh ./back-projection +# RUN cd /home/conda/back-projection && \ +# chmod +x ./build_proc.sh && \ +# ./build_proc.sh && \ +# cd /home/conda/ + COPY --chown=${CONDA_UID}:${CONDA_GID} . /hyp3-back-projection/ RUN mamba env create -f /hyp3-back-projection/environment.yml && \ diff --git a/scripts/build_proc.sh b/scripts/build_proc.sh index 08e080e..f72f1dc 100644 --- a/scripts/build_proc.sh +++ b/scripts/build_proc.sh @@ -72,7 +72,6 @@ gfortran -o psinterp psinterp.f90 -fopenmp echo 'Built cosine_sim and psinterp in ps directory' cd .. -tar xf snaphu_v2_0b0_0_0.tar cd snaphu_v2.0b0.0.0/src make CFLAGS=-O3 -s @@ -83,6 +82,12 @@ cd .. 
echo 'built snaphu' +if [[ "$USEGPU" == "true" ]]; then + nvcc -o gpu_arch gpu_arch.cu + echo 'built gpu architecture probe' + ./gpu_arch | cat > GPU_ARCH; source ./GPU_ARCH; rm GPU_ARCH +fi + cd sentinel gcc -c filelen.c io.c sentinel_raw_process.c decode_line_memory.c -lm -fopenmp @@ -90,7 +95,7 @@ gcc -c filelen.c io.c sentinel_raw_process.c decode_line_memory.c -lm -fopenmp echo 'built raw_process components in sentinel' if [[ "$USEGPU" == "true" ]]; then - nvcc -gencode arch=compute_89,code=sm_89 -c azimuth_compress.cu -Wno-deprecated-gpu-targets + nvcc -gencode arch=compute_$GPU_ARCH,code=sm_$GPU_ARCH -c azimuth_compress.cu -Wno-deprecated-gpu-targets fi gfortran -c processsub.f90 backprojectgpusub.f90 bounds.f90 orbitrangetime.f90 latlon.f90 intp_orbit.f90 radar_to_xyz.f90 unitvec.f90 tcnbasis.f90 curvature.f90 cross.f90 orbithermite.f sentineltimingsub.f90 getburststatevectors.f90 -ffixed-line-length-none -fopenmp From 79990f75667e7040598f9227e8f90b2ebeaf3a9f Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Thu, 16 May 2024 21:32:19 +0000 Subject: [PATCH 02/28] first working --- Dockerfile.gpu | 9 ++------- scripts/build_proc.sh | 6 ++++-- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/Dockerfile.gpu b/Dockerfile.gpu index dedcd23..78596a3 100644 --- a/Dockerfile.gpu +++ b/Dockerfile.gpu @@ -61,26 +61,21 @@ SHELL ["/bin/bash", "-l", "-c"] USER ${CONDA_UID} WORKDIR /home/conda/ +# TODO: switch to released version of plugin when remove_files branch is merged # RUN curl -sL https://github.com/ASFHyP3/back-projection/archive/refs/tags/v${BACK_PROJECTION_TAG}.tar.gz > ./back-projection.tar.gz && \ # mkdir -p ./back-projection && \ # tar -xvf ./back-projection.tar.gz -C ./back-projection/ --strip=1 && \ # rm ./back-projection.tar.gz && \ # rm -rf ./back-projection/fft - RUN git clone -b remove_files https://github.com/ASFHyP3/back-projection.git COPY --chown=${CONDA_UID}:${CONDA_GID} ./scripts/build_proc.sh ./back-projection RUN cd /home/conda/back-projection && \ chmod +x ./build_proc.sh && \ find $PROC_HOME -type f -name "*.py" -exec chmod +x {} + && \ + ./build_proc.sh && \ cd /home/conda/ -# COPY --chown=${CONDA_UID}:${CONDA_GID} ./scripts/build_proc.sh ./back-projection -# RUN cd /home/conda/back-projection && \ -# chmod +x ./build_proc.sh && \ -# ./build_proc.sh && \ -# cd /home/conda/ - COPY --chown=${CONDA_UID}:${CONDA_GID} . /hyp3-back-projection/ RUN mamba env create -f /hyp3-back-projection/environment.yml && \ diff --git a/scripts/build_proc.sh b/scripts/build_proc.sh index f72f1dc..09ea840 100644 --- a/scripts/build_proc.sh +++ b/scripts/build_proc.sh @@ -83,9 +83,11 @@ cd .. 
echo 'built snaphu' if [[ "$USEGPU" == "true" ]]; then + # TODO: this is not grabbing the correct value during docker build nvcc -o gpu_arch gpu_arch.cu echo 'built gpu architecture probe' ./gpu_arch | cat > GPU_ARCH; source ./GPU_ARCH; rm GPU_ARCH + echo $GPU_ARCH fi cd sentinel @@ -95,13 +97,13 @@ gcc -c filelen.c io.c sentinel_raw_process.c decode_line_memory.c -lm -fopenmp echo 'built raw_process components in sentinel' if [[ "$USEGPU" == "true" ]]; then - nvcc -gencode arch=compute_$GPU_ARCH,code=sm_$GPU_ARCH -c azimuth_compress.cu -Wno-deprecated-gpu-targets + nvcc -gencode arch=compute_89,code=sm_89 -c azimuth_compress.cu -Wno-deprecated-gpu-targets fi gfortran -c processsub.f90 backprojectgpusub.f90 bounds.f90 orbitrangetime.f90 latlon.f90 intp_orbit.f90 radar_to_xyz.f90 unitvec.f90 tcnbasis.f90 curvature.f90 cross.f90 orbithermite.f sentineltimingsub.f90 getburststatevectors.f90 -ffixed-line-length-none -fopenmp if [[ "$USEGPU" == "true" ]]; then - nvcc -o sentinel_raw_process sentinel_raw_process.o decode_line_memory.o processsub.o backprojectgpusub.o azimuth_compress.o bounds.o orbitrangetime.o latlon.o intp_orbit.o radar_to_xyz.o unitvec.o tcnbasis.o curvature.o cross.o orbithermite.o filelen.o io.o sentineltimingsub.o getburststatevectors.o $FFTW_LIB -lstdc++ -lgfortran -lgomp + nvcc -gencode arch=compute_89,code=sm_89 -o sentinel_raw_process sentinel_raw_process.o decode_line_memory.o processsub.o backprojectgpusub.o azimuth_compress.o bounds.o orbitrangetime.o latlon.o intp_orbit.o radar_to_xyz.o unitvec.o tcnbasis.o curvature.o cross.o orbithermite.o filelen.o io.o sentineltimingsub.o getburststatevectors.o $FFTW_LIB -lstdc++ -lgfortran -lgomp fi cd .. From 8a77eb538679a7be2218197e64ace595e8a5976d Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Thu, 16 May 2024 21:37:22 +0000 Subject: [PATCH 03/28] improve print statement --- scripts/build_proc.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/build_proc.sh b/scripts/build_proc.sh index 09ea840..b6e411f 100644 --- a/scripts/build_proc.sh +++ b/scripts/build_proc.sh @@ -87,7 +87,7 @@ if [[ "$USEGPU" == "true" ]]; then nvcc -o gpu_arch gpu_arch.cu echo 'built gpu architecture probe' ./gpu_arch | cat > GPU_ARCH; source ./GPU_ARCH; rm GPU_ARCH - echo $GPU_ARCH + echo GPU Architecture version is: $GPU_ARCH fi cd sentinel From e26d953163c17b3b24323f711f774ffe5d10b0c9 Mon Sep 17 00:00:00 2001 From: Forrest Williams Date: Fri, 17 May 2024 07:20:44 -0500 Subject: [PATCH 04/28] make gpu capability version a docker arg --- Dockerfile.gpu | 11 +++++++---- scripts/build_proc.sh | 14 +++----------- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/Dockerfile.gpu b/Dockerfile.gpu index 78596a3..1c4c38d 100644 --- a/Dockerfile.gpu +++ b/Dockerfile.gpu @@ -11,16 +11,19 @@ LABEL org.opencontainers.image.url="https://github.com/ASFHyP3/hyp3-back-project LABEL org.opencontainers.image.source="https://github.com/ASFHyP3/hyp3-back-projection" LABEL org.opencontainers.image.documentation="https://hyp3-docs.asf.alaska.edu" +ARG BACK_PROJECTION_TAG=0.2.0 +ARG FFTW_TAG=3.3.9 +# FIXME: should be able to find this dynamically +ARG GPU_ARCH=89 ARG DEBIAN_FRONTEND=noninteractive ARG CONDA_UID=1000 ARG CONDA_GID=1000 -ARG BACK_PROJECTION_TAG=0.2.0 -ARG FFTW_TAG=3.3.9 ARG MINIFORGE_NAME=Miniforge3 ARG MINIFORGE_VERSION=24.3.0-0 -# USEGPU environment variable used by build_proc.sh -ENV USEGPU="true" +# GPU_ARCH and USEGPU environment variable used by build_proc.sh +ENV GPU_ARCH=${GPU_ARCH} +ENV USEGPU=true ENV 
CONDA_DIR=/opt/conda ENV LANG=C.UTF-8 LC_ALL=C.UTF-8 ENV PATH=${CONDA_DIR}/bin:${PATH} diff --git a/scripts/build_proc.sh b/scripts/build_proc.sh index b6e411f..e69074b 100644 --- a/scripts/build_proc.sh +++ b/scripts/build_proc.sh @@ -4,7 +4,7 @@ MULTIARCH_DIR=/usr/lib/$(gcc -print-multiarch) FFTW_LIB=$MULTIARCH_DIR/libfftw3f.a echo 'using FFTW library:' $FFTW_LIB if [[ "$USEGPU" == "true" ]]; then - echo 'building with GPU support' + echo 'building with GPU support, capability version' $GPU_ARCH fi cd DEM @@ -82,14 +82,6 @@ cd .. echo 'built snaphu' -if [[ "$USEGPU" == "true" ]]; then - # TODO: this is not grabbing the correct value during docker build - nvcc -o gpu_arch gpu_arch.cu - echo 'built gpu architecture probe' - ./gpu_arch | cat > GPU_ARCH; source ./GPU_ARCH; rm GPU_ARCH - echo GPU Architecture version is: $GPU_ARCH -fi - cd sentinel gcc -c filelen.c io.c sentinel_raw_process.c decode_line_memory.c -lm -fopenmp @@ -97,13 +89,13 @@ gcc -c filelen.c io.c sentinel_raw_process.c decode_line_memory.c -lm -fopenmp echo 'built raw_process components in sentinel' if [[ "$USEGPU" == "true" ]]; then - nvcc -gencode arch=compute_89,code=sm_89 -c azimuth_compress.cu -Wno-deprecated-gpu-targets + nvcc -gencode arch=compute_$GPU_ARCH,code=sm_$GPU_ARCH -c azimuth_compress.cu -Wno-deprecated-gpu-targets fi gfortran -c processsub.f90 backprojectgpusub.f90 bounds.f90 orbitrangetime.f90 latlon.f90 intp_orbit.f90 radar_to_xyz.f90 unitvec.f90 tcnbasis.f90 curvature.f90 cross.f90 orbithermite.f sentineltimingsub.f90 getburststatevectors.f90 -ffixed-line-length-none -fopenmp if [[ "$USEGPU" == "true" ]]; then - nvcc -gencode arch=compute_89,code=sm_89 -o sentinel_raw_process sentinel_raw_process.o decode_line_memory.o processsub.o backprojectgpusub.o azimuth_compress.o bounds.o orbitrangetime.o latlon.o intp_orbit.o radar_to_xyz.o unitvec.o tcnbasis.o curvature.o cross.o orbithermite.o filelen.o io.o sentineltimingsub.o getburststatevectors.o $FFTW_LIB -lstdc++ -lgfortran -lgomp + nvcc -gencode arch=compute_$GPU_ARCH,code=sm_$GPU_ARCH -o sentinel_raw_process sentinel_raw_process.o decode_line_memory.o processsub.o backprojectgpusub.o azimuth_compress.o bounds.o orbitrangetime.o latlon.o intp_orbit.o radar_to_xyz.o unitvec.o tcnbasis.o curvature.o cross.o orbithermite.o filelen.o io.o sentineltimingsub.o getburststatevectors.o $FFTW_LIB -lstdc++ -lgfortran -lgomp fi cd .. 
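A possible answer to the `# FIXME: should be able to find this dynamically` note in the patch above: on a build host with a recent NVIDIA driver, the compute capability can be read from `nvidia-smi` and passed in as the `GPU_ARCH` build arg. This is only a sketch under that assumption (the `compute_cap` query field exists only in newer driver releases, and it obviously requires building on a GPU host):

```bash
# Query the compute capability of the first GPU (e.g. "8.9") and strip the dot -> "89"
GPU_ARCH=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader | head -n1 | tr -d '.')
docker build --build-arg="GPU_ARCH=${GPU_ARCH}" -t back-projection:gpu -f Dockerfile.gpu .
```
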
From f2e7cc9d28ade8a77ae73f583801efedce8014bb Mon Sep 17 00:00:00 2001 From: Forrest Williams Date: Fri, 17 May 2024 07:37:03 -0500 Subject: [PATCH 05/28] small cleanup --- Dockerfile.gpu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile.gpu b/Dockerfile.gpu index 1c4c38d..f58819e 100644 --- a/Dockerfile.gpu +++ b/Dockerfile.gpu @@ -64,13 +64,13 @@ SHELL ["/bin/bash", "-l", "-c"] USER ${CONDA_UID} WORKDIR /home/conda/ -# TODO: switch to released version of plugin when remove_files branch is merged +# FIXME: switch to released version of plugin when remove_files branch is merged +RUN git clone -b remove_files https://github.com/ASFHyP3/back-projection.git # RUN curl -sL https://github.com/ASFHyP3/back-projection/archive/refs/tags/v${BACK_PROJECTION_TAG}.tar.gz > ./back-projection.tar.gz && \ # mkdir -p ./back-projection && \ # tar -xvf ./back-projection.tar.gz -C ./back-projection/ --strip=1 && \ # rm ./back-projection.tar.gz && \ # rm -rf ./back-projection/fft -RUN git clone -b remove_files https://github.com/ASFHyP3/back-projection.git COPY --chown=${CONDA_UID}:${CONDA_GID} ./scripts/build_proc.sh ./back-projection RUN cd /home/conda/back-projection && \ From c63ff9248427e36a62d2c4ff293b2ee4bf38ba41 Mon Sep 17 00:00:00 2001 From: Forrest Williams Date: Fri, 17 May 2024 07:45:59 -0500 Subject: [PATCH 06/28] multistage attempt 1 --- Dockerfile.gpu | 138 ++++++++++++++++++++++++++----------------------- 1 file changed, 72 insertions(+), 66 deletions(-) diff --git a/Dockerfile.gpu b/Dockerfile.gpu index f58819e..60ab843 100644 --- a/Dockerfile.gpu +++ b/Dockerfile.gpu @@ -1,69 +1,17 @@ -FROM nvidia/cuda:12.4.1-devel-ubuntu22.04 - -# For opencontainers label definitions, see: -# https://github.com/opencontainers/image-spec/blob/master/annotations.md -LABEL org.opencontainers.image.title="HyP3 back-projection" -LABEL org.opencontainers.image.description="HyP3 plugin for back-projection processing" -LABEL org.opencontainers.image.vendor="Alaska Satellite Facility" -LABEL org.opencontainers.image.authors="ASF Tools Team " -LABEL org.opencontainers.image.licenses="BSD-3-Clause" -LABEL org.opencontainers.image.url="https://github.com/ASFHyP3/hyp3-back-projection" -LABEL org.opencontainers.image.source="https://github.com/ASFHyP3/hyp3-back-projection" -LABEL org.opencontainers.image.documentation="https://hyp3-docs.asf.alaska.edu" +FROM nvidia/cuda:12.4.1-devel-ubuntu22.04 as builder ARG BACK_PROJECTION_TAG=0.2.0 ARG FFTW_TAG=3.3.9 # FIXME: should be able to find this dynamically ARG GPU_ARCH=89 -ARG DEBIAN_FRONTEND=noninteractive -ARG CONDA_UID=1000 -ARG CONDA_GID=1000 -ARG MINIFORGE_NAME=Miniforge3 -ARG MINIFORGE_VERSION=24.3.0-0 # GPU_ARCH and USEGPU environment variable used by build_proc.sh ENV GPU_ARCH=${GPU_ARCH} -ENV USEGPU=true -ENV CONDA_DIR=/opt/conda -ENV LANG=C.UTF-8 LC_ALL=C.UTF-8 -ENV PATH=${CONDA_DIR}/bin:${PATH} -ENV PYTHONDONTWRITEBYTECODE=true -ENV PROC_HOME=/home/conda/back-projection -ENV MYHOME=/home/conda - -# Conda setup -RUN apt-get update > /dev/null && \ - apt-get install --no-install-recommends --yes \ - wget bzip2 ca-certificates \ - git \ - tini \ - > /dev/null && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* && \ - wget --no-hsts --quiet https://github.com/conda-forge/miniforge/releases/download/${MINIFORGE_VERSION}/${MINIFORGE_NAME}-${MINIFORGE_VERSION}-Linux-$(uname -m).sh -O /tmp/miniforge.sh && \ - /bin/bash /tmp/miniforge.sh -b -p ${CONDA_DIR} && \ - rm /tmp/miniforge.sh && \ - conda clean --tarballs --index-cache --packages 
--yes && \ - find ${CONDA_DIR} -follow -type f -name '*.a' -delete && \ - find ${CONDA_DIR} -follow -type f -name '*.pyc' -delete && \ - conda clean --force-pkgs-dirs --all --yes && \ - echo ". ${CONDA_DIR}/etc/profile.d/conda.sh && conda activate base" >> /etc/skel/.bashrc && \ - echo ". ${CONDA_DIR}/etc/profile.d/conda.sh && conda activate base" >> ~/.bashrc +ENV USEGPU=true RUN apt-get update && apt-get install -y --no-install-recommends unzip vim curl build-essential gfortran libfftw3-dev && \ apt-get clean && rm -rf /var/lib/apt/lists/* -RUN groupadd -g "${CONDA_GID}" --system conda && \ - useradd -l -u "${CONDA_UID}" -g "${CONDA_GID}" --system -d /home/conda -m -s /bin/bash conda && \ - chown -R conda:conda /opt && \ - echo ". /opt/conda/etc/profile.d/conda.sh" >> /home/conda/.profile && \ - echo "conda activate base" >> /home/conda/.profile - -SHELL ["/bin/bash", "-l", "-c"] - -USER ${CONDA_UID} -WORKDIR /home/conda/ - # FIXME: switch to released version of plugin when remove_files branch is merged RUN git clone -b remove_files https://github.com/ASFHyP3/back-projection.git # RUN curl -sL https://github.com/ASFHyP3/back-projection/archive/refs/tags/v${BACK_PROJECTION_TAG}.tar.gz > ./back-projection.tar.gz && \ @@ -72,20 +20,78 @@ RUN git clone -b remove_files https://github.com/ASFHyP3/back-projection.git # rm ./back-projection.tar.gz && \ # rm -rf ./back-projection/fft -COPY --chown=${CONDA_UID}:${CONDA_GID} ./scripts/build_proc.sh ./back-projection -RUN cd /home/conda/back-projection && \ +COPY . /hyp3-back-projection/ +COPY ./scripts/build_proc.sh ./back-projection +RUN cd /back-projection && \ chmod +x ./build_proc.sh && \ find $PROC_HOME -type f -name "*.py" -exec chmod +x {} + && \ ./build_proc.sh && \ cd /home/conda/ -COPY --chown=${CONDA_UID}:${CONDA_GID} . 
/hyp3-back-projection/ - -RUN mamba env create -f /hyp3-back-projection/environment.yml && \ - conda clean -afy && \ - conda activate hyp3-back-projection && \ - sed -i 's/conda activate base/conda activate hyp3-back-projection/g' /home/conda/.profile && \ - python -m pip install --no-cache-dir /hyp3-back-projection - -ENTRYPOINT ["/hyp3-back-projection/src/hyp3_back_projection/etc/entrypoint.sh"] -CMD ["-h"] +# # For opencontainers label definitions, see: +# # https://github.com/opencontainers/image-spec/blob/master/annotations.md +# LABEL org.opencontainers.image.title="HyP3 back-projection" +# LABEL org.opencontainers.image.description="HyP3 plugin for back-projection processing" +# LABEL org.opencontainers.image.vendor="Alaska Satellite Facility" +# LABEL org.opencontainers.image.authors="ASF Tools Team " +# LABEL org.opencontainers.image.licenses="BSD-3-Clause" +# LABEL org.opencontainers.image.url="https://github.com/ASFHyP3/hyp3-back-projection" +# LABEL org.opencontainers.image.source="https://github.com/ASFHyP3/hyp3-back-projection" +# LABEL org.opencontainers.image.documentation="https://hyp3-docs.asf.alaska.edu" +# +# ARG DEBIAN_FRONTEND=noninteractive +# ARG CONDA_UID=1000 +# ARG CONDA_GID=1000 +# ARG MINIFORGE_NAME=Miniforge3 +# ARG MINIFORGE_VERSION=24.3.0-0 +# +# ENV CONDA_DIR=/opt/conda +# ENV LANG=C.UTF-8 LC_ALL=C.UTF-8 +# ENV PATH=${CONDA_DIR}/bin:${PATH} +# ENV PYTHONDONTWRITEBYTECODE=true +# ENV PROC_HOME=/home/conda/back-projection +# ENV MYHOME=/home/conda +# +# # Conda setup +# RUN apt-get update > /dev/null && \ +# apt-get install --no-install-recommends --yes \ +# wget bzip2 ca-certificates \ +# git \ +# tini \ +# > /dev/null && \ +# apt-get clean && \ +# rm -rf /var/lib/apt/lists/* && \ +# wget --no-hsts --quiet https://github.com/conda-forge/miniforge/releases/download/${MINIFORGE_VERSION}/${MINIFORGE_NAME}-${MINIFORGE_VERSION}-Linux-$(uname -m).sh -O /tmp/miniforge.sh && \ +# /bin/bash /tmp/miniforge.sh -b -p ${CONDA_DIR} && \ +# rm /tmp/miniforge.sh && \ +# conda clean --tarballs --index-cache --packages --yes && \ +# find ${CONDA_DIR} -follow -type f -name '*.a' -delete && \ +# find ${CONDA_DIR} -follow -type f -name '*.pyc' -delete && \ +# conda clean --force-pkgs-dirs --all --yes && \ +# echo ". ${CONDA_DIR}/etc/profile.d/conda.sh && conda activate base" >> /etc/skel/.bashrc && \ +# echo ". ${CONDA_DIR}/etc/profile.d/conda.sh && conda activate base" >> ~/.bashrc +# +# RUN apt-get update && apt-get install -y --no-install-recommends unzip vim curl && \ +# apt-get clean && rm -rf /var/lib/apt/lists/* +# +# RUN groupadd -g "${CONDA_GID}" --system conda && \ +# useradd -l -u "${CONDA_UID}" -g "${CONDA_GID}" --system -d /home/conda -m -s /bin/bash conda && \ +# chown -R conda:conda /opt && \ +# echo ". /opt/conda/etc/profile.d/conda.sh" >> /home/conda/.profile && \ +# echo "conda activate base" >> /home/conda/.profile +# +# SHELL ["/bin/bash", "-l", "-c"] +# +# USER ${CONDA_UID} +# WORKDIR /home/conda/ +# +# COPY --chown=${CONDA_UID}:${CONDA_GID} . 
/hyp3-back-projection/ +# +# RUN mamba env create -f /hyp3-back-projection/environment.yml && \ +# conda clean -afy && \ +# conda activate hyp3-back-projection && \ +# sed -i 's/conda activate base/conda activate hyp3-back-projection/g' /home/conda/.profile && \ +# python -m pip install --no-cache-dir /hyp3-back-projection +# +# ENTRYPOINT ["/hyp3-back-projection/src/hyp3_back_projection/etc/entrypoint.sh"] +# CMD ["-h"] From 5e1d37d542cbd25413d4635d16df7e4661e4609a Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Fri, 17 May 2024 13:35:08 +0000 Subject: [PATCH 07/28] working multistage --- Dockerfile.gpu | 148 +++++++++++++++++++++++++------------------------ 1 file changed, 76 insertions(+), 72 deletions(-) diff --git a/Dockerfile.gpu b/Dockerfile.gpu index 60ab843..c5a66d3 100644 --- a/Dockerfile.gpu +++ b/Dockerfile.gpu @@ -1,15 +1,16 @@ FROM nvidia/cuda:12.4.1-devel-ubuntu22.04 as builder -ARG BACK_PROJECTION_TAG=0.2.0 -ARG FFTW_TAG=3.3.9 # FIXME: should be able to find this dynamically ARG GPU_ARCH=89 +ARG BACK_PROJECTION_TAG=0.2.0 +ARG FFTW_TAG=3.3.9 # GPU_ARCH and USEGPU environment variable used by build_proc.sh ENV GPU_ARCH=${GPU_ARCH} ENV USEGPU=true -RUN apt-get update && apt-get install -y --no-install-recommends unzip vim curl build-essential gfortran libfftw3-dev && \ +# FIXME: can remove git after switch back to released version of back-projection +RUN apt-get update && apt-get install -y --no-install-recommends unzip vim curl git build-essential gfortran libfftw3-dev && \ apt-get clean && rm -rf /var/lib/apt/lists/* # FIXME: switch to released version of plugin when remove_files branch is merged @@ -26,72 +27,75 @@ RUN cd /back-projection && \ chmod +x ./build_proc.sh && \ find $PROC_HOME -type f -name "*.py" -exec chmod +x {} + && \ ./build_proc.sh && \ - cd /home/conda/ - -# # For opencontainers label definitions, see: -# # https://github.com/opencontainers/image-spec/blob/master/annotations.md -# LABEL org.opencontainers.image.title="HyP3 back-projection" -# LABEL org.opencontainers.image.description="HyP3 plugin for back-projection processing" -# LABEL org.opencontainers.image.vendor="Alaska Satellite Facility" -# LABEL org.opencontainers.image.authors="ASF Tools Team " -# LABEL org.opencontainers.image.licenses="BSD-3-Clause" -# LABEL org.opencontainers.image.url="https://github.com/ASFHyP3/hyp3-back-projection" -# LABEL org.opencontainers.image.source="https://github.com/ASFHyP3/hyp3-back-projection" -# LABEL org.opencontainers.image.documentation="https://hyp3-docs.asf.alaska.edu" -# -# ARG DEBIAN_FRONTEND=noninteractive -# ARG CONDA_UID=1000 -# ARG CONDA_GID=1000 -# ARG MINIFORGE_NAME=Miniforge3 -# ARG MINIFORGE_VERSION=24.3.0-0 -# -# ENV CONDA_DIR=/opt/conda -# ENV LANG=C.UTF-8 LC_ALL=C.UTF-8 -# ENV PATH=${CONDA_DIR}/bin:${PATH} -# ENV PYTHONDONTWRITEBYTECODE=true -# ENV PROC_HOME=/home/conda/back-projection -# ENV MYHOME=/home/conda -# -# # Conda setup -# RUN apt-get update > /dev/null && \ -# apt-get install --no-install-recommends --yes \ -# wget bzip2 ca-certificates \ -# git \ -# tini \ -# > /dev/null && \ -# apt-get clean && \ -# rm -rf /var/lib/apt/lists/* && \ -# wget --no-hsts --quiet https://github.com/conda-forge/miniforge/releases/download/${MINIFORGE_VERSION}/${MINIFORGE_NAME}-${MINIFORGE_VERSION}-Linux-$(uname -m).sh -O /tmp/miniforge.sh && \ -# /bin/bash /tmp/miniforge.sh -b -p ${CONDA_DIR} && \ -# rm /tmp/miniforge.sh && \ -# conda clean --tarballs --index-cache --packages --yes && \ -# find ${CONDA_DIR} -follow -type f -name '*.a' -delete && \ -# find 
${CONDA_DIR} -follow -type f -name '*.pyc' -delete && \ -# conda clean --force-pkgs-dirs --all --yes && \ -# echo ". ${CONDA_DIR}/etc/profile.d/conda.sh && conda activate base" >> /etc/skel/.bashrc && \ -# echo ". ${CONDA_DIR}/etc/profile.d/conda.sh && conda activate base" >> ~/.bashrc -# -# RUN apt-get update && apt-get install -y --no-install-recommends unzip vim curl && \ -# apt-get clean && rm -rf /var/lib/apt/lists/* -# -# RUN groupadd -g "${CONDA_GID}" --system conda && \ -# useradd -l -u "${CONDA_UID}" -g "${CONDA_GID}" --system -d /home/conda -m -s /bin/bash conda && \ -# chown -R conda:conda /opt && \ -# echo ". /opt/conda/etc/profile.d/conda.sh" >> /home/conda/.profile && \ -# echo "conda activate base" >> /home/conda/.profile -# -# SHELL ["/bin/bash", "-l", "-c"] -# -# USER ${CONDA_UID} -# WORKDIR /home/conda/ -# -# COPY --chown=${CONDA_UID}:${CONDA_GID} . /hyp3-back-projection/ -# -# RUN mamba env create -f /hyp3-back-projection/environment.yml && \ -# conda clean -afy && \ -# conda activate hyp3-back-projection && \ -# sed -i 's/conda activate base/conda activate hyp3-back-projection/g' /home/conda/.profile && \ -# python -m pip install --no-cache-dir /hyp3-back-projection -# -# ENTRYPOINT ["/hyp3-back-projection/src/hyp3_back_projection/etc/entrypoint.sh"] -# CMD ["-h"] + cd / + +FROM nvidia/cuda:12.4.1-runtime-ubuntu22.04 as runner + +# For opencontainers label definitions, see: +# https://github.com/opencontainers/image-spec/blob/master/annotations.md +LABEL org.opencontainers.image.title="HyP3 back-projection" +LABEL org.opencontainers.image.description="HyP3 plugin for back-projection processing" +LABEL org.opencontainers.image.vendor="Alaska Satellite Facility" +LABEL org.opencontainers.image.authors="ASF Tools Team " +LABEL org.opencontainers.image.licenses="BSD-3-Clause" +LABEL org.opencontainers.image.url="https://github.com/ASFHyP3/hyp3-back-projection" +LABEL org.opencontainers.image.source="https://github.com/ASFHyP3/hyp3-back-projection" +LABEL org.opencontainers.image.documentation="https://hyp3-docs.asf.alaska.edu" + +ARG DEBIAN_FRONTEND=noninteractive +ARG CONDA_UID=1000 +ARG CONDA_GID=1000 +ARG MINIFORGE_NAME=Miniforge3 +ARG MINIFORGE_VERSION=24.3.0-0 + +ENV CONDA_DIR=/opt/conda +ENV LANG=C.UTF-8 LC_ALL=C.UTF-8 +ENV PATH=${CONDA_DIR}/bin:${PATH} +ENV PYTHONDONTWRITEBYTECODE=true +ENV PROC_HOME=/back-projection +ENV MYHOME=/home/conda + +# Conda setup +RUN apt-get update > /dev/null && \ + apt-get install --no-install-recommends --yes \ + wget bzip2 ca-certificates \ + git \ + tini \ + > /dev/null && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* && \ + wget --no-hsts --quiet https://github.com/conda-forge/miniforge/releases/download/${MINIFORGE_VERSION}/${MINIFORGE_NAME}-${MINIFORGE_VERSION}-Linux-$(uname -m).sh -O /tmp/miniforge.sh && \ + /bin/bash /tmp/miniforge.sh -b -p ${CONDA_DIR} && \ + rm /tmp/miniforge.sh && \ + conda clean --tarballs --index-cache --packages --yes && \ + find ${CONDA_DIR} -follow -type f -name '*.a' -delete && \ + find ${CONDA_DIR} -follow -type f -name '*.pyc' -delete && \ + conda clean --force-pkgs-dirs --all --yes && \ + echo ". ${CONDA_DIR}/etc/profile.d/conda.sh && conda activate base" >> /etc/skel/.bashrc && \ + echo ". 
${CONDA_DIR}/etc/profile.d/conda.sh && conda activate base" >> ~/.bashrc + +RUN apt-get update && apt-get install -y --no-install-recommends unzip vim curl gfortran && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +RUN groupadd -g "${CONDA_GID}" --system conda && \ + useradd -l -u "${CONDA_UID}" -g "${CONDA_GID}" --system -d /home/conda -m -s /bin/bash conda && \ + chown -R conda:conda /opt && \ + echo ". /opt/conda/etc/profile.d/conda.sh" >> /home/conda/.profile && \ + echo "conda activate base" >> /home/conda/.profile + +SHELL ["/bin/bash", "-l", "-c"] + +USER ${CONDA_UID} +WORKDIR /home/conda/ + +COPY --chown=${CONDA_UID}:${CONDA_GID} --from=builder /back-projection /back-projection +COPY --chown=${CONDA_UID}:${CONDA_GID} --from=builder /hyp3-back-projection /hyp3-back-projection + +RUN mamba env create -f /hyp3-back-projection/environment.yml && \ + conda clean -afy && \ + conda activate hyp3-back-projection && \ + sed -i 's/conda activate base/conda activate hyp3-back-projection/g' /home/conda/.profile && \ + python -m pip install --no-cache-dir /hyp3-back-projection + +ENTRYPOINT ["/hyp3-back-projection/src/hyp3_back_projection/etc/entrypoint.sh"] +CMD ["-h"] From 20989525f861470fcc07894fe691746a76e0883a Mon Sep 17 00:00:00 2001 From: Forrest Williams Date: Fri, 17 May 2024 10:22:26 -0500 Subject: [PATCH 08/28] update readme --- README.md | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index 933c3f2..f5c55b4 100644 --- a/README.md +++ b/README.md @@ -59,28 +59,11 @@ can be found [here](https://docs.nvidia.com/datacenter/cloud-native/container-to ### EC2 Setup When running on an EC2 instance, the following setup is recommended: -1. Create a [P3-family EC2 instance](https://aws.amazon.com/ec2/instance-types/p3/) with the [Amazon Linux 2 AMI with NVIDIA TESLA GPU Driver](https://aws.amazon.com/marketplace/pp/prodview-64e4rx3h733ru?sr=0-4&ref_=beagle&applicationId=AWSMPContessa) -2. Install Docker and the nvidia-container-toolkit on the EC2 instance: +1. Create a [G6-family EC2 instance](https://aws.amazon.com/ec2/instance-types/g6/) with the [Amazon Linux 2 Deep Learning AMI with NVIDIA Drivers](https://aws.amazon.com/machine-learning/amis/features/) +2. Build the GPU docker container with the correct compute capability version. To determine this value, run `nvidia-smi` on the instance to obtain GPU type, then cross-reference this information with NVIDIA's [GPU type compute capability list](https://developer.nvidia.com/cuda-gpus). For a g6.xlarge instance, this would be: ```bash -sudo yum-config-manager --disable amzn2-graphics -curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo | sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo -sudo yum install docker -y -sudo yum install nvidia-container-toolkit -y -sudo yum-config-manager --enable amzn2-graphics -``` -3. Optionally, set up Docker to not require `sudo` and to start when the EC2 instance starts -```bash -sudo systemctl start docker && \ -sudo usermod -a -G docker ec2-user && \ -sudo systemctl enable docker -``` -4. Exit the EC2 instance and re-enter -5. To test the GPU setup, run the base NVIDIA container: -```bash -docker run -it --gpus all nvidia/cuda:12.4.1-devel-ubuntu20.04 nvidia-smi -``` -6. Build the actual container and run it: -```bash -docker build -t back-projection:gpu -f Dockerfile.gpu . 
-docker run --gpus=all --rm -it back-projection:gpu ++process back_projection --help +docker --build-arg="GPU_ARCH=89" -t back-projection:gpu-89 -f Dockerfile.gpu . ``` +Note: this only needs to be done once per instance type, since the compute capability version will always be the same for a given instance type. + +The default value for this argument is `89` - the correct value for g6.xlarge instances. From 522e68e6b47fe601fe7fa202629ee824bb05c94c Mon Sep 17 00:00:00 2001 From: Forrest Williams Date: Fri, 17 May 2024 10:38:31 -0500 Subject: [PATCH 09/28] add ubuntu setup --- scripts/ubuntu_setup.sh | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100755 scripts/ubuntu_setup.sh diff --git a/scripts/ubuntu_setup.sh b/scripts/ubuntu_setup.sh new file mode 100755 index 0000000..6482b53 --- /dev/null +++ b/scripts/ubuntu_setup.sh @@ -0,0 +1,23 @@ +# GPU setup for the ubuntu 22.04 AMI + +# NVIDIA source setup +curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg && \ +curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \ +sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \ +sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list && \ +wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \ +sudo dpkg -i cuda-keyring_1.1-1_all.deb && \ +rm cuda-keyring_1.1-1_all.deb + +# Docker source setup +sudo apt install -y ca-certificates curl gnupg lsb-release && \ +sudo mkdir -p /etc/apt/keyrings && \ +curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg && \ +echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null + +# Installs +sudo apt-get update && \ +sudo apt-get install -y nvidia-headless-535-server nvidia-utils-535-server docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin && \ +sudo usermod -aG docker $USER + +# RESTART YOUR INSTANCE!!! 
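After the reboot required by the setup script above, a quick smoke test that Docker can see the GPU (assuming the NVIDIA container toolkit has been installed and configured, as described in the README) is to run `nvidia-smi` inside the same CUDA base image used by `Dockerfile.gpu`:

```bash
# Should print the same GPU table as running nvidia-smi directly on the host
docker run --rm --gpus all nvidia/cuda:12.4.1-devel-ubuntu22.04 nvidia-smi
```
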
From a8494e54889d996c2a694a01a3bada2af284f8b2 Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Fri, 17 May 2024 21:12:29 +0000 Subject: [PATCH 10/28] switch gpu build to use RHEL --- Dockerfile.gpu | 38 ++++++++++++-------------------------- scripts/build_proc.sh | 4 ++-- 2 files changed, 14 insertions(+), 28 deletions(-) mode change 100644 => 100755 scripts/build_proc.sh diff --git a/Dockerfile.gpu b/Dockerfile.gpu index c5a66d3..6d8c7ae 100644 --- a/Dockerfile.gpu +++ b/Dockerfile.gpu @@ -1,35 +1,27 @@ -FROM nvidia/cuda:12.4.1-devel-ubuntu22.04 as builder +FROM nvidia/cuda:12.4.1-devel-ubi9 as builder # FIXME: should be able to find this dynamically ARG GPU_ARCH=89 -ARG BACK_PROJECTION_TAG=0.2.0 -ARG FFTW_TAG=3.3.9 +ARG FFTW_LIB=/usr/lib64/libfftw3f.so # GPU_ARCH and USEGPU environment variable used by build_proc.sh ENV GPU_ARCH=${GPU_ARCH} ENV USEGPU=true -# FIXME: can remove git after switch back to released version of back-projection -RUN apt-get update && apt-get install -y --no-install-recommends unzip vim curl git build-essential gfortran libfftw3-dev && \ - apt-get clean && rm -rf /var/lib/apt/lists/* - -# FIXME: switch to released version of plugin when remove_files branch is merged +RUN dnf -y update && dnf install --allowerasing -y unzip vim curl git make automake gcc gcc-c++ gfortran fftw-devel && \ + dnf clean all && rm -rf /var/cache/dnf/* + +# FIXME: switch main branch when remove_files branch is merged RUN git clone -b remove_files https://github.com/ASFHyP3/back-projection.git -# RUN curl -sL https://github.com/ASFHyP3/back-projection/archive/refs/tags/v${BACK_PROJECTION_TAG}.tar.gz > ./back-projection.tar.gz && \ -# mkdir -p ./back-projection && \ -# tar -xvf ./back-projection.tar.gz -C ./back-projection/ --strip=1 && \ -# rm ./back-projection.tar.gz && \ -# rm -rf ./back-projection/fft COPY . /hyp3-back-projection/ COPY ./scripts/build_proc.sh ./back-projection RUN cd /back-projection && \ - chmod +x ./build_proc.sh && \ find $PROC_HOME -type f -name "*.py" -exec chmod +x {} + && \ ./build_proc.sh && \ cd / -FROM nvidia/cuda:12.4.1-runtime-ubuntu22.04 as runner +FROM nvidia/cuda:12.4.1-runtime-ubi9 as runner # For opencontainers label definitions, see: # https://github.com/opencontainers/image-spec/blob/master/annotations.md @@ -56,26 +48,19 @@ ENV PROC_HOME=/back-projection ENV MYHOME=/home/conda # Conda setup -RUN apt-get update > /dev/null && \ - apt-get install --no-install-recommends --yes \ - wget bzip2 ca-certificates \ - git \ - tini \ - > /dev/null && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* && \ +RUN dnf install --setopt=install_weak_deps=False --nodocs -y wget bzip2 ca-certificates git > /dev/null && \ wget --no-hsts --quiet https://github.com/conda-forge/miniforge/releases/download/${MINIFORGE_VERSION}/${MINIFORGE_NAME}-${MINIFORGE_VERSION}-Linux-$(uname -m).sh -O /tmp/miniforge.sh && \ /bin/bash /tmp/miniforge.sh -b -p ${CONDA_DIR} && \ rm /tmp/miniforge.sh && \ conda clean --tarballs --index-cache --packages --yes && \ find ${CONDA_DIR} -follow -type f -name '*.a' -delete && \ find ${CONDA_DIR} -follow -type f -name '*.pyc' -delete && \ - conda clean --force-pkgs-dirs --all --yes && \ + conda clean --force-pkgs-dirs --all --yes && \ echo ". ${CONDA_DIR}/etc/profile.d/conda.sh && conda activate base" >> /etc/skel/.bashrc && \ echo ". 
${CONDA_DIR}/etc/profile.d/conda.sh && conda activate base" >> ~/.bashrc -RUN apt-get update && apt-get install -y --no-install-recommends unzip vim curl gfortran && \ - apt-get clean && rm -rf /var/lib/apt/lists/* +RUN dnf -y update && dnf install --allowerasing -y unzip vim curl gfortran fftw-devel && \ + dnf clean all && rm -rf /var/cache/dnf/* RUN groupadd -g "${CONDA_GID}" --system conda && \ useradd -l -u "${CONDA_UID}" -g "${CONDA_GID}" --system -d /home/conda -m -s /bin/bash conda && \ @@ -88,6 +73,7 @@ SHELL ["/bin/bash", "-l", "-c"] USER ${CONDA_UID} WORKDIR /home/conda/ +# UIDs above 999 cause a warning in rockylinux systems, but are OK in this case. COPY --chown=${CONDA_UID}:${CONDA_GID} --from=builder /back-projection /back-projection COPY --chown=${CONDA_UID}:${CONDA_GID} --from=builder /hyp3-back-projection /hyp3-back-projection diff --git a/scripts/build_proc.sh b/scripts/build_proc.sh old mode 100644 new mode 100755 index e69074b..5ac96d6 --- a/scripts/build_proc.sh +++ b/scripts/build_proc.sh @@ -1,7 +1,7 @@ #!/bin/bash -MULTIARCH_DIR=/usr/lib/$(gcc -print-multiarch) -FFTW_LIB=$MULTIARCH_DIR/libfftw3f.a +# MULTIARCH_DIR=/usr/lib/$(gcc -print-multiarch) +# FFTW_LIB=$MULTIARCH_DIR/libfftw3f.a echo 'using FFTW library:' $FFTW_LIB if [[ "$USEGPU" == "true" ]]; then echo 'building with GPU support, capability version' $GPU_ARCH From 290b04298e7d01791d7cbc3603c580100082bdba Mon Sep 17 00:00:00 2001 From: Forrest Williams Date: Fri, 17 May 2024 16:24:32 -0500 Subject: [PATCH 11/28] seperate RHEL and ubuntu dockerfiles --- Dockerfile.gpu | 38 +++++++++++++------- Dockerfile.rhel.gpu | 87 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 113 insertions(+), 12 deletions(-) create mode 100644 Dockerfile.rhel.gpu diff --git a/Dockerfile.gpu b/Dockerfile.gpu index 6d8c7ae..9f3c116 100644 --- a/Dockerfile.gpu +++ b/Dockerfile.gpu @@ -1,27 +1,35 @@ -FROM nvidia/cuda:12.4.1-devel-ubi9 as builder +FROM nvidia/cuda:12.4.1-devel-ubuntu22.04 as builder # FIXME: should be able to find this dynamically ARG GPU_ARCH=89 -ARG FFTW_LIB=/usr/lib64/libfftw3f.so +ARG BACK_PROJECTION_TAG=0.2.0 # GPU_ARCH and USEGPU environment variable used by build_proc.sh +ENV FFTW_LIB=/usr/lib/x86_64-linux-gnu/libfftw3f.a ENV GPU_ARCH=${GPU_ARCH} ENV USEGPU=true -RUN dnf -y update && dnf install --allowerasing -y unzip vim curl git make automake gcc gcc-c++ gfortran fftw-devel && \ - dnf clean all && rm -rf /var/cache/dnf/* - -# FIXME: switch main branch when remove_files branch is merged +# FIXME: can remove git after switch back to released version of back-projection +RUN apt-get update && apt-get install -y --no-install-recommends unzip vim curl git build-essential gfortran libfftw3-dev && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +# FIXME: switch to released version of plugin when remove_files branch is merged RUN git clone -b remove_files https://github.com/ASFHyP3/back-projection.git +# RUN curl -sL https://github.com/ASFHyP3/back-projection/archive/refs/tags/v${BACK_PROJECTION_TAG}.tar.gz > ./back-projection.tar.gz && \ +# mkdir -p ./back-projection && \ +# tar -xvf ./back-projection.tar.gz -C ./back-projection/ --strip=1 && \ +# rm ./back-projection.tar.gz && \ +# rm -rf ./back-projection/fft COPY . 
/hyp3-back-projection/ COPY ./scripts/build_proc.sh ./back-projection RUN cd /back-projection && \ + chmod +x ./build_proc.sh && \ find $PROC_HOME -type f -name "*.py" -exec chmod +x {} + && \ ./build_proc.sh && \ cd / -FROM nvidia/cuda:12.4.1-runtime-ubi9 as runner +FROM nvidia/cuda:12.4.1-runtime-ubuntu22.04 as runner # For opencontainers label definitions, see: # https://github.com/opencontainers/image-spec/blob/master/annotations.md @@ -48,19 +56,26 @@ ENV PROC_HOME=/back-projection ENV MYHOME=/home/conda # Conda setup -RUN dnf install --setopt=install_weak_deps=False --nodocs -y wget bzip2 ca-certificates git > /dev/null && \ +RUN apt-get update > /dev/null && \ + apt-get install --no-install-recommends --yes \ + wget bzip2 ca-certificates \ + git \ + tini \ + > /dev/null && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* && \ wget --no-hsts --quiet https://github.com/conda-forge/miniforge/releases/download/${MINIFORGE_VERSION}/${MINIFORGE_NAME}-${MINIFORGE_VERSION}-Linux-$(uname -m).sh -O /tmp/miniforge.sh && \ /bin/bash /tmp/miniforge.sh -b -p ${CONDA_DIR} && \ rm /tmp/miniforge.sh && \ conda clean --tarballs --index-cache --packages --yes && \ find ${CONDA_DIR} -follow -type f -name '*.a' -delete && \ find ${CONDA_DIR} -follow -type f -name '*.pyc' -delete && \ - conda clean --force-pkgs-dirs --all --yes && \ + conda clean --force-pkgs-dirs --all --yes && \ echo ". ${CONDA_DIR}/etc/profile.d/conda.sh && conda activate base" >> /etc/skel/.bashrc && \ echo ". ${CONDA_DIR}/etc/profile.d/conda.sh && conda activate base" >> ~/.bashrc -RUN dnf -y update && dnf install --allowerasing -y unzip vim curl gfortran fftw-devel && \ - dnf clean all && rm -rf /var/cache/dnf/* +RUN apt-get update && apt-get install -y --no-install-recommends unzip vim curl gfortran && \ + apt-get clean && rm -rf /var/lib/apt/lists/* RUN groupadd -g "${CONDA_GID}" --system conda && \ useradd -l -u "${CONDA_UID}" -g "${CONDA_GID}" --system -d /home/conda -m -s /bin/bash conda && \ @@ -73,7 +88,6 @@ SHELL ["/bin/bash", "-l", "-c"] USER ${CONDA_UID} WORKDIR /home/conda/ -# UIDs above 999 cause a warning in rockylinux systems, but are OK in this case. COPY --chown=${CONDA_UID}:${CONDA_GID} --from=builder /back-projection /back-projection COPY --chown=${CONDA_UID}:${CONDA_GID} --from=builder /hyp3-back-projection /hyp3-back-projection diff --git a/Dockerfile.rhel.gpu b/Dockerfile.rhel.gpu new file mode 100644 index 0000000..6d8c7ae --- /dev/null +++ b/Dockerfile.rhel.gpu @@ -0,0 +1,87 @@ +FROM nvidia/cuda:12.4.1-devel-ubi9 as builder + +# FIXME: should be able to find this dynamically +ARG GPU_ARCH=89 +ARG FFTW_LIB=/usr/lib64/libfftw3f.so + +# GPU_ARCH and USEGPU environment variable used by build_proc.sh +ENV GPU_ARCH=${GPU_ARCH} +ENV USEGPU=true + +RUN dnf -y update && dnf install --allowerasing -y unzip vim curl git make automake gcc gcc-c++ gfortran fftw-devel && \ + dnf clean all && rm -rf /var/cache/dnf/* + +# FIXME: switch main branch when remove_files branch is merged +RUN git clone -b remove_files https://github.com/ASFHyP3/back-projection.git + +COPY . 
/hyp3-back-projection/ +COPY ./scripts/build_proc.sh ./back-projection +RUN cd /back-projection && \ + find $PROC_HOME -type f -name "*.py" -exec chmod +x {} + && \ + ./build_proc.sh && \ + cd / + +FROM nvidia/cuda:12.4.1-runtime-ubi9 as runner + +# For opencontainers label definitions, see: +# https://github.com/opencontainers/image-spec/blob/master/annotations.md +LABEL org.opencontainers.image.title="HyP3 back-projection" +LABEL org.opencontainers.image.description="HyP3 plugin for back-projection processing" +LABEL org.opencontainers.image.vendor="Alaska Satellite Facility" +LABEL org.opencontainers.image.authors="ASF Tools Team " +LABEL org.opencontainers.image.licenses="BSD-3-Clause" +LABEL org.opencontainers.image.url="https://github.com/ASFHyP3/hyp3-back-projection" +LABEL org.opencontainers.image.source="https://github.com/ASFHyP3/hyp3-back-projection" +LABEL org.opencontainers.image.documentation="https://hyp3-docs.asf.alaska.edu" + +ARG DEBIAN_FRONTEND=noninteractive +ARG CONDA_UID=1000 +ARG CONDA_GID=1000 +ARG MINIFORGE_NAME=Miniforge3 +ARG MINIFORGE_VERSION=24.3.0-0 + +ENV CONDA_DIR=/opt/conda +ENV LANG=C.UTF-8 LC_ALL=C.UTF-8 +ENV PATH=${CONDA_DIR}/bin:${PATH} +ENV PYTHONDONTWRITEBYTECODE=true +ENV PROC_HOME=/back-projection +ENV MYHOME=/home/conda + +# Conda setup +RUN dnf install --setopt=install_weak_deps=False --nodocs -y wget bzip2 ca-certificates git > /dev/null && \ + wget --no-hsts --quiet https://github.com/conda-forge/miniforge/releases/download/${MINIFORGE_VERSION}/${MINIFORGE_NAME}-${MINIFORGE_VERSION}-Linux-$(uname -m).sh -O /tmp/miniforge.sh && \ + /bin/bash /tmp/miniforge.sh -b -p ${CONDA_DIR} && \ + rm /tmp/miniforge.sh && \ + conda clean --tarballs --index-cache --packages --yes && \ + find ${CONDA_DIR} -follow -type f -name '*.a' -delete && \ + find ${CONDA_DIR} -follow -type f -name '*.pyc' -delete && \ + conda clean --force-pkgs-dirs --all --yes && \ + echo ". ${CONDA_DIR}/etc/profile.d/conda.sh && conda activate base" >> /etc/skel/.bashrc && \ + echo ". ${CONDA_DIR}/etc/profile.d/conda.sh && conda activate base" >> ~/.bashrc + +RUN dnf -y update && dnf install --allowerasing -y unzip vim curl gfortran fftw-devel && \ + dnf clean all && rm -rf /var/cache/dnf/* + +RUN groupadd -g "${CONDA_GID}" --system conda && \ + useradd -l -u "${CONDA_UID}" -g "${CONDA_GID}" --system -d /home/conda -m -s /bin/bash conda && \ + chown -R conda:conda /opt && \ + echo ". /opt/conda/etc/profile.d/conda.sh" >> /home/conda/.profile && \ + echo "conda activate base" >> /home/conda/.profile + +SHELL ["/bin/bash", "-l", "-c"] + +USER ${CONDA_UID} +WORKDIR /home/conda/ + +# UIDs above 999 cause a warning in rockylinux systems, but are OK in this case. 
+COPY --chown=${CONDA_UID}:${CONDA_GID} --from=builder /back-projection /back-projection +COPY --chown=${CONDA_UID}:${CONDA_GID} --from=builder /hyp3-back-projection /hyp3-back-projection + +RUN mamba env create -f /hyp3-back-projection/environment.yml && \ + conda clean -afy && \ + conda activate hyp3-back-projection && \ + sed -i 's/conda activate base/conda activate hyp3-back-projection/g' /home/conda/.profile && \ + python -m pip install --no-cache-dir /hyp3-back-projection + +ENTRYPOINT ["/hyp3-back-projection/src/hyp3_back_projection/etc/entrypoint.sh"] +CMD ["-h"] From 735ed82d5711ddac314e0e73a2759a4bb89bcf92 Mon Sep 17 00:00:00 2001 From: Forrest Williams Date: Fri, 17 May 2024 16:37:59 -0500 Subject: [PATCH 12/28] remove unneeded code --- Dockerfile.gpu | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/Dockerfile.gpu b/Dockerfile.gpu index 9f3c116..d1e2244 100644 --- a/Dockerfile.gpu +++ b/Dockerfile.gpu @@ -15,16 +15,10 @@ RUN apt-get update && apt-get install -y --no-install-recommends unzip vim curl # FIXME: switch to released version of plugin when remove_files branch is merged RUN git clone -b remove_files https://github.com/ASFHyP3/back-projection.git -# RUN curl -sL https://github.com/ASFHyP3/back-projection/archive/refs/tags/v${BACK_PROJECTION_TAG}.tar.gz > ./back-projection.tar.gz && \ -# mkdir -p ./back-projection && \ -# tar -xvf ./back-projection.tar.gz -C ./back-projection/ --strip=1 && \ -# rm ./back-projection.tar.gz && \ -# rm -rf ./back-projection/fft COPY . /hyp3-back-projection/ COPY ./scripts/build_proc.sh ./back-projection RUN cd /back-projection && \ - chmod +x ./build_proc.sh && \ find $PROC_HOME -type f -name "*.py" -exec chmod +x {} + && \ ./build_proc.sh && \ cd / @@ -56,13 +50,11 @@ ENV PROC_HOME=/back-projection ENV MYHOME=/home/conda # Conda setup -RUN apt-get update > /dev/null && \ - apt-get install --no-install-recommends --yes \ +RUN apt-get install --no-install-recommends --yes \ wget bzip2 ca-certificates \ git \ tini \ > /dev/null && \ - apt-get clean && \ rm -rf /var/lib/apt/lists/* && \ wget --no-hsts --quiet https://github.com/conda-forge/miniforge/releases/download/${MINIFORGE_VERSION}/${MINIFORGE_NAME}-${MINIFORGE_VERSION}-Linux-$(uname -m).sh -O /tmp/miniforge.sh && \ /bin/bash /tmp/miniforge.sh -b -p ${CONDA_DIR} && \ From 167a0aa6be5fd6ebe873476ccba6197bcd20e801 Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Mon, 20 May 2024 13:52:15 +0000 Subject: [PATCH 13/28] improve readability --- Dockerfile.gpu | 21 ++++++--------------- scripts/build_proc.sh | 9 ++++----- 2 files changed, 10 insertions(+), 20 deletions(-) diff --git a/Dockerfile.gpu b/Dockerfile.gpu index d1e2244..2ee7d99 100644 --- a/Dockerfile.gpu +++ b/Dockerfile.gpu @@ -2,26 +2,22 @@ FROM nvidia/cuda:12.4.1-devel-ubuntu22.04 as builder # FIXME: should be able to find this dynamically ARG GPU_ARCH=89 -ARG BACK_PROJECTION_TAG=0.2.0 # GPU_ARCH and USEGPU environment variable used by build_proc.sh ENV FFTW_LIB=/usr/lib/x86_64-linux-gnu/libfftw3f.a ENV GPU_ARCH=${GPU_ARCH} ENV USEGPU=true +ENV DEBIAN_FRONTEND=noninteractive # FIXME: can remove git after switch back to released version of back-projection RUN apt-get update && apt-get install -y --no-install-recommends unzip vim curl git build-essential gfortran libfftw3-dev && \ apt-get clean && rm -rf /var/lib/apt/lists/* -# FIXME: switch to released version of plugin when remove_files branch is merged +# FIXME: switch to main branch of plugin when remove_files branch is merged RUN git clone -b 
remove_files https://github.com/ASFHyP3/back-projection.git - COPY . /hyp3-back-projection/ COPY ./scripts/build_proc.sh ./back-projection -RUN cd /back-projection && \ - find $PROC_HOME -type f -name "*.py" -exec chmod +x {} + && \ - ./build_proc.sh && \ - cd / +RUN cd /back-projection && ./build_proc.sh && cd / FROM nvidia/cuda:12.4.1-runtime-ubuntu22.04 as runner @@ -36,7 +32,6 @@ LABEL org.opencontainers.image.url="https://github.com/ASFHyP3/hyp3-back-project LABEL org.opencontainers.image.source="https://github.com/ASFHyP3/hyp3-back-projection" LABEL org.opencontainers.image.documentation="https://hyp3-docs.asf.alaska.edu" -ARG DEBIAN_FRONTEND=noninteractive ARG CONDA_UID=1000 ARG CONDA_GID=1000 ARG MINIFORGE_NAME=Miniforge3 @@ -48,14 +43,10 @@ ENV PATH=${CONDA_DIR}/bin:${PATH} ENV PYTHONDONTWRITEBYTECODE=true ENV PROC_HOME=/back-projection ENV MYHOME=/home/conda +ENV DEBIAN_FRONTEND=noninteractive # Conda setup -RUN apt-get install --no-install-recommends --yes \ - wget bzip2 ca-certificates \ - git \ - tini \ - > /dev/null && \ - rm -rf /var/lib/apt/lists/* && \ +RUN apt-get update && apt-get install --no-install-recommends --yes wget bzip2 ca-certificates git > /dev/null && \ wget --no-hsts --quiet https://github.com/conda-forge/miniforge/releases/download/${MINIFORGE_VERSION}/${MINIFORGE_NAME}-${MINIFORGE_VERSION}-Linux-$(uname -m).sh -O /tmp/miniforge.sh && \ /bin/bash /tmp/miniforge.sh -b -p ${CONDA_DIR} && \ rm /tmp/miniforge.sh && \ @@ -66,7 +57,7 @@ RUN apt-get install --no-install-recommends --yes \ echo ". ${CONDA_DIR}/etc/profile.d/conda.sh && conda activate base" >> /etc/skel/.bashrc && \ echo ". ${CONDA_DIR}/etc/profile.d/conda.sh && conda activate base" >> ~/.bashrc -RUN apt-get update && apt-get install -y --no-install-recommends unzip vim curl gfortran && \ +RUN apt-get install -y --no-install-recommends unzip vim curl gfortran && \ apt-get clean && rm -rf /var/lib/apt/lists/* RUN groupadd -g "${CONDA_GID}" --system conda && \ diff --git a/scripts/build_proc.sh b/scripts/build_proc.sh index 5ac96d6..bd3453e 100755 --- a/scripts/build_proc.sh +++ b/scripts/build_proc.sh @@ -4,6 +4,7 @@ # FFTW_LIB=$MULTIARCH_DIR/libfftw3f.a echo 'using FFTW library:' $FFTW_LIB if [[ "$USEGPU" == "true" ]]; then + nvcc -o gpu_arch gpu_arch.cu echo 'building with GPU support, capability version' $GPU_ARCH fi @@ -22,7 +23,6 @@ gcc -o sentinel_raw_process_cpu sentinel_raw_process_cpu.o decode_line_memory.o echo 'built sentinel_raw_process_cpu' if [[ "$USEGPU" == "true" ]]; then - nvcc -o howmanygpus howmanygpus.cu echo 'built howmanygpus' fi @@ -88,14 +88,13 @@ gcc -c filelen.c io.c sentinel_raw_process.c decode_line_memory.c -lm -fopenmp echo 'built raw_process components in sentinel' -if [[ "$USEGPU" == "true" ]]; then - nvcc -gencode arch=compute_$GPU_ARCH,code=sm_$GPU_ARCH -c azimuth_compress.cu -Wno-deprecated-gpu-targets -fi - gfortran -c processsub.f90 backprojectgpusub.f90 bounds.f90 orbitrangetime.f90 latlon.f90 intp_orbit.f90 radar_to_xyz.f90 unitvec.f90 tcnbasis.f90 curvature.f90 cross.f90 orbithermite.f sentineltimingsub.f90 getburststatevectors.f90 -ffixed-line-length-none -fopenmp if [[ "$USEGPU" == "true" ]]; then + nvcc -o howmanygpus howmanygpus.cu + nvcc -gencode arch=compute_$GPU_ARCH,code=sm_$GPU_ARCH -c azimuth_compress.cu -Wno-deprecated-gpu-targets nvcc -gencode arch=compute_$GPU_ARCH,code=sm_$GPU_ARCH -o sentinel_raw_process sentinel_raw_process.o decode_line_memory.o processsub.o backprojectgpusub.o azimuth_compress.o bounds.o orbitrangetime.o latlon.o intp_orbit.o 
radar_to_xyz.o unitvec.o tcnbasis.o curvature.o cross.o orbithermite.o filelen.o io.o sentineltimingsub.o getburststatevectors.o $FFTW_LIB -lstdc++ -lgfortran -lgomp + echo 'built gpu components components in sentinel' fi cd .. From c68b4d88e01250d4326be154e3024a91affb0179 Mon Sep 17 00:00:00 2001 From: Forrest Williams Date: Mon, 20 May 2024 10:18:56 -0500 Subject: [PATCH 14/28] update ubuntu script for AMI creation --- scripts/ubuntu_setup.sh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/scripts/ubuntu_setup.sh b/scripts/ubuntu_setup.sh index 6482b53..d27a7be 100755 --- a/scripts/ubuntu_setup.sh +++ b/scripts/ubuntu_setup.sh @@ -17,7 +17,14 @@ echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker. # Installs sudo apt-get update && \ -sudo apt-get install -y nvidia-headless-535-server nvidia-utils-535-server docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin && \ +sudo apt-get install -y nvidia-headless-535-server nvidia-utils-535-server docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin awscli git && \ sudo usermod -aG docker $USER +# Cleanup temporary files +sudo apt-get clean +sudo rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* + +# Optimize the filesystem (for ext4) +sudo e4defrag / + # RESTART YOUR INSTANCE!!! From 2db04a91fc92f697f0e6fa70a96b274bd6aff7e2 Mon Sep 17 00:00:00 2001 From: Forrest Williams Date: Mon, 20 May 2024 14:27:22 -0500 Subject: [PATCH 15/28] update ubuntu script for AMI creation --- scripts/ubuntu_setup.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/ubuntu_setup.sh b/scripts/ubuntu_setup.sh index d27a7be..b209f4e 100755 --- a/scripts/ubuntu_setup.sh +++ b/scripts/ubuntu_setup.sh @@ -17,7 +17,7 @@ echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker. 
# Installs sudo apt-get update && \ -sudo apt-get install -y nvidia-headless-535-server nvidia-utils-535-server docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin awscli git && \ +sudo apt-get install -y nvidia-headless-535-server nvidia-utils-535-server nvidia-container-toolkit docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin awscli git && \ sudo usermod -aG docker $USER # Cleanup temporary files From 49c785d42cc38eda13989f3ee2789a05188b322a Mon Sep 17 00:00:00 2001 From: Forrest Williams Date: Mon, 20 May 2024 14:29:20 -0500 Subject: [PATCH 16/28] always create product dir --- src/hyp3_back_projection/back_projection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hyp3_back_projection/back_projection.py b/src/hyp3_back_projection/back_projection.py index d702eef..b4fc721 100644 --- a/src/hyp3_back_projection/back_projection.py +++ b/src/hyp3_back_projection/back_projection.py @@ -140,8 +140,8 @@ def back_project( utils.call_stanford_module('util/merge_slcs.py', work_dir=work_dir) + zip_path = create_product(work_dir) if bucket: - zip_path = create_product(work_dir) upload_file_to_s3(zip_path, bucket, bucket_prefix) print(f'Finished back-projection for {list(work_dir.glob("S1*.geo"))[0].with_suffix("").name}!') From 80a80a710438ecf991d45b4061f4763108ff7964 Mon Sep 17 00:00:00 2001 From: Forrest Williams Date: Mon, 20 May 2024 14:53:09 -0500 Subject: [PATCH 17/28] update readme --- README.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index f5c55b4..274d306 100644 --- a/README.md +++ b/README.md @@ -58,12 +58,16 @@ The process is different for different OS's and Linux distros. The setup process can be found [here](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#configuration). Make sure to follow the [Docker configuration steps](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#configuration) after installing the package. ### EC2 Setup +> [!CAUTION] +> Running the docker container on an Amazon Linux 2 runs, but will result in all zero outputs. Work is ongoing to determine what is causing this issue. For now, we recommend using an Ubuntu base image. + When running on an EC2 instance, the following setup is recommended: -1. Create a [G6-family EC2 instance](https://aws.amazon.com/ec2/instance-types/g6/) with the [Amazon Linux 2 Deep Learning AMI with NVIDIA Drivers](https://aws.amazon.com/machine-learning/amis/features/) -2. Build the GPU docker container with the correct compute capability version. To determine this value, run `nvidia-smi` on the instance to obtain GPU type, then cross-reference this information with NVIDIA's [GPU type compute capability list](https://developer.nvidia.com/cuda-gpus). For a g6.xlarge instance, this would be: +1. Create a [G6-family EC2 instance](https://aws.amazon.com/ec2/instance-types/g6/) that has **at least 32 GB of memory** with the [Deep Learning Base OSS Nvidia Driver GPU AMI](https://aws.amazon.com/releasenotes/aws-deep-learning-base-gpu-ami-ubuntu-22-04/). +2. Alternatively, you can run `scripts/ubuntu_setup.sh` as a [user script on launch](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/user-data.html) of a base Ubuntu image. +3. Build the GPU docker container with the correct compute capability version. 
To determine this value, run `nvidia-smi` on the instance to obtain GPU type, then cross-reference this information with NVIDIA's [GPU type compute capability list](https://developer.nvidia.com/cuda-gpus). For a g6.2xlarge instance, this would be: ```bash docker --build-arg="GPU_ARCH=89" -t back-projection:gpu-89 -f Dockerfile.gpu . ``` Note: this only needs to be done once per instance type, since the compute capability version will always be the same for a given instance type. -The default value for this argument is `89` - the correct value for g6.xlarge instances. +The default value for this argument is `89` - the correct value for g6.2xlarge instances. From c37a0e986d2b47e0285c12d835fe35e2f482c7e1 Mon Sep 17 00:00:00 2001 From: Forrest Williams Date: Mon, 20 May 2024 14:54:26 -0500 Subject: [PATCH 18/28] remove RHEL-based docker file --- Dockerfile.rhel.gpu | 87 --------------------------------------------- 1 file changed, 87 deletions(-) delete mode 100644 Dockerfile.rhel.gpu diff --git a/Dockerfile.rhel.gpu b/Dockerfile.rhel.gpu deleted file mode 100644 index 6d8c7ae..0000000 --- a/Dockerfile.rhel.gpu +++ /dev/null @@ -1,87 +0,0 @@ -FROM nvidia/cuda:12.4.1-devel-ubi9 as builder - -# FIXME: should be able to find this dynamically -ARG GPU_ARCH=89 -ARG FFTW_LIB=/usr/lib64/libfftw3f.so - -# GPU_ARCH and USEGPU environment variable used by build_proc.sh -ENV GPU_ARCH=${GPU_ARCH} -ENV USEGPU=true - -RUN dnf -y update && dnf install --allowerasing -y unzip vim curl git make automake gcc gcc-c++ gfortran fftw-devel && \ - dnf clean all && rm -rf /var/cache/dnf/* - -# FIXME: switch main branch when remove_files branch is merged -RUN git clone -b remove_files https://github.com/ASFHyP3/back-projection.git - -COPY . /hyp3-back-projection/ -COPY ./scripts/build_proc.sh ./back-projection -RUN cd /back-projection && \ - find $PROC_HOME -type f -name "*.py" -exec chmod +x {} + && \ - ./build_proc.sh && \ - cd / - -FROM nvidia/cuda:12.4.1-runtime-ubi9 as runner - -# For opencontainers label definitions, see: -# https://github.com/opencontainers/image-spec/blob/master/annotations.md -LABEL org.opencontainers.image.title="HyP3 back-projection" -LABEL org.opencontainers.image.description="HyP3 plugin for back-projection processing" -LABEL org.opencontainers.image.vendor="Alaska Satellite Facility" -LABEL org.opencontainers.image.authors="ASF Tools Team " -LABEL org.opencontainers.image.licenses="BSD-3-Clause" -LABEL org.opencontainers.image.url="https://github.com/ASFHyP3/hyp3-back-projection" -LABEL org.opencontainers.image.source="https://github.com/ASFHyP3/hyp3-back-projection" -LABEL org.opencontainers.image.documentation="https://hyp3-docs.asf.alaska.edu" - -ARG DEBIAN_FRONTEND=noninteractive -ARG CONDA_UID=1000 -ARG CONDA_GID=1000 -ARG MINIFORGE_NAME=Miniforge3 -ARG MINIFORGE_VERSION=24.3.0-0 - -ENV CONDA_DIR=/opt/conda -ENV LANG=C.UTF-8 LC_ALL=C.UTF-8 -ENV PATH=${CONDA_DIR}/bin:${PATH} -ENV PYTHONDONTWRITEBYTECODE=true -ENV PROC_HOME=/back-projection -ENV MYHOME=/home/conda - -# Conda setup -RUN dnf install --setopt=install_weak_deps=False --nodocs -y wget bzip2 ca-certificates git > /dev/null && \ - wget --no-hsts --quiet https://github.com/conda-forge/miniforge/releases/download/${MINIFORGE_VERSION}/${MINIFORGE_NAME}-${MINIFORGE_VERSION}-Linux-$(uname -m).sh -O /tmp/miniforge.sh && \ - /bin/bash /tmp/miniforge.sh -b -p ${CONDA_DIR} && \ - rm /tmp/miniforge.sh && \ - conda clean --tarballs --index-cache --packages --yes && \ - find ${CONDA_DIR} -follow -type f -name '*.a' -delete && \ - find 
${CONDA_DIR} -follow -type f -name '*.pyc' -delete && \ - conda clean --force-pkgs-dirs --all --yes && \ - echo ". ${CONDA_DIR}/etc/profile.d/conda.sh && conda activate base" >> /etc/skel/.bashrc && \ - echo ". ${CONDA_DIR}/etc/profile.d/conda.sh && conda activate base" >> ~/.bashrc - -RUN dnf -y update && dnf install --allowerasing -y unzip vim curl gfortran fftw-devel && \ - dnf clean all && rm -rf /var/cache/dnf/* - -RUN groupadd -g "${CONDA_GID}" --system conda && \ - useradd -l -u "${CONDA_UID}" -g "${CONDA_GID}" --system -d /home/conda -m -s /bin/bash conda && \ - chown -R conda:conda /opt && \ - echo ". /opt/conda/etc/profile.d/conda.sh" >> /home/conda/.profile && \ - echo "conda activate base" >> /home/conda/.profile - -SHELL ["/bin/bash", "-l", "-c"] - -USER ${CONDA_UID} -WORKDIR /home/conda/ - -# UIDs above 999 cause a warning in rockylinux systems, but are OK in this case. -COPY --chown=${CONDA_UID}:${CONDA_GID} --from=builder /back-projection /back-projection -COPY --chown=${CONDA_UID}:${CONDA_GID} --from=builder /hyp3-back-projection /hyp3-back-projection - -RUN mamba env create -f /hyp3-back-projection/environment.yml && \ - conda clean -afy && \ - conda activate hyp3-back-projection && \ - sed -i 's/conda activate base/conda activate hyp3-back-projection/g' /home/conda/.profile && \ - python -m pip install --no-cache-dir /hyp3-back-projection - -ENTRYPOINT ["/hyp3-back-projection/src/hyp3_back_projection/etc/entrypoint.sh"] -CMD ["-h"] From 66a12a977688cdd5b55345d3cf448e5852c01de6 Mon Sep 17 00:00:00 2001 From: Forrest Williams Date: Mon, 20 May 2024 15:01:51 -0500 Subject: [PATCH 19/28] update changelog --- CHANGELOG.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8a5fac5..e135638 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,18 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [PEP 440](https://www.python.org/dev/peps/pep-0440/) and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.5.0] + +### Added +* `scripts/ubuntu_setup.sh` for setting up a GPU-based Ubuntu EC2 AMI. + +### Changed +* Refactored `scripts/build_proc.sh` to combine GPU compilation steps. +* Final product zip archive is now always created. + +### Fixed +* `Dockerfile.gpu` so that outputs will contain actual data. + ## [0.4.0] ### Added From 37b1d3fe6d7c6d2c7b4e2b9e20502dd961615219 Mon Sep 17 00:00:00 2001 From: Forrest Williams Date: Mon, 20 May 2024 15:57:15 -0500 Subject: [PATCH 20/28] use main branch of back-projection --- Dockerfile.gpu | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Dockerfile.gpu b/Dockerfile.gpu index 2ee7d99..09cbabc 100644 --- a/Dockerfile.gpu +++ b/Dockerfile.gpu @@ -13,8 +13,7 @@ ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y --no-install-recommends unzip vim curl git build-essential gfortran libfftw3-dev && \ apt-get clean && rm -rf /var/lib/apt/lists/* -# FIXME: switch to main branch of plugin when remove_files branch is merged -RUN git clone -b remove_files https://github.com/ASFHyP3/back-projection.git +RUN git clone -b main https://github.com/ASFHyP3/back-projection.git COPY . 
/hyp3-back-projection/ COPY ./scripts/build_proc.sh ./back-projection RUN cd /back-projection && ./build_proc.sh && cd / From 3c67708fb24f768235256a75d592e1945db0c078 Mon Sep 17 00:00:00 2001 From: Forrest Williams Date: Tue, 21 May 2024 07:10:05 -0500 Subject: [PATCH 21/28] update based on review --- scripts/build_proc.sh | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/scripts/build_proc.sh b/scripts/build_proc.sh index bd3453e..3abd0a5 100755 --- a/scripts/build_proc.sh +++ b/scripts/build_proc.sh @@ -1,5 +1,7 @@ #!/bin/bash +# Keeping these lines here in case we need to switch back to grabbing the FFTW location +# dynamically again # MULTIARCH_DIR=/usr/lib/$(gcc -print-multiarch) # FFTW_LIB=$MULTIARCH_DIR/libfftw3f.a echo 'using FFTW library:' $FFTW_LIB @@ -22,10 +24,6 @@ gfortran -c processsubcpu.f90 backprojectcpusub.f90 bounds.f90 orbitrangetime.f9 gcc -o sentinel_raw_process_cpu sentinel_raw_process_cpu.o decode_line_memory.o processsubcpu.o backprojectcpusub.o azimuth_compress_cpu.o bounds.o orbitrangetime.o latlon.o intp_orbit.o radar_to_xyz.o unitvec.o tcnbasis.o curvature.o cross.o orbithermite.o filelen.o io.o sentineltimingsub.o getburststatevectors.o $FFTW_LIB -lgfortran -lgomp -lm -lrt -lpthread echo 'built sentinel_raw_process_cpu' -if [[ "$USEGPU" == "true" ]]; then - echo 'built howmanygpus' -fi - cd geo2rdr gfortran -o estimatebaseline estimatebaseline.f90 intp_orbit.f90 latlon.f90 orbithermite.f -ffixed-line-length-none From a1b43b755b6f85479e6711720287166533de31d9 Mon Sep 17 00:00:00 2001 From: Forrest Williams Date: Tue, 21 May 2024 08:53:25 -0500 Subject: [PATCH 22/28] create amazon linux bootstrap script --- scripts/amazon_linux_setup.sh | 32 ++++++++++++++++++++++++++++++++ scripts/ubuntu_setup.sh | 10 +++++----- 2 files changed, 37 insertions(+), 5 deletions(-) create mode 100755 scripts/amazon_linux_setup.sh diff --git a/scripts/amazon_linux_setup.sh b/scripts/amazon_linux_setup.sh new file mode 100755 index 0000000..a5a777c --- /dev/null +++ b/scripts/amazon_linux_setup.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +# GPU setup for the Amazon Linux 2023 + +# Install NVIDIA driver +DRIVER_VERSION=550.54.14 +sudo dnf install -y kernel-devel-$(uname -r) kernel-headers-$(uname -r) kernel-modules-extra +curl -fSsl -O https://us.download.nvidia.com/tesla/$DRIVER_VERSION/NVIDIA-Linux-x86_64-$DRIVER_VERSION.run +chmod +x NVIDIA-Linux-x86_64-$DRIVER_VERSION.run +sudo ./NVIDIA-Linux-x86_64-$DRIVER_VERSION.run --tmpdir . 
--silent +rm ./NVIDIA-Linux-x86_64-$DRIVER_VERSION.run + +# Install and enable Docker +sudo dnf install -y docker git +sudo systemctl start docker +sudo systemctl enable docker +sudo usermod -aG docker $USER + +# Install nvidia-container-toolkit +sudo dnf config-manager --add-repo https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo +sudo dnf install -y nvidia-container-toolkit +sudo nvidia-ctk runtime configure --runtime=docker +sudo systemctl restart docker + +# Install extra packages +sudo dnf install -y git + +# Cleanup +dnf clean all && rm -rf /var/cache/dnf/* + +# Reboot +sudo reboot diff --git a/scripts/ubuntu_setup.sh b/scripts/ubuntu_setup.sh index b209f4e..e7e17ea 100755 --- a/scripts/ubuntu_setup.sh +++ b/scripts/ubuntu_setup.sh @@ -1,4 +1,6 @@ -# GPU setup for the ubuntu 22.04 AMI +#!/bin/bash + +# GPU setup for the Ubuntu 22.04 # NVIDIA source setup curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg && \ @@ -24,7 +26,5 @@ sudo usermod -aG docker $USER sudo apt-get clean sudo rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* -# Optimize the filesystem (for ext4) -sudo e4defrag / - -# RESTART YOUR INSTANCE!!! +# Reboot +sudo reboot From 6fdbb9a6649e59570e5a7863069ff0090cb0a24c Mon Sep 17 00:00:00 2001 From: Forrest Williams Date: Tue, 21 May 2024 08:56:59 -0500 Subject: [PATCH 23/28] update users to be explicit --- scripts/amazon_linux_setup.sh | 2 +- scripts/ubuntu_setup.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/amazon_linux_setup.sh b/scripts/amazon_linux_setup.sh index a5a777c..0ee262a 100755 --- a/scripts/amazon_linux_setup.sh +++ b/scripts/amazon_linux_setup.sh @@ -14,7 +14,7 @@ rm ./NVIDIA-Linux-x86_64-$DRIVER_VERSION.run sudo dnf install -y docker git sudo systemctl start docker sudo systemctl enable docker -sudo usermod -aG docker $USER +sudo usermod -aG docker ec2-user # Install nvidia-container-toolkit sudo dnf config-manager --add-repo https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo diff --git a/scripts/ubuntu_setup.sh b/scripts/ubuntu_setup.sh index e7e17ea..0bba522 100755 --- a/scripts/ubuntu_setup.sh +++ b/scripts/ubuntu_setup.sh @@ -20,7 +20,7 @@ echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker. # Installs sudo apt-get update && \ sudo apt-get install -y nvidia-headless-535-server nvidia-utils-535-server nvidia-container-toolkit docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin awscli git && \ -sudo usermod -aG docker $USER +sudo usermod -aG docker ubuntu # Cleanup temporary files sudo apt-get clean From 98d1c3ee37ec6ea921fe60a6ed9a23732cbbe529 Mon Sep 17 00:00:00 2001 From: Forrest Williams Date: Tue, 21 May 2024 09:06:49 -0500 Subject: [PATCH 24/28] update readme --- README.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 274d306..e0bb7df 100644 --- a/README.md +++ b/README.md @@ -62,12 +62,14 @@ can be found [here](https://docs.nvidia.com/datacenter/cloud-native/container-to > Running the docker container on an Amazon Linux 2 runs, but will result in all zero outputs. Work is ongoing to determine what is causing this issue. For now, we recommend using an Ubuntu base image. When running on an EC2 instance, the following setup is recommended: -1. 
Create a [G6-family EC2 instance](https://aws.amazon.com/ec2/instance-types/g6/) that has **at least 32 GB of memory** with the [Deep Learning Base OSS Nvidia Driver GPU AMI](https://aws.amazon.com/releasenotes/aws-deep-learning-base-gpu-ami-ubuntu-22-04/). -2. Alternatively, you can run `scripts/ubuntu_setup.sh` as a [user script on launch](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/user-data.html) of a base Ubuntu image. +1. Create a [G6-family EC2 instance](https://aws.amazon.com/ec2/instance-types/g6/) that has **at least 32 GB of memory**. +2. Launch your instance with one of the following setups (**option a is recommended**): + a. Use the latest [Amazon Linux 2023 AMI](https://docs.aws.amazon.com/linux/al2023/ug/ec2.html) with the `scripts/amazon_linux_setup.sh` script configured as the as a [user script on launch](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/user-data.html) + b. Use the latest [Ubuntu AMI](https://cloud-images.ubuntu.com/locator/ec2/) with the `scripts/ubuntu_setup.sh` script configured as the as a [user script on launch](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/user-data.html) + c. Use the [Ubuntu Deep Learning Base OSS Nvidia Driver GPU AMI](https://aws.amazon.com/releasenotes/aws-deep-learning-base-gpu-ami-ubuntu-22-04/) (no install script required). 3. Build the GPU docker container with the correct compute capability version. To determine this value, run `nvidia-smi` on the instance to obtain GPU type, then cross-reference this information with NVIDIA's [GPU type compute capability list](https://developer.nvidia.com/cuda-gpus). For a g6.2xlarge instance, this would be: ```bash docker --build-arg="GPU_ARCH=89" -t back-projection:gpu-89 -f Dockerfile.gpu . ``` -Note: this only needs to be done once per instance type, since the compute capability version will always be the same for a given instance type. - +The compute capability version will always be the same for a given instance type, so you will only need to look this up once per instance type. The default value for this argument is `89` - the correct value for g6.2xlarge instances. From 9b05f93815318bac97619b4c7fc1ef215470ef77 Mon Sep 17 00:00:00 2001 From: Forrest Williams Date: Tue, 21 May 2024 09:08:59 -0500 Subject: [PATCH 25/28] fix numbering --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index e0bb7df..e0c839c 100644 --- a/README.md +++ b/README.md @@ -63,10 +63,10 @@ can be found [here](https://docs.nvidia.com/datacenter/cloud-native/container-to When running on an EC2 instance, the following setup is recommended: 1. Create a [G6-family EC2 instance](https://aws.amazon.com/ec2/instance-types/g6/) that has **at least 32 GB of memory**. -2. Launch your instance with one of the following setups (**option a is recommended**): - a. Use the latest [Amazon Linux 2023 AMI](https://docs.aws.amazon.com/linux/al2023/ug/ec2.html) with the `scripts/amazon_linux_setup.sh` script configured as the as a [user script on launch](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/user-data.html) - b. Use the latest [Ubuntu AMI](https://cloud-images.ubuntu.com/locator/ec2/) with the `scripts/ubuntu_setup.sh` script configured as the as a [user script on launch](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/user-data.html) - c. Use the [Ubuntu Deep Learning Base OSS Nvidia Driver GPU AMI](https://aws.amazon.com/releasenotes/aws-deep-learning-base-gpu-ami-ubuntu-22-04/) (no install script required). +2. 
Launch your instance with one of the following setups (**option i is recommended**): + 1. Use the latest [Amazon Linux 2023 AMI](https://docs.aws.amazon.com/linux/al2023/ug/ec2.html) with the `scripts/amazon_linux_setup.sh` script configured as the as a [user script on launch](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/user-data.html) + 2. Use the latest [Ubuntu AMI](https://cloud-images.ubuntu.com/locator/ec2/) with the `scripts/ubuntu_setup.sh` script configured as the as a [user script on launch](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/user-data.html) + 3. Use the [Ubuntu Deep Learning Base OSS Nvidia Driver GPU AMI](https://aws.amazon.com/releasenotes/aws-deep-learning-base-gpu-ami-ubuntu-22-04/) (no install script required). 3. Build the GPU docker container with the correct compute capability version. To determine this value, run `nvidia-smi` on the instance to obtain GPU type, then cross-reference this information with NVIDIA's [GPU type compute capability list](https://developer.nvidia.com/cuda-gpus). For a g6.2xlarge instance, this would be: ```bash docker --build-arg="GPU_ARCH=89" -t back-projection:gpu-89 -f Dockerfile.gpu . From 845d159dcd05fd85b0a12fd8a7e21f27d9422e4d Mon Sep 17 00:00:00 2001 From: Forrest Williams Date: Tue, 21 May 2024 09:10:33 -0500 Subject: [PATCH 26/28] fix grammar --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e0c839c..34bf186 100644 --- a/README.md +++ b/README.md @@ -64,8 +64,8 @@ can be found [here](https://docs.nvidia.com/datacenter/cloud-native/container-to When running on an EC2 instance, the following setup is recommended: 1. Create a [G6-family EC2 instance](https://aws.amazon.com/ec2/instance-types/g6/) that has **at least 32 GB of memory**. 2. Launch your instance with one of the following setups (**option i is recommended**): - 1. Use the latest [Amazon Linux 2023 AMI](https://docs.aws.amazon.com/linux/al2023/ug/ec2.html) with the `scripts/amazon_linux_setup.sh` script configured as the as a [user script on launch](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/user-data.html) - 2. Use the latest [Ubuntu AMI](https://cloud-images.ubuntu.com/locator/ec2/) with the `scripts/ubuntu_setup.sh` script configured as the as a [user script on launch](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/user-data.html) + 1. Use the latest [Amazon Linux 2023 AMI](https://docs.aws.amazon.com/linux/al2023/ug/ec2.html) with `scripts/amazon_linux_setup.sh` as the [user script on launch](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/user-data.html). + 2. Use the latest [Ubuntu AMI](https://cloud-images.ubuntu.com/locator/ec2/) with the `scripts/ubuntu_setup.sh` as the [user script on launch](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/user-data.html). 3. Use the [Ubuntu Deep Learning Base OSS Nvidia Driver GPU AMI](https://aws.amazon.com/releasenotes/aws-deep-learning-base-gpu-ami-ubuntu-22-04/) (no install script required). 3. Build the GPU docker container with the correct compute capability version. To determine this value, run `nvidia-smi` on the instance to obtain GPU type, then cross-reference this information with NVIDIA's [GPU type compute capability list](https://developer.nvidia.com/cuda-gpus). 
For a g6.2xlarge instance, this would be: ```bash From 481ee94e7908330542b26c95d3a3768994dfc9c9 Mon Sep 17 00:00:00 2001 From: Forrest Williams Date: Tue, 21 May 2024 09:14:38 -0500 Subject: [PATCH 27/28] update caution block --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 34bf186..85111e8 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ can be found [here](https://docs.nvidia.com/datacenter/cloud-native/container-to ### EC2 Setup > [!CAUTION] -> Running the docker container on an Amazon Linux 2 runs, but will result in all zero outputs. Work is ongoing to determine what is causing this issue. For now, we recommend using an Ubuntu base image. +> Running the docker container on an Amazon Linux 2023 Deep Learning AMI runs, but will result in all zero outputs. Work is ongoing to determine what is causing this issue. For now, we recommend using option 2.i. When running on an EC2 instance, the following setup is recommended: 1. Create a [G6-family EC2 instance](https://aws.amazon.com/ec2/instance-types/g6/) that has **at least 32 GB of memory**. From acd9d0676c1f6ca8162e0bfe8481e27a9fbb81c4 Mon Sep 17 00:00:00 2001 From: Forrest Williams Date: Tue, 21 May 2024 09:28:06 -0500 Subject: [PATCH 28/28] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e135638..4e90db1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ### Added * `scripts/ubuntu_setup.sh` for setting up a GPU-based Ubuntu EC2 AMI. +* `scripts/amazon_linux_setup.sh` for setting up a GPU-based Amazon Linux 2023 EC2 AMI. ### Changed * Refactored `scripts/build_proc.sh` to combine GPU compilation steps.
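
The README changes in the patches above describe the GPU build flow in prose: look up the instance's GPU compute capability, then pass it to the image build as `GPU_ARCH`. Below is a minimal sketch of that flow on a freshly provisioned instance. The `GPU_ARCH` build argument, `Dockerfile.gpu`, the `back-projection:gpu-*` tag, and the setup scripts are taken from the patches; the `nvidia-smi --query-gpu=compute_cap` lookup and the `docker run --gpus all` smoke test are assumptions on my part (they rely on a reasonably recent NVIDIA driver and the standard nvidia-container-toolkit behavior), not commands from the patch series.

```bash
#!/bin/bash
# Sketch: build the GPU container on an EC2 instance already provisioned by
# scripts/ubuntu_setup.sh or scripts/amazon_linux_setup.sh (NVIDIA driver, Docker,
# and nvidia-container-toolkit installed). Assumes the driver is new enough for
# nvidia-smi to report compute_cap. Run from the repository root.
set -euo pipefail

# Read the GPU compute capability (e.g. "8.9" for the L4 GPU in a g6.2xlarge) and
# drop the dot to match the form the GPU_ARCH build argument expects (e.g. "89").
GPU_ARCH=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader | head -n 1 | tr -d '.')
echo "Detected compute capability: ${GPU_ARCH}"

# Smoke-test that the container runtime can see the GPU before building.
docker run --rm --gpus all nvidia/cuda:12.4.1-devel-ubuntu22.04 nvidia-smi

# Build the GPU image with the matching compute capability.
docker build --build-arg GPU_ARCH="${GPU_ARCH}" \
    -t "back-projection:gpu-${GPU_ARCH}" -f Dockerfile.gpu .
```

As the README notes, the compute capability is fixed for a given instance type, so this lookup only needs to happen once per type; the detected value can then be reused (for example in a launch template or CI job) instead of being probed before every build.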