Merge branch 'develop' into feature/lnd_regrid_iau
* develop:
  Ensure OCNRES and ICERES have 3 digits in the archive script (NOAA-EMC#3199)
  Set runtime shell requirements within Jenkins Pipeline (NOAA-EMC#3171)
  Add efcs and epos to ufs_hybatm xml (NOAA-EMC#3192) (NOAA-EMC#3193)
  Fix GEFS and SFS compile flags in build_all.sh (NOAA-EMC#3197)
  Remove early-cycle EnKF forecast (NOAA-EMC#3185)
  Fix mod_icec bug in atmos_prod (NOAA-EMC#3167)
  Create compute build option (NOAA-EMC#3186)
  Support global-workflow using Rocky 8 on CSPs (NOAA-EMC#2998)
tsga committed Jan 4, 2025
2 parents 552642f + 29089be commit 066f60f
Showing 38 changed files with 786 additions and 296 deletions.
1 change: 1 addition & 0 deletions .github/CODEOWNERS
@@ -211,3 +211,4 @@ ush/python/pygfs/utils/marine_da_utils.py @guillaumevernieres @AndrewEichmann-NO

# Specific workflow scripts
workflow/generate_workflows.sh @DavidHuber-NOAA
workflow/build_compute.py @DavidHuber-NOAA @aerorahul
3 changes: 3 additions & 0 deletions .gitignore
@@ -85,6 +85,9 @@ parm/wafs

# Ignore sorc and logs folders from externals
#--------------------------------------------
sorc/build.xml
sorc/build.db
sorc/build_lock.db
sorc/*log
sorc/logs
sorc/calc_analysis.fd
49 changes: 37 additions & 12 deletions ci/Jenkinsfile
@@ -120,9 +120,7 @@ pipeline {
def error_logs_message = ""
dir("${HOMEgfs}/sorc") {
try {
sh(script: './build_all.sh -kgu') // build the global-workflow executables for GFS variant (UFS-wx-model, WW3 pre/post executables)
sh(script: './build_ww3prepost.sh -w > ./logs/build_ww3prepost_gefs.log 2>&1') // build the WW3 pre/post processing executables for GEFS variant
sh(script: './build_ufs.sh -w -e gefs_model.x > ./logs/build_ufs_gefs.log 2>&1') // build the UFS-wx-model executable for GEFS variant
sh(script: './build_compute.sh all') // build the global-workflow executables
} catch (Exception error_build) {
echo "Failed to build global-workflow: ${error_build.getMessage()}"
if ( fileExists("logs/error.logs") ) {
@@ -140,8 +138,14 @@
}
}
try {
sh(script: "${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --repo PR_BUILD_${env.CHANGE_ID}")
gist_url=sh(script: "${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --gist PR_BUILD_${env.CHANGE_ID}", returnStdout: true).trim()
sh(script: """
source ${HOMEgfs}/workflow/gw_setup.sh
${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --repo PR_BUILD_${env.CHANGE_ID}
""")
gist_url=sh(script: """
source ${HOMEgfs}/workflow/gw_setup.sh
${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --gist PR_BUILD_${env.CHANGE_ID}
""", returnStdout: true).trim()
sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Build **FAILED** on **${Machine}** in Build# ${env.BUILD_NUMBER} with error logs:\n\\`\\`\\`\n${error_logs_message}\\`\\`\\`\n\nFollow link here to view the contents of the above file(s): [(link)](${gist_url})" """)
} catch (Exception error_comment) {
echo "Failed to comment on PR: ${error_comment.getMessage()}"
@@ -160,7 +164,10 @@
}
}
// Get a list of CI cases to run
CI_CASES = sh(script: "${HOMEgfs}/ci/scripts/utils/get_host_case_list.py ${machine}", returnStdout: true).trim().split()
CI_CASES = sh(script: """
source ${HOMEgfs}/workflow/gw_setup.sh
${HOMEgfs}/ci/scripts/utils/get_host_case_list.py ${machine}
""", returnStdout: true).trim().split()
echo "Cases to run: ${CI_CASES}"
}
}
@@ -181,7 +188,10 @@
script {
env.RUNTESTS = "${CUSTOM_WORKSPACE}/RUNTESTS"
try {
error_output = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh create_experiment ${HOMEgfs}/ci/cases/pr/${caseName}.yaml", returnStdout: true).trim()
error_output = sh(script: """
source ${HOMEgfs}/workflow/gw_setup.sh
${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh create_experiment ${HOMEgfs}/ci/cases/pr/${caseName}.yaml
""", returnStdout: true).trim()
} catch (Exception error_create) {
sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "${Case} **FAILED** to create experiment on ${Machine} in BUILD# ${env.BUILD_NUMBER}\n with the error:\n\\`\\`\\`\n${error_output}\\`\\`\\`" """)
error("Case ${caseName} failed to create experiment directory")
@@ -196,10 +206,19 @@
def error_file = "${CUSTOM_WORKSPACE}/RUNTESTS/${pslot}_error.logs"
sh(script: " rm -f ${error_file}")
try {
sh(script: "${HOMEgfs}/ci/scripts/run-check_ci.sh ${CUSTOM_WORKSPACE} ${pslot} 'global-workflow'")
sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh cleanup_experiment ${CUSTOM_WORKSPACE}/RUNTESTS/EXPDIR/${pslot}")
sh(script: """
source ${HOMEgfs}/workflow/gw_setup.sh
${HOMEgfs}/ci/scripts/run-check_ci.sh ${CUSTOM_WORKSPACE} ${pslot} 'global-workflow'
""")
sh(script: """
source ${HOMEgfs}/workflow/gw_setup.sh
${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh cleanup_experiment ${CUSTOM_WORKSPACE}/RUNTESTS/EXPDIR/${pslot}
""")
} catch (Exception error_experment) {
sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh cancel_batch_jobs ${pslot}")
sh(script: """
source ${HOMEgfs}/workflow/gw_setup.sh
${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh cancel_batch_jobs ${pslot}
""")
ws(CUSTOM_WORKSPACE) {
def error_logs = ""
def error_logs_message = ""
@@ -219,9 +238,15 @@
}
}
try {
gist_url = sh(script: "${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --gist PR_${env.CHANGE_ID}", returnStdout: true).trim()
gist_url = sh(script: """
source ${HOMEgfs}/workflow/gw_setup.sh
${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --gist PR_${env.CHANGE_ID}
""", returnStdout: true).trim()
sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Experiment ${caseName} **FAILED** on ${Machine} in Build# ${env.BUILD_NUMBER} with error logs:\n\\`\\`\\`\n${error_logs_message}\\`\\`\\`\n\nFollow link here to view the contents of the above file(s): [(link)](${gist_url})" """)
sh(script: "${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --repo PR_${env.CHANGE_ID}")
sh(script: """
source ${HOMEgfs}/workflow/gw_setup.sh
${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --repo PR_${env.CHANGE_ID}
""")
} catch (Exception error_comment) {
echo "Failed to comment on PR: ${error_comment.getMessage()}"
}
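A note on the recurring change above: each `sh()` step now sources `workflow/gw_setup.sh` before invoking a CI utility, so every step runs with the workflow's runtime shell environment rather than Jenkins' bare default shell — this is the "Set runtime shell requirements within Jenkins Pipeline" change (NOAA-EMC#3171) from the commit message. A minimal standalone sketch of that pattern, assuming only that `gw_setup.sh` prepares the environment (the `run_in_gw_env` helper name is hypothetical, not part of the repository):

    #!/usr/bin/env bash
    # Hypothetical helper mirroring the sh() steps above: load the workflow
    # environment, then run the requested utility with its arguments.
    run_in_gw_env() {
      local homegfs=$1; shift
      # shellcheck disable=SC1091
      source "${homegfs}/workflow/gw_setup.sh"
      "$@"
    }

    # Example: the publish_logs.py invocation from the pipeline, rewrapped.
    run_in_gw_env "${HOMEgfs}" \
      "${HOMEgfs}/ci/scripts/utils/publish_logs.py" --file "${error_logs}" --repo "PR_BUILD_${CHANGE_ID}"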
15 changes: 8 additions & 7 deletions ci/scripts/utils/launch_java_agent.sh
@@ -65,14 +65,14 @@ controller_url="https://jenkins.epic.oarcloud.noaa.gov"
controller_user=${controller_user:-"terry.mcguinness"}
controller_user_auth_token="jenkins_token"

HOMEgfs="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." >/dev/null 2>&1 && pwd )"
HOMEGFS_="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." >/dev/null 2>&1 && pwd )"
host=$(hostname)

#########################################################################
# Set up runtime environment variables for accounts on supported machines
#########################################################################

source "${HOMEgfs}/ush/detect_machine.sh"
source "${HOMEGFS_}/ush/detect_machine.sh"
case ${MACHINE_ID} in
hera | orion | hercules | wcoss2 | gaea)
echo "Launch Jenkins Java Controler on ${MACHINE_ID}";;
@@ -84,10 +84,10 @@ esac
LOG=launched_agent-$(date +%Y%m%d%M).log
rm -f "${LOG}"

source "${HOMEgfs}/ush/module-setup.sh"
module use "${HOMEgfs}/modulefiles"
source "${HOMEGFS_}/ush/module-setup.sh"
module use "${HOMEGFS_}/modulefiles"
module load "module_gwsetup.${MACHINE_ID}"
source "${HOMEgfs}/ci/platforms/config.${MACHINE_ID}"
source "${HOMEGFS_}/ci/platforms/config.${MACHINE_ID}"

JAVA_HOME="${JENKINS_AGENT_LANUCH_DIR}/JAVA/jdk-17.0.10"
if [[ ! -d "${JAVA_HOME}" ]]; then
@@ -102,9 +102,10 @@ JAVA="${JAVA_HOME}/bin/java"
echo "JAVA VERSION: "
${JAVA} -version

export GH="${HOME}/bin/gh"
[[ -f "${GH}" ]] || echo "gh is not installed in ${HOME}/bin"
GH=$(command -v gh || echo "${HOME}/bin/gh")
[[ -f "${GH}" ]] || { echo "ERROR: GitHub CLI (gh) not found. (exiting with error)"; exit 1; }
${GH} --version
export GH

check_mark=$("${GH}" auth status -t 2>&1 | grep "Token:" | awk '{print $1}') || true
if [[ "${check_mark}" != "" ]]; then
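The `gh` lookup above replaces a hard-coded path with a PATH search plus a fallback, and now aborts when the CLI is missing (note the `{ ...; exit 1; }` group, which — unlike a subshell — actually terminates the script). The same detect-and-verify idiom, extracted as a self-contained sketch with an added authentication check (error messages are illustrative):

    #!/usr/bin/env bash
    # Locate the GitHub CLI: prefer whatever is on PATH, fall back to ${HOME}/bin/gh.
    GH=$(command -v gh || echo "${HOME}/bin/gh")
    [[ -x "${GH}" ]] || { echo "ERROR: GitHub CLI (gh) not found" >&2; exit 1; }
    "${GH}" --version
    # Fail fast if the CLI is present but not authenticated.
    "${GH}" auth status >/dev/null 2>&1 || { echo "ERROR: gh is not authenticated" >&2; exit 1; }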
56 changes: 19 additions & 37 deletions docs/source/clone.rst
@@ -18,35 +18,39 @@ Clone the `global-workflow` and `cd` into the `sorc` directory:
git clone --recursive https://github.com/NOAA-EMC/global-workflow
cd global-workflow/sorc

For forecast-only (coupled or uncoupled) build of the components:
.. _build_examples:

The build_all.sh script can be used to build all required components of the global workflow. The accepted argument is a list of systems to build, covering GFS and GEFS forecast-only experiments as well as GSI- and GDASApp-based DA for cycled GFS experiments. See `feature availability <hpc.html#feature-availability-by-hpc>`__ to see which system(s) are available on each supported machine.

::

./build_all.sh
./build_all.sh [gfs] [gefs] [gsi] [gdas] [all]

For cycled (w/ data assimilation) use the `-g` option during build:
For example, to run GFS experiments with GSI DA, execute:

::

./build_all.sh -g
./build_all.sh gfs gsi

For coupled cycling (include new UFSDA) use the `-gu` options during build:
This builds the GFS, UFS-utils, GFS-utils, WW3 with PDLIB (structured wave grids), UPP, GSI, GSI-monitor, and GSI-utils executables.

[Currently only available on Hera, Orion, and Hercules]
For coupled cycling (including the new UFSDA), execute:

::

./build_all.sh -gu
./build_all.sh gfs gdas

This builds all of the same executables, except it builds the GDASApp instead of the GSI.

For building without PDLIB (unstructured grid) for the wave model, use the `-w` options during build:
To run GEFS (forecast-only), execute:

::

./build_all.sh -w
./build_all.sh gefs

This builds the GEFS, UFS-utils, GFS-utils, WW3 *without* PDLIB (unstructured wave grids), and UPP executables.
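The listed systems can also be built together in one pass by passing `all` (an illustrative invocation; availability still varies by machine, per the feature table linked above):

::

   ./build_all.sh all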

Build workflow components and link workflow artifacts such as executables, etc.
Once the building is complete, link workflow artifacts such as executables, configuration files, and scripts via

::

@@ -107,40 +111,19 @@ Under the ``/sorc`` folder is a script to build all components called ``build_al

::

./build_all.sh [-a UFS_app][-g][-h][-u][-v]
./build_all.sh [-a UFS_app][-k][-h][-v] [list of system(s) to build]
-a UFS_app:
Build a specific UFS app instead of the default
-g:
Build GSI
-k:
Kill all builds immediately if one fails
-h:
Print this help message and exit
-j:
Specify maximum number of build jobs (n)
-u:
Build UFS-DA
-v:
Execute all build scripts with -v option to turn on verbose where supported

For forecast-only (coupled or uncoupled) build of the components:

::

./build_all.sh

For cycled (w/ data assimilation) use the `-g` option during build:

::

./build_all.sh -g

For coupled cycling (include new UFSDA) use the `-gu` options during build:

[Currently only available on Hera, Orion, and Hercules]

::

./build_all.sh -gu
Lastly, pass ``build_all.sh`` a list of systems to build: `gfs`, `gefs`, `sfs` (not fully supported), `gsi`, `gdas`, or `all`.
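Flags and the system list combine in a single invocation; for example, to build the GFS and GSI executables and stop immediately if any build fails (an illustrative command):

::

   ./build_all.sh -k gfs gsi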

For examples of how to use this script, see :ref:`build examples <build_examples>`.

^^^^^^^^^^^^^^^
Link components
@@ -156,4 +139,3 @@ After running the checkout and build scripts run the link script:

Where:
``-o``: Run in operations (NCO) mode. This creates copies instead of using symlinks and is generally only used by NCO during installation into production.

68 changes: 27 additions & 41 deletions env/AWSPW.env
@@ -33,7 +33,29 @@ else
exit 2
fi

if [[ "${step}" = "fcst" ]] || [[ "${step}" = "efcs" ]]; then
if [[ "${step}" = "prep" ]] || [[ "${step}" = "prepbufr" ]]; then

export POE="NO"
export BACK="NO"
export sys_tp="AWSPW"
export launcher_PREP="srun"

elif [[ "${step}" = "prepsnowobs" ]]; then

export APRUN_CALCFIMS="${APRUN_default}"

elif [[ "${step}" = "prep_emissions" ]]; then

export APRUN="${APRUN_default}"

elif [[ "${step}" = "waveinit" ]] || [[ "${step}" = "waveprep" ]] || [[ "${step}" = "wavepostsbs" ]] || [[ "${step}" = "wavepostbndpnt" ]] || [[ "${step}" = "wavepostbndpntbll" ]] || [[ "${step}" = "wavepostpnt" ]]; then

export CFP_MP="YES"
if [[ "${step}" = "waveprep" ]]; then export MP_PULSE=0 ; fi
export wavempexec=${launcher}
export wave_mpmd=${mpmd_opt}

elif [[ "${step}" = "fcst" ]] || [[ "${step}" = "efcs" ]]; then

export launcher="srun --mpi=pmi2 -l"

@@ -52,52 +74,16 @@ elif [[ "${step}" = "waveinit" ]] || [[ "${step}" = "waveprep" ]] || [[ "${step}

elif [[ "${step}" = "post" ]]; then

export NTHREADS_NP=${NTHREADS1}
export APRUN_NP="${APRUN_default}"

export NTHREADS_DWN=${threads_per_task_dwn:-1}
[[ ${NTHREADS_DWN} -gt ${max_threads_per_task} ]] && export NTHREADS_DWN=${max_threads_per_task}
export APRUN_DWN="${launcher} -n ${ntasks_dwn}"

elif [[ "${step}" = "atmos_products" ]]; then

export USE_CFP="YES" # Use MPMD for downstream product generation on Hera
export NTHREADS_UPP=${NTHREADS1}
export APRUN_UPP="${APRUN_default} --cpus-per-task=${NTHREADS_UPP}"

elif [[ "${step}" = "oceanice_products" ]]; then

export NTHREADS_OCNICEPOST=${NTHREADS1}
export APRUN_OCNICEPOST="${launcher} -n 1 --cpus-per-task=${NTHREADS_OCNICEPOST}"

elif [[ "${step}" = "ecen" ]]; then

export NTHREADS_ECEN=${NTHREADSmax}
export APRUN_ECEN="${APRUN_default}"

export NTHREADS_CHGRES=${threads_per_task_chgres:-12}
[[ ${NTHREADS_CHGRES} -gt ${max_tasks_per_node} ]] && export NTHREADS_CHGRES=${max_tasks_per_node}
export APRUN_CHGRES="time"

export NTHREADS_CALCINC=${threads_per_task_calcinc:-1}
[[ ${NTHREADS_CALCINC} -gt ${max_threads_per_task} ]] && export NTHREADS_CALCINC=${max_threads_per_task}
export APRUN_CALCINC="${APRUN_default}"

elif [[ "${step}" = "esfc" ]]; then

export NTHREADS_ESFC=${NTHREADSmax}
export APRUN_ESFC="${APRUN_default}"

export NTHREADS_CYCLE=${threads_per_task_cycle:-14}
[[ ${NTHREADS_CYCLE} -gt ${max_tasks_per_node} ]] && export NTHREADS_CYCLE=${max_tasks_per_node}
export APRUN_CYCLE="${APRUN_default}"

elif [[ "${step}" = "epos" ]]; then

export NTHREADS_EPOS=${NTHREADSmax}
export APRUN_EPOS="${APRUN_default}"

elif [[ "${step}" = "fit2obs" ]]; then
elif [[ "${step}" = "atmos_products" ]]; then

export NTHREADS_FIT2OBS=${NTHREADS1}
export MPIRUN="${APRUN_default}"
export USE_CFP="YES" # Use MPMD for downstream product generation on AWS

fi
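These env files act as per-step dispatch tables: a job script exports `step`, sources the file for its machine, and then launches executables with the step-specific `APRUN_*`/`NTHREADS_*` values. A minimal consumer sketch, assuming the usual `HOMEgfs` layout (the executable name and `EXECgfs` path are illustrative assumptions):

    #!/usr/bin/env bash
    # Hypothetical job fragment showing how one step consumes AWSPW.env.
    export step="atmos_products"
    source "${HOMEgfs}/env/AWSPW.env"          # sets NTHREADS_UPP, APRUN_UPP, USE_CFP
    export OMP_NUM_THREADS="${NTHREADS_UPP}"
    ${APRUN_UPP} "${EXECgfs}/upp.x"            # executable name is an assumption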
17 changes: 11 additions & 6 deletions env/AZUREPW.env
@@ -15,6 +15,7 @@ export mpmd_opt="--multi-prog --output=mpmd.%j.%t.out"
# Configure MPI environment
export OMP_STACKSIZE=2048000
export NTHSTACK=1024000000
export UCX_TLS=ud,sm,self

ulimit -s unlimited
ulimit -a
@@ -50,6 +51,10 @@ elif [[ "${step}" = "waveinit" ]] || [[ "${step}" = "waveprep" ]] || [[ "${step}
export wavempexec=${launcher}
export wave_mpmd=${mpmd_opt}

elif [[ "${step}" = "prep_emissions" ]]; then

export APRUN="${APRUN_default}"

elif [[ "${step}" = "post" ]]; then

export NTHREADS_NP=${NTHREADS1}
@@ -71,33 +76,33 @@ elif [[ "${step}" = "oceanice_products" ]]; then
elif [[ "${step}" = "ecen" ]]; then

export NTHREADS_ECEN=${NTHREADSmax}
export APRUN_ECEN="${APRUN}"
export APRUN_ECEN="${APRUN_default}"

export NTHREADS_CHGRES=${threads_per_task_chgres:-12}
[[ ${NTHREADS_CHGRES} -gt ${max_tasks_per_node} ]] && export NTHREADS_CHGRES=${max_tasks_per_node}
export APRUN_CHGRES="time"

export NTHREADS_CALCINC=${threads_per_task_calcinc:-1}
[[ ${NTHREADS_CALCINC} -gt ${max_threads_per_task} ]] && export NTHREADS_CALCINC=${max_threads_per_task}
export APRUN_CALCINC="${APRUN}"
export APRUN_CALCINC="${APRUN_default}"

elif [[ "${step}" = "esfc" ]]; then

export NTHREADS_ESFC=${NTHREADSmax}
export APRUN_ESFC="${APRUN}"
export APRUN_ESFC="${APRUN_default}"

export NTHREADS_CYCLE=${threads_per_task_cycle:-14}
[[ ${NTHREADS_CYCLE} -gt ${max_tasks_per_node} ]] && export NTHREADS_CYCLE=${max_tasks_per_node}
export APRUN_CYCLE="${APRUN}"
export APRUN_CYCLE="${APRUN_default}"

elif [[ "${step}" = "epos" ]]; then

export NTHREADS_EPOS=${NTHREADSmax}
export APRUN_EPOS="${APRUN}"
export APRUN_EPOS="${APRUN_default}"

elif [[ "${step}" = "fit2obs" ]]; then

export NTHREADS_FIT2OBS=${NTHREADS1}
export MPIRUN="${APRUN}"
export MPIRUN="${APRUN_default}"

fi