From 42a3e0450573877670d4942c0ea2650089593c8d Mon Sep 17 00:00:00 2001 From: henrywinterbottom-wxdev Date: Mon, 5 Feb 2024 12:05:00 -0700 Subject: [PATCH 1/5] Cleanup npe_node_max throughout code base. --- parm/config/gfs/config.resources | 75 ++++++++++++------------- parm/config/gfs/config.ufs | 95 ++++++++++++++++---------------- 2 files changed, 86 insertions(+), 84 deletions(-) diff --git a/parm/config/gfs/config.resources b/parm/config/gfs/config.resources index 36d8e788c30..89cbafac45d 100644 --- a/parm/config/gfs/config.resources +++ b/parm/config/gfs/config.resources @@ -32,43 +32,44 @@ step=$1 echo "BEGIN: config.resources" -case ${machine} in - "WCOSS2") npe_node_max=128;; - "HERA") npe_node_max=40;; - "ORION") npe_node_max=40;; - "HERCULES") npe_node_max=80;; - "JET") - case ${PARTITION_BATCH} in - "xjet") npe_node_max=24;; - "vjet" | "sjet") npe_node_max=16;; - "kjet") npe_node_max=40;; - *) - echo "FATAL ERROR: Unknown partition ${PARTITION_BATCH} specified for ${machine}" - exit 3 - esac - ;; - "S4") - case ${PARTITION_BATCH} in - "s4") npe_node_max=32;; - "ivy") npe_node_max=20;; - *) - echo "FATAL ERROR: Unknown partition ${PARTITION_BATCH} specified for ${machine}" - exit 3 - esac - ;; - "AWSPW") - export PARTITION_BATCH="compute" - npe_node_max=40 - ;; - "CONTAINER") - npe_node_max=1 - ;; - *) - echo "FATAL ERROR: Unknown machine encountered by ${BASH_SOURCE[0]}" - exit 2 - ;; -esac -export npe_node_max +# TODO: Removing `npe_node_max` declaration. 
+#case ${machine} in +# "WCOSS2") npe_node_max=128;; +# "HERA") npe_node_max=40;; +# "ORION") npe_node_max=40;; +# "HERCULES") npe_node_max=80;; +# "JET") +# case ${PARTITION_BATCH} in +# "xjet") npe_node_max=24;; +# "vjet" | "sjet") npe_node_max=16;; +# "kjet") npe_node_max=40;; +# *) +# echo "FATAL ERROR: Unknown partition ${PARTITION_BATCH} specified for ${machine}" +# exit 3 +# esac +# ;; +# "S4") +# case ${PARTITION_BATCH} in +# "s4") npe_node_max=32;; +# "ivy") npe_node_max=20;; +# *) +# echo "FATAL ERROR: Unknown partition ${PARTITION_BATCH} specified for ${machine}" +# exit 3 +# esac +# ;; +# "AWSPW") +# export PARTITION_BATCH="compute" +# npe_node_max=40 +# ;; +# "CONTAINER") +# npe_node_max=1 +# ;; +# *) +# echo "FATAL ERROR: Unknown machine encountered by ${BASH_SOURCE[0]}" +# exit 2 +# ;; +#esac +#export npe_node_max case ${step} in "prep") diff --git a/parm/config/gfs/config.ufs b/parm/config/gfs/config.ufs index 0a59da47cac..f485a113caa 100644 --- a/parm/config/gfs/config.ufs +++ b/parm/config/gfs/config.ufs @@ -68,53 +68,54 @@ if [[ "${skip_mom6}" == "false" ]] || [[ "${skip_cice6}" == "false" ]] || [[ "${ skip_mediator=false fi -case "${machine}" in - "WCOSS2") - npe_node_max=128 - ;; - "HERA" | "ORION" ) - npe_node_max=40 - ;; - "HERCULES" ) - npe_node_max=80 - ;; - "JET") - case "${PARTITION_BATCH}" in - "xjet") - npe_node_max=24 - ;; - "vjet" | "sjet") - npe_node_max=16 - ;; - "kjet") - npe_node_max=40 - ;; - *) - echo "FATAL ERROR: Unsupported ${machine} PARTITION_BATCH = ${PARTITION_BATCH}, ABORT!" - exit 1 - ;; - esac - ;; - "S4") - case "${PARTITION_BATCH}" in - "s4") - npe_node_max=32 - ;; - "ivy") - npe_node_max=20 - ;; - *) - echo "FATAL ERROR: Unsupported ${machine} PARTITION_BATCH = ${PARTITION_BATCH}, ABORT!" - exit 1 - ;; - esac - ;; - *) - echo "FATAL ERROR: Unrecognized machine ${machine}" - exit 14 - ;; -esac -export npe_node_max +# TODO: Removing `npe_node_max` here. 
+#case "${machine}" in +# "WCOSS2") +# npe_node_max=128 +# ;; +# "HERA" | "ORION" ) +# npe_node_max=40 +# ;; +# "HERCULES" ) +# npe_node_max=80 +# ;; +# "JET") +# case "${PARTITION_BATCH}" in +# "xjet") +# npe_node_max=24 +# ;; +# "vjet" | "sjet") +# npe_node_max=16 +# ;; +# "kjet") +# npe_node_max=40 +# ;; +# *) +# echo "FATAL ERROR: Unsupported ${machine} PARTITION_BATCH = ${PARTITION_BATCH}, ABORT!" +# exit 1 +# ;; +# esac +# ;; +# "S4") +# case "${PARTITION_BATCH}" in +# "s4") +# npe_node_max=32 +# ;; +# "ivy") +# npe_node_max=20 +# ;; +# *) +# echo "FATAL ERROR: Unsupported ${machine} PARTITION_BATCH = ${PARTITION_BATCH}, ABORT!" +# exit 1 +# ;; +# esac +# ;; +# *) +# echo "FATAL ERROR: Unrecognized machine ${machine}" +# exit 14 +# ;; +#esac +#export npe_node_max # (Standard) Model resolution dependent variables case "${fv3_res}" in From fbdebab80ade78b4868fc414fc3ad3ef4a31ba33 Mon Sep 17 00:00:00 2001 From: henrywinterbottom-wxdev Date: Mon, 5 Feb 2024 12:09:35 -0700 Subject: [PATCH 2/5] Debugging. --- env/HERA.env | 3 +- parm/config/gfs/config.resources | 74 ++++++++++++++++---------------- 2 files changed, 39 insertions(+), 38 deletions(-) diff --git a/env/HERA.env b/env/HERA.env index fb156645f89..34f0c43f669 100755 --- a/env/HERA.env +++ b/env/HERA.env @@ -14,7 +14,8 @@ fi step=$1 -export npe_node_max=40 +# TODO: Removing `npe_node_max`. +#export npe_node_max=40 export launcher="srun -l --export=ALL" export mpmd_opt="--multi-prog --output=mpmd.%j.%t.out" diff --git a/parm/config/gfs/config.resources b/parm/config/gfs/config.resources index 89cbafac45d..ce7a2cbf139 100644 --- a/parm/config/gfs/config.resources +++ b/parm/config/gfs/config.resources @@ -33,43 +33,43 @@ step=$1 echo "BEGIN: config.resources" # TODO: Removing `npe_node_max` declaration. 
-#case ${machine} in -# "WCOSS2") npe_node_max=128;; -# "HERA") npe_node_max=40;; -# "ORION") npe_node_max=40;; -# "HERCULES") npe_node_max=80;; -# "JET") -# case ${PARTITION_BATCH} in -# "xjet") npe_node_max=24;; -# "vjet" | "sjet") npe_node_max=16;; -# "kjet") npe_node_max=40;; -# *) -# echo "FATAL ERROR: Unknown partition ${PARTITION_BATCH} specified for ${machine}" -# exit 3 -# esac -# ;; -# "S4") -# case ${PARTITION_BATCH} in -# "s4") npe_node_max=32;; -# "ivy") npe_node_max=20;; -# *) -# echo "FATAL ERROR: Unknown partition ${PARTITION_BATCH} specified for ${machine}" -# exit 3 -# esac -# ;; -# "AWSPW") -# export PARTITION_BATCH="compute" -# npe_node_max=40 -# ;; -# "CONTAINER") -# npe_node_max=1 -# ;; -# *) -# echo "FATAL ERROR: Unknown machine encountered by ${BASH_SOURCE[0]}" -# exit 2 -# ;; -#esac -#export npe_node_max +case ${machine} in + "WCOSS2") npe_node_max=128;; + "HERA") npe_node_max=40;; + "ORION") npe_node_max=40;; + "HERCULES") npe_node_max=80;; + "JET") + case ${PARTITION_BATCH} in + "xjet") npe_node_max=24;; + "vjet" | "sjet") npe_node_max=16;; + "kjet") npe_node_max=40;; + *) + echo "FATAL ERROR: Unknown partition ${PARTITION_BATCH} specified for ${machine}" + exit 3 + esac + ;; + "S4") + case ${PARTITION_BATCH} in + "s4") npe_node_max=32;; + "ivy") npe_node_max=20;; + *) + echo "FATAL ERROR: Unknown partition ${PARTITION_BATCH} specified for ${machine}" + exit 3 + esac + ;; + "AWSPW") + export PARTITION_BATCH="compute" + npe_node_max=40 + ;; + "CONTAINER") + npe_node_max=1 + ;; + *) + echo "FATAL ERROR: Unknown machine encountered by ${BASH_SOURCE[0]}" + exit 2 + ;; +esac +export npe_node_max case ${step} in "prep") From a63de7a858c38220555a79bf1b1e8204560aafd2 Mon Sep 17 00:00:00 2001 From: henrywinterbottom-wxdev Date: Mon, 5 Feb 2024 12:33:36 -0700 Subject: [PATCH 3/5] Removed npe_node_max from all supported environment files. 
--- env/AWSPW.env | 1 - env/CONTAINER.env | 1 - env/HERA.env | 2 -- env/HERCULES.env | 1 - env/JET.env | 7 ------- env/ORION.env | 1 - env/S4.env | 6 ------ env/WCOSS2.env | 2 -- 8 files changed, 21 deletions(-) diff --git a/env/AWSPW.env b/env/AWSPW.env index 894cce23436..ea5002ecb9b 100755 --- a/env/AWSPW.env +++ b/env/AWSPW.env @@ -14,7 +14,6 @@ fi step=$1 -export npe_node_max=36 export launcher="mpiexec.hydra" export mpmd_opt="" diff --git a/env/CONTAINER.env b/env/CONTAINER.env index bfeb6dd6daa..b1f55a4c98c 100755 --- a/env/CONTAINER.env +++ b/env/CONTAINER.env @@ -14,7 +14,6 @@ fi step=$1 -export npe_node_max=40 export launcher="mpirun" export mpmd_opt="--multi-prog" diff --git a/env/HERA.env b/env/HERA.env index 34f0c43f669..e02c0aad224 100755 --- a/env/HERA.env +++ b/env/HERA.env @@ -14,8 +14,6 @@ fi step=$1 -# TODO: Removing `npe_node_max`. -#export npe_node_max=40 export launcher="srun -l --export=ALL" export mpmd_opt="--multi-prog --output=mpmd.%j.%t.out" diff --git a/env/HERCULES.env b/env/HERCULES.env index 6a4aad7a7d4..ebfa51398b3 100755 --- a/env/HERCULES.env +++ b/env/HERCULES.env @@ -12,7 +12,6 @@ fi step=$1 -export npe_node_max=80 export launcher="srun -l --export=ALL" export mpmd_opt="--multi-prog --output=mpmd.%j.%t.out" diff --git a/env/JET.env b/env/JET.env index 7bb152c5f3f..eada0b1c70e 100755 --- a/env/JET.env +++ b/env/JET.env @@ -14,13 +14,6 @@ fi step=$1 -if [[ "${PARTITION_BATCH}" = "xjet" ]]; then - export npe_node_max=24 -elif [[ "${PARTITION_BATCH}" = "vjet" ]]; then - export npe_node_max=16 -elif [[ "${PARTITION_BATCH}" = "kjet" ]]; then - export npe_node_max=40 -fi export launcher="srun -l --epilog=/apps/local/bin/report-mem --export=ALL" export mpmd_opt="--multi-prog --output=mpmd.%j.%t.out" diff --git a/env/ORION.env b/env/ORION.env index d91fd4db03b..c5e94cc5593 100755 --- a/env/ORION.env +++ b/env/ORION.env @@ -14,7 +14,6 @@ fi step=$1 -export npe_node_max=40 export launcher="srun -l --export=ALL" export mpmd_opt="--multi-prog 
--output=mpmd.%j.%t.out" diff --git a/env/S4.env b/env/S4.env index 3dab3fc3e79..b103e865d38 100755 --- a/env/S4.env +++ b/env/S4.env @@ -13,13 +13,7 @@ if [[ $# -ne 1 ]]; then fi step=$1 -PARTITION_BATCH=${PARTITION_BATCH:-"s4"} -if [[ ${PARTITION_BATCH} = "s4" ]]; then - export npe_node_max=32 -elif [[ ${PARTITION_BATCH} = "ivy" ]]; then - export npe_node_max=20 -fi export launcher="srun -l --export=ALL" export mpmd_opt="--multi-prog --output=mpmd.%j.%t.out" diff --git a/env/WCOSS2.env b/env/WCOSS2.env index a4fe81060dd..307ad71c430 100755 --- a/env/WCOSS2.env +++ b/env/WCOSS2.env @@ -18,8 +18,6 @@ step=$1 export launcher="mpiexec -l" export mpmd_opt="--cpu-bind verbose,core cfp" -export npe_node_max=128 - if [[ "${step}" = "prep" ]] || [[ "${step}" = "prepbufr" ]]; then nth_max=$((npe_node_max / npe_node_prep)) From 530be912c7349622444a01bdd54870e67fb3e9a9 Mon Sep 17 00:00:00 2001 From: henrywinterbottom-wxdev Date: Mon, 5 Feb 2024 12:36:02 -0700 Subject: [PATCH 4/5] Removed npe_node_max from config.ufs; maintain npe_node_max in config.resources. --- parm/config/gfs/config.resources | 1 - parm/config/gfs/config.ufs | 49 -------------------------------- 2 files changed, 50 deletions(-) diff --git a/parm/config/gfs/config.resources b/parm/config/gfs/config.resources index ce7a2cbf139..36d8e788c30 100644 --- a/parm/config/gfs/config.resources +++ b/parm/config/gfs/config.resources @@ -32,7 +32,6 @@ step=$1 echo "BEGIN: config.resources" -# TODO: Removing `npe_node_max` declaration. case ${machine} in "WCOSS2") npe_node_max=128;; "HERA") npe_node_max=40;; diff --git a/parm/config/gfs/config.ufs b/parm/config/gfs/config.ufs index f485a113caa..c8ce2168994 100644 --- a/parm/config/gfs/config.ufs +++ b/parm/config/gfs/config.ufs @@ -68,55 +68,6 @@ if [[ "${skip_mom6}" == "false" ]] || [[ "${skip_cice6}" == "false" ]] || [[ "${ skip_mediator=false fi -# TODO: Removing `npe_node_max` here. 
-#case "${machine}" in -# "WCOSS2") -# npe_node_max=128 -# ;; -# "HERA" | "ORION" ) -# npe_node_max=40 -# ;; -# "HERCULES" ) -# npe_node_max=80 -# ;; -# "JET") -# case "${PARTITION_BATCH}" in -# "xjet") -# npe_node_max=24 -# ;; -# "vjet" | "sjet") -# npe_node_max=16 -# ;; -# "kjet") -# npe_node_max=40 -# ;; -# *) -# echo "FATAL ERROR: Unsupported ${machine} PARTITION_BATCH = ${PARTITION_BATCH}, ABORT!" -# exit 1 -# ;; -# esac -# ;; -# "S4") -# case "${PARTITION_BATCH}" in -# "s4") -# npe_node_max=32 -# ;; -# "ivy") -# npe_node_max=20 -# ;; -# *) -# echo "FATAL ERROR: Unsupported ${machine} PARTITION_BATCH = ${PARTITION_BATCH}, ABORT!" -# exit 1 -# ;; -# esac -# ;; -# *) -# echo "FATAL ERROR: Unrecognized machine ${machine}" -# exit 14 -# ;; -#esac -#export npe_node_max - # (Standard) Model resolution dependent variables case "${fv3_res}" in "C48") From da429e215099fcb44186464771de3f1912c69584 Mon Sep 17 00:00:00 2001 From: henrywinterbottom-wxdev Date: Mon, 5 Feb 2024 12:37:09 -0700 Subject: [PATCH 5/5] npe_node_max now defined only in config.resources. --- parm/config/gefs/config.ufs | 48 ------------------------------------- 1 file changed, 48 deletions(-) diff --git a/parm/config/gefs/config.ufs b/parm/config/gefs/config.ufs index 2031d0b5387..866de529646 100644 --- a/parm/config/gefs/config.ufs +++ b/parm/config/gefs/config.ufs @@ -68,54 +68,6 @@ if [[ "${skip_mom6}" == "false" ]] || [[ "${skip_cice6}" == "false" ]] || [[ "${ skip_mediator=false fi -case "${machine}" in - "WCOSS2") - npe_node_max=128 - ;; - "HERA" | "ORION" ) - npe_node_max=40 - ;; - "HERCULES" ) - npe_node_max=80 - ;; - "JET") - case "${PARTITION_BATCH}" in - "xjet") - npe_node_max=24 - ;; - "vjet" | "sjet") - npe_node_max=16 - ;; - "kjet") - npe_node_max=40 - ;; - *) - echo "FATAL ERROR: Unsupported ${machine} PARTITION_BATCH = ${PARTITION_BATCH}, ABORT!" 
- exit 1 - ;; - esac - ;; - "S4") - case "${PARTITION_BATCH}" in - "s4") - npe_node_max=32 - ;; - "ivy") - npe_node_max=20 - ;; - *) - echo "FATAL ERROR: Unsupported ${machine} PARTITION_BATCH = ${PARTITION_BATCH}, ABORT!" - exit 1 - ;; - esac - ;; - *) - echo "FATAL ERROR: Unrecognized machine ${machine}" - exit 14 - ;; -esac -export npe_node_max - # (Standard) Model resolution dependent variables case "${fv3_res}" in "C48")