diff --git a/dev/parm/config/gfs/config.resources b/dev/parm/config/gfs/config.resources index 8643b6b95d1..2f3829622eb 100644 --- a/dev/parm/config/gfs/config.resources +++ b/dev/parm/config/gfs/config.resources @@ -1317,9 +1317,15 @@ case ${step} in "esfc") walltime="01:15:00" - ntasks=80 + ntasks=$(( NMEM_ENS * 6)) # The regrid program requires ntasks = tiles (6) times ensemble members threads_per_task=1 - tasks_per_node=$(( max_tasks_per_node / threads_per_task )) + + # Use only node_numerator/node_denominator (here 1/2) of max_tasks_per_node + # on each node to leave enough memory for regridding + node_numerator=1 + node_denominator=2 + tasks_per_node=$(( node_numerator*max_tasks_per_node/node_denominator )) + threads_per_task_cycle=${threads_per_task} tasks_per_node_cycle=$(( max_tasks_per_node / threads_per_task_cycle )) ;; diff --git a/env/AWSPW.env b/env/AWSPW.env index 1c2f903f28b..e974d109cd0 100755 --- a/env/AWSPW.env +++ b/env/AWSPW.env @@ -107,6 +107,9 @@ elif [[ "${step}" = "esfc" ]]; then export NTHREADS_CYCLE=${threads_per_task_cycle:-1} export APRUN_CYCLE="${APRUN_default} --cpus-per-task=${NTHREADS_CYCLE}" + # REGRID requires 6*NMEM_ENS tasks (one per tile per member) for reproducibility + export APRUN_REGRID="${launcher} -n ${ntasks}" + elif [[ "${step}" = "upp" ]]; then export NTHREADS_UPP=1 diff --git a/env/AZUREPW.env b/env/AZUREPW.env index 11a45c78ffd..f407b19302b 100755 --- a/env/AZUREPW.env +++ b/env/AZUREPW.env @@ -107,6 +107,9 @@ elif [[ "${step}" = "esfc" ]]; then fi export APRUN_CYCLE="${APRUN_default}" + # REGRID requires 6*NMEM_ENS tasks (one per tile per member) for reproducibility + export APRUN_REGRID="${launcher} -n ${ntasks}" + elif [[ "${step}" = "epos" ]]; then export NTHREADS_EPOS=${NTHREADSmax} diff --git a/env/GAEAC5.env b/env/GAEAC5.env index b6da2dd1c4f..83813f447c5 100755 --- a/env/GAEAC5.env +++ b/env/GAEAC5.env @@ -285,9 +285,8 @@ case ${step} in fi export APRUN_CYCLE="${APRUN_default} --cpus-per-task=${NTHREADS_CYCLE}" - # REGRID requires 6 tasks for reproducibility - ntasks_regrid=6 - export 
APRUN_REGRID="${launcher} -n ${ntasks_regrid} " + # REGRID requires 6*NMEM_ENS tasks (one per tile per member) for reproducibility + export APRUN_REGRID="${launcher} -n ${ntasks}" ;; "epos") diff --git a/env/GAEAC6.env b/env/GAEAC6.env index 31182f89da4..b37572ba7cf 100755 --- a/env/GAEAC6.env +++ b/env/GAEAC6.env @@ -317,9 +317,8 @@ case ${step} in fi export APRUN_CYCLE="${APRUN_default} --cpus-per-task=${NTHREADS_CYCLE}" - # REGRID requires 6 tasks for reproducibility - ntasks_regrid=6 - export APRUN_REGRID="${launcher} -n ${ntasks_regrid} " + # REGRID requires 6*NMEM_ENS tasks (one per tile per member) for reproducibility + export APRUN_REGRID="${launcher} -n ${ntasks}" ;; "epos") diff --git a/env/GOOGLEPW.env b/env/GOOGLEPW.env index b6a4d8c01f7..9a821023e3f 100755 --- a/env/GOOGLEPW.env +++ b/env/GOOGLEPW.env @@ -106,6 +106,9 @@ elif [[ "${step}" = "esfc" ]]; then fi export APRUN_CYCLE="${APRUN_default}" + # REGRID requires 6*NMEM_ENS tasks (one per tile per member) for reproducibility + export APRUN_REGRID="${launcher} -n ${ntasks}" + elif [[ "${step}" = "epos" ]]; then export NTHREADS_EPOS=${NTHREADSmax} diff --git a/env/HERA.env b/env/HERA.env index a2177cf0986..57aefcc66af 100755 --- a/env/HERA.env +++ b/env/HERA.env @@ -302,9 +302,8 @@ elif [[ "${step}" = "esfc" ]]; then fi export APRUN_CYCLE="${APRUN_default} --cpus-per-task=${NTHREADS_CYCLE}" - # REGRID requires 6 tasks for reproducibility - ntasks_regrid=6 - export APRUN_REGRID="${launcher} -n ${ntasks_regrid} " + # REGRID requires 6*NMEM_ENS tasks (one per tile per member) for reproducibility + export APRUN_REGRID="${launcher} -n ${ntasks}" elif [[ "${step}" = "epos" ]]; then diff --git a/env/HERCULES.env b/env/HERCULES.env index 806714e38f1..46a6db49216 100755 --- a/env/HERCULES.env +++ b/env/HERCULES.env @@ -322,9 +322,8 @@ case ${step} in fi export APRUN_CYCLE="${APRUN_default} --cpus-per-task=${NTHREADS_CYCLE}" - # REGRID requires 6 tasks for reproducibility - ntasks_regrid=6 - export APRUN_REGRID="${launcher} -n ${ntasks_regrid} " + # REGRID requires 6*NMEM_ENS tasks (one per tile per member) for reproducibility + export 
APRUN_REGRID="${launcher} -n ${ntasks}" ;; "epos") diff --git a/env/ORION.env b/env/ORION.env index 2bfd50bd6a8..10f896037ab 100755 --- a/env/ORION.env +++ b/env/ORION.env @@ -311,9 +311,8 @@ elif [[ "${step}" = "esfc" ]]; then fi export APRUN_CYCLE="${APRUN_default} --cpus-per-task=${NTHREADS_CYCLE}" - # REGRID requires 6 tasks for reproducibility - ntasks_regrid=6 - export APRUN_REGRID="${launcher} -n ${ntasks_regrid} " + # REGRID requires 6*NMEM_ENS tasks (one per tile per member) for reproducibility + export APRUN_REGRID="${launcher} -n ${ntasks}" elif [[ "${step}" = "epos" ]]; then diff --git a/env/URSA.env b/env/URSA.env index d4dc0161877..1b952df431c 100644 --- a/env/URSA.env +++ b/env/URSA.env @@ -291,9 +291,8 @@ elif [[ "${step}" = "esfc" ]]; then fi export APRUN_CYCLE="${APRUN_default} --cpus-per-task=${NTHREADS_CYCLE}" - # REGRID requires 6 tasks for reproducibility - ntasks_regrid=6 - export APRUN_REGRID="${launcher} -n ${ntasks_regrid} " + # REGRID requires 6*NMEM_ENS tasks (one per tile per member) for reproducibility + export APRUN_REGRID="${launcher} -n ${ntasks}" elif [[ "${step}" = "epos" ]]; then diff --git a/env/WCOSS2.env b/env/WCOSS2.env index 1734968ee51..b0da0aa559a 100755 --- a/env/WCOSS2.env +++ b/env/WCOSS2.env @@ -321,9 +321,8 @@ elif [[ "${step}" = "esfc" ]]; then fi export APRUN_CYCLE="${APRUN_default} -ppn ${tasks_per_node_cycle} --cpu-bind depth --depth ${NTHREADS_CYCLE}" - # REGRID requires 6 tasks for reproducibility - ntasks_regrid=6 - export APRUN_REGRID="${launcher} -n ${ntasks_regrid} " + # REGRID requires 6*NMEM_ENS tasks (one per tile per member) for reproducibility + export APRUN_REGRID="${launcher} -n ${ntasks}" elif [[ "${step}" = "epos" ]]; then diff --git a/jobs/JGDAS_ENKF_SFC b/jobs/JGDAS_ENKF_SFC index 8ff4982e353..ee2bb8112a9 100755 --- a/jobs/JGDAS_ENKF_SFC +++ b/jobs/JGDAS_ENKF_SFC @@ -36,6 +36,9 @@ RUN=${GDUMP} YMD=${gPDY} HH=${gcyc} declare_from_tmpl -rx \ COMIN_OBS_PREV:COM_OBS_TMPL \ COMIN_ATMOS_ANALYSIS_DET_PREV:COM_ATMOS_ANALYSIS_TMPL +# Use CFP to stage and save files in parallel + 
export USE_CFP=YES + ############################################################### # Run relevant script diff --git a/jobs/JGLOBAL_ATMOS_SFCANL b/jobs/JGLOBAL_ATMOS_SFCANL index 6223c4cacf3..41fdeec5969 100755 --- a/jobs/JGLOBAL_ATMOS_SFCANL +++ b/jobs/JGLOBAL_ATMOS_SFCANL @@ -32,6 +32,9 @@ RUN="enkfgdas" MEMDIR="ensstat" YMD=${PDY} HH=${cyc} declare_from_tmpl -rx \ mkdir -p "${COMOUT_ATMOS_RESTART}" +# Use CFP to stage and save files in parallel + export USE_CFP=YES + ############################################################### # Run relevant script diff --git a/scripts/exgdas_enkf_sfc.sh b/scripts/exgdas_enkf_sfc.sh index 1fb8c53a79c..2a22d0cea51 100755 --- a/scripts/exgdas_enkf_sfc.sh +++ b/scripts/exgdas_enkf_sfc.sh @@ -203,7 +203,7 @@ if [[ "$DOIAU" == "YES" ]]; then cpreq "${FIXgfs}/orog/${CASE}/${CASE}_grid.tile${n}.nc" "${DATA}/fngrid.${cmem}" cpreq "${FIXgfs}/orog/${CASE}/${CASE}.mx${OCNRES}_oro_data.tile${n}.nc" "${DATA}/fnorog.${cmem}" - if [[ "${DO_GSISOILDA}" == "YES" ]]; then + if [[ "${DO_GSISOILDA}" == "YES" ]] && [[ "${DO_LAND_IAU}" == ".false." ]]; then cpreq "${COMIN_ATMOS_ANALYSIS_MEM}/increment.sfc.i00${LFHR}.tile${n}.nc" \ "${DATA}/soil_xainc.${cmem}" fi diff --git a/sorc/ufs_utils.fd b/sorc/ufs_utils.fd index 6c05801564a..154a3676735 160000 --- a/sorc/ufs_utils.fd +++ b/sorc/ufs_utils.fd @@ -1 +1 @@ -Subproject commit 6c05801564ae222d9232278e9a038c2add8c668d +Subproject commit 154a367673555910b57aeb5b064f2c45c2f61e05 diff --git a/ush/regrid_gsiSfcIncr_to_tile.sh b/ush/regrid_gsiSfcIncr_to_tile.sh index e450c9fa5c0..30f9f6067dc 100755 --- a/ush/regrid_gsiSfcIncr_to_tile.sh +++ b/ush/regrid_gsiSfcIncr_to_tile.sh @@ -6,12 +6,11 @@ source "${HOMEgfs}/ush/atparse.bash" # Script to regrid surface increment from GSI grid # to fv3 tiles.
# Clara Draper, Dec 2024 +# David New, Nov 2025 (parallelization updates) #------------------------------------------------------------------------------------------------- export PGMOUT=${PGMOUT:-${pgmout:-'&1'}} export PGMERR=${PGMERR:-${pgmerr:-'&2'}} -export REDOUT=${REDOUT:-'1>'} -export REDERR=${REDERR:-'2>'} export PGM=${REGRID_EXEC} export pgm=${PGM} @@ -43,8 +42,9 @@ done if [[ "${DO_LAND_IAU}" = ".true." ]]; then IFS=',' read -ra landifhrs <<< "${IAUFHRS}" fi -export in_fname="'enkfgdas.sfci'" +export in_fname="'sfci'" export out_fname="'sfci'" +export in_dir="" export dir_mask_in="'./'" export fname_mask_in="'NULL'" export ires=${LONB_CASE_IN} @@ -54,21 +54,6 @@ export jreso=${CASE_OUT:1} regrid_nml_tmpl="${PARMgfs}/regrid_sfc/regrid.nml_tmpl" -# input, fixed files -cpreq "${FIXorog}/${CASE_IN}/gaussian.${LONB_CASE_IN}.${LATB_CASE_IN}.nc" \ - "${DATA}/gaussian_scrip.nc" - -# output, fixed files -cpreq "${FIXorog}/${CASE_OUT}/${CASE_OUT}_mosaic.nc" \ - "${DATA}/${CASE_OUT}_mosaic.nc" - -for n in $(seq 1 "${ntiles}"); do - cpreq "${FIXorog}/${CASE_OUT}/sfc/${CASE_OUT}.mx${OCNRES_OUT}.vegetation_type.tile${n}.nc" \ - "${DATA}/vegetation_type.tile${n}.nc" - cpreq "${FIXorog}/${CASE_OUT}/${CASE_OUT}_grid.tile${n}.nc" \ - "${DATA}/${CASE_OUT}_grid.tile${n}.nc" -done - if (( LFHR >= 0 )); then soilinc_fhrs=("${LFHR}") else # construct restart times for deterministic member @@ -79,63 +64,214 @@ else # construct restart times for deterministic member fi fi -for imem in $(seq 1 "${NMEM_REGRID}"); do - if (( NMEM_REGRID > 1 )); then - cmem=$(printf %03i "${imem}") - memchar="mem${cmem}" +# +# Stage input files +# - MEMDIR=${memchar} YMD=${PDY} HH=${cyc} declare_from_tmpl \ - COMOUT_ATMOS_ANALYSIS_MEM:COM_ATMOS_ANALYSIS_TMPL +# Create MPMD command file for fixed files +rm -f cmdfile.0 +touch cmdfile.0 +chmod 755 cmdfile.0 + +# Write the fixed-file copy commands to cmdfile.0 +{ +echo "#!/bin/bash" + +# input, fixed files +echo "cpreq 
${FIXorog}/${CASE_IN}/gaussian.${LONB_CASE_IN}.${LATB_CASE_IN}.nc \ + ${DATA}/gaussian_scrip.nc" + +# output, fixed files +echo "cpreq ${FIXorog}/${CASE_OUT}/${CASE_OUT}_mosaic.nc \ + ${DATA}/${CASE_OUT}_mosaic.nc" + +for n in $(seq 1 "${ntiles}"); do + echo "cpreq ${FIXorog}/${CASE_OUT}/sfc/${CASE_OUT}.mx${OCNRES_OUT}.vegetation_type.tile${n}.nc \ + ${DATA}/vegetation_type.tile${n}.nc" + echo "cpreq ${FIXorog}/${CASE_OUT}/${CASE_OUT}_grid.tile${n}.nc \ + ${DATA}/${CASE_OUT}_grid.tile${n}.nc" +done +} > cmdfile.0 +for imem in $(seq 1 "${NMEM_REGRID}"); do + cmem=$(printf %03i "${imem}") + memchar="mem${cmem}" + + # If deterministic job, COMOUT_ATMOS_ANALYSIS_MEM is just COMOUT_ATMOS_ANALYSIS + if (( NMEM_REGRID > 1 )); then MEMDIR=${memchar} YMD=${PDY} HH=${cyc} declare_from_tmpl \ COMIN_SOIL_ANALYSIS_MEM:COM_ATMOS_ANALYSIS_TMPL + + memdir="${DATA}/${memchar}" + mkdir -p "${memdir}" + + if [[ "${imem}" -gt 1 ]]; then + in_dir+=", " + fi + in_dir+="\"./${memchar}/\"" + else + # If deterministic job, memdir is just DATA + memdir="${DATA}" + + in_dir="'./'" fi + # Create MPMD command file for this member + rm -f "cmdfile.${imem}" + touch "cmdfile.${imem}" + chmod 755 "cmdfile.${imem}" + + # Create commands to stage input files + { + echo "#!/bin/bash" + for FHR in "${soilinc_fhrs[@]}"; do + echo "cpreq ${COMIN_SOIL_ANALYSIS_MEM}/${APREFIX_ENS}increment.sfc.i00${FHR}.nc \ + ${memdir}/sfci00${FHR}.nc" + done + + if [[ "${DO_LAND_IAU}" = ".true." 
]]; then + for FHI in "${landifhrs[@]}"; do + echo "cpreq ${COMIN_SOIL_ANALYSIS_MEM}/${APREFIX_ENS}increment.sfc.i00${FHI}.nc \ + ${memdir}/sfci00${FHI}.nc" + done + fi + } > "cmdfile.${imem}" +done +# Create master MPMD command file +rm -f cmdfile +touch cmdfile +chmod 755 cmdfile + +# Append all members' command files to master command file +{ +echo "${DATA}/cmdfile.0" # fixed files +for imem in $(seq 1 "${NMEM_REGRID}"); do + echo "${DATA}/cmdfile.${imem}" +done +} >> cmdfile + +# Run MPMD to stage input files +"${USHgfs}/run_mpmd.sh" "cmdfile" && true +export err=$? +if [[ ${err} -ne 0 ]]; then + err_exit "run_mpmd.sh failed!" +fi + +# Finish defining input/output directory list +export out_dir="${in_dir}" + +# +# Regrid soil increments and save to COMOUT +# + +# Increments for offline analysis +# If land IAU --> deterministic only. If no land IAU --> both deterministic and ensemble +if [[ "${DO_LAND_IAU}" = ".false." || "${RUN}" == "gdas" || "${RUN}" == "gfs" ]]; then + for FHR in "${soilinc_fhrs[@]}"; do + # Set namelist variables export add_time_dim=".false." export time_list="${FHR}" + export out_fname="'sfci00${FHR}'" + # Create regrid namelist rm -f "regrid.nml" atparse < "${regrid_nml_tmpl}" >> "regrid.nml" - cpreq "${COMIN_SOIL_ANALYSIS_MEM}/${APREFIX_ENS}increment.sfc.i00${FHR}.nc" \ - "${DATA}/enkfgdas.sfci00${FHR}.nc" + # Run regrid executable + ${APRUN_REGRID} "${REGRID_EXEC}" 1>"${PGMOUT}" 2>"${PGMERR}" && true + export err=$? + if [[ ${err} -ne 0 ]]; then + err_exit "${REGRID_EXEC} failed, ABORT!" + fi + done +fi - ${APRUN_REGRID} "${REGRID_EXEC}" "${REDOUT}${PGMOUT}" "${REDERR}${PGMERR}" +# Increments for forecast job with land IAU +# If land IAU --> deterministic and ensemble +if [[ "${DO_LAND_IAU}" = ".true." ]]; then + # Set namelist variables + export add_time_dim=".true." 
+ export time_list="${IAUFHRS}" + export out_fname="'sfci'" + + # Create regrid namelist + rm -f "regrid.nml" + atparse < "${regrid_nml_tmpl}" >> "regrid.nml" + + # Run regrid executable + export pgm="${REGRID_EXEC}" + ${APRUN_REGRID} "${REGRID_EXEC}" 1>"${PGMOUT}" 2>"${PGMERR}" && true + export err=$? + if [[ ${err} -ne 0 ]]; then + err_exit "${pgm} failed, ABORT!" + fi +fi - for n in $(seq 1 "${ntiles}"); do - cpfs "${DATA}/sfci.tile${n}.nc" "${COMOUT_ATMOS_ANALYSIS_MEM}/increment.sfc.i00${FHR}.tile${n}.nc" - done - done +# +# Save regridded files to COMOUT +# - if [[ "${DO_LAND_IAU}" = ".true." ]]; then +for imem in $(seq 1 "${NMEM_REGRID}"); do + cmem=$(printf %03i "${imem}") + memchar="mem${cmem}" - export add_time_dim=".true." - export time_list="${IAUFHRS}" + # If deterministic job, COMOUT_ATMOS_ANALYSIS_MEM is just COMOUT_ATMOS_ANALYSIS + if (( NMEM_REGRID > 1 )); then + MEMDIR=${memchar} YMD=${PDY} HH=${cyc} declare_from_tmpl \ + COMOUT_ATMOS_ANALYSIS_MEM:COM_ATMOS_ANALYSIS_TMPL - rm -f "regrid.nml" - atparse < "${regrid_nml_tmpl}" >> "regrid.nml" + memdir="${DATA}/${memchar}" + else + # If deterministic job, memdir is just DATA + memdir="${DATA}" + fi - for FHI in "${landifhrs[@]}"; do - cpreq "${COMIN_SOIL_ANALYSIS_MEM}/${APREFIX_ENS}increment.sfc.i00${FHI}.nc" \ - "${DATA}/enkfgdas.sfci00${FHI}.nc" + # Create MPMD command file for this member + rm -f "cmdfile.${imem}" + touch "cmdfile.${imem}" + chmod 755 "cmdfile.${imem}" + + { + echo "#!/bin/bash" + + if [[ "${DO_LAND_IAU}" = ".false." || "${RUN}" == "gdas" || "${RUN}" == "gfs" ]]; then + for FHR in "${soilinc_fhrs[@]}"; do + for n in $(seq 1 "${ntiles}"); do + echo "cpfs ${memdir}/sfci00${FHR}.mem${imem}.tile${n}.nc \ + ${COMOUT_ATMOS_ANALYSIS_MEM}/increment.sfc.i00${FHR}.tile${n}.nc" + done done - - export pgm="${REGRID_EXEC}" - ${APRUN_REGRID} "${REGRID_EXEC}" "${REDOUT}${PGMOUT}" "${REDERR}${PGMERR}" - export err=$? - if [[ ${err} -ne 0 ]]; then - err_exit "${pgm} failed, ABORT!" 
- fi + fi + if [[ "${DO_LAND_IAU}" = ".true." ]]; then for n in $(seq 1 "${ntiles}"); do - cpfs "${DATA}/sfci.tile${n}.nc" "${COMOUT_ATMOS_ANALYSIS_MEM}/increment.sfc.i006.tile${n}.nc" + echo "cpfs ${memdir}/sfci.mem${imem}.tile${n}.nc \ + ${COMOUT_ATMOS_ANALYSIS_MEM}/increment.sfc.i006.tile${n}.nc" done - fi + fi + } > "cmdfile.${imem}" +done + +# Create master MPMD command file +rm -f cmdfile +touch cmdfile +chmod 755 cmdfile +# Append all members' command files to master command file +{ +for imem in $(seq 1 "${NMEM_REGRID}"); do + echo "${DATA}/cmdfile.${imem}" done +} >> cmdfile -exit 0 +# Run MPMD to save output files +"${USHgfs}/run_mpmd.sh" "cmdfile" && true +export err=$? +if [[ ${err} -ne 0 ]]; then + err_exit "run_mpmd.sh failed!" +fi +exit 0