From 37155048235d1ecab18414bd40257ec42c506774 Mon Sep 17 00:00:00 2001 From: RussTreadon-NOAA Date: Tue, 24 Jun 2025 20:12:17 +0000 Subject: [PATCH 01/17] refactor atm_jjob ctests to submit jobcard (#1762) --- test/atm/global-workflow/jjob_ens_final.sh | 30 ++++- test/atm/global-workflow/jjob_ens_inc.sh | 30 ++++- test/atm/global-workflow/jjob_ens_init.sh | 32 +++++- .../global-workflow/jjob_ens_init_split.sh | 32 +++++- test/atm/global-workflow/jjob_ens_letkf.sh | 30 ++++- test/atm/global-workflow/jjob_ens_obs.sh | 30 ++++- test/atm/global-workflow/jjob_ens_sol.sh | 30 ++++- test/atm/global-workflow/jjob_var_final.sh | 30 ++++- test/atm/global-workflow/jjob_var_inc.sh | 30 ++++- test/atm/global-workflow/jjob_var_init.sh | 35 ++++-- test/atm/global-workflow/jjob_var_run.sh | 30 ++++- test/workflow/generate_job_script.py | 103 ++++++++++++++++++ test/workflow/hosts/gaeac6.yaml | 19 ++++ test/workflow/hosts/hera.yaml | 8 ++ test/workflow/hosts/hercules.yaml | 8 ++ test/workflow/hosts/orion.yaml | 8 ++ test/workflow/hosts/ursa.yaml | 8 ++ test/workflow/hosts/wcoss2.yaml | 7 ++ 18 files changed, 440 insertions(+), 60 deletions(-) create mode 100755 test/workflow/generate_job_script.py create mode 100644 test/workflow/hosts/gaeac6.yaml create mode 100644 test/workflow/hosts/hera.yaml create mode 100644 test/workflow/hosts/hercules.yaml create mode 100644 test/workflow/hosts/orion.yaml create mode 100644 test/workflow/hosts/ursa.yaml create mode 100644 test/workflow/hosts/wcoss2.yaml diff --git a/test/atm/global-workflow/jjob_ens_final.sh b/test/atm/global-workflow/jjob_ens_final.sh index 479ee5bd0..8040afbd7 100755 --- a/test/atm/global-workflow/jjob_ens_final.sh +++ b/test/atm/global-workflow/jjob_ens_final.sh @@ -4,6 +4,8 @@ set -x bindir=$1 srcdir=$2 +type="jjob_ens_final" + # Set g-w HOMEgfs topdir=$(cd "$(dirname "$(readlink -f -n "${bindir}" )" )/../../.." && pwd -P) export HOMEgfs=$topdir @@ -45,11 +47,29 @@ elif [[ $machine = 'ORION' || $machine = 'HERCULES' ]]; then export UTILROOT=/work2/noaa/da/python/opt/intel-2022.1.2/prod_util/1.2.2 fi -# Execute j-job -if [[ $machine = 'HERA' || $machine = 'ORION' || $machine = 'HERCULES' ]]; then - sbatch --ntasks=1 --account=$ACCOUNT --qos=batch --time=00:10:00 --export=ALL --wait --output=atmensanlfinal-%j.out ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_FINALIZE -elif [[ $machine = 'URSA' ]]; then - sbatch --ntasks=1 --account=$ACCOUNT --qos=batch --partition=u1-compute --time=00:10:00 --export=ALL --wait --output=atmensanlfinal-%j.out ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_FINALIZE +# Create yaml with job configuration +config_yaml="./config_${type}.yaml" +cat < ${config_yaml} +machine: ${machine} +homegfs: ${HOMEgfs} +job_name: ${type} +walltime: "00:30:00" +nodes: 1 +ntasks_per_node: 1 +threads_per_task: 1 +memory: 8Gb +command: ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_FINALIZE +filename: submit_${type}.sh +EOF + +# Create script to execute j-job +$HOMEgfs/sorc/gdas.cd/test/workflow/generate_job_script.py ${config_yaml} + +# Submit script to execute j-job +if [[ $machine = 'HERA' || $machine = 'ORION' || $machine = 'HERCULES' || $machine = 'URSA' || $machine = 'GAEAC6' ]]; then + sbatch --export=ALL --wait submit_${type}.sh +elif [[ $machine = 'WCOSS2' ]]; then + qsub -V -W block=true submit_${type}.sh else ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_FINALIZE fi diff --git a/test/atm/global-workflow/jjob_ens_inc.sh b/test/atm/global-workflow/jjob_ens_inc.sh index 2798eb5cb..1da9f6777 100755 --- a/test/atm/global-workflow/jjob_ens_inc.sh +++ b/test/atm/global-workflow/jjob_ens_inc.sh @@ -4,6 +4,8 @@ set -x bindir=$1 srcdir=$2 +type="jjob_ens_inc" + # Set g-w HOMEgfs topdir=$(cd "$(dirname "$(readlink -f -n "${bindir}" )" )/../../.." && pwd -P) export HOMEgfs=$topdir @@ -45,11 +47,29 @@ elif [[ $machine = 'ORION' || $machine = 'HERCULES' ]]; then export UTILROOT=/work2/noaa/da/python/opt/intel-2022.1.2/prod_util/1.2.2 fi -# Execute j-job -if [[ $machine = 'HERA' || $machine = 'ORION' || $machine = 'HERCULES' ]]; then - sbatch --nodes=1 --ntasks=6 --account=$ACCOUNT --qos=batch --time=00:30:00 --export=ALL --wait --output=atmensanlfv3inc-%j.out ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_FV3_INCREMENT -elif [[ $machine = 'URSA' ]]; then - sbatch --nodes=1 --ntasks=6 --account=$ACCOUNT --qos=batch --partition=u1-compute --time=00:30:00 --export=ALL --wait --output=atmensanlfv3inc-%j.out ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_FV3_INCREMENT +# Create yaml with job configuration +config_yaml="./config_${type}.yaml" +cat < ${config_yaml} +machine: ${machine} +homegfs: ${HOMEgfs} +job_name: ${type} +walltime: "00:30:00" +nodes: 1 +ntasks_per_node: 1 +threads_per_task: 1 +memory: 8Gb +command: ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_FV3_INCREMENT +filename: submit_${type}.sh +EOF + +# Create script to execute j-job +$HOMEgfs/sorc/gdas.cd/test/workflow/generate_job_script.py ${config_yaml} + +# Submit script to execute j-job +if [[ $machine = 'HERA' || $machine = 'ORION' || $machine = 'HERCULES' || $machine = 'URSA' || $machine = 'GAEAC6' ]]; then + sbatch --export=ALL --wait submit_${type}.sh +elif [[ $machine = 'WCOSS2' ]]; then + qsub -V -W block=true submit_${type}.sh else ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_FV3_INCREMENT fi diff --git a/test/atm/global-workflow/jjob_ens_init.sh b/test/atm/global-workflow/jjob_ens_init.sh index dce1d69cd..38f55dce2 100755 --- a/test/atm/global-workflow/jjob_ens_init.sh +++ b/test/atm/global-workflow/jjob_ens_init.sh @@ -4,6 +4,8 @@ set -x bindir=$1 srcdir=$2 +type="jjob_ens_init" + # Set g-w HOMEgfs topdir=$(cd "$(dirname "$(readlink -f -n "${bindir}" )" )/../../.." && pwd -P) export HOMEgfs=$topdir @@ -110,11 +112,29 @@ done # NOTE: atmensanlinit creates input yaml for atmensanlletkf job cp $EXPDIR/config.base_lobsdiag_forenkf_false $EXPDIR/config.base -# Execute j-job -if [[ $machine = 'HERA' || $machine = 'ORION' || $machine = 'HERCULES' ]]; then - sbatch --ntasks=1 --account=$ACCOUNT --qos=batch --time=00:10:00 --export=ALL --wait --output=atmensanlinit-%j.out ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_INITIALIZE -elif [[ $machine = 'URSA' ]]; then - sbatch --ntasks=1 --account=$ACCOUNT --qos=batch --partition=u1-compute --time=00:10:00 --export=ALL --wait --output=atmensanlinit-%j.out ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_INITIALIZE +# Create yaml with job configuration +config_yaml="./config_${type}.yaml" +cat < ${config_yaml} +machine: ${machine} +homegfs: ${HOMEgfs} +job_name: ${type} +walltime: "00:30:00" +nodes: 1 +ntasks_per_node: 1 +threads_per_task: 1 +memory: 8Gb +command: ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_INITIALIZE +filename: submit_${type}.sh +EOF + +# Create script to execute j-job +$HOMEgfs/sorc/gdas.cd/test/workflow/generate_job_script.py ${config_yaml} + +# Submit script to execute j-job +if [[ $machine = 'HERA' || $machine = 'ORION' || $machine = 'HERCULES' || $machine = 'URSA' || $machine = 'GAEAC6' ]]; then + sbatch --export=ALL --wait submit_${type}.sh +elif [[ $machine = 'WCOSS2' ]]; then + qsub -V -W block=true submit_${type}.sh else - ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_INITIALIZE + ${HOMEgfs}/jobs/JGLOBAL_ATM_ANALYSIS_INITIALIZE fi diff --git a/test/atm/global-workflow/jjob_ens_init_split.sh b/test/atm/global-workflow/jjob_ens_init_split.sh index 52595c10f..1452bfa6b 100755 --- a/test/atm/global-workflow/jjob_ens_init_split.sh +++ b/test/atm/global-workflow/jjob_ens_init_split.sh @@ -4,6 +4,8 @@ set -x bindir=$1 srcdir=$2 +type="jjob_ens_init_split" + # Set g-w HOMEgfs topdir=$(cd "$(dirname "$(readlink -f -n "${bindir}" )" )/../../.." && pwd -P) export HOMEgfs=$topdir @@ -113,11 +115,29 @@ done # NOTE: atmensanlinit creates input yaml for atmensanlobs and atmensanlsol jobs cp $EXPDIR/config.base_lobsdiag_forenkf_true $EXPDIR/config.base -# Execute j-job -if [[ $machine = 'HERA' || $machine = 'ORION' || $machine = 'HERCULES' ]]; then - sbatch --ntasks=1 --account=$ACCOUNT --qos=batch --time=00:10:00 --export=ALL --wait --output=atmensanlinit_split-%j.out ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_INITIALIZE -elif [[ $machine = 'URSA' ]]; then - sbatch --ntasks=1 --account=$ACCOUNT --qos=batch --partition=u1-compute --time=00:10:00 --export=ALL --wait --output=atmensanlinit_split-%j.out ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_INITIALIZE +# Create yaml with job configuration +config_yaml="./config_${type}.yaml" +cat < ${config_yaml} +machine: ${machine} +homegfs: ${HOMEgfs} +job_name: ${type} +walltime: "00:30:00" +nodes: 1 +ntasks_per_node: 1 +threads_per_task: 1 +memory: 8Gb +command: ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_INITIALIZE +filename: submit_${type}.sh +EOF + +# Create script to execute j-job +$HOMEgfs/sorc/gdas.cd/test/workflow/generate_job_script.py ${config_yaml} + +# Submit script to execute j-job +if [[ $machine = 'HERA' || $machine = 'ORION' || $machine = 'HERCULES' || $machine = 'URSA' || $machine = 'GAEAC6' ]]; then + sbatch --export=ALL --wait submit_${type}.sh +elif [[ $machine = 'WCOSS2' ]]; then + qsub -V -W block=true submit_${type}.sh else - ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_INITIALIZE + ${HOMEgfs}/jobs/JGLOBAL_ATM_ANALYSIS_INITIALIZE fi diff --git a/test/atm/global-workflow/jjob_ens_letkf.sh b/test/atm/global-workflow/jjob_ens_letkf.sh index adfe4884a..78cff5c70 100755 --- a/test/atm/global-workflow/jjob_ens_letkf.sh +++ b/test/atm/global-workflow/jjob_ens_letkf.sh @@ -4,6 +4,8 @@ set -x bindir=$1 srcdir=$2 +type="jjob_ens_letkf" + # Set g-w HOMEgfs topdir=$(cd "$(dirname "$(readlink -f -n "${bindir}" )" )/../../.." && pwd -P) export HOMEgfs=$topdir @@ -51,11 +53,29 @@ fi # Set lobsdiag_forenkf=.false. to run letkf as combined observer and solver job cp $EXPDIR/config.base_lobsdiag_forenkf_false $EXPDIR/config.base -# Execute j-job -if [[ $machine = 'HERA' || $machine = 'ORION' || $machine = 'HERCULES' ]]; then - sbatch --nodes=1 --ntasks=6 --account=$ACCOUNT --qos=batch --time=00:30:00 --export=ALL --wait --output=atmensanlletkf-%j.out ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_LETKF -elif [[ $machine = 'URSA' ]]; then - sbatch --nodes=1 --ntasks=6 --account=$ACCOUNT --qos=batch --partition=u1-compute --mem="32Gb" --time=00:30:00 --export=ALL --wait --output=atmensanlletkf-%j.out ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_LETKF +# Create yaml with job configuration +config_yaml="./config_${type}.yaml" +cat < ${config_yaml} +machine: ${machine} +homegfs: ${HOMEgfs} +job_name: ${type} +walltime: "00:30:00" +nodes: 1 +ntasks_per_node: 6 +threads_per_task: 1 +memory: 32Gb +command: ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_LETKF +filename: submit_${type}.sh +EOF + +# Create script to execute j-job +$HOMEgfs/sorc/gdas.cd/test/workflow/generate_job_script.py ${config_yaml} + +# Submit script to execute j-job +if [[ $machine = 'HERA' || $machine = 'ORION' || $machine = 'HERCULES' || $machine = 'URSA' || $machine = 'GAEAC6' ]]; then + sbatch --export=ALL --wait submit_${type}.sh +elif [[ $machine = 'WCOSS2' ]]; then + qsub -V -W block=true submit_${type}.sh else ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_LETKF fi diff --git a/test/atm/global-workflow/jjob_ens_obs.sh b/test/atm/global-workflow/jjob_ens_obs.sh index fac36ef9a..8001a0a62 100755 --- a/test/atm/global-workflow/jjob_ens_obs.sh +++ b/test/atm/global-workflow/jjob_ens_obs.sh @@ -4,6 +4,8 @@ set -x bindir=$1 srcdir=$2 +type="jjob_ens_obs" + # Set g-w HOMEgfs topdir=$(cd "$(dirname "$(readlink -f -n "${bindir}" )" )/../../.." && pwd -P) export HOMEgfs=$topdir @@ -51,11 +53,29 @@ fi # Set lobsdiag_forenkf=.true. to run letkf as stand-alone observer job cp $EXPDIR/config.base_lobsdiag_forenkf_true $EXPDIR/config.base -# Execute j-job -if [[ $machine = 'HERA' || $machine = 'ORION' || $machine = 'HERCULES' ]]; then - sbatch --nodes=1 --ntasks=6 --account=$ACCOUNT --qos=batch --time=00:30:00 --export=ALL --wait --output=atmensanlobs-%j.out ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_OBS -elif [[ $machine = 'URSA' ]]; then - sbatch --nodes=1 --ntasks=6 --account=$ACCOUNT --qos=batch --partition=u1-compute --time=00:30:00 --export=ALL --wait --output=atmensanlobs-%j.out ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_OBS +# Create yaml with job configuration +config_yaml="./config_${type}.yaml" +cat < ${config_yaml} +machine: ${machine} +homegfs: ${HOMEgfs} +job_name: ${type} +walltime: "00:30:00" +nodes: 1 +ntasks_per_node: 6 +threads_per_task: 1 +memory: 32Gb +command: ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_OBS +filename: submit_${type}.sh +EOF + +# Create script to execute j-job +$HOMEgfs/sorc/gdas.cd/test/workflow/generate_job_script.py ${config_yaml} + +# Submit script to execute j-job +if [[ $machine = 'HERA' || $machine = 'ORION' || $machine = 'HERCULES' || $machine = 'URSA' || $machine = 'GAEAC6' ]]; then + sbatch --export=ALL --wait submit_${type}.sh +elif [[ $machine = 'WCOSS2' ]]; then + qsub -V -W block=true submit_${type}.sh else ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_OBS fi diff --git a/test/atm/global-workflow/jjob_ens_sol.sh b/test/atm/global-workflow/jjob_ens_sol.sh index 68dab69aa..6f860a584 100755 --- a/test/atm/global-workflow/jjob_ens_sol.sh +++ b/test/atm/global-workflow/jjob_ens_sol.sh @@ -4,6 +4,8 @@ set -x bindir=$1 srcdir=$2 +type="jjob_ens_sol" + # Set g-w HOMEgfs topdir=$(cd "$(dirname "$(readlink -f -n "${bindir}" )" )/../../.." && pwd -P) export HOMEgfs=$topdir @@ -51,11 +53,29 @@ fi # Set lobsdiag_forenkf=.true. to run letkf as stand-alone solver job cp $EXPDIR/config.base_lobsdiag_forenkf_true $EXPDIR/config.base -# Execute j-job -if [[ $machine = 'HERA' || $machine = 'ORION' || $machine = 'HERCULES' ]]; then - sbatch --nodes=1 --ntasks=6 --account=$ACCOUNT --qos=batch --time=00:30:00 --export=ALL --wait --output=atmensanlsol-%j.out ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_SOL -elif [[ $machine = 'URSA' ]]; then - sbatch --nodes=1 --ntasks=6 --account=$ACCOUNT --qos=batch --partition=u1-compute --time=00:30:00 --export=ALL --wait --output=atmensanlsol-%j.out ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_SOL +# Create yaml with job configuration +config_yaml="./config_${type}.yaml" +cat < ${config_yaml} +machine: ${machine} +homegfs: ${HOMEgfs} +job_name: ${type} +walltime: "00:30:00" +nodes: 1 +ntasks_per_node: 1 +threads_per_task: 1 +memory: 8Gb +command: ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_SOL +filename: submit_${type}.sh +EOF + +# Create script to execute j-job +$HOMEgfs/sorc/gdas.cd/test/workflow/generate_job_script.py ${config_yaml} + +# Submit script to execute j-job +if [[ $machine = 'HERA' || $machine = 'ORION' || $machine = 'HERCULES' || $machine = 'URSA' || $machine = 'GAEAC6' ]]; then + sbatch --export=ALL --wait submit_${type}.sh +elif [[ $machine = 'WCOSS2' ]]; then + qsub -V -W block=true submit_${type}.sh else ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_SOL fi diff --git a/test/atm/global-workflow/jjob_var_final.sh b/test/atm/global-workflow/jjob_var_final.sh index 694f52efe..e2b4f1bf0 100755 --- a/test/atm/global-workflow/jjob_var_final.sh +++ b/test/atm/global-workflow/jjob_var_final.sh @@ -4,6 +4,8 @@ set -x bindir=$1 srcdir=$2 +type="jjob_var_final" + # Set g-w HOMEgfs topdir=$(cd "$(dirname "$(readlink -f -n "${bindir}" )" )/../../.." && pwd -P) export HOMEgfs=$topdir @@ -45,11 +47,29 @@ elif [[ $machine = 'ORION' || $machine = 'HERCULES' ]]; then export UTILROOT=/work2/noaa/da/python/opt/intel-2022.1.2/prod_util/1.2.2 fi -# Execute j-job -if [[ $machine = 'HERA' || $machine = 'ORION' || $machine = 'HERCULES' ]]; then - sbatch --ntasks=1 --account=$ACCOUNT --qos=batch --time=00:10:00 --export=ALL --wait --output=atmanlfinal-%j.out ${HOMEgfs}/jobs/JGLOBAL_ATM_ANALYSIS_FINALIZE -elif [[ $machine = 'URSA' ]]; then - sbatch --ntasks=1 --account=$ACCOUNT --qos=batch --partition=u1-compute --time=00:10:00 --export=ALL --wait --output=atmanlfinal-%j.out ${HOMEgfs}/jobs/JGLOBAL_ATM_ANALYSIS_FINALIZE +# Create yaml with job configuration +config_yaml="./config_${type}.yaml" +cat < ${config_yaml} +machine: ${machine} +homegfs: ${HOMEgfs} +job_name: ${type} +walltime: "00:30:00" +nodes: 1 +ntasks_per_node: 1 +threads_per_task: 1 +memory: 8Gb +command: ${HOMEgfs}/jobs/JGLOBAL_ATM_ANALYSIS_FINALIZE +filename: submit_${type}.sh +EOF + +# Create script to execute j-job +$HOMEgfs/sorc/gdas.cd/test/workflow/generate_job_script.py ${config_yaml} + +# Submit script to execute j-job +if [[ $machine = 'HERA' || $machine = 'ORION' || $machine = 'HERCULES' || $machine = 'URSA' || $machine = 'GAEAC6' ]]; then + sbatch --export=ALL --wait submit_${type}.sh +elif [[ $machine = 'WCOSS2' ]]; then + qsub -V -W block=true submit_${type}.sh else ${HOMEgfs}/jobs/JGLOBAL_ATM_ANALYSIS_FINALIZE fi diff --git a/test/atm/global-workflow/jjob_var_inc.sh b/test/atm/global-workflow/jjob_var_inc.sh index 457f71299..f1a5cb38b 100755 --- a/test/atm/global-workflow/jjob_var_inc.sh +++ b/test/atm/global-workflow/jjob_var_inc.sh @@ -4,6 +4,8 @@ set -x bindir=$1 srcdir=$2 +type="jjob_var_inc" + # Set g-w HOMEgfs topdir=$(cd "$(dirname "$(readlink -f -n "${bindir}" )" )/../../.." && pwd -P) export HOMEgfs=$topdir @@ -45,11 +47,29 @@ elif [[ $machine = 'ORION' || $machine = 'HERCULES' ]]; then export UTILROOT=/work2/noaa/da/python/opt/intel-2022.1.2/prod_util/1.2.2 fi -# Execute j-job -if [[ $machine = 'HERA' || $machine = 'ORION' || $machine = 'HERCULES' ]]; then - sbatch --ntasks=6 --account=$ACCOUNT --qos=batch --time=00:10:00 --export=ALL --wait --output=atmanlfv3inc-%j.out ${HOMEgfs}/jobs/JGLOBAL_ATM_ANALYSIS_FV3_INCREMENT -elif [[ $machine = "URSA" ]]; then - sbatch --ntasks=6 --account=$ACCOUNT --qos=batch --partition=u1-compute --time=00:10:00 --export=ALL --wait --output=atmanlfv3inc-%j.out ${HOMEgfs}/jobs/JGLOBAL_ATM_ANALYSIS_FV3_INCREMENT +# Create yaml with job confiugration +config_yaml="./config_${type}.yaml" +cat < ${config_yaml} +machine: ${machine} +homegfs: ${HOMEgfs} +job_name: ${type} +walltime: "00:30:00" +nodes: 1 +ntasks_per_node: 6 +threads_per_task: 1 +memory: 8Gb +command: ${HOMEgfs}/jobs/JGLOBAL_ATM_ANALYSIS_FV3_INCREMENT +filename: submit_${type}.sh +EOF + +# Create script to execute j-job +$HOMEgfs/sorc/gdas.cd/test/workflow/generate_job_script.py ${config_yaml} + +# Submit script to execute j-job +if [[ $machine = 'HERA' || $machine = 'ORION' || $machine = 'HERCULES' || $machine = 'URSA' || $machine = 'GAEAC6' ]]; then + sbatch --export=ALL --wait submit_${type}.sh +elif [[ $machine = 'WCOSS2' ]]; then + qsub -V -W block=true submit_${type}.sh else ${HOMEgfs}/jobs/JGLOBAL_ATM_ANALYSIS_FV3_INCREMENT fi diff --git a/test/atm/global-workflow/jjob_var_init.sh b/test/atm/global-workflow/jjob_var_init.sh index 453a1448a..3febabf99 100755 --- a/test/atm/global-workflow/jjob_var_init.sh +++ b/test/atm/global-workflow/jjob_var_init.sh @@ -4,6 +4,8 @@ set -x bindir=$1 srcdir=$2 +type="jjob_var_init" + # Set g-w HOMEgfs topdir=$(cd "$(dirname "$(readlink -f -n "${bindir}" )" )/../../.." && pwd -P) export HOMEgfs=$topdir @@ -106,13 +108,13 @@ for imem in $(seq 1 $NMEM_ENS); do COMIN_ATMOS_HISTORY_PREV_ENS:COM_ATMOS_HISTORY_TMPL source=$GDASAPP_TESTDATA/lowres/$dpath/$memchar/model/atmos/history - target=$COMIN__ATMOS_HISTORY_PREV_ENS + target=$COMIN_ATMOS_HISTORY_PREV_ENS mkdir -p $target rm -rf $target/enkfgdas.t${gcyc}z.atmf006.nc ln -fs $source/enkfgdas.t${gcyc}z.atmf006.nc $target/ source=$GDASAPP_TESTDATA/lowres/$dpath/$memchar/model/atmos/history - target=$COMIN__ATMOS_HISTORY_PREV_ENS + target=$COMIN_ATMOS_HISTORY_PREV_ENS flist=("cubed_sphere_grid_atmf006.nc" "cubed_sphere_grid_sfcf006.nc") for file in "${flist[@]}"; do rm -rf $target/enkf${gprefix}.${file} @@ -120,12 +122,29 @@ for imem in $(seq 1 $NMEM_ENS); do done done - -# Execute j-job -if [[ $machine = 'HERA' || $machine = 'ORION' || $machine = 'HERCULES' ]]; then - sbatch --ntasks=1 --account=$ACCOUNT --qos=batch --time=00:10:00 --export=ALL --wait --output=atmanlinit-%j.out ${HOMEgfs}/jobs/JGLOBAL_ATM_ANALYSIS_INITIALIZE -elif [[ $machine = 'URSA' ]]; then - sbatch --ntasks=1 --account=$ACCOUNT --qos=batch --partition=u1-compute --time=00:10:00 --export=ALL --wait --output=atmanlinit-%j.out ${HOMEgfs}/jobs/JGLOBAL_ATM_ANALYSIS_INITIALIZE +# Create yaml with job configuration +config_yaml="./config_${type}.yaml" +cat < ${config_yaml} +machine: ${machine} +homegfs: ${HOMEgfs} +job_name: ${type} +walltime: "00:30:00" +nodes: 1 +ntasks_per_node: 1 +threads_per_task: 1 +memory: 8Gb +command: ${HOMEgfs}/jobs/JGLOBAL_ATM_ANALYSIS_INITIALIZE +filename: submit_${type}.sh +EOF + +# Create script to execute j-job +$HOMEgfs/sorc/gdas.cd/test/workflow/generate_job_script.py ${config_yaml} + +# Submit script to execute j-job +if [[ $machine = 'HERA' || $machine = 'ORION' || $machine = 'HERCULES' || $machine = 'URSA' || $machine = 'GAEAC6' ]]; then + sbatch --export=ALL --wait submit_${type}.sh +elif [[ $machine = 'WCOSS2' ]]; then + qsub -V -W block=true submit_${type}.sh else ${HOMEgfs}/jobs/JGLOBAL_ATM_ANALYSIS_INITIALIZE fi diff --git a/test/atm/global-workflow/jjob_var_run.sh b/test/atm/global-workflow/jjob_var_run.sh index abf6e8d38..a5aa85134 100755 --- a/test/atm/global-workflow/jjob_var_run.sh +++ b/test/atm/global-workflow/jjob_var_run.sh @@ -4,6 +4,8 @@ set -x bindir=$1 srcdir=$2 +type="jjob_var_run" + # Set g-w HOMEgfs topdir=$(cd "$(dirname "$(readlink -f -n "${bindir}" )" )/../../.." && pwd -P) export HOMEgfs=$topdir @@ -48,11 +50,29 @@ elif [[ $machine = 'ORION' || $machine = 'HERCULES' ]]; then export UTILROOT=/work2/noaa/da/python/opt/intel-2022.1.2/prod_util/1.2.2 fi -# Execute j-job -if [[ $machine = 'HERA' || $machine = 'ORION' || $machine = 'HERCULES' ]]; then - sbatch --ntasks=6 --account=$ACCOUNT --qos=batch --time=00:20:00 --export=ALL --wait --output=atmanlvar-%j.out ${HOMEgfs}/jobs/JGLOBAL_ATM_ANALYSIS_VARIATIONAL -elif [[ $machine = 'URSA' ]]; then - sbatch --ntasks=6 --account=$ACCOUNT --qos=batch --partition=u1-compute --time=00:20:00 --mem=96Gb --export=ALL --wait --output=atmanlvar-%j.out ${HOMEgfs}/jobs/JGLOBAL_ATM_ANALYSIS_VARIATIONAL +# Create yaml with job configuration +config_yaml="./config_${type}.yaml" +cat < ${config_yaml} +machine: ${machine} +homegfs: ${HOMEgfs} +job_name: ${type} +walltime: "00:30:00" +nodes: 1 +ntasks_per_node: 6 +threads_per_task: 1 +memory: 96Gb +command: ${HOMEgfs}/jobs/JGLOBAL_ATM_ANALYSIS_VARIATIONAL +filename: submit_${type}.sh +EOF + +# Create script to execute j-job +$HOMEgfs/sorc/gdas.cd/test/workflow/generate_job_script.py ${config_yaml} + +# Submit script to execute j-job +if [[ $machine = 'HERA' || $machine = 'ORION' || $machine = 'HERCULES' || $machine = 'URSA' || $machine = 'GAEAC6' ]]; then + sbatch --export=ALL --wait submit_${type}.sh +elif [[ $machine = 'WCOSS2' ]]; then + qsub -V -W block=true submit_${type}.sh else ${HOMEgfs}/jobs/JGLOBAL_ATM_ANALYSIS_VARIATIONAL fi diff --git a/test/workflow/generate_job_script.py b/test/workflow/generate_job_script.py new file mode 100755 index 000000000..db9ca16c2 --- /dev/null +++ b/test/workflow/generate_job_script.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python3 + +import os +import yaml +import sys + +def create_job_script(job_config,machine_config): + scheduler = machine_config.get('SCHEDULER', 'slurm') + account = machine_config.get('HPC_ACCOUNT') + queue = machine_config.get('QUEUE') + partition = machine_config.get('PARTITION_BATCH', 'none') + job_name = job_config.get('job_name', 'myjob') + walltime = job_config.get('walltime', '01:00:00') + nodes = job_config.get('nodes', 1) + ntasks_per_node = job_config.get('ntasks_per_node', 1) + threads_per_task = job_config.get('threads_per_task', 1) + ncpus = ntasks_per_node * threads_per_task + memory = job_config.get('memory', '4gb') + command = job_config.get('command', 'python script.py') + filename = job_config.get('filename', 'job_script.sh') + + if scheduler == 'pbspro': + script = f"""#!/bin/bash +#PBS -N {job_name} +#PBS -j oe +#PBS -A {account} +#PBS -q {queue} +#PBS -l walltime={walltime} +#PBS -l select={nodes}:mpiprocs={ntasks_per_node}:ompthreads={threads_per_task}:ncpus={ncpus}:mem={memory} +#PBS -l place=vscatter + +set -x +cd $PBS_O_WORKDIR +{command} +""" + elif scheduler == 'slurm': + script = f"""#!/bin/bash +#SBATCH -J {job_name} +#SBATCH -o {job_name}.o%J +#SBATCH -e {job_name}.o%J +#SBATCH -A {account} +#SBATCH -q {queue} +#SBATCH -p {partition} +#SBATCH -t {walltime} +#SBATCH --nodes={nodes} +#SBATCH --ntasks-per-node={ntasks_per_node} +#SBATCH --cpus-per-task={threads_per_task} +#SBATCH --mem={memory} + +set -x +cd $SLURM_SUBMIT_DIR +{command} +""" + else: + raise ValueError("Unsupported scheduler. Use 'pbspro' or 'slurm'.") + + with open(filename, 'w') as f: + f.write(script) + + os.chmod(filename, 0o755) + print(f"{scheduler.upper()} job script written to {filename}") + + +def main(): + if len(sys.argv) != 2: + print("Usage: python generate_job_script.py config.yaml") + sys.exit(1) + + job_config_file = sys.argv[1] + + with open(job_config_file, 'r') as f: + job_config = yaml.safe_load(f) + + print(f" ") + print(f"job_config {job_config}") + print(f" ") + + homegfs = job_config.get('homegfs') + machine = job_config.get('machine') + + print(f" ") + print(f"homegfs {homegfs}") + print(f"machine {machine}") + print(f" ") + + machine_config_file = os.path.join(homegfs, "sorc/gdas.cd/test/workflow/hosts/") + machine.lower() + ".yaml" + + print(f" ") + print(f"machine_config_file {machine_config_file}") + print(f" ") + + with open(machine_config_file, 'r') as f: + machine_config = yaml.safe_load(f) + + print(f" ") + print(f"machine_config {machine_config}") + print(f" ") + + create_job_script(job_config, machine_config) + + +if __name__ == "__main__": + main() diff --git a/test/workflow/hosts/gaeac6.yaml b/test/workflow/hosts/gaeac6.yaml new file mode 100644 index 000000000..577e1a942 --- /dev/null +++ b/test/workflow/hosts/gaeac6.yaml @@ -0,0 +1,19 @@ +# Scheduler and queue information +SCHEDULER: slurm +QUEUE: normal +QUEUE_SERVICE: normal +QUEUE_DTN: 'hpss' +PARTITION_BUILD: eslogin_c6 +PARTITION_BATCH: batch +PARTITION_SERVICE: batch +PARTITION_DTN: 'dtn_f5_f6' +CLUSTERS: 'c6' +CLUSTERS_DTN: 'es' +CLUSTERS_BUILD: 'es' +CONSTRAINT_DTN: 'f6' +RESERVATION: '' +PARTITION_CRON: 'cron_c6' +USE_SCRONTAB: 'YES' + +# HPC account information +HPC_ACCOUNT: ira-sti \ No newline at end of file diff --git a/test/workflow/hosts/hera.yaml b/test/workflow/hosts/hera.yaml new file mode 100644 index 000000000..383b234fe --- /dev/null +++ b/test/workflow/hosts/hera.yaml @@ -0,0 +1,8 @@ +# Scheduler and queue information +SCHEDULER: slurm +QUEUE: batch +PARTITION_BATCH: hera +PARTITION_SERVICE: service + +# HPC account information +HPC_ACCOUNT: da-cpu diff --git a/test/workflow/hosts/hercules.yaml b/test/workflow/hosts/hercules.yaml new file mode 100644 index 000000000..a08e5f4b7 --- /dev/null +++ b/test/workflow/hosts/hercules.yaml @@ -0,0 +1,8 @@ +# Scheduler and queue information +SCHEDULER: slurm +QUEUE: batch +PARTITION_BATCH: hercules +PARTITION_SERVICE: service + +# HPC account information +HPC_ACCOUNT: da-cpu diff --git a/test/workflow/hosts/orion.yaml b/test/workflow/hosts/orion.yaml new file mode 100644 index 000000000..3ba436eac --- /dev/null +++ b/test/workflow/hosts/orion.yaml @@ -0,0 +1,8 @@ +# Scheduler and queue information +SCHEDULER: slurm +QUEUE: batch +PARTITION_BATCH: orion +PARTITION_SERVICE: service + +# HPC account information +HPC_ACCOUNT: GFS-DEV diff --git a/test/workflow/hosts/ursa.yaml b/test/workflow/hosts/ursa.yaml new file mode 100644 index 000000000..e91e5593e --- /dev/null +++ b/test/workflow/hosts/ursa.yaml @@ -0,0 +1,8 @@ +# Scheduler and queue information +SCHEDULER: slurm +QUEUE: batch +PARTITION_BATCH: u1-compute +PARTITION_SERVICE: service + +# HPC account information +HPC_ACCOUNT: da-cpu diff --git a/test/workflow/hosts/wcoss2.yaml b/test/workflow/hosts/wcoss2.yaml new file mode 100644 index 000000000..98195cddb --- /dev/null +++ b/test/workflow/hosts/wcoss2.yaml @@ -0,0 +1,7 @@ +# Scheduler and queue information +SCHEDULER: pbspro +QUEUE: dev +QUEUE_SERVICE: dev_transfer + +# HPC account information +HPC_ACCOUNT: GFS-DEV From 4ac7d85c5abe150b9a8a15a247fa1a263869217f Mon Sep 17 00:00:00 2001 From: RussTreadon-NOAA Date: Tue, 24 Jun 2025 20:17:22 +0000 Subject: [PATCH 02/17] correct python norm violations (#1762) --- test/workflow/generate_job_script.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/test/workflow/generate_job_script.py b/test/workflow/generate_job_script.py index db9ca16c2..4a69a77e5 100755 --- a/test/workflow/generate_job_script.py +++ b/test/workflow/generate_job_script.py @@ -4,7 +4,8 @@ import yaml import sys -def create_job_script(job_config,machine_config): + +def create_job_script(job_config, machine_config): scheduler = machine_config.get('SCHEDULER', 'slurm') account = machine_config.get('HPC_ACCOUNT') queue = machine_config.get('QUEUE') @@ -74,10 +75,10 @@ def main(): print(f" ") print(f"job_config {job_config}") print(f" ") - + homegfs = job_config.get('homegfs') machine = job_config.get('machine') - + print(f" ") print(f"homegfs {homegfs}") print(f"machine {machine}") @@ -88,10 +89,10 @@ def main(): print(f" ") print(f"machine_config_file {machine_config_file}") print(f" ") - + with open(machine_config_file, 'r') as f: machine_config = yaml.safe_load(f) - + print(f" ") print(f"machine_config {machine_config}") print(f" ") From 52b394b0ca012c5d7b01163daa5921a388f6c776 Mon Sep 17 00:00:00 2001 From: "russ.treadon" Date: Thu, 26 Jun 2025 15:10:52 +0000 Subject: [PATCH 03/17] use scheduler logic in atm jjob drivers (#1762) --- test/atm/global-workflow/jjob_ens_final.sh | 26 ++++++---------- test/atm/global-workflow/jjob_ens_inc.sh | 26 ++++++---------- test/atm/global-workflow/jjob_ens_init.sh | 26 ++++++---------- .../global-workflow/jjob_ens_init_split.sh | 28 ++++++----------- test/atm/global-workflow/jjob_ens_letkf.sh | 26 ++++++---------- test/atm/global-workflow/jjob_ens_obs.sh | 26 ++++++---------- test/atm/global-workflow/jjob_ens_sol.sh | 26 ++++++---------- test/atm/global-workflow/jjob_var_final.sh | 26 ++++++---------- test/atm/global-workflow/jjob_var_inc.sh | 26 ++++++---------- test/atm/global-workflow/jjob_var_init.sh | 27 ++++++---------- test/atm/global-workflow/jjob_var_run.sh | 31 +++++++------------ test/workflow/generate_job_script.py | 24 +++----------- test/workflow/hosts/orion.yaml | 2 +- 13 files changed, 109 insertions(+), 211 deletions(-) diff --git a/test/atm/global-workflow/jjob_ens_final.sh b/test/atm/global-workflow/jjob_ens_final.sh index 8040afbd7..ef4ca0909 100755 --- a/test/atm/global-workflow/jjob_ens_final.sh +++ b/test/atm/global-workflow/jjob_ens_final.sh @@ -27,30 +27,21 @@ export COMROOT=$DATAROOT export NMEM_ENS=3 export ACCOUNT=da-cpu +# Detect machine +source "${HOMEgfs}/ush/detect_machine.sh" + # Set python path for workflow utilities and tasks wxflowPATH="${HOMEgfs}/ush/python" PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${wxflowPATH}" export PYTHONPATH -# Detemine machine from config.base -machine=$(echo `grep 'machine=' $EXPDIR/config.base | cut -d"=" -f2` | tr -d '"') - -# Set NETCDF and UTILROOT variables (used in config.base) -if [[ $machine = 'HERA' ]]; then - NETCDF=$( which ncdump ) - export NETCDF - export UTILROOT="/scratch2/NCEPDEV/ensemble/save/Walter.Kolczynski/hpc-stack/intel-18.0.5.274/prod_util/1.2.2" -elif [[ $machine = 'ORION' || $machine = 'HERCULES' ]]; then - ncdump=$( which ncdump ) - NETCDF=$( echo "${ncdump}" | cut -d " " -f 3 ) - export NETCDF - export UTILROOT=/work2/noaa/da/python/opt/intel-2022.1.2/prod_util/1.2.2 -fi +# Export library path +export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${HOMEgfs}/lib" # Create yaml with job configuration config_yaml="./config_${type}.yaml" cat < ${config_yaml} -machine: ${machine} +machine: ${MACHINE_ID} homegfs: ${HOMEgfs} job_name: ${type} walltime: "00:30:00" @@ -64,11 +55,12 @@ EOF # Create script to execute j-job $HOMEgfs/sorc/gdas.cd/test/workflow/generate_job_script.py ${config_yaml} +SCHEDULER=$(echo `grep SCHEDULER ${HOMEgfs}/sorc/gdas.cd/test/workflow/hosts/${MACHINE_ID}.yaml | cut -d":" -f2` | tr -d ' ') # Submit script to execute j-job -if [[ $machine = 'HERA' || $machine = 'ORION' || $machine = 'HERCULES' || $machine = 'URSA' || $machine = 'GAEAC6' ]]; then +if [[ $SCHEDULER = 'slurm' ]]; then sbatch --export=ALL --wait submit_${type}.sh -elif [[ $machine = 'WCOSS2' ]]; then +elif [[ $SCHEDULER = 'pbspro' ]]; then qsub -V -W block=true submit_${type}.sh else ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_FINALIZE diff --git a/test/atm/global-workflow/jjob_ens_inc.sh b/test/atm/global-workflow/jjob_ens_inc.sh index 1da9f6777..e17899bb9 100755 --- a/test/atm/global-workflow/jjob_ens_inc.sh +++ b/test/atm/global-workflow/jjob_ens_inc.sh @@ -27,30 +27,21 @@ export COMROOT=$DATAROOT export NMEM_ENS=3 export ACCOUNT=da-cpu +# Detect machine +source "${HOMEgfs}/ush/detect_machine.sh" + # Set python path for workflow utilities and tasks wxflowPATH="${HOMEgfs}/ush/python" PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${wxflowPATH}" export PYTHONPATH -# Detemine machine from config.base -machine=$(echo `grep 'machine=' $EXPDIR/config.base | cut -d"=" -f2` | tr -d '"') - -# Set NETCDF and UTILROOT variables (used in config.base) -if [[ $machine = 'HERA' ]]; then - NETCDF=$( which ncdump ) - export NETCDF - export UTILROOT="/scratch2/NCEPDEV/ensemble/save/Walter.Kolczynski/hpc-stack/intel-18.0.5.274/prod_util/1.2.2" -elif [[ $machine = 'ORION' || $machine = 'HERCULES' ]]; then - ncdump=$( which ncdump ) - NETCDF=$( echo "${ncdump}" | cut -d " " -f 3 ) - export NETCDF - export UTILROOT=/work2/noaa/da/python/opt/intel-2022.1.2/prod_util/1.2.2 -fi +# Export library path +export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${HOMEgfs}/lib" # Create yaml with job configuration config_yaml="./config_${type}.yaml" cat < ${config_yaml} -machine: ${machine} +machine: ${MACHINE_ID} homegfs: ${HOMEgfs} job_name: ${type} walltime: "00:30:00" @@ -64,11 +55,12 @@ EOF # Create script to execute j-job $HOMEgfs/sorc/gdas.cd/test/workflow/generate_job_script.py ${config_yaml} +SCHEDULER=$(echo `grep SCHEDULER ${HOMEgfs}/sorc/gdas.cd/test/workflow/hosts/${MACHINE_ID}.yaml | cut -d":" -f2` | tr -d ' ') # Submit script to execute j-job -if [[ $machine = 'HERA' || $machine = 'ORION' || $machine = 'HERCULES' || $machine = 'URSA' || $machine = 'GAEAC6' ]]; then +if [[ $SCHEDULER = 'slurm' ]]; then sbatch --export=ALL --wait submit_${type}.sh -elif [[ $machine = 'WCOSS2' ]]; then +elif [[ $SCHEDULER = 'pbspro' ]]; then qsub -V -W block=true submit_${type}.sh else ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_FV3_INCREMENT diff --git a/test/atm/global-workflow/jjob_ens_init.sh b/test/atm/global-workflow/jjob_ens_init.sh index 38f55dce2..ac7bf76c5 100755 --- a/test/atm/global-workflow/jjob_ens_init.sh +++ b/test/atm/global-workflow/jjob_ens_init.sh @@ -31,25 +31,16 @@ export STRICT="NO" source "${HOMEgfs}/ush/preamble.sh" source "${HOMEgfs}/dev/parm/config/gfs/config.com" +# Detect machine +source "${HOMEgfs}/ush/detect_machine.sh" + # Set python path for workflow utilities and tasks wxflowPATH="${HOMEgfs}/ush/python" PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${wxflowPATH}" export PYTHONPATH -# Detemine machine from config.base -machine=$(echo `grep 'machine=' $EXPDIR/config.base | cut -d"=" -f2` | tr -d '"') - -# Set NETCDF and UTILROOT variables (used in config.base) -if [[ $machine = 'HERA' ]]; then - NETCDF=$( which ncdump ) - export NETCDF - export UTILROOT="/scratch2/NCEPDEV/ensemble/save/Walter.Kolczynski/hpc-stack/intel-18.0.5.274/prod_util/1.2.2" -elif [[ $machine = 'ORION' || $machine = 'HERCULES' ]]; then - ncdump=$( which ncdump ) - NETCDF=$( echo "${ncdump}" | cut -d " " -f 3 ) - export NETCDF - export UTILROOT=/work2/noaa/da/python/opt/intel-2022.1.2/prod_util/1.2.2 -fi +# Export library path +export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${HOMEgfs}/lib" # Set date variables for previous cycle GDATE=`date +%Y%m%d%H -d "${CDATE:0:8} ${CDATE:8:2} - 6 hours"` @@ -115,7 +106,7 @@ cp $EXPDIR/config.base_lobsdiag_forenkf_false $EXPDIR/config.base # Create yaml with job configuration config_yaml="./config_${type}.yaml" cat < ${config_yaml} -machine: ${machine} +machine: ${MACHINE_ID} homegfs: ${HOMEgfs} job_name: ${type} walltime: "00:30:00" @@ -129,11 +120,12 @@ EOF # Create script to execute j-job $HOMEgfs/sorc/gdas.cd/test/workflow/generate_job_script.py ${config_yaml} +SCHEDULER=$(echo `grep SCHEDULER ${HOMEgfs}/sorc/gdas.cd/test/workflow/hosts/${MACHINE_ID}.yaml | cut -d":" -f2` | tr -d ' ') # Submit script to execute j-job -if [[ $machine = 'HERA' || $machine = 'ORION' || $machine = 'HERCULES' || $machine = 'URSA' || $machine = 'GAEAC6' ]]; then +if [[ $SCHEDULER = 'slurm' ]]; then sbatch --export=ALL --wait submit_${type}.sh -elif [[ $machine = 'WCOSS2' ]]; then +elif [[ $SCHEDULER = 'pbspro' ]]; then qsub -V -W block=true submit_${type}.sh else ${HOMEgfs}/jobs/JGLOBAL_ATM_ANALYSIS_INITIALIZE diff --git a/test/atm/global-workflow/jjob_ens_init_split.sh b/test/atm/global-workflow/jjob_ens_init_split.sh index 1452bfa6b..edaddd011 100755 --- a/test/atm/global-workflow/jjob_ens_init_split.sh +++ b/test/atm/global-workflow/jjob_ens_init_split.sh @@ -31,26 +31,17 @@ export STRICT="NO" source "${HOMEgfs}/ush/preamble.sh" source "${HOMEgfs}/dev/parm/config/gfs/config.com" +# Detect machine +source "${HOMEgfs}/ush/detect_machine.sh" + +# Export library path +export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${HOMEgfs}/lib" + # Set python path for workflow utilities and tasks wxflowPATH="${HOMEgfs}/ush/python" PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${wxflowPATH}" export PYTHONPATH -# Detemine machine from config.base -machine=$(echo `grep 'machine=' $EXPDIR/config.base | cut -d"=" -f2` | tr -d '"') - -# Set NETCDF and UTILROOT variables (used in config.base) -if [[ $machine = 'HERA' ]]; then - NETCDF=$( which ncdump ) - export NETCDF - export UTILROOT="/scratch2/NCEPDEV/ensemble/save/Walter.Kolczynski/hpc-stack/intel-18.0.5.274/prod_util/1.2.2" -elif [[ $machine = 'ORION' || $machine = 'HERCULES' ]]; then - ncdump=$( which ncdump ) - NETCDF=$( echo "${ncdump}" | cut -d " " -f 3 ) - export NETCDF - export UTILROOT=/work2/noaa/da/python/opt/intel-2022.1.2/prod_util/1.2.2 -fi - # Set date variables for previous cycle GDATE=`date +%Y%m%d%H -d "${CDATE:0:8} ${CDATE:8:2} - 6 hours"` gPDY=$(echo $GDATE | cut -c1-8) @@ -118,7 +109,7 @@ cp $EXPDIR/config.base_lobsdiag_forenkf_true $EXPDIR/config.base # Create yaml with job configuration config_yaml="./config_${type}.yaml" cat < ${config_yaml} -machine: ${machine} +machine: ${MACHINE_ID} homegfs: ${HOMEgfs} job_name: ${type} walltime: "00:30:00" @@ -132,11 +123,12 @@ EOF # Create script to execute j-job $HOMEgfs/sorc/gdas.cd/test/workflow/generate_job_script.py ${config_yaml} +SCHEDULER=$(echo `grep SCHEDULER ${HOMEgfs}/sorc/gdas.cd/test/workflow/hosts/${MACHINE_ID}.yaml | cut -d":" -f2` | tr -d ' ') # Submit script to execute j-job -if [[ $machine = 'HERA' || $machine = 'ORION' || $machine = 'HERCULES' || $machine = 'URSA' || $machine = 'GAEAC6' ]]; then +if [[ $SCHEDULER = 'slurm' ]]; then sbatch --export=ALL --wait submit_${type}.sh -elif [[ $machine = 'WCOSS2' ]]; then +elif [[ $SCHEDULER = 'pbspro' ]]; then qsub -V -W block=true submit_${type}.sh else ${HOMEgfs}/jobs/JGLOBAL_ATM_ANALYSIS_INITIALIZE diff --git a/test/atm/global-workflow/jjob_ens_letkf.sh b/test/atm/global-workflow/jjob_ens_letkf.sh index 78cff5c70..6df3e4516 100755 --- a/test/atm/global-workflow/jjob_ens_letkf.sh +++ b/test/atm/global-workflow/jjob_ens_letkf.sh @@ -30,25 +30,16 @@ export COMROOT=$DATAROOT export NMEM_ENS=3 export ACCOUNT=da-cpu +# Detect machine +source "${HOMEgfs}/ush/detect_machine.sh" + # Set python path for workflow utilities and tasks wxflowPATH="${HOMEgfs}/ush/python" PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${wxflowPATH}" export PYTHONPATH -# Detemine machine from config.base -machine=$(echo `grep 'machine=' $EXPDIR/config.base | cut -d"=" -f2` | tr -d '"') - -# Set NETCDF and UTILROOT variables (used in config.base) -if [[ $machine = 'HERA' ]]; then - NETCDF=$( which ncdump ) - export NETCDF - export UTILROOT="/scratch2/NCEPDEV/ensemble/save/Walter.Kolczynski/hpc-stack/intel-18.0.5.274/prod_util/1.2.2" -elif [[ $machine = 'ORION' || $machine = 'HERCULES' ]]; then - ncdump=$( which ncdump ) - NETCDF=$( echo "${ncdump}" | cut -d " " -f 3 ) - export NETCDF - export UTILROOT=/work2/noaa/da/python/opt/intel-2022.1.2/prod_util/1.2.2 -fi +# Export library path +export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${HOMEgfs}/lib" # Set lobsdiag_forenkf=.false. to run letkf as combined observer and solver job cp $EXPDIR/config.base_lobsdiag_forenkf_false $EXPDIR/config.base @@ -56,7 +47,7 @@ cp $EXPDIR/config.base_lobsdiag_forenkf_false $EXPDIR/config.base # Create yaml with job configuration config_yaml="./config_${type}.yaml" cat < ${config_yaml} -machine: ${machine} +machine: ${MACHINE_ID} homegfs: ${HOMEgfs} job_name: ${type} walltime: "00:30:00" @@ -70,11 +61,12 @@ EOF # Create script to execute j-job $HOMEgfs/sorc/gdas.cd/test/workflow/generate_job_script.py ${config_yaml} +SCHEDULER=$(echo `grep SCHEDULER ${HOMEgfs}/sorc/gdas.cd/test/workflow/hosts/${MACHINE_ID}.yaml | cut -d":" -f2` | tr -d ' ') # Submit script to execute j-job -if [[ $machine = 'HERA' || $machine = 'ORION' || $machine = 'HERCULES' || $machine = 'URSA' || $machine = 'GAEAC6' ]]; then +if [[ $SCHEDULER = 'slurm' ]]; then sbatch --export=ALL --wait submit_${type}.sh -elif [[ $machine = 'WCOSS2' ]]; then +elif [[ $SCHEDULER = 'pbspro' ]]; then qsub -V -W block=true submit_${type}.sh else ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_LETKF diff --git a/test/atm/global-workflow/jjob_ens_obs.sh b/test/atm/global-workflow/jjob_ens_obs.sh index 8001a0a62..eb33de115 100755 --- a/test/atm/global-workflow/jjob_ens_obs.sh +++ b/test/atm/global-workflow/jjob_ens_obs.sh @@ -30,25 +30,16 @@ export COMROOT=$DATAROOT export NMEM_ENS=3 export ACCOUNT=da-cpu +# Detect machine +source "${HOMEgfs}/ush/detect_machine.sh" + # Set python path for workflow utilities and tasks wxflowPATH="${HOMEgfs}/ush/python" PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${wxflowPATH}" export PYTHONPATH -# Detemine machine from config.base -machine=$(echo `grep 'machine=' $EXPDIR/config.base | cut -d"=" -f2` | tr -d '"') - -# Set NETCDF and UTILROOT variables (used in config.base) -if [[ $machine = 'HERA' ]]; then - NETCDF=$( which ncdump ) - export NETCDF - export UTILROOT="/scratch2/NCEPDEV/ensemble/save/Walter.Kolczynski/hpc-stack/intel-18.0.5.274/prod_util/1.2.2" -elif [[ $machine = 'ORION' || $machine = 'HERCULES' ]]; then - ncdump=$( which ncdump ) - NETCDF=$( echo "${ncdump}" | cut -d " " -f 3 ) - export NETCDF - export UTILROOT=/work2/noaa/da/python/opt/intel-2022.1.2/prod_util/1.2.2 -fi +# Export library path +export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${HOMEgfs}/lib" # Set lobsdiag_forenkf=.true. to run letkf as stand-alone observer job cp $EXPDIR/config.base_lobsdiag_forenkf_true $EXPDIR/config.base @@ -56,7 +47,7 @@ cp $EXPDIR/config.base_lobsdiag_forenkf_true $EXPDIR/config.base # Create yaml with job configuration config_yaml="./config_${type}.yaml" cat < ${config_yaml} -machine: ${machine} +machine: ${MACHINE_ID} homegfs: ${HOMEgfs} job_name: ${type} walltime: "00:30:00" @@ -70,11 +61,12 @@ EOF # Create script to execute j-job $HOMEgfs/sorc/gdas.cd/test/workflow/generate_job_script.py ${config_yaml} +SCHEDULER=$(echo `grep SCHEDULER ${HOMEgfs}/sorc/gdas.cd/test/workflow/hosts/${MACHINE_ID}.yaml | cut -d":" -f2` | tr -d ' ') # Submit script to execute j-job -if [[ $machine = 'HERA' || $machine = 'ORION' || $machine = 'HERCULES' || $machine = 'URSA' || $machine = 'GAEAC6' ]]; then +if [[ $SCHEDULER = 'slurm' ]]; then sbatch --export=ALL --wait submit_${type}.sh -elif [[ $machine = 'WCOSS2' ]]; then +elif [[ $SCHEDULER = 'pbspro' ]]; then qsub -V -W block=true submit_${type}.sh else ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_OBS diff --git a/test/atm/global-workflow/jjob_ens_sol.sh b/test/atm/global-workflow/jjob_ens_sol.sh index 6f860a584..ea008cf50 100755 --- a/test/atm/global-workflow/jjob_ens_sol.sh +++ b/test/atm/global-workflow/jjob_ens_sol.sh @@ -30,25 +30,16 @@ export COMROOT=$DATAROOT export NMEM_ENS=3 export ACCOUNT=da-cpu +# Detect machine +source "${HOMEgfs}/ush/detect_machine.sh" + # Set python path for workflow utilities and tasks wxflowPATH="${HOMEgfs}/ush/python" PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${wxflowPATH}" export PYTHONPATH -# Detemine machine from config.base -machine=$(echo `grep 'machine=' $EXPDIR/config.base | cut -d"=" -f2` | tr -d '"') - -# Set NETCDF and UTILROOT variables (used in config.base) -if [[ $machine = 'HERA' ]]; then - NETCDF=$( which ncdump ) - export NETCDF - export UTILROOT="/scratch2/NCEPDEV/ensemble/save/Walter.Kolczynski/hpc-stack/intel-18.0.5.274/prod_util/1.2.2" -elif [[ $machine = 'ORION' || $machine = 'HERCULES' ]]; then - ncdump=$( which ncdump ) - NETCDF=$( echo "${ncdump}" | cut -d " " -f 3 ) - export NETCDF - export UTILROOT=/work2/noaa/da/python/opt/intel-2022.1.2/prod_util/1.2.2 -fi +# Export library path +export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${HOMEgfs}/lib" # Set lobsdiag_forenkf=.true. to run letkf as stand-alone solver job cp $EXPDIR/config.base_lobsdiag_forenkf_true $EXPDIR/config.base @@ -56,7 +47,7 @@ cp $EXPDIR/config.base_lobsdiag_forenkf_true $EXPDIR/config.base # Create yaml with job configuration config_yaml="./config_${type}.yaml" cat < ${config_yaml} -machine: ${machine} +machine: ${MACHINE_ID} homegfs: ${HOMEgfs} job_name: ${type} walltime: "00:30:00" @@ -70,11 +61,12 @@ EOF # Create script to execute j-job $HOMEgfs/sorc/gdas.cd/test/workflow/generate_job_script.py ${config_yaml} +SCHEDULER=$(echo `grep SCHEDULER ${HOMEgfs}/sorc/gdas.cd/test/workflow/hosts/${MACHINE_ID}.yaml | cut -d":" -f2` | tr -d ' ') # Submit script to execute j-job -if [[ $machine = 'HERA' || $machine = 'ORION' || $machine = 'HERCULES' || $machine = 'URSA' || $machine = 'GAEAC6' ]]; then +if [[ $SCHEDULER = 'slurm' ]]; then sbatch --export=ALL --wait submit_${type}.sh -elif [[ $machine = 'WCOSS2' ]]; then +elif [[ $SCHEDULER = 'pbspro' ]]; then qsub -V -W block=true submit_${type}.sh else ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_SOL diff --git a/test/atm/global-workflow/jjob_var_final.sh b/test/atm/global-workflow/jjob_var_final.sh index e2b4f1bf0..b73da7d53 100755 --- a/test/atm/global-workflow/jjob_var_final.sh +++ b/test/atm/global-workflow/jjob_var_final.sh @@ -27,30 +27,21 @@ export COMROOT=$DATAROOT export NMEM_ENS=0 export ACCOUNT=da-cpu +# Detect machine +source "${HOMEgfs}/ush/detect_machine.sh" + # Set python path for workflow utilities and tasks wxflowPATH="${HOMEgfs}/ush/python" PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${wxflowPATH}" export PYTHONPATH -# Detemine machine from config.base -machine=$(echo `grep 'machine=' $EXPDIR/config.base | cut -d"=" -f2` | tr -d '"') - -# Set NETCDF and UTILROOT variables (used in config.base) -if [[ $machine = 'HERA' ]]; then - NETCDF=$( which ncdump ) - export NETCDF - export UTILROOT="/scratch2/NCEPDEV/ensemble/save/Walter.Kolczynski/hpc-stack/intel-18.0.5.274/prod_util/1.2.2" -elif [[ $machine = 'ORION' || $machine = 'HERCULES' ]]; then - ncdump=$( which ncdump ) - NETCDF=$( echo "${ncdump}" | cut -d " " -f 3 ) - export NETCDF - export UTILROOT=/work2/noaa/da/python/opt/intel-2022.1.2/prod_util/1.2.2 -fi +# Export library path +export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${HOMEgfs}/lib" # Create yaml with job configuration config_yaml="./config_${type}.yaml" cat < ${config_yaml} -machine: ${machine} +machine: ${MACHINE_ID} homegfs: ${HOMEgfs} job_name: ${type} walltime: "00:30:00" @@ -64,11 +55,12 @@ EOF # Create script to execute j-job $HOMEgfs/sorc/gdas.cd/test/workflow/generate_job_script.py ${config_yaml} +SCHEDULER=$(echo `grep SCHEDULER ${HOMEgfs}/sorc/gdas.cd/test/workflow/hosts/${MACHINE_ID}.yaml | cut -d":" -f2` | tr -d ' ') # Submit script to execute j-job -if [[ $machine = 'HERA' || $machine = 'ORION' || $machine = 'HERCULES' || $machine = 'URSA' || $machine = 'GAEAC6' ]]; then +if [[ $SCHEDULER = 'slurm' ]]; then sbatch --export=ALL --wait submit_${type}.sh -elif [[ $machine = 'WCOSS2' ]]; then +elif [[ $SCHEDULER = 'pbspro' ]]; then qsub -V -W block=true submit_${type}.sh else ${HOMEgfs}/jobs/JGLOBAL_ATM_ANALYSIS_FINALIZE diff --git a/test/atm/global-workflow/jjob_var_inc.sh b/test/atm/global-workflow/jjob_var_inc.sh index f1a5cb38b..fcc07d0fb 100755 --- a/test/atm/global-workflow/jjob_var_inc.sh +++ b/test/atm/global-workflow/jjob_var_inc.sh @@ -27,30 +27,21 @@ export COMROOT=$DATAROOT export NMEM_ENS=0 export ACCOUNT=da-cpu +# Detect machine +source "${HOMEgfs}/ush/detect_machine.sh" + # Set python path for workflow utilities and tasks wxflowPATH="${HOMEgfs}/ush/python" PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${wxflowPATH}" export PYTHONPATH -# Detemine machine from config.base -machine=$(echo `grep 'machine=' $EXPDIR/config.base | cut -d"=" -f2` | tr -d '"') - -# Set NETCDF and UTILROOT variables (used in config.base) -if [[ $machine = 'HERA' ]]; then - NETCDF=$( which ncdump ) - export NETCDF - export UTILROOT="/scratch2/NCEPDEV/ensemble/save/Walter.Kolczynski/hpc-stack/intel-18.0.5.274/prod_util/1.2.2" -elif [[ $machine = 'ORION' || $machine = 'HERCULES' ]]; then - ncdump=$( which ncdump ) - NETCDF=$( echo "${ncdump}" | cut -d " " -f 3 ) - export NETCDF - export UTILROOT=/work2/noaa/da/python/opt/intel-2022.1.2/prod_util/1.2.2 -fi +# Export library path +export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${HOMEgfs}/lib" # Create yaml with job confiugration config_yaml="./config_${type}.yaml" cat < ${config_yaml} -machine: ${machine} +machine: ${MACHINE_ID} homegfs: ${HOMEgfs} job_name: ${type} walltime: "00:30:00" @@ -64,11 +55,12 @@ EOF # Create script to execute j-job $HOMEgfs/sorc/gdas.cd/test/workflow/generate_job_script.py ${config_yaml} +SCHEDULER=$(echo `grep SCHEDULER ${HOMEgfs}/sorc/gdas.cd/test/workflow/hosts/${MACHINE_ID}.yaml | cut -d":" -f2` | tr -d ' ') # Submit script to execute j-job -if [[ $machine = 'HERA' || $machine = 'ORION' || $machine = 'HERCULES' || $machine = 'URSA' || $machine = 'GAEAC6' ]]; then +if [[ $SCHEDULER = 'slurm' ]]; then sbatch --export=ALL --wait submit_${type}.sh -elif [[ $machine = 'WCOSS2' ]]; then +elif [[ $SCHEDULER = 'pbspro' ]]; then qsub -V -W block=true submit_${type}.sh else ${HOMEgfs}/jobs/JGLOBAL_ATM_ANALYSIS_FV3_INCREMENT diff --git a/test/atm/global-workflow/jjob_var_init.sh b/test/atm/global-workflow/jjob_var_init.sh index 3febabf99..96c2d3aaa 100755 --- a/test/atm/global-workflow/jjob_var_init.sh +++ b/test/atm/global-workflow/jjob_var_init.sh @@ -31,25 +31,16 @@ export STRICT="NO" source "${HOMEgfs}/ush/preamble.sh" source "${HOMEgfs}/dev/parm/config/gfs/config.com" +# Detect machine +source "${HOMEgfs}/ush/detect_machine.sh" + # Set python path for workflow utilities and tasks wxflowPATH="${HOMEgfs}/ush/python" PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${wxflowPATH}" export PYTHONPATH -# Detemine machine from config.base -machine=$(echo `grep 'machine=' $EXPDIR/config.base | cut -d"=" -f2` | tr -d '"') - -# Set NETCDF and UTILROOT variables (used in config.base) -if [[ $machine = 'HERA' ]]; then - NETCDF=$( which ncdump ) - export NETCDF - export UTILROOT="/scratch2/NCEPDEV/ensemble/save/Walter.Kolczynski/hpc-stack/intel-18.0.5.274/prod_util/1.2.2" -elif [[ $machine = 'ORION' || $machine = 'HERCULES' ]]; then - ncdump=$( which ncdump ) - NETCDF=$( echo "${ncdump}" | cut -d " " -f 3 ) - export NETCDF - export UTILROOT=/work2/noaa/da/python/opt/intel-2022.1.2/prod_util/1.2.2 -fi +# Export library path +export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${HOMEgfs}/lib" # Set date variables for previous cycle GDATE=`date +%Y%m%d%H -d "${CDATE:0:8} ${CDATE:8:2} - 6 hours"` @@ -84,7 +75,6 @@ for file in $flist; do ln -fs $GDASAPP_TESTDATA/lowres/$dpath/$gprefix.${file} $COMIN_ATMOS_ANALYSIS_PREV/$gprefix.${file} done - # Link atmospheric history on gaussian grid dpath=gdas.$gPDY/$gcyc/model/atmos/history mkdir -p $COMIN_ATMOS_HISTORY_PREV @@ -125,7 +115,7 @@ done # Create yaml with job configuration config_yaml="./config_${type}.yaml" cat < ${config_yaml} -machine: ${machine} +machine: ${MACHINE_ID} homegfs: ${HOMEgfs} job_name: ${type} walltime: "00:30:00" @@ -139,11 +129,12 @@ EOF # Create script to execute j-job $HOMEgfs/sorc/gdas.cd/test/workflow/generate_job_script.py ${config_yaml} +SCHEDULER=$(echo `grep SCHEDULER ${HOMEgfs}/sorc/gdas.cd/test/workflow/hosts/${MACHINE_ID}.yaml | cut -d":" -f2` | tr -d ' ') # Submit script to execute j-job -if [[ $machine = 'HERA' || $machine = 'ORION' || $machine = 'HERCULES' || $machine = 'URSA' || $machine = 'GAEAC6' ]]; then +if [[ $SCHEDULER = 'slurm' ]]; then sbatch --export=ALL --wait submit_${type}.sh -elif [[ $machine = 'WCOSS2' ]]; then +elif [[ $SCHEDULER = 'pbspro' ]]; then qsub -V -W block=true submit_${type}.sh else ${HOMEgfs}/jobs/JGLOBAL_ATM_ANALYSIS_INITIALIZE diff --git a/test/atm/global-workflow/jjob_var_run.sh b/test/atm/global-workflow/jjob_var_run.sh index a5aa85134..e494292e5 100755 --- a/test/atm/global-workflow/jjob_var_run.sh +++ b/test/atm/global-workflow/jjob_var_run.sh @@ -1,6 +1,7 @@ #! /usr/bin/env bash set -x + bindir=$1 srcdir=$2 @@ -30,30 +31,21 @@ export COMROOT=$DATAROOT export NMEM_ENS=0 export ACCOUNT=da-cpu +# Detect machine +source "${HOMEgfs}/ush/detect_machine.sh" + # Set python path for workflow utilities and tasks wxflowPATH="${HOMEgfs}/ush/python" PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${wxflowPATH}" export PYTHONPATH -# Detemine machine from config.base -machine=$(echo `grep 'machine=' $EXPDIR/config.base | cut -d"=" -f2` | tr -d '"') - -# Set NETCDF and UTILROOT variables (used in config.base) -if [[ $machine = 'HERA' ]]; then - NETCDF=$( which ncdump ) - export NETCDF - export UTILROOT="/scratch2/NCEPDEV/ensemble/save/Walter.Kolczynski/hpc-stack/intel-18.0.5.274/prod_util/1.2.2" -elif [[ $machine = 'ORION' || $machine = 'HERCULES' ]]; then - ncdump=$( which ncdump ) - NETCDF=$( echo "${ncdump}" | cut -d " " -f 3 ) - export NETCDF - export UTILROOT=/work2/noaa/da/python/opt/intel-2022.1.2/prod_util/1.2.2 -fi +# Export library path +export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${HOMEgfs}/lib" # Create yaml with job configuration config_yaml="./config_${type}.yaml" cat < ${config_yaml} -machine: ${machine} +machine: ${MACHINE_ID} homegfs: ${HOMEgfs} job_name: ${type} walltime: "00:30:00" @@ -65,13 +57,14 @@ command: ${HOMEgfs}/jobs/JGLOBAL_ATM_ANALYSIS_VARIATIONAL filename: submit_${type}.sh EOF -# Create script to execute j-job -$HOMEgfs/sorc/gdas.cd/test/workflow/generate_job_script.py ${config_yaml} +# Create script to execute j-job. Set job scheduler +${HOMEgfs}/sorc/gdas.cd/test/workflow/generate_job_script.py ${config_yaml} +SCHEDULER=$(echo `grep SCHEDULER ${HOMEgfs}/sorc/gdas.cd/test/workflow/hosts/${MACHINE_ID}.yaml | cut -d":" -f2` | tr -d ' ') # Submit script to execute j-job -if [[ $machine = 'HERA' || $machine = 'ORION' || $machine = 'HERCULES' || $machine = 'URSA' || $machine = 'GAEAC6' ]]; then +if [[ $SCHEDULER = 'slurm' ]]; then sbatch --export=ALL --wait submit_${type}.sh -elif [[ $machine = 'WCOSS2' ]]; then +elif [[ $SCHEDULER = 'pbspro' ]]; then qsub -V -W block=true submit_${type}.sh else ${HOMEgfs}/jobs/JGLOBAL_ATM_ANALYSIS_VARIATIONAL diff --git a/test/workflow/generate_job_script.py b/test/workflow/generate_job_script.py index 4a69a77e5..b06147c30 100755 --- a/test/workflow/generate_job_script.py +++ b/test/workflow/generate_job_script.py @@ -68,35 +68,21 @@ def main(): sys.exit(1) job_config_file = sys.argv[1] - with open(job_config_file, 'r') as f: job_config = yaml.safe_load(f) - print(f" ") - print(f"job_config {job_config}") - print(f" ") - homegfs = job_config.get('homegfs') machine = job_config.get('machine') - print(f" ") - print(f"homegfs {homegfs}") - print(f"machine {machine}") - print(f" ") - - machine_config_file = os.path.join(homegfs, "sorc/gdas.cd/test/workflow/hosts/") + machine.lower() + ".yaml" - - print(f" ") - print(f"machine_config_file {machine_config_file}") - print(f" ") + machine_config_file = ( + os.path.join(homegfs, "sorc/gdas.cd/test/workflow/hosts/") + + machine.lower() + + ".yaml" + ) with open(machine_config_file, 'r') as f: machine_config = yaml.safe_load(f) - print(f" ") - print(f"machine_config {machine_config}") - print(f" ") - create_job_script(job_config, machine_config) diff --git a/test/workflow/hosts/orion.yaml b/test/workflow/hosts/orion.yaml index 3ba436eac..c475ae542 100644 --- a/test/workflow/hosts/orion.yaml +++ b/test/workflow/hosts/orion.yaml @@ -5,4 +5,4 @@ PARTITION_BATCH: orion PARTITION_SERVICE: service # HPC account information -HPC_ACCOUNT: GFS-DEV +HPC_ACCOUNT: da-cpu From 479a0f44d141f8f5836d41aef038c239036ae3f5 Mon Sep 17 00:00:00 2001 From: "russ.treadon" Date: Thu, 26 Jun 2025 15:17:59 +0000 Subject: [PATCH 04/17] resolve pynorm error (#1762) --- test/workflow/generate_job_script.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/test/workflow/generate_job_script.py b/test/workflow/generate_job_script.py index b06147c30..052c6e643 100755 --- a/test/workflow/generate_job_script.py +++ b/test/workflow/generate_job_script.py @@ -74,11 +74,7 @@ def main(): homegfs = job_config.get('homegfs') machine = job_config.get('machine') - machine_config_file = ( - os.path.join(homegfs, "sorc/gdas.cd/test/workflow/hosts/") - + machine.lower() - + ".yaml" - ) + machine_config_file = os.path.join(homegfs, "sorc/gdas.cd/test/workflow/hosts/") + machine.lower() + ".yaml" with open(machine_config_file, 'r') as f: machine_config = yaml.safe_load(f) From 174ca537e04783e2470e11ab3fffa51701c9d9d7 Mon Sep 17 00:00:00 2001 From: RussTreadon-NOAA Date: Mon, 7 Jul 2025 20:11:20 +0000 Subject: [PATCH 05/17] specify 6 tasks per node for jjob_ens_sol and jjob_ens_inc (#1762) --- test/atm/global-workflow/jjob_ens_inc.sh | 2 +- test/atm/global-workflow/jjob_ens_sol.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/atm/global-workflow/jjob_ens_inc.sh b/test/atm/global-workflow/jjob_ens_inc.sh index e17899bb9..8c6dbe191 100755 --- a/test/atm/global-workflow/jjob_ens_inc.sh +++ b/test/atm/global-workflow/jjob_ens_inc.sh @@ -46,7 +46,7 @@ homegfs: ${HOMEgfs} job_name: ${type} walltime: "00:30:00" nodes: 1 -ntasks_per_node: 1 +ntasks_per_node: 6 threads_per_task: 1 memory: 8Gb command: ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_FV3_INCREMENT diff --git a/test/atm/global-workflow/jjob_ens_sol.sh b/test/atm/global-workflow/jjob_ens_sol.sh index ea008cf50..b77d49745 100755 --- a/test/atm/global-workflow/jjob_ens_sol.sh +++ b/test/atm/global-workflow/jjob_ens_sol.sh @@ -52,7 +52,7 @@ homegfs: ${HOMEgfs} job_name: ${type} walltime: "00:30:00" nodes: 1 -ntasks_per_node: 1 +ntasks_per_node: 6 threads_per_task: 1 memory: 8Gb command: ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_SOL From 1f1cdeb3b0285d67b24758a5a06b4ded5171ba62 Mon Sep 17 00:00:00 2001 From: RussTreadon-NOAA Date: Tue, 8 Jul 2025 09:42:39 -0400 Subject: [PATCH 06/17] add logic to set --mem=0 in ctest batch scripts on Gaea C6 (#1762) --- test/atm/global-workflow/jjob_ens_final.sh | 6 +++++- test/atm/global-workflow/jjob_ens_inc.sh | 6 +++++- test/atm/global-workflow/jjob_ens_init.sh | 6 +++++- test/atm/global-workflow/jjob_ens_init_split.sh | 6 +++++- test/atm/global-workflow/jjob_ens_letkf.sh | 6 +++++- test/atm/global-workflow/jjob_ens_obs.sh | 6 +++++- test/atm/global-workflow/jjob_ens_sol.sh | 6 +++++- test/atm/global-workflow/jjob_var_final.sh | 6 +++++- test/atm/global-workflow/jjob_var_inc.sh | 6 +++++- test/atm/global-workflow/jjob_var_init.sh | 6 +++++- test/atm/global-workflow/jjob_var_run.sh | 6 +++++- 11 files changed, 55 insertions(+), 11 deletions(-) diff --git a/test/atm/global-workflow/jjob_ens_final.sh b/test/atm/global-workflow/jjob_ens_final.sh index ef4ca0909..2722de931 100755 --- a/test/atm/global-workflow/jjob_ens_final.sh +++ b/test/atm/global-workflow/jjob_ens_final.sh @@ -39,6 +39,10 @@ export PYTHONPATH export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${HOMEgfs}/lib" # Create yaml with job configuration +memory="8Gb" +if [[ ${MACHINE_ID} == "gaeac6" ]]; then + memory=0 +fi config_yaml="./config_${type}.yaml" cat < ${config_yaml} machine: ${MACHINE_ID} @@ -48,7 +52,7 @@ walltime: "00:30:00" nodes: 1 ntasks_per_node: 1 threads_per_task: 1 -memory: 8Gb +memory: ${memory} command: ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_FINALIZE filename: submit_${type}.sh EOF diff --git a/test/atm/global-workflow/jjob_ens_inc.sh b/test/atm/global-workflow/jjob_ens_inc.sh index 8c6dbe191..45e800e8c 100755 --- a/test/atm/global-workflow/jjob_ens_inc.sh +++ b/test/atm/global-workflow/jjob_ens_inc.sh @@ -39,6 +39,10 @@ export PYTHONPATH export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${HOMEgfs}/lib" # Create yaml with job configuration +memory="8Gb" +if [[ ${MACHINE_ID} == "gaeac6" ]]; then + memory=0 +fi config_yaml="./config_${type}.yaml" cat < ${config_yaml} machine: ${MACHINE_ID} @@ -48,7 +52,7 @@ walltime: "00:30:00" nodes: 1 ntasks_per_node: 6 threads_per_task: 1 -memory: 8Gb +memory: ${memory} command: ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_FV3_INCREMENT filename: submit_${type}.sh EOF diff --git a/test/atm/global-workflow/jjob_ens_init.sh b/test/atm/global-workflow/jjob_ens_init.sh index ac7bf76c5..0f0a9cf7a 100755 --- a/test/atm/global-workflow/jjob_ens_init.sh +++ b/test/atm/global-workflow/jjob_ens_init.sh @@ -104,6 +104,10 @@ done cp $EXPDIR/config.base_lobsdiag_forenkf_false $EXPDIR/config.base # Create yaml with job configuration +memory="8Gb" +if [[ ${MACHINE_ID} == "gaeac6" ]]; then + memory=0 +fi config_yaml="./config_${type}.yaml" cat < ${config_yaml} machine: ${MACHINE_ID} @@ -113,7 +117,7 @@ walltime: "00:30:00" nodes: 1 ntasks_per_node: 1 threads_per_task: 1 -memory: 8Gb +memory: ${memory} command: ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_INITIALIZE filename: submit_${type}.sh EOF diff --git a/test/atm/global-workflow/jjob_ens_init_split.sh b/test/atm/global-workflow/jjob_ens_init_split.sh index edaddd011..5cdb99157 100755 --- a/test/atm/global-workflow/jjob_ens_init_split.sh +++ b/test/atm/global-workflow/jjob_ens_init_split.sh @@ -107,6 +107,10 @@ done cp $EXPDIR/config.base_lobsdiag_forenkf_true $EXPDIR/config.base # Create yaml with job configuration +memory="8Gb" +if [[ ${MACHINE_ID} == "gaeac6" ]]; then + memory=0 +fi config_yaml="./config_${type}.yaml" cat < ${config_yaml} machine: ${MACHINE_ID} @@ -116,7 +120,7 @@ walltime: "00:30:00" nodes: 1 ntasks_per_node: 1 threads_per_task: 1 -memory: 8Gb +memory: ${memory} command: ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_INITIALIZE filename: submit_${type}.sh EOF diff --git a/test/atm/global-workflow/jjob_ens_letkf.sh b/test/atm/global-workflow/jjob_ens_letkf.sh index 6df3e4516..49d39e686 100755 --- a/test/atm/global-workflow/jjob_ens_letkf.sh +++ b/test/atm/global-workflow/jjob_ens_letkf.sh @@ -45,6 +45,10 @@ export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${HOMEgfs}/lib" cp $EXPDIR/config.base_lobsdiag_forenkf_false $EXPDIR/config.base # Create yaml with job configuration +memory="32Gb" +if [[ ${MACHINE_ID} == "gaeac6" ]]; then + memory=0 +fi config_yaml="./config_${type}.yaml" cat < ${config_yaml} machine: ${MACHINE_ID} @@ -54,7 +58,7 @@ walltime: "00:30:00" nodes: 1 ntasks_per_node: 6 threads_per_task: 1 -memory: 32Gb +memory: ${memory} command: ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_LETKF filename: submit_${type}.sh EOF diff --git a/test/atm/global-workflow/jjob_ens_obs.sh b/test/atm/global-workflow/jjob_ens_obs.sh index eb33de115..993d5fc3b 100755 --- a/test/atm/global-workflow/jjob_ens_obs.sh +++ b/test/atm/global-workflow/jjob_ens_obs.sh @@ -45,6 +45,10 @@ export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${HOMEgfs}/lib" cp $EXPDIR/config.base_lobsdiag_forenkf_true $EXPDIR/config.base # Create yaml with job configuration +memory="32Gb" +if [[ ${MACHINE_ID} == "gaeac6" ]]; then + memory=0 +fi config_yaml="./config_${type}.yaml" cat < ${config_yaml} machine: ${MACHINE_ID} @@ -54,7 +58,7 @@ walltime: "00:30:00" nodes: 1 ntasks_per_node: 6 threads_per_task: 1 -memory: 32Gb +memory: ${memory} command: ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_OBS filename: submit_${type}.sh EOF diff --git a/test/atm/global-workflow/jjob_ens_sol.sh b/test/atm/global-workflow/jjob_ens_sol.sh index b77d49745..d70eb8b3a 100755 --- a/test/atm/global-workflow/jjob_ens_sol.sh +++ b/test/atm/global-workflow/jjob_ens_sol.sh @@ -45,6 +45,10 @@ export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${HOMEgfs}/lib" cp $EXPDIR/config.base_lobsdiag_forenkf_true $EXPDIR/config.base # Create yaml with job configuration +memory="8Gb" +if [[ ${MACHINE_ID} == "gaeac6" ]]; then + memory=0 +fi config_yaml="./config_${type}.yaml" cat < ${config_yaml} machine: ${MACHINE_ID} @@ -54,7 +58,7 @@ walltime: "00:30:00" nodes: 1 ntasks_per_node: 6 threads_per_task: 1 -memory: 8Gb +memory: ${memory} command: ${HOMEgfs}/jobs/JGLOBAL_ATMENS_ANALYSIS_SOL filename: submit_${type}.sh EOF diff --git a/test/atm/global-workflow/jjob_var_final.sh b/test/atm/global-workflow/jjob_var_final.sh index b73da7d53..317f17e99 100755 --- a/test/atm/global-workflow/jjob_var_final.sh +++ b/test/atm/global-workflow/jjob_var_final.sh @@ -39,6 +39,10 @@ export PYTHONPATH export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${HOMEgfs}/lib" # Create yaml with job configuration +memory="8Gb" +if [[ ${MACHINE_ID} == "gaeac6" ]]; then + memory=0 +fi config_yaml="./config_${type}.yaml" cat < ${config_yaml} machine: ${MACHINE_ID} @@ -48,7 +52,7 @@ walltime: "00:30:00" nodes: 1 ntasks_per_node: 1 threads_per_task: 1 -memory: 8Gb +memory: ${memory} command: ${HOMEgfs}/jobs/JGLOBAL_ATM_ANALYSIS_FINALIZE filename: submit_${type}.sh EOF diff --git a/test/atm/global-workflow/jjob_var_inc.sh b/test/atm/global-workflow/jjob_var_inc.sh index fcc07d0fb..be518f0cd 100755 --- a/test/atm/global-workflow/jjob_var_inc.sh +++ b/test/atm/global-workflow/jjob_var_inc.sh @@ -39,6 +39,10 @@ export PYTHONPATH export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${HOMEgfs}/lib" # Create yaml with job confiugration +memory="8Gb" +if [[ ${MACHINE_ID} == "gaeac6" ]]; then + memory=0 +fi config_yaml="./config_${type}.yaml" cat < ${config_yaml} machine: ${MACHINE_ID} @@ -48,7 +52,7 @@ walltime: "00:30:00" nodes: 1 ntasks_per_node: 6 threads_per_task: 1 -memory: 8Gb +memory: ${memory} command: ${HOMEgfs}/jobs/JGLOBAL_ATM_ANALYSIS_FV3_INCREMENT filename: submit_${type}.sh EOF diff --git a/test/atm/global-workflow/jjob_var_init.sh b/test/atm/global-workflow/jjob_var_init.sh index 96c2d3aaa..c27e14658 100755 --- a/test/atm/global-workflow/jjob_var_init.sh +++ b/test/atm/global-workflow/jjob_var_init.sh @@ -113,6 +113,10 @@ for imem in $(seq 1 $NMEM_ENS); do done # Create yaml with job configuration +memory="8Gb" +if [[ ${MACHINE_ID} == "gaeac6" ]]; then + memory=0 +fi config_yaml="./config_${type}.yaml" cat < ${config_yaml} machine: ${MACHINE_ID} @@ -122,7 +126,7 @@ walltime: "00:30:00" nodes: 1 ntasks_per_node: 1 threads_per_task: 1 -memory: 8Gb +memory: ${memory} command: ${HOMEgfs}/jobs/JGLOBAL_ATM_ANALYSIS_INITIALIZE filename: submit_${type}.sh EOF diff --git a/test/atm/global-workflow/jjob_var_run.sh b/test/atm/global-workflow/jjob_var_run.sh index e494292e5..f3b3e6725 100755 --- a/test/atm/global-workflow/jjob_var_run.sh +++ b/test/atm/global-workflow/jjob_var_run.sh @@ -43,6 +43,10 @@ export PYTHONPATH export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${HOMEgfs}/lib" # Create yaml with job configuration +memory="96Gb" +if [[ ${MACHINE_ID} == "gaeac6" ]]; then + memory=0 +fi config_yaml="./config_${type}.yaml" cat < ${config_yaml} machine: ${MACHINE_ID} @@ -52,7 +56,7 @@ walltime: "00:30:00" nodes: 1 ntasks_per_node: 6 threads_per_task: 1 -memory: 96Gb +memory: ${memory} command: ${HOMEgfs}/jobs/JGLOBAL_ATM_ANALYSIS_VARIATIONAL filename: submit_${type}.sh EOF From 9c6f4adc6b13e58509c97487cb48bc00bb68d658 Mon Sep 17 00:00:00 2001 From: RussTreadon-NOAA Date: Tue, 8 Jul 2025 13:59:11 -0400 Subject: [PATCH 07/17] set HPC_ACCOUNT to ira-sti for gw-ci on gaeac6 (#1762) --- test/gw-ci/create_exp.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/gw-ci/create_exp.sh b/test/gw-ci/create_exp.sh index 891229b85..102de618c 100755 --- a/test/gw-ci/create_exp.sh +++ b/test/gw-ci/create_exp.sh @@ -15,6 +15,8 @@ export RUNTESTS=${exp_path}/${pslot} export HPC_ACCOUNT="da-cpu" if [[ $MACHINE_ID = wcoss2 ]]; then export HPC_ACCOUNT="GFS-DEV" +elif [[ $MACHINE_ID = gaeac6 ]]; then + export HPC_ACCOUNT="ira-sti" fi # Source the gw environement From f2a8da2e18754481a83bf3dce9088e4608fb1a9d Mon Sep 17 00:00:00 2001 From: Russ-Treadon-NOAA Date: Wed, 9 Jul 2025 19:31:33 +0000 Subject: [PATCH 08/17] remove slurm assumption for snow ctests (#1762) --- test/snow/apply_jedi_incr.sh | 54 ++++++++++++++++++++++++++++++++++-- test/snow/test_imsproc.sh | 9 ++++++ 2 files changed, 61 insertions(+), 2 deletions(-) diff --git a/test/snow/apply_jedi_incr.sh b/test/snow/apply_jedi_incr.sh index 60413c58d..91b4fec0f 100755 --- a/test/snow/apply_jedi_incr.sh +++ b/test/snow/apply_jedi_incr.sh @@ -18,6 +18,10 @@ EXECDIR=$project_source_dir/build/bin WORKDIR=$project_binary_dir/test/snow/apply_jedi_incr RSTDIR=$GDASAPP_TESTDATA/lowres/gdas.$GYMD/$GHR/model/atmos/restart INCDIR=$GDASAPP_TESTDATA/snow/C${RES} +HOMEgfs=$project_source_dir/../../ + +# Detect machine +source "${HOMEgfs}/ush/detect_machine.sh" export TPATH="$GDASAPP_TESTDATA/snow/C${RES}" export TSTUB="C${RES}_oro_data" @@ -77,9 +81,55 @@ done echo 'do_snowDA: calling apply snow increment' -# (n=6) -> this is fixed, at one task per tile (with minor code change, could run on a single proc). -srun '--export=ALL' -n 6 ${EXECDIR}/apply_incr.exe ${WORKDIR}/apply_incr.log +# Create script to run executable +runsh="./apply_incr.sh" +cat < $runsh +#!/bin/bash +set -ex +# Set APRUN for machine +APRUN="srun -n 6" +if [[ ${MACHINE_ID} == 'wcoss' ]]; then + APRUN="mpiexec -n 6" +fi + +# Run executable +$APRUN ${EXECDIR}/apply_incr.exe ${WORKDIR}/apply_incr.log +EOF +chmod 755 $runsh +# Create yaml with job configuration +memory="8Gb" +if [[ ${MACHINE_ID} == "gaeac6" ]]; then + memory=0 +fi +submitsh="./submit.sh" +config_yaml="./config.yaml" +cat < ${config_yaml} +machine: ${MACHINE_ID} +homegfs: ${HOMEgfs} +job_name: ${type} +walltime: "00:30:00" +nodes: 1 +ntasks_per_node: 6 +threads_per_task: 1 +memory: ${memory} +command: ${runsh} +filename: ${submitsh} +EOF + + +# Create submission script +$HOMEgfs/sorc/gdas.cd/test/workflow/generate_job_script.py ${config_yaml} +SCHEDULER=$(echo `grep SCHEDULER ${HOMEgfs}/sorc/gdas.cd/test/workflow/hosts/${MACHINE_ID}.yaml | cut -d":" -f2` | tr -d ' ') + +# Submit script +if [[ $SCHEDULER = 'slurm' ]]; then + sbatch --export=ALL --wait ${submitsh} +elif [[ $SCHEDULER = 'pbspro' ]]; then + qsub -V -W block=true ${submitsh} +else + echo "UNKOWN SCHEDULER $SCHEDULER" +fi rc=$? exit $rc diff --git a/test/snow/test_imsproc.sh b/test/snow/test_imsproc.sh index 01cd6ec61..83d7f493d 100755 --- a/test/snow/test_imsproc.sh +++ b/test/snow/test_imsproc.sh @@ -18,6 +18,10 @@ DOY=$(date +%j -d "$YY$MM$DD + 1 day") EXECDIR=$project_source_dir/build/bin WORKDIR=$project_binary_dir/test/snow/ims_proc RSTDIR=$GDASAPP_TESTDATA/lowres/gdas.$GYMD/$GHR/model/atmos/restart +HOMEgfs=$project_source_dir/../.. + +# Detect machine +source "${HOMEgfs}/ush/detect_machine.sh" export OBSDIR=$GDASAPP_TESTDATA/snow/snow_ice_cover export TSTUB="oro_C${RES}.mx100" @@ -59,6 +63,11 @@ ${EXECDIR}/calcfIMS.exe export PYTHONPATH=$PYTHONPATH:${project_source_dir}/iodaconv/src/:${project_source_dir}/build/lib/python${Python3_VERSION_MAJOR}.${Python3_VERSION_MINOR} IMS_IODA=${EXECDIR}/imsfv3_scf2ioda.py +# TODO: Remove LD_LIBRARY_PATH line as soon as permanent solution is available +if [[ ${MACHINE_ID} == 'wcoss2' ]]; then + export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/opt/cray/pe/mpich/8.1.19/ofi/intel/19.0/lib" +fi + echo 'do_snowDA: calling ioda converter' python ${IMS_IODA} -i IMSscf.${YY}${MM}${DD}.${TSTUB}.nc -o ioda.IMSscf.${YY}${MM}${DD}.${TSTUB}.nc From 4f30c58e1ede1bef7869aaf37c10519e346b485e Mon Sep 17 00:00:00 2001 From: RussTreadon-NOAA Date: Tue, 15 Jul 2025 14:29:31 -0400 Subject: [PATCH 09/17] add machine_id, add cluster to Gaea C6 submit script (#1762) --- test/workflow/generate_job_script.py | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/test/workflow/generate_job_script.py b/test/workflow/generate_job_script.py index 052c6e643..3d0672bfb 100755 --- a/test/workflow/generate_job_script.py +++ b/test/workflow/generate_job_script.py @@ -10,6 +10,8 @@ def create_job_script(job_config, machine_config): account = machine_config.get('HPC_ACCOUNT') queue = machine_config.get('QUEUE') partition = machine_config.get('PARTITION_BATCH', 'none') + cluster = machine_config.get('CLUSTERS', 'none') + machine_id = machine_config.get('MACHINE_ID', 'none') job_name = job_config.get('job_name', 'myjob') walltime = job_config.get('walltime', '01:00:00') nodes = job_config.get('nodes', 1) @@ -35,7 +37,27 @@ def create_job_script(job_config, machine_config): {command} """ elif scheduler == 'slurm': - script = f"""#!/bin/bash + if machine_id == 'gaeac6': + script = f"""#!/bin/bash +#SBATCH -J {job_name} +#SBATCH -o {job_name}.o%J +#SBATCH -e {job_name}.o%J +#SBATCH -A {account} +#SBATCH -q {queue} +#SBATCH -p {partition} +#SBATCH -M {cluster} +#SBATCH -t {walltime} +#SBATCH --nodes={nodes} +#SBATCH --ntasks-per-node={ntasks_per_node} +#SBATCH --cpus-per-task={threads_per_task} +#SBATCH --mem={memory} + +set -x +cd $SLURM_SUBMIT_DIR +{command} +""" + else: + script = f"""#!/bin/bash #SBATCH -J {job_name} #SBATCH -o {job_name}.o%J #SBATCH -e {job_name}.o%J @@ -79,6 +101,9 @@ def main(): with open(machine_config_file, 'r') as f: machine_config = yaml.safe_load(f) + machine_config["MACHINE_ID"] = machine.lower() + print(f"machine_config {machine_config}") + create_job_script(job_config, machine_config) From 52317b3137b9ea2b4a20c6e8ee8c50e1fed2515a Mon Sep 17 00:00:00 2001 From: RussTreadon-NOAA Date: Tue, 15 Jul 2025 14:39:06 -0400 Subject: [PATCH 10/17] remove extraneous white space from generate_job_script.py (#1762) --- test/workflow/generate_job_script.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/workflow/generate_job_script.py b/test/workflow/generate_job_script.py index 3d0672bfb..b65351906 100755 --- a/test/workflow/generate_job_script.py +++ b/test/workflow/generate_job_script.py @@ -103,7 +103,7 @@ def main(): machine_config["MACHINE_ID"] = machine.lower() print(f"machine_config {machine_config}") - + create_job_script(job_config, machine_config) From 17caebc3f2d85314c89be1b77eb4cdbe5b3987b9 Mon Sep 17 00:00:00 2001 From: RussTreadon-NOAA Date: Wed, 16 Jul 2025 15:41:23 +0000 Subject: [PATCH 11/17] replace CDATE with current_cycle in soca yaml templates (#1783) --- parm/soca/soca_det_files_stage.yaml.j2 | 2 +- parm/soca/soca_det_finalize.yaml.j2 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/parm/soca/soca_det_files_stage.yaml.j2 b/parm/soca/soca_det_files_stage.yaml.j2 index b7b08dfb4..6c5d3fc2f 100644 --- a/parm/soca/soca_det_files_stage.yaml.j2 +++ b/parm/soca/soca_det_files_stage.yaml.j2 @@ -1,7 +1,7 @@ {% if DOIAU %} {% set rst_date = MARINE_WINDOW_BEGIN | to_fv3time %} {% else %} -{% set rst_date = CDATE | to_fv3time %} +{% set rst_date = current_cycle | to_fv3time %} {% endif %} mkdir: diff --git a/parm/soca/soca_det_finalize.yaml.j2 b/parm/soca/soca_det_finalize.yaml.j2 index 93b886d17..000943d68 100644 --- a/parm/soca/soca_det_finalize.yaml.j2 +++ b/parm/soca/soca_det_finalize.yaml.j2 @@ -2,7 +2,7 @@ {% if DOIAU %} {% set cice_rst_date = MARINE_WINDOW_BEGIN | to_fv3time %} {% else %} -{% set cice_rst_date = CDATE | to_fv3time %} +{% set cice_rst_date = current_cycle | to_fv3time %} {% endif %} mkdir: From 8c9965be5515c1804e14d1b8a96c7e9b78ab1231 Mon Sep 17 00:00:00 2001 From: Russ-Treadon-NOAA Date: Thu, 17 Jul 2025 18:28:59 +0000 Subject: [PATCH 12/17] update rocoto load for wcoss2 (#1762) --- modulefiles/GDAS/wcoss2.intel.lua | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modulefiles/GDAS/wcoss2.intel.lua b/modulefiles/GDAS/wcoss2.intel.lua index 23f7d11b9..598e32bc2 100644 --- a/modulefiles/GDAS/wcoss2.intel.lua +++ b/modulefiles/GDAS/wcoss2.intel.lua @@ -52,7 +52,6 @@ load("libxaw/1.0.13") load("udunits/2.2.28") load("ncview/2.1.9") load("netcdf-cxx4/4.3.1") -load("core/rocoto/1.3.5") load("prod_util/2.0.14") load("py-setuptools/63.4.3") @@ -76,6 +75,9 @@ load("py-markupsafe/2.1.3") load("py-cftime/1.0.3.4") load("py-packaging/23.1") +prepend_path("MODULEPATH", "/apps/ops/test/nco/modulefiles/core") +load("rocoto/1.3.5") + setenv("CC","cc") setenv("CXX","CC") setenv("FC","ftn") From d9cca29ef095a59281756293d29b539ea0e29844 Mon Sep 17 00:00:00 2001 From: RussTreadon-NOAA Date: Thu, 17 Jul 2025 14:41:02 -0400 Subject: [PATCH 13/17] miscellaneous clean up to gaeac6 ctest job configuration (#1762) --- modulefiles/GDAS/gaeac6.intel.lua | 8 ++++++-- test/gw-ci/create_exp.sh | 2 +- test/snow/apply_jedi_incr.sh | 2 +- test/workflow/hosts/gaeac6.yaml | 2 +- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/modulefiles/GDAS/gaeac6.intel.lua b/modulefiles/GDAS/gaeac6.intel.lua index b156f7409..f8a620e27 100644 --- a/modulefiles/GDAS/gaeac6.intel.lua +++ b/modulefiles/GDAS/gaeac6.intel.lua @@ -8,7 +8,6 @@ local pkgNameVer = myModuleFullName() prepend_path("MODULEPATH", '/ncrc/proj/epic/spack-stack/c6/spack-stack-1.9.2/envs/ue-intel-2023.2.0/install/modulefiles/Core') prepend_path("MODULEPATH", '/ncrc/proj/epic/spack-stack/c6/spack-stack-1.9.2/envs/ue-intel-2023.2.0/install/modulefiles/gcc/12.3.0') -prepend_path("MODULEPATH", '/ncrc/proj/epic/rocoto/modulefiles') -- below two lines get us access to the spack-stack modules load("stack-intel/2023.2.0") @@ -58,7 +57,6 @@ load("ncview/2.1.9") load("netcdf-cxx4/4.3.1") load("json/3.11.3") --load("crtm/2.4.0.1") -load("rocoto/1.3.6") load("prod_util/2.1.1") load("py-jinja2/3.1.4") @@ -73,6 +71,12 @@ load("py-pip/23.1.2") unload("cray-libsci") +prepend_path("MODULEPATH", "/autofs/ncrc-svm1_proj/hurr1/hafs/shared/modulefiles") +load("rocoto/1.3.7_fix") + +append_path("MODULEPATH", "/usw/hpss/modulefiles") +load("hsi/9.3") + setenv("CC","cc") setenv("CXX","CC") setenv("FC","ftn") diff --git a/test/gw-ci/create_exp.sh b/test/gw-ci/create_exp.sh index 102de618c..9093fbe40 100755 --- a/test/gw-ci/create_exp.sh +++ b/test/gw-ci/create_exp.sh @@ -16,7 +16,7 @@ export HPC_ACCOUNT="da-cpu" if [[ $MACHINE_ID = wcoss2 ]]; then export HPC_ACCOUNT="GFS-DEV" elif [[ $MACHINE_ID = gaeac6 ]]; then - export HPC_ACCOUNT="ira-sti" + export HPC_ACCOUNT="gfs-cpu" fi # Source the gw environement diff --git a/test/snow/apply_jedi_incr.sh b/test/snow/apply_jedi_incr.sh index 91b4fec0f..79b011ada 100755 --- a/test/snow/apply_jedi_incr.sh +++ b/test/snow/apply_jedi_incr.sh @@ -107,7 +107,7 @@ config_yaml="./config.yaml" cat < ${config_yaml} machine: ${MACHINE_ID} homegfs: ${HOMEgfs} -job_name: ${type} +job_name: apply_jedi_incr walltime: "00:30:00" nodes: 1 ntasks_per_node: 6 diff --git a/test/workflow/hosts/gaeac6.yaml b/test/workflow/hosts/gaeac6.yaml index 577e1a942..64452e7ea 100644 --- a/test/workflow/hosts/gaeac6.yaml +++ b/test/workflow/hosts/gaeac6.yaml @@ -16,4 +16,4 @@ PARTITION_CRON: 'cron_c6' USE_SCRONTAB: 'YES' # HPC account information -HPC_ACCOUNT: ira-sti \ No newline at end of file +HPC_ACCOUNT: gfs-cpu From c95beb78300a483f821adef64212c816fca28a16 Mon Sep 17 00:00:00 2001 From: RussTreadon-NOAA Date: Mon, 21 Jul 2025 14:37:48 -0400 Subject: [PATCH 14/17] rewind and reboot single task for g-w ci (#1762) --- test/gw-ci/run_exp.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/gw-ci/run_exp.sh b/test/gw-ci/run_exp.sh index 3c02dbb1a..8597ba63f 100755 --- a/test/gw-ci/run_exp.sh +++ b/test/gw-ci/run_exp.sh @@ -13,9 +13,9 @@ WORKFLOW_DB=${pslot}/EXPDIR/${pslot}/${pslot}.db # Boot the task echo "booting ${TASK_ARRAY[@]} for cycle $CYCLE" if [[ ! -e "$WORKFLOW_DB" ]]; then - rocotorun -w "$WORKFLOW_XML" -d "$WORKFLOW_DB" -t "$task_args" -c "$CYCLE" + rocotorun -w "$WORKFLOW_XML" -d "$WORKFLOW_DB" -t "$task_args" -c "$CYCLE" fi -rocotoboot -w "$WORKFLOW_XML" -d "$WORKFLOW_DB" -t "$task_args" -c "$CYCLE" +rocotorun -w "$WORKFLOW_XML" -d "$WORKFLOW_DB" -t "$task_args" -c "$CYCLE" # Loop through tasks IFS=',' read -r -a TASK_ARRAY <<< "$task_args" @@ -23,7 +23,7 @@ num_tasks=${#TASK_ARRAY[@]} while true; do # Update the status of the task rocotorun -w "$WORKFLOW_XML" -d "$WORKFLOW_DB" -t "$task_args" -c "$CYCLE" - + num_succeeded=0 for task in "${TASK_ARRAY[@]}"; do @@ -44,8 +44,8 @@ while true; do echo "$pslot"_"$task"_"$CYCLE"" is in state: $STATUS" else echo "$pslot"_"$task"_"$CYCLE"" is in unrecognized state: $STATUS. Rewinding..." - rocotorewind -w "$WORKFLOW_XML" -d "$WORKFLOW_DB" -t "$task_args" -c "$CYCLE" - rocotoboot -w "$WORKFLOW_XML" -d "$WORKFLOW_DB" -t "$task_args" -c "$CYCLE" + rocotorewind -w "$WORKFLOW_XML" -d "$WORKFLOW_DB" -t "$task" -c "$CYCLE" + rocotoboot -w "$WORKFLOW_XML" -d "$WORKFLOW_DB" -t "$task" -c "$CYCLE" fi done if [[ "$num_succeeded" == "$num_tasks" ]]; then From f93c6f70c37fd200193423543bf760e6240cd669 Mon Sep 17 00:00:00 2001 From: RussTreadon-NOAA Date: Wed, 23 Jul 2025 07:35:48 -0400 Subject: [PATCH 15/17] remove extraneous text from jjob_ens_final.sh (#1762) --- test/atm/global-workflow/jjob_ens_final.sh | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/test/atm/global-workflow/jjob_ens_final.sh b/test/atm/global-workflow/jjob_ens_final.sh index cab06b85d..7f3ee1071 100755 --- a/test/atm/global-workflow/jjob_ens_final.sh +++ b/test/atm/global-workflow/jjob_ens_final.sh @@ -46,18 +46,6 @@ export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${HOMEgfs}/lib" memory="8Gb" if [[ ${MACHINE_ID} == "gaeac6" ]]; then memory=0 -======= -# Set NETCDF and UTILROOT variables (used in config.base) -if [[ $MACHINE_ID = 'hera' ]]; then - NETCDF=$( which ncdump ) - export NETCDF - export UTILROOT="/scratch2/NCEPDEV/ensemble/save/Walter.Kolczynski/hpc-stack/intel-18.0.5.274/prod_util/1.2.2" -elif [[ $MACHINE_ID = 'orion' || $MACHINE_ID = 'hercules' ]]; then - ncdump=$( which ncdump ) - NETCDF=$( echo "${ncdump}" | cut -d " " -f 3 ) - export NETCDF - export UTILROOT=/work2/noaa/da/python/opt/intel-2022.1.2/prod_util/1.2.2 ->>>>>>> develop fi config_yaml="./config_${type}.yaml" cat < ${config_yaml} From 2f9d1ada74a1136499f3a32ca4fe27cbf2fa8ef4 Mon Sep 17 00:00:00 2001 From: RussTreadon-NOAA Date: Wed, 23 Jul 2025 08:24:35 -0400 Subject: [PATCH 16/17] comment out utils ctests for ghrsst2ioda and viirsaod2ioda (#1762) --- utils/test/CMakeLists.txt | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/utils/test/CMakeLists.txt b/utils/test/CMakeLists.txt index e196c1c16..a74bea39c 100644 --- a/utils/test/CMakeLists.txt +++ b/utils/test/CMakeLists.txt @@ -2,7 +2,7 @@ list( APPEND utils_test_input testinput/gdas_meanioda.yaml testinput/gdas_rads2ioda.yaml - testinput/gdas_ghrsst2ioda.yaml +# testinput/gdas_ghrsst2ioda.yaml testinput/gdas_rtofstmp.yaml testinput/gdas_rtofssal.yaml testinput/gdas_smap2ioda.yaml @@ -12,12 +12,12 @@ list( APPEND utils_test_input testinput/gdas_icecmirs2ioda.yaml testinput/gdas_icecjpssrr2ioda.yaml testinput/gdas_insituall2ioda.yaml - testinput/gdas_viirsaod2ioda.yaml +# testinput/gdas_viirsaod2ioda.yaml ) set( gdas_utils_test_ref testref/rads2ioda.test - testref/ghrsst2ioda.test +# testref/ghrsst2ioda.test testref/rtofstmp.test testref/rtofssal.test testref/smap2ioda.test @@ -27,7 +27,7 @@ set( gdas_utils_test_ref testref/icecmirs2ioda.test testref/icecjpssrr2ioda.test testref/insituall2ioda.test - testref/viirsaod2ioda.test +# testref/viirsaod2ioda.test ) file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/testinput) @@ -74,12 +74,12 @@ ecbuild_add_test( TARGET test_gdasapp_util_rads2ioda TEST_DEPENDS test_gdasapp_util_prepdata) # Test the GHRSST to IODA converter -ecbuild_add_test( TARGET test_gdasapp_util_ghrsst2ioda - COMMAND ${CMAKE_BINARY_DIR}/bin/gdas_ioda_obsprovider2ioda.x - ARGS "../testinput/gdas_ghrsst2ioda.yaml" - LIBS gdas-utils - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/obsproc - TEST_DEPENDS test_gdasapp_util_prepdata) +#ecbuild_add_test( TARGET test_gdasapp_util_ghrsst2ioda +# COMMAND ${CMAKE_BINARY_DIR}/bin/gdas_ioda_obsprovider2ioda.x +# ARGS "../testinput/gdas_ghrsst2ioda.yaml" +# LIBS gdas-utils +# WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/obsproc +# TEST_DEPENDS test_gdasapp_util_prepdata) # copy rtofs binary input files to the testing area # and generate the tests @@ -146,12 +146,12 @@ ecbuild_add_test( TARGET test_gdasapp_util_smos2ioda TEST_DEPENDS test_gdasapp_util_prepdata) # Test the VIIRS AOD to IODA converter -ecbuild_add_test( TARGET test_gdasapp_util_viirsaod2ioda - COMMAND ${CMAKE_BINARY_DIR}/bin/gdas_ioda_obsprovider2ioda.x - ARGS "../testinput/gdas_viirsaod2ioda.yaml" - LIBS gdas-utils - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/obsproc - TEST_DEPENDS test_gdasapp_util_prepdata) +#ecbuild_add_test( TARGET test_gdasapp_util_viirsaod2ioda +# COMMAND ${CMAKE_BINARY_DIR}/bin/gdas_ioda_obsprovider2ioda.x +# ARGS "../testinput/gdas_viirsaod2ioda.yaml" +# LIBS gdas-utils +# WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/obsproc +# TEST_DEPENDS test_gdasapp_util_prepdata) # Test the ABI to IODA converter ecbuild_add_test( TARGET test_gdasapp_util_icecabi2ioda From 7e50c9af738cc30c70821c341073cb01f0cbc98f Mon Sep 17 00:00:00 2001 From: RussTreadon-NOAA Date: Wed, 23 Jul 2025 22:35:56 +0000 Subject: [PATCH 17/17] update apply_jedi_incr.sh to work on hera (#1762) --- test/snow/apply_jedi_incr.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/snow/apply_jedi_incr.sh b/test/snow/apply_jedi_incr.sh index 79b011ada..c32b991a1 100755 --- a/test/snow/apply_jedi_incr.sh +++ b/test/snow/apply_jedi_incr.sh @@ -93,7 +93,7 @@ if [[ ${MACHINE_ID} == 'wcoss' ]]; then fi # Run executable -$APRUN ${EXECDIR}/apply_incr.exe ${WORKDIR}/apply_incr.log +\${APRUN} ${EXECDIR}/apply_incr.exe ${WORKDIR}/apply_incr.log EOF chmod 755 $runsh