Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion FV3
Submodule FV3 updated 2 files
+1 −1 atmos_cubed_sphere
+1 −1 ccpp/physics
2 changes: 1 addition & 1 deletion WW3
57 changes: 57 additions & 0 deletions modulefiles/ufs_common_spack.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
help([[
loads UFS Model common libraries
]])

-- Every library below is loaded as <name>/<version>. The version comes from
-- the matching <name>_ver environment variable when it is set, otherwise from
-- the default written here. The load order is kept as-is.

jasper_ver=os.getenv("jasper_ver") or "2.0.32"
load(pathJoin("jasper", jasper_ver))

zlib_ver=os.getenv("zlib_ver") or "1.2.13"
load(pathJoin("zlib", zlib_ver))

libpng_ver=os.getenv("libpng_ver") or "1.6.37"
load(pathJoin("libpng", libpng_ver))

hdf5_ver=os.getenv("hdf5_ver") or "1.14.0"
load(pathJoin("hdf5", hdf5_ver))

-- netcdf_ver selects the netcdf-c module; netcdf-fortran is versioned
-- separately and gets its own override variable (same default as before).
netcdf_ver=os.getenv("netcdf_ver") or "4.9.0"
load(pathJoin("netcdf-c", netcdf_ver))
netcdf_fortran_ver=os.getenv("netcdf_fortran_ver") or "4.6.0"
load(pathJoin("netcdf-fortran", netcdf_fortran_ver))

-- pio_ver selects the module named "parallelio".
pio_ver=os.getenv("pio_ver") or "2.5.9"
load(pathJoin("parallelio", pio_ver))

esmf_ver=os.getenv("esmf_ver") or "8.3.0b09"
load(pathJoin("esmf", esmf_ver))

fms_ver=os.getenv("fms_ver") or "2022.04"
load(pathJoin("fms", fms_ver))

bacio_ver=os.getenv("bacio_ver") or "2.4.1"
load(pathJoin("bacio", bacio_ver))

crtm_ver=os.getenv("crtm_ver") or "2.4.0"
load(pathJoin("crtm", crtm_ver))

g2_ver=os.getenv("g2_ver") or "3.4.5"
load(pathJoin("g2", g2_ver))

g2tmpl_ver=os.getenv("g2tmpl_ver") or "1.10.2"
load(pathJoin("g2tmpl", g2tmpl_ver))

ip_ver=os.getenv("ip_ver") or "3.3.3"
load(pathJoin("ip", ip_ver))

sp_ver=os.getenv("sp_ver") or "2.3.3"
load(pathJoin("sp", sp_ver))

w3emc_ver=os.getenv("w3emc_ver") or "2.9.2"
load(pathJoin("w3emc", w3emc_ver))

gftl_shared_ver=os.getenv("gftl_shared_ver") or "v1.5.0"
load(pathJoin("gftl-shared", gftl_shared_ver))

-- NOTE(review): the default mapl version string embeds the esmf version
-- (8.3.0b09); presumably the two must be overridden together -- confirm.
mapl_ver=os.getenv("mapl_ver") or "2.22.0-esmf-8.3.0b09"
load(pathJoin("mapl", mapl_ver))

whatis("Description: UFS build environment common libraries")
28 changes: 28 additions & 0 deletions modulefiles/ufs_noaacloud.intel.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
help([[
loads UFS Model prerequisites for NOAA Parallelworks/Intel
]])

-- Load the module <name>/<version>. The version is read from the environment
-- variable env_name when it is set; otherwise default_ver is used.
local function load_versioned(name, env_name, default_ver)
  local version = os.getenv(env_name) or default_ver
  load(pathJoin(name, version))
end

-- Compiler, MPI and cmake come from the spack-stack unified-dev environment.
prepend_path("MODULEPATH", "/contrib/EPIC/spack-stack/spack-stack-1.3.0/envs/unified-dev/install/modulefiles/Core")

load_versioned("stack-intel", "stack_intel_ver", "2021.3.0")
load_versioned("stack-intel-oneapi-mpi", "stack_impi_ver", "2021.3.0")
load_versioned("cmake", "cmake_ver", "3.23.1")

-- stack-python is loaded after a second module tree is put on MODULEPATH.
prepend_path("MODULEPATH", "/contrib/spack-stack/modulefiles/core")
load_versioned("stack-python", "stack_python_ver", "3.9.12")

-- Libraries shared by all spack-stack based UFS platforms.
load("ufs_common_spack")

-- Compiler wrappers and platform name exported to the build.
setenv("CC", "mpiicc")
setenv("CXX", "mpiicpc")
setenv("FC", "mpiifort")
setenv("CMAKE_Platform", "noaacloud.intel")

whatis("Description: UFS build environment")
352 changes: 176 additions & 176 deletions tests/RegressionTests_cheyenne.gnu.log

Large diffs are not rendered by default.

1,065 changes: 532 additions & 533 deletions tests/RegressionTests_cheyenne.intel.log

Large diffs are not rendered by default.

466 changes: 233 additions & 233 deletions tests/RegressionTests_hera.gnu.log

Large diffs are not rendered by default.

1,412 changes: 706 additions & 706 deletions tests/RegressionTests_hera.intel.log

Large diffs are not rendered by default.

31 changes: 30 additions & 1 deletion tests/default_vars.sh
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,34 @@ elif [[ $MACHINE_ID = stampede.* ]]; then
TPN_cpl_atmw_gdas=12; INPES_cpl_atmw_gdas=6; JNPES_cpl_atmw_gdas=8
THRD_cpl_atmw_gdas=4; WPG_cpl_atmw_gdas=24; APB_cpl_atmw_gdas="0 311"; WPB_cpl_atmw_gdas="312 559"

elif [[ ${MACHINE_ID} = noaacloud.* ]] ; then

# Tasks per node (TPN) depend on the cloud provider named by PW_CSP.
# Any other value leaves TPN untouched.
case ${PW_CSP} in
  aws)    TPN=36 ;;
  azure)  TPN=44 ;;
  google) TPN=30 ;;
esac

INPES_dflt=3 ; JNPES_dflt=8
INPES_thrd=3 ; JNPES_thrd=4

INPES_c384=8 ; JNPES_c384=6 ; THRD_c384=2
INPES_c768=8 ; JNPES_c768=16 ; THRD_c768=2

THRD_cpl_dflt=1
INPES_cpl_dflt=3; JNPES_cpl_dflt=8; WPG_cpl_dflt=6
OCN_tasks_cpl_dflt=20
ICE_tasks_cpl_dflt=10
WAV_tasks_cpl_dflt=20

THRD_cpl_thrd=2
INPES_cpl_thrd=3; JNPES_cpl_thrd=4; WPG_cpl_thrd=6
OCN_tasks_cpl_thrd=20
ICE_tasks_cpl_thrd=10
WAV_tasks_cpl_thrd=12

elif [[ $MACHINE_ID = expanse.* ]]; then

echo "Unknown MACHINE_ID ${MACHINE_ID}. Please update tasks configurations in default_vars.sh"
Expand All @@ -269,7 +297,8 @@ elif [[ $MACHINE_ID = expanse.* ]]; then

TPN_cpl_atmw_gdas=12; INPES_cpl_atmw_gdas=6; JNPES_cpl_atmw_gdas=8
THRD_cpl_atmw_gdas=2; WPG_cpl_atmw_gdas=24; APB_cpl_atmw_gdas="0 311"; WPB_cpl_atmw_gdas="312 559"



else

echo "Unknown MACHINE_ID ${MACHINE_ID}"
Expand Down
13 changes: 12 additions & 1 deletion tests/detect_machine.sh
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,21 @@ case $(hostname -f) in
login2.stampede2.tacc.utexas.edu) MACHINE_ID=stampede ;; ### stampede2
login3.stampede2.tacc.utexas.edu) MACHINE_ID=stampede ;; ### stampede3
login4.stampede2.tacc.utexas.edu) MACHINE_ID=stampede ;; ### stampede4



login01.expanse.sdsc.edu) MACHINE_ID=expanse ;; ### expanse1
login02.expanse.sdsc.edu) MACHINE_ID=expanse ;; ### expanse2

esac

# Cloud clusters are recognised from the PW_CSP environment variable rather
# than from the hostname. All three supported providers share the single
# machine ID "noaacloud".
# "${PW_CSP:-}" keeps this safe when PW_CSP is unset (including under set -u),
# and matching PW_CSP directly avoids rewriting an unrelated MACHINE_ID that
# merely contains "aws", "gcp" or "azure" as a substring.
case "${PW_CSP:-}" in
  aws | google | azure) MACHINE_ID=noaacloud ;; ### parallelworks aws / gcp / azure
esac

# Overwrite auto-detect with RT_MACHINE if set
MACHINE_ID=${RT_MACHINE:-${MACHINE_ID}}
Expand Down
17 changes: 17 additions & 0 deletions tests/fv3_conf/compile_slurm.IN_noaacloud
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/sh
#SBATCH -e err
#SBATCH -o out
#SBATCH --qos=batch
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=@[TPN]
#SBATCH --job-name="@[JBNME]"

# Slurm batch template for the compile step on NOAA cloud.
# Tokens written as an at-sign followed by a bracketed name are placeholders;
# presumably the test harness substitutes them before submission -- confirm.

# Abort on errors and unset variables; trace every command into the job log.
set -eux

# Start time in seconds since the epoch; the end time is appended below.
echo -n " $( date +%s )," > job_timestamp.txt
echo "Compile started: " $( date )

@[PATHRT]/compile.sh @[MACHINE_ID] "@[MAKE_OPT]" @[COMPILE_NR]

echo "Compile ended: " $( date )
echo -n " $( date +%s )," >> job_timestamp.txt
45 changes: 45 additions & 0 deletions tests/fv3_conf/fv3_slurm.IN_noaacloud
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/bin/sh
#SBATCH -e err
#SBATCH -o out
#SBATCH --qos=batch
### #SBATCH --ntasks=@[TASKS]
#SBATCH --nodes=@[NODES]
#SBATCH --ntasks-per-node=@[TPN]
#SBATCH --job-name="@[JBNME]"
#SBATCH --exclusive

# Slurm batch template that runs ./fv3.exe on NOAA cloud.
# Tokens written as an at-sign followed by a bracketed name are placeholders;
# presumably the test harness substitutes them before submission -- confirm.

# Abort on errors and unset variables; trace every command into the job log.
set -eux
# Start time in seconds since the epoch; the end time is appended at the end.
echo -n " $( date +%s )," > job_timestamp.txt

# Tracing is switched off while the module environment is set up.
set +x
MACHINE_ID=noaacloud
# Put the current directory (physical path) on the module search path.
module use $( pwd -P )
# NOTE(review): the stack path and the compiler/MPI versions are hard-coded
# here instead of honouring stack_intel_ver / stack_impi_ver -- confirm they
# are meant to stay in lock-step with the build modulefile.
module use /contrib/EPIC/spack-stack/spack-stack-1.3.0/envs/unified-dev/install/modulefiles/Core
module load stack-intel/2021.3.0 stack-intel-oneapi-mpi/2021.3.0
module load ufs-weather-model-env/unified-dev
module list

set -x

# Remove the stack-size and locked-memory limits for the model run.
ulimit -s unlimited
ulimit -l unlimited

echo "Model started: " `date`

#export MPI_TYPE_DEPTH=20
export OMP_STACKSIZE=512M
export KMP_AFFINITY=scatter
# NOTE(review): the thread count is fixed at 1 regardless of the test
# configuration -- confirm threaded tests are not expected to run here with
# more than one OpenMP thread.
export OMP_NUM_THREADS=1
#export ESMF_RUNTIME_COMPLIANCECHECK=OFF:depth=4
#export PSM_RANKS_PER_CONTEXT=4
#export PSM_SHAREDCONTEXTS=1
#export ESMF_RUNTIME_PROFILE=ON
#export ESMF_RUNTIME_PROFILE_OUTPUT="SUMMARY"

# Avoid job errors because of filesystem synchronization delays
sync && sleep 1

# Launch the model; --label prefixes each output line with its task number.
srun --mpi=pmi2 --label -n @[TASKS] ./fv3.exe

echo "Model ended: " `date`
echo -n " $( date +%s )," >> job_timestamp.txt
9 changes: 8 additions & 1 deletion tests/module-setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,14 @@ elif [[ $MACHINE_ID = cheyenne* ]] ; then
source /glade/u/apps/ch/modulefiles/default/localinit/localinit.sh
fi
module purge


Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is hacky and terrible. Why are testing scripts trying to automatically determine what machine they are being run on instead of just being specified on the command line?

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment should probably have gone under "detect_machine.sh", but it still stands

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that this has been around for as long as I've been involved with the GFS. I suppose that it's handy for the end user -- one less thing to specify when running tests.

elif [[ $MACHINE_ID = noaacloud* ]] ; then
# We are on NOAA Cloud
if ( ! eval module help > /dev/null 2>&1 ) ; then
source /apps/lmod/8.5.2/init/bash
fi
module purge

elif [[ $MACHINE_ID = stampede* ]] ; then
# We are on TACC Stampede
if ( ! eval module help > /dev/null 2>&1 ) ; then
Expand Down
4 changes: 2 additions & 2 deletions tests/rt.conf
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ RUN | cpld_control_c48
RUN | cpld_warmstart_c48 | | fv3 |
RUN | cpld_restart_c48 | | | cpld_warmstart_c48

COMPILE | -DAPP=S2SWA -DCCPP_SUITES=FV3_GFS_v17_coupled_p8,FV3_GFS_cpld_rasmgshocnsstnoahmp_ugwp -DFASTER=ON | | fv3 |
COMPILE | -DAPP=S2SWA -DCCPP_SUITES=FV3_GFS_v17_coupled_p8,FV3_GFS_cpld_rasmgshocnsstnoahmp_ugwp -DFASTER=ON | - cheyenne.intel | fv3 |
RUN | cpld_control_p8_faster | - cheyenne.intel | fv3 |

###################################################################################################################################################################################
Expand Down Expand Up @@ -309,4 +309,4 @@ COMPILE | -DAPP=ATMAQ -DCCPP_SUITES=FV3_GFS_v15p2 -DDEBUG=ON -D32BIT=ON
RUN | regional_atmaq_debug | - jet.intel gaea.intel cheyenne.intel | fv3 |

COMPILE | -DAPP=ATMAQ -DCCPP_SUITES=FV3_GFS_v15p2 -DFASTER=ON -D32BIT=ON | | fv3 |
RUN | regional_atmaq_faster | - jet.intel | fv3 |
RUN | regional_atmaq_faster | - jet.intel wcoss2.intel acorn.intel | fv3 |
28 changes: 27 additions & 1 deletion tests/rt.sh
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,29 @@ elif [[ $MACHINE_ID = expanse.* ]]; then
PTMP=$dprefix
SCHEDULER=slurm
cp fv3_conf/fv3_slurm.IN_expanse fv3_conf/fv3_slurm.IN

elif [[ $MACHINE_ID = noaacloud.* ]]; then

module use /apps/modules/modulefiles
module load rocoto/1.3.3

ROCOTORUN=$(which rocotorun)
ROCOTOSTAT=$(which rocotostat)
ROCOTOCOMPLETE=$(which rocotocomplete)
ROCOTO_SCHEDULER=slurm

QUEUE=batch
COMPILE_QUEUE=batch
PARTITION=
dprefix=/lustre/
DISKNM=/contrib/ufs-weather-model/RT
STMP=$dprefix/stmp4
PTMP=$dprefix/stmp2
SCHEDULER=slurm
cp fv3_conf/fv3_slurm.IN_noaacloud fv3_conf/fv3_slurm.IN
cp fv3_conf/compile_slurm.IN_noaacloud fv3_conf/compile_slurm.IN


else
die "Unknown machine ID, please edit detect_machine.sh file"
fi
Expand Down Expand Up @@ -446,7 +468,7 @@ if [[ $TESTS_FILE =~ '35d' ]] || [[ $TESTS_FILE =~ 'weekly' ]]; then
fi


BL_DATE=20230424
BL_DATE=20230504

RTPWD=${RTPWD:-$DISKNM/NEMSfv3gfs/develop-${BL_DATE}/${RT_COMPILER^^}}

Expand Down Expand Up @@ -510,6 +532,10 @@ if [[ $ROCOTO == true ]]; then
QUEUE=s4
COMPILE_QUEUE=s4
ROCOTO_SCHEDULER=slurm
elif [[ $MACHINE_ID = noaacloud.* ]]; then
QUEUE=batch
COMPILE_QUEUE=batch
ROCOTO_SCHEDULER=slurm
elif [[ $MACHINE_ID = jet.* ]]; then
QUEUE=batch
COMPILE_QUEUE=batch
Expand Down
17 changes: 11 additions & 6 deletions tests/rt_utils.sh
Original file line number Diff line number Diff line change
Expand Up @@ -355,18 +355,23 @@ check_results() {
fi

if [[ $d -eq 1 && ${i##*.} == 'nc' ]] ; then
if [[ ${MACHINE_ID} =~ orion || ${MACHINE_ID} =~ hera || ${MACHINE_ID} =~ wcoss2 || ${MACHINE_ID} =~ acorn || ${MACHINE_ID} =~ cheyenne || ${MACHINE_ID} =~ gaea || ${MACHINE_ID} =~ jet || ${MACHINE_ID} =~ s4 ]] ; then
if [[ ${MACHINE_ID} =~ orion || ${MACHINE_ID} =~ hera || ${MACHINE_ID} =~ wcoss2 || ${MACHINE_ID} =~ acorn || ${MACHINE_ID} =~ cheyenne || ${MACHINE_ID} =~ gaea || ${MACHINE_ID} =~ jet || ${MACHINE_ID} =~ s4 || ${MACHINE_ID} =~ noaacloud ]] ; then
printf ".......ALT CHECK.." >> ${REGRESSIONTEST_LOG}
printf ".......ALT CHECK.."
if [[ ${MACHINE_ID} =~ orion || ${MACHINE_ID} =~ hera || ${MACHINE_ID} =~ gaea || ${MACHINE_ID} =~ jet || ${MACHINE_ID} =~ cheyenne ]] ; then
nccmp -d -f -g -B --Attribute=checksum --warn=format ${RTPWD}/${CNTL_DIR}/${i} ${RUNDIR}/${i} > ${i}_nccmp.log 2>&1 && d=$? || d=$?
if [[ $d -ne 0 && $d -ne 1 ]]; then
echo "....ERROR" >> ${REGRESSIONTEST_LOG}
echo "....ERROR"
exit 1
fi
else
${PATHRT}/compare_ncfile.py ${RTPWD}/${CNTL_DIR}/$i ${RUNDIR}/$i > compare_ncfile.log 2>&1 && d=$? || d=$?
fi
if [[ $d -eq 1 ]]; then
echo "....ERROR" >> ${REGRESSIONTEST_LOG}
echo "....ERROR"
exit 1
if [[ $d -eq 1 ]]; then
echo "....ERROR" >> ${REGRESSIONTEST_LOG}
echo "....ERROR"
exit 1
fi
fi
fi
fi
Expand Down