# Gaea (c4 cluster) port of the GSI build and regression-test setup: adds a
# Gaea branch to the modulefiles and regression scripts and introduces
# ush/sub_gaea, a Slurm submission wrapper.
#
# NOTE(review): line breaks and context-line indentation were reconstructed
# from a whitespace-collapsed copy of this patch; if a hunk is rejected on
# context, try `git apply --ignore-whitespace`.  The `index` hashes are the
# original ones and no longer describe the post-images of the files edited in
# review (regression_driver.sh, build.sh, sub_gaea).
diff --git a/modulefiles/gsi_common.lua b/modulefiles/gsi_common.lua
index b2929bcdf8..ba2eea3241 100644
--- a/modulefiles/gsi_common.lua
+++ b/modulefiles/gsi_common.lua
@@ -16,7 +16,7 @@ local nemsio_ver=os.getenv("nemsio_ver") or "2.5.4"
 local wrf_io_ver=os.getenv("wrf_io_ver") or "1.2.0"
 local ncio_ver=os.getenv("ncio_ver") or "1.1.2"
 local crtm_ver=os.getenv("crtm_ver") or "2.4.0"
-local ncdiag_ver=os.getenv("ncdiag_ver") or "1.1.0"
+local ncdiag_ver=os.getenv("ncdiag_ver") or "1.1.1"
 
 load(pathJoin("netcdf", netcdf_ver))
 
diff --git a/modulefiles/gsi_gaea.lua b/modulefiles/gsi_gaea.lua
index ea1826fde9..f76c8f3ad9 100644
--- a/modulefiles/gsi_gaea.lua
+++ b/modulefiles/gsi_gaea.lua
@@ -16,11 +16,15 @@ load("gsi_common")
 local prod_util_ver=os.getenv("prod_util_ver") or "1.2.2"
 load(pathJoin("prod_util", prod_util_ver))
 
+-- Needed at runtime:
+load("alps")
+
 local MKLROOT="/opt/intel/oneapi/mkl/2022.0.2/"
 prepend_path("LD_LIBRARY_PATH",pathJoin(MKLROOT,"lib/intel64"))
 pushenv("MKLROOT", MKLROOT)
 
-pushenv("GSI_BINARY_SOURCE_DIR", "/lustre/f2/dev/role.epic/contrib/GSI_data/fix")
+pushenv("GSI_BINARY_SOURCE_DIR", "/lustre/f2/dev/role.epic/contrib/GSI_data/fix/20230601")
+
 setenv("CC","cc")
 setenv("FC","ftn")
 setenv("CXX","CC")
diff --git a/regression/regression_driver.sh b/regression/regression_driver.sh
index e1d3b18dc7..58ebf64e85 100755
--- a/regression/regression_driver.sh
+++ b/regression/regression_driver.sh
@@ -10,6 +10,7 @@ if [ -d "$config_path" ]; then
   source $config_path/local_vars.sh
 fi
 
+
 # source the necessary files to setup
 if [ "$#" -eq 2 ]; then
   export regdir=$2
@@ -41,7 +42,7 @@ for jn in `seq ${RSTART} ${REND}`; do
    fi
    rm -f ${job[$jn]}.out
 
-   /bin/sh $ush/$sub_cmd -q $queue -j ${job[$jn]} -t ${topts[$jn]} -p ${popts[$jn]} -r ${ropts[$jn]} $scripts/${regtest}.sh
+   /bin/sh $ush/$sub_cmd -q $queue -j ${job[$jn]} -t ${topts[$jn]} -p ${popts[$jn]} -r ${ropts[$jn]} $scripts/${regtest}.sh > $ush/sub_cmd.${job[$jn]}.out 2>&1
 
    if [ $debug == ".true." ]; then break; fi
    $scripts/regression_wait.sh ${job[$jn]} ${rcname} $check_resource
diff --git a/regression/regression_param.sh b/regression/regression_param.sh
index 6024dbdb54..ea597fe0ae 100755
--- a/regression/regression_param.sh
+++ b/regression/regression_param.sh
@@ -18,6 +18,11 @@ case $machine in
       sub_cmd="sub_jet"
       memnode=96
       numcore=40
+      ;;
+   Gaea)
+      sub_cmd="sub_gaea"
+      memnode=64
+      numcore=36
       ;;
    wcoss2)
       sub_cmd="sub_wcoss2"
@@ -58,6 +63,9 @@ case $regtest in
         elif [[ "$machine" = "Cheyenne" ]]; then
            topts[1]="0:30:00" ; popts[1]="16/2/" ; ropts[1]="/1"
            topts[2]="0:30:00" ; popts[2]="16/4/" ; ropts[2]="/2"
+        elif [[ "$machine" = "Gaea" ]]; then
+           topts[1]="0:30:00" ; popts[1]="18/2/" ; ropts[1]="/1"
+           topts[2]="0:30:00" ; popts[2]="18/4/" ; ropts[2]="/2"
         elif [[ "$machine" = "wcoss2" ]]; then
            topts[1]="0:15:00" ; popts[1]="12/5/" ; ropts[1]="/1"
            topts[2]="0:15:00" ; popts[2]="12/9/" ; ropts[2]="/2"
@@ -88,6 +96,9 @@ case $regtest in
         elif [[ "$machine" = "Cheyenne" ]]; then
            topts[1]="0:35:00" ; popts[1]="16/2/" ; ropts[1]="/1"
            topts[2]="0:25:00" ; popts[2]="16/4/" ; ropts[2]="/2"
+        elif [[ "$machine" = "Gaea" ]]; then
+           topts[1]="0:35:00" ; popts[1]="18/2/" ; ropts[1]="/1"
+           topts[2]="0:25:00" ; popts[2]="18/4/" ; ropts[2]="/2"
         elif [[ "$machine" = "wcoss2" ]]; then
            topts[1]="0:15:00" ; popts[1]="28/2/" ; ropts[1]="/1"
            topts[2]="0:15:00" ; popts[2]="28/4/" ; ropts[2]="/2"
@@ -104,6 +115,8 @@ case $regtest in
            popts[1]="12/5/"
         elif [[ "$machine" = "Jet" ]]; then
            popts[1]="12/5/"
+        elif [[ "$machine" = "Gaea" ]]; then
+           popts[1]="18/5/"
         elif [[ "$machine" = "wcoss2" ]]; then
            popts[1]="28/4/"
            topts[1]="3:00:00"
@@ -131,6 +144,9 @@ case $regtest in
         elif [[ "$machine" = "Cheyenne" ]]; then
            topts[1]="1:59:00" ; popts[1]="6/8/" ; ropts[1]="/1"
            topts[2]="0:35:00" ; popts[2]="6/10/" ; ropts[2]="/2"
+        elif [[ "$machine" = "Gaea" ]]; then
+           topts[1]="0:10:00" ; popts[1]="18/8/" ; ropts[1]="/1"
+           topts[2]="0:10:00" ; popts[2]="18/10/" ; ropts[2]="/2"
         elif [[ "$machine" = "wcoss2" ]]; then
            topts[1]="0:10:00" ; popts[1]="12/8/" ; ropts[1]="/1"
            topts[2]="0:10:00" ; popts[2]="12/10/" ; ropts[2]="/2"
@@ -155,6 +171,9 @@ case $regtest in
         elif [[ "$machine" = "Jet" ]]; then
            topts[1]="0:15:00" ; popts[1]="20/1/" ; ropts[1]="/1"
            topts[2]="0:15:00" ; popts[2]="20/2/" ; ropts[2]="/1"
+        elif [[ "$machine" = "Gaea" ]]; then
+           topts[1]="0:15:00" ; popts[1]="18/1/" ; ropts[1]="/1"
+           topts[2]="0:15:00" ; popts[2]="18/2/" ; ropts[2]="/1"
         elif [[ "$machine" = "wcoss2" ]]; then
            topts[1]="0:15:00" ; popts[1]="64/1/" ; ropts[1]="/1"
            topts[2]="0:15:00" ; popts[2]="128/2/" ; ropts[2]="/1"
@@ -179,6 +198,9 @@ case $regtest in
         elif [[ "$machine" = "Jet" ]]; then
            topts[1]="0:15:00" ; popts[1]="4/4/" ; ropts[1]="/1"
            topts[2]="0:15:00" ; popts[2]="6/6/" ; ropts[2]="/1"
+        elif [[ "$machine" = "Gaea" ]]; then
+           topts[1]="0:15:00" ; popts[1]="4/4/" ; ropts[1]="/1"
+           topts[2]="0:15:00" ; popts[2]="6/6/" ; ropts[2]="/1"
         elif [[ "$machine" = "wcoss2" ]]; then
            topts[1]="0:15:00" ; popts[1]="28/1/" ; ropts[1]="/1"
            topts[2]="0:15:00" ; popts[2]="28/2/" ; ropts[2]="/1"
@@ -206,6 +228,9 @@ case $regtest in
         elif [[ "$machine" = "Cheyenne" ]]; then
            topts[1]="0:15:00" ; popts[1]="8/6/" ; ropts[1]="/1"
            topts[2]="0:15:00" ; popts[2]="8/8/" ; ropts[2]="/1"
+        elif [[ "$machine" = "Gaea" ]]; then
+           topts[1]="0:30:00" ; popts[1]="8/6/" ; ropts[1]="/1"
+           topts[2]="0:30:00" ; popts[2]="8/8/" ; ropts[2]="/1"
         elif [[ "$machine" = "wcoss2" ]]; then
            topts[1]="0:30:00" ; popts[1]="14/8/" ; ropts[1]="/1"
            topts[2]="0:30:00" ; popts[2]="14/14/" ; ropts[2]="/2"
@@ -233,6 +258,9 @@ case $regtest in
         elif [[ "$machine" = "Cheyenne" ]]; then
            topts[1]="0:20:00" ; popts[1]="6/6/" ; ropts[1]="/1"
            topts[2]="0:20:00" ; popts[2]="8/8/" ; ropts[2]="/1"
+        elif [[ "$machine" = "Gaea" ]]; then
+           topts[1]="0:20:00" ; popts[1]="6/6/" ; ropts[1]="/1"
+           topts[2]="0:20:00" ; popts[2]="8/8/" ; ropts[2]="/1"
         elif [[ "$machine" = "wcoss2" ]]; then
            topts[1]="0:15:00" ; popts[1]="10/10/" ; ropts[1]="/1"
            topts[2]="0:15:00" ; popts[2]="14/14/" ; ropts[2]="/2"
@@ -260,6 +288,9 @@ case $regtest in
         elif [[ "$machine" = "Cheyenne" ]]; then
            topts[1]="0:15:00" ; popts[1]="16/2/" ; ropts[1]="/1"
            topts[2]="0:15:00" ; popts[2]="16/4/" ; ropts[2]="/2"
+        elif [[ "$machine" = "Gaea" ]]; then
+           topts[1]="0:10:00" ; popts[1]="12/3/" ; ropts[1]="/1"
+           topts[2]="0:10:00" ; popts[2]="12/5/" ; ropts[2]="/2"
         elif [[ "$machine" = "wcoss2" ]]; then
            topts[1]="0:10:00" ; popts[1]="16/2/" ; ropts[1]="/1"
            topts[2]="0:10:00" ; popts[2]="16/4/" ; ropts[2]="/2"
@@ -317,6 +348,12 @@ elif [[ "$machine" = "Jet" ]]; then
    export MPI_BUFS_PER_HOST=256
    export MPI_GROUP_MAX=256
    export APRUN="srun"
+elif [[ "$machine" = "Gaea" ]]; then
+   export OMP_STACKSIZE=1024M
+   export MPI_BUFS_PER_PROC=256
+   export MPI_BUFS_PER_HOST=256
+   export MPI_GROUP_MAX=256
+   export APRUN="srun --export=ALL --mpi=pmi2 -n \$size"
 elif [[ "$machine" = "Cheyenne" ]]; then
    export OMP_STACKSIZE=1024M
    export MPI_BUFS_PER_PROC=256
diff --git a/regression/regression_var.sh b/regression/regression_var.sh
index 05b5563ef1..e883a5b1c8 100755
--- a/regression/regression_var.sh
+++ b/regression/regression_var.sh
@@ -49,6 +49,20 @@ fi
 echo "Running Regression Tests on '$machine'";
 
 case $machine in
+  Gaea)
+    export queue="batch"
+    export noscrub="/lustre/f2/scratch/$LOGNAME/gsi_tmp/noscrub"
+    export ptmp="/lustre/f2/scratch/$LOGNAME/gsi_tmp/ptmp"
+    export casesdir="/lustre/f2/dev/role.epic/contrib/GSI_data/CASES/regtest"
+
+    export group="global"
+    if [[ "$cmaketest" = "false" ]]; then
+      export basedir="/lustre/f2/dev/$LOGNAME/sandbox/GSI"
+    fi
+
+    export check_resource="no"
+    export accnt="nggps_emc"
+  ;;
   Cheyenne)
     export queue="economy"
     export noscrub="/glade/scratch/$LOGNAME"
diff --git a/ush/build.sh b/ush/build.sh
index 71674c4f4c..f706f413c1 100755
--- a/ush/build.sh
+++ b/ush/build.sh
@@ -30,12 +30,14 @@ set -x
 
 # Set CONTROLPATH variable to user develop installation
 CONTROLPATH="$DIR_ROOT/../develop/install/bin"
-
+# ';' is CMake's list separator; a space would be split off as a separate
+CMAKELIBS=".so;.a"
+CMAKE_OPTS+=" -DCMAKE_FIND_LIBRARY_SUFFIXES=${CMAKELIBS}"
 # Collect BUILD Options
 CMAKE_OPTS+=" -DCMAKE_BUILD_TYPE=$BUILD_TYPE"
 
 # Install destination for built executables, libraries, CMake Package config
-CMAKE_OPTS+=" -DCMAKE_INSTALL_PREFIX=$INSTALL_PREFIX"
+CMAKE_OPTS+=" -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX}"
 
 # Configure for GSI and EnKF
 CMAKE_OPTS+=" -DGSI_MODE=$GSI_MODE -DENKF_MODE=${ENKF_MODE}"
@@ -44,7 +46,7 @@ CMAKE_OPTS+=" -DGSI_MODE=$GSI_MODE -DENKF_MODE=${ENKF_MODE}"
 [[ ${REGRESSION_TESTS} =~ [yYtT] ]] && CMAKE_OPTS+=" -DBUILD_REG_TESTING=ON -DCONTROLPATH=${CONTROLPATH:-}"
 
 # Re-use or create a new BUILD_DIR (Default: create new BUILD_DIR)
-[[ ${BUILD_CLEAN:-"YES"} =~ [yYtT] ]] && rm -rf $BUILD_DIR
+[[ ${BUILD_CLEAN:-"YES"} =~ [yYtT] ]] && rm -rf $BUILD_DIR && echo "Removing $BUILD_DIR"
 mkdir -p $BUILD_DIR && cd $BUILD_DIR
 
 # Configure, build, install
diff --git a/ush/sub_gaea b/ush/sub_gaea
new file mode 100755
index 0000000000..5c755e6930
--- /dev/null
+++ b/ush/sub_gaea
@@ -0,0 +1,169 @@
+#!/bin/sh --login
+# sub_gaea: prepend a Slurm header to a job script and submit it with sbatch
+# on Gaea.  Relies on [[ ]] and base-prefixed arithmetic, so the /bin/sh that
+# runs it must be bash-compatible.
+set -x
+usage="\
+Usage: $0 [options] executable [args]
+  where the options are:
+  -a account        account (default: none)
+  -b binding        run smt binding or not (default:NO)
+  -d dirin          initial directory (default: cwd)
+  -e envars         copy comma-separated environment variables
+  -g group          group name
+  -i                append standard input to command file
+  -j jobname        specify jobname (default: executable basename)
+  -m machine        machine on which to run (default: current)
+  -n                write command file to stdout rather than submitting it
+  -o output         specify output file (default: jobname.out)
+  -p procs[/nodes[/ppreq]
+                    number of MPI tasks and optional nodes or Bblocking and
+                    ppreq option (N or S) (defaults: serial, Bunlimited, S)
+  -q queue[/qpreq]  queue name and optional requirement, e.g. dev/P
+                    (defaults: 1 if serial or dev if parallel and none)
+                    (queue 3 or 4 is dev or prod with twice tasks over ip)
+                    (options: P=parallel, B=bigmem, b=batch)
+  -r rmem[/rcpu]    resources memory and cpus/task (default: '1024 mb', 1)
+  -t timew          wall time limit in [[hh:]mm:]ss format (default: 900)
+  -u userid         userid to run under (default: self)
+  -v                verbose mode
+  -w when           when to run, in yyyymmddhh[mm], +hh[mm], thh[mm], or
+                    Thh[mm] (full, incremental, today or tomorrow) format
+                    (default: now)
+Function:  This command submits a job to the batch queue."
+subcmd="$*"
+stdin=NO
+nosub=NO
+account=""
+binding="NO"
+dirin=""
+envars=""
+group=""
+jobname=""
+machine=""
+output=""
+procs=0
+nodes=""
+ppreq=""
+queue=""
+qpreq=""
+rmem="1024"
+rcpu="1"
+timew="900"
+userid=""
+verbose=NO
+when=""
+while getopts a:b:d:e:g:ij:m:no:p:q:r:t:u:vw: opt;do
+  case $opt in
+    a) account="$OPTARG";;
+    b) binding="$OPTARG";;
+    d) dirin="$OPTARG";;
+    e) envars="$OPTARG";;
+    g) group="$OPTARG";;
+    i) stdin=YES;;
+    j) jobname=$OPTARG;;
+    m) machine="$OPTARG";;
+    n) nosub=YES;;
+    o) output=$OPTARG;;
+    p) procs=$(echo $OPTARG/|cut -d/ -f1);nodes=$(echo $OPTARG/|cut -d/ -f2);ppreq=$(echo $OPTARG/|cut -d/ -f3);;
+    q) queue=$(echo $OPTARG/|cut -d/ -f1);qpreq=$(echo $OPTARG/|cut -d/ -f2);;
+    r) rmem=$(echo $OPTARG/|cut -d/ -f1);rcpu=$(echo $OPTARG/|cut -d/ -f2);;
+    t) timew=$OPTARG;;
+    u) userid=$OPTARG;;
+    v) verbose=YES;;
+    w) when=$OPTARG;;
+    \?) echo $0: invalid option >&2;echo "$usage" >&2;exit 1;;
+  esac
+done
+shift $(($OPTIND-1))
+if [[ $# -eq 0 ]];then
+  echo $0: missing executable name >&2;echo "$usage" >&2;exit 1
+fi
+exec=$1
+if [[ ! -s $exec ]]&&which $exec >/dev/null 2>&1;then
+  exec=$(which $exec)
+fi
+shift
+args="$*"
+bn=$(basename $exec)
+export jobname=${jobname:-$bn}
+output=${output:-$jobname.out}
+myuser=$LOGNAME
+myhost=$(hostname)
+
+if [ -d /lustre/f2/scratch/$LOGNAME ]; then
+  DATA=/lustre/f2/scratch/$LOGNAME/tmp
+fi
+DATA=${DATA:-$ptmp/tmp}
+
+mkdir -p $DATA
+
+queue=${queue:-batch}
+timew=${timew:-01:20:00}
+# Slurm reads a bare integer --time as minutes; the usage above promises
+# seconds, so convert an all-digit value to hh:mm:ss (10# avoids octal).
+case $timew in
+  *[!0-9]*) ;;
+  *) timew=$(printf '%02d:%02d:%02d' $((10#$timew/3600)) $((10#$timew%3600/60)) $((10#$timew%60)));;
+esac
+task_node=${task_node:-$procs}
+export size=$((nodes*task_node))
+echo "In sub_gaea: task_node, nodes, size=",$task_node,$nodes,$size
+envars=$envars
+threads=${rcpu:-1}
+
+export TZ=GMT
+cfile=$DATA/sub$$
+> $cfile
+echo "#!/bin/bash -l" >> $cfile
+echo "" >> $cfile
+echo "#SBATCH --output=$output" >> $cfile
+echo "#SBATCH --job-name=$jobname" >> $cfile
+echo "#SBATCH --qos=normal" >> $cfile
+echo "#SBATCH --clusters=c4" >> $cfile
+echo "#SBATCH --time=$timew" >> $cfile
+echo "#SBATCH --nodes=$nodes --ntasks-per-node=$procs --cpus-per-task=$threads" >> $cfile
+# -a overrides the default project account.
+echo "#SBATCH --account=${account:-nggps_emc}" >> $cfile
+echo "#SBATCH --mem=0" >> $cfile
+
+echo "" >>$cfile
+echo "export OMP_NUM_THREADS=$threads" >> $cfile
+echo "" >>$cfile
+echo ". "$(awk '{ print $1, $2, $3, $4, $5, $6, $7, $8, $9 }' $regdir/regression_var.out) >>$cfile
+echo "" >>$cfile
+
+echo "source /lustre/f2/dev/role.epic/contrib/Lmod_init.sh" >> $cfile
+echo "module use $gsisrc/modulefiles" >> $cfile
+echo "module load gsi_gaea" >> $cfile
+echo "module list" >> $cfile
+echo "" >>$cfile
+
+cat $exec >> $cfile
+
+# Append stdin (-i) before the -n/-v dump so the printed file is complete.
+if [[ $stdin = YES ]];then
+  cat
+fi >>$cfile
+if [[ $nosub = YES ]];then
+  cat $cfile
+  exit
+elif [[ $verbose = YES ]];then
+  set -x
+  cat $cfile
+fi
+sbatch=${sbatch:-sbatch}
+
+ofile=$DATA/subout$$
+>$ofile
+chmod 777 $ofile
+$sbatch $cfile >$ofile
+rc=$?
+cat $ofile
+if [[ -w $SUBLOG ]];then
+  jobn=$(grep -i submitted $ofile|head -n1|cut -d\" -f2)
+  date -u +"%Y%m%d%H%M%S : $subcmd : $jobn" >>$SUBLOG
+fi
+#rm $cfile $ofile
+#[[ $MKDATA = YES ]] && rmdir $DATA
+exit $rc