diff --git a/scripts/exregional_make_ics.sh b/scripts/exregional_make_ics.sh
index ec0fee623..1ee77918c 100755
--- a/scripts/exregional_make_ics.sh
+++ b/scripts/exregional_make_ics.sh
@@ -131,6 +131,7 @@ case "$MACHINE" in
;;
"LINUX")
+ ulimit -s unlimited
APRUN=$RUN_CMD_UTILS
;;
diff --git a/scripts/exregional_make_lbcs.sh b/scripts/exregional_make_lbcs.sh
index 1b1f84ff8..01aeaed96 100755
--- a/scripts/exregional_make_lbcs.sh
+++ b/scripts/exregional_make_lbcs.sh
@@ -131,6 +131,7 @@ case "$MACHINE" in
;;
"LINUX")
+ ulimit -s unlimited
APRUN=$RUN_CMD_UTILS
;;
diff --git a/scripts/exregional_make_orog.sh b/scripts/exregional_make_orog.sh
index e90b3b762..afd40d74d 100755
--- a/scripts/exregional_make_orog.sh
+++ b/scripts/exregional_make_orog.sh
@@ -153,6 +153,8 @@ case "$MACHINE" in
"LINUX")
APRUN=time
+ ulimit -s unlimited
+ ulimit -a
;;
*)
diff --git a/scripts/exregional_run_fcst.sh b/scripts/exregional_run_fcst.sh
index 4b16f79d9..aae18a749 100755
--- a/scripts/exregional_run_fcst.sh
+++ b/scripts/exregional_run_fcst.sh
@@ -159,6 +159,8 @@ case "$MACHINE" in
;;
"LINUX")
+ ulimit -s unlimited
+ ulimit -a
APRUN=$RUN_CMD_FCST
;;
diff --git a/ush/config_defaults.sh b/ush/config_defaults.sh
index dd6a2968c..a12f5119f 100644
--- a/ush/config_defaults.sh
+++ b/ush/config_defaults.sh
@@ -41,7 +41,12 @@ RUN_ENVIR="nco"
# Set machine and queue parameters. Definitions:
#
# MACHINE:
-# Machine on which the workflow will run.
+# Machine on which the workflow will run. If you are NOT on a named,
+# supported platform, and you want to use the Rocoto workflow manager,
+# you will need to set MACHINE="linux" and WORKFLOW_MANAGER="rocoto". This
+# combination will assume a Slurm batch manager when generating the XML.
+# Please see ush/valid_param_vals.sh for a full list of supported
+# platforms.
#
# ACCOUNT:
# The account under which to submit jobs to the queue.
@@ -49,7 +54,27 @@ RUN_ENVIR="nco"
# WORKFLOW_MANAGER:
# The workflow manager to use (e.g. rocoto). This is set to "none" by
# default, but if the machine name is set to a platform that supports
-# rocoto, this will be overwritten and set to "rocoto".
+# rocoto, this will be overwritten and set to "rocoto". If set
+# explicitly to rocoto along with the use of the MACHINE=linux target,
+# the configuration layer assumes a Slurm batch manager when generating
+# the XML. Valid options: "rocoto" or "none"
+#
+# NCORES_PER_NODE:
+# The number of cores available per node on the compute platform. Set
+# for supported platforms in setup.sh, but is now also configurable for
+# all platforms.
+#
+# LMOD_PATH:
+# Path to the LMOD sh file on your Linux system. Is set automatically
+# for supported machines.
+#
+# BUILD_ENV_FN:
+# Name of alternative build environment file to use if using an
+# unsupported platform. Is set automatically for supported machines.
+#
+# WFLOW_ENV_FN:
+# Name of alternative workflow environment file to use if using an
+# unsupported platform. Is set automatically for supported machines.
#
# SCHED:
# The job scheduler to use (e.g. slurm). Set this to an empty string in
@@ -109,6 +134,10 @@ RUN_ENVIR="nco"
MACHINE="BIG_COMPUTER"
ACCOUNT="project_name"
WORKFLOW_MANAGER="none"
+NCORES_PER_NODE=""
+LMOD_PATH=""
+BUILD_ENV_FN=""
+WFLOW_ENV_FN=""
SCHED=""
PARTITION_DEFAULT=""
QUEUE_DEFAULT=""
diff --git a/ush/launch_FV3LAM_wflow.sh b/ush/launch_FV3LAM_wflow.sh
index 72cdd74c7..f92e5e076 100755
--- a/ush/launch_FV3LAM_wflow.sh
+++ b/ush/launch_FV3LAM_wflow.sh
@@ -84,6 +84,7 @@ fi
#-----------------------------------------------------------------------
#
. $exptdir/var_defns.sh
+. ${USHDIR}/source_util_funcs.sh
#
#-----------------------------------------------------------------------
#
@@ -101,13 +102,7 @@ expt_name="${EXPT_SUBDIR}"
#
#-----------------------------------------------------------------------
#
-if [ "$MACHINE" = "CHEYENNE" ]; then
- module use -a /glade/p/ral/jntp/UFS_SRW_app/modules/
- module load rocoto
-elif [ "$MACHINE" = "ORION" ]; then
- module purge
- module load contrib rocoto
-elif [ "$MACHINE" = "WCOSS_DELL_P3" ]; then
+if [ "$MACHINE" = "WCOSS_DELL_P3" ]; then
module purge
module load lsf/10.1
module use /gpfs/dell3/usrx/local/dev/emc_rocoto/modulefiles/
@@ -118,8 +113,14 @@ elif [ "$MACHINE" = "WCOSS_CRAY" ]; then
module use -a /usrx/local/emc_rocoto/modulefiles
module load rocoto/1.3.0rc2
else
+ machine=$(echo_lowercase $MACHINE)
+ env_fn=${WFLOW_ENV_FN:-"wflow_${machine}.env"}
+ env_fp="${SR_WX_APP_TOP_DIR}/env/${env_fn}"
module purge
- module load rocoto
+ source "${env_fp}" || print_err_msg_exit "\
+ Sourcing platform-specific environment file (env_fp) for
+ the workflow task failed :
+ env_fp = \"${env_fp}\""
fi
#
#-----------------------------------------------------------------------
@@ -162,35 +163,7 @@ cd "$exptdir"
#-----------------------------------------------------------------------
#
-#rocotorun_output=$( ls -alF )
-#echo
-#echo "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
-#echo "${rocotorun_output}"
-#echo "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB"
-
-#rocotorun_output=$( \
-#rocotorun -w "${WFLOW_XML_FN}" -d "${rocoto_database_fn}" -v 10 \
-#)
-#rocotorun_output=$( (rocotorun -w "${WFLOW_XML_FN}" -d "${rocoto_database_fn}" -v 10) 2>&1 ) # This freezes the script.
-#rocotorun_output=$( (rocotorun -w "${WFLOW_XML_FN}" -d "${rocoto_database_fn}" -v 10) 1>&2 ) # This leaves rocotorun_output empty.
-#rocotorun_output=$( rocotorun -w "${WFLOW_XML_FN}" -d "${rocoto_database_fn}" -v 10 )
-#{ error=$(command 2>&1 1>&$out); } {out}>&1
-#{ rocotorun_output=$( rocotorun -w "${WFLOW_XML_FN}" -d "${rocoto_database_fn}" -v 10 2>&1 1>&$out); } {out}>&1 # This freezes the script.
-
-#
-# Ideally, the following two lines should work, but for some reason the
-# output of rocotorun cannot be captured in a variable using the $(...)
-# notation. Maybe it's not being written to stdout, although I tried
-# redirecting stderr to stdout and other tricks but nothing seemed to
-# work. For this reason, below we first redirect the output of rocoto-
-# run to a temporary file and then read in the contents of that file in-
-# to the rocotorun_output variable using the cat command.
-#
-#rocotorun_cmd="rocotorun -w \"${WFLOW_XML_FN}\" -d \"${rocoto_database_fn}\" -v 10"
-#rocotorun_output=$( eval ${rocotorun_cmd} 2>&1 )
-#
tmp_fn="rocotorun_output.txt"
-#rocotorun_cmd="rocotorun -w \"${WFLOW_XML_FN}\" -d \"${rocoto_database_fn}\" -v 10 > ${tmp_fn}"
rocotorun_cmd="rocotorun -w \"${WFLOW_XML_FN}\" -d \"${rocoto_database_fn}\" -v 10"
eval ${rocotorun_cmd} > ${tmp_fn} 2>&1
rocotorun_output=$( cat "${tmp_fn}" )
@@ -217,18 +190,9 @@ done <<< "${rocotorun_output}"
#
#-----------------------------------------------------------------------
#
-#rocotostat_cmd="{ pwd; rocotostat -w \"${WFLOW_XML_FN}\" -d \"${rocoto_database_fn}\" -v 10; }"
-#rocotostat_cmd="{ pwd; ls -alF; rocotostat -w ${WFLOW_XML_FN} -d ${rocoto_database_fn} -v 10; }"
-#rocotostat_cmd="{ pwd; ls -alF; rocotostat -w \"${WFLOW_XML_FN}\" -d \"${rocoto_database_fn}\" -v 10; }"
-#rocotostat_cmd="{ pwd; rocotostat -w \"${WFLOW_XML_FN}\" -d \"${rocoto_database_fn}\" -v 10; }"
-#rocotostat_cmd="{ rocotostat -w \"${WFLOW_XML_FN}\" -d \"${rocoto_database_fn}\" -v 10; }"
rocotostat_cmd="rocotostat -w \"${WFLOW_XML_FN}\" -d \"${rocoto_database_fn}\" -v 10"
-#rocotostat_output=$( pwd; rocotostat -w "${WFLOW_XML_FN}" -d "${rocoto_database_fn}" -v 10 2>&1 )
-#rocotostat_output=$( rocotostat -w "${WFLOW_XML_FN}" -d "${rocoto_database_fn}" -v 10 2>&1 )
rocotostat_output=$( eval ${rocotostat_cmd} 2>&1 )
-#rocotostat_output=$( ${rocotostat_cmd} 2>&1 )
-#rocotostat_output=$( { pwd; ls -alF; } 2>&1 )
error_msg="DEAD"
while read -r line; do
grep_output=$( printf "$line" | grep "${error_msg}" )
diff --git a/ush/load_modules_run_task.sh b/ush/load_modules_run_task.sh
index 411f0bdb1..1f0d88126 100755
--- a/ush/load_modules_run_task.sh
+++ b/ush/load_modules_run_task.sh
@@ -71,30 +71,6 @@ fi
# ..." and "module load ..." calls later below that are used to load the
# appropriate module file for the specified task.
#
-# Note that the build of the FV3 forecast model code generates the shell
-# script at
-#
-# ${UFS_WTHR_MDL_DIR}/NEMS/src/conf/module-setup.sh
-#
-# that can be used to initialize the Lmod (Lua-based module) system/
-# software for handling modules. This script:
-#
-# 1) Detects the shell in which it is being invoked (i.e. the shell of
-# the "parent" script in which it is being sourced).
-# 2) Detects the machine it is running on and and calls the appropriate
-# (shell- and machine-dependent) initalization script to initialize
-# Lmod.
-# 3) Purges all modules.
-# 4) Uses the "module use ..." command to prepend or append paths to
-# Lmod's search path (MODULEPATH).
-#
-# We could use this module-setup.sh script to initialize Lmod, but since
-# it is only found in the forecast model's directory tree, here we pre-
-# fer to perform our own initialization. Ideally, there should be one
-# module-setup.sh script that is used by all external repos/codes, but
-# such a script does not exist. If/when it does, we will consider
-# switching to it instead of using the case-statement below.
-#
#-----------------------------------------------------------------------
#
print_info_msg "$VERBOSE" "
@@ -128,10 +104,14 @@ case "$MACHINE" in
;;
#
*)
- print_err_msg_exit "\
-The script to source to initialize lmod (module loads) has not yet been
-specified for the current machine (MACHINE):
- MACHINE = \"$MACHINE\""
+ if [[ -n ${LMOD_PATH:-""} && -f ${LMOD_PATH:-""} ]] ; then
+ . "${LMOD_PATH}" # user-supplied Lmod init script; quoted so paths containing spaces still work
+ else
+ print_err_msg_exit "\
+ The script to source to initialize lmod (module loads) has not yet been
+ specified for the current machine (MACHINE):
+ MACHINE = \"$MACHINE\""
+ fi
;;
#
esac
@@ -147,13 +127,15 @@ jjob_fp="$2"
#
#-----------------------------------------------------------------------
#
-# Sourcing ufs-srweather-app README file (in directory specified by mod-
-# ules_dir) for the specified task
+# Sourcing ufs-srweather-app build env file
#
#-----------------------------------------------------------------------
#
+
+module purge
+
machine=$(echo_lowercase $MACHINE)
-env_fn="build_${machine}_${COMPILER}.env"
+env_fn=${BUILD_ENV_FN:-"build_${machine}_${COMPILER}.env"}
env_fp="${SR_WX_APP_TOP_DIR}/env/${env_fn}"
source "${env_fp}" || print_err_msg_exit "\
Sourcing platform- and compiler-specific environment file (env_fp) for the
@@ -172,25 +154,23 @@ workflow task specified by task_name failed:
# sets environment variables (including prepending/appending to paths)
# and loads modules.
#
-# The regional_workflow repository contains module files for all the
+# The regional_workflow repository contains module files for the
# workflow tasks in the template rocoto XML file for the FV3-LAM work-
-# flow. The full path to a module file for a given task is
+# flow that need modules not loaded in the env_fn above.
+#
+# The full path to a module file for a given task is
#
-# $HOMErrfs/modulefiles/$machine/${task_name}
+# $HOMErrfs/modulefiles/tasks/$machine/${task_name}.local
#
# where HOMErrfs is the base directory of the workflow, machine is the
# name of the machine that we're running on (in lowercase), and task_-
-# name is the name of the current task (an input to this script). The
-# collection of modulefiles is staged by the generate_workflow.sh
-# script. Please see that script for information on their creation.
+# name is the name of the current task (an input to this script).
#
#-----------------------------------------------------------------------
#
modules_dir="$HOMErrfs/modulefiles/tasks/$machine"
modulefile_name="${task_name}"
default_modules_dir="$HOMErrfs/modulefiles"
-default_modulefile_name="${machine}.default"
-use_default_modulefile=0
#
#-----------------------------------------------------------------------
#
@@ -198,46 +178,28 @@ use_default_modulefile=0
#
#-----------------------------------------------------------------------
#
- print_info_msg "$VERBOSE" "
+
+print_info_msg "$VERBOSE" "
Loading modules for task \"${task_name}\" ..."
- module use "${modules_dir}" || print_err_msg_exit "\
+module use "${modules_dir}" || print_err_msg_exit "\
Call to \"module use\" command failed."
- #
- # If NOT using the default modulefile...
- #
-# if [ ${use_default_modulefile} -eq 0 ]; then
-#
-# module use -a "${modules_dir}" || print_err_msg_exit "\
-#Call to \"module use\" command failed."
-#
- #
- # Load the .local module file if available for the given task
- #
- modulefile_local="${task_name}.local"
- if [ -f ${modules_dir}/${modulefile_local} ]; then
- module load "${modulefile_local}" || print_err_msg_exit "\
-Loading .local module file (in directory specified by mod-
-ules_dir) for the specified task (task_name) failed:
- task_name = \"${task_name}\"
- modulefile_local = \"${modulefile_local}\"
- modules_dir = \"${modules_dir}\""
- fi
-
-# else # using default modulefile
#
-# module load "${default_modulefile_name}" || print_err_msg_exit "\
-#Loading of default module file failed:
-# task_name = \"${task_name}\"
-# default_modulefile_name = \"${default_modulefile_name}\"
-# default_modules_dir = \"${default_modules_dir}\""
+# Load the .local module file if available for the given task
#
-# fi
+modulefile_local="${task_name}.local"
+if [ -f ${modules_dir}/${modulefile_local} ]; then
+ module load "${modulefile_local}" || print_err_msg_exit "\
+ Loading .local module file (in directory specified by mod-
+ ules_dir) for the specified task (task_name) failed:
+ task_name = \"${task_name}\"
+ modulefile_local = \"${modulefile_local}\"
+ modules_dir = \"${modules_dir}\""
+fi
- module list
+module list
-#fi #End if statement for tasks that load no modules
# Modules that use conda and need an environment activated will set the
# SRW_ENV variable to the name of the environment to be activated. That
diff --git a/ush/setup.sh b/ush/setup.sh
index 96b1ef071..cd60d5962 100755
--- a/ush/setup.sh
+++ b/ush/setup.sh
@@ -1,3 +1,4 @@
+#!/bin/bash
#
#-----------------------------------------------------------------------
#
@@ -512,7 +513,7 @@ check_var_valid_value "MACHINE" "valid_vals_MACHINE"
# several queues. These queues are defined in the default and local
# workflow/experiment configuration script.
#
-# Also, set the machine-dependent flag RELAITVE_OR_NULL that specifies
+# Also, set the machine-dependent flag RELATIVE_LINK_FLAG that specifies
# the flag to pass to the link creation command (ln_vrfy) when attempting
# to create relative symlinks. On machines that don't support relative
# symlinks, it should be set to a null string.
@@ -520,12 +521,11 @@ check_var_valid_value "MACHINE" "valid_vals_MACHINE"
#-----------------------------------------------------------------------
#
RELATIVE_LINK_FLAG=""
-NCORES_PER_NODE="2" # Need some arbitrary default value to avoid division by zero errors
case $MACHINE in
"WCOSS_CRAY")
WORKFLOW_MANAGER="rocoto"
- NCORES_PER_NODE="24"
+ NCORES_PER_NODE="${NCORES_PER_NODE:-24}"
SCHED="lsfcray"
QUEUE_DEFAULT=${QUEUE_DEFAULT:-"dev"}
QUEUE_HPSS=${QUEUE_HPSS:-"dev_transfer"}
@@ -536,7 +536,7 @@ case $MACHINE in
"WCOSS_DELL_P3")
WORKFLOW_MANAGER="rocoto"
- NCORES_PER_NODE=24
+ NCORES_PER_NODE="${NCORES_PER_NODE:-24}"
SCHED="lsf"
QUEUE_DEFAULT=${QUEUE_DEFAULT:-"dev"}
QUEUE_HPSS=${QUEUE_HPSS:-"dev_transfer"}
@@ -547,7 +547,7 @@ case $MACHINE in
"HERA")
WORKFLOW_MANAGER="rocoto"
- NCORES_PER_NODE=40
+ NCORES_PER_NODE="${NCORES_PER_NODE:-40}"
SCHED=${SCHED:-"slurm"}
PARTITION_DEFAULT=${PARTITION_DEFAULT:-"hera"}
QUEUE_DEFAULT=${QUEUE_DEFAULT:-"batch"}
@@ -561,7 +561,7 @@ case $MACHINE in
"ORION")
WORKFLOW_MANAGER="rocoto"
- NCORES_PER_NODE=40
+ NCORES_PER_NODE="${NCORES_PER_NODE:-40}"
SCHED=${SCHED:-"slurm"}
PARTITION_DEFAULT=${PARTITION_DEFAULT:-"orion"}
QUEUE_DEFAULT=${QUEUE_DEFAULT:-"batch"}
@@ -575,7 +575,7 @@ case $MACHINE in
"JET")
WORKFLOW_MANAGER="rocoto"
- NCORES_PER_NODE=24
+ NCORES_PER_NODE="${NCORES_PER_NODE:-24}"
SCHED=${SCHED:-"slurm"}
PARTITION_DEFAULT=${PARTITION_DEFAULT:-"sjet,vjet,kjet,xjet"}
QUEUE_DEFAULT=${QUEUE_DEFAULT:-"batch"}
@@ -589,7 +589,7 @@ case $MACHINE in
"ODIN")
WORKFLOW_MANAGER="rocoto"
- NCORES_PER_NODE=24
+ NCORES_PER_NODE="${NCORES_PER_NODE:-24}"
SCHED=${SCHED:-"slurm"}
PARTITION_DEFAULT=${PARTITION_DEFAULT:-"workq"}
QUEUE_DEFAULT=${QUEUE_DEFAULT:-"workq"}
@@ -603,7 +603,7 @@ case $MACHINE in
"CHEYENNE")
WORKFLOW_MANAGER="rocoto"
- NCORES_PER_NODE=36
+ NCORES_PER_NODE="${NCORES_PER_NODE:-36}"
SCHED=${SCHED:-"pbspro"}
QUEUE_DEFAULT=${QUEUE_DEFAULT:-"regular"}
QUEUE_HPSS=${QUEUE_HPSS:-"regular"}
@@ -614,7 +614,7 @@ case $MACHINE in
"STAMPEDE")
WORKFLOW_MANAGER="rocoto"
- NCORES_PER_NODE=68
+ NCORES_PER_NODE="${NCORES_PER_NODE:-68}"
SCHED="slurm"
PARTITION_DEFAULT=${PARTITION_DEFAULT:-"normal"}
QUEUE_DEFAULT=${QUEUE_DEFAULT:-"normal"}
@@ -632,11 +632,26 @@ case $MACHINE in
;;
"LINUX")
- WORKFLOW_MANAGER="none"
- SCHED="none"
+ WORKFLOW_MANAGER=${WORKFLOW_MANAGER:-"none"}
+ SCHED=${SCHED:-"none"}
+ ;;
+
+ *)
+ NCORES_PER_NODE="2" # Need some arbitrary default value to avoid division by zero errors
+ # The catch-all pattern above must stay unquoted: a quoted "*" matches only a literal asterisk.
+ print_err_msg_exit "\
+ You are running on an unknown platform! MACHINE=${MACHINE} is not a valid
+ choice."
+ ;;
esac
+
+if [ -z "$NCORES_PER_NODE" ]; then
+ print_err_msg_exit "\
+ NCORES_PER_NODE is a required setting for your platform! Please
+ set it in config.sh.
+ MACHINE = ${MACHINE}"
+fi
#
#-----------------------------------------------------------------------
#
@@ -658,9 +673,8 @@ check_var_valid_value "SCHED" "valid_vals_SCHED"
#
#-----------------------------------------------------------------------
#
-# If we are using a workflow manager, run some checks. First,
-# verify that the ACCOUNT variable is not empty. Second, ensure that the
-# custom RUN_CMD variables are not set.
+# If we are using a workflow manager, check that the ACCOUNT variable is
+# not empty.
#
#-----------------------------------------------------------------------
#
@@ -671,9 +685,6 @@ The variable ACCOUNT cannot be empty if you are using a workflow manager:
ACCOUNT = \"$ACCOUNT\"
WORKFLOW_MANAGER = \"$WORKFLOW_MANAGER\""
fi
- RUN_CMD_UTILS=""
- RUN_CMD_FCST=""
- RUN_CMD_POST=""
fi
#
#-----------------------------------------------------------------------
@@ -2942,6 +2953,15 @@ fi
#
#-----------------------------------------------------------------------
#
+# Because RUN_CMD_FCST can include PE_MEMBER01 (and theoretically other
+# variables calculated in this script), delete every existing definition
+# of it from the var_defns file, and write it again at the end.
+#
+#-----------------------------------------------------------------------
+$SED -i '/^RUN_CMD_FCST=/d' "$GLOBAL_VAR_DEFNS_FP"
+#
+#-----------------------------------------------------------------------
+#
# Continue appending variable definitions to the variable definitions
# file.
#
@@ -3040,7 +3060,7 @@ FVCOM_FILE="${FVCOM_FILE}"
#
NCORES_PER_NODE="${NCORES_PER_NODE}"
PE_MEMBER01="${PE_MEMBER01}"
-RUN_CMD_FCST="${RUN_CMD_FCST}"
+RUN_CMD_FCST="$(eval echo ${RUN_CMD_FCST})"
#
#-----------------------------------------------------------------------
#
diff --git a/ush/templates/FV3LAM_wflow.xml b/ush/templates/FV3LAM_wflow.xml
index f7e7b9471..cc77b5ad4 100644
--- a/ush/templates/FV3LAM_wflow.xml
+++ b/ush/templates/FV3LAM_wflow.xml
@@ -414,7 +414,7 @@ MODULES_RUN_TASK_FP script.
&RSRV_FCST;
&LOAD_MODULES_RUN_TASK_FP; "&RUN_FCST_TN;" "&JOBSDIR;/JREGIONAL_RUN_FCST"
- {%- if machine in ["JET", "HERA"] %}
+ {%- if machine in ["JET", "HERA", "LINUX"] %}
{{ ncores_run_fcst }}
{{ native_run_fcst }}
{%- else %}