Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions scripts/exregional_make_ics.sh
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ case "$MACHINE" in
;;

"LINUX")
ulimit -s unlimited
APRUN=$RUN_CMD_UTILS
;;

Expand Down
1 change: 1 addition & 0 deletions scripts/exregional_make_lbcs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ case "$MACHINE" in
;;

"LINUX")
ulimit -s unlimited
APRUN=$RUN_CMD_UTILS
;;

Expand Down
2 changes: 2 additions & 0 deletions scripts/exregional_make_orog.sh
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,8 @@ case "$MACHINE" in

"LINUX")
APRUN=time
ulimit -s unlimited
ulimit -a
;;

*)
Expand Down
2 changes: 2 additions & 0 deletions scripts/exregional_run_fcst.sh
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,8 @@ case "$MACHINE" in
;;

"LINUX")
ulimit -s unlimited
ulimit -a
APRUN=$RUN_CMD_FCST
;;

Expand Down
33 changes: 31 additions & 2 deletions ush/config_defaults.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,40 @@ RUN_ENVIR="nco"
# Set machine and queue parameters. Definitions:
#
# MACHINE:
# Machine on which the workflow will run.
# Machine on which the workflow will run. If you are NOT on a named,
# supported platform, and you want to use the Rocoto workflow manager,
# you will need to set MACHINE="linux" and WORKFLOW_MANAGER="rocoto". This
# combination will assume a Slurm batch manager when generating the XML.
# Please see ush/valid_param_vals.sh for a full list of supported
# platforms.
#
# ACCOUNT:
# The account under which to submit jobs to the queue.
#
# WORKFLOW_MANAGER:
# The workflow manager to use (e.g. rocoto). This is set to "none" by
# default, but if the machine name is set to a platform that supports
# rocoto, this will be overwritten and set to "rocoto".
# rocoto, this will be overwritten and set to "rocoto". If set
# explicitly to rocoto along with the use of the MACHINE=linux target,
# the configuration layer assumes a Slurm batch manager when generating
# the XML. Valid options: "rocoto" or "none"
#
# NCORES_PER_NODE:
# The number of cores available per node on the compute platform. Set
# for supported platforms in setup.sh, but is now also configurable for
# all platforms.
#
# LMOD_PATH:
# Path to the LMOD sh file on your Linux system. Is set automatically
# for supported machines.
#
# BUILD_ENV_FN:
# Name of alternative build environment file to use if using an
# unsupported platform. Is set automatically for supported machines.
#
# WFLOW_ENV_FN:
# Name of alternative workflow environment file to use if using an
# unsupported platform. Is set automatically for supported machines.
#
# SCHED:
# The job scheduler to use (e.g. slurm). Set this to an empty string in
Expand Down Expand Up @@ -109,6 +134,10 @@ RUN_ENVIR="nco"
MACHINE="BIG_COMPUTER"
ACCOUNT="project_name"
WORKFLOW_MANAGER="none"
NCORES_PER_NODE=""
LMOD_PATH=""
BUILD_ENV_FN=""
WFLOW_ENV_FN=""
SCHED=""
PARTITION_DEFAULT=""
QUEUE_DEFAULT=""
Expand Down
54 changes: 9 additions & 45 deletions ush/launch_FV3LAM_wflow.sh
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ fi
#-----------------------------------------------------------------------
#
. $exptdir/var_defns.sh
. ${USHDIR}/source_util_funcs.sh
#
#-----------------------------------------------------------------------
#
Expand All @@ -101,13 +102,7 @@ expt_name="${EXPT_SUBDIR}"
#
#-----------------------------------------------------------------------
#
if [ "$MACHINE" = "CHEYENNE" ]; then
module use -a /glade/p/ral/jntp/UFS_SRW_app/modules/
module load rocoto
elif [ "$MACHINE" = "ORION" ]; then
module purge
module load contrib rocoto
elif [ "$MACHINE" = "WCOSS_DELL_P3" ]; then
if [ "$MACHINE" = "WCOSS_DELL_P3" ]; then
module purge
module load lsf/10.1
module use /gpfs/dell3/usrx/local/dev/emc_rocoto/modulefiles/
Expand All @@ -118,8 +113,14 @@ elif [ "$MACHINE" = "WCOSS_CRAY" ]; then
module use -a /usrx/local/emc_rocoto/modulefiles
module load rocoto/1.3.0rc2
else
machine=$(echo_lowercase $MACHINE)
env_fn=${WFLOW_ENV_FN:-"wflow_${machine}.env"}
env_fp="${SR_WX_APP_TOP_DIR}/env/${env_fn}"
module purge
module load rocoto
source "${env_fp}" || print_err_msg_exit "\
Sourcing platform-specific environment file (env_fp) for
the workflow task failed :
env_fp = \"${env_fp}\""

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A user will be defining this in their own wflow env file, anyway, so have the supported platforms do the same for consistency. I do not have enough familiarity with the WCOSS machines to make changes there.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That sounds good.

But the same comment I have below for BUILD_ENV_FN in load_modules_run_task.sh goes here for WFLOW_ENV_FN, i.e. WFLOW_ENV_FN should be (re)set as necessary in setup.sh, i.e. in setup.sh, include this:
WFLOW_ENV_FN=${WFLOW_ENV_FN:-"wflow_${machine}.env"}
so that it shows the correct value in var_defns.sh. Then it can be used here instead of needing the new variable env_fn (since var_defns.sh is sourced at the top of launch_FV3LAM_wflow.sh).

fi
#
#-----------------------------------------------------------------------
Expand Down Expand Up @@ -162,35 +163,7 @@ cd "$exptdir"
#-----------------------------------------------------------------------
#

#rocotorun_output=$( ls -alF )
#echo
#echo "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
#echo "${rocotorun_output}"
#echo "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB"

#rocotorun_output=$( \
#rocotorun -w "${WFLOW_XML_FN}" -d "${rocoto_database_fn}" -v 10 \
#)
#rocotorun_output=$( (rocotorun -w "${WFLOW_XML_FN}" -d "${rocoto_database_fn}" -v 10) 2>&1 ) # This freezes the script.
#rocotorun_output=$( (rocotorun -w "${WFLOW_XML_FN}" -d "${rocoto_database_fn}" -v 10) 1>&2 ) # This leaves rocotorun_output empty.
#rocotorun_output=$( rocotorun -w "${WFLOW_XML_FN}" -d "${rocoto_database_fn}" -v 10 )
#{ error=$(command 2>&1 1>&$out); } {out}>&1
#{ rocotorun_output=$( rocotorun -w "${WFLOW_XML_FN}" -d "${rocoto_database_fn}" -v 10 2>&1 1>&$out); } {out}>&1 # This freezes the script.

#
# Ideally, the following two lines should work, but for some reason the
# output of rocotorun cannot be captured in a variable using the $(...)
# notation. Maybe it's not being written to stdout, although I tried
# redirecting stderr to stdout and other tricks but nothing seemed to
# work. For this reason, below we first redirect the output of rocoto-
# run to a temporary file and then read in the contents of that file in-
# to the rocotorun_output variable using the cat command.
#
#rocotorun_cmd="rocotorun -w \"${WFLOW_XML_FN}\" -d \"${rocoto_database_fn}\" -v 10"
#rocotorun_output=$( eval ${rocotorun_cmd} 2>&1 )
#
Comment thread
gsketefian marked this conversation as resolved.
tmp_fn="rocotorun_output.txt"
#rocotorun_cmd="rocotorun -w \"${WFLOW_XML_FN}\" -d \"${rocoto_database_fn}\" -v 10 > ${tmp_fn}"
rocotorun_cmd="rocotorun -w \"${WFLOW_XML_FN}\" -d \"${rocoto_database_fn}\" -v 10"
Comment thread
gsketefian marked this conversation as resolved.
eval ${rocotorun_cmd} > ${tmp_fn} 2>&1
rocotorun_output=$( cat "${tmp_fn}" )
Expand All @@ -217,18 +190,9 @@ done <<< "${rocotorun_output}"
#
#-----------------------------------------------------------------------
#
#rocotostat_cmd="{ pwd; rocotostat -w \"${WFLOW_XML_FN}\" -d \"${rocoto_database_fn}\" -v 10; }"
#rocotostat_cmd="{ pwd; ls -alF; rocotostat -w ${WFLOW_XML_FN} -d ${rocoto_database_fn} -v 10; }"
#rocotostat_cmd="{ pwd; ls -alF; rocotostat -w \"${WFLOW_XML_FN}\" -d \"${rocoto_database_fn}\" -v 10; }"
#rocotostat_cmd="{ pwd; rocotostat -w \"${WFLOW_XML_FN}\" -d \"${rocoto_database_fn}\" -v 10; }"
#rocotostat_cmd="{ rocotostat -w \"${WFLOW_XML_FN}\" -d \"${rocoto_database_fn}\" -v 10; }"
rocotostat_cmd="rocotostat -w \"${WFLOW_XML_FN}\" -d \"${rocoto_database_fn}\" -v 10"

#rocotostat_output=$( pwd; rocotostat -w "${WFLOW_XML_FN}" -d "${rocoto_database_fn}" -v 10 2>&1 )
#rocotostat_output=$( rocotostat -w "${WFLOW_XML_FN}" -d "${rocoto_database_fn}" -v 10 2>&1 )
rocotostat_output=$( eval ${rocotostat_cmd} 2>&1 )
#rocotostat_output=$( ${rocotostat_cmd} 2>&1 )
#rocotostat_output=$( { pwd; ls -alF; } 2>&1 )
error_msg="DEAD"
Comment thread
gsketefian marked this conversation as resolved.
while read -r line; do
grep_output=$( printf "$line" | grep "${error_msg}" )
Expand Down
104 changes: 33 additions & 71 deletions ush/load_modules_run_task.sh
Original file line number Diff line number Diff line change
Expand Up @@ -71,30 +71,6 @@ fi
# ..." and "module load ..." calls later below that are used to load the
# appropriate module file for the specified task.
#
# Note that the build of the FV3 forecast model code generates the shell
# script at
#
# ${UFS_WTHR_MDL_DIR}/NEMS/src/conf/module-setup.sh
#
# that can be used to initialize the Lmod (Lua-based module) system/
# software for handling modules. This script:
#
# 1) Detects the shell in which it is being invoked (i.e. the shell of
# the "parent" script in which it is being sourced).
# 2) Detects the machine it is running on and calls the appropriate
# (shell- and machine-dependent) initialization script to initialize
# Lmod.
# 3) Purges all modules.
# 4) Uses the "module use ..." command to prepend or append paths to
# Lmod's search path (MODULEPATH).
#
# We could use this module-setup.sh script to initialize Lmod, but since
# it is only found in the forecast model's directory tree, here we pre-
# fer to perform our own initialization. Ideally, there should be one
# module-setup.sh script that is used by all external repos/codes, but
# such a script does not exist. If/when it does, we will consider
# switching to it instead of using the case-statement below.
#
Comment thread
gsketefian marked this conversation as resolved.
#-----------------------------------------------------------------------
#
print_info_msg "$VERBOSE" "
Expand Down Expand Up @@ -128,10 +104,14 @@ case "$MACHINE" in
;;
#
*)
print_err_msg_exit "\
The script to source to initialize lmod (module loads) has not yet been
specified for the current machine (MACHINE):
MACHINE = \"$MACHINE\""
if [[ -n ${LMOD_PATH:-""} && -f ${LMOD_PATH:-""} ]] ; then
. ${LMOD_PATH}
else
print_err_msg_exit "\
The script to source to initialize lmod (module loads) has not yet been
specified for the current machine (MACHINE):
MACHINE = \"$MACHINE\""
fi
;;
#
esac
Expand All @@ -147,13 +127,15 @@ jjob_fp="$2"
#
#-----------------------------------------------------------------------
#
# Sourcing ufs-srweather-app README file (in directory specified by mod-
# ules_dir) for the specified task
# Sourcing ufs-srweather-app build env file
#
#-----------------------------------------------------------------------
#

module purge

machine=$(echo_lowercase $MACHINE)
env_fn="build_${machine}_${COMPILER}.env"
env_fn=${BUILD_ENV_FN:-"build_${machine}_${COMPILER}.env"}

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry again for the late input. Since BUILD_ENV_FN now appears in config_defaults.sh, it will also appear in var_defns.sh. If it is not explicitly specified by the user in config.sh, it will be set to its default value (a null string) in var_defns.sh because currently it does not get reset to "build_${machine}_${COMPILER}.env" anywhere. That should happen in setup.sh, i.e. include this in setup.sh:

BUILD_ENV_FN=${BUILD_ENV_FN:-"build_${machine}_${COMPILER}.env"}

This should be placed somewhere before the line

cp_vrfy $USHDIR/${EXPT_DEFAULT_CONFIG_FN} ${GLOBAL_VAR_DEFNS_FP}

in setup.sh. This will cause var_defns.sh to contain the correct name of the environment file. It will also make things easier in this file since now we don't need the env_fn variable; we can just use BUILD_ENV_FN (which will be available since var_defns.sh is sourced at the top of this file).

env_fp="${SR_WX_APP_TOP_DIR}/env/${env_fn}"
source "${env_fp}" || print_err_msg_exit "\
Sourcing platform- and compiler-specific environment file (env_fp) for the
Expand All @@ -172,72 +154,52 @@ workflow task specified by task_name failed:
# sets environment variables (including prepending/appending to paths)
# and loads modules.
#
# The regional_workflow repository contains module files for all the
# The regional_workflow repository contains module files for the
# workflow tasks in the template rocoto XML file for the FV3-LAM work-
# flow. The full path to a module file for a given task is
# flow that need modules not loaded in the env_fn above.
#
# The full path to a module file for a given task is
#
# $HOMErrfs/modulefiles/$machine/${task_name}
# $HOMErrfs/modulefiles/$machine/${task_name}.local
#
# where HOMErrfs is the base directory of the workflow, machine is the
# name of the machine that we're running on (in lowercase), and task_-
# name is the name of the current task (an input to this script). The
# collection of modulefiles is staged by the generate_workflow.sh
# script. Please see that script for information on their creation.
# name is the name of the current task (an input to this script).
#
#-----------------------------------------------------------------------
#
modules_dir="$HOMErrfs/modulefiles/tasks/$machine"
modulefile_name="${task_name}"
default_modules_dir="$HOMErrfs/modulefiles"
default_modulefile_name="${machine}.default"
use_default_modulefile=0
#
#-----------------------------------------------------------------------
#
# Load the module file for the specified task on the current machine.
#
#-----------------------------------------------------------------------
#
print_info_msg "$VERBOSE" "

print_info_msg "$VERBOSE" "
Loading modules for task \"${task_name}\" ..."

module use "${modules_dir}" || print_err_msg_exit "\
module use "${modules_dir}" || print_err_msg_exit "\
Call to \"module use\" command failed."

#
# If NOT using the default modulefile...
#
# if [ ${use_default_modulefile} -eq 0 ]; then
#
# module use -a "${modules_dir}" || print_err_msg_exit "\
#Call to \"module use\" command failed."
#
#
# Load the .local module file if available for the given task
#
modulefile_local="${task_name}.local"
if [ -f ${modules_dir}/${modulefile_local} ]; then
module load "${modulefile_local}" || print_err_msg_exit "\
Loading .local module file (in directory specified by mod-
ules_dir) for the specified task (task_name) failed:
task_name = \"${task_name}\"
modulefile_local = \"${modulefile_local}\"
modules_dir = \"${modules_dir}\""
fi

# else # using default modulefile
#
# module load "${default_modulefile_name}" || print_err_msg_exit "\
#Loading of default module file failed:
# task_name = \"${task_name}\"
# default_modulefile_name = \"${default_modulefile_name}\"
# default_modules_dir = \"${default_modules_dir}\""
Comment thread
gsketefian marked this conversation as resolved.
# Load the .local module file if available for the given task
#
# fi
modulefile_local="${task_name}.local"
if [ -f ${modules_dir}/${modulefile_local} ]; then
module load "${modulefile_local}" || print_err_msg_exit "\
Loading .local module file (in directory specified by mod-
ules_dir) for the specified task (task_name) failed:
task_name = \"${task_name}\"
modulefile_local = \"${modulefile_local}\"
modules_dir = \"${modules_dir}\""
fi

module list
module list

#fi #End if statement for tasks that load no modules

# Modules that use conda and need an environment activated will set the
# SRW_ENV variable to the name of the environment to be activated. That
Expand Down
Loading