diff --git a/.gitignore b/.gitignore index ec3b63bd0ca..eef0d9c7fb0 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ __pycache__ *.[aox] *.mod *.sw[a-p] +.idea/ # Ignore folders #------------------- diff --git a/parm/config/config.base.emc.dyn b/parm/config/config.base.emc.dyn index f44f4ecee39..c48260abdd1 100755 --- a/parm/config/config.base.emc.dyn +++ b/parm/config/config.base.emc.dyn @@ -159,6 +159,7 @@ export DO_ICE="NO" export DO_AERO="NO" export CCPP_SUITE="FV3_GFS_v17_p8" export WAVE_CDUMP="" # When to include wave suite: gdas, gfs, or both +export DOBNDPNT_WAVE="NO" export cplwav2atm=".false." case "${APP}" in @@ -190,7 +191,7 @@ case "${APP}" in if [[ "$APP" =~ ^S2SW ]]; then export DO_WAVE="YES" - export WAVE_CDUMP="both" + export WAVE_CDUMP="both" export cplwav2atm=".true." export confignamevarfornems="${confignamevarfornems}_wave" fi diff --git a/parm/config/config.base.nco.static b/parm/config/config.base.nco.static index 432e95cc327..4980ecf7527 100755 --- a/parm/config/config.base.nco.static +++ b/parm/config/config.base.nco.static @@ -161,7 +161,7 @@ export FHMAX_HF_GFS=120 export FHOUT_HF_GFS=1 export ILPOST=1 # gempak output frequency up to F120 -# frequency for saving restart files. set to 6,12,24,48 etc +# frequency for saving restart files. 
set to 6,12,24,48 etc export restart_interval_gfs=12 # I/O QUILTING, true--use Write Component; false--use GFDL FMS @@ -209,6 +209,7 @@ export gldas_cyc=00 # run wave component export DO_WAVE=YES export WAVE_CDUMP="both" +export DOBNDPNT_WAVE="YES" # Microphysics Options: 99-ZhaoCarr, 8-Thompson; 6-WSM6, 10-MG, 11-GFDL export imp_physics=8 diff --git a/parm/config/config.fv3 b/parm/config/config.fv3 index e7f521440c1..8c4b74ee87f 100755 --- a/parm/config/config.fv3 +++ b/parm/config/config.fv3 @@ -41,7 +41,7 @@ fi # (Standard) Model resolution dependent variables case $case_in in "C48") - export DELTIM=1200 + export DELTIM=450 export layout_x=3 export layout_y=2 export layout_x_gfs=3 diff --git a/parm/config/config.resources b/parm/config/config.resources index 109309bc4ff..47f9ee5bca4 100755 --- a/parm/config/config.resources +++ b/parm/config/config.resources @@ -184,12 +184,12 @@ elif [ $step = "sfcanl" ]; then elif [ $step = "gldas" ]; then export wtime_gldas="00:10:00" - export npe_gldas=96 - export nth_gldas=1 + export npe_gldas=96 + export nth_gldas=1 export npe_node_gldas=$npe_node_max - export npe_gaussian=96 - export nth_gaussian=1 - export npe_node_gaussian=24 + export npe_gaussian=96 + export nth_gaussian=1 + export npe_node_gaussian=24 if [[ "$machine" = "WCOSS_DELL_P3" ]]; then export npe_gldas=112 ; fi if [[ "$machine" == "WCOSS_C" ]]; then export memory_gldas="3072M"; fi @@ -360,13 +360,13 @@ elif [ $step = "vrfy" ]; then export npe_vrfy_gfs=1 export npe_node_vrfy_gfs=1 if [[ "$machine" == "WCOSS_C" ]]; then - export memory_vrfy="3072M" + export memory_vrfy="3072M" elif [[ "$machine" == "HERA" ]]; then - export memory_vrfy="16384M" + export memory_vrfy="16384M" fi elif [ $step = "metp" ]; then - + export nth_metp=1 export wtime_metp="03:00:00" export npe_metp=4 @@ -375,9 +375,9 @@ elif [ $step = "metp" ]; then export npe_metp_gfs=4 export npe_node_metp_gfs=4 if [[ "$machine" == "WCOSS_C" ]]; then - export memory_metp="3072M" + export 
memory_metp="3072M" elif [[ "$machine" == "THEIA" ]]; then - export memory_metp="16384M" + export memory_metp="16384M" fi elif [ $step = "echgres" ]; then @@ -394,9 +394,9 @@ elif [ $step = "init" ]; then export nth_init=1 export npe_node_init=6 if [ $machine = "WCOSS_DELL_P3" ]; then - export memory_init="10G" + export memory_init="10G" else - export memory_init="70G" + export memory_init="70G" fi elif [ $step = "init_chem" ]; then @@ -428,7 +428,7 @@ elif [ $step = "coupled_ic" ]; then elif [ $step = "eobs" -o $step = "eomg" ]; then - export wtime_eobs="00:15:00" + export wtime_eobs="00:45:00" export wtime_eomg="01:00:00" if [ $CASE = "C768" ]; then export npe_eobs=200 @@ -439,10 +439,16 @@ elif [ $step = "eobs" -o $step = "eomg" ]; then elif [ $CASE = "C96" -o $CASE = "C48" ]; then export npe_eobs=20 fi + export npe_eomg=$npe_eobs export nth_eobs=2 if [[ "$machine" = "WCOSS_DELL_P3" ]]; then export nth_eobs=7; fi + export nth_eomg=$nth_eobs export npe_node_eobs=$(echo "$npe_node_max / $nth_eobs" | bc) - if [[ "$machine" == "WCOSS_C" ]]; then export memory_eobs="3072M"; fi + export npe_node_eomg=$npe_node_eobs + if [[ "$machine" == "WCOSS_C" ]]; then + export memory_eobs="3072M" + export memory_eomg=$memory_eobs + fi elif [ $step = "ediag" ]; then diff --git a/parm/config/config.wave b/parm/config/config.wave index 40809b30c7e..a3b06a8041b 100755 --- a/parm/config/config.wave +++ b/parm/config/config.wave @@ -30,7 +30,6 @@ export waveGRD=${waveGRD:-'gnh_10m aoc_9km gsh_15m'} export waveGRDN=${waveGRDN:-'1 2 3'} # gridnumber for ww3_multi export waveGRDG=${waveGRDG:-'10 20 30'} # gridgroup for ww3_multi export USE_WAV_RMP=${USE_WAV_RMP:-'YES'} #yes/no rmp grid remapping pre-processed coefficients -export DOBNDPNT_WAVE=${DOBNDPNT_WAVE:-'YES'} export waveMULTIGRID=${waveMULTIGRID:-'.true.'} export MESH_WAV=${MESH_WAV:-'mesh.gwes_30m.nc'} @@ -84,20 +83,20 @@ fi # Restart timing business export RSTTYPE_WAV='T' # generate second tier of restart files -if [ "${CDUMP}" 
def get_gfs_interval(gfs_cyc: int) -> str:
    """
    Return the time between successive GFS cycles for a given gfs_cyc choice
    :param gfs_cyc: number of GFS cycles per day; one of 0, 1, 2 or 4
    :type gfs_cyc: int
    :return: interval as 'HH:MM:SS', or None when gfs_cyc is 0
    :rtype: str
    :raises KeyError: when gfs_cyc is not one of the supported choices
    """

    gfs_internal_map = {'0': None, '1': '24:00:00', '2': '12:00:00', '4': '06:00:00'}

    try:
        return gfs_internal_map[str(gfs_cyc)]
    except KeyError:
        # The failed dictionary lookup adds nothing to the message below
        raise KeyError(f'Invalid gfs_cyc = {gfs_cyc}') from None


def get_gfs_cyc_dates(base: Dict[str, Any]) -> Dict[str, Any]:
    """
    Generate GFS dates from experiment dates and gfs_cyc choice
    :param base: base configuration; must provide 'gfs_cyc', 'SDATE' and 'EDATE'
                 (SDATE and EDATE are datetime objects)
    :type base: dict
    :return: copy of base with 'INTERVAL' always set and, unless gfs_cyc is 0,
             'gfs_cyc', 'SDATE_GFS', 'EDATE_GFS', 'INTERVAL_GFS' and 'FHMAX_GFS' added
    :rtype: dict
    """

    base_out = base.copy()

    gfs_cyc = base['gfs_cyc']
    sdate = base['SDATE']
    edate = base['EDATE']
    base_out['INTERVAL'] = '06:00:00'  # Cycled interval is 6 hours

    # Validates gfs_cyc as a side effect (raises KeyError for unsupported values)
    interval_gfs = get_gfs_interval(gfs_cyc)

    # No GFS cycles requested; nothing more to add
    if gfs_cyc == 0:
        return base_out

    # Set GFS cycling dates
    hrinc = 0  # hours from SDATE to the first GFS cycle
    hrdet = 0  # hours from the last GFS cycle to EDATE
    if gfs_cyc == 1:
        hrinc = 24 - sdate.hour
        hrdet = edate.hour
    elif gfs_cyc == 2:
        if sdate.hour in [0, 12]:
            hrinc = 12
        elif sdate.hour in [6, 18]:
            hrinc = 6
        if edate.hour in [6, 18]:
            hrdet = 6
    elif gfs_cyc == 4:
        hrinc = 6

    sdate_gfs = sdate + timedelta(hours=hrinc)
    edate_gfs = edate - timedelta(hours=hrdet)
    if sdate_gfs > edate:
        print('W A R N I N G!')
        print('Starting date for GFS cycles is after Ending date of experiment')
        print(f'SDATE = {sdate.strftime("%Y%m%d%H")}, EDATE = {edate.strftime("%Y%m%d%H")}')
        print(f'SDATE_GFS = {sdate_gfs.strftime("%Y%m%d%H")}, EDATE_GFS = {edate_gfs.strftime("%Y%m%d%H")}')
        gfs_cyc = 0

    base_out['gfs_cyc'] = gfs_cyc
    base_out['SDATE_GFS'] = sdate_gfs
    base_out['EDATE_GFS'] = edate_gfs
    base_out['INTERVAL_GFS'] = interval_gfs

    # Per-cycle forecast length; falls back to the 00z value and then to 120
    fallback = base.get('FHMAX_GFS_00', 120)
    base_out['FHMAX_GFS'] = {hh: base.get(f'FHMAX_GFS_{hh}', fallback)
                             for hh in ['00', '06', '12', '18']}

    return base_out


class AppConfig:
    """
    Describe an application (cycled or forecast-only): which optional components
    are switched on in config.base, which config files are sourced, and which
    tasks (in order) make up the workflow.
    """

    VALID_MODES = ['cycled', 'forecast-only']

    def __init__(self, configuration: 'Configuration') -> None:
        """
        :param configuration: parser used to source 'config.base' and the job configs
        :type configuration: Configuration
        :raises NotImplementedError: when MODE in config.base is not in VALID_MODES
        """

        self.scheduler = Host().scheduler

        _base = configuration.parse_config('config.base')

        self.mode = _base['MODE']

        if self.mode not in self.VALID_MODES:
            raise self._invalid_mode_error()

        self.model_app = _base.get('APP', 'ATM')
        self.do_hybvar = _base.get('DOHYBVAR', False)
        self.do_wave = _base.get('DO_WAVE', False)
        self.do_wave_bnd = _base.get('DOBNDPNT_WAVE', False)
        self.do_ocean = _base.get('DO_OCN', False)
        self.do_ice = _base.get('DO_ICE', False)
        self.do_aero = _base.get('DO_AERO', False)
        self.do_gldas = _base.get('DO_GLDAS', False)
        self.do_bufrsnd = _base.get('DO_BUFRSND', False)
        self.do_gempak = _base.get('DO_GEMPAK', False)
        self.do_awips = _base.get('DO_AWIPS', False)
        self.do_wafs = _base.get('DO_WAFS', False)
        self.do_vrfy = _base.get('DO_VRFY', True)
        self.do_metp = _base.get('DO_METP', False)

        self.do_hpssarch = _base.get('HPSSARCH', False)

        # Stays None when waves are off or the choice is unrecognised
        self.wave_cdumps = None
        if self.do_wave:
            self.wave_cdumps = self._parse_cdumps(_base.get('WAVE_CDUMP', 'BOTH'))

        self.lobsdiag_forenkf = False
        # Stays None when hybrid DA is off or the choice is unrecognised
        self.eupd_cdumps = None
        if self.do_hybvar:
            self.lobsdiag_forenkf = _base.get('lobsdiag_forenkf', False)
            self.eupd_cdumps = self._parse_cdumps(_base.get('EUPD_CYC', 'gdas'))

        # Get a list of all possible config_files that would be part of the application
        self.configs_names = self._get_app_configs()

        # Source the config_files for the jobs in the application
        self.configs = self._source_configs(configuration)

        # Update the base config dictionary based on application.
        # Only the mode lookup is guarded; a KeyError raised while updating
        # (e.g. an invalid gfs_cyc) must surface as itself, not as a mode error.
        update_base = self._select_for_mode({'cycled': self._cycled_upd_base,
                                             'forecast-only': self._forecast_only_upd_base})
        self.configs['base'] = update_base(self.configs['base'])

        # Save base in the internal state since it is often needed
        self._base = self.configs['base']

        # Get more configuration options into the class attributes
        self.gfs_cyc = self._base.get('gfs_cyc')

        # Finally get task names for the application
        self.task_names = self.get_task_names()

    def _invalid_mode_error(self) -> NotImplementedError:
        """
        Build the exception reported when self.mode is not a supported mode
        """

        return NotImplementedError(f'{self.mode} is not a valid application mode.\n' +
                                   'Valid application modes are:\n' +
                                   f'{", ".join(self.VALID_MODES)}')

    def _select_for_mode(self, choices: Dict[str, Any]) -> Any:
        """
        Return the entry of choices keyed by self.mode
        :raises NotImplementedError: when self.mode has no entry in choices
        """

        try:
            return choices[self.mode]
        except KeyError:
            raise self._invalid_mode_error() from None

    @staticmethod
    def _parse_cdumps(choice: str):
        """
        Translate a 'gdas' | 'gfs' | 'both' choice (any case) into a list of cdumps
        Returns None when the choice is not recognised
        """

        choice = choice.lower()
        if choice == 'both':
            return ['gfs', 'gdas']
        if choice in ('gfs', 'gdas'):
            return [choice]
        return None

    @staticmethod
    def _require_cdumps(cdumps, option: str):
        """
        Return cdumps, or raise a ValueError naming the offending option if it is None
        """

        if cdumps is None:
            raise ValueError(f'{option} must be one of "gdas", "gfs" or "both"')
        return cdumps

    def _get_app_configs(self):
        """
        Returns the names of the config_files involved in the application mode
        """

        # Getters rather than values, so only the selected mode's list is built
        build_configs = self._select_for_mode({'cycled': lambda: self._cycled_configs,
                                               'forecast-only': lambda: self._forecast_only_configs})

        return build_configs()

    def _optional_product_configs(self):
        """
        Returns the config_files of the optional components shared by both modes:
        metp, gempak, awips, waves and wafs (in that order)
        """

        configs = []

        if self.do_metp:
            configs += ['metp']

        if self.do_gempak:
            configs += ['gempak']

        if self.do_awips:
            configs += ['awips']

        if self.do_wave:
            configs += ['waveinit', 'waveprep', 'wavepostsbs', 'wavepostpnt']
            if self.do_wave_bnd:
                configs += ['wavepostbndpnt', 'wavepostbndpntbll']
            if self.do_gempak:
                configs += ['wavegempak']
            if self.do_awips:
                configs += ['waveawipsbulls', 'waveawipsgridded']

        if self.do_wafs:
            configs += ['wafs', 'wafsgrib2', 'wafsblending', 'wafsgcip', 'wafsgrib20p25', 'wafsblending0p25']

        return configs

    @property
    def _cycled_configs(self):
        """
        Returns the config_files that are involved in the cycled app
        """

        configs = ['prep',
                   'anal', 'sfcanl', 'analdiag', 'analcalc',
                   'fcst', 'post', 'vrfy', 'arch']

        if self.do_gldas:
            configs += ['gldas']

        if self.do_hybvar:
            configs += ['eobs', 'eomg', 'ediag', 'eupd', 'ecen', 'esfc', 'efcs', 'echgres', 'epos', 'earc']

        configs += self._optional_product_configs()

        return configs

    @property
    def _forecast_only_configs(self):
        """
        Returns the config_files that are involved in the forecast-only app
        """

        configs = ['fcst', 'post', 'vrfy', 'arch']

        if self.model_app in ['S2S', 'S2SW', 'S2SWA']:
            configs += ['coupled_ic']
        else:
            configs += ['init']
            if self.do_hpssarch:
                configs += ['getic']

        if self.do_aero:
            configs += ['aerosol_init']

        if self.do_ocean or self.do_ice:
            configs += ['ocnpost']

        configs += self._optional_product_configs()

        return configs

    @staticmethod
    def _cycled_upd_base(base_in):
        """
        Returns a copy of base_in with the GFS cycling dates and intervals added
        """

        return get_gfs_cyc_dates(base_in)

    @staticmethod
    def _forecast_only_upd_base(base_in):
        """
        Returns a copy of base_in with INTERVAL_GFS set and CDUMP forced to 'gfs'
        """

        base_out = base_in.copy()
        base_out['INTERVAL_GFS'] = get_gfs_interval(base_in['gfs_cyc'])
        base_out['CDUMP'] = 'gfs'

        return base_out

    def _source_configs(self, configuration: 'Configuration') -> Dict[str, Any]:
        """
        Given the configuration object and jobs,
        source the configurations for each config and return a dictionary
        Every config depends on "config.base"
        """

        configs = dict()

        # Return config.base as well
        configs['base'] = configuration.parse_config('config.base')

        # Source the list of all config_files involved in the application
        for config in self.configs_names:

            # All must source config.base first
            files = ['config.base']

            if config in ['eobs', 'eomg']:
                files += ['config.anal', 'config.eobs']
            elif config in ['eupd']:
                files += ['config.anal', 'config.eupd']
            elif config in ['efcs']:
                files += ['config.fcst', 'config.efcs']
            elif 'wave' in config:
                files += ['config.wave', f'config.{config}']
            else:
                files += [f'config.{config}']

            print(f'sourcing config.{config}')
            configs[config] = configuration.parse_config(files)

        return configs

    def get_task_names(self):
        """
        Get the task names for the application, keyed by cdump
        :raises NotImplementedError: when self.mode is not a supported mode
        """

        # Only the mode lookup is guarded; errors raised while building the
        # task list propagate unchanged.
        build_task_names = self._select_for_mode({'cycled': self._get_cycled_task_names,
                                                  'forecast-only': self._get_forecast_only_task_names})

        return build_task_names()

    def _get_cycled_task_names(self):
        """
        Get the task names for all the tasks in the cycled application.
        Note that the order of the task names matters in the XML.
        This is the place where that order is set.
        :raises ValueError: when waves (or hybrid DA) are on but the matching
                            cdump choice was not recognised
        """

        # Fail with a clear message instead of "argument of type 'NoneType' is not iterable"
        wave_cdumps = self._require_cdumps(self.wave_cdumps, 'WAVE_CDUMP') if self.do_wave else []
        eupd_cdumps = self._require_cdumps(self.eupd_cdumps, 'EUPD_CYC') if self.do_hybvar else []

        gdas_gfs_common_tasks_before_fcst = ['prep', 'anal', 'sfcanl', 'analcalc']
        gdas_gfs_common_tasks_after_fcst = ['post', 'vrfy']
        gdas_gfs_common_cleanup_tasks = ['arch']

        gldas_tasks = ['gldas']
        wave_prep_tasks = ['waveinit', 'waveprep']
        wave_bndpnt_tasks = ['wavepostbndpnt', 'wavepostbndpntbll']
        wave_post_tasks = ['wavepostsbs', 'wavepostpnt']

        hybrid_gdas_or_gfs_tasks = []
        hybrid_gdas_tasks = []
        if self.do_hybvar:
            hybrid_gdas_or_gfs_tasks += ['eobs', 'eupd', 'echgres']
            hybrid_gdas_or_gfs_tasks += ['ediag'] if self.lobsdiag_forenkf else ['eomg']
            hybrid_gdas_tasks += ['ecen', 'esfc', 'efcs', 'epos', 'earc']

        # Collect all "gdas" cycle tasks
        gdas_tasks = gdas_gfs_common_tasks_before_fcst + ['analdiag']

        if self.do_gldas:
            gdas_tasks += gldas_tasks

        if 'gdas' in wave_cdumps:
            gdas_tasks += wave_prep_tasks

        gdas_tasks += ['fcst']

        gdas_tasks += gdas_gfs_common_tasks_after_fcst

        if self.do_hybvar:
            if 'gdas' in eupd_cdumps:
                gdas_tasks += hybrid_gdas_or_gfs_tasks
            gdas_tasks += hybrid_gdas_tasks

        if 'gdas' in wave_cdumps:
            if self.do_wave_bnd:
                gdas_tasks += wave_bndpnt_tasks
            gdas_tasks += wave_post_tasks

        gdas_tasks += gdas_gfs_common_cleanup_tasks

        # Collect "gfs" cycle tasks
        # Copy, so that extending gfs_tasks never mutates the shared list
        gfs_tasks = list(gdas_gfs_common_tasks_before_fcst)

        if 'gfs' in wave_cdumps:
            gfs_tasks += wave_prep_tasks

        gfs_tasks += ['fcst']

        gfs_tasks += gdas_gfs_common_tasks_after_fcst

        if self.do_metp:
            gfs_tasks += ['metp']

        if 'gfs' in eupd_cdumps:
            gfs_tasks += hybrid_gdas_or_gfs_tasks

        if 'gfs' in wave_cdumps:
            if self.do_wave_bnd:
                gfs_tasks += wave_bndpnt_tasks
            gfs_tasks += wave_post_tasks
            if self.do_gempak:
                gfs_tasks += ['wavegempak']
            if self.do_awips:
                gfs_tasks += ['waveawipsbulls', 'waveawipsgridded']

        if self.do_bufrsnd:
            gfs_tasks += ['postsnd']

        if self.do_gempak:
            gfs_tasks += ['gempak']

        if self.do_awips:
            gfs_tasks += ['awips']

        if self.do_wafs:
            gfs_tasks += ['wafs', 'wafsgcip', 'wafsgrib2', 'wafsgrib20p25', 'wafsblending', 'wafsblending0p25']

        gfs_tasks += gdas_gfs_common_cleanup_tasks

        tasks = {'gdas': gdas_tasks, 'gfs': gfs_tasks}

        return tasks

    def _get_forecast_only_task_names(self):
        """
        Get the task names for all the tasks in the forecast-only application.
        Note that the order of the task names matters in the XML.
        This is the place where that order is set.
        """

        tasks = []

        if 'S2S' in self.model_app:
            tasks += ['coupled_ic']
        else:
            if self.do_hpssarch:
                tasks += ['getic']
            tasks += ['init']

        if self.do_aero:
            tasks += ['aerosol_init']

        if self.do_wave:
            tasks += ['waveinit']
            # tasks += ['waveprep']  # TODO - verify if waveprep is executed in forecast-only mode when APP=ATMW|S2SW

        tasks += ['fcst']

        tasks += ['post']
        if 'S2S' in self.model_app:
            tasks += ['ocnpost']

        tasks += ['vrfy']
        if self.do_metp:
            tasks += ['metp']

        if self.do_wave:
            if self.do_wave_bnd:
                tasks += ['wavepostbndpnt', 'wavepostbndpntbll']
            tasks += ['wavepostsbs', 'wavepostpnt']
            if self.do_gempak:
                tasks += ['wavegempak']
            if self.do_awips:
                tasks += ['waveawipsbulls', 'waveawipsgridded']

        if self.do_bufrsnd:
            tasks += ['postsnd']

        if self.do_gempak:
            tasks += ['gempak']

        if self.do_awips:
            tasks += ['awips']

        if self.do_wafs:
            tasks += ['wafs', 'wafsgcip', 'wafsgrib2', 'wafsgrib20p25', 'wafsblending', 'wafsblending0p25']

        tasks += ['arch']  # arch **must** be the last task

        return {f"{self._base['CDUMP']}": tasks}
class ShellScriptException(Exception):
    """
    Raised when the environment could not be recovered after sourcing shell script(s)
    """

    def __init__(self, scripts, errors):
        self.scripts = scripts
        self.errors = errors
        super().__init__(f'{errors}: error processing {" ".join(scripts)}')


class UnknownConfigError(Exception):
    """
    Raised when a requested config file is not present in the config directory
    """
    pass


class Configuration:
    """
    Configuration parser for the global-workflow
    (or generally for sourcing a shell script into a python dictionary)
    """

    DATE_ENV_VARS = ['CDATE', 'SDATE', 'EDATE']
    TRUTHS = ['y', 'yes', 't', 'true', '.t.', '.true.']
    BOOLS = ['n', 'no', 'f', 'false', '.f.', '.false.'] + TRUTHS
    BOOLS = [x.upper() for x in BOOLS] + BOOLS

    def __init__(self, config_dir: Union[str, Path]):
        """
        Given a directory containing config files (config.XYZ),
        remember the directory and collect the config_files found in it
        """

        self.config_dir = config_dir
        self.config_files = self._get_configs

    @property
    def _get_configs(self) -> List[str]:
        """
        Given a directory containing config files (config.XYZ),
        return a list of config_files minus the ones ending with ".default"
        """

        return [config for config in glob.glob(f'{self.config_dir}/config.*')
                if not config.endswith('.default')]

    def find_config(self, config_name: str) -> str:
        """
        Given a config file name, find the full path of the config file
        :raises UnknownConfigError: when no known config file has that name
        """

        for config in self.config_files:
            if config_name == os.path.basename(config):
                return config

        # Report the configs that ARE available so the caller can spot a typo
        known = [os.path.basename(config) for config in self.config_files]
        raise UnknownConfigError(
            f'{config_name} does not exist (known: {repr(known)}), ABORT!')

    def parse_config(self, files: Union[str, bytes, list]) -> Dict[str, Any]:
        """
        Given the name of config file(s), key-value pair of all variables in the config file(s)
        are returned as a dictionary
        :param files: config file or list of config files
        :type files: list or str or unicode
        :return: Key value pairs representing the environment variables defined
                 in the script.
        :rtype: dict
        """
        if isinstance(files, (str, bytes)):
            files = [files]
        files = [self.find_config(file) for file in files]
        varbles = dict()
        for key, value in self._get_script_env(files).items():
            if key in self.DATE_ENV_VARS:  # likely a date, convert to datetime
                varbles[key] = datetime.strptime(value, '%Y%m%d%H')
            elif value in self.BOOLS:  # Likely a boolean, convert to True/False
                varbles[key] = self._true_or_not(value)
            elif '.' in value:  # Likely a number and that too a float
                varbles[key] = self._cast_or_not(float, value)
            else:  # Still could be a number, may be an integer
                varbles[key] = self._cast_or_not(int, value)

        return varbles

    def print_config(self, files: Union[str, bytes, list]) -> None:
        """
        Given the name of config file(s), key-value pair of all variables in the config file(s) are printed
        Same signature as parse_config
        :param files: config file or list of config files
        :type files: list or str or unicode
        :return: None
        """
        config = self.parse_config(files)
        pprint(config, width=4)

    @classmethod
    def _get_script_env(cls, scripts: List) -> Dict[str, Any]:
        """
        Return only the variables that exist after sourcing scripts
        but not in a plain shell environment
        """
        default_env = cls._get_shell_env([])
        and_script_env = cls._get_shell_env(scripts)
        vars_just_in_script = set(and_script_env) - set(default_env)
        return {name: and_script_env[name] for name in vars_just_in_script}

    @staticmethod
    def _get_shell_env(scripts: List) -> Dict[str, Any]:
        """
        Source scripts (in order) in a shell and return the resulting environment
        :raises ShellScriptException: when the environment dump cannot be located in the output
        """
        import shlex  # local import: only needed here

        # Quote each path so directories containing spaces or shell
        # metacharacters are sourced as a single word
        runme = ''.join([f'source {shlex.quote(str(s))} ; ' for s in scripts])
        # Random marker separating anything the scripts print from the environment dump
        magic = f'--- ENVIRONMENT BEGIN {random.randint(0,64**5)} ---'
        runme += f'/bin/echo -n "{magic}" ; /usr/bin/env -0'
        env = subprocess.run(runme, shell=True, stdin=subprocess.DEVNULL,
                             stdout=subprocess.PIPE)
        return Configuration._parse_env_dump(env.stdout.decode(), magic, scripts)

    @staticmethod
    def _parse_env_dump(out: str, magic: str, scripts: List) -> Dict[str, str]:
        """
        Parse the NUL-separated NAME=VALUE entries that follow magic in out
        :raises ShellScriptException: when magic does not occur in out
        """
        begin = out.find(magic)
        if begin < 0:
            raise ShellScriptException(scripts, 'Cannot find magic string; '
                                       'at least one script failed: ' + repr(out))
        varbls = dict()
        for entry in out[begin + len(magic):].split('\x00'):
            name, equal, value = entry.partition('=')
            # Skip the empty entry after the trailing NUL and anything that is
            # not a NAME=VALUE pair; only the first '=' separates name from value
            if not equal or not name:
                continue
            varbls[name] = value
        return varbls

    @staticmethod
    def _cast_or_not(type, value):
        """
        Return type(value), or value unchanged when the conversion raises ValueError
        """
        try:
            return type(value)
        except ValueError:
            return value

    @staticmethod
    def _true_or_not(value):
        """
        Return True when value (case-insensitive) is one of TRUTHS, False otherwise;
        values without a lower() method are returned unchanged
        """
        try:
            return value.lower() in Configuration.TRUTHS
        except AttributeError:
            return value
def load_yaml(_path: Path):
    """
    Read the YAML file at _path and return its parsed content
    """
    # NOTE(review): this uses the full (non-safe) YAML loader; acceptable only
    # because the host files ship with the workflow. Consider a safe loader if
    # the path can ever point at untrusted content.
    with open(_path, "r") as _file:
        yaml_dict = load(_file, Loader=Loader)
    return yaml_dict


class Host:
    """
    Gather Host specific information.
    """

    SUPPORTED_HOSTS = ['HERA', 'ORION', 'JET',
                       'WCOSS_DELL_P3', 'WCOSS2']

    def __init__(self, host=None):
        """
        :param host: optional host name; when given it must match the detected host
        :raises ValueError: when host does not match the detected host
        """

        detected_host = self.detect()

        if host is not None and host != detected_host:
            raise ValueError(f'detected host: "{detected_host}" does not match host: "{host}"')

        self.machine = detected_host
        self.info = self._get_info
        self.scheduler = self.info['scheduler']

    @classmethod
    def detect(cls):
        """
        Identify the machine from the presence of well-known filesystem paths
        :return: name of the detected (and supported) host
        :raises NotImplementedError: when the machine is unknown or not in SUPPORTED_HOSTS
        """

        # Checked in order; the first entry whose paths all exist wins.
        # WCOSS_C can be recognised but is not in SUPPORTED_HOSTS, so it is rejected below.
        signatures = [('HERA', ['/scratch1/NCEPDEV']),
                      ('ORION', ['/work/noaa']),
                      ('JET', ['/lfs4/HFIP']),
                      ('WCOSS_C', ['/gpfs', '/etc/SuSE-release']),
                      ('WCOSS_DELL_P3', ['/gpfs/dell2']),
                      ('WCOSS2', ['/lfs/f1'])]

        machine = 'NOTFOUND'
        for name, paths in signatures:
            if all(os.path.exists(path) for path in paths):
                machine = name
                break

        if machine not in cls.SUPPORTED_HOSTS:
            raise NotImplementedError('This machine is not a supported host.\n' +
                                      'Currently supported hosts are:\n' +
                                      f'{" | ".join(cls.SUPPORTED_HOSTS)}')

        return machine

    @property
    def _get_info(self) -> dict:
        """
        Load hosts/<machine>.yaml (lower-cased machine name) from the directory of this file
        :raises FileNotFoundError: when the host file does not exist
        :raises IOError: when the host file cannot be read
        :raises Exception: for any other failure while loading the host file
        """

        hostfile = Path(__file__).parent / 'hosts' / f'{self.machine.lower()}.yaml'
        # Chain explicitly so the underlying error stays attached as the cause
        try:
            info = load_yaml(hostfile)
        except FileNotFoundError as err:
            raise FileNotFoundError(f'{hostfile} does not exist!') from err
        except IOError as err:
            raise IOError(f'Unable to read from {hostfile}') from err
        except Exception as err:
            raise Exception(f'unable to get information for {self.machine}') from err

        return info
'/scratch1/NCEPDEV/stmp2/$USER' +ptmp: '/scratch1/NCEPDEV/stmp4/$USER' +noscrub: $HOMEDIR +account: fv3-cpu +scheduler: slurm +queue: batch +queue_service: service +partition_batch: hera +chgrp_rstprod: 'YES' +chgrp_cmd: 'chgrp rstprod' +hpssarch: 'YES' +localarch: 'NO' +atardir: '/NCEPDEV/$HPSS_PROJECT/1year/$USER/$machine/scratch/$PSLOT' diff --git a/ush/rocoto/hosts/orion.yaml b/ush/rocoto/hosts/orion.yaml new file mode 100644 index 00000000000..bf59ea3e1d6 --- /dev/null +++ b/ush/rocoto/hosts/orion.yaml @@ -0,0 +1,19 @@ +base_git: '/work/noaa/global/glopara/git' +base_svn: '/work/noaa/global/glopara/svn' +dmpdir: '/work/noaa/rstprod/dump' +nwprod: '/work/noaa/global/glopara/nwpara' +comroot: '/work/noaa/global/glopara/com' +homedir: '/work/noaa/global/$USER' +stmp: '/work/noaa/stmp/$USER' +ptmp: '/work/noaa/stmp/$USER' +noscrub: $HOMEDIR +scheduler: slurm +account: fv3-cpu +queue: batch +queue_service: service +partition_batch: orion +chgrp_rstprod: 'YES' +chgrp_cmd: 'chgrp rstprod' +hpssarch: 'NO' +localarch: 'NO' +atardir: '$NOSCRUB/archive_rotdir/$PSLOT' \ No newline at end of file diff --git a/ush/rocoto/hosts/wcoss_dell_p3.yaml b/ush/rocoto/hosts/wcoss_dell_p3.yaml new file mode 100644 index 00000000000..daa9595c310 --- /dev/null +++ b/ush/rocoto/hosts/wcoss_dell_p3.yaml @@ -0,0 +1,19 @@ +base_git: '/gpfs/dell2/emc/modeling/noscrub/emc.glopara/git' +base_svn: '/gpfs/dell2/emc/modeling/noscrub/emc.glopara/git' +dmpdir: '/gpfs/dell3/emc/global/dump' +nwprod: '${NWROOT:-"/gpfs/dell1/nco/ops/nwprod"}' +comroot: '${COMROOT:-"/gpfs/dell1/nco/ops/com"}' +homedir: '/gpfs/dell2/emc/modeling/noscrub/$USER' +stmp: '/gpfs/dell3/stmp/$USER' +ptmp: '/gpfs/dell3/ptmp/$USER' +noscrub: $HOMEDIR +account: GFS-DEV +scheduler: lsf +queue: dev +queue_service: dev_transfer +partition_batch: None +chgrp_rstprod: 'YES' +chgrp_cmd: 'chgrp rstprod' +hpssarch: 'YES' +localarch: 'NO' +atardir: '/NCEPDEV/$HPSS_PROJECT/1year/$USER/$machine/scratch/$PSLOT' \ No newline at end of file 
diff --git a/ush/rocoto/rocoto.py b/ush/rocoto/rocoto.py old mode 100755 new mode 100644 index c4a336e9bd6..f17caccae2c --- a/ush/rocoto/rocoto.py +++ b/ush/rocoto/rocoto.py @@ -1,15 +1,22 @@ #!/usr/bin/env python3 +from typing import Union, List, Dict, Any + ''' MODULE: rocoto.py ABOUT: - Helper module to create tasks, metatasks, and dependencies + Helper module to create tasks, metatasks, and dependencies for Rocoto ''' -def create_metatask(task_dict, metatask_dict): - ''' +__all__ = ['create_task', 'create_metatask', + 'add_dependency', 'create_dependency', + 'create_envar', 'create_entity', 'create_cycledef'] + + +def create_metatask(task_dict: Dict[str, Any], metatask_dict: Dict[str, Any]) -> List[str]: + """ create a Rocoto metatask given a dictionary containing task and metatask information :param metatask_dict: metatask key-value parameters :type metatask_dict: dict @@ -17,7 +24,7 @@ def create_metatask(task_dict, metatask_dict): :type task_dict: dict :return: Rocoto metatask :rtype: list - ''' + """ # Grab metatask info from the metatask_dict metataskname = metatask_dict.get('metataskname', 'demometatask') @@ -25,11 +32,10 @@ def create_metatask(task_dict, metatask_dict): varval = metatask_dict.get('varval', 1) vardict = metatask_dict.get('vardict', None) - strings = [] + strings = [f'\n', + '\n', + f'\t{str(varval)}\n'] - strings.append(f'\n') - strings.append('\n') - strings.append(f'\t{str(varval)}\n') if vardict is not None: for key in vardict.keys(): value = str(vardict[key]) @@ -44,51 +50,52 @@ def create_metatask(task_dict, metatask_dict): return strings -def create_task(task_dict): - ''' +def create_task(task_dict: Dict[str, Any]) -> List[str]: + """ create a Rocoto task given a dictionary containing task information :param task_dict: task key-value parameters :type task_dict: dict :return: Rocoto task :rtype: list - ''' + """ - # Grab task info from the task_dict + # Grab task info from the task_names taskname = task_dict.get('taskname', 
'demotask') cycledef = task_dict.get('cycledef', 'democycle') maxtries = task_dict.get('maxtries', 3) final = task_dict.get('final', False) command = task_dict.get('command', 'sleep 10') jobname = task_dict.get('jobname', 'demojob') - account = task_dict.get('account', 'batch') - queue = task_dict.get('queue', 'debug') - partition = task_dict.get('partition', None) - walltime = task_dict.get('walltime', '00:01:00') + resources_dict = task_dict['resources'] + account = resources_dict.get('account', 'batch') + queue = resources_dict.get('queue', 'debug') + partition = resources_dict.get('partition', None) + walltime = resources_dict.get('walltime', '00:01:00') + native = resources_dict.get('native', None) + memory = resources_dict.get('memory', None) + nodes = resources_dict.get('nodes', 1) + ppn = resources_dict.get('ppn', 1) + threads = resources_dict.get('threads', 1) log = task_dict.get('log', 'demo.log') - native = task_dict.get('native', None) - memory = task_dict.get('memory', None) - resources = task_dict.get('resources', None) - envar = task_dict.get('envar', None) + envar = task_dict.get('envars', None) dependency = task_dict.get('dependency', None) str_maxtries = str(maxtries) str_final = ' final="true"' if final else '' envar = envar if isinstance(envar, list) else [envar] - strings = [] + strings = [f'\n', + '\n', + f'\t{command}\n', + '\n', + f'\t{jobname}\n', + f'\t{account}\n', + f'\t{queue}\n'] - strings.append(f'\n') - strings.append('\n') - strings.append(f'\t{command}\n') - strings.append('\n') - strings.append(f'\t{jobname}\n') - strings.append(f'\t{account}\n') - strings.append(f'\t{queue}\n') if partition is not None: strings.append(f'\t{partition}\n') - if resources is not None: - strings.append(f'\t{resources}\n') strings.append(f'\t{walltime}\n') + strings.append(f'\t{nodes}:ppn={ppn}:tpp={threads}\n') if memory is not None: strings.append(f'\t{memory}\n') if native is not None: @@ -114,38 +121,31 @@ def create_task(task_dict): return 
strings -def add_dependency(dep_dict): - ''' +def add_dependency(dep_dict: Dict[str, Any]) -> str: + """ create a simple Rocoto dependency given a dictionary with dependency information :param dep_dict: dependency key-value parameters :type dep_dict: dict :return: Rocoto simple dependency :rtype: str - ''' + """ + + tag_map = {'task': _add_task_tag, + 'metatask': _add_task_tag, + 'data': _add_data_tag, + 'cycleexist': _add_cycle_tag, + 'streq': _add_streq_tag, + 'strneq': _add_streq_tag} dep_condition = dep_dict.get('condition', None) dep_type = dep_dict.get('type', None) - if dep_type in ['task', 'metatask']: - - string = add_task_tag(dep_dict) - - elif dep_type in ['data']: - - string = add_data_tag(dep_dict) - - elif dep_type in ['cycleexist']: - - string = add_cycle_tag(dep_dict) - - elif dep_type in ['streq', 'strneq']: - - string = add_streq_tag(dep_dict) - - else: - - msg = f'Unknown dependency type {dep_dict["type"]}' - raise KeyError(msg) + try: + string = tag_map[dep_type](dep_dict) + except KeyError: + raise KeyError(f'{dep_type} is an unknown dependency type.\n' + + 'Currently supported dependency types are:\n' + + f'{" | ".join(tag_map.keys())}') if dep_condition is not None: string = f'<{dep_condition}>{string}' @@ -153,14 +153,14 @@ def add_dependency(dep_dict): return string -def add_task_tag(dep_dict): - ''' +def _add_task_tag(dep_dict: Dict[str, Any]) -> str: + """ create a simple task or metatask tag :param dep_dict: dependency key-value parameters :type dep_dict: dict :return: Rocoto simple task or metatask dependency :rtype: str - ''' + """ dep_type = dep_dict.get('type', None) dep_name = dep_dict.get('name', None) @@ -178,14 +178,15 @@ def add_task_tag(dep_dict): return string -def add_data_tag(dep_dict): - ''' + +def _add_data_tag(dep_dict: Dict[str, Any]) -> str: + """ create a simple data tag :param dep_dict: dependency key-value parameters :type dep_dict: dict :return: Rocoto simple task or metatask dependency :rtype: str - ''' + """ 
dep_type = dep_dict.get('type', None) dep_data = dep_dict.get('data', None) @@ -206,7 +207,7 @@ def add_data_tag(dep_dict): strings = [''] for data, offset in zip(dep_data, dep_offset): if '@' in data: - offset_str = '' if offset in [None, ''] else f' offset="{offset}"' + offset_str = '' if offset in [None, ''] else f' offset="{offset}"' offset_string_b = f'' offset_string_e = '' else: @@ -219,14 +220,15 @@ def add_data_tag(dep_dict): return ''.join(strings) -def add_cycle_tag(dep_dict): - ''' + +def _add_cycle_tag(dep_dict: Dict[str, Any]) -> str: + """ create a simple cycle exist tag :param dep_dict: dependency key-value parameters :type dep_dict: dict :return: Rocoto simple task or metatask dependency :rtype: str - ''' + """ dep_type = dep_dict.get('type', None) dep_offset = dep_dict.get('offset', None) @@ -239,14 +241,15 @@ def add_cycle_tag(dep_dict): return string -def add_streq_tag(dep_dict): - ''' + +def _add_streq_tag(dep_dict: Dict[str, Any]) -> str: + """ create a simple string comparison tag :param dep_dict: dependency key-value parameters :type dep_dict: dict :return: Rocoto simple task or metatask dependency :rtype: str - ''' + """ dep_type = dep_dict.get('type', None) dep_left = dep_dict.get('left', None) @@ -271,8 +274,8 @@ def add_streq_tag(dep_dict): def _traverse(o, tree_types=(list, tuple)): - ''' - Traverse through a list of lists or tuples and yeild the value + """ + Traverse through a list of lists or tuples and yield the value Objective is to flatten a list of lists or tuples :param o: list of lists or not :type o: list, tuple, scalar @@ -280,7 +283,7 @@ def _traverse(o, tree_types=(list, tuple)): :type tree_types: tuple :return: value in the list or tuple :rtype: scalar - ''' + """ if isinstance(o, tree_types): for value in o: @@ -290,9 +293,9 @@ def _traverse(o, tree_types=(list, tuple)): yield o -def create_dependency(dep_condition=None, dep=None): - ''' - create a compound dependency given a list of dependendies, and compounding 
condition +def create_dependency(dep_condition=None, dep=None) -> List[str]: + """ + create a compound dependency given a list of dependencies, and compounding condition the list of dependencies are created using add_dependency :param dep_condition: dependency condition :type dep_condition: boolean e.g. and, or, true, false @@ -300,7 +303,7 @@ def create_dependency(dep_condition=None, dep=None): :type dep: str or list :return: Rocoto compound dependency :rtype: list - ''' + """ dep = dep if isinstance(dep, list) else [dep] @@ -323,9 +326,9 @@ def create_dependency(dep_condition=None, dep=None): return strings -def create_envar(name=None,value=None): - ''' - create an Rocoto environment variable given name and value +def create_envar(name: str, value: Union[str, float, int]) -> str: + """ + create a Rocoto environment variable given name and value returns the environment variable as a string :param name: name of the environment variable :type name: str @@ -333,45 +336,31 @@ def create_envar(name=None,value=None): :type value: str or float or int or unicode :return: Rocoto environment variable key-value pair :rtype: str - ''' + """ - string = '' - string += '' - string += f'{name}' - string += f'{str(value)}' - string += '' - - return string + return f'{name}{str(value)}' def create_cycledef(group=None, start=None, stop=None, step=None): - ''' - create an Rocoto cycle definition + """ + create a Rocoto cycle definition returns the environment variable as a string :param group: cycle definition group name :type group: str :param start: cycle start datetime :type start: str - :param end: cycle end datetime + :param step: cycle interval (timedelta) :type stop: str :param step: cycle interval (timedelta) - :type interval: str - :param value: value of the environment variable - :type value: str or float or int or unicode :return: Rocoto cycledef variable string :rtype: str - ''' + """ - string = '' - string += f'' - string += f'{start} {stop} {step}' - string += '' - - 
return string + return f'{start} {stop} {step}' -def create_entity(name=None, value=None): - ''' +def create_entity(name: str, value: Union[str, float, int]) -> str: + """ create an XML ENTITY variable given name and value returns the variable as a string :param name: name of the variable @@ -380,7 +369,6 @@ def create_entity(name=None, value=None): :type value: str or float or int or unicode :return: XML entity variable key-value pair :rtype: str - ''' - - return f'' + """ + return f'' diff --git a/ush/rocoto/setup_expt.py b/ush/rocoto/setup_expt.py index 4f0749df9f4..0d18ba7a569 100755 --- a/ush/rocoto/setup_expt.py +++ b/ush/rocoto/setup_expt.py @@ -1,32 +1,32 @@ #!/usr/bin/env python3 -''' +""" Entry point for setting up an experiment in the global-workflow -''' +""" import os import glob import shutil from datetime import datetime from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter -import workflow_utils as wfu +from hosts import Host def makedirs_if_missing(dirname): - ''' + """ Creates a directory if not already present - ''' + """ if not os.path.exists(dirname): os.makedirs(dirname) def fill_COMROT(host, inputs): - ''' + """ Method to populate the COMROT for supported modes. 
INPUTS: - host: host specific object from class HostInfo in workflow_utils.py + host: host object from class Host inputs: user inputs to setup_expt.py - ''' + """ fill_modes = { 'cycled': fill_COMROT_cycled, @@ -36,7 +36,7 @@ def fill_COMROT(host, inputs): try: fill_modes[inputs.mode](host, inputs) except KeyError: - raise NotImplementedError(f'{mode} is not a supported mode.\n' + + raise NotImplementedError(f'{inputs.mode} is not a supported mode.\n' + 'Currently supported modes are:\n' + f'{" | ".join(fill_modes.keys())}') @@ -44,9 +44,9 @@ def fill_COMROT(host, inputs): def fill_COMROT_cycled(host, inputs): - ''' + """ Implementation of 'fill_COMROT' for cycled mode - ''' + """ idatestr = inputs.idate.strftime('%Y%m%d%H') comrot = os.path.join(inputs.comrot, inputs.pslot) @@ -75,18 +75,18 @@ def fill_COMROT_cycled(host, inputs): def fill_COMROT_forecasts(host, inputs): - ''' + """ Implementation of 'fill_COMROT' for forecast-only mode - ''' + """ return def fill_EXPDIR(inputs): - ''' + """ Method to copy config files from workflow to experiment directory INPUTS: inputs: user inputs to `setup_expt.py` - ''' + """ configdir = inputs.configdir expdir = os.path.join(inputs.expdir, inputs.pslot) @@ -106,9 +106,9 @@ def fill_EXPDIR(inputs): def edit_baseconfig(host, inputs): - ''' + """ Parses and populates the templated `config.base.emc.dyn` to `config.base` - ''' + """ here = os.path.dirname(__file__) top = os.path.abspath(os.path.join( @@ -147,6 +147,7 @@ def edit_baseconfig(host, inputs): "@APP@": inputs.app, } + extend_dict = dict() if inputs.mode in ['cycled']: extend_dict = { "@CASEENS@": f'C{inputs.resens}', @@ -179,9 +180,9 @@ def edit_baseconfig(host, inputs): def input_args(): - ''' + """ Method to collect user arguments for `setup_expt.py` - ''' + """ here = os.path.dirname(__file__) top = os.path.abspath(os.path.join( @@ -252,9 +253,9 @@ def input_args(): def query_and_clean(dirname): - ''' + """ Method to query if a directory exists and gather user input 
for further action - ''' + """ create_dir = True if os.path.exists(dirname): @@ -273,7 +274,7 @@ def query_and_clean(dirname): if __name__ == '__main__': user_inputs = input_args() - host=wfu.HostInfo(wfu.detectMachine()) + host = Host() comrot = os.path.join(user_inputs.comrot, user_inputs.pslot) expdir = os.path.join(user_inputs.expdir, user_inputs.pslot) diff --git a/ush/rocoto/setup_workflow.py b/ush/rocoto/setup_workflow.py deleted file mode 100755 index 1e5c102531b..00000000000 --- a/ush/rocoto/setup_workflow.py +++ /dev/null @@ -1,1434 +0,0 @@ -#!/usr/bin/env python3 - -''' - PROGRAM: - Create the ROCOTO workflow given the configuration of the GFS parallel - - AUTHOR: - Rahul.Mahajan - rahul.mahajan@noaa.gov - - FILE DEPENDENCIES: - 1. config files for the parallel; e.g. config.base, config.fcst[.gfs], etc. - Without these dependencies, the script will fail - - OUTPUT: - 1. PSLOT.xml: XML workflow - 2. PSLOT.crontab: crontab for ROCOTO run command -''' - -import os -import sys -import re -import numpy as np -from datetime import datetime, timedelta -from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter -from collections import OrderedDict -import rocoto -import workflow_utils as wfu - -def main(): - parser = ArgumentParser(description='Setup XML workflow and CRONTAB for a GFS parallel.', formatter_class=ArgumentDefaultsHelpFormatter) - parser.add_argument('--expdir', help='full path to experiment directory containing config files', type=str, required=False, default=os.environ['PWD']) - args = parser.parse_args() - - configs = wfu.get_configs(args.expdir) - - _base = wfu.config_parser([wfu.find_config('config.base', configs)]) - - if not os.path.samefile(args.expdir, _base['EXPDIR']): - print('MISMATCH in experiment directories!') - print(f'config.base: EXPDIR = {repr(_base["EXPDIR"])}') - print(f'input arg: --expdir = {repr(args.expdir)}') - sys.exit(1) - - gfs_steps = ['prep', 'anal', 'sfcanl', 'analdiag', 'analcalc', 'gldas', 'fcst', 
'postsnd', 'post', 'vrfy', 'arch'] - gfs_steps_gempak = ['gempak'] - gfs_steps_awips = ['awips'] - gfs_steps_wafs = ['wafs', 'wafsgrib2', 'wafsblending', 'wafsgcip', 'wafsgrib20p25', 'wafsblending0p25'] - #hyb_steps = ['eobs', 'eomg', 'eupd', 'ecen', 'efcs', 'epos', 'earc'] - metp_steps = ['metp'] - wav_steps = ['waveinit', 'waveprep', 'wavepostsbs', 'wavepostbndpnt', 'wavepostbndpntbll', 'wavepostpnt'] - #Implement additional wave jobs at later date - wav_steps_gempak = ['wavegempak'] - wav_steps_awips = ['waveawipsbulls', 'waveawipsgridded'] -# From gfsv16b latest -# gfs_steps = ['prep', 'anal', 'gldas', 'fcst', 'postsnd', 'post', 'awips', 'gempak', 'vrfy', 'metp', 'arch'] - hyb_steps = ['eobs', 'ediag', 'eomg', 'eupd', 'ecen', 'esfc', 'efcs', 'echgres', 'epos', 'earc'] - - steps = gfs_steps + hyb_steps if _base.get('DOHYBVAR', 'NO') == 'YES' else gfs_steps - steps = steps + metp_steps if _base.get('DO_METP', 'NO') == 'YES' else steps - steps = steps + gfs_steps_gempak if _base.get('DO_GEMPAK', 'NO') == 'YES' else steps - steps = steps + gfs_steps_awips if _base.get('DO_AWIPS', 'NO') == 'YES' else steps - steps = steps + gfs_steps_wafs if _base.get('WAFSF', 'NO') == 'YES' else steps - steps = steps + wav_steps if _base.get('DO_WAVE', 'NO') == 'YES' else steps - steps = steps + wav_steps_gempak if _base.get('DO_GEMPAK', 'NO') == 'YES' else steps - steps = steps + wav_steps_awips if _base.get('DO_AWIPS', 'NO') == 'YES' else steps - - dict_configs = wfu.source_configs(configs, steps) - - # Check and set gfs_cyc specific variables - if dict_configs['base']['gfs_cyc'] != 0: - dict_configs['base'] = get_gfs_cyc_dates(dict_configs['base']) - - # First create workflow XML - create_xml(dict_configs) - - # Next create the crontab - wfu.create_crontab(dict_configs['base']) - - return - - -def get_gfs_cyc_dates(base): - ''' - Generate GFS dates from experiment dates and gfs_cyc choice - ''' - - base_out = base.copy() - - gfs_cyc = base['gfs_cyc'] - sdate = base['SDATE'] - 
edate = base['EDATE'] - - interval_gfs = wfu.get_gfs_interval(gfs_cyc) - - # Set GFS cycling dates - hrdet = 0 - if gfs_cyc == 1: - hrinc = 24 - sdate.hour - hrdet = edate.hour - elif gfs_cyc == 2: - if sdate.hour in [0, 12]: - hrinc = 12 - elif sdate.hour in [6, 18]: - hrinc = 6 - if edate.hour in [6, 18]: - hrdet = 6 - elif gfs_cyc == 4: - hrinc = 6 - sdate_gfs = sdate + timedelta(hours=hrinc) - edate_gfs = edate - timedelta(hours=hrdet) - if sdate_gfs > edate: - print('W A R N I N G!') - print('Starting date for GFS cycles is after Ending date of experiment') - print(f'SDATE = {sdate.strftime("%Y%m%d%H")}, EDATE = {edate.strftime("%Y%m%d%H")}') - print(f'SDATE_GFS = {sdate_gfs.strftime("%Y%m%d%H")}, EDATE_GFS = {edate_gfs.strftime("%Y%m%d%H")}') - gfs_cyc = 0 - - base_out['gfs_cyc'] = gfs_cyc - base_out['SDATE_GFS'] = sdate_gfs - base_out['EDATE_GFS'] = edate_gfs - base_out['INTERVAL_GFS'] = interval_gfs - - fhmax_gfs = {} - for hh in ['00', '06', '12', '18']: - fhmax_gfs[hh] = base.get(f'FHMAX_GFS_{hh}', 'FHMAX_GFS_00') - base_out['FHMAX_GFS'] = fhmax_gfs - - return base_out - - -def get_preamble(): - ''' - Generate preamble for XML - ''' - - strings = [] - - strings.append('\n') - strings.append('\n') - - return ''.join(strings) - - -def get_definitions(base): - ''' - Create entities related to the experiment - ''' - - machine = base.get('machine', wfu.detectMachine()) - scheduler = wfu.get_scheduler(machine) - hpssarch = base.get('HPSSARCH', 'NO').upper() - - strings = [] - - strings.append('\n') - strings.append('\t\n') - strings.append(f'''\t\n''') - strings.append(f'''\t\n''') - strings.append(f'''\t\n''') - - if base['gfs_cyc'] != 0: - strings.append(get_gfs_dates(base)) - strings.append('\n') - - strings.append('\t\n') - strings.append(f'''\t\n''') - strings.append('\n') - strings.append('\t\n') - strings.append(f'''\t\n''') - strings.append(f'''\t\n''') - strings.append('\n') - strings.append('\t\n') - strings.append(f'''\t\n''') - 
strings.append(f'''\t\n''') - strings.append(f'''\t\n''') - strings.append('\n') - strings.append('\t\n') - strings.append(f'''\t\n''') - - strings.append(f'''\t\n''') - strings.append(f'''\t\n''') - if scheduler in ['slurm']: - strings.append(f'''\t\n''') - strings.append(f'''\t\n''') - strings.append(f'\t\n') - strings.append('\n') - strings.append('\t\n') - strings.append(f'''\t\n''') - strings.append('\n') - strings.append('\t\n') - strings.append('\t\n') - strings.append('\t\n') - strings.append('\t\n') - strings.append('\n') - - return ''.join(strings) - - -def get_gfs_dates(base): - ''' - Generate GFS dates entities - ''' - - strings = [] - - strings.append('\n') - strings.append('\t\n') - strings.append(f'''\t\n''') - strings.append(f'''\t\n''') - strings.append(f'''\t\n''') - - return ''.join(strings) - - -def get_gdasgfs_resources(dict_configs, cdump='gdas'): - ''' - Create GDAS or GFS resource entities - ''' - - base = dict_configs['base'] - machine = base.get('machine', wfu.detectMachine()) - scheduler = wfu.get_scheduler(machine) - do_bufrsnd = base.get('DO_BUFRSND', 'NO').upper() - do_gempak = base.get('DO_GEMPAK', 'NO').upper() - do_awips = base.get('DO_AWIPS', 'NO').upper() - do_wafs = base.get('WAFSF', 'NO').upper() - do_metp = base.get('DO_METP', 'NO').upper() - do_gldas = base.get('DO_GLDAS', 'NO').upper() - do_wave = base.get('DO_WAVE', 'NO').upper() - do_wave_cdump = base.get('WAVE_CDUMP', 'BOTH').upper() - reservation = base.get('RESERVATION', 'NONE').upper() - - #tasks = ['prep', 'anal', 'fcst', 'post', 'vrfy', 'arch'] - tasks = ['prep', 'anal', 'sfcanl', 'analcalc'] - - if cdump in ['gdas']: - tasks += ['analdiag'] - if cdump in ['gdas'] and do_gldas in ['Y', 'YES']: - tasks += ['gldas'] - if cdump in ['gdas'] and do_wave in ['Y', 'YES'] and do_wave_cdump in ['GDAS', 'BOTH']: - #tasks += ['waveinit', 'waveprep', 'wavepostsbs', 'wavepostbndpnt', 'wavepostpnt', 'wavestat'] - tasks += ['waveinit', 'waveprep', 'wavepostsbs', 'wavepostbndpnt', 
'wavepostbndpntbll', 'wavepostpnt'] - - tasks += ['fcst', 'post', 'vrfy', 'arch'] - - if cdump in ['gfs'] and do_wave in ['Y', 'YES'] and do_wave_cdump in ['GFS', 'BOTH']: - #tasks += ['waveinit', 'waveprep', 'wavepostsbs', 'wavepostbndpnt', 'wavepostpnt', 'wavestat'] - tasks += ['waveinit', 'waveprep', 'wavepostsbs', 'wavepostbndpnt', 'wavepostbndpntbll', 'wavepostpnt'] - if cdump in ['gfs'] and do_bufrsnd in ['Y', 'YES']: - tasks += ['postsnd'] - if cdump in ['gfs'] and do_gempak in ['Y', 'YES']: - tasks += ['gempak'] - if cdump in ['gfs'] and do_wave in ['Y', 'YES'] and do_gempak in ['Y', 'YES']: - tasks += ['wavegempak'] - if cdump in ['gfs'] and do_awips in ['Y', 'YES']: - tasks += ['awips'] - if cdump in ['gfs'] and do_wafs in ['Y', 'YES']: - tasks += ['wafs', 'wafsgrib2', 'wafsblending', 'wafsgcip', 'wafsgrib20p25', 'wafsblending0p25'] - if cdump in ['gfs'] and do_metp in ['Y', 'YES']: - tasks += ['metp'] - if cdump in ['gfs'] and do_wave in ['Y', 'YES'] and do_awips in ['Y', 'YES']: - tasks += ['waveawipsbulls', 'waveawipsgridded'] - - dict_resources = OrderedDict() - - for task in tasks: - - cfg = dict_configs[task] - - wtimestr, resstr, queuestr, memstr, natstr = wfu.get_resources(machine, cfg, task, reservation, cdump=cdump) - taskstr = f'{task.upper()}_{cdump.upper()}' - - strings = [] - strings.append(f'\t\n') - if scheduler in ['slurm']: - if task in ['arch']: - strings.append(f'\t\n') - else: - strings.append(f'\t\n') - - strings.append(f'\t\n') - strings.append(f'\t\n') - if len(memstr) != 0: - strings.append(f'\t\n') - strings.append(f'\t\n') - - dict_resources[f'{cdump}{task}'] = ''.join(strings) - - return dict_resources - - -def get_hyb_resources(dict_configs): - ''' - Create hybrid resource entities - ''' - - base = dict_configs['base'] - machine = base.get('machine', wfu.detectMachine()) - scheduler = wfu.get_scheduler(machine) - lobsdiag_forenkf = base.get('lobsdiag_forenkf', '.false.').upper() - eupd_cyc= base.get('EUPD_CYC', 'gdas').upper() 
- reservation = base.get('RESERVATION', 'NONE').upper() - - dict_resources = OrderedDict() - - # These tasks can be run in either or both cycles - if lobsdiag_forenkf in ['.T.', '.TRUE.']: - tasks1 = ['eobs', 'ediag', 'eupd', 'echgres'] - else: - tasks1 = ['eobs', 'eomg', 'eupd', 'echgres'] - - if eupd_cyc in ['BOTH']: - cdumps = ['gfs', 'gdas'] - elif eupd_cyc in ['GFS']: - cdumps = ['gfs'] - elif eupd_cyc in ['GDAS']: - cdumps = ['gdas'] - - for cdump in cdumps: - for task in tasks1: - - cfg = dict_configs['eobs'] if task in ['eomg'] else dict_configs[task] - - wtimestr, resstr, queuestr, memstr, natstr = wfu.get_resources(machine, cfg, task, reservation, cdump=cdump) - - taskstr = f'{task.upper()}_{cdump.upper()}' - - strings = [] - - strings.append(f'\t\n') - if scheduler in ['slurm']: - strings.append(f'\t\n') - strings.append(f'\t\n') - strings.append(f'\t\n') - if len(memstr) != 0: - strings.appendf(f'\t\n') - strings.append(f'\t\n') - - dict_resources[f'{cdump}{task}'] = ''.join(strings) - - - # These tasks are always run as part of the GDAS cycle - cdump = 'gdas' - tasks2 = ['ecen', 'esfc', 'efcs', 'epos', 'earc'] - for task in tasks2: - - cfg = dict_configs[task] - - wtimestr, resstr, queuestr, memstr, natstr = wfu.get_resources(machine, cfg, task, reservation, cdump=cdump) - - taskstr = f'{task.upper()}_{cdump.upper()}' - - strings = [] - strings.append(f'\t\n') - if scheduler in ['slurm']: - if task in ['earc']: - strings.append(f'\t\n') - else: - strings.append(f'\t\n') - - strings.append(f'\t\n') - strings.append(f'\t\n') - if len(memstr) != 0: - strings.append(f'\t\n') - strings.append(f'\t\n') - - dict_resources[f'{cdump}{task}'] = ''.join(strings) - - return dict_resources - - -def get_gdasgfs_tasks(dict_configs, cdump='gdas'): - ''' - Create GDAS or GFS tasks - ''' - - envars = [] - if wfu.get_scheduler(wfu.detectMachine()) in ['slurm']: - envars.append(rocoto.create_envar(name='SLURM_SET', value='YES')) - 
envars.append(rocoto.create_envar(name='RUN_ENVIR', value='&RUN_ENVIR;')) - envars.append(rocoto.create_envar(name='HOMEgfs', value='&HOMEgfs;')) - envars.append(rocoto.create_envar(name='EXPDIR', value='&EXPDIR;')) - envars.append(rocoto.create_envar(name='CDATE', value='@Y@m@d@H')) - envars.append(rocoto.create_envar(name='CDUMP', value=f'{cdump}')) - envars.append(rocoto.create_envar(name='PDY', value='@Y@m@d')) - envars.append(rocoto.create_envar(name='cyc', value='@H')) - - base = dict_configs['base'] - gfs_cyc = base.get('gfs_cyc', 0) - gldas_cyc = base.get('gldas_cyc', 0) - dohybvar = base.get('DOHYBVAR', 'NO').upper() - eupd_cyc = base.get('EUPD_CYC', 'gdas').upper() - do_bufrsnd = base.get('DO_BUFRSND', 'NO').upper() - do_gempak = base.get('DO_GEMPAK', 'NO').upper() - do_awips = base.get('DO_AWIPS', 'NO').upper() - do_wafs = base.get('WAFSF', 'NO').upper() - do_metp = base.get('DO_METP', 'NO').upper() - do_gldas = base.get('DO_GLDAS', 'NO').upper() - do_wave = base.get('DO_WAVE', 'NO').upper() - if do_wave in ['YES']: - do_wave_bnd = dict_configs['wavepostsbs'].get('DOBNDPNT_WAVE', "YES").upper() - do_wave_cdump = base.get('WAVE_CDUMP', 'BOTH').upper() - dumpsuffix = base.get('DUMP_SUFFIX', '') - gridsuffix = base.get('SUFFIX', '') - - dict_tasks = OrderedDict() - - # prep - deps = [] - dep_dict = {'type': 'metatask', 'name': f'{"gdas"}post', 'offset': '-06:00:00'} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/gdas.@Y@m@d/@H/atmos/gdas.t@Hz.atmf009{gridsuffix}' - dep_dict = {'type': 'data', 'data': data, 'offset': '-06:00:00'} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&DMPDIR;/{cdump}{dumpsuffix}.@Y@m@d/@H/atmos/{cdump}.t@Hz.updated.status.tm00.bufr_d' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) - - gfs_enkf = True if eupd_cyc in ['BOTH', 'GFS'] and dohybvar in ['Y', 'YES'] else False - - if gfs_enkf 
and cdump in ['gfs']: - if gfs_cyc == 4: - task = wfu.create_wf_task('prep', cdump=cdump, envar=envars, dependency=dependencies) - else: - task = wfu.create_wf_task('prep', cdump=cdump, envar=envars, dependency=dependencies, cycledef='gdas') - - else: - task = wfu.create_wf_task('prep', cdump=cdump, envar=envars, dependency=dependencies) - - dict_tasks[f'{cdump}prep'] = task - - # wave tasks in gdas or gfs or both - if do_wave_cdump in ['BOTH']: - cdumps = ['gfs', 'gdas'] - elif do_wave_cdump in ['GFS']: - cdumps = ['gfs'] - elif do_wave_cdump in ['GDAS']: - cdumps = ['gdas'] - - # waveinit - if do_wave in ['Y', 'YES'] and cdump in cdumps: - deps = [] - dep_dict = {'type': 'task', 'name': '{cdump}prep'} - deps.append(rocoto.add_dependency(dep_dict)) - dep_dict = {'type': 'cycleexist', 'condition': 'not', 'offset': '-06:00:00'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep_condition='or', dep=deps) - task = wfu.create_wf_task('waveinit', cdump=cdump, envar=envars, dependency=dependencies) - dict_tasks['{cdump}waveinit'] = task - - # waveprep - if do_wave in ['Y', 'YES'] and cdump in cdumps: - deps = [] - dep_dict = {'type': 'task', 'name': '{cdump}waveinit'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep=deps) - task = wfu.create_wf_task('waveprep', cdump=cdump, envar=envars, dependency=dependencies) - dict_tasks['{cdump}waveprep'] = task - - # anal - deps = [] - dep_dict = {'type': 'task', 'name': f'{cdump}prep'} - deps.append(rocoto.add_dependency(dep_dict)) - if dohybvar in ['y', 'Y', 'yes', 'YES']: - dep_dict = {'type': 'metatask', 'name': f'{"gdas"}epmn', 'offset': '-06:00:00'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) - else: - dependencies = rocoto.create_dependency(dep=deps) - task = wfu.create_wf_task('anal', cdump=cdump, envar=envars, dependency=dependencies) - - 
dict_tasks[f'{cdump}anal'] = task - - # sfcanl - deps = [] - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.loginc.txt' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - dep_dict = {'type': 'task', 'name': f'{cdump}anal'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) - task = wfu.create_wf_task('sfcanl', cdump=cdump, envar=envars, dependency=dependencies) - - dict_tasks[f'{cdump}sfcanl'] = task - - # analcalc - deps = [] - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.loginc.txt' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - dep_dict = {'type': 'task', 'name': f'{cdump}anal'} - deps.append(rocoto.add_dependency(dep_dict)) - dep_dict = {'type': 'task', 'name': f'{cdump}sfcanl'} - deps.append(rocoto.add_dependency(dep_dict)) - if dohybvar in ['y', 'Y', 'yes', 'YES'] and cdump == 'gdas': - dep_dict = {'type': 'task', 'name': f'{"gdas"}echgres', 'offset': '-06:00:00'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) - task = wfu.create_wf_task('analcalc', cdump=cdump, envar=envars, dependency=dependencies) - - dict_tasks[f'{cdump}analcalc'] = task - - # analdiag - if cdump in ['gdas']: - deps1 = [] - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.loginc.txt' - dep_dict = {'type': 'data', 'data': data} - deps1.append(rocoto.add_dependency(dep_dict)) - dep_dict = {'type': 'task', 'name': f'{cdump}anal'} - deps1.append(rocoto.add_dependency(dep_dict)) - dependencies1 = rocoto.create_dependency(dep_condition='or', dep=deps1) - - deps2 = [] - deps2 = dependencies1 - dep_dict = {'type': 'cycleexist', 'offset': '-06:00:00'} - deps2.append(rocoto.add_dependency(dep_dict)) - dependencies2 = rocoto.create_dependency(dep_condition='and', dep=deps2) - - task = wfu.create_wf_task('analdiag', cdump=cdump, envar=envars, 
dependency=dependencies2) - - dict_tasks[f'{cdump}analdiag'] = task - - # gldas - if cdump in ['gdas'] and do_gldas in ['Y', 'YES']: - deps = [] - dep_dict = {'type': 'task', 'name': f'{cdump}sfcanl'} - deps.append(rocoto.add_dependency(dep_dict)) - dep_dict = {'type': 'cycleexist', 'offset': '-06:00:00'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) - - task = wfu.create_wf_task('gldas', cdump=cdump, envar=envars, dependency=dependencies) - - dict_tasks[f'{cdump}gldas'] = task - - # fcst - deps = [] - if cdump in ['gdas']: - dep_dict = {'type': 'cycleexist', 'condition': 'not', 'offset': '-06:00:00'} - deps.append(rocoto.add_dependency(dep_dict)) - if do_gldas in ['Y', 'YES']: - dep_dict = {'type': 'task', 'name': f'{cdump}gldas'} - deps.append(rocoto.add_dependency(dep_dict)) - else: - dep_dict = {'type': 'task', 'name': f'{cdump}sfcanl'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep_condition='or', dep=deps) - elif cdump in ['gfs']: - dep_dict = {'type': 'task', 'name': f'{cdump}sfcanl'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep=deps) - - if do_wave in ['Y', 'YES'] and cdump in cdumps: - deps = dependencies - dep_dict = {'type': 'task', 'name': f'{cdump}waveprep'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) - - task = wfu.create_wf_task('fcst', cdump=cdump, envar=envars, dependency=dependencies) - - dict_tasks[f'{cdump}fcst'] = task - - # post - deps = [] - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.log#dep#.txt' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - dep_dict = {'type': 'task', 'name': f'{cdump}fcst'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep_condition='or', dep=deps) - fhrgrp = 
rocoto.create_envar(name='FHRGRP', value='#grp#') - fhrlst = rocoto.create_envar(name='FHRLST', value='#lst#') - ROTDIR = rocoto.create_envar(name='ROTDIR', value='&ROTDIR;') - postenvars = envars + [fhrgrp] + [fhrlst] + [ROTDIR] - varname1, varname2, varname3 = 'grp', 'dep', 'lst' - varval1, varval2, varval3 = get_postgroups(dict_configs['post'], cdump=cdump) - vardict = {varname2: varval2, varname3: varval3} - task = wfu.create_wf_task('post', cdump=cdump, envar=postenvars, dependency=dependencies, - metatask='post', varname=varname1, varval=varval1, vardict=vardict) - - dict_tasks[f'{cdump}post'] = task - - # wavepostsbs - if do_wave in ['Y', 'YES'] and cdump in cdumps: - deps = [] - for wave_grid in dict_configs['wavepostsbs']['waveGRD'].split(): - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/wave/rundata/{cdump}wave.out_grd.{wave_grid}.@Y@m@d.@H0000' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - - dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) - task = wfu.create_wf_task('wavepostsbs', cdump=cdump, envar=envars, dependency=dependencies) - dict_tasks[f'{cdump}wavepostsbs'] = task - - # wavepostbndpnt - if do_wave in ['Y', 'YES'] and do_wave_bnd in ['YES'] and cdump in ['gfs']: - deps = [] - dep_dict = {'type': 'task', 'name': f'{cdump}fcst'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep=deps) - task = wfu.create_wf_task('wavepostbndpnt', cdump=cdump, envar=envars, dependency=dependencies) - dict_tasks[f'{cdump}wavepostbndpnt'] = task - - # wavepostbndpntbll - if do_wave in ['Y', 'YES'] and do_wave_bnd in ['YES'] and cdump in ['gfs']: - deps = [] - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.logf180.txt' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - dep_dict = {'type': 'task', 'name': f'{cdump}wavepostbndpnt'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = 
rocoto.create_dependency(dep_condition='and', dep=deps) - task = wfu.create_wf_task('wavepostbndpntbll', cdump=cdump, envar=envars, dependency=dependencies) - dict_tasks[f'{cdump}wavepostbndpntbll'] = task - - # wavepostpnt - if do_wave in ['Y', 'YES'] and cdump in ['gdas', 'gfs']: - deps = [] - dep_dict = {'type': 'task', 'name': f'{cdump}fcst'} - deps.append(rocoto.add_dependency(dep_dict)) - if do_wave_bnd in ['YES'] and cdump in ['gfs']: - dep_dict = {'type': 'task', 'name': f'{cdump}wavepostbndpntbll'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) - task = wfu.create_wf_task('wavepostpnt', cdump=cdump, envar=envars, dependency=dependencies) - dict_tasks[f'{cdump}wavepostpnt'] = task - - # wavegempak - if do_wave in ['Y', 'YES'] and do_gempak in ['Y', 'YES'] and cdump in ['gfs']: - deps = [] - dep_dict = {'type':'task', 'name':f'{cdump}wavepostsbs'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep=deps) - task = wfu.create_wf_task('wavegempak', cdump=cdump, envar=envars, dependency=dependencies) - dict_tasks[f'{cdump}wavegempak'] = task - - # waveawipsgridded - if do_wave in ['Y', 'YES'] and do_awips in ['Y', 'YES'] and cdump in ['gfs']: - deps = [] - dep_dict = {'type':'task', 'name':f'{cdump}wavepostsbs'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep=deps) - task = wfu.create_wf_task('waveawipsgridded', cdump=cdump, envar=envars, dependency=dependencies) - dict_tasks[f'{cdump}waveawipsgridded'] = task - - # waveawipsbulls - if do_wave in ['Y', 'YES'] and do_awips in ['Y', 'YES'] and cdump in ['gfs']: - deps = [] - dep_dict = {'type':'task', 'name':f'{cdump}wavepostsbs'} - deps.append(rocoto.add_dependency(dep_dict)) - dep_dict = {'type':'task', 'name':f'{cdump}wavepostpnt'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) - 
task = wfu.create_wf_task('waveawipsbulls', cdump=cdump, envar=envars, dependency=dependencies) - dict_tasks[f'{cdump}waveawipsbulls'] = task - - # wavestat - #if do_wave in ['Y', 'YES'] and cdump in cdumps: - # deps = [] - # dep_dict = {'type':'task', 'name':'%swavepost' % cdump} - # deps.append(rocoto.add_dependency(dep_dict)) - # dependencies = rocoto.create_dependency(dep=deps) - # task = wfu.create_wf_task('wavestat', cdump=cdump, envar=envars, dependency=dependencies) - # dict_tasks['%swavestat' % cdump] = task - - # vrfy - deps = [] - dep_dict = {'type': 'metatask', 'name': f'{cdump}post'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep=deps) - task = wfu.create_wf_task('vrfy', cdump=cdump, envar=envars, dependency=dependencies) - - dict_tasks[f'{cdump}vrfy'] = task - - # metp - if cdump in ['gfs'] and do_metp in ['Y', 'YES']: - deps = [] - dep_dict = {'type':'metatask', 'name':f'{cdump}post'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) - sdate_gfs = rocoto.create_envar(name='SDATE_GFS', value='&SDATE_GFS;') - metpcase = rocoto.create_envar(name='METPCASE', value='#metpcase#') - metpenvars = envars + [sdate_gfs] + [metpcase] - varname1 = 'metpcase' - varval1 = 'g2g1 g2o1 pcp1' - task = wfu.create_wf_task('metp', cdump=cdump, envar=metpenvars, dependency=dependencies, - metatask='metp', varname=varname1, varval=varval1) - dict_tasks[f'{cdump}metp'] = task - - #postsnd - if cdump in ['gfs'] and do_bufrsnd in ['Y', 'YES']: - deps = [] - dep_dict = {'type': 'task', 'name': f'{cdump}fcst'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep=deps) - task = wfu.create_wf_task('postsnd', cdump=cdump, envar=envars, dependency=dependencies) - - dict_tasks[f'{cdump}postsnd'] = task - - # awips - if cdump in ['gfs'] and do_awips in ['Y', 'YES']: - deps = [] - dep_dict = {'type': 'metatask', 'name': 
f'{cdump}post'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep=deps) - fhrgrp = rocoto.create_envar(name='FHRGRP', value='#grp#') - fhrlst = rocoto.create_envar(name='FHRLST', value='#lst#') - ROTDIR = rocoto.create_envar(name='ROTDIR', value='&ROTDIR;') - awipsenvars = envars + [fhrgrp] + [fhrlst] + [ROTDIR] - varname1, varname2, varname3 = 'grp', 'dep', 'lst' - varval1, varval2, varval3 = get_awipsgroups(dict_configs['awips'], cdump=cdump) - vardict = {varname2: varval2, varname3: varval3} - task = wfu.create_wf_task('awips', cdump=cdump, envar=awipsenvars, dependency=dependencies, - metatask='awips', varname=varname1, varval=varval1, vardict=vardict) - - dict_tasks[f'{cdump}awips'] = task - - # gempak - if cdump in ['gfs'] and do_gempak in ['Y', 'YES']: - deps = [] - dep_dict = {'type': 'metatask', 'name': f'{cdump}post'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep=deps) - task = wfu.create_wf_task('gempak', cdump=cdump, envar=envars, dependency=dependencies) - - dict_tasks[f'{cdump}gempak'] = task - - # wafs - if cdump in ['gfs'] and do_wafs in ['Y', 'YES']: - deps = [] - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if006' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if012' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if015' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if018' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if021' - dep_dict = {'type': 'data', 'data': data} - 
deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if024' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if027' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if030' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if033' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if036' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) - task = wfu.create_wf_task('wafs', cdump=cdump, envar=envars, dependency=dependencies) - - dict_tasks[f'{cdump}wafs'] = task - - # wafsgcip - if cdump in ['gfs'] and do_wafs in ['Y', 'YES']: - deps = [] - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if006' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if012' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if015' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if018' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if021' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = 
f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if024' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if027' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if030' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if033' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if036' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) - task = wfu.create_wf_task('wafsgcip', cdump=cdump, envar=envars, dependency=dependencies) - - dict_tasks[f'{cdump}wafsgcip'] = task - - # wafsgrib2 - if cdump in ['gfs'] and do_wafs in ['Y', 'YES']: - deps = [] - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if006' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if012' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if015' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if018' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if021' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if024' - 
dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if027' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if030' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if033' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if036' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) - task = wfu.create_wf_task('wafsgrib2', cdump=cdump, envar=envars, dependency=dependencies) - - dict_tasks[f'{cdump}wafsgrib2'] = task - - # wafsgrib20p25 - if cdump in ['gfs'] and do_wafs in ['Y', 'YES']: - deps = [] - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if006' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if012' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if015' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if018' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if021' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if024' - dep_dict = {'type': 'data', 'data': data} - 
deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if027' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if030' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if033' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if036' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) - task = wfu.create_wf_task('wafsgrib20p25', cdump=cdump, envar=envars, dependency=dependencies) - - dict_tasks[f'{cdump}wafsgrib20p25'] = task - - # wafsblending - if cdump in ['gfs'] and do_wafs in ['Y', 'YES']: - deps = [] - dep_dict = {'type': 'task', 'name': f'{cdump}wafsgrib2'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep=deps) - task = wfu.create_wf_task('wafsblending', cdump=cdump, envar=envars, dependency=dependencies) - - dict_tasks[f'{cdump}wafsblending'] = task - - # wafsblending0p25 - if cdump in ['gfs'] and do_wafs in ['Y', 'YES']: - deps = [] - dep_dict = {'type': 'task', 'name': f'{cdump}wafsgrib20p25'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep=deps) - task = wfu.create_wf_task('wafsblending0p25', cdump=cdump, envar=envars, dependency=dependencies) - - dict_tasks[f'{cdump}wafsblending0p25'] = task - - # arch - deps = [] - dep_dict = {'type': 'task', 'name': f'{cdump}vrfy'} - deps.append(rocoto.add_dependency(dep_dict)) - if cdump in ['gfs'] and do_metp in ['Y', 'YES']: - dep_dict = {'type':'metatask', 'name':f'{cdump}metp'} - deps.append(rocoto.add_dependency(dep_dict)) - if do_wave in ['Y', 
'YES']: - dep_dict = {'type': 'task', 'name': f'{cdump}wavepostsbs'} - deps.append(rocoto.add_dependency(dep_dict)) - dep_dict = {'type': 'task', 'name': f'{cdump}wavepostpnt'} - deps.append(rocoto.add_dependency(dep_dict)) - if do_wave_bnd in ['YES'] and cdump in ['gfs']: - dep_dict = {'type': 'task', 'name': f'{cdump}wavepostbndpnt'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) - task = wfu.create_wf_task('arch', cdump=cdump, envar=envars, dependency=dependencies) - - dict_tasks[f'{cdump}arch'] = task - - return dict_tasks - - -def get_hyb_tasks(dict_configs, cycledef='enkf'): - ''' - Create Hybrid tasks - ''' - - # Determine groups based on ensemble size and grouping - base = dict_configs['base'] - nens = base['NMEM_ENKF'] - lobsdiag_forenkf = base.get('lobsdiag_forenkf', '.false.').upper() - eupd_cyc = base.get('EUPD_CYC', 'gdas').upper() - - eobs = dict_configs['eobs'] - nens_eomg = eobs['NMEM_EOMGGRP'] - neomg_grps = nens / nens_eomg - EOMGGROUPS = ' '.join([f'{x:02d}' for x in range(1, int(neomg_grps) + 1)]) - - efcs = dict_configs['efcs'] - nens_efcs = efcs['NMEM_EFCSGRP'] - nefcs_grps = nens / nens_efcs - EFCSGROUPS = ' '.join([f'{x:02d}' for x in range(1, int(nefcs_grps) + 1)]) - - earc = dict_configs['earc'] - nens_earc = earc['NMEM_EARCGRP'] - nearc_grps = nens / nens_earc - EARCGROUPS = ' '.join([f'{x:02d}' for x in range(0, int(nearc_grps) + 1)]) - - envars = [] - if wfu.get_scheduler(wfu.detectMachine()) in ['slurm']: - envars.append(rocoto.create_envar(name='SLURM_SET', value='YES')) - envars.append(rocoto.create_envar(name='RUN_ENVIR', value='&RUN_ENVIR;')) - envars.append(rocoto.create_envar(name='HOMEgfs', value='&HOMEgfs;')) - envars.append(rocoto.create_envar(name='EXPDIR', value='&EXPDIR;')) - envars.append(rocoto.create_envar(name='CDATE', value='@Y@m@d@H')) - #envars.append(rocoto.create_envar(name='CDUMP', value=f'{cdump}')) - 
envars.append(rocoto.create_envar(name='PDY', value='@Y@m@d')) - envars.append(rocoto.create_envar(name='cyc', value='@H')) - - ensgrp = rocoto.create_envar(name='ENSGRP', value='#grp#') - - dict_tasks = OrderedDict() - - if eupd_cyc in ['BOTH']: - cdumps = ['gfs', 'gdas'] - elif eupd_cyc in ['GFS']: - cdumps = ['gfs'] - elif eupd_cyc in ['GDAS']: - cdumps = ['gdas'] - - for cdump in cdumps: - - envar_cdump = rocoto.create_envar(name='CDUMP', value=f'{cdump}') - envars1 = envars + [envar_cdump] - - # eobs - deps = [] - dep_dict = {'type': 'task', 'name': f'{cdump}prep'} - deps.append(rocoto.add_dependency(dep_dict)) - dep_dict = {'type': 'metatask', 'name': f'{"gdas"}epmn', 'offset': '-06:00:00'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) - task = wfu.create_wf_task('eobs', cdump=cdump, envar=envars1, dependency=dependencies, cycledef=cycledef) - - dict_tasks[f'{cdump}eobs'] = task - - # eomn, eomg - if lobsdiag_forenkf in ['.F.', '.FALSE.']: - deps = [] - dep_dict = {'type': 'task', 'name': f'{cdump}eobs'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep=deps) - eomgenvars= envars1 + [ensgrp] - task = wfu.create_wf_task('eomg', cdump=cdump, envar=eomgenvars, dependency=dependencies, - metatask='eomn', varname='grp', varval=EOMGGROUPS, cycledef=cycledef) - - dict_tasks[f'{cdump}eomn'] = task - - # ediag - else: - deps = [] - dep_dict = {'type': 'task', 'name': f'{cdump}eobs'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep=deps) - task = wfu.create_wf_task('ediag', cdump=cdump, envar=envars1, dependency=dependencies, cycledef=cycledef) - - dict_tasks[f'{cdump}ediag'] = task - - # eupd - deps = [] - if lobsdiag_forenkf in ['.F.', '.FALSE.']: - dep_dict = {'type': 'metatask', 'name': f'{cdump}eomn'} - else: - dep_dict = {'type': 'task', 'name': f'{cdump}ediag'} - 
deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep=deps) - task = wfu.create_wf_task('eupd', cdump=cdump, envar=envars1, dependency=dependencies, cycledef=cycledef) - - dict_tasks[f'{cdump}eupd'] = task - - # All hybrid tasks beyond this point are always executed in the GDAS cycle - cdump = 'gdas' - envar_cdump = rocoto.create_envar(name='CDUMP', value=f'{cdump}') - envars1 = envars + [envar_cdump] - cdump_eupd = 'gfs' if eupd_cyc in ['GFS'] else 'gdas' - - # ecmn, ecen - deps1 = [] - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.loganl.txt' - dep_dict = {'type': 'data', 'data': data} - deps1.append(rocoto.add_dependency(dep_dict)) - dep_dict = {'type': 'task', 'name': f'{cdump}analcalc'} - deps1.append(rocoto.add_dependency(dep_dict)) - dependencies1 = rocoto.create_dependency(dep_condition='or', dep=deps1) - - deps2 = [] - deps2 = dependencies1 - dep_dict = {'type': 'task', 'name': f'{cdump_eupd}eupd'} - deps2.append(rocoto.add_dependency(dep_dict)) - dependencies2 = rocoto.create_dependency(dep_condition='and', dep=deps2) - - fhrgrp = rocoto.create_envar(name='FHRGRP', value='#grp#') - fhrlst = rocoto.create_envar(name='FHRLST', value='#lst#') - ecenenvars = envars1 + [fhrgrp] + [fhrlst] - varname1, varname2, varname3 = 'grp', 'dep', 'lst' - varval1, varval2, varval3 = get_ecengroups(dict_configs, dict_configs['ecen'], cdump=cdump) - vardict = {varname2: varval2, varname3: varval3} - task = wfu.create_wf_task('ecen', cdump=cdump, envar=ecenenvars, dependency=dependencies2, - metatask='ecmn', varname=varname1, varval=varval1, vardict=vardict) - - dict_tasks[f'{cdump}ecmn'] = task - - # esfc - deps1 = [] - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.loganl.txt' - dep_dict = {'type': 'data', 'data': data} - deps1.append(rocoto.add_dependency(dep_dict)) - dep_dict = {'type': 'task', 'name': f'{cdump}analcalc'} - deps1.append(rocoto.add_dependency(dep_dict)) - dependencies1 = 
rocoto.create_dependency(dep_condition='or', dep=deps1) - - deps2 = [] - deps2 = dependencies1 - dep_dict = {'type': 'task', 'name': f'{cdump_eupd}eupd'} - deps2.append(rocoto.add_dependency(dep_dict)) - dependencies2 = rocoto.create_dependency(dep_condition='and', dep=deps2) - task = wfu.create_wf_task('esfc', cdump=cdump, envar=envars1, dependency=dependencies2, cycledef=cycledef) - - dict_tasks[f'{cdump}esfc'] = task - - # efmn, efcs - deps1 = [] - dep_dict = {'type': 'metatask', 'name': f'{cdump}ecmn'} - deps1.append(rocoto.add_dependency(dep_dict)) - dep_dict = {'type': 'task', 'name': f'{cdump}esfc'} - deps1.append(rocoto.add_dependency(dep_dict)) - dependencies1 = rocoto.create_dependency(dep_condition='and', dep=deps1) - - deps2 = [] - deps2 = dependencies1 - dep_dict = {'type': 'cycleexist', 'condition': 'not', 'offset': '-06:00:00'} - deps2.append(rocoto.add_dependency(dep_dict)) - dependencies2 = rocoto.create_dependency(dep_condition='or', dep=deps2) - - efcsenvars = envars1 + [ensgrp] - task = wfu.create_wf_task('efcs', cdump=cdump, envar=efcsenvars, dependency=dependencies2, - metatask='efmn', varname='grp', varval=EFCSGROUPS, cycledef=cycledef) - - dict_tasks[f'{cdump}efmn'] = task - - # echgres - deps1 = [] - dep_dict = {'type': 'task', 'name': f'{cdump}fcst'} - deps1.append(rocoto.add_dependency(dep_dict)) - dep_dict = {'type': 'metatask', 'name': f'{cdump}efmn'} - deps1.append(rocoto.add_dependency(dep_dict)) - dependencies1 = rocoto.create_dependency(dep_condition='and', dep=deps1) - task = wfu.create_wf_task('echgres', cdump=cdump, envar=envars1, dependency=dependencies1, cycledef=cycledef) - - dict_tasks[f'{cdump}echgres'] = task - - # epmn, epos - deps = [] - dep_dict = {'type': 'metatask', 'name': f'{cdump}efmn'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep=deps) - fhrgrp = rocoto.create_envar(name='FHRGRP', value='#grp#') - fhrlst = rocoto.create_envar(name='FHRLST', value='#lst#') - eposenvars 
= envars1 + [fhrgrp] + [fhrlst] - varname1, varname2, varname3 = 'grp', 'dep', 'lst' - varval1, varval2, varval3 = get_eposgroups(dict_configs['epos'], cdump=cdump) - vardict = {varname2: varval2, varname3: varval3} - task = wfu.create_wf_task('epos', cdump=cdump, envar=eposenvars, dependency=dependencies, - metatask='epmn', varname=varname1, varval=varval1, vardict=vardict) - - dict_tasks[f'{cdump}epmn'] = task - - # eamn, earc - deps = [] - dep_dict = {'type': 'metatask', 'name': f'{cdump}epmn'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep=deps) - earcenvars = envars1 + [ensgrp] - task = wfu.create_wf_task('earc', cdump=cdump, envar=earcenvars, dependency=dependencies, - metatask='eamn', varname='grp', varval=EARCGROUPS, cycledef=cycledef) - - dict_tasks[f'{cdump}eamn'] = task - - return dict_tasks - - -def get_workflow_header(base): - ''' - Create the workflow header block - ''' - - strings = [] - - strings.append('\n') - strings.append(']>\n') - strings.append('\n') - strings.append('\n') - strings.append('\n') - strings.append('\t&EXPDIR;/logs/@Y@m@d@H.log\n') - strings.append('\n') - strings.append('\t\n') - strings.append('\t&SDATE; &SDATE; 06:00:00\n') - strings.append('\t&SDATE; &EDATE; 06:00:00\n') - strings.append('\t&SDATE; &EDATE; 06:00:00\n') - if base['gfs_cyc'] != 0: - strings.append('\t&SDATE_GFS; &EDATE_GFS; &INTERVAL_GFS;\n') - - strings.append('\n') - - return ''.join(strings) - - -def get_workflow_footer(): - ''' - Generate workflow footer - ''' - - strings = [] - strings.append('\n\n') - - return ''.join(strings) - - -def get_postgroups(post, cdump='gdas'): - - fhmin = post['FHMIN'] - fhmax = post['FHMAX'] - fhout = post['FHOUT'] - - # Get a list of all forecast hours - if cdump in ['gdas']: - fhrs = range(fhmin, fhmax+fhout, fhout) - elif cdump in ['gfs']: - fhmax = np.max([post['FHMAX_GFS_00'],post['FHMAX_GFS_06'],post['FHMAX_GFS_12'],post['FHMAX_GFS_18']]) - fhout = post['FHOUT_GFS'] - 
fhmax_hf = post['FHMAX_HF_GFS'] - fhout_hf = post['FHOUT_HF_GFS'] - fhrs_hf = range(fhmin, fhmax_hf+fhout_hf, fhout_hf) - fhrs = list(fhrs_hf) + list(range(fhrs_hf[-1]+fhout, fhmax+fhout, fhout)) - - npostgrp = post['NPOSTGRP'] - ngrps = npostgrp if len(fhrs) > npostgrp else len(fhrs) - - fhrs = [f'f{f:03d}' for f in fhrs] - fhrs = np.array_split(fhrs, ngrps) - fhrs = [f.tolist() for f in fhrs] - - fhrgrp = ' '.join(['anl'] + [f'_{f[0]}-{f[-1]}' for f in fhrs]) - fhrdep = ' '.join(['anl'] + [f[-1] for f in fhrs]) - fhrlst = ' '.join(['anl'] + ['_'.join(f) for f in fhrs]) - - return fhrgrp, fhrdep, fhrlst - -def get_awipsgroups(awips, cdump='gdas'): - - fhmin = awips['FHMIN'] - fhmax = awips['FHMAX'] - fhout = awips['FHOUT'] - - # Get a list of all forecast hours - if cdump in ['gdas']: - fhrs = range(fhmin, fhmax+fhout, fhout) - elif cdump in ['gfs']: - fhmax = np.max([awips['FHMAX_GFS_00'],awips['FHMAX_GFS_06'],awips['FHMAX_GFS_12'],awips['FHMAX_GFS_18']]) - fhout = awips['FHOUT_GFS'] - fhmax_hf = awips['FHMAX_HF_GFS'] - fhout_hf = awips['FHOUT_HF_GFS'] - if fhmax > 240: - fhmax = 240 - if fhmax_hf > 240: - fhmax_hf = 240 - fhrs_hf = range(fhmin, fhmax_hf+fhout_hf, fhout_hf) - fhrs = fhrs_hf + range(fhrs_hf[-1]+fhout, fhmax+fhout, fhout) - - nawipsgrp = awips['NAWIPSGRP'] - ngrps = nawipsgrp if len(fhrs) > nawipsgrp else len(fhrs) - - fhrs = [f'f{f:03d}' for f in fhrs] - fhrs = np.array_split(fhrs, ngrps) - fhrs = [f.tolist() for f in fhrs] - - fhrgrp = ' '.join([f'{x:03d}' for x in range(0, ngrps)]) - fhrdep = ' '.join([f[-1] for f in fhrs]) - fhrlst = ' '.join(['_'.join(f) for f in fhrs]) - - return fhrgrp, fhrdep, fhrlst - -def get_ecengroups(dict_configs, ecen, cdump='gdas'): - - base = dict_configs['base'] - - if base.get('DOIAU_ENKF', 'NO') == 'YES' : - fhrs = list(base.get('IAUFHRS','6').split(',')) - ifhrs = [f'f00{ff}' for ff in fhrs] - ifhrs0 = ifhrs[0] - nfhrs = len(fhrs) - - ifhrs = [f'f00{ff}' for ff in fhrs] - ifhrs0 = ifhrs[0] - nfhrs = len(fhrs) - 
- necengrp = ecen['NECENGRP'] - ngrps = necengrp if len(fhrs) > necengrp else len(fhrs) - - ifhrs = np.array_split(ifhrs, ngrps) - - fhrgrp = ' '.join([f'{x:03d}' for x in range(0, ngrps)]) - fhrdep = ' '.join([f[-1] for f in ifhrs]) - fhrlst = ' '.join(['_'.join(f) for f in ifhrs]) - - else: - fhrgrp='000' - fhrdep='f006' - fhrlst='f006' - - return fhrgrp, fhrdep, fhrlst - -def get_eposgroups(epos, cdump='gdas'): - - fhmin = epos['FHMIN_ENKF'] - fhmax = epos['FHMAX_ENKF'] - fhout = epos['FHOUT_ENKF'] - fhrs = range(fhmin, fhmax+fhout, fhout) - - neposgrp = epos['NEPOSGRP'] - ngrps = neposgrp if len(fhrs) > neposgrp else len(fhrs) - - fhrs = [f'f{f:03d}' for f in fhrs] - fhrs = np.array_split(fhrs, ngrps) - fhrs = [f.tolist() for f in fhrs] - - fhrgrp = ' '.join([f'{x:03d}' for x in range(0, ngrps)]) - fhrdep = ' '.join([f[-1] for f in fhrs]) - fhrlst = ' '.join(['_'.join(f) for f in fhrs]) - - return fhrgrp, fhrdep, fhrlst - - -def dict_to_strings(dict_in): - - strings = [] - for key in dict_in.keys(): - strings.append(dict_in[key]) - strings.append('\n') - - return ''.join(strings) - - -def create_xml(dict_configs): - ''' - Given an dictionary of sourced config files, - create the workflow XML - ''' - - from builtins import any as b_any - #from __builtin__ import any as b_any - - base = dict_configs['base'] - dohybvar = base.get('DOHYBVAR', 'NO').upper() - gfs_cyc = base.get('gfs_cyc', 0) - eupd_cyc = base.get('EUPD_CYC', 'gdas').upper() - - # Start collecting workflow pieces - preamble = get_preamble() - definitions = get_definitions(base) - workflow_header = get_workflow_header(base) - workflow_footer = get_workflow_footer() - - # Get GDAS related entities, resources, workflow - dict_gdas_resources = get_gdasgfs_resources(dict_configs) - dict_gdas_tasks = get_gdasgfs_tasks(dict_configs) - - # Get hybrid related entities, resources, workflow - if dohybvar in ['Y', 'YES']: - - dict_hyb_resources = get_hyb_resources(dict_configs) - dict_hyb_tasks = 
get_hyb_tasks(dict_configs) - - # Removes &MEMORY_JOB_DUMP post mortem from hyb tasks - hyp_tasks = {'gdaseobs':'gdaseobs', - 'gdasediag':'gdasediag', - 'gdaseomg':'gdaseomn', - 'gdaseupd':'gdaseupd', - 'gdasecen':'gdasecmn', - 'gdasesfc':'gdasesfc', - 'gdasefcs':'gdasefmn', - 'gdasepos':'gdasepmn', - 'gdasearc':'gdaseamn', - 'gdasechgres':'gdasechgres'} - for each_task, each_resource_string in dict_hyb_resources.items(): - #print(each_task,hyp_tasks[each_task]) - #print(dict_hyb_tasks[hyp_tasks[each_task]]) - if 'MEMORY' not in each_resource_string: - if each_task in dict_hyb_tasks: - temp_task_string = [] - for each_line in re.split(r'(\s+)', dict_hyb_tasks[each_task]): - if 'memory' not in each_line: - temp_task_string.append(each_line) - dict_hyb_tasks[each_task] = ''.join(temp_task_string) - if hyp_tasks[each_task] in dict_hyb_tasks: - temp_task_string = [] - for each_line in re.split(r'(\s+)', dict_hyb_tasks[hyp_tasks[each_task]]): - if 'memory' not in each_line: - temp_task_string.append(each_line) - dict_hyb_tasks[hyp_tasks[each_task]] = ''.join(temp_task_string) - - # Get GFS cycle related entities, resources, workflow - dict_gfs_resources = get_gdasgfs_resources(dict_configs, cdump='gfs') - dict_gfs_tasks = get_gdasgfs_tasks(dict_configs, cdump='gfs') - - # Removes &MEMORY_JOB_DUMP post mortem from gdas tasks - for each_task, each_resource_string in dict_gdas_resources.items(): - if each_task not in dict_gdas_tasks: - continue - if 'MEMORY' not in each_resource_string: - temp_task_string = [] - for each_line in re.split(r'(\s+)', dict_gdas_tasks[each_task]): - if 'memory' not in each_line: - temp_task_string.append(each_line) - dict_gdas_tasks[each_task] = ''.join(temp_task_string) - - # Removes &MEMORY_JOB_DUMP post mortem from gfs tasks - for each_task, each_resource_string in dict_gfs_resources.items(): - if each_task not in dict_gfs_tasks: - continue - if 'MEMORY' not in each_resource_string: - temp_task_string = [] - for each_line in 
re.split(r'(\s+)', dict_gfs_tasks[each_task]): - if 'memory' not in each_line: - temp_task_string.append(each_line) - dict_gfs_tasks[each_task] = ''.join(temp_task_string) - - # Put together the XML file - xmlfile = [] - - xmlfile.append(preamble) - - xmlfile.append(definitions) - - xmlfile.append(dict_to_strings(dict_gdas_resources)) - - if dohybvar in ['Y', 'YES']: - xmlfile.append(dict_to_strings(dict_hyb_resources)) - - if gfs_cyc != 0: - xmlfile.append(dict_to_strings(dict_gfs_resources)) - elif gfs_cyc == 0 and dohybvar in ['Y', 'YES'] and eupd_cyc in ['BOTH', 'GFS']: - xmlfile.append(dict_gfs_resources['gfsprep']) - - xmlfile.append(workflow_header) - - xmlfile.append(dict_to_strings(dict_gdas_tasks)) - - if dohybvar in ['Y', 'YES']: - xmlfile.append(dict_to_strings(dict_hyb_tasks)) - - if gfs_cyc != 0: - xmlfile.append(dict_to_strings(dict_gfs_tasks)) - elif gfs_cyc == 0 and dohybvar in ['Y', 'YES'] and eupd_cyc in ['BOTH', 'GFS']: - xmlfile.append(dict_gfs_tasks['gfsprep']) - xmlfile.append('\n') - - xmlfile.append(workflow_footer) - - # Write the XML file - fh = open(f'{base["EXPDIR"]}/{base["PSLOT"]}.xml', 'w') - fh.write(''.join(xmlfile)) - fh.close() - - return - - -if __name__ == '__main__': - main() - sys.exit(0) diff --git a/ush/rocoto/setup_workflow_fcstonly.py b/ush/rocoto/setup_workflow_fcstonly.py deleted file mode 100755 index 430560f9d95..00000000000 --- a/ush/rocoto/setup_workflow_fcstonly.py +++ /dev/null @@ -1,896 +0,0 @@ -#!/usr/bin/env python3 - -''' - PROGRAM: - Create the ROCOTO workflow for a forecast only experiment given the configuration of the GFS parallel - - AUTHOR: - Rahul.Mahajan - rahul.mahajan@noaa.gov - - FILE DEPENDENCIES: - 1. config files for the parallel; e.g. config.base, config.fcst[.gfs], etc. - Without this dependency, the script will fail - - OUTPUT: - 1. PSLOT.xml: XML workflow - 2. 
PSLOT.crontab: crontab for ROCOTO run command - -''' - -import os -import sys -import re -import numpy as np -from datetime import datetime -from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter -import rocoto -import workflow_utils as wfu - -taskplan = ['getic', 'init', 'coupled_ic', 'aerosol_init', 'waveinit', 'waveprep', 'fcst', 'post', 'wavepostsbs', 'wavepostbndpnt', 'wavepostbndpntbll', 'wavepostpnt', 'wavegempak', 'waveawipsbulls', 'waveawipsgridded', 'wafs', 'wafsgrib2', 'wafsblending', 'wafsgcip', 'wafsgrib20p25', 'wafsblending0p25', 'postsnd', 'gempak', 'awips', 'vrfy', 'metp', 'arch', 'ocnpost'] - -def main(): - parser = ArgumentParser(description='Setup XML workflow and CRONTAB for a forecast only experiment.', formatter_class=ArgumentDefaultsHelpFormatter) - parser.add_argument('--expdir',help='full path to experiment directory containing config files', type=str, required=False, default=os.environ['PWD']) - parser.add_argument('--cdump',help='cycle to run forecasts', type=str, choices=['gdas', 'gfs'], default='gfs', required=False) - - args = parser.parse_args() - - configs = wfu.get_configs(args.expdir) - - _base = wfu.config_parser([wfu.find_config('config.base', configs)]) - - if not os.path.samefile(args.expdir,_base['EXPDIR']): - print('MISMATCH in experiment directories!') - print(f'''config.base: EXPDIR = {repr(_base['EXPDIR'])}''') - print(f'input arg: --expdir = {repr(args.expdir)}') - sys.exit(1) - - dict_configs = wfu.source_configs(configs, taskplan) - - dict_configs['base']['CDUMP'] = args.cdump - - # First create workflow XML - create_xml(dict_configs) - - # Next create the crontab - wfu.create_crontab(dict_configs['base']) - - return - - -def get_preamble(): - ''' - Generate preamble for XML - ''' - - strings = [] - - strings.append('\n') - strings.append('\n') - - return ''.join(strings) - - -def get_definitions(base): - ''' - Create entities related to the experiment - ''' - - machine = base.get('machine', 
wfu.detectMachine()) - scheduler = wfu.get_scheduler(machine) - hpssarch = base.get('HPSSARCH', 'NO').upper() - - strings = [] - - strings.append('\n') - strings.append('\t\n') - strings.append(f'''\t\n''') - strings.append(f'''\t\n''') - strings.append(f'''\t\n''') - strings.append('\n') - strings.append('\t\n') - strings.append(f'''\t\n''') - strings.append(f'''\t\n''') - if base['INTERVAL'] is None: - print('cycle INTERVAL cannot be None') - sys.exit(1) - strings.append(f'''\t\n''') - strings.append('\n') - strings.append('\t\n') - strings.append(f'''\t\n''') - strings.append('\n') - strings.append('\t\n') - strings.append(f'''\t\n''') - strings.append(f'''\t\n''') - strings.append(f'''\t\n''') - strings.append('\n') - strings.append('\t\n') - strings.append(f'''\t\n''') - strings.append(f'''\t\n''') - strings.append('\n') - strings.append('\t\n') - strings.append(f'''\t\n''') - strings.append(f'''\t\n''') - strings.append(f'''\t\n''') - if scheduler in ['slurm']: - strings.append(f'''\t\n''') - strings.append(f'''\t\n''') - strings.append(f'\t\n') - strings.append('\n') - strings.append('\t\n') - strings.append(f'''\t\n''') - strings.append('\n') - strings.append('\t\n') - strings.append('\t\n') - strings.append('\t\n') - strings.append('\t\n') - strings.append('\n') - - return ''.join(strings) - - -def get_resources(dict_configs, cdump='gdas'): - ''' - Create resource entities - ''' - - strings = [] - - strings.append('\t\n') - strings.append('\n') - - base = dict_configs['base'] - machine = base.get('machine', wfu.detectMachine()) - reservation = base.get('RESERVATION', 'NONE').upper() - scheduler = wfu.get_scheduler(machine) - - do_wave = base.get('DO_WAVE', 'NO').upper() - do_bufrsnd = base.get('DO_BUFRSND', 'NO').upper() - do_gempak = base.get('DO_GEMPAK', 'NO').upper() - do_awips = base.get('DO_AWIPS', 'NO').upper() - do_metp = base.get('DO_METP', 'NO').upper() - - for task in taskplan: - - cfg = dict_configs[task] - - wtimestr, resstr, queuestr, memstr, 
natstr = wfu.get_resources(machine, cfg, task, reservation, cdump=cdump) - - taskstr = f'{task.upper()}_{cdump.upper()}' - - strings.append(f'\t\n') - if scheduler in ['slurm']: - if task in ['getic', 'arch']: - strings.append(f'\t\n') - else: - strings.append(f'\t\n') - - strings.append(f'\t\n') - strings.append(f'\t\n') - if len(memstr) != 0: - strings.append(f'\t\n') - strings.append(f'\t\n') - - strings.append('\n') - - strings.append('\t\n') - - return ''.join(strings) - - -def get_postgroups(post, cdump='gdas'): - - fhmin = post['FHMIN'] - fhmax = post['FHMAX'] - fhout = post['FHOUT'] - - # Get a list of all forecast hours - if cdump in ['gdas']: - fhrs = list(range(fhmin, fhmax + fhout, fhout)) - elif cdump in ['gfs']: - fhmax = np.max([post['FHMAX_GFS_00'], post['FHMAX_GFS_06'], post['FHMAX_GFS_12'], post['FHMAX_GFS_18']]) - fhout = post['FHOUT_GFS'] - fhmax_hf = post['FHMAX_HF_GFS'] - fhout_hf = post['FHOUT_HF_GFS'] - fhrs_hf = list(range(fhmin, fhmax_hf + fhout_hf, fhout_hf)) - fhrs = fhrs_hf + list(range(fhrs_hf[-1] + fhout, fhmax + fhout, fhout)) - - npostgrp = post['NPOSTGRP'] - ngrps = npostgrp if len(fhrs) > npostgrp else len(fhrs) - - fhrs = [f'f{f:03d}' for f in fhrs] - fhrs = np.array_split(fhrs, ngrps) - fhrs = [f.tolist() for f in fhrs] - - fhrgrp = ' '.join([f'_{f[0]}-{f[-1]}' for f in fhrs]) - fhrdep = ' '.join([f[-1] for f in fhrs]) - fhrlst = ' '.join(['_'.join(f) for f in fhrs]) - - return fhrgrp, fhrdep, fhrlst - - -def get_workflow(dict_configs, cdump='gdas'): - ''' - Create tasks for forecast only workflow - ''' - - envars = [] - envars.append(rocoto.create_envar(name='RUN_ENVIR', value='&RUN_ENVIR;')) - envars.append(rocoto.create_envar(name='HOMEgfs', value='&HOMEgfs;')) - envars.append(rocoto.create_envar(name='EXPDIR', value='&EXPDIR;')) - envars.append(rocoto.create_envar(name='CDATE', value='@Y@m@d@H')) - envars.append(rocoto.create_envar(name='CDUMP', value='&CDUMP;')) - envars.append(rocoto.create_envar(name='PDY', 
value='@Y@m@d')) - envars.append(rocoto.create_envar(name='cyc', value='@H')) - - base = dict_configs['base'] - machine = base.get('machine', wfu.detectMachine()) - hpssarch = base.get('HPSSARCH', 'NO').upper() - app = base.get('APP', "ATM").upper() - do_wave = base.get('DO_WAVE', 'NO').upper() - do_ocean = base.get('DO_OCN', 'NO').upper() - do_ice = base.get('DO_ICE', 'NO').upper() - do_aero = base.get('DO_AERO', 'NO').upper() - do_wave_cdump = base.get('WAVE_CDUMP', 'BOTH').upper() - if do_wave in ['YES']: - do_wave_bnd = dict_configs['wavepostsbs'].get('DOBNDPNT_WAVE', "YES").upper() - do_bufrsnd = base.get('DO_BUFRSND', 'NO').upper() - do_gempak = base.get('DO_GEMPAK', 'NO').upper() - do_awips = base.get('DO_AWIPS', 'NO').upper() - do_wafs = base.get('WAFSF', 'NO').upper() - do_vrfy = base.get('DO_VRFY', 'YES').upper() - do_metp = base.get('DO_METP', 'NO').upper() - n_tiles = 6 - - tasks = [] - - if 'S2S' in app: - # Copy prototype ICs - deps = [] - base_cplic = dict_configs['coupled_ic']['BASE_CPLIC'] - - # ATM ICs - for file in ['gfs_ctrl.nc'] + [f'{datatype}_data.tile{tile_index}.nc' for datatype in ['gfs', 'sfc'] for tile_index in range(1, n_tiles + 1)]: - data = f"{base_cplic}/{dict_configs['coupled_ic'][f'CPL_ATMIC']}/@Y@m@d@H/&CDUMP;/{base.get('CASE','C384')}/INPUT/{file}" - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - - # Ocean ICs - if do_ocean in ["YES"]: - ocn_res = base.get('OCNRES', '025') - for res in ['res'] + [f'res_{res_index}' for res_index in range(1, 5)]: - data = f"{base_cplic}/{dict_configs['coupled_ic'][f'CPL_OCNIC']}/@Y@m@d@H/ocn/{ocn_res:03d}/MOM.{res}.nc" - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - - # Ice ICs - if do_ice in ["YES"]: - ice_res = base.get('ICERES', '025') - ice_res_dec = f'{float(ice_res)/100:.2f}' - data = 
f"{base_cplic}/{dict_configs['coupled_ic'][f'CPL_ICEIC']}/@Y@m@d@H/ice/{ice_res:03d}/cice5_model_{ice_res_dec}.res_@Y@m@d@H.nc" - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - - # Wave ICs - if do_wave in ["YES"]: - for wave_grid in dict_configs['waveinit']['waveGRD'].split(): - data = f"{base_cplic}/{dict_configs['coupled_ic'][f'CPL_WAVIC']}/@Y@m@d@H/wav/{wave_grid}/@Y@m@d.@H0000.restart.{wave_grid}" - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - - dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) - task = wfu.create_wf_task('coupled_ic', cdump=cdump, envar=envars, dependency=dependencies) - tasks.append(task) - tasks.append('\n') - - else: - if hpssarch in ['YES']: - deps = [] - data = '&ROTDIR;/&CDUMP;.@Y@m@d/@H/atmos/INPUT/sfc_data.tile6.nc' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = '&ROTDIR;/&CDUMP;.@Y@m@d/@H/atmos/RESTART/@Y@m@d.@H0000.sfcanl_data.tile6.nc' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep_condition='nor', dep=deps) - - task = wfu.create_wf_task('getic', cdump=cdump, envar=envars, dependency=dependencies) - tasks.append(task) - tasks.append('\n') - - # init - deps = [] - data = '&ROTDIR;/&CDUMP;.@Y@m@d/@H/gfs.t@Hz.sanl' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = '&ROTDIR;/&CDUMP;.@Y@m@d/@H/gfs.t@Hz.atmanl.nemsio' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = '&ROTDIR;/&CDUMP;.@Y@m@d/@H/gfs.t@Hz.atmanl.nc' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = '&ROTDIR;/&CDUMP;.@Y@m@d/@H/atmos/gfs.t@Hz.atmanl.nc' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = 
'&ROTDIR;/&CDUMP;.@Y@m@d/@H/atmos/RESTART/@Y@m@d.@H0000.sfcanl_data.tile6.nc' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep_condition='or', dep=deps) - - if hpssarch in ['YES']: - deps = [] - dep_dict = {'type': 'task', 'name': f'{cdump}getic'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies2 = rocoto.create_dependency(dep=deps) - - deps = [] - deps.append(dependencies) - if hpssarch in ['YES']: - deps.append(dependencies2) - dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) - - task = wfu.create_wf_task('init', cdump=cdump, envar=envars, dependency=dependencies) - tasks.append(task) - tasks.append('\n') - - # waveinit - if do_wave in ['Y', 'YES'] and do_wave_cdump in ['GFS', 'BOTH']: - task = wfu.create_wf_task('waveinit', cdump=cdump, envar=envars) - tasks.append(task) - tasks.append('\n') - - # waveprep - if do_wave in ['Y', 'YES'] and do_wave_cdump in ['GFS', 'BOTH'] and app in ['ATMW']: - deps = [] - dep_dict = {'type': 'task', 'name': f'{cdump}waveinit'} - deps.append(rocoto.add_dependency(dep_dict)) - if 'S2S' not in app: - dep_dict = {'type': 'task', 'name': f'{cdump}init'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) - task = wfu.create_wf_task('waveprep', cdump=cdump, envar=envars, dependency=dependencies) - tasks.append(task) - tasks.append('\n') - - # aerosol_init - if do_aero in ['Y', 'YES']: - deps = [] - if app in ['S2S', 'S2SW']: - dep_dict = {'type': 'task', 'name': 'coupled_ic'} - else: - dep_dict = {'type': 'task', 'name': f'{cdump}init'} - - deps.append(rocoto.add_dependency(dep_dict)) - - # Files from current cycle - files = ['gfs_ctrl.nc'] + [f'gfs_data.tile{tile_index}.nc' for tile_index in range(1, n_tiles + 1)] - for file in files: - data = f'&ROTDIR;/&CDUMP;.@Y@m@d/@H/atmos/INPUT/{file}' - dep_dict = {'type': 'data', 'data': data} - 
deps.append(rocoto.add_dependency(dep_dict)) - - # previous cycle - dep_dict = {'type': 'cycleexist', 'offset': f'-{base["INTERVAL"]}'} - deps.append(rocoto.add_dependency(dep_dict)) - - # Files from previous cycle - files = [f'@Y@m@d.@H0000.fv_core.res.nc'] + \ - [f'@Y@m@d.@H0000.fv_core.res.tile{tile_index}.nc' for tile_index in range(1, n_tiles + 1)] + \ - [f'@Y@m@d.@H0000.fv_tracer.res.tile{tile_index}.nc' for tile_index in range(1, n_tiles + 1)] - - for file in files: - data = ['&ROTDIR;/&CDUMP;.@Y@m@d/@H/atmos/RERUN_RESTART/', file] - offset = [f'-{base["INTERVAL"]}', None] - dep_dict = {'type': 'data', 'data': data, 'offset': offset} - deps.append(rocoto.add_dependency(dep_dict)) - - dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) - task = wfu.create_wf_task('aerosol_init', cdump=cdump, envar=envars, dependency=dependencies) - tasks.append(task) - tasks.append('\n') - - # fcst - deps = [] - data = '&ROTDIR;/&CDUMP;.@Y@m@d/@H/atmos/INPUT/sfc_data.tile6.nc' - dep_dict = {'type':'data', 'data':data} - deps.append(rocoto.add_dependency(dep_dict)) - data = '&ROTDIR;/&CDUMP;.@Y@m@d/@H/atmos/RESTART/@Y@m@d.@H0000.sfcanl_data.tile6.nc' - dep_dict = {'type':'data', 'data':data} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep_condition='or', dep=deps) - - if do_wave in ['Y', 'YES'] and do_wave_cdump in ['GFS', 'BOTH']: - deps = [] - if app in ['ATMW']: - dep_dict = {'type': 'task', 'name': f'{cdump}waveprep'} - else: - dep_dict = {'type': 'task', 'name': f'{cdump}waveinit'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies2 = rocoto.create_dependency(dep_condition='and', dep=deps) - - if do_aero in ['Y', 'YES']: - deps = [] - dep_dict = {'type': 'task', 'name': f'{cdump}aerosol_init'} - deps.append(rocoto.add_dependency(dep_dict)) - deps2 = [] - dep_dict = {'type': 'cycleexist', 'offset': f'-{base["INTERVAL"]}'} - deps2.append(rocoto.add_dependency(dep_dict)) - 
deps.append(rocoto.create_dependency(dep_condition='not', dep=deps2)) - dependencies3 = rocoto.create_dependency(dep_condition='or', dep=deps) - - deps = [] - deps.append(dependencies) - if do_wave in ['Y', 'YES'] and do_wave_cdump in ['GFS', 'BOTH']: - deps.append(dependencies2) - if do_aero in ['Y', 'YES']: - deps.append(dependencies3) - dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) - - task = wfu.create_wf_task('fcst', cdump=cdump, envar=envars, dependency=dependencies) - tasks.append(task) - tasks.append('\n') - - # post - deps = [] - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.log#dep#.txt' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep=deps) - fhrgrp = rocoto.create_envar(name='FHRGRP', value='#grp#') - fhrlst = rocoto.create_envar(name='FHRLST', value='#lst#') - ROTDIR = rocoto.create_envar(name='ROTDIR', value='&ROTDIR;') - postenvars = envars + [fhrgrp] + [fhrlst] + [ROTDIR] - varname1, varname2, varname3 = 'grp', 'dep', 'lst' - varval1, varval2, varval3 = get_postgroups(dict_configs['post'], cdump=cdump) - vardict = {varname2: varval2, varname3: varval3} - task = wfu.create_wf_task('post', cdump=cdump, envar=postenvars, dependency=dependencies, - metatask='post', varname=varname1, varval=varval1, vardict=vardict) - tasks.append(task) - tasks.append('\n') - - # wavepostsbs - if do_wave in ['Y', 'YES'] and do_wave_cdump in ['GFS', 'BOTH']: - deps = [] - for wave_grid in dict_configs['wavepostsbs']['waveGRD'].split(): - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/wave/rundata/{cdump}wave.out_grd.{wave_grid}.@Y@m@d.@H0000' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - - dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) - task = wfu.create_wf_task('wavepostsbs', cdump=cdump, envar=envars, dependency=dependencies) - tasks.append(task) - tasks.append('\n') - - # wavepostbndpnt - 
if do_wave in ['Y', 'YES'] and do_wave_bnd in ['YES']: - deps = [] - dep_dict = {'type': 'task', 'name': f'{cdump}fcst'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep=deps) - task = wfu.create_wf_task('wavepostbndpnt', cdump=cdump, envar=envars, dependency=dependencies) - tasks.append(task) - tasks.append('\n') - - # wavepostbndpntbll - if do_wave in ['Y', 'YES'] and do_wave_bnd in ['YES']: - deps = [] - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.logf180.txt' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep=deps) - task = wfu.create_wf_task('wavepostbndpntbll', cdump=cdump, envar=envars, dependency=dependencies) - tasks.append(task) - tasks.append('\n') - - # wavepostpnt - if do_wave in ['Y', 'YES']: - deps = [] - dep_dict = {'type': 'task', 'name': f'{cdump}fcst'} - deps.append(rocoto.add_dependency(dep_dict)) - if do_wave_bnd in ['YES']: - dep_dict = {'type': 'task', 'name': f'{cdump}wavepostbndpntbll'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) - task = wfu.create_wf_task('wavepostpnt', cdump=cdump, envar=envars, dependency=dependencies) - tasks.append(task) - tasks.append('\n') - - # wavegempak - if do_wave in ['Y', 'YES'] and do_gempak in ['Y', 'YES']: - deps = [] - dep_dict = {'type': 'task', 'name': f'{cdump}wavepostsbs'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep=deps) - task = wfu.create_wf_task('wavegempak', cdump=cdump, envar=envars, dependency=dependencies) - tasks.append(task) - tasks.append('\n') - - # waveawipsbulls - if do_wave in ['Y', 'YES'] and do_awips in ['Y', 'YES']: - deps = [] - dep_dict = {'type':'task', 'name':f'{cdump}wavepostsbs'} - deps.append(rocoto.add_dependency(dep_dict)) - dep_dict = {'type':'task', 'name':f'{cdump}wavepostpnt'} - 
deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) - task = wfu.create_wf_task('waveawipsbulls', cdump=cdump, envar=envars, dependency=dependencies) - tasks.append(task) - tasks.append('\n') - - # waveawipsgridded - if do_wave in ['Y', 'YES'] and do_awips in ['Y', 'YES']: - deps = [] - dep_dict = {'type':'task', 'name':f'{cdump}wavepostsbs'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep=deps) - task = wfu.create_wf_task('waveawipsgridded', cdump=cdump, envar=envars, dependency=dependencies) - tasks.append(task) - tasks.append('\n') - - # ocnpost - if do_ocean in ['YES']: - deps = [] - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.log#dep#.txt' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep=deps) - fhrgrp = rocoto.create_envar(name='FHRGRP', value='#grp#') - fhrlst = rocoto.create_envar(name='FHRLST', value='#lst#') - ROTDIR = rocoto.create_envar(name='ROTDIR', value='&ROTDIR;') - postenvars = envars + [fhrgrp] + [fhrlst] + [ROTDIR] - varname1, varname2, varname3 = 'grp', 'dep', 'lst' - varval1, varval2, varval3 = get_postgroups(dict_configs['ocnpost'], cdump=cdump) - vardict = {varname2: varval2, varname3: varval3} - task = wfu.create_wf_task('ocnpost', cdump=cdump, envar=postenvars, dependency=dependencies, - metatask='ocnpost', varname=varname1, varval=varval1, vardict=vardict) - tasks.append(task) - tasks.append('\n') - - # wafs - if do_wafs in ['Y', 'YES']: - deps = [] - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if006' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if012' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = 
f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if015' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if018' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if021' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if024' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if027' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if030' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if033' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if036' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) - task = wfu.create_wf_task('wafs', cdump=cdump, envar=envars, dependency=dependencies) - tasks.append(task) - tasks.append('\n') - - # wafsgcip - if do_wafs in ['Y', 'YES']: - deps = [] - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if006' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if012' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if015' - dep_dict = {'type': 'data', 
'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if018' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if021' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if024' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if027' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if030' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if033' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if036' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) - task = wfu.create_wf_task('wafsgcip', cdump=cdump, envar=envars, dependency=dependencies) - tasks.append(task) - tasks.append('\n') - - # wafsgrib2 - if do_wafs in ['Y', 'YES']: - deps = [] - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if006' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if012' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if015' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = 
f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if018' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if021' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if024' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if027' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if030' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if033' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if036' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) - task = wfu.create_wf_task('wafsgrib2', cdump=cdump, envar=envars, dependency=dependencies) - tasks.append(task) - tasks.append('\n') - - # wafsgrib20p25 - if do_wafs in ['Y', 'YES']: - deps = [] - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if006' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if012' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if015' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if018' - dep_dict = {'type': 
'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if021' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if024' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if027' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if030' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if033' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - data = f'&ROTDIR;/{cdump}.@Y@m@d/@H/atmos/{cdump}.t@Hz.wafs.grb2if036' - dep_dict = {'type': 'data', 'data': data} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) - task = wfu.create_wf_task('wafsgrib20p25', cdump=cdump, envar=envars, dependency=dependencies) - tasks.append(task) - tasks.append('\n') - - # wafsblending - if do_wafs in ['Y', 'YES']: - deps = [] - dep_dict = {'type': 'task', 'name': f'{cdump}wafsgrib2'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep=deps) - task = wfu.create_wf_task('wafsblending', cdump=cdump, envar=envars, dependency=dependencies) - tasks.append(task) - tasks.append('\n') - - # wafsblending0p25 - if do_wafs in ['Y', 'YES']: - deps = [] - dep_dict = {'type': 'task', 'name': f'{cdump}wafsgrib20p25'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep=deps) - task = wfu.create_wf_task('wafsblending0p25', cdump=cdump, envar=envars, dependency=dependencies) - tasks.append(task) - tasks.append('\n') - - 
#postsnd - if do_bufrsnd in ['Y', 'YES']: - deps = [] - dep_dict = {'type': 'task', 'name': f'{cdump}fcst'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep=deps) - task = wfu.create_wf_task('postsnd', cdump=cdump, envar=envars, dependency=dependencies) - tasks.append(task) - tasks.append('\n') - - # awips - if do_awips in ['Y', 'YES']: - deps = [] - dep_dict = {'type': 'metatask', 'name': f'{cdump}post'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep=deps) - fhrgrp = rocoto.create_envar(name='FHRGRP', value='#grp#') - fhrlst = rocoto.create_envar(name='FHRLST', value='#lst#') - ROTDIR = rocoto.create_envar(name='ROTDIR', value='&ROTDIR;') - awipsenvars = envars + [fhrgrp] + [fhrlst] + [ROTDIR] - varname1, varname2, varname3 = 'grp', 'dep', 'lst' - varval1, varval2, varval3 = get_awipsgroups(dict_configs['awips'], cdump=cdump) - vardict = {varname2: varval2, varname3: varval3} - task = wfu.create_wf_task('awips', cdump=cdump, envar=awipsenvars, dependency=dependencies, - metatask='awips', varname=varname1, varval=varval1, vardict=vardict) - tasks.append(task) - tasks.append('\n') - - # gempak - if do_gempak in ['Y', 'YES']: - deps = [] - dep_dict = {'type': 'metatask', 'name': f'{cdump}post'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep=deps) - task = wfu.create_wf_task('gempak', cdump=cdump, envar=envars, dependency=dependencies) - tasks.append(task) - tasks.append('\n') - - # vrfy - if do_vrfy in ['Y', 'YES']: - deps = [] - dep_dict = {'type':'metatask', 'name':f'{cdump}post'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep=deps) - task = wfu.create_wf_task('vrfy', cdump=cdump, envar=envars, dependency=dependencies) - tasks.append(task) - tasks.append('\n') - - # metp - if do_metp in ['Y', 'YES']: - deps = [] - dep_dict = {'type':'metatask', 'name':f'{cdump}post'} - 
deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) - sdate_gfs = rocoto.create_envar(name='SDATE_GFS', value='&SDATE;') - metpcase = rocoto.create_envar(name='METPCASE', value='#metpcase#') - metpenvars = envars + [sdate_gfs] + [metpcase] - varname1 = 'metpcase' - varval1 = 'g2g1 g2o1 pcp1' - task = wfu.create_wf_task('metp', cdump=cdump, envar=metpenvars, dependency=dependencies, - metatask='metp', varname=varname1, varval=varval1) - tasks.append(task) - tasks.append('\n') - - # arch - deps = [] - dep_dict = {'type':'metatask', 'name':f'{cdump}post'} - deps.append(rocoto.add_dependency(dep_dict)) - if do_vrfy in ['Y', 'YES']: - dep_dict = {'type':'task', 'name':f'{cdump}vrfy'} - deps.append(rocoto.add_dependency(dep_dict)) - if cdump in ['gfs'] and do_metp in ['Y', 'YES']: - dep_dict = {'type':'metatask', 'name':f'{cdump}metp'} - deps.append(rocoto.add_dependency(dep_dict)) - dep_dict = {'type':'streq', 'left':'&ARCHIVE_TO_HPSS;', 'right':f'{hpssarch}'} - deps.append(rocoto.add_dependency(dep_dict)) - if do_wave in ['Y', 'YES']: - dep_dict = {'type': 'task', 'name': f'{cdump}wavepostsbs'} - deps.append(rocoto.add_dependency(dep_dict)) - dep_dict = {'type': 'task', 'name': f'{cdump}wavepostpnt'} - deps.append(rocoto.add_dependency(dep_dict)) - if do_wave_bnd in ['YES']: - dep_dict = {'type': 'task', 'name': f'{cdump}wavepostbndpnt'} - deps.append(rocoto.add_dependency(dep_dict)) - if do_ocean in ['Y', 'YES']: - dep_dict = {'type': 'metatask', 'name': f'{cdump}ocnpost'} - deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) - task = wfu.create_wf_task('arch', cdump=cdump, envar=envars, dependency=dependencies, final=True) - tasks.append(task) - tasks.append('\n') - - return ''.join(tasks) - - -def get_workflow_body(dict_configs, cdump='gdas'): - ''' - Create the workflow body - ''' - - strings = [] - - strings.append('\n') - 
strings.append(']>\n') - strings.append('\n') - strings.append('\n') - strings.append('\n') - strings.append('\t&EXPDIR;/logs/@Y@m@d@H.log\n') - strings.append('\n') - strings.append('\t\n') - strings.append(f'\t&SDATE; &EDATE; &INTERVAL;\n') - strings.append('\n') - strings.append(get_workflow(dict_configs, cdump=cdump)) - strings.append('\n') - strings.append('\n') - - return ''.join(strings) - - -def create_xml(dict_configs): - ''' - Given an experiment directory containing config files and - XML directory containing XML templates, create the workflow XML - ''' - - - dict_configs['base']['INTERVAL'] = wfu.get_gfs_interval(dict_configs['base']['gfs_cyc']) - base = dict_configs['base'] - - preamble = get_preamble() - definitions = get_definitions(base) - resources = get_resources(dict_configs, cdump=base['CDUMP']) - workflow = get_workflow_body(dict_configs, cdump=base['CDUMP']) - - # Removes &MEMORY_JOB_DUMP post mortem from gdas tasks - temp_workflow = '' - memory_dict = [] - for each_resource_string in re.split(r'(\s+)', resources): - if 'MEMORY' in each_resource_string: - memory_dict.append(each_resource_string) - for each_line in re.split(r'(\s+)', workflow): - if 'MEMORY' not in each_line: - temp_workflow += each_line - else: - if any( substring in each_line for substring in memory_dict): - temp_workflow += each_line - workflow = temp_workflow - - # Start writing the XML file - fh = open(f'{base["EXPDIR"]}/{base["PSLOT"]}.xml', 'w') - - fh.write(preamble) - fh.write(definitions) - fh.write(resources) - fh.write(workflow) - - fh.close() - - return - -if __name__ == '__main__': - main() - sys.exit(0) diff --git a/ush/rocoto/setup_xml.py b/ush/rocoto/setup_xml.py new file mode 100755 index 00000000000..74d1b4eed27 --- /dev/null +++ b/ush/rocoto/setup_xml.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 +""" +Entry point for setting up Rocoto XML for all applications in global-workflow +""" + +import os +from argparse import ArgumentParser, 
ArgumentDefaultsHelpFormatter + +from configuration import Configuration +from applications import AppConfig +from workflow_xml import RocotoXML + + +def input_args(): + """ + Method to collect user arguments for `setup_xml.py` + """ + + description = """ + Sources configuration files based on application and + creates "$PSLOT.xml" for use with Rocoto. + """ + + parser = ArgumentParser(description=description, + formatter_class=ArgumentDefaultsHelpFormatter) + + # Common arguments across all modes + parser.add_argument('expdir', help='full path to experiment directory containing config files', + type=str, default=os.environ['PWD']) + + args = parser.parse_args() + + return args + + +def check_expdir(cmd_expdir, cfg_expdir): + + if not os.path.samefile(cmd_expdir, cfg_expdir): + print('MISMATCH in experiment directories!') + print(f'config.base: EXPDIR = {cfg_expdir}') + print(f' input arg: --expdir = {cmd_expdir}') + raise ValueError('Abort!') + + +if __name__ == '__main__': + + user_inputs = input_args() + + cfg = Configuration(user_inputs.expdir) + + check_expdir(user_inputs.expdir, cfg.parse_config('config.base')['EXPDIR']) + + # Configure the application + app_config = AppConfig(cfg) + + # Create Rocoto Tasks and Assemble them into an XML + xml = RocotoXML(app_config) + xml.write() diff --git a/ush/rocoto/test_configuration.py b/ush/rocoto/test_configuration.py new file mode 100644 index 00000000000..f210ceefa4e --- /dev/null +++ b/ush/rocoto/test_configuration.py @@ -0,0 +1,30 @@ +import sys +from configuration import Configuration + + +expdir = sys.argv[1] + +cfg = Configuration(expdir) + +print(f'experiment dir: {cfg.config_dir}') + +print('configuration files ...') +line_separator = '\n' # \escapes are not allowed inside f-strings +print(f'{line_separator.join(cfg.config_files)}') + +print(f'config.base: {cfg.find_config("config.base")}') + +print('*'*80) +print('config.base ...') +base = cfg.parse_config('config.base') +cfg.print_config('config.base') + 
+print('*'*80) +print('config.anal...') +cfg.print_config(['config.base', 'config.anal']) + + +print('*'*80) +print('config.efcs ...') +configs = ['config.base', 'config.fcst', 'config.efcs'] +cfg.print_config(configs) diff --git a/ush/rocoto/test_hosts.py b/ush/rocoto/test_hosts.py new file mode 100644 index 00000000000..b9fa969e143 --- /dev/null +++ b/ush/rocoto/test_hosts.py @@ -0,0 +1,17 @@ +from hosts import Host + +print(f'supported hosts are: {", ".join(Host.SUPPORTED_HOSTS)}') + +print(f'host detected as: {Host.detect}') +print(f'scheduler on host: {Host().scheduler}') + +print('initializing host ...') +host = Host() + +print(f'hostname: {host.machine}') + +print(f'scheduler on host: {host.scheduler}') + +print('host information ...') +line_separator = '\n' # \escapes are not allowed inside f-strings +print(f'{line_separator.join(f"{key}: {host.info[key]}" for key in host.info.keys())}') diff --git a/ush/rocoto/workflow_tasks.py b/ush/rocoto/workflow_tasks.py new file mode 100644 index 00000000000..b455ceb8871 --- /dev/null +++ b/ush/rocoto/workflow_tasks.py @@ -0,0 +1,1101 @@ +#!/usr/bin/env python3 + +import numpy as np +import rocoto +from applications import AppConfig +from typing import List + +__all__ = ['Tasks', 'create_wf_task', 'get_wf_tasks'] + + +class Tasks: + SERVICE_TASKS = ['arch', 'earc', 'getic'] + VALID_TASKS = ['aerosol_init', 'coupled_ic', 'getic', 'init', + 'prep', 'anal', 'sfcanl', 'analcalc', 'analdiag', 'gldas', 'arch', + 'earc', 'ecen', 'echgres', 'ediag', 'efcs', + 'eobs', 'eomg', 'epos', 'esfc', 'eupd', + 'fcst', 'post', 'ocnpost', 'vrfy', 'metp', + 'postsnd', 'awips', 'gempak', + 'wafs', 'wafsblending', 'wafsblending0p25', + 'wafsgcip', 'wafsgrib2', 'wafsgrib20p25', + 'waveawipsbulls', 'waveawipsgridded', 'wavegempak', 'waveinit', + 'wavepostbndpnt', 'wavepostbndpntbll', 'wavepostpnt', 'wavepostsbs', 'waveprep'] + + def __init__(self, app_config: AppConfig, cdump: str) -> None: + + self.app_config = app_config + self.cdump = 
cdump + + # Save dict_configs and base in the internal state (never know where it may be needed) + self._configs = self.app_config.configs + self._base = self._configs['base'] + + self.n_tiles = 6 # TODO - this needs to be elsewhere + + envar_dict = {'RUN_ENVIR': self._base.get('RUN_ENVIR', 'emc'), + 'HOMEgfs': self._base.get('HOMEgfs'), + 'EXPDIR': self._base.get('EXPDIR'), + 'CDUMP': self.cdump, + 'CDATE': '@Y@m@d@H', + 'PDY': '@Y@m@d', + 'cyc': '@H'} + self.envars = self._set_envars(envar_dict) + + @staticmethod + def _set_envars(envar_dict) -> list: + + envars = [] + for key, value in envar_dict.items(): + envars.append(rocoto.create_envar(name=key, value=str(value))) + + return envars + + @staticmethod + def _get_hybgroups(nens: int, nmem_per_group: int, start_index: int = 1): + ngrps = nens / nmem_per_group + groups = ' '.join([f'{x:02d}' for x in range(start_index, int(ngrps) + 1)]) + return groups + + @staticmethod + def _is_this_a_gdas_task(cdump, task_name): + if cdump != 'gdas': + raise TypeError(f'{task_name} must be part of the "gdas" cycle and not {cdump}') + + def get_resource(self, task_name): + """ + Given a task name (task_name) and its configuration (task_names), + return a dictionary of resources (task_resource) used by the task. 
+ Task resource dictionary includes: + account, walltime, cores, nodes, ppn, threads, memory, queue, partition, native + """ + + scheduler = self.app_config.scheduler + + task_config = self._configs[task_name] + + account = task_config['ACCOUNT'] + + walltime = task_config[f'wtime_{task_name}'] + if self.cdump in ['gfs'] and f'wtime_{task_name}_gfs' in task_config.keys(): + walltime = task_config[f'wtime_{task_name}_gfs'] + + cores = task_config[f'npe_{task_name}'] + if self.cdump in ['gfs'] and f'npe_{task_name}_gfs' in task_config.keys(): + cores = task_config[f'npe_{task_name}_gfs'] + + ppn = task_config[f'npe_node_{task_name}'] + if self.cdump in ['gfs'] and f'npe_node_{task_name}_gfs' in task_config.keys(): + ppn = task_config[f'npe_node_{task_name}_gfs'] + + nodes = int(np.ceil(float(cores) / float(ppn))) + + threads = task_config[f'nth_{task_name}'] + if self.cdump in ['gfs'] and f'nth_{task_name}_gfs' in task_config.keys(): + threads = task_config[f'nth_{task_name}_gfs'] + + memory = task_config.get(f'memory_{task_name}', None) + + native = '--export=NONE' if scheduler in ['slurm'] else None + + queue = task_config['QUEUE'] + if task_name in Tasks.SERVICE_TASKS and scheduler not in ['slurm']: + queue = task_config['QUEUE_SERVICE'] + + partition = None + if scheduler in ['slurm']: + partition = task_config['QUEUE_SERVICE'] if task_name in Tasks.SERVICE_TASKS else task_config[ + 'PARTITION_BATCH'] + + task_resource = {'account': account, + 'walltime': walltime, + 'nodes': nodes, + 'cores': cores, + 'ppn': ppn, + 'threads': threads, + 'memory': memory, + 'native': native, + 'queue': queue, + 'partition': partition} + + return task_resource + + def get_task(self, task_name, *args, **kwargs): + """ + Given a task_name, call the method for that task + """ + try: + return getattr(self, task_name, *args, **kwargs)() + except AttributeError: + raise AttributeError(f'"{task_name}" is not a valid task.\n' + + 'Valid tasks are:\n' + + f'{", 
".join(Tasks.VALID_TASKS)}') + + # Specific Tasks begin here + def coupled_ic(self): + + cpl_ic = self._configs['coupled_ic'] + + deps = [] + + # Atm ICs + atm_res = self._base.get('CASE', 'C384') + prefix = f"{cpl_ic['BASE_CPLIC']}/{cpl_ic['CPL_ATMIC']}/@Y@m@d@H/{self.cdump}" + for file in ['gfs_ctrl.nc'] + \ + [f'{datatype}_data.tile{tile}.nc' + for datatype in ['gfs', 'sfc'] + for tile in range(1, self.n_tiles + 1)]: + data = f"{prefix}/{atm_res}/INPUT/{file}" + dep_dict = {'type': 'data', 'data': data} + deps.append(rocoto.add_dependency(dep_dict)) + + # Ocean ICs + if self.app_config.do_ocean: + ocn_res = f"{self._base.get('OCNRES', '025'):03d}" + prefix = f"{cpl_ic['BASE_CPLIC']}/{cpl_ic['CPL_OCNIC']}/@Y@m@d@H/ocn" + for res in ['res'] + [f'res_{res_index}' for res_index in range(1, 5)]: + data = f"{prefix}/{ocn_res}/MOM.{res}.nc" + dep_dict = {'type': 'data', 'data': data} + deps.append(rocoto.add_dependency(dep_dict)) + + # Ice ICs + if self.app_config.do_ice: + ice_res = f"{self._base.get('ICERES', '025'):03d}" + ice_res_dec = f'{float(ice_res) / 100:.2f}' + prefix = f"{cpl_ic['BASE_CPLIC']}/{cpl_ic['CPL_ICEIC']}/@Y@m@d@H/ice" + data = f"{prefix}/{ice_res}/cice5_model_{ice_res_dec}.res_@Y@m@d@H.nc" + dep_dict = {'type': 'data', 'data': data} + deps.append(rocoto.add_dependency(dep_dict)) + + # Wave ICs + if self.app_config.do_wave: + prefix = f"{cpl_ic['BASE_CPLIC']}/{cpl_ic['CPL_WAVIC']}/@Y@m@d@H/wav" + for wave_grid in self._configs['waveinit']['waveGRD'].split(): + data = f"{prefix}/{wave_grid}/@Y@m@d.@H0000.restart.{wave_grid}" + dep_dict = {'type': 'data', 'data': data} + deps.append(rocoto.add_dependency(dep_dict)) + + dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) + + resources = self.get_resource('coupled_ic') + task = create_wf_task('coupled_ic', resources, cdump=self.cdump, envar=self.envars, dependency=dependencies) + + return task + + def getic(self): + + files = ['INPUT/sfc_data.tile6.nc', + 
'RESTART/@Y@m@d.@H0000.sfcanl_data.tile6.nc'] + + deps = [] + for file in files: + dep_dict = {'type': 'data', 'data': f'&ROTDIR;/{self.cdump}.@Y@m@d/@H/{file}'} + deps.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep_condition='nor', dep=deps) + + resources = self.get_resource('getic') + task = create_wf_task('getic', resources, cdump=self.cdump, envar=self.envars, dependency=dependencies) + + return task + + def init(self): + + files = ['gfs.t@Hz.sanl', + 'gfs.t@Hz.atmanl.nemsio', + 'gfs.t@Hz.atmanl.nc', + 'atmos/gfs.t@Hz.atmanl.nc', + 'atmos/RESTART/@Y@m@d.@H0000.sfcanl_data.tile6.nc'] + + deps = [] + for file in files: + dep_dict = {'type': 'data', 'data': f'&ROTDIR;/{self.cdump}.@Y@m@d/@H/{file}'} + deps.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep_condition='or', dep=deps) + + if self.app_config.do_hpssarch: + dep_dict = {'type': 'task', 'name': f'{self.cdump}getic'} + dependencies.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep_condition='and', dep=dependencies) + + resources = self.get_resource('init') + task = create_wf_task('init', resources, cdump=self.cdump, envar=self.envars, dependency=dependencies) + + return task + + def prep(self): + + suffix = self._base["SUFFIX"] + dump_suffix = self._base["DUMP_SUFFIX"] + gfs_cyc = self._base["gfs_cyc"] + dmpdir = self._base["DMPDIR"] + gfs_enkf = True if self.app_config.do_hybvar and 'gfs' in self.app_config.eupd_cdumps else False + + deps = [] + dep_dict = {'type': 'metatask', 'name': f'{"gdas"}post', 'offset': '-06:00:00'} + deps.append(rocoto.add_dependency(dep_dict)) + data = f'&ROTDIR;/gdas.@Y@m@d/@H/atmos/gdas.t@Hz.atmf009{suffix}' + dep_dict = {'type': 'data', 'data': data, 'offset': '-06:00:00'} + deps.append(rocoto.add_dependency(dep_dict)) + data = f'{dmpdir}/{self.cdump}{dump_suffix}.@Y@m@d/@H/{self.cdump}.t@Hz.updated.status.tm00.bufr_d' + dep_dict = {'type': 'data', 'data': data} + 
deps.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) + + cycledef = self.cdump + if self.cdump in ['gfs'] and gfs_enkf and gfs_cyc != 4: + cycledef = 'gdas' + + resources = self.get_resource('prep') + task = create_wf_task('prep', resources, cdump=self.cdump, envar=self.envars, dependency=dependencies, + cycledef=cycledef) + + return task + + def waveinit(self): + + resources = self.get_resource('waveinit') + dependencies = None + if self.app_config.mode in ['cycled']: + deps = [] + dep_dict = {'type': 'task', 'name': f'{self.cdump}prep'} + deps.append(rocoto.add_dependency(dep_dict)) + dep_dict = {'type': 'cycleexist', 'condition': 'not', 'offset': '-06:00:00'} + deps.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep_condition='or', dep=deps) + + task = create_wf_task('waveinit', resources, cdump=self.cdump, envar=self.envars, dependency=dependencies) + + return task + + def waveprep(self): + + deps = [] + dep_dict = {'type': 'task', 'name': f'{self.cdump}waveinit'} + deps.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep=deps) + + resources = self.get_resource('waveprep') + task = create_wf_task('waveprep', resources, cdump=self.cdump, envar=self.envars, dependency=dependencies) + + return task + + def aerosol_init(self): + + deps = [] + # Files from current cycle + files = ['gfs_ctrl.nc'] + [f'gfs_data.tile{tile}.nc' for tile in range(1, self.n_tiles + 1)] + for file in files: + data = f'&ROTDIR;/{self.cdump}.@Y@m@d/@H/atmos/INPUT/{file}' + dep_dict = {'type': 'data', 'data': data} + deps.append(rocoto.add_dependency(dep_dict)) + + # Calculate offset based on CDUMP = gfs | gdas + interval = None + if self.cdump in ['gfs']: + interval = self._base['INTERVAL_GFS'] + elif self.cdump in ['gdas']: + interval = self._base['INTERVAL'] + offset = f'-{interval}' + + # Previous cycle + dep_dict = {'type': 'cycleexist', 'offset': offset} 
+ deps.append(rocoto.add_dependency(dep_dict)) + + # Files from previous cycle + files = [f'@Y@m@d.@H0000.fv_core.res.nc'] + \ + [f'@Y@m@d.@H0000.fv_core.res.tile{tile}.nc' for tile in range(1, self.n_tiles + 1)] + \ + [f'@Y@m@d.@H0000.fv_tracer.res.tile{tile}.nc' for tile in range(1, self.n_tiles + 1)] + + for file in files: + data = [f'&ROTDIR;/{self.cdump}.@Y@m@d/@H/atmos/RERUN_RESTART/', file] + dep_dict = {'type': 'data', 'data': data, 'offset': [offset, None]} + deps.append(rocoto.add_dependency(dep_dict)) + + dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) + + resources = self.get_resource('aerosol_init') + task = create_wf_task('aerosol_init', resources, cdump=self.cdump, envar=self.envars, dependency=dependencies) + + return task + + def anal(self): + deps = [] + dep_dict = {'type': 'task', 'name': f'{self.cdump}prep'} + deps.append(rocoto.add_dependency(dep_dict)) + if self.app_config.do_hybvar: + dep_dict = {'type': 'metatask', 'name': f'{"gdas"}epmn', 'offset': '-06:00:00'} + deps.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) + else: + dependencies = rocoto.create_dependency(dep=deps) + + resources = self.get_resource('anal') + task = create_wf_task('anal', resources, cdump=self.cdump, envar=self.envars, dependency=dependencies) + + return task + + def sfcanl(self): + + deps = [] + dep_dict = {'type': 'task', 'name': f'{self.cdump}anal'} + deps.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep=deps) + + resources = self.get_resource('sfcanl') + task = create_wf_task('sfcanl', resources, cdump=self.cdump, envar=self.envars, dependency=dependencies) + + return task + + def analcalc(self): + + deps = [] + dep_dict = {'type': 'task', 'name': f'{self.cdump}anal'} + deps.append(rocoto.add_dependency(dep_dict)) + dep_dict = {'type': 'task', 'name': f'{self.cdump}sfcanl'} + deps.append(rocoto.add_dependency(dep_dict)) + if 
self.app_config.do_hybvar and self.cdump in ['gdas']: + dep_dict = {'type': 'task', 'name': f'{"gdas"}echgres', 'offset': '-06:00:00'} + deps.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) + + resources = self.get_resource('analcalc') + task = create_wf_task('analcalc', resources, cdump=self.cdump, envar=self.envars, dependency=dependencies) + + return task + + def analdiag(self): + + deps = [] + dep_dict = {'type': 'task', 'name': f'{self.cdump}anal'} + deps.append(rocoto.add_dependency(dep_dict)) + dep_dict = {'type': 'cycleexist', 'offset': '-06:00:00'} + deps.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) + + resources = self.get_resource('analdiag') + task = create_wf_task('analdiag', resources, cdump=self.cdump, envar=self.envars, dependency=dependencies) + + return task + + def gldas(self): + + deps = [] + dep_dict = {'type': 'task', 'name': f'{self.cdump}sfcanl'} + deps.append(rocoto.add_dependency(dep_dict)) + dep_dict = {'type': 'cycleexist', 'offset': '-06:00:00'} + deps.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) + + resources = self.get_resource('gldas') + task = create_wf_task('gldas', resources, cdump=self.cdump, envar=self.envars, dependency=dependencies) + + return task + + def fcst(self): + + fcst_map = {'forecast-only': self._fcst_forecast_only, + 'cycled': self._fcst_cycled} + + try: + task = fcst_map[self.app_config.mode] + except KeyError: + raise NotImplementedError(f'{self.app_config.mode} is not a valid type.\n' + + 'Currently supported forecast types are:\n' + + f'{" | ".join(fcst_map.keys())}') + + return task + + @property + def _fcst_forecast_only(self): + dependencies = [] + deps = [] + data = f'&ROTDIR;/{self.cdump}.@Y@m@d/@H/atmos/INPUT/sfc_data.tile6.nc' + dep_dict = {'type': 'data', 'data': data} + 
deps.append(rocoto.add_dependency(dep_dict)) + data = f'&ROTDIR;/{self.cdump}.@Y@m@d/@H/atmos/RESTART/@Y@m@d.@H0000.sfcanl_data.tile6.nc' + dep_dict = {'type': 'data', 'data': data} + deps.append(rocoto.add_dependency(dep_dict)) + dependencies.append(rocoto.create_dependency(dep_condition='or', dep=deps)) + + if self.app_config.do_wave and self.cdump in self.app_config.wave_cdumps: + wave_job = 'waveprep' if self.app_config.model_app in ['ATMW'] else 'waveinit' + dep_dict = {'type': 'task', 'name': f'{self.cdump}{wave_job}'} + dependencies.append(rocoto.add_dependency(dep_dict)) + + if self.app_config.do_aero: + # Calculate offset based on CDUMP = gfs | gdas + interval = None + if self.cdump in ['gfs']: + interval = self._base['INTERVAL_GFS'] + elif self.cdump in ['gdas']: + interval = self._base['INTERVAL'] + offset = f'-{interval}' + deps = [] + dep_dict = {'type': 'task', 'name': f'{self.cdump}aerosol_init'} + deps.append(rocoto.add_dependency(dep_dict)) + dep_dict = {'type': 'cycleexist', 'condition': 'not', 'offset': offset} + deps.append(rocoto.add_dependency(dep_dict)) + dependencies.append(rocoto.create_dependency(dep_condition='or', dep=deps)) + + dependencies = rocoto.create_dependency(dep_condition='and', dep=dependencies) + + resources = self.get_resource('fcst') + task = create_wf_task('fcst', resources, cdump=self.cdump, envar=self.envars, dependency=dependencies) + + return task + + @property + def _fcst_cycled(self): + + dep_dict = {'type': 'task', 'name': f'{self.cdump}sfcanl'} + dep = rocoto.add_dependency(dep_dict) + dependencies = rocoto.create_dependency(dep=dep) + + if self.app_config.do_gldas and self.cdump in ['gdas']: + dep_dict = {'type': 'task', 'name': f'{self.cdump}gldas'} + dependencies.append(rocoto.add_dependency(dep_dict)) + + if self.app_config.do_wave and self.cdump in self.app_config.wave_cdumps: + dep_dict = {'type': 'task', 'name': f'{self.cdump}waveprep'} + dependencies.append(rocoto.add_dependency(dep_dict)) + + dependencies 
= rocoto.create_dependency(dep_condition='and', dep=dependencies) + + if self.cdump in ['gdas']: + dep_dict = {'type': 'cycleexist', 'condition': 'not', 'offset': '-06:00:00'} + dependencies.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep_condition='or', dep=dependencies) + + resources = self.get_resource('fcst') + task = create_wf_task('fcst', resources, cdump=self.cdump, envar=self.envars, dependency=dependencies) + + return task + + def post(self): + add_anl_to_post = False + if self.app_config.mode in ['cycled']: + add_anl_to_post = True + + return self._post_task('post', add_anl_to_post=add_anl_to_post) + + def ocnpost(self): + return self._post_task('ocnpost', add_anl_to_post=False) + + def _post_task(self, task_name, add_anl_to_post=False): + if task_name not in ['post', 'ocnpost']: + raise KeyError(f'Invalid post-processing task: {task_name}') + + if task_name in ['ocnpost']: + add_anl_to_post = False + + def _get_postgroups(cdump, config, add_anl=False): + + fhmin = config['FHMIN'] + fhmax = config['FHMAX'] + fhout = config['FHOUT'] + + # Get a list of all forecast hours + fhrs = [] + if cdump in ['gdas']: + fhrs = range(fhmin, fhmax + fhout, fhout) + elif cdump in ['gfs']: + fhmax = np.max( + [config['FHMAX_GFS_00'], config['FHMAX_GFS_06'], config['FHMAX_GFS_12'], config['FHMAX_GFS_18']]) + fhout = config['FHOUT_GFS'] + fhmax_hf = config['FHMAX_HF_GFS'] + fhout_hf = config['FHOUT_HF_GFS'] + fhrs_hf = range(fhmin, fhmax_hf + fhout_hf, fhout_hf) + fhrs = list(fhrs_hf) + list(range(fhrs_hf[-1] + fhout, fhmax + fhout, fhout)) + + npostgrp = config['NPOSTGRP'] + ngrps = npostgrp if len(fhrs) > npostgrp else len(fhrs) + + fhrs = [f'f{fhr:03d}' for fhr in fhrs] + fhrs = np.array_split(fhrs, ngrps) + fhrs = [fhr.tolist() for fhr in fhrs] + + anl = ['anl'] if add_anl else [] + + grp = ' '.join(anl + [f'_{fhr[0]}-{fhr[-1]}' for fhr in fhrs]) + dep = ' '.join(anl + [fhr[-1] for fhr in fhrs]) + lst = ' '.join(anl + ['_'.join(fhr) 
for fhr in fhrs]) + + return grp, dep, lst + + deps = [] + data = f'&ROTDIR;/{self.cdump}.@Y@m@d/@H/atmos/{self.cdump}.t@Hz.log#dep#.txt' + dep_dict = {'type': 'data', 'data': data} + deps.append(rocoto.add_dependency(dep_dict)) + dep_dict = {'type': 'task', 'name': f'{self.cdump}fcst'} + deps.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep_condition='or', dep=deps) + + postenvars = self.envars.copy() + postenvar_dict = {'FHRGRP': '#grp#', + 'FHRLST': '#lst#', + 'ROTDIR': self._base.get('ROTDIR')} + for key, value in postenvar_dict.items(): + postenvars.append(rocoto.create_envar(name=key, value=str(value))) + + varname1, varname2, varname3 = 'grp', 'dep', 'lst' + varval1, varval2, varval3 = _get_postgroups(self.cdump, self._configs[task_name], add_anl=add_anl_to_post) + vardict = {varname2: varval2, varname3: varval3} + + resources = self.get_resource(task_name) + task = create_wf_task(task_name, resources, cdump=self.cdump, envar=postenvars, dependency=dependencies, + metatask=task_name, varname=varname1, varval=varval1, vardict=vardict) + + return task + + def wavepostsbs(self): + deps = [] + for wave_grid in self._configs['wavepostsbs']['waveGRD'].split(): + data = f'&ROTDIR;/{self.cdump}.@Y@m@d/@H/wave/rundata/{self.cdump}wave.out_grd.{wave_grid}.@Y@m@d.@H0000' + dep_dict = {'type': 'data', 'data': data} + deps.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) + + resources = self.get_resource('wavepostsbs') + task = create_wf_task('wavepostsbs', resources, cdump=self.cdump, envar=self.envars, dependency=dependencies) + + return task + + def wavepostbndpnt(self): + deps = [] + dep_dict = {'type': 'task', 'name': f'{self.cdump}fcst'} + deps.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep=deps) + + resources = self.get_resource('wavepostbndpnt') + task = create_wf_task('wavepostbndpnt', resources, cdump=self.cdump, 
envar=self.envars, dependency=dependencies) + + return task + + def wavepostbndpntbll(self): + deps = [] + data = f'&ROTDIR;/{self.cdump}.@Y@m@d/@H/atmos/{self.cdump}.t@Hz.logf180.txt' + dep_dict = {'type': 'data', 'data': data} + deps.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep=deps) + + resources = self.get_resource('wavepostbndpntbll') + task = create_wf_task('wavepostbndpntbll', resources, cdump=self.cdump, envar=self.envars, + dependency=dependencies) + + return task + + def wavepostpnt(self): + deps = [] + dep_dict = {'type': 'task', 'name': f'{self.cdump}fcst'} + deps.append(rocoto.add_dependency(dep_dict)) + if self.app_config.do_wave_bnd: + dep_dict = {'type': 'task', 'name': f'{self.cdump}wavepostbndpntbll'} + deps.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) + + resources = self.get_resource('wavepostpnt') + task = create_wf_task('wavepostpnt', resources, cdump=self.cdump, envar=self.envars, dependency=dependencies) + + return task + + def wavegempak(self): + deps = [] + dep_dict = {'type': 'task', 'name': f'{self.cdump}wavepostsbs'} + deps.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep=deps) + + resources = self.get_resource('wavegempak') + task = create_wf_task('wavegempak', resources, cdump=self.cdump, envar=self.envars, dependency=dependencies) + + return task + + def waveawipsbulls(self): + deps = [] + dep_dict = {'type': 'task', 'name': f'{self.cdump}wavepostsbs'} + deps.append(rocoto.add_dependency(dep_dict)) + dep_dict = {'type': 'task', 'name': f'{self.cdump}wavepostpnt'} + deps.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) + + resources = self.get_resource('waveawipsbulls') + task = create_wf_task('waveawipsbulls', resources, cdump=self.cdump, envar=self.envars, dependency=dependencies) + + return task + + def 
waveawipsgridded(self): + deps = [] + dep_dict = {'type': 'task', 'name': f'{self.cdump}wavepostsbs'} + deps.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep=deps) + + resources = self.get_resource('waveawipsgridded') + task = create_wf_task('waveawipsgridded', resources, cdump=self.cdump, envar=self.envars, + dependency=dependencies) + + return task + + def wafs(self): + return self._wafs_task('wafs') + + def wafsgcip(self): + return self._wafs_task('wafsgcip') + + def wafsgrib2(self): + return self._wafs_task('wafsgrib2') + + def wafsgrib20p25(self): + return self._wafs_task('wafsgrib20p25') + + def _wafs_task(self, task_name): + if task_name not in ['wafs', 'wafsgcip', 'wafsgrib2', 'wafsgrib20p25']: + raise KeyError(f'Invalid WAFS task: {task_name}') + + deps = [] + fhrlst = [6] + [*range(12, 36 + 3, 3)] + for fhr in fhrlst: + data = f'&ROTDIR;/{self.cdump}.@Y@m@d/@H/atmos/{self.cdump}.t@Hz.wafs.grb2if{fhr:03d}' + dep_dict = {'type': 'data', 'data': data} + deps.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) + + resources = self.get_resource(task_name) + task = create_wf_task(task_name, resources, cdump=self.cdump, envar=self.envars, dependency=dependencies) + + return task + + def wafsblending(self): + deps = [] + dep_dict = {'type': 'task', 'name': f'{self.cdump}wafsgrib2'} + deps.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep=deps) + + resources = self.get_resource('wafsblending') + task = create_wf_task('wafsblending', resources, cdump=self.cdump, envar=self.envars, dependency=dependencies) + + return task + + def wafsblending0p25(self): + deps = [] + dep_dict = {'type': 'task', 'name': f'{self.cdump}wafsgrib20p25'} + deps.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep=deps) + + resources = self.get_resource('wafsblending0p25') + task = create_wf_task('wafsblending0p25', resources, 
cdump=self.cdump, envar=self.envars, + dependency=dependencies) + + return task + + def postsnd(self): + deps = [] + dep_dict = {'type': 'task', 'name': f'{self.cdump}fcst'} + deps.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep=deps) + + resources = self.get_resource('postsnd') + task = create_wf_task('postsnd', resources, cdump=self.cdump, envar=self.envars, dependency=dependencies) + + return task + + def awips(self): + + def _get_awipsgroups(cdump, config): + + fhmin = config['FHMIN'] + fhmax = config['FHMAX'] + fhout = config['FHOUT'] + + # Get a list of all forecast hours + fhrs = [] + if cdump in ['gdas']: + fhrs = range(fhmin, fhmax + fhout, fhout) + elif cdump in ['gfs']: + fhmax = np.max( + [config['FHMAX_GFS_00'], config['FHMAX_GFS_06'], config['FHMAX_GFS_12'], config['FHMAX_GFS_18']]) + fhout = config['FHOUT_GFS'] + fhmax_hf = config['FHMAX_HF_GFS'] + fhout_hf = config['FHOUT_HF_GFS'] + if fhmax > 240: + fhmax = 240 + if fhmax_hf > 240: + fhmax_hf = 240 + fhrs_hf = list(range(fhmin, fhmax_hf + fhout_hf, fhout_hf)) + fhrs = fhrs_hf + list(range(fhrs_hf[-1] + fhout, fhmax + fhout, fhout)) + + nawipsgrp = config['NAWIPSGRP'] + ngrps = nawipsgrp if len(fhrs) > nawipsgrp else len(fhrs) + + fhrs = [f'f{fhr:03d}' for fhr in fhrs] + fhrs = np.array_split(fhrs, ngrps) + fhrs = [fhr.tolist() for fhr in fhrs] + + grp = ' '.join([f'_{fhr[0]}-{fhr[-1]}' for fhr in fhrs]) + dep = ' '.join([fhr[-1] for fhr in fhrs]) + lst = ' '.join(['_'.join(fhr) for fhr in fhrs]) + + return grp, dep, lst + + deps = [] + dep_dict = {'type': 'metatask', 'name': f'{self.cdump}post'} + deps.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep=deps) + + awipsenvars = self.envars.copy() + awipsenvar_dict = {'FHRGRP': '#grp#', + 'FHRLST': '#lst#', + 'ROTDIR': self._base.get('ROTDIR')} + for key, value in awipsenvar_dict.items(): + awipsenvars.append(rocoto.create_envar(name=key, value=str(value))) + + varname1, 
varname2, varname3 = 'grp', 'dep', 'lst' + varval1, varval2, varval3 = _get_awipsgroups(self.cdump, self._configs['awips']) + vardict = {varname2: varval2, varname3: varval3} + + resources = self.get_resource('awips') + task = create_wf_task('awips', resources, cdump=self.cdump, envar=awipsenvars, dependency=dependencies, + metatask='awips', varname=varname1, varval=varval1, vardict=vardict) + + return task + + def gempak(self): + + deps = [] + dep_dict = {'type': 'metatask', 'name': f'{self.cdump}post'} + deps.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep=deps) + + resources = self.get_resource('gempak') + task = create_wf_task('gempak', resources, cdump=self.cdump, envar=self.envars, dependency=dependencies) + + return task + + def vrfy(self): + deps = [] + dep_dict = {'type': 'metatask', 'name': f'{self.cdump}post'} + deps.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep=deps) + + resources = self.get_resource('vrfy') + task = create_wf_task('vrfy', resources, cdump=self.cdump, envar=self.envars, dependency=dependencies) + + return task + + def metp(self): + deps = [] + dep_dict = {'type': 'metatask', 'name': f'{self.cdump}post'} + deps.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) + + metpenvars = self.envars.copy() + metpenvar_dict = {'SDATE_GFS': self._base.get('SDATE_GFS'), + # TODO - in Forecast-only, this is `SDATE` on the RHS + 'METPCASE': '#metpcase#'} + for key, value in metpenvar_dict.items(): + metpenvars.append(rocoto.create_envar(name=key, value=str(value))) + + varname1 = 'metpcase' + varval1 = 'g2g1 g2o1 pcp1' + + resources = self.get_resource('metp') + task = create_wf_task('metp', resources, cdump=self.cdump, envar=metpenvars, dependency=dependencies, + metatask='metp', varname=varname1, varval=varval1) + + return task + + def arch(self): + deps = [] + if self.app_config.do_vrfy: + dep_dict = {'type': 
'task', 'name': f'{self.cdump}vrfy'} + deps.append(rocoto.add_dependency(dep_dict)) + if self.app_config.do_metp and self.cdump in ['gfs']: + dep_dict = {'type': 'metatask', 'name': f'{self.cdump}metp'} + deps.append(rocoto.add_dependency(dep_dict)) + if self.app_config.do_wave: + dep_dict = {'type': 'task', 'name': f'{self.cdump}wavepostsbs'} + deps.append(rocoto.add_dependency(dep_dict)) + dep_dict = {'type': 'task', 'name': f'{self.cdump}wavepostpnt'} + deps.append(rocoto.add_dependency(dep_dict)) + if self.app_config.do_wave_bnd: + dep_dict = {'type': 'task', 'name': f'{self.cdump}wavepostbndpnt'} + deps.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) + + resources = self.get_resource('arch') + task = create_wf_task('arch', resources, cdump=self.cdump, envar=self.envars, dependency=dependencies) + + return task + + # Start of ensemble tasks + def eobs(self): + deps = [] + dep_dict = {'type': 'task', 'name': f'{self.cdump}prep'} + deps.append(rocoto.add_dependency(dep_dict)) + dep_dict = {'type': 'metatask', 'name': f'{"gdas"}epmn', 'offset': '-06:00:00'} + deps.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) + + resources = self.get_resource('eobs') + task = create_wf_task('eobs', resources, cdump=self.cdump, envar=self.envars, dependency=dependencies) + + return task + + def eomg(self): + deps = [] + dep_dict = {'type': 'task', 'name': f'{self.cdump}eobs'} + deps.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep=deps) + + eomgenvars = self.envars.copy() + eomgenvars.append(rocoto.create_envar(name='ENSGRP', value='#grp#')) + + groups = self._get_hybgroups(self._base['NMEM_ENKF'], self._configs['eobs']['NMEM_EOMGGRP']) + + resources = self.get_resource('eomg') + task = create_wf_task('eomg', resources, cdump=self.cdump, envar=eomgenvars, dependency=dependencies, + metatask='eomn', varname='grp', 
varval=groups) + + return task + + def ediag(self): + deps = [] + dep_dict = {'type': 'task', 'name': f'{self.cdump}eobs'} + deps.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep=deps) + + resources = self.get_resource('ediag') + task = create_wf_task('ediag', resources, cdump=self.cdump, envar=self.envars, dependency=dependencies) + + return task + + def eupd(self): + deps = [] + if self.app_config.lobsdiag_forenkf: + dep_dict = {'type': 'task', 'name': f'{self.cdump}ediag'} + else: + dep_dict = {'type': 'metatask', 'name': f'{self.cdump}eomn'} + deps.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep=deps) + + resources = self.get_resource('eupd') + task = create_wf_task('eupd', resources, cdump=self.cdump, envar=self.envars, dependency=dependencies) + + return task + + def ecen(self): + + self._is_this_a_gdas_task(self.cdump, 'ecen') + + def _get_ecengroups(): + + if self._base.get('DOIAU_ENKF', False): + fhrs = list(self._base.get('IAUFHRS', '6').split(',')) + + necengrp = self._configs['ecen']['NECENGRP'] + ngrps = necengrp if len(fhrs) > necengrp else len(fhrs) + + fhrs = [f'{int(fhr):03d}' for fhr in fhrs] + fhrs = np.array_split(fhrs, ngrps) + fhrs = [fhr.tolist() for fhr in fhrs] + + grp = ' '.join([f'{x:03d}' for x in range(0, ngrps)]) + dep = ' '.join([f[-1] for f in fhrs]) + lst = ' '.join(['_'.join(f) for f in fhrs]) + + else: + grp = '000' + dep = 'f006' + lst = 'f006' + + return grp, dep, lst + + eupd_cdump = 'gdas' if 'gdas' in self.app_config.eupd_cdumps else 'gfs' + + deps = [] + dep_dict = {'type': 'task', 'name': f'{self.cdump}analcalc'} + deps.append(rocoto.add_dependency(dep_dict)) + dep_dict = {'type': 'task', 'name': f'{eupd_cdump}eupd'} + deps.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) + + ecenenvars = self.envars.copy() + ecenenvar_dict = {'FHRGRP': '#grp#', + 'FHRLST': '#lst#'} + for key, value in 
ecenenvar_dict.items(): + ecenenvars.append(rocoto.create_envar(name=key, value=str(value))) + + varname1, varname2, varname3 = 'grp', 'dep', 'lst' + varval1, varval2, varval3 = _get_ecengroups() + vardict = {varname2: varval2, varname3: varval3} + + resources = self.get_resource('ecen') + task = create_wf_task('ecen', resources, cdump=self.cdump, envar=ecenenvars, dependency=dependencies, + metatask='ecmn', varname=varname1, varval=varval1, vardict=vardict) + return task + + def esfc(self): + + self._is_this_a_gdas_task(self.cdump, 'esfc') + + eupd_cdump = 'gdas' if 'gdas' in self.app_config.eupd_cdumps else 'gfs' + + deps = [] + dep_dict = {'type': 'task', 'name': f'{self.cdump}analcalc'} + deps.append(rocoto.add_dependency(dep_dict)) + dep_dict = {'type': 'task', 'name': f'{eupd_cdump}eupd'} + deps.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) + + resources = self.get_resource('esfc') + task = create_wf_task('esfc', resources, cdump='gdas', envar=self.envars, dependency=dependencies) + + return task + + def efcs(self): + + self._is_this_a_gdas_task(self.cdump, 'efcs') + + deps = [] + dep_dict = {'type': 'metatask', 'name': f'{self.cdump}ecmn'} + deps.append(rocoto.add_dependency(dep_dict)) + dep_dict = {'type': 'task', 'name': f'{self.cdump}esfc'} + deps.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) + dep_dict = {'type': 'cycleexist', 'condition': 'not', 'offset': '-06:00:00'} + dependencies.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep_condition='or', dep=dependencies) + + efcsenvars = self.envars.copy() + efcsenvars.append(rocoto.create_envar(name='ENSGRP', value='#grp#')) + + groups = self._get_hybgroups(self._base['NMEM_ENKF'], self._configs['efcs']['NMEM_EFCSGRP']) + + resources = self.get_resource('efcs') + task = create_wf_task('efcs', resources, cdump=self.cdump, envar=efcsenvars, 
dependency=dependencies, + metatask='efmn', varname='grp', varval=groups) + + return task + + def echgres(self): + + self._is_this_a_gdas_task(self.cdump, 'echgres') + + deps = [] + dep_dict = {'type': 'task', 'name': f'{self.cdump}fcst'} + deps.append(rocoto.add_dependency(dep_dict)) + dep_dict = {'type': 'metatask', 'name': f'{self.cdump}efmn'} + deps.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) + + resources = self.get_resource('echgres') + task = create_wf_task('echgres', resources, cdump=self.cdump, envar=self.envars, dependency=dependencies) + + return task + + def epos(self): + + self._is_this_a_gdas_task(self.cdump, 'epos') + + def _get_eposgroups(epos): + fhmin = epos['FHMIN_ENKF'] + fhmax = epos['FHMAX_ENKF'] + fhout = epos['FHOUT_ENKF'] + fhrs = range(fhmin, fhmax + fhout, fhout) + + neposgrp = epos['NEPOSGRP'] + ngrps = neposgrp if len(fhrs) > neposgrp else len(fhrs) + + fhrs = [f'f{fhr:03d}' for fhr in fhrs] + fhrs = np.array_split(fhrs, ngrps) + fhrs = [f.tolist() for f in fhrs] + + grp = ' '.join([f'{x:03d}' for x in range(0, ngrps)]) + dep = ' '.join([f[-1] for f in fhrs]) + lst = ' '.join(['_'.join(f) for f in fhrs]) + + return grp, dep, lst + + deps = [] + dep_dict = {'type': 'metatask', 'name': f'{self.cdump}efmn'} + deps.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep=deps) + + eposenvars = self.envars.copy() + eposenvar_dict = {'FHRGRP': '#grp#', + 'FHRLST': '#lst#'} + for key, value in eposenvar_dict.items(): + eposenvars.append(rocoto.create_envar(name=key, value=str(value))) + + varname1, varname2, varname3 = 'grp', 'dep', 'lst' + varval1, varval2, varval3 = _get_eposgroups(self._configs['epos']) + vardict = {varname2: varval2, varname3: varval3} + + resources = self.get_resource('epos') + task = create_wf_task('epos', resources, cdump=self.cdump, envar=eposenvars, dependency=dependencies, + metatask='epmn', varname=varname1, 
varval=varval1, vardict=vardict) + + return task + + def earc(self): + + self._is_this_a_gdas_task(self.cdump, 'earc') + + deps = [] + dep_dict = {'type': 'metatask', 'name': f'{self.cdump}epmn'} + deps.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep=deps) + + earcenvars = self.envars.copy() + earcenvars.append(rocoto.create_envar(name='ENSGRP', value='#grp#')) + + groups = self._get_hybgroups(self._base['NMEM_ENKF'], self._configs['earc']['NMEM_EARCGRP'], start_index=0) + + resources = self.get_resource('earc') + task = create_wf_task('earc', resources, cdump=self.cdump, envar=earcenvars, dependency=dependencies, + metatask='eamn', varname='grp', varval=groups) + + return task + + +def create_wf_task(task_name, resources, + cdump='gdas', cycledef=None, envar=None, dependency=None, + metatask=None, varname=None, varval=None, vardict=None, + final=False): + tasknamestr = f'{cdump}{task_name}' + metatask_dict = None + if metatask is not None: + tasknamestr = f'{tasknamestr}#{varname}#' + metatask_dict = {'metataskname': f'{cdump}{metatask}', + 'varname': f'{varname}', + 'varval': f'{varval}', + 'vardict': vardict} + + cycledefstr = cdump if cycledef is None else cycledef + + task_dict = {'taskname': f'{tasknamestr}', + 'cycledef': f'{cycledefstr}', + 'maxtries': '&MAXTRIES;', + 'command': f'&JOBS_DIR;/{task_name}.sh', + 'jobname': f'&PSLOT;_{tasknamestr}_@H', + 'resources': resources, + 'log': f'&ROTDIR;/logs/@Y@m@d@H/{tasknamestr}.log', + 'envars': envar, + 'dependency': dependency, + 'final': final} + + task = rocoto.create_task(task_dict) if metatask is None else rocoto.create_metatask(task_dict, metatask_dict) + + return ''.join(task) + + +def get_wf_tasks(app_config: AppConfig) -> List: + """ + Take application configuration to return a list of all tasks for that application + """ + + tasks = [] + # Loop over all keys of cycles (CDUMP) + for cdump, cdump_tasks in app_config.task_names.items(): + task_obj = Tasks(app_config, 
cdump) # create Task object based on cdump + for task_name in cdump_tasks: + tasks.append(task_obj.get_task(task_name)) + + return tasks diff --git a/ush/rocoto/workflow_utils.py b/ush/rocoto/workflow_utils.py deleted file mode 100755 index d981a17c257..00000000000 --- a/ush/rocoto/workflow_utils.py +++ /dev/null @@ -1,525 +0,0 @@ -#!/usr/bin/env python - -''' - Module containing functions all workflow setups require -''' -import random -import re -import os, sys, stat -import socket -import glob -import subprocess -import numpy as np -from distutils.spawn import find_executable -from datetime import datetime, timedelta -import rocoto - -DATE_ENV_VARS=['CDATE','SDATE','EDATE'] -SCHEDULER_MAP={'HERA':'slurm', - 'JET':'slurm', - 'ORION':'slurm', - 'WCOSS':'lsf', - 'WCOSS_DELL_P3':'lsf', - 'WCOSS_C':'lsfcray'} - -class UnknownMachineError(Exception): pass -class UnknownConfigError(Exception): pass -class ShellScriptException(Exception): - def __init__(self,scripts,errors): - self.scripts = scripts - self.errors = errors - super(ShellScriptException,self).__init__( - str(errors)+ - ': error processing'+ - (' '.join(scripts))) - -class HostInfo: - ''' - Gather Host specific information. - Someday the content will be pushed out of the code and into a yaml or something. 
- ''' - - def __init__(self, machine, **inputs): - - supported_hosts = { - 'WCOSS_C': self.wcoss_c, - 'WCOSS_DELL_P3': self.wcoss_dell_p3, - 'WCOSS_DELL_P3p5': self.wcoss_dell_p3p5, - 'HERA': self.hera, - 'ORION': self.orion} - - try: - self.info = supported_hosts[machine.upper()] - except KeyError: - raise NotImplementedError(f'{machine} is not a supported host.\n' + - 'Currently supported hosts are:\n' + - f'{" | ".join(supported_hosts.keys())}') - - self.machine = machine - - return - - @property - def wcoss_c(self): - - info = { - 'base_git': '/gpfs/hps3/emc/global/noscrub/emc.glopara/git', - 'base_svn': '/gpfs/hps3/emc/global/noscrub/emc.glopara/svn', - 'dmpdir': '/gpfs/dell3/emc/global/dump', - 'nwprod': '${NWROOT:-"/gpfs/hps/nco/ops/nwprod"}', - 'comroot': '${COMROOT:-"/gpfs/hps/nco/ops/com"}', - 'homedir': '/gpfs/hps3/emc/global/noscrub/$USER', - 'stmp': '/gpfs/hps2/stmp/$USER', - 'ptmp': '/gpfs/hps2/ptmp/$USER', - 'noscrub': '/gpfs/hps3/emc/global/noscrub/$USER', - 'account': 'GFS-DEV', - 'queue': 'dev', - 'queue_service': 'dev_transfer', - 'chgrp_rstprod': 'YES', - 'chgrp_cmd': 'chgrp rstprod', - 'hpssarch': 'YES', - 'localarch': 'NO', - 'atardir': '/NCEPDEV/$HPSS_PROJECT/1year/$USER/$machine/scratch/$PSLOT', - } - - return info - - - @property - def wcoss_dell_p3(self): - - info = { - 'base_git': '/gpfs/dell2/emc/modeling/noscrub/emc.glopara/git', - 'base_svn': '/gpfs/dell2/emc/modeling/noscrub/emc.glopara/git', - 'dmpdir': '/gpfs/dell3/emc/global/dump', - 'nwprod': '${NWROOT:-"/gpfs/dell1/nco/ops/nwprod"}', - 'comroot': '${COMROOT:-"/gpfs/dell1/nco/ops/com"}', - 'homedir': '/gpfs/dell2/emc/modeling/noscrub/$USER', - 'stmp': '/gpfs/dell3/stmp/$USER', - 'ptmp': '/gpfs/dell3/ptmp/$USER', - 'noscrub': '$HOMEDIR', - 'account': 'GFS-DEV', - 'queue': 'dev', - 'queue_service': 'dev_transfer', - 'partition_batch': None, - 'chgrp_rstprod': 'YES', - 'chgrp_cmd': 'chgrp rstprod', - 'hpssarch': 'YES', - 'localarch': 'NO', - 'atardir': 
'/NCEPDEV/$HPSS_PROJECT/1year/$USER/$machine/scratch/$PSLOT', - } - - return info - - @property - def wcoss_dell_p3p5(self): - - info = { - 'base_git': '/gpfs/dell2/emc/modeling/noscrub/emc.glopara/git', - 'base_svn': '/gpfs/dell2/emc/modeling/noscrub/emc.glopara/git', - 'dmpdir': '/gpfs/dell3/emc/global/dump', - 'nwprod': '${NWROOT:-"/gpfs/dell1/nco/ops/nwprod"}', - 'comroot': '${COMROOT:-"/gpfs/dell1/nco/ops/com"}', - 'homedir': '/gpfs/dell2/emc/modeling/noscrub/$USER', - 'stmp': '/gpfs/dell3/stmp/$USER', - 'ptmp': '/gpfs/dell3/ptmp/$USER', - 'noscrub': '$HOMEDIR', - 'account': 'GFS-DEV', - 'queue': 'dev2', - 'queue_service': 'dev2_transfer', - 'partition_batch': None, - 'chgrp_rstprod': 'YES', - 'chgrp_cmd': 'chgrp rstprod', - 'hpssarch': 'YES', - 'localarch': 'NO', - 'atardir': '/NCEPDEV/$HPSS_PROJECT/1year/$USER/$machine/scratch/$PSLOT', - } - - return info - - @property - def hera(self): - - info = { - 'base_git': '/scratch1/NCEPDEV/global/glopara/git', - 'base_svn': '/scratch1/NCEPDEV/global/glopara/svn', - 'dmpdir': '/scratch1/NCEPDEV/global/glopara/dump', - 'nwprod': '/scratch1/NCEPDEV/global/glopara/nwpara', - 'comroot': '/scratch1/NCEPDEV/global/glopara/com', - 'homedir': '/scratch1/NCEPDEV/global/$USER', - 'stmp': '/scratch1/NCEPDEV/stmp2/$USER', - 'ptmp': '/scratch1/NCEPDEV/stmp4/$USER', - 'noscrub': '$HOMEDIR', - 'account': 'fv3-cpu', - 'queue': 'batch', - 'queue_service': 'service', - 'partition_batch': 'hera', - 'chgrp_rstprod': 'YES', - 'chgrp_cmd': 'chgrp rstprod', - 'hpssarch': 'YES', - 'localarch': 'NO', - 'atardir': '/NCEPDEV/$HPSS_PROJECT/1year/$USER/$machine/scratch/$PSLOT', - } - - return info - - @property - def orion(self): - - info = { - 'base_git': '/work/noaa/global/glopara/git', - 'base_svn': '/work/noaa/global/glopara/svn', - 'dmpdir': '/work/noaa/rstprod/dump', - 'nwprod': '/work/noaa/global/glopara/nwpara', - 'comroot': '/work/noaa/global/glopara/com', - 'homedir': '/work/noaa/global/$USER', - 'stmp': '/work/noaa/stmp/$USER', - 
'ptmp': '/work/noaa/stmp/$USER', - 'noscrub': '$HOMEDIR', - 'account': 'fv3-cpu', - 'queue': 'batch', - 'queue_service': 'service', - 'partition_batch': 'orion', - 'chgrp_rstprod': 'YES', - 'chgrp_cmd': 'chgrp rstprod', - 'hpssarch': 'NO', - 'localarch': 'NO', - 'atardir': '$NOSCRUB/archive_rotdir/$PSLOT', - } - - return info - -def get_shell_env(scripts): - vars=dict() - runme=''.join([ f'source {s} ; ' for s in scripts ]) - magic=f'--- ENVIRONMENT BEGIN {random.randint(0,64**5)} ---' - runme+=f'/bin/echo -n "{magic}" ; /usr/bin/env -0' - with open('/dev/null','w') as null: - env=subprocess.Popen(runme,shell=True,stdin=null.fileno(), - stdout=subprocess.PIPE) - (out,err)=env.communicate() - out = out.decode() - begin=out.find(magic) - if begin<0: - raise ShellScriptException(scripts,'Cannot find magic string; ' - 'at least one script failed: '+repr(out)) - for entry in out[begin+len(magic):].split('\x00'): - iequal=entry.find('=') - vars[entry[0:iequal]] = entry[iequal+1:] - return vars - -def get_script_env(scripts): - default_env=get_shell_env([]) - and_script_env=get_shell_env(scripts) - vars_just_in_script=set(and_script_env)-set(default_env) - union_env=dict(default_env) - union_env.update(and_script_env) - return dict([ (v,union_env[v]) for v in vars_just_in_script ]) - -def cast_or_not(type,value): - try: - return type(value) - except ValueError: - return value - -def get_configs(expdir): - """ - Given an experiment directory containing config files, - return a list of configs minus the ones ending with ".default" - """ - result=list() - for config in glob.glob(f'{expdir}/config.*'): - if not config.endswith('.default'): - result.append(config) - return result - -def find_config(config_name, configs): - - for config in configs: - if config_name == os.path.basename(config): - return config - - raise UnknownConfigError(f'{config_name} does not exist (known: {repr(config_name)}), ABORT!') - -def source_configs(configs, tasks): - ''' - Given list of config 
files, source them - and return a dictionary for each task - Every task depends on config.base - ''' - - dict_configs = {} - - # Return config.base as well - dict_configs['base'] = config_parser([find_config('config.base', configs)]) - - # Source the list of input tasks - for task in tasks: - - files = [] - - files.append(find_config('config.base', configs)) - - if task in ['eobs', 'eomg']: - files.append(find_config('config.anal', configs)) - files.append(find_config('config.eobs', configs)) - elif task in ['eupd']: - files.append(find_config('config.anal', configs)) - files.append(find_config('config.eupd', configs)) - elif task in ['efcs']: - files.append(find_config('config.fcst', configs)) - files.append(find_config('config.efcs', configs)) - elif 'wave' in task: - files.append(find_config(f'config.wave', configs)) - files.append(find_config(f'config.{task}', configs)) - else: - files.append(find_config(f'config.{task}', configs)) - - print(f'sourcing config.{task}') - dict_configs[task] = config_parser(files) - - return dict_configs - - -def config_parser(files): - """ - Given the name of config file, key-value pair of all variables in the config file is returned as a dictionary - :param files: config file or list of config files - :type files: list or str or unicode - :return: Key value pairs representing the environment variables defined - in the script. - :rtype: dict - """ - if isinstance(files,(str, bytes)): - files=[files] - varbles=dict() - for key,value in get_script_env(files).items(): - if key in DATE_ENV_VARS: # likely a date, convert to datetime - varbles[key] = datetime.strptime(value,'%Y%m%d%H') - elif '.' 
in value: # Likely a number and that too a float - varbles[key] = cast_or_not(float,value) - else: # Still could be a number, may be an integer - varbles[key] = cast_or_not(int,value) - - return varbles - -def detectMachine(): - - machines = ['HERA', 'ORION', 'WCOSS_C', 'WCOSS_DELL_P3', 'JET'] - - if os.path.exists('/scratch1/NCEPDEV'): - return 'HERA' - elif os.path.exists('/work/noaa'): - return 'ORION' - elif os.path.exists('/gpfs') and os.path.exists('/etc/SuSE-release'): - return 'WCOSS_C' - elif os.path.exists('/gpfs/dell2'): - return 'WCOSS_DELL_P3' - elif os.path.exists('/lfs4/HFIP'): - return 'JET' - else: - print(f'workflow is currently only supported on: {machines}') - raise NotImplementedError('Cannot auto-detect platform, ABORT!') - -def get_scheduler(machine): - try: - return SCHEDULER_MAP[machine] - except KeyError: - raise UnknownMachineError(f'Unknown machine: {machine}, ABORT!') - -def create_wf_task(task, cdump='gdas', cycledef=None, envar=None, dependency=None, \ - metatask=None, varname=None, varval=None, vardict=None, \ - final=False): - - if metatask is None: - taskstr = f'{task}' - else: - taskstr = f'{task}#{varname}#' - metataskstr = f'{cdump}{metatask}' - metatask_dict = {'metataskname': metataskstr, \ - 'varname': f'{varname}', \ - 'varval': f'{varval}', \ - 'vardict': vardict} - - taskstr = f'{cdump}{taskstr}' - cycledefstr = cdump if cycledef is None else cycledef - - task_dict = {'taskname': f'{taskstr}', \ - 'cycledef': f'{cycledefstr}', \ - 'maxtries': '&MAXTRIES;', \ - 'command': f'&JOBS_DIR;/{task}.sh', \ - 'jobname': f'&PSLOT;_{taskstr}_@H', \ - 'account': '&ACCOUNT;', \ - 'queue': f'&QUEUE_{task.upper()}_{cdump.upper()};', \ - 'walltime': f'&WALLTIME_{task.upper()}_{cdump.upper()};', \ - 'native': f'&NATIVE_{task.upper()}_{cdump.upper()};', \ - 'memory': f'&MEMORY_{task.upper()}_{cdump.upper()};', \ - 'resources': f'&RESOURCES_{task.upper()}_{cdump.upper()};', \ - 'log': f'&ROTDIR;/logs/@Y@m@d@H/{taskstr}.log', \ - 'envar': 
envar, \ - 'dependency': dependency, \ - 'final': final} - - # Add partition for machines using slurm - if get_scheduler(detectMachine()) in ['slurm']: - task_dict['partition'] = f'&PARTITION_{task.upper()}_{cdump.upper()};' - - if metatask is None: - task = rocoto.create_task(task_dict) - else: - task = rocoto.create_metatask(task_dict, metatask_dict) - task = ''.join(task) - - return task - - -def get_gfs_interval(gfs_cyc): - ''' - return interval in hours based on gfs_cyc - ''' - - # Get interval from cyc_input - if gfs_cyc == 0: - interval = None - if gfs_cyc == 1: - interval = '24:00:00' - elif gfs_cyc == 2: - interval = '12:00:00' - elif gfs_cyc == 4: - interval = '06:00:00' - - return interval - - -def get_resources(machine, cfg, task, reservation, cdump='gdas'): - - scheduler = get_scheduler(machine) - - if cdump in ['gfs'] and f'wtime_{task}_gfs' in cfg.keys(): - wtimestr = cfg[f'wtime_{task}_gfs'] - else: - wtimestr = cfg[f'wtime_{task}'] - - ltask = 'eobs' if task in ['eomg'] else task - - memory = cfg.get(f'memory_{ltask}', None) - - if cdump in ['gfs'] and f'npe_{task}_gfs' in cfg.keys(): - tasks = cfg[f'npe_{ltask}_gfs'] - else: - tasks = cfg[f'npe_{ltask}'] - - if cdump in ['gfs'] and f'npe_node_{task}_gfs' in cfg.keys(): - ppn = cfg[f'npe_node_{ltask}_gfs'] - else: - ppn = cfg[f'npe_node_{ltask}'] - - if machine in [ 'WCOSS_DELL_P3', 'HERA', 'ORION', 'JET' ]: - if cdump in ['gfs'] and f'nth_{task}_gfs' in cfg.keys(): - threads = cfg[f'nth_{ltask}_gfs'] - else: - threads = cfg[f'nth_{ltask}'] - - nodes = np.int(np.ceil(np.float(tasks) / np.float(ppn))) - - memstr = '' if memory is None else str(memory) - natstr = '' - - if scheduler in ['slurm']: - natstr = '--export=NONE' - - if machine in ['HERA', 'JET', 'ORION', 'WCOSS_C', 'WCOSS_DELL_P3']: - - if machine in ['HERA', 'JET', 'ORION']: - resstr = f'{nodes}:ppn={ppn}:tpp={threads}' - else: - resstr = f'{nodes}:ppn={ppn}' - - if machine in ['WCOSS_C'] and task in ['arch', 'earc', 'getic']: - resstr += 
'' - - if machine in ['WCOSS_DELL_P3']: - if not reservation in ['NONE']: - natstr = f"-U {reservation} -R 'affinity[core({threads})]'" - else: - natstr = f"-R 'affinity[core({threads})]'" - - if task in ['arch', 'earc', 'getic']: - natstr = "-R 'affinity[core(1)]'" - - - elif machine in ['WCOSS']: - resstr = f'{tasks}' - - if task in ['arch', 'earc', 'getic']: - queuestr = '&QUEUE;' if scheduler in ['slurm'] else '&QUEUE_SERVICE;' - else: - queuestr = '&QUEUE;' - - return wtimestr, resstr, queuestr, memstr, natstr - - -def create_crontab(base, cronint=5): - ''' - Create crontab to execute rocotorun every cronint (5) minutes - ''' - - # No point creating a crontab if rocotorun is not available. - rocotoruncmd = find_executable('rocotorun') - if rocotoruncmd is None: - print('Failed to find rocotorun, crontab will not be created') - return - -# Leaving the code for a wrapper around crontab file if needed again later -# if check_slurm(): -# -# cronintstr = '*/%d * * * *' % cronint -# rocotorunstr = '%s -d %s/%s.db -w %s/%s.xml' % (rocotoruncmd, base['EXPDIR'], base['PSLOT'], base['EXPDIR'], base['PSLOT']) -# -# wrapper_strings = [] -# wrapper_strings.append('#!/bin/env tcsh\n') -# wrapper_strings.append('\n') -# wrapper_strings.append('module load slurm\n') -# wrapper_strings.append('module load rocoto/1.3.0-RC4\n') -# wrapper_strings.append('\n') -# wrapper_strings.append(rocotorunstr) -# -# hostname = 'tfe02' -# script_file = os.path.join(base['EXPDIR'], '%s.sh' % base['PSLOT']) -# -# fh = open(script_file, 'w') -# fh.write(''.join(wrapper_strings)) -# os.chmod(script_file,stat.S_IRWXU|stat.S_IRWXG|stat.S_IRWXO) -# fh.close() -# -# rocotorunstr = 'ssh %s %s/%s.sh' % (socket.gethostname(), base['EXPDIR'], base['PSLOT']) -# -# else: - - rocotorunstr = f'''{rocotoruncmd} -d {base['EXPDIR']}/{base['PSLOT']}.db -w {base['EXPDIR']}/{base['PSLOT']}.xml''' - cronintstr = f'*/{cronint} * * * *' - - # On WCOSS, rocoto module needs to be loaded everytime cron runs - if 
base['machine'] in ['WCOSS']: - rocotoloadstr = '. /usrx/local/Modules/default/init/sh; module use -a /usrx/local/emc_rocoto/modulefiles; module load rocoto/1.3.0rc2)' - rocotorunstr = f'({rocotoloadstr} {rocotorunstr})' - - try: - REPLYTO = os.environ['REPLYTO'] - except: - REPLYTO = '' - - strings = [] - - strings.append('\n') - strings.append(f'''#################### {base['PSLOT']} ####################\n''') - strings.append(f'MAILTO="{REPLYTO}"\n') - strings.append(f'{cronintstr} {rocotorunstr}\n') - strings.append('#################################################################\n') - strings.append('\n') - - fh = open(os.path.join(base['EXPDIR'], f'''{base['PSLOT']}.crontab'''), 'w') - fh.write(''.join(strings)) - fh.close() - - return diff --git a/ush/rocoto/workflow_xml.py b/ush/rocoto/workflow_xml.py new file mode 100644 index 00000000000..eb92b7bde3c --- /dev/null +++ b/ush/rocoto/workflow_xml.py @@ -0,0 +1,206 @@ +#!/usr/bin/env python3 + +import os +from distutils.spawn import find_executable +from datetime import datetime +from collections import OrderedDict +import rocoto +from applications import AppConfig +from workflow_tasks import get_wf_tasks + + +class RocotoXML: + + def __init__(self, app_config: AppConfig) -> None: + + self._app_config = app_config + + self._base = self._app_config.configs['base'] + + self.preamble = self._get_preamble() + self.definitions = self._get_definitions() + self.header = self._get_workflow_header() + self.cycledefs = self._get_cycledefs() + task_list = get_wf_tasks(app_config) + self.tasks = '\n'.join(task_list) + self.footer = self._get_workflow_footer() + + self.xml = self._assemble_xml() + + @staticmethod + def _get_preamble(): + """ + Generate preamble for XML + """ + + strings = ['', + '', + ''] + + return '\n'.join(strings) + + def _get_definitions(self) -> str: + """ + Create entities related to the experiment + """ + + entity = OrderedDict() + + entity['PSLOT'] = self._base['PSLOT'] + + if 
self._app_config.mode in ['forecast-only']: + entity['ICSDIR'] = self._base['ICSDIR'] + + entity['ROTDIR'] = self._base['ROTDIR'] + entity['JOBS_DIR'] = self._base['BASE_JOB'] + + entity['MAXTRIES'] = self._base.get('ROCOTO_MAXTRIES', 2) + + # Put them all in an XML key-value syntax + strings = [] + for key, value in entity.items(): + strings.append('\t' + rocoto.create_entity(key, value)) + + return '\n'.join(strings) + + def _get_workflow_header(self): + """ + Create the workflow header block + """ + + scheduler = self._app_config.scheduler + cyclethrottle = self._base.get('ROCOTO_CYCLETHROTTLE', 3) + taskthrottle = self._base.get('ROCOTO_TASKTHROTTLE', 25) + verbosity = self._base.get('ROCOTO_VERBOSITY', 10) + + expdir = self._base['EXPDIR'] + + strings = ['', + ']>', + '', + f'', + '', + f'\t{expdir}/logs/@Y@m@d@H.log', + '', + '\t', + ''] + + return '\n'.join(strings) + + def _get_cycledefs(self): + + cycledef_map = {'cycled': self._get_cycledefs_cycled, + 'forecast-only': self._get_cycledefs_forecast_only} + + try: + cycledefs = cycledef_map[self._app_config.mode]() + except KeyError: + raise KeyError(f'{self._app_config.mode} is not a valid application mode.\n' + + 'Valid application modes are:\n' + + f'{", ".join(cycledef_map.keys())}') + + return cycledefs + + def _get_cycledefs_cycled(self): + sdate = self._base['SDATE'].strftime('%Y%m%d%H%M') + edate = self._base['EDATE'].strftime('%Y%m%d%H%M') + interval = self._base.get('INTERVAL', '06:00:00') + strings = [f'\t{sdate} {edate} {interval}'] + + if self._app_config.gfs_cyc != 0: + sdate_gfs = self._base['SDATE_GFS'].strftime('%Y%m%d%H%M') + edate_gfs = self._base['EDATE_GFS'].strftime('%Y%m%d%H%M') + interval_gfs = self._base['INTERVAL_GFS'] + strings.append(f'\t{sdate_gfs} {edate_gfs} {interval_gfs}') + strings.append('') + strings.append('') + + return '\n'.join(strings) + + def _get_cycledefs_forecast_only(self): + sdate = self._base['SDATE'].strftime('%Y%m%d%H%M') + edate = 
self._base['EDATE'].strftime('%Y%m%d%H%M') + interval = self._base.get('INTERVAL_GFS', '24:00:00') + cdump = self._base['CDUMP'] + strings = f'\t{sdate} {edate} {interval}\n\n' + + return strings + + @staticmethod + def _get_workflow_footer(): + """ + Generate workflow footer + """ + + return '\n\n' + + def _assemble_xml(self) -> str: + + strings = [self.preamble, + self.definitions, + self.header, + self.cycledefs, + self.tasks, + self.footer] + + return ''.join(strings) + + def write(self, xml_file: str = None, crontab_file: str = None): + self._write_xml(xml_file = xml_file) + self._write_crontab(crontab_file = crontab_file) + + def _write_xml(self, xml_file: str = None) -> None: + + expdir = self._base['EXPDIR'] + pslot = self._base['PSLOT'] + + if xml_file is None: + xml_file = f"{expdir}/{pslot}.xml" + + with open(xml_file, 'w') as fh: + fh.write(self.xml) + + def _write_crontab(self, crontab_file: str = None, cronint: int = 5) -> None: + """ + Create crontab to execute rocotorun every cronint (5) minutes + """ + + # No point creating a crontab if rocotorun is not available. + rocotoruncmd = find_executable('rocotorun') + if rocotoruncmd is None: + print('Failed to find rocotorun, crontab will not be created') + return + + expdir = self._base['EXPDIR'] + pslot = self._base['PSLOT'] + + rocotorunstr = f'{rocotoruncmd} -d {expdir}/{pslot}.db -w {expdir}/{pslot}.xml' + cronintstr = f'*/{cronint} * * * *' + + try: + replyto = os.environ['REPLYTO'] + except KeyError: + replyto = '' + + strings = ['', + f'#################### {pslot} ####################', + f'MAILTO="{replyto}"', + f'{cronintstr} {rocotorunstr}', + '#################################################################', + ''] + + if crontab_file is None: + crontab_file = f"{expdir}/{pslot}.crontab" + + with open(crontab_file, 'w') as fh: + fh.write('\n'.join(strings)) + + return