diff --git a/parm/data_locations.yml b/parm/data_locations.yml index d20ca819aa..82b247d11a 100644 --- a/parm/data_locations.yml +++ b/parm/data_locations.yml @@ -271,3 +271,120 @@ NAM: fcst: - nam.t{hh}z.awphys{fcst_hr:02d}.tm00.grib2 +########################## +########################## +### Observation Data ### +########################## +########################## + +GFS_obs: + hpss: + protocol: htar + archive_format: zip + archive_path: + - /BMC/fdr/Permanent/{yyyy}/{mm}/{dd}/data/grids/gfs/prepbufr + archive_file_names: + prepbufr: + obs: + - "{yyyymmdd}0000.zip" + tcvitals: + obs: + - "{yyyymmdd}0000.zip" + file_names: + prepbufr: + obs: + - "{yy}{jjj}{hh}00.gfs.t{hh}z.prepbufr.nr" + tcvitals: + obs: + - "{yy}{jjj}{hh}00.gfs.t{hh}z.syndata.tcvitals.tm00" + +RAP_obs: + hpss: + protocol: htar + archive_format: zip + archive_path: + - /BMC/fdr/Permanent/{yyyy}/{mm}/{dd}/data/grids/rap/obs + archive_internal_dir: + - ./ + archive_file_names: + - "{yyyymmddhh}00.zip" + file_names: + obs: + - "{yyyymmddhh}.rap.t{hh}z.prepbufr.tm00" + - "{yyyymmddhh}.rap.t{hh}z.1bamua.tm00.bufr_d" + - "{yyyymmddhh}.rap.t{hh}z.1bhrs4.tm00.bufr_d" + - "{yyyymmddhh}.rap.t{hh}z.1bmhs.tm00.bufr_d" + - "{yyyymmddhh}.rap.t{hh}z.amsr2.tm00.bufr_d" + - "{yyyymmddhh}.rap.t{hh}z.ascatt.tm00.bufr_d" + - "{yyyymmddhh}.rap.t{hh}z.ascatw.tm00.bufr_d" + - "{yyyymmddhh}.rap.t{hh}z.atms.tm00.bufr_d" + - "{yyyymmddhh}.rap.t{hh}z.atmsdb.tm00.bufr_d" + - "{yyyymmddhh}.rap.t{hh}z.crisf4.tm00.bufr_d" + - "{yyyymmddhh}.rap.t{hh}z.crsfdb.tm00.bufr_d" + - "{yyyymmddhh}.rap.t{hh}z.esamua.tm00.bufr_d" + - "{yyyymmddhh}.rap.t{hh}z.esatms.tm00.bufr_d" + - "{yyyymmddhh}.rap.t{hh}z.eshrs3.tm00.bufr_d" + - "{yyyymmddhh}.rap.t{hh}z.esiasi.tm00.bufr_d" + - "{yyyymmddhh}.rap.t{hh}z.esmhs.tm00.bufr_d" + - "{yyyymmddhh}.rap.t{hh}z.gpsipw.tm00.bufr_d" + - "{yyyymmddhh}.rap.t{hh}z.gpsipw.tm00.bufr_d.nr" + - "{yyyymmddhh}.rap.t{hh}z.gsrasr.tm00.bufr_d" + - "{yyyymmddhh}.rap.t{hh}z.gsrcsr.tm00.bufr_d" + - "{yyyymmddhh}.rap.t{hh}z.iasidb.tm00.bufr_d" + - "{yyyymmddhh}.rap.t{hh}z.lghtng.tm00.bufr_d" + - "{yyyymmddhh}.rap.t{hh}z.lghtng.tm00.bufr_d.nr" + - "{yyyymmddhh}.rap.t{hh}z.lgycld.tm00.bufr_d" + - "{yyyymmddhh}.rap.t{hh}z.mtiasi.tm00.bufr_d" + - "{yyyymmddhh}.rap.t{hh}z.nexrad.tm00.bufr_d" + - "{yyyymmddhh}.rap.t{hh}z.rassda.tm00.bufr_d" + - "{yyyymmddhh}.rap.t{hh}z.satwnd.tm00.bufr_d" + - "{yyyymmddhh}.rap.t{hh}z.sevasr.tm00.bufr_d" + - "{yyyymmddhh}.rap.t{hh}z.ssmisu.tm00.bufr_d" + aws: + protocol: download + url: https://noaa-rap-pds.s3.amazonaws.com/rap.{yyyymmdd} + file_names: + obs: + - rap.t{hh}z.prepbufr.tm00.nr + - rap.t{hh}z.1bamua.tm00.bufr_d + - rap.t{hh}z.1bhrs4.tm00.bufr_d + - rap.t{hh}z.1bmhs.tm00.bufr_d + - rap.t{hh}z.amsr2.tm00.bufr_d + - rap.t{hh}z.ascatt.tm00.bufr_d + - rap.t{hh}z.ascatw.tm00.bufr_d + - rap.t{hh}z.atms.tm00.bufr_d + - rap.t{hh}z.atmsdb.tm00.bufr_d + - rap.t{hh}z.crisf4.tm00.bufr_d + - rap.t{hh}z.crsfdb.tm00.bufr_d + - rap.t{hh}z.esamua.tm00.bufr_d + - rap.t{hh}z.esatms.tm00.bufr_d + - rap.t{hh}z.eshrs3.tm00.bufr_d + - rap.t{hh}z.esiasi.tm00.bufr_d + - rap.t{hh}z.esmhs.tm00.bufr_d + - rap.t{hh}z.gpsipw.tm00.bufr_d + - rap.t{hh}z.gpsipw.tm00.bufr_d.nr + - rap.t{hh}z.gsrasr.tm00.bufr_d + - rap.t{hh}z.gsrcsr.tm00.bufr_d + - rap.t{hh}z.iasidb.tm00.bufr_d + - rap.t{hh}z.lghtng.tm00.bufr_d + - rap.t{hh}z.lghtng.tm00.bufr_d.nr + - rap.t{hh}z.lgycld.tm00.bufr_d + - rap.t{hh}z.mtiasi.tm00.bufr_d + - rap.t{hh}z.nexrad.tm00.bufr_d + - rap.t{hh}z.rassda.tm00.bufr_d + - rap.t{hh}z.satwnd.tm00.bufr_d + - rap.t{hh}z.sevasr.tm00.bufr_d + - rap.t{hh}z.ssmisu.tm00.bufr_d + +########################### +########################### +####### Fix Files ######### +########################### +########################### + +GSI-FIX: + remote: + protocol: download + url: https://epic-sandbox-srw.s3.amazonaws.com + file_names: + - gsi-fix.22.07.27.tar.gz diff --git a/scripts/exregional_get_extrn_mdl_files.sh b/scripts/exregional_get_extrn_mdl_files.sh index 97ac031f2d..eeb68c452f 100755 --- a/scripts/exregional_get_extrn_mdl_files.sh +++ b/scripts/exregional_get_extrn_mdl_files.sh @@ -57,9 +57,9 @@ or lateral boundary conditions for the FV3. set -x if [ "${ICS_OR_LBCS}" = "ICS" ]; then if [ ${TIME_OFFSET_HRS} -eq 0 ] ; then - anl_or_fcst="anl" + file_set="anl" else - anl_or_fcst="fcst" + file_set="fcst" fi fcst_hrs=${TIME_OFFSET_HRS} file_names=${EXTRN_MDL_FILES_ICS[@]} @@ -69,7 +69,7 @@ if [ "${ICS_OR_LBCS}" = "ICS" ]; then input_file_path=${EXTRN_MDL_SOURCE_BASEDIR_ICS:-$EXTRN_MDL_SYSBASEDIR_ICS} elif [ "${ICS_OR_LBCS}" = "LBCS" ]; then - anl_or_fcst="fcst" + file_set="fcst" first_time=$((TIME_OFFSET_HRS + LBC_SPEC_INTVL_HRS)) last_time=$((TIME_OFFSET_HRS + FCST_LEN_HRS)) fcst_hrs="${first_time} ${last_time} ${LBC_SPEC_INTVL_HRS}" @@ -151,7 +151,7 @@ fi cmd=" python3 -u ${USHdir}/retrieve_data.py \ --debug \ - --anl_or_fcst ${anl_or_fcst} \ + --file_set ${file_set} \ --config ${PARMdir}/data_locations.yml \ --cycle_date ${EXTRN_MDL_CDATE} \ --data_stores ${data_stores} \ diff --git a/ush/machine/hera.yaml b/ush/machine/hera.yaml index 3a7769a557..1038c3bd9d 100644 --- a/ush/machine/hera.yaml +++ b/ush/machine/hera.yaml @@ -32,3 +32,6 @@ platform: FIXsfc: /scratch2/BMC/det/UFS_SRW_App/develop/fix/fix_sfc_climo FIXshp: /scratch2/BMC/det/UFS_SRW_App/develop/NaturalEarth EXTRN_MDL_DATA_STORES: hpss aws nomads +data: + obs: + RAP_obs: /scratch2/BMC/public/data/grids/rap/obs diff --git a/ush/machine/jet.yaml b/ush/machine/jet.yaml index 22f945335d..ae6f8a8b6e 100644 --- a/ush/machine/jet.yaml +++ b/ush/machine/jet.yaml @@ -40,3 +40,8 @@ data: netcdf: /public/data/grids/gfs/anl/netcdf RAP: /public/data/grids/rap/full/wrfprs/grib2 HRRR: /public/data/grids/hrrr/conus/wrfprs/grib2 + obs: + RAP_obs: /public/data/grids/rap/obs + GFS_obs: + prepbufr: /public/data/grids/gfs/prepbufr + tcvitals: /public/data/grids/gfs/bufr diff --git a/ush/machine/noaacloud.yaml b/ush/machine/noaacloud.yaml index c1e96b362b..d0af150935 100644 --- a/ush/machine/noaacloud.yaml +++ b/ush/machine/noaacloud.yaml @@ -24,6 +24,7 @@ platform: FIXorg: /contrib/EPIC/UFS_SRW_App/develop/fix/fix_orog FIXsfc: /contrib/EPIC/UFS_SRW_App/develop/fix/fix_sfc_climo FIXshp: /contrib/EPIC/UFS_SRW_App/develop/NaturalEarth + FIXgsi: /contrib/EPIC/UFS_SRW_App/develop/fix/fix_gsi EXTRN_MDL_DATA_STORES: aws nomads data: ics_lbcs: diff --git a/ush/retrieve_data.py b/ush/retrieve_data.py index e6ad879fbc..bcf187ea1c 100755 --- a/ush/retrieve_data.py +++ b/ush/retrieve_data.py @@ -32,6 +32,7 @@ import shutil import subprocess import sys +import glob from textwrap import dedent import time from copy import deepcopy @@ -81,7 +82,7 @@ def copy_file(source, destination, copy_cmd): """ if not os.path.exists(source): - logging.info(f"File does not exist on disk \n {source}") + logging.info(f"File does not exist on disk \n {source} \n try using: --input_file_path ") return False # Using subprocess here because system copy is much faster than @@ -224,7 +225,7 @@ def fill_template(template_str, cycle_date, templates_only=False, **kwargs): if templates_only: return f'{",".join((format_values.keys()))}' return template_str.format(**format_values) - + def create_target_path(target_path): @@ -306,7 +307,7 @@ def get_file_templates(cla, known_data_info, data_store, use_cla_tmpl=False): if isinstance(file_templates, dict): if cla.file_type is not None: file_templates = file_templates[cla.file_type] - file_templates = file_templates[cla.anl_or_fcst] + file_templates = file_templates[cla.file_set] if not file_templates: msg = "No file naming convention found. They must be provided \ either on the command line or on in a config file." @@ -478,7 +479,7 @@ def hpss_requested_files(cla, file_names, store_specs, members=-1, ens_group=-1) archive_file_names = archive_file_names[cla.file_type] if isinstance(archive_file_names, dict): - archive_file_names = archive_file_names[cla.anl_or_fcst] + archive_file_names = archive_file_names[cla.file_set] unavailable = {} existing_archives = {} @@ -505,7 +506,7 @@ def hpss_requested_files(cla, file_names, store_specs, members=-1, ens_group=-1) archive_internal_dirs = store_specs.get("archive_internal_dir", [""]) if isinstance(archive_internal_dirs, dict): - archive_internal_dirs = archive_internal_dirs.get(cla.anl_or_fcst, [""]) + archive_internal_dirs = archive_internal_dirs.get(cla.file_set, [""]) # which_archive matters for choosing the correct file names within, # but we can safely just try all options for the @@ -683,6 +684,7 @@ def setup_logging(debug=False): user-defined level for logging in the script.""" level = logging.WARNING + level = logging.INFO if debug: level = logging.DEBUG @@ -743,7 +745,7 @@ def main(argv): cla.members = arg_list_to_range(cla.members) setup_logging(cla.debug) - print("Running script retrieve_data.py with args:\n", f"{('-' * 80)}\n{('-' * 80)}") + print("Running script retrieve_data.py with args:", f"\n{('-' * 80)}\n{('-' * 80)}") for name, val in cla.__dict__.items(): if name not in ["config"]: print(f"{name:>15s}: {val}") @@ -896,10 +898,10 @@ def parse_args(argv): # Required parser.add_argument( - "--anl_or_fcst", - choices=("anl", "fcst"), - help="Flag for whether analysis or forecast \ - files should be gathered", + "--file_set", + choices=("anl", "fcst", "obs", "fix"), + help="Flag for whether analysis, forecast, \ + fix, or observation files should be gathered", required=True, ) parser.add_argument( @@ -907,19 +909,22 @@ def parse_args(argv): help="Full path to a configuration file containing paths and \ naming conventions for known data streams. The default included \ in this repository is in parm/data_locations.yml", + required=True, type=config_exists, + ) parser.add_argument( "--cycle_date", help="Cycle date of the data to be retrieved in YYYYMMDDHH \ format.", - required=True, + required=False, # relaxed this arg option, and set a benign value when not used + default="1999123100", type=to_datetime, ) parser.add_argument( "--data_stores", help="List of priority data_stores. Tries first list item \ - first. Choices: hpss, nomads, aws, disk", + first. Choices: hpss, nomads, aws, disk, remote.", nargs="*", required=True, type=to_lower, @@ -928,6 +933,7 @@ def parse_args(argv): "--external_model", choices=( "FV3GFS", + "GFS_obs", "GDAS", "GEFS", "GSMGFS", @@ -935,7 +941,9 @@ def parse_args(argv): "NAM", "RAP", "RAPx", + "RAP_obs", "HRRRx", + "GSI-FIX", ), help="External model label. This input is case-sensitive", required=True, @@ -946,25 +954,30 @@ def parse_args(argv): one fhr will be processed. If 2 or 3 arguments, a sequence \ of forecast hours [start, stop, [increment]] will be \ processed. If more than 3 arguments, the list is processed \ - as-is.", + as-is. default=[0]", nargs="+", - required=True, + required=False, # relaxed this arg option, and set a default value when not used + default=[0], type=int, ) parser.add_argument( "--output_path", help="Path to a location on disk. Path is expected to exist.", - required=True, + required=True, type=os.path.abspath, ) parser.add_argument( "--ics_or_lbcs", choices=("ICS", "LBCS"), help="Flag for whether ICS or LBCS.", - required=True, + required=True ) # Optional + parser.add_argument( + "--version", # for file patterns that dont conform to cycle_date [TBD] + help="Version number of package to download, e.g. x.yy.zz", + ) parser.add_argument( "--symlink", action="store_true", @@ -984,7 +997,7 @@ def parse_args(argv): ) parser.add_argument( "--file_type", - choices=("grib2", "nemsio", "netcdf"), + choices=("grib2", "nemsio", "netcdf", "prepbufr", "tcvitals"), help="External model file format", ) parser.add_argument( diff --git a/ush/test_retrieve_data.py b/ush/test_retrieve_data.py index e35eac2dad..439b9b7fb2 100644 --- a/ush/test_retrieve_data.py +++ b/ush/test_retrieve_data.py @@ -41,7 +41,7 @@ def test_fv3gfs_grib2_lbcs_from_hpss(self): # fmt: off args = [ - '--anl_or_fcst', 'fcst', + '--file_set', 'fcst', '--config', self.config, '--cycle_date', '2022062512', '--data_stores', 'hpss', @@ -74,7 +74,7 @@ def test_fv3gfs_netcdf_lbcs_from_hpss(self): # fmt: off args = [ - '--anl_or_fcst', 'fcst', + '--file_set', 'fcst', '--config', self.config, '--cycle_date', '2022060112', '--data_stores', 'hpss', @@ -107,7 +107,7 @@ def test_gdas_ics_from_aws(self): # fmt: off args = [ - '--anl_or_fcst', 'anl', + '--file_set', 'anl', '--config', self.config, '--cycle_date', '2022052512', '--data_stores', 'aws', @@ -143,7 +143,7 @@ def test_gefs_grib2_ics_from_aws(self): # fmt: off args = [ - '--anl_or_fcst', 'anl', + '--file_set', 'anl', '--config', self.config, '--cycle_date', '2022052512', '--data_stores', 'aws', @@ -177,7 +177,7 @@ def test_hrrr_ics_from_hpss(self): # fmt: off args = [ - '--anl_or_fcst', 'anl', + '--file_set', 'anl', '--config', self.config, '--cycle_date', '2022062512', '--data_stores', 'hpss', @@ -207,7 +207,7 @@ def test_hrrr_lbcs_from_hpss(self): # fmt: off args = [ - '--anl_or_fcst', 'fcst', + '--file_set', 'fcst', '--config', self.config, '--cycle_date', '2022062512', '--data_stores', 'hpss', @@ -236,7 +236,7 @@ def test_hrrr_ics_from_aws(self): # fmt: off args = [ - '--anl_or_fcst', 'anl', + '--file_set', 'anl', '--config', self.config, '--cycle_date', '2022062512', '--data_stores', 'aws', @@ -262,10 +262,10 @@ def test_hrrr_lbcs_from_aws(self): with tempfile.TemporaryDirectory(dir=".") as tmp_dir: os.chdir(tmp_dir) - + # fmt: off args = [ - '--anl_or_fcst', 'fcst', + '--file_set', 'fcst', '--config', self.config, '--cycle_date', '2022062512', '--data_stores', 'aws', @@ -295,7 +295,7 @@ def test_rap_ics_from_aws(self): # fmt: off args = [ - '--anl_or_fcst', 'anl', + '--file_set', 'anl', '--config', self.config, '--cycle_date', '2022062509', '--data_stores', 'aws', @@ -325,7 +325,7 @@ def test_rap_lbcs_from_aws(self): # fmt: off args = [ - '--anl_or_fcst', 'fcst', + '--file_set', 'fcst', '--config', self.config, '--cycle_date', '2022062509', '--data_stores', 'aws',