Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
70 commits
Select commit Hold shift + click to select a range
f071994
change noaacloud NodeName
weihuang-jedi Feb 25, 2025
11accd8
Merge branch 'develop' of github.com:NOAA-EPIC/global-workflow-cloud …
weihuang-jedi Feb 26, 2025
a4771df
add download fix subset data, also serve as a test PR to trigger CI t…
weihuang-jedi Feb 26, 2025
4f1e679
fix pynorm error
weihuang-jedi Feb 26, 2025
2241227
fix pynorm error 2
weihuang-jedi Feb 26, 2025
9679617
fix pynorm error3
weihuang-jedi Feb 26, 2025
fe88751
fix pynorm error 4
weihuang-jedi Feb 26, 2025
0284fb7
fix pynorm error 5
weihuang-jedi Feb 26, 2025
27ca2f1
fix pynorm error 6
weihuang-jedi Feb 26, 2025
f39bfd9
fix pynorm error 7
weihuang-jedi Feb 27, 2025
8f7fc23
fix pynorm error 8
weihuang-jedi Feb 27, 2025
c248e12
fix pynorm error 9
weihuang-jedi Feb 27, 2025
85120f3
fix pynorm error 10
weihuang-jedi Feb 27, 2025
2a7f1c9
fix a syntax error
weihuang-jedi Feb 27, 2025
9f8ee2d
use just /lustre/jenkins for CI testing work directory
weihuang-jedi Feb 27, 2025
fbeb220
Merge branch 'develop' of github.com:NOAA-EPIC/global-workflow-cloud …
weihuang-jedi Feb 28, 2025
df4179c
using argparse and logging instead of getopt and print
weihuang-jedi Mar 3, 2025
6a168ae
using argparse and logging instead of getopt and print
weihuang-jedi Mar 3, 2025
a23662f
Updating with proper nomenclature.
kbooker79 Mar 3, 2025
3c223cf
Merge pull request #5 from NOAA-EPIC/download-subset-fix-data
kbooker79 Mar 3, 2025
efdfb4a
Updating for CI.
kbooker79 Mar 3, 2025
fce1564
Updating.
kbooker79 Mar 3, 2025
3c9e171
Updating.
kbooker79 Mar 3, 2025
1a88632
Fixing syntax error.
kbooker79 Mar 3, 2025
6b60421
consistent with Terry's code
weihuang-jedi Mar 3, 2025
35f35f2
add an ls command to make sure code is cloned
weihuang-jedi Mar 3, 2025
9e53aa4
try clone the code directly
weihuang-jedi Mar 3, 2025
98c50d9
use https to clone
weihuang-jedi Mar 3, 2025
b4c507f
Merge branch 'NOAA-EMC:develop' into develop
weihuang-jedi Mar 4, 2025
3089f00
add more debug ls
weihuang-jedi Mar 4, 2025
9c0dee6
adding comments
weihuang-jedi Mar 4, 2025
7179ed7
fix pynorm error
weihuang-jedi Mar 4, 2025
3846838
add more comments
weihuang-jedi Mar 4, 2025
ebb7035
add more comments, fixed a quota issue
weihuang-jedi Mar 4, 2025
93f313a
sync
weihuang-jedi Mar 4, 2025
57b2a51
fix a typo
weihuang-jedi Mar 4, 2025
8fefe2a
remove 2 comments
weihuang-jedi Mar 4, 2025
d4050e3
remove conflict
weihuang-jedi Mar 4, 2025
8f6db95
reset HOMEgfs
weihuang-jedi Mar 4, 2025
1f05a51
compile for gfs only for now
weihuang-jedi Mar 4, 2025
4640b6a
comment out gh pr edit for now
weihuang-jedi Mar 5, 2025
c906547
skip CI on AWS
weihuang-jedi Mar 5, 2025
0152b22
Merge remote-tracking branch 'origin/develop' into download-subset-fi…
weihuang-jedi Mar 5, 2025
eac272e
trying to fix runtime bug
weihuang-jedi Mar 5, 2025
35a6cd0
still trying to figure out HOMEgfs issue
weihuang-jedi Mar 5, 2025
b55f4a1
add compile gefs
weihuang-jedi Mar 5, 2025
e1724cb
add compile gefs
weihuang-jedi Mar 5, 2025
4264c15
add compile gefs
weihuang-jedi Mar 5, 2025
cbdd79a
add main function
weihuang-jedi Mar 10, 2025
20c1211
switch back to 'checkout scm instead of git clone'
weihuang-jedi Mar 11, 2025
f8a6a6d
using checkout scm
weihuang-jedi Mar 12, 2025
c952a73
using checkout scm
weihuang-jedi Mar 12, 2025
64b1b0c
Updating to use SCM.
kbooker79 Mar 12, 2025
12b09df
remove memory from resource if on AWS
weihuang-jedi Mar 13, 2025
98570b7
remove memory requirement for AWS
weihuang-jedi Mar 13, 2025
2039132
Merge branch 'NOAA-EMC:develop' into develop
weihuang-jedi Mar 16, 2025
9e60539
sync
weihuang-jedi Mar 17, 2025
4539bd3
fix pynorms error
weihuang-jedi Mar 17, 2025
e5fe41a
sync
weihuang-jedi Mar 17, 2025
2799666
Update C48_S2SW.yaml
weihuang-jedi Mar 17, 2025
19b9a5d
sync
weihuang-jedi Mar 18, 2025
d7eb66c
sync
weihuang-jedi Mar 18, 2025
92921f4
Merge branch 'develop' into download-subset-fix-data
weihuang-jedi Mar 26, 2025
a976f11
Merge branch 'develop' into download-subset-fix-data
weihuang-jedi Mar 27, 2025
8b4a544
Merge remote-tracking branch 'origin/develop' into download-subset-fi…
weihuang-jedi Mar 29, 2025
39ee601
Update ush/fetch-fix-data.py
weihuang-jedi Apr 3, 2025
fa7bdd7
Update ush/fetch-fix-data.py
weihuang-jedi Apr 3, 2025
7c02791
Merge branch 'download-subset-fix-data' of github.com:NOAA-EPIC/globa…
weihuang-jedi Apr 3, 2025
f565b0c
ignore fetch-fix-data.log, remove unused module from script, and swit…
weihuang-jedi Apr 3, 2025
f5b4ef2
Merge branch 'develop' into download-subset-fix-data
weihuang-jedi Apr 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,8 @@ ush/imsfv3_scf2ioda.py
ush/atparse.bash
ush/run_bufr2ioda.py
ush/bufr2ioda_insitu*
# ush log file
ush/fetch-fix-data.log

# version files
versions/build.ver
Expand Down
327 changes: 327 additions & 0 deletions ush/fetch-fix-data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,327 @@
#!/usr/bin/env python3
# fetch-fix-data.py
# wei.huang@noaa.gov
# 2025-02-26
# script to download a subset of FIX data to local machines.
import os
import argparse
import subprocess
from pathlib import Path
import logging

# Send every log record to a file in the current working directory; the file
# is truncated at start-up because it is opened with mode 'w'.
logging.basicConfig(
    filemode='w',
    filename="fetch-fix-data.log",
    format='%(asctime)s %(message)s',
)

# The root logger is shared by the whole script; record DEBUG and above.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

# ------------------------------------------------------------------------------


class FetchFIXdata():
    """Fetch a subset of FIX data from NOAA s3 bucket.

    The FIX version file is parsed into ``fix_ver_dict``.  From it ``s3dict``
    is built: a mapping from a label to a directory path relative to the
    bucket.  ``fetchdata`` then downloads every directory listed in
    ``s3dict`` with the ``aws`` command line client (unsigned requests).
    """

    def __init__(self, atmgridarray=None, ocngridarray=None,
                 fix_bucket=None, fix_ver=None, localdir=None, verbose=False):
        """Constructor for FetchFIXdata
        The constructor is responsible for collecting necessary parameters.

        Parameters
        ----------
        atmgridarray: list
            A list of ATM grids, defaults to ['C48'] when None
        ocngridarray: list
            A list of OCN grids, defaults to ['500'] when None
        fix_bucket: str
            NOAA s3 bucket of Global-Workflow full FIX data
        fix_ver: str
            FIX version file
        localdir: str
            Local dir to store the subset of FIX data.
        verbose: bool
            Log extra details (commands, return values) when True

        Returns
        -------
        None

        Raises
        ------
        SystemExit
            With a message (exit status 1) when localdir is not an existing
            directory or fix_ver is not an existing file.
        """
        self.aws_fix_bucket = fix_bucket
        self.aws_cp = 'aws --no-sign-request s3 cp'
        self.aws_sync = 'aws --no-sign-request s3 sync'

        # Build fresh lists here: list literals used as default arguments
        # would be shared between all instances.
        self.atmgridarray = ['C48'] if atmgridarray is None else atmgridarray
        self.ocngridarray = ['500'] if ocngridarray is None else ocngridarray
        self.localdir = localdir
        self.fix_ver = fix_ver
        self.verbose = verbose

        logger.info(f'localdir: {localdir}')
        logger.info(f'fix_ver: {fix_ver}')
        logger.info(f'fix_buck: {fix_bucket}')
        logger.info(f'verbose: {verbose}')

        # A bare "raise SystemExit" ends the process with status 0; passing
        # the message makes the failure visible to the calling shell.
        if not localdir or not os.path.isdir(localdir):
            message = f'local dir: <{localdir}> does not exist. Stop'
            logger.error(message)
            raise SystemExit(message)
        logger.info(f'Prepare to download FIX data for {self.atmgridarray} and {self.ocngridarray} to {localdir}')

        if not fix_ver or not os.path.isfile(fix_ver):
            message = f'File fix_ver: <{fix_ver}> does not exist. Stop'
            logger.error(message)
            raise SystemExit(message)
        logger.info(f'Prepare to read FIX data for {self.atmgridarray} and {self.ocngridarray} to {fix_ver}')

        self.s3dict = {}
        self.s3dict['raworog'] = 'raw/orog'

        # Download straight into localdir when its path already mentions
        # 'fix'; otherwise use a 'fix.subset' directory below it.
        if 'fix' not in self.localdir:
            self.targetdir = f'{self.localdir}/fix.subset'
        else:
            self.targetdir = self.localdir

        self.get_fix_ver_dict()
        self.create_s3dict()

    # --------------------------------------------------------------------------
    def create_s3dict(self):
        """
        Create a dictionary based on fix_ver file,
        corresponding to FIX data s3 bucket directory.
        Entries whose name does not contain '_ver' are skipped.
        returns
        ----------
        None
        """
        for key, val in self.fix_ver_dict.items():
            # The component name is whatever precedes the first '_ver'.
            s3key, marker, _ = key.partition('_ver')
            if not marker:
                logger.warning(f'Skip <{key}>: not a version entry')
                continue

            if s3key == 'chem':
                self.s3dict['fimdata_chem'] = f'chem/{val}/fimdata_chem'
                self.s3dict['Emission_data'] = f'chem/{val}/Emission_data'
            elif s3key == 'datm':
                self.s3dict['cfsr'] = f'datm/{val}/cfsr'
                self.s3dict['gefs'] = f'datm/{val}/gefs'
                self.s3dict['gfs'] = f'datm/{val}/gfs'
                self.s3dict['mom6'] = f'datm/{val}/mom6'
            elif s3key in ('orog', 'ugwd'):
                self.add_atmgrid2s3dict(s3key, val)
            elif s3key in ('mom6', 'cice'):
                self.add_ocngrid2s3dict(s3key, val)
            elif s3key == 'cpl':
                self.add_cpl2s3dict(s3key, val)
            else:
                self.s3dict[s3key] = f'{s3key}/{val}'

        if self.verbose:
            self.printinfo()

    # --------------------------------------------------------------------------
    def add_atmgrid2s3dict(self, key, val):
        """
        Add ATM grid data to dict: one '<key>_<grid>' entry per ATM grid.
        returns
        ----------
        None
        """
        for atmgrid in self.atmgridarray:
            self.s3dict[f'{key}_{atmgrid}'] = f'{key}/{val}/{atmgrid}'

    # -------------------------------------------------------------------------
    def add_ocngrid2s3dict(self, key, val):
        """
        Add OCN grid data to dict: one '<key>_<grid>' entry per OCN grid.
        returns
        ----------
        None
        """
        for ocngrid in self.ocngridarray:
            self.s3dict[f'{key}_{ocngrid}'] = f'{key}/{val}/{ocngrid}'

    # -------------------------------------------------------------------------
    def add_cpl2s3dict(self, key, val):
        """
        Add CPL (ATM and OCN coupler) grid data to dict: one entry for
        every ATM grid / OCN grid combination.
        returns
        ----------
        None
        """
        for atmgrid in self.atmgridarray:
            for ocngrid in self.ocngridarray:
                pair = f'a{atmgrid}o{ocngrid}'
                self.s3dict[f'{key}_{pair}'] = f'{key}/{val}/{pair}'

    # -------------------------------------------------------------------------
    def printinfo(self):
        """Print dict info (data to download)
        """
        print('Preparing to fetch')
        print(f'ATM grid: {self.atmgridarray}')
        print(f'OCN grid: {self.ocngridarray}')
        print(f'From: {self.aws_fix_bucket}')
        print(f'To: {self.targetdir}')
        for key, val in self.s3dict.items():
            print(f'{key}: {val}')

    # -------------------------------------------------------------------------
    def fetchdata(self):
        """Fetch data defined in s3dict (plus ugwp_limb_tau.nc).
        """
        if self.verbose:
            logger.info(f'Create local fix dir: {self.targetdir}')

        Path(self.targetdir).mkdir(parents=True, exist_ok=True)

        self.fetch_ugwp_limb_tau()

        for remote_subdir in self.s3dict.values():
            self.download_dir(remote_subdir)

    # --------------------------------------------------------------------------
    def _run_aws(self, cmd):
        """Run an aws command given as an argument list and return its exit code.

        The command is executed without a shell, so paths containing blanks
        or shell metacharacters are passed through unchanged.  A failure is
        logged as an error but never raised: downloads stay best-effort.
        """
        try:
            returned_value = subprocess.call(cmd)
        except OSError as err:
            # e.g. the aws client is not installed; 127 is the status a
            # shell reports for "command not found".
            logger.error(f'Can not execute <{cmd[0]}>: {err}')
            return 127

        if returned_value != 0:
            logger.error(f'Command <{" ".join(cmd)}> failed, returned value: {returned_value}')
        elif self.verbose:
            logger.info(f'returned value: {returned_value}')
        return returned_value

    # --------------------------------------------------------------------------
    def download_dir(self, dir):
        """download a directory

        Parameters
        ----------
        dir: str
            Directory path relative to both the bucket and the target dir.
            Nothing is downloaded when the local directory already exists.
        """
        remotedir = f'{self.aws_fix_bucket}/{dir}'
        localdir = f'{self.targetdir}/{dir}'

        if os.path.isdir(localdir):
            logger.info(f'{localdir} already exist. skip')
            return

        parentdir = os.path.dirname(localdir)
        if self.verbose:
            logger.info(f'Create local {parentdir} dir:')
        Path(parentdir).mkdir(parents=True, exist_ok=True)

        cmd = [*self.aws_sync.split(), remotedir, localdir]
        if self.verbose:
            logger.info(' '.join(cmd))
        logger.info(f'Downloading {localdir}')
        self._run_aws(cmd)

    # --------------------------------------------------------------------------
    def fetch_ugwp_limb_tau(self):
        """download ugwp_limb_tau.nc

        The file is skipped when it already exists locally, or when the
        FIX version file has no 'ugwd_ver' entry.
        """
        ugwd_ver = self.fix_ver_dict.get('ugwd_ver')
        if ugwd_ver is None:
            logger.error('No ugwd_ver in FIX version file, skip ugwp_limb_tau.nc')
            return

        ugwp_limb_tau_remotepath = f'{self.aws_fix_bucket}/ugwd/{ugwd_ver}/ugwp_limb_tau.nc'
        ugwp_limb_tau_localdir = f'{self.targetdir}/ugwd/{ugwd_ver}'
        filename = f'{ugwp_limb_tau_localdir}/ugwp_limb_tau.nc'
        Path(ugwp_limb_tau_localdir).mkdir(parents=True, exist_ok=True)

        if os.path.isfile(filename):
            logger.info(f'{filename} already exist. skip')
            return

        cmd = [*self.aws_cp.split(), ugwp_limb_tau_remotepath, filename]
        if self.verbose:
            logger.info(' '.join(cmd))
        logger.info(f'Downloading {filename}')
        self._run_aws(cmd)

    # --------------------------------------------------------------------------
    def get_fix_ver_dict(self):
        """Get fix ver as dictionary from FIX ver file.

        Only lines of the form ``export NAME=VALUE`` are used; everything
        else (comments, blank lines, ...) is ignored.  Surrounding quotes
        are removed from VALUE.
        """
        self.fix_ver_dict = {}
        with open(self.fix_ver, "r", encoding="utf-8") as file:
            for rawline in file:
                line = rawline.strip()
                # startswith (not find) so that '# export ...' is ignored.
                if not line.startswith('export '):
                    continue
                # Split on the first '=' only: the value may contain '='.
                key, equal, value = line[len('export '):].partition('=')
                key = key.strip()
                if not equal or not key:
                    continue
                # skip gdas data, for DA projects, one should keep gdas part.
                if 'gdas_' in key:
                    continue
                # skip nest data
                if 'nest' in key:
                    continue
                self.fix_ver_dict[key] = value.strip().strip('"\'')


# ------------------------------------------------------------------------------
def _parse_grid_list(text, supported, label):
    """Split a comma separated grid option and validate every entry.

    Parameters
    ----------
    text: str
        Option value, e.g. 'C48' or 'C48,C96'.  Blanks around an entry
        are ignored.
    supported: list
        Grid names accepted for this option.
    label: str
        Option name used in the error messages.

    Returns
    -------
    list
        The grid names found in text.

    Raises
    ------
    SystemExit
        With a message (exit status 1) when an entry is not supported.
    """
    grids = [grid.strip() for grid in text.split(',')]
    for grid in grids:
        if grid not in supported:
            logger.error(f'{label}: {grid}')
            logger.error(f'is not in supported grids: {supported}')
            # A bare "raise SystemExit" would end the process with status 0
            # and print nothing; the message gives status 1 and is shown.
            raise SystemExit(f'{label}: <{grid}> is not in supported grids: {supported}')
    return grids


def main() -> None:
    """Parse the command line and download the requested FIX data subset.

    Raises
    ------
    SystemExit
        On invalid command line options, including unsupported grids.
    """

    # define available ATM and OCN grids.
    ATMGRIDLIST = ['C48', 'C96', 'C192', 'C384', 'C768', 'C1152']
    OCNGRIDLIST = ['500', '100', '050', '025']

    parser = argparse.ArgumentParser()
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="increase output verbosity")
    parser.add_argument("-d", "--localdir", type=str, required=True,
                        help="local directory to store FIX data subset")
    # No default here: a default is never used for a required option.
    parser.add_argument("-f", "--fix_ver", type=str, required=True,
                        help="fix.ver file from Global-Workflow versions directory")
    parser.add_argument("-b", "--fix_bucket", type=str, required=False,
                        default="s3://noaa-nws-global-pds/fix",
                        help="Optional S3 Bucket directory of FIX data, default <s3://noaa-nws-global-pds/fix>")
    parser.add_argument("-a", "--atmgrid", type=str, required=False,
                        default="C48",
                        help="ATM grid, like: C48,C96,C192,C384,C768,C1152, default: C48")
    parser.add_argument("-o", "--ocngrid", type=str, required=False,
                        default="100",
                        help="OCN grid, like: 500,100,050,025, default: 100")
    args = parser.parse_args()

    if args.verbose:
        logger.info(f"the atmgrid is {args.atmgrid}")
        logger.info(f"the ocngrid is {args.ocngrid}")
        logger.info(f"the localdir is {args.localdir}")
        logger.info(f"the fix_file is {args.fix_ver}")
        logger.info(f"the s3 bucket is {args.fix_bucket}")

    atmgridarray = _parse_grid_list(args.atmgrid, ATMGRIDLIST, 'atmgrid')
    ocngridarray = _parse_grid_list(args.ocngrid, OCNGRIDLIST, 'ocngrid')

    # ----------------------------------------------------------------------
    ffd = FetchFIXdata(atmgridarray=atmgridarray, ocngridarray=ocngridarray,
                       fix_ver=args.fix_ver, fix_bucket=args.fix_bucket,
                       localdir=args.localdir, verbose=args.verbose)

    ffd.fetchdata()


# ------------------------------------------------------------------------------
# Run the command line entry point only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()