From f071994c1e0f1a47cb92baf9575dcf4ee2e1c31a Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Tue, 25 Feb 2025 19:42:32 +0000 Subject: [PATCH 001/134] change noaacloud NodeName --- ci/Jenkinsfile4AWS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/Jenkinsfile4AWS b/ci/Jenkinsfile4AWS index cac3f4cd119..c23ace3a968 100644 --- a/ci/Jenkinsfile4AWS +++ b/ci/Jenkinsfile4AWS @@ -5,7 +5,7 @@ def HOMEgfs = 'none' def CI_CASES = '' def GH = 'none' // Location of the custom workspaces for each machine in the CI system. They are persistent for each iteration of the PR. -def NodeName = [hera: 'Hera-EMC', orion: 'Orion-EMC', hercules: 'Hercules-EMC', gaea: 'Gaea', noaacloud: 'AWS'] +def NodeName = [hera: 'Hera-EMC', orion: 'Orion-EMC', hercules: 'Hercules-EMC', gaea: 'Gaea', noaacloud: 'awsepicglobalworkflow'] def custom_workspace = [hera: '/scratch1/NCEPDEV/global/CI', orion: '/work2/noaa/stmp/CI/ORION', hercules: '/work2/noaa/global/CI/HERCULES', gaea: '/gpfs/f5/epic/proj-shared/global/CI', noaacloud: /lustre/jenkins/global-workflow/CI] def repo_url = 'git@github.com:NOAA-EMC/global-workflow.git' def STATUS = 'Passed' @@ -79,7 +79,7 @@ pipeline { Machine = machine[0].toUpperCase() + machine.substring(1) echo "Getting Common Workspace for ${Machine}" ws("${custom_workspace[machine]}/${env.CHANGE_ID}") { - properties([parameters([[$class: 'NodeParameterDefinition', allowedSlaves: ['built-in', 'Hercules-EMC', 'Hera-EMC', 'Orion-EMC', 'Gaea', 'AWS'], defaultSlaves: ['built-in'], name: '', nodeEligibility: [$class: 'AllNodeEligibility'], triggerIfResult: 'allCases']])]) + properties([parameters([[$class: 'NodeParameterDefinition', allowedSlaves: ['built-in', 'Hercules-EMC', 'Hera-EMC', 'Orion-EMC', 'Gaea', 'awsepicglobalworkflow'], defaultSlaves: ['built-in'], name: '', nodeEligibility: [$class: 'AllNodeEligibility'], triggerIfResult: 'allCases']])]) GH = sh(script: "which gh || echo '~/bin/gh'", returnStdout: true).trim() CUSTOM_WORKSPACE = "${WORKSPACE}" 
HOMEgfs = "${CUSTOM_WORKSPACE}/global-workflow" From a4771df691c38e0275f446fe295d1d00fe205538 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 26 Feb 2025 18:36:43 +0000 Subject: [PATCH 002/134] add download fix subset data, also serve as a test PR to trigger CI testing on AWS --- ush/fetch-fix-data.py | 331 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 331 insertions(+) create mode 100644 ush/fetch-fix-data.py diff --git a/ush/fetch-fix-data.py b/ush/fetch-fix-data.py new file mode 100644 index 00000000000..717fecdcf7b --- /dev/null +++ b/ush/fetch-fix-data.py @@ -0,0 +1,331 @@ +#!/usr/bin/env python +# cfetch-fix-data.py +# wei.huang@noaa.gov +# 2025-02-26 +# script to download a subset of FIX data to local machines. +import os +import time +import sys +#import requests +#import json +#import base64 +import getopt +import subprocess +from pathlib import Path + +#---------------------------------------------------------------------------------------------------------------- +class FetchFIXdata(): + def __init__(self, atmgridarray=['C48'], ocngridarray=['500'], localdir=None, verbose=0): + self.aws_fix_bucket = 's3://noaa-nws-global-pds/fix' + self.aws_cp = 'aws --no-sign-request s3 cp' + self.aws_sync = 'aws --no-sign-request s3 sync' + + self.atmgridarray = atmgridarray + self.ocngridarray = ocngridarray + self.localdir = localdir + self.verbose = verbose + + #if (os.path.isdir(localdir)): + # print('Prepare to download FIX data for %s and %s to %s' %(atmgrid, ocngrid, localdir)) + #else: + # print('local dir: <%s> does not exist. 
Stop' %(localdir)) + # sys.exit(-1) + + self.verdict = {} + self.s3dict = {} + self.s3dict['raworog'] = 'raw/orog' + + if (self.localdir.find('fix') < 0): + self.targetdir = '%s/fix.subset' %(self.localdir) + else: + self.targetdir = self.localdir + +#---------------------------------------------------------------------------------------------------------------- + def update_s3dict(self): + self.update_s3dick_grid_independent() + self.add_grid_data() + + if (self.verbose): + self.printinfo() + +#---------------------------------------------------------------------------------------------------------------- + def update_s3dick_grid_independent(self): + for key in self.fix_ver_dict.keys(): + val = self.fix_ver_dict[key] + if (key == 'aer_ver'): + self.s3dict['aer'] = 'aer/%s' %(val) + elif (key == 'am_ver'): + self.s3dict['am'] = 'am/%s' %(val) + elif (key == 'chem_ver'): + self.s3dict['fimdata_chem'] = 'chem/%s/fimdata_chem' %(val) + self.s3dict['Emission_data'] = 'chem/%s/Emission_data' %(val) + elif (key == 'datm_ver'): + self.s3dict['cfsr'] = 'datm/%s/cfsr' %(val) + self.s3dict['gefs'] = 'datm/%s/gefs' %(val) + self.s3dict['gfs'] = 'datm/%s/gfs' %(val) + self.s3dict['mom6'] = 'datm/%s/mom6' %(val) + elif (key == 'glwu_ver'): + self.s3dict['glwu'] = 'glwu/%s' %(val) + elif (key == 'gsi_ver'): + self.s3dict['gsi'] = 'gsi/%s' %(val) + elif (key == 'lut_ver'): + self.s3dict['lut'] = 'lut/%s' %(val) + elif (key == 'mom6_ver'): + self.s3dict['mom6post'] = 'mom6/%s/post' %(val) + elif (key == 'reg2grb2_ver'): + self.s3dict['reg2grb2'] = 'reg2grb2/%s' %(val) + elif (key == 'sfc_climb_ver'): + self.s3dict['sfc_climo'] = 'sfc_climo/%s' %(val) + elif (key == 'verif_ver'): + self.s3dict['verif'] = 'verif/%s' %(val) + elif (key == 'wave_ver'): + self.s3dict['wave'] = 'wave/%s' %(val) + +#---------------------------------------------------------------------------------------------------------------- + def add_grid_data(self): + for key in self.fix_ver_dict.keys(): + val = 
self.fix_ver_dict[key] + if (key == 'orog_ver'): + self.add_atmgrid2s3dict('orog', key, val) + elif (key == 'ugwd_ver'): + self.add_atmgrid2s3dict('ugwd', key, val) + elif (key == 'mom6_ver'): + self.add_ocngrid2s3dict('mom6', key, val) + elif (key == 'cice_ver'): + self.add_ocngrid2s3dict('cice', key, val) + elif (key == 'cpl_ver'): + self.add_cpl2s3dict('cpl', key, val) + +#---------------------------------------------------------------------------------------------------------------- + def add_atmgrid2s3dict(self, varname, key, val): + for atmgrid in self.atmgridarray: + newkey = '%s_%s' %(key, atmgrid) + self.s3dict[newkey] = '%s/%s/%s' %(varname, val, atmgrid) + +#---------------------------------------------------------------------------------------------------------------- + def add_ocngrid2s3dict(self, varname, key, val): + for ocngrid in self.ocngridarray: + newkey = '%s_%s' %(key, atmgrid) + self.s3dict[newkey] = '%s/%s/%s' %(varname, val, ocngrid) + +#---------------------------------------------------------------------------------------------------------------- + def add_cpl2s3dict(self, varname, key, val): + for atmgrid in self.atmgridarray: + for ocngrid in self.ocngridarray: + newkey = '%s_a%so%s' %(key, atmgrid, ocngrid) + self.s3dict[newkey] = '%s/%s/a%so%s' %(varname, val, atmgrid, ocngrid) + +#---------------------------------------------------------------------------------------------------------------- + def printinfo(self): + print('Preparing to fetch') + print('ATM grid: ', self.atmgridarray) + print('ONC grid: ', self.ocngridarray) + print('From: %s' %(self.aws_fix_bucket)) + print('To: %s' %(self.targetdir)) + for key in self.s3dict.keys(): + val = self.s3dict[key] + print('%s: %s' %(key, val)) + +#---------------------------------------------------------------------------------------------------------------- + def fetchdata(self): + if (self.verbose): + print('Create local fix dir: ', self.targetdir) + + path = Path(self.targetdir) + 
path.mkdir(parents=True, exist_ok=True) + + self.fetch_ugwp_limb_tau() + + for key in self.s3dict.keys(): + self.fetch_dir(self.s3dict[key]) + +#---------------------------------------------------------------------------------------------------------------- + def fetch_dir(self, dir): + remotedir = '%s/%s' %(self.aws_fix_bucket, dir) + localdir = '%s/%s' %(self.targetdir, dir) + cmd = '%s %s %s'%(self.aws_sync, remotedir, localdir) + self.download_dir(cmd, localdir) + +#---------------------------------------------------------------------------------------------------------------- + def download_dir(self, cmd, localdir): + #returned_value = os.system(cmd) # returns the exit code in unix + #print('returned value:', returned_value) + + if (os.path.isdir(localdir)): + print('%s already exist. skip' %(localdir)) + else: + parentdir, dirname = os.path.split(localdir) + if (self.verbose): + print('Create local %s dir: ', parentdir) + path = Path(parentdir) + path.mkdir(parents=True, exist_ok=True) + if (self.verbose): + print(cmd) + print('Downloading ', localdir) + returned_value = subprocess.call(cmd, shell=True) # returns the exit code in unix + if (self.verbose): + print('returned value:', returned_value) + +#---------------------------------------------------------------------------------------------------------------- + def fetch_ugwp_limb_tau(self): + ugwp_limb_tau_remotepath = '%s/ugwd/%s/ugwp_limb_tau.nc' %(self.aws_fix_bucket, self.fix_ver_dict['ugwd_ver']) + ugwp_limb_tau_localdir = '%s/ugwd/%s' %(self.targetdir, self.fix_ver_dict['ugwd_ver']) + filename = '%s/ugwp_limb_tau.nc' %(ugwp_limb_tau_localdir) + path = Path(ugwp_limb_tau_localdir) + path.mkdir(parents=True, exist_ok=True) + cmd = '%s %s %s'%(self.aws_cp, ugwp_limb_tau_remotepath, filename) + self.download_file(cmd, filename) + +#---------------------------------------------------------------------------------------------------------------- + def download_file(self, cmd, filename): + #returned_value = 
os.system(cmd) # returns the exit code in unix + #print('returned value:', returned_value) + + if (os.path.isfile(filename)): + print('%s already exist. skip' %(filename)) + else: + if (self.verbose): + print(cmd) + print('Downloading ', filename) + returned_value = subprocess.call(cmd, shell=True) # returns the exit code in unix + if (self.verbose): + print('returned value:', returned_value) + +#---------------------------------------------------------------------------------------------------------------- + def set_fix_ver_from_gwhome(self, gwhome, verdict): + fix_ver_file = '%s/versions/fix.ver' + self.fix_ver_dict = verdict + if (os.path.isfile(fix_ver_file)): + with open(fix_ver_file, "r") as file: + for line in file.readlines(): + if (line.find('export ') >= 0): + headstr, _, value = line.strip().partition('=') + exphead, _, key = headstr.partition(' ') + self.fix_ver_dict[key] = value + else: + print('fix_ver_file: %s does not exist.' %(fix_ver_file)) + +#---------------------------------------------------------------------------------------------------------------- + def set_default_fix_ver(self, verdict): + self.fix_ver_dict = verdict + +#---------------------------------------------------------------------------------------------------------------- +def print_usage(verdict): + print('Usage: python fetch-fix-data.py \\') + print(' --atmgrid=AtmospericGrid (for multiple grids, separate with ",") \\') + print(' --ocngrid=OceanGrid (for multiple grids, separate with ",") \\') + print(' --localdir=Your-local-fix-dir \\') + print(' [options]') + print('options are:') + print('\t--gwhome=xxxx (Global-Workflow directory)') + + for key in verdict.keys(): + print('\t--%s=yyyymmdd default: %s' %(key, verdict[key])) + +#---------------------------------------------------------------------------------------------------------------- +if __name__ == '__main__': + atmgridlist = ['C48', 'C96', 'C192', 'C384', 'C768', 'C1152'] + ocngridlist = ['500', '100', '050', '025'] + 
+ verbose = 0 + atmgrid = 'C48' + ocngrid = '500' + localdir = '/contrib/global-workflow-shared-data' + + #default fix-version + verdict = {} + verdict['aer_ver'] = '20220805' + verdict['am_ver'] = '20220805' + verdict['chem_ver'] = '20220805' + verdict['cice_ver'] = '20240416' + verdict['cpl_ver'] = '20230526' + verdict['datm_ver'] = '20220805' + verdict['glwu_ver'] = '20220805' + verdict['gsi_ver'] = '20240208' + verdict['lut_ver'] = '20220805' + verdict['mom6_ver'] = '20240416' + verdict['orog_ver'] = '20231027' + verdict['reg2grb2_ver'] = '20220805' + verdict['sfc_climo_ver'] = '20220805' + verdict['ugwd_ver'] = '20240624' + verdict['verif_ver'] = '20220805' + verdict['wave_ver'] = '20240105' + + gwhome=None + + opts, args = getopt.getopt(sys.argv[1:], '', ['help', 'atmgrid=', 'ocngrid=', + 'verbose=', 'localdir=', + 'gwhome=', + 'aer_ver=', + 'am_ver=', + 'chem_ver=', + 'cice_ver=', + 'cpl_ver=', + 'datm_ver=', + 'glwu_ver=', + 'gsi_ver=', + 'lut_ver=', + 'mom6_ver=', + 'orog_ver=', + 'reg2grb2_ver=', + 'sfc_climo_ver=', + 'ugwd_ver=', + 'verif_ver=', + 'wave_ver=']) + for o, a in opts: + print('o: %s, a: %s' %(o, a)) + if o in ['--help']: + print_usage(verdict) + sys.exit(0) + elif o in ['--verbose']: + verbose = int(a) + elif o in ['--atmgrid']: + atmgrid = a + elif o in ['--ocngrid']: + ocngrid = a + elif o in ['--localdir']: + localdir = a + elif o in ['--gwhome']: + gwhome = a + else: + _, vername = o.split('--') + print('vername: <%s>' %(vername)) + verdict[vername] = a + + if (atmgrid.find(',') > 0): + atmgridarray = atmgrid.split(',') + else: + atmgridarray = [atmgrid] + + for grid in atmgridarray: + if (grid not in atmgridlist): + print('atmgrid: ', grid) + print('is not in supported grids: ', atmgridlist) + print_usage(verdict) + sys.exit(-1) + + if (ocngrid.find(',') > 0): + ocngridarray = ocngrid.split(',') + else: + ocngridarray = [ocngrid] + + for grid in ocngridarray: + if (grid not in ocngridlist): + print('ocngrid: ', grid) + print('is not in 
supported grids: ', ocngridlist) + print_usage(verdict) + sys.exit(-1) + +#------------------------------------------------------------------ + ffd = FetchFIXdata(atmgridarray=atmgridarray, + ocngridarray=ocngridarray, + localdir=localdir, verbose=verbose) + + if (gwhome is None): + ffd.set_default_fix_ver(verdict) + else: + ffd.set_fix_ver_from_gwhome(gwhome, verdict) + + ffd.update_s3dict() + + ffd.fetchdata() From 4f1e67956a9718dee7cc5e0c784df55ca6f15d61 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 26 Feb 2025 19:26:17 +0000 Subject: [PATCH 003/134] fix pynorm error --- ush/fetch-fix-data.py | 126 +++++++++++++++++++++++------------------- 1 file changed, 70 insertions(+), 56 deletions(-) diff --git a/ush/fetch-fix-data.py b/ush/fetch-fix-data.py index 717fecdcf7b..ce4fa1bdec4 100644 --- a/ush/fetch-fix-data.py +++ b/ush/fetch-fix-data.py @@ -6,9 +6,6 @@ import os import time import sys -#import requests -#import json -#import base64 import getopt import subprocess from pathlib import Path @@ -16,6 +13,7 @@ #---------------------------------------------------------------------------------------------------------------- class FetchFIXdata(): def __init__(self, atmgridarray=['C48'], ocngridarray=['500'], localdir=None, verbose=0): + self.aws_fix_bucket = 's3://noaa-nws-global-pds/fix' self.aws_cp = 'aws --no-sign-request s3 cp' self.aws_sync = 'aws --no-sign-request s3 sync' @@ -25,11 +23,11 @@ def __init__(self, atmgridarray=['C48'], ocngridarray=['500'], localdir=None, ve self.localdir = localdir self.verbose = verbose - #if (os.path.isdir(localdir)): - # print('Prepare to download FIX data for %s and %s to %s' %(atmgrid, ocngrid, localdir)) - #else: - # print('local dir: <%s> does not exist. Stop' %(localdir)) - # sys.exit(-1) + #if (os.path.isdir(localdir)): + # print('Prepare to download FIX data for %s and %s to %s' %(atmgrid, ocngrid, localdir)) + #else: + # print('local dir: <%s> does not exist. 
Stop' %(localdir)) + # sys.exit(-1) self.verdict = {} self.s3dict = {} @@ -40,49 +38,52 @@ def __init__(self, atmgridarray=['C48'], ocngridarray=['500'], localdir=None, ve else: self.targetdir = self.localdir -#---------------------------------------------------------------------------------------------------------------- + #---------------------------------------------------------------------------------------------------------------- def update_s3dict(self): + self.update_s3dick_grid_independent() self.add_grid_data() if (self.verbose): self.printinfo() -#---------------------------------------------------------------------------------------------------------------- + #---------------------------------------------------------------------------------------------------------------- def update_s3dick_grid_independent(self): + for key in self.fix_ver_dict.keys(): val = self.fix_ver_dict[key] if (key == 'aer_ver'): - self.s3dict['aer'] = 'aer/%s' %(val) + self.s3dict['aer'] = 'aer/%s' %( val ) elif (key == 'am_ver'): - self.s3dict['am'] = 'am/%s' %(val) + self.s3dict['am'] = 'am/%s' %( val ) elif (key == 'chem_ver'): - self.s3dict['fimdata_chem'] = 'chem/%s/fimdata_chem' %(val) - self.s3dict['Emission_data'] = 'chem/%s/Emission_data' %(val) + self.s3dict['fimdata_chem'] = 'chem/%s/fimdata_chem' %( val ) + self.s3dict['Emission_data'] = 'chem/%s/Emission_data' %( val ) elif (key == 'datm_ver'): - self.s3dict['cfsr'] = 'datm/%s/cfsr' %(val) - self.s3dict['gefs'] = 'datm/%s/gefs' %(val) - self.s3dict['gfs'] = 'datm/%s/gfs' %(val) - self.s3dict['mom6'] = 'datm/%s/mom6' %(val) + self.s3dict['cfsr'] = 'datm/%s/cfsr' %( val ) + self.s3dict['gefs'] = 'datm/%s/gefs' %( val ) + self.s3dict['gfs'] = 'datm/%s/gfs' %( val ) + self.s3dict['mom6'] = 'datm/%s/mom6' %( val ) elif (key == 'glwu_ver'): - self.s3dict['glwu'] = 'glwu/%s' %(val) + self.s3dict['glwu'] = 'glwu/%s' %( val ) elif (key == 'gsi_ver'): - self.s3dict['gsi'] = 'gsi/%s' %(val) + self.s3dict['gsi'] = 'gsi/%s' %( val 
) elif (key == 'lut_ver'): - self.s3dict['lut'] = 'lut/%s' %(val) + self.s3dict['lut'] = 'lut/%s' %( val ) elif (key == 'mom6_ver'): - self.s3dict['mom6post'] = 'mom6/%s/post' %(val) + self.s3dict['mom6post'] = 'mom6/%s/post' %( val ) elif (key == 'reg2grb2_ver'): - self.s3dict['reg2grb2'] = 'reg2grb2/%s' %(val) + self.s3dict['reg2grb2'] = 'reg2grb2/%s' %( val ) elif (key == 'sfc_climb_ver'): - self.s3dict['sfc_climo'] = 'sfc_climo/%s' %(val) + self.s3dict['sfc_climo'] = 'sfc_climo/%s' %( val ) elif (key == 'verif_ver'): - self.s3dict['verif'] = 'verif/%s' %(val) + self.s3dict['verif'] = 'verif/%s' %( val ) elif (key == 'wave_ver'): - self.s3dict['wave'] = 'wave/%s' %(val) + self.s3dict['wave'] = 'wave/%s' %( val ) -#---------------------------------------------------------------------------------------------------------------- + #---------------------------------------------------------------------------------------------------------------- def add_grid_data(self): + for key in self.fix_ver_dict.keys(): val = self.fix_ver_dict[key] if (key == 'orog_ver'): @@ -96,38 +97,43 @@ def add_grid_data(self): elif (key == 'cpl_ver'): self.add_cpl2s3dict('cpl', key, val) -#---------------------------------------------------------------------------------------------------------------- + #---------------------------------------------------------------------------------------------------------------- def add_atmgrid2s3dict(self, varname, key, val): + for atmgrid in self.atmgridarray: - newkey = '%s_%s' %(key, atmgrid) - self.s3dict[newkey] = '%s/%s/%s' %(varname, val, atmgrid) + newkey = '%s_%s' %( key, atmgrid ) + self.s3dict[newkey] = '%s/%s/%s' %( varname, val, atmgrid ) -#---------------------------------------------------------------------------------------------------------------- + #---------------------------------------------------------------------------------------------------------------- def add_ocngrid2s3dict(self, varname, key, val): + for ocngrid in 
self.ocngridarray: - newkey = '%s_%s' %(key, atmgrid) - self.s3dict[newkey] = '%s/%s/%s' %(varname, val, ocngrid) + newkey = '%s_%s' %( key, atmgrid ) + self.s3dict[newkey] = '%s/%s/%s' %( varname, val, ocngrid ) -#---------------------------------------------------------------------------------------------------------------- + #---------------------------------------------------------------------------------------------------------------- def add_cpl2s3dict(self, varname, key, val): + for atmgrid in self.atmgridarray: for ocngrid in self.ocngridarray: - newkey = '%s_a%so%s' %(key, atmgrid, ocngrid) - self.s3dict[newkey] = '%s/%s/a%so%s' %(varname, val, atmgrid, ocngrid) + newkey = '%s_a%so%s' %( key, atmgrid, ocngrid ) + self.s3dict[newkey] = '%s/%s/a%so%s' %( varname, val, atmgrid, ocngrid ) -#---------------------------------------------------------------------------------------------------------------- + #---------------------------------------------------------------------------------------------------------------- def printinfo(self): + print('Preparing to fetch') print('ATM grid: ', self.atmgridarray) print('ONC grid: ', self.ocngridarray) - print('From: %s' %(self.aws_fix_bucket)) - print('To: %s' %(self.targetdir)) + print('From: %s' %( self.aws_fix_bucket )) + print('To: %s' %( self.targetdir )) for key in self.s3dict.keys(): val = self.s3dict[key] - print('%s: %s' %(key, val)) + print('%s: %s' %( key, val )) -#---------------------------------------------------------------------------------------------------------------- + #---------------------------------------------------------------------------------------------------------------- def fetchdata(self): + if (self.verbose): print('Create local fix dir: ', self.targetdir) @@ -139,17 +145,19 @@ def fetchdata(self): for key in self.s3dict.keys(): self.fetch_dir(self.s3dict[key]) -#---------------------------------------------------------------------------------------------------------------- + 
#---------------------------------------------------------------------------------------------------------------- def fetch_dir(self, dir): + remotedir = '%s/%s' %(self.aws_fix_bucket, dir) - localdir = '%s/%s' %(self.targetdir, dir) + localdir = '%s/%s' %( self.targetdir, dir ) cmd = '%s %s %s'%(self.aws_sync, remotedir, localdir) self.download_dir(cmd, localdir) -#---------------------------------------------------------------------------------------------------------------- + #---------------------------------------------------------------------------------------------------------------- def download_dir(self, cmd, localdir): - #returned_value = os.system(cmd) # returns the exit code in unix - #print('returned value:', returned_value) + + #returned_value = os.system(cmd) # returns the exit code in unix + #print('returned value:', returned_value) if (os.path.isdir(localdir)): print('%s already exist. skip' %(localdir)) @@ -166,8 +174,9 @@ def download_dir(self, cmd, localdir): if (self.verbose): print('returned value:', returned_value) -#---------------------------------------------------------------------------------------------------------------- + #---------------------------------------------------------------------------------------------------------------- def fetch_ugwp_limb_tau(self): + ugwp_limb_tau_remotepath = '%s/ugwd/%s/ugwp_limb_tau.nc' %(self.aws_fix_bucket, self.fix_ver_dict['ugwd_ver']) ugwp_limb_tau_localdir = '%s/ugwd/%s' %(self.targetdir, self.fix_ver_dict['ugwd_ver']) filename = '%s/ugwp_limb_tau.nc' %(ugwp_limb_tau_localdir) @@ -176,13 +185,14 @@ def fetch_ugwp_limb_tau(self): cmd = '%s %s %s'%(self.aws_cp, ugwp_limb_tau_remotepath, filename) self.download_file(cmd, filename) -#---------------------------------------------------------------------------------------------------------------- + #---------------------------------------------------------------------------------------------------------------- def download_file(self, cmd, filename): 
+ #returned_value = os.system(cmd) # returns the exit code in unix #print('returned value:', returned_value) if (os.path.isfile(filename)): - print('%s already exist. skip' %(filename)) + print('%s already exist. skip' %( filename )) else: if (self.verbose): print(cmd) @@ -191,8 +201,9 @@ def download_file(self, cmd, filename): if (self.verbose): print('returned value:', returned_value) -#---------------------------------------------------------------------------------------------------------------- + #---------------------------------------------------------------------------------------------------------------- def set_fix_ver_from_gwhome(self, gwhome, verdict): + fix_ver_file = '%s/versions/fix.ver' self.fix_ver_dict = verdict if (os.path.isfile(fix_ver_file)): @@ -205,12 +216,14 @@ def set_fix_ver_from_gwhome(self, gwhome, verdict): else: print('fix_ver_file: %s does not exist.' %(fix_ver_file)) -#---------------------------------------------------------------------------------------------------------------- + #---------------------------------------------------------------------------------------------------------------- def set_default_fix_ver(self, verdict): + self.fix_ver_dict = verdict #---------------------------------------------------------------------------------------------------------------- def print_usage(verdict): + print('Usage: python fetch-fix-data.py \\') print(' --atmgrid=AtmospericGrid (for multiple grids, separate with ",") \\') print(' --ocngrid=OceanGrid (for multiple grids, separate with ",") \\') @@ -220,10 +233,11 @@ def print_usage(verdict): print('\t--gwhome=xxxx (Global-Workflow directory)') for key in verdict.keys(): - print('\t--%s=yyyymmdd default: %s' %(key, verdict[key])) + print( '\t--%s=yyyymmdd default: %s' %( key, verdict[key] ) ) #---------------------------------------------------------------------------------------------------------------- if __name__ == '__main__': + atmgridlist = ['C48', 'C96', 'C192', 'C384', 'C768', 
'C1152'] ocngridlist = ['500', '100', '050', '025'] @@ -232,7 +246,7 @@ def print_usage(verdict): ocngrid = '500' localdir = '/contrib/global-workflow-shared-data' - #default fix-version + #default fix-version verdict = {} verdict['aer_ver'] = '20220805' verdict['am_ver'] = '20220805' @@ -273,7 +287,7 @@ def print_usage(verdict): 'verif_ver=', 'wave_ver=']) for o, a in opts: - print('o: %s, a: %s' %(o, a)) + #print( 'o: %s, a: %s' %(o, a) ) if o in ['--help']: print_usage(verdict) sys.exit(0) @@ -289,7 +303,7 @@ def print_usage(verdict): gwhome = a else: _, vername = o.split('--') - print('vername: <%s>' %(vername)) + print( 'vername: <%s>' %(vername) ) verdict[vername] = a if (atmgrid.find(',') > 0): @@ -316,7 +330,7 @@ def print_usage(verdict): print_usage(verdict) sys.exit(-1) -#------------------------------------------------------------------ + #------------------------------------------------------------------ ffd = FetchFIXdata(atmgridarray=atmgridarray, ocngridarray=ocngridarray, localdir=localdir, verbose=verbose) From 2241227b7319fc5dfdc6b866fbef281dc0144a65 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 26 Feb 2025 20:10:17 +0000 Subject: [PATCH 004/134] fix pynorm error 2 --- ush/fetch-fix-data.py | 107 +++++++++++++++++++++--------------------- 1 file changed, 54 insertions(+), 53 deletions(-) diff --git a/ush/fetch-fix-data.py b/ush/fetch-fix-data.py index ce4fa1bdec4..2a39f0d60d0 100644 --- a/ush/fetch-fix-data.py +++ b/ush/fetch-fix-data.py @@ -10,8 +10,9 @@ import subprocess from pathlib import Path -#---------------------------------------------------------------------------------------------------------------- +# ---------------------------------------------------------------------------------------------------------------- class FetchFIXdata(): + def __init__(self, atmgridarray=['C48'], ocngridarray=['500'], localdir=None, verbose=0): self.aws_fix_bucket = 's3://noaa-nws-global-pds/fix' @@ -38,7 +39,7 @@ def __init__(self, 
atmgridarray=['C48'], ocngridarray=['500'], localdir=None, ve else: self.targetdir = self.localdir - #---------------------------------------------------------------------------------------------------------------- + # ---------------------------------------------------------------------------------------------------------------- def update_s3dict(self): self.update_s3dick_grid_independent() @@ -47,41 +48,41 @@ def update_s3dict(self): if (self.verbose): self.printinfo() - #---------------------------------------------------------------------------------------------------------------- + # ---------------------------------------------------------------------------------------------------------------- def update_s3dick_grid_independent(self): for key in self.fix_ver_dict.keys(): val = self.fix_ver_dict[key] - if (key == 'aer_ver'): - self.s3dict['aer'] = 'aer/%s' %( val ) - elif (key == 'am_ver'): - self.s3dict['am'] = 'am/%s' %( val ) - elif (key == 'chem_ver'): - self.s3dict['fimdata_chem'] = 'chem/%s/fimdata_chem' %( val ) - self.s3dict['Emission_data'] = 'chem/%s/Emission_data' %( val ) - elif (key == 'datm_ver'): - self.s3dict['cfsr'] = 'datm/%s/cfsr' %( val ) - self.s3dict['gefs'] = 'datm/%s/gefs' %( val ) - self.s3dict['gfs'] = 'datm/%s/gfs' %( val ) - self.s3dict['mom6'] = 'datm/%s/mom6' %( val ) - elif (key == 'glwu_ver'): - self.s3dict['glwu'] = 'glwu/%s' %( val ) - elif (key == 'gsi_ver'): - self.s3dict['gsi'] = 'gsi/%s' %( val ) - elif (key == 'lut_ver'): - self.s3dict['lut'] = 'lut/%s' %( val ) - elif (key == 'mom6_ver'): - self.s3dict['mom6post'] = 'mom6/%s/post' %( val ) - elif (key == 'reg2grb2_ver'): - self.s3dict['reg2grb2'] = 'reg2grb2/%s' %( val ) - elif (key == 'sfc_climb_ver'): - self.s3dict['sfc_climo'] = 'sfc_climo/%s' %( val ) - elif (key == 'verif_ver'): - self.s3dict['verif'] = 'verif/%s' %( val ) - elif (key == 'wave_ver'): - self.s3dict['wave'] = 'wave/%s' %( val ) - - 
#---------------------------------------------------------------------------------------------------------------- + if ( key == 'aer_ver' ): + self.s3dict['aer'] = 'aer/%s' %(val) + elif ( key == 'am_ver' ): + self.s3dict['am'] = 'am/%s' %(val) + elif ( key == 'chem_ver' ): + self.s3dict['fimdata_chem'] = 'chem/%s/fimdata_chem' %(val) + self.s3dict['Emission_data'] = 'chem/%s/Emission_data' %(val) + elif ( key == 'datm_ver' ): + self.s3dict['cfsr'] = 'datm/%s/cfsr' %(val) + self.s3dict['gefs'] = 'datm/%s/gefs' %(val) + self.s3dict['gfs'] = 'datm/%s/gfs' %(val) + self.s3dict['mom6'] = 'datm/%s/mom6' %(val) + elif ( key == 'glwu_ver' ): + self.s3dict['glwu'] = 'glwu/%s' %(val) + elif ( key == 'gsi_ver' ): + self.s3dict['gsi'] = 'gsi/%s' %(val) + elif ( key == 'lut_ver' ): + self.s3dict['lut'] = 'lut/%s' %(val) + elif ( key == 'mom6_ver' ): + self.s3dict['mom6post'] = 'mom6/%s/post' %(val) + elif ( key == 'reg2grb2_ver' ): + self.s3dict['reg2grb2'] = 'reg2grb2/%s' %(val) + elif ( key == 'sfc_climb_ver' ): + self.s3dict['sfc_climo'] = 'sfc_climo/%s' %(val) + elif ( key == 'verif_ver' ): + self.s3dict['verif'] = 'verif/%s' %(val) + elif ( key == 'wave_ver' ): + self.s3dict['wave'] = 'wave/%s' %(val) + + # ---------------------------------------------------------------------------------------------------------------- def add_grid_data(self): for key in self.fix_ver_dict.keys(): @@ -97,29 +98,29 @@ def add_grid_data(self): elif (key == 'cpl_ver'): self.add_cpl2s3dict('cpl', key, val) - #---------------------------------------------------------------------------------------------------------------- + # ---------------------------------------------------------------------------------------------------------------- def add_atmgrid2s3dict(self, varname, key, val): for atmgrid in self.atmgridarray: newkey = '%s_%s' %( key, atmgrid ) - self.s3dict[newkey] = '%s/%s/%s' %( varname, val, atmgrid ) + self.s3dict[newkey] = '%s/%s/%s' %(varname, val, atmgrid) - 
#---------------------------------------------------------------------------------------------------------------- + # ---------------------------------------------------------------------------------------------------------------- def add_ocngrid2s3dict(self, varname, key, val): for ocngrid in self.ocngridarray: - newkey = '%s_%s' %( key, atmgrid ) - self.s3dict[newkey] = '%s/%s/%s' %( varname, val, ocngrid ) + newkey = '%s_%s' %(key, atmgrid) + self.s3dict[newkey] = '%s/%s/%s' %(varname, val, ocngrid) - #---------------------------------------------------------------------------------------------------------------- + # ---------------------------------------------------------------------------------------------------------------- def add_cpl2s3dict(self, varname, key, val): for atmgrid in self.atmgridarray: for ocngrid in self.ocngridarray: newkey = '%s_a%so%s' %( key, atmgrid, ocngrid ) - self.s3dict[newkey] = '%s/%s/a%so%s' %( varname, val, atmgrid, ocngrid ) + self.s3dict[newkey] = '%s/%s/a%so%s' %(varname, val, atmgrid, ocngrid) - #---------------------------------------------------------------------------------------------------------------- + # ---------------------------------------------------------------------------------------------------------------- def printinfo(self): print('Preparing to fetch') @@ -131,7 +132,7 @@ def printinfo(self): val = self.s3dict[key] print('%s: %s' %( key, val )) - #---------------------------------------------------------------------------------------------------------------- + # ---------------------------------------------------------------------------------------------------------------- def fetchdata(self): if (self.verbose): @@ -145,15 +146,15 @@ def fetchdata(self): for key in self.s3dict.keys(): self.fetch_dir(self.s3dict[key]) - #---------------------------------------------------------------------------------------------------------------- + # 
---------------------------------------------------------------------------------------------------------------- def fetch_dir(self, dir): remotedir = '%s/%s' %(self.aws_fix_bucket, dir) localdir = '%s/%s' %( self.targetdir, dir ) - cmd = '%s %s %s'%(self.aws_sync, remotedir, localdir) + cmd = '%s %s %s' %(self.aws_sync, remotedir, localdir) self.download_dir(cmd, localdir) - #---------------------------------------------------------------------------------------------------------------- + # ---------------------------------------------------------------------------------------------------------------- def download_dir(self, cmd, localdir): #returned_value = os.system(cmd) # returns the exit code in unix @@ -164,7 +165,7 @@ def download_dir(self, cmd, localdir): else: parentdir, dirname = os.path.split(localdir) if (self.verbose): - print('Create local %s dir: ', parentdir) + print('Create local %s dir: ' %(parentdir)) path = Path(parentdir) path.mkdir(parents=True, exist_ok=True) if (self.verbose): @@ -174,7 +175,7 @@ def download_dir(self, cmd, localdir): if (self.verbose): print('returned value:', returned_value) - #---------------------------------------------------------------------------------------------------------------- + # ---------------------------------------------------------------------------------------------------------------- def fetch_ugwp_limb_tau(self): ugwp_limb_tau_remotepath = '%s/ugwd/%s/ugwp_limb_tau.nc' %(self.aws_fix_bucket, self.fix_ver_dict['ugwd_ver']) @@ -185,7 +186,7 @@ def fetch_ugwp_limb_tau(self): cmd = '%s %s %s'%(self.aws_cp, ugwp_limb_tau_remotepath, filename) self.download_file(cmd, filename) - #---------------------------------------------------------------------------------------------------------------- + # ---------------------------------------------------------------------------------------------------------------- def download_file(self, cmd, filename): #returned_value = os.system(cmd) # returns the exit code in unix 
@@ -201,7 +202,7 @@ def download_file(self, cmd, filename): if (self.verbose): print('returned value:', returned_value) - #---------------------------------------------------------------------------------------------------------------- + # ---------------------------------------------------------------------------------------------------------------- def set_fix_ver_from_gwhome(self, gwhome, verdict): fix_ver_file = '%s/versions/fix.ver' @@ -216,12 +217,12 @@ def set_fix_ver_from_gwhome(self, gwhome, verdict): else: print('fix_ver_file: %s does not exist.' %(fix_ver_file)) - #---------------------------------------------------------------------------------------------------------------- + # ---------------------------------------------------------------------------------------------------------------- def set_default_fix_ver(self, verdict): self.fix_ver_dict = verdict -#---------------------------------------------------------------------------------------------------------------- +# ---------------------------------------------------------------------------------------------------------------- def print_usage(verdict): print('Usage: python fetch-fix-data.py \\') @@ -235,7 +236,7 @@ def print_usage(verdict): for key in verdict.keys(): print( '\t--%s=yyyymmdd default: %s' %( key, verdict[key] ) ) -#---------------------------------------------------------------------------------------------------------------- +# ---------------------------------------------------------------------------------------------------------------- if __name__ == '__main__': atmgridlist = ['C48', 'C96', 'C192', 'C384', 'C768', 'C1152'] @@ -330,7 +331,7 @@ def print_usage(verdict): print_usage(verdict) sys.exit(-1) - #------------------------------------------------------------------ + # ------------------------------------------------------------------ ffd = FetchFIXdata(atmgridarray=atmgridarray, ocngridarray=ocngridarray, localdir=localdir, verbose=verbose) From 
9679617e8c7f9bc2978e7b66036fefe9e44f282b Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 26 Feb 2025 21:06:38 +0000 Subject: [PATCH 005/134] fix pynorm error3 --- ush/fetch-fix-data.py | 64 +++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/ush/fetch-fix-data.py b/ush/fetch-fix-data.py index 2a39f0d60d0..558f12bee9b 100644 --- a/ush/fetch-fix-data.py +++ b/ush/fetch-fix-data.py @@ -24,10 +24,10 @@ def __init__(self, atmgridarray=['C48'], ocngridarray=['500'], localdir=None, ve self.localdir = localdir self.verbose = verbose - #if (os.path.isdir(localdir)): + # if (os.path.isdir(localdir)): # print('Prepare to download FIX data for %s and %s to %s' %(atmgrid, ocngrid, localdir)) - #else: - # print('local dir: <%s> does not exist. Stop' %(localdir)) + # else: + # print(f'local dir: <{localdir}> does not exist. Stop') # sys.exit(-1) self.verdict = {} @@ -35,7 +35,7 @@ def __init__(self, atmgridarray=['C48'], ocngridarray=['500'], localdir=None, ve self.s3dict['raworog'] = 'raw/orog' if (self.localdir.find('fix') < 0): - self.targetdir = '%s/fix.subset' %(self.localdir) + self.targetdir = f'{self.localdir}/fix.subset' else: self.targetdir = self.localdir @@ -53,34 +53,34 @@ def update_s3dick_grid_independent(self): for key in self.fix_ver_dict.keys(): val = self.fix_ver_dict[key] - if ( key == 'aer_ver' ): - self.s3dict['aer'] = 'aer/%s' %(val) - elif ( key == 'am_ver' ): - self.s3dict['am'] = 'am/%s' %(val) - elif ( key == 'chem_ver' ): - self.s3dict['fimdata_chem'] = 'chem/%s/fimdata_chem' %(val) - self.s3dict['Emission_data'] = 'chem/%s/Emission_data' %(val) - elif ( key == 'datm_ver' ): - self.s3dict['cfsr'] = 'datm/%s/cfsr' %(val) - self.s3dict['gefs'] = 'datm/%s/gefs' %(val) - self.s3dict['gfs'] = 'datm/%s/gfs' %(val) - self.s3dict['mom6'] = 'datm/%s/mom6' %(val) - elif ( key == 'glwu_ver' ): - self.s3dict['glwu'] = 'glwu/%s' %(val) - elif ( key == 'gsi_ver' ): - self.s3dict['gsi'] = 'gsi/%s' %(val) - 
elif ( key == 'lut_ver' ): - self.s3dict['lut'] = 'lut/%s' %(val) - elif ( key == 'mom6_ver' ): - self.s3dict['mom6post'] = 'mom6/%s/post' %(val) - elif ( key == 'reg2grb2_ver' ): - self.s3dict['reg2grb2'] = 'reg2grb2/%s' %(val) - elif ( key == 'sfc_climb_ver' ): - self.s3dict['sfc_climo'] = 'sfc_climo/%s' %(val) - elif ( key == 'verif_ver' ): - self.s3dict['verif'] = 'verif/%s' %(val) - elif ( key == 'wave_ver' ): - self.s3dict['wave'] = 'wave/%s' %(val) + if (key == 'aer_ver'): + self.s3dict['aer'] = f'aer/{val}' + elif ( key == 'am_ver'): + self.s3dict['am'] = f'am/{val}' + elif (key == 'chem_ver'): + self.s3dict['fimdata_chem'] = f'chem/{val}/fimdata_chem' + self.s3dict['Emission_data'] = f'chem/{val}/Emission_data' + elif (key == 'datm_ver'): + self.s3dict['cfsr'] = f'datm/{val}/cfsr' + self.s3dict['gefs'] = f'datm/{val}/gefs' + self.s3dict['gfs'] = f'datm/{val}/gfs' + self.s3dict['mom6'] = f'datm/{val}/mom6' + elif (key == 'glwu_ver'): + self.s3dict['glwu'] = f'glwu/{val}' + elif (key == 'gsi_ver'): + self.s3dict['gsi'] = f'gsi/{val}' + elif (key == 'lut_ver'): + self.s3dict['lut'] = f'lut/{val}' + elif (key == 'mom6_ver'): + self.s3dict['mom6post'] = f'mom6/{val}/post' + elif (key == 'reg2grb2_ver'): + self.s3dict['reg2grb2'] = f'reg2grb2/{val}' + elif (key == 'sfc_climb_ver' ): + self.s3dict['sfc_climo'] = f'sfc_climo/{val}' + elif (key == 'verif_ver'): + self.s3dict['verif'] = f'verif/{val}' + elif (key == 'wave_ver'): + self.s3dict['wave'] = f'wave/{val}' # ---------------------------------------------------------------------------------------------------------------- def add_grid_data(self): From fe887515dc8b8b3bed8312d189fb3ceddbe9ed85 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 26 Feb 2025 21:31:00 +0000 Subject: [PATCH 006/134] fix pynorm error 4 --- ush/fetch-fix-data.py | 74 ++++++++++++++++++++++--------------------- 1 file changed, 38 insertions(+), 36 deletions(-) diff --git a/ush/fetch-fix-data.py b/ush/fetch-fix-data.py index 
558f12bee9b..fe7120285e1 100644 --- a/ush/fetch-fix-data.py +++ b/ush/fetch-fix-data.py @@ -102,41 +102,41 @@ def add_grid_data(self): def add_atmgrid2s3dict(self, varname, key, val): for atmgrid in self.atmgridarray: - newkey = '%s_%s' %( key, atmgrid ) - self.s3dict[newkey] = '%s/%s/%s' %(varname, val, atmgrid) + newkey = f'{key}_{atmgrid}' + self.s3dict[newkey] = f'{varname}/{val}/{atmgrid}' # ---------------------------------------------------------------------------------------------------------------- def add_ocngrid2s3dict(self, varname, key, val): for ocngrid in self.ocngridarray: - newkey = '%s_%s' %(key, atmgrid) - self.s3dict[newkey] = '%s/%s/%s' %(varname, val, ocngrid) + newkey = f'{key}_{atmgrid}' + self.s3dict[newkey] = f'{varname}/{val}/{ocngrid}' # ---------------------------------------------------------------------------------------------------------------- def add_cpl2s3dict(self, varname, key, val): for atmgrid in self.atmgridarray: for ocngrid in self.ocngridarray: - newkey = '%s_a%so%s' %( key, atmgrid, ocngrid ) - self.s3dict[newkey] = '%s/%s/a%so%s' %(varname, val, atmgrid, ocngrid) + newkey = f'{key}_a{atmgrid}o{ocngrid}' + self.s3dict[newkey] = f'{varname}/{val}/a{atmgrid}o{ocngrid}' # ---------------------------------------------------------------------------------------------------------------- def printinfo(self): - print('Preparing to fetch') - print('ATM grid: ', self.atmgridarray) - print('ONC grid: ', self.ocngridarray) - print('From: %s' %( self.aws_fix_bucket )) - print('To: %s' %( self.targetdir )) + print(f'Preparing to fetch') + print(f'ATM grid: {self.atmgridarray}') + print(f'ONC grid: {self.ocngridarray}') + print(f'From: {self.aws_fix_bucket}') + print(f'To: {self.targetdir}') for key in self.s3dict.keys(): val = self.s3dict[key] - print('%s: %s' %( key, val )) + print(f'{key}: {val}') # ---------------------------------------------------------------------------------------------------------------- def fetchdata(self): if 
(self.verbose): - print('Create local fix dir: ', self.targetdir) + print('Create local fix dir: {self.targetdir}') path = Path(self.targetdir) path.mkdir(parents=True, exist_ok=True) @@ -149,28 +149,28 @@ def fetchdata(self): # ---------------------------------------------------------------------------------------------------------------- def fetch_dir(self, dir): - remotedir = '%s/%s' %(self.aws_fix_bucket, dir) - localdir = '%s/%s' %( self.targetdir, dir ) - cmd = '%s %s %s' %(self.aws_sync, remotedir, localdir) + remotedir = f'{self.aws_fix_bucket}/{dir}' + localdir = f'{self.targetdir}/{dir}' + cmd = f'{self.aws_sync} {remotedir} {localdir}' self.download_dir(cmd, localdir) # ---------------------------------------------------------------------------------------------------------------- def download_dir(self, cmd, localdir): - #returned_value = os.system(cmd) # returns the exit code in unix - #print('returned value:', returned_value) + # returned_value = os.system(cmd) # returns the exit code in unix + # print('returned value:', returned_value) if (os.path.isdir(localdir)): - print('%s already exist. skip' %(localdir)) + print(f'{localdir} already exist. 
skip' else: parentdir, dirname = os.path.split(localdir) if (self.verbose): - print('Create local %s dir: ' %(parentdir)) + print(f'Create local {parentdir} dir:') path = Path(parentdir) path.mkdir(parents=True, exist_ok=True) if (self.verbose): print(cmd) - print('Downloading ', localdir) + print(f'Downloading {localdir}') returned_value = subprocess.call(cmd, shell=True) # returns the exit code in unix if (self.verbose): print('returned value:', returned_value) @@ -178,26 +178,26 @@ def download_dir(self, cmd, localdir): # ---------------------------------------------------------------------------------------------------------------- def fetch_ugwp_limb_tau(self): - ugwp_limb_tau_remotepath = '%s/ugwd/%s/ugwp_limb_tau.nc' %(self.aws_fix_bucket, self.fix_ver_dict['ugwd_ver']) - ugwp_limb_tau_localdir = '%s/ugwd/%s' %(self.targetdir, self.fix_ver_dict['ugwd_ver']) - filename = '%s/ugwp_limb_tau.nc' %(ugwp_limb_tau_localdir) + ugwp_limb_tau_remotepath = f'{self.aws_fix_bucket}/ugwd/{self.fix_ver_dict['ugwd_ver']}/ugwp_limb_tau.nc' + ugwp_limb_tau_localdir = f'{self.targetdir}/ugwd/{self.fix_ver_dict['ugwd_ver']}' + filename = f'{ugwp_limb_tau_localdir}/ugwp_limb_tau.nc' path = Path(ugwp_limb_tau_localdir) path.mkdir(parents=True, exist_ok=True) - cmd = '%s %s %s'%(self.aws_cp, ugwp_limb_tau_remotepath, filename) + cmd = f'{self.aws_cp} {ugwp_limb_tau_remotepath} {filename}' self.download_file(cmd, filename) # ---------------------------------------------------------------------------------------------------------------- def download_file(self, cmd, filename): - #returned_value = os.system(cmd) # returns the exit code in unix - #print('returned value:', returned_value) + # returned_value = os.system(cmd) # returns the exit code in unix + # print('returned value:', returned_value) if (os.path.isfile(filename)): - print('%s already exist. skip' %( filename )) + print(f'{filename} already exist. 
skip') else: if (self.verbose): print(cmd) - print('Downloading ', filename) + print(f'Downloading {filename}') returned_value = subprocess.call(cmd, shell=True) # returns the exit code in unix if (self.verbose): print('returned value:', returned_value) @@ -205,7 +205,7 @@ def download_file(self, cmd, filename): # ---------------------------------------------------------------------------------------------------------------- def set_fix_ver_from_gwhome(self, gwhome, verdict): - fix_ver_file = '%s/versions/fix.ver' + fix_ver_file = f'{gwhome}/versions/fix.ver' self.fix_ver_dict = verdict if (os.path.isfile(fix_ver_file)): with open(fix_ver_file, "r") as file: @@ -215,7 +215,7 @@ def set_fix_ver_from_gwhome(self, gwhome, verdict): exphead, _, key = headstr.partition(' ') self.fix_ver_dict[key] = value else: - print('fix_ver_file: %s does not exist.' %(fix_ver_file)) + print(f'fix_ver_file: {ix_ver_file}s does not exist.') # ---------------------------------------------------------------------------------------------------------------- def set_default_fix_ver(self, verdict): @@ -225,6 +225,7 @@ def set_default_fix_ver(self, verdict): # ---------------------------------------------------------------------------------------------------------------- def print_usage(verdict): + print('Usage: python fetch-fix-data.py \\') print(' --atmgrid=AtmospericGrid (for multiple grids, separate with ",") \\') print(' --ocngrid=OceanGrid (for multiple grids, separate with ",") \\') @@ -234,11 +235,12 @@ def print_usage(verdict): print('\t--gwhome=xxxx (Global-Workflow directory)') for key in verdict.keys(): - print( '\t--%s=yyyymmdd default: %s' %( key, verdict[key] ) ) + print(f'\t--{key}=yyyymmdd default: {verdict[key]}') # ---------------------------------------------------------------------------------------------------------------- if __name__ == '__main__': + atmgridlist = ['C48', 'C96', 'C192', 'C384', 'C768', 'C1152'] ocngridlist = ['500', '100', '050', '025'] @@ -247,7 +249,7 
@@ def print_usage(verdict): ocngrid = '500' localdir = '/contrib/global-workflow-shared-data' - #default fix-version + # default fix-version verdict = {} verdict['aer_ver'] = '20220805' verdict['am_ver'] = '20220805' @@ -266,7 +268,7 @@ def print_usage(verdict): verdict['verif_ver'] = '20220805' verdict['wave_ver'] = '20240105' - gwhome=None + gwhome = None opts, args = getopt.getopt(sys.argv[1:], '', ['help', 'atmgrid=', 'ocngrid=', 'verbose=', 'localdir=', @@ -288,7 +290,7 @@ def print_usage(verdict): 'verif_ver=', 'wave_ver=']) for o, a in opts: - #print( 'o: %s, a: %s' %(o, a) ) + # print(f'o: {o}, a: {a}') if o in ['--help']: print_usage(verdict) sys.exit(0) @@ -304,7 +306,7 @@ def print_usage(verdict): gwhome = a else: _, vername = o.split('--') - print( 'vername: <%s>' %(vername) ) + print(f'vername: <{vername}>') verdict[vername] = a if (atmgrid.find(',') > 0): From 0284fb7f3128c750d8df0b05400d3d754491246e Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 26 Feb 2025 23:00:23 +0000 Subject: [PATCH 007/134] fix pynorm error 5 --- ush/fetch-fix-data.py | 240 +++++++++++++++++++++--------------------- 1 file changed, 121 insertions(+), 119 deletions(-) diff --git a/ush/fetch-fix-data.py b/ush/fetch-fix-data.py index fe7120285e1..6d6f8ac4fe6 100644 --- a/ush/fetch-fix-data.py +++ b/ush/fetch-fix-data.py @@ -13,16 +13,17 @@ # ---------------------------------------------------------------------------------------------------------------- class FetchFIXdata(): + def __init__(self, atmgridarray=['C48'], ocngridarray=['500'], localdir=None, verbose=0): - self.aws_fix_bucket = 's3://noaa-nws-global-pds/fix' - self.aws_cp = 'aws --no-sign-request s3 cp' - self.aws_sync = 'aws --no-sign-request s3 sync' + self.aws_fix_bucket='s3://noaa-nws-global-pds/fix' + self.aws_cp='aws --no-sign-request s3 cp' + self.aws_sync='aws --no-sign-request s3 sync' - self.atmgridarray = atmgridarray - self.ocngridarray = ocngridarray - self.localdir = localdir - self.verbose = verbose 
+ self.atmgridarray=atmgridarray + self.ocngridarray=ocngridarray + self.localdir=localdir + self.verbose=verbose # if (os.path.isdir(localdir)): # print('Prepare to download FIX data for %s and %s to %s' %(atmgrid, ocngrid, localdir)) @@ -30,14 +31,14 @@ def __init__(self, atmgridarray=['C48'], ocngridarray=['500'], localdir=None, ve # print(f'local dir: <{localdir}> does not exist. Stop') # sys.exit(-1) - self.verdict = {} - self.s3dict = {} - self.s3dict['raworog'] = 'raw/orog' + self.verdict={} + self.s3dict={} + self.s3dict['raworog']='raw/orog' if (self.localdir.find('fix') < 0): - self.targetdir = f'{self.localdir}/fix.subset' + self.targetdir=f'{self.localdir}/fix.subset' else: - self.targetdir = self.localdir + self.targetdir=self.localdir # ---------------------------------------------------------------------------------------------------------------- def update_s3dict(self): @@ -52,41 +53,41 @@ def update_s3dict(self): def update_s3dick_grid_independent(self): for key in self.fix_ver_dict.keys(): - val = self.fix_ver_dict[key] + val=self.fix_ver_dict[key] if (key == 'aer_ver'): - self.s3dict['aer'] = f'aer/{val}' + self.s3dict['aer']=f'aer/{val}' elif ( key == 'am_ver'): - self.s3dict['am'] = f'am/{val}' + self.s3dict['am']=f'am/{val}' elif (key == 'chem_ver'): - self.s3dict['fimdata_chem'] = f'chem/{val}/fimdata_chem' - self.s3dict['Emission_data'] = f'chem/{val}/Emission_data' + self.s3dict['fimdata_chem']=f'chem/{val}/fimdata_chem' + self.s3dict['Emission_data']=f'chem/{val}/Emission_data' elif (key == 'datm_ver'): - self.s3dict['cfsr'] = f'datm/{val}/cfsr' - self.s3dict['gefs'] = f'datm/{val}/gefs' - self.s3dict['gfs'] = f'datm/{val}/gfs' - self.s3dict['mom6'] = f'datm/{val}/mom6' + self.s3dict['cfsr']=f'datm/{val}/cfsr' + self.s3dict['gefs']=f'datm/{val}/gefs' + self.s3dict['gfs']=f'datm/{val}/gfs' + self.s3dict['mom6']=f'datm/{val}/mom6' elif (key == 'glwu_ver'): - self.s3dict['glwu'] = f'glwu/{val}' + self.s3dict['glwu']=f'glwu/{val}' elif (key == 
'gsi_ver'): - self.s3dict['gsi'] = f'gsi/{val}' + self.s3dict['gsi']=f'gsi/{val}' elif (key == 'lut_ver'): - self.s3dict['lut'] = f'lut/{val}' + self.s3dict['lut']=f'lut/{val}' elif (key == 'mom6_ver'): - self.s3dict['mom6post'] = f'mom6/{val}/post' + self.s3dict['mom6post']=f'mom6/{val}/post' elif (key == 'reg2grb2_ver'): - self.s3dict['reg2grb2'] = f'reg2grb2/{val}' + self.s3dict['reg2grb2']=f'reg2grb2/{val}' elif (key == 'sfc_climb_ver' ): - self.s3dict['sfc_climo'] = f'sfc_climo/{val}' + self.s3dict['sfc_climo']=f'sfc_climo/{val}' elif (key == 'verif_ver'): - self.s3dict['verif'] = f'verif/{val}' + self.s3dict['verif']=f'verif/{val}' elif (key == 'wave_ver'): - self.s3dict['wave'] = f'wave/{val}' + self.s3dict['wave']=f'wave/{val}' # ---------------------------------------------------------------------------------------------------------------- def add_grid_data(self): for key in self.fix_ver_dict.keys(): - val = self.fix_ver_dict[key] + val=self.fix_ver_dict[key] if (key == 'orog_ver'): self.add_atmgrid2s3dict('orog', key, val) elif (key == 'ugwd_ver'): @@ -102,23 +103,23 @@ def add_grid_data(self): def add_atmgrid2s3dict(self, varname, key, val): for atmgrid in self.atmgridarray: - newkey = f'{key}_{atmgrid}' - self.s3dict[newkey] = f'{varname}/{val}/{atmgrid}' + newkey=f'{key}_{atmgrid}' + self.s3dict[newkey]=f'{varname}/{val}/{atmgrid}' # ---------------------------------------------------------------------------------------------------------------- def add_ocngrid2s3dict(self, varname, key, val): for ocngrid in self.ocngridarray: - newkey = f'{key}_{atmgrid}' - self.s3dict[newkey] = f'{varname}/{val}/{ocngrid}' + newkey=f'{key}_{atmgrid}' + self.s3dict[newkey]=f'{varname}/{val}/{ocngrid}' # ---------------------------------------------------------------------------------------------------------------- def add_cpl2s3dict(self, varname, key, val): for atmgrid in self.atmgridarray: for ocngrid in self.ocngridarray: - newkey = f'{key}_a{atmgrid}o{ocngrid}' - 
self.s3dict[newkey] = f'{varname}/{val}/a{atmgrid}o{ocngrid}' + newkey=f'{key}_a{atmgrid}o{ocngrid}' + self.s3dict[newkey]=f'{varname}/{val}/a{atmgrid}o{ocngrid}' # ---------------------------------------------------------------------------------------------------------------- def printinfo(self): @@ -129,7 +130,7 @@ def printinfo(self): print(f'From: {self.aws_fix_bucket}') print(f'To: {self.targetdir}') for key in self.s3dict.keys(): - val = self.s3dict[key] + val=self.s3dict[key] print(f'{key}: {val}') # ---------------------------------------------------------------------------------------------------------------- @@ -138,7 +139,7 @@ def fetchdata(self): if (self.verbose): print('Create local fix dir: {self.targetdir}') - path = Path(self.targetdir) + path=Path(self.targetdir) path.mkdir(parents=True, exist_ok=True) self.fetch_ugwp_limb_tau() @@ -149,47 +150,47 @@ def fetchdata(self): # ---------------------------------------------------------------------------------------------------------------- def fetch_dir(self, dir): - remotedir = f'{self.aws_fix_bucket}/{dir}' - localdir = f'{self.targetdir}/{dir}' - cmd = f'{self.aws_sync} {remotedir} {localdir}' + remotedir=f'{self.aws_fix_bucket}/{dir}' + localdir=f'{self.targetdir}/{dir}' + cmd=f'{self.aws_sync} {remotedir} {localdir}' self.download_dir(cmd, localdir) # ---------------------------------------------------------------------------------------------------------------- def download_dir(self, cmd, localdir): - # returned_value = os.system(cmd) # returns the exit code in unix + # returned_value=os.system(cmd) # returns the exit code in unix # print('returned value:', returned_value) if (os.path.isdir(localdir)): print(f'{localdir} already exist. 
skip' else: - parentdir, dirname = os.path.split(localdir) + parentdir, dirname=os.path.split(localdir) if (self.verbose): print(f'Create local {parentdir} dir:') - path = Path(parentdir) + path=Path(parentdir) path.mkdir(parents=True, exist_ok=True) if (self.verbose): print(cmd) print(f'Downloading {localdir}') - returned_value = subprocess.call(cmd, shell=True) # returns the exit code in unix + returned_value=subprocess.call(cmd, shell=True) # returns the exit code in unix if (self.verbose): print('returned value:', returned_value) # ---------------------------------------------------------------------------------------------------------------- def fetch_ugwp_limb_tau(self): - ugwp_limb_tau_remotepath = f'{self.aws_fix_bucket}/ugwd/{self.fix_ver_dict['ugwd_ver']}/ugwp_limb_tau.nc' - ugwp_limb_tau_localdir = f'{self.targetdir}/ugwd/{self.fix_ver_dict['ugwd_ver']}' - filename = f'{ugwp_limb_tau_localdir}/ugwp_limb_tau.nc' - path = Path(ugwp_limb_tau_localdir) + ugwp_limb_tau_remotepath=f'{self.aws_fix_bucket}/ugwd/{self.fix_ver_dict['ugwd_ver']}/ugwp_limb_tau.nc' + ugwp_limb_tau_localdir=f'{self.targetdir}/ugwd/{self.fix_ver_dict['ugwd_ver']}' + filename=f'{ugwp_limb_tau_localdir}/ugwp_limb_tau.nc' + path=Path(ugwp_limb_tau_localdir) path.mkdir(parents=True, exist_ok=True) - cmd = f'{self.aws_cp} {ugwp_limb_tau_remotepath} {filename}' + cmd=f'{self.aws_cp} {ugwp_limb_tau_remotepath} {filename}' self.download_file(cmd, filename) # ---------------------------------------------------------------------------------------------------------------- def download_file(self, cmd, filename): - # returned_value = os.system(cmd) # returns the exit code in unix + # returned_value=os.system(cmd) # returns the exit code in unix # print('returned value:', returned_value) if (os.path.isfile(filename)): @@ -198,29 +199,29 @@ def download_file(self, cmd, filename): if (self.verbose): print(cmd) print(f'Downloading {filename}') - returned_value = subprocess.call(cmd, shell=True) # 
returns the exit code in unix + returned_value= subprocess.call(cmd, shell=True) # returns the exit code in unix if (self.verbose): print('returned value:', returned_value) - # ---------------------------------------------------------------------------------------------------------------- + # -------------------------------------------------------------------------------------------------------------- def set_fix_ver_from_gwhome(self, gwhome, verdict): - fix_ver_file = f'{gwhome}/versions/fix.ver' - self.fix_ver_dict = verdict + fix_ver_file=f'{gwhome}/versions/fix.ver' + self.fix_ver_dict=verdict if (os.path.isfile(fix_ver_file)): with open(fix_ver_file, "r") as file: for line in file.readlines(): if (line.find('export ') >= 0): - headstr, _, value = line.strip().partition('=') - exphead, _, key = headstr.partition(' ') - self.fix_ver_dict[key] = value + headstr, _, value=line.strip().partition('=') + exphead, _, key=headstr.partition(' ') + self.fix_ver_dict[key]=value else: print(f'fix_ver_file: {ix_ver_file}s does not exist.') - # ---------------------------------------------------------------------------------------------------------------- + # -------------------------------------------------------------------------------------------------------------- def set_default_fix_ver(self, verdict): - self.fix_ver_dict = verdict + self.fix_ver_dict=verdict # ---------------------------------------------------------------------------------------------------------------- def print_usage(verdict): @@ -241,78 +242,78 @@ def print_usage(verdict): if __name__ == '__main__': - atmgridlist = ['C48', 'C96', 'C192', 'C384', 'C768', 'C1152'] - ocngridlist = ['500', '100', '050', '025'] + atmgridlist=['C48', 'C96', 'C192', 'C384', 'C768', 'C1152'] + ocngridlist=['500', '100', '050', '025'] - verbose = 0 - atmgrid = 'C48' - ocngrid = '500' - localdir = '/contrib/global-workflow-shared-data' + verbose=0 + atmgrid='C48' + ocngrid='500' + 
localdir='/contrib/global-workflow-shared-data' # default fix-version - verdict = {} - verdict['aer_ver'] = '20220805' - verdict['am_ver'] = '20220805' - verdict['chem_ver'] = '20220805' - verdict['cice_ver'] = '20240416' - verdict['cpl_ver'] = '20230526' - verdict['datm_ver'] = '20220805' - verdict['glwu_ver'] = '20220805' - verdict['gsi_ver'] = '20240208' - verdict['lut_ver'] = '20220805' - verdict['mom6_ver'] = '20240416' - verdict['orog_ver'] = '20231027' - verdict['reg2grb2_ver'] = '20220805' - verdict['sfc_climo_ver'] = '20220805' - verdict['ugwd_ver'] = '20240624' - verdict['verif_ver'] = '20220805' - verdict['wave_ver'] = '20240105' - - gwhome = None - - opts, args = getopt.getopt(sys.argv[1:], '', ['help', 'atmgrid=', 'ocngrid=', - 'verbose=', 'localdir=', - 'gwhome=', - 'aer_ver=', - 'am_ver=', - 'chem_ver=', - 'cice_ver=', - 'cpl_ver=', - 'datm_ver=', - 'glwu_ver=', - 'gsi_ver=', - 'lut_ver=', - 'mom6_ver=', - 'orog_ver=', - 'reg2grb2_ver=', - 'sfc_climo_ver=', - 'ugwd_ver=', - 'verif_ver=', - 'wave_ver=']) + verdict={} + verdict['aer_ver']='20220805' + verdict['am_ver']='20220805' + verdict['chem_ver']='20220805' + verdict['cice_ver']='20240416' + verdict['cpl_ver']='20230526' + verdict['datm_ver']='20220805' + verdict['glwu_ver']='20220805' + verdict['gsi_ver']='20240208' + verdict['lut_ver']='20220805' + verdict['mom6_ver']='20240416' + verdict['orog_ver']='20231027' + verdict['reg2grb2_ver']='20220805' + verdict['sfc_climo_ver']='20220805' + verdict['ugwd_ver']='20240624' + verdict['verif_ver']='20220805' + verdict['wave_ver']='20240105' + + gwhome=None + + opts, args=getopt.getopt(sys.argv[1:], '', ['help', 'atmgrid=', 'ocngrid=', + 'verbose=', 'localdir=', + 'gwhome=', + 'aer_ver=', + 'am_ver=', + 'chem_ver=', + 'cice_ver=', + 'cpl_ver=', + 'datm_ver=', + 'glwu_ver=', + 'gsi_ver=', + 'lut_ver=', + 'mom6_ver=', + 'orog_ver=', + 'reg2grb2_ver=', + 'sfc_climo_ver=', + 'ugwd_ver=', + 'verif_ver=', + 'wave_ver=']) for o, a in opts: # print(f'o: {o}, a: 
{a}') if o in ['--help']: print_usage(verdict) sys.exit(0) elif o in ['--verbose']: - verbose = int(a) + verbose=int(a) elif o in ['--atmgrid']: - atmgrid = a + atmgrid=a elif o in ['--ocngrid']: - ocngrid = a + ocngrid=a elif o in ['--localdir']: - localdir = a + localdir=a elif o in ['--gwhome']: - gwhome = a + gwhome=a else: - _, vername = o.split('--') + _, vername=o.split('--') print(f'vername: <{vername}>') - verdict[vername] = a + verdict[vername]=a if (atmgrid.find(',') > 0): - atmgridarray = atmgrid.split(',') + atmgridarray=atmgrid.split(',') else: - atmgridarray = [atmgrid] + atmgridarray=[atmgrid] for grid in atmgridarray: if (grid not in atmgridlist): @@ -322,9 +323,9 @@ def print_usage(verdict): sys.exit(-1) if (ocngrid.find(',') > 0): - ocngridarray = ocngrid.split(',') + ocngridarray=ocngrid.split(',') else: - ocngridarray = [ocngrid] + ocngridarray=[ocngrid] for grid in ocngridarray: if (grid not in ocngridlist): @@ -334,9 +335,9 @@ def print_usage(verdict): sys.exit(-1) # ------------------------------------------------------------------ - ffd = FetchFIXdata(atmgridarray=atmgridarray, - ocngridarray=ocngridarray, - localdir=localdir, verbose=verbose) + ffd=FetchFIXdata(atmgridarray=atmgridarray, + ocngridarray=ocngridarray, + localdir=localdir, verbose=verbose) if (gwhome is None): ffd.set_default_fix_ver(verdict) @@ -346,3 +347,4 @@ def print_usage(verdict): ffd.update_s3dict() ffd.fetchdata() + From 27ca2f17f70b35280a96ffb919d6c819f609eaf4 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 26 Feb 2025 23:10:55 +0000 Subject: [PATCH 008/134] fix pynorm error 6 --- ush/fetch-fix-data.py | 43 +++++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/ush/fetch-fix-data.py b/ush/fetch-fix-data.py index 6d6f8ac4fe6..34df3b19ba5 100644 --- a/ush/fetch-fix-data.py +++ b/ush/fetch-fix-data.py @@ -10,13 +10,13 @@ import subprocess from pathlib import Path -# 
---------------------------------------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------ class FetchFIXdata(): def __init__(self, atmgridarray=['C48'], ocngridarray=['500'], localdir=None, verbose=0): - self.aws_fix_bucket='s3://noaa-nws-global-pds/fix' + self.aws_fix_bucket= 's3://noaa-nws-global-pds/fix' self.aws_cp='aws --no-sign-request s3 cp' self.aws_sync='aws --no-sign-request s3 sync' @@ -40,7 +40,7 @@ def __init__(self, atmgridarray=['C48'], ocngridarray=['500'], localdir=None, ve else: self.targetdir=self.localdir - # ---------------------------------------------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def update_s3dict(self): self.update_s3dick_grid_independent() @@ -49,14 +49,14 @@ def update_s3dict(self): if (self.verbose): self.printinfo() - # ---------------------------------------------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def update_s3dick_grid_independent(self): for key in self.fix_ver_dict.keys(): val=self.fix_ver_dict[key] if (key == 'aer_ver'): self.s3dict['aer']=f'aer/{val}' - elif ( key == 'am_ver'): + elif (key == 'am_ver'): self.s3dict['am']=f'am/{val}' elif (key == 'chem_ver'): self.s3dict['fimdata_chem']=f'chem/{val}/fimdata_chem' @@ -76,14 +76,14 @@ def update_s3dick_grid_independent(self): self.s3dict['mom6post']=f'mom6/{val}/post' elif (key == 'reg2grb2_ver'): self.s3dict['reg2grb2']=f'reg2grb2/{val}' - elif (key == 'sfc_climb_ver' ): + elif (key == 'sfc_climb_ver'): self.s3dict['sfc_climo']=f'sfc_climo/{val}' elif (key == 'verif_ver'): self.s3dict['verif']=f'verif/{val}' elif (key == 'wave_ver'): self.s3dict['wave']=f'wave/{val}' - # 
---------------------------------------------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def add_grid_data(self): for key in self.fix_ver_dict.keys(): @@ -99,21 +99,21 @@ def add_grid_data(self): elif (key == 'cpl_ver'): self.add_cpl2s3dict('cpl', key, val) - # ---------------------------------------------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def add_atmgrid2s3dict(self, varname, key, val): for atmgrid in self.atmgridarray: newkey=f'{key}_{atmgrid}' self.s3dict[newkey]=f'{varname}/{val}/{atmgrid}' - # ---------------------------------------------------------------------------------------------------------------- + # ------------------------------------------------------------------------- def add_ocngrid2s3dict(self, varname, key, val): for ocngrid in self.ocngridarray: newkey=f'{key}_{atmgrid}' self.s3dict[newkey]=f'{varname}/{val}/{ocngrid}' - # ---------------------------------------------------------------------------------------------------------------- + # ------------------------------------------------------------------------- def add_cpl2s3dict(self, varname, key, val): for atmgrid in self.atmgridarray: @@ -121,7 +121,7 @@ def add_cpl2s3dict(self, varname, key, val): newkey=f'{key}_a{atmgrid}o{ocngrid}' self.s3dict[newkey]=f'{varname}/{val}/a{atmgrid}o{ocngrid}' - # ---------------------------------------------------------------------------------------------------------------- + # ------------------------------------------------------------------------- def printinfo(self): print(f'Preparing to fetch') @@ -133,7 +133,7 @@ def printinfo(self): val=self.s3dict[key] print(f'{key}: {val}') - # ---------------------------------------------------------------------------------------------------------------- + # 
------------------------------------------------------------------------- def fetchdata(self): if (self.verbose): @@ -147,7 +147,7 @@ def fetchdata(self): for key in self.s3dict.keys(): self.fetch_dir(self.s3dict[key]) - # ---------------------------------------------------------------------------------------------------------------- + # ------------------------------------------------------------------------- def fetch_dir(self, dir): remotedir=f'{self.aws_fix_bucket}/{dir}' @@ -155,7 +155,7 @@ def fetch_dir(self, dir): cmd=f'{self.aws_sync} {remotedir} {localdir}' self.download_dir(cmd, localdir) - # ---------------------------------------------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def download_dir(self, cmd, localdir): # returned_value=os.system(cmd) # returns the exit code in unix @@ -176,7 +176,7 @@ def download_dir(self, cmd, localdir): if (self.verbose): print('returned value:', returned_value) - # ---------------------------------------------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def fetch_ugwp_limb_tau(self): ugwp_limb_tau_remotepath=f'{self.aws_fix_bucket}/ugwd/{self.fix_ver_dict['ugwd_ver']}/ugwp_limb_tau.nc' @@ -187,7 +187,7 @@ def fetch_ugwp_limb_tau(self): cmd=f'{self.aws_cp} {ugwp_limb_tau_remotepath} {filename}' self.download_file(cmd, filename) - # ---------------------------------------------------------------------------------------------------------------- + # ------------------------------------------------------------------------- def download_file(self, cmd, filename): # returned_value=os.system(cmd) # returns the exit code in unix @@ -199,11 +199,11 @@ def download_file(self, cmd, filename): if (self.verbose): print(cmd) print(f'Downloading {filename}') - returned_value= subprocess.call(cmd, shell=True) # returns the exit code 
in unix + returned_value=subprocess.call(cmd, shell=True) # returns the exit code in unix if (self.verbose): print('returned value:', returned_value) - # -------------------------------------------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def set_fix_ver_from_gwhome(self, gwhome, verdict): fix_ver_file=f'{gwhome}/versions/fix.ver' @@ -218,12 +218,12 @@ def set_fix_ver_from_gwhome(self, gwhome, verdict): else: print(f'fix_ver_file: {ix_ver_file}s does not exist.') - # -------------------------------------------------------------------------------------------------------------- + # ------------------------------------------------------------------------ def set_default_fix_ver(self, verdict): self.fix_ver_dict=verdict -# ---------------------------------------------------------------------------------------------------------------- +# ----------------------------------------------------------------------------- def print_usage(verdict): @@ -238,7 +238,7 @@ def print_usage(verdict): for key in verdict.keys(): print(f'\t--{key}=yyyymmdd default: {verdict[key]}') -# ---------------------------------------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------ if __name__ == '__main__': @@ -347,4 +347,3 @@ def print_usage(verdict): ffd.update_s3dict() ffd.fetchdata() - From f39bfd9f4290a08a7b69955e7f61ff2681f3f39e Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Thu, 27 Feb 2025 00:18:28 +0000 Subject: [PATCH 009/134] fix pynorm error 7 --- ush/fetch-fix-data.py | 42 +++++++++++++++++++----------------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/ush/fetch-fix-data.py b/ush/fetch-fix-data.py index 34df3b19ba5..7bd90026aa1 100644 --- a/ush/fetch-fix-data.py +++ b/ush/fetch-fix-data.py @@ -13,17 +13,16 @@ # 
------------------------------------------------------------------------------ class FetchFIXdata(): - def __init__(self, atmgridarray=['C48'], ocngridarray=['500'], localdir=None, verbose=0): - self.aws_fix_bucket= 's3://noaa-nws-global-pds/fix' - self.aws_cp='aws --no-sign-request s3 cp' - self.aws_sync='aws --no-sign-request s3 sync' + self.aws_fix_bucket= f's3://noaa-nws-global-pds/fix' + self.aws_cp = f'aws --no-sign-request s3 cp' + self.aws_sync = f'aws --no-sign-request s3 sync' - self.atmgridarray=atmgridarray - self.ocngridarray=ocngridarray - self.localdir=localdir - self.verbose=verbose + self.atmgridarray = atmgridarray + self.ocngridarray = ocngridarray + self.localdir = localdir + self.verbose = verbose # if (os.path.isdir(localdir)): # print('Prepare to download FIX data for %s and %s to %s' %(atmgrid, ocngrid, localdir)) @@ -31,14 +30,14 @@ def __init__(self, atmgridarray=['C48'], ocngridarray=['500'], localdir=None, ve # print(f'local dir: <{localdir}> does not exist. Stop') # sys.exit(-1) - self.verdict={} - self.s3dict={} - self.s3dict['raworog']='raw/orog' + self.verdict = {} + self.s3dict = {} + self.s3dict['raworog'] = f'raw/orog' if (self.localdir.find('fix') < 0): - self.targetdir=f'{self.localdir}/fix.subset' + self.targetdir = f'{self.localdir}/fix.subset' else: - self.targetdir=self.localdir + self.targetdir = self.localdir # -------------------------------------------------------------------------- def update_s3dict(self): @@ -118,8 +117,8 @@ def add_cpl2s3dict(self, varname, key, val): for atmgrid in self.atmgridarray: for ocngrid in self.ocngridarray: - newkey=f'{key}_a{atmgrid}o{ocngrid}' - self.s3dict[newkey]=f'{varname}/{val}/a{atmgrid}o{ocngrid}' + newkey = f'{key}_a{atmgrid}o{ocngrid}' + self.s3dict[newkey] = f'{varname}/{val}/a{atmgrid}o{ocngrid}' # ------------------------------------------------------------------------- def printinfo(self): @@ -137,13 +136,13 @@ def printinfo(self): def fetchdata(self): if (self.verbose): - 
print('Create local fix dir: {self.targetdir}') + print(f'Create local fix dir: {self.targetdir}') path=Path(self.targetdir) path.mkdir(parents=True, exist_ok=True) self.fetch_ugwp_limb_tau() - + for key in self.s3dict.keys(): self.fetch_dir(self.s3dict[key]) @@ -162,7 +161,7 @@ def download_dir(self, cmd, localdir): # print('returned value:', returned_value) if (os.path.isdir(localdir)): - print(f'{localdir} already exist. skip' + print(f'{localdir} already exist. skip') else: parentdir, dirname=os.path.split(localdir) if (self.verbose): @@ -190,8 +189,8 @@ def fetch_ugwp_limb_tau(self): # ------------------------------------------------------------------------- def download_file(self, cmd, filename): - # returned_value=os.system(cmd) # returns the exit code in unix - # print('returned value:', returned_value) + # returned_value=os.system(cmd) # returns the exit code in unix + # print('returned value:', returned_value) if (os.path.isfile(filename)): print(f'{filename} already exist. skip') @@ -226,7 +225,6 @@ def set_default_fix_ver(self, verdict): # ----------------------------------------------------------------------------- def print_usage(verdict): - print('Usage: python fetch-fix-data.py \\') print(' --atmgrid=AtmospericGrid (for multiple grids, separate with ",") \\') print(' --ocngrid=OceanGrid (for multiple grids, separate with ",") \\') @@ -241,7 +239,6 @@ def print_usage(verdict): # ------------------------------------------------------------------------------ if __name__ == '__main__': - atmgridlist=['C48', 'C96', 'C192', 'C384', 'C768', 'C1152'] ocngridlist=['500', '100', '050', '025'] @@ -345,5 +342,4 @@ def print_usage(verdict): ffd.set_fix_ver_from_gwhome(gwhome, verdict) ffd.update_s3dict() - ffd.fetchdata() From 8f7fc23bfc7b2cec2f4b0cab7aa87d9b1417a2d7 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Thu, 27 Feb 2025 00:26:42 +0000 Subject: [PATCH 010/134] fix pynorm error 8 --- ush/fetch-fix-data.py | 211 +++++++++++++++++++++--------------------- 
1 file changed, 107 insertions(+), 104 deletions(-) diff --git a/ush/fetch-fix-data.py b/ush/fetch-fix-data.py index 7bd90026aa1..97a649443bb 100644 --- a/ush/fetch-fix-data.py +++ b/ush/fetch-fix-data.py @@ -13,9 +13,10 @@ # ------------------------------------------------------------------------------ class FetchFIXdata(): + def __init__(self, atmgridarray=['C48'], ocngridarray=['500'], localdir=None, verbose=0): - self.aws_fix_bucket= f's3://noaa-nws-global-pds/fix' + self.aws_fix_bucket = f's3://noaa-nws-global-pds/fix' self.aws_cp = f'aws --no-sign-request s3 cp' self.aws_sync = f'aws --no-sign-request s3 sync' @@ -52,41 +53,41 @@ def update_s3dict(self): def update_s3dick_grid_independent(self): for key in self.fix_ver_dict.keys(): - val=self.fix_ver_dict[key] + val = self.fix_ver_dict[key] if (key == 'aer_ver'): - self.s3dict['aer']=f'aer/{val}' + self.s3dict['aer'] = f'aer/{val}' elif (key == 'am_ver'): - self.s3dict['am']=f'am/{val}' + self.s3dict['am'] = f'am/{val}' elif (key == 'chem_ver'): - self.s3dict['fimdata_chem']=f'chem/{val}/fimdata_chem' - self.s3dict['Emission_data']=f'chem/{val}/Emission_data' + self.s3dict['fimdata_chem'] = f'chem/{val}/fimdata_chem' + self.s3dict['Emission_data'] = f'chem/{val}/Emission_data' elif (key == 'datm_ver'): - self.s3dict['cfsr']=f'datm/{val}/cfsr' - self.s3dict['gefs']=f'datm/{val}/gefs' - self.s3dict['gfs']=f'datm/{val}/gfs' - self.s3dict['mom6']=f'datm/{val}/mom6' + self.s3dict['cfsr'] = f'datm/{val}/cfsr' + self.s3dict['gefs'] = f'datm/{val}/gefs' + self.s3dict['gfs'] = f'datm/{val}/gfs' + self.s3dict['mom6'] = f'datm/{val}/mom6' elif (key == 'glwu_ver'): - self.s3dict['glwu']=f'glwu/{val}' + self.s3dict['glwu'] = f'glwu/{val}' elif (key == 'gsi_ver'): - self.s3dict['gsi']=f'gsi/{val}' + self.s3dict['gsi'] = f'gsi/{val}' elif (key == 'lut_ver'): - self.s3dict['lut']=f'lut/{val}' + self.s3dict['lut'] = f'lut/{val}' elif (key == 'mom6_ver'): - self.s3dict['mom6post']=f'mom6/{val}/post' + self.s3dict['mom6post'] = 
f'mom6/{val}/post' elif (key == 'reg2grb2_ver'): - self.s3dict['reg2grb2']=f'reg2grb2/{val}' + self.s3dict['reg2grb2'] = f'reg2grb2/{val}' elif (key == 'sfc_climb_ver'): - self.s3dict['sfc_climo']=f'sfc_climo/{val}' + self.s3dict['sfc_climo'] = f'sfc_climo/{val}' elif (key == 'verif_ver'): - self.s3dict['verif']=f'verif/{val}' + self.s3dict['verif'] = f'verif/{val}' elif (key == 'wave_ver'): - self.s3dict['wave']=f'wave/{val}' + self.s3dict['wave'] = f'wave/{val}' # -------------------------------------------------------------------------- def add_grid_data(self): for key in self.fix_ver_dict.keys(): - val=self.fix_ver_dict[key] + val = self.fix_ver_dict[key] if (key == 'orog_ver'): self.add_atmgrid2s3dict('orog', key, val) elif (key == 'ugwd_ver'): @@ -102,15 +103,15 @@ def add_grid_data(self): def add_atmgrid2s3dict(self, varname, key, val): for atmgrid in self.atmgridarray: - newkey=f'{key}_{atmgrid}' - self.s3dict[newkey]=f'{varname}/{val}/{atmgrid}' + newkey = f'{key}_{atmgrid}' + self.s3dict[newkey] = f'{varname}/{val}/{atmgrid}' # ------------------------------------------------------------------------- def add_ocngrid2s3dict(self, varname, key, val): for ocngrid in self.ocngridarray: - newkey=f'{key}_{atmgrid}' - self.s3dict[newkey]=f'{varname}/{val}/{ocngrid}' + newkey = f'{key}_{atmgrid}' + self.s3dict[newkey] = f'{varname}/{val}/{ocngrid}' # ------------------------------------------------------------------------- def add_cpl2s3dict(self, varname, key, val): @@ -129,7 +130,7 @@ def printinfo(self): print(f'From: {self.aws_fix_bucket}') print(f'To: {self.targetdir}') for key in self.s3dict.keys(): - val=self.s3dict[key] + val = self.s3dict[key] print(f'{key}: {val}') # ------------------------------------------------------------------------- @@ -138,7 +139,7 @@ def fetchdata(self): if (self.verbose): print(f'Create local fix dir: {self.targetdir}') - path=Path(self.targetdir) + path = Path(self.targetdir) path.mkdir(parents=True, exist_ok=True) 
self.fetch_ugwp_limb_tau() @@ -149,47 +150,47 @@ def fetchdata(self): # ------------------------------------------------------------------------- def fetch_dir(self, dir): - remotedir=f'{self.aws_fix_bucket}/{dir}' - localdir=f'{self.targetdir}/{dir}' - cmd=f'{self.aws_sync} {remotedir} {localdir}' + remotedir = f'{self.aws_fix_bucket}/{dir}' + localdir = f'{self.targetdir}/{dir}' + cmd = f'{self.aws_sync} {remotedir} {localdir}' self.download_dir(cmd, localdir) # -------------------------------------------------------------------------- def download_dir(self, cmd, localdir): - # returned_value=os.system(cmd) # returns the exit code in unix + # returned_value = os.system(cmd) # returns the exit code in unix # print('returned value:', returned_value) if (os.path.isdir(localdir)): print(f'{localdir} already exist. skip') else: - parentdir, dirname=os.path.split(localdir) + parentdir, dirname = os.path.split(localdir) if (self.verbose): print(f'Create local {parentdir} dir:') - path=Path(parentdir) + path = Path(parentdir) path.mkdir(parents=True, exist_ok=True) if (self.verbose): print(cmd) print(f'Downloading {localdir}') - returned_value=subprocess.call(cmd, shell=True) # returns the exit code in unix + returned_value = subprocess.call(cmd, shell=True) # returns the exit code in unix if (self.verbose): print('returned value:', returned_value) # -------------------------------------------------------------------------- def fetch_ugwp_limb_tau(self): - ugwp_limb_tau_remotepath=f'{self.aws_fix_bucket}/ugwd/{self.fix_ver_dict['ugwd_ver']}/ugwp_limb_tau.nc' - ugwp_limb_tau_localdir=f'{self.targetdir}/ugwd/{self.fix_ver_dict['ugwd_ver']}' - filename=f'{ugwp_limb_tau_localdir}/ugwp_limb_tau.nc' - path=Path(ugwp_limb_tau_localdir) + ugwp_limb_tau_remotepath = f'{self.aws_fix_bucket}/ugwd/{self.fix_ver_dict['ugwd_ver']}/ugwp_limb_tau.nc' + ugwp_limb_tau_localdir = f'{self.targetdir}/ugwd/{self.fix_ver_dict['ugwd_ver']}' + filename = 
f'{ugwp_limb_tau_localdir}/ugwp_limb_tau.nc' + path = Path(ugwp_limb_tau_localdir) path.mkdir(parents=True, exist_ok=True) - cmd=f'{self.aws_cp} {ugwp_limb_tau_remotepath} {filename}' + cmd = f'{self.aws_cp} {ugwp_limb_tau_remotepath} {filename}' self.download_file(cmd, filename) # ------------------------------------------------------------------------- def download_file(self, cmd, filename): - # returned_value=os.system(cmd) # returns the exit code in unix + # returned_value = os.system(cmd) # returns the exit code in unix # print('returned value:', returned_value) if (os.path.isfile(filename)): @@ -198,33 +199,34 @@ def download_file(self, cmd, filename): if (self.verbose): print(cmd) print(f'Downloading {filename}') - returned_value=subprocess.call(cmd, shell=True) # returns the exit code in unix + returned_value = subprocess.call(cmd, shell=True) # returns the exit code in unix if (self.verbose): print('returned value:', returned_value) # -------------------------------------------------------------------------- def set_fix_ver_from_gwhome(self, gwhome, verdict): - fix_ver_file=f'{gwhome}/versions/fix.ver' - self.fix_ver_dict=verdict + fix_ver_file = f'{gwhome}/versions/fix.ver' + self.fix_ver_dict = verdict if (os.path.isfile(fix_ver_file)): with open(fix_ver_file, "r") as file: for line in file.readlines(): if (line.find('export ') >= 0): - headstr, _, value=line.strip().partition('=') - exphead, _, key=headstr.partition(' ') - self.fix_ver_dict[key]=value + headstr, _, value = line.strip().partition('=') + exphead, _, key = headstr.partition(' ') + self.fix_ver_dict[key] = value else: print(f'fix_ver_file: {ix_ver_file}s does not exist.') # ------------------------------------------------------------------------ def set_default_fix_ver(self, verdict): - self.fix_ver_dict=verdict + self.fix_ver_dict = verdict # ----------------------------------------------------------------------------- def print_usage(verdict): + print('Usage: python fetch-fix-data.py \\') 
print(' --atmgrid=AtmospericGrid (for multiple grids, separate with ",") \\') print(' --ocngrid=OceanGrid (for multiple grids, separate with ",") \\') @@ -239,78 +241,79 @@ def print_usage(verdict): # ------------------------------------------------------------------------------ if __name__ == '__main__': - atmgridlist=['C48', 'C96', 'C192', 'C384', 'C768', 'C1152'] - ocngridlist=['500', '100', '050', '025'] - verbose=0 - atmgrid='C48' - ocngrid='500' - localdir='/contrib/global-workflow-shared-data' + atmgridlist = ['C48', 'C96', 'C192', 'C384', 'C768', 'C1152'] + ocngridlist = ['500', '100', '050', '025'] + + verbose = 0 + atmgrid = f'C48' + ocngrid = f'500' + localdir = f'/contrib/global-workflow-shared-data' # default fix-version - verdict={} - verdict['aer_ver']='20220805' - verdict['am_ver']='20220805' - verdict['chem_ver']='20220805' - verdict['cice_ver']='20240416' - verdict['cpl_ver']='20230526' - verdict['datm_ver']='20220805' - verdict['glwu_ver']='20220805' - verdict['gsi_ver']='20240208' - verdict['lut_ver']='20220805' - verdict['mom6_ver']='20240416' - verdict['orog_ver']='20231027' - verdict['reg2grb2_ver']='20220805' - verdict['sfc_climo_ver']='20220805' - verdict['ugwd_ver']='20240624' - verdict['verif_ver']='20220805' - verdict['wave_ver']='20240105' - - gwhome=None - - opts, args=getopt.getopt(sys.argv[1:], '', ['help', 'atmgrid=', 'ocngrid=', - 'verbose=', 'localdir=', - 'gwhome=', - 'aer_ver=', - 'am_ver=', - 'chem_ver=', - 'cice_ver=', - 'cpl_ver=', - 'datm_ver=', - 'glwu_ver=', - 'gsi_ver=', - 'lut_ver=', - 'mom6_ver=', - 'orog_ver=', - 'reg2grb2_ver=', - 'sfc_climo_ver=', - 'ugwd_ver=', - 'verif_ver=', - 'wave_ver=']) + verdict = {} + verdict['aer_ver'] = f'20220805' + verdict['am_ver'] = f'20220805' + verdict['chem_ver'] = f'20220805' + verdict['cice_ver'] = f'20240416' + verdict['cpl_ver'] = f'20230526' + verdict['datm_ver'] = f'20220805' + verdict['glwu_ver'] = f'20220805' + verdict['gsi_ver'] = f'20240208' + verdict['lut_ver'] = 
f'20220805' + verdict['mom6_ver'] = f'20240416' + verdict['orog_ver'] = f'20231027' + verdict['reg2grb2_ver'] = f'20220805' + verdict['sfc_climo_ver'] = f'20220805' + verdict['ugwd_ver'] = f'20240624' + verdict['verif_ver'] = f'20220805' + verdict['wave_ver'] = f'20240105' + + gwhome = None + + opts, args = getopt.getopt(sys.argv[1:], '', ['help', 'atmgrid=', 'ocngrid=', + 'verbose=', 'localdir=', + 'gwhome=', + 'aer_ver=', + 'am_ver=', + 'chem_ver=', + 'cice_ver=', + 'cpl_ver=', + 'datm_ver=', + 'glwu_ver=', + 'gsi_ver=', + 'lut_ver=', + 'mom6_ver=', + 'orog_ver=', + 'reg2grb2_ver=', + 'sfc_climo_ver=', + 'ugwd_ver=', + 'verif_ver=', + 'wave_ver=']) for o, a in opts: # print(f'o: {o}, a: {a}') if o in ['--help']: print_usage(verdict) sys.exit(0) elif o in ['--verbose']: - verbose=int(a) + verbose = int(a) elif o in ['--atmgrid']: - atmgrid=a + atmgrid = a elif o in ['--ocngrid']: - ocngrid=a + ocngrid = a elif o in ['--localdir']: - localdir=a + localdir = a elif o in ['--gwhome']: - gwhome=a + gwhome = a else: - _, vername=o.split('--') + _, vername = o.split('--') print(f'vername: <{vername}>') - verdict[vername]=a + verdict[vername] = a if (atmgrid.find(',') > 0): - atmgridarray=atmgrid.split(',') + atmgridarray = atmgrid.split(',') else: - atmgridarray=[atmgrid] + atmgridarray = [atmgrid] for grid in atmgridarray: if (grid not in atmgridlist): @@ -320,9 +323,9 @@ def print_usage(verdict): sys.exit(-1) if (ocngrid.find(',') > 0): - ocngridarray=ocngrid.split(',') + ocngridarray = ocngrid.split(',') else: - ocngridarray=[ocngrid] + ocngridarray = [ocngrid] for grid in ocngridarray: if (grid not in ocngridlist): @@ -332,9 +335,9 @@ def print_usage(verdict): sys.exit(-1) # ------------------------------------------------------------------ - ffd=FetchFIXdata(atmgridarray=atmgridarray, - ocngridarray=ocngridarray, - localdir=localdir, verbose=verbose) + ffd = FetchFIXdata(atmgridarray=atmgridarray, + ocngridarray=ocngridarray, + localdir=localdir, verbose=verbose) 
if (gwhome is None): ffd.set_default_fix_ver(verdict) From c248e128c38079e09c9a54fcf5e69aad1036ec3a Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Thu, 27 Feb 2025 00:30:10 +0000 Subject: [PATCH 011/134] fix pynorm error 9 --- ush/fetch-fix-data.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ush/fetch-fix-data.py b/ush/fetch-fix-data.py index 97a649443bb..f75c80c9b06 100644 --- a/ush/fetch-fix-data.py +++ b/ush/fetch-fix-data.py @@ -11,8 +11,8 @@ from pathlib import Path # ------------------------------------------------------------------------------ -class FetchFIXdata(): +class FetchFIXdata(): def __init__(self, atmgridarray=['C48'], ocngridarray=['500'], localdir=None, verbose=0): @@ -224,8 +224,8 @@ def set_default_fix_ver(self, verdict): self.fix_ver_dict = verdict # ----------------------------------------------------------------------------- -def print_usage(verdict): +def print_usage(verdict): print('Usage: python fetch-fix-data.py \\') print(' --atmgrid=AtmospericGrid (for multiple grids, separate with ",") \\') @@ -239,8 +239,8 @@ def print_usage(verdict): print(f'\t--{key}=yyyymmdd default: {verdict[key]}') # ------------------------------------------------------------------------------ -if __name__ == '__main__': +if __name__ == '__main__': atmgridlist = ['C48', 'C96', 'C192', 'C384', 'C768', 'C1152'] ocngridlist = ['500', '100', '050', '025'] From 85120f3398662440b804996c63695567247bbea9 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Thu, 27 Feb 2025 00:37:53 +0000 Subject: [PATCH 012/134] fix pynorm error 10 --- ush/fetch-fix-data.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ush/fetch-fix-data.py b/ush/fetch-fix-data.py index f75c80c9b06..e920723455f 100644 --- a/ush/fetch-fix-data.py +++ b/ush/fetch-fix-data.py @@ -12,6 +12,7 @@ # ------------------------------------------------------------------------------ + class FetchFIXdata(): def __init__(self, atmgridarray=['C48'], ocngridarray=['500'], localdir=None, 
verbose=0): @@ -225,6 +226,7 @@ def set_default_fix_ver(self, verdict): # ----------------------------------------------------------------------------- + def print_usage(verdict): print('Usage: python fetch-fix-data.py \\') @@ -240,6 +242,7 @@ def print_usage(verdict): # ------------------------------------------------------------------------------ + if __name__ == '__main__': atmgridlist = ['C48', 'C96', 'C192', 'C384', 'C768', 'C1152'] From 2a7f1c9c103807fe8eef56741536286812bf15fd Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Thu, 27 Feb 2025 21:17:36 +0000 Subject: [PATCH 013/134] fix a syntax error --- ci/Jenkinsfile4AWS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/Jenkinsfile4AWS b/ci/Jenkinsfile4AWS index c23ace3a968..731215a5665 100644 --- a/ci/Jenkinsfile4AWS +++ b/ci/Jenkinsfile4AWS @@ -6,7 +6,7 @@ def CI_CASES = '' def GH = 'none' // Location of the custom workspaces for each machine in the CI system. They are persistent for each iteration of the PR. def NodeName = [hera: 'Hera-EMC', orion: 'Orion-EMC', hercules: 'Hercules-EMC', gaea: 'Gaea', noaacloud: 'awsepicglobalworkflow'] -def custom_workspace = [hera: '/scratch1/NCEPDEV/global/CI', orion: '/work2/noaa/stmp/CI/ORION', hercules: '/work2/noaa/global/CI/HERCULES', gaea: '/gpfs/f5/epic/proj-shared/global/CI', noaacloud: /lustre/jenkins/global-workflow/CI] +def custom_workspace = [hera: '/scratch1/NCEPDEV/global/CI', orion: '/work2/noaa/stmp/CI/ORION', hercules: '/work2/noaa/global/CI/HERCULES', gaea: '/gpfs/f5/epic/proj-shared/global/CI', noaacloud: '/lustre/jenkins/global-workflow/CI'] def repo_url = 'git@github.com:NOAA-EMC/global-workflow.git' def STATUS = 'Passed' From 9f8ee2dd690e24c5d565ff08fb64c09b886cf4b8 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Thu, 27 Feb 2025 21:21:02 +0000 Subject: [PATCH 014/134] use just /lutre/jenkins for CI testing work directory --- ci/Jenkinsfile4AWS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/Jenkinsfile4AWS 
b/ci/Jenkinsfile4AWS index 731215a5665..5efc18bf89f 100644 --- a/ci/Jenkinsfile4AWS +++ b/ci/Jenkinsfile4AWS @@ -6,7 +6,7 @@ def CI_CASES = '' def GH = 'none' // Location of the custom workspaces for each machine in the CI system. They are persistent for each iteration of the PR. def NodeName = [hera: 'Hera-EMC', orion: 'Orion-EMC', hercules: 'Hercules-EMC', gaea: 'Gaea', noaacloud: 'awsepicglobalworkflow'] -def custom_workspace = [hera: '/scratch1/NCEPDEV/global/CI', orion: '/work2/noaa/stmp/CI/ORION', hercules: '/work2/noaa/global/CI/HERCULES', gaea: '/gpfs/f5/epic/proj-shared/global/CI', noaacloud: '/lustre/jenkins/global-workflow/CI'] +def custom_workspace = [hera: '/scratch1/NCEPDEV/global/CI', orion: '/work2/noaa/stmp/CI/ORION', hercules: '/work2/noaa/global/CI/HERCULES', gaea: '/gpfs/f5/epic/proj-shared/global/CI', noaacloud: '/lustre/jenkins'] def repo_url = 'git@github.com:NOAA-EMC/global-workflow.git' def STATUS = 'Passed' From df4179cf441278ed14ef20061c5a5a0e2edac6a7 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Mon, 3 Mar 2025 16:01:03 +0000 Subject: [PATCH 015/134] using argparse and logging instead of getopt and print --- ush/fetch-fix-data.py | 209 +++++++++++++++++++----------------------- 1 file changed, 96 insertions(+), 113 deletions(-) diff --git a/ush/fetch-fix-data.py b/ush/fetch-fix-data.py index e920723455f..9d35e158774 100644 --- a/ush/fetch-fix-data.py +++ b/ush/fetch-fix-data.py @@ -7,17 +7,32 @@ import time import sys import getopt +import argparse import subprocess from pathlib import Path +import logging + +# Create and configure logger +logging.basicConfig(filename="cfetch-fix-data.log", + format='%(asctime)s %(message)s', + filemode='w') + +# Creating an object +logger = logging.getLogger() + +# Setting the threshold of logger to DEBUG +logger.setLevel(logging.DEBUG) # ------------------------------------------------------------------------------ class FetchFIXdata(): - def __init__(self, atmgridarray=['C48'], ocngridarray=['500'], 
localdir=None, verbose=0): + def __init__(self, atmgridarray=['C48'], ocngridarray=['500'], + fix_bucket=None, localdir=None, verbose=0): - self.aws_fix_bucket = f's3://noaa-nws-global-pds/fix' + # self.aws_fix_bucket = f's3://noaa-nws-global-pds/fix' + self.aws_fix_bucket = fix_bucket self.aws_cp = f'aws --no-sign-request s3 cp' self.aws_sync = f'aws --no-sign-request s3 sync' @@ -26,11 +41,11 @@ def __init__(self, atmgridarray=['C48'], ocngridarray=['500'], localdir=None, ve self.localdir = localdir self.verbose = verbose - # if (os.path.isdir(localdir)): - # print('Prepare to download FIX data for %s and %s to %s' %(atmgrid, ocngrid, localdir)) - # else: - # print(f'local dir: <{localdir}> does not exist. Stop') - # sys.exit(-1) + if (os.path.isdir(localdir)): + logger.info(f'Prepare to download FIX data for {atmgrid} and {ocngrid} to {localdir}') + else: + logger.info(f'local dir: <{localdir}> does not exist. Stop') + sys.exit(-1) self.verdict = {} self.s3dict = {} @@ -125,20 +140,20 @@ def add_cpl2s3dict(self, varname, key, val): # ------------------------------------------------------------------------- def printinfo(self): - print(f'Preparing to fetch') - print(f'ATM grid: {self.atmgridarray}') - print(f'ONC grid: {self.ocngridarray}') - print(f'From: {self.aws_fix_bucket}') - print(f'To: {self.targetdir}') + logger.info(f'Preparing to fetch') + logger.info(f'ATM grid: {self.atmgridarray}') + logger.info(f'ONC grid: {self.ocngridarray}') + logger.info(f'From: {self.aws_fix_bucket}') + logger.info(f'To: {self.targetdir}') for key in self.s3dict.keys(): val = self.s3dict[key] - print(f'{key}: {val}') + logger.info(f'{key}: {val}') # ------------------------------------------------------------------------- def fetchdata(self): if (self.verbose): - print(f'Create local fix dir: {self.targetdir}') + logger.info(f'Create local fix dir: {self.targetdir}') path = Path(self.targetdir) path.mkdir(parents=True, exist_ok=True) @@ -160,28 +175,29 @@ def fetch_dir(self, 
dir): def download_dir(self, cmd, localdir): # returned_value = os.system(cmd) # returns the exit code in unix - # print('returned value:', returned_value) + # logger.info('returned value:', returned_value) if (os.path.isdir(localdir)): - print(f'{localdir} already exist. skip') + logger.info(f'{localdir} already exist. skip') else: parentdir, dirname = os.path.split(localdir) if (self.verbose): - print(f'Create local {parentdir} dir:') + logger.info(f'Create local {parentdir} dir:') path = Path(parentdir) path.mkdir(parents=True, exist_ok=True) if (self.verbose): - print(cmd) - print(f'Downloading {localdir}') + logger.info(cmd) + logger.info(f'Downloading {localdir}') returned_value = subprocess.call(cmd, shell=True) # returns the exit code in unix if (self.verbose): - print('returned value:', returned_value) + logger.info('returned value:', returned_value) # -------------------------------------------------------------------------- def fetch_ugwp_limb_tau(self): - ugwp_limb_tau_remotepath = f'{self.aws_fix_bucket}/ugwd/{self.fix_ver_dict['ugwd_ver']}/ugwp_limb_tau.nc' - ugwp_limb_tau_localdir = f'{self.targetdir}/ugwd/{self.fix_ver_dict['ugwd_ver']}' + ugwd_ver = self.fix_ver_dict['ugwd_ver'] + ugwp_limb_tau_remotepath = f'{self.aws_fix_bucket}/ugwd/{ugwd_ver}/ugwp_limb_tau.nc' + ugwp_limb_tau_localdir = f'{self.targetdir}/ugwd/{ugwd_ver}' filename = f'{ugwp_limb_tau_localdir}/ugwp_limb_tau.nc' path = Path(ugwp_limb_tau_localdir) path.mkdir(parents=True, exist_ok=True) @@ -192,17 +208,17 @@ def fetch_ugwp_limb_tau(self): def download_file(self, cmd, filename): # returned_value = os.system(cmd) # returns the exit code in unix - # print('returned value:', returned_value) + # logger.info('returned value:', returned_value) if (os.path.isfile(filename)): - print(f'{filename} already exist. skip') + logger.info(f'{filename} already exist. 
skip') else: if (self.verbose): - print(cmd) - print(f'Downloading {filename}') + logger.info(cmd) + logger.info(f'Downloading {filename}') returned_value = subprocess.call(cmd, shell=True) # returns the exit code in unix if (self.verbose): - print('returned value:', returned_value) + logger.info('returned value:', returned_value) # -------------------------------------------------------------------------- def set_fix_ver_from_gwhome(self, gwhome, verdict): @@ -217,7 +233,7 @@ def set_fix_ver_from_gwhome(self, gwhome, verdict): exphead, _, key = headstr.partition(' ') self.fix_ver_dict[key] = value else: - print(f'fix_ver_file: {ix_ver_file}s does not exist.') + logger.info(f'fix_ver_file: {fix_ver_file}s does not exist.') # ------------------------------------------------------------------------ def set_default_fix_ver(self, verdict): @@ -227,18 +243,11 @@ def set_default_fix_ver(self, verdict): # ----------------------------------------------------------------------------- -def print_usage(verdict): - - print('Usage: python fetch-fix-data.py \\') - print(' --atmgrid=AtmospericGrid (for multiple grids, separate with ",") \\') - print(' --ocngrid=OceanGrid (for multiple grids, separate with ",") \\') - print(' --localdir=Your-local-fix-dir \\') - print(' [options]') - print('options are:') - print('\t--gwhome=xxxx (Global-Workflow directory)') - - for key in verdict.keys(): - print(f'\t--{key}=yyyymmdd default: {verdict[key]}') +def namespace_to_dict(namespace): + return { + k: namespace_to_dict(v) if isinstance(v, argparse.Namespace) else v + for k, v in vars(namespace).items() + } # ------------------------------------------------------------------------------ @@ -248,71 +257,43 @@ def print_usage(verdict): atmgridlist = ['C48', 'C96', 'C192', 'C384', 'C768', 'C1152'] ocngridlist = ['500', '100', '050', '025'] - verbose = 0 - atmgrid = f'C48' - ocngrid = f'500' - localdir = f'/contrib/global-workflow-shared-data' - - # default fix-version - verdict = {} - 
verdict['aer_ver'] = f'20220805' - verdict['am_ver'] = f'20220805' - verdict['chem_ver'] = f'20220805' - verdict['cice_ver'] = f'20240416' - verdict['cpl_ver'] = f'20230526' - verdict['datm_ver'] = f'20220805' - verdict['glwu_ver'] = f'20220805' - verdict['gsi_ver'] = f'20240208' - verdict['lut_ver'] = f'20220805' - verdict['mom6_ver'] = f'20240416' - verdict['orog_ver'] = f'20231027' - verdict['reg2grb2_ver'] = f'20220805' - verdict['sfc_climo_ver'] = f'20220805' - verdict['ugwd_ver'] = f'20240624' - verdict['verif_ver'] = f'20220805' - verdict['wave_ver'] = f'20240105' - - gwhome = None - - opts, args = getopt.getopt(sys.argv[1:], '', ['help', 'atmgrid=', 'ocngrid=', - 'verbose=', 'localdir=', - 'gwhome=', - 'aer_ver=', - 'am_ver=', - 'chem_ver=', - 'cice_ver=', - 'cpl_ver=', - 'datm_ver=', - 'glwu_ver=', - 'gsi_ver=', - 'lut_ver=', - 'mom6_ver=', - 'orog_ver=', - 'reg2grb2_ver=', - 'sfc_climo_ver=', - 'ugwd_ver=', - 'verif_ver=', - 'wave_ver=']) - for o, a in opts: - # print(f'o: {o}, a: {a}') - if o in ['--help']: - print_usage(verdict) - sys.exit(0) - elif o in ['--verbose']: - verbose = int(a) - elif o in ['--atmgrid']: - atmgrid = a - elif o in ['--ocngrid']: - ocngrid = a - elif o in ['--localdir']: - localdir = a - elif o in ['--gwhome']: - gwhome = a - else: - _, vername = o.split('--') - print(f'vername: <{vername}>') - verdict[vername] = a + parser = argparse.ArgumentParser() + parser.add_argument("-v", "--verbose", action="store_true", + help="increase output verbosity") + parser.add_argument("-a", "--atmgrid", type=str, required=True, + help="ATM grid, like: C48, C96, C192, C384, C768, C1152") + parser.add_argument("-o", "--ocngrid", type=str, required=True, + help="OCN grid, like: 500, 100, 050, 025") + parser.add_argument("--localdir", type=str, required=True, + help="local directory to store FIX data subset") + parser.add_argument("--gwhome", type=str, default='unknown', + help="GW home diretory where can find fix.ver") + 
parser.add_argument("--fix_bucket", type=str, default='s3://noaa-nws-global-pds/fix', + help="S3 Bucket directory of FIX data") + parser.add_argument("--aer_ver", type=str, default='20220805', help="AER version") + parser.add_argument("--am_ver", type=str, default='20220805', help="AM version") + parser.add_argument("--chem_ver", type=str, default='20220805', help="chem version") + parser.add_argument("--cice_ver", type=str, default='20240416', help="cice version") + parser.add_argument("--cpl_ver", type=str, default='20230526', help="cpl version") + parser.add_argument("--datm_ver", type=str, default='20220805', help="datm version") + parser.add_argument("--glwu_ver", type=str, default='20220805', help="glwu version") + parser.add_argument("--gsi_ver", type=str, default='20240208', help="gsi version") + parser.add_argument("--lut_ver", type=str, default='20220805', help="lut version") + parser.add_argument("--mom6_ver", type=str, default='20240416', help="mom6 version") + parser.add_argument("--orog_ver", type=str, default='20231027', help="orog version") + parser.add_argument("--reg2grb2_ver", type=str, default='20220805', help="reg2grb2 version") + parser.add_argument("--sfc_climo_ver", type=str, default='20220805', help="sfc_climo version") + parser.add_argument("--ugwd_ver", type=str, default='20220805', help="ugwd version") + parser.add_argument("--verif_ver", type=str, default='20220805', help="verif version") + parser.add_argument("--wave_ver", type=str, default='20220805', help="wave version") + args = parser.parse_args() + + if args.verbose: + logger.info(f"the atmgrid is {args.atmgrid}") + else: + logger.info(f"the atmgrid is {args.atmgrid}") + atmgrid = args.atmgrid if (atmgrid.find(',') > 0): atmgridarray = atmgrid.split(',') else: @@ -320,11 +301,11 @@ def print_usage(verdict): for grid in atmgridarray: if (grid not in atmgridlist): - print('atmgrid: ', grid) - print('is not in supported grids: ', atmgridlist) - print_usage(verdict) + 
logger.info(f'atmgrid: {grid}') + logger.info(f'is not in supported grids: {atmgridlist}') sys.exit(-1) + ocngrid = args.ocngrid if (ocngrid.find(',') > 0): ocngridarray = ocngrid.split(',') else: @@ -332,20 +313,22 @@ def print_usage(verdict): for grid in ocngridarray: if (grid not in ocngridlist): - print('ocngrid: ', grid) - print('is not in supported grids: ', ocngridlist) - print_usage(verdict) + logger.info(f'ocngrid: {grid}') + logger.info(f'is not in supported grids: {ocngridlist}') sys.exit(-1) + verdict = namespace_to_dict(args) + # ------------------------------------------------------------------ ffd = FetchFIXdata(atmgridarray=atmgridarray, ocngridarray=ocngridarray, - localdir=localdir, verbose=verbose) + fix_bucket=args.fix_bucket, + localdir=args.localdir, verbose=args.verbose) - if (gwhome is None): + if (args.gwhome is None): ffd.set_default_fix_ver(verdict) else: - ffd.set_fix_ver_from_gwhome(gwhome, verdict) + ffd.set_fix_ver_from_gwhome(args.gwhome, verdict) ffd.update_s3dict() ffd.fetchdata() From 6a168ae2de2a6c566b88dcf67000b8747e1d8b88 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Mon, 3 Mar 2025 16:08:10 +0000 Subject: [PATCH 016/134] using argparse and logging instead of getopt and print --- ush/fetch-fix-data.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ush/fetch-fix-data.py b/ush/fetch-fix-data.py index 9d35e158774..aa71f7e4f96 100644 --- a/ush/fetch-fix-data.py +++ b/ush/fetch-fix-data.py @@ -42,10 +42,10 @@ def __init__(self, atmgridarray=['C48'], ocngridarray=['500'], self.verbose = verbose if (os.path.isdir(localdir)): - logger.info(f'Prepare to download FIX data for {atmgrid} and {ocngrid} to {localdir}') + logger.info(f'Prepare to download FIX data for {atmgrid} and {ocngrid} to {localdir}') else: - logger.info(f'local dir: <{localdir}> does not exist. Stop') - sys.exit(-1) + logger.info(f'local dir: <{localdir}> does not exist. 
Stop') + sys.exit(-1) self.verdict = {} self.s3dict = {} From a23662f56c9e4e825c2d85347a5577e719d9ef10 Mon Sep 17 00:00:00 2001 From: Kris Booker Date: Mon, 3 Mar 2025 09:50:50 -0700 Subject: [PATCH 017/134] Updating with proper nomenclature. --- ci/Jenkinsfile4AWS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/Jenkinsfile4AWS b/ci/Jenkinsfile4AWS index 5efc18bf89f..2e5be62544b 100644 --- a/ci/Jenkinsfile4AWS +++ b/ci/Jenkinsfile4AWS @@ -5,9 +5,9 @@ def HOMEgfs = 'none' def CI_CASES = '' def GH = 'none' // Location of the custom workspaces for each machine in the CI system. They are persistent for each iteration of the PR. -def NodeName = [hera: 'Hera-EMC', orion: 'Orion-EMC', hercules: 'Hercules-EMC', gaea: 'Gaea', noaacloud: 'awsepicglobalworkflow'] -def custom_workspace = [hera: '/scratch1/NCEPDEV/global/CI', orion: '/work2/noaa/stmp/CI/ORION', hercules: '/work2/noaa/global/CI/HERCULES', gaea: '/gpfs/f5/epic/proj-shared/global/CI', noaacloud: '/lustre/jenkins'] -def repo_url = 'git@github.com:NOAA-EMC/global-workflow.git' +def NodeName = [hera: 'Hera-EMC', orion: 'Orion-EMC', hercules: 'Hercules-EMC', gaea: 'Gaea', Awsepicglobalworkflow: 'awsepicglobalworkflow'] +def custom_workspace = [hera: '/scratch1/NCEPDEV/global/CI', orion: '/work2/noaa/stmp/CI/ORION', hercules: '/work2/noaa/global/CI/HERCULES', gaea: '/gpfs/f5/epic/proj-shared/global/CI', Awsepicglobalworkflow: '/lustre/jenkins'] +def repo_url = 'git@github.com:NOAA-EPIC/global-workflow-cloud.git' def STATUS = 'Passed' pipeline { From 6b60421e3690fae483df646160fa5a774e031891 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Mon, 3 Mar 2025 18:27:59 +0000 Subject: [PATCH 018/134] consist with Terry's code --- ci/Jenkinsfile4AWS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/Jenkinsfile4AWS b/ci/Jenkinsfile4AWS index 2e5be62544b..a162b578072 100644 --- a/ci/Jenkinsfile4AWS +++ b/ci/Jenkinsfile4AWS @@ -5,8 +5,8 @@ def HOMEgfs = 'none' def CI_CASES = '' def 
GH = 'none' // Location of the custom workspaces for each machine in the CI system. They are persistent for each iteration of the PR. -def NodeName = [hera: 'Hera-EMC', orion: 'Orion-EMC', hercules: 'Hercules-EMC', gaea: 'Gaea', Awsepicglobalworkflow: 'awsepicglobalworkflow'] -def custom_workspace = [hera: '/scratch1/NCEPDEV/global/CI', orion: '/work2/noaa/stmp/CI/ORION', hercules: '/work2/noaa/global/CI/HERCULES', gaea: '/gpfs/f5/epic/proj-shared/global/CI', Awsepicglobalworkflow: '/lustre/jenkins'] +def NodeName = [hera: 'Hera-EMC', orion: 'Orion-EMC', hercules: 'Hercules-EMC', gaea: 'Gaea', awsepicglobalworkflow: 'Awsepicglobalworkflow'] +def custom_workspace = [hera: '/scratch1/NCEPDEV/global/CI', orion: '/work2/noaa/stmp/CI/ORION', hercules: '/work2/noaa/global/CI/HERCULES', gaea: '/gpfs/f5/epic/proj-shared/global/CI', awsepicglobalworkflow: '/lustre/jenkins'] def repo_url = 'git@github.com:NOAA-EPIC/global-workflow-cloud.git' def STATUS = 'Passed' @@ -79,7 +79,7 @@ pipeline { Machine = machine[0].toUpperCase() + machine.substring(1) echo "Getting Common Workspace for ${Machine}" ws("${custom_workspace[machine]}/${env.CHANGE_ID}") { - properties([parameters([[$class: 'NodeParameterDefinition', allowedSlaves: ['built-in', 'Hercules-EMC', 'Hera-EMC', 'Orion-EMC', 'Gaea', 'awsepicglobalworkflow'], defaultSlaves: ['built-in'], name: '', nodeEligibility: [$class: 'AllNodeEligibility'], triggerIfResult: 'allCases']])]) + properties([parameters([[$class: 'NodeParameterDefinition', allowedSlaves: ['built-in', 'Hercules-EMC', 'Hera-EMC', 'Orion-EMC', 'Gaea', 'Awsepicglobalworkflow'], defaultSlaves: ['built-in'], name: '', nodeEligibility: [$class: 'AllNodeEligibility'], triggerIfResult: 'allCases']])]) GH = sh(script: "which gh || echo '~/bin/gh'", returnStdout: true).trim() CUSTOM_WORKSPACE = "${WORKSPACE}" HOMEgfs = "${CUSTOM_WORKSPACE}/global-workflow" From 35f35f2496983bbd817c779907b0a3beff8c315c Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Mon, 3 Mar 2025 
21:40:43 +0000 Subject: [PATCH 019/134] add a ls command to make sure code in cloned --- ci/Jenkinsfile4AWS | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/Jenkinsfile4AWS b/ci/Jenkinsfile4AWS index a162b578072..449bffc20b4 100644 --- a/ci/Jenkinsfile4AWS +++ b/ci/Jenkinsfile4AWS @@ -120,6 +120,7 @@ pipeline { def error_logs_message = "" dir("${HOMEgfs}/sorc") { try { + sh(script: 'ls *.sh') // list files here to make sure all files exist. sh(script: './build_compute.sh all') // build the global-workflow executables } catch (Exception error_build) { echo "Failed to build global-workflow: ${error_build.getMessage()}" From 9e53aa44cc4227cca767b759adb0129824766920 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Mon, 3 Mar 2025 23:10:41 +0000 Subject: [PATCH 020/134] try clone the code directly --- ci/Jenkinsfile4AWS | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ci/Jenkinsfile4AWS b/ci/Jenkinsfile4AWS index 449bffc20b4..b1ca85ee876 100644 --- a/ci/Jenkinsfile4AWS +++ b/ci/Jenkinsfile4AWS @@ -100,12 +100,14 @@ pipeline { ws(HOMEgfs) { echo "Checking out the code on ${Machine} using scm in ${HOMEgfs}" try { - checkout scm + // checkout scm + sh(script: "git clone --recursive ${repo_url}") } catch (Exception e) { echo "Failed to checkout the code on ${Machine} using scm in ${HOMEgfs}, try again ..." sleep time: 45, unit: 'SECONDS' try { - checkout scm + // checkout scm + sh(script: "git clone --recursive ${repo_url}") } catch (Exception ee) { echo "Failed to checkout the code on ${Machine} using scm in ${HOMEgfs}: ${ee.getMessage()}" if (env.CHANGE_ID) { @@ -121,6 +123,7 @@ pipeline { dir("${HOMEgfs}/sorc") { try { sh(script: 'ls *.sh') // list files here to make sure all files exist. + sh(script: 'ls ./build_compute.sh') // list files here to make sure all files exist. 
sh(script: './build_compute.sh all') // build the global-workflow executables } catch (Exception error_build) { echo "Failed to build global-workflow: ${error_build.getMessage()}" From 98c50d9a46342a9fac3a68a830e4256559de745f Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Mon, 3 Mar 2025 23:57:51 +0000 Subject: [PATCH 021/134] use https to clone --- ci/Jenkinsfile4AWS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ci/Jenkinsfile4AWS b/ci/Jenkinsfile4AWS index b1ca85ee876..016857f62fb 100644 --- a/ci/Jenkinsfile4AWS +++ b/ci/Jenkinsfile4AWS @@ -7,7 +7,8 @@ def GH = 'none' // Location of the custom workspaces for each machine in the CI system. They are persistent for each iteration of the PR. def NodeName = [hera: 'Hera-EMC', orion: 'Orion-EMC', hercules: 'Hercules-EMC', gaea: 'Gaea', awsepicglobalworkflow: 'Awsepicglobalworkflow'] def custom_workspace = [hera: '/scratch1/NCEPDEV/global/CI', orion: '/work2/noaa/stmp/CI/ORION', hercules: '/work2/noaa/global/CI/HERCULES', gaea: '/gpfs/f5/epic/proj-shared/global/CI', awsepicglobalworkflow: '/lustre/jenkins'] -def repo_url = 'git@github.com:NOAA-EPIC/global-workflow-cloud.git' +// def repo_url = 'git@github.com:NOAA-EPIC/global-workflow-cloud.git' +def repo_url = 'https://github.com/NOAA-EPIC/global-workflow-cloud.git' def STATUS = 'Passed' pipeline { From 3089f00f23bb4bfa0f8c96b768c06255c746e9ff Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Tue, 4 Mar 2025 16:09:55 +0000 Subject: [PATCH 022/134] add more debug ls --- ci/Jenkinsfile4AWS | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/ci/Jenkinsfile4AWS b/ci/Jenkinsfile4AWS index 016857f62fb..a13a3e38b92 100644 --- a/ci/Jenkinsfile4AWS +++ b/ci/Jenkinsfile4AWS @@ -4,11 +4,14 @@ def CUSTOM_WORKSPACE = 'none' def HOMEgfs = 'none' def CI_CASES = '' def GH = 'none' +//Trivial change // Location of the custom workspaces for each machine in the CI system. They are persistent for each iteration of the PR. 
def NodeName = [hera: 'Hera-EMC', orion: 'Orion-EMC', hercules: 'Hercules-EMC', gaea: 'Gaea', awsepicglobalworkflow: 'Awsepicglobalworkflow'] def custom_workspace = [hera: '/scratch1/NCEPDEV/global/CI', orion: '/work2/noaa/stmp/CI/ORION', hercules: '/work2/noaa/global/CI/HERCULES', gaea: '/gpfs/f5/epic/proj-shared/global/CI', awsepicglobalworkflow: '/lustre/jenkins'] // def repo_url = 'git@github.com:NOAA-EPIC/global-workflow-cloud.git' def repo_url = 'https://github.com/NOAA-EPIC/global-workflow-cloud.git' +// def aws_gw_name = 'global-workflow" +def aws_gw_name = 'global-workflow-cloud" def STATUS = 'Passed' pipeline { @@ -45,7 +48,9 @@ pipeline { for (label in pullRequest.labels) { if (label.matches("CI-(.*?)-Ready")) { def machine_name = label.split('-')[1].toString().toLowerCase() + print machine_name jenkins.model.Jenkins.get().computers.each { c -> + print c.node.selfLabel.name if (c.node.selfLabel.name == NodeName[machine_name]) { run_nodes.add(c.node.selfLabel.name) } @@ -57,7 +62,7 @@ pipeline { run_nodes.init().each { node -> def machine_name = node.split('-')[0].toLowerCase() echo "Spawning job on node: ${node} with machine name: ${machine_name}" - build job: "/global-workflow/EMC-Global-Pipeline/PR-${env.CHANGE_ID}", parameters: [ + build job: "/${aws_gw_name}/EPIC-AWS-CI-Pipeline/PR-${env.CHANGE_ID}", parameters: [ string(name: 'machine', value: machine_name), string(name: 'Node', value: node) ], wait: false @@ -83,8 +88,8 @@ pipeline { properties([parameters([[$class: 'NodeParameterDefinition', allowedSlaves: ['built-in', 'Hercules-EMC', 'Hera-EMC', 'Orion-EMC', 'Gaea', 'Awsepicglobalworkflow'], defaultSlaves: ['built-in'], name: '', nodeEligibility: [$class: 'AllNodeEligibility'], triggerIfResult: 'allCases']])]) GH = sh(script: "which gh || echo '~/bin/gh'", returnStdout: true).trim() CUSTOM_WORKSPACE = "${WORKSPACE}" - HOMEgfs = "${CUSTOM_WORKSPACE}/global-workflow" - sh(script: "rm -Rf ${CUSTOM_WORKSPACE}/global-workflow; mkdir -p 
${CUSTOM_WORKSPACE}/global-workflow") + HOMEgfs = "${CUSTOM_WORKSPACE}/${aws_gw_name}" + sh(script: "rm -Rf ${CUSTOM_WORKSPACE}/${aws_gw_name}; mkdir -p ${CUSTOM_WORKSPACE}/${aws_gw_name}") sh(script: "rm -Rf ${CUSTOM_WORKSPACE}/RUNTESTS; mkdir -p ${CUSTOM_WORKSPACE}/RUNTESTS") sh(script: """${GH} pr edit ${env.CHANGE_ID} --repo ${repo_url} --add-label "CI-${Machine}-Building" --remove-label "CI-${Machine}-Ready" """) } @@ -103,12 +108,14 @@ pipeline { try { // checkout scm sh(script: "git clone --recursive ${repo_url}") + sh(script: "ls ${HOMEgfs}") } catch (Exception e) { echo "Failed to checkout the code on ${Machine} using scm in ${HOMEgfs}, try again ..." sleep time: 45, unit: 'SECONDS' try { // checkout scm sh(script: "git clone --recursive ${repo_url}") + sh(script: "ls ${HOMEgfs}") } catch (Exception ee) { echo "Failed to checkout the code on ${Machine} using scm in ${HOMEgfs}: ${ee.getMessage()}" if (env.CHANGE_ID) { From 57b2a519dd168ab914795dd6bb48e62b9300e64b Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Tue, 4 Mar 2025 20:23:52 +0000 Subject: [PATCH 023/134] fix a typo --- ci/Jenkinsfile4AWS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/Jenkinsfile4AWS b/ci/Jenkinsfile4AWS index a13a3e38b92..6a0ad2e23c8 100644 --- a/ci/Jenkinsfile4AWS +++ b/ci/Jenkinsfile4AWS @@ -10,8 +10,8 @@ def NodeName = [hera: 'Hera-EMC', orion: 'Orion-EMC', hercules: 'Hercules-EMC', def custom_workspace = [hera: '/scratch1/NCEPDEV/global/CI', orion: '/work2/noaa/stmp/CI/ORION', hercules: '/work2/noaa/global/CI/HERCULES', gaea: '/gpfs/f5/epic/proj-shared/global/CI', awsepicglobalworkflow: '/lustre/jenkins'] // def repo_url = 'git@github.com:NOAA-EPIC/global-workflow-cloud.git' def repo_url = 'https://github.com/NOAA-EPIC/global-workflow-cloud.git' -// def aws_gw_name = 'global-workflow" -def aws_gw_name = 'global-workflow-cloud" +// def aws_gw_name = 'global-workflow' +def aws_gw_name = 'global-workflow-cloud' def STATUS = 'Passed' pipeline { From 
8fefe2a2873e411730fd9baacf5b3094dd01e746 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Tue, 4 Mar 2025 20:49:55 +0000 Subject: [PATCH 024/134] remove 2 comments --- ci/Jenkinsfile4AWS | 2 -- 1 file changed, 2 deletions(-) diff --git a/ci/Jenkinsfile4AWS b/ci/Jenkinsfile4AWS index 6a0ad2e23c8..7f65aed9d8b 100644 --- a/ci/Jenkinsfile4AWS +++ b/ci/Jenkinsfile4AWS @@ -8,9 +8,7 @@ def GH = 'none' // Location of the custom workspaces for each machine in the CI system. They are persistent for each iteration of the PR. def NodeName = [hera: 'Hera-EMC', orion: 'Orion-EMC', hercules: 'Hercules-EMC', gaea: 'Gaea', awsepicglobalworkflow: 'Awsepicglobalworkflow'] def custom_workspace = [hera: '/scratch1/NCEPDEV/global/CI', orion: '/work2/noaa/stmp/CI/ORION', hercules: '/work2/noaa/global/CI/HERCULES', gaea: '/gpfs/f5/epic/proj-shared/global/CI', awsepicglobalworkflow: '/lustre/jenkins'] -// def repo_url = 'git@github.com:NOAA-EPIC/global-workflow-cloud.git' def repo_url = 'https://github.com/NOAA-EPIC/global-workflow-cloud.git' -// def aws_gw_name = 'global-workflow' def aws_gw_name = 'global-workflow-cloud' def STATUS = 'Passed' From 8f6db95c5df3235ddc6fbb40fbca896ad46190b4 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Tue, 4 Mar 2025 21:35:37 +0000 Subject: [PATCH 025/134] reset HOMEgfs --- ci/Jenkinsfile4AWS | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/Jenkinsfile4AWS b/ci/Jenkinsfile4AWS index 7f65aed9d8b..bd5554d1768 100644 --- a/ci/Jenkinsfile4AWS +++ b/ci/Jenkinsfile4AWS @@ -126,6 +126,7 @@ pipeline { def gist_url = "" def error_logs = "" def error_logs_message = "" + HOMEgfs = "${CUSTOM_WORKSPACE}/${aws_gw_name}/${aws_gw_name}" dir("${HOMEgfs}/sorc") { try { sh(script: 'ls *.sh') // list files here to make sure all files exist. 
From 1f05a51c0753e7ae3080d6e784fa3adde8f3c1eb Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Tue, 4 Mar 2025 22:03:21 +0000 Subject: [PATCH 026/134] compile for gfs only for now --- ci/Jenkinsfile4AWS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/Jenkinsfile4AWS b/ci/Jenkinsfile4AWS index bd5554d1768..98efcde33c7 100644 --- a/ci/Jenkinsfile4AWS +++ b/ci/Jenkinsfile4AWS @@ -131,7 +131,7 @@ pipeline { try { sh(script: 'ls *.sh') // list files here to make sure all files exist. sh(script: 'ls ./build_compute.sh') // list files here to make sure all files exist. - sh(script: './build_compute.sh all') // build the global-workflow executables + sh(script: './build_compute.sh gfs') // build the global-workflow executables } catch (Exception error_build) { echo "Failed to build global-workflow: ${error_build.getMessage()}" if ( fileExists("logs/error.logs") ) { From 4640b6a5143b6717e8bc5b7a403cdc6643a549d0 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 5 Mar 2025 00:04:50 +0000 Subject: [PATCH 027/134] comment gh pr eidt for now --- ci/Jenkinsfile4AWS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/Jenkinsfile4AWS b/ci/Jenkinsfile4AWS index 98efcde33c7..4722068f157 100644 --- a/ci/Jenkinsfile4AWS +++ b/ci/Jenkinsfile4AWS @@ -89,7 +89,7 @@ pipeline { HOMEgfs = "${CUSTOM_WORKSPACE}/${aws_gw_name}" sh(script: "rm -Rf ${CUSTOM_WORKSPACE}/${aws_gw_name}; mkdir -p ${CUSTOM_WORKSPACE}/${aws_gw_name}") sh(script: "rm -Rf ${CUSTOM_WORKSPACE}/RUNTESTS; mkdir -p ${CUSTOM_WORKSPACE}/RUNTESTS") - sh(script: """${GH} pr edit ${env.CHANGE_ID} --repo ${repo_url} --add-label "CI-${Machine}-Building" --remove-label "CI-${Machine}-Ready" """) + // sh(script: """${GH} pr edit ${env.CHANGE_ID} --repo ${repo_url} --add-label "CI-${Machine}-Building" --remove-label "CI-${Machine}-Ready" """) } echo "Building and running on ${Machine} in directory ${CUSTOM_WORKSPACE}" } From c906547c62a7020f9b7a00285b71359222d63781 Mon Sep 17 00:00:00 2001 From: 
Wei Huang Date: Wed, 5 Mar 2025 01:41:14 +0000 Subject: [PATCH 028/134] skip CI on AWS --- ci/cases/pr/C48_S2SW_extended.yaml | 1 + ci/cases/pr/C48mx500_3DVarAOWCDA.yaml | 1 + ci/cases/pr/C48mx500_hybAOWCDA.yaml | 1 + ci/cases/pr/C96C48_hybatmDA.yaml | 3 +++ ci/cases/pr/C96C48_hybatmaerosnowDA.yaml | 1 + ci/cases/pr/C96C48_ufs_hybatmDA.yaml | 1 + ci/cases/pr/C96_atm3DVar.yaml | 1 + ci/cases/pr/C96_atm3DVar_extended.yaml | 1 + ci/cases/pr/C96mx100_S2S.yaml | 1 + 9 files changed, 11 insertions(+) diff --git a/ci/cases/pr/C48_S2SW_extended.yaml b/ci/cases/pr/C48_S2SW_extended.yaml index 11e0af9cb94..06d3ffff4c0 100644 --- a/ci/cases/pr/C48_S2SW_extended.yaml +++ b/ci/cases/pr/C48_S2SW_extended.yaml @@ -19,4 +19,5 @@ skip_ci_on_hosts: - gaeac6 - orion - hercules + - awsepicglobalworkflow - wcoss2 # TODO run on WCOSS2 once the gfs_waveawipsbulls job is fixed diff --git a/ci/cases/pr/C48mx500_3DVarAOWCDA.yaml b/ci/cases/pr/C48mx500_3DVarAOWCDA.yaml index fcfb3caadc6..f3627b4c1a1 100644 --- a/ci/cases/pr/C48mx500_3DVarAOWCDA.yaml +++ b/ci/cases/pr/C48mx500_3DVarAOWCDA.yaml @@ -21,3 +21,4 @@ skip_ci_on_hosts: - gaeac6 - gaeac5 - orion + - awsepicglobalworkflow diff --git a/ci/cases/pr/C48mx500_hybAOWCDA.yaml b/ci/cases/pr/C48mx500_hybAOWCDA.yaml index 36ea62b2dfd..1d9a5c526b8 100644 --- a/ci/cases/pr/C48mx500_hybAOWCDA.yaml +++ b/ci/cases/pr/C48mx500_hybAOWCDA.yaml @@ -22,3 +22,4 @@ skip_ci_on_hosts: - gaeac5 - gaeac6 - orion + - awsepicglobalworkflow diff --git a/ci/cases/pr/C96C48_hybatmDA.yaml b/ci/cases/pr/C96C48_hybatmDA.yaml index c0833acf141..abb6c1b0c55 100644 --- a/ci/cases/pr/C96C48_hybatmDA.yaml +++ b/ci/cases/pr/C96C48_hybatmDA.yaml @@ -17,3 +17,6 @@ arguments: interval: 24 start: cold yaml: {{ HOMEgfs }}/ci/cases/yamls/gfs_defaults_ci.yaml + +skip_ci_on_hosts: + - awsepicglobalworkflow diff --git a/ci/cases/pr/C96C48_hybatmaerosnowDA.yaml b/ci/cases/pr/C96C48_hybatmaerosnowDA.yaml index e231f30b047..78106beb3e3 100644 --- 
a/ci/cases/pr/C96C48_hybatmaerosnowDA.yaml +++ b/ci/cases/pr/C96C48_hybatmaerosnowDA.yaml @@ -22,3 +22,4 @@ skip_ci_on_hosts: - gaeac5 - gaeac6 - hercules + - awsepicglobalworkflow diff --git a/ci/cases/pr/C96C48_ufs_hybatmDA.yaml b/ci/cases/pr/C96C48_ufs_hybatmDA.yaml index 18fe4168c4e..74f31a27940 100644 --- a/ci/cases/pr/C96C48_ufs_hybatmDA.yaml +++ b/ci/cases/pr/C96C48_ufs_hybatmDA.yaml @@ -22,3 +22,4 @@ skip_ci_on_hosts: - gaeac6 - orion - hercules + - awsepicglobalworkflow diff --git a/ci/cases/pr/C96_atm3DVar.yaml b/ci/cases/pr/C96_atm3DVar.yaml index 0b242fbfd38..74490f69aaa 100644 --- a/ci/cases/pr/C96_atm3DVar.yaml +++ b/ci/cases/pr/C96_atm3DVar.yaml @@ -18,3 +18,4 @@ arguments: skip_ci_on_hosts: - wcoss2 + - awsepicglobalworkflow diff --git a/ci/cases/pr/C96_atm3DVar_extended.yaml b/ci/cases/pr/C96_atm3DVar_extended.yaml index a76c87b5fad..27a839f076f 100644 --- a/ci/cases/pr/C96_atm3DVar_extended.yaml +++ b/ci/cases/pr/C96_atm3DVar_extended.yaml @@ -22,3 +22,4 @@ skip_ci_on_hosts: - gaeac6 - orion - hercules + - awsepicglobalworkflow diff --git a/ci/cases/pr/C96mx100_S2S.yaml b/ci/cases/pr/C96mx100_S2S.yaml index 0f61c48fe62..4ce7c5e206d 100644 --- a/ci/cases/pr/C96mx100_S2S.yaml +++ b/ci/cases/pr/C96mx100_S2S.yaml @@ -20,3 +20,4 @@ arguments: skip_ci_on_hosts: - gaeac6 - gaeac5 + - awsepicglobalworkflow From eac272e7ce4d4023cb3f3895e03dc8d692581761 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 5 Mar 2025 16:03:56 +0000 Subject: [PATCH 029/134] trying to fix runtime bug --- ci/Jenkinsfile4AWS | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/ci/Jenkinsfile4AWS b/ci/Jenkinsfile4AWS index 4722068f157..9c60020ef32 100644 --- a/ci/Jenkinsfile4AWS +++ b/ci/Jenkinsfile4AWS @@ -85,7 +85,7 @@ pipeline { ws("${custom_workspace[machine]}/${env.CHANGE_ID}") { properties([parameters([[$class: 'NodeParameterDefinition', allowedSlaves: ['built-in', 'Hercules-EMC', 'Hera-EMC', 'Orion-EMC', 'Gaea', 'Awsepicglobalworkflow'], 
defaultSlaves: ['built-in'], name: '', nodeEligibility: [$class: 'AllNodeEligibility'], triggerIfResult: 'allCases']])]) GH = sh(script: "which gh || echo '~/bin/gh'", returnStdout: true).trim() - CUSTOM_WORKSPACE = "${WORKSPACE}" + CUSTOM_WORKSPACE = "${WORKSPACE}/${aws_gw_name}" HOMEgfs = "${CUSTOM_WORKSPACE}/${aws_gw_name}" sh(script: "rm -Rf ${CUSTOM_WORKSPACE}/${aws_gw_name}; mkdir -p ${CUSTOM_WORKSPACE}/${aws_gw_name}") sh(script: "rm -Rf ${CUSTOM_WORKSPACE}/RUNTESTS; mkdir -p ${CUSTOM_WORKSPACE}/RUNTESTS") @@ -126,14 +126,13 @@ pipeline { def gist_url = "" def error_logs = "" def error_logs_message = "" - HOMEgfs = "${CUSTOM_WORKSPACE}/${aws_gw_name}/${aws_gw_name}" dir("${HOMEgfs}/sorc") { try { sh(script: 'ls *.sh') // list files here to make sure all files exist. sh(script: 'ls ./build_compute.sh') // list files here to make sure all files exist. - sh(script: './build_compute.sh gfs') // build the global-workflow executables + sh(script: './build_compute.sh gfs') // build the ${aws_gw_name} executables } catch (Exception error_build) { - echo "Failed to build global-workflow: ${error_build.getMessage()}" + echo "Failed to build ${aws_gw_name}: ${error_build.getMessage()}" if ( fileExists("logs/error.logs") ) { def fileContent = readFile 'logs/error.logs' def lines = fileContent.readLines() @@ -162,10 +161,10 @@ pipeline { echo "Failed to comment on PR: ${error_comment.getMessage()}" } STATUS = 'Failed' - error("Failed to build global-workflow on ${Machine}") + error("Failed to build ${aws_gw_name} on ${Machine}") } STATUS = 'Failed' - error("Failed to build global-workflow on ${Machine} and no error.logs file found") + error("Failed to build ${aws_gw_name} on ${Machine} and no error.logs file found") } sh(script: './link_workflow.sh') } @@ -201,6 +200,7 @@ pipeline { script { env.RUNTESTS = "${CUSTOM_WORKSPACE}/RUNTESTS" try { + echo "HOMEgfs: ${HOMEgfs}" error_output = sh(script: """ source ${HOMEgfs}/workflow/gw_setup.sh 
${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh create_experiment ${HOMEgfs}/ci/cases/pr/${caseName}.yaml @@ -219,9 +219,10 @@ pipeline { def error_file = "${CUSTOM_WORKSPACE}/RUNTESTS/${pslot}_error.logs" sh(script: " rm -f ${error_file}") try { + echo "HOMEgfs: ${HOMEgfs}" sh(script: """ source ${HOMEgfs}/workflow/gw_setup.sh - ${HOMEgfs}/ci/scripts/run-check_ci.sh ${CUSTOM_WORKSPACE} ${pslot} 'global-workflow' + ${HOMEgfs}/ci/scripts/run-check_ci.sh ${CUSTOM_WORKSPACE} ${pslot} "${aws_gw_name}" """) sh(script: """ source ${HOMEgfs}/workflow/gw_setup.sh @@ -310,7 +311,7 @@ pipeline { echo "Failed to remove custom work directory ${CUSTOM_WORKSPACE} on ${Machine}: ${e.getMessage()}" } } else { - echo "Failed to build and run global-workflow in ${CUSTOM_WORKSPACE} on ${Machine}" + echo "Failed to build and run ${aws_gw_name} in ${CUSTOM_WORKSPACE} on ${Machine}" } } } From 35a6cd0ead24c2c7731ac9b734c2e037bf905951 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 5 Mar 2025 17:36:22 +0000 Subject: [PATCH 030/134] still trying to figure out HOEgfs issue --- ci/Jenkinsfile4AWS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ci/Jenkinsfile4AWS b/ci/Jenkinsfile4AWS index 9c60020ef32..054568f7d85 100644 --- a/ci/Jenkinsfile4AWS +++ b/ci/Jenkinsfile4AWS @@ -86,7 +86,7 @@ pipeline { properties([parameters([[$class: 'NodeParameterDefinition', allowedSlaves: ['built-in', 'Hercules-EMC', 'Hera-EMC', 'Orion-EMC', 'Gaea', 'Awsepicglobalworkflow'], defaultSlaves: ['built-in'], name: '', nodeEligibility: [$class: 'AllNodeEligibility'], triggerIfResult: 'allCases']])]) GH = sh(script: "which gh || echo '~/bin/gh'", returnStdout: true).trim() CUSTOM_WORKSPACE = "${WORKSPACE}/${aws_gw_name}" - HOMEgfs = "${CUSTOM_WORKSPACE}/${aws_gw_name}" + HOMEgfs = "${CUSTOM_WORKSPACE}" sh(script: "rm -Rf ${CUSTOM_WORKSPACE}/${aws_gw_name}; mkdir -p ${CUSTOM_WORKSPACE}/${aws_gw_name}") sh(script: "rm -Rf ${CUSTOM_WORKSPACE}/RUNTESTS; mkdir -p ${CUSTOM_WORKSPACE}/RUNTESTS") // 
sh(script: """${GH} pr edit ${env.CHANGE_ID} --repo ${repo_url} --add-label "CI-${Machine}-Building" --remove-label "CI-${Machine}-Ready" """) @@ -126,6 +126,7 @@ pipeline { def gist_url = "" def error_logs = "" def error_logs_message = "" + HOMEgfs = "${CUSTOM_WORKSPACE}/${aws_gw_name}" dir("${HOMEgfs}/sorc") { try { sh(script: 'ls *.sh') // list files here to make sure all files exist. From b55f4a17ef14ce0974be023e8bbdfb1a50274df8 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 5 Mar 2025 21:00:51 +0000 Subject: [PATCH 031/134] add compile gefs --- ci/Jenkinsfile4AWS | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/ci/Jenkinsfile4AWS b/ci/Jenkinsfile4AWS index 054568f7d85..8f6f15db934 100644 --- a/ci/Jenkinsfile4AWS +++ b/ci/Jenkinsfile4AWS @@ -89,7 +89,7 @@ pipeline { HOMEgfs = "${CUSTOM_WORKSPACE}" sh(script: "rm -Rf ${CUSTOM_WORKSPACE}/${aws_gw_name}; mkdir -p ${CUSTOM_WORKSPACE}/${aws_gw_name}") sh(script: "rm -Rf ${CUSTOM_WORKSPACE}/RUNTESTS; mkdir -p ${CUSTOM_WORKSPACE}/RUNTESTS") - // sh(script: """${GH} pr edit ${env.CHANGE_ID} --repo ${repo_url} --add-label "CI-${Machine}-Building" --remove-label "CI-${Machine}-Ready" """) + sh(script: """${GH} pr edit ${env.CHANGE_ID} --repo ${repo_url} --add-label "CI-${Machine}-Building" --remove-label "CI-${Machine}-Ready" """) } echo "Building and running on ${Machine} in directory ${CUSTOM_WORKSPACE}" } @@ -104,15 +104,15 @@ pipeline { ws(HOMEgfs) { echo "Checking out the code on ${Machine} using scm in ${HOMEgfs}" try { - // checkout scm - sh(script: "git clone --recursive ${repo_url}") + checkout scm + // sh(script: "git clone --recursive ${repo_url}") sh(script: "ls ${HOMEgfs}") } catch (Exception e) { echo "Failed to checkout the code on ${Machine} using scm in ${HOMEgfs}, try again ..." 
sleep time: 45, unit: 'SECONDS' try { - // checkout scm - sh(script: "git clone --recursive ${repo_url}") + checkout scm + // sh(script: "git clone --recursive ${repo_url}") sh(script: "ls ${HOMEgfs}") } catch (Exception ee) { echo "Failed to checkout the code on ${Machine} using scm in ${HOMEgfs}: ${ee.getMessage()}" @@ -132,6 +132,7 @@ pipeline { sh(script: 'ls *.sh') // list files here to make sure all files exist. sh(script: 'ls ./build_compute.sh') // list files here to make sure all files exist. sh(script: './build_compute.sh gfs') // build the ${aws_gw_name} executables + sh(script: './build_compute.sh gefs') // build the ${aws_gw_name} executables } catch (Exception error_build) { echo "Failed to build ${aws_gw_name}: ${error_build.getMessage()}" if ( fileExists("logs/error.logs") ) { From e1724cb02fdde8419dccef48788ea0826ea04576 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 5 Mar 2025 21:57:54 +0000 Subject: [PATCH 032/134] add compile gefs --- ci/Jenkinsfile4AWS | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/Jenkinsfile4AWS b/ci/Jenkinsfile4AWS index 8f6f15db934..fdc6dfd0eee 100644 --- a/ci/Jenkinsfile4AWS +++ b/ci/Jenkinsfile4AWS @@ -85,6 +85,7 @@ pipeline { ws("${custom_workspace[machine]}/${env.CHANGE_ID}") { properties([parameters([[$class: 'NodeParameterDefinition', allowedSlaves: ['built-in', 'Hercules-EMC', 'Hera-EMC', 'Orion-EMC', 'Gaea', 'Awsepicglobalworkflow'], defaultSlaves: ['built-in'], name: '', nodeEligibility: [$class: 'AllNodeEligibility'], triggerIfResult: 'allCases']])]) GH = sh(script: "which gh || echo '~/bin/gh'", returnStdout: true).trim() + echo "GH: ${GH}" CUSTOM_WORKSPACE = "${WORKSPACE}/${aws_gw_name}" HOMEgfs = "${CUSTOM_WORKSPACE}" sh(script: "rm -Rf ${CUSTOM_WORKSPACE}/${aws_gw_name}; mkdir -p ${CUSTOM_WORKSPACE}/${aws_gw_name}") From 4264c15c82f36ec75fe318d19856c94f318819bc Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 5 Mar 2025 22:27:28 +0000 Subject: [PATCH 033/134] add compile gefs --- 
ci/Jenkinsfile4AWS | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ci/Jenkinsfile4AWS b/ci/Jenkinsfile4AWS index fdc6dfd0eee..b0528436a1e 100644 --- a/ci/Jenkinsfile4AWS +++ b/ci/Jenkinsfile4AWS @@ -90,7 +90,7 @@ pipeline { HOMEgfs = "${CUSTOM_WORKSPACE}" sh(script: "rm -Rf ${CUSTOM_WORKSPACE}/${aws_gw_name}; mkdir -p ${CUSTOM_WORKSPACE}/${aws_gw_name}") sh(script: "rm -Rf ${CUSTOM_WORKSPACE}/RUNTESTS; mkdir -p ${CUSTOM_WORKSPACE}/RUNTESTS") - sh(script: """${GH} pr edit ${env.CHANGE_ID} --repo ${repo_url} --add-label "CI-${Machine}-Building" --remove-label "CI-${Machine}-Ready" """) + // sh(script: """${GH} pr edit ${env.CHANGE_ID} --repo ${repo_url} --add-label "CI-${Machine}-Building" --remove-label "CI-${Machine}-Ready" """) } echo "Building and running on ${Machine} in directory ${CUSTOM_WORKSPACE}" } @@ -105,15 +105,15 @@ pipeline { ws(HOMEgfs) { echo "Checking out the code on ${Machine} using scm in ${HOMEgfs}" try { - checkout scm - // sh(script: "git clone --recursive ${repo_url}") + // checkout scm + sh(script: "git clone --recursive ${repo_url}") sh(script: "ls ${HOMEgfs}") } catch (Exception e) { echo "Failed to checkout the code on ${Machine} using scm in ${HOMEgfs}, try again ..." 
sleep time: 45, unit: 'SECONDS' try { - checkout scm - // sh(script: "git clone --recursive ${repo_url}") + // checkout scm + sh(script: "git clone --recursive ${repo_url}") sh(script: "ls ${HOMEgfs}") } catch (Exception ee) { echo "Failed to checkout the code on ${Machine} using scm in ${HOMEgfs}: ${ee.getMessage()}" From 20c1211a530efb8c3ed9fcba56eb0210b4171797 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Tue, 11 Mar 2025 21:22:07 +0000 Subject: [PATCH 034/134] switch back to 'checkout scm instead of git clone' --- ci/Jenkinsfile4AWS | 10 +++++----- ci/cases/pr/C48_S2SW.yaml | 3 +++ ci/scripts/utils/ci_utils.sh | 13 +++++++++++-- ci/scripts/utils/launch_java_agent.sh | 9 ++++++++- 4 files changed, 27 insertions(+), 8 deletions(-) diff --git a/ci/Jenkinsfile4AWS b/ci/Jenkinsfile4AWS index b0528436a1e..fdc6dfd0eee 100644 --- a/ci/Jenkinsfile4AWS +++ b/ci/Jenkinsfile4AWS @@ -90,7 +90,7 @@ pipeline { HOMEgfs = "${CUSTOM_WORKSPACE}" sh(script: "rm -Rf ${CUSTOM_WORKSPACE}/${aws_gw_name}; mkdir -p ${CUSTOM_WORKSPACE}/${aws_gw_name}") sh(script: "rm -Rf ${CUSTOM_WORKSPACE}/RUNTESTS; mkdir -p ${CUSTOM_WORKSPACE}/RUNTESTS") - // sh(script: """${GH} pr edit ${env.CHANGE_ID} --repo ${repo_url} --add-label "CI-${Machine}-Building" --remove-label "CI-${Machine}-Ready" """) + sh(script: """${GH} pr edit ${env.CHANGE_ID} --repo ${repo_url} --add-label "CI-${Machine}-Building" --remove-label "CI-${Machine}-Ready" """) } echo "Building and running on ${Machine} in directory ${CUSTOM_WORKSPACE}" } @@ -105,15 +105,15 @@ pipeline { ws(HOMEgfs) { echo "Checking out the code on ${Machine} using scm in ${HOMEgfs}" try { - // checkout scm - sh(script: "git clone --recursive ${repo_url}") + checkout scm + // sh(script: "git clone --recursive ${repo_url}") sh(script: "ls ${HOMEgfs}") } catch (Exception e) { echo "Failed to checkout the code on ${Machine} using scm in ${HOMEgfs}, try again ..." 
sleep time: 45, unit: 'SECONDS' try { - // checkout scm - sh(script: "git clone --recursive ${repo_url}") + checkout scm + // sh(script: "git clone --recursive ${repo_url}") sh(script: "ls ${HOMEgfs}") } catch (Exception ee) { echo "Failed to checkout the code on ${Machine} using scm in ${HOMEgfs}: ${ee.getMessage()}" diff --git a/ci/cases/pr/C48_S2SW.yaml b/ci/cases/pr/C48_S2SW.yaml index 63675645148..5ca9d45d4f4 100644 --- a/ci/cases/pr/C48_S2SW.yaml +++ b/ci/cases/pr/C48_S2SW.yaml @@ -12,3 +12,6 @@ arguments: idate: 2021032312 edate: 2021032312 yaml: {{ HOMEgfs }}/ci/cases/yamls/gfs_defaults_ci.yaml + +skip_ci_on_hosts: + - awsepicglobalworkflow diff --git a/ci/scripts/utils/ci_utils.sh b/ci/scripts/utils/ci_utils.sh index 56b0571adca..085b4c54fa7 100755 --- a/ci/scripts/utils/ci_utils.sh +++ b/ci/scripts/utils/ci_utils.sh @@ -119,11 +119,20 @@ function create_experiment () { case=$(basename "${yaml_config}" .yaml) || true export pslot=${case}_${pr_sha} - source "${HOMEgfs}/ci/platforms/config.${MACHINE_ID}" + if [[ ${MACHINE_ID} == "noaacloud" ]]; then + source "${HOMEgfs}/ci/platforms/config.${PW_CSP}" + else + source "${HOMEgfs}/ci/platforms/config.${MACHINE_ID}" + fi + source "${HOMEgfs}/workflow/gw_setup.sh" # Remove RUNDIRS dir incase this is a retry (STMP now in host file) - STMP=$("${HOMEgfs}/ci/scripts/utils/parse_yaml.py" -y "${HOMEgfs}/workflow/hosts/${MACHINE_ID}.yaml" -k STMP -s) + if [[ ${MACHINE_ID} == "noaacloud" ]]; then + STMP=$("${HOMEgfs}/ci/scripts/utils/parse_yaml.py" -y "${HOMEgfs}/workflow/hosts/${PW_CSP}pw.yaml" -k STMP -s) + else + STMP=$("${HOMEgfs}/ci/scripts/utils/parse_yaml.py" -y "${HOMEgfs}/workflow/hosts/${MACHINE_ID}.yaml" -k STMP -s) + fi echo "Removing ${STMP}/RUNDIRS/${pslot} directory incase this is a retry" rm -Rf "${STMP}/RUNDIRS/${pslot}" diff --git a/ci/scripts/utils/launch_java_agent.sh b/ci/scripts/utils/launch_java_agent.sh index 539ba4ca99d..8854f794152 100755 --- a/ci/scripts/utils/launch_java_agent.sh +++ 
b/ci/scripts/utils/launch_java_agent.sh @@ -76,6 +76,8 @@ source "${HOMEGFS_}/ush/detect_machine.sh" case ${MACHINE_ID} in hera | orion | hercules | wcoss2 | gaeac5 | gaeac6 ) echo "Launch Jenkins Java Controler on ${MACHINE_ID}";; + noaacloud ) + echo "Launch Jenkins Java Controler on ${PW_CSP}";; *) echo "Unsupported platform. Exiting with error." exit 1;; @@ -87,7 +89,12 @@ rm -f "${LOG}" HOMEgfs="${HOMEGFS_}" source "${HOMEGFS_}/ush/module-setup.sh" module use "${HOMEGFS_}/modulefiles" module load "module_gwsetup.${MACHINE_ID}" -source "${HOMEGFS_}/ci/platforms/config.${MACHINE_ID}" + +if [[ ${MACHINE_ID} == "noaacloud" ]]; then + source "${HOMEgfs_}/ci/platforms/config.${PW_CSP}" +else + source "${HOMEgfs_}/ci/platforms/config.${MACHINE_ID}" +fi JAVA_HOME="${JENKINS_AGENT_LANUCH_DIR}/JAVA/jdk-17.0.10" if [[ ! -d "${JAVA_HOME}" ]]; then From f8a6a6d8c0ebc66a23254210b1ce7130048f37ca Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 12 Mar 2025 14:46:48 +0000 Subject: [PATCH 035/134] using checkout scm --- ci/Jenkinsfile4AWS | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/ci/Jenkinsfile4AWS b/ci/Jenkinsfile4AWS index fdc6dfd0eee..80d2a99e691 100644 --- a/ci/Jenkinsfile4AWS +++ b/ci/Jenkinsfile4AWS @@ -8,7 +8,8 @@ def GH = 'none' // Location of the custom workspaces for each machine in the CI system. They are persistent for each iteration of the PR. 
def NodeName = [hera: 'Hera-EMC', orion: 'Orion-EMC', hercules: 'Hercules-EMC', gaea: 'Gaea', awsepicglobalworkflow: 'Awsepicglobalworkflow'] def custom_workspace = [hera: '/scratch1/NCEPDEV/global/CI', orion: '/work2/noaa/stmp/CI/ORION', hercules: '/work2/noaa/global/CI/HERCULES', gaea: '/gpfs/f5/epic/proj-shared/global/CI', awsepicglobalworkflow: '/lustre/jenkins'] -def repo_url = 'https://github.com/NOAA-EPIC/global-workflow-cloud.git' +// def repo_url = 'https://github.com/NOAA-EPIC/global-workflow-cloud.git' +def repo_url = 'git@github.com:NOAA-EMC/global-workflow.git' def aws_gw_name = 'global-workflow-cloud' def STATUS = 'Passed' @@ -105,16 +106,18 @@ pipeline { ws(HOMEgfs) { echo "Checking out the code on ${Machine} using scm in ${HOMEgfs}" try { + echo "Check out code with scm" checkout scm // sh(script: "git clone --recursive ${repo_url}") - sh(script: "ls ${HOMEgfs}") + // sh(script: "ls ${HOMEgfs}") } catch (Exception e) { echo "Failed to checkout the code on ${Machine} using scm in ${HOMEgfs}, try again ..." 
sleep time: 45, unit: 'SECONDS' try { + echo "Check out code with scm" checkout scm // sh(script: "git clone --recursive ${repo_url}") - sh(script: "ls ${HOMEgfs}") + // sh(script: "ls ${HOMEgfs}") } catch (Exception ee) { echo "Failed to checkout the code on ${Machine} using scm in ${HOMEgfs}: ${ee.getMessage()}" if (env.CHANGE_ID) { From c952a7300eb708197669fa82155912a6fc208af9 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 12 Mar 2025 14:48:11 +0000 Subject: [PATCH 036/134] using checkout scm --- ci/Jenkinsfile4AWS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/Jenkinsfile4AWS b/ci/Jenkinsfile4AWS index 80d2a99e691..cc24c2cb5b1 100644 --- a/ci/Jenkinsfile4AWS +++ b/ci/Jenkinsfile4AWS @@ -9,7 +9,7 @@ def GH = 'none' def NodeName = [hera: 'Hera-EMC', orion: 'Orion-EMC', hercules: 'Hercules-EMC', gaea: 'Gaea', awsepicglobalworkflow: 'Awsepicglobalworkflow'] def custom_workspace = [hera: '/scratch1/NCEPDEV/global/CI', orion: '/work2/noaa/stmp/CI/ORION', hercules: '/work2/noaa/global/CI/HERCULES', gaea: '/gpfs/f5/epic/proj-shared/global/CI', awsepicglobalworkflow: '/lustre/jenkins'] // def repo_url = 'https://github.com/NOAA-EPIC/global-workflow-cloud.git' -def repo_url = 'git@github.com:NOAA-EMC/global-workflow.git' +def repo_url = 'git@github.com:NOAA-EPIC/global-workflow-cloud.git' def aws_gw_name = 'global-workflow-cloud' def STATUS = 'Passed' From 12b09df40fbafd9dae146b2fe6116dd6e0964c49 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Thu, 13 Mar 2025 00:14:36 +0000 Subject: [PATCH 037/134] remove memory from resource if on AWS --- parm/config/gfs/config.resources.AWSPW | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/parm/config/gfs/config.resources.AWSPW b/parm/config/gfs/config.resources.AWSPW index d044c475fb3..85a7293b06c 100644 --- a/parm/config/gfs/config.resources.AWSPW +++ b/parm/config/gfs/config.resources.AWSPW @@ -4,13 +4,7 @@ export is_exclusive="True" unset memory - -# shellcheck disable=SC2312 -for 
mem_var in $(env | grep '^memory_' | cut -d= -f1); do - unset "${mem_var}" -done - -step=$1 +unset "memory_${RUN}" case ${step} in "fcst" | "efcs") From 98570b7f6e0c4473781bbd78ebcef384812ac57d Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Thu, 13 Mar 2025 19:16:33 +0000 Subject: [PATCH 038/134] remove memory requirement for AWS --- ci/Jenkinsfile4AWS | 72 ++++++++++++++++++-------- parm/config/gfs/config.resources.AWSPW | 6 +++ 2 files changed, 55 insertions(+), 23 deletions(-) diff --git a/ci/Jenkinsfile4AWS b/ci/Jenkinsfile4AWS index cc24c2cb5b1..64f8a54077b 100644 --- a/ci/Jenkinsfile4AWS +++ b/ci/Jenkinsfile4AWS @@ -8,8 +8,7 @@ def GH = 'none' // Location of the custom workspaces for each machine in the CI system. They are persistent for each iteration of the PR. def NodeName = [hera: 'Hera-EMC', orion: 'Orion-EMC', hercules: 'Hercules-EMC', gaea: 'Gaea', awsepicglobalworkflow: 'Awsepicglobalworkflow'] def custom_workspace = [hera: '/scratch1/NCEPDEV/global/CI', orion: '/work2/noaa/stmp/CI/ORION', hercules: '/work2/noaa/global/CI/HERCULES', gaea: '/gpfs/f5/epic/proj-shared/global/CI', awsepicglobalworkflow: '/lustre/jenkins'] -// def repo_url = 'https://github.com/NOAA-EPIC/global-workflow-cloud.git' -def repo_url = 'git@github.com:NOAA-EPIC/global-workflow-cloud.git' +def repo_url = 'https://github.com/NOAA-EPIC/global-workflow-cloud.git' def aws_gw_name = 'global-workflow-cloud' def STATUS = 'Passed' @@ -83,12 +82,13 @@ pipeline { script { Machine = machine[0].toUpperCase() + machine.substring(1) echo "Getting Common Workspace for ${Machine}" + echo "Common Workspace for ${Machine} is ${custom_workspace[machine]}/${env.CHANGE_ID}" + echo "WORKSPACE: {WORKSPACE}" ws("${custom_workspace[machine]}/${env.CHANGE_ID}") { properties([parameters([[$class: 'NodeParameterDefinition', allowedSlaves: ['built-in', 'Hercules-EMC', 'Hera-EMC', 'Orion-EMC', 'Gaea', 'Awsepicglobalworkflow'], defaultSlaves: ['built-in'], name: '', nodeEligibility: [$class: 
'AllNodeEligibility'], triggerIfResult: 'allCases']])]) GH = sh(script: "which gh || echo '~/bin/gh'", returnStdout: true).trim() - echo "GH: ${GH}" - CUSTOM_WORKSPACE = "${WORKSPACE}/${aws_gw_name}" - HOMEgfs = "${CUSTOM_WORKSPACE}" + CUSTOM_WORKSPACE = "${WORKSPACE}" + HOMEgfs = "${CUSTOM_WORKSPACE}/${aws_gw_name}" sh(script: "rm -Rf ${CUSTOM_WORKSPACE}/${aws_gw_name}; mkdir -p ${CUSTOM_WORKSPACE}/${aws_gw_name}") sh(script: "rm -Rf ${CUSTOM_WORKSPACE}/RUNTESTS; mkdir -p ${CUSTOM_WORKSPACE}/RUNTESTS") sh(script: """${GH} pr edit ${env.CHANGE_ID} --repo ${repo_url} --add-label "CI-${Machine}-Building" --remove-label "CI-${Machine}-Ready" """) @@ -106,18 +106,47 @@ pipeline { ws(HOMEgfs) { echo "Checking out the code on ${Machine} using scm in ${HOMEgfs}" try { - echo "Check out code with scm" - checkout scm - // sh(script: "git clone --recursive ${repo_url}") - // sh(script: "ls ${HOMEgfs}") + echo "Getting Common Workspace for ${Machine}" + echo "Common Workspace for ${Machine} is ${custom_workspace[machine]}/${env.CHANGE_ID}" + echo "WORKSPACE: {WORKSPACE}" + echo "CUSTOM_WORKSPACE: {CUSTOM_WORKSPACE}" + + checkout([$class: 'GitSCM', + branches: [[name: "develop"]], + doGenerateSubmoduleConfigurations: false, + extensions: [[$class: 'SubmoduleOption', + disableSubmodules: false, + parentCredentials: false, + recursiveSubmodules: true, + reference: '', + trackingSubmodules: false], + [$class: 'CleanBeforeCheckout'], + [$class: 'CleanCheckout']], + submoduleCfg: [], + userRemoteConfigs: [[url: ${repo_url}]]]) + //sh(script: "git clone --recursive ${repo_url}") + sh(script: "pwd") + sh(script: "ls ${HOMEgfs}") } catch (Exception e) { echo "Failed to checkout the code on ${Machine} using scm in ${HOMEgfs}, try again ..." 
sleep time: 45, unit: 'SECONDS' try { - echo "Check out code with scm" - checkout scm - // sh(script: "git clone --recursive ${repo_url}") - // sh(script: "ls ${HOMEgfs}") + // checkout scm + checkout([$class: 'GitSCM', + branches: [[name: "develop"]], + doGenerateSubmoduleConfigurations: false, + extensions: [[$class: 'SubmoduleOption', + disableSubmodules: false, + parentCredentials: false, + recursiveSubmodules: true, + reference: '', + trackingSubmodules: false], + [$class: 'CleanBeforeCheckout'], + [$class: 'CleanCheckout']], + submoduleCfg: [], + userRemoteConfigs: [[url: ${repo_url}]]]) + //sh(script: "git clone --recursive ${repo_url}") + sh(script: "ls ${HOMEgfs}") } catch (Exception ee) { echo "Failed to checkout the code on ${Machine} using scm in ${HOMEgfs}: ${ee.getMessage()}" if (env.CHANGE_ID) { @@ -130,15 +159,14 @@ pipeline { def gist_url = "" def error_logs = "" def error_logs_message = "" - HOMEgfs = "${CUSTOM_WORKSPACE}/${aws_gw_name}" + HOMEgfs = "${CUSTOM_WORKSPACE}/${aws_gw_name}/${aws_gw_name}" dir("${HOMEgfs}/sorc") { try { sh(script: 'ls *.sh') // list files here to make sure all files exist. sh(script: 'ls ./build_compute.sh') // list files here to make sure all files exist. 
- sh(script: './build_compute.sh gfs') // build the ${aws_gw_name} executables - sh(script: './build_compute.sh gefs') // build the ${aws_gw_name} executables + sh(script: './build_compute.sh gfs') // build the global-workflow executables } catch (Exception error_build) { - echo "Failed to build ${aws_gw_name}: ${error_build.getMessage()}" + echo "Failed to build global-workflow: ${error_build.getMessage()}" if ( fileExists("logs/error.logs") ) { def fileContent = readFile 'logs/error.logs' def lines = fileContent.readLines() @@ -167,10 +195,10 @@ pipeline { echo "Failed to comment on PR: ${error_comment.getMessage()}" } STATUS = 'Failed' - error("Failed to build ${aws_gw_name} on ${Machine}") + error("Failed to build global-workflow on ${Machine}") } STATUS = 'Failed' - error("Failed to build ${aws_gw_name} on ${Machine} and no error.logs file found") + error("Failed to build global-workflow on ${Machine} and no error.logs file found") } sh(script: './link_workflow.sh') } @@ -206,7 +234,6 @@ pipeline { script { env.RUNTESTS = "${CUSTOM_WORKSPACE}/RUNTESTS" try { - echo "HOMEgfs: ${HOMEgfs}" error_output = sh(script: """ source ${HOMEgfs}/workflow/gw_setup.sh ${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh create_experiment ${HOMEgfs}/ci/cases/pr/${caseName}.yaml @@ -225,10 +252,9 @@ pipeline { def error_file = "${CUSTOM_WORKSPACE}/RUNTESTS/${pslot}_error.logs" sh(script: " rm -f ${error_file}") try { - echo "HOMEgfs: ${HOMEgfs}" sh(script: """ source ${HOMEgfs}/workflow/gw_setup.sh - ${HOMEgfs}/ci/scripts/run-check_ci.sh ${CUSTOM_WORKSPACE} ${pslot} "${aws_gw_name}" + ${HOMEgfs}/ci/scripts/run-check_ci.sh ${CUSTOM_WORKSPACE} ${pslot} 'global-workflow' """) sh(script: """ source ${HOMEgfs}/workflow/gw_setup.sh @@ -317,7 +343,7 @@ pipeline { echo "Failed to remove custom work directory ${CUSTOM_WORKSPACE} on ${Machine}: ${e.getMessage()}" } } else { - echo "Failed to build and run ${aws_gw_name} in ${CUSTOM_WORKSPACE} on ${Machine}" + echo "Failed to build and run 
global-workflow in ${CUSTOM_WORKSPACE} on ${Machine}" } } } diff --git a/parm/config/gfs/config.resources.AWSPW b/parm/config/gfs/config.resources.AWSPW index 85a7293b06c..0255a9adad3 100644 --- a/parm/config/gfs/config.resources.AWSPW +++ b/parm/config/gfs/config.resources.AWSPW @@ -18,6 +18,12 @@ case ${step} in ;; + "atmos_products" | "oceanice_products" | "wavepostsbs" ) + export PARTITION_BATCH="process" + max_tasks_per_node=24 + ;; + + *) export PARTITION_BATCH="process" max_tasks_per_node=24 From 49b0a87bd09f8728f2935da2dd2d5e6541ad3193 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Fri, 11 Apr 2025 16:36:08 +0000 Subject: [PATCH 039/134] add container to link_workflow.sh --- sorc/link_workflow.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/sorc/link_workflow.sh b/sorc/link_workflow.sh index 12391c398f0..ceec0771f7b 100755 --- a/sorc/link_workflow.sh +++ b/sorc/link_workflow.sh @@ -79,6 +79,7 @@ case "${machine}" in "gaeac5") FIX_DIR="/gpfs/f5/ufs-ard/world-shared/global/glopara/data/fix" ;; "gaeac6") FIX_DIR="/gpfs/f6/drsa-precip3/world-shared/role.glopara/fix" ;; "noaacloud") FIX_DIR="/contrib/global-workflow-shared-data/fix" ;; +"container") FIX_DIR="/contrib/global-workflow-shared-data/fix" ;; *) echo "FATAL: Unknown target machine ${machine}, couldn't set FIX_DIR" exit 1 From d862910d468f1f1770f3f9cefa0783cdf845c324 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Mon, 14 Apr 2025 16:57:05 +0000 Subject: [PATCH 040/134] changes for container, module reset, and machine-id over-write --- ush/detect_machine.sh | 11 +++++++---- ush/module-setup.sh | 5 +++-- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/ush/detect_machine.sh b/ush/detect_machine.sh index 9328ff7b144..62e9b1c296d 100755 --- a/ush/detect_machine.sh +++ b/ush/detect_machine.sh @@ -60,16 +60,19 @@ fi # Overwrite auto-detect with MACHINE if set MACHINE_ID=${MACHINE:-${MACHINE_ID}} +# Overwrite auto-detect if it is in a container. 
+if [[ -d /opt/spack-stack ]]; then + # We are in a container + MACHINE_ID=container +fi + # If MACHINE_ID is no longer UNKNNOWN, return it if [[ "${MACHINE_ID}" != "UNKNOWN" ]]; then return fi # Try searching based on paths since hostname may not match on compute nodes -if [[ -d /opt/spack-stack ]]; then - # We are in a container - MACHINE_ID=container -elif [[ -d /lfs/h3 ]]; then +if [[ -d /lfs/h3 ]]; then # We are on NOAA Cactus or Dogwood MACHINE_ID=wcoss2 elif [[ -d /lfs/h1 && ! -d /lfs/h3 ]]; then diff --git a/ush/module-setup.sh b/ush/module-setup.sh index f754150cd8f..13f9ee25c3e 100755 --- a/ush/module-setup.sh +++ b/ush/module-setup.sh @@ -103,10 +103,11 @@ elif [[ ${MACHINE_ID} = discover* ]]; then elif [[ $MACHINE_ID = container ]] ; then # We are in a container - if ( ! eval module help > /dev/null 2>&1 ) ; then + #if ( ! eval module help > /dev/null 2>&1 ) ; then source /usr/lmod/lmod/init/bash - fi + #fi module purge + unset MODULEPATH # TODO: This can likely be made more general once other cloud # platforms come online. From 94a4ffae3e29b86d08c49f8c9a41eecf3992dd5f Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Tue, 6 May 2025 22:37:52 +0000 Subject: [PATCH 041/134] add more for container --- dev/workflow/hosts/container.yaml | 2 +- env/CONTAINER.env | 6 ++--- parm/config/gfs/config.resources | 6 +++-- parm/config/gfs/config.resources.CONTAINER | 29 ++++++++++++++++++++++ 4 files changed, 37 insertions(+), 6 deletions(-) create mode 100644 parm/config/gfs/config.resources.CONTAINER diff --git a/dev/workflow/hosts/container.yaml b/dev/workflow/hosts/container.yaml index b353f002730..b30ab5d8ecc 100644 --- a/dev/workflow/hosts/container.yaml +++ b/dev/workflow/hosts/container.yaml @@ -20,7 +20,7 @@ CHGRP_CMD: 'chgrp rstprod' # TODO: This is not yet supported. # HPSS properties HPSS_PROJECT: emc-global #TODO: See `ATARDIR` below. ARCHCOM_TO: 'local' -ATARDIR: '' # TODO: This will not yet work from AWS. 
+ATARDIR: '' # TODO: This will not yet work from CONTAINER. CHGRP_RSTPROD: 'YES' CHGRP_CMD: 'chgrp rstprod' # TODO: This is not yet supported. # Features diff --git a/env/CONTAINER.env b/env/CONTAINER.env index 049b67439b2..eff2df8b931 100755 --- a/env/CONTAINER.env +++ b/env/CONTAINER.env @@ -33,7 +33,7 @@ if [[ -n "${ntasks:-}" && -n "${max_tasks_per_node:-}" && -n "${tasks_per_node:- fi APRUN_default="${launcher} -n ${ntasks}" else - echo "ERROR config.resources must be sourced before sourcing AWSPW.env" + echo "ERROR config.resources must be sourced before sourcing CONTAINER.env" exit 2 fi @@ -41,7 +41,7 @@ if [[ "${step}" = "prep" ]] || [[ "${step}" = "prepbufr" ]]; then export POE="NO" export BACK="NO" - export sys_tp="AWSPW" + export sys_tp="CONTAINER" export launcher_PREP="srun" elif [[ "${step}" = "prepsnowobs" ]]; then @@ -81,6 +81,6 @@ elif [[ "${step}" = "oceanice_products" ]]; then elif [[ "${step}" = "atmos_products" ]]; then - export USE_CFP="YES" # Use MPMD for downstream product generation on AWS + export USE_CFP="YES" # Use MPMD for downstream product generation in CONTAINER fi diff --git a/parm/config/gfs/config.resources b/parm/config/gfs/config.resources index 2d08d75b450..e769ed1263e 100644 --- a/parm/config/gfs/config.resources +++ b/parm/config/gfs/config.resources @@ -92,8 +92,10 @@ case ${machine} in mem_node_max="" ;; "CONTAINER") - max_tasks_per_node=1 - # TODO Supply a max mem/node value for a container + export PARTITION_BATCH="compute" + npe_node_max=48 + max_tasks_per_node=48 + # TODO Supply a max mem/node value for CONTAINER # shellcheck disable=SC2034 mem_node_max="" ;; diff --git a/parm/config/gfs/config.resources.CONTAINER b/parm/config/gfs/config.resources.CONTAINER new file mode 100644 index 00000000000..17a3924d29c --- /dev/null +++ b/parm/config/gfs/config.resources.CONTAINER @@ -0,0 +1,29 @@ +#! 
/usr/bin/env bash + +# AWS-specific job resources + +export is_exclusive="True" +unset memory +unset "memory_${RUN}" + +step=$1 + +case ${step} in + "fcst" | "efcs" | "wavepostpnt") + export PARTITION_BATCH="compute" + unset PARTITION_SERVICE + max_tasks_per_node=48 + tasks_per_node=48 + ;; + + *) + export PARTITION_BATCH="process" + unset PARTITION_SERVICE + max_tasks_per_node=24 + tasks_per_node=24 + ;; + +esac + +export max_tasks_per_node +export tasks_per_node From 5281e0b42870c6f2203f8fa7c0783ad6324a4710 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 7 May 2025 21:35:43 +0000 Subject: [PATCH 042/134] trying run in container --- modulefiles/module_base.container.lua | 13 +++++++------ ush/detect_machine.sh | 12 ++++++------ ush/load_fv3gfs_modules.sh | 2 ++ versions/run.container.ver | 2 +- versions/spack.ver | 6 +++--- 5 files changed, 19 insertions(+), 16 deletions(-) diff --git a/modulefiles/module_base.container.lua b/modulefiles/module_base.container.lua index 733ff36d97a..5a850c51d94 100644 --- a/modulefiles/module_base.container.lua +++ b/modulefiles/module_base.container.lua @@ -2,20 +2,21 @@ help([[ Load environment to run GFS in container ]]) -local spack_mod_path=(os.getenv("spack_mod_path") or "None") -prepend_path("MODULEPATH", spack_mod_path) +prepend_path("MODULEPATH", "/opt/spack-stack/spack-stack-1.6.0/envs/unified-env/install/modulefiles/intel-oneapi-mpi/2021.9.0/intel/2021.10.0") +prepend_path("MODULEPATH", "/opt/spack-stack/spack-stack-1.6.0/envs/unified-env/install/modulefiles/Core") +prepend_path("MODULEPATH", "/opt/spack-stack/spack-stack-1.6.0/envs/fms-2024.01/install/modulefiles/Core") load("gnu") load(pathJoin("stack-intel", (os.getenv("stack_intel_ver") or "None"))) load(pathJoin("stack-intel-oneapi-mpi", (os.getenv("stack_impi_ver") or "None"))) unload("gnu") -load(pathJoin("python", (os.getenv("python_ver") or "None"))) +-- load(pathJoin("python", (os.getenv("python_ver") or "None"))) load(pathJoin("jasper", 
(os.getenv("jasper_ver") or "None"))) load(pathJoin("libpng", (os.getenv("libpng_ver") or "None"))) -load(pathJoin("cdo", (os.getenv("cdo_ver") or "None"))) ---load(pathJoin("R", (os.getenv("R_ver") or "None"))) +-- load(pathJoin("cdo", (os.getenv("cdo_ver") or "None"))) +-- load(pathJoin("R", (os.getenv("R_ver") or "None"))) load(pathJoin("hdf5", (os.getenv("hdf5_ver") or "None"))) load(pathJoin("netcdf-c", (os.getenv("netcdf_c_ver") or "None"))) @@ -25,7 +26,7 @@ load(pathJoin("nco", (os.getenv("nco_ver") or "None"))) load(pathJoin("prod_util", (os.getenv("prod_util_ver") or "None"))) load(pathJoin("grib-util", (os.getenv("grib_util_ver") or "None"))) load(pathJoin("g2tmpl", (os.getenv("g2tmpl_ver") or "None"))) -load(pathJoin("gsi-ncdiag", (os.getenv("gsi_ncdiag_ver") or "None"))) +-- load(pathJoin("gsi-ncdiag", (os.getenv("gsi_ncdiag_ver") or "None"))) load(pathJoin("crtm", (os.getenv("crtm_ver") or "None"))) load(pathJoin("bufr", (os.getenv("bufr_ver") or "None"))) load(pathJoin("wgrib2", (os.getenv("wgrib2_ver") or "None"))) diff --git a/ush/detect_machine.sh b/ush/detect_machine.sh index 62e9b1c296d..5db20325d35 100755 --- a/ush/detect_machine.sh +++ b/ush/detect_machine.sh @@ -8,6 +8,12 @@ # # Thank you for your contribution +# Overwrite auto-detect if it is in a container. +if [[ -d /opt/spack-stack ]]; then + # We are in a container + MACHINE_ID=container +fi + # If the MACHINE_ID variable is set, skip this script. if [[ -n "${MACHINE_ID:-}" ]]; then return @@ -60,12 +66,6 @@ fi # Overwrite auto-detect with MACHINE if set MACHINE_ID=${MACHINE:-${MACHINE_ID}} -# Overwrite auto-detect if it is in a container. 
-if [[ -d /opt/spack-stack ]]; then - # We are in a container - MACHINE_ID=container -fi - # If MACHINE_ID is no longer UNKNNOWN, return it if [[ "${MACHINE_ID}" != "UNKNOWN" ]]; then return diff --git a/ush/load_fv3gfs_modules.sh b/ush/load_fv3gfs_modules.sh index 8906ad93257..3b909f07eed 100755 --- a/ush/load_fv3gfs_modules.sh +++ b/ush/load_fv3gfs_modules.sh @@ -11,6 +11,8 @@ if [[ "${DEBUG_WORKFLOW:-NO}" == "NO" ]]; then echo "Loading modules quietly..." set +x fi +set -x +export HOMEgfs=/opt/global-workflow-cloud # Setup runtime environment by loading modules ulimit_s=$( ulimit -S -s ) diff --git a/versions/run.container.ver b/versions/run.container.ver index 3b6dc77c22e..391de348489 100644 --- a/versions/run.container.ver +++ b/versions/run.container.ver @@ -1,5 +1,5 @@ export stack_intel_ver=2021.10.0 -export stack_impi_ver=2021.10.0 +export stack_impi_ver=2021.9.0 source "${HOMEgfs:-}/versions/spack.ver" export spack_mod_path="/opt/spack-stack/spack-stack-${spack_stack_ver}/envs/unified-env/install/modulefiles/Core" diff --git a/versions/spack.ver b/versions/spack.ver index a212307a372..86728d42500 100644 --- a/versions/spack.ver +++ b/versions/spack.ver @@ -1,6 +1,6 @@ export spack_stack_ver=1.6.0 export cmake_ver=3.23.1 -export python_ver=3.11.6 +export python_ver=3.10.13 export jasper_ver=2.0.32 export libpng_ver=1.6.37 @@ -18,7 +18,7 @@ export bacio_ver=2.4.1 export nemsio_ver=2.5.4 export sigio_ver=2.3.2 export w3emc_ver=2.10.0 -export bufr_ver=11.7.0 +export bufr_ver=12.0.1 export g2_ver=3.4.5 export sp_ver=2.5.0 export ip_ver=4.3.0 @@ -30,7 +30,7 @@ export grib_util_ver=1.3.0 export prod_util_ver=2.1.1 export py_netcdf4_ver=1.5.8 export py_pyyaml_ver=6.0 -export py_jinja2_ver=3.1.2 +export py_jinja2_ver=3.0.3 export py_pandas_ver=1.5.3 export py_python_dateutil_ver=2.8.2 export py_f90nml_ver=1.4.3 From 12c95ad31cfad09d1e326c5e2bba293a44c51a92 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Thu, 8 May 2025 13:59:55 +0000 Subject: [PATCH 043/134] use 
EPIC container branch --- .gitmodules | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/.gitmodules b/.gitmodules index c80d24c03aa..5c7edfe6e93 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,15 +1,21 @@ [submodule "sorc/ufs_model.fd"] path = sorc/ufs_model.fd - url = https://github.com/ufs-community/ufs-weather-model + #url = https://github.com/ufs-community/ufs-weather-model + url = https://github.com/NOAA-EPIC/ufs-weather-model.git + branch = feature/use_container_stack-stack-1.6.0 [submodule "sorc/wxflow"] path = sorc/wxflow url = https://github.com/NOAA-EMC/wxflow [submodule "sorc/gfs_utils.fd"] path = sorc/gfs_utils.fd - url = https://github.com/NOAA-EMC/gfs-utils + #url = https://github.com/NOAA-EMC/gfs-utils + url = https://github.com/NOAA-EPIC/gfs-utils.git + branch = feature/use_container_stack-stack-1.6.0 [submodule "sorc/ufs_utils.fd"] path = sorc/ufs_utils.fd - url = https://github.com/ufs-community/UFS_UTILS.git + #url = https://github.com/ufs-community/UFS_UTILS.git + url = https://github.com/NOAA-EPIC/UFS_UTILS-cloud.git + branch = feature/use_container_stack-stack-1.6.0 [submodule "sorc/verif-global.fd"] path = sorc/verif-global.fd url = https://github.com/NOAA-EMC/EMC_verif-global.git From def68825feabf037bb65858753e38f48c27a2484 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 14 May 2025 03:11:51 +0000 Subject: [PATCH 044/134] run on hera with container --- dev/workflow/hosts/hera.yaml | 4 ++- env/HERA.env | 2 ++ scripts/exglobal_forecast.sh | 9 ++++++ ush/forecast_postdet.sh | 2 ++ ush/forecast_predet.sh | 54 ++++++++++++++++++------------------ ush/load_fv3gfs_modules.sh | 1 - versions/spack.ver | 4 +-- 7 files changed, 45 insertions(+), 31 deletions(-) diff --git a/dev/workflow/hosts/hera.yaml b/dev/workflow/hosts/hera.yaml index 449cee84a7c..4685bc61e5d 100644 --- a/dev/workflow/hosts/hera.yaml +++ b/dev/workflow/hosts/hera.yaml @@ -5,7 +5,8 @@ BASE_DATA: '/scratch1/NCEPDEV/global/glopara/data' BASE_IC: 
'/scratch1/NCEPDEV/global/glopara/data/ICSDIR' AERO_INPUTS_DIR: /scratch1/NCEPDEV/global/glopara/data/GEFS_ExtData/20250310 PACKAGEROOT: '/scratch1/NCEPDEV/global/glopara/nwpara' -HOMEDIR: '/scratch1/NCEPDEV/global/${USER}' +#HOMEDIR: '/scratch1/NCEPDEV/global/${USER}' +HOMEDIR: '/scratch2/NAGAPE/epic/Wei.Huang' STMP: '/scratch1/NCEPDEV/stmp2/${USER}' PTMP: '/scratch1/NCEPDEV/stmp4/${USER}' NOSCRUB: '${HOMEDIR}' @@ -28,5 +29,6 @@ CHGRP_CMD: 'chgrp rstprod' SUPPORTED_RESOLUTIONS: ['C1152', 'C768', 'C384', 'C192', 'C96', 'C48'] DO_ARCHCOM: 'YES' DO_AWIPS: 'NO' +KEEPDATA: 'YES' MAKE_NSSTBUFR: 'NO' MAKE_ACFTBUFR: 'NO' diff --git a/env/HERA.env b/env/HERA.env index e9fc5de4ee9..964de4c23a2 100755 --- a/env/HERA.env +++ b/env/HERA.env @@ -234,6 +234,8 @@ elif [[ "${step}" = "eupd" ]]; then elif [[ "${step}" = "fcst" ]] || [[ "${step}" = "efcs" ]]; then + export launcher="srun" + (( nnodes = (ntasks+tasks_per_node-1)/tasks_per_node )) (( ufs_ntasks = nnodes*tasks_per_node )) # With ESMF threading, the model wants to use the full node diff --git a/scripts/exglobal_forecast.sh b/scripts/exglobal_forecast.sh index 0abf9a14e34..8c127a8d82b 100755 --- a/scripts/exglobal_forecast.sh +++ b/scripts/exglobal_forecast.sh @@ -176,8 +176,17 @@ else fi ${NCP} "${EXECgfs}/${FCSTEXEC}" "${DATA}/" +module reset ${APRUN_UFS} "${DATA}/${FCSTEXEC}" 1>&1 2>&2 && true export err=$? 
+source "${USHgfs}/forecast_predet.sh" # include functions for variable definition +source "${USHgfs}/forecast_det.sh" # include functions for run type determination +source "${USHgfs}/forecast_postdet.sh" # include functions for variables after run type determination +source "${USHgfs}/parsing_ufs_configure.sh" # include functions for ufs_configure processing +source "${USHgfs}/atparse.bash" # include function atparse for parsing @[XYZ] templated files + +#source "${USHgfs}/load_fv3gfs_modules.sh" + err_chk FV3_out diff --git a/ush/forecast_postdet.sh b/ush/forecast_postdet.sh index 8349d6dbaf5..54b329b6165 100755 --- a/ush/forecast_postdet.sh +++ b/ush/forecast_postdet.sh @@ -253,6 +253,8 @@ EOF fi # warm_start == .true. #============================================================================ + make_nh=".true." + na_init=1 #============================================================================ if [[ "${QUILTING}" = ".true." ]] && [[ "${OUTPUT_GRID}" = "gaussian_grid" ]]; then diff --git a/ush/forecast_predet.sh b/ush/forecast_predet.sh index 918b4417846..588284233c0 100755 --- a/ush/forecast_predet.sh +++ b/ush/forecast_predet.sh @@ -324,18 +324,18 @@ FV3_predet(){ IEMS=${IEMS:-1} fi - if [[ "${TYPE}" == "nh" ]]; then # non-hydrostatic options + #if [[ "${TYPE}" == "nh" ]]; then # non-hydrostatic options hydrostatic=".false." phys_hydrostatic=".false." # enable heating in hydrostatic balance in non-hydrostatic simulation use_hydro_pressure=".false." # use hydrostatic pressure for physics make_nh=".true." # running in non-hydrostatic mode pass_full_omega_to_physics_in_non_hydrostatic_mode=".true." - else # hydrostatic options - hydrostatic=".true." - phys_hydrostatic=".false." # ignored when hydrostatic = T - use_hydro_pressure=".false." # ignored when hydrostatic = T - make_nh=".false." # running in hydrostatic mode - fi + #else # hydrostatic options + # hydrostatic=".true." + # phys_hydrostatic=".false." 
# ignored when hydrostatic = T + # use_hydro_pressure=".false." # ignored when hydrostatic = T + # make_nh=".false." # running in hydrostatic mode + #fi # Conserve total energy as heat globally consv_te=${consv_te:-1.} # range 0.-1., 1. will restore energy to orig. val. before physics @@ -354,38 +354,38 @@ FV3_predet(){ if [[ "${MONO:0:4}" == "mono" ]]; then # monotonic options d_con=${d_con_mono:-"0."} do_vort_damp=".false." - if [[ "${TYPE}" == "nh" ]]; then # monotonic and non-hydrostatic + #if [[ "${TYPE}" == "nh" ]]; then # monotonic and non-hydrostatic hord_mt=${hord_mt_nh_mono:-"10"} hord_xx=${hord_xx_nh_mono:-"10"} hord_dp=${hord_xx_nh_mono:-"10"} - else # monotonic and hydrostatic - hord_mt=${hord_mt_hydro_mono:-"10"} - hord_xx=${hord_xx_hydro_mono:-"10"} - hord_dp=${hord_xx_hydro_mono:-"10"} - kord_tm=${kord_tm_hydro_mono:-"-12"} - kord_mt=${kord_mt_hydro_mono:-"12"} - kord_wz=${kord_wz_hydro_mono:-"12"} - kord_tr=${kord_tr_hydro_mono:-"12"} - fi + #else # monotonic and hydrostatic + # hord_mt=${hord_mt_hydro_mono:-"10"} + # hord_xx=${hord_xx_hydro_mono:-"10"} + # hord_dp=${hord_xx_hydro_mono:-"10"} + # kord_tm=${kord_tm_hydro_mono:-"-12"} + # kord_mt=${kord_mt_hydro_mono:-"12"} + # kord_wz=${kord_wz_hydro_mono:-"12"} + # kord_tr=${kord_tr_hydro_mono:-"12"} + #fi else # non-monotonic options d_con=${d_con_nonmono:-"1."} do_vort_damp=".true." 
- if [[ "${TYPE}" == "nh" ]]; then # non-monotonic and non-hydrostatic + #if [[ "${TYPE}" == "nh" ]]; then # non-monotonic and non-hydrostatic hord_mt=${hord_mt_nh_nonmono:-"5"} hord_xx=${hord_xx_nh_nonmono:-"5"} hord_dp=${hord_dp_nh_nonmono:-"-5"} - else # non-monotonic and hydrostatic - hord_mt=${hord_mt_hydro_nonmono:-"10"} - hord_xx=${hord_xx_hydro_nonmono:-"10"} - hord_dp=${hord_xx_hydro_nonmono:-"10"} - fi + #else # non-monotonic and hydrostatic + # hord_mt=${hord_mt_hydro_nonmono:-"10"} + # hord_xx=${hord_xx_hydro_nonmono:-"10"} + # hord_dp=${hord_xx_hydro_nonmono:-"10"} + #fi fi - if [[ "${MONO:0:4}" != "mono" && "${TYPE}" == "nh" ]]; then + #if [[ "${MONO:0:4}" != "mono" && "${TYPE}" == "nh" ]]; then vtdm4=${vtdm4_nh_nonmono:-"0.06"} - else - vtdm4=${vtdm4:-"0.05"} - fi + #else + # vtdm4=${vtdm4:-"0.05"} + #fi # Initial conditions are chgres-ed from GFS analysis file nggps_ic=${nggps_ic:-".true."} diff --git a/ush/load_fv3gfs_modules.sh b/ush/load_fv3gfs_modules.sh index 3b909f07eed..d304940b4db 100755 --- a/ush/load_fv3gfs_modules.sh +++ b/ush/load_fv3gfs_modules.sh @@ -12,7 +12,6 @@ if [[ "${DEBUG_WORKFLOW:-NO}" == "NO" ]]; then set +x fi set -x -export HOMEgfs=/opt/global-workflow-cloud # Setup runtime environment by loading modules ulimit_s=$( ulimit -S -s ) diff --git a/versions/spack.ver b/versions/spack.ver index 86728d42500..301cb65e107 100644 --- a/versions/spack.ver +++ b/versions/spack.ver @@ -1,6 +1,6 @@ export spack_stack_ver=1.6.0 export cmake_ver=3.23.1 -export python_ver=3.10.13 +export python_ver=3.11.6 export jasper_ver=2.0.32 export libpng_ver=1.6.37 @@ -30,7 +30,7 @@ export grib_util_ver=1.3.0 export prod_util_ver=2.1.1 export py_netcdf4_ver=1.5.8 export py_pyyaml_ver=6.0 -export py_jinja2_ver=3.0.3 +export py_jinja2_ver=3.1.2 export py_pandas_ver=1.5.3 export py_python_dateutil_ver=2.8.2 export py_f90nml_ver=1.4.3 From 0d04e51255fe02e2552224ae499384e558b274ba Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 14 May 2025 18:02:19 
+0000 Subject: [PATCH 045/134] add run_gfs_model.sh script --- ush/run_gfs_model.sh | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100755 ush/run_gfs_model.sh diff --git a/ush/run_gfs_model.sh b/ush/run_gfs_model.sh new file mode 100755 index 00000000000..2c11f83e02f --- /dev/null +++ b/ush/run_gfs_model.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +# Set OMP_NUM_THREADS to 1 to avoid oversubscription when doing MPMD +export OMP_NUM_THREADS=1 +export FPATH=/usr/lmod/lmod/libexec +export HOMEgfs=/opt/global-workflow-cloud +source ${HOMEgfs}/versions/run.ver +source /usr/lmod/lmod/init/bash +#module reset +module use ${HOMEgfs}/modulefiles +module load module_base.container +export LD_LIBRARY_PATH=/opt/intel/oneapi/compiler/2023.2.3/linux/compiler/lib/intel64_lin:$LD_LIBRARY_PATH +export LD_LIBRARY_PATH=/opt/intel/oneapi/compiler/2024.0/lib:$LD_LIBRARY_PATH +${HOMEgfs}/sorc/ufs_model.fd/tests/gfs_model.x + From 0fd25c0f15cc486161c149205ace8cdae0d6e20b Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Tue, 20 May 2025 21:08:00 +0000 Subject: [PATCH 046/134] use wgrib2 from container --- modulefiles/module_run.hera.lua | 27 ++++++++++++++++++++++++++ scripts/exglobal_atmos_products.sh | 21 +++++++++++++++----- scripts/exglobal_forecast.sh | 2 +- ush/jjob_header.sh | 3 +++ ush/run_wgrib2.sh | 31 ++++++++++++++++++++++++++++++ 5 files changed, 78 insertions(+), 6 deletions(-) create mode 100644 modulefiles/module_run.hera.lua create mode 100755 ush/run_wgrib2.sh diff --git a/modulefiles/module_run.hera.lua b/modulefiles/module_run.hera.lua new file mode 100644 index 00000000000..1914992ecef --- /dev/null +++ b/modulefiles/module_run.hera.lua @@ -0,0 +1,27 @@ +help([[ +Load environment to run GFS on Hera +]]) + +prepend_path("MODULEPATH", "/scratch1/NCEPDEV/nems/role.epic/spack-stack/spack-stack-1.6.0/envs/gsi-addon-dev-fms-2024.01/install/modulefiles/Core") + +-- load(pathJoin("hpss", (os.getenv("hpss_ver") or "None"))) + +load(pathJoin("stack-intel", 
(os.getenv("stack_intel_ver") or "2021.5.0"))) +load(pathJoin("stack-intel-oneapi-mpi", (os.getenv("stack_impi_ver") or "2021.5.1"))) +load(pathJoin("python", (os.getenv("python_ver") or "3.11.6"))) +load(pathJoin("prod_util", (os.getenv("prod_util_ver") or "2.1.1"))) +load(pathJoin("py-f90nml", (os.getenv("py_f90nml_ver") or "1.4.3"))) +load(pathJoin("py-netcdf4", (os.getenv("py_netcdf4_ver") or "1.5.8"))) +load(pathJoin("py-pyyaml", (os.getenv("py_pyyaml_ver") or "6.0"))) +load(pathJoin("py-jinja2", (os.getenv("py_jinja2_ver") or "3.1.2"))) +load(pathJoin("py-pandas", (os.getenv("py_pandas_ver") or "1.5.3"))) +load(pathJoin("py-numpy", (os.getenv("py_numpy_ver") or "1.23.4"))) +load(pathJoin("py-xarray", (os.getenv("py_xarray_ver") or "2023.7.0"))) +load(pathJoin("py-python-dateutil", (os.getenv("py_python_dateutil_ver") or "2.8.2"))) + +setenv("WGRIB2","wgrib2") +setenv("WGRIB","wgrib") +-- setenv("UTILROOT",(os.getenv("prod_util_ROOT") or "None")) + +whatis("Description: GFS run host environment") + diff --git a/scripts/exglobal_atmos_products.sh b/scripts/exglobal_atmos_products.sh index 812675b40f3..6bcb4712aa7 100755 --- a/scripts/exglobal_atmos_products.sh +++ b/scripts/exglobal_atmos_products.sh @@ -1,7 +1,11 @@ #! 
/usr/bin/env bash +source "${USHgfs}/load_fv3gfs_modules.sh" +module load wgrib2/2.0.8 + # Programs used -export WGRIB2=${WGRIB2:-${wgrib2_ROOT}/bin/wgrib2} +#export WGRIB2=${WGRIB2:-${wgrib2_ROOT}/bin/wgrib2} +export WGRIB2="${HOMEgfs}/bin/run_wgrib2.sh" # Scripts used INTERP_ATMOS_MASTERSH=${INTERP_ATMOS_MASTERSH:-"${USHgfs}/interp_atmos_master.sh"} @@ -45,12 +49,16 @@ MASTER_FILE="${COMIN_ATMOS_MASTER}/${PREFIX}master.grb2${fhr3}" # Get inventory from ${MASTER_FILE} that matches patterns from ${paramlista} # Extract this inventory from ${MASTER_FILE} into a smaller tmpfile or tmpfileb based on paramlista or paramlistb # shellcheck disable=SC2312 -${WGRIB2} "${MASTER_FILE}" | grep -F -f "${paramlista}" | ${WGRIB2} -i -grib "tmpfile_${fhr3}" "${MASTER_FILE}" && true +${WGRIB2} "${MASTER_FILE}" > wgrib2.log +grep -F -f "${paramlista}" wgrib2.log > grep.res +${WGRIB2} -i -grib "tmpfile_${fhr3}" "${MASTER_FILE}" < grep.res export err=$?; err_chk # Do the same as above for ${paramlistb} if [[ ${downset} -eq 2 ]]; then # shellcheck disable=SC2312 - ${WGRIB2} "${MASTER_FILE}" | grep -F -f "${paramlistb}" | ${WGRIB2} -i -grib "tmpfileb_${fhr3}" "${MASTER_FILE}" && true + ${WGRIB2} "${MASTER_FILE}" > wgrib2.log + grep -F -f "${paramlistb}" wgrib2.log > grep.res + ${WGRIB2} -i -grib "tmpfileb_${fhr3}" "${MASTER_FILE}" < grep.res export err=$?; err_chk fi @@ -83,7 +91,9 @@ for (( nset=1 ; nset <= downset ; nset++ )); do tmpfile="tmpfile${grp}_${fhr3}" # shellcheck disable=SC2312 - ncount=$(${WGRIB2} "${tmpfile}" | wc -l) + #ncount=$(${WGRIB2} "${tmpfile}" | wc -l) + ${WGRIB2} "${tmpfile}" > wgrib2.log + ncount=$(cat wgrib2.log | wc -l) if (( nproc > ncount )); then echo "WARNING: Total no. of available processors '${nproc}' exceeds no. 
of records '${ncount}' in ${tmpfile}" echo "Reduce nproc to ${ncount} (or less) to not waste resources" @@ -102,7 +112,8 @@ for (( nset=1 ; nset <= downset ; nset++ )); do # grep returns 1 if no match is found, so temporarily turn off exit on non-zero rc set +e # shellcheck disable=SC2312 - ${WGRIB2} -d "${last}" "${tmpfile}" | grep -E -i "ugrd|ustm|uflx|u-gwd|land|maxuw" + ${WGRIB2} -d "${last}" "${tmpfile}" > wgrib2.log + grep -E -i "ugrd|ustm|uflx|u-gwd|land|maxuw" wgrib2.log rc=$? set_strict if (( rc == 0 )); then # Matched the grep diff --git a/scripts/exglobal_forecast.sh b/scripts/exglobal_forecast.sh index 8c127a8d82b..efd7868075f 100755 --- a/scripts/exglobal_forecast.sh +++ b/scripts/exglobal_forecast.sh @@ -185,7 +185,7 @@ source "${USHgfs}/forecast_postdet.sh" # include functions for variables after r source "${USHgfs}/parsing_ufs_configure.sh" # include functions for ufs_configure processing source "${USHgfs}/atparse.bash" # include function atparse for parsing @[XYZ] templated files -#source "${USHgfs}/load_fv3gfs_modules.sh" +source "${USHgfs}/load_fv3gfs_modules.sh" err_chk diff --git a/ush/jjob_header.sh b/ush/jjob_header.sh index 0884e8db1ea..44992e80841 100755 --- a/ush/jjob_header.sh +++ b/ush/jjob_header.sh @@ -42,6 +42,9 @@ _calling_script="${BASH_SOURCE[1]}" source "${HOMEgfs}/ush/preamble.sh" +module reset +module use ${HOMEgfs}/modulefiles +module load module_run.hera OPTIND=1 while getopts "c:e:" option; do diff --git a/ush/run_wgrib2.sh b/ush/run_wgrib2.sh new file mode 100755 index 00000000000..6db8265cd44 --- /dev/null +++ b/ush/run_wgrib2.sh @@ -0,0 +1,31 @@ +#!/bin/bash +#img=/scratch2/NAGAPE/epic/Wei.Huang/src/gw-container-spack-stack-1.6.0/gw-container.sif + img=/scratch2/NAGAPE/epic/Wei.Huang/src/gw-container-spack-stack-1.6.0/gw-container + cmd=/opt/global-workflow-cloud/ush/run_wgrib2.sh + arg="$@" +#echo running: singularity exec "${img}" $cmd $arg + singularity exec ${img} $cmd $arg + +module reset +unset MACHINE_ID +export 
HOMEgfs=/scratch2/NAGAPE/epic/Wei.Huang/src/global-workflow-cloud +# Find module command and purge: +source "${HOMEgfs}/ush/detect_machine.sh" +source "${HOMEgfs}/ush/module-setup.sh" + +# Source versions file for runtime +source "${HOMEgfs}/versions/run.ver" + +# Load our modules: +module use "${HOMEgfs}/modulefiles" + +case "${MACHINE_ID}" in + "wcoss2" | "hera" | "orion" | "hercules" | "gaeac5" | "gaeac6" | "jet" | "s4" | "noaacloud" | "container") + module load "module_base.${MACHINE_ID}" + ;; + *) + echo "WARNING: UNKNOWN PLATFORM" + ;; +esac + +module load prod_util From c3866ac8b6c9eb30cd2650504e5ed8340e0e42bf Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 21 May 2025 16:34:14 +0000 Subject: [PATCH 047/134] using python for stage-ic --- dev/jobs/stage_ic.sh | 10 +++++----- jobs/JGLOBAL_STAGE_IC | 3 ++- ush/jjob_header.sh | 9 ++++++++- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/dev/jobs/stage_ic.sh b/dev/jobs/stage_ic.sh index d23ab42af0c..cf261fae607 100755 --- a/dev/jobs/stage_ic.sh +++ b/dev/jobs/stage_ic.sh @@ -3,11 +3,11 @@ set -x # Source FV3GFS workflow modules -. "${HOMEgfs}/ush/load_fv3gfs_modules.sh" -status=$? -if [[ "${status}" -ne 0 ]]; then - exit "${status}" -fi +# . "${HOMEgfs}/ush/load_fv3gfs_modules.sh" +# status=$? +# if [[ "${status}" -ne 0 ]]; then +# exit "${status}" +# fi export job="stage_ic" export jobid="${job}.$$" diff --git a/jobs/JGLOBAL_STAGE_IC b/jobs/JGLOBAL_STAGE_IC index dbae7f46eef..cbac3822515 100755 --- a/jobs/JGLOBAL_STAGE_IC +++ b/jobs/JGLOBAL_STAGE_IC @@ -2,8 +2,9 @@ source "${HOMEgfs}/ush/jjob_header.sh" -e "stage_ic" -c "base stage_ic" +module purge # Execute staging -"${SCRgfs}/exglobal_stage_ic.py" +${HOMEgfs}/bin/run_python.sh ${SCRgfs}/exglobal_stage_ic.py err=$? 
############################################################### diff --git a/ush/jjob_header.sh b/ush/jjob_header.sh index 44992e80841..e21ff0b9dfb 100755 --- a/ush/jjob_header.sh +++ b/ush/jjob_header.sh @@ -40,12 +40,16 @@ # - $pid : Override the default process id # [default: $$] +echo "jjob_header.sh part 1" _calling_script="${BASH_SOURCE[1]}" source "${HOMEgfs}/ush/preamble.sh" -module reset + +echo "jjob_header.sh part 2" +module purge module use ${HOMEgfs}/modulefiles module load module_run.hera +echo "jjob_header.sh part 3" OPTIND=1 while getopts "c:e:" option; do case "${option}" in @@ -68,6 +72,7 @@ if [[ -z ${env_job} ]]; then err_chk "FATAL ERROR: [${BASH_SOURCE[0]}]: Must specify a job name with -e" fi +echo "jjob_header.sh part 4" ############################################## # make temp directory ############################################## @@ -93,6 +98,7 @@ export pgmerr=errfile export pgm=${pgm:-} +echo "jjob_header.sh part 5" ############################################## # Run setpdy and initialize PDY variables ############################################## @@ -114,6 +120,7 @@ for config in "${configs[@]:-''}"; do done +echo "jjob_header.sh part 6" ########################################## # Source machine runtime environment ########################################## From 4773dcc770e3071f6c16134d01223b88d6077bc1 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 21 May 2025 22:56:26 +0000 Subject: [PATCH 048/134] ursa changes --- c48atm.sh | 16 +++++++++++++++ modulefiles/module_base.ursa.lua | 30 +++++++++++++++++++++++++++++ modulefiles/module_gwci.ursa.lua | 12 ++++++++++++ modulefiles/module_gwsetup.ursa.lua | 13 +++++++++++++ modulefiles/module_run.ursa.lua | 19 ++++++++++++++++++ sorc/link_workflow.sh | 1 + ush/detect_machine.sh | 6 ++++++ versions/build.ursa.ver | 5 +++++ versions/run.ursa.ver | 9 +++++++++ 9 files changed, 111 insertions(+) create mode 100755 c48atm.sh create mode 100644 modulefiles/module_base.ursa.lua create mode 
100644 modulefiles/module_gwci.ursa.lua create mode 100644 modulefiles/module_gwsetup.ursa.lua create mode 100644 modulefiles/module_run.ursa.lua create mode 100644 versions/build.ursa.ver create mode 100644 versions/run.ursa.ver diff --git a/c48atm.sh b/c48atm.sh new file mode 100755 index 00000000000..f754586cc36 --- /dev/null +++ b/c48atm.sh @@ -0,0 +1,16 @@ +#!/bin/bash + + set -x + + rundir=/scratch4/NAGAPE/epic/Wei.Huang/run + mkdir -p ${rundir} + + source ~/.bashrc + source dev/ush/gw_setup.sh + + HPC_ACCOUNT=epic \ + pslot=c48atm \ + RUNTESTS=${rundir} \ + dev/workflow/create_experiment.py \ + --yaml dev/ci/cases/pr/C48_ATM.yaml + diff --git a/modulefiles/module_base.ursa.lua b/modulefiles/module_base.ursa.lua new file mode 100644 index 00000000000..655c0725219 --- /dev/null +++ b/modulefiles/module_base.ursa.lua @@ -0,0 +1,30 @@ +help([[ +Load environment to run GFS on Ursa +]]) + +local spack_mod_path=(os.getenv("spack_mod_path") or "None") +prepend_path("MODULEPATH", spack_mod_path) + +-- load(pathJoin("stack-intel", (os.getenv("stack_intel_ver") or "None"))) +-- load(pathJoin("stack-intel-oneapi-mpi", (os.getenv("stack_impi_ver") or "None"))) +-- load(pathJoin("python", (os.getenv("python_ver") or "None"))) + +load(pathJoin("hpss", (os.getenv("hpss_ver") or "None"))) +load(pathJoin("gempak", (os.getenv("gempak_ver") or "None"))) +load(pathJoin("prod_util", (os.getenv("prod_util_ver") or "None"))) + +setenv("WGRIB2","wgrib2") + +-- Stop gap fix for wgrib with spack-stack 1.6.0 +-- TODO Remove this when spack-stack issue #1097 is resolved +-- setenv("WGRIB","wgrib") +-- setenv("UTILROOT",(os.getenv("prod_util_ROOT") or "None")) + +-- prepend_path("MODULEPATH", pathJoin("/scratch1/NCEPDEV/global/glopara/git/prepobs/v" .. (os.getenv("prepobs_run_ver") or "None"), "modulefiles")) +-- load(pathJoin("prepobs", (os.getenv("prepobs_run_ver") or "None"))) + +-- prepend_path("MODULEPATH", pathJoin("/scratch1/NCEPDEV/global/glopara/git/Fit2Obs/v" .. 
(os.getenv("fit2obs_ver") or "None"), "modulefiles")) +-- load(pathJoin("fit2obs", (os.getenv("fit2obs_ver") or "None"))) + +whatis("Description: GFS run environment") + diff --git a/modulefiles/module_gwci.ursa.lua b/modulefiles/module_gwci.ursa.lua new file mode 100644 index 00000000000..4cd5a4d3c7a --- /dev/null +++ b/modulefiles/module_gwci.ursa.lua @@ -0,0 +1,12 @@ +help([[ +Load environment to run GFS workflow setup scripts on Ursa +]]) + +prepend_path("MODULEPATH", "/scratch3/NCEPDEV/nems/role.epic/spack-stack/spack-stack-1.6.0/envs/gsi-addon-dev-rocky8/install/modulefiles/Core") + +load(pathJoin("stack-intel", os.getenv("2021.5.0"))) +load(pathJoin("stack-intel-oneapi-mpi", os.getenv("2021.5.1"))) + +-- load(pathJoin("wgrib2", "2.0.8")) + +whatis("Description: GFS run setup CI environment") diff --git a/modulefiles/module_gwsetup.ursa.lua b/modulefiles/module_gwsetup.ursa.lua new file mode 100644 index 00000000000..f22b6dd94c8 --- /dev/null +++ b/modulefiles/module_gwsetup.ursa.lua @@ -0,0 +1,13 @@ +help([[ +Load environment to run GFS workflow setup scripts on Ursa +]]) + +load(pathJoin("rocoto")) + +-- prepend_path("MODULEPATH", "/scratch1/NCEPDEV/nems/role.epic/spack-stack/spack-stack-1.6.0/envs/gsi-addon-dev-rocky8/install/modulefiles/Core") + +-- local stack_intel_ver=os.getenv("stack_intel_ver") or "2021.5.0" + +-- load(pathJoin("stack-intel", stack_intel_ver)) + +whatis("Description: GFS run setup environment") diff --git a/modulefiles/module_run.ursa.lua b/modulefiles/module_run.ursa.lua new file mode 100644 index 00000000000..3cc54a308d6 --- /dev/null +++ b/modulefiles/module_run.ursa.lua @@ -0,0 +1,19 @@ +help([[ +Load environment to run GFS on Ursa +]]) + +prepend_path("MODULEPATH", "/scratch3/NCEPDEV/nems/role.epic/spack-stack/spack-stack-1.6.0/envs/gsi-addon-dev-fms-2024.01/install/modulefiles/Core") + +-- load(pathJoin("hpss", (os.getenv("hpss_ver") or "None"))) + +load(pathJoin("stack-intel", (os.getenv("stack_intel_ver") or "2021.5.0"))) 
+load(pathJoin("stack-intel-oneapi-mpi", (os.getenv("stack_impi_ver") or "2021.5.1"))) +load(pathJoin("python", (os.getenv("python_ver") or "3.11.6"))) +load(pathJoin("prod_util", (os.getenv("prod_util_ver") or "2.1.1"))) + +setenv("WGRIB2","wgrib2") +setenv("WGRIB","wgrib") +-- setenv("UTILROOT",(os.getenv("prod_util_ROOT") or "None")) + +whatis("Description: GFS run host environment") + diff --git a/sorc/link_workflow.sh b/sorc/link_workflow.sh index d526191f646..26c6bee125a 100755 --- a/sorc/link_workflow.sh +++ b/sorc/link_workflow.sh @@ -72,6 +72,7 @@ ${LINK_OR_COPY} "${HOMEgfs}/versions/run.${machine}.ver" "${HOMEgfs}/versions/ru case "${machine}" in "wcoss2") FIX_DIR="/lfs/h2/emc/global/noscrub/emc.global/FIX/fix" ;; "hera") FIX_DIR="/scratch1/NCEPDEV/global/glopara/fix" ;; +"ursa") FIX_DIR="/scratch3/NCEPDEV/global/role.glopara/fix" ;; "orion") FIX_DIR="/work2/noaa/global/role-global/fix" ;; "hercules") FIX_DIR="/work2/noaa/global/role-global/fix" ;; "gaeac5") FIX_DIR="/gpfs/f5/ufs-ard/world-shared/global/glopara/data/fix" ;; diff --git a/ush/detect_machine.sh b/ush/detect_machine.sh index 5db20325d35..1be6071fdd5 100755 --- a/ush/detect_machine.sh +++ b/ush/detect_machine.sh @@ -39,6 +39,9 @@ case $(hostname -f) in hfe1[0-2]) MACHINE_ID=hera ;; ### hera10-12 hecflow01) MACHINE_ID=hera ;; ### heraecflow01 + ufe0[1-9]) MACHINE_ID=ursa ;; ### ursa01-09 + ufe1[0-2]) MACHINE_ID=ursa ;; ### ursa10-12 + s4-submit.ssec.wisc.edu) MACHINE_ID=s4 ;; ### s4 fe[1-8]) MACHINE_ID=jet ;; ### jet01-8 @@ -84,6 +87,9 @@ elif [[ -d /mnt/lfs5 ]]; then elif [[ -d /scratch1 ]]; then # We are on NOAA Hera MACHINE_ID=hera +elif [[ -d /scratch3 ]]; then + # We are on NOAA Ursa + MACHINE_ID=ursa elif [[ -d /work ]]; then # We are on MSU Orion or Hercules mount=$(findmnt -n -o SOURCE /home) diff --git a/versions/build.ursa.ver b/versions/build.ursa.ver new file mode 100644 index 00000000000..efe60dbfc10 --- /dev/null +++ b/versions/build.ursa.ver @@ -0,0 +1,5 @@ +export 
stack_intel_ver=2021.5.0 +export stack_impi_ver=2021.5.1 +export spack_env=gsi-addon-dev-fms-2024.01 +source "${HOMEgfs:-}/versions/spack.ver" +export spack_mod_path="/scratch3/NCEPDEV/nems/role.epic/spack-stack/spack-stack-${spack_stack_ver}/envs/${spack_env}/install/modulefiles/Core" diff --git a/versions/run.ursa.ver b/versions/run.ursa.ver new file mode 100644 index 00000000000..04e7e348354 --- /dev/null +++ b/versions/run.ursa.ver @@ -0,0 +1,9 @@ +export stack_intel_ver=2021.5.0 +export stack_impi_ver=2021.5.1 +export spack_env=gsi-addon-dev-fms-2024.01 + +export hpss_ver=hpss + +source "${HOMEgfs:-}/versions/spack.ver" +export spack_mod_path="/scratch3/NCEPDEV/nems/role.epic/spack-stack/spack-stack-${spack_stack_ver}/envs/${spack_env}/install/modulefiles/Core" + From 8d226eade63f9f463049cdfdae2220b657062a13 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Thu, 22 May 2025 14:04:26 +0000 Subject: [PATCH 049/134] for ursa --- bin/run_python.sh | 9 ++++ bin/run_wgrib2.sh | 9 ++++ c48atm.sh | 3 +- dev/parm/config/gfs/config.resources | 6 +++ dev/parm/config/gfs/config.resources.URSA | 64 +++++++++++++++++++++++ dev/workflow/hosts.py | 26 ++++++--- dev/workflow/hosts/ursa.yaml | 34 ++++++++++++ dev/workflow/rocoto/tasks.py | 3 +- dev/workflow/rocoto/workflow_xml.py | 6 ++- ush/jjob_header.sh | 11 +--- ush/run_python.sh | 28 ++++++++++ 11 files changed, 180 insertions(+), 19 deletions(-) create mode 100755 bin/run_python.sh create mode 100755 bin/run_wgrib2.sh create mode 100644 dev/parm/config/gfs/config.resources.URSA create mode 100644 dev/workflow/hosts/ursa.yaml create mode 100644 ush/run_python.sh diff --git a/bin/run_python.sh b/bin/run_python.sh new file mode 100755 index 00000000000..29d0fdf0265 --- /dev/null +++ b/bin/run_python.sh @@ -0,0 +1,9 @@ +#!/bin/bash + img=/scratch4/NAGAPE/epic/Wei.Huang/src/container/gw-container + cmd=/opt/global-workflow-cloud/ush/run_python.sh + arg="$@" + echo "running: singularity exec ${img} $cmd $arg" + singularity exec \ 
+ -B /scratch4 \ + ${img} $cmd $arg + diff --git a/bin/run_wgrib2.sh b/bin/run_wgrib2.sh new file mode 100755 index 00000000000..950a2d4fded --- /dev/null +++ b/bin/run_wgrib2.sh @@ -0,0 +1,9 @@ +#!/bin/bash + img=/scratch4/NAGAPE/epic/Wei.Huang/src/container/gw-container + cmd=/opt/global-workflow-cloud/ush/run_wgrib2.sh + arg="$@" + echo "running: singularity exec ${img} $cmd $arg" + singularity exec \ + -B /scratch4 \ + ${img} $cmd $arg + diff --git a/c48atm.sh b/c48atm.sh index f754586cc36..566ebc57ef4 100755 --- a/c48atm.sh +++ b/c48atm.sh @@ -11,6 +11,7 @@ HPC_ACCOUNT=epic \ pslot=c48atm \ RUNTESTS=${rundir} \ - dev/workflow/create_experiment.py \ + RUNDIR=/scratch4/NAGAPE/epic/Wei.Huang \ + bin/run_python.sh dev/workflow/create_experiment.py \ --yaml dev/ci/cases/pr/C48_ATM.yaml diff --git a/dev/parm/config/gfs/config.resources b/dev/parm/config/gfs/config.resources index b497991b9f6..966fafef1a0 100644 --- a/dev/parm/config/gfs/config.resources +++ b/dev/parm/config/gfs/config.resources @@ -47,6 +47,12 @@ case ${machine} in # shellcheck disable=SC2034 mem_node_max="96GB" ;; + "URSA") + export PARTITION_BATCH="u1-compute" + max_tasks_per_node=96 + # shellcheck disable=SC2034 + mem_node_max="96GB" + ;; "GAEAC5") max_tasks_per_node=128 # shellcheck disable=SC2034 diff --git a/dev/parm/config/gfs/config.resources.URSA b/dev/parm/config/gfs/config.resources.URSA new file mode 100644 index 00000000000..2c728761699 --- /dev/null +++ b/dev/parm/config/gfs/config.resources.URSA @@ -0,0 +1,64 @@ +#! 
/usr/bin/env bash + +# Ursa-specific job resources + +case ${step} in + "prep") + # Run on 7 nodes for memory requirement + tasks_per_node=2 + ;; + + "stage_ic") + tasks_per_node=1 + export PARTITION_BATCH="u1-service" + ;; + + "anal") + if [[ "${CASE}" == "C384" ]]; then + export ntasks=270 + export threads_per_task=8 + export tasks_per_node=$(( max_tasks_per_node / threads_per_task )) + fi + ;; + + "atmanlvar" | "atmensanlobs" | "atmensanlsol") + export tasks_per_node=12 + ;; + + "eupd") + case "${CASE}" in + "C1152" | "C768") + export ntasks=80 + export threads_per_task=20 + ;; + "C384") + export ntasks=80 + ;; + "C192" | "C96" | "C48") + export threads_per_task=4 + ;; + *) + ;; + esac + export tasks_per_node=$(( max_tasks_per_node / threads_per_task )) + ;; + + "ecen") + if [[ "${CASE}" == "C768" || "${CASE}" == "C1152" ]]; then export threads_per_task=6; fi + export tasks_per_node=$(( max_tasks_per_node / threads_per_task )) + ;; + + "upp") + if (( "${CASE:1}" >= 768 )); then + # Run fewer tasks per node for memory + tasks_per_node=20 + fi + ;; + + "epos") + tasks_per_node=20 + ;; + + *) + ;; +esac diff --git a/dev/workflow/hosts.py b/dev/workflow/hosts.py index 91d711b7fe7..793b8b877d9 100644 --- a/dev/workflow/hosts.py +++ b/dev/workflow/hosts.py @@ -15,11 +15,12 @@ class Host: Gather Host specific information. 
""" - SUPPORTED_HOSTS = ['HERA', 'ORION', 'HERCULES', 'WCOSS2', 'CONTAINER', + SUPPORTED_HOSTS = ['HERA', 'ORION', 'HERCULES', 'WCOSS2', 'URSA', 'GAEAC5', 'GAEAC6', 'AWSPW', 'AZUREPW', 'GOOGLEPW'] - def __init__(self, host=None): + def __init__(self, host='URSA'): + print(f'host: {host}') if host is not None and host not in Host.SUPPORTED_HOSTS: raise NotImplementedError(f'{host} is not a supported host.\n' + 'Currently supported hosts are:\n' + @@ -43,8 +44,11 @@ def detect(self) -> None: # Detect the machine name and store in self.machine machine_id = os.getenv('MACHINE_ID', 'UNKNOWN') + machine_id = 'URSA' pw_csp = os.getenv('PW_CSP', 'UNKNOWN') - container = os.getenv('SINGULARITY_NAME', None) + #container = os.getenv('SINGULARITY_NAME', None) + + print(f'machine_id: {machine_id}') # Detect the machine since MACHINE_ID is set, # Additionaly, if PW_CSP is set, then the machine is a cloud machine @@ -53,9 +57,14 @@ def detect(self) -> None: self.machine = f"{pw_csp.upper()}PW" return + print("os.path.exists('/scratch3/NCEPDEV'): ", os.path.exists('/scratch3/NCEPDEV')) # Detect the machine since MACHINE_ID is not set - if os.path.exists('/scratch1/NCEPDEV'): + if os.path.exists('/scratch3/NCEPDEV'): + self.machine = 'URSA' + machine_id = 'URSA' + elif os.path.exists('/scratch1/NCEPDEV'): self.machine = 'HERA' + machine_id = 'HERA' elif os.path.exists('/work/noaa'): self.machine = socket.gethostname().split("-", 1)[0].upper() elif os.path.exists('/lfs/f1'): @@ -64,14 +73,19 @@ def detect(self) -> None: self.machine = 'GAEAC5' elif os.path.exists('/gpfs/f6'): self.machine = 'GAEAC6' - elif container is not None: - self.machine = 'CONTAINER' + #elif container is not None: + # self.machine = 'CONTAINER' elif pw_csp is not None: if pw_csp.lower() not in ['azure', 'aws', 'google']: raise ValueError( f'cloud service provider "{pw_csp}" is not supported.') self.machine = f"{pw_csp.upper()}PW" + print(f'self.machine: {self.machine}') + print('Host.SUPPORTED_HOSTS: ', 
Host.SUPPORTED_HOSTS) + self.machine = 'URSA' + machine_id = 'URSA' + if self.machine not in Host.SUPPORTED_HOSTS: raise NotImplementedError('This machine is not a supported host.\n' + 'Currently supported hosts are:\n' + diff --git a/dev/workflow/hosts/ursa.yaml b/dev/workflow/hosts/ursa.yaml new file mode 100644 index 00000000000..cda4c94d6c9 --- /dev/null +++ b/dev/workflow/hosts/ursa.yaml @@ -0,0 +1,34 @@ +# Paths +DMPDIR: '/scratch3/NCEPDEV/global/role.glopara/dump' +BASE_GIT: '/scratch3/NCEPDEV/global/role.glopara/git' +BASE_DATA: '/scratch3/NCEPDEV/global/role.glopara/data' +#BASE_IC: '/scratch3/NCEPDEV/global/role.glopara/data/ICSDIR' +BASE_IC: '/scratch4/NAGAPE/epic/Wei.Huang/data/ICSDIR' +AERO_INPUTS_DIR: /scratch3/NCEPDEV/global/role.glopara/data/GEFS_ExtData/20250310 +PACKAGEROOT: '/scratch3/NCEPDEV/global/role.glopara/nwpara' +HOMEDIR: '/scratch3/NCEPDEV/global/role.glopara${USER}' +STMP: '/scratch4/NAGAPE/epic/Wei.Huang/run/stmp2' +PTMP: '/scratch4/NAGAPE/epic/Wei.Huang/run/stmp2' +NOSCRUB: '${HOMEDIR}' +COMINsyn: '/scratch3/NCEPDEV/global/role.glopara/com/gfs/prod/syndat' +COMINecmwf: /scratch3/NCEPDEV/global/role.glopara/data/external_gempak/ecmwf +COMINnam: /scratch3/NCEPDEV/global/role.glopara/data/external_gempak/nam +COMINukmet: /scratch3/NCEPDEV/global/role.glopara/data/external_gempak/ukmet +# BQS properties +SCHEDULER: slurm +QUEUE: batch +PARTITION_BATCH: u1-compute +PARTITION_SERVICE: u1-service +# HPSS properties +HPSS_PROJECT: emc-global +ARCHCOM_TO: 'hpss' +ATARDIR: '/NCEPDEV/${HPSS_PROJECT}/1year/${USER}/${machine}/scratch/${PSLOT}' +CHGRP_RSTPROD: 'YES' +CHGRP_CMD: 'chgrp rstprod' +# Features +SUPPORTED_RESOLUTIONS: ['C1152', 'C768', 'C384', 'C192', 'C96', 'C48'] +DO_ARCHCOM: 'YES' +DO_AWIPS: 'NO' +KEEPDATA: 'YES' +MAKE_NSSTBUFR: 'NO' +MAKE_ACFTBUFR: 'NO' diff --git a/dev/workflow/rocoto/tasks.py b/dev/workflow/rocoto/tasks.py index 1208654b589..91633fda35a 100644 --- a/dev/workflow/rocoto/tasks.py +++ b/dev/workflow/rocoto/tasks.py @@ 
-52,7 +52,8 @@ def __init__(self, app_config: AppConfig, run: str) -> None: # Save base in the internal state (never know where it may be needed) self._base = self._configs['base'] - self.HOMEgfs = self._base['HOMEgfs'] + #self.HOMEgfs = self._base['HOMEgfs'] + self.HOMEgfs = '/scratch4/NAGAPE/epic/Wei.Huang/src/global-workflow-cloud' self.rotdir = self._base['ROTDIR'] self.pslot = self._base['PSLOT'] if self.run == "enkfgfs": diff --git a/dev/workflow/rocoto/workflow_xml.py b/dev/workflow/rocoto/workflow_xml.py index d84f02c5b45..f5820a47ae6 100644 --- a/dev/workflow/rocoto/workflow_xml.py +++ b/dev/workflow/rocoto/workflow_xml.py @@ -34,7 +34,8 @@ def __init__(self, app_config: AppConfig, rocoto_config: Dict) -> None: self.use_scrontab = self.host_info.get("USE_SCRONTAB", False) # Add ACCOUNT to host_info, with that from config.base self.host_info.ACCOUNT = self._base['ACCOUNT'] - self.HOMEgfs = self._base['HOMEgfs'] + #self.HOMEgfs = self._base['HOMEgfs'] + self.HOMEgfs = '/scratch4/NAGAPE/epic/Wei.Huang/src/global-workflow-cloud' self.expdir = self._base['EXPDIR'] self.pslot = self._base['PSLOT'] @@ -157,7 +158,8 @@ def _write_crontab(self, crontab_file: str = None, cronint: int = 5) -> None: """ # No point creating a crontab if rocotorun is not available. 
- rocotoruncmd = find_executable('rocotorun') + #rocotoruncmd = find_executable('rocotorun') + rocotoruncmd = '/apps/rocoto/1.3.7/bin/rocotorun' if rocotoruncmd is None: print('Failed to find rocotorun, crontab will not be created') return diff --git a/ush/jjob_header.sh b/ush/jjob_header.sh index e21ff0b9dfb..1582848adff 100755 --- a/ush/jjob_header.sh +++ b/ush/jjob_header.sh @@ -40,16 +40,12 @@ # - $pid : Override the default process id # [default: $$] -echo "jjob_header.sh part 1" _calling_script="${BASH_SOURCE[1]}" source "${HOMEgfs}/ush/preamble.sh" -echo "jjob_header.sh part 2" -module purge -module use ${HOMEgfs}/modulefiles -module load module_run.hera +export PATH=/home/Wei.Huang/prod-util-2.1.1/bin:$PATH +export LD_LIBRARY_PATH=/apps/spack-2024-12/linux-rocky9-x86_64/gcc-11.4.1/intel-oneapi-compilers-2023.2.0-uov33rpz3lplh3hh3v5c6vssbc7ndxuk/lib -echo "jjob_header.sh part 3" OPTIND=1 while getopts "c:e:" option; do case "${option}" in @@ -72,7 +68,6 @@ if [[ -z ${env_job} ]]; then err_chk "FATAL ERROR: [${BASH_SOURCE[0]}]: Must specify a job name with -e" fi -echo "jjob_header.sh part 4" ############################################## # make temp directory ############################################## @@ -98,7 +93,6 @@ export pgmerr=errfile export pgm=${pgm:-} -echo "jjob_header.sh part 5" ############################################## # Run setpdy and initialize PDY variables ############################################## @@ -120,7 +114,6 @@ for config in "${configs[@]:-''}"; do done -echo "jjob_header.sh part 6" ########################################## # Source machine runtime environment ########################################## diff --git a/ush/run_python.sh b/ush/run_python.sh new file mode 100644 index 00000000000..2838560b0a7 --- /dev/null +++ b/ush/run_python.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +export FPATH=/usr/lmod/lmod/libexec + +export HOMEgfs=/opt/global-workflow-cloud +source /usr/lmod/lmod/init/bash +module purge +source 
${HOMEgfs}/versions/run.ver +module use ${HOMEgfs}/modulefiles +module load module_base.container + +#module load python/3.10.13 +#module load py-f90nml/1.4.3 +#module load py-netcdf4/1.5.8 +#module load py-pyyaml/6.0 +#module load py-jinja2/3.1.2 +#module load py-pandas/1.5.3 +#module load py-numpy/1.22.3 +#module load py-xarray/2023.7.0 +#module load py-python-dateutil/2.8.2 + +wxflowPATH="${HOMEgfs}/ush/python" +export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${HOMEgfs}/ush:${wxflowPATH}" + +arg="$@" + +/opt/spack-stack/spack-stack-1.6.0/envs/unified-env/install/intel/2021.10.0/python-3.10.13-h3oyipv/bin/python $arg + From b57bdb6ed42510dbc7267e28d21d8c4cea9038dc Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 28 May 2025 20:40:04 +0000 Subject: [PATCH 050/134] create bin dir --- bin/run_python.sh | 6 ++++-- bin/run_wgrib2.sh | 4 +++- c48atm.sh | 7 ++++--- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/bin/run_python.sh b/bin/run_python.sh index 29d0fdf0265..72d43fa67f8 100755 --- a/bin/run_python.sh +++ b/bin/run_python.sh @@ -1,9 +1,11 @@ #!/bin/bash - img=/scratch4/NAGAPE/epic/Wei.Huang/src/container/gw-container +#containerdir=/gpfs/f6/scratch/Wei.Huang/container + containerdir=/contrib/Wei.Huang/src/gw-container-spack-stack-1.6.0 + img=${containerdir}/wei-gw-container cmd=/opt/global-workflow-cloud/ush/run_python.sh arg="$@" echo "running: singularity exec ${img} $cmd $arg" singularity exec \ - -B /scratch4 \ + -B /gpfs/f6/scratch/Wei.Huang \ ${img} $cmd $arg diff --git a/bin/run_wgrib2.sh b/bin/run_wgrib2.sh index 950a2d4fded..6e45c5d5f37 100755 --- a/bin/run_wgrib2.sh +++ b/bin/run_wgrib2.sh @@ -1,5 +1,7 @@ #!/bin/bash - img=/scratch4/NAGAPE/epic/Wei.Huang/src/container/gw-container +#img=/scratch4/NAGAPE/epic/Wei.Huang/src/container/gw-container + containerdir=/contrib/Wei.Huang/src/gw-container-spack-stack-1.6.0 + img=${containerdir}/wei-gw-container cmd=/opt/global-workflow-cloud/ush/run_wgrib2.sh arg="$@" echo "running: singularity 
exec ${img} $cmd $arg" diff --git a/c48atm.sh b/c48atm.sh index 566ebc57ef4..a2247557757 100755 --- a/c48atm.sh +++ b/c48atm.sh @@ -2,16 +2,17 @@ set -x - rundir=/scratch4/NAGAPE/epic/Wei.Huang/run + rundir=/gpfs/f6/scratch/Wei.Huang/run mkdir -p ${rundir} source ~/.bashrc source dev/ush/gw_setup.sh + source ~/prod_util.env - HPC_ACCOUNT=epic \ + HPC_ACCOUNT=bil-fire8 \ pslot=c48atm \ RUNTESTS=${rundir} \ - RUNDIR=/scratch4/NAGAPE/epic/Wei.Huang \ + RUNDIR=/gpfs/f6/scratch/Wei.Huang/run \ bin/run_python.sh dev/workflow/create_experiment.py \ --yaml dev/ci/cases/pr/C48_ATM.yaml From 856fb2b5dd70929d5af90bd9d5d9e7eb15260ebf Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Thu, 29 May 2025 17:25:07 +0000 Subject: [PATCH 051/134] add run_python and run_wgrib2 to use python and wgrib2 from container --- bin/run_python.sh | 3 ++- bin/run_wgrib2.sh | 3 ++- c48atm.sh | 7 ++++--- dev/workflow/hosts/awspw.yaml | 1 + env/AWSPW.env | 2 +- ush/run_python.sh | 0 6 files changed, 10 insertions(+), 6 deletions(-) mode change 100644 => 100755 ush/run_python.sh diff --git a/bin/run_python.sh b/bin/run_python.sh index 72d43fa67f8..8d576936707 100755 --- a/bin/run_python.sh +++ b/bin/run_python.sh @@ -6,6 +6,7 @@ arg="$@" echo "running: singularity exec ${img} $cmd $arg" singularity exec \ - -B /gpfs/f6/scratch/Wei.Huang \ + -B /contrib \ + -B /lustre \ ${img} $cmd $arg diff --git a/bin/run_wgrib2.sh b/bin/run_wgrib2.sh index 6e45c5d5f37..d39d9bab67e 100755 --- a/bin/run_wgrib2.sh +++ b/bin/run_wgrib2.sh @@ -6,6 +6,7 @@ arg="$@" echo "running: singularity exec ${img} $cmd $arg" singularity exec \ - -B /scratch4 \ + -B /contrib \ + -B /lustre \ ${img} $cmd $arg diff --git a/c48atm.sh b/c48atm.sh index a2247557757..684edf697e2 100755 --- a/c48atm.sh +++ b/c48atm.sh @@ -2,17 +2,18 @@ set -x - rundir=/gpfs/f6/scratch/Wei.Huang/run + rundir=/lustre/Wei.Huang/run mkdir -p ${rundir} source ~/.bashrc source dev/ush/gw_setup.sh source ~/prod_util.env - HPC_ACCOUNT=bil-fire8 \ + 
HPC_ACCOUNT=Wei.Huang \ pslot=c48atm \ RUNTESTS=${rundir} \ - RUNDIR=/gpfs/f6/scratch/Wei.Huang/run \ + RUNDIR=/lustre/Wei.Huang/run/RUNDIR \ + HOMEDIR=/lustre/Wei.Huang \ bin/run_python.sh dev/workflow/create_experiment.py \ --yaml dev/ci/cases/pr/C48_ATM.yaml diff --git a/dev/workflow/hosts/awspw.yaml b/dev/workflow/hosts/awspw.yaml index e0b1aac36fb..c7b8eee0a13 100644 --- a/dev/workflow/hosts/awspw.yaml +++ b/dev/workflow/hosts/awspw.yaml @@ -27,5 +27,6 @@ DO_ARCHCOM: 'NO' DO_TRACKER: 'NO' DO_GENESIS: 'NO' DO_METP: 'NO' +KEEPDATA: 'YES' MAKE_NSSTBUFR: 'YES' MAKE_ACFTBUFR: 'NO' diff --git a/env/AWSPW.env b/env/AWSPW.env index 049b67439b2..ec7abb93ffe 100755 --- a/env/AWSPW.env +++ b/env/AWSPW.env @@ -61,7 +61,7 @@ elif [[ "${step}" = "waveinit" ]] || [[ "${step}" = "waveprep" ]] || [[ "${step} elif [[ "${step}" = "fcst" ]] || [[ "${step}" = "efcs" ]]; then - export launcher="srun --mpi=pmi2 -l" + export launcher="srun --mpi=pmi2" (( nnodes = (ntasks+tasks_per_node-1)/tasks_per_node )) (( ufs_ntasks = nnodes*tasks_per_node )) diff --git a/ush/run_python.sh b/ush/run_python.sh old mode 100644 new mode 100755 From dd257aa67ab663d5167b78ddb34a31bbcc152650 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Fri, 30 May 2025 03:36:27 +0000 Subject: [PATCH 052/134] stage-ic ok on AWS --- dev/parm/config/gfs/config.base.j2 | 3 +++ dev/parm/config/gfs/config.resources | 3 +++ dev/workflow/hosts.py | 26 +++++---------------- dev/workflow/rocoto/tasks.py | 3 ++- dev/workflow/rocoto/workflow_xml.py | 3 ++- dev/workflow/setup_expt.py | 12 ++++++++++ dev/workflow/setup_xml.py | 6 ++++- ush/jjob_header.sh | 3 ++- ush/run_wgrib2.sh | 35 ++++++++++++++-------------- 9 files changed, 53 insertions(+), 41 deletions(-) diff --git a/dev/parm/config/gfs/config.base.j2 b/dev/parm/config/gfs/config.base.j2 index 7394ed3af59..2c4a9762b76 100644 --- a/dev/parm/config/gfs/config.base.j2 +++ b/dev/parm/config/gfs/config.base.j2 @@ -8,6 +8,9 @@ echo "BEGIN: config.base" # Machine environment 
export machine="{{ MACHINE }}" +echo "%0 step: ${step}" +echo "%0 machine: ${machine}" + # EMC parallel or NCO production export RUN_ENVIR="emc" diff --git a/dev/parm/config/gfs/config.resources b/dev/parm/config/gfs/config.resources index 7c29a5da50f..4f96f1e3b01 100644 --- a/dev/parm/config/gfs/config.resources +++ b/dev/parm/config/gfs/config.resources @@ -35,6 +35,9 @@ step=$1 echo "BEGIN: config.resources" +echo "%0 step: ${step}" +echo "%0 machine: ${machine}" + case ${machine} in "WCOSS2") max_tasks_per_node=128 diff --git a/dev/workflow/hosts.py b/dev/workflow/hosts.py index 793b8b877d9..91d711b7fe7 100644 --- a/dev/workflow/hosts.py +++ b/dev/workflow/hosts.py @@ -15,12 +15,11 @@ class Host: Gather Host specific information. """ - SUPPORTED_HOSTS = ['HERA', 'ORION', 'HERCULES', 'WCOSS2', 'URSA', + SUPPORTED_HOSTS = ['HERA', 'ORION', 'HERCULES', 'WCOSS2', 'CONTAINER', 'GAEAC5', 'GAEAC6', 'AWSPW', 'AZUREPW', 'GOOGLEPW'] - def __init__(self, host='URSA'): + def __init__(self, host=None): - print(f'host: {host}') if host is not None and host not in Host.SUPPORTED_HOSTS: raise NotImplementedError(f'{host} is not a supported host.\n' + 'Currently supported hosts are:\n' + @@ -44,11 +43,8 @@ def detect(self) -> None: # Detect the machine name and store in self.machine machine_id = os.getenv('MACHINE_ID', 'UNKNOWN') - machine_id = 'URSA' pw_csp = os.getenv('PW_CSP', 'UNKNOWN') - #container = os.getenv('SINGULARITY_NAME', None) - - print(f'machine_id: {machine_id}') + container = os.getenv('SINGULARITY_NAME', None) # Detect the machine since MACHINE_ID is set, # Additionaly, if PW_CSP is set, then the machine is a cloud machine @@ -57,14 +53,9 @@ def detect(self) -> None: self.machine = f"{pw_csp.upper()}PW" return - print("os.path.exists('/scratch3/NCEPDEV'): ", os.path.exists('/scratch3/NCEPDEV')) # Detect the machine since MACHINE_ID is not set - if os.path.exists('/scratch3/NCEPDEV'): - self.machine = 'URSA' - machine_id = 'URSA' - elif 
os.path.exists('/scratch1/NCEPDEV'): + if os.path.exists('/scratch1/NCEPDEV'): self.machine = 'HERA' - machine_id = 'HERA' elif os.path.exists('/work/noaa'): self.machine = socket.gethostname().split("-", 1)[0].upper() elif os.path.exists('/lfs/f1'): @@ -73,19 +64,14 @@ def detect(self) -> None: self.machine = 'GAEAC5' elif os.path.exists('/gpfs/f6'): self.machine = 'GAEAC6' - #elif container is not None: - # self.machine = 'CONTAINER' + elif container is not None: + self.machine = 'CONTAINER' elif pw_csp is not None: if pw_csp.lower() not in ['azure', 'aws', 'google']: raise ValueError( f'cloud service provider "{pw_csp}" is not supported.') self.machine = f"{pw_csp.upper()}PW" - print(f'self.machine: {self.machine}') - print('Host.SUPPORTED_HOSTS: ', Host.SUPPORTED_HOSTS) - self.machine = 'URSA' - machine_id = 'URSA' - if self.machine not in Host.SUPPORTED_HOSTS: raise NotImplementedError('This machine is not a supported host.\n' + 'Currently supported hosts are:\n' + diff --git a/dev/workflow/rocoto/tasks.py b/dev/workflow/rocoto/tasks.py index e3490dc0b9a..0b43eee6afc 100644 --- a/dev/workflow/rocoto/tasks.py +++ b/dev/workflow/rocoto/tasks.py @@ -53,7 +53,8 @@ def __init__(self, app_config: AppConfig, run: str) -> None: self._base = self._configs['base'] #self.HOMEgfs = self._base['HOMEgfs'] - self.HOMEgfs = '/scratch4/NAGAPE/epic/Wei.Huang/src/global-workflow-cloud' + #self.HOMEgfs = '/scratch4/NAGAPE/epic/Wei.Huang/src/global-workflow-cloud' + self.HOMEgfs = '/contrib/Wei.Huang/src/global-workflow-cloud' self.rotdir = self._base['ROTDIR'] self.pslot = self._base['PSLOT'] if self.run == "enkfgfs": diff --git a/dev/workflow/rocoto/workflow_xml.py b/dev/workflow/rocoto/workflow_xml.py index f5820a47ae6..9d899c27aa8 100644 --- a/dev/workflow/rocoto/workflow_xml.py +++ b/dev/workflow/rocoto/workflow_xml.py @@ -35,7 +35,8 @@ def __init__(self, app_config: AppConfig, rocoto_config: Dict) -> None: # Add ACCOUNT to host_info, with that from config.base 
self.host_info.ACCOUNT = self._base['ACCOUNT'] #self.HOMEgfs = self._base['HOMEgfs'] - self.HOMEgfs = '/scratch4/NAGAPE/epic/Wei.Huang/src/global-workflow-cloud' + #self.HOMEgfs = '/scratch4/NAGAPE/epic/Wei.Huang/src/global-workflow-cloud' + self.HOMEgfs = '/contrib/Wei.Huang/src/global-workflow-cloud' self.expdir = self._base['EXPDIR'] self.pslot = self._base['PSLOT'] diff --git a/dev/workflow/setup_expt.py b/dev/workflow/setup_expt.py index 538ae44c33a..623d1ec3110 100755 --- a/dev/workflow/setup_expt.py +++ b/dev/workflow/setup_expt.py @@ -22,6 +22,13 @@ _here = os.path.dirname(__file__) _top = os.path.abspath(os.path.join(os.path.abspath(_here), '../..')) +print('__file__:', __file__) +print('_here:', _here) +print('_top:', _top) + +current_dir = os.getcwd() +print('current_dir:', current_dir) + # Setup the logger logger = getLogger(__name__) @@ -83,13 +90,18 @@ def _update_defaults(dict_in: dict) -> dict: # Combine host.info and inputs_dict into a single dict, add some additional keys host_plus_inputs_dict = AttrDict(host.info, **inputs_dict_remapped) + host_plus_inputs_dict.HOMEgfs = _top host_plus_inputs_dict.MACHINE = str(host).upper() # Read in the YAML file yaml_path = inputs.yaml + + if yaml_path.find('/opt/global-workflow-cloud') >= 0: + yaml_path = yaml_path.replace('/opt/global-workflow-cloud', host_plus_inputs_dict.HOMEgfs) if not os.path.exists(yaml_path): raise FileNotFoundError(f'YAML file does not exist, check path: {yaml_path}') + yaml_dict = parse_j2yaml(yaml_path, host_plus_inputs_dict) # yaml_dict is in the form {defaults: {key1: val1, ...}, base: {key1: val1, ...}, ...} diff --git a/dev/workflow/setup_xml.py b/dev/workflow/setup_xml.py index ad1be5c4db4..574936d2779 100755 --- a/dev/workflow/setup_xml.py +++ b/dev/workflow/setup_xml.py @@ -97,9 +97,13 @@ def main(*argv): check_expdir(user_inputs.expdir, base['EXPDIR']) + print('user_inputs: ', user_inputs) + # Check if "HOMEDIR","STMP","PTMP" dirrctories are writable - dir_keys = ["HOMEDIR", 
"STMP", "PTMP"] + #dir_keys = ["HOMEDIR", "STMP", "PTMP"] + dir_keys = ["STMP", "PTMP"] for dk in dir_keys: + print(f'base[{dk}]: {base[dk]}') check_dir_writable(base[dk]) if not check_dir_writable(base[dk]): msg = f'The {dk} path {base[dk]} cannot be written to! Please correct this path and try again.' diff --git a/ush/jjob_header.sh b/ush/jjob_header.sh index 23aa861654c..bd88113cfc1 100755 --- a/ush/jjob_header.sh +++ b/ush/jjob_header.sh @@ -43,7 +43,8 @@ _calling_script="${BASH_SOURCE[1]}" source "${HOMEgfs}/ush/preamble.sh" -export PATH=/home/Wei.Huang/prod-util-2.1.1/bin:$PATH +#export PATH=/contrib/Wei.Huang/prod-util-2.1.1/bin:$PATH +source /contrib/Wei.Huang/prod_util.env export LD_LIBRARY_PATH=/apps/spack-2024-12/linux-rocky9-x86_64/gcc-11.4.1/intel-oneapi-compilers-2023.2.0-uov33rpz3lplh3hh3v5c6vssbc7ndxuk/lib OPTIND=1 diff --git a/ush/run_wgrib2.sh b/ush/run_wgrib2.sh index 6db8265cd44..f24b1306143 100755 --- a/ush/run_wgrib2.sh +++ b/ush/run_wgrib2.sh @@ -1,31 +1,32 @@ #!/bin/bash #img=/scratch2/NAGAPE/epic/Wei.Huang/src/gw-container-spack-stack-1.6.0/gw-container.sif - img=/scratch2/NAGAPE/epic/Wei.Huang/src/gw-container-spack-stack-1.6.0/gw-container +#img=/scratch2/NAGAPE/epic/Wei.Huang/src/gw-container-spack-stack-1.6.0/gw-container + img=/contrib/Wei.Huang/src/gw-container-spack-stack-1.6.0/wei-gw-container cmd=/opt/global-workflow-cloud/ush/run_wgrib2.sh arg="$@" #echo running: singularity exec "${img}" $cmd $arg singularity exec ${img} $cmd $arg -module reset -unset MACHINE_ID -export HOMEgfs=/scratch2/NAGAPE/epic/Wei.Huang/src/global-workflow-cloud +#module reset +#unset MACHINE_ID +#export HOMEgfs=/scratch2/NAGAPE/epic/Wei.Huang/src/global-workflow-cloud # Find module command and purge: -source "${HOMEgfs}/ush/detect_machine.sh" -source "${HOMEgfs}/ush/module-setup.sh" +#source "${HOMEgfs}/ush/detect_machine.sh" +#source "${HOMEgfs}/ush/module-setup.sh" # Source versions file for runtime -source "${HOMEgfs}/versions/run.ver" +#source 
"${HOMEgfs}/versions/run.ver" # Load our modules: -module use "${HOMEgfs}/modulefiles" +#module use "${HOMEgfs}/modulefiles" -case "${MACHINE_ID}" in - "wcoss2" | "hera" | "orion" | "hercules" | "gaeac5" | "gaeac6" | "jet" | "s4" | "noaacloud" | "container") - module load "module_base.${MACHINE_ID}" - ;; - *) - echo "WARNING: UNKNOWN PLATFORM" - ;; -esac +#case "${MACHINE_ID}" in +# "wcoss2" | "hera" | "orion" | "hercules" | "gaeac5" | "gaeac6" | "jet" | "s4" | "noaacloud" | "container") +# module load "module_base.${MACHINE_ID}" +# ;; +# *) +# echo "WARNING: UNKNOWN PLATFORM" +# ;; +#esac -module load prod_util +#module load prod_util From a7f8d13892ed9b62191b2dbdd396c9be125089a6 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Mon, 2 Jun 2025 13:52:15 +0000 Subject: [PATCH 053/134] ad-hoc work around to run container GW on Ursa to demo that only rocoto, slurm, and shell are needed on host machine --- env/URSA.env | 328 +++++++++++++++++++++++++++++ scripts/exglobal_atmos_products.sh | 4 +- scripts/exglobal_forecast.sh | 3 +- sorc/link_workflow.sh | 88 ++++++-- ush/forecast_predet.sh | 2 +- ush/load_fv3gfs_modules.sh | 32 +-- ush/parsing_ufs_configure.sh | 2 +- 7 files changed, 418 insertions(+), 41 deletions(-) create mode 100755 env/URSA.env diff --git a/env/URSA.env b/env/URSA.env new file mode 100755 index 00000000000..7674636474b --- /dev/null +++ b/env/URSA.env @@ -0,0 +1,328 @@ +#! /usr/bin/env bash + +if [[ $# -ne 1 ]]; then + + echo "Must specify an input argument to set runtime environment variables!" 
+ exit 1 + +fi + +step=$1 + +export launcher="srun -l --export=ALL --hint=nomultithread" +export mpmd_opt="--multi-prog --output=mpmd.%j.%t.out" + +#export POSTAMBLE_CMD='report-mem' + +# Configure MPI environment +#export I_MPI_ADJUST_ALLREDUCE=5 +#export MPI_BUFS_PER_PROC=2048 +#export MPI_BUFS_PER_HOST=2048 +#export MPI_GROUP_MAX=256 +#export MPI_MEMMAP_OFF=1 +#export MP_STDOUTMODE="ORDERED" +export OMP_STACKSIZE=2048000 +export NTHSTACK=1024000000 +#export LD_BIND_NOW=1 + +# Setting stacksize to unlimited on login nodes is prohibited +if [[ -n "${SLURM_JOB_ID:-}" ]]; then + ulimit -s unlimited + ulimit -a +fi + +# Calculate common variables +# Check first if the dependent variables are set +if [[ -n "${ntasks:-}" && -n "${max_tasks_per_node:-}" && -n "${tasks_per_node:-}" ]]; then + max_threads_per_task=$((max_tasks_per_node / tasks_per_node)) + NTHREADSmax=${threads_per_task:-${max_threads_per_task}} + NTHREADS1=${threads_per_task:-1} + if [[ ${NTHREADSmax} -gt ${max_threads_per_task} ]]; then + NTHREADSmax=${max_threads_per_task} + fi + if [[ ${NTHREADS1} -gt ${max_threads_per_task} ]]; then + NTHREADS1=${max_threads_per_task} + fi + APRUN_default="${launcher} -n ${ntasks}" +else + echo "ERROR config.resources must be sourced before sourcing URSA.env" + exit 2 +fi + +if [[ "${step}" = "prep" ]] || [[ "${step}" = "prepbufr" ]]; then + + export POE="NO" + export BACK="NO" + export sys_tp="URSA" + export launcher_PREP="srun --hint=nomultithread" + +elif [[ "${step}" = "prep_emissions" ]]; then + + export APRUN="${APRUN_default}" + +elif [[ "${step}" = "waveinit" ]] || [[ "${step}" = "waveprep" ]] || [[ "${step}" = "wavepostsbs" ]] || [[ "${step}" = "wavepostbndpnt" ]] || [[ "${step}" = "wavepostbndpntbll" ]] || [[ "${step}" = "wavepostpnt" ]]; then + + export USE_CFP="YES" + if [[ "${step}" = "waveprep" ]]; then export MP_PULSE=0 ; fi + export wavempexec=${launcher} + export wave_mpmd=${mpmd_opt} + +elif [[ "${step}" = "atmanlvar" ]]; then + + export 
NTHREADS_ATMANLVAR=${NTHREADSmax} + export APRUN_ATMANLVAR="${APRUN_default} --cpus-per-task=${NTHREADS_ATMANLVAR}" + +elif [[ "${step}" = "atmensanlobs" ]]; then + + export NTHREADS_ATMENSANLOBS=${NTHREADSmax} + export APRUN_ATMENSANLOBS="${APRUN_default} --cpus-per-task=${NTHREADS_ATMENSANLOBS}" + +elif [[ "${step}" = "atmensanlsol" ]]; then + + export NTHREADS_ATMENSANLSOL=${NTHREADSmax} + export APRUN_ATMENSANLSOL="${APRUN_default} --cpus-per-task=${NTHREADS_ATMENSANLSOL}" + +elif [[ "${step}" = "atmensanlletkf" ]]; then + + export NTHREADS_ATMENSANLLETKF=${NTHREADSmax} + export APRUN_ATMENSANLLETKF="${APRUN_default} --cpus-per-task=${NTHREADS_ATMENSANLLETKF}" + +elif [[ "${step}" = "atmensanlfv3inc" ]]; then + + export NTHREADS_ATMENSANLFV3INC=${NTHREADSmax} + export APRUN_ATMENSANLFV3INC="${APRUN_default} --cpus-per-task=${NTHREADS_ATMENSANLFV3INC}" + +elif [[ "${step}" = "aeroanlvar" ]]; then + + export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" + + export NTHREADS_AEROANL=${NTHREADSmax} + export APRUN_AEROANL="${APRUN_default} --cpus-per-task=${NTHREADS_AEROANL}" + +elif [[ "${step}" = "aeroanlgenb" ]]; then + + export NTHREADS_AEROANLGENB=${NTHREADSmax} + export APRUN_AEROANLGENB="${APRUN_default} --cpus-per-task=${NTHREADS_AEROANLGENB}" + +elif [[ "${step}" = "atmanlfv3inc" ]]; then + + export NTHREADS_ATMANLFV3INC=${NTHREADSmax} + export APRUN_ATMANLFV3INC="${APRUN_default} --cpus-per-task=${NTHREADS_ATMANLFV3INC}" + +elif [[ "${step}" = "anlstat" ]]; then + + export NTHREADS_ANLSTAT=${NTHREADSmax} + export APRUN_ANLSTAT="${APRUN_default} --cpus-per-task=${NTHREADS_ANLSTAT}" + +elif [[ "${step}" = "prepobsaero" ]]; then + + export NTHREADS_PREPOBSAERO=${NTHREADS1} + export APRUN_PREPOBSAERO="${APRUN_default} --cpus-per-task=${NTHREADS_PREPOBSAERO}" + +elif [[ "${step}" = "snowanl" ]]; then + + export APRUN_CALCFIMS="${launcher} -n 1" + + export NTHREADS_SNOWANL=${NTHREADSmax} + export APRUN_SNOWANL="${APRUN_default} --mem=0 
--cpus-per-task=${NTHREADS_SNOWANL}" + + export APRUN_APPLY_INCR="${launcher} -n 6" + +elif [[ "${step}" = "esnowanl" ]]; then + + export APRUN_CALCFIMS="${launcher} -n 1" + + export NTHREADS_ESNOWANL=${NTHREADSmax} + export APRUN_ESNOWANL="${APRUN_default} --mem=0 --cpus-per-task=${NTHREADS_ESNOWANL}" + + export APRUN_APPLY_INCR="${launcher} -n 6" + +elif [[ "${step}" = "marinebmat" ]]; then + + export APRUNCFP="${launcher} -n \$ncmd --multi-prog" + export APRUN_MARINEBMAT="${APRUN_default}" + +elif [[ "${step}" = "marineanlvar" ]]; then + + export APRUNCFP="${launcher} -n \$ncmd --multi-prog" + export APRUN_MARINEANLVAR="${APRUN_default}" + +elif [[ "${step}" = "marineanlchkpt" ]]; then + + export APRUNCFP="${launcher} -n \$ncmd --multi-prog" + export APRUN_MARINEANLCHKPT="${APRUN_default}" + +elif [[ "${step}" = "ocnanalecen" ]]; then + + export NTHREADS_OCNANALECEN=${NTHREADSmax} + export APRUN_OCNANALECEN="${APRUN_default} --cpus-per-task=${NTHREADS_OCNANALECEN}" + +elif [[ "${step}" = "marineanlletkf" ]]; then + + export NTHREADS_MARINEANLLETKF=${NTHREADSmax} + export APRUN_MARINEANLLETKF=${APRUN_default} + +elif [[ "${step}" = "ecen_fv3jedi" ]]; then + + export NTHREADS_ECEN_FV3JEDI=${NTHREADSmax} + export APRUN_CORRECTION_INCREMENT="${launcher} -n ${ntasks_correction_increment} --cpus-per-task=${NTHREADS_ECEN_FV3JEDI}" + export APRUN_ENSEMBLE_RECENTER="${launcher} -n ${ntasks_ensemble_recenter} --cpus-per-task=${NTHREADS_ECEN_FV3JEDI}" + +elif [[ "${step}" = "analcalc_fv3jedi" ]]; then + + export NTHREADS_ANALCALC_FV3JEDI=${NTHREADSmax} + export APRUN_ANALCALC_FV3JEDI="${APRUN_default} --cpus-per-task=${NTHREADS_ANALCALC_FV3JEDI}" + +elif [[ "${step}" = "anal" ]] || [[ "${step}" = "analcalc" ]]; then + + export MKL_NUM_THREADS=4 + export MKL_CBWR=AUTO + + export CFP_MP=${CFP_MP:-"YES"} + export USE_CFP=${USE_CFP:-"YES"} + export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" + + export NTHREADS_GSI=${NTHREADSmax} + export APRUN_GSI="${APRUN_default} 
--cpus-per-task=${NTHREADS_GSI}" + + export NTHREADS_CALCINC=${threads_per_task_calcinc:-1} + if [[ ${NTHREADS_CALCINC} -gt ${max_threads_per_task} ]]; then + export NTHREADS_CALCINC=${max_threads_per_task} + fi + export APRUN_CALCINC="${launcher} \$ncmd --cpus-per-task=${NTHREADS_CALCINC}" + + export NTHREADS_CYCLE=${threads_per_task_cycle:-12} + if [[ ${NTHREADS_CYCLE} -gt ${max_tasks_per_node} ]]; then + export NTHREADS_CYCLE=${max_tasks_per_node} + fi + ntasks_cycle=${ntiles:-6} + export APRUN_CYCLE="${launcher} -n ${ntasks_cycle} --cpus-per-task=${NTHREADS_CYCLE}" + + export NTHREADS_GAUSFCANL=1 + ntasks_gausfcanl=${ntasks_gausfcanl:-1} + export APRUN_GAUSFCANL="${launcher} -n ${ntasks_gausfcanl} --cpus-per-task=${NTHREADS_GAUSFCANL}" + +elif [[ "${step}" = "sfcanl" ]]; then + + export NTHREADS_CYCLE=${threads_per_task:-14} + export APRUN_CYCLE="${APRUN_default} --cpus-per-task=${NTHREADS_CYCLE}" + + # REGRID requires 6 tasks for reproducibility + ntasks_regrid=6 + export APRUN_REGRID="${launcher} -n ${ntasks_regrid} " + +elif [[ "${step}" = "eobs" ]]; then + + export MKL_NUM_THREADS=4 + export MKL_CBWR=AUTO + + export NTHREADS_GSI=${NTHREADSmax} + export APRUN_GSI="${APRUN_default} --cpus-per-task=${NTHREADS_GSI}" + + export CFP_MP=${CFP_MP:-"YES"} + export USE_CFP=${USE_CFP:-"YES"} + export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" + +elif [[ "${step}" = "eupd" ]]; then + + export NTHREADS_ENKF=${NTHREADSmax} + export APRUN_ENKF="${launcher} -n ${ntasks_enkf:-${ntasks}} --cpus-per-task=${NTHREADS_ENKF}" + + export CFP_MP=${CFP_MP:-"YES"} + export USE_CFP=${USE_CFP:-"YES"} + export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" + +elif [[ "${step}" = "fcst" ]] || [[ "${step}" = "efcs" ]]; then + + export launcher="srun --mpi=pmi2 -l" + + (( nnodes = (ntasks+tasks_per_node-1)/tasks_per_node )) + (( ufs_ntasks = nnodes*tasks_per_node )) + # With ESMF threading, the model wants to use the full node + export APRUN_UFS="${launcher} -n ${ufs_ntasks}" + unset 
nnodes ufs_ntasks + +elif [[ "${step}" = "upp" ]]; then + + export NTHREADS_UPP=${NTHREADS1} + export APRUN_UPP="${APRUN_default} --cpus-per-task=${NTHREADS_UPP}" + +elif [[ "${step}" = "atmos_products" ]]; then + + export USE_CFP="YES" # Use MPMD for downstream product generation on URSA + +elif [[ "${step}" = "oceanice_products" ]]; then + + export NTHREADS_OCNICEPOST=${NTHREADS1} + export APRUN_OCNICEPOST="${launcher} -n 1 --cpus-per-task=${NTHREADS_OCNICEPOST}" + +elif [[ "${step}" = "ecen" ]]; then + + export NTHREADS_ECEN=${NTHREADSmax} + export APRUN_ECEN="${APRUN_default} --cpus-per-task=${NTHREADS_ECEN}" + + export NTHREADS_CHGRES=${threads_per_task_chgres:-12} + if [[ ${NTHREADS_CHGRES} -gt ${max_tasks_per_node} ]]; then + export NTHREADS_CHGRES=${max_tasks_per_node} + fi + export APRUN_CHGRES="time" + + export NTHREADS_CALCINC=${threads_per_task_calcinc:-1} + if [[ ${NTHREADS_CALCINC} -gt ${max_threads_per_task} ]]; then + export NTHREADS_CALCINC=${max_threads_per_task} + fi + export APRUN_CALCINC="${APRUN_default} --cpus-per-task=${NTHREADS_CALCINC}" + +elif [[ "${step}" = "esfc" ]]; then + + export NTHREADS_ESFC=${threads_per_task_esfc:-${max_threads_per_task}} + export APRUN_ESFC="${APRUN_default} --cpus-per-task=${NTHREADS_ESFC}" + + export NTHREADS_CYCLE=${threads_per_task_cycle:-14} + if [[ ${NTHREADS_CYCLE} -gt ${max_tasks_per_node} ]]; then + export NTHREADS_CYCLE=${max_tasks_per_node} + fi + export APRUN_CYCLE="${APRUN_default} --cpus-per-task=${NTHREADS_CYCLE}" + + # REGRID requires 6 tasks for reproducibility + ntasks_regrid=6 + export APRUN_REGRID="${launcher} -n ${ntasks_regrid} " + +elif [[ "${step}" = "epos" ]]; then + + export NTHREADS_EPOS=${NTHREADSmax} + export APRUN_EPOS="${APRUN_default} --cpus-per-task=${NTHREADS_EPOS}" + +elif [[ "${step}" = "postsnd" ]]; then + + export CFP_MP="YES" + + export NTHREADS_POSTSND=${NTHREADS1} + export APRUN_POSTSND="${APRUN_default} --cpus-per-task=${NTHREADS_POSTSND}" + + export 
NTHREADS_POSTSNDCFP=${threads_per_task_postsndcfp:-1} + if [[ ${NTHREADS_POSTSNDCFP} -gt ${max_threads_per_task} ]]; then + export NTHREADS_POSTSNDCFP=${max_threads_per_task} + fi + export APRUN_POSTSNDCFP="${launcher} -n ${ntasks_postsndcfp} ${mpmd_opt}" + +elif [[ "${step}" = "awips" ]]; then + + export NTHREADS_AWIPS=${NTHREADS1} + export APRUN_AWIPSCFP="${APRUN_default} ${mpmd_opt}" + +elif [[ "${step}" = "gempak" ]]; then + + export CFP_MP="YES" + + export NTHREADS_GEMPAK=${NTHREADS1} + if [[ ${NTHREADS_GEMPAK} -gt ${max_threads_per_task} ]]; then + export NTHREADS_GEMPAK=${max_threads_per_task} + fi + +elif [[ "${step}" = "fit2obs" ]]; then + + export NTHREADS_FIT2OBS=${NTHREADS1} + export MPIRUN="${APRUN_default} --cpus-per-task=${NTHREADS_FIT2OBS}" + +fi diff --git a/scripts/exglobal_atmos_products.sh b/scripts/exglobal_atmos_products.sh index 6bcb4712aa7..d187729a3f7 100755 --- a/scripts/exglobal_atmos_products.sh +++ b/scripts/exglobal_atmos_products.sh @@ -1,7 +1,7 @@ #! /usr/bin/env bash -source "${USHgfs}/load_fv3gfs_modules.sh" -module load wgrib2/2.0.8 +#source "${USHgfs}/load_fv3gfs_modules.sh" +#module load wgrib2/2.0.8 # Programs used #export WGRIB2=${WGRIB2:-${wgrib2_ROOT}/bin/wgrib2} diff --git a/scripts/exglobal_forecast.sh b/scripts/exglobal_forecast.sh index efd7868075f..113e7024c30 100755 --- a/scripts/exglobal_forecast.sh +++ b/scripts/exglobal_forecast.sh @@ -176,7 +176,6 @@ else fi ${NCP} "${EXECgfs}/${FCSTEXEC}" "${DATA}/" -module reset ${APRUN_UFS} "${DATA}/${FCSTEXEC}" 1>&1 2>&2 && true export err=$? 
source "${USHgfs}/forecast_predet.sh" # include functions for variable definition @@ -185,7 +184,7 @@ source "${USHgfs}/forecast_postdet.sh" # include functions for variables after r source "${USHgfs}/parsing_ufs_configure.sh" # include functions for ufs_configure processing source "${USHgfs}/atparse.bash" # include function atparse for parsing @[XYZ] templated files -source "${USHgfs}/load_fv3gfs_modules.sh" +#source "${USHgfs}/load_fv3gfs_modules.sh" err_chk diff --git a/sorc/link_workflow.sh b/sorc/link_workflow.sh index 26c6bee125a..0baab2bf4d6 100755 --- a/sorc/link_workflow.sh +++ b/sorc/link_workflow.sh @@ -15,6 +15,8 @@ Usage: ${BASH_SOURCE[0]} [-h][-o][--nest] Print this help message and exit -o: Configure for NCO (copy instead of link) + -c: + Configure for container (copy from .sif instead of link) EOF exit 1 } @@ -23,13 +25,20 @@ RUN_ENVIR="emc" # Reset option counter in case this script is sourced OPTIND=1 -while getopts ":ho-:" option; do +while getopts ":hoc-:" option; do case "${option}" in h) usage ;; o) echo "-o option received, configuring for NCO" RUN_ENVIR="nco" ;; + c) + echo "-c option received, configuring for CONTAINER" + RUN_ENVIR="container" + SINGULARITY_IMAGE_FILE=/scratch4/NAGAPE/epic/Wei.Huang/src/container/gw-container + CONTAINER_DIR="/opt/global-workflow-cloud" + CONTAINER_COPY="singularity exec ${SINGULARITY_IMAGE_FILE} cp" + ;; -) if [[ "${OPTARG}" == "nest" ]]; then LINK_NEST=ON @@ -147,21 +156,41 @@ fi #--------------------------------------- #--copy/link NoahMp table form ccpp-physics repository cd "${HOMEgfs}/parm/ufs" || exit 1 -${LINK_OR_COPY} "${HOMEgfs}/sorc/ufs_model.fd/tests/parm/noahmptable.tbl" . +if [ "${RUN_ENVIR}" == "container" ]; then + ${CONTAINER_COPY} ${CONTAINER_DIR}/sorc/ufs_model.fd/tests/parm/noahmptable.tbl . +else + ${LINK_OR_COPY} "${HOMEgfs}/sorc/ufs_model.fd/tests/parm/noahmptable.tbl" . +fi cd "${HOMEgfs}/parm/post" || exit 1 -${LINK_OR_COPY} "${HOMEgfs}/sorc/upp.fd/parm/params_grib2_tbl_new" . 
-${LINK_OR_COPY} "${HOMEgfs}/sorc/upp.fd/fix/nam_micro_lookup.dat" . -for dir in gfs gefs sfs -do - ${LINK_OR_COPY} "${HOMEgfs}/sorc/upp.fd/parm/${dir}" . -done -for file in ice_gfs.csv ice_gefs.csv ocean_gfs.csv ocean_gefs.csv ocnicepost.nml.jinja2; do - ${LINK_OR_COPY} "${HOMEgfs}/sorc/gfs_utils.fd/parm/ocnicepost/${file}" . -done +if [ "${RUN_ENVIR}" == "container" ]; then + ${CONTAINER_COPY} ${CONTAINER_DIR}/sorc/upp.fd/parm/params_grib2_tbl_new . + ${CONTAINER_COPY} ${CONTAINER_DIR}/sorc/upp.fd/fix/nam_micro_lookup.dat . + for dir in gfs gefs sfs + do + ${CONTAINER_COPY} -r ${CONTAINER_DIR}/sorc/upp.fd/parm/${dir} . + done + for file in ice_gfs.csv ice_gefs.csv ocean_gfs.csv ocean_gefs.csv ocnicepost.nml.jinja2; do + ${CONTAINER_COPY} "${CONTAINER_DIR}/sorc/gfs_utils.fd/parm/ocnicepost/${file}" . + done +else + ${LINK_OR_COPY} "${HOMEgfs}/sorc/upp.fd/parm/params_grib2_tbl_new" . + ${LINK_OR_COPY} "${HOMEgfs}/sorc/upp.fd/fix/nam_micro_lookup.dat" . + for dir in gfs gefs sfs + do + ${LINK_OR_COPY} "${HOMEgfs}/sorc/upp.fd/parm/${dir}" . + done + for file in ice_gfs.csv ice_gefs.csv ocean_gfs.csv ocean_gefs.csv ocnicepost.nml.jinja2; do + ${LINK_OR_COPY} "${HOMEgfs}/sorc/gfs_utils.fd/parm/ocnicepost/${file}" . + done +fi cd "${HOMEgfs}/scripts" || exit 8 -${LINK_OR_COPY} "${HOMEgfs}/sorc/ufs_utils.fd/scripts/exemcsfc_global_sfc_prep.sh" . +if [ "${RUN_ENVIR}" == "container" ]; then + ${CONTAINER_COPY} "${CONTAINER_DIR}/sorc/ufs_utils.fd/scripts/exemcsfc_global_sfc_prep.sh" . +else + ${LINK_OR_COPY} "${HOMEgfs}/sorc/ufs_utils.fd/scripts/exemcsfc_global_sfc_prep.sh" . +fi if [[ -d "${HOMEgfs}/sorc/gdas.cd" ]]; then declare -a gdas_scripts=(exglobal_prep_ocean_obs.py exgdas_global_marine_analysis_ecen.py @@ -171,9 +200,15 @@ if [[ -d "${HOMEgfs}/sorc/gdas.cd" ]]; then done fi cd "${HOMEgfs}/ush" || exit 8 -for file in emcsfc_ice_blend.sh global_cycle_driver.sh emcsfc_snow.sh global_cycle.sh; do - ${LINK_OR_COPY} "${HOMEgfs}/sorc/ufs_utils.fd/ush/${file}" . 
-done +if [ "${RUN_ENVIR}" == "container" ]; then + for file in emcsfc_ice_blend.sh global_cycle_driver.sh emcsfc_snow.sh global_cycle.sh; do + ${CONTAINER_COPY} "${CONTAINER_DIR}/sorc/ufs_utils.fd/ush/${file}" . + done +else + for file in emcsfc_ice_blend.sh global_cycle_driver.sh emcsfc_snow.sh global_cycle.sh; do + ${LINK_OR_COPY} "${HOMEgfs}/sorc/ufs_utils.fd/ush/${file}" . + done +fi # Link these templates from ufs-weather-model cd "${HOMEgfs}/parm/ufs" || exit 1 @@ -195,7 +230,11 @@ for file in "${ufs_templates[@]}"; do if [[ -s "${file}" ]]; then rm -f "${file}" fi - ${LINK_OR_COPY} "${HOMEgfs}/sorc/ufs_model.fd/tests/parm/${file}" . + if [ "${RUN_ENVIR}" == "container" ]; then + ${CONTAINER_COPY} "${CONTAINER_DIR}/sorc/ufs_model.fd/tests/parm/${file}" . +else + ${LINK_OR_COPY} "${HOMEgfs}/sorc/ufs_model.fd/tests/parm/${file}" . + fi done # Link the script from ufs-weather-model that parses the templates @@ -203,7 +242,11 @@ cd "${HOMEgfs}/ush" || exit 1 if [[ -s "atparse.bash" ]]; then rm -f "atparse.bash" fi -${LINK_OR_COPY} "${HOMEgfs}/sorc/ufs_model.fd/tests/atparse.bash" . +if [ "${RUN_ENVIR}" == "container" ]; then + ${CONTAINER_COPY} "${CONTAINER_DIR}/sorc/ufs_model.fd/tests/atparse.bash" . +else + ${LINK_OR_COPY} "${HOMEgfs}/sorc/ufs_model.fd/tests/atparse.bash" . 
+fi #------------------------------ #--add GDASApp fix directory @@ -308,8 +351,15 @@ for sys in "${model_systems[@]}"; do if [[ -s "${model_exe}" ]]; then rm -f "${model_exe}" fi - if [[ -f "${HOMEgfs}/sorc/ufs_model.fd/tests/${model_exe}" ]]; then - ${LINK_OR_COPY} "${HOMEgfs}/sorc/ufs_model.fd/tests/${model_exe}" "${model_exe}" + + if [ "${RUN_ENVIR}" == "container" ]; then + if [[ -f "${HOMEgfs}/bin/${model_exe}" ]]; then + ${LINK_OR_COPY} "${HOMEgfs}/bin/${model_exe}" "${model_exe}" + fi + else + if [[ -f "${HOMEgfs}/sorc/ufs_model.fd/tests/${model_exe}" ]]; then + ${LINK_OR_COPY} "${HOMEgfs}/sorc/ufs_model.fd/tests/${model_exe}" "${model_exe}" + fi fi done diff --git a/ush/forecast_predet.sh b/ush/forecast_predet.sh index 03c66519039..81309e93369 100755 --- a/ush/forecast_predet.sh +++ b/ush/forecast_predet.sh @@ -277,7 +277,7 @@ FV3_predet(){ # <0 means older adiabatic pre-conditioning na_init=${na_init:-1} - local suite_file="${HOMEgfs}/sorc/ufs_model.fd/FV3/ccpp/suites/suite_${CCPP_SUITE}.xml" + local suite_file="${HOMEgfs}/dev/FV3-ccpp-suites/suite_${CCPP_SUITE}.xml" if [[ ! -f "${suite_file}" ]]; then echo "FATAL ERROR: CCPP Suite file ${suite_file} does not exist, ABORT!" 
exit 2 diff --git a/ush/load_fv3gfs_modules.sh b/ush/load_fv3gfs_modules.sh index d304940b4db..787758836bb 100755 --- a/ush/load_fv3gfs_modules.sh +++ b/ush/load_fv3gfs_modules.sh @@ -17,25 +17,25 @@ set -x ulimit_s=$( ulimit -S -s ) # Find module command and purge: -source "${HOMEgfs}/ush/detect_machine.sh" -source "${HOMEgfs}/ush/module-setup.sh" +#source "${HOMEgfs}/ush/detect_machine.sh" +#source "${HOMEgfs}/ush/module-setup.sh" # Source versions file for runtime -source "${HOMEgfs}/versions/run.ver" +#source "${HOMEgfs}/versions/run.ver" # Load our modules: -module use "${HOMEgfs}/modulefiles" +#module use "${HOMEgfs}/modulefiles" -case "${MACHINE_ID}" in - "wcoss2" | "hera" | "orion" | "hercules" | "gaeac5" | "gaeac6" | "jet" | "s4" | "noaacloud" | "container") - module load "module_base.${MACHINE_ID}" - ;; - *) - echo "WARNING: UNKNOWN PLATFORM" - ;; -esac +#case "${MACHINE_ID}" in +# "wcoss2" | "hera" | "orion" | "hercules" | "gaeac5" | "gaeac6" | "jet" | "s4" | "noaacloud" | "ursa") +# module load "module_base.${MACHINE_ID}" +# ;; +# *) +# echo "WARNING: UNKNOWN PLATFORM" +# ;; +#esac -module list +#module list # If this function exists in the environment, run it; else set -x if it was set on entering this script ftype=$(type -t set_trace || echo "") @@ -46,9 +46,9 @@ elif [[ "${set_x}" == "YES" ]]; then fi # Add wxflow to PYTHONPATH -wxflowPATH="${HOMEgfs}/ush/python" -PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${HOMEgfs}/ush:${wxflowPATH}" -export PYTHONPATH +#wxflowPATH="${HOMEgfs}/ush/python" +#PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${HOMEgfs}/ush:${wxflowPATH}" +#export PYTHONPATH # Restore stack soft limit: ulimit -S -s "${ulimit_s}" diff --git a/ush/parsing_ufs_configure.sh b/ush/parsing_ufs_configure.sh index def93883650..c418619561b 100755 --- a/ush/parsing_ufs_configure.sh +++ b/ush/parsing_ufs_configure.sh @@ -125,7 +125,7 @@ atparse < "${ufs_configure_template}" >> "${DATA}/ufs.configure" echo "Rendered ufs.configure:" cat ufs.configure -${NCP} 
"${HOMEgfs}/sorc/ufs_model.fd/tests/parm/fd_ufs.yaml" fd_ufs.yaml +${NCP} "${HOMEgfs}/parm/ufs/fd_ufs.yaml" fd_ufs.yaml echo "SUB ${FUNCNAME[0]}: ufs.configure ends" From 7402ec52e35a144ecf18629c07a5f02b0c55f3ed Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Mon, 2 Jun 2025 12:02:55 -0400 Subject: [PATCH 054/134] on gaea --- ush/jjob_header.sh | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/ush/jjob_header.sh b/ush/jjob_header.sh index f5d502402d5..d40053d0cca 100755 --- a/ush/jjob_header.sh +++ b/ush/jjob_header.sh @@ -40,16 +40,15 @@ # - $pid : Override the default process id # [default: $$] -echo "jjob_header.sh part 1" _calling_script="${BASH_SOURCE[1]}" source "${HOMEgfs}/ush/preamble.sh" -echo "jjob_header.sh part 2" -module purge -module use ${HOMEgfs}/modulefiles -module load module_run.hera +#module purge +#module use ${HOMEgfs}/modulefiles +#module load module_run.hera + +source ~/prod_util.env -echo "jjob_header.sh part 3" OPTIND=1 while getopts "c:e:" option; do case "${option}" in @@ -72,7 +71,6 @@ if [[ -z ${env_job} ]]; then err_exit "[${BASH_SOURCE[0]}]: Must specify a job name with -e" fi -echo "jjob_header.sh part 4" ############################################## # make temp directory ############################################## @@ -98,7 +96,6 @@ export pgmerr=errfile export pgm=${pgm:-} -echo "jjob_header.sh part 5" ############################################## # Run setpdy and initialize PDY variables ############################################## @@ -120,7 +117,6 @@ for config in "${configs[@]:-''}"; do done -echo "jjob_header.sh part 6" ########################################## # Source machine runtime environment ########################################## From ca63cf180e895b8e01f07047e6403be5f594c983 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Thu, 5 Jun 2025 15:12:09 +0000 Subject: [PATCH 055/134] working on demo --- bin/run_python.sh | 12 +++++++++--- bin/run_wgrib2.sh | 15 ++++++++++++--- c48atm.sh | 
1 + ush/forecast_predet.sh | 3 ++- ush/parsing_ufs_configure.sh | 4 ++-- ush/run_gfs_model.sh | 18 +++++++++++------- ush/run_python.sh | 31 ++++++++++++++++-------------- ush/run_wgrib2.sh | 37 +++++++++++------------------------- 8 files changed, 65 insertions(+), 56 deletions(-) diff --git a/bin/run_python.sh b/bin/run_python.sh index 29d0fdf0265..e77230f4fae 100755 --- a/bin/run_python.sh +++ b/bin/run_python.sh @@ -1,9 +1,15 @@ #!/bin/bash - img=/scratch4/NAGAPE/epic/Wei.Huang/src/container/gw-container - cmd=/opt/global-workflow-cloud/ush/run_python.sh + containerdir=/scratch4/NAGAPE/epic/Wei.Huang/demo + img=${containerdir}/ubuntu22.04-intel-ufs-env-v1.6.0.img + HOMEgfs=/scratch4/NAGAPE/epic/Wei.Huang/demo/global-workflow-cloud + cmd=${HOMEgfs}/ush/run_python.sh arg="$@" - echo "running: singularity exec ${img} $cmd $arg" + +#wxflowPATH="${HOMEgfs}/ush/python" +#export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${HOMEgfs}/ush:${wxflowPATH}" + singularity exec \ + -B /scratch3 \ -B /scratch4 \ ${img} $cmd $arg diff --git a/bin/run_wgrib2.sh b/bin/run_wgrib2.sh index 950a2d4fded..9fa7bb53ea2 100755 --- a/bin/run_wgrib2.sh +++ b/bin/run_wgrib2.sh @@ -1,9 +1,18 @@ #!/bin/bash - img=/scratch4/NAGAPE/epic/Wei.Huang/src/container/gw-container - cmd=/opt/global-workflow-cloud/ush/run_wgrib2.sh + containerdir=/scratch4/NAGAPE/epic/Wei.Huang/demo + img=${containerdir}/ubuntu22.04-intel-ufs-env-v1.6.0.img + HOMEgfs=/scratch4/NAGAPE/epic/Wei.Huang/demo/global-workflow-cloud + cmd=${HOMEgfs}/ush/run_wgrib2.sh + + source /usr/lmod/lmod/init/bash + module purge + source ${HOMEgfs}/versions/run.ver + module use ${HOMEgfs}/modulefiles + module load module_base.container + arg="$@" - echo "running: singularity exec ${img} $cmd $arg" singularity exec \ + -B /scratch3 \ -B /scratch4 \ ${img} $cmd $arg diff --git a/c48atm.sh b/c48atm.sh index 566ebc57ef4..30df57d7114 100755 --- a/c48atm.sh +++ b/c48atm.sh @@ -2,6 +2,7 @@ set -x + HOMEDIR=/scratch4/NAGAPE/epic/Wei.Huang/demo 
rundir=/scratch4/NAGAPE/epic/Wei.Huang/run mkdir -p ${rundir} diff --git a/ush/forecast_predet.sh b/ush/forecast_predet.sh index 7da621dd3f4..770132421a6 100755 --- a/ush/forecast_predet.sh +++ b/ush/forecast_predet.sh @@ -281,7 +281,8 @@ FV3_predet(){ # <0 means older adiabatic pre-conditioning na_init=${na_init:-1} - local suite_file="${HOMEgfs}/dev/FV3-ccpp-suites/suite_${CCPP_SUITE}.xml" +# local suite_file="${HOMEgfs}/dev/FV3-ccpp-suites/suite_${CCPP_SUITE}.xml" + local suite_file="${HOMEgfs}/sorc/ufs_model.fd/FV3/ccpp/suites/suite_${CCPP_SUITE}.xml" if [[ ! -f "${suite_file}" ]]; then echo "FATAL ERROR: CCPP Suite file ${suite_file} does not exist, ABORT!" exit 2 diff --git a/ush/parsing_ufs_configure.sh b/ush/parsing_ufs_configure.sh index 85b5b310a8c..d131b0cb0e0 100755 --- a/ush/parsing_ufs_configure.sh +++ b/ush/parsing_ufs_configure.sh @@ -125,8 +125,8 @@ atparse < "${ufs_configure_template}" >> "${DATA}/ufs.configure" echo "Rendered ufs.configure:" cat ufs.configure -#cpreq "${HOMEgfs}/sorc/ufs_model.fd/tests/parm/fd_ufs.yaml" fd_ufs.yaml -cpreq "${HOMEgfs}/parm/ufs/fd_ufs.yaml" fd_ufs.yaml +cpreq "${HOMEgfs}/sorc/ufs_model.fd/tests/parm/fd_ufs.yaml" fd_ufs.yaml +#cpreq "${HOMEgfs}/parm/ufs/fd_ufs.yaml" fd_ufs.yaml echo "SUB ${FUNCNAME[0]}: ufs.configure ends" diff --git a/ush/run_gfs_model.sh b/ush/run_gfs_model.sh index 2c11f83e02f..b8b3014833d 100755 --- a/ush/run_gfs_model.sh +++ b/ush/run_gfs_model.sh @@ -2,14 +2,18 @@ # Set OMP_NUM_THREADS to 1 to avoid oversubscription when doing MPMD export OMP_NUM_THREADS=1 -export FPATH=/usr/lmod/lmod/libexec -export HOMEgfs=/opt/global-workflow-cloud -source ${HOMEgfs}/versions/run.ver -source /usr/lmod/lmod/init/bash +#export FPATH=/usr/lmod/lmod/libexec #module reset -module use ${HOMEgfs}/modulefiles -module load module_base.container +#module use ${HOMEgfs}/modulefiles +#module load module_base.container +source 
/opt/spack-stack/spack-stack-1.6.0/envs/unified-env/install/intel/2021.10.0/intel-oneapi-mpi-2021.9.0-6bnjcwc/setvars.sh --force export LD_LIBRARY_PATH=/opt/intel/oneapi/compiler/2023.2.3/linux/compiler/lib/intel64_lin:$LD_LIBRARY_PATH export LD_LIBRARY_PATH=/opt/intel/oneapi/compiler/2024.0/lib:$LD_LIBRARY_PATH -${HOMEgfs}/sorc/ufs_model.fd/tests/gfs_model.x + +#export I_MPI_OFI_PROVIDER=tcp +#export I_MPI_FABRICS=shm:ofi +#export FI_PROVIDER=tcp +export HOMEgfs=/scratch4/NAGAPE/epic/Wei.Huang/demo/global-workflow-cloud +arg="$@" +${HOMEgfs}/sorc/ufs_model.fd/tests/gfs_model.x $arg diff --git a/ush/run_python.sh b/ush/run_python.sh index 2838560b0a7..0efdabfb368 100644 --- a/ush/run_python.sh +++ b/ush/run_python.sh @@ -1,23 +1,26 @@ -#!/usr/bin/env bash +#!/bin/bash -export FPATH=/usr/lmod/lmod/libexec +HOMEgfs=/scratch4/NAGAPE/epic/Wei.Huang/demo/global-workflow-cloud + +#source /opt/spack-stack/spack-stack-1.6.0/envs/unified-env/install/intel/2021.10.0/intel-oneapi-mpi-2021.9.0-6bnjcwc/setvars.sh --force -export HOMEgfs=/opt/global-workflow-cloud source /usr/lmod/lmod/init/bash module purge -source ${HOMEgfs}/versions/run.ver -module use ${HOMEgfs}/modulefiles -module load module_base.container +source ${HOMEgfs}/dev/ush/gw_setup.sh + +module list #module load python/3.10.13 -#module load py-f90nml/1.4.3 -#module load py-netcdf4/1.5.8 -#module load py-pyyaml/6.0 -#module load py-jinja2/3.1.2 -#module load py-pandas/1.5.3 -#module load py-numpy/1.22.3 -#module load py-xarray/2023.7.0 -#module load py-python-dateutil/2.8.2 +module load py-f90nml/1.4.3 +module load py-netcdf4/1.5.8 +module load py-pyyaml/6.0 +module load py-jinja2/3.0.3 +module load py-pandas/1.5.3 +module load py-numpy/1.22.3 +module load py-xarray/2023.7.0 +module load py-python-dateutil/2.8.2 + +module list wxflowPATH="${HOMEgfs}/ush/python" export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${HOMEgfs}/ush:${wxflowPATH}" diff --git a/ush/run_wgrib2.sh b/ush/run_wgrib2.sh index 
6db8265cd44..8c243bb2819 100755 --- a/ush/run_wgrib2.sh +++ b/ush/run_wgrib2.sh @@ -1,31 +1,16 @@ -#!/bin/bash -#img=/scratch2/NAGAPE/epic/Wei.Huang/src/gw-container-spack-stack-1.6.0/gw-container.sif - img=/scratch2/NAGAPE/epic/Wei.Huang/src/gw-container-spack-stack-1.6.0/gw-container - cmd=/opt/global-workflow-cloud/ush/run_wgrib2.sh - arg="$@" -#echo running: singularity exec "${img}" $cmd $arg - singularity exec ${img} $cmd $arg +#!/usr/bin/env bash -module reset -unset MACHINE_ID -export HOMEgfs=/scratch2/NAGAPE/epic/Wei.Huang/src/global-workflow-cloud -# Find module command and purge: -source "${HOMEgfs}/ush/detect_machine.sh" -source "${HOMEgfs}/ush/module-setup.sh" +export HOMEgfs=/scratch4/NAGAPE/epic/Wei.Huang/demo/global-workflow-cloud -# Source versions file for runtime -source "${HOMEgfs}/versions/run.ver" +source /usr/lmod/lmod/init/bash +module purge +module use ${HOMEgfs}/modulefiles +module load module_gwsetup.container -# Load our modules: -module use "${HOMEgfs}/modulefiles" +module load wgrib2/2.0.8 +export LD_LIBRARY_PATH=/opt/intel/oneapi/compiler/2024.0/lib:$LD_LIBRARY_PATH -case "${MACHINE_ID}" in - "wcoss2" | "hera" | "orion" | "hercules" | "gaeac5" | "gaeac6" | "jet" | "s4" | "noaacloud" | "container") - module load "module_base.${MACHINE_ID}" - ;; - *) - echo "WARNING: UNKNOWN PLATFORM" - ;; -esac +arg="$@" + +/opt/spack-stack/spack-stack-1.6.0/envs/unified-env/install/intel/2021.10.0/wgrib2-2.0.8-bq36dgw/bin/wgrib2 $arg -module load prod_util From 7b841cdedf2ef0b8f4416a36df263dd4476a4449 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Thu, 5 Jun 2025 15:26:09 +0000 Subject: [PATCH 056/134] working on demo --- bin/run_wgrib2.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bin/run_wgrib2.sh b/bin/run_wgrib2.sh index 9fa7bb53ea2..3b6171c3e8d 100755 --- a/bin/run_wgrib2.sh +++ b/bin/run_wgrib2.sh @@ -4,11 +4,11 @@ HOMEgfs=/scratch4/NAGAPE/epic/Wei.Huang/demo/global-workflow-cloud 
cmd=${HOMEgfs}/ush/run_wgrib2.sh - source /usr/lmod/lmod/init/bash - module purge - source ${HOMEgfs}/versions/run.ver - module use ${HOMEgfs}/modulefiles - module load module_base.container +#source /usr/lmod/lmod/init/bash +#module purge +#source ${HOMEgfs}/versions/run.ver +#module use ${HOMEgfs}/modulefiles +#module load module_base.container arg="$@" singularity exec \ From 6ec552fd2af472fe0e21e32c3d14205f22f70f78 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Sat, 14 Jun 2025 17:36:12 -0400 Subject: [PATCH 057/134] for GW run with container on gaeac6 --- bin/run_python.sh | 11 +++++++++++ bin/run_wgrib2.sh | 9 +++++++++ env/GAEAC6.env | 1 + scripts/exglobal_forecast.sh | 5 +++++ ush/run_gfs_model.sh | 17 +++++++++++++---- ush/run_python.sh | 28 ++++++++++++++++++++++++++++ versions/spack.ver | 2 +- 7 files changed, 68 insertions(+), 5 deletions(-) create mode 100755 bin/run_python.sh create mode 100755 bin/run_wgrib2.sh create mode 100755 ush/run_python.sh diff --git a/bin/run_python.sh b/bin/run_python.sh new file mode 100755 index 00000000000..0462c8e070c --- /dev/null +++ b/bin/run_python.sh @@ -0,0 +1,11 @@ +#!/bin/bash + containerdir=/gpfs/f6/scratch/Wei.Huang/container + img=${containerdir}/ubuntu22.04-intel-ufs-env-v1.6.0.img + + HOMEgfs=/gpfs/f6/scratch/Wei.Huang/src/global-workflow-cloud + cmd=${HOMEgfs}/ush/run_python.sh + arg="$@" + echo "running: singularity exec ${img} $cmd $arg" + singularity exec \ + -B /gpfs/f6/scratch \ + ${img} $cmd $arg diff --git a/bin/run_wgrib2.sh b/bin/run_wgrib2.sh new file mode 100755 index 00000000000..a80d80c31ae --- /dev/null +++ b/bin/run_wgrib2.sh @@ -0,0 +1,9 @@ +#!/bin/bash + containerdir=/gpfs/f6/scratch/Wei.Huang/container + img=${containerdir}/gw-container.sif + cmd=/opt/global-workflow-cloud/ush/run_wgrib2.sh + arg="$@" + echo "running: singularity exec ${img} $cmd $arg" + singularity exec \ + -B /scratch4 \ + ${img} $cmd $arg diff --git a/env/GAEAC6.env b/env/GAEAC6.env index 3f216e450d9..e7c3443b0ea 
100755 --- a/env/GAEAC6.env +++ b/env/GAEAC6.env @@ -239,6 +239,7 @@ case ${step} in ;; "fcst" | "efcs") + export launcher="srun --mpi=pmi2 -l --export=ALL" export OMP_STACKSIZE=1024M export MPICH_COLL_SYNC=MPI_Bcast diff --git a/scripts/exglobal_forecast.sh b/scripts/exglobal_forecast.sh index 776018019c3..cbc6942bf44 100755 --- a/scripts/exglobal_forecast.sh +++ b/scripts/exglobal_forecast.sh @@ -175,6 +175,11 @@ else export OMP_NUM_THREADS=${UFS_THREADS:-1} fi +echo "EXECgfs: ${EXECgfs}" +echo "FCSTEXEC: ${FCSTEXEC}" +echo "DATA: ${DATA}" +echo "APRUN_UFS: ${APRUN_UFS}" + cpreq "${EXECgfs}/${FCSTEXEC}" "${DATA}/" ${APRUN_UFS} "${DATA}/${FCSTEXEC}" 1>&1 2>&2 && true export err=$? diff --git a/ush/run_gfs_model.sh b/ush/run_gfs_model.sh index 2c11f83e02f..58553540c99 100755 --- a/ush/run_gfs_model.sh +++ b/ush/run_gfs_model.sh @@ -3,13 +3,22 @@ # Set OMP_NUM_THREADS to 1 to avoid oversubscription when doing MPMD export OMP_NUM_THREADS=1 export FPATH=/usr/lmod/lmod/libexec -export HOMEgfs=/opt/global-workflow-cloud -source ${HOMEgfs}/versions/run.ver +export HOMEgfs=/gpfs/f6/scratch/Wei.Huang/src/global-workflow-cloud source /usr/lmod/lmod/init/bash -#module reset +module purge +source ${HOMEgfs}/versions/run.container.ver module use ${HOMEgfs}/modulefiles module load module_base.container export LD_LIBRARY_PATH=/opt/intel/oneapi/compiler/2023.2.3/linux/compiler/lib/intel64_lin:$LD_LIBRARY_PATH export LD_LIBRARY_PATH=/opt/intel/oneapi/compiler/2024.0/lib:$LD_LIBRARY_PATH -${HOMEgfs}/sorc/ufs_model.fd/tests/gfs_model.x +#export I_MPI_DEBUG=30 +#export I_MPI_FABRICS=shm:tcp +#export FI_PROVIDER=shm:tcp +export I_MPI_DEBUG=1 +export I_MPI_FABRICS=shm:ofi +export I_MPI_OFI_PROVIDER=tcp +export FI_PROVIDER=tcp +export FI_TCP_IFACE=eth0 +args=$@ +${HOMEgfs}/sorc/ufs_model.fd/tests/gfs_model.x $args diff --git a/ush/run_python.sh b/ush/run_python.sh new file mode 100755 index 00000000000..1903e172611 --- /dev/null +++ b/ush/run_python.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env 
bash + +export FPATH=/usr/lmod/lmod/libexec + +export HOMEgfs=/gpfs/f6/scratch/Wei.Huang/src/global-workflow-cloud +source /usr/lmod/lmod/init/bash +module purge +source ${HOMEgfs}/versions/run.container.ver +module use ${HOMEgfs}/modulefiles +module load module_base.container + +#module load python/3.10.13 +#module load py-f90nml/1.4.3 +#module load py-netcdf4/1.5.8 +#module load py-pyyaml/6.0 +#module load py-jinja2/3.1.2 +#module load py-pandas/1.5.3 +#module load py-numpy/1.22.3 +#module load py-xarray/2023.7.0 +#module load py-python-dateutil/2.8.2 + +wxflowPATH="${HOMEgfs}/ush/python" +export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${HOMEgfs}/ush:${wxflowPATH}" + +arg="$@" + +/opt/spack-stack/spack-stack-1.6.0/envs/unified-env/install/intel/2021.10.0/python-3.10.13-h3oyipv/bin/python $arg + diff --git a/versions/spack.ver b/versions/spack.ver index 301cb65e107..382167e6396 100644 --- a/versions/spack.ver +++ b/versions/spack.ver @@ -30,7 +30,7 @@ export grib_util_ver=1.3.0 export prod_util_ver=2.1.1 export py_netcdf4_ver=1.5.8 export py_pyyaml_ver=6.0 -export py_jinja2_ver=3.1.2 +export py_jinja2_ver=3.0.3 export py_pandas_ver=1.5.3 export py_python_dateutil_ver=2.8.2 export py_f90nml_ver=1.4.3 From fa132ab692c694d1a40f4dbd4ee8b903615a0763 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Mon, 16 Jun 2025 15:17:28 +0000 Subject: [PATCH 058/134] generalize container setup on ursa --- c48atm.sh | 4 +- dev/ci/cases/pr/C48_S2SW.yaml | 2 - dev/ci/cases/sfs/C96mx100_S2S.yaml | 3 - dev/jobs/atmos_products.sh | 2 - dev/jobs/stage_ic.sh | 10 +- dev/parm/config/gfs/config.resources | 11 +- dev/parm/config/gfs/config.resources.AWSPW | 8 - .../config/gfs/config.resources.CONTAINER | 29 -- dev/ush/fetch-fix-data.py | 272 ------------------ dev/workflow/generate_workflows.sh | 86 +++--- dev/workflow/hosts.py | 30 +- dev/workflow/hosts/container.yaml | 41 ++- dev/workflow/hosts/ursa.yaml | 7 +- dev/workflow/rocoto/tasks.py | 3 +- dev/workflow/rocoto/workflow_xml.py | 14 +- 
env/CONTAINER.env | 76 +---- env/HERA.env | 2 - gen-c48atm.sh | 20 ++ jobs/JGLOBAL_STAGE_IC | 8 +- modulefiles/module_base.container.lua | 53 ---- modulefiles/module_gwci.container.lua | 15 - modulefiles/module_gwsetup.container.lua | 21 -- modulefiles/module_run.hera.lua | 27 -- parm/ufs/fv3/global_control.nml.IN | 1 - scripts/exglobal_atmos_products.sh | 13 +- sorc/link_workflow.sh | 104 ++----- ush/detect_machine.sh | 8 - ush/forecast_postdet.sh | 2 - ush/forecast_predet.sh | 55 ++-- ush/jjob_header.sh | 6 +- ush/load_fv3gfs_modules.sh | 45 +-- ush/module-setup.sh | 5 +- ush/parsing_ufs_configure.sh | 1 - versions/build.container.ver | 5 - versions/run.container.ver | 7 - versions/spack.ver | 2 +- 36 files changed, 237 insertions(+), 761 deletions(-) delete mode 100644 dev/parm/config/gfs/config.resources.CONTAINER create mode 100755 gen-c48atm.sh delete mode 100644 modulefiles/module_base.container.lua delete mode 100644 modulefiles/module_gwci.container.lua delete mode 100644 modulefiles/module_gwsetup.container.lua delete mode 100644 modulefiles/module_run.hera.lua delete mode 120000 parm/ufs/fv3/global_control.nml.IN delete mode 100644 versions/build.container.ver delete mode 100644 versions/run.container.ver diff --git a/c48atm.sh b/c48atm.sh index 30df57d7114..ab58004056c 100755 --- a/c48atm.sh +++ b/c48atm.sh @@ -2,11 +2,11 @@ set -x - HOMEDIR=/scratch4/NAGAPE/epic/Wei.Huang/demo + HOMEDIR=/scratch4/NAGAPE/epic/Wei.Huang/dev rundir=/scratch4/NAGAPE/epic/Wei.Huang/run mkdir -p ${rundir} - source ~/.bashrc +#source ~/.bashrc source dev/ush/gw_setup.sh HPC_ACCOUNT=epic \ diff --git a/dev/ci/cases/pr/C48_S2SW.yaml b/dev/ci/cases/pr/C48_S2SW.yaml index 819c6279e78..eed4500b270 100644 --- a/dev/ci/cases/pr/C48_S2SW.yaml +++ b/dev/ci/cases/pr/C48_S2SW.yaml @@ -12,5 +12,3 @@ arguments: idate: 2021032312 edate: 2021032312 yaml: {{ HOMEgfs }}/dev/ci/cases/yamls/gfs_defaults_ci.yaml -skip_ci_on_hosts: - - awsepicglobalworkflow diff --git 
a/dev/ci/cases/sfs/C96mx100_S2S.yaml b/dev/ci/cases/sfs/C96mx100_S2S.yaml index 384b7c08f08..276d49075a3 100644 --- a/dev/ci/cases/sfs/C96mx100_S2S.yaml +++ b/dev/ci/cases/sfs/C96mx100_S2S.yaml @@ -16,6 +16,3 @@ arguments: expdir: {{ 'RUNTESTS' | getenv }}/EXPDIR icsdir: {{ 'ICSDIR_ROOT' | getenv }}/C96mx100/20240610 yaml: {{ HOMEgfs }}/dev/ci/cases/yamls/sfs_full.yaml - -skip_ci_on_hosts: - - awsepicglobalworkflow diff --git a/dev/jobs/atmos_products.sh b/dev/jobs/atmos_products.sh index df40dc9ddea..83c91e68067 100755 --- a/dev/jobs/atmos_products.sh +++ b/dev/jobs/atmos_products.sh @@ -7,8 +7,6 @@ set -x ## FHRLST : forecast hour list to post-process (e.g. -f001, f000, f000_f001_f002, ...) ############################################################### -module reset - # Source FV3GFS workflow modules . "${HOMEgfs}/ush/load_fv3gfs_modules.sh" status=$? diff --git a/dev/jobs/stage_ic.sh b/dev/jobs/stage_ic.sh index f7b076c3d82..89a4f4ea084 100755 --- a/dev/jobs/stage_ic.sh +++ b/dev/jobs/stage_ic.sh @@ -3,11 +3,11 @@ set -x # Source FV3GFS workflow modules -# . "${HOMEgfs}/ush/load_fv3gfs_modules.sh" -# status=$? -# if [[ "${status}" -ne 0 ]]; then -# exit "${status}" -# fi +. "${HOMEgfs}/ush/load_fv3gfs_modules.sh" +status=$? 
+if [[ "${status}" -ne 0 ]]; then + exit "${status}" +fi export job="stage_ic" export jobid="${job}.$$" diff --git a/dev/parm/config/gfs/config.resources b/dev/parm/config/gfs/config.resources index 7c29a5da50f..e328050b46b 100644 --- a/dev/parm/config/gfs/config.resources +++ b/dev/parm/config/gfs/config.resources @@ -49,9 +49,9 @@ case ${machine} in ;; "URSA") export PARTITION_BATCH="u1-compute" - max_tasks_per_node=96 + max_tasks_per_node=192 # shellcheck disable=SC2034 - mem_node_max="96GB" + mem_node_max="384GB" ;; "GAEAC5") max_tasks_per_node=128 @@ -98,10 +98,9 @@ case ${machine} in mem_node_max="" ;; "CONTAINER") - export PARTITION_BATCH="compute" - npe_node_max=48 - max_tasks_per_node=48 - # TODO Supply a max mem/node value for CONTAINER + npe_node_max=1 + max_tasks_per_node= + # TODO Supply a max mem/node value for container # shellcheck disable=SC2034 mem_node_max="" ;; diff --git a/dev/parm/config/gfs/config.resources.AWSPW b/dev/parm/config/gfs/config.resources.AWSPW index 58638c4f89d..0b3e3fc4fbd 100644 --- a/dev/parm/config/gfs/config.resources.AWSPW +++ b/dev/parm/config/gfs/config.resources.AWSPW @@ -17,14 +17,6 @@ case ${step} in tasks_per_node=48 ;; - "arch_vrfy" | "arch_tars") - export PARTITION_BATCH="process" - max_tasks_per_node=24 - ;; - - "atmos_products" | "oceanice_products" | "wavepostsbs" ) - export PARTITION_BATCH="process" - max_tasks_per_node=24 "anal") export PARTITION_BATCH="compute" max_tasks_per_node=48 diff --git a/dev/parm/config/gfs/config.resources.CONTAINER b/dev/parm/config/gfs/config.resources.CONTAINER deleted file mode 100644 index 17a3924d29c..00000000000 --- a/dev/parm/config/gfs/config.resources.CONTAINER +++ /dev/null @@ -1,29 +0,0 @@ -#! 
/usr/bin/env bash - -# AWS-specific job resources - -export is_exclusive="True" -unset memory -unset "memory_${RUN}" - -step=$1 - -case ${step} in - "fcst" | "efcs" | "wavepostpnt") - export PARTITION_BATCH="compute" - unset PARTITION_SERVICE - max_tasks_per_node=48 - tasks_per_node=48 - ;; - - *) - export PARTITION_BATCH="process" - unset PARTITION_SERVICE - max_tasks_per_node=24 - tasks_per_node=24 - ;; - -esac - -export max_tasks_per_node -export tasks_per_node diff --git a/dev/ush/fetch-fix-data.py b/dev/ush/fetch-fix-data.py index d16d2db79b5..3dcbc54466e 100644 --- a/dev/ush/fetch-fix-data.py +++ b/dev/ush/fetch-fix-data.py @@ -1,31 +1,16 @@ -<<<<<<< HEAD -#!/usr/bin/env python -# cfetch-fix-data.py -======= #!/usr/bin/env python3 # fetch-fix-data.py ->>>>>>> 7fa952e0638f62539db47c96d37f64335c68e234 # wei.huang@noaa.gov # 2025-02-26 # script to download a subset of FIX data to local machines. import os -<<<<<<< HEAD -import time -import sys -import getopt -======= ->>>>>>> 7fa952e0638f62539db47c96d37f64335c68e234 import argparse import subprocess from pathlib import Path import logging # Create and configure logger -<<<<<<< HEAD -logging.basicConfig(filename="cfetch-fix-data.log", -======= logging.basicConfig(filename="fetch-fix-data.log", ->>>>>>> 7fa952e0638f62539db47c96d37f64335c68e234 format='%(asctime)s %(message)s', filemode='w') @@ -39,12 +24,6 @@ class FetchFIXdata(): -<<<<<<< HEAD - - def __init__(self, atmgridarray=['C48'], ocngridarray=['500'], - fix_bucket=None, localdir=None, verbose=0): - -======= """Fetch a subset of FIX data from NOAA s3 bucket. 
""" @@ -70,7 +49,6 @@ def __init__(self, atmgridarray=['C48'], ocngridarray=['500'], ------- None """ ->>>>>>> 7fa952e0638f62539db47c96d37f64335c68e234 # self.aws_fix_bucket = f's3://noaa-nws-global-pds/fix' self.aws_fix_bucket = fix_bucket self.aws_cp = f'aws --no-sign-request s3 cp' @@ -79,17 +57,6 @@ def __init__(self, atmgridarray=['C48'], ocngridarray=['500'], self.atmgridarray = atmgridarray self.ocngridarray = ocngridarray self.localdir = localdir -<<<<<<< HEAD - self.verbose = verbose - - if (os.path.isdir(localdir)): - logger.info(f'Prepare to download FIX data for {atmgrid} and {ocngrid} to {localdir}') - else: - logger.info(f'local dir: <{localdir}> does not exist. Stop') - sys.exit(-1) - - self.verdict = {} -======= self.fix_ver = fix_ver self.verbose = verbose @@ -110,7 +77,6 @@ def __init__(self, atmgridarray=['C48'], ocngridarray=['500'], logger.error(f'File fix_ver: <{fix_ver}> does not exist. Stop') raise SystemExit ->>>>>>> 7fa952e0638f62539db47c96d37f64335c68e234 self.s3dict = {} self.s3dict['raworog'] = f'raw/orog' @@ -119,13 +85,6 @@ def __init__(self, atmgridarray=['C48'], ocngridarray=['500'], else: self.targetdir = self.localdir -<<<<<<< HEAD - # -------------------------------------------------------------------------- - def update_s3dict(self): - - self.update_s3dick_grid_independent() - self.add_grid_data() -======= self.get_fix_ver_dict() self.create_s3dict() @@ -158,100 +117,11 @@ def create_s3dict(self): self.add_cpl2s3dict(s3key, val) else: self.s3dict[s3key] = f'{s3key}/{val}' ->>>>>>> 7fa952e0638f62539db47c96d37f64335c68e234 if (self.verbose): self.printinfo() # -------------------------------------------------------------------------- -<<<<<<< HEAD - def update_s3dick_grid_independent(self): - - for key in self.fix_ver_dict.keys(): - val = self.fix_ver_dict[key] - if (key == 'aer_ver'): - self.s3dict['aer'] = f'aer/{val}' - elif (key == 'am_ver'): - self.s3dict['am'] = f'am/{val}' - elif (key == 'chem_ver'): - 
self.s3dict['fimdata_chem'] = f'chem/{val}/fimdata_chem' - self.s3dict['Emission_data'] = f'chem/{val}/Emission_data' - elif (key == 'datm_ver'): - self.s3dict['cfsr'] = f'datm/{val}/cfsr' - self.s3dict['gefs'] = f'datm/{val}/gefs' - self.s3dict['gfs'] = f'datm/{val}/gfs' - self.s3dict['mom6'] = f'datm/{val}/mom6' - elif (key == 'glwu_ver'): - self.s3dict['glwu'] = f'glwu/{val}' - elif (key == 'gsi_ver'): - self.s3dict['gsi'] = f'gsi/{val}' - elif (key == 'lut_ver'): - self.s3dict['lut'] = f'lut/{val}' - elif (key == 'mom6_ver'): - self.s3dict['mom6post'] = f'mom6/{val}/post' - elif (key == 'reg2grb2_ver'): - self.s3dict['reg2grb2'] = f'reg2grb2/{val}' - elif (key == 'sfc_climb_ver'): - self.s3dict['sfc_climo'] = f'sfc_climo/{val}' - elif (key == 'verif_ver'): - self.s3dict['verif'] = f'verif/{val}' - elif (key == 'wave_ver'): - self.s3dict['wave'] = f'wave/{val}' - - # -------------------------------------------------------------------------- - def add_grid_data(self): - - for key in self.fix_ver_dict.keys(): - val = self.fix_ver_dict[key] - if (key == 'orog_ver'): - self.add_atmgrid2s3dict('orog', key, val) - elif (key == 'ugwd_ver'): - self.add_atmgrid2s3dict('ugwd', key, val) - elif (key == 'mom6_ver'): - self.add_ocngrid2s3dict('mom6', key, val) - elif (key == 'cice_ver'): - self.add_ocngrid2s3dict('cice', key, val) - elif (key == 'cpl_ver'): - self.add_cpl2s3dict('cpl', key, val) - - # -------------------------------------------------------------------------- - def add_atmgrid2s3dict(self, varname, key, val): - - for atmgrid in self.atmgridarray: - newkey = f'{key}_{atmgrid}' - self.s3dict[newkey] = f'{varname}/{val}/{atmgrid}' - - # ------------------------------------------------------------------------- - def add_ocngrid2s3dict(self, varname, key, val): - - for ocngrid in self.ocngridarray: - newkey = f'{key}_{atmgrid}' - self.s3dict[newkey] = f'{varname}/{val}/{ocngrid}' - - # ------------------------------------------------------------------------- - def 
add_cpl2s3dict(self, varname, key, val): - - for atmgrid in self.atmgridarray: - for ocngrid in self.ocngridarray: - newkey = f'{key}_a{atmgrid}o{ocngrid}' - self.s3dict[newkey] = f'{varname}/{val}/a{atmgrid}o{ocngrid}' - - # ------------------------------------------------------------------------- - def printinfo(self): - - logger.info(f'Preparing to fetch') - logger.info(f'ATM grid: {self.atmgridarray}') - logger.info(f'ONC grid: {self.ocngridarray}') - logger.info(f'From: {self.aws_fix_bucket}') - logger.info(f'To: {self.targetdir}') - for key in self.s3dict.keys(): - val = self.s3dict[key] - logger.info(f'{key}: {val}') - - # ------------------------------------------------------------------------- - def fetchdata(self): - -======= def add_atmgrid2s3dict(self, key, val): """ Add ATM grid data to dict. @@ -305,7 +175,6 @@ def printinfo(self): def fetchdata(self): """Fetch data defined in s3bucket. """ ->>>>>>> 7fa952e0638f62539db47c96d37f64335c68e234 if (self.verbose): logger.info(f'Create local fix dir: {self.targetdir}') @@ -315,23 +184,6 @@ def fetchdata(self): self.fetch_ugwp_limb_tau() for key in self.s3dict.keys(): -<<<<<<< HEAD - self.fetch_dir(self.s3dict[key]) - - # ------------------------------------------------------------------------- - def fetch_dir(self, dir): - - remotedir = f'{self.aws_fix_bucket}/{dir}' - localdir = f'{self.targetdir}/{dir}' - cmd = f'{self.aws_sync} {remotedir} {localdir}' - self.download_dir(cmd, localdir) - - # -------------------------------------------------------------------------- - def download_dir(self, cmd, localdir): - - # returned_value = os.system(cmd) # returns the exit code in unix - # logger.info('returned value:', returned_value) -======= self.download_dir(self.s3dict[key]) # -------------------------------------------------------------------------- @@ -345,7 +197,6 @@ def download_dir(self, dir): # returned_value = os.system(cmd) # returns the exit code in unix # if (self.verbose): # logger.info(f'returned 
value: {returned_value}') ->>>>>>> 7fa952e0638f62539db47c96d37f64335c68e234 if (os.path.isdir(localdir)): logger.info(f'{localdir} already exist. skip') @@ -360,20 +211,12 @@ def download_dir(self, dir): logger.info(f'Downloading {localdir}') returned_value = subprocess.call(cmd, shell=True) # returns the exit code in unix if (self.verbose): -<<<<<<< HEAD - logger.info('returned value:', returned_value) - - # -------------------------------------------------------------------------- - def fetch_ugwp_limb_tau(self): - -======= logger.info(f'returned value: {returned_value}') # -------------------------------------------------------------------------- def fetch_ugwp_limb_tau(self): """download ugwp_limb_tau.nc """ ->>>>>>> 7fa952e0638f62539db47c96d37f64335c68e234 ugwd_ver = self.fix_ver_dict['ugwd_ver'] ugwp_limb_tau_remotepath = f'{self.aws_fix_bucket}/ugwd/{ugwd_ver}/ugwp_limb_tau.nc' ugwp_limb_tau_localdir = f'{self.targetdir}/ugwd/{ugwd_ver}' @@ -381,20 +224,10 @@ def fetch_ugwp_limb_tau(self): path = Path(ugwp_limb_tau_localdir) path.mkdir(parents=True, exist_ok=True) cmd = f'{self.aws_cp} {ugwp_limb_tau_remotepath} {filename}' -<<<<<<< HEAD - self.download_file(cmd, filename) - - # ------------------------------------------------------------------------- - def download_file(self, cmd, filename): - - # returned_value = os.system(cmd) # returns the exit code in unix - # logger.info('returned value:', returned_value) -======= # returned_value = os.system(cmd) # returns the exit code in unix # if (self.verbose): # logger.info(f'returned value: {returned_value}') ->>>>>>> 7fa952e0638f62539db47c96d37f64335c68e234 if (os.path.isfile(filename)): logger.info(f'{filename} already exist. 
skip') @@ -404,46 +237,6 @@ def download_file(self, cmd, filename): logger.info(f'Downloading {filename}') returned_value = subprocess.call(cmd, shell=True) # returns the exit code in unix if (self.verbose): -<<<<<<< HEAD - logger.info('returned value:', returned_value) - - # -------------------------------------------------------------------------- - def set_fix_ver_from_gwhome(self, gwhome, verdict): - - fix_ver_file = f'{gwhome}/versions/fix.ver' - self.fix_ver_dict = verdict - if (os.path.isfile(fix_ver_file)): - with open(fix_ver_file, "r") as file: - for line in file.readlines(): - if (line.find('export ') >= 0): - headstr, _, value = line.strip().partition('=') - exphead, _, key = headstr.partition(' ') - self.fix_ver_dict[key] = value - else: - logger.info(f'fix_ver_file: {fix_ver_file}s does not exist.') - - # ------------------------------------------------------------------------ - def set_default_fix_ver(self, verdict): - - self.fix_ver_dict = verdict - -# ----------------------------------------------------------------------------- - - -def namespace_to_dict(namespace): - return { - k: namespace_to_dict(v) if isinstance(v, argparse.Namespace) else v - for k, v in vars(namespace).items() - } - -# ------------------------------------------------------------------------------ - - -if __name__ == '__main__': - - atmgridlist = ['C48', 'C96', 'C192', 'C384', 'C768', 'C1152'] - ocngridlist = ['500', '100', '050', '025'] -======= logger.info(f'returned value: {returned_value}') # -------------------------------------------------------------------------- @@ -470,39 +263,10 @@ def main() -> None: # define available ATM and OCN grids. 
ATMGRIDLIST = ['C48', 'C96', 'C192', 'C384', 'C768', 'C1152'] OCNGRIDLIST = ['500', '100', '050', '025'] ->>>>>>> 7fa952e0638f62539db47c96d37f64335c68e234 parser = argparse.ArgumentParser() parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity") -<<<<<<< HEAD - parser.add_argument("-a", "--atmgrid", type=str, required=True, - help="ATM grid, like: C48, C96, C192, C384, C768, C1152") - parser.add_argument("-o", "--ocngrid", type=str, required=True, - help="OCN grid, like: 500, 100, 050, 025") - parser.add_argument("--localdir", type=str, required=True, - help="local directory to store FIX data subset") - parser.add_argument("--gwhome", type=str, default='unknown', - help="GW home diretory where can find fix.ver") - parser.add_argument("--fix_bucket", type=str, default='s3://noaa-nws-global-pds/fix', - help="S3 Bucket directory of FIX data") - parser.add_argument("--aer_ver", type=str, default='20220805', help="AER version") - parser.add_argument("--am_ver", type=str, default='20220805', help="AM version") - parser.add_argument("--chem_ver", type=str, default='20220805', help="chem version") - parser.add_argument("--cice_ver", type=str, default='20240416', help="cice version") - parser.add_argument("--cpl_ver", type=str, default='20230526', help="cpl version") - parser.add_argument("--datm_ver", type=str, default='20220805', help="datm version") - parser.add_argument("--glwu_ver", type=str, default='20220805', help="glwu version") - parser.add_argument("--gsi_ver", type=str, default='20240208', help="gsi version") - parser.add_argument("--lut_ver", type=str, default='20220805', help="lut version") - parser.add_argument("--mom6_ver", type=str, default='20240416', help="mom6 version") - parser.add_argument("--orog_ver", type=str, default='20231027', help="orog version") - parser.add_argument("--reg2grb2_ver", type=str, default='20220805', help="reg2grb2 version") - parser.add_argument("--sfc_climo_ver", type=str, 
default='20220805', help="sfc_climo version") - parser.add_argument("--ugwd_ver", type=str, default='20220805', help="ugwd version") - parser.add_argument("--verif_ver", type=str, default='20220805', help="verif version") - parser.add_argument("--wave_ver", type=str, default='20220805', help="wave version") -======= parser.add_argument("-d", "--localdir", type=str, required=True, help="local directory to store FIX data subset") parser.add_argument("-f", "--fix_ver", type=str, required=True, @@ -517,20 +281,14 @@ def main() -> None: parser.add_argument("-o", "--ocngrid", type=str, required=False, default="100", help="OCN grid, like: 500,100,050,025, default: 100") ->>>>>>> 7fa952e0638f62539db47c96d37f64335c68e234 args = parser.parse_args() if args.verbose: logger.info(f"the atmgrid is {args.atmgrid}") -<<<<<<< HEAD - else: - logger.info(f"the atmgrid is {args.atmgrid}") -======= logger.info(f"the ocngrid is {args.ocngrid}") logger.info(f"the localdir is {args.localdir}") logger.info(f"the fix_file is {args.fix_ver}") logger.info(f"the s3 bucket is {args.fix_bucket}") ->>>>>>> 7fa952e0638f62539db47c96d37f64335c68e234 atmgrid = args.atmgrid if (atmgrid.find(',') > 0): @@ -539,17 +297,10 @@ def main() -> None: atmgridarray = [atmgrid] for grid in atmgridarray: -<<<<<<< HEAD - if (grid not in atmgridlist): - logger.info(f'atmgrid: {grid}') - logger.info(f'is not in supported grids: {atmgridlist}') - sys.exit(-1) -======= if (grid not in ATMGRIDLIST): logger.error(f'atmgrid: {grid}') logger.error(f'is not in supported grids: {ATMGRIDLIST}') raise SystemExit ->>>>>>> 7fa952e0638f62539db47c96d37f64335c68e234 ocngrid = args.ocngrid if (ocngrid.find(',') > 0): @@ -558,28 +309,6 @@ def main() -> None: ocngridarray = [ocngrid] for grid in ocngridarray: -<<<<<<< HEAD - if (grid not in ocngridlist): - logger.info(f'ocngrid: {grid}') - logger.info(f'is not in supported grids: {ocngridlist}') - sys.exit(-1) - - verdict = namespace_to_dict(args) - - # 
------------------------------------------------------------------ - ffd = FetchFIXdata(atmgridarray=atmgridarray, - ocngridarray=ocngridarray, - fix_bucket=args.fix_bucket, - localdir=args.localdir, verbose=args.verbose) - - if (args.gwhome is None): - ffd.set_default_fix_ver(verdict) - else: - ffd.set_fix_ver_from_gwhome(args.gwhome, verdict) - - ffd.update_s3dict() - ffd.fetchdata() -======= if (grid not in OCNGRIDLIST): logger.error(f'ocngrid: {grid}') logger.error(f'is not in supported grids: {OCNGRIDLIST}') @@ -596,4 +325,3 @@ def main() -> None: # ------------------------------------------------------------------------------ if __name__ == '__main__': main() ->>>>>>> 7fa952e0638f62539db47c96d37f64335c68e234 diff --git a/dev/workflow/generate_workflows.sh b/dev/workflow/generate_workflows.sh index 5e113f1ead1..266ee7d56df 100755 --- a/dev/workflow/generate_workflows.sh +++ b/dev/workflow/generate_workflows.sh @@ -43,9 +43,7 @@ function _usage() { -S Run all valid SFS cases in the specified YAML directory. - -C Run all valid GCAFS cases in the specified YAML directory. - - NOTES on -G, -E, -S and -C: + NOTES on -G, -E, and -S: - Valid cases are determined by the experiment:system key as well as the skip_ci_on_hosts list in each YAML. @@ -68,6 +66,8 @@ function _usage() { -t Add a 'tag' to the end of the case names in the pslots to distinguish pslots between multiple sets of tests. + -R Run with Container + -v Verbose mode. Prints output of all commands to stdout. -V Very verbose mode. Passes -v to all commands and prints to stdout. 
@@ -93,7 +93,7 @@ _specified_yaml_dir=false _run_all_gfs=false _run_all_gefs=false _run_all_sfs=false -_run_all_gcafs=false +_run_with_container=false _hpc_account="" _set_account=false _update_cron=false @@ -110,7 +110,7 @@ _auto_del=false _nonflag_option_count=0 while [[ $# -gt 0 && "$1" != "--" ]]; do - while getopts ":H:bDuy:Y:GESCA:ce:t:vVdh" option; do + while getopts ":H:bDuy:Y:GESA:ce:t:vVRdh" option; do case "${option}" in H) HOMEgfs="${OPTARG}" @@ -135,12 +135,12 @@ while [[ $# -gt 0 && "$1" != "--" ]]; do G) _run_all_gfs=true ;; E) _run_all_gefs=true ;; S) _run_all_sfs=true ;; - C) _run_all_gcafs=true ;; c) _update_cron=true ;; e) _email="${OPTARG}" && _set_email=true ;; t) _tag="_${OPTARG}" ;; v) _verbose=true ;; V) _very_verbose=true && _verbose=true && _verbose_flag="-v" ;; + R) _run_with_container=true ;; A) _set_account=true && _hpc_account="${OPTARG}" ;; d) _debug=true && _very_verbose=true && _verbose=true && _verbose_flag="-v" && PS4='${LINENO}: ' ;; h) _usage && exit 0 ;; @@ -245,17 +245,16 @@ else fi fi -# Empty the _yaml_list array if -G, -E, -S and/or -C were selected +# Empty the _yaml_list array if -G, -E, and/or -S were selected if [[ "${_run_all_gfs}" == "true" || \ "${_run_all_gefs}" == "true" || \ - "${_run_all_gcafs}" == "true" || \ "${_run_all_sfs}" == "true" ]]; then - # Raise an error if the user specified a yaml list and any of -G -E -S -C + # Raise an error if the user specified a yaml list and any of -G -E -S if [[ "${_specified_yaml_list}" == "true" ]]; then echo "Ambiguous case selection." echo "Please select which tests to run explicitly with -y \"list of tests\" or" - echo "by specifying -G (all GFS), -E (all GEFS), -C (all GCAFS) and/or -S (all SFS), but not both." + echo "by specifying -G (all GFS), -E (all GEFS), and/or -S (all SFS), but not both." 
exit 3 fi @@ -271,6 +270,16 @@ if [[ "${_specified_home}" == "false" ]]; then fi fi +# Set RUN_WITH_CONTAINER if it is set by the user +if [[ "${_run_with_container}" == "true" ]]; then + RUN_WITH_CONTAINER=YES + if [[ "${_verbose}" == "true" ]]; then + printf "Run with Container %s\n\n" "${RUN_WITH_CONTAINER}" + fi +else + RUN_WITH_CONTAINER=NO +fi + # Set the _yaml_dir to HOMEgfs/dev/ci/cases/pr if not explicitly set if [[ "${_specified_yaml_dir}" == false ]]; then _yaml_dir="${HOMEgfs}/dev/ci/cases/pr" @@ -285,7 +294,8 @@ function select_all_yamls() # YAMLs in that list that are not for the specified system and issue warnings when # doing so. - _net="${1}" + _system="${1}" + _SYSTEM="${_system^^}" # Bash cannot return an array from a function and any edits are descoped at # the end of the function, so use a nameref instead. @@ -294,12 +304,12 @@ function select_all_yamls() if [[ "${_specified_yaml_list}" == false ]]; then # Start over with an empty _yaml_list _nameref_yaml_list=() - printf "Running all %s cases in %s\n\n" "${_net^^}" "${_yaml_dir}" + printf "Running all %s cases in %s\n\n" "${_SYSTEM}" "${_yaml_dir}" _yaml_count=0 for _full_path in "${_yaml_dir}/"*.yaml; do # Skip any YAML that isn't supported - if ! grep -l "net: *${_net}" "${_full_path}" >& /dev/null ; then continue; fi + if ! grep -l "system: *${_system}" "${_full_path}" >& /dev/null ; then continue; fi # Select only cases for the specified system _yaml=$(basename "${_full_path}") @@ -314,7 +324,7 @@ function select_all_yamls() if [[ ${_yaml_count} -eq 0 ]]; then read -r -d '' _message << EOM - "No YAMLs or ${_net^^} were found in the directory (${_yaml_dir})!" + "No YAMLs or ${_SYSTEM} were found in the directory (${_yaml_dir})!" 
"Please check the directory/YAMLs and try again" EOM echo "${_message}" @@ -327,9 +337,9 @@ EOM # Check if the specified yamls are for the specified system for i in "${!_nameref_yaml_list}"; do _yaml="${_nameref_yaml_list[${i}]}" - _found=$(grep -l "net: *${_net}" "${_yaml_dir}/${_yaml}.yaml") + _found=$(grep -l "system: *${system}" "${_yaml_dir}/${_yaml}.yaml") if [[ -z "${_found}" ]]; then - echo "WARNING: the yaml file ${_yaml_dir}/${_yaml}.yaml is not designed for the ${_net^^} system" + echo "WARNING: the yaml file ${_yaml_dir}/${_yaml}.yaml is not designed for the ${_SYSTEM} system" echo "Removing this yaml from the set of cases to run" unset '_nameref_yaml_list[${i}]' # Sleep 2 seconds to give the user a moment to react @@ -367,15 +377,6 @@ if [[ "${_run_all_sfs}" == "true" ]]; then _yaml_list=("${_yaml_list[@]}" "${_sfs_yaml_list[@]}") fi -# Check if running all GCAFS cases -if [[ "${_run_all_gcafs}" == "true" ]]; then - _build_flags="${_build_flags} gcafs gdas " - - declare -a _gfs_yaml_list - select_all_yamls "gcafs" "_gcafs_yaml_list" - _yaml_list=("${_yaml_list[@]}" "${_gcafs_yaml_list[@]}") -fi - # Loading modules sometimes raises unassigned errors, so disable checks set +u if [[ "${_verbose}" == "true" ]]; then @@ -451,15 +452,28 @@ fi if [[ "${_verbose}" == true ]]; then printf "Linking the workflow\n\n" fi -if ! "${HOMEgfs}/sorc/link_workflow.sh" >& stdout; then - cat stdout - echo "link_workflow.sh failed!" - if [[ "${_set_email}" == true ]]; then - _stdout=$(cat stdout) - send_email "link_workflow.sh failed with the message"$'\n'"${_stdout}" +if [[ "${_run_with_container}" == true ]]; then + if ! "${HOMEgfs}/sorc/link_workflow.sh" -r >& stdout; then + cat stdout + echo "link_workflow.sh failed!" + if [[ "${_set_email}" == true ]]; then + _stdout=$(cat stdout) + send_email "link_workflow.sh failed with the message"$'\n'"${_stdout}" + fi + rm -f stdout + exit 9 + fi +else + if ! 
"${HOMEgfs}/sorc/link_workflow.sh" >& stdout; then + cat stdout + echo "link_workflow.sh failed!" + if [[ "${_set_email}" == true ]]; then + _stdout=$(cat stdout) + send_email "link_workflow.sh failed with the message"$'\n'"${_stdout}" + fi + rm -f stdout + exit 9 fi - rm -f stdout - exit 9 fi rm -f stdout @@ -522,7 +536,11 @@ for _case in "${_yaml_list[@]}"; do echo "${_case}" fi _pslot="${_case}${_tag}" - _create_exp_cmd="./create_experiment.py -y ${_yaml_dir}/${_case}.yaml --overwrite" + if [[ "${_run_with_container}" == "true" ]]; then + _create_exp_cmd="../../bin/run_python.sh ./create_experiment.py -y ${_yaml_dir}/${_case}.yaml --overwrite" + else + _create_exp_cmd="./create_experiment.py -y ${_yaml_dir}/${_case}.yaml --overwrite" + fi if [[ "${_verbose}" == true ]]; then pslot=${_pslot} RUNTESTS=${_runtests} ${_create_exp_cmd} else diff --git a/dev/workflow/hosts.py b/dev/workflow/hosts.py index 793b8b877d9..d1875867c4f 100644 --- a/dev/workflow/hosts.py +++ b/dev/workflow/hosts.py @@ -15,12 +15,11 @@ class Host: Gather Host specific information. 
""" - SUPPORTED_HOSTS = ['HERA', 'ORION', 'HERCULES', 'WCOSS2', 'URSA', - 'GAEAC5', 'GAEAC6', 'AWSPW', 'AZUREPW', 'GOOGLEPW'] + SUPPORTED_HOSTS = ['HERA', 'ORION', 'HERCULES', 'WCOSS2', 'CONTAINER', + 'GAEAC5', 'GAEAC6', 'URSA', 'AWSPW', 'AZUREPW', 'GOOGLEPW'] - def __init__(self, host='URSA'): + def __init__(self, host=None): - print(f'host: {host}') if host is not None and host not in Host.SUPPORTED_HOSTS: raise NotImplementedError(f'{host} is not a supported host.\n' + 'Currently supported hosts are:\n' + @@ -44,11 +43,8 @@ def detect(self) -> None: # Detect the machine name and store in self.machine machine_id = os.getenv('MACHINE_ID', 'UNKNOWN') - machine_id = 'URSA' pw_csp = os.getenv('PW_CSP', 'UNKNOWN') - #container = os.getenv('SINGULARITY_NAME', None) - - print(f'machine_id: {machine_id}') + container = os.getenv('SINGULARITY_NAME', None) # Detect the machine since MACHINE_ID is set, # Additionaly, if PW_CSP is set, then the machine is a cloud machine @@ -57,12 +53,8 @@ def detect(self) -> None: self.machine = f"{pw_csp.upper()}PW" return - print("os.path.exists('/scratch3/NCEPDEV'): ", os.path.exists('/scratch3/NCEPDEV')) # Detect the machine since MACHINE_ID is not set - if os.path.exists('/scratch3/NCEPDEV'): - self.machine = 'URSA' - machine_id = 'URSA' - elif os.path.exists('/scratch1/NCEPDEV'): + if os.path.exists('/scratch1/NCEPDEV'): self.machine = 'HERA' machine_id = 'HERA' elif os.path.exists('/work/noaa'): @@ -73,19 +65,17 @@ def detect(self) -> None: self.machine = 'GAEAC5' elif os.path.exists('/gpfs/f6'): self.machine = 'GAEAC6' - #elif container is not None: - # self.machine = 'CONTAINER' + elif os.path.exists('/scratch3/NCEPDEV'): + self.machine = 'URSA' + machine_id = 'URSA' + elif container is not None: + self.machine = 'CONTAINER' elif pw_csp is not None: if pw_csp.lower() not in ['azure', 'aws', 'google']: raise ValueError( f'cloud service provider "{pw_csp}" is not supported.') self.machine = f"{pw_csp.upper()}PW" - 
print(f'self.machine: {self.machine}') - print('Host.SUPPORTED_HOSTS: ', Host.SUPPORTED_HOSTS) - self.machine = 'URSA' - machine_id = 'URSA' - if self.machine not in Host.SUPPORTED_HOSTS: raise NotImplementedError('This machine is not a supported host.\n' + 'Currently supported hosts are:\n' + diff --git a/dev/workflow/hosts/container.yaml b/dev/workflow/hosts/container.yaml index b30ab5d8ecc..b007a35c629 100644 --- a/dev/workflow/hosts/container.yaml +++ b/dev/workflow/hosts/container.yaml @@ -1,33 +1,28 @@ # Paths -DMPDIR: '' # TODO: This does not yet exist. -BASE_GIT: '' #TODO: This does not yet exist. -BASE_DATA: '/bucket/global-workflow-shared-data' -BASE_IC: '/bucket/global-workflow-shared-data/ICSDIR' -AERO_INPUTS_DIR: /contrib/global-workflow-shared-data/data/GEFS_ExtData/20250310 -PACKAGEROOT: '' #TODO: This does not yet exist. -HOMEDIR: '/contrib/${USER}' -STMP: '/lustre/${USER}/stmp/' -PTMP: '/lustre/${USER}/ptmp/' +DMPDIR: '/home/${USER}' +BASE_GIT: '' +PACKAGEROOT: '' +HOMEDIR: '/home/${USER}' +STMP: '/home/${USER}' +PTMP: '/home/${USER}' NOSCRUB: '${HOMEDIR}' -COMINsyn: '' #TODO: This does not yet exist. +COMINsyn: '' # BQS properties -SCHEDULER: slurm -QUEUE: batch -PARTITION_BATCH: compute -PARTITION_SERVICE: process -CHGRP_RSTPROD: 'YES' -CHGRP_CMD: 'chgrp rstprod' # TODO: This is not yet supported. +SCHEDULER: 'none' +QUEUE: '' +QUEUE_SERVICE: '' +PARTITION_BATCH: '' +PARTITION_SERVICE: '' +RESERVATION: '' +CLUSTERS: '' # HPSS properties -HPSS_PROJECT: emc-global #TODO: See `ATARDIR` below. +HPSS_PROJECT: emc-global ARCHCOM_TO: 'local' -ATARDIR: '' # TODO: This will not yet work from CONTAINER. +ATARDIR: '${NOSCRUB}/archive_rotdir/${PSLOT}' CHGRP_RSTPROD: 'YES' -CHGRP_CMD: 'chgrp rstprod' # TODO: This is not yet supported. +CHGRP_CMD: 'chgrp rstprod' # Features -SUPPORTED_RESOLUTIONS: ['C48', 'C96', 'C192', 'C384', 'C768'] # TODO: Test and support all cubed-sphere resolutions. 
+SUPPORTED_RESOLUTIONS: ['C96', 'C48'] DO_ARCHCOM: 'NO' -DO_TRACKER: 'NO' -DO_GENESIS: 'NO' -DO_METP: 'NO' MAKE_NSSTBUFR: 'NO' MAKE_ACFTBUFR: 'NO' diff --git a/dev/workflow/hosts/ursa.yaml b/dev/workflow/hosts/ursa.yaml index cda4c94d6c9..3c21f221a61 100644 --- a/dev/workflow/hosts/ursa.yaml +++ b/dev/workflow/hosts/ursa.yaml @@ -6,9 +6,10 @@ BASE_DATA: '/scratch3/NCEPDEV/global/role.glopara/data' BASE_IC: '/scratch4/NAGAPE/epic/Wei.Huang/data/ICSDIR' AERO_INPUTS_DIR: /scratch3/NCEPDEV/global/role.glopara/data/GEFS_ExtData/20250310 PACKAGEROOT: '/scratch3/NCEPDEV/global/role.glopara/nwpara' -HOMEDIR: '/scratch3/NCEPDEV/global/role.glopara${USER}' -STMP: '/scratch4/NAGAPE/epic/Wei.Huang/run/stmp2' -PTMP: '/scratch4/NAGAPE/epic/Wei.Huang/run/stmp2' +#HOMEDIR: '/scratch3/NCEPDEV/global/role.glopara${USER}' +HOMEDIR: '/scratch4/NAGAPE/epic/${USER}' +STMP: '/scratch4/NAGAPE/epic/${USER}/run/stmp2' +PTMP: '/scratch4/NAGAPE/epic/${USER}/run/stmp2' NOSCRUB: '${HOMEDIR}' COMINsyn: '/scratch3/NCEPDEV/global/role.glopara/com/gfs/prod/syndat' COMINecmwf: /scratch3/NCEPDEV/global/role.glopara/data/external_gempak/ecmwf diff --git a/dev/workflow/rocoto/tasks.py b/dev/workflow/rocoto/tasks.py index ce9c086af25..1844937a5b2 100644 --- a/dev/workflow/rocoto/tasks.py +++ b/dev/workflow/rocoto/tasks.py @@ -53,8 +53,7 @@ def __init__(self, app_config: AppConfig, run: str) -> None: # Save base in the internal state (never know where it may be needed) self._base = self._configs['base'] - #self.HOMEgfs = self._base['HOMEgfs'] - self.HOMEgfs = '/scratch4/NAGAPE/epic/Wei.Huang/src/global-workflow-cloud' + self.HOMEgfs = self._base['HOMEgfs'] self.rotdir = self._base['ROTDIR'] self.pslot = self._base['PSLOT'] if self.run == "enkfgfs": diff --git a/dev/workflow/rocoto/workflow_xml.py b/dev/workflow/rocoto/workflow_xml.py index f5820a47ae6..e5e4d27282e 100644 --- a/dev/workflow/rocoto/workflow_xml.py +++ b/dev/workflow/rocoto/workflow_xml.py @@ -34,8 +34,7 @@ def __init__(self, app_config: 
AppConfig, rocoto_config: Dict) -> None: self.use_scrontab = self.host_info.get("USE_SCRONTAB", False) # Add ACCOUNT to host_info, with that from config.base self.host_info.ACCOUNT = self._base['ACCOUNT'] - #self.HOMEgfs = self._base['HOMEgfs'] - self.HOMEgfs = '/scratch4/NAGAPE/epic/Wei.Huang/src/global-workflow-cloud' + self.HOMEgfs = self._base['HOMEgfs'] self.expdir = self._base['EXPDIR'] self.pslot = self._base['PSLOT'] @@ -158,11 +157,14 @@ def _write_crontab(self, crontab_file: str = None, cronint: int = 5) -> None: """ # No point creating a crontab if rocotorun is not available. - #rocotoruncmd = find_executable('rocotorun') - rocotoruncmd = '/apps/rocoto/1.3.7/bin/rocotorun' + rocotoruncmd = find_executable('rocotorun') if rocotoruncmd is None: - print('Failed to find rocotorun, crontab will not be created') - return + try: + rocotoruncmd = '/apps/rocoto/default/bin/rocotorun' + os.path.exists(rocotoruncmd) + except Exception as ee: + raise Exception("Failed to find rocotorun, crontab will not be created: ") from ee + return rocotorunstr = f'{rocotoruncmd} -d {self.expdir}/{self.pslot}.db -w {self.expdir}/{self.pslot}.xml' cronintstr = f'*/{cronint} * * * *' diff --git a/env/CONTAINER.env b/env/CONTAINER.env index eff2df8b931..ba01fcf0dd9 100755 --- a/env/CONTAINER.env +++ b/env/CONTAINER.env @@ -9,78 +9,24 @@ fi step=$1 -export launcher="srun -l --export=ALL" -export mpmd_opt="--multi-prog --output=mpmd.%j.%t.out" +export launcher="mpirun" +export mpmd_opt="--multi-prog" # Configure MPI environment +export MPI_BUFS_PER_PROC=2048 +export MPI_BUFS_PER_HOST=2048 +export MPI_GROUP_MAX=256 +export MPI_MEMMAP_OFF=1 +export MP_STDOUTMODE="ORDERED" +export KMP_AFFINITY=scatter export OMP_STACKSIZE=2048000 export NTHSTACK=1024000000 ulimit -s unlimited ulimit -a -# Calculate common variables -# Check first if the dependent variables are set -if [[ -n "${ntasks:-}" && -n "${max_tasks_per_node:-}" && -n "${tasks_per_node:-}" ]]; then - 
max_threads_per_task=$((max_tasks_per_node / tasks_per_node)) - NTHREADSmax=${threads_per_task:-${max_threads_per_task}} - NTHREADS1=${threads_per_task:-1} - if [[ ${NTHREADSmax} -gt ${max_threads_per_task} ]]; then - NTHREADSmax=${max_threads_per_task} - fi - if [[ ${NTHREADS1} -gt ${max_threads_per_task} ]]; then - NTHREADS1=${max_threads_per_task} - fi - APRUN_default="${launcher} -n ${ntasks}" -else - echo "ERROR config.resources must be sourced before sourcing CONTAINER.env" - exit 2 -fi - -if [[ "${step}" = "prep" ]] || [[ "${step}" = "prepbufr" ]]; then - - export POE="NO" - export BACK="NO" - export sys_tp="CONTAINER" - export launcher_PREP="srun" - -elif [[ "${step}" = "prepsnowobs" ]]; then - - export APRUN_CALCFIMS="${APRUN_default}" - -elif [[ "${step}" = "prep_emissions" ]]; then - - export APRUN="${APRUN_default}" - -elif [[ "${step}" = "waveinit" ]] || [[ "${step}" = "waveprep" ]] || [[ "${step}" = "wavepostsbs" ]] || [[ "${step}" = "wavepostbndpnt" ]] || [[ "${step}" = "wavepostbndpntbll" ]] || [[ "${step}" = "wavepostpnt" ]]; then - - export USE_CFP="YES" - if [[ "${step}" = "waveprep" ]]; then export MP_PULSE=0 ; fi - export wavempexec=${launcher} - export wave_mpmd=${mpmd_opt} - -elif [[ "${step}" = "fcst" ]] || [[ "${step}" = "efcs" ]]; then - - export launcher="srun --mpi=pmi2 -l" - - (( nnodes = (ntasks+tasks_per_node-1)/tasks_per_node )) - (( ufs_ntasks = nnodes*tasks_per_node )) - # With ESMF threading, the model wants to use the full node - export APRUN_UFS="${launcher} -n ${ufs_ntasks}" - unset nnodes ufs_ntasks - -elif [[ "${step}" = "post" ]]; then - - export NTHREADS_UPP=${NTHREADS1} - export APRUN_UPP="${APRUN_default} --cpus-per-task=${NTHREADS_UPP}" - -elif [[ "${step}" = "oceanice_products" ]]; then - - export NTHREADS_OCNICEPOST=${NTHREADS1} - export APRUN_OCNICEPOST="${launcher} -n 1 --cpus-per-task=${NTHREADS_OCNICEPOST}" - -elif [[ "${step}" = "atmos_products" ]]; then - - export USE_CFP="YES" # Use MPMD for downstream product 
generation in CONTAINER +if [ "${step}" = "marineanlvar" ]; then + export NTHREADS_OCNANAL=1 + export APRUN_MARINEANLVAR="${launcher} -n 2" fi diff --git a/env/HERA.env b/env/HERA.env index d45eaf7e305..065f5dbc469 100755 --- a/env/HERA.env +++ b/env/HERA.env @@ -250,8 +250,6 @@ elif [[ "${step}" = "eupd" ]]; then elif [[ "${step}" = "fcst" ]] || [[ "${step}" = "efcs" ]]; then - export launcher="srun" - (( nnodes = (ntasks+tasks_per_node-1)/tasks_per_node )) (( ufs_ntasks = nnodes*tasks_per_node )) # With ESMF threading, the model wants to use the full node diff --git a/gen-c48atm.sh b/gen-c48atm.sh new file mode 100755 index 00000000000..d0395b1dce6 --- /dev/null +++ b/gen-c48atm.sh @@ -0,0 +1,20 @@ +#!/bin/bash + + set -x + + HOMEDIR=/scratch4/NAGAPE/epic/Wei.Huang/dev/global-workflow-cloud + rundir=/scratch4/NAGAPE/epic/Wei.Huang/run + mkdir -p ${rundir} + HPC_ACCOUNT=epic + + cd ${HOMEDIR}/dev/workflow + + RUNTESTS=${rundir} \ + ./generate_workflows.sh \ + -H ${HOMEDIR} \ + -y "C48_ATM" \ + -Y ${HOMEDIR}/dev/ci/cases/pr \ + -A ${HPC_ACCOUNT} \ + -e "Wei.Huang@noaa.gov" \ + -R -v + diff --git a/jobs/JGLOBAL_STAGE_IC b/jobs/JGLOBAL_STAGE_IC index 04240a27d06..e0cdcf5faf6 100755 --- a/jobs/JGLOBAL_STAGE_IC +++ b/jobs/JGLOBAL_STAGE_IC @@ -2,9 +2,13 @@ source "${HOMEgfs}/ush/jjob_header.sh" -e "stage_ic" -c "base stage_ic" -module purge # Execute staging -${HOMEgfs}/bin/run_python.sh ${SCRgfs}/exglobal_stage_ic.py +if [ "$RUN_WITH_CONTAINER" == "YES" ]; then + # module purge + ${HOMEgfs}/bin/run_python.sh ${SCRgfs}/exglobal_stage_ic.py +else + "${SCRgfs}/exglobal_stage_ic.py" +fi err=$? 
############################################################### diff --git a/modulefiles/module_base.container.lua b/modulefiles/module_base.container.lua deleted file mode 100644 index 5a850c51d94..00000000000 --- a/modulefiles/module_base.container.lua +++ /dev/null @@ -1,53 +0,0 @@ -help([[ -Load environment to run GFS in container -]]) - -prepend_path("MODULEPATH", "/opt/spack-stack/spack-stack-1.6.0/envs/unified-env/install/modulefiles/intel-oneapi-mpi/2021.9.0/intel/2021.10.0") -prepend_path("MODULEPATH", "/opt/spack-stack/spack-stack-1.6.0/envs/unified-env/install/modulefiles/Core") -prepend_path("MODULEPATH", "/opt/spack-stack/spack-stack-1.6.0/envs/fms-2024.01/install/modulefiles/Core") - -load("gnu") -load(pathJoin("stack-intel", (os.getenv("stack_intel_ver") or "None"))) -load(pathJoin("stack-intel-oneapi-mpi", (os.getenv("stack_impi_ver") or "None"))) -unload("gnu") - --- load(pathJoin("python", (os.getenv("python_ver") or "None"))) - -load(pathJoin("jasper", (os.getenv("jasper_ver") or "None"))) -load(pathJoin("libpng", (os.getenv("libpng_ver") or "None"))) --- load(pathJoin("cdo", (os.getenv("cdo_ver") or "None"))) --- load(pathJoin("R", (os.getenv("R_ver") or "None"))) - -load(pathJoin("hdf5", (os.getenv("hdf5_ver") or "None"))) -load(pathJoin("netcdf-c", (os.getenv("netcdf_c_ver") or "None"))) -load(pathJoin("netcdf-fortran", (os.getenv("netcdf_fortran_ver") or "None"))) - -load(pathJoin("nco", (os.getenv("nco_ver") or "None"))) -load(pathJoin("prod_util", (os.getenv("prod_util_ver") or "None"))) -load(pathJoin("grib-util", (os.getenv("grib_util_ver") or "None"))) -load(pathJoin("g2tmpl", (os.getenv("g2tmpl_ver") or "None"))) --- load(pathJoin("gsi-ncdiag", (os.getenv("gsi_ncdiag_ver") or "None"))) -load(pathJoin("crtm", (os.getenv("crtm_ver") or "None"))) -load(pathJoin("bufr", (os.getenv("bufr_ver") or "None"))) -load(pathJoin("wgrib2", (os.getenv("wgrib2_ver") or "None"))) -load(pathJoin("py-f90nml", (os.getenv("py_f90nml_ver") or "None"))) 
-load(pathJoin("py-netcdf4", (os.getenv("py_netcdf4_ver") or "None"))) -load(pathJoin("py-pyyaml", (os.getenv("py_pyyaml_ver") or "None"))) -load(pathJoin("py-jinja2", (os.getenv("py_jinja2_ver") or "None"))) -load(pathJoin("py-pandas", (os.getenv("py_pandas_ver") or "None"))) -load(pathJoin("py-python-dateutil", (os.getenv("py_python_dateutil_ver") or "None"))) ---load(pathJoin("met", (os.getenv("met_ver") or "None"))) ---load(pathJoin("metplus", (os.getenv("metplus_ver") or "None"))) -load(pathJoin("py-xarray", (os.getenv("py_xarray_ver") or "None"))) - -setenv("WGRIB2","wgrib2") -setenv("UTILROOT",(os.getenv("prod_util_ROOT") or "None")) - ---prepend_path("MODULEPATH", pathJoin("/scratch1/NCEPDEV/global/glopara/git/prepobs/v" .. (os.getenv("prepobs_run_ver") or "None"), "modulefiles")) ---prepend_path("MODULEPATH", pathJoin("/scratch1/NCEPDEV/global/glopara/git/prepobs/feature-GFSv17_com_reorg_log_update/modulefiles")) ---load(pathJoin("prepobs", (os.getenv("prepobs_run_ver") or "None"))) - ---prepend_path("MODULEPATH", pathJoin("/scratch1/NCEPDEV/global/glopara/git/Fit2Obs/v" .. 
(os.getenv("fit2obs_ver") or "None"), "modulefiles")) ---load(pathJoin("fit2obs", (os.getenv("fit2obs_ver") or "None"))) - -whatis("Description: GFS run environment") diff --git a/modulefiles/module_gwci.container.lua b/modulefiles/module_gwci.container.lua deleted file mode 100644 index 5fec1cd5dad..00000000000 --- a/modulefiles/module_gwci.container.lua +++ /dev/null @@ -1,15 +0,0 @@ -help([[ -Load environment to run GFS workflow setup scripts in container -]]) - -prepend_path("MODULEPATH", "/opt/spack-stack/spack-stack-1.6.0/envs/unified-env/install/modulefiles/Core") - -load(pathJoin("stack-intel", os.getenv("2021.10.0"))) -load(pathJoin("stack-intel-oneapi-mpi", os.getenv("2021.9.0"))) - -load(pathJoin("netcdf-c", os.getenv("4.9.2"))) -load(pathJoin("netcdf-fortran", os.getenv("4.6.1"))) -load(pathJoin("nccmp","1.9.0.1")) -load(pathJoin("wgrib2", "2.0.8")) - -whatis("Description: GFS run setup CI environment") diff --git a/modulefiles/module_gwsetup.container.lua b/modulefiles/module_gwsetup.container.lua deleted file mode 100644 index bb4882cfcb1..00000000000 --- a/modulefiles/module_gwsetup.container.lua +++ /dev/null @@ -1,21 +0,0 @@ -help([[ -Load environment to run GFS workflow setup scripts in container -]]) - ---load(pathJoin("rocoto")) - -prepend_path("MODULEPATH", "/opt/spack-stack/spack-stack-1.6.0/envs/unified-env/install/modulefiles/Core") - -local stack_intel_ver=os.getenv("stack_intel_ver") or "2021.10.0" -local stack_mpi_ver=os.getenv("stack_mpi_ver") or "2021.9.0" - -load("gnu") -load(pathJoin("stack-intel", stack_intel_ver)) -load(pathJoin("stack-intel-oneapi-mpi", stack_mpi_ver)) -unload("gnu") - -load("py-jinja2") -load("py-pyyaml") -load("py-numpy") - -whatis("Description: GFS run setup environment") diff --git a/modulefiles/module_run.hera.lua b/modulefiles/module_run.hera.lua deleted file mode 100644 index 1914992ecef..00000000000 --- a/modulefiles/module_run.hera.lua +++ /dev/null @@ -1,27 +0,0 @@ -help([[ -Load environment to run GFS on 
Hera -]]) - -prepend_path("MODULEPATH", "/scratch1/NCEPDEV/nems/role.epic/spack-stack/spack-stack-1.6.0/envs/gsi-addon-dev-fms-2024.01/install/modulefiles/Core") - --- load(pathJoin("hpss", (os.getenv("hpss_ver") or "None"))) - -load(pathJoin("stack-intel", (os.getenv("stack_intel_ver") or "2021.5.0"))) -load(pathJoin("stack-intel-oneapi-mpi", (os.getenv("stack_impi_ver") or "2021.5.1"))) -load(pathJoin("python", (os.getenv("python_ver") or "3.11.6"))) -load(pathJoin("prod_util", (os.getenv("prod_util_ver") or "2.1.1"))) -load(pathJoin("py-f90nml", (os.getenv("py_f90nml_ver") or "1.4.3"))) -load(pathJoin("py-netcdf4", (os.getenv("py_netcdf4_ver") or "1.5.8"))) -load(pathJoin("py-pyyaml", (os.getenv("py_pyyaml_ver") or "6.0"))) -load(pathJoin("py-jinja2", (os.getenv("py_jinja2_ver") or "3.1.2"))) -load(pathJoin("py-pandas", (os.getenv("py_pandas_ver") or "1.5.3"))) -load(pathJoin("py-numpy", (os.getenv("py_numpy_ver") or "1.23.4"))) -load(pathJoin("py-xarray", (os.getenv("py_xarray_ver") or "2023.7.0"))) -load(pathJoin("py-python-dateutil", (os.getenv("py_python_dateutil_ver") or "2.8.2"))) - -setenv("WGRIB2","wgrib2") -setenv("WGRIB","wgrib") --- setenv("UTILROOT",(os.getenv("prod_util_ROOT") or "None")) - -whatis("Description: GFS run host environment") - diff --git a/parm/ufs/fv3/global_control.nml.IN b/parm/ufs/fv3/global_control.nml.IN deleted file mode 120000 index d45620b2a7a..00000000000 --- a/parm/ufs/fv3/global_control.nml.IN +++ /dev/null @@ -1 +0,0 @@ -/scratch2/NAGAPE/epic/Wei.Huang/src/global-workflow-cloud/sorc/ufs_model.fd/tests/parm/global_control.nml.IN \ No newline at end of file diff --git a/scripts/exglobal_atmos_products.sh b/scripts/exglobal_atmos_products.sh index 08971aa38ea..0cc4c9ebf56 100755 --- a/scripts/exglobal_atmos_products.sh +++ b/scripts/exglobal_atmos_products.sh @@ -1,11 +1,14 @@ #! 
/usr/bin/env bash -#source "${USHgfs}/load_fv3gfs_modules.sh" -#module load wgrib2/2.0.8 - # Programs used -#export WGRIB2=${WGRIB2:-${wgrib2_ROOT}/bin/wgrib2} -export WGRIB2="${HOMEgfs}/bin/run_wgrib2.sh" +if [ "$RUN_WITH_CONTAINER" == "NO" ]; then + source "${USHgfs}/load_fv3gfs_modules.sh" + module load wgrib2/2.0.8 + + export WGRIB2=${WGRIB2:-${wgrib2_ROOT}/bin/wgrib2} +else + export WGRIB2="${HOMEgfs}/bin/run_wgrib2.sh" +fi # Scripts used INTERP_ATMOS_MASTERSH=${INTERP_ATMOS_MASTERSH:-"${USHgfs}/interp_atmos_master.sh"} diff --git a/sorc/link_workflow.sh b/sorc/link_workflow.sh index 07f4c9c2784..3e9d6f0b0ac 100755 --- a/sorc/link_workflow.sh +++ b/sorc/link_workflow.sh @@ -4,19 +4,20 @@ HOMEgfs="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." >/dev/null 2>&1 && pwd)" TRACE=NO source "${HOMEgfs}/ush/preamble.sh" +_run_with_container=false function usage() { cat <&2 fi - -export MACHINE_ID diff --git a/ush/forecast_postdet.sh b/ush/forecast_postdet.sh index eb26ca2a05c..a7918b587e1 100755 --- a/ush/forecast_postdet.sh +++ b/ush/forecast_postdet.sh @@ -277,8 +277,6 @@ EOF fi # warm_start == .true. #============================================================================ - make_nh=".true." - na_init=1 #============================================================================ if [[ "${QUILTING}" = ".true." ]] && [[ "${OUTPUT_GRID}" = "gaussian_grid" ]]; then diff --git a/ush/forecast_predet.sh b/ush/forecast_predet.sh index f6b6f780660..2daabb5e4f0 100755 --- a/ush/forecast_predet.sh +++ b/ush/forecast_predet.sh @@ -286,7 +286,6 @@ FV3_predet(){ # <0 means older adiabatic pre-conditioning na_init=${na_init:-1} -# local suite_file="${HOMEgfs}/dev/FV3-ccpp-suites/suite_${CCPP_SUITE}.xml" local suite_file="${HOMEgfs}/sorc/ufs_model.fd/FV3/ccpp/suites/suite_${CCPP_SUITE}.xml" if [[ ! -f "${suite_file}" ]]; then echo "FATAL ERROR: CCPP Suite file ${suite_file} does not exist, ABORT!" 
@@ -334,18 +333,18 @@ FV3_predet(){ IEMS=${IEMS:-1} fi - #if [[ "${TYPE}" == "nh" ]]; then # non-hydrostatic options + if [[ "${TYPE}" == "nh" ]]; then # non-hydrostatic options hydrostatic=".false." phys_hydrostatic=".false." # enable heating in hydrostatic balance in non-hydrostatic simulation use_hydro_pressure=".false." # use hydrostatic pressure for physics make_nh=".true." # running in non-hydrostatic mode pass_full_omega_to_physics_in_non_hydrostatic_mode=".true." - #else # hydrostatic options - # hydrostatic=".true." - # phys_hydrostatic=".false." # ignored when hydrostatic = T - # use_hydro_pressure=".false." # ignored when hydrostatic = T - # make_nh=".false." # running in hydrostatic mode - #fi + else # hydrostatic options + hydrostatic=".true." + phys_hydrostatic=".false." # ignored when hydrostatic = T + use_hydro_pressure=".false." # ignored when hydrostatic = T + make_nh=".false." # running in hydrostatic mode + fi # Conserve total energy as heat globally consv_te=${consv_te:-1.} # range 0.-1., 1. will restore energy to orig. val. before physics @@ -364,38 +363,38 @@ FV3_predet(){ if [[ "${MONO:0:4}" == "mono" ]]; then # monotonic options d_con=${d_con_mono:-"0."} do_vort_damp=".false." 
- #if [[ "${TYPE}" == "nh" ]]; then # monotonic and non-hydrostatic + if [[ "${TYPE}" == "nh" ]]; then # monotonic and non-hydrostatic hord_mt=${hord_mt_nh_mono:-"10"} hord_xx=${hord_xx_nh_mono:-"10"} hord_dp=${hord_xx_nh_mono:-"10"} - #else # monotonic and hydrostatic - # hord_mt=${hord_mt_hydro_mono:-"10"} - # hord_xx=${hord_xx_hydro_mono:-"10"} - # hord_dp=${hord_xx_hydro_mono:-"10"} - # kord_tm=${kord_tm_hydro_mono:-"-12"} - # kord_mt=${kord_mt_hydro_mono:-"12"} - # kord_wz=${kord_wz_hydro_mono:-"12"} - # kord_tr=${kord_tr_hydro_mono:-"12"} - #fi + else # monotonic and hydrostatic + hord_mt=${hord_mt_hydro_mono:-"10"} + hord_xx=${hord_xx_hydro_mono:-"10"} + hord_dp=${hord_xx_hydro_mono:-"10"} + kord_tm=${kord_tm_hydro_mono:-"-12"} + kord_mt=${kord_mt_hydro_mono:-"12"} + kord_wz=${kord_wz_hydro_mono:-"12"} + kord_tr=${kord_tr_hydro_mono:-"12"} + fi else # non-monotonic options d_con=${d_con_nonmono:-"1."} do_vort_damp=".true." - #if [[ "${TYPE}" == "nh" ]]; then # non-monotonic and non-hydrostatic + if [[ "${TYPE}" == "nh" ]]; then # non-monotonic and non-hydrostatic hord_mt=${hord_mt_nh_nonmono:-"5"} hord_xx=${hord_xx_nh_nonmono:-"5"} hord_dp=${hord_dp_nh_nonmono:-"-5"} - #else # non-monotonic and hydrostatic - # hord_mt=${hord_mt_hydro_nonmono:-"10"} - # hord_xx=${hord_xx_hydro_nonmono:-"10"} - # hord_dp=${hord_xx_hydro_nonmono:-"10"} - #fi + else # non-monotonic and hydrostatic + hord_mt=${hord_mt_hydro_nonmono:-"10"} + hord_xx=${hord_xx_hydro_nonmono:-"10"} + hord_dp=${hord_xx_hydro_nonmono:-"10"} + fi fi - #if [[ "${MONO:0:4}" != "mono" && "${TYPE}" == "nh" ]]; then + if [[ "${MONO:0:4}" != "mono" && "${TYPE}" == "nh" ]]; then vtdm4=${vtdm4_nh_nonmono:-"0.06"} - #else - # vtdm4=${vtdm4:-"0.05"} - #fi + else + vtdm4=${vtdm4:-"0.05"} + fi # Initial conditions are chgres-ed from GFS analysis file nggps_ic=${nggps_ic:-".true."} diff --git a/ush/jjob_header.sh b/ush/jjob_header.sh index 23aa861654c..9f7095c6e45 100755 --- a/ush/jjob_header.sh +++ 
b/ush/jjob_header.sh @@ -43,8 +43,10 @@ _calling_script="${BASH_SOURCE[1]}" source "${HOMEgfs}/ush/preamble.sh" -export PATH=/home/Wei.Huang/prod-util-2.1.1/bin:$PATH -export LD_LIBRARY_PATH=/apps/spack-2024-12/linux-rocky9-x86_64/gcc-11.4.1/intel-oneapi-compilers-2023.2.0-uov33rpz3lplh3hh3v5c6vssbc7ndxuk/lib +if [[ "$RUN_WITH_CONTAINER" == "YES" ]]; then + export PATH=/home/Wei.Huang/prod-util-2.1.1/bin:$PATH + #export LD_LIBRARY_PATH=/apps/spack-2024-12/linux-rocky9-x86_64/gcc-11.4.1/intel-oneapi-compilers-2023.2.0-uov33rpz3lplh3hh3v5c6vssbc7ndxuk/lib +fi OPTIND=1 while getopts "c:e:" option; do diff --git a/ush/load_fv3gfs_modules.sh b/ush/load_fv3gfs_modules.sh index 787758836bb..b107d0fc00d 100755 --- a/ush/load_fv3gfs_modules.sh +++ b/ush/load_fv3gfs_modules.sh @@ -11,31 +11,37 @@ if [[ "${DEBUG_WORKFLOW:-NO}" == "NO" ]]; then echo "Loading modules quietly..." set +x fi -set -x # Setup runtime environment by loading modules ulimit_s=$( ulimit -S -s ) -# Find module command and purge: -#source "${HOMEgfs}/ush/detect_machine.sh" -#source "${HOMEgfs}/ush/module-setup.sh" +if [[ "$RUN_WITH_CONTAINER" == "NO" ]]; then + # Find module command and purge: + source "${HOMEgfs}/ush/detect_machine.sh" + source "${HOMEgfs}/ush/module-setup.sh" -# Source versions file for runtime -#source "${HOMEgfs}/versions/run.ver" + # Source versions file for runtime + source "${HOMEgfs}/versions/run.ver" -# Load our modules: -#module use "${HOMEgfs}/modulefiles" + # Load our modules: + module use "${HOMEgfs}/modulefiles" -#case "${MACHINE_ID}" in -# "wcoss2" | "hera" | "orion" | "hercules" | "gaeac5" | "gaeac6" | "jet" | "s4" | "noaacloud" | "ursa") -# module load "module_base.${MACHINE_ID}" -# ;; -# *) -# echo "WARNING: UNKNOWN PLATFORM" -# ;; -#esac + case "${MACHINE_ID}" in + "wcoss2" | "hera" | "orion" | "hercules" | "gaeac5" | "gaeac6" | "jet" | "s4" | "noaacloud" | "ursa") + module load "module_base.${MACHINE_ID}" + ;; + *) + echo "WARNING: UNKNOWN PLATFORM" + ;; + esac 
-#module list + module list + + # Add wxflow to PYTHONPATH + wxflowPATH="${HOMEgfs}/ush/python" + PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${HOMEgfs}/ush:${wxflowPATH}" + export PYTHONPATH +fi # If this function exists in the environment, run it; else set -x if it was set on entering this script ftype=$(type -t set_trace || echo "") @@ -45,11 +51,6 @@ elif [[ "${set_x}" == "YES" ]]; then set -x fi -# Add wxflow to PYTHONPATH -#wxflowPATH="${HOMEgfs}/ush/python" -#PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${HOMEgfs}/ush:${wxflowPATH}" -#export PYTHONPATH - # Restore stack soft limit: ulimit -S -s "${ulimit_s}" unset ulimit_s diff --git a/ush/module-setup.sh b/ush/module-setup.sh index b4893c0bb50..01845c1ea78 100755 --- a/ush/module-setup.sh +++ b/ush/module-setup.sh @@ -88,11 +88,8 @@ elif [[ ${MACHINE_ID} = discover* ]]; then elif [[ $MACHINE_ID = container ]] ; then # We are in a container - #if ( ! eval module help > /dev/null 2>&1 ) ; then - source /usr/lmod/lmod/init/bash - #fi + source /usr/lmod/lmod/init/bash module purge - unset MODULEPATH # TODO: This can likely be made more general once other cloud # platforms come online. 
diff --git a/ush/parsing_ufs_configure.sh b/ush/parsing_ufs_configure.sh index d131b0cb0e0..75b2cae2264 100755 --- a/ush/parsing_ufs_configure.sh +++ b/ush/parsing_ufs_configure.sh @@ -126,7 +126,6 @@ echo "Rendered ufs.configure:" cat ufs.configure cpreq "${HOMEgfs}/sorc/ufs_model.fd/tests/parm/fd_ufs.yaml" fd_ufs.yaml -#cpreq "${HOMEgfs}/parm/ufs/fd_ufs.yaml" fd_ufs.yaml echo "SUB ${FUNCNAME[0]}: ufs.configure ends" diff --git a/versions/build.container.ver b/versions/build.container.ver deleted file mode 100644 index c9bc7c925f8..00000000000 --- a/versions/build.container.ver +++ /dev/null @@ -1,5 +0,0 @@ -export stack_intel_ver=2021.10.0 -export stack_impi_ver=2021.9.0 - -source "${HOMEgfs:-}/versions/spack.ver" -export spack_mod_path="/opt/spack-stack/spack-stack-${spack_stack_ver}/envs/unified-env/install/modulefiles/Core" diff --git a/versions/run.container.ver b/versions/run.container.ver deleted file mode 100644 index 391de348489..00000000000 --- a/versions/run.container.ver +++ /dev/null @@ -1,7 +0,0 @@ -export stack_intel_ver=2021.10.0 -export stack_impi_ver=2021.9.0 - -source "${HOMEgfs:-}/versions/spack.ver" -export spack_mod_path="/opt/spack-stack/spack-stack-${spack_stack_ver}/envs/unified-env/install/modulefiles/Core" - -export cdo_ver=2.2.0 diff --git a/versions/spack.ver b/versions/spack.ver index 301cb65e107..a212307a372 100644 --- a/versions/spack.ver +++ b/versions/spack.ver @@ -18,7 +18,7 @@ export bacio_ver=2.4.1 export nemsio_ver=2.5.4 export sigio_ver=2.3.2 export w3emc_ver=2.10.0 -export bufr_ver=12.0.1 +export bufr_ver=11.7.0 export g2_ver=3.4.5 export sp_ver=2.5.0 export ip_ver=4.3.0 From 81ec75f718de64f9f997df26cb0c738786b348a2 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Mon, 16 Jun 2025 21:19:16 +0000 Subject: [PATCH 059/134] sync --- dev/parm/config/gfs/config.base.j2 | 3 - dev/ush/fetch-fix-data.py | 272 ----------------------------- scripts/exglobal_atmos_products.sh | 4 +- 3 files changed, 2 insertions(+), 277 deletions(-) diff 
--git a/dev/parm/config/gfs/config.base.j2 b/dev/parm/config/gfs/config.base.j2 index 2c4a9762b76..7394ed3af59 100644 --- a/dev/parm/config/gfs/config.base.j2 +++ b/dev/parm/config/gfs/config.base.j2 @@ -8,9 +8,6 @@ echo "BEGIN: config.base" # Machine environment export machine="{{ MACHINE }}" -echo "%0 step: ${step}" -echo "%0 machine: ${machine}" - # EMC parallel or NCO production export RUN_ENVIR="emc" diff --git a/dev/ush/fetch-fix-data.py b/dev/ush/fetch-fix-data.py index d16d2db79b5..3dcbc54466e 100644 --- a/dev/ush/fetch-fix-data.py +++ b/dev/ush/fetch-fix-data.py @@ -1,31 +1,16 @@ -<<<<<<< HEAD -#!/usr/bin/env python -# cfetch-fix-data.py -======= #!/usr/bin/env python3 # fetch-fix-data.py ->>>>>>> 7fa952e0638f62539db47c96d37f64335c68e234 # wei.huang@noaa.gov # 2025-02-26 # script to download a subset of FIX data to local machines. import os -<<<<<<< HEAD -import time -import sys -import getopt -======= ->>>>>>> 7fa952e0638f62539db47c96d37f64335c68e234 import argparse import subprocess from pathlib import Path import logging # Create and configure logger -<<<<<<< HEAD -logging.basicConfig(filename="cfetch-fix-data.log", -======= logging.basicConfig(filename="fetch-fix-data.log", ->>>>>>> 7fa952e0638f62539db47c96d37f64335c68e234 format='%(asctime)s %(message)s', filemode='w') @@ -39,12 +24,6 @@ class FetchFIXdata(): -<<<<<<< HEAD - - def __init__(self, atmgridarray=['C48'], ocngridarray=['500'], - fix_bucket=None, localdir=None, verbose=0): - -======= """Fetch a subset of FIX data from NOAA s3 bucket. 
""" @@ -70,7 +49,6 @@ def __init__(self, atmgridarray=['C48'], ocngridarray=['500'], ------- None """ ->>>>>>> 7fa952e0638f62539db47c96d37f64335c68e234 # self.aws_fix_bucket = f's3://noaa-nws-global-pds/fix' self.aws_fix_bucket = fix_bucket self.aws_cp = f'aws --no-sign-request s3 cp' @@ -79,17 +57,6 @@ def __init__(self, atmgridarray=['C48'], ocngridarray=['500'], self.atmgridarray = atmgridarray self.ocngridarray = ocngridarray self.localdir = localdir -<<<<<<< HEAD - self.verbose = verbose - - if (os.path.isdir(localdir)): - logger.info(f'Prepare to download FIX data for {atmgrid} and {ocngrid} to {localdir}') - else: - logger.info(f'local dir: <{localdir}> does not exist. Stop') - sys.exit(-1) - - self.verdict = {} -======= self.fix_ver = fix_ver self.verbose = verbose @@ -110,7 +77,6 @@ def __init__(self, atmgridarray=['C48'], ocngridarray=['500'], logger.error(f'File fix_ver: <{fix_ver}> does not exist. Stop') raise SystemExit ->>>>>>> 7fa952e0638f62539db47c96d37f64335c68e234 self.s3dict = {} self.s3dict['raworog'] = f'raw/orog' @@ -119,13 +85,6 @@ def __init__(self, atmgridarray=['C48'], ocngridarray=['500'], else: self.targetdir = self.localdir -<<<<<<< HEAD - # -------------------------------------------------------------------------- - def update_s3dict(self): - - self.update_s3dick_grid_independent() - self.add_grid_data() -======= self.get_fix_ver_dict() self.create_s3dict() @@ -158,100 +117,11 @@ def create_s3dict(self): self.add_cpl2s3dict(s3key, val) else: self.s3dict[s3key] = f'{s3key}/{val}' ->>>>>>> 7fa952e0638f62539db47c96d37f64335c68e234 if (self.verbose): self.printinfo() # -------------------------------------------------------------------------- -<<<<<<< HEAD - def update_s3dick_grid_independent(self): - - for key in self.fix_ver_dict.keys(): - val = self.fix_ver_dict[key] - if (key == 'aer_ver'): - self.s3dict['aer'] = f'aer/{val}' - elif (key == 'am_ver'): - self.s3dict['am'] = f'am/{val}' - elif (key == 'chem_ver'): - 
self.s3dict['fimdata_chem'] = f'chem/{val}/fimdata_chem' - self.s3dict['Emission_data'] = f'chem/{val}/Emission_data' - elif (key == 'datm_ver'): - self.s3dict['cfsr'] = f'datm/{val}/cfsr' - self.s3dict['gefs'] = f'datm/{val}/gefs' - self.s3dict['gfs'] = f'datm/{val}/gfs' - self.s3dict['mom6'] = f'datm/{val}/mom6' - elif (key == 'glwu_ver'): - self.s3dict['glwu'] = f'glwu/{val}' - elif (key == 'gsi_ver'): - self.s3dict['gsi'] = f'gsi/{val}' - elif (key == 'lut_ver'): - self.s3dict['lut'] = f'lut/{val}' - elif (key == 'mom6_ver'): - self.s3dict['mom6post'] = f'mom6/{val}/post' - elif (key == 'reg2grb2_ver'): - self.s3dict['reg2grb2'] = f'reg2grb2/{val}' - elif (key == 'sfc_climb_ver'): - self.s3dict['sfc_climo'] = f'sfc_climo/{val}' - elif (key == 'verif_ver'): - self.s3dict['verif'] = f'verif/{val}' - elif (key == 'wave_ver'): - self.s3dict['wave'] = f'wave/{val}' - - # -------------------------------------------------------------------------- - def add_grid_data(self): - - for key in self.fix_ver_dict.keys(): - val = self.fix_ver_dict[key] - if (key == 'orog_ver'): - self.add_atmgrid2s3dict('orog', key, val) - elif (key == 'ugwd_ver'): - self.add_atmgrid2s3dict('ugwd', key, val) - elif (key == 'mom6_ver'): - self.add_ocngrid2s3dict('mom6', key, val) - elif (key == 'cice_ver'): - self.add_ocngrid2s3dict('cice', key, val) - elif (key == 'cpl_ver'): - self.add_cpl2s3dict('cpl', key, val) - - # -------------------------------------------------------------------------- - def add_atmgrid2s3dict(self, varname, key, val): - - for atmgrid in self.atmgridarray: - newkey = f'{key}_{atmgrid}' - self.s3dict[newkey] = f'{varname}/{val}/{atmgrid}' - - # ------------------------------------------------------------------------- - def add_ocngrid2s3dict(self, varname, key, val): - - for ocngrid in self.ocngridarray: - newkey = f'{key}_{atmgrid}' - self.s3dict[newkey] = f'{varname}/{val}/{ocngrid}' - - # ------------------------------------------------------------------------- - def 
add_cpl2s3dict(self, varname, key, val): - - for atmgrid in self.atmgridarray: - for ocngrid in self.ocngridarray: - newkey = f'{key}_a{atmgrid}o{ocngrid}' - self.s3dict[newkey] = f'{varname}/{val}/a{atmgrid}o{ocngrid}' - - # ------------------------------------------------------------------------- - def printinfo(self): - - logger.info(f'Preparing to fetch') - logger.info(f'ATM grid: {self.atmgridarray}') - logger.info(f'ONC grid: {self.ocngridarray}') - logger.info(f'From: {self.aws_fix_bucket}') - logger.info(f'To: {self.targetdir}') - for key in self.s3dict.keys(): - val = self.s3dict[key] - logger.info(f'{key}: {val}') - - # ------------------------------------------------------------------------- - def fetchdata(self): - -======= def add_atmgrid2s3dict(self, key, val): """ Add ATM grid data to dict. @@ -305,7 +175,6 @@ def printinfo(self): def fetchdata(self): """Fetch data defined in s3bucket. """ ->>>>>>> 7fa952e0638f62539db47c96d37f64335c68e234 if (self.verbose): logger.info(f'Create local fix dir: {self.targetdir}') @@ -315,23 +184,6 @@ def fetchdata(self): self.fetch_ugwp_limb_tau() for key in self.s3dict.keys(): -<<<<<<< HEAD - self.fetch_dir(self.s3dict[key]) - - # ------------------------------------------------------------------------- - def fetch_dir(self, dir): - - remotedir = f'{self.aws_fix_bucket}/{dir}' - localdir = f'{self.targetdir}/{dir}' - cmd = f'{self.aws_sync} {remotedir} {localdir}' - self.download_dir(cmd, localdir) - - # -------------------------------------------------------------------------- - def download_dir(self, cmd, localdir): - - # returned_value = os.system(cmd) # returns the exit code in unix - # logger.info('returned value:', returned_value) -======= self.download_dir(self.s3dict[key]) # -------------------------------------------------------------------------- @@ -345,7 +197,6 @@ def download_dir(self, dir): # returned_value = os.system(cmd) # returns the exit code in unix # if (self.verbose): # logger.info(f'returned 
value: {returned_value}') ->>>>>>> 7fa952e0638f62539db47c96d37f64335c68e234 if (os.path.isdir(localdir)): logger.info(f'{localdir} already exist. skip') @@ -360,20 +211,12 @@ def download_dir(self, dir): logger.info(f'Downloading {localdir}') returned_value = subprocess.call(cmd, shell=True) # returns the exit code in unix if (self.verbose): -<<<<<<< HEAD - logger.info('returned value:', returned_value) - - # -------------------------------------------------------------------------- - def fetch_ugwp_limb_tau(self): - -======= logger.info(f'returned value: {returned_value}') # -------------------------------------------------------------------------- def fetch_ugwp_limb_tau(self): """download ugwp_limb_tau.nc """ ->>>>>>> 7fa952e0638f62539db47c96d37f64335c68e234 ugwd_ver = self.fix_ver_dict['ugwd_ver'] ugwp_limb_tau_remotepath = f'{self.aws_fix_bucket}/ugwd/{ugwd_ver}/ugwp_limb_tau.nc' ugwp_limb_tau_localdir = f'{self.targetdir}/ugwd/{ugwd_ver}' @@ -381,20 +224,10 @@ def fetch_ugwp_limb_tau(self): path = Path(ugwp_limb_tau_localdir) path.mkdir(parents=True, exist_ok=True) cmd = f'{self.aws_cp} {ugwp_limb_tau_remotepath} {filename}' -<<<<<<< HEAD - self.download_file(cmd, filename) - - # ------------------------------------------------------------------------- - def download_file(self, cmd, filename): - - # returned_value = os.system(cmd) # returns the exit code in unix - # logger.info('returned value:', returned_value) -======= # returned_value = os.system(cmd) # returns the exit code in unix # if (self.verbose): # logger.info(f'returned value: {returned_value}') ->>>>>>> 7fa952e0638f62539db47c96d37f64335c68e234 if (os.path.isfile(filename)): logger.info(f'{filename} already exist. 
skip') @@ -404,46 +237,6 @@ def download_file(self, cmd, filename): logger.info(f'Downloading {filename}') returned_value = subprocess.call(cmd, shell=True) # returns the exit code in unix if (self.verbose): -<<<<<<< HEAD - logger.info('returned value:', returned_value) - - # -------------------------------------------------------------------------- - def set_fix_ver_from_gwhome(self, gwhome, verdict): - - fix_ver_file = f'{gwhome}/versions/fix.ver' - self.fix_ver_dict = verdict - if (os.path.isfile(fix_ver_file)): - with open(fix_ver_file, "r") as file: - for line in file.readlines(): - if (line.find('export ') >= 0): - headstr, _, value = line.strip().partition('=') - exphead, _, key = headstr.partition(' ') - self.fix_ver_dict[key] = value - else: - logger.info(f'fix_ver_file: {fix_ver_file}s does not exist.') - - # ------------------------------------------------------------------------ - def set_default_fix_ver(self, verdict): - - self.fix_ver_dict = verdict - -# ----------------------------------------------------------------------------- - - -def namespace_to_dict(namespace): - return { - k: namespace_to_dict(v) if isinstance(v, argparse.Namespace) else v - for k, v in vars(namespace).items() - } - -# ------------------------------------------------------------------------------ - - -if __name__ == '__main__': - - atmgridlist = ['C48', 'C96', 'C192', 'C384', 'C768', 'C1152'] - ocngridlist = ['500', '100', '050', '025'] -======= logger.info(f'returned value: {returned_value}') # -------------------------------------------------------------------------- @@ -470,39 +263,10 @@ def main() -> None: # define available ATM and OCN grids. 
ATMGRIDLIST = ['C48', 'C96', 'C192', 'C384', 'C768', 'C1152'] OCNGRIDLIST = ['500', '100', '050', '025'] ->>>>>>> 7fa952e0638f62539db47c96d37f64335c68e234 parser = argparse.ArgumentParser() parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity") -<<<<<<< HEAD - parser.add_argument("-a", "--atmgrid", type=str, required=True, - help="ATM grid, like: C48, C96, C192, C384, C768, C1152") - parser.add_argument("-o", "--ocngrid", type=str, required=True, - help="OCN grid, like: 500, 100, 050, 025") - parser.add_argument("--localdir", type=str, required=True, - help="local directory to store FIX data subset") - parser.add_argument("--gwhome", type=str, default='unknown', - help="GW home diretory where can find fix.ver") - parser.add_argument("--fix_bucket", type=str, default='s3://noaa-nws-global-pds/fix', - help="S3 Bucket directory of FIX data") - parser.add_argument("--aer_ver", type=str, default='20220805', help="AER version") - parser.add_argument("--am_ver", type=str, default='20220805', help="AM version") - parser.add_argument("--chem_ver", type=str, default='20220805', help="chem version") - parser.add_argument("--cice_ver", type=str, default='20240416', help="cice version") - parser.add_argument("--cpl_ver", type=str, default='20230526', help="cpl version") - parser.add_argument("--datm_ver", type=str, default='20220805', help="datm version") - parser.add_argument("--glwu_ver", type=str, default='20220805', help="glwu version") - parser.add_argument("--gsi_ver", type=str, default='20240208', help="gsi version") - parser.add_argument("--lut_ver", type=str, default='20220805', help="lut version") - parser.add_argument("--mom6_ver", type=str, default='20240416', help="mom6 version") - parser.add_argument("--orog_ver", type=str, default='20231027', help="orog version") - parser.add_argument("--reg2grb2_ver", type=str, default='20220805', help="reg2grb2 version") - parser.add_argument("--sfc_climo_ver", type=str, 
default='20220805', help="sfc_climo version") - parser.add_argument("--ugwd_ver", type=str, default='20220805', help="ugwd version") - parser.add_argument("--verif_ver", type=str, default='20220805', help="verif version") - parser.add_argument("--wave_ver", type=str, default='20220805', help="wave version") -======= parser.add_argument("-d", "--localdir", type=str, required=True, help="local directory to store FIX data subset") parser.add_argument("-f", "--fix_ver", type=str, required=True, @@ -517,20 +281,14 @@ def main() -> None: parser.add_argument("-o", "--ocngrid", type=str, required=False, default="100", help="OCN grid, like: 500,100,050,025, default: 100") ->>>>>>> 7fa952e0638f62539db47c96d37f64335c68e234 args = parser.parse_args() if args.verbose: logger.info(f"the atmgrid is {args.atmgrid}") -<<<<<<< HEAD - else: - logger.info(f"the atmgrid is {args.atmgrid}") -======= logger.info(f"the ocngrid is {args.ocngrid}") logger.info(f"the localdir is {args.localdir}") logger.info(f"the fix_file is {args.fix_ver}") logger.info(f"the s3 bucket is {args.fix_bucket}") ->>>>>>> 7fa952e0638f62539db47c96d37f64335c68e234 atmgrid = args.atmgrid if (atmgrid.find(',') > 0): @@ -539,17 +297,10 @@ def main() -> None: atmgridarray = [atmgrid] for grid in atmgridarray: -<<<<<<< HEAD - if (grid not in atmgridlist): - logger.info(f'atmgrid: {grid}') - logger.info(f'is not in supported grids: {atmgridlist}') - sys.exit(-1) -======= if (grid not in ATMGRIDLIST): logger.error(f'atmgrid: {grid}') logger.error(f'is not in supported grids: {ATMGRIDLIST}') raise SystemExit ->>>>>>> 7fa952e0638f62539db47c96d37f64335c68e234 ocngrid = args.ocngrid if (ocngrid.find(',') > 0): @@ -558,28 +309,6 @@ def main() -> None: ocngridarray = [ocngrid] for grid in ocngridarray: -<<<<<<< HEAD - if (grid not in ocngridlist): - logger.info(f'ocngrid: {grid}') - logger.info(f'is not in supported grids: {ocngridlist}') - sys.exit(-1) - - verdict = namespace_to_dict(args) - - # 
------------------------------------------------------------------ - ffd = FetchFIXdata(atmgridarray=atmgridarray, - ocngridarray=ocngridarray, - fix_bucket=args.fix_bucket, - localdir=args.localdir, verbose=args.verbose) - - if (args.gwhome is None): - ffd.set_default_fix_ver(verdict) - else: - ffd.set_fix_ver_from_gwhome(args.gwhome, verdict) - - ffd.update_s3dict() - ffd.fetchdata() -======= if (grid not in OCNGRIDLIST): logger.error(f'ocngrid: {grid}') logger.error(f'is not in supported grids: {OCNGRIDLIST}') @@ -596,4 +325,3 @@ def main() -> None: # ------------------------------------------------------------------------------ if __name__ == '__main__': main() ->>>>>>> 7fa952e0638f62539db47c96d37f64335c68e234 diff --git a/scripts/exglobal_atmos_products.sh b/scripts/exglobal_atmos_products.sh index 0a5f77799f9..a723b06a6e7 100755 --- a/scripts/exglobal_atmos_products.sh +++ b/scripts/exglobal_atmos_products.sh @@ -1,7 +1,7 @@ #! /usr/bin/env bash -source "${USHgfs}/load_fv3gfs_modules.sh" -module load wgrib2/2.0.8 +#source "${USHgfs}/load_fv3gfs_modules.sh" +#module load wgrib2/2.0.8 # Programs used #export WGRIB2=${WGRIB2:-${wgrib2_ROOT}/bin/wgrib2} From 11bce6df58dd31ae8352f2a5696a050cb8722c11 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Fri, 20 Jun 2025 18:02:34 +0000 Subject: [PATCH 060/134] create a directory dev/container to hold container code --- bin/run_python.sh | 15 ---- bin/run_wgrib2.sh | 18 ---- c48atm.sh | 2 +- dev/container/create-container-links.sh | 27 ++++++ dev/container/exec.python | 10 +++ dev/container/exec.wgrib2 | 9 ++ dev/container/gen-wrapper.sh | 66 ++++++++++++++ dev/container/link_gfs_utils.sh | 85 +++++++++++++++++++ dev/container/link_model.sh | 85 +++++++++++++++++++ dev/container/link_ufs_utils.sh | 81 ++++++++++++++++++ dev/container/link_ww3.sh | 80 +++++++++++++++++ ush/run_python.sh => dev/container/ush.python | 11 +-- ush/run_wgrib2.sh => dev/container/ush.wgrib2 | 6 +- dev/workflow/generate_workflows.sh | 2 +- 
dev/workflow/hosts/ursa.yaml | 3 +- gen-c48atm.sh | 5 ++ gen-c48s2sw.sh | 26 ++++++ jobs/JGLOBAL_OCEANICE_PRODUCTS | 7 +- jobs/JGLOBAL_STAGE_IC | 4 +- modulefiles/module_gwsetup.container.lua | 21 +++++ scripts/exglobal_atmos_products.sh | 3 +- sorc/link_workflow.sh | 2 +- ush/jjob_header.sh | 15 +++- ush/load_fv3gfs_modules.sh | 11 +-- ush/load_ufswm_modules.sh | 50 +++++++---- ush/run_gfs_model.sh | 19 ----- 26 files changed, 567 insertions(+), 96 deletions(-) delete mode 100755 bin/run_python.sh delete mode 100755 bin/run_wgrib2.sh create mode 100755 dev/container/create-container-links.sh create mode 100755 dev/container/exec.python create mode 100755 dev/container/exec.wgrib2 create mode 100755 dev/container/gen-wrapper.sh create mode 100755 dev/container/link_gfs_utils.sh create mode 100755 dev/container/link_model.sh create mode 100755 dev/container/link_ufs_utils.sh create mode 100755 dev/container/link_ww3.sh rename ush/run_python.sh => dev/container/ush.python (57%) rename ush/run_wgrib2.sh => dev/container/ush.wgrib2 (57%) create mode 100755 gen-c48s2sw.sh create mode 100644 modulefiles/module_gwsetup.container.lua delete mode 100755 ush/run_gfs_model.sh diff --git a/bin/run_python.sh b/bin/run_python.sh deleted file mode 100755 index e77230f4fae..00000000000 --- a/bin/run_python.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash - containerdir=/scratch4/NAGAPE/epic/Wei.Huang/demo - img=${containerdir}/ubuntu22.04-intel-ufs-env-v1.6.0.img - HOMEgfs=/scratch4/NAGAPE/epic/Wei.Huang/demo/global-workflow-cloud - cmd=${HOMEgfs}/ush/run_python.sh - arg="$@" - -#wxflowPATH="${HOMEgfs}/ush/python" -#export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${HOMEgfs}/ush:${wxflowPATH}" - - singularity exec \ - -B /scratch3 \ - -B /scratch4 \ - ${img} $cmd $arg - diff --git a/bin/run_wgrib2.sh b/bin/run_wgrib2.sh deleted file mode 100755 index 3b6171c3e8d..00000000000 --- a/bin/run_wgrib2.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash - 
containerdir=/scratch4/NAGAPE/epic/Wei.Huang/demo - img=${containerdir}/ubuntu22.04-intel-ufs-env-v1.6.0.img - HOMEgfs=/scratch4/NAGAPE/epic/Wei.Huang/demo/global-workflow-cloud - cmd=${HOMEgfs}/ush/run_wgrib2.sh - -#source /usr/lmod/lmod/init/bash -#module purge -#source ${HOMEgfs}/versions/run.ver -#module use ${HOMEgfs}/modulefiles -#module load module_base.container - - arg="$@" - singularity exec \ - -B /scratch3 \ - -B /scratch4 \ - ${img} $cmd $arg - diff --git a/c48atm.sh b/c48atm.sh index ab58004056c..a85a3a066e1 100755 --- a/c48atm.sh +++ b/c48atm.sh @@ -13,6 +13,6 @@ pslot=c48atm \ RUNTESTS=${rundir} \ RUNDIR=/scratch4/NAGAPE/epic/Wei.Huang \ - bin/run_python.sh dev/workflow/create_experiment.py \ + exec/run_python.sh dev/workflow/create_experiment.py \ --yaml dev/ci/cases/pr/C48_ATM.yaml diff --git a/dev/container/create-container-links.sh b/dev/container/create-container-links.sh new file mode 100755 index 00000000000..f556993c165 --- /dev/null +++ b/dev/container/create-container-links.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +HOMEgfs=/scratch4/NAGAPE/epic/Wei.Huang/dev/global-workflow-cloud +container=/scratch4/NAGAPE/epic/Wei.Huang/demo/ubuntu22.04-intel-ufs-env-v1.6.0.img +verbose=true + +echo "HOMEgfs: $HOMEgfs" +echo "container: $container" +echo "Verbose: $verbose" + +source ${HOMEgfs}/ush/detect_machine.sh + +bindings="-B /scratch3 -B /scratch4" +if [[ ${MACHINE_ID} = ursa* ]] ; then + # We are on NOAA Ursa + bindings="-B /scratch3 -B /scratch4" +fi + +#${HOMEgfs}/dev/container/gen-wrapper.sh -H ${HOMEgfs} -c ${container} -b "${bindings}" -v + +${HOMEgfs}/dev/container/link_ww3.sh -H ${HOMEgfs} -c ${container} -b "${bindings}" -v + +${HOMEgfs}/dev/container/link_model.sh -H ${HOMEgfs} -c ${container} -m gfs_model -b "${bindings}" -v + +${HOMEgfs}/dev/container/link_gfs_utils.sh -H ${HOMEgfs} -c ${container} -b "${bindings}" -v +${HOMEgfs}/dev/container/link_ufs_utils.sh -H ${HOMEgfs} -c ${container} -b "${bindings}" -v + diff --git 
a/dev/container/exec.python b/dev/container/exec.python new file mode 100755 index 00000000000..e18f6a613d8 --- /dev/null +++ b/dev/container/exec.python @@ -0,0 +1,10 @@ +#!/bin/bash + arg="$@" + + export LD_LIBRARY_PATH=$(dirname SIF) + + singularity exec \ + BINDINGS \ + SIF \ + HOMEgfs/ush/container/run_python.sh $arg + diff --git a/dev/container/exec.wgrib2 b/dev/container/exec.wgrib2 new file mode 100755 index 00000000000..42d0270278b --- /dev/null +++ b/dev/container/exec.wgrib2 @@ -0,0 +1,9 @@ +#!/bin/bash + export LD_LIBRARY_PATH=$(dirname SIF) + arg="$@" + + singularity exec \ + BINDINGS \ + SIF \ + HOMEgfs/ush/container/run_wgrib2.sh $arg + diff --git a/dev/container/gen-wrapper.sh b/dev/container/gen-wrapper.sh new file mode 100755 index 00000000000..66acfc3ff31 --- /dev/null +++ b/dev/container/gen-wrapper.sh @@ -0,0 +1,66 @@ +#!/bin/bash + +verbose=false +bindings="-B /scratch3 -B /scratch4" + +while [ "$#" -gt 0 ]; do + case "$1" in + -H|--HOMEgfs) + HOMEgfs="$2" + shift 2 + ;; + -b|--bindings) + bindings="$2" + shift 2 + ;; + -c|--container) + container="$2" + shift 2 + ;; + -v|--verbose) + verbose=true + shift + ;; + *) + echo "Unknown option: $1" + exit 1 + ;; + esac +done + +if [[ ! -v HOMEgfs || ! 
-v container ]]; then + echo "Usage: link_model.sh -H/-HOMEgfs gw-home-dir -c/--container full-path-container-image -b/--bindings "-B dirname [-B dirname1 [...]]" [-v]" + exit -1 +fi + +echo "HOMEgfs: $HOMEgfs" +echo "container: $container" +echo "bindings: $bindings" +echo "Verbose: $verbose" + +if [[ "$verbose" == "true" ]]; then + set -x +fi + +for dnm in exec ush +do + if [[ "$dnm" == "exec" ]]; then + targetdir=${HOMEgfs}/${dnm} + else + targetdir=${HOMEgfs}/${dnm}/container + fi + mkdir -p ${targetdir} + for fnm in python wgrib2 + do + sourcef=${HOMEgfs}/dev/container/${dnm}.${fnm} + targetf=${targetdir}/run_${fnm}.sh + + sed -e "s?HOMEgfs?${HOMEgfs}?g" \ + -e "s?SIF?${container}?g" \ + -e "s?BINDINGS?${bindings}?g" \ + ${sourcef} > ${targetf} + + chmod 755 ${targetf} + done +done + diff --git a/dev/container/link_gfs_utils.sh b/dev/container/link_gfs_utils.sh new file mode 100755 index 00000000000..b971f4ed254 --- /dev/null +++ b/dev/container/link_gfs_utils.sh @@ -0,0 +1,85 @@ +#!/bin/bash + +verbose=false + +while [ "$#" -gt 0 ]; do + case "$1" in + -H|--HOMEgfs) + HOMEgfs="$2" + shift 2 + ;; + -b|--binding) + binding="$2" + shift 2 + ;; + -c|--container) + container="$2" + shift 2 + ;; + -v|--verbose) + verbose=true + shift + ;; + *) + echo "Unknown option: $1" + exit 1 + ;; + esac +done + +if [[ ! -v HOMEgfs || ! 
-v container ]]; then + echo "Usage: link_model.sh -H/-HOMEgfs gw-home-dir -c/--container full-path-container-image -b/--bindings -B dirname [-B dirname1 [...]] [-v]" + exit -1 +fi + +echo "HOMEgfs: $HOMEgfs" +echo "container: $container" +echo "bindings: $bindings" +echo "Verbose: $verbose" + +if [[ "$verbose" == "true" ]]; then + set -x +fi + +for nm in enkf_chgres_recenter_nc ensadd ensppf ensstat fbwndgfs \ + gaussian_sfcanl gefs_6h_ave_1mem gfs_bufr \ + mkgfsawps ocnicepost overgridid reg2grb2 supvit \ + syndat_getjtbul syndat_maksynrc syndat_qctropcy \ + tave tocsbufr vint wave_stat webtitle rdbfmsua +do + model=${nm} + echo "model: $model" + + run_model_script=${HOMEgfs}/ush/container/run_${model}.sh + rm -f ${run_model_script} + + cat > $run_model_script << EOF_MODEL +#!/bin/bash + +source /usr/lmod/lmod/init/bash +module purge +module use ${HOMEgfs}/sorc/gfs_utils.fd/modulefiles +module load gfsutils_container.intel + +arg="\$@" +${HOMEgfs}/sorc/gfs_utils.fd/install/bin/${model} \$arg +EOF_MODEL + + chmod 755 $run_model_script + + #link_model_script=${HOMEgfs}/exec/${model} + #rm -f ${link_model_script} + + link_model_script=${HOMEgfs}/exec/${model}.x + rm -f ${link_model_script} + + cat > $link_model_script << EOF_LINK +#!/bin/bash + export LD_LIBRARY_PATH=$(dirname $container) + arg="\$@" + singularity exec ${bindings} ${container} ${run_model_script} \$arg +EOF_LINK + + chmod 755 $link_model_script +done + diff --git a/dev/container/link_model.sh b/dev/container/link_model.sh new file mode 100755 index 00000000000..1707298a2eb --- /dev/null +++ b/dev/container/link_model.sh @@ -0,0 +1,85 @@ +#!/bin/bash + +verbose=false +bindings="-B /scratch3 -B /scratch4" + +while [ "$#" -gt 0 ]; do + case "$1" in + -H|--HOMEgfs) + HOMEgfs="$2" + shift 2 + ;; + -b|--bindings) + bindings="$2" + shift 2 + ;; + -c|--container) + container="$2" + shift 2 + ;; + -m|--model) + model="$2" + shift 2 + ;; + -v|--verbose) + verbose=true + shift + ;; + *) + echo "Unknown option: 
$1" + exit 1 + ;; + esac +done + +if [[ ! -v HOMEgfs || ! -v container || ! -v model ]]; then + echo "Usage: link_model.sh -H/-HOMEgfs gw-home-dir -c/--container full-path-container-image \\" + echo " -m/--model name_model -b/--bindings -B dirname [-B dirname1 [...]] [-v]" + exit -1 +fi + +echo "HOMEgfs: $HOMEgfs" +echo "model: $model" +echo "Verbose: $verbose" + +if [[ "$verbose" == "true" ]]; then + set -x +fi + +run_model_script=${HOMEgfs}/ush/container/run_${model}.sh +rm -f ${run_model_script} + +cat > $run_model_script << EOF_MODEL +#!/bin/bash + +# Set OMP_NUM_THREADS to 1 to avoid oversubscription when doing MPMD +export OMP_NUM_THREADS=1 + +#source /opt/spack-stack/spack-stack-1.6.0/envs/unified-env/install/intel/2021.10.0/intel-oneapi-mpi-2021.9.0-6bnjcwc/setvars.sh --force +#export LD_LIBRARY_PATH=/opt/intel/oneapi/compiler/2023.2.3/linux/compiler/lib/intel64_lin +#export LD_LIBRARY_PATH=/opt/intel/oneapi/compiler/2024.0/lib:$LD_LIBRARY_PATH + +source /usr/lmod/lmod/init/bash +module purge +module use ${HOMEgfs}/sorc/ufs_model.fd/modulefiles +module load ufs_container.intel + +arg="\$@" +${HOMEgfs}/sorc/ufs_model.fd/tests/${model}.x \$arg +EOF_MODEL + +chmod 755 $run_model_script + +link_model_script=${HOMEgfs}/exec/${model}.x +rm -f ${link_model_script} + +cat > $link_model_script << EOF_LINK +#!/bin/bash + + export LD_LIBRARY_PATH=$(dirname ${container}) + arg="\$@" + singularity exec ${bindings} ${container} ${run_model_script} \$arg +EOF_LINK + +chmod 755 $link_model_script + diff --git a/dev/container/link_ufs_utils.sh b/dev/container/link_ufs_utils.sh new file mode 100755 index 00000000000..2b41806b7de --- /dev/null +++ b/dev/container/link_ufs_utils.sh @@ -0,0 +1,81 @@ +#!/bin/bash + +verbose=false + +while [ "$#" -gt 0 ]; do + case "$1" in + -H|--HOMEgfs) + HOMEgfs="$2" + shift 2 + ;; + -b|--binding) + binding="$2" + shift 2 + ;; + -c|--container) + container="$2" + shift 2 + ;; + -v|--verbose) + verbose=true + shift + ;; + *) + echo "Unknown 
option: $1" + exit 1 + ;; + esac +done + +if [[ ! -v HOMEgfs || ! -v container ]]; then + echo "Usage: link_model.sh -H/-HOMEgfs gw-home-dir -c/--container full-path-container-image -b/--bindings -B dirname [-B dirname1 [...]] [-v]" + exit -1 +fi + +echo "HOMEgfs: $HOMEgfs" +echo "container: $container" +echo "bindings: $bindings" +echo "Verbose: $verbose" + +if [[ "$verbose" == "true" ]]; then + set -x +fi + +for nm in emcsfc_ice_blend emcsfc_snow2mdl fregrid global_cycle regridStates.x +do + model=${nm} + echo "model: $model" + + run_model_script=${HOMEgfs}/ush/container/run_${model}.sh + rm -f ${run_model_script} + + cat > $run_model_script << EOF_MODEL +#!/bin/bash + +source /usr/lmod/lmod/init/bash +module purge +module use ${HOMEgfs}/sorc/ufs_utils.fd/modulefiles +module load build.container.intel + +arg="\$@" +${HOMEgfs}/sorc/ufs_utils.fd/exec/${model} \$arg +EOF_MODEL + + chmod 755 $run_model_script + + #link_model_script=${HOMEgfs}/exec/${model} + #rm -f ${link_model_script} + + link_model_script=${HOMEgfs}/exec/${model} + rm -f ${link_model_script} + + cat > $link_model_script << EOF_LINK +#!/bin/bash + export LD_LIBRARY_PATH=$(dirname $container) + arg="\$@" + singularity exec ${bindings} ${container} ${run_model_script} \$arg +EOF_LINK + + chmod 755 $link_model_script +done + diff --git a/dev/container/link_ww3.sh b/dev/container/link_ww3.sh new file mode 100755 index 00000000000..cf3887a0a16 --- /dev/null +++ b/dev/container/link_ww3.sh @@ -0,0 +1,80 @@ +#!/bin/bash + +verbose=false + +while [ "$#" -gt 0 ]; do + case "$1" in + -H|--HOMEgfs) + HOMEgfs="$2" + shift 2 + ;; + -b|--bindings) + bindings="$2" + shift 2 + ;; + -c|--container) + container="$2" + shift 2 + ;; + -v|--verbose) + verbose=true + shift + ;; + *) + echo "Unknown option: $1" + exit 1 + ;; + esac +done + +if [[ ! -v HOMEgfs || ! 
-v container ]]; then + echo "Usage: link_model.sh -H/-HOMEgfs gw-home-dir -c/--container full-path-container-image -b/--bindings -B dirname [-B dirname1 [...]] [-v]" + exit -1 +fi + +echo "HOMEgfs: $HOMEgfs" +echo "container: $container" +echo "bindings: $bindings" +echo "Verbose: $verbose" + +if [[ "$verbose" == "true" ]]; then + set -x +fi +for nm in gint grib grid ounf ounp outf outp prep prnc +do + model=ww3_${nm} + echo "model: $model" + + run_model_script=${HOMEgfs}/ush/container/run_${model}.sh + rm -f ${run_model_script} + + cat > $run_model_script << EOF_MODEL +#!/bin/bash + +# Set OMP_NUM_THREADS to 1 to avoid oversubscription when doing MPMD +export OMP_NUM_THREADS=1 + +source /usr/lmod/lmod/init/bash +module purge +module use ${HOMEgfs}/sorc/ufs_model.fd/modulefiles +module load ufs_container.intel + +arg="\$@" +${HOMEgfs}/sorc/ufs_model.fd/WW3/install/pdlib_ON/bin/${model} \$arg +EOF_MODEL + + chmod 755 $run_model_script + + link_model_script=${HOMEgfs}/exec/gfs_${model}.x + rm -f ${link_model_script} + + cat > $link_model_script << EOF_LINK +#!/bin/bash + export LD_LIBRARY_PATH=$(dirname $container) + arg="\$@" + singularity exec ${bindings} ${container} ${run_model_script} \$arg +EOF_LINK + + chmod 755 $link_model_script +done + diff --git a/ush/run_python.sh b/dev/container/ush.python similarity index 57% rename from ush/run_python.sh rename to dev/container/ush.python index 0efdabfb368..463fbd7cdbc 100644 --- a/ush/run_python.sh +++ b/dev/container/ush.python @@ -1,12 +1,9 @@ #!/bin/bash -HOMEgfs=/scratch4/NAGAPE/epic/Wei.Huang/demo/global-workflow-cloud - -#source /opt/spack-stack/spack-stack-1.6.0/envs/unified-env/install/intel/2021.10.0/intel-oneapi-mpi-2021.9.0-6bnjcwc/setvars.sh --force - source /usr/lmod/lmod/init/bash module purge -source ${HOMEgfs}/dev/ush/gw_setup.sh +module use HOMEgfs/modulefiles +module load module_gwsetup.container module list @@ -22,8 +19,8 @@ module load py-python-dateutil/2.8.2 module list 
-wxflowPATH="${HOMEgfs}/ush/python" -export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${HOMEgfs}/ush:${wxflowPATH}" +wxflowPATH="HOMEgfs/ush/python" +export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}HOMEgfs/ush:${wxflowPATH}" arg="$@" diff --git a/ush/run_wgrib2.sh b/dev/container/ush.wgrib2 similarity index 57% rename from ush/run_wgrib2.sh rename to dev/container/ush.wgrib2 index 8c243bb2819..f4b12204a46 100755 --- a/ush/run_wgrib2.sh +++ b/dev/container/ush.wgrib2 @@ -1,14 +1,12 @@ #!/usr/bin/env bash -export HOMEgfs=/scratch4/NAGAPE/epic/Wei.Huang/demo/global-workflow-cloud - source /usr/lmod/lmod/init/bash module purge -module use ${HOMEgfs}/modulefiles +module use HOMEgfs/modulefiles module load module_gwsetup.container module load wgrib2/2.0.8 -export LD_LIBRARY_PATH=/opt/intel/oneapi/compiler/2024.0/lib:$LD_LIBRARY_PATH +#export LD_LIBRARY_PATH=/opt/intel/oneapi/compiler/2024.0/lib:$LD_LIBRARY_PATH arg="$@" diff --git a/dev/workflow/generate_workflows.sh b/dev/workflow/generate_workflows.sh index 266ee7d56df..1c3ae3321e9 100755 --- a/dev/workflow/generate_workflows.sh +++ b/dev/workflow/generate_workflows.sh @@ -537,7 +537,7 @@ for _case in "${_yaml_list[@]}"; do fi _pslot="${_case}${_tag}" if [[ "${_run_with_container}" == "true" ]]; then - _create_exp_cmd="../../bin/run_python.sh ./create_experiment.py -y ${_yaml_dir}/${_case}.yaml --overwrite" + _create_exp_cmd="../../exec/run_python.sh ./create_experiment.py -y ${_yaml_dir}/${_case}.yaml --overwrite" else _create_exp_cmd="./create_experiment.py -y ${_yaml_dir}/${_case}.yaml --overwrite" fi diff --git a/dev/workflow/hosts/ursa.yaml b/dev/workflow/hosts/ursa.yaml index 3c21f221a61..c07517bdb9b 100644 --- a/dev/workflow/hosts/ursa.yaml +++ b/dev/workflow/hosts/ursa.yaml @@ -2,8 +2,7 @@ DMPDIR: '/scratch3/NCEPDEV/global/role.glopara/dump' BASE_GIT: '/scratch3/NCEPDEV/global/role.glopara/git' BASE_DATA: '/scratch3/NCEPDEV/global/role.glopara/data' -#BASE_IC: '/scratch3/NCEPDEV/global/role.glopara/data/ICSDIR' 
-BASE_IC: '/scratch4/NAGAPE/epic/Wei.Huang/data/ICSDIR' +BASE_IC: '/scratch3/NCEPDEV/global/role.glopara/data/ICSDIR' AERO_INPUTS_DIR: /scratch3/NCEPDEV/global/role.glopara/data/GEFS_ExtData/20250310 PACKAGEROOT: '/scratch3/NCEPDEV/global/role.glopara/nwpara' #HOMEDIR: '/scratch3/NCEPDEV/global/role.glopara${USER}' diff --git a/gen-c48atm.sh b/gen-c48atm.sh index d0395b1dce6..f9c924ed3be 100755 --- a/gen-c48atm.sh +++ b/gen-c48atm.sh @@ -7,6 +7,10 @@ mkdir -p ${rundir} HPC_ACCOUNT=epic + container=/scratch4/NAGAPE/epic/Wei.Huang/demo/ubuntu22.04-intel-ufs-env-v1.6.0.img + bindings="-B /scratch3 -B /scratch4" + ${HOMEDIR}/dev/container/gen-wrapper.sh -H ${HOMEDIR} -c ${container} -b "${bindings}" -v + cd ${HOMEDIR}/dev/workflow RUNTESTS=${rundir} \ @@ -18,3 +22,4 @@ -e "Wei.Huang@noaa.gov" \ -R -v + ${HOMEDIR}/dev/container/create-container-links.sh diff --git a/gen-c48s2sw.sh b/gen-c48s2sw.sh new file mode 100755 index 00000000000..03e1f439ac4 --- /dev/null +++ b/gen-c48s2sw.sh @@ -0,0 +1,26 @@ +#!/bin/bash + + set -x + + HOMEDIR=/scratch4/NAGAPE/epic/Wei.Huang/dev/global-workflow-cloud + rundir=/scratch4/NAGAPE/epic/Wei.Huang/run + mkdir -p ${rundir} + HPC_ACCOUNT=epic + + container=/scratch4/NAGAPE/epic/Wei.Huang/demo/ubuntu22.04-intel-ufs-env-v1.6.0.img + bindings="-B /scratch3 -B /scratch4" + ${HOMEDIR}/dev/container/gen-wrapper.sh -H ${HOMEDIR} -c ${container} -b "${bindings}" -v + + cd ${HOMEDIR}/dev/workflow + + RUNTESTS=${rundir} \ + ./generate_workflows.sh \ + -H ${HOMEDIR} \ + -y "C48_S2SW" \ + -Y ${HOMEDIR}/dev/ci/cases/pr \ + -A ${HPC_ACCOUNT} \ + -e "Wei.Huang@noaa.gov" \ + -R -v + + ${HOMEDIR}/dev/container/create-container-links.sh + diff --git a/jobs/JGLOBAL_OCEANICE_PRODUCTS b/jobs/JGLOBAL_OCEANICE_PRODUCTS index 9174c90a8ce..f9420754ea3 100755 --- a/jobs/JGLOBAL_OCEANICE_PRODUCTS +++ b/jobs/JGLOBAL_OCEANICE_PRODUCTS @@ -13,8 +13,13 @@ YMD="${PDY}" HH="${cyc}" declare_from_tmpl -rx "COMOUT_${COMPONENT^^}_GRIB":"COM YMD="${PDY}" HH="${cyc}" 
declare_from_tmpl -rx "COMOUT_${COMPONENT^^}_NETCDF":"COM_${COMPONENT^^}_NETCDF_TMPL" ############################################################### +#RUN_WITH_CONTAINER=YES # Run exglobal script -"${SCRgfs}/exglobal_oceanice_products.py" && true +if [ "$RUN_WITH_CONTAINER" == "YES" ]; then + ${HOMEgfs}/exec/run_python.sh ${SCRgfs}/exglobal_oceanice_products.py +else + "${SCRgfs}/exglobal_oceanice_products.py" && true +fi export err=$? if [[ ${err} -ne 0 ]]; then err_exit diff --git a/jobs/JGLOBAL_STAGE_IC b/jobs/JGLOBAL_STAGE_IC index e0cdcf5faf6..2691792ebff 100755 --- a/jobs/JGLOBAL_STAGE_IC +++ b/jobs/JGLOBAL_STAGE_IC @@ -2,10 +2,10 @@ source "${HOMEgfs}/ush/jjob_header.sh" -e "stage_ic" -c "base stage_ic" +# RUN_WITH_CONTAINER=YES # Execute staging if [ "$RUN_WITH_CONTAINER" == "YES" ]; then - # module purge - ${HOMEgfs}/bin/run_python.sh ${SCRgfs}/exglobal_stage_ic.py + ${HOMEgfs}/exec/run_python.sh ${SCRgfs}/exglobal_stage_ic.py else "${SCRgfs}/exglobal_stage_ic.py" fi diff --git a/modulefiles/module_gwsetup.container.lua b/modulefiles/module_gwsetup.container.lua new file mode 100644 index 00000000000..bb4882cfcb1 --- /dev/null +++ b/modulefiles/module_gwsetup.container.lua @@ -0,0 +1,21 @@ +help([[ +Load environment to run GFS workflow setup scripts in container +]]) + +--load(pathJoin("rocoto")) + +prepend_path("MODULEPATH", "/opt/spack-stack/spack-stack-1.6.0/envs/unified-env/install/modulefiles/Core") + +local stack_intel_ver=os.getenv("stack_intel_ver") or "2021.10.0" +local stack_mpi_ver=os.getenv("stack_mpi_ver") or "2021.9.0" + +load("gnu") +load(pathJoin("stack-intel", stack_intel_ver)) +load(pathJoin("stack-intel-oneapi-mpi", stack_mpi_ver)) +unload("gnu") + +load("py-jinja2") +load("py-pyyaml") +load("py-numpy") + +whatis("Description: GFS run setup environment") diff --git a/scripts/exglobal_atmos_products.sh b/scripts/exglobal_atmos_products.sh index 0cc4c9ebf56..69cbc05e56b 100755 --- a/scripts/exglobal_atmos_products.sh +++ 
b/scripts/exglobal_atmos_products.sh @@ -1,5 +1,6 @@ #! /usr/bin/env bash +RUN_WITH_CONTAINER=YES # Programs used if [ "$RUN_WITH_CONTAINER" == "NO" ]; then source "${USHgfs}/load_fv3gfs_modules.sh" @@ -7,7 +8,7 @@ if [ "$RUN_WITH_CONTAINER" == "NO" ]; then export WGRIB2=${WGRIB2:-${wgrib2_ROOT}/bin/wgrib2} else - export WGRIB2="${HOMEgfs}/bin/run_wgrib2.sh" + export WGRIB2="${HOMEgfs}/exec/run_wgrib2.sh" fi # Scripts used diff --git a/sorc/link_workflow.sh b/sorc/link_workflow.sh index 3e9d6f0b0ac..46c1a5e6b35 100755 --- a/sorc/link_workflow.sh +++ b/sorc/link_workflow.sh @@ -79,7 +79,7 @@ ${LINK_OR_COPY} "${HOMEgfs}/versions/run.${machine}.ver" "${HOMEgfs}/versions/ru case "${machine}" in "wcoss2") FIX_DIR="/lfs/h2/emc/global/noscrub/emc.global/FIX/fix" ;; "hera") FIX_DIR="/scratch1/NCEPDEV/global/glopara/fix" ;; -"ursa") FIX_DIR="/scratch3/NCEPDEV/global/glopara/fix" ;; +"ursa") FIX_DIR="/scratch3/NCEPDEV/global/role.glopara/fix" ;; "orion") FIX_DIR="/work2/noaa/global/role-global/fix" ;; "hercules") FIX_DIR="/work2/noaa/global/role-global/fix" ;; "gaeac5") FIX_DIR="/gpfs/f5/ufs-ard/world-shared/global/glopara/data/fix" ;; diff --git a/ush/jjob_header.sh b/ush/jjob_header.sh index 9f7095c6e45..69babc5ec2a 100755 --- a/ush/jjob_header.sh +++ b/ush/jjob_header.sh @@ -43,9 +43,20 @@ _calling_script="${BASH_SOURCE[1]}" source "${HOMEgfs}/ush/preamble.sh" +export RUN_WITH_CONTAINER=YES if [[ "$RUN_WITH_CONTAINER" == "YES" ]]; then - export PATH=/home/Wei.Huang/prod-util-2.1.1/bin:$PATH + #export PATH=/home/Wei.Huang/prod-util-2.1.1/bin:$PATH #export LD_LIBRARY_PATH=/apps/spack-2024-12/linux-rocky9-x86_64/gcc-11.4.1/intel-oneapi-compilers-2023.2.0-uov33rpz3lplh3hh3v5c6vssbc7ndxuk/lib + + if [[ -v PATH ]]; then + if [[ "$PATH" =~ "prod-util" ]]; then + echo "PATH already contains prod-util" + else + export PATH=/home/Wei.Huang/prod-util-2.1.1/bin:$PATH + fi + else + export PATH=/home/Wei.Huang/prod-util-2.1.1/bin + fi fi OPTIND=1 @@ -98,6 +109,8 @@ export pgm=${pgm:-} 
############################################## # Run setpdy and initialize PDY variables ############################################## +which setpdy.sh + export cycle="t${cyc}z" setpdy.sh || true source ./PDY || true diff --git a/ush/load_fv3gfs_modules.sh b/ush/load_fv3gfs_modules.sh index b107d0fc00d..c8ea0f57b46 100755 --- a/ush/load_fv3gfs_modules.sh +++ b/ush/load_fv3gfs_modules.sh @@ -15,6 +15,7 @@ fi # Setup runtime environment by loading modules ulimit_s=$( ulimit -S -s ) +RUN_WITH_CONTAINER=YES if [[ "$RUN_WITH_CONTAINER" == "NO" ]]; then # Find module command and purge: source "${HOMEgfs}/ush/detect_machine.sh" @@ -36,13 +37,13 @@ if [[ "$RUN_WITH_CONTAINER" == "NO" ]]; then esac module list - - # Add wxflow to PYTHONPATH - wxflowPATH="${HOMEgfs}/ush/python" - PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${HOMEgfs}/ush:${wxflowPATH}" - export PYTHONPATH fi +# Add wxflow to PYTHONPATH +wxflowPATH="${HOMEgfs}/ush/python" +PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${HOMEgfs}/ush:${wxflowPATH}" +export PYTHONPATH + # If this function exists in the environment, run it; else set -x if it was set on entering this script ftype=$(type -t set_trace || echo "") if [[ "${ftype}" == "function" ]]; then diff --git a/ush/load_ufswm_modules.sh b/ush/load_ufswm_modules.sh index f00358095d4..4a30c61eb52 100755 --- a/ush/load_ufswm_modules.sh +++ b/ush/load_ufswm_modules.sh @@ -9,26 +9,40 @@ fi # Setup runtime environment by loading modules ulimit_s=$( ulimit -S -s ) -source "${HOMEgfs}/ush/detect_machine.sh" -source "${HOMEgfs}/ush/module-setup.sh" - -module use "${HOMEgfs}/sorc/ufs_model.fd/modulefiles" -module load "ufs_${MACHINE_ID}.intel" -module load prod_util -if [[ "${MACHINE_ID}" = "wcoss2" ]]; then - module load cray-pals - module load cfp - module load libjpeg - module load craype-network-ucx - module load cray-mpich-ucx +RUN_WITH_CONTAINER=YES +if [[ "$RUN_WITH_CONTAINER" == "YES" ]]; then + if [[ -v PATH ]]; then + if [[ "$PATH" =~ "prod-util" ]]; then + echo "PATH 
already contains prod-util" + else + export PATH=/home/Wei.Huang/prod-util-2.1.1/bin:$PATH + fi + else + export PATH=/home/Wei.Huang/prod-util-2.1.1/bin + fi + export WGRIB2="${HOMEgfs}/exec/run_wgrib2.sh" else - export UTILROOT=${prod_util_ROOT} + source "${HOMEgfs}/ush/detect_machine.sh" + source "${HOMEgfs}/ush/module-setup.sh" + + module use "${HOMEgfs}/sorc/ufs_model.fd/modulefiles" + module load "ufs_${MACHINE_ID}.intel" + module load prod_util + if [[ "${MACHINE_ID}" = "wcoss2" ]]; then + module load cray-pals + module load cfp + module load libjpeg + module load craype-network-ucx + module load cray-mpich-ucx + else + export UTILROOT=${prod_util_ROOT} + fi + module load wgrib2 + export WGRIB2=wgrib2 + + module list + unset MACHINE_ID fi -module load wgrib2 -export WGRIB2=wgrib2 - -module list -unset MACHINE_ID ############################################################### # exglobal_forecast.py requires the following in PYTHONPATH diff --git a/ush/run_gfs_model.sh b/ush/run_gfs_model.sh deleted file mode 100755 index b8b3014833d..00000000000 --- a/ush/run_gfs_model.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env bash - -# Set OMP_NUM_THREADS to 1 to avoid oversubscription when doing MPMD -export OMP_NUM_THREADS=1 -#export FPATH=/usr/lmod/lmod/libexec -#module reset -#module use ${HOMEgfs}/modulefiles -#module load module_base.container -source /opt/spack-stack/spack-stack-1.6.0/envs/unified-env/install/intel/2021.10.0/intel-oneapi-mpi-2021.9.0-6bnjcwc/setvars.sh --force -export LD_LIBRARY_PATH=/opt/intel/oneapi/compiler/2023.2.3/linux/compiler/lib/intel64_lin:$LD_LIBRARY_PATH -export LD_LIBRARY_PATH=/opt/intel/oneapi/compiler/2024.0/lib:$LD_LIBRARY_PATH - -#export I_MPI_OFI_PROVIDER=tcp -#export I_MPI_FABRICS=shm:ofi -#export FI_PROVIDER=tcp -export HOMEgfs=/scratch4/NAGAPE/epic/Wei.Huang/demo/global-workflow-cloud -arg="$@" -${HOMEgfs}/sorc/ufs_model.fd/tests/gfs_model.x $arg - From d6f5283c472c09531716b381dc1812220ba34605 Mon Sep 17 00:00:00 2001 From: 
Wei Huang Date: Wed, 25 Jun 2025 18:17:16 -0400 Subject: [PATCH 061/134] sync with ursa --- dev/container/create-container-links.sh | 37 +++++++ dev/container/gen-wrapper.sh | 68 +++++++++++++ dev/container/link_gfs_utils.sh | 108 +++++++++++++++++++++ dev/container/link_model.sh | 85 ++++++++++++++++ dev/container/link_ufs_utils.sh | 81 ++++++++++++++++ dev/container/link_ww3.sh | 93 ++++++++++++++++++ dev/container/ush.wgrib2 | 62 ++++++++++++ dev/parm/config/gefs/config.resources | 9 ++ dev/parm/config/gefs/config.resources.URSA | 1 + dev/parm/config/sfs/config.resources | 1 + dev/parm/config/sfs/config.resources.URSA | 1 + dev/workflow/generate_workflows.sh | 53 ++++++++-- gen-C96mx100_S2S.sh | 26 +++++ gen-c48s2swa-gefs.sh | 28 ++++++ jobs/JGLOBAL_OCEANICE_PRODUCTS | 8 +- jobs/JGLOBAL_PREP_EMISSIONS | 8 +- jobs/JGLOBAL_STAGE_IC | 7 +- modulefiles/module_base.container.lua | 9 +- scripts/exglobal_atmos_products.sh | 14 ++- scripts/exglobal_oceanice_products.py | 14 ++- ush/interp_atmos_master.sh | 12 ++- ush/interp_atmos_sflux.sh | 12 ++- ush/jjob_header.sh | 19 +++- ush/load_fv3gfs_modules.sh | 46 ++++----- ush/load_ufswm_modules.sh | 50 ++++++---- ush/preamble.sh | 12 +++ ush/python/pygfs/task/oceanice_products.py | 15 ++- 27 files changed, 807 insertions(+), 72 deletions(-) create mode 100755 dev/container/create-container-links.sh create mode 100755 dev/container/gen-wrapper.sh create mode 100755 dev/container/link_gfs_utils.sh create mode 100755 dev/container/link_model.sh create mode 100755 dev/container/link_ufs_utils.sh create mode 100755 dev/container/link_ww3.sh create mode 100755 dev/container/ush.wgrib2 create mode 120000 dev/parm/config/gefs/config.resources.URSA create mode 120000 dev/parm/config/sfs/config.resources.URSA create mode 100755 gen-C96mx100_S2S.sh create mode 100755 gen-c48s2swa-gefs.sh diff --git a/dev/container/create-container-links.sh b/dev/container/create-container-links.sh new file mode 100755 index 00000000000..148eee09986 --- 
/dev/null +++ b/dev/container/create-container-links.sh @@ -0,0 +1,37 @@ +#!/bin/bash + +verbose=true + +#echo "Verbose: $verbose" + +source ${HOMEgfs}/ush/detect_machine.sh + +bindings="-B /scratch3 -B /scratch4" +if [[ ${MACHINE_ID} = ursa* ]] ; then + echo "We are on NOAA Ursa" + bindings="-B /scratch3 -B /scratch4" + HOMEgfs=/scratch4/NAGAPE/epic/Wei.Huang/dev/global-workflow-cloud + container=/scratch4/NAGAPE/epic/Wei.Huang/demo/ubuntu22.04-intel-ufs-env-v1.6.0.img +elif [[ ${MACHINE_ID} = gaea* ]] ; then + echo "We are on NOAA Gaea" + bindings="-B /gpfs/f6/scratch" + HOMEgfs=/gpfs/f6/scratch/Wei.Huang/src/global-workflow-cloud + container=/gpfs/f6/scratch/Wei.Huang/container/ubuntu22.04-intel-ufs-env-v1.6.0.img +fi + +#echo "HOMEgfs: $HOMEgfs" +#echo "container: $container" + +#${HOMEgfs}/dev/container/gen-wrapper.sh -H ${HOMEgfs} -c ${container} -b "${bindings}" + +${HOMEgfs}/dev/container/link_ww3.sh -H ${HOMEgfs} -c ${container} -b "${bindings}" -t gfs +${HOMEgfs}/dev/container/link_ww3.sh -H ${HOMEgfs} -c ${container} -b "${bindings}" -t sfs +${HOMEgfs}/dev/container/link_ww3.sh -H ${HOMEgfs} -c ${container} -b "${bindings}" -t gefs + +${HOMEgfs}/dev/container/link_model.sh -H ${HOMEgfs} -c ${container} -m gfs_model -b "${bindings}" +${HOMEgfs}/dev/container/link_model.sh -H ${HOMEgfs} -c ${container} -m sfs_model -b "${bindings}" +${HOMEgfs}/dev/container/link_model.sh -H ${HOMEgfs} -c ${container} -m gefs_model -b "${bindings}" + +${HOMEgfs}/dev/container/link_gfs_utils.sh -H ${HOMEgfs} -c ${container} -b "${bindings}" +${HOMEgfs}/dev/container/link_ufs_utils.sh -H ${HOMEgfs} -c ${container} -b "${bindings}" + diff --git a/dev/container/gen-wrapper.sh b/dev/container/gen-wrapper.sh new file mode 100755 index 00000000000..ab1c1cd29f2 --- /dev/null +++ b/dev/container/gen-wrapper.sh @@ -0,0 +1,68 @@ +#!/bin/bash + +verbose=false +bindings="-B /scratch3 -B /scratch4" + +while [ "$#" -gt 0 ]; do + case "$1" in + -H|--HOMEgfs) + HOMEgfs="$2" + shift 2 + ;; + 
-b|--bindings) + bindings="$2" + shift 2 + ;; + -c|--container) + container="$2" + shift 2 + ;; + -v|--verbose) + verbose=true + shift + ;; + *) + echo "Unknown option: $1" + exit 1 + ;; + esac +done + +if [[ ! -v HOMEgfs || ! -v container ]]; then + echo "Usage: link_model.sh -H/-HOMEgfs gw-home-dir -c/--container full-path-container-image -b/--bindings "-B dirname [-B dirname1 [...]]" [-v]" + exit -1 +fi + +#echo "HOMEgfs: $HOMEgfs" +#echo "container: $container" +#echo "bindings: $bindings" +#echo "Verbose: $verbose" + +if [[ "$verbose" == "true" ]]; then + set -x +fi + +for dnm in exec ush +do + if [[ "$dnm" == "exec" ]]; then + targetdir=${HOMEgfs}/${dnm} + else + targetdir=${HOMEgfs}/${dnm}/container + fi + mkdir -p ${targetdir} + for fnm in python wgrib2 + do + sourcef=${HOMEgfs}/dev/container/${dnm}.${fnm} + targetf=${targetdir}/run_${fnm}.sh + + sed -e "s?HOMEgfs?${HOMEgfs}?g" \ + -e "s?SIF?${container}?g" \ + -e "s?BINDINGS?${bindings}?g" \ + ${sourcef} > ${targetf} + + chmod 755 ${targetf} + done +done + +sed -i 's/RUN_WITH_CONTAINER=NO/RUN_WITH_CONTAINER=YES/g' ${HOMEgfs}/ush/preamble.sh + diff --git a/dev/container/link_gfs_utils.sh b/dev/container/link_gfs_utils.sh new file mode 100755 index 00000000000..9a805db3576 --- /dev/null +++ b/dev/container/link_gfs_utils.sh @@ -0,0 +1,108 @@ +#!/bin/bash + +verbose=false + +while [ "$#" -gt 0 ]; do + case "$1" in + -H|--HOMEgfs) + HOMEgfs="$2" + shift 2 + ;; + -b|--bindings) + bindings="$2" + shift 2 + ;; + -c|--container) + container="$2" + shift 2 + ;; + -v|--verbose) + verbose=true + shift + ;; + *) + echo "Unknown option: $1" + exit 1 + ;; + esac +done + +if [[ ! -v HOMEgfs || ! 
-v container ]]; then + echo "Usage: link_model.sh -H/-HOMEgfs gw-home-dir -c/--container full-path-container-image -b/--bindings -B dirname [-B dirname1 [...]] [-v]" + exit -1 +fi + +#echo "HOMEgfs: $HOMEgfs" +#echo "container: $container" +#echo "bindings: $bindings" +#echo "Verbose: $verbose" + +if [[ "$verbose" == "true" ]]; then + set -x +fi + +for nm in enkf_chgres_recenter_nc ensadd ensppf ensstat fbwndgfs \ + gaussian_sfcanl gefs_6h_ave_1mem gfs_bufr \ + mkgfsawps ocnicepost overgridid reg2grb2 supvit \ + syndat_getjtbul syndat_maksynrc syndat_qctropcy \ + tave tocsbufr vint wave_stat webtitle rdbfmsua +do + model=${nm} + # echo "model: $model" + + run_model_script=${HOMEgfs}/ush/container/run_${model}.sh + rm -f ${run_model_script} + + cat > $run_model_script << EOF_MODEL +#!/bin/bash + +source /usr/lmod/lmod/init/bash +module purge +module use ${HOMEgfs}/sorc/gfs_utils.fd/modulefiles +module load gfsutils_container.intel +module load wgrib2/2.0.8 + +arg="\$@" +${HOMEgfs}/sorc/gfs_utils.fd/install/bin/${model}.x \$arg +EOF_MODEL + + chmod 755 $run_model_script + + #link_model_script=${HOMEgfs}/exec/${model} + #rm -f ${link_model_script} + + link_model_script=${HOMEgfs}/exec/${model}.x + rm -f ${link_model_script} + + cat > $link_model_script << EOF_LINK +#!/bin/bash + export LD_LIBRARY_PATH=$(dirname $container) + arg="\$@" + singularity exec ${bindings} ${container} ${run_model_script} \$arg +EOF_LINK + + chmod 755 $link_model_script +done + +for nm in ocnicepost +do + direct_model_script=${HOMEgfs}/exec/${nm}.x + rm -f ${direct_model_script} + + cat > $direct_model_script << EOF_DIRECT +#!/bin/bash +#!/bin/bash + +source /usr/lmod/lmod/init/bash +module purge +module use ${HOMEgfs}/sorc/gfs_utils.fd/modulefiles +module load gfsutils_container.intel +module load wgrib2/2.0.8 + +arg="\$@" +${HOMEgfs}/sorc/gfs_utils.fd/install/bin/${nm}.x \$arg +EOF_DIRECT + + chmod 755 $direct_model_script +done + diff --git a/dev/container/link_model.sh 
b/dev/container/link_model.sh new file mode 100755 index 00000000000..270fe4aae95 --- /dev/null +++ b/dev/container/link_model.sh @@ -0,0 +1,85 @@ +#!/bin/bash + +verbose=false +bindings="-B /scratch3 -B /scratch4" + +while [ "$#" -gt 0 ]; do + case "$1" in + -H|--HOMEgfs) + HOMEgfs="$2" + shift 2 + ;; + -b|--bindings) + bindings="$2" + shift 2 + ;; + -c|--container) + container="$2" + shift 2 + ;; + -m|--model) + model="$2" + shift 2 + ;; + -v|--verbose) + verbose=true + shift + ;; + *) + echo "Unknown option: $1" + exit 1 + ;; + esac +done + +if [[ ! -v HOMEgfs || ! -v container || ! -v model ]]; then + echo "Usage: link_model.sh -H/-HOMEgfs gw-home-dir -c/--container full-path-container-image \\" + echo " -m/--model name_model -b/--bindings -B dirname [-B dirname1 [...]] [-v]" + exit -1 +fi + +#echo "HOMEgfs: $HOMEgfs" +#echo "model: $model" +#echo "Verbose: $verbose" + +if [[ "$verbose" == "true" ]]; then + set -x +fi + +run_model_script=${HOMEgfs}/ush/container/run_${model}.sh +rm -f ${run_model_script} + +cat > $run_model_script << EOF_MODEL +#!/bin/bash + +# Set OMP_NUM_THREADS to 1 to avoid oversubscription when doing MPMD +export OMP_NUM_THREADS=1 + +#source /opt/spack-stack/spack-stack-1.6.0/envs/unified-env/install/intel/2021.10.0/intel-oneapi-mpi-2021.9.0-6bnjcwc/setvars.sh --force +#export LD_LIBRARY_PATH=/opt/intel/oneapi/compiler/2023.2.3/linux/compiler/lib/intel64_lin +#export LD_LIBRARY_PATH=/opt/intel/oneapi/compiler/2024.0/lib:$LD_LIBRARY_PATH + +source /usr/lmod/lmod/init/bash +module purge +module use ${HOMEgfs}/sorc/ufs_model.fd/modulefiles +module load ufs_container.intel + +arg="\$@" +${HOMEgfs}/sorc/ufs_model.fd/tests/${model}.x \$arg +EOF_MODEL + +chmod 755 $run_model_script + +link_model_script=${HOMEgfs}/exec/${model}.x +rm -f ${link_model_script} + +cat > $link_model_script << EOF_LINK +#!/bin/bash + + export LD_LIBRARY_PATH=$(dirname ${container}) + arg="\$@" + singularity exec ${bindings} ${container} ${run_model_script} \$arg 
+EOF_LINK + +chmod 755 $link_model_script + diff --git a/dev/container/link_ufs_utils.sh b/dev/container/link_ufs_utils.sh new file mode 100755 index 00000000000..305969844af --- /dev/null +++ b/dev/container/link_ufs_utils.sh @@ -0,0 +1,81 @@ +#!/bin/bash + +verbose=false + +while [ "$#" -gt 0 ]; do + case "$1" in + -H|--HOMEgfs) + HOMEgfs="$2" + shift 2 + ;; + -b|--binding) + binding="$2" + shift 2 + ;; + -c|--container) + container="$2" + shift 2 + ;; + -v|--verbose) + verbose=true + shift + ;; + *) + echo "Unknown option: $1" + exit 1 + ;; + esac +done + +if [[ ! -v HOMEgfs || ! -v container ]]; then + echo "Usage: link_model.sh -H/-HOMEgfs gw-home-dir -c/--container full-path-container-image -b/--bindings -B dirname [-B dirname1 [...]] [-v]" + exit -1 +fi + +#echo "HOMEgfs: $HOMEgfs" +#echo "container: $container" +#echo "bindings: $bindings" +#echo "Verbose: $verbose" + +if [[ "$verbose" == "true" ]]; then + set -x +fi + +for nm in emcsfc_ice_blend emcsfc_snow2mdl fregrid global_cycle regridStates.x +do + model=${nm} + #echo "model: $model" + + run_model_script=${HOMEgfs}/ush/container/run_${model}.sh + rm -f ${run_model_script} + + cat > $run_model_script << EOF_MODEL +#!/bin/bash + +source /usr/lmod/lmod/init/bash +module purge +module use ${HOMEgfs}/sorc/ufs_utils.fd/modulefiles +module load build.container.intel + +arg="\$@" +${HOMEgfs}/sorc/ufs_utils.fd/exec/${model} \$arg +EOF_MODEL + + chmod 755 $run_model_script + + #link_model_script=${HOMEgfs}/exec/${model} + #rm -f ${link_model_script} + + link_model_script=${HOMEgfs}/exec/${model} + rm -f ${link_model_script} + + cat > $link_model_script << EOF_LINK +#!/bin/bash + export LD_LIBRARY_PATH=$(dirname $container) + arg="\$@" + singularity exec ${bindings} ${container} ${run_model_script} \$arg +EOF_LINK + + chmod 755 $link_model_script +done + diff --git a/dev/container/link_ww3.sh b/dev/container/link_ww3.sh new file mode 100755 index 00000000000..729ce398530 --- /dev/null +++ 
b/dev/container/link_ww3.sh @@ -0,0 +1,93 @@ +#!/bin/bash + +verbose=false + +while [ "$#" -gt 0 ]; do + case "$1" in + -H|--HOMEgfs) + HOMEgfs="$2" + shift 2 + ;; + -b|--bindings) + bindings="$2" + shift 2 + ;; + -c|--container) + container="$2" + shift 2 + ;; + -t|--type) + type="$2" + shift 2 + ;; + -v|--verbose) + verbose=true + shift + ;; + *) + echo "Unknown option: $1" + exit 1 + ;; + esac +done + +if [[ ! -v HOMEgfs || ! -v container || ! -v type ]]; then + echo "Usage: link_model.sh -H/-HOMEgfs gw-home-dir -c/--container full-path-container-image \\" + " -b/--bindings -B dirname [-B dirname1 [...]] -t/--type [gfs|sfs|gefs] [-v]" + exit -1 +fi + +#echo "HOMEgfs: $HOMEgfs" +#echo "container: $container" +#echo "bindings: $bindings" +#echo "type: $type" +#echo "Verbose: $verbose" + +if [[ "$verbose" == "true" ]]; then + set -x +fi + +if [[ "$type" == "gfs" ]]; then + pdlib=pdlib_ON +else + pdlib=pdlib_OFF +fi + +for nm in gint grib grid ounf ounp outf outp prep prnc +do + model=ww3_${nm} + #echo "model: $model" + + run_model_script=${HOMEgfs}/ush/container/run_${type}_${model}.sh + rm -f ${run_model_script} + + cat > $run_model_script << EOF_MODEL +#!/bin/bash + +# Set OMP_NUM_THREADS to 1 to avoid oversubscription when doing MPMD +export OMP_NUM_THREADS=1 + +source /usr/lmod/lmod/init/bash +module purge +module use ${HOMEgfs}/sorc/ufs_model.fd/modulefiles +module load ufs_container.intel + +arg="\$@" +${HOMEgfs}/sorc/ufs_model.fd/WW3/install/${pdlib}/bin/${model} \$arg +EOF_MODEL + + chmod 755 $run_model_script + + link_model_script=${HOMEgfs}/exec/${type}_${model}.x + rm -f ${link_model_script} + + cat > $link_model_script << EOF_LINK +#!/bin/bash + export LD_LIBRARY_PATH=$(dirname $container) + arg="\$@" + singularity exec ${bindings} ${container} ${run_model_script} \$arg +EOF_LINK + + chmod 755 $link_model_script +done + diff --git a/dev/container/ush.wgrib2 b/dev/container/ush.wgrib2 new file mode 100755 index 00000000000..572dc2d229a --- /dev/null +++ 
b/dev/container/ush.wgrib2 @@ -0,0 +1,62 @@ +#!/bin/bash + +source /usr/lmod/lmod/init/bash +module purge +module use HOMEgfs/modulefiles +module load module_gwsetup.container + +module load wgrib2/2.0.8 + +arg=$@ + +new_arg="" +has_ftime=false +has_sets=false + +sets="" + +# Basic argument parsing using a while loop and case statement +while [[ "$#" -gt 0 ]]; do + case "$1" in + -set_date) + sdate=$2 + shift + ;; + -set_ftime) + has_ftime=true + ftime="$2 $3 $4" + shift + shift + shift + ;; + -set) + has_sets=true + sets="$sets -set $2 $3" + shift + shift + ;; + -grib) + outfile=$2 + shift + ;; + *) + # echo "Unknown option: $1" + new_arg="$new_arg $1" + ;; + esac + shift # Consume the option/argument +done + +#/opt/spack-stack/spack-stack-1.6.0/envs/unified-env/install/intel/2021.10.0/wgrib2-2.0.8-bq36dgw/bin/wgrib2 $arg + +if [[ "$has_ftime" == "true" ]]; then + echo "new_arg: $new_arg" + if [[ "$has_sets" == "true" ]]; then + wgrib2 $new_arg -set_date "$sdate" -set_ftime "$ftime" $sets -grib $outfile + else + wgrib2 $new_arg -set_date "$sdate" -set_ftime "$ftime" -grib $outfile + fi +else + wgrib2 $arg +fi + diff --git a/dev/parm/config/gefs/config.resources b/dev/parm/config/gefs/config.resources index 61c8555840d..6695486d722 100644 --- a/dev/parm/config/gefs/config.resources +++ b/dev/parm/config/gefs/config.resources @@ -22,6 +22,7 @@ case ${machine} in "HERCULES") max_tasks_per_node=80;; "GAEAC5") max_tasks_per_node=128;; "GAEAC6") max_tasks_per_node=192;; + "URSA") max_tasks_per_node=192;; "AWSPW") export PARTITION_BATCH="compute" max_tasks_per_node=48 @@ -51,6 +52,14 @@ case ${step} in export memory="4096M" ;; + "gen_control_ic") + export walltime="00:15:00" + export ntasks=1 + export tasks_per_node=1 + export threads_per_task=1 + export memory="4096M" + ;; + "waveinit") export walltime="00:10:00" export ntasks=12 diff --git a/dev/parm/config/gefs/config.resources.URSA b/dev/parm/config/gefs/config.resources.URSA new file mode 120000 index 
00000000000..6d3d16eda14 --- /dev/null +++ b/dev/parm/config/gefs/config.resources.URSA @@ -0,0 +1 @@ +../gfs/config.resources.URSA \ No newline at end of file diff --git a/dev/parm/config/sfs/config.resources b/dev/parm/config/sfs/config.resources index dc9ecc1a989..fdd91a4ada4 100644 --- a/dev/parm/config/sfs/config.resources +++ b/dev/parm/config/sfs/config.resources @@ -21,6 +21,7 @@ case ${machine} in "ORION") max_tasks_per_node=40;; "HERCULES") max_tasks_per_node=80;; "GAEAC6") max_tasks_per_node=192;; + "URSA") max_tasks_per_node=192;; "AWSPW") export PARTITION_BATCH="compute" max_tasks_per_node=48 diff --git a/dev/parm/config/sfs/config.resources.URSA b/dev/parm/config/sfs/config.resources.URSA new file mode 120000 index 00000000000..6d3d16eda14 --- /dev/null +++ b/dev/parm/config/sfs/config.resources.URSA @@ -0,0 +1 @@ +../gfs/config.resources.URSA \ No newline at end of file diff --git a/dev/workflow/generate_workflows.sh b/dev/workflow/generate_workflows.sh index 5669b75676d..36061ffa8e4 100755 --- a/dev/workflow/generate_workflows.sh +++ b/dev/workflow/generate_workflows.sh @@ -66,6 +66,8 @@ function _usage() { -t Add a 'tag' to the end of the case names in the pslots to distinguish pslots between multiple sets of tests. + -R Run with Container + -v Verbose mode. Prints output of all commands to stdout. -V Very verbose mode. Passes -v to all commands and prints to stdout. 
@@ -91,6 +93,7 @@ _specified_yaml_dir=false _run_all_gfs=false _run_all_gefs=false _run_all_sfs=false +_run_with_container=false _hpc_account="" _set_account=false _update_cron=false @@ -107,7 +110,7 @@ _auto_del=false _nonflag_option_count=0 while [[ $# -gt 0 && "$1" != "--" ]]; do - while getopts ":H:bDuy:Y:GESA:ce:t:vVdh" option; do + while getopts ":H:bDuy:Y:GESA:ce:t:vVRdh" option; do case "${option}" in H) HOMEgfs="${OPTARG}" @@ -137,6 +140,7 @@ while [[ $# -gt 0 && "$1" != "--" ]]; do t) _tag="_${OPTARG}" ;; v) _verbose=true ;; V) _very_verbose=true && _verbose=true && _verbose_flag="-v" ;; + R) _run_with_container=true ;; A) _set_account=true && _hpc_account="${OPTARG}" ;; d) _debug=true && _very_verbose=true && _verbose=true && _verbose_flag="-v" && PS4='${LINENO}: ' ;; h) _usage && exit 0 ;; @@ -266,6 +270,18 @@ if [[ "${_specified_home}" == "false" ]]; then fi fi +# Set RUN_WITH_CONTAINER if it is set by the user +if [[ "${_run_with_container}" == "true" ]]; then + RUN_WITH_CONTAINER=YES + if [[ "${_verbose}" == "true" ]]; then + printf "Run with Container %s\n\n" "${RUN_WITH_CONTAINER}" + fi + sed -i 's/RUN_WITH_CONTAINER=NO/RUN_WITH_CONTAINER=YES/g' ../../ush/preamble.sh +else + RUN_WITH_CONTAINER=NO + sed -i 's/RUN_WITH_CONTAINER=YES/RUN_WITH_CONTAINER=NO/g' ../../ush/preamble.sh +fi + # Set the _yaml_dir to HOMEgfs/dev/ci/cases/pr if not explicitly set if [[ "${_specified_yaml_dir}" == false ]]; then _yaml_dir="${HOMEgfs}/dev/ci/cases/pr" @@ -438,15 +454,28 @@ fi if [[ "${_verbose}" == true ]]; then printf "Linking the workflow\n\n" fi -if ! "${HOMEgfs}/sorc/link_workflow.sh" >& stdout; then - cat stdout - echo "link_workflow.sh failed!" - if [[ "${_set_email}" == true ]]; then - _stdout=$(cat stdout) - send_email "link_workflow.sh failed with the message"$'\n'"${_stdout}" +if [[ "${_run_with_container}" == true ]]; then + if ! "${HOMEgfs}/sorc/link_workflow.sh" -r >& stdout; then + cat stdout + echo "link_workflow.sh failed!" 
+ if [[ "${_set_email}" == true ]]; then + _stdout=$(cat stdout) + send_email "link_workflow.sh failed with the message"$'\n'"${_stdout}" + fi + rm -f stdout + exit 9 + fi +else + if ! "${HOMEgfs}/sorc/link_workflow.sh" >& stdout; then + cat stdout + echo "link_workflow.sh failed!" + if [[ "${_set_email}" == true ]]; then + _stdout=$(cat stdout) + send_email "link_workflow.sh failed with the message"$'\n'"${_stdout}" + fi + rm -f stdout + exit 9 fi - rm -f stdout - exit 9 fi rm -f stdout @@ -509,7 +538,11 @@ for _case in "${_yaml_list[@]}"; do echo "${_case}" fi _pslot="${_case}${_tag}" - _create_exp_cmd="./create_experiment.py -y ${_yaml_dir}/${_case}.yaml --overwrite" + if [[ "${_run_with_container}" == "true" ]]; then + _create_exp_cmd="../../exec/run_python.sh ./create_experiment.py -y ${_yaml_dir}/${_case}.yaml --overwrite" + else + _create_exp_cmd="./create_experiment.py -y ${_yaml_dir}/${_case}.yaml --overwrite" + fi if [[ "${_verbose}" == true ]]; then pslot=${_pslot} RUNTESTS=${_runtests} ${_create_exp_cmd} else diff --git a/gen-C96mx100_S2S.sh b/gen-C96mx100_S2S.sh new file mode 100755 index 00000000000..aa44564d3fc --- /dev/null +++ b/gen-C96mx100_S2S.sh @@ -0,0 +1,26 @@ +#!/bin/bash + + set -x + + HOMEDIR=/scratch4/NAGAPE/epic/Wei.Huang/dev/global-workflow-cloud + rundir=/scratch4/NAGAPE/epic/Wei.Huang/run + mkdir -p ${rundir} + HPC_ACCOUNT=epic + + container=/scratch4/NAGAPE/epic/Wei.Huang/demo/ubuntu22.04-intel-ufs-env-v1.6.0.img + bindings="-B /scratch3 -B /scratch4" + ${HOMEDIR}/dev/container/gen-wrapper.sh -H ${HOMEDIR} -c ${container} -b "${bindings}" -v + + cd ${HOMEDIR}/dev/workflow + + RUNTESTS=${rundir} \ + ./generate_workflows.sh \ + -H ${HOMEDIR} \ + -y "C96mx100_S2S" \ + -Y ${HOMEDIR}/dev/ci/cases/pr \ + -A ${HPC_ACCOUNT} \ + -e "Wei.Huang@noaa.gov" \ + -R -v + +#${HOMEDIR}/dev/container/create-container-links.sh + diff --git a/gen-c48s2swa-gefs.sh b/gen-c48s2swa-gefs.sh new file mode 100755 index 00000000000..10e3e14f011 --- /dev/null +++ 
b/gen-c48s2swa-gefs.sh @@ -0,0 +1,28 @@ +#!/bin/bash + + set -x + + HOMEDIR=/scratch4/NAGAPE/epic/Wei.Huang/dev/global-workflow-cloud + rundir=/scratch4/NAGAPE/epic/Wei.Huang/run + mkdir -p ${rundir} + HPC_ACCOUNT=epic + + container=/scratch4/NAGAPE/epic/Wei.Huang/demo/ubuntu22.04-intel-ufs-env-v1.6.0.img + bindings="-B /scratch3 -B /scratch4" +#${HOMEDIR}/dev/container/gen-wrapper.sh -H ${HOMEDIR} -c ${container} -b "${bindings}" -v + +# -y "C48_ATM C48_S2SW C48_S2SWA_gefs" \ + + cd ${HOMEDIR}/dev/workflow + + RUNTESTS=${rundir} \ + ./generate_workflows.sh \ + -H ${HOMEDIR} \ + -y "C48_S2SWA_gefs" \ + -Y ${HOMEDIR}/dev/ci/cases/pr \ + -A ${HPC_ACCOUNT} \ + -e "Wei.Huang@noaa.gov" \ + -R -v + +#${HOMEDIR}/dev/container/create-container-links.sh + diff --git a/jobs/JGLOBAL_OCEANICE_PRODUCTS b/jobs/JGLOBAL_OCEANICE_PRODUCTS index 9174c90a8ce..dc406ae7346 100755 --- a/jobs/JGLOBAL_OCEANICE_PRODUCTS +++ b/jobs/JGLOBAL_OCEANICE_PRODUCTS @@ -14,7 +14,13 @@ YMD="${PDY}" HH="${cyc}" declare_from_tmpl -rx "COMOUT_${COMPONENT^^}_NETCDF":"C ############################################################### # Run exglobal script -"${SCRgfs}/exglobal_oceanice_products.py" && true +if [ "$RUN_WITH_CONTAINER" == "YES" ]; then + export WGRIB2=${HOMEgfs}/ush/container/run_wgrib2.sh + ${HOMEgfs}/exec/run_python.sh ${SCRgfs}/exglobal_oceanice_products.py -c -v + export WGRIB2=${HOMEgfs}/exec/run_wgrib2.sh +else + "${SCRgfs}/exglobal_oceanice_products.py" && true +fi export err=$? 
if [[ ${err} -ne 0 ]]; then err_exit diff --git a/jobs/JGLOBAL_PREP_EMISSIONS b/jobs/JGLOBAL_PREP_EMISSIONS index b545547fcb4..1da11e48a1d 100755 --- a/jobs/JGLOBAL_PREP_EMISSIONS +++ b/jobs/JGLOBAL_PREP_EMISSIONS @@ -16,7 +16,13 @@ source "${HOMEgfs}/ush/jjob_header.sh" -e "prep_emissions" -c "base prep_emissio ############################################################### # Run relevant script EXSCRIPT=${PREP_EMISSIONS_PY:-${SCRgfs}/exglobal_prep_emissions.py} -${EXSCRIPT} && true + +# Execute staging +if [[ "$RUN_WITH_CONTAINER" == "YES" ]]; then + ${HOMEgfs}/exec/run_python.sh ${EXSCRIPT} && true +else + ${EXSCRIPT} && true +fi export err=$? if [[ ${err} -ne 0 ]]; then err_exit "Error executing ${EXSCRIPT}, ABORT!" diff --git a/jobs/JGLOBAL_STAGE_IC b/jobs/JGLOBAL_STAGE_IC index 04240a27d06..c44f0b24626 100755 --- a/jobs/JGLOBAL_STAGE_IC +++ b/jobs/JGLOBAL_STAGE_IC @@ -2,9 +2,12 @@ source "${HOMEgfs}/ush/jjob_header.sh" -e "stage_ic" -c "base stage_ic" -module purge # Execute staging -${HOMEgfs}/bin/run_python.sh ${SCRgfs}/exglobal_stage_ic.py +if [ "$RUN_WITH_CONTAINER" == "YES" ]; then + ${HOMEgfs}/exec/run_python.sh ${SCRgfs}/exglobal_stage_ic.py +else + "${SCRgfs}/exglobal_stage_ic.py" +fi err=$? 
############################################################### diff --git a/modulefiles/module_base.container.lua b/modulefiles/module_base.container.lua index 5a850c51d94..2b2d2aca49e 100644 --- a/modulefiles/module_base.container.lua +++ b/modulefiles/module_base.container.lua @@ -4,12 +4,15 @@ Load environment to run GFS in container prepend_path("MODULEPATH", "/opt/spack-stack/spack-stack-1.6.0/envs/unified-env/install/modulefiles/intel-oneapi-mpi/2021.9.0/intel/2021.10.0") prepend_path("MODULEPATH", "/opt/spack-stack/spack-stack-1.6.0/envs/unified-env/install/modulefiles/Core") -prepend_path("MODULEPATH", "/opt/spack-stack/spack-stack-1.6.0/envs/fms-2024.01/install/modulefiles/Core") +-- prepend_path("MODULEPATH", "/opt/spack-stack/spack-stack-1.6.0/envs/fms-2024.01/install/modulefiles/Core") -load("gnu") +setenv("stack_intel_ver", "2021.10.0") +setenv("stack_impi_ver", "2021.9.0") + +-- load("gnu") load(pathJoin("stack-intel", (os.getenv("stack_intel_ver") or "None"))) load(pathJoin("stack-intel-oneapi-mpi", (os.getenv("stack_impi_ver") or "None"))) -unload("gnu") +-- unload("gnu") -- load(pathJoin("python", (os.getenv("python_ver") or "None"))) diff --git a/scripts/exglobal_atmos_products.sh b/scripts/exglobal_atmos_products.sh index 0a5f77799f9..86c04ea3028 100755 --- a/scripts/exglobal_atmos_products.sh +++ b/scripts/exglobal_atmos_products.sh @@ -1,11 +1,16 @@ #! 
/usr/bin/env bash -source "${USHgfs}/load_fv3gfs_modules.sh" -module load wgrib2/2.0.8 +source "${HOMEgfs}/ush/preamble.sh" # Programs used -#export WGRIB2=${WGRIB2:-${wgrib2_ROOT}/bin/wgrib2} -export WGRIB2="${HOMEgfs}/bin/run_wgrib2.sh" +if [ "$RUN_WITH_CONTAINER" == "NO" ]; then + source "${USHgfs}/load_fv3gfs_modules.sh" + module load wgrib2/2.0.8 + + export WGRIB2=${WGRIB2:-${wgrib2_ROOT}/bin/wgrib2} +else + export WGRIB2="${HOMEgfs}/exec/run_wgrib2.sh" +fi # Scripts used INTERP_ATMOS_MASTERSH=${INTERP_ATMOS_MASTERSH:-"${USHgfs}/interp_atmos_master.sh"} @@ -52,6 +57,7 @@ MASTER_FILE="${COMIN_ATMOS_MASTER}/${PREFIX}master.grb2${fhr3}" ${WGRIB2} "${MASTER_FILE}" > wgrib2.log grep -F -f "${paramlista}" wgrib2.log > grep.res ${WGRIB2} -i -grib "tmpfile_${fhr3}" "${MASTER_FILE}" < grep.res + export err=$? if [[ ${err} -ne 0 ]]; then err_exit "wgrib2 failed to create intermediate grib2 file from ${MASTER_FILE} using ${paramlista}" diff --git a/scripts/exglobal_oceanice_products.py b/scripts/exglobal_oceanice_products.py index bb03840842a..7e973c9e768 100755 --- a/scripts/exglobal_oceanice_products.py +++ b/scripts/exglobal_oceanice_products.py @@ -5,6 +5,8 @@ from wxflow import AttrDict, Logger, logit, cast_strdict_as_dtypedict from pygfs.task.oceanice_products import OceanIceProducts +import argparse + # initialize root logger logger = Logger(level=os.environ.get("LOGGING_LEVEL", "DEBUG"), colored_log=True) @@ -12,6 +14,16 @@ @logit(logger) def main(): + parser = argparse.ArgumentParser() + parser.add_argument("-v", "--verbose", action="store_true", + help="increase output verbosity") + parser.add_argument("-c", "--container", action="store_true", + help="use container") + args = parser.parse_args() + + if args.verbose: + logger.info(f"use contaier {args.container}") + config = cast_strdict_as_dtypedict(os.environ) # Instantiate the OceanIce object @@ -39,7 +51,7 @@ def main(): oceanice.configure(oceanice_dict, grid) # Run the oceanice post executable to 
interpolate and create grib2 files - oceanice.execute(oceanice_dict, grid) + oceanice.execute(oceanice_dict, grid, run_with_container=args.container) # Subset raw model data to create netCDF products oceanice.subset(oceanice_dict) diff --git a/ush/interp_atmos_master.sh b/ush/interp_atmos_master.sh index b4772f7c70c..f607b3554da 100755 --- a/ush/interp_atmos_master.sh +++ b/ush/interp_atmos_master.sh @@ -8,7 +8,17 @@ input_file=${1:-"pgb2file_in"} # Input pressure grib2 file output_file_prefix=${2:-"pgb2file_out"} # Prefix for output grib2 file; the prefix is appended by resolution e.g. _0p25 grid_string=${3:-"0p25"} # Target grids; e.g. "0p25" or "0p25:0p50"; If multiple, they need to be ":" seperated -WGRIB2=${WGRIB2:-${wgrib2_ROOT}/bin/wgrib2} +source "${HOMEgfs}/ush/preamble.sh" + +# Programs used +if [ "$RUN_WITH_CONTAINER" == "NO" ]; then + #source "${USHgfs}/load_fv3gfs_modules.sh" + #module load wgrib2/2.0.8 + + export WGRIB2=${WGRIB2:-${wgrib2_ROOT}/bin/wgrib2} +else + export WGRIB2="${HOMEgfs}/exec/run_wgrib2.sh" +fi # wgrib2 options for regridding defaults="-set_grib_type same -set_bitmap 1 -set_grib_max_bits 16" diff --git a/ush/interp_atmos_sflux.sh b/ush/interp_atmos_sflux.sh index 54d1a7f1dba..ca8f6111b13 100755 --- a/ush/interp_atmos_sflux.sh +++ b/ush/interp_atmos_sflux.sh @@ -7,7 +7,17 @@ input_file=${1:-"sfluxfile_in"} # Input sflux grib2 file output_file_prefix=${2:-"sfluxfile_out"} # Prefix for output sflux grib2 file; the prefix is appended by resolution e.g. _0p25 grid_string=${3:-"1p00"} # Target grids; e.g. 
"0p25" or "0p25:0p50"; If multiple, they need to be ":" seperated -WGRIB2=${WGRIB2:-${wgrib2_ROOT}/bin/wgrib2} +source "${HOMEgfs}/ush/preamble.sh" + +# Programs used +if [ "$RUN_WITH_CONTAINER" == "NO" ]; then + #source "${USHgfs}/load_fv3gfs_modules.sh" + #module load wgrib2/2.0.8 + + export WGRIB2=${WGRIB2:-${wgrib2_ROOT}/bin/wgrib2} +else + export WGRIB2="${HOMEgfs}/exec/run_wgrib2.sh" +fi # wgrib2 options for regridding defaults="-set_grib_type same -set_bitmap 1 -set_grib_max_bits 16" diff --git a/ush/jjob_header.sh b/ush/jjob_header.sh index d40053d0cca..aa2dabce4c3 100755 --- a/ush/jjob_header.sh +++ b/ush/jjob_header.sh @@ -43,11 +43,20 @@ _calling_script="${BASH_SOURCE[1]}" source "${HOMEgfs}/ush/preamble.sh" -#module purge -#module use ${HOMEgfs}/modulefiles -#module load module_run.hera +if [[ "$RUN_WITH_CONTAINER" == "YES" ]]; then + #export PATH=/home/Wei.Huang/prod-util-2.1.1/bin:$PATH + #export LD_LIBRARY_PATH=/apps/spack-2024-12/linux-rocky9-x86_64/gcc-11.4.1/intel-oneapi-compilers-2023.2.0-uov33rpz3lplh3hh3v5c6vssbc7ndxuk/lib -source ~/prod_util.env + if [[ -v PATH ]]; then + if [[ "$PATH" =~ "prod-util" ]]; then + echo "PATH already contains prod-util" + else + export PATH=/home/Wei.Huang/prod-util-2.1.1/bin:$PATH + fi + else + export PATH=/home/Wei.Huang/prod-util-2.1.1/bin + fi +fi OPTIND=1 while getopts "c:e:" option; do @@ -99,6 +108,8 @@ export pgm=${pgm:-} ############################################## # Run setpdy and initialize PDY variables ############################################## +which setpdy.sh + export cycle="t${cyc}z" setpdy.sh || true source ./PDY || true diff --git a/ush/load_fv3gfs_modules.sh b/ush/load_fv3gfs_modules.sh index d304940b4db..1fa7af82cd1 100755 --- a/ush/load_fv3gfs_modules.sh +++ b/ush/load_fv3gfs_modules.sh @@ -11,31 +11,38 @@ if [[ "${DEBUG_WORKFLOW:-NO}" == "NO" ]]; then echo "Loading modules quietly..." 
set +x fi -set -x # Setup runtime environment by loading modules ulimit_s=$( ulimit -S -s ) -# Find module command and purge: -source "${HOMEgfs}/ush/detect_machine.sh" -source "${HOMEgfs}/ush/module-setup.sh" +#RUN_WITH_CONTAINER=YES +if [[ "$RUN_WITH_CONTAINER" == "NO" ]]; then + # Find module command and purge: + source "${HOMEgfs}/ush/detect_machine.sh" + source "${HOMEgfs}/ush/module-setup.sh" -# Source versions file for runtime -source "${HOMEgfs}/versions/run.ver" + # Source versions file for runtime + source "${HOMEgfs}/versions/run.ver" -# Load our modules: -module use "${HOMEgfs}/modulefiles" + # Load our modules: + module use "${HOMEgfs}/modulefiles" -case "${MACHINE_ID}" in - "wcoss2" | "hera" | "orion" | "hercules" | "gaeac5" | "gaeac6" | "jet" | "s4" | "noaacloud" | "container") - module load "module_base.${MACHINE_ID}" - ;; - *) - echo "WARNING: UNKNOWN PLATFORM" - ;; -esac + case "${MACHINE_ID}" in + "wcoss2" | "hera" | "orion" | "hercules" | "gaeac5" | "gaeac6" | "jet" | "s4" | "noaacloud" | "ursa") + module load "module_base.${MACHINE_ID}" + ;; + *) + echo "WARNING: UNKNOWN PLATFORM" + ;; + esac -module list + module list +fi + +# Add wxflow to PYTHONPATH +wxflowPATH="${HOMEgfs}/ush/python" +PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${HOMEgfs}/ush:${wxflowPATH}" +export PYTHONPATH # If this function exists in the environment, run it; else set -x if it was set on entering this script ftype=$(type -t set_trace || echo "") @@ -45,11 +52,6 @@ elif [[ "${set_x}" == "YES" ]]; then set -x fi -# Add wxflow to PYTHONPATH -wxflowPATH="${HOMEgfs}/ush/python" -PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${HOMEgfs}/ush:${wxflowPATH}" -export PYTHONPATH - # Restore stack soft limit: ulimit -S -s "${ulimit_s}" unset ulimit_s diff --git a/ush/load_ufswm_modules.sh b/ush/load_ufswm_modules.sh index f00358095d4..f7201de32a7 100755 --- a/ush/load_ufswm_modules.sh +++ b/ush/load_ufswm_modules.sh @@ -9,26 +9,40 @@ fi # Setup runtime environment by loading modules 
ulimit_s=$( ulimit -S -s ) -source "${HOMEgfs}/ush/detect_machine.sh" -source "${HOMEgfs}/ush/module-setup.sh" - -module use "${HOMEgfs}/sorc/ufs_model.fd/modulefiles" -module load "ufs_${MACHINE_ID}.intel" -module load prod_util -if [[ "${MACHINE_ID}" = "wcoss2" ]]; then - module load cray-pals - module load cfp - module load libjpeg - module load craype-network-ucx - module load cray-mpich-ucx +#RUN_WITH_CONTAINER=YES +if [[ "$RUN_WITH_CONTAINER" == "YES" ]]; then + if [[ -v PATH ]]; then + if [[ "$PATH" =~ "prod-util" ]]; then + echo "PATH already contains prod-util" + else + export PATH=/home/Wei.Huang/prod-util-2.1.1/bin:$PATH + fi + else + export PATH=/home/Wei.Huang/prod-util-2.1.1/bin + fi + export WGRIB2="${HOMEgfs}/exec/run_wgrib2.sh" else - export UTILROOT=${prod_util_ROOT} + source "${HOMEgfs}/ush/detect_machine.sh" + source "${HOMEgfs}/ush/module-setup.sh" + + module use "${HOMEgfs}/sorc/ufs_model.fd/modulefiles" + module load "ufs_${MACHINE_ID}.intel" + module load prod_util + if [[ "${MACHINE_ID}" = "wcoss2" ]]; then + module load cray-pals + module load cfp + module load libjpeg + module load craype-network-ucx + module load cray-mpich-ucx + else + export UTILROOT=${prod_util_ROOT} + fi + module load wgrib2 + export WGRIB2=wgrib2 + + module list + unset MACHINE_ID fi -module load wgrib2 -export WGRIB2=wgrib2 - -module list -unset MACHINE_ID ############################################################### # exglobal_forecast.py requires the following in PYTHONPATH diff --git a/ush/preamble.sh b/ush/preamble.sh index ffd83f1e15f..3068dfe248b 100755 --- a/ush/preamble.sh +++ b/ush/preamble.sh @@ -176,6 +176,18 @@ trap "postamble ${_calling_script} ${start_time} \$?" 
EXIT source "${HOMEgfs}/ush/bash_utils.sh" +# Decide if run with container +export RUN_WITH_CONTAINER=YES + +if [[ "$RUN_WITH_CONTAINER" == "YES" ]]; then + export WGRIB2="${HOMEgfs}/exec/run_wgrib2.sh" +else + source "${HOMEgfs}/ush/detect_machine.sh" + source "${HOMEgfs}/ush/module-setup.sh" + module load wgrib2 + export WGRIB2=wgrib2 +fi + # Turn on our settings export SHELLOPTS declare -xf set_strict diff --git a/ush/python/pygfs/task/oceanice_products.py b/ush/python/pygfs/task/oceanice_products.py index d319608ad14..5ebc7761bf5 100644 --- a/ush/python/pygfs/task/oceanice_products.py +++ b/ush/python/pygfs/task/oceanice_products.py @@ -155,7 +155,7 @@ def configure(config: Dict, product_grid: str) -> None: @staticmethod @logit(logger) - def execute(config: Dict, product_grid: str) -> None: + def execute(config: Dict, product_grid: str, run_with_container=False) -> None: """Run the ocnicepost.x executable to interpolate and convert to grib2 Parameters @@ -171,14 +171,15 @@ def execute(config: Dict, product_grid: str) -> None: """ # Run the ocnicepost.x executable - OceanIceProducts.interp(config.DATA, config.APRUN_OCNICEPOST, exec_name="ocnicepost.x") + OceanIceProducts.interp(config.DATA, config.APRUN_OCNICEPOST, + exec_name="ocnicepost.x", run_with_container=run_with_container) # Index the interpolated grib2 file OceanIceProducts.index(config, product_grid) @staticmethod @logit(logger) - def interp(workdir: str, aprun_cmd: str, exec_name: str = "ocnicepost.x") -> None: + def interp(workdir: str, aprun_cmd: str, exec_name: str = "ocnicepost.x", run_with_container=False) -> None: """ Run the interpolation executable to generate interpolated file @@ -200,7 +201,11 @@ def interp(workdir: str, aprun_cmd: str, exec_name: str = "ocnicepost.x") -> Non os.chdir(workdir) logger.debug(f"Current working directory: {os.getcwd()}") - exec_cmd = Executable(aprun_cmd) + print(f'aprun_cmd: {aprun_cmd}') + if run_with_container: + exec_cmd = Executable('time') + else: + 
exec_cmd = Executable(aprun_cmd) exec_cmd.add_default_arg(os.path.join(workdir, exec_name)) try: exec_cmd() @@ -236,6 +241,7 @@ def index(config: Dict, grid: str) -> None: logger.info("Generate index file") wgrib2_cmd = os.environ.get("WGRIB2", None) + print('wgrib2_cmd:', wgrib2_cmd) grbfile = f"{config.component}.{grid}.grib2" grbfidx = f"{grbfile}.idx" @@ -245,6 +251,7 @@ def index(config: Dict, grid: str) -> None: return logger.info(f"Creating index file for {grbfile}") + print('which(wgrib2):', which("wgrib2")) exec_cmd = which("wgrib2") if wgrib2_cmd is None else Executable(wgrib2_cmd) exec_cmd.add_default_arg("-s") try: From 4137587b1f7bff03fc0d157cf49d7da2217b590f Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Thu, 26 Jun 2025 10:40:37 -0400 Subject: [PATCH 062/134] gaea changes --- .gitignore | 1 + c48atm.sh | 18 --------------- dev/container/create-container-links.sh | 29 ++++++++++++++++++++++--- dev/container/gen-wrapper.sh | 1 - dev/workflow/create_experiment.py | 7 ++++++ dev/workflow/generate_workflows.sh | 13 +++++++++-- dev/workflow/rocoto/workflow_xml.py | 18 ++++++++++++--- dev/workflow/setup_xml.py | 3 +++ gen-c48atm.sh | 24 +++++++++++++++----- jobs/JGLOBAL_STAGE_IC | 4 ---- ush/jjob_header.sh | 4 ---- ush/load_fv3gfs_modules.sh | 5 ----- 12 files changed, 81 insertions(+), 46 deletions(-) delete mode 100755 c48atm.sh diff --git a/.gitignore b/.gitignore index 8bf3e1c46c0..d1f03aafea6 100644 --- a/.gitignore +++ b/.gitignore @@ -172,6 +172,7 @@ ush/imsfv3_scf2ioda.py ush/atparse.bash ush/run_bufr2ioda.py ush/bufr2ioda_insitu* +ush/container # ush log file ush/fetch-fix-data.log diff --git a/c48atm.sh b/c48atm.sh deleted file mode 100755 index a85a3a066e1..00000000000 --- a/c48atm.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash - - set -x - - HOMEDIR=/scratch4/NAGAPE/epic/Wei.Huang/dev - rundir=/scratch4/NAGAPE/epic/Wei.Huang/run - mkdir -p ${rundir} - -#source ~/.bashrc - source dev/ush/gw_setup.sh - - HPC_ACCOUNT=epic \ - pslot=c48atm \ - 
RUNTESTS=${rundir} \ - RUNDIR=/scratch4/NAGAPE/epic/Wei.Huang \ - exec/run_python.sh dev/workflow/create_experiment.py \ - --yaml dev/ci/cases/pr/C48_ATM.yaml - diff --git a/dev/container/create-container-links.sh b/dev/container/create-container-links.sh index 0497f3f2943..eb31a8f3aba 100755 --- a/dev/container/create-container-links.sh +++ b/dev/container/create-container-links.sh @@ -1,19 +1,42 @@ #!/bin/bash -verbose=true +HOMEgfs=/gpfs/f6/scratch/Wei.Huang/src/global-workflow-cloud +verbose=false + +while [ "$#" -gt 0 ]; do + case "$1" in + -H|--HOMEgfs) + HOMEgfs="$2" + shift 2 + ;; + -v|--verbose) + verbose=true + shift + ;; + *) + echo "Unknown option: $1" + exit 1 + ;; + esac +done + +if [[ ! -v HOMEgfs ]]; then + echo "Usage: create-container-links.sh -H/--HOMEgfs gw-home-dir [-v]" + exit -1 +fi source ${HOMEgfs}/ush/detect_machine.sh +echo "MACHINE_ID: ${MACHINE_ID}" + bindings="-B /scratch3 -B /scratch4" if [[ ${MACHINE_ID} = ursa* ]] ; then echo "We are on NOAA Ursa" bindings="-B /scratch3 -B /scratch4" - HOMEgfs=/scratch4/NAGAPE/epic/Wei.Huang/dev/global-workflow-cloud container=/scratch4/NAGAPE/epic/Wei.Huang/demo/ubuntu22.04-intel-ufs-env-v1.6.0.img elif [[ ${MACHINE_ID} = gaea* ]] ; then echo "We are on NOAA Gaea" bindings="-B /gpfs/f6/scratch" - HOMEgfs=/gpfs/f6/scratch/Wei.Huang/src/global-workflow-cloud container=/gpfs/f6/scratch/Wei.Huang/container/ubuntu22.04-intel-ufs-env-v1.6.0.img fi diff --git a/dev/container/gen-wrapper.sh b/dev/container/gen-wrapper.sh index 03067a41593..25628cf16b6 100755 --- a/dev/container/gen-wrapper.sh +++ b/dev/container/gen-wrapper.sh @@ -1,7 +1,6 @@ #!/bin/bash verbose=false -bindings="-B /scratch3 -B /scratch4" while [ "$#" -gt 0 ]; do case "$1" in diff --git a/dev/workflow/create_experiment.py b/dev/workflow/create_experiment.py index 1ec7299be9d..d9d76d98c49 100755 --- a/dev/workflow/create_experiment.py +++ b/dev/workflow/create_experiment.py @@ -76,6 +76,8 @@ def input_args(): '-o', '--overwrite', 
help='overwrite previously created experiment', action="store_true", required=False) parser.add_argument('--force', help='raise warnings instead of errors when possible', action='store_true', dest="force") + parser.add_argument('-r', '--rocotorun', help='rocotorun fullpath', + default=None, required=False) return parser.parse_args() @@ -111,6 +113,11 @@ def input_args(): if user_inputs.force: setup_xml_args.append("--force") + if user_inputs.rocotorun is not None: + setup_xml_args.append("--rocotorun") + setup_xml_args.append(user_inputs.rocotorun) + + print('setup_xml_args: ', setup_xml_args) logger.info(f"Call: setup_xml.main()") logger.debug(f"setup_xml.py {' '.join(setup_xml_args)}") setup_xml.main(setup_xml_args) diff --git a/dev/workflow/generate_workflows.sh b/dev/workflow/generate_workflows.sh index cc2a2ba8f9d..5f222d81e2c 100755 --- a/dev/workflow/generate_workflows.sh +++ b/dev/workflow/generate_workflows.sh @@ -63,6 +63,8 @@ function _usage() { If this option is not specified, then the existing email address in the crontab will be preserved. + -r specify rocotorun fullpath (mainly work with container) + -t Add a 'tag' to the end of the case names in the pslots to distinguish pslots between multiple sets of tests. 
@@ -99,6 +101,8 @@ _hpc_account="" _set_account=false _update_cron=false _email="" +_has_rocotorun=false +_rocotorun_fullpath="" _tag="" _set_email=false _verbose=false @@ -111,7 +115,7 @@ _auto_del=false _nonflag_option_count=0 while [[ $# -gt 0 && "$1" != "--" ]]; do - while getopts ":H:bDuy:Y:GESCA:ce:t:vVRdh" option; do + while getopts ":H:bDuy:Y:GESCA:ce:t:r:vVRdh" option; do case "${option}" in H) HOMEgfs="${OPTARG}" @@ -142,6 +146,7 @@ while [[ $# -gt 0 && "$1" != "--" ]]; do v) _verbose=true ;; V) _very_verbose=true && _verbose=true && _verbose_flag="-v" ;; R) _run_with_container=true ;; + r) _rocotorun_fullpath="${OPTARG}" && _has_rocotorun=true ;; A) _set_account=true && _hpc_account="${OPTARG}" ;; d) _debug=true && _very_verbose=true && _verbose=true && _verbose_flag="-v" && PS4='${LINENO}: ' ;; h) _usage && exit 0 ;; @@ -540,7 +545,11 @@ for _case in "${_yaml_list[@]}"; do fi _pslot="${_case}${_tag}" if [[ "${_run_with_container}" == "true" ]]; then - _create_exp_cmd="../../exec/run_python.sh ./create_experiment.py -y ${_yaml_dir}/${_case}.yaml --overwrite" + if [[ "${_has_rocotorun}" == "true" ]]; then + _create_exp_cmd="../../exec/run_python.sh ./create_experiment.py -y ${_yaml_dir}/${_case}.yaml -r ${_rocotorun_fullpath} --overwrite" + else + _create_exp_cmd="../../exec/run_python.sh ./create_experiment.py -y ${_yaml_dir}/${_case}.yaml --overwrite" + fi else _create_exp_cmd="./create_experiment.py -y ${_yaml_dir}/${_case}.yaml --overwrite" fi diff --git a/dev/workflow/rocoto/workflow_xml.py b/dev/workflow/rocoto/workflow_xml.py index e5e4d27282e..0a916291047 100644 --- a/dev/workflow/rocoto/workflow_xml.py +++ b/dev/workflow/rocoto/workflow_xml.py @@ -160,7 +160,10 @@ def _write_crontab(self, crontab_file: str = None, cronint: int = 5) -> None: rocotoruncmd = find_executable('rocotorun') if rocotoruncmd is None: try: - rocotoruncmd = '/apps/rocoto/default/bin/rocotorun' + if ( 'rocotorun' in self.rocoto_config.keys() ): + rocotoruncmd = 
self.rocoto_config['rocotorun'] + else: + rocotoruncmd = '/apps/rocoto/default/bin/rocotorun' os.path.exists(rocotoruncmd) except Exception as ee: raise Exception("Failed to find rocotorun, crontab will not be created: ") from ee @@ -271,9 +274,18 @@ def _check_rocotorc(self): rocotorun = which("rocotorun") if rocotorun is None: - raise FileNotFoundError("Could not find the rocotorun executable. Make sure you have the module loaded!") + try: + if ( 'rocotorun' in self.rocoto_config.keys() ): + rocotorun = self.rocoto_config['rocotorun'] + else: + rocotorun = '/apps/rocoto/default/bin/rocotorun' + os.path.exists(rocotorun) + except Exception as ee: + raise Exception("Could not find the rocotorun executable. Make sure you have the module loaded!: ") from ee - version = rocotorun("--version", output=str, error=str).split()[-1].strip() + version = rocotorun.split('/')[-3] + else: + version = rocotorun("--version", output=str, error=str).split()[-1].strip() homedir = os.path.expanduser("~") rocotorc_file = os.path.join(homedir, ".rocoto", version, "rocotorc") diff --git a/dev/workflow/setup_xml.py b/dev/workflow/setup_xml.py index ad1be5c4db4..7a255108a7c 100755 --- a/dev/workflow/setup_xml.py +++ b/dev/workflow/setup_xml.py @@ -44,6 +44,8 @@ def input_args(*argv): default=10, required=False) parser.add_argument('--force', help='raise warnings instead of errors when possible', action='store_true', dest="force") + parser.add_argument('--rocotorun', help='rocotorun fullpath', type=str, + default=None, required=False) return parser.parse_args(argv[0][0] if len(argv[0]) else None) @@ -89,6 +91,7 @@ def main(*argv): rocoto_param_dict = {'maxtries': user_inputs.maxtries, 'cyclethrottle': user_inputs.cyclethrottle, 'taskthrottle': user_inputs.taskthrottle, + 'rocotorun': user_inputs.rocotorun, 'verbosity': user_inputs.verbosity} cfg = Configuration(user_inputs.expdir) diff --git a/gen-c48atm.sh b/gen-c48atm.sh index f9c924ed3be..73e36a916e8 100755 --- a/gen-c48atm.sh +++ 
b/gen-c48atm.sh @@ -2,13 +2,24 @@ set -x - HOMEDIR=/scratch4/NAGAPE/epic/Wei.Huang/dev/global-workflow-cloud - rundir=/scratch4/NAGAPE/epic/Wei.Huang/run +#for Ursa +#HOMEDIR=/scratch4/NAGAPE/epic/Wei.Huang/dev/global-workflow-cloud +#container=/scratch4/NAGAPE/epic/Wei.Huang/demo/ubuntu22.04-intel-ufs-env-v1.6.0.img +#rundir=/scratch4/NAGAPE/epic/Wei.Huang/run +#bindings="-B /scratch3 -B /scratch4" +#HPC_ACCOUNT=epic + +#for GaeaC6 + HOMEDIR=/gpfs/f6/scratch/Wei.Huang/src/global-workflow-cloud + container=/gpfs/f6/scratch/Wei.Huang/container/ubuntu22.04-intel-ufs-env-v1.6.0.img + rundir=/gpfs/f6/scratch/Wei.Huang/run + bindings="-B /gpfs/f6/scratch -B /ncrc/home1/Wei.Huang" + HPC_ACCOUNT=bil-fire8 + + module load rocoto/1.3.7 + mkdir -p ${rundir} - HPC_ACCOUNT=epic - container=/scratch4/NAGAPE/epic/Wei.Huang/demo/ubuntu22.04-intel-ufs-env-v1.6.0.img - bindings="-B /scratch3 -B /scratch4" ${HOMEDIR}/dev/container/gen-wrapper.sh -H ${HOMEDIR} -c ${container} -b "${bindings}" -v cd ${HOMEDIR}/dev/workflow @@ -20,6 +31,7 @@ -Y ${HOMEDIR}/dev/ci/cases/pr \ -A ${HPC_ACCOUNT} \ -e "Wei.Huang@noaa.gov" \ + -r "/autofs/ncrc-svm1_proj/hurr1/hafs/shared/rocoto/1.3.7/bin/rocotorun" \ -R -v - ${HOMEDIR}/dev/container/create-container-links.sh + ${HOMEDIR}/dev/container/create-container-links.sh -H ${HOMEDIR} diff --git a/jobs/JGLOBAL_STAGE_IC b/jobs/JGLOBAL_STAGE_IC index 6053ab2435a..c44f0b24626 100755 --- a/jobs/JGLOBAL_STAGE_IC +++ b/jobs/JGLOBAL_STAGE_IC @@ -2,10 +2,6 @@ source "${HOMEgfs}/ush/jjob_header.sh" -e "stage_ic" -c "base stage_ic" -<<<<<<< HEAD -======= -# RUN_WITH_CONTAINER=YES ->>>>>>> origin/feature/container-on-ursa-readonly # Execute staging if [ "$RUN_WITH_CONTAINER" == "YES" ]; then ${HOMEgfs}/exec/run_python.sh ${SCRgfs}/exglobal_stage_ic.py diff --git a/ush/jjob_header.sh b/ush/jjob_header.sh index 1798107c2c0..aa2dabce4c3 100755 --- a/ush/jjob_header.sh +++ b/ush/jjob_header.sh @@ -43,10 +43,6 @@ _calling_script="${BASH_SOURCE[1]}" source 
"${HOMEgfs}/ush/preamble.sh" -<<<<<<< HEAD -======= -export RUN_WITH_CONTAINER=YES ->>>>>>> origin/feature/container-on-ursa-readonly if [[ "$RUN_WITH_CONTAINER" == "YES" ]]; then #export PATH=/home/Wei.Huang/prod-util-2.1.1/bin:$PATH #export LD_LIBRARY_PATH=/apps/spack-2024-12/linux-rocky9-x86_64/gcc-11.4.1/intel-oneapi-compilers-2023.2.0-uov33rpz3lplh3hh3v5c6vssbc7ndxuk/lib diff --git a/ush/load_fv3gfs_modules.sh b/ush/load_fv3gfs_modules.sh index f49a366450e..d58bd98eef4 100755 --- a/ush/load_fv3gfs_modules.sh +++ b/ush/load_fv3gfs_modules.sh @@ -15,11 +15,6 @@ fi # Setup runtime environment by loading modules ulimit_s=$( ulimit -S -s ) -<<<<<<< HEAD -#RUN_WITH_CONTAINER=YES -======= -RUN_WITH_CONTAINER=YES ->>>>>>> origin/feature/container-on-ursa-readonly if [[ "$RUN_WITH_CONTAINER" == "NO" ]]; then # Find module command and purge: source "${HOMEgfs}/ush/detect_machine.sh" From 52f4cb39197e257dd8ee2ef91163f55490975b51 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Fri, 27 Jun 2025 14:28:30 +0000 Subject: [PATCH 063/134] testing on AWS --- .gitignore | 1 + dev/container/com.sh | 19 +++++++ dev/container/compile-gw-in-container.sh | 6 +++ dev/container/create-container-links.sh | 49 +++++++++++------ .../container/gen-C96mx100_S2S.sh | 0 dev/container/gen-run-cases.sh | 52 +++++++++++++++++++ dev/container/gen-wrapper.sh | 9 ++-- dev/container/link_model.sh | 5 +- dev/container/shell-in-container.sh | 8 +++ dev/workflow/create_experiment.py | 6 +++ dev/workflow/generate_workflows.sh | 14 ++++- dev/workflow/hosts.py | 5 +- dev/workflow/rocoto/workflow_xml.py | 18 +++++-- dev/workflow/setup_xml.py | 9 ++-- env/CONTAINER.env | 32 ------------ gen-c48atm.sh | 25 --------- gen-c48s2sw.sh | 26 ---------- gen-c48s2swa-gefs.sh | 28 ---------- jobs/JGLOBAL_ARCHIVE_VRFY | 7 ++- sorc/com.sh | 6 +++ ush/load_fv3gfs_modules.sh | 3 +- 21 files changed, 179 insertions(+), 149 deletions(-) create mode 100755 dev/container/com.sh create mode 100755 
dev/container/compile-gw-in-container.sh rename gen-C96mx100_S2S.sh => dev/container/gen-C96mx100_S2S.sh (100%) create mode 100755 dev/container/gen-run-cases.sh create mode 100755 dev/container/shell-in-container.sh delete mode 100755 env/CONTAINER.env delete mode 100755 gen-c48atm.sh delete mode 100755 gen-c48s2sw.sh delete mode 100755 gen-c48s2swa-gefs.sh create mode 100755 sorc/com.sh diff --git a/.gitignore b/.gitignore index 8bf3e1c46c0..d1f03aafea6 100644 --- a/.gitignore +++ b/.gitignore @@ -172,6 +172,7 @@ ush/imsfv3_scf2ioda.py ush/atparse.bash ush/run_bufr2ioda.py ush/bufr2ioda_insitu* +ush/container # ush log file ush/fetch-fix-data.log diff --git a/dev/container/com.sh b/dev/container/com.sh new file mode 100755 index 00000000000..0c1df3263b8 --- /dev/null +++ b/dev/container/com.sh @@ -0,0 +1,19 @@ +#!/bin/bash +#SBATCH --job-name=compile +#SBATCH --account=$USER +#SBATCH --qos=batch +##SBATCH --partition=compute +#SBATCH --partition=process +#SBATCH -t 04:15:00 +#SBATCH --nodes=1 +#SBATCH -o compile.%J.log +#SBATCH --exclusive + +set -x + +gwhome=/contrib/Wei.Huang/src/global-workflow-cloud +img=/contrib/Wei.Huang/src/gw-container-spack-stack-1.6.0/ubuntu22.04-intel-ufs-env-v1.6.0.img +cmd=${gwhome}/sorc/com.sh + +singularity exec -B /contrib -B /lustre ${img} ${cmd} + diff --git a/dev/container/compile-gw-in-container.sh b/dev/container/compile-gw-in-container.sh new file mode 100755 index 00000000000..edf33a63797 --- /dev/null +++ b/dev/container/compile-gw-in-container.sh @@ -0,0 +1,6 @@ +#!/bin/bash + + cd /contrib/Wei.Huang/src/global-workflow-cloud/sorc + ./build_all.sh gfs sfs gefs + ./link_workflow.sh + diff --git a/dev/container/create-container-links.sh b/dev/container/create-container-links.sh index 54eeee5909e..fa51c0276ce 100755 --- a/dev/container/create-container-links.sh +++ b/dev/container/create-container-links.sh @@ -1,22 +1,41 @@ #!/bin/bash -HOMEgfs=/scratch4/NAGAPE/epic/Wei.Huang/dev/global-workflow-cloud 
-container=/scratch4/NAGAPE/epic/Wei.Huang/demo/ubuntu22.04-intel-ufs-env-v1.6.0.img -verbose=true - -#echo "HOMEgfs: $HOMEgfs" -#echo "container: $container" -#echo "Verbose: $verbose" - -source ${HOMEgfs}/ush/detect_machine.sh - -bindings="-B /scratch3 -B /scratch4" -if [[ ${MACHINE_ID} = ursa* ]] ; then - # We are on NOAA Ursa - bindings="-B /scratch3 -B /scratch4" +verbose=false + +while [ "$#" -gt 0 ]; do + case "$1" in + -H|--HOMEgfs) + HOMEgfs="$2" + shift 2 + ;; + -c|--container) + container="$2" + shift 2 + ;; + -b|--bindings) + bindings="$2" + shift 2 + ;; + -v|--verbose) + verbose=true + shift + ;; + *) + echo "Unknown option: $1" + exit 1 + ;; + esac +done + +if [[ ! -v HOMEgfs || ! -v container || ! -v bindings ]]; then + echo "Usage: create-container-links.sh -H/--HOMEgfs gw-home-dir -c/--container container-fullpath -b/--bindings list-of-binding-dirs [-v]" + exit -1 fi -#${HOMEgfs}/dev/container/gen-wrapper.sh -H ${HOMEgfs} -c ${container} -b "${bindings}" +echo "HOMEgfs: $HOMEgfs" +echo "container: $container" +echo "bindings: $bindings" +echo "Verbose: $verbose" ${HOMEgfs}/dev/container/link_ww3.sh -H ${HOMEgfs} -c ${container} -b "${bindings}" -t gfs ${HOMEgfs}/dev/container/link_ww3.sh -H ${HOMEgfs} -c ${container} -b "${bindings}" -t sfs diff --git a/gen-C96mx100_S2S.sh b/dev/container/gen-C96mx100_S2S.sh similarity index 100% rename from gen-C96mx100_S2S.sh rename to dev/container/gen-C96mx100_S2S.sh diff --git a/dev/container/gen-run-cases.sh b/dev/container/gen-run-cases.sh new file mode 100755 index 00000000000..fa5f64d818a --- /dev/null +++ b/dev/container/gen-run-cases.sh @@ -0,0 +1,52 @@ +#!/bin/bash + + set -x + +HOMEgfs="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd )" +source "${HOMEgfs}/ush/detect_machine.sh" + +echo "MACHINE_ID: $MACHINE_ID" + +HOMEDIR=${HOMEgfs} +if [[ ${MACHINE_ID} = ursa* ]] ; then + container=/scratch4/NAGAPE/epic/${USER}/demo/ubuntu22.04-intel-ufs-env-v1.6.0.img + 
rundir=/scratch4/NAGAPE/epic/${USER}/run + bindings="-B /scratch3 -B /scratch4" + HPC_ACCOUNT=epic +elif [[ ${MACHINE_ID} = gaea* ]] ; then + container=/gpfs/f6/scratch/${USER}/container/ubuntu22.04-intel-ufs-env-v1.6.0.img + rundir=/gpfs/f6/scratch/${USER}/run + bindings="-B /gpfs/f6/scratch -B /ncrc/home1/${USER}" + HPC_ACCOUNT=bil-fire8 +elif [[ ${MACHINE_ID} = noaacloud* ]] ; then + TOPICDIR=/bucket/global-workflow-shared-data/ICSDIR + container=/contrib/${USER}/src/gw-container-spack-stack-1.6.0/ubuntu22.04-intel-ufs-env-v1.6.0.img + rundir=/lustre/${USER}/run + bindings="--env \"I_MPI_FABRICS=ofi:shm,I_MPI_DEBUG=6\" -B /apps/slurm/default/lib/libpmi2.so -B /contrib -B /lustre -B /bucket" + HPC_ACCOUNT=${USER} +fi + + module load rocoto/1.3.7 + + rocotocmd=`which rocotorun` + + mkdir -p ${rundir} + + ${HOMEDIR}/dev/container/gen-wrapper.sh -H ${HOMEDIR} -c ${container} -b "${bindings}" -v + + cd ${HOMEDIR}/dev/workflow + + TOPICDIR=${TOPICDIR} \ + RUNTESTS=${rundir} \ + RUNDIRS=${rundir} \ + ./generate_workflows.sh \ + -H ${HOMEDIR} \ + -y "C48_ATM" \ + -Y ${HOMEDIR}/dev/ci/cases/pr \ + -A ${HPC_ACCOUNT} \ + -e "Wei.Huang@noaa.gov" \ + -r ${rocotocmd} \ + -R -v + + ${HOMEDIR}/dev/container/create-container-links.sh -H ${HOMEDIR} -c ${container} -b "${bindings}" + diff --git a/dev/container/gen-wrapper.sh b/dev/container/gen-wrapper.sh index ab1c1cd29f2..25628cf16b6 100755 --- a/dev/container/gen-wrapper.sh +++ b/dev/container/gen-wrapper.sh @@ -1,7 +1,6 @@ #!/bin/bash verbose=false -bindings="-B /scratch3 -B /scratch4" while [ "$#" -gt 0 ]; do case "$1" in @@ -33,10 +32,10 @@ if [[ ! -v HOMEgfs || ! 
-v container ]]; then exit -1 fi -#echo "HOMEgfs: $HOMEgfs" -#echo "container: $container" -#echo "bindings: $bindings" -#echo "Verbose: $verbose" +echo "HOMEgfs: $HOMEgfs" +echo "container: $container" +echo "bindings: $bindings" +echo "Verbose: $verbose" if [[ "$verbose" == "true" ]]; then set -x diff --git a/dev/container/link_model.sh b/dev/container/link_model.sh index 270fe4aae95..ba3dea2de72 100755 --- a/dev/container/link_model.sh +++ b/dev/container/link_model.sh @@ -78,7 +78,10 @@ cat > $link_model_script << EOF_LINK export LD_LIBRARY_PATH=$(dirname ${container}) arg="\$@" - singularity exec ${bindings} ${container} ${run_model_script} \$arg + singularity exec \\ + ${bindings} \\ + ${container} \\ + ${run_model_script} \$arg EOF_LINK chmod 755 $link_model_script diff --git a/dev/container/shell-in-container.sh b/dev/container/shell-in-container.sh new file mode 100755 index 00000000000..e1290c0850e --- /dev/null +++ b/dev/container/shell-in-container.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +#export SINGULARITY_BIND="${slurm_binding}/lustre:/lustre,/bucket:/bucket,/contrib:/contrib" +#export SINGULARITY_BIND="/lustre:/lustre,/bucket:/bucket,/contrib:/contrib" + +img=/contrib/Wei.Huang/src/gw-container-spack-stack-1.6.0/ubuntu22.04-intel-ufs-env-v1.6.0.img +singularity shell -B /contrib -B /lustre ${img} + diff --git a/dev/workflow/create_experiment.py b/dev/workflow/create_experiment.py index 1ec7299be9d..a47ba8f634a 100755 --- a/dev/workflow/create_experiment.py +++ b/dev/workflow/create_experiment.py @@ -76,6 +76,8 @@ def input_args(): '-o', '--overwrite', help='overwrite previously created experiment', action="store_true", required=False) parser.add_argument('--force', help='raise warnings instead of errors when possible', action='store_true', dest="force") + parser.add_argument('-r', '--rocotorun', help='rocotorun fullpath', + default=None, required=False) return parser.parse_args() @@ -111,6 +113,10 @@ def input_args(): if user_inputs.force: 
setup_xml_args.append("--force") + if user_inputs.rocotorun is not None: + setup_xml_args.append("--rocotorun") + setup_xml_args.append(user_inputs.rocotorun) + logger.info(f"Call: setup_xml.main()") logger.debug(f"setup_xml.py {' '.join(setup_xml_args)}") setup_xml.main(setup_xml_args) diff --git a/dev/workflow/generate_workflows.sh b/dev/workflow/generate_workflows.sh index 36061ffa8e4..5f222d81e2c 100755 --- a/dev/workflow/generate_workflows.sh +++ b/dev/workflow/generate_workflows.sh @@ -63,6 +63,8 @@ function _usage() { If this option is not specified, then the existing email address in the crontab will be preserved. + -r specify rocotorun fullpath (mainly work with container) + -t Add a 'tag' to the end of the case names in the pslots to distinguish pslots between multiple sets of tests. @@ -94,10 +96,13 @@ _run_all_gfs=false _run_all_gefs=false _run_all_sfs=false _run_with_container=false +_run_all_gcafs=false _hpc_account="" _set_account=false _update_cron=false _email="" +_has_rocotorun=false +_rocotorun_fullpath="" _tag="" _set_email=false _verbose=false @@ -110,7 +115,7 @@ _auto_del=false _nonflag_option_count=0 while [[ $# -gt 0 && "$1" != "--" ]]; do - while getopts ":H:bDuy:Y:GESA:ce:t:vVRdh" option; do + while getopts ":H:bDuy:Y:GESCA:ce:t:r:vVRdh" option; do case "${option}" in H) HOMEgfs="${OPTARG}" @@ -141,6 +146,7 @@ while [[ $# -gt 0 && "$1" != "--" ]]; do v) _verbose=true ;; V) _very_verbose=true && _verbose=true && _verbose_flag="-v" ;; R) _run_with_container=true ;; + r) _rocotorun_fullpath="${OPTARG}" && _has_rocotorun=true ;; A) _set_account=true && _hpc_account="${OPTARG}" ;; d) _debug=true && _very_verbose=true && _verbose=true && _verbose_flag="-v" && PS4='${LINENO}: ' ;; h) _usage && exit 0 ;; @@ -539,7 +545,11 @@ for _case in "${_yaml_list[@]}"; do fi _pslot="${_case}${_tag}" if [[ "${_run_with_container}" == "true" ]]; then - _create_exp_cmd="../../exec/run_python.sh ./create_experiment.py -y ${_yaml_dir}/${_case}.yaml --overwrite" + 
if [[ "${_has_rocotorun}" == "true" ]]; then + _create_exp_cmd="../../exec/run_python.sh ./create_experiment.py -y ${_yaml_dir}/${_case}.yaml -r ${_rocotorun_fullpath} --overwrite" + else + _create_exp_cmd="../../exec/run_python.sh ./create_experiment.py -y ${_yaml_dir}/${_case}.yaml --overwrite" + fi else _create_exp_cmd="./create_experiment.py -y ${_yaml_dir}/${_case}.yaml --overwrite" fi diff --git a/dev/workflow/hosts.py b/dev/workflow/hosts.py index 6d1b8580bdc..eacc53c5b8e 100644 --- a/dev/workflow/hosts.py +++ b/dev/workflow/hosts.py @@ -15,7 +15,7 @@ class Host: Gather Host specific information. """ - SUPPORTED_HOSTS = ['HERA', 'ORION', 'HERCULES', 'WCOSS2', 'CONTAINER', + SUPPORTED_HOSTS = ['HERA', 'ORION', 'HERCULES', 'WCOSS2', 'GAEAC5', 'GAEAC6', 'URSA', 'AWSPW', 'AZUREPW', 'GOOGLEPW'] def __init__(self, host=None): @@ -44,7 +44,6 @@ def detect(self) -> None: machine_id = os.getenv('MACHINE_ID', 'UNKNOWN') pw_csp = os.getenv('PW_CSP', 'UNKNOWN') - container = os.getenv('SINGULARITY_NAME', None) # Detect the machine since MACHINE_ID is set, # Additionaly, if PW_CSP is set, then the machine is a cloud machine @@ -67,8 +66,6 @@ def detect(self) -> None: elif os.path.exists('/scratch3/NCEPDEV'): self.machine = 'URSA' machine_id = 'URSA' - elif container is not None: - self.machine = 'CONTAINER' elif pw_csp is not None: if pw_csp.lower() not in ['azure', 'aws', 'google']: raise ValueError( diff --git a/dev/workflow/rocoto/workflow_xml.py b/dev/workflow/rocoto/workflow_xml.py index e5e4d27282e..0a916291047 100644 --- a/dev/workflow/rocoto/workflow_xml.py +++ b/dev/workflow/rocoto/workflow_xml.py @@ -160,7 +160,10 @@ def _write_crontab(self, crontab_file: str = None, cronint: int = 5) -> None: rocotoruncmd = find_executable('rocotorun') if rocotoruncmd is None: try: - rocotoruncmd = '/apps/rocoto/default/bin/rocotorun' + if ( 'rocotorun' in self.rocoto_config.keys() ): + rocotoruncmd = self.rocoto_config['rocotorun'] + else: + rocotoruncmd = 
'/apps/rocoto/default/bin/rocotorun' os.path.exists(rocotoruncmd) except Exception as ee: raise Exception("Failed to find rocotorun, crontab will not be created: ") from ee @@ -271,9 +274,18 @@ def _check_rocotorc(self): rocotorun = which("rocotorun") if rocotorun is None: - raise FileNotFoundError("Could not find the rocotorun executable. Make sure you have the module loaded!") + try: + if ( 'rocotorun' in self.rocoto_config.keys() ): + rocotorun = self.rocoto_config['rocotorun'] + else: + rocotorun = '/apps/rocoto/default/bin/rocotorun' + os.path.exists(rocotorun) + except Exception as ee: + raise Exception("Could not find the rocotorun executable. Make sure you have the module loaded!: ") from ee - version = rocotorun("--version", output=str, error=str).split()[-1].strip() + version = rocotorun.split('/')[-3] + else: + version = rocotorun("--version", output=str, error=str).split()[-1].strip() homedir = os.path.expanduser("~") rocotorc_file = os.path.join(homedir, ".rocoto", version, "rocotorc") diff --git a/dev/workflow/setup_xml.py b/dev/workflow/setup_xml.py index 574936d2779..7a255108a7c 100755 --- a/dev/workflow/setup_xml.py +++ b/dev/workflow/setup_xml.py @@ -44,6 +44,8 @@ def input_args(*argv): default=10, required=False) parser.add_argument('--force', help='raise warnings instead of errors when possible', action='store_true', dest="force") + parser.add_argument('--rocotorun', help='rocotorun fullpath', type=str, + default=None, required=False) return parser.parse_args(argv[0][0] if len(argv[0]) else None) @@ -89,6 +91,7 @@ def main(*argv): rocoto_param_dict = {'maxtries': user_inputs.maxtries, 'cyclethrottle': user_inputs.cyclethrottle, 'taskthrottle': user_inputs.taskthrottle, + 'rocotorun': user_inputs.rocotorun, 'verbosity': user_inputs.verbosity} cfg = Configuration(user_inputs.expdir) @@ -97,13 +100,9 @@ def main(*argv): check_expdir(user_inputs.expdir, base['EXPDIR']) - print('user_inputs: ', user_inputs) - # Check if "HOMEDIR","STMP","PTMP" 
dirrctories are writable - #dir_keys = ["HOMEDIR", "STMP", "PTMP"] - dir_keys = ["STMP", "PTMP"] + dir_keys = ["HOMEDIR", "STMP", "PTMP"] for dk in dir_keys: - print(f'base[{dk}]: {base[dk]}') check_dir_writable(base[dk]) if not check_dir_writable(base[dk]): msg = f'The {dk} path {base[dk]} cannot be written to! Please correct this path and try again.' diff --git a/env/CONTAINER.env b/env/CONTAINER.env deleted file mode 100755 index ba01fcf0dd9..00000000000 --- a/env/CONTAINER.env +++ /dev/null @@ -1,32 +0,0 @@ -#! /usr/bin/env bash - -if [[ $# -ne 1 ]]; then - - echo "Must specify an input argument to set runtime environment variables!" - exit 1 - -fi - -step=$1 - -export launcher="mpirun" -export mpmd_opt="--multi-prog" - -# Configure MPI environment -export MPI_BUFS_PER_PROC=2048 -export MPI_BUFS_PER_HOST=2048 -export MPI_GROUP_MAX=256 -export MPI_MEMMAP_OFF=1 -export MP_STDOUTMODE="ORDERED" -export KMP_AFFINITY=scatter -export OMP_STACKSIZE=2048000 -export NTHSTACK=1024000000 - -ulimit -s unlimited -ulimit -a - - -if [ "${step}" = "marineanlvar" ]; then - export NTHREADS_OCNANAL=1 - export APRUN_MARINEANLVAR="${launcher} -n 2" -fi diff --git a/gen-c48atm.sh b/gen-c48atm.sh deleted file mode 100755 index f9c924ed3be..00000000000 --- a/gen-c48atm.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash - - set -x - - HOMEDIR=/scratch4/NAGAPE/epic/Wei.Huang/dev/global-workflow-cloud - rundir=/scratch4/NAGAPE/epic/Wei.Huang/run - mkdir -p ${rundir} - HPC_ACCOUNT=epic - - container=/scratch4/NAGAPE/epic/Wei.Huang/demo/ubuntu22.04-intel-ufs-env-v1.6.0.img - bindings="-B /scratch3 -B /scratch4" - ${HOMEDIR}/dev/container/gen-wrapper.sh -H ${HOMEDIR} -c ${container} -b "${bindings}" -v - - cd ${HOMEDIR}/dev/workflow - - RUNTESTS=${rundir} \ - ./generate_workflows.sh \ - -H ${HOMEDIR} \ - -y "C48_ATM" \ - -Y ${HOMEDIR}/dev/ci/cases/pr \ - -A ${HPC_ACCOUNT} \ - -e "Wei.Huang@noaa.gov" \ - -R -v - - ${HOMEDIR}/dev/container/create-container-links.sh diff --git a/gen-c48s2sw.sh 
b/gen-c48s2sw.sh deleted file mode 100755 index 03e1f439ac4..00000000000 --- a/gen-c48s2sw.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash - - set -x - - HOMEDIR=/scratch4/NAGAPE/epic/Wei.Huang/dev/global-workflow-cloud - rundir=/scratch4/NAGAPE/epic/Wei.Huang/run - mkdir -p ${rundir} - HPC_ACCOUNT=epic - - container=/scratch4/NAGAPE/epic/Wei.Huang/demo/ubuntu22.04-intel-ufs-env-v1.6.0.img - bindings="-B /scratch3 -B /scratch4" - ${HOMEDIR}/dev/container/gen-wrapper.sh -H ${HOMEDIR} -c ${container} -b "${bindings}" -v - - cd ${HOMEDIR}/dev/workflow - - RUNTESTS=${rundir} \ - ./generate_workflows.sh \ - -H ${HOMEDIR} \ - -y "C48_S2SW" \ - -Y ${HOMEDIR}/dev/ci/cases/pr \ - -A ${HPC_ACCOUNT} \ - -e "Wei.Huang@noaa.gov" \ - -R -v - - ${HOMEDIR}/dev/container/create-container-links.sh - diff --git a/gen-c48s2swa-gefs.sh b/gen-c48s2swa-gefs.sh deleted file mode 100755 index 10e3e14f011..00000000000 --- a/gen-c48s2swa-gefs.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash - - set -x - - HOMEDIR=/scratch4/NAGAPE/epic/Wei.Huang/dev/global-workflow-cloud - rundir=/scratch4/NAGAPE/epic/Wei.Huang/run - mkdir -p ${rundir} - HPC_ACCOUNT=epic - - container=/scratch4/NAGAPE/epic/Wei.Huang/demo/ubuntu22.04-intel-ufs-env-v1.6.0.img - bindings="-B /scratch3 -B /scratch4" -#${HOMEDIR}/dev/container/gen-wrapper.sh -H ${HOMEDIR} -c ${container} -b "${bindings}" -v - -# -y "C48_ATM C48_S2SW C48_S2SWA_gefs" \ - - cd ${HOMEDIR}/dev/workflow - - RUNTESTS=${rundir} \ - ./generate_workflows.sh \ - -H ${HOMEDIR} \ - -y "C48_S2SWA_gefs" \ - -Y ${HOMEDIR}/dev/ci/cases/pr \ - -A ${HPC_ACCOUNT} \ - -e "Wei.Huang@noaa.gov" \ - -R -v - -#${HOMEDIR}/dev/container/create-container-links.sh - diff --git a/jobs/JGLOBAL_ARCHIVE_VRFY b/jobs/JGLOBAL_ARCHIVE_VRFY index d3bc3ba70cd..0545498f47f 100755 --- a/jobs/JGLOBAL_ARCHIVE_VRFY +++ b/jobs/JGLOBAL_ARCHIVE_VRFY @@ -24,8 +24,11 @@ done ############################################################### # Run archive script 
############################################################### - -${GLOBALARCHIVESH:-${SCRgfs}/exglobal_archive_vrfy.py} +if [ "$RUN_WITH_CONTAINER" == "YES" ]; then + ${HOMEgfs}/exec/run_python.sh ${GLOBALARCHIVESH:-${SCRgfs}/exglobal_archive_vrfy.py} -c -v +else + ${GLOBALARCHIVESH:-${SCRgfs}/exglobal_archive_vrfy.py} +fi err=$? if [[ ${err} -ne 0 ]]; then exit "${err}" diff --git a/sorc/com.sh b/sorc/com.sh new file mode 100755 index 00000000000..a6a7fecfe6a --- /dev/null +++ b/sorc/com.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +cd /contrib/Wei.Huang/src/global-workflow-cloud/sorc + +./build_all.sh gfs sfs gefs + diff --git a/ush/load_fv3gfs_modules.sh b/ush/load_fv3gfs_modules.sh index 1fa7af82cd1..981c7324111 100755 --- a/ush/load_fv3gfs_modules.sh +++ b/ush/load_fv3gfs_modules.sh @@ -12,10 +12,11 @@ if [[ "${DEBUG_WORKFLOW:-NO}" == "NO" ]]; then set +x fi +source "${HOMEgfs}/ush/preamble.sh" + # Setup runtime environment by loading modules ulimit_s=$( ulimit -S -s ) -#RUN_WITH_CONTAINER=YES if [[ "$RUN_WITH_CONTAINER" == "NO" ]]; then # Find module command and purge: source "${HOMEgfs}/ush/detect_machine.sh" From 958bb9c1abca3b3762d770b3d8ebf89b7885d906 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Fri, 27 Jun 2025 20:11:11 +0000 Subject: [PATCH 064/134] add note for multiple nodes run on AWS --- dev/container/exec.python | 0 dev/container/exec.wgrib2 | 0 dev/container/gen-run-cases.sh | 6 +++--- dev/container/link_model.sh | 7 +++++++ dev/container/ush.wgrib2 | 0 5 files changed, 10 insertions(+), 3 deletions(-) mode change 100755 => 100644 dev/container/exec.python mode change 100755 => 100644 dev/container/exec.wgrib2 mode change 100755 => 100644 dev/container/ush.wgrib2 diff --git a/dev/container/exec.python b/dev/container/exec.python old mode 100755 new mode 100644 diff --git a/dev/container/exec.wgrib2 b/dev/container/exec.wgrib2 old mode 100755 new mode 100644 diff --git a/dev/container/gen-run-cases.sh b/dev/container/gen-run-cases.sh index 
1a9731ae6a8..3d5f26688e3 100755 --- a/dev/container/gen-run-cases.sh +++ b/dev/container/gen-run-cases.sh @@ -2,12 +2,12 @@ set -x -HOMEgfs="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd )" +HOMEgfs="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." >/dev/null 2>&1 && pwd )" source "${HOMEgfs}/ush/detect_machine.sh" #yamllist="C48_ATM" -#yamllist="C48_S2SW" -yamllist="C48_S2SWA_gefs" +yamllist="C48_S2SW" +#yamllist="C48_S2SWA_gefs" HOMEDIR=${HOMEgfs} if [[ ${MACHINE_ID} = ursa* ]] ; then diff --git a/dev/container/link_model.sh b/dev/container/link_model.sh index ba3dea2de72..7da56687089 100755 --- a/dev/container/link_model.sh +++ b/dev/container/link_model.sh @@ -76,6 +76,13 @@ rm -f ${link_model_script} cat > $link_model_script << EOF_LINK #!/bin/bash +#Need these lines on AWS to run more than one node. +#export I_MPI_DEBUG=1 +#export I_MPI_FABRICS=shm:ofi +#export I_MPI_OFI_PROVIDER=tcp +#export FI_PROVIDER=tcp +#export FI_TCP_IFACE=eth0 + export LD_LIBRARY_PATH=$(dirname ${container}) arg="\$@" singularity exec \\ diff --git a/dev/container/ush.wgrib2 b/dev/container/ush.wgrib2 old mode 100755 new mode 100644 From a7f4dff62de5fc542ae24a0827c003c49e0b38e1 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Tue, 1 Jul 2025 17:32:58 +0000 Subject: [PATCH 065/134] wants to run with or without container on AWS --- dev/container/set-case-wihtout-container.sh | 43 +++++++++++++++++++++ dev/parm/config/gfs/config.resources | 10 ----- ush/jjob_header.sh | 15 ------- ush/preamble.sh | 35 ++++++++++++++++- versions/run.noaacloud.ver | 3 ++ 5 files changed, 80 insertions(+), 26 deletions(-) create mode 100755 dev/container/set-case-wihtout-container.sh diff --git a/dev/container/set-case-wihtout-container.sh b/dev/container/set-case-wihtout-container.sh new file mode 100755 index 00000000000..475306e36e4 --- /dev/null +++ b/dev/container/set-case-wihtout-container.sh @@ -0,0 +1,43 @@ +#!/bin/bash + + set -x + +HOMEgfs="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." 
>/dev/null 2>&1 && pwd )" +source "${HOMEgfs}/ush/detect_machine.sh" + +#yamllist="C48_ATM" +yamllist="C48_S2SW" +#yamllist="C48_S2SWA_gefs" + +HOMEDIR=${HOMEgfs} +if [[ ${MACHINE_ID} = ursa* ]] ; then + rundir=/scratch4/NAGAPE/epic/${USER}/run + HPC_ACCOUNT=epic +elif [[ ${MACHINE_ID} = gaea* ]] ; then + rundir=/gpfs/f6/scratch/${USER}/run + HPC_ACCOUNT=bil-fire8 +elif [[ ${MACHINE_ID} = noaacloud* ]] ; then + TOPICDIR=/bucket/global-workflow-shared-data/ICSDIR + rundir=/lustre/${USER}/run + HPC_ACCOUNT=${USER} +fi + + module load rocoto/1.3.7 + + rocotocmd=`which rocotorun` + + mkdir -p ${rundir} + + cd ${HOMEDIR}/dev/workflow + + TOPICDIR=${TOPICDIR} \ + RUNTESTS=${rundir} \ + RUNDIRS=${rundir} \ + ./generate_workflows.sh \ + -H ${HOMEDIR} \ + -y ${yamllist} \ + -Y ${HOMEDIR}/dev/ci/cases/pr \ + -A ${HPC_ACCOUNT} \ + -e "Wei.Huang@noaa.gov" \ + -v + diff --git a/dev/parm/config/gfs/config.resources b/dev/parm/config/gfs/config.resources index fc8802ff067..dfdf5964520 100644 --- a/dev/parm/config/gfs/config.resources +++ b/dev/parm/config/gfs/config.resources @@ -35,9 +35,6 @@ step=$1 echo "BEGIN: config.resources" -echo "%0 step: ${step}" -echo "%0 machine: ${machine}" - case ${machine} in "WCOSS2") max_tasks_per_node=128 @@ -100,13 +97,6 @@ case ${machine} in # shellcheck disable=SC2034 mem_node_max="" ;; - "CONTAINER") - npe_node_max=1 - max_tasks_per_node= - # TODO Supply a max mem/node value for container - # shellcheck disable=SC2034 - mem_node_max="" - ;; *) echo "FATAL ERROR: Unknown machine encountered by ${BASH_SOURCE[0]}" exit 2 diff --git a/ush/jjob_header.sh b/ush/jjob_header.sh index aa2dabce4c3..04bcc774642 100755 --- a/ush/jjob_header.sh +++ b/ush/jjob_header.sh @@ -43,21 +43,6 @@ _calling_script="${BASH_SOURCE[1]}" source "${HOMEgfs}/ush/preamble.sh" -if [[ "$RUN_WITH_CONTAINER" == "YES" ]]; then - #export PATH=/home/Wei.Huang/prod-util-2.1.1/bin:$PATH - #export 
LD_LIBRARY_PATH=/apps/spack-2024-12/linux-rocky9-x86_64/gcc-11.4.1/intel-oneapi-compilers-2023.2.0-uov33rpz3lplh3hh3v5c6vssbc7ndxuk/lib - - if [[ -v PATH ]]; then - if [[ "$PATH" =~ "prod-util" ]]; then - echo "PATH already contains prod-util" - else - export PATH=/home/Wei.Huang/prod-util-2.1.1/bin:$PATH - fi - else - export PATH=/home/Wei.Huang/prod-util-2.1.1/bin - fi -fi - OPTIND=1 while getopts "c:e:" option; do case "${option}" in diff --git a/ush/preamble.sh b/ush/preamble.sh index 3068dfe248b..410e4d98f7b 100755 --- a/ush/preamble.sh +++ b/ush/preamble.sh @@ -177,14 +177,47 @@ trap "postamble ${_calling_script} ${start_time} \$?" EXIT source "${HOMEgfs}/ush/bash_utils.sh" # Decide if run with container -export RUN_WITH_CONTAINER=YES +export RUN_WITH_CONTAINER=NO if [[ "$RUN_WITH_CONTAINER" == "YES" ]]; then export WGRIB2="${HOMEgfs}/exec/run_wgrib2.sh" + # if [[ ! -d ~/prod-util-2.1.1 ]]; then + # cp -r $prod_util_ROOT ~/prod-util-2.1.1 + # fi + + if [[ "$PATH" =~ "prod-util" ]]; then + export PATH=~/prod-util-2.1.1/bin:$PATH + fi + export FSYNC ~/prod-util-2.1.1/bin/fsync_file + export MDATE ~/prod-util-2.1.1/bin/mdate + export NDATE ~/prod-util-2.1.1/bin/ndate + export NHOUR ~/prod-util-2.1.1/bin/nhour else source "${HOMEgfs}/ush/detect_machine.sh" source "${HOMEgfs}/ush/module-setup.sh" + + # Source versions file for runtime + source "${HOMEgfs}/versions/run.ver" + + # Load our modules: + module use "${HOMEgfs}/modulefiles" + + case "${MACHINE_ID}" in + "wcoss2" | "hera" | "orion" | "hercules" | "gaeac5" | "gaeac6" | "noaacloud" | "ursa") + module load "module_base.${MACHINE_ID}" + export err=$? 
+ if [[ ${err} -ne 0 ]]; then + echo "FATAL ERROR: Failed to load module_base.${MACHINE_ID}" + exit 1 + fi + ;; + *) + echo "WARNING: UNKNOWN PLATFORM" + ;; + esac + module load wgrib2 + module load prod_util/2.1.1 export WGRIB2=wgrib2 fi diff --git a/versions/run.noaacloud.ver b/versions/run.noaacloud.ver index 88fe4e0115f..3b166e76fdc 100644 --- a/versions/run.noaacloud.ver +++ b/versions/run.noaacloud.ver @@ -11,4 +11,7 @@ export cdo_ver=2.2.0 export export esmf_ver=8.5.0 export g2tmpl_ver=1.10.2 +export prepobs_run_ver=1.1.0 +export fit2obs_ver=1.1.7 + export perl_ver=5.38.0 From 682b8d5c304d470f7dd5cfe0583593323cd4fb48 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Tue, 1 Jul 2025 20:32:54 +0000 Subject: [PATCH 066/134] sync and remove extra code --- dev/workflow/generate_workflows.sh | 32 ++++++++++++++------ dev/workflow/hosts/awspw.yaml | 1 - dev/workflow/hosts/hera.yaml | 4 +-- dev/workflow/rocoto/workflow_xml.py | 4 +-- dev/workflow/setup_expt.py | 8 ----- env/AWSPW.env | 2 +- ush/jjob_header.sh | 2 -- ush/load_fv3gfs_modules.sh | 40 ++++-------------------- ush/load_ufswm_modules.sh | 47 +++++++---------------------- ush/module-setup.sh | 5 --- ush/preamble.sh | 2 +- 11 files changed, 44 insertions(+), 103 deletions(-) diff --git a/dev/workflow/generate_workflows.sh b/dev/workflow/generate_workflows.sh index 5f222d81e2c..c0003b3e92f 100755 --- a/dev/workflow/generate_workflows.sh +++ b/dev/workflow/generate_workflows.sh @@ -43,7 +43,9 @@ function _usage() { -S Run all valid SFS cases in the specified YAML directory. - NOTES on -G, -E, and -S: + -C Run all valid GCAFS cases in the specified YAML directory. + + NOTES on -G, -E, -S and -C: - Valid cases are determined by the experiment:system key as well as the skip_ci_on_hosts list in each YAML. 
@@ -140,6 +142,7 @@ while [[ $# -gt 0 && "$1" != "--" ]]; do G) _run_all_gfs=true ;; E) _run_all_gefs=true ;; S) _run_all_sfs=true ;; + C) _run_all_gcafs=true ;; c) _update_cron=true ;; e) _email="${OPTARG}" && _set_email=true ;; t) _tag="_${OPTARG}" ;; @@ -251,16 +254,17 @@ else fi fi -# Empty the _yaml_list array if -G, -E, and/or -S were selected +# Empty the _yaml_list array if -G, -E, -S and/or -C were selected if [[ "${_run_all_gfs}" == "true" || \ "${_run_all_gefs}" == "true" || \ + "${_run_all_gcafs}" == "true" || \ "${_run_all_sfs}" == "true" ]]; then - # Raise an error if the user specified a yaml list and any of -G -E -S + # Raise an error if the user specified a yaml list and any of -G -E -S -C if [[ "${_specified_yaml_list}" == "true" ]]; then echo "Ambiguous case selection." echo "Please select which tests to run explicitly with -y \"list of tests\" or" - echo "by specifying -G (all GFS), -E (all GEFS), and/or -S (all SFS), but not both." + echo "by specifying -G (all GFS), -E (all GEFS), -C (all GCAFS) and/or -S (all SFS), but not both." exit 3 fi @@ -302,8 +306,7 @@ function select_all_yamls() # YAMLs in that list that are not for the specified system and issue warnings when # doing so. - _system="${1}" - _SYSTEM="${_system^^}" + _net="${1}" # Bash cannot return an array from a function and any edits are descoped at # the end of the function, so use a nameref instead. @@ -312,12 +315,12 @@ function select_all_yamls() if [[ "${_specified_yaml_list}" == false ]]; then # Start over with an empty _yaml_list _nameref_yaml_list=() - printf "Running all %s cases in %s\n\n" "${_SYSTEM}" "${_yaml_dir}" + printf "Running all %s cases in %s\n\n" "${_net^^}" "${_yaml_dir}" _yaml_count=0 for _full_path in "${_yaml_dir}/"*.yaml; do # Skip any YAML that isn't supported - if ! grep -l "system: *${_system}" "${_full_path}" >& /dev/null ; then continue; fi + if ! 
grep -l "net: *${_net}" "${_full_path}" >& /dev/null ; then continue; fi # Select only cases for the specified system _yaml=$(basename "${_full_path}") @@ -332,7 +335,7 @@ function select_all_yamls() if [[ ${_yaml_count} -eq 0 ]]; then read -r -d '' _message << EOM - "No YAMLs or ${_SYSTEM} were found in the directory (${_yaml_dir})!" + "No YAMLs or ${_net^^} were found in the directory (${_yaml_dir})!" "Please check the directory/YAMLs and try again" EOM echo "${_message}" @@ -347,7 +350,7 @@ EOM _yaml="${_nameref_yaml_list[${i}]}" _found=$(grep -l "system: *${system}" "${_yaml_dir}/${_yaml}.yaml") if [[ -z "${_found}" ]]; then - echo "WARNING: the yaml file ${_yaml_dir}/${_yaml}.yaml is not designed for the ${_SYSTEM} system" + echo "WARNING: the yaml file ${_yaml_dir}/${_yaml}.yaml is not designed for the ${_net^^} system" echo "Removing this yaml from the set of cases to run" unset '_nameref_yaml_list[${i}]' # Sleep 2 seconds to give the user a moment to react @@ -385,6 +388,15 @@ if [[ "${_run_all_sfs}" == "true" ]]; then _yaml_list=("${_yaml_list[@]}" "${_sfs_yaml_list[@]}") fi +# Check if running all GCAFS cases +if [[ "${_run_all_gcafs}" == "true" ]]; then + _build_flags="${_build_flags} gcafs gdas " + + declare -a _gfs_yaml_list + select_all_yamls "gcafs" "_gcafs_yaml_list" + _yaml_list=("${_yaml_list[@]}" "${_gcafs_yaml_list[@]}") +fi + # Loading modules sometimes raises unassigned errors, so disable checks set +u if [[ "${_verbose}" == "true" ]]; then diff --git a/dev/workflow/hosts/awspw.yaml b/dev/workflow/hosts/awspw.yaml index c7b8eee0a13..e0b1aac36fb 100644 --- a/dev/workflow/hosts/awspw.yaml +++ b/dev/workflow/hosts/awspw.yaml @@ -27,6 +27,5 @@ DO_ARCHCOM: 'NO' DO_TRACKER: 'NO' DO_GENESIS: 'NO' DO_METP: 'NO' -KEEPDATA: 'YES' MAKE_NSSTBUFR: 'YES' MAKE_ACFTBUFR: 'NO' diff --git a/dev/workflow/hosts/hera.yaml b/dev/workflow/hosts/hera.yaml index 75719b1b665..c635addbd01 100644 --- a/dev/workflow/hosts/hera.yaml +++ b/dev/workflow/hosts/hera.yaml @@ 
-5,8 +5,7 @@ BASE_DATA: '/scratch1/NCEPDEV/global/glopara/data' BASE_IC: '/scratch1/NCEPDEV/global/glopara/data/ICSDIR' AERO_INPUTS_DIR: '/scratch1/NCEPDEV/global/glopara/data/GEFS_ExtData/20250310' PACKAGEROOT: '/scratch1/NCEPDEV/global/glopara/nwpara' -#HOMEDIR: '/scratch1/NCEPDEV/global/${USER}' -HOMEDIR: '/scratch2/NAGAPE/epic/Wei.Huang' +HOMEDIR: '/scratch1/NCEPDEV/global/${USER}' STMP: '/scratch1/NCEPDEV/stmp2/${USER}' PTMP: '/scratch1/NCEPDEV/stmp4/${USER}' NOSCRUB: '${HOMEDIR}' @@ -29,6 +28,5 @@ CHGRP_CMD: 'chgrp rstprod' SUPPORTED_RESOLUTIONS: ['C1152', 'C768', 'C384', 'C192', 'C96', 'C48'] DO_ARCHCOM: 'YES' DO_AWIPS: 'NO' -KEEPDATA: 'YES' MAKE_NSSTBUFR: 'NO' MAKE_ACFTBUFR: 'NO' diff --git a/dev/workflow/rocoto/workflow_xml.py b/dev/workflow/rocoto/workflow_xml.py index 0a916291047..0926b991913 100644 --- a/dev/workflow/rocoto/workflow_xml.py +++ b/dev/workflow/rocoto/workflow_xml.py @@ -160,7 +160,7 @@ def _write_crontab(self, crontab_file: str = None, cronint: int = 5) -> None: rocotoruncmd = find_executable('rocotorun') if rocotoruncmd is None: try: - if ( 'rocotorun' in self.rocoto_config.keys() ): + if ('rocotorun' in self.rocoto_config.keys()): rocotoruncmd = self.rocoto_config['rocotorun'] else: rocotoruncmd = '/apps/rocoto/default/bin/rocotorun' @@ -275,7 +275,7 @@ def _check_rocotorc(self): if rocotorun is None: try: - if ( 'rocotorun' in self.rocoto_config.keys() ): + if ('rocotorun' in self.rocoto_config.keys()): rocotorun = self.rocoto_config['rocotorun'] else: rocotorun = '/apps/rocoto/default/bin/rocotorun' diff --git a/dev/workflow/setup_expt.py b/dev/workflow/setup_expt.py index d9b5340f5af..802aed4cbf4 100755 --- a/dev/workflow/setup_expt.py +++ b/dev/workflow/setup_expt.py @@ -23,13 +23,6 @@ _here = os.path.dirname(__file__) _top = os.path.abspath(os.path.join(os.path.abspath(_here), '../..')) -print('__file__:', __file__) -print('_here:', _here) -print('_top:', _top) - -current_dir = os.getcwd() -print('current_dir:', current_dir) - # 
Setup the logger logger = getLogger(__name__) @@ -102,7 +95,6 @@ def _update_defaults(dict_in: dict) -> dict: yaml_path = yaml_path.replace('/opt/global-workflow-cloud', host_plus_inputs_dict.HOMEgfs) if not os.path.exists(yaml_path): raise FileNotFoundError(f'YAML file does not exist, check path: {yaml_path}') - yaml_dict = parse_j2yaml(yaml_path, host_plus_inputs_dict) # yaml_dict is in the form {defaults: {key1: val1, ...}, base: {key1: val1, ...}, ...} diff --git a/env/AWSPW.env b/env/AWSPW.env index 8a0abaef201..46a9e663e7b 100755 --- a/env/AWSPW.env +++ b/env/AWSPW.env @@ -61,7 +61,7 @@ elif [[ "${step}" = "waveinit" ]] || [[ "${step}" = "waveprep" ]] || [[ "${step} elif [[ "${step}" = "fcst" ]] || [[ "${step}" = "efcs" ]]; then - export launcher="srun --mpi=pmi2" + export launcher="srun --mpi=pmi2 -l" (( nnodes = (ntasks+tasks_per_node-1)/tasks_per_node )) (( ufs_ntasks = nnodes*tasks_per_node )) diff --git a/ush/jjob_header.sh b/ush/jjob_header.sh index 04bcc774642..f8353c09b4e 100755 --- a/ush/jjob_header.sh +++ b/ush/jjob_header.sh @@ -93,8 +93,6 @@ export pgm=${pgm:-} ############################################## # Run setpdy and initialize PDY variables ############################################## -which setpdy.sh - export cycle="t${cyc}z" setpdy.sh || true source ./PDY || true diff --git a/ush/load_fv3gfs_modules.sh b/ush/load_fv3gfs_modules.sh index e80a6425d0b..7a057b0392b 100755 --- a/ush/load_fv3gfs_modules.sh +++ b/ush/load_fv3gfs_modules.sh @@ -12,43 +12,10 @@ if [[ "${DEBUG_WORKFLOW:-NO}" == "NO" ]]; then set +x fi -source "${HOMEgfs}/ush/preamble.sh" - # Setup runtime environment by loading modules ulimit_s=$( ulimit -S -s ) -if [[ "$RUN_WITH_CONTAINER" == "NO" ]]; then - # Find module command and purge: - source "${HOMEgfs}/ush/detect_machine.sh" - source "${HOMEgfs}/ush/module-setup.sh" - - # Source versions file for runtime - source "${HOMEgfs}/versions/run.ver" - - # Load our modules: - module use "${HOMEgfs}/modulefiles" - - case 
"${MACHINE_ID}" in - "wcoss2" | "hera" | "orion" | "hercules" | "gaeac5" | "gaeac6" | "noaacloud" | "ursa") - module load "module_base.${MACHINE_ID}" - export err=$? - if [[ ${err} -ne 0 ]]; then - echo "FATAL ERROR: Failed to load module_base.${MACHINE_ID}" - exit 1 - fi - ;; - *) - echo "WARNING: UNKNOWN PLATFORM" - ;; - esac - - module list -fi - -# Add wxflow to PYTHONPATH -wxflowPATH="${HOMEgfs}/ush/python" -PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${HOMEgfs}/ush:${wxflowPATH}" -export PYTHONPATH +source "${HOMEgfs}/ush/preamble.sh" # If this function exists in the environment, run it; else set -x if it was set on entering this script ftype=$(type -t set_trace || echo "") @@ -58,6 +25,11 @@ elif [[ "${set_x}" == "YES" ]]; then set -x fi +# Add wxflow to PYTHONPATH +wxflowPATH="${HOMEgfs}/ush/python" +PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${HOMEgfs}/ush:${wxflowPATH}" +export PYTHONPATH + # Restore stack soft limit: ulimit -S -s "${ulimit_s}" unset ulimit_s diff --git a/ush/load_ufswm_modules.sh b/ush/load_ufswm_modules.sh index 6192265c26e..39b1699630b 100755 --- a/ush/load_ufswm_modules.sh +++ b/ush/load_ufswm_modules.sh @@ -9,45 +9,20 @@ fi # Setup runtime environment by loading modules ulimit_s=$( ulimit -S -s ) -if [[ "$RUN_WITH_CONTAINER" == "YES" ]]; then - if [[ -v PATH ]]; then - if [[ "$PATH" =~ "prod-util" ]]; then - echo "PATH already contains prod-util" - else - export PATH=/home/Wei.Huang/prod-util-2.1.1/bin:$PATH - fi - else - export PATH=/home/Wei.Huang/prod-util-2.1.1/bin - fi - export WGRIB2="${HOMEgfs}/exec/run_wgrib2.sh" +source "${HOMEgfs}/ush/preamble.sh" + +if [[ "${MACHINE_ID}" = "wcoss2" ]]; then + module load cray-pals + module load cfp + module load libjpeg + module load craype-network-ucx + module load cray-mpich-ucx else - source "${HOMEgfs}/ush/detect_machine.sh" - source "${HOMEgfs}/ush/module-setup.sh" - - module use "${HOMEgfs}/sorc/ufs_model.fd/modulefiles" - module load "ufs_${MACHINE_ID}.intel" - export err=$? 
- if [[ ${err} -ne 0 ]]; then - echo "FATAL ERROR: Failed to load ufs_${MACHINE_ID}.intel" - exit 1 - fi - module load prod_util - if [[ "${MACHINE_ID}" = "wcoss2" ]]; then - module load cray-pals - module load cfp - module load libjpeg - module load craype-network-ucx - module load cray-mpich-ucx - else - export UTILROOT=${prod_util_ROOT} - fi - module load wgrib2 - export WGRIB2=wgrib2 - - module list - unset MACHINE_ID + export UTILROOT=${prod_util_ROOT} fi +unset MACHINE_ID + ############################################################### # exglobal_forecast.py requires the following in PYTHONPATH # This will be moved to a module load when ready diff --git a/ush/module-setup.sh b/ush/module-setup.sh index 01845c1ea78..61948fa3995 100755 --- a/ush/module-setup.sh +++ b/ush/module-setup.sh @@ -86,11 +86,6 @@ elif [[ ${MACHINE_ID} = discover* ]]; then export PATH=${PATH}:${SPACK_ROOT}/bin . "${SPACK_ROOT}"/share/spack/setup-env.sh -elif [[ $MACHINE_ID = container ]] ; then - # We are in a container - source /usr/lmod/lmod/init/bash - module purge - # TODO: This can likely be made more general once other cloud # platforms come online. 
elif [[ ${MACHINE_ID} = "noaacloud" ]]; then diff --git a/ush/preamble.sh b/ush/preamble.sh index 410e4d98f7b..65d542ad12f 100755 --- a/ush/preamble.sh +++ b/ush/preamble.sh @@ -217,7 +217,7 @@ else esac module load wgrib2 - module load prod_util/2.1.1 + module load prod_util export WGRIB2=wgrib2 fi From 92dedd2382cf9cc68e8ab1943cc0a84cc6f8fd1d Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 2 Jul 2025 04:22:23 +0000 Subject: [PATCH 067/134] retest --- dev/container/link_model.sh | 10 +++++----- ush/load_ufswm_modules.sh | 12 ------------ ush/preamble.sh | 38 +++++++++++++++++++++++-------------- 3 files changed, 29 insertions(+), 31 deletions(-) diff --git a/dev/container/link_model.sh b/dev/container/link_model.sh index 7da56687089..638fdc14e43 100755 --- a/dev/container/link_model.sh +++ b/dev/container/link_model.sh @@ -77,11 +77,11 @@ cat > $link_model_script << EOF_LINK #!/bin/bash #Need these lines on AWS to run more than one node. -#export I_MPI_DEBUG=1 -#export I_MPI_FABRICS=shm:ofi -#export I_MPI_OFI_PROVIDER=tcp -#export FI_PROVIDER=tcp -#export FI_TCP_IFACE=eth0 + export I_MPI_DEBUG=1 + export I_MPI_FABRICS=shm:ofi + export I_MPI_OFI_PROVIDER=tcp + export FI_PROVIDER=tcp + export FI_TCP_IFACE=eth0 export LD_LIBRARY_PATH=$(dirname ${container}) arg="\$@" diff --git a/ush/load_ufswm_modules.sh b/ush/load_ufswm_modules.sh index 39b1699630b..90bc98ad4a8 100755 --- a/ush/load_ufswm_modules.sh +++ b/ush/load_ufswm_modules.sh @@ -11,18 +11,6 @@ ulimit_s=$( ulimit -S -s ) source "${HOMEgfs}/ush/preamble.sh" -if [[ "${MACHINE_ID}" = "wcoss2" ]]; then - module load cray-pals - module load cfp - module load libjpeg - module load craype-network-ucx - module load cray-mpich-ucx -else - export UTILROOT=${prod_util_ROOT} -fi - -unset MACHINE_ID - ############################################################### # exglobal_forecast.py requires the following in PYTHONPATH # This will be moved to a module load when ready diff --git a/ush/preamble.sh b/ush/preamble.sh 
index 65d542ad12f..8f82a825c11 100755 --- a/ush/preamble.sh +++ b/ush/preamble.sh @@ -177,7 +177,7 @@ trap "postamble ${_calling_script} ${start_time} \$?" EXIT source "${HOMEgfs}/ush/bash_utils.sh" # Decide if run with container -export RUN_WITH_CONTAINER=NO +export RUN_WITH_CONTAINER=YES if [[ "$RUN_WITH_CONTAINER" == "YES" ]]; then export WGRIB2="${HOMEgfs}/exec/run_wgrib2.sh" @@ -185,13 +185,13 @@ if [[ "$RUN_WITH_CONTAINER" == "YES" ]]; then # cp -r $prod_util_ROOT ~/prod-util-2.1.1 # fi - if [[ "$PATH" =~ "prod-util" ]]; then - export PATH=~/prod-util-2.1.1/bin:$PATH - fi - export FSYNC ~/prod-util-2.1.1/bin/fsync_file - export MDATE ~/prod-util-2.1.1/bin/mdate - export NDATE ~/prod-util-2.1.1/bin/ndate - export NHOUR ~/prod-util-2.1.1/bin/nhour + #if [[ "$PATH" =~ "prod-util" ]]; then + export PATH=~/prod-util-2.1.1/bin:$PATH + #fi + export FSYNC=~/prod-util-2.1.1/bin/fsync_file + export MDATE=~/prod-util-2.1.1/bin/mdate + export NDATE=~/prod-util-2.1.1/bin/ndate + export NHOUR=~/prod-util-2.1.1/bin/nhour else source "${HOMEgfs}/ush/detect_machine.sh" source "${HOMEgfs}/ush/module-setup.sh" @@ -203,19 +203,29 @@ else module use "${HOMEgfs}/modulefiles" case "${MACHINE_ID}" in - "wcoss2" | "hera" | "orion" | "hercules" | "gaeac5" | "gaeac6" | "noaacloud" | "ursa") + "wcoss2") + module load cray-pals + module load cfp + module load libjpeg + module load craype-network-ucx + module load cray-mpich-ucx module load "module_base.${MACHINE_ID}" - export err=$? - if [[ ${err} -ne 0 ]]; then - echo "FATAL ERROR: Failed to load module_base.${MACHINE_ID}" - exit 1 - fi + ;; + "hera" | "orion" | "hercules" | "gaeac5" | "gaeac6" | "noaacloud" | "ursa") + module load "module_base.${MACHINE_ID}" + export UTILROOT=${prod_util_ROOT} ;; *) echo "WARNING: UNKNOWN PLATFORM" ;; esac + export err=$? 
+ if [[ ${err} -ne 0 ]]; then + echo "FATAL ERROR: Failed to load module_base.${MACHINE_ID}" + exit 1 + fi + module load wgrib2 module load prod_util export WGRIB2=wgrib2 From 1f19ab7d10c3b01fa2b09606fe57c3326c929978 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Thu, 3 Jul 2025 12:34:43 -0400 Subject: [PATCH 068/134] before merge AWS changes --- dev/container/gen-run-cases.sh | 3 ++- gen-C96mx100_S2S.sh | 26 -------------------------- gen-c48s2swa-gefs.sh | 28 ---------------------------- scripts/exglobal_forecast.sh | 3 --- 4 files changed, 2 insertions(+), 58 deletions(-) delete mode 100755 gen-C96mx100_S2S.sh delete mode 100755 gen-c48s2swa-gefs.sh diff --git a/dev/container/gen-run-cases.sh b/dev/container/gen-run-cases.sh index 3d5f26688e3..c2d5c58ab9e 100755 --- a/dev/container/gen-run-cases.sh +++ b/dev/container/gen-run-cases.sh @@ -6,8 +6,9 @@ HOMEgfs="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." >/dev/null 2>&1 && pwd )" source "${HOMEgfs}/ush/detect_machine.sh" #yamllist="C48_ATM" -yamllist="C48_S2SW" +#yamllist="C48_S2SW" #yamllist="C48_S2SWA_gefs" +yamllist="C96mx100_S2S" HOMEDIR=${HOMEgfs} if [[ ${MACHINE_ID} = ursa* ]] ; then diff --git a/gen-C96mx100_S2S.sh b/gen-C96mx100_S2S.sh deleted file mode 100755 index aa44564d3fc..00000000000 --- a/gen-C96mx100_S2S.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash - - set -x - - HOMEDIR=/scratch4/NAGAPE/epic/Wei.Huang/dev/global-workflow-cloud - rundir=/scratch4/NAGAPE/epic/Wei.Huang/run - mkdir -p ${rundir} - HPC_ACCOUNT=epic - - container=/scratch4/NAGAPE/epic/Wei.Huang/demo/ubuntu22.04-intel-ufs-env-v1.6.0.img - bindings="-B /scratch3 -B /scratch4" - ${HOMEDIR}/dev/container/gen-wrapper.sh -H ${HOMEDIR} -c ${container} -b "${bindings}" -v - - cd ${HOMEDIR}/dev/workflow - - RUNTESTS=${rundir} \ - ./generate_workflows.sh \ - -H ${HOMEDIR} \ - -y "C96mx100_S2S" \ - -Y ${HOMEDIR}/dev/ci/cases/pr \ - -A ${HPC_ACCOUNT} \ - -e "Wei.Huang@noaa.gov" \ - -R -v - -#${HOMEDIR}/dev/container/create-container-links.sh - 
diff --git a/gen-c48s2swa-gefs.sh b/gen-c48s2swa-gefs.sh deleted file mode 100755 index 10e3e14f011..00000000000 --- a/gen-c48s2swa-gefs.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash - - set -x - - HOMEDIR=/scratch4/NAGAPE/epic/Wei.Huang/dev/global-workflow-cloud - rundir=/scratch4/NAGAPE/epic/Wei.Huang/run - mkdir -p ${rundir} - HPC_ACCOUNT=epic - - container=/scratch4/NAGAPE/epic/Wei.Huang/demo/ubuntu22.04-intel-ufs-env-v1.6.0.img - bindings="-B /scratch3 -B /scratch4" -#${HOMEDIR}/dev/container/gen-wrapper.sh -H ${HOMEDIR} -c ${container} -b "${bindings}" -v - -# -y "C48_ATM C48_S2SW C48_S2SWA_gefs" \ - - cd ${HOMEDIR}/dev/workflow - - RUNTESTS=${rundir} \ - ./generate_workflows.sh \ - -H ${HOMEDIR} \ - -y "C48_S2SWA_gefs" \ - -Y ${HOMEDIR}/dev/ci/cases/pr \ - -A ${HPC_ACCOUNT} \ - -e "Wei.Huang@noaa.gov" \ - -R -v - -#${HOMEDIR}/dev/container/create-container-links.sh - diff --git a/scripts/exglobal_forecast.sh b/scripts/exglobal_forecast.sh index 7f0ab762742..cbc6942bf44 100755 --- a/scripts/exglobal_forecast.sh +++ b/scripts/exglobal_forecast.sh @@ -175,14 +175,11 @@ else export OMP_NUM_THREADS=${UFS_THREADS:-1} fi -<<<<<<< HEAD echo "EXECgfs: ${EXECgfs}" echo "FCSTEXEC: ${FCSTEXEC}" echo "DATA: ${DATA}" echo "APRUN_UFS: ${APRUN_UFS}" -======= ->>>>>>> origin/feature/container-on-ursa-readonly cpreq "${EXECgfs}/${FCSTEXEC}" "${DATA}/" ${APRUN_UFS} "${DATA}/${FCSTEXEC}" 1>&1 2>&2 && true export err=$? 
From 60039dc66fe26120e0486a74d434c3f6cf8cdc2b Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Tue, 15 Jul 2025 13:38:33 -0400 Subject: [PATCH 069/134] change for atmos products on gaea --- .../exec.exglobal_atmos_products.sh | 9 + .../atmos_products/exglobal_atmos_products.sh | 314 ++++++++++++++++++ .../atmos_products/interp_atmos_master.sh | 89 +++++ .../atmos_products/interp_atmos_sflux.sh | 64 ++++ dev/container/atmos_products/run_mpmd.sh | 99 ++++++ dev/container/create-container-links.sh | 50 --- dev/container/gen-C96mx100_S2S.sh | 38 --- dev/container/gen-run-cases.sh | 57 +++- dev/container/set-case-wihtout-container.sh | 43 --- dev/container/utils/create-atmos-products.sh | 49 +++ dev/container/utils/create-container-links.sh | 50 +++ dev/container/{ => utils}/exec.python | 0 dev/container/{ => utils}/exec.wgrib2 | 0 dev/container/{ => utils}/gen-wrapper.sh | 2 +- dev/container/{ => utils}/link_gfs_utils.sh | 0 dev/container/{ => utils}/link_model.sh | 19 +- dev/container/{ => utils}/link_ufs_utils.sh | 0 dev/container/{ => utils}/link_ww3.sh | 4 +- dev/container/{ => utils}/ush.python | 0 dev/container/{ => utils}/ush.wgrib2 | 0 dev/parm/config/gfs/yaml/defaults.yaml | 2 +- dev/workflow/generate_workflows.sh | 15 +- env/GAEAC6.env | 19 +- jobs/JGLOBAL_ATMOS_PRODUCTS | 11 +- versions/spack.ver | 2 +- 25 files changed, 761 insertions(+), 175 deletions(-) create mode 100755 dev/container/atmos_products/exec.exglobal_atmos_products.sh create mode 100755 dev/container/atmos_products/exglobal_atmos_products.sh create mode 100755 dev/container/atmos_products/interp_atmos_master.sh create mode 100755 dev/container/atmos_products/interp_atmos_sflux.sh create mode 100755 dev/container/atmos_products/run_mpmd.sh delete mode 100755 dev/container/create-container-links.sh delete mode 100755 dev/container/gen-C96mx100_S2S.sh delete mode 100755 dev/container/set-case-wihtout-container.sh create mode 100755 dev/container/utils/create-atmos-products.sh create mode 100755 
dev/container/utils/create-container-links.sh rename dev/container/{ => utils}/exec.python (100%) rename dev/container/{ => utils}/exec.wgrib2 (100%) rename dev/container/{ => utils}/gen-wrapper.sh (95%) rename dev/container/{ => utils}/link_gfs_utils.sh (100%) rename dev/container/{ => utils}/link_model.sh (83%) rename dev/container/{ => utils}/link_ufs_utils.sh (100%) rename dev/container/{ => utils}/link_ww3.sh (95%) rename dev/container/{ => utils}/ush.python (100%) rename dev/container/{ => utils}/ush.wgrib2 (100%) diff --git a/dev/container/atmos_products/exec.exglobal_atmos_products.sh b/dev/container/atmos_products/exec.exglobal_atmos_products.sh new file mode 100755 index 00000000000..a2b9db30fa5 --- /dev/null +++ b/dev/container/atmos_products/exec.exglobal_atmos_products.sh @@ -0,0 +1,9 @@ +#!/bin/bash + export LD_LIBRARY_PATH=$(dirname SIF) + arg="$@" + + singularity exec \ + BINDINGS \ + SIF \ + HOMEgfs/ush/container/run_exglobal_atmos_products.sh $arg + diff --git a/dev/container/atmos_products/exglobal_atmos_products.sh b/dev/container/atmos_products/exglobal_atmos_products.sh new file mode 100755 index 00000000000..b9caebfb498 --- /dev/null +++ b/dev/container/atmos_products/exglobal_atmos_products.sh @@ -0,0 +1,314 @@ +#! 
/usr/bin/env bash + +source /usr/lmod/lmod/init/bash +module use ${HOMEgfs}/sorc/gfs_utils.fd/modulefiles +module load gfsutils_container.intel +module load wgrib2 +module load prod_util +module list + +export I_MPI_TMPDIR=/tmp +export PBS_ENVIRONMENT="" +export LSB_JOBID=0 +export I_MPI_HYDRA_RMK=slurm +export LOADL_HOSTFILE="" +export PE_HOSTFILE="" +export I_MPI_YARN=no +export NB_PARALLEL_JOB_HOSTS="" + +#source /opt/intel/oneapi/setvars.sh --force +#export OCL_ICD_FILENAMES="" + +# Programs used +export UTILROOT=${prod_util_ROOT} +#export WGRIB2=${wgrib2_ROOT}/bin/wgrib2 +export WGRIB2=wgrib2 + +# Scripts used +INTERP_ATMOS_MASTERSH=${INTERP_ATMOS_MASTERSH:-"${USHgfs}/interp_atmos_master.sh"} +INTERP_ATMOS_SFLUXSH=${INTERP_ATMOS_SFLUXSH:-"${USHgfs}/interp_atmos_sflux.sh"} + +# Variables used in this job +downset=${downset:-1} # No. of groups of pressure grib2 products to create +ntasks_atmos_products=${ntasks_atmos_products:-8} # no. of processors available to process each group + +# WGNE related options +WGNE=${WGNE:-NO} # Create WGNE products +FHMAX_WGNE=${FHMAX_WGNE:-0} # WGNE products are created for first FHMAX_WGNE forecast hours (except 0) + +cd "${DATA}" || exit 1 + +# Set paramlist files based on FORECAST_HOUR (-1, 0, 3, 6, etc.) 
+# Determine if supplemental products (PGBS) (1-degree and 1/2-degree) should be generated +if [[ ${FORECAST_HOUR} -le 0 ]]; then + if [[ ${FORECAST_HOUR} -lt 0 ]]; then + fhr3="anl" + paramlista="${paramlista_anl}" + FLXGF="NO" + elif [[ ${FORECAST_HOUR} == 0 ]]; then + fhr3=$(printf "f%03d" "${FORECAST_HOUR}") + paramlista="${paramlista_f000}" + fi + PGBS="YES" +else + fhr3=$(printf "f%03d" "${FORECAST_HOUR}") + if (( FORECAST_HOUR%FHOUT_PGBS == 0 )); then + PGBS="YES" + fi +fi + +#----------------------------------------------------- +# Section creating pressure grib2 interpolated products + +# Files needed by ${INTERP_ATMOS_MASTERSH} +MASTER_FILE="${COMIN_ATMOS_MASTER}/${PREFIX}master.grb2${fhr3}" + +# Get inventory from ${MASTER_FILE} that matches patterns from ${paramlista} +# Extract this inventory from ${MASTER_FILE} into a smaller tmpfile or tmpfileb based on paramlista or paramlistb +# shellcheck disable=SC2312 +${WGRIB2} "${MASTER_FILE}" | grep -F -f "${paramlista}" | ${WGRIB2} -i -grib "tmpfile_${fhr3}" "${MASTER_FILE}" && true +export err=$? +if [[ ${err} -ne 0 ]]; then + err_exit "wgrib2 failed to create intermediate grib2 file from ${MASTER_FILE} using ${paramlista}" +fi + +# Do the same as above for ${paramlistb} +if [[ ${downset} -eq 2 ]]; then + # shellcheck disable=SC2312 + ${WGRIB2} "${MASTER_FILE}" | grep -F -f "${paramlistb}" | ${WGRIB2} -i -grib "tmpfileb_${fhr3}" "${MASTER_FILE}" && true + export err=$? 
+ if [[ ${err} -ne 0 ]]; then + err_exit "wgrib2 failed to create intermediate grib2 file from ${MASTER_FILE} using ${paramlistb}" + fi +fi + +# Determine grids once and save them as a string and an array for processing +grid_string="0p25" +if [[ "${PGBS:-}" == "YES" ]]; then + grid_string="${grid_string}:0p50:1p00" +else + echo "Supplemental product generation is disable for fhr = ${fhr3}" + PGBS="NO" # Can't generate supplemental products if PGBS is not YES +fi +# Also transform the ${grid_string} into an array for processing +IFS=':' read -ra grids <<< "${grid_string}" + +for (( nset=1 ; nset <= downset ; nset++ )); do + + echo "Begin processing nset = ${nset}" + + # Number of processors available to process $nset + nproc=${ntasks} + + # Each set represents a group of files + if [[ ${nset} == 1 ]]; then + grp="" # TODO: this should be "a" when we eventually rename the pressure grib2 files per EE2 convention + elif [[ ${nset} == 2 ]]; then + grp="b" + fi + + # process grib2 chunkfiles to interpolate using MPMD + tmpfile="tmpfile${grp}_${fhr3}" + + # shellcheck disable=SC2312 + ncount=$(${WGRIB2} "${tmpfile}" | wc -l) + if [[ ${nproc} -gt ${ncount} ]]; then + echo "WARNING: Total no. of available processors '${nproc}' exceeds no. of records '${ncount}' in ${tmpfile}" + echo "Reduce nproc to ${ncount} (or less) to not waste resources" + fi + inv=$(( ncount / nproc )) + rm -f "${DATA}/poescript" + + last=0 + for (( iproc = 1 ; iproc <= nproc ; iproc++ )); do + first=$((last + 1)) + last=$((last + inv)) + if [[ ${last} -gt ${ncount} ]]; then + last=${ncount} + fi + + # if final record of is u-component, add next record v-component + # if final record is land, add next record icec + # grep returns 1 if no match is found, so temporarily turn off exit on non-zero rc + set +e + # shellcheck disable=SC2312 + ${WGRIB2} -d "${last}" "${tmpfile}" | grep -E -i "ugrd|ustm|uflx|u-gwd|land|maxuw" + rc=$? 
+ set_strict + if [[ ${rc} == 0 ]]; then # Matched the grep + last=$(( last + 1 )) + fi + if [[ ${iproc} -eq ${nproc} ]]; then + last=${ncount} + fi + + # Break tmpfile into processor specific chunks in preparation for MPMD + ${WGRIB2} "${tmpfile}" -for "${first}":"${last}" -grib "${tmpfile}_${iproc}" && true + export err=$? + if [[ ${err} -ne 0 ]]; then + err_exit "wgrib2 failed to geneate an intermediate grib2 file from ${tmpfile} records ${first} to ${last}" + fi + input_file="${tmpfile}_${iproc}" + output_file_prefix="pgb2${grp}file_${fhr3}_${iproc}" + echo "${INTERP_ATMOS_MASTERSH} ${input_file} ${output_file_prefix} ${grid_string}" >> "${DATA}/poescript" + + # if at final record and have not reached the final processor then write echo's to + # poescript for remaining processors + if [[ ${last} -eq ${ncount} ]]; then + for (( pproc = iproc+1 ; pproc < nproc ; pproc++ )); do + echo "/bin/echo ${pproc}" >> "${DATA}/poescript" + done + break + fi + done # for (( iproc = 1 ; iproc <= nproc ; iproc++ )); do + + # Run with MPMD or serial + "${USHgfs}/container/run_mpmd.sh" "${DATA}/poescript" && true + export err=$? + if [[ ${err} -ne 0 ]]; then + err_exit "Some or all interpolations of the master grib file failed during MPMD execution!" 
+ fi + + # We are in a loop over downset, save output from mpmd into nset specific output + mv mpmd.out "mpmd_${nset}.out" + + # Concatenate grib files from each processor into a single one + # and clean-up as you go + echo "Concatenating processor-specific grib2 files into a single product file" + for (( iproc = 1 ; iproc <= nproc ; iproc++ )); do + for grid in "${grids[@]}"; do + cat "pgb2${grp}file_${fhr3}_${iproc}_${grid}" >> "pgb2${grp}file_${fhr3}_${grid}" + rm -f "pgb2${grp}file_${fhr3}_${iproc}_${grid}" + done + # There is no further use of the processor specific tmpfile; delete it + rm -f "${tmpfile}_${iproc}" + done + + # Move to COM and index the product grib files + for grid in "${grids[@]}"; do + prod_dir="COMOUT_ATMOS_GRIB_${grid}" + cpfs "pgb2${grp}file_${fhr3}_${grid}" "${!prod_dir}/${PREFIX}pgrb2${grp}.${grid}.${fhr3}" + ${WGRIB2} -s "pgb2${grp}file_${fhr3}_${grid}" > "${!prod_dir}/${PREFIX}pgrb2${grp}.${grid}.${fhr3}.idx" + done + + echo "Finished processing nset = ${nset}" + +done # for (( nset=1 ; nset <= downset ; nset++ )) + +#--------------------------------------------------------------- + +# Create the index file for the sflux master, if it exists. +FLUX_FILE="${COMIN_ATMOS_MASTER}/${PREFIX}sfluxgrb${fhr3}.grib2" +if [[ -s "${FLUX_FILE}" ]]; then + ${WGRIB2} -s "${FLUX_FILE}" > "${FLUX_FILE}.idx" +fi + +# Section creating slfux grib2 interpolated products +# Create 1-degree sflux grib2 output +# move to COM and index it +if [[ "${FLXGF:-}" == "YES" ]]; then + + # Files needed by ${INTERP_ATMOS_SFLUXSH} + input_file="${FLUX_FILE}" + output_file_prefix="sflux_${fhr3}" + grid_string="1p00" + "${INTERP_ATMOS_SFLUXSH}" "${input_file}" "${output_file_prefix}" "${grid_string}" && true + export err=$? + if [[ ${err} -ne 0 ]]; then + err_exit "Unable to interpolate the surface flux grib2 files!" 
+ fi + + # Move to COM and index the product sflux file + IFS=':' read -ra grids <<< "${grid_string}" + for grid in "${grids[@]}"; do + prod_dir="COMOUT_ATMOS_GRIB_${grid}" + cpfs "sflux_${fhr3}_${grid}" "${!prod_dir}/${PREFIX}flux.${grid}.${fhr3}" + ${WGRIB2} -s "sflux_${fhr3}_${grid}" > "${!prod_dir}/${PREFIX}flux.${grid}.${fhr3}.idx" + done +fi + +# Section creating 0.25 degree WGNE products for nset=1, and fhr <= FHMAX_WGNE +if [[ "${WGNE:-}" == "YES" ]]; then + grp="" # TODO: this should be "a" when we eventually rename the pressure grib2 files per EE2 convention + if [[ ${FORECAST_HOUR} -gt 0 && ${FORECAST_HOUR} -le ${FHMAX_WGNE} ]]; then + # TODO: 597 is the message number for APCP in GFSv16. GFSv17 may change this as more messages are added. This can be controlled via config.atmos_products + ${WGRIB2} "${COMOUT_ATMOS_GRIB_0p25}/${PREFIX}pgrb2${grp}.0p25.${fhr3}" -d "${APCP_MSG:-597}" -grib "${COMOUT_ATMOS_GRIB_0p25}/${PREFIX}wgne.${fhr3}" + fi +fi + +#--------------------------------------------------------------- + +# Start sending DBN alerts +# Everything below this line is for sending files to DBN (SENDDBN=YES) +if [[ "${SENDDBN:-}" == "YES" ]]; then + "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB2_0P25" "${job}" "${COMOUT_ATMOS_GRIB_0p25}/${PREFIX}pgrb2.0p25.${fhr3}" + "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB2_0P25_WIDX" "${job}" "${COMOUT_ATMOS_GRIB_0p25}/${PREFIX}pgrb2.0p25.${fhr3}.idx" + if [[ "${RUN}" == "gfs" ]]; then + "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB2B_0P25" "${job}" "${COMOUT_ATMOS_GRIB_0p25}/${PREFIX}pgrb2b.0p25.${fhr3}" + "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB2B_0P25_WIDX" "${job}" "${COMOUT_ATMOS_GRIB_0p25}/${PREFIX}pgrb2b.0p25.${fhr3}.idx" + if [[ -s "${COMOUT_ATMOS_GRIB_0p50}/${PREFIX}pgrb2.0p50.${fhr3}" ]]; then + "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB2_0P5" "${job}" "${COMOUT_ATMOS_GRIB_0p50}/${PREFIX}pgrb2.0p50.${fhr3}" + "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB2_0P5_WIDX" "${job}" 
"${COMOUT_ATMOS_GRIB_0p50}/${PREFIX}pgrb2.0p50.${fhr3}.idx" + "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB2B_0P5" "${job}" "${COMOUT_ATMOS_GRIB_0p50}/${PREFIX}pgrb2b.0p50.${fhr3}" + "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB2B_0P5_WIDX" "${job}" "${COMOUT_ATMOS_GRIB_0p50}/${PREFIX}pgrb2b.0p50.${fhr3}.idx" + fi + if [[ -s "${COMOUT_ATMOS_GRIB_1p00}/${PREFIX}pgrb2.1p00.${fhr3}" ]]; then + "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB2_1P0" "${job}" "${COMOUT_ATMOS_GRIB_1p00}/${PREFIX}pgrb2.1p00.${fhr3}" + "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB2_1P0_WIDX" "${job}" "${COMOUT_ATMOS_GRIB_1p00}/${PREFIX}pgrb2.1p00.${fhr3}.idx" + "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB2B_1P0" "${job}" "${COMOUT_ATMOS_GRIB_1p00}/${PREFIX}pgrb2b.1p00.${fhr3}" + "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB2B_1P0_WIDX" "${job}" "${COMOUT_ATMOS_GRIB_1p00}/${PREFIX}pgrb2b.1p00.${fhr3}.idx" + fi + if [[ "${WGNE:-}" == "YES" && -s "${COMOUT_ATMOS_GRIB_0p25}/${PREFIX}wgne.${fhr3}" ]] ; then + "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_WGNE" "${job}" "${COMOUT_ATMOS_GRIB_0p25}/${PREFIX}wgne.${fhr3}" + fi + fi + + if [[ "${fhr3}" == "anl" ]]; then + + "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_MSC_sfcanl" "${job}" "${COMIN_ATMOS_ANALYSIS}/${PREFIX}sfc${fhr3}.nc" + "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_SA" "${job}" "${COMIN_ATMOS_ANALYSIS}/${PREFIX}atm${fhr3}.nc" + + "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGA_GB2" "${job}" "${COMOUT_ATMOS_GRIB_1p00}/${PREFIX}pgrb2.1p00.${fhr3}" + "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGA_GB2_WIDX" "${job}" "${COMOUT_ATMOS_GRIB_1p00}/${PREFIX}pgrb2.1p00.${fhr3}.idx" + + else # forecast hours f000, f003, f006, etc. 
+ + if [[ "${RUN}" == "gdas" ]]; then + "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB_GB2" "${job}" "${COMOUT_ATMOS_GRIB_1p00}/${PREFIX}pgrb2.1p00.${fhr3}" + "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB_GB2_WIDX" "${job}" "${COMOUT_ATMOS_GRIB_1p00}/${PREFIX}pgrb2.1p00.${fhr3}.idx" + if (( FORECAST_HOUR % 3 == 0 )); then + "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_SF" "${job}" "${COMIN_ATMOS_HISTORY}/${PREFIX}atm${fhr3}.nc" + "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_BF" "${job}" "${COMIN_ATMOS_HISTORY}/${PREFIX}sfc${fhr3}.nc" + "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_SGB_GB2" "${job}" "${COMIN_ATMOS_MASTER}/${PREFIX}sfluxgrb${fhr3}.grib2" + "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_SGB_GB2_WIDX" "${job}" "${COMIN_ATMOS_MASTER}/${PREFIX}sfluxgrb${fhr3}.grib2.idx" + fi + elif [[ "${RUN}" == "gfs" ]]; then + + "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_SF" "${job}" "${COMIN_ATMOS_HISTORY}/${PREFIX}atm${fhr3}.nc" + if [[ ${fhr} -gt 0 && ${fhr} -le 84 || ${fhr} -eq 120 ]]; then + "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_BF" "${job}" "${COMIN_ATMOS_HISTORY}/${PREFIX}sfc${fhr3}.nc" + fi + + if [[ -s "${COMIN_ATMOS_MASTER}/${PREFIX}sfluxgrbf${fhr3}.grib2" ]]; then + "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_SGB_GB2" "${job}" "${COMIN_ATMOS_MASTER}/${PREFIX}sfluxgrb${fhr3}.grib2" + "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_SGB_GB2_WIDX" "${job}" "${COMIN_ATMOS_MASTER}/${PREFIX}sfluxgrb${fhr3}.grib2.idx" + fi + elif [[ "${RUN}" == "gcafs" ]]; then + + "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_SF" "${job}" "${COM_ATMOS_HISTORY}/${PREFIX}atm${fhr3}.nc" + if [[ ${fhr} -gt 0 && ${fhr} -le 84 || ${fhr} == 120 ]]; then + "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_BF" "${job}" "${COM_ATMOS_HISTORY}/${PREFIX}sfc${fhr3}.nc" + fi + + if [[ -s "${COM_ATMOS_MASTER}/${PREFIX}sfluxgrbf${fhr3}.grib2" ]]; then + "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_SGB_GB2" "${job}" "${COM_ATMOS_MASTER}/${PREFIX}sfluxgrb${fhr3}.grib2" + "${DBNROOT}/bin/dbn_alert" MODEL 
"${RUN^^}_SGB_GB2_WIDX" "${job}" "${COM_ATMOS_MASTER}/${PREFIX}sfluxgrb${fhr3}.grib2.idx" + fi + fi + + + fi # end if fhr3=anl + +fi # end if SENDDBN=YES + +exit 0 diff --git a/dev/container/atmos_products/interp_atmos_master.sh b/dev/container/atmos_products/interp_atmos_master.sh new file mode 100755 index 00000000000..321d9f52dd3 --- /dev/null +++ b/dev/container/atmos_products/interp_atmos_master.sh @@ -0,0 +1,89 @@ +#! /usr/bin/env bash + +# This script takes in a master grib file and creates products at various interpolated resolutions +# Generate 0.25 / 0.5 / 1 degree interpolated grib2 files for each input grib2 file +# trim's RH and tweaks sea-ice cover + +input_file=${1:-"pgb2file_in"} # Input pressure grib2 file +output_file_prefix=${2:-"pgb2file_out"} # Prefix for output grib2 file; the prefix is appended by resolution e.g. _0p25 +grid_string=${3:-"0p25"} # Target grids; e.g. "0p25" or "0p25:0p50"; If multiple, they need to be ":" seperated + +#source "${HOMEgfs}/ush/preamble.sh" + +# Programs used +#if [ "$RUN_WITH_CONTAINER" == "NO" ]; then +# #source "${USHgfs}/load_fv3gfs_modules.sh" +# #module load wgrib2/2.0.8 +source /usr/lmod/lmod/init/bash +module purge +module use ${HOMEgfs}/sorc/gfs_utils.fd/modulefiles +module load gfsutils_container.intel +module load wgrib2 +export WGRIB2=wgrib2 +##else +# export WGRIB2="${HOMEgfs}/exec/run_wgrib2.sh" +#fi + +# wgrib2 options for regridding +defaults="-set_grib_type same -set_bitmap 1 -set_grib_max_bits 16" +interp_winds="-new_grid_winds earth" +interp_bilinear="-new_grid_interpolation bilinear" +interp_neighbor="-if :(CSNOW|CRAIN|CFRZR|CICEP|ICSEV): -new_grid_interpolation neighbor -fi" +interp_budget="-if :(APCP|ACPCP|PRATE|CPRAT|DZDT): -new_grid_interpolation budget -fi" +increased_bits="-if :(APCP|ACPCP|PRATE|CPRAT): -set_grib_max_bits 25 -fi" + +# interpolated target grids +# shellcheck disable=SC2034 +grid0p25="latlon 0:1440:0.25 90:721:-0.25" +# shellcheck disable=SC2034 +grid0p50="latlon 0:720:0.5 
90:361:-0.5" +# shellcheck disable=SC2034 +grid1p00="latlon 0:360:1.0 90:181:-1.0" + +# "Import" functions used in this script +source "${USHgfs}/product_functions.sh" + +# Transform the input ${grid_string} into an array for processing +IFS=':' read -ra grids <<< "${grid_string}" + +output_grids="" +for grid in "${grids[@]}"; do + gridopt="grid${grid}" + output_grids="${output_grids} -new_grid ${!gridopt} ${output_file_prefix}_${grid}" +done + +#shellcheck disable=SC2086 +${WGRIB2} "${input_file}" ${defaults} \ + ${interp_winds} \ + ${interp_bilinear} \ + ${interp_neighbor} \ + ${interp_budget} \ + ${increased_bits} \ + ${output_grids} +export err=$? +if [[ ${err} -ne 0 ]]; then + echo "FATAL ERROR: WGRIB2 failed to generate interpolated grib2 file!" + exit "${err}" +fi + +# trim and mask for all grids +for grid in "${grids[@]}"; do + trim_rh "${output_file_prefix}_${grid}" + export err=$? + if [[ ${err} -ne 0 ]]; then + echo "FATAL ERROR: Failed during the execution of trim_rh" + exit "${err}" + fi + # shellcheck disable=SC2312 + var_count=$(${WGRIB2} "${output_file_prefix}_${grid}" -match "LAND|ICEC" |wc -l) + if [[ "${var_count}" -eq 2 ]]; then + mod_icec "${output_file_prefix}_${grid}" + export err=$? + if [[ ${err} -ne 0 ]]; then + echo "FATAL ERROR: Failed during execution of mod_icec" + exit "${err}" + fi + fi +done + +exit 0 diff --git a/dev/container/atmos_products/interp_atmos_sflux.sh b/dev/container/atmos_products/interp_atmos_sflux.sh new file mode 100755 index 00000000000..abe528a638d --- /dev/null +++ b/dev/container/atmos_products/interp_atmos_sflux.sh @@ -0,0 +1,64 @@ +#! 
/usr/bin/env bash + +# This script takes in a master flux file and creates interpolated flux files at various interpolated resolutions +# Generate 0.25 / 0.5 / 1 degree interpolated grib2 flux files for each input sflux grib2 file + +input_file=${1:-"sfluxfile_in"} # Input sflux grib2 file +output_file_prefix=${2:-"sfluxfile_out"} # Prefix for output sflux grib2 file; the prefix is appended by resolution e.g. _0p25 +grid_string=${3:-"1p00"} # Target grids; e.g. "0p25" or "0p25:0p50"; If multiple, they need to be ":" seperated + +#source "${HOMEgfs}/ush/preamble.sh" + +# Programs used +#if [ "$RUN_WITH_CONTAINER" == "NO" ]; then +# #source "${USHgfs}/load_fv3gfs_modules.sh" +# #module load wgrib2/2.0.8 +source /usr/lmod/lmod/init/bash +module use ${HOMEgfs}/sorc/gfs_utils.fd/modulefiles +module load gfsutils_container.intel +module load wgrib2 +export WGRIB2=wgrib2 +#else +# export WGRIB2="${HOMEgfs}/exec/run_wgrib2.sh" +#fi + +# wgrib2 options for regridding +defaults="-set_grib_type same -set_bitmap 1 -set_grib_max_bits 16" +interp_winds="-new_grid_winds earth" +interp_bilinear="-new_grid_interpolation bilinear" +interp_neighbor="-if :(LAND|CSNOW|CRAIN|CFRZR|CICEP|ICSEV): -new_grid_interpolation neighbor -fi" +interp_budget="-if :(APCP|ACPCP|PRATE|CPRAT|DZDT): -new_grid_interpolation budget -fi" +increased_bits="-if :(APCP|ACPCP|PRATE|CPRAT): -set_grib_max_bits 25 -fi" + +# interpolated target grids +# shellcheck disable=SC2034 +grid0p25="latlon 0:1440:0.25 90:721:-0.25" +# shellcheck disable=SC2034 +grid0p50="latlon 0:720:0.5 90:361:-0.5" +# shellcheck disable=SC2034 +grid1p00="latlon 0:360:1.0 90:181:-1.0" + +# Transform the input ${grid_string} into an array for processing +IFS=':' read -ra grids <<< "${grid_string}" + +output_grids="" +for grid in "${grids[@]}"; do + gridopt="grid${grid}" + output_grids="${output_grids} -new_grid ${!gridopt} ${output_file_prefix}_${grid}" +done + +#shellcheck disable=SC2086 +${WGRIB2} "${input_file}" ${defaults} \ + 
${interp_winds} \ + ${interp_bilinear} \ + ${interp_neighbor} \ + ${interp_budget} \ + ${increased_bits} \ + ${output_grids} +export err=$? +if [[ ${err} -ne 0 ]]; then + echo "FATAL ERROR: WGRIB2 failed to interpolate surface flux parameters to a new grib2 file" + exit "${err}" +fi + +exit 0 diff --git a/dev/container/atmos_products/run_mpmd.sh b/dev/container/atmos_products/run_mpmd.sh new file mode 100755 index 00000000000..977802ff4fb --- /dev/null +++ b/dev/container/atmos_products/run_mpmd.sh @@ -0,0 +1,99 @@ +#!/usr/bin/env bash + +################################################################################ +# +# UNIX Script Documentation Block +# Script name: run_mpmd.sh +# Script description: Run multiple commands in MPMD mode or serially +# +# Author: Rahul Mahajan +# +# Org: NCEP/EMC +# +# Abstract: This script runs multiple commands in MPMD mode. It is used to run +# multiple serial commands in parallel using the CFP (Coupled Framework +# Parallelism) feature of the workflow. +# +# Environment variables: +# USE_CFP: If set to YES, run in MPMD mode, else run in serial mode. Default is 'NO'. +# launcher: Command to launch the MPMD job. Default is empty. +# Supported launchers are 'srun' and 'mpiexec'. +# mpmd_opt: Additional options to pass to the launcher. Default is empty. 
+# +# Input: +# cmdfile: File containing commands to execute in MPMD/serial mode +# +# Command line: +# run_mpmd.sh cmdfile +# +################################################################################ + +#source "${USHgfs}/preamble.sh" + +source /usr/lmod/lmod/init/bash +module use ${HOMEgfs}/sorc/gfs_utils.fd/modulefiles +module load gfsutils_container.intel +module load wgrib2 + +cmdfile=${1:?"run_mpmd requires an input file containing commands to execute in MPMD/serial mode"} + +# If USE_CFP is not set, run in serial mode +if [[ "${USE_CFP:-}" != "YES" ]]; then + echo "INFO: Using serial mode for MPMD job" + chmod 755 "${cmdfile}" + bash +x "${cmdfile}" > mpmd.out 2>&1 + rc=$? + cat mpmd.out + exit "${rc}" +fi + +# Set OMP_NUM_THREADS to 1 to avoid oversubscription when doing MPMD +export OMP_NUM_THREADS=1 + +# Determine the number of MPMD processes from incoming ${cmdfile} +nprocs=$(wc -l < "${cmdfile}") + +# Local MPMD file containing instructions to run in CFP +mpmd_cmdfile="${DATA:-}/mpmd_cmdfile" +if [[ -s "${mpmd_cmdfile}" ]]; then rm -f "${mpmd_cmdfile}"; fi + +cat << EOF + INFO: Executing MPMD job, STDOUT redirected for each process separately + INFO: On failure, logs for each job will be available in ${DATA}/mpmd.proc_num.out + INFO: The proc_num corresponds to the line in '${mpmd_cmdfile}' +EOF + +# [[ "${launcher:-}" =~ ^mpiexec.* ]]; then # mpiexec + +# Redirect output from each process to its own stdout +# Read the incoming cmdfile and create mpiexec usable cmdfile +nm=0 +echo "#!/bin/bash" >> "${mpmd_cmdfile}" +# shellcheck disable=SC2312 +while IFS= read -r line; do + echo "-n 1 ${line} > mpmd.${nm}.out" >> "${mpmd_cmdfile}" + ${line} > mpmd.${nm}.out & + ((nm=nm+1)) +done < "${cmdfile}" +chmod 755 "${mpmd_cmdfile}" +wait + +# shellcheck disable=SC2086 +#mpirun -np ${nprocs} ${mpmd_opt:-} "${mpmd_cmdfile}" +#mpirun -n ${nprocs} ${mpmd_cmdfile} +#mpirun --app ${mpmd_cmdfile} +err=$? 
+set_strict + +# On success concatenate processor specific output into a single mpmd.out +if [[ ${err} -eq 0 ]]; then + rm -f "${mpmd_cmdfile}" + out_files=$(find . -name 'mpmd.*.out') + for file in ${out_files}; do + cat "${file}" >> mpmd.out + rm -f "${file}" + done + cat mpmd.out +fi + +exit "${err}" diff --git a/dev/container/create-container-links.sh b/dev/container/create-container-links.sh deleted file mode 100755 index fa51c0276ce..00000000000 --- a/dev/container/create-container-links.sh +++ /dev/null @@ -1,50 +0,0 @@ -#!/bin/bash - -verbose=false - -while [ "$#" -gt 0 ]; do - case "$1" in - -H|--HOMEgfs) - HOMEgfs="$2" - shift 2 - ;; - -c|--container) - container="$2" - shift 2 - ;; - -b|--bindings) - bindings="$2" - shift 2 - ;; - -v|--verbose) - verbose=true - shift - ;; - *) - echo "Unknown option: $1" - exit 1 - ;; - esac -done - -if [[ ! -v HOMEgfs || ! -v container || ! -v bindings ]]; then - echo "Usage: create-container-links.sh -H/--HOMEgfs gw-home-dir -c/--container container-fullpath -b/--bindings list-of-binding-dirs [-v]" - exit -1 -fi - -echo "HOMEgfs: $HOMEgfs" -echo "container: $container" -echo "bindings: $bindings" -echo "Verbose: $verbose" - -${HOMEgfs}/dev/container/link_ww3.sh -H ${HOMEgfs} -c ${container} -b "${bindings}" -t gfs -${HOMEgfs}/dev/container/link_ww3.sh -H ${HOMEgfs} -c ${container} -b "${bindings}" -t sfs -${HOMEgfs}/dev/container/link_ww3.sh -H ${HOMEgfs} -c ${container} -b "${bindings}" -t gefs - -${HOMEgfs}/dev/container/link_model.sh -H ${HOMEgfs} -c ${container} -m gfs_model -b "${bindings}" -${HOMEgfs}/dev/container/link_model.sh -H ${HOMEgfs} -c ${container} -m sfs_model -b "${bindings}" -${HOMEgfs}/dev/container/link_model.sh -H ${HOMEgfs} -c ${container} -m gefs_model -b "${bindings}" - -${HOMEgfs}/dev/container/link_gfs_utils.sh -H ${HOMEgfs} -c ${container} -b "${bindings}" -${HOMEgfs}/dev/container/link_ufs_utils.sh -H ${HOMEgfs} -c ${container} -b "${bindings}" - diff --git 
a/dev/container/gen-C96mx100_S2S.sh b/dev/container/gen-C96mx100_S2S.sh deleted file mode 100755 index 7b0528801df..00000000000 --- a/dev/container/gen-C96mx100_S2S.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash - - set -x - -#for Ursa -#HOMEDIR=/scratch4/NAGAPE/epic/Wei.Huang/dev/global-workflow-cloud -#container=/scratch4/NAGAPE/epic/Wei.Huang/demo/ubuntu22.04-intel-ufs-env-v1.6.0.img -#rundir=/scratch4/NAGAPE/epic/Wei.Huang/run -#bindings="-B /scratch3 -B /scratch4" -#HPC_ACCOUNT=epic - -#for GaeaC6 - HOMEDIR=/gpfs/f6/scratch/Wei.Huang/src/global-workflow-cloud - container=/gpfs/f6/scratch/Wei.Huang/container/ubuntu22.04-intel-ufs-env-v1.6.0.img - rundir=/gpfs/f6/scratch/Wei.Huang/run - bindings="-B /gpfs/f6/scratch -B /ncrc/home1/Wei.Huang" - HPC_ACCOUNT=bil-fire8 - - module load rocoto/1.3.7 - - mkdir -p ${rundir} - - ${HOMEDIR}/dev/container/gen-wrapper.sh -H ${HOMEDIR} -c ${container} -b "${bindings}" -v - - cd ${HOMEDIR}/dev/workflow - - RUNTESTS=${rundir} \ - ./generate_workflows.sh \ - -H ${HOMEDIR} \ - -y "C96mx100_S2S" \ - -Y ${HOMEDIR}/dev/ci/cases/pr \ - -A ${HPC_ACCOUNT} \ - -e "Wei.Huang@noaa.gov" \ - -r "/autofs/ncrc-svm1_proj/hurr1/hafs/shared/rocoto/1.3.7/bin/rocotorun" \ - -R -v - - ${HOMEDIR}/dev/container/create-container-links.sh -H ${HOMEDIR} - diff --git a/dev/container/gen-run-cases.sh b/dev/container/gen-run-cases.sh index c2d5c58ab9e..f980e81257c 100755 --- a/dev/container/gen-run-cases.sh +++ b/dev/container/gen-run-cases.sh @@ -1,14 +1,20 @@ #!/bin/bash - set -x +set -x HOMEgfs="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." 
>/dev/null 2>&1 && pwd )" source "${HOMEgfs}/ush/detect_machine.sh" +run_with_container=YES + +casetype=pr #yamllist="C48_ATM" #yamllist="C48_S2SW" -#yamllist="C48_S2SWA_gefs" -yamllist="C96mx100_S2S" +yamllist="C48_S2SWA_gefs" +#yamllist="C96mx100_S2S" + +#casetype=hires +#yamllist="C768_S2SW" HOMEDIR=${HOMEgfs} if [[ ${MACHINE_ID} = ursa* ]] ; then @@ -16,40 +22,57 @@ if [[ ${MACHINE_ID} = ursa* ]] ; then rundir=/scratch4/NAGAPE/epic/${USER}/run bindings="-B /scratch3 -B /scratch4" HPC_ACCOUNT=epic + + module load rocoto/1.3.7 + rocotocmd=`which rocotorun` elif [[ ${MACHINE_ID} = gaea* ]] ; then container=/gpfs/f6/scratch/${USER}/container/ubuntu22.04-intel-ufs-env-v1.6.0.img rundir=/gpfs/f6/scratch/${USER}/run bindings="-B /gpfs/f6/scratch -B /ncrc/home1/${USER}" HPC_ACCOUNT=bil-fire8 + + rocotocmd=/autofs/ncrc-svm1_home2/Christopher.W.Harrop/rocoto-1.3.7/bin/rocotorun elif [[ ${MACHINE_ID} = noaacloud* ]] ; then TOPICDIR=/bucket/global-workflow-shared-data/ICSDIR container=/contrib/${USER}/src/gw-container-spack-stack-1.6.0/ubuntu22.04-intel-ufs-env-v1.6.0.img rundir=/lustre/${USER}/run bindings="--env \"I_MPI_FABRICS=ofi:shm,I_MPI_DEBUG=6\" -B /apps/slurm/default/lib/libpmi2.so -B /contrib -B /lustre -B /bucket" HPC_ACCOUNT=${USER} -fi - - module load rocoto/1.3.7 - rocotocmd=`which rocotorun` + module load rocoto/1.3.7 + rocotocmd=`which rocotorun` +fi - mkdir -p ${rundir} +mkdir -p ${rundir} - ${HOMEDIR}/dev/container/gen-wrapper.sh -H ${HOMEDIR} -c ${container} -b "${bindings}" -v +cd ${HOMEDIR}/dev/workflow - cd ${HOMEDIR}/dev/workflow +if [[ "${run_with_container}" == "YES" ]]; then + ${HOMEDIR}/dev/container/utils/gen-wrapper.sh -H ${HOMEDIR} -c ${container} -b "${bindings}" -v - TOPICDIR=${TOPICDIR} \ - RUNTESTS=${rundir} \ - RUNDIRS=${rundir} \ + TOPICDIR=${TOPICDIR} \ + RUNTESTS=${rundir} \ + RUNDIRS=${rundir} \ ./generate_workflows.sh \ -H ${HOMEDIR} \ -y ${yamllist} \ - -Y ${HOMEDIR}/dev/ci/cases/pr \ + -Y ${HOMEDIR}/dev/ci/cases/${casetype} \ -A 
${HPC_ACCOUNT} \ - -e "Wei.Huang@noaa.gov" \ + -e "${USER}@noaa.gov" \ -r ${rocotocmd} \ - -R -v + -v -R - ${HOMEDIR}/dev/container/create-container-links.sh -H ${HOMEDIR} -c ${container} -b "${bindings}" + ${HOMEDIR}/dev/container/utils/create-container-links.sh -H ${HOMEDIR} -c ${container} -b "${bindings}" +else + TOPICDIR=${TOPICDIR} \ + RUNTESTS=${rundir} \ + RUNDIRS=${rundir} \ + ./generate_workflows.sh \ + -H ${HOMEDIR} \ + -y ${yamllist} \ + -Y ${HOMEDIR}/dev/ci/cases/${casetype} \ + -A ${HPC_ACCOUNT} \ + -e "${USER}@noaa.gov" \ + -v +fi diff --git a/dev/container/set-case-wihtout-container.sh b/dev/container/set-case-wihtout-container.sh deleted file mode 100755 index 475306e36e4..00000000000 --- a/dev/container/set-case-wihtout-container.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash - - set -x - -HOMEgfs="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." >/dev/null 2>&1 && pwd )" -source "${HOMEgfs}/ush/detect_machine.sh" - -#yamllist="C48_ATM" -yamllist="C48_S2SW" -#yamllist="C48_S2SWA_gefs" - -HOMEDIR=${HOMEgfs} -if [[ ${MACHINE_ID} = ursa* ]] ; then - rundir=/scratch4/NAGAPE/epic/${USER}/run - HPC_ACCOUNT=epic -elif [[ ${MACHINE_ID} = gaea* ]] ; then - rundir=/gpfs/f6/scratch/${USER}/run - HPC_ACCOUNT=bil-fire8 -elif [[ ${MACHINE_ID} = noaacloud* ]] ; then - TOPICDIR=/bucket/global-workflow-shared-data/ICSDIR - rundir=/lustre/${USER}/run - HPC_ACCOUNT=${USER} -fi - - module load rocoto/1.3.7 - - rocotocmd=`which rocotorun` - - mkdir -p ${rundir} - - cd ${HOMEDIR}/dev/workflow - - TOPICDIR=${TOPICDIR} \ - RUNTESTS=${rundir} \ - RUNDIRS=${rundir} \ - ./generate_workflows.sh \ - -H ${HOMEDIR} \ - -y ${yamllist} \ - -Y ${HOMEDIR}/dev/ci/cases/pr \ - -A ${HPC_ACCOUNT} \ - -e "Wei.Huang@noaa.gov" \ - -v - diff --git a/dev/container/utils/create-atmos-products.sh b/dev/container/utils/create-atmos-products.sh new file mode 100755 index 00000000000..63f0fcb5973 --- /dev/null +++ b/dev/container/utils/create-atmos-products.sh @@ -0,0 +1,49 @@ +#!/bin/bash + 
+verbose=false + +while [ "$#" -gt 0 ]; do + case "$1" in + -H|--HOMEgfs) + HOMEgfs="$2" + shift 2 + ;; + -c|--container) + container="$2" + shift 2 + ;; + -b|--bindings) + bindings="$2" + shift 2 + ;; + -v|--verbose) + verbose=true + shift + ;; + *) + echo "Unknown option: $1" + exit 1 + ;; + esac +done + +if [[ ! -v HOMEgfs || ! -v container || ! -v bindings ]]; then + echo "Usage: create-container-links.sh -H/--HOMEgfs gw-home-dir -c/--container container-fullpath -b/--bindings list-of-binding-dirs [-v]" + exit -1 +fi + +echo "HOMEgfs: $HOMEgfs" +echo "container: $container" +echo "bindings: $bindings" +echo "Verbose: $verbose" + +sed -e "s?HOMEgfs?${HOMEgfs}?g" \ + -e "s?SIF?${container}?g" \ + -e "s?BINDINGS?${bindings}?g" \ + ${HOMEgfs}/dev/container/atmos_products/exec.exglobal_atmos_products.sh > ${HOMEgfs}/exec/exglobal_atmos_products.sh + +for fl in exglobal_atmos_products.sh interp_atmos_master.sh interp_atmos_sflux.sh run_mpmd.sh +do + cp ${HOMEgfs}/dev/container/atmos_products/${fl} ${HOMEgfs}/ush/container/. +done + diff --git a/dev/container/utils/create-container-links.sh b/dev/container/utils/create-container-links.sh new file mode 100755 index 00000000000..3fa1da687a2 --- /dev/null +++ b/dev/container/utils/create-container-links.sh @@ -0,0 +1,50 @@ +#!/bin/bash + +verbose=false + +while [ "$#" -gt 0 ]; do + case "$1" in + -H|--HOMEgfs) + HOMEgfs="$2" + shift 2 + ;; + -c|--container) + container="$2" + shift 2 + ;; + -b|--bindings) + bindings="$2" + shift 2 + ;; + -v|--verbose) + verbose=true + shift + ;; + *) + echo "Unknown option: $1" + exit 1 + ;; + esac +done + +if [[ ! -v HOMEgfs || ! -v container || ! 
-v bindings ]]; then + echo "Usage: create-container-links.sh -H/--HOMEgfs gw-home-dir -c/--container container-fullpath -b/--bindings list-of-binding-dirs [-v]" + exit -1 +fi + +echo "HOMEgfs: $HOMEgfs" +echo "container: $container" +echo "bindings: $bindings" +echo "Verbose: $verbose" + +${HOMEgfs}/dev/container/utils/link_ww3.sh -H ${HOMEgfs} -c ${container} -b "${bindings}" -t gfs +${HOMEgfs}/dev/container/utils/link_ww3.sh -H ${HOMEgfs} -c ${container} -b "${bindings}" -t sfs +${HOMEgfs}/dev/container/utils/link_ww3.sh -H ${HOMEgfs} -c ${container} -b "${bindings}" -t gefs + +${HOMEgfs}/dev/container/utils/link_model.sh -H ${HOMEgfs} -c ${container} -m gfs_model -b "${bindings}" +${HOMEgfs}/dev/container/utils/link_model.sh -H ${HOMEgfs} -c ${container} -m sfs_model -b "${bindings}" +${HOMEgfs}/dev/container/utils/link_model.sh -H ${HOMEgfs} -c ${container} -m gefs_model -b "${bindings}" + +${HOMEgfs}/dev/container/utils/link_gfs_utils.sh -H ${HOMEgfs} -c ${container} -b "${bindings}" +${HOMEgfs}/dev/container/utils/link_ufs_utils.sh -H ${HOMEgfs} -c ${container} -b "${bindings}" + diff --git a/dev/container/exec.python b/dev/container/utils/exec.python similarity index 100% rename from dev/container/exec.python rename to dev/container/utils/exec.python diff --git a/dev/container/exec.wgrib2 b/dev/container/utils/exec.wgrib2 similarity index 100% rename from dev/container/exec.wgrib2 rename to dev/container/utils/exec.wgrib2 diff --git a/dev/container/gen-wrapper.sh b/dev/container/utils/gen-wrapper.sh similarity index 95% rename from dev/container/gen-wrapper.sh rename to dev/container/utils/gen-wrapper.sh index 25628cf16b6..d65bbc5bd51 100755 --- a/dev/container/gen-wrapper.sh +++ b/dev/container/utils/gen-wrapper.sh @@ -51,7 +51,7 @@ do mkdir -p ${targetdir} for fnm in python wgrib2 do - sourcef=${HOMEgfs}/dev/container/${dnm}.${fnm} + sourcef=${HOMEgfs}/dev/container/utils/${dnm}.${fnm} targetf=${targetdir}/run_${fnm}.sh sed -e "s?HOMEgfs?${HOMEgfs}?g" \ 
diff --git a/dev/container/link_gfs_utils.sh b/dev/container/utils/link_gfs_utils.sh similarity index 100% rename from dev/container/link_gfs_utils.sh rename to dev/container/utils/link_gfs_utils.sh diff --git a/dev/container/link_model.sh b/dev/container/utils/link_model.sh similarity index 83% rename from dev/container/link_model.sh rename to dev/container/utils/link_model.sh index 26eacae5e06..5ab3f629258 100755 --- a/dev/container/link_model.sh +++ b/dev/container/utils/link_model.sh @@ -73,15 +73,22 @@ cat > $link_model_script << EOF_LINK #!/bin/bash #Need these lines on AWS to run more than one node. - export I_MPI_DEBUG=1 - export I_MPI_FABRICS=shm:ofi - export I_MPI_OFI_PROVIDER=tcp - export FI_PROVIDER=tcp - export FI_TCP_IFACE=eth0 + export I_MPI_DEBUG=10 +#export I_MPI_FABRICS=shm:ofi +#export I_MPI_OFI_PROVIDER=tcp +#export FI_PROVIDER=tcp +#export FI_TCP_IFACE=eth0 + +#For GaeaC6 +#export SINGULARITY_ENABLE_OVERLAY=try +#export SINGULARITY_DISABLE_OVERLAY=yes +#export SINGULARITY_DEBUG=10 +#export SINGULARITY_DEBUG=0 +#unset SINGULARITY_DEBUG export LD_LIBRARY_PATH=$(dirname ${container}) arg="\$@" - singularity exec \\ + /usr/bin/apptainer exec \\ ${bindings} \\ ${container} \\ ${run_model_script} \$arg diff --git a/dev/container/link_ufs_utils.sh b/dev/container/utils/link_ufs_utils.sh similarity index 100% rename from dev/container/link_ufs_utils.sh rename to dev/container/utils/link_ufs_utils.sh diff --git a/dev/container/link_ww3.sh b/dev/container/utils/link_ww3.sh similarity index 95% rename from dev/container/link_ww3.sh rename to dev/container/utils/link_ww3.sh index 0a73f82c74a..77c68b7b5c3 100755 --- a/dev/container/link_ww3.sh +++ b/dev/container/utils/link_ww3.sh @@ -69,8 +69,8 @@ export OMP_NUM_THREADS=1 source /usr/lmod/lmod/init/bash module purge -module use ${HOMEgfs}/sorc/ufs_model.fd/modulefiles -module load ufs_container.intel +module use ${HOMEgfs}/sorc/gfs_utils.fd/modulefiles +module load gfsutils_container.intel arg="\$@" 
${HOMEgfs}/sorc/ufs_model.fd/WW3/install/${pdlib}/bin/${model} \$arg diff --git a/dev/container/ush.python b/dev/container/utils/ush.python similarity index 100% rename from dev/container/ush.python rename to dev/container/utils/ush.python diff --git a/dev/container/ush.wgrib2 b/dev/container/utils/ush.wgrib2 similarity index 100% rename from dev/container/ush.wgrib2 rename to dev/container/utils/ush.wgrib2 diff --git a/dev/parm/config/gfs/yaml/defaults.yaml b/dev/parm/config/gfs/yaml/defaults.yaml index 573b2b97488..ebfae35cd17 100644 --- a/dev/parm/config/gfs/yaml/defaults.yaml +++ b/dev/parm/config/gfs/yaml/defaults.yaml @@ -15,7 +15,7 @@ base: DO_GENESIS: "YES" DO_GENESIS_FSU: "NO" DO_METP: "YES" - FHMAX_GFS: 120 + FHMAX_GFS: 24 FHMAX_HF_GFS: 48 FCST_BREAKPOINTS: "" DO_GSISOILDA: "NO" diff --git a/dev/workflow/generate_workflows.sh b/dev/workflow/generate_workflows.sh index c0003b3e92f..196a8e2fe51 100755 --- a/dev/workflow/generate_workflows.sh +++ b/dev/workflow/generate_workflows.sh @@ -116,8 +116,11 @@ _runtests="${RUNTESTS:-${_runtests:-}}" _auto_del=false _nonflag_option_count=0 +n=0 while [[ $# -gt 0 && "$1" != "--" ]]; do - while getopts ":H:bDuy:Y:GESCA:ce:t:r:vVRdh" option; do + while getopts ":H:bDuy:Y:GESCA:ce:t:r:vVdhR" option; do + n=$((n + 1)) + echo "No. 
$n: option: $option" case "${option}" in H) HOMEgfs="${OPTARG}" @@ -280,16 +283,16 @@ if [[ "${_specified_home}" == "false" ]]; then fi fi +echo "_run_with_container: ${_run_with_container}" + # Set RUN_WITH_CONTAINER if it is set by the user if [[ "${_run_with_container}" == "true" ]]; then - RUN_WITH_CONTAINER=YES if [[ "${_verbose}" == "true" ]]; then - printf "Run with Container %s\n\n" "${RUN_WITH_CONTAINER}" + printf "Run with Container" fi - sed -i 's/RUN_WITH_CONTAINER=NO/RUN_WITH_CONTAINER=YES/g' ../../ush/preamble.sh + sed -i "s?RUN_WITH_CONTAINER=NO?RUN_WITH_CONTAINER=YES?g" ../../ush/preamble.sh else - RUN_WITH_CONTAINER=NO - sed -i 's/RUN_WITH_CONTAINER=YES/RUN_WITH_CONTAINER=NO/g' ../../ush/preamble.sh + sed -i "s?RUN_WITH_CONTAINER=YES?RUN_WITH_CONTAINER=NO?g" ../../ush/preamble.sh fi # Set the _yaml_dir to HOMEgfs/dev/ci/cases/pr if not explicitly set diff --git a/env/GAEAC6.env b/env/GAEAC6.env index 0e7fb1db1f9..390fb0cc1ef 100755 --- a/env/GAEAC6.env +++ b/env/GAEAC6.env @@ -254,15 +254,16 @@ case ${step} in ;; "fcst" | "efcs") - export launcher="srun --mpi=pmi2 -l --export=ALL" - export OMP_STACKSIZE=1024M - - export MPICH_COLL_SYNC=MPI_Bcast - export FI_VERBS_PREFER_XRC=0 - export FI_CXI_RX_MATCH_MODE=hybrid - export COMEX_EAGER_THRESHOLD=65536 - export FI_CXI_RDZV_THRESHOLD=65536 - export FI_CXI_DEFAULT_CQ_SIZE=1048576 + #export launcher="srun --mpi=pmi2 -l --export=ALL" + export launcher="srun --mpi=pmi2 -l --hint=nomultithread --distribution=block:block" + #export OMP_STACKSIZE=1024M + + #export MPICH_COLL_SYNC=MPI_Bcast + #export FI_VERBS_PREFER_XRC=0 + #export FI_CXI_RX_MATCH_MODE=hybrid + #export COMEX_EAGER_THRESHOLD=65536 + #export FI_CXI_RDZV_THRESHOLD=65536 + #export FI_CXI_DEFAULT_CQ_SIZE=1048576 (( nnodes = (ntasks+tasks_per_node-1)/tasks_per_node )) (( ufs_ntasks = nnodes*tasks_per_node )) diff --git a/jobs/JGLOBAL_ATMOS_PRODUCTS b/jobs/JGLOBAL_ATMOS_PRODUCTS index b4f803f8036..1f589ec6170 100755 --- a/jobs/JGLOBAL_ATMOS_PRODUCTS 
+++ b/jobs/JGLOBAL_ATMOS_PRODUCTS @@ -24,7 +24,12 @@ export PREFIX="${RUN}.t${cyc}z." ############################################################### # Run exglobal script -"${SCRgfs}/exglobal_atmos_products.sh" && true +if [ "$RUN_WITH_CONTAINER" == "YES" ]; then + "${HOMEgfs}/exec/exglobal_atmos_products.sh" && true + export WGRIB2=${HOMEgfs}/exec/run_wgrib2.sh +else + "${SCRgfs}/exglobal_atmos_products.sh" && true +fi export err=$? if [[ ${err} -ne 0 ]]; then err_exit @@ -34,6 +39,8 @@ fi # End JOB SPECIFIC work ############################################## +exit 0 + ############################################## # Final processing ############################################## @@ -41,6 +48,8 @@ if [[ -e "${pgmout}" ]]; then cat "${pgmout}" fi +exit 0 + ########################################## # Remove the Temporary working directory ########################################## diff --git a/versions/spack.ver b/versions/spack.ver index b69e4277d56..7d27b3e9a03 100644 --- a/versions/spack.ver +++ b/versions/spack.ver @@ -30,7 +30,7 @@ export grib_util_ver=1.3.0 export prod_util_ver=2.1.1 export py_netcdf4_ver=1.5.8 export py_pyyaml_ver=6.0 -export py_jinja2_ver=3.0.3 +export py_jinja2_ver=3.1.2 export py_pandas_ver=1.5.3 export py_python_dateutil_ver=2.8.2 export py_f90nml_ver=1.4.3 From e19a65e158f06453a9d3e5510fb373b14f02dd57 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 16 Jul 2025 03:33:51 +0000 Subject: [PATCH 070/134] add highmemory partition for atmos products --- .../atmos_products/exec.exglobal_atmos_products.sh | 2 +- dev/container/shell-in-container.sh | 2 +- dev/container/utils/link_model.sh | 2 +- dev/parm/config/gefs/config.resources.AWSPW | 7 +++++++ dev/parm/config/gfs/config.resources.AWSPW | 7 +++++++ dev/parm/config/gfs/yaml/defaults.yaml | 2 +- 6 files changed, 18 insertions(+), 4 deletions(-) diff --git a/dev/container/atmos_products/exec.exglobal_atmos_products.sh b/dev/container/atmos_products/exec.exglobal_atmos_products.sh index 
a2b9db30fa5..fa2bc370741 100755 --- a/dev/container/atmos_products/exec.exglobal_atmos_products.sh +++ b/dev/container/atmos_products/exec.exglobal_atmos_products.sh @@ -5,5 +5,5 @@ singularity exec \ BINDINGS \ SIF \ - HOMEgfs/ush/container/run_exglobal_atmos_products.sh $arg + HOMEgfs/ush/container/exglobal_atmos_products.sh $arg diff --git a/dev/container/shell-in-container.sh b/dev/container/shell-in-container.sh index e1290c0850e..e32276bc7ed 100755 --- a/dev/container/shell-in-container.sh +++ b/dev/container/shell-in-container.sh @@ -4,5 +4,5 @@ #export SINGULARITY_BIND="/lustre:/lustre,/bucket:/bucket,/contrib:/contrib" img=/contrib/Wei.Huang/src/gw-container-spack-stack-1.6.0/ubuntu22.04-intel-ufs-env-v1.6.0.img -singularity shell -B /contrib -B /lustre ${img} +singularity shell -e -B /contrib -B /lustre ${img} diff --git a/dev/container/utils/link_model.sh b/dev/container/utils/link_model.sh index 5ab3f629258..1a2f49d784c 100755 --- a/dev/container/utils/link_model.sh +++ b/dev/container/utils/link_model.sh @@ -88,7 +88,7 @@ cat > $link_model_script << EOF_LINK export LD_LIBRARY_PATH=$(dirname ${container}) arg="\$@" - /usr/bin/apptainer exec \\ + /usr/bin/singularity exec \\ ${bindings} \\ ${container} \\ ${run_model_script} \$arg diff --git a/dev/parm/config/gefs/config.resources.AWSPW b/dev/parm/config/gefs/config.resources.AWSPW index 071c102e3af..513b5420043 100644 --- a/dev/parm/config/gefs/config.resources.AWSPW +++ b/dev/parm/config/gefs/config.resources.AWSPW @@ -16,6 +16,13 @@ case ${step} in tasks_per_node=48 ;; + "atmos_products") + export PARTITION_BATCH="highmemory" + unset PARTITION_SERVICE + max_tasks_per_node=24 + tasks_per_node=24 + ;; + *) export PARTITION_BATCH="process" unset PARTITION_SERVICE diff --git a/dev/parm/config/gfs/config.resources.AWSPW b/dev/parm/config/gfs/config.resources.AWSPW index 0b3e3fc4fbd..bc20944f6f4 100644 --- a/dev/parm/config/gfs/config.resources.AWSPW +++ b/dev/parm/config/gfs/config.resources.AWSPW @@ -28,6 
+28,13 @@ case ${step} in max_tasks_per_node=48 ;; + "atmos_products") + export PARTITION_BATCH="highmemory" + unset PARTITION_SERVICE + max_tasks_per_node=24 + tasks_per_node=24 + ;; + *) export PARTITION_BATCH="process" unset PARTITION_SERVICE diff --git a/dev/parm/config/gfs/yaml/defaults.yaml b/dev/parm/config/gfs/yaml/defaults.yaml index ebfae35cd17..573b2b97488 100644 --- a/dev/parm/config/gfs/yaml/defaults.yaml +++ b/dev/parm/config/gfs/yaml/defaults.yaml @@ -15,7 +15,7 @@ base: DO_GENESIS: "YES" DO_GENESIS_FSU: "NO" DO_METP: "YES" - FHMAX_GFS: 24 + FHMAX_GFS: 120 FHMAX_HF_GFS: 48 FCST_BREAKPOINTS: "" DO_GSISOILDA: "NO" From 8141155b467d1ef9544794523cee04b2096870b3 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Thu, 17 Jul 2025 00:24:49 +0000 Subject: [PATCH 071/134] testing on gaezc6 --- dev/container/gen-run-cases.sh | 10 +++++----- dev/container/shell-in-container.sh | 23 +++++++++++++++++++---- dev/container/utils/link_model.sh | 2 +- dev/workflow/generate_workflows.sh | 12 ++++-------- 4 files changed, 29 insertions(+), 18 deletions(-) diff --git a/dev/container/gen-run-cases.sh b/dev/container/gen-run-cases.sh index f980e81257c..de81c482c9f 100755 --- a/dev/container/gen-run-cases.sh +++ b/dev/container/gen-run-cases.sh @@ -7,19 +7,19 @@ source "${HOMEgfs}/ush/detect_machine.sh" run_with_container=YES -casetype=pr +#casetype=pr #yamllist="C48_ATM" #yamllist="C48_S2SW" -yamllist="C48_S2SWA_gefs" +#yamllist="C48_S2SWA_gefs" #yamllist="C96mx100_S2S" -#casetype=hires -#yamllist="C768_S2SW" +casetype=hires +yamllist="C768_S2SW" HOMEDIR=${HOMEgfs} if [[ ${MACHINE_ID} = ursa* ]] ; then container=/scratch4/NAGAPE/epic/${USER}/demo/ubuntu22.04-intel-ufs-env-v1.6.0.img - rundir=/scratch4/NAGAPE/epic/${USER}/run + rundir=/scratch3/NAGAPE/epic/${USER}/run bindings="-B /scratch3 -B /scratch4" HPC_ACCOUNT=epic diff --git a/dev/container/shell-in-container.sh b/dev/container/shell-in-container.sh index e32276bc7ed..359fd62ded1 100755 --- 
a/dev/container/shell-in-container.sh +++ b/dev/container/shell-in-container.sh @@ -1,8 +1,23 @@ #!/bin/bash -#export SINGULARITY_BIND="${slurm_binding}/lustre:/lustre,/bucket:/bucket,/contrib:/contrib" -#export SINGULARITY_BIND="/lustre:/lustre,/bucket:/bucket,/contrib:/contrib" +set -x -img=/contrib/Wei.Huang/src/gw-container-spack-stack-1.6.0/ubuntu22.04-intel-ufs-env-v1.6.0.img -singularity shell -e -B /contrib -B /lustre ${img} +HOMEgfs="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." >/dev/null 2>&1 && pwd )" +source "${HOMEgfs}/ush/detect_machine.sh" +sif=ubuntu22.04-intel-ufs-env-v1.6.0.img + +if [[ ${MACHINE_ID} = ursa* ]] ; then + img=/scratch4/NAGAPE/epic/${USER}/demo/${sif} + bindings="-e -B /scratch3 -B /scratch4" +elif [[ ${MACHINE_ID} = gaea* ]] ; then + img=/gpfs/f6/scratch/${USER}/container/${sif} + bindings="-e -B /gpfs/f6/scratch -B /ncrc/home1/${USER}" +elif [[ ${MACHINE_ID} = noaacloud* ]] ; then + img=/contrib/${USER}/src/gw-container-spack-stack-1.6.0/${sif} + bindings="-e -B /contrib -B /lustre -B /bucket" +fi + +cd ${HOMEDIR}/dev/workflow + +singularity shell ${bindings} ${img} diff --git a/dev/container/utils/link_model.sh b/dev/container/utils/link_model.sh index 1a2f49d784c..8cd89ec21b6 100755 --- a/dev/container/utils/link_model.sh +++ b/dev/container/utils/link_model.sh @@ -88,7 +88,7 @@ cat > $link_model_script << EOF_LINK export LD_LIBRARY_PATH=$(dirname ${container}) arg="\$@" - /usr/bin/singularity exec \\ + /apps/bin/apptainer exec \\ ${bindings} \\ ${container} \\ ${run_model_script} \$arg diff --git a/dev/workflow/generate_workflows.sh b/dev/workflow/generate_workflows.sh index 196a8e2fe51..ad0c0949cf6 100755 --- a/dev/workflow/generate_workflows.sh +++ b/dev/workflow/generate_workflows.sh @@ -116,11 +116,8 @@ _runtests="${RUNTESTS:-${_runtests:-}}" _auto_del=false _nonflag_option_count=0 -n=0 while [[ $# -gt 0 && "$1" != "--" ]]; do while getopts ":H:bDuy:Y:GESCA:ce:t:r:vVdhR" option; do - n=$((n + 1)) - echo "No. 
$n: option: $option" case "${option}" in H) HOMEgfs="${OPTARG}" @@ -283,13 +280,12 @@ if [[ "${_specified_home}" == "false" ]]; then fi fi -echo "_run_with_container: ${_run_with_container}" +if [[ "${_verbose}" == "true" ]]; then + echo "_run_with_container: ${_run_with_container}" +fi # Set RUN_WITH_CONTAINER if it is set by the user if [[ "${_run_with_container}" == "true" ]]; then - if [[ "${_verbose}" == "true" ]]; then - printf "Run with Container" - fi sed -i "s?RUN_WITH_CONTAINER=NO?RUN_WITH_CONTAINER=YES?g" ../../ush/preamble.sh else sed -i "s?RUN_WITH_CONTAINER=YES?RUN_WITH_CONTAINER=NO?g" ../../ush/preamble.sh @@ -351,7 +347,7 @@ EOM # Check if the specified yamls are for the specified system for i in "${!_nameref_yaml_list}"; do _yaml="${_nameref_yaml_list[${i}]}" - _found=$(grep -l "system: *${system}" "${_yaml_dir}/${_yaml}.yaml") + _found=$(grep -l "net: *${_net}" "${_yaml_dir}/${_yaml}.yaml") if [[ -z "${_found}" ]]; then echo "WARNING: the yaml file ${_yaml_dir}/${_yaml}.yaml is not designed for the ${_net^^} system" echo "Removing this yaml from the set of cases to run" From 1e07a0c8beccef5a82ccd513bc033bff0fb89499 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Thu, 17 Jul 2025 10:54:47 -0400 Subject: [PATCH 072/134] fix wgrib2 file location error --- dev/container/atmos_products/exglobal_atmos_products.sh | 4 ++-- dev/container/atmos_products/run_mpmd.sh | 7 +++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/dev/container/atmos_products/exglobal_atmos_products.sh b/dev/container/atmos_products/exglobal_atmos_products.sh index b9caebfb498..9fd55803290 100755 --- a/dev/container/atmos_products/exglobal_atmos_products.sh +++ b/dev/container/atmos_products/exglobal_atmos_products.sh @@ -25,8 +25,8 @@ export UTILROOT=${prod_util_ROOT} export WGRIB2=wgrib2 # Scripts used -INTERP_ATMOS_MASTERSH=${INTERP_ATMOS_MASTERSH:-"${USHgfs}/interp_atmos_master.sh"} -INTERP_ATMOS_SFLUXSH=${INTERP_ATMOS_SFLUXSH:-"${USHgfs}/interp_atmos_sflux.sh"} 
+INTERP_ATMOS_MASTERSH=${INTERP_ATMOS_MASTERSH:-"${USHgfs}/container/interp_atmos_master.sh"} +INTERP_ATMOS_SFLUXSH=${INTERP_ATMOS_SFLUXSH:-"${USHgfs}/container/interp_atmos_sflux.sh"} # Variables used in this job downset=${downset:-1} # No. of groups of pressure grib2 products to create diff --git a/dev/container/atmos_products/run_mpmd.sh b/dev/container/atmos_products/run_mpmd.sh index 977802ff4fb..650d045eae6 100755 --- a/dev/container/atmos_products/run_mpmd.sh +++ b/dev/container/atmos_products/run_mpmd.sh @@ -28,7 +28,7 @@ # ################################################################################ -#source "${USHgfs}/preamble.sh" +set -x source /usr/lmod/lmod/init/bash module use ${HOMEgfs}/sorc/gfs_utils.fd/modulefiles @@ -63,15 +63,14 @@ cat << EOF INFO: The proc_num corresponds to the line in '${mpmd_cmdfile}' EOF -# [[ "${launcher:-}" =~ ^mpiexec.* ]]; then # mpiexec - # Redirect output from each process to its own stdout # Read the incoming cmdfile and create mpiexec usable cmdfile nm=0 echo "#!/bin/bash" >> "${mpmd_cmdfile}" # shellcheck disable=SC2312 while IFS= read -r line; do - echo "-n 1 ${line} > mpmd.${nm}.out" >> "${mpmd_cmdfile}" + echo "${line} > mpmd.${nm}.out" >> "${mpmd_cmdfile}" + echo "Line ${nm}: ${line}" ${line} > mpmd.${nm}.out & ((nm=nm+1)) done < "${cmdfile}" From ca3d4ba4e7e93bede95172acf4c4f43b75040619 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 23 Jul 2025 21:32:51 +0000 Subject: [PATCH 073/134] sync --- dev/container/gen-C96mx100_S2S.sh | 26 ------------------------- modulefiles/module_base.noaacloud.lua | 16 +++++++++++---- scripts/exglobal_atmos_products.sh | 2 +- versions/build.noaacloud.ver | 4 ++-- versions/run.noaacloud.ver | 13 ++++++------- versions/spack.ver | 28 +++++++++++++-------------- 6 files changed, 35 insertions(+), 54 deletions(-) delete mode 100755 dev/container/gen-C96mx100_S2S.sh diff --git a/dev/container/gen-C96mx100_S2S.sh b/dev/container/gen-C96mx100_S2S.sh deleted file mode 100755 index 
aa44564d3fc..00000000000 --- a/dev/container/gen-C96mx100_S2S.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash - - set -x - - HOMEDIR=/scratch4/NAGAPE/epic/Wei.Huang/dev/global-workflow-cloud - rundir=/scratch4/NAGAPE/epic/Wei.Huang/run - mkdir -p ${rundir} - HPC_ACCOUNT=epic - - container=/scratch4/NAGAPE/epic/Wei.Huang/demo/ubuntu22.04-intel-ufs-env-v1.6.0.img - bindings="-B /scratch3 -B /scratch4" - ${HOMEDIR}/dev/container/gen-wrapper.sh -H ${HOMEDIR} -c ${container} -b "${bindings}" -v - - cd ${HOMEDIR}/dev/workflow - - RUNTESTS=${rundir} \ - ./generate_workflows.sh \ - -H ${HOMEDIR} \ - -y "C96mx100_S2S" \ - -Y ${HOMEDIR}/dev/ci/cases/pr \ - -A ${HPC_ACCOUNT} \ - -e "Wei.Huang@noaa.gov" \ - -R -v - -#${HOMEDIR}/dev/container/create-container-links.sh - diff --git a/modulefiles/module_base.noaacloud.lua b/modulefiles/module_base.noaacloud.lua index bd10d0eabf1..7ff0ad9f962 100644 --- a/modulefiles/module_base.noaacloud.lua +++ b/modulefiles/module_base.noaacloud.lua @@ -5,10 +5,18 @@ Load environment to run GFS on noaacloud local spack_mod_path=(os.getenv("spack_mod_path") or "None") prepend_path("MODULEPATH", spack_mod_path) -load("gnu") -load(pathJoin("stack-intel", (os.getenv("stack_intel_ver") or "None"))) -load(pathJoin("stack-intel-oneapi-mpi", (os.getenv("stack_impi_ver") or "None"))) -unload("gnu") +prepend_path("MODULEPATH", "/contrib/spack-stack-rocky8/spack-stack-1.9.2/envs/ue-oneapi-2024.2.1/install/modulefiles/intel-oneapi-mpi/2021.13-mg3hegm/gcc/13.2.0") +prepend_path("MODULEPATH", "/apps/modules/modulefiles") + +gnu_ver=os.getenv("gnu_ver") or "13.2.0" +stack_oneapi_ver=os.getenv("stack_oneapi_ver") or "2024.2.1" +stack_impi_ver=os.getenv("stack_impi_ver") or "2021.13" +cmake_ver=os.getenv("cmake_ver") or "3.27.9" + +load(pathJoin("gnu", gnu_ver)) +load(pathJoin("stack-oneapi", stack_oneapi_ver)) +load(pathJoin("stack-intel-oneapi-mpi", stack_impi_ver)) +load(pathJoin("cmake", cmake_ver)) load(pathJoin("python", (os.getenv("python_ver") or 
"None"))) diff --git a/scripts/exglobal_atmos_products.sh b/scripts/exglobal_atmos_products.sh index 86c04ea3028..a2b0813a287 100755 --- a/scripts/exglobal_atmos_products.sh +++ b/scripts/exglobal_atmos_products.sh @@ -5,7 +5,7 @@ source "${HOMEgfs}/ush/preamble.sh" # Programs used if [ "$RUN_WITH_CONTAINER" == "NO" ]; then source "${USHgfs}/load_fv3gfs_modules.sh" - module load wgrib2/2.0.8 + module load wgrib2/3.6.0 export WGRIB2=${WGRIB2:-${wgrib2_ROOT}/bin/wgrib2} else diff --git a/versions/build.noaacloud.ver b/versions/build.noaacloud.ver index b5fd272b4b5..5f566ef8d1b 100644 --- a/versions/build.noaacloud.ver +++ b/versions/build.noaacloud.ver @@ -1,5 +1,5 @@ export stack_intel_ver=2021.10.0 export stack_impi_ver=2021.10.0 -export spack_env=gsi-addon-env +export spack_env=1.9.2 source "${HOMEgfs:-}/versions/spack.ver" -export spack_mod_path="/contrib/spack-stack-rocky8/spack-stack-${spack_stack_ver}/envs/gsi-addon-env/install/modulefiles/Core" +export spack_mod_path="/contrib/spack-stack-rocky8/spack-stack-${spack_stack_ver}/envs/ue-oneapi-2024.2.1/install/modulefiles/Core" diff --git a/versions/run.noaacloud.ver b/versions/run.noaacloud.ver index 3b166e76fdc..fb7cb3ccfbb 100644 --- a/versions/run.noaacloud.ver +++ b/versions/run.noaacloud.ver @@ -1,15 +1,14 @@ -export stack_intel_ver=2021.10.0 -export stack_impi_ver=2021.10.0 -export spack_env=gsi-addon-env +export stack_intel_ver=2024.2.1 +export stack_impi_ver=2021.13 +export spack_env=1.9.2 source "${HOMEgfs:-}/versions/spack.ver" -export spack_mod_path="/contrib/spack-stack-rocky8/spack-stack-${spack_stack_ver}/envs/gsi-addon-env/install/modulefiles/Core" +export spack_mod_path="/contrib/spack-stack-rocky8/spack-stack-${spack_env}/envs/ue-oneapi-2024.2.1/install/modulefiles/Core" -export cdo_ver=2.2.0 +export cdo_ver=2.3.0 # modules updated in spack.ver for gsi-addon-dev-fms-2024.01 -export export esmf_ver=8.5.0 -export g2tmpl_ver=1.10.2 +export export esmf_ver=8.8.0 export prepobs_run_ver=1.1.0 export 
fit2obs_ver=1.1.7 diff --git a/versions/spack.ver b/versions/spack.ver index 7d27b3e9a03..476d5c1b78c 100644 --- a/versions/spack.ver +++ b/versions/spack.ver @@ -1,16 +1,16 @@ -export spack_stack_ver=1.6.0 -export cmake_ver=3.23.1 -export python_ver=3.11.6 +export spack_stack_ver=1.9.2 +export cmake_ver=3.27.9 +export python_ver=3.11.7 -export jasper_ver=2.0.32 +export jasper_ver=4.2.0 export libpng_ver=1.6.37 export zlib_ver=1.2.13 export esmf_ver=8.6.0 export fms_ver=2023.02.01 -export cdo_ver=2.2.0 -export nco_ver=5.0.6 +export cdo_ver=2.3.0 +export nco_ver=5.2.4 -export hdf5_ver=1.14.0 +export hdf5_ver=1.14.3 export netcdf_c_ver=4.9.2 export netcdf_fortran_ver=4.6.1 @@ -18,27 +18,27 @@ export bacio_ver=2.4.1 export nemsio_ver=2.5.4 export sigio_ver=2.3.2 export w3emc_ver=2.10.0 -export bufr_ver=11.7.0 +export bufr_ver=12.1.0 export g2_ver=3.4.5 export sp_ver=2.5.0 export ip_ver=4.3.0 export gsi_ncdiag_ver=1.1.2 export g2tmpl_ver=1.13.0 export crtm_ver=2.4.0.1 -export wgrib2_ver=2.0.8 -export grib_util_ver=1.3.0 +export wgrib2_ver=3.6.0 +export grib_util_ver=1.4.0 export prod_util_ver=2.1.1 -export py_netcdf4_ver=1.5.8 +export py_netcdf4_ver=1.7.1.post2 export py_pyyaml_ver=6.0 -export py_jinja2_ver=3.1.2 -export py_pandas_ver=1.5.3 +export py_jinja2_ver=3.1.4 +export py_pandas_ver=2.2.3 export py_python_dateutil_ver=2.8.2 export py_f90nml_ver=1.4.3 export py_numpy_ver=1.23.4 export met_ver=9.1.3 export metplus_ver=3.1.1 -export py_xarray_ver=2023.7.0 +export py_xarray_ver=2024.7.0 export obsproc_run_ver=1.2.0 export prepobs_run_ver=1.1.0 From cbc380be537861310c9bac88276a69a28b80b21b Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Thu, 24 Jul 2025 23:20:42 +0000 Subject: [PATCH 074/134] update to ss-1.9.1 --- dev/container/com.sh | 2 +- dev/container/gen-run-cases.sh | 16 +++++++++------- dev/container/shell-in-container.sh | 15 +++++++-------- dev/container/utils/ush.python | 15 ++++++++------- modulefiles/module_gwsetup.container.lua | 14 +++++--------- 5 
files changed, 30 insertions(+), 32 deletions(-) diff --git a/dev/container/com.sh b/dev/container/com.sh index 0c1df3263b8..dc409c66666 100755 --- a/dev/container/com.sh +++ b/dev/container/com.sh @@ -12,7 +12,7 @@ set -x gwhome=/contrib/Wei.Huang/src/global-workflow-cloud -img=/contrib/Wei.Huang/src/gw-container-spack-stack-1.6.0/ubuntu22.04-intel-ufs-env-v1.6.0.img +img=/contrib/Wei.Huang/container/ubuntu22.04-intel-ufs-env-v1.6.0.img cmd=${gwhome}/sorc/com.sh singularity exec -B /contrib -B /lustre ${img} ${cmd} diff --git a/dev/container/gen-run-cases.sh b/dev/container/gen-run-cases.sh index de81c482c9f..d3431cde305 100755 --- a/dev/container/gen-run-cases.sh +++ b/dev/container/gen-run-cases.sh @@ -7,18 +7,20 @@ source "${HOMEgfs}/ush/detect_machine.sh" run_with_container=YES -#casetype=pr -#yamllist="C48_ATM" + casetype=pr + yamllist="C48_ATM" #yamllist="C48_S2SW" #yamllist="C48_S2SWA_gefs" #yamllist="C96mx100_S2S" -casetype=hires -yamllist="C768_S2SW" +#casetype=hires +#yamllist="C768_S2SW" HOMEDIR=${HOMEgfs} +#img=ubuntu22.04-intel-ufs-env-v1.6.0.img +img=ubuntu22.04-intel-ufs-env-v1.9.1.img if [[ ${MACHINE_ID} = ursa* ]] ; then - container=/scratch4/NAGAPE/epic/${USER}/demo/ubuntu22.04-intel-ufs-env-v1.6.0.img + container=/scratch4/NAGAPE/epic/${USER}/demo/${img} rundir=/scratch3/NAGAPE/epic/${USER}/run bindings="-B /scratch3 -B /scratch4" HPC_ACCOUNT=epic @@ -26,7 +28,7 @@ if [[ ${MACHINE_ID} = ursa* ]] ; then module load rocoto/1.3.7 rocotocmd=`which rocotorun` elif [[ ${MACHINE_ID} = gaea* ]] ; then - container=/gpfs/f6/scratch/${USER}/container/ubuntu22.04-intel-ufs-env-v1.6.0.img + container=/gpfs/f6/scratch/${USER}/container/${img} rundir=/gpfs/f6/scratch/${USER}/run bindings="-B /gpfs/f6/scratch -B /ncrc/home1/${USER}" HPC_ACCOUNT=bil-fire8 @@ -34,7 +36,7 @@ elif [[ ${MACHINE_ID} = gaea* ]] ; then rocotocmd=/autofs/ncrc-svm1_home2/Christopher.W.Harrop/rocoto-1.3.7/bin/rocotorun elif [[ ${MACHINE_ID} = noaacloud* ]] ; then 
TOPICDIR=/bucket/global-workflow-shared-data/ICSDIR - container=/contrib/${USER}/src/gw-container-spack-stack-1.6.0/ubuntu22.04-intel-ufs-env-v1.6.0.img + container=/contrib/${USER}/container/${img} rundir=/lustre/${USER}/run bindings="--env \"I_MPI_FABRICS=ofi:shm,I_MPI_DEBUG=6\" -B /apps/slurm/default/lib/libpmi2.so -B /contrib -B /lustre -B /bucket" HPC_ACCOUNT=${USER} diff --git a/dev/container/shell-in-container.sh b/dev/container/shell-in-container.sh index 359fd62ded1..56cbe89faf8 100755 --- a/dev/container/shell-in-container.sh +++ b/dev/container/shell-in-container.sh @@ -4,20 +4,19 @@ set -x HOMEgfs="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." >/dev/null 2>&1 && pwd )" source "${HOMEgfs}/ush/detect_machine.sh" -sif=ubuntu22.04-intel-ufs-env-v1.6.0.img +#sif=ubuntu22.04-intel-ufs-env-v1.6.0.img +sif=ubuntu22.04-intel-ufs-env-v1.9.1.img if [[ ${MACHINE_ID} = ursa* ]] ; then img=/scratch4/NAGAPE/epic/${USER}/demo/${sif} - bindings="-e -B /scratch3 -B /scratch4" + bindings="-B /scratch3 -B /scratch4" elif [[ ${MACHINE_ID} = gaea* ]] ; then img=/gpfs/f6/scratch/${USER}/container/${sif} - bindings="-e -B /gpfs/f6/scratch -B /ncrc/home1/${USER}" + bindings="-B /gpfs/f6/scratch -B /ncrc/home1/${USER}" elif [[ ${MACHINE_ID} = noaacloud* ]] ; then - img=/contrib/${USER}/src/gw-container-spack-stack-1.6.0/${sif} - bindings="-e -B /contrib -B /lustre -B /bucket" + img=/contrib/${USER}/container/${sif} + bindings="-B /contrib -B /lustre -B /bucket" fi -cd ${HOMEDIR}/dev/workflow - -singularity shell ${bindings} ${img} +singularity shell -e ${bindings} ${img} diff --git a/dev/container/utils/ush.python b/dev/container/utils/ush.python index 463fbd7cdbc..4657c432ebc 100644 --- a/dev/container/utils/ush.python +++ b/dev/container/utils/ush.python @@ -7,14 +7,14 @@ module load module_gwsetup.container module list -#module load python/3.10.13 +module load python/3.11.7 module load py-f90nml/1.4.3 -module load py-netcdf4/1.5.8 +module load py-netcdf4/1.7.1.post2 module load 
py-pyyaml/6.0 -module load py-jinja2/3.0.3 -module load py-pandas/1.5.3 -module load py-numpy/1.22.3 -module load py-xarray/2023.7.0 +module load py-jinja2/3.1.4 +module load py-pandas/2.2.3 +module load py-numpy/1.26.3 +module load py-xarray/2024.7.0 module load py-python-dateutil/2.8.2 module list @@ -24,5 +24,6 @@ export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}HOMEgfs/ush:${wxflowPATH}" arg="$@" -/opt/spack-stack/spack-stack-1.6.0/envs/unified-env/install/intel/2021.10.0/python-3.10.13-h3oyipv/bin/python $arg +#/opt/spack-stack/spack-stack-1.6.0/envs/unified-env/install/intel/2021.10.0/python-3.10.13-h3oyipv/bin/python $arg +/opt/spack-stack/spack-stack-1.9.1/envs/unified-env/install/oneapi/2024.2.0/python-3.11.7-t6qidqx/bin/python $arg diff --git a/modulefiles/module_gwsetup.container.lua b/modulefiles/module_gwsetup.container.lua index bb4882cfcb1..cec427f000d 100644 --- a/modulefiles/module_gwsetup.container.lua +++ b/modulefiles/module_gwsetup.container.lua @@ -2,17 +2,13 @@ help([[ Load environment to run GFS workflow setup scripts in container ]]) ---load(pathJoin("rocoto")) +prepend_path("MODULEPATH", "/opt/spack-stack/spack-stack-1.9.1/envs/unified-env/install/modulefiles/Core") -prepend_path("MODULEPATH", "/opt/spack-stack/spack-stack-1.6.0/envs/unified-env/install/modulefiles/Core") +stack_oneapi_ver=os.getenv("stack_oneapi_ver") or "2024.2.0" +stack_impi_ver=os.getenv("stack_impi_ver") or "2021.13" -local stack_intel_ver=os.getenv("stack_intel_ver") or "2021.10.0" -local stack_mpi_ver=os.getenv("stack_mpi_ver") or "2021.9.0" - -load("gnu") -load(pathJoin("stack-intel", stack_intel_ver)) -load(pathJoin("stack-intel-oneapi-mpi", stack_mpi_ver)) -unload("gnu") +load(pathJoin("stack-oneapi", stack_oneapi_ver)) +load(pathJoin("stack-intel-oneapi-mpi", stack_impi_ver)) load("py-jinja2") load("py-pyyaml") From c0dd64a9a9299a45b874351265e190d3734eb723 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Fri, 25 Jul 2025 00:14:22 +0000 Subject: [PATCH 075/134] 
re-test --- dev/container/gen-run-cases.sh | 8 +++++--- dev/container/utils/link_model.sh | 10 +++++----- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/dev/container/gen-run-cases.sh b/dev/container/gen-run-cases.sh index f980e81257c..62161364dd4 100755 --- a/dev/container/gen-run-cases.sh +++ b/dev/container/gen-run-cases.sh @@ -9,8 +9,8 @@ run_with_container=YES casetype=pr #yamllist="C48_ATM" -#yamllist="C48_S2SW" -yamllist="C48_S2SWA_gefs" + yamllist="C48_S2SW" +#yamllist="C48_S2SWA_gefs" #yamllist="C96mx100_S2S" #casetype=hires @@ -36,7 +36,8 @@ elif [[ ${MACHINE_ID} = noaacloud* ]] ; then TOPICDIR=/bucket/global-workflow-shared-data/ICSDIR container=/contrib/${USER}/src/gw-container-spack-stack-1.6.0/ubuntu22.04-intel-ufs-env-v1.6.0.img rundir=/lustre/${USER}/run - bindings="--env \"I_MPI_FABRICS=ofi:shm,I_MPI_DEBUG=6\" -B /apps/slurm/default/lib/libpmi2.so -B /contrib -B /lustre -B /bucket" + bindings="--env \"I_MPI_FABRICS=shm:ofi,I_MPI_DEBUG=6\" -B /apps/slurm/default/lib/libpmi2.so -B /contrib -B /lustre -B /bucket" + #bindings="-B /apps/slurm/default/lib/libpmi2.so -B /contrib -B /lustre -B /bucket" HPC_ACCOUNT=${USER} module load rocoto/1.3.7 @@ -63,6 +64,7 @@ if [[ "${run_with_container}" == "YES" ]]; then -v -R ${HOMEDIR}/dev/container/utils/create-container-links.sh -H ${HOMEDIR} -c ${container} -b "${bindings}" + ${HOMEDIR}/dev/container/utils/create-atmos-products.sh -H ${HOMEDIR} -c ${container} -b "${bindings}" else TOPICDIR=${TOPICDIR} \ RUNTESTS=${rundir} \ diff --git a/dev/container/utils/link_model.sh b/dev/container/utils/link_model.sh index 1a2f49d784c..4a431c1f02b 100755 --- a/dev/container/utils/link_model.sh +++ b/dev/container/utils/link_model.sh @@ -74,10 +74,10 @@ cat > $link_model_script << EOF_LINK #Need these lines on AWS to run more than one node. 
export I_MPI_DEBUG=10 -#export I_MPI_FABRICS=shm:ofi -#export I_MPI_OFI_PROVIDER=tcp -#export FI_PROVIDER=tcp -#export FI_TCP_IFACE=eth0 + export I_MPI_FABRICS=shm:ofi + export I_MPI_OFI_PROVIDER=tcp + export FI_PROVIDER=tcp + export FI_TCP_IFACE=eth0 #For GaeaC6 #export SINGULARITY_ENABLE_OVERLAY=try @@ -88,7 +88,7 @@ cat > $link_model_script << EOF_LINK export LD_LIBRARY_PATH=$(dirname ${container}) arg="\$@" - /usr/bin/singularity exec \\ + singularity exec \\ ${bindings} \\ ${container} \\ ${run_model_script} \$arg From a2203692817460189c166c75e8aedf9f252477dd Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Mon, 28 Jul 2025 14:09:21 +0000 Subject: [PATCH 076/134] update wgrib2 to 3.6.0 and make shell script executable --- dev/container/gen-run-cases.sh | 4 +++- dev/container/utils/create-atmos-products.sh | 2 ++ dev/container/utils/link_model.sh | 2 +- dev/container/utils/ush.wgrib2 | 2 +- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/dev/container/gen-run-cases.sh b/dev/container/gen-run-cases.sh index d3431cde305..996c0cd8cec 100755 --- a/dev/container/gen-run-cases.sh +++ b/dev/container/gen-run-cases.sh @@ -37,7 +37,8 @@ elif [[ ${MACHINE_ID} = gaea* ]] ; then elif [[ ${MACHINE_ID} = noaacloud* ]] ; then TOPICDIR=/bucket/global-workflow-shared-data/ICSDIR container=/contrib/${USER}/container/${img} - rundir=/lustre/${USER}/run + rundir=/lustre/${USER}/ss191/run + STMP=/lustre/${USER}/ss191/stmp bindings="--env \"I_MPI_FABRICS=ofi:shm,I_MPI_DEBUG=6\" -B /apps/slurm/default/lib/libpmi2.so -B /contrib -B /lustre -B /bucket" HPC_ACCOUNT=${USER} @@ -64,6 +65,7 @@ if [[ "${run_with_container}" == "YES" ]]; then -r ${rocotocmd} \ -v -R + ${HOMEDIR}/dev/container/utils/create-atmos-products.sh -H ${HOMEDIR} -c ${container} -b "${bindings}" ${HOMEDIR}/dev/container/utils/create-container-links.sh -H ${HOMEDIR} -c ${container} -b "${bindings}" else TOPICDIR=${TOPICDIR} \ diff --git a/dev/container/utils/create-atmos-products.sh 
b/dev/container/utils/create-atmos-products.sh index 63f0fcb5973..03a0a2bfff0 100755 --- a/dev/container/utils/create-atmos-products.sh +++ b/dev/container/utils/create-atmos-products.sh @@ -41,9 +41,11 @@ sed -e "s?HOMEgfs?${HOMEgfs}?g" \ -e "s?SIF?${container}?g" \ -e "s?BINDINGS?${bindings}?g" \ ${HOMEgfs}/dev/container/atmos_products/exec.exglobal_atmos_products.sh > ${HOMEgfs}/exec/exglobal_atmos_products.sh + chmod +x ${HOMEgfs}/exec/exglobal_atmos_products.sh for fl in exglobal_atmos_products.sh interp_atmos_master.sh interp_atmos_sflux.sh run_mpmd.sh do cp ${HOMEgfs}/dev/container/atmos_products/${fl} ${HOMEgfs}/ush/container/. + chmod +x ${HOMEgfs}/ush/container/${fl} done diff --git a/dev/container/utils/link_model.sh b/dev/container/utils/link_model.sh index 8cd89ec21b6..a7e9775d959 100755 --- a/dev/container/utils/link_model.sh +++ b/dev/container/utils/link_model.sh @@ -88,7 +88,7 @@ cat > $link_model_script << EOF_LINK export LD_LIBRARY_PATH=$(dirname ${container}) arg="\$@" - /apps/bin/apptainer exec \\ + singularity exec \\ ${bindings} \\ ${container} \\ ${run_model_script} \$arg diff --git a/dev/container/utils/ush.wgrib2 b/dev/container/utils/ush.wgrib2 index 572dc2d229a..567d7074556 100644 --- a/dev/container/utils/ush.wgrib2 +++ b/dev/container/utils/ush.wgrib2 @@ -5,7 +5,7 @@ module purge module use HOMEgfs/modulefiles module load module_gwsetup.container -module load wgrib2/2.0.8 +module load wgrib2/3.6.0 arg=$@ From 0a6dba34e057d5069daf7a8dd54ed232b11b5c66 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Mon, 4 Aug 2025 16:33:59 +0000 Subject: [PATCH 077/134] update atmos_produts --- .../atmos_products/exglobal_atmos_products.sh | 8 ++++---- dev/container/utils/create-atmos-products.sh | 10 +++++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/dev/container/atmos_products/exglobal_atmos_products.sh b/dev/container/atmos_products/exglobal_atmos_products.sh index b9caebfb498..78f15fdd67d 100755 --- 
a/dev/container/atmos_products/exglobal_atmos_products.sh +++ b/dev/container/atmos_products/exglobal_atmos_products.sh @@ -3,8 +3,8 @@ source /usr/lmod/lmod/init/bash module use ${HOMEgfs}/sorc/gfs_utils.fd/modulefiles module load gfsutils_container.intel -module load wgrib2 -module load prod_util +module load wgrib2/2.0.8 +module load prod_util/2.1.1 module list export I_MPI_TMPDIR=/tmp @@ -25,8 +25,8 @@ export UTILROOT=${prod_util_ROOT} export WGRIB2=wgrib2 # Scripts used -INTERP_ATMOS_MASTERSH=${INTERP_ATMOS_MASTERSH:-"${USHgfs}/interp_atmos_master.sh"} -INTERP_ATMOS_SFLUXSH=${INTERP_ATMOS_SFLUXSH:-"${USHgfs}/interp_atmos_sflux.sh"} +INTERP_ATMOS_MASTERSH=${USHgfs}/container/interp_atmos_master.sh +INTERP_ATMOS_SFLUXSH=${USHgfs}/container/interp_atmos_sflux.sh # Variables used in this job downset=${downset:-1} # No. of groups of pressure grib2 products to create diff --git a/dev/container/utils/create-atmos-products.sh b/dev/container/utils/create-atmos-products.sh index 63f0fcb5973..6342c27f971 100755 --- a/dev/container/utils/create-atmos-products.sh +++ b/dev/container/utils/create-atmos-products.sh @@ -32,17 +32,17 @@ if [[ ! -v HOMEgfs || ! -v container || ! -v bindings ]]; then exit -1 fi -echo "HOMEgfs: $HOMEgfs" -echo "container: $container" -echo "bindings: $bindings" -echo "Verbose: $verbose" +#echo "HOMEgfs: $HOMEgfs" +#echo "container: $container" +#echo "bindings: $bindings" +#echo "Verbose: $verbose" sed -e "s?HOMEgfs?${HOMEgfs}?g" \ -e "s?SIF?${container}?g" \ -e "s?BINDINGS?${bindings}?g" \ ${HOMEgfs}/dev/container/atmos_products/exec.exglobal_atmos_products.sh > ${HOMEgfs}/exec/exglobal_atmos_products.sh -for fl in exglobal_atmos_products.sh interp_atmos_master.sh interp_atmos_sflux.sh run_mpmd.sh +for fl in exglobal_atmos_products.sh interp_atmos_master.sh interp_atmos_sflux.sh run_mpmd.sh do cp ${HOMEgfs}/dev/container/atmos_products/${fl} ${HOMEgfs}/ush/container/. 
done From d197f95551c1d0c04d1f8935f70b0d407f6e45b5 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 20 Aug 2025 13:50:39 +0000 Subject: [PATCH 078/134] direct use interp_atmos_master[sflux] --- dev/container/atmos_products/exglobal_atmos_products.sh | 8 ++++---- dev/parm/config/gfs/config.resources.AWSPW | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/dev/container/atmos_products/exglobal_atmos_products.sh b/dev/container/atmos_products/exglobal_atmos_products.sh index b9caebfb498..91bef9b850c 100755 --- a/dev/container/atmos_products/exglobal_atmos_products.sh +++ b/dev/container/atmos_products/exglobal_atmos_products.sh @@ -3,8 +3,8 @@ source /usr/lmod/lmod/init/bash module use ${HOMEgfs}/sorc/gfs_utils.fd/modulefiles module load gfsutils_container.intel -module load wgrib2 -module load prod_util +module load wgrib2/3.6.0 +module load prod_util/2.1.1 module list export I_MPI_TMPDIR=/tmp @@ -25,8 +25,8 @@ export UTILROOT=${prod_util_ROOT} export WGRIB2=wgrib2 # Scripts used -INTERP_ATMOS_MASTERSH=${INTERP_ATMOS_MASTERSH:-"${USHgfs}/interp_atmos_master.sh"} -INTERP_ATMOS_SFLUXSH=${INTERP_ATMOS_SFLUXSH:-"${USHgfs}/interp_atmos_sflux.sh"} +INTERP_ATMOS_MASTERSH=${USHgfs}/container/interp_atmos_master.sh +INTERP_ATMOS_SFLUXSH=${USHgfs}/container/interp_atmos_sflux.sh # Variables used in this job downset=${downset:-1} # No. 
of groups of pressure grib2 products to create diff --git a/dev/parm/config/gfs/config.resources.AWSPW b/dev/parm/config/gfs/config.resources.AWSPW index bc20944f6f4..5fa90e8515a 100644 --- a/dev/parm/config/gfs/config.resources.AWSPW +++ b/dev/parm/config/gfs/config.resources.AWSPW @@ -30,6 +30,7 @@ case ${step} in "atmos_products") export PARTITION_BATCH="highmemory" + #export PARTITION_BATCH="process" unset PARTITION_SERVICE max_tasks_per_node=24 tasks_per_node=24 From d2f255f6ba4737f7e68bce2dd6e689429f3cccb3 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 20 Aug 2025 19:29:40 +0000 Subject: [PATCH 079/134] trying to make ss192 container working on AWS --- dev/container/com.sh | 10 ++- dev/container/gen-run-cases.sh | 2 +- dev/container/shell-in-container.sh | 2 +- .../{ => utils}/compile-gw-in-container.sh | 3 +- dev/container/utils/ush.python | 22 +++-- dev/ush/load_gw_run_modules.sh | 80 ++----------------- modulefiles/gw_setup.container.lua | 21 +++++ modulefiles/module_base.container.lua | 56 ------------- modulefiles/module_base.noaacloud.lua | 60 -------------- modulefiles/module_base.ursa.lua | 30 ------- modulefiles/module_gwci.ursa.lua | 12 --- modulefiles/module_gwsetup.container.lua | 17 ---- modulefiles/module_gwsetup.containerized.lua | 18 ----- modulefiles/module_gwsetup.ursa.lua | 13 --- modulefiles/module_run.ursa.lua | 19 ----- sorc/com.sh | 6 -- 16 files changed, 48 insertions(+), 323 deletions(-) rename dev/container/{ => utils}/compile-gw-in-container.sh (53%) create mode 100644 modulefiles/gw_setup.container.lua delete mode 100644 modulefiles/module_base.container.lua delete mode 100644 modulefiles/module_base.noaacloud.lua delete mode 100644 modulefiles/module_base.ursa.lua delete mode 100644 modulefiles/module_gwci.ursa.lua delete mode 100644 modulefiles/module_gwsetup.container.lua delete mode 100644 modulefiles/module_gwsetup.containerized.lua delete mode 100644 modulefiles/module_gwsetup.ursa.lua delete mode 100644 
modulefiles/module_run.ursa.lua delete mode 100755 sorc/com.sh diff --git a/dev/container/com.sh b/dev/container/com.sh index dc409c66666..e1878a2d18a 100755 --- a/dev/container/com.sh +++ b/dev/container/com.sh @@ -11,9 +11,11 @@ set -x -gwhome=/contrib/Wei.Huang/src/global-workflow-cloud -img=/contrib/Wei.Huang/container/ubuntu22.04-intel-ufs-env-v1.6.0.img -cmd=${gwhome}/sorc/com.sh +gwhome=/contrib/Wei.Huang/container/global-workflow-cloud +img=/contrib/Wei.Huang/container/ubuntu22.04-intel-ufs-env-v1.9.2.img +cmd=${gwhome}/dev/container/utils/compile-gw-in-container.sh -singularity exec -B /contrib -B /lustre ${img} ${cmd} +gw_sorc_dir=${gwhome}/sorc + +singularity exec -B /contrib ${img} ${cmd} ${gw_sorc_dir} diff --git a/dev/container/gen-run-cases.sh b/dev/container/gen-run-cases.sh index 996c0cd8cec..1f9bc097fe4 100755 --- a/dev/container/gen-run-cases.sh +++ b/dev/container/gen-run-cases.sh @@ -18,7 +18,7 @@ run_with_container=YES HOMEDIR=${HOMEgfs} #img=ubuntu22.04-intel-ufs-env-v1.6.0.img -img=ubuntu22.04-intel-ufs-env-v1.9.1.img +img=ubuntu22.04-intel-ufs-env-v1.9.2.img if [[ ${MACHINE_ID} = ursa* ]] ; then container=/scratch4/NAGAPE/epic/${USER}/demo/${img} rundir=/scratch3/NAGAPE/epic/${USER}/run diff --git a/dev/container/shell-in-container.sh b/dev/container/shell-in-container.sh index 56cbe89faf8..8e7441d87c3 100755 --- a/dev/container/shell-in-container.sh +++ b/dev/container/shell-in-container.sh @@ -5,7 +5,7 @@ set -x HOMEgfs="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." 
>/dev/null 2>&1 && pwd )" source "${HOMEgfs}/ush/detect_machine.sh" #sif=ubuntu22.04-intel-ufs-env-v1.6.0.img -sif=ubuntu22.04-intel-ufs-env-v1.9.1.img +sif=ubuntu22.04-intel-ufs-env-v1.9.2.img if [[ ${MACHINE_ID} = ursa* ]] ; then img=/scratch4/NAGAPE/epic/${USER}/demo/${sif} diff --git a/dev/container/compile-gw-in-container.sh b/dev/container/utils/compile-gw-in-container.sh similarity index 53% rename from dev/container/compile-gw-in-container.sh rename to dev/container/utils/compile-gw-in-container.sh index edf33a63797..4f8c56a48b9 100755 --- a/dev/container/compile-gw-in-container.sh +++ b/dev/container/utils/compile-gw-in-container.sh @@ -1,6 +1,7 @@ #!/bin/bash - cd /contrib/Wei.Huang/src/global-workflow-cloud/sorc + gw_sorc_dir=$1 + cd ${gw_sorc_dir} ./build_all.sh gfs sfs gefs ./link_workflow.sh diff --git a/dev/container/utils/ush.python b/dev/container/utils/ush.python index 4657c432ebc..500623ccd11 100644 --- a/dev/container/utils/ush.python +++ b/dev/container/utils/ush.python @@ -3,27 +3,23 @@ source /usr/lmod/lmod/init/bash module purge module use HOMEgfs/modulefiles -module load module_gwsetup.container +module load gw_setup.container module list -module load python/3.11.7 -module load py-f90nml/1.4.3 -module load py-netcdf4/1.7.1.post2 -module load py-pyyaml/6.0 -module load py-jinja2/3.1.4 -module load py-pandas/2.2.3 -module load py-numpy/1.26.3 -module load py-xarray/2024.7.0 -module load py-python-dateutil/2.8.2 +module load python +module load py-netcdf4 +module load py-xarray +module load py-f90nml +module load py-numpy +# module load py-python-dateutil/2.8.2 module list -wxflowPATH="HOMEgfs/ush/python" +wxflowPATH="HOMEgfs/ush/python:HOMEgfs/sorc/wxflow/src" export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}HOMEgfs/ush:${wxflowPATH}" arg="$@" -#/opt/spack-stack/spack-stack-1.6.0/envs/unified-env/install/intel/2021.10.0/python-3.10.13-h3oyipv/bin/python $arg 
-/opt/spack-stack/spack-stack-1.9.1/envs/unified-env/install/oneapi/2024.2.0/python-3.11.7-t6qidqx/bin/python $arg +python $arg diff --git a/dev/ush/load_gw_run_modules.sh b/dev/ush/load_gw_run_modules.sh index 660463ed0c2..7a057b0392b 100755 --- a/dev/ush/load_gw_run_modules.sh +++ b/dev/ush/load_gw_run_modules.sh @@ -2,75 +2,20 @@ ############################################################### if [[ "$-" == *x* ]]; then - set_x=YES + set_x=YES else - set_x=NO + set_x=NO fi if [[ "${DEBUG_WORKFLOW:-NO}" == "NO" ]]; then - echo "Loading modules quietly..." - set +x + echo "Loading modules quietly..." + set +x fi # Setup runtime environment by loading modules ulimit_s=$( ulimit -S -s ) -# Test if HOMEgfs is defined. If not, then try to determine it with git rev-parse -_unset_homegfs="NO" -if [[ -z ${HOMEgfs+x} ]]; then - echo "INFO HOMEgfs is not defined. Attempting to find the global-workflow root directory" - # HOMEgfs will be removed from the environment at the end of this script - _unset_homegfs="YES" - - script_dir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) - HOMEgfs=$(cd "${script_dir}" && git rev-parse --show-toplevel) - export HOMEgfs - err=$? - if [[ ${err} -ne 0 ]]; then - is_git_dir=$( cd -- "${script_dir}" &> /dev/null && git rev-parse --is-inside-work-tree) - git_stat=$? - if [[ ${git_stat} -ne 0 || ${is_git_dir} != "true" ]]; then - echo "FATAL ERROR unable to determine the root because it is not a git repository." - else - echo "FATAL ERROR unable to determine the root because git rev-parse --show-toplevel failed for an unknown reason" - fi - echo " Unable to load modules. Exiting" - exit 1 - fi -fi - -# Find module command and purge: -source "${HOMEgfs}/ush/detect_machine.sh" -source "${HOMEgfs}/ush/module-setup.sh" - -# Source versions file for runtime -if [[ -f "${HOMEgfs}/versions/run.ver" ]]; then - source "${HOMEgfs}/versions/run.ver" -else - echo "FATAL ERROR ${HOMEgfs}/versions/run.ver does not exist!" 
- echo "HINT: Run link_workflow.sh first." - exit 1 -fi - -# Load our modules: -module use "${HOMEgfs}/modulefiles" - -case "${MACHINE_ID}" in -"wcoss2" | "ursa" | "hera" | "orion" | "hercules" | "gaeac5" | "gaeac6" | "noaacloud") - target_module="gw_run.${MACHINE_ID}" - module load "${target_module}" - export err=$? - if [[ ${err} -ne 0 ]]; then - echo "FATAL ERROR: Failed to load ${target_module}" - exit 1 - fi - ;; -*) - echo "WARNING: UNKNOWN PLATFORM" - ;; -esac - -module list +source "${HOMEgfs}/ush/preamble.sh" # If this function exists in the environment, run it; else set -x if it was set on entering this script ftype=$(type -t set_trace || echo "") @@ -80,20 +25,11 @@ elif [[ "${set_x}" == "YES" ]]; then set -x fi -# Set up the PYTHONPATH to include wxflow from HOMEgfs -if [[ -d "${HOMEgfs}/sorc/wxflow/src" ]]; then - PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${HOMEgfs}/sorc/wxflow/src" -fi - -# Add HOMEgfs/ush/python to PYTHONPATH -PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${HOMEgfs}/ush/python" +# Add wxflow to PYTHONPATH +wxflowPATH="${HOMEgfs}/ush/python" +PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${HOMEgfs}/ush:${wxflowPATH}" export PYTHONPATH # Restore stack soft limit: ulimit -S -s "${ulimit_s}" unset ulimit_s - -# Unset HOMEgfs if it was not set at the beginning of this script -if [[ ${_unset_homegfs} == "YES" ]]; then - unset HOMEgfs -fi diff --git a/modulefiles/gw_setup.container.lua b/modulefiles/gw_setup.container.lua new file mode 100644 index 00000000000..2af6e33ac67 --- /dev/null +++ b/modulefiles/gw_setup.container.lua @@ -0,0 +1,21 @@ +help([[ +Load environment to run GFS workflow setup scripts on container +]]) + +-- load("rocoto") + +prepend_path("MODULEPATH", "/opt/spack-stack/spack-stack-1.9.2/envs/unified-env/install/modulefiles/Core") + +local stack_oneapi_ver=os.getenv("stack_oneapi_ver") or "2024.2.0" +local stack_mpi_ver=os.getenv("stack_mpi_ver") or "2021.13" +local cmake_ver=os.getenv("cmake_ver") or "3.27.9" + 
+load(pathJoin("stack-oneapi", stack_oneapi_ver)) +load(pathJoin("stack-intel-oneapi-mpi", stack_mpi_ver)) +load(pathJoin("cmake", cmake_ver)) + +load("py-jinja2") +load("py-pyyaml") +load("py-numpy") + +whatis("Description: GFS run setup environment") diff --git a/modulefiles/module_base.container.lua b/modulefiles/module_base.container.lua deleted file mode 100644 index 2b2d2aca49e..00000000000 --- a/modulefiles/module_base.container.lua +++ /dev/null @@ -1,56 +0,0 @@ -help([[ -Load environment to run GFS in container -]]) - -prepend_path("MODULEPATH", "/opt/spack-stack/spack-stack-1.6.0/envs/unified-env/install/modulefiles/intel-oneapi-mpi/2021.9.0/intel/2021.10.0") -prepend_path("MODULEPATH", "/opt/spack-stack/spack-stack-1.6.0/envs/unified-env/install/modulefiles/Core") --- prepend_path("MODULEPATH", "/opt/spack-stack/spack-stack-1.6.0/envs/fms-2024.01/install/modulefiles/Core") - -setenv("stack_intel_ver", "2021.10.0") -setenv("stack_impi_ver", "2021.9.0") - --- load("gnu") -load(pathJoin("stack-intel", (os.getenv("stack_intel_ver") or "None"))) -load(pathJoin("stack-intel-oneapi-mpi", (os.getenv("stack_impi_ver") or "None"))) --- unload("gnu") - --- load(pathJoin("python", (os.getenv("python_ver") or "None"))) - -load(pathJoin("jasper", (os.getenv("jasper_ver") or "None"))) -load(pathJoin("libpng", (os.getenv("libpng_ver") or "None"))) --- load(pathJoin("cdo", (os.getenv("cdo_ver") or "None"))) --- load(pathJoin("R", (os.getenv("R_ver") or "None"))) - -load(pathJoin("hdf5", (os.getenv("hdf5_ver") or "None"))) -load(pathJoin("netcdf-c", (os.getenv("netcdf_c_ver") or "None"))) -load(pathJoin("netcdf-fortran", (os.getenv("netcdf_fortran_ver") or "None"))) - -load(pathJoin("nco", (os.getenv("nco_ver") or "None"))) -load(pathJoin("prod_util", (os.getenv("prod_util_ver") or "None"))) -load(pathJoin("grib-util", (os.getenv("grib_util_ver") or "None"))) -load(pathJoin("g2tmpl", (os.getenv("g2tmpl_ver") or "None"))) --- load(pathJoin("gsi-ncdiag", 
(os.getenv("gsi_ncdiag_ver") or "None"))) -load(pathJoin("crtm", (os.getenv("crtm_ver") or "None"))) -load(pathJoin("bufr", (os.getenv("bufr_ver") or "None"))) -load(pathJoin("wgrib2", (os.getenv("wgrib2_ver") or "None"))) -load(pathJoin("py-f90nml", (os.getenv("py_f90nml_ver") or "None"))) -load(pathJoin("py-netcdf4", (os.getenv("py_netcdf4_ver") or "None"))) -load(pathJoin("py-pyyaml", (os.getenv("py_pyyaml_ver") or "None"))) -load(pathJoin("py-jinja2", (os.getenv("py_jinja2_ver") or "None"))) -load(pathJoin("py-pandas", (os.getenv("py_pandas_ver") or "None"))) -load(pathJoin("py-python-dateutil", (os.getenv("py_python_dateutil_ver") or "None"))) ---load(pathJoin("met", (os.getenv("met_ver") or "None"))) ---load(pathJoin("metplus", (os.getenv("metplus_ver") or "None"))) -load(pathJoin("py-xarray", (os.getenv("py_xarray_ver") or "None"))) - -setenv("WGRIB2","wgrib2") -setenv("UTILROOT",(os.getenv("prod_util_ROOT") or "None")) - ---prepend_path("MODULEPATH", pathJoin("/scratch1/NCEPDEV/global/glopara/git/prepobs/v" .. (os.getenv("prepobs_run_ver") or "None"), "modulefiles")) ---prepend_path("MODULEPATH", pathJoin("/scratch1/NCEPDEV/global/glopara/git/prepobs/feature-GFSv17_com_reorg_log_update/modulefiles")) ---load(pathJoin("prepobs", (os.getenv("prepobs_run_ver") or "None"))) - ---prepend_path("MODULEPATH", pathJoin("/scratch1/NCEPDEV/global/glopara/git/Fit2Obs/v" .. 
(os.getenv("fit2obs_ver") or "None"), "modulefiles")) ---load(pathJoin("fit2obs", (os.getenv("fit2obs_ver") or "None"))) - -whatis("Description: GFS run environment") diff --git a/modulefiles/module_base.noaacloud.lua b/modulefiles/module_base.noaacloud.lua deleted file mode 100644 index 7ff0ad9f962..00000000000 --- a/modulefiles/module_base.noaacloud.lua +++ /dev/null @@ -1,60 +0,0 @@ -help([[ -Load environment to run GFS on noaacloud -]]) - -local spack_mod_path=(os.getenv("spack_mod_path") or "None") -prepend_path("MODULEPATH", spack_mod_path) - -prepend_path("MODULEPATH", "/contrib/spack-stack-rocky8/spack-stack-1.9.2/envs/ue-oneapi-2024.2.1/install/modulefiles/intel-oneapi-mpi/2021.13-mg3hegm/gcc/13.2.0") -prepend_path("MODULEPATH", "/apps/modules/modulefiles") - -gnu_ver=os.getenv("gnu_ver") or "13.2.0" -stack_oneapi_ver=os.getenv("stack_oneapi_ver") or "2024.2.1" -stack_impi_ver=os.getenv("stack_impi_ver") or "2021.13" -cmake_ver=os.getenv("cmake_ver") or "3.27.9" - -load(pathJoin("gnu", gnu_ver)) -load(pathJoin("stack-oneapi", stack_oneapi_ver)) -load(pathJoin("stack-intel-oneapi-mpi", stack_impi_ver)) -load(pathJoin("cmake", cmake_ver)) - -load(pathJoin("python", (os.getenv("python_ver") or "None"))) - -load(pathJoin("jasper", (os.getenv("jasper_ver") or "None"))) -load(pathJoin("libpng", (os.getenv("libpng_ver") or "None"))) -load(pathJoin("cdo", (os.getenv("cdo_ver") or "None"))) ---load(pathJoin("R", (os.getenv("R_ver") or "None"))) -load(pathJoin("perl", (os.getenv("perl_ver") or "None"))) - -load(pathJoin("hdf5", (os.getenv("hdf5_ver") or "None"))) -load(pathJoin("netcdf-c", (os.getenv("netcdf_c_ver") or "None"))) -load(pathJoin("netcdf-fortran", (os.getenv("netcdf_fortran_ver") or "None"))) - -load(pathJoin("nco", (os.getenv("nco_ver") or "None"))) -load(pathJoin("prod_util", (os.getenv("prod_util_ver") or "None"))) -load(pathJoin("grib-util", (os.getenv("grib_util_ver") or "None"))) -load(pathJoin("g2tmpl", (os.getenv("g2tmpl_ver") or "None"))) 
-load(pathJoin("gsi-ncdiag", (os.getenv("gsi_ncdiag_ver") or "None"))) -load(pathJoin("crtm", (os.getenv("crtm_ver") or "None"))) -load(pathJoin("bufr", (os.getenv("bufr_ver") or "None"))) -load(pathJoin("wgrib2", (os.getenv("wgrib2_ver") or "None"))) -load(pathJoin("py-f90nml", (os.getenv("py_f90nml_ver") or "None"))) -load(pathJoin("py-netcdf4", (os.getenv("py_netcdf4_ver") or "None"))) -load(pathJoin("py-pyyaml", (os.getenv("py_pyyaml_ver") or "None"))) -load(pathJoin("py-jinja2", (os.getenv("py_jinja2_ver") or "None"))) -load(pathJoin("py-pandas", (os.getenv("py_pandas_ver") or "None"))) -load(pathJoin("py-python-dateutil", (os.getenv("py_python_dateutil_ver") or "None"))) ---load(pathJoin("met", (os.getenv("met_ver") or "None"))) ---load(pathJoin("metplus", (os.getenv("metplus_ver") or "None"))) -load(pathJoin("py-xarray", (os.getenv("py_xarray_ver") or "None"))) - -setenv("WGRIB2","wgrib2") -setenv("UTILROOT",(os.getenv("prod_util_ROOT") or "None")) - -prepend_path("MODULEPATH", "/contrib/git/prepobs/modulefiles") -load(pathJoin("prepobs", (os.getenv("prepobs_run_ver") or "None"))) - -prepend_path("MODULEPATH", "/contrib/git/Fit2Obs/modulefiles") -load(pathJoin("fit2obs", (os.getenv("fit2obs_ver") or "None"))) - -whatis("Description: GFS run environment") diff --git a/modulefiles/module_base.ursa.lua b/modulefiles/module_base.ursa.lua deleted file mode 100644 index 655c0725219..00000000000 --- a/modulefiles/module_base.ursa.lua +++ /dev/null @@ -1,30 +0,0 @@ -help([[ -Load environment to run GFS on Ursa -]]) - -local spack_mod_path=(os.getenv("spack_mod_path") or "None") -prepend_path("MODULEPATH", spack_mod_path) - --- load(pathJoin("stack-intel", (os.getenv("stack_intel_ver") or "None"))) --- load(pathJoin("stack-intel-oneapi-mpi", (os.getenv("stack_impi_ver") or "None"))) --- load(pathJoin("python", (os.getenv("python_ver") or "None"))) - -load(pathJoin("hpss", (os.getenv("hpss_ver") or "None"))) -load(pathJoin("gempak", (os.getenv("gempak_ver") or 
"None"))) -load(pathJoin("prod_util", (os.getenv("prod_util_ver") or "None"))) - -setenv("WGRIB2","wgrib2") - --- Stop gap fix for wgrib with spack-stack 1.6.0 --- TODO Remove this when spack-stack issue #1097 is resolved --- setenv("WGRIB","wgrib") --- setenv("UTILROOT",(os.getenv("prod_util_ROOT") or "None")) - --- prepend_path("MODULEPATH", pathJoin("/scratch1/NCEPDEV/global/glopara/git/prepobs/v" .. (os.getenv("prepobs_run_ver") or "None"), "modulefiles")) --- load(pathJoin("prepobs", (os.getenv("prepobs_run_ver") or "None"))) - --- prepend_path("MODULEPATH", pathJoin("/scratch1/NCEPDEV/global/glopara/git/Fit2Obs/v" .. (os.getenv("fit2obs_ver") or "None"), "modulefiles")) --- load(pathJoin("fit2obs", (os.getenv("fit2obs_ver") or "None"))) - -whatis("Description: GFS run environment") - diff --git a/modulefiles/module_gwci.ursa.lua b/modulefiles/module_gwci.ursa.lua deleted file mode 100644 index 4cd5a4d3c7a..00000000000 --- a/modulefiles/module_gwci.ursa.lua +++ /dev/null @@ -1,12 +0,0 @@ -help([[ -Load environment to run GFS workflow setup scripts on Ursa -]]) - -prepend_path("MODULEPATH", "/scratch3/NCEPDEV/nems/role.epic/spack-stack/spack-stack-1.6.0/envs/gsi-addon-dev-rocky8/install/modulefiles/Core") - -load(pathJoin("stack-intel", os.getenv("2021.5.0"))) -load(pathJoin("stack-intel-oneapi-mpi", os.getenv("2021.5.1"))) - --- load(pathJoin("wgrib2", "2.0.8")) - -whatis("Description: GFS run setup CI environment") diff --git a/modulefiles/module_gwsetup.container.lua b/modulefiles/module_gwsetup.container.lua deleted file mode 100644 index cec427f000d..00000000000 --- a/modulefiles/module_gwsetup.container.lua +++ /dev/null @@ -1,17 +0,0 @@ -help([[ -Load environment to run GFS workflow setup scripts in container -]]) - -prepend_path("MODULEPATH", "/opt/spack-stack/spack-stack-1.9.1/envs/unified-env/install/modulefiles/Core") - -stack_oneapi_ver=os.getenv("stack_oneapi_ver") or "2024.2.0" -stack_impi_ver=os.getenv("stack_impi_ver") or "2021.13" - 
-load(pathJoin("stack-oneapi", stack_oneapi_ver)) -load(pathJoin("stack-intel-oneapi-mpi", stack_impi_ver)) - -load("py-jinja2") -load("py-pyyaml") -load("py-numpy") - -whatis("Description: GFS run setup environment") diff --git a/modulefiles/module_gwsetup.containerized.lua b/modulefiles/module_gwsetup.containerized.lua deleted file mode 100644 index 06cf5668502..00000000000 --- a/modulefiles/module_gwsetup.containerized.lua +++ /dev/null @@ -1,18 +0,0 @@ -help([[ -Load environment to run GFS workflow setup scripts on Hera -]]) - -load(pathJoin("rocoto")) - -prepend_path("MODULEPATH", "/scratch1/NCEPDEV/nems/role.epic/spack-stack/spack-stack-1.6.0/envs/gsi-addon-dev-rocky8/install/modulefiles/Core") - -local stack_intel_ver=os.getenv("stack_intel_ver") or "2021.5.0" -local python_ver=os.getenv("python_ver") or "3.11.6" - -load(pathJoin("stack-intel", stack_intel_ver)) -load(pathJoin("python", python_ver)) -load("py-jinja2") -load("py-pyyaml") -load("py-numpy") - -whatis("Description: GFS run setup environment") diff --git a/modulefiles/module_gwsetup.ursa.lua b/modulefiles/module_gwsetup.ursa.lua deleted file mode 100644 index f22b6dd94c8..00000000000 --- a/modulefiles/module_gwsetup.ursa.lua +++ /dev/null @@ -1,13 +0,0 @@ -help([[ -Load environment to run GFS workflow setup scripts on Ursa -]]) - -load(pathJoin("rocoto")) - --- prepend_path("MODULEPATH", "/scratch1/NCEPDEV/nems/role.epic/spack-stack/spack-stack-1.6.0/envs/gsi-addon-dev-rocky8/install/modulefiles/Core") - --- local stack_intel_ver=os.getenv("stack_intel_ver") or "2021.5.0" - --- load(pathJoin("stack-intel", stack_intel_ver)) - -whatis("Description: GFS run setup environment") diff --git a/modulefiles/module_run.ursa.lua b/modulefiles/module_run.ursa.lua deleted file mode 100644 index 3cc54a308d6..00000000000 --- a/modulefiles/module_run.ursa.lua +++ /dev/null @@ -1,19 +0,0 @@ -help([[ -Load environment to run GFS on Ursa -]]) - -prepend_path("MODULEPATH", 
"/scratch3/NCEPDEV/nems/role.epic/spack-stack/spack-stack-1.6.0/envs/gsi-addon-dev-fms-2024.01/install/modulefiles/Core") - --- load(pathJoin("hpss", (os.getenv("hpss_ver") or "None"))) - -load(pathJoin("stack-intel", (os.getenv("stack_intel_ver") or "2021.5.0"))) -load(pathJoin("stack-intel-oneapi-mpi", (os.getenv("stack_impi_ver") or "2021.5.1"))) -load(pathJoin("python", (os.getenv("python_ver") or "3.11.6"))) -load(pathJoin("prod_util", (os.getenv("prod_util_ver") or "2.1.1"))) - -setenv("WGRIB2","wgrib2") -setenv("WGRIB","wgrib") --- setenv("UTILROOT",(os.getenv("prod_util_ROOT") or "None")) - -whatis("Description: GFS run host environment") - diff --git a/sorc/com.sh b/sorc/com.sh deleted file mode 100755 index a6a7fecfe6a..00000000000 --- a/sorc/com.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash - -cd /contrib/Wei.Huang/src/global-workflow-cloud/sorc - -./build_all.sh gfs sfs gefs - From 5814ef9fc37cfb1a370a108cd80460be9ee5d909 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Thu, 21 Aug 2025 15:53:49 +0000 Subject: [PATCH 080/134] run check_land_input_orography.py using container python, if run with container --- ush/forecast_postdet.sh | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/ush/forecast_postdet.sh b/ush/forecast_postdet.sh index d918a15c858..0acdd913487 100755 --- a/ush/forecast_postdet.sh +++ b/ush/forecast_postdet.sh @@ -106,8 +106,14 @@ FV3_postdet() { # Check for consistency # TODO: the checker has a --fatal option, which is not used here. This needs to be decided how to handle. 
if [[ "${CHECK_LAND_RESTART_OROG:-NO}" == "YES" ]]; then - "${USHgfs}/check_land_input_orography.py" \ - --input_dir "${DATA}/INPUT" --orog_dir "${DATA}/INPUT" + if [[ "$RUN_WITH_CONTAINER" == "YES" ]]; then + ${USHgfs}/../exec/run_python.sh \ + "${USHgfs}/check_land_input_orography.py" \ + --input_dir "${DATA}/INPUT" --orog_dir "${DATA}/INPUT" + else + "${USHgfs}/check_land_input_orography.py" \ + --input_dir "${DATA}/INPUT" --orog_dir "${DATA}/INPUT" + fi err=$? if [[ ${err} -ne 0 ]]; then echo "FATAL ERROR: check_land_input_orography.py returned error code ${err}, ABORT!" From d79760798a2bd32e44e5e0142601133c331c620e Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Thu, 21 Aug 2025 19:51:16 +0000 Subject: [PATCH 081/134] detect container --- dev/container/shell-in-container.sh | 2 +- ush/detect_machine.sh | 6 ++++++ ush/module-setup.sh | 6 ++++++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/dev/container/shell-in-container.sh b/dev/container/shell-in-container.sh index 8e7441d87c3..94ba9f65cd3 100755 --- a/dev/container/shell-in-container.sh +++ b/dev/container/shell-in-container.sh @@ -8,7 +8,7 @@ source "${HOMEgfs}/ush/detect_machine.sh" sif=ubuntu22.04-intel-ufs-env-v1.9.2.img if [[ ${MACHINE_ID} = ursa* ]] ; then - img=/scratch4/NAGAPE/epic/${USER}/demo/${sif} + img=/scratch4/NAGAPE/epic/${USER}/containers/${sif} bindings="-B /scratch3 -B /scratch4" elif [[ ${MACHINE_ID} = gaea* ]] ; then img=/gpfs/f6/scratch/${USER}/container/${sif} diff --git a/ush/detect_machine.sh b/ush/detect_machine.sh index 8a3ad5c08c8..e18aaca5acd 100755 --- a/ush/detect_machine.sh +++ b/ush/detect_machine.sh @@ -8,6 +8,12 @@ # # Thank you for your contribution +# overwrite MACHINE_ID if in container +if [[ -d /opt/spack-stack && -v SINGULARITY_CONTAINER ]]; then + # We are in a container + MACHINE_ID=container +fi + # If the MACHINE_ID variable is set, skip this script. 
if [[ -n "${MACHINE_ID:-}" ]]; then return diff --git a/ush/module-setup.sh b/ush/module-setup.sh index 4065c67e4cd..76df65c6e3c 100755 --- a/ush/module-setup.sh +++ b/ush/module-setup.sh @@ -102,6 +102,12 @@ elif [[ ${MACHINE_ID} = "noaacloud" ]]; then # We are on NOAA Cloud module purge +elif [[ ${MACHINE_ID} == container ]] ; then + # We are in a container + # Always source the lmod init script to override the system module paths and instead use the container modules + source /usr/lmod/lmod/init/bash + module purge + else echo WARNING: UNKNOWN PLATFORM 1>&2 fi From 7077d1ac8e7c5be5b105957ecda7f046cdb7d82b Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Fri, 22 Aug 2025 15:56:20 +0000 Subject: [PATCH 082/134] testing on ursa --- bin/run_python.sh | 11 ----- bin/run_wgrib2.sh | 9 ---- dev/container/atmos_products/run_mpmd.sh | 6 +-- dev/parm/config/gfs/config.resources.URSA | 58 ----------------------- dev/ush/load_gw_run_modules.sh | 8 ++-- dev/workflow/hosts.py | 12 ++--- env/URSA.env | 6 +++ scripts/exglobal_forecast.sh | 5 -- ush/run_gfs_model.sh | 24 ---------- ush/run_python.sh | 28 ----------- 10 files changed, 19 insertions(+), 148 deletions(-) delete mode 100755 bin/run_python.sh delete mode 100755 bin/run_wgrib2.sh delete mode 100755 ush/run_gfs_model.sh delete mode 100755 ush/run_python.sh diff --git a/bin/run_python.sh b/bin/run_python.sh deleted file mode 100755 index 0462c8e070c..00000000000 --- a/bin/run_python.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash - containerdir=/gpfs/f6/scratch/Wei.Huang/container - img=${containerdir}/ubuntu22.04-intel-ufs-env-v1.6.0.img - - HOMEgfs=/gpfs/f6/scratch/Wei.Huang/src/global-workflow-cloud - cmd=${HOMEgfs}/ush/run_python.sh - arg="$@" - echo "running: singularity exec ${img} $cmd $arg" - singularity exec \ - -B /gpfs/f6/scratch \ - ${img} $cmd $arg diff --git a/bin/run_wgrib2.sh b/bin/run_wgrib2.sh deleted file mode 100755 index a80d80c31ae..00000000000 --- a/bin/run_wgrib2.sh +++ /dev/null @@ -1,9 +0,0 @@ 
-#!/bin/bash - containerdir=/gpfs/f6/scratch/Wei.Huang/container - img=${containerdir}/gw-container.sif - cmd=/opt/global-workflow-cloud/ush/run_wgrib2.sh - arg="$@" - echo "running: singularity exec ${img} $cmd $arg" - singularity exec \ - -B /scratch4 \ - ${img} $cmd $arg diff --git a/dev/container/atmos_products/run_mpmd.sh b/dev/container/atmos_products/run_mpmd.sh index 650d045eae6..5e6dcc634db 100755 --- a/dev/container/atmos_products/run_mpmd.sh +++ b/dev/container/atmos_products/run_mpmd.sh @@ -31,7 +31,7 @@ set -x source /usr/lmod/lmod/init/bash -module use ${HOMEgfs}/sorc/gfs_utils.fd/modulefiles +module use "${HOMEgfs}/sorc/gfs_utils.fd/modulefiles" module load gfsutils_container.intel module load wgrib2 @@ -51,7 +51,7 @@ fi export OMP_NUM_THREADS=1 # Determine the number of MPMD processes from incoming ${cmdfile} -nprocs=$(wc -l < "${cmdfile}") +#nprocs=$(wc -l < "${cmdfile}") # Local MPMD file containing instructions to run in CFP mpmd_cmdfile="${DATA:-}/mpmd_cmdfile" @@ -71,7 +71,7 @@ echo "#!/bin/bash" >> "${mpmd_cmdfile}" while IFS= read -r line; do echo "${line} > mpmd.${nm}.out" >> "${mpmd_cmdfile}" echo "Line ${nm}: ${line}" - ${line} > mpmd.${nm}.out & + ${line} > "mpmd.${nm}.out" & ((nm=nm+1)) done < "${cmdfile}" chmod 755 "${mpmd_cmdfile}" diff --git a/dev/parm/config/gfs/config.resources.URSA b/dev/parm/config/gfs/config.resources.URSA index b961c50258f..a00edf5e892 100644 --- a/dev/parm/config/gfs/config.resources.URSA +++ b/dev/parm/config/gfs/config.resources.URSA @@ -1,43 +1,5 @@ #! 
/usr/bin/env bash -<<<<<<< HEAD -# Ursa-specific job resources - -case ${step} in - "prep") - # Run on 7 nodes for memory requirement - tasks_per_node=2 - ;; - - "stage_ic") - tasks_per_node=1 - export PARTITION_BATCH="u1-service" - ;; - - "anal") - if [[ "${CASE}" == "C384" ]]; then - export ntasks=270 - export threads_per_task=8 - export tasks_per_node=$(( max_tasks_per_node / threads_per_task )) - fi - ;; - - "atmanlvar" | "atmensanlobs" | "atmensanlsol") - export tasks_per_node=12 - ;; - - "eupd") - case "${CASE}" in - "C1152" | "C768") - export ntasks=80 - export threads_per_task=20 - ;; - "C384") - export ntasks=80 - ;; - "C192" | "C96" | "C48") - export threads_per_task=4 -======= # Hera-specific job resources case ${step} in @@ -88,7 +50,6 @@ case ${step} in export ntasks=480 export threads_per_task=6 export tasks_per_node=8 ->>>>>>> 3e85a5aeaf2464f8640d9458bd419f736c3b0d65 ;; *) ;; @@ -96,24 +57,6 @@ case ${step} in export tasks_per_node=$(( max_tasks_per_node / threads_per_task )) ;; -<<<<<<< HEAD - "ecen") - if [[ "${CASE}" == "C768" || "${CASE}" == "C1152" ]]; then export threads_per_task=6; fi - export tasks_per_node=$(( max_tasks_per_node / threads_per_task )) - ;; - - "upp") - if (( "${CASE:1}" >= 768 )); then - # Run fewer tasks per node for memory - tasks_per_node=20 - fi - ;; - - "epos") - tasks_per_node=20 - ;; - -======= "aeroanlvar") export tasks_per_node=48 ;; @@ -138,7 +81,6 @@ case ${step} in ;; esac ;; ->>>>>>> 3e85a5aeaf2464f8640d9458bd419f736c3b0d65 *) ;; esac diff --git a/dev/ush/load_gw_run_modules.sh b/dev/ush/load_gw_run_modules.sh index 7a057b0392b..604338d1012 100755 --- a/dev/ush/load_gw_run_modules.sh +++ b/dev/ush/load_gw_run_modules.sh @@ -2,14 +2,14 @@ ############################################################### if [[ "$-" == *x* ]]; then - set_x=YES + set_x=YES else - set_x=NO + set_x=NO fi if [[ "${DEBUG_WORKFLOW:-NO}" == "NO" ]]; then - echo "Loading modules quietly..." - set +x + echo "Loading modules quietly..." 
+ set +x fi # Setup runtime environment by loading modules diff --git a/dev/workflow/hosts.py b/dev/workflow/hosts.py index 80d159a6353..ec663cfed58 100644 --- a/dev/workflow/hosts.py +++ b/dev/workflow/hosts.py @@ -63,19 +63,19 @@ def detect(self) -> None: for line in f: fields = line.strip().split() mount_point = fields[4] - if mount_point == "/home": - mount_source = fields[9] - if "hera" in mount_source.lower(): - self.machine = "HERA" - elif "ursa" in mount_source.lower(): + if mount_point.find("/home") >= 0: + mount_source = fields[9].lower() + if mount_source.find("ursa") >= 0: self.machine = "URSA" + elif mount_source.find("hera") >= 0: + self.machine = "HERA" # TODO: When Hera is no longer used, remove this check and switch to Ursa. # Check if this is the GitHub runner if self.machine != 'HERA' and self.machine != 'URSA': machine = socket.gethostname().upper() print(f'Detected host {machine}; assuming this is a GitHub runner.') - self.machine = 'HERA' + self.machine = 'URSA' elif os.path.exists('/work/noaa'): # Orion or Hercules diff --git a/env/URSA.env b/env/URSA.env index e21e1dedeaa..6e0a3c31660 100755 --- a/env/URSA.env +++ b/env/URSA.env @@ -144,6 +144,7 @@ elif [[ "${step}" = "marineanlchkpt" ]]; then export APRUNCFP="${launcher} -n \$ncmd --multi-prog" export APRUN_MARINEANLCHKPT="${APRUN_default}" + export APRUN_MARINEANLOBSSTATS="${launcher} -n 1" elif [[ "${step}" = "ocnanalecen" ]]; then @@ -241,6 +242,11 @@ elif [[ "${step}" = "eupd" ]]; then export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" elif [[ "${step}" = "fcst" ]] || [[ "${step}" = "efcs" ]]; then + #fast + #export launcher="srun -l --hint=nomultithread --distribution=block:block" + #slow3 + #export launcher="srun -l --export=ALL --hint=nomultithread" + export launcher="srun -l --hint=nomultithread" (( nnodes = (ntasks+tasks_per_node-1)/tasks_per_node )) (( ufs_ntasks = nnodes*tasks_per_node )) diff --git a/scripts/exglobal_forecast.sh b/scripts/exglobal_forecast.sh index 
cbc6942bf44..776018019c3 100755 --- a/scripts/exglobal_forecast.sh +++ b/scripts/exglobal_forecast.sh @@ -175,11 +175,6 @@ else export OMP_NUM_THREADS=${UFS_THREADS:-1} fi -echo "EXECgfs: ${EXECgfs}" -echo "FCSTEXEC: ${FCSTEXEC}" -echo "DATA: ${DATA}" -echo "APRUN_UFS: ${APRUN_UFS}" - cpreq "${EXECgfs}/${FCSTEXEC}" "${DATA}/" ${APRUN_UFS} "${DATA}/${FCSTEXEC}" 1>&1 2>&2 && true export err=$? diff --git a/ush/run_gfs_model.sh b/ush/run_gfs_model.sh deleted file mode 100755 index 58553540c99..00000000000 --- a/ush/run_gfs_model.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env bash - -# Set OMP_NUM_THREADS to 1 to avoid oversubscription when doing MPMD -export OMP_NUM_THREADS=1 -export FPATH=/usr/lmod/lmod/libexec -export HOMEgfs=/gpfs/f6/scratch/Wei.Huang/src/global-workflow-cloud -source /usr/lmod/lmod/init/bash -module purge -source ${HOMEgfs}/versions/run.container.ver -module use ${HOMEgfs}/modulefiles -module load module_base.container -export LD_LIBRARY_PATH=/opt/intel/oneapi/compiler/2023.2.3/linux/compiler/lib/intel64_lin:$LD_LIBRARY_PATH -export LD_LIBRARY_PATH=/opt/intel/oneapi/compiler/2024.0/lib:$LD_LIBRARY_PATH -#export I_MPI_DEBUG=30 -#export I_MPI_FABRICS=shm:tcp -#export FI_PROVIDER=shm:tcp -export I_MPI_DEBUG=1 -export I_MPI_FABRICS=shm:ofi -export I_MPI_OFI_PROVIDER=tcp -export FI_PROVIDER=tcp -export FI_TCP_IFACE=eth0 -args=$@ -${HOMEgfs}/sorc/ufs_model.fd/tests/gfs_model.x $args - diff --git a/ush/run_python.sh b/ush/run_python.sh deleted file mode 100755 index 1903e172611..00000000000 --- a/ush/run_python.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -export FPATH=/usr/lmod/lmod/libexec - -export HOMEgfs=/gpfs/f6/scratch/Wei.Huang/src/global-workflow-cloud -source /usr/lmod/lmod/init/bash -module purge -source ${HOMEgfs}/versions/run.container.ver -module use ${HOMEgfs}/modulefiles -module load module_base.container - -#module load python/3.10.13 -#module load py-f90nml/1.4.3 -#module load py-netcdf4/1.5.8 -#module load py-pyyaml/6.0 
-#module load py-jinja2/3.1.2 -#module load py-pandas/1.5.3 -#module load py-numpy/1.22.3 -#module load py-xarray/2023.7.0 -#module load py-python-dateutil/2.8.2 - -wxflowPATH="${HOMEgfs}/ush/python" -export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${HOMEgfs}/ush:${wxflowPATH}" - -arg="$@" - -/opt/spack-stack/spack-stack-1.6.0/envs/unified-env/install/intel/2021.10.0/python-3.10.13-h3oyipv/bin/python $arg - From 7ced4476c4907aa22937ee69d897422c067fad1d Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Tue, 26 Aug 2025 16:20:17 +0000 Subject: [PATCH 083/134] sync with emc repo --- dev/container/utils/link_model.sh | 15 +++++++++++- dev/parm/config/sfs/config.resources | 1 - dev/ush/load_gw_run_modules.sh | 4 ++++ dev/ush/load_ufswm_modules.sh | 5 ++-- dev/workflow/generate_workflows.sh | 32 ++++++++----------------- env/URSA.env | 2 +- sorc/link_workflow.sh | 21 ++++------------- ush/load_fv3gfs_modules.sh | 35 ---------------------------- ush/preamble.sh | 6 ++--- 9 files changed, 38 insertions(+), 83 deletions(-) delete mode 100755 ush/load_fv3gfs_modules.sh diff --git a/dev/container/utils/link_model.sh b/dev/container/utils/link_model.sh index a7e9775d959..7fb5cd6c3e3 100755 --- a/dev/container/utils/link_model.sh +++ b/dev/container/utils/link_model.sh @@ -73,7 +73,7 @@ cat > $link_model_script << EOF_LINK #!/bin/bash #Need these lines on AWS to run more than one node. - export I_MPI_DEBUG=10 +#export I_MPI_DEBUG=10 #export I_MPI_FABRICS=shm:ofi #export I_MPI_OFI_PROVIDER=tcp #export FI_PROVIDER=tcp @@ -86,7 +86,20 @@ cat > $link_model_script << EOF_LINK #export SINGULARITY_DEBUG=0 #unset SINGULARITY_DEBUG +# --- MPI and Fabric Configuration --- +# 1. Force Intel MPI to use Slurm's PMI2 library for job startup +# for Ursa +export I_MPI_PMI_LIBRARY=/apps/slurm/default/lib/libpmi2.so + +# 2. Set the OFI provider to Mellanox InfiniBand +export FI_PROVIDER=mlx + +# 3. 
Disable problematic shared memory transports in UCX +export UCX_TLS=^sm,cma +# --- End of Configuration --- + export LD_LIBRARY_PATH=$(dirname ${container}) + set +x arg="\$@" singularity exec \\ ${bindings} \\ diff --git a/dev/parm/config/sfs/config.resources b/dev/parm/config/sfs/config.resources index d7d8f7cf6bd..803e6282711 100644 --- a/dev/parm/config/sfs/config.resources +++ b/dev/parm/config/sfs/config.resources @@ -23,7 +23,6 @@ case ${machine} in "ORION") max_tasks_per_node=40;; "HERCULES") max_tasks_per_node=80;; "GAEAC6") max_tasks_per_node=192;; - "URSA") max_tasks_per_node=192;; "AWSPW") export PARTITION_BATCH="compute" max_tasks_per_node=48 diff --git a/dev/ush/load_gw_run_modules.sh b/dev/ush/load_gw_run_modules.sh index 604338d1012..d12c793a320 100755 --- a/dev/ush/load_gw_run_modules.sh +++ b/dev/ush/load_gw_run_modules.sh @@ -28,6 +28,10 @@ fi # Add wxflow to PYTHONPATH wxflowPATH="${HOMEgfs}/ush/python" PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${HOMEgfs}/ush:${wxflowPATH}" +# Set up the PYTHONPATH to include wxflow from HOMEgfs +if [[ -d "${HOMEgfs}/sorc/wxflow/src" ]]; then + PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${HOMEgfs}/sorc/wxflow/src" +fi export PYTHONPATH # Restore stack soft limit: diff --git a/dev/ush/load_ufswm_modules.sh b/dev/ush/load_ufswm_modules.sh index 9c478dd83d8..2d76f36b5a4 100755 --- a/dev/ush/load_ufswm_modules.sh +++ b/dev/ush/load_ufswm_modules.sh @@ -12,13 +12,12 @@ ulimit_s=$(ulimit -S -s) source "${HOMEgfs}/ush/preamble.sh" unset MACHINE_ID +# Add HOMEgfs/ush/python to PYTHONPATH +PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${HOMEgfs}/ush/python" # Set up the PYTHONPATH to include wxflow from HOMEgfs if [[ -d "${HOMEgfs}/sorc/wxflow/src" ]]; then PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${HOMEgfs}/sorc/wxflow/src" fi - -# Add HOMEgfs/ush/python to PYTHONPATH -PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${HOMEgfs}/ush/python" export PYTHONPATH # Restore stack soft limit: diff --git a/dev/workflow/generate_workflows.sh 
b/dev/workflow/generate_workflows.sh index caf42e2b41c..11537bb4791 100755 --- a/dev/workflow/generate_workflows.sh +++ b/dev/workflow/generate_workflows.sh @@ -471,30 +471,18 @@ fi if [[ "${_verbose}" == true ]]; then printf "Linking the workflow\n\n" fi -if [[ "${_run_with_container}" == true ]]; then - if ! "${HOMEgfs}/sorc/link_workflow.sh" -r >& stdout; then - cat stdout - echo "link_workflow.sh failed!" - if [[ "${_set_email}" == true ]]; then - _stdout=$(cat stdout) - send_email "link_workflow.sh failed with the message"$'\n'"${_stdout}" - fi - rm -f stdout - exit 9 - fi -else - if ! "${HOMEgfs}/sorc/link_workflow.sh" >& stdout; then - cat stdout - echo "link_workflow.sh failed!" - if [[ "${_set_email}" == true ]]; then - _stdout=$(cat stdout) - send_email "link_workflow.sh failed with the message"$'\n'"${_stdout}" - fi - rm -f stdout - exit 9 - fi +if ! "${HOMEgfs}/sorc/link_workflow.sh" >& stdout; then + cat stdout + echo "link_workflow.sh failed!" + if [[ "${_set_email}" == true ]]; then + _stdout=$(cat stdout) + send_email "link_workflow.sh failed with the message"$'\n'"${_stdout}" + fi + rm -f stdout + exit 9 fi rm -f stdout +unset HOMEgfs # Configure the environment for running create_experiment.py if [[ "${_verbose}" == true ]]; then diff --git a/env/URSA.env b/env/URSA.env index 6e0a3c31660..73ffd2749b9 100755 --- a/env/URSA.env +++ b/env/URSA.env @@ -246,7 +246,7 @@ elif [[ "${step}" = "fcst" ]] || [[ "${step}" = "efcs" ]]; then #export launcher="srun -l --hint=nomultithread --distribution=block:block" #slow3 #export launcher="srun -l --export=ALL --hint=nomultithread" - export launcher="srun -l --hint=nomultithread" + export launcher="srun --mpi=pmi2 -l --hint=nomultithread" (( nnodes = (ntasks+tasks_per_node-1)/tasks_per_node )) (( ufs_ntasks = nnodes*tasks_per_node )) diff --git a/sorc/link_workflow.sh b/sorc/link_workflow.sh index b39c6bffc42..f9d23c7a012 100755 --- a/sorc/link_workflow.sh +++ b/sorc/link_workflow.sh @@ -4,20 +4,17 @@ 
HOMEgfs="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." >/dev/null 2>&1 && pwd)" TRACE=NO source "${HOMEgfs}/ush/preamble.sh" -_run_with_container=false function usage() { cat < Date: Tue, 26 Aug 2025 22:28:13 +0000 Subject: [PATCH 084/134] remove duplicated scripts --- .../exec.exglobal_atmos_products.sh | 2 +- .../atmos_products/exglobal_atmos_products.sh | 314 ------------------ .../atmos_products/interp_atmos_master.sh | 89 ----- .../atmos_products/interp_atmos_sflux.sh | 64 ---- dev/container/atmos_products/run_mpmd.sh | 98 ------ dev/container/gen-run-cases.sh | 5 +- dev/ush/load_gw_run_modules.sh | 65 ++++ scripts/exglobal_atmos_products.sh | 2 + ush/interp_atmos_master.sh | 2 + ush/interp_atmos_sflux.sh | 2 + ush/preamble.sh | 54 +-- ush/run_mpmd.sh | 72 ++-- 12 files changed, 94 insertions(+), 675 deletions(-) delete mode 100755 dev/container/atmos_products/exglobal_atmos_products.sh delete mode 100755 dev/container/atmos_products/interp_atmos_master.sh delete mode 100755 dev/container/atmos_products/interp_atmos_sflux.sh delete mode 100755 dev/container/atmos_products/run_mpmd.sh diff --git a/dev/container/atmos_products/exec.exglobal_atmos_products.sh b/dev/container/atmos_products/exec.exglobal_atmos_products.sh index fa2bc370741..b658528b2b5 100755 --- a/dev/container/atmos_products/exec.exglobal_atmos_products.sh +++ b/dev/container/atmos_products/exec.exglobal_atmos_products.sh @@ -5,5 +5,5 @@ singularity exec \ BINDINGS \ SIF \ - HOMEgfs/ush/container/exglobal_atmos_products.sh $arg + HOMEgfs/scripts/exglobal_atmos_products.sh $arg diff --git a/dev/container/atmos_products/exglobal_atmos_products.sh b/dev/container/atmos_products/exglobal_atmos_products.sh deleted file mode 100755 index 91bef9b850c..00000000000 --- a/dev/container/atmos_products/exglobal_atmos_products.sh +++ /dev/null @@ -1,314 +0,0 @@ -#! 
/usr/bin/env bash - -source /usr/lmod/lmod/init/bash -module use ${HOMEgfs}/sorc/gfs_utils.fd/modulefiles -module load gfsutils_container.intel -module load wgrib2/3.6.0 -module load prod_util/2.1.1 -module list - -export I_MPI_TMPDIR=/tmp -export PBS_ENVIRONMENT="" -export LSB_JOBID=0 -export I_MPI_HYDRA_RMK=slurm -export LOADL_HOSTFILE="" -export PE_HOSTFILE="" -export I_MPI_YARN=no -export NB_PARALLEL_JOB_HOSTS="" - -#source /opt/intel/oneapi/setvars.sh --force -#export OCL_ICD_FILENAMES="" - -# Programs used -export UTILROOT=${prod_util_ROOT} -#export WGRIB2=${wgrib2_ROOT}/bin/wgrib2 -export WGRIB2=wgrib2 - -# Scripts used -INTERP_ATMOS_MASTERSH=${USHgfs}/container/interp_atmos_master.sh -INTERP_ATMOS_SFLUXSH=${USHgfs}/container/interp_atmos_sflux.sh - -# Variables used in this job -downset=${downset:-1} # No. of groups of pressure grib2 products to create -ntasks_atmos_products=${ntasks_atmos_products:-8} # no. of processors available to process each group - -# WGNE related options -WGNE=${WGNE:-NO} # Create WGNE products -FHMAX_WGNE=${FHMAX_WGNE:-0} # WGNE products are created for first FHMAX_WGNE forecast hours (except 0) - -cd "${DATA}" || exit 1 - -# Set paramlist files based on FORECAST_HOUR (-1, 0, 3, 6, etc.) 
-# Determine if supplemental products (PGBS) (1-degree and 1/2-degree) should be generated -if [[ ${FORECAST_HOUR} -le 0 ]]; then - if [[ ${FORECAST_HOUR} -lt 0 ]]; then - fhr3="anl" - paramlista="${paramlista_anl}" - FLXGF="NO" - elif [[ ${FORECAST_HOUR} == 0 ]]; then - fhr3=$(printf "f%03d" "${FORECAST_HOUR}") - paramlista="${paramlista_f000}" - fi - PGBS="YES" -else - fhr3=$(printf "f%03d" "${FORECAST_HOUR}") - if (( FORECAST_HOUR%FHOUT_PGBS == 0 )); then - PGBS="YES" - fi -fi - -#----------------------------------------------------- -# Section creating pressure grib2 interpolated products - -# Files needed by ${INTERP_ATMOS_MASTERSH} -MASTER_FILE="${COMIN_ATMOS_MASTER}/${PREFIX}master.grb2${fhr3}" - -# Get inventory from ${MASTER_FILE} that matches patterns from ${paramlista} -# Extract this inventory from ${MASTER_FILE} into a smaller tmpfile or tmpfileb based on paramlista or paramlistb -# shellcheck disable=SC2312 -${WGRIB2} "${MASTER_FILE}" | grep -F -f "${paramlista}" | ${WGRIB2} -i -grib "tmpfile_${fhr3}" "${MASTER_FILE}" && true -export err=$? -if [[ ${err} -ne 0 ]]; then - err_exit "wgrib2 failed to create intermediate grib2 file from ${MASTER_FILE} using ${paramlista}" -fi - -# Do the same as above for ${paramlistb} -if [[ ${downset} -eq 2 ]]; then - # shellcheck disable=SC2312 - ${WGRIB2} "${MASTER_FILE}" | grep -F -f "${paramlistb}" | ${WGRIB2} -i -grib "tmpfileb_${fhr3}" "${MASTER_FILE}" && true - export err=$? 
- if [[ ${err} -ne 0 ]]; then - err_exit "wgrib2 failed to create intermediate grib2 file from ${MASTER_FILE} using ${paramlistb}" - fi -fi - -# Determine grids once and save them as a string and an array for processing -grid_string="0p25" -if [[ "${PGBS:-}" == "YES" ]]; then - grid_string="${grid_string}:0p50:1p00" -else - echo "Supplemental product generation is disable for fhr = ${fhr3}" - PGBS="NO" # Can't generate supplemental products if PGBS is not YES -fi -# Also transform the ${grid_string} into an array for processing -IFS=':' read -ra grids <<< "${grid_string}" - -for (( nset=1 ; nset <= downset ; nset++ )); do - - echo "Begin processing nset = ${nset}" - - # Number of processors available to process $nset - nproc=${ntasks} - - # Each set represents a group of files - if [[ ${nset} == 1 ]]; then - grp="" # TODO: this should be "a" when we eventually rename the pressure grib2 files per EE2 convention - elif [[ ${nset} == 2 ]]; then - grp="b" - fi - - # process grib2 chunkfiles to interpolate using MPMD - tmpfile="tmpfile${grp}_${fhr3}" - - # shellcheck disable=SC2312 - ncount=$(${WGRIB2} "${tmpfile}" | wc -l) - if [[ ${nproc} -gt ${ncount} ]]; then - echo "WARNING: Total no. of available processors '${nproc}' exceeds no. of records '${ncount}' in ${tmpfile}" - echo "Reduce nproc to ${ncount} (or less) to not waste resources" - fi - inv=$(( ncount / nproc )) - rm -f "${DATA}/poescript" - - last=0 - for (( iproc = 1 ; iproc <= nproc ; iproc++ )); do - first=$((last + 1)) - last=$((last + inv)) - if [[ ${last} -gt ${ncount} ]]; then - last=${ncount} - fi - - # if final record of is u-component, add next record v-component - # if final record is land, add next record icec - # grep returns 1 if no match is found, so temporarily turn off exit on non-zero rc - set +e - # shellcheck disable=SC2312 - ${WGRIB2} -d "${last}" "${tmpfile}" | grep -E -i "ugrd|ustm|uflx|u-gwd|land|maxuw" - rc=$? 
- set_strict - if [[ ${rc} == 0 ]]; then # Matched the grep - last=$(( last + 1 )) - fi - if [[ ${iproc} -eq ${nproc} ]]; then - last=${ncount} - fi - - # Break tmpfile into processor specific chunks in preparation for MPMD - ${WGRIB2} "${tmpfile}" -for "${first}":"${last}" -grib "${tmpfile}_${iproc}" && true - export err=$? - if [[ ${err} -ne 0 ]]; then - err_exit "wgrib2 failed to geneate an intermediate grib2 file from ${tmpfile} records ${first} to ${last}" - fi - input_file="${tmpfile}_${iproc}" - output_file_prefix="pgb2${grp}file_${fhr3}_${iproc}" - echo "${INTERP_ATMOS_MASTERSH} ${input_file} ${output_file_prefix} ${grid_string}" >> "${DATA}/poescript" - - # if at final record and have not reached the final processor then write echo's to - # poescript for remaining processors - if [[ ${last} -eq ${ncount} ]]; then - for (( pproc = iproc+1 ; pproc < nproc ; pproc++ )); do - echo "/bin/echo ${pproc}" >> "${DATA}/poescript" - done - break - fi - done # for (( iproc = 1 ; iproc <= nproc ; iproc++ )); do - - # Run with MPMD or serial - "${USHgfs}/container/run_mpmd.sh" "${DATA}/poescript" && true - export err=$? - if [[ ${err} -ne 0 ]]; then - err_exit "Some or all interpolations of the master grib file failed during MPMD execution!" 
- fi - - # We are in a loop over downset, save output from mpmd into nset specific output - mv mpmd.out "mpmd_${nset}.out" - - # Concatenate grib files from each processor into a single one - # and clean-up as you go - echo "Concatenating processor-specific grib2 files into a single product file" - for (( iproc = 1 ; iproc <= nproc ; iproc++ )); do - for grid in "${grids[@]}"; do - cat "pgb2${grp}file_${fhr3}_${iproc}_${grid}" >> "pgb2${grp}file_${fhr3}_${grid}" - rm -f "pgb2${grp}file_${fhr3}_${iproc}_${grid}" - done - # There is no further use of the processor specific tmpfile; delete it - rm -f "${tmpfile}_${iproc}" - done - - # Move to COM and index the product grib files - for grid in "${grids[@]}"; do - prod_dir="COMOUT_ATMOS_GRIB_${grid}" - cpfs "pgb2${grp}file_${fhr3}_${grid}" "${!prod_dir}/${PREFIX}pgrb2${grp}.${grid}.${fhr3}" - ${WGRIB2} -s "pgb2${grp}file_${fhr3}_${grid}" > "${!prod_dir}/${PREFIX}pgrb2${grp}.${grid}.${fhr3}.idx" - done - - echo "Finished processing nset = ${nset}" - -done # for (( nset=1 ; nset <= downset ; nset++ )) - -#--------------------------------------------------------------- - -# Create the index file for the sflux master, if it exists. -FLUX_FILE="${COMIN_ATMOS_MASTER}/${PREFIX}sfluxgrb${fhr3}.grib2" -if [[ -s "${FLUX_FILE}" ]]; then - ${WGRIB2} -s "${FLUX_FILE}" > "${FLUX_FILE}.idx" -fi - -# Section creating slfux grib2 interpolated products -# Create 1-degree sflux grib2 output -# move to COM and index it -if [[ "${FLXGF:-}" == "YES" ]]; then - - # Files needed by ${INTERP_ATMOS_SFLUXSH} - input_file="${FLUX_FILE}" - output_file_prefix="sflux_${fhr3}" - grid_string="1p00" - "${INTERP_ATMOS_SFLUXSH}" "${input_file}" "${output_file_prefix}" "${grid_string}" && true - export err=$? - if [[ ${err} -ne 0 ]]; then - err_exit "Unable to interpolate the surface flux grib2 files!" 
- fi - - # Move to COM and index the product sflux file - IFS=':' read -ra grids <<< "${grid_string}" - for grid in "${grids[@]}"; do - prod_dir="COMOUT_ATMOS_GRIB_${grid}" - cpfs "sflux_${fhr3}_${grid}" "${!prod_dir}/${PREFIX}flux.${grid}.${fhr3}" - ${WGRIB2} -s "sflux_${fhr3}_${grid}" > "${!prod_dir}/${PREFIX}flux.${grid}.${fhr3}.idx" - done -fi - -# Section creating 0.25 degree WGNE products for nset=1, and fhr <= FHMAX_WGNE -if [[ "${WGNE:-}" == "YES" ]]; then - grp="" # TODO: this should be "a" when we eventually rename the pressure grib2 files per EE2 convention - if [[ ${FORECAST_HOUR} -gt 0 && ${FORECAST_HOUR} -le ${FHMAX_WGNE} ]]; then - # TODO: 597 is the message number for APCP in GFSv16. GFSv17 may change this as more messages are added. This can be controlled via config.atmos_products - ${WGRIB2} "${COMOUT_ATMOS_GRIB_0p25}/${PREFIX}pgrb2${grp}.0p25.${fhr3}" -d "${APCP_MSG:-597}" -grib "${COMOUT_ATMOS_GRIB_0p25}/${PREFIX}wgne.${fhr3}" - fi -fi - -#--------------------------------------------------------------- - -# Start sending DBN alerts -# Everything below this line is for sending files to DBN (SENDDBN=YES) -if [[ "${SENDDBN:-}" == "YES" ]]; then - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB2_0P25" "${job}" "${COMOUT_ATMOS_GRIB_0p25}/${PREFIX}pgrb2.0p25.${fhr3}" - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB2_0P25_WIDX" "${job}" "${COMOUT_ATMOS_GRIB_0p25}/${PREFIX}pgrb2.0p25.${fhr3}.idx" - if [[ "${RUN}" == "gfs" ]]; then - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB2B_0P25" "${job}" "${COMOUT_ATMOS_GRIB_0p25}/${PREFIX}pgrb2b.0p25.${fhr3}" - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB2B_0P25_WIDX" "${job}" "${COMOUT_ATMOS_GRIB_0p25}/${PREFIX}pgrb2b.0p25.${fhr3}.idx" - if [[ -s "${COMOUT_ATMOS_GRIB_0p50}/${PREFIX}pgrb2.0p50.${fhr3}" ]]; then - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB2_0P5" "${job}" "${COMOUT_ATMOS_GRIB_0p50}/${PREFIX}pgrb2.0p50.${fhr3}" - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB2_0P5_WIDX" "${job}" 
"${COMOUT_ATMOS_GRIB_0p50}/${PREFIX}pgrb2.0p50.${fhr3}.idx" - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB2B_0P5" "${job}" "${COMOUT_ATMOS_GRIB_0p50}/${PREFIX}pgrb2b.0p50.${fhr3}" - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB2B_0P5_WIDX" "${job}" "${COMOUT_ATMOS_GRIB_0p50}/${PREFIX}pgrb2b.0p50.${fhr3}.idx" - fi - if [[ -s "${COMOUT_ATMOS_GRIB_1p00}/${PREFIX}pgrb2.1p00.${fhr3}" ]]; then - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB2_1P0" "${job}" "${COMOUT_ATMOS_GRIB_1p00}/${PREFIX}pgrb2.1p00.${fhr3}" - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB2_1P0_WIDX" "${job}" "${COMOUT_ATMOS_GRIB_1p00}/${PREFIX}pgrb2.1p00.${fhr3}.idx" - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB2B_1P0" "${job}" "${COMOUT_ATMOS_GRIB_1p00}/${PREFIX}pgrb2b.1p00.${fhr3}" - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB2B_1P0_WIDX" "${job}" "${COMOUT_ATMOS_GRIB_1p00}/${PREFIX}pgrb2b.1p00.${fhr3}.idx" - fi - if [[ "${WGNE:-}" == "YES" && -s "${COMOUT_ATMOS_GRIB_0p25}/${PREFIX}wgne.${fhr3}" ]] ; then - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_WGNE" "${job}" "${COMOUT_ATMOS_GRIB_0p25}/${PREFIX}wgne.${fhr3}" - fi - fi - - if [[ "${fhr3}" == "anl" ]]; then - - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_MSC_sfcanl" "${job}" "${COMIN_ATMOS_ANALYSIS}/${PREFIX}sfc${fhr3}.nc" - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_SA" "${job}" "${COMIN_ATMOS_ANALYSIS}/${PREFIX}atm${fhr3}.nc" - - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGA_GB2" "${job}" "${COMOUT_ATMOS_GRIB_1p00}/${PREFIX}pgrb2.1p00.${fhr3}" - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGA_GB2_WIDX" "${job}" "${COMOUT_ATMOS_GRIB_1p00}/${PREFIX}pgrb2.1p00.${fhr3}.idx" - - else # forecast hours f000, f003, f006, etc. 
- - if [[ "${RUN}" == "gdas" ]]; then - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB_GB2" "${job}" "${COMOUT_ATMOS_GRIB_1p00}/${PREFIX}pgrb2.1p00.${fhr3}" - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB_GB2_WIDX" "${job}" "${COMOUT_ATMOS_GRIB_1p00}/${PREFIX}pgrb2.1p00.${fhr3}.idx" - if (( FORECAST_HOUR % 3 == 0 )); then - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_SF" "${job}" "${COMIN_ATMOS_HISTORY}/${PREFIX}atm${fhr3}.nc" - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_BF" "${job}" "${COMIN_ATMOS_HISTORY}/${PREFIX}sfc${fhr3}.nc" - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_SGB_GB2" "${job}" "${COMIN_ATMOS_MASTER}/${PREFIX}sfluxgrb${fhr3}.grib2" - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_SGB_GB2_WIDX" "${job}" "${COMIN_ATMOS_MASTER}/${PREFIX}sfluxgrb${fhr3}.grib2.idx" - fi - elif [[ "${RUN}" == "gfs" ]]; then - - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_SF" "${job}" "${COMIN_ATMOS_HISTORY}/${PREFIX}atm${fhr3}.nc" - if [[ ${fhr} -gt 0 && ${fhr} -le 84 || ${fhr} -eq 120 ]]; then - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_BF" "${job}" "${COMIN_ATMOS_HISTORY}/${PREFIX}sfc${fhr3}.nc" - fi - - if [[ -s "${COMIN_ATMOS_MASTER}/${PREFIX}sfluxgrbf${fhr3}.grib2" ]]; then - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_SGB_GB2" "${job}" "${COMIN_ATMOS_MASTER}/${PREFIX}sfluxgrb${fhr3}.grib2" - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_SGB_GB2_WIDX" "${job}" "${COMIN_ATMOS_MASTER}/${PREFIX}sfluxgrb${fhr3}.grib2.idx" - fi - elif [[ "${RUN}" == "gcafs" ]]; then - - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_SF" "${job}" "${COM_ATMOS_HISTORY}/${PREFIX}atm${fhr3}.nc" - if [[ ${fhr} -gt 0 && ${fhr} -le 84 || ${fhr} == 120 ]]; then - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_BF" "${job}" "${COM_ATMOS_HISTORY}/${PREFIX}sfc${fhr3}.nc" - fi - - if [[ -s "${COM_ATMOS_MASTER}/${PREFIX}sfluxgrbf${fhr3}.grib2" ]]; then - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_SGB_GB2" "${job}" "${COM_ATMOS_MASTER}/${PREFIX}sfluxgrb${fhr3}.grib2" - "${DBNROOT}/bin/dbn_alert" MODEL 
"${RUN^^}_SGB_GB2_WIDX" "${job}" "${COM_ATMOS_MASTER}/${PREFIX}sfluxgrb${fhr3}.grib2.idx" - fi - fi - - - fi # end if fhr3=anl - -fi # end if SENDDBN=YES - -exit 0 diff --git a/dev/container/atmos_products/interp_atmos_master.sh b/dev/container/atmos_products/interp_atmos_master.sh deleted file mode 100755 index 321d9f52dd3..00000000000 --- a/dev/container/atmos_products/interp_atmos_master.sh +++ /dev/null @@ -1,89 +0,0 @@ -#! /usr/bin/env bash - -# This script takes in a master grib file and creates products at various interpolated resolutions -# Generate 0.25 / 0.5 / 1 degree interpolated grib2 files for each input grib2 file -# trim's RH and tweaks sea-ice cover - -input_file=${1:-"pgb2file_in"} # Input pressure grib2 file -output_file_prefix=${2:-"pgb2file_out"} # Prefix for output grib2 file; the prefix is appended by resolution e.g. _0p25 -grid_string=${3:-"0p25"} # Target grids; e.g. "0p25" or "0p25:0p50"; If multiple, they need to be ":" seperated - -#source "${HOMEgfs}/ush/preamble.sh" - -# Programs used -#if [ "$RUN_WITH_CONTAINER" == "NO" ]; then -# #source "${USHgfs}/load_fv3gfs_modules.sh" -# #module load wgrib2/2.0.8 -source /usr/lmod/lmod/init/bash -module purge -module use ${HOMEgfs}/sorc/gfs_utils.fd/modulefiles -module load gfsutils_container.intel -module load wgrib2 -export WGRIB2=wgrib2 -##else -# export WGRIB2="${HOMEgfs}/exec/run_wgrib2.sh" -#fi - -# wgrib2 options for regridding -defaults="-set_grib_type same -set_bitmap 1 -set_grib_max_bits 16" -interp_winds="-new_grid_winds earth" -interp_bilinear="-new_grid_interpolation bilinear" -interp_neighbor="-if :(CSNOW|CRAIN|CFRZR|CICEP|ICSEV): -new_grid_interpolation neighbor -fi" -interp_budget="-if :(APCP|ACPCP|PRATE|CPRAT|DZDT): -new_grid_interpolation budget -fi" -increased_bits="-if :(APCP|ACPCP|PRATE|CPRAT): -set_grib_max_bits 25 -fi" - -# interpolated target grids -# shellcheck disable=SC2034 -grid0p25="latlon 0:1440:0.25 90:721:-0.25" -# shellcheck disable=SC2034 -grid0p50="latlon 
0:720:0.5 90:361:-0.5" -# shellcheck disable=SC2034 -grid1p00="latlon 0:360:1.0 90:181:-1.0" - -# "Import" functions used in this script -source "${USHgfs}/product_functions.sh" - -# Transform the input ${grid_string} into an array for processing -IFS=':' read -ra grids <<< "${grid_string}" - -output_grids="" -for grid in "${grids[@]}"; do - gridopt="grid${grid}" - output_grids="${output_grids} -new_grid ${!gridopt} ${output_file_prefix}_${grid}" -done - -#shellcheck disable=SC2086 -${WGRIB2} "${input_file}" ${defaults} \ - ${interp_winds} \ - ${interp_bilinear} \ - ${interp_neighbor} \ - ${interp_budget} \ - ${increased_bits} \ - ${output_grids} -export err=$? -if [[ ${err} -ne 0 ]]; then - echo "FATAL ERROR: WGRIB2 failed to generate interpolated grib2 file!" - exit "${err}" -fi - -# trim and mask for all grids -for grid in "${grids[@]}"; do - trim_rh "${output_file_prefix}_${grid}" - export err=$? - if [[ ${err} -ne 0 ]]; then - echo "FATAL ERROR: Failed during the execution of trim_rh" - exit "${err}" - fi - # shellcheck disable=SC2312 - var_count=$(${WGRIB2} "${output_file_prefix}_${grid}" -match "LAND|ICEC" |wc -l) - if [[ "${var_count}" -eq 2 ]]; then - mod_icec "${output_file_prefix}_${grid}" - export err=$? - if [[ ${err} -ne 0 ]]; then - echo "FATAL ERROR: Failed during execution of mod_icec" - exit "${err}" - fi - fi -done - -exit 0 diff --git a/dev/container/atmos_products/interp_atmos_sflux.sh b/dev/container/atmos_products/interp_atmos_sflux.sh deleted file mode 100755 index abe528a638d..00000000000 --- a/dev/container/atmos_products/interp_atmos_sflux.sh +++ /dev/null @@ -1,64 +0,0 @@ -#! 
/usr/bin/env bash - -# This script takes in a master flux file and creates interpolated flux files at various interpolated resolutions -# Generate 0.25 / 0.5 / 1 degree interpolated grib2 flux files for each input sflux grib2 file - -input_file=${1:-"sfluxfile_in"} # Input sflux grib2 file -output_file_prefix=${2:-"sfluxfile_out"} # Prefix for output sflux grib2 file; the prefix is appended by resolution e.g. _0p25 -grid_string=${3:-"1p00"} # Target grids; e.g. "0p25" or "0p25:0p50"; If multiple, they need to be ":" seperated - -#source "${HOMEgfs}/ush/preamble.sh" - -# Programs used -#if [ "$RUN_WITH_CONTAINER" == "NO" ]; then -# #source "${USHgfs}/load_fv3gfs_modules.sh" -# #module load wgrib2/2.0.8 -source /usr/lmod/lmod/init/bash -module use ${HOMEgfs}/sorc/gfs_utils.fd/modulefiles -module load gfsutils_container.intel -module load wgrib2 -export WGRIB2=wgrib2 -#else -# export WGRIB2="${HOMEgfs}/exec/run_wgrib2.sh" -#fi - -# wgrib2 options for regridding -defaults="-set_grib_type same -set_bitmap 1 -set_grib_max_bits 16" -interp_winds="-new_grid_winds earth" -interp_bilinear="-new_grid_interpolation bilinear" -interp_neighbor="-if :(LAND|CSNOW|CRAIN|CFRZR|CICEP|ICSEV): -new_grid_interpolation neighbor -fi" -interp_budget="-if :(APCP|ACPCP|PRATE|CPRAT|DZDT): -new_grid_interpolation budget -fi" -increased_bits="-if :(APCP|ACPCP|PRATE|CPRAT): -set_grib_max_bits 25 -fi" - -# interpolated target grids -# shellcheck disable=SC2034 -grid0p25="latlon 0:1440:0.25 90:721:-0.25" -# shellcheck disable=SC2034 -grid0p50="latlon 0:720:0.5 90:361:-0.5" -# shellcheck disable=SC2034 -grid1p00="latlon 0:360:1.0 90:181:-1.0" - -# Transform the input ${grid_string} into an array for processing -IFS=':' read -ra grids <<< "${grid_string}" - -output_grids="" -for grid in "${grids[@]}"; do - gridopt="grid${grid}" - output_grids="${output_grids} -new_grid ${!gridopt} ${output_file_prefix}_${grid}" -done - -#shellcheck disable=SC2086 -${WGRIB2} "${input_file}" ${defaults} \ - 
${interp_winds} \ - ${interp_bilinear} \ - ${interp_neighbor} \ - ${interp_budget} \ - ${increased_bits} \ - ${output_grids} -export err=$? -if [[ ${err} -ne 0 ]]; then - echo "FATAL ERROR: WGRIB2 failed to interpolate surface flux parameters to a new grib2 file" - exit "${err}" -fi - -exit 0 diff --git a/dev/container/atmos_products/run_mpmd.sh b/dev/container/atmos_products/run_mpmd.sh deleted file mode 100755 index 5e6dcc634db..00000000000 --- a/dev/container/atmos_products/run_mpmd.sh +++ /dev/null @@ -1,98 +0,0 @@ -#!/usr/bin/env bash - -################################################################################ -# -# UNIX Script Documentation Block -# Script name: run_mpmd.sh -# Script description: Run multiple commands in MPMD mode or serially -# -# Author: Rahul Mahajan -# -# Org: NCEP/EMC -# -# Abstract: This script runs multiple commands in MPMD mode. It is used to run -# multiple serial commands in parallel using the CFP (Coupled Framework -# Parallelism) feature of the workflow. -# -# Environment variables: -# USE_CFP: If set to YES, run in MPMD mode, else run in serial mode. Default is 'NO'. -# launcher: Command to launch the MPMD job. Default is empty. -# Supported launchers are 'srun' and 'mpiexec'. -# mpmd_opt: Additional options to pass to the launcher. Default is empty. -# -# Input: -# cmdfile: File containing commands to execute in MPMD/serial mode -# -# Command line: -# run_mpmd.sh cmdfile -# -################################################################################ - -set -x - -source /usr/lmod/lmod/init/bash -module use "${HOMEgfs}/sorc/gfs_utils.fd/modulefiles" -module load gfsutils_container.intel -module load wgrib2 - -cmdfile=${1:?"run_mpmd requires an input file containing commands to execute in MPMD/serial mode"} - -# If USE_CFP is not set, run in serial mode -if [[ "${USE_CFP:-}" != "YES" ]]; then - echo "INFO: Using serial mode for MPMD job" - chmod 755 "${cmdfile}" - bash +x "${cmdfile}" > mpmd.out 2>&1 - rc=$? 
- cat mpmd.out - exit "${rc}" -fi - -# Set OMP_NUM_THREADS to 1 to avoid oversubscription when doing MPMD -export OMP_NUM_THREADS=1 - -# Determine the number of MPMD processes from incoming ${cmdfile} -#nprocs=$(wc -l < "${cmdfile}") - -# Local MPMD file containing instructions to run in CFP -mpmd_cmdfile="${DATA:-}/mpmd_cmdfile" -if [[ -s "${mpmd_cmdfile}" ]]; then rm -f "${mpmd_cmdfile}"; fi - -cat << EOF - INFO: Executing MPMD job, STDOUT redirected for each process separately - INFO: On failure, logs for each job will be available in ${DATA}/mpmd.proc_num.out - INFO: The proc_num corresponds to the line in '${mpmd_cmdfile}' -EOF - -# Redirect output from each process to its own stdout -# Read the incoming cmdfile and create mpiexec usable cmdfile -nm=0 -echo "#!/bin/bash" >> "${mpmd_cmdfile}" -# shellcheck disable=SC2312 -while IFS= read -r line; do - echo "${line} > mpmd.${nm}.out" >> "${mpmd_cmdfile}" - echo "Line ${nm}: ${line}" - ${line} > "mpmd.${nm}.out" & - ((nm=nm+1)) -done < "${cmdfile}" -chmod 755 "${mpmd_cmdfile}" -wait - -# shellcheck disable=SC2086 -#mpirun -np ${nprocs} ${mpmd_opt:-} "${mpmd_cmdfile}" -#mpirun -n ${nprocs} ${mpmd_cmdfile} -#mpirun --app ${mpmd_cmdfile} -err=$? -set_strict - -# On success concatenate processor specific output into a single mpmd.out -if [[ ${err} -eq 0 ]]; then - rm -f "${mpmd_cmdfile}" - out_files=$(find . 
-name 'mpmd.*.out') - for file in ${out_files}; do - cat "${file}" >> mpmd.out - rm -f "${file}" - done - cat mpmd.out -fi - -exit "${err}" diff --git a/dev/container/gen-run-cases.sh b/dev/container/gen-run-cases.sh index 1f9bc097fe4..b7137efc19b 100755 --- a/dev/container/gen-run-cases.sh +++ b/dev/container/gen-run-cases.sh @@ -17,12 +17,11 @@ run_with_container=YES #yamllist="C768_S2SW" HOMEDIR=${HOMEgfs} -#img=ubuntu22.04-intel-ufs-env-v1.6.0.img img=ubuntu22.04-intel-ufs-env-v1.9.2.img if [[ ${MACHINE_ID} = ursa* ]] ; then - container=/scratch4/NAGAPE/epic/${USER}/demo/${img} + container=/scratch4/NAGAPE/epic/${USER}/containers/${img} rundir=/scratch3/NAGAPE/epic/${USER}/run - bindings="-B /scratch3 -B /scratch4" + bindings="-B /apps -B /scratch3 -B /scratch4" HPC_ACCOUNT=epic module load rocoto/1.3.7 diff --git a/dev/ush/load_gw_run_modules.sh b/dev/ush/load_gw_run_modules.sh index d12c793a320..a80082c7d3a 100755 --- a/dev/ush/load_gw_run_modules.sh +++ b/dev/ush/load_gw_run_modules.sh @@ -34,6 +34,71 @@ if [[ -d "${HOMEgfs}/sorc/wxflow/src" ]]; then fi export PYTHONPATH +if [[ "$RUN_WITH_CONTAINER" == "YES" ]]; then + # if [[ ! 
-d ~/prod-util-2.1.1 ]]; then + # cp -r $prod_util_ROOT ~/prod-util-2.1.1 + # fi + + #if [[ "$PATH" =~ "prod-util" ]]; then + export PATH=~/prod-util-2.1.1/bin:$PATH + #fi + export FSYNC=~/prod-util-2.1.1/bin/fsync_file + export MDATE=~/prod-util-2.1.1/bin/mdate + export NDATE=~/prod-util-2.1.1/bin/ndate + export NHOUR=~/prod-util-2.1.1/bin/nhour + + source /usr/lmod/lmod/init/bash + module use "${HOMEgfs}/sorc/gfs_utils.fd/modulefiles" + module load gfsutils_container.intel + module load wgrib2 +else + source "${HOMEgfs}/ush/detect_machine.sh" + source "${HOMEgfs}/ush/module-setup.sh" + + # Source versions file for runtime + source "${HOMEgfs}/versions/run.ver" + + # Load our modules: + module use "${HOMEgfs}/modulefiles" + + case "${MACHINE_ID}" in + "wcoss2") + module load cray-pals + module load cfp + module load libjpeg + module load craype-network-ucx + module load cray-mpich-ucx + module load "gw_run.${MACHINE_ID}" + ;; + "hera" | "orion" | "hercules" | "gaeac5" | "gaeac6" | "noaacloud" | "ursa") + module load "gw_run.${MACHINE_ID}" + export UTILROOT=${prod_util_ROOT} + ;; + *) + echo "WARNING: UNKNOWN PLATFORM" + ;; + esac + + export err=$? 
+ if [[ ${err} -ne 0 ]]; then + echo "FATAL ERROR: Failed to load gw_run.${MACHINE_ID}" + exit 1 + fi + + module load wgrib2 + module load prod_util +fi +export WGRIB2=wgrib2 + +# Turn on our settings +export SHELLOPTS +declare -xf set_strict +declare -xf set_trace +declare -xf postamble +declare -xf err_exit +set_strict +set_trace + # Restore stack soft limit: ulimit -S -s "${ulimit_s}" unset ulimit_s diff --git a/scripts/exglobal_atmos_products.sh b/scripts/exglobal_atmos_products.sh index 070e7c14dd3..285d2cb3f04 100755 --- a/scripts/exglobal_atmos_products.sh +++ b/scripts/exglobal_atmos_products.sh @@ -2,6 +2,8 @@ source "${HOMEgfs}/ush/preamble.sh" +source "${HOMEgfs}/dev/ush/load_gw_run_modules.sh" + # Scripts used INTERP_ATMOS_MASTERSH=${INTERP_ATMOS_MASTERSH:-"${USHgfs}/interp_atmos_master.sh"} INTERP_ATMOS_SFLUXSH=${INTERP_ATMOS_SFLUXSH:-"${USHgfs}/interp_atmos_sflux.sh"} diff --git a/ush/interp_atmos_master.sh b/ush/interp_atmos_master.sh index 00bbde65043..39fdc563fad 100755 --- a/ush/interp_atmos_master.sh +++ b/ush/interp_atmos_master.sh @@ -4,6 +4,8 @@ # Generate 0.25 / 0.5 / 1 degree interpolated grib2 files for each input grib2 file # trim's RH and tweaks sea-ice cover +source "${USHgfs}/preamble.sh" + input_file=${1:-"pgb2file_in"} # Input pressure grib2 file output_file_prefix=${2:-"pgb2file_out"} # Prefix for output grib2 file; the prefix is appended by resolution e.g. _0p25 grid_string=${3:-"0p25"} # Target grids; e.g. "0p25" or "0p25:0p50"; If multiple, they need to be ":" seperated diff --git a/ush/interp_atmos_sflux.sh b/ush/interp_atmos_sflux.sh index 2aff2cc58aa..a9ebc82b717 100755 --- a/ush/interp_atmos_sflux.sh +++ b/ush/interp_atmos_sflux.sh @@ -1,5 +1,7 @@ #! 
/usr/bin/env bash +source "${USHgfs}/preamble.sh" + # This script takes in a master flux file and creates interpolated flux files at various interpolated resolutions # Generate 0.25 / 0.5 / 1 degree interpolated grib2 flux files for each input sflux grib2 file diff --git a/ush/preamble.sh b/ush/preamble.sh index 212fcfa4038..a241d8f2a4e 100755 --- a/ush/preamble.sh +++ b/ush/preamble.sh @@ -177,59 +177,7 @@ trap "postamble ${_calling_script} ${start_time} \$?" EXIT source "${HOMEgfs}/ush/bash_utils.sh" # Decide if run with container -export RUN_WITH_CONTAINER=YES - -if [[ "$RUN_WITH_CONTAINER" == "YES" ]]; then - export WGRIB2="${HOMEgfs}/exec/run_wgrib2.sh" - # if [[ ! -d ~/prod-util-2.1.1 ]]; then - # cp -r $prod_util_ROOT ~/prod-util-2.1.1 - # fi - - #if [[ "$PATH" =~ "prod-util" ]]; then - export PATH=~/prod-util-2.1.1/bin:$PATH - #fi - export FSYNC=~/prod-util-2.1.1/bin/fsync_file - export MDATE=~/prod-util-2.1.1/bin/mdate - export NDATE=~/prod-util-2.1.1/bin/ndate - export NHOUR=~/prod-util-2.1.1/bin/nhour -else - source "${HOMEgfs}/ush/detect_machine.sh" - source "${HOMEgfs}/ush/module-setup.sh" - - # Source versions file for runtime - source "${HOMEgfs}/versions/run.ver" - - # Load our modules: - module use "${HOMEgfs}/modulefiles" - - case "${MACHINE_ID}" in - "wcoss2") - module load cray-pals - module load cfp - module load libjpeg - module load craype-network-ucx - module load cray-mpich-ucx - module load "gw_run.${MACHINE_ID}" - ;; - "hera" | "orion" | "hercules" | "gaeac5" | "gaeac6" | "noaacloud" | "ursa") - module load "gw_run.${MACHINE_ID}" - export UTILROOT=${prod_util_ROOT} - ;; - *) - echo "WARNING: UNKNOWN PLATFORM" - ;; - esac - - export err=$? 
- if [[ ${err} -ne 0 ]]; then - echo "FATAL ERROR: Failed to load gw_run.${MACHINE_ID}" - exit 1 - fi - - module load wgrib2 - module load prod_util - export WGRIB2=wgrib2 -fi +export RUN_WITH_CONTAINER=NO # Turn on our settings export SHELLOPTS diff --git a/ush/run_mpmd.sh b/ush/run_mpmd.sh index 6ffbd3106d1..dbc933e3da7 100755 --- a/ush/run_mpmd.sh +++ b/ush/run_mpmd.sh @@ -30,6 +30,13 @@ source "${USHgfs}/preamble.sh" +if [[ "$RUN_WITH_CONTAINER" == "YES" ]]; then + source /usr/lmod/lmod/init/bash + module use "${HOMEgfs}/sorc/gfs_utils.fd/modulefiles" + module load gfsutils_container.intel + module load wgrib2 +fi + cmdfile=${1:?"run_mpmd requires an input file containing commands to execute in MPMD/serial mode"} # If USE_CFP is not set, run in serial mode @@ -45,63 +52,22 @@ fi # Set OMP_NUM_THREADS to 1 to avoid oversubscription when doing MPMD export OMP_NUM_THREADS=1 -# Determine the number of MPMD processes from incoming ${cmdfile} -nprocs=$(wc -l < "${cmdfile}") - -# Local MPMD file containing instructions to run in CFP -mpmd_cmdfile="${DATA:-}/mpmd_cmdfile" -if [[ -s "${mpmd_cmdfile}" ]]; then rm -f "${mpmd_cmdfile}"; fi - -cat << EOF - INFO: Executing MPMD job, STDOUT redirected for each process separately - INFO: On failure, logs for each job will be available in ${DATA}/mpmd.proc_num.out - INFO: The proc_num corresponds to the line in '${mpmd_cmdfile}' -EOF - -if [[ "${launcher:-}" =~ ^srun.* ]]; then # srun-based system e.g. Hera, Orion, etc. - - # Slurm requires a counter in front of each line in the script - # Read the incoming cmdfile and create srun usable cmdfile - nm=0 - # shellcheck disable=SC2312 - while IFS= read -r line; do - echo "${nm} ${line}" >> "${mpmd_cmdfile}" - ((nm=nm+1)) - done < "${cmdfile}" - - set +e - # shellcheck disable=SC2086 - ${launcher:-} ${mpmd_opt:-} -n ${nprocs} "${mpmd_cmdfile}" - err=$? 
- set_strict +# Redirect output from each process to its own stdout +# Read the incoming cmdfile and create mpiexec usable cmdfile +nm=0 +# shellcheck disable=SC2312 +while IFS= read -r line; do + echo "Line ${nm}: ${line}" + ${line} > "mpmd.${nm}.out" & + ((nm=nm+1)) +done < "${cmdfile}" +wait -elif [[ "${launcher:-}" =~ ^mpiexec.* ]]; then # mpiexec - - # Redirect output from each process to its own stdout - # Read the incoming cmdfile and create mpiexec usable cmdfile - nm=0 - echo "#!/bin/bash" >> "${mpmd_cmdfile}" - # shellcheck disable=SC2312 - while IFS= read -r line; do - echo "${line} > mpmd.${nm}.out" >> "${mpmd_cmdfile}" - ((nm=nm+1)) - done < "${cmdfile}" - chmod 755 "${mpmd_cmdfile}" - - # shellcheck disable=SC2086 - ${launcher:-} -np ${nprocs} ${mpmd_opt:-} "${mpmd_cmdfile}" - err=$? - -else - - echo "FATAL ERROR: CFP is not usable with launcher: '${launcher:-}'" - err=1 - -fi +err=$? +set_strict # On success concatenate processor specific output into a single mpmd.out if [[ ${err} -eq 0 ]]; then - rm -f "${mpmd_cmdfile}" out_files=$(find . 
-name 'mpmd.*.out') for file in ${out_files}; do cat "${file}" >> mpmd.out From 5e351a53fe0e122a324a36c7c588caee54631e4f Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Sat, 30 Aug 2025 12:38:38 +0000 Subject: [PATCH 085/134] ursa to ss192 --- .gitmodules | 6 +++--- dev/container/utils/create-atmos-products.sh | 10 +++++----- dev/ush/load_gw_run_modules.sh | 9 +++++---- scripts/exgfs_wave_init.sh | 1 + scripts/exglobal_forecast.sh | 4 ++++ ush/preamble.sh | 2 +- ush/run_mpmd.sh | 6 +++++- ush/wave_grid_moddef.sh | 1 + 8 files changed, 25 insertions(+), 14 deletions(-) diff --git a/.gitmodules b/.gitmodules index 5c7edfe6e93..531a5e306e2 100644 --- a/.gitmodules +++ b/.gitmodules @@ -2,7 +2,7 @@ path = sorc/ufs_model.fd #url = https://github.com/ufs-community/ufs-weather-model url = https://github.com/NOAA-EPIC/ufs-weather-model.git - branch = feature/use_container_stack-stack-1.6.0 + branch = feature/use_container_spack-stack-1.9.2 [submodule "sorc/wxflow"] path = sorc/wxflow url = https://github.com/NOAA-EMC/wxflow @@ -10,12 +10,12 @@ path = sorc/gfs_utils.fd #url = https://github.com/NOAA-EMC/gfs-utils url = https://github.com/NOAA-EPIC/gfs-utils.git - branch = feature/use_container_stack-stack-1.6.0 + branch = feature/use_container_spack-stack-1.9.2 [submodule "sorc/ufs_utils.fd"] path = sorc/ufs_utils.fd #url = https://github.com/ufs-community/UFS_UTILS.git url = https://github.com/NOAA-EPIC/UFS_UTILS-cloud.git - branch = feature/use_container_stack-stack-1.6.0 + branch = feature/use_container_spack-stack-1.9.2 [submodule "sorc/verif-global.fd"] path = sorc/verif-global.fd url = https://github.com/NOAA-EMC/EMC_verif-global.git diff --git a/dev/container/utils/create-atmos-products.sh b/dev/container/utils/create-atmos-products.sh index 03a0a2bfff0..cef9a8d0b2c 100755 --- a/dev/container/utils/create-atmos-products.sh +++ b/dev/container/utils/create-atmos-products.sh @@ -43,9 +43,9 @@ sed -e "s?HOMEgfs?${HOMEgfs}?g" \ 
${HOMEgfs}/dev/container/atmos_products/exec.exglobal_atmos_products.sh > ${HOMEgfs}/exec/exglobal_atmos_products.sh chmod +x ${HOMEgfs}/exec/exglobal_atmos_products.sh -for fl in exglobal_atmos_products.sh interp_atmos_master.sh interp_atmos_sflux.sh run_mpmd.sh -do - cp ${HOMEgfs}/dev/container/atmos_products/${fl} ${HOMEgfs}/ush/container/. - chmod +x ${HOMEgfs}/ush/container/${fl} -done +#for fl in exglobal_atmos_products.sh interp_atmos_master.sh interp_atmos_sflux.sh run_mpmd.sh +#do +# cp ${HOMEgfs}/dev/container/atmos_products/${fl} ${HOMEgfs}/ush/container/. +# chmod +x ${HOMEgfs}/ush/container/${fl} +#done diff --git a/dev/ush/load_gw_run_modules.sh b/dev/ush/load_gw_run_modules.sh index a80082c7d3a..3e9b0454105 100755 --- a/dev/ush/load_gw_run_modules.sh +++ b/dev/ush/load_gw_run_modules.sh @@ -34,7 +34,11 @@ if [[ -d "${HOMEgfs}/sorc/wxflow/src" ]]; then fi export PYTHONPATH -if [[ "$RUN_WITH_CONTAINER" == "YES" ]]; then +source "${HOMEgfs}/ush/detect_machine.sh" +source "${HOMEgfs}/ush/module-setup.sh" + +#if [[ "$RUN_WITH_CONTAINER" == "YES" ]]; then +if [[ "$MACHINE_ID" == "container" ]]; then # if [[ ! -d ~/prod-util-2.1.1 ]]; then # cp -r $prod_util_ROOT ~/prod-util-2.1.1 # fi @@ -52,9 +56,6 @@ if [[ "$RUN_WITH_CONTAINER" == "YES" ]]; then module load gfsutils_container.intel module load wgrib2 else - source "${HOMEgfs}/ush/detect_machine.sh" - source "${HOMEgfs}/ush/module-setup.sh" - # Source versions file for runtime source "${HOMEgfs}/versions/run.ver" diff --git a/scripts/exgfs_wave_init.sh b/scripts/exgfs_wave_init.sh index 02968d90e8b..2d277005800 100755 --- a/scripts/exgfs_wave_init.sh +++ b/scripts/exgfs_wave_init.sh @@ -17,6 +17,7 @@ # ############################################################################### # +source "${HOMEgfs}/dev/ush/load_gw_run_modules.sh" # --------------------------------------------------------------------------- # # 1. 
Get files that are used by most child scripts diff --git a/scripts/exglobal_forecast.sh b/scripts/exglobal_forecast.sh index 776018019c3..c84e3886972 100755 --- a/scripts/exglobal_forecast.sh +++ b/scripts/exglobal_forecast.sh @@ -77,6 +77,8 @@ # Main body starts here ####################### +set +x + # include all subroutines. Executions later. source "${USHgfs}/forecast_predet.sh" # include functions for variable definition source "${USHgfs}/forecast_det.sh" # include functions for run type determination @@ -85,6 +87,8 @@ source "${USHgfs}/parsing_ufs_configure.sh" # include functions for ufs_configu source "${USHgfs}/atparse.bash" # include function atparse for parsing @[XYZ] templated files +set +x + # Coupling control switches, for coupling purpose, off by default cpl=${cpl:-.false.} cplflx=${cplflx:-.false.} # default off,import from outside source diff --git a/ush/preamble.sh b/ush/preamble.sh index a241d8f2a4e..4d552c2e29a 100755 --- a/ush/preamble.sh +++ b/ush/preamble.sh @@ -177,7 +177,7 @@ trap "postamble ${_calling_script} ${start_time} \$?" 
EXIT source "${HOMEgfs}/ush/bash_utils.sh" # Decide if run with container -export RUN_WITH_CONTAINER=NO +export RUN_WITH_CONTAINER=YES # Turn on our settings export SHELLOPTS diff --git a/ush/run_mpmd.sh b/ush/run_mpmd.sh index dbc933e3da7..a0b0d143170 100755 --- a/ush/run_mpmd.sh +++ b/ush/run_mpmd.sh @@ -30,7 +30,11 @@ source "${USHgfs}/preamble.sh" -if [[ "$RUN_WITH_CONTAINER" == "YES" ]]; then +source "${HOMEgfs}/ush/detect_machine.sh" +source "${HOMEgfs}/ush/module-setup.sh" + +#if [[ "$RUN_WITH_CONTAINER" == "YES" ]]; then +if [[ "$MACHINE_ID" == "container" ]]; then source /usr/lmod/lmod/init/bash module use "${HOMEgfs}/sorc/gfs_utils.fd/modulefiles" module load gfsutils_container.intel diff --git a/ush/wave_grid_moddef.sh b/ush/wave_grid_moddef.sh index 5caf20c6eaa..ea4f362ee55 100755 --- a/ush/wave_grid_moddef.sh +++ b/ush/wave_grid_moddef.sh @@ -17,6 +17,7 @@ # # ############################################################################### # + source "${HOMEgfs}/dev/ush/load_gw_run_modules.sh" # --------------------------------------------------------------------------- # # 0. 
Preparations From 98c1e120e756daa2ed86d71475595396142160f5 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Tue, 2 Sep 2025 15:26:51 +0000 Subject: [PATCH 086/134] testing on ursa --- dev/container/utils/link_gfs_utils.sh | 4 +- dev/container/utils/link_model.sh | 96 ++++++++++++++++++++------ env/URSA.env | 11 +-- scripts/exgfs_wave_post_gridded_sbs.sh | 1 + 4 files changed, 83 insertions(+), 29 deletions(-) diff --git a/dev/container/utils/link_gfs_utils.sh b/dev/container/utils/link_gfs_utils.sh index 9a805db3576..0ed0b27eea8 100755 --- a/dev/container/utils/link_gfs_utils.sh +++ b/dev/container/utils/link_gfs_utils.sh @@ -60,7 +60,7 @@ source /usr/lmod/lmod/init/bash module purge module use ${HOMEgfs}/sorc/gfs_utils.fd/modulefiles module load gfsutils_container.intel -module load wgrib2/2.0.8 +module load wgrib2/3.6.0 arg="\$@" ${HOMEgfs}/sorc/gfs_utils.fd/install/bin/${model}.x \$arg @@ -97,7 +97,7 @@ source /usr/lmod/lmod/init/bash module purge module use ${HOMEgfs}/sorc/gfs_utils.fd/modulefiles module load gfsutils_container.intel -module load wgrib2/2.0.8 +module load wgrib2/3.6.0 arg="\$@" ${HOMEgfs}/sorc/gfs_utils.fd/install/bin/${nm}.x \$arg diff --git a/dev/container/utils/link_model.sh b/dev/container/utils/link_model.sh index 7fb5cd6c3e3..3e2c71d28d9 100755 --- a/dev/container/utils/link_model.sh +++ b/dev/container/utils/link_model.sh @@ -2,6 +2,7 @@ verbose=false bindings="-B /scratch3 -B /scratch4" +machineid="ursa" while [ "$#" -gt 0 ]; do case "$1" in @@ -21,6 +22,10 @@ while [ "$#" -gt 0 ]; do model="$2" shift 2 ;; + -M|--MACHINE_ID) + machineid="$2" + shift 2 + ;; -v|--verbose) verbose=true shift @@ -32,15 +37,16 @@ while [ "$#" -gt 0 ]; do esac done -if [[ ! -v HOMEgfs || ! -v container || ! -v model ]]; then +if [[ ! -v HOMEgfs || ! -v container || ! -v model || ! 
-v MACHINE_ID ]]; then echo "Usage: link_model.sh -H/-HOMEgfs gw-home-dir -c/--container full-path-container-image \\" - echo " -m/--model name_model -b/--bindings -B dirname [-B dirname1 [...]] [-v]" + echo " -m/--model name_model -M/MACHINE_ID MACHINE_ID -b/--bindings [...]] [-v]" exit -1 fi #echo "HOMEgfs: $HOMEgfs" #echo "model: $model" #echo "Verbose: $verbose" +#echo "machineid: $machineid" if [[ "$verbose" == "true" ]]; then set -x @@ -69,27 +75,14 @@ chmod 755 $run_model_script link_model_script=${HOMEgfs}/exec/${model}.x rm -f ${link_model_script} -cat > $link_model_script << EOF_LINK +case "${machineid}" in + ursa) +cat > $link_model_script << EOF_URSA #!/bin/bash - -#Need these lines on AWS to run more than one node. -#export I_MPI_DEBUG=10 -#export I_MPI_FABRICS=shm:ofi -#export I_MPI_OFI_PROVIDER=tcp -#export FI_PROVIDER=tcp -#export FI_TCP_IFACE=eth0 - -#For GaeaC6 -#export SINGULARITY_ENABLE_OVERLAY=try -#export SINGULARITY_DISABLE_OVERLAY=yes -#export SINGULARITY_DEBUG=10 -#export SINGULARITY_DEBUG=0 -#unset SINGULARITY_DEBUG - # --- MPI and Fabric Configuration --- # 1. Force Intel MPI to use Slurm's PMI2 library for job startup # for Ursa -export I_MPI_PMI_LIBRARY=/apps/slurm/default/lib/libpmi2.so +#export I_MPI_PMI_LIBRARY=/apps/slurm/default/lib/libpmi2.so # 2. 
Set the OFI provider to Mellanox InfiniBand export FI_PROVIDER=mlx @@ -98,14 +91,73 @@ export FI_PROVIDER=mlx export UCX_TLS=^sm,cma # --- End of Configuration --- +HOST_SLURM_PATH=/apps/slurm/default +HOST_MPI_PATH=/apps/spack-2024-12/linux-rocky9-x86_64/gcc-11.4.1/intel-oneapi-compilers-2024.2.1-oqhstbmawnrsdw472p4pjsopj547o6xs/compiler/2024.2 + + export LD_LIBRARY_PATH=$(dirname ${container}) + set +x + arg="\$@" + singularity exec \\ + --bind \${HOST_SLURM_PATH}:\${HOST_SLURM_PATH} \\ + --bind \${HOST_MPI_PATH}:\${HOST_MPI_PATH} \\ + ${bindings} \\ + ${container} \\ + ${run_model_script} \$arg +EOF_URSA + ;; + gaea*) +cat > $link_model_script << EOF_GAEA +#!/bin/bash +#export SINGULARITY_ENABLE_OVERLAY=try +#export SINGULARITY_DISABLE_OVERLAY=yes +#export SINGULARITY_DEBUG=10 +#export SINGULARITY_DEBUG=0 +#unset SINGULARITY_DEBUG + + export LD_LIBRARY_PATH=$(dirname ${container}) + set +x + arg="\$@" + singularity exec \\ + ${bindings} \\ + ${container} \\ + ${run_model_script} \$arg +EOF_GAEA + ;; + noaacloud) +cat > $link_model_script << EOF_NOAACLOUD +#!/bin/bash + +#Need these lines on AWS to run more than one node. 
+#export I_MPI_DEBUG=10 + export I_MPI_FABRICS=shm:ofi + export I_MPI_OFI_PROVIDER=tcp + export FI_PROVIDER=tcp + export FI_TCP_IFACE=eth0 + + export LD_LIBRARY_PATH=$(dirname ${container}) + set +x + arg="\$@" + singularity exec \\ + --bind \${HOST_SLURM_PATH}:\${HOST_SLURM_PATH} \\ + --bind \${HOST_MPI_PATH}:\${HOST_MPI_PATH} \\ + ${bindings} \\ + ${container} \\ + ${run_model_script} \$arg +EOF_NOAACLOUD + ;; + *) +cat > $link_model_script << EOF_LINK +#!/bin/bash export LD_LIBRARY_PATH=$(dirname ${container}) set +x arg="\$@" singularity exec \\ - ${bindings} \\ - ${container} \\ - ${run_model_script} \$arg + ${bindings} \\ + ${container} \\ + ${run_model_script} \$arg EOF_LINK + ;; +esac chmod 755 $link_model_script diff --git a/env/URSA.env b/env/URSA.env index 73ffd2749b9..7fa72339c4f 100755 --- a/env/URSA.env +++ b/env/URSA.env @@ -242,11 +242,12 @@ elif [[ "${step}" = "eupd" ]]; then export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" elif [[ "${step}" = "fcst" ]] || [[ "${step}" = "efcs" ]]; then - #fast - #export launcher="srun -l --hint=nomultithread --distribution=block:block" - #slow3 - #export launcher="srun -l --export=ALL --hint=nomultithread" - export launcher="srun --mpi=pmi2 -l --hint=nomultithread" + # fast + #export launcher="srun -l --hint=nomultithread --distribution=block:block" + # slow3 + #export launcher="srun -l --export=ALL --hint=nomultithread" + export launcher="srun --mpi=pmi2 -l --hint=nomultithread" + #export launcher="srun env -u SLURM_NODELIST" (( nnodes = (ntasks+tasks_per_node-1)/tasks_per_node )) (( ufs_ntasks = nnodes*tasks_per_node )) diff --git a/scripts/exgfs_wave_post_gridded_sbs.sh b/scripts/exgfs_wave_post_gridded_sbs.sh index 99eefda4fc9..5bf562986ea 100755 --- a/scripts/exgfs_wave_post_gridded_sbs.sh +++ b/scripts/exgfs_wave_post_gridded_sbs.sh @@ -21,6 +21,7 @@ # ############################################################################### +source "${HOMEgfs}/dev/ush/load_gw_run_modules.sh" source 
"${USHgfs}/wave_domain_grid.sh" DOGRI_WAV=${DOGRI_WAV:-"NO"} # Interpolate to a grid From 38670a396cf4a2bce6defe3d6d3e2afa74aec026 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Tue, 2 Sep 2025 19:20:56 +0000 Subject: [PATCH 087/134] testing on ursa --- dev/container/gen-run-cases.sh | 2 +- dev/container/utils/create-atmos-products.sh | 8 +---- .../exec.exglobal_atmos_products.sh | 0 dev/container/utils/link_model.sh | 35 +++++++------------ dev/container/utils/ush.wgrib2 | 7 +--- ush/wave_grib2_sbs.sh | 1 + ush/wave_grid_interp_sbs.sh | 2 ++ 7 files changed, 18 insertions(+), 37 deletions(-) rename dev/container/{atmos_products => utils}/exec.exglobal_atmos_products.sh (100%) diff --git a/dev/container/gen-run-cases.sh b/dev/container/gen-run-cases.sh index b7137efc19b..7cf5d652eda 100755 --- a/dev/container/gen-run-cases.sh +++ b/dev/container/gen-run-cases.sh @@ -1,6 +1,6 @@ #!/bin/bash -set -x +set +x HOMEgfs="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." >/dev/null 2>&1 && pwd )" source "${HOMEgfs}/ush/detect_machine.sh" diff --git a/dev/container/utils/create-atmos-products.sh b/dev/container/utils/create-atmos-products.sh index cef9a8d0b2c..08365f172ce 100755 --- a/dev/container/utils/create-atmos-products.sh +++ b/dev/container/utils/create-atmos-products.sh @@ -40,12 +40,6 @@ echo "Verbose: $verbose" sed -e "s?HOMEgfs?${HOMEgfs}?g" \ -e "s?SIF?${container}?g" \ -e "s?BINDINGS?${bindings}?g" \ - ${HOMEgfs}/dev/container/atmos_products/exec.exglobal_atmos_products.sh > ${HOMEgfs}/exec/exglobal_atmos_products.sh + ${HOMEgfs}/dev/container/utils/exec.exglobal_atmos_products.sh > ${HOMEgfs}/exec/exglobal_atmos_products.sh chmod +x ${HOMEgfs}/exec/exglobal_atmos_products.sh -#for fl in exglobal_atmos_products.sh interp_atmos_master.sh interp_atmos_sflux.sh run_mpmd.sh -#do -# cp ${HOMEgfs}/dev/container/atmos_products/${fl} ${HOMEgfs}/ush/container/. 
-# chmod +x ${HOMEgfs}/ush/container/${fl} -#done - diff --git a/dev/container/atmos_products/exec.exglobal_atmos_products.sh b/dev/container/utils/exec.exglobal_atmos_products.sh similarity index 100% rename from dev/container/atmos_products/exec.exglobal_atmos_products.sh rename to dev/container/utils/exec.exglobal_atmos_products.sh diff --git a/dev/container/utils/link_model.sh b/dev/container/utils/link_model.sh index 3e2c71d28d9..b5d22c2c5d9 100755 --- a/dev/container/utils/link_model.sh +++ b/dev/container/utils/link_model.sh @@ -58,13 +58,19 @@ rm -f ${run_model_script} cat > $run_model_script << EOF_MODEL #!/bin/bash -# Set OMP_NUM_THREADS to 1 to avoid oversubscription when doing MPMD -export OMP_NUM_THREADS=1 +source "${HOMEgfs}/dev/ush/load_gw_run_modules.sh" -source /usr/lmod/lmod/init/bash -module purge -module use ${HOMEgfs}/sorc/ufs_model.fd/modulefiles -module load ufs_container.intel +# --- MPI and Fabric Configuration --- +# 1. Force Intel MPI to use Slurm's PMI2 library for job startup +# for Ursa +export I_MPI_PMI_LIBRARY=/apps/slurm/default/lib/libpmi2.so + +# 2. Set the OFI provider to Mellanox InfiniBand +export FI_PROVIDER=mlx + +# 3. Disable problematic shared memory transports in UCX +export UCX_TLS=^sm,cma +# --- End of Configuration --- arg="\$@" ${HOMEgfs}/sorc/ufs_model.fd/tests/${model}.x \$arg @@ -79,27 +85,10 @@ case "${machineid}" in ursa) cat > $link_model_script << EOF_URSA #!/bin/bash -# --- MPI and Fabric Configuration --- -# 1. Force Intel MPI to use Slurm's PMI2 library for job startup -# for Ursa -#export I_MPI_PMI_LIBRARY=/apps/slurm/default/lib/libpmi2.so - -# 2. Set the OFI provider to Mellanox InfiniBand -export FI_PROVIDER=mlx - -# 3. 
Disable problematic shared memory transports in UCX -export UCX_TLS=^sm,cma -# --- End of Configuration --- - -HOST_SLURM_PATH=/apps/slurm/default -HOST_MPI_PATH=/apps/spack-2024-12/linux-rocky9-x86_64/gcc-11.4.1/intel-oneapi-compilers-2024.2.1-oqhstbmawnrsdw472p4pjsopj547o6xs/compiler/2024.2 - export LD_LIBRARY_PATH=$(dirname ${container}) set +x arg="\$@" singularity exec \\ - --bind \${HOST_SLURM_PATH}:\${HOST_SLURM_PATH} \\ - --bind \${HOST_MPI_PATH}:\${HOST_MPI_PATH} \\ ${bindings} \\ ${container} \\ ${run_model_script} \$arg diff --git a/dev/container/utils/ush.wgrib2 b/dev/container/utils/ush.wgrib2 index 567d7074556..d58873337c7 100644 --- a/dev/container/utils/ush.wgrib2 +++ b/dev/container/utils/ush.wgrib2 @@ -1,9 +1,6 @@ #!/bin/bash -source /usr/lmod/lmod/init/bash -module purge -module use HOMEgfs/modulefiles -module load module_gwsetup.container +source HOMEgfs/dev/ush/load_gw_run_modules.sh module load wgrib2/3.6.0 @@ -47,8 +44,6 @@ while [[ "$#" -gt 0 ]]; do shift # Consume the option/argument done -#/opt/spack-stack/spack-stack-1.6.0/envs/unified-env/install/intel/2021.10.0/wgrib2-2.0.8-bq36dgw/bin/wgrib2 $arg - if [[ "$has_ftime" == "true" ]]; then echo "new_arg: $new_arg" if [[ "$has_sets" == "true" ]]; then diff --git a/ush/wave_grib2_sbs.sh b/ush/wave_grib2_sbs.sh index 7ccd284068f..5cc04a861a4 100755 --- a/ush/wave_grib2_sbs.sh +++ b/ush/wave_grib2_sbs.sh @@ -25,6 +25,7 @@ # --------------------------------------------------------------------------- # # 0. 
Preparations +source ${HOMEgfs}/dev/ush/load_gw_run_modules.sh # Script inputs grdID=$1 diff --git a/ush/wave_grid_interp_sbs.sh b/ush/wave_grid_interp_sbs.sh index ed41b813610..3a249ecb457 100755 --- a/ush/wave_grid_interp_sbs.sh +++ b/ush/wave_grid_interp_sbs.sh @@ -22,6 +22,8 @@ # 0.a Basic modes of operation +source ${HOMEgfs}/dev/ush/load_gw_run_modules.sh + grdID=$1 valid_time=$2 dt=$3 From 79a30379fca85d9716d82784b480f1df08fe95c4 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 3 Sep 2025 14:28:07 +0000 Subject: [PATCH 088/134] upper level need to detect machine as well --- dev/container/utils/link_gfs_utils.sh | 20 +++++++++----------- dev/container/utils/ush.wgrib2 | 5 +---- dev/ush/load_gw_run_modules.sh | 17 +++++++++++++++++ ush/detect_machine.sh | 6 ++++++ 4 files changed, 33 insertions(+), 15 deletions(-) diff --git a/dev/container/utils/link_gfs_utils.sh b/dev/container/utils/link_gfs_utils.sh index 9a805db3576..0d36784c504 100755 --- a/dev/container/utils/link_gfs_utils.sh +++ b/dev/container/utils/link_gfs_utils.sh @@ -56,11 +56,13 @@ do cat > $run_model_script << EOF_MODEL #!/bin/bash -source /usr/lmod/lmod/init/bash -module purge -module use ${HOMEgfs}/sorc/gfs_utils.fd/modulefiles -module load gfsutils_container.intel -module load wgrib2/2.0.8 +#source /usr/lmod/lmod/init/bash +#module purge +#module use ${HOMEgfs}/sorc/gfs_utils.fd/modulefiles +#module load gfsutils_container.intel + +source "${HOMEgfs}/dev/ush/load_fv3gfs_modules.sh" +module load wgrib2/3.6.0 arg="\$@" ${HOMEgfs}/sorc/gfs_utils.fd/install/bin/${model}.x \$arg @@ -91,13 +93,9 @@ do cat > $direct_model_script << EOF_DIRECT #!/bin/bash -#!/bin/bash -source /usr/lmod/lmod/init/bash -module purge -module use ${HOMEgfs}/sorc/gfs_utils.fd/modulefiles -module load gfsutils_container.intel -module load wgrib2/2.0.8 +source "${HOMEgfs}/dev/ush/load_fv3gfs_modules.sh" +module load wgrib2/3.6.0 arg="\$@" ${HOMEgfs}/sorc/gfs_utils.fd/install/bin/${nm}.x \$arg diff --git 
a/dev/container/utils/ush.wgrib2 b/dev/container/utils/ush.wgrib2 index 567d7074556..8933063f57e 100644 --- a/dev/container/utils/ush.wgrib2 +++ b/dev/container/utils/ush.wgrib2 @@ -1,9 +1,6 @@ #!/bin/bash -source /usr/lmod/lmod/init/bash -module purge -module use HOMEgfs/modulefiles -module load module_gwsetup.container +source "HOMEgfs/dev/ush/load_gw_run_modules.sh" module load wgrib2/3.6.0 diff --git a/dev/ush/load_gw_run_modules.sh b/dev/ush/load_gw_run_modules.sh index 7a057b0392b..f6ebf93dffe 100755 --- a/dev/ush/load_gw_run_modules.sh +++ b/dev/ush/load_gw_run_modules.sh @@ -17,6 +17,23 @@ ulimit_s=$( ulimit -S -s ) source "${HOMEgfs}/ush/preamble.sh" +# Find module command and purge: +source "${HOMEgfs}/ush/detect_machine.sh" +source "${HOMEgfs}/ush/module-setup.sh" + +case "$${MACHINE_ID}" in + container) + source /usr/lmod/lmod/init/bash + ;; + *) + source /apps/lmod/lmod/init/bash + ;; +esac + +module purge +module use ${HOMEgfs}/sorc/ufs_model.fd/modulefiles +module load ufs_${MACHINE_ID}.intel + # If this function exists in the environment, run it; else set -x if it was set on entering this script ftype=$(type -t set_trace || echo "") if [[ "${ftype}" == "function" ]]; then diff --git a/ush/detect_machine.sh b/ush/detect_machine.sh index ee6c2c2c79b..c8111d723f4 100755 --- a/ush/detect_machine.sh +++ b/ush/detect_machine.sh @@ -8,6 +8,12 @@ # # Thank you for your contribution +# overwrite MACHINE_ID if in container +if [[ -d /opt/spack-stack && -v SINGULARITY_CONTAINER ]]; then + # We are in a container + MACHINE_ID=container +fi + # If the MACHINE_ID variable is set, skip this script. 
if [[ -n "${MACHINE_ID:-}" ]]; then return From 81a5a5347c04443a868d193e7eb703c5495283ac Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 3 Sep 2025 17:47:32 +0000 Subject: [PATCH 089/134] C48_ATM containerized GW worked on Ursa with ss192 --- dev/container/utils/link_model.sh | 22 +--------------------- dev/ush/load_gw_run_modules.sh | 6 ++++-- sorc/build_upp.sh | 4 ++-- sorc/link_workflow.sh | 2 +- 4 files changed, 8 insertions(+), 26 deletions(-) diff --git a/dev/container/utils/link_model.sh b/dev/container/utils/link_model.sh index 7fb5cd6c3e3..1623913f0da 100755 --- a/dev/container/utils/link_model.sh +++ b/dev/container/utils/link_model.sh @@ -52,13 +52,7 @@ rm -f ${run_model_script} cat > $run_model_script << EOF_MODEL #!/bin/bash -# Set OMP_NUM_THREADS to 1 to avoid oversubscription when doing MPMD -export OMP_NUM_THREADS=1 - -source /usr/lmod/lmod/init/bash -module purge -module use ${HOMEgfs}/sorc/ufs_model.fd/modulefiles -module load ufs_container.intel +source "${HOMEgfs}/dev/ush/load_gw_run_modules.sh" arg="\$@" ${HOMEgfs}/sorc/ufs_model.fd/tests/${model}.x \$arg @@ -72,20 +66,6 @@ rm -f ${link_model_script} cat > $link_model_script << EOF_LINK #!/bin/bash -#Need these lines on AWS to run more than one node. -#export I_MPI_DEBUG=10 -#export I_MPI_FABRICS=shm:ofi -#export I_MPI_OFI_PROVIDER=tcp -#export FI_PROVIDER=tcp -#export FI_TCP_IFACE=eth0 - -#For GaeaC6 -#export SINGULARITY_ENABLE_OVERLAY=try -#export SINGULARITY_DISABLE_OVERLAY=yes -#export SINGULARITY_DEBUG=10 -#export SINGULARITY_DEBUG=0 -#unset SINGULARITY_DEBUG - # --- MPI and Fabric Configuration --- # 1. 
Force Intel MPI to use Slurm's PMI2 library for job startup # for Ursa diff --git a/dev/ush/load_gw_run_modules.sh b/dev/ush/load_gw_run_modules.sh index 21d209c58ce..6fbdf3cb7e6 100755 --- a/dev/ush/load_gw_run_modules.sh +++ b/dev/ush/load_gw_run_modules.sh @@ -21,12 +21,14 @@ source "${HOMEgfs}/ush/preamble.sh" source "${HOMEgfs}/ush/detect_machine.sh" source "${HOMEgfs}/ush/module-setup.sh" -case "$${MACHINE_ID}" in +echo "MACHINE_ID: ${MACHINE_ID}" + +case "${MACHINE_ID}" in container) source /usr/lmod/lmod/init/bash ;; *) - source /apps/lmod/lmod/init/bash + # source /apps/lmod/lmod/init/bash ;; esac diff --git a/sorc/build_upp.sh b/sorc/build_upp.sh index 40f5da89337..0aa25024385 100755 --- a/sorc/build_upp.sh +++ b/sorc/build_upp.sh @@ -32,8 +32,8 @@ if [[ "${MACHINE_ID}" == "gaeac6" ]]; then export MACHINE_ID="gaea" fi -cd "${HOMEgfs_}/sorc/ufs_model.fd/FV3/upp/tests" +cd "${HOMEgfs_}/sorc/ufs_model.fd/UFSATM/upp/tests" # shellcheck disable=SC2086 -BUILD_JOBS=${BUILD_JOBS:-8} bash -x "${HOMEgfs_}/sorc/ufs_model.fd/FV3/upp/tests/compile_upp.sh" ${_opts} +BUILD_JOBS=${BUILD_JOBS:-8} bash -x "${HOMEgfs_}/sorc/ufs_model.fd/UFSATM/upp/tests/compile_upp.sh" ${_opts} exit 0 diff --git a/sorc/link_workflow.sh b/sorc/link_workflow.sh index f9d23c7a012..6e1d6c6a42e 100755 --- a/sorc/link_workflow.sh +++ b/sorc/link_workflow.sh @@ -407,7 +407,7 @@ if [[ -d ufs_model.fd ]]; then if [[ -d upp.fd ]]; then rm -rf upp.fd fi - ${LINK} ufs_model.fd/FV3/upp upp.fd + ${LINK} ufs_model.fd/UFSATM/upp upp.fd fi if [[ -d gsi_enkf.fd ]]; then From b4580dece312b52a5150c9de43698fda07eb928f Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 3 Sep 2025 20:20:44 +0000 Subject: [PATCH 090/134] testing c48-s2sw on ursa --- dev/container/gen-run-cases.sh | 6 +- dev/container/utils/create-atmos-products.sh | 14 +--- dev/container/utils/create-container-links.sh | 8 +- dev/container/utils/gen-wrapper.sh | 8 +- dev/container/utils/link_model.sh | 83 +++++++++++++++++-- 
dev/container/utils/link_ww3.sh | 10 +-- scripts/exgfs_wave_post_gridded_sbs.sh | 1 + ush/jjob_header.sh | 2 +- ush/wave_grib2_sbs.sh | 8 ++ ush/wave_grid_interp_sbs.sh | 9 ++ 10 files changed, 114 insertions(+), 35 deletions(-) diff --git a/dev/container/gen-run-cases.sh b/dev/container/gen-run-cases.sh index b7137efc19b..37b0c1248fc 100755 --- a/dev/container/gen-run-cases.sh +++ b/dev/container/gen-run-cases.sh @@ -8,8 +8,8 @@ source "${HOMEgfs}/ush/detect_machine.sh" run_with_container=YES casetype=pr - yamllist="C48_ATM" -#yamllist="C48_S2SW" +#yamllist="C48_ATM" + yamllist="C48_S2SW" #yamllist="C48_S2SWA_gefs" #yamllist="C96mx100_S2S" @@ -21,7 +21,7 @@ img=ubuntu22.04-intel-ufs-env-v1.9.2.img if [[ ${MACHINE_ID} = ursa* ]] ; then container=/scratch4/NAGAPE/epic/${USER}/containers/${img} rundir=/scratch3/NAGAPE/epic/${USER}/run - bindings="-B /apps -B /scratch3 -B /scratch4" + bindings="-B /scratch3 -B /scratch4" HPC_ACCOUNT=epic module load rocoto/1.3.7 diff --git a/dev/container/utils/create-atmos-products.sh b/dev/container/utils/create-atmos-products.sh index cef9a8d0b2c..b06d02b8915 100755 --- a/dev/container/utils/create-atmos-products.sh +++ b/dev/container/utils/create-atmos-products.sh @@ -32,10 +32,10 @@ if [[ ! -v HOMEgfs || ! -v container || ! 
-v bindings ]]; then exit -1 fi -echo "HOMEgfs: $HOMEgfs" -echo "container: $container" -echo "bindings: $bindings" -echo "Verbose: $verbose" +# echo "HOMEgfs: $HOMEgfs" +# echo "container: $container" +# echo "bindings: $bindings" +# echo "Verbose: $verbose" sed -e "s?HOMEgfs?${HOMEgfs}?g" \ -e "s?SIF?${container}?g" \ @@ -43,9 +43,3 @@ sed -e "s?HOMEgfs?${HOMEgfs}?g" \ ${HOMEgfs}/dev/container/atmos_products/exec.exglobal_atmos_products.sh > ${HOMEgfs}/exec/exglobal_atmos_products.sh chmod +x ${HOMEgfs}/exec/exglobal_atmos_products.sh -#for fl in exglobal_atmos_products.sh interp_atmos_master.sh interp_atmos_sflux.sh run_mpmd.sh -#do -# cp ${HOMEgfs}/dev/container/atmos_products/${fl} ${HOMEgfs}/ush/container/. -# chmod +x ${HOMEgfs}/ush/container/${fl} -#done - diff --git a/dev/container/utils/create-container-links.sh b/dev/container/utils/create-container-links.sh index 3fa1da687a2..4355dbfb391 100755 --- a/dev/container/utils/create-container-links.sh +++ b/dev/container/utils/create-container-links.sh @@ -32,10 +32,10 @@ if [[ ! -v HOMEgfs || ! -v container || ! -v bindings ]]; then exit -1 fi -echo "HOMEgfs: $HOMEgfs" -echo "container: $container" -echo "bindings: $bindings" -echo "Verbose: $verbose" +# echo "HOMEgfs: $HOMEgfs" +# echo "container: $container" +# echo "bindings: $bindings" +# echo "Verbose: $verbose" ${HOMEgfs}/dev/container/utils/link_ww3.sh -H ${HOMEgfs} -c ${container} -b "${bindings}" -t gfs ${HOMEgfs}/dev/container/utils/link_ww3.sh -H ${HOMEgfs} -c ${container} -b "${bindings}" -t sfs diff --git a/dev/container/utils/gen-wrapper.sh b/dev/container/utils/gen-wrapper.sh index d65bbc5bd51..b3701a45134 100755 --- a/dev/container/utils/gen-wrapper.sh +++ b/dev/container/utils/gen-wrapper.sh @@ -32,10 +32,10 @@ if [[ ! -v HOMEgfs || ! 
-v container ]]; then exit -1 fi -echo "HOMEgfs: $HOMEgfs" -echo "container: $container" -echo "bindings: $bindings" -echo "Verbose: $verbose" +# echo "HOMEgfs: $HOMEgfs" +# echo "container: $container" +# echo "bindings: $bindings" +# echo "Verbose: $verbose" if [[ "$verbose" == "true" ]]; then set -x diff --git a/dev/container/utils/link_model.sh b/dev/container/utils/link_model.sh index 1623913f0da..220bba4014c 100755 --- a/dev/container/utils/link_model.sh +++ b/dev/container/utils/link_model.sh @@ -2,6 +2,7 @@ verbose=false bindings="-B /scratch3 -B /scratch4" +machineid="ursa" while [ "$#" -gt 0 ]; do case "$1" in @@ -21,6 +22,10 @@ while [ "$#" -gt 0 ]; do model="$2" shift 2 ;; + -M|--MACHINE_ID) + machineid="$2" + shift 2 + ;; -v|--verbose) verbose=true shift @@ -32,15 +37,16 @@ while [ "$#" -gt 0 ]; do esac done -if [[ ! -v HOMEgfs || ! -v container || ! -v model ]]; then +if [[ ! -v HOMEgfs || ! -v container || ! -v model || ! -v MACHINE_ID ]]; then echo "Usage: link_model.sh -H/-HOMEgfs gw-home-dir -c/--container full-path-container-image \\" - echo " -m/--model name_model -b/--bindings -B dirname [-B dirname1 [...]] [-v]" + echo " -m/--model name_model -M/MACHINE_ID MACHINE_ID -b/--bindings [...]] [-v]" exit -1 fi #echo "HOMEgfs: $HOMEgfs" #echo "model: $model" #echo "Verbose: $verbose" +#echo "machineid: $machineid" if [[ "$verbose" == "true" ]]; then set -x @@ -58,12 +64,12 @@ arg="\$@" ${HOMEgfs}/sorc/ufs_model.fd/tests/${model}.x \$arg EOF_MODEL -chmod 755 $run_model_script - link_model_script=${HOMEgfs}/exec/${model}.x rm -f ${link_model_script} -cat > $link_model_script << EOF_LINK +case "${machineid}" in + ursa) +cat > $link_model_script << EOF_URSA #!/bin/bash # --- MPI and Fabric Configuration --- @@ -78,14 +84,75 @@ export FI_PROVIDER=mlx export UCX_TLS=^sm,cma # --- End of Configuration --- +HOST_SLURM_PATH=/apps/slurm/default 
+HOST_MPI_PATH=/apps/spack-2024-12/linux-rocky9-x86_64/gcc-11.4.1/intel-oneapi-compilers-2024.2.1-oqhstbmawnrsdw472p4pjsopj547o6xs/compiler/2024.2/opt/compiler + + export LD_LIBRARY_PATH=$(dirname ${container}) + set +x + arg="\$@" + singularity exec \\ + --bind \${HOST_SLURM_PATH}:\${HOST_SLURM_PATH} \\ + --bind \${HOST_MPI_PATH}:\${HOST_MPI_PATH} \\ + ${bindings} \\ + ${container} \\ + ${run_model_script} \$arg +EOF_URSA + ;; + + gaea*) +cat > $link_model_script << EOF_GAEA +#!/bin/bash +#export SINGULARITY_ENABLE_OVERLAY=try +#export SINGULARITY_DISABLE_OVERLAY=yes +#export SINGULARITY_DEBUG=10 +#export SINGULARITY_DEBUG=0 +#unset SINGULARITY_DEBUG + export LD_LIBRARY_PATH=$(dirname ${container}) set +x arg="\$@" singularity exec \\ - ${bindings} \\ - ${container} \\ - ${run_model_script} \$arg + ${bindings} \\ + ${container} \\ + ${run_model_script} \$arg +EOF_GAEA + ;; + + noaacloud) +cat > $link_model_script << EOF_NOAACLOUD +#!/bin/bash + +#Need these lines on AWS to run more than one node. +#export I_MPI_DEBUG=10 + export I_MPI_FABRICS=shm:ofi + export I_MPI_OFI_PROVIDER=tcp + export FI_PROVIDER=tcp + export FI_TCP_IFACE=eth0 + + export LD_LIBRARY_PATH=$(dirname ${container}) + set +x + arg="\$@" + singularity exec \\ + ${bindings} \\ + ${container} \\ + ${run_model_script} \$arg +EOF_NOAACLOUD + ;; + + *) +cat > $link_model_script << EOF_LINK +#!/bin/bash + export LD_LIBRARY_PATH=$(dirname ${container}) + set +x + arg="\$@" + singularity exec \\ + ${bindings} \\ + ${container} \\ + ${run_model_script} \$arg EOF_LINK + ;; +esac +chmod 755 $run_model_script chmod 755 $link_model_script diff --git a/dev/container/utils/link_ww3.sh b/dev/container/utils/link_ww3.sh index 77c68b7b5c3..6741da703a1 100755 --- a/dev/container/utils/link_ww3.sh +++ b/dev/container/utils/link_ww3.sh @@ -37,11 +37,11 @@ if [[ ! -v HOMEgfs || ! -v container || ! 
-v type ]]; then exit -1 fi -echo "HOMEgfs: $HOMEgfs" -echo "container: $container" -echo "bindings: $bindings" -echo "type: $type" -echo "Verbose: $verbose" +# echo "HOMEgfs: $HOMEgfs" +# echo "container: $container" +# echo "bindings: $bindings" +# echo "type: $type" +# echo "Verbose: $verbose" if [[ "$verbose" == "true" ]]; then set -x diff --git a/scripts/exgfs_wave_post_gridded_sbs.sh b/scripts/exgfs_wave_post_gridded_sbs.sh index 99eefda4fc9..5bf562986ea 100755 --- a/scripts/exgfs_wave_post_gridded_sbs.sh +++ b/scripts/exgfs_wave_post_gridded_sbs.sh @@ -21,6 +21,7 @@ # ############################################################################### +source "${HOMEgfs}/dev/ush/load_gw_run_modules.sh" source "${USHgfs}/wave_domain_grid.sh" DOGRI_WAV=${DOGRI_WAV:-"NO"} # Interpolate to a grid diff --git a/ush/jjob_header.sh b/ush/jjob_header.sh index 5de17f73a49..0c56cf16ac1 100755 --- a/ush/jjob_header.sh +++ b/ush/jjob_header.sh @@ -41,7 +41,7 @@ # [default: $$] _calling_script="${BASH_SOURCE[1]}" -source "${HOMEgfs}/ush/preamble.sh" +source "${HOMEgfs}/dev/ush/load_gw_run_modules.sh" OPTIND=1 while getopts "c:e:" option; do diff --git a/ush/wave_grib2_sbs.sh b/ush/wave_grib2_sbs.sh index 7ccd284068f..d08afd37d85 100755 --- a/ush/wave_grib2_sbs.sh +++ b/ush/wave_grib2_sbs.sh @@ -25,6 +25,14 @@ # --------------------------------------------------------------------------- # # 0. 
Preparations +source "${HOMEgfs}/dev/ush/load_gw_run_modules.sh" + +if [[ ${MACHINE_ID} == container ]] ; then + # We are in a container + export PATH=${PATH}:~/prod-util-2.1.1/bin +else + module load prod_util +fi # Script inputs grdID=$1 diff --git a/ush/wave_grid_interp_sbs.sh b/ush/wave_grid_interp_sbs.sh index ed41b813610..0ad34e4ca65 100755 --- a/ush/wave_grid_interp_sbs.sh +++ b/ush/wave_grid_interp_sbs.sh @@ -22,6 +22,15 @@ # 0.a Basic modes of operation +source "${HOMEgfs}/dev/ush/load_gw_run_modules.sh" + +if [[ ${MACHINE_ID} == container ]] ; then + # We are in a container + export PATH=${PATH}:~/prod-util-2.1.1/bin +else + module load prod_util +fi + grdID=$1 valid_time=$2 dt=$3 From 99c08a3110cc88eefe3730420169b395fff984b6 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Thu, 4 Sep 2025 14:36:50 +0000 Subject: [PATCH 091/134] more test on ursa for gefs --- dev/container/utils/ush.python | 10 ++++++---- scripts/exglobal_atmos_products.sh | 2 -- ush/atmos_ensstat.sh | 2 ++ 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/dev/container/utils/ush.python b/dev/container/utils/ush.python index 500623ccd11..e8cad4b60b9 100644 --- a/dev/container/utils/ush.python +++ b/dev/container/utils/ush.python @@ -2,19 +2,21 @@ source /usr/lmod/lmod/init/bash module purge -module use HOMEgfs/modulefiles -module load gw_setup.container +module use HOMEgfs/sorc/gfs_utils.fd/modulefiles +module load gfsutils_container.intel -module list +#module list module load python module load py-netcdf4 module load py-xarray module load py-f90nml module load py-numpy +module load py-jinja2 +module load py-pyyaml # module load py-python-dateutil/2.8.2 -module list +#module list wxflowPATH="HOMEgfs/ush/python:HOMEgfs/sorc/wxflow/src" export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}HOMEgfs/ush:${wxflowPATH}" diff --git a/scripts/exglobal_atmos_products.sh b/scripts/exglobal_atmos_products.sh index 285d2cb3f04..3b08bcf415a 100755 --- a/scripts/exglobal_atmos_products.sh +++ 
b/scripts/exglobal_atmos_products.sh @@ -1,7 +1,5 @@ #! /usr/bin/env bash -source "${HOMEgfs}/ush/preamble.sh" - source "${HOMEgfs}/dev/ush/load_gw_run_modules.sh" # Scripts used diff --git a/ush/atmos_ensstat.sh b/ush/atmos_ensstat.sh index b0dd881edde..a0551c2f23e 100755 --- a/ush/atmos_ensstat.sh +++ b/ush/atmos_ensstat.sh @@ -1,5 +1,7 @@ #! /usr/bin/env bash +source "${HOMEgfs}/dev/ush/load_gw_run_modules.sh" + grid=${1} fhr3=${2} grid_type=${3:-''} From f3ab33b4245324ffe0b8cc1e4ce3fa50ec3d6268 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Thu, 4 Sep 2025 11:34:25 -0400 Subject: [PATCH 092/134] add machine-id to link_model.sh --- dev/container/gen-run-cases.sh | 8 ++++---- dev/container/utils/create-container-links.sh | 14 +++++++++----- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/dev/container/gen-run-cases.sh b/dev/container/gen-run-cases.sh index 64873c7fd71..81c5cf21f0a 100755 --- a/dev/container/gen-run-cases.sh +++ b/dev/container/gen-run-cases.sh @@ -1,6 +1,6 @@ #!/bin/bash -set +x +set -x HOMEgfs="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." 
>/dev/null 2>&1 && pwd )" source "${HOMEgfs}/ush/detect_machine.sh" @@ -8,8 +8,8 @@ source "${HOMEgfs}/ush/detect_machine.sh" run_with_container=YES casetype=pr -#yamllist="C48_ATM" - yamllist="C48_S2SW" + yamllist="C48_ATM" +#yamllist="C48_S2SW" #yamllist="C48_S2SWA_gefs" #yamllist="C96mx100_S2S" @@ -65,7 +65,7 @@ if [[ "${run_with_container}" == "YES" ]]; then -v -R ${HOMEDIR}/dev/container/utils/create-atmos-products.sh -H ${HOMEDIR} -c ${container} -b "${bindings}" - ${HOMEDIR}/dev/container/utils/create-container-links.sh -H ${HOMEDIR} -c ${container} -b "${bindings}" + ${HOMEDIR}/dev/container/utils/create-container-links.sh -H ${HOMEDIR} -c ${container} -b "${bindings}" -M ${MACHINE_ID} else TOPICDIR=${TOPICDIR} \ RUNTESTS=${rundir} \ diff --git a/dev/container/utils/create-container-links.sh b/dev/container/utils/create-container-links.sh index 4355dbfb391..0c5518f17cc 100755 --- a/dev/container/utils/create-container-links.sh +++ b/dev/container/utils/create-container-links.sh @@ -20,6 +20,10 @@ while [ "$#" -gt 0 ]; do verbose=true shift ;; + -M|--MACHINE_ID) + machineid="$2" + shift 2 + ;; *) echo "Unknown option: $1" exit 1 @@ -27,8 +31,8 @@ while [ "$#" -gt 0 ]; do esac done -if [[ ! -v HOMEgfs || ! -v container || ! -v bindings ]]; then - echo "Usage: create-container-links.sh -H/--HOMEgfs gw-home-dir -c/--container container-fullpath -b/--bindings list-of-binding-dirs [-v]" +if [[ ! -v HOMEgfs || ! -v container || ! -v bindings || ! 
-v MACHINE_ID ]]; then + echo "Usage: create-container-links.sh -H/--HOMEgfs gw-home-dir -c/--container container-fullpath -b/--bindings -M|--MACHINE_ID list-of-binding-dirs [-v]" exit -1 fi @@ -41,9 +45,9 @@ ${HOMEgfs}/dev/container/utils/link_ww3.sh -H ${HOMEgfs} -c ${container} -b "${b ${HOMEgfs}/dev/container/utils/link_ww3.sh -H ${HOMEgfs} -c ${container} -b "${bindings}" -t sfs ${HOMEgfs}/dev/container/utils/link_ww3.sh -H ${HOMEgfs} -c ${container} -b "${bindings}" -t gefs -${HOMEgfs}/dev/container/utils/link_model.sh -H ${HOMEgfs} -c ${container} -m gfs_model -b "${bindings}" -${HOMEgfs}/dev/container/utils/link_model.sh -H ${HOMEgfs} -c ${container} -m sfs_model -b "${bindings}" -${HOMEgfs}/dev/container/utils/link_model.sh -H ${HOMEgfs} -c ${container} -m gefs_model -b "${bindings}" +${HOMEgfs}/dev/container/utils/link_model.sh -H ${HOMEgfs} -c ${container} -m gfs_model -b "${bindings}" -M ${machineid} +${HOMEgfs}/dev/container/utils/link_model.sh -H ${HOMEgfs} -c ${container} -m sfs_model -b "${bindings}" -M ${machineid} +${HOMEgfs}/dev/container/utils/link_model.sh -H ${HOMEgfs} -c ${container} -m gefs_model -b "${bindings}" -M ${machineid} ${HOMEgfs}/dev/container/utils/link_gfs_utils.sh -H ${HOMEgfs} -c ${container} -b "${bindings}" ${HOMEgfs}/dev/container/utils/link_ufs_utils.sh -H ${HOMEgfs} -c ${container} -b "${bindings}" From 003b39b599bb9260eeade8da2cad028b9c35f12e Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Fri, 5 Sep 2025 10:33:24 -0400 Subject: [PATCH 093/134] update CDMBWD to CDMBGWD --- ush/parsing_namelists_FV3.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ush/parsing_namelists_FV3.sh b/ush/parsing_namelists_FV3.sh index b8d33f2b401..24ab9ad410d 100755 --- a/ush/parsing_namelists_FV3.sh +++ b/ush/parsing_namelists_FV3.sh @@ -240,7 +240,7 @@ local BETASCU=${betascu:-"8.0"} local BETAMCU=${betamcu:-"1.0"} local BETADCU=${betadcu:-"2.0"} local RAS=${ras:-".false."} -local CDMBWD=${cdmbgwd:-"3.5,0.25"} +local 
CDMBGWD=${cdmbgwd:-"3.5,0.25"} local PSL_GWD_DX_FACTOR=${psl_gwd_dx_factor:-"6.0"} local PRSLRD0=${prslrd0:-"0."} local IVEGSRC=${ivegsrc:-"1"} From 11ddfb2c3bc2834c0f3278a5bf029932f6d074f6 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Fri, 5 Sep 2025 15:04:48 +0000 Subject: [PATCH 094/134] WM using UFSATM instead of FV3 --- dev/container/gen-run-cases.sh | 10 +++++----- dev/container/shell-in-container.sh | 5 ++--- ush/forecast_predet.sh | 2 +- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/dev/container/gen-run-cases.sh b/dev/container/gen-run-cases.sh index 81c5cf21f0a..d441a083026 100755 --- a/dev/container/gen-run-cases.sh +++ b/dev/container/gen-run-cases.sh @@ -19,7 +19,7 @@ run_with_container=YES HOMEDIR=${HOMEgfs} img=ubuntu22.04-intel-ufs-env-v1.9.2.img if [[ ${MACHINE_ID} = ursa* ]] ; then - container=/scratch4/NAGAPE/epic/${USER}/containers/${img} + container=/scratch3/NCEPDEV/nems/role.epic/containers/${img} rundir=/scratch3/NAGAPE/epic/${USER}/run bindings="-B /scratch3 -B /scratch4" HPC_ACCOUNT=epic @@ -27,7 +27,7 @@ if [[ ${MACHINE_ID} = ursa* ]] ; then module load rocoto/1.3.7 rocotocmd=`which rocotorun` elif [[ ${MACHINE_ID} = gaea* ]] ; then - container=/gpfs/f6/scratch/${USER}/container/${img} + container=/gpfs/f6/scratch/Wei.Huang/container/${img} rundir=/gpfs/f6/scratch/${USER}/run bindings="-B /gpfs/f6/scratch -B /ncrc/home1/${USER}" HPC_ACCOUNT=bil-fire8 @@ -35,9 +35,9 @@ elif [[ ${MACHINE_ID} = gaea* ]] ; then rocotocmd=/autofs/ncrc-svm1_home2/Christopher.W.Harrop/rocoto-1.3.7/bin/rocotorun elif [[ ${MACHINE_ID} = noaacloud* ]] ; then TOPICDIR=/bucket/global-workflow-shared-data/ICSDIR - container=/contrib/${USER}/container/${img} - rundir=/lustre/${USER}/ss191/run - STMP=/lustre/${USER}/ss191/stmp + container=/contrib/container/${img} + rundir=/lustre/${USER}/run + STMP=/lustre/${USER}/stmp bindings="--env \"I_MPI_FABRICS=ofi:shm,I_MPI_DEBUG=6\" -B /apps/slurm/default/lib/libpmi2.so -B /contrib -B /lustre -B /bucket" 
HPC_ACCOUNT=${USER} diff --git a/dev/container/shell-in-container.sh b/dev/container/shell-in-container.sh index 94ba9f65cd3..3cb6b0614b0 100755 --- a/dev/container/shell-in-container.sh +++ b/dev/container/shell-in-container.sh @@ -4,17 +4,16 @@ set -x HOMEgfs="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." >/dev/null 2>&1 && pwd )" source "${HOMEgfs}/ush/detect_machine.sh" -#sif=ubuntu22.04-intel-ufs-env-v1.6.0.img sif=ubuntu22.04-intel-ufs-env-v1.9.2.img if [[ ${MACHINE_ID} = ursa* ]] ; then - img=/scratch4/NAGAPE/epic/${USER}/containers/${sif} + img=/scratch3/NCEPDEV/nems/role.epic/containers/${sif} bindings="-B /scratch3 -B /scratch4" elif [[ ${MACHINE_ID} = gaea* ]] ; then img=/gpfs/f6/scratch/${USER}/container/${sif} bindings="-B /gpfs/f6/scratch -B /ncrc/home1/${USER}" elif [[ ${MACHINE_ID} = noaacloud* ]] ; then - img=/contrib/${USER}/container/${sif} + img=/contrib/containers/${sif} bindings="-B /contrib -B /lustre -B /bucket" fi diff --git a/ush/forecast_predet.sh b/ush/forecast_predet.sh index 6595b6b8a13..70af1aa34c6 100755 --- a/ush/forecast_predet.sh +++ b/ush/forecast_predet.sh @@ -274,7 +274,7 @@ FV3_predet(){ # <0 means older adiabatic pre-conditioning na_init=${na_init:-1} - local suite_file="${HOMEgfs}/sorc/ufs_model.fd/FV3/ccpp/suites/suite_${CCPP_SUITE}.xml" + local suite_file="${HOMEgfs}/sorc/ufs_model.fd/UFSATM/ccpp/suites/suite_${CCPP_SUITE}.xml" if [[ ! -f "${suite_file}" ]]; then echo "FATAL ERROR: CCPP Suite file ${suite_file} does not exist, ABORT!" 
exit 2 From fd711e8e4d023a75017028ec7bcde337444766f9 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Tue, 9 Sep 2025 15:13:30 +0000 Subject: [PATCH 095/134] do not need use prod_util from home now --- dev/ush/load_gw_run_modules.sh | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/dev/ush/load_gw_run_modules.sh b/dev/ush/load_gw_run_modules.sh index 6fbdf3cb7e6..9a0c693d895 100755 --- a/dev/ush/load_gw_run_modules.sh +++ b/dev/ush/load_gw_run_modules.sh @@ -58,22 +58,9 @@ source "${HOMEgfs}/ush/module-setup.sh" #if [[ "$RUN_WITH_CONTAINER" == "YES" ]]; then if [[ "$MACHINE_ID" == "container" ]]; then - # if [[ ! -d ~/prod-util-2.1.1 ]]; then - # cp -r $prod_util_ROOT ~/prod-util-2.1.1 - # fi - - #if [[ "$PATH" =~ "prod-util" ]]; then - export PATH=~/prod-util-2.1.1/bin:$PATH - #fi - export FSYNC=~/prod-util-2.1.1/bin/fsync_file - export MDATE=~/prod-util-2.1.1/bin/mdate - export NDATE=~/prod-util-2.1.1/bin/ndate - export NHOUR=~/prod-util-2.1.1/bin/nhour - source /usr/lmod/lmod/init/bash module use "${HOMEgfs}/sorc/gfs_utils.fd/modulefiles" module load gfsutils_container.intel - module load wgrib2 else # Source versions file for runtime source "${HOMEgfs}/versions/run.ver" @@ -104,10 +91,10 @@ else echo "FATAL ERROR: Failed to load gw_run.${MACHINE_ID}" exit 1 fi - - module load wgrib2 - module load prod_util fi + +module load wgrib2 +module load prod_util export WGRIB2=wgrib2 # Turn on our settings From 8a00789265decee3a4cbcd72c146feda9e30779e Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Tue, 9 Sep 2025 20:15:00 +0000 Subject: [PATCH 096/134] add cpfs and cpreq to bash_utils --- ush/bash_utils.sh | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/ush/bash_utils.sh b/ush/bash_utils.sh index fc69a79131a..e27de09b942 100755 --- a/ush/bash_utils.sh +++ b/ush/bash_utils.sh @@ -109,6 +109,44 @@ function wait_for_file() { set_trace return 1 } + +function cpreq() { + cp $* + if [ $? 
-ne 0 ] ; then + err_exit "'cp $*' was not successful." + fi +} + +function cpfs() { + if [ $# -ne 2 ]; then + echo "This script requires two arguments: a source file and a destination file path." + exit 16 + fi + + if [ "$2" = '.' -o "$2" = './' ]; then + cpdstfile=${PWD:?}/$(basename $1) + elif [ -d $2 ]; then + cpdstfile=${2%/}/$(basename $1) + else + cpdstfile=$2 + fi + + cp $1 $cpdstfile.cptmp + + if [ $? -ne 0 ] ; then + err_exit "$1 is missing or was not copied successfully." + fi + + #${FSYNC} $cpdstfile.cptmp + #if [ $? -ne 0 ]; then + # >&2 echo "WARNING: ${FSYNC} $cpdstfile.cptmp failed." + #fi + + mv $cpdstfile.cptmp $cpdstfile + if [ $? -ne 0 ] ; then + err_exit "$cpdstfile.cptmp is missing or was not moved successfully." + fi +} # shellcheck disable= From aab3b8a19ae280c7a9811ff626edb0406a809b28 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Tue, 9 Sep 2025 22:45:42 +0000 Subject: [PATCH 097/134] remove redundent code --- dev/container/utils/link_ww3.sh | 8 ++++---- ush/wave_grib2_sbs.sh | 7 ------- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/dev/container/utils/link_ww3.sh b/dev/container/utils/link_ww3.sh index 6741da703a1..f3bd4acdc41 100755 --- a/dev/container/utils/link_ww3.sh +++ b/dev/container/utils/link_ww3.sh @@ -47,11 +47,11 @@ if [[ "$verbose" == "true" ]]; then set -x fi -if [[ "$type" == "gfs" ]]; then +#if [[ "$type" == "gfs" ]]; then pdlib=pdlib_ON -else - pdlib=pdlib_OFF -fi +#else +# pdlib=pdlib_OFF +#fi for nm in gint grib grid ounf ounp outf outp prep prnc do diff --git a/ush/wave_grib2_sbs.sh b/ush/wave_grib2_sbs.sh index d08afd37d85..fb1e20b86df 100755 --- a/ush/wave_grib2_sbs.sh +++ b/ush/wave_grib2_sbs.sh @@ -27,13 +27,6 @@ source "${HOMEgfs}/dev/ush/load_gw_run_modules.sh" -if [[ ${MACHINE_ID} == container ]] ; then - # We are in a container - export PATH=${PATH}:~/prod-util-2.1.1/bin -else - module load prod_util -fi - # Script inputs grdID=$1 GRIDNR=$2 From db7855c68e77d314941cf0cfa1dc43de32a89cd0 Mon Sep 
17 00:00:00 2001 From: Wei Huang Date: Tue, 9 Sep 2025 22:47:41 +0000 Subject: [PATCH 098/134] remove redundent code --- ush/wave_grid_interp_sbs.sh | 7 ------- 1 file changed, 7 deletions(-) diff --git a/ush/wave_grid_interp_sbs.sh b/ush/wave_grid_interp_sbs.sh index 0ad34e4ca65..d51863896a6 100755 --- a/ush/wave_grid_interp_sbs.sh +++ b/ush/wave_grid_interp_sbs.sh @@ -24,13 +24,6 @@ source "${HOMEgfs}/dev/ush/load_gw_run_modules.sh" -if [[ ${MACHINE_ID} == container ]] ; then - # We are in a container - export PATH=${PATH}:~/prod-util-2.1.1/bin -else - module load prod_util -fi - grdID=$1 valid_time=$2 dt=$3 From 0f5e6b568677b6acf4b0a5b5e02334f7c6313c69 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 10 Sep 2025 15:04:58 +0000 Subject: [PATCH 099/134] add 2 versions files for container --- versions/build.container.ver | 13 +++++++++++++ versions/run.container.ver | 15 +++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 versions/build.container.ver create mode 100644 versions/run.container.ver diff --git a/versions/build.container.ver b/versions/build.container.ver new file mode 100644 index 00000000000..8ec869379f6 --- /dev/null +++ b/versions/build.container.ver @@ -0,0 +1,13 @@ +#!/usr/bin/env bash +export stack_compiler=oneapi +export stack_compiler_ver=2024.2.0 + +export stack_mpi=intel-oneapi-mpi +export stack_mpi_ver=2021.13 + +# gnu_ver set to override 14.2.0 default set in UFS-WM noaacloud module file +export gnu_ver=13.2.0 + +spack_stack_root="/opt/spack-stack/spack-stack-1.9.2" + +source "${HOMEgfs:-}/versions/spack.ver" diff --git a/versions/run.container.ver b/versions/run.container.ver new file mode 100644 index 00000000000..f71efc41ed1 --- /dev/null +++ b/versions/run.container.ver @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +export stack_compiler=oneapi +export stack_compiler_ver=2024.2.0 + +export stack_mpi=intel-oneapi-mpi +export stack_mpi_ver=2021.13 + +spack_stack_root="/opt/spack-stack//spack-stack-1.9.2" + +source 
"${HOMEgfs:-}/versions/spack.ver" + +export cdo_ver=2.3.0 +export perl_ver=5.38.0 +export mkl_ver=2024.2.1 +export imagemagick_ver=7.1.1-11 From 6922c860a0345dfb643c419fabf79c479d1da728 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Thu, 11 Sep 2025 21:34:28 +0000 Subject: [PATCH 100/134] simplifying and sync --- dev/container/gen-run-cases.sh | 15 +- .../utils/compile-gw-in-container.sh | 4 +- dev/container/utils/create-atmos-products.sh | 12 +- dev/container/utils/create-container-links.sh | 12 +- dev/container/utils/gen-wrapper.sh | 2 +- dev/container/utils/link_gfs_utils.sh | 2 +- dev/container/utils/link_model.sh | 2 +- dev/container/utils/link_ufs_utils.sh | 2 +- dev/container/utils/link_ww3.sh | 2 +- dev/workflow/rocoto/workflow_xml.py | 15 +- dev/workflow/setup_expt.py | 4 - dev/workflow/setup_xml.py | 3 +- env/GAEAC6.env | 18 +- env/GAEAC6.env.container | 362 ++++++++++++++++++ env/URSA.env | 6 - env/URSA.env.container | 332 ++++++++++++++++ scripts/exglobal_forecast.sh | 4 - ush/preamble.sh | 2 +- ush/python/pygfs/task/oceanice_products.py | 2 - 19 files changed, 749 insertions(+), 52 deletions(-) create mode 100755 env/GAEAC6.env.container mode change 100755 => 100644 env/URSA.env create mode 100755 env/URSA.env.container diff --git a/dev/container/gen-run-cases.sh b/dev/container/gen-run-cases.sh index d441a083026..285be926df4 100755 --- a/dev/container/gen-run-cases.sh +++ b/dev/container/gen-run-cases.sh @@ -5,11 +5,11 @@ set -x HOMEgfs="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." 
>/dev/null 2>&1 && pwd )" source "${HOMEgfs}/ush/detect_machine.sh" -run_with_container=YES +run_with_container="YES" - casetype=pr - yamllist="C48_ATM" -#yamllist="C48_S2SW" + casetype="pr" +#yamllist="C48_ATM" + yamllist="C48_S2SW" #yamllist="C48_S2SWA_gefs" #yamllist="C96mx100_S2S" @@ -26,6 +26,10 @@ if [[ ${MACHINE_ID} = ursa* ]] ; then module load rocoto/1.3.7 rocotocmd=`which rocotorun` + + if [[ "${run_with_container}" == "YES" ]]; then + cp ${HOMEgfs}/env/URSA.env.container ${HOMEgfs}/env/URSA.env + fi elif [[ ${MACHINE_ID} = gaea* ]] ; then container=/gpfs/f6/scratch/Wei.Huang/container/${img} rundir=/gpfs/f6/scratch/${USER}/run @@ -33,6 +37,9 @@ elif [[ ${MACHINE_ID} = gaea* ]] ; then HPC_ACCOUNT=bil-fire8 rocotocmd=/autofs/ncrc-svm1_home2/Christopher.W.Harrop/rocoto-1.3.7/bin/rocotorun + if [[ "${run_with_container}" == "YES" ]]; then + cp ${HOMEgfs}/env/GAEAC6.env.container ${HOMEgfs}/env/GAEAC6.env + fi elif [[ ${MACHINE_ID} = noaacloud* ]] ; then TOPICDIR=/bucket/global-workflow-shared-data/ICSDIR container=/contrib/container/${img} diff --git a/dev/container/utils/compile-gw-in-container.sh b/dev/container/utils/compile-gw-in-container.sh index 4f8c56a48b9..fa3bfd82dc0 100755 --- a/dev/container/utils/compile-gw-in-container.sh +++ b/dev/container/utils/compile-gw-in-container.sh @@ -1,7 +1,7 @@ #!/bin/bash gw_sorc_dir=$1 - cd ${gw_sorc_dir} + cd "${gw_sorc_dir}" || exit -1 ./build_all.sh gfs sfs gefs - ./link_workflow.sh +#./link_workflow.sh diff --git a/dev/container/utils/create-atmos-products.sh b/dev/container/utils/create-atmos-products.sh index f4270c1948a..6ccce9ac298 100755 --- a/dev/container/utils/create-atmos-products.sh +++ b/dev/container/utils/create-atmos-products.sh @@ -2,7 +2,7 @@ verbose=false -while [ "$#" -gt 0 ]; do +while [[ "$#" -gt 0 ]]; do case "$1" in -H|--HOMEgfs) HOMEgfs="$2" @@ -32,10 +32,12 @@ if [[ ! -v HOMEgfs || ! -v container || ! 
-v bindings ]]; then exit -1 fi -# echo "HOMEgfs: $HOMEgfs" -# echo "container: $container" -# echo "bindings: $bindings" -# echo "Verbose: $verbose" +if [[ "${verbose}" == "true" ]]; then + echo "Verbose: $verbose" + echo "HOMEgfs: $HOMEgfs" + echo "container: $container" + echo "bindings: $bindings" +fi sed -e "s?HOMEgfs?${HOMEgfs}?g" \ -e "s?SIF?${container}?g" \ diff --git a/dev/container/utils/create-container-links.sh b/dev/container/utils/create-container-links.sh index 0c5518f17cc..5e2ab15b4e1 100755 --- a/dev/container/utils/create-container-links.sh +++ b/dev/container/utils/create-container-links.sh @@ -2,7 +2,7 @@ verbose=false -while [ "$#" -gt 0 ]; do +while [[ "$#" -gt 0 ]]; do case "$1" in -H|--HOMEgfs) HOMEgfs="$2" @@ -36,10 +36,12 @@ if [[ ! -v HOMEgfs || ! -v container || ! -v bindings || ! -v MACHINE_ID ]]; the exit -1 fi -# echo "HOMEgfs: $HOMEgfs" -# echo "container: $container" -# echo "bindings: $bindings" -# echo "Verbose: $verbose" +if [[ "${verbose}" == "true" ]]; then + echo "HOMEgfs: $HOMEgfs" + echo "container: $container" + echo "bindings: $bindings" + echo "Verbose: $verbose" +fi ${HOMEgfs}/dev/container/utils/link_ww3.sh -H ${HOMEgfs} -c ${container} -b "${bindings}" -t gfs ${HOMEgfs}/dev/container/utils/link_ww3.sh -H ${HOMEgfs} -c ${container} -b "${bindings}" -t sfs diff --git a/dev/container/utils/gen-wrapper.sh b/dev/container/utils/gen-wrapper.sh index b3701a45134..3cd276788f3 100755 --- a/dev/container/utils/gen-wrapper.sh +++ b/dev/container/utils/gen-wrapper.sh @@ -2,7 +2,7 @@ verbose=false -while [ "$#" -gt 0 ]; do +while [[ "$#" -gt 0 ]]; do case "$1" in -H|--HOMEgfs) HOMEgfs="$2" diff --git a/dev/container/utils/link_gfs_utils.sh b/dev/container/utils/link_gfs_utils.sh index 7605989e44b..3eb6d9b1115 100755 --- a/dev/container/utils/link_gfs_utils.sh +++ b/dev/container/utils/link_gfs_utils.sh @@ -2,7 +2,7 @@ verbose=false -while [ "$#" -gt 0 ]; do +while [[ "$#" -gt 0 ]]; do case "$1" in -H|--HOMEgfs) HOMEgfs="$2" diff 
--git a/dev/container/utils/link_model.sh b/dev/container/utils/link_model.sh index 8d8d025ccd2..f682fcada65 100755 --- a/dev/container/utils/link_model.sh +++ b/dev/container/utils/link_model.sh @@ -4,7 +4,7 @@ verbose=false bindings="-B /scratch3 -B /scratch4" machineid="ursa" -while [ "$#" -gt 0 ]; do +while [[ "$#" -gt 0 ]]; do case "$1" in -H|--HOMEgfs) HOMEgfs="$2" diff --git a/dev/container/utils/link_ufs_utils.sh b/dev/container/utils/link_ufs_utils.sh index ebc7ecebd6b..04afded7761 100755 --- a/dev/container/utils/link_ufs_utils.sh +++ b/dev/container/utils/link_ufs_utils.sh @@ -2,7 +2,7 @@ verbose=false -while [ "$#" -gt 0 ]; do +while [[ "$#" -gt 0 ]]; do case "$1" in -H|--HOMEgfs) HOMEgfs="$2" diff --git a/dev/container/utils/link_ww3.sh b/dev/container/utils/link_ww3.sh index f3bd4acdc41..dd1139654c1 100755 --- a/dev/container/utils/link_ww3.sh +++ b/dev/container/utils/link_ww3.sh @@ -2,7 +2,7 @@ verbose=false -while [ "$#" -gt 0 ]; do +while [[ "$#" -gt 0 ]]; do case "$1" in -H|--HOMEgfs) HOMEgfs="$2" diff --git a/dev/workflow/rocoto/workflow_xml.py b/dev/workflow/rocoto/workflow_xml.py index 7eda64c7d9a..72ac83b251c 100644 --- a/dev/workflow/rocoto/workflow_xml.py +++ b/dev/workflow/rocoto/workflow_xml.py @@ -158,10 +158,19 @@ def _write_crontab(self, crontab_file: str = None, cronint: int = 5) -> None: # No point creating a crontab if rocotorun is not available. rocotorun = which('rocotorun') if rocotorun is None: - print('Failed to find rocotorun, crontab will not be created') - return + try: + if ('rocotorun' in self.rocoto_config.keys()): + rocotoruncmd = self.rocoto_config['rocotorun'] + else: + rocotoruncmd = '/apps/rocoto/default/bin/rocotorun' + os.path.exists(rocotoruncmd) + except Exception as ee: + raise Exception("Could not find the rocotorun executable. 
Make sure you have the module loaded!: ") from ee - rocotoruncmd = rocotorun.command + version = rocotoruncmd.split('/')[-3] + else: + version = rocotorun("--version", output=str, error=str).split()[-1].strip() + rocotoruncmd = rocotorun.command rocotorunstr = f'{rocotoruncmd} -d {self.expdir}/{self.pslot}.db -w {self.expdir}/{self.pslot}.xml' cronintstr = f'*/{cronint} * * * *' diff --git a/dev/workflow/setup_expt.py b/dev/workflow/setup_expt.py index 14b6d488df9..fed0e5cdaa1 100755 --- a/dev/workflow/setup_expt.py +++ b/dev/workflow/setup_expt.py @@ -84,15 +84,11 @@ def _update_defaults(dict_in: dict) -> dict: # Combine host.info and inputs_dict into a single dict, add some additional keys host_plus_inputs_dict = AttrDict(host.info, **inputs_dict_remapped) - host_plus_inputs_dict.HOMEgfs = _top host_plus_inputs_dict.MACHINE = str(host).upper() # Read in the YAML file yaml_path = inputs.yaml - - if yaml_path.find('/opt/global-workflow-cloud') >= 0: - yaml_path = yaml_path.replace('/opt/global-workflow-cloud', host_plus_inputs_dict.HOMEgfs) if not os.path.exists(yaml_path): raise FileNotFoundError(f'YAML file does not exist, check path: {yaml_path}') yaml_dict = parse_j2yaml(yaml_path, host_plus_inputs_dict) diff --git a/dev/workflow/setup_xml.py b/dev/workflow/setup_xml.py index 7a255108a7c..a6b40690281 100755 --- a/dev/workflow/setup_xml.py +++ b/dev/workflow/setup_xml.py @@ -101,7 +101,8 @@ def main(*argv): check_expdir(user_inputs.expdir, base['EXPDIR']) # Check if "HOMEDIR","STMP","PTMP" dirrctories are writable - dir_keys = ["HOMEDIR", "STMP", "PTMP"] + #dir_keys = ["HOMEDIR", "STMP", "PTMP"] + dir_keys = ["STMP", "PTMP"] for dk in dir_keys: check_dir_writable(base[dk]) if not check_dir_writable(base[dk]): diff --git a/env/GAEAC6.env b/env/GAEAC6.env index af829086ceb..4920f15695a 100755 --- a/env/GAEAC6.env +++ b/env/GAEAC6.env @@ -254,16 +254,14 @@ case ${step} in ;; "fcst" | "efcs") - #export launcher="srun --mpi=pmi2 -l --export=ALL" - export 
launcher="srun --mpi=pmi2 -l --hint=nomultithread --distribution=block:block" - #export OMP_STACKSIZE=1024M - - #export MPICH_COLL_SYNC=MPI_Bcast - #export FI_VERBS_PREFER_XRC=0 - #export FI_CXI_RX_MATCH_MODE=hybrid - #export COMEX_EAGER_THRESHOLD=65536 - #export FI_CXI_RDZV_THRESHOLD=65536 - #export FI_CXI_DEFAULT_CQ_SIZE=1048576 + export OMP_STACKSIZE=1024M + + export MPICH_COLL_SYNC=MPI_Bcast + export FI_VERBS_PREFER_XRC=0 + export FI_CXI_RX_MATCH_MODE=hybrid + export COMEX_EAGER_THRESHOLD=65536 + export FI_CXI_RDZV_THRESHOLD=65536 + export FI_CXI_DEFAULT_CQ_SIZE=1048576 (( nnodes = (ntasks+tasks_per_node-1)/tasks_per_node )) (( ufs_ntasks = nnodes*tasks_per_node )) diff --git a/env/GAEAC6.env.container b/env/GAEAC6.env.container new file mode 100755 index 00000000000..af829086ceb --- /dev/null +++ b/env/GAEAC6.env.container @@ -0,0 +1,362 @@ +#! /usr/bin/env bash + +if [[ $# -ne 1 ]]; then + + echo "Must specify an input argument to set runtime environment variables!" + exit 1 + +fi + +step=$1 + +export launcher="srun -l --export=ALL" +export mpmd_opt="--multi-prog --output=mpmd.%j.%t.out" + +export OMP_STACKSIZE=2048000 +export NTHSTACK=1024000000 + +ulimit -s unlimited +ulimit -a + +# Calculate common variables +# Check first if the dependent variables are set +if [[ -n "${ntasks:-}" && -n "${max_tasks_per_node:-}" && -n "${tasks_per_node:-}" ]]; then + max_threads_per_task=$((max_tasks_per_node / tasks_per_node)) + NTHREADSmax=${threads_per_task:-${max_threads_per_task}} + NTHREADS1=${threads_per_task:-1} + if [[ ${NTHREADSmax} -gt ${max_threads_per_task} ]]; then + NTHREADSmax=${max_threads_per_task} + fi + if [[ ${NTHREADS1} -gt ${max_threads_per_task} ]]; then + NTHREADS1=${max_threads_per_task} + fi + # This may be useful when GaeaC6 is fully ported, so ignore SC warning + # shellcheck disable=SC2034 + APRUN_default="${launcher} -n ${ntasks}" +else + echo "ERROR config.resources must be sourced before sourcing GAEAC6.env" + exit 2 +fi + +case ${step} in 
+ "prep" | "prepbufr") + + export POE="NO" + export BACK="NO" + export sys_tp="GAEAC6" + export launcher_PREP="srun" + ;; + "prep_emissions") + + export APRUN="${APRUN_default}" + ;; + "waveinit" | "waveprep" | "wavepostsbs" | "wavepostbndpnt" | "wavepostpnt" | "wavepostbndpntbll") + + export USE_CFP="YES" + if [[ "${step}" == "waveprep" ]]; then + export MP_PULSE=0 + fi + export wavempexec=${launcher} + export wave_mpmd=${mpmd_opt} + + ;; + "atmanlvar") + + export NTHREADS_ATMANLVAR=${NTHREADSmax} + export APRUN_ATMANLVAR="${APRUN_default} --cpus-per-task=${NTHREADS_ATMANLVAR}" + ;; + "atmanlfv3inc") + + export NTHREADS_ATMANLFV3INC=${NTHREADSmax} + export APRUN_ATMANLFV3INC="${APRUN_default} --cpus-per-task=${NTHREADS_ATMANLFV3INC}" + ;; + "atmensanlobs") + + export NTHREADS_ATMENSANLOBS=${NTHREADSmax} + export APRUN_ATMENSANLOBS="${APRUN_default} --cpus-per-task=${NTHREADS_ATMENSANLOBS}" + ;; + "atmensanlsol") + + export NTHREADS_ATMENSANLSOL=${NTHREADSmax} + export APRUN_ATMENSANLSOL="${APRUN_default} --cpus-per-task=${NTHREADS_ATMENSANLSOL}" + ;; + "atmensanlletkf") + + export NTHREADS_ATMENSANLLETKF=${NTHREADSmax} + export APRUN_ATMENSANLLETKF="${APRUN_default} --cpus-per-task=${NTHREADS_ATMENSANLLETKF}" + ;; + "atmensanlfv3inc") + + export NTHREADS_ATMENSANLFV3INC=${NTHREADSmax} + export APRUN_ATMENSANLFV3INC="${APRUN_default} --cpus-per-task=${NTHREADS_ATMENSANLFV3INC}" + ;; + "aeroanlvar") + + export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" + + export NTHREADS_AEROANL=${NTHREADSmax} + export APRUN_AEROANL="${APRUN_default} --cpus-per-task=${NTHREADS_AEROANL}" + ;; + "aeroanlgenb") + + export NTHREADS_AEROANLGENB=${NTHREADSmax} + export APRUN_AEROANLGENB="${APRUN_default} --cpus-per-task=${NTHREADS_AEROANLGENB}" + ;; + "prepobsaero") + + export NTHREADS_PREPOBSAERO=${NTHREADS1} + export APRUN_PREPOBSAERO="${APRUN_default} --cpus-per-task=${NTHREADS_PREPOBSAERO}" + ;; + "snowanl") + + export APRUN_CALCFIMS="${launcher} -n 1" + + export 
NTHREADS_SNOWANL=${NTHREADSmax} + export APRUN_SNOWANL="${APRUN_default} --cpus-per-task=${NTHREADS_SNOWANL}" + + export APRUN_APPLY_INCR="${launcher} -n 6" + ;; + "esnowanl") + + export APRUN_CALCFIMS="${launcher} -n 1" + + export NTHREADS_ESNOWANL=${NTHREADSmax} + export APRUN_ESNOWANL="${APRUN_default} --cpus-per-task=${NTHREADS_ESNOWANL}" + + export APRUN_APPLY_INCR="${launcher} -n 6" + ;; + "marinebmat") + + export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" + export APRUN_MARINEBMAT="${APRUN_default}" + ;; + "marineanlvar") + + export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" + export APRUN_MARINEANLVAR="${APRUN_default}" + ;; + "marineanlecen") + + export APRUN_MARINEANLECEN="${APRUN_default}" + ;; + "marineanlchkpt") + + export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" + + export NTHREADS_OCNANAL=${NTHREADSmax} + export APRUN_MARINEANLCHKPT="${APRUN_default} --cpus-per-task=${NTHREADS_OCNANAL}" + + export APRUN_MARINEANLOBSSTATS="${launcher} -n 1" + ;; + "anlstat") + + export NTHREADS_ANLSTAT=${NTHREADSmax} + export APRUN_ANLSTAT="${APRUN_default} --cpus-per-task=${NTHREADS_ANLSTAT}" + ;; + "marineanlletkf") + + export NTHREADS_MARINEANLLETKF=${NTHREADSmax} + export APRUN_MARINEANLLETKF="${APRUN_default}" + ;; + "ecen_fv3jedi") + + export NTHREADS_ECEN_FV3JEDI=${NTHREADSmax} + export APRUN_CORRECTION_INCREMENT="${launcher} -n ${ntasks_correction_increment} --cpus-per-task=${NTHREADS_ECEN_FV3JEDI}" + export APRUN_ENSEMBLE_RECENTER="${launcher} -n ${ntasks_ensemble_recenter} --cpus-per-task=${NTHREADS_ECEN_FV3JEDI}" + ;; + "analcalc_fv3jedi") + + export NTHREADS_ANALCALC_FV3JEDI=${NTHREADSmax} + export APRUN_ANALCALC_FV3JEDI="${APRUN_default} --cpus-per-task=${NTHREADS_ANALCALC_FV3JEDI}" + ;; + "anal" | "analcalc") + + export MKL_NUM_THREADS=4 + export MKL_CBWR=AUTO + + export CFP_MP=${CFP_MP:-"YES"} + export USE_CFP=${USE_CFP:-"YES"} + export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" + + export 
NTHREADS_GSI=${threads_per_task_anal:-${max_threads_per_task}} + export APRUN_GSI="${APRUN_default} --cpus-per-task=${NTHREADS_GSI}" + + export NTHREADS_CALCINC=${threads_per_task_calcinc:-1} + if [[ ${NTHREADS_CALCINC} -gt ${max_threads_per_task} ]]; then + export NTHREADS_CALCINC=${max_threads_per_task} + fi + export APRUN_CALCINC="${launcher} \$ncmd --cpus-per-task=${NTHREADS_CALCINC}" + + export NTHREADS_CYCLE=${threads_per_task_cycle:-12} + if [[ ${NTHREADS_CYCLE} -gt ${max_tasks_per_node} ]]; then + export NTHREADS_CYCLE=${max_tasks_per_node} + fi + ntasks_cycle=${ntiles:-6} + export APRUN_CYCLE="${launcher} -n ${ntasks_cycle} --cpus-per-task=${NTHREADS_CYCLE}" + + export NTHREADS_GAUSFCANL=1 + ntasks_gausfcanl=${ntasks_gausfcanl:-1} + export APRUN_GAUSFCANL="${launcher} -n ${ntasks_gausfcanl} --cpus-per-task=${NTHREADS_GAUSFCANL}" + ;; + "offlineanl") + + export NTHREADS_CHGRES=${threads_per_task_chgres:-12} + if [[ ${NTHREADS_CHGRES} -gt ${max_tasks_per_node} ]]; then + export NTHREADS_CHGRES=${max_tasks_per_node} + fi + export APRUN_CHGRES="time" + + export NTHREADS_CALCINC=${threads_per_task_calcinc:-1} + if [[ ${NTHREADS_CALCINC} -gt ${max_threads_per_task} ]]; then + export NTHREADS_CALCINC=${max_threads_per_task} + fi + export APRUN_CALCINC="${APRUN_default} --cpus-per-task=${NTHREADS_CALCINC}" + + ;; + "sfcanl") + + export NTHREADS_CYCLE=${threads_per_task:-14} + if [[ ${NTHREADS_CYCLE} -gt ${max_tasks_per_node} ]]; then + export NTHREADS_CYCLE=${max_tasks_per_node} + fi + export APRUN_CYCLE="${APRUN_default} --cpus-per-task=${NTHREADS_CYCLE}" + + # REGRID requires 6 tasks for reproducibility + ntasks_regrid=6 + export APRUN_REGRID="${launcher} -n ${ntasks_regrid} " + + ;; + "eobs") + + export MKL_NUM_THREADS=4 + export MKL_CBWR=AUTO + + export CFP_MP=${CFP_MP:-"YES"} + export USE_CFP=${USE_CFP:-"YES"} + export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" + + + export NTHREADS_GSI=${NTHREADSmax} + if [[ ${NTHREADS_GSI} -gt ${max_threads_per_task} ]]; 
then + export NTHREADS_GSI=${max_threads_per_task} + fi + export APRUN_GSI="${APRUN_default} --cpus-per-task=${NTHREADS_GSI}" + ;; + "eupd") + + export CFP_MP=${CFP_MP:-"YES"} + export USE_CFP=${USE_CFP:-"YES"} + export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" + + + export NTHREADS_ENKF=${NTHREADSmax} + export APRUN_ENKF="${launcher} -n ${ntasks_enkf:-${ntasks}} --cpus-per-task=${NTHREADS_ENKF}" + ;; + "fcst" | "efcs") + + #export launcher="srun --mpi=pmi2 -l --export=ALL" + export launcher="srun --mpi=pmi2 -l --hint=nomultithread --distribution=block:block" + #export OMP_STACKSIZE=1024M + + #export MPICH_COLL_SYNC=MPI_Bcast + #export FI_VERBS_PREFER_XRC=0 + #export FI_CXI_RX_MATCH_MODE=hybrid + #export COMEX_EAGER_THRESHOLD=65536 + #export FI_CXI_RDZV_THRESHOLD=65536 + #export FI_CXI_DEFAULT_CQ_SIZE=1048576 + + (( nnodes = (ntasks+tasks_per_node-1)/tasks_per_node )) + (( ufs_ntasks = nnodes*tasks_per_node )) + # With ESMF threading, the model wants to use the full node + export APRUN_UFS="${launcher} -n ${ufs_ntasks}" + unset nnodes ufs_ntasks + ;; + "upp") + + export NTHREADS_UPP=${NTHREADS1} + export APRUN_UPP="${APRUN_default} --cpus-per-task=${NTHREADS_UPP}" + ;; + "atmos_products") + + export USE_CFP="YES" # Use MPMD for downstream product generation + ;; + "oceanice_products") + + export NTHREADS_OCNICEPOST=${NTHREADS1} + export APRUN_OCNICEPOST="${launcher} -n 1 --cpus-per-task=${NTHREADS_OCNICEPOST}" + ;; + "ecen") + + export NTHREADS_ECEN=${NTHREADSmax} + export APRUN_ECEN="${APRUN_default} --cpus-per-task=${NTHREADS_ECEN}" + + export NTHREADS_CHGRES=${threads_per_task_chgres:-12} + if [[ ${NTHREADS_CHGRES} -gt ${max_tasks_per_node} ]]; then + export NTHREADS_CHGRES=${max_tasks_per_node} + fi + export APRUN_CHGRES="time" + + export NTHREADS_CALCINC=${threads_per_task_calcinc:-1} + if [[ ${NTHREADS_CALCINC} -gt ${max_threads_per_task} ]]; then + export NTHREADS_CALCINC=${max_threads_per_task} + fi + export APRUN_CALCINC="${APRUN_default} 
--cpus-per-task=${NTHREADS_CALCINC}" + + ;; + "esfc") + + export NTHREADS_ESFC=${NTHREADSmax} + export APRUN_ESFC="${APRUN_default} --cpus-per-task=${NTHREADS_ESFC}" + + export NTHREADS_CYCLE=${threads_per_task_cycle:-14} + if [[ ${NTHREADS_CYCLE} -gt ${max_tasks_per_node} ]]; then + export NTHREADS_CYCLE=${max_tasks_per_node} + fi + export APRUN_CYCLE="${APRUN_default} --cpus-per-task=${NTHREADS_CYCLE}" + + # REGRID requires 6 tasks for reproducibility + ntasks_regrid=6 + export APRUN_REGRID="${launcher} -n ${ntasks_regrid} " + + ;; + "epos") + + export NTHREADS_EPOS=${NTHREADSmax} + export APRUN_EPOS="${APRUN_default} --cpus-per-task=${NTHREADS_EPOS}" + + ;; + "postsnd") + + export CFP_MP="YES" + + export NTHREADS_POSTSND=${NTHREADS1} + export APRUN_POSTSND="${APRUN_default} --cpus-per-task=${NTHREADS_POSTSND}" + + export NTHREADS_POSTSNDCFP=${threads_per_task_postsndcfp:-1} + if [[ ${NTHREADS_POSTSNDCFP} -gt ${max_threads_per_task} ]]; then + export NTHREADS_POSTSNDCFP=${max_threads_per_task} + fi + export APRUN_POSTSNDCFP="${launcher} -n ${ntasks_postsndcfp} ${mpmd_opt}" + + ;; + "awips") + + export NTHREADS_AWIPS=${NTHREADS1} + export APRUN_AWIPSCFP="${APRUN_default} ${mpmd_opt}" + + ;; + "gempak") + + echo "WARNING: ${step} is not enabled on ${machine}!" 
+ + ;; + "fit2obs") + + export NTHREADS_FIT2OBS=${NTHREADS1} + export MPIRUN="${APRUN_default} --cpus-per-task=${NTHREADS_FIT2OBS}" + ;; + *) + # Some other job not yet defined here + echo "WARNING: The job step ${step} does not specify GAEAC6-specific resources" + ;; +esac diff --git a/env/URSA.env b/env/URSA.env old mode 100755 new mode 100644 index 247f0515e77..b908defb4a0 --- a/env/URSA.env +++ b/env/URSA.env @@ -242,12 +242,6 @@ elif [[ "${step}" = "eupd" ]]; then export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" elif [[ "${step}" = "fcst" ]] || [[ "${step}" = "efcs" ]]; then - # fast - #export launcher="srun -l --hint=nomultithread --distribution=block:block" - # slow3 - #export launcher="srun -l --export=ALL --hint=nomultithread" - export launcher="srun --mpi=pmi2 -l --hint=nomultithread" - #export launcher="srun env -u SLURM_NODELIST" (( nnodes = (ntasks+tasks_per_node-1)/tasks_per_node )) (( ufs_ntasks = nnodes*tasks_per_node )) diff --git a/env/URSA.env.container b/env/URSA.env.container new file mode 100755 index 00000000000..247f0515e77 --- /dev/null +++ b/env/URSA.env.container @@ -0,0 +1,332 @@ +#! /usr/bin/env bash + +if [[ $# -ne 1 ]]; then + + echo "Must specify an input argument to set runtime environment variables!" 
+ exit 1 + +fi + +step=$1 + +export launcher="srun -l --export=ALL --hint=nomultithread" +export mpmd_opt="--multi-prog --output=mpmd.%j.%t.out" + +#export POSTAMBLE_CMD='report-mem' + +# Configure MPI environment +export OMP_STACKSIZE=2048000 +export NTHSTACK=1024000000 + +# Setting stacksize to unlimited on login nodes is prohibited +if [[ -n "${SLURM_JOB_ID:-}" ]]; then + ulimit -s unlimited + ulimit -a +fi + +# Calculate common variables +# Check first if the dependent variables are set +if [[ -n "${ntasks:-}" && -n "${max_tasks_per_node:-}" && -n "${tasks_per_node:-}" ]]; then + max_threads_per_task=$((max_tasks_per_node / tasks_per_node)) + NTHREADSmax=${threads_per_task:-${max_threads_per_task}} + NTHREADS1=${threads_per_task:-1} + if [[ ${NTHREADSmax} -gt ${max_threads_per_task} ]]; then + NTHREADSmax=${max_threads_per_task} + fi + if [[ ${NTHREADS1} -gt ${max_threads_per_task} ]]; then + NTHREADS1=${max_threads_per_task} + fi + APRUN_default="${launcher} -n ${ntasks}" +else + echo "ERROR config.resources must be sourced before sourcing URSA.env" + exit 2 +fi + +if [[ "${step}" = "prep" ]] || [[ "${step}" = "prepbufr" ]]; then + + export POE="NO" + export BACK="NO" + export sys_tp="URSA" + export launcher_PREP="srun --hint=nomultithread" + +elif [[ "${step}" = "prep_emissions" ]]; then + + export APRUN="${APRUN_default}" + +elif [[ "${step}" = "waveinit" ]] || [[ "${step}" = "waveprep" ]] || [[ "${step}" = "wavepostsbs" ]] || [[ "${step}" = "wavepostbndpnt" ]] || [[ "${step}" = "wavepostbndpntbll" ]] || [[ "${step}" = "wavepostpnt" ]]; then + + export USE_CFP="YES" + if [[ "${step}" = "waveprep" ]]; then export MP_PULSE=0 ; fi + export wavempexec=${launcher} + export wave_mpmd=${mpmd_opt} + +elif [[ "${step}" = "atmanlvar" ]]; then + + export NTHREADS_ATMANLVAR=${NTHREADSmax} + export APRUN_ATMANLVAR="${APRUN_default} --cpus-per-task=${NTHREADS_ATMANLVAR}" + +elif [[ "${step}" = "atmensanlobs" ]]; then + + export NTHREADS_ATMENSANLOBS=${NTHREADSmax} + 
export APRUN_ATMENSANLOBS="${APRUN_default} --cpus-per-task=${NTHREADS_ATMENSANLOBS}" + +elif [[ "${step}" = "atmensanlsol" ]]; then + + export NTHREADS_ATMENSANLSOL=${NTHREADSmax} + export APRUN_ATMENSANLSOL="${APRUN_default} --cpus-per-task=${NTHREADS_ATMENSANLSOL}" + +elif [[ "${step}" = "atmensanlletkf" ]]; then + + export NTHREADS_ATMENSANLLETKF=${NTHREADSmax} + export APRUN_ATMENSANLLETKF="${APRUN_default} --cpus-per-task=${NTHREADS_ATMENSANLLETKF}" + +elif [[ "${step}" = "atmensanlfv3inc" ]]; then + + export NTHREADS_ATMENSANLFV3INC=${NTHREADSmax} + export APRUN_ATMENSANLFV3INC="${APRUN_default} --cpus-per-task=${NTHREADS_ATMENSANLFV3INC}" + +elif [[ "${step}" = "aeroanlvar" ]]; then + + export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" + + export NTHREADS_AEROANL=${NTHREADSmax} + export APRUN_AEROANL="${APRUN_default} --cpus-per-task=${NTHREADS_AEROANL}" + +elif [[ "${step}" = "aeroanlgenb" ]]; then + + export NTHREADS_AEROANLGENB=${NTHREADSmax} + export APRUN_AEROANLGENB="${APRUN_default} --cpus-per-task=${NTHREADS_AEROANLGENB}" + +elif [[ "${step}" = "atmanlfv3inc" ]]; then + + export NTHREADS_ATMANLFV3INC=${NTHREADSmax} + export APRUN_ATMANLFV3INC="${APRUN_default} --cpus-per-task=${NTHREADS_ATMANLFV3INC}" + +elif [[ "${step}" = "anlstat" ]]; then + + export NTHREADS_ANLSTAT=${NTHREADSmax} + export APRUN_ANLSTAT="${APRUN_default} --cpus-per-task=${NTHREADS_ANLSTAT}" + +elif [[ "${step}" = "prepobsaero" ]]; then + + export NTHREADS_PREPOBSAERO=${NTHREADS1} + export APRUN_PREPOBSAERO="${APRUN_default} --cpus-per-task=${NTHREADS_PREPOBSAERO}" + +elif [[ "${step}" = "snowanl" ]]; then + + export APRUN_CALCFIMS="${launcher} -n 1" + + export NTHREADS_SNOWANL=${NTHREADSmax} + export APRUN_SNOWANL="${APRUN_default} --mem=0 --cpus-per-task=${NTHREADS_SNOWANL}" + + export APRUN_APPLY_INCR="${launcher} -n 6" + +elif [[ "${step}" = "esnowanl" ]]; then + + export APRUN_CALCFIMS="${launcher} -n 1" + + export NTHREADS_ESNOWANL=${NTHREADSmax} + export 
APRUN_ESNOWANL="${APRUN_default} --mem=0 --cpus-per-task=${NTHREADS_ESNOWANL}" + + export APRUN_APPLY_INCR="${launcher} -n 6" + +elif [[ "${step}" = "marinebmat" ]]; then + + export APRUNCFP="${launcher} -n \$ncmd --multi-prog" + export APRUN_MARINEBMAT="${APRUN_default}" + +elif [[ "${step}" = "marineanlvar" ]]; then + + export APRUNCFP="${launcher} -n \$ncmd --multi-prog" + export APRUN_MARINEANLVAR="${APRUN_default}" + +elif [[ "${step}" = "marineanlchkpt" ]]; then + + export APRUNCFP="${launcher} -n \$ncmd --multi-prog" + export APRUN_MARINEANLCHKPT="${APRUN_default}" + + export APRUN_MARINEANLOBSSTATS="${launcher} -n 1" + +elif [[ "${step}" = "marineanlecen" ]]; then + + export NTHREADS_MARINEANLECEN=${NTHREADSmax} + export APRUN_MARINEANLECEN="${APRUN_default} --cpus-per-task=${NTHREADS_MARINEANLECEN}" + +elif [[ "${step}" = "marineanlletkf" ]]; then + + export NTHREADS_MARINEANLLETKF=${NTHREADSmax} + export APRUN_MARINEANLLETKF=${APRUN_default} + +elif [[ "${step}" = "ecen_fv3jedi" ]]; then + + export NTHREADS_ECEN_FV3JEDI=${NTHREADSmax} + export APRUN_CORRECTION_INCREMENT="${launcher} -n ${ntasks_correction_increment} --cpus-per-task=${NTHREADS_ECEN_FV3JEDI}" + export APRUN_ENSEMBLE_RECENTER="${launcher} -n ${ntasks_ensemble_recenter} --cpus-per-task=${NTHREADS_ECEN_FV3JEDI}" + +elif [[ "${step}" = "analcalc_fv3jedi" ]]; then + + export NTHREADS_ANALCALC_FV3JEDI=${NTHREADSmax} + export APRUN_ANALCALC_FV3JEDI="${APRUN_default} --cpus-per-task=${NTHREADS_ANALCALC_FV3JEDI}" + +elif [[ "${step}" = "anal" ]] || [[ "${step}" = "analcalc" ]]; then + + export MKL_NUM_THREADS=4 + export MKL_CBWR=AUTO + + export CFP_MP=${CFP_MP:-"YES"} + export USE_CFP=${USE_CFP:-"YES"} + export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" + + export NTHREADS_GSI=${NTHREADSmax} + export APRUN_GSI="${APRUN_default} --cpus-per-task=${NTHREADS_GSI}" + + export NTHREADS_CALCINC=${threads_per_task_calcinc:-1} + if [[ ${NTHREADS_CALCINC} -gt ${max_threads_per_task} ]]; then + export 
NTHREADS_CALCINC=${max_threads_per_task} + fi + export APRUN_CALCINC="${launcher} \$ncmd --cpus-per-task=${NTHREADS_CALCINC}" + + export NTHREADS_CYCLE=${threads_per_task_cycle:-12} + if [[ ${NTHREADS_CYCLE} -gt ${max_tasks_per_node} ]]; then + export NTHREADS_CYCLE=${max_tasks_per_node} + fi + ntasks_cycle=${ntiles:-6} + export APRUN_CYCLE="${launcher} -n ${ntasks_cycle} --cpus-per-task=${NTHREADS_CYCLE}" + + export NTHREADS_GAUSFCANL=1 + ntasks_gausfcanl=${ntasks_gausfcanl:-1} + export APRUN_GAUSFCANL="${launcher} -n ${ntasks_gausfcanl} --cpus-per-task=${NTHREADS_GAUSFCANL}" + +elif [[ "${step}" = "offlineanl" ]]; then + + export NTHREADS_CHGRES=${threads_per_task_chgres:-12} + if [[ ${NTHREADS_CHGRES} -gt ${max_tasks_per_node} ]]; then + export NTHREADS_CHGRES=${max_tasks_per_node} + fi + export APRUN_CHGRES="time" + + export NTHREADS_CALCINC=${threads_per_task_calcinc:-1} + if [[ ${NTHREADS_CALCINC} -gt ${max_threads_per_task} ]]; then + export NTHREADS_CALCINC=${max_threads_per_task} + fi + export APRUN_CALCINC="${APRUN_default} --cpus-per-task=${NTHREADS_CALCINC}" + +elif [[ "${step}" = "sfcanl" ]]; then + + export NTHREADS_CYCLE=${threads_per_task:-14} + export APRUN_CYCLE="${APRUN_default} --cpus-per-task=${NTHREADS_CYCLE}" + + # REGRID requires 6 tasks for reproducibility + ntasks_regrid=6 + export APRUN_REGRID="${launcher} -n ${ntasks_regrid} " + +elif [[ "${step}" = "eobs" ]]; then + + export MKL_NUM_THREADS=4 + export MKL_CBWR=AUTO + + export NTHREADS_GSI=${NTHREADSmax} + export APRUN_GSI="${APRUN_default} --cpus-per-task=${NTHREADS_GSI}" + + export CFP_MP=${CFP_MP:-"YES"} + export USE_CFP=${USE_CFP:-"YES"} + export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" + +elif [[ "${step}" = "eupd" ]]; then + + export NTHREADS_ENKF=${NTHREADSmax} + export APRUN_ENKF="${launcher} -n ${ntasks_enkf:-${ntasks}} --cpus-per-task=${NTHREADS_ENKF}" + + export CFP_MP=${CFP_MP:-"YES"} + export USE_CFP=${USE_CFP:-"YES"} + export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" 
+ +elif [[ "${step}" = "fcst" ]] || [[ "${step}" = "efcs" ]]; then + # fast + #export launcher="srun -l --hint=nomultithread --distribution=block:block" + # slow3 + #export launcher="srun -l --export=ALL --hint=nomultithread" + export launcher="srun --mpi=pmi2 -l --hint=nomultithread" + #export launcher="srun env -u SLURM_NODELIST" + + (( nnodes = (ntasks+tasks_per_node-1)/tasks_per_node )) + (( ufs_ntasks = nnodes*tasks_per_node )) + # With ESMF threading, the model wants to use the full node + export APRUN_UFS="${launcher} -n ${ufs_ntasks}" + unset nnodes ufs_ntasks + +elif [[ "${step}" = "upp" ]]; then + + export NTHREADS_UPP=${NTHREADS1} + export APRUN_UPP="${APRUN_default} --cpus-per-task=${NTHREADS_UPP}" + +elif [[ "${step}" = "atmos_products" ]]; then + + export USE_CFP="YES" # Use MPMD for downstream product generation on Ursa + +elif [[ "${step}" = "oceanice_products" ]]; then + + export NTHREADS_OCNICEPOST=${NTHREADS1} + export APRUN_OCNICEPOST="${launcher} -n 1 --cpus-per-task=${NTHREADS_OCNICEPOST}" + +elif [[ "${step}" = "ecen" ]]; then + + export NTHREADS_ECEN=${NTHREADSmax} + export APRUN_ECEN="${APRUN_default} --cpus-per-task=${NTHREADS_ECEN}" + + export NTHREADS_CHGRES=${threads_per_task_chgres:-12} + if [[ ${NTHREADS_CHGRES} -gt ${max_tasks_per_node} ]]; then + export NTHREADS_CHGRES=${max_tasks_per_node} + fi + export APRUN_CHGRES="time" + + export NTHREADS_CALCINC=${threads_per_task_calcinc:-1} + if [[ ${NTHREADS_CALCINC} -gt ${max_threads_per_task} ]]; then + export NTHREADS_CALCINC=${max_threads_per_task} + fi + export APRUN_CALCINC="${APRUN_default} --cpus-per-task=${NTHREADS_CALCINC}" + +elif [[ "${step}" = "esfc" ]]; then + + export NTHREADS_ESFC=${threads_per_task_esfc:-${max_threads_per_task}} + export APRUN_ESFC="${APRUN_default} --cpus-per-task=${NTHREADS_ESFC}" + + export NTHREADS_CYCLE=${threads_per_task_cycle:-14} + if [[ ${NTHREADS_CYCLE} -gt ${max_tasks_per_node} ]]; then + export NTHREADS_CYCLE=${max_tasks_per_node} + fi + export 
APRUN_CYCLE="${APRUN_default} --cpus-per-task=${NTHREADS_CYCLE}" + + # REGRID requires 6 tasks for reproducibility + ntasks_regrid=6 + export APRUN_REGRID="${launcher} -n ${ntasks_regrid} " + +elif [[ "${step}" = "epos" ]]; then + + export NTHREADS_EPOS=${NTHREADSmax} + export APRUN_EPOS="${APRUN_default} --cpus-per-task=${NTHREADS_EPOS}" + +elif [[ "${step}" = "postsnd" ]]; then + + export CFP_MP="YES" + + export NTHREADS_POSTSND=${NTHREADS1} + export APRUN_POSTSND="${APRUN_default} --cpus-per-task=${NTHREADS_POSTSND}" + + export NTHREADS_POSTSNDCFP=${threads_per_task_postsndcfp:-1} + if [[ ${NTHREADS_POSTSNDCFP} -gt ${max_threads_per_task} ]]; then + export NTHREADS_POSTSNDCFP=${max_threads_per_task} + fi + export APRUN_POSTSNDCFP="${launcher} -n ${ntasks_postsndcfp} ${mpmd_opt}" + +elif [[ "${step}" = "awips" ]]; then + + export NTHREADS_AWIPS=${NTHREADS1} + export APRUN_AWIPSCFP="${APRUN_default} ${mpmd_opt}" + +elif [[ "${step}" = "fit2obs" ]]; then + + export NTHREADS_FIT2OBS=${NTHREADS1} + export MPIRUN="${APRUN_default} --cpus-per-task=${NTHREADS_FIT2OBS}" + +fi diff --git a/scripts/exglobal_forecast.sh b/scripts/exglobal_forecast.sh index c84e3886972..776018019c3 100755 --- a/scripts/exglobal_forecast.sh +++ b/scripts/exglobal_forecast.sh @@ -77,8 +77,6 @@ # Main body starts here ####################### -set +x - # include all subroutines. Executions later. 
source "${USHgfs}/forecast_predet.sh" # include functions for variable definition source "${USHgfs}/forecast_det.sh" # include functions for run type determination @@ -87,8 +85,6 @@ source "${USHgfs}/parsing_ufs_configure.sh" # include functions for ufs_configu source "${USHgfs}/atparse.bash" # include function atparse for parsing @[XYZ] templated files -set +x - # Coupling control switches, for coupling purpose, off by default cpl=${cpl:-.false.} cplflx=${cplflx:-.false.} # default off,import from outside source diff --git a/ush/preamble.sh b/ush/preamble.sh index 4d552c2e29a..a241d8f2a4e 100755 --- a/ush/preamble.sh +++ b/ush/preamble.sh @@ -177,7 +177,7 @@ trap "postamble ${_calling_script} ${start_time} \$?" EXIT source "${HOMEgfs}/ush/bash_utils.sh" # Decide if run with container -export RUN_WITH_CONTAINER=YES +export RUN_WITH_CONTAINER=NO # Turn on our settings export SHELLOPTS diff --git a/ush/python/pygfs/task/oceanice_products.py b/ush/python/pygfs/task/oceanice_products.py index 5ebc7761bf5..4a21aca5b69 100644 --- a/ush/python/pygfs/task/oceanice_products.py +++ b/ush/python/pygfs/task/oceanice_products.py @@ -241,7 +241,6 @@ def index(config: Dict, grid: str) -> None: logger.info("Generate index file") wgrib2_cmd = os.environ.get("WGRIB2", None) - print('wgrib2_cmd:', wgrib2_cmd) grbfile = f"{config.component}.{grid}.grib2" grbfidx = f"{grbfile}.idx" @@ -251,7 +250,6 @@ def index(config: Dict, grid: str) -> None: return logger.info(f"Creating index file for {grbfile}") - print('which(wgrib2):', which("wgrib2")) exec_cmd = which("wgrib2") if wgrib2_cmd is None else Executable(wgrib2_cmd) exec_cmd.add_default_arg("-s") try: From 870a0ea1232725e75bc919369e3764b1de24ad29 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Thu, 11 Sep 2025 22:32:52 +0000 Subject: [PATCH 101/134] need to source gw_run env, not just preamble --- ush/interp_atmos_master.sh | 2 +- ush/interp_atmos_sflux.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/ush/interp_atmos_master.sh b/ush/interp_atmos_master.sh index 39fdc563fad..87fa306da6b 100755 --- a/ush/interp_atmos_master.sh +++ b/ush/interp_atmos_master.sh @@ -4,7 +4,7 @@ # Generate 0.25 / 0.5 / 1 degree interpolated grib2 files for each input grib2 file # trim's RH and tweaks sea-ice cover -source "${USHgfs}/preamble.sh" +source "${HOMEgfs}/dev/ush/load_gw_run_modules.sh" input_file=${1:-"pgb2file_in"} # Input pressure grib2 file output_file_prefix=${2:-"pgb2file_out"} # Prefix for output grib2 file; the prefix is appended by resolution e.g. _0p25 diff --git a/ush/interp_atmos_sflux.sh b/ush/interp_atmos_sflux.sh index a9ebc82b717..cc0b7da101f 100755 --- a/ush/interp_atmos_sflux.sh +++ b/ush/interp_atmos_sflux.sh @@ -1,6 +1,6 @@ #! /usr/bin/env bash -source "${USHgfs}/preamble.sh" +source "${HOMEgfs}/dev/ush/load_gw_run_modules.sh" # This script takes in a master flux file and creates interpolated flux files at various interpolated resolutions # Generate 0.25 / 0.5 / 1 degree interpolated grib2 flux files for each input sflux grib2 file From 734946137011fcf51ade00c79e64b929435a9f61 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Fri, 12 Sep 2025 18:03:59 +0000 Subject: [PATCH 102/134] change exit code to positive numbers --- dev/container/utils/compile-gw-in-container.sh | 2 +- dev/container/utils/create-atmos-products.sh | 2 +- dev/container/utils/create-container-links.sh | 2 +- dev/container/utils/gen-wrapper.sh | 2 +- dev/container/utils/link_gfs_utils.sh | 2 +- dev/container/utils/link_model.sh | 2 +- dev/container/utils/link_ufs_utils.sh | 2 +- dev/container/utils/link_ww3.sh | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/dev/container/utils/compile-gw-in-container.sh b/dev/container/utils/compile-gw-in-container.sh index fa3bfd82dc0..650160db70e 100755 --- a/dev/container/utils/compile-gw-in-container.sh +++ b/dev/container/utils/compile-gw-in-container.sh @@ -1,7 +1,7 @@ #!/bin/bash gw_sorc_dir=$1 - cd "${gw_sorc_dir}" || exit 
-1 + cd "${gw_sorc_dir}" || exit 11 ./build_all.sh gfs sfs gefs #./link_workflow.sh diff --git a/dev/container/utils/create-atmos-products.sh b/dev/container/utils/create-atmos-products.sh index 6ccce9ac298..1f7e72f0760 100755 --- a/dev/container/utils/create-atmos-products.sh +++ b/dev/container/utils/create-atmos-products.sh @@ -29,7 +29,7 @@ done if [[ ! -v HOMEgfs || ! -v container || ! -v bindings ]]; then echo "Usage: create-container-links.sh -H/--HOMEgfs gw-home-dir -c/--container container-fullpath -b/--bindings list-of-binding-dirs [-v]" - exit -1 + exit 11 fi if [[ "${verbose}" == "true" ]]; then diff --git a/dev/container/utils/create-container-links.sh b/dev/container/utils/create-container-links.sh index 5e2ab15b4e1..194477f1c30 100755 --- a/dev/container/utils/create-container-links.sh +++ b/dev/container/utils/create-container-links.sh @@ -33,7 +33,7 @@ done if [[ ! -v HOMEgfs || ! -v container || ! -v bindings || ! -v MACHINE_ID ]]; then echo "Usage: create-container-links.sh -H/--HOMEgfs gw-home-dir -c/--container container-fullpath -b/--bindings -M|--MACHINE_ID list-of-binding-dirs [-v]" - exit -1 + exit 11 fi if [[ "${verbose}" == "true" ]]; then diff --git a/dev/container/utils/gen-wrapper.sh b/dev/container/utils/gen-wrapper.sh index 3cd276788f3..6d79a84ba8d 100755 --- a/dev/container/utils/gen-wrapper.sh +++ b/dev/container/utils/gen-wrapper.sh @@ -29,7 +29,7 @@ done if [[ ! -v HOMEgfs || ! -v container ]]; then echo "Usage: link_model.sh -H/-HOMEgfs gw-home-dir -c/--container full-path-container-image -b/--bindings "-B dirname [-B dirname1 [...]]" [-v]" - exit -1 + exit 11 fi # echo "HOMEgfs: $HOMEgfs" diff --git a/dev/container/utils/link_gfs_utils.sh b/dev/container/utils/link_gfs_utils.sh index 3eb6d9b1115..51ca5ccaee5 100755 --- a/dev/container/utils/link_gfs_utils.sh +++ b/dev/container/utils/link_gfs_utils.sh @@ -29,7 +29,7 @@ done if [[ ! -v HOMEgfs || ! 
-v container ]]; then echo "Usage: link_model.sh -H/-HOMEgfs gw-home-dir -c/--container full-path-container-image -b/--bindings -B dirname [-B dirname1 [...]] [-v]" - exit -1 + exit 11 fi #echo "HOMEgfs: $HOMEgfs" diff --git a/dev/container/utils/link_model.sh b/dev/container/utils/link_model.sh index f682fcada65..72c3bbdb3cb 100755 --- a/dev/container/utils/link_model.sh +++ b/dev/container/utils/link_model.sh @@ -40,7 +40,7 @@ done if [[ ! -v HOMEgfs || ! -v container || ! -v model || ! -v MACHINE_ID ]]; then echo "Usage: link_model.sh -H/-HOMEgfs gw-home-dir -c/--container full-path-container-image \\" echo " -m/--model name_model -M/MACHINE_ID MACHINE_ID -b/--bindings [...]] [-v]" - exit -1 + exit 11 fi #echo "HOMEgfs: $HOMEgfs" diff --git a/dev/container/utils/link_ufs_utils.sh b/dev/container/utils/link_ufs_utils.sh index 04afded7761..4a0ed94dfef 100755 --- a/dev/container/utils/link_ufs_utils.sh +++ b/dev/container/utils/link_ufs_utils.sh @@ -29,7 +29,7 @@ done if [[ ! -v HOMEgfs || ! -v container ]]; then echo "Usage: link_model.sh -H/-HOMEgfs gw-home-dir -c/--container full-path-container-image -b/--bindings -B dirname [-B dirname1 [...]] [-v]" - exit -1 + exit 11 fi #echo "HOMEgfs: $HOMEgfs" diff --git a/dev/container/utils/link_ww3.sh b/dev/container/utils/link_ww3.sh index dd1139654c1..4f419cdafff 100755 --- a/dev/container/utils/link_ww3.sh +++ b/dev/container/utils/link_ww3.sh @@ -34,7 +34,7 @@ done if [[ ! -v HOMEgfs || ! -v container || ! 
-v type ]]; then echo "Usage: link_model.sh -H/-HOMEgfs gw-home-dir -c/--container full-path-container-image \\" " -b/--bindings -B dirname [-B dirname1 [...]] -t/--type [gfs|sfs|gefs] [-v]" - exit -1 + exit 11 fi # echo "HOMEgfs: $HOMEgfs" From 26bc3f7f2e5e87cb3a658096d400b5094465dbb2 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Fri, 12 Sep 2025 18:41:17 +0000 Subject: [PATCH 103/134] reverse changes --- dev/workflow/setup_xml.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dev/workflow/setup_xml.py b/dev/workflow/setup_xml.py index a6b40690281..7a255108a7c 100755 --- a/dev/workflow/setup_xml.py +++ b/dev/workflow/setup_xml.py @@ -101,8 +101,7 @@ def main(*argv): check_expdir(user_inputs.expdir, base['EXPDIR']) # Check if "HOMEDIR","STMP","PTMP" dirrctories are writable - #dir_keys = ["HOMEDIR", "STMP", "PTMP"] - dir_keys = ["STMP", "PTMP"] + dir_keys = ["HOMEDIR", "STMP", "PTMP"] for dk in dir_keys: check_dir_writable(base[dk]) if not check_dir_writable(base[dk]): From 0b8a0290500dcf28148c08520e8518a878ee5a1f Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Sat, 13 Sep 2025 13:36:38 +0000 Subject: [PATCH 104/134] remove wgrib2 from explicit wrapper, and remove commented echo --- dev/container/utils/exec.wgrib2 | 9 ----- dev/container/utils/gen-wrapper.sh | 20 +++------- dev/container/utils/link_gfs_utils.sh | 5 --- dev/container/utils/link_model.sh | 5 --- dev/container/utils/link_ufs_utils.sh | 5 --- dev/container/utils/link_ww3.sh | 6 --- dev/container/utils/ush.wgrib2 | 57 --------------------------- jobs/JGLOBAL_ATMOS_PRODUCTS | 1 - jobs/JGLOBAL_OCEANICE_PRODUCTS | 2 - ush/run_mpmd.sh | 1 - 10 files changed, 6 insertions(+), 105 deletions(-) delete mode 100644 dev/container/utils/exec.wgrib2 delete mode 100644 dev/container/utils/ush.wgrib2 diff --git a/dev/container/utils/exec.wgrib2 b/dev/container/utils/exec.wgrib2 deleted file mode 100644 index 42d0270278b..00000000000 --- a/dev/container/utils/exec.wgrib2 +++ /dev/null @@ -1,9 +0,0 
@@ -#!/bin/bash - export LD_LIBRARY_PATH=$(dirname SIF) - arg="$@" - - singularity exec \ - BINDINGS \ - SIF \ - HOMEgfs/ush/container/run_wgrib2.sh $arg - diff --git a/dev/container/utils/gen-wrapper.sh b/dev/container/utils/gen-wrapper.sh index b3701a45134..2b0929cf7e6 100755 --- a/dev/container/utils/gen-wrapper.sh +++ b/dev/container/utils/gen-wrapper.sh @@ -32,11 +32,6 @@ if [[ ! -v HOMEgfs || ! -v container ]]; then exit -1 fi -# echo "HOMEgfs: $HOMEgfs" -# echo "container: $container" -# echo "bindings: $bindings" -# echo "Verbose: $verbose" - if [[ "$verbose" == "true" ]]; then set -x fi @@ -49,18 +44,15 @@ do targetdir=${HOMEgfs}/${dnm}/container fi mkdir -p ${targetdir} - for fnm in python wgrib2 - do - sourcef=${HOMEgfs}/dev/container/utils/${dnm}.${fnm} - targetf=${targetdir}/run_${fnm}.sh + sourcef=${HOMEgfs}/dev/container/utils/${dnm}.python + targetf=${targetdir}/run_python.sh - sed -e "s?HOMEgfs?${HOMEgfs}?g" \ - -e "s?SIF?${container}?g" \ - -e "s?BINDINGS?${bindings}?g" \ + sed -e "s?HOMEgfs?${HOMEgfs}?g" \ + -e "s?SIF?${container}?g" \ + -e "s?BINDINGS?${bindings}?g" \ ${sourcef} > ${targetf} - chmod 755 ${targetf} - done + chmod 755 ${targetf} done sed -i 's/RUN_WITH_CONTAINER=NO/RUN_WITH_CONTAINER=YES/g' ${HOMEgfs}/ush/preamble.sh diff --git a/dev/container/utils/link_gfs_utils.sh b/dev/container/utils/link_gfs_utils.sh index 7605989e44b..0555ed7c63d 100755 --- a/dev/container/utils/link_gfs_utils.sh +++ b/dev/container/utils/link_gfs_utils.sh @@ -32,11 +32,6 @@ if [[ ! -v HOMEgfs || ! -v container ]]; then exit -1 fi -#echo "HOMEgfs: $HOMEgfs" -#echo "container: $container" -#echo "bindings: $bindings" -#echo "Verbose: $verbose" - if [[ "$verbose" == "true" ]]; then set -x fi diff --git a/dev/container/utils/link_model.sh b/dev/container/utils/link_model.sh index 8d8d025ccd2..d5baeb364b5 100755 --- a/dev/container/utils/link_model.sh +++ b/dev/container/utils/link_model.sh @@ -43,11 +43,6 @@ if [[ ! -v HOMEgfs || ! -v container || ! 
-v model || ! -v MACHINE_ID ]]; then exit -1 fi -#echo "HOMEgfs: $HOMEgfs" -#echo "model: $model" -#echo "Verbose: $verbose" -#echo "machineid: $machineid" - if [[ "$verbose" == "true" ]]; then set -x fi diff --git a/dev/container/utils/link_ufs_utils.sh b/dev/container/utils/link_ufs_utils.sh index ebc7ecebd6b..69d0cf8ebc3 100755 --- a/dev/container/utils/link_ufs_utils.sh +++ b/dev/container/utils/link_ufs_utils.sh @@ -32,11 +32,6 @@ if [[ ! -v HOMEgfs || ! -v container ]]; then exit -1 fi -#echo "HOMEgfs: $HOMEgfs" -#echo "container: $container" -#echo "bindings: $bindings" -#echo "Verbose: $verbose" - if [[ "$verbose" == "true" ]]; then set -x fi diff --git a/dev/container/utils/link_ww3.sh b/dev/container/utils/link_ww3.sh index f3bd4acdc41..c1ea086f37d 100755 --- a/dev/container/utils/link_ww3.sh +++ b/dev/container/utils/link_ww3.sh @@ -37,12 +37,6 @@ if [[ ! -v HOMEgfs || ! -v container || ! -v type ]]; then exit -1 fi -# echo "HOMEgfs: $HOMEgfs" -# echo "container: $container" -# echo "bindings: $bindings" -# echo "type: $type" -# echo "Verbose: $verbose" - if [[ "$verbose" == "true" ]]; then set -x fi diff --git a/dev/container/utils/ush.wgrib2 b/dev/container/utils/ush.wgrib2 deleted file mode 100644 index d58873337c7..00000000000 --- a/dev/container/utils/ush.wgrib2 +++ /dev/null @@ -1,57 +0,0 @@ -#!/bin/bash - -source HOMEgfs/dev/ush/load_gw_run_modules.sh - -module load wgrib2/3.6.0 - -arg=$@ - -new_arg="" -has_ftime=false -has_sets=false - -sets="" - -# Basic argument parsing using a while loop and case statement -while [[ "$#" -gt 0 ]]; do - case "$1" in - -set_date) - sdate=$2 - shift - ;; - -set_ftime) - has_ftime=true - ftime="$2 $3 $4" - shift - shift - shift - ;; - -set) - has_sets=true - sets="$sets -set $2 $3" - shift - shift - ;; - -grib) - outfile=$2 - shift - ;; - *) - # echo "Unknown option: $1" - new_arg="$new_arg $1" - ;; - esac - shift # Consume the option/argument -done - -if [[ "$has_ftime" == "true" ]]; then - echo "new_arg: 
$new_arg" - if [[ "$has_sets" == "true" ]]; then - wgrib2 $new_arg -set_date "$sdate" -set_ftime "$ftime" $sets -grib $outfile - else - wgrib2 $new_arg -set_date "$sdate" -set_ftime "$ftime" -grib $outfile - fi -else - wgrib2 $arg -fi - diff --git a/jobs/JGLOBAL_ATMOS_PRODUCTS b/jobs/JGLOBAL_ATMOS_PRODUCTS index 1f589ec6170..c08c6599825 100755 --- a/jobs/JGLOBAL_ATMOS_PRODUCTS +++ b/jobs/JGLOBAL_ATMOS_PRODUCTS @@ -26,7 +26,6 @@ export PREFIX="${RUN}.t${cyc}z." # Run exglobal script if [ "$RUN_WITH_CONTAINER" == "YES" ]; then "${HOMEgfs}/exec/exglobal_atmos_products.sh" && true - export WGRIB2=${HOMEgfs}/exec/run_wgrib2.sh else "${SCRgfs}/exglobal_atmos_products.sh" && true fi diff --git a/jobs/JGLOBAL_OCEANICE_PRODUCTS b/jobs/JGLOBAL_OCEANICE_PRODUCTS index dc406ae7346..73696d1a2d6 100755 --- a/jobs/JGLOBAL_OCEANICE_PRODUCTS +++ b/jobs/JGLOBAL_OCEANICE_PRODUCTS @@ -15,9 +15,7 @@ YMD="${PDY}" HH="${cyc}" declare_from_tmpl -rx "COMOUT_${COMPONENT^^}_NETCDF":"C ############################################################### # Run exglobal script if [ "$RUN_WITH_CONTAINER" == "YES" ]; then - export WGRIB2=${HOMEgfs}/ush/container/run_wgrib2.sh ${HOMEgfs}/exec/run_python.sh ${SCRgfs}/exglobal_oceanice_products.py -c -v - export WGRIB2=${HOMEgfs}/exec/run_wgrib2.sh else "${SCRgfs}/exglobal_oceanice_products.py" && true fi diff --git a/ush/run_mpmd.sh b/ush/run_mpmd.sh index a0b0d143170..a2783fa011a 100755 --- a/ush/run_mpmd.sh +++ b/ush/run_mpmd.sh @@ -33,7 +33,6 @@ source "${USHgfs}/preamble.sh" source "${HOMEgfs}/ush/detect_machine.sh" source "${HOMEgfs}/ush/module-setup.sh" -#if [[ "$RUN_WITH_CONTAINER" == "YES" ]]; then if [[ "$MACHINE_ID" == "container" ]]; then source /usr/lmod/lmod/init/bash module use "${HOMEgfs}/sorc/gfs_utils.fd/modulefiles" From a6a0ba911a819b79ad6e20d194662df3e2f97216 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Tue, 16 Sep 2025 00:21:16 +0000 Subject: [PATCH 105/134] fixing lint shell script check --- dev/container/utils/exec.python | 2 +- 
dev/container/utils/ush.python | 2 +- jobs/JGLOBAL_ATMOS_PRODUCTS | 4 +--- jobs/JGLOBAL_OCEANICE_PRODUCTS | 4 ++-- jobs/JGLOBAL_PREP_EMISSIONS | 4 ++-- jobs/JGLOBAL_STAGE_IC | 4 ++-- ush/bash_utils.sh | 10 +++++----- versions/build.container.ver | 2 +- versions/run.container.ver | 2 +- 9 files changed, 16 insertions(+), 18 deletions(-) diff --git a/dev/container/utils/exec.python b/dev/container/utils/exec.python index e18f6a613d8..f84f67fef34 100644 --- a/dev/container/utils/exec.python +++ b/dev/container/utils/exec.python @@ -6,5 +6,5 @@ singularity exec \ BINDINGS \ SIF \ - HOMEgfs/ush/container/run_python.sh $arg + HOMEgfs/ush/container/run_python.sh "${arg}" diff --git a/dev/container/utils/ush.python b/dev/container/utils/ush.python index e8cad4b60b9..448e92a42bf 100644 --- a/dev/container/utils/ush.python +++ b/dev/container/utils/ush.python @@ -23,5 +23,5 @@ export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}HOMEgfs/ush:${wxflowPATH}" arg="$@" -python $arg +python ${arg} diff --git a/jobs/JGLOBAL_ATMOS_PRODUCTS b/jobs/JGLOBAL_ATMOS_PRODUCTS index c08c6599825..c52d94f7246 100755 --- a/jobs/JGLOBAL_ATMOS_PRODUCTS +++ b/jobs/JGLOBAL_ATMOS_PRODUCTS @@ -24,7 +24,7 @@ export PREFIX="${RUN}.t${cyc}z." 
############################################################### # Run exglobal script -if [ "$RUN_WITH_CONTAINER" == "YES" ]; then +if [[ "${RUN_WITH_CONTAINER}" == "YES" ]]; then "${HOMEgfs}/exec/exglobal_atmos_products.sh" && true else "${SCRgfs}/exglobal_atmos_products.sh" && true @@ -38,8 +38,6 @@ fi # End JOB SPECIFIC work ############################################## -exit 0 - ############################################## # Final processing ############################################## diff --git a/jobs/JGLOBAL_OCEANICE_PRODUCTS b/jobs/JGLOBAL_OCEANICE_PRODUCTS index 73696d1a2d6..758f3975d9b 100755 --- a/jobs/JGLOBAL_OCEANICE_PRODUCTS +++ b/jobs/JGLOBAL_OCEANICE_PRODUCTS @@ -14,8 +14,8 @@ YMD="${PDY}" HH="${cyc}" declare_from_tmpl -rx "COMOUT_${COMPONENT^^}_NETCDF":"C ############################################################### # Run exglobal script -if [ "$RUN_WITH_CONTAINER" == "YES" ]; then - ${HOMEgfs}/exec/run_python.sh ${SCRgfs}/exglobal_oceanice_products.py -c -v +if [ "${RUN_WITH_CONTAINER}" == "YES" ]; then + "${HOMEgfs}/exec/run_python.sh" ${SCRgfs}/exglobal_oceanice_products.py -c -v else "${SCRgfs}/exglobal_oceanice_products.py" && true fi diff --git a/jobs/JGLOBAL_PREP_EMISSIONS b/jobs/JGLOBAL_PREP_EMISSIONS index 1da11e48a1d..64c52228623 100755 --- a/jobs/JGLOBAL_PREP_EMISSIONS +++ b/jobs/JGLOBAL_PREP_EMISSIONS @@ -18,8 +18,8 @@ source "${HOMEgfs}/ush/jjob_header.sh" -e "prep_emissions" -c "base prep_emissio EXSCRIPT=${PREP_EMISSIONS_PY:-${SCRgfs}/exglobal_prep_emissions.py} # Execute staging -if [[ "$RUN_WITH_CONTAINER" == "YES" ]]; then - ${HOMEgfs}/exec/run_python.sh ${EXSCRIPT} && true +if [[ "${RUN_WITH_CONTAINER}" == "YES" ]]; then + "${HOMEgfs}/exec/run_python.sh" ${EXSCRIPT} && true else ${EXSCRIPT} && true fi diff --git a/jobs/JGLOBAL_STAGE_IC b/jobs/JGLOBAL_STAGE_IC index c44f0b24626..087275918e8 100755 --- a/jobs/JGLOBAL_STAGE_IC +++ b/jobs/JGLOBAL_STAGE_IC @@ -3,8 +3,8 @@ source "${HOMEgfs}/ush/jjob_header.sh" -e "stage_ic" -c 
"base stage_ic" # Execute staging -if [ "$RUN_WITH_CONTAINER" == "YES" ]; then - ${HOMEgfs}/exec/run_python.sh ${SCRgfs}/exglobal_stage_ic.py +if [[ "{$RUN_WITH_CONTAINER}" == "YES" ]]; then + "${HOMEgfs}/exec/run_python.sh" ${SCRgfs}/exglobal_stage_ic.py else "${SCRgfs}/exglobal_stage_ic.py" fi diff --git a/ush/bash_utils.sh b/ush/bash_utils.sh index e27de09b942..a2a2597f444 100755 --- a/ush/bash_utils.sh +++ b/ush/bash_utils.sh @@ -112,18 +112,18 @@ function wait_for_file() { function cpreq() { cp $* - if [ $? -ne 0 ] ; then + if [[ "$?" -ne "0" ]] ; then err_exit "'cp $*' was not successful." fi } function cpfs() { - if [ $# -ne 2 ]; then + if [ "$#" -ne "2" ]; then echo "This script requires two arguments: a source file and a destination file path." exit 16 fi - if [ "$2" = '.' -o "$2" = './' ]; then + if [[ "$2" = '.' -o "$2" = './' ]]; then cpdstfile=${PWD:?}/$(basename $1) elif [ -d $2 ]; then cpdstfile=${2%/}/$(basename $1) @@ -133,7 +133,7 @@ function cpfs() { cp $1 $cpdstfile.cptmp - if [ $? -ne 0 ] ; then + if [[ "$?" -ne "0" ]] ; then err_exit "$1 is missing or was not copied successfully." fi @@ -143,7 +143,7 @@ function cpfs() { #fi mv $cpdstfile.cptmp $cpdstfile - if [ $? -ne 0 ] ; then + if [[ "$?" -ne "0" ]] ; then err_exit "$cpdstfile.cptmp is missing or was not moved successfully." 
fi } diff --git a/versions/build.container.ver b/versions/build.container.ver index 8ec869379f6..ef5795a25a4 100644 --- a/versions/build.container.ver +++ b/versions/build.container.ver @@ -8,6 +8,6 @@ export stack_mpi_ver=2021.13 # gnu_ver set to override 14.2.0 default set in UFS-WM noaacloud module file export gnu_ver=13.2.0 -spack_stack_root="/opt/spack-stack/spack-stack-1.9.2" +export spack_stack_root="/opt/spack-stack/spack-stack-1.9.2" source "${HOMEgfs:-}/versions/spack.ver" diff --git a/versions/run.container.ver b/versions/run.container.ver index f71efc41ed1..06a0cdd3bb7 100644 --- a/versions/run.container.ver +++ b/versions/run.container.ver @@ -5,7 +5,7 @@ export stack_compiler_ver=2024.2.0 export stack_mpi=intel-oneapi-mpi export stack_mpi_ver=2021.13 -spack_stack_root="/opt/spack-stack//spack-stack-1.9.2" +export spack_stack_root="/opt/spack-stack//spack-stack-1.9.2" source "${HOMEgfs:-}/versions/spack.ver" From e1099f6434ee43e38d1177c3d3fbbd329e85f42e Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Tue, 16 Sep 2025 00:35:44 +0000 Subject: [PATCH 106/134] fixing lint shell script check --- dev/container/com.sh | 15 ++++++--------- jobs/JGLOBAL_PREP_EMISSIONS | 2 +- jobs/JGLOBAL_STAGE_IC | 2 +- ush/bash_utils.sh | 22 +++++++++++----------- ush/forecast_postdet.sh | 4 ++-- ush/run_mpmd.sh | 2 +- 6 files changed, 22 insertions(+), 25 deletions(-) diff --git a/dev/container/com.sh b/dev/container/com.sh index e1878a2d18a..c40b6ae119c 100755 --- a/dev/container/com.sh +++ b/dev/container/com.sh @@ -1,21 +1,18 @@ #!/bin/bash #SBATCH --job-name=compile -#SBATCH --account=$USER +#SBATCH --account=epic #SBATCH --qos=batch -##SBATCH --partition=compute -#SBATCH --partition=process +#SBATCH --partition=u1-compute #SBATCH -t 04:15:00 #SBATCH --nodes=1 +#SBATCH --ntasks=24 #SBATCH -o compile.%J.log -#SBATCH --exclusive -set -x - -gwhome=/contrib/Wei.Huang/container/global-workflow-cloud -img=/contrib/Wei.Huang/container/ubuntu22.04-intel-ufs-env-v1.9.2.img 
+gwhome=/scratch4/NAGAPE/epic/Wei.Huang/src/container/global-workflow-cloud cmd=${gwhome}/dev/container/utils/compile-gw-in-container.sh +img=/scratch3/NCEPDEV/nems/role.epic/containers/ubuntu22.04-intel-ufs-env-v1.9.2.img gw_sorc_dir=${gwhome}/sorc -singularity exec -B /contrib ${img} ${cmd} ${gw_sorc_dir} +singularity exec -B /scratch3 -B /scratch4 "${img}" "${cmd}" "${gw_sorc_dir}" diff --git a/jobs/JGLOBAL_PREP_EMISSIONS b/jobs/JGLOBAL_PREP_EMISSIONS index 64c52228623..e3d35a38b20 100755 --- a/jobs/JGLOBAL_PREP_EMISSIONS +++ b/jobs/JGLOBAL_PREP_EMISSIONS @@ -19,7 +19,7 @@ EXSCRIPT=${PREP_EMISSIONS_PY:-${SCRgfs}/exglobal_prep_emissions.py} # Execute staging if [[ "${RUN_WITH_CONTAINER}" == "YES" ]]; then - "${HOMEgfs}/exec/run_python.sh" ${EXSCRIPT} && true + "${HOMEgfs}/exec/run_python.sh" "${EXSCRIPT}" && true else ${EXSCRIPT} && true fi diff --git a/jobs/JGLOBAL_STAGE_IC b/jobs/JGLOBAL_STAGE_IC index 087275918e8..4dc9b018575 100755 --- a/jobs/JGLOBAL_STAGE_IC +++ b/jobs/JGLOBAL_STAGE_IC @@ -4,7 +4,7 @@ source "${HOMEgfs}/ush/jjob_header.sh" -e "stage_ic" -c "base stage_ic" # Execute staging if [[ "{$RUN_WITH_CONTAINER}" == "YES" ]]; then - "${HOMEgfs}/exec/run_python.sh" ${SCRgfs}/exglobal_stage_ic.py + "${HOMEgfs}/exec/run_python.sh" "${SCRgfs}/exglobal_stage_ic.py" else "${SCRgfs}/exglobal_stage_ic.py" fi diff --git a/ush/bash_utils.sh b/ush/bash_utils.sh index a2a2597f444..fe24e8f7841 100755 --- a/ush/bash_utils.sh +++ b/ush/bash_utils.sh @@ -111,27 +111,27 @@ function wait_for_file() { } function cpreq() { - cp $* - if [[ "$?" -ne "0" ]] ; then - err_exit "'cp $*' was not successful." + cp "$*" + if [[ "$?" -ne "0" ]]; then + err_exit "'cp $*' was not successful." fi } function cpfs() { - if [ "$#" -ne "2" ]; then + if [[ "$#" -ne "2" ]]; then echo "This script requires two arguments: a source file and a destination file path." exit 16 fi - if [[ "$2" = '.' 
-o "$2" = './' ]]; then - cpdstfile=${PWD:?}/$(basename $1) - elif [ -d $2 ]; then - cpdstfile=${2%/}/$(basename $1) + if [[ "$2" = '.' || "$2" = './' ]]; then + cpdstfile=${PWD:?}/$(basename "$1") + elif [ -d "$2" ]; then + cpdstfile=${2%/}/$(basename "$1") else cpdstfile=$2 fi - cp $1 $cpdstfile.cptmp + cp "$1" ${cpdstfile}.cptmp if [[ "$?" -ne "0" ]] ; then err_exit "$1 is missing or was not copied successfully." @@ -142,9 +142,9 @@ function cpfs() { # >&2 echo "WARNING: ${FSYNC} $cpdstfile.cptmp failed." #fi - mv $cpdstfile.cptmp $cpdstfile + mv ${cpdstfile}.cptmp ${cpdstfile} if [[ "$?" -ne "0" ]] ; then - err_exit "$cpdstfile.cptmp is missing or was not moved successfully." + err_exit "${cpdstfile}.cptmp is missing or was not moved successfully." fi } diff --git a/ush/forecast_postdet.sh b/ush/forecast_postdet.sh index c9018d438e7..9ce5aae19d7 100755 --- a/ush/forecast_postdet.sh +++ b/ush/forecast_postdet.sh @@ -108,8 +108,8 @@ FV3_postdet() { # Check for consistency # TODO: the checker has a --fatal option, which is not used here. This needs to be decided how to handle. 
if [[ "${CHECK_LAND_RESTART_OROG:-NO}" == "YES" ]]; then - if [[ "$RUN_WITH_CONTAINER" == "YES" ]]; then - ${USHgfs}/../exec/run_python.sh \ + if [[ "${RUN_WITH_CONTAINER}" == "YES" ]]; then + "${USHgfs}/../exec/run_python.sh" \ "${USHgfs}/check_land_input_orography.py" \ --input_dir "${DATA}/INPUT" --orog_dir "${DATA}/INPUT" else diff --git a/ush/run_mpmd.sh b/ush/run_mpmd.sh index a2783fa011a..1a22b2e5c61 100755 --- a/ush/run_mpmd.sh +++ b/ush/run_mpmd.sh @@ -33,7 +33,7 @@ source "${USHgfs}/preamble.sh" source "${HOMEgfs}/ush/detect_machine.sh" source "${HOMEgfs}/ush/module-setup.sh" -if [[ "$MACHINE_ID" == "container" ]]; then +if [[ "${MACHINE_ID}" == "container" ]]; then source /usr/lmod/lmod/init/bash module use "${HOMEgfs}/sorc/gfs_utils.fd/modulefiles" module load gfsutils_container.intel From 9ff1c7e098aeecae8d74dd64982eaae78cc0bafa Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Tue, 16 Sep 2025 02:53:00 +0000 Subject: [PATCH 107/134] fixing lint shell script check --- dev/container/gen-run-cases.sh | 39 +++++++++---------- dev/container/shell-in-container.sh | 2 +- dev/container/utils/create-atmos-products.sh | 12 +++--- dev/container/utils/create-container-links.sh | 24 ++++++------ .../utils/exec.exglobal_atmos_products.sh | 6 +-- dev/container/utils/exec.python | 7 ++-- dev/container/utils/gen-wrapper.sh | 14 +++---- dev/container/utils/link_gfs_utils.sh | 23 ++++++----- dev/container/utils/link_model.sh | 28 +++++++------ dev/container/utils/link_ww3.sh | 21 +++++----- dev/container/utils/ush.python | 4 +- jobs/JGLOBAL_ATMOS_PRODUCTS | 3 -- scripts/exglobal_atmos_products.sh | 6 +-- 13 files changed, 89 insertions(+), 100 deletions(-) diff --git a/dev/container/gen-run-cases.sh b/dev/container/gen-run-cases.sh index 285be926df4..0bff4a0e4ed 100755 --- a/dev/container/gen-run-cases.sh +++ b/dev/container/gen-run-cases.sh @@ -25,10 +25,10 @@ if [[ ${MACHINE_ID} = ursa* ]] ; then HPC_ACCOUNT=epic module load rocoto/1.3.7 - rocotocmd=`which rocotorun` + 
rocotocmd=$(command -v rocotorun) if [[ "${run_with_container}" == "YES" ]]; then - cp ${HOMEgfs}/env/URSA.env.container ${HOMEgfs}/env/URSA.env + cp "${HOMEgfs}/env/URSA.env.container" "${HOMEgfs}/env/URSA.env" fi elif [[ ${MACHINE_ID} = gaea* ]] ; then container=/gpfs/f6/scratch/Wei.Huang/container/${img} @@ -38,50 +38,49 @@ elif [[ ${MACHINE_ID} = gaea* ]] ; then rocotocmd=/autofs/ncrc-svm1_home2/Christopher.W.Harrop/rocoto-1.3.7/bin/rocotorun if [[ "${run_with_container}" == "YES" ]]; then - cp ${HOMEgfs}/env/GAEAC6.env.container ${HOMEgfs}/env/GAEAC6.env + cp "${HOMEgfs}/env/GAEAC6.env.container" "${HOMEgfs}/env/GAEAC6.env" fi elif [[ ${MACHINE_ID} = noaacloud* ]] ; then TOPICDIR=/bucket/global-workflow-shared-data/ICSDIR - container=/contrib/container/${img} + container=/contrib/containers/${img} rundir=/lustre/${USER}/run - STMP=/lustre/${USER}/stmp bindings="--env \"I_MPI_FABRICS=ofi:shm,I_MPI_DEBUG=6\" -B /apps/slurm/default/lib/libpmi2.so -B /contrib -B /lustre -B /bucket" HPC_ACCOUNT=${USER} module load rocoto/1.3.7 - rocotocmd=`which rocotorun` + rocotocmd=$(command -v rocotorun) fi -mkdir -p ${rundir} +mkdir -p "${rundir}" -cd ${HOMEDIR}/dev/workflow +cd "${HOMEDIR}/dev/workflow" if [[ "${run_with_container}" == "YES" ]]; then - ${HOMEDIR}/dev/container/utils/gen-wrapper.sh -H ${HOMEDIR} -c ${container} -b "${bindings}" -v + "${HOMEDIR}/dev/container/utils/gen-wrapper.sh" -H "${HOMEDIR}" -c "${container}" -b "${bindings}" -v TOPICDIR=${TOPICDIR} \ RUNTESTS=${rundir} \ RUNDIRS=${rundir} \ ./generate_workflows.sh \ - -H ${HOMEDIR} \ - -y ${yamllist} \ - -Y ${HOMEDIR}/dev/ci/cases/${casetype} \ - -A ${HPC_ACCOUNT} \ + -H "${HOMEDIR}" \ + -y "${yamllist}" \ + -Y "${HOMEDIR}/dev/ci/cases/${casetype}" \ + -A "${HPC_ACCOUNT}" \ -e "${USER}@noaa.gov" \ - -r ${rocotocmd} \ + -r "${rocotocmd}" \ -v -R - ${HOMEDIR}/dev/container/utils/create-atmos-products.sh -H ${HOMEDIR} -c ${container} -b "${bindings}" - ${HOMEDIR}/dev/container/utils/create-container-links.sh 
-H ${HOMEDIR} -c ${container} -b "${bindings}" -M ${MACHINE_ID} + "${HOMEDIR}/dev/container/utils/create-atmos-products.sh" -H "${HOMEDIR}" -c "${container}" -b "${bindings}" + "${HOMEDIR}/dev/container/utils/create-container-links.sh" -H "${HOMEDIR}" -c "${container}" -b "${bindings}" -M "${MACHINE_ID}" else TOPICDIR=${TOPICDIR} \ RUNTESTS=${rundir} \ RUNDIRS=${rundir} \ ./generate_workflows.sh \ - -H ${HOMEDIR} \ - -y ${yamllist} \ - -Y ${HOMEDIR}/dev/ci/cases/${casetype} \ - -A ${HPC_ACCOUNT} \ + -H "${HOMEDIR}" \ + -y "${yamllist}" \ + -Y "${HOMEDIR}/dev/ci/cases/${casetype}" \ + -A "${HPC_ACCOUNT}" \ -e "${USER}@noaa.gov" \ -v fi diff --git a/dev/container/shell-in-container.sh b/dev/container/shell-in-container.sh index 3cb6b0614b0..dab96be882e 100755 --- a/dev/container/shell-in-container.sh +++ b/dev/container/shell-in-container.sh @@ -17,5 +17,5 @@ elif [[ ${MACHINE_ID} = noaacloud* ]] ; then bindings="-B /contrib -B /lustre -B /bucket" fi -singularity shell -e ${bindings} ${img} +singularity shell -e "${bindings}" "${img}" diff --git a/dev/container/utils/create-atmos-products.sh b/dev/container/utils/create-atmos-products.sh index 1f7e72f0760..8d6b7de0672 100755 --- a/dev/container/utils/create-atmos-products.sh +++ b/dev/container/utils/create-atmos-products.sh @@ -33,15 +33,15 @@ if [[ ! -v HOMEgfs || ! -v container || ! 
-v bindings ]]; then fi if [[ "${verbose}" == "true" ]]; then - echo "Verbose: $verbose" - echo "HOMEgfs: $HOMEgfs" - echo "container: $container" - echo "bindings: $bindings" + echo "Verbose: ${verbose}" + echo "HOMEgfs: ${HOMEgfs}" + echo "container: ${container}" + echo "bindings: ${bindings}" fi sed -e "s?HOMEgfs?${HOMEgfs}?g" \ -e "s?SIF?${container}?g" \ -e "s?BINDINGS?${bindings}?g" \ - ${HOMEgfs}/dev/container/utils/exec.exglobal_atmos_products.sh > ${HOMEgfs}/exec/exglobal_atmos_products.sh - chmod +x ${HOMEgfs}/exec/exglobal_atmos_products.sh + "${HOMEgfs}/dev/container/utils/exec.exglobal_atmos_products.sh" > "${HOMEgfs}/exec/exglobal_atmos_products.sh" + chmod +x "${HOMEgfs}/exec/exglobal_atmos_products.sh" diff --git a/dev/container/utils/create-container-links.sh b/dev/container/utils/create-container-links.sh index 194477f1c30..808d1902b1c 100755 --- a/dev/container/utils/create-container-links.sh +++ b/dev/container/utils/create-container-links.sh @@ -37,20 +37,20 @@ if [[ ! -v HOMEgfs || ! -v container || ! -v bindings || ! 
-v MACHINE_ID ]]; the fi if [[ "${verbose}" == "true" ]]; then - echo "HOMEgfs: $HOMEgfs" - echo "container: $container" - echo "bindings: $bindings" - echo "Verbose: $verbose" + echo "HOMEgfs: ${HOMEgfs}" + echo "container: ${container}" + echo "bindings: ${bindings}" + echo "Verbose: ${verbose}" fi -${HOMEgfs}/dev/container/utils/link_ww3.sh -H ${HOMEgfs} -c ${container} -b "${bindings}" -t gfs -${HOMEgfs}/dev/container/utils/link_ww3.sh -H ${HOMEgfs} -c ${container} -b "${bindings}" -t sfs -${HOMEgfs}/dev/container/utils/link_ww3.sh -H ${HOMEgfs} -c ${container} -b "${bindings}" -t gefs +"${HOMEgfs}/dev/container/utils/link_ww3.sh" -H "${HOMEgfs}" -c "${container}" -b "${bindings}" -t gfs +"${HOMEgfs}/dev/container/utils/link_ww3.sh" -H "${HOMEgfs}" -c "${container}" -b "${bindings}" -t sfs +"${HOMEgfs}/dev/container/utils/link_ww3.sh" -H "${HOMEgfs}" -c "${container}" -b "${bindings}" -t gefs -${HOMEgfs}/dev/container/utils/link_model.sh -H ${HOMEgfs} -c ${container} -m gfs_model -b "${bindings}" -M ${machineid} -${HOMEgfs}/dev/container/utils/link_model.sh -H ${HOMEgfs} -c ${container} -m sfs_model -b "${bindings}" -M ${machineid} -${HOMEgfs}/dev/container/utils/link_model.sh -H ${HOMEgfs} -c ${container} -m gefs_model -b "${bindings}" -M ${machineid} +"${HOMEgfs}/dev/container/utils/link_model.sh" -H "${HOMEgfs}" -c "${container}" -m gfs_model -b "${bindings}" -M "${machineid}" +"${HOMEgfs}/dev/container/utils/link_model.sh" -H "${HOMEgfs}" -c "${container}" -m sfs_model -b "${bindings}" -M "${machineid}" +"${HOMEgfs}/dev/container/utils/link_model.sh" -H "${HOMEgfs}" -c "${container}" -m gefs_model -b "${bindings}" -M "${machineid}" -${HOMEgfs}/dev/container/utils/link_gfs_utils.sh -H ${HOMEgfs} -c ${container} -b "${bindings}" -${HOMEgfs}/dev/container/utils/link_ufs_utils.sh -H ${HOMEgfs} -c ${container} -b "${bindings}" +"${HOMEgfs}/dev/container/utils/link_gfs_utils.sh" -H "${HOMEgfs}" -c "${container}" -b "${bindings}" 
+"${HOMEgfs}/dev/container/utils/link_ufs_utils.sh" -H "${HOMEgfs}" -c "${container}" -b "${bindings}" diff --git a/dev/container/utils/exec.exglobal_atmos_products.sh b/dev/container/utils/exec.exglobal_atmos_products.sh index b658528b2b5..89c158dad11 100755 --- a/dev/container/utils/exec.exglobal_atmos_products.sh +++ b/dev/container/utils/exec.exglobal_atmos_products.sh @@ -1,9 +1,9 @@ #!/bin/bash - export LD_LIBRARY_PATH=$(dirname SIF) - arg="$@" + LD_LIBRARY_PATH=$(dirname SIF) + export LD_LIBRARY_PATH singularity exec \ BINDINGS \ SIF \ - HOMEgfs/scripts/exglobal_atmos_products.sh $arg + "HOMEgfs/scripts/exglobal_atmos_products.sh" "$@" diff --git a/dev/container/utils/exec.python b/dev/container/utils/exec.python index f84f67fef34..fe1ac65e214 100644 --- a/dev/container/utils/exec.python +++ b/dev/container/utils/exec.python @@ -1,10 +1,9 @@ #!/bin/bash - arg="$@" - - export LD_LIBRARY_PATH=$(dirname SIF) + LD_LIBRARY_PATH=$(dirname SIF) + export LD_LIBRARY_PATH singularity exec \ BINDINGS \ SIF \ - HOMEgfs/ush/container/run_python.sh "${arg}" + HOMEgfs/ush/container/run_python.sh "$@" diff --git a/dev/container/utils/gen-wrapper.sh b/dev/container/utils/gen-wrapper.sh index 7b6aa9d72a1..a358c39b2af 100755 --- a/dev/container/utils/gen-wrapper.sh +++ b/dev/container/utils/gen-wrapper.sh @@ -28,32 +28,32 @@ while [[ "$#" -gt 0 ]]; do done if [[ ! -v HOMEgfs || ! 
-v container ]]; then - echo "Usage: link_model.sh -H/-HOMEgfs gw-home-dir -c/--container full-path-container-image -b/--bindings "-B dirname [-B dirname1 [...]]" [-v]" + echo "Usage: link_model.sh -H/-HOMEgfs gw-home-dir -c/--container full-path-container-image -b/--bindings -B dirname [-B dirname1 [...]] [-v]" exit 11 fi -if [[ "$verbose" == "true" ]]; then +if [[ "${verbose}" == "true" ]]; then set -x fi for dnm in exec ush do - if [[ "$dnm" == "exec" ]]; then + if [[ "${dnm}" == "exec" ]]; then targetdir=${HOMEgfs}/${dnm} else targetdir=${HOMEgfs}/${dnm}/container fi - mkdir -p ${targetdir} + mkdir -p "${targetdir}" sourcef=${HOMEgfs}/dev/container/utils/${dnm}.python targetf=${targetdir}/run_python.sh sed -e "s?HOMEgfs?${HOMEgfs}?g" \ -e "s?SIF?${container}?g" \ -e "s?BINDINGS?${bindings}?g" \ - ${sourcef} > ${targetf} + "${sourcef}" > "${targetf}" - chmod 755 ${targetf} + chmod 755 "${targetf}" done -sed -i 's/RUN_WITH_CONTAINER=NO/RUN_WITH_CONTAINER=YES/g' ${HOMEgfs}/ush/preamble.sh +sed -i 's/RUN_WITH_CONTAINER=NO/RUN_WITH_CONTAINER=YES/g' "${HOMEgfs}/ush/preamble.sh" diff --git a/dev/container/utils/link_gfs_utils.sh b/dev/container/utils/link_gfs_utils.sh index 88211d93bfe..7ad278ed0d5 100755 --- a/dev/container/utils/link_gfs_utils.sh +++ b/dev/container/utils/link_gfs_utils.sh @@ -32,7 +32,7 @@ if [[ ! -v HOMEgfs || ! 
-v container ]]; then exit 11 fi -if [[ "$verbose" == "true" ]]; then +if [[ "${verbose}" == "true" ]]; then set -x fi @@ -46,20 +46,19 @@ do # echo "model: $model" run_model_script=${HOMEgfs}/ush/container/run_${model}.sh - rm -f ${run_model_script} + rm -f "${run_model_script}" - cat > $run_model_script << EOF_MODEL + cat > ${run_model_script} << EOF_MODEL #!/bin/bash source "${HOMEgfs}/dev/ush/load_gw_run_modules.sh" module load wgrib2/3.6.0 -arg="\$@" -${HOMEgfs}/sorc/gfs_utils.fd/install/bin/${model}.x \$arg +${HOMEgfs}/sorc/gfs_utils.fd/install/bin/${model}.x "\$@" EOF_MODEL - chmod 755 $run_model_script + chmod 755 ${run_model_script} #link_model_script=${HOMEgfs}/exec/${model} #rm -f ${link_model_script} @@ -69,20 +68,20 @@ EOF_MODEL cat > $link_model_script << EOF_LINK #!/bin/bash - export LD_LIBRARY_PATH=$(dirname $container) + export LD_LIBRARY_PATH=$(dirname ${container}) arg="\$@" - singularity exec ${bindings} ${container} ${run_model_script} \$arg + singularity exec "${bindings}" "${container}" "${run_model_script}" "\$@" EOF_LINK - chmod 755 $link_model_script + chmod 755 ${link_model_script} done for nm in ocnicepost do direct_model_script=${HOMEgfs}/exec/${nm}.x - rm -f ${direct_model_script} + rm -f "${direct_model_script}" - cat > $direct_model_script << EOF_DIRECT + cat > ${direct_model_script} << EOF_DIRECT #!/bin/bash source "${HOMEgfs}/dev/ush/load_gw_run_modules.sh" @@ -92,6 +91,6 @@ arg="\$@" ${HOMEgfs}/sorc/gfs_utils.fd/install/bin/${nm}.x \$arg EOF_DIRECT - chmod 755 $direct_model_script + chmod 755 ${direct_model_script} done diff --git a/dev/container/utils/link_model.sh b/dev/container/utils/link_model.sh index 9252ad9a481..f141632f260 100755 --- a/dev/container/utils/link_model.sh +++ b/dev/container/utils/link_model.sh @@ -43,20 +43,19 @@ if [[ ! -v HOMEgfs || ! -v container || ! -v model || ! 
-v MACHINE_ID ]]; then exit 11 fi -if [[ "$verbose" == "true" ]]; then +if [[ "${verbose}" == "true" ]]; then set -x fi run_model_script=${HOMEgfs}/ush/container/run_${model}.sh -rm -f ${run_model_script} +rm -f "${run_model_script}" -cat > $run_model_script << EOF_MODEL +cat > ${run_model_script} << EOF_MODEL #!/bin/bash source "${HOMEgfs}/dev/ush/load_gw_run_modules.sh" -arg="\$@" -${HOMEgfs}/sorc/ufs_model.fd/tests/${model}.x \$arg +${HOMEgfs}/sorc/ufs_model.fd/tests/${model}.x "\$@" EOF_MODEL link_model_script=${HOMEgfs}/exec/${model}.x @@ -64,7 +63,7 @@ rm -f ${link_model_script} case "${machineid}" in ursa) -cat > $link_model_script << EOF_URSA +cat > ${link_model_script} << EOF_URSA #!/bin/bash # --- MPI and Fabric Configuration --- @@ -82,20 +81,19 @@ export UCX_TLS=^sm,cma HOST_SLURM_PATH=/apps/slurm/default HOST_MPI_PATH=/apps/spack-2024-12/linux-rocky9-x86_64/gcc-11.4.1/intel-oneapi-compilers-2024.2.1-oqhstbmawnrsdw472p4pjsopj547o6xs/compiler/2024.2/opt/compiler - export LD_LIBRARY_PATH=$(dirname ${container}) - set +x - arg="\$@" + LD_LIBRARY_PATH=$(dirname ${container}) + export LD_LIBRARY_PATH singularity exec \\ --bind \${HOST_SLURM_PATH}:\${HOST_SLURM_PATH} \\ --bind \${HOST_MPI_PATH}:\${HOST_MPI_PATH} \\ ${bindings} \\ ${container} \\ - ${run_model_script} \$arg + ${run_model_script} "\$@" EOF_URSA ;; gaea*) -cat > $link_model_script << EOF_GAEA +cat > ${link_model_script} << EOF_GAEA #!/bin/bash #export SINGULARITY_ENABLE_OVERLAY=try #export SINGULARITY_DISABLE_OVERLAY=yes @@ -114,7 +112,7 @@ EOF_GAEA ;; noaacloud) -cat > $link_model_script << EOF_NOAACLOUD +cat > ${link_model_script} << EOF_NOAACLOUD #!/bin/bash #Need these lines on AWS to run more than one node. 
@@ -135,7 +133,7 @@ EOF_NOAACLOUD ;; *) -cat > $link_model_script << EOF_LINK +cat > ${link_model_script} << EOF_LINK #!/bin/bash export LD_LIBRARY_PATH=$(dirname ${container}) set +x @@ -149,6 +147,6 @@ EOF_LINK esac -chmod 755 $run_model_script -chmod 755 $link_model_script +chmod 755 ${run_model_script} +chmod 755 ${link_model_script} diff --git a/dev/container/utils/link_ww3.sh b/dev/container/utils/link_ww3.sh index 1fc255434ca..16347a8a835 100755 --- a/dev/container/utils/link_ww3.sh +++ b/dev/container/utils/link_ww3.sh @@ -33,11 +33,11 @@ done if [[ ! -v HOMEgfs || ! -v container || ! -v type ]]; then echo "Usage: link_model.sh -H/-HOMEgfs gw-home-dir -c/--container full-path-container-image \\" - " -b/--bindings -B dirname [-B dirname1 [...]] -t/--type [gfs|sfs|gefs] [-v]" + echo " -b/--bindings -B dirname [-B dirname1 [...]] -t/--type [gfs|sfs|gefs] [-v]" exit 11 fi -if [[ "$verbose" == "true" ]]; then +if [[ "${verbose}" == "true" ]]; then set -x fi @@ -55,7 +55,7 @@ do run_model_script=${HOMEgfs}/ush/container/run_${type}_${model}.sh rm -f ${run_model_script} - cat > $run_model_script << EOF_MODEL + cat > ${run_model_script} << EOF_MODEL #!/bin/bash # Set OMP_NUM_THREADS to 1 to avoid oversubscription when doing MPMD @@ -66,22 +66,21 @@ module purge module use ${HOMEgfs}/sorc/gfs_utils.fd/modulefiles module load gfsutils_container.intel -arg="\$@" -${HOMEgfs}/sorc/ufs_model.fd/WW3/install/${pdlib}/bin/${model} \$arg +${HOMEgfs}/sorc/ufs_model.fd/WW3/install/${pdlib}/bin/${model} "\$@" EOF_MODEL - chmod 755 $run_model_script + chmod 755 ${run_model_script} link_model_script=${HOMEgfs}/exec/${type}_${model}.x rm -f ${link_model_script} - cat > $link_model_script << EOF_LINK + cat > ${link_model_script} << EOF_LINK #!/bin/bash - export LD_LIBRARY_PATH=$(dirname $container) - arg="\$@" - singularity exec ${bindings} ${container} ${run_model_script} \$arg + LD_LIBRARY_PATH=$(dirname ${container}) + export LD_LIBRARY_PATH + singularity exec ${bindings} 
${container} ${run_model_script} "\$@" EOF_LINK - chmod 755 $link_model_script + chmod 755 ${link_model_script} done diff --git a/dev/container/utils/ush.python b/dev/container/utils/ush.python index 448e92a42bf..ac0b87d629a 100644 --- a/dev/container/utils/ush.python +++ b/dev/container/utils/ush.python @@ -21,7 +21,5 @@ module load py-pyyaml wxflowPATH="HOMEgfs/ush/python:HOMEgfs/sorc/wxflow/src" export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}HOMEgfs/ush:${wxflowPATH}" -arg="$@" - -python ${arg} +python "$@" diff --git a/jobs/JGLOBAL_ATMOS_PRODUCTS b/jobs/JGLOBAL_ATMOS_PRODUCTS index c52d94f7246..6ed276a4e5a 100755 --- a/jobs/JGLOBAL_ATMOS_PRODUCTS +++ b/jobs/JGLOBAL_ATMOS_PRODUCTS @@ -45,8 +45,6 @@ if [[ -e "${pgmout}" ]]; then cat "${pgmout}" fi -exit 0 - ########################################## # Remove the Temporary working directory ########################################## @@ -55,5 +53,4 @@ if [[ "${KEEPDATA:-NO}" == "NO" ]]; then rm -rf "${DATA}" fi - exit 0 diff --git a/scripts/exglobal_atmos_products.sh b/scripts/exglobal_atmos_products.sh index 3b08bcf415a..ef2bc4e689d 100755 --- a/scripts/exglobal_atmos_products.sh +++ b/scripts/exglobal_atmos_products.sh @@ -92,9 +92,9 @@ for (( nset=1 ; nset <= downset ; nset++ )); do tmpfile="tmpfile${grp}_${fhr3}" # shellcheck disable=SC2312 - #ncount=$(${WGRIB2} "${tmpfile}" | wc -l) - ${WGRIB2} "${tmpfile}" > wgrib2.log - ncount=$(cat wgrib2.log | wc -l) + ncount=$(${WGRIB2} "${tmpfile}" | wc -l) + #${WGRIB2} "${tmpfile}" > wgrib2.log + #ncount=$(cat wgrib2.log | wc -l) if [[ ${nproc} -gt ${ncount} ]]; then echo "WARNING: Total no. of available processors '${nproc}' exceeds no. 
of records '${ncount}' in ${tmpfile}" echo "Reduce nproc to ${ncount} (or less) to not waste resources" From 6d6d76acd7f1749ad5dc0000bae68f9a356dba0c Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Tue, 16 Sep 2025 14:13:52 +0000 Subject: [PATCH 108/134] making lint complaint --- dev/parm/config/gefs/config.resources | 1 - dev/ush/load_gw_run_modules.sh | 74 +++++++++++++-------------- dev/workflow/generate_workflows.sh | 17 ++++++ jobs/JGLOBAL_ARCHIVE_VRFY | 4 +- jobs/JGLOBAL_OCEANICE_PRODUCTS | 4 +- jobs/JGLOBAL_STAGE_IC | 2 +- modulefiles/gw_run.common.lua | 2 +- modulefiles/gw_run.container.lua | 24 +++++++++ ush/bash_utils.sh | 13 +++-- 9 files changed, 90 insertions(+), 51 deletions(-) create mode 100644 modulefiles/gw_run.container.lua diff --git a/dev/parm/config/gefs/config.resources b/dev/parm/config/gefs/config.resources index a8bcc1e36ae..2e27563e23b 100644 --- a/dev/parm/config/gefs/config.resources +++ b/dev/parm/config/gefs/config.resources @@ -24,7 +24,6 @@ case ${machine} in "HERCULES") max_tasks_per_node=80;; "GAEAC5") max_tasks_per_node=128;; "GAEAC6") max_tasks_per_node=192;; - "URSA") max_tasks_per_node=192;; "AWSPW") export PARTITION_BATCH="compute" max_tasks_per_node=48 diff --git a/dev/ush/load_gw_run_modules.sh b/dev/ush/load_gw_run_modules.sh index d8f8a3c096c..3560cd5b41e 100755 --- a/dev/ush/load_gw_run_modules.sh +++ b/dev/ush/load_gw_run_modules.sh @@ -21,8 +21,6 @@ source "${HOMEgfs}/ush/preamble.sh" source "${HOMEgfs}/ush/detect_machine.sh" source "${HOMEgfs}/ush/module-setup.sh" -echo "MACHINE_ID: ${MACHINE_ID}" - case "${MACHINE_ID}" in container) source /usr/lmod/lmod/init/bash @@ -56,45 +54,43 @@ export PYTHONPATH source "${HOMEgfs}/ush/detect_machine.sh" source "${HOMEgfs}/ush/module-setup.sh" -#if [[ "$RUN_WITH_CONTAINER" == "YES" ]]; then -if [[ "$MACHINE_ID" == "container" ]]; then - source /usr/lmod/lmod/init/bash - module use "${HOMEgfs}/sorc/gfs_utils.fd/modulefiles" - module load gfsutils_container.intel -else - # Source 
versions file for runtime - source "${HOMEgfs}/versions/run.ver" - - # Load our modules: - module use "${HOMEgfs}/modulefiles" - - case "${MACHINE_ID}" in - "wcoss2") - module load cray-pals - module load cfp - module load libjpeg - module load craype-network-ucx - module load cray-mpich-ucx - module load "gw_run.${MACHINE_ID}" - ;; - "hera" | "orion" | "hercules" | "gaeac5" | "gaeac6" | "noaacloud" | "ursa") - module load "gw_run.${MACHINE_ID}" - export UTILROOT=${prod_util_ROOT} - ;; - *) - echo "WARNING: UNKNOWN PLATFORM" - ;; - esac - - export err=$? - if [[ ${err} -ne 0 ]]; then - echo "FATAL ERROR: Failed to load gw_run.${MACHINE_ID}" - exit 1 - fi +# Source versions file for runtime +source "${HOMEgfs}/versions/run.ver" + +# Load our modules: +module use "${HOMEgfs}/modulefiles" + +case "${MACHINE_ID}" in + "wcoss2") + module load cray-pals + module load cfp + module load libjpeg + module load craype-network-ucx + module load cray-mpich-ucx + module load "gw_run.${MACHINE_ID}" + ;; + "hera" | "orion" | "hercules" | "gaeac5" | "gaeac6" | "noaacloud" | "ursa") + module load "gw_run.${MACHINE_ID}" + export UTILROOT=${prod_util_ROOT} + ;; + "container") + source /usr/lmod/lmod/init/bash + module load "gw_run.${MACHINE_ID}" + export UTILROOT=${prod_util_ROOT} + ;; + *) + echo "WARNING: UNKNOWN PLATFORM" + ;; +esac + +export err=$? 
+if [[ ${err} -ne 0 ]]; then + echo "FATAL ERROR: Failed to load gw_run.${MACHINE_ID}" + exit 1 fi -module load wgrib2 -module load prod_util +# module load wgrib2 +# module load prod_util export WGRIB2=wgrib2 # Turn on our settings diff --git a/dev/workflow/generate_workflows.sh b/dev/workflow/generate_workflows.sh index 11537bb4791..0dd0407d89f 100755 --- a/dev/workflow/generate_workflows.sh +++ b/dev/workflow/generate_workflows.sh @@ -538,24 +538,37 @@ echo "Running create_experiment.py for ${#_yaml_list[@]} cases" if [[ "${_verbose}" == true ]]; then printf "Selected cases: %s\n\n" "${_yaml_list[*]}" fi +echo "$0 check part 1" for _case in "${_yaml_list[@]}"; do if [[ "${_verbose}" == false ]]; then echo "${_case}" fi _pslot="${_case}${_tag}" + echo "$0 check part 1.1" if [[ "${_run_with_container}" == "true" ]]; then if [[ "${_has_rocotorun}" == "true" ]]; then + echo "$0 check part 1.1.1" _create_exp_cmd="../../exec/run_python.sh ./create_experiment.py -y ${_yaml_dir}/${_case}.yaml -r ${_rocotorun_fullpath} --overwrite" + echo "$0 check part 1.1.2" else + echo "$0 check part 1.2.1" _create_exp_cmd="../../exec/run_python.sh ./create_experiment.py -y ${_yaml_dir}/${_case}.yaml --overwrite" + echo "$0 check part 1.2.2" fi else + echo "$0 check part 1.3.1" _create_exp_cmd="./create_experiment.py -y ${_yaml_dir}/${_case}.yaml --overwrite" + echo "$0 check part 1.3.2" fi + echo "$0 check part 1.4" if [[ "${_verbose}" == true ]]; then + echo "$0 check part 1.4.1" pslot=${_pslot} RUNTESTS=${_runtests} ${_create_exp_cmd} + echo "$0 check part 1.4.1.2" else + echo "$0 check part 1.4.2" if ! pslot=${_pslot} RUNTESTS=${_runtests} ${_create_exp_cmd} 2> stderr 1> stdout; then + echo "$0 check part 1.4.2" _output=$(cat stdout stderr) _message="The create_experiment command (${_create_exp_cmd}) failed with a non-zero status. 
Output:" _message="${_message}"$'\n'"${_output}" @@ -566,9 +579,11 @@ for _case in "${_yaml_list[@]}"; do rm -f stdout stderr exit 12 fi + echo "$0 check part 1.4.3" rm -f stdout stderr fi + echo "$0 check part 1.5" # Check if DATAROOT is already present; eval will return just DATAROOT from the sourcing # shellcheck disable=SC2312 eval "$(PDY=0 cyc=0 source "${_runtests}/EXPDIR/${_pslot}/config.base" >& /dev/null; echo _dataroot="${STMP}/RUNDIRS/${_pslot}")" @@ -587,6 +602,7 @@ for _case in "${_yaml_list[@]}"; do fi fi + echo "$0 check part 1.6" # Check if this experiment is using cron or scron cron_file="${_runtests}/EXPDIR/${_pslot}/${_pslot}.crontab" scron_sh_file="${_runtests}/EXPDIR/${_pslot}/${_pslot}.scron.sh" @@ -612,6 +628,7 @@ for _case in "${_yaml_list[@]}"; do grep "${_pslot}" "${_runtests}/EXPDIR/${_pslot}/${_pslot}.crontab" >> tests.cron fi done +echo "$0 check part 2" echo # Update the cron diff --git a/jobs/JGLOBAL_ARCHIVE_VRFY b/jobs/JGLOBAL_ARCHIVE_VRFY index 0545498f47f..d9af5eb327b 100755 --- a/jobs/JGLOBAL_ARCHIVE_VRFY +++ b/jobs/JGLOBAL_ARCHIVE_VRFY @@ -24,8 +24,8 @@ done ############################################################### # Run archive script ############################################################### -if [ "$RUN_WITH_CONTAINER" == "YES" ]; then - ${HOMEgfs}/exec/run_python.sh ${GLOBALARCHIVESH:-${SCRgfs}/exglobal_archive_vrfy.py} -c -v +if [ "${RUN_WITH_CONTAINER}" == "YES" ]; then + "${HOMEgfs}/exec/run_python.sh" ${GLOBALARCHIVESH:-${SCRgfs}/exglobal_archive_vrfy.py} -c -v else ${GLOBALARCHIVESH:-${SCRgfs}/exglobal_archive_vrfy.py} fi diff --git a/jobs/JGLOBAL_OCEANICE_PRODUCTS b/jobs/JGLOBAL_OCEANICE_PRODUCTS index 758f3975d9b..f0c4d4e8289 100755 --- a/jobs/JGLOBAL_OCEANICE_PRODUCTS +++ b/jobs/JGLOBAL_OCEANICE_PRODUCTS @@ -14,8 +14,8 @@ YMD="${PDY}" HH="${cyc}" declare_from_tmpl -rx "COMOUT_${COMPONENT^^}_NETCDF":"C ############################################################### # Run exglobal script -if [ 
"${RUN_WITH_CONTAINER}" == "YES" ]; then - "${HOMEgfs}/exec/run_python.sh" ${SCRgfs}/exglobal_oceanice_products.py -c -v +if [[ "${RUN_WITH_CONTAINER}" == "YES" ]]; then + "${HOMEgfs}/exec/run_python.sh" "${SCRgfs}/exglobal_oceanice_products.py" -c -v else "${SCRgfs}/exglobal_oceanice_products.py" && true fi diff --git a/jobs/JGLOBAL_STAGE_IC b/jobs/JGLOBAL_STAGE_IC index 4dc9b018575..c20fd7564e8 100755 --- a/jobs/JGLOBAL_STAGE_IC +++ b/jobs/JGLOBAL_STAGE_IC @@ -3,7 +3,7 @@ source "${HOMEgfs}/ush/jjob_header.sh" -e "stage_ic" -c "base stage_ic" # Execute staging -if [[ "{$RUN_WITH_CONTAINER}" == "YES" ]]; then +if [[ "${RUN_WITH_CONTAINER}" == "YES" ]]; then "${HOMEgfs}/exec/run_python.sh" "${SCRgfs}/exglobal_stage_ic.py" else "${SCRgfs}/exglobal_stage_ic.py" diff --git a/modulefiles/gw_run.common.lua b/modulefiles/gw_run.common.lua index 997466b5748..f60a98234b3 100644 --- a/modulefiles/gw_run.common.lua +++ b/modulefiles/gw_run.common.lua @@ -38,7 +38,7 @@ local common_modules = { "gsi-ncdiag", "crtm", "bufr", - --"wgrib2", temporarily disable wgrib2 until it is installed with ipolates + "wgrib2", "py-f90nml", "py-netcdf4", "py-pyyaml", diff --git a/modulefiles/gw_run.container.lua b/modulefiles/gw_run.container.lua new file mode 100644 index 00000000000..1fcb7e639ec --- /dev/null +++ b/modulefiles/gw_run.container.lua @@ -0,0 +1,24 @@ +help([[ +Load environment to run GFS on Ursa +]]) + +-- Test that HOMEgfs is set. +-- If not, load_gw_run_modules.sh was not sourced to load this module. +local homegfssdir=os.getenv("HOMEgfs") or "None" +if (homegfssdir == "None") then + LmodError("FATAL ERROR HOMEgfs variable is unset.\n" .. + "Please \"source ush/load_gw_run_modules.sh\" rather than loading this module directly.\n") +end + +load("gw_run.common") +load(pathJoin("wgrib2", (os.getenv("wgrib2_ver") or "None"))) + +prepend_path("MODULEPATH", pathJoin("/scratch3/NCEPDEV/global/role.glopara/git/prepobs/v" .. 
(os.getenv("prepobs_run_ver") or "None"), "modulefiles")) +load(pathJoin("prepobs", (os.getenv("prepobs_run_ver") or "None"))) + +prepend_path("MODULEPATH", pathJoin("/scratch3/NCEPDEV/global/role.glopara/git/Fit2Obs/v" .. (os.getenv("fit2obs_ver") or "None"), "modulefiles")) +load(pathJoin("fit2obs", (os.getenv("fit2obs_ver") or "None"))) + +whatis("Description: GFS run environment") + +load(pathJoin("imagemagick", (os.getenv("imagemagick_ver") or "None"))) diff --git a/ush/bash_utils.sh b/ush/bash_utils.sh index fe24e8f7841..ab76b502a99 100755 --- a/ush/bash_utils.sh +++ b/ush/bash_utils.sh @@ -111,7 +111,7 @@ function wait_for_file() { } function cpreq() { - cp "$*" + cp $* if [[ "$?" -ne "0" ]]; then err_exit "'cp $*' was not successful." fi @@ -142,10 +142,13 @@ function cpfs() { # >&2 echo "WARNING: ${FSYNC} $cpdstfile.cptmp failed." #fi - mv ${cpdstfile}.cptmp ${cpdstfile} - if [[ "$?" -ne "0" ]] ; then - err_exit "${cpdstfile}.cptmp is missing or was not moved successfully." - fi + #mv "${cpdstfile}.cptmp" ${cpdstfile} + #if [[ "$?" -ne "0" ]] ; then + # err_exit "${cpdstfile}.cptmp is missing or was not moved successfully." + #fi + if ! mv "${cpdstfile}.cptmp" "${cpdstfile}"; then + err_exit "Failed to rename '${cpdstfile}.cptmp' to '${cpdstfile}'." 
+ fi } # shellcheck disable= From d581da6319e5df1eb3ffedca1d238ca6cf40dc90 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Tue, 16 Sep 2025 15:03:57 +0000 Subject: [PATCH 109/134] making lint compliant --- dev/container/gen-run-cases.sh | 6 ++++- dev/container/utils/link_gfs_utils.sh | 21 ++++++++--------- dev/container/utils/link_model.sh | 34 +++++++++++++-------------- dev/container/utils/link_ufs_utils.sh | 29 +++++++++++------------ dev/container/utils/link_ww3.sh | 14 +++++------ dev/ush/load_gw_run_modules.sh | 4 ++-- jobs/JGLOBAL_ARCHIVE_VRFY | 4 ++-- ush/bash_utils.sh | 9 ++++--- 8 files changed, 61 insertions(+), 60 deletions(-) diff --git a/dev/container/gen-run-cases.sh b/dev/container/gen-run-cases.sh index 0bff4a0e4ed..d6460dce4ad 100755 --- a/dev/container/gen-run-cases.sh +++ b/dev/container/gen-run-cases.sh @@ -53,7 +53,11 @@ fi mkdir -p "${rundir}" -cd "${HOMEDIR}/dev/workflow" +# cd "${HOMEDIR}/dev/workflow" || exit 1 +if ! cd "${HOMEDIR}/dev/workflow"; then + echo "Error: Could not change to the workflow directory. Aborting." 
>&2 + exit 1 +fi if [[ "${run_with_container}" == "YES" ]]; then "${HOMEDIR}/dev/container/utils/gen-wrapper.sh" -H "${HOMEDIR}" -c "${container}" -b "${bindings}" -v diff --git a/dev/container/utils/link_gfs_utils.sh b/dev/container/utils/link_gfs_utils.sh index 7ad278ed0d5..708ba573e6e 100755 --- a/dev/container/utils/link_gfs_utils.sh +++ b/dev/container/utils/link_gfs_utils.sh @@ -48,7 +48,7 @@ do run_model_script=${HOMEgfs}/ush/container/run_${model}.sh rm -f "${run_model_script}" - cat > ${run_model_script} << EOF_MODEL + cat > "${run_model_script}" << EOF_MODEL #!/bin/bash source "${HOMEgfs}/dev/ush/load_gw_run_modules.sh" @@ -58,22 +58,22 @@ module load wgrib2/3.6.0 ${HOMEgfs}/sorc/gfs_utils.fd/install/bin/${model}.x "\$@" EOF_MODEL - chmod 755 ${run_model_script} + chmod 755 "${run_model_script}" #link_model_script=${HOMEgfs}/exec/${model} #rm -f ${link_model_script} link_model_script=${HOMEgfs}/exec/${model}.x - rm -f ${link_model_script} + rm -f "${link_model_script}" - cat > $link_model_script << EOF_LINK + cat > "${link_model_script}" << EOF_LINK #!/bin/bash - export LD_LIBRARY_PATH=$(dirname ${container}) - arg="\$@" + LD_LIBRARY_PATH=$(dirname ${container}) + export LD_LIBRARY_PATH singularity exec "${bindings}" "${container}" "${run_model_script}" "\$@" EOF_LINK - chmod 755 ${link_model_script} + chmod 755 "${link_model_script}" done for nm in ocnicepost @@ -81,16 +81,15 @@ do direct_model_script=${HOMEgfs}/exec/${nm}.x rm -f "${direct_model_script}" - cat > ${direct_model_script} << EOF_DIRECT + cat > "${direct_model_script}" << EOF_DIRECT #!/bin/bash source "${HOMEgfs}/dev/ush/load_gw_run_modules.sh" module load wgrib2/3.6.0 -arg="\$@" -${HOMEgfs}/sorc/gfs_utils.fd/install/bin/${nm}.x \$arg +${HOMEgfs}/sorc/gfs_utils.fd/install/bin/${nm}.x "\$@" EOF_DIRECT - chmod 755 ${direct_model_script} + chmod 755 "${direct_model_script}" done diff --git a/dev/container/utils/link_model.sh b/dev/container/utils/link_model.sh index f141632f260..c8e6b1d81b6 
100755 --- a/dev/container/utils/link_model.sh +++ b/dev/container/utils/link_model.sh @@ -50,7 +50,7 @@ fi run_model_script=${HOMEgfs}/ush/container/run_${model}.sh rm -f "${run_model_script}" -cat > ${run_model_script} << EOF_MODEL +cat > "${run_model_script}" << EOF_MODEL #!/bin/bash source "${HOMEgfs}/dev/ush/load_gw_run_modules.sh" @@ -59,11 +59,11 @@ ${HOMEgfs}/sorc/ufs_model.fd/tests/${model}.x "\$@" EOF_MODEL link_model_script=${HOMEgfs}/exec/${model}.x -rm -f ${link_model_script} +rm -f "${link_model_script}" case "${machineid}" in ursa) -cat > ${link_model_script} << EOF_URSA +cat > "${link_model_script}" << EOF_URSA #!/bin/bash # --- MPI and Fabric Configuration --- @@ -93,7 +93,7 @@ EOF_URSA ;; gaea*) -cat > ${link_model_script} << EOF_GAEA +cat > "${link_model_script}" << EOF_GAEA #!/bin/bash #export SINGULARITY_ENABLE_OVERLAY=try #export SINGULARITY_DISABLE_OVERLAY=yes @@ -101,18 +101,18 @@ cat > ${link_model_script} << EOF_GAEA #export SINGULARITY_DEBUG=0 #unset SINGULARITY_DEBUG - export LD_LIBRARY_PATH=$(dirname ${container}) + LD_LIBRARY_PATH=$(dirname ${container}) + export LD_LIBRARY_PATH set +x - arg="\$@" singularity exec \\ ${bindings} \\ ${container} \\ - ${run_model_script} \$arg + ${run_model_script} "\$@" EOF_GAEA ;; noaacloud) -cat > ${link_model_script} << EOF_NOAACLOUD +cat > "${link_model_script}" << EOF_NOAACLOUD #!/bin/bash #Need these lines on AWS to run more than one node. 
@@ -122,31 +122,31 @@ cat > ${link_model_script} << EOF_NOAACLOUD export FI_PROVIDER=tcp export FI_TCP_IFACE=eth0 - export LD_LIBRARY_PATH=$(dirname ${container}) + LD_LIBRARY_PATH=$(dirname ${container}) + export LD_LIBRARY_PATH set +x - arg="\$@" singularity exec \\ ${bindings} \\ ${container} \\ - ${run_model_script} \$arg + ${run_model_script} "\$@" EOF_NOAACLOUD ;; *) -cat > ${link_model_script} << EOF_LINK +cat > "${link_model_script}" << EOF_LINK #!/bin/bash - export LD_LIBRARY_PATH=$(dirname ${container}) + LD_LIBRARY_PATH=$(dirname ${container}) + export LD_LIBRARY_PATH set +x - arg="\$@" singularity exec \\ ${bindings} \\ ${container} \\ - ${run_model_script} \$arg + ${run_model_script} "\$@" EOF_LINK ;; esac -chmod 755 ${run_model_script} -chmod 755 ${link_model_script} +chmod 755 "${run_model_script}" +chmod 755 "${link_model_script}" diff --git a/dev/container/utils/link_ufs_utils.sh b/dev/container/utils/link_ufs_utils.sh index a7e7c05defd..71ab6e86cd6 100755 --- a/dev/container/utils/link_ufs_utils.sh +++ b/dev/container/utils/link_ufs_utils.sh @@ -8,8 +8,8 @@ while [[ "$#" -gt 0 ]]; do HOMEgfs="$2" shift 2 ;; - -b|--binding) - binding="$2" + -b|--bindings) + bindings="$2" shift 2 ;; -c|--container) @@ -28,11 +28,11 @@ while [[ "$#" -gt 0 ]]; do done if [[ ! -v HOMEgfs || ! 
-v container ]]; then - echo "Usage: link_model.sh -H/-HOMEgfs gw-home-dir -c/--container full-path-container-image -b/--bindings -B dirname [-B dirname1 [...]] [-v]" + echo "Usage: link_model.sh -H/-HOMEgfs gw-home-dir -c/--container full-path-container-image -b/--bindings [-v]" exit 11 fi -if [[ "$verbose" == "true" ]]; then +if [[ "${verbose}" == "true" ]]; then set -x fi @@ -41,9 +41,9 @@ do model=${nm} run_model_script=${HOMEgfs}/ush/container/run_${model}.sh - rm -f ${run_model_script} + rm -f "${run_model_script}" - cat > $run_model_script << EOF_MODEL + cat > "${run_model_script}" << EOF_MODEL #!/bin/bash source /usr/lmod/lmod/init/bash @@ -51,25 +51,24 @@ module purge module use ${HOMEgfs}/sorc/ufs_utils.fd/modulefiles module load build.container.intel -arg="\$@" -${HOMEgfs}/sorc/ufs_utils.fd/exec/${model} \$arg +${HOMEgfs}/sorc/ufs_utils.fd/exec/${model} "\$@" EOF_MODEL - chmod 755 $run_model_script + chmod 755 "${run_model_script}" #link_model_script=${HOMEgfs}/exec/${model} #rm -f ${link_model_script} link_model_script=${HOMEgfs}/exec/${model} - rm -f ${link_model_script} + rm -f "${link_model_script}" - cat > $link_model_script << EOF_LINK + cat > "${link_model_script}" << EOF_LINK #!/bin/bash - export LD_LIBRARY_PATH=$(dirname $container) - arg="\$@" - singularity exec ${bindings} ${container} ${run_model_script} \$arg + LD_LIBRARY_PATH=$(dirname $container) + export LD_LIBRARY_PATH + singularity exec ${bindings} ${container} ${run_model_script} "\$@" EOF_LINK - chmod 755 $link_model_script + chmod 755 "$link_model_script" done diff --git a/dev/container/utils/link_ww3.sh b/dev/container/utils/link_ww3.sh index 16347a8a835..bbc357aa799 100755 --- a/dev/container/utils/link_ww3.sh +++ b/dev/container/utils/link_ww3.sh @@ -53,9 +53,9 @@ do #echo "model: $model" run_model_script=${HOMEgfs}/ush/container/run_${type}_${model}.sh - rm -f ${run_model_script} + rm -f "${run_model_script}" - cat > ${run_model_script} << EOF_MODEL + cat > "${run_model_script}" 
<< EOF_MODEL #!/bin/bash # Set OMP_NUM_THREADS to 1 to avoid oversubscription when doing MPMD @@ -69,18 +69,18 @@ module load gfsutils_container.intel ${HOMEgfs}/sorc/ufs_model.fd/WW3/install/${pdlib}/bin/${model} "\$@" EOF_MODEL - chmod 755 ${run_model_script} + chmod 755 "${run_model_script}" link_model_script=${HOMEgfs}/exec/${type}_${model}.x - rm -f ${link_model_script} + rm -f "${link_model_script}" - cat > ${link_model_script} << EOF_LINK + cat > "${link_model_script}" << EOF_LINK #!/bin/bash - LD_LIBRARY_PATH=$(dirname ${container}) + LD_LIBRARY_PATH=$(dirname "${container}") export LD_LIBRARY_PATH singularity exec ${bindings} ${container} ${run_model_script} "\$@" EOF_LINK - chmod 755 ${link_model_script} + chmod 755 "${link_model_script}" done diff --git a/dev/ush/load_gw_run_modules.sh b/dev/ush/load_gw_run_modules.sh index 3560cd5b41e..126a21bd803 100755 --- a/dev/ush/load_gw_run_modules.sh +++ b/dev/ush/load_gw_run_modules.sh @@ -31,8 +31,8 @@ case "${MACHINE_ID}" in esac module purge -module use ${HOMEgfs}/sorc/ufs_model.fd/modulefiles -module load ufs_${MACHINE_ID}.intel +module use "${HOMEgfs}/sorc/ufs_model.fd/modulefiles" +module load "ufs_${MACHINE_ID}.intel" # If this function exists in the environment, run it; else set -x if it was set on entering this script ftype=$(type -t set_trace || echo "") diff --git a/jobs/JGLOBAL_ARCHIVE_VRFY b/jobs/JGLOBAL_ARCHIVE_VRFY index d9af5eb327b..453bd3177eb 100755 --- a/jobs/JGLOBAL_ARCHIVE_VRFY +++ b/jobs/JGLOBAL_ARCHIVE_VRFY @@ -24,8 +24,8 @@ done ############################################################### # Run archive script ############################################################### -if [ "${RUN_WITH_CONTAINER}" == "YES" ]; then - "${HOMEgfs}/exec/run_python.sh" ${GLOBALARCHIVESH:-${SCRgfs}/exglobal_archive_vrfy.py} -c -v +if [[ "${RUN_WITH_CONTAINER}" == "YES" ]]; then + "${HOMEgfs}/exec/run_python.sh" "${GLOBALARCHIVESH:-${SCRgfs}/exglobal_archive_vrfy.py}" -c -v else 
${GLOBALARCHIVESH:-${SCRgfs}/exglobal_archive_vrfy.py} fi diff --git a/ush/bash_utils.sh b/ush/bash_utils.sh index ab76b502a99..763e8844917 100755 --- a/ush/bash_utils.sh +++ b/ush/bash_utils.sh @@ -111,9 +111,8 @@ function wait_for_file() { } function cpreq() { - cp $* - if [[ "$?" -ne "0" ]]; then - err_exit "'cp $*' was not successful." + if ! cp "$@"; then + err_exit "The copy $@ operation failed." fi } @@ -125,13 +124,13 @@ function cpfs() { if [[ "$2" = '.' || "$2" = './' ]]; then cpdstfile=${PWD:?}/$(basename "$1") - elif [ -d "$2" ]; then + elif [[ -d "$2" ]]; then cpdstfile=${2%/}/$(basename "$1") else cpdstfile=$2 fi - cp "$1" ${cpdstfile}.cptmp + cp "$1" "${cpdstfile}.cptmp" if [[ "$?" -ne "0" ]] ; then err_exit "$1 is missing or was not copied successfully." From c193688287bbb7e4afd4eb6023df5e29d576ae1e Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Tue, 16 Sep 2025 17:09:10 +0000 Subject: [PATCH 110/134] making lint compliant --- dev/container/utils/link_gfs_utils.sh | 15 ++++++--------- dev/container/utils/link_model.sh | 8 ++++---- dev/container/utils/link_ufs_utils.sh | 4 ++-- dev/workflow/generate_workflows.sh | 17 ----------------- ush/bash_utils.sh | 6 ++---- 5 files changed, 14 insertions(+), 36 deletions(-) diff --git a/dev/container/utils/link_gfs_utils.sh b/dev/container/utils/link_gfs_utils.sh index 708ba573e6e..72397fa551b 100755 --- a/dev/container/utils/link_gfs_utils.sh +++ b/dev/container/utils/link_gfs_utils.sh @@ -68,7 +68,7 @@ EOF_MODEL cat > "${link_model_script}" << EOF_LINK #!/bin/bash - LD_LIBRARY_PATH=$(dirname ${container}) + LD_LIBRARY_PATH=$(dirname "${container}") export LD_LIBRARY_PATH singularity exec "${bindings}" "${container}" "${run_model_script}" "\$@" EOF_LINK @@ -76,20 +76,17 @@ EOF_LINK chmod 755 "${link_model_script}" done -for nm in ocnicepost -do - direct_model_script=${HOMEgfs}/exec/${nm}.x - rm -f "${direct_model_script}" +direct_model_script=${HOMEgfs}/exec/ocnicepost.x +rm -f "${direct_model_script}" - cat > 
"${direct_model_script}" << EOF_DIRECT +cat > "${direct_model_script}" << EOF_DIRECT #!/bin/bash source "${HOMEgfs}/dev/ush/load_gw_run_modules.sh" module load wgrib2/3.6.0 -${HOMEgfs}/sorc/gfs_utils.fd/install/bin/${nm}.x "\$@" +${HOMEgfs}/sorc/gfs_utils.fd/install/bin/ocnicepost.x "\$@" EOF_DIRECT - chmod 755 "${direct_model_script}" -done +chmod 755 "${direct_model_script}" diff --git a/dev/container/utils/link_model.sh b/dev/container/utils/link_model.sh index c8e6b1d81b6..8972ff38e74 100755 --- a/dev/container/utils/link_model.sh +++ b/dev/container/utils/link_model.sh @@ -81,7 +81,7 @@ export UCX_TLS=^sm,cma HOST_SLURM_PATH=/apps/slurm/default HOST_MPI_PATH=/apps/spack-2024-12/linux-rocky9-x86_64/gcc-11.4.1/intel-oneapi-compilers-2024.2.1-oqhstbmawnrsdw472p4pjsopj547o6xs/compiler/2024.2/opt/compiler - LD_LIBRARY_PATH=$(dirname ${container}) + LD_LIBRARY_PATH=$(dirname "${container}") export LD_LIBRARY_PATH singularity exec \\ --bind \${HOST_SLURM_PATH}:\${HOST_SLURM_PATH} \\ @@ -101,7 +101,7 @@ cat > "${link_model_script}" << EOF_GAEA #export SINGULARITY_DEBUG=0 #unset SINGULARITY_DEBUG - LD_LIBRARY_PATH=$(dirname ${container}) + LD_LIBRARY_PATH=$(dirname "${container}") export LD_LIBRARY_PATH set +x singularity exec \\ @@ -122,7 +122,7 @@ cat > "${link_model_script}" << EOF_NOAACLOUD export FI_PROVIDER=tcp export FI_TCP_IFACE=eth0 - LD_LIBRARY_PATH=$(dirname ${container}) + LD_LIBRARY_PATH=$(dirname "${container}") export LD_LIBRARY_PATH set +x singularity exec \\ @@ -135,7 +135,7 @@ EOF_NOAACLOUD *) cat > "${link_model_script}" << EOF_LINK #!/bin/bash - LD_LIBRARY_PATH=$(dirname ${container}) + LD_LIBRARY_PATH=$(dirname "${container}") export LD_LIBRARY_PATH set +x singularity exec \\ diff --git a/dev/container/utils/link_ufs_utils.sh b/dev/container/utils/link_ufs_utils.sh index 71ab6e86cd6..e7b5be9fd1e 100755 --- a/dev/container/utils/link_ufs_utils.sh +++ b/dev/container/utils/link_ufs_utils.sh @@ -64,11 +64,11 @@ EOF_MODEL cat > "${link_model_script}" 
<< EOF_LINK #!/bin/bash - LD_LIBRARY_PATH=$(dirname $container) + LD_LIBRARY_PATH=$(dirname "${container}") export LD_LIBRARY_PATH singularity exec ${bindings} ${container} ${run_model_script} "\$@" EOF_LINK - chmod 755 "$link_model_script" + chmod 755 "${link_model_script}" done diff --git a/dev/workflow/generate_workflows.sh b/dev/workflow/generate_workflows.sh index 0dd0407d89f..11537bb4791 100755 --- a/dev/workflow/generate_workflows.sh +++ b/dev/workflow/generate_workflows.sh @@ -538,37 +538,24 @@ echo "Running create_experiment.py for ${#_yaml_list[@]} cases" if [[ "${_verbose}" == true ]]; then printf "Selected cases: %s\n\n" "${_yaml_list[*]}" fi -echo "$0 check part 1" for _case in "${_yaml_list[@]}"; do if [[ "${_verbose}" == false ]]; then echo "${_case}" fi _pslot="${_case}${_tag}" - echo "$0 check part 1.1" if [[ "${_run_with_container}" == "true" ]]; then if [[ "${_has_rocotorun}" == "true" ]]; then - echo "$0 check part 1.1.1" _create_exp_cmd="../../exec/run_python.sh ./create_experiment.py -y ${_yaml_dir}/${_case}.yaml -r ${_rocotorun_fullpath} --overwrite" - echo "$0 check part 1.1.2" else - echo "$0 check part 1.2.1" _create_exp_cmd="../../exec/run_python.sh ./create_experiment.py -y ${_yaml_dir}/${_case}.yaml --overwrite" - echo "$0 check part 1.2.2" fi else - echo "$0 check part 1.3.1" _create_exp_cmd="./create_experiment.py -y ${_yaml_dir}/${_case}.yaml --overwrite" - echo "$0 check part 1.3.2" fi - echo "$0 check part 1.4" if [[ "${_verbose}" == true ]]; then - echo "$0 check part 1.4.1" pslot=${_pslot} RUNTESTS=${_runtests} ${_create_exp_cmd} - echo "$0 check part 1.4.1.2" else - echo "$0 check part 1.4.2" if ! pslot=${_pslot} RUNTESTS=${_runtests} ${_create_exp_cmd} 2> stderr 1> stdout; then - echo "$0 check part 1.4.2" _output=$(cat stdout stderr) _message="The create_experiment command (${_create_exp_cmd}) failed with a non-zero status. 
Output:" _message="${_message}"$'\n'"${_output}" @@ -579,11 +566,9 @@ for _case in "${_yaml_list[@]}"; do rm -f stdout stderr exit 12 fi - echo "$0 check part 1.4.3" rm -f stdout stderr fi - echo "$0 check part 1.5" # Check if DATAROOT is already present; eval will return just DATAROOT from the sourcing # shellcheck disable=SC2312 eval "$(PDY=0 cyc=0 source "${_runtests}/EXPDIR/${_pslot}/config.base" >& /dev/null; echo _dataroot="${STMP}/RUNDIRS/${_pslot}")" @@ -602,7 +587,6 @@ for _case in "${_yaml_list[@]}"; do fi fi - echo "$0 check part 1.6" # Check if this experiment is using cron or scron cron_file="${_runtests}/EXPDIR/${_pslot}/${_pslot}.crontab" scron_sh_file="${_runtests}/EXPDIR/${_pslot}/${_pslot}.scron.sh" @@ -628,7 +612,6 @@ for _case in "${_yaml_list[@]}"; do grep "${_pslot}" "${_runtests}/EXPDIR/${_pslot}/${_pslot}.crontab" >> tests.cron fi done -echo "$0 check part 2" echo # Update the cron diff --git a/ush/bash_utils.sh b/ush/bash_utils.sh index 763e8844917..4432e5e384d 100755 --- a/ush/bash_utils.sh +++ b/ush/bash_utils.sh @@ -112,7 +112,7 @@ function wait_for_file() { function cpreq() { if ! cp "$@"; then - err_exit "The copy $@ operation failed." + err_exit "The copy $* operation failed." fi } @@ -130,9 +130,7 @@ function cpfs() { cpdstfile=$2 fi - cp "$1" "${cpdstfile}.cptmp" - - if [[ "$?" -ne "0" ]] ; then + if ! cp "$1" "${cpdstfile}.cptmp"; then err_exit "$1 is missing or was not copied successfully." 
fi From d96672a050c1294e8aebe4081bc1c924947ddbf0 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 17 Sep 2025 13:40:11 +0000 Subject: [PATCH 111/134] update container forecast to use module in container --- dev/container/utils/link_model.sh | 4 +++- dev/ush/load_gw_run_modules.sh | 23 +++++------------------ ush/forecast_det.sh | 6 ++++-- 3 files changed, 12 insertions(+), 21 deletions(-) diff --git a/dev/container/utils/link_model.sh b/dev/container/utils/link_model.sh index 8972ff38e74..4dbd65a388a 100755 --- a/dev/container/utils/link_model.sh +++ b/dev/container/utils/link_model.sh @@ -53,7 +53,9 @@ rm -f "${run_model_script}" cat > "${run_model_script}" << EOF_MODEL #!/bin/bash -source "${HOMEgfs}/dev/ush/load_gw_run_modules.sh" +source /usr/lmod/lmod/init/bash +module use "${HOMEgfs}/sorc/ufs_model.fd/modulefiles" +module load ufs_container.intel ${HOMEgfs}/sorc/ufs_model.fd/tests/${model}.x "\$@" EOF_MODEL diff --git a/dev/ush/load_gw_run_modules.sh b/dev/ush/load_gw_run_modules.sh index 126a21bd803..59672040dd3 100755 --- a/dev/ush/load_gw_run_modules.sh +++ b/dev/ush/load_gw_run_modules.sh @@ -18,22 +18,13 @@ ulimit_s=$( ulimit -S -s ) source "${HOMEgfs}/ush/preamble.sh" # Find module command and purge: -source "${HOMEgfs}/ush/detect_machine.sh" +#if [[ "${RUN_WITH_CONTAINER}" == "YES" ]]; then +# MACHINE_ID=container +#else + source "${HOMEgfs}/ush/detect_machine.sh" +#fi source "${HOMEgfs}/ush/module-setup.sh" -case "${MACHINE_ID}" in - container) - source /usr/lmod/lmod/init/bash - ;; - *) - # source /apps/lmod/lmod/init/bash - ;; -esac - -module purge -module use "${HOMEgfs}/sorc/ufs_model.fd/modulefiles" -module load "ufs_${MACHINE_ID}.intel" - # If this function exists in the environment, run it; else set -x if it was set on entering this script ftype=$(type -t set_trace || echo "") if [[ "${ftype}" == "function" ]]; then @@ -51,12 +42,8 @@ if [[ -d "${HOMEgfs}/sorc/wxflow/src" ]]; then fi export PYTHONPATH -source 
"${HOMEgfs}/ush/detect_machine.sh" -source "${HOMEgfs}/ush/module-setup.sh" - # Source versions file for runtime source "${HOMEgfs}/versions/run.ver" - # Load our modules: module use "${HOMEgfs}/modulefiles" diff --git a/ush/forecast_det.sh b/ush/forecast_det.sh index 6fa1d127e6f..e18b2f3d59c 100755 --- a/ush/forecast_det.sh +++ b/ush/forecast_det.sh @@ -17,8 +17,10 @@ UFS_det(){ # Ensure cold start ICs are present when warm start is not set # TODO: add checks for other cold start ICs as well if [[ ! -f "${COMIN_ATMOS_INPUT}/gfs_ctrl.nc" ]]; then - echo "FATAL ERROR: Cold start ICs are missing from '${COMIN_ATMOS_INPUT}'" - exit 1 + if [[ ! -L "${COMIN_ATMOS_INPUT}/gfs_ctrl.nc" ]]; then + echo "FATAL ERROR: Cold start ICs are missing from '${COMIN_ATMOS_INPUT}'" + exit 1 + fi fi # Since warm start is false, we cannot do IAU From ff08841e52e20d64c4da3dd90751f13e5168ca5b Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 17 Sep 2025 20:39:26 +0000 Subject: [PATCH 112/134] re-test on ursa --- dev/container/shell-in-container.sh | 2 +- dev/container/utils/link_gfs_utils.sh | 6 ++++-- dev/ush/load_gw_run_modules.sh | 9 +++++++-- modulefiles/gw_run.container.lua | 12 ++++++------ versions/run.container.ver | 3 +-- 5 files changed, 19 insertions(+), 13 deletions(-) diff --git a/dev/container/shell-in-container.sh b/dev/container/shell-in-container.sh index dab96be882e..9d2a0a4d1f3 100755 --- a/dev/container/shell-in-container.sh +++ b/dev/container/shell-in-container.sh @@ -17,5 +17,5 @@ elif [[ ${MACHINE_ID} = noaacloud* ]] ; then bindings="-B /contrib -B /lustre -B /bucket" fi -singularity shell -e "${bindings}" "${img}" +singularity shell -e ${bindings} "${img}" diff --git a/dev/container/utils/link_gfs_utils.sh b/dev/container/utils/link_gfs_utils.sh index 72397fa551b..2d6b741c5e8 100755 --- a/dev/container/utils/link_gfs_utils.sh +++ b/dev/container/utils/link_gfs_utils.sh @@ -51,8 +51,10 @@ do cat > "${run_model_script}" << EOF_MODEL #!/bin/bash -source 
"${HOMEgfs}/dev/ush/load_gw_run_modules.sh" - +#source "${HOMEgfs}/dev/ush/load_gw_run_modules.sh" +source /usr/lmod/lmod/init/bash +module use "${HOMEgfs}/sorc/gfs_utils.fd/modulefiles" +module load gfsutils_container.intel module load wgrib2/3.6.0 ${HOMEgfs}/sorc/gfs_utils.fd/install/bin/${model}.x "\$@" diff --git a/dev/ush/load_gw_run_modules.sh b/dev/ush/load_gw_run_modules.sh index 59672040dd3..d0a501eea85 100755 --- a/dev/ush/load_gw_run_modules.sh +++ b/dev/ush/load_gw_run_modules.sh @@ -43,7 +43,7 @@ fi export PYTHONPATH # Source versions file for runtime -source "${HOMEgfs}/versions/run.ver" +source "${HOMEgfs}/versions/run.${MACHINE_ID}.ver" # Load our modules: module use "${HOMEgfs}/modulefiles" @@ -62,7 +62,12 @@ case "${MACHINE_ID}" in ;; "container") source /usr/lmod/lmod/init/bash - module load "gw_run.${MACHINE_ID}" + #source "${HOMEgfs}/versions/run.container.ver" + #module use "${HOMEgfs}/modulefiles" + #module load gw_run.container + module purge + module use "${HOMEgfs}/sorc/gfs_utils.fd/modulefiles" + module load gfsutils_container.intel export UTILROOT=${prod_util_ROOT} ;; *) diff --git a/modulefiles/gw_run.container.lua b/modulefiles/gw_run.container.lua index 1fcb7e639ec..55c3e479f3e 100644 --- a/modulefiles/gw_run.container.lua +++ b/modulefiles/gw_run.container.lua @@ -13,12 +13,12 @@ end load("gw_run.common") load(pathJoin("wgrib2", (os.getenv("wgrib2_ver") or "None"))) -prepend_path("MODULEPATH", pathJoin("/scratch3/NCEPDEV/global/role.glopara/git/prepobs/v" .. (os.getenv("prepobs_run_ver") or "None"), "modulefiles")) -load(pathJoin("prepobs", (os.getenv("prepobs_run_ver") or "None"))) +-- prepend_path("MODULEPATH", pathJoin("/scratch3/NCEPDEV/global/role.glopara/git/prepobs/v" .. (os.getenv("prepobs_run_ver") or "None"), "modulefiles")) +-- load(pathJoin("prepobs", (os.getenv("prepobs_run_ver") or "None"))) +-- +-- prepend_path("MODULEPATH", pathJoin("/scratch3/NCEPDEV/global/role.glopara/git/Fit2Obs/v" .. 
(os.getenv("fit2obs_ver") or "None"), "modulefiles")) +-- load(pathJoin("fit2obs", (os.getenv("fit2obs_ver") or "None"))) -prepend_path("MODULEPATH", pathJoin("/scratch3/NCEPDEV/global/role.glopara/git/Fit2Obs/v" .. (os.getenv("fit2obs_ver") or "None"), "modulefiles")) -load(pathJoin("fit2obs", (os.getenv("fit2obs_ver") or "None"))) +-- load(pathJoin("imagemagick", (os.getenv("imagemagick_ver") or "None"))) whatis("Description: GFS run environment") - -load(pathJoin("imagemagick", (os.getenv("imagemagick_ver") or "None"))) diff --git a/versions/run.container.ver b/versions/run.container.ver index 06a0cdd3bb7..698ea5adac5 100644 --- a/versions/run.container.ver +++ b/versions/run.container.ver @@ -9,7 +9,6 @@ export spack_stack_root="/opt/spack-stack//spack-stack-1.9.2" source "${HOMEgfs:-}/versions/spack.ver" -export cdo_ver=2.3.0 export perl_ver=5.38.0 export mkl_ver=2024.2.1 -export imagemagick_ver=7.1.1-11 +#export imagemagick_ver=7.1.1-11 From 66bb7ca77b8624def2106fa553fb202f12af5dc4 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Thu, 18 Sep 2025 00:34:59 +0000 Subject: [PATCH 113/134] check on aws --- dev/ush/load_gw_run_modules.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/dev/ush/load_gw_run_modules.sh b/dev/ush/load_gw_run_modules.sh index d0a501eea85..9a2ea8aaf41 100755 --- a/dev/ush/load_gw_run_modules.sh +++ b/dev/ush/load_gw_run_modules.sh @@ -68,7 +68,6 @@ case "${MACHINE_ID}" in module purge module use "${HOMEgfs}/sorc/gfs_utils.fd/modulefiles" module load gfsutils_container.intel - export UTILROOT=${prod_util_ROOT} ;; *) echo "WARNING: UNKNOWN PLATFORM" @@ -81,8 +80,8 @@ if [[ ${err} -ne 0 ]]; then exit 1 fi -# module load wgrib2 -# module load prod_util +module load wgrib2 +module load prod_util export WGRIB2=wgrib2 # Turn on our settings From 2e8c33261a1ca3e45c9ce3e36b15394b0f286045 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Mon, 29 Sep 2025 17:12:25 +0000 Subject: [PATCH 114/134] re-merge on aws --- 
modulefiles/gw_run.noaacloud.lua | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/modulefiles/gw_run.noaacloud.lua b/modulefiles/gw_run.noaacloud.lua index 4a8dd932128..41f29dd2053 100644 --- a/modulefiles/gw_run.noaacloud.lua +++ b/modulefiles/gw_run.noaacloud.lua @@ -7,7 +7,7 @@ Load environment to run GFS on NOAA cloud local homegfssdir=os.getenv("HOMEgfs") or "None" if (homegfssdir == "None") then LmodError("FATAL ERROR HOMEgfs variable is unset.\n" .. - "Please \"source ush/load_gw_run_modules.sh\" rather than loading this module directly.\n") + "Please \"source dev/ush/load_gw_run_modules.sh\" rather than loading this module directly.\n") end load(pathJoin("perl", (os.getenv("perl_ver") or "None"))) @@ -15,12 +15,12 @@ load(pathJoin("mkl", (os.getenv("mkl_ver") or "None"))) load("gw_run.common") -prepend_path("MODULEPATH", pathJoin("/contrib/git/prepobs/v" .. (os.getenv("prepobs_run_ver") or "None"), "modulefiles")) -load(pathJoin("prepobs", (os.getenv("prepobs_run_ver") or "None"))) +-- prepend_path("MODULEPATH", pathJoin("/contrib/git/prepobs/v" .. (os.getenv("prepobs_run_ver") or "None"), "modulefiles")) +-- load(pathJoin("prepobs", (os.getenv("prepobs_run_ver") or "None"))) -prepend_path("MODULEPATH", pathJoin("/contrib/git/Fit2Obs/v" .. (os.getenv("fit2obs_ver") or "None"), "modulefiles")) -load(pathJoin("fit2obs", (os.getenv("fit2obs_ver") or "None"))) +-- prepend_path("MODULEPATH", pathJoin("/contrib/git/Fit2Obs/v" .. 
(os.getenv("fit2obs_ver") or "None"), "modulefiles")) +-- load(pathJoin("fit2obs", (os.getenv("fit2obs_ver") or "None"))) -load(pathJoin("imagemagick", (os.getenv("imagemagick_ver") or "None"))) +-- load(pathJoin("imagemagick", (os.getenv("imagemagick_ver") or "None"))) whatis("Description: GFS run environment") From 59c88971bd28d948f7c172eebee3fa8c45ee25ed Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Mon, 29 Sep 2025 21:26:33 +0000 Subject: [PATCH 115/134] save local change --- .gitmodules | 28 +++++++++++++++------------- dev/workflow/create_experiment.py | 6 +++--- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/.gitmodules b/.gitmodules index 531a5e306e2..ea22e018408 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,33 +1,35 @@ [submodule "sorc/ufs_model.fd"] path = sorc/ufs_model.fd - #url = https://github.com/ufs-community/ufs-weather-model - url = https://github.com/NOAA-EPIC/ufs-weather-model.git - branch = feature/use_container_spack-stack-1.9.2 + url = https://github.com/ufs-community/ufs-weather-model [submodule "sorc/wxflow"] path = sorc/wxflow url = https://github.com/NOAA-EMC/wxflow [submodule "sorc/gfs_utils.fd"] path = sorc/gfs_utils.fd - #url = https://github.com/NOAA-EMC/gfs-utils - url = https://github.com/NOAA-EPIC/gfs-utils.git - branch = feature/use_container_spack-stack-1.9.2 + url = https://github.com/NOAA-EMC/gfs-utils [submodule "sorc/ufs_utils.fd"] path = sorc/ufs_utils.fd - #url = https://github.com/ufs-community/UFS_UTILS.git - url = https://github.com/NOAA-EPIC/UFS_UTILS-cloud.git - branch = feature/use_container_spack-stack-1.9.2 + url = https://github.com/ufs-community/UFS_UTILS.git [submodule "sorc/verif-global.fd"] path = sorc/verif-global.fd url = https://github.com/NOAA-EMC/EMC_verif-global.git [submodule "sorc/gsi_enkf.fd"] path = sorc/gsi_enkf.fd - url = https://github.com/NOAA-EMC/GSI.git + #url = https://github.com/NOAA-EMC/GSI.git + url = https://github.com/NOAA-EPIC/GSI-cloud.git + branch = 
feature/use_container_spack-stack-1.9.2 [submodule "sorc/gdas.cd"] path = sorc/gdas.cd - url = https://github.com/NOAA-EMC/GDASApp.git + #url = https://github.com/NOAA-EMC/GDASApp.git + url = https://github.com/NOAA-EPIC/GDASApp-cloud.git + branch = feature/use_container_spack-stack-1.9.2 [submodule "sorc/gsi_utils.fd"] path = sorc/gsi_utils.fd - url = https://github.com/NOAA-EMC/GSI-Utils.git + #url = https://github.com/NOAA-EMC/GSI-Utils.git + url = https://github.com/NOAA-EPIC/GSI-utils-cloud.git + branch = feature/use_container_spack-stack-1.9.2 [submodule "sorc/gsi_monitor.fd"] path = sorc/gsi_monitor.fd - url = https://github.com/NOAA-EMC/GSI-Monitor.git + #url = https://github.com/NOAA-EMC/GSI-Monitor.git + url = https://github.com/NOAA-EPIC/GSI-Monitor-cloud.git + branch = feature/use_container_spack-stack-1.9.2 diff --git a/dev/workflow/create_experiment.py b/dev/workflow/create_experiment.py index 67779f604a7..156d4e5d73d 100755 --- a/dev/workflow/create_experiment.py +++ b/dev/workflow/create_experiment.py @@ -134,9 +134,9 @@ def input_args(): setup_workflow_args.append("--force") if user_inputs.rocotorun is not None: - setup_xml_args.append("--rocotorun") - setup_xml_args.append(user_inputs.rocotorun) + setup_workflow_args.append("--rocotorun") + setup_workflow_args.append(user_inputs.rocotorun) logger.info(f"Call: setup_workflow.main()") logger.debug(f"setup_workflow.py {' '.join(setup_workflow_args)}") - setup_xml.main(setup_xml_args) + setup_workflow.main(setup_workflow_args) From 04ebd5b494d64e4aef54f3054a225c917d5d3957 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Tue, 30 Sep 2025 00:19:02 +0000 Subject: [PATCH 116/134] remove versions for container, and no host modules for container --- dev/container/utils/link_gfs_utils.sh | 6 +++++- dev/ush/load_gw_run_modules.sh | 23 ++++++++++------------- env/URSA.env | 6 ++++++ modulefiles/gw_run.container.lua | 24 ------------------------ modulefiles/gw_setup.container.lua | 21 --------------------- 
versions/build.container.ver | 13 ------------- versions/run.container.ver | 14 -------------- 7 files changed, 21 insertions(+), 86 deletions(-) delete mode 100644 modulefiles/gw_run.container.lua delete mode 100644 modulefiles/gw_setup.container.lua delete mode 100644 versions/build.container.ver delete mode 100644 versions/run.container.ver diff --git a/dev/container/utils/link_gfs_utils.sh b/dev/container/utils/link_gfs_utils.sh index 2d6b741c5e8..d74af04c2c2 100755 --- a/dev/container/utils/link_gfs_utils.sh +++ b/dev/container/utils/link_gfs_utils.sh @@ -84,7 +84,11 @@ rm -f "${direct_model_script}" cat > "${direct_model_script}" << EOF_DIRECT #!/bin/bash -source "${HOMEgfs}/dev/ush/load_gw_run_modules.sh" +#source "${HOMEgfs}/dev/ush/load_gw_run_modules.sh" +source /usr/lmod/lmod/init/bash +module use "${HOMEgfs}/sorc/gfs_utils.fd/modulefiles" +module load gfsutils_container.intel +module load wgrib2/3.6.0 module load wgrib2/3.6.0 ${HOMEgfs}/sorc/gfs_utils.fd/install/bin/ocnicepost.x "\$@" diff --git a/dev/ush/load_gw_run_modules.sh b/dev/ush/load_gw_run_modules.sh index 9a2ea8aaf41..ff956313b09 100755 --- a/dev/ush/load_gw_run_modules.sh +++ b/dev/ush/load_gw_run_modules.sh @@ -17,12 +17,7 @@ ulimit_s=$( ulimit -S -s ) source "${HOMEgfs}/ush/preamble.sh" -# Find module command and purge: -#if [[ "${RUN_WITH_CONTAINER}" == "YES" ]]; then -# MACHINE_ID=container -#else - source "${HOMEgfs}/ush/detect_machine.sh" -#fi +source "${HOMEgfs}/ush/detect_machine.sh" source "${HOMEgfs}/ush/module-setup.sh" # If this function exists in the environment, run it; else set -x if it was set on entering this script @@ -42,13 +37,14 @@ if [[ -d "${HOMEgfs}/sorc/wxflow/src" ]]; then fi export PYTHONPATH -# Source versions file for runtime -source "${HOMEgfs}/versions/run.${MACHINE_ID}.ver" -# Load our modules: -module use "${HOMEgfs}/modulefiles" +echo "MACHINE_ID: ${MACHINE_ID}" case "${MACHINE_ID}" in "wcoss2") + # Source versions file for runtime + source 
"${HOMEgfs}/versions/run.${MACHINE_ID}.ver" + # Load our modules: + module use "${HOMEgfs}/modulefiles" module load cray-pals module load cfp module load libjpeg @@ -57,14 +53,15 @@ case "${MACHINE_ID}" in module load "gw_run.${MACHINE_ID}" ;; "hera" | "orion" | "hercules" | "gaeac5" | "gaeac6" | "noaacloud" | "ursa") + # Source versions file for runtime + source "${HOMEgfs}/versions/run.${MACHINE_ID}.ver" + # Load our modules: + module use "${HOMEgfs}/modulefiles" module load "gw_run.${MACHINE_ID}" export UTILROOT=${prod_util_ROOT} ;; "container") source /usr/lmod/lmod/init/bash - #source "${HOMEgfs}/versions/run.container.ver" - #module use "${HOMEgfs}/modulefiles" - #module load gw_run.container module purge module use "${HOMEgfs}/sorc/gfs_utils.fd/modulefiles" module load gfsutils_container.intel diff --git a/env/URSA.env b/env/URSA.env index b908defb4a0..247f0515e77 100644 --- a/env/URSA.env +++ b/env/URSA.env @@ -242,6 +242,12 @@ elif [[ "${step}" = "eupd" ]]; then export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" elif [[ "${step}" = "fcst" ]] || [[ "${step}" = "efcs" ]]; then + # fast + #export launcher="srun -l --hint=nomultithread --distribution=block:block" + # slow3 + #export launcher="srun -l --export=ALL --hint=nomultithread" + export launcher="srun --mpi=pmi2 -l --hint=nomultithread" + #export launcher="srun env -u SLURM_NODELIST" (( nnodes = (ntasks+tasks_per_node-1)/tasks_per_node )) (( ufs_ntasks = nnodes*tasks_per_node )) diff --git a/modulefiles/gw_run.container.lua b/modulefiles/gw_run.container.lua deleted file mode 100644 index 55c3e479f3e..00000000000 --- a/modulefiles/gw_run.container.lua +++ /dev/null @@ -1,24 +0,0 @@ -help([[ -Load environment to run GFS on Ursa -]]) - --- Test that HOMEgfs is set. --- If not, load_gw_run_modules.sh was not sourced to load this module. -local homegfssdir=os.getenv("HOMEgfs") or "None" -if (homegfssdir == "None") then - LmodError("FATAL ERROR HOMEgfs variable is unset.\n" .. 
- "Please \"source ush/load_gw_run_modules.sh\" rather than loading this module directly.\n") -end - -load("gw_run.common") -load(pathJoin("wgrib2", (os.getenv("wgrib2_ver") or "None"))) - --- prepend_path("MODULEPATH", pathJoin("/scratch3/NCEPDEV/global/role.glopara/git/prepobs/v" .. (os.getenv("prepobs_run_ver") or "None"), "modulefiles")) --- load(pathJoin("prepobs", (os.getenv("prepobs_run_ver") or "None"))) --- --- prepend_path("MODULEPATH", pathJoin("/scratch3/NCEPDEV/global/role.glopara/git/Fit2Obs/v" .. (os.getenv("fit2obs_ver") or "None"), "modulefiles")) --- load(pathJoin("fit2obs", (os.getenv("fit2obs_ver") or "None"))) - --- load(pathJoin("imagemagick", (os.getenv("imagemagick_ver") or "None"))) - -whatis("Description: GFS run environment") diff --git a/modulefiles/gw_setup.container.lua b/modulefiles/gw_setup.container.lua deleted file mode 100644 index 2af6e33ac67..00000000000 --- a/modulefiles/gw_setup.container.lua +++ /dev/null @@ -1,21 +0,0 @@ -help([[ -Load environment to run GFS workflow setup scripts on container -]]) - --- load("rocoto") - -prepend_path("MODULEPATH", "/opt/spack-stack/spack-stack-1.9.2/envs/unified-env/install/modulefiles/Core") - -local stack_oneapi_ver=os.getenv("stack_oneapi_ver") or "2024.2.0" -local stack_mpi_ver=os.getenv("stack_mpi_ver") or "2021.13" -local cmake_ver=os.getenv("cmake_ver") or "3.27.9" - -load(pathJoin("stack-oneapi", stack_oneapi_ver)) -load(pathJoin("stack-intel-oneapi-mpi", stack_mpi_ver)) -load(pathJoin("cmake", cmake_ver)) - -load("py-jinja2") -load("py-pyyaml") -load("py-numpy") - -whatis("Description: GFS run setup environment") diff --git a/versions/build.container.ver b/versions/build.container.ver deleted file mode 100644 index ef5795a25a4..00000000000 --- a/versions/build.container.ver +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env bash -export stack_compiler=oneapi -export stack_compiler_ver=2024.2.0 - -export stack_mpi=intel-oneapi-mpi -export stack_mpi_ver=2021.13 - -# gnu_ver set to 
override 14.2.0 default set in UFS-WM noaacloud module file -export gnu_ver=13.2.0 - -export spack_stack_root="/opt/spack-stack/spack-stack-1.9.2" - -source "${HOMEgfs:-}/versions/spack.ver" diff --git a/versions/run.container.ver b/versions/run.container.ver deleted file mode 100644 index 698ea5adac5..00000000000 --- a/versions/run.container.ver +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env bash -export stack_compiler=oneapi -export stack_compiler_ver=2024.2.0 - -export stack_mpi=intel-oneapi-mpi -export stack_mpi_ver=2021.13 - -export spack_stack_root="/opt/spack-stack//spack-stack-1.9.2" - -source "${HOMEgfs:-}/versions/spack.ver" - -export perl_ver=5.38.0 -export mkl_ver=2024.2.1 -#export imagemagick_ver=7.1.1-11 From 4fc82655363b805bf2d0374650a8ff20391fcdcc Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Tue, 30 Sep 2025 16:35:04 +0000 Subject: [PATCH 117/134] add rocotorun to rocoto_parser --- dev/workflow/setup_workflow.py | 2 ++ sorc/link_workflow.sh | 23 ++--------------------- 2 files changed, 4 insertions(+), 21 deletions(-) diff --git a/dev/workflow/setup_workflow.py b/dev/workflow/setup_workflow.py index e2c7f754b3f..47d45cb58ff 100755 --- a/dev/workflow/setup_workflow.py +++ b/dev/workflow/setup_workflow.py @@ -55,6 +55,8 @@ def input_args(*argv): default=25, required=False) rocoto_parser.add_argument('--verbosity', help='verbosity level of Rocoto', type=int, default=10, required=False) + rocoto_parser.add_argument('--rocotorun', help='rocotorun fullpath', type=str, + default=None, required=False) # EcFlow subparser ecflow_parser = subparsers.add_parser('ecflow', diff --git a/sorc/link_workflow.sh b/sorc/link_workflow.sh index 0d6eed92442..6e1d6c6a42e 100755 --- a/sorc/link_workflow.sh +++ b/sorc/link_workflow.sh @@ -231,12 +231,8 @@ fi #--add GDASApp parm directory #------------------------------ if [[ -d "${HOMEgfs}/sorc/gdas.cd" ]]; then - cd "${HOMEgfs}/parm" || exit 1 - if [[ ! 
-d gdas ]]; then - mkdir -p gdas - fi - cd gdas || exit 1 - declare -a gdasapp_comps=("aero" "atm" "io" "ioda" "snow" "marine" "jcb-gdas" "jcb-algorithms" "anlstat" "analcalc") + cd "${HOMEgfs}/parm/gdas" || exit 1 + declare -a gdasapp_comps=("aero" "atm" "io" "ioda" "snow" "soca" "jcb-gdas" "jcb-algorithms" "stat") for comp in "${gdasapp_comps[@]}"; do if [[ -d "${comp}" ]]; then rm -rf "${comp}" @@ -245,21 +241,6 @@ if [[ -d "${HOMEgfs}/sorc/gdas.cd" ]]; then done fi -#------------------------------ -#--add SPOC parm and ush directory -#------------------------------ -sources=("config" "scripts") -targets=("parm/gdas" "ush") -for i in "${!sources[@]}"; do - src="${HOMEgfs}/sorc/gdas.cd/sorc/spoc/dump/${sources[${i}]}" - dst="${HOMEgfs}/${targets[${i}]}" - - if [[ -d "${src}" ]]; then - cd "${dst}" || exit 1 - ${LINK_OR_COPY} "${src}" "spoc" - fi -done - #------------------------------ #--add GDASApp files #------------------------------ From c0d0154e63d058016b647a7069a04d97c51425cf Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 1 Oct 2025 13:31:39 +0000 Subject: [PATCH 118/134] remove unused dir --- .../atmos_products/exglobal_atmos_products.sh | 314 ------------------ 1 file changed, 314 deletions(-) delete mode 100755 dev/container/atmos_products/exglobal_atmos_products.sh diff --git a/dev/container/atmos_products/exglobal_atmos_products.sh b/dev/container/atmos_products/exglobal_atmos_products.sh deleted file mode 100755 index 78f15fdd67d..00000000000 --- a/dev/container/atmos_products/exglobal_atmos_products.sh +++ /dev/null @@ -1,314 +0,0 @@ -#! 
/usr/bin/env bash - -source /usr/lmod/lmod/init/bash -module use ${HOMEgfs}/sorc/gfs_utils.fd/modulefiles -module load gfsutils_container.intel -module load wgrib2/2.0.8 -module load prod_util/2.1.1 -module list - -export I_MPI_TMPDIR=/tmp -export PBS_ENVIRONMENT="" -export LSB_JOBID=0 -export I_MPI_HYDRA_RMK=slurm -export LOADL_HOSTFILE="" -export PE_HOSTFILE="" -export I_MPI_YARN=no -export NB_PARALLEL_JOB_HOSTS="" - -#source /opt/intel/oneapi/setvars.sh --force -#export OCL_ICD_FILENAMES="" - -# Programs used -export UTILROOT=${prod_util_ROOT} -#export WGRIB2=${wgrib2_ROOT}/bin/wgrib2 -export WGRIB2=wgrib2 - -# Scripts used -INTERP_ATMOS_MASTERSH=${USHgfs}/container/interp_atmos_master.sh -INTERP_ATMOS_SFLUXSH=${USHgfs}/container/interp_atmos_sflux.sh - -# Variables used in this job -downset=${downset:-1} # No. of groups of pressure grib2 products to create -ntasks_atmos_products=${ntasks_atmos_products:-8} # no. of processors available to process each group - -# WGNE related options -WGNE=${WGNE:-NO} # Create WGNE products -FHMAX_WGNE=${FHMAX_WGNE:-0} # WGNE products are created for first FHMAX_WGNE forecast hours (except 0) - -cd "${DATA}" || exit 1 - -# Set paramlist files based on FORECAST_HOUR (-1, 0, 3, 6, etc.) 
-# Determine if supplemental products (PGBS) (1-degree and 1/2-degree) should be generated -if [[ ${FORECAST_HOUR} -le 0 ]]; then - if [[ ${FORECAST_HOUR} -lt 0 ]]; then - fhr3="anl" - paramlista="${paramlista_anl}" - FLXGF="NO" - elif [[ ${FORECAST_HOUR} == 0 ]]; then - fhr3=$(printf "f%03d" "${FORECAST_HOUR}") - paramlista="${paramlista_f000}" - fi - PGBS="YES" -else - fhr3=$(printf "f%03d" "${FORECAST_HOUR}") - if (( FORECAST_HOUR%FHOUT_PGBS == 0 )); then - PGBS="YES" - fi -fi - -#----------------------------------------------------- -# Section creating pressure grib2 interpolated products - -# Files needed by ${INTERP_ATMOS_MASTERSH} -MASTER_FILE="${COMIN_ATMOS_MASTER}/${PREFIX}master.grb2${fhr3}" - -# Get inventory from ${MASTER_FILE} that matches patterns from ${paramlista} -# Extract this inventory from ${MASTER_FILE} into a smaller tmpfile or tmpfileb based on paramlista or paramlistb -# shellcheck disable=SC2312 -${WGRIB2} "${MASTER_FILE}" | grep -F -f "${paramlista}" | ${WGRIB2} -i -grib "tmpfile_${fhr3}" "${MASTER_FILE}" && true -export err=$? -if [[ ${err} -ne 0 ]]; then - err_exit "wgrib2 failed to create intermediate grib2 file from ${MASTER_FILE} using ${paramlista}" -fi - -# Do the same as above for ${paramlistb} -if [[ ${downset} -eq 2 ]]; then - # shellcheck disable=SC2312 - ${WGRIB2} "${MASTER_FILE}" | grep -F -f "${paramlistb}" | ${WGRIB2} -i -grib "tmpfileb_${fhr3}" "${MASTER_FILE}" && true - export err=$? 
- if [[ ${err} -ne 0 ]]; then - err_exit "wgrib2 failed to create intermediate grib2 file from ${MASTER_FILE} using ${paramlistb}" - fi -fi - -# Determine grids once and save them as a string and an array for processing -grid_string="0p25" -if [[ "${PGBS:-}" == "YES" ]]; then - grid_string="${grid_string}:0p50:1p00" -else - echo "Supplemental product generation is disable for fhr = ${fhr3}" - PGBS="NO" # Can't generate supplemental products if PGBS is not YES -fi -# Also transform the ${grid_string} into an array for processing -IFS=':' read -ra grids <<< "${grid_string}" - -for (( nset=1 ; nset <= downset ; nset++ )); do - - echo "Begin processing nset = ${nset}" - - # Number of processors available to process $nset - nproc=${ntasks} - - # Each set represents a group of files - if [[ ${nset} == 1 ]]; then - grp="" # TODO: this should be "a" when we eventually rename the pressure grib2 files per EE2 convention - elif [[ ${nset} == 2 ]]; then - grp="b" - fi - - # process grib2 chunkfiles to interpolate using MPMD - tmpfile="tmpfile${grp}_${fhr3}" - - # shellcheck disable=SC2312 - ncount=$(${WGRIB2} "${tmpfile}" | wc -l) - if [[ ${nproc} -gt ${ncount} ]]; then - echo "WARNING: Total no. of available processors '${nproc}' exceeds no. of records '${ncount}' in ${tmpfile}" - echo "Reduce nproc to ${ncount} (or less) to not waste resources" - fi - inv=$(( ncount / nproc )) - rm -f "${DATA}/poescript" - - last=0 - for (( iproc = 1 ; iproc <= nproc ; iproc++ )); do - first=$((last + 1)) - last=$((last + inv)) - if [[ ${last} -gt ${ncount} ]]; then - last=${ncount} - fi - - # if final record of is u-component, add next record v-component - # if final record is land, add next record icec - # grep returns 1 if no match is found, so temporarily turn off exit on non-zero rc - set +e - # shellcheck disable=SC2312 - ${WGRIB2} -d "${last}" "${tmpfile}" | grep -E -i "ugrd|ustm|uflx|u-gwd|land|maxuw" - rc=$? 
- set_strict - if [[ ${rc} == 0 ]]; then # Matched the grep - last=$(( last + 1 )) - fi - if [[ ${iproc} -eq ${nproc} ]]; then - last=${ncount} - fi - - # Break tmpfile into processor specific chunks in preparation for MPMD - ${WGRIB2} "${tmpfile}" -for "${first}":"${last}" -grib "${tmpfile}_${iproc}" && true - export err=$? - if [[ ${err} -ne 0 ]]; then - err_exit "wgrib2 failed to geneate an intermediate grib2 file from ${tmpfile} records ${first} to ${last}" - fi - input_file="${tmpfile}_${iproc}" - output_file_prefix="pgb2${grp}file_${fhr3}_${iproc}" - echo "${INTERP_ATMOS_MASTERSH} ${input_file} ${output_file_prefix} ${grid_string}" >> "${DATA}/poescript" - - # if at final record and have not reached the final processor then write echo's to - # poescript for remaining processors - if [[ ${last} -eq ${ncount} ]]; then - for (( pproc = iproc+1 ; pproc < nproc ; pproc++ )); do - echo "/bin/echo ${pproc}" >> "${DATA}/poescript" - done - break - fi - done # for (( iproc = 1 ; iproc <= nproc ; iproc++ )); do - - # Run with MPMD or serial - "${USHgfs}/container/run_mpmd.sh" "${DATA}/poescript" && true - export err=$? - if [[ ${err} -ne 0 ]]; then - err_exit "Some or all interpolations of the master grib file failed during MPMD execution!" 
- fi - - # We are in a loop over downset, save output from mpmd into nset specific output - mv mpmd.out "mpmd_${nset}.out" - - # Concatenate grib files from each processor into a single one - # and clean-up as you go - echo "Concatenating processor-specific grib2 files into a single product file" - for (( iproc = 1 ; iproc <= nproc ; iproc++ )); do - for grid in "${grids[@]}"; do - cat "pgb2${grp}file_${fhr3}_${iproc}_${grid}" >> "pgb2${grp}file_${fhr3}_${grid}" - rm -f "pgb2${grp}file_${fhr3}_${iproc}_${grid}" - done - # There is no further use of the processor specific tmpfile; delete it - rm -f "${tmpfile}_${iproc}" - done - - # Move to COM and index the product grib files - for grid in "${grids[@]}"; do - prod_dir="COMOUT_ATMOS_GRIB_${grid}" - cpfs "pgb2${grp}file_${fhr3}_${grid}" "${!prod_dir}/${PREFIX}pgrb2${grp}.${grid}.${fhr3}" - ${WGRIB2} -s "pgb2${grp}file_${fhr3}_${grid}" > "${!prod_dir}/${PREFIX}pgrb2${grp}.${grid}.${fhr3}.idx" - done - - echo "Finished processing nset = ${nset}" - -done # for (( nset=1 ; nset <= downset ; nset++ )) - -#--------------------------------------------------------------- - -# Create the index file for the sflux master, if it exists. -FLUX_FILE="${COMIN_ATMOS_MASTER}/${PREFIX}sfluxgrb${fhr3}.grib2" -if [[ -s "${FLUX_FILE}" ]]; then - ${WGRIB2} -s "${FLUX_FILE}" > "${FLUX_FILE}.idx" -fi - -# Section creating slfux grib2 interpolated products -# Create 1-degree sflux grib2 output -# move to COM and index it -if [[ "${FLXGF:-}" == "YES" ]]; then - - # Files needed by ${INTERP_ATMOS_SFLUXSH} - input_file="${FLUX_FILE}" - output_file_prefix="sflux_${fhr3}" - grid_string="1p00" - "${INTERP_ATMOS_SFLUXSH}" "${input_file}" "${output_file_prefix}" "${grid_string}" && true - export err=$? - if [[ ${err} -ne 0 ]]; then - err_exit "Unable to interpolate the surface flux grib2 files!" 
- fi - - # Move to COM and index the product sflux file - IFS=':' read -ra grids <<< "${grid_string}" - for grid in "${grids[@]}"; do - prod_dir="COMOUT_ATMOS_GRIB_${grid}" - cpfs "sflux_${fhr3}_${grid}" "${!prod_dir}/${PREFIX}flux.${grid}.${fhr3}" - ${WGRIB2} -s "sflux_${fhr3}_${grid}" > "${!prod_dir}/${PREFIX}flux.${grid}.${fhr3}.idx" - done -fi - -# Section creating 0.25 degree WGNE products for nset=1, and fhr <= FHMAX_WGNE -if [[ "${WGNE:-}" == "YES" ]]; then - grp="" # TODO: this should be "a" when we eventually rename the pressure grib2 files per EE2 convention - if [[ ${FORECAST_HOUR} -gt 0 && ${FORECAST_HOUR} -le ${FHMAX_WGNE} ]]; then - # TODO: 597 is the message number for APCP in GFSv16. GFSv17 may change this as more messages are added. This can be controlled via config.atmos_products - ${WGRIB2} "${COMOUT_ATMOS_GRIB_0p25}/${PREFIX}pgrb2${grp}.0p25.${fhr3}" -d "${APCP_MSG:-597}" -grib "${COMOUT_ATMOS_GRIB_0p25}/${PREFIX}wgne.${fhr3}" - fi -fi - -#--------------------------------------------------------------- - -# Start sending DBN alerts -# Everything below this line is for sending files to DBN (SENDDBN=YES) -if [[ "${SENDDBN:-}" == "YES" ]]; then - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB2_0P25" "${job}" "${COMOUT_ATMOS_GRIB_0p25}/${PREFIX}pgrb2.0p25.${fhr3}" - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB2_0P25_WIDX" "${job}" "${COMOUT_ATMOS_GRIB_0p25}/${PREFIX}pgrb2.0p25.${fhr3}.idx" - if [[ "${RUN}" == "gfs" ]]; then - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB2B_0P25" "${job}" "${COMOUT_ATMOS_GRIB_0p25}/${PREFIX}pgrb2b.0p25.${fhr3}" - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB2B_0P25_WIDX" "${job}" "${COMOUT_ATMOS_GRIB_0p25}/${PREFIX}pgrb2b.0p25.${fhr3}.idx" - if [[ -s "${COMOUT_ATMOS_GRIB_0p50}/${PREFIX}pgrb2.0p50.${fhr3}" ]]; then - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB2_0P5" "${job}" "${COMOUT_ATMOS_GRIB_0p50}/${PREFIX}pgrb2.0p50.${fhr3}" - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB2_0P5_WIDX" "${job}" 
"${COMOUT_ATMOS_GRIB_0p50}/${PREFIX}pgrb2.0p50.${fhr3}.idx" - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB2B_0P5" "${job}" "${COMOUT_ATMOS_GRIB_0p50}/${PREFIX}pgrb2b.0p50.${fhr3}" - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB2B_0P5_WIDX" "${job}" "${COMOUT_ATMOS_GRIB_0p50}/${PREFIX}pgrb2b.0p50.${fhr3}.idx" - fi - if [[ -s "${COMOUT_ATMOS_GRIB_1p00}/${PREFIX}pgrb2.1p00.${fhr3}" ]]; then - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB2_1P0" "${job}" "${COMOUT_ATMOS_GRIB_1p00}/${PREFIX}pgrb2.1p00.${fhr3}" - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB2_1P0_WIDX" "${job}" "${COMOUT_ATMOS_GRIB_1p00}/${PREFIX}pgrb2.1p00.${fhr3}.idx" - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB2B_1P0" "${job}" "${COMOUT_ATMOS_GRIB_1p00}/${PREFIX}pgrb2b.1p00.${fhr3}" - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB2B_1P0_WIDX" "${job}" "${COMOUT_ATMOS_GRIB_1p00}/${PREFIX}pgrb2b.1p00.${fhr3}.idx" - fi - if [[ "${WGNE:-}" == "YES" && -s "${COMOUT_ATMOS_GRIB_0p25}/${PREFIX}wgne.${fhr3}" ]] ; then - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_WGNE" "${job}" "${COMOUT_ATMOS_GRIB_0p25}/${PREFIX}wgne.${fhr3}" - fi - fi - - if [[ "${fhr3}" == "anl" ]]; then - - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_MSC_sfcanl" "${job}" "${COMIN_ATMOS_ANALYSIS}/${PREFIX}sfc${fhr3}.nc" - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_SA" "${job}" "${COMIN_ATMOS_ANALYSIS}/${PREFIX}atm${fhr3}.nc" - - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGA_GB2" "${job}" "${COMOUT_ATMOS_GRIB_1p00}/${PREFIX}pgrb2.1p00.${fhr3}" - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGA_GB2_WIDX" "${job}" "${COMOUT_ATMOS_GRIB_1p00}/${PREFIX}pgrb2.1p00.${fhr3}.idx" - - else # forecast hours f000, f003, f006, etc. 
- - if [[ "${RUN}" == "gdas" ]]; then - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB_GB2" "${job}" "${COMOUT_ATMOS_GRIB_1p00}/${PREFIX}pgrb2.1p00.${fhr3}" - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_PGB_GB2_WIDX" "${job}" "${COMOUT_ATMOS_GRIB_1p00}/${PREFIX}pgrb2.1p00.${fhr3}.idx" - if (( FORECAST_HOUR % 3 == 0 )); then - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_SF" "${job}" "${COMIN_ATMOS_HISTORY}/${PREFIX}atm${fhr3}.nc" - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_BF" "${job}" "${COMIN_ATMOS_HISTORY}/${PREFIX}sfc${fhr3}.nc" - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_SGB_GB2" "${job}" "${COMIN_ATMOS_MASTER}/${PREFIX}sfluxgrb${fhr3}.grib2" - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_SGB_GB2_WIDX" "${job}" "${COMIN_ATMOS_MASTER}/${PREFIX}sfluxgrb${fhr3}.grib2.idx" - fi - elif [[ "${RUN}" == "gfs" ]]; then - - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_SF" "${job}" "${COMIN_ATMOS_HISTORY}/${PREFIX}atm${fhr3}.nc" - if [[ ${fhr} -gt 0 && ${fhr} -le 84 || ${fhr} -eq 120 ]]; then - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_BF" "${job}" "${COMIN_ATMOS_HISTORY}/${PREFIX}sfc${fhr3}.nc" - fi - - if [[ -s "${COMIN_ATMOS_MASTER}/${PREFIX}sfluxgrbf${fhr3}.grib2" ]]; then - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_SGB_GB2" "${job}" "${COMIN_ATMOS_MASTER}/${PREFIX}sfluxgrb${fhr3}.grib2" - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_SGB_GB2_WIDX" "${job}" "${COMIN_ATMOS_MASTER}/${PREFIX}sfluxgrb${fhr3}.grib2.idx" - fi - elif [[ "${RUN}" == "gcafs" ]]; then - - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_SF" "${job}" "${COM_ATMOS_HISTORY}/${PREFIX}atm${fhr3}.nc" - if [[ ${fhr} -gt 0 && ${fhr} -le 84 || ${fhr} == 120 ]]; then - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_BF" "${job}" "${COM_ATMOS_HISTORY}/${PREFIX}sfc${fhr3}.nc" - fi - - if [[ -s "${COM_ATMOS_MASTER}/${PREFIX}sfluxgrbf${fhr3}.grib2" ]]; then - "${DBNROOT}/bin/dbn_alert" MODEL "${RUN^^}_SGB_GB2" "${job}" "${COM_ATMOS_MASTER}/${PREFIX}sfluxgrb${fhr3}.grib2" - "${DBNROOT}/bin/dbn_alert" MODEL 
"${RUN^^}_SGB_GB2_WIDX" "${job}" "${COM_ATMOS_MASTER}/${PREFIX}sfluxgrb${fhr3}.grib2.idx" - fi - fi - - - fi # end if fhr3=anl - -fi # end if SENDDBN=YES - -exit 0 From 3b6524497dceef8c56ccddb2ec27fcd15e49efca Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 1 Oct 2025 16:26:01 +0000 Subject: [PATCH 119/134] working on aws --- dev/parm/config/gefs/config.resources.AWSPW | 37 -- dev/parm/config/gefs/config.resources.URSA | 1 - dev/ush/load_gw_run_modules.sh | 130 ++++--- dev/ush/load_ufswm_modules.sh | 5 + env/GAEAC6.env | 2 +- env/GAEAC6.env.container | 362 -------------------- ush/detect_machine.sh | 2 +- ush/load_fv3gfs_modules.sh | 35 -- ush/module-setup.sh | 6 - ush/run_mpmd.sh | 66 +++- 10 files changed, 133 insertions(+), 513 deletions(-) delete mode 100644 dev/parm/config/gefs/config.resources.AWSPW delete mode 120000 dev/parm/config/gefs/config.resources.URSA delete mode 100755 env/GAEAC6.env.container delete mode 100755 ush/load_fv3gfs_modules.sh diff --git a/dev/parm/config/gefs/config.resources.AWSPW b/dev/parm/config/gefs/config.resources.AWSPW deleted file mode 100644 index 3b5a37a88d1..00000000000 --- a/dev/parm/config/gefs/config.resources.AWSPW +++ /dev/null @@ -1,37 +0,0 @@ -#! 
/usr/bin/env bash -# shellcheck disable=SC2034 - -# AWS-specific job resources - -export is_exclusive="True" -unset memory -unset "memory_${RUN}" - -step=$1 - -case ${step} in - "fcst" | "efcs" | "wavepostbndpnt" | "wavepostpnt") - export PARTITION_BATCH="compute" - unset PARTITION_SERVICE - max_tasks_per_node=48 - tasks_per_node=48 - ;; - - "atmos_products") - export PARTITION_BATCH="highmemory" - unset PARTITION_SERVICE - max_tasks_per_node=24 - tasks_per_node=24 - ;; - - *) - export PARTITION_BATCH="process" - unset PARTITION_SERVICE - max_tasks_per_node=24 - tasks_per_node=24 - ;; - -esac - -export max_tasks_per_node -export tasks_per_node diff --git a/dev/parm/config/gefs/config.resources.URSA b/dev/parm/config/gefs/config.resources.URSA deleted file mode 120000 index 6d3d16eda14..00000000000 --- a/dev/parm/config/gefs/config.resources.URSA +++ /dev/null @@ -1 +0,0 @@ -../gfs/config.resources.URSA \ No newline at end of file diff --git a/dev/ush/load_gw_run_modules.sh b/dev/ush/load_gw_run_modules.sh index ff956313b09..9fac00ba580 100755 --- a/dev/ush/load_gw_run_modules.sh +++ b/dev/ush/load_gw_run_modules.sh @@ -15,11 +15,75 @@ fi # Setup runtime environment by loading modules ulimit_s=$( ulimit -S -s ) +# Test if HOMEgfs is defined. If not, then try to determine it with git rev-parse +_unset_homegfs="NO" +if [[ -z ${HOMEgfs+x} ]]; then + echo "INFO HOMEgfs is not defined. Attempting to find the global-workflow root directory" + # HOMEgfs will be removed from the environment at the end of this script + _unset_homegfs="YES" + + script_dir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + HOMEgfs=$(cd "${script_dir}" && git rev-parse --show-toplevel) + export HOMEgfs + err=$? + if [[ ${err} -ne 0 ]]; then + is_git_dir=$( cd -- "${script_dir}" &> /dev/null && git rev-parse --is-inside-work-tree) + git_stat=$? 
+ if [[ ${git_stat} -ne 0 || ${is_git_dir} != "true" ]]; then + echo "FATAL ERROR unable to determine the root because it is not a git repository." + else + echo "FATAL ERROR unable to determine the root because git rev-parse --show-toplevel failed for an unknown reason" + fi + echo " Unable to load modules. Exiting" + exit 1 + fi +fi + source "${HOMEgfs}/ush/preamble.sh" +# Find module command and purge: source "${HOMEgfs}/ush/detect_machine.sh" source "${HOMEgfs}/ush/module-setup.sh" +# Source versions file for runtime +if [[ -f "${HOMEgfs}/versions/run.ver" ]]; then + source "${HOMEgfs}/versions/run.ver" +else + echo "FATAL ERROR ${HOMEgfs}/versions/run.ver does not exist!" + echo "HINT: Run link_workflow.sh first." + exit 1 +fi + +# Load our modules: +module use "${HOMEgfs}/modulefiles" + +case "${MACHINE_ID}" in +"wcoss2" | "ursa" | "hera" | "orion" | "hercules" | "gaeac5" | "gaeac6" | "noaacloud") + target_module="gw_run.${MACHINE_ID}" + module load "${target_module}" + export err=$? 
+ if [[ ${err} -ne 0 ]]; then + echo "FATAL ERROR: Failed to load ${target_module}" + exit 1 + fi + ;; +"container") + source /usr/lmod/lmod/init/bash + module purge + module use "${HOMEgfs}/sorc/gfs_utils.fd/modulefiles" + module load gfsutils_container.intel + module load wgrib2 + module load prod_util + ;; +*) + echo "WARNING: UNKNOWN PLATFORM" + ;; +esac + +module list + +export WGRIB2=wgrib2 + # If this function exists in the environment, run it; else set -x if it was set on entering this script ftype=$(type -t set_trace || echo "") if [[ "${ftype}" == "function" ]]; then @@ -28,68 +92,20 @@ elif [[ "${set_x}" == "YES" ]]; then set -x fi -# Add wxflow to PYTHONPATH -wxflowPATH="${HOMEgfs}/ush/python" -PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${HOMEgfs}/ush:${wxflowPATH}" # Set up the PYTHONPATH to include wxflow from HOMEgfs if [[ -d "${HOMEgfs}/sorc/wxflow/src" ]]; then - PYTHONPATH="${HOMEgfs}/sorc/wxflow/src${PYTHONPATH:+:${PYTHONPATH}}" + PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${HOMEgfs}/sorc/wxflow/src" fi -export PYTHONPATH - -echo "MACHINE_ID: ${MACHINE_ID}" - -case "${MACHINE_ID}" in - "wcoss2") - # Source versions file for runtime - source "${HOMEgfs}/versions/run.${MACHINE_ID}.ver" - # Load our modules: - module use "${HOMEgfs}/modulefiles" - module load cray-pals - module load cfp - module load libjpeg - module load craype-network-ucx - module load cray-mpich-ucx - module load "gw_run.${MACHINE_ID}" - ;; - "hera" | "orion" | "hercules" | "gaeac5" | "gaeac6" | "noaacloud" | "ursa") - # Source versions file for runtime - source "${HOMEgfs}/versions/run.${MACHINE_ID}.ver" - # Load our modules: - module use "${HOMEgfs}/modulefiles" - module load "gw_run.${MACHINE_ID}" - export UTILROOT=${prod_util_ROOT} - ;; - "container") - source /usr/lmod/lmod/init/bash - module purge - module use "${HOMEgfs}/sorc/gfs_utils.fd/modulefiles" - module load gfsutils_container.intel - ;; - *) - echo "WARNING: UNKNOWN PLATFORM" - ;; -esac - -export err=$? 
-if [[ ${err} -ne 0 ]]; then - echo "FATAL ERROR: Failed to load gw_run.${MACHINE_ID}" - exit 1 -fi - -module load wgrib2 -module load prod_util -export WGRIB2=wgrib2 -# Turn on our settings -export SHELLOPTS -declare -xf set_strict -declare -xf set_trace -declare -xf postamble -declare -xf err_exit -set_strict -set_trace +# Add HOMEgfs/ush/python to PYTHONPATH +PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${HOMEgfs}/ush/python" +export PYTHONPATH # Restore stack soft limit: ulimit -S -s "${ulimit_s}" unset ulimit_s + +# Unset HOMEgfs if it was not set at the beginning of this script +if [[ ${_unset_homegfs} == "YES" ]]; then + unset HOMEgfs +fi diff --git a/dev/ush/load_ufswm_modules.sh b/dev/ush/load_ufswm_modules.sh index 1bc824e717c..309df31018f 100755 --- a/dev/ush/load_ufswm_modules.sh +++ b/dev/ush/load_ufswm_modules.sh @@ -31,7 +31,12 @@ if [[ "${MACHINE_ID}" = "wcoss2" ]]; then module load wgrib2 else if [[ "${MACHINE_ID}" = "container" ]]; then + source /usr/lmod/lmod/init/bash + module purge + module use "${HOMEgfs}/sorc/gfs_utils.fd/modulefiles" + module load gfsutils_container.intel module load wgrib2 + module load prod_util else source "${HOMEgfs}/versions/run.ver" module load "wgrib2/${wgrib2_ver}" diff --git a/env/GAEAC6.env b/env/GAEAC6.env index a068506b665..324b98b1b1d 100755 --- a/env/GAEAC6.env +++ b/env/GAEAC6.env @@ -9,7 +9,7 @@ fi step=$1 -export launcher="srun -l --export=ALL" +export launcher="srun -l --export=ALL --hint=nomultithread --distribution=block:block" export mpmd_opt="--multi-prog --output=mpmd.%j.%t.out" export OMP_STACKSIZE=2048000 diff --git a/env/GAEAC6.env.container b/env/GAEAC6.env.container deleted file mode 100755 index af829086ceb..00000000000 --- a/env/GAEAC6.env.container +++ /dev/null @@ -1,362 +0,0 @@ -#! /usr/bin/env bash - -if [[ $# -ne 1 ]]; then - - echo "Must specify an input argument to set runtime environment variables!" 
- exit 1 - -fi - -step=$1 - -export launcher="srun -l --export=ALL" -export mpmd_opt="--multi-prog --output=mpmd.%j.%t.out" - -export OMP_STACKSIZE=2048000 -export NTHSTACK=1024000000 - -ulimit -s unlimited -ulimit -a - -# Calculate common variables -# Check first if the dependent variables are set -if [[ -n "${ntasks:-}" && -n "${max_tasks_per_node:-}" && -n "${tasks_per_node:-}" ]]; then - max_threads_per_task=$((max_tasks_per_node / tasks_per_node)) - NTHREADSmax=${threads_per_task:-${max_threads_per_task}} - NTHREADS1=${threads_per_task:-1} - if [[ ${NTHREADSmax} -gt ${max_threads_per_task} ]]; then - NTHREADSmax=${max_threads_per_task} - fi - if [[ ${NTHREADS1} -gt ${max_threads_per_task} ]]; then - NTHREADS1=${max_threads_per_task} - fi - # This may be useful when GaeaC6 is fully ported, so ignore SC warning - # shellcheck disable=SC2034 - APRUN_default="${launcher} -n ${ntasks}" -else - echo "ERROR config.resources must be sourced before sourcing GAEAC6.env" - exit 2 -fi - -case ${step} in - "prep" | "prepbufr") - - export POE="NO" - export BACK="NO" - export sys_tp="GAEAC6" - export launcher_PREP="srun" - ;; - "prep_emissions") - - export APRUN="${APRUN_default}" - ;; - "waveinit" | "waveprep" | "wavepostsbs" | "wavepostbndpnt" | "wavepostpnt" | "wavepostbndpntbll") - - export USE_CFP="YES" - if [[ "${step}" == "waveprep" ]]; then - export MP_PULSE=0 - fi - export wavempexec=${launcher} - export wave_mpmd=${mpmd_opt} - - ;; - "atmanlvar") - - export NTHREADS_ATMANLVAR=${NTHREADSmax} - export APRUN_ATMANLVAR="${APRUN_default} --cpus-per-task=${NTHREADS_ATMANLVAR}" - ;; - "atmanlfv3inc") - - export NTHREADS_ATMANLFV3INC=${NTHREADSmax} - export APRUN_ATMANLFV3INC="${APRUN_default} --cpus-per-task=${NTHREADS_ATMANLFV3INC}" - ;; - "atmensanlobs") - - export NTHREADS_ATMENSANLOBS=${NTHREADSmax} - export APRUN_ATMENSANLOBS="${APRUN_default} --cpus-per-task=${NTHREADS_ATMENSANLOBS}" - ;; - "atmensanlsol") - - export NTHREADS_ATMENSANLSOL=${NTHREADSmax} - export 
APRUN_ATMENSANLSOL="${APRUN_default} --cpus-per-task=${NTHREADS_ATMENSANLSOL}" - ;; - "atmensanlletkf") - - export NTHREADS_ATMENSANLLETKF=${NTHREADSmax} - export APRUN_ATMENSANLLETKF="${APRUN_default} --cpus-per-task=${NTHREADS_ATMENSANLLETKF}" - ;; - "atmensanlfv3inc") - - export NTHREADS_ATMENSANLFV3INC=${NTHREADSmax} - export APRUN_ATMENSANLFV3INC="${APRUN_default} --cpus-per-task=${NTHREADS_ATMENSANLFV3INC}" - ;; - "aeroanlvar") - - export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" - - export NTHREADS_AEROANL=${NTHREADSmax} - export APRUN_AEROANL="${APRUN_default} --cpus-per-task=${NTHREADS_AEROANL}" - ;; - "aeroanlgenb") - - export NTHREADS_AEROANLGENB=${NTHREADSmax} - export APRUN_AEROANLGENB="${APRUN_default} --cpus-per-task=${NTHREADS_AEROANLGENB}" - ;; - "prepobsaero") - - export NTHREADS_PREPOBSAERO=${NTHREADS1} - export APRUN_PREPOBSAERO="${APRUN_default} --cpus-per-task=${NTHREADS_PREPOBSAERO}" - ;; - "snowanl") - - export APRUN_CALCFIMS="${launcher} -n 1" - - export NTHREADS_SNOWANL=${NTHREADSmax} - export APRUN_SNOWANL="${APRUN_default} --cpus-per-task=${NTHREADS_SNOWANL}" - - export APRUN_APPLY_INCR="${launcher} -n 6" - ;; - "esnowanl") - - export APRUN_CALCFIMS="${launcher} -n 1" - - export NTHREADS_ESNOWANL=${NTHREADSmax} - export APRUN_ESNOWANL="${APRUN_default} --cpus-per-task=${NTHREADS_ESNOWANL}" - - export APRUN_APPLY_INCR="${launcher} -n 6" - ;; - "marinebmat") - - export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" - export APRUN_MARINEBMAT="${APRUN_default}" - ;; - "marineanlvar") - - export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" - export APRUN_MARINEANLVAR="${APRUN_default}" - ;; - "marineanlecen") - - export APRUN_MARINEANLECEN="${APRUN_default}" - ;; - "marineanlchkpt") - - export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" - - export NTHREADS_OCNANAL=${NTHREADSmax} - export APRUN_MARINEANLCHKPT="${APRUN_default} --cpus-per-task=${NTHREADS_OCNANAL}" - - export APRUN_MARINEANLOBSSTATS="${launcher} -n 1" - ;; - "anlstat") - - 
export NTHREADS_ANLSTAT=${NTHREADSmax} - export APRUN_ANLSTAT="${APRUN_default} --cpus-per-task=${NTHREADS_ANLSTAT}" - ;; - "marineanlletkf") - - export NTHREADS_MARINEANLLETKF=${NTHREADSmax} - export APRUN_MARINEANLLETKF="${APRUN_default}" - ;; - "ecen_fv3jedi") - - export NTHREADS_ECEN_FV3JEDI=${NTHREADSmax} - export APRUN_CORRECTION_INCREMENT="${launcher} -n ${ntasks_correction_increment} --cpus-per-task=${NTHREADS_ECEN_FV3JEDI}" - export APRUN_ENSEMBLE_RECENTER="${launcher} -n ${ntasks_ensemble_recenter} --cpus-per-task=${NTHREADS_ECEN_FV3JEDI}" - ;; - "analcalc_fv3jedi") - - export NTHREADS_ANALCALC_FV3JEDI=${NTHREADSmax} - export APRUN_ANALCALC_FV3JEDI="${APRUN_default} --cpus-per-task=${NTHREADS_ANALCALC_FV3JEDI}" - ;; - "anal" | "analcalc") - - export MKL_NUM_THREADS=4 - export MKL_CBWR=AUTO - - export CFP_MP=${CFP_MP:-"YES"} - export USE_CFP=${USE_CFP:-"YES"} - export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" - - export NTHREADS_GSI=${threads_per_task_anal:-${max_threads_per_task}} - export APRUN_GSI="${APRUN_default} --cpus-per-task=${NTHREADS_GSI}" - - export NTHREADS_CALCINC=${threads_per_task_calcinc:-1} - if [[ ${NTHREADS_CALCINC} -gt ${max_threads_per_task} ]]; then - export NTHREADS_CALCINC=${max_threads_per_task} - fi - export APRUN_CALCINC="${launcher} \$ncmd --cpus-per-task=${NTHREADS_CALCINC}" - - export NTHREADS_CYCLE=${threads_per_task_cycle:-12} - if [[ ${NTHREADS_CYCLE} -gt ${max_tasks_per_node} ]]; then - export NTHREADS_CYCLE=${max_tasks_per_node} - fi - ntasks_cycle=${ntiles:-6} - export APRUN_CYCLE="${launcher} -n ${ntasks_cycle} --cpus-per-task=${NTHREADS_CYCLE}" - - export NTHREADS_GAUSFCANL=1 - ntasks_gausfcanl=${ntasks_gausfcanl:-1} - export APRUN_GAUSFCANL="${launcher} -n ${ntasks_gausfcanl} --cpus-per-task=${NTHREADS_GAUSFCANL}" - ;; - "offlineanl") - - export NTHREADS_CHGRES=${threads_per_task_chgres:-12} - if [[ ${NTHREADS_CHGRES} -gt ${max_tasks_per_node} ]]; then - export NTHREADS_CHGRES=${max_tasks_per_node} - fi - export 
APRUN_CHGRES="time" - - export NTHREADS_CALCINC=${threads_per_task_calcinc:-1} - if [[ ${NTHREADS_CALCINC} -gt ${max_threads_per_task} ]]; then - export NTHREADS_CALCINC=${max_threads_per_task} - fi - export APRUN_CALCINC="${APRUN_default} --cpus-per-task=${NTHREADS_CALCINC}" - - ;; - "sfcanl") - - export NTHREADS_CYCLE=${threads_per_task:-14} - if [[ ${NTHREADS_CYCLE} -gt ${max_tasks_per_node} ]]; then - export NTHREADS_CYCLE=${max_tasks_per_node} - fi - export APRUN_CYCLE="${APRUN_default} --cpus-per-task=${NTHREADS_CYCLE}" - - # REGRID requires 6 tasks for reproducibility - ntasks_regrid=6 - export APRUN_REGRID="${launcher} -n ${ntasks_regrid} " - - ;; - "eobs") - - export MKL_NUM_THREADS=4 - export MKL_CBWR=AUTO - - export CFP_MP=${CFP_MP:-"YES"} - export USE_CFP=${USE_CFP:-"YES"} - export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" - - - export NTHREADS_GSI=${NTHREADSmax} - if [[ ${NTHREADS_GSI} -gt ${max_threads_per_task} ]]; then - export NTHREADS_GSI=${max_threads_per_task} - fi - export APRUN_GSI="${APRUN_default} --cpus-per-task=${NTHREADS_GSI}" - ;; - "eupd") - - export CFP_MP=${CFP_MP:-"YES"} - export USE_CFP=${USE_CFP:-"YES"} - export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" - - - export NTHREADS_ENKF=${NTHREADSmax} - export APRUN_ENKF="${launcher} -n ${ntasks_enkf:-${ntasks}} --cpus-per-task=${NTHREADS_ENKF}" - ;; - "fcst" | "efcs") - - #export launcher="srun --mpi=pmi2 -l --export=ALL" - export launcher="srun --mpi=pmi2 -l --hint=nomultithread --distribution=block:block" - #export OMP_STACKSIZE=1024M - - #export MPICH_COLL_SYNC=MPI_Bcast - #export FI_VERBS_PREFER_XRC=0 - #export FI_CXI_RX_MATCH_MODE=hybrid - #export COMEX_EAGER_THRESHOLD=65536 - #export FI_CXI_RDZV_THRESHOLD=65536 - #export FI_CXI_DEFAULT_CQ_SIZE=1048576 - - (( nnodes = (ntasks+tasks_per_node-1)/tasks_per_node )) - (( ufs_ntasks = nnodes*tasks_per_node )) - # With ESMF threading, the model wants to use the full node - export APRUN_UFS="${launcher} -n ${ufs_ntasks}" - unset 
nnodes ufs_ntasks - ;; - "upp") - - export NTHREADS_UPP=${NTHREADS1} - export APRUN_UPP="${APRUN_default} --cpus-per-task=${NTHREADS_UPP}" - ;; - "atmos_products") - - export USE_CFP="YES" # Use MPMD for downstream product generation - ;; - "oceanice_products") - - export NTHREADS_OCNICEPOST=${NTHREADS1} - export APRUN_OCNICEPOST="${launcher} -n 1 --cpus-per-task=${NTHREADS_OCNICEPOST}" - ;; - "ecen") - - export NTHREADS_ECEN=${NTHREADSmax} - export APRUN_ECEN="${APRUN_default} --cpus-per-task=${NTHREADS_ECEN}" - - export NTHREADS_CHGRES=${threads_per_task_chgres:-12} - if [[ ${NTHREADS_CHGRES} -gt ${max_tasks_per_node} ]]; then - export NTHREADS_CHGRES=${max_tasks_per_node} - fi - export APRUN_CHGRES="time" - - export NTHREADS_CALCINC=${threads_per_task_calcinc:-1} - if [[ ${NTHREADS_CALCINC} -gt ${max_threads_per_task} ]]; then - export NTHREADS_CALCINC=${max_threads_per_task} - fi - export APRUN_CALCINC="${APRUN_default} --cpus-per-task=${NTHREADS_CALCINC}" - - ;; - "esfc") - - export NTHREADS_ESFC=${NTHREADSmax} - export APRUN_ESFC="${APRUN_default} --cpus-per-task=${NTHREADS_ESFC}" - - export NTHREADS_CYCLE=${threads_per_task_cycle:-14} - if [[ ${NTHREADS_CYCLE} -gt ${max_tasks_per_node} ]]; then - export NTHREADS_CYCLE=${max_tasks_per_node} - fi - export APRUN_CYCLE="${APRUN_default} --cpus-per-task=${NTHREADS_CYCLE}" - - # REGRID requires 6 tasks for reproducibility - ntasks_regrid=6 - export APRUN_REGRID="${launcher} -n ${ntasks_regrid} " - - ;; - "epos") - - export NTHREADS_EPOS=${NTHREADSmax} - export APRUN_EPOS="${APRUN_default} --cpus-per-task=${NTHREADS_EPOS}" - - ;; - "postsnd") - - export CFP_MP="YES" - - export NTHREADS_POSTSND=${NTHREADS1} - export APRUN_POSTSND="${APRUN_default} --cpus-per-task=${NTHREADS_POSTSND}" - - export NTHREADS_POSTSNDCFP=${threads_per_task_postsndcfp:-1} - if [[ ${NTHREADS_POSTSNDCFP} -gt ${max_threads_per_task} ]]; then - export NTHREADS_POSTSNDCFP=${max_threads_per_task} - fi - export APRUN_POSTSNDCFP="${launcher} -n 
${ntasks_postsndcfp} ${mpmd_opt}" - - ;; - "awips") - - export NTHREADS_AWIPS=${NTHREADS1} - export APRUN_AWIPSCFP="${APRUN_default} ${mpmd_opt}" - - ;; - "gempak") - - echo "WARNING: ${step} is not enabled on ${machine}!" - - ;; - "fit2obs") - - export NTHREADS_FIT2OBS=${NTHREADS1} - export MPIRUN="${APRUN_default} --cpus-per-task=${NTHREADS_FIT2OBS}" - ;; - *) - # Some other job not yet defined here - echo "WARNING: The job step ${step} does not specify GAEAC6-specific resources" - ;; -esac diff --git a/ush/detect_machine.sh b/ush/detect_machine.sh index c8111d723f4..bb49f43b4a7 100755 --- a/ush/detect_machine.sh +++ b/ush/detect_machine.sh @@ -9,7 +9,7 @@ # Thank you for your contribution # overwrite MACHINE_ID if in container -if [[ -d /opt/spack-stack && -v SINGULARITY_CONTAINER ]]; then +if [[ -v SINGULARITY_CONTAINER ]]; then # We are in a container MACHINE_ID=container fi diff --git a/ush/load_fv3gfs_modules.sh b/ush/load_fv3gfs_modules.sh deleted file mode 100755 index 7a057b0392b..00000000000 --- a/ush/load_fv3gfs_modules.sh +++ /dev/null @@ -1,35 +0,0 @@ -#! /usr/bin/env bash - -############################################################### -if [[ "$-" == *x* ]]; then - set_x=YES -else - set_x=NO -fi - -if [[ "${DEBUG_WORKFLOW:-NO}" == "NO" ]]; then - echo "Loading modules quietly..." 
- set +x -fi - -# Setup runtime environment by loading modules -ulimit_s=$( ulimit -S -s ) - -source "${HOMEgfs}/ush/preamble.sh" - -# If this function exists in the environment, run it; else set -x if it was set on entering this script -ftype=$(type -t set_trace || echo "") -if [[ "${ftype}" == "function" ]]; then - set_trace -elif [[ "${set_x}" == "YES" ]]; then - set -x -fi - -# Add wxflow to PYTHONPATH -wxflowPATH="${HOMEgfs}/ush/python" -PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${HOMEgfs}/ush:${wxflowPATH}" -export PYTHONPATH - -# Restore stack soft limit: -ulimit -S -s "${ulimit_s}" -unset ulimit_s diff --git a/ush/module-setup.sh b/ush/module-setup.sh index 76df65c6e3c..4065c67e4cd 100755 --- a/ush/module-setup.sh +++ b/ush/module-setup.sh @@ -102,12 +102,6 @@ elif [[ ${MACHINE_ID} = "noaacloud" ]]; then # We are on NOAA Cloud module purge -elif [[ ${MACHINE_ID} == container ]] ; then - # We are in a container - # Always source the lmod init script to override the system module paths and instead use the container modules - source /usr/lmod/lmod/init/bash - module purge - else echo WARNING: UNKNOWN PLATFORM 1>&2 fi diff --git a/ush/run_mpmd.sh b/ush/run_mpmd.sh index 1a22b2e5c61..d85edcea944 100755 --- a/ush/run_mpmd.sh +++ b/ush/run_mpmd.sh @@ -29,7 +29,6 @@ ################################################################################ source "${USHgfs}/preamble.sh" - source "${HOMEgfs}/ush/detect_machine.sh" source "${HOMEgfs}/ush/module-setup.sh" @@ -55,22 +54,63 @@ fi # Set OMP_NUM_THREADS to 1 to avoid oversubscription when doing MPMD export OMP_NUM_THREADS=1 -# Redirect output from each process to its own stdout -# Read the incoming cmdfile and create mpiexec usable cmdfile -nm=0 -# shellcheck disable=SC2312 -while IFS= read -r line; do - echo "Line ${nm}: ${line}" - ${line} > "mpmd.${nm}.out" & - ((nm=nm+1)) -done < "${cmdfile}" -wait +# Determine the number of MPMD processes from incoming ${cmdfile} +nprocs=$(wc -l < "${cmdfile}") + +# Local MPMD 
file containing instructions to run in CFP +mpmd_cmdfile="${DATA:-}/mpmd_cmdfile" +if [[ -s "${mpmd_cmdfile}" ]]; then rm -f "${mpmd_cmdfile}"; fi + +cat << EOF + INFO: Executing MPMD job, STDOUT redirected for each process separately + INFO: On failure, logs for each job will be available in ${DATA}/mpmd.proc_num.out + INFO: The proc_num corresponds to the line in '${mpmd_cmdfile}' +EOF + +if [[ "${launcher:-}" =~ ^srun.* ]]; then # srun-based system e.g. Hera, Orion, etc. + + # Slurm requires a counter in front of each line in the script + # Read the incoming cmdfile and create srun usable cmdfile + nm=0 + # shellcheck disable=SC2312 + while IFS= read -r line; do + echo "${nm} ${line}" >> "${mpmd_cmdfile}" + ((nm=nm+1)) + done < "${cmdfile}" + + set +e + # shellcheck disable=SC2086 + ${launcher:-} ${mpmd_opt:-} -n ${nprocs} "${mpmd_cmdfile}" + err=$? + set_strict -err=$? -set_strict +elif [[ "${launcher:-}" =~ ^mpiexec.* ]]; then # mpiexec + + # Redirect output from each process to its own stdout + # Read the incoming cmdfile and create mpiexec usable cmdfile + nm=0 + echo "#!/bin/bash" >> "${mpmd_cmdfile}" + # shellcheck disable=SC2312 + while IFS= read -r line; do + echo "${line} > mpmd.${nm}.out" >> "${mpmd_cmdfile}" + ((nm=nm+1)) + done < "${cmdfile}" + chmod 755 "${mpmd_cmdfile}" + + # shellcheck disable=SC2086 + ${launcher:-} -np ${nprocs} ${mpmd_opt:-} "${mpmd_cmdfile}" + err=$? + +else + + echo "FATAL ERROR: CFP is not usable with launcher: '${launcher:-}'" + err=1 + +fi # On success concatenate processor specific output into a single mpmd.out if [[ ${err} -eq 0 ]]; then + rm -f "${mpmd_cmdfile}" out_files=$(find . 
-name 'mpmd.*.out') for file in ${out_files}; do cat "${file}" >> mpmd.out From 6bb0373e5dfaedf808547a5643501a09198c1a5c Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 1 Oct 2025 18:55:42 +0000 Subject: [PATCH 120/134] testing on ursa --- .gitmodules | 16 ++++------------ scripts/exgfs_wave_post_gridded_sbs.sh | 1 - scripts/exglobal_atmos_products.sh | 12 ++---------- ush/atmos_ensstat.sh | 2 -- ush/interp_atmos_sflux.sh | 2 -- ush/run_mpmd.sh | 1 + ush/wave_tar.sh | 2 +- 7 files changed, 8 insertions(+), 28 deletions(-) diff --git a/.gitmodules b/.gitmodules index ea22e018408..c80d24c03aa 100644 --- a/.gitmodules +++ b/.gitmodules @@ -15,21 +15,13 @@ url = https://github.com/NOAA-EMC/EMC_verif-global.git [submodule "sorc/gsi_enkf.fd"] path = sorc/gsi_enkf.fd - #url = https://github.com/NOAA-EMC/GSI.git - url = https://github.com/NOAA-EPIC/GSI-cloud.git - branch = feature/use_container_spack-stack-1.9.2 + url = https://github.com/NOAA-EMC/GSI.git [submodule "sorc/gdas.cd"] path = sorc/gdas.cd - #url = https://github.com/NOAA-EMC/GDASApp.git - url = https://github.com/NOAA-EPIC/GDASApp-cloud.git - branch = feature/use_container_spack-stack-1.9.2 + url = https://github.com/NOAA-EMC/GDASApp.git [submodule "sorc/gsi_utils.fd"] path = sorc/gsi_utils.fd - #url = https://github.com/NOAA-EMC/GSI-Utils.git - url = https://github.com/NOAA-EPIC/GSI-utils-cloud.git - branch = feature/use_container_spack-stack-1.9.2 + url = https://github.com/NOAA-EMC/GSI-Utils.git [submodule "sorc/gsi_monitor.fd"] path = sorc/gsi_monitor.fd - #url = https://github.com/NOAA-EMC/GSI-Monitor.git - url = https://github.com/NOAA-EPIC/GSI-Monitor-cloud.git - branch = feature/use_container_spack-stack-1.9.2 + url = https://github.com/NOAA-EMC/GSI-Monitor.git diff --git a/scripts/exgfs_wave_post_gridded_sbs.sh b/scripts/exgfs_wave_post_gridded_sbs.sh index 393c587c36a..6d17b35521b 100755 --- a/scripts/exgfs_wave_post_gridded_sbs.sh +++ b/scripts/exgfs_wave_post_gridded_sbs.sh @@ -21,7 +21,6 @@ # 
############################################################################### -source "${HOMEgfs}/dev/ush/load_gw_run_modules.sh" source "${USHgfs}/wave_domain_grid.sh" DOGRI_WAV=${DOGRI_WAV:-"NO"} # Interpolate to a grid diff --git a/scripts/exglobal_atmos_products.sh b/scripts/exglobal_atmos_products.sh index ef2bc4e689d..0c34247bb3c 100755 --- a/scripts/exglobal_atmos_products.sh +++ b/scripts/exglobal_atmos_products.sh @@ -1,7 +1,5 @@ #! /usr/bin/env bash -source "${HOMEgfs}/dev/ush/load_gw_run_modules.sh" - # Scripts used INTERP_ATMOS_MASTERSH=${INTERP_ATMOS_MASTERSH:-"${USHgfs}/interp_atmos_master.sh"} INTERP_ATMOS_SFLUXSH=${INTERP_ATMOS_SFLUXSH:-"${USHgfs}/interp_atmos_sflux.sh"} @@ -44,10 +42,7 @@ MASTER_FILE="${COMIN_ATMOS_MASTER}/${PREFIX}master.grb2${fhr3}" # Get inventory from ${MASTER_FILE} that matches patterns from ${paramlista} # Extract this inventory from ${MASTER_FILE} into a smaller tmpfile or tmpfileb based on paramlista or paramlistb # shellcheck disable=SC2312 -${WGRIB2} "${MASTER_FILE}" > wgrib2.log -grep -F -f "${paramlista}" wgrib2.log > grep.res -${WGRIB2} -i -grib "tmpfile_${fhr3}" "${MASTER_FILE}" < grep.res - +${WGRIB2} "${MASTER_FILE}" | grep -F -f "${paramlista}" | ${WGRIB2} -i -grib "tmpfile_${fhr3}" "${MASTER_FILE}" && true export err=$? if [[ ${err} -ne 0 ]]; then err_exit "wgrib2 failed to create intermediate grib2 file from ${MASTER_FILE} using ${paramlista}" @@ -93,8 +88,6 @@ for (( nset=1 ; nset <= downset ; nset++ )); do # shellcheck disable=SC2312 ncount=$(${WGRIB2} "${tmpfile}" | wc -l) - #${WGRIB2} "${tmpfile}" > wgrib2.log - #ncount=$(cat wgrib2.log | wc -l) if [[ ${nproc} -gt ${ncount} ]]; then echo "WARNING: Total no. of available processors '${nproc}' exceeds no. 
of records '${ncount}' in ${tmpfile}" echo "Reduce nproc to ${ncount} (or less) to not waste resources" @@ -115,8 +108,7 @@ for (( nset=1 ; nset <= downset ; nset++ )); do # grep returns 1 if no match is found, so temporarily turn off exit on non-zero rc set +e # shellcheck disable=SC2312 - ${WGRIB2} -d "${last}" "${tmpfile}" > wgrib2.log - grep -E -i "ugrd|ustm|uflx|u-gwd|land|maxuw" wgrib2.log + ${WGRIB2} -d "${last}" "${tmpfile}" | grep -E -i "ugrd|ustm|uflx|u-gwd|land|maxuw" rc=$? set_strict if [[ ${rc} == 0 ]]; then # Matched the grep diff --git a/ush/atmos_ensstat.sh b/ush/atmos_ensstat.sh index a0551c2f23e..b0dd881edde 100755 --- a/ush/atmos_ensstat.sh +++ b/ush/atmos_ensstat.sh @@ -1,7 +1,5 @@ #! /usr/bin/env bash -source "${HOMEgfs}/dev/ush/load_gw_run_modules.sh" - grid=${1} fhr3=${2} grid_type=${3:-''} diff --git a/ush/interp_atmos_sflux.sh b/ush/interp_atmos_sflux.sh index cc0b7da101f..2aff2cc58aa 100755 --- a/ush/interp_atmos_sflux.sh +++ b/ush/interp_atmos_sflux.sh @@ -1,7 +1,5 @@ #! /usr/bin/env bash -source "${HOMEgfs}/dev/ush/load_gw_run_modules.sh" - # This script takes in a master flux file and creates interpolated flux files at various interpolated resolutions # Generate 0.25 / 0.5 / 1 degree interpolated grib2 flux files for each input sflux grib2 file diff --git a/ush/run_mpmd.sh b/ush/run_mpmd.sh index d85edcea944..3e8a92f7183 100755 --- a/ush/run_mpmd.sh +++ b/ush/run_mpmd.sh @@ -36,6 +36,7 @@ if [[ "${MACHINE_ID}" == "container" ]]; then source /usr/lmod/lmod/init/bash module use "${HOMEgfs}/sorc/gfs_utils.fd/modulefiles" module load gfsutils_container.intel + module load prod_util module load wgrib2 fi diff --git a/ush/wave_tar.sh b/ush/wave_tar.sh index 9e278c79b02..a8b959b33dc 100755 --- a/ush/wave_tar.sh +++ b/ush/wave_tar.sh @@ -24,7 +24,7 @@ # # --------------------------------------------------------------------------- # # 0. 
Preparations - + source "${HOMEgfs}/dev/ush/load_gw_run_modules.sh" # 0.a Basic modes of operation cd "${DATA}" From a8730ec123357155c84fd6975d2b05d4cf1dc65e Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 1 Oct 2025 19:48:21 +0000 Subject: [PATCH 121/134] recover ursa.yaml --- dev/workflow/hosts/ursa.yaml | 7 +------ scripts/exglobal_atmos_products.sh | 2 ++ 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/dev/workflow/hosts/ursa.yaml b/dev/workflow/hosts/ursa.yaml index e21184417ef..51d42a8bb22 100644 --- a/dev/workflow/hosts/ursa.yaml +++ b/dev/workflow/hosts/ursa.yaml @@ -5,8 +5,7 @@ BASE_DATA: '/scratch3/NCEPDEV/global/role.glopara/data' BASE_IC: '/scratch3/NCEPDEV/global/role.glopara/data/ICSDIR' AERO_INPUTS_DIR: '/scratch3/NCEPDEV/global/role.glopara/data/GEFS_ExtData/20250310' PACKAGEROOT: '/scratch3/NCEPDEV/global/role.glopara/nwpara' -#HOMEDIR: '/scratch3/NCEPDEV/global/${USER}' -HOMEDIR: '/scratch4/NAGAPE/epic/${USER}' +HOMEDIR: '/scratch3/NCEPDEV/global/${USER}' STMP: '/scratch4/NCEPDEV/stmp/${USER}' PTMP: '/scratch4/NCEPDEV/stmp/${USER}' NOSCRUB: '${HOMEDIR}' @@ -28,7 +27,3 @@ CHGRP_CMD: 'chgrp rstprod' # Features SUPPORTED_RESOLUTIONS: ['C1152', 'C768', 'C384', 'C192', 'C96', 'C48'] DO_ARCHCOM: 'YES' -DO_AWIPS: 'NO' -KEEPDATA: 'YES' -MAKE_NSSTBUFR: 'NO' -MAKE_ACFTBUFR: 'NO' diff --git a/scripts/exglobal_atmos_products.sh b/scripts/exglobal_atmos_products.sh index 0c34247bb3c..1b97742c892 100755 --- a/scripts/exglobal_atmos_products.sh +++ b/scripts/exglobal_atmos_products.sh @@ -1,5 +1,7 @@ #! 
/usr/bin/env bash +source "${HOMEgfs}/dev/ush/load_gw_run_modules.sh" + # Scripts used INTERP_ATMOS_MASTERSH=${INTERP_ATMOS_MASTERSH:-"${USHgfs}/interp_atmos_master.sh"} INTERP_ATMOS_SFLUXSH=${INTERP_ATMOS_SFLUXSH:-"${USHgfs}/interp_atmos_sflux.sh"} From 2823766f7a938cce8b9cbfb1764e69c4edcbfefe Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 1 Oct 2025 19:52:51 +0000 Subject: [PATCH 122/134] remove config.resources.URSA from sfs --- dev/parm/config/sfs/config.resources.URSA | 1 - 1 file changed, 1 deletion(-) delete mode 120000 dev/parm/config/sfs/config.resources.URSA diff --git a/dev/parm/config/sfs/config.resources.URSA b/dev/parm/config/sfs/config.resources.URSA deleted file mode 120000 index 6d3d16eda14..00000000000 --- a/dev/parm/config/sfs/config.resources.URSA +++ /dev/null @@ -1 +0,0 @@ -../gfs/config.resources.URSA \ No newline at end of file From 335ae748b7389793fbc5a14de5a86c8e0d8126aa Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Thu, 2 Oct 2025 02:27:34 +0000 Subject: [PATCH 123/134] need run_mpmd.sh changes --- ush/run_mpmd.sh | 68 ++++++++++--------------------------------------- ush/wave_tar.sh | 4 +-- 2 files changed, 16 insertions(+), 56 deletions(-) diff --git a/ush/run_mpmd.sh b/ush/run_mpmd.sh index 3e8a92f7183..30542c33998 100755 --- a/ush/run_mpmd.sh +++ b/ush/run_mpmd.sh @@ -29,10 +29,11 @@ ################################################################################ source "${USHgfs}/preamble.sh" + source "${HOMEgfs}/ush/detect_machine.sh" source "${HOMEgfs}/ush/module-setup.sh" -if [[ "${MACHINE_ID}" == "container" ]]; then +if [[ "$MACHINE_ID" == "container" ]]; then source /usr/lmod/lmod/init/bash module use "${HOMEgfs}/sorc/gfs_utils.fd/modulefiles" module load gfsutils_container.intel @@ -55,63 +56,22 @@ fi # Set OMP_NUM_THREADS to 1 to avoid oversubscription when doing MPMD export OMP_NUM_THREADS=1 -# Determine the number of MPMD processes from incoming ${cmdfile} -nprocs=$(wc -l < "${cmdfile}") - -# Local MPMD file 
containing instructions to run in CFP -mpmd_cmdfile="${DATA:-}/mpmd_cmdfile" -if [[ -s "${mpmd_cmdfile}" ]]; then rm -f "${mpmd_cmdfile}"; fi - -cat << EOF - INFO: Executing MPMD job, STDOUT redirected for each process separately - INFO: On failure, logs for each job will be available in ${DATA}/mpmd.proc_num.out - INFO: The proc_num corresponds to the line in '${mpmd_cmdfile}' -EOF - -if [[ "${launcher:-}" =~ ^srun.* ]]; then # srun-based system e.g. Hera, Orion, etc. - - # Slurm requires a counter in front of each line in the script - # Read the incoming cmdfile and create srun usable cmdfile - nm=0 - # shellcheck disable=SC2312 - while IFS= read -r line; do - echo "${nm} ${line}" >> "${mpmd_cmdfile}" - ((nm=nm+1)) - done < "${cmdfile}" - - set +e - # shellcheck disable=SC2086 - ${launcher:-} ${mpmd_opt:-} -n ${nprocs} "${mpmd_cmdfile}" - err=$? - set_strict +# Redirect output from each process to its own stdout +# Read the incoming cmdfile and create mpiexec usable cmdfile +nm=0 +# shellcheck disable=SC2312 +while IFS= read -r line; do + echo "Line ${nm}: ${line}" + ${line} > "mpmd.${nm}.out" & + ((nm=nm+1)) +done < "${cmdfile}" +wait -elif [[ "${launcher:-}" =~ ^mpiexec.* ]]; then # mpiexec - - # Redirect output from each process to its own stdout - # Read the incoming cmdfile and create mpiexec usable cmdfile - nm=0 - echo "#!/bin/bash" >> "${mpmd_cmdfile}" - # shellcheck disable=SC2312 - while IFS= read -r line; do - echo "${line} > mpmd.${nm}.out" >> "${mpmd_cmdfile}" - ((nm=nm+1)) - done < "${cmdfile}" - chmod 755 "${mpmd_cmdfile}" - - # shellcheck disable=SC2086 - ${launcher:-} -np ${nprocs} ${mpmd_opt:-} "${mpmd_cmdfile}" - err=$? - -else - - echo "FATAL ERROR: CFP is not usable with launcher: '${launcher:-}'" - err=1 - -fi +err=$? +set_strict # On success concatenate processor specific output into a single mpmd.out if [[ ${err} -eq 0 ]]; then - rm -f "${mpmd_cmdfile}" out_files=$(find . 
-name 'mpmd.*.out') for file in ${out_files}; do cat "${file}" >> mpmd.out diff --git a/ush/wave_tar.sh b/ush/wave_tar.sh index a8b959b33dc..2b0a358513b 100755 --- a/ush/wave_tar.sh +++ b/ush/wave_tar.sh @@ -27,7 +27,7 @@ source "${HOMEgfs}/dev/ush/load_gw_run_modules.sh" # 0.a Basic modes of operation - cd "${DATA}" + cd "${DATA}" || exit 1 echo "Making TAR FILE" alertName=$(echo $RUN|tr [a-z] [A-Z]) @@ -165,7 +165,7 @@ EOF # --------------------------------------------------------------------------- # # 4. Final clean up -cd "${DATA}" +cd "${DATA}" || exit 5 if [[ ${KEEPDATA:-NO} == "NO" ]]; then set -v From 3429ed82b238d4e701e09b355c232d48ce04f155 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Thu, 2 Oct 2025 19:57:35 +0000 Subject: [PATCH 124/134] only double quota varaibles --- ush/run_mpmd.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ush/run_mpmd.sh b/ush/run_mpmd.sh index 30542c33998..2f7812c3346 100755 --- a/ush/run_mpmd.sh +++ b/ush/run_mpmd.sh @@ -28,10 +28,10 @@ # ################################################################################ -source "${USHgfs}/preamble.sh" +source "${USHgfs}"/preamble.sh -source "${HOMEgfs}/ush/detect_machine.sh" -source "${HOMEgfs}/ush/module-setup.sh" +source "${HOMEgfs}"/ush/detect_machine.sh +source "${HOMEgfs}"/ush/module-setup.sh if [[ "$MACHINE_ID" == "container" ]]; then source /usr/lmod/lmod/init/bash From 57b7337ffa08f428f70a48597b0e074ca1d80656 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Thu, 2 Oct 2025 20:14:37 +0000 Subject: [PATCH 125/134] only double quota varaibles --- ush/run_mpmd.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ush/run_mpmd.sh b/ush/run_mpmd.sh index 2f7812c3346..738cf5b92e1 100755 --- a/ush/run_mpmd.sh +++ b/ush/run_mpmd.sh @@ -35,7 +35,7 @@ source "${HOMEgfs}"/ush/module-setup.sh if [[ "$MACHINE_ID" == "container" ]]; then source /usr/lmod/lmod/init/bash - module use "${HOMEgfs}/sorc/gfs_utils.fd/modulefiles" + module use 
"${HOMEgfs}"/sorc/gfs_utils.fd/modulefiles module load gfsutils_container.intel module load prod_util module load wgrib2 From 4d772f0a5c2b31aa893d253b6f9e22a216b75025 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Tue, 7 Oct 2025 20:27:50 +0000 Subject: [PATCH 126/134] reverse to GW repo code, and new way to handle jobs scripts --- dev/container/utils/link_gfs_utils.sh | 2 -- jobs/JGLOBAL_ARCHIVE_VRFY | 11 +++++--- jobs/JGLOBAL_OCEANICE_PRODUCTS | 11 ++++---- jobs/JGLOBAL_PREP_EMISSIONS | 11 ++++---- jobs/JGLOBAL_STAGE_IC | 11 ++++---- sorc/link_workflow.sh | 23 ++++++++++++++-- ush/bash_utils.sh | 38 --------------------------- ush/forecast_det.sh | 6 ++--- ush/run_mpmd.sh | 13 +-------- 9 files changed, 50 insertions(+), 76 deletions(-) diff --git a/dev/container/utils/link_gfs_utils.sh b/dev/container/utils/link_gfs_utils.sh index d74af04c2c2..d48ffd303f8 100755 --- a/dev/container/utils/link_gfs_utils.sh +++ b/dev/container/utils/link_gfs_utils.sh @@ -84,12 +84,10 @@ rm -f "${direct_model_script}" cat > "${direct_model_script}" << EOF_DIRECT #!/bin/bash -#source "${HOMEgfs}/dev/ush/load_gw_run_modules.sh" source /usr/lmod/lmod/init/bash module use "${HOMEgfs}/sorc/gfs_utils.fd/modulefiles" module load gfsutils_container.intel module load wgrib2/3.6.0 -module load wgrib2/3.6.0 ${HOMEgfs}/sorc/gfs_utils.fd/install/bin/ocnicepost.x "\$@" EOF_DIRECT diff --git a/jobs/JGLOBAL_ARCHIVE_VRFY b/jobs/JGLOBAL_ARCHIVE_VRFY index 235ae4f9411..da7b4b58848 100755 --- a/jobs/JGLOBAL_ARCHIVE_VRFY +++ b/jobs/JGLOBAL_ARCHIVE_VRFY @@ -23,10 +23,15 @@ done ############################################################### # Run archive script ############################################################### -if [[ "${RUN_WITH_CONTAINER}" == "YES" ]]; then - "${HOMEgfs}/exec/run_python.sh" "${GLOBALARCHIVESH:-${SCRgfs}/exglobal_archive_vrfy.py}" -c -v +#if [[ "${RUN_WITH_CONTAINER}" == "YES" ]]; then +# "${HOMEgfs}/exec/run_python.sh" 
"${GLOBALARCHIVESH:-${SCRgfs}/exglobal_archive_vrfy.py}" -c -v +#else +# ${GLOBALARCHIVESH:-${SCRgfs}/exglobal_archive_vrfy.py} +#fi +if [[ -v GLOBALARCHIVESH]]; then + ${GLOBALARCHIVESH} else - ${GLOBALARCHIVESH:-${SCRgfs}/exglobal_archive_vrfy.py} + "${PYCMD}" "${SCRgfs}"exglobal_archive_vrfy.py ${PYEXTRAARGS} fi err=$? if [[ ${err} -ne 0 ]]; then diff --git a/jobs/JGLOBAL_OCEANICE_PRODUCTS b/jobs/JGLOBAL_OCEANICE_PRODUCTS index f17f360c16a..393bfcbb0ce 100755 --- a/jobs/JGLOBAL_OCEANICE_PRODUCTS +++ b/jobs/JGLOBAL_OCEANICE_PRODUCTS @@ -13,11 +13,12 @@ YMD="${PDY}" HH="${cyc}" declare_from_tmpl -rx "COMOUT_${COMPONENT^^}_NETCDF":"C ############################################################### # Run exglobal script -if [[ "${RUN_WITH_CONTAINER}" == "YES" ]]; then - "${HOMEgfs}/exec/run_python.sh" "${SCRgfs}/exglobal_oceanice_products.py" -c -v -else - "${SCRgfs}/exglobal_oceanice_products.py" && true -fi +#if [[ "${RUN_WITH_CONTAINER}" == "YES" ]]; then +# "${HOMEgfs}/exec/run_python.sh" "${SCRgfs}/exglobal_oceanice_products.py" -c -v +#else +# "${SCRgfs}/exglobal_oceanice_products.py" && true +#fi +"${PYCMD}" "${SCRgfs}"/exglobal_oceanice_products.py ${PYEXTRAARGS} && true export err=$? if [[ ${err} -ne 0 ]]; then err_exit diff --git a/jobs/JGLOBAL_PREP_EMISSIONS b/jobs/JGLOBAL_PREP_EMISSIONS index 82cd17c0692..6ce23f06796 100755 --- a/jobs/JGLOBAL_PREP_EMISSIONS +++ b/jobs/JGLOBAL_PREP_EMISSIONS @@ -18,11 +18,12 @@ source "${HOMEgfs}/ush/jjob_header.sh" -e "prep_emissions" -c "base prep_emissio EXSCRIPT=${PREP_EMISSIONS_PY:-${SCRgfs}/exglobal_prep_emissions.py} # Execute staging -if [[ "${RUN_WITH_CONTAINER}" == "YES" ]]; then - "${HOMEgfs}/exec/run_python.sh" "${EXSCRIPT}" && true -else - ${EXSCRIPT} && true -fi +#if [[ "${RUN_WITH_CONTAINER}" == "YES" ]]; then +# "${HOMEgfs}/exec/run_python.sh" "${EXSCRIPT}" && true +#else +# ${EXSCRIPT} && true +#fi +"${PYCMD}" "${EXSCRIPT}" && true export err=$? 
if [[ ${err} -ne 0 ]]; then err_exit "Error executing ${EXSCRIPT}, ABORT!" diff --git a/jobs/JGLOBAL_STAGE_IC b/jobs/JGLOBAL_STAGE_IC index 8507f187006..baf8cb66903 100755 --- a/jobs/JGLOBAL_STAGE_IC +++ b/jobs/JGLOBAL_STAGE_IC @@ -3,11 +3,12 @@ source "${HOMEgfs}/ush/jjob_header.sh" -e "stage_ic" -c "base stage_ic" # Execute staging -if [[ "${RUN_WITH_CONTAINER}" == "YES" ]]; then - "${HOMEgfs}/exec/run_python.sh" "${SCRgfs}/exglobal_stage_ic.py" -else - "${SCRgfs}/exglobal_stage_ic.py" -fi +#if [[ "${RUN_WITH_CONTAINER}" == "YES" ]]; then +# "${HOMEgfs}/exec/run_python.sh" "${SCRgfs}/exglobal_stage_ic.py" +#else +# "${SCRgfs}/exglobal_stage_ic.py" +#fi +"${PYCMD}" "${SCRgfs}"/exglobal_stage_ic.py err=$? ############################################################### diff --git a/sorc/link_workflow.sh b/sorc/link_workflow.sh index 6e1d6c6a42e..0d6eed92442 100755 --- a/sorc/link_workflow.sh +++ b/sorc/link_workflow.sh @@ -231,8 +231,12 @@ fi #--add GDASApp parm directory #------------------------------ if [[ -d "${HOMEgfs}/sorc/gdas.cd" ]]; then - cd "${HOMEgfs}/parm/gdas" || exit 1 - declare -a gdasapp_comps=("aero" "atm" "io" "ioda" "snow" "soca" "jcb-gdas" "jcb-algorithms" "stat") + cd "${HOMEgfs}/parm" || exit 1 + if [[ ! 
-d gdas ]]; then + mkdir -p gdas + fi + cd gdas || exit 1 + declare -a gdasapp_comps=("aero" "atm" "io" "ioda" "snow" "marine" "jcb-gdas" "jcb-algorithms" "anlstat" "analcalc") for comp in "${gdasapp_comps[@]}"; do if [[ -d "${comp}" ]]; then rm -rf "${comp}" @@ -241,6 +245,21 @@ if [[ -d "${HOMEgfs}/sorc/gdas.cd" ]]; then done fi +#------------------------------ +#--add SPOC parm and ush directory +#------------------------------ +sources=("config" "scripts") +targets=("parm/gdas" "ush") +for i in "${!sources[@]}"; do + src="${HOMEgfs}/sorc/gdas.cd/sorc/spoc/dump/${sources[${i}]}" + dst="${HOMEgfs}/${targets[${i}]}" + + if [[ -d "${src}" ]]; then + cd "${dst}" || exit 1 + ${LINK_OR_COPY} "${src}" "spoc" + fi +done + #------------------------------ #--add GDASApp files #------------------------------ diff --git a/ush/bash_utils.sh b/ush/bash_utils.sh index 4432e5e384d..fc69a79131a 100755 --- a/ush/bash_utils.sh +++ b/ush/bash_utils.sh @@ -109,44 +109,6 @@ function wait_for_file() { set_trace return 1 } - -function cpreq() { - if ! cp "$@"; then - err_exit "The copy $* operation failed." - fi -} - -function cpfs() { - if [[ "$#" -ne "2" ]]; then - echo "This script requires two arguments: a source file and a destination file path." - exit 16 - fi - - if [[ "$2" = '.' || "$2" = './' ]]; then - cpdstfile=${PWD:?}/$(basename "$1") - elif [[ -d "$2" ]]; then - cpdstfile=${2%/}/$(basename "$1") - else - cpdstfile=$2 - fi - - if ! cp "$1" "${cpdstfile}.cptmp"; then - err_exit "$1 is missing or was not copied successfully." - fi - - #${FSYNC} $cpdstfile.cptmp - #if [ $? -ne 0 ]; then - # >&2 echo "WARNING: ${FSYNC} $cpdstfile.cptmp failed." - #fi - - #mv "${cpdstfile}.cptmp" ${cpdstfile} - #if [[ "$?" -ne "0" ]] ; then - # err_exit "${cpdstfile}.cptmp is missing or was not moved successfully." - #fi - if ! mv "${cpdstfile}.cptmp" "${cpdstfile}"; then - err_exit "Failed to rename '${cpdstfile}.cptmp' to '${cpdstfile}'." 
- fi -} # shellcheck disable= diff --git a/ush/forecast_det.sh b/ush/forecast_det.sh index e18b2f3d59c..6fa1d127e6f 100755 --- a/ush/forecast_det.sh +++ b/ush/forecast_det.sh @@ -17,10 +17,8 @@ UFS_det(){ # Ensure cold start ICs are present when warm start is not set # TODO: add checks for other cold start ICs as well if [[ ! -f "${COMIN_ATMOS_INPUT}/gfs_ctrl.nc" ]]; then - if [[ ! -L "${COMIN_ATMOS_INPUT}/gfs_ctrl.nc" ]]; then - echo "FATAL ERROR: Cold start ICs are missing from '${COMIN_ATMOS_INPUT}'" - exit 1 - fi + echo "FATAL ERROR: Cold start ICs are missing from '${COMIN_ATMOS_INPUT}'" + exit 1 fi # Since warm start is false, we cannot do IAU diff --git a/ush/run_mpmd.sh b/ush/run_mpmd.sh index 738cf5b92e1..8b6461d6d1e 100755 --- a/ush/run_mpmd.sh +++ b/ush/run_mpmd.sh @@ -28,18 +28,7 @@ # ################################################################################ -source "${USHgfs}"/preamble.sh - -source "${HOMEgfs}"/ush/detect_machine.sh -source "${HOMEgfs}"/ush/module-setup.sh - -if [[ "$MACHINE_ID" == "container" ]]; then - source /usr/lmod/lmod/init/bash - module use "${HOMEgfs}"/sorc/gfs_utils.fd/modulefiles - module load gfsutils_container.intel - module load prod_util - module load wgrib2 -fi +source "${HOMEgfs}"/dev/ush/load_gw_run_modules.sh cmdfile=${1:?"run_mpmd requires an input file containing commands to execute in MPMD/serial mode"} From a9c6644fb7930cd4d6900154c18aeef1ce99485c Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 8 Oct 2025 15:54:40 +0000 Subject: [PATCH 127/134] combine few scripts to decrease numbers --- dev/container/com.sh | 18 ------ .../utils/compile-gw-in-container.sh | 7 --- dev/container/utils/create-atmos-products.sh | 17 ++++-- .../utils/exec.exglobal_atmos_products.sh | 9 --- dev/container/utils/exec.python | 9 --- dev/container/utils/gen-wrapper.sh | 56 ++++++++++++------- dev/container/utils/ush.python | 25 --------- env/URSA.env | 6 -- ush/preamble.sh | 12 +++- 9 files changed, 60 insertions(+), 99 
deletions(-) delete mode 100755 dev/container/com.sh delete mode 100755 dev/container/utils/compile-gw-in-container.sh delete mode 100755 dev/container/utils/exec.exglobal_atmos_products.sh delete mode 100644 dev/container/utils/exec.python delete mode 100644 dev/container/utils/ush.python diff --git a/dev/container/com.sh b/dev/container/com.sh deleted file mode 100755 index c40b6ae119c..00000000000 --- a/dev/container/com.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=compile -#SBATCH --account=epic -#SBATCH --qos=batch -#SBATCH --partition=u1-compute -#SBATCH -t 04:15:00 -#SBATCH --nodes=1 -#SBATCH --ntasks=24 -#SBATCH -o compile.%J.log - -gwhome=/scratch4/NAGAPE/epic/Wei.Huang/src/container/global-workflow-cloud -cmd=${gwhome}/dev/container/utils/compile-gw-in-container.sh -img=/scratch3/NCEPDEV/nems/role.epic/containers/ubuntu22.04-intel-ufs-env-v1.9.2.img - -gw_sorc_dir=${gwhome}/sorc - -singularity exec -B /scratch3 -B /scratch4 "${img}" "${cmd}" "${gw_sorc_dir}" - diff --git a/dev/container/utils/compile-gw-in-container.sh b/dev/container/utils/compile-gw-in-container.sh deleted file mode 100755 index 650160db70e..00000000000 --- a/dev/container/utils/compile-gw-in-container.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - - gw_sorc_dir=$1 - cd "${gw_sorc_dir}" || exit 11 - ./build_all.sh gfs sfs gefs -#./link_workflow.sh - diff --git a/dev/container/utils/create-atmos-products.sh b/dev/container/utils/create-atmos-products.sh index a6c3cbf9c60..6df0046d231 100755 --- a/dev/container/utils/create-atmos-products.sh +++ b/dev/container/utils/create-atmos-products.sh @@ -39,10 +39,17 @@ if [[ "${verbose}" == "true" ]]; then echo "bindings: ${bindings}" fi -sed -e "s?HOMEgfs?${HOMEgfs}?g" \ - -e "s?SIF?${container}?g" \ - -e "s?BINDINGS?${bindings}?g" \ - "${HOMEgfs}/dev/container/utils/exec.exglobal_atmos_products.sh" > "${HOMEgfs}/exec/exglobal_atmos_products.sh" +eap_script="${HOMEgfs}"/exec/exglobal_atmos_products.sh +cat > 
"${eap_script}" << EOF_ATMOS_PRODUCTS +#!/bin/bash + LD_LIBRARY_PATH=\$(dirname ${container}) + export LD_LIBRARY_PATH + + singularity exec \\ + ${bindings} \\ + ${container}? \\ + ${HOMEgfs}/scripts/exglobal_atmos_products.sh "\$@" +EOF_ATMOS_PRODUCTS -chmod +x "${HOMEgfs}/exec/exglobal_atmos_products.sh" +chmod +x "${eap_script}" diff --git a/dev/container/utils/exec.exglobal_atmos_products.sh b/dev/container/utils/exec.exglobal_atmos_products.sh deleted file mode 100755 index 89c158dad11..00000000000 --- a/dev/container/utils/exec.exglobal_atmos_products.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash - LD_LIBRARY_PATH=$(dirname SIF) - export LD_LIBRARY_PATH - - singularity exec \ - BINDINGS \ - SIF \ - "HOMEgfs/scripts/exglobal_atmos_products.sh" "$@" - diff --git a/dev/container/utils/exec.python b/dev/container/utils/exec.python deleted file mode 100644 index fe1ac65e214..00000000000 --- a/dev/container/utils/exec.python +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash - LD_LIBRARY_PATH=$(dirname SIF) - export LD_LIBRARY_PATH - - singularity exec \ - BINDINGS \ - SIF \ - HOMEgfs/ush/container/run_python.sh "$@" - diff --git a/dev/container/utils/gen-wrapper.sh b/dev/container/utils/gen-wrapper.sh index a358c39b2af..167ae78d915 100755 --- a/dev/container/utils/gen-wrapper.sh +++ b/dev/container/utils/gen-wrapper.sh @@ -36,24 +36,42 @@ if [[ "${verbose}" == "true" ]]; then set -x fi -for dnm in exec ush -do - if [[ "${dnm}" == "exec" ]]; then - targetdir=${HOMEgfs}/${dnm} - else - targetdir=${HOMEgfs}/${dnm}/container - fi - mkdir -p "${targetdir}" - sourcef=${HOMEgfs}/dev/container/utils/${dnm}.python - targetf=${targetdir}/run_python.sh - - sed -e "s?HOMEgfs?${HOMEgfs}?g" \ - -e "s?SIF?${container}?g" \ - -e "s?BINDINGS?${bindings}?g" \ - "${sourcef}" > "${targetf}" - - chmod 755 "${targetf}" -done +exec_python_script="${HOMEgfs}"/exec/run_python.sh -sed -i 's/RUN_WITH_CONTAINER=NO/RUN_WITH_CONTAINER=YES/g' "${HOMEgfs}/ush/preamble.sh" +cat > "${exec_python_script}" 
<< EOF_EXEC_PYTHON +#!/bin/bash + LD_LIBRARY_PATH=\$(dirname "${container}") + export LD_LIBRARY_PATH + singularity exec \\ + ${bindings} \\ + ${container} \\ + ${HOMEgfs}/ush/container/run_python.sh "\$@" +EOF_EXEC_PYTHON + +run_python_script="${HOMEgfs}"/ush/container/run_python.sh + +cat > "${run_python_script}" << EOF_RUN_PYTHON +#!/bin/bash + +source /usr/lmod/lmod/init/bash +module purge +module use "${HOMEgfs}"/sorc/gfs_utils.fd/modulefiles +module load gfsutils_container.intel +module load python +module load py-netcdf4 +module load py-xarray +module load py-f90nml +module load py-numpy +module load py-jinja2 +module load py-pyyaml + +wxflowPATH=${HOMEgfs}/ush/python:${HOMEgfs}/sorc/wxflow/src +export PYTHONPATH=\${PYTHONPATH:+\${PYTHONPATH}:}${HOMEgfs}/ush:\${wxflowPATH} + +python "\$@" +EOF_RUN_PYTHON + +sed -i 's/RUN_WITH_CONTAINER=NO/RUN_WITH_CONTAINER=YES/g' "${HOMEgfs}/ush/preamble.sh" +chmod +x "${exec_python_script}" +chmod +x "${run_python_script}" diff --git a/dev/container/utils/ush.python b/dev/container/utils/ush.python deleted file mode 100644 index ac0b87d629a..00000000000 --- a/dev/container/utils/ush.python +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash - -source /usr/lmod/lmod/init/bash -module purge -module use HOMEgfs/sorc/gfs_utils.fd/modulefiles -module load gfsutils_container.intel - -#module list - -module load python -module load py-netcdf4 -module load py-xarray -module load py-f90nml -module load py-numpy -module load py-jinja2 -module load py-pyyaml -# module load py-python-dateutil/2.8.2 - -#module list - -wxflowPATH="HOMEgfs/ush/python:HOMEgfs/sorc/wxflow/src" -export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}HOMEgfs/ush:${wxflowPATH}" - -python "$@" - diff --git a/env/URSA.env b/env/URSA.env index d08c5105442..1b01b877421 100644 --- a/env/URSA.env +++ b/env/URSA.env @@ -242,12 +242,6 @@ elif [[ "${step}" = "eupd" ]]; then export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" elif [[ "${step}" = "fcst" ]] || [[ "${step}" = "efcs" ]]; 
then - # fast - #export launcher="srun -l --hint=nomultithread --distribution=block:block" - # slow3 - #export launcher="srun -l --export=ALL --hint=nomultithread" - export launcher="srun --mpi=pmi2 -l --hint=nomultithread" - #export launcher="srun env -u SLURM_NODELIST" (( nnodes = (ntasks+tasks_per_node-1)/tasks_per_node )) (( ufs_ntasks = nnodes*tasks_per_node )) diff --git a/ush/preamble.sh b/ush/preamble.sh index a241d8f2a4e..032fa1add50 100755 --- a/ush/preamble.sh +++ b/ush/preamble.sh @@ -177,7 +177,17 @@ trap "postamble ${_calling_script} ${start_time} \$?" EXIT source "${HOMEgfs}/ush/bash_utils.sh" # Decide if run with container -export RUN_WITH_CONTAINER=NO +export RUN_WITH_CONTAINER=YES + +if [[ "${RUN_WITH_CONTAINER}" == "YES" ]]; then + # if within container, will run python executbale inside container, + # also need to tell some scripts that it is run in container, with "-c" option. + export PYCMD="${HOMEgfs}"/exec/run_python.sh + export PYEXTRAARGS=" -c -v" +else + export PYCMD=python + export PYEXTRAARGS="" +fi # Turn on our settings export SHELLOPTS From 39e6392dc692972236f88f2901f9e2d8519067e1 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 8 Oct 2025 17:30:27 +0000 Subject: [PATCH 128/134] fix archive script --- jobs/JGLOBAL_ARCHIVE_VRFY | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jobs/JGLOBAL_ARCHIVE_VRFY b/jobs/JGLOBAL_ARCHIVE_VRFY index da7b4b58848..ca266c4cd89 100755 --- a/jobs/JGLOBAL_ARCHIVE_VRFY +++ b/jobs/JGLOBAL_ARCHIVE_VRFY @@ -28,10 +28,10 @@ done #else # ${GLOBALARCHIVESH:-${SCRgfs}/exglobal_archive_vrfy.py} #fi -if [[ -v GLOBALARCHIVESH]]; then +if [[ -v GLOBALARCHIVESH ]]; then ${GLOBALARCHIVESH} else - "${PYCMD}" "${SCRgfs}"exglobal_archive_vrfy.py ${PYEXTRAARGS} + "${PYCMD}" "${SCRgfs}"/exglobal_archive_vrfy.py ${PYEXTRAARGS} fi err=$? 
if [[ ${err} -ne 0 ]]; then From 5ae3c6722889012c57642962bc160de08ddd5b78 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 8 Oct 2025 17:36:02 +0000 Subject: [PATCH 129/134] using PYCMD --- env/URSA.env.container | 332 ---------------------------------------- ush/forecast_postdet.sh | 17 +- ush/preamble.sh | 12 +- 3 files changed, 20 insertions(+), 341 deletions(-) delete mode 100755 env/URSA.env.container diff --git a/env/URSA.env.container b/env/URSA.env.container deleted file mode 100755 index 247f0515e77..00000000000 --- a/env/URSA.env.container +++ /dev/null @@ -1,332 +0,0 @@ -#! /usr/bin/env bash - -if [[ $# -ne 1 ]]; then - - echo "Must specify an input argument to set runtime environment variables!" - exit 1 - -fi - -step=$1 - -export launcher="srun -l --export=ALL --hint=nomultithread" -export mpmd_opt="--multi-prog --output=mpmd.%j.%t.out" - -#export POSTAMBLE_CMD='report-mem' - -# Configure MPI environment -export OMP_STACKSIZE=2048000 -export NTHSTACK=1024000000 - -# Setting stacksize to unlimited on login nodes is prohibited -if [[ -n "${SLURM_JOB_ID:-}" ]]; then - ulimit -s unlimited - ulimit -a -fi - -# Calculate common variables -# Check first if the dependent variables are set -if [[ -n "${ntasks:-}" && -n "${max_tasks_per_node:-}" && -n "${tasks_per_node:-}" ]]; then - max_threads_per_task=$((max_tasks_per_node / tasks_per_node)) - NTHREADSmax=${threads_per_task:-${max_threads_per_task}} - NTHREADS1=${threads_per_task:-1} - if [[ ${NTHREADSmax} -gt ${max_threads_per_task} ]]; then - NTHREADSmax=${max_threads_per_task} - fi - if [[ ${NTHREADS1} -gt ${max_threads_per_task} ]]; then - NTHREADS1=${max_threads_per_task} - fi - APRUN_default="${launcher} -n ${ntasks}" -else - echo "ERROR config.resources must be sourced before sourcing URSA.env" - exit 2 -fi - -if [[ "${step}" = "prep" ]] || [[ "${step}" = "prepbufr" ]]; then - - export POE="NO" - export BACK="NO" - export sys_tp="URSA" - export launcher_PREP="srun --hint=nomultithread" - -elif [[ 
"${step}" = "prep_emissions" ]]; then - - export APRUN="${APRUN_default}" - -elif [[ "${step}" = "waveinit" ]] || [[ "${step}" = "waveprep" ]] || [[ "${step}" = "wavepostsbs" ]] || [[ "${step}" = "wavepostbndpnt" ]] || [[ "${step}" = "wavepostbndpntbll" ]] || [[ "${step}" = "wavepostpnt" ]]; then - - export USE_CFP="YES" - if [[ "${step}" = "waveprep" ]]; then export MP_PULSE=0 ; fi - export wavempexec=${launcher} - export wave_mpmd=${mpmd_opt} - -elif [[ "${step}" = "atmanlvar" ]]; then - - export NTHREADS_ATMANLVAR=${NTHREADSmax} - export APRUN_ATMANLVAR="${APRUN_default} --cpus-per-task=${NTHREADS_ATMANLVAR}" - -elif [[ "${step}" = "atmensanlobs" ]]; then - - export NTHREADS_ATMENSANLOBS=${NTHREADSmax} - export APRUN_ATMENSANLOBS="${APRUN_default} --cpus-per-task=${NTHREADS_ATMENSANLOBS}" - -elif [[ "${step}" = "atmensanlsol" ]]; then - - export NTHREADS_ATMENSANLSOL=${NTHREADSmax} - export APRUN_ATMENSANLSOL="${APRUN_default} --cpus-per-task=${NTHREADS_ATMENSANLSOL}" - -elif [[ "${step}" = "atmensanlletkf" ]]; then - - export NTHREADS_ATMENSANLLETKF=${NTHREADSmax} - export APRUN_ATMENSANLLETKF="${APRUN_default} --cpus-per-task=${NTHREADS_ATMENSANLLETKF}" - -elif [[ "${step}" = "atmensanlfv3inc" ]]; then - - export NTHREADS_ATMENSANLFV3INC=${NTHREADSmax} - export APRUN_ATMENSANLFV3INC="${APRUN_default} --cpus-per-task=${NTHREADS_ATMENSANLFV3INC}" - -elif [[ "${step}" = "aeroanlvar" ]]; then - - export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" - - export NTHREADS_AEROANL=${NTHREADSmax} - export APRUN_AEROANL="${APRUN_default} --cpus-per-task=${NTHREADS_AEROANL}" - -elif [[ "${step}" = "aeroanlgenb" ]]; then - - export NTHREADS_AEROANLGENB=${NTHREADSmax} - export APRUN_AEROANLGENB="${APRUN_default} --cpus-per-task=${NTHREADS_AEROANLGENB}" - -elif [[ "${step}" = "atmanlfv3inc" ]]; then - - export NTHREADS_ATMANLFV3INC=${NTHREADSmax} - export APRUN_ATMANLFV3INC="${APRUN_default} --cpus-per-task=${NTHREADS_ATMANLFV3INC}" - -elif [[ "${step}" = "anlstat" ]]; then - 
- export NTHREADS_ANLSTAT=${NTHREADSmax} - export APRUN_ANLSTAT="${APRUN_default} --cpus-per-task=${NTHREADS_ANLSTAT}" - -elif [[ "${step}" = "prepobsaero" ]]; then - - export NTHREADS_PREPOBSAERO=${NTHREADS1} - export APRUN_PREPOBSAERO="${APRUN_default} --cpus-per-task=${NTHREADS_PREPOBSAERO}" - -elif [[ "${step}" = "snowanl" ]]; then - - export APRUN_CALCFIMS="${launcher} -n 1" - - export NTHREADS_SNOWANL=${NTHREADSmax} - export APRUN_SNOWANL="${APRUN_default} --mem=0 --cpus-per-task=${NTHREADS_SNOWANL}" - - export APRUN_APPLY_INCR="${launcher} -n 6" - -elif [[ "${step}" = "esnowanl" ]]; then - - export APRUN_CALCFIMS="${launcher} -n 1" - - export NTHREADS_ESNOWANL=${NTHREADSmax} - export APRUN_ESNOWANL="${APRUN_default} --mem=0 --cpus-per-task=${NTHREADS_ESNOWANL}" - - export APRUN_APPLY_INCR="${launcher} -n 6" - -elif [[ "${step}" = "marinebmat" ]]; then - - export APRUNCFP="${launcher} -n \$ncmd --multi-prog" - export APRUN_MARINEBMAT="${APRUN_default}" - -elif [[ "${step}" = "marineanlvar" ]]; then - - export APRUNCFP="${launcher} -n \$ncmd --multi-prog" - export APRUN_MARINEANLVAR="${APRUN_default}" - -elif [[ "${step}" = "marineanlchkpt" ]]; then - - export APRUNCFP="${launcher} -n \$ncmd --multi-prog" - export APRUN_MARINEANLCHKPT="${APRUN_default}" - - export APRUN_MARINEANLOBSSTATS="${launcher} -n 1" - -elif [[ "${step}" = "marineanlecen" ]]; then - - export NTHREADS_MARINEANLECEN=${NTHREADSmax} - export APRUN_MARINEANLECEN="${APRUN_default} --cpus-per-task=${NTHREADS_MARINEANLECEN}" - -elif [[ "${step}" = "marineanlletkf" ]]; then - - export NTHREADS_MARINEANLLETKF=${NTHREADSmax} - export APRUN_MARINEANLLETKF=${APRUN_default} - -elif [[ "${step}" = "ecen_fv3jedi" ]]; then - - export NTHREADS_ECEN_FV3JEDI=${NTHREADSmax} - export APRUN_CORRECTION_INCREMENT="${launcher} -n ${ntasks_correction_increment} --cpus-per-task=${NTHREADS_ECEN_FV3JEDI}" - export APRUN_ENSEMBLE_RECENTER="${launcher} -n ${ntasks_ensemble_recenter} 
--cpus-per-task=${NTHREADS_ECEN_FV3JEDI}" - -elif [[ "${step}" = "analcalc_fv3jedi" ]]; then - - export NTHREADS_ANALCALC_FV3JEDI=${NTHREADSmax} - export APRUN_ANALCALC_FV3JEDI="${APRUN_default} --cpus-per-task=${NTHREADS_ANALCALC_FV3JEDI}" - -elif [[ "${step}" = "anal" ]] || [[ "${step}" = "analcalc" ]]; then - - export MKL_NUM_THREADS=4 - export MKL_CBWR=AUTO - - export CFP_MP=${CFP_MP:-"YES"} - export USE_CFP=${USE_CFP:-"YES"} - export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" - - export NTHREADS_GSI=${NTHREADSmax} - export APRUN_GSI="${APRUN_default} --cpus-per-task=${NTHREADS_GSI}" - - export NTHREADS_CALCINC=${threads_per_task_calcinc:-1} - if [[ ${NTHREADS_CALCINC} -gt ${max_threads_per_task} ]]; then - export NTHREADS_CALCINC=${max_threads_per_task} - fi - export APRUN_CALCINC="${launcher} \$ncmd --cpus-per-task=${NTHREADS_CALCINC}" - - export NTHREADS_CYCLE=${threads_per_task_cycle:-12} - if [[ ${NTHREADS_CYCLE} -gt ${max_tasks_per_node} ]]; then - export NTHREADS_CYCLE=${max_tasks_per_node} - fi - ntasks_cycle=${ntiles:-6} - export APRUN_CYCLE="${launcher} -n ${ntasks_cycle} --cpus-per-task=${NTHREADS_CYCLE}" - - export NTHREADS_GAUSFCANL=1 - ntasks_gausfcanl=${ntasks_gausfcanl:-1} - export APRUN_GAUSFCANL="${launcher} -n ${ntasks_gausfcanl} --cpus-per-task=${NTHREADS_GAUSFCANL}" - -elif [[ "${step}" = "offlineanl" ]]; then - - export NTHREADS_CHGRES=${threads_per_task_chgres:-12} - if [[ ${NTHREADS_CHGRES} -gt ${max_tasks_per_node} ]]; then - export NTHREADS_CHGRES=${max_tasks_per_node} - fi - export APRUN_CHGRES="time" - - export NTHREADS_CALCINC=${threads_per_task_calcinc:-1} - if [[ ${NTHREADS_CALCINC} -gt ${max_threads_per_task} ]]; then - export NTHREADS_CALCINC=${max_threads_per_task} - fi - export APRUN_CALCINC="${APRUN_default} --cpus-per-task=${NTHREADS_CALCINC}" - -elif [[ "${step}" = "sfcanl" ]]; then - - export NTHREADS_CYCLE=${threads_per_task:-14} - export APRUN_CYCLE="${APRUN_default} --cpus-per-task=${NTHREADS_CYCLE}" - - # REGRID 
requires 6 tasks for reproducibility - ntasks_regrid=6 - export APRUN_REGRID="${launcher} -n ${ntasks_regrid} " - -elif [[ "${step}" = "eobs" ]]; then - - export MKL_NUM_THREADS=4 - export MKL_CBWR=AUTO - - export NTHREADS_GSI=${NTHREADSmax} - export APRUN_GSI="${APRUN_default} --cpus-per-task=${NTHREADS_GSI}" - - export CFP_MP=${CFP_MP:-"YES"} - export USE_CFP=${USE_CFP:-"YES"} - export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" - -elif [[ "${step}" = "eupd" ]]; then - - export NTHREADS_ENKF=${NTHREADSmax} - export APRUN_ENKF="${launcher} -n ${ntasks_enkf:-${ntasks}} --cpus-per-task=${NTHREADS_ENKF}" - - export CFP_MP=${CFP_MP:-"YES"} - export USE_CFP=${USE_CFP:-"YES"} - export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" - -elif [[ "${step}" = "fcst" ]] || [[ "${step}" = "efcs" ]]; then - # fast - #export launcher="srun -l --hint=nomultithread --distribution=block:block" - # slow3 - #export launcher="srun -l --export=ALL --hint=nomultithread" - export launcher="srun --mpi=pmi2 -l --hint=nomultithread" - #export launcher="srun env -u SLURM_NODELIST" - - (( nnodes = (ntasks+tasks_per_node-1)/tasks_per_node )) - (( ufs_ntasks = nnodes*tasks_per_node )) - # With ESMF threading, the model wants to use the full node - export APRUN_UFS="${launcher} -n ${ufs_ntasks}" - unset nnodes ufs_ntasks - -elif [[ "${step}" = "upp" ]]; then - - export NTHREADS_UPP=${NTHREADS1} - export APRUN_UPP="${APRUN_default} --cpus-per-task=${NTHREADS_UPP}" - -elif [[ "${step}" = "atmos_products" ]]; then - - export USE_CFP="YES" # Use MPMD for downstream product generation on Ursa - -elif [[ "${step}" = "oceanice_products" ]]; then - - export NTHREADS_OCNICEPOST=${NTHREADS1} - export APRUN_OCNICEPOST="${launcher} -n 1 --cpus-per-task=${NTHREADS_OCNICEPOST}" - -elif [[ "${step}" = "ecen" ]]; then - - export NTHREADS_ECEN=${NTHREADSmax} - export APRUN_ECEN="${APRUN_default} --cpus-per-task=${NTHREADS_ECEN}" - - export NTHREADS_CHGRES=${threads_per_task_chgres:-12} - if [[ ${NTHREADS_CHGRES} 
-gt ${max_tasks_per_node} ]]; then - export NTHREADS_CHGRES=${max_tasks_per_node} - fi - export APRUN_CHGRES="time" - - export NTHREADS_CALCINC=${threads_per_task_calcinc:-1} - if [[ ${NTHREADS_CALCINC} -gt ${max_threads_per_task} ]]; then - export NTHREADS_CALCINC=${max_threads_per_task} - fi - export APRUN_CALCINC="${APRUN_default} --cpus-per-task=${NTHREADS_CALCINC}" - -elif [[ "${step}" = "esfc" ]]; then - - export NTHREADS_ESFC=${threads_per_task_esfc:-${max_threads_per_task}} - export APRUN_ESFC="${APRUN_default} --cpus-per-task=${NTHREADS_ESFC}" - - export NTHREADS_CYCLE=${threads_per_task_cycle:-14} - if [[ ${NTHREADS_CYCLE} -gt ${max_tasks_per_node} ]]; then - export NTHREADS_CYCLE=${max_tasks_per_node} - fi - export APRUN_CYCLE="${APRUN_default} --cpus-per-task=${NTHREADS_CYCLE}" - - # REGRID requires 6 tasks for reproducibility - ntasks_regrid=6 - export APRUN_REGRID="${launcher} -n ${ntasks_regrid} " - -elif [[ "${step}" = "epos" ]]; then - - export NTHREADS_EPOS=${NTHREADSmax} - export APRUN_EPOS="${APRUN_default} --cpus-per-task=${NTHREADS_EPOS}" - -elif [[ "${step}" = "postsnd" ]]; then - - export CFP_MP="YES" - - export NTHREADS_POSTSND=${NTHREADS1} - export APRUN_POSTSND="${APRUN_default} --cpus-per-task=${NTHREADS_POSTSND}" - - export NTHREADS_POSTSNDCFP=${threads_per_task_postsndcfp:-1} - if [[ ${NTHREADS_POSTSNDCFP} -gt ${max_threads_per_task} ]]; then - export NTHREADS_POSTSNDCFP=${max_threads_per_task} - fi - export APRUN_POSTSNDCFP="${launcher} -n ${ntasks_postsndcfp} ${mpmd_opt}" - -elif [[ "${step}" = "awips" ]]; then - - export NTHREADS_AWIPS=${NTHREADS1} - export APRUN_AWIPSCFP="${APRUN_default} ${mpmd_opt}" - -elif [[ "${step}" = "fit2obs" ]]; then - - export NTHREADS_FIT2OBS=${NTHREADS1} - export MPIRUN="${APRUN_default} --cpus-per-task=${NTHREADS_FIT2OBS}" - -fi diff --git a/ush/forecast_postdet.sh b/ush/forecast_postdet.sh index 30f6e08601a..5a0ba48ab47 100755 --- a/ush/forecast_postdet.sh +++ b/ush/forecast_postdet.sh @@ -108,14 
+108,15 @@ FV3_postdet() { # Check for consistency # TODO: the checker has a --fatal option, which is not used here. This needs to be decided how to handle. if [[ "${CHECK_LAND_RESTART_OROG:-NO}" == "YES" ]]; then - if [[ "${RUN_WITH_CONTAINER}" == "YES" ]]; then - "${USHgfs}/../exec/run_python.sh" \ - "${USHgfs}/check_land_input_orography.py" \ - --input_dir "${DATA}/INPUT" --orog_dir "${DATA}/INPUT" - else - "${USHgfs}/check_land_input_orography.py" \ - --input_dir "${DATA}/INPUT" --orog_dir "${DATA}/INPUT" - fi + #if [[ "${RUN_WITH_CONTAINER}" == "YES" ]]; then + # "${USHgfs}/../exec/run_python.sh" \ + # "${USHgfs}/check_land_input_orography.py" \ + # --input_dir "${DATA}/INPUT" --orog_dir "${DATA}/INPUT" + #else + # "${USHgfs}/check_land_input_orography.py" \ + # --input_dir "${DATA}/INPUT" --orog_dir "${DATA}/INPUT" + #fi + "${PYCMD}" "${USHgfs}"/check_land_input_orography.py --input_dir "${DATA}/INPUT" --orog_dir "${DATA}/INPUT" err=$? if [[ ${err} -ne 0 ]]; then echo "FATAL ERROR: check_land_input_orography.py returned error code ${err}, ABORT!" diff --git a/ush/preamble.sh b/ush/preamble.sh index a241d8f2a4e..4c88032e89c 100755 --- a/ush/preamble.sh +++ b/ush/preamble.sh @@ -176,9 +176,19 @@ trap "postamble ${_calling_script} ${start_time} \$?" EXIT source "${HOMEgfs}/ush/bash_utils.sh" -# Decide if run with container +# Define if run with container, default as NO. export RUN_WITH_CONTAINER=NO +if [[ "${RUN_WITH_CONTAINER}" == "YES" ]]; then + # if within container, will run python executbale inside container, + # also need to tell some scripts that it is run in container, with "-c" option. 
+ export PYCMD="${HOMEgfs}"/exec/run_python.sh + export PYEXTRAARGS=" -c -v" +else + export PYCMD=python + export PYEXTRAARGS="" +fi + # Turn on our settings export SHELLOPTS declare -xf set_strict From b79c4d1817cd14ab416cc25c27497b9c6f98ab89 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Thu, 9 Oct 2025 23:08:39 +0000 Subject: [PATCH 130/134] merge develop change in --- dev/container/gen-run-cases.sh | 7 - dev/container/utils/create-atmos-products.sh | 4 +- dev/ush/load_modules.sh | 13 + jobs/JGLOBAL_ATMOS_PRODUCTS | 2 + scripts/exgfs_wave_init.sh | 1 - ush/forecast_postdet.sh | 11 +- ush/interp_atmos_master.sh | 2 - ush/jjob_header.sh | 2 +- ush/python/pygfs/task/aero_analysis.py | 173 +++------ ush/python/pygfs/task/aero_bmatrix.py | 69 ++-- ush/python/pygfs/task/analysis.py | 359 ++++++++----------- ush/python/pygfs/task/analysis_stats.py | 3 +- ush/python/pygfs/task/atm_analysis.py | 222 +++--------- ush/python/pygfs/task/atmens_analysis.py | 191 ++-------- ush/python/pygfs/task/ensemble_recenter.py | 104 ++---- ush/python/pygfs/task/fv3_analysis_calc.py | 80 ++--- ush/python/pygfs/task/marine_analysis.py | 3 +- ush/python/pygfs/task/marine_bmat.py | 3 +- ush/python/pygfs/task/marine_recenter.py | 3 +- ush/python/pygfs/task/snow_analysis.py | 190 +++------- ush/python/pygfs/task/snowens_analysis.py | 204 ++--------- ush/wave_grib2_sbs.sh | 1 - ush/wave_grid_interp_sbs.sh | 2 - ush/wave_grid_moddef.sh | 1 - ush/wave_tar.sh | 6 +- 25 files changed, 453 insertions(+), 1203 deletions(-) diff --git a/dev/container/gen-run-cases.sh b/dev/container/gen-run-cases.sh index d15af6d56c6..685bb3e7389 100755 --- a/dev/container/gen-run-cases.sh +++ b/dev/container/gen-run-cases.sh @@ -26,10 +26,6 @@ if [[ ${MACHINE_ID} = ursa* ]] ; then module load rocoto/1.3.7 rocotocmd=$(command -v rocotorun) - - if [[ "${run_with_container}" == "YES" ]]; then - cp "${HOMEgfs}/env/URSA.env.container" "${HOMEgfs}/env/URSA.env" - fi elif [[ ${MACHINE_ID} = gaea* ]] ; then 
container=/gpfs/f6/scratch/Wei.Huang/container/${img} rundir=/gpfs/f6/scratch/${USER}/run @@ -37,9 +33,6 @@ elif [[ ${MACHINE_ID} = gaea* ]] ; then HPC_ACCOUNT=bil-fire8 rocotocmd=/autofs/ncrc-svm1_home2/Christopher.W.Harrop/rocoto-1.3.7/bin/rocotorun - if [[ "${run_with_container}" == "YES" ]]; then - cp "${HOMEgfs}/env/GAEAC6.env.container" "${HOMEgfs}/env/GAEAC6.env" - fi elif [[ ${MACHINE_ID} = noaacloud* ]] ; then TOPICDIR=/bucket/global-workflow-shared-data/ICSDIR container=/contrib/containers/${img} diff --git a/dev/container/utils/create-atmos-products.sh b/dev/container/utils/create-atmos-products.sh index 6df0046d231..27d902ed79b 100755 --- a/dev/container/utils/create-atmos-products.sh +++ b/dev/container/utils/create-atmos-products.sh @@ -42,12 +42,12 @@ fi eap_script="${HOMEgfs}"/exec/exglobal_atmos_products.sh cat > "${eap_script}" << EOF_ATMOS_PRODUCTS #!/bin/bash - LD_LIBRARY_PATH=\$(dirname ${container}) + LD_LIBRARY_PATH=\$(dirname ${HOMEgfs}) export LD_LIBRARY_PATH singularity exec \\ ${bindings} \\ - ${container}? 
\\ + ${container} \\ ${HOMEgfs}/scripts/exglobal_atmos_products.sh "\$@" EOF_ATMOS_PRODUCTS diff --git a/dev/ush/load_modules.sh b/dev/ush/load_modules.sh index 4bea414b76d..7f5ccf5c69f 100644 --- a/dev/ush/load_modules.sh +++ b/dev/ush/load_modules.sh @@ -62,6 +62,8 @@ fi source "${HOMEgfs}/ush/detect_machine.sh" source "${HOMEgfs}/ush/module-setup.sh" +echo "MACHINE_ID: ${MACHINE_ID}" + # Handle different module types case "${MODULE_TYPE}" in "ufswm") @@ -173,6 +175,16 @@ case "${MODULE_TYPE}" in exit 1 fi + if [[ "${MACHINE_ID}" = "container" ]]; then + source /usr/lmod/lmod/init/bash + module purge + module use "${HOMEgfs}/sorc/gfs_utils.fd/modulefiles" + module load gfsutils_container.intel + module load wgrib2 + module load prod_util + export UTILROOT=${prod_util_ROOT} + else + # Load our modules: module use "${HOMEgfs}/modulefiles" @@ -198,6 +210,7 @@ case "${MODULE_TYPE}" in echo "FATAL ERROR: Could not determine target module for MODULE_TYPE='${MODULE_TYPE}' and MACHINE_ID='${MACHINE_ID}'" exit 1 fi + fi module list diff --git a/jobs/JGLOBAL_ATMOS_PRODUCTS b/jobs/JGLOBAL_ATMOS_PRODUCTS index 3b476c4e049..9ee87131bdd 100755 --- a/jobs/JGLOBAL_ATMOS_PRODUCTS +++ b/jobs/JGLOBAL_ATMOS_PRODUCTS @@ -1,6 +1,8 @@ #! /usr/bin/env bash source "${HOMEgfs}/ush/jjob_header.sh" -e "atmos_products" -c "base atmos_products" +#source "${HOMEgfs}/ush/preamble.sh" +echo "RUN_WITH_CONTAINER: ${RUN_WITH_CONTAINER}" ############################################## # Begin JOB SPECIFIC work diff --git a/scripts/exgfs_wave_init.sh b/scripts/exgfs_wave_init.sh index 329559f81c0..74e328bcccb 100755 --- a/scripts/exgfs_wave_init.sh +++ b/scripts/exgfs_wave_init.sh @@ -17,7 +17,6 @@ # ############################################################################### # -source "${HOMEgfs}/dev/ush/load_modules.sh" # --------------------------------------------------------------------------- # # 1. 
Get files that are used by most child scripts diff --git a/ush/forecast_postdet.sh b/ush/forecast_postdet.sh index 5a0ba48ab47..293f2850502 100755 --- a/ush/forecast_postdet.sh +++ b/ush/forecast_postdet.sh @@ -108,15 +108,8 @@ FV3_postdet() { # Check for consistency # TODO: the checker has a --fatal option, which is not used here. This needs to be decided how to handle. if [[ "${CHECK_LAND_RESTART_OROG:-NO}" == "YES" ]]; then - #if [[ "${RUN_WITH_CONTAINER}" == "YES" ]]; then - # "${USHgfs}/../exec/run_python.sh" \ - # "${USHgfs}/check_land_input_orography.py" \ - # --input_dir "${DATA}/INPUT" --orog_dir "${DATA}/INPUT" - #else - # "${USHgfs}/check_land_input_orography.py" \ - # --input_dir "${DATA}/INPUT" --orog_dir "${DATA}/INPUT" - #fi - "${PYCMD}" "${USHgfs}"/check_land_input_orography.py --input_dir "${DATA}/INPUT" --orog_dir "${DATA}/INPUT" + "${PYCMD}" "${USHgfs}"/check_land_input_orography.py \ + --input_dir "${DATA}/INPUT" --orog_dir "${DATA}/INPUT" err=$? if [[ ${err} -ne 0 ]]; then echo "FATAL ERROR: check_land_input_orography.py returned error code ${err}, ABORT!" diff --git a/ush/interp_atmos_master.sh b/ush/interp_atmos_master.sh index 0fe2fa4fc6a..00bbde65043 100755 --- a/ush/interp_atmos_master.sh +++ b/ush/interp_atmos_master.sh @@ -4,8 +4,6 @@ # Generate 0.25 / 0.5 / 1 degree interpolated grib2 files for each input grib2 file # trim's RH and tweaks sea-ice cover -source "${HOMEgfs}/dev/ush/load_modules.sh" - input_file=${1:-"pgb2file_in"} # Input pressure grib2 file output_file_prefix=${2:-"pgb2file_out"} # Prefix for output grib2 file; the prefix is appended by resolution e.g. _0p25 grid_string=${3:-"0p25"} # Target grids; e.g. 
"0p25" or "0p25:0p50"; If multiple, they need to be ":" seperated diff --git a/ush/jjob_header.sh b/ush/jjob_header.sh index 93af09d6240..5de17f73a49 100755 --- a/ush/jjob_header.sh +++ b/ush/jjob_header.sh @@ -41,7 +41,7 @@ # [default: $$] _calling_script="${BASH_SOURCE[1]}" -source "${HOMEgfs}/dev/ush/load_modules.sh" +source "${HOMEgfs}/ush/preamble.sh" OPTIND=1 while getopts "c:e:" option; do diff --git a/ush/python/pygfs/task/aero_analysis.py b/ush/python/pygfs/task/aero_analysis.py index 28310ea1643..d5408e34e68 100644 --- a/ush/python/pygfs/task/aero_analysis.py +++ b/ush/python/pygfs/task/aero_analysis.py @@ -1,28 +1,24 @@ #!/usr/bin/env python3 import os -import glob -import gzip -import tarfile from logging import getLogger -from pprint import pformat from netCDF4 import Dataset from typing import Dict, List - -from wxflow import (AttrDict, - FileHandler, - add_to_datetime, to_fv3time, to_timedelta, - to_fv3time, - Task, - YAMLFile, parse_j2yaml, - logit) +from pygfs.task.analysis import Analysis from pygfs.jedi import Jedi +from wxflow import ( + AttrDict, + FileHandler, + to_fv3time, to_timedelta, + YAMLFile, parse_j2yaml, + logit +) import numpy as np logger = getLogger(__name__.split('.')[-1]) -class AerosolAnalysis(Task): +class AerosolAnalysis(Analysis): """ Class for JEDI-based global aerosol analysis tasks """ @@ -48,36 +44,40 @@ def __init__(self, config): _res = int(self.task_config['CASE'][1:]) _res_anl = int(self.task_config['CASE_ANL'][1:]) - _window_begin = add_to_datetime(self.task_config.current_cycle, -to_timedelta(f"{self.task_config['assim_freq']}H") / 2) - # Create a local dictionary that is repeatedly used across this class - local_dict = AttrDict( + if self.task_config.DOIAU: + _anl_time = self.task_config.WINDOW_BEGIN + else: + _anl_time = self.task_config.current_cycle + + _bkg_times = [] + for hour in self.task_config.aero_bkg_times: + _bkg_times.append(self.task_config.WINDOW_BEGIN + to_timedelta(f"{str(hour)}H") - 
to_timedelta(f"{self.task_config.assim_freq}H") / 2) + + # Extend task_config with variables repeatedly used across this class + self.task_config.update(AttrDict( { 'npx_ges': _res + 1, 'npy_ges': _res + 1, 'npz_ges': self.task_config.LEVS - 1, - 'npz': self.task_config.LEVS - 1, 'npx_anl': _res_anl + 1, 'npy_anl': _res_anl + 1, 'npz_anl': self.task_config['LEVS'] - 1, - 'AERO_WINDOW_BEGIN': _window_begin, - 'AERO_WINDOW_LENGTH': f"PT{self.task_config['assim_freq']}H", - 'aero_bkg_fhr': [fh - 3 for fh in self.task_config['aero_bkg_times']], - 'OPREFIX': f"{self.task_config.RUN}.t{self.task_config.cyc:02d}z.", - 'APREFIX': f"{self.task_config.RUN}.t{self.task_config.cyc:02d}z.", - 'GPREFIX': f"gcdas.t{self.task_config.previous_cycle.hour:02d}z.", - 'aero_obsdatain_path': f"{self.task_config.DATA}/obs/", - 'aero_obsdataout_path': f"{self.task_config.DATA}/diags/", - 'BKG_TSTEP': "PT3H" # FGAT + 'npz': self.task_config.LEVS - 1, + 'BKG_TSTEP': "PT3H", # FGAT + 'BERROR_YAML': f'aero_background_error_static_{self.task_config.STATICB_TYPE}', + 'AERO_BMATRIX_RESCALE_YAML': 'aero_gen_bmatrix_rescale_default.yaml.j2', + 'anl_time': _anl_time, + 'bkg_times': _bkg_times, } - ) + )) - # Extend task_config with local_dict - self.task_config = AttrDict(**self.task_config, **local_dict) + # Extend task_config with content of config yaml for this task + self.task_config.update(parse_j2yaml(self.task_config.TASK_CONFIG_YAML, self.task_config)) # Create dictionary of Jedi objects expected_keys = ['aeroanlvar'] - self.jedi_dict = Jedi.get_jedi_dict(self.task_config.JEDI_CONFIG_YAML, self.task_config, expected_keys) + self.jedi_dict = Jedi.get_jedi_dict(self.task_config.jedi_config, expected_keys) @logit(logger) def initialize(self) -> None: @@ -85,55 +85,18 @@ def initialize(self) -> None: This method will initialize a global aerosol analysis using JEDI. 
This includes: - - initialize JEDI applications - - staging observation files - - staging bias correction files - - staging CRTM fix files - - staging FV3-JEDI fix files - - staging B error files - - staging model backgrounds - - creating output directories + - stage input files from COM and create output directories + - extract bias corrections from tar files + - initialize JEDI application """ - # stage observations - logger.info(f"Staging list of observation files generated from JEDI config") - obs_dict = self.jedi_dict['aeroanlvar'].render_jcb(self.task_config, 'aero_obs_staging') - FileHandler(obs_dict).sync() - logger.debug(f"Observation files:\n{pformat(obs_dict)}") + # Stage files from COM + logger.info(f"Staging files from COM") + FileHandler(self.task_config.data_in).sync() - # # stage bias corrections - logger.info(f"Staging list of bias correction files") - bias_dict = self.jedi_dict['aeroanlvar'].render_jcb(self.task_config, 'aero_bias_staging') - - if bias_dict['copy'] is None: - logger.info(f"No bias correction files to stage") - else: - try: - bias_dict['copy'] = Jedi.remove_redundant(bias_dict['copy']) - FileHandler(bias_dict).sync() - logger.debug(f"Bias correction files:\n{pformat(bias_dict)}") - - # extract bias corrections - Jedi.extract_tar_from_filehandler_dict(bias_dict) - except FileNotFoundError: - logger.error(f"Bias correction files or directories do not exist:\n{pformat(bias_dict)}") - - # stage CRTM fix files - logger.info(f"Staging CRTM fix files from {self.task_config.STAGE_CRTM_COEFF_YAML}") - crtm_fix_dict = parse_j2yaml(self.task_config.STAGE_CRTM_COEFF_YAML, self.task_config) - FileHandler(crtm_fix_dict).sync() - logger.debug(f"CRTM fix files:\n{pformat(crtm_fix_dict)}") - - # stage fix files - logger.info(f"Staging JEDI fix files from {self.task_config.STAGE_JEDI_FIX_YAML}") - jedi_fix_dict = parse_j2yaml(self.task_config.STAGE_JEDI_FIX_YAML, self.task_config) - FileHandler(jedi_fix_dict).sync() - logger.debug(f"JEDI fix 
files:\n{pformat(jedi_fix_dict)}") - - # stage files from COM and create working directories - logger.info(f"Staging files prescribed from {self.task_config.STAGE_YAML}") - stage_dict = parse_j2yaml(self.task_config.STAGE_YAML, self.task_config) - FileHandler(stage_dict).sync() + # Extract bias corrections from tar files + logger.info(f"Extracting bias corrections from tar files") + self.untar_bias_corrections() # initialize JEDI variational application logger.info(f"Initializing JEDI variational DA application") @@ -161,56 +124,28 @@ def finalize(self) -> None: This method will finalize a global aerosol analysis using JEDI. This includes: - - tarring up output diag files and place in ROTDIR - - copying the generated YAML file from initialize to the ROTDIR - - copying the guess files to the ROTDIR - - applying the increments to the original RESTART files - - moving the increment files to the ROTDIR + - apply increments to the original RESTART files + - compress and tar output diag files in COM + - tar radiative bias correction files in COM + - save output files and YAMLs to COM """ - # ---- tar up diags - # path of output tar statfile - logger.info('Preparing observation space diagnostics for archiving') - aerostat = os.path.join(self.task_config.COMOUT_CHEM_ANALYSIS, f"{self.task_config['APREFIX']}aerostat.tgz") - - # get list of diag files to put in tarball - diags = glob.glob(os.path.join(self.task_config['DATA'], 'diags', 'diag*nc')) - - # gzip the files first - for diagfile in diags: - logger.info(f'Adding {diagfile} to tar file') - with open(diagfile, 'rb') as f_in, gzip.open(f"{diagfile}.gz", 'wb') as f_out: - f_out.writelines(f_in) # ---- add increments to RESTART files logger.info('Adding increments to RESTART files') self._add_fms_cube_sphere_increments() - # tar up bias correction files - bfile = f"{self.task_config.APREFIX}aero_varbc_params.tar" - aertar = os.path.join(self.task_config.COMOUT_CHEM_ANALYSIS, bfile) - - # get lists of aerosol bias 
correction files to add to tarball - satlist = glob.glob(os.path.join(self.task_config.DATA, 'bc', '*satbias*nc')) - - # copy files back to COM - logger.info(f"Copying files to COM based on {self.task_config.SAVE_YAML}") - save_dict = parse_j2yaml(self.task_config.SAVE_YAML, self.task_config) - FileHandler(save_dict).sync() - - # tar aerosol bias correction files to ROTDIR - logger.info(f"Creating aerosol bias correction tar file {aertar}") - with tarfile.open(aertar, 'w') as aerbcor: - for satfile in satlist: - aerbcor.add(satfile, arcname=os.path.basename(satfile)) - logger.info(f"Add {aerbcor.getnames()}") - - # open tar file for writing - with tarfile.open(aerostat, "w|gz") as archive: - for diagfile in diags: - diaggzip = f"{diagfile}.gz" - archive.add(diaggzip, arcname=os.path.basename(diaggzip)) - logger.info(f'Saved diags to {aerostat}') + # Compress and tar diag files in COM directory + self.tar_diag_files(self.task_config.COMOUT_CHEM_ANALYSIS, + f"{self.task_config['APREFIX']}aerostat.tgz") + + # Tar radiative bias correction files into COM directory + self.tar_radiative_bias_corrections(self.task_config.COMOUT_CHEM_ANALYSIS, + f"{self.task_config.APREFIX}aero_varbc_params.tar") + + # Save files from COM + logger.info(f"Saving files to COM") + FileHandler(self.task_config.data_out).sync() def clean(self): super().clean() diff --git a/ush/python/pygfs/task/aero_bmatrix.py b/ush/python/pygfs/task/aero_bmatrix.py index cc9918d4d77..511b7dcdcf5 100644 --- a/ush/python/pygfs/task/aero_bmatrix.py +++ b/ush/python/pygfs/task/aero_bmatrix.py @@ -1,18 +1,14 @@ #!/usr/bin/env python3 -import os from logging import getLogger -from typing import List, Dict - -from wxflow import (AttrDict, FileHandler, - add_to_datetime, to_timedelta, - parse_j2yaml, logit, Task) +from pygfs.task.analysis import Analysis from pygfs.jedi import Jedi +from wxflow import AttrDict, FileHandler, add_to_datetime, to_timedelta, parse_j2yaml, logit logger = getLogger(__name__.split('.')[-1]) 
-class AerosolBMatrix(Task): +class AerosolBMatrix(Analysis): """ Class for global aerosol BMatrix tasks """ @@ -40,46 +36,37 @@ def __init__(self, config): _res_anl = int(self.task_config['CASE_ANL'][1:]) _window_begin = add_to_datetime(self.task_config.current_cycle, -to_timedelta(f"{self.task_config['assim_freq']}H") / 2) - # fix ocnres - self.task_config.OCNRES = f"{self.task_config.OCNRES:03d}" - - # Create a local dictionary that is repeatedly used across this class - local_dict = AttrDict( + # Extend task_config with variables repeatedly used across this class + self.task_config.update(AttrDict( { 'npx_ges': _res + 1, 'npy_ges': _res + 1, 'npz_ges': self.task_config.LEVS - 1, - 'npz': self.task_config.LEVS - 1, 'npx_anl': _res_anl + 1, 'npy_anl': _res_anl + 1, 'npz_anl': self.task_config['LEVS'] - 1, - 'AERO_WINDOW_BEGIN': _window_begin, - 'AERO_WINDOW_LENGTH': f"PT{self.task_config['assim_freq']}H", - 'aero_bkg_fhr': map(int, str(self.task_config['aero_bkg_times']).split(',')), - 'OPREFIX': f"{self.task_config.RUN}.t{self.task_config.cyc:02d}z.", - 'APREFIX': f"{self.task_config.RUN}.t{self.task_config.cyc:02d}z.", - 'GPREFIX': f"gdas.t{self.task_config.previous_cycle.hour:02d}z.", - 'aero_obsdatain_path': f"{self.task_config.DATA}/obs/", - 'aero_obsdataout_path': f"{self.task_config.DATA}/diags/", + 'npz': self.task_config.LEVS - 1, + 'BERROR_YAML': f'aero_background_error_static_{self.task_config.STATICB_TYPE}', + 'BERROR_DATA_DIR': f'{self.task_config.FIXgfs}/gdas/aero/clim_b', + 'AERO_BMATRIX_RESCALE_YAML': 'aero_gen_bmatrix_rescale_default.yaml.j2', } - ) + )) - # task_config is everything that this task should need - self.task_config = AttrDict(**self.task_config, **local_dict) + # Extend task_config with content of config yaml for this task + self.task_config.update(parse_j2yaml(self.task_config.TASK_CONFIG_YAML, self.task_config)) # Create dictionary of Jedi objects expected_keys = ['aero_interpbkg', 'aero_diagb', 'aero_diffusion'] - self.jedi_dict 
= Jedi.get_jedi_dict(self.task_config.JEDI_CONFIG_YAML, self.task_config, expected_keys) + self.jedi_dict = Jedi.get_jedi_dict(self.task_config.jedi_config, self.task_config, expected_keys) @logit(logger) - def initialize(self: Task) -> None: + def initialize(self) -> None: """Initialize a global aerosol B-matrix This method will initialize a global aerosol B-Matrix. This includes: - - staging the determinstic backgrounds - - staging fix files - - initializing the JEDI applications + - stage input files from COM and create output directories + - initialize JEDI applications Parameters ---------- @@ -90,17 +77,12 @@ def initialize(self: Task) -> None: None """ - # stage fix files - logger.info(f"Staging JEDI fix files from {self.task_config.STAGE_JEDI_FIX_YAML}") - jedi_fix_list = parse_j2yaml(self.task_config.STAGE_JEDI_FIX_YAML, self.task_config) - FileHandler(jedi_fix_list).sync() - - # stage files from COM and create working directories - logger.info(f"Staging files from COM and creating working directories {self.task_config.STAGE_YAML}") - stage_dict = parse_j2yaml(self.task_config.STAGE_YAML, self.task_config) - FileHandler(stage_dict).sync() + # Stage files from COM + logger.info(f"Staging files from COM") + FileHandler(self.task_config.data_in).sync() # initialize JEDI applications + logger.info(f"Initializing JEDI applications") self.jedi_dict['aero_interpbkg'].initialize(self.task_config) self.jedi_dict['aero_diagb'].initialize(self.task_config) self.jedi_dict['aero_diffusion'].initialize(self.task_config) @@ -137,11 +119,10 @@ def finalize(self) -> None: This method will finalize a global aerosol bmatrix using JEDI. 
This includes: - - copying the bmatrix files to COM - - copying YAMLs to COM + - save output files and YAMLs to COM """ - # save files to COMOUT - logger.info(f"Saving files to COMOUT based on {self.task_config.SAVE_YAML}") - save_dict = parse_j2yaml(self.task_config.SAVE_YAML, self.task_config) - FileHandler(save_dict).sync() + + # Save files to COM + logger.info(f"Saving files to COM") + FileHandler(self.task_config.data_out).sync() diff --git a/ush/python/pygfs/task/analysis.py b/ush/python/pygfs/task/analysis.py index 1d8b38483b0..1998e5a6c04 100644 --- a/ush/python/pygfs/task/analysis.py +++ b/ush/python/pygfs/task/analysis.py @@ -1,274 +1,213 @@ #!/usr/bin/env python3 -import os import glob -import tarfile +import gzip from logging import getLogger -from pprint import pformat -from netCDF4 import Dataset -from typing import List, Dict, Any, Union, Optional - -from jcb import render -from wxflow import (parse_j2yaml, FileHandler, rm_p, logit, - Task, Executable, WorkflowException, to_fv3time, to_YMD, - Template, TemplateConstants) +import os +import tarfile +from typing import Any, Dict +from wxflow import (AttrDict, Task, WorkflowException, + add_to_datetime, to_timedelta, to_isotime, + parse_j2yaml, + logit) logger = getLogger(__name__.split('.')[-1]) class Analysis(Task): - """Parent class for GDAS tasks - - The Analysis class is the parent class for all - Global Data Assimilation System (GDAS) tasks - directly related to peforming an analysis """ + General class for JEDI-based global analysis tasks + """ + @logit(logger, name="Analysis") + def __init__(self, config: Dict[str, Any]): + """Constructor global atm analysis task - def __init__(self, config: Dict[str, Any]) -> None: - super().__init__(config) - # Store location of GDASApp jinja2 templates - self.gdasapp_j2tmpl_dir = os.path.join(self.task_config.PARMgfs, 'gdas') - # fix ocnres - self.task_config.OCNRES = f"{self.task_config.OCNRES :03d}" - - def initialize(self) -> None: - super().initialize() - 
- # all JEDI analyses need a JEDI config - self.task_config.jedi_config = self.get_jedi_config() - - # all analyses need to stage observations - obs_dict = self.get_obs_dict() - FileHandler(obs_dict).sync() - - # link jedi executable to run directory - self.link_jediexe() - - @logit(logger) - def get_jedi_config(self, algorithm: Optional[str] = None) -> Dict[str, Any]: - """Compile a dictionary of JEDI configuration from JEDIYAML template file + This method will construct a global atm analysis task. + This includes: + - extending the task_config attribute AttrDict to include parameters required for this task Parameters ---------- - algorithm (optional) : str - Name of the algorithm to use in the JEDI configuration. Will override the algorithm - set in the self.task_config.JCB_<>_YAML file + config: Dict + dictionary object containing task configuration Returns ---------- - jedi_config : Dict - a dictionary containing the fully rendered JEDI yaml configuration + None """ + super().__init__(config) - # generate JEDI YAML file - logger.info(f"Generate JEDI YAML config: {self.task_config.jedi_yaml}") - - if 'JCB_BASE_YAML' in self.task_config.keys(): - # Step 1: fill templates of the jcb base YAML file - jcb_config = parse_j2yaml(self.task_config.JCB_BASE_YAML, self.task_config) - - # Step 2: (optional) fill templates of algorithm override YAML and merge - if 'JCB_ALGO_YAML' in self.task_config.keys(): - jcb_algo_config = parse_j2yaml(self.task_config.JCB_ALGO_YAML, self.task_config) - jcb_config = {**jcb_config, **jcb_algo_config} - - # If algorithm is present override the algorithm in the JEDI config - if algorithm: - jcb_config['algorithm'] = algorithm - - # Step 3: generate the JEDI Yaml using JCB driving YAML - jedi_config = render(jcb_config) - elif 'JEDIYAML' in self.task_config.keys(): - # Generate JEDI YAML file (without using JCB) - logger.info(f"Generate JEDI YAML config: {self.task_config.jedi_yaml}") - jedi_config = parse_j2yaml(self.task_config.JEDIYAML, 
self.task_config, - searchpath=self.gdasapp_j2tmpl_dir) - logger.debug(f"JEDI config:\n{pformat(jedi_config)}") - else: - raise KeyError(f"Task config must contain JCB_BASE_YAML or JEDIYAML") + # Get assimilation window times + _window_begin = add_to_datetime(self.task_config.current_cycle, -to_timedelta(f"{self.task_config.assim_freq}H") / 2) + _next_cycle = add_to_datetime(self.task_config.current_cycle, to_timedelta(f"{self.task_config.assim_freq}H")) - logger.debug(f"JEDI config:\n{pformat(jedi_config)}") + # Get specific assimilation times within the assimulation window + _iau_times_iso = [] + for hour in self.task_config.IAUFHRS: + _iau_times_iso.append(to_isotime(_window_begin + to_timedelta(f"{str(hour)}H") - to_timedelta(f"{self.task_config.assim_freq}H") / 2)) - return jedi_config + # Get observations list from obs list yaml + if 'OBS_LIST_YAML' in self.task_config: + _observations = parse_j2yaml(self.task_config.OBS_LIST_YAML, self.task_config)['observations'] + else: + _observations = [] - @logit(logger) - def get_obs_dict(self) -> Dict[str, Any]: - """Compile a dictionary of observation files to copy + # Get bias correction dict from bias files yaml + if 'BIAS_FILES_YAML' in self.task_config: + _bias_files = parse_j2yaml(self.task_config.BIAS_FILES_YAML, self.task_config)['bias_files'] + else: + _bias_files = AttrDict - This method extracts 'observers' from the JEDI yaml and from that list, extracts a list of - observation files that are to be copied to the run directory - from the observation input directory + # Set prefix needed for GPREFIX, depedning on the model + if self.task_config.NET == 'gcafs': + _da_prefix = 'gcdas' + else: + _da_prefix = 'gdas' + + # Extend task_config with variables that are repeatedly used across this class + self.task_config.update(AttrDict( + { + 'WINDOW_BEGIN': _window_begin, + 'WINDOW_LENGTH': f"PT{self.task_config.assim_freq}H", + 'next_cycle': _next_cycle, + 'OPREFIX': 
f"{self.task_config.RUN.replace('enkf','')}.t{self.task_config.cyc:02d}z.", + 'APREFIX': f"{self.task_config.RUN.replace('enkf','')}.t{self.task_config.cyc:02d}z.", + 'APREFIX_ENS': f"enkf{self.task_config.RUN.replace('enkf','')}.t{self.task_config.cyc:02d}z.", + 'GPREFIX': f"{_da_prefix}.t{self.task_config.previous_cycle.hour:02d}z.", + 'GPREFIX_ENS': f"enkf{_da_prefix}.t{self.task_config.previous_cycle.hour:02d}z.", + 'OCNRES': f"{self.task_config.OCNRES:03d}", + 'iau_times_iso': _iau_times_iso, + 'observations': _observations, + 'bias_files': _bias_files, + } + )) - Parameters - ---------- + def initialize(self) -> None: + self.initialize() - Returns - ---------- - obs_dict: Dict - a dictionary containing the list of observation files to copy for FileHandler - """ + def execute(self) -> None: + super.execute() + + def finalize(self) -> None: + super.finalize() - logger.info(f"Extracting a list of observation files from Jedi config file") - observations = find_value_in_nested_dict(self.task_config.jedi_config, 'observations') - logger.debug(f"observations:\n{pformat(observations)}") - - copylist = [] - for ob in observations['observers']: - obfile = ob['obs space']['obsdatain']['engine']['obsfile'] - basename = os.path.basename(obfile) - copylist.append([os.path.join(self.task_config['COM_OBS'], basename), obfile]) - obs_dict = { - 'mkdir': [os.path.join(self.task_config['DATA'], 'obs')], - 'copy': copylist - } - return obs_dict + def clean(self) -> None: + super().clean() @logit(logger) - def add_fv3_increments(self, inc_file_tmpl: str, bkg_file_tmpl: str, incvars: List) -> None: - """Add cubed-sphere increments to cubed-sphere backgrounds + def untar_bias_corrections(self) -> None: + """Extract bias correction files from tarballs + This method will extract bias correction files from tarballs Parameters ---------- - inc_file_tmpl : str - template of the FV3 increment file of the form: 'filetype.tile{tilenum}.nc' - bkg_file_tmpl : str - template of the FV3 
background file of the form: 'filetype.tile{tilenum}.nc' - incvars : List - List of increment variables to add to the background + None + + Returns + ---------- + None """ - for itile in range(1, self.task_config.ntiles + 1): - inc_path = inc_file_tmpl.format(tilenum=itile) - bkg_path = bkg_file_tmpl.format(tilenum=itile) - with Dataset(inc_path, mode='r') as incfile, Dataset(bkg_path, mode='a') as rstfile: - for vname in incvars: - increment = incfile.variables[vname][:] - bkg = rstfile.variables[vname][:] - anl = bkg + increment - rstfile.variables[vname][:] = anl[:] - try: - rstfile.variables[vname].delncattr('checksum') # remove the checksum so fv3 does not complain - except (AttributeError, RuntimeError): - pass # checksum is missing, move on + bias_file_list = [] + for ob in self.task_config.observations: + if ob in self.task_config.bias_files and not self.task_config.bias_files[ob] in bias_file_list: + bias_file_list.append(self.task_config.bias_files[ob]) + bias_file_path = f'{self.task_config.DATA}/obs/{self.task_config.GPREFIX}{self.task_config.bias_files[ob]}' + if os.path.exists(bias_file_path): + extract_tar(bias_file_path) + else: + logger.warning(f"Bias correction file {bias_file_path} does not exist and will be skipped") @logit(logger) - def link_jediexe(self) -> None: - """ - - This method links a JEDI executable to the run directory + def tar_diag_files(self, comout: str, tarball_name: str) -> None: + """Compress and tar diag files into COM directory Parameters ---------- - Task: GDAS task + comout: str + path to COM output directory + tarball_name: str + name of output tar file Returns ---------- None """ - exe_src = self.task_config.JEDIEXE - # TODO: linking is not permitted per EE2. Needs work in JEDI to be able to copy the exec. 
- logger.info(f"Link executable {exe_src} to DATA/") - logger.warn("Linking is not permitted per EE2.") - exe_dest = os.path.join(self.task_config.DATA, os.path.basename(exe_src)) - if os.path.exists(exe_dest): - rm_p(exe_dest) - os.symlink(exe_src, exe_dest) + # Set paths of output tar files + diagtar = os.path.join(comout, tarball_name) - return exe_dest + # Get lists of files to put in tarballs + diaglist = glob.glob(os.path.join(self.task_config.DATA, 'diags', 'diag*nc')) + + # Compress diag files + logger.info(f"Compressing {len(diaglist)} diag files") + for diagfile in diaglist: + with open(diagfile, 'rb') as f_in, gzip.open(f"{diagfile}.gz", 'wb') as f_out: + f_out.writelines(f_in) + + # Create tarball of compressed diag files in COM + logger.debug(f"Creating tarball {diagtar} with {len(diaglist)} compressed diag files") + with tarfile.open(diagtar, "w") as archive: + for diagfile in diaglist: + diaggzip = f"{diagfile}.gz" + archive.add(diaggzip, arcname=os.path.basename(diaggzip)) - @staticmethod @logit(logger) - def tgz_diags(statfile: str, diagdir: str) -> None: - """tar and gzip the diagnostic files resulting from a JEDI analysis. + def tar_radiative_bias_corrections(self, comout: str, tarball_name: str) -> None: + """Tar radiative bias correction files and into COM directory Parameters ---------- - statfile : str | os.PathLike - Path to the output .tar.gz .tgz file that will contain the diag*.nc files e.g. 
atmstat.tgz - diagdir : str | os.PathLike - Directory containing JEDI diag files + comout: str + path to COM output directory + tarball_name: str + name of output tar file + + Returns + ---------- + None """ - # get list of diag files to put in tarball - diags = glob.glob(os.path.join(diagdir, 'diags', 'diag*nc')) - diags.extend(glob.glob(os.path.join(diagdir, 'diags', 'diag*nc4'))) + # Set paths of output tar files + radtar = os.path.join(comout, tarball_name) - logger.info(f"Compressing {len(diags)} diag files to {statfile}") + # Get lists of files to put in tarballs + satlist = glob.glob(os.path.join(self.task_config.DATA, 'bc', '*satbias*nc')) + tlaplist = glob.glob(os.path.join(self.task_config.DATA, 'obs', '*tlapse.txt')) - # Open tar.gz file for writing - with tarfile.open(statfile, "w:gz") as tgz: - # Add diag files to tarball - for diagfile in diags: - tgz.add(diagfile, arcname=os.path.basename(diagfile)) + # Create tarball of radiance bias correction files + logger.info(f"Creating radiance bias correction tarball {radtar}") + with tarfile.open(radtar, 'w') as radbcor: + logger.info(f"Adding {radbcor.getnames()}") + for satfile in satlist: + radbcor.add(satfile, arcname=os.path.basename(satfile)) + for tlapfile in tlaplist: + # Change OPREFIX to APREFIX in tlapse file name when adding to tarball + radbcor.add(tlapfile, arcname=os.path.basename(tlapfile.replace(self.task_config.GPREFIX, self.task_config.APREFIX))) @logit(logger) -def find_value_in_nested_dict(nested_dict: Dict, target_key: str) -> Any: - """ - Recursively search through a nested dictionary and return the value for the target key. - This returns the first target key it finds. So if a key exists in a subsequent - nested dictionary, it will not be found. 
+def extract_tar(tar_file: str) -> None: + """Extract files from a tarball + + This method extract files from a tarball Parameters ---------- - nested_dict : Dict - Dictionary to search - target_key : str - Key to search for + tar_file + path/name of tarball Returns - ------- - Any - Value of the target key - - Raises - ------ - KeyError - If key is not found in dictionary - - TODO: if this gives issues due to landing on an incorrect key in the nested - dictionary, we will have to implement a more concrete method to search for a key - given a more complete address. See resolved conversations in PR 2387 - - # Example usage: - nested_dict = { - 'a': { - 'b': { - 'c': 1, - 'd': { - 'e': 2, - 'f': 3 - } - }, - 'g': 4 - }, - 'h': { - 'i': 5 - }, - 'j': { - 'k': 6 - } - } - - user_key = input("Enter the key to search for: ") - result = find_value_in_nested_dict(nested_dict, user_key) + ---------- + None """ - if not isinstance(nested_dict, dict): - raise TypeError(f"Input is not of type(dict)") - - result = nested_dict.get(target_key) - if result is not None: - return result - - for value in nested_dict.values(): - if isinstance(value, dict): - try: - result = find_value_in_nested_dict(value, target_key) - if result is not None: - return result - except KeyError: - pass - - raise KeyError(f"Key '{target_key}' not found in the nested dictionary") + # extract files from tar file + tar_path = os.path.dirname(tar_file) + try: + with tarfile.open(tar_file, "r") as tarball: + tarball.extractall(path=tar_path) + logger.info(f"Extract {tarball.getnames()}") + except Exception as e: + raise WorkflowException(f"An error occurred while extracting {tar_file}:\n{e}") from e diff --git a/ush/python/pygfs/task/analysis_stats.py b/ush/python/pygfs/task/analysis_stats.py index cfe34d3257f..086f289d3a1 100644 --- a/ush/python/pygfs/task/analysis_stats.py +++ b/ush/python/pygfs/task/analysis_stats.py @@ -74,7 +74,8 @@ def initialize(self) -> None: # Expected keys are what must be included 
from the JEDI config file. We can # then loop through ob space list from scripts/exglobal_analysis_stats.py expected_keys = ['aero', 'atmos', 'snow'] - self.jedi_dict = Jedi.get_jedi_dict(self.task_config.JEDI_CONFIG_YAML, self.task_config, expected_keys) + jedi_config_dict = parse_j2yaml(self.task_config.JEDI_CONFIG_YAML, self.task_config) + self.jedi_dict = Jedi.get_jedi_dict(jedi_config_dict, self.task_config, expected_keys) logger.info(f"Copying files to {self.task_config.DATA}/stats") diff --git a/ush/python/pygfs/task/atm_analysis.py b/ush/python/pygfs/task/atm_analysis.py index dae5469a1c3..0ea0724ef03 100644 --- a/ush/python/pygfs/task/atm_analysis.py +++ b/ush/python/pygfs/task/atm_analysis.py @@ -1,24 +1,17 @@ #!/usr/bin/env python3 -import os -import glob -import gzip -import tarfile from logging import getLogger -from pprint import pformat -from typing import Any, Dict -from wxflow import (AttrDict, FileHandler, Task, - add_to_datetime, to_timedelta, - parse_j2yaml, - logit) +from pygfs.task.analysis import Analysis from pygfs.jedi import Jedi +from typing import Any, Dict +from wxflow import AttrDict, FileHandler, parse_j2yaml, logit logger = getLogger(__name__.split('.')[-1]) -class AtmAnalysis(Task): +class AtmAnalysis(Analysis): """ - Class for JEDI-based global atm analysis tasks + Class for JEDI-based global atm deterministic analysis tasks """ @logit(logger, name="AtmAnalysis") def __init__(self, config: Dict[str, Any]): @@ -42,37 +35,33 @@ def __init__(self, config: Dict[str, Any]): _res = int(self.task_config.CASE[1:]) _res_anl = int(self.task_config.CASE_ANL[1:]) - _window_begin = add_to_datetime(self.task_config.current_cycle, -to_timedelta(f"{self.task_config.assim_freq}H") / 2) + + if self.task_config.DOHYBVAR: + _BERROR_YAML = f"atmosphere_background_error_hybrid_{self.task_config.STATICB_TYPE}_{self.task_config.LOCALIZATION_TYPE}" + else: + _BERROR_YAML = f"atmosphere_background_error_static_{self.task_config.STATICB_TYPE}" # Create a 
local dictionary that is repeatedly used across this class - local_dict = AttrDict( + self.task_config.update(AttrDict( { 'npx_ges': _res + 1, 'npy_ges': _res + 1, 'npz_ges': self.task_config.LEVS - 1, - 'npz': self.task_config.LEVS - 1, 'npx_anl': _res_anl + 1, 'npy_anl': _res_anl + 1, 'npz_anl': self.task_config.LEVS - 1, - 'ATM_WINDOW_BEGIN': _window_begin, - 'ATM_WINDOW_LENGTH': f"PT{self.task_config.assim_freq}H", - 'OPREFIX': f"{self.task_config.RUN}.t{self.task_config.cyc:02d}z.", - 'APREFIX': f"{self.task_config.RUN}.t{self.task_config.cyc:02d}z.", - 'APREFIX_ENS': f"enkf{self.task_config.RUN}.t{self.task_config.cyc:02d}z.", - 'GPREFIX': f"gdas.t{self.task_config.previous_cycle.hour:02d}z.", - 'GPREFIX_ENS': f"enkfgdas.t{self.task_config.previous_cycle.hour:02d}z.", - 'atm_obsdatain_path': f"{self.task_config.DATA}/obs/", - 'atm_obsdataout_path': f"{self.task_config.DATA}/diags/", - 'BKG_TSTEP': "PT1H" # Placeholder for 4D applications + 'npz': self.task_config.LEVS - 1, + 'BKG_TSTEP': "PT1H", # Placeholder for 4D applications + 'BERROR_YAML': _BERROR_YAML, } - ) + )) - # Extend task_config with local_dict - self.task_config = AttrDict(**self.task_config, **local_dict) + # Extend task_config with content of config yaml for this task + self.task_config.update(parse_j2yaml(self.task_config.TASK_CONFIG_YAML, self.task_config)) # Create dictionary of Jedi objects expected_keys = ['atmanlvar', 'atmanlfv3inc'] - self.jedi_dict = Jedi.get_jedi_dict(self.task_config.JEDI_CONFIG_YAML, self.task_config, expected_keys) + self.jedi_dict = Jedi.get_jedi_dict(self.task_config.jedi_config, self.task_config, expected_keys) @logit(logger) def initialize(self) -> None: @@ -80,14 +69,9 @@ def initialize(self) -> None: This method will initialize a global atm analysis. 
This includes: + - stage input files from COM and create output directories + - extract bias corrections from tar files - initialize JEDI applications - - staging observation files - - staging bias correction files - - staging CRTM fix files - - staging FV3-JEDI fix files - - staging B error files - - staging model backgrounds - - creating output directories Parameters ---------- @@ -98,73 +82,17 @@ def initialize(self) -> None: None """ - # stage observations - logger.info(f"Staging list of observation files") - obs_dict = self.jedi_dict['atmanlvar'].render_jcb(self.task_config, 'atm_obs_staging') - FileHandler(obs_dict).sync() - logger.debug(f"Observation files:\n{pformat(obs_dict)}") - - # stage bias corrections - logger.info(f"Staging list of bias correction files") - bias_dict = self.jedi_dict['atmanlvar'].render_jcb(self.task_config, 'atm_bias_staging') - if bias_dict['copy'] is None: - logger.info(f"No bias correction files to stage") - else: - bias_dict['copy'] = Jedi.remove_redundant(bias_dict['copy']) - FileHandler(bias_dict).sync() - logger.debug(f"Bias correction files:\n{pformat(bias_dict)}") - - # extract bias corrections - Jedi.extract_tar_from_filehandler_dict(bias_dict) - - # stage CRTM fix files - logger.info(f"Staging CRTM fix files from {self.task_config.STAGE_CRTM_COEFF_YAML}") - crtm_fix_dict = parse_j2yaml(self.task_config.STAGE_CRTM_COEFF_YAML, self.task_config) - FileHandler(crtm_fix_dict).sync() - logger.debug(f"CRTM fix files:\n{pformat(crtm_fix_dict)}") - - # stage fix files - logger.info(f"Staging JEDI fix files from {self.task_config.STAGE_JEDI_FIX_YAML}") - jedi_fix_dict = parse_j2yaml(self.task_config.STAGE_JEDI_FIX_YAML, self.task_config) - FileHandler(jedi_fix_dict).sync() - logger.debug(f"JEDI fix files:\n{pformat(jedi_fix_dict)}") - - # stage static background error files, otherwise it will assume ID matrix - logger.info(f"Stage files for STATICB_TYPE {self.task_config.STATICB_TYPE}") - if self.task_config.STATICB_TYPE != 
'identity': - berror_staging_dict = parse_j2yaml(self.task_config.STAGE_BERROR_YAML, self.task_config) - else: - berror_staging_dict = {} - FileHandler(berror_staging_dict).sync() - logger.debug(f"Background error files:\n{pformat(berror_staging_dict)}") - - # stage ensemble files for use in hybrid background error - if self.task_config.DOHYBVAR: - logger.debug(f"Stage ensemble files for DOHYBVAR {self.task_config.DOHYBVAR}") - fv3ens_staging_dict = parse_j2yaml(self.task_config.STAGE_FV3ENS_YAML, self.task_config) - FileHandler(fv3ens_staging_dict).sync() - logger.debug(f"Ensemble files:\n{pformat(fv3ens_staging_dict)}") - - # stage backgrounds - logger.info(f"Staging background files from {self.task_config.STAGE_BKG_YAML}") - bkg_staging_dict = parse_j2yaml(self.task_config.STAGE_BKG_YAML, self.task_config) - FileHandler(bkg_staging_dict).sync() - logger.debug(f"Background files:\n{pformat(bkg_staging_dict)}") + # Stage files from COM + logger.info(f"Staging files from COM and creating output directories") + FileHandler(self.task_config.data_in).sync() - # need output dir for diags and anl - logger.debug("Create empty output [anl, diags] directories to receive output from executable") - newdirs = [ - os.path.join(self.task_config.DATA, 'anl'), - os.path.join(self.task_config.DATA, 'diags'), - ] - FileHandler({'mkdir': newdirs}).sync() + # Extract bias corrections from tar files + logger.info(f"Extracting bias corrections from tar files") + self.untar_bias_corrections() - # initialize JEDI variational application - logger.info(f"Initializing JEDI variational DA application") + # Initialize JEDI variational application + logger.info(f"Initializing JEDI applications") self.jedi_dict['atmanlvar'].initialize(self.task_config, clean_empty_obsspaces=True) - - # initialize JEDI FV3 increment conversion application - logger.info(f"Initializing JEDI FV3 increment conversion application") self.jedi_dict['atmanlfv3inc'].initialize(self.task_config) @logit(logger) @@ -189,9 
+117,9 @@ def finalize(self) -> None: This method will finalize a global atm analysis using JEDI. This includes: - - tar output diag files and place in ROTDIR - - copy the generated YAML file from initialize to the ROTDIR - - copy the updated bias correction files to ROTDIR + - compress and tar output diag files in COM + - tar radiative bias correction files and place in COM + - save output files and YAMLs to COM Parameters ---------- @@ -202,84 +130,14 @@ def finalize(self) -> None: None """ - # ---- tar up diags - # path of output tar statfile - atmstat = os.path.join(self.task_config.COMOUT_ATMOS_ANALYSIS, f"{self.task_config.APREFIX}atmstat") - - # get list of diag files to put in tarball - diags = glob.glob(os.path.join(self.task_config.DATA, 'diags', 'diag*nc')) - - logger.info(f"Compressing {len(diags)} diag files to {atmstat}.gz") - - # gzip the files first - logger.debug(f"Gzipping {len(diags)} diag files") - for diagfile in diags: - with open(diagfile, 'rb') as f_in, gzip.open(f"{diagfile}.gz", 'wb') as f_out: - f_out.writelines(f_in) - - # open tar file for writing - logger.debug(f"Creating tar file {atmstat} with {len(diags)} gzipped diag files") - with tarfile.open(atmstat, "w") as archive: - for diagfile in diags: - diaggzip = f"{diagfile}.gz" - archive.add(diaggzip, arcname=os.path.basename(diaggzip)) - - # get list of yamls to copy to ROTDIR - yamls = glob.glob(os.path.join(self.task_config.DATA, '*atm*yaml')) - - # copy full YAML from executable to ROTDIR - for src in yamls: - yaml_base = os.path.splitext(os.path.basename(src))[0] - dest_yaml_name = f"{self.task_config.APREFIX}{yaml_base}.yaml" - dest = os.path.join(self.task_config.COMOUT_CONF, dest_yaml_name) - logger.debug(f"Copying {src} to {dest}") - yaml_copy = { - 'copy': [[src, dest]] - } - FileHandler(yaml_copy).sync() - - # path of output radiance bias correction tarfile - bfile = f"{self.task_config.APREFIX}rad_varbc_params.tar" - radtar = 
os.path.join(self.task_config.COMOUT_ATMOS_ANALYSIS, bfile) - - # rename and copy tlapse radiance bias correction files from obs to bc - tlapobs = glob.glob(os.path.join(self.task_config.DATA, 'obs', '*tlapse.txt')) - copylist = [] - for tlapfile in tlapobs: - obsfile = os.path.basename(tlapfile).split('.', 2) - newfile = f"{self.task_config.APREFIX}{obsfile[2]}" - copylist.append([tlapfile, os.path.join(self.task_config.DATA, 'bc', newfile)]) - tlapse_dict = { - 'copy': copylist - } - FileHandler(tlapse_dict).sync() - - # get lists of radiance bias correction files to add to tarball - satlist = glob.glob(os.path.join(self.task_config.DATA, 'bc', '*satbias*nc')) - tlaplist = glob.glob(os.path.join(self.task_config.DATA, 'bc', '*tlapse.txt')) - - # tar radiance bias correction files to ROTDIR - logger.info(f"Creating radiance bias correction tar file {radtar}") - with tarfile.open(radtar, 'w') as radbcor: - for satfile in satlist: - radbcor.add(satfile, arcname=os.path.basename(satfile)) - for tlapfile in tlaplist: - radbcor.add(tlapfile, arcname=os.path.basename(tlapfile)) - logger.info(f"Add {radbcor.getnames()}") - - # Copy FV3 atm increment to comrot directory - logger.info("Copy UFS model readable atm increment file") - inc_copy = {'copy': []} - for itile in range(6): - src = os.path.join(self.task_config.DATA, "anl", - f"{self.task_config.APREFIX}cubed_sphere_grid_atminc.tile{itile+1}.nc") - dest = self.task_config.COMOUT_ATMOS_ANALYSIS - inc_copy['copy'].append([src, dest]) + # Compress and tar diag files in COM directory + self.tar_diag_files(self.task_config.COMOUT_ATMOS_ANALYSIS, + f"{self.task_config.APREFIX}atmstat") - # copy increments - src_list, dest_list = zip(*inc_copy['copy']) - logger.debug(f"Copying {src_list}\nto {dest_list}") - FileHandler(inc_copy).sync() + # Tar radiative bias correction files into COM directory + self.tar_radiative_bias_corrections(self.task_config.COMOUT_ATMOS_ANALYSIS, + f"{self.task_config.APREFIX}rad_varbc_params.tar") - 
def clean(self): - super().clean() + # Save files from COM + logger.info(f"Saving files to COM") + FileHandler(self.task_config.data_out).sync() diff --git a/ush/python/pygfs/task/atmens_analysis.py b/ush/python/pygfs/task/atmens_analysis.py index 6db19c11166..778cfaea8a3 100644 --- a/ush/python/pygfs/task/atmens_analysis.py +++ b/ush/python/pygfs/task/atmens_analysis.py @@ -1,24 +1,15 @@ #!/usr/bin/env python3 -import os -import glob -import gzip -import tarfile from logging import getLogger -from pprint import pformat -from typing import Dict, Any - -from wxflow import (AttrDict, FileHandler, Task, - add_to_datetime, to_timedelta, to_YMD, - parse_j2yaml, - logit, - Template, TemplateConstants) +from pygfs.task.analysis import Analysis from pygfs.jedi import Jedi +from typing import Dict, Any +from wxflow import AttrDict, FileHandler, parse_j2yaml, logit logger = getLogger(__name__.split('.')[-1]) -class AtmEnsAnalysis(Task): +class AtmEnsAnalysis(Analysis): """ Class for JEDI-based global atmens analysis tasks """ @@ -43,34 +34,24 @@ def __init__(self, config: Dict[str, Any]): super().__init__(config) _res = int(self.task_config.CASE_ENS[1:]) - _window_begin = add_to_datetime(self.task_config.current_cycle, -to_timedelta(f"{self.task_config.assim_freq}H") / 2) # Create a local dictionary that is repeatedly used across this class - local_dict = AttrDict( + self.task_config.update(AttrDict( { 'npx_ges': _res + 1, 'npy_ges': _res + 1, 'npz_ges': self.task_config.LEVS - 1, 'npz': self.task_config.LEVS - 1, - 'ATM_WINDOW_BEGIN': _window_begin, - 'ATM_WINDOW_LENGTH': f"PT{self.task_config.assim_freq}H", - 'OPREFIX': f"{self.task_config.EUPD_CYC}.t{self.task_config.cyc:02d}z.", - 'APREFIX': f"{self.task_config.RUN.replace('enkf', '')}.t{self.task_config.cyc:02d}z.", - 'APREFIX_ENS': f"{self.task_config.RUN}.t{self.task_config.cyc:02d}z.", - 'GPREFIX': f"gdas.t{self.task_config.previous_cycle.hour:02d}z.", - 'GPREFIX_ENS': 
f"enkfgdas.t{self.task_config.previous_cycle.hour:02d}z.", - 'atm_obsdatain_path': f"./obs/", - 'atm_obsdataout_path': f"./diags/", - 'BKG_TSTEP': "PT1H" # Placeholder for 4D applications - } + 'BKG_TSTEP': "PT1H", # Placeholder for 4D applications + }) ) - # Extend task_config with local_dict - self.task_config = AttrDict(**self.task_config, **local_dict) + # Extend task_config with content of config yaml for this task + self.task_config.update(parse_j2yaml(self.task_config.TASK_CONFIG_YAML, self.task_config)) # Create dictionary of JEDI objects expected_keys = ['atmensanlobs', 'atmensanlsol', 'atmensanlfv3inc', 'atmensanlletkf'] - self.jedi_dict = Jedi.get_jedi_dict(self.task_config.JEDI_CONFIG_YAML, self.task_config, expected_keys) + self.jedi_dict = Jedi.get_jedi_dict(self.task_config.jedi_config, self.task_config, expected_keys) @logit(logger) def initialize(self) -> None: @@ -78,13 +59,9 @@ def initialize(self) -> None: This method will initialize a global atmens analysis. This includes: - - initialize JEDI LETKF observer and FV3 increment converter applications - - staging observation files - - staging bias correction files - - staging CRTM fix files - - staging FV3-JEDI fix files - - staging model backgrounds - - creating output directories + - stage input files from COM and create output directories + - extract bias corrections from tar files + - initialize JEDI applications Parameters ---------- @@ -95,58 +72,18 @@ def initialize(self) -> None: None """ - # stage observations - logger.info(f"Staging list of observation files") - obs_dict = self.jedi_dict['atmensanlobs'].render_jcb(self.task_config, 'atm_obs_staging') - FileHandler(obs_dict).sync() - logger.debug(f"Observation files:\n{pformat(obs_dict)}") - - # stage bias corrections - logger.info(f"Staging list of bias correction files") - bias_dict = self.jedi_dict['atmensanlobs'].render_jcb(self.task_config, 'atm_bias_staging') - bias_dict['copy'] = Jedi.remove_redundant(bias_dict['copy']) - 
FileHandler(bias_dict).sync() - logger.debug(f"Bias correction files:\n{pformat(bias_dict)}") - - # extract bias corrections - Jedi.extract_tar_from_filehandler_dict(bias_dict) - - # stage CRTM fix files - logger.info(f"Staging CRTM fix files from {self.task_config.STAGE_CRTM_COEFF_YAML}") - crtm_fix_dict = parse_j2yaml(self.task_config.STAGE_CRTM_COEFF_YAML, self.task_config) - FileHandler(crtm_fix_dict).sync() - logger.debug(f"CRTM fix files:\n{pformat(crtm_fix_dict)}") - - # stage fix files - logger.info(f"Staging JEDI fix files from {self.task_config.STAGE_JEDI_FIX_YAML}") - jedi_fix_dict = parse_j2yaml(self.task_config.STAGE_JEDI_FIX_YAML, self.task_config) - FileHandler(jedi_fix_dict).sync() - logger.debug(f"JEDI fix files:\n{pformat(jedi_fix_dict)}") - - # stage backgrounds - logger.info(f"Stage ensemble member background files") - bkg_staging_dict = parse_j2yaml(self.task_config.STAGE_BKG_YAML, self.task_config) - FileHandler(bkg_staging_dict).sync() - logger.debug(f"Ensemble member background files:\n{pformat(bkg_staging_dict)}") - - # need output dir for diags and anl - logger.debug("Create empty output [anl, diags] directories to receive output from executable") - newdirs = [ - os.path.join(self.task_config.DATA, 'anl'), - os.path.join(self.task_config.DATA, 'diags'), - ] - FileHandler({'mkdir': newdirs}).sync() - - # initialize JEDI LETKF observer application + # Stage files from COM + logger.info(f"Staging files from COM") + FileHandler(self.task_config.data_in).sync() + + # Extract bias corrections from tar files + logger.info(f"Extracting bias corrections from tar files") + self.untar_bias_corrections() + + # initialize JEDI applications logger.info(f"Initializing JEDI LETKF observer application") self.jedi_dict['atmensanlobs'].initialize(self.task_config, clean_empty_obsspaces=True) - - # initialize JEDI LETKF solver application - logger.info(f"Initializing JEDI LETKF solver application") self.jedi_dict['atmensanlsol'].initialize(self.task_config) - 
- # initialize JEDI FV3 increment conversion application - logger.info(f"Initializing JEDI FV3 increment conversion application") self.jedi_dict['atmensanlfv3inc'].initialize(self.task_config) @logit(logger) @@ -189,8 +126,8 @@ def finalize(self) -> None: This method will finalize a global atmens analysis using JEDI. This includes: - - tar output diag files and place in ROTDIR - - copy the generated YAML file from initialize to the ROTDIR + - compress and tar output diag files and place in COM + - save output files and YAMLs to COM Parameters ---------- @@ -201,76 +138,10 @@ def finalize(self) -> None: None """ - # ---- tar up diags - # path of output tar statfile - atmensstat = os.path.join(self.task_config.COMOUT_ATMOS_ANALYSIS_ENS, f"{self.task_config.APREFIX_ENS}atmensstat") - - # get list of diag files to put in tarball - diags = glob.glob(os.path.join(self.task_config.DATA, 'diags', 'diag*nc')) - - logger.info(f"Compressing {len(diags)} diag files to {atmensstat}.gz") - - # gzip the files first - logger.debug(f"Gzipping {len(diags)} diag files") - for diagfile in diags: - with open(diagfile, 'rb') as f_in, gzip.open(f"{diagfile}.gz", 'wb') as f_out: - f_out.writelines(f_in) - - # open tar file for writing - logger.debug(f"Creating tar file {atmensstat} with {len(diags)} gzipped diag files") - with tarfile.open(atmensstat, "w") as archive: - for diagfile in diags: - diaggzip = f"{diagfile}.gz" - archive.add(diaggzip, arcname=os.path.basename(diaggzip)) - - # get list of yamls to cop to ROTDIR - yamls = glob.glob(os.path.join(self.task_config.DATA, '*atmens*yaml')) - - # copy full YAML from executable to ROTDIR - for src in yamls: - logger.info(f"Copying {src} to {self.task_config.COMOUT_CONF}") - yaml_base = os.path.splitext(os.path.basename(src))[0] - dest_yaml_name = f"{self.task_config.APREFIX_ENS}{yaml_base}.yaml" - dest = os.path.join(self.task_config.COMOUT_CONF, dest_yaml_name) - logger.debug(f"Copying {src} to {dest}") - yaml_copy = { - 'copy': [[src, 
dest]] - } - FileHandler(yaml_copy).sync() - - # create template dictionaries - template_inc = self.task_config.COM_ATMOS_ANALYSIS_TMPL - tmpl_inc_dict = { - 'ROTDIR': self.task_config.ROTDIR, - 'RUN': self.task_config.RUN, - 'YMD': to_YMD(self.task_config.current_cycle), - 'HH': self.task_config.current_cycle.strftime('%H') - } - - # copy ensemble mean analysis to comrot - logger.info("Copy ensemble mean analysis") - fh_dict = {'copy': [[f"{self.task_config.DATA}/anl/{self.task_config.APREFIX_ENS}cubed_sphere_grid_atmanl.ensmean.nc", - f"{self.task_config.COMOUT_ATMOS_ANALYSIS_ENS}"]]} - FileHandler(fh_dict).sync() - - # copy FV3 atm increment to comrot directory - logger.info("Copy UFS model readable atm increment file") - - # loop over ensemble members - inc_copy = {'copy': []} - for imem in range(1, self.task_config.NMEM_ENS + 1): - memchar = f"mem{imem:03d}" - - # create output path for member analysis increment - tmpl_inc_dict['MEMDIR'] = memchar - incdir = Template.substitute_structure(template_inc, TemplateConstants.DOLLAR_CURLY_BRACE, tmpl_inc_dict.get) - src = os.path.join(self.task_config.DATA, 'anl', memchar, - f"{self.task_config.APREFIX_ENS}cubed_sphere_grid_atminc.nc") - dest = incdir - inc_copy['copy'].append([src, dest]) - - logger.debug(f"Copying increments") - FileHandler(inc_copy).sync() - - def clean(self): - super().clean() + # Compress and tar diag files in COM directory + self.tar_diag_files(self.task_config.COMOUT_ATMOS_ANALYSIS_ENS, + f"{self.task_config.APREFIX_ENS}atmensstat") + + # Save files from COM + logger.info(f"Saving files to COM") + FileHandler(self.task_config.data_out).sync() diff --git a/ush/python/pygfs/task/ensemble_recenter.py b/ush/python/pygfs/task/ensemble_recenter.py index 2f50f45501e..54ba1d12339 100644 --- a/ush/python/pygfs/task/ensemble_recenter.py +++ b/ush/python/pygfs/task/ensemble_recenter.py @@ -1,24 +1,20 @@ #!/usr/bin/env python3 -from datetime import timedelta from logging import getLogger -import os -from 
pprint import pformat +from pygfs.task.analysis import Analysis from pygfs.jedi import Jedi -from wxflow import (AttrDict, FileHandler, Task, Executable, Template, TemplateConstants, - add_to_datetime, to_timedelta, to_isotime, to_YMD, - parse_j2yaml, - logit) +from typing import Dict, Any +from wxflow import AttrDict, FileHandler, parse_j2yaml, logit logger = getLogger(__name__.split('.')[-1]) -class EnsembleRecenter(Task): +class EnsembleRecenter(Analysis): """ Class for JEDI-based ensemble increment recentering """ @logit(logger, name="EnsembleRecenter") - def __init__(self, config): + def __init__(self, config: Dict[str, Any]): """Constructor for atmospheric ensemble increment recentering task This method will construct an ensemble increment recentering task @@ -39,38 +35,26 @@ def __init__(self, config): _res = int(self.task_config.CASE[1:]) _res_anl = int(self.task_config.CASE_ANL[1:]) - _window_begin = add_to_datetime(self.task_config.current_cycle, -to_timedelta(f"{self.task_config.assim_freq}H") / 2) - - _iau_times_iso = [] - for hour in self.task_config.IAUFHRS: - _iau_times_iso.append(to_isotime(_window_begin + to_timedelta(f"{str(hour)}H") - to_timedelta(f"{self.task_config.assim_freq}H") / 2)) # Create a local dictionary that is repeatedly used across this class - local_dict = AttrDict( + self.task_config.update(AttrDict( { 'npx_ges': _res + 1, 'npy_ges': _res + 1, 'npz_ges': self.task_config.LEVS - 1, - 'npz': self.task_config.LEVS - 1, 'npx_anl': _res_anl + 1, 'npy_anl': _res_anl + 1, 'npz_anl': self.task_config.LEVS - 1, - 'ATM_WINDOW_LENGTH': f"PT{self.task_config.assim_freq}H", - 'ATM_WINDOW_BEGIN': _window_begin, - 'APREFIX': f"{self.task_config.RUN.replace('enkf', '')}.t{self.task_config.cyc:02d}z.", - 'APREFIX_ENS': f"{self.task_config.RUN}.t{self.task_config.cyc:02d}z.", - 'GPREFIX': f"gdas.t{self.task_config.previous_cycle.hour:02d}z.", - 'GPREFIX_ENS': f"enkfgdas.t{self.task_config.previous_cycle.hour:02d}z.", - 'iau_times_iso': 
_iau_times_iso + 'npz': self.task_config.LEVS - 1, } - ) + )) - # Extend task_config with local_dict - self.task_config = AttrDict(**self.task_config, **local_dict) + # Extend task_config with content of config yaml for this task + self.task_config.update(parse_j2yaml(self.task_config.TASK_CONFIG_YAML, self.task_config)) # Create dictionary of Jedi objects expected_keys = ['correction_increment', 'ensemble_recenter'] - self.jedi_dict = Jedi.get_jedi_dict(self.task_config.JEDI_CONFIG_YAML, self.task_config, expected_keys) + self.jedi_dict = Jedi.get_jedi_dict(self.task_config.jedi_config, self.task_config, expected_keys) @logit(logger) def initialize(self) -> None: @@ -78,9 +62,8 @@ def initialize(self) -> None: This method will initialize the ensemble increment recentering task. This includes: - - initializing the JEDI recentering application - - staging JEDI fix files - - staging backgrounds and increments + - stage input files from COM and create output directories + - initialize JEDI applications Parameters ---------- @@ -91,23 +74,15 @@ def initialize(self) -> None: None """ + # Stage files from COM + logger.info(f"Staging files from COM") + FileHandler(self.task_config.data_in).sync() + # Initialize JEDI ensemble increment recentering application - logger.info(f"Initializing JEDI ensemble recentering applications") + logger.info(f"Initializing JEDI applications") self.jedi_dict['correction_increment'].initialize(self.task_config) self.jedi_dict['ensemble_recenter'].initialize(self.task_config) - # Stage fix files - logger.info(f"Staging JEDI fix files from {self.task_config.STAGE_JEDI_FIX_YAML}") - jedi_fix_dict = parse_j2yaml(self.task_config.STAGE_JEDI_FIX_YAML, self.task_config) - FileHandler(jedi_fix_dict).sync() - logger.debug(f"JEDI fix files:\n{pformat(jedi_fix_dict)}") - - # Stage background and increment files - logger.info(f"Staging background and increment files from {self.task_config.STAGE_YAML}") - fh_dict = 
parse_j2yaml(self.task_config.STAGE_YAML, self.task_config) - FileHandler(fh_dict).sync() - logger.debug(f"JEDI background and increment files:\n{pformat(fh_dict)}") - @logit(logger) def execute(self) -> None: """Run JEDI executable @@ -135,7 +110,7 @@ def finalize(self) -> None: This method will finalize the ensemble increment recentering task. This includes: - - Move correction increment files to the comrot directory + - save output files and YAMLs to COM Parameters ---------- @@ -146,41 +121,6 @@ def finalize(self) -> None: None """ - fh_dict = {'copy': []} - - # create template dictionaries - template_inc = self.task_config.COM_ATMOS_ANALYSIS_TMPL - tmpl_inc_dict = { - 'ROTDIR': self.task_config.ROTDIR, - 'RUN': self.task_config.RUN, - 'YMD': to_YMD(self.task_config.current_cycle), - 'HH': self.task_config.current_cycle.strftime('%H') - } - - # Copy increments to COM - for imem in range(1, self.task_config.NMEM_ENS + 1): - memchar = f"mem{imem:03d}" - tmpl_inc_dict['MEMDIR'] = memchar - incdir = Template.substitute_structure(template_inc, TemplateConstants.DOLLAR_CURLY_BRACE, tmpl_inc_dict.get) - for fh in self.task_config.IAUFHRS: - hr = format(fh, '03') - for itile in range(6): - src = os.path.join(self.task_config.DATA, memchar, - f"{self.task_config.APREFIX_ENS}cubed_sphere_grid_ratmi{hr}.tile{itile+1}.nc") - if fh == 6: - dest = os.path.join(incdir, - f"{self.task_config.APREFIX_ENS}cubed_sphere_grid_ratminc.tile{itile+1}.nc") - else: - dest = incdir - fh_dict['copy'].append([src, dest]) - - # Copy YAMLs to COM - for app_name in self.jedi_dict.keys(): - src = os.path.join(self.task_config.DATA, - f"{app_name}.yaml") - dest = os.path.join(self.task_config.COMOUT_CONF, - f"{self.task_config.APREFIX_ENS}{app_name}.yaml") - fh_dict['copy'].append([src, dest]) - - # Sync file handler - FileHandler(fh_dict).sync() + # Save output files to COM + logger.info(f"Saving output files to COM") + FileHandler(self.task_config.data_out).sync() diff --git 
a/ush/python/pygfs/task/fv3_analysis_calc.py b/ush/python/pygfs/task/fv3_analysis_calc.py index a287c82ad4e..5dc5000ed51 100644 --- a/ush/python/pygfs/task/fv3_analysis_calc.py +++ b/ush/python/pygfs/task/fv3_analysis_calc.py @@ -4,22 +4,20 @@ from logging import getLogger import netCDF4 as nc import os -from pprint import pformat from pygfs.jedi import Jedi -from wxflow import (AttrDict, FileHandler, Task, - parse_j2yaml, - to_timedelta, add_to_datetime, to_fv3time, to_isotime, - logit) +from pygfs.task.analysis import Analysis +from typing import Dict, Any +from wxflow import AttrDict, FileHandler, to_fv3time, parse_j2yaml, logit logger = getLogger(__name__.split('.')[-1]) -class FV3AnalysisCalc(Task): +class FV3AnalysisCalc(Analysis): """ Class for analysis calculation """ @logit(logger, name="FV3AnalysisCalc") - def __init__(self, config): + def __init__(self, config: Dict[str, Any]): """Constructor for analysis calculation task This method will construct an analysis calculation @@ -39,30 +37,23 @@ def __init__(self, config): super().__init__(config) _res = int(self.task_config.CASE[1:]) - _res_anl = int(self.task_config.CASE_ANL[1:]) - _window_begin = add_to_datetime(self.task_config.current_cycle, -to_timedelta(f"{self.task_config.assim_freq}H") / 2) + _res_anl = int(self.task_config['CASE_ANL'][1:]) # Create a local dictionary that is repeatedly used across this class - local_dict = AttrDict( + self.task_config.update(AttrDict( { 'npx_ges': _res + 1, 'npy_ges': _res + 1, 'npz_ges': self.task_config.LEVS - 1, - 'npz': self.task_config.LEVS - 1, 'npx_anl': _res_anl + 1, 'npy_anl': _res_anl + 1, 'npz_anl': self.task_config.LEVS - 1, - 'ATM_WINDOW_LENGTH': f"PT{self.task_config.assim_freq}H", - 'ATM_WINDOW_BEGIN': _window_begin, - 'APREFIX': f"{self.task_config.RUN}.t{self.task_config.cyc:02d}z.", - 'APREFIX_ENS': f"enkf{self.task_config.RUN}.t{self.task_config.cyc:02d}z.", - 'GPREFIX': f"gdas.t{self.task_config.previous_cycle.hour:02d}z.", - 'GPREFIX_ENS': 
f"enkfgdas.t{self.task_config.previous_cycle.hour:02d}z.", + 'npz': self.task_config.LEVS - 1, } - ) + )) - # Extend task_config with local_dict - self.task_config = AttrDict(**self.task_config, **local_dict) + # Extend task_config with content of config yaml for this task + self.task_config.update(parse_j2yaml(self.task_config.TASK_CONFIG_YAML, self.task_config)) # Create dictionary of Jedi objects expected_keys = ['atm_addincrement'] @@ -70,7 +61,7 @@ def __init__(self, config): expected_keys.append('aero_addincrement') if self.task_config.DO_JEDISNOWDA: expected_keys.append('snow_addincrement') - self.jedi_dict = Jedi.get_jedi_dict(self.task_config.JEDI_CONFIG_YAML, self.task_config, expected_keys) + self.jedi_dict = Jedi.get_jedi_dict(self.task_config.jedi_config, self.task_config, expected_keys) @logit(logger) def initialize(self) -> None: @@ -78,9 +69,8 @@ def initialize(self) -> None: This method will initialize the analysis calculation task. This includes: - - initializing the JEDI addincrement application - - staging JEDI fix files - - staging backgrounds and increments + - stage input files from COM and create output directories + - initialize JEDI applications Parameters ---------- @@ -91,6 +81,10 @@ def initialize(self) -> None: None """ + # Stage files from COM + logger.info(f"Staging files from COM") + FileHandler(self.task_config.data_in).sync() + # Initialize GDASApp JEDI addincrement application logger.info(f"Initializing GDASApp JEDI addincrement applications") self.jedi_dict['atm_addincrement'].initialize(self.task_config) @@ -99,17 +93,6 @@ def initialize(self) -> None: if self.task_config.DO_JEDISNOWDA: self.jedi_dict['snow_addincrement'].initialize(self.task_config) - # Stage fix files - logger.info(f"Staging JEDI fix files from {self.task_config.STAGE_JEDI_FIX_YAML}") - jedi_fix_dict = parse_j2yaml(self.task_config.STAGE_JEDI_FIX_YAML, self.task_config) - FileHandler(jedi_fix_dict).sync() - logger.debug(f"JEDI fix 
files:\n{pformat(jedi_fix_dict)}") - - # Stage background and increment files - logger.info(f"Staging background and increment files from COM") - fh_dict = parse_j2yaml(self.task_config.STAGE_YAML, self.task_config) - FileHandler(fh_dict).sync() - @logit(logger) def execute(self) -> None: """Compute analyses @@ -165,7 +148,8 @@ def finalize(self) -> None: This method will finalize the analysis calculation task. This includes: - - Move analysis files to the comrot directory + - write analysis log file + - save output files and YAMLs to COM Parameters ---------- @@ -176,26 +160,6 @@ def finalize(self) -> None: None """ - # Copy analyses to COM - fh_dict = {'copy': []} - src_prefix = f"{self.task_config.DATA}/{self.task_config.GPREFIX}" - dest_prefix = f"{self.task_config.COMOUT_ATMOS_ANALYSIS}/{self.task_config.APREFIX}" - fh_dict['copy'].append([f"{src_prefix}atmf006.nc", - f"{dest_prefix}atmanl.nc"]) - fh_dict['copy'].append([f"{src_prefix}sfcf006.nc", - f"{dest_prefix}sfcanl.nc"]) - - # Copy YAMLs to COM - for app_name in self.jedi_dict.keys(): - src = os.path.join(self.task_config.DATA, - f"{app_name}.yaml") - dest = os.path.join(self.task_config.COMOUT_ATMOS_ANALYSIS, - f"{self.task_config.APREFIX}{app_name}.yaml") - fh_dict['copy'].append([src, dest]) - - # Call FileHandler - FileHandler(fh_dict).sync() - # Write analysis log file formatted_date = datetime.now().strftime("%a %b %d %H:%M:%S %Z%Y") log_file = os.path.join(self.task_config.COMOUT_ATMOS_ANALYSIS, f"{self.task_config.RUN}.t{self.task_config.cyc}z.loganl.txt") @@ -203,6 +167,10 @@ def finalize(self) -> None: with open(log_file, "w") as file: file.write(f"{message}\n") + # Save files from COM + logger.info(f"Saving files to COM") + FileHandler(self.task_config.data_out).sync() + @logit(logger) def insert_analysis_variables(valid_time, fn_anl: str, fn_bkg: str) -> None: diff --git a/ush/python/pygfs/task/marine_analysis.py b/ush/python/pygfs/task/marine_analysis.py index fd7fa8d2427..b374c5f0ed5 100644 
--- a/ush/python/pygfs/task/marine_analysis.py +++ b/ush/python/pygfs/task/marine_analysis.py @@ -81,7 +81,8 @@ def __init__(self, config): # Construct dictionary of JEDI objects, one for each JEDI application need for the analysis expected_keys = ['var', 'soca_incpostproc', 'soca_diag_stats'] - self.jedi_dict = Jedi.get_jedi_dict(self.task_config.JEDI_CONFIG_YAML_DET, self.task_config, expected_keys) + jedi_config_dict = parse_j2yaml(self.task_config.JEDI_CONFIG_YAML_DET, self.task_config) + self.jedi_dict = Jedi.get_jedi_dict(jedi_config_dict, self.task_config, expected_keys) @logit(logger) def initialize(self: Task) -> None: diff --git a/ush/python/pygfs/task/marine_bmat.py b/ush/python/pygfs/task/marine_bmat.py index ab32fc88bdf..e2c2739f893 100644 --- a/ush/python/pygfs/task/marine_bmat.py +++ b/ush/python/pygfs/task/marine_bmat.py @@ -75,7 +75,8 @@ def __init__(self, config): # Create dictionary of Jedi objects expected_keys = ['gridgen', 'soca_diagb', 'soca_parameters_diffusion_vt', 'soca_setcorscales', 'soca_parameters_diffusion_hz', 'soca_ensb', 'soca_ensweights', 'soca_chgres'] - self.jedi_dict = Jedi.get_jedi_dict(self.task_config.JEDI_CONFIG_YAML, self.task_config, expected_keys) + jedi_config_dict = parse_j2yaml(self.task_config.JEDI_CONFIG_YAML, self.task_config) + self.jedi_dict = Jedi.get_jedi_dict(jedi_config_dict, self.task_config, expected_keys) @logit(logger) def initialize(self: Task) -> None: diff --git a/ush/python/pygfs/task/marine_recenter.py b/ush/python/pygfs/task/marine_recenter.py index 03fb5e2bcbc..87bc8e8c785 100644 --- a/ush/python/pygfs/task/marine_recenter.py +++ b/ush/python/pygfs/task/marine_recenter.py @@ -64,7 +64,8 @@ def __init__(self, config: Dict) -> None: # Construct dictionary of JEDI objects, one for each JEDI application need for the analysis expected_keys = ['gridgen', 'ens_handler'] - self.jedi_dict = Jedi.get_jedi_dict(self.task_config.JEDI_CONFIG_YAML, self.task_config, expected_keys) + jedi_config_dict = 
parse_j2yaml(self.task_config.JEDI_CONFIG_YAML, self.task_config) + self.jedi_dict = Jedi.get_jedi_dict(jedi_config_dict, self.task_config, expected_keys) @logit(logger) def initialize(self): diff --git a/ush/python/pygfs/task/snow_analysis.py b/ush/python/pygfs/task/snow_analysis.py index 5253e186104..c9bb3ad778b 100644 --- a/ush/python/pygfs/task/snow_analysis.py +++ b/ush/python/pygfs/task/snow_analysis.py @@ -9,29 +9,25 @@ import tarfile import numpy as np from netCDF4 import Dataset - -from wxflow import (AttrDict, - FileHandler, +from pygfs.task.analysis import Analysis +from pygfs.jedi import Jedi +from wxflow import (AttrDict, Executable, FileHandler, WorkflowException, to_fv3time, to_YMD, to_YMDH, to_timedelta, add_to_datetime, to_julian, rm_p, cp, parse_j2yaml, save_as_yaml, Jinja, - Task, - logit, - Executable, - WorkflowException) -from pygfs.jedi import Jedi + logit) logger = getLogger(__name__.split('.')[-1]) -class SnowAnalysis(Task): +class SnowAnalysis(Analysis): """ Class for JEDI-based global snow analysis tasks """ - @logit(logger, name="SnowAnalysis") + @logit(logger, name="Analysis") def __init__(self, config: Dict[str, Any]): """Constructor global snow analysis task @@ -52,39 +48,34 @@ def __init__(self, config: Dict[str, Any]): super().__init__(config) _res = int(self.task_config['CASE'][1:]) - _window_begin = add_to_datetime(self.task_config.current_cycle, -to_timedelta(f"{self.task_config['assim_freq']}H") / 2) - # fix ocnres - self.task_config.OCNRES = f"{self.task_config.OCNRES:03d}" - - # Create a local dictionary that is repeatedly used across this class - local_dict = AttrDict( + # if 00z, do SCF preprocessing + _ims_file = os.path.join(self.task_config.COMIN_OBS, f'{self.task_config.OPREFIX}imssnow96.asc') + logger.info(f"Checking for IMS file: {_ims_file}") + if self.task_config.cyc == 0 and os.path.exists(_ims_file): + _DO_IMS_SCF = True + else: + _DO_IMS_SCF = False + + # Extend task_config with variables repeatedly used across 
this class + self.task_config.update(AttrDict( { 'npx_ges': _res + 1, 'npy_ges': _res + 1, 'npz_ges': self.task_config.LEVS - 1, 'npz': self.task_config.LEVS - 1, - 'SNOW_WINDOW_BEGIN': _window_begin, - 'SNOW_WINDOW_LENGTH': f"PT{self.task_config['assim_freq']}H", - 'OPREFIX': f"{self.task_config.RUN}.t{self.task_config.cyc:02d}z.", - 'APREFIX': f"{self.task_config.RUN}.t{self.task_config.cyc:02d}z.", - 'GPREFIX': f"gdas.t{self.task_config.previous_cycle.hour:02d}z.", - 'snow_obsdatain_path': os.path.join(self.task_config.DATA, 'obs'), - 'snow_obsdataout_path': os.path.join(self.task_config.DATA, 'diags'), 'snow_bkg_path': os.path.join('.', 'bkg/'), - 'res': _res, + 'ims_file': _ims_file, + 'DO_IMS_SCF': _DO_IMS_SCF, # Boolean to decide if IMS snow cover processing is done } - ) + )) - # Extend task_config with local_dict - self.task_config = AttrDict(**self.task_config, **local_dict) + # Extend task_config with content of config yaml for this task + self.task_config.update(parse_j2yaml(self.task_config.TASK_CONFIG_YAML, self.task_config)) # Create JEDI object dictionary expected_keys = ['scf_to_ioda', 'snowanlvar'] - self.jedi_dict = Jedi.get_jedi_dict(self.task_config.JEDI_CONFIG_YAML, self.task_config, expected_keys) - - # Boolean to decide if IMS snow cover processing is done - self.task_config.DO_IMS_SCF = False + self.jedi_dict = Jedi.get_jedi_dict(self.task_config.jedi_config, self.task_config, expected_keys) @logit(logger) def initialize(self) -> None: @@ -92,12 +83,8 @@ def initialize(self) -> None: This method will initialize a global snow analysis. 
This includes: - - initialize JEDI application - - staging model backgrounds - - staging observation files - - staging FV3-JEDI fix files - - staging B error files - - creating output directories + - stage input files from COM and create output directories + - initialize JEDI applications Parameters ---------- @@ -108,56 +95,15 @@ def initialize(self) -> None: None """ - # stage backgrounds - logger.info(f"Staging background files from {self.task_config.STAGE_BKG_YAML}") - bkg_staging_dict = parse_j2yaml(self.task_config.STAGE_BKG_YAML, self.task_config) - FileHandler(bkg_staging_dict).sync() - logger.debug(f"Background files:\n{pformat(bkg_staging_dict)}") - - # stage observations - logger.info(f"Staging list of observation files generated from JEDI config") - obs_dict = self.jedi_dict['snowanlvar'].render_jcb(self.task_config, 'snow_obs_staging') - FileHandler(obs_dict).sync() - logger.debug(f"Observation files:\n{pformat(obs_dict)}") - - # stage GTS bufr2ioda mapping YAML files - logger.info(f"Staging GTS bufr2ioda mapping YAML files from {self.task_config.STAGE_GTS_YAML}") - gts_mapping_list = parse_j2yaml(self.task_config.STAGE_GTS_YAML, self.task_config) - FileHandler(gts_mapping_list).sync() - - # stage FV3-JEDI fix files - logger.info(f"Staging JEDI fix files from {self.task_config.STAGE_JEDI_FIX_YAML}") - jedi_fix_dict = parse_j2yaml(self.task_config.STAGE_JEDI_FIX_YAML, self.task_config) - FileHandler(jedi_fix_dict).sync() - logger.debug(f"JEDI fix files:\n{pformat(jedi_fix_dict)}") - - # staging B error files - logger.info("Stage files for static background error") - berror_staging_dict = parse_j2yaml(self.task_config.STAGE_BERROR_YAML, self.task_config) - FileHandler(berror_staging_dict).sync() - logger.debug(f"Background error files:\n{pformat(berror_staging_dict)}") - - # need output dir for diags and anl - logger.debug("Create empty output [anl, diags] directories to receive output from executable") - newdirs = [ - os.path.join(self.task_config.DATA, 
'anl'), - os.path.join(self.task_config.DATA, 'diags'), - ] - FileHandler({'mkdir': newdirs}).sync() - - # if 00z, do SCF preprocessing - if self.task_config.cyc == 0: - ims_scf_to_ioda_staging_dict = parse_j2yaml(self.task_config.STAGE_IMS_SCF2IODA_YAML, self.task_config) - FileHandler(ims_scf_to_ioda_staging_dict).sync() - self.jedi_dict['scf_to_ioda'].initialize(self.task_config) - # Check if file exists - ims_file = ims_scf_to_ioda_staging_dict['copy_opt'][0][1] - if os.path.exists(ims_file): - self.task_config.DO_IMS_SCF = True + # Stage files from COM + logger.info(f"Staging files from COM and creating output directories") + FileHandler(self.task_config.data_in).sync() # initialize JEDI variational application - logger.info(f"Initializing JEDI variational DA application") + logger.info(f"Initializing JEDI applications") self.jedi_dict['snowanlvar'].initialize(self.task_config, clean_empty_obsspaces=False) + if self.task_config.DO_IMS_SCF: + self.jedi_dict['scf_to_ioda'].initialize(self.task_config) @logit(logger) def execute(self, jedi_dict_key: str) -> None: @@ -181,10 +127,8 @@ def execute(self, jedi_dict_key: str) -> None: def finalize(self) -> None: """Performs closing actions of the Snow analysis task This method: - - tar and gzip the output diag files and place in COM/ - - copy the generated YAML file from initialize to the COM/ - - copy the analysis files to the COM/ - - copy the increment files to the COM/ + - compress and tar output diag files in COM + - save output files and YAMLs to COM Parameters ---------- @@ -192,67 +136,13 @@ def finalize(self) -> None: Instance of the SnowAnalysis object """ - # ---- tar up diags - # path of output tar statfile - snowstat = os.path.join(self.task_config.COMOUT_SNOW_ANALYSIS, f"{self.task_config.APREFIX}snowstat.tgz") - - # get list of diag files to put in tarball - diags = glob.glob(os.path.join(self.task_config.DATA, 'diags', 'diag*nc')) - - logger.info(f"Compressing {len(diags)} diag files to {snowstat}") - 
- # gzip the files first - logger.debug(f"Gzipping {len(diags)} diag files") - for diagfile in diags: - with open(diagfile, 'rb') as f_in, gzip.open(f"{diagfile}.gz", 'wb') as f_out: - f_out.writelines(f_in) - - # open tar file for writing - logger.debug(f"Creating tar file {snowstat} with {len(diags)} gzipped diag files") - with tarfile.open(snowstat, "w|gz") as archive: - for diagfile in diags: - diaggzip = f"{diagfile}.gz" - archive.add(diaggzip, arcname=os.path.basename(diaggzip)) - - # get list of yamls to copy to ROTDIR - yamls = glob.glob(os.path.join(self.task_config.DATA, '*snow*yaml')) - - # copy full YAML from executable to ROTDIR - for src in yamls: - yaml_base = os.path.splitext(os.path.basename(src))[0] - dest_yaml_name = f"{self.task_config.RUN}.t{self.task_config.cyc:02d}z.{yaml_base}.yaml" - dest = os.path.join(self.task_config.COMOUT_CONF, dest_yaml_name) - logger.debug(f"Copying {src} to {dest}") - yaml_copy = { - 'copy': [[src, dest]] - } - FileHandler(yaml_copy).sync() - - logger.info("Copy analysis to COM") - bkgtimes = [] - if self.task_config.DOIAU: - # need both beginning and middle of window - bkgtimes.append(self.task_config.SNOW_WINDOW_BEGIN) - bkgtimes.append(self.task_config.current_cycle) - anllist = [] - for bkgtime in bkgtimes: - template = f'{to_fv3time(bkgtime)}.sfc_data.tile{{tilenum}}.nc' - for itile in range(1, self.task_config.ntiles + 1): - filename = template.format(tilenum=itile) - src = os.path.join(self.task_config.DATA, 'anl', filename) - dest = os.path.join(self.task_config.COMOUT_SNOW_ANALYSIS, filename) - anllist.append([src, dest]) - FileHandler({'copy': anllist}).sync() + # Compress and tar diag files into COM directory + self.tar_diag_files(self.task_config.COMOUT_SNOW_ANALYSIS, + f"{self.task_config.APREFIX}snowstat.tgz") - logger.info('Copy increments to COM') - template = f'snowinc.{to_fv3time(self.task_config.current_cycle)}.sfc_data.tile{{tilenum}}.nc' - inclist = [] - for itile in range(1, 
self.task_config.ntiles + 1): - filename = template.format(tilenum=itile) - src = os.path.join(self.task_config.DATA, 'anl', filename) - dest = os.path.join(self.task_config.COMOUT_SNOW_ANALYSIS, filename) - inclist.append([src, dest]) - FileHandler({'copy': inclist}).sync() + # Save files to COM + logger.info(f"Saving files to COM") + FileHandler(self.task_config.data_out).sync() @logit(logger) def add_increments(self) -> None: @@ -269,7 +159,7 @@ def add_increments(self) -> None: bkgtimes = [] if self.task_config.DOIAU: # want analysis at beginning and middle of window - bkgtimes.append(self.task_config.SNOW_WINDOW_BEGIN) + bkgtimes.append(self.task_config.WINDOW_BEGIN) bkgtimes.append(self.task_config.current_cycle) anllist = [] for bkgtime in bkgtimes: @@ -284,7 +174,7 @@ def add_increments(self) -> None: if self.task_config.DOIAU: logger.info("Copying increments to beginning of window") template_in = f'snowinc.{to_fv3time(self.task_config.current_cycle)}.sfc_data.tile{{tilenum}}.nc' - template_out = f'snowinc.{to_fv3time(self.task_config.SNOW_WINDOW_BEGIN)}.sfc_data.tile{{tilenum}}.nc' + template_out = f'snowinc.{to_fv3time(self.task_config.WINDOW_BEGIN)}.sfc_data.tile{{tilenum}}.nc' inclist = [] for itile in range(1, self.task_config.ntiles + 1): filename_in = template_in.format(tilenum=itile) diff --git a/ush/python/pygfs/task/snowens_analysis.py b/ush/python/pygfs/task/snowens_analysis.py index 9a94a55c9e5..7a9d0c7acff 100644 --- a/ush/python/pygfs/task/snowens_analysis.py +++ b/ush/python/pygfs/task/snowens_analysis.py @@ -9,7 +9,8 @@ import tarfile import numpy as np from netCDF4 import Dataset - +from pygfs.task.analysis import Analysis +from pygfs.jedi import Jedi from wxflow import (AttrDict, FileHandler, to_fv3time, to_YMD, to_YMDH, to_timedelta, add_to_datetime, @@ -17,16 +18,14 @@ rm_p, cp, parse_j2yaml, save_as_yaml, Jinja, - Task, logit, Executable, WorkflowException) -from pygfs.jedi import Jedi logger = getLogger(__name__.split('.')[-1]) -class 
SnowEnsAnalysis(Task): +class SnowEnsAnalysis(Analysis): """ Class for JEDI-based global snow ensemble analysis tasks """ @@ -52,40 +51,34 @@ def __init__(self, config: Dict[str, Any]): super().__init__(config) _res = int(self.task_config['CASE_ENS'][1:]) - self.task_config['CASE'] = self.task_config['CASE_ENS'] - _window_begin = add_to_datetime(self.task_config.current_cycle, -to_timedelta(f"{self.task_config['assim_freq']}H") / 2) - # fix ocnres - self.task_config.OCNRES = f"{self.task_config.OCNRES :03d}" - - # Create a local dictionary that is repeatedly used across this class - local_dict = AttrDict( + # if 00z, do SCF preprocessing + _ims_file = os.path.join(self.task_config.COMIN_OBS, f'{self.task_config.OPREFIX}imssnow96.asc') + if self.task_config.cyc == 0 and os.path.exists(_ims_file): + _DO_IMS_SCF = True + else: + _DO_IMS_SCF = False + + # Extend task_config with variables repeatedly used across this class + self.task_config.update(AttrDict( { 'npx_ges': _res + 1, 'npy_ges': _res + 1, 'npz_ges': self.task_config.LEVS - 1, 'npz': self.task_config.LEVS - 1, - 'SNOW_WINDOW_BEGIN': _window_begin, - 'SNOW_WINDOW_LENGTH': f"PT{self.task_config['assim_freq']}H", - 'OPREFIX': f"{self.task_config.CDUMP}.t{self.task_config.cyc:02d}z.", - 'APREFIX': f"{self.task_config.RUN}.t{self.task_config.cyc:02d}z.", - 'GPREFIX': f"gdas.t{self.task_config.previous_cycle.hour:02d}z.", - 'snow_obsdatain_path': os.path.join(self.task_config.DATA, 'obs'), - 'snow_obsdataout_path': os.path.join(self.task_config.DATA, 'diags'), + 'CASE': self.task_config.CASE_ENS, 'snow_bkg_path': os.path.join('.', 'bkg', 'ensmean/'), - 'res': _res, + 'ims_file': _ims_file, + 'DO_IMS_SCF': _DO_IMS_SCF, # Boolean to decide if IMS snow cover processing is done } - ) + )) - # Extend task_config with local_dict - self.task_config = AttrDict(**self.task_config, **local_dict) + # Extend task_config with content of config yaml for this task + 
self.task_config.update(parse_j2yaml(self.task_config.TASK_CONFIG_YAML, self.task_config)) # Create JEDI object dictionary expected_keys = ['scf_to_ioda', 'snowanlvar', 'esnowanlensmean'] - self.jedi_dict = Jedi.get_jedi_dict(self.task_config.JEDI_CONFIG_YAML, self.task_config, expected_keys) - - # Boolean to decide if IMS snow cover processing is done - self.task_config.DO_IMS_SCF = False + self.jedi_dict = Jedi.get_jedi_dict(self.task_config.jedi_config, self.task_config, expected_keys) @logit(logger) def initialize(self) -> None: @@ -93,12 +86,8 @@ def initialize(self) -> None: This method will initialize a global snow ensemble analysis. This includes: + - stage input files from COM and create output directories - initialize JEDI applications - - staging model backgrounds - - staging observation files - - staging FV3-JEDI fix files - - staging B error files - - creating output directories Parameters ---------- @@ -109,77 +98,16 @@ def initialize(self) -> None: None """ - # stage backgrounds - logger.info(f"Staging background files from {self.task_config.STAGE_BKG_YAML}") - bkg_staging_dict = parse_j2yaml(self.task_config.STAGE_BKG_YAML, self.task_config) - FileHandler(bkg_staging_dict).sync() - logger.debug(f"Background files:\n{pformat(bkg_staging_dict)}") - - # stage orography - logger.info(f"Staging orography files from {self.task_config.STAGE_OROG_YAML}") - orog_staging_dict = parse_j2yaml(self.task_config.STAGE_OROG_YAML, self.task_config) - FileHandler(orog_staging_dict).sync() - logger.debug(f"Orography files:\n{pformat(orog_staging_dict)}") - # note JEDI will try to read the orog files for each member, let's just symlink - logger.info("Linking orography files for each member") - oro_files = glob.glob(os.path.join(self.task_config.DATA, 'orog', 'ens', '*')) - for mem in range(1, self.task_config.NMEM_ENS + 1): - dest = os.path.join(self.task_config.DATA, 'bkg', f"mem{mem:03}") - for oro_file in oro_files: - os.symlink(oro_file, os.path.join(dest, 
os.path.basename(oro_file))) - # need to symlink orography files for the ensmean too - dest = os.path.join(self.task_config.DATA, 'bkg', 'ensmean') - for oro_file in oro_files: - os.symlink(oro_file, os.path.join(dest, os.path.basename(oro_file))) - - # stage observations - logger.info(f"Staging list of observation files generated from JEDI config") - obs_dict = self.jedi_dict['snowanlvar'].render_jcb(self.task_config, 'snow_obs_staging') - FileHandler(obs_dict).sync() - logger.debug(f"Observation files:\n{pformat(obs_dict)}") - - # stage GTS bufr2ioda mapping YAML files - logger.info(f"Staging GTS bufr2ioda mapping YAML files from {self.task_config.STAGE_GTS_YAML}") - gts_mapping_list = parse_j2yaml(self.task_config.STAGE_GTS_YAML, self.task_config) - FileHandler(gts_mapping_list).sync() - - # stage FV3-JEDI fix files - logger.info(f"Staging JEDI fix files from {self.task_config.STAGE_JEDI_FIX_YAML}") - jedi_fix_dict = parse_j2yaml(self.task_config.STAGE_JEDI_FIX_YAML, self.task_config) - FileHandler(jedi_fix_dict).sync() - logger.debug(f"JEDI fix files:\n{pformat(jedi_fix_dict)}") - - # staging B error files - logger.info("Stage files for static background error") - berror_staging_dict = parse_j2yaml(self.task_config.STAGE_BERROR_YAML, self.task_config) - FileHandler(berror_staging_dict).sync() - logger.debug(f"Background error files:\n{pformat(berror_staging_dict)}") - - # need output dir for diags and anl - logger.debug("Create empty output [anl, diags] directories to receive output from executable") - newdirs = [ - os.path.join(self.task_config.DATA, 'anl'), - os.path.join(self.task_config.DATA, 'diags'), - ] - FileHandler({'mkdir': newdirs}).sync() - - # if 00z, do SCF preprocessing - if self.task_config.cyc == 0: - ims_scf_to_ioda_staging_dict = parse_j2yaml(self.task_config.STAGE_IMS_SCF2IODA_YAML, self.task_config) - FileHandler(ims_scf_to_ioda_staging_dict).sync() - self.jedi_dict['scf_to_ioda'].initialize(self.task_config) - # Check if file exists - 
ims_file = ims_scf_to_ioda_staging_dict['copy_opt'][0][1] - if os.path.exists(ims_file): - self.task_config.DO_IMS_SCF = True + # Stage files from COM + logger.info(f"Staging files from COM and creating output directories") + FileHandler(self.task_config.data_in).sync() - # initialize JEDI variational application - logger.info(f"Initializing JEDI variational DA application") + # Initialize JEDI applications + logger.info(f"Initializing JEDI applications") self.jedi_dict['snowanlvar'].initialize(self.task_config, clean_empty_obsspaces=False) - - # initialize ensemble mean computation - logger.info(f"Initializing JEDI ensemble mean application") self.jedi_dict['esnowanlensmean'].initialize(self.task_config) + if self.task_config.DO_IMS_SCF: + self.jedi_dict['scf_to_ioda'].initialize(self.task_config) @logit(logger) def execute(self, jedi_dict_key: str) -> None: @@ -203,10 +131,8 @@ def execute(self, jedi_dict_key: str) -> None: def finalize(self) -> None: """Performs closing actions of the Snow analysis task This method: - - tar and gzip the output diag files and place in COM/ - - copy the generated YAML file from initialize to the COM/ - - copy the analysis files to the COM/ - - copy the increment files to the COM/ + - compress and tar output diag files in COM + - save output files and YAMLs to COM Parameters ---------- @@ -214,69 +140,13 @@ def finalize(self) -> None: Instance of the SnowEnsAnalysis object """ - # ---- tar up diags - # path of output tar statfile - snowstat = os.path.join(self.task_config.COMOUT_SNOW_ANALYSIS, f"{self.task_config.APREFIX}snowstat.tgz") - - # get list of diag files to put in tarball - diags = glob.glob(os.path.join(self.task_config.DATA, 'diags', 'diag*nc')) - - logger.info(f"Compressing {len(diags)} diag files to {snowstat}") - - # gzip the files first - logger.debug(f"Gzipping {len(diags)} diag files") - for diagfile in diags: - with open(diagfile, 'rb') as f_in, gzip.open(f"{diagfile}.gz", 'wb') as f_out: - f_out.writelines(f_in) 
- - # open tar file for writing - logger.debug(f"Creating tar file {snowstat} with {len(diags)} gzipped diag files") - with tarfile.open(snowstat, "w|gz") as archive: - for diagfile in diags: - diaggzip = f"{diagfile}.gz" - archive.add(diaggzip, arcname=os.path.basename(diaggzip)) - - # get list of yamls to copy to ROTDIR - yamls = glob.glob(os.path.join(self.task_config.DATA, '*snow*yaml')) - - # copy full YAML from executable to ROTDIR - for src in yamls: - yaml_base = os.path.splitext(os.path.basename(src))[0] - dest_yaml_name = f"{self.task_config.RUN}.t{self.task_config.cyc:02d}z.{yaml_base}.yaml" - dest = os.path.join(self.task_config.COMOUT_CONF, dest_yaml_name) - logger.debug(f"Copying {src} to {dest}") - yaml_copy = { - 'copy': [[src, dest]] - } - FileHandler(yaml_copy).sync() + # Compress and tar diag files into COM directory + self.tar_diag_files(self.task_config.COMOUT_SNOW_ANALYSIS, + f"{self.task_config.APREFIX_ENS}snowstat.tgz") - logger.info("Copy analysis to COM") - bkgtimes = [] - if self.task_config.DOIAU: - # need both beginning and middle of window - bkgtimes.append(self.task_config.SNOW_WINDOW_BEGIN) - bkgtimes.append(self.task_config.current_cycle) - anllist = [] - for mem in range(1, self.task_config.NMEM_ENS + 1): - for bkgtime in bkgtimes: - template = f'{to_fv3time(bkgtime)}.sfc_data.tile{{tilenum}}.nc' - for itile in range(1, self.task_config.ntiles + 1): - filename = template.format(tilenum=itile) - src = os.path.join(self.task_config.DATA, 'anl', f"mem{mem:03d}", filename) - COMOUT_SNOW_ANALYSIS = self.task_config.COMOUT_SNOW_ANALYSIS.replace('ensstat', f"mem{mem:03d}") - dest = os.path.join(COMOUT_SNOW_ANALYSIS, filename) - anllist.append([src, dest]) - FileHandler({'copy': anllist}).sync() - - logger.info('Copy increments to COM') - template = f'snowinc.{to_fv3time(self.task_config.current_cycle)}.sfc_data.tile{{tilenum}}.nc' - inclist = [] - for itile in range(1, self.task_config.ntiles + 1): - filename = 
template.format(tilenum=itile) - src = os.path.join(self.task_config.DATA, 'anl', filename) - dest = os.path.join(self.task_config.COMOUT_SNOW_ANALYSIS, filename) - inclist.append([src, dest]) - FileHandler({'copy': inclist}).sync() + # Save files to COM + logger.info(f"Saving files to COM") + FileHandler(self.task_config.data_out).sync() @logit(logger) def add_increments(self) -> None: @@ -291,7 +161,7 @@ def add_increments(self) -> None: if self.task_config.DOIAU: logger.info("Copying increments to beginning of window") template_in = f'snowinc.{to_fv3time(self.task_config.current_cycle)}.sfc_data.tile{{tilenum}}.nc' - template_out = f'snowinc.{to_fv3time(self.task_config.SNOW_WINDOW_BEGIN)}.sfc_data.tile{{tilenum}}.nc' + template_out = f'snowinc.{to_fv3time(self.task_config.WINDOW_BEGIN)}.sfc_data.tile{{tilenum}}.nc' inclist = [] for itile in range(1, self.task_config.ntiles + 1): filename_in = template_in.format(tilenum=itile) @@ -304,7 +174,7 @@ def add_increments(self) -> None: bkgtimes = [] if self.task_config.DOIAU: # need both beginning and middle of window - bkgtimes.append(self.task_config.SNOW_WINDOW_BEGIN) + bkgtimes.append(self.task_config.WINDOW_BEGIN) bkgtimes.append(self.task_config.current_cycle) # loop over members diff --git a/ush/wave_grib2_sbs.sh b/ush/wave_grib2_sbs.sh index 77d56c4a9a1..b112248f6f8 100755 --- a/ush/wave_grib2_sbs.sh +++ b/ush/wave_grib2_sbs.sh @@ -25,7 +25,6 @@ # --------------------------------------------------------------------------- # # 0. 
Preparations -source "${HOMEgfs}/dev/ush/load_modules.sh" # Script inputs grdID=$1 diff --git a/ush/wave_grid_interp_sbs.sh b/ush/wave_grid_interp_sbs.sh index 382defac249..ed41b813610 100755 --- a/ush/wave_grid_interp_sbs.sh +++ b/ush/wave_grid_interp_sbs.sh @@ -22,8 +22,6 @@ # 0.a Basic modes of operation -source "${HOMEgfs}/dev/ush/load_modules.sh" - grdID=$1 valid_time=$2 dt=$3 diff --git a/ush/wave_grid_moddef.sh b/ush/wave_grid_moddef.sh index b2e5009b260..7cf6d3aa20b 100755 --- a/ush/wave_grid_moddef.sh +++ b/ush/wave_grid_moddef.sh @@ -17,7 +17,6 @@ # # ############################################################################### # - source "${HOMEgfs}/dev/ush/load_modules.sh" # --------------------------------------------------------------------------- # # 0. Preparations diff --git a/ush/wave_tar.sh b/ush/wave_tar.sh index 1eb12154ca9..9e278c79b02 100755 --- a/ush/wave_tar.sh +++ b/ush/wave_tar.sh @@ -24,10 +24,10 @@ # # --------------------------------------------------------------------------- # # 0. Preparations - source "${HOMEgfs}/dev/ush/load_modules.sh" + # 0.a Basic modes of operation - cd "${DATA}" || exit 1 + cd "${DATA}" echo "Making TAR FILE" alertName=$(echo $RUN|tr [a-z] [A-Z]) @@ -165,7 +165,7 @@ EOF # --------------------------------------------------------------------------- # # 4. 
Final clean up -cd "${DATA}" || exit 5 +cd "${DATA}" if [[ ${KEEPDATA:-NO} == "NO" ]]; then set -v From 73311d156c70d2cb69a87255da349b37d0c51b86 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Fri, 10 Oct 2025 00:29:37 +0000 Subject: [PATCH 131/134] add PYCMD --- ush/preamble.sh | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/ush/preamble.sh b/ush/preamble.sh index a9d657ded86..4c88032e89c 100755 --- a/ush/preamble.sh +++ b/ush/preamble.sh @@ -189,16 +189,6 @@ else export PYEXTRAARGS="" fi -if [[ "${RUN_WITH_CONTAINER}" == "YES" ]]; then - # if within container, will run python executbale inside container, - # also need to tell some scripts that it is run in container, with "-c" option. - export PYCMD="${HOMEgfs}"/exec/run_python.sh - export PYEXTRAARGS=" -c -v" -else - export PYCMD=python - export PYEXTRAARGS="" -fi - # Turn on our settings export SHELLOPTS declare -xf set_strict From e4432244963843bd6f7b140b18fa1dcb2d6605cb Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Fri, 10 Oct 2025 13:55:16 +0000 Subject: [PATCH 132/134] no need to save to repo, as it is a link --- dev/parm/config/sfs/config.resources.URSA | 1 - 1 file changed, 1 deletion(-) delete mode 120000 dev/parm/config/sfs/config.resources.URSA diff --git a/dev/parm/config/sfs/config.resources.URSA b/dev/parm/config/sfs/config.resources.URSA deleted file mode 120000 index 6d3d16eda14..00000000000 --- a/dev/parm/config/sfs/config.resources.URSA +++ /dev/null @@ -1 +0,0 @@ -../gfs/config.resources.URSA \ No newline at end of file From d8678e2e368315cf4b8a08c2bbdbeb9704741835 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Fri, 10 Oct 2025 19:58:03 +0000 Subject: [PATCH 133/134] testing on AWS --- dev/container/gen-run-cases.sh | 1 + dev/container/utils/gen-wrapper.sh | 26 ++++++++++++++++++++++++++ dev/container/utils/link_ww3.sh | 3 ++- dev/jobs/waveinit.sh | 7 ++++++- dev/ush/load_modules.sh | 2 ++ ush/run_mpmd.sh | 2 -- 6 files changed, 37 insertions(+), 4 deletions(-) diff --git 
a/dev/container/gen-run-cases.sh b/dev/container/gen-run-cases.sh index 685bb3e7389..3c7810b9936 100755 --- a/dev/container/gen-run-cases.sh +++ b/dev/container/gen-run-cases.sh @@ -48,6 +48,7 @@ fi set -x mkdir -p "${rundir}" +mkdir -p "${HOMEDIR}"/exec mkdir -p "${HOMEDIR}"/ush/container cd "${HOMEDIR}/dev/workflow" || exit 1 diff --git a/dev/container/utils/gen-wrapper.sh b/dev/container/utils/gen-wrapper.sh index 167ae78d915..ad9a5fbe842 100755 --- a/dev/container/utils/gen-wrapper.sh +++ b/dev/container/utils/gen-wrapper.sh @@ -75,3 +75,29 @@ EOF_RUN_PYTHON sed -i 's/RUN_WITH_CONTAINER=NO/RUN_WITH_CONTAINER=YES/g' "${HOMEgfs}/ush/preamble.sh" chmod +x "${exec_python_script}" chmod +x "${run_python_script}" + +for item in JGLOBAL_WAVE_INIT +do + exec_script="${HOMEgfs}"/exec/"${item}" + +cat > "${exec_script}" << EOF_SCRIPT +#!/bin/bash +#Need these lines on AWS to run more than one node. +#export I_MPI_DEBUG=10 +#export I_MPI_FABRICS=shm:ofi +#export I_MPI_OFI_PROVIDER=tcp +#export FI_PROVIDER=tcp +#export FI_TCP_IFACE=eth0 + + LD_LIBRARY_PATH=\$(dirname "${container}") + export LD_LIBRARY_PATH + + singularity exec \\ + ${bindings} \\ + ${container} \\ + ${HOMEgfs}/jobs/${item} +EOF_SCRIPT + + chmod +x "${exec_script}" +done + diff --git a/dev/container/utils/link_ww3.sh b/dev/container/utils/link_ww3.sh index bbc357aa799..1950ab0de14 100755 --- a/dev/container/utils/link_ww3.sh +++ b/dev/container/utils/link_ww3.sh @@ -47,7 +47,8 @@ fi # pdlib=pdlib_OFF #fi -for nm in gint grib grid ounf ounp outf outp prep prnc +#for nm in gint grib grid ounf ounp outf outp prep prnc +for nm in gint grib ounf ounp outf outp prep prnc do model=ww3_${nm} #echo "model: $model" diff --git a/dev/jobs/waveinit.sh b/dev/jobs/waveinit.sh index c04a50298ec..5ee6b42ea43 100755 --- a/dev/jobs/waveinit.sh +++ b/dev/jobs/waveinit.sh @@ -4,7 +4,8 @@ set -x ############################################################### #source "${HOMEgfs}/dev/ush/load_modules.sh" run -source 
"${HOMEgfs}/dev/ush/load_modules.sh" ufswm +#source "${HOMEgfs}/dev/ush/load_modules.sh" ufswm +source "${HOMEgfs}/ush/preamble.sh" status=$? if [[ ${status} -ne 0 ]]; then exit "${status}" @@ -15,7 +16,11 @@ export jobid="${job}.$$" ############################################################### # Execute the JJOB +if [[ "${RUN_WITH_CONTAINER}" == "YES" ]]; then +"${HOMEgfs}/exec/JGLOBAL_WAVE_INIT" +else "${HOMEgfs}/jobs/JGLOBAL_WAVE_INIT" +fi status=$? exit "${status}" diff --git a/dev/ush/load_modules.sh b/dev/ush/load_modules.sh index 7f5ccf5c69f..56ab06b8b14 100644 --- a/dev/ush/load_modules.sh +++ b/dev/ush/load_modules.sh @@ -90,6 +90,7 @@ case "${MODULE_TYPE}" in module use "${HOMEgfs}/sorc/gfs_utils.fd/modulefiles" module load gfsutils_container.intel module load wgrib2 + module load gettext module load prod_util export UTILROOT=${prod_util_ROOT} else @@ -181,6 +182,7 @@ case "${MODULE_TYPE}" in module use "${HOMEgfs}/sorc/gfs_utils.fd/modulefiles" module load gfsutils_container.intel module load wgrib2 + module load gettext module load prod_util export UTILROOT=${prod_util_ROOT} else diff --git a/ush/run_mpmd.sh b/ush/run_mpmd.sh index 923ed468a48..e95e295a260 100755 --- a/ush/run_mpmd.sh +++ b/ush/run_mpmd.sh @@ -28,8 +28,6 @@ # ################################################################################ -source "${HOMEgfs}"/dev/ush/load_modules.sh - cmdfile=${1:?"run_mpmd requires an input file containing commands to execute in MPMD/serial mode"} # If USE_CFP is not set, run in serial mode From f86f639893bc976097cd67712749ac7ea4da7caa Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Mon, 13 Oct 2025 22:57:56 +0000 Subject: [PATCH 134/134] remove env/*.container --- dev/container/gen-run-cases.sh | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/dev/container/gen-run-cases.sh b/dev/container/gen-run-cases.sh index f7d7f46bab7..bd3094bcf2e 100755 --- a/dev/container/gen-run-cases.sh +++ b/dev/container/gen-run-cases.sh @@ 
-6,6 +6,7 @@ HOMEgfs="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." >/dev/null 2>&1 && pwd )" source "${HOMEgfs}/ush/detect_machine.sh" run_with_container="YES" +#run_with_container="NO" casetype="pr" #yamllist="C48_ATM" @@ -26,10 +27,6 @@ if [[ ${MACHINE_ID} = ursa* ]] ; then module load rocoto/1.3.7 rocotocmd=$(command -v rocotorun) - - if [[ "${run_with_container}" == "YES" ]]; then - cp "${HOMEgfs}/env/URSA.env.container" "${HOMEgfs}/env/URSA.env" - fi elif [[ ${MACHINE_ID} = gaea* ]] ; then container=/gpfs/f6/scratch/Wei.Huang/container/${img} rundir=/gpfs/f6/scratch/${USER}/run @@ -37,9 +34,6 @@ elif [[ ${MACHINE_ID} = gaea* ]] ; then HPC_ACCOUNT=bil-fire8 rocotocmd=/autofs/ncrc-svm1_home2/Christopher.W.Harrop/rocoto-1.3.7/bin/rocotorun - if [[ "${run_with_container}" == "YES" ]]; then - cp "${HOMEgfs}/env/GAEAC6.env.container" "${HOMEgfs}/env/GAEAC6.env" - fi elif [[ ${MACHINE_ID} = noaacloud* ]] ; then TOPICDIR=/bucket/global-workflow-shared-data/ICSDIR container=/contrib/containers/${img} @@ -54,11 +48,7 @@ fi mkdir -p "${rundir}" -# cd "${HOMEDIR}/dev/workflow" || exit 1 -if ! cd "${HOMEDIR}/dev/workflow"; then - echo "Error: Could not change to the workflow directory. Aborting." >&2 - exit 1 -fi +cd "${HOMEDIR}/dev/workflow" || exit 1 if [[ "${run_with_container}" == "YES" ]]; then "${HOMEDIR}/dev/container/utils/gen-wrapper.sh" -H "${HOMEDIR}" -c "${container}" -b "${bindings}" -v