From f7973b1192f0ba2506fd22f956237a57b21371ae Mon Sep 17 00:00:00 2001 From: Hannah O Nesser Date: Tue, 22 Aug 2023 13:41:49 -0700 Subject: [PATCH 01/43] Adding a set of functions to format GEOS-Chem input files for HEMCO compatibility. --- gcpy/format_HEMCO.py | 304 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 304 insertions(+) create mode 100644 gcpy/format_HEMCO.py diff --git a/gcpy/format_HEMCO.py b/gcpy/format_HEMCO.py new file mode 100644 index 00000000..63e1d9e6 --- /dev/null +++ b/gcpy/format_HEMCO.py @@ -0,0 +1,304 @@ + +import xarray as xr +import numpy as np +import pandas as pd +from copy import deepcopy as dc +from os.path import join + +def format_HEMCO_dimensions(ds, + start_time="2000-01-01 00:00:00", + lev_long_name="level", + lev_units="level", + lev_formula_terms=None, + gchp=False): + """ + Formats time, lat, lon, and lev (optionally) attributes for coards + compliance (HEMCO compatibility). + + Args: + ds: xarray Dataset + Dataset containing at least latitude and longitude + variables, which must be named lat and lon, respectively. + + Keyword Args (optional): + start_time: string of the format "YYYY-MM-DD HH:mm:ss" + String containing the start time of the dataset for + the purposes of encoding the time dimension. For GCHP + compliance, the first time value must be 0 time units + from the beginning of the unit. The default value is + January 1, 2000. + lev_long_name: string + A detailed description of the level attribute. Examples + include "level", "GEOS-Chem levels", "Eta centers", or + "Sigma centers". Default is "level." + lev_units: string + The unit of the vertical levels, which should be "level", + "eta_level", or "sigma_level". Setting both lev_units and + lev_long_name to "level" allows HEMCO to regrid between + vertical grids. Default is "level". + lev_formula_terms: string or None + If data is used that is not on the model vertical grid, the + data must contain surface pressure values and the hybrid coefficients + of the coordinate system together with the terms in the formula + (e.g., ”ap: hyam b: hybm ps: PS”). Default is None. + gchp: boolean + Boolean identifying whether this file is for use in + GCHP (True) or GEOS-Chem Classic (False). This is primarily + used to set the lev attributes. The default value is + False. + + Returns: + ds: xarray Dataset + An updated version of ds with encoding and attributes + set to be coards/HEMCO compliant. + """ + # Require that ds is an xarray Dataset object + if not isinstance(ds, xr.Dataset): + raise TypeError("The ds argument must be an xarray Dataset.") + + # Check that latitude and longitude are found in the dataset + ## First force all dimension names to be lowercase: + ds = ds.rename_dims({k : k.lower() for k in ds.dims.keys() + if k != k.lower()}) + + # Check and format each of the required dimensions + ds = _format_lat(ds) + ds = _format_lon(ds) + ds = _format_time(ds, start_time) + + # If level is included in the dimensions, set its attributes + if "lev" in ds.coords: + # Note: this is relatively untested (2023/08/21 HON) + ds = _format_lev(ds, lev_long_name, lev_units, + lev_formula_terms, gchp) + + # Require data order to be time, lat, lon (optionally lev) + ds = ds.transpose("time", "lat", "lon", ...) + + # Return the dataset + return ds + + +def _format_lat(ds): + ''' + Formats the latitude dimension for coards compliance. + See define_HEMCO_dimensions for argument listings. 
+    '''
+    # If there is a dimension called latitude, rename it
+    # (This function assumes ds has dimension names that are
+    # all lower case)
+    if "latitude" in ds.dims.keys():
+        ds = ds.rename_dims({"latitude" : "lat"})
+
+    # Require that lat is a monotonically increasing dimension
+    _check_required_dim(ds, "lat")
+
+    # Set attributes
+    ds["lat"].attrs = {"long_name": "latitude",
+                       "units": "degrees_north",
+                       "axis" : "Y"}
+
+    return ds
+
+
+def _format_lon(ds):
+    '''
+    Formats the longitude dimension for coards compliance.
+    See define_HEMCO_dimensions for argument listings.
+    '''
+    # If there is a dimension called longitude, rename it
+    # (This function assumes ds has dimension names that are
+    # all lower case)
+    if "longitude" in ds.dims.keys():
+        ds = ds.rename_dims({"longitude" : "lon"})
+
+    # Require that lon is a monotonically increasing dimension
+    _check_required_dim(ds, "lon")
+
+    # Set attributes
+    ds["lon"].attrs = {"long_name": "longitude",
+                       "units": "degrees_east",
+                       "axis" : "X"}
+
+    return ds
+
+
+def _format_time(ds, start_time):
+    '''
+    Formats the time dimension for coards compliance.
+    See define_HEMCO_dimensions for argument listings.
+    '''
+    if "time" not in ds.coords:
+        # If time isn't already in the coords, create a dummy variable
+        ds = ds.assign_coords(time=pd.to_datetime(start_time))
+        ds = ds.expand_dims("time")
+    else:
+        # Otherwise, update start_time to match the first time in the file,
+        # consistent with GCHP requirements
+        new_start_time = pd.to_datetime(ds["time"][0].values)
+        new_start_time = new_start_time.strftime("%Y-%m-%d %H:%M:%S")
+        print(f"Updating the reference start time from")
+        print(f"{start_time} to {new_start_time}")
+        print(f"so that time(0) = 0, consistent with GCHP requirements.")
+        start_time = new_start_time
+
+    # Now check that time is a monotonically increasing dimension
+    _check_required_dim(ds, "time")
+
+    # Set attributes
+    ds["time"].encoding= {"units" : f"hours since {start_time}",
+                          "calendar" : "standard"}
+    ds["time"].attrs = {"long_name" : "Time", "axis" : "T"}
+
+    return ds
+
+
+def _format_lev(ds, lev_long_name, lev_units, lev_formula_terms, gchp):
+    '''
+    Formats the level dimension for coards compliance.
+    See define_HEMCO_dimensions for argument listings.
+    '''
+    ## HON 2023/08/22: This is relatively untested
+
+    # If there is a dimension called level, rename it
+    if "level" in ds.dims.keys():
+        ds = ds.rename_dims({"level" : "lev"})
+
+    # If formula is provided, check that the components of the
+    # formula are included.
+    if lev_formula_terms is not None:
+        terms = lev_formula_terms.split(": ")
+        terms = [t for i, t in enumerate(terms) if i % 2 == 1]
+        for t in terms:
+            if t not in ds.data_vars.keys():
+                raise ValueError(f"{t} is in lev_formula_terms and could \
+                                 not be found.")
+
+    # If unit is level, require that the levels are integers
+    if (lev_units == "level") and (ds["lev"] != ds["lev"].astype(int)).any():
+        raise ValueError("lev has units of level but dimension values \
+                         are not integers.")
+
+    # Set attributes
+    ## Set positive to match the GCHP/GEOS-Chem conventions
+    if gchp:
+        positive = "down"
+    else:
+        positive = "up"
+
+    ## Setting both long_name and units to "level" allows HEMCO
+    ## to regrid between vertical grids (e.g., 47 -> 72 levels).
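+    ## (Data on eta or sigma coordinates should instead set lev_units
+    ## to "eta_level" or "sigma_level", as noted in the docstring.)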
+ lev_attrs = {"long_name" : lev_long_name, + "units" : lev_units, + "positive" : positive, + "axis" : "Z"} + if lev_formula_terms is not None: + lev_attrs.update({"formula_terms" : lev_formula_terms}) + + ## Set the attributes + ds["lev"].attrs = lev_attrs + + return ds + + +def _check_required_dim(ds, dim): + ''' + Checks required dimensions (time, latitude, and longitude) + for COARDS compliance (that the dimension exists and is + monotonically increasing). + + Args: + ds: xarray Dataset + dim: string ("time", "lat", or "lon") + A string corresponding to the required dimension + ''' + if dim not in ["time", "lat", "lon"]: + raise ValueError(f"{dim} is not a required dimension.") + + # Check that the dim is included in + if dim not in ds.dims.keys(): + raise ValueError(f"{dim} is not included in the dimensions.") + + # Require that the variable is monotonically increasing + if np.any(np.diff(ds[dim]).astype("float") < 0): + raise ValueError(f"{dim} is not monotonically increasing.") + + return ds + + +def format_HEMCO_variable(ds, var, long_name, units, **kwargs): + """ + Formats attributes for non-standard variables for coards compliance + (HEMCO compatibility). + + Args: + ds: xarray Dataset + Dataset containing HEMCO input data. + var: string + The name of the non-standard variable to be formatted. + long_name: string + A required HEMCO attribute, a more descriptive name for + var. + units: string + A required HEMCO attribute giving the units of var. See + https://hemco.readthedocs.io/en/stable/hco-ref-guide/input-file-format.html + for more information. + kwargs: dictionary + Any other attributes wanted for the variable. + + Returns: + ds: xarray Dataset + An updated version of ds with variable attributes + set to be coards/HEMCO compliant. + """ + ds[var].attrs = {"long_name" : long_name, "units" : units, + **kwargs} + return ds + + +def save_HEMCO_netcdf(ds, save_dir, save_name, dtype="float", **kwargs): + """ + Saves coards compliant (HEMCO compatible) netcdf. + + Args: + ds: xarray Dataset + Dataset containing HEMCO input data. + save_dir: string + The directory where the data will be saved. + save_name: string + The name the file will be named under. + + Keyword Args (optional): + dtype: data type + The data type the data will be saved as. Default is + float32 to minimize memory usage. + kwargs: dictionary + Any other attributes to be passed to the xarray + to_netcdf function. 
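+
+    Example (a minimal sketch; the species, units, and file name are
+    hypothetical):
+        >>> ds = format_HEMCO_dimensions(ds)
+        >>> ds = format_HEMCO_variable(ds, "CH4", "Methane emissions",
+        ...                            "kg/m2/s")
+        >>> save_HEMCO_netcdf(ds, save_dir=".", save_name="CH4_emissions.nc")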
+ """ + # Check that the save_name ends in .nc + if save_name.split(".")[-1][:2] != "nc": + save_name = f"{save_name}.nc" + + # Get time encoding before overwriting + time_units = ds["time"].encoding["units"] + calendar = ds["time"].encoding["calendar"] + + # Set default encoding and dtype for all variables and coordinates + encoding = {"_FillValue" : None, "dtype" : dtype} + var = {k : dc(encoding) for k in ds.keys()} + coord = {k : dc(encoding) for k in ds.coords} + + # Manually update the time encoding, which is often overwritten + # by xarray defaults + coord["time"]["units"] = time_units + coord["time"]["calendar"] = calendar + var.update(coord) + + # Save out + ds.to_netcdf(join(save_dir, save_name), encoding=var, + unlimited_dims=["time"], **kwargs) + + print("-"*70) + print("Saved to", join(save_dir, save_name)) + print("-"*70) \ No newline at end of file From e53948efe19cae2c129a0fc1bfb35ed413c6f7bf Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Wed, 23 Aug 2023 10:56:42 -0400 Subject: [PATCH 02/43] Renamne format_HEMCO.py to format_hemco_data.py + other updates gcpy/__init__.py - Added "from .format_hemco_data" import *" to make sure that functions are imported from format_hemco_data.py gcpy/format_HEMCO.py - Renamed to format_hemco_data.py gcpy/format_hemco_data.py - Added code Y formatting updates suggested by Pylint - Added _update_variable_attributes function so that we replace or update variable attributes to COARDS-conforming values without clobbering other attributes that may be present - Updated imports statements to the proper order (suggested by Pylint) - Added PyDoc header at the top of the module - Renamed "t" variable to "term" to be snake_case conforming CHANGELOG.md - updated accordingly Signed-off-by: Bob Yantosca --- CHANGELOG.md | 1 + gcpy/__init__.py | 1 + .../{format_HEMCO.py => format_hemco_data.py} | 332 +++++++++++------- 3 files changed, 214 insertions(+), 120 deletions(-) rename gcpy/{format_HEMCO.py => format_hemco_data.py} (50%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 16a2b079..f02ff3f5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Added new routine `format_number_for_table` in `util.py` - Added BrSALA and BrSALC to `emission_species.yml` - Added `ENCODING = "UTF-8"` to `gcpy/constants.py` +- Added `gcpy/format_hemco_data.py` from @hannahnesser ### Changed - Simplified the Github issues templates into two options: `new-feature-or-discussion.md` and `question-issue.md` diff --git a/gcpy/__init__.py b/gcpy/__init__.py index 9d9efbb1..aeeaf9f0 100644 --- a/gcpy/__init__.py +++ b/gcpy/__init__.py @@ -27,3 +27,4 @@ from .file_regrid import * from .grid_stretching_transforms import * from .cstools import * +from .format_hemco_data import * diff --git a/gcpy/format_HEMCO.py b/gcpy/format_hemco_data.py similarity index 50% rename from gcpy/format_HEMCO.py rename to gcpy/format_hemco_data.py index 63e1d9e6..dc19014e 100644 --- a/gcpy/format_HEMCO.py +++ b/gcpy/format_hemco_data.py @@ -1,22 +1,28 @@ - +""" +Contains functions to make sure that data files to be read by +HEMCO adhere to the COARDS netCDF conventions. 
+""" import xarray as xr import numpy as np import pandas as pd from copy import deepcopy as dc from os.path import join -def format_HEMCO_dimensions(ds, - start_time="2000-01-01 00:00:00", - lev_long_name="level", - lev_units="level", - lev_formula_terms=None, - gchp=False): + +def format_hemco_dimensions( + dset, + start_time="2000-01-01 00:00:00", + lev_long_name="level", + lev_units="level", + lev_formula_terms=None, + gchp=False +): """ - Formats time, lat, lon, and lev (optionally) attributes for coards + Formats time, lat, lon, and lev (optionally) attributes for coards compliance (HEMCO compatibility). - + Args: - ds: xarray Dataset + dset: xarray Dataset Dataset containing at least latitude and longitude variables, which must be named lat and lon, respectively. @@ -24,11 +30,11 @@ def format_HEMCO_dimensions(ds, start_time: string of the format "YYYY-MM-DD HH:mm:ss" String containing the start time of the dataset for the purposes of encoding the time dimension. For GCHP - compliance, the first time value must be 0 time units + compliance, the first time value must be 0 time units from the beginning of the unit. The default value is January 1, 2000. lev_long_name: string - A detailed description of the level attribute. Examples + A detailed description of the level attribute. Examples include "level", "GEOS-Chem levels", "Eta centers", or "Sigma centers". Default is "level." lev_units: string @@ -38,104 +44,162 @@ def format_HEMCO_dimensions(ds, vertical grids. Default is "level". lev_formula_terms: string or None If data is used that is not on the model vertical grid, the - data must contain surface pressure values and the hybrid coefficients - of the coordinate system together with the terms in the formula - (e.g., ”ap: hyam b: hybm ps: PS”). Default is None. + data must contain surface pressure values and the hybrid + coefficients of the coordinate system together with the + terms in the formula(e.g., ”ap: hyam b: hybm ps: PS”). + Default is None. gchp: boolean - Boolean identifying whether this file is for use in + Boolean identifying whether this file is for use in GCHP (True) or GEOS-Chem Classic (False). This is primarily - used to set the lev attributes. The default value is + used to set the lev attributes. The default value is False. - + Returns: - ds: xarray Dataset - An updated version of ds with encoding and attributes + dset: xarray Dataset + An updated version of dset with encoding and attributes set to be coards/HEMCO compliant. 
""" - # Require that ds is an xarray Dataset object - if not isinstance(ds, xr.Dataset): - raise TypeError("The ds argument must be an xarray Dataset.") + # Require that dset is an xarray Dataset object + if not isinstance(dset, xr.Dataset): + raise TypeError("The dset argument must be an xarray Dataset.") # Check that latitude and longitude are found in the dataset ## First force all dimension names to be lowercase: - ds = ds.rename_dims({k : k.lower() for k in ds.dims.keys() + dset = dset.rename_dims({k : k.lower() for k in dset.dims.keys() if k != k.lower()}) # Check and format each of the required dimensions - ds = _format_lat(ds) - ds = _format_lon(ds) - ds = _format_time(ds, start_time) + dset = _format_lat(dset) + dset = _format_lon(dset) + dset = _format_time(dset, start_time) # If level is included in the dimensions, set its attributes - if "lev" in ds.coords: + if "lev" in dset.coordset: # Note: this is relatively untested (2023/08/21 HON) - ds = _format_lev(ds, lev_long_name, lev_units, + dset = _format_lev(dset, lev_long_name, lev_units, lev_formula_terms, gchp) - + # Require data order to be time, lat, lon (optionally lev) - ds = ds.transpose("time", "lat", "lon", ...) + dset = dset.transpose("time", "lat", "lon", ...) # Return the dataset - return ds + return dset + + +def _update_variable_attributes( + var_attrs, + coards_attrs +): + """ + Adds COARDS conforming variable attributes and/or replaces + existing variable attributes with COARDS-conforming values. + Args: + var_attrs : dict + Dictionary of variable attributes. + coards_attrs : dict + Dictionary of COARDS-conforming variable attributes. + + Returns + var_attrs : dict + Modified dictionary of variable attributes + """ -def _format_lat(ds): + # Test if each COARDS-conforming attribute is + # present in the list of variable attributes. + found = {} + for (name, _) in coards_attrs.items(): + found[name] = name in var_attrs.keys() + + # If the variable attribute has a COARDS-conforming name, + # then replace it with a COARDS-conforming attribute value. + # + # If the variable attribute is missing, then add the + # COARDS-conforming attribute to the list of variable attrs. + # + # This makes sure that we add/replace variable attrs + # but do not clobber any other existing variable attrs. + for (name, value) in coards_attrs.items(): + if found[name]: + var_attrs.update({name: value}) + else: + var_attrs[name] = value + + return var_attrs + + +def _format_lat(dset): ''' Formats the latitude dimension for coards compliance. See define_HEMCO_dimensions for argument listings. ''' # If there is a dimension is called latitude, rename it - # (This function assumes ds has dimension names that are + # (This function assumes ds has dimension names that are # all lower case) - if "latitude" in ds.dims.keys(): - ds = ds.rename_dims({"latitude" : "lat"}) + if "latitude" in dset.dims.keys(): + dset = dset.rename_dims({"latitude" : "lat"}) # Require that lat is a monotonically increasing dimension - _check_required_dim(ds, "lat") + _check_required_dim(dset, "lat") - # Set attributes - ds["lat"].attrs = {"long_name": "latitude", - "units": "degrees_north", - "axis" : "Y"} + # Update attributes to be COARDS-conforming + dset["lat"].attrs = _update_variable_attributes( + dset["lat"].attrs, + coards_attrs={ + "long_name": "latitude", + "units": "degrees_north", + "axis" : "Y" + } + ) - return ds + return dset -def _format_lon(ds): +def _format_lon( + dset +): ''' Formats the longitude dimension for coards compliance. 
     See define_HEMCO_dimensions for argument listings.
     '''
     # If there is a dimension called longitude, rename it
-    # (This function assumes ds has dimension names that are
+    # (This function assumes dset has dimension names that are
     # all lower case)
-    if "longitude" in ds.dims.keys():
-        ds = ds.rename_dims({"longitude" : "lon"})
+    if "longitude" in dset.dims.keys():
+        dset = dset.rename_dims({"longitude" : "lon"})

     # Require that lon is a monotonically increasing dimension
-    _check_required_dim(ds, "lon")
+    _check_required_dim(dset, "lon")

-    # Set attributes
-    ds["lon"].attrs = {"long_name": "longitude",
-                       "units": "degrees_east",
-                       "axis" : "X"}
-
-    return ds
+    # Update attributes to be COARDS-conforming
+    dset["lon"].attrs = _update_variable_attributes(
+        dset["lon"].attrs,
+        coards_attrs={
+            "long_name": "longitude",
+            "units": "degrees_east",
+            "axis" : "X"
+        }
+    )
+
+    return dset


-def _format_time(ds, start_time):
+def _format_time(
+    dset,
+    start_time
+):
     '''
-    Formats the time dimension for coards compliance.
+    Formats the time dimension for COARDS compliance.
     See define_HEMCO_dimensions for argument listings.
     '''
-    if "time" not in ds.coords:
-        # If time isn't already in the coords, create a dummy variable
-        ds = ds.assign_coords(time=pd.to_datetime(start_time))
-        ds = ds.expand_dims("time")
+    if "time" not in dset.coords:
+        # If time isn't already in the coords, create a dummy variable
+        dset = dset.assign_coords(time=pd.to_datetime(start_time))
+        dset = dset.expand_dims("time")
     else:
         # Otherwise, update start_time to match the first time in the file,
         # consistent with GCHP requirements
-        new_start_time = pd.to_datetime(ds["time"][0].values)
+        new_start_time = pd.to_datetime(dset["time"][0].values)
         new_start_time = new_start_time.strftime("%Y-%m-%d %H:%M:%S")
         print(f"Updating the reference start time from")
         print(f"{start_time} to {new_start_time}")
         print(f"so that time(0) = 0, consistent with GCHP requirements.")
         start_time = new_start_time

     # Now check that time is a monotonically increasing dimension
-    _check_required_dim(ds, "time")
+    _check_required_dim(dset, "time")

-    # Set attributes
-    ds["time"].encoding= {"units" : f"hours since {start_time}",
-                          "calendar" : "standard"}
-    ds["time"].attrs = {"long_name" : "Time", "axis" : "T"}
+    # Set attributes and make sure they are COARDS conforming.
+    dset["time"].encoding= {
+        "units" : f"hours since {start_time}",
+        "calendar" : "standard"
+    }
+    dset["time"].attrs = _update_variable_attributes(
+        dset["time"].attrs,
+        coards_attrs={
+            "long_name": "Time",
+            "axis" : "T"
+        }
+    )

-    return ds
+    return dset


-def _format_lev(ds, lev_long_name, lev_units, lev_formula_terms, gchp):
+def _format_lev(dset, lev_long_name, lev_units, lev_formula_terms, gchp):
     '''
-    Formats the level dimension for coards compliance.
+    Formats the level dimension for COARDS compliance.
     See define_HEMCO_dimensions for argument listings.
     '''
     ## HON 2023/08/22: This is relatively untested

     # If there is a dimension called level, rename it
-    if "level" in ds.dims.keys():
-        ds = ds.rename_dims({"level" : "lev"})
-
-    # If formula is provided, check that the components of the
+    if "level" in dset.dims.keys():
+        dset = dset.rename_dims({"level" : "lev"})
+
+    # If formula is provided, check that the components of the
     # formula are included.
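     # (For example, the docstring's lev_formula_terms = "ap: hyam b: hybm
     # ps: PS" expects variables named hyam, hybm, and PS in the dataset.)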
     if lev_formula_terms is not None:
         terms = lev_formula_terms.split(": ")
         terms = [t for i, t in enumerate(terms) if i % 2 == 1]
         for t in terms:
-            if t not in ds.data_vars.keys():
+            if t not in dset.data_vars.keys():
                 raise ValueError(f"{t} is in lev_formula_terms and could \
                                  not be found.")

     # If unit is level, require that the levels are integers
-    if (lev_units == "level") and (ds["lev"] != ds["lev"].astype(int)).any():
+    if lev_units == "level" and \
+       (dset["lev"] != dset["lev"].astype(int)).any():
         raise ValueError("lev has units of level but dimension values \
                          are not integers.")

     # Set attributes
     ## Set positive to match the GCHP/GEOS-Chem conventions
+    positive = "up"
     if gchp:
         positive = "down"
-    else:
-        positive = "up"

+    ## Set attributes and make sure they are COARDS-conforming.
     ## Setting both long_name and units to "level" allows HEMCO
-    ## to regrid between vertical grids (e.g., 47 -> 72 levels).
-    lev_attrs = {"long_name" : lev_long_name,
-                 "units" : lev_units,
-                 "positive" : positive,
-                 "axis" : "Z"}
+    ## to regrid between vertical grids (e.g., 47 -> 72 levels).
+    dset["lev"].attrs = _update_variable_attributes(
+        dset["lev"].attrs,
+        coards_attrs={
+            "long_name" : lev_long_name,
+            "units" : lev_units,
+            "positive" : positive,
+            "axis" : "Z"
+        }
+    )
     if lev_formula_terms is not None:
-        lev_attrs.update({"formula_terms" : lev_formula_terms})
-
-    ## Set the attributes
-    ds["lev"].attrs = lev_attrs
+        dset["lev"].attrs.update({
+            "formula_terms" : lev_formula_terms
+            })

-    return ds
+    return dset


-def _check_required_dim(ds, dim):
+def _check_required_dim(
+    dset,
+    dim
+):
     '''
     Checks required dimensions (time, latitude, and longitude)
     for COARDS compliance (that the dimension exists and is
     monotonically increasing).

     Args:
-        ds: xarray Dataset
+        dset: xarray Dataset
         dim: string ("time", "lat", or "lon")
             A string corresponding to the required dimension
     '''
     if dim not in ["time", "lat", "lon"]:
         raise ValueError(f"{dim} is not a required dimension.")

-    # Check that the dim is included in
-    if dim not in ds.dims.keys():
+    # Check that the dim is included in the dataset
+    if dim not in dset.dims.keys():
         raise ValueError(f"{dim} is not included in the dimensions.")

     # Require that the variable is monotonically increasing
-    if np.any(np.diff(ds[dim]).astype("float") < 0):
+    if np.any(np.diff(dset[dim]).astype("float") < 0):
         raise ValueError(f"{dim} is not monotonically increasing.")

-    return ds
+    return dset


-def format_HEMCO_variable(ds, var, long_name, units, **kwargs):
+def format_hemco_variable(
+    dset,
+    var,
+    long_name,
+    units,
+    **kwargs
+):
     """
-    Formats attributes for non-standard variables for coards compliance
+    Formats attributes for non-standard variables for COARDS compliance
     (HEMCO compatibility).

     Args:
-        ds: xarray Dataset
+        dset: xarray Dataset
             Dataset containing HEMCO input data.
         var: string
             The name of the non-standard variable to be formatted.
         long_name: string
             A required HEMCO attribute, a more descriptive name for
             var.
         units: string
             A required HEMCO attribute giving the units of var. See
             https://hemco.readthedocs.io/en/stable/hco-ref-guide/input-file-format.html
             for more information.
-        kwargs: dictionary
+        **kwargs : dict
             Any other attributes wanted for the variable.

     Returns:
-        ds: xarray Dataset
-        An updated version of ds with variable attributes
-        set to be coards/HEMCO compliant.
+        dset: xarray Dataset
+            An updated version of dset with variable attributes
+            set to be COARDS/HEMCO compliant.
     """
-    ds[var].attrs = {"long_name" : long_name, "units" : units,
-                     **kwargs}
-    return ds
+    dset[var].attrs = {"long_name" : long_name, "units" : units,
+                       **kwargs}
+    return dset


-def save_HEMCO_netcdf(ds, save_dir, save_name, dtype="float", **kwargs):
+def save_hemco_netcdf(
+    dset,
+    save_dir,
+    save_name,
+    dtype="float",
+    **kwargs
+):
     """
-    Saves coards compliant (HEMCO compatible) netcdf.
+    Saves COARDS compliant (HEMCO compatible) netcdf.

     Args:
-        ds: xarray Dataset
+        dset: xarray Dataset
             Dataset containing HEMCO input data.
         save_dir: string
             The directory where the data will be saved.
         save_name: string
             The name the file will be named under.

     Keyword Args (optional):
         dtype: data type
             The data type the data will be saved as. Default is
             float32 to minimize memory usage.
         kwargs: dictionary
             Any other attributes to be passed to the xarray
             to_netcdf function.
     """
     # Check that the save_name ends in .nc
     if save_name.split(".")[-1][:2] != "nc":
         save_name = f"{save_name}.nc"

     # Get time encoding before overwriting
-    time_units = ds["time"].encoding["units"]
-    calendar = ds["time"].encoding["calendar"]
+    time_units = dset["time"].encoding["units"]
+    calendar = dset["time"].encoding["calendar"]

     # Set default encoding and dtype for all variables and coordinates
     encoding = {"_FillValue" : None, "dtype" : dtype}
-    var = {k : dc(encoding) for k in ds.keys()}
-    coord = {k : dc(encoding) for k in ds.coords}
+    var = {k : dc(encoding) for k in dset.keys()}
+    coord = {k : dc(encoding) for k in dset.coords}

     # Manually update the time encoding, which is often overwritten
     # by xarray defaults
     coord["time"]["units"] = time_units
     coord["time"]["calendar"] = calendar
     var.update(coord)

     # Save out
-    ds.to_netcdf(join(save_dir, save_name), encoding=var,
-                 unlimited_dims=["time"], **kwargs)
+    dset.to_netcdf(join(save_dir, save_name), encoding=var,
+                   unlimited_dims=["time"], **kwargs)

     print("-"*70)
     print("Saved to", join(save_dir, save_name))
-    print("-"*70)
\ No newline at end of file
+    print("-"*70)

From 2ffa8e3c6b74148d21aded05a2e90bfa44fba773 Mon Sep 17 00:00:00 2001
From: Bob Yantosca
Date: Wed, 23 Aug 2023 16:12:43 -0400
Subject: [PATCH 03/43] Further updates to format_hemco_data.py

gcpy/format_hemco_data.py
- Reorder imports for optimal ordering according to Pylint
- Now use gcpy.util.verify_variable_type to check argument types
- Remove "f" in front of normal python strings w/ no replacements
- Split code across multiple lines for clarity
- Ensure variables conform to snake_case
- In routine format_hemco_variable, add code to make sure that
  variable attributes aren't clobbered.  Also accept extra attributes
  as a dictionary via **kwargs.
- Trimmed trailing whitespace

Signed-off-by: Bob Yantosca
---
 gcpy/format_hemco_data.py | 75 ++++++++++++++++++++++++++++-----------
 1 file changed, 55 insertions(+), 20 deletions(-)

diff --git a/gcpy/format_hemco_data.py b/gcpy/format_hemco_data.py
index dc19014e..297db837 100644
--- a/gcpy/format_hemco_data.py
+++ b/gcpy/format_hemco_data.py
@@ -2,11 +2,12 @@
 Contains functions to make sure that data files to be read by
 HEMCO adhere to the COARDS netCDF conventions.
""" +from os.path import join +from copy import deepcopy as dc import xarray as xr import numpy as np import pandas as pd -from copy import deepcopy as dc -from os.path import join +from gcpy.util import verify_variable_type def format_hemco_dimensions( @@ -60,8 +61,7 @@ def format_hemco_dimensions( set to be coards/HEMCO compliant. """ # Require that dset is an xarray Dataset object - if not isinstance(dset, xr.Dataset): - raise TypeError("The dset argument must be an xarray Dataset.") + verify_variable_type(dset, xr.Dataset) # Check that latitude and longitude are found in the dataset ## First force all dimension names to be lowercase: @@ -104,6 +104,8 @@ def _update_variable_attributes( var_attrs : dict Modified dictionary of variable attributes """ + verify_variable_type(var_attrs, dict) + verify_variable_type(coards_attrs, dict) # Test if each COARDS-conforming attribute is # present in the list of variable attributes. @@ -201,9 +203,9 @@ def _format_time( # consistent with GCHP requirements new_start_time = pd.to_datetime(dset["time"][0].values) new_start_time = new_start_time.strftime("%Y-%m-%d %H:%M:%S") - print(f"Updating the reference start time from") + print("Updating the reference start time from") print(f"{start_time} to {new_start_time}") - print(f"so that time(0) = 0, consistent with GCHP requirements.") + print("so that time(0) = 0, consistent with GCHP requirements.") start_time = new_start_time # Now check that time is a monotonically increasing dimension @@ -225,7 +227,13 @@ def _format_time( return dset -def _format_lev(dset, lev_long_name, lev_units, lev_formula_terms, gchp): +def _format_lev( + dset, + lev_long_name, + lev_units, + lev_formula_terms, + gchp +): ''' Formats the level dimension for COARDS compliance. See define_HEMCO_dimensions for argument listings. @@ -240,11 +248,13 @@ def _format_lev(dset, lev_long_name, lev_units, lev_formula_terms, gchp): # formula are included. if lev_formula_terms is not None: terms = lev_formula_terms.split(": ") - terms = [t for i, t in enumerate(terms) if i % 2 == 1] - for t in terms: - if t not in dset.data_vars.keys(): - raise ValueError(f"{t} is in lev_formula_terms and could \ - not be found.") + terms = [term for i, term in enumerate(terms) if i % 2 == 1] + for term in terms: + if term not in dset.data_vars.keys(): + raise ValueError( + f"{term} is in lev_formula_terms and could \ + not be found." + ) # If unit is level, require that the levels are integers if lev_units == "level" and \ @@ -271,9 +281,9 @@ def _format_lev(dset, lev_long_name, lev_units, lev_formula_terms, gchp): } ) if lev_formula_terms is not None: - dset["lev"].attrs.update({ - "formula_terms" : lev_formula_terms - }) + dset["lev"].attrs.update({ + "formula_terms" : lev_formula_terms + }) return dset @@ -329,7 +339,7 @@ def format_hemco_variable( A required HEMCO attribute giving the units of var. See https://hemco.readthedocs.io/en/stable/hco-ref-guide/input-file-format.html for more information. - kwargs: dictionary + **kwargs : dict Any other attributes wanted for the variable. Returns: @@ -337,8 +347,25 @@ def format_hemco_variable( An updated version of dset with variable attributes set to be COARDS/HEMCO compliant. 
""" - dset[var].attrs = {"long_name" : long_name, "units" : units, - **kwargs} + verify_variable_type(dset, xr.Dataset) + verify_variable_type(var, str) + verify_variable_type(long_name, str) + verify_variable_type(units, str) + + # Add extra attributes if passed via **kwargs + if len(kwargs) != 0: + for (_, att_dict) in kwargs.items(): + dset[var].attrs.update(att_dict) + + # Update variable attributes to be COARDS-conforming + # without clobbering any pre-existing attributes + dset[var].attrs = _update_variable_attributes( + dset[var].attrs, + coards_attrs={ + "long_name" : long_name, + "units" : units + } + ) return dset @@ -368,6 +395,10 @@ def save_hemco_netcdf( Any other attributes to be passed to the xarray to_netcdf function. """ + verify_variable_type(dset, xr.Dataset) + verify_variable_type(save_dir, str) + verify_variable_type(save_name, str) + # Check that the save_name endset in .nc if save_name.split(".")[-1][:2] != "nc": save_name = f"{save_name}.nc" @@ -388,8 +419,12 @@ def save_hemco_netcdf( var.update(coord) # Save out - dset.to_netcdf(join(save_dir, save_name), encoding=var, - unlimited_dims=["time"], **kwargs) + dset.to_netcdf( + join(save_dir, save_name), + encoding=var, + unlimited_dims=["time"], + **kwargs + ) print("-"*70) print("Saved to", join(save_dir, save_name)) From 1a988266208414a9689a77403d28bed57f8057e2 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Tue, 26 Mar 2024 15:16:10 -0400 Subject: [PATCH 04/43] Split off mass conservation table routine into new script gcpy/benchmark/modules/benchmark_mass_cons_table.py - New script for printing mass conservation table output (from the PassiveTracer species). Ref and Dev versions are now printed in the same table. - Code has been refactored for more clarity and efficiency. We now open all files into a common dataset and instead of opening one file at a time. Also split off common operations into local functions. gcpy/benchmark/modules/benchmark_funcs.py - Removed make_benchmark_mass_conservation_table, this has now been ported to benchmark_mass_cons_table.py CHANGELOG.md - Updated accordingly Signed-off-by: Bob Yantosca --- CHANGELOG.md | 2 + gcpy/benchmark/modules/benchmark_funcs.py | 166 --------- .../modules/benchmark_mass_cons_table.py | 327 ++++++++++++++++++ 3 files changed, 329 insertions(+), 166 deletions(-) create mode 100644 gcpy/benchmark/modules/benchmark_mass_cons_table.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 9d6bd958..a36541bb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Added fixed level budget diagnostic to budget operations table - Function `rename_speciesconc_to_speciesconcvv` in `gcpy/benchmark/modules/benchmark_utils.py` - Function `copy_file_to_dir` in `gcpy/util.py`. This is a wrapper for `shutil.copyfile`. 
+- Script `gcpy/benchmark/modules/benchmark_mass_cons_table.py`, with code to create mass conservation tables ### Changed - Bump pip from 23.2.1 to 23.3 (dependabot suggested this) @@ -58,6 +59,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Environment file `docs/environment_files/environment.yml` - Environment file `docs/environment_files/requirements.txt` - Removed `awscli` from the GCPy environment; version 2 is no longer available on conda-forge or PyPi +- Routine `make_benchmark_mass_conservation_table` in `benchmark_funcs.py`; this is now obsolete ## [1.4.2] - 2024-01-26 ### Added diff --git a/gcpy/benchmark/modules/benchmark_funcs.py b/gcpy/benchmark/modules/benchmark_funcs.py index 27965fd9..7f68855d 100644 --- a/gcpy/benchmark/modules/benchmark_funcs.py +++ b/gcpy/benchmark/modules/benchmark_funcs.py @@ -4915,172 +4915,6 @@ def make_benchmark_operations_budget( gc.collect() -def make_benchmark_mass_conservation_table( - datafiles, - runstr, - dst="./benchmark", - overwrite=False, - areapath=None, - spcdb_dir=os.path.dirname(__file__) -): - """ - Creates a text file containing global mass of the PassiveTracer - from Transport Tracer simulations across a series of restart files. - - Args: - datafiles: list of str - Path names of restart files. - runstr: str - Name to put in the filename and header of the output file - - Keyword Args (optional): - dst: str - A string denoting the destination folder where the file - containing emissions totals will be written. - Default value: "./benchmark" - overwrite: bool - Set this flag to True to overwrite files in the - destination folder (specified by the dst argument). - Default value: False - areapath: str - Path to a restart file containing surface area data. - Default value: None - spcdb_dir: str - Path to the species_database.yml - Default value: points to gcpy/gcpy folder - """ - - # ================================================================== - # Initialize - # ================================================================== - - # Create the destination folder - util.make_directory(dst, overwrite) - - # Load a YAML file containing species properties (such as - # molecular weights), which we will need for unit conversions. 
- properties = util.read_config_file( - os.path.join( - spcdb_dir, - "species_database.yml" - ), - quiet=True - ) - - # Get the species name - spc_name = 'PassiveTracer' - - # Get a list of properties for the given species - species_properties = properties.get(spc_name) - - # Specify target units - target_units = "Tg" - - dates = [] - masses = [] - - # ================================================================== - # Make sure that surface area data is found - # ================================================================== - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=xr.SerializationWarning) - - - # ================================================================== - # Calculate global mass for the tracer at all restart dates - # ================================================================== - for f in datafiles: - ds = xr.open_dataset(f, drop_variables=skip_these_vars) - - # Save date in desired format - #datestr = str(pd.to_datetime(ds.time.values[0])) - #dates.append(datestr[:4] + '-' + datestr[5:7] + '-' + datestr[8:10]) - - # Find the area variable in Dev - if areapath is None: - area = util.get_area_from_dataset(ds) - else: - area = util.get_area_from_dataset( - xr.open_dataset( - areapath, - drop_variables=skip_these_vars - ) - ) - - # Assume typical restart file name format, but avoid using dates - # from within files which may be incorrect for the initial restart - datestr = f.split('/')[-1].split('.')[2][:9] - dates.append(datestr[:4] + '-' + datestr[4:6] + '-' + datestr[6:8]) - - # Select for GCC or GCHP - delta_p = ds['Met_DELPDRY'] if 'Met_DELPDRY' in list(ds.data_vars) else ds['DELP_DRY'] - - # ============================================================== - # Convert units of Ref and save to a DataArray - # (or skip if Ref contains NaNs everywhere) - # ============================================================== - # Select for GCC or GCHP - if 'SpeciesRst_PassiveTracer' in list(ds.data_vars): - attrs = ds['SpeciesRst_PassiveTracer'].attrs - da = ds['SpeciesRst_PassiveTracer'].astype(np.float64) - da.attrs = attrs - else: - attrs = ds['SPC_PassiveTracer'].attrs - da = ds['SPC_PassiveTracer'].astype(np.float64) - da.attrs = attrs - da = convert_units( - da, - spc_name, - species_properties, - target_units, - area_m2=area, - delta_p=delta_p - ) - - # Save total global mass - masses.append(np.sum(da.values)) - - # Clean up - del ds - del da - gc.collect() - - # Calclate max and min mass, absolute diff, percent diff - max_mass = np.max(masses) - min_mass = np.min(masses) - # Convert absdiff to grams - absdiff = (max_mass-min_mass) * 10**12 - pctdiff = (max_mass-min_mass)/min_mass * 100 - - # ================================================================== - # Print masses to file - # ================================================================== - # Create file - outfilename = os.path.join(dst, f"Passive_mass.{runstr}.txt") - - with open(outfilename, 'w') as f: - titlestr = ' Global Mass of Passive Tracer in ' + runstr + ' ' - #headers - print('%' * (len(titlestr)+4), file=f) - print(titlestr, file=f) - print('%' * (len(titlestr)+4), file=f) - print('', file=f) - print(' Date' + ' ' * 8 + 'Mass [Tg]', file=f) - print(' ' + '-' * 10 + ' ' + '-' * 16, file=f) - #masses - for i in range(len(masses)): - print(f" {dates[i]} {masses[i] : 11.13f}", file=f) - print(' ', file=f) - print(' Summary', file=f) - print(' ' + '-' * 30, file=f) - print(f" Max mass = {max_mass : 2.13f} Tg", file=f) - print(f" Min mass = {min_mass : 2.13f} 
Tg", file=f) - print(f" Abs diff = {absdiff : >16.3f} g", file=f) - print(f" Pct diff = {pctdiff : >16.10f} %", file=f) - - gc.collect() - - def get_species_database_dir(config): """ Returns the directory in which the species_database.yml file is diff --git a/gcpy/benchmark/modules/benchmark_mass_cons_table.py b/gcpy/benchmark/modules/benchmark_mass_cons_table.py new file mode 100644 index 00000000..b4067729 --- /dev/null +++ b/gcpy/benchmark/modules/benchmark_mass_cons_table.py @@ -0,0 +1,327 @@ +""" +Creates mass conservation tables from passive tracer concentrations +stored in GEOS-Chem Classic and/or GCHP restart files. +""" +import os +import warnings +import numpy as np +import xarray as xr +from gcpy.constants import skip_these_vars +from gcpy.units import convert_units +from gcpy.util import dataset_reader, get_area_from_dataset, \ + make_directory, read_config_file, verify_variable_type + + +# Constants +SPC_NAME = "PassiveTracer" +TARGET_UNITS = "Tg" + + +def get_area( + area_path, + dset +): + """ + Returns the area variable from a dataset (if present), + or reads it from the supplied file path. + + Args + area_path : str|None : Full file path of area data + dset : xr.Dataset : Input data + + Returns + area : xr.DataArray : Grid box areas [m2] + """ + verify_variable_type(area_path, (str, type(None))) + verify_variable_type(dset, xr.Dataset) + + # If the area variable is present in the data set, return it + if area_path is None: + return get_area_from_dataset(dset) + + # Otherwise read the data from the supplied area_path) + reader = dataset_reader(multi_files=False, verbose=False) + return get_area_from_dataset( + reader(area_path, drop_variables=skip_these_vars).load() + ) + + +def get_delta_pressure( + dset +): + """ + Returns the delta-pressure variable from GEOS-Chem Classic + or GCHP data files. + + Args: + dset : xr.Dataset|xr.DataArray : Input data + """ + verify_variable_type(dset, (xr.Dataset, xr.DataArray)) + + # GEOS-Chem Classic + if 'Met_DELPDRY' in list(dset.data_vars): + return dset['Met_DELPDRY'] + + # GCHP + return dset['DELP_DRY'] + + +def get_passive_tracer_metadata( + spcdb_dir +): + """ + Returns a dictionary with metadata for the passive tracer. + + Args + spcdb_dir : str : Directory containing species_database.yml + + Returns + properties : dict : Dictionary with species metadata + """ + verify_variable_type(spcdb_dir, str) + + spc_name = SPC_NAME + properties = read_config_file( + os.path.join( + spcdb_dir, + "species_database.yml" + ), + quiet=True + ) + + return properties.get(spc_name) + + +def get_passive_tracer_varname( + dset +): + """ + Returns the variable name under which the passive tracer + is stored GEOS-Chem Classic or GCHP restart files. + """ + verify_variable_type(dset, xr.Dataset) + + # Name of species (it's more efficient to copy to local variable!) + name = SPC_NAME + + # GEOS-Chem Classic + if f"SpeciesRst_{name}" in dset.data_vars: + return f"SpeciesRst_{name}" + + # GCHP + return f"SPC_{name}" + + +def compute_total_mass( + t_idx, + dset, + area, + delta_p, + metadata, +): + """ + Computes the total mass (in Tg) for the passive tracer. + + Args + t_idx : int : Time index + dset : xr.Dataset : Data [mol/mol dry air] + area : xr.DataArray : Grid box areas [m2] + delta_p : xr.Dataset : Pressure thicknesses [hPa] + metadata : dict : Dictionary w/ species metdata + + Returns + total_mass : np.float64 : Total mass [Tg] of species. 
+ """ + with xr.set_options(keep_attrs=True): + units = TARGET_UNITS + varname = get_passive_tracer_varname(dset) + darr = convert_units( + dset[varname].astype(np.float64).isel(time=t_idx), + varname, + metadata, + units, + area_m2=area.isel(time=0), + delta_p=delta_p.isel(time=t_idx), + ) + + return np.sum(darr) + + +def compute_statistics(masses): + """ + Returns a dictionary with statistics for total masses. + + Args + masses : np.ndarray : Total masses in Tg + + Returns + statistics : dict : Dictionary with statistics + """ + verify_variable_type(masses, (np.ndarray, list)) + + max_mass = np.max(masses) + min_mass = np.min(masses) + + return { + "max_mass": max_mass, + "min_mass": min_mass, + "absdiff_g": (max_mass - min_mass) * 10**12, + "pctdiff": (max_mass-min_mass)/min_mass * 100, + } + + +def make_benchmark_mass_conservation_table( + ref_files, + ref_label, + dev_files, + dev_label, + dst="./benchmark", + overwrite=False, + ref_areapath=None, + dev_areapath=None, + spcdb_dir=os.path.dirname(__file__) +): + """ + Creates a text file containing global mass of passive species + contained in GEOS-Chem Classic and/or GCHP restart files. + + Args + ref_files : list|str : List of files from the Ref model + ref_label : str : Ref version label + dev_files : list|str : List of files from the Dev model + dev_label : str : Dev version label + dst : str : Destination folder for file output + overwrite : bool : Overwrite pre-existing files? + ref_areapath : list|str : Path to file w/ Ref area data (optional) + dev_areapath : list|str : Path to file w/ Dev area data (optional) + spcdb_dir : str : Path to species database file + """ + + # ================================================================== + # Initialize + # ================================================================== + + # Create the destination folder + make_directory(dst, overwrite) + + # Get a list of properties for the given species + metadata = get_passive_tracer_metadata(spcdb_dir) + + # Preserve xarray attributes + with xr.set_options(keep_attrs=True): + + # ============================================================== + # Read data and make sure time dimensions are consistent + # ============================================================== + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=xr.SerializationWarning) + + # Pick the proper function to read the data + reader = dataset_reader(multi_files=True, verbose=False) + + # Get data + ref_data = reader(ref_files, drop_variables=skip_these_vars).load() + dev_data = reader(dev_files, drop_variables=skip_these_vars).load() + ref_area = get_area(ref_areapath, ref_data) + dev_area = get_area(dev_areapath, dev_data) + ref_delta_prs = get_delta_pressure(ref_data) + dev_delta_prs = get_delta_pressure(dev_data) + + # Number of points in the time dimension + ref_time = ref_data["time"].values + dev_time = dev_data["time"].values + + # Throw an error if Ref & Dev have differing time values + if not np.all(ref_time == dev_time): + msg = "Ref and Dev have inconsistent time values!\n" + raise ValueError(msg) + + # Lists for holding the sum of masses in Ref & Dev + ref_masses = np.zeros(len(dev_time), dtype=np.float64) + dev_masses = np.zeros(len(dev_time), dtype=np.float64) + + # List for holding the dates & times + display_dates = [] + + # ================================================================== + # Calculate global mass for the tracer at all restart dates + # ================================================================== + for 
t_idx, time in enumerate(dev_time): + + # Save datetime string into display_dates list + time = str(np.datetime_as_string(time, unit="m")) + display_dates.append(time.replace("T", " ")) + + # Compute total masses [Tg] for Ref & Dev + ref_masses[t_idx] = compute_total_mass( + t_idx, + ref_data, + ref_area, + ref_delta_prs, + metadata, + ) + dev_masses[t_idx] = compute_total_mass( + t_idx, + dev_data, + dev_area, + dev_delta_prs, + metadata, + ) + + + # ================================================================== + # Print masses and statistics to file + # ================================================================== + + # Get min, max, absdiff, maxdiff for Ref & Dev + ref_stats = compute_statistics(ref_masses) + dev_stats = compute_statistics(dev_masses) + + # Create file + outfilename = os.path.join( + dst, + f"Passive_mass.{ref_label}_vs_{dev_label}.txt" + ) + with open(outfilename, 'w', encoding="utf-8") as ofile: + + # Title + print("="*79, file=ofile) + print("Global mass of PassiveTracer", file=ofile) + print("", file=ofile) + print(f"Ref = {ref_label}", file=ofile) + print(f"Dev = {dev_label}", file=ofile) + print("="*79, file=ofile) + + # Headers + print("", file=ofile) + template = " Date & Time" + " "*18 + "Ref mass [Tg]" + template += " "*13 + "Dev mass [Tg]" + print(template, file=ofile) + template = " " + "-"*17 + " "*5 + "-"*20 + " "*6 + "-"*20 + print(template, file=ofile) + + # Total masses + for t_idx, time in enumerate(display_dates): + template = f" {time} " + template +=f"{ref_masses[t_idx] : >20.13f} " + template +=f"{dev_masses[t_idx] : >20.13f}" + print(template, file=ofile) + print(" ", file=ofile) + + # Statistics + template = " Summary" + " "*32+ "Ref" + " "*23 + "Dev" + print(template, file=ofile) + template = " " + "-"*17 + " "*5 + "-"*20 + " "*6 + "-"*20 + print(template, file=ofile) + template = f" Maximum mass [Tg] {ref_stats['max_mass'] : >20.13f}" + template+= f" {dev_stats['max_mass'] : >20.13f}" + print(template, file=ofile) + template = f" Minimum mass [Tg] {ref_stats['min_mass'] : >20.13f}" + template+= f" {dev_stats['min_mass'] : >20.13f}" + print(template, file=ofile) + template = f" Abs diff [g] {ref_stats['absdiff_g'] : >20.13f}" + template+= f" {dev_stats['absdiff_g'] : >20.13f}" + print(template, file=ofile) + template = f" % difference {ref_stats['pctdiff'] : >20.13f}" + template+= f" {dev_stats['pctdiff'] : >20.13f}" + print(template, file=ofile) From ea2261ba9c449180aa186315a523b73bf0fd97b4 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Tue, 26 Mar 2024 16:25:06 -0400 Subject: [PATCH 05/43] run_1yr_tt_benchmark.py now uses benchmark_mass_cons_table.py gcpy/benchmark/modules/benchmark_mass_cons_table.py - Bug fix: Test if the area variable has a time dimension before trying to use .isel(time=0) - Add extra calls to verify_variable_type gcpy/benchmark/modules/run_1yr_tt_benchmark.py - No longer import make_benchmark_mass_conservation_table from benchmark_funcs.py; this has been removed - Import make_benchmark_mass_conservation_table from the benchmark_mass_cons_table.py module - Now pass Ref & Dev arguments to make_benchmark_mass_conservation_table --- .../modules/benchmark_mass_cons_table.py | 19 ++++++++- .../benchmark/modules/run_1yr_tt_benchmark.py | 39 +++++-------------- 2 files changed, 28 insertions(+), 30 deletions(-) diff --git a/gcpy/benchmark/modules/benchmark_mass_cons_table.py b/gcpy/benchmark/modules/benchmark_mass_cons_table.py index b4067729..f2f5098d 100644 --- a/gcpy/benchmark/modules/benchmark_mass_cons_table.py 
+++ b/gcpy/benchmark/modules/benchmark_mass_cons_table.py
@@ -133,14 +133,22 @@ def compute_total_mass(
         total_mass : np.float64 : Total mass [Tg] of species.
     """
     with xr.set_options(keep_attrs=True):
+
+        # Local variables
         units = TARGET_UNITS
         varname = get_passive_tracer_varname(dset)
+
+        # If area has multiple time slices, take the first one
+        if "time" in area.dims:
+            area = area.isel(time=0)
+
+        # Compute mass in Tg
         darr = convert_units(
             dset[varname].astype(np.float64).isel(time=t_idx),
             varname,
             metadata,
             units,
-            area_m2=area.isel(time=0),
+            area_m2=area,
             delta_p=delta_p.isel(time=t_idx),
         )

@@ -196,6 +204,15 @@ def make_benchmark_mass_conservation_table(
         dev_areapath : list|str : Path to file w/ Dev area data (optional)
         spcdb_dir    : str      : Path to species database file
     """
+    verify_variable_type(ref_files, (list, str))
+    verify_variable_type(ref_label, str)
+    verify_variable_type(dev_files, (list, str))
+    verify_variable_type(dev_label, str)
+    verify_variable_type(dst, (str, type(None)))
+    verify_variable_type(overwrite, bool)
+    verify_variable_type(ref_areapath, (str, type(None)))
+    verify_variable_type(dev_areapath, (str, type(None)))
+    verify_variable_type(spcdb_dir, str)

     # ==================================================================
     # Initialize
diff --git a/gcpy/benchmark/modules/run_1yr_tt_benchmark.py b/gcpy/benchmark/modules/run_1yr_tt_benchmark.py
index b94055bd..abea2ca3 100644
--- a/gcpy/benchmark/modules/run_1yr_tt_benchmark.py
+++ b/gcpy/benchmark/modules/run_1yr_tt_benchmark.py
@@ -61,9 +61,11 @@
 from gcpy.benchmark.modules.benchmark_funcs import \
     get_species_database_dir, make_benchmark_conc_plots, \
     make_benchmark_wetdep_plots, make_benchmark_mass_tables, \
-    make_benchmark_operations_budget, make_benchmark_mass_conservation_table
+    make_benchmark_operations_budget
 from gcpy.benchmark.modules.budget_tt import transport_tracers_budgets
 from gcpy.benchmark.modules.ste_flux import make_benchmark_ste_table
+from gcpy.benchmark.modules.benchmark_mass_cons_table import \
+    make_benchmark_mass_conservation_table
 from gcpy.benchmark.modules.benchmark_utils import print_benchmark_info

 # Tell matplotlib not to look for an X-window
@@ -1265,17 +1267,10 @@ def gchp_vs_gchp_mass_table(mon):
             all_months_dev
         )[0]

-        # Ref
+        # Create table
         make_benchmark_mass_conservation_table(
             ref_datafiles,
             config["data"]["ref"]["gcc"]["version"],
-            dst=gcc_vs_gcc_tablesdir,
-            overwrite=True,
-            spcdb_dir=spcdb_dir,
-        )
-
-        # Dev
-        make_benchmark_mass_conservation_table(
             dev_datafiles,
             config["data"]["dev"]["gcc"]["version"],
             dst=gcc_vs_gcc_tablesdir,
             overwrite=True,
             spcdb_dir=spcdb_dir,
         )

     # ===================================================================
     # Create mass conservation table for GCHP vs GCC
     # ===================================================================
-    if config["options"]["comparisons"]["gcc_vs_gcc"]["run"]:
+    if config["options"]["comparisons"]["gchp_vs_gcc"]["run"]:
         print("\n%%% Creating GCHP vs GCC mass conservation tables %%%")

         # Filepaths
@@ -1317,23 +1312,16 @@
             gchp_is_pre_14_0=config["data"]["dev"]["gchp"]["is_pre_14.0"],
         )

-        # Ref
+        # Create table
         make_benchmark_mass_conservation_table(
             ref_datafiles,
             config["data"]["dev"]["gcc"]["version"],
-            dst=gchp_vs_gcc_tablesdir,
-            overwrite=True,
-            spcdb_dir=spcdb_dir,
-        )
-
-        # Dev
-        make_benchmark_mass_conservation_table(
             dev_datafiles,
             config["data"]["dev"]["gchp"]["version"],
             dst=gchp_vs_gcc_tablesdir,
             overwrite=True,
             spcdb_dir=spcdb_dir,
-            areapath=dev_areapath,
+            dev_areapath=dev_areapath,
         )

     # =====================================================================
     # Create mass conservation tables for GCHP vs GCHP
     # =====================================================================
@@ -1381,24 +1369,17 @@
             gchp_is_pre_14_0=config["data"]["dev"]["gchp"]["is_pre_14.0"],
         )

-        # Ref
+        # Create table
         make_benchmark_mass_conservation_table(
             ref_datafiles,
             config["data"]["ref"]["gchp"]["version"],
-            dst=gchp_vs_gchp_tablesdir,
-            overwrite=True,
-            spcdb_dir=spcdb_dir,
-            areapath=ref_areapath
-        )
-
-        # Dev
-        make_benchmark_mass_conservation_table(
             dev_datafiles,
             config["data"]["dev"]["gchp"]["version"],
             dst=gchp_vs_gchp_tablesdir,
             overwrite=True,
             spcdb_dir=spcdb_dir,
-            areapath=dev_areapath
+            ref_areapath=ref_areapath,
+            dev_areapath=dev_areapath,
         )

     # ==================================================================

From b2a95774a8f9379b72337c93f8c81633490ba199 Mon Sep 17 00:00:00 2001
From: Bob Yantosca
Date: Tue, 26 Mar 2024 17:21:06 -0400
Subject: [PATCH 06/43] Update comments for clarity in
 benchmark_mass_cons_table.py

gcpy/benchmark/modules/benchmark_mass_cons_table.py
- Add Pydoc comments for get_passive_tracer_varname
- Update comments for the time dimension

Signed-off-by: Bob Yantosca
---
 gcpy/benchmark/modules/benchmark_mass_cons_table.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/gcpy/benchmark/modules/benchmark_mass_cons_table.py b/gcpy/benchmark/modules/benchmark_mass_cons_table.py
index f2f5098d..561729b8 100644
--- a/gcpy/benchmark/modules/benchmark_mass_cons_table.py
+++ b/gcpy/benchmark/modules/benchmark_mass_cons_table.py
@@ -98,6 +98,12 @@ def get_passive_tracer_varname(
     """
     Returns the variable name under which the passive tracer
     is stored in GEOS-Chem Classic or GCHP restart files.
+
+    Args
+        dset : xr.Dataset : The input data
+
+    Returns
+        varname : str : Variable name for passive tracer
     """
     verify_variable_type(dset, xr.Dataset)

@@ -244,11 +250,11 @@ def make_benchmark_mass_conservation_table(
         ref_delta_prs = get_delta_pressure(ref_data)
         dev_delta_prs = get_delta_pressure(dev_data)

-        # Number of points in the time dimension
+        # Get datetime values
         ref_time = ref_data["time"].values
         dev_time = dev_data["time"].values

-        # Throw an error if Ref & Dev have differing time values
+        # Throw an error if Ref & Dev have differing datetime values
         if not np.all(ref_time == dev_time):
             msg = "Ref and Dev have inconsistent time values!\n"
             raise ValueError(msg)
@@ -257,7 +263,7 @@
         ref_masses = np.zeros(len(dev_time), dtype=np.float64)
         dev_masses = np.zeros(len(dev_time), dtype=np.float64)

-        # List for holding the dates & times
+        # List for holding the datetimes
         display_dates = []

     # ==================================================================
@@ -285,7 +291,6 @@
                 metadata,
             )

-
     # ==================================================================
     # Print masses and statistics to file
     # ==================================================================

From 5c8df1325a4e59ce5d70358d7ece11028234df1d Mon Sep 17 00:00:00 2001
From: Bob Yantosca
Date: Wed, 27 Mar 2024 16:10:20 -0400
Subject: [PATCH 07/43] Expand statistics output in mass conservation tables

gcpy/benchmark/modules/benchmark_mass_cons_table.py
- Add extra fields to statistics output:
  - Start mass
  - End mass
  - Abs diff
  - % diff
  - Mean mass
  - Variance
- Also add extra columns for Abs Diff & % Diff for each of these

CHANGELOG.md
- Updated accordingly

Signed-off-by: Bob Yantosca
---
 .../modules/benchmark_mass_cons_table.py      | 171 ++++++++++++++++--
 1 file changed, 156 insertions(+), 15 deletions(-)
156 insertions(+), 15 deletions(-) diff --git a/gcpy/benchmark/modules/benchmark_mass_cons_table.py b/gcpy/benchmark/modules/benchmark_mass_cons_table.py index 561729b8..1483d267 100644 --- a/gcpy/benchmark/modules/benchmark_mass_cons_table.py +++ b/gcpy/benchmark/modules/benchmark_mass_cons_table.py @@ -175,12 +175,100 @@ def compute_statistics(masses): max_mass = np.max(masses) min_mass = np.min(masses) + start_mass = masses[0] + end_mass = masses[-1] return { - "max_mass": max_mass, - "min_mass": min_mass, - "absdiff_g": (max_mass - min_mass) * 10**12, - "pctdiff": (max_mass-min_mass)/min_mass * 100, + "min_mass" : min_mass, + "max_mass" : max_mass, + "minmax_absdiff_g" : (max_mass - min_mass) * 1.0e12, + "minmax_pctdiff" : (max_mass - min_mass)/min_mass * 100.0, + "start_mass" : start_mass, + "end_mass" : end_mass, + "startend_absdiff_g" : (end_mass - start_mass) * 1.0e12, + "startend_pctdiff" : (end_mass - start_mass)/start_mass * 100.0, + "mean_mass" : np.mean(masses), + "variance" : np.var(masses), + } + + +def compute_diff( + key, + ref, + dev +): + """ + Computes the difference in two dictionaries (Dev - Ref) for + a given search key. + + key : str : Search key + ref : dict : Dictionary of values from Ref model + dev : dict : Dictionary of values from Dev model + + Returns + diffs : dict : Absolute & percent differences btw Dev & Ref for key + """ + verify_variable_type(key, str) + verify_variable_type(ref, dict) + verify_variable_type(dev, dict) + + return { + "absdiff": dev[key] - ref[key], + "pctdiff": ((dev[key] - ref[key]) / ref[key]) * 100.0 + } + + +def compute_diff_statistics( + ref, + dev +): + """ + Computes difference statistics between the Ref and Dev versions. + + Args + ref : dict : Statistics for Ref model + dev : dict : Statistics for Dev model + + Returns + diff_stats : dict : Difference statistics between Dev and Ref + """ + verify_variable_type(ref, dict) + verify_variable_type(dev, dict) + + min_mass = compute_diff("min_mass", ref, dev) + max_mass = compute_diff("max_mass", ref, dev) + minmax_absdiff_g = compute_diff("minmax_absdiff_g", ref, dev) + minmax_pctdiff = compute_diff("minmax_pctdiff", ref, dev) + start_mass = compute_diff("start_mass", ref, dev) + end_mass = compute_diff("end_mass", ref, dev) + startend_absdiff_g = compute_diff("startend_absdiff_g", ref, dev) + startend_pctdiff = compute_diff("startend_pctdiff", ref, dev) + mean_mass = compute_diff("mean_mass", ref, dev) + variance = compute_diff("variance", ref, dev) + + return { + "min_mass__absdiff" : min_mass["absdiff"], + "min_mass__pctdiff" : min_mass["pctdiff"], + "max_mass__absdiff" : max_mass["absdiff"], + "max_mass__pctdiff" : max_mass["pctdiff"], + "minmax_absdiff_g__absdiff" : minmax_absdiff_g["absdiff"], + "minmax_absdiff_g__pctdiff" : minmax_absdiff_g["pctdiff"], + "minmax_pctdiff__absdiff" : minmax_pctdiff["absdiff"], + "minmax_pctdiff__pctdiff" : minmax_pctdiff["pctdiff"], + + "start_mass__absdiff" : start_mass["absdiff"], + "start_mass__pctdiff" : start_mass["pctdiff"], + "end_mass__absdiff" : end_mass["absdiff"], + "end_mass__pctdiff" : end_mass["pctdiff"], + "startend_absdiff_g__absdiff" : startend_absdiff_g["absdiff"], + "startend_absdiff_g__pctdiff" : startend_absdiff_g["pctdiff"], + "startend_pctdiff__absdiff" : startend_pctdiff["absdiff"], + "startend_pctdiff__pctdiff" : startend_pctdiff["pctdiff"], + + "mean_mass__absdiff" : mean_mass["absdiff"], + "mean_mass__pctdiff" : mean_mass["pctdiff"], + "variance__absdiff" : variance["absdiff"], + "variance__pctdiff" : 
variance["pctdiff"], } @@ -298,6 +386,7 @@ def make_benchmark_mass_conservation_table( # Get min, max, absdiff, maxdiff for Ref & Dev ref_stats = compute_statistics(ref_masses) dev_stats = compute_statistics(dev_masses) + diff_stats = compute_diff_statistics(ref_stats, dev_stats) # Create file outfilename = os.path.join( @@ -316,8 +405,8 @@ def make_benchmark_mass_conservation_table( # Headers print("", file=ofile) - template = " Date & Time" + " "*18 + "Ref mass [Tg]" - template += " "*13 + "Dev mass [Tg]" + template = " Date & Time" + " "*18 + "Ref mass [Tg]" + template +=" "*13 + "Dev mass [Tg]" print(template, file=ofile) template = " " + "-"*17 + " "*5 + "-"*20 + " "*6 + "-"*20 print(template, file=ofile) @@ -332,18 +421,70 @@ def make_benchmark_mass_conservation_table( # Statistics template = " Summary" + " "*32+ "Ref" + " "*23 + "Dev" + template += " "*6 + "Abs Diff" + " % Diff" print(template, file=ofile) - template = " " + "-"*17 + " "*5 + "-"*20 + " "*6 + "-"*20 + template = " " + "-"*17 + " "*5 + "-"*20 + " "*6 + "-"*20 + template += " " + "-"*13 + " " + "-"*7 print(template, file=ofile) - template = f" Maximum mass [Tg] {ref_stats['max_mass'] : >20.13f}" - template+= f" {dev_stats['max_mass'] : >20.13f}" + template = " Maximum mass [Tg] " + template += f"{ref_stats['max_mass'] : >20.13f} " + template += f"{dev_stats['max_mass'] : >20.13f} " + template += f"{diff_stats['max_mass__absdiff'] : >13.6f} " + template += f"{diff_stats['max_mass__pctdiff'] : >7.3f}" print(template, file=ofile) - template = f" Minimum mass [Tg] {ref_stats['min_mass'] : >20.13f}" - template+= f" {dev_stats['min_mass'] : >20.13f}" + template = " Minimum mass [Tg] " + template += f"{ref_stats['min_mass'] : >20.13f} " + template += f"{dev_stats['min_mass'] : >20.13f} " + template += f"{diff_stats['min_mass__absdiff'] : >13.6f} " + template += f"{diff_stats['min_mass__pctdiff'] : >7.3f} " print(template, file=ofile) - template = f" Abs diff [g] {ref_stats['absdiff_g'] : >20.13f}" - template+= f" {dev_stats['absdiff_g'] : >20.13f}" + template = " Abs diff [g] " + template += f"{ref_stats['minmax_absdiff_g'] : >20.13f} " + template += f"{dev_stats['minmax_absdiff_g'] : >20.13f} " + template += f"{diff_stats['minmax_absdiff_g__absdiff'] : >13.6f} " + template += f"{diff_stats['minmax_absdiff_g__pctdiff'] : >7.3f}" + print(template, file=ofile) + template = " % difference " + template += f"{ref_stats['minmax_pctdiff'] : >20.13f} " + template += f"{dev_stats['minmax_pctdiff'] : >20.13f} " + template += f"{diff_stats['minmax_pctdiff__absdiff'] : >13.6f} " + template += f"{diff_stats['minmax_pctdiff__pctdiff'] : >7.3f}" + print(template, file=ofile) + print("", file=ofile) + template = " Start mass [Tg] " + template += f"{ref_stats['start_mass'] : >20.13f} " + template += f"{dev_stats['start_mass'] : >20.13f} " + template += f"{diff_stats['start_mass__absdiff'] : >13.6f} " + template += f"{diff_stats['start_mass__pctdiff'] : >7.3f}" + print(template, file=ofile) + template = " End mass [Tg] " + template += f"{ref_stats['end_mass'] : >20.13f} " + template += f"{dev_stats['end_mass'] : >20.13f} " + template += f"{diff_stats['end_mass__absdiff'] : >13.6f} " + template += f"{diff_stats['end_mass__pctdiff'] : >7.3f}" + print(template, file=ofile) + template = " Abs diff [g] " + template += f"{ref_stats['startend_absdiff_g'] : >20.13f} " + template += f"{dev_stats['startend_absdiff_g'] : >20.13f} " + template += f"{diff_stats['startend_absdiff_g__absdiff'] : >13.6f} " + template += 
f"{diff_stats['startend_absdiff_g__pctdiff'] : >7.3f}" + print(template, file=ofile) + template = " % difference " + template += f"{ref_stats['startend_pctdiff'] : >20.13f} " + template += f"{dev_stats['startend_pctdiff'] : >20.13f} " + template += f"{diff_stats['startend_pctdiff__absdiff'] : >13.6f} " + template += f"{diff_stats['startend_pctdiff__pctdiff'] : >7.3f}" + print(template, file=ofile) + print("", file=ofile) + template = " Mean mass [Tg] " + template += f"{ref_stats['mean_mass']:>20.13f} " + template += f"{dev_stats['mean_mass']:>20.13f} " + template += f"{diff_stats['mean_mass__absdiff']:>13.6f} " + template += f"{diff_stats['mean_mass__pctdiff']:>7.3f}" print(template, file=ofile) - template = f" % difference {ref_stats['pctdiff'] : >20.13f}" - template+= f" {dev_stats['pctdiff'] : >20.13f}" + template = " Variance [Tg] " + template += f"{ref_stats['variance']:>20.13f} " + template += f"{dev_stats['variance']:>20.13f} " + template += f"{diff_stats['variance__absdiff']:>13.6f} " + template += f"{diff_stats['variance__pctdiff']:>7.3f}" print(template, file=ofile) From 0c3bca7f52b73b54b44071fbda460aa5b841a5cc Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Fri, 29 Mar 2024 14:42:03 -0400 Subject: [PATCH 08/43] Add "Abs Diff" & "% Diff" columns in mass conservation table gcpy/benchmark/modules/benchmark_mass_cons_table.py - Added "Abs Diff" & "% Diff" columns to the list of Ref & Dev total masses CHANGELOG.md - Updated accordingly Signed-off-by: Bob Yantosca --- CHANGELOG.md | 1 + .../modules/benchmark_mass_cons_table.py | 21 ++++++++++++------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a36541bb..1ebd93b7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Function `rename_speciesconc_to_speciesconcvv` in `gcpy/benchmark/modules/benchmark_utils.py` - Function `copy_file_to_dir` in `gcpy/util.py`. This is a wrapper for `shutil.copyfile`. 
- Script `gcpy/benchmark/modules/benchmark_mass_cons_table.py`, with code to create mass conservation tables +- Expanded statistics output in benchmark mass conservation tables ### Changed - Bump pip from 23.2.1 to 23.3 (dependabot suggested this) diff --git a/gcpy/benchmark/modules/benchmark_mass_cons_table.py b/gcpy/benchmark/modules/benchmark_mass_cons_table.py index 1483d267..a6faf4d1 100644 --- a/gcpy/benchmark/modules/benchmark_mass_cons_table.py +++ b/gcpy/benchmark/modules/benchmark_mass_cons_table.py @@ -405,26 +405,31 @@ def make_benchmark_mass_conservation_table( # Headers print("", file=ofile) - template = " Date & Time" + " "*18 + "Ref mass [Tg]" - template +=" "*13 + "Dev mass [Tg]" + template = " Date & Time" + " "*18 + "Ref mass [Tg]" + " "*13 + template += "Dev mass [Tg]"+ " "*6 + "Abs Diff" + " % Diff" print(template, file=ofile) - template = " " + "-"*17 + " "*5 + "-"*20 + " "*6 + "-"*20 + template = " " + "-"*17 + " "*5 + "-"*20 + " "*6 + "-"*20 + template += " " + "-"*13 + " " + "-"*7 print(template, file=ofile) # Total masses for t_idx, time in enumerate(display_dates): - template = f" {time} " - template +=f"{ref_masses[t_idx] : >20.13f} " - template +=f"{dev_masses[t_idx] : >20.13f}" + absdiff = dev_masses[t_idx] - ref_masses[t_idx] + pctdiff = (absdiff / ref_masses[t_idx]) * 100.0 + template = f" {time} " + template += f"{ref_masses[t_idx] : >20.13f} " + template += f"{dev_masses[t_idx] : >20.13f} " + template += f"{absdiff : >13.6f} " + template += f"{pctdiff : >7.3f}" print(template, file=ofile) print(" ", file=ofile) # Statistics - template = " Summary" + " "*32+ "Ref" + " "*23 + "Dev" + template = " Summary" + " "*32+ "Ref" + " "*23 + "Dev" template += " "*6 + "Abs Diff" + " % Diff" print(template, file=ofile) template = " " + "-"*17 + " "*5 + "-"*20 + " "*6 + "-"*20 - template += " " + "-"*13 + " " + "-"*7 + template += " " + "-"*13 + " " + "-"*7 print(template, file=ofile) template = " Maximum mass [Tg] " template += f"{ref_stats['max_mass'] : >20.13f} " From 486815ba67b886fa60129c89a7a7df21a1234654 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Fri, 29 Mar 2024 15:45:47 -0400 Subject: [PATCH 09/43] Add more tweaks to the mass conservation table gcpy/benchmark/modules/benchmark_mass_cons_table.py - Now compute mean_mass and variance with dtype=np.float64 - Use 13.4e right-adjusted format for "Abs Diff" column - Use 8.3f right-adjusted format for "% Diff" column - Update underlines accordingly - Add an extra space between "Abs Diff" and "% Diff" column - Now display variance with 20.13e right-adjusted format, since it can potentially be very small Signed-off-by: Bob Yantosca --- .../modules/benchmark_mass_cons_table.py | 60 +++++++++---------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/gcpy/benchmark/modules/benchmark_mass_cons_table.py b/gcpy/benchmark/modules/benchmark_mass_cons_table.py index a6faf4d1..63104cda 100644 --- a/gcpy/benchmark/modules/benchmark_mass_cons_table.py +++ b/gcpy/benchmark/modules/benchmark_mass_cons_table.py @@ -187,8 +187,8 @@ def compute_statistics(masses): "end_mass" : end_mass, "startend_absdiff_g" : (end_mass - start_mass) * 1.0e12, "startend_pctdiff" : (end_mass - start_mass)/start_mass * 100.0, - "mean_mass" : np.mean(masses), - "variance" : np.var(masses), + "mean_mass" : np.mean(masses, dtype=np.float64), + "variance" : np.var(masses, dtype=np.float64), } @@ -406,10 +406,10 @@ def make_benchmark_mass_conservation_table( # Headers print("", file=ofile) template = " Date & Time" + " "*18 + "Ref mass 
[Tg]" + " "*13 - template += "Dev mass [Tg]"+ " "*6 + "Abs Diff" + " % Diff" + template += "Dev mass [Tg]"+ " "*6 + "Abs Diff" + " % Diff" print(template, file=ofile) template = " " + "-"*17 + " "*5 + "-"*20 + " "*6 + "-"*20 - template += " " + "-"*13 + " " + "-"*7 + template += " " + "-"*13 + " " + "-"*8 print(template, file=ofile) # Total masses @@ -419,77 +419,77 @@ def make_benchmark_mass_conservation_table( template = f" {time} " template += f"{ref_masses[t_idx] : >20.13f} " template += f"{dev_masses[t_idx] : >20.13f} " - template += f"{absdiff : >13.6f} " - template += f"{pctdiff : >7.3f}" + template += f"{absdiff : >13.4e} " + template += f"{pctdiff : >8.3f}" print(template, file=ofile) print(" ", file=ofile) # Statistics template = " Summary" + " "*32+ "Ref" + " "*23 + "Dev" - template += " "*6 + "Abs Diff" + " % Diff" + template += " "*6 + "Abs Diff" + " % Diff" print(template, file=ofile) template = " " + "-"*17 + " "*5 + "-"*20 + " "*6 + "-"*20 - template += " " + "-"*13 + " " + "-"*7 + template += " " + "-"*13 + " " + "-"*8 print(template, file=ofile) template = " Maximum mass [Tg] " template += f"{ref_stats['max_mass'] : >20.13f} " template += f"{dev_stats['max_mass'] : >20.13f} " - template += f"{diff_stats['max_mass__absdiff'] : >13.6f} " - template += f"{diff_stats['max_mass__pctdiff'] : >7.3f}" + template += f"{diff_stats['max_mass__absdiff'] : >13.4e} " + template += f"{diff_stats['max_mass__pctdiff'] : >8.3f}" print(template, file=ofile) template = " Minimum mass [Tg] " template += f"{ref_stats['min_mass'] : >20.13f} " template += f"{dev_stats['min_mass'] : >20.13f} " - template += f"{diff_stats['min_mass__absdiff'] : >13.6f} " - template += f"{diff_stats['min_mass__pctdiff'] : >7.3f} " + template += f"{diff_stats['min_mass__absdiff'] : >13.4e} " + template += f"{diff_stats['min_mass__pctdiff'] : >8.3f}" print(template, file=ofile) template = " Abs diff [g] " template += f"{ref_stats['minmax_absdiff_g'] : >20.13f} " template += f"{dev_stats['minmax_absdiff_g'] : >20.13f} " - template += f"{diff_stats['minmax_absdiff_g__absdiff'] : >13.6f} " - template += f"{diff_stats['minmax_absdiff_g__pctdiff'] : >7.3f}" + template += f"{diff_stats['minmax_absdiff_g__absdiff'] : >13.4e} " + template += f"{diff_stats['minmax_absdiff_g__pctdiff'] : >8.3f}" print(template, file=ofile) template = " % difference " template += f"{ref_stats['minmax_pctdiff'] : >20.13f} " template += f"{dev_stats['minmax_pctdiff'] : >20.13f} " - template += f"{diff_stats['minmax_pctdiff__absdiff'] : >13.6f} " - template += f"{diff_stats['minmax_pctdiff__pctdiff'] : >7.3f}" + template += f"{diff_stats['minmax_pctdiff__absdiff'] : >13.4e} " + template += f"{diff_stats['minmax_pctdiff__pctdiff'] : >8.3f}" print(template, file=ofile) print("", file=ofile) template = " Start mass [Tg] " template += f"{ref_stats['start_mass'] : >20.13f} " template += f"{dev_stats['start_mass'] : >20.13f} " - template += f"{diff_stats['start_mass__absdiff'] : >13.6f} " - template += f"{diff_stats['start_mass__pctdiff'] : >7.3f}" + template += f"{diff_stats['start_mass__absdiff'] : >13.4e} " + template += f"{diff_stats['start_mass__pctdiff'] : >8.3f}" print(template, file=ofile) template = " End mass [Tg] " template += f"{ref_stats['end_mass'] : >20.13f} " template += f"{dev_stats['end_mass'] : >20.13f} " - template += f"{diff_stats['end_mass__absdiff'] : >13.6f} " - template += f"{diff_stats['end_mass__pctdiff'] : >7.3f}" + template += f"{diff_stats['end_mass__absdiff'] : >13.4e} " + template += f"{diff_stats['end_mass__pctdiff'] : 
>8.3f}" print(template, file=ofile) template = " Abs diff [g] " template += f"{ref_stats['startend_absdiff_g'] : >20.13f} " template += f"{dev_stats['startend_absdiff_g'] : >20.13f} " - template += f"{diff_stats['startend_absdiff_g__absdiff'] : >13.6f} " - template += f"{diff_stats['startend_absdiff_g__pctdiff'] : >7.3f}" + template += f"{diff_stats['startend_absdiff_g__absdiff'] : >13.4e} " + template += f"{diff_stats['startend_absdiff_g__pctdiff'] : >8.3f}" print(template, file=ofile) template = " % difference " template += f"{ref_stats['startend_pctdiff'] : >20.13f} " template += f"{dev_stats['startend_pctdiff'] : >20.13f} " - template += f"{diff_stats['startend_pctdiff__absdiff'] : >13.6f} " - template += f"{diff_stats['startend_pctdiff__pctdiff'] : >7.3f}" + template += f"{diff_stats['startend_pctdiff__absdiff'] : >13.4e} " + template += f"{diff_stats['startend_pctdiff__pctdiff'] : >8.3f}" print(template, file=ofile) print("", file=ofile) template = " Mean mass [Tg] " template += f"{ref_stats['mean_mass']:>20.13f} " template += f"{dev_stats['mean_mass']:>20.13f} " - template += f"{diff_stats['mean_mass__absdiff']:>13.6f} " - template += f"{diff_stats['mean_mass__pctdiff']:>7.3f}" + template += f"{diff_stats['mean_mass__absdiff']:>13.4e} " + template += f"{diff_stats['mean_mass__pctdiff']:>8.3f}" print(template, file=ofile) template = " Variance [Tg] " - template += f"{ref_stats['variance']:>20.13f} " - template += f"{dev_stats['variance']:>20.13f} " - template += f"{diff_stats['variance__absdiff']:>13.6f} " - template += f"{diff_stats['variance__pctdiff']:>7.3f}" + template += f"{ref_stats['variance']:>20.13e} " + template += f"{dev_stats['variance']:>20.13e} " + template += f"{diff_stats['variance__absdiff']:>13.4e} " + template += f"{diff_stats['variance__pctdiff']:>8.3f}" print(template, file=ofile) From 8e5c0bac6fc0b8e708b2b2fe7e49252b348cfa5f Mon Sep 17 00:00:00 2001 From: Yuanjian Zhang Date: Mon, 8 Apr 2024 11:31:31 -0500 Subject: [PATCH 10/43] Enabled 1 month Sigdiff benchmarking --- gcpy/benchmark_funcs.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/gcpy/benchmark_funcs.py b/gcpy/benchmark_funcs.py index 18f7ffd3..e18e0390 100644 --- a/gcpy/benchmark_funcs.py +++ b/gcpy/benchmark_funcs.py @@ -1514,9 +1514,6 @@ def createplots(filecat): result.keys())[0]]['500'] for result in results} dict_zm = {list(result.keys())[0]: result[list( result.keys())[0]]['zm'] for result in results} - - print("stop here") - quit() # ============================================================== # Write the list of species having significant differences, From ff78c99c9bfc6681d9c57f627cee8a21acdb9836 Mon Sep 17 00:00:00 2001 From: Yuanjian Zhang Date: Mon, 8 Apr 2024 13:03:08 -0500 Subject: [PATCH 11/43] Supported 2D slices regridding formatting Enabled compare plots for 2D variables between two different cube-sphere resolutions. 
--- gcpy/regrid.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/gcpy/regrid.py b/gcpy/regrid.py index 3e9e4fc3..e84df88e 100644 --- a/gcpy/regrid.py +++ b/gcpy/regrid.py @@ -758,6 +758,12 @@ def ravel_checkpoint_lat(ds_out): }) return ds_out + # Filter non-existent coordinates/dimensions + def rename_existing(ds, rename_dict): + existing_keys = set(ds.coords) | set(ds.dims) + filtered_rename_dict = {key: value for key, value in rename_dict.items() if key in existing_keys} + return ds.rename(filtered_rename_dict) + dim_formats = { 'checkpoint': { 'unravel': [unravel_checkpoint_lat], @@ -790,13 +796,13 @@ def ravel_checkpoint_lat(ds_out): ds = unravel_callback(ds) # Rename dimensions - ds = ds.rename(dim_formats[format].get('rename', {})) + ds = rename_existing(ds, dim_formats[format].get('rename', {})) return ds # %%%% Renaming from the common format %%%% # Reverse rename - ds = ds.rename( + ds = rename_existing(ds, {v: k for k, v in dim_formats[format].get('rename', {}).items()}) # Ravel dimensions From 6f1a92151c44b138db5fa17952913a0b64b5b5fb Mon Sep 17 00:00:00 2001 From: Yuanjian Zhang Date: Tue, 9 Apr 2024 12:51:56 -0500 Subject: [PATCH 12/43] Corrected dimension reformatting for regridding The cubed-sphere dimension sequence was reversed. This change also enables more versatile reshaping of datasets with fewer dimensions. --- gcpy/regrid.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/gcpy/regrid.py b/gcpy/regrid.py index e84df88e..def2a28d 100644 --- a/gcpy/regrid.py +++ b/gcpy/regrid.py @@ -785,7 +785,8 @@ def rename_existing(ds, rename_dict): 'Ydim': 'Y', 'time': 'T', }, - 'transpose': ('time', 'lev', 'nf', 'Xdim', 'Ydim') + # match format of GCHP output + 'transpose': ('time', 'lev', 'nf', 'Ydim', 'Xdim') } } @@ -810,16 +811,7 @@ def rename_existing(ds, rename_dict): ds = ravel_callback(ds) # Transpose - if len(ds.dims) == 5 or (len(ds.dims) == 4 and 'lev' in list( ds.dims) and 'time' in list(ds.dims)): - # full dim dataset - ds = ds.transpose(*dim_formats[format].get('transpose', [])) - elif len(ds.dims) == 4: - # single time - ds = ds.transpose(*dim_formats[format].get('transpose', [])[1:]) - elif len(ds.dims) == 3: - # single level / time - ds = ds.transpose(*dim_formats[format].get('transpose', [])[2:]) + ds = ds.transpose(*[x for x in dim_formats[format].get('transpose', []) if x in list(ds.dims)]) return ds From 3d09c4e5c5333376a5820bd58737f3e577a90384 Mon Sep 17 00:00:00 2001 From: Yuanjian Zhang Date: Tue, 9 Apr 2024 12:54:42 -0500 Subject: [PATCH 13/43] Corrected automatic regridding decision process Correct automatic regridding decision process to match documentation. --- gcpy/regrid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcpy/regrid.py b/gcpy/regrid.py index def2a28d..3f79804f 100644 --- a/gcpy/regrid.py +++ b/gcpy/regrid.py @@ -426,7 +426,7 @@ def create_regridders( "Warning: zonal mean comparison must be lat-lon. Defaulting to 1x1.25") cmpres = '1x1.25' cmpgridtype = "ll" - elif sg_ref_params != [] or sg_dev_params != []: + elif sg_ref_params != [1, 170, -90] or sg_dev_params != [1, 170, -90]: # pick ref grid when a stretched-grid and non-stretched-grid # are passed cmpres = refres From 12107b6caeccd11be60a814ba49bca8334d822ee Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Tue, 9 Apr 2024 15:08:00 -0400 Subject: [PATCH 14/43] Now display up to 15 digits in the mass conservation table gcpy/benchmark/modules/benchmark_mass_cons_table.py - Increased the formatting from 20.13f to 20.15f for most entries in the first two columns. 
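As a quick illustration of how these fixed-width format specs behave (the value below is arbitrary; note that double precision carries only about 15-17 significant digits, so 15 decimal places is near the limit of what is meaningful):

    mass = 3.141592653589793
    print(f"{mass : >20.13f}")  # '     3.1415926535898' (13 decimals, width 20)
    print(f"{mass : >20.15f}")  # '   3.141592653589793' (15 decimals, width 20)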
Signed-off-by: Bob Yantosca --- .../modules/benchmark_mass_cons_table.py | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/gcpy/benchmark/modules/benchmark_mass_cons_table.py b/gcpy/benchmark/modules/benchmark_mass_cons_table.py index 63104cda..4e5b61d9 100644 --- a/gcpy/benchmark/modules/benchmark_mass_cons_table.py +++ b/gcpy/benchmark/modules/benchmark_mass_cons_table.py @@ -417,8 +417,8 @@ def make_benchmark_mass_conservation_table( absdiff = dev_masses[t_idx] - ref_masses[t_idx] pctdiff = (absdiff / ref_masses[t_idx]) * 100.0 template = f" {time} " - template += f"{ref_masses[t_idx] : >20.13f} " - template += f"{dev_masses[t_idx] : >20.13f} " + template += f"{ref_masses[t_idx] : >20.15f} " + template += f"{dev_masses[t_idx] : >20.15f} " template += f"{absdiff : >13.4e} " template += f"{pctdiff : >8.3f}" print(template, file=ofile) @@ -432,8 +432,8 @@ def make_benchmark_mass_conservation_table( template += " " + "-"*13 + " " + "-"*8 print(template, file=ofile) template = " Maximum mass [Tg] " - template += f"{ref_stats['max_mass'] : >20.13f} " - template += f"{dev_stats['max_mass'] : >20.13f} " + template += f"{ref_stats['max_mass'] : >20.15f} " + template += f"{dev_stats['max_mass'] : >20.15f} " template += f"{diff_stats['max_mass__absdiff'] : >13.4e} " template += f"{diff_stats['max_mass__pctdiff'] : >8.3f}" print(template, file=ofile) @@ -450,21 +450,21 @@ def make_benchmark_mass_conservation_table( template += f"{diff_stats['minmax_absdiff_g__pctdiff'] : >8.3f}" print(template, file=ofile) template = " % difference " - template += f"{ref_stats['minmax_pctdiff'] : >20.13f} " - template += f"{dev_stats['minmax_pctdiff'] : >20.13f} " + template += f"{ref_stats['minmax_pctdiff'] : >20.15f} " + template += f"{dev_stats['minmax_pctdiff'] : >20.15f} " template += f"{diff_stats['minmax_pctdiff__absdiff'] : >13.4e} " template += f"{diff_stats['minmax_pctdiff__pctdiff'] : >8.3f}" print(template, file=ofile) print("", file=ofile) template = " Start mass [Tg] " - template += f"{ref_stats['start_mass'] : >20.13f} " - template += f"{dev_stats['start_mass'] : >20.13f} " + template += f"{ref_stats['start_mass'] : >20.15f} " + template += f"{dev_stats['start_mass'] : >20.15f} " template += f"{diff_stats['start_mass__absdiff'] : >13.4e} " template += f"{diff_stats['start_mass__pctdiff'] : >8.3f}" print(template, file=ofile) template = " End mass [Tg] " - template += f"{ref_stats['end_mass'] : >20.13f} " - template += f"{dev_stats['end_mass'] : >20.13f} " + template += f"{ref_stats['end_mass'] : >20.15f} " + template += f"{dev_stats['end_mass'] : >20.15f} " template += f"{diff_stats['end_mass__absdiff'] : >13.4e} " template += f"{diff_stats['end_mass__pctdiff'] : >8.3f}" print(template, file=ofile) @@ -475,15 +475,15 @@ def make_benchmark_mass_conservation_table( template += f"{diff_stats['startend_absdiff_g__pctdiff'] : >8.3f}" print(template, file=ofile) template = " % difference " - template += f"{ref_stats['startend_pctdiff'] : >20.13f} " - template += f"{dev_stats['startend_pctdiff'] : >20.13f} " + template += f"{ref_stats['startend_pctdiff'] : >20.15f} " + template += f"{dev_stats['startend_pctdiff'] : >20.15f} " template += f"{diff_stats['startend_pctdiff__absdiff'] : >13.4e} " template += f"{diff_stats['startend_pctdiff__pctdiff'] : >8.3f}" print(template, file=ofile) print("", file=ofile) template = " Mean mass [Tg] " - template += f"{ref_stats['mean_mass']:>20.13f} " - template += f"{dev_stats['mean_mass']:>20.13f} " + template += 
f"{ref_stats['mean_mass']:>20.15f} " + template += f"{dev_stats['mean_mass']:>20.15f} " template += f"{diff_stats['mean_mass__absdiff']:>13.4e} " template += f"{diff_stats['mean_mass__pctdiff']:>8.3f}" print(template, file=ofile) From a65b4efe80d3fb348703b7ad4b2862738dc58c84 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Thu, 11 Apr 2024 10:43:00 -0400 Subject: [PATCH 15/43] Add further tweaks to mass conservation table gcpy/benchmark/modules/benchmark_mass_cons_table.py - Implemented the following suggestiongs from @lizziel on PR #309: - Now use 12 decimal places for Ref & Dev columns (floating point) - Now use 4 decimal places for Abs Diff & Diff (exponential notation) - Absolute difference in grames is now reported as an integer - Lined up column labels accordingly Signed-off-by: Bob Yantosca --- .../modules/benchmark_mass_cons_table.py | 84 +++++++++---------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/gcpy/benchmark/modules/benchmark_mass_cons_table.py b/gcpy/benchmark/modules/benchmark_mass_cons_table.py index 4e5b61d9..83f2a44f 100644 --- a/gcpy/benchmark/modules/benchmark_mass_cons_table.py +++ b/gcpy/benchmark/modules/benchmark_mass_cons_table.py @@ -405,11 +405,11 @@ def make_benchmark_mass_conservation_table( # Headers print("", file=ofile) - template = " Date & Time" + " "*18 + "Ref mass [Tg]" + " "*13 - template += "Dev mass [Tg]"+ " "*6 + "Abs Diff" + " % Diff" + template = " Date & Time" + " "*15 + "Ref mass [Tg]" + " "*8 + template += "Dev mass [Tg]"+ " "*6 + "Abs Diff % Diff" print(template, file=ofile) - template = " " + "-"*17 + " "*5 + "-"*20 + " "*6 + "-"*20 - template += " " + "-"*13 + " " + "-"*8 + template = " " + "-"*17 + " "*5 + "-"*17 + " "*4 + "-"*17 + template += " " + "-"*13 + " " + "-"*13 print(template, file=ofile) # Total masses @@ -417,79 +417,79 @@ def make_benchmark_mass_conservation_table( absdiff = dev_masses[t_idx] - ref_masses[t_idx] pctdiff = (absdiff / ref_masses[t_idx]) * 100.0 template = f" {time} " - template += f"{ref_masses[t_idx] : >20.15f} " - template += f"{dev_masses[t_idx] : >20.15f} " + template += f"{ref_masses[t_idx] : >17.12f} " + template += f"{dev_masses[t_idx] : >17.12f} " template += f"{absdiff : >13.4e} " - template += f"{pctdiff : >8.3f}" + template += f"{pctdiff : >13.4e}" print(template, file=ofile) print(" ", file=ofile) # Statistics - template = " Summary" + " "*32+ "Ref" + " "*23 + "Dev" - template += " "*6 + "Abs Diff" + " % Diff" + template = " Summary" + " "*29+ "Ref" + " "*18 + "Dev" + template += " "*6 + "Abs Diff % Diff" print(template, file=ofile) - template = " " + "-"*17 + " "*5 + "-"*20 + " "*6 + "-"*20 - template += " " + "-"*13 + " " + "-"*8 + template = " " + "-"*17 + " "*5 + "-"*17 + " "*4 + "-"*17 + template += " " + "-"*13 + " " + "-"*13 print(template, file=ofile) template = " Maximum mass [Tg] " - template += f"{ref_stats['max_mass'] : >20.15f} " - template += f"{dev_stats['max_mass'] : >20.15f} " + template += f"{ref_stats['max_mass'] : >17.12f} " + template += f"{dev_stats['max_mass'] : >17.12f} " template += f"{diff_stats['max_mass__absdiff'] : >13.4e} " - template += f"{diff_stats['max_mass__pctdiff'] : >8.3f}" + template += f"{diff_stats['max_mass__pctdiff'] : >13.4e}" print(template, file=ofile) template = " Minimum mass [Tg] " - template += f"{ref_stats['min_mass'] : >20.13f} " - template += f"{dev_stats['min_mass'] : >20.13f} " + template += f"{ref_stats['min_mass'] : >17.12f} " + template += f"{dev_stats['min_mass'] : >17.12f} " template += f"{diff_stats['min_mass__absdiff'] 
: >13.4e} " - template += f"{diff_stats['min_mass__pctdiff'] : >8.3f}" + template += f"{diff_stats['min_mass__pctdiff'] : >13.4e}" print(template, file=ofile) template = " Abs diff [g] " - template += f"{ref_stats['minmax_absdiff_g'] : >20.13f} " - template += f"{dev_stats['minmax_absdiff_g'] : >20.13f} " + template += f"{np.int64(ref_stats['minmax_absdiff_g']) : >17d} " + template += f"{np.int64(dev_stats['minmax_absdiff_g']) : >17d} " template += f"{diff_stats['minmax_absdiff_g__absdiff'] : >13.4e} " - template += f"{diff_stats['minmax_absdiff_g__pctdiff'] : >8.3f}" + template += f"{diff_stats['minmax_absdiff_g__pctdiff'] : >13.4e}" print(template, file=ofile) template = " % difference " - template += f"{ref_stats['minmax_pctdiff'] : >20.15f} " - template += f"{dev_stats['minmax_pctdiff'] : >20.15f} " + template += f"{ref_stats['minmax_pctdiff'] : >17.12f} " + template += f"{dev_stats['minmax_pctdiff'] : >17.12f} " template += f"{diff_stats['minmax_pctdiff__absdiff'] : >13.4e} " - template += f"{diff_stats['minmax_pctdiff__pctdiff'] : >8.3f}" + template += f"{diff_stats['minmax_pctdiff__pctdiff'] : >13.4e}" print(template, file=ofile) print("", file=ofile) template = " Start mass [Tg] " - template += f"{ref_stats['start_mass'] : >20.15f} " - template += f"{dev_stats['start_mass'] : >20.15f} " + template += f"{ref_stats['start_mass'] : >17.12f} " + template += f"{dev_stats['start_mass'] : >17.12f} " template += f"{diff_stats['start_mass__absdiff'] : >13.4e} " - template += f"{diff_stats['start_mass__pctdiff'] : >8.3f}" + template += f"{diff_stats['start_mass__pctdiff'] : >13.4e}" print(template, file=ofile) template = " End mass [Tg] " - template += f"{ref_stats['end_mass'] : >20.15f} " - template += f"{dev_stats['end_mass'] : >20.15f} " + template += f"{ref_stats['end_mass'] : >17.12f} " + template += f"{dev_stats['end_mass'] : >17.12f} " template += f"{diff_stats['end_mass__absdiff'] : >13.4e} " - template += f"{diff_stats['end_mass__pctdiff'] : >8.3f}" + template += f"{diff_stats['end_mass__pctdiff'] : >13.4e}" print(template, file=ofile) template = " Abs diff [g] " - template += f"{ref_stats['startend_absdiff_g'] : >20.13f} " - template += f"{dev_stats['startend_absdiff_g'] : >20.13f} " + template += f"{np.int64(ref_stats['startend_absdiff_g']) : >17d} " + template += f"{np.int64(dev_stats['startend_absdiff_g']) : >17d} " template += f"{diff_stats['startend_absdiff_g__absdiff'] : >13.4e} " - template += f"{diff_stats['startend_absdiff_g__pctdiff'] : >8.3f}" + template += f"{diff_stats['startend_absdiff_g__pctdiff'] : >13.4e}" print(template, file=ofile) template = " % difference " - template += f"{ref_stats['startend_pctdiff'] : >20.15f} " - template += f"{dev_stats['startend_pctdiff'] : >20.15f} " + template += f"{ref_stats['startend_pctdiff'] : >17.12f} " + template += f"{dev_stats['startend_pctdiff'] : >17.12f} " template += f"{diff_stats['startend_pctdiff__absdiff'] : >13.4e} " - template += f"{diff_stats['startend_pctdiff__pctdiff'] : >8.3f}" + template += f"{diff_stats['startend_pctdiff__pctdiff'] : >13.4e}" print(template, file=ofile) print("", file=ofile) template = " Mean mass [Tg] " - template += f"{ref_stats['mean_mass']:>20.15f} " - template += f"{dev_stats['mean_mass']:>20.15f} " + template += f"{ref_stats['mean_mass']:>17.12f} " + template += f"{dev_stats['mean_mass']:>17.12f} " template += f"{diff_stats['mean_mass__absdiff']:>13.4e} " - template += f"{diff_stats['mean_mass__pctdiff']:>8.3f}" + template += f"{diff_stats['mean_mass__pctdiff']:>13.4e}" print(template, 
file=ofile) - template = " Variance [Tg] " - template += f"{ref_stats['variance']:>20.13e} " - template += f"{dev_stats['variance']:>20.13e} " + template = " Variance [Tg] " + template += f"{ref_stats['variance']:>17.12e} " + template += f"{dev_stats['variance']:>17.12e} " template += f"{diff_stats['variance__absdiff']:>13.4e} " - template += f"{diff_stats['variance__pctdiff']:>8.3f}" + template += f"{diff_stats['variance__pctdiff']:>13.4e}" print(template, file=ofile) From b889d2e4348883f9a52c888c01628c9e387b8845 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Thu, 11 Apr 2024 11:28:21 -0400 Subject: [PATCH 16/43] Add function get_datetimes_from_filenames; Use in mass cons table gcpy/benchmark/modules/benchmark_utils.py - Added function get_datetimes_from_filenames to return an np.ndarray of np.datetime64 values. This is needed because often the initial restart file may have a different internal timestamp than the starting date of the simulation. gcpy/benchmark/modules/benchmark_mass_cons_table.py - Now use get_datetimes_from_filenames to define the ref_time and dev_time variables. CHANGELOG.md - Updated accordingly Signed-off-by: Bob Yantosca --- CHANGELOG.md | 1 + .../modules/benchmark_mass_cons_table.py | 7 +++-- gcpy/benchmark/modules/benchmark_utils.py | 28 +++++++++++++++++++ 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1ebd93b7..3132f3d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Function `copy_file_to_dir` in `gcpy/util.py`. This is a wrapper for `shutil.copyfile`. - Script `gcpy/benchmark/modules/benchmark_mass_cons_table.py`, with code to create mass conservation tables - Expanded statistics output in benchmark mass conservation tables +- Function `get_datetimes_from_filenames` in `gcpy/benchmark/modules/benchmark_utils.py` ### Changed - Bump pip from 23.2.1 to 23.3 (dependabot suggested this) diff --git a/gcpy/benchmark/modules/benchmark_mass_cons_table.py b/gcpy/benchmark/modules/benchmark_mass_cons_table.py index 83f2a44f..5ad0d6d3 100644 --- a/gcpy/benchmark/modules/benchmark_mass_cons_table.py +++ b/gcpy/benchmark/modules/benchmark_mass_cons_table.py @@ -10,7 +10,8 @@ from gcpy.units import convert_units from gcpy.util import dataset_reader, get_area_from_dataset, \ make_directory, read_config_file, verify_variable_type - +from gcpy.benchmark.modules.benchmark_utils import \ + get_datetimes_from_filenames # Constants SPC_NAME = "PassiveTracer" @@ -339,8 +340,8 @@ def make_benchmark_mass_conservation_table( dev_delta_prs = get_delta_pressure(dev_data) # Get datetime values - ref_time = ref_data["time"].values - dev_time = dev_data["time"].values + ref_time = get_datetimes_from_filenames(ref_files) + dev_time = get_datetimes_from_filenames(dev_files) # Throw an error if Ref & Dev have differing datetime values if not np.all(ref_time == dev_time): diff --git a/gcpy/benchmark/modules/benchmark_utils.py b/gcpy/benchmark/modules/benchmark_utils.py index 64564cd5..8cac7beb 100644 --- a/gcpy/benchmark/modules/benchmark_utils.py +++ b/gcpy/benchmark/modules/benchmark_utils.py @@ -509,3 +509,31 @@ def rename_speciesconc_to_speciesconcvv( rename_dict[var] = var.replace("SpeciesConc_", "SpeciesConcVV_") return dset.rename(rename_dict) + + +def get_datetimes_from_filenames( + files +): + """ + Returns datetimes obtained from GEOS-Chem diagnostic or + restart file names. 
+ + Args + files : list : GEOS-Chem diagnostic/restart file names + + Returns + datetimes : np.ndarray : Array of np.datetime64 values + """ + datetimes = np.zeros( + len(files), + dtype=np.datetime64("1970-01-01T00:00") + ) + for idx, ifile in enumerate(files): + substr = os.path.basename(ifile).split("_") + date = substr[0].split(".")[-1] + time = substr[1].split("z")[0] + dt_str = date[0:4] + "-" + date[4:6] + "-" + date[6:8] + dt_str += "T" + time[0:2] + ":" + time[2:4] + datetimes[idx] = np.datetime64(dt_str) + + return datetimes From 96b89faec49621a287752f0bf0a905da355cc65d Mon Sep 17 00:00:00 2001 From: Yuanjian Zhang Date: Thu, 11 Apr 2024 11:49:28 -0500 Subject: [PATCH 17/43] Go back to resolve merge conflicts with dev branch --- gcpy/benchmark_funcs.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/gcpy/benchmark_funcs.py b/gcpy/benchmark_funcs.py index e18e0390..d8bb5f7c 100644 --- a/gcpy/benchmark_funcs.py +++ b/gcpy/benchmark_funcs.py @@ -1514,7 +1514,10 @@ def createplots(filecat): result.keys())[0]]['500'] for result in results} dict_zm = {list(result.keys())[0]: result[list( result.keys())[0]]['zm'] for result in results} - + + print("stop here") + quit() + # ============================================================== # Write the list of species having significant differences, # which we need to fill out the benchmark approval forms. From 9ac023dcecf968e34cd9fcf67174f6ce38d4f863 Mon Sep 17 00:00:00 2001 From: Yuanjian Zhang Date: Thu, 11 Apr 2024 11:51:53 -0500 Subject: [PATCH 18/43] Resolve merge conflicts with dev branch --- gcpy/benchmark_funcs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcpy/benchmark_funcs.py b/gcpy/benchmark_funcs.py index d8bb5f7c..18f7ffd3 100644 --- a/gcpy/benchmark_funcs.py +++ b/gcpy/benchmark_funcs.py @@ -1517,7 +1517,7 @@ def createplots(filecat): print("stop here") quit() - + # ============================================================== # Write the list of species having significant differences, # which we need to fill out the benchmark approval forms. From d0b24060562e4d7122de6c18fe14fc50bae24432 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Mon, 22 Apr 2024 11:37:19 -0400 Subject: [PATCH 19/43] Update GitHub labels for issues/PRs that should not go stale .github/stale.yml - Updated the list of label names to account for the recent change in GitHub label names in the GCPy repo. CHANGELOG.md - Updated accordingly Signed-off-by: Bob Yantosca --- .github/stale.yml | 21 +++++++++++++++++---- CHANGELOG.md | 3 ++- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/.github/stale.yml b/.github/stale.yml index 7011576b..08067324 100644 --- a/.github/stale.yml +++ b/.github/stale.yml @@ -1,19 +1,32 @@ +# +# stale.yml: GitHub stalebot configuration file +# + # Number of days of inactivity before an issue becomes stale daysUntilStale: 30 + # Number of days of inactivity before a stale issue is closed daysUntilClose: 7 + # Issues with these labels will never be considered stale exemptLabels: - - never stale - - feature - - discussion + - 'category: Discussion' + - 'category: Feature Request' + - 'deferred' + - 'help needed: Open Research Problem' + - 'help needed: Request Input from Community' + - 'never stale' + - 'TODO: Documentation' + # Label to use when marking an issue as stale staleLabel: stale + # Comment to post when marking an issue as stale. Set to `false` to disable markComment: > This issue has been automatically marked as stale because it has not had recent activity. 
If there are no updates within 7 days it will be closed. You can add the "never stale" tag to prevent the Stale bot from closing this issue. + # Comment to post when closing a stale issue. Set to `false` to disable -closeComment: Closing due to inactivity +closeComment: Closing due to inactivity diff --git a/CHANGELOG.md b/CHANGELOG.md index 9d6bd958..5c58bab2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,7 +39,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Benchmark functions now call `rename_speciesconc_to_speciesconcvv` - Create radionuclide, STE flux, and mass conservation tables for Ref and Dev versions in TransportTracers benchmarks - Use new function `copy_file_to_dir` to copy the benchmark script and configuration file to the benchmark results folders - +- Updated GitHub stalebot config file `stale.yml` with new issue/PR labels that should not go stale + ### Fixed - CS inquiry functions in `gcpy/cstools.py` now work properly for `xr.Dataset` and `xr.DataArray` objects - Prevent an import error by using `seaborn-v0_8-darkgrid` in`gcpy/benchmark/modules/benchmark_models_vs_obs.py` From 748d9bdd2e6d3e775de3b4e37f430a55b32d3990 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Mon, 22 Apr 2024 13:39:54 -0400 Subject: [PATCH 20/43] Now use the "stale" GitHub action instead of StaleBot .github/no-response.yml .github/stale.yml - Removed .github/workflows/build-test-environment.yml - Removed, this action was always failing .github/workflows/stale.yml - Configuration file for GitHub "stale" action, which replaces StaleBot. Use the most recent list of issue/PR labels to never be marked stale. CHANGELOG.md - Updated accordingly Signed-off-by: Bob Yantosca --- .github/no-response.yml | 13 ------- .github/stale.yml | 32 ----------------- .github/workflows/build-test-environment.yml | 38 -------------------- .github/workflows/stale.yml | 26 ++++++++------ CHANGELOG.md | 4 ++- 5 files changed, 19 insertions(+), 94 deletions(-) delete mode 100644 .github/no-response.yml delete mode 100644 .github/stale.yml delete mode 100644 .github/workflows/build-test-environment.yml diff --git a/.github/no-response.yml b/.github/no-response.yml deleted file mode 100644 index dd2b8cfe..00000000 --- a/.github/no-response.yml +++ /dev/null @@ -1,13 +0,0 @@ -# Configuration for probot-no-response - https://github.com/probot/no-response - -# Number of days of inactivity before an Issue is closed for lack of response -daysUntilClose: 14 -# Label requiring a response -responseRequiredLabel: more information needed -# Comment to post when closing an Issue for lack of response. Set to `false` to disable -closeComment: > - This issue has been automatically closed because there has been no response - to our request for more information from the original author. With only the - information that is currently in the issue, we don't have enough information - to take action. Please reach out if you have or find the answers we need so - that we can investigate further. 
diff --git a/.github/stale.yml b/.github/stale.yml deleted file mode 100644 index 08067324..00000000 --- a/.github/stale.yml +++ /dev/null @@ -1,32 +0,0 @@ -# -# stale.yml: GitHub stalebot configuration file -# - -# Number of days of inactivity before an issue becomes stale -daysUntilStale: 30 - -# Number of days of inactivity before a stale issue is closed -daysUntilClose: 7 - -# Issues with these labels will never be considered stale -exemptLabels: - - 'category: Discussion' - - 'category: Feature Request' - - 'deferred' - - 'help needed: Open Research Problem' - - 'help needed: Request Input from Community' - - 'never stale' - - 'TODO: Documentation' - -# Label to use when marking an issue as stale -staleLabel: stale - -# Comment to post when marking an issue as stale. Set to `false` to disable -markComment: > - This issue has been automatically marked as stale because it has not had - recent activity. If there are no updates within 7 days it will be closed. - You can add the "never stale" tag to prevent the Stale bot from closing - this issue. - -# Comment to post when closing a stale issue. Set to `false` to disable -closeComment: Closing due to inactivity diff --git a/.github/workflows/build-test-environment.yml b/.github/workflows/build-test-environment.yml deleted file mode 100644 index b3efb372..00000000 --- a/.github/workflows/build-test-environment.yml +++ /dev/null @@ -1,38 +0,0 @@ ---- -# -# GitHub action to build the GCPy test environment with micromamba -# See: https://github.com/marketplace/actions/setup-micromamba -# -name: build-test-environment - -on: - push: - branches: [ "main", "dev" ] - pull_request: - # The branches below must be a subset of the branches above - branches: [ "main", "dev" ] - -jobs: - build: - runs-on: ubuntu-latest - strategy: - matrix: - python-version: ["3.9"] - steps: - - name: Checkout the GCPy repository - uses: actions/checkout@v4 - - name: Create "testing" environment - uses: mamba-org/setup-micromamba@v1 - with: - micromamba-version: 'latest' - environment-file: docs/environment_files/testing.yml - init-shell: bash - cache-environment: false - generate-run-shell: true - post-cleanup: 'all' - - name: Test if "import gcpy" works - run: python -c "import gcpy" - shell: micromamba-shell {0} - - name: Test if we can create a plot - run: python -m gcpy.examples.plotting.create_test_plot - shell: micromamba-shell {0} diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index 7a13f5ea..b14b1f62 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -1,17 +1,17 @@ -# This workflow warns and then closes issues and PRs that have had no activity for a specified amount of time. +# This workflow warns and then closes issues that have had no activity for a specified amount of time. # # You can adjust the behavior by modifying this file. # For more information, see: # https://github.com/actions/stale -name: Mark stale issues and pull requests +name: Mark stale issues on: schedule: - # Job will run at midnight on the 1st of each month (POSIX time syntax) - - cron: '0 0 1 * *' + - cron: '0 0 * * *' # Run every night at midnight jobs: stale: + runs-on: ubuntu-latest permissions: issues: write @@ -21,12 +21,18 @@ jobs: - uses: actions/stale@v5 with: repo-token: ${{ secrets.GITHUB_TOKEN }} - # Stale issue settings - days-before-issue-stale: 30 - days-before-issue-close: 30 stale-issue-label: 'stale' - stale-issue-message: 'This issue is stale because it has been open for 30 days with no activity.' 
- close-issue-message: "This issue was closed because it has been inactive for 30 days since being marked as stale." - # Never mark PRs as stale + exempt-issue-labels: + - 'category: Discussion' + - 'category: Feature Request' + - 'deferred' + - 'help needed: Open Research Problem' + - 'help needed: Request Input from Community' + - 'never stale' + - 'TODO: Documentation' + days-before-issue-stale: 30 + days-before-issue-close: 7 + stale-issue-message: 'This issue has been automatically marked as stale because it has not had recent activity. If there are no updates within 7 days it will be closed. You can add the "never stale" tag to prevent this issue from being closed.' + close-issue-message: 'Closing due to inactivity' days-before-pr-stale: -1 days-before-pr-close: -1 diff --git a/CHANGELOG.md b/CHANGELOG.md index 5c58bab2..ab8370d5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,7 +19,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Added fixed level budget diagnostic to budget operations table - Function `rename_speciesconc_to_speciesconcvv` in `gcpy/benchmark/modules/benchmark_utils.py` - Function `copy_file_to_dir` in `gcpy/util.py`. This is a wrapper for `shutil.copyfile`. - +- GitHub Action config file `.github/workflows/stale.yml`, which replaces StaleBot + ### Changed - Bump pip from 23.2.1 to 23.3 (dependabot suggested this) - Bump pypdf from 3.16.1 to 3.17.0 (dependabot suggested this) @@ -59,6 +60,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Environment file `docs/environment_files/environment.yml` - Environment file `docs/environment_files/requirements.txt` - Removed `awscli` from the GCPy environment; version 2 is no longer available on conda-forge or PyPi +- GitHub config files `.github/stale.yml` and `.github/no-response.yml` ## [1.4.2] - 2024-01-26 ### Added From c98cb8c26bb404bcffcf15f31600de88caccb6b8 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Mon, 22 Apr 2024 16:22:42 -0400 Subject: [PATCH 21/43] Replace whitespace with underscores in file names & version strings gcpy/util.py - Add utility function "replace_whitespace" to replace whitespace characters with another character (underscore is default) gcpy/benchmark/modules/benchmark_mass_cons_table.py - Call replace_whitespace from util.py to replace spaces in version labels and the output file name. 
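For example (the version labels below are hypothetical; each run of whitespace collapses to a single replacement character):

    def replace_whitespace(string, repl_char="_"):
        # Same logic as the new gcpy/util.py function added below
        return repl_char.join(string.split())

    print(replace_whitespace("GCHP 14.4.0 alpha"))  # GCHP_14.4.0_alpha
    print(replace_whitespace("GCC  14.4.0", "-"))   # GCC-14.4.0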
CHANGELOG.md - Updated accordingly Signed-off-by: Bob Yantosca --- CHANGELOG.md | 1 + .../modules/benchmark_mass_cons_table.py | 15 +++++++++---- gcpy/util.py | 21 +++++++++++++++++++ 3 files changed, 33 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3132f3d8..99c88b9c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Script `gcpy/benchmark/modules/benchmark_mass_cons_table.py`, with code to create mass conservation tables - Expanded statistics output in benchmark mass conservation tables - Function `get_datetimes_from_filenames` in `gcpy/benchmark/modules/benchmark_utils.py` +- Function `replace_whitespace` in `gcpy/util.py` ### Changed - Bump pip from 23.2.1 to 23.3 (dependabot suggested this) diff --git a/gcpy/benchmark/modules/benchmark_mass_cons_table.py b/gcpy/benchmark/modules/benchmark_mass_cons_table.py index 5ad0d6d3..190029f1 100644 --- a/gcpy/benchmark/modules/benchmark_mass_cons_table.py +++ b/gcpy/benchmark/modules/benchmark_mass_cons_table.py @@ -8,7 +8,8 @@ import xarray as xr from gcpy.constants import skip_these_vars from gcpy.units import convert_units -from gcpy.util import dataset_reader, get_area_from_dataset, \ +from gcpy.util import \ + replace_whitespace, dataset_reader, get_area_from_dataset, \ make_directory, read_config_file, verify_variable_type from gcpy.benchmark.modules.benchmark_utils import \ get_datetimes_from_filenames @@ -319,6 +320,10 @@ def make_benchmark_mass_conservation_table( # Get a list of properties for the given species metadata = get_passive_tracer_metadata(spcdb_dir) + # Replace whitespace with underscores in version labels + ref_label = replace_whitespace(ref_label) + dev_label = replace_whitespace(dev_label) + # Preserve xarray attributes with xr.set_options(keep_attrs=True): @@ -390,9 +395,11 @@ def make_benchmark_mass_conservation_table( diff_stats = compute_diff_statistics(ref_stats, dev_stats) # Create file - outfilename = os.path.join( - dst, - f"Passive_mass.{ref_label}_vs_{dev_label}.txt" + outfilename = replace_whitespace( + os.path.join( + dst, + f"Passive_mass.{ref_label}_vs_{dev_label}.txt" + ) ) with open(outfilename, 'w', encoding="utf-8") as ofile: diff --git a/gcpy/util.py b/gcpy/util.py index 6a5b347b..e50d4874 100644 --- a/gcpy/util.py +++ b/gcpy/util.py @@ -2235,3 +2235,24 @@ def copy_file_to_dir( ofile = os.path.join(dest, os.path.basename(ifile)) if not os.path.exists(ofile): copyfile(ifile, ofile) + + +def replace_whitespace( + string, + repl_char="_" +): + """ + Replaces whitespace in a string with underscores. + Useful for removing spaces in filename strings. 
+ + Args + string : str : The input string + repl_char : str : Replacement character (default is "_") + + Returns + string : str : String with whitespace replaced + """ + verify_variable_type(string, str) + verify_variable_type(repl_char, str) + + return repl_char.join(string.split()) From df1e984b1dc72b9cefc5ec903dbd5bbaf9d2d4b3 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Tue, 23 Apr 2024 10:25:09 -0400 Subject: [PATCH 22/43] Bug fix: Tell dask to allow large chunk sizes (needed for c180+) gcpy/benchmark/modules/benchmark_mass_cons_table.py - Import the dask.config class as "dask_config" - Add a call to dask_config in the same "with" blocks where we tell xarray to keep all variable/global attributes Signed-off-by: Bob Yantosca --- gcpy/benchmark/modules/benchmark_mass_cons_table.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/gcpy/benchmark/modules/benchmark_mass_cons_table.py b/gcpy/benchmark/modules/benchmark_mass_cons_table.py index 190029f1..bd680c78 100644 --- a/gcpy/benchmark/modules/benchmark_mass_cons_table.py +++ b/gcpy/benchmark/modules/benchmark_mass_cons_table.py @@ -5,6 +5,7 @@ import os import warnings import numpy as np +from dask import config as dask_config import xarray as xr from gcpy.constants import skip_these_vars from gcpy.units import convert_units @@ -140,7 +141,10 @@ def compute_total_mass( Returns total_mass : np.float64 : Total mass [Tg] of species. """ - with xr.set_options(keep_attrs=True): + # Keep xarray attributes and allow large chunks in Dask slicing + with xr.set_options(keep_attrs=True) and dask_config.set({ + "array.slicing.split_large_chunks": False + }): # Local variables units = TARGET_UNITS @@ -325,8 +329,10 @@ def make_benchmark_mass_conservation_table( dev_label = replace_whitespace(dev_label) # Preserve xarray attributes - with xr.set_options(keep_attrs=True): - + with xr.set_options(keep_attrs=True) and dask_config.set({ + "array.slicing.split_large_chunks": False + }): + # ============================================================== # Read data and make sure time dimensions are consistent # ============================================================== From af8ef64b276d9a0eb1e6b68e8e0f27555a07d99a Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Tue, 23 Apr 2024 11:38:59 -0400 Subject: [PATCH 23/43] benchmark_mass_cons_table.py now loops over individual files gcpy/benchmark/modules/benchmark_mass_cons_table.py - Refactored code so that we read one file at a time in order to avoid memory issues when reading large files (e.g. c180 resolution). - Delete objects at the end of the loop over times to force garbage collection. - Remove references to dask_config, it's not needed. Signed-off-by: Bob Yantosca --- .../modules/benchmark_mass_cons_table.py | 68 +++++++++++-------- 1 file changed, 40 insertions(+), 28 deletions(-) diff --git a/gcpy/benchmark/modules/benchmark_mass_cons_table.py b/gcpy/benchmark/modules/benchmark_mass_cons_table.py index bd680c78..d9e340e3 100644 --- a/gcpy/benchmark/modules/benchmark_mass_cons_table.py +++ b/gcpy/benchmark/modules/benchmark_mass_cons_table.py @@ -5,7 +5,6 @@ import os import warnings import numpy as np -from dask import config as dask_config import xarray as xr from gcpy.constants import skip_these_vars from gcpy.units import convert_units @@ -122,7 +121,6 @@ def get_passive_tracer_varname( def compute_total_mass( - t_idx, dset, area, delta_p, @@ -142,9 +140,7 @@ def compute_total_mass( total_mass : np.float64 : Total mass [Tg] of species. 
""" # Keep xarray attributes and allow large chunks in Dask slicing - with xr.set_options(keep_attrs=True) and dask_config.set({ - "array.slicing.split_large_chunks": False - }): + with xr.set_options(keep_attrs=True): # Local variables units = TARGET_UNITS @@ -156,12 +152,12 @@ def compute_total_mass( # Compute mass in Tg darr = convert_units( - dset[varname].astype(np.float64).isel(time=t_idx), + dset[varname].astype(np.float64), varname, metadata, units, area_m2=area, - delta_p=delta_p.isel(time=t_idx), + delta_p=delta_p, ) return np.sum(darr) @@ -327,28 +323,20 @@ def make_benchmark_mass_conservation_table( # Replace whitespace with underscores in version labels ref_label = replace_whitespace(ref_label) dev_label = replace_whitespace(dev_label) - + # Preserve xarray attributes - with xr.set_options(keep_attrs=True) and dask_config.set({ - "array.slicing.split_large_chunks": False - }): - + with xr.set_options(keep_attrs=True): + # ============================================================== - # Read data and make sure time dimensions are consistent + # Make sure Ref and Dev have consistent time dimensions # ============================================================== with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=xr.SerializationWarning) - # Pick the proper function to read the data - reader = dataset_reader(multi_files=True, verbose=False) - - # Get data - ref_data = reader(ref_files, drop_variables=skip_these_vars).load() - dev_data = reader(dev_files, drop_variables=skip_these_vars).load() - ref_area = get_area(ref_areapath, ref_data) - dev_area = get_area(dev_areapath, dev_data) - ref_delta_prs = get_delta_pressure(ref_data) - dev_delta_prs = get_delta_pressure(dev_data) + # Make sure Ref & Dev have the same number of elements + if len(ref_files) != len(dev_files): + msg = "Ref and Dev have different time dimensions!" 
+ raise ValueError(msg) # Get datetime values ref_time = get_datetimes_from_filenames(ref_files) @@ -366,31 +354,55 @@ def make_benchmark_mass_conservation_table( # List for holding the datetimes display_dates = [] - # ================================================================== - # Calculate global mass for the tracer at all restart dates - # ================================================================== + # Pick the proper function to read the data + reader = dataset_reader(multi_files=False, verbose=False) + + # ============================================================== + # Read data and make sure time dimensions are consistent + # Loop over files individually to avoid memory issues + # ============================================================== for t_idx, time in enumerate(dev_time): + # Get data + ref_data = reader( + ref_files[t_idx], + drop_variables=skip_these_vars + ).load() + dev_data = reader( + dev_files[t_idx], + drop_variables=skip_these_vars + ).load() + ref_area = get_area(ref_areapath, ref_data) + dev_area = get_area(dev_areapath, dev_data) + ref_delta_prs = get_delta_pressure(ref_data) + dev_delta_prs = get_delta_pressure(dev_data) + # Save datetime string into display_dates list time = str(np.datetime_as_string(time, unit="m")) display_dates.append(time.replace("T", " ")) # Compute total masses [Tg] for Ref & Dev ref_masses[t_idx] = compute_total_mass( - t_idx, ref_data, ref_area, ref_delta_prs, metadata, ) dev_masses[t_idx] = compute_total_mass( - t_idx, dev_data, dev_area, dev_delta_prs, metadata, ) + # Free memory in large objects + del ref_data + del dev_data + del ref_area + del dev_area + del ref_delta_prs + del dev_delta_prs + # ================================================================== # Print masses and statistics to file # ================================================================== From 9377feb6a7a637770dd825e8f15056dafee1da46 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Fri, 26 Apr 2024 10:55:04 -0400 Subject: [PATCH 24/43] PR #311 post-merge fix: Test if lon_bnds, lat_bnds exist before dropping gcpy/file_regrid.py - Add if statements to test if lat_bnds and lon_bnds are in the data variables of the dataset before trying to drop them from the dataset. 
CHANGELOG.md - Updated accordingly Signed-off-by: Bob Yantosca Now allow make_regridder_L2L to use nearest_s2d regridding gcpy/regrid.py - --- CHANGELOG.md | 1 + gcpy/file_regrid.py | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ab8370d5..e385b6a7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -53,6 +53,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Added missing `n_cores` to `gcpy/examples/diagnostics/compare_diags.yml` - Added missing `plot_drydep` option to `gcpy/gcpy/benchmark/config/1yr_ch4_benchmark.yml` - Add `docs/requirements.txt` symbolic link to `docs/environment_files/read_the_docs_requirements.txt` for RTD builds +- `gcpy/file_regrid.py` now tests if `lon_bnds`, `lat_bnds` are in the dataset before trying to drop them ### Removed - Example script `gcpy/examples/plotting/mda8_o3_timeseries.py` diff --git a/gcpy/file_regrid.py b/gcpy/file_regrid.py index 5d4dbb23..e906285e 100644 --- a/gcpy/file_regrid.py +++ b/gcpy/file_regrid.py @@ -745,7 +745,10 @@ def regrid_ll_to_ll( if "lat" not in dset[var].dims \ and "lon" not in dset[var].dims ] - dset = dset.drop(["lat_bnds", "lon_bnds"]) + if "lat_bnds" in dset.data_vars: + dset = dset.drop(["lat_bnds"]) + if "lon_bnds" in dset.data_vars: + dset = dset.drop(["lon_bnds"]) non_fields = dset[non_fields] dset = dset.drop(non_fields) @@ -756,6 +759,12 @@ def regrid_ll_to_ll( dim_format_out="classic" ) + # Decide if we are regridding a data file or a mask + # by testing for the variable name "MASK" + method = "conservative" + if "MASK" in dset.data_vars: + method = "nearest_s2d" + # Create the regridder and regrid the data regridder = make_regridder_L2L( ll_res_in, @@ -763,7 +772,8 @@ def regrid_ll_to_ll( reuse_weights=True, in_extent=in_extent, out_extent=out_extent, - weightsdir=weightsdir + weightsdir=weightsdir, + method=method, ) dset = regridder( dset, From 29a06c85f6ef5265fa0e732691f644d3ec51a394 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Tue, 30 Apr 2024 11:35:06 -0400 Subject: [PATCH 25/43] Add example script "make_mask_file.py" gcpy/examples/working_with_files/make_mask_file.py - Example script to create a country mask from a netCDF file containing country IDs (HEMCO/MASKS/v2014-07/countrymask_0.1x0.1.nc). CHANGELOG.md - Updated accordingly Signed-off-by: Bob Yantosca --- CHANGELOG.md | 1 + .../working_with_files/make_mask_file.py | 123 ++++++++++++++++++ 2 files changed, 124 insertions(+) create mode 100755 gcpy/examples/working_with_files/make_mask_file.py diff --git a/CHANGELOG.md b/CHANGELOG.md index e385b6a7..b680347b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Function `rename_speciesconc_to_speciesconcvv` in `gcpy/benchmark/modules/benchmark_utils.py` - Function `copy_file_to_dir` in `gcpy/util.py`. This is a wrapper for `shutil.copyfile`. 
 - GitHub Action config file `.github/workflows/stale.yml`, which replaces StaleBot
+- Example script `gcpy/examples/working_with_files/make_mask_file.py`
 
 ### Changed
 - Bump pip from 23.2.1 to 23.3 (dependabot suggested this)
diff --git a/gcpy/examples/working_with_files/make_mask_file.py b/gcpy/examples/working_with_files/make_mask_file.py
new file mode 100755
index 00000000..e617298b
--- /dev/null
+++ b/gcpy/examples/working_with_files/make_mask_file.py
@@ -0,0 +1,123 @@
+#!/usr/bin/env python3
+"""
+Create a mask file (for emissions) from a netCDF file of country IDs:
+Download this file before using:
+https://gcgrid.s3.amazonaws.com/HEMCO/MASKS/v2014-07/countrymask_0.1x0.1.nc
+
+Usage:
+------
+./make_mask_file.py [-i filein] -o fileout -c country_id -m true|false
+
+where
+
+-i filein     : File of country IDs (download from link above).
+                Default value: "countrymask_0.1x0.1.nc"
+
+-o fileout    : Output file for the mask
+
+-c country_id : ID of your desired country.
+                Use a netCDF file viewer to determine this value.
+
+-m true|false : Create a mirrored (i.e. inverted) mask.
+                Default value: False
+
+Examples:
+---------
+
+# Create a mask for Canada
+./make_mask_file.py -o Canada_Mask.01x01.nc -c 124
+
+# Create a mirrored mask for Mexico
+./make_mask_file.py -o Mexico_Mask_Mirror.01x01.nc -c 484 -m true
+
+"""
+import argparse
+import numpy as np
+import xarray as xr
+
+def make_mask(
+    filein,
+    country_id,
+    fileout,
+    mirror=False,
+):
+    """
+    Creates a netCDF mask file for a given country.
+
+    Args
+    filein     : str  : File with country ID values
+    country_id : int  : ID of the country that you want masked
+    fileout    : str  : Output mask file
+    mirror     : bool : Return a mirrored (i.e. inverted) mask?
+    """
+    with xr.set_options(keep_attrs=True):
+
+        # Define zero and one values (for normal + mirror masks)
+        one = np.float32(1)
+        zero = np.float32(0)
+        if mirror:
+            one = np.float32(0)
+            zero = np.float32(1)
+
+        # Open file and rename mask variable to MASK
+        dset = xr.open_dataset(filein)
+        dset = dset.rename({"CountryID": "MASK"})
+
+        # Mask out the country
+        array = np.where(
+            dset["MASK"].values == country_id,
+            one,
+            zero
+        )
+
+        # Cast to float to avoid issues w/ GCHP input
+        dset["MASK"].values = array
+        dset["MASK"] = dset["MASK"].astype(np.float32)
+
+        # Write to disk
+        dset.to_netcdf(fileout)
+
+
+if __name__ == '__main__':
+
+    # Tell parser which arguments to expect
+    parser = argparse.ArgumentParser(
+        description="Create a mask file (for emissions) from a netCDF file of country IDs."
+    )
+    parser.add_argument(
+        "-i", "--filein",
+        metavar="FILEIN",
+        type=str,
+        required=False,
+        default="countrymask_0.1x0.1.nc",
+        help="netCDF file with country IDs"
+    )
+    parser.add_argument(
+        "-o", "--fileout",
+        metavar="FILEOUT",
+        type=str,
+        required=True,
+        help="name of output file"
+    )
+    parser.add_argument(
+        "-c", "--country-id",
+        metavar="COUNTRY-ID",
+        required=True,
+        type=int,
+        help="Country ID value to match in input file",
+    )
+    parser.add_argument(
+        "-m", "--mirror",
+        metavar="MIRROR",
+        type=lambda val: str(val).lower() in ("true", "t", "yes", "y", "1"),  # NOTE: plain type=bool would treat "-m false" as True
+        required=False,
+        default=False,
+        help="Create mirrored (reversed) mask"
+    )
+    args = parser.parse_args()
+    make_mask(
+        args.filein,
+        args.country_id,
+        args.fileout,
+        args.mirror,
+    )
From db38f265a2af4bf5080d741bf75fc7128658e93b Mon Sep 17 00:00:00 2001
From: Bob Yantosca 
Date: Tue, 30 Apr 2024 18:29:00 -0400
Subject: [PATCH 26/43] Initial commit: Add script to scrape GEOS-Chem timers

gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py
- Added this script to scrape benchmark timers into a table to
  display timing info from Ref & Dev models.  NOTE: More work will
  be needed to refine this in subsequent commits.

Signed-off-by: Bob Yantosca 
---
 .../benchmark_scrape_gcclassic_timers.py      | 167 ++++++++++++++++++
 1 file changed, 167 insertions(+)
 create mode 100755 gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py

diff --git a/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py b/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py
new file mode 100755
index 00000000..a264a50c
--- /dev/null
+++ b/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py
@@ -0,0 +1,167 @@
+#!/usr/bin/env python3
+"""
+"""
+import os
+from gcpy.util import verify_variable_type
+import json
+
+
+def read_gcclassic(ifile):
+    """
+    Determines if the input is a valid JSON.
+
+    Args
+    ifile : str : file name
+
+    Returns
+    result : dict : Dictionary with timing information
+    """
+
+    # Make sure file exists
+    if not os.path.exists(ifile):
+        raise FileNotFoundError(f"Could not find {ifile}!")
+
+    # First try to read the file as a JSON,
+    # then try to read the file as text.
+    try:
+        result = read_gcclassic_json(ifile)
+    except ValueError as err:
+        result = read_gcclassic_log(ifile)
+    return result
+
+
+def read_gcclassic_json(
+    ifile
+):
+    """
+    Parses the GEOS-Chem Classic timing information in JSON format
+    and returns a dictionary with the results.
+
+    Args
+    ifile : str : File name
+
+    Returns
+    result : dict : Dictionary with timing information
+    """
+    try:
+        with open(ifile, encoding="utf-8") as json_file:
+            result = json.load(json_file)
+        return result["GEOS-Chem Classic timers"]
+    except ValueError as err:
+        raise ValueError from err
+
+
+def read_gcclassic_log(ifile):
+    """
+    Parses the GEOS-Chem Classic log file with timing information
+    and returns a dictionary with the results.
+ + Args + ifile : str : File name + + Returns + result : dict : Dictionary with timing information + """ + keep_line = False + timers = {} + + # Read the line backwards and get just keep the timing information + with open(ifile, encoding="utf-8") as log_file: + + for line in reversed(list(log_file)): + line = line.strip("\n") + + # Set a flag to denote the start & end of timing info + if "Unit conversions" in line: + keep_line = True + if "----------" in line: + keep_line = False + break + + # Append timing info lines into a list + if keep_line: + substr = line.split(":") + key = substr[0].strip() + val = substr[3].split()[1].strip() + timers[key] = {"seconds": val} + + return timers + + +def print_timer(key, ref, dev, ofile): + """ + Prints timing info for a single timer to a log file. + """ + line = f"{key:<25} {ref[key]['seconds']:>20} {dev[key]['seconds']:>20}" + print(line, file=ofile) + + +def display_timers(ref, ref_label, dev, dev_label, table_file): + """ + Prints the GEOS-Chem timer information to a table. + + Args + ref : dict : Timer output from the "Ref" model + ref : dict : Timer output from the "Dev" model + """ + with open(table_file, "w", encoding="utf-8") as ofile: + + # Print header + print(f"{'Timer':<25} {ref_label:>20} {dev_label:>20}", file=ofile) + print(f"{'-'*25:<25} {'-'*20:>20} {'-'*20:>20}", file=ofile) + + # Print timers + print_timer("GEOS-Chem", ref, dev, ofile) + print_timer("HEMCO", ref, dev, ofile) + print_timer("All chemistry", ref, dev, ofile) + print_timer("=> Gas-phase chem", ref, dev, ofile) + print_timer("=> Photolysis", ref, dev, ofile) + print_timer("=> Aerosol chem", ref, dev, ofile) + print_timer("=> Linearized chem", ref, dev, ofile) + print_timer("Transport", ref, dev, ofile) + print_timer("Convection", ref, dev, ofile) + print_timer("Boundary layer mixing", ref, dev, ofile) + print_timer("Dry deposition", ref, dev, ofile) + print_timer("Wet deposition", ref, dev, ofile) + print_timer("Diagnostics", ref, dev, ofile) + print_timer("Unit conversions", ref, dev, ofile) + + +def make_benchmark_timing_table( + ref_file, + ref_label, + dev_file, + dev_label, + dst, +): + """ + """ + verify_variable_type(ref_file, (str, list)) + verify_variable_type(ref_label, str) + verify_variable_type(dev_file, (str, list)) + verify_variable_type(dev_label, str) + verify_variable_type(dst, str) + + # Strip timing info from JSON or log ifle + ref_timers = read_gcclassic(ref_file) + dev_timers = read_gcclassic(dev_file) + + # Write timing info to a table + display_timers( + ref_timers, + ref_label, + dev_timers, + dev_label, + "sample_output.txt", + ) + + +if __name__ == '__main__': + make_benchmark_timing_table( + "./gcclassic_timers.json", + "GCC 14.4.0", + "./execute.gc_4x5_merra2_fullchem_benchmark.log", + "GCHP 14.4.0", + "./" + ) +# "./execute.gchp_merra2_fullchem_benchmark.log", From 5ce9fc3e0da0f019f8744435df6a1bf7c690b319 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Thu, 2 May 2024 16:04:08 -0400 Subject: [PATCH 27/43] Get updates for HEMCO formatting from @hannahnesser + additional fixes gcpy/community/format_hemco_data.py - Script to fix netCDF attributes, by @hannahnesser. - NOTE: Stored in the gcpy/community folder, to denote scripts that are submitted by members of the GEOS-Chem & GCPy user community. 
gcpy/community/__init__.py
- Added this import script for the gcpy/community folder

gcpy/__init__.py
- Updated accordingly now that format_hemco_data has been moved
  to the gcpy/community folder

gcpy/examples/README.txt
- Removed

gcpy/examples/README.md
- Added this README file for the examples folder in Markdown format

gcpy/examples/hemco/.gitignore
- Added this to ignore *.nc* files in this folder

gcpy/examples/hemco/format_hemco_demo.py
- Moved here from gcpy/format_hemco_demo.py

gcpy/examples/hemco/make_mask_file.py
- Moved here from gcpy/examples/working_with_files/make_mask_file.py

CHANGELOG.md
- Updated accordingly

Signed-off-by: Bob Yantosca 
---
 CHANGELOG.md                              |   6 +-
 gcpy/__init__.py                          |   2 +-
 gcpy/community/__init__.py                |   5 +
 gcpy/{ => community}/format_hemco_data.py | 134 +++++++++++-----
 gcpy/examples/README.md                   | 126 +++++++++++++++
 gcpy/examples/README.txt                  |   9 --
 gcpy/examples/hemco/.gitignore            |   1 +
 gcpy/examples/hemco/__init__.py           |   5 +
 gcpy/examples/hemco/format_hemco_demo.py  | 144 ++++++++++++++++++
 .../make_mask_file.py                     |   0
 10 files changed, 377 insertions(+), 55 deletions(-)
 create mode 100644 gcpy/community/__init__.py
 rename gcpy/{ => community}/format_hemco_data.py (78%)
 create mode 100644 gcpy/examples/README.md
 delete mode 100644 gcpy/examples/README.txt
 create mode 100644 gcpy/examples/hemco/.gitignore
 create mode 100644 gcpy/examples/hemco/__init__.py
 create mode 100755 gcpy/examples/hemco/format_hemco_demo.py
 rename gcpy/examples/{working_with_files => hemco}/make_mask_file.py (100%)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b91f4fed..552f0429 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,8 +13,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Badges in `docs/source/index.rst`
 - GitHub action to push GCPy releases to PyPi
 - Script `./release/changeVersionNumbers.sh`, used to update version numbers in various files before release
-- Added `gcpy/format_hemco_data.py` from @hannahnesser
-
 - Mamba/Conda enviroment file `docs/environment_files/read_the_docs_environment.yml`, for building ReadTheDocs documentation
 - Environment files `docs/environment_files/gcpy_requirements.txt` and `docs/environment_files/read_the_docs_requirements.txt`
 - New benchmark script `gcpy/benchmark/modules/benchmark_models_vs_sondes.py`
@@ -22,7 +20,9 @@
 - Function `rename_speciesconc_to_speciesconcvv` in `gcpy/benchmark/modules/benchmark_utils.py`
 - Function `copy_file_to_dir` in `gcpy/util.py`. This is a wrapper for `shutil.copyfile`.
- GitHub Action config file `.github/workflows/stale.yml`, which replaces StaleBot -- Example script `gcpy/examples/working_with_files/make_mask_file.py` +- Example script `gcpy/examples/hemco/make_mask_file.py` +- Added `gcpy/community/format_hemco_data.py` from @hannahnesser +- Added `gcpy/examples/hemco/format_hemco_demo.py` from @hannahnesser ### Changed - Bump pip from 23.2.1 to 23.3 (dependabot suggested this) diff --git a/gcpy/__init__.py b/gcpy/__init__.py index 2b816661..10be51e5 100644 --- a/gcpy/__init__.py +++ b/gcpy/__init__.py @@ -3,6 +3,7 @@ """ from .benchmark import * +from .community import * from .examples import * from .append_grid_corners import * @@ -10,7 +11,6 @@ from .cstools import * from .date_time import * from .file_regrid import * -from .format_hemco_data import * from .grid import * from .grid_stretching_transforms import * from .plot import * diff --git a/gcpy/community/__init__.py b/gcpy/community/__init__.py new file mode 100644 index 00000000..65996793 --- /dev/null +++ b/gcpy/community/__init__.py @@ -0,0 +1,5 @@ +""" +GCPy import script +""" + +from .format_hemco_data import * diff --git a/gcpy/format_hemco_data.py b/gcpy/community/format_hemco_data.py similarity index 78% rename from gcpy/format_hemco_data.py rename to gcpy/community/format_hemco_data.py index 297db837..de818d8f 100644 --- a/gcpy/format_hemco_data.py +++ b/gcpy/community/format_hemco_data.py @@ -4,6 +4,7 @@ """ from os.path import join from copy import deepcopy as dc +import warnings import xarray as xr import numpy as np import pandas as pd @@ -21,12 +22,10 @@ def format_hemco_dimensions( """ Formats time, lat, lon, and lev (optionally) attributes for coards compliance (HEMCO compatibility). - Args: dset: xarray Dataset Dataset containing at least latitude and longitude variables, which must be named lat and lon, respectively. - Keyword Args (optional): start_time: string of the format "YYYY-MM-DD HH:mm:ss" String containing the start time of the dataset for @@ -54,7 +53,6 @@ def format_hemco_dimensions( GCHP (True) or GEOS-Chem Classic (False). This is primarily used to set the lev attributes. The default value is False. - Returns: dset: xarray Dataset An updated version of dset with encoding and attributes @@ -74,13 +72,16 @@ def format_hemco_dimensions( dset = _format_time(dset, start_time) # If level is included in the dimensions, set its attributes - if "lev" in dset.coordset: + if "lev" in dset.coords or "level" in dset.coords: # Note: this is relatively untested (2023/08/21 HON) dset = _format_lev(dset, lev_long_name, lev_units, - lev_formula_terms, gchp) + lev_formula_terms, gchp) # Require data order to be time, lat, lon (optionally lev) - dset = dset.transpose("time", "lat", "lon", ...) + if "lev" in dset.coords: + dset = dset.transpose("time", "lev", "lat", "lon", ...) + else: + dset = dset.transpose("time", "lat", "lon", ...) # Return the dataset return dset @@ -93,13 +94,11 @@ def _update_variable_attributes( """ Adds COARDS conforming variable attributes and/or replaces existing variable attributes with COARDS-conforming values. - Args: var_attrs : dict Dictionary of variable attributes. coards_attrs : dict Dictionary of COARDS-conforming variable attributes. - Returns var_attrs : dict Modified dictionary of variable attributes @@ -123,6 +122,10 @@ def _update_variable_attributes( # but do not clobber any other existing variable attrs. 
     for (name, value) in coards_attrs.items():
         if found[name]:
+            if var_attrs[name] != value:
+                print(f"Updating attribute value for {name}:")
+                print(f"  Original value : {var_attrs[name]}")
+                print(f"  New value      : {value}")
             var_attrs.update({name: value})
         else:
             var_attrs[name] = value
@@ -194,9 +197,10 @@ def _format_time(
     Formats the time dimension for COARDS compliance.
     See define_HEMCO_dimensions for argument listings.
     '''
-    if "time" not in dset.coordset:
-        # If time isn't already in the coordset, create a dummy variable
-        dset = dset.assign_coordset(time=pd.to_datetime(start_time))
+    if "time" not in dset.coords:
+        # If time isn't already in the coords, create a dummy variable
+        print(f"Assigning time coordinate from input start_time {start_time}.")
+        dset = dset.assign_coords(time=pd.to_datetime(start_time))
         dset = dset.expand_dims("time")
     else:
         # Otherwise, update start_time to match the first time in the file,
@@ -239,28 +243,46 @@ def _format_lev(
     See define_HEMCO_dimensions for argument listings.
     '''
     ## HON 2023/08/22: This is relatively untested
-
     # If there a dimension called level, rename it
     if "level" in dset.dims.keys():
         dset = dset.rename_dims({"level" : "lev"})
 
-    # If formula is provided, check that the components of the
-    # formula are included.
+    # Check whether both lev_formula_terms and lev["formula_terms"]
+    # are present--if so, issue a warning.
+    if ((lev_formula_terms is not None)
+        and ("formula_terms" in dset["lev"].attrs)):
+        warnings.warn(
+            "Both lev_formula_terms and lev['formula_terms'] are provided."
+            " The provided lev_formula_terms is being used."
+        )
+    elif ((lev_formula_terms is None)
+          and ("formula_terms" not in dset["lev"].attrs)):
+        warnings.warn(
+            "Neither lev_formula_terms nor lev['formula_terms'] are provided."
+            " Skipping lev_formula_terms formatting."
+        )
+    elif ("formula_terms" in dset["lev"].attrs):
+        lev_formula_terms = dset["lev"].attrs["formula_terms"]
+
+    # If lev_formula_terms is now defined:
     if lev_formula_terms is not None:
-        terms = lev_formula_terms.split(": ")
+        terms = lev_formula_terms.split(" ")
         terms = [term for i, term in enumerate(terms) if i % 2 == 1]
+        failed_terms = []
        for term in terms:
            if term not in dset.data_vars.keys():
-                raise ValueError(
-                    f"{term} is in lev_formula_terms and could \
-                    not be found."
-                )
+                failed_terms.append(term)
+        if len(failed_terms) > 0:
+            warnings.warn(
+                f"The following values are in lev_formula_terms and could"
+                f" not be found: {failed_terms}"
+            )
 
     # If unit is level, require that the levels are integers
-    if lev_units == "level" and \
-        (dset["lev"] != dset["lev"].astype(int)).any():
-        raise ValueError("lev has units of level but dimension values \
-            are not integers.")
+    if lev_units not in ["level", "eta_level", "sigma_level"]:
+        raise ValueError(
+            f"lev has units of {lev_units}. Please set it to one "
+            "of level, eta_level, or sigma_level.")
 
     # Set attributes
     ## Set positive to match the GCHP/GEOS-Chem conventions
@@ -296,7 +318,6 @@ def _check_required_dim(
     Checks required dimensions (time, latitude, and longitude) for
     COARDS compliance (that the dimension exists and is
     monotonically increasing).
-
     Args:
         dset: xarray Dataset
         dim: string ("time", "lat", or "lon")
@@ -316,17 +337,42 @@ def _check_required_dim(
     return dset
 
 
+def check_hemco_variables(
+    dset
+):
+    verify_variable_type(dset, xr.Dataset)
+
+    # Iterate through the dataset variables and check that each one
+    # has the required units and long_name attributes.
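+    # Every offending variable is reported before the ValueError is
+    # raised, so all missing attributes can be fixed in one pass.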
+ print("Checking dataset variables for HEMCO compliance.") + required_attrs = ["units", "long_name"] + missing = False + for (name, _) in dset.items(): + attr_names = [name for (name, _) in dset[name].attrs.items()] + missing_attrs = [name for name in required_attrs + if name not in attr_names] + if len(missing_attrs) > 0: + missing = True + print(f" {name} missing {missing_attrs}") + + if missing: + raise ValueError( + "Required units missing from dataset variables." + ) + else: + print("Dataset variables are HEMCO compliant.") + + def format_hemco_variable( dset, var, - long_name, - units, + long_name=None, + units=None, **kwargs ): """ Formats attributes for non-standard variables for COARDS compliance (HEMCO compatibility). - Args: dset: xarray Dataset Dataset containing HEMCO input data. @@ -341,7 +387,6 @@ def format_hemco_variable( for more information. **kwargs : dict Any other attributes wanted for the variable. - Returns: dset: xarray Dataset An updated version of dset with variable attributes @@ -349,23 +394,30 @@ def format_hemco_variable( """ verify_variable_type(dset, xr.Dataset) verify_variable_type(var, str) - verify_variable_type(long_name, str) - verify_variable_type(units, str) - # Add extra attributes if passed via **kwargs - if len(kwargs) != 0: - for (_, att_dict) in kwargs.items(): - dset[var].attrs.update(att_dict) + # Check required variables + coards_attrs = {"long_name" : long_name, + "units" : units} + for name, value in coards_attrs.items(): + if value is not None: + verify_variable_type(value, str) + elif name in dset[var].attrs: + coards_attrs[name] = dset[var].attrs[name] + else: + raise ValueError(f"{name} is not defined for {var}") # Update variable attributes to be COARDS-conforming # without clobbering any pre-existing attributes dset[var].attrs = _update_variable_attributes( dset[var].attrs, - coards_attrs={ - "long_name" : long_name, - "units" : units - } + coards_attrs=coards_attrs ) + + # Add extra attributes if passed via **kwargs + if len(kwargs) != 0: + for (_, att_dict) in kwargs.items(): + dset[var].attrs.update(att_dict) + return dset @@ -378,7 +430,6 @@ def save_hemco_netcdf( ): """ Saves COARDS compliant (HEMCO compatible) netcdf. - Args: dset: xarray Dataset Dataset containing HEMCO input data. @@ -386,7 +437,6 @@ def save_hemco_netcdf( The directory where the data will be saved. save_name: string The name the file will be named under. - Keyword Args (optional): dtype: data type The data type the data will be saved as. Default is @@ -399,7 +449,7 @@ def save_hemco_netcdf( verify_variable_type(save_dir, str) verify_variable_type(save_name, str) - # Check that the save_name endset in .nc + # Check that the save_name ends in .nc if save_name.split(".")[-1][:2] != "nc": save_name = f"{save_name}.nc" @@ -410,7 +460,7 @@ def save_hemco_netcdf( # Set default encoding and dtype for all variables and coordinates encoding = {"_FillValue" : None, "dtype" : dtype} var = {k : dc(encoding) for k in dset.keys()} - coord = {k : dc(encoding) for k in dset.coordset} + coord = {k : dc(encoding) for k in dset.coords} # Manually update the time encoding, which is often overwritten # by xarray defaults diff --git a/gcpy/examples/README.md b/gcpy/examples/README.md new file mode 100644 index 00000000..4cefedf7 --- /dev/null +++ b/gcpy/examples/README.md @@ -0,0 +1,126 @@ +# GCPy example scripts + +This directory contains several subdirectories with example scripts that demonstrate the capabilities of GCPy. 
+
+## bpch_to_nc
+
+NOTE: The binary punch ("bpch") data format has been retired from GEOS-Chem. We keep these scripts here for those who work with the GEOS-Chem Adjoint code, which still uses bpch format.
+
+`bpch2nc.py`
+
+- Script to convert GEOS-Chem binary punch (aka "bpch") data to netCDF.
+
+`bpch_tagco_prodloss_to_nc.py`
+
+- Converts the prod/loss data files in bpch format for the tagged CO simulation to netCDF format.
+
+
+## diagnostics
+
+`compare_diags.py`
+
+- Script to compare the contents of files from two different model versions: A reference version (aka "Ref") and a development version (aka "Dev").
+
+`compare_diags.yml`
+
+- Configuration file for use with `compare_diags.py`
+
+## dry_run
+
+`download_data.py`
+
+- Downloads data from a GEOS-Chem Classic "dry-run" simulation.
+
+`download_data.yml`
+
+- Configuration file for `download_data.py`.
+
+
+## hemco
+
+`format_hemco_demo.py`
+
+- Demonstrates how to fix a non-COARDS-compliant file (needed for HEMCO) using the `gcpy/community/format_hemco_data.py` module from Hannah Nesser (@hannahnesser).
+
+`make_mask_file.py`
+
+- Creates mask files for HEMCO emissions for a given country.
+
+
+## plotting
+
+`create_test_plot.py`
+
+- Script to create a test pattern plot. Useful for testing if the Python environment has been installed properly.
+
+`plot_comparisons.py`
+
+- Plots data from two different models side-by-side for comparison purposes, in a "six-panel" plot layout.
+
+`plot_single_panel.py`
+
+- Creates several different types of single-panel plots.
+
+`plot_timeseries.py`
+
+- Reads and plots timeseries data.
+
+
+## working_with_files
+
+`add_blank_var_to_restart_file.py`
+
+- Adds a "dummy" DataArray containing all zeroes to a GEOS-Chem restart file.
+
+`concatenate_files.py`
+
+- Combines several netCDF data files into a single file using xarray.
+
+`insert_field_into_restart_file.py`
+
+- Adds a DataArray field into a GEOS-Chem restart file.
+
+`regrid_restart_ll_to_cs.py`
+
+- Regrids data from the lat-lon grid to a cubed-sphere grid.
\ No newline at end of file
diff --git a/gcpy/examples/README.txt b/gcpy/examples/README.txt
deleted file mode 100644
index d49fb235..00000000
--- a/gcpy/examples/README.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-
-.. examples-index
-
-Example Gallery
-===============
-
-.. 
contents:: Contents
-   :local:
-   :depth: 2
diff --git a/gcpy/examples/hemco/.gitignore b/gcpy/examples/hemco/.gitignore
new file mode 100644
index 00000000..beb4dab7
--- /dev/null
+++ b/gcpy/examples/hemco/.gitignore
@@ -0,0 +1 @@
+*.nc*
\ No newline at end of file
diff --git a/gcpy/examples/hemco/__init__.py b/gcpy/examples/hemco/__init__.py
new file mode 100644
index 00000000..532b5c82
--- /dev/null
+++ b/gcpy/examples/hemco/__init__.py
@@ -0,0 +1,5 @@
+"""
+GCPy import script
+"""
+from .format_hemco_demo import *
+from .make_mask_file import *
diff --git a/gcpy/examples/hemco/format_hemco_demo.py b/gcpy/examples/hemco/format_hemco_demo.py
new file mode 100755
index 00000000..e7064faf
--- /dev/null
+++ b/gcpy/examples/hemco/format_hemco_demo.py
@@ -0,0 +1,144 @@
+#!/usr/bin/env python3
+"""
+Example script using gcpy.community.format_hemco_data.py
+
+NOTE: Before starting this demo, please download the file:
+
+https://gcgrid.s3.amazonaws.com/HEMCO/GCClassic_Output/14.0.0/2019/GEOSChem.ProdLoss.20190101_0000z.nc4
+
+to this folder and rename it to HEMCO_demonstration_file.nc.
+"""
+import xarray as xr
+from copy import deepcopy as dc
+
+# ----------------------------------------------------------------- #
+# Preparing the file for the demonstration
+# ----------------------------------------------------------------- #
+
+# Load the data file
+
+# NOTE: You can copy any data from the HEMCO data path to this folder:
+
+data = xr.open_dataset("./HEMCO_demonstration_file.nc")
+
+
+# We will now intentionally change the file to be HEMCO incompatible.
+
+# First, remove one of the attributes from latitude and longitude.
+# These changes should all be handled with no manual edits from
+# the user.
+data["lat"].attrs.pop("units")
+data["lon"].attrs.pop("axis")
+
+# We will also reverse latitude so that it's monotonically decreasing.
+# This should throw an error.
+data["lat"] = data["lat"][::-1]
+
+# Second, remove the time variable. Often, files without an explicit
+# time dimension will exclude time from the netCDF. This is bad for
+# HEMCO, and we want to make sure that the functions can deal with it.
+data = data.drop("time").squeeze()
+
+# Third, mess with the level attributes. We'll add an extra formula
+# term that doesn't exist in the dataset. This change should throw an
+# error.
+data["lev"].attrs["formula_terms"] += " xs: xx"
+
+# We also change the positive direction. So long as gchp=False is
+# passed to the function, this should be handled by the functions.
+data["lev"].attrs["positive"] = "down"
+
+# Finally, we'll change some things in the variable Loss_Ox.
+# We'll add a fourth attribute, which we hope won't be clobbered.
+# This should be the only difference between demo_original.txt
+# and the updated demo_post_formatting.txt.
+data["Loss_Ox"].attrs["test"] = (
+    "Testing that additional attributes are not clobbered"
+)
+
+# Save long name and units strings so we can restore them later
+save_long_name = dc(data["Loss_Ox"].attrs["long_name"])
+save_units = dc(data["Loss_Ox"].attrs["units"])
+
+# We also delete the units on Loss_Ox
+del(data["Loss_Ox"].attrs["units"])
+
+# ----------------------------------------------------------------- #
+# Using format_hemco_data to save a HEMCO-compatible file
+# ----------------------------------------------------------------- #
+# Using format_hemco_data.py is easy and requires only four steps.
+data_fix = dc(data)
+
+# 1. Import the module.
+from gcpy.community import format_hemco_data as hemco
+
+# 2. 
Format the required dimensions (time, lat, lon, and lev) for
+# HEMCO.
+# We have to provide the file start time because there is no time
+# dimension in this file. If there was, we could still provide a
+# start time, but it would be overwritten (with a warning) with
+# the first time value in the dataset.
+def test_format_hemco_dimensions(data):
+    try:
+        data = hemco.format_hemco_dimensions(
+            data,
+            start_time="2019-01-01 00:00:00"
+        )
+    except Exception as error:
+        print(f"format_hemco_dimensions_failed: {error}")
+    return data
+
+# Let's test this!
+data_fix = test_format_hemco_dimensions(data_fix)
+print("-"*70)
+
+# We return an error that "lat is not monotonically increasing."
+# Good! We changed that intentionally. Let's undo that and
+# try again.
+data_fix["lat"] = data_fix["lat"][::-1]
+data_fix = test_format_hemco_dimensions(data_fix)
+print("-"*70)
+
+# We also get a warning message that it is assigning the time coordinate
+# from the provided start_time. This is needed for HEMCO compliance, but
+# the user should be aware of the specification of the time dimension.
+
+# We find that "PS" and "xx" are included in lev_formula_terms but not in
+# data_fix. This is a warning, so we don't need to do anything. Onto the
+# next step!
+
+# 3. Format any variables in the netCDF
+# Run the checking function.
+def test_check_variables(data):
+    try:
+        hemco.check_hemco_variables(data_fix)
+    except Exception as error:
+        print(f"check_hemco_variables failed: {error}")
+
+test_check_variables(data_fix)
+print("-"*70)
+
+# We get the following error:
+# Checking dataset variables for HEMCO compliance.
+#   Loss_Ox missing ['units']
+# check_hemco_variables failed: Required units missing from dataset variables.
+
+# We add units back in using the convenience function from the package so
+# that we avoid clobbering anything important.
+data_fix = hemco.format_hemco_variable(
+    data_fix,
+    "Loss_Ox",
+    long_name=save_long_name,
+    units=save_units,
+)
+
+# Test one more time
+test_check_variables(data_fix)
+print("-"*70)
+
+# 4. Save out.
+hemco.save_hemco_netcdf(
+    data_fix,
+    save_dir=".",
+    save_name="./HEMCO_demonstration_file_post_fixes.nc"
+)
diff --git a/gcpy/examples/working_with_files/make_mask_file.py b/gcpy/examples/hemco/make_mask_file.py
similarity index 100%
rename from gcpy/examples/working_with_files/make_mask_file.py
rename to gcpy/examples/hemco/make_mask_file.py
From 51fc9e9898e1bf13b82f7fc380fa5ae47300eb55 Mon Sep 17 00:00:00 2001
From: Bob Yantosca 
Date: Thu, 2 May 2024 16:13:27 -0400
Subject: [PATCH 28/43] Added README.md file for the community folder

gcpy/community/README.md
- Added this file with a description of the contents of the
  gcpy/community folder. This is where users can submit scripts
  of general use to GCPy.

Signed-off-by: Bob Yantosca 
---
 gcpy/community/README.md | 10 ++++++++++
 1 file changed, 10 insertions(+)
 create mode 100644 gcpy/community/README.md

diff --git a/gcpy/community/README.md b/gcpy/community/README.md
new file mode 100644
index 00000000..711f28ba
--- /dev/null
+++ b/gcpy/community/README.md
@@ -0,0 +1,10 @@
+# GCPy Community Contributions
+
+The scripts in this folder have been submitted by GCPy users. Please contact the author of each script directly if you have any questions about its usage.
+
+## Contents
+
+`format_hemco_data.py`
+
+- **Author:** Hannah Nesser (@hannahnesser)
+- **Description:** Fixes netCDF file attributes so that they conform to the netCDF COARDS conventions. This is needed for input to HEMCO.
\ No newline at end of file From c41a33f141feec99cdc8173df53ba1ea2c3ddd86 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Fri, 3 May 2024 13:28:49 -0400 Subject: [PATCH 29/43] Add function "replace_whitespace" in util.py gcpy/util.py - Added function "replace_whitespace", which replaces whitespace in a string with another character (default is "_"). This will be used to make sure the benchmark filenames and version labels do not have spaces. CHANGELOG.md - Updated accordingly Signed-off-by: Bob Yantosca --- gcpy/util.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/gcpy/util.py b/gcpy/util.py index 6a5b347b..e50d4874 100644 --- a/gcpy/util.py +++ b/gcpy/util.py @@ -2235,3 +2235,24 @@ def copy_file_to_dir( ofile = os.path.join(dest, os.path.basename(ifile)) if not os.path.exists(ofile): copyfile(ifile, ofile) + + +def replace_whitespace( + string, + repl_char="_" +): + """ + Replaces whitespace in a string with underscores. + Useful for removing spaces in filename strings. + + Args + string : str : The input string + repl_char : str : Replacement character (default is "_") + + Returns + string : str : String with whitespace replaced + """ + verify_variable_type(string, str) + verify_variable_type(repl_char, str) + + return repl_char.join(string.split()) From b258b3940cd32f9b5b5673b4d32ce54ab972c5ff Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Fri, 3 May 2024 13:31:43 -0400 Subject: [PATCH 30/43] Add benchmark script to scrape GEOS-Chem Classic timing information gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py - Benchmark module that can scrape the timers information from either gcclassic_timers.json or GEOS-Chem Classic log files. If multiple files are supplied as input, the timers information will be summed together. CHANGELOG.md - Updated accordingly Signed-off-by: Bob Yantosca --- .../benchmark_scrape_gcclassic_timers.py | 231 +++++++++++++----- 1 file changed, 171 insertions(+), 60 deletions(-) mode change 100755 => 100644 gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py diff --git a/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py b/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py old mode 100755 new mode 100644 index a264a50c..467ce6ed --- a/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py +++ b/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py @@ -1,74 +1,114 @@ #!/usr/bin/env python3 """ +Scrapes GEOS-Chem Classic benchmark timing information from one or +more JSON or text files. """ import os -from gcpy.util import verify_variable_type import json +from gcpy.util import make_directory, replace_whitespace, verify_variable_type -def read_gcclassic(ifile): +def read_gcclassic(input_files): """ - Determines if the input is a valid JSON. + Determines whether we should call a function to parse the given + input file(s) as JSON or plain text. Args - ifile : str : file name + input_files : str|list : File or list of files to parse Returns - result : dict : Dictionary with timing information + result : list of dict : List of dicts with timing info """ - - # Make sure file exists - if not os.path.exists(ifile): - raise FileNotFoundError(f"Could not find {ifile}!") - - # First try to read the file as a JSON, - # then try to read the file as text. 
try: - result = read_gcclassic_json(ifile) - except ValueError as err: - result = read_gcclassic_log(ifile) + result = read_timing_data(input_files, read_one_json_file) + except ValueError: + result = read_timing_data(input_files, read_one_text_file) return result -def read_gcclassic_json( - ifile +def read_timing_data( + input_files, + reader, ): """ Parses the GEOS-Chem Classic timing information in JSON format and returns a dictionary with the results. Args - ifile : str : File name + input files : str|list : JSON or text file(s) to parse + + Returns + timing : list of dict : Dictionary with timing information + """ + # Return value + timing = [] + + # If more than one file has been provided, read the timing + # information and return a list of dictionaries with results + if isinstance(input_files, list): + for input_file in input_files: + result = reader(input_file) + timing.append(result) + return timing + + # If only one file has been provided, then read it + # and return the dictionary in a list + if isinstance(input_files, str): + result = reader(input_files) + timing.append(result) + return timing + + raise ValueError("Argument 'input_files' is not of type str or list!") + + +def read_one_json_file(json_file): + """ + Parses a GEOS-Chem JSON file with timing information + and returns a dictionary with the results. + + Args + json_file : str : JSON file with timing information Returns - result : dict : Dictionary with timing information + result : dict : Dictionary with timing information """ + + # Make sure file exists + if not os.path.exists(json_file): + raise FileNotFoundError(f"Could not find {json_file}!") + + # If the file is not a JSON file, raise a ValueError, as + # this will prompt read_gcclassic to parse the file as text. try: - with open(ifile, encoding="utf-8") as json_file: - result = json.load(json_file) + with open(json_file, encoding="utf-8") as ifile: + result = json.load(ifile) return result["GEOS-Chem Classic timers"] except ValueError as err: raise ValueError from err -def read_gcclassic_log(ifile): +def read_one_text_file(text_file): """ - Parses the GEOS-Chem Classic log file with timing information - and returns a dictionary with the results. + Parses the GEOS-Chem Classic log file (plain text) with + timing information and returns a dictionary with the results. Args - ifile : str : File name + text_file : str : Text file with timing information Returns - result : dict : Dictionary with timing information + result : dict : Dictionary with timing information """ keep_line = False timers = {} + # Make sure file exists + if not os.path.exists(text_file): + raise FileNotFoundError(f"Could not find {text_file}!") + # Read the line backwards and get just keep the timing information - with open(ifile, encoding="utf-8") as log_file: + with open(text_file, encoding="utf-8") as ifile: - for line in reversed(list(log_file)): + for line in reversed(list(ifile)): line = line.strip("\n") # Set a flag to denote the start & end of timing info @@ -88,11 +128,48 @@ def read_gcclassic_log(ifile): return timers +def sum_timers(timers): + """ + Sums the time in seconds for each GEOS-Chem timer. Input may be + a single dict with timing information or a list of dicts. + + Args + timers : dict|list : GEOS-Chem timing information from one or more + JSON or log files. + + Returns + result : dict : Sum of timing information + """ + + # If timers is of type dict, no summing is needed. 
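+    # Otherwise, each per-timer "seconds" value is summed across the
+    # list entries; e.g. [{"HEMCO": {"seconds": "1.5"}},
+    # {"HEMCO": {"seconds": "2.5"}}] collapses to {"HEMCO": 4.0}.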
+    if isinstance(timers, dict):
+        return timers
+
+    # If timers is a list of dicts, sum the times
+    # in seconds into a new dict, and then return.
+    if isinstance(timers, list):
+
+        # Initialize the result dict
+        result = {}
+        for timer in timers:
+            for (key, val) in timer.items():
+                result[key] = 0.0
+
+        # Then sum the time in seconds for each timer
+        for timer in timers:
+            for (key, val) in timer.items():
+                result[key] += float(val["seconds"])
+
+        return result
+
+    raise ValueError("Argument 'timers' must be of type dict or list!")
+
+
 def print_timer(key, ref, dev, ofile):
     """
     Prints timing info for a single timer to a log file.
     """
-    line = f"{key:<25} {ref[key]['seconds']:>20} {dev[key]['seconds']:>20}"
+    line = f"{key:<25} {ref[key]:>20.3f} {dev[key]:>20.3f}"
     print(line, file=ofile)
 
@@ -107,61 +184,95 @@ def display_timers(ref, ref_label, dev, dev_label, table_file):
     with open(table_file, "w", encoding="utf-8") as ofile:
 
         # Print header
-        print(f"{'Timer':<25} {ref_label:>20} {dev_label:>20}", file=ofile)
-        print(f"{'-'*25:<25} {'-'*20:>20} {'-'*20:>20}", file=ofile)
-
+        print("%"*79, file=ofile)
+        print("%%% GEOS-Chem Classic Benchmark Timing Information",
+              file=ofile)
+        print("%%%", file=ofile)
+        print(f"%%% Ref = {ref_label}", file=ofile)
+        print(f"%%% Dev = {dev_label}", file=ofile)
+        print("%"*79, file=ofile)
+        print("\n", file=ofile)
+        print(f"{'Timer':<25} {'Ref [s]':>20} {'Dev [s]':>20}", file=ofile)
+        print("-"*79, file=ofile)
+
         # Print timers
-        print_timer("GEOS-Chem", ref, dev, ofile)
-        print_timer("HEMCO", ref, dev, ofile)
-        print_timer("All chemistry", ref, dev, ofile)
-        print_timer("=> Gas-phase chem", ref, dev, ofile)
-        print_timer("=> Photolysis", ref, dev, ofile)
-        print_timer("=> Aerosol chem", ref, dev, ofile)
-        print_timer("=> Linearized chem", ref, dev, ofile)
-        print_timer("Transport", ref, dev, ofile)
+        print_timer("GEOS-Chem",             ref, dev, ofile)
+        print_timer("HEMCO",                 ref, dev, ofile)
+        print_timer("All chemistry",         ref, dev, ofile)
+        print_timer("=> Gas-phase chem",     ref, dev, ofile)
+        print_timer("=> Photolysis",         ref, dev, ofile)
+        print_timer("=> Aerosol chem",       ref, dev, ofile)
+        print_timer("=> Linearized chem",    ref, dev, ofile)
+        print_timer("Transport",             ref, dev, ofile)
         print_timer("Convection", ref, dev, ofile)
-        print_timer("Boundary layer mixing", ref, dev, ofile)
+        print_timer("Boundary layer mixing", ref, dev, ofile)
         print_timer("Dry deposition", ref, dev, ofile)
         print_timer("Wet deposition", ref, dev, ofile)
         print_timer("Diagnostics", ref, dev, ofile)
         print_timer("Unit conversions", ref, dev, ofile)
-
+
 def make_benchmark_timing_table(
-    ref_file,
+    ref_files,
     ref_label,
-    dev_file,
+    dev_files,
     dev_label,
-    dst,
+    dst="./benchmark",
+    overwrite=False,
 ):
     """
+    Creates a table of timing information for GEOS-Chem Classic
+    benchmark simulations given one or more JSON and/or text files
+    as input.
+
+    Args:
+
     """
-    verify_variable_type(ref_file, (str, list))
+    verify_variable_type(ref_files, (str, list))
     verify_variable_type(ref_label, str)
-    verify_variable_type(dev_file, (str, list))
+    verify_variable_type(dev_files, (str, list))
     verify_variable_type(dev_label, str)
     verify_variable_type(dst, str)
 
-    # Strip timing info from JSON or log ifle
-    ref_timers = read_gcclassic(ref_file)
-    dev_timers = read_gcclassic(dev_file)
+    # Create the destination folder
+    make_directory(dst, overwrite)
+
+    # Strip timing info from JSON/text file(s) and sum them.
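+    # (Several files may be given here, e.g. the logs from a
+    # multi-segment benchmark; sum_timers adds the per-timer seconds
+    # across all of them.)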
+ ref_timers = sum_timers(read_gcclassic(ref_files)) + dev_timers = sum_timers(read_gcclassic(dev_files)) + + # Filename for output + timing_table = replace_whitespace( + os.path.join( + dst, + f"Benchmark_Timers_{ref_label}_vs_{dev_label}.txt" + ) + ) # Write timing info to a table display_timers( ref_timers, - ref_label, + replace_whitespace(ref_label), dev_timers, - dev_label, - "sample_output.txt", + replace_whitespace(dev_label), + timing_table, ) if __name__ == '__main__': + + REF_FILES = [ + "./gcclassic_timers.json", + "./gcclassic_timers.json" + ] + DEV_FILES = "./execute.gc_4x5_merra2_fullchem_benchmark.log" + + # Debug test make_benchmark_timing_table( - "./gcclassic_timers.json", - "GCC 14.4.0", - "./execute.gc_4x5_merra2_fullchem_benchmark.log", - "GCHP 14.4.0", - "./" - ) -# "./execute.gchp_merra2_fullchem_benchmark.log", + REF_FILES, + "GCC 14.4.0 json", + DEV_FILES, + "GCC 14.4.0 log", + dst="./", + overwrite=True, +) From 4bb4a95c81dd4e6e705682b5e3ec992debaf5324 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Fri, 3 May 2024 18:12:43 -0400 Subject: [PATCH 31/43] Add script to parse GCHP benchmark timing information gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py - Script that scrapes the timing information at the end of the GCHP log file gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py - Updated docstring comments CHANGELOG.md - Updated accordingly Signed-off-by: Bob Yantosca --- CHANGELOG.md | 5 +- .../benchmark_scrape_gcclassic_timers.py | 29 +- .../modules/benchmark_scrape_gchp_timers.py | 325 ++++++++++++++++++ 3 files changed, 351 insertions(+), 8 deletions(-) create mode 100644 gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py diff --git a/CHANGELOG.md b/CHANGELOG.md index b680347b..319989ec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,7 +21,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Function `copy_file_to_dir` in `gcpy/util.py`. This is a wrapper for `shutil.copyfile`. - GitHub Action config file `.github/workflows/stale.yml`, which replaces StaleBot - Example script `gcpy/examples/working_with_files/make_mask_file.py` - +- Convenience function `replace_whitespace` in `gcpy/util.py` +- Benchmark script `gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py` +- Benchmark script `gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py` + ### Changed - Bump pip from 23.2.1 to 23.3 (dependabot suggested this) - Bump pypdf from 3.16.1 to 3.17.0 (dependabot suggested this) diff --git a/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py b/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py index 467ce6ed..084665a1 100644 --- a/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py +++ b/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py @@ -26,16 +26,14 @@ def read_gcclassic(input_files): return result -def read_timing_data( - input_files, - reader, -): +def read_timing_data(input_files, reader): """ Parses the GEOS-Chem Classic timing information in JSON format and returns a dictionary with the results. Args input files : str|list : JSON or text file(s) to parse + reader : function : Function that will parse the file(s) Returns timing : list of dict : Dictionary with timing information @@ -168,6 +166,12 @@ def sum_timers(timers): def print_timer(key, ref, dev, ofile): """ Prints timing info for a single timer to a log file. 
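+
+    Note: ref and dev are the flattened dicts returned by sum_timers,
+    which map each timer name to its total number of seconds.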
+
+    Args
+    key : str : Dictionary key to print
+    ref : dict : Timing information from the "Ref" model
+    dev : dict : Timing information from the "Dev" model
+    ofile : file : File object where info will be written
     """
     line = f"{key:<25} {ref[key]:>20.3f} {dev[key]:>20.3f}"
     print(line, file=ofile)
@@ -178,8 +182,11 @@ def display_timers(ref, ref_label, dev, dev_label, table_file):
     Prints the GEOS-Chem timer information to a table.
 
     Args
-    ref : dict : Timer output from the "Ref" model
-    ref : dict : Timer output from the "Dev" model
+    ref : dict : Timing information from the "Ref" model
+    ref_label : str : Version string for the "Ref" model
+    dev : dict : Timing information from the "Dev" model
+    dev_label : str : Version string for the "Dev" model
+    table_file : str : File name for the timing table output
     """
     with open(table_file, "w", encoding="utf-8") as ofile:
 
@@ -225,7 +232,15 @@ def make_benchmark_timing_table(
     benchmark simulations given one or more JSON and/or text files
     as input.
 
-    Args:
+    Args
+    ref_files : str|list : File(s) with timing info from the "Ref" model
+    ref_label : str : Version string for the "Ref" model
+    dev_files : str|list : File(s) with timing info from the "Dev" model
+    dev_label : str : Version string for the "Dev" model
+
+    Kwargs
+    dst : str : Directory where output will be written
+    overwrite : bool : Overwrite existing files? (default: False)
     """
     verify_variable_type(ref_files, (str, list))
diff --git a/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py b/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py
new file mode 100644
index 00000000..97ece6ab
--- /dev/null
+++ b/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py
@@ -0,0 +1,325 @@
+#!/usr/bin/env python3
+"""
+Scrapes GCHP benchmark timing information from one or
+more text files.
+"""
+import os
+from gcpy.util import make_directory, replace_whitespace, verify_variable_type
+
+
+def read_timing_data(input_files):
+    """
+    Parses the GCHP timing information from plain-text log files
+    and returns a dictionary with the results.
+
+    Args
+    input_files : str|list : Text file(s) to parse
+
+    Returns
+    timing : list of dict : Dictionary with timing information
+    """
+    # Return value
+    timing = []
+
+    # If more than one file has been provided, read the timing
+    # information and return a list of dictionaries with results
+    if isinstance(input_files, list):
+        for input_file in input_files:
+            result = read_one_text_file(input_file)
+            timing.append(result)
+        return timing
+
+    # If only one file has been provided, then read it
+    # and return the dictionary in a list
+    if isinstance(input_files, str):
+        result = read_one_text_file(input_files)
+        timing.append(result)
+        return timing
+
+    raise ValueError("Argument 'input_files' is not of type str or list!")
+
+
+def count_characters(text, char_to_match="-"):
+    """
+    Returns the count of a given character in a string of text.
+
+    Args
+    text : str : The text to parse
+
+    Kwargs
+    char_to_match : str : The character to look for in "text"
+
+    Returns
+    result : int : Number of occurrences of "char_to_match" in "text"
+
+    Reference
+    https://stackoverflow.com/questions/991350/counting-repeated-characters-in-a-string-in-python
+    """
+    # Create a dictionary where each character of "text"
+    # is a key, and all values are set to zero.
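+    # (collections.Counter(text) would build the same mapping.)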
+    count = dict.fromkeys(text, 0)
+
+    # Increment each time a character is found
+    for char in text:
+        count[char] += 1
+
+    # Return the count of the matched character
+    if char_to_match not in count:
+        return 0
+    return count[char_to_match]
+
+
+def read_one_text_file(text_file):
+    """
+    Parses the GCHP log file (plain text) with timing information
+    and returns a dictionary with the results.
+
+    Args
+    text_file : str : Text file with timing information
+
+    Returns
+    result : dict : Dictionary with timing information
+    """
+    keep_line = True
+    temp_timers = []
+
+    # Make sure file exists
+    if not os.path.exists(text_file):
+        raise FileNotFoundError(f"Could not find {text_file}!")
+
+    # Read the lines backwards and just keep the timing information
+    with open(text_file, encoding="utf-8") as ifile:
+
+        for line in reversed(list(ifile)):
+            line = line.strip("\n")
+
+            # Set a flag to denote the start & end of timing info
+            if "-------- --------- ------ --------- ------" in line:
+                keep_line = False
+                break
+
+            # Append timing info lines into a list of dicts
+            if keep_line:
+                substr = line.split()
+                key = substr[0].strip()
+                val = float(substr[2].strip())
+                temp_timers.append({key: val})
+
+    # Because we were reading the end of the file backwards, the
+    # entries in temp_timers are reversed. Now read through them
+    # in the forward order.
+    hdr = ["", "", ""]
+    timers = {}
+    for timer in reversed(temp_timers):
+        for (key, val) in timer.items():
+
+            # Denote how deep into the dictionary this key goes
+            # as determined by the number of prefixing "-" characters
+            depth = count_characters(key, "-") / 2
+
+            # Remove any prefixed "-" characters
+            new_key = key.strip("-")
+
+            # Add results into the "timers" dictionary as a
+            # "flattened" dictionary, for expediency
+            if depth == 0:
+                hdr[0] = new_key
+                timers[new_key] = val
+            elif depth == 1:
+                hdr[1] = new_key
+                new_key = f"{hdr[0]}_{new_key}"
+                timers[new_key] = val
+            elif depth == 2:
+                hdr[2] = new_key
+                new_key = f"{hdr[0]}_{hdr[1]}_{new_key}"
+                timers[new_key] = val
+            else:
+                new_key = f"{hdr[0]}_{hdr[1]}_{hdr[2]}_{new_key}"
+                timers[new_key] = val
+
+    return timers
+
+
+def sum_timers(timers):
+    """
+    Sums the time in seconds for each GCHP timer. Input may be
+    a single dict with timing information or a list of dicts.
+
+    Args
+    timers : dict|list : GCHP timing information from one or more
+                         log files in plain text format
+
+    Returns
+    result : dict : Sum of timing information
+    """
+
+    # If timers is of type dict, no summing is needed.
+    if isinstance(timers, dict):
+        return timers
+
+    # If timers is a list of dicts, sum the times
+    # in seconds into a new dict, and then return.
+    if isinstance(timers, list):
+
+        # Initialize the result dict
+        result = {}
+        for timer in timers:
+            for (key, val) in timer.items():
+                result[key] = 0.0
+
+        # Then sum the time in seconds for each timer
+        for timer in timers:
+            for (key, val) in timer.items():
+                result[key] += float(val)
+
+        return result
+
+    raise ValueError("Argument 'timers' must be of type dict or list!")
+
+
+def print_timer(key, ref, dev, ofile):
+    """
+    Prints timing info for a single timer to a log file.
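+
+    Keys are the flattened "_"-joined names built by read_one_text_file
+    (e.g. "All_Run_GCHP"); the number of "_" characters sets the
+    indentation depth of the printed label.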
+
+    Args
+    key   : str  : Dictionary key to print
+    ref   : dict : Timing information from the "Ref" model
+    dev   : dict : Timing information from the "Dev" model
+    ofile : file : File object where info will be written
+    """
+    # Denote the level of the dictionary key by counting "_" chars
+    depth = count_characters(key, "_")
+
+    # Prefix "--" characters to the key name to denote its depth,
+    # replicating the label style at the end of the GCHP log file
+    label = "--"*depth + key.split("_")[-1]
+
+    # Line to print
+    line = f"{label:<25} {ref[key]:>20.3f} {dev[key]:>20.3f}"
+    print(line, file=ofile)
+
+
+def display_timers(ref, ref_label, dev, dev_label, table_file):
+    """
+    Prints the GCHP timer information to a table.
+
+    Args
+    ref        : dict : Timing information from the "Ref" model
+    ref_label  : str  : Version string for the "Ref" model
+    dev        : dict : Timing information from the "Dev" model
+    dev_label  : str  : Version string for the "Dev" model
+    table_file : str  : File name for the timing table output
+    """
+    with open(table_file, "w", encoding="utf-8") as ofile:
+
+        # Print header
+        print("%"*79, file=ofile)
+        print("%%% GCHP Benchmark Timing Information", file=ofile)
+        print("%%%", file=ofile)
+        print(f"%%% Ref = {ref_label}", file=ofile)
+        print(f"%%% Dev = {dev_label}", file=ofile)
+        print("%"*79, file=ofile)
+        print("\n", file=ofile)
+        print(f"{'Timer':<25} {'Ref [s]':>20} {'Dev [s]':>20}", file=ofile)
+        print("-"*79, file=ofile)
+
+        # Print timers
+        print_timer("All", ref, dev, ofile)
+        print_timer("All_SetService", ref, dev, ofile)
+        print_timer("All_SetService_GCHP", ref, dev, ofile)
+        print_timer("All_SetService_GCHP_GCHPctmEnv", ref, dev, ofile)
+        print_timer("All_SetService_GCHP_GCHPchem", ref, dev, ofile)
+        print_timer("All_SetService_GCHP_DYNAMICS", ref, dev, ofile)
+        print_timer("All_Initialize", ref, dev, ofile)
+        print_timer("All_Initialize_GCHP", ref, dev, ofile)
+        print_timer("All_Initialize_GCHP_GCHPctmEnv", ref, dev, ofile)
+        print_timer("All_Initialize_GCHP_DYNAMICS", ref, dev, ofile)
+        print_timer("All_Initialize_EXTDATA", ref, dev, ofile)
+        print_timer("All_Initialize_HIST", ref, dev, ofile)
+        print_timer("All_Run", ref, dev, ofile)
+        print_timer("All_Run_GCHP", ref, dev, ofile)
+        print_timer("All_Run_GCHP_GCHPctmEnv", ref, dev, ofile)
+        print_timer("All_Run_GCHP_GCHPchem", ref, dev, ofile)
+        print_timer("All_Run_GCHP_DYNAMICS", ref, dev, ofile)
+        print_timer("All_Run_EXTDATA", ref, dev, ofile)
+        print_timer("All_Run_HIST", ref, dev, ofile)
+        print_timer("All_Finalize", ref, dev, ofile)
+        print_timer("All_Finalize_GCHP", ref, dev, ofile)
+        print_timer("All_Finalize_GCHP_GCHPctmEnv", ref, dev, ofile)
+        print_timer("All_Finalize_GCHP_GCHPchem", ref, dev, ofile)
+        print_timer("All_Finalize_GCHP_DYNAMICS", ref, dev, ofile)
+        print_timer("All_Finalize_EXTDATA", ref, dev, ofile)
+        print_timer("All_Finalize_HIST", ref, dev, ofile)
+
+
+def make_benchmark_timing_table(
+    ref_files,
+    ref_label,
+    dev_files,
+    dev_label,
+    dst="./benchmark",
+    overwrite=False,
+):
+    """
+    Creates a table of timing information for GCHP
+    benchmark simulations given one or more text files
+    as input.
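+    When several input files are given, the timing information from
+    each file is summed before the table is written.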
+
+    Args
+    ref_files : str|list : File(s) with timing info from the "Ref" model
+    ref_label : str      : Version string for the "Ref" model
+    dev_files : str|list : File(s) with timing info from the "Dev" model
+    dev_label : str      : Version string for the "Dev" model
+
+    Kwargs
+    dst       : str      : Directory where output will be written
+    overwrite : bool     : Overwrite existing files? (default: False)
+    """
+    verify_variable_type(ref_files, (str, list))
+    verify_variable_type(ref_label, str)
+    verify_variable_type(dev_files, (str, list))
+    verify_variable_type(dev_label, str)
+    verify_variable_type(dst, str)
+
+    # Create the destination folder
+    make_directory(dst, overwrite)
+
+    # Strip timing info from the text file(s) and sum them.
+    ref_timers = sum_timers(read_timing_data(ref_files))
+    dev_timers = sum_timers(read_timing_data(dev_files))
+
+    # Filename for output
+    timing_table = replace_whitespace(
+        os.path.join(
+            dst,
+            f"Benchmark_Timers_{ref_label}_vs_{dev_label}.txt"
+        )
+    )
+
+    # Write timing info to a table
+    display_timers(
+        ref_timers,
+        replace_whitespace(ref_label),
+        dev_timers,
+        replace_whitespace(dev_label),
+        timing_table,
+    )
+
+
+if __name__ == '__main__':
+
+    REF_FILES = [
+        "./execute.gchp_merra2_fullchem_benchmark.log",
+        "./execute.gchp_merra2_fullchem_benchmark.log",
+    ]
+    DEV_FILES = "./execute.gchp_merra2_fullchem_benchmark.log"
+
+    # Debug test
+    make_benchmark_timing_table(
+        REF_FILES,
+        "GCHP 14.4.0 list input",
+        DEV_FILES,
+        "GCHP 14.4.0 str input",
+        dst="./",
+        overwrite=True,
+)

From 91617b52957036e332f1288cc8fa01719fce5b05 Mon Sep 17 00:00:00 2001
From: Bob Yantosca
Date: Mon, 6 May 2024 14:05:16 -0400
Subject: [PATCH 32/43] Now also scrape GCHPchem timers as well as summary timers

gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py
- Modified to scrape the GCHPchem timers as well as the summary
  timers.  This involves:
  - Reading the file forwards (instead of backwards)
  - Using "." as the delimiter in the flattened dictionary
  - Rewriting the algorithm to parse timer lines from the log file
  - Now loop over dictionary keys in the display_timers function

Signed-off-by: Bob Yantosca
---
 .../modules/benchmark_scrape_gchp_timers.py   | 183 +++++++++++-------
 1 file changed, 110 insertions(+), 73 deletions(-)

diff --git a/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py b/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py
index 97ece6ab..b4ef7df1 100644
--- a/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py
+++ b/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py
@@ -39,7 +39,7 @@ def read_timing_data(input_files):
     raise ValueError("Argument 'input_files' is not of type str or list!")


-def count_characters(text, char_to_match="-"):
+def count_characters(text, char_to_match="."):
     """
     Counts the occurrences of a given character in a string of text.
@@ -80,62 +80,112 @@ def read_one_text_file(text_file):
     Returns
     result : dict : Dictionary with timing information
     """
-    keep_line = True
-    temp_timers = []

     # Make sure file exists
     if not os.path.exists(text_file):
         raise FileNotFoundError(f"Could not find {text_file}!")

-    # Read the file backwards and keep just the timing information
-    with open(text_file, encoding="utf-8") as ifile:
+    # ==================================================================
+    # Parse the GCHP log file
+    # ==================================================================
+
+    # Initialize local variables
+    keep_line = False
+    inclusive = 0
+    temp_timers = []

-        for line in reversed(list(ifile)):
-            line = line.strip("\n")
+    # Open the log file
+    with open(text_file, encoding="utf-8") as ifile:

-            # Set a flag to denote the start & end of timing info
-            if "-------- --------- ------ --------- ------" in line:
+        # Read each line in the file
+        for line in ifile:
+
+            # Strip newlines; skip empty lines
+            line = line.strip()
+            if len(line) == 0:
+                continue
+
+            # GCHP timers section (also skip header lines)
+            if 'Times for component <GCHPchem>' in line:
+                keep_line = True
+                inclusive = 3
+                continue
+            if keep_line and 'Min Mean' in line:
+                continue
+            if keep_line and '============================' in line:
+                continue
+            if keep_line and 'Name %' in line:
+                continue
+            if keep_line and '------ ---------- ----------' in line:
+                continue
+            if keep_line and '---------------------------------' in line:
                 keep_line = False
-                break
+                continue
+
+            # Summary section (also skip header lines)
+            if 'Report on process: 0' in line:
+                keep_line = True
+                inclusive = 2
+                continue
+            if keep_line and 'Inclusive' in line:
+                continue
+            if keep_line and '================' in line:
+                continue
+            if keep_line and 'Name' in line:
+                continue
+            if keep_line and '-------- --------- ------ --------- ------' \
+                in line:
+                continue

             # Append timing info lines into a list of dicts
             if keep_line:
                 substr = line.split()
                 key = substr[0].strip()
-                val = float(substr[2].strip())
+                val = float(substr[inclusive].strip())
                 temp_timers.append({key: val})

-    # Because we were reading the end of the file backwards, the
-    # entries in temp_timers are reversed.  Now read through them
-    # in the forward order.
- hdr = ["", "", ""] - timers = {} - for timer in reversed(temp_timers): - for (key, val) in timer.items(): - - # Denote how deep into the dictionary this key goes - # as determined by the number of prefixing "-" characters - depth = count_characters(key, "-") / 2 - - # Remove any prefixed "-" characters - new_key = key.strip("-") - - # Add results into the "timers" dictionary as a - # "flattened" dictionary, for expediency - if depth == 0: - hdr[0] = new_key - timers[new_key] = val - elif depth == 1: - hdr[1] = new_key - new_key = f"{hdr[0]}_{new_key}" - timers[new_key] = val - elif depth == 2: - hdr[2] = new_key - new_key = f"{hdr[0]}_{hdr[1]}_{new_key}" - timers[new_key] = val - else: - new_key = f"{hdr[0]}_{hdr[1]}_{hdr[2]}_{new_key}" - timers[new_key] = val + # ================================================================== + # Save timing results into a "flattened" dictionary + # ================================================================== + hdr = ["", "", "", "", ""] + timers = {} + for timer in temp_timers: + for (key, val) in timer.items(): + + # Denote how deep into the dictionary this key goes + # as determined by the number of prefixing "-" characters + depth = count_characters(key, "-") / 2 + + # Remove any prefixed "-" characters + new_key = key.strip("-") + + # Add results into the "timers" dictionary as a + # "flattened" dictionary, for expediency + # (This is the only way to update a nested dict) + if depth == 0: + hdr[0] = new_key + timers[new_key] = val + elif depth == 1: + hdr[1] = new_key + new_key = f"{hdr[0]}.{new_key}" + timers[new_key] = val + elif depth == 2: + hdr[2] = new_key + new_key = f"{hdr[0]}.{hdr[1]}.{new_key}" + timers[new_key] = val + elif depth == 3: + hdr[3] = new_key + new_key = f"{hdr[0]}.{hdr[1]}.{hdr[2]}.{new_key}" + timers[new_key] = val + elif depth == 4: + hdr[4] = new_key + new_key = f"{hdr[0]}.{hdr[1]}.{hdr[2]}.{hdr[3]}.{new_key}" + timers[new_key] = val + else: + new_key = \ + f"{hdr[0]}.{hdr[1]}.{hdr[2]}.{hdr[3]}.{hdr[4]}.{new_key}" + timers[new_key] = val return timers @@ -187,12 +237,12 @@ def print_timer(key, ref, dev, ofile): dev : dict : Timing information from the "Dev" model ofile : file : File object where info will be written """ - # Denote the level of the dictionary key by counting "_" chars - depth = count_characters(key, "_") + # Denote the level of the dictionary key by counting "." 
chars
+    depth = count_characters(key, ".")

     # Prefix "--" characters to the key name to denote its depth,
     # replicating the label style at the end of the GCHP log file
-    label = "--"*depth + key.split("_")[-1]
+    label = "--"*depth + key.split(".")[-1]

     # Line to print
     line = f"{label:<25} {ref[key]:>20.3f} {dev[key]:>20.3f}"
@@ -219,37 +269,24 @@ def display_timers(ref, ref_label, dev, dev_label, table_file):
         print(f"%%% Ref = {ref_label}", file=ofile)
         print(f"%%% Dev = {dev_label}", file=ofile)
         print("%"*79, file=ofile)
+
+        # GCHPchem timers
         print("\n", file=ofile)
-        print(f"{'Timer':<25} {'Ref [s]':>20} {'Dev [s]':>20}", file=ofile)
+        print(f"{'GCHPchem Timer':<25} {'Ref [s]':>20} {'Dev [s]':>20}",
+              file=ofile)
         print("-"*79, file=ofile)
+        for key in dev:
+            if key.startswith("GCHPchem"):
+                print_timer(key, ref, dev, ofile)

-        # Print timers
-        print_timer("All", ref, dev, ofile)
-        print_timer("All_SetService", ref, dev, ofile)
-        print_timer("All_SetService_GCHP", ref, dev, ofile)
-        print_timer("All_SetService_GCHP_GCHPctmEnv", ref, dev, ofile)
-        print_timer("All_SetService_GCHP_GCHPchem", ref, dev, ofile)
-        print_timer("All_SetService_GCHP_DYNAMICS", ref, dev, ofile)
-        print_timer("All_Initialize", ref, dev, ofile)
-        print_timer("All_Initialize_GCHP", ref, dev, ofile)
-        print_timer("All_Initialize_GCHP_GCHPctmEnv", ref, dev, ofile)
-        print_timer("All_Initialize_GCHP_DYNAMICS", ref, dev, ofile)
-        print_timer("All_Initialize_EXTDATA", ref, dev, ofile)
-        print_timer("All_Initialize_HIST", ref, dev, ofile)
-        print_timer("All_Run", ref, dev, ofile)
-        print_timer("All_Run_GCHP", ref, dev, ofile)
-        print_timer("All_Run_GCHP_GCHPctmEnv", ref, dev, ofile)
-        print_timer("All_Run_GCHP_GCHPchem", ref, dev, ofile)
-        print_timer("All_Run_GCHP_DYNAMICS", ref, dev, ofile)
-        print_timer("All_Run_EXTDATA", ref, dev, ofile)
-        print_timer("All_Run_HIST", ref, dev, ofile)
-        print_timer("All_Finalize", ref, dev, ofile)
-        print_timer("All_Finalize_GCHP", ref, dev, ofile)
-        print_timer("All_Finalize_GCHP_GCHPctmEnv", ref, dev, ofile)
-        print_timer("All_Finalize_GCHP_GCHPchem", ref, dev, ofile)
-        print_timer("All_Finalize_GCHP_DYNAMICS", ref, dev, ofile)
-        print_timer("All_Finalize_EXTDATA", ref, dev, ofile)
-        print_timer("All_Finalize_HIST", ref, dev, ofile)
+        # Summary timers
+        print("\n", file=ofile)
+        print(f"{'Summary':<25} {'Ref [s]':>20} {'Dev [s]':>20}",
+              file=ofile)
+        print("-"*79, file=ofile)
+        for key in dev:
+            if key.startswith("All"):
+                print_timer(key, ref, dev, ofile)

From ac5056432c089bc3bd248ca2dbb1323600ea8897 Mon Sep 17 00:00:00 2001
From: Melissa Sulprizio
Date: Tue, 7 May 2024 09:13:03 -0400
Subject: [PATCH 33/43] Add HCl to emission_species.yml for GEOS-Chem 14.4.0

In GEOS-Chem 14.4.0, continental emissions of chlorine (pCl and HCl)
were added.  We also need to include these emissions in benchmark
plots and tables.
See associated pull request:
- https://github.com/geoschem/geos-chem/pull/2275

Signed-off-by: Melissa Sulprizio
---
 CHANGELOG.md                                | 3 ++-
 gcpy/benchmark/modules/emission_species.yml | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 552f0429..65d2b5f5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -23,7 +23,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Example script `gcpy/examples/hemco/make_mask_file.py`
 - Added `gcpy/community/format_hemco_data.py` from @hannahnesser
 - Added `gcpy/examples/hemco/format_hemco_demo.py` from @hannahnesser
-
+- Added HCl to `gcpy/benchmark/modules/emission_species.yml` for GEOS-Chem 14.4.0
+
 ### Changed
 - Bump pip from 23.2.1 to 23.3 (dependabot suggested this)
 - Bump pypdf from 3.16.1 to 3.17.0 (dependabot suggested this)
diff --git a/gcpy/benchmark/modules/emission_species.yml b/gcpy/benchmark/modules/emission_species.yml
index 8f425679..47c4c72c 100644
--- a/gcpy/benchmark/modules/emission_species.yml
+++ b/gcpy/benchmark/modules/emission_species.yml
@@ -26,6 +26,7 @@ FullChemBenchmark:
   GLYC: Tg
   GLYX: Tg
   HAC: Tg
+  HCl: Tg
   HCOOH: Tg
   HNO2: Tg
   HNO3: Tg

From 224bba7933faaa30aafa6552ad3c50213ca03df9 Mon Sep 17 00:00:00 2001
From: Melissa Sulprizio
Date: Tue, 7 May 2024 09:35:28 -0400
Subject: [PATCH 34/43] Add GTChlorine inventory to emission_inventories.yml

The HCl emissions added in GEOS-Chem 14.4.0 are read in from the GT
(Georgia Tech) Chlorine inventory.  Here we add that inventory to
emission_inventories.yml for inclusion in the benchmark inventory
table.

Signed-off-by: Melissa Sulprizio
---
 CHANGELOG.md                                    | 2 +-
 gcpy/benchmark/modules/emission_inventories.yml | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 65d2b5f5..9b799c42 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -23,7 +23,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Example script `gcpy/examples/hemco/make_mask_file.py`
 - Added `gcpy/community/format_hemco_data.py` from @hannahnesser
 - Added `gcpy/examples/hemco/format_hemco_demo.py` from @hannahnesser
-- Added HCl to `gcpy/benchmark/modules/emission_species.yml` for GEOS-Chem 14.4.0
+- Added HCl to `gcpy/benchmark/modules/emission_species.yml` and GTChlorine to `gcpy/benchmark/modules/emission_inventories.yml` for GEOS-Chem 14.4.0
 
 ### Changed
 - Bump pip from 23.2.1 to 23.3 (dependabot suggested this)
diff --git a/gcpy/benchmark/modules/emission_inventories.yml b/gcpy/benchmark/modules/emission_inventories.yml
index 46eb9e67..6cd01de2 100644
--- a/gcpy/benchmark/modules/emission_inventories.yml
+++ b/gcpy/benchmark/modules/emission_inventories.yml
@@ -9,6 +9,7 @@ FullChemBenchmark:
   DICEAfrica: Tg
   GEIAnatural: Tg
   GFED: Tg
+  GTChlorine: Tg
   IODINE: Tg
   LIANG: Tg
   LIGHTNOX: Tg

From 3827302e6374bf757e63d49dfd7524a928f938f4 Mon Sep 17 00:00:00 2001
From: Bob Yantosca
Date: Tue, 7 May 2024 15:41:05 -0400
Subject: [PATCH 35/43] run_1yr_fullchem_benchmark.py now produces timing table output

gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py
- Rename driver program to "make_benchmark_gcclassic_timing_table"
- Remove if __name__ == "__main__": block

gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py
- Rename driver program to "make_benchmark_gchp_timing_table"
- Add an error check to exit after the last summary timer is found
  (this only affects GCHP log files from AWS cloud benchmarks)
- Remove if __name__ == "__main__": block
gcpy/benchmark/modules/benchmark_utils.py
- Added gcc_vs_gcc_dirs, gchp_vs_gcc_dirs, gchp_vs_gchp_dirs, and
  get_log_filepaths to abstract repetitive code out of the 1-year
  benchmark scripts

gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py
- Call make_benchmark_gcclassic_timing_table to produce the
  GCC vs. GCC timing table
- Call make_benchmark_gchp_timing_table to produce the
  GCHP vs. GCHP timing table

CHANGELOG.md
- Updated accordingly

TODO: Add a "% diff" column to the timing table output
---
 CHANGELOG.md                                       |   2 +
 .../benchmark_scrape_gcclassic_timers.py           |  21 +--
 .../modules/benchmark_scrape_gchp_timers.py        |  27 +--
 gcpy/benchmark/modules/benchmark_utils.py          | 138 +++++++++++++++
 .../modules/run_1yr_fullchem_benchmark.py          | 165 ++++++++++--------
 5 files changed, 236 insertions(+), 117 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 319989ec..9cb203e9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -24,6 +24,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Convenience function `replace_whitespace` in `gcpy/util.py`
 - Benchmark script `gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py`
 - Benchmark script `gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py`
+- 1-year benchmark scripts now produce GCC vs GCC and GCHP vs GCHP timing tables
+- Functions `gcc_vs_gcc_dirs`, `gchp_vs_gcc_dirs`, `gchp_vs_gchp_dirs`, and `get_log_filepaths` in `gcpy/benchmark/modules/benchmark_utils.py`
 
 ### Changed
 - Bump pip from 23.2.1 to 23.3 (dependabot suggested this)
diff --git a/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py b/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py
index 084665a1..10491f2d 100644
--- a/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py
+++ b/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py
@@ -219,7 +219,7 @@ def display_timers(ref, ref_label, dev, dev_label, table_file):
     print_timer("Unit conversions", ref, dev, ofile)
 
 
-def make_benchmark_timing_table(
+def make_benchmark_gcclassic_timing_table(
     ref_files,
     ref_label,
     dev_files,
@@ -272,22 +272,3 @@ def make_benchmark_timing_table(
         replace_whitespace(dev_label),
         timing_table,
     )
-
-
-if __name__ == '__main__':
-
-    REF_FILES = [
-        "./gcclassic_timers.json",
-        "./gcclassic_timers.json"
-    ]
-    DEV_FILES = "./execute.gc_4x5_merra2_fullchem_benchmark.log"
-
-    # Debug test
-    make_benchmark_timing_table(
-        REF_FILES,
-        "GCC 14.4.0 json",
-        DEV_FILES,
-        "GCC 14.4.0 log",
-        dst="./",
-        overwrite=True,
-)
diff --git a/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py b/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py
index b4ef7df1..1253b1f3 100644
--- a/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py
+++ b/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py
@@ -138,6 +138,12 @@ def read_one_text_file(text_file):
                 in line:
                 continue
 
+            # NOTE: This line appears only in cloud benchmarks; it
+            # signals the end of GCHP output and the start of job
+            # statistics.  Exit when we encounter it.
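+            # ("Command being timed:" is the first header line that
+            # GNU "time -v" writes along with its job statistics.)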
+            if keep_line and "Command being timed:" in line:
+                break
+
             # Append timing info lines into a list of dicts
             if keep_line:
                 substr = line.split()
@@ -289,7 +295,7 @@ def display_timers(ref, ref_label, dev, dev_label, table_file):
                 print_timer(key, ref, dev, ofile)
 
 
-def make_benchmark_timing_table(
+def make_benchmark_gchp_timing_table(
     ref_files,
     ref_label,
     dev_files,
@@ -341,22 +347,3 @@ def make_benchmark_timing_table(
         replace_whitespace(dev_label),
         timing_table,
     )
-
-
-if __name__ == '__main__':
-
-    REF_FILES = [
-        "./execute.gchp_merra2_fullchem_benchmark.log",
-        "./execute.gchp_merra2_fullchem_benchmark.log",
-    ]
-    DEV_FILES = "./execute.gchp_merra2_fullchem_benchmark.log"
-
-    # Debug test
-    make_benchmark_timing_table(
-        REF_FILES,
-        "GCHP 14.4.0 list input",
-        DEV_FILES,
-        "GCHP 14.4.0 str input",
-        dst="./",
-        overwrite=True,
-)
diff --git a/gcpy/benchmark/modules/benchmark_utils.py b/gcpy/benchmark/modules/benchmark_utils.py
index 64564cd5..912714a9 100644
--- a/gcpy/benchmark/modules/benchmark_utils.py
+++ b/gcpy/benchmark/modules/benchmark_utils.py
@@ -509,3 +509,141 @@ def rename_speciesconc_to_speciesconcvv(
             rename_dict[var] = var.replace("SpeciesConc_", "SpeciesConcVV_")
 
     return dset.rename(rename_dict)
+
+
+def gcc_vs_gcc_dirs(
+    config,
+    subdir,
+):
+    """
+    Convenience function to return GCC vs. GCC file paths
+    for use in the benchmarking modules.
+
+    Args
+    config : dict : Info read from config file
+    subdir : str  : Subdirectory
+
+    Returns
+    refdir, devdir : str : File paths
+    """
+    util.verify_variable_type(config, dict)
+    util.verify_variable_type(subdir, str)
+
+    # Ref and Dev directory paths
+    refdir = os.path.join(
+        config["paths"]["main_dir"],
+        config["data"]["ref"]["gcc"]["dir"],
+        config["data"]["ref"]["gcc"][subdir]
+    )
+    devdir = os.path.join(
+        config["paths"]["main_dir"],
+        config["data"]["dev"]["gcc"]["dir"],
+        config["data"]["dev"]["gcc"][subdir]
+    )
+
+    return refdir, devdir
+
+
+def gchp_vs_gcc_dirs(
+    config,
+    subdir,
+):
+    """
+    Convenience function to return GCHP vs. GCC file paths
+    for use in the benchmarking modules.
+
+    Args
+    config : dict : Info read from config file
+    subdir : str  : Subdirectory
+
+    Returns
+    refdir, devdir : str : File paths
+    """
+    util.verify_variable_type(config, dict)
+    util.verify_variable_type(subdir, str)
+
+    refdir = os.path.join(
+        config["paths"]["main_dir"],
+        config["data"]["dev"]["gcc"]["dir"],
+        config["data"]["dev"]["gcc"][subdir]
+    )
+    devdir = os.path.join(
+        config["paths"]["main_dir"],
+        config["data"]["dev"]["gchp"]["dir"],
+        config["data"]["dev"]["gchp"][subdir]
+    )
+
+    return refdir, devdir
+
+
+def gchp_vs_gchp_dirs(
+    config,
+    subdir,
+):
+    """
+    Convenience function to return GCHP vs. GCHP file paths
+    for use in the benchmarking modules.
+
+    Args
+    config : dict : Info read from config file
+    subdir : str  : Subdirectory
+
+    Returns
+    refdir, devdir : str : File paths
+    """
+    util.verify_variable_type(config, dict)
+    util.verify_variable_type(subdir, str)
+
+    refdir = os.path.join(
+        config["paths"]["main_dir"],
+        config["data"]["ref"]["gchp"]["dir"],
+        config["data"]["ref"]["gchp"][subdir]
+    )
+    devdir = os.path.join(
+        config["paths"]["main_dir"],
+        config["data"]["dev"]["gchp"]["dir"],
+        config["data"]["dev"]["gchp"][subdir]
+    )
+
+    return refdir, devdir
+
+
+def get_log_filepaths(
+    logs_dir,
+    template,
+    timestamps,
+):
+    """
+    Returns a list of paths for GEOS-Chem log files.
+    These are needed to compute the benchmark timing tables.
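+    Each strftime-style date token in the template (e.g. "%Y%m%d")
+    is replaced with the formatted timestamp, so a template such as
+    "log.%Y%m%d" yields filenames like "log.20190101".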
+
+    Args
+    logs_dir   : str  : Path to directory w/ log files
+    template   : str  : Log file template w/ strftime date tokens
+                        (e.g. "%Y%m%d")
+    timestamps : list : List of datetimes
+
+    Returns
+    result : list : List of log file paths
+    """
+    util.verify_variable_type(logs_dir, str)
+    util.verify_variable_type(template, str)
+
+    # Initialize local variables
+    format_str = ""
+    fmts = ["%Y", "%m", "%d", "%H"]
+    result = []
+
+    # Create the format string for the log file template
+    for fmt in fmts:
+        if fmt in template:
+            format_str += fmt
+
+    # Create each output logfile name, replacing template with date
+    for timestamp in timestamps:
+        time = timestamp.item().strftime(format_str)
+        result.append(
+            os.path.join(
+                logs_dir,
+                template.replace(format_str, time),
+            )
+        )
+
+    return result
diff --git a/gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py b/gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py
index 1794499b..d375cdaf 100644
--- a/gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py
+++ b/gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py
@@ -44,7 +44,7 @@
 
     $ export MPLBACKEND=agg
 
-This script corresponds with GCPy 1.4.3. Edit this version ID if releasing
+This script corresponds with GCPy 1.5.0. Edit this version ID if releasing
 a new version of GCPy.
 """
 
@@ -61,21 +61,27 @@
 from gcpy.benchmark.modules.ste_flux import make_benchmark_ste_table
 from gcpy.benchmark.modules.oh_metrics import make_benchmark_oh_metrics
 from gcpy.benchmark.modules.budget_ox import global_ox_budget
+#TODO: Peel out routines from benchmark_funcs.py into smaller
+# routines in the gcpy/benchmark/modules folder, such as these:
 from gcpy.benchmark.modules.benchmark_funcs import \
     diff_of_diffs_toprow_title, get_species_database_dir, \
     make_benchmark_conc_plots, make_benchmark_emis_plots, \
     make_benchmark_emis_tables, make_benchmark_jvalue_plots, \
     make_benchmark_aod_plots, make_benchmark_mass_tables, \
     make_benchmark_operations_budget, make_benchmark_aerosol_tables
-from gcpy.benchmark.modules.benchmark_utils import print_benchmark_info
+from gcpy.benchmark.modules.benchmark_utils import \
+    gcc_vs_gcc_dirs, gchp_vs_gcc_dirs, gchp_vs_gchp_dirs, \
+    get_log_filepaths, print_benchmark_info
 from gcpy.benchmark.modules.benchmark_models_vs_obs \
     import make_benchmark_models_vs_obs_plots
 from gcpy.benchmark.modules.benchmark_models_vs_sondes \
     import make_benchmark_models_vs_sondes_plots
-#TODO: Peel out routines from benchmark_funcs.py into smaller
-# routines in the gcpy/benchmark/modules folder, such as these:
 from gcpy.benchmark.modules.benchmark_drydep \
     import drydepvel_species, make_benchmark_drydep_plots
+from gcpy.benchmark.modules.benchmark_scrape_gcclassic_timers import \
+    make_benchmark_gcclassic_timing_table
+from gcpy.benchmark.modules.benchmark_scrape_gchp_timers import \
+    make_benchmark_gchp_timing_table
 
 # Tell matplotlib not to look for an X-window
 os.environ["QT_QPA_PLATFORM"] = "offscreen"
@@ -107,73 +113,25 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev):
     # ======================================================================
 
     # Diagnostics file directory paths
-    gcc_vs_gcc_refdir = os.path.join(
-        config["paths"]["main_dir"],
-        config["data"]["ref"]["gcc"]["dir"],
-        config["data"]["ref"]["gcc"]["outputs_subdir"],
-    )
-    gcc_vs_gcc_devdir = os.path.join(
-        config["paths"]["main_dir"],
-        config["data"]["dev"]["gcc"]["dir"],
-        config["data"]["dev"]["gcc"]["outputs_subdir"],
-    )
-    gchp_vs_gcc_refdir = os.path.join(
-        config["paths"]["main_dir"],
-        config["data"]["dev"]["gcc"]["dir"],
-        config["data"]["dev"]["gcc"]["outputs_subdir"],
-    )
-    gchp_vs_gcc_devdir = 
os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gchp"]["dir"], - config["data"]["dev"]["gchp"]["outputs_subdir"], - ) - gchp_vs_gchp_refdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["ref"]["gchp"]["dir"], - config["data"]["ref"]["gchp"]["outputs_subdir"], - ) - gchp_vs_gchp_devdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gchp"]["dir"], - config["data"]["dev"]["gchp"]["outputs_subdir"], - ) + s = "outputs_subdir" + gcc_vs_gcc_refdir, gcc_vs_gcc_devdir = gcc_vs_gcc_dirs(config, s) + gchp_vs_gcc_refdir, gchp_vs_gcc_devdir = gchp_vs_gcc_dirs(config, s) + gchp_vs_gchp_refdir, gchp_vs_gchp_devdir = gchp_vs_gchp_dirs(config, s) # Restart file directory paths - gcc_vs_gcc_refrstdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["ref"]["gcc"]["dir"], - config["data"]["ref"]["gcc"]["restarts_subdir"] - ) - gcc_vs_gcc_devrstdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gcc"]["dir"], - config["data"]["dev"]["gcc"]["restarts_subdir"] - ) - gchp_vs_gcc_refrstdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gcc"]["dir"], - config["data"]["dev"]["gcc"]["restarts_subdir"] - ) - gchp_vs_gcc_devrstdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gchp"]["dir"], - config["data"]["dev"]["gchp"]["restarts_subdir"] - ) - gchp_vs_gchp_refrstdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["ref"]["gchp"]["dir"], - config["data"]["ref"]["gchp"]["restarts_subdir"] - ) - gchp_vs_gchp_devrstdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gchp"]["dir"], - config["data"]["dev"]["gchp"]["restarts_subdir"] - ) + s = "restarts_subdir" + gcc_vs_gcc_refrstdir, gcc_vs_gcc_devrstdir = gcc_vs_gcc_dirs(config, s) + gchp_vs_gcc_refrstdir, gchp_vs_gcc_devrstdir = gchp_vs_gcc_dirs(config, s) + gchp_vs_gchp_refrstdir, gchp_vs_gchp_devrstdir = gchp_vs_gchp_dirs(config, s) + + # Restart file directory paths + s = "logs_subdir" + gcc_vs_gcc_reflogdir, gcc_vs_gcc_devlogdir = gcc_vs_gcc_dirs(config, s) + gchp_vs_gcc_reflogdir, gchp_vs_gcc_devlogdir = gchp_vs_gcc_dirs(config, s) + gchp_vs_gchp_reflogdir, gchp_vs_gchp_devlogdir = gchp_vs_gchp_dirs(config, s) # Directories where plots & tables will be created - mainresultsdir = os.path.join( - config["paths"]["results_dir"] - ) + mainresultsdir = os.path.join(config["paths"]["results_dir"]) gcc_vs_gcc_resultsdir = os.path.join( mainresultsdir, config["options"]["comparisons"]["gcc_vs_gcc"]["dir"] @@ -236,15 +194,10 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): #gchp_vs_gchp_budgetdir = os.path.join(gchp_vs_gchp_resultsdir, "Budget") # Models vs. observations directories - gcc_vs_gcc_models_vs_obs_dir = os.path.join( - gcc_vs_gcc_resultsdir, "ModelVsObs" - ) - gchp_vs_gcc_models_vs_obs_dir = os.path.join( - gchp_vs_gcc_resultsdir, "ModelVsObs" - ) - gchp_vs_gchp_models_vs_obs_dir = os.path.join( - gchp_vs_gchp_resultsdir, "ModelVsObs" - ) + s = "ModelVsObs" + gcc_vs_gcc_models_vs_obs_dir = os.path.join(gcc_vs_gcc_resultsdir, s) + gchp_vs_gcc_models_vs_obs_dir = os.path.join(gchp_vs_gcc_resultsdir, s) + gchp_vs_gchp_models_vs_obs_dir = os.path.join(gchp_vs_gchp_resultsdir, s) # ====================================================================== # Plot title strings @@ -866,6 +819,34 @@ def gcc_vs_gcc_ops_budg(mon): overwrite=True, ) + # ================================================================== + # GCC vs. 
GCC Benchmark Timing Table + # ================================================================== + if config["options"]["outputs"]["timing_table"]: + print("\n%%% Creating GCC vs. GCC Benchmark Timing table %%%") + + # Filepaths + ref = get_log_filepaths( + gcc_vs_gcc_reflogdir, + config["data"]["ref"]["gcc"]["logs_template"], + all_months_ref + ) + dev = get_log_filepaths( + gcc_vs_gcc_devlogdir, + config["data"]["dev"]["gcc"]["logs_template"], + all_months_dev + ) + + # Create the table + make_benchmark_gcclassic_timing_table( + ref, + config["data"]["ref"]["gcc"]["version"], + dev, + config["data"]["dev"]["gcc"]["version"], + dst=gcc_vs_gcc_tablesdir, + overwrite=True, + ) + # ================================================================== # GCC vs GCC Global mean OH, MCF Lifetime, CH4 Lifetime # ================================================================== @@ -2224,6 +2205,34 @@ def gchp_vs_gchp_ops_budg(mon): if config["options"]["outputs"]["ste_table"]: print("\n%%% Skipping GCHP vs. GCHP Strat-Trop Exchange table %%%") + # ================================================================== + # GCHP vs. GCHP Benchmark Timing Table + # ================================================================== + if config["options"]["outputs"]["timing_table"]: + print("\n%%% Creating GCHP vs. GCHP Benchmark Timing table %%%") + + # Filepaths + ref = get_log_filepaths( + gchp_vs_gchp_reflogdir, + config["data"]["ref"]["gchp"]["logs_template"], + all_months_ref, + )[0] + dev = get_log_filepaths( + gchp_vs_gchp_devlogdir, + config["data"]["dev"]["gchp"]["logs_template"], + all_months_dev, + )[0] + + # Create the table + make_benchmark_gchp_timing_table( + ref, + config["data"]["ref"]["gchp"]["version"], + dev, + config["data"]["dev"]["gchp"]["version"], + dst=gcc_vs_gcc_tablesdir, + overwrite=True, + ) + # ================================================================== # GCHP vs GCHP Model vs. Observations plots # ================================================================== @@ -2231,6 +2240,8 @@ def gchp_vs_gchp_ops_budg(mon): print("\n%%% Creating GCHP vs. GCHP models vs. obs. plots %%%") # Filepaths + # NOTE: If the GCHP benchmark is done in one-shot + # then you need the [0] after the call to get_filepaths. ref = get_filepaths( gchp_vs_gchp_refdir, "SpeciesConc", @@ -2361,4 +2372,4 @@ def gchp_vs_gchp_ops_budg(mon): # ================================================================== # Print a message indicating that the benchmarks finished # ================================================================== - print("\n %%%% All requested benchmark plots/tables created! %%%%") + print("\n%%%% All requested benchmark plots/tables created! 
%%%%")

From 6f8e0497b1dbfe4e92f8c632ed2395b69647da45 Mon Sep 17 00:00:00 2001
From: Bob Yantosca
Date: Fri, 10 May 2024 12:19:50 -0400
Subject: [PATCH 36/43] Add percent difference column to timing tables

gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py
gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py
- In function print_timer:
  - Compute % difference between ref & dev (or assign NaN if it
    would result in a div-by-zero)
  - Decrease width of timer name column from 25 to 22 spaces
  - Change format of Ref & Dev columns from 20.3f to 18.3f
  - Add percent diff column as 12.3e format (right-aligned)
- In function display_timers:
  - Change width of column headers accordingly

Signed-off-by: Bob Yantosca
---
 .../modules/benchmark_scrape_gcclassic_timers.py |  8 ++++++--
 .../modules/benchmark_scrape_gchp_timers.py      | 13 ++++++++-----
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py b/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py
index 10491f2d..7560076c 100644
--- a/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py
+++ b/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py
@@ -5,6 +5,7 @@
 """
 import os
 import json
+import numpy as np
 from gcpy.util import make_directory, replace_whitespace, verify_variable_type
 
 
@@ -173,7 +174,10 @@ def print_timer(key, ref, dev, ofile):
     dev   : dict : Timing information from the "Dev" model
     ofile : file : File object where info will be written
     """
-    line = f"{key:<25} {ref[key]:>20.3f} {dev[key]:>20.3f}"
+    pctdiff = np.nan
+    if np.abs(ref[key]) > 0.0:
+        pctdiff = ((dev[key] - ref[key]) / ref[key]) * 100.0
+    line = f"{key:<22} {ref[key]:>18.3f} {dev[key]:>18.3f} {pctdiff:>12.3e}"
     print(line, file=ofile)
 
 
@@ -199,7 +203,7 @@ def display_timers(ref, ref_label, dev, dev_label, table_file):
         print(f"%%% Dev = {dev_label}", file=ofile)
         print("%"*79, file=ofile)
         print("\n", file=ofile)
-        print(f"{'Timer':<25} {'Ref [s]':>20} {'Dev [s]':>20}", file=ofile)
+        print(f"{'Timer':<22} {'Ref [s]':>18} {'Dev [s]':>18} {'% Diff':>12}", file=ofile)
         print("-"*79, file=ofile)
 
         # Print timers
diff --git a/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py b/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py
index 1253b1f3..01368432 100644
--- a/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py
+++ b/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py
@@ -4,6 +4,7 @@
 more text files.
 """
 import os
+import numpy as np
 from gcpy.util import make_directory, replace_whitespace, verify_variable_type
 
 
@@ -251,7 +252,11 @@ def print_timer(key, ref, dev, ofile):
     label = "--"*depth + key.split(".")[-1]
 
     # Line to print
-    line = f"{label:<25} {ref[key]:>20.3f} {dev[key]:>20.3f}"
+    pctdiff = np.nan
+    if np.abs(ref[key]) > 0.0:
+        pctdiff = ((dev[key] - ref[key]) / ref[key]) * 100.0
+    line = \
+        f"{label:<22} {ref[key]:>18.3f} {dev[key]:>18.3f} {pctdiff:>12.3e}"
     print(line, file=ofile)
 
 
@@ -278,8 +283,7 @@ def display_timers(ref, ref_label, dev, dev_label, table_file):
 
         # GCHPchem timers
         print("\n", file=ofile)
-        print(f"{'GCHPchem Timer':<25} {'Ref [s]':>20} {'Dev [s]':>20}",
-              file=ofile)
+        print(f"{'GCHPchem Timer':<22} {'Ref [s]':>18} {'Dev [s]':>18} {'% Diff':>12}", file=ofile)
         print("-"*79, file=ofile)
         for key in dev:
             if key.startswith("GCHPchem"):
@@ -287,8 +291,7 @@ def display_timers(ref, ref_label, dev, dev_label, table_file):
 
         # Summary timers
         print("\n", file=ofile)
-        print(f"{'Summary':<25} {'Ref [s]':>20} {'Dev [s]':>20}",
-              file=ofile)
+        print(f"{'Summary':<22} {'Ref [s]':>18} {'Dev [s]':>18} {'% Diff':>12}", file=ofile)
         print("-"*79, file=ofile)
         for key in dev:
             if key.startswith("All"):

From efafa95d69c65b06eb935e8528c38ede9dda0dc8 Mon Sep 17 00:00:00 2001
From: Bob Yantosca
Date: Fri, 10 May 2024 12:34:50 -0400
Subject: [PATCH 37/43] Update benchmark scripts & config files to add timing table output

gcpy/benchmark/cloud/template.1hr_benchmark.yml
gcpy/benchmark/cloud/template.1mo_benchmark.yml
gcpy/benchmark/config/1mo_benchmark.yml
gcpy/benchmark/config/1yr_fullchem_benchmark.yml
gcpy/benchmark/config/1yr_tt_benchmark.yml
- Add "logs_subdir" and "logs_template" tags to GCC/GCHP Ref & Dev
- Add "timing_table" to "outputs" section

CHANGELOG.md
- Updated accordingly

gcpy/benchmark/run_benchmark.py
gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py
gcpy/benchmark/modules/run_1yr_tt_benchmark.py
- Import & call make_benchmark_gcclassic_timing_table to create
  GCClassic vs. GCClassic timing information table
- Import & call make_benchmark_gchp_timing_table to create GCHP vs.
GCHP timing information table Signed-off-by: Bob Yantosca --- CHANGELOG.md | 1 + .../cloud/template.1hr_benchmark.yml | 9 ++ .../cloud/template.1mo_benchmark.yml | 9 ++ gcpy/benchmark/config/1mo_benchmark.yml | 9 ++ .../config/1yr_fullchem_benchmark.yml | 9 ++ gcpy/benchmark/config/1yr_tt_benchmark.yml | 9 ++ .../modules/run_1yr_fullchem_benchmark.py | 13 +- .../benchmark/modules/run_1yr_tt_benchmark.py | 144 ++++++++++-------- gcpy/benchmark/run_benchmark.py | 139 +++++++++-------- 9 files changed, 212 insertions(+), 130 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9cb203e9..b6ca2182 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -47,6 +47,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Create radionuclide, STE flux, and mass conservation tables for Ref and Dev versions in TransportTracers benchmarks - Use new function `copy_file_to_dir` to copy the benchmark script and configuration file to the benchmark results folders - Updated GitHub stalebot config file `stale.yml` with new issue/PR labels that should not go stale +- Updated benchmark driver scripts and config files to print GCClassic & GCHP timing information ### Fixed - CS inquiry functions in `gcpy/cstools.py` now work properly for `xr.Dataset` and `xr.DataArray` objects diff --git a/gcpy/benchmark/cloud/template.1hr_benchmark.yml b/gcpy/benchmark/cloud/template.1hr_benchmark.yml index 6ec5cab0..3bb1a676 100644 --- a/gcpy/benchmark/cloud/template.1hr_benchmark.yml +++ b/gcpy/benchmark/cloud/template.1hr_benchmark.yml @@ -47,6 +47,8 @@ data: dir: ref-gcc/run-directory outputs_subdir: OutputDir restarts_subdir: Restarts + logs_subdir: OutputDir + logs_template: runlog.txt bmk_start: "2019-07-01T00:00:00" bmk_end: "2019-07-01T01:00:00" gchp: @@ -54,6 +56,8 @@ data: dir: ref-gchp/run-directory outputs_subdir: OutputDir restarts_subdir: Restarts + logs_subdir: OutputDir + logs_template: runlog.txt bmk_start: "2019-07-01T00:00:00" bmk_end: "2019-07-01T01:00:00" is_pre_14.0: False @@ -64,6 +68,8 @@ data: dir: dev-gcc/run-directory outputs_subdir: OutputDir restarts_subdir: Restarts + logs_subdir: OutputDir + logs_template: runlog.txt bmk_start: "2019-07-01T00:00:00" bmk_end: "2019-07-01T01:00:00" gchp: @@ -71,6 +77,8 @@ data: dir: dev-gchp/run-directory outputs_subdir: OutputDir restarts_subdir: Restarts + logs_subdir: OutputDir + logs_template: runlog.txt bmk_start: "2019-07-01T00:00:00" bmk_end: "2019-07-01T01:00:00" is_pre_14.0: False @@ -117,6 +125,7 @@ options: ops_budget_table: False OH_metrics: True ste_table: True # GCC only + timing_table: True summary_table: True plot_options: by_spc_cat: True diff --git a/gcpy/benchmark/cloud/template.1mo_benchmark.yml b/gcpy/benchmark/cloud/template.1mo_benchmark.yml index a8dad2e0..ff431932 100644 --- a/gcpy/benchmark/cloud/template.1mo_benchmark.yml +++ b/gcpy/benchmark/cloud/template.1mo_benchmark.yml @@ -47,6 +47,8 @@ data: dir: ref-gcc/run-directory outputs_subdir: OutputDir restarts_subdir: Restarts + logs_subdir: OutputDir + logs_template: runlog.txt bmk_start: "2019-07-01T00:00:00" bmk_end: "2019-08-01T00:00:00" gchp: @@ -54,6 +56,8 @@ data: dir: ref-gchp/run-directory outputs_subdir: OutputDir restarts_subdir: Restarts + logs_subdir: OutputDir + logs_template: runlog.txt bmk_start: "2019-07-01T00:00:00" bmk_end: "2019-08-01T00:00:00" is_pre_14.0: False @@ -64,6 +68,8 @@ data: dir: dev-gcc/run-directory outputs_subdir: OutputDir restarts_subdir: Restarts + logs_subdir: OutputDir + logs_template: runlog.txt bmk_start: 
"2019-07-01T00:00:00" bmk_end: "2019-08-01T00:00:00" gchp: @@ -71,6 +77,8 @@ data: dir: dev-gchp/run-directory outputs_subdir: OutputDir restarts_subdir: Restarts + logs_subdir: OutputDir + logs_template: runlog.txt bmk_start: "2019-07-01T00:00:00" bmk_end: "2019-08-01T00:00:00" is_pre_14.0: False @@ -117,6 +125,7 @@ options: ops_budget_table: False OH_metrics: True ste_table: True # GCC only + timing_table: True summary_table: True plot_options: by_spc_cat: True diff --git a/gcpy/benchmark/config/1mo_benchmark.yml b/gcpy/benchmark/config/1mo_benchmark.yml index 65f84b0a..173ed8eb 100644 --- a/gcpy/benchmark/config/1mo_benchmark.yml +++ b/gcpy/benchmark/config/1mo_benchmark.yml @@ -47,6 +47,8 @@ data: dir: GCC_ref outputs_subdir: OutputDir restarts_subdir: Restarts + logs_subdir: . + logs_template: "log.%Y%m%d" bmk_start: "2019-07-01T00:00:00" bmk_end: "2019-08-01T00:00:00" gchp: @@ -54,6 +56,8 @@ data: dir: GCHP_ref outputs_subdir: OutputDir restarts_subdir: Restarts + logs_subdir: . + logs_template: "gchp.%Y%m%d_0000z.log" bmk_start: "2019-07-01T00:00:00" bmk_end: "2019-08-01T00:00:00" is_pre_14.0: False @@ -64,6 +68,8 @@ data: dir: GCC_dev outputs_subdir: OutputDir restarts_subdir: Restarts + logs_subdir: . + logs_template: "log.%Y%m%d" bmk_start: "2019-07-01T00:00:00" bmk_end: "2019-08-01T00:00:00" gchp: @@ -71,6 +77,8 @@ data: dir: GCHP_dev outputs_subdir: OutputDir restarts_subdir: Restarts + logs_subdir: Logs + logs_template: "gchp.%Y%m%d_0000z.log" bmk_start: "2019-07-01T00:00:00" bmk_end: "2019-08-01T00:00:00" is_pre_14.0: False @@ -117,6 +125,7 @@ options: ops_budget_table: False OH_metrics: True ste_table: True # GCC only + timing_table: True summary_table: True plot_options: by_spc_cat: True diff --git a/gcpy/benchmark/config/1yr_fullchem_benchmark.yml b/gcpy/benchmark/config/1yr_fullchem_benchmark.yml index aae845ac..6fbf1869 100644 --- a/gcpy/benchmark/config/1yr_fullchem_benchmark.yml +++ b/gcpy/benchmark/config/1yr_fullchem_benchmark.yml @@ -59,6 +59,8 @@ data: dir: GCC_ref outputs_subdir: OutputDir restarts_subdir: Restarts + logs_subdir: Logs + logs_template: "log.%Y%m%d" bmk_start: "2019-01-01T00:00:00" bmk_end: "2020-01-01T00:00:00" gchp: @@ -66,6 +68,8 @@ data: dir: GCHP_ref outputs_subdir: OutputDir restarts_subdir: Restarts + logs_subdir: Logs + logs_template: "gchp.%Y%m%d_0000z.log" bmk_start: "2019-01-01T00:00:00" bmk_end: "2020-01-01T00:00:00" is_pre_14.0: False @@ -76,6 +80,8 @@ data: dir: GCC_dev outputs_subdir: OutputDir restarts_subdir: Restarts + logs_subdir: Logs + logs_template: "log.%Y%m%d" bmk_start: "2019-01-01T00:00:00" bmk_end: "2020-01-01T00:00:00" gchp: @@ -83,6 +89,8 @@ data: dir: GCHP_dev outputs_subdir: OutputDir restarts_subdir: Restarts + logs_subdir: Logs + logs_template: "gchp.%Y%m%d_0000z.log" bmk_start: "2019-01-01T00:00:00" bmk_end: "2020-01-01T00:00:00" is_pre_14.0: False @@ -130,6 +138,7 @@ options: Ox_budget_table: True ste_table: True # GCC only OH_metrics: True + timing_table: True plot_models_vs_obs: True plot_options: by_spc_cat: True diff --git a/gcpy/benchmark/config/1yr_tt_benchmark.yml b/gcpy/benchmark/config/1yr_tt_benchmark.yml index 1f631cf1..c2809c0b 100644 --- a/gcpy/benchmark/config/1yr_tt_benchmark.yml +++ b/gcpy/benchmark/config/1yr_tt_benchmark.yml @@ -48,6 +48,8 @@ data: dir: GCC_ref outputs_subdir: OutputDir restarts_subdir: Restarts + logs_subdir: Logs + logs_template: "log.%Y%m%d" bmk_start: "2019-01-01T00:00:00" bmk_end: "2020-01-01T00:00:00" gchp: @@ -55,6 +57,8 @@ data: dir: GCHP_ref outputs_subdir: OutputDir 
restarts_subdir: Restarts + logs_subdir: Logs + logs_template: "gchp.%Y%m%d_0000z.log" bmk_start: "2019-01-01T00:00:00" bmk_end: "2020-01-01T00:00:00" is_pre_14.0: False @@ -65,6 +69,8 @@ data: dir: GCC_dev outputs_subdir: OutputDir restarts_subdir: Restarts + logs_subdir: Logs + logs_template: "log.%Y%m%d" bmk_start: "2019-01-01T00:00:00" bmk_end: "2020-01-01T00:00:00" gchp: @@ -72,6 +78,8 @@ data: dir: GCHP_dev outputs_subdir: OutputDir restarts_subdir: Restarts + logs_subdir: Logs + logs_template: "gchp.%Y%m%d_0000z.log" bmk_start: "2019-01-01T00:00:00" bmk_end: "2020-01-01T00:00:00" is_pre_14.0: False @@ -115,6 +123,7 @@ options: mass_table: True ste_table: True cons_table: True + timing_table: False # # n_cores: Specify the number of cores to use. # -1: Use $OMP_NUM_THREADS cores diff --git a/gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py b/gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py index d375cdaf..68d42dd0 100644 --- a/gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py +++ b/gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py @@ -85,6 +85,7 @@ # Tell matplotlib not to look for an X-window os.environ["QT_QPA_PLATFORM"] = "offscreen" + # Suppress annoying warning messages warnings.filterwarnings("ignore", category=RuntimeWarning) warnings.filterwarnings("ignore", category=UserWarning) @@ -124,7 +125,7 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): gchp_vs_gcc_refrstdir, gchp_vs_gcc_devrstdir = gchp_vs_gcc_dirs(config, s) gchp_vs_gchp_refrstdir, gchp_vs_gchp_devrstdir = gchp_vs_gchp_dirs(config, s) - # Restart file directory paths + # Log file directory paths s = "logs_subdir" gcc_vs_gcc_reflogdir, gcc_vs_gcc_devlogdir = gcc_vs_gcc_dirs(config, s) gchp_vs_gcc_reflogdir, gchp_vs_gcc_devlogdir = gchp_vs_gcc_dirs(config, s) @@ -1575,7 +1576,7 @@ def gchp_vs_gcc_ops_budg(mon): if config["options"]["comparisons"]["gchp_vs_gchp"]["run"]: # ================================================================== - # GCHP vs GCC filepaths for StateMet collection data + # GCHP vs GCHP filepaths for StateMet collection data # ================================================================== refmet = get_filepaths( gchp_vs_gchp_refdir, @@ -2212,15 +2213,17 @@ def gchp_vs_gchp_ops_budg(mon): print("\n%%% Creating GCHP vs. GCHP Benchmark Timing table %%%") # Filepaths + # NOTE: Usually the GCHP 1-yr benchmark is run as + # one job, so we only need to take the 1st log file. 
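+        # (get_log_filepaths returns a list, so the [0] below
+        # selects that single log file.)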
ref = get_log_filepaths( gchp_vs_gchp_reflogdir, config["data"]["ref"]["gchp"]["logs_template"], - all_months_ref, + all_months_gchp_ref, )[0] dev = get_log_filepaths( gchp_vs_gchp_devlogdir, config["data"]["dev"]["gchp"]["logs_template"], - all_months_dev, + all_months_gchp_dev, )[0] # Create the table @@ -2229,7 +2232,7 @@ def gchp_vs_gchp_ops_budg(mon): config["data"]["ref"]["gchp"]["version"], dev, config["data"]["dev"]["gchp"]["version"], - dst=gcc_vs_gcc_tablesdir, + dst=gchp_vs_gchp_tablesdir, overwrite=True, ) diff --git a/gcpy/benchmark/modules/run_1yr_tt_benchmark.py b/gcpy/benchmark/modules/run_1yr_tt_benchmark.py index b94055bd..bc5f58fe 100644 --- a/gcpy/benchmark/modules/run_1yr_tt_benchmark.py +++ b/gcpy/benchmark/modules/run_1yr_tt_benchmark.py @@ -64,7 +64,13 @@ make_benchmark_operations_budget, make_benchmark_mass_conservation_table from gcpy.benchmark.modules.budget_tt import transport_tracers_budgets from gcpy.benchmark.modules.ste_flux import make_benchmark_ste_table -from gcpy.benchmark.modules.benchmark_utils import print_benchmark_info +from gcpy.benchmark.modules.benchmark_utils import \ + gcc_vs_gcc_dirs, gchp_vs_gcc_dirs, gchp_vs_gchp_dirs, \ + get_log_filepaths, print_benchmark_info +from gcpy.benchmark.modules.benchmark_scrape_gcclassic_timers import \ + make_benchmark_gcclassic_timing_table +from gcpy.benchmark.modules.benchmark_scrape_gchp_timers import \ + make_benchmark_gchp_timing_table # Tell matplotlib not to look for an X-window os.environ["QT_QPA_PLATFORM"] = "offscreen" @@ -93,69 +99,23 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): # For gchp_vs_gcc_refdir use config["data"]["dev"]["gcc"]["version"], not ref (mps, 6/27/19) # ====================================================================== - # Diagnostic file directory paths - gcc_vs_gcc_refdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["ref"]["gcc"]["dir"], - config["data"]["ref"]["gcc"]["outputs_subdir"] - ) - gcc_vs_gcc_devdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gcc"]["dir"], - config["data"]["dev"]["gcc"]["outputs_subdir"] - ) - gchp_vs_gcc_refdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gcc"]["dir"], - config["data"]["dev"]["gcc"]["outputs_subdir"] - ) - gchp_vs_gcc_devdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gchp"]["dir"], - config["data"]["dev"]["gchp"]["outputs_subdir"] - ) - gchp_vs_gchp_refdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["ref"]["gchp"]["dir"], - config["data"]["ref"]["gchp"]["outputs_subdir"] - ) - gchp_vs_gchp_devdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gchp"]["dir"], - config["data"]["dev"]["gchp"]["outputs_subdir"] - ) + # Diagnostics file directory paths + s = "outputs_subdir" + gcc_vs_gcc_refdir, gcc_vs_gcc_devdir = gcc_vs_gcc_dirs(config, s) + gchp_vs_gcc_refdir, gchp_vs_gcc_devdir = gchp_vs_gcc_dirs(config, s) + gchp_vs_gchp_refdir, gchp_vs_gchp_devdir = gchp_vs_gchp_dirs(config, s) - # Diagnostic file directory paths - gcc_vs_gcc_refrstdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["ref"]["gcc"]["dir"], - config["data"]["ref"]["gcc"]["restarts_subdir"] - ) - gchp_vs_gcc_refrstdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gcc"]["dir"], - config["data"]["dev"]["gcc"]["restarts_subdir"] - ) - gchp_vs_gchp_refrstdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["ref"]["gchp"]["dir"], - 
config["data"]["ref"]["gchp"]["restarts_subdir"] - ) - gcc_vs_gcc_devrstdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gcc"]["dir"], - config["data"]["dev"]["gcc"]["restarts_subdir"] - ) - gchp_vs_gcc_devrstdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gchp"]["dir"], - config["data"]["dev"]["gchp"]["restarts_subdir"] - ) - gchp_vs_gchp_devrstdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gchp"]["dir"], - config["data"]["dev"]["gchp"]["restarts_subdir"] - ) + # Restart file directory paths + s = "restarts_subdir" + gcc_vs_gcc_refrstdir, gcc_vs_gcc_devrstdir = gcc_vs_gcc_dirs(config, s) + gchp_vs_gcc_refrstdir, gchp_vs_gcc_devrstdir = gchp_vs_gcc_dirs(config, s) + gchp_vs_gchp_refrstdir, gchp_vs_gchp_devrstdir = gchp_vs_gchp_dirs(config, s) + + # Log file directory paths + s = "logs_subdir" + gcc_vs_gcc_reflogdir, gcc_vs_gcc_devlogdir = gcc_vs_gcc_dirs(config, s) + gchp_vs_gcc_reflogdir, gchp_vs_gcc_devlogdir = gchp_vs_gcc_dirs(config, s) + gchp_vs_gchp_reflogdir, gchp_vs_gchp_devlogdir = gchp_vs_gchp_dirs(config, s) # Directories where plots & tables will be created mainresultsdir = os.path.join( @@ -611,6 +571,34 @@ def gcc_vs_gcc_mass_table(mon): overwrite=True, ) + # ================================================================== + # GCC vs. GCC Benchmark Timing Table + # ================================================================== + if config["options"]["outputs"]["timing_table"]: + print("\n%%% Creating GCC vs. GCC Benchmark Timing table %%%") + + # Filepaths + ref = get_log_filepaths( + gcc_vs_gcc_reflogdir, + config["data"]["ref"]["gcc"]["logs_template"], + all_months_ref + ) + dev = get_log_filepaths( + gcc_vs_gcc_devlogdir, + config["data"]["dev"]["gcc"]["logs_template"], + all_months_dev + ) + + # Create the table + make_benchmark_gcclassic_timing_table( + ref, + config["data"]["ref"]["gcc"]["version"], + dev, + config["data"]["dev"]["gcc"]["version"], + dst=gcc_vs_gcc_tablesdir, + overwrite=True, + ) + # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Create GCHP vs GCC benchmark plots and tables # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -1242,6 +1230,36 @@ def gchp_vs_gchp_mass_table(mon): dst=gchp_vs_gchp_tablesdir, ) + # ================================================================== + # GCHP vs. GCHP Benchmark Timing Table + # ================================================================== + if config["options"]["outputs"]["timing_table"]: + print("\n%%% Creating GCHP vs. GCHP Benchmark Timing table %%%") + + # Filepaths + # NOTE: Usually the GCHP 1-yr benchmark is run as + # one job, so we only need to take the 1st log file. 
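+        # (get_log_filepaths returns a list of paths; the [0]
+        # below takes its first and only entry.)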
+ ref = get_log_filepaths( + gchp_vs_gchp_reflogdir, + config["data"]["ref"]["gchp"]["logs_template"], + all_months_gchp_ref, + )[0] + dev = get_log_filepaths( + gchp_vs_gchp_devlogdir, + config["data"]["dev"]["gchp"]["logs_template"], + all_months_gchp_dev, + )[0] + + # Create the table + make_benchmark_gchp_timing_table( + ref, + config["data"]["ref"]["gchp"]["version"], + dev, + config["data"]["dev"]["gchp"]["version"], + dst=gchp_vs_gchp_tablesdir, + overwrite=True, + ) + # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Create mass conservations tables for GCC and GCHP # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/gcpy/benchmark/run_benchmark.py b/gcpy/benchmark/run_benchmark.py index e474f2d7..0105d63f 100755 --- a/gcpy/benchmark/run_benchmark.py +++ b/gcpy/benchmark/run_benchmark.py @@ -63,9 +63,15 @@ import run_benchmark as run_1yr_benchmark from gcpy.benchmark.modules.run_1yr_tt_benchmark \ import run_benchmark as run_1yr_tt_benchmark -from gcpy.benchmark.modules.benchmark_utils import print_benchmark_info +from gcpy.benchmark.modules.benchmark_utils import \ + gcc_vs_gcc_dirs, gchp_vs_gcc_dirs, gchp_vs_gchp_dirs, \ + get_log_filepaths, print_benchmark_info from gcpy.benchmark.modules.benchmark_drydep \ import drydepvel_species, make_benchmark_drydep_plots +from gcpy.benchmark.modules.benchmark_scrape_gcclassic_timers import \ + make_benchmark_gcclassic_timing_table +from gcpy.benchmark.modules.benchmark_scrape_gchp_timers import \ + make_benchmark_gchp_timing_table # Tell matplotlib not to look for an X-window os.environ["QT_QPA_PLATFORM"] = "offscreen" @@ -135,69 +141,23 @@ def run_benchmark_default(config): # not ref (mps, 6/27/19) # ===================================================================== - # Diagnostic file directory paths - gcc_vs_gcc_refdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["ref"]["gcc"]["dir"], - config["data"]["ref"]["gcc"]["outputs_subdir"], - ) - gcc_vs_gcc_devdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gcc"]["dir"], - config["data"]["dev"]["gcc"]["outputs_subdir"], - ) - gchp_vs_gcc_refdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gcc"]["dir"], - config["data"]["dev"]["gcc"]["outputs_subdir"], - ) - gchp_vs_gcc_devdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gchp"]["dir"], - config["data"]["dev"]["gchp"]["outputs_subdir"], - ) - gchp_vs_gchp_refdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["ref"]["gchp"]["dir"], - config["data"]["ref"]["gchp"]["outputs_subdir"], - ) - gchp_vs_gchp_devdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gchp"]["dir"], - config["data"]["dev"]["gchp"]["outputs_subdir"], - ) + # Diagnostics file directory paths + s = "outputs_subdir" + gcc_vs_gcc_refdir, gcc_vs_gcc_devdir = gcc_vs_gcc_dirs(config, s) + gchp_vs_gcc_refdir, gchp_vs_gcc_devdir = gchp_vs_gcc_dirs(config, s) + gchp_vs_gchp_refdir, gchp_vs_gchp_devdir = gchp_vs_gchp_dirs(config, s) # Restart file directory paths - gcc_vs_gcc_refrst = os.path.join( - config["paths"]["main_dir"], - config["data"]["ref"]["gcc"]["dir"], - config["data"]["ref"]["gcc"]["restarts_subdir"] - ) - gcc_vs_gcc_devrst = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gcc"]["dir"], - config["data"]["dev"]["gcc"]["restarts_subdir"] - ) - gchp_vs_gcc_refrst = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gcc"]["dir"], - 
config["data"]["dev"]["gcc"]["restarts_subdir"] - ) - gchp_vs_gcc_devrst = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gchp"]["dir"], - config["data"]["dev"]["gchp"]["restarts_subdir"] - ) - gchp_vs_gchp_refrst = os.path.join( - config["paths"]["main_dir"], - config["data"]["ref"]["gchp"]["dir"], - config["data"]["ref"]["gchp"]["restarts_subdir"] - ) - gchp_vs_gchp_devrst = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gchp"]["dir"], - config["data"]["dev"]["gchp"]["restarts_subdir"] - ) + s = "restarts_subdir" + gcc_vs_gcc_refrstdir, gcc_vs_gcc_devrstdir = gcc_vs_gcc_dirs(config, s) + gchp_vs_gcc_refrstdir, gchp_vs_gcc_devrstdir = gchp_vs_gcc_dirs(config, s) + gchp_vs_gchp_refrstdir, gchp_vs_gchp_devrstdir = gchp_vs_gchp_dirs(config, s) + + # Log file directory paths + s = "logs_subdir" + gcc_vs_gcc_reflogdir, gcc_vs_gcc_devlogdir = gcc_vs_gcc_dirs(config, s) + gchp_vs_gcc_reflogdir, gchp_vs_gcc_devlogdir = gchp_vs_gcc_dirs(config, s) + gchp_vs_gchp_reflogdir, gchp_vs_gchp_devlogdir = gchp_vs_gchp_dirs(config, s) # ===================================================================== # Benchmark output directories @@ -666,6 +626,34 @@ def run_benchmark_default(config): month=gcc_dev_date.astype(datetime).month, ) + # ================================================================== + # GCC vs. GCC Benchmark Timing Table + # ================================================================== + if config["options"]["outputs"]["timing_table"]: + print("\n%%% Creating GCC vs. GCC Benchmark Timing table %%%") + + # Filepaths + ref = get_log_filepaths( + gcc_vs_gcc_reflogdir, + config["data"]["ref"]["gcc"]["logs_template"], + gcc_ref_date, + ) + dev = get_log_filepaths( + gcc_vs_gcc_devlogdir, + config["data"]["dev"]["gcc"]["logs_template"], + gcc_dev_date, + ) + + # Create the table + make_benchmark_gcclassic_timing_table( + ref, + config["data"]["ref"]["gcc"]["version"], + dev, + config["data"]["dev"]["gcc"]["version"], + dst=gcc_vs_gcc_tablesdir, + overwrite=True, + ) + # ================================================================== # GCC vs. GCC summary table # ================================================================== @@ -1094,6 +1082,33 @@ def run_benchmark_default(config): title = "\n%%% Skipping GCHP vs. GCC Strat-Trop Exchange table %%%" print(title) + # ================================================================== + # GCHP vs. GCHP Benchmark Timing Table + # ================================================================== + if config["options"]["outputs"]["timing_table"]: + print("\n%%% Creating GCHP vs. GCHP Benchmark Timing table %%%") + + # Filepaths + ref = get_log_filepaths( + gchp_vs_gchp_reflogdir, + config["data"]["ref"]["gchp"]["logs_template"], + gchp_ref_date, + ) + dev = get_log_filepaths( + gchp_vs_gchp_devlogdir, + config["data"]["dev"]["gchp"]["logs_template"], + gchp_dev_date, + ) + + # Create the table + make_benchmark_gchp_timing_table( + ref, + config["data"]["ref"]["gchp"]["version"], + dev, + config["data"]["dev"]["gchp"]["version"], + dst=gchp_vs_gchp_tablesdir, + overwrite=True, + ) # ================================================================== # GCHP vs. 
GCC summary table

From e8e13c08f702bc953a37572340195be194473df1 Mon Sep 17 00:00:00 2001
From: Bob Yantosca
Date: Fri, 10 May 2024 14:58:52 -0400
Subject: [PATCH 38/43] Add several fixes for benchmark timing tables

gcpy/benchmark/cloud/template.1hr_benchmark.yml
gcpy/benchmark/cloud/template.1mo_benchmark.yml
- Change the logs_subdir YAML tag to ".", since the log file is placed
  in the run directory rather than in the OutputDir subdirectory

gcpy/benchmark/modules/benchmark_utils.py
- Place the "timestamps" argument in a list if there is only one
  timestamp before trying to iterate over it with a for loop.

gcpy/benchmark/run_benchmark.py
- We had mistakenly placed the GCHP timing table in the GCHP vs. GCC
  section.  Move this to the GCHP vs. GCHP section.
---
 .../cloud/template.1hr_benchmark.yml          |  8 +--
 .../cloud/template.1mo_benchmark.yml          |  8 +--
 gcpy/benchmark/modules/benchmark_utils.py     |  5 ++
 gcpy/benchmark/run_benchmark.py               | 56 +++++++++----------
 4 files changed, 41 insertions(+), 36 deletions(-)

diff --git a/gcpy/benchmark/cloud/template.1hr_benchmark.yml b/gcpy/benchmark/cloud/template.1hr_benchmark.yml
index 3bb1a676..8daf5bad 100644
--- a/gcpy/benchmark/cloud/template.1hr_benchmark.yml
+++ b/gcpy/benchmark/cloud/template.1hr_benchmark.yml
@@ -47,7 +47,7 @@ data:
     dir: ref-gcc/run-directory
     outputs_subdir: OutputDir
     restarts_subdir: Restarts
-    logs_subdir: OutputDir
+    logs_subdir: .
     logs_template: runlog.txt
     bmk_start: "2019-07-01T00:00:00"
     bmk_end: "2019-07-01T01:00:00"
@@ -56,7 +56,7 @@ data:
     dir: ref-gchp/run-directory
     outputs_subdir: OutputDir
     restarts_subdir: Restarts
-    logs_subdir: OutputDir
+    logs_subdir: .
     logs_template: runlog.txt
     bmk_start: "2019-07-01T00:00:00"
     bmk_end: "2019-07-01T01:00:00"
@@ -68,7 +68,7 @@ data:
     dir: dev-gcc/run-directory
     outputs_subdir: OutputDir
     restarts_subdir: Restarts
-    logs_subdir: OutputDir
+    logs_subdir: .
     logs_template: runlog.txt
     bmk_start: "2019-07-01T00:00:00"
     bmk_end: "2019-07-01T01:00:00"
@@ -77,7 +77,7 @@ data:
     dir: dev-gchp/run-directory
     outputs_subdir: OutputDir
     restarts_subdir: Restarts
-    logs_subdir: OutputDir
+    logs_subdir: .
     logs_template: runlog.txt
     bmk_start: "2019-07-01T00:00:00"
     bmk_end: "2019-07-01T01:00:00"
diff --git a/gcpy/benchmark/cloud/template.1mo_benchmark.yml b/gcpy/benchmark/cloud/template.1mo_benchmark.yml
index ff431932..b0098fad 100644
--- a/gcpy/benchmark/cloud/template.1mo_benchmark.yml
+++ b/gcpy/benchmark/cloud/template.1mo_benchmark.yml
@@ -47,7 +47,7 @@ data:
     dir: ref-gcc/run-directory
     outputs_subdir: OutputDir
     restarts_subdir: Restarts
-    logs_subdir: OutputDir
+    logs_subdir: .
     logs_template: runlog.txt
     bmk_start: "2019-07-01T00:00:00"
     bmk_end: "2019-08-01T00:00:00"
@@ -56,7 +56,7 @@ data:
     dir: ref-gchp/run-directory
     outputs_subdir: OutputDir
     restarts_subdir: Restarts
-    logs_subdir: OutputDir
+    logs_subdir: .
     logs_template: runlog.txt
     bmk_start: "2019-07-01T00:00:00"
     bmk_end: "2019-08-01T00:00:00"
@@ -68,7 +68,7 @@ data:
     dir: dev-gcc/run-directory
     outputs_subdir: OutputDir
     restarts_subdir: Restarts
-    logs_subdir: OutputDir
+    logs_subdir: .
     logs_template: runlog.txt
     bmk_start: "2019-07-01T00:00:00"
     bmk_end: "2019-08-01T00:00:00"
@@ -77,7 +77,7 @@ data:
     dir: dev-gchp/run-directory
     outputs_subdir: OutputDir
     restarts_subdir: Restarts
-    logs_subdir: OutputDir
+    logs_subdir: .
logs_template: runlog.txt bmk_start: "2019-07-01T00:00:00" bmk_end: "2019-08-01T00:00:00" diff --git a/gcpy/benchmark/modules/benchmark_utils.py b/gcpy/benchmark/modules/benchmark_utils.py index 912714a9..e4e49c5d 100644 --- a/gcpy/benchmark/modules/benchmark_utils.py +++ b/gcpy/benchmark/modules/benchmark_utils.py @@ -636,6 +636,11 @@ def get_log_filepaths( if fmt in template: format_str += fmt + # If there is only one timestamp, add it to a list + # so that the for loop below will work properly. + if timestamps.size == 1: + timestamps = [timestamps] + # Create each output logfile name, replacing template with date for timestamp in timestamps: time = timestamp.item().strftime(format_str) diff --git a/gcpy/benchmark/run_benchmark.py b/gcpy/benchmark/run_benchmark.py index 0105d63f..0482c3fd 100755 --- a/gcpy/benchmark/run_benchmark.py +++ b/gcpy/benchmark/run_benchmark.py @@ -1082,34 +1082,6 @@ def run_benchmark_default(config): title = "\n%%% Skipping GCHP vs. GCC Strat-Trop Exchange table %%%" print(title) - # ================================================================== - # GCHP vs. GCHP Benchmark Timing Table - # ================================================================== - if config["options"]["outputs"]["timing_table"]: - print("\n%%% Creating GCHP vs. GCHP Benchmark Timing table %%%") - - # Filepaths - ref = get_log_filepaths( - gchp_vs_gchp_reflogdir, - config["data"]["ref"]["gchp"]["logs_template"], - gchp_ref_date, - ) - dev = get_log_filepaths( - gchp_vs_gchp_devlogdir, - config["data"]["dev"]["gchp"]["logs_template"], - gchp_dev_date, - ) - - # Create the table - make_benchmark_gchp_timing_table( - ref, - config["data"]["ref"]["gchp"]["version"], - dev, - config["data"]["dev"]["gchp"]["version"], - dst=gchp_vs_gchp_tablesdir, - overwrite=True, - ) - # ================================================================== # GCHP vs. GCC summary table # ================================================================== @@ -1589,6 +1561,34 @@ def run_benchmark_default(config): if config["options"]["outputs"]["ste_table"]: print("\n%%% Skipping GCHP vs. GCHP Strat-Trop Exchange table %%%") + # ================================================================== + # GCHP vs. GCHP Benchmark Timing Table + # ================================================================== + if config["options"]["outputs"]["timing_table"]: + print("\n%%% Creating GCHP vs. GCHP Benchmark Timing table %%%") + + # Filepaths + ref = get_log_filepaths( + gchp_vs_gchp_reflogdir, + config["data"]["ref"]["gchp"]["logs_template"], + gchp_ref_date, + ) + dev = get_log_filepaths( + gchp_vs_gchp_devlogdir, + config["data"]["dev"]["gchp"]["logs_template"], + gchp_dev_date, + ) + + # Create the table + make_benchmark_gchp_timing_table( + ref, + config["data"]["ref"]["gchp"]["version"], + dev, + config["data"]["dev"]["gchp"]["version"], + dst=gchp_vs_gchp_tablesdir, + overwrite=True, + ) + # ================================================================== # GCHP vs. 
GCHP summary table
     # ==================================================================

From d10b09ada44b3112682aa02b9aacba0e4031e866 Mon Sep 17 00:00:00 2001
From: Bob Yantosca
Date: Fri, 10 May 2024 16:02:44 -0400
Subject: [PATCH 39/43] More minor fixes for benchmark timing scripts

gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py
- Set timers that did not run to np.nan before trying to parse
  timing information

gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py
- Fixed typo: "GCHP Classic" -> "GCHP"

Signed-off-by: Bob Yantosca
---
 .../modules/benchmark_scrape_gcclassic_timers.py       | 5 ++++-
 gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py | 9 ++++-----
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py b/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py
index 7560076c..a12f088e 100644
--- a/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py
+++ b/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py
@@ -121,7 +121,10 @@ def read_one_text_file(text_file):
             if keep_line:
                 substr = line.split(":")
                 key = substr[0].strip()
-                val = substr[3].split()[1].strip()
+                if "THE TIMER DID NOT RUN" in line:
+                    val = np.nan
+                else:
+                    val = substr[3].split()[1].strip()
                 timers[key] = {"seconds": val}

     return timers
diff --git a/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py b/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py
index 01368432..9fd74dd9 100644
--- a/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py
+++ b/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py
@@ -262,7 +262,7 @@ def print_timer(key, ref, dev, ofile):

 def display_timers(ref, ref_label, dev, dev_label, table_file):
     """
-    Prints the GEOS-Che timer information to a table.
+    Prints the GCHP timer information to a table.

     Args
     ref       : dict : Timing information from the "Ref" model
@@ -275,7 +275,7 @@

         # Print header
         print("%"*79, file=ofile)
-        print("%%% GCHP Classic Benchmark Timing Information", file=ofile)
+        print("%%% GCHP Benchmark Timing Information", file=ofile)
         print("%%%", file=ofile)
         print(f"%%% Ref = {ref_label}", file=ofile)
         print(f"%%% Dev = {dev_label}", file=ofile)
@@ -307,9 +307,8 @@ def make_benchmark_gchp_timing_table(
     overwrite=False,
 ):
     """
-    Creates a table of timing information for GEOS-Chem Classic
-    benchmark simulations given one or more JSON and/or text files
-    as input.
+    Creates a table of timing information for GCHP benchmark
+    simulations given one or more text files as input.

     Args
     ref_files : str|list : File(s) with timing info from the "Ref" model

From e0bd4a7a6bbf3089754d189749961f77358a8310 Mon Sep 17 00:00:00 2001
From: Bob Yantosca
Date: Mon, 13 May 2024 10:03:16 -0400
Subject: [PATCH 40/43] Now use "GC.log" as the GCClassic file name for 1mo_benchmark.yml

gcpy/benchmark/config/1mo_benchmark.yml
- Renamed "log.%Y%m%d" to "GC.log", which is the same name used in
  the geoschem.benchmark.run script for GEOS-Chem Classic benchmarks.
- NOTE: This YAML file is only needed when we run 1-month benchmarks
  manually.  The automatic cloud benchmarks use the template files
  in gcpy/benchmark/cloud.
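
For illustration, below is a minimal sketch of how a strftime-style
logs_template expands into log file paths.  The sketch is hypothetical
(the actual logic lives in get_log_filepaths in
gcpy/benchmark/modules/benchmark_utils.py); it only shows why
"log.%Y%m%d" yields one file name per date, while a literal template
such as "GC.log" passes through unchanged:

    # Hypothetical sketch of logs_template expansion; not the actual
    # gcpy implementation (see get_log_filepaths in benchmark_utils.py).
    import os
    import pandas as pd

    def expand_log_template(logs_dir, template, timestamps):
        """Expand any strftime tokens in template, once per timestamp."""
        paths = []
        for tstamp in timestamps:
            # strftime returns templates without % tokens
            # (e.g. "GC.log") unchanged
            paths.append(os.path.join(logs_dir, tstamp.strftime(template)))
        return paths

    dates = pd.date_range("2019-07-01", "2019-07-03")
    print(expand_log_template("dev-gcc/run-directory", "log.%Y%m%d", dates))
    # ['dev-gcc/run-directory/log.20190701', ..., '.../log.20190703']
    print(expand_log_template("dev-gcc/run-directory", "GC.log", dates))
    # ['dev-gcc/run-directory/GC.log'] repeated 3 times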
Signed-off-by: Bob Yantosca
---
 gcpy/benchmark/config/1mo_benchmark.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcpy/benchmark/config/1mo_benchmark.yml b/gcpy/benchmark/config/1mo_benchmark.yml
index 173ed8eb..1c86aa5f 100644
--- a/gcpy/benchmark/config/1mo_benchmark.yml
+++ b/gcpy/benchmark/config/1mo_benchmark.yml
@@ -48,7 +48,7 @@
     outputs_subdir: OutputDir
     restarts_subdir: Restarts
     logs_subdir: .
-    logs_template: "log.%Y%m%d"
+    logs_template: "GC.log"
     bmk_start: "2019-07-01T00:00:00"
     bmk_end: "2019-08-01T00:00:00"
   gchp:
@@ -69,7 +69,7 @@
     outputs_subdir: OutputDir
     restarts_subdir: Restarts
     logs_subdir: .
-    logs_template: "log.%Y%m%d"
+    logs_template: "GC.log"
     bmk_start: "2019-07-01T00:00:00"
     bmk_end: "2019-08-01T00:00:00"
   gchp:

From 91caa2be062c28fe66450e56d75b91528a5d9703 Mon Sep 17 00:00:00 2001
From: Bob Yantosca
Date: Mon, 13 May 2024 14:04:05 -0400
Subject: [PATCH 41/43] Hotfix: Rename DELPDRY to Met_DELPDRY

gcpy/util.py
- In function rename_and_flip_gchp_rst_vars, we have restored the
  if block to rename DELPDRY to Met_DELPDRY.  This is because GCHP
  restart files in 14.3.0 and prior name the dry delta-pressure
  field DELPDRY, whereas it has been correctly named DELP_DRY in
  GCHP 14.4.0 and later.  This HotFix is necessary in order to
  avoid a ValueError when comparing GCHP versions 14.3.0 or prior.

Signed-off-by: Bob Yantosca
---
 gcpy/util.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcpy/util.py b/gcpy/util.py
index e50d4874..9344e47a 100644
--- a/gcpy/util.py
+++ b/gcpy/util.py
@@ -788,6 +788,8 @@ def rename_and_flip_gchp_rst_vars(
             old_to_new[var] = 'SpeciesRst_' + spc
         if var == "DELP_DRY":
             old_to_new["DELP_DRY"] = "Met_DELPDRY"
+        if var == "DELPDRY":
+            old_to_new["DELPDRY"] = "Met_DELPDRY"
         if var == "BXHEIGHT":
             old_to_new["BXHEIGHT"] = "Met_BXHEIGHT"
         if var == "TropLev":

From 18f03d54decd51d2088d09aaef8238a6c774541d Mon Sep 17 00:00:00 2001
From: Bob Yantosca
Date: Mon, 13 May 2024 17:33:19 -0400
Subject: [PATCH 42/43] HotFix: Use refrstdir and devrstdir in run_benchmark.py

gcpy/benchmark/run_benchmark.py
- We now use gcc_vs_gcc_refrstdir, gcc_vs_gcc_devrstdir, etc., for
  the variables that store restart file directory paths.  This
  matches the usage in the 1-year benchmark scripts.

Signed-off-by: Bob Yantosca
---
 gcpy/benchmark/run_benchmark.py | 36 ++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/gcpy/benchmark/run_benchmark.py b/gcpy/benchmark/run_benchmark.py
index 0482c3fd..710910ea 100755
--- a/gcpy/benchmark/run_benchmark.py
+++ b/gcpy/benchmark/run_benchmark.py
@@ -510,8 +510,8 @@ def run_benchmark_default(config):
         print("\n%%% Creating GCC vs. GCC global mass tables %%%")

         # Filepaths
-        ref = get_filepath(gcc_vs_gcc_refrst, "Restart", gcc_end_ref_date)
-        dev = get_filepath(gcc_vs_gcc_devrst, "Restart", gcc_end_dev_date)
+        ref = get_filepath(gcc_vs_gcc_refrstdir, "Restart", gcc_end_ref_date)
+        dev = get_filepath(gcc_vs_gcc_devrstdir, "Restart", gcc_end_dev_date)

         # Create tables
         make_benchmark_mass_tables(
@@ -531,10 +531,10 @@ def run_benchmark_default(config):
            print("\n%%% Creating GCC vs.
GCC mass accumulation tables %%%") # Filepaths for start and end restart files - refs = get_filepath(gcc_vs_gcc_refrst, "Restart", gcc_ref_date) - devs = get_filepath(gcc_vs_gcc_devrst, "Restart", gcc_dev_date) - refe = get_filepath(gcc_vs_gcc_refrst, "Restart", gcc_end_ref_date) - deve = get_filepath(gcc_vs_gcc_devrst, "Restart", gcc_end_dev_date) + refs = get_filepath(gcc_vs_gcc_refrstdir, "Restart", gcc_ref_date) + devs = get_filepath(gcc_vs_gcc_devrstdir, "Restart", gcc_dev_date) + refe = get_filepath(gcc_vs_gcc_refrstdir, "Restart", gcc_end_ref_date) + deve = get_filepath(gcc_vs_gcc_devrstdir, "Restart", gcc_end_dev_date) # Get period strings refs_str = np.datetime_as_string(gcc_ref_date, unit="s") @@ -932,12 +932,12 @@ def run_benchmark_default(config): # Filepaths ref = get_filepath( - gchp_vs_gcc_refrst, + gchp_vs_gcc_refrstdir, "Restart", gcc_end_dev_date ) dev = get_filepath( - gchp_vs_gcc_devrst, + gchp_vs_gcc_devrstdir, "Restart", gchp_end_dev_date, is_gchp=True, @@ -964,12 +964,12 @@ def run_benchmark_default(config): # Filepaths for start and end restart files refs = get_filepath( - gchp_vs_gcc_refrst, + gchp_vs_gcc_refrstdir, "Restart", gcc_dev_date ) devs = get_filepath( - gchp_vs_gcc_devrst, + gchp_vs_gcc_devrstdir, "Restart", gchp_dev_date, is_gchp=True, @@ -977,12 +977,12 @@ def run_benchmark_default(config): gchp_is_pre_14_0=config["data"]["dev"]["gchp"]["is_pre_14.0"] ) refe = get_filepath( - gchp_vs_gcc_refrst, + gchp_vs_gcc_refrstdir, "Restart", gcc_end_dev_date ) deve = get_filepath( - gchp_vs_gcc_devrst, + gchp_vs_gcc_devrstdir, "Restart", gchp_end_dev_date, is_gchp=True, @@ -1393,7 +1393,7 @@ def run_benchmark_default(config): # Filepaths ref = get_filepath( - gchp_vs_gchp_refrst, + gchp_vs_gchp_refrstdir, "Restart", gchp_end_ref_date, is_gchp=True, @@ -1401,7 +1401,7 @@ def run_benchmark_default(config): gchp_is_pre_14_0=config["data"]["ref"]["gchp"]["is_pre_14.0"] ) dev = get_filepath( - gchp_vs_gchp_devrst, + gchp_vs_gchp_devrstdir, "Restart", gchp_end_dev_date, is_gchp=True, @@ -1428,7 +1428,7 @@ def run_benchmark_default(config): # Filepaths for start and end restart files refs = get_filepath( - gchp_vs_gchp_refrst, + gchp_vs_gchp_refrstdir, "Restart", gchp_ref_date, is_gchp=True, @@ -1436,7 +1436,7 @@ def run_benchmark_default(config): gchp_is_pre_14_0=config["data"]["ref"]["gchp"]["is_pre_14.0"] ) devs = get_filepath( - gchp_vs_gchp_devrst, + gchp_vs_gchp_devrstdir, "Restart", gchp_dev_date, is_gchp=True, @@ -1444,7 +1444,7 @@ def run_benchmark_default(config): gchp_is_pre_14_0=config["data"]["dev"]["gchp"]["is_pre_14.0"] ) refe = get_filepath( - gchp_vs_gchp_refrst, + gchp_vs_gchp_refrstdir, "Restart", gchp_end_ref_date, is_gchp=True, @@ -1452,7 +1452,7 @@ def run_benchmark_default(config): gchp_is_pre_14_0=config["data"]["ref"]["gchp"]["is_pre_14.0"] ) deve = get_filepath( - gchp_vs_gchp_devrst, + gchp_vs_gchp_devrstdir, "Restart", gchp_end_dev_date, is_gchp=True, From fb7116d0dbd6ff6f28d5bdf322217f2383cdd98a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 21 May 2024 07:27:38 +0000 Subject: [PATCH 43/43] --- updated-dependencies: - dependency-name: requests dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 2fae9011..f390410d 100644 --- a/setup.py +++ b/setup.py @@ -109,7 +109,7 @@ def _write_version_file(): "python==3.9.18", "pypdf==3.16.1", "recommonmark==0.7.1", - "requests==2.31.0", + "requests==2.32.0", "scipy==1.11.2", "sparselt==0.1.3", "tabulate==0.9.0",
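
For reference, the np.nan guard added in PATCH 39/43 to
read_one_text_file (gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py)
can be exercised in isolation.  The sketch below is a minimal,
self-contained illustration; the two sample log lines are hypothetical
and only approximate the GEOS-Chem Classic timer output format:

    # Minimal sketch of the timer-parsing guard from PATCH 39/43.
    # The sample lines below are illustrative, not verbatim
    # GEOS-Chem Classic output.
    import numpy as np

    sample_lines = [
        "  Chemistry       :  00-01:23:45.678        5025.678",
        "  RRTMG           :  THE TIMER DID NOT RUN",
    ]

    timers = {}
    for line in sample_lines:
        substr = line.split(":")
        key = substr[0].strip()
        if "THE TIMER DID NOT RUN" in line:
            # A timer that never ran has no elapsed time to parse;
            # storing np.nan avoids an IndexError on substr[3]
            val = np.nan
        else:
            val = substr[3].split()[1].strip()
        timers[key] = {"seconds": val}

    print(timers)
    # {'Chemistry': {'seconds': '5025.678'}, 'RRTMG': {'seconds': nan}}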