From f7973b1192f0ba2506fd22f956237a57b21371ae Mon Sep 17 00:00:00 2001 From: Hannah O Nesser Date: Tue, 22 Aug 2023 13:41:49 -0700 Subject: [PATCH 01/43] Adding a set of functions to format GEOS-Chem input files for HEMCO compatibility. --- gcpy/format_HEMCO.py | 304 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 304 insertions(+) create mode 100644 gcpy/format_HEMCO.py diff --git a/gcpy/format_HEMCO.py b/gcpy/format_HEMCO.py new file mode 100644 index 00000000..63e1d9e6 --- /dev/null +++ b/gcpy/format_HEMCO.py @@ -0,0 +1,304 @@ + +import xarray as xr +import numpy as np +import pandas as pd +from copy import deepcopy as dc +from os.path import join + +def format_HEMCO_dimensions(ds, + start_time="2000-01-01 00:00:00", + lev_long_name="level", + lev_units="level", + lev_formula_terms=None, + gchp=False): + """ + Formats time, lat, lon, and lev (optionally) attributes for coards + compliance (HEMCO compatibility). + + Args: + ds: xarray Dataset + Dataset containing at least latitude and longitude + variables, which must be named lat and lon, respectively. + + Keyword Args (optional): + start_time: string of the format "YYYY-MM-DD HH:mm:ss" + String containing the start time of the dataset for + the purposes of encoding the time dimension. For GCHP + compliance, the first time value must be 0 time units + from the beginning of the unit. The default value is + January 1, 2000. + lev_long_name: string + A detailed description of the level attribute. Examples + include "level", "GEOS-Chem levels", "Eta centers", or + "Sigma centers". Default is "level." + lev_units: string + The unit of the vertical levels, which should be "level", + "eta_level", or "sigma_level". Setting both lev_units and + lev_long_name to "level" allows HEMCO to regrid between + vertical grids. Default is "level". + lev_formula_terms: string or None + If data is used that is not on the model vertical grid, the + data must contain surface pressure values and the hybrid coefficients + of the coordinate system together with the terms in the formula + (e.g., ”ap: hyam b: hybm ps: PS”). Default is None. + gchp: boolean + Boolean identifying whether this file is for use in + GCHP (True) or GEOS-Chem Classic (False). This is primarily + used to set the lev attributes. The default value is + False. + + Returns: + ds: xarray Dataset + An updated version of ds with encoding and attributes + set to be coards/HEMCO compliant. + """ + # Require that ds is an xarray Dataset object + if not isinstance(ds, xr.Dataset): + raise TypeError("The ds argument must be an xarray Dataset.") + + # Check that latitude and longitude are found in the dataset + ## First force all dimension names to be lowercase: + ds = ds.rename_dims({k : k.lower() for k in ds.dims.keys() + if k != k.lower()}) + + # Check and format each of the required dimensions + ds = _format_lat(ds) + ds = _format_lon(ds) + ds = _format_time(ds, start_time) + + # If level is included in the dimensions, set its attributes + if "lev" in ds.coords: + # Note: this is relatively untested (2023/08/21 HON) + ds = _format_lev(ds, lev_long_name, lev_units, + lev_formula_terms, gchp) + + # Require data order to be time, lat, lon (optionally lev) + ds = ds.transpose("time", "lat", "lon", ...) + + # Return the dataset + return ds + + +def _format_lat(ds): + ''' + Formats the latitude dimension for coards compliance. + See define_HEMCO_dimensions for argument listings. 
+    '''
+    # If there is a dimension called latitude, rename it
+    # (This function assumes ds has dimension names that are
+    # all lower case)
+    if "latitude" in ds.dims.keys():
+        ds = ds.rename_dims({"latitude" : "lat"})
+
+    # Require that lat is a monotonically increasing dimension
+    _check_required_dim(ds, "lat")
+
+    # Set attributes
+    ds["lat"].attrs = {"long_name": "latitude",
+                       "units": "degrees_north",
+                       "axis" : "Y"}
+
+    return ds
+
+
+def _format_lon(ds):
+    '''
+    Formats the longitude dimension for coards compliance.
+    See define_HEMCO_dimensions for argument listings.
+    '''
+    # If there is a dimension called longitude, rename it
+    # (This function assumes ds has dimension names that are
+    # all lower case)
+    if "longitude" in ds.dims.keys():
+        ds = ds.rename_dims({"longitude" : "lon"})
+
+    # Require that lon is a monotonically increasing dimension
+    _check_required_dim(ds, "lon")
+
+    # Set attributes
+    ds["lon"].attrs = {"long_name": "longitude",
+                       "units": "degrees_east",
+                       "axis" : "X"}
+
+    return ds
+
+
+def _format_time(ds, start_time):
+    '''
+    Formats the time dimension for coards compliance.
+    See define_HEMCO_dimensions for argument listings.
+    '''
+    if "time" not in ds.coords:
+        # If time isn't already in the coords, create a dummy variable
+        ds = ds.assign_coords(time=pd.to_datetime(start_time))
+        ds = ds.expand_dims("time")
+    else:
+        # Otherwise, update start_time to match the first time in the file,
+        # consistent with GCHP requirements
+        new_start_time = pd.to_datetime(ds["time"][0].values)
+        new_start_time = new_start_time.strftime("%Y-%m-%d %H:%M:%S")
+        print(f"Updating the reference start time from")
+        print(f"{start_time} to {new_start_time}")
+        print(f"so that time(0) = 0, consistent with GCHP requirements.")
+        start_time = new_start_time
+
+    # Now check that time is a monotonically increasing dimension
+    _check_required_dim(ds, "time")
+
+    # Set attributes
+    ds["time"].encoding= {"units" : f"hours since {start_time}",
+                          "calendar" : "standard"}
+    ds["time"].attrs = {"long_name" : "Time", "axis" : "T"}
+
+    return ds
+
+
+def _format_lev(ds, lev_long_name, lev_units, lev_formula_terms, gchp):
+    '''
+    Formats the level dimension for coards compliance.
+    See define_HEMCO_dimensions for argument listings.
+    '''
+    ## HON 2023/08/22: This is relatively untested
+
+    # If there is a dimension called level, rename it
+    if "level" in ds.dims.keys():
+        ds = ds.rename_dims({"level" : "lev"})
+
+    # If formula is provided, check that the components of the
+    # formula are included.
+    if lev_formula_terms is not None:
+        terms = lev_formula_terms.split(": ")
+        terms = [t for i, t in enumerate(terms) if i % 2 == 1]
+        for t in terms:
+            if t not in ds.data_vars.keys():
+                raise ValueError(f"{t} is in lev_formula_terms and could \
+                                 not be found.")
+
+    # If unit is level, require that the levels are integers
+    if (lev_units == "level") and (ds["lev"] != ds["lev"].astype(int)).any():
+        raise ValueError("lev has units of level but dimension values \
+                         are not integers.")
+
+    # Set attributes
+    ## Set positive to match the GCHP/GEOS-Chem conventions
+    if gchp:
+        positive = "down"
+    else:
+        positive = "up"
+
+    ## Setting both long_name and units to "level" allows HEMCO
+    ## to regrid between vertical grids (e.g., 47 -> 72 levels).
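+    ## (Data on eta or sigma coordinates should instead set lev_units
+    ## to "eta_level" or "sigma_level", as noted in the docstring.)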
+ lev_attrs = {"long_name" : lev_long_name, + "units" : lev_units, + "positive" : positive, + "axis" : "Z"} + if lev_formula_terms is not None: + lev_attrs.update({"formula_terms" : lev_formula_terms}) + + ## Set the attributes + ds["lev"].attrs = lev_attrs + + return ds + + +def _check_required_dim(ds, dim): + ''' + Checks required dimensions (time, latitude, and longitude) + for COARDS compliance (that the dimension exists and is + monotonically increasing). + + Args: + ds: xarray Dataset + dim: string ("time", "lat", or "lon") + A string corresponding to the required dimension + ''' + if dim not in ["time", "lat", "lon"]: + raise ValueError(f"{dim} is not a required dimension.") + + # Check that the dim is included in + if dim not in ds.dims.keys(): + raise ValueError(f"{dim} is not included in the dimensions.") + + # Require that the variable is monotonically increasing + if np.any(np.diff(ds[dim]).astype("float") < 0): + raise ValueError(f"{dim} is not monotonically increasing.") + + return ds + + +def format_HEMCO_variable(ds, var, long_name, units, **kwargs): + """ + Formats attributes for non-standard variables for coards compliance + (HEMCO compatibility). + + Args: + ds: xarray Dataset + Dataset containing HEMCO input data. + var: string + The name of the non-standard variable to be formatted. + long_name: string + A required HEMCO attribute, a more descriptive name for + var. + units: string + A required HEMCO attribute giving the units of var. See + https://hemco.readthedocs.io/en/stable/hco-ref-guide/input-file-format.html + for more information. + kwargs: dictionary + Any other attributes wanted for the variable. + + Returns: + ds: xarray Dataset + An updated version of ds with variable attributes + set to be coards/HEMCO compliant. + """ + ds[var].attrs = {"long_name" : long_name, "units" : units, + **kwargs} + return ds + + +def save_HEMCO_netcdf(ds, save_dir, save_name, dtype="float", **kwargs): + """ + Saves coards compliant (HEMCO compatible) netcdf. + + Args: + ds: xarray Dataset + Dataset containing HEMCO input data. + save_dir: string + The directory where the data will be saved. + save_name: string + The name the file will be named under. + + Keyword Args (optional): + dtype: data type + The data type the data will be saved as. Default is + float32 to minimize memory usage. + kwargs: dictionary + Any other attributes to be passed to the xarray + to_netcdf function. 
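+
+    Example (a minimal sketch; the species, units, and file name are
+    hypothetical):
+        >>> ds = format_HEMCO_dimensions(ds)
+        >>> ds = format_HEMCO_variable(ds, "CH4", "Methane emissions",
+        ...                            "kg/m2/s")
+        >>> save_HEMCO_netcdf(ds, save_dir=".", save_name="CH4_emissions.nc")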
+ """ + # Check that the save_name ends in .nc + if save_name.split(".")[-1][:2] != "nc": + save_name = f"{save_name}.nc" + + # Get time encoding before overwriting + time_units = ds["time"].encoding["units"] + calendar = ds["time"].encoding["calendar"] + + # Set default encoding and dtype for all variables and coordinates + encoding = {"_FillValue" : None, "dtype" : dtype} + var = {k : dc(encoding) for k in ds.keys()} + coord = {k : dc(encoding) for k in ds.coords} + + # Manually update the time encoding, which is often overwritten + # by xarray defaults + coord["time"]["units"] = time_units + coord["time"]["calendar"] = calendar + var.update(coord) + + # Save out + ds.to_netcdf(join(save_dir, save_name), encoding=var, + unlimited_dims=["time"], **kwargs) + + print("-"*70) + print("Saved to", join(save_dir, save_name)) + print("-"*70) \ No newline at end of file From e53948efe19cae2c129a0fc1bfb35ed413c6f7bf Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Wed, 23 Aug 2023 10:56:42 -0400 Subject: [PATCH 02/43] Renamne format_HEMCO.py to format_hemco_data.py + other updates gcpy/__init__.py - Added "from .format_hemco_data" import *" to make sure that functions are imported from format_hemco_data.py gcpy/format_HEMCO.py - Renamed to format_hemco_data.py gcpy/format_hemco_data.py - Added code Y formatting updates suggested by Pylint - Added _update_variable_attributes function so that we replace or update variable attributes to COARDS-conforming values without clobbering other attributes that may be present - Updated imports statements to the proper order (suggested by Pylint) - Added PyDoc header at the top of the module - Renamed "t" variable to "term" to be snake_case conforming CHANGELOG.md - updated accordingly Signed-off-by: Bob Yantosca --- CHANGELOG.md | 1 + gcpy/__init__.py | 1 + .../{format_HEMCO.py => format_hemco_data.py} | 332 +++++++++++------- 3 files changed, 214 insertions(+), 120 deletions(-) rename gcpy/{format_HEMCO.py => format_hemco_data.py} (50%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 16a2b079..f02ff3f5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Added new routine `format_number_for_table` in `util.py` - Added BrSALA and BrSALC to `emission_species.yml` - Added `ENCODING = "UTF-8"` to `gcpy/constants.py` +- Added `gcpy/format_hemco_data.py` from @hannahnesser ### Changed - Simplified the Github issues templates into two options: `new-feature-or-discussion.md` and `question-issue.md` diff --git a/gcpy/__init__.py b/gcpy/__init__.py index 9d9efbb1..aeeaf9f0 100644 --- a/gcpy/__init__.py +++ b/gcpy/__init__.py @@ -27,3 +27,4 @@ from .file_regrid import * from .grid_stretching_transforms import * from .cstools import * +from .format_hemco_data import * diff --git a/gcpy/format_HEMCO.py b/gcpy/format_hemco_data.py similarity index 50% rename from gcpy/format_HEMCO.py rename to gcpy/format_hemco_data.py index 63e1d9e6..dc19014e 100644 --- a/gcpy/format_HEMCO.py +++ b/gcpy/format_hemco_data.py @@ -1,22 +1,28 @@ - +""" +Contains functions to make sure that data files to be read by +HEMCO adhere to the COARDS netCDF conventions. 
+""" import xarray as xr import numpy as np import pandas as pd from copy import deepcopy as dc from os.path import join -def format_HEMCO_dimensions(ds, - start_time="2000-01-01 00:00:00", - lev_long_name="level", - lev_units="level", - lev_formula_terms=None, - gchp=False): + +def format_hemco_dimensions( + dset, + start_time="2000-01-01 00:00:00", + lev_long_name="level", + lev_units="level", + lev_formula_terms=None, + gchp=False +): """ - Formats time, lat, lon, and lev (optionally) attributes for coards + Formats time, lat, lon, and lev (optionally) attributes for coards compliance (HEMCO compatibility). - + Args: - ds: xarray Dataset + dset: xarray Dataset Dataset containing at least latitude and longitude variables, which must be named lat and lon, respectively. @@ -24,11 +30,11 @@ def format_HEMCO_dimensions(ds, start_time: string of the format "YYYY-MM-DD HH:mm:ss" String containing the start time of the dataset for the purposes of encoding the time dimension. For GCHP - compliance, the first time value must be 0 time units + compliance, the first time value must be 0 time units from the beginning of the unit. The default value is January 1, 2000. lev_long_name: string - A detailed description of the level attribute. Examples + A detailed description of the level attribute. Examples include "level", "GEOS-Chem levels", "Eta centers", or "Sigma centers". Default is "level." lev_units: string @@ -38,104 +44,162 @@ def format_HEMCO_dimensions(ds, vertical grids. Default is "level". lev_formula_terms: string or None If data is used that is not on the model vertical grid, the - data must contain surface pressure values and the hybrid coefficients - of the coordinate system together with the terms in the formula - (e.g., ”ap: hyam b: hybm ps: PS”). Default is None. + data must contain surface pressure values and the hybrid + coefficients of the coordinate system together with the + terms in the formula(e.g., ”ap: hyam b: hybm ps: PS”). + Default is None. gchp: boolean - Boolean identifying whether this file is for use in + Boolean identifying whether this file is for use in GCHP (True) or GEOS-Chem Classic (False). This is primarily - used to set the lev attributes. The default value is + used to set the lev attributes. The default value is False. - + Returns: - ds: xarray Dataset - An updated version of ds with encoding and attributes + dset: xarray Dataset + An updated version of dset with encoding and attributes set to be coards/HEMCO compliant. 
""" - # Require that ds is an xarray Dataset object - if not isinstance(ds, xr.Dataset): - raise TypeError("The ds argument must be an xarray Dataset.") + # Require that dset is an xarray Dataset object + if not isinstance(dset, xr.Dataset): + raise TypeError("The dset argument must be an xarray Dataset.") # Check that latitude and longitude are found in the dataset ## First force all dimension names to be lowercase: - ds = ds.rename_dims({k : k.lower() for k in ds.dims.keys() + dset = dset.rename_dims({k : k.lower() for k in dset.dims.keys() if k != k.lower()}) # Check and format each of the required dimensions - ds = _format_lat(ds) - ds = _format_lon(ds) - ds = _format_time(ds, start_time) + dset = _format_lat(dset) + dset = _format_lon(dset) + dset = _format_time(dset, start_time) # If level is included in the dimensions, set its attributes - if "lev" in ds.coords: + if "lev" in dset.coordset: # Note: this is relatively untested (2023/08/21 HON) - ds = _format_lev(ds, lev_long_name, lev_units, + dset = _format_lev(dset, lev_long_name, lev_units, lev_formula_terms, gchp) - + # Require data order to be time, lat, lon (optionally lev) - ds = ds.transpose("time", "lat", "lon", ...) + dset = dset.transpose("time", "lat", "lon", ...) # Return the dataset - return ds + return dset + + +def _update_variable_attributes( + var_attrs, + coards_attrs +): + """ + Adds COARDS conforming variable attributes and/or replaces + existing variable attributes with COARDS-conforming values. + Args: + var_attrs : dict + Dictionary of variable attributes. + coards_attrs : dict + Dictionary of COARDS-conforming variable attributes. + + Returns + var_attrs : dict + Modified dictionary of variable attributes + """ -def _format_lat(ds): + # Test if each COARDS-conforming attribute is + # present in the list of variable attributes. + found = {} + for (name, _) in coards_attrs.items(): + found[name] = name in var_attrs.keys() + + # If the variable attribute has a COARDS-conforming name, + # then replace it with a COARDS-conforming attribute value. + # + # If the variable attribute is missing, then add the + # COARDS-conforming attribute to the list of variable attrs. + # + # This makes sure that we add/replace variable attrs + # but do not clobber any other existing variable attrs. + for (name, value) in coards_attrs.items(): + if found[name]: + var_attrs.update({name: value}) + else: + var_attrs[name] = value + + return var_attrs + + +def _format_lat(dset): ''' Formats the latitude dimension for coards compliance. See define_HEMCO_dimensions for argument listings. ''' # If there is a dimension is called latitude, rename it - # (This function assumes ds has dimension names that are + # (This function assumes ds has dimension names that are # all lower case) - if "latitude" in ds.dims.keys(): - ds = ds.rename_dims({"latitude" : "lat"}) + if "latitude" in dset.dims.keys(): + dset = dset.rename_dims({"latitude" : "lat"}) # Require that lat is a monotonically increasing dimension - _check_required_dim(ds, "lat") + _check_required_dim(dset, "lat") - # Set attributes - ds["lat"].attrs = {"long_name": "latitude", - "units": "degrees_north", - "axis" : "Y"} + # Update attributes to be COARDS-conforming + dset["lat"].attrs = _update_variable_attributes( + dset["lat"].attrs, + coards_attrs={ + "long_name": "latitude", + "units": "degrees_north", + "axis" : "Y" + } + ) - return ds + return dset -def _format_lon(ds): +def _format_lon( + dset +): ''' Formats the longitude dimension for coards compliance. 
     See define_HEMCO_dimensions for argument listings.
     '''
     # If there is a dimension called longitude, rename it
-    # (This function assumes ds has dimension names that are
+    # (This function assumes dset has dimension names that are
     # all lower case)
-    if "longitude" in ds.dims.keys():
-        ds = ds.rename_dims({"longitude" : "lon"})
+    if "longitude" in dset.dims.keys():
+        dset = dset.rename_dims({"longitude" : "lon"})

     # Require that lon is a monotonically increasing dimension
-    _check_required_dim(ds, "lon")
+    _check_required_dim(dset, "lon")

-    # Set attributes
-    ds["lon"].attrs = {"long_name": "longitude",
-                       "units": "degrees_east",
-                       "axis" : "X"}
-
-    return ds
+    # Update attributes to be COARDS-conforming
+    dset["lon"].attrs = _update_variable_attributes(
+        dset["lon"].attrs,
+        coards_attrs={
+            "long_name": "longitude",
+            "units": "degrees_east",
+            "axis" : "X"
+        }
+    )
+
+    return dset


-def _format_time(ds, start_time):
+def _format_time(
+    dset,
+    start_time
+):
     '''
-    Formats the time dimension for coards compliance.
+    Formats the time dimension for COARDS compliance.
     See define_HEMCO_dimensions for argument listings.
     '''
-    if "time" not in ds.coords:
-        # If time isn't already in the coords, create a dummy variable
-        ds = ds.assign_coords(time=pd.to_datetime(start_time))
-        ds = ds.expand_dims("time")
+    if "time" not in dset.coords:
+        # If time isn't already in the coords, create a dummy variable
+        dset = dset.assign_coords(time=pd.to_datetime(start_time))
+        dset = dset.expand_dims("time")
     else:
         # Otherwise, update start_time to match the first time in the file,
         # consistent with GCHP requirements
-        new_start_time = pd.to_datetime(ds["time"][0].values)
+        new_start_time = pd.to_datetime(dset["time"][0].values)
         new_start_time = new_start_time.strftime("%Y-%m-%d %H:%M:%S")
         print(f"Updating the reference start time from")
         print(f"{start_time} to {new_start_time}")
         print(f"so that time(0) = 0, consistent with GCHP requirements.")
         start_time = new_start_time

     # Now check that time is a monotonically increasing dimension
-    _check_required_dim(ds, "time")
+    _check_required_dim(dset, "time")

-    # Set attributes
-    ds["time"].encoding= {"units" : f"hours since {start_time}",
-                          "calendar" : "standard"}
-    ds["time"].attrs = {"long_name" : "Time", "axis" : "T"}
+    # Set attributes and make sure they are COARDS conforming.
+    dset["time"].encoding= {
+        "units" : f"hours since {start_time}",
+        "calendar" : "standard"
+    }
+    dset["time"].attrs = _update_variable_attributes(
+        dset["time"].attrs,
+        coards_attrs={
+            "long_name": "Time",
+            "axis" : "T"
+        }
+    )

-    return ds
+    return dset


-def _format_lev(ds, lev_long_name, lev_units, lev_formula_terms, gchp):
+def _format_lev(dset, lev_long_name, lev_units, lev_formula_terms, gchp):
     '''
-    Formats the level dimension for coards compliance.
+    Formats the level dimension for COARDS compliance.
     See define_HEMCO_dimensions for argument listings.
     '''
     ## HON 2023/08/22: This is relatively untested

     # If there is a dimension called level, rename it
-    if "level" in ds.dims.keys():
-        ds = ds.rename_dims({"level" : "lev"})
-
-    # If formula is provided, check that the components of the
+    if "level" in dset.dims.keys():
+        dset = dset.rename_dims({"level" : "lev"})
+
+    # If formula is provided, check that the components of the
     # formula are included.
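     # (For example, the docstring's lev_formula_terms = "ap: hyam b: hybm
     # ps: PS" expects variables named hyam, hybm, and PS in the dataset.)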
     if lev_formula_terms is not None:
         terms = lev_formula_terms.split(": ")
         terms = [t for i, t in enumerate(terms) if i % 2 == 1]
         for t in terms:
-            if t not in ds.data_vars.keys():
+            if t not in dset.data_vars.keys():
                 raise ValueError(f"{t} is in lev_formula_terms and could \
                                  not be found.")

     # If unit is level, require that the levels are integers
-    if (lev_units == "level") and (ds["lev"] != ds["lev"].astype(int)).any():
+    if lev_units == "level" and \
+       (dset["lev"] != dset["lev"].astype(int)).any():
         raise ValueError("lev has units of level but dimension values \
                          are not integers.")

     # Set attributes
     ## Set positive to match the GCHP/GEOS-Chem conventions
+    positive = "up"
     if gchp:
         positive = "down"
-    else:
-        positive = "up"

+    ## Set attributes and make sure they are COARDS-conforming.
     ## Setting both long_name and units to "level" allows HEMCO
-    ## to regrid between vertical grids (e.g., 47 -> 72 levels).
-    lev_attrs = {"long_name" : lev_long_name,
-                 "units" : lev_units,
-                 "positive" : positive,
-                 "axis" : "Z"}
+    ## to regrid between vertical grids (e.g., 47 -> 72 levels).
+    dset["lev"].attrs = _update_variable_attributes(
+        dset["lev"].attrs,
+        coards_attrs={
+            "long_name" : lev_long_name,
+            "units" : lev_units,
+            "positive" : positive,
+            "axis" : "Z"
+        }
+    )
     if lev_formula_terms is not None:
-        lev_attrs.update({"formula_terms" : lev_formula_terms})
-
-    ## Set the attributes
-    ds["lev"].attrs = lev_attrs
+        dset["lev"].attrs.update({
+            "formula_terms" : lev_formula_terms
+            })

-    return ds
+    return dset


-def _check_required_dim(ds, dim):
+def _check_required_dim(
+    dset,
+    dim
+):
     '''
     Checks required dimensions (time, latitude, and longitude)
     for COARDS compliance (that the dimension exists and is
     monotonically increasing).

     Args:
-        ds: xarray Dataset
+        dset: xarray Dataset
         dim: string ("time", "lat", or "lon")
             A string corresponding to the required dimension
     '''
     if dim not in ["time", "lat", "lon"]:
         raise ValueError(f"{dim} is not a required dimension.")

-    # Check that the dim is included in
-    if dim not in ds.dims.keys():
+    # Check that the dim is included in the dataset
+    if dim not in dset.dims.keys():
         raise ValueError(f"{dim} is not included in the dimensions.")

     # Require that the variable is monotonically increasing
-    if np.any(np.diff(ds[dim]).astype("float") < 0):
+    if np.any(np.diff(dset[dim]).astype("float") < 0):
         raise ValueError(f"{dim} is not monotonically increasing.")

-    return ds
+    return dset


-def format_HEMCO_variable(ds, var, long_name, units, **kwargs):
+def format_hemco_variable(
+    dset,
+    var,
+    long_name,
+    units,
+    **kwargs
+):
     """
-    Formats attributes for non-standard variables for coards compliance
+    Formats attributes for non-standard variables for COARDS compliance
     (HEMCO compatibility).

     Args:
-        ds: xarray Dataset
+        dset: xarray Dataset
             Dataset containing HEMCO input data.
         var: string
             The name of the non-standard variable to be formatted.
         long_name: string
             A required HEMCO attribute, a more descriptive name for
             var.
         units: string
             A required HEMCO attribute giving the units of var. See
             https://hemco.readthedocs.io/en/stable/hco-ref-guide/input-file-format.html
             for more information.
-        kwargs: dictionary
+        **kwargs : dict
             Any other attributes wanted for the variable.

     Returns:
-        ds: xarray Dataset
-        An updated version of ds with variable attributes
-        set to be coards/HEMCO compliant.
+        dset: xarray Dataset
+            An updated version of dset with variable attributes
+            set to be COARDS/HEMCO compliant.
     """
-    ds[var].attrs = {"long_name" : long_name, "units" : units,
-                     **kwargs}
-    return ds
+    dset[var].attrs = {"long_name" : long_name, "units" : units,
+                       **kwargs}
+    return dset


-def save_HEMCO_netcdf(ds, save_dir, save_name, dtype="float", **kwargs):
+def save_hemco_netcdf(
+    dset,
+    save_dir,
+    save_name,
+    dtype="float",
+    **kwargs
+):
     """
-    Saves coards compliant (HEMCO compatible) netcdf.
+    Saves COARDS compliant (HEMCO compatible) netcdf.

     Args:
-        ds: xarray Dataset
+        dset: xarray Dataset
             Dataset containing HEMCO input data.
         save_dir: string
             The directory where the data will be saved.
         save_name: string
             The name the file will be named under.

     Keyword Args (optional):
         dtype: data type
             The data type the data will be saved as. Default is
             float32 to minimize memory usage.
         kwargs: dictionary
             Any other attributes to be passed to the xarray
             to_netcdf function.
     """
     # Check that the save_name ends in .nc
     if save_name.split(".")[-1][:2] != "nc":
         save_name = f"{save_name}.nc"

     # Get time encoding before overwriting
-    time_units = ds["time"].encoding["units"]
-    calendar = ds["time"].encoding["calendar"]
+    time_units = dset["time"].encoding["units"]
+    calendar = dset["time"].encoding["calendar"]

     # Set default encoding and dtype for all variables and coordinates
     encoding = {"_FillValue" : None, "dtype" : dtype}
-    var = {k : dc(encoding) for k in ds.keys()}
-    coord = {k : dc(encoding) for k in ds.coords}
+    var = {k : dc(encoding) for k in dset.keys()}
+    coord = {k : dc(encoding) for k in dset.coords}

     # Manually update the time encoding, which is often overwritten
     # by xarray defaults
     coord["time"]["units"] = time_units
     coord["time"]["calendar"] = calendar
     var.update(coord)

     # Save out
-    ds.to_netcdf(join(save_dir, save_name), encoding=var,
-                 unlimited_dims=["time"], **kwargs)
+    dset.to_netcdf(join(save_dir, save_name), encoding=var,
+                   unlimited_dims=["time"], **kwargs)

     print("-"*70)
     print("Saved to", join(save_dir, save_name))
-    print("-"*70)
\ No newline at end of file
+    print("-"*70)

From 2ffa8e3c6b74148d21aded05a2e90bfa44fba773 Mon Sep 17 00:00:00 2001
From: Bob Yantosca
Date: Wed, 23 Aug 2023 16:12:43 -0400
Subject: [PATCH 03/43] Further updates to format_hemco_data.py

gcpy/format_hemco_data.py
- Reorder imports for optimal ordering according to Pylint
- Now use gcpy.util.verify_variable_type to check argument types
- Remove "f" in front of normal python strings w/ no replacements
- Split code across multiple lines for clarity
- Ensure variables conform to snake_case
- In routine format_hemco_variable, add code to make sure that
  variable attributes aren't clobbered.  Also accept extra attributes
  as a dictionary via **kwargs.
- Trimmed trailing whitespace

Signed-off-by: Bob Yantosca
---
 gcpy/format_hemco_data.py | 75 ++++++++++++++++++++++++++++-----------
 1 file changed, 55 insertions(+), 20 deletions(-)

diff --git a/gcpy/format_hemco_data.py b/gcpy/format_hemco_data.py
index dc19014e..297db837 100644
--- a/gcpy/format_hemco_data.py
+++ b/gcpy/format_hemco_data.py
@@ -2,11 +2,12 @@
 Contains functions to make sure that data files to be read by
 HEMCO adhere to the COARDS netCDF conventions.
""" +from os.path import join +from copy import deepcopy as dc import xarray as xr import numpy as np import pandas as pd -from copy import deepcopy as dc -from os.path import join +from gcpy.util import verify_variable_type def format_hemco_dimensions( @@ -60,8 +61,7 @@ def format_hemco_dimensions( set to be coards/HEMCO compliant. """ # Require that dset is an xarray Dataset object - if not isinstance(dset, xr.Dataset): - raise TypeError("The dset argument must be an xarray Dataset.") + verify_variable_type(dset, xr.Dataset) # Check that latitude and longitude are found in the dataset ## First force all dimension names to be lowercase: @@ -104,6 +104,8 @@ def _update_variable_attributes( var_attrs : dict Modified dictionary of variable attributes """ + verify_variable_type(var_attrs, dict) + verify_variable_type(coards_attrs, dict) # Test if each COARDS-conforming attribute is # present in the list of variable attributes. @@ -201,9 +203,9 @@ def _format_time( # consistent with GCHP requirements new_start_time = pd.to_datetime(dset["time"][0].values) new_start_time = new_start_time.strftime("%Y-%m-%d %H:%M:%S") - print(f"Updating the reference start time from") + print("Updating the reference start time from") print(f"{start_time} to {new_start_time}") - print(f"so that time(0) = 0, consistent with GCHP requirements.") + print("so that time(0) = 0, consistent with GCHP requirements.") start_time = new_start_time # Now check that time is a monotonically increasing dimension @@ -225,7 +227,13 @@ def _format_time( return dset -def _format_lev(dset, lev_long_name, lev_units, lev_formula_terms, gchp): +def _format_lev( + dset, + lev_long_name, + lev_units, + lev_formula_terms, + gchp +): ''' Formats the level dimension for COARDS compliance. See define_HEMCO_dimensions for argument listings. @@ -240,11 +248,13 @@ def _format_lev(dset, lev_long_name, lev_units, lev_formula_terms, gchp): # formula are included. if lev_formula_terms is not None: terms = lev_formula_terms.split(": ") - terms = [t for i, t in enumerate(terms) if i % 2 == 1] - for t in terms: - if t not in dset.data_vars.keys(): - raise ValueError(f"{t} is in lev_formula_terms and could \ - not be found.") + terms = [term for i, term in enumerate(terms) if i % 2 == 1] + for term in terms: + if term not in dset.data_vars.keys(): + raise ValueError( + f"{term} is in lev_formula_terms and could \ + not be found." + ) # If unit is level, require that the levels are integers if lev_units == "level" and \ @@ -271,9 +281,9 @@ def _format_lev(dset, lev_long_name, lev_units, lev_formula_terms, gchp): } ) if lev_formula_terms is not None: - dset["lev"].attrs.update({ - "formula_terms" : lev_formula_terms - }) + dset["lev"].attrs.update({ + "formula_terms" : lev_formula_terms + }) return dset @@ -329,7 +339,7 @@ def format_hemco_variable( A required HEMCO attribute giving the units of var. See https://hemco.readthedocs.io/en/stable/hco-ref-guide/input-file-format.html for more information. - kwargs: dictionary + **kwargs : dict Any other attributes wanted for the variable. Returns: @@ -337,8 +347,25 @@ def format_hemco_variable( An updated version of dset with variable attributes set to be COARDS/HEMCO compliant. 
""" - dset[var].attrs = {"long_name" : long_name, "units" : units, - **kwargs} + verify_variable_type(dset, xr.Dataset) + verify_variable_type(var, str) + verify_variable_type(long_name, str) + verify_variable_type(units, str) + + # Add extra attributes if passed via **kwargs + if len(kwargs) != 0: + for (_, att_dict) in kwargs.items(): + dset[var].attrs.update(att_dict) + + # Update variable attributes to be COARDS-conforming + # without clobbering any pre-existing attributes + dset[var].attrs = _update_variable_attributes( + dset[var].attrs, + coards_attrs={ + "long_name" : long_name, + "units" : units + } + ) return dset @@ -368,6 +395,10 @@ def save_hemco_netcdf( Any other attributes to be passed to the xarray to_netcdf function. """ + verify_variable_type(dset, xr.Dataset) + verify_variable_type(save_dir, str) + verify_variable_type(save_name, str) + # Check that the save_name endset in .nc if save_name.split(".")[-1][:2] != "nc": save_name = f"{save_name}.nc" @@ -388,8 +419,12 @@ def save_hemco_netcdf( var.update(coord) # Save out - dset.to_netcdf(join(save_dir, save_name), encoding=var, - unlimited_dims=["time"], **kwargs) + dset.to_netcdf( + join(save_dir, save_name), + encoding=var, + unlimited_dims=["time"], + **kwargs + ) print("-"*70) print("Saved to", join(save_dir, save_name)) From 1a988266208414a9689a77403d28bed57f8057e2 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Tue, 26 Mar 2024 15:16:10 -0400 Subject: [PATCH 04/43] Split off mass conservation table routine into new script gcpy/benchmark/modules/benchmark_mass_cons_table.py - New script for printing mass conservation table output (from the PassiveTracer species). Ref and Dev versions are now printed in the same table. - Code has been refactored for more clarity and efficiency. We now open all files into a common dataset and instead of opening one file at a time. Also split off common operations into local functions. gcpy/benchmark/modules/benchmark_funcs.py - Removed make_benchmark_mass_conservation_table, this has now been ported to benchmark_mass_cons_table.py CHANGELOG.md - Updated accordingly Signed-off-by: Bob Yantosca --- CHANGELOG.md | 2 + gcpy/benchmark/modules/benchmark_funcs.py | 166 --------- .../modules/benchmark_mass_cons_table.py | 327 ++++++++++++++++++ 3 files changed, 329 insertions(+), 166 deletions(-) create mode 100644 gcpy/benchmark/modules/benchmark_mass_cons_table.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 9d6bd958..a36541bb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Added fixed level budget diagnostic to budget operations table - Function `rename_speciesconc_to_speciesconcvv` in `gcpy/benchmark/modules/benchmark_utils.py` - Function `copy_file_to_dir` in `gcpy/util.py`. This is a wrapper for `shutil.copyfile`. 
+- Script `gcpy/benchmark/modules/benchmark_mass_cons_table.py`, with code to create mass conservation tables ### Changed - Bump pip from 23.2.1 to 23.3 (dependabot suggested this) @@ -58,6 +59,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Environment file `docs/environment_files/environment.yml` - Environment file `docs/environment_files/requirements.txt` - Removed `awscli` from the GCPy environment; version 2 is no longer available on conda-forge or PyPi +- Routine `make_benchmark_mass_conservation_table` in `benchmark_funcs.py`; this is now obsolete ## [1.4.2] - 2024-01-26 ### Added diff --git a/gcpy/benchmark/modules/benchmark_funcs.py b/gcpy/benchmark/modules/benchmark_funcs.py index 27965fd9..7f68855d 100644 --- a/gcpy/benchmark/modules/benchmark_funcs.py +++ b/gcpy/benchmark/modules/benchmark_funcs.py @@ -4915,172 +4915,6 @@ def make_benchmark_operations_budget( gc.collect() -def make_benchmark_mass_conservation_table( - datafiles, - runstr, - dst="./benchmark", - overwrite=False, - areapath=None, - spcdb_dir=os.path.dirname(__file__) -): - """ - Creates a text file containing global mass of the PassiveTracer - from Transport Tracer simulations across a series of restart files. - - Args: - datafiles: list of str - Path names of restart files. - runstr: str - Name to put in the filename and header of the output file - - Keyword Args (optional): - dst: str - A string denoting the destination folder where the file - containing emissions totals will be written. - Default value: "./benchmark" - overwrite: bool - Set this flag to True to overwrite files in the - destination folder (specified by the dst argument). - Default value: False - areapath: str - Path to a restart file containing surface area data. - Default value: None - spcdb_dir: str - Path to the species_database.yml - Default value: points to gcpy/gcpy folder - """ - - # ================================================================== - # Initialize - # ================================================================== - - # Create the destination folder - util.make_directory(dst, overwrite) - - # Load a YAML file containing species properties (such as - # molecular weights), which we will need for unit conversions. 
- properties = util.read_config_file( - os.path.join( - spcdb_dir, - "species_database.yml" - ), - quiet=True - ) - - # Get the species name - spc_name = 'PassiveTracer' - - # Get a list of properties for the given species - species_properties = properties.get(spc_name) - - # Specify target units - target_units = "Tg" - - dates = [] - masses = [] - - # ================================================================== - # Make sure that surface area data is found - # ================================================================== - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=xr.SerializationWarning) - - - # ================================================================== - # Calculate global mass for the tracer at all restart dates - # ================================================================== - for f in datafiles: - ds = xr.open_dataset(f, drop_variables=skip_these_vars) - - # Save date in desired format - #datestr = str(pd.to_datetime(ds.time.values[0])) - #dates.append(datestr[:4] + '-' + datestr[5:7] + '-' + datestr[8:10]) - - # Find the area variable in Dev - if areapath is None: - area = util.get_area_from_dataset(ds) - else: - area = util.get_area_from_dataset( - xr.open_dataset( - areapath, - drop_variables=skip_these_vars - ) - ) - - # Assume typical restart file name format, but avoid using dates - # from within files which may be incorrect for the initial restart - datestr = f.split('/')[-1].split('.')[2][:9] - dates.append(datestr[:4] + '-' + datestr[4:6] + '-' + datestr[6:8]) - - # Select for GCC or GCHP - delta_p = ds['Met_DELPDRY'] if 'Met_DELPDRY' in list(ds.data_vars) else ds['DELP_DRY'] - - # ============================================================== - # Convert units of Ref and save to a DataArray - # (or skip if Ref contains NaNs everywhere) - # ============================================================== - # Select for GCC or GCHP - if 'SpeciesRst_PassiveTracer' in list(ds.data_vars): - attrs = ds['SpeciesRst_PassiveTracer'].attrs - da = ds['SpeciesRst_PassiveTracer'].astype(np.float64) - da.attrs = attrs - else: - attrs = ds['SPC_PassiveTracer'].attrs - da = ds['SPC_PassiveTracer'].astype(np.float64) - da.attrs = attrs - da = convert_units( - da, - spc_name, - species_properties, - target_units, - area_m2=area, - delta_p=delta_p - ) - - # Save total global mass - masses.append(np.sum(da.values)) - - # Clean up - del ds - del da - gc.collect() - - # Calclate max and min mass, absolute diff, percent diff - max_mass = np.max(masses) - min_mass = np.min(masses) - # Convert absdiff to grams - absdiff = (max_mass-min_mass) * 10**12 - pctdiff = (max_mass-min_mass)/min_mass * 100 - - # ================================================================== - # Print masses to file - # ================================================================== - # Create file - outfilename = os.path.join(dst, f"Passive_mass.{runstr}.txt") - - with open(outfilename, 'w') as f: - titlestr = ' Global Mass of Passive Tracer in ' + runstr + ' ' - #headers - print('%' * (len(titlestr)+4), file=f) - print(titlestr, file=f) - print('%' * (len(titlestr)+4), file=f) - print('', file=f) - print(' Date' + ' ' * 8 + 'Mass [Tg]', file=f) - print(' ' + '-' * 10 + ' ' + '-' * 16, file=f) - #masses - for i in range(len(masses)): - print(f" {dates[i]} {masses[i] : 11.13f}", file=f) - print(' ', file=f) - print(' Summary', file=f) - print(' ' + '-' * 30, file=f) - print(f" Max mass = {max_mass : 2.13f} Tg", file=f) - print(f" Min mass = {min_mass : 2.13f} 
Tg", file=f) - print(f" Abs diff = {absdiff : >16.3f} g", file=f) - print(f" Pct diff = {pctdiff : >16.10f} %", file=f) - - gc.collect() - - def get_species_database_dir(config): """ Returns the directory in which the species_database.yml file is diff --git a/gcpy/benchmark/modules/benchmark_mass_cons_table.py b/gcpy/benchmark/modules/benchmark_mass_cons_table.py new file mode 100644 index 00000000..b4067729 --- /dev/null +++ b/gcpy/benchmark/modules/benchmark_mass_cons_table.py @@ -0,0 +1,327 @@ +""" +Creates mass conservation tables from passive tracer concentrations +stored in GEOS-Chem Classic and/or GCHP restart files. +""" +import os +import warnings +import numpy as np +import xarray as xr +from gcpy.constants import skip_these_vars +from gcpy.units import convert_units +from gcpy.util import dataset_reader, get_area_from_dataset, \ + make_directory, read_config_file, verify_variable_type + + +# Constants +SPC_NAME = "PassiveTracer" +TARGET_UNITS = "Tg" + + +def get_area( + area_path, + dset +): + """ + Returns the area variable from a dataset (if present), + or reads it from the supplied file path. + + Args + area_path : str|None : Full file path of area data + dset : xr.Dataset : Input data + + Returns + area : xr.DataArray : Grid box areas [m2] + """ + verify_variable_type(area_path, (str, type(None))) + verify_variable_type(dset, xr.Dataset) + + # If the area variable is present in the data set, return it + if area_path is None: + return get_area_from_dataset(dset) + + # Otherwise read the data from the supplied area_path) + reader = dataset_reader(multi_files=False, verbose=False) + return get_area_from_dataset( + reader(area_path, drop_variables=skip_these_vars).load() + ) + + +def get_delta_pressure( + dset +): + """ + Returns the delta-pressure variable from GEOS-Chem Classic + or GCHP data files. + + Args: + dset : xr.Dataset|xr.DataArray : Input data + """ + verify_variable_type(dset, (xr.Dataset, xr.DataArray)) + + # GEOS-Chem Classic + if 'Met_DELPDRY' in list(dset.data_vars): + return dset['Met_DELPDRY'] + + # GCHP + return dset['DELP_DRY'] + + +def get_passive_tracer_metadata( + spcdb_dir +): + """ + Returns a dictionary with metadata for the passive tracer. + + Args + spcdb_dir : str : Directory containing species_database.yml + + Returns + properties : dict : Dictionary with species metadata + """ + verify_variable_type(spcdb_dir, str) + + spc_name = SPC_NAME + properties = read_config_file( + os.path.join( + spcdb_dir, + "species_database.yml" + ), + quiet=True + ) + + return properties.get(spc_name) + + +def get_passive_tracer_varname( + dset +): + """ + Returns the variable name under which the passive tracer + is stored GEOS-Chem Classic or GCHP restart files. + """ + verify_variable_type(dset, xr.Dataset) + + # Name of species (it's more efficient to copy to local variable!) + name = SPC_NAME + + # GEOS-Chem Classic + if f"SpeciesRst_{name}" in dset.data_vars: + return f"SpeciesRst_{name}" + + # GCHP + return f"SPC_{name}" + + +def compute_total_mass( + t_idx, + dset, + area, + delta_p, + metadata, +): + """ + Computes the total mass (in Tg) for the passive tracer. + + Args + t_idx : int : Time index + dset : xr.Dataset : Data [mol/mol dry air] + area : xr.DataArray : Grid box areas [m2] + delta_p : xr.Dataset : Pressure thicknesses [hPa] + metadata : dict : Dictionary w/ species metdata + + Returns + total_mass : np.float64 : Total mass [Tg] of species. 
+ """ + with xr.set_options(keep_attrs=True): + units = TARGET_UNITS + varname = get_passive_tracer_varname(dset) + darr = convert_units( + dset[varname].astype(np.float64).isel(time=t_idx), + varname, + metadata, + units, + area_m2=area.isel(time=0), + delta_p=delta_p.isel(time=t_idx), + ) + + return np.sum(darr) + + +def compute_statistics(masses): + """ + Returns a dictionary with statistics for total masses. + + Args + masses : np.ndarray : Total masses in Tg + + Returns + statistics : dict : Dictionary with statistics + """ + verify_variable_type(masses, (np.ndarray, list)) + + max_mass = np.max(masses) + min_mass = np.min(masses) + + return { + "max_mass": max_mass, + "min_mass": min_mass, + "absdiff_g": (max_mass - min_mass) * 10**12, + "pctdiff": (max_mass-min_mass)/min_mass * 100, + } + + +def make_benchmark_mass_conservation_table( + ref_files, + ref_label, + dev_files, + dev_label, + dst="./benchmark", + overwrite=False, + ref_areapath=None, + dev_areapath=None, + spcdb_dir=os.path.dirname(__file__) +): + """ + Creates a text file containing global mass of passive species + contained in GEOS-Chem Classic and/or GCHP restart files. + + Args + ref_files : list|str : List of files from the Ref model + ref_label : str : Ref version label + dev_files : list|str : List of files from the Dev model + dev_label : str : Dev version label + dst : str : Destination folder for file output + overwrite : bool : Overwrite pre-existing files? + ref_areapath : list|str : Path to file w/ Ref area data (optional) + dev_areapath : list|str : Path to file w/ Dev area data (optional) + spcdb_dir : str : Path to species database file + """ + + # ================================================================== + # Initialize + # ================================================================== + + # Create the destination folder + make_directory(dst, overwrite) + + # Get a list of properties for the given species + metadata = get_passive_tracer_metadata(spcdb_dir) + + # Preserve xarray attributes + with xr.set_options(keep_attrs=True): + + # ============================================================== + # Read data and make sure time dimensions are consistent + # ============================================================== + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=xr.SerializationWarning) + + # Pick the proper function to read the data + reader = dataset_reader(multi_files=True, verbose=False) + + # Get data + ref_data = reader(ref_files, drop_variables=skip_these_vars).load() + dev_data = reader(dev_files, drop_variables=skip_these_vars).load() + ref_area = get_area(ref_areapath, ref_data) + dev_area = get_area(dev_areapath, dev_data) + ref_delta_prs = get_delta_pressure(ref_data) + dev_delta_prs = get_delta_pressure(dev_data) + + # Number of points in the time dimension + ref_time = ref_data["time"].values + dev_time = dev_data["time"].values + + # Throw an error if Ref & Dev have differing time values + if not np.all(ref_time == dev_time): + msg = "Ref and Dev have inconsistent time values!\n" + raise ValueError(msg) + + # Lists for holding the sum of masses in Ref & Dev + ref_masses = np.zeros(len(dev_time), dtype=np.float64) + dev_masses = np.zeros(len(dev_time), dtype=np.float64) + + # List for holding the dates & times + display_dates = [] + + # ================================================================== + # Calculate global mass for the tracer at all restart dates + # ================================================================== + for 
t_idx, time in enumerate(dev_time): + + # Save datetime string into display_dates list + time = str(np.datetime_as_string(time, unit="m")) + display_dates.append(time.replace("T", " ")) + + # Compute total masses [Tg] for Ref & Dev + ref_masses[t_idx] = compute_total_mass( + t_idx, + ref_data, + ref_area, + ref_delta_prs, + metadata, + ) + dev_masses[t_idx] = compute_total_mass( + t_idx, + dev_data, + dev_area, + dev_delta_prs, + metadata, + ) + + + # ================================================================== + # Print masses and statistics to file + # ================================================================== + + # Get min, max, absdiff, maxdiff for Ref & Dev + ref_stats = compute_statistics(ref_masses) + dev_stats = compute_statistics(dev_masses) + + # Create file + outfilename = os.path.join( + dst, + f"Passive_mass.{ref_label}_vs_{dev_label}.txt" + ) + with open(outfilename, 'w', encoding="utf-8") as ofile: + + # Title + print("="*79, file=ofile) + print("Global mass of PassiveTracer", file=ofile) + print("", file=ofile) + print(f"Ref = {ref_label}", file=ofile) + print(f"Dev = {dev_label}", file=ofile) + print("="*79, file=ofile) + + # Headers + print("", file=ofile) + template = " Date & Time" + " "*18 + "Ref mass [Tg]" + template += " "*13 + "Dev mass [Tg]" + print(template, file=ofile) + template = " " + "-"*17 + " "*5 + "-"*20 + " "*6 + "-"*20 + print(template, file=ofile) + + # Total masses + for t_idx, time in enumerate(display_dates): + template = f" {time} " + template +=f"{ref_masses[t_idx] : >20.13f} " + template +=f"{dev_masses[t_idx] : >20.13f}" + print(template, file=ofile) + print(" ", file=ofile) + + # Statistics + template = " Summary" + " "*32+ "Ref" + " "*23 + "Dev" + print(template, file=ofile) + template = " " + "-"*17 + " "*5 + "-"*20 + " "*6 + "-"*20 + print(template, file=ofile) + template = f" Maximum mass [Tg] {ref_stats['max_mass'] : >20.13f}" + template+= f" {dev_stats['max_mass'] : >20.13f}" + print(template, file=ofile) + template = f" Minimum mass [Tg] {ref_stats['min_mass'] : >20.13f}" + template+= f" {dev_stats['min_mass'] : >20.13f}" + print(template, file=ofile) + template = f" Abs diff [g] {ref_stats['absdiff_g'] : >20.13f}" + template+= f" {dev_stats['absdiff_g'] : >20.13f}" + print(template, file=ofile) + template = f" % difference {ref_stats['pctdiff'] : >20.13f}" + template+= f" {dev_stats['pctdiff'] : >20.13f}" + print(template, file=ofile) From ea2261ba9c449180aa186315a523b73bf0fd97b4 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Tue, 26 Mar 2024 16:25:06 -0400 Subject: [PATCH 05/43] run_1yr_tt_benchmark.py now uses benchmark_mass_cons_table.py gcpy/benchmark/modules/benchmark_mass_cons_table.py - Bug fix: Test if the area variable has a time dimension before trying to use .isel(time=0) - Add extra calls to verify_variable_type gcpy/benchmark/modules/run_1yr_tt_benchmark.py - No longer import make_benchmark_mass_conservation_table from benchmark_funcs.py; this has been removed - Import make_benchmark_mass_conservation_table from the benchmark_mass_cons_table.py module - Now pass Ref & Dev arguments to make_benchmark_mass_conservation_table --- .../modules/benchmark_mass_cons_table.py | 19 ++++++++- .../benchmark/modules/run_1yr_tt_benchmark.py | 39 +++++-------------- 2 files changed, 28 insertions(+), 30 deletions(-) diff --git a/gcpy/benchmark/modules/benchmark_mass_cons_table.py b/gcpy/benchmark/modules/benchmark_mass_cons_table.py index b4067729..f2f5098d 100644 --- a/gcpy/benchmark/modules/benchmark_mass_cons_table.py 
+++ b/gcpy/benchmark/modules/benchmark_mass_cons_table.py
@@ -133,14 +133,22 @@ def compute_total_mass(
         total_mass : np.float64 : Total mass [Tg] of species.
     """
     with xr.set_options(keep_attrs=True):
+
+        # Local variables
         units = TARGET_UNITS
         varname = get_passive_tracer_varname(dset)
+
+        # If area has multiple time slices, take the first one
+        if "time" in area.dims:
+            area = area.isel(time=0)
+
+        # Compute mass in Tg
         darr = convert_units(
             dset[varname].astype(np.float64).isel(time=t_idx),
             varname,
             metadata,
             units,
-            area_m2=area.isel(time=0),
+            area_m2=area,
             delta_p=delta_p.isel(time=t_idx),
         )

@@ -196,6 +204,15 @@ def make_benchmark_mass_conservation_table(
         dev_areapath : list|str : Path to file w/ Dev area data (optional)
         spcdb_dir    : str      : Path to species database file
     """
+    verify_variable_type(ref_files, (list, str))
+    verify_variable_type(ref_label, str)
+    verify_variable_type(dev_files, (list, str))
+    verify_variable_type(dev_label, str)
+    verify_variable_type(dst, (str, type(None)))
+    verify_variable_type(overwrite, bool)
+    verify_variable_type(ref_areapath, (str, type(None)))
+    verify_variable_type(dev_areapath, (str, type(None)))
+    verify_variable_type(spcdb_dir, str)

     # ==================================================================
     # Initialize
diff --git a/gcpy/benchmark/modules/run_1yr_tt_benchmark.py b/gcpy/benchmark/modules/run_1yr_tt_benchmark.py
index b94055bd..abea2ca3 100644
--- a/gcpy/benchmark/modules/run_1yr_tt_benchmark.py
+++ b/gcpy/benchmark/modules/run_1yr_tt_benchmark.py
@@ -61,9 +61,11 @@
 from gcpy.benchmark.modules.benchmark_funcs import \
     get_species_database_dir, make_benchmark_conc_plots, \
     make_benchmark_wetdep_plots, make_benchmark_mass_tables, \
-    make_benchmark_operations_budget, make_benchmark_mass_conservation_table
+    make_benchmark_operations_budget
 from gcpy.benchmark.modules.budget_tt import transport_tracers_budgets
 from gcpy.benchmark.modules.ste_flux import make_benchmark_ste_table
+from gcpy.benchmark.modules.benchmark_mass_cons_table import \
+    make_benchmark_mass_conservation_table
 from gcpy.benchmark.modules.benchmark_utils import print_benchmark_info

 # Tell matplotlib not to look for an X-window
@@ -1265,17 +1267,10 @@ def gchp_vs_gchp_mass_table(mon):
             all_months_dev
         )[0]

-        # Ref
+        # Create table
         make_benchmark_mass_conservation_table(
             ref_datafiles,
             config["data"]["ref"]["gcc"]["version"],
-            dst=gcc_vs_gcc_tablesdir,
-            overwrite=True,
-            spcdb_dir=spcdb_dir,
-        )
-
-        # Dev
-        make_benchmark_mass_conservation_table(
             dev_datafiles,
             config["data"]["dev"]["gcc"]["version"],
             dst=gcc_vs_gcc_tablesdir,
             overwrite=True,
             spcdb_dir=spcdb_dir,
         )

     # ===================================================================
     # Create mass conservation table for GCHP vs GCC
     # ===================================================================
-    if config["options"]["comparisons"]["gcc_vs_gcc"]["run"]:
+    if config["options"]["comparisons"]["gchp_vs_gcc"]["run"]:
         print("\n%%% Creating GCHP vs GCC mass conservation tables %%%")

         # Filepaths
@@ -1317,23 +1312,16 @@
             gchp_is_pre_14_0=config["data"]["dev"]["gchp"]["is_pre_14.0"],
         )

-        # Ref
+        # Create table
         make_benchmark_mass_conservation_table(
             ref_datafiles,
             config["data"]["dev"]["gcc"]["version"],
-            dst=gchp_vs_gcc_tablesdir,
-            overwrite=True,
-            spcdb_dir=spcdb_dir,
-        )
-
-        # Dev
-        make_benchmark_mass_conservation_table(
             dev_datafiles,
             config["data"]["dev"]["gchp"]["version"],
             dst=gchp_vs_gcc_tablesdir,
             overwrite=True,
             spcdb_dir=spcdb_dir,
-            areapath=dev_areapath,
+            dev_areapath=dev_areapath,
         )

     # =====================================================================
     # Create mass conservation tables for GCHP vs GCHP
     # =====================================================================
@@ -1381,24 +1369,17 @@
             gchp_is_pre_14_0=config["data"]["dev"]["gchp"]["is_pre_14.0"],
         )

-        # Ref
+        # Create table
         make_benchmark_mass_conservation_table(
             ref_datafiles,
             config["data"]["ref"]["gchp"]["version"],
-            dst=gchp_vs_gchp_tablesdir,
-            overwrite=True,
-            spcdb_dir=spcdb_dir,
-            areapath=ref_areapath
-        )
-
-        # Dev
-        make_benchmark_mass_conservation_table(
             dev_datafiles,
             config["data"]["dev"]["gchp"]["version"],
             dst=gchp_vs_gchp_tablesdir,
             overwrite=True,
             spcdb_dir=spcdb_dir,
-            areapath=dev_areapath
+            ref_areapath=ref_areapath,
+            dev_areapath=dev_areapath,
         )

     # ==================================================================

From b2a95774a8f9379b72337c93f8c81633490ba199 Mon Sep 17 00:00:00 2001
From: Bob Yantosca
Date: Tue, 26 Mar 2024 17:21:06 -0400
Subject: [PATCH 06/43] Update comments for clarity in
 benchmark_mass_cons_table.py

gcpy/benchmark/modules/benchmark_mass_cons_table.py
- Add Pydoc comments for get_passive_tracer_varname
- Update comments for the time dimension

Signed-off-by: Bob Yantosca
---
 gcpy/benchmark/modules/benchmark_mass_cons_table.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/gcpy/benchmark/modules/benchmark_mass_cons_table.py b/gcpy/benchmark/modules/benchmark_mass_cons_table.py
index f2f5098d..561729b8 100644
--- a/gcpy/benchmark/modules/benchmark_mass_cons_table.py
+++ b/gcpy/benchmark/modules/benchmark_mass_cons_table.py
@@ -98,6 +98,12 @@ def get_passive_tracer_varname(
     """
     Returns the variable name under which the passive tracer
     is stored in GEOS-Chem Classic or GCHP restart files.
+
+    Args
+        dset : xr.Dataset : The input data
+
+    Returns
+        varname : str : Variable name for passive tracer
     """
     verify_variable_type(dset, xr.Dataset)

@@ -244,11 +250,11 @@ def make_benchmark_mass_conservation_table(
         ref_delta_prs = get_delta_pressure(ref_data)
         dev_delta_prs = get_delta_pressure(dev_data)

-        # Number of points in the time dimension
+        # Get datetime values
         ref_time = ref_data["time"].values
         dev_time = dev_data["time"].values

-        # Throw an error if Ref & Dev have differing time values
+        # Throw an error if Ref & Dev have differing datetime values
         if not np.all(ref_time == dev_time):
             msg = "Ref and Dev have inconsistent time values!\n"
             raise ValueError(msg)
@@ -257,7 +263,7 @@
         ref_masses = np.zeros(len(dev_time), dtype=np.float64)
         dev_masses = np.zeros(len(dev_time), dtype=np.float64)

-        # List for holding the dates & times
+        # List for holding the datetimes
         display_dates = []

     # ==================================================================
@@ -285,7 +291,6 @@
                 metadata,
             )

-
     # ==================================================================
     # Print masses and statistics to file
     # ==================================================================

From 5c8df1325a4e59ce5d70358d7ece11028234df1d Mon Sep 17 00:00:00 2001
From: Bob Yantosca
Date: Wed, 27 Mar 2024 16:10:20 -0400
Subject: [PATCH 07/43] Expand statistics output in mass conservation tables

gcpy/benchmark/modules/benchmark_mass_cons_table.py
- Add extra fields to statistics output:
  - Start mass
  - End mass
  - Abs diff
  - % diff
  - Mean mass
  - Variance
- Also add extra columns for Abs Diff & % Diff for each of these

CHANGELOG.md
- Updated accordingly

Signed-off-by: Bob Yantosca
---
 .../modules/benchmark_mass_cons_table.py      | 171 ++++++++++++++++--
 1 file changed, 156 insertions(+), 15 deletions(-)
156 insertions(+), 15 deletions(-) diff --git a/gcpy/benchmark/modules/benchmark_mass_cons_table.py b/gcpy/benchmark/modules/benchmark_mass_cons_table.py index 561729b8..1483d267 100644 --- a/gcpy/benchmark/modules/benchmark_mass_cons_table.py +++ b/gcpy/benchmark/modules/benchmark_mass_cons_table.py @@ -175,12 +175,100 @@ def compute_statistics(masses): max_mass = np.max(masses) min_mass = np.min(masses) + start_mass = masses[0] + end_mass = masses[-1] return { - "max_mass": max_mass, - "min_mass": min_mass, - "absdiff_g": (max_mass - min_mass) * 10**12, - "pctdiff": (max_mass-min_mass)/min_mass * 100, + "min_mass" : min_mass, + "max_mass" : max_mass, + "minmax_absdiff_g" : (max_mass - min_mass) * 1.0e12, + "minmax_pctdiff" : (max_mass - min_mass)/min_mass * 100.0, + "start_mass" : start_mass, + "end_mass" : end_mass, + "startend_absdiff_g" : (end_mass - start_mass) * 1.0e12, + "startend_pctdiff" : (end_mass - start_mass)/start_mass * 100.0, + "mean_mass" : np.mean(masses), + "variance" : np.var(masses), + } + + +def compute_diff( + key, + ref, + dev +): + """ + Computes the difference in two dictionaries (Dev - Ref) for + a given search key. + + key : str : Search key + ref : dict : Dictionary of values from Ref model + dev : dict : Dictionary of values from Dev model + + Returns + diffs : dict : Absolute & percent differences btw Dev & Ref for key + """ + verify_variable_type(key, str) + verify_variable_type(ref, dict) + verify_variable_type(dev, dict) + + return { + "absdiff": dev[key] - ref[key], + "pctdiff": ((dev[key] - ref[key]) / ref[key]) * 100.0 + } + + +def compute_diff_statistics( + ref, + dev +): + """ + Computes difference statistics between the Ref and Dev versions. + + Args + ref : dict : Statistics for Ref model + dev : dict : Statistics for Dev model + + Returns + diff_stats : dict : Difference statistics between Dev and Ref + """ + verify_variable_type(ref, dict) + verify_variable_type(dev, dict) + + min_mass = compute_diff("min_mass", ref, dev) + max_mass = compute_diff("max_mass", ref, dev) + minmax_absdiff_g = compute_diff("minmax_absdiff_g", ref, dev) + minmax_pctdiff = compute_diff("minmax_pctdiff", ref, dev) + start_mass = compute_diff("start_mass", ref, dev) + end_mass = compute_diff("end_mass", ref, dev) + startend_absdiff_g = compute_diff("startend_absdiff_g", ref, dev) + startend_pctdiff = compute_diff("startend_pctdiff", ref, dev) + mean_mass = compute_diff("mean_mass", ref, dev) + variance = compute_diff("variance", ref, dev) + + return { + "min_mass__absdiff" : min_mass["absdiff"], + "min_mass__pctdiff" : min_mass["pctdiff"], + "max_mass__absdiff" : max_mass["absdiff"], + "max_mass__pctdiff" : max_mass["pctdiff"], + "minmax_absdiff_g__absdiff" : minmax_absdiff_g["absdiff"], + "minmax_absdiff_g__pctdiff" : minmax_absdiff_g["pctdiff"], + "minmax_pctdiff__absdiff" : minmax_pctdiff["absdiff"], + "minmax_pctdiff__pctdiff" : minmax_pctdiff["pctdiff"], + + "start_mass__absdiff" : start_mass["absdiff"], + "start_mass__pctdiff" : start_mass["pctdiff"], + "end_mass__absdiff" : end_mass["absdiff"], + "end_mass__pctdiff" : end_mass["pctdiff"], + "startend_absdiff_g__absdiff" : startend_absdiff_g["absdiff"], + "startend_absdiff_g__pctdiff" : startend_absdiff_g["pctdiff"], + "startend_pctdiff__absdiff" : startend_pctdiff["absdiff"], + "startend_pctdiff__pctdiff" : startend_pctdiff["pctdiff"], + + "mean_mass__absdiff" : mean_mass["absdiff"], + "mean_mass__pctdiff" : mean_mass["pctdiff"], + "variance__absdiff" : variance["absdiff"], + "variance__pctdiff" : 
variance["pctdiff"], } @@ -298,6 +386,7 @@ def make_benchmark_mass_conservation_table( # Get min, max, absdiff, maxdiff for Ref & Dev ref_stats = compute_statistics(ref_masses) dev_stats = compute_statistics(dev_masses) + diff_stats = compute_diff_statistics(ref_stats, dev_stats) # Create file outfilename = os.path.join( @@ -316,8 +405,8 @@ def make_benchmark_mass_conservation_table( # Headers print("", file=ofile) - template = " Date & Time" + " "*18 + "Ref mass [Tg]" - template += " "*13 + "Dev mass [Tg]" + template = " Date & Time" + " "*18 + "Ref mass [Tg]" + template +=" "*13 + "Dev mass [Tg]" print(template, file=ofile) template = " " + "-"*17 + " "*5 + "-"*20 + " "*6 + "-"*20 print(template, file=ofile) @@ -332,18 +421,70 @@ def make_benchmark_mass_conservation_table( # Statistics template = " Summary" + " "*32+ "Ref" + " "*23 + "Dev" + template += " "*6 + "Abs Diff" + " % Diff" print(template, file=ofile) - template = " " + "-"*17 + " "*5 + "-"*20 + " "*6 + "-"*20 + template = " " + "-"*17 + " "*5 + "-"*20 + " "*6 + "-"*20 + template += " " + "-"*13 + " " + "-"*7 print(template, file=ofile) - template = f" Maximum mass [Tg] {ref_stats['max_mass'] : >20.13f}" - template+= f" {dev_stats['max_mass'] : >20.13f}" + template = " Maximum mass [Tg] " + template += f"{ref_stats['max_mass'] : >20.13f} " + template += f"{dev_stats['max_mass'] : >20.13f} " + template += f"{diff_stats['max_mass__absdiff'] : >13.6f} " + template += f"{diff_stats['max_mass__pctdiff'] : >7.3f}" print(template, file=ofile) - template = f" Minimum mass [Tg] {ref_stats['min_mass'] : >20.13f}" - template+= f" {dev_stats['min_mass'] : >20.13f}" + template = " Minimum mass [Tg] " + template += f"{ref_stats['min_mass'] : >20.13f} " + template += f"{dev_stats['min_mass'] : >20.13f} " + template += f"{diff_stats['min_mass__absdiff'] : >13.6f} " + template += f"{diff_stats['min_mass__pctdiff'] : >7.3f} " print(template, file=ofile) - template = f" Abs diff [g] {ref_stats['absdiff_g'] : >20.13f}" - template+= f" {dev_stats['absdiff_g'] : >20.13f}" + template = " Abs diff [g] " + template += f"{ref_stats['minmax_absdiff_g'] : >20.13f} " + template += f"{dev_stats['minmax_absdiff_g'] : >20.13f} " + template += f"{diff_stats['minmax_absdiff_g__absdiff'] : >13.6f} " + template += f"{diff_stats['minmax_absdiff_g__pctdiff'] : >7.3f}" + print(template, file=ofile) + template = " % difference " + template += f"{ref_stats['minmax_pctdiff'] : >20.13f} " + template += f"{dev_stats['minmax_pctdiff'] : >20.13f} " + template += f"{diff_stats['minmax_pctdiff__absdiff'] : >13.6f} " + template += f"{diff_stats['minmax_pctdiff__pctdiff'] : >7.3f}" + print(template, file=ofile) + print("", file=ofile) + template = " Start mass [Tg] " + template += f"{ref_stats['start_mass'] : >20.13f} " + template += f"{dev_stats['start_mass'] : >20.13f} " + template += f"{diff_stats['start_mass__absdiff'] : >13.6f} " + template += f"{diff_stats['start_mass__pctdiff'] : >7.3f}" + print(template, file=ofile) + template = " End mass [Tg] " + template += f"{ref_stats['end_mass'] : >20.13f} " + template += f"{dev_stats['end_mass'] : >20.13f} " + template += f"{diff_stats['end_mass__absdiff'] : >13.6f} " + template += f"{diff_stats['end_mass__pctdiff'] : >7.3f}" + print(template, file=ofile) + template = " Abs diff [g] " + template += f"{ref_stats['startend_absdiff_g'] : >20.13f} " + template += f"{dev_stats['startend_absdiff_g'] : >20.13f} " + template += f"{diff_stats['startend_absdiff_g__absdiff'] : >13.6f} " + template += 
f"{diff_stats['startend_absdiff_g__pctdiff'] : >7.3f}" + print(template, file=ofile) + template = " % difference " + template += f"{ref_stats['startend_pctdiff'] : >20.13f} " + template += f"{dev_stats['startend_pctdiff'] : >20.13f} " + template += f"{diff_stats['startend_pctdiff__absdiff'] : >13.6f} " + template += f"{diff_stats['startend_pctdiff__pctdiff'] : >7.3f}" + print(template, file=ofile) + print("", file=ofile) + template = " Mean mass [Tg] " + template += f"{ref_stats['mean_mass']:>20.13f} " + template += f"{dev_stats['mean_mass']:>20.13f} " + template += f"{diff_stats['mean_mass__absdiff']:>13.6f} " + template += f"{diff_stats['mean_mass__pctdiff']:>7.3f}" print(template, file=ofile) - template = f" % difference {ref_stats['pctdiff'] : >20.13f}" - template+= f" {dev_stats['pctdiff'] : >20.13f}" + template = " Variance [Tg] " + template += f"{ref_stats['variance']:>20.13f} " + template += f"{dev_stats['variance']:>20.13f} " + template += f"{diff_stats['variance__absdiff']:>13.6f} " + template += f"{diff_stats['variance__pctdiff']:>7.3f}" print(template, file=ofile) From 0c3bca7f52b73b54b44071fbda460aa5b841a5cc Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Fri, 29 Mar 2024 14:42:03 -0400 Subject: [PATCH 08/43] Add "Abs Diff" & "% Diff" columns in mass conservation table gcpy/benchmark/modules/benchmark_mass_cons_table.py - Added "Abs Diff" & "% Diff" columns to the list of Ref & Dev total masses CHANGELOG.md - Updated accordingly Signed-off-by: Bob Yantosca --- CHANGELOG.md | 1 + .../modules/benchmark_mass_cons_table.py | 21 ++++++++++++------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a36541bb..1ebd93b7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Function `rename_speciesconc_to_speciesconcvv` in `gcpy/benchmark/modules/benchmark_utils.py` - Function `copy_file_to_dir` in `gcpy/util.py`. This is a wrapper for `shutil.copyfile`. 
- Script `gcpy/benchmark/modules/benchmark_mass_cons_table.py`, with code to create mass conservation tables +- Expanded statistics output in benchmark mass conservation tables ### Changed - Bump pip from 23.2.1 to 23.3 (dependabot suggested this) diff --git a/gcpy/benchmark/modules/benchmark_mass_cons_table.py b/gcpy/benchmark/modules/benchmark_mass_cons_table.py index 1483d267..a6faf4d1 100644 --- a/gcpy/benchmark/modules/benchmark_mass_cons_table.py +++ b/gcpy/benchmark/modules/benchmark_mass_cons_table.py @@ -405,26 +405,31 @@ def make_benchmark_mass_conservation_table( # Headers print("", file=ofile) - template = " Date & Time" + " "*18 + "Ref mass [Tg]" - template +=" "*13 + "Dev mass [Tg]" + template = " Date & Time" + " "*18 + "Ref mass [Tg]" + " "*13 + template += "Dev mass [Tg]"+ " "*6 + "Abs Diff" + " % Diff" print(template, file=ofile) - template = " " + "-"*17 + " "*5 + "-"*20 + " "*6 + "-"*20 + template = " " + "-"*17 + " "*5 + "-"*20 + " "*6 + "-"*20 + template += " " + "-"*13 + " " + "-"*7 print(template, file=ofile) # Total masses for t_idx, time in enumerate(display_dates): - template = f" {time} " - template +=f"{ref_masses[t_idx] : >20.13f} " - template +=f"{dev_masses[t_idx] : >20.13f}" + absdiff = dev_masses[t_idx] - ref_masses[t_idx] + pctdiff = (absdiff / ref_masses[t_idx]) * 100.0 + template = f" {time} " + template += f"{ref_masses[t_idx] : >20.13f} " + template += f"{dev_masses[t_idx] : >20.13f} " + template += f"{absdiff : >13.6f} " + template += f"{pctdiff : >7.3f}" print(template, file=ofile) print(" ", file=ofile) # Statistics - template = " Summary" + " "*32+ "Ref" + " "*23 + "Dev" + template = " Summary" + " "*32+ "Ref" + " "*23 + "Dev" template += " "*6 + "Abs Diff" + " % Diff" print(template, file=ofile) template = " " + "-"*17 + " "*5 + "-"*20 + " "*6 + "-"*20 - template += " " + "-"*13 + " " + "-"*7 + template += " " + "-"*13 + " " + "-"*7 print(template, file=ofile) template = " Maximum mass [Tg] " template += f"{ref_stats['max_mass'] : >20.13f} " From 486815ba67b886fa60129c89a7a7df21a1234654 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Fri, 29 Mar 2024 15:45:47 -0400 Subject: [PATCH 09/43] Add more tweaks to the mass conservation table gcpy/benchmark/modules/benchmark_mass_cons_table.py - Now compute mean_mass and variance with dtype=np.float64 - Use 13.4e right-adjusted format for "Abs Diff" column - Use 8.3f right-adjusted format for "% Diff" column - Update underlines accordingly - Add an extra space between "Abs Diff" and "% Diff" column - Now display variance with 20.13e right-adjusted format, since it can potentially be very small Signed-off-by: Bob Yantosca --- .../modules/benchmark_mass_cons_table.py | 60 +++++++++---------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/gcpy/benchmark/modules/benchmark_mass_cons_table.py b/gcpy/benchmark/modules/benchmark_mass_cons_table.py index a6faf4d1..63104cda 100644 --- a/gcpy/benchmark/modules/benchmark_mass_cons_table.py +++ b/gcpy/benchmark/modules/benchmark_mass_cons_table.py @@ -187,8 +187,8 @@ def compute_statistics(masses): "end_mass" : end_mass, "startend_absdiff_g" : (end_mass - start_mass) * 1.0e12, "startend_pctdiff" : (end_mass - start_mass)/start_mass * 100.0, - "mean_mass" : np.mean(masses), - "variance" : np.var(masses), + "mean_mass" : np.mean(masses, dtype=np.float64), + "variance" : np.var(masses, dtype=np.float64), } @@ -406,10 +406,10 @@ def make_benchmark_mass_conservation_table( # Headers print("", file=ofile) template = " Date & Time" + " "*18 + "Ref mass 
[Tg]" + " "*13 - template += "Dev mass [Tg]"+ " "*6 + "Abs Diff" + " % Diff" + template += "Dev mass [Tg]"+ " "*6 + "Abs Diff" + " % Diff" print(template, file=ofile) template = " " + "-"*17 + " "*5 + "-"*20 + " "*6 + "-"*20 - template += " " + "-"*13 + " " + "-"*7 + template += " " + "-"*13 + " " + "-"*8 print(template, file=ofile) # Total masses @@ -419,77 +419,77 @@ def make_benchmark_mass_conservation_table( template = f" {time} " template += f"{ref_masses[t_idx] : >20.13f} " template += f"{dev_masses[t_idx] : >20.13f} " - template += f"{absdiff : >13.6f} " - template += f"{pctdiff : >7.3f}" + template += f"{absdiff : >13.4e} " + template += f"{pctdiff : >8.3f}" print(template, file=ofile) print(" ", file=ofile) # Statistics template = " Summary" + " "*32+ "Ref" + " "*23 + "Dev" - template += " "*6 + "Abs Diff" + " % Diff" + template += " "*6 + "Abs Diff" + " % Diff" print(template, file=ofile) template = " " + "-"*17 + " "*5 + "-"*20 + " "*6 + "-"*20 - template += " " + "-"*13 + " " + "-"*7 + template += " " + "-"*13 + " " + "-"*8 print(template, file=ofile) template = " Maximum mass [Tg] " template += f"{ref_stats['max_mass'] : >20.13f} " template += f"{dev_stats['max_mass'] : >20.13f} " - template += f"{diff_stats['max_mass__absdiff'] : >13.6f} " - template += f"{diff_stats['max_mass__pctdiff'] : >7.3f}" + template += f"{diff_stats['max_mass__absdiff'] : >13.4e} " + template += f"{diff_stats['max_mass__pctdiff'] : >8.3f}" print(template, file=ofile) template = " Minimum mass [Tg] " template += f"{ref_stats['min_mass'] : >20.13f} " template += f"{dev_stats['min_mass'] : >20.13f} " - template += f"{diff_stats['min_mass__absdiff'] : >13.6f} " - template += f"{diff_stats['min_mass__pctdiff'] : >7.3f} " + template += f"{diff_stats['min_mass__absdiff'] : >13.4e} " + template += f"{diff_stats['min_mass__pctdiff'] : >8.3f}" print(template, file=ofile) template = " Abs diff [g] " template += f"{ref_stats['minmax_absdiff_g'] : >20.13f} " template += f"{dev_stats['minmax_absdiff_g'] : >20.13f} " - template += f"{diff_stats['minmax_absdiff_g__absdiff'] : >13.6f} " - template += f"{diff_stats['minmax_absdiff_g__pctdiff'] : >7.3f}" + template += f"{diff_stats['minmax_absdiff_g__absdiff'] : >13.4e} " + template += f"{diff_stats['minmax_absdiff_g__pctdiff'] : >8.3f}" print(template, file=ofile) template = " % difference " template += f"{ref_stats['minmax_pctdiff'] : >20.13f} " template += f"{dev_stats['minmax_pctdiff'] : >20.13f} " - template += f"{diff_stats['minmax_pctdiff__absdiff'] : >13.6f} " - template += f"{diff_stats['minmax_pctdiff__pctdiff'] : >7.3f}" + template += f"{diff_stats['minmax_pctdiff__absdiff'] : >13.4e} " + template += f"{diff_stats['minmax_pctdiff__pctdiff'] : >8.3f}" print(template, file=ofile) print("", file=ofile) template = " Start mass [Tg] " template += f"{ref_stats['start_mass'] : >20.13f} " template += f"{dev_stats['start_mass'] : >20.13f} " - template += f"{diff_stats['start_mass__absdiff'] : >13.6f} " - template += f"{diff_stats['start_mass__pctdiff'] : >7.3f}" + template += f"{diff_stats['start_mass__absdiff'] : >13.4e} " + template += f"{diff_stats['start_mass__pctdiff'] : >8.3f}" print(template, file=ofile) template = " End mass [Tg] " template += f"{ref_stats['end_mass'] : >20.13f} " template += f"{dev_stats['end_mass'] : >20.13f} " - template += f"{diff_stats['end_mass__absdiff'] : >13.6f} " - template += f"{diff_stats['end_mass__pctdiff'] : >7.3f}" + template += f"{diff_stats['end_mass__absdiff'] : >13.4e} " + template += f"{diff_stats['end_mass__pctdiff'] : 
>8.3f}" print(template, file=ofile) template = " Abs diff [g] " template += f"{ref_stats['startend_absdiff_g'] : >20.13f} " template += f"{dev_stats['startend_absdiff_g'] : >20.13f} " - template += f"{diff_stats['startend_absdiff_g__absdiff'] : >13.6f} " - template += f"{diff_stats['startend_absdiff_g__pctdiff'] : >7.3f}" + template += f"{diff_stats['startend_absdiff_g__absdiff'] : >13.4e} " + template += f"{diff_stats['startend_absdiff_g__pctdiff'] : >8.3f}" print(template, file=ofile) template = " % difference " template += f"{ref_stats['startend_pctdiff'] : >20.13f} " template += f"{dev_stats['startend_pctdiff'] : >20.13f} " - template += f"{diff_stats['startend_pctdiff__absdiff'] : >13.6f} " - template += f"{diff_stats['startend_pctdiff__pctdiff'] : >7.3f}" + template += f"{diff_stats['startend_pctdiff__absdiff'] : >13.4e} " + template += f"{diff_stats['startend_pctdiff__pctdiff'] : >8.3f}" print(template, file=ofile) print("", file=ofile) template = " Mean mass [Tg] " template += f"{ref_stats['mean_mass']:>20.13f} " template += f"{dev_stats['mean_mass']:>20.13f} " - template += f"{diff_stats['mean_mass__absdiff']:>13.6f} " - template += f"{diff_stats['mean_mass__pctdiff']:>7.3f}" + template += f"{diff_stats['mean_mass__absdiff']:>13.4e} " + template += f"{diff_stats['mean_mass__pctdiff']:>8.3f}" print(template, file=ofile) template = " Variance [Tg] " - template += f"{ref_stats['variance']:>20.13f} " - template += f"{dev_stats['variance']:>20.13f} " - template += f"{diff_stats['variance__absdiff']:>13.6f} " - template += f"{diff_stats['variance__pctdiff']:>7.3f}" + template += f"{ref_stats['variance']:>20.13e} " + template += f"{dev_stats['variance']:>20.13e} " + template += f"{diff_stats['variance__absdiff']:>13.4e} " + template += f"{diff_stats['variance__pctdiff']:>8.3f}" print(template, file=ofile) From 8e5c0bac6fc0b8e708b2b2fe7e49252b348cfa5f Mon Sep 17 00:00:00 2001 From: Yuanjian Zhang Date: Mon, 8 Apr 2024 11:31:31 -0500 Subject: [PATCH 10/43] Enabled 1 month Sigdiff benchmarking --- gcpy/benchmark_funcs.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/gcpy/benchmark_funcs.py b/gcpy/benchmark_funcs.py index 18f7ffd3..e18e0390 100644 --- a/gcpy/benchmark_funcs.py +++ b/gcpy/benchmark_funcs.py @@ -1514,9 +1514,6 @@ def createplots(filecat): result.keys())[0]]['500'] for result in results} dict_zm = {list(result.keys())[0]: result[list( result.keys())[0]]['zm'] for result in results} - - print("stop here") - quit() # ============================================================== # Write the list of species having significant differences, From ff78c99c9bfc6681d9c57f627cee8a21acdb9836 Mon Sep 17 00:00:00 2001 From: Yuanjian Zhang Date: Mon, 8 Apr 2024 13:03:08 -0500 Subject: [PATCH 11/43] Supported 2D slices regridding formatting Enabled compare plots for 2D variables between two different cube-sphere resolutions. 
--- gcpy/regrid.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/gcpy/regrid.py b/gcpy/regrid.py index 3e9e4fc3..e84df88e 100644 --- a/gcpy/regrid.py +++ b/gcpy/regrid.py @@ -758,6 +758,12 @@ def ravel_checkpoint_lat(ds_out): }) return ds_out + # Filter non-existent coordinates/dimensions + def rename_existing(ds, rename_dict): + existing_keys = set(ds.coords) | set(ds.dims) + filtered_rename_dict = {key: value for key, value in rename_dict.items() if key in existing_keys} + return ds.rename(filtered_rename_dict) + dim_formats = { 'checkpoint': { 'unravel': [unravel_checkpoint_lat], @@ -790,13 +796,13 @@ def ravel_checkpoint_lat(ds_out): ds = unravel_callback(ds) # Rename dimensions - ds = ds.rename(dim_formats[format].get('rename', {})) + ds = rename_existing(ds, dim_formats[format].get('rename', {})) return ds # %%%% Renaming from the common format %%%% # Reverse rename - ds = ds.rename( + ds = rename_existing(ds, {v: k for k, v in dim_formats[format].get('rename', {}).items()}) # Ravel dimensions From 6f1a92151c44b138db5fa17952913a0b64b5b5fb Mon Sep 17 00:00:00 2001 From: Yuanjian Zhang Date: Tue, 9 Apr 2024 12:51:56 -0500 Subject: [PATCH 12/43] Corrected dimension reformatting for regridding The cubed-sphere dimension sequence was reversed. This change also enables more versatile reshaping of datasets with fewer dimensions. --- gcpy/regrid.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/gcpy/regrid.py b/gcpy/regrid.py index e84df88e..def2a28d 100644 --- a/gcpy/regrid.py +++ b/gcpy/regrid.py @@ -785,7 +785,8 @@ def rename_existing(ds, rename_dict): 'Ydim': 'Y', 'time': 'T', }, - 'transpose': ('time', 'lev', 'nf', 'Xdim', 'Ydim') + # match format of GCHP output + 'transpose': ('time', 'lev', 'nf', 'Ydim', 'Xdim') } } @@ -810,16 +811,7 @@ def rename_existing(ds, rename_dict): ds = ravel_callback(ds) # Transpose - if len(ds.dims) == 5 or (len(ds.dims) == 4 and 'lev' in list( ds.dims) and 'time' in list(ds.dims)): - # full dim dataset - ds = ds.transpose(*dim_formats[format].get('transpose', [])) - elif len(ds.dims) == 4: - # single time - ds = ds.transpose(*dim_formats[format].get('transpose', [])[1:]) - elif len(ds.dims) == 3: - # single level / time - ds = ds.transpose(*dim_formats[format].get('transpose', [])[2:]) + ds = ds.transpose(*[x for x in dim_formats[format].get('transpose', []) if x in list(ds.dims)]) return ds From 3d09c4e5c5333376a5820bd58737f3e577a90384 Mon Sep 17 00:00:00 2001 From: Yuanjian Zhang Date: Tue, 9 Apr 2024 12:54:42 -0500 Subject: [PATCH 13/43] Corrected automatic regridding decision process Correct automatic regridding decision process to match documentation. --- gcpy/regrid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcpy/regrid.py b/gcpy/regrid.py index def2a28d..3f79804f 100644 --- a/gcpy/regrid.py +++ b/gcpy/regrid.py @@ -426,7 +426,7 @@ def create_regridders( "Warning: zonal mean comparison must be lat-lon. Defaulting to 1x1.25") cmpres = '1x1.25' cmpgridtype = "ll" - elif sg_ref_params != [] or sg_dev_params != []: + elif sg_ref_params != [1, 170, -90] or sg_dev_params != [1, 170, -90]: # pick ref grid when a stretched-grid and non-stretched-grid # are passed cmpres = refres From 12107b6caeccd11be60a814ba49bca8334d822ee Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Tue, 9 Apr 2024 15:08:00 -0400 Subject: [PATCH 14/43] Now display up to 15 digits in the mass conservation table gcpy/benchmark/modules/benchmark_mass_cons_table.py - Increased the formatting from 20.13f to 20.15f for most entries in the first two columns. 
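As a quick illustration of how these fixed-width format specs behave (the value below is arbitrary; note that double precision carries only about 15-17 significant digits, so 15 decimal places is near the limit of what is meaningful):

    mass = 3.141592653589793
    print(f"{mass : >20.13f}")  # '     3.1415926535898' (13 decimals, width 20)
    print(f"{mass : >20.15f}")  # '   3.141592653589793' (15 decimals, width 20)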
Signed-off-by: Bob Yantosca --- .../modules/benchmark_mass_cons_table.py | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/gcpy/benchmark/modules/benchmark_mass_cons_table.py b/gcpy/benchmark/modules/benchmark_mass_cons_table.py index 63104cda..4e5b61d9 100644 --- a/gcpy/benchmark/modules/benchmark_mass_cons_table.py +++ b/gcpy/benchmark/modules/benchmark_mass_cons_table.py @@ -417,8 +417,8 @@ def make_benchmark_mass_conservation_table( absdiff = dev_masses[t_idx] - ref_masses[t_idx] pctdiff = (absdiff / ref_masses[t_idx]) * 100.0 template = f" {time} " - template += f"{ref_masses[t_idx] : >20.13f} " - template += f"{dev_masses[t_idx] : >20.13f} " + template += f"{ref_masses[t_idx] : >20.15f} " + template += f"{dev_masses[t_idx] : >20.15f} " template += f"{absdiff : >13.4e} " template += f"{pctdiff : >8.3f}" print(template, file=ofile) @@ -432,8 +432,8 @@ def make_benchmark_mass_conservation_table( template += " " + "-"*13 + " " + "-"*8 print(template, file=ofile) template = " Maximum mass [Tg] " - template += f"{ref_stats['max_mass'] : >20.13f} " - template += f"{dev_stats['max_mass'] : >20.13f} " + template += f"{ref_stats['max_mass'] : >20.15f} " + template += f"{dev_stats['max_mass'] : >20.15f} " template += f"{diff_stats['max_mass__absdiff'] : >13.4e} " template += f"{diff_stats['max_mass__pctdiff'] : >8.3f}" print(template, file=ofile) @@ -450,21 +450,21 @@ def make_benchmark_mass_conservation_table( template += f"{diff_stats['minmax_absdiff_g__pctdiff'] : >8.3f}" print(template, file=ofile) template = " % difference " - template += f"{ref_stats['minmax_pctdiff'] : >20.13f} " - template += f"{dev_stats['minmax_pctdiff'] : >20.13f} " + template += f"{ref_stats['minmax_pctdiff'] : >20.15f} " + template += f"{dev_stats['minmax_pctdiff'] : >20.15f} " template += f"{diff_stats['minmax_pctdiff__absdiff'] : >13.4e} " template += f"{diff_stats['minmax_pctdiff__pctdiff'] : >8.3f}" print(template, file=ofile) print("", file=ofile) template = " Start mass [Tg] " - template += f"{ref_stats['start_mass'] : >20.13f} " - template += f"{dev_stats['start_mass'] : >20.13f} " + template += f"{ref_stats['start_mass'] : >20.15f} " + template += f"{dev_stats['start_mass'] : >20.15f} " template += f"{diff_stats['start_mass__absdiff'] : >13.4e} " template += f"{diff_stats['start_mass__pctdiff'] : >8.3f}" print(template, file=ofile) template = " End mass [Tg] " - template += f"{ref_stats['end_mass'] : >20.13f} " - template += f"{dev_stats['end_mass'] : >20.13f} " + template += f"{ref_stats['end_mass'] : >20.15f} " + template += f"{dev_stats['end_mass'] : >20.15f} " template += f"{diff_stats['end_mass__absdiff'] : >13.4e} " template += f"{diff_stats['end_mass__pctdiff'] : >8.3f}" print(template, file=ofile) @@ -475,15 +475,15 @@ def make_benchmark_mass_conservation_table( template += f"{diff_stats['startend_absdiff_g__pctdiff'] : >8.3f}" print(template, file=ofile) template = " % difference " - template += f"{ref_stats['startend_pctdiff'] : >20.13f} " - template += f"{dev_stats['startend_pctdiff'] : >20.13f} " + template += f"{ref_stats['startend_pctdiff'] : >20.15f} " + template += f"{dev_stats['startend_pctdiff'] : >20.15f} " template += f"{diff_stats['startend_pctdiff__absdiff'] : >13.4e} " template += f"{diff_stats['startend_pctdiff__pctdiff'] : >8.3f}" print(template, file=ofile) print("", file=ofile) template = " Mean mass [Tg] " - template += f"{ref_stats['mean_mass']:>20.13f} " - template += f"{dev_stats['mean_mass']:>20.13f} " + template += 
f"{ref_stats['mean_mass']:>20.15f} " + template += f"{dev_stats['mean_mass']:>20.15f} " template += f"{diff_stats['mean_mass__absdiff']:>13.4e} " template += f"{diff_stats['mean_mass__pctdiff']:>8.3f}" print(template, file=ofile) From a65b4efe80d3fb348703b7ad4b2862738dc58c84 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Thu, 11 Apr 2024 10:43:00 -0400 Subject: [PATCH 15/43] Add further tweaks to mass conservation table gcpy/benchmark/modules/benchmark_mass_cons_table.py - Implemented the following suggestiongs from @lizziel on PR #309: - Now use 12 decimal places for Ref & Dev columns (floating point) - Now use 4 decimal places for Abs Diff & Diff (exponential notation) - Absolute difference in grames is now reported as an integer - Lined up column labels accordingly Signed-off-by: Bob Yantosca --- .../modules/benchmark_mass_cons_table.py | 84 +++++++++---------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/gcpy/benchmark/modules/benchmark_mass_cons_table.py b/gcpy/benchmark/modules/benchmark_mass_cons_table.py index 4e5b61d9..83f2a44f 100644 --- a/gcpy/benchmark/modules/benchmark_mass_cons_table.py +++ b/gcpy/benchmark/modules/benchmark_mass_cons_table.py @@ -405,11 +405,11 @@ def make_benchmark_mass_conservation_table( # Headers print("", file=ofile) - template = " Date & Time" + " "*18 + "Ref mass [Tg]" + " "*13 - template += "Dev mass [Tg]"+ " "*6 + "Abs Diff" + " % Diff" + template = " Date & Time" + " "*15 + "Ref mass [Tg]" + " "*8 + template += "Dev mass [Tg]"+ " "*6 + "Abs Diff % Diff" print(template, file=ofile) - template = " " + "-"*17 + " "*5 + "-"*20 + " "*6 + "-"*20 - template += " " + "-"*13 + " " + "-"*8 + template = " " + "-"*17 + " "*5 + "-"*17 + " "*4 + "-"*17 + template += " " + "-"*13 + " " + "-"*13 print(template, file=ofile) # Total masses @@ -417,79 +417,79 @@ def make_benchmark_mass_conservation_table( absdiff = dev_masses[t_idx] - ref_masses[t_idx] pctdiff = (absdiff / ref_masses[t_idx]) * 100.0 template = f" {time} " - template += f"{ref_masses[t_idx] : >20.15f} " - template += f"{dev_masses[t_idx] : >20.15f} " + template += f"{ref_masses[t_idx] : >17.12f} " + template += f"{dev_masses[t_idx] : >17.12f} " template += f"{absdiff : >13.4e} " - template += f"{pctdiff : >8.3f}" + template += f"{pctdiff : >13.4e}" print(template, file=ofile) print(" ", file=ofile) # Statistics - template = " Summary" + " "*32+ "Ref" + " "*23 + "Dev" - template += " "*6 + "Abs Diff" + " % Diff" + template = " Summary" + " "*29+ "Ref" + " "*18 + "Dev" + template += " "*6 + "Abs Diff % Diff" print(template, file=ofile) - template = " " + "-"*17 + " "*5 + "-"*20 + " "*6 + "-"*20 - template += " " + "-"*13 + " " + "-"*8 + template = " " + "-"*17 + " "*5 + "-"*17 + " "*4 + "-"*17 + template += " " + "-"*13 + " " + "-"*13 print(template, file=ofile) template = " Maximum mass [Tg] " - template += f"{ref_stats['max_mass'] : >20.15f} " - template += f"{dev_stats['max_mass'] : >20.15f} " + template += f"{ref_stats['max_mass'] : >17.12f} " + template += f"{dev_stats['max_mass'] : >17.12f} " template += f"{diff_stats['max_mass__absdiff'] : >13.4e} " - template += f"{diff_stats['max_mass__pctdiff'] : >8.3f}" + template += f"{diff_stats['max_mass__pctdiff'] : >13.4e}" print(template, file=ofile) template = " Minimum mass [Tg] " - template += f"{ref_stats['min_mass'] : >20.13f} " - template += f"{dev_stats['min_mass'] : >20.13f} " + template += f"{ref_stats['min_mass'] : >17.12f} " + template += f"{dev_stats['min_mass'] : >17.12f} " template += f"{diff_stats['min_mass__absdiff'] 
: >13.4e} " - template += f"{diff_stats['min_mass__pctdiff'] : >8.3f}" + template += f"{diff_stats['min_mass__pctdiff'] : >13.4e}" print(template, file=ofile) template = " Abs diff [g] " - template += f"{ref_stats['minmax_absdiff_g'] : >20.13f} " - template += f"{dev_stats['minmax_absdiff_g'] : >20.13f} " + template += f"{np.int64(ref_stats['minmax_absdiff_g']) : >17d} " + template += f"{np.int64(dev_stats['minmax_absdiff_g']) : >17d} " template += f"{diff_stats['minmax_absdiff_g__absdiff'] : >13.4e} " - template += f"{diff_stats['minmax_absdiff_g__pctdiff'] : >8.3f}" + template += f"{diff_stats['minmax_absdiff_g__pctdiff'] : >13.4e}" print(template, file=ofile) template = " % difference " - template += f"{ref_stats['minmax_pctdiff'] : >20.15f} " - template += f"{dev_stats['minmax_pctdiff'] : >20.15f} " + template += f"{ref_stats['minmax_pctdiff'] : >17.12f} " + template += f"{dev_stats['minmax_pctdiff'] : >17.12f} " template += f"{diff_stats['minmax_pctdiff__absdiff'] : >13.4e} " - template += f"{diff_stats['minmax_pctdiff__pctdiff'] : >8.3f}" + template += f"{diff_stats['minmax_pctdiff__pctdiff'] : >13.4e}" print(template, file=ofile) print("", file=ofile) template = " Start mass [Tg] " - template += f"{ref_stats['start_mass'] : >20.15f} " - template += f"{dev_stats['start_mass'] : >20.15f} " + template += f"{ref_stats['start_mass'] : >17.12f} " + template += f"{dev_stats['start_mass'] : >17.12f} " template += f"{diff_stats['start_mass__absdiff'] : >13.4e} " - template += f"{diff_stats['start_mass__pctdiff'] : >8.3f}" + template += f"{diff_stats['start_mass__pctdiff'] : >13.4e}" print(template, file=ofile) template = " End mass [Tg] " - template += f"{ref_stats['end_mass'] : >20.15f} " - template += f"{dev_stats['end_mass'] : >20.15f} " + template += f"{ref_stats['end_mass'] : >17.12f} " + template += f"{dev_stats['end_mass'] : >17.12f} " template += f"{diff_stats['end_mass__absdiff'] : >13.4e} " - template += f"{diff_stats['end_mass__pctdiff'] : >8.3f}" + template += f"{diff_stats['end_mass__pctdiff'] : >13.4e}" print(template, file=ofile) template = " Abs diff [g] " - template += f"{ref_stats['startend_absdiff_g'] : >20.13f} " - template += f"{dev_stats['startend_absdiff_g'] : >20.13f} " + template += f"{np.int64(ref_stats['startend_absdiff_g']) : >17d} " + template += f"{np.int64(dev_stats['startend_absdiff_g']) : >17d} " template += f"{diff_stats['startend_absdiff_g__absdiff'] : >13.4e} " - template += f"{diff_stats['startend_absdiff_g__pctdiff'] : >8.3f}" + template += f"{diff_stats['startend_absdiff_g__pctdiff'] : >13.4e}" print(template, file=ofile) template = " % difference " - template += f"{ref_stats['startend_pctdiff'] : >20.15f} " - template += f"{dev_stats['startend_pctdiff'] : >20.15f} " + template += f"{ref_stats['startend_pctdiff'] : >17.12f} " + template += f"{dev_stats['startend_pctdiff'] : >17.12f} " template += f"{diff_stats['startend_pctdiff__absdiff'] : >13.4e} " - template += f"{diff_stats['startend_pctdiff__pctdiff'] : >8.3f}" + template += f"{diff_stats['startend_pctdiff__pctdiff'] : >13.4e}" print(template, file=ofile) print("", file=ofile) template = " Mean mass [Tg] " - template += f"{ref_stats['mean_mass']:>20.15f} " - template += f"{dev_stats['mean_mass']:>20.15f} " + template += f"{ref_stats['mean_mass']:>17.12f} " + template += f"{dev_stats['mean_mass']:>17.12f} " template += f"{diff_stats['mean_mass__absdiff']:>13.4e} " - template += f"{diff_stats['mean_mass__pctdiff']:>8.3f}" + template += f"{diff_stats['mean_mass__pctdiff']:>13.4e}" print(template, 
file=ofile) - template = " Variance [Tg] " - template += f"{ref_stats['variance']:>20.13e} " - template += f"{dev_stats['variance']:>20.13e} " + template = " Variance [Tg] " + template += f"{ref_stats['variance']:>17.12e} " + template += f"{dev_stats['variance']:>17.12e} " template += f"{diff_stats['variance__absdiff']:>13.4e} " - template += f"{diff_stats['variance__pctdiff']:>8.3f}" + template += f"{diff_stats['variance__pctdiff']:>13.4e}" print(template, file=ofile) From b889d2e4348883f9a52c888c01628c9e387b8845 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Thu, 11 Apr 2024 11:28:21 -0400 Subject: [PATCH 16/43] Add function get_datetimes_from_filenames; Use in mass cons table gcpy/benchmark/modules/benchmark_utils.py - Added function get_datetimes_from_filenames to return an np.ndarray of np.datetime64 values. This is needed because often the initial restart file may have a different internal timestamp than the starting date of the simulation. gcpy/benchmark/modules/benchmark_mass_cons_table.py - Now use get_datetimes_from_filenames to define the ref_time and dev_time variables. CHANGELOG.md - Updated accordingly Signed-off-by: Bob Yantosca --- CHANGELOG.md | 1 + .../modules/benchmark_mass_cons_table.py | 7 +++-- gcpy/benchmark/modules/benchmark_utils.py | 28 +++++++++++++++++++ 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1ebd93b7..3132f3d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Function `copy_file_to_dir` in `gcpy/util.py`. This is a wrapper for `shutil.copyfile`. - Script `gcpy/benchmark/modules/benchmark_mass_cons_table.py`, with code to create mass conservation tables - Expanded statistics output in benchmark mass conservation tables +- Function `get_datetimes_from_filenames` in `gcpy/benchmark/modules/benchmark_utils.py` ### Changed - Bump pip from 23.2.1 to 23.3 (dependabot suggested this) diff --git a/gcpy/benchmark/modules/benchmark_mass_cons_table.py b/gcpy/benchmark/modules/benchmark_mass_cons_table.py index 83f2a44f..5ad0d6d3 100644 --- a/gcpy/benchmark/modules/benchmark_mass_cons_table.py +++ b/gcpy/benchmark/modules/benchmark_mass_cons_table.py @@ -10,7 +10,8 @@ from gcpy.units import convert_units from gcpy.util import dataset_reader, get_area_from_dataset, \ make_directory, read_config_file, verify_variable_type - +from gcpy.benchmark.modules.benchmark_utils import \ + get_datetimes_from_filenames # Constants SPC_NAME = "PassiveTracer" @@ -339,8 +340,8 @@ def make_benchmark_mass_conservation_table( dev_delta_prs = get_delta_pressure(dev_data) # Get datetime values - ref_time = ref_data["time"].values - dev_time = dev_data["time"].values + ref_time = get_datetimes_from_filenames(ref_files) + dev_time = get_datetimes_from_filenames(dev_files) # Throw an error if Ref & Dev have differing datetime values if not np.all(ref_time == dev_time): diff --git a/gcpy/benchmark/modules/benchmark_utils.py b/gcpy/benchmark/modules/benchmark_utils.py index 64564cd5..8cac7beb 100644 --- a/gcpy/benchmark/modules/benchmark_utils.py +++ b/gcpy/benchmark/modules/benchmark_utils.py @@ -509,3 +509,31 @@ def rename_speciesconc_to_speciesconcvv( rename_dict[var] = var.replace("SpeciesConc_", "SpeciesConcVV_") return dset.rename(rename_dict) + + +def get_datetimes_from_filenames( + files +): + """ + Returns datetimes obtained from GEOS-Chem diagnostic or + restart file names. 
+ + Args + files : list : GEOS-Chem diagnostic/restart file names + + Returns + datetimes : np.ndarray : Array of np.datetime64 values + """ + datetimes = np.zeros( + len(files), + dtype=np.datetime64("1970-01-01T00:00") + ) + for idx, ifile in enumerate(files): + substr = os.path.basename(ifile).split("_") + date = substr[0].split(".")[-1] + time = substr[1].split("z")[0] + dt_str = date[0:4] + "-" + date[4:6] + "-" + date[6:8] + dt_str += "T" + time[0:2] + ":" + time[2:4] + datetimes[idx] = np.datetime64(dt_str) + + return datetimes From 96b89faec49621a287752f0bf0a905da355cc65d Mon Sep 17 00:00:00 2001 From: Yuanjian Zhang Date: Thu, 11 Apr 2024 11:49:28 -0500 Subject: [PATCH 17/43] Go back to resolve merge conflicts with dev branch --- gcpy/benchmark_funcs.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/gcpy/benchmark_funcs.py b/gcpy/benchmark_funcs.py index e18e0390..d8bb5f7c 100644 --- a/gcpy/benchmark_funcs.py +++ b/gcpy/benchmark_funcs.py @@ -1514,7 +1514,10 @@ def createplots(filecat): result.keys())[0]]['500'] for result in results} dict_zm = {list(result.keys())[0]: result[list( result.keys())[0]]['zm'] for result in results} - + + print("stop here") + quit() + # ============================================================== # Write the list of species having significant differences, # which we need to fill out the benchmark approval forms. From 9ac023dcecf968e34cd9fcf67174f6ce38d4f863 Mon Sep 17 00:00:00 2001 From: Yuanjian Zhang Date: Thu, 11 Apr 2024 11:51:53 -0500 Subject: [PATCH 18/43] Resolve merge conflicts with dev branch --- gcpy/benchmark_funcs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcpy/benchmark_funcs.py b/gcpy/benchmark_funcs.py index d8bb5f7c..18f7ffd3 100644 --- a/gcpy/benchmark_funcs.py +++ b/gcpy/benchmark_funcs.py @@ -1517,7 +1517,7 @@ def createplots(filecat): print("stop here") quit() - + # ============================================================== # Write the list of species having significant differences, # which we need to fill out the benchmark approval forms. From d0b24060562e4d7122de6c18fe14fc50bae24432 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Mon, 22 Apr 2024 11:37:19 -0400 Subject: [PATCH 19/43] Update GitHub labels for issues/PRs that should not go stale .github/stale.yml - Updated the list of label names to account for the recent change in GitHub label names in the GCPy repo. CHANGELOG.md - Updated accordingly Signed-off-by: Bob Yantosca --- .github/stale.yml | 21 +++++++++++++++++---- CHANGELOG.md | 3 ++- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/.github/stale.yml b/.github/stale.yml index 7011576b..08067324 100644 --- a/.github/stale.yml +++ b/.github/stale.yml @@ -1,19 +1,32 @@ +# +# stale.yml: GitHub stalebot configuration file +# + # Number of days of inactivity before an issue becomes stale daysUntilStale: 30 + # Number of days of inactivity before a stale issue is closed daysUntilClose: 7 + # Issues with these labels will never be considered stale exemptLabels: - - never stale - - feature - - discussion + - 'category: Discussion' + - 'category: Feature Request' + - 'deferred' + - 'help needed: Open Research Problem' + - 'help needed: Request Input from Community' + - 'never stale' + - 'TODO: Documentation' + # Label to use when marking an issue as stale staleLabel: stale + # Comment to post when marking an issue as stale. Set to `false` to disable markComment: > This issue has been automatically marked as stale because it has not had recent activity. 
If there are no updates within 7 days it will be closed. You can add the "never stale" tag to prevent the Stale bot from closing this issue. + # Comment to post when closing a stale issue. Set to `false` to disable -closeComment: Closing due to inactivity +closeComment: Closing due to inactivity diff --git a/CHANGELOG.md b/CHANGELOG.md index 9d6bd958..5c58bab2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,7 +39,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Benchmark functions now call `rename_speciesconc_to_speciesconcvv` - Create radionuclide, STE flux, and mass conservation tables for Ref and Dev versions in TransportTracers benchmarks - Use new function `copy_file_to_dir` to copy the benchmark script and configuration file to the benchmark results folders - +- Updated GitHub stalebot config file `stale.yml` with new issue/PR labels that should not go stale + ### Fixed - CS inquiry functions in `gcpy/cstools.py` now work properly for `xr.Dataset` and `xr.DataArray` objects - Prevent an import error by using `seaborn-v0_8-darkgrid` in`gcpy/benchmark/modules/benchmark_models_vs_obs.py` From 748d9bdd2e6d3e775de3b4e37f430a55b32d3990 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Mon, 22 Apr 2024 13:39:54 -0400 Subject: [PATCH 20/43] Now use the "stale" GitHub action instead of StaleBot .github/no-response.yml .github/stale.yml - Removed .github/workflows/build-test-environment.yml - Removed, this action was always failing .github/workflows/stale.yml - Configuration file for GitHub "stale" action, which replaces StaleBot. Use the most recent list of issue/PR labels to never be marked stale. CHANGELOG.md - Updated accordingly Signed-off-by: Bob Yantosca --- .github/no-response.yml | 13 ------- .github/stale.yml | 32 ----------------- .github/workflows/build-test-environment.yml | 38 -------------------- .github/workflows/stale.yml | 26 ++++++++------ CHANGELOG.md | 4 ++- 5 files changed, 19 insertions(+), 94 deletions(-) delete mode 100644 .github/no-response.yml delete mode 100644 .github/stale.yml delete mode 100644 .github/workflows/build-test-environment.yml diff --git a/.github/no-response.yml b/.github/no-response.yml deleted file mode 100644 index dd2b8cfe..00000000 --- a/.github/no-response.yml +++ /dev/null @@ -1,13 +0,0 @@ -# Configuration for probot-no-response - https://github.com/probot/no-response - -# Number of days of inactivity before an Issue is closed for lack of response -daysUntilClose: 14 -# Label requiring a response -responseRequiredLabel: more information needed -# Comment to post when closing an Issue for lack of response. Set to `false` to disable -closeComment: > - This issue has been automatically closed because there has been no response - to our request for more information from the original author. With only the - information that is currently in the issue, we don't have enough information - to take action. Please reach out if you have or find the answers we need so - that we can investigate further. 
diff --git a/.github/stale.yml b/.github/stale.yml deleted file mode 100644 index 08067324..00000000 --- a/.github/stale.yml +++ /dev/null @@ -1,32 +0,0 @@ -# -# stale.yml: GitHub stalebot configuration file -# - -# Number of days of inactivity before an issue becomes stale -daysUntilStale: 30 - -# Number of days of inactivity before a stale issue is closed -daysUntilClose: 7 - -# Issues with these labels will never be considered stale -exemptLabels: - - 'category: Discussion' - - 'category: Feature Request' - - 'deferred' - - 'help needed: Open Research Problem' - - 'help needed: Request Input from Community' - - 'never stale' - - 'TODO: Documentation' - -# Label to use when marking an issue as stale -staleLabel: stale - -# Comment to post when marking an issue as stale. Set to `false` to disable -markComment: > - This issue has been automatically marked as stale because it has not had - recent activity. If there are no updates within 7 days it will be closed. - You can add the "never stale" tag to prevent the Stale bot from closing - this issue. - -# Comment to post when closing a stale issue. Set to `false` to disable -closeComment: Closing due to inactivity diff --git a/.github/workflows/build-test-environment.yml b/.github/workflows/build-test-environment.yml deleted file mode 100644 index b3efb372..00000000 --- a/.github/workflows/build-test-environment.yml +++ /dev/null @@ -1,38 +0,0 @@ ---- -# -# GitHub action to build the GCPy test environment with micromamba -# See: https://github.com/marketplace/actions/setup-micromamba -# -name: build-test-environment - -on: - push: - branches: [ "main", "dev" ] - pull_request: - # The branches below must be a subset of the branches above - branches: [ "main", "dev" ] - -jobs: - build: - runs-on: ubuntu-latest - strategy: - matrix: - python-version: ["3.9"] - steps: - - name: Checkout the GCPy repository - uses: actions/checkout@v4 - - name: Create "testing" environment - uses: mamba-org/setup-micromamba@v1 - with: - micromamba-version: 'latest' - environment-file: docs/environment_files/testing.yml - init-shell: bash - cache-environment: false - generate-run-shell: true - post-cleanup: 'all' - - name: Test if "import gcpy" works - run: python -c "import gcpy" - shell: micromamba-shell {0} - - name: Test if we can create a plot - run: python -m gcpy.examples.plotting.create_test_plot - shell: micromamba-shell {0} diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index 7a13f5ea..b14b1f62 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -1,17 +1,17 @@ -# This workflow warns and then closes issues and PRs that have had no activity for a specified amount of time. +# This workflow warns and then closes issues that have had no activity for a specified amount of time. # # You can adjust the behavior by modifying this file. # For more information, see: # https://github.com/actions/stale -name: Mark stale issues and pull requests +name: Mark stale issues on: schedule: - # Job will run at midnight on the 1st of each month (POSIX time syntax) - - cron: '0 0 1 * *' + - cron: '0 0 * * *' # Run every night at midnight jobs: stale: + runs-on: ubuntu-latest permissions: issues: write @@ -21,12 +21,18 @@ jobs: - uses: actions/stale@v5 with: repo-token: ${{ secrets.GITHUB_TOKEN }} - # Stale issue settings - days-before-issue-stale: 30 - days-before-issue-close: 30 stale-issue-label: 'stale' - stale-issue-message: 'This issue is stale because it has been open for 30 days with no activity.' 
- close-issue-message: "This issue was closed because it has been inactive for 30 days since being marked as stale." - # Never mark PRs as stale + exempt-issue-labels: + - 'category: Discussion' + - 'category: Feature Request' + - 'deferred' + - 'help needed: Open Research Problem' + - 'help needed: Request Input from Community' + - 'never stale' + - 'TODO: Documentation' + days-before-issue-stale: 30 + days-before-issue-close: 7 + stale-issue-message: 'This issue has been automatically marked as stale because it has not had recent activity. If there are no updates within 7 days it will be closed. You can add the "never stale" tag to prevent this issue from being closed.' + close-issue-message: 'Closing due to inactivity' days-before-pr-stale: -1 days-before-pr-close: -1 diff --git a/CHANGELOG.md b/CHANGELOG.md index 5c58bab2..ab8370d5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,7 +19,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Added fixed level budget diagnostic to budget operations table - Function `rename_speciesconc_to_speciesconcvv` in `gcpy/benchmark/modules/benchmark_utils.py` - Function `copy_file_to_dir` in `gcpy/util.py`. This is a wrapper for `shutil.copyfile`. - +- GitHub Action config file `.github/workflows/stale.yml`, which replaces StaleBot + ### Changed - Bump pip from 23.2.1 to 23.3 (dependabot suggested this) - Bump pypdf from 3.16.1 to 3.17.0 (dependabot suggested this) @@ -59,6 +60,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Environment file `docs/environment_files/environment.yml` - Environment file `docs/environment_files/requirements.txt` - Removed `awscli` from the GCPy environment; version 2 is no longer available on conda-forge or PyPi +- GitHub config files `.github/stale.yml` and `.github/no-response.yml` ## [1.4.2] - 2024-01-26 ### Added From c98cb8c26bb404bcffcf15f31600de88caccb6b8 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Mon, 22 Apr 2024 16:22:42 -0400 Subject: [PATCH 21/43] Replace whitespace with underscores in file names & version strings gcpy/util.py - Add utility function "replace_whitespace" to replace whitespace characters with another character (underscore is default) gcpy/benchmark/modules/benchmark_mass_cons_table.py - Call replace_whitespace from util.py to replace spaces in version labels and the output file name. 
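For example (the version labels below are hypothetical; each run of whitespace collapses to a single replacement character):

    def replace_whitespace(string, repl_char="_"):
        # Same logic as the new gcpy/util.py function added below
        return repl_char.join(string.split())

    print(replace_whitespace("GCHP 14.4.0 alpha"))  # GCHP_14.4.0_alpha
    print(replace_whitespace("GCC  14.4.0", "-"))   # GCC-14.4.0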
CHANGELOG.md - Updated accordingly Signed-off-by: Bob Yantosca --- CHANGELOG.md | 1 + .../modules/benchmark_mass_cons_table.py | 15 +++++++++---- gcpy/util.py | 21 +++++++++++++++++++ 3 files changed, 33 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3132f3d8..99c88b9c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Script `gcpy/benchmark/modules/benchmark_mass_cons_table.py`, with code to create mass conservation tables - Expanded statistics output in benchmark mass conservation tables - Function `get_datetimes_from_filenames` in `gcpy/benchmark/modules/benchmark_utils.py` +- Function `replace_whitespace` in `gcpy/util.py` ### Changed - Bump pip from 23.2.1 to 23.3 (dependabot suggested this) diff --git a/gcpy/benchmark/modules/benchmark_mass_cons_table.py b/gcpy/benchmark/modules/benchmark_mass_cons_table.py index 5ad0d6d3..190029f1 100644 --- a/gcpy/benchmark/modules/benchmark_mass_cons_table.py +++ b/gcpy/benchmark/modules/benchmark_mass_cons_table.py @@ -8,7 +8,8 @@ import xarray as xr from gcpy.constants import skip_these_vars from gcpy.units import convert_units -from gcpy.util import dataset_reader, get_area_from_dataset, \ +from gcpy.util import \ + replace_whitespace, dataset_reader, get_area_from_dataset, \ make_directory, read_config_file, verify_variable_type from gcpy.benchmark.modules.benchmark_utils import \ get_datetimes_from_filenames @@ -319,6 +320,10 @@ def make_benchmark_mass_conservation_table( # Get a list of properties for the given species metadata = get_passive_tracer_metadata(spcdb_dir) + # Replace whitespace with underscores in version labels + ref_label = replace_whitespace(ref_label) + dev_label = replace_whitespace(dev_label) + # Preserve xarray attributes with xr.set_options(keep_attrs=True): @@ -390,9 +395,11 @@ def make_benchmark_mass_conservation_table( diff_stats = compute_diff_statistics(ref_stats, dev_stats) # Create file - outfilename = os.path.join( - dst, - f"Passive_mass.{ref_label}_vs_{dev_label}.txt" + outfilename = replace_whitespace( + os.path.join( + dst, + f"Passive_mass.{ref_label}_vs_{dev_label}.txt" + ) ) with open(outfilename, 'w', encoding="utf-8") as ofile: diff --git a/gcpy/util.py b/gcpy/util.py index 6a5b347b..e50d4874 100644 --- a/gcpy/util.py +++ b/gcpy/util.py @@ -2235,3 +2235,24 @@ def copy_file_to_dir( ofile = os.path.join(dest, os.path.basename(ifile)) if not os.path.exists(ofile): copyfile(ifile, ofile) + + +def replace_whitespace( + string, + repl_char="_" +): + """ + Replaces whitespace in a string with underscores. + Useful for removing spaces in filename strings. 
+ + Args + string : str : The input string + repl_char : str : Replacement character (default is "_") + + Returns + string : str : String with whitespace replaced + """ + verify_variable_type(string, str) + verify_variable_type(repl_char, str) + + return repl_char.join(string.split()) From df1e984b1dc72b9cefc5ec903dbd5bbaf9d2d4b3 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Tue, 23 Apr 2024 10:25:09 -0400 Subject: [PATCH 22/43] Bug fix: Tell dask to allow large chunk sizes (needed for c180+) gcpy/benchmark/modules/benchmark_mass_cons_table.py - Import the dask.config class as "dask_config" - Add a call to dask_config in the same "with" blocks where we tell xarray to keep all variable/global attributes Signed-off-by: Bob Yantosca --- gcpy/benchmark/modules/benchmark_mass_cons_table.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/gcpy/benchmark/modules/benchmark_mass_cons_table.py b/gcpy/benchmark/modules/benchmark_mass_cons_table.py index 190029f1..bd680c78 100644 --- a/gcpy/benchmark/modules/benchmark_mass_cons_table.py +++ b/gcpy/benchmark/modules/benchmark_mass_cons_table.py @@ -5,6 +5,7 @@ import os import warnings import numpy as np +from dask import config as dask_config import xarray as xr from gcpy.constants import skip_these_vars from gcpy.units import convert_units @@ -140,7 +141,10 @@ def compute_total_mass( Returns total_mass : np.float64 : Total mass [Tg] of species. """ - with xr.set_options(keep_attrs=True): + # Keep xarray attributes and allow large chunks in Dask slicing + with xr.set_options(keep_attrs=True) and dask_config.set({ + "array.slicing.split_large_chunks": False + }): # Local variables units = TARGET_UNITS @@ -325,8 +329,10 @@ def make_benchmark_mass_conservation_table( dev_label = replace_whitespace(dev_label) # Preserve xarray attributes - with xr.set_options(keep_attrs=True): - + with xr.set_options(keep_attrs=True) and dask_config.set({ + "array.slicing.split_large_chunks": False + }): + # ============================================================== # Read data and make sure time dimensions are consistent # ============================================================== From af8ef64b276d9a0eb1e6b68e8e0f27555a07d99a Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Tue, 23 Apr 2024 11:38:59 -0400 Subject: [PATCH 23/43] benchmark_mass_cons_table.py now loops over individual files gcpy/benchmark/modules/benchmark_mass_cons_table.py - Refactored code so that we read one file at a time in order to avoid memory issues when reading large files (e.g. c180 resolution). - Delete objects at the end of the loop over times to force garbage collection. - Remove references to dask_config, it's not needed. Signed-off-by: Bob Yantosca --- .../modules/benchmark_mass_cons_table.py | 68 +++++++++++-------- 1 file changed, 40 insertions(+), 28 deletions(-) diff --git a/gcpy/benchmark/modules/benchmark_mass_cons_table.py b/gcpy/benchmark/modules/benchmark_mass_cons_table.py index bd680c78..d9e340e3 100644 --- a/gcpy/benchmark/modules/benchmark_mass_cons_table.py +++ b/gcpy/benchmark/modules/benchmark_mass_cons_table.py @@ -5,7 +5,6 @@ import os import warnings import numpy as np -from dask import config as dask_config import xarray as xr from gcpy.constants import skip_these_vars from gcpy.units import convert_units @@ -122,7 +121,6 @@ def get_passive_tracer_varname( def compute_total_mass( - t_idx, dset, area, delta_p, @@ -142,9 +140,7 @@ def compute_total_mass( total_mass : np.float64 : Total mass [Tg] of species. 
""" # Keep xarray attributes and allow large chunks in Dask slicing - with xr.set_options(keep_attrs=True) and dask_config.set({ - "array.slicing.split_large_chunks": False - }): + with xr.set_options(keep_attrs=True): # Local variables units = TARGET_UNITS @@ -156,12 +152,12 @@ def compute_total_mass( # Compute mass in Tg darr = convert_units( - dset[varname].astype(np.float64).isel(time=t_idx), + dset[varname].astype(np.float64), varname, metadata, units, area_m2=area, - delta_p=delta_p.isel(time=t_idx), + delta_p=delta_p, ) return np.sum(darr) @@ -327,28 +323,20 @@ def make_benchmark_mass_conservation_table( # Replace whitespace with underscores in version labels ref_label = replace_whitespace(ref_label) dev_label = replace_whitespace(dev_label) - + # Preserve xarray attributes - with xr.set_options(keep_attrs=True) and dask_config.set({ - "array.slicing.split_large_chunks": False - }): - + with xr.set_options(keep_attrs=True): + # ============================================================== - # Read data and make sure time dimensions are consistent + # Make sure Ref and Dev have consistent time dimensions # ============================================================== with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=xr.SerializationWarning) - # Pick the proper function to read the data - reader = dataset_reader(multi_files=True, verbose=False) - - # Get data - ref_data = reader(ref_files, drop_variables=skip_these_vars).load() - dev_data = reader(dev_files, drop_variables=skip_these_vars).load() - ref_area = get_area(ref_areapath, ref_data) - dev_area = get_area(dev_areapath, dev_data) - ref_delta_prs = get_delta_pressure(ref_data) - dev_delta_prs = get_delta_pressure(dev_data) + # Make sure Ref & Dev have the same number of elements + if len(ref_files) != len(dev_files): + msg = "Ref and Dev have different time dimensions!" 
+ raise ValueError(msg) # Get datetime values ref_time = get_datetimes_from_filenames(ref_files) @@ -366,31 +354,55 @@ def make_benchmark_mass_conservation_table( # List for holding the datetimes display_dates = [] - # ================================================================== - # Calculate global mass for the tracer at all restart dates - # ================================================================== + # Pick the proper function to read the data + reader = dataset_reader(multi_files=False, verbose=False) + + # ============================================================== + # Read data and make sure time dimensions are consistent + # Loop over files individually to avoid memory issues + # ============================================================== for t_idx, time in enumerate(dev_time): + # Get data + ref_data = reader( + ref_files[t_idx], + drop_variables=skip_these_vars + ).load() + dev_data = reader( + dev_files[t_idx], + drop_variables=skip_these_vars + ).load() + ref_area = get_area(ref_areapath, ref_data) + dev_area = get_area(dev_areapath, dev_data) + ref_delta_prs = get_delta_pressure(ref_data) + dev_delta_prs = get_delta_pressure(dev_data) + # Save datetime string into display_dates list time = str(np.datetime_as_string(time, unit="m")) display_dates.append(time.replace("T", " ")) # Compute total masses [Tg] for Ref & Dev ref_masses[t_idx] = compute_total_mass( - t_idx, ref_data, ref_area, ref_delta_prs, metadata, ) dev_masses[t_idx] = compute_total_mass( - t_idx, dev_data, dev_area, dev_delta_prs, metadata, ) + # Free memory in large objects + del ref_data + del dev_data + del ref_area + del dev_area + del ref_delta_prs + del dev_delta_prs + # ================================================================== # Print masses and statistics to file # ================================================================== From 9377feb6a7a637770dd825e8f15056dafee1da46 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Fri, 26 Apr 2024 10:55:04 -0400 Subject: [PATCH 24/43] PR #311 post-merge fix: Test if lon_bnds, lat_bnds exist before dropping gcpy/file_regrid.py - Add if statements to test if lat_bnds and lon_bnds are in the data variables of the dataset before trying to drop them from the dataset. 
CHANGELOG.md - Updated accordingly Signed-off-by: Bob Yantosca Now allow make_regridder_L2L to use nearest_s2d regridding gcpy/regrid.py - --- CHANGELOG.md | 1 + gcpy/file_regrid.py | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ab8370d5..e385b6a7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -53,6 +53,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Added missing `n_cores` to `gcpy/examples/diagnostics/compare_diags.yml` - Added missing `plot_drydep` option to `gcpy/gcpy/benchmark/config/1yr_ch4_benchmark.yml` - Add `docs/requirements.txt` symbolic link to `docs/environment_files/read_the_docs_requirements.txt` for RTD builds +- `gcpy/file_regrid.py` now tests if `lon_bnds`, `lat_bnds` are in the dataset before trying to drop them ### Removed - Example script `gcpy/examples/plotting/mda8_o3_timeseries.py` diff --git a/gcpy/file_regrid.py b/gcpy/file_regrid.py index 5d4dbb23..e906285e 100644 --- a/gcpy/file_regrid.py +++ b/gcpy/file_regrid.py @@ -745,7 +745,10 @@ def regrid_ll_to_ll( if "lat" not in dset[var].dims \ and "lon" not in dset[var].dims ] - dset = dset.drop(["lat_bnds", "lon_bnds"]) + if "lat_bnds" in dset.data_vars: + dset = dset.drop(["lat_bnds"]) + if "lon_bnds" in dset.data_vars: + dset = dset.drop(["lon_bnds"]) non_fields = dset[non_fields] dset = dset.drop(non_fields) @@ -756,6 +759,12 @@ def regrid_ll_to_ll( dim_format_out="classic" ) + # Decide if we are regridding a data file or a mask + # by testing for the variable name "MASK" + method = "conservative" + if "MASK" in dset.data_vars: + method = "nearest_s2d" + # Create the regridder and regrid the data regridder = make_regridder_L2L( ll_res_in, @@ -763,7 +772,8 @@ def regrid_ll_to_ll( reuse_weights=True, in_extent=in_extent, out_extent=out_extent, - weightsdir=weightsdir + weightsdir=weightsdir, + method=method, ) dset = regridder( dset, From 29a06c85f6ef5265fa0e732691f644d3ec51a394 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Tue, 30 Apr 2024 11:35:06 -0400 Subject: [PATCH 25/43] Add example script "make_mask_file.py" gcpy/examples/working_with_files/make_mask_file.py - Example script to create a country mask from a netCDF file containing country IDs (HEMCO/MASKS/v2014-07/countrymask_0.1x0.1.nc). CHANGELOG.md - Updated accordingly Signed-off-by: Bob Yantosca --- CHANGELOG.md | 1 + .../working_with_files/make_mask_file.py | 123 ++++++++++++++++++ 2 files changed, 124 insertions(+) create mode 100755 gcpy/examples/working_with_files/make_mask_file.py diff --git a/CHANGELOG.md b/CHANGELOG.md index e385b6a7..b680347b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Function `rename_speciesconc_to_speciesconcvv` in `gcpy/benchmark/modules/benchmark_utils.py` - Function `copy_file_to_dir` in `gcpy/util.py`. This is a wrapper for `shutil.copyfile`. 
 - GitHub Action config file `.github/workflows/stale.yml`, which replaces StaleBot
+- Example script `gcpy/examples/working_with_files/make_mask_file.py`
 
 ### Changed
 - Bump pip from 23.2.1 to 23.3 (dependabot suggested this)
diff --git a/gcpy/examples/working_with_files/make_mask_file.py b/gcpy/examples/working_with_files/make_mask_file.py
new file mode 100755
index 00000000..e617298b
--- /dev/null
+++ b/gcpy/examples/working_with_files/make_mask_file.py
@@ -0,0 +1,123 @@
+#!/usr/bin/env python3
+"""
+Create a mask file (for emissions) from a netCDF file of country IDs:
+Download this file before using:
+https://gcgrid.s3.amazonaws.com/HEMCO/MASKS/v2014-07/countrymask_0.1x0.1.nc
+
+Usage:
+------
+./make_mask_file.py [-i filein] -o fileout -c country_id -m true|false
+
+where
+
+-i filein     : File of country IDs (download from link above).
+                Default value: "countrymask_0.1x0.1.nc"
+
+-o fileout    : Output file for the mask
+
+-c country_id : ID of your desired country.
+                Use a netCDF file viewer to determine this value.
+
+-m true|false : Create a mirrored (i.e. inverted) mask.
+                Default value: False
+
+Examples:
+---------
+
+# Create a mask for Canada
+./make_mask_file.py -o Canada_Mask.01x01.nc -c 124
+
+# Create a mirrored mask for Mexico
+./make_mask_file.py -o Mexico_Mask_Mirror.01x01.nc -c 484 -m true
+
+"""
+import argparse
+import numpy as np
+import xarray as xr
+
+def make_mask(
+    filein,
+    country_id,
+    fileout,
+    mirror=False,
+):
+    """
+    Creates a netCDF mask file for a given country.
+
+    Args
+    filein     : str  : File with country ID values
+    country_id : int  : ID of the country that you want masked
+    fileout    : str  : Output mask file
+    mirror     : bool : Return a mirrored (i.e. inverted) mask?
+    """
+    with xr.set_options(keep_attrs=True):
+
+        # Define zero and one values (for normal + mirror masks)
+        one = np.float32(1)
+        zero = np.float32(0)
+        if mirror:
+            one = np.float32(0)
+            zero = np.float32(1)
+
+        # Open file and rename mask variable to MASK
+        dset = xr.open_dataset(filein)
+        dset = dset.rename({"CountryID": "MASK"})
+
+        # Mask out the country
+        array = np.where(
+            dset["MASK"].values == country_id,
+            one,
+            zero
+        )
+
+        # Cast to float to avoid issues w/ GCHP input
+        dset["MASK"].values = array
+        dset["MASK"] = dset["MASK"].astype(np.float32)
+
+        # Write to disk
+        dset.to_netcdf(fileout)
+
+
+if __name__ == '__main__':
+
+    # Tell parser which arguments to expect
+    parser = argparse.ArgumentParser(
+        description="Create a mask file (for emissions) from a netCDF file of country IDs."
+    )
+    parser.add_argument(
+        "-i", "--filein",
+        metavar="FILEIN",
+        type=str,
+        required=False,
+        default="countrymask_0.1x0.1.nc",
+        help="netCDF file with country IDs"
+    )
+    parser.add_argument(
+        "-o", "--fileout",
+        metavar="FILEOUT",
+        type=str,
+        required=True,
+        help="name of output file"
+    )
+    parser.add_argument(
+        "-c", "--country-id",
+        metavar="COUNTRY-ID",
+        required=True,
+        type=int,
+        help="Country ID value to match in input file",
+    )
+    parser.add_argument(
+        "-m", "--mirror",
+        metavar="MIRROR",
+        type=lambda val: str(val).lower() in ("true", "t", "yes", "y", "1"),  # NOTE: plain type=bool would treat "-m false" as True
+        required=False,
+        default=False,
+        help="Create mirrored (reversed) mask"
+    )
+    args = parser.parse_args()
+    make_mask(
+        args.filein,
+        args.country_id,
+        args.fileout,
+        args.mirror,
+    )
From db38f265a2af4bf5080d741bf75fc7128658e93b Mon Sep 17 00:00:00 2001
From: Bob Yantosca 
Date: Tue, 30 Apr 2024 18:29:00 -0400
Subject: [PATCH 26/43] Initial commit: Add script to scrape GEOS-Chem timers

gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py
- Added this script to scrape benchmark timers into a table to
  display timing info from Ref & Dev models.  NOTE: More work will
  be needed to refine this in subsequent commits.

Signed-off-by: Bob Yantosca 
---
 .../benchmark_scrape_gcclassic_timers.py      | 167 ++++++++++++++++++
 1 file changed, 167 insertions(+)
 create mode 100755 gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py

diff --git a/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py b/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py
new file mode 100755
index 00000000..a264a50c
--- /dev/null
+++ b/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py
@@ -0,0 +1,167 @@
+#!/usr/bin/env python3
+"""
+"""
+import os
+from gcpy.util import verify_variable_type
+import json
+
+
+def read_gcclassic(ifile):
+    """
+    Determines if the input is a valid JSON.
+
+    Args
+    ifile : str : file name
+
+    Returns
+    result : dict : Dictionary with timing information
+    """
+
+    # Make sure file exists
+    if not os.path.exists(ifile):
+        raise FileNotFoundError(f"Could not find {ifile}!")
+
+    # First try to read the file as a JSON,
+    # then try to read the file as text.
+    try:
+        result = read_gcclassic_json(ifile)
+    except ValueError as err:
+        result = read_gcclassic_log(ifile)
+    return result
+
+
+def read_gcclassic_json(
+    ifile
+):
+    """
+    Parses the GEOS-Chem Classic timing information in JSON format
+    and returns a dictionary with the results.
+
+    Args
+    ifile : str : File name
+
+    Returns
+    result : dict : Dictionary with timing information
+    """
+    try:
+        with open(ifile, encoding="utf-8") as json_file:
+            result = json.load(json_file)
+        return result["GEOS-Chem Classic timers"]
+    except ValueError as err:
+        raise ValueError from err
+
+
+def read_gcclassic_log(ifile):
+    """
+    Parses the GEOS-Chem Classic log file with timing information
+    and returns a dictionary with the results.
+ + Args + ifile : str : File name + + Returns + result : dict : Dictionary with timing information + """ + keep_line = False + timers = {} + + # Read the line backwards and get just keep the timing information + with open(ifile, encoding="utf-8") as log_file: + + for line in reversed(list(log_file)): + line = line.strip("\n") + + # Set a flag to denote the start & end of timing info + if "Unit conversions" in line: + keep_line = True + if "----------" in line: + keep_line = False + break + + # Append timing info lines into a list + if keep_line: + substr = line.split(":") + key = substr[0].strip() + val = substr[3].split()[1].strip() + timers[key] = {"seconds": val} + + return timers + + +def print_timer(key, ref, dev, ofile): + """ + Prints timing info for a single timer to a log file. + """ + line = f"{key:<25} {ref[key]['seconds']:>20} {dev[key]['seconds']:>20}" + print(line, file=ofile) + + +def display_timers(ref, ref_label, dev, dev_label, table_file): + """ + Prints the GEOS-Chem timer information to a table. + + Args + ref : dict : Timer output from the "Ref" model + ref : dict : Timer output from the "Dev" model + """ + with open(table_file, "w", encoding="utf-8") as ofile: + + # Print header + print(f"{'Timer':<25} {ref_label:>20} {dev_label:>20}", file=ofile) + print(f"{'-'*25:<25} {'-'*20:>20} {'-'*20:>20}", file=ofile) + + # Print timers + print_timer("GEOS-Chem", ref, dev, ofile) + print_timer("HEMCO", ref, dev, ofile) + print_timer("All chemistry", ref, dev, ofile) + print_timer("=> Gas-phase chem", ref, dev, ofile) + print_timer("=> Photolysis", ref, dev, ofile) + print_timer("=> Aerosol chem", ref, dev, ofile) + print_timer("=> Linearized chem", ref, dev, ofile) + print_timer("Transport", ref, dev, ofile) + print_timer("Convection", ref, dev, ofile) + print_timer("Boundary layer mixing", ref, dev, ofile) + print_timer("Dry deposition", ref, dev, ofile) + print_timer("Wet deposition", ref, dev, ofile) + print_timer("Diagnostics", ref, dev, ofile) + print_timer("Unit conversions", ref, dev, ofile) + + +def make_benchmark_timing_table( + ref_file, + ref_label, + dev_file, + dev_label, + dst, +): + """ + """ + verify_variable_type(ref_file, (str, list)) + verify_variable_type(ref_label, str) + verify_variable_type(dev_file, (str, list)) + verify_variable_type(dev_label, str) + verify_variable_type(dst, str) + + # Strip timing info from JSON or log ifle + ref_timers = read_gcclassic(ref_file) + dev_timers = read_gcclassic(dev_file) + + # Write timing info to a table + display_timers( + ref_timers, + ref_label, + dev_timers, + dev_label, + "sample_output.txt", + ) + + +if __name__ == '__main__': + make_benchmark_timing_table( + "./gcclassic_timers.json", + "GCC 14.4.0", + "./execute.gc_4x5_merra2_fullchem_benchmark.log", + "GCHP 14.4.0", + "./" + ) +# "./execute.gchp_merra2_fullchem_benchmark.log", From 5ce9fc3e0da0f019f8744435df6a1bf7c690b319 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Thu, 2 May 2024 16:04:08 -0400 Subject: [PATCH 27/43] Get updates for HEMCO formatting from @hannahnesser + additional fixes gcpy/community/format_hemco_data.py - Script to fix netCDF attributes, by @hannahnesser. - NOTE: Stored in the gcpy/community folder, to denote scripts that are submitted by members of the GEOS-Chem & GCPy user community. 
gcpy/community/__init__.py
- Added this import script for the gcpy/community folder

gcpy/__init__.py
- Updated accordingly now that format_hemco_data has been moved
  to the gcpy/community folder

gcpy/examples/README.txt
- Removed

gcpy/examples/README.md
- Added this README file for the examples folder in Markdown format

gcpy/examples/hemco/.gitignore
- Added this to ignore *.nc* files in this folder

gcpy/examples/hemco/format_hemco_demo.py
- Moved here from gcpy/format_hemco_demo.py

gcpy/examples/hemco/make_mask_file.py
- Moved here from gcpy/examples/working_with_files/make_mask_file.py

CHANGELOG.md
- Updated accordingly

Signed-off-by: Bob Yantosca 
---
 CHANGELOG.md                              |   6 +-
 gcpy/__init__.py                          |   2 +-
 gcpy/community/__init__.py                |   5 +
 gcpy/{ => community}/format_hemco_data.py | 134 +++++++++++-----
 gcpy/examples/README.md                   | 126 +++++++++++++++
 gcpy/examples/README.txt                  |   9 --
 gcpy/examples/hemco/.gitignore            |   1 +
 gcpy/examples/hemco/__init__.py           |   5 +
 gcpy/examples/hemco/format_hemco_demo.py  | 144 ++++++++++++++++++
 .../make_mask_file.py                     |   0
 10 files changed, 377 insertions(+), 55 deletions(-)
 create mode 100644 gcpy/community/__init__.py
 rename gcpy/{ => community}/format_hemco_data.py (78%)
 create mode 100644 gcpy/examples/README.md
 delete mode 100644 gcpy/examples/README.txt
 create mode 100644 gcpy/examples/hemco/.gitignore
 create mode 100644 gcpy/examples/hemco/__init__.py
 create mode 100755 gcpy/examples/hemco/format_hemco_demo.py
 rename gcpy/examples/{working_with_files => hemco}/make_mask_file.py (100%)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b91f4fed..552f0429 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,8 +13,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Badges in `docs/source/index.rst`
 - GitHub action to push GCPy releases to PyPi
 - Script `./release/changeVersionNumbers.sh`, used to update version numbers in various files before release
-- Added `gcpy/format_hemco_data.py` from @hannahnesser
-
 - Mamba/Conda enviroment file `docs/environment_files/read_the_docs_environment.yml`, for building ReadTheDocs documentation
 - Environment files `docs/environment_files/gcpy_requirements.txt` and `docs/environment_files/read_the_docs_requirements.txt`
 - New benchmark script `gcpy/benchmark/modules/benchmark_models_vs_sondes.py`
@@ -22,7 +20,9 @@
 - Function `rename_speciesconc_to_speciesconcvv` in `gcpy/benchmark/modules/benchmark_utils.py`
 - Function `copy_file_to_dir` in `gcpy/util.py`. This is a wrapper for `shutil.copyfile`.
- GitHub Action config file `.github/workflows/stale.yml`, which replaces StaleBot -- Example script `gcpy/examples/working_with_files/make_mask_file.py` +- Example script `gcpy/examples/hemco/make_mask_file.py` +- Added `gcpy/community/format_hemco_data.py` from @hannahnesser +- Added `gcpy/examples/hemco/format_hemco_demo.py` from @hannahnesser ### Changed - Bump pip from 23.2.1 to 23.3 (dependabot suggested this) diff --git a/gcpy/__init__.py b/gcpy/__init__.py index 2b816661..10be51e5 100644 --- a/gcpy/__init__.py +++ b/gcpy/__init__.py @@ -3,6 +3,7 @@ """ from .benchmark import * +from .community import * from .examples import * from .append_grid_corners import * @@ -10,7 +11,6 @@ from .cstools import * from .date_time import * from .file_regrid import * -from .format_hemco_data import * from .grid import * from .grid_stretching_transforms import * from .plot import * diff --git a/gcpy/community/__init__.py b/gcpy/community/__init__.py new file mode 100644 index 00000000..65996793 --- /dev/null +++ b/gcpy/community/__init__.py @@ -0,0 +1,5 @@ +""" +GCPy import script +""" + +from .format_hemco_data import * diff --git a/gcpy/format_hemco_data.py b/gcpy/community/format_hemco_data.py similarity index 78% rename from gcpy/format_hemco_data.py rename to gcpy/community/format_hemco_data.py index 297db837..de818d8f 100644 --- a/gcpy/format_hemco_data.py +++ b/gcpy/community/format_hemco_data.py @@ -4,6 +4,7 @@ """ from os.path import join from copy import deepcopy as dc +import warnings import xarray as xr import numpy as np import pandas as pd @@ -21,12 +22,10 @@ def format_hemco_dimensions( """ Formats time, lat, lon, and lev (optionally) attributes for coards compliance (HEMCO compatibility). - Args: dset: xarray Dataset Dataset containing at least latitude and longitude variables, which must be named lat and lon, respectively. - Keyword Args (optional): start_time: string of the format "YYYY-MM-DD HH:mm:ss" String containing the start time of the dataset for @@ -54,7 +53,6 @@ def format_hemco_dimensions( GCHP (True) or GEOS-Chem Classic (False). This is primarily used to set the lev attributes. The default value is False. - Returns: dset: xarray Dataset An updated version of dset with encoding and attributes @@ -74,13 +72,16 @@ def format_hemco_dimensions( dset = _format_time(dset, start_time) # If level is included in the dimensions, set its attributes - if "lev" in dset.coordset: + if "lev" in dset.coords or "level" in dset.coords: # Note: this is relatively untested (2023/08/21 HON) dset = _format_lev(dset, lev_long_name, lev_units, - lev_formula_terms, gchp) + lev_formula_terms, gchp) # Require data order to be time, lat, lon (optionally lev) - dset = dset.transpose("time", "lat", "lon", ...) + if "lev" in dset.coords: + dset = dset.transpose("time", "lev", "lat", "lon", ...) + else: + dset = dset.transpose("time", "lat", "lon", ...) # Return the dataset return dset @@ -93,13 +94,11 @@ def _update_variable_attributes( """ Adds COARDS conforming variable attributes and/or replaces existing variable attributes with COARDS-conforming values. - Args: var_attrs : dict Dictionary of variable attributes. coards_attrs : dict Dictionary of COARDS-conforming variable attributes. - Returns var_attrs : dict Modified dictionary of variable attributes @@ -123,6 +122,10 @@ def _update_variable_attributes( # but do not clobber any other existing variable attrs. 
     for (name, value) in coards_attrs.items():
         if found[name]:
+            if var_attrs[name] != value:
+                print(f"Updating attribute value for {name}:")
+                print(f"  Original value : {var_attrs[name]}")
+                print(f"  New value      : {value}")
             var_attrs.update({name: value})
         else:
             var_attrs[name] = value
@@ -194,9 +197,10 @@ def _format_time(
     Formats the time dimension for COARDS compliance.
     See define_HEMCO_dimensions for argument listings.
     '''
-    if "time" not in dset.coordset:
-        # If time isn't already in the coordset, create a dummy variable
-        dset = dset.assign_coordset(time=pd.to_datetime(start_time))
+    if "time" not in dset.coords:
+        # If time isn't already in the coords, create a dummy variable
+        print(f"Assigning time coordinate from input start_time {start_time}.")
+        dset = dset.assign_coords(time=pd.to_datetime(start_time))
         dset = dset.expand_dims("time")
     else:
         # Otherwise, update start_time to match the first time in the file,
@@ -239,28 +243,46 @@ def _format_lev(
     See define_HEMCO_dimensions for argument listings.
     '''
     ## HON 2023/08/22: This is relatively untested
-
     # If there a dimension called level, rename it
     if "level" in dset.dims.keys():
         dset = dset.rename_dims({"level" : "lev"})
 
-    # If formula is provided, check that the components of the
-    # formula are included.
+    # Check whether both lev_formula_terms and lev["formula_terms"]
+    # are present--if so, issue a warning.
+    if ((lev_formula_terms is not None)
+        and ("formula_terms" in dset["lev"].attrs)):
+        warnings.warn(
+            "Both lev_formula_terms and lev['formula_terms'] are provided."
+            " The provided lev_formula_terms is being used."
+        )
+    elif ((lev_formula_terms is None)
+          and ("formula_terms" not in dset["lev"].attrs)):
+        warnings.warn(
+            "Neither lev_formula_terms nor lev['formula_terms'] are provided."
+            " Skipping lev_formula_terms formatting."
+        )
+    elif ("formula_terms" in dset["lev"].attrs):
+        lev_formula_terms = dset["lev"].attrs["formula_terms"]
+
+    # If lev_formula_terms is now defined:
     if lev_formula_terms is not None:
-        terms = lev_formula_terms.split(": ")
+        terms = lev_formula_terms.split(" ")
         terms = [term for i, term in enumerate(terms) if i % 2 == 1]
+        failed_terms = []
        for term in terms:
            if term not in dset.data_vars.keys():
-                raise ValueError(
-                    f"{term} is in lev_formula_terms and could \
-                    not be found."
-                )
+                failed_terms.append(term)
+        if len(failed_terms) > 0:
+            warnings.warn(
+                f"The following values are in lev_formula_terms and could"
+                f" not be found: {failed_terms}"
+            )
 
     # If unit is level, require that the levels are integers
-    if lev_units == "level" and \
-        (dset["lev"] != dset["lev"].astype(int)).any():
-        raise ValueError("lev has units of level but dimension values \
-            are not integers.")
+    if lev_units not in ["level", "eta_level", "sigma_level"]:
+        raise ValueError(
+            f"lev has units of {lev_units}. Please set it to one "
+            "of level, eta_level, or sigma_level.")
 
     # Set attributes
     ## Set positive to match the GCHP/GEOS-Chem conventions
@@ -296,7 +318,6 @@ def _check_required_dim(
     Checks required dimensions (time, latitude, and longitude) for
     COARDS compliance (that the dimension exists and is
     monotonically increasing).
-
     Args:
         dset: xarray Dataset
         dim: string ("time", "lat", or "lon")
@@ -316,17 +337,42 @@ def _check_required_dim(
     return dset
 
 
+def check_hemco_variables(
+    dset
+):
+    verify_variable_type(dset, xr.Dataset)
+
+    # Iterate through the dataset variables and check that each one
+    # has the required units and long_name attributes.
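+    # Every offending variable is reported before the ValueError is
+    # raised, so all missing attributes can be fixed in one pass.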
+ print("Checking dataset variables for HEMCO compliance.") + required_attrs = ["units", "long_name"] + missing = False + for (name, _) in dset.items(): + attr_names = [name for (name, _) in dset[name].attrs.items()] + missing_attrs = [name for name in required_attrs + if name not in attr_names] + if len(missing_attrs) > 0: + missing = True + print(f" {name} missing {missing_attrs}") + + if missing: + raise ValueError( + "Required units missing from dataset variables." + ) + else: + print("Dataset variables are HEMCO compliant.") + + def format_hemco_variable( dset, var, - long_name, - units, + long_name=None, + units=None, **kwargs ): """ Formats attributes for non-standard variables for COARDS compliance (HEMCO compatibility). - Args: dset: xarray Dataset Dataset containing HEMCO input data. @@ -341,7 +387,6 @@ def format_hemco_variable( for more information. **kwargs : dict Any other attributes wanted for the variable. - Returns: dset: xarray Dataset An updated version of dset with variable attributes @@ -349,23 +394,30 @@ def format_hemco_variable( """ verify_variable_type(dset, xr.Dataset) verify_variable_type(var, str) - verify_variable_type(long_name, str) - verify_variable_type(units, str) - # Add extra attributes if passed via **kwargs - if len(kwargs) != 0: - for (_, att_dict) in kwargs.items(): - dset[var].attrs.update(att_dict) + # Check required variables + coards_attrs = {"long_name" : long_name, + "units" : units} + for name, value in coards_attrs.items(): + if value is not None: + verify_variable_type(value, str) + elif name in dset[var].attrs: + coards_attrs[name] = dset[var].attrs[name] + else: + raise ValueError(f"{name} is not defined for {var}") # Update variable attributes to be COARDS-conforming # without clobbering any pre-existing attributes dset[var].attrs = _update_variable_attributes( dset[var].attrs, - coards_attrs={ - "long_name" : long_name, - "units" : units - } + coards_attrs=coards_attrs ) + + # Add extra attributes if passed via **kwargs + if len(kwargs) != 0: + for (_, att_dict) in kwargs.items(): + dset[var].attrs.update(att_dict) + return dset @@ -378,7 +430,6 @@ def save_hemco_netcdf( ): """ Saves COARDS compliant (HEMCO compatible) netcdf. - Args: dset: xarray Dataset Dataset containing HEMCO input data. @@ -386,7 +437,6 @@ def save_hemco_netcdf( The directory where the data will be saved. save_name: string The name the file will be named under. - Keyword Args (optional): dtype: data type The data type the data will be saved as. Default is @@ -399,7 +449,7 @@ def save_hemco_netcdf( verify_variable_type(save_dir, str) verify_variable_type(save_name, str) - # Check that the save_name endset in .nc + # Check that the save_name ends in .nc if save_name.split(".")[-1][:2] != "nc": save_name = f"{save_name}.nc" @@ -410,7 +460,7 @@ def save_hemco_netcdf( # Set default encoding and dtype for all variables and coordinates encoding = {"_FillValue" : None, "dtype" : dtype} var = {k : dc(encoding) for k in dset.keys()} - coord = {k : dc(encoding) for k in dset.coordset} + coord = {k : dc(encoding) for k in dset.coords} # Manually update the time encoding, which is often overwritten # by xarray defaults diff --git a/gcpy/examples/README.md b/gcpy/examples/README.md new file mode 100644 index 00000000..4cefedf7 --- /dev/null +++ b/gcpy/examples/README.md @@ -0,0 +1,126 @@ +# GCPy example scripts + +This directory contains several subdirectories with example scripts that demonstrate the capabilities of GCPy. 
+
+## bpch_to_nc
+
+NOTE: The binary punch ("bpch") data format has been retired from GEOS-Chem. We keep these scripts here for those who work with the GEOS-Chem Adjoint code, which still uses bpch format.
+
+`bpch2nc.py`
+
+- Script to convert GEOS-Chem binary punch (aka "bpch") data to netCDF.
+
+`bpch_tagco_prodloss_to_nc.py`
+
+- Converts the prod/loss data files in bpch format for the tagged CO simulation to netCDF format.
+
+
+## diagnostics
+
+`compare_diags.py`
+
+- Script to compare the contents of files from two different model versions: A reference version (aka "Ref") and a development version (aka "Dev").
+
+`compare_diags.yml`
+
+- Configuration file for use with `compare_diags.py`
+
+## dry_run
+
+`download_data.py`
+
+- Downloads data from a GEOS-Chem Classic "dry-run" simulation.
+
+`download_data.yml`
+
+- Configuration file for `download_data.py`.
+
+
+## hemco
+
+`format_hemco_demo.py`
+
+- Demonstrates how to fix a non-COARDS-compliant file (needed for HEMCO) using the `gcpy/community/format_hemco_data.py` module from Hannah Nesser (@hannahnesser).
+
+`make_mask_file.py`
+
+- Creates mask files for HEMCO emissions for a given country.
+
+
+## plotting
+
+`create_test_plot.py`
+
+- Script to create a test pattern plot. Useful for testing if the Python environment has been installed properly.
+
+`plot_comparisons.py`
+
+- Plots data from two different models side-by-side for comparison purposes, in a "six-panel" plot layout.
+
+`plot_single_panel.py`
+
+- Creates several different types of single-panel plots.
+
+`plot_timeseries.py`
+
+- Reads and plots timeseries data.
+
+
+## working_with_files
+
+`add_blank_var_to_restart_file.py`
+
+- Adds a "dummy" DataArray containing all zeroes to a GEOS-Chem restart file.
+
+`concatenate_files.py`
+
+- Combines several netCDF data files into a single file using xarray.
+
+`insert_field_into_restart_file.py`
+
+- Adds a DataArray field into a GEOS-Chem restart file.
+
+`regrid_restart_ll_to_cs.py`
+
+- Regrids data from the lat-lon grid to a cubed-sphere grid.
\ No newline at end of file
diff --git a/gcpy/examples/README.txt b/gcpy/examples/README.txt
deleted file mode 100644
index d49fb235..00000000
--- a/gcpy/examples/README.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-
-.. examples-index
-
-Example Gallery
-===============
-
-.. 
contents:: Contents
-   :local:
-   :depth: 2
diff --git a/gcpy/examples/hemco/.gitignore b/gcpy/examples/hemco/.gitignore
new file mode 100644
index 00000000..beb4dab7
--- /dev/null
+++ b/gcpy/examples/hemco/.gitignore
@@ -0,0 +1 @@
+*.nc*
\ No newline at end of file
diff --git a/gcpy/examples/hemco/__init__.py b/gcpy/examples/hemco/__init__.py
new file mode 100644
index 00000000..532b5c82
--- /dev/null
+++ b/gcpy/examples/hemco/__init__.py
@@ -0,0 +1,5 @@
+"""
+GCPy import script
+"""
+from .format_hemco_demo import *
+from .make_mask_file import *
diff --git a/gcpy/examples/hemco/format_hemco_demo.py b/gcpy/examples/hemco/format_hemco_demo.py
new file mode 100755
index 00000000..e7064faf
--- /dev/null
+++ b/gcpy/examples/hemco/format_hemco_demo.py
@@ -0,0 +1,144 @@
+#!/usr/bin/env python3
+"""
+Example script using gcpy.community.format_hemco_data.py
+
+NOTE: Before starting this demo, please download the file:
+
+https://gcgrid.s3.amazonaws.com/HEMCO/GCClassic_Output/14.0.0/2019/GEOSChem.ProdLoss.20190101_0000z.nc4
+
+to this folder and rename it to HEMCO_demonstration_file.nc.
+"""
+import xarray as xr
+from copy import deepcopy as dc
+
+# ----------------------------------------------------------------- #
+# Preparing the file for the demonstration
+# ----------------------------------------------------------------- #
+
+# Load the data file
+
+# NOTE: You can copy any data from the HEMCO data path to this folder:
+
+data = xr.open_dataset("./HEMCO_demonstration_file.nc")
+
+
+# We will now intentionally change the file to be HEMCO incompatible.
+
+# First, remove one of the attributes from latitude and longitude.
+# These changes should all be handled with no manual edits from
+# the user.
+data["lat"].attrs.pop("units")
+data["lon"].attrs.pop("axis")
+
+# We will also reverse latitude so that it's monotonically decreasing.
+# This should throw an error.
+data["lat"] = data["lat"][::-1]
+
+# Second, remove the time variable. Often, files without an explicit
+# time dimension will exclude time from the netCDF. This is bad for
+# HEMCO, and we want to make sure that the functions can deal with it.
+data = data.drop("time").squeeze()
+
+# Third, mess with the level attributes. We'll add an extra formula
+# term that doesn't exist in the dataset. This change should throw an
+# error.
+data["lev"].attrs["formula_terms"] += " xs: xx"
+
+# We also change the positive direction. So long as gchp=False is
+# passed to the function, this should be handled by the functions.
+data["lev"].attrs["positive"] = "down"
+
+# Finally, we'll change some things in the variable Loss_Ox.
+# We'll add a fourth attribute, which we hope won't be clobbered.
+# This should be the only difference between demo_original.txt
+# and the updated demo_post_formatting.txt.
+data["Loss_Ox"].attrs["test"] = (
+    "Testing that additional attributes are not clobbered"
+)
+
+# Save long name and units strings so we can restore them later
+save_long_name = dc(data["Loss_Ox"].attrs["long_name"])
+save_units = dc(data["Loss_Ox"].attrs["units"])
+
+# We also delete the units on Loss_Ox
+del(data["Loss_Ox"].attrs["units"])
+
+# ----------------------------------------------------------------- #
+# Using format_hemco_data to save a HEMCO-compatible file
+# ----------------------------------------------------------------- #
+# Using format_hemco_data.py is easy and requires only four steps.
+data_fix = dc(data)
+
+# 1. Import the module.
+from gcpy.community import format_hemco_data as hemco
+
+# 2. 
Format the required dimensions (time, lat, lon, and lev) for
+# HEMCO.
+# We have to provide the file start time because there is no time
+# dimension in this file. If there was, we could still provide a
+# start time, but it would be overwritten (with a warning) with
+# the first time value in the dataset.
+def test_format_hemco_dimensions(data):
+    try:
+        data = hemco.format_hemco_dimensions(
+            data,
+            start_time="2019-01-01 00:00:00"
+        )
+    except Exception as error:
+        print(f"format_hemco_dimensions_failed: {error}")
+    return data
+
+# Let's test this!
+data_fix = test_format_hemco_dimensions(data_fix)
+print("-"*70)
+
+# We return an error that "lat is not monotonically increasing."
+# Good! We changed that intentionally. Let's undo that and
+# try again.
+data_fix["lat"] = data_fix["lat"][::-1]
+data_fix = test_format_hemco_dimensions(data_fix)
+print("-"*70)
+
+# We also get a warning message that it is assigning the time coordinate
+# from the provided start_time. This is needed for HEMCO compliance, but
+# the user should be aware of the specification of the time dimension.
+
+# We find that "PS" and "xx" are included in lev_formula_terms but not in
+# data_fix. This is a warning, so we don't need to do anything. Onto the
+# next step!
+
+# 3. Format any variables in the netCDF
+# Run the checking function.
+def test_check_variables(data):
+    try:
+        hemco.check_hemco_variables(data_fix)
+    except Exception as error:
+        print(f"check_hemco_variables failed: {error}")
+
+test_check_variables(data_fix)
+print("-"*70)
+
+# We get the following error:
+# Checking dataset variables for HEMCO compliance.
+#   Loss_Ox missing ['units']
+# check_hemco_variables failed: Required units missing from dataset variables.
+
+# We add units back in using the convenience function from the package so
+# that we avoid clobbering anything important.
+data_fix = hemco.format_hemco_variable(
+    data_fix,
+    "Loss_Ox",
+    long_name=save_long_name,
+    units=save_units,
+)
+
+# Test one more time
+test_check_variables(data_fix)
+print("-"*70)
+
+# 4. Save out.
+hemco.save_hemco_netcdf(
+    data_fix,
+    save_dir=".",
+    save_name="./HEMCO_demonstration_file_post_fixes.nc"
+)
diff --git a/gcpy/examples/working_with_files/make_mask_file.py b/gcpy/examples/hemco/make_mask_file.py
similarity index 100%
rename from gcpy/examples/working_with_files/make_mask_file.py
rename to gcpy/examples/hemco/make_mask_file.py
From 51fc9e9898e1bf13b82f7fc380fa5ae47300eb55 Mon Sep 17 00:00:00 2001
From: Bob Yantosca 
Date: Thu, 2 May 2024 16:13:27 -0400
Subject: [PATCH 28/43] Added README.md file for the community folder

gcpy/community/README.md
- Added this file with a description of the contents of the
  gcpy/community folder. This is where users can submit scripts
  of general use to GCPy.

Signed-off-by: Bob Yantosca 
---
 gcpy/community/README.md | 10 ++++++++++
 1 file changed, 10 insertions(+)
 create mode 100644 gcpy/community/README.md

diff --git a/gcpy/community/README.md b/gcpy/community/README.md
new file mode 100644
index 00000000..711f28ba
--- /dev/null
+++ b/gcpy/community/README.md
@@ -0,0 +1,10 @@
+# GCPy Community Contributions
+
+The scripts in this folder have been submitted by GCPy users. Please contact the author of each script directly if you have any questions about its usage.
+
+## Contents
+
+`format_hemco_data.py`
+
+- **Author:** Hannah Nesser (@hannahnesser)
+- **Description:** Fixes netCDF file attributes so that they conform to the netCDF COARDS conventions. This is needed for input to HEMCO.
\ No newline at end of file From c41a33f141feec99cdc8173df53ba1ea2c3ddd86 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Fri, 3 May 2024 13:28:49 -0400 Subject: [PATCH 29/43] Add function "replace_whitespace" in util.py gcpy/util.py - Added function "replace_whitespace", which replaces whitespace in a string with another character (default is "_"). This will be used to make sure the benchmark filenames and version labels do not have spaces. CHANGELOG.md - Updated accordingly Signed-off-by: Bob Yantosca --- gcpy/util.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/gcpy/util.py b/gcpy/util.py index 6a5b347b..e50d4874 100644 --- a/gcpy/util.py +++ b/gcpy/util.py @@ -2235,3 +2235,24 @@ def copy_file_to_dir( ofile = os.path.join(dest, os.path.basename(ifile)) if not os.path.exists(ofile): copyfile(ifile, ofile) + + +def replace_whitespace( + string, + repl_char="_" +): + """ + Replaces whitespace in a string with underscores. + Useful for removing spaces in filename strings. + + Args + string : str : The input string + repl_char : str : Replacement character (default is "_") + + Returns + string : str : String with whitespace replaced + """ + verify_variable_type(string, str) + verify_variable_type(repl_char, str) + + return repl_char.join(string.split()) From b258b3940cd32f9b5b5673b4d32ce54ab972c5ff Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Fri, 3 May 2024 13:31:43 -0400 Subject: [PATCH 30/43] Add benchmark script to scrape GEOS-Chem Classic timing information gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py - Benchmark module that can scrape the timers information from either gcclassic_timers.json or GEOS-Chem Classic log files. If multiple files are supplied as input, the timers information will be summed together. CHANGELOG.md - Updated accordingly Signed-off-by: Bob Yantosca --- .../benchmark_scrape_gcclassic_timers.py | 231 +++++++++++++----- 1 file changed, 171 insertions(+), 60 deletions(-) mode change 100755 => 100644 gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py diff --git a/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py b/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py old mode 100755 new mode 100644 index a264a50c..467ce6ed --- a/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py +++ b/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py @@ -1,74 +1,114 @@ #!/usr/bin/env python3 """ +Scrapes GEOS-Chem Classic benchmark timing information from one or +more JSON or text files. """ import os -from gcpy.util import verify_variable_type import json +from gcpy.util import make_directory, replace_whitespace, verify_variable_type -def read_gcclassic(ifile): +def read_gcclassic(input_files): """ - Determines if the input is a valid JSON. + Determines whether we should call a function to parse the given + input file(s) as JSON or plain text. Args - ifile : str : file name + input_files : str|list : File or list of files to parse Returns - result : dict : Dictionary with timing information + result : list of dict : List of dicts with timing info """ - - # Make sure file exists - if not os.path.exists(ifile): - raise FileNotFoundError(f"Could not find {ifile}!") - - # First try to read the file as a JSON, - # then try to read the file as text. 
try: - result = read_gcclassic_json(ifile) - except ValueError as err: - result = read_gcclassic_log(ifile) + result = read_timing_data(input_files, read_one_json_file) + except ValueError: + result = read_timing_data(input_files, read_one_text_file) return result -def read_gcclassic_json( - ifile +def read_timing_data( + input_files, + reader, ): """ Parses the GEOS-Chem Classic timing information in JSON format and returns a dictionary with the results. Args - ifile : str : File name + input files : str|list : JSON or text file(s) to parse + + Returns + timing : list of dict : Dictionary with timing information + """ + # Return value + timing = [] + + # If more than one file has been provided, read the timing + # information and return a list of dictionaries with results + if isinstance(input_files, list): + for input_file in input_files: + result = reader(input_file) + timing.append(result) + return timing + + # If only one file has been provided, then read it + # and return the dictionary in a list + if isinstance(input_files, str): + result = reader(input_files) + timing.append(result) + return timing + + raise ValueError("Argument 'input_files' is not of type str or list!") + + +def read_one_json_file(json_file): + """ + Parses a GEOS-Chem JSON file with timing information + and returns a dictionary with the results. + + Args + json_file : str : JSON file with timing information Returns - result : dict : Dictionary with timing information + result : dict : Dictionary with timing information """ + + # Make sure file exists + if not os.path.exists(json_file): + raise FileNotFoundError(f"Could not find {json_file}!") + + # If the file is not a JSON file, raise a ValueError, as + # this will prompt read_gcclassic to parse the file as text. try: - with open(ifile, encoding="utf-8") as json_file: - result = json.load(json_file) + with open(json_file, encoding="utf-8") as ifile: + result = json.load(ifile) return result["GEOS-Chem Classic timers"] except ValueError as err: raise ValueError from err -def read_gcclassic_log(ifile): +def read_one_text_file(text_file): """ - Parses the GEOS-Chem Classic log file with timing information - and returns a dictionary with the results. + Parses the GEOS-Chem Classic log file (plain text) with + timing information and returns a dictionary with the results. Args - ifile : str : File name + text_file : str : Text file with timing information Returns - result : dict : Dictionary with timing information + result : dict : Dictionary with timing information """ keep_line = False timers = {} + # Make sure file exists + if not os.path.exists(text_file): + raise FileNotFoundError(f"Could not find {text_file}!") + # Read the line backwards and get just keep the timing information - with open(ifile, encoding="utf-8") as log_file: + with open(text_file, encoding="utf-8") as ifile: - for line in reversed(list(log_file)): + for line in reversed(list(ifile)): line = line.strip("\n") # Set a flag to denote the start & end of timing info @@ -88,11 +128,48 @@ def read_gcclassic_log(ifile): return timers +def sum_timers(timers): + """ + Sums the time in seconds for each GEOS-Chem timer. Input may be + a single dict with timing information or a list of dicts. + + Args + timers : dict|list : GEOS-Chem timing information from one or more + JSON or log files. + + Returns + result : dict : Sum of timing information + """ + + # If timers is of type dict, no summing is needed. 
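+    # Otherwise, each per-timer "seconds" value is summed across the
+    # list entries; e.g. [{"HEMCO": {"seconds": "1.5"}},
+    # {"HEMCO": {"seconds": "2.5"}}] collapses to {"HEMCO": 4.0}.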
+    if isinstance(timers, dict):
+        return timers
+
+    # If timers is a list of dicts, sum the times
+    # in seconds into a new dict, and then return.
+    if isinstance(timers, list):
+
+        # Initialize the result dict
+        result = {}
+        for timer in timers:
+            for (key, val) in timer.items():
+                result[key] = 0.0
+
+        # Then sum the time in seconds for each timer
+        for timer in timers:
+            for (key, val) in timer.items():
+                result[key] += float(val["seconds"])
+
+        return result
+
+    raise ValueError("Argument 'timers' must be of type dict or list!")
+
+
 def print_timer(key, ref, dev, ofile):
     """
     Prints timing info for a single timer to a log file.
     """
-    line = f"{key:<25} {ref[key]['seconds']:>20} {dev[key]['seconds']:>20}"
+    line = f"{key:<25} {ref[key]:>20.3f} {dev[key]:>20.3f}"
     print(line, file=ofile)
 
@@ -107,61 +184,95 @@ def display_timers(ref, ref_label, dev, dev_label, table_file):
     with open(table_file, "w", encoding="utf-8") as ofile:
 
         # Print header
-        print(f"{'Timer':<25} {ref_label:>20} {dev_label:>20}", file=ofile)
-        print(f"{'-'*25:<25} {'-'*20:>20} {'-'*20:>20}", file=ofile)
-
+        print("%"*79, file=ofile)
+        print("%%% GEOS-Chem Classic Benchmark Timing Information",
+              file=ofile)
+        print("%%%", file=ofile)
+        print(f"%%% Ref = {ref_label}", file=ofile)
+        print(f"%%% Dev = {dev_label}", file=ofile)
+        print("%"*79, file=ofile)
+        print("\n", file=ofile)
+        print(f"{'Timer':<25} {'Ref [s]':>20} {'Dev [s]':>20}", file=ofile)
+        print("-"*79, file=ofile)
+
         # Print timers
-        print_timer("GEOS-Chem", ref, dev, ofile)
-        print_timer("HEMCO", ref, dev, ofile)
-        print_timer("All chemistry", ref, dev, ofile)
-        print_timer("=> Gas-phase chem", ref, dev, ofile)
-        print_timer("=> Photolysis", ref, dev, ofile)
-        print_timer("=> Aerosol chem", ref, dev, ofile)
-        print_timer("=> Linearized chem", ref, dev, ofile)
-        print_timer("Transport", ref, dev, ofile)
+        print_timer("GEOS-Chem",             ref, dev, ofile)
+        print_timer("HEMCO",                 ref, dev, ofile)
+        print_timer("All chemistry",         ref, dev, ofile)
+        print_timer("=> Gas-phase chem",     ref, dev, ofile)
+        print_timer("=> Photolysis",         ref, dev, ofile)
+        print_timer("=> Aerosol chem",       ref, dev, ofile)
+        print_timer("=> Linearized chem",    ref, dev, ofile)
+        print_timer("Transport",             ref, dev, ofile)
         print_timer("Convection", ref, dev, ofile)
-        print_timer("Boundary layer mixing", ref, dev, ofile)
+        print_timer("Boundary layer mixing", ref, dev, ofile)
         print_timer("Dry deposition", ref, dev, ofile)
         print_timer("Wet deposition", ref, dev, ofile)
         print_timer("Diagnostics", ref, dev, ofile)
         print_timer("Unit conversions", ref, dev, ofile)
-
+
 def make_benchmark_timing_table(
-    ref_file,
+    ref_files,
     ref_label,
-    dev_file,
+    dev_files,
     dev_label,
-    dst,
+    dst="./benchmark",
+    overwrite=False,
 ):
     """
+    Creates a table of timing information for GEOS-Chem Classic
+    benchmark simulations given one or more JSON and/or text files
+    as input.
+
+    Args:
+
     """
-    verify_variable_type(ref_file, (str, list))
+    verify_variable_type(ref_files, (str, list))
     verify_variable_type(ref_label, str)
-    verify_variable_type(dev_file, (str, list))
+    verify_variable_type(dev_files, (str, list))
     verify_variable_type(dev_label, str)
     verify_variable_type(dst, str)
 
-    # Strip timing info from JSON or log ifle
-    ref_timers = read_gcclassic(ref_file)
-    dev_timers = read_gcclassic(dev_file)
+    # Create the destination folder
+    make_directory(dst, overwrite)
+
+    # Strip timing info from JSON/text file(s) and sum them.
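+    # (Several files may be given here, e.g. the logs from a
+    # multi-segment benchmark; sum_timers adds the per-timer seconds
+    # across all of them.)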
+ ref_timers = sum_timers(read_gcclassic(ref_files)) + dev_timers = sum_timers(read_gcclassic(dev_files)) + + # Filename for output + timing_table = replace_whitespace( + os.path.join( + dst, + f"Benchmark_Timers_{ref_label}_vs_{dev_label}.txt" + ) + ) # Write timing info to a table display_timers( ref_timers, - ref_label, + replace_whitespace(ref_label), dev_timers, - dev_label, - "sample_output.txt", + replace_whitespace(dev_label), + timing_table, ) if __name__ == '__main__': + + REF_FILES = [ + "./gcclassic_timers.json", + "./gcclassic_timers.json" + ] + DEV_FILES = "./execute.gc_4x5_merra2_fullchem_benchmark.log" + + # Debug test make_benchmark_timing_table( - "./gcclassic_timers.json", - "GCC 14.4.0", - "./execute.gc_4x5_merra2_fullchem_benchmark.log", - "GCHP 14.4.0", - "./" - ) -# "./execute.gchp_merra2_fullchem_benchmark.log", + REF_FILES, + "GCC 14.4.0 json", + DEV_FILES, + "GCC 14.4.0 log", + dst="./", + overwrite=True, +) From 4bb4a95c81dd4e6e705682b5e3ec992debaf5324 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Fri, 3 May 2024 18:12:43 -0400 Subject: [PATCH 31/43] Add script to parse GCHP benchmark timing information gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py - Script that scrapes the timing information at the end of the GCHP log file gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py - Updated docstring comments CHANGELOG.md - Updated accordingly Signed-off-by: Bob Yantosca --- CHANGELOG.md | 5 +- .../benchmark_scrape_gcclassic_timers.py | 29 +- .../modules/benchmark_scrape_gchp_timers.py | 325 ++++++++++++++++++ 3 files changed, 351 insertions(+), 8 deletions(-) create mode 100644 gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py diff --git a/CHANGELOG.md b/CHANGELOG.md index b680347b..319989ec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,7 +21,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Function `copy_file_to_dir` in `gcpy/util.py`. This is a wrapper for `shutil.copyfile`. - GitHub Action config file `.github/workflows/stale.yml`, which replaces StaleBot - Example script `gcpy/examples/working_with_files/make_mask_file.py` - +- Convenience function `replace_whitespace` in `gcpy/util.py` +- Benchmark script `gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py` +- Benchmark script `gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py` + ### Changed - Bump pip from 23.2.1 to 23.3 (dependabot suggested this) - Bump pypdf from 3.16.1 to 3.17.0 (dependabot suggested this) diff --git a/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py b/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py index 467ce6ed..084665a1 100644 --- a/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py +++ b/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py @@ -26,16 +26,14 @@ def read_gcclassic(input_files): return result -def read_timing_data( - input_files, - reader, -): +def read_timing_data(input_files, reader): """ Parses the GEOS-Chem Classic timing information in JSON format and returns a dictionary with the results. Args input files : str|list : JSON or text file(s) to parse + reader : function : Function that will parse the file(s) Returns timing : list of dict : Dictionary with timing information @@ -168,6 +166,12 @@ def sum_timers(timers): def print_timer(key, ref, dev, ofile): """ Prints timing info for a single timer to a log file. 
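+
+    Note: ref and dev are the flattened dicts returned by sum_timers,
+    which map each timer name to its total number of seconds.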
+
+    Args
+    key : str : Dictionary key to print
+    ref : dict : Timing information from the "Ref" model
+    dev : dict : Timing information from the "Dev" model
+    ofile : file : File object where info will be written
     """
     line = f"{key:<25} {ref[key]:>20.3f} {dev[key]:>20.3f}"
     print(line, file=ofile)
@@ -178,8 +182,11 @@ def display_timers(ref, ref_label, dev, dev_label, table_file):
     Prints the GEOS-Chem timer information to a table.
 
     Args
-    ref : dict : Timer output from the "Ref" model
-    ref : dict : Timer output from the "Dev" model
+    ref : dict : Timing information from the "Ref" model
+    ref_label : str : Version string for the "Ref" model
+    dev : dict : Timing information from the "Dev" model
+    dev_label : str : Version string for the "Dev" model
+    table_file : str : File name for the timing table output
     """
     with open(table_file, "w", encoding="utf-8") as ofile:
 
@@ -225,7 +232,15 @@ def make_benchmark_timing_table(
     benchmark simulations given one or more JSON and/or text files
     as input.
 
-    Args:
+    Args
+    ref_files : str|list : File(s) with timing info from the "Ref" model
+    ref_label : str : Version string for the "Ref" model
+    dev_files : str|list : File(s) with timing info from the "Dev" model
+    dev_label : str : Version string for the "Dev" model
+
+    Kwargs
+    dst : str : Directory where output will be written
+    overwrite : bool : Overwrite existing files? (default: False)
     """
     verify_variable_type(ref_files, (str, list))
diff --git a/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py b/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py
new file mode 100644
index 00000000..97ece6ab
--- /dev/null
+++ b/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py
@@ -0,0 +1,325 @@
+#!/usr/bin/env python3
+"""
+Scrapes GCHP benchmark timing information from one or
+more text files.
+"""
+import os
+from gcpy.util import make_directory, replace_whitespace, verify_variable_type
+
+
+def read_timing_data(input_files):
+    """
+    Parses the GCHP timing information from plain-text log files
+    and returns a dictionary with the results.
+
+    Args
+    input_files : str|list : Text file(s) to parse
+
+    Returns
+    timing : list of dict : Dictionary with timing information
+    """
+    # Return value
+    timing = []
+
+    # If more than one file has been provided, read the timing
+    # information and return a list of dictionaries with results
+    if isinstance(input_files, list):
+        for input_file in input_files:
+            result = read_one_text_file(input_file)
+            timing.append(result)
+        return timing
+
+    # If only one file has been provided, then read it
+    # and return the dictionary in a list
+    if isinstance(input_files, str):
+        result = read_one_text_file(input_files)
+        timing.append(result)
+        return timing
+
+    raise ValueError("Argument 'input_files' is not of type str or list!")
+
+
+def count_characters(text, char_to_match="-"):
+    """
+    Returns the count of a given character in a string of text.
+
+    Args
+    text : str : The text to parse
+
+    Kwargs
+    char_to_match : str : The character to look for in "text"
+
+    Returns
+    result : int : Number of occurrences of "char_to_match" in "text"
+
+    Reference
+    https://stackoverflow.com/questions/991350/counting-repeated-characters-in-a-string-in-python
+    """
+    # Create a dictionary where each character of "text"
+    # is a key, and all values are set to zero.
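+    # (collections.Counter(text) would build the same mapping.)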
+    count = dict.fromkeys(text, 0)
+
+    # Increment each time a character is found
+    for char in text:
+        count[char] += 1
+
+    # Return the count of the matched character
+    if char_to_match not in count:
+        return 0
+    return count[char_to_match]
+
+
+def read_one_text_file(text_file):
+    """
+    Parses the GCHP log file (plain text) with timing information
+    and returns a dictionary with the results.
+
+    Args
+    text_file : str : Text file with timing information
+
+    Returns
+    result : dict : Dictionary with timing information
+    """
+    keep_line = True
+    temp_timers = []
+
+    # Make sure file exists
+    if not os.path.exists(text_file):
+        raise FileNotFoundError(f"Could not find {text_file}!")
+
+    # Read the lines backwards and just keep the timing information
+    with open(text_file, encoding="utf-8") as ifile:
+
+        for line in reversed(list(ifile)):
+            line = line.strip("\n")
+
+            # Set a flag to denote the start & end of timing info
+            if "-------- --------- ------ --------- ------" in line:
+                keep_line = False
+                break
+
+            # Append timing info lines into a list of dicts
+            if keep_line:
+                substr = line.split()
+                key = substr[0].strip()
+                val = float(substr[2].strip())
+                temp_timers.append({key: val})
+
+    # Because we were reading the end of the file backwards, the
+    # entries in temp_timers are reversed. Now read through them
+    # in the forward order.
+    hdr = ["", "", ""]
+    timers = {}
+    for timer in reversed(temp_timers):
+        for (key, val) in timer.items():
+
+            # Denote how deep into the dictionary this key goes
+            # as determined by the number of prefixing "-" characters
+            depth = count_characters(key, "-") / 2
+
+            # Remove any prefixed "-" characters
+            new_key = key.strip("-")
+
+            # Add results into the "timers" dictionary as a
+            # "flattened" dictionary, for expediency
+            if depth == 0:
+                hdr[0] = new_key
+                timers[new_key] = val
+            elif depth == 1:
+                hdr[1] = new_key
+                new_key = f"{hdr[0]}_{new_key}"
+                timers[new_key] = val
+            elif depth == 2:
+                hdr[2] = new_key
+                new_key = f"{hdr[0]}_{hdr[1]}_{new_key}"
+                timers[new_key] = val
+            else:
+                new_key = f"{hdr[0]}_{hdr[1]}_{hdr[2]}_{new_key}"
+                timers[new_key] = val
+
+    return timers
+
+
+def sum_timers(timers):
+    """
+    Sums the time in seconds for each GCHP timer. Input may be
+    a single dict with timing information or a list of dicts.
+
+    Args
+    timers : dict|list : GCHP timing information from one or more
+                         log files in plain text format
+
+    Returns
+    result : dict : Sum of timing information
+    """
+
+    # If timers is of type dict, no summing is needed.
+    if isinstance(timers, dict):
+        return timers
+
+    # If timers is a list of dicts, sum the times
+    # in seconds into a new dict, and then return.
+    if isinstance(timers, list):
+
+        # Initialize the result dict
+        result = {}
+        for timer in timers:
+            for (key, val) in timer.items():
+                result[key] = 0.0
+
+        # Then sum the time in seconds for each timer
+        for timer in timers:
+            for (key, val) in timer.items():
+                result[key] += float(val)
+
+        return result
+
+    raise ValueError("Argument 'timers' must be of type dict or list!")
+
+
+def print_timer(key, ref, dev, ofile):
+    """
+    Prints timing info for a single timer to a log file.
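+
+    Keys are the flattened "_"-joined names built by read_one_text_file
+    (e.g. "All_Run_GCHP"); the number of "_" characters sets the
+    indentation depth of the printed label.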
+
+    Args
+    key   : str  : Dictionary key to print
+    ref   : dict : Timing information from the "Ref" model
+    dev   : dict : Timing information from the "Dev" model
+    ofile : file : File object where info will be written
+    """
+    # Denote the level of the dictionary key by counting "_" chars
+    depth = count_characters(key, "_")
+
+    # Prefix "--" characters to the key name to denote its depth,
+    # replicating the label style at the end of the GCHP log file
+    label = "--"*depth + key.split("_")[-1]
+
+    # Line to print
+    line = f"{label:<25} {ref[key]:>20.3f} {dev[key]:>20.3f}"
+    print(line, file=ofile)
+
+
+def display_timers(ref, ref_label, dev, dev_label, table_file):
+    """
+    Prints the GCHP timer information to a table.
+
+    Args
+    ref        : dict : Timing information from the "Ref" model
+    ref_label  : str  : Version string for the "Ref" model
+    dev        : dict : Timing information from the "Dev" model
+    dev_label  : str  : Version string for the "Dev" model
+    table_file : str  : File name for the timing table output
+    """
+    with open(table_file, "w", encoding="utf-8") as ofile:
+
+        # Print header
+        print("%"*79, file=ofile)
+        print("%%% GCHP Benchmark Timing Information", file=ofile)
+        print("%%%", file=ofile)
+        print(f"%%% Ref = {ref_label}", file=ofile)
+        print(f"%%% Dev = {dev_label}", file=ofile)
+        print("%"*79, file=ofile)
+        print("\n", file=ofile)
+        print(f"{'Timer':<25} {'Ref [s]':>20} {'Dev [s]':>20}", file=ofile)
+        print("-"*79, file=ofile)
+
+        # Print timers
+        print_timer("All", ref, dev, ofile)
+        print_timer("All_SetService", ref, dev, ofile)
+        print_timer("All_SetService_GCHP", ref, dev, ofile)
+        print_timer("All_SetService_GCHP_GCHPctmEnv", ref, dev, ofile)
+        print_timer("All_SetService_GCHP_GCHPchem", ref, dev, ofile)
+        print_timer("All_SetService_GCHP_DYNAMICS", ref, dev, ofile)
+        print_timer("All_Initialize", ref, dev, ofile)
+        print_timer("All_Initialize_GCHP", ref, dev, ofile)
+        print_timer("All_Initialize_GCHP_GCHPctmEnv", ref, dev, ofile)
+        print_timer("All_Initialize_GCHP_DYNAMICS", ref, dev, ofile)
+        print_timer("All_Initialize_EXTDATA", ref, dev, ofile)
+        print_timer("All_Initialize_HIST", ref, dev, ofile)
+        print_timer("All_Run", ref, dev, ofile)
+        print_timer("All_Run_GCHP", ref, dev, ofile)
+        print_timer("All_Run_GCHP_GCHPctmEnv", ref, dev, ofile)
+        print_timer("All_Run_GCHP_GCHPchem", ref, dev, ofile)
+        print_timer("All_Run_GCHP_DYNAMICS", ref, dev, ofile)
+        print_timer("All_Run_EXTDATA", ref, dev, ofile)
+        print_timer("All_Run_HIST", ref, dev, ofile)
+        print_timer("All_Finalize", ref, dev, ofile)
+        print_timer("All_Finalize_GCHP", ref, dev, ofile)
+        print_timer("All_Finalize_GCHP_GCHPctmEnv", ref, dev, ofile)
+        print_timer("All_Finalize_GCHP_GCHPchem", ref, dev, ofile)
+        print_timer("All_Finalize_GCHP_DYNAMICS", ref, dev, ofile)
+        print_timer("All_Finalize_EXTDATA", ref, dev, ofile)
+        print_timer("All_Finalize_HIST", ref, dev, ofile)
+
+
+def make_benchmark_timing_table(
+    ref_files,
+    ref_label,
+    dev_files,
+    dev_label,
+    dst="./benchmark",
+    overwrite=False,
+):
+    """
+    Creates a table of timing information for GCHP
+    benchmark simulations given one or more text files
+    as input.
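+    When several input files are given, the timing information from
+    each file is summed before the table is written.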
+
+    Args
+    ref_files : str|list : File(s) with timing info from the "Ref" model
+    ref_label : str      : Version string for the "Ref" model
+    dev_files : str|list : File(s) with timing info from the "Dev" model
+    dev_label : str      : Version string for the "Dev" model
+
+    Kwargs
+    dst       : str      : Directory where output will be written
+    overwrite : bool     : Overwrite existing files? (default: False)
+    """
+    verify_variable_type(ref_files, (str, list))
+    verify_variable_type(ref_label, str)
+    verify_variable_type(dev_files, (str, list))
+    verify_variable_type(dev_label, str)
+    verify_variable_type(dst, str)
+
+    # Create the destination folder
+    make_directory(dst, overwrite)
+
+    # Strip timing info from the text file(s) and sum them.
+    ref_timers = sum_timers(read_timing_data(ref_files))
+    dev_timers = sum_timers(read_timing_data(dev_files))
+
+    # Filename for output
+    timing_table = replace_whitespace(
+        os.path.join(
+            dst,
+            f"Benchmark_Timers_{ref_label}_vs_{dev_label}.txt"
+        )
+    )
+
+    # Write timing info to a table
+    display_timers(
+        ref_timers,
+        replace_whitespace(ref_label),
+        dev_timers,
+        replace_whitespace(dev_label),
+        timing_table,
+    )
+
+
+if __name__ == '__main__':
+
+    REF_FILES = [
+        "./execute.gchp_merra2_fullchem_benchmark.log",
+        "./execute.gchp_merra2_fullchem_benchmark.log",
+    ]
+    DEV_FILES = "./execute.gchp_merra2_fullchem_benchmark.log"
+
+    # Debug test
+    make_benchmark_timing_table(
+        REF_FILES,
+        "GCHP 14.4.0 list input",
+        DEV_FILES,
+        "GCHP 14.4.0 str input",
+        dst="./",
+        overwrite=True,
+)

From 91617b52957036e332f1288cc8fa01719fce5b05 Mon Sep 17 00:00:00 2001
From: Bob Yantosca
Date: Mon, 6 May 2024 14:05:16 -0400
Subject: [PATCH 32/43] Now also scrape GCHPchem timers as well as summary timers

gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py
- Modified to scrape the GCHPchem timers as well as the summary
  timers.  This involves:
  - Reading the file forwards (instead of backwards)
  - Using "." as the delimiter in the flattened dictionary
  - Rewriting the algorithm to parse timer lines from the log file
  - Now loop over dictionary keys in the display_timers function

Signed-off-by: Bob Yantosca
---
 .../modules/benchmark_scrape_gchp_timers.py   | 183 +++++++++++-------
 1 file changed, 110 insertions(+), 73 deletions(-)

diff --git a/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py b/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py
index 97ece6ab..b4ef7df1 100644
--- a/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py
+++ b/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py
@@ -39,7 +39,7 @@ def read_timing_data(input_files):
     raise ValueError("Argument 'input_files' is not of type str or list!")


-def count_characters(text, char_to_match="-"):
+def count_characters(text, char_to_match="."):
     """
     Counts the occurrences of a given character in a string of text.
@@ -80,62 +80,112 @@ def read_one_text_file(text_file):
     Returns
     result : dict : Dictionary with timing information
     """
-    keep_line = True
-    temp_timers = []

     # Make sure file exists
     if not os.path.exists(text_file):
         raise FileNotFoundError(f"Could not find {text_file}!")

-    # Read the file backwards and keep just the timing information
-    with open(text_file, encoding="utf-8") as ifile:
+    # ==================================================================
+    # Parse the GCHP log file
+    # ==================================================================
+
+    # Initialize local variables
+    keep_line = False
+    inclusive = 0
+    temp_timers = []

-        for line in reversed(list(ifile)):
-            line = line.strip("\n")
+    # Open the log file
+    with open(text_file, encoding="utf-8") as ifile:

-            # Set a flag to denote the start & end of timing info
-            if "-------- --------- ------ --------- ------" in line:
+        # Read each line in the file
+        for line in ifile:
+
+            # Strip newlines; skip empty lines
+            line = line.strip()
+            if len(line) == 0:
+                continue
+
+            # GCHP timers section (also skip header lines)
+            if 'Times for component <GCHPchem>' in line:
+                keep_line = True
+                inclusive = 3
+                continue
+            if keep_line and 'Min Mean' in line:
+                continue
+            if keep_line and '============================' in line:
+                continue
+            if keep_line and 'Name %' in line:
+                continue
+            if keep_line and '------ ---------- ----------' in line:
+                continue
+            if keep_line and '---------------------------------' in line:
                 keep_line = False
-                break
+                continue
+
+            # Summary section (also skip header lines)
+            if 'Report on process: 0' in line:
+                keep_line = True
+                inclusive = 2
+                continue
+            if keep_line and 'Inclusive' in line:
+                continue
+            if keep_line and '================' in line:
+                continue
+            if keep_line and 'Name' in line:
+                continue
+            if keep_line and '-------- --------- ------ --------- ------' \
+                in line:
+                continue

             # Append timing info lines into a list of dicts
             if keep_line:
                 substr = line.split()
                 key = substr[0].strip()
-                val = float(substr[2].strip())
+                val = float(substr[inclusive].strip())
                 temp_timers.append({key: val})

-    # Because we were reading the end of the file backwards, the
-    # entries in temp_timers are reversed.  Now read through them
-    # in the forward order.
- hdr = ["", "", ""] - timers = {} - for timer in reversed(temp_timers): - for (key, val) in timer.items(): - - # Denote how deep into the dictionary this key goes - # as determined by the number of prefixing "-" characters - depth = count_characters(key, "-") / 2 - - # Remove any prefixed "-" characters - new_key = key.strip("-") - - # Add results into the "timers" dictionary as a - # "flattened" dictionary, for expediency - if depth == 0: - hdr[0] = new_key - timers[new_key] = val - elif depth == 1: - hdr[1] = new_key - new_key = f"{hdr[0]}_{new_key}" - timers[new_key] = val - elif depth == 2: - hdr[2] = new_key - new_key = f"{hdr[0]}_{hdr[1]}_{new_key}" - timers[new_key] = val - else: - new_key = f"{hdr[0]}_{hdr[1]}_{hdr[2]}_{new_key}" - timers[new_key] = val + # ================================================================== + # Save timing results into a "flattened" dictionary + # ================================================================== + hdr = ["", "", "", "", ""] + timers = {} + for timer in temp_timers: + for (key, val) in timer.items(): + + # Denote how deep into the dictionary this key goes + # as determined by the number of prefixing "-" characters + depth = count_characters(key, "-") / 2 + + # Remove any prefixed "-" characters + new_key = key.strip("-") + + # Add results into the "timers" dictionary as a + # "flattened" dictionary, for expediency + # (This is the only way to update a nested dict) + if depth == 0: + hdr[0] = new_key + timers[new_key] = val + elif depth == 1: + hdr[1] = new_key + new_key = f"{hdr[0]}.{new_key}" + timers[new_key] = val + elif depth == 2: + hdr[2] = new_key + new_key = f"{hdr[0]}.{hdr[1]}.{new_key}" + timers[new_key] = val + elif depth == 3: + hdr[3] = new_key + new_key = f"{hdr[0]}.{hdr[1]}.{hdr[2]}.{new_key}" + timers[new_key] = val + elif depth == 4: + hdr[4] = new_key + new_key = f"{hdr[0]}.{hdr[1]}.{hdr[2]}.{hdr[3]}.{new_key}" + timers[new_key] = val + else: + new_key = \ + f"{hdr[0]}.{hdr[1]}.{hdr[2]}.{hdr[3]}.{hdr[4]}.{new_key}" + timers[new_key] = val return timers @@ -187,12 +237,12 @@ def print_timer(key, ref, dev, ofile): dev : dict : Timing information from the "Dev" model ofile : file : File object where info will be written """ - # Denote the level of the dictionary key by counting "_" chars - depth = count_characters(key, "_") + # Denote the level of the dictionary key by counting "." 
chars
+    depth = count_characters(key, ".")

     # Prefix "--" characters to the key name to denote its depth,
     # replicating the label style at the end of the GCHP log file
-    label = "--"*depth + key.split("_")[-1]
+    label = "--"*depth + key.split(".")[-1]

     # Line to print
     line = f"{label:<25} {ref[key]:>20.3f} {dev[key]:>20.3f}"
@@ -219,37 +269,24 @@ def display_timers(ref, ref_label, dev, dev_label, table_file):
         print(f"%%% Ref = {ref_label}", file=ofile)
         print(f"%%% Dev = {dev_label}", file=ofile)
         print("%"*79, file=ofile)
+
+        # GCHPchem timers
         print("\n", file=ofile)
-        print(f"{'Timer':<25} {'Ref [s]':>20} {'Dev [s]':>20}", file=ofile)
+        print(f"{'GCHPchem Timer':<25} {'Ref [s]':>20} {'Dev [s]':>20}",
+              file=ofile)
         print("-"*79, file=ofile)
+        for key in dev:
+            if key.startswith("GCHPchem"):
+                print_timer(key, ref, dev, ofile)

-        # Print timers
-        print_timer("All", ref, dev, ofile)
-        print_timer("All_SetService", ref, dev, ofile)
-        print_timer("All_SetService_GCHP", ref, dev, ofile)
-        print_timer("All_SetService_GCHP_GCHPctmEnv", ref, dev, ofile)
-        print_timer("All_SetService_GCHP_GCHPchem", ref, dev, ofile)
-        print_timer("All_SetService_GCHP_DYNAMICS", ref, dev, ofile)
-        print_timer("All_Initialize", ref, dev, ofile)
-        print_timer("All_Initialize_GCHP", ref, dev, ofile)
-        print_timer("All_Initialize_GCHP_GCHPctmEnv", ref, dev, ofile)
-        print_timer("All_Initialize_GCHP_DYNAMICS", ref, dev, ofile)
-        print_timer("All_Initialize_EXTDATA", ref, dev, ofile)
-        print_timer("All_Initialize_HIST", ref, dev, ofile)
-        print_timer("All_Run", ref, dev, ofile)
-        print_timer("All_Run_GCHP", ref, dev, ofile)
-        print_timer("All_Run_GCHP_GCHPctmEnv", ref, dev, ofile)
-        print_timer("All_Run_GCHP_GCHPchem", ref, dev, ofile)
-        print_timer("All_Run_GCHP_DYNAMICS", ref, dev, ofile)
-        print_timer("All_Run_EXTDATA", ref, dev, ofile)
-        print_timer("All_Run_HIST", ref, dev, ofile)
-        print_timer("All_Finalize", ref, dev, ofile)
-        print_timer("All_Finalize_GCHP", ref, dev, ofile)
-        print_timer("All_Finalize_GCHP_GCHPctmEnv", ref, dev, ofile)
-        print_timer("All_Finalize_GCHP_GCHPchem", ref, dev, ofile)
-        print_timer("All_Finalize_GCHP_DYNAMICS", ref, dev, ofile)
-        print_timer("All_Finalize_EXTDATA", ref, dev, ofile)
-        print_timer("All_Finalize_HIST", ref, dev, ofile)
+        # Summary timers
+        print("\n", file=ofile)
+        print(f"{'Summary':<25} {'Ref [s]':>20} {'Dev [s]':>20}",
+              file=ofile)
+        print("-"*79, file=ofile)
+        for key in dev:
+            if key.startswith("All"):
+                print_timer(key, ref, dev, ofile)

From ac5056432c089bc3bd248ca2dbb1323600ea8897 Mon Sep 17 00:00:00 2001
From: Melissa Sulprizio
Date: Tue, 7 May 2024 09:13:03 -0400
Subject: [PATCH 33/43] Add HCl to emission_species.yml for GEOS-Chem 14.4.0

In GEOS-Chem 14.4.0, continental emissions of chlorine (pCl and HCl)
were added.  We also need to include these emissions in benchmark
plots and tables.
See associated pull request:
- https://github.com/geoschem/geos-chem/pull/2275

Signed-off-by: Melissa Sulprizio
---
 CHANGELOG.md                                | 3 ++-
 gcpy/benchmark/modules/emission_species.yml | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 552f0429..65d2b5f5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -23,7 +23,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Example script `gcpy/examples/hemco/make_mask_file.py`
 - Added `gcpy/community/format_hemco_data.py` from @hannahnesser
 - Added `gcpy/examples/hemco/format_hemco_demo.py` from @hannahnesser
-
+- Added HCl to `gcpy/benchmark/modules/emission_species.yml` for GEOS-Chem 14.4.0
+
 ### Changed
 - Bump pip from 23.2.1 to 23.3 (dependabot suggested this)
 - Bump pypdf from 3.16.1 to 3.17.0 (dependabot suggested this)
diff --git a/gcpy/benchmark/modules/emission_species.yml b/gcpy/benchmark/modules/emission_species.yml
index 8f425679..47c4c72c 100644
--- a/gcpy/benchmark/modules/emission_species.yml
+++ b/gcpy/benchmark/modules/emission_species.yml
@@ -26,6 +26,7 @@ FullChemBenchmark:
   GLYC: Tg
   GLYX: Tg
   HAC: Tg
+  HCl: Tg
   HCOOH: Tg
   HNO2: Tg
   HNO3: Tg

From 224bba7933faaa30aafa6552ad3c50213ca03df9 Mon Sep 17 00:00:00 2001
From: Melissa Sulprizio
Date: Tue, 7 May 2024 09:35:28 -0400
Subject: [PATCH 34/43] Add GTChlorine inventory to emission_inventories.yml

The HCl emissions added in GEOS-Chem 14.4.0 are read in from the GT
(Georgia Tech) Chlorine inventory.  Here we add that inventory to
emission_inventories.yml for inclusion in the benchmark inventory
table.

Signed-off-by: Melissa Sulprizio
---
 CHANGELOG.md                                    | 2 +-
 gcpy/benchmark/modules/emission_inventories.yml | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 65d2b5f5..9b799c42 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -23,7 +23,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Example script `gcpy/examples/hemco/make_mask_file.py`
 - Added `gcpy/community/format_hemco_data.py` from @hannahnesser
 - Added `gcpy/examples/hemco/format_hemco_demo.py` from @hannahnesser
-- Added HCl to `gcpy/benchmark/modules/emission_species.yml` for GEOS-Chem 14.4.0
+- Added HCl to `gcpy/benchmark/modules/emission_species.yml` and GTChlorine to `gcpy/benchmark/modules/emission_inventories.yml` for GEOS-Chem 14.4.0
 
 ### Changed
 - Bump pip from 23.2.1 to 23.3 (dependabot suggested this)
diff --git a/gcpy/benchmark/modules/emission_inventories.yml b/gcpy/benchmark/modules/emission_inventories.yml
index 46eb9e67..6cd01de2 100644
--- a/gcpy/benchmark/modules/emission_inventories.yml
+++ b/gcpy/benchmark/modules/emission_inventories.yml
@@ -9,6 +9,7 @@ FullChemBenchmark:
   DICEAfrica: Tg
   GEIAnatural: Tg
   GFED: Tg
+  GTChlorine: Tg
   IODINE: Tg
   LIANG: Tg
   LIGHTNOX: Tg

From 3827302e6374bf757e63d49dfd7524a928f938f4 Mon Sep 17 00:00:00 2001
From: Bob Yantosca
Date: Tue, 7 May 2024 15:41:05 -0400
Subject: [PATCH 35/43] run_1yr_fullchem_benchmark.py now produces timing table output

gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py
- Rename driver program to "make_benchmark_gcclassic_timing_table"
- Remove if __name__ == "__main__": block

gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py
- Rename driver program to "make_benchmark_gchp_timing_table"
- Add an error check to exit after the last summary timer is found
  (this only affects GCHP log files from AWS cloud benchmarks)
- Remove if __name__ == "__main__": block
gcpy/benchmark/modules/benchmark_utils.py
- Added gcc_vs_gcc_dirs, gchp_vs_gcc_dirs, gchp_vs_gchp_dirs, and
  get_log_filepaths to abstract repetitive code out of the 1-year
  benchmark scripts

gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py
- Call make_benchmark_gcclassic_timing_table to produce the
  GCC vs. GCC timing table
- Call make_benchmark_gchp_timing_table to produce the
  GCHP vs. GCHP timing table

CHANGELOG.md
- Updated accordingly

TODO: Add a "% diff" column to the timing table output
---
 CHANGELOG.md                                       |   2 +
 .../benchmark_scrape_gcclassic_timers.py           |  21 +--
 .../modules/benchmark_scrape_gchp_timers.py        |  27 +--
 gcpy/benchmark/modules/benchmark_utils.py          | 138 +++++++++++++++
 .../modules/run_1yr_fullchem_benchmark.py          | 165 ++++++++++--------
 5 files changed, 236 insertions(+), 117 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 319989ec..9cb203e9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -24,6 +24,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Convenience function `replace_whitespace` in `gcpy/util.py`
 - Benchmark script `gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py`
 - Benchmark script `gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py`
+- 1-year benchmark scripts now produce GCC vs GCC and GCHP vs GCHP timing tables
+- Functions `gcc_vs_gcc_dirs`, `gchp_vs_gcc_dirs`, `gchp_vs_gchp_dirs`, and `get_log_filepaths` in `gcpy/benchmark/modules/benchmark_utils.py`
 
 ### Changed
 - Bump pip from 23.2.1 to 23.3 (dependabot suggested this)
diff --git a/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py b/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py
index 084665a1..10491f2d 100644
--- a/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py
+++ b/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py
@@ -219,7 +219,7 @@ def display_timers(ref, ref_label, dev, dev_label, table_file):
     print_timer("Unit conversions", ref, dev, ofile)
 
 
-def make_benchmark_timing_table(
+def make_benchmark_gcclassic_timing_table(
     ref_files,
     ref_label,
     dev_files,
@@ -272,22 +272,3 @@ def make_benchmark_timing_table(
         replace_whitespace(dev_label),
         timing_table,
     )
-
-
-if __name__ == '__main__':
-
-    REF_FILES = [
-        "./gcclassic_timers.json",
-        "./gcclassic_timers.json"
-    ]
-    DEV_FILES = "./execute.gc_4x5_merra2_fullchem_benchmark.log"
-
-    # Debug test
-    make_benchmark_timing_table(
-        REF_FILES,
-        "GCC 14.4.0 json",
-        DEV_FILES,
-        "GCC 14.4.0 log",
-        dst="./",
-        overwrite=True,
-)
diff --git a/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py b/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py
index b4ef7df1..1253b1f3 100644
--- a/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py
+++ b/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py
@@ -138,6 +138,12 @@ def read_one_text_file(text_file):
                 in line:
                 continue
 
+            # NOTE: This line appears only in cloud benchmarks; it
+            # signals the end of GCHP output and the start of job
+            # statistics.  Exit when we encounter it.
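+            # ("Command being timed:" is the first header line that
+            # GNU "time -v" writes along with its job statistics.)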
+            if keep_line and "Command being timed:" in line:
+                break
+
             # Append timing info lines into a list of dicts
             if keep_line:
                 substr = line.split()
@@ -289,7 +295,7 @@ def display_timers(ref, ref_label, dev, dev_label, table_file):
                 print_timer(key, ref, dev, ofile)
 
 
-def make_benchmark_timing_table(
+def make_benchmark_gchp_timing_table(
     ref_files,
     ref_label,
     dev_files,
@@ -341,22 +347,3 @@ def make_benchmark_timing_table(
         replace_whitespace(dev_label),
         timing_table,
     )
-
-
-if __name__ == '__main__':
-
-    REF_FILES = [
-        "./execute.gchp_merra2_fullchem_benchmark.log",
-        "./execute.gchp_merra2_fullchem_benchmark.log",
-    ]
-    DEV_FILES = "./execute.gchp_merra2_fullchem_benchmark.log"
-
-    # Debug test
-    make_benchmark_timing_table(
-        REF_FILES,
-        "GCHP 14.4.0 list input",
-        DEV_FILES,
-        "GCHP 14.4.0 str input",
-        dst="./",
-        overwrite=True,
-)
diff --git a/gcpy/benchmark/modules/benchmark_utils.py b/gcpy/benchmark/modules/benchmark_utils.py
index 64564cd5..912714a9 100644
--- a/gcpy/benchmark/modules/benchmark_utils.py
+++ b/gcpy/benchmark/modules/benchmark_utils.py
@@ -509,3 +509,141 @@ def rename_speciesconc_to_speciesconcvv(
             rename_dict[var] = var.replace("SpeciesConc_", "SpeciesConcVV_")
 
     return dset.rename(rename_dict)
+
+
+def gcc_vs_gcc_dirs(
+    config,
+    subdir,
+):
+    """
+    Convenience function to return GCC vs. GCC file paths
+    for use in the benchmarking modules.
+
+    Args
+    config : dict : Info read from config file
+    subdir : str  : Subdirectory
+
+    Returns
+    refdir, devdir : str : File paths
+    """
+    util.verify_variable_type(config, dict)
+    util.verify_variable_type(subdir, str)
+
+    # Ref and Dev directory paths
+    refdir = os.path.join(
+        config["paths"]["main_dir"],
+        config["data"]["ref"]["gcc"]["dir"],
+        config["data"]["ref"]["gcc"][subdir]
+    )
+    devdir = os.path.join(
+        config["paths"]["main_dir"],
+        config["data"]["dev"]["gcc"]["dir"],
+        config["data"]["dev"]["gcc"][subdir]
+    )
+
+    return refdir, devdir
+
+
+def gchp_vs_gcc_dirs(
+    config,
+    subdir,
+):
+    """
+    Convenience function to return GCHP vs. GCC file paths
+    for use in the benchmarking modules.
+
+    Args
+    config : dict : Info read from config file
+    subdir : str  : Subdirectory
+
+    Returns
+    refdir, devdir : str : File paths
+    """
+    util.verify_variable_type(config, dict)
+    util.verify_variable_type(subdir, str)
+
+    refdir = os.path.join(
+        config["paths"]["main_dir"],
+        config["data"]["dev"]["gcc"]["dir"],
+        config["data"]["dev"]["gcc"][subdir]
+    )
+    devdir = os.path.join(
+        config["paths"]["main_dir"],
+        config["data"]["dev"]["gchp"]["dir"],
+        config["data"]["dev"]["gchp"][subdir]
+    )
+
+    return refdir, devdir
+
+
+def gchp_vs_gchp_dirs(
+    config,
+    subdir,
+):
+    """
+    Convenience function to return GCHP vs. GCHP file paths
+    for use in the benchmarking modules.
+
+    Args
+    config : dict : Info read from config file
+    subdir : str  : Subdirectory
+
+    Returns
+    refdir, devdir : str : File paths
+    """
+    util.verify_variable_type(config, dict)
+    util.verify_variable_type(subdir, str)
+
+    refdir = os.path.join(
+        config["paths"]["main_dir"],
+        config["data"]["ref"]["gchp"]["dir"],
+        config["data"]["ref"]["gchp"][subdir]
+    )
+    devdir = os.path.join(
+        config["paths"]["main_dir"],
+        config["data"]["dev"]["gchp"]["dir"],
+        config["data"]["dev"]["gchp"][subdir]
+    )
+
+    return refdir, devdir
+
+
+def get_log_filepaths(
+    logs_dir,
+    template,
+    timestamps,
+):
+    """
+    Returns a list of paths for GEOS-Chem log files.
+    These are needed to compute the benchmark timing tables.
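+    Each strftime-style date token in the template (e.g. "%Y%m%d")
+    is replaced with the formatted timestamp, so a template such as
+    "log.%Y%m%d" yields filenames like "log.20190101".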
+
+    Args
+    logs_dir   : str  : Path to directory w/ log files
+    template   : str  : Log file template w/ strftime date tokens
+                        (e.g. "%Y%m%d")
+    timestamps : list : List of datetimes
+
+    Returns
+    result : list : List of log file paths
+    """
+    util.verify_variable_type(logs_dir, str)
+    util.verify_variable_type(template, str)
+
+    # Initialize local variables
+    format_str = ""
+    fmts = ["%Y", "%m", "%d", "%H"]
+    result = []
+
+    # Create the format string for the log file template
+    for fmt in fmts:
+        if fmt in template:
+            format_str += fmt
+
+    # Create each output logfile name, replacing template with date
+    for timestamp in timestamps:
+        time = timestamp.item().strftime(format_str)
+        result.append(
+            os.path.join(
+                logs_dir,
+                template.replace(format_str, time),
+            )
+        )
+
+    return result
diff --git a/gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py b/gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py
index 1794499b..d375cdaf 100644
--- a/gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py
+++ b/gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py
@@ -44,7 +44,7 @@
 
     $ export MPLBACKEND=agg
 
-This script corresponds with GCPy 1.4.3. Edit this version ID if releasing
+This script corresponds with GCPy 1.5.0. Edit this version ID if releasing
 a new version of GCPy.
 """
 
@@ -61,21 +61,27 @@
 from gcpy.benchmark.modules.ste_flux import make_benchmark_ste_table
 from gcpy.benchmark.modules.oh_metrics import make_benchmark_oh_metrics
 from gcpy.benchmark.modules.budget_ox import global_ox_budget
+#TODO: Peel out routines from benchmark_funcs.py into smaller
+# routines in the gcpy/benchmark/modules folder, such as these:
 from gcpy.benchmark.modules.benchmark_funcs import \
     diff_of_diffs_toprow_title, get_species_database_dir, \
     make_benchmark_conc_plots, make_benchmark_emis_plots, \
     make_benchmark_emis_tables, make_benchmark_jvalue_plots, \
     make_benchmark_aod_plots, make_benchmark_mass_tables, \
     make_benchmark_operations_budget, make_benchmark_aerosol_tables
-from gcpy.benchmark.modules.benchmark_utils import print_benchmark_info
+from gcpy.benchmark.modules.benchmark_utils import \
+    gcc_vs_gcc_dirs, gchp_vs_gcc_dirs, gchp_vs_gchp_dirs, \
+    get_log_filepaths, print_benchmark_info
 from gcpy.benchmark.modules.benchmark_models_vs_obs \
     import make_benchmark_models_vs_obs_plots
 from gcpy.benchmark.modules.benchmark_models_vs_sondes \
     import make_benchmark_models_vs_sondes_plots
-#TODO: Peel out routines from benchmark_funcs.py into smaller
-# routines in the gcpy/benchmark/modules folder, such as these:
 from gcpy.benchmark.modules.benchmark_drydep \
     import drydepvel_species, make_benchmark_drydep_plots
+from gcpy.benchmark.modules.benchmark_scrape_gcclassic_timers import \
+    make_benchmark_gcclassic_timing_table
+from gcpy.benchmark.modules.benchmark_scrape_gchp_timers import \
+    make_benchmark_gchp_timing_table
 
 # Tell matplotlib not to look for an X-window
 os.environ["QT_QPA_PLATFORM"] = "offscreen"
@@ -107,73 +113,25 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev):
     # ======================================================================
 
     # Diagnostics file directory paths
-    gcc_vs_gcc_refdir = os.path.join(
-        config["paths"]["main_dir"],
-        config["data"]["ref"]["gcc"]["dir"],
-        config["data"]["ref"]["gcc"]["outputs_subdir"],
-    )
-    gcc_vs_gcc_devdir = os.path.join(
-        config["paths"]["main_dir"],
-        config["data"]["dev"]["gcc"]["dir"],
-        config["data"]["dev"]["gcc"]["outputs_subdir"],
-    )
-    gchp_vs_gcc_refdir = os.path.join(
-        config["paths"]["main_dir"],
-        config["data"]["dev"]["gcc"]["dir"],
-        config["data"]["dev"]["gcc"]["outputs_subdir"],
-    )
-    gchp_vs_gcc_devdir = 
os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gchp"]["dir"], - config["data"]["dev"]["gchp"]["outputs_subdir"], - ) - gchp_vs_gchp_refdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["ref"]["gchp"]["dir"], - config["data"]["ref"]["gchp"]["outputs_subdir"], - ) - gchp_vs_gchp_devdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gchp"]["dir"], - config["data"]["dev"]["gchp"]["outputs_subdir"], - ) + s = "outputs_subdir" + gcc_vs_gcc_refdir, gcc_vs_gcc_devdir = gcc_vs_gcc_dirs(config, s) + gchp_vs_gcc_refdir, gchp_vs_gcc_devdir = gchp_vs_gcc_dirs(config, s) + gchp_vs_gchp_refdir, gchp_vs_gchp_devdir = gchp_vs_gchp_dirs(config, s) # Restart file directory paths - gcc_vs_gcc_refrstdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["ref"]["gcc"]["dir"], - config["data"]["ref"]["gcc"]["restarts_subdir"] - ) - gcc_vs_gcc_devrstdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gcc"]["dir"], - config["data"]["dev"]["gcc"]["restarts_subdir"] - ) - gchp_vs_gcc_refrstdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gcc"]["dir"], - config["data"]["dev"]["gcc"]["restarts_subdir"] - ) - gchp_vs_gcc_devrstdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gchp"]["dir"], - config["data"]["dev"]["gchp"]["restarts_subdir"] - ) - gchp_vs_gchp_refrstdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["ref"]["gchp"]["dir"], - config["data"]["ref"]["gchp"]["restarts_subdir"] - ) - gchp_vs_gchp_devrstdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gchp"]["dir"], - config["data"]["dev"]["gchp"]["restarts_subdir"] - ) + s = "restarts_subdir" + gcc_vs_gcc_refrstdir, gcc_vs_gcc_devrstdir = gcc_vs_gcc_dirs(config, s) + gchp_vs_gcc_refrstdir, gchp_vs_gcc_devrstdir = gchp_vs_gcc_dirs(config, s) + gchp_vs_gchp_refrstdir, gchp_vs_gchp_devrstdir = gchp_vs_gchp_dirs(config, s) + + # Restart file directory paths + s = "logs_subdir" + gcc_vs_gcc_reflogdir, gcc_vs_gcc_devlogdir = gcc_vs_gcc_dirs(config, s) + gchp_vs_gcc_reflogdir, gchp_vs_gcc_devlogdir = gchp_vs_gcc_dirs(config, s) + gchp_vs_gchp_reflogdir, gchp_vs_gchp_devlogdir = gchp_vs_gchp_dirs(config, s) # Directories where plots & tables will be created - mainresultsdir = os.path.join( - config["paths"]["results_dir"] - ) + mainresultsdir = os.path.join(config["paths"]["results_dir"]) gcc_vs_gcc_resultsdir = os.path.join( mainresultsdir, config["options"]["comparisons"]["gcc_vs_gcc"]["dir"] @@ -236,15 +194,10 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): #gchp_vs_gchp_budgetdir = os.path.join(gchp_vs_gchp_resultsdir, "Budget") # Models vs. observations directories - gcc_vs_gcc_models_vs_obs_dir = os.path.join( - gcc_vs_gcc_resultsdir, "ModelVsObs" - ) - gchp_vs_gcc_models_vs_obs_dir = os.path.join( - gchp_vs_gcc_resultsdir, "ModelVsObs" - ) - gchp_vs_gchp_models_vs_obs_dir = os.path.join( - gchp_vs_gchp_resultsdir, "ModelVsObs" - ) + s = "ModelVsObs" + gcc_vs_gcc_models_vs_obs_dir = os.path.join(gcc_vs_gcc_resultsdir, s) + gchp_vs_gcc_models_vs_obs_dir = os.path.join(gchp_vs_gcc_resultsdir, s) + gchp_vs_gchp_models_vs_obs_dir = os.path.join(gchp_vs_gchp_resultsdir, s) # ====================================================================== # Plot title strings @@ -866,6 +819,34 @@ def gcc_vs_gcc_ops_budg(mon): overwrite=True, ) + # ================================================================== + # GCC vs. 
GCC Benchmark Timing Table + # ================================================================== + if config["options"]["outputs"]["timing_table"]: + print("\n%%% Creating GCC vs. GCC Benchmark Timing table %%%") + + # Filepaths + ref = get_log_filepaths( + gcc_vs_gcc_reflogdir, + config["data"]["ref"]["gcc"]["logs_template"], + all_months_ref + ) + dev = get_log_filepaths( + gcc_vs_gcc_devlogdir, + config["data"]["dev"]["gcc"]["logs_template"], + all_months_dev + ) + + # Create the table + make_benchmark_gcclassic_timing_table( + ref, + config["data"]["ref"]["gcc"]["version"], + dev, + config["data"]["dev"]["gcc"]["version"], + dst=gcc_vs_gcc_tablesdir, + overwrite=True, + ) + # ================================================================== # GCC vs GCC Global mean OH, MCF Lifetime, CH4 Lifetime # ================================================================== @@ -2224,6 +2205,34 @@ def gchp_vs_gchp_ops_budg(mon): if config["options"]["outputs"]["ste_table"]: print("\n%%% Skipping GCHP vs. GCHP Strat-Trop Exchange table %%%") + # ================================================================== + # GCHP vs. GCHP Benchmark Timing Table + # ================================================================== + if config["options"]["outputs"]["timing_table"]: + print("\n%%% Creating GCHP vs. GCHP Benchmark Timing table %%%") + + # Filepaths + ref = get_log_filepaths( + gchp_vs_gchp_reflogdir, + config["data"]["ref"]["gchp"]["logs_template"], + all_months_ref, + )[0] + dev = get_log_filepaths( + gchp_vs_gchp_devlogdir, + config["data"]["dev"]["gchp"]["logs_template"], + all_months_dev, + )[0] + + # Create the table + make_benchmark_gchp_timing_table( + ref, + config["data"]["ref"]["gchp"]["version"], + dev, + config["data"]["dev"]["gchp"]["version"], + dst=gcc_vs_gcc_tablesdir, + overwrite=True, + ) + # ================================================================== # GCHP vs GCHP Model vs. Observations plots # ================================================================== @@ -2231,6 +2240,8 @@ def gchp_vs_gchp_ops_budg(mon): print("\n%%% Creating GCHP vs. GCHP models vs. obs. plots %%%") # Filepaths + # NOTE: If the GCHP benchmark is done in one-shot + # then you need the [0] after the call to get_filepaths. ref = get_filepaths( gchp_vs_gchp_refdir, "SpeciesConc", @@ -2361,4 +2372,4 @@ def gchp_vs_gchp_ops_budg(mon): # ================================================================== # Print a message indicating that the benchmarks finished # ================================================================== - print("\n %%%% All requested benchmark plots/tables created! %%%%") + print("\n%%%% All requested benchmark plots/tables created! 
%%%%")

From 6f8e0497b1dbfe4e92f8c632ed2395b69647da45 Mon Sep 17 00:00:00 2001
From: Bob Yantosca
Date: Fri, 10 May 2024 12:19:50 -0400
Subject: [PATCH 36/43] Add percent difference column to timing tables

gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py
gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py
- In function print_timer:
  - Compute % difference between ref & dev (or assign NaN if it
    would result in a div-by-zero)
  - Decrease width of timer name column from 25 to 22 spaces
  - Change format of Ref & Dev columns from 20.3f to 18.3f
  - Add percent diff column as 12.3e format (right-aligned)
- In function display_timers:
  - Change width of column headers accordingly

Signed-off-by: Bob Yantosca
---
 .../modules/benchmark_scrape_gcclassic_timers.py |  8 ++++++--
 .../modules/benchmark_scrape_gchp_timers.py      | 13 ++++++++-----
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py b/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py
index 10491f2d..7560076c 100644
--- a/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py
+++ b/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py
@@ -5,6 +5,7 @@
 """
 import os
 import json
+import numpy as np
 from gcpy.util import make_directory, replace_whitespace, verify_variable_type
 
 
@@ -173,7 +174,10 @@ def print_timer(key, ref, dev, ofile):
     dev   : dict : Timing information from the "Dev" model
     ofile : file : File object where info will be written
     """
-    line = f"{key:<25} {ref[key]:>20.3f} {dev[key]:>20.3f}"
+    pctdiff = np.nan
+    if np.abs(ref[key]) > 0.0:
+        pctdiff = ((dev[key] - ref[key]) / ref[key]) * 100.0
+    line = f"{key:<22} {ref[key]:>18.3f} {dev[key]:>18.3f} {pctdiff:>12.3e}"
     print(line, file=ofile)
 
 
@@ -199,7 +203,7 @@ def display_timers(ref, ref_label, dev, dev_label, table_file):
         print(f"%%% Dev = {dev_label}", file=ofile)
         print("%"*79, file=ofile)
         print("\n", file=ofile)
-        print(f"{'Timer':<25} {'Ref [s]':>20} {'Dev [s]':>20}", file=ofile)
+        print(f"{'Timer':<22} {'Ref [s]':>18} {'Dev [s]':>18} {'% Diff':>12}", file=ofile)
         print("-"*79, file=ofile)
 
         # Print timers
diff --git a/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py b/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py
index 1253b1f3..01368432 100644
--- a/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py
+++ b/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py
@@ -4,6 +4,7 @@
 more text files.
 """
 import os
+import numpy as np
 from gcpy.util import make_directory, replace_whitespace, verify_variable_type
 
 
@@ -251,7 +252,11 @@ def print_timer(key, ref, dev, ofile):
     label = "--"*depth + key.split(".")[-1]
 
     # Line to print
-    line = f"{label:<25} {ref[key]:>20.3f} {dev[key]:>20.3f}"
+    pctdiff = np.nan
+    if np.abs(ref[key]) > 0.0:
+        pctdiff = ((dev[key] - ref[key]) / ref[key]) * 100.0
+    line = \
+        f"{label:<22} {ref[key]:>18.3f} {dev[key]:>18.3f} {pctdiff:>12.3e}"
     print(line, file=ofile)
 
 
@@ -278,8 +283,7 @@ def display_timers(ref, ref_label, dev, dev_label, table_file):
 
         # GCHPchem timers
         print("\n", file=ofile)
-        print(f"{'GCHPchem Timer':<25} {'Ref [s]':>20} {'Dev [s]':>20}",
-              file=ofile)
+        print(f"{'GCHPchem Timer':<22} {'Ref [s]':>18} {'Dev [s]':>18} {'% Diff':>12}", file=ofile)
         print("-"*79, file=ofile)
         for key in dev:
             if key.startswith("GCHPchem"):
@@ -287,8 +291,7 @@ def display_timers(ref, ref_label, dev, dev_label, table_file):
 
         # Summary timers
         print("\n", file=ofile)
-        print(f"{'Summary':<25} {'Ref [s]':>20} {'Dev [s]':>20}",
-              file=ofile)
+        print(f"{'Summary':<22} {'Ref [s]':>18} {'Dev [s]':>18} {'% Diff':>12}", file=ofile)
         print("-"*79, file=ofile)
         for key in dev:
             if key.startswith("All"):

From efafa95d69c65b06eb935e8528c38ede9dda0dc8 Mon Sep 17 00:00:00 2001
From: Bob Yantosca
Date: Fri, 10 May 2024 12:34:50 -0400
Subject: [PATCH 37/43] Update benchmark scripts & config files to add timing table output

gcpy/benchmark/cloud/template.1hr_benchmark.yml
gcpy/benchmark/cloud/template.1mo_benchmark.yml
gcpy/benchmark/config/1mo_benchmark.yml
gcpy/benchmark/config/1yr_fullchem_benchmark.yml
gcpy/benchmark/config/1yr_tt_benchmark.yml
- Add "logs_subdir" and "logs_template" tags to GCC/GCHP Ref & Dev
- Add "timing_table" to "outputs" section

CHANGELOG.md
- Updated accordingly

gcpy/benchmark/run_benchmark.py
gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py
gcpy/benchmark/modules/run_1yr_tt_benchmark.py
- Import & call make_benchmark_gcclassic_timing_table to create
  GCClassic vs. GCClassic timing information table
- Import & call make_benchmark_gchp_timing_table to create GCHP vs.
GCHP timing information table Signed-off-by: Bob Yantosca --- CHANGELOG.md | 1 + .../cloud/template.1hr_benchmark.yml | 9 ++ .../cloud/template.1mo_benchmark.yml | 9 ++ gcpy/benchmark/config/1mo_benchmark.yml | 9 ++ .../config/1yr_fullchem_benchmark.yml | 9 ++ gcpy/benchmark/config/1yr_tt_benchmark.yml | 9 ++ .../modules/run_1yr_fullchem_benchmark.py | 13 +- .../benchmark/modules/run_1yr_tt_benchmark.py | 144 ++++++++++-------- gcpy/benchmark/run_benchmark.py | 139 +++++++++-------- 9 files changed, 212 insertions(+), 130 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9cb203e9..b6ca2182 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -47,6 +47,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Create radionuclide, STE flux, and mass conservation tables for Ref and Dev versions in TransportTracers benchmarks - Use new function `copy_file_to_dir` to copy the benchmark script and configuration file to the benchmark results folders - Updated GitHub stalebot config file `stale.yml` with new issue/PR labels that should not go stale +- Updated benchmark driver scripts and config files to print GCClassic & GCHP timing information ### Fixed - CS inquiry functions in `gcpy/cstools.py` now work properly for `xr.Dataset` and `xr.DataArray` objects diff --git a/gcpy/benchmark/cloud/template.1hr_benchmark.yml b/gcpy/benchmark/cloud/template.1hr_benchmark.yml index 6ec5cab0..3bb1a676 100644 --- a/gcpy/benchmark/cloud/template.1hr_benchmark.yml +++ b/gcpy/benchmark/cloud/template.1hr_benchmark.yml @@ -47,6 +47,8 @@ data: dir: ref-gcc/run-directory outputs_subdir: OutputDir restarts_subdir: Restarts + logs_subdir: OutputDir + logs_template: runlog.txt bmk_start: "2019-07-01T00:00:00" bmk_end: "2019-07-01T01:00:00" gchp: @@ -54,6 +56,8 @@ data: dir: ref-gchp/run-directory outputs_subdir: OutputDir restarts_subdir: Restarts + logs_subdir: OutputDir + logs_template: runlog.txt bmk_start: "2019-07-01T00:00:00" bmk_end: "2019-07-01T01:00:00" is_pre_14.0: False @@ -64,6 +68,8 @@ data: dir: dev-gcc/run-directory outputs_subdir: OutputDir restarts_subdir: Restarts + logs_subdir: OutputDir + logs_template: runlog.txt bmk_start: "2019-07-01T00:00:00" bmk_end: "2019-07-01T01:00:00" gchp: @@ -71,6 +77,8 @@ data: dir: dev-gchp/run-directory outputs_subdir: OutputDir restarts_subdir: Restarts + logs_subdir: OutputDir + logs_template: runlog.txt bmk_start: "2019-07-01T00:00:00" bmk_end: "2019-07-01T01:00:00" is_pre_14.0: False @@ -117,6 +125,7 @@ options: ops_budget_table: False OH_metrics: True ste_table: True # GCC only + timing_table: True summary_table: True plot_options: by_spc_cat: True diff --git a/gcpy/benchmark/cloud/template.1mo_benchmark.yml b/gcpy/benchmark/cloud/template.1mo_benchmark.yml index a8dad2e0..ff431932 100644 --- a/gcpy/benchmark/cloud/template.1mo_benchmark.yml +++ b/gcpy/benchmark/cloud/template.1mo_benchmark.yml @@ -47,6 +47,8 @@ data: dir: ref-gcc/run-directory outputs_subdir: OutputDir restarts_subdir: Restarts + logs_subdir: OutputDir + logs_template: runlog.txt bmk_start: "2019-07-01T00:00:00" bmk_end: "2019-08-01T00:00:00" gchp: @@ -54,6 +56,8 @@ data: dir: ref-gchp/run-directory outputs_subdir: OutputDir restarts_subdir: Restarts + logs_subdir: OutputDir + logs_template: runlog.txt bmk_start: "2019-07-01T00:00:00" bmk_end: "2019-08-01T00:00:00" is_pre_14.0: False @@ -64,6 +68,8 @@ data: dir: dev-gcc/run-directory outputs_subdir: OutputDir restarts_subdir: Restarts + logs_subdir: OutputDir + logs_template: runlog.txt bmk_start: 
"2019-07-01T00:00:00" bmk_end: "2019-08-01T00:00:00" gchp: @@ -71,6 +77,8 @@ data: dir: dev-gchp/run-directory outputs_subdir: OutputDir restarts_subdir: Restarts + logs_subdir: OutputDir + logs_template: runlog.txt bmk_start: "2019-07-01T00:00:00" bmk_end: "2019-08-01T00:00:00" is_pre_14.0: False @@ -117,6 +125,7 @@ options: ops_budget_table: False OH_metrics: True ste_table: True # GCC only + timing_table: True summary_table: True plot_options: by_spc_cat: True diff --git a/gcpy/benchmark/config/1mo_benchmark.yml b/gcpy/benchmark/config/1mo_benchmark.yml index 65f84b0a..173ed8eb 100644 --- a/gcpy/benchmark/config/1mo_benchmark.yml +++ b/gcpy/benchmark/config/1mo_benchmark.yml @@ -47,6 +47,8 @@ data: dir: GCC_ref outputs_subdir: OutputDir restarts_subdir: Restarts + logs_subdir: . + logs_template: "log.%Y%m%d" bmk_start: "2019-07-01T00:00:00" bmk_end: "2019-08-01T00:00:00" gchp: @@ -54,6 +56,8 @@ data: dir: GCHP_ref outputs_subdir: OutputDir restarts_subdir: Restarts + logs_subdir: . + logs_template: "gchp.%Y%m%d_0000z.log" bmk_start: "2019-07-01T00:00:00" bmk_end: "2019-08-01T00:00:00" is_pre_14.0: False @@ -64,6 +68,8 @@ data: dir: GCC_dev outputs_subdir: OutputDir restarts_subdir: Restarts + logs_subdir: . + logs_template: "log.%Y%m%d" bmk_start: "2019-07-01T00:00:00" bmk_end: "2019-08-01T00:00:00" gchp: @@ -71,6 +77,8 @@ data: dir: GCHP_dev outputs_subdir: OutputDir restarts_subdir: Restarts + logs_subdir: Logs + logs_template: "gchp.%Y%m%d_0000z.log" bmk_start: "2019-07-01T00:00:00" bmk_end: "2019-08-01T00:00:00" is_pre_14.0: False @@ -117,6 +125,7 @@ options: ops_budget_table: False OH_metrics: True ste_table: True # GCC only + timing_table: True summary_table: True plot_options: by_spc_cat: True diff --git a/gcpy/benchmark/config/1yr_fullchem_benchmark.yml b/gcpy/benchmark/config/1yr_fullchem_benchmark.yml index aae845ac..6fbf1869 100644 --- a/gcpy/benchmark/config/1yr_fullchem_benchmark.yml +++ b/gcpy/benchmark/config/1yr_fullchem_benchmark.yml @@ -59,6 +59,8 @@ data: dir: GCC_ref outputs_subdir: OutputDir restarts_subdir: Restarts + logs_subdir: Logs + logs_template: "log.%Y%m%d" bmk_start: "2019-01-01T00:00:00" bmk_end: "2020-01-01T00:00:00" gchp: @@ -66,6 +68,8 @@ data: dir: GCHP_ref outputs_subdir: OutputDir restarts_subdir: Restarts + logs_subdir: Logs + logs_template: "gchp.%Y%m%d_0000z.log" bmk_start: "2019-01-01T00:00:00" bmk_end: "2020-01-01T00:00:00" is_pre_14.0: False @@ -76,6 +80,8 @@ data: dir: GCC_dev outputs_subdir: OutputDir restarts_subdir: Restarts + logs_subdir: Logs + logs_template: "log.%Y%m%d" bmk_start: "2019-01-01T00:00:00" bmk_end: "2020-01-01T00:00:00" gchp: @@ -83,6 +89,8 @@ data: dir: GCHP_dev outputs_subdir: OutputDir restarts_subdir: Restarts + logs_subdir: Logs + logs_template: "gchp.%Y%m%d_0000z.log" bmk_start: "2019-01-01T00:00:00" bmk_end: "2020-01-01T00:00:00" is_pre_14.0: False @@ -130,6 +138,7 @@ options: Ox_budget_table: True ste_table: True # GCC only OH_metrics: True + timing_table: True plot_models_vs_obs: True plot_options: by_spc_cat: True diff --git a/gcpy/benchmark/config/1yr_tt_benchmark.yml b/gcpy/benchmark/config/1yr_tt_benchmark.yml index 1f631cf1..c2809c0b 100644 --- a/gcpy/benchmark/config/1yr_tt_benchmark.yml +++ b/gcpy/benchmark/config/1yr_tt_benchmark.yml @@ -48,6 +48,8 @@ data: dir: GCC_ref outputs_subdir: OutputDir restarts_subdir: Restarts + logs_subdir: Logs + logs_template: "log.%Y%m%d" bmk_start: "2019-01-01T00:00:00" bmk_end: "2020-01-01T00:00:00" gchp: @@ -55,6 +57,8 @@ data: dir: GCHP_ref outputs_subdir: OutputDir 
restarts_subdir: Restarts + logs_subdir: Logs + logs_template: "gchp.%Y%m%d_0000z.log" bmk_start: "2019-01-01T00:00:00" bmk_end: "2020-01-01T00:00:00" is_pre_14.0: False @@ -65,6 +69,8 @@ data: dir: GCC_dev outputs_subdir: OutputDir restarts_subdir: Restarts + logs_subdir: Logs + logs_template: "log.%Y%m%d" bmk_start: "2019-01-01T00:00:00" bmk_end: "2020-01-01T00:00:00" gchp: @@ -72,6 +78,8 @@ data: dir: GCHP_dev outputs_subdir: OutputDir restarts_subdir: Restarts + logs_subdir: Logs + logs_template: "gchp.%Y%m%d_0000z.log" bmk_start: "2019-01-01T00:00:00" bmk_end: "2020-01-01T00:00:00" is_pre_14.0: False @@ -115,6 +123,7 @@ options: mass_table: True ste_table: True cons_table: True + timing_table: False # # n_cores: Specify the number of cores to use. # -1: Use $OMP_NUM_THREADS cores diff --git a/gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py b/gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py index d375cdaf..68d42dd0 100644 --- a/gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py +++ b/gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py @@ -85,6 +85,7 @@ # Tell matplotlib not to look for an X-window os.environ["QT_QPA_PLATFORM"] = "offscreen" + # Suppress annoying warning messages warnings.filterwarnings("ignore", category=RuntimeWarning) warnings.filterwarnings("ignore", category=UserWarning) @@ -124,7 +125,7 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): gchp_vs_gcc_refrstdir, gchp_vs_gcc_devrstdir = gchp_vs_gcc_dirs(config, s) gchp_vs_gchp_refrstdir, gchp_vs_gchp_devrstdir = gchp_vs_gchp_dirs(config, s) - # Restart file directory paths + # Log file directory paths s = "logs_subdir" gcc_vs_gcc_reflogdir, gcc_vs_gcc_devlogdir = gcc_vs_gcc_dirs(config, s) gchp_vs_gcc_reflogdir, gchp_vs_gcc_devlogdir = gchp_vs_gcc_dirs(config, s) @@ -1575,7 +1576,7 @@ def gchp_vs_gcc_ops_budg(mon): if config["options"]["comparisons"]["gchp_vs_gchp"]["run"]: # ================================================================== - # GCHP vs GCC filepaths for StateMet collection data + # GCHP vs GCHP filepaths for StateMet collection data # ================================================================== refmet = get_filepaths( gchp_vs_gchp_refdir, @@ -2212,15 +2213,17 @@ def gchp_vs_gchp_ops_budg(mon): print("\n%%% Creating GCHP vs. GCHP Benchmark Timing table %%%") # Filepaths + # NOTE: Usually the GCHP 1-yr benchmark is run as + # one job, so we only need to take the 1st log file. 
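+        # (get_log_filepaths returns a list, so the [0] below
+        # selects that single log file.)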
ref = get_log_filepaths( gchp_vs_gchp_reflogdir, config["data"]["ref"]["gchp"]["logs_template"], - all_months_ref, + all_months_gchp_ref, )[0] dev = get_log_filepaths( gchp_vs_gchp_devlogdir, config["data"]["dev"]["gchp"]["logs_template"], - all_months_dev, + all_months_gchp_dev, )[0] # Create the table @@ -2229,7 +2232,7 @@ def gchp_vs_gchp_ops_budg(mon): config["data"]["ref"]["gchp"]["version"], dev, config["data"]["dev"]["gchp"]["version"], - dst=gcc_vs_gcc_tablesdir, + dst=gchp_vs_gchp_tablesdir, overwrite=True, ) diff --git a/gcpy/benchmark/modules/run_1yr_tt_benchmark.py b/gcpy/benchmark/modules/run_1yr_tt_benchmark.py index b94055bd..bc5f58fe 100644 --- a/gcpy/benchmark/modules/run_1yr_tt_benchmark.py +++ b/gcpy/benchmark/modules/run_1yr_tt_benchmark.py @@ -64,7 +64,13 @@ make_benchmark_operations_budget, make_benchmark_mass_conservation_table from gcpy.benchmark.modules.budget_tt import transport_tracers_budgets from gcpy.benchmark.modules.ste_flux import make_benchmark_ste_table -from gcpy.benchmark.modules.benchmark_utils import print_benchmark_info +from gcpy.benchmark.modules.benchmark_utils import \ + gcc_vs_gcc_dirs, gchp_vs_gcc_dirs, gchp_vs_gchp_dirs, \ + get_log_filepaths, print_benchmark_info +from gcpy.benchmark.modules.benchmark_scrape_gcclassic_timers import \ + make_benchmark_gcclassic_timing_table +from gcpy.benchmark.modules.benchmark_scrape_gchp_timers import \ + make_benchmark_gchp_timing_table # Tell matplotlib not to look for an X-window os.environ["QT_QPA_PLATFORM"] = "offscreen" @@ -93,69 +99,23 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): # For gchp_vs_gcc_refdir use config["data"]["dev"]["gcc"]["version"], not ref (mps, 6/27/19) # ====================================================================== - # Diagnostic file directory paths - gcc_vs_gcc_refdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["ref"]["gcc"]["dir"], - config["data"]["ref"]["gcc"]["outputs_subdir"] - ) - gcc_vs_gcc_devdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gcc"]["dir"], - config["data"]["dev"]["gcc"]["outputs_subdir"] - ) - gchp_vs_gcc_refdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gcc"]["dir"], - config["data"]["dev"]["gcc"]["outputs_subdir"] - ) - gchp_vs_gcc_devdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gchp"]["dir"], - config["data"]["dev"]["gchp"]["outputs_subdir"] - ) - gchp_vs_gchp_refdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["ref"]["gchp"]["dir"], - config["data"]["ref"]["gchp"]["outputs_subdir"] - ) - gchp_vs_gchp_devdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gchp"]["dir"], - config["data"]["dev"]["gchp"]["outputs_subdir"] - ) + # Diagnostics file directory paths + s = "outputs_subdir" + gcc_vs_gcc_refdir, gcc_vs_gcc_devdir = gcc_vs_gcc_dirs(config, s) + gchp_vs_gcc_refdir, gchp_vs_gcc_devdir = gchp_vs_gcc_dirs(config, s) + gchp_vs_gchp_refdir, gchp_vs_gchp_devdir = gchp_vs_gchp_dirs(config, s) - # Diagnostic file directory paths - gcc_vs_gcc_refrstdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["ref"]["gcc"]["dir"], - config["data"]["ref"]["gcc"]["restarts_subdir"] - ) - gchp_vs_gcc_refrstdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gcc"]["dir"], - config["data"]["dev"]["gcc"]["restarts_subdir"] - ) - gchp_vs_gchp_refrstdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["ref"]["gchp"]["dir"], - 
config["data"]["ref"]["gchp"]["restarts_subdir"] - ) - gcc_vs_gcc_devrstdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gcc"]["dir"], - config["data"]["dev"]["gcc"]["restarts_subdir"] - ) - gchp_vs_gcc_devrstdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gchp"]["dir"], - config["data"]["dev"]["gchp"]["restarts_subdir"] - ) - gchp_vs_gchp_devrstdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gchp"]["dir"], - config["data"]["dev"]["gchp"]["restarts_subdir"] - ) + # Restart file directory paths + s = "restarts_subdir" + gcc_vs_gcc_refrstdir, gcc_vs_gcc_devrstdir = gcc_vs_gcc_dirs(config, s) + gchp_vs_gcc_refrstdir, gchp_vs_gcc_devrstdir = gchp_vs_gcc_dirs(config, s) + gchp_vs_gchp_refrstdir, gchp_vs_gchp_devrstdir = gchp_vs_gchp_dirs(config, s) + + # Log file directory paths + s = "logs_subdir" + gcc_vs_gcc_reflogdir, gcc_vs_gcc_devlogdir = gcc_vs_gcc_dirs(config, s) + gchp_vs_gcc_reflogdir, gchp_vs_gcc_devlogdir = gchp_vs_gcc_dirs(config, s) + gchp_vs_gchp_reflogdir, gchp_vs_gchp_devlogdir = gchp_vs_gchp_dirs(config, s) # Directories where plots & tables will be created mainresultsdir = os.path.join( @@ -611,6 +571,34 @@ def gcc_vs_gcc_mass_table(mon): overwrite=True, ) + # ================================================================== + # GCC vs. GCC Benchmark Timing Table + # ================================================================== + if config["options"]["outputs"]["timing_table"]: + print("\n%%% Creating GCC vs. GCC Benchmark Timing table %%%") + + # Filepaths + ref = get_log_filepaths( + gcc_vs_gcc_reflogdir, + config["data"]["ref"]["gcc"]["logs_template"], + all_months_ref + ) + dev = get_log_filepaths( + gcc_vs_gcc_devlogdir, + config["data"]["dev"]["gcc"]["logs_template"], + all_months_dev + ) + + # Create the table + make_benchmark_gcclassic_timing_table( + ref, + config["data"]["ref"]["gcc"]["version"], + dev, + config["data"]["dev"]["gcc"]["version"], + dst=gcc_vs_gcc_tablesdir, + overwrite=True, + ) + # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Create GCHP vs GCC benchmark plots and tables # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -1242,6 +1230,36 @@ def gchp_vs_gchp_mass_table(mon): dst=gchp_vs_gchp_tablesdir, ) + # ================================================================== + # GCHP vs. GCHP Benchmark Timing Table + # ================================================================== + if config["options"]["outputs"]["timing_table"]: + print("\n%%% Creating GCHP vs. GCHP Benchmark Timing table %%%") + + # Filepaths + # NOTE: Usually the GCHP 1-yr benchmark is run as + # one job, so we only need to take the 1st log file. 
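+        # (get_log_filepaths returns a list of paths; the [0]
+        # below takes its first and only entry.)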
+ ref = get_log_filepaths( + gchp_vs_gchp_reflogdir, + config["data"]["ref"]["gchp"]["logs_template"], + all_months_gchp_ref, + )[0] + dev = get_log_filepaths( + gchp_vs_gchp_devlogdir, + config["data"]["dev"]["gchp"]["logs_template"], + all_months_gchp_dev, + )[0] + + # Create the table + make_benchmark_gchp_timing_table( + ref, + config["data"]["ref"]["gchp"]["version"], + dev, + config["data"]["dev"]["gchp"]["version"], + dst=gchp_vs_gchp_tablesdir, + overwrite=True, + ) + # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Create mass conservations tables for GCC and GCHP # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/gcpy/benchmark/run_benchmark.py b/gcpy/benchmark/run_benchmark.py index e474f2d7..0105d63f 100755 --- a/gcpy/benchmark/run_benchmark.py +++ b/gcpy/benchmark/run_benchmark.py @@ -63,9 +63,15 @@ import run_benchmark as run_1yr_benchmark from gcpy.benchmark.modules.run_1yr_tt_benchmark \ import run_benchmark as run_1yr_tt_benchmark -from gcpy.benchmark.modules.benchmark_utils import print_benchmark_info +from gcpy.benchmark.modules.benchmark_utils import \ + gcc_vs_gcc_dirs, gchp_vs_gcc_dirs, gchp_vs_gchp_dirs, \ + get_log_filepaths, print_benchmark_info from gcpy.benchmark.modules.benchmark_drydep \ import drydepvel_species, make_benchmark_drydep_plots +from gcpy.benchmark.modules.benchmark_scrape_gcclassic_timers import \ + make_benchmark_gcclassic_timing_table +from gcpy.benchmark.modules.benchmark_scrape_gchp_timers import \ + make_benchmark_gchp_timing_table # Tell matplotlib not to look for an X-window os.environ["QT_QPA_PLATFORM"] = "offscreen" @@ -135,69 +141,23 @@ def run_benchmark_default(config): # not ref (mps, 6/27/19) # ===================================================================== - # Diagnostic file directory paths - gcc_vs_gcc_refdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["ref"]["gcc"]["dir"], - config["data"]["ref"]["gcc"]["outputs_subdir"], - ) - gcc_vs_gcc_devdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gcc"]["dir"], - config["data"]["dev"]["gcc"]["outputs_subdir"], - ) - gchp_vs_gcc_refdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gcc"]["dir"], - config["data"]["dev"]["gcc"]["outputs_subdir"], - ) - gchp_vs_gcc_devdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gchp"]["dir"], - config["data"]["dev"]["gchp"]["outputs_subdir"], - ) - gchp_vs_gchp_refdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["ref"]["gchp"]["dir"], - config["data"]["ref"]["gchp"]["outputs_subdir"], - ) - gchp_vs_gchp_devdir = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gchp"]["dir"], - config["data"]["dev"]["gchp"]["outputs_subdir"], - ) + # Diagnostics file directory paths + s = "outputs_subdir" + gcc_vs_gcc_refdir, gcc_vs_gcc_devdir = gcc_vs_gcc_dirs(config, s) + gchp_vs_gcc_refdir, gchp_vs_gcc_devdir = gchp_vs_gcc_dirs(config, s) + gchp_vs_gchp_refdir, gchp_vs_gchp_devdir = gchp_vs_gchp_dirs(config, s) # Restart file directory paths - gcc_vs_gcc_refrst = os.path.join( - config["paths"]["main_dir"], - config["data"]["ref"]["gcc"]["dir"], - config["data"]["ref"]["gcc"]["restarts_subdir"] - ) - gcc_vs_gcc_devrst = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gcc"]["dir"], - config["data"]["dev"]["gcc"]["restarts_subdir"] - ) - gchp_vs_gcc_refrst = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gcc"]["dir"], - 
config["data"]["dev"]["gcc"]["restarts_subdir"] - ) - gchp_vs_gcc_devrst = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gchp"]["dir"], - config["data"]["dev"]["gchp"]["restarts_subdir"] - ) - gchp_vs_gchp_refrst = os.path.join( - config["paths"]["main_dir"], - config["data"]["ref"]["gchp"]["dir"], - config["data"]["ref"]["gchp"]["restarts_subdir"] - ) - gchp_vs_gchp_devrst = os.path.join( - config["paths"]["main_dir"], - config["data"]["dev"]["gchp"]["dir"], - config["data"]["dev"]["gchp"]["restarts_subdir"] - ) + s = "restarts_subdir" + gcc_vs_gcc_refrstdir, gcc_vs_gcc_devrstdir = gcc_vs_gcc_dirs(config, s) + gchp_vs_gcc_refrstdir, gchp_vs_gcc_devrstdir = gchp_vs_gcc_dirs(config, s) + gchp_vs_gchp_refrstdir, gchp_vs_gchp_devrstdir = gchp_vs_gchp_dirs(config, s) + + # Log file directory paths + s = "logs_subdir" + gcc_vs_gcc_reflogdir, gcc_vs_gcc_devlogdir = gcc_vs_gcc_dirs(config, s) + gchp_vs_gcc_reflogdir, gchp_vs_gcc_devlogdir = gchp_vs_gcc_dirs(config, s) + gchp_vs_gchp_reflogdir, gchp_vs_gchp_devlogdir = gchp_vs_gchp_dirs(config, s) # ===================================================================== # Benchmark output directories @@ -666,6 +626,34 @@ def run_benchmark_default(config): month=gcc_dev_date.astype(datetime).month, ) + # ================================================================== + # GCC vs. GCC Benchmark Timing Table + # ================================================================== + if config["options"]["outputs"]["timing_table"]: + print("\n%%% Creating GCC vs. GCC Benchmark Timing table %%%") + + # Filepaths + ref = get_log_filepaths( + gcc_vs_gcc_reflogdir, + config["data"]["ref"]["gcc"]["logs_template"], + gcc_ref_date, + ) + dev = get_log_filepaths( + gcc_vs_gcc_devlogdir, + config["data"]["dev"]["gcc"]["logs_template"], + gcc_dev_date, + ) + + # Create the table + make_benchmark_gcclassic_timing_table( + ref, + config["data"]["ref"]["gcc"]["version"], + dev, + config["data"]["dev"]["gcc"]["version"], + dst=gcc_vs_gcc_tablesdir, + overwrite=True, + ) + # ================================================================== # GCC vs. GCC summary table # ================================================================== @@ -1094,6 +1082,33 @@ def run_benchmark_default(config): title = "\n%%% Skipping GCHP vs. GCC Strat-Trop Exchange table %%%" print(title) + # ================================================================== + # GCHP vs. GCHP Benchmark Timing Table + # ================================================================== + if config["options"]["outputs"]["timing_table"]: + print("\n%%% Creating GCHP vs. GCHP Benchmark Timing table %%%") + + # Filepaths + ref = get_log_filepaths( + gchp_vs_gchp_reflogdir, + config["data"]["ref"]["gchp"]["logs_template"], + gchp_ref_date, + ) + dev = get_log_filepaths( + gchp_vs_gchp_devlogdir, + config["data"]["dev"]["gchp"]["logs_template"], + gchp_dev_date, + ) + + # Create the table + make_benchmark_gchp_timing_table( + ref, + config["data"]["ref"]["gchp"]["version"], + dev, + config["data"]["dev"]["gchp"]["version"], + dst=gchp_vs_gchp_tablesdir, + overwrite=True, + ) # ================================================================== # GCHP vs. 
GCC summary table

From e8e13c08f702bc953a37572340195be194473df1 Mon Sep 17 00:00:00 2001
From: Bob Yantosca
Date: Fri, 10 May 2024 14:58:52 -0400
Subject: [PATCH 38/43] Add several fixes for benchmark timing tables

gcpy/benchmark/cloud/template.1hr_benchmark.yml
gcpy/benchmark/cloud/template.1mo_benchmark.yml
- Change the logs_subdir YAML tag to ".", since the log file is placed
  in the run directory rather than in the OutputDir subdirectory

gcpy/benchmark/modules/benchmark_utils.py
- Place the "timestamps" argument in a list if there is only one
  timestamp before trying to iterate over it with a for loop.

gcpy/benchmark/run_benchmark.py
- We had mistakenly placed the GCHP timing table in the GCHP vs. GCC
  section.  Move this to the GCHP vs. GCHP section.
---
 .../cloud/template.1hr_benchmark.yml          |  8 +--
 .../cloud/template.1mo_benchmark.yml          |  8 +--
 gcpy/benchmark/modules/benchmark_utils.py     |  5 ++
 gcpy/benchmark/run_benchmark.py               | 56 +++++++++----------
 4 files changed, 41 insertions(+), 36 deletions(-)

diff --git a/gcpy/benchmark/cloud/template.1hr_benchmark.yml b/gcpy/benchmark/cloud/template.1hr_benchmark.yml
index 3bb1a676..8daf5bad 100644
--- a/gcpy/benchmark/cloud/template.1hr_benchmark.yml
+++ b/gcpy/benchmark/cloud/template.1hr_benchmark.yml
@@ -47,7 +47,7 @@ data:
     dir: ref-gcc/run-directory
     outputs_subdir: OutputDir
     restarts_subdir: Restarts
-    logs_subdir: OutputDir
+    logs_subdir: .
     logs_template: runlog.txt
     bmk_start: "2019-07-01T00:00:00"
     bmk_end: "2019-07-01T01:00:00"
@@ -56,7 +56,7 @@ data:
     dir: ref-gchp/run-directory
     outputs_subdir: OutputDir
     restarts_subdir: Restarts
-    logs_subdir: OutputDir
+    logs_subdir: .
     logs_template: runlog.txt
     bmk_start: "2019-07-01T00:00:00"
     bmk_end: "2019-07-01T01:00:00"
@@ -68,7 +68,7 @@ data:
     dir: dev-gcc/run-directory
     outputs_subdir: OutputDir
     restarts_subdir: Restarts
-    logs_subdir: OutputDir
+    logs_subdir: .
     logs_template: runlog.txt
     bmk_start: "2019-07-01T00:00:00"
     bmk_end: "2019-07-01T01:00:00"
@@ -77,7 +77,7 @@ data:
     dir: dev-gchp/run-directory
     outputs_subdir: OutputDir
     restarts_subdir: Restarts
-    logs_subdir: OutputDir
+    logs_subdir: .
     logs_template: runlog.txt
     bmk_start: "2019-07-01T00:00:00"
     bmk_end: "2019-07-01T01:00:00"
diff --git a/gcpy/benchmark/cloud/template.1mo_benchmark.yml b/gcpy/benchmark/cloud/template.1mo_benchmark.yml
index ff431932..b0098fad 100644
--- a/gcpy/benchmark/cloud/template.1mo_benchmark.yml
+++ b/gcpy/benchmark/cloud/template.1mo_benchmark.yml
@@ -47,7 +47,7 @@ data:
     dir: ref-gcc/run-directory
     outputs_subdir: OutputDir
     restarts_subdir: Restarts
-    logs_subdir: OutputDir
+    logs_subdir: .
     logs_template: runlog.txt
     bmk_start: "2019-07-01T00:00:00"
     bmk_end: "2019-08-01T00:00:00"
@@ -56,7 +56,7 @@ data:
     dir: ref-gchp/run-directory
     outputs_subdir: OutputDir
     restarts_subdir: Restarts
-    logs_subdir: OutputDir
+    logs_subdir: .
     logs_template: runlog.txt
     bmk_start: "2019-07-01T00:00:00"
     bmk_end: "2019-08-01T00:00:00"
@@ -68,7 +68,7 @@ data:
     dir: dev-gcc/run-directory
     outputs_subdir: OutputDir
     restarts_subdir: Restarts
-    logs_subdir: OutputDir
+    logs_subdir: .
     logs_template: runlog.txt
     bmk_start: "2019-07-01T00:00:00"
     bmk_end: "2019-08-01T00:00:00"
@@ -77,7 +77,7 @@ data:
     dir: dev-gchp/run-directory
     outputs_subdir: OutputDir
     restarts_subdir: Restarts
-    logs_subdir: OutputDir
+    logs_subdir: .
logs_template: runlog.txt bmk_start: "2019-07-01T00:00:00" bmk_end: "2019-08-01T00:00:00" diff --git a/gcpy/benchmark/modules/benchmark_utils.py b/gcpy/benchmark/modules/benchmark_utils.py index 912714a9..e4e49c5d 100644 --- a/gcpy/benchmark/modules/benchmark_utils.py +++ b/gcpy/benchmark/modules/benchmark_utils.py @@ -636,6 +636,11 @@ def get_log_filepaths( if fmt in template: format_str += fmt + # If there is only one timestamp, add it to a list + # so that the for loop below will work properly. + if timestamps.size == 1: + timestamps = [timestamps] + # Create each output logfile name, replacing template with date for timestamp in timestamps: time = timestamp.item().strftime(format_str) diff --git a/gcpy/benchmark/run_benchmark.py b/gcpy/benchmark/run_benchmark.py index 0105d63f..0482c3fd 100755 --- a/gcpy/benchmark/run_benchmark.py +++ b/gcpy/benchmark/run_benchmark.py @@ -1082,34 +1082,6 @@ def run_benchmark_default(config): title = "\n%%% Skipping GCHP vs. GCC Strat-Trop Exchange table %%%" print(title) - # ================================================================== - # GCHP vs. GCHP Benchmark Timing Table - # ================================================================== - if config["options"]["outputs"]["timing_table"]: - print("\n%%% Creating GCHP vs. GCHP Benchmark Timing table %%%") - - # Filepaths - ref = get_log_filepaths( - gchp_vs_gchp_reflogdir, - config["data"]["ref"]["gchp"]["logs_template"], - gchp_ref_date, - ) - dev = get_log_filepaths( - gchp_vs_gchp_devlogdir, - config["data"]["dev"]["gchp"]["logs_template"], - gchp_dev_date, - ) - - # Create the table - make_benchmark_gchp_timing_table( - ref, - config["data"]["ref"]["gchp"]["version"], - dev, - config["data"]["dev"]["gchp"]["version"], - dst=gchp_vs_gchp_tablesdir, - overwrite=True, - ) - # ================================================================== # GCHP vs. GCC summary table # ================================================================== @@ -1589,6 +1561,34 @@ def run_benchmark_default(config): if config["options"]["outputs"]["ste_table"]: print("\n%%% Skipping GCHP vs. GCHP Strat-Trop Exchange table %%%") + # ================================================================== + # GCHP vs. GCHP Benchmark Timing Table + # ================================================================== + if config["options"]["outputs"]["timing_table"]: + print("\n%%% Creating GCHP vs. GCHP Benchmark Timing table %%%") + + # Filepaths + ref = get_log_filepaths( + gchp_vs_gchp_reflogdir, + config["data"]["ref"]["gchp"]["logs_template"], + gchp_ref_date, + ) + dev = get_log_filepaths( + gchp_vs_gchp_devlogdir, + config["data"]["dev"]["gchp"]["logs_template"], + gchp_dev_date, + ) + + # Create the table + make_benchmark_gchp_timing_table( + ref, + config["data"]["ref"]["gchp"]["version"], + dev, + config["data"]["dev"]["gchp"]["version"], + dst=gchp_vs_gchp_tablesdir, + overwrite=True, + ) + # ================================================================== # GCHP vs. 
GCHP summary table
     # ==================================================================

From d10b09ada44b3112682aa02b9aacba0e4031e866 Mon Sep 17 00:00:00 2001
From: Bob Yantosca
Date: Fri, 10 May 2024 16:02:44 -0400
Subject: [PATCH 39/43] More minor fixes for benchmark timing scripts

gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py
- Set timers that did not run to np.nan before trying to parse
  timing information

gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py
- Fixed typo: "GCHP Classic" -> "GCHP"

Signed-off-by: Bob Yantosca
---
 .../modules/benchmark_scrape_gcclassic_timers.py       | 5 ++++-
 gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py | 9 ++++-----
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py b/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py
index 7560076c..a12f088e 100644
--- a/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py
+++ b/gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py
@@ -121,7 +121,10 @@ def read_one_text_file(text_file):
             if keep_line:
                 substr = line.split(":")
                 key = substr[0].strip()
-                val = substr[3].split()[1].strip()
+                if "THE TIMER DID NOT RUN" in line:
+                    val = np.nan
+                else:
+                    val = substr[3].split()[1].strip()
                 timers[key] = {"seconds": val}

     return timers
diff --git a/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py b/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py
index 01368432..9fd74dd9 100644
--- a/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py
+++ b/gcpy/benchmark/modules/benchmark_scrape_gchp_timers.py
@@ -262,7 +262,7 @@ def print_timer(key, ref, dev, ofile):

 def display_timers(ref, ref_label, dev, dev_label, table_file):
     """
-    Prints the GEOS-Che timer information to a table.
+    Prints the GCHP timer information to a table.

     Args
     ref       : dict : Timing information from the "Ref" model
@@ -275,7 +275,7 @@

         # Print header
         print("%"*79, file=ofile)
-        print("%%% GCHP Classic Benchmark Timing Information", file=ofile)
+        print("%%% GCHP Benchmark Timing Information", file=ofile)
         print("%%%", file=ofile)
         print(f"%%% Ref = {ref_label}", file=ofile)
         print(f"%%% Dev = {dev_label}", file=ofile)
@@ -307,9 +307,8 @@ def make_benchmark_gchp_timing_table(
     overwrite=False,
 ):
     """
-    Creates a table of timing information for GEOS-Chem Classic
-    benchmark simulations given one or more JSON and/or text files
-    as input.
+    Creates a table of timing information for GCHP benchmark
+    simulations given one or more text files as input.

     Args
     ref_files : str|list : File(s) with timing info from the "Ref" model

From e0bd4a7a6bbf3089754d189749961f77358a8310 Mon Sep 17 00:00:00 2001
From: Bob Yantosca
Date: Mon, 13 May 2024 10:03:16 -0400
Subject: [PATCH 40/43] Now use "GC.log" as the GCClassic file name for 1mo_benchmark.yml

gcpy/benchmark/config/1mo_benchmark.yml
- Renamed "log.%Y%m%d" to "GC.log", which is the same name used in
  the geoschem.benchmark.run script for GEOS-Chem Classic benchmarks.
- NOTE: This YAML file is only needed when we run 1-month benchmarks
  manually.  The automatic cloud benchmarks use the template files
  in gcpy/benchmark/cloud.
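
For illustration, below is a minimal sketch of how a strftime-style
logs_template expands into log file paths.  The sketch is hypothetical
(the actual logic lives in get_log_filepaths in
gcpy/benchmark/modules/benchmark_utils.py); it only shows why
"log.%Y%m%d" yields one file name per date, while a literal template
such as "GC.log" passes through unchanged:

    # Hypothetical sketch of logs_template expansion; not the actual
    # gcpy implementation (see get_log_filepaths in benchmark_utils.py).
    import os
    import pandas as pd

    def expand_log_template(logs_dir, template, timestamps):
        """Expand any strftime tokens in template, once per timestamp."""
        paths = []
        for tstamp in timestamps:
            # strftime returns templates without % tokens
            # (e.g. "GC.log") unchanged
            paths.append(os.path.join(logs_dir, tstamp.strftime(template)))
        return paths

    dates = pd.date_range("2019-07-01", "2019-07-03")
    print(expand_log_template("dev-gcc/run-directory", "log.%Y%m%d", dates))
    # ['dev-gcc/run-directory/log.20190701', ..., '.../log.20190703']
    print(expand_log_template("dev-gcc/run-directory", "GC.log", dates))
    # ['dev-gcc/run-directory/GC.log'] repeated 3 times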
Signed-off-by: Bob Yantosca
---
 gcpy/benchmark/config/1mo_benchmark.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcpy/benchmark/config/1mo_benchmark.yml b/gcpy/benchmark/config/1mo_benchmark.yml
index 173ed8eb..1c86aa5f 100644
--- a/gcpy/benchmark/config/1mo_benchmark.yml
+++ b/gcpy/benchmark/config/1mo_benchmark.yml
@@ -48,7 +48,7 @@
     outputs_subdir: OutputDir
     restarts_subdir: Restarts
     logs_subdir: .
-    logs_template: "log.%Y%m%d"
+    logs_template: "GC.log"
     bmk_start: "2019-07-01T00:00:00"
     bmk_end: "2019-08-01T00:00:00"
   gchp:
@@ -69,7 +69,7 @@
     outputs_subdir: OutputDir
     restarts_subdir: Restarts
     logs_subdir: .
-    logs_template: "log.%Y%m%d"
+    logs_template: "GC.log"
     bmk_start: "2019-07-01T00:00:00"
     bmk_end: "2019-08-01T00:00:00"
   gchp:

From 91caa2be062c28fe66450e56d75b91528a5d9703 Mon Sep 17 00:00:00 2001
From: Bob Yantosca
Date: Mon, 13 May 2024 14:04:05 -0400
Subject: [PATCH 41/43] Hotfix: Rename DELPDRY to Met_DELPDRY

gcpy/util.py
- In function rename_and_flip_gchp_rst_vars, we have restored the
  if block to rename DELPDRY to Met_DELPDRY.  This is because GCHP
  restart files in 14.3.0 and prior name the dry delta-pressure
  field DELPDRY, whereas it has been correctly named DELP_DRY in
  GCHP 14.4.0 and later.  This HotFix is necessary in order to
  avoid a ValueError when comparing GCHP versions 14.3.0 or prior.

Signed-off-by: Bob Yantosca
---
 gcpy/util.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcpy/util.py b/gcpy/util.py
index e50d4874..9344e47a 100644
--- a/gcpy/util.py
+++ b/gcpy/util.py
@@ -788,6 +788,8 @@ def rename_and_flip_gchp_rst_vars(
             old_to_new[var] = 'SpeciesRst_' + spc
         if var == "DELP_DRY":
             old_to_new["DELP_DRY"] = "Met_DELPDRY"
+        if var == "DELPDRY":
+            old_to_new["DELPDRY"] = "Met_DELPDRY"
         if var == "BXHEIGHT":
             old_to_new["BXHEIGHT"] = "Met_BXHEIGHT"
         if var == "TropLev":

From 18f03d54decd51d2088d09aaef8238a6c774541d Mon Sep 17 00:00:00 2001
From: Bob Yantosca
Date: Mon, 13 May 2024 17:33:19 -0400
Subject: [PATCH 42/43] HotFix: Use refrstdir and devrstdir in run_benchmark.py

gcpy/benchmark/run_benchmark.py
- We now use gcc_vs_gcc_refrstdir, gcc_vs_gcc_devrstdir, etc., for
  the variables that store restart file directory paths.  This
  matches the usage in the 1-year benchmark scripts.

Signed-off-by: Bob Yantosca
---
 gcpy/benchmark/run_benchmark.py | 36 ++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/gcpy/benchmark/run_benchmark.py b/gcpy/benchmark/run_benchmark.py
index 0482c3fd..710910ea 100755
--- a/gcpy/benchmark/run_benchmark.py
+++ b/gcpy/benchmark/run_benchmark.py
@@ -510,8 +510,8 @@ def run_benchmark_default(config):
         print("\n%%% Creating GCC vs. GCC global mass tables %%%")

         # Filepaths
-        ref = get_filepath(gcc_vs_gcc_refrst, "Restart", gcc_end_ref_date)
-        dev = get_filepath(gcc_vs_gcc_devrst, "Restart", gcc_end_dev_date)
+        ref = get_filepath(gcc_vs_gcc_refrstdir, "Restart", gcc_end_ref_date)
+        dev = get_filepath(gcc_vs_gcc_devrstdir, "Restart", gcc_end_dev_date)

         # Create tables
         make_benchmark_mass_tables(
@@ -531,10 +531,10 @@ def run_benchmark_default(config):
            print("\n%%% Creating GCC vs.
GCC mass accumulation tables %%%") # Filepaths for start and end restart files - refs = get_filepath(gcc_vs_gcc_refrst, "Restart", gcc_ref_date) - devs = get_filepath(gcc_vs_gcc_devrst, "Restart", gcc_dev_date) - refe = get_filepath(gcc_vs_gcc_refrst, "Restart", gcc_end_ref_date) - deve = get_filepath(gcc_vs_gcc_devrst, "Restart", gcc_end_dev_date) + refs = get_filepath(gcc_vs_gcc_refrstdir, "Restart", gcc_ref_date) + devs = get_filepath(gcc_vs_gcc_devrstdir, "Restart", gcc_dev_date) + refe = get_filepath(gcc_vs_gcc_refrstdir, "Restart", gcc_end_ref_date) + deve = get_filepath(gcc_vs_gcc_devrstdir, "Restart", gcc_end_dev_date) # Get period strings refs_str = np.datetime_as_string(gcc_ref_date, unit="s") @@ -932,12 +932,12 @@ def run_benchmark_default(config): # Filepaths ref = get_filepath( - gchp_vs_gcc_refrst, + gchp_vs_gcc_refrstdir, "Restart", gcc_end_dev_date ) dev = get_filepath( - gchp_vs_gcc_devrst, + gchp_vs_gcc_devrstdir, "Restart", gchp_end_dev_date, is_gchp=True, @@ -964,12 +964,12 @@ def run_benchmark_default(config): # Filepaths for start and end restart files refs = get_filepath( - gchp_vs_gcc_refrst, + gchp_vs_gcc_refrstdir, "Restart", gcc_dev_date ) devs = get_filepath( - gchp_vs_gcc_devrst, + gchp_vs_gcc_devrstdir, "Restart", gchp_dev_date, is_gchp=True, @@ -977,12 +977,12 @@ def run_benchmark_default(config): gchp_is_pre_14_0=config["data"]["dev"]["gchp"]["is_pre_14.0"] ) refe = get_filepath( - gchp_vs_gcc_refrst, + gchp_vs_gcc_refrstdir, "Restart", gcc_end_dev_date ) deve = get_filepath( - gchp_vs_gcc_devrst, + gchp_vs_gcc_devrstdir, "Restart", gchp_end_dev_date, is_gchp=True, @@ -1393,7 +1393,7 @@ def run_benchmark_default(config): # Filepaths ref = get_filepath( - gchp_vs_gchp_refrst, + gchp_vs_gchp_refrstdir, "Restart", gchp_end_ref_date, is_gchp=True, @@ -1401,7 +1401,7 @@ def run_benchmark_default(config): gchp_is_pre_14_0=config["data"]["ref"]["gchp"]["is_pre_14.0"] ) dev = get_filepath( - gchp_vs_gchp_devrst, + gchp_vs_gchp_devrstdir, "Restart", gchp_end_dev_date, is_gchp=True, @@ -1428,7 +1428,7 @@ def run_benchmark_default(config): # Filepaths for start and end restart files refs = get_filepath( - gchp_vs_gchp_refrst, + gchp_vs_gchp_refrstdir, "Restart", gchp_ref_date, is_gchp=True, @@ -1436,7 +1436,7 @@ def run_benchmark_default(config): gchp_is_pre_14_0=config["data"]["ref"]["gchp"]["is_pre_14.0"] ) devs = get_filepath( - gchp_vs_gchp_devrst, + gchp_vs_gchp_devrstdir, "Restart", gchp_dev_date, is_gchp=True, @@ -1444,7 +1444,7 @@ def run_benchmark_default(config): gchp_is_pre_14_0=config["data"]["dev"]["gchp"]["is_pre_14.0"] ) refe = get_filepath( - gchp_vs_gchp_refrst, + gchp_vs_gchp_refrstdir, "Restart", gchp_end_ref_date, is_gchp=True, @@ -1452,7 +1452,7 @@ def run_benchmark_default(config): gchp_is_pre_14_0=config["data"]["ref"]["gchp"]["is_pre_14.0"] ) deve = get_filepath( - gchp_vs_gchp_devrst, + gchp_vs_gchp_devrstdir, "Restart", gchp_end_dev_date, is_gchp=True, From fb7116d0dbd6ff6f28d5bdf322217f2383cdd98a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 21 May 2024 07:27:38 +0000 Subject: [PATCH 43/43] --- updated-dependencies: - dependency-name: requests dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 2fae9011..f390410d 100644 --- a/setup.py +++ b/setup.py @@ -109,7 +109,7 @@ def _write_version_file(): "python==3.9.18", "pypdf==3.16.1", "recommonmark==0.7.1", - "requests==2.31.0", + "requests==2.32.0", "scipy==1.11.2", "sparselt==0.1.3", "tabulate==0.9.0",
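
For reference, the np.nan guard added in PATCH 39/43 to
read_one_text_file (gcpy/benchmark/modules/benchmark_scrape_gcclassic_timers.py)
can be exercised in isolation.  The sketch below is a minimal,
self-contained illustration; the two sample log lines are hypothetical
and only approximate the GEOS-Chem Classic timer output format:

    # Minimal sketch of the timer-parsing guard from PATCH 39/43.
    # The sample lines below are illustrative, not verbatim
    # GEOS-Chem Classic output.
    import numpy as np

    sample_lines = [
        "  Chemistry       :  00-01:23:45.678        5025.678",
        "  RRTMG           :  THE TIMER DID NOT RUN",
    ]

    timers = {}
    for line in sample_lines:
        substr = line.split(":")
        key = substr[0].strip()
        if "THE TIMER DID NOT RUN" in line:
            # A timer that never ran has no elapsed time to parse;
            # storing np.nan avoids an IndexError on substr[3]
            val = np.nan
        else:
            val = substr[3].split()[1].strip()
        timers[key] = {"seconds": val}

    print(timers)
    # {'Chemistry': {'seconds': '5025.678'}, 'RRTMG': {'seconds': nan}}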