ESCOMP · samsrabin · Feb 5, 2026 · Nov 14, 2025 · Nov 14, 2025 · Nov 14, 2025
diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs
@@ -76,3 +76,4 @@ cdf40d265cc82775607a1bf25f5f527bacc97405
 ac03492012837799b7111607188acff9f739044a
 d858665d799690d73b56bcb961684382551193f4
 c0c6da391ee359f2765439426f3a2a4593a95343
+598de2f05638286b3d99ac0ed120977cbc554c3d
diff --git a/cime_config/testdefs/ExpectedTestFails.xml b/cime_config/testdefs/ExpectedTestFails.xml
@@ -36,6 +36,27 @@
     </phase>
   </test>
 
+  <test name="RXCROPMATURITY_Lm61.f10_f10_mg37.IHistClm60BgcCrop.derecho_intel.clm-cropMonthOutput">
+    <phase name="RUN">
+      <status>FAIL</status>
+      <issue>#3740</issue>
+    </phase>
+  </test>
+
+  <test name="RXCROPMATURITY_Lm61.f09_t232.IHistClm60BgcCrop.derecho_intel.clm-cropMonthOutput">
+    <phase name="RUN">
+      <status>FAIL</status>
+      <issue>#3740</issue>
+    </phase>
+  </test>
+
+  <test name="RXCROPMATURITY_Lm61.f09_g17.IHistClm60BgcCrop.derecho_intel.clm-cropMonthOutput">
+    <phase name="RUN">
+      <status>FAIL</status>
+      <issue>#3740</issue>
+    </phase>
+  </test>
+
   <test name="SMS_Ld5.f09_g17.IHistClm50Sp.derecho_intel.clm-nofire">
     <phase name="SHAREDLIB_BUILD">
       <status>FAIL</status>

diff --git a/python/ctsm/crop_calendars/generate_gdds.py b/python/ctsm/crop_calendars/generate_gdds.py
@@ -21,6 +21,9 @@
 from ctsm.ctsm_logging import log, error  # pylint: disable=wrong-import-position
 import ctsm.crop_calendars.cropcal_module as cc  # pylint: disable=wrong-import-position
 import ctsm.crop_calendars.generate_gdds_functions as gddfn  # pylint: disable=wrong-import-position
+from ctsm.crop_calendars.import_ds import (  # pylint: disable=wrong-import-position
+    get_files_in_time_slice,  # pylint: disable=wrong-import-position
+)  # pylint: disable=wrong-import-position
 
 # Functions here were written with too many positional arguments. At some point that should be
 # fixed. For now, we'll just disable the warning.
@@ -42,6 +45,72 @@ def _get_max_growing_season_lengths(max_season_length_from_hdates_file, paramfil
     return mxmats
 
 
+def _get_history_yr_range(first_season, last_season):
+    """
+    Get a range object that can be used for looping over all years we need to process timestamps
+    from.
+    """
+    # Saving at the end of a year receive the timestamp of the END of the year's final timestep,
+    # which means it will actually be 00:00 of Jan. 1 of the next year.
+    first_history_yr = first_season + 1
+
+    # Same deal for the last history timestep, but we have to read an extra year in that case,
+    # because in some places the last growing season won't complete until the year after it was
+    # planted.
+    last_history_yr = last_season + 2
+
+    # last_history_yr + 1 because range() will iterate up to but not including the second value.
+    history_yr_range = range(first_history_yr, last_history_yr + 1)
+
+    return history_yr_range
+
+
+def _get_time_slice_list(first_season, last_season):
+    """
+    Given the requested first and last seasons, get the list of time slices that the script should
+    look for. The assumptions here, as in import_and_process_1yr and as instructed in the docs, are
+    that the user (a) is saving instantaneous annual files and (b) started on Jan. 1.
+    """
+
+    # Input checks
+    if not all(isinstance(i, int) for i in [first_season, last_season]):
+        raise TypeError("_get_time_slice_list() arguments must be integers")
+    if first_season > last_season:
+        raise ValueError(f"first_season ({first_season}) > last_season ({last_season})")
+
+    slice_list = []
+    for history_yr in _get_history_yr_range(first_season, last_season):
+        slice_start = f"{history_yr}-01-01"
+        # Stop could probably be the same as start, since there should just be one value saved per
+        # year and that should get the Jan. 1 timestamp.
+        slice_stop = f"{history_yr}-12-31"
+        slice_list.append(slice(slice_start, slice_stop))
+
+    # We should be reading one more than the total number of years in [first_season, last_season].
+    assert len(slice_list) == last_season - first_season + 2
+
+    return slice_list
+
+
+def _get_file_lists(input_dir, time_slice_list, logger):
+    """
+    For each time slice in a list, find the file(s) that need to be read to get all history
+    timesteps in the slice. Returns both h1i and h2i file lists.
+    """
+    output_file_lists_list = [None, None]
+    for i, h in enumerate([1, 2]):
+        all_h_files = gddfn.find_inst_hist_files(input_dir, h=h, logger=logger)
+        h_file_lists = []
+        for time_slice in time_slice_list:
+            try:
+                h_file_lists.append(get_files_in_time_slice(all_h_files, time_slice, logger=logger))
+            except FileNotFoundError as e:
+                raise FileNotFoundError(f"No h{h} timesteps found in {time_slice}") from e
+        output_file_lists_list[i] = h_file_lists
+    h1_file_lists, h2_file_lists = tuple(output_file_lists_list)
+    return h1_file_lists, h2_file_lists
+
+
 def main(
     *,
     input_dir=None,
@@ -126,6 +195,9 @@ def main(
             + "(years are +1 because of CTSM output naming)",
         )
 
+        # This script uses pickle to save work in progress. In case of interruption, when the script
+        # is resumed, it will look for a pickle file. It will resume from the year after
+        # pickle_year, which is the last processed year in the pickle file.
         pickle_file = os.path.join(output_dir, f"{first_season}-{last_season}.pickle")
         h2_ds_file = os.path.join(output_dir, f"{first_season}-{last_season}.h2_ds.nc")
         if os.path.exists(pickle_file) and not no_pickle:
@@ -162,10 +234,20 @@ def main(
             max_season_length_from_hdates_file, paramfile, max_season_length_cushion
         )
 
-        h1_instantaneous = None
-        for yr_index, this_yr in enumerate(np.arange(first_season + 1, last_season + 3)):
+        # Get lists of history timesteps and files to read
+        time_slice_list = _get_time_slice_list(first_season, last_season)
+        h1_file_lists, h2_file_lists = _get_file_lists(input_dir, time_slice_list, logger)
+
+        for yr_index, this_yr in enumerate(_get_history_yr_range(first_season, last_season)):
+            # If resuming from a pickled file, we continue until we reach a year that hasn't yet
+            # been processed.
             if this_yr <= pickle_year:
                 continue
+            log(logger, f"netCDF year {this_yr}...")
+
+            # Get h1 and h2 files to read for this year
+            h1_file_list = h1_file_lists[yr_index]  # pylint: disable=unsubscriptable-object
+            h2_file_list = h2_file_lists[yr_index]  # pylint: disable=unsubscriptable-object
 
             (
                 h2_ds,
@@ -179,28 +261,26 @@ def main(
                 incl_vegtypes_str,
                 incl_patches1d_itype_veg,
                 mxsowings,
-                h1_instantaneous,
             ) = gddfn.import_and_process_1yr(
                 first_season,
                 last_season,
                 yr_index,
-                this_yr,
                 sdates_rx,
                 hdates_rx,
                 gddaccum_yp_list,
                 gddharv_yp_list,
                 skip_patches_for_isel_nan_lastyear,
                 lastyear_active_patch_indices_list,
                 incorrectly_daily,
-                input_dir,
                 incl_vegtypes_str,
                 h2_ds_file,
                 mxmats,
                 cc.get_gs_len_da,
                 skip_crops,
                 outdir_figs,
                 logger,
-                h1_instantaneous,
+                h1_file_list,
+                h2_file_list,
             )
 
             log(logger, f"   Saving pickle file ({pickle_file})...")

diff --git a/python/ctsm/crop_calendars/generate_gdds_functions.py b/python/ctsm/crop_calendars/generate_gdds_functions.py
@@ -10,7 +10,6 @@
 import numpy as np
 import xarray as xr
 
-from ctsm.utils import is_instantaneous
 from ctsm.ctsm_logging import log, error
 import ctsm.crop_calendars.cropcal_utils as utils
 import ctsm.crop_calendars.cropcal_module as cc
@@ -290,64 +289,45 @@ def import_and_process_1yr(
     year_1,
     year_n,
     year_index,
-    this_year,
     sdates_rx,
     hdates_rx,
     gddaccum_yp_list,
     gddharv_yp_list,
     skip_patches_for_isel_nan_last_year,
     last_year_active_patch_indices_list,
     incorrectly_daily,
-    indir,
     incl_vegtypes_str_in,
     h2_ds_file,
     mxmats,
     get_gs_len_da,
     skip_crops,
     outdir_figs,
     logger,
-    h1_instantaneous,
+    h1_filelist,
+    h2_filelist,
 ):
     """
     Import one year of CLM output data for GDD generation
     """
     save_figs = True
-    log(logger, f"netCDF year {this_year}...")
 
     # Without dask, this can take a LONG time at resolutions finer than 2-deg
     if importlib_util.find_spec("dask"):
         chunks = {"time": 1}
     else:
         chunks = None
 
-    # Get h1 file (list)
-    h1_pattern = os.path.join(indir, "*h1i.*.nc")
-    h1_filelist = glob.glob(h1_pattern)
-    if not h1_filelist:
-        h1_pattern = os.path.join(indir, "*h1i.*.nc.base")
-        h1_filelist = glob.glob(h1_pattern)
-        if not h1_filelist:
-            error(logger, "No files found matching pattern '*h1i.*.nc(.base)'")
-
     # Get list of crops to include
     if skip_crops is not None:
         crops_to_read = [c for c in utils.define_mgdcrop_list_withgrasses() if c not in skip_crops]
     else:
         crops_to_read = utils.define_mgdcrop_list_withgrasses()
 
-    # Are h1 files instantaneous?
-    if h1_instantaneous is None:
-        h1_instantaneous = is_instantaneous(xr.open_dataset(h1_filelist[0])["time"])
-
-    if h1_instantaneous:
-        slice_year = this_year
-    else:
-        slice_year = this_year - 1
+    # Read h1 file(s)
     dates_ds = import_ds(
         h1_filelist,
         my_vars=["SDATES", "HDATES"],
         my_vegtypes=crops_to_read,
-        time_slice=slice(f"{slice_year}-01-01", f"{slice_year}-12-31"),
         chunks=chunks,
         logger=logger,
     )
@@ -631,16 +611,8 @@ def import_and_process_1yr(
     log(logger, "   Importing accumulated GDDs...")
     clm_gdd_var = "GDDACCUM"
     my_vars = [clm_gdd_var, "GDDHARV"]
-    patterns = [f"*h2i.{this_year-1}-01*.nc", f"*h2i.{this_year-1}-01*.nc.base"]
-    for pat in patterns:
-        pattern = os.path.join(indir, pat)
-        h2_files = glob.glob(pattern)
-        if h2_files:
-            break
-    if not h2_files:
-        error(logger, f"No files found matching patterns: {patterns}")
     h2_ds = import_ds(
-        h2_files,
+        h2_filelist,
         my_vars=my_vars,
         my_vegtypes=crops_to_read,
         chunks=chunks,
@@ -892,10 +864,80 @@ def import_and_process_1yr(
         incl_vegtypes_str,
         incl_patches1d_itype_veg,
         mxsowings,
-        h1_instantaneous,
     )
 
 
+def find_inst_hist_files(indir, *, h, this_year=None, logger=None):
+    """
+    Find all the instantaneous history files for a given tape number, optionally looking just for
+    one year in filename.
+
+    Args:
+        indir: Directory to search for history files
+        h: History tape number (must be an integer, e.g., 1 for h1, 2 for h2)
+        this_year: Optional year to filter files by. If provided, only files with dates starting
+                   with "{this_year}-01" will be returned. If None, all files matching the
+                   history tape number will be returned.
+        logger: Optional logger for error messages. If None, errors are raised without logging.
+
+    Returns:
+        List of file paths matching the search criteria
+
+    Raises:
+        TypeError: If h is not an integer
+        FileNotFoundError: If no files matching the patterns are found
+        RuntimeError: If files from multiple case names are found (indicates mixed output from
+                     different simulations, which is pathological)
+
+    Notes:
+        - Searches for files matching patterns: "*h{h}i.*.nc" or "*h{h}i.*.nc.base"
+        - When this_year is specified, searches for: "*h{h}i.{this_year}-01*.nc" or
+          "*h{h}i.{this_year}-01*.nc.base"
+        - Prefers .nc files over .nc.base files (searches .nc pattern first)
+        - All returned files must be from the same case name (extracted from filename before
+          ".clm2.h#i.")
+    """
+    if this_year is None:
+        patterns = [f"*h{h}i.*.nc", f"*h{h}i.*.nc.base"]
+    else:
+        if not isinstance(h, int):
+            err_msg = f"h ({h}) must be an integer, not {type(h)}"
+            err_type = TypeError
+            if logger:
+                error(logger, err_msg, error_type=err_type)
+            raise err_type(err_msg)
+        patterns = [f"*h{h}i.{this_year}-01*.nc", f"*h{h}i.{this_year}-01*.nc.base"]
+    for pat in patterns:
+        pattern = os.path.join(indir, pat)
+        file_list = glob.glob(pattern)
+        if file_list:
+            break
+    if not file_list:
+        err_msg = f"No files found matching patterns: {patterns}"
+        err_type = FileNotFoundError
+        if logger:
+            error(logger, err_msg, error_type=err_type)
+        raise err_type(err_msg)
+
+    # Error if files found from multiple cases
+    case_names = set()
+    for file in file_list:
+        basename = os.path.basename(file)
+        # Extract case name (everything before .clm2.h#i.)
+        parts = basename.split(".clm2.")
+        if len(parts) > 1:
+            case_name = parts[0]
+            case_names.add(case_name)
+    if len(case_names) > 1:
+        err_msg = f"Found files from multiple case names: {sorted(case_names)}"
+        err_type = RuntimeError
+        if logger:
+            error(logger, err_msg, error_type=err_type)
+        raise err_type(err_msg)
+
+    return file_list
+
+
 def get_multicrop_maps(this_ds, these_vars, crop_fracs_yx, dummy_fill, gdd_units):
     # pylint: disable=missing-function-docstring
     # Get GDDs for these crops