diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs
index 0cec06fb3f..bb425bb602 100644
--- a/.git-blame-ignore-revs
+++ b/.git-blame-ignore-revs
@@ -76,3 +76,4 @@ cdf40d265cc82775607a1bf25f5f527bacc97405
ac03492012837799b7111607188acff9f739044a
d858665d799690d73b56bcb961684382551193f4
c0c6da391ee359f2765439426f3a2a4593a95343
+598de2f05638286b3d99ac0ed120977cbc554c3d
diff --git a/cime_config/testdefs/ExpectedTestFails.xml b/cime_config/testdefs/ExpectedTestFails.xml
index d19bcf05a6..b00f3e720e 100644
--- a/cime_config/testdefs/ExpectedTestFails.xml
+++ b/cime_config/testdefs/ExpectedTestFails.xml
@@ -36,6 +36,27 @@
+
+
+ FAIL
+ #3740
+
+
+
+
+
+ FAIL
+ #3740
+
+
+
+
+
+ FAIL
+ #3740
+
+
+
FAIL
diff --git a/python/ctsm/crop_calendars/generate_gdds.py b/python/ctsm/crop_calendars/generate_gdds.py
index 7af82f9fa1..196d7a96da 100644
--- a/python/ctsm/crop_calendars/generate_gdds.py
+++ b/python/ctsm/crop_calendars/generate_gdds.py
@@ -21,6 +21,9 @@
from ctsm.ctsm_logging import log, error # pylint: disable=wrong-import-position
import ctsm.crop_calendars.cropcal_module as cc # pylint: disable=wrong-import-position
import ctsm.crop_calendars.generate_gdds_functions as gddfn # pylint: disable=wrong-import-position
+from ctsm.crop_calendars.import_ds import ( # pylint: disable=wrong-import-position
+ get_files_in_time_slice, # pylint: disable=wrong-import-position
+) # pylint: disable=wrong-import-position
# Functions here were written with too many positional arguments. At some point that should be
# fixed. For now, we'll just disable the warning.
@@ -42,6 +45,72 @@ def _get_max_growing_season_lengths(max_season_length_from_hdates_file, paramfil
return mxmats
+def _get_history_yr_range(first_season, last_season):
+    """
+    Build the range of history years whose timestamps must be processed to cover the requested
+    growing seasons.
+
+    Args:
+        first_season: First growing season (year) requested
+        last_season: Last growing season (year) requested
+
+    Returns:
+        A range object covering first_season + 1 through last_season + 2, inclusive
+    """
+    # Output saved at the end of a year receives the timestamp of the END of that year's final
+    # timestep, which is actually 00:00 on Jan. 1 of the next year. Hence the +1 shift.
+    start_yr = first_season + 1
+
+    # The last season gets the same +1 shift, plus one more year: in some places the last
+    # growing season won't complete until the year after it was planted.
+    final_yr = last_season + 2
+
+    # range() stops BEFORE its second argument, so add 1 to include final_yr.
+    return range(start_yr, final_yr + 1)
+
+
+def _get_time_slice_list(first_season, last_season):
+    """
+    Build the list of time slices the script should look for, one per history year needed to
+    cover seasons first_season through last_season.
+
+    As in import_and_process_1yr, and as instructed in the docs, this assumes that the user
+    (a) is saving instantaneous annual files and (b) started on Jan. 1.
+
+    Args:
+        first_season: First growing season (year) requested; must be an int
+        last_season: Last growing season (year) requested; must be an int >= first_season
+
+    Returns:
+        List of slice objects, each running from "{year}-01-01" to "{year}-12-31"
+
+    Raises:
+        TypeError: If either argument is not an integer
+        ValueError: If first_season is later than last_season
+    """
+
+    # Input checks
+    for season in (first_season, last_season):
+        if not isinstance(season, int):
+            raise TypeError("_get_time_slice_list() arguments must be integers")
+    if first_season > last_season:
+        raise ValueError(f"first_season ({first_season}) > last_season ({last_season})")
+
+    # Each slice spans a whole calendar year. The stop could probably be the same as the start,
+    # since there should be just one value saved per year, carrying the Jan. 1 timestamp.
+    slice_list = [
+        slice(f"{history_yr}-01-01", f"{history_yr}-12-31")
+        for history_yr in _get_history_yr_range(first_season, last_season)
+    ]
+
+    # We should be reading one more than the total number of years in [first_season, last_season].
+    assert len(slice_list) == last_season - first_season + 2
+
+    return slice_list
+
+
+def _get_file_lists(input_dir, time_slice_list, logger):
+    """
+    For each time slice in a list, find the file(s) that must be read to get all history
+    timesteps in that slice. This is done for the h1i files and then for the h2i files.
+
+    Args:
+        input_dir: Directory to search for history files
+        time_slice_list: List of time slices; one file list is produced per slice
+        logger: Logger passed through to the file-finding functions (may be None)
+
+    Returns:
+        Tuple (h1_file_lists, h2_file_lists). Each is a list with one entry per time slice,
+        and each entry is the list of files found for that slice.
+
+    Raises:
+        FileNotFoundError: If a time slice has no timesteps in any file of a given tape
+    """
+
+    def _file_lists_for_tape(h):
+        """Get one file list per time slice for history tape number h"""
+        all_h_files = gddfn.find_inst_hist_files(input_dir, h=h, logger=logger)
+        h_file_lists = []
+        for time_slice in time_slice_list:
+            try:
+                files_in_slice = get_files_in_time_slice(all_h_files, time_slice, logger=logger)
+            except FileNotFoundError as e:
+                # Re-raise with a message that says which tape and slice came up empty
+                raise FileNotFoundError(f"No h{h} timesteps found in {time_slice}") from e
+            h_file_lists.append(files_in_slice)
+        return h_file_lists
+
+    # h1 is handled completely before h2, so any h1 problem is reported first
+    h1_file_lists = _file_lists_for_tape(1)
+    h2_file_lists = _file_lists_for_tape(2)
+    return h1_file_lists, h2_file_lists
+
+
def main(
*,
input_dir=None,
@@ -126,6 +195,9 @@ def main(
+ "(years are +1 because of CTSM output naming)",
)
+ # This script uses pickle to save work in progress. In case of interruption, when the script
+ # is resumed, it will look for a pickle file. It will resume from the year after
+ # pickle_year, which is the last processed year in the pickle file.
pickle_file = os.path.join(output_dir, f"{first_season}-{last_season}.pickle")
h2_ds_file = os.path.join(output_dir, f"{first_season}-{last_season}.h2_ds.nc")
if os.path.exists(pickle_file) and not no_pickle:
@@ -162,10 +234,20 @@ def main(
max_season_length_from_hdates_file, paramfile, max_season_length_cushion
)
- h1_instantaneous = None
- for yr_index, this_yr in enumerate(np.arange(first_season + 1, last_season + 3)):
+ # Get lists of history timesteps and files to read
+ time_slice_list = _get_time_slice_list(first_season, last_season)
+ h1_file_lists, h2_file_lists = _get_file_lists(input_dir, time_slice_list, logger)
+
+ for yr_index, this_yr in enumerate(_get_history_yr_range(first_season, last_season)):
+ # If resuming from a pickled file, we continue until we reach a year that hasn't yet
+ # been processed.
if this_yr <= pickle_year:
continue
+ log(logger, f"netCDF year {this_yr}...")
+
+ # Get h1 and h2 files to read for this year
+ h1_file_list = h1_file_lists[yr_index] # pylint: disable=unsubscriptable-object
+ h2_file_list = h2_file_lists[yr_index] # pylint: disable=unsubscriptable-object
(
h2_ds,
@@ -179,12 +261,10 @@ def main(
incl_vegtypes_str,
incl_patches1d_itype_veg,
mxsowings,
- h1_instantaneous,
) = gddfn.import_and_process_1yr(
first_season,
last_season,
yr_index,
- this_yr,
sdates_rx,
hdates_rx,
gddaccum_yp_list,
@@ -192,7 +272,6 @@ def main(
skip_patches_for_isel_nan_lastyear,
lastyear_active_patch_indices_list,
incorrectly_daily,
- input_dir,
incl_vegtypes_str,
h2_ds_file,
mxmats,
@@ -200,7 +279,8 @@ def main(
skip_crops,
outdir_figs,
logger,
- h1_instantaneous,
+ h1_file_list,
+ h2_file_list,
)
log(logger, f" Saving pickle file ({pickle_file})...")
diff --git a/python/ctsm/crop_calendars/generate_gdds_functions.py b/python/ctsm/crop_calendars/generate_gdds_functions.py
index 4f6dd6b966..ed271bdd8d 100644
--- a/python/ctsm/crop_calendars/generate_gdds_functions.py
+++ b/python/ctsm/crop_calendars/generate_gdds_functions.py
@@ -10,7 +10,6 @@
import numpy as np
import xarray as xr
-from ctsm.utils import is_instantaneous
from ctsm.ctsm_logging import log, error
import ctsm.crop_calendars.cropcal_utils as utils
import ctsm.crop_calendars.cropcal_module as cc
@@ -290,7 +289,6 @@ def import_and_process_1yr(
year_1,
year_n,
year_index,
- this_year,
sdates_rx,
hdates_rx,
gddaccum_yp_list,
@@ -298,7 +296,6 @@ def import_and_process_1yr(
skip_patches_for_isel_nan_last_year,
last_year_active_patch_indices_list,
incorrectly_daily,
- indir,
incl_vegtypes_str_in,
h2_ds_file,
mxmats,
@@ -306,13 +303,13 @@ def import_and_process_1yr(
skip_crops,
outdir_figs,
logger,
- h1_instantaneous,
+ h1_filelist,
+ h2_filelist,
):
"""
Import one year of CLM output data for GDD generation
"""
save_figs = True
- log(logger, f"netCDF year {this_year}...")
# Without dask, this can take a LONG time at resolutions finer than 2-deg
if importlib_util.find_spec("dask"):
@@ -320,34 +317,17 @@ def import_and_process_1yr(
else:
chunks = None
- # Get h1 file (list)
- h1_pattern = os.path.join(indir, "*h1i.*.nc")
- h1_filelist = glob.glob(h1_pattern)
- if not h1_filelist:
- h1_pattern = os.path.join(indir, "*h1i.*.nc.base")
- h1_filelist = glob.glob(h1_pattern)
- if not h1_filelist:
- error(logger, "No files found matching pattern '*h1i.*.nc(.base)'")
-
# Get list of crops to include
if skip_crops is not None:
crops_to_read = [c for c in utils.define_mgdcrop_list_withgrasses() if c not in skip_crops]
else:
crops_to_read = utils.define_mgdcrop_list_withgrasses()
- # Are h1 files instantaneous?
- if h1_instantaneous is None:
- h1_instantaneous = is_instantaneous(xr.open_dataset(h1_filelist[0])["time"])
-
- if h1_instantaneous:
- slice_year = this_year
- else:
- slice_year = this_year - 1
+ # Read h1 file(s)
dates_ds = import_ds(
h1_filelist,
my_vars=["SDATES", "HDATES"],
my_vegtypes=crops_to_read,
- time_slice=slice(f"{slice_year}-01-01", f"{slice_year}-12-31"),
chunks=chunks,
logger=logger,
)
@@ -631,16 +611,8 @@ def import_and_process_1yr(
log(logger, " Importing accumulated GDDs...")
clm_gdd_var = "GDDACCUM"
my_vars = [clm_gdd_var, "GDDHARV"]
- patterns = [f"*h2i.{this_year-1}-01*.nc", f"*h2i.{this_year-1}-01*.nc.base"]
- for pat in patterns:
- pattern = os.path.join(indir, pat)
- h2_files = glob.glob(pattern)
- if h2_files:
- break
- if not h2_files:
- error(logger, f"No files found matching patterns: {patterns}")
h2_ds = import_ds(
- h2_files,
+ h2_filelist,
my_vars=my_vars,
my_vegtypes=crops_to_read,
chunks=chunks,
@@ -892,10 +864,80 @@ def import_and_process_1yr(
incl_vegtypes_str,
incl_patches1d_itype_veg,
mxsowings,
- h1_instantaneous,
)
+def find_inst_hist_files(indir, *, h, this_year=None, logger=None):
+    """
+    Find all the instantaneous history files for a given tape number, optionally looking just for
+    one year in filename.
+
+    Args:
+        indir: Directory to search for history files
+        h: History tape number (must be an integer, e.g., 1 for h1, 2 for h2)
+        this_year: Optional year to filter files by. If provided, only files with dates starting
+                   with "{this_year}-01" will be returned. If None, all files matching the
+                   history tape number will be returned.
+        logger: Optional logger. If provided, each error is passed to error() before the
+                exception is raised here; if None, the exception is raised without logging.
+
+    Returns:
+        List of file paths matching the search criteria
+
+    Raises:
+        TypeError: If h is not an integer (checked whether or not this_year is given)
+        FileNotFoundError: If no files matching the patterns are found
+        RuntimeError: If files from multiple case names are found (indicates mixed output from
+                      different simulations, which is pathological)
+
+    Notes:
+        - Searches for files matching patterns: "*h{h}i.*.nc" or "*h{h}i.*.nc.base"
+        - When this_year is specified, searches for: "*h{h}i.{this_year}-01*.nc" or
+          "*h{h}i.{this_year}-01*.nc.base"
+        - Prefers .nc files over .nc.base files (searches .nc pattern first)
+        - All returned files must be from the same case name (extracted from filename before
+          ".clm2.")
+    """
+    # Validate h before building any pattern. Previously this check only ran when this_year was
+    # given, so with this_year=None a string h was silently accepted and a float h surfaced as a
+    # misleading FileNotFoundError.
+    if not isinstance(h, int):
+        err_msg = f"h ({h}) must be an integer, not {type(h)}"
+        err_type = TypeError
+        if logger:
+            error(logger, err_msg, error_type=err_type)
+        raise err_type(err_msg)
+
+    # Without a year, match any date; with one, match only filenames dated January of that year
+    date_glob = "*" if this_year is None else f"{this_year}-01*"
+    patterns = [f"*h{h}i.{date_glob}.nc", f"*h{h}i.{date_glob}.nc.base"]
+
+    # Try the patterns in order, keeping the results of the first one that matches anything
+    file_list = []
+    for pat in patterns:
+        file_list = glob.glob(os.path.join(indir, pat))
+        if file_list:
+            break
+    if not file_list:
+        err_msg = f"No files found matching patterns: {patterns}"
+        err_type = FileNotFoundError
+        if logger:
+            error(logger, err_msg, error_type=err_type)
+        raise err_type(err_msg)
+
+    # Error if files found from multiple cases. The case name is everything before ".clm2.";
+    # filenames without that marker are not considered.
+    case_names = {
+        basename.split(".clm2.")[0]
+        for basename in map(os.path.basename, file_list)
+        if ".clm2." in basename
+    }
+    if len(case_names) > 1:
+        err_msg = f"Found files from multiple case names: {sorted(case_names)}"
+        err_type = RuntimeError
+        if logger:
+            error(logger, err_msg, error_type=err_type)
+        raise err_type(err_msg)
+
+    return file_list
+
+
def get_multicrop_maps(this_ds, these_vars, crop_fracs_yx, dummy_fill, gdd_units):
# pylint: disable=missing-function-docstring
# Get GDDs for these crops
diff --git a/python/ctsm/crop_calendars/import_ds.py b/python/ctsm/crop_calendars/import_ds.py
index 66a0ec9746..656d10985e 100644
--- a/python/ctsm/crop_calendars/import_ds.py
+++ b/python/ctsm/crop_calendars/import_ds.py
@@ -247,23 +247,7 @@ def import_ds(
# elements through end-1 will be selected, but that seems not to be the case in the xarray
# implementation.
if time_slice:
- new_filelist = []
- for file in sorted(filelist):
- log(logger, f"Getting filetime from file: {file}")
- filetime = xr.open_dataset(file).time
- filetime_sel = utils.safer_timeslice(filetime, time_slice)
- include_this_file = filetime_sel.size
- if include_this_file:
- log(logger, f"Including filetime : {filetime_sel['time'].values}")
- new_filelist.append(file)
-
- # If you found some matching files, but then you find one that doesn't, stop going
- # through the list.
- elif new_filelist:
- break
- if not new_filelist:
- raise RuntimeError(f"No files found in time_slice {time_slice}")
- filelist = new_filelist
+ filelist = get_files_in_time_slice(filelist, time_slice, logger)
# The xarray open_mfdataset() "preprocess" argument requires a function that takes exactly one
# variable (an xarray.Dataset object). Wrapping mfdataset_preproc() in this lambda function
@@ -324,3 +308,29 @@ def import_ds(
log(logger, "End")
return this_ds
+
+
+def get_files_in_time_slice(filelist, time_slice, logger=None):
+    """
+    For a given list of files, find the files that need to be read in order to get all history
+    timesteps in the slice.
+
+    Files are examined in sorted order of their paths. Once at least one file with timesteps in
+    the slice has been found, the search stops at the first file that has none.
+
+    Args:
+        filelist: Paths of the files to examine
+        time_slice: Slice giving the time period of interest
+        logger: Optional logger for progress messages. If None, nothing is logged.
+
+    Returns:
+        List of the files (in sorted order) that have at least one timestep in time_slice
+
+    Raises:
+        FileNotFoundError: If no file has any timestep in time_slice
+    """
+    new_filelist = []
+    for file in sorted(filelist):
+        if logger:
+            log(logger, f"Getting filetime from file: {file}")
+
+        # Open the file as a context manager so its handle is released before moving on to the
+        # next one; otherwise every file examined stays open. Everything needed from the
+        # dataset is read inside the with-block.
+        with xr.open_dataset(file) as this_ds:
+            filetime_sel = utils.safer_timeslice(this_ds.time, time_slice)
+            include_this_file = filetime_sel.size
+            if include_this_file and logger:
+                log(logger, f"Including filetime : {filetime_sel['time'].values}")
+
+        if include_this_file:
+            new_filelist.append(file)
+
+        # If you found some matching files, but then you find one that doesn't, stop going
+        # through the list.
+        elif new_filelist:
+            break
+    if not new_filelist:
+        raise FileNotFoundError(f"No files found in time_slice {time_slice}")
+    return new_filelist
diff --git a/python/ctsm/test/test_unit_generate_gdds.py b/python/ctsm/test/test_unit_generate_gdds.py
index 4976097b7d..5c0219cd09 100755
--- a/python/ctsm/test/test_unit_generate_gdds.py
+++ b/python/ctsm/test/test_unit_generate_gdds.py
@@ -7,9 +7,14 @@
import unittest
import os
import argparse
+import tempfile
+import shutil
+import logging
+import re
import numpy as np
import xarray as xr
+from cftime import DatetimeNoLeap
from ctsm import unit_testing
from ctsm.crop_calendars import generate_gdds as gg
@@ -125,8 +130,10 @@ def test_generate_gdds_args_error_with_paramfile_and_nomxmat(self):
gg._parse_args(args)
def test_generate_gdds_args_error_with_nomxmat_and_cushion(self):
- """Should error if both --max-season-length-cushion and --max-season-length-from-hdates-file
- are given"""
+ """
+ Should error if both --max-season-length-cushion and --max-season-length-from-hdates-file
+ are given
+ """
args = [
"--input-dir",
self._input_dir,
@@ -230,6 +237,49 @@ def test_generate_gdds_get_mxmats_cushionneg14(self):
self.assertEqual(mxmats["miscanthus"], 210 - cushion)
+class TestGetTimeSliceList(unittest.TestCase):
+    """Tests for _get_time_slice_list()"""
+
+    def test_generate_gdds_get_time_slice_list(self):
+        """Test that _get_time_slice_list works with two different years"""
+        season_list = [1986, 1987]
+        result = gg._get_time_slice_list(season_list[0], season_list[-1])
+        expected = [
+            slice("1987-01-01", "1987-12-31"),
+            slice("1988-01-01", "1988-12-31"),
+            slice("1989-01-01", "1989-12-31"),
+        ]
+        # assertEqual rather than a bare assert: it still runs under python -O and it reports
+        # both values on failure
+        self.assertEqual(result, expected)
+
+    def test_generate_gdds_get_time_slice_list_1yr(self):
+        """Test that _get_time_slice_list works with the same year"""
+        result = gg._get_time_slice_list(1987, 1987)
+        expected = [
+            slice("1988-01-01", "1988-12-31"),
+            slice("1989-01-01", "1989-12-31"),
+        ]
+        self.assertEqual(result, expected)
+
+    def test_generate_gdds_get_time_slice_list_valueerror(self):
+        """Test that _get_time_slice_list raises ValueError if last < first"""
+        with self.assertRaisesRegex(ValueError, "first_season.* > last_season"):
+            gg._get_time_slice_list(1987, 1986)
+
+    def test_generate_gdds_get_time_slice_list_typeerror_first(self):
+        """Test that _get_time_slice_list raises TypeError if not given integer first season"""
+        with self.assertRaisesRegex(
+            TypeError, r"_get_time_slice_list\(\) arguments must be integers"
+        ):
+            gg._get_time_slice_list(1986.3, 1987)
+
+    def test_generate_gdds_get_time_slice_list_typeerror_last(self):
+        """Test that _get_time_slice_list raises TypeError if not given integer last season"""
+        with self.assertRaisesRegex(
+            TypeError, r"_get_time_slice_list\(\) arguments must be integers"
+        ):
+            gg._get_time_slice_list(1986, None)
+
+
class TestCheckGridMatch(unittest.TestCase):
"""Tests check_grid_match()"""
@@ -325,6 +375,373 @@ def test_check_grid_match_matchnans_falseshape_dada(self):
self.assertIsNone(max_abs_diff)
+class TestFindInstHistFiles(unittest.TestCase):
+    """Tests of find_inst_hist_files()"""
+
+    def setUp(self):
+        """
+        Remember the starting directory, then create and move into a temporary directory
+        """
+        self.prev_dir = os.getcwd()
+        self.temp_dir = tempfile.mkdtemp()
+        os.chdir(self.temp_dir)
+
+    def tearDown(self):
+        """
+        Return to the starting directory and delete the temporary directory
+        """
+        os.chdir(self.prev_dir)
+        shutil.rmtree(self.temp_dir, ignore_errors=True)
+
+    def _create_test_file(self, filename):
+        """Helper to create an empty test file in the temporary directory; returns its path"""
+        filepath = os.path.join(self.temp_dir, filename)
+        with open(filepath, "a", encoding="utf-8"):
+            pass
+        return filepath
+
+    def _create_test_files(self, *filenames):
+        """Helper to create several empty test files; returns their paths in the order given"""
+        return [self._create_test_file(filename) for filename in filenames]
+
+    @staticmethod
+    def _debug_logger(name):
+        """Helper to get a logger with the given name, set to DEBUG level"""
+        logger = logging.getLogger(name)
+        logger.setLevel(logging.DEBUG)
+        return logger
+
+    def test_find_inst_hist_files_h1_no_year(self):
+        """Test finding h1 files without specifying year"""
+        expected = self._create_test_files(
+            "test.clm2.h1i.2000-01-01-00000.nc",
+            "test.clm2.h1i.2000-02-01-00000.nc",
+            "test.clm2.h1i.2001-01-01-00000.nc",
+        )
+
+        result = gf.find_inst_hist_files(self.temp_dir, h=1, this_year=None)
+
+        # Should find all h1i files, in any order
+        self.assertCountEqual(result, expected)
+
+    def test_find_inst_hist_files_h2_no_year(self):
+        """Test finding h2 files without specifying year"""
+        expected = self._create_test_files(
+            "test.clm2.h2i.2000-01-01-00000.nc",
+            "test.clm2.h2i.2001-01-01-00000.nc",
+        )
+        # Create h1 file that should not be found
+        self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc")
+
+        result = gf.find_inst_hist_files(self.temp_dir, h=2, this_year=None)
+
+        # Should find only h2i files
+        self.assertCountEqual(result, expected)
+
+    def test_find_inst_hist_files_with_year(self):
+        """Test finding files for a specific year"""
+        _, file_2001, _ = self._create_test_files(
+            "test.clm2.h1i.2000-01-01-00000.nc",
+            "test.clm2.h1i.2001-01-01-00000.nc",
+            "test.clm2.h1i.2002-01-01-00000.nc",
+        )
+
+        result = gf.find_inst_hist_files(self.temp_dir, h=1, this_year=2001)
+
+        # Should find only the 2001 file
+        self.assertEqual(result, [file_2001])
+
+    def test_find_inst_hist_files_base_extension(self):
+        """Test finding files with .nc.base extension"""
+        expected = self._create_test_files(
+            "test.clm2.h1i.2000-01-01-00000.nc.base",
+            "test.clm2.h1i.2001-01-01-00000.nc.base",
+        )
+
+        result = gf.find_inst_hist_files(self.temp_dir, h=1, this_year=None)
+
+        # Should find .nc.base files
+        self.assertCountEqual(result, expected)
+
+    def test_find_inst_hist_files_prefer_nc_over_base(self):
+        """Test that .nc files are preferred over .nc.base files"""
+        file_nc, file_nc_base = self._create_test_files(
+            "test.clm2.h1i.2000-01-01-00000.nc",
+            "test.clm2.h1i.2000-01-01-00000.nc.base",
+        )
+
+        result = gf.find_inst_hist_files(self.temp_dir, h=1, this_year=None)
+
+        # Should find .nc files first (pattern order preference)
+        self.assertIn(file_nc, result)
+        self.assertNotIn(file_nc_base, result)
+
+    def test_find_inst_hist_files_multiple_months_same_year(self):
+        """Test finding multiple files from the same year"""
+        expected = self._create_test_files(
+            "test.clm2.h1i.2000-01-01-00000.nc",
+            "test.clm2.h1i.2000-01-15-00000.nc",
+            "test.clm2.h1i.2000-01-31-00000.nc",
+        )
+        # Create file from different year
+        self._create_test_file("test.clm2.h1i.2001-01-01-00000.nc")
+
+        result = gf.find_inst_hist_files(self.temp_dir, h=1, this_year=2000)
+
+        # Should find all January 2000 files
+        self.assertCountEqual(result, expected)
+
+    def test_find_inst_hist_files_no_files_found(self):
+        """Test error when no matching files are found"""
+        # Create a non-matching file
+        self._create_test_file("test.clm2.h0.2000-01-01-00000.nc")
+
+        with self.assertRaisesRegex(FileNotFoundError, "No files found matching patterns"):
+            gf.find_inst_hist_files(self.temp_dir, h=1, this_year=None)
+
+    def test_find_inst_hist_files_different_case_names(self):
+        """Test that RuntimeError is raised when files from different case names are found"""
+        self._create_test_files(
+            "case1.clm2.h1i.2000-01-01-00000.nc",
+            "case2.clm2.h1i.2000-01-01-00000.nc",
+            "longcasename.clm2.h1i.2000-01-01-00000.nc",
+        )
+
+        with self.assertRaisesRegex(RuntimeError, "Found files from multiple case names"):
+            gf.find_inst_hist_files(self.temp_dir, h=1, this_year=2000)
+
+    def test_find_inst_hist_files_different_case_names_with_logger(self):
+        """
+        Test that RuntimeError is raised when files from different case names are found, with logger
+        """
+        logger = self._debug_logger("test_logger_case_names")
+        self._create_test_files(
+            "case1.clm2.h1i.2000-01-01-00000.nc",
+            "case2.clm2.h1i.2000-01-01-00000.nc",
+            "longcasename.clm2.h1i.2000-01-01-00000.nc",
+        )
+
+        # Should raise RuntimeError due to multiple case names, even with logger
+        with self.assertRaisesRegex(RuntimeError, "Found files from multiple case names"):
+            gf.find_inst_hist_files(self.temp_dir, h=1, this_year=2000, logger=logger)
+
+    def test_find_inst_hist_files_no_files_found_with_logger(self):
+        """Test error when no matching files are found, with logger"""
+        logger = self._debug_logger("test_logger_no_files")
+
+        # Create a non-matching file
+        self._create_test_file("test.clm2.h0.2000-01-01-00000.nc")
+
+        # Should raise a FileNotFoundError even with logger
+        with self.assertRaisesRegex(FileNotFoundError, "No files found matching patterns"):
+            gf.find_inst_hist_files(self.temp_dir, h=1, this_year=None, logger=logger)
+
+    def test_find_inst_hist_files_h_str_with_logger(self):
+        """Test that TypeError is raised when h is a string, with logger"""
+        logger = self._debug_logger("test_logger_h_str")
+        self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc")
+
+        with self.assertRaisesRegex(TypeError, "must be an integer, not"):
+            gf.find_inst_hist_files(self.temp_dir, h="1", this_year=2000, logger=logger)
+
+    def test_find_inst_hist_files_h_float_with_logger(self):
+        """Test that TypeError is raised when h is a float, with logger"""
+        logger = self._debug_logger("test_logger_h_float")
+        self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc")
+
+        with self.assertRaisesRegex(TypeError, "must be an integer, not"):
+            gf.find_inst_hist_files(self.temp_dir, h=1.0, this_year=2000, logger=logger)
+
+
+class TestGetFileLists(unittest.TestCase):
+    """Tests of _get_file_lists()"""
+
+    def setUp(self):
+        """
+        Remember the starting directory, then create and move into a temporary directory
+        """
+        self.prev_dir = os.getcwd()
+        self.temp_dir = tempfile.mkdtemp()
+        os.chdir(self.temp_dir)
+
+    def tearDown(self):
+        """
+        Return to the starting directory and delete the temporary directory
+        """
+        os.chdir(self.prev_dir)
+        shutil.rmtree(self.temp_dir, ignore_errors=True)
+
+    def _create_test_file(self, filename):
+        """
+        Helper to write a netCDF file containing only a one-element time coordinate, whose value
+        is the date found in the filename. Returns the path of the file.
+        """
+        # Extract date from filename using regex (format: *.h#i.YYYY-MM-DD-*.nc)
+        match = re.search(r"(\d{4})-(\d{2})-(\d{2})", filename)
+        if not match:
+            raise ValueError(f"Could not extract date from filename: {filename}")
+        year, month, day = (int(part) for part in match.groups())
+        time_val = DatetimeNoLeap(year, month, day, has_year_zero=True)
+
+        # Save a simple dataset with just the time coordinate
+        filepath = os.path.join(self.temp_dir, filename)
+        time = xr.DataArray([time_val], dims=["time"], name="time")
+        xr.Dataset({"time": time}).to_netcdf(filepath)
+
+        return filepath
+
+    @staticmethod
+    def _year_slice(year):
+        """Helper to get the slice covering Jan. 1 through Dec. 31 of a year"""
+        return slice(f"{year}-01-01", f"{year}-12-31")
+
+    def _call_get_file_lists(self, time_slice_list):
+        """Helper to call _get_file_lists() on the temporary directory, without a logger"""
+        return gg._get_file_lists(self.temp_dir, time_slice_list, logger=None)
+
+    def test_get_file_lists_single_year(self):
+        """Test _get_file_lists with a single year of data"""
+        # Create h1 and h2 files for 2000
+        h1_file = self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc")
+        h2_file = self._create_test_file("test.clm2.h2i.2000-01-01-00000.nc")
+
+        h1_file_lists, h2_file_lists = self._call_get_file_lists([self._year_slice(2000)])
+
+        # Should have one single-file list for the one time slice
+        self.assertEqual(h1_file_lists, [[h1_file]])
+        self.assertEqual(h2_file_lists, [[h2_file]])
+
+    def test_get_file_lists_multiple_years(self):
+        """Test _get_file_lists with multiple years of data"""
+        years = [2000, 2001, 2002]
+
+        # Create h1 and h2 files for each year
+        h1_files = []
+        h2_files = []
+        for year in years:
+            h1_files.append(self._create_test_file(f"test.clm2.h1i.{year}-01-01-00000.nc"))
+            h2_files.append(self._create_test_file(f"test.clm2.h2i.{year}-01-01-00000.nc"))
+
+        h1_file_lists, h2_file_lists = self._call_get_file_lists(
+            [self._year_slice(year) for year in years]
+        )
+
+        # Should have one single-file list for each time slice, in order
+        self.assertEqual(h1_file_lists, [[h1_file] for h1_file in h1_files])
+        self.assertEqual(h2_file_lists, [[h2_file] for h2_file in h2_files])
+
+    def test_get_file_lists_multiple_files_per_slice(self):
+        """Test _get_file_lists when multiple files fall within a time slice"""
+        # Create multiple h1 and h2 files for 2000
+        h1_files = []
+        h2_files = []
+        for month in ["01", "06", "12"]:
+            h1_files.append(self._create_test_file(f"test.clm2.h1i.2000-{month}-01-00000.nc"))
+            h2_files.append(self._create_test_file(f"test.clm2.h2i.2000-{month}-01-00000.nc"))
+
+        h1_file_lists, h2_file_lists = self._call_get_file_lists([self._year_slice(2000)])
+
+        # Should have one list for the time slice, containing all three files in sorted order
+        self.assertEqual(h1_file_lists, [sorted(h1_files)])
+        self.assertEqual(h2_file_lists, [sorted(h2_files)])
+
+    def test_get_file_lists_no_h1_files(self):
+        """Test _get_file_lists when h1 files are missing"""
+        # Create only h2 files
+        self._create_test_file("test.clm2.h2i.2000-01-01-00000.nc")
+
+        # Should raise FileNotFoundError when h1 files are not found
+        with self.assertRaisesRegex(FileNotFoundError, "No files found matching patterns"):
+            self._call_get_file_lists([self._year_slice(2000)])
+
+    def test_get_file_lists_no_h2_files(self):
+        """Test _get_file_lists when h2 files are missing"""
+        # Create only h1 files
+        self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc")
+
+        # Should raise FileNotFoundError when h2 files are not found
+        with self.assertRaisesRegex(FileNotFoundError, "No files found matching patterns"):
+            self._call_get_file_lists([self._year_slice(2000)])
+
+    def test_get_file_lists_h1_outside_time_slice(self):
+        """Test _get_file_lists when h1 files exist but have no timesteps in the slice"""
+        # Create h1 files for 2000 and h2 files for 2001
+        self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc")
+        self._create_test_file("test.clm2.h2i.2001-01-01-00000.nc")
+
+        # Request 2001: h1 files exist but are outside the slice
+        with self.assertRaisesRegex(FileNotFoundError, "No h1 timesteps found in"):
+            self._call_get_file_lists([self._year_slice(2001)])
+
+    def test_get_file_lists_h2_outside_time_slice(self):
+        """Test _get_file_lists when h2 files exist but have no timesteps in the slice"""
+        # Create h1 files for 2001 and h2 files for 2000
+        self._create_test_file("test.clm2.h1i.2001-01-01-00000.nc")
+        self._create_test_file("test.clm2.h2i.2000-01-01-00000.nc")
+
+        # Request 2001: h2 files exist but are outside the slice
+        with self.assertRaisesRegex(FileNotFoundError, "No h2 timesteps found in"):
+            self._call_get_file_lists([self._year_slice(2001)])
+
+    def test_get_file_lists_partial_overlap(self):
+        """Test _get_file_lists when some time slices have files and others don't"""
+        # Create h1 and h2 files for 2000 only
+        self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc")
+        self._create_test_file("test.clm2.h2i.2000-01-01-00000.nc")
+
+        # Request 2000 and 2001; the second time slice has no files
+        with self.assertRaisesRegex(FileNotFoundError, "No h1 timesteps found in"):
+            self._call_get_file_lists([self._year_slice(2000), self._year_slice(2001)])
+
+
if __name__ == "__main__":
unit_testing.setup_for_tests()
unittest.main()
diff --git a/python/ctsm/test/test_unit_import_ds.py b/python/ctsm/test/test_unit_import_ds.py
new file mode 100755
index 0000000000..89349be9d4
--- /dev/null
+++ b/python/ctsm/test/test_unit_import_ds.py
@@ -0,0 +1,354 @@
+#!/usr/bin/env python3
+
+"""
+Unit tests for import_ds.py
+"""
+
+import unittest
+import os
+import tempfile
+import shutil
+
+import xarray as xr
+from cftime import DatetimeNoLeap
+
+from ctsm import unit_testing
+from ctsm.crop_calendars import import_ds
+
+# Allow test names that pylint doesn't like; otherwise hard to make them
+# readable
+# pylint: disable=invalid-name
+
+# pylint: disable=protected-access
+
+
+def _make_timestep(str_in):
+ """
+ Because of float imprecision, microseconds should be specified like:
+ 1,8 instead of 1.000008
+ and:
+ 1,800000 instead of 1.8
+ """
+ h = minute = s = us = 0
+
+ str_in_split = str_in.split(" ")
+ y, month, d = str_in_split[0].split("-")
+ if len(str_in_split) > 1:
+ h, minute, s = str_in_split[1].split(":")
+ if "," in s:
+ s, us = s.split(",")
+ inputs = [int(x) for x in [y, month, d, h, minute, s, us]]
+ return DatetimeNoLeap(*inputs, has_year_zero=True)
+
+
+class TestMakeTimestep(unittest.TestCase):
+ """Test this test module's _make_timestep() function"""
+
+ def test_make_timestep_ymd(self):
+ """Test with YYYY-MM-DD"""
+ self.assertEqual(
+ _make_timestep("1987-07-24"),
+ DatetimeNoLeap(1987, 7, 24, 0, 0, 0, 0, has_year_zero=True),
+ )
+
+ def test_make_timestep_hms(self):
+ """Test with YYYY-MM-DD hh:mm:ss"""
+ self.assertEqual(
+ _make_timestep("1987-07-24 09:25:07"),
+ DatetimeNoLeap(1987, 7, 24, 9, 25, 7, 0, has_year_zero=True),
+ )
+
+ def test_make_timestep_microsec_leadzeros(self):
+ """Test with microseconds with leading zeros"""
+ self.assertEqual(
+ _make_timestep("1987-07-24 09:25:07,8"),
+ DatetimeNoLeap(1987, 7, 24, 9, 25, 7, 8, has_year_zero=True),
+ )
+
+ def test_make_timestep_microsec_noleadzeros(self):
+ """Test with microseconds without leading zeros"""
+ self.assertEqual(
+ _make_timestep("1987-07-24 09:25:07,800000"),
+ DatetimeNoLeap(1987, 7, 24, 9, 25, 7, 800000, has_year_zero=True),
+ )
+
+
+class TestGetFilesInTimeSlice(unittest.TestCase):
+ """Tests of get_files_in_time_slice()"""
+
+ def setUp(self):
+ """
+ Set up and change to temporary directory
+ """
+ self.prev_dir = os.getcwd()
+ self.temp_dir = tempfile.mkdtemp()
+ os.chdir(self.temp_dir)
+
+ def tearDown(self):
+ """
+ Change back to the directory saved by setUp, then delete the temporary directory and any files within
+ """
+ os.chdir(self.prev_dir)  # leave the temporary directory before it is removed
+ shutil.rmtree(self.temp_dir, ignore_errors=True)  # removal errors are ignored
+
+ def _create_annual_test_files(self, years):
+ """
+ Helper method to create test files with one timestep per file (annual)
+
+ Args:
+ years: List of years to create files for
+
+ Returns:
+ List of filenames created
+ """
+ filelist = []
+ for year in years:
+ filename = os.path.join(self.temp_dir, f"test_{year}.nc")
+ filelist.append(filename)
+
+ # Create a simple dataset with one time step
+ time = xr.DataArray(
+ [_make_timestep(f"{year}-01-01")],
+ dims=["time"],
+ name="time",
+ )
+ ds = xr.Dataset({"time": time})
+ ds.to_netcdf(filename)
+
+ return filelist
+
+ def _create_monthly_test_files(self, year_month_list):
+ """
+ Helper method to create test files with multiple timesteps per file (monthly)
+
+ Args:
+ year_month_list: List of tuples (year, list_of_months) where each file
+ contains multiple monthly timesteps
+
+ Returns:
+ List of filenames created
+ """
+ filelist = []
+ for year, months in year_month_list:
+ filename = os.path.join(self.temp_dir, f"test_{year}.nc")
+ filelist.append(filename)
+
+ # Create a dataset with multiple monthly time steps
+ timesteps = [_make_timestep(f"{year}-{month:02d}-15 12:00:00") for month in months]
+ time = xr.DataArray(timesteps, dims=["time"], name="time")
+ ds = xr.Dataset({"time": time})
+ ds.to_netcdf(filename)
+
+ return filelist
+
+ def _create_daily_test_file(self, year, month, days, *, hour=0, minute=0, second=0):
+ """
+ Helper method to create a test file with daily timesteps
+
+ Args:
+ year: Year for the file
+ month: Month for the file
+ days: List of days to include
+ hour: Hour of day (default 0)
+ minute: Minute of hour (default 0)
+ second: Second of minute (default 0)
+
+ Returns:
+ Filename created
+ """
+ filename = os.path.join(self.temp_dir, f"test_{year}_{month:02d}.nc")
+
+ # Create a dataset with daily time steps
+ time_str = f"{hour:02d}:{minute:02d}:{second:02d}" if hour or minute or second else ""
+ if time_str:
+ timesteps = [_make_timestep(f"{year}-{month:02d}-{day:02d} {time_str}") for day in days]
+ else:
+ timesteps = [_make_timestep(f"{year}-{month:02d}-{day:02d}") for day in days]
+ time = xr.DataArray(timesteps, dims=["time"], name="time")
+ ds = xr.Dataset({"time": time})
+ ds.to_netcdf(filename)
+
+ return filename
+
+ def test_get_files_in_time_slice_middle(self):
+ """Test get_files_in_time_slice with a slice in the middle of the range"""
+ years = [2000, 2001, 2002, 2003, 2004]
+ filelist = self._create_annual_test_files(years)
+
+ time_slice = slice(_make_timestep("2001-01-01"), _make_timestep("2003-01-01"))
+ result = import_ds.get_files_in_time_slice(filelist, time_slice)
+ result_basenames = [os.path.basename(f) for f in result]
+ expected = ["test_2001.nc", "test_2002.nc", "test_2003.nc"]
+ self.assertEqual(result_basenames, expected)
+
+ def test_get_files_in_time_slice_middle_slice_just_strings(self):
+ """
+ As test_get_files_in_time_slice_middle, but with the slice containing strings instead of
+ actual cftime timestamps
+ """
+ years = [2000, 2001, 2002, 2003, 2004]
+ filelist = self._create_annual_test_files(years)
+
+ time_slice = slice("2001-01-01", "2003-01-01")
+ result = import_ds.get_files_in_time_slice(filelist, time_slice)
+ result_basenames = [os.path.basename(f) for f in result]
+ expected = ["test_2001.nc", "test_2002.nc", "test_2003.nc"]
+ self.assertEqual(result_basenames, expected)
+
+ def test_get_files_in_time_slice_from_beginning(self):
+ """Test get_files_in_time_slice with unbounded start (from beginning)"""
+ years = [2000, 2001, 2002, 2003, 2004]
+ filelist = self._create_annual_test_files(years)
+
+ time_slice = slice(None, _make_timestep("2001-01-01"))
+ result = import_ds.get_files_in_time_slice(filelist, time_slice)
+ result_basenames = [os.path.basename(f) for f in result]
+ expected = ["test_2000.nc", "test_2001.nc"]
+ self.assertEqual(result_basenames, expected)
+
+ def test_get_files_in_time_slice_to_end(self):
+ """Test get_files_in_time_slice with unbounded end (to the end)"""
+ years = [2000, 2001, 2002, 2003, 2004]
+ filelist = self._create_annual_test_files(years)
+
+ time_slice = slice(_make_timestep("2003-01-01"), None)
+ result = import_ds.get_files_in_time_slice(filelist, time_slice)
+ result_basenames = [os.path.basename(f) for f in result]
+ expected = ["test_2003.nc", "test_2004.nc"]
+ self.assertEqual(result_basenames, expected)
+
+ def test_get_files_in_time_slice_all_files(self):
+ """Test get_files_in_time_slice with unbounded slice (all files)"""
+ years = [2000, 2001, 2002, 2003, 2004]
+ filelist = self._create_annual_test_files(years)
+
+ time_slice = slice(None, None)
+ result = import_ds.get_files_in_time_slice(filelist, time_slice)
+ # For this test, compare full paths since expected is also full paths
+ self.assertEqual(result, filelist)
+
+ def test_get_files_in_time_slice_no_match(self):
+ """Test get_files_in_time_slice with no matching files (should raise FileNotFoundError)"""
+ years = [2000, 2001, 2002, 2003, 2004]
+ filelist = self._create_annual_test_files(years)
+
+ time_slice = slice(_make_timestep("2010-01-01"), _make_timestep("2011-01-01"))
+ with self.assertRaises(FileNotFoundError):
+ import_ds.get_files_in_time_slice(filelist, time_slice)
+
+ def test_get_files_in_time_slice_monthly_multiple_per_file(self):
+ """Test get_files_in_time_slice with monthly data, multiple timesteps per file"""
+ # Create files with monthly data: each file has 12 months
+ year_month_list = [
+ (2000, list(range(1, 13))), # Jan-Dec 2000
+ (2001, list(range(1, 13))), # Jan-Dec 2001
+ (2002, list(range(1, 13))), # Jan-Dec 2002
+ ]
+ filelist = self._create_monthly_test_files(year_month_list)
+
+ # Select from mid-2000 to mid-2001
+ time_slice = slice(_make_timestep("2000-06-01"), _make_timestep("2001-08-01"))
+ result = import_ds.get_files_in_time_slice(filelist, time_slice)
+ result_basenames = [os.path.basename(f) for f in result]
+ expected = ["test_2000.nc", "test_2001.nc"]
+ self.assertEqual(result_basenames, expected)
+
+ def test_get_files_in_time_slice_monthly_partial_overlap(self):
+ """Test get_files_in_time_slice with monthly data selecting partial year"""
+ year_month_list = [
+ (2000, list(range(1, 13))),
+ (2001, list(range(1, 13))),
+ (2002, list(range(1, 13))),
+ ]
+ filelist = self._create_monthly_test_files(year_month_list)
+
+ # Select only within 2001
+ time_slice = slice(_make_timestep("2001-03-01"), _make_timestep("2001-09-01"))
+ result = import_ds.get_files_in_time_slice(filelist, time_slice)
+ result_basenames = [os.path.basename(f) for f in result]
+ expected = ["test_2001.nc"]
+ self.assertEqual(result_basenames, expected)
+
+ def test_get_files_in_time_slice_with_hours_minutes_seconds(self):
+ """Test get_files_in_time_slice with timesteps including hours, minutes, seconds"""
+ # Create daily files with specific times
+ filelist = []
+ filelist.append(
+ self._create_daily_test_file(2000, 6, list(range(1, 31)), hour=6, minute=30, second=15)
+ )
+ filelist.append(
+ self._create_daily_test_file(2000, 7, list(range(1, 32)), hour=6, minute=30, second=15)
+ )
+ filelist.append(
+ self._create_daily_test_file(2000, 8, list(range(1, 32)), hour=6, minute=30, second=15)
+ )
+
+ # Select from mid-June to mid-July with specific time
+ time_slice = slice(
+ _make_timestep("2000-06-15 06:30:15"),
+ _make_timestep("2000-07-20 06:30:15"),
+ )
+ result = import_ds.get_files_in_time_slice(filelist, time_slice)
+ result_basenames = [os.path.basename(f) for f in result]
+ expected = ["test_2000_06.nc", "test_2000_07.nc"]
+ self.assertEqual(result_basenames, expected)
+
+ def test_get_files_in_time_slice_mixed_frequencies(self):
+ """Test get_files_in_time_slice with files containing different numbers of timesteps"""
+ filelist = []
+ # File 1: Single timestep (annual)
+ filename1 = os.path.join(self.temp_dir, "test_1999.nc")
+ time1 = xr.DataArray([_make_timestep("1999-07-01 00:00:00")], dims=["time"], name="time")
+ xr.Dataset({"time": time1}).to_netcdf(filename1)
+ filelist.append(filename1)
+
+ # File 2: Monthly timesteps
+ filename2 = os.path.join(self.temp_dir, "test_2000.nc")
+ timesteps2 = [_make_timestep(f"2000-{m:02d}-15 12:00:00") for m in range(1, 13)]
+ time2 = xr.DataArray(timesteps2, dims=["time"], name="time")
+ xr.Dataset({"time": time2}).to_netcdf(filename2)
+ filelist.append(filename2)
+
+ # File 3: Daily timesteps for one month
+ filelist.append(
+ self._create_daily_test_file(2001, 1, list(range(1, 32)), hour=3, minute=0, second=0)
+ )
+
+ # Select from late 1999 to mid-2000
+ time_slice = slice(_make_timestep("1999-06-01"), _make_timestep("2000-08-01"))
+ result = import_ds.get_files_in_time_slice(filelist, time_slice)
+ result_basenames = [os.path.basename(f) for f in result]
+ expected = ["test_1999.nc", "test_2000.nc"]
+ self.assertEqual(result_basenames, expected)
+
+ def test_get_files_in_time_slice_exact_boundary_match(self):
+ """Test get_files_in_time_slice with exact timestamp boundary matches at file edges"""
+ filelist = []
+ filelist.append(
+ self._create_daily_test_file(
+ 2000, 12, list(range(1, 32)), hour=23, minute=59, second=59
+ )
+ )
+ filelist.append(
+ self._create_daily_test_file(2001, 1, list(range(1, 32)), hour=23, minute=59, second=59)
+ )
+ filelist.append(
+ self._create_daily_test_file(2001, 2, list(range(1, 29)), hour=23, minute=59, second=59)
+ )
+
+ # Select from last timestep of December file to first timestep of February file
+ # This tests exact boundary matching at file edges
+ time_slice = slice(
+ _make_timestep("2000-12-31 23:59:59"), # Last timestep in first file
+ _make_timestep("2001-02-01 23:59:59"), # First timestep in third file
+ )
+ result = import_ds.get_files_in_time_slice(filelist, time_slice)
+ result_basenames = [os.path.basename(f) for f in result]
+ # Should include all three files since boundaries match exactly
+ expected = ["test_2000_12.nc", "test_2001_01.nc", "test_2001_02.nc"]
+ self.assertEqual(result_basenames, expected)
+
+
+if __name__ == "__main__":  # only when this file is executed directly, not when imported
+ unit_testing.setup_for_tests()
+ unittest.main()