Skip to content
Merged
1 change: 1 addition & 0 deletions .git-blame-ignore-revs
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,4 @@ cdf40d265cc82775607a1bf25f5f527bacc97405
ac03492012837799b7111607188acff9f739044a
d858665d799690d73b56bcb961684382551193f4
c0c6da391ee359f2765439426f3a2a4593a95343
598de2f05638286b3d99ac0ed120977cbc554c3d
21 changes: 21 additions & 0 deletions cime_config/testdefs/ExpectedTestFails.xml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,27 @@
</phase>
</test>

<test name="RXCROPMATURITY_Lm61.f10_f10_mg37.IHistClm60BgcCrop.derecho_intel.clm-cropMonthOutput">
<phase name="RUN">
<status>FAIL</status>
<issue>#3740</issue>
</phase>
</test>

<test name="RXCROPMATURITY_Lm61.f09_t232.IHistClm60BgcCrop.derecho_intel.clm-cropMonthOutput">
<phase name="RUN">
<status>FAIL</status>
<issue>#3740</issue>
</phase>
</test>

<test name="RXCROPMATURITY_Lm61.f09_g17.IHistClm60BgcCrop.derecho_intel.clm-cropMonthOutput">
<phase name="RUN">
<status>FAIL</status>
<issue>#3740</issue>
</phase>
</test>

<test name="SMS_Ld5.f09_g17.IHistClm50Sp.derecho_intel.clm-nofire">
<phase name="SHAREDLIB_BUILD">
<status>FAIL</status>
Expand Down
92 changes: 86 additions & 6 deletions python/ctsm/crop_calendars/generate_gdds.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@
from ctsm.ctsm_logging import log, error # pylint: disable=wrong-import-position
import ctsm.crop_calendars.cropcal_module as cc # pylint: disable=wrong-import-position
import ctsm.crop_calendars.generate_gdds_functions as gddfn # pylint: disable=wrong-import-position
from ctsm.crop_calendars.import_ds import ( # pylint: disable=wrong-import-position
get_files_in_time_slice, # pylint: disable=wrong-import-position
) # pylint: disable=wrong-import-position

# Functions here were written with too many positional arguments. At some point that should be
# fixed. For now, we'll just disable the warning.
Expand All @@ -42,6 +45,72 @@ def _get_max_growing_season_lengths(max_season_length_from_hdates_file, paramfil
return mxmats


def _get_history_yr_range(first_season, last_season):
"""
Get a range object that can be used for looping over all years we need to process timestamps
from.
"""
# Saving at the end of a year receive the timestamp of the END of the year's final timestep,
# which means it will actually be 00:00 of Jan. 1 of the next year.
first_history_yr = first_season + 1

# Same deal for the last history timestep, but we have to read an extra year in that case,
# because in some places the last growing season won't complete until the year after it was
# planted.
last_history_yr = last_season + 2

# last_history_yr + 1 because range() will iterate up to but not including the second value.
history_yr_range = range(first_history_yr, last_history_yr + 1)

return history_yr_range


def _get_time_slice_list(first_season, last_season):
"""
Given the requested first and last seasons, get the list of time slices that the script should
look for. The assumptions here, as in import_and_process_1yr and as instructed in the docs, are
that the user (a) is saving instantaneous annual files and (b) started on Jan. 1.
"""

# Input checks
if not all(isinstance(i, int) for i in [first_season, last_season]):
raise TypeError("_get_time_slice_list() arguments must be integers")
if first_season > last_season:
raise ValueError(f"first_season ({first_season}) > last_season ({last_season})")

slice_list = []
for history_yr in _get_history_yr_range(first_season, last_season):
slice_start = f"{history_yr}-01-01"
# Stop could probably be the same as start, since there should just be one value saved per
# year and that should get the Jan. 1 timestamp.
slice_stop = f"{history_yr}-12-31"
slice_list.append(slice(slice_start, slice_stop))

# We should be reading one more than the total number of years in [first_season, last_season].
assert len(slice_list) == last_season - first_season + 2

return slice_list


def _get_file_lists(input_dir, time_slice_list, logger):
"""
For each time slice in a list, find the file(s) that need to be read to get all history
timesteps in the slice. Returns both h1i and h2i file lists.
"""
output_file_lists_list = [None, None]
for i, h in enumerate([1, 2]):
all_h_files = gddfn.find_inst_hist_files(input_dir, h=h, logger=logger)
h_file_lists = []
for time_slice in time_slice_list:
try:
h_file_lists.append(get_files_in_time_slice(all_h_files, time_slice, logger=logger))
except FileNotFoundError as e:
raise FileNotFoundError(f"No h{h} timesteps found in {time_slice}") from e
output_file_lists_list[i] = h_file_lists
h1_file_lists, h2_file_lists = tuple(output_file_lists_list)
return h1_file_lists, h2_file_lists


def main(
*,
input_dir=None,
Expand Down Expand Up @@ -126,6 +195,9 @@ def main(
+ "(years are +1 because of CTSM output naming)",
)

# This script uses pickle to save work in progress. In case of interruption, when the script
# is resumed, it will look for a pickle file. It will resume from the year after
# pickle_year, which is the last processed year in the pickle file.
pickle_file = os.path.join(output_dir, f"{first_season}-{last_season}.pickle")
h2_ds_file = os.path.join(output_dir, f"{first_season}-{last_season}.h2_ds.nc")
if os.path.exists(pickle_file) and not no_pickle:
Expand Down Expand Up @@ -162,10 +234,20 @@ def main(
max_season_length_from_hdates_file, paramfile, max_season_length_cushion
)

h1_instantaneous = None
for yr_index, this_yr in enumerate(np.arange(first_season + 1, last_season + 3)):
# Get lists of history timesteps and files to read
time_slice_list = _get_time_slice_list(first_season, last_season)
h1_file_lists, h2_file_lists = _get_file_lists(input_dir, time_slice_list, logger)

for yr_index, this_yr in enumerate(_get_history_yr_range(first_season, last_season)):
# If resuming from a pickled file, we continue until we reach a year that hasn't yet
# been processed.
if this_yr <= pickle_year:
Comment thread
samsrabin marked this conversation as resolved.
continue
log(logger, f"netCDF year {this_yr}...")

# Get h1 and h2 files to read for this year
h1_file_list = h1_file_lists[yr_index] # pylint: disable=unsubscriptable-object
h2_file_list = h2_file_lists[yr_index] # pylint: disable=unsubscriptable-object

(
h2_ds,
Expand All @@ -179,28 +261,26 @@ def main(
incl_vegtypes_str,
incl_patches1d_itype_veg,
mxsowings,
h1_instantaneous,
) = gddfn.import_and_process_1yr(
first_season,
last_season,
yr_index,
this_yr,
sdates_rx,
hdates_rx,
gddaccum_yp_list,
gddharv_yp_list,
skip_patches_for_isel_nan_lastyear,
lastyear_active_patch_indices_list,
incorrectly_daily,
input_dir,
incl_vegtypes_str,
h2_ds_file,
mxmats,
cc.get_gs_len_da,
skip_crops,
outdir_figs,
logger,
h1_instantaneous,
h1_file_list,
h2_file_list,
)

log(logger, f" Saving pickle file ({pickle_file})...")
Expand Down
108 changes: 75 additions & 33 deletions python/ctsm/crop_calendars/generate_gdds_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import numpy as np
import xarray as xr

from ctsm.utils import is_instantaneous
from ctsm.ctsm_logging import log, error
import ctsm.crop_calendars.cropcal_utils as utils
import ctsm.crop_calendars.cropcal_module as cc
Expand Down Expand Up @@ -290,64 +289,45 @@ def import_and_process_1yr(
year_1,
year_n,
year_index,
this_year,
sdates_rx,
hdates_rx,
gddaccum_yp_list,
gddharv_yp_list,
skip_patches_for_isel_nan_last_year,
last_year_active_patch_indices_list,
incorrectly_daily,
indir,
incl_vegtypes_str_in,
h2_ds_file,
mxmats,
get_gs_len_da,
skip_crops,
outdir_figs,
logger,
h1_instantaneous,
h1_filelist,
h2_filelist,
):
"""
Import one year of CLM output data for GDD generation
"""
save_figs = True
log(logger, f"netCDF year {this_year}...")

# Without dask, this can take a LONG time at resolutions finer than 2-deg
if importlib_util.find_spec("dask"):
chunks = {"time": 1}
else:
chunks = None

# Get h1 file (list)
h1_pattern = os.path.join(indir, "*h1i.*.nc")
h1_filelist = glob.glob(h1_pattern)
if not h1_filelist:
h1_pattern = os.path.join(indir, "*h1i.*.nc.base")
h1_filelist = glob.glob(h1_pattern)
if not h1_filelist:
error(logger, "No files found matching pattern '*h1i.*.nc(.base)'")

# Get list of crops to include
if skip_crops is not None:
crops_to_read = [c for c in utils.define_mgdcrop_list_withgrasses() if c not in skip_crops]
else:
crops_to_read = utils.define_mgdcrop_list_withgrasses()

# Are h1 files instantaneous?
if h1_instantaneous is None:
h1_instantaneous = is_instantaneous(xr.open_dataset(h1_filelist[0])["time"])

if h1_instantaneous:
slice_year = this_year
else:
slice_year = this_year - 1
# Read h1 file(s)
dates_ds = import_ds(
h1_filelist,
my_vars=["SDATES", "HDATES"],
my_vegtypes=crops_to_read,
time_slice=slice(f"{slice_year}-01-01", f"{slice_year}-12-31"),
chunks=chunks,
logger=logger,
)
Expand Down Expand Up @@ -631,16 +611,8 @@ def import_and_process_1yr(
log(logger, " Importing accumulated GDDs...")
clm_gdd_var = "GDDACCUM"
my_vars = [clm_gdd_var, "GDDHARV"]
patterns = [f"*h2i.{this_year-1}-01*.nc", f"*h2i.{this_year-1}-01*.nc.base"]
for pat in patterns:
pattern = os.path.join(indir, pat)
h2_files = glob.glob(pattern)
if h2_files:
break
if not h2_files:
error(logger, f"No files found matching patterns: {patterns}")
h2_ds = import_ds(
h2_files,
h2_filelist,
my_vars=my_vars,
my_vegtypes=crops_to_read,
chunks=chunks,
Expand Down Expand Up @@ -892,10 +864,80 @@ def import_and_process_1yr(
incl_vegtypes_str,
incl_patches1d_itype_veg,
mxsowings,
h1_instantaneous,
)


def find_inst_hist_files(indir, *, h, this_year=None, logger=None):
"""
Find all the instantaneous history files for a given tape number, optionally looking just for
one year in filename.

Args:
indir: Directory to search for history files
h: History tape number (must be an integer, e.g., 1 for h1, 2 for h2)
this_year: Optional year to filter files by. If provided, only files with dates starting
with "{this_year}-01" will be returned. If None, all files matching the
history tape number will be returned.
logger: Optional logger for error messages. If None, errors are raised without logging.

Returns:
List of file paths matching the search criteria

Raises:
TypeError: If h is not an integer
FileNotFoundError: If no files matching the patterns are found
RuntimeError: If files from multiple case names are found (indicates mixed output from
different simulations, which is pathological)

Notes:
- Searches for files matching patterns: "*h{h}i.*.nc" or "*h{h}i.*.nc.base"
- When this_year is specified, searches for: "*h{h}i.{this_year}-01*.nc" or
"*h{h}i.{this_year}-01*.nc.base"
- Prefers .nc files over .nc.base files (searches .nc pattern first)
- All returned files must be from the same case name (extracted from filename before
".clm2.h#i.")
"""
if this_year is None:
patterns = [f"*h{h}i.*.nc", f"*h{h}i.*.nc.base"]
else:
if not isinstance(h, int):
err_msg = f"h ({h}) must be an integer, not {type(h)}"
err_type = TypeError
if logger:
error(logger, err_msg, error_type=err_type)
raise err_type(err_msg)
patterns = [f"*h{h}i.{this_year}-01*.nc", f"*h{h}i.{this_year}-01*.nc.base"]
for pat in patterns:
pattern = os.path.join(indir, pat)
file_list = glob.glob(pattern)
if file_list:
break
if not file_list:
err_msg = f"No files found matching patterns: {patterns}"
err_type = FileNotFoundError
if logger:
error(logger, err_msg, error_type=err_type)
raise err_type(err_msg)

# Error if files found from multiple cases
case_names = set()
for file in file_list:
basename = os.path.basename(file)
# Extract case name (everything before .clm2.h#i.)
parts = basename.split(".clm2.")
if len(parts) > 1:
case_name = parts[0]
case_names.add(case_name)
if len(case_names) > 1:
err_msg = f"Found files from multiple case names: {sorted(case_names)}"
err_type = RuntimeError
if logger:
error(logger, err_msg, error_type=err_type)
raise err_type(err_msg)

return file_list


def get_multicrop_maps(this_ds, these_vars, crop_fracs_yx, dummy_fill, gdd_units):
# pylint: disable=missing-function-docstring
# Get GDDs for these crops
Expand Down
Loading
Loading