Skip to content

Commit 89e902d

Browse files
committed
bug fix for esoteric edge case with missing leap day in source data
1 parent 8085a43 commit 89e902d

File tree

3 files changed

+84
-19
lines changed

3 files changed

+84
-19
lines changed

sup3r/bias/bias_transforms.py

+16-10
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from scipy.ndimage import gaussian_filter
2020

2121
from sup3r.preprocessing import Rasterizer
22+
from sup3r.preprocessing.utilities import make_time_index_from_kws
2223

2324
logger = logging.getLogger(__name__)
2425

@@ -402,7 +403,7 @@ def monthly_local_linear_bc(
402403
out : np.ndarray
403404
out = data * scalar + adder
404405
"""
405-
time_index = pd.date_range(**date_range_kwargs)
406+
time_index = make_time_index_from_kws(date_range_kwargs)
406407
out = _get_spatial_bc_factors(lat_lon, feature_name, bias_fp)
407408
scalar, adder = out['scalar'], out['adder']
408409

@@ -589,10 +590,13 @@ def local_qdm_bc(
589590
590591
"""
591592
# Confirm that the given time matches the expected data size
592-
time_index = pd.date_range(**date_range_kwargs)
593-
assert (
594-
data.shape[2] == time_index.size
595-
), 'Time should align with data 3rd dimension'
593+
msg = f'data was expected to be a 3D array but got shape {data.shape}'
594+
assert data.ndim == 3, msg
595+
time_index = make_time_index_from_kws(date_range_kwargs)
596+
msg = (f'Time should align with data 3rd dimension but got data '
597+
f'{data.shape} and time_index length '
598+
f'{time_index.size}: {time_index}')
599+
assert data.shape[-1] == time_index.size, msg
596600

597601
params = _get_spatial_bc_quantiles(
598602
lat_lon=lat_lon,
@@ -1031,11 +1035,13 @@ def local_presrat_bc(data: np.ndarray,
10311035
max_workers : int | None
10321036
Max number of workers to use for QDM process pool
10331037
"""
1034-
time_index = pd.date_range(**date_range_kwargs)
1035-
assert data.ndim == 3, 'data was expected to be a 3D array'
1036-
assert (
1037-
data.shape[-1] == time_index.size
1038-
), 'The last dimension of data should be time'
1038+
time_index = make_time_index_from_kws(date_range_kwargs)
1039+
msg = f'data was expected to be a 3D array but got shape {data.shape}'
1040+
assert data.ndim == 3, msg
1041+
msg = (f'Time should align with data 3rd dimension but got data '
1042+
f'{data.shape} and time_index length '
1043+
f'{time_index.size}: {time_index}')
1044+
assert data.shape[-1] == time_index.size, msg
10391045

10401046
params = _get_spatial_bc_presrat(
10411047
lat_lon, base_dset, feature_name, bias_fp, threshold

sup3r/bias/utilities.py

+15-7
Original file line numberDiff line numberDiff line change
@@ -268,11 +268,19 @@ def bias_correct_features(
268268

269269
time_slice = _parse_time_slice(time_slice)
270270
for feat in features:
271-
input_handler[feat][..., time_slice] = bias_correct_feature(
272-
source_feature=feat,
273-
input_handler=input_handler,
274-
time_slice=time_slice,
275-
bc_method=bc_method,
276-
bc_kwargs=bc_kwargs,
277-
)
271+
try:
272+
input_handler[feat][..., time_slice] = bias_correct_feature(
273+
source_feature=feat,
274+
input_handler=input_handler,
275+
time_slice=time_slice,
276+
bc_method=bc_method,
277+
bc_kwargs=bc_kwargs,
278+
)
279+
except Exception as e:
280+
msg = (f'Could not run bias correction method {bc_method} on '
281+
f'feature {feat} time slice {time_slice} with input '
282+
f'handler of class {type(input_handler)} with shape '
283+
f'{input_handler.shape}. Received error: {e}')
284+
logger.exception(msg)
285+
raise RuntimeError(msg) from e
278286
return input_handler

sup3r/preprocessing/utilities.py

+53-2
Original file line numberDiff line numberDiff line change
@@ -128,18 +128,69 @@ def wrapper(self, *args, **kwargs):
128128
def get_date_range_kwargs(time_index):
    """Get kwargs for pd.date_range from a DatetimeIndex. This is used to
    provide a concise time_index representation which can be passed through
    the cli and avoid logging lengthy time indices.

    Parameters
    ----------
    time_index : pd.DatetimeIndex
        Time index to summarize. A two-entry index uses its single step as
        the frequency; otherwise the frequency comes from pd.infer_freq().

    Returns
    -------
    kwargs : dict
        Dictionary with ``start``, ``end`` and ``freq`` entries to pass to
        pd.date_range(). Also includes ``drop_leap=True`` when the time
        index is only reproduced after removing Feb 29 from the nominal
        date range. ``drop_leap`` is not a pd.date_range() argument, so use
        ``make_time_index_from_kws()`` to rebuild the index from the output.
    """
    if len(time_index) == 2:
        span = time_index[-1] - time_index[0]
        freq = f'{span.total_seconds() / 60}min'
    else:
        freq = pd.infer_freq(time_index)

    kwargs = {
        'start': time_index[0].strftime('%Y-%m-%d %H:%M:%S'),
        'end': time_index[-1].strftime('%Y-%m-%d %H:%M:%S'),
        'freq': freq,
    }

    if freq is not None:
        # A resolved frequency reproduces the index as-is. This includes
        # calendar frequencies like 'MS' whose raw steps differ in length,
        # so those must not be reported as uneven.
        return kwargs

    steps = time_index[1:] - time_index[:-1]
    if len(steps.unique()) <= 1:
        return kwargs

    # Irregular index. With freq=None pd.date_range() falls back to daily
    # steps, which cannot describe sub-daily data, so use the most common
    # step as the nominal frequency instead.
    common_step = steps.value_counts().idxmax()
    if common_step > pd.Timedelta(0):
        kwargs['freq'] = f'{common_step.total_seconds() / 60}min'

    nominal_ti = pd.date_range(**kwargs)
    leap_mask = (nominal_ti.month == 2) & (nominal_ti.day == 29)
    n_without_leap = len(nominal_ti) - int(leap_mask.sum())

    # Only claim a dropped leap day when removing Feb 29 from the nominal
    # index actually accounts for the missing entries.
    if leap_mask.any() and n_without_leap == len(time_index):
        kwargs['drop_leap'] = True
    else:
        msg = (f'Got uneven frequency for time index: {time_index}')
        warn(msg)
        logger.warning(msg)

    return kwargs
168+
169+
170+
def make_time_index_from_kws(date_range_kwargs):
    """Function to make a pandas DatetimeIndex from the
    ``get_date_range_kwargs`` outputs

    Parameters
    ----------
    date_range_kwargs : dict
        Dictionary to pass to pd.date_range(), typically produced from
        ``get_date_range_kwargs()``. Can also include kwarg ``drop_leap``.
        The input dictionary is not modified.

    Returns
    -------
    time_index : pd.DatetimeIndex
        Output time index, with all Feb 29 entries removed if ``drop_leap``
        is set.
    """
    # Work on a shallow copy: popping from the caller's dict would strip
    # ``drop_leap`` so that a later call with the same kwargs would keep the
    # leap day and return a different index.
    kwargs = dict(date_range_kwargs)
    drop_leap = kwargs.pop('drop_leap', False)
    time_index = pd.date_range(**kwargs)

    if drop_leap:
        leap_mask = (time_index.month == 2) & (time_index.day == 29)
        time_index = time_index[~leap_mask]

    return time_index
193+
143194

144195
def _compute_chunks_if_dask(arr):
145196
return (

0 commit comments

Comments
 (0)