Skip to content

Commit 3bcee20

Browse files
committed
bug fix for esoteric edge case with missing leap day in source data
1 parent 8085a43 commit 3bcee20

File tree

3 files changed

+84
-20
lines changed

3 files changed

+84
-20
lines changed

sup3r/bias/bias_transforms.py

+16-11
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,11 @@
1414

1515
import dask.array as da
1616
import numpy as np
17-
import pandas as pd
1817
from rex.utilities.bc_utils import QuantileDeltaMapping
1918
from scipy.ndimage import gaussian_filter
2019

2120
from sup3r.preprocessing import Rasterizer
21+
from sup3r.preprocessing.utilities import make_time_index_from_kws
2222

2323
logger = logging.getLogger(__name__)
2424

@@ -402,7 +402,7 @@ def monthly_local_linear_bc(
402402
out : np.ndarray
403403
out = data * scalar + adder
404404
"""
405-
time_index = pd.date_range(**date_range_kwargs)
405+
time_index = make_time_index_from_kws(date_range_kwargs)
406406
out = _get_spatial_bc_factors(lat_lon, feature_name, bias_fp)
407407
scalar, adder = out['scalar'], out['adder']
408408

@@ -589,10 +589,13 @@ def local_qdm_bc(
589589
590590
"""
591591
# Confirm that the given time matches the expected data size
592-
time_index = pd.date_range(**date_range_kwargs)
593-
assert (
594-
data.shape[2] == time_index.size
595-
), 'Time should align with data 3rd dimension'
592+
msg = f'data was expected to be a 3D array but got shape {data.shape}'
593+
assert data.ndim == 3, msg
594+
time_index = make_time_index_from_kws(date_range_kwargs)
595+
msg = (f'Time should align with data 3rd dimension but got data '
596+
f'{data.shape} and time_index length '
597+
f'{time_index.size}: {time_index}')
598+
assert data.shape[-1] == time_index.size, msg
596599

597600
params = _get_spatial_bc_quantiles(
598601
lat_lon=lat_lon,
@@ -1031,11 +1034,13 @@ def local_presrat_bc(data: np.ndarray,
10311034
max_workers : int | None
10321035
Max number of workers to use for QDM process pool
10331036
"""
1034-
time_index = pd.date_range(**date_range_kwargs)
1035-
assert data.ndim == 3, 'data was expected to be a 3D array'
1036-
assert (
1037-
data.shape[-1] == time_index.size
1038-
), 'The last dimension of data should be time'
1037+
time_index = make_time_index_from_kws(date_range_kwargs)
1038+
msg = f'data was expected to be a 3D array but got shape {data.shape}'
1039+
assert data.ndim == 3, msg
1040+
msg = (f'Time should align with data 3rd dimension but got data '
1041+
f'{data.shape} and time_index length '
1042+
f'{time_index.size}: {time_index}')
1043+
assert data.shape[-1] == time_index.size, msg
10391044

10401045
params = _get_spatial_bc_presrat(
10411046
lat_lon, base_dset, feature_name, bias_fp, threshold

sup3r/bias/utilities.py

+15-7
Original file line numberDiff line numberDiff line change
@@ -268,11 +268,19 @@ def bias_correct_features(
268268

269269
time_slice = _parse_time_slice(time_slice)
270270
for feat in features:
271-
input_handler[feat][..., time_slice] = bias_correct_feature(
272-
source_feature=feat,
273-
input_handler=input_handler,
274-
time_slice=time_slice,
275-
bc_method=bc_method,
276-
bc_kwargs=bc_kwargs,
277-
)
271+
try:
272+
input_handler[feat][..., time_slice] = bias_correct_feature(
273+
source_feature=feat,
274+
input_handler=input_handler,
275+
time_slice=time_slice,
276+
bc_method=bc_method,
277+
bc_kwargs=bc_kwargs,
278+
)
279+
except Exception as e:
280+
msg = (f'Could not run bias correction method {bc_method} on '
281+
f'feature {feat} time slice {time_slice} with input '
282+
f'handler of class {type(input_handler)} with shape '
283+
f'{input_handler.shape}. Received error: {e}')
284+
logger.exception(msg)
285+
raise RuntimeError(msg) from e
278286
return input_handler

sup3r/preprocessing/utilities.py

+53-2
Original file line numberDiff line numberDiff line change
@@ -128,18 +128,69 @@ def wrapper(self, *args, **kwargs):
128128
def get_date_range_kwargs(time_index):
    """Get kwargs for pd.date_range from a DatetimeIndex. This is used to
    provide a concise time_index representation which can be passed through
    the cli and avoid logging lengthy time indices.

    Parameters
    ----------
    time_index : pd.DatetimeIndex
        Time index to summarize as ``start`` / ``end`` / ``freq`` kwargs.

    Returns
    -------
    kwargs : dict
        Dictionary to pass to pd.date_range(). Can also include kwarg
        ``drop_leap`` (set to True) when the time index has uneven steps and
        removing Feb 29 from the nominal date range reproduces the length of
        ``time_index``. ``drop_leap`` is not a pd.date_range() argument and
        has to be removed before calling pd.date_range().
    """
    # With only two timestamps the frequency cannot be inferred, so use the
    # spacing between them (in minutes) directly.
    freq = (
        f'{(time_index[-1] - time_index[0]).total_seconds() / 60}min'
        if len(time_index) == 2
        else pd.infer_freq(time_index)
    )

    kwargs = {
        'start': time_index[0].strftime('%Y-%m-%d %H:%M:%S'),
        'end': time_index[-1].strftime('%Y-%m-%d %H:%M:%S'),
        'freq': freq,
    }

    # Step sizes between consecutive timestamps. Slice subtraction is
    # equivalent to ``time_index.diff()[1:]`` and also works on pandas
    # versions that do not provide ``Index.diff``.
    steps = time_index[1:] - time_index[:-1]
    uneven_freq = len(steps.unique()) > 1
    if not uneven_freq:
        return kwargs

    # Only flag a dropped leap day when removing Feb 29 from the nominal
    # range actually accounts for the missing timestamps; any other kind of
    # gap must not be silently labelled as a leap-day drop.
    nominal_ti = pd.date_range(**kwargs)
    leap_mask = (nominal_ti.month == 2) & (nominal_ti.day == 29)
    n_without_leap = len(nominal_ti) - int(leap_mask.sum())
    is_shorter = len(nominal_ti) > len(time_index)

    if is_shorter and n_without_leap == len(time_index):
        kwargs['drop_leap'] = True
    else:
        msg = (f'Got uneven frequency for time index: {time_index}')
        warn(msg)
        logger.warning(msg)

    return kwargs
168+
169+
170+
def make_time_index_from_kws(date_range_kwargs):
    """Make a pandas DatetimeIndex from the ``get_date_range_kwargs``
    outputs.

    Parameters
    ----------
    date_range_kwargs : dict
        Dictionary to pass to pd.date_range(), typically produced from
        ``get_date_range_kwargs()``. Can also include kwarg ``drop_leap``.
        The dictionary is not modified by this function.

    Returns
    -------
    time_index : pd.DatetimeIndex
        Output time index. If ``drop_leap`` is truthy, all Feb 29 timestamps
        are removed from the index.
    """
    # Work on a shallow copy: popping ``drop_leap`` from the caller's dict
    # would make a second call with the same kwargs silently keep Feb 29.
    kwargs = dict(date_range_kwargs)

    # ``drop_leap`` is not a pd.date_range() argument, so strip it first.
    drop_leap = kwargs.pop('drop_leap', False)
    time_index = pd.date_range(**kwargs)

    if drop_leap:
        leap_mask = (time_index.month == 2) & (time_index.day == 29)
        time_index = time_index[~leap_mask]

    return time_index
193+
143194

144195
def _compute_chunks_if_dask(arr):
145196
return (

0 commit comments

Comments
 (0)