Skip to content

Commit 8de7061

Browse files
Fix overflow issue in decode_cf_datetime for dtypes <= np.uint32 (pydata#6598)
1 parent e02b1c3 commit 8de7061

File tree

3 files changed

+36
-3
lines changed

3 files changed

+36
-3
lines changed

doc/whats-new.rst

+3
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,9 @@ Bug fixes
126126
- :py:meth:`isel` with ``drop=True`` works as intended with scalar :py:class:`DataArray` indexers.
127127
(:issue:`6554`, :pull:`6579`)
128128
By `Michael Niklas <https://github.com/headtr1ck>`_.
129+
- Fixed silent overflow issue when decoding times encoded with 32-bit or smaller
130+
unsigned integer data types (:issue:`6589`, :pull:`6598`). By `Spencer Clark
131+
<https://github.com/spencerkclark>`_.
129132

130133
Documentation
131134
~~~~~~~~~~~~~

xarray/coding/times.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -218,9 +218,12 @@ def _decode_datetime_with_pandas(flat_num_dates, units, calendar):
218218
pd.to_timedelta(flat_num_dates.max(), delta) + ref_date
219219

220220
# To avoid integer overflow when converting to nanosecond units for integer
221-
# dtypes smaller than np.int64 cast all integer-dtype arrays to np.int64
222-
# (GH 2002).
223-
if flat_num_dates.dtype.kind == "i":
221+
# dtypes smaller than np.int64, cast all integer and unsigned integer dtype
222+
# arrays to np.int64 (GH 2002, GH 6589). Note this is safe even in the case
223+
# of np.uint64 values, because any np.uint64 value that would lead to
224+
# overflow when converting to np.int64 would not be representable with a
225+
# timedelta64 value, and therefore would raise an error in the lines above.
226+
if flat_num_dates.dtype.kind in "iu":
224227
flat_num_dates = flat_num_dates.astype(np.int64)
225228

226229
# Cast input ordinals to integers of nanoseconds because pd.to_timedelta

xarray/tests/test_coding_times.py

+27
Original file line numberDiff line numberDiff line change
@@ -1121,3 +1121,30 @@ def test_should_cftime_be_used_target_not_npable():
11211121
ValueError, match="Calendar 'noleap' is only valid with cftime."
11221122
):
11231123
_should_cftime_be_used(src, "noleap", False)
1124+
1125+
1126+
@pytest.mark.parametrize("dtype", [np.uint8, np.uint16, np.uint32, np.uint64])
1127+
def test_decode_cf_datetime_uint(dtype):
1128+
units = "seconds since 2018-08-22T03:23:03Z"
1129+
num_dates = dtype(50)
1130+
result = decode_cf_datetime(num_dates, units)
1131+
expected = np.asarray(np.datetime64("2018-08-22T03:23:53", "ns"))
1132+
np.testing.assert_equal(result, expected)
1133+
1134+
1135+
@requires_cftime
1136+
def test_decode_cf_datetime_uint64_with_cftime():
1137+
units = "days since 1700-01-01"
1138+
num_dates = np.uint64(182621)
1139+
result = decode_cf_datetime(num_dates, units)
1140+
expected = np.asarray(np.datetime64("2200-01-01", "ns"))
1141+
np.testing.assert_equal(result, expected)
1142+
1143+
1144+
@requires_cftime
1145+
def test_decode_cf_datetime_uint64_with_cftime_overflow_error():
1146+
units = "microseconds since 1700-01-01"
1147+
calendar = "360_day"
1148+
num_dates = np.uint64(1_000_000 * 86_400 * 360 * 500_000)
1149+
with pytest.raises(OverflowError):
1150+
decode_cf_datetime(num_dates, units, calendar)

0 commit comments

Comments
 (0)