From 5fdc448bb2e990f9cb9c5e4723ef2d910a32aa37 Mon Sep 17 00:00:00 2001 From: Cjesse Date: Wed, 19 Nov 2025 10:17:01 -0500 Subject: [PATCH 1/2] update tests for datetime concat --- pandas/tests/reshape/concat/test_datetimes.py | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/pandas/tests/reshape/concat/test_datetimes.py b/pandas/tests/reshape/concat/test_datetimes.py index b1cba7ee31eac..fbaea0d081df9 100644 --- a/pandas/tests/reshape/concat/test_datetimes.py +++ b/pandas/tests/reshape/concat/test_datetimes.py @@ -278,6 +278,31 @@ def test_concat_NaT_series_dataframe_all_NaT(self, tz1, tz2): result = concat([first, second]) tm.assert_frame_equal(result, expected) + def test_concat_ns_and_s_preserves_datetime64(self): + # ensure concatenating a datetime64[ns] column and a copy cast to M8[s] + # yields a datetime64 dtype (finest unit should be ns) + df = pd.DataFrame( + {"ints": range(2), "dates": pd.date_range("2000", periods=2, freq="min")} + ) + df2 = df.copy() + df2["dates"] = df2["dates"].astype("M8[s]") + + combined = pd.concat([df, df2], ignore_index=True) + + # dtype is a datetime64 type + assert pd.api.types.is_datetime64_any_dtype(combined["dates"].dtype) + + # unit should be the finest (ns) when mixing ns and s + unit = np.datetime_data(combined["dates"].dtype)[0] + assert unit == "ns" + + # values preserved (compare as ns) + exp = pd.to_datetime(list(df["dates"]) + list(df2["dates"])) + tm.assert_series_equal( + combined["dates"].astype("datetime64[ns]").reset_index(drop=True), + pd.Series(exp.astype("datetime64[ns]"), name="dates").reset_index(drop=True), + ) + class TestTimezoneConcat: def test_concat_tz_series(self): @@ -591,3 +616,25 @@ def test_concat_float_datetime64(): result = concat([df_time, df_float.iloc[:0]]) tm.assert_frame_equal(result, expected) + +@pytest.mark.parametrize("order", [[0, 1], [1, 0]]) +def test_concat_ns_and_s_order_invariance(order): + df = pd.DataFrame( + {"ints": range(2), "dates": pd.date_range("2000", periods=2, freq="min")} + ) + df2 = df.copy() + df2["dates"] = df2["dates"].astype("M8[s]") + + parts = [df, df2] + combined = pd.concat([parts[i] for i in order], ignore_index=True) + + assert pd.api.types.is_datetime64_any_dtype(combined["dates"].dtype) + + +def test_concat_ns_and_s_with_all_nat_and_empty(): + # mixing a ns datetime column with an all-NaT seconds-typed column + df = pd.DataFrame({"dates": pd.date_range("2000", periods=2, freq="min")}) + df2 = pd.DataFrame({"dates": [pd.NaT, pd.NaT]}).astype({"dates": "datetime64[s]"}) + + combined = pd.concat([df, df2], ignore_index=True) + assert pd.api.types.is_datetime64_any_dtype(combined["dates"].dtype) From 958577d2369e4df1a379bc612419410459e9f78d Mon Sep 17 00:00:00 2001 From: Cjesse Date: Tue, 25 Nov 2025 23:56:41 -0500 Subject: [PATCH 2/2] expand the tests --- pandas/tests/reshape/concat/test_datetimes.py | 56 ++++++++++++++----- 1 file changed, 42 insertions(+), 14 deletions(-) diff --git a/pandas/tests/reshape/concat/test_datetimes.py b/pandas/tests/reshape/concat/test_datetimes.py index fbaea0d081df9..2588f59268334 100644 --- a/pandas/tests/reshape/concat/test_datetimes.py +++ b/pandas/tests/reshape/concat/test_datetimes.py @@ -279,6 +279,7 @@ def test_concat_NaT_series_dataframe_all_NaT(self, tz1, tz2): tm.assert_frame_equal(result, expected) def test_concat_ns_and_s_preserves_datetime64(self): + # GH 53307 # ensure concatenating a datetime64[ns] column and a copy cast to M8[s] # yields a datetime64 dtype (finest unit should be ns) df = pd.DataFrame( @@ -617,24 +618,51 @@ def test_concat_float_datetime64(): result = concat([df_time, df_float.iloc[:0]]) tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize("order", [[0, 1], [1, 0]]) -def test_concat_ns_and_s_order_invariance(order): - df = pd.DataFrame( - {"ints": range(2), "dates": pd.date_range("2000", periods=2, freq="min")} - ) + +@pytest.mark.parametrize( + "unit,unit2", + [(u1, u2) for u1 in ("ns", "us", "ms", "s") for u2 in ("ns", "us", "ms", "s")], +) +def test_concat_mixed_units_preserve_datetime_and_unit(unit, unit2): + # GH 53307 + # for each pair of units, concatenating columns of those units should + # result in a datetime64 dtype with the finest unit + df = pd.DataFrame({"dates": pd.to_datetime(["2000-01-01", "2000-01-02"])}) + # cast copies to requested unit + df1 = df.copy() + df1["dates"] = df1["dates"].astype(f"M8[{unit}]") df2 = df.copy() - df2["dates"] = df2["dates"].astype("M8[s]") + df2["dates"] = df2["dates"].astype(f"M8[{unit2}]") - parts = [df, df2] - combined = pd.concat([parts[i] for i in order], ignore_index=True) + exp_unit = tm.get_finest_unit(unit, unit2) - assert pd.api.types.is_datetime64_any_dtype(combined["dates"].dtype) + # test both concat orders + for a, b in ((df1, df2), (df2, df1)): + combined = pd.concat([a, b], ignore_index=True) + + assert pd.api.types.is_datetime64_any_dtype(combined["dates"].dtype) + + res_unit = np.datetime_data(combined["dates"].dtype)[0] + assert res_unit == exp_unit + + +@pytest.mark.parametrize( + "unit,unit2", + [(u1, u2) for u1 in ("ns", "us", "ms", "s") for u2 in ("ns", "us", "ms", "s")], +) +def test_concat_mixed_units_with_all_nat(unit, unit2): + # GH 53307 + # mixing non-empty datetime column and an all-NaT column typed to unit2 + df = pd.DataFrame({"dates": pd.to_datetime(["2000-01-01"])}) + df1 = df.copy() + df1["dates"] = df1["dates"].astype(f"M8[{unit}]") + ser_nat = pd.Series([pd.NaT], dtype=f"datetime64[{unit2}]") + df2 = pd.DataFrame({"dates": ser_nat}) -def test_concat_ns_and_s_with_all_nat_and_empty(): - # mixing a ns datetime column with an all-NaT seconds-typed column - df = pd.DataFrame({"dates": pd.date_range("2000", periods=2, freq="min")}) - df2 = pd.DataFrame({"dates": [pd.NaT, pd.NaT]}).astype({"dates": "datetime64[s]"}) + exp_unit = tm.get_finest_unit(unit, unit2) - combined = pd.concat([df, df2], ignore_index=True) + combined = pd.concat([df1, df2], ignore_index=True) assert pd.api.types.is_datetime64_any_dtype(combined["dates"].dtype) + res_unit = np.datetime_data(combined["dates"].dtype)[0] + assert res_unit == exp_unit