Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -504,7 +504,7 @@ Renamed the following offset aliases (:issue:`57986`):

Other Removals
^^^^^^^^^^^^^^
- :class:`.DataFrameGroupBy.idxmin`, :class:`.DataFrameGroupBy.idxmax`, :class:`.SeriesGroupBy.idxmin`, and :class:`.SeriesGroupBy.idxmax` will now raise a ``ValueError`` when used with ``skipna=False`` and an NA value is encountered (:issue:`10694`)
- :class:`.DataFrameGroupBy.idxmin`, :class:`.DataFrameGroupBy.idxmax`, :class:`.SeriesGroupBy.idxmin`, and :class:`.SeriesGroupBy.idxmax` will now raise a ``ValueError`` when a group has all NA values, or when used with ``skipna=False`` and any NA value is encountered (:issue:`10694`)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does 10694 cover both of these cases?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No - added the linked issue in the OP. Thanks!

- :func:`concat` no longer ignores empty objects when determining output dtypes (:issue:`39122`)
- :func:`concat` with all-NA entries no longer ignores the dtype of those entries when determining the result dtype (:issue:`40893`)
- :func:`read_excel`, :func:`read_json`, :func:`read_html`, and :func:`read_xml` no longer accept raw string or byte representation of the data. That type of data must be wrapped in a :py:class:`StringIO` or :py:class:`BytesIO` (:issue:`53767`)
Expand Down
5 changes: 2 additions & 3 deletions pandas/_libs/groupby.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2048,9 +2048,8 @@ def group_idxmin_idxmax(
group_min_or_max = np.empty_like(out, dtype=values.dtype)
seen = np.zeros_like(out, dtype=np.uint8)

# When using transform, we need a valid value for take in the case
# a category is not observed; these values will be dropped
out[:] = 0
# Sentinel for no valid values.
out[:] = -1

with nogil(numeric_object_t is not object):
for i in range(N):
Expand Down
40 changes: 36 additions & 4 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1404,7 +1404,15 @@ def idxmin(self, skipna: bool = True) -> Series:
Raises
------
ValueError
If the Series is empty or skipna=False and any value is NA.
When there are no valid values for a group. Then can happen if:

* There is an unobserved group and ``observed=False``.
* All values for a group are NA.
* Some values for a group are NA and ``skipna=False``.

.. versionchanged:: 3.0.0
Previously if all values for a group are NA or some values for a group are
NA and ``skipna=False``, this method would return NA. Now it raises instead.

See Also
--------
Expand Down Expand Up @@ -1457,7 +1465,15 @@ def idxmax(self, skipna: bool = True) -> Series:
Raises
------
ValueError
If the Series is empty or skipna=False and any value is NA.
When there are no valid values for a group. Then can happen if:

* There is an unobserved group and ``observed=False``.
* All values for a group are NA.
* Some values for a group are NA and ``skipna=False``.

.. versionchanged:: 3.0.0
Previously if all values for a group are NA or some values for a group are
NA and ``skipna=False``, this method would return NA. Now it raises instead.

See Also
--------
Expand Down Expand Up @@ -2597,7 +2613,15 @@ def idxmax(
Raises
------
ValueError
* If a column is empty or skipna=False and any value is NA.
When there are no valid values for a group. Then can happen if:

* There is an unobserved group and ``observed=False``.
* All values for a group are NA.
* Some values for a group are NA and ``skipna=False``.

.. versionchanged:: 3.0.0
Previously if all values for a group are NA or some values for a group are
NA and ``skipna=False``, this method would return NA. Now it raises instead.

See Also
--------
Expand Down Expand Up @@ -2663,7 +2687,15 @@ def idxmin(
Raises
------
ValueError
* If a column is empty or skipna=False and any value is NA.
When there are no valid values for a group. Then can happen if:

* There is an unobserved group and ``observed=False``.
* All values for a group are NA.
* Some values for a group are NA and ``skipna=False``.

.. versionchanged:: 3.0.0
Previously if all values for a group are NA or some values for a group are
NA and ``skipna=False``, this method would return NA. Now it raises instead.

See Also
--------
Expand Down
12 changes: 10 additions & 2 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1784,7 +1784,8 @@ def array_func(values: ArrayLike) -> ArrayLike:
new_mgr = data.grouped_reduce(array_func)
res = self._wrap_agged_manager(new_mgr)
if how in ["idxmin", "idxmax"]:
res = self._wrap_idxmax_idxmin(res)
# mypy expects how to be Literal["idxmin", "idxmax"].
res = self._wrap_idxmax_idxmin(res, how=how, skipna=kwargs["skipna"]) # type: ignore[arg-type]
out = self._wrap_aggregated_output(res)
return out

Expand Down Expand Up @@ -5715,10 +5716,17 @@ def _idxmax_idxmin(
)
return result

def _wrap_idxmax_idxmin(self, res: NDFrameT) -> NDFrameT:
def _wrap_idxmax_idxmin(
self, res: NDFrameT, how: Literal["idxmax", "idxmin"], skipna: bool
) -> NDFrameT:
index = self.obj.index
if res.size == 0:
result = res.astype(index.dtype)
elif skipna and res.lt(0).any(axis=None):
raise ValueError(
f"{type(self).__name__}.{how} with skipna=True encountered all NA "
f"values in a group."
)
else:
if isinstance(index, MultiIndex):
index = index.to_flat_index()
Expand Down
13 changes: 9 additions & 4 deletions pandas/core/groupby/grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,18 +286,22 @@ def __init__(
self._indexer: npt.NDArray[np.intp] | None = None

def _get_grouper(
self, obj: NDFrameT, validate: bool = True
self, obj: NDFrameT, validate: bool = True, observed: bool = True
) -> tuple[ops.BaseGrouper, NDFrameT]:
"""
Parameters
----------
obj : Series or DataFrame
Object being grouped.
validate : bool, default True
if True, validate the grouper
If True, validate the grouper.
observed : bool, default True
Whether only observed groups should be in the result. Only
has an impact when grouping on categorical data.

Returns
-------
a tuple of grouper, obj (possibly sorted)
A tuple of grouper, obj (possibly sorted)
"""
obj, _, _ = self._set_grouper(obj)
grouper, _, obj = get_grouper(
Expand All @@ -307,6 +311,7 @@ def _get_grouper(
sort=self.sort,
validate=validate,
dropna=self.dropna,
observed=observed,
)

return grouper, obj
Expand Down Expand Up @@ -787,7 +792,7 @@ def get_grouper(

# a passed-in Grouper, directly convert
if isinstance(key, Grouper):
grouper, obj = key._get_grouper(obj, validate=False)
grouper, obj = key._get_grouper(obj, validate=False, observed=observed)
if key.key is None:
return grouper, frozenset(), obj
else:
Expand Down
16 changes: 15 additions & 1 deletion pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -2305,8 +2305,22 @@ def _get_resampler(self, obj: NDFrame) -> Resampler:
)

def _get_grouper(
self, obj: NDFrameT, validate: bool = True
self, obj: NDFrameT, validate: bool = True, observed: bool = True
) -> tuple[BinGrouper, NDFrameT]:
"""
Parameters
----------
obj : Series or DataFrame
Object being grouped.
validate : bool, default True
Unused. Only for compatibility with ``Grouper._get_grouper``.
observed : bool, default True
Unused. Only for compatibility with ``Grouper._get_grouper``.

Returns
-------
A tuple of grouper, obj (possibly sorted)
"""
# create the resampler and return our binner
r = self._get_resampler(obj)
return r._grouper, cast(NDFrameT, r.obj)
Expand Down
15 changes: 7 additions & 8 deletions pandas/tests/groupby/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ def test_idxmin_idxmax_extremes_skipna(skipna, how, float_numpy_dtype):
max_value = np.finfo(float_numpy_dtype).max
df = DataFrame(
{
"a": Series(np.repeat(range(1, 6), repeats=2), dtype="intp"),
"a": Series(np.repeat(range(1, 5), repeats=2), dtype="intp"),
"b": Series(
[
np.nan,
Expand All @@ -283,8 +283,6 @@ def test_idxmin_idxmax_extremes_skipna(skipna, how, float_numpy_dtype):
np.nan,
max_value,
np.nan,
np.nan,
np.nan,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this change is bc there's an all-NA group so itll now raise?

Copy link
Member Author

@rhshadrach rhshadrach Aug 5, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, and the case of testing for raising is done elsewhere.

],
dtype=float_numpy_dtype,
),
Expand All @@ -299,7 +297,7 @@ def test_idxmin_idxmax_extremes_skipna(skipna, how, float_numpy_dtype):
return
result = getattr(gb, how)(skipna=skipna)
expected = DataFrame(
{"b": [1, 3, 4, 6, np.nan]}, index=pd.Index(range(1, 6), name="a", dtype="intp")
{"b": [1, 3, 4, 6]}, index=pd.Index(range(1, 5), name="a", dtype="intp")
)
tm.assert_frame_equal(result, expected)

Expand Down Expand Up @@ -1003,8 +1001,6 @@ def test_string_dtype_all_na(
else:
expected_dtype = "int64"
expected_value = 1 if reduction_func == "size" else 0
elif reduction_func in ["idxmin", "idxmax"]:
expected_dtype, expected_value = "float64", np.nan
elif not skipna or min_count > 0:
expected_value = pd.NA
elif reduction_func == "sum":
Expand Down Expand Up @@ -1032,8 +1028,11 @@ def test_string_dtype_all_na(
with pytest.raises(TypeError, match=msg):
method(*args, **kwargs)
return
elif reduction_func in ["idxmin", "idxmax"] and not skipna:
msg = f"{reduction_func} with skipna=False encountered an NA value."
elif reduction_func in ["idxmin", "idxmax"]:
if skipna:
msg = f"{reduction_func} with skipna=True encountered all NA values"
else:
msg = f"{reduction_func} with skipna=False encountered an NA value."
with pytest.raises(ValueError, match=msg):
method(*args, **kwargs)
return
Expand Down
Loading