Skip to content
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,7 @@ Indexing
- Bug in :meth:`Index.sort_values` where, when empty values were passed, the method would break by trying to compare missing values instead of pushing them to the end of the sort order. (:issue:`35584`)
- Bug in :meth:`Index.get_indexer` and :meth:`Index.get_indexer_non_unique` where int64 arrays are returned instead of intp. (:issue:`36359`)
- Bug in :meth:`DataFrame.sort_index` where parameter ascending passed as a list on a single level index gives wrong result. (:issue:`32334`)
- Bug in :meth:`DataFrame.reset_index` was incorrectly raising a ``ValueError`` for input with a :class:`MultiIndex` with missing values in a level with ``Categorical`` dtype (:issue:`24206`)

Missing
^^^^^^^
Expand Down
5 changes: 4 additions & 1 deletion pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,7 +493,10 @@ def maybe_casted_values(index, codes=None):
values = values._data # TODO: can we de-kludge yet?

if mask.any():
values, _ = maybe_upcast_putmask(values, mask, np.nan)
if isinstance(values, np.ndarray):
values, _ = maybe_upcast_putmask(values, mask, np.nan)
else:
values[mask] = np.nan

if issubclass(values_type, DatetimeLikeArrayMixin):
values = values_type(values, dtype=values_dtype)
Expand Down
61 changes: 46 additions & 15 deletions pandas/tests/frame/test_alter_axes.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@

from pandas import (
Categorical,
CategoricalIndex,
DataFrame,
DatetimeIndex,
Index,
IntervalIndex,
MultiIndex,
Series,
Timestamp,
cut,
Expand Down Expand Up @@ -171,21 +173,6 @@ def test_assign_columns(self, float_frame):
tm.assert_series_equal(float_frame["C"], df["baz"], check_names=False)
tm.assert_series_equal(float_frame["hi"], df["foo2"], check_names=False)

def test_set_index_preserve_categorical_dtype(self):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I moved this to sit with the only other categorical test (the one I'm adding) in TestCategoricalIndex class

# GH13743, GH13854
df = DataFrame(
{
"A": [1, 2, 1, 1, 2],
"B": [10, 16, 22, 28, 34],
"C1": Categorical(list("abaab"), categories=list("bac"), ordered=False),
"C2": Categorical(list("abaab"), categories=list("bac"), ordered=True),
}
)
for cols in ["C1", "C2", ["A", "C1"], ["A", "C2"], ["C1", "C2"]]:
result = df.set_index(cols).reset_index()
result = result.reindex(columns=df.columns)
tm.assert_frame_equal(result, df)

def test_rename_signature(self):
sig = inspect.signature(DataFrame.rename)
parameters = set(sig.parameters)
Expand Down Expand Up @@ -266,3 +253,47 @@ def test_set_reset_index(self):
df = df.set_index("B")

df = df.reset_index()


class TestCategoricalIndex:
def test_set_index_preserve_categorical_dtype(self):
# GH13743, GH13854
df = DataFrame(
{
"A": [1, 2, 1, 1, 2],
"B": [10, 16, 22, 28, 34],
"C1": Categorical(list("abaab"), categories=list("bac"), ordered=False),
"C2": Categorical(list("abaab"), categories=list("bac"), ordered=True),
}
)
for cols in ["C1", "C2", ["A", "C1"], ["A", "C2"], ["C1", "C2"]]:
result = df.set_index(cols).reset_index()
result = result.reindex(columns=df.columns)
tm.assert_frame_equal(result, df)

@pytest.mark.parametrize(
"codes", ([[0, 0, 1, 1], [0, 1, 0, 1]], [[0, 0, -1, 1], [0, 1, 0, 1]])
)
def test_reindexing_with_missing_values(self, codes):
# GH 24206

index = MultiIndex(
[CategoricalIndex(["A", "B"]), CategoricalIndex(["a", "b"])], codes
)
data = {"col": range(len(index))}
df = DataFrame(data=data, index=index)

expected = DataFrame(
{
"level_0": Categorical.from_codes(codes[0], categories=["A", "B"]),
"level_1": Categorical.from_codes(codes[1], categories=["a", "b"]),
"col": range(4),
}
)

res = df.reset_index()
tm.assert_frame_equal(res, expected)

# roundtrip
res = expected.set_index(["level_0", "level_1"]).reset_index()
tm.assert_frame_equal(res, expected)