Skip to content
Open
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
e3d6970
FEAT: Add insert_level method to MultiIndex
Oct 7, 2025
45ac8ef
FEAT: Add insert_level method to MultiIndex
Oct 7, 2025
5b76304
FEAT: Add insert_level method to MultiIndex
Oct 7, 2025
5f0caf0
ENH: Add insert_level method to MultiIndex with formatting fixes
Oct 7, 2025
97a98e5
STYLE: Format code with ruff
Oct 7, 2025
1a9ddc5
FIX: Remove undefined pd reference
Oct 7, 2025
2199e6e
Merge branch 'main' into shiny-new-feature
Chiwendaiyue Oct 8, 2025
44985ad
Merge branch 'main' into shiny-new-feature
Chiwendaiyue Oct 10, 2025
9e8676d
Update pandas/core/indexes/multi.py
Chiwendaiyue Oct 12, 2025
094958d
Merge branch 'main' into shiny-new-feature
Chiwendaiyue Oct 14, 2025
77f3af8
DOC: Add whatsnew entry for MultiIndex.insert_level
Oct 14, 2025
7bf3067
TEST: Comprehensive consolidation of all test cases into parametrized…
Oct 15, 2025
00a346f
Merge branch 'main' into shiny-new-feature
Chiwendaiyue Oct 15, 2025
c4ecf7a
FIX: Remove accidental binary file and update whatsnew
Oct 15, 2025
8e0068a
FIX: Revert accidental changes to test_query_eval.py
Oct 15, 2025
87bd44b
Merge branch 'main' into shiny-new-feature
Chiwendaiyue Oct 19, 2025
79e04b6
PERF: Optimize MultiIndex.insert_level to avoid unnecessary type conv…
Oct 19, 2025
1447886
REF: Simplify tuple construction in MultiIndex.insert_level
Oct 19, 2025
e2917d0
Merge branch 'main' into shiny-new-feature
Chiwendaiyue Oct 20, 2025
471c2d6
Merge branch 'main' into shiny-new-feature
Chiwendaiyue Oct 21, 2025
e2334ac
Merge branch 'main' into shiny-new-feature
Chiwendaiyue Oct 22, 2025
eb320fb
Add API reference documentation & Implement insert_level using levels…
Oct 22, 2025
9f80ad1
all changes without levels/codes operations
Oct 22, 2025
a8d626a
add see also
Oct 23, 2025
37f12e6
Merge branch 'main' into shiny-new-feature
Chiwendaiyue Oct 23, 2025
fd93622
Merge branch 'main' into shiny-new-feature
Chiwendaiyue Oct 24, 2025
0aa81ec
Merge branch 'main' into shiny-new-feature
Chiwendaiyue Oct 26, 2025
66026af
Merge branch 'main' into shiny-new-feature
Chiwendaiyue Oct 28, 2025
10ecc1f
Change from_tuple to factorize_from_iterable
Oct 28, 2025
00d3bbb
Merge branch 'shiny-new-feature' of https://github.com/Chiwendaiyue/p…
Oct 28, 2025
4b2bb50
for type annotation and mypy fixes
Oct 28, 2025
d08eb28
Merge branch 'main' into shiny-new-feature
Chiwendaiyue Oct 29, 2025
336c3e8
Merge branch 'main' into shiny-new-feature
Chiwendaiyue Nov 2, 2025
e8bbd3a
Merge branch 'main' into shiny-new-feature
Chiwendaiyue Nov 4, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ Other enhancements
- :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
- :py:class:`frozenset` elements in pandas objects are now natively printed (:issue:`60690`)
- Add ``"delete_rows"`` option to ``if_exists`` argument in :meth:`DataFrame.to_sql` deleting all records of the table before inserting data (:issue:`37210`).
- Added :meth:`MultiIndex.insert_level` to insert new levels at specified positions in a MultiIndex (:issue:`62558`)
- Added half-year offset classes :class:`HalfYearBegin`, :class:`HalfYearEnd`, :class:`BHalfYearBegin` and :class:`BHalfYearEnd` (:issue:`60928`)
- Added support for ``axis=1`` with ``dict`` or :class:`Series` arguments into :meth:`DataFrame.fillna` (:issue:`4514`)
- Added support to read and write from and to Apache Iceberg tables with the new :func:`read_iceberg` and :meth:`DataFrame.to_iceberg` functions (:issue:`61383`)
Expand Down
54 changes: 54 additions & 0 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -2710,6 +2710,60 @@ def reorder_levels(self, order) -> MultiIndex:
result = self._reorder_ilevels(order)
return result

def insert_level(self, position: int, value, name=None) -> MultiIndex:
    """
    Insert a new level at the specified position in the MultiIndex.

    Parameters
    ----------
    position : int
        The position at which to insert the new level (0-based). Must
        satisfy ``0 <= position <= nlevels``; negative positions are not
        supported.
    value : scalar or array-like
        Value(s) to use for the new level. If scalar (including ``str``
        and ``bytes``), it is broadcast to all items. If array-like, its
        length must match the length of the index.
    name : object, optional
        Name for the new level.

    Returns
    -------
    MultiIndex
        New MultiIndex with the inserted level. The original index is
        not modified.

    Raises
    ------
    TypeError
        If ``position`` is not an integer.
    ValueError
        If ``position`` is out of bounds, or if ``value`` is array-like
        and its length does not match the length of the index.

    See Also
    --------
    MultiIndex.droplevel : Return index with requested level(s) removed.
    MultiIndex.reorder_levels : Rearrange levels using input order.

    Examples
    --------
    >>> idx = pd.MultiIndex.from_tuples([("A", 1), ("B", 2)])
    >>> idx.insert_level(0, "new_value")
    MultiIndex([('new_value', 'A', 1),
                ('new_value', 'B', 2)],
               )

    >>> idx.insert_level(1, ["X", "Y"])
    MultiIndex([('A', 'X', 1),
                ('B', 'Y', 2)],
               )

    >>> idx.insert_level(0, "new_val", name="new_level")
    MultiIndex([('new_val', 'A', 1),
                ('new_val', 'B', 2)],
               names=['new_level', None, None])
    """
    # Imported locally so this method does not rely on which helpers the
    # enclosing module happens to import at the top level.
    from pandas.api.types import (
        is_integer,
        is_list_like,
    )

    # is_integer accepts NumPy integer scalars and rejects bool, unlike a
    # plain isinstance(position, int) check.
    if not is_integer(position):
        raise TypeError("position must be an integer")
    position = int(position)

    if position < 0 or position > self.nlevels:
        raise ValueError(f"position must be between 0 and {self.nlevels}")

    if is_list_like(value):
        # Materialize once so that iterators/generators can be measured.
        value = list(value)
        if len(value) != len(self):
            raise ValueError("Length of values must match length of index")
    else:
        # Scalars (str, bytes, numbers, None, 0-dim arrays) are broadcast.
        value = [value] * len(self)

    new_tuples = [
        tup[:position] + (val,) + tup[position:] for tup, val in zip(self, value)
    ]

    new_names = list(self.names)
    new_names.insert(position, name)

    return MultiIndex.from_tuples(new_tuples, names=new_names)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of using from_tuples (which will be slow), can you calculate the level and codes of the new values (you can find examples of this in this file) and interpose that with the existing self.levels and self.codes?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @mroeschke
I'm working on this and have a question about the optimal approach.
I started with a levels/codes based implementation:

def insert_level(self, position: int, value, name: Hashable = lib.no_default) -> MultiIndex:
    # Sketch of a levels/codes-based implementation; the earlier steps of
    # the method were elided by the author of this snippet.
    #...
    # NOTE(review): Index(value) keeps duplicates, and the discussion around
    # this snippet reports that get_indexer fails when the new level contains
    # repeated values such as [None, None, None].
    new_level = Index(value)
    new_codes_for_level = new_level.get_indexer(value)
    
    # Splice the new level, its codes and its name in at `position`.
    new_levels = self.levels[:position] + [new_level] + self.levels[position:]
    new_codes = self.codes[:position] + [new_codes_for_level] + self.codes[position:]
    new_names = self.names[:position] + [name] + self.names[position:]
    
    # verify_integrity=False skips the constructor's consistency checks.
    return MultiIndex(levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False)

However, I'm encountering issues with None value handling: new_level.get_indexer(value) fails when the new level contains duplicate values (like [None, None, None]). Should I:
1. Continue with the levels/codes optimization and implement a custom codes mapping to handle duplicates, similar to how from_arrays handles it internally? Or
2. Just use the simpler from_tuples approach, which builds new tuples and delegates to the well-tested MultiIndex.from_tuples method?

The from_tuples approach would be more reliable for edge cases but potentially less performant for large indices.
Thanks for your guidance!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you try using `new_codes, new_level = factorize_from_iterable(value)` to calculate the new level and codes to insert?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[image]

I tried `new_codes, new_level = factorize_from_iterable(value)`, but the behavior with all-`None` values doesn't match the result of `from_tuples`. So I tested the actual behavior; here is the comparison:

| Implementation | Level 1 | inferred_type | Codes |
| --- | --- | --- | --- |
| `from_tuples` | `Index([], dtype='object')` | `"empty"` | `[-1, -1, -1]` |
| `factorize_from_iterable` | `Index([], dtype='float64')` | `"floating"` | `[-1, -1, -1]` |

I tried adding an if/else so that all the tests pass:

```python
if all(val is None for val in value):
    new_level = Index([], dtype='object')
    new_codes = [-1] * len(value)
else:
    new_codes, new_level = factorize_from_iterable(value)
```

I wonder if this is right. Looking forward to your guidance, thanks!


def _reorder_ilevels(self, order) -> MultiIndex:
if len(order) != self.nlevels:
raise AssertionError(
Expand Down
11 changes: 11 additions & 0 deletions pandas/tests/indexes/multi/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -870,3 +870,14 @@ def test_dtype_representation(using_infer_string):
dtype=object,
)
tm.assert_series_equal(result, expected)


def test_insert_level_integration():
    # End-to-end check: an index produced by insert_level can be installed on
    # a DataFrame, and both the index contents and the data survive intact.
    idx = MultiIndex.from_tuples([("A", 1), ("B", 2)])

    df = pd.DataFrame({"data": [10, 20]}, index=idx)
    new_idx = idx.insert_level(0, "group1")
    df_new = df.set_index(new_idx)

    # Compare the full index rather than only its shape, so a wrong level
    # order or wrong broadcast value cannot slip through.
    expected_idx = MultiIndex.from_tuples([("group1", "A", 1), ("group1", "B", 2)])
    tm.assert_index_equal(df_new.index, expected_idx)

    assert df_new.index.nlevels == 3
    assert len(df_new) == 2
    assert df_new["data"].tolist() == [10, 20]
149 changes: 149 additions & 0 deletions pandas/tests/indexes/multi/test_insert_level.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
import pytest

import pandas as pd
import pandas._testing as tm


class TestMultiIndexInsertLevel:
    # Tests for MultiIndex.insert_level: scalar broadcasting, array-like
    # values, naming of the new level, argument validation and edge cases.

    @pytest.mark.parametrize(
        "position, value, name, expected_tuples, expected_names",
        [
            (
                0,
                "new_value",
                None,
                [("new_value", "A", 1), ("new_value", "B", 2), ("new_value", "C", 3)],
                [None, "level1", "level2"],
            ),
            (
                1,
                "middle",
                None,
                [("A", "middle", 1), ("B", "middle", 2), ("C", "middle", 3)],
                ["level1", None, "level2"],
            ),
            (
                0,
                "new_val",
                "new_level",
                [("new_val", "A", 1), ("new_val", "B", 2), ("new_val", "C", 3)],
                ["new_level", "level1", "level2"],
            ),
            (
                1,
                "middle",
                "custom_name",
                [("A", "middle", 1), ("B", "middle", 2), ("C", "middle", 3)],
                ["level1", "custom_name", "level2"],
            ),
            (
                0,
                "start",
                None,
                [("start", "A", 1), ("start", "B", 2), ("start", "C", 3)],
                [None, "level1", "level2"],
            ),
            (
                2,
                "end",
                None,
                [("A", 1, "end"), ("B", 2, "end"), ("C", 3, "end")],
                ["level1", "level2", None],
            ),
            (
                1,
                100,
                None,
                [("A", 100, 1), ("B", 100, 2), ("C", 100, 3)],
                ["level1", None, "level2"],
            ),
            (
                1,
                1.5,
                None,
                [("A", 1.5, 1), ("B", 1.5, 2), ("C", 1.5, 3)],
                ["level1", None, "level2"],
            ),
            (
                1,
                None,
                None,
                [("A", None, 1), ("B", None, 2), ("C", None, 3)],
                ["level1", None, "level2"],
            ),
            (
                1,
                ["X", "Y", "Z"],
                None,
                [("A", "X", 1), ("B", "Y", 2), ("C", "Z", 3)],
                ["level1", None, "level2"],
            ),
            (
                # Non-list array-likes are accepted as per-row values too.
                1,
                ("X", "Y", "Z"),
                None,
                [("A", "X", 1), ("B", "Y", 2), ("C", "Z", 3)],
                ["level1", None, "level2"],
            ),
            (
                0,
                "",
                "empty_string",
                [("", "A", 1), ("", "B", 2), ("", "C", 3)],
                ["empty_string", "level1", "level2"],
            ),
            (
                1,
                True,
                None,
                [("A", True, 1), ("B", True, 2), ("C", True, 3)],
                ["level1", None, "level2"],
            ),
        ],
    )
    def test_insert_level_basic(
        self, position, value, name, expected_tuples, expected_names
    ):
        simple_idx = pd.MultiIndex.from_tuples(
            [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"]
        )

        result = simple_idx.insert_level(position, value, name=name)
        expected = pd.MultiIndex.from_tuples(expected_tuples, names=expected_names)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "position, value, expected_error",
        [
            (5, "invalid", "position must be between"),
            (-1, "invalid", "position must be between"),
            (1, ["too", "few"], "Length of values must match"),
            (1, ["one", "too", "many", "values"], "Length of values must match"),
            (3, "value", "position must be between"),
        ],
    )
    def test_insert_level_error_cases(self, position, value, expected_error):
        simple_idx = pd.MultiIndex.from_tuples(
            [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"]
        )

        with pytest.raises(ValueError, match=expected_error):
            simple_idx.insert_level(position, value)

    @pytest.mark.parametrize("position", ["0", 1.5, None])
    def test_insert_level_position_not_integer(self, position):
        # A non-integer position is a type error, not a range error.
        simple_idx = pd.MultiIndex.from_tuples(
            [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"]
        )

        with pytest.raises(TypeError, match="position must be an integer"):
            simple_idx.insert_level(position, "value")

    def test_insert_level_preserves_original(self):
        # insert_level must return a new object and leave the caller's index
        # untouched.
        simple_idx = pd.MultiIndex.from_tuples(
            [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"]
        )

        original = simple_idx.copy()
        simple_idx.insert_level(1, "temp")

        tm.assert_index_equal(original, simple_idx)

    def test_insert_level_empty_index(self):
        empty_idx = pd.MultiIndex.from_tuples([], names=["level1", "level2"])

        result = empty_idx.insert_level(0, [])
        expected = pd.MultiIndex.from_tuples([], names=[None, "level1", "level2"])
        tm.assert_index_equal(result, expected)

    def test_insert_level_single_element(self):
        single_idx = pd.MultiIndex.from_tuples([("A", 1)], names=["level1", "level2"])

        result = single_idx.insert_level(1, "middle")
        expected = pd.MultiIndex.from_tuples(
            [("A", "middle", 1)], names=["level1", None, "level2"]
        )
        tm.assert_index_equal(result, expected)
Loading