-
-
Notifications
You must be signed in to change notification settings - Fork 19.3k
ENH: Implement MultiIndex.insert_level for inserting levels at specified positions #62610
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 16 commits
e3d6970
45ac8ef
5b76304
5f0caf0
97a98e5
1a9ddc5
2199e6e
44985ad
9e8676d
094958d
77f3af8
7bf3067
00a346f
c4ecf7a
8e0068a
87bd44b
79e04b6
1447886
e2917d0
471c2d6
e2334ac
eb320fb
9f80ad1
a8d626a
37f12e6
fd93622
0aa81ec
66026af
10ecc1f
00d3bbb
4b2bb50
d08eb28
336c3e8
e8bbd3a
f63f40a
5337514
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -2710,6 +2710,73 @@ def reorder_levels(self, order) -> MultiIndex: | |||||||||||||||||||||||||||||
| result = self._reorder_ilevels(order) | ||||||||||||||||||||||||||||||
| return result | ||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||
def insert_level(self, position: int, value, name=None) -> MultiIndex:
    """
    Insert a new level at the specified position and return a new MultiIndex.

    The existing levels and codes are reused unchanged; only the inserted
    values are factorized, so no per-row Python tuples are built.

    Parameters
    ----------
    position : int
        The integer position where the new level should be inserted.
        Must be between 0 and ``self.nlevels`` (inclusive).
    value : scalar or sequence
        Values for the inserted level. If a scalar is provided (``str`` and
        ``bytes`` count as scalars), it is broadcast to the length of the
        index. If a sequence is provided, it must be the same length as
        the index.
    name : Hashable, default None
        Name of the inserted level. If not provided, the inserted level
        name will be ``None``.

    Returns
    -------
    MultiIndex
        A new ``MultiIndex`` with the inserted level. The original index
        is not modified.

    Raises
    ------
    TypeError
        If ``position`` is not an integer.
    ValueError
        If ``position`` is outside ``[0, self.nlevels]`` or if a sequence
        ``value`` does not have the same length as the index.

    Examples
    --------
    >>> idx = pd.MultiIndex.from_tuples([("A", 1), ("B", 2)], names=["x", "y"])
    >>> idx.insert_level(0, "grp")
    MultiIndex([('grp', 'A', 1),
                ('grp', 'B', 2)],
               names=[None, 'x', 'y'])
    >>> idx.insert_level(1, ["L1", "L2"], name="z")
    MultiIndex([('A', 'L1', 1),
                ('B', 'L2', 2)],
               names=['x', 'z', 'y'])
    """
    import operator

    # operator.index accepts any true integer (including NumPy integers)
    # while still rejecting floats and strings.
    try:
        position = operator.index(position)
    except TypeError:
        raise TypeError("position must be an integer") from None

    if position < 0 or position > self.nlevels:
        raise ValueError(f"position must be between 0 and {self.nlevels}")

    # str/bytes are iterable but are meant as a single label, not a sequence.
    if isinstance(value, (str, bytes)) or not hasattr(value, "__iter__"):
        value = [value] * len(self)
    else:
        value = list(value)
        if len(value) != len(self):
            raise ValueError("Length of values must match length of index")

    # Factorize only the new values; a one-level MultiIndex gives us the
    # matching (level, codes) pair without touching the existing levels.
    inserted = MultiIndex.from_arrays([value])

    new_levels = list(self.levels)
    new_levels.insert(position, inserted.levels[0])

    new_codes = list(self.codes)
    new_codes.insert(position, inserted.codes[0])

    # ``names`` is always list-like on a MultiIndex (``None`` per unnamed
    # level), so no ``is None`` branch is needed.
    new_names = list(self.names)
    new_names.insert(position, name)

    return MultiIndex(levels=new_levels, codes=new_codes, names=new_names)
|
||||||||||||||||||||||||||||||
| if self.names is not None: | |
| new_names = list(self.names) | |
| else: | |
| new_names = [None] * self.nlevels | |
| new_names.insert(position, name) | |
| if self.names is not None: | |
| new_names = self.names[:position] + [name] + self.names[position + 1:] | |
| else: | |
| new_names = [None] * (position) + [name] + [None] * (self.nlevel - position) |
Is there a case where self.names is None?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I looked into the constructors: if the user does not supply names, `result._names = [None] * len(levels)` makes it a list. Can we therefore assume that self.names is never None? If so, this could simply be new_names = self.names[:position] + [name] + self.names[position:]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we therefore assume that self.names is never None?
The MultiIndex constructor contains these lines
pandas/pandas/core/indexes/multi.py
Lines 329 to 332 in a329dc3
| result._names = [None] * len(levels) | |
| if names is not None: | |
| # handles name validation | |
| result._set_names(names) |
That indicates it would never be None. So I think it's safe to remove the branching.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,149 @@ | ||
| import pytest | ||
|
|
||
| import pandas as pd | ||
| import pandas._testing as tm | ||
|
|
||
|
|
||
class TestMultiIndexInsertLevel:
    """Tests for ``MultiIndex.insert_level``."""

    @staticmethod
    def _three_row_index():
        """Return the two-level, three-row index shared by several tests."""
        return pd.MultiIndex.from_tuples(
            [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"]
        )

    @pytest.mark.parametrize(
        "position, value, name, expected_tuples, expected_names",
        [
            (
                0,
                "new_value",
                None,
                [("new_value", "A", 1), ("new_value", "B", 2), ("new_value", "C", 3)],
                [None, "level1", "level2"],
            ),
            (
                1,
                "middle",
                None,
                [("A", "middle", 1), ("B", "middle", 2), ("C", "middle", 3)],
                ["level1", None, "level2"],
            ),
            (
                0,
                "new_val",
                "new_level",
                [("new_val", "A", 1), ("new_val", "B", 2), ("new_val", "C", 3)],
                ["new_level", "level1", "level2"],
            ),
            (
                1,
                "middle",
                "custom_name",
                [("A", "middle", 1), ("B", "middle", 2), ("C", "middle", 3)],
                ["level1", "custom_name", "level2"],
            ),
            (
                0,
                "start",
                None,
                [("start", "A", 1), ("start", "B", 2), ("start", "C", 3)],
                [None, "level1", "level2"],
            ),
            (
                2,
                "end",
                None,
                [("A", 1, "end"), ("B", 2, "end"), ("C", 3, "end")],
                ["level1", "level2", None],
            ),
            (
                1,
                100,
                None,
                [("A", 100, 1), ("B", 100, 2), ("C", 100, 3)],
                ["level1", None, "level2"],
            ),
            (
                1,
                1.5,
                None,
                [("A", 1.5, 1), ("B", 1.5, 2), ("C", 1.5, 3)],
                ["level1", None, "level2"],
            ),
            (
                1,
                None,
                None,
                [("A", None, 1), ("B", None, 2), ("C", None, 3)],
                ["level1", None, "level2"],
            ),
            (
                1,
                ["X", "Y", "Z"],
                None,
                [("A", "X", 1), ("B", "Y", 2), ("C", "Z", 3)],
                ["level1", None, "level2"],
            ),
            (
                0,
                "",
                "empty_string",
                [("", "A", 1), ("", "B", 2), ("", "C", 3)],
                ["empty_string", "level1", "level2"],
            ),
            (
                1,
                True,
                None,
                [("A", True, 1), ("B", True, 2), ("C", True, 3)],
                ["level1", None, "level2"],
            ),
        ],
    )
    def test_insert_level_basic(
        self, position, value, name, expected_tuples, expected_names
    ):
        """Scalars are broadcast, sequences are used row by row, names line up."""
        actual = self._three_row_index().insert_level(position, value, name=name)

        wanted = pd.MultiIndex.from_tuples(expected_tuples, names=expected_names)
        tm.assert_index_equal(actual, wanted)

    @pytest.mark.parametrize(
        "position, value, expected_error",
        [
            (5, "invalid", "position must be between"),
            (-1, "invalid", "position must be between"),
            (1, ["too", "few"], "Length of values must match"),
            (3, "value", "position must be between"),
        ],
    )
    def test_insert_level_error_cases(self, position, value, expected_error):
        """Out-of-range positions and wrong-length sequences raise ValueError."""
        index = self._three_row_index()

        with pytest.raises(ValueError, match=expected_error):
            index.insert_level(position, value)

    def test_insert_level_preserves_original(self):
        """Calling insert_level leaves the source index untouched."""
        index = self._three_row_index()
        snapshot = index.copy()

        index.insert_level(1, "temp")

        tm.assert_index_equal(snapshot, index)

    def test_insert_level_empty_index(self):
        """An empty index gains an empty level and a ``None`` name."""
        no_rows = pd.MultiIndex.from_tuples([], names=["level1", "level2"])

        actual = no_rows.insert_level(0, [])

        wanted = pd.MultiIndex.from_tuples([], names=[None, "level1", "level2"])
        tm.assert_index_equal(actual, wanted)

    def test_insert_level_single_element(self):
        """A one-row index accepts a scalar for the inserted level."""
        one_row = pd.MultiIndex.from_tuples([("A", 1)], names=["level1", "level2"])

        actual = one_row.insert_level(1, "middle")

        wanted = pd.MultiIndex.from_tuples(
            [("A", "middle", 1)], names=["level1", None, "level2"]
        )
        tm.assert_index_equal(actual, wanted)
|
Uh oh!
There was an error while loading. Please reload this page.