- 
          
 - 
                Notifications
    
You must be signed in to change notification settings  - Fork 19.2k
 
ENH: Implement MultiIndex.insert_level for inserting levels at specified positions #62610
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 13 commits
e3d6970
              45ac8ef
              5b76304
              5f0caf0
              97a98e5
              1a9ddc5
              2199e6e
              44985ad
              9e8676d
              094958d
              77f3af8
              7bf3067
              00a346f
              c4ecf7a
              8e0068a
              87bd44b
              79e04b6
              1447886
              e2917d0
              471c2d6
              e2334ac
              eb320fb
              9f80ad1
              a8d626a
              37f12e6
              fd93622
              0aa81ec
              66026af
              10ecc1f
              00d3bbb
              4b2bb50
              d08eb28
              336c3e8
              e8bbd3a
              File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
| 
          
            
          
           | 
    @@ -214,6 +214,7 @@ Other enhancements | |
| - :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`) | ||
| - :py:class:`frozenset` elements in pandas objects are now natively printed (:issue:`60690`) | ||
| - Add ``"delete_rows"`` option to ``if_exists`` argument in :meth:`DataFrame.to_sql` deleting all records of the table before inserting data (:issue:`37210`). | ||
| - Added :meth:`MultiIndex.insert_level` to insert new levels at specified positions in a MultiIndex (:issue:`62558`) | ||
| - Added half-year offset classes :class:`HalfYearBegin`, :class:`HalfYearEnd`, :class:`BHalfYearBegin` and :class:`BHalfYearEnd` (:issue:`60928`) | ||
| - Added support for ``axis=1`` with ``dict`` or :class:`Series` arguments into :meth:`DataFrame.fillna` (:issue:`4514`) | ||
| - Added support to read and write from and to Apache Iceberg tables with the new :func:`read_iceberg` and :meth:`DataFrame.to_iceberg` functions (:issue:`61383`) | ||
| 
        
          
        
         | 
    @@ -228,7 +229,7 @@ Other enhancements | |
| - Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`) | ||
| - Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`) | ||
| - Switched wheel upload to **PyPI Trusted Publishing** (OIDC) for release-tag pushes in ``wheels.yml``. (:issue:`61718`) | ||
| - | ||
| 
     | 
||
                
       | 
||
| 
     | 
||
| .. --------------------------------------------------------------------------- | ||
| .. _whatsnew_300.notable_bug_fixes: | ||
| 
          
            
          
           | 
    ||
                              
       | 
            
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 
          
            
          
           | 
    @@ -2710,6 +2710,73 @@ def reorder_levels(self, order) -> MultiIndex: | |||||||||||||||||||||||||||||
| result = self._reorder_ilevels(order) | ||||||||||||||||||||||||||||||
| return result | ||||||||||||||||||||||||||||||
| 
     | 
||||||||||||||||||||||||||||||
| def insert_level(self, position: int, value, name=None) -> MultiIndex: | ||||||||||||||||||||||||||||||
| """ | ||||||||||||||||||||||||||||||
| Insert a new level at the specified position and return a new MultiIndex. | ||||||||||||||||||||||||||||||
| 
     | 
||||||||||||||||||||||||||||||
| Parameters | ||||||||||||||||||||||||||||||
| ---------- | ||||||||||||||||||||||||||||||
| position : int | ||||||||||||||||||||||||||||||
| The integer position where the new level should be inserted. | ||||||||||||||||||||||||||||||
| Must be between 0 and ``self.nlevels`` (inclusive). | ||||||||||||||||||||||||||||||
| value : scalar or sequence | ||||||||||||||||||||||||||||||
| Values for the inserted level. If a scalar is provided, it is | ||||||||||||||||||||||||||||||
| broadcast to the length of the index. If a sequence is provided, | ||||||||||||||||||||||||||||||
| it must be the same length as the index. | ||||||||||||||||||||||||||||||
| name : Hashable, default None | ||||||||||||||||||||||||||||||
| Name of the inserted level. If not provided, the inserted level | ||||||||||||||||||||||||||||||
| name will be ``None``. | ||||||||||||||||||||||||||||||
| 
     | 
||||||||||||||||||||||||||||||
| Returns | ||||||||||||||||||||||||||||||
| ------- | ||||||||||||||||||||||||||||||
| MultiIndex | ||||||||||||||||||||||||||||||
| A new ``MultiIndex`` with the inserted level. | ||||||||||||||||||||||||||||||
| 
     | 
||||||||||||||||||||||||||||||
| Examples | ||||||||||||||||||||||||||||||
| -------- | ||||||||||||||||||||||||||||||
| >>> idx = pd.MultiIndex.from_tuples([("A", 1), ("B", 2)], names=["x", "y"]) | ||||||||||||||||||||||||||||||
| >>> idx.insert_level(0, "grp") | ||||||||||||||||||||||||||||||
| MultiIndex([('grp', 'A', 1), ('grp', 'B', 2)], | ||||||||||||||||||||||||||||||
| names=[None, 'x', 'y']) | ||||||||||||||||||||||||||||||
| >>> idx.insert_level(1, ["L1", "L2"], name="z") | ||||||||||||||||||||||||||||||
| MultiIndex([('A', 'L1', 1), ('B', 'L2', 2)], | ||||||||||||||||||||||||||||||
| names=['x', 'z', 'y']) | ||||||||||||||||||||||||||||||
| """ | ||||||||||||||||||||||||||||||
| if not isinstance(position, int): | ||||||||||||||||||||||||||||||
| raise TypeError("position must be an integer") | ||||||||||||||||||||||||||||||
| 
     | 
||||||||||||||||||||||||||||||
| if position < 0 or position > self.nlevels: | ||||||||||||||||||||||||||||||
| raise ValueError(f"position must be between 0 and {self.nlevels}") | ||||||||||||||||||||||||||||||
| 
     | 
||||||||||||||||||||||||||||||
| if not hasattr(value, "__iter__") or isinstance(value, str): | ||||||||||||||||||||||||||||||
| value = [value] * len(self) | ||||||||||||||||||||||||||||||
| else: | ||||||||||||||||||||||||||||||
| value = list(value) | ||||||||||||||||||||||||||||||
| if len(value) != len(self): | ||||||||||||||||||||||||||||||
| raise ValueError("Length of values must match length of index") | ||||||||||||||||||||||||||||||
| 
     | 
||||||||||||||||||||||||||||||
| tuples = list(self) | ||||||||||||||||||||||||||||||
| 
     | 
||||||||||||||||||||||||||||||
| new_tuples = [] | ||||||||||||||||||||||||||||||
| for i, tup in enumerate(tuples): | ||||||||||||||||||||||||||||||
| if isinstance(tup, tuple): | ||||||||||||||||||||||||||||||
| new_tuple = list(tup) | ||||||||||||||||||||||||||||||
| new_tuple.insert(position, value[i]) | ||||||||||||||||||||||||||||||
| new_tuples.append(tuple(new_tuple)) | ||||||||||||||||||||||||||||||
                
       | 
||||||||||||||||||||||||||||||
| else: | ||||||||||||||||||||||||||||||
                
       | 
||||||||||||||||||||||||||||||
| new_tuple = [tup] | ||||||||||||||||||||||||||||||
| new_tuple.insert(position, value[i]) | ||||||||||||||||||||||||||||||
| new_tuples.append(tuple(new_tuple)) | ||||||||||||||||||||||||||||||
| 
     | 
||||||||||||||||||||||||||||||
| if self.names is not None: | ||||||||||||||||||||||||||||||
| new_names = list(self.names) | ||||||||||||||||||||||||||||||
| else: | ||||||||||||||||||||||||||||||
| new_names = [None] * self.nlevels | ||||||||||||||||||||||||||||||
| 
     | 
||||||||||||||||||||||||||||||
| new_names.insert(position, name) | ||||||||||||||||||||||||||||||
                
       | 
||||||||||||||||||||||||||||||
| if self.names is not None: | |
| new_names = list(self.names) | |
| else: | |
| new_names = [None] * self.nlevels | |
| new_names.insert(position, name) | |
| if self.names is not None: | |
| new_names = self.names[:position] + [name] + self.names[position + 1:] | |
| else: | |
| new_names = [None] * (position) + [name] + [None] * (self.nlevel - position) | 
Is there a case where self.names is None?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I looked into the constructors: if the user has not supplied names, ``result._names = [None] * len(levels)`` makes it a list. Can we take it that self.names is never None? If so, this could simply be new_names = self.names[:position] + [name] + self.names[position:]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we take it that self.names is never None?
The MultiIndex constructor contains these lines
pandas/pandas/core/indexes/multi.py
Lines 329 to 332 in a329dc3
| result._names = [None] * len(levels) | |
| if names is not None: | |
| # handles name validation | |
| result._set_names(names) | 
That indicates it would never be None. So I think it's safe to remove the branching.
| 
                       There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The changes in this file seem unrelated. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sorry, it might be remnants of a merge conflict; it looks like a leftover from resolving conflicts. I have made it the same as the main branch.  | 
            
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
| @@ -0,0 +1,149 @@ | ||
| import pytest | ||
| 
     | 
||
| import pandas as pd | ||
| import pandas._testing as tm | ||
| 
     | 
||
| 
     | 
||
| class TestMultiIndexInsertLevel: | ||
                
      
                  mroeschke marked this conversation as resolved.
               
              
                Outdated
          
            Show resolved
            Hide resolved
         | 
||
| @pytest.mark.parametrize( | ||
| "position, value, name, expected_tuples, expected_names", | ||
| [ | ||
| ( | ||
| 0, | ||
| "new_value", | ||
| None, | ||
| [("new_value", "A", 1), ("new_value", "B", 2), ("new_value", "C", 3)], | ||
| [None, "level1", "level2"], | ||
| ), | ||
| ( | ||
| 1, | ||
| "middle", | ||
| None, | ||
| [("A", "middle", 1), ("B", "middle", 2), ("C", "middle", 3)], | ||
| ["level1", None, "level2"], | ||
| ), | ||
| ( | ||
| 0, | ||
| "new_val", | ||
| "new_level", | ||
| [("new_val", "A", 1), ("new_val", "B", 2), ("new_val", "C", 3)], | ||
| ["new_level", "level1", "level2"], | ||
| ), | ||
| ( | ||
| 1, | ||
| "middle", | ||
| "custom_name", | ||
| [("A", "middle", 1), ("B", "middle", 2), ("C", "middle", 3)], | ||
| ["level1", "custom_name", "level2"], | ||
| ), | ||
| ( | ||
| 0, | ||
| "start", | ||
| None, | ||
| [("start", "A", 1), ("start", "B", 2), ("start", "C", 3)], | ||
| [None, "level1", "level2"], | ||
| ), | ||
| ( | ||
| 2, | ||
| "end", | ||
| None, | ||
| [("A", 1, "end"), ("B", 2, "end"), ("C", 3, "end")], | ||
| ["level1", "level2", None], | ||
| ), | ||
| ( | ||
| 1, | ||
| 100, | ||
| None, | ||
| [("A", 100, 1), ("B", 100, 2), ("C", 100, 3)], | ||
| ["level1", None, "level2"], | ||
| ), | ||
| ( | ||
| 1, | ||
| 1.5, | ||
| None, | ||
| [("A", 1.5, 1), ("B", 1.5, 2), ("C", 1.5, 3)], | ||
| ["level1", None, "level2"], | ||
| ), | ||
| ( | ||
| 1, | ||
| None, | ||
| None, | ||
| [("A", None, 1), ("B", None, 2), ("C", None, 3)], | ||
| ["level1", None, "level2"], | ||
| ), | ||
| ( | ||
| 1, | ||
| ["X", "Y", "Z"], | ||
| None, | ||
| [("A", "X", 1), ("B", "Y", 2), ("C", "Z", 3)], | ||
| ["level1", None, "level2"], | ||
| ), | ||
| ( | ||
| 0, | ||
| "", | ||
| "empty_string", | ||
| [("", "A", 1), ("", "B", 2), ("", "C", 3)], | ||
| ["empty_string", "level1", "level2"], | ||
| ), | ||
| ( | ||
| 1, | ||
| True, | ||
| None, | ||
| [("A", True, 1), ("B", True, 2), ("C", True, 3)], | ||
| ["level1", None, "level2"], | ||
| ), | ||
| ], | ||
| ) | ||
| def test_insert_level_basic( | ||
| self, position, value, name, expected_tuples, expected_names | ||
| ): | ||
| simple_idx = pd.MultiIndex.from_tuples( | ||
| [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"] | ||
| ) | ||
| 
     | 
||
| result = simple_idx.insert_level(position, value, name=name) | ||
| expected = pd.MultiIndex.from_tuples(expected_tuples, names=expected_names) | ||
| tm.assert_index_equal(result, expected) | ||
| 
     | 
||
| @pytest.mark.parametrize( | ||
| "position, value, expected_error", | ||
| [ | ||
| (5, "invalid", "position must be between"), | ||
| (-1, "invalid", "position must be between"), | ||
| (1, ["too", "few"], "Length of values must match"), | ||
| (3, "value", "position must be between"), | ||
| ], | ||
| ) | ||
| def test_insert_level_error_cases(self, position, value, expected_error): | ||
| simple_idx = pd.MultiIndex.from_tuples( | ||
| [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"] | ||
| ) | ||
| 
     | 
||
| with pytest.raises(ValueError, match=expected_error): | ||
| simple_idx.insert_level(position, value) | ||
| 
     | 
||
| def test_insert_level_preserves_original(self): | ||
| simple_idx = pd.MultiIndex.from_tuples( | ||
| [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"] | ||
| ) | ||
| 
     | 
||
| original = simple_idx.copy() | ||
| simple_idx.insert_level(1, "temp") | ||
| 
     | 
||
| tm.assert_index_equal(original, simple_idx) | ||
| 
     | 
||
| def test_insert_level_empty_index(self): | ||
| empty_idx = pd.MultiIndex.from_tuples([], names=["level1", "level2"]) | ||
| 
     | 
||
| result = empty_idx.insert_level(0, []) | ||
| expected = pd.MultiIndex.from_tuples([], names=[None, "level1", "level2"]) | ||
| tm.assert_index_equal(result, expected) | ||
| 
     | 
||
| def test_insert_level_single_element(self): | ||
| single_idx = pd.MultiIndex.from_tuples([("A", 1)], names=["level1", "level2"]) | ||
| 
     | 
||
| result = single_idx.insert_level(1, "middle") | ||
| expected = pd.MultiIndex.from_tuples( | ||
| [("A", "middle", 1)], names=["level1", None, "level2"] | ||
| ) | ||
| tm.assert_index_equal(result, expected) | ||
                              
       | 
            
Uh oh!
There was an error while loading. Please reload this page.