-
-
Notifications
You must be signed in to change notification settings - Fork 19.2k
ENH: Implement MultiIndex.insert_level for inserting levels at specified positions #62610
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 13 commits
e3d6970
45ac8ef
5b76304
5f0caf0
97a98e5
1a9ddc5
2199e6e
44985ad
9e8676d
094958d
77f3af8
7bf3067
00a346f
c4ecf7a
8e0068a
87bd44b
79e04b6
1447886
e2917d0
471c2d6
e2334ac
eb320fb
9f80ad1
a8d626a
37f12e6
fd93622
0aa81ec
66026af
10ecc1f
00d3bbb
4b2bb50
d08eb28
336c3e8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -214,6 +214,7 @@ Other enhancements | |
| - :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`) | ||
| - :py:class:`frozenset` elements in pandas objects are now natively printed (:issue:`60690`) | ||
| - Add ``"delete_rows"`` option to ``if_exists`` argument in :meth:`DataFrame.to_sql` deleting all records of the table before inserting data (:issue:`37210`). | ||
| - Added :meth:`MultiIndex.insert_level` to insert new levels at specified positions in a MultiIndex (:issue:`62558`) | ||
| - Added half-year offset classes :class:`HalfYearBegin`, :class:`HalfYearEnd`, :class:`BHalfYearBegin` and :class:`BHalfYearEnd` (:issue:`60928`) | ||
| - Added support for ``axis=1`` with ``dict`` or :class:`Series` arguments into :meth:`DataFrame.fillna` (:issue:`4514`) | ||
| - Added support to read and write from and to Apache Iceberg tables with the new :func:`read_iceberg` and :meth:`DataFrame.to_iceberg` functions (:issue:`61383`) | ||
|
|
@@ -228,7 +229,7 @@ Other enhancements | |
| - Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`) | ||
| - Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`) | ||
| - Switched wheel upload to **PyPI Trusted Publishing** (OIDC) for release-tag pushes in ``wheels.yml``. (:issue:`61718`) | ||
| - | ||
|
|
||
|
||
|
|
||
| .. --------------------------------------------------------------------------- | ||
| .. _whatsnew_300.notable_bug_fixes: | ||
|
|
||
|
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -2710,6 +2710,73 @@ def reorder_levels(self, order) -> MultiIndex: | |||||||||||||||||||||||||||||
| result = self._reorder_ilevels(order) | ||||||||||||||||||||||||||||||
| return result | ||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||
| def insert_level(self, position: int, value, name=None) -> MultiIndex: | ||||||||||||||||||||||||||||||
| """ | ||||||||||||||||||||||||||||||
| Insert a new level at the specified position and return a new MultiIndex. | ||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||
| Parameters | ||||||||||||||||||||||||||||||
| ---------- | ||||||||||||||||||||||||||||||
| position : int | ||||||||||||||||||||||||||||||
| The integer position where the new level should be inserted. | ||||||||||||||||||||||||||||||
| Must be between 0 and ``self.nlevels`` (inclusive). | ||||||||||||||||||||||||||||||
| value : scalar or sequence | ||||||||||||||||||||||||||||||
| Values for the inserted level. If a scalar is provided, it is | ||||||||||||||||||||||||||||||
| broadcast to the length of the index. If a sequence is provided, | ||||||||||||||||||||||||||||||
| it must be the same length as the index. | ||||||||||||||||||||||||||||||
| name : Hashable, default None | ||||||||||||||||||||||||||||||
| Name of the inserted level. If not provided, the inserted level | ||||||||||||||||||||||||||||||
| name will be ``None``. | ||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||
| Returns | ||||||||||||||||||||||||||||||
| ------- | ||||||||||||||||||||||||||||||
| MultiIndex | ||||||||||||||||||||||||||||||
| A new ``MultiIndex`` with the inserted level. | ||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||
| Examples | ||||||||||||||||||||||||||||||
| -------- | ||||||||||||||||||||||||||||||
| >>> idx = pd.MultiIndex.from_tuples([("A", 1), ("B", 2)], names=["x", "y"]) | ||||||||||||||||||||||||||||||
| >>> idx.insert_level(0, "grp") | ||||||||||||||||||||||||||||||
| MultiIndex([('grp', 'A', 1), ('grp', 'B', 2)], | ||||||||||||||||||||||||||||||
| names=[None, 'x', 'y']) | ||||||||||||||||||||||||||||||
| >>> idx.insert_level(1, ["L1", "L2"], name="z") | ||||||||||||||||||||||||||||||
| MultiIndex([('A', 'L1', 1), ('B', 'L2', 2)], | ||||||||||||||||||||||||||||||
| names=['x', 'z', 'y']) | ||||||||||||||||||||||||||||||
| """ | ||||||||||||||||||||||||||||||
| if not isinstance(position, int): | ||||||||||||||||||||||||||||||
| raise TypeError("position must be an integer") | ||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||
| if position < 0 or position > self.nlevels: | ||||||||||||||||||||||||||||||
| raise ValueError(f"position must be between 0 and {self.nlevels}") | ||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||
| if not hasattr(value, "__iter__") or isinstance(value, str): | ||||||||||||||||||||||||||||||
| value = [value] * len(self) | ||||||||||||||||||||||||||||||
| else: | ||||||||||||||||||||||||||||||
| value = list(value) | ||||||||||||||||||||||||||||||
| if len(value) != len(self): | ||||||||||||||||||||||||||||||
| raise ValueError("Length of values must match length of index") | ||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||
| tuples = list(self) | ||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||
| new_tuples = [] | ||||||||||||||||||||||||||||||
| for i, tup in enumerate(tuples): | ||||||||||||||||||||||||||||||
| if isinstance(tup, tuple): | ||||||||||||||||||||||||||||||
| new_tuple = list(tup) | ||||||||||||||||||||||||||||||
| new_tuple.insert(position, value[i]) | ||||||||||||||||||||||||||||||
| new_tuples.append(tuple(new_tuple)) | ||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||
| else: | ||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||
| new_tuple = [tup] | ||||||||||||||||||||||||||||||
| new_tuple.insert(position, value[i]) | ||||||||||||||||||||||||||||||
| new_tuples.append(tuple(new_tuple)) | ||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||
| if self.names is not None: | ||||||||||||||||||||||||||||||
| new_names = list(self.names) | ||||||||||||||||||||||||||||||
| else: | ||||||||||||||||||||||||||||||
| new_names = [None] * self.nlevels | ||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||
| new_names.insert(position, name) | ||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||
| if self.names is not None: | |
| new_names = list(self.names) | |
| else: | |
| new_names = [None] * self.nlevels | |
| new_names.insert(position, name) | |
| if self.names is not None: | |
| new_names = self.names[:position] + [name] + self.names[position + 1:] | |
| else: | |
| new_names = [None] * (position) + [name] + [None] * (self.nlevel - position) |
Is there a case where self.names is None?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I looked into the constructors: if the user has not provided names, result._names = [None] * len(levels) makes it a list. Can we therefore assume that self.names will never be None? If so, this could simply be new_names = self.names[:position] + [name] + self.names[position:]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we assume that self.names will never be None?
The MultiIndex constructor contains these lines
pandas/pandas/core/indexes/multi.py
Lines 329 to 332 in a329dc3
| result._names = [None] * len(levels) | |
| if names is not None: | |
| # handles name validation | |
| result._set_names(names) |
That indicates it would never be None. So I think it's safe to remove the branching.
|
| There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The changes in this file seem unrelated. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sorry, these might be remnants of a merge conflict; it looks like an issue from resolving conflicts. I have made the file the same as on the main branch. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,149 @@ | ||
| import pytest | ||
|
|
||
| import pandas as pd | ||
| import pandas._testing as tm | ||
|
|
||
|
|
||
class TestMultiIndexInsertLevel:
    """Tests for ``MultiIndex.insert_level``.

    Most tests start from the same three-row, two-level index (levels named
    ``level1`` and ``level2``) and compare the index returned by
    ``insert_level`` against one built directly with
    ``MultiIndex.from_tuples``.
    """

    @pytest.fixture
    def simple_idx(self):
        """Return a fresh 3-row MultiIndex with levels ``level1``/``level2``."""
        return pd.MultiIndex.from_tuples(
            [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"]
        )

    @pytest.mark.parametrize(
        "position, value, name, expected_tuples, expected_names",
        [
            # Scalar string at the front, unnamed.
            (
                0,
                "new_value",
                None,
                [("new_value", "A", 1), ("new_value", "B", 2), ("new_value", "C", 3)],
                [None, "level1", "level2"],
            ),
            # Scalar string between the two existing levels, unnamed.
            (
                1,
                "middle",
                None,
                [("A", "middle", 1), ("B", "middle", 2), ("C", "middle", 3)],
                ["level1", None, "level2"],
            ),
            # Explicit name for a level inserted at the front.
            (
                0,
                "new_val",
                "new_level",
                [("new_val", "A", 1), ("new_val", "B", 2), ("new_val", "C", 3)],
                ["new_level", "level1", "level2"],
            ),
            # Explicit name for a level inserted in the middle.
            (
                1,
                "middle",
                "custom_name",
                [("A", "middle", 1), ("B", "middle", 2), ("C", "middle", 3)],
                ["level1", "custom_name", "level2"],
            ),
            # Lower boundary: position 0.
            (
                0,
                "start",
                None,
                [("start", "A", 1), ("start", "B", 2), ("start", "C", 3)],
                [None, "level1", "level2"],
            ),
            # Upper boundary: position == nlevels appends the level.
            (
                2,
                "end",
                None,
                [("A", 1, "end"), ("B", 2, "end"), ("C", 3, "end")],
                ["level1", "level2", None],
            ),
            # Non-string scalars are broadcast as well: int, float, None.
            (
                1,
                100,
                None,
                [("A", 100, 1), ("B", 100, 2), ("C", 100, 3)],
                ["level1", None, "level2"],
            ),
            (
                1,
                1.5,
                None,
                [("A", 1.5, 1), ("B", 1.5, 2), ("C", 1.5, 3)],
                ["level1", None, "level2"],
            ),
            (
                1,
                None,
                None,
                [("A", None, 1), ("B", None, 2), ("C", None, 3)],
                ["level1", None, "level2"],
            ),
            # A sequence of matching length is used element-wise.
            (
                1,
                ["X", "Y", "Z"],
                None,
                [("A", "X", 1), ("B", "Y", 2), ("C", "Z", 3)],
                ["level1", None, "level2"],
            ),
            # The empty string is a scalar value, not an empty sequence.
            (
                0,
                "",
                "empty_string",
                [("", "A", 1), ("", "B", 2), ("", "C", 3)],
                ["empty_string", "level1", "level2"],
            ),
            # Boolean scalar.
            (
                1,
                True,
                None,
                [("A", True, 1), ("B", True, 2), ("C", True, 3)],
                ["level1", None, "level2"],
            ),
        ],
    )
    def test_insert_level_basic(
        self, simple_idx, position, value, name, expected_tuples, expected_names
    ):
        """Inserted values and names land at ``position``; the rest is kept."""
        result = simple_idx.insert_level(position, value, name=name)
        expected = pd.MultiIndex.from_tuples(expected_tuples, names=expected_names)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "position, value, expected_error",
        [
            (5, "invalid", "position must be between"),
            (-1, "invalid", "position must be between"),
            (1, ["too", "few"], "Length of values must match"),
            # The length check must reject over-long sequences too.
            (1, ["one", "two", "three", "four"], "Length of values must match"),
            # nlevels + 1 is the first out-of-range position.
            (3, "value", "position must be between"),
        ],
    )
    def test_insert_level_error_cases(
        self, simple_idx, position, value, expected_error
    ):
        """Out-of-range positions and mismatched lengths raise ``ValueError``."""
        with pytest.raises(ValueError, match=expected_error):
            simple_idx.insert_level(position, value)

    @pytest.mark.parametrize("position", [1.0, "1", None])
    def test_insert_level_position_not_integer(self, simple_idx, position):
        """A position that is not an ``int`` raises ``TypeError``."""
        with pytest.raises(TypeError, match="position must be an integer"):
            simple_idx.insert_level(position, "value")

    def test_insert_level_preserves_original(self, simple_idx):
        """``insert_level`` returns a new index and leaves the caller's intact."""
        original = simple_idx.copy()
        simple_idx.insert_level(1, "temp")

        tm.assert_index_equal(original, simple_idx)

    def test_insert_level_empty_index(self):
        """An empty index accepts an empty sequence and gains the new level."""
        empty_idx = pd.MultiIndex.from_tuples([], names=["level1", "level2"])

        result = empty_idx.insert_level(0, [])
        expected = pd.MultiIndex.from_tuples([], names=[None, "level1", "level2"])
        tm.assert_index_equal(result, expected)

    def test_insert_level_single_element(self):
        """A scalar is broadcast correctly when the index has a single row."""
        single_idx = pd.MultiIndex.from_tuples([("A", 1)], names=["level1", "level2"])

        result = single_idx.insert_level(1, "middle")
        expected = pd.MultiIndex.from_tuples(
            [("A", "middle", 1)], names=["level1", None, "level2"]
        )
        tm.assert_index_equal(result, expected)
|
Uh oh!
There was an error while loading. Please reload this page.