-
-
Couldn't load subscription status.
- Fork 19.2k
REF: Add Manager.column_setitem to set values into a single column (without intermediate series) #47074
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
REF: Add Manager.column_setitem to set values into a single column (without intermediate series) #47074
Changes from 7 commits
0e4c58e
a2aa8aa
ce0649b
103d1fe
d20b0cb
453eaba
be740ad
e63c7f6
025a3d4
caf7be8
8d7ee1a
25e903b
5e30199
9d4566f
faed070
ea063e6
3f30cab
db8e866
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -52,7 +52,6 @@ | |
| from pandas.core.indexers import ( | ||
| check_array_indexer, | ||
| is_empty_indexer, | ||
| is_exact_shape_match, | ||
| is_list_like_indexer, | ||
| is_scalar_indexer, | ||
| length_of_indexer, | ||
|
|
@@ -1936,42 +1935,31 @@ def _setitem_single_column(self, loc: int, value, plane_indexer): | |
| """ | ||
| pi = plane_indexer | ||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| ser = self.obj._ixs(loc, axis=1) | ||
|
|
||
| # perform the equivalent of a setitem on the info axis | ||
| # as we have a null slice or a slice with full bounds | ||
| # which means essentially reassign to the columns of a | ||
| # multi-dim object | ||
| # GH#6149 (null slice), GH#10408 (full bounds) | ||
| if com.is_null_slice(pi) or com.is_full_slice(pi, len(self.obj)): | ||
| ser = value | ||
| self.obj._iset_item(loc, value) | ||
| elif ( | ||
| is_array_like(value) | ||
| and is_exact_shape_match(ser, value) | ||
jorisvandenbossche marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| and len(value.shape) > 0 | ||
| and self.obj.shape[0] == value.shape[0] | ||
| and not is_empty_indexer(pi) | ||
| ): | ||
| if is_list_like(pi): | ||
| ser = value[np.argsort(pi)] | ||
| value = value[np.argsort(pi)] | ||
| else: | ||
| # in case of slice | ||
| ser = value[pi] | ||
| value = value[pi] | ||
| self.obj._iset_item(loc, value) | ||
| else: | ||
| # set the item, first attempting to operate inplace, then | ||
|
||
| # falling back to casting if necessary; see | ||
| # _whatsnew_130.notable_bug_fixes.setitem_column_try_inplace | ||
|
|
||
| orig_values = ser._values | ||
| ser._mgr = ser._mgr.setitem((pi,), value) | ||
|
|
||
| if ser._values is orig_values: | ||
| # The setitem happened inplace, so the DataFrame's values | ||
| # were modified inplace. | ||
| return | ||
| self.obj._iset_item(loc, ser) | ||
| return | ||
|
|
||
| # reset the sliced object if unique | ||
| self.obj._iset_item(loc, ser) | ||
| self.obj._mgr.column_setitem(loc, plane_indexer, value) | ||
| self.obj._clear_item_cache() | ||
|
|
||
| def _setitem_single_block(self, indexer, value, name: str): | ||
| """ | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -869,6 +869,25 @@ def iset( | |
| self.arrays[mgr_idx] = value_arr | ||
| return | ||
|
|
||
| def column_setitem( | ||
| self, loc: int, idx: int | slice | np.ndarray, value, inplace: bool = False | ||
| ) -> None: | ||
| """ | ||
| Set values ("setitem") into a single column (not setting the full column). | ||
|
|
||
| This is a method on the ArrayManager level, to avoid creating an | ||
| intermediate Series at the DataFrame level (`s = df[loc]; s[idx] = value`) | ||
| """ | ||
| arr = self.arrays[loc] | ||
| # create temporary SingleArrayManager without ref to use setitem implementation | ||
|
||
| mgr = SingleArrayManager([arr], [self._axes[0]]) | ||
| if inplace: | ||
| mgr.setitem_inplace(idx, value) | ||
| else: | ||
| new_mgr = mgr.setitem((idx,), value) | ||
| # update existing ArrayManager in-place | ||
| self.arrays[loc] = new_mgr.arrays[0] | ||
jorisvandenbossche marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None: | ||
| """ | ||
| Insert item at selected position. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1082,7 +1082,7 @@ def test_setitem_partial_column_inplace(self, consolidate, using_array_manager): | |
| tm.assert_numpy_array_equal(zvals, expected.values) | ||
| assert np.shares_memory(zvals, df["z"]._values) | ||
| if not consolidate: | ||
| assert df["z"]._values is zvals | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. See #47074 (comment) for the comment about this removal. |
||
| assert df["z"]._values.base is zvals.base | ||
|
||
|
|
||
| def test_setitem_duplicate_columns_not_inplace(self): | ||
| # GH#39510 | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
should this comment refer to column_setitem instead of just setitem?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
it also looks like the new method doesn't actually do the validation this comment refers to?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is an existing comment (originally a few lines below), so I suppose this comment was already out of date.
Before this PR the comment was about `setitem_inplace`, and that also doesn't do any validation.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
setitem_inplace calls np_can_hold_element, which raises on failure
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, yes, I see. That is doing validation that can raise the `LossySetitemError`. So the new `column_setitem` calls `setitem`, which will also call `np_can_hold_element`, but there it catches the `LossySetitemError` and coerces to the target dtype if needed. That is something the loc/iloc fallback below would otherwise also do, so I suppose this change is OK (but the comment is then indeed no longer correct, and we also don't need to catch `LossySetitemError` here).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yah, possibly also TypeError and ValueError