-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
Bug in iloc aligned objects #37728
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Bug in iloc aligned objects #37728
Changes from 5 commits
9d11511
ca3f47b
f1306aa
c55cd62
ea41407
6bced6a
689a1ab
d870dd6
23dab4a
48e0d25
ea068ba
8ff2430
8a697f7
3bef35a
7375f22
e58fbc8
55e9403
7016977
b965eee
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -681,7 +681,7 @@ def __setitem__(self, key, value): | |
self._has_valid_setitem_indexer(key) | ||
|
||
iloc = self if self.name == "iloc" else self.obj.iloc | ||
iloc._setitem_with_indexer(indexer, value) | ||
iloc._setitem_with_indexer(indexer, value, self.name) | ||
|
||
def _validate_key(self, key, axis: int): | ||
""" | ||
|
@@ -1525,7 +1525,7 @@ def _get_setitem_indexer(self, key): | |
|
||
# ------------------------------------------------------------------- | ||
|
||
def _setitem_with_indexer(self, indexer, value): | ||
def _setitem_with_indexer(self, indexer, value, name): | ||
""" | ||
_setitem_with_indexer is for setting values on a Series/DataFrame | ||
using positional indexers. | ||
|
@@ -1601,7 +1601,7 @@ def _setitem_with_indexer(self, indexer, value): | |
new_indexer = convert_from_missing_indexer_tuple( | ||
indexer, self.obj.axes | ||
) | ||
self._setitem_with_indexer(new_indexer, value) | ||
self._setitem_with_indexer(new_indexer, value, name) | ||
|
||
return | ||
|
||
|
@@ -1626,17 +1626,17 @@ def _setitem_with_indexer(self, indexer, value): | |
indexer, missing = convert_missing_indexer(indexer) | ||
|
||
if missing: | ||
self._setitem_with_indexer_missing(indexer, value) | ||
self._setitem_with_indexer_missing(indexer, value, name) | ||
return | ||
|
||
# align and set the values | ||
if take_split_path: | ||
# We have to operate column-wise | ||
self._setitem_with_indexer_split_path(indexer, value) | ||
self._setitem_with_indexer_split_path(indexer, value, name) | ||
else: | ||
self._setitem_single_block(indexer, value) | ||
self._setitem_single_block(indexer, value, name) | ||
|
||
def _setitem_with_indexer_split_path(self, indexer, value): | ||
def _setitem_with_indexer_split_path(self, indexer, value, name): | ||
""" | ||
Setitem column-wise. | ||
""" | ||
|
@@ -1648,7 +1648,7 @@ def _setitem_with_indexer_split_path(self, indexer, value): | |
if len(indexer) > self.ndim: | ||
raise IndexError("too many indices for array") | ||
|
||
if isinstance(value, ABCSeries): | ||
if isinstance(value, ABCSeries) and name != "iloc": | ||
value = self._align_series(indexer, value) | ||
|
||
# Ensure we have something we can iterate over | ||
|
@@ -1677,7 +1677,7 @@ def _setitem_with_indexer_split_path(self, indexer, value): | |
|
||
# we have an equal len Frame | ||
if isinstance(value, ABCDataFrame): | ||
self._setitem_with_indexer_frame_value(indexer, value) | ||
self._setitem_with_indexer_frame_value(indexer, value, name) | ||
|
||
# we have an equal len ndarray/convertible to our ilocs | ||
# hasattr first, to avoid coercing to ndarray without reason. | ||
|
@@ -1736,7 +1736,7 @@ def _setitem_with_indexer_2d_value(self, indexer, value): | |
# setting with a list, re-coerces | ||
self._setitem_single_column(loc, value[:, i].tolist(), plane_indexer) | ||
|
||
def _setitem_with_indexer_frame_value(self, indexer, value: "DataFrame"): | ||
def _setitem_with_indexer_frame_value(self, indexer, value: "DataFrame", name): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you type name here (and above) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
ilocs = self._ensure_iterable_column_indexer(indexer[1]) | ||
|
||
sub_indexer = list(indexer) | ||
|
@@ -1746,7 +1746,12 @@ def _setitem_with_indexer_frame_value(self, indexer, value: "DataFrame"): | |
|
||
unique_cols = value.columns.is_unique | ||
|
||
if not unique_cols and value.columns.equals(self.obj.columns): | ||
if name == "iloc": | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
for index, loc in zip(range(len(ilocs)), ilocs): | ||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
val = value.iloc[:, index] | ||
self._setitem_single_column(loc, val, plane_indexer) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can this be solved with an appropriate call to _align_frame or _align_series? id really prefer to avoid passing There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hm no not to my knowledge. The issue is, that we want to achieve oposing things between loc and iloc, but the objects do look the same. But I can not see a way to do this without passing some kind of flag to the align calls. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looked into this a bit again. The indexer in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it sounds like we should be doing some appropriate alignment within Loc before passing it into iLoc? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That would be an option, but would result in a bigger rewrite and I am not sure if that would introduce more complexity, because the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it looks like this PR is making it so that we never call align_frame/align_series if setitem_with_indexer was called from iloc, only if it was called from loc. setitem_with_indexer is only called from one place within loc. Can we just do that alignment right before that call? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hm, we could try, but it is not that simple. For example before calling There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. thanks for explaining, i think i have a better handle on it now. ive got an upcoming PR to make all 2D cases go through split_path. let's revisit this after that goes through and see if we can make it cleaner There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That sounds great. Just ping me when merged, then I will have a look. |
||
|
||
elif not unique_cols and value.columns.equals(self.obj.columns): | ||
# We assume we are already aligned, see | ||
# test_iloc_setitem_frame_duplicate_columns_multiple_blocks | ||
for loc in ilocs: | ||
|
@@ -1804,7 +1809,7 @@ def _setitem_single_column(self, loc: int, value, plane_indexer): | |
# reset the sliced object if unique | ||
self.obj._iset_item(loc, ser) | ||
|
||
def _setitem_single_block(self, indexer, value): | ||
def _setitem_single_block(self, indexer, value, name): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. type There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
""" | ||
_setitem_with_indexer for the case when we have a single Block. | ||
""" | ||
|
@@ -1833,13 +1838,13 @@ def _setitem_single_block(self, indexer, value): | |
|
||
indexer = maybe_convert_ix(*indexer) | ||
|
||
if isinstance(value, (ABCSeries, dict)): | ||
if isinstance(value, (ABCSeries, dict)) and name != "iloc": | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if we have a dict here dont we still want to wrap it in a Series? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep, have no tests getting here with iloc. Will add one There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. LMK when these tests are all ready and ill take another look. this one is almost ready There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Tests are all in |
||
# TODO(EA): ExtensionBlock.setitem this causes issues with | ||
# setting for extensionarrays that store dicts. Need to decide | ||
# if it's worth supporting that. | ||
value = self._align_series(indexer, Series(value)) | ||
|
||
elif isinstance(value, ABCDataFrame): | ||
elif isinstance(value, ABCDataFrame) and name != "iloc": | ||
value = self._align_frame(indexer, value) | ||
|
||
# check for chained assignment | ||
|
@@ -1850,7 +1855,7 @@ def _setitem_single_block(self, indexer, value): | |
self.obj._mgr = self.obj._mgr.setitem(indexer=indexer, value=value) | ||
self.obj._maybe_update_cacher(clear=True) | ||
|
||
def _setitem_with_indexer_missing(self, indexer, value): | ||
def _setitem_with_indexer_missing(self, indexer, value, name): | ||
""" | ||
Insert new row(s) or column(s) into the Series or DataFrame. | ||
""" | ||
|
@@ -1871,7 +1876,7 @@ def _setitem_with_indexer_missing(self, indexer, value): | |
if index.is_unique: | ||
new_indexer = index.get_indexer([new_index[-1]]) | ||
if (new_indexer != -1).any(): | ||
return self._setitem_with_indexer(new_indexer, value) | ||
return self._setitem_with_indexer(new_indexer, value, name) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. double check me on this, but i think it might be the case that we only get here with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep, you are right. Iloc raises when indexer is a dict. That is the only way we can get in there. Removed it |
||
|
||
# this preserves dtype of the value | ||
new_values = Series([value])._values | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -298,6 +298,14 @@ def test_iloc_setitem_bool_indexer(self, klass): | |
expected = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]}) | ||
tm.assert_frame_equal(df, expected) | ||
|
||
def test_setitem_iloc_pure_position_based(self): | ||
# GH: 22046 | ||
df1 = DataFrame({"a2": [11, 12, 13], "b2": [14, 15, 16]}) | ||
df2 = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) | ||
df2.iloc[:, [1]] = df1.iloc[:, [0]] | ||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. are there other indexer/value type combinations that are relevant here? e.g. the value being set here is a DataFrame, would a Series trigger the same bug? what if instead of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
raises
Did not check slice previously, this triggered the bug too. Will add test |
||
expected = DataFrame({"a": [1, 2, 3], "b": [11, 12, 13], "c": [7, 8, 9]}) | ||
tm.assert_frame_equal(df2, expected) | ||
|
||
|
||
class TestDataFrameSetItemSlicing: | ||
def test_setitem_slice_position(self): | ||
|
Uh oh!
There was an error while loading. Please reload this page.