Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,7 @@ Indexing
- Bug in :meth:`DataFrame.nlargest` and :meth:`Series.nlargest` where sorted result did not count indexes containing ``np.nan`` (:issue:`28984`)
- Bug in indexing on a non-unique object-dtype :class:`Index` with an NA scalar (e.g. ``np.nan``) (:issue:`43711`)
- Bug in :meth:`DataFrame.__setitem__` incorrectly writing into an existing column's array rather than setting a new array when the new dtype and the old dtype match (:issue:`43406`)
- Bug in setting floating-dtype values into a :class:`Series` with integer dtype failing to set inplace when those values can be losslessly converted to integers (:issue:`44316`)
- Bug in :meth:`Series.__setitem__` with object dtype when setting an array with matching size and ``dtype='datetime64[ns]'`` or ``dtype='timedelta64[ns]'`` incorrectly converting the datetimes/timedeltas to integers (:issue:`43868`)
- Bug in :meth:`DataFrame.sort_index` where ``ignore_index=True`` was not being respected when the index was already sorted (:issue:`43591`)
- Bug in :meth:`Index.get_indexer_non_unique` when index contains multiple ``np.datetime64("NaT")`` and ``np.timedelta64("NaT")`` (:issue:`43869`)
Expand Down
8 changes: 8 additions & 0 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -2205,6 +2205,14 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool:
if tipo.kind not in ["i", "u"]:
if is_float(element) and element.is_integer():
return True

if isinstance(element, np.ndarray) and element.dtype.kind == "f":
# If all can be losslessly cast to integers, then we can hold them
# We do something similar in putmask_smart
casted = element.astype(dtype)
comp = casted == element
return comp.all()

# Anything other than integer we cannot hold
return False
elif dtype.itemsize < tipo.itemsize:
Expand Down
12 changes: 12 additions & 0 deletions pandas/core/indexes/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,18 @@ def asi8(self) -> npt.NDArray[np.int64]:
)
return self._values.view(self._default_dtype)

def _validate_fill_value(self, value):
    """
    Validate ``value`` and cast integer-valued float arrays to ``self.dtype``.

    The parent class validation runs first; its return value is not used
    here.  Anything that does not expose a float ``dtype`` is then handed
    back untouched.  A float-dtype value is cast to ``self.dtype`` and the
    cast result is returned only when it compares equal to the original
    element-wise, e.g. ``np.array([1.0])`` becomes
    ``np.array([1], dtype=self.dtype)``.

    Raises
    ------
    TypeError
        If a float-dtype ``value`` does not survive the round-trip
        comparison after being cast to ``self.dtype``.
    """
    # see TestSetitemFloatNDarrayIntoIntegerSeries
    super()._validate_fill_value(value)

    has_float_dtype = hasattr(value, "dtype") and is_float_dtype(value.dtype)
    if not has_float_dtype:
        return value

    as_own_dtype = value.astype(self.dtype)
    if not (as_own_dtype == value).all():
        raise TypeError

    # See also: can_hold_element
    return as_own_dtype


class Int64Index(IntegerIndex):
_index_descr_args = {
Expand Down
8 changes: 8 additions & 0 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1193,6 +1193,14 @@ def where(self, other, cond) -> list[Block]:
values, icond.sum(), other # type: ignore[arg-type]
)
if alt is not other:
if is_list_like(other) and len(other) < len(values):
# call np.where with other to get the appropriate ValueError
np.where(~icond, values, other)
raise NotImplementedError(
"This should not be reached; call to np.where above is "
"expected to raise ValueError. Please report a bug at "
"github.com/pandas-dev/pandas"
)
result = values.copy()
np.putmask(result, icond, alt)
else:
Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/dtypes/cast/test_can_hold_element.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,16 @@ def test_can_hold_element_range(any_int_numpy_dtype):
rng = range(10 ** 10, 10 ** 10)
assert len(rng) == 0
assert can_hold_element(arr, rng)


def test_can_hold_element_int_values_float_ndarray():
    """Check which float64 ndarrays an int64 array is reported to hold."""
    int_arr = np.array([], dtype=np.int64)

    # whole-number floats are accepted
    whole_floats = np.array([1.0, 2.0])
    assert can_hold_element(int_arr, whole_floats)

    # values with a fractional part are rejected
    fractional = whole_floats + 0.5
    assert not can_hold_element(int_arr, fractional)

    # integer but not losslessly castable to int64
    too_large = np.array([3, 2 ** 65], dtype=np.float64)
    assert not can_hold_element(int_arr, too_large)
17 changes: 15 additions & 2 deletions pandas/tests/indexing/multiindex/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,16 +196,29 @@ def test_multiindex_assignment(self):
df.loc[4, "d"] = arr
tm.assert_series_equal(df.loc[4, "d"], Series(arr, index=[8, 10], name="d"))

def test_multiindex_assignment_single_dtype(self):
# GH3777 part 2b
# single dtype
arr = np.array([0.0, 1.0])

df = DataFrame(
np.random.randint(5, 10, size=9).reshape(3, 3),
columns=list("abc"),
index=[[4, 4, 8], [8, 10, 12]],
dtype=np.int64,
)

# arr can be losslessly cast to int, so this setitem is inplace
df.loc[4, "c"] = arr
exp = Series(arr, index=[8, 10], name="c", dtype="float64")
tm.assert_series_equal(df.loc[4, "c"], exp)
exp = Series(arr, index=[8, 10], name="c", dtype="int64")
result = df.loc[4, "c"]
tm.assert_series_equal(result, exp)

# arr + 0.5 cannot be cast losslessly to int, so we upcast
df.loc[4, "c"] = arr + 0.5
result = df.loc[4, "c"]
exp = exp + 0.5
tm.assert_series_equal(result, exp)

# scalar ok
df.loc[4, "c"] = 10
Expand Down
11 changes: 10 additions & 1 deletion pandas/tests/indexing/test_iloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -515,9 +515,18 @@ def test_iloc_setitem_frame_duplicate_columns_multiple_blocks(
# but on a DataFrame with multiple blocks
df = DataFrame([[0, 1], [2, 3]], columns=["B", "B"])

# setting float values that can be held by existing integer arrays
# is inplace
df.iloc[:, 0] = df.iloc[:, 0].astype("f8")
if not using_array_manager:
assert len(df._mgr.blocks) == 1

# if the assigned values cannot be held by existing integer arrays,
# we cast
df.iloc[:, 0] = df.iloc[:, 0] + 0.5
if not using_array_manager:
assert len(df._mgr.blocks) == 2

expected = df.copy()

# assign back to self
Expand Down Expand Up @@ -892,7 +901,7 @@ def test_iloc_with_boolean_operation(self):
tm.assert_frame_equal(result, expected)

result.iloc[[False, False, True, True]] /= 2
expected = DataFrame([[0.0, 4.0], [8.0, 12.0], [4.0, 5.0], [6.0, np.nan]])
expected = DataFrame([[0, 4.0], [8, 12.0], [4, 5.0], [6, np.nan]])
tm.assert_frame_equal(result, expected)

def test_iloc_getitem_singlerow_slice_categoricaldtype_gives_series(self):
Expand Down
57 changes: 47 additions & 10 deletions pandas/tests/series/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -620,23 +620,27 @@ def test_mask_key(self, obj, key, expected, val, indexer_sli):
mask[key] = True

obj = obj.copy()

if is_list_like(val) and len(val) < mask.sum():
msg = "boolean index did not match indexed array along dimension"
with pytest.raises(IndexError, match=msg):
indexer_sli(obj)[mask] = val
return

indexer_sli(obj)[mask] = val
tm.assert_series_equal(obj, expected)

def test_series_where(self, obj, key, expected, val, is_inplace):
if is_list_like(val) and len(val) < len(obj):
# Series.where is not valid here
if isinstance(val, range):
return

# FIXME: The remaining TestSetitemDT64IntoInt that go through here
# are relying on technically-incorrect behavior because Block.where
# uses np.putmask instead of expressions.where in those cases,
# which has different length-checking semantics.

mask = np.zeros(obj.shape, dtype=bool)
mask[key] = True

if is_list_like(val) and len(val) < len(obj):
# Series.where is not valid here
msg = "operands could not be broadcast together with shapes"
with pytest.raises(ValueError, match=msg):
obj.where(~mask, val)
return

orig = obj
obj = obj.copy()
arr = obj._values
Expand Down Expand Up @@ -1014,6 +1018,39 @@ def inplace(self):
return True


@pytest.mark.parametrize(
    "val",
    [
        np.array([2.0, 3.0]),
        np.array([2.5, 3.5]),
        np.array([2 ** 65, 2 ** 65 + 1], dtype=np.float64),  # all ints, but can't cast
    ],
)
class TestSetitemFloatNDarrayIntoIntegerSeries(SetitemCastingEquivalents):
    """
    Set a two-element float64 ndarray into ``obj[0:2]`` of an int64 Series.

    Only the first parametrized ``val`` is treated as an inplace set; for
    the other two the expected result is built with float64 dtype.
    """

    @pytest.fixture
    def obj(self):
        """Five-element int64 Series holding 0..4."""
        return Series(range(5), dtype=np.int64)

    @pytest.fixture
    def key(self):
        """Slice selecting the first two positions."""
        return slice(0, 2)

    @pytest.fixture
    def inplace(self, val):
        """Whether this ``val`` is expected to be set inplace."""
        # NB: this condition is based on currently-hardcoded "val" cases
        first_is_two = val[0] == 2
        return first_is_two

    @pytest.fixture
    def expected(self, val, inplace):
        """Series of 0..4 with the first two entries replaced by ``val``."""
        # int64 is kept for the inplace case, otherwise float64 is expected
        result_dtype = np.int64 if inplace else np.float64
        filled = np.arange(5, dtype=result_dtype)
        filled[:2] = val
        return Series(filled)


def test_setitem_int_as_positional_fallback_deprecation():
# GH#42215 deprecated falling back to positional on __setitem__ with an
# int not contained in the index
Expand Down