Skip to content
Merged
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,7 @@ Indexing
- Bug in :meth:`DataFrame.iat` incorrectly returning ``Timestamp`` instead of ``datetime`` in some object-dtype cases (:issue:`32809`)
- Bug in :meth:`Series.loc` and :meth:`DataFrame.loc` when indexing with an integer key on a object-dtype :class:`Index` that is not all-integers (:issue:`31905`)
- Bug in :meth:`DataFrame.iloc.__setitem__` on a :class:`DataFrame` with duplicate columns incorrectly setting values for all matching columns (:issue:`15686`, :issue:`22036`)
- Bug in :meth:`DataFrame.loc:` and :meth:`Series.loc` with a :class:`DatetimeIndex`, :class:`TimedeltaIndex`, or :class:`PeriodIndex` incorrectly allowing lookups of non-matching datetime-like dtypes (:issue:`32650`)

Missing
^^^^^^^
Expand Down
8 changes: 4 additions & 4 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
ensure_platform_int,
is_bool,
is_bool_dtype,
is_categorical,
is_categorical_dtype,
is_datetime64_any_dtype,
is_dtype_equal,
Expand Down Expand Up @@ -532,6 +531,9 @@ def _shallow_copy_with_infer(self, values, **kwargs):
return self._constructor(values, **attributes)
except (TypeError, ValueError):
pass

# Remove tz so Index will try non-DatetimeIndex inference
attributes.pop("tz", None)
return Index(values, **attributes)

def _update_inplace(self, result, **kwargs):
Expand Down Expand Up @@ -4657,10 +4659,8 @@ def get_indexer_non_unique(self, target):
if pself is not self or ptarget is not target:
return pself.get_indexer_non_unique(ptarget)

if is_categorical(target):
if is_categorical_dtype(target.dtype):
tgt_values = np.asarray(target)
elif self.is_all_dates and target.is_all_dates: # GH 30399
tgt_values = target.asi8
else:
tgt_values = target._get_engine_target()

Expand Down
26 changes: 24 additions & 2 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,14 @@

from pandas._libs import NaT, iNaT, join as libjoin, lib
from pandas._libs.tslibs import timezones
from pandas._typing import Label
from pandas._typing import DtypeObj, Label
from pandas.compat.numpy import function as nv
from pandas.errors import AbstractMethodError
from pandas.util._decorators import Appender, cache_readonly

from pandas.core.dtypes.common import (
ensure_int64,
ensure_platform_int,
is_bool_dtype,
is_categorical_dtype,
is_dtype_equal,
Expand All @@ -32,7 +33,7 @@
from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin
from pandas.core.base import _shared_docs
import pandas.core.indexes.base as ibase
from pandas.core.indexes.base import Index, _index_shared_docs
from pandas.core.indexes.base import Index, _index_shared_docs, ensure_index
from pandas.core.indexes.extension import (
ExtensionIndex,
inherit_names,
Expand Down Expand Up @@ -101,6 +102,12 @@ class DatetimeIndexOpsMixin(ExtensionIndex):
def is_all_dates(self) -> bool:
return True

def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
"""
Can we compare values of the given dtype to our own?
"""
raise AbstractMethodError(self)

# ------------------------------------------------------------------------
# Abstract data attributes

Expand Down Expand Up @@ -426,6 +433,21 @@ def _partial_date_slice(
# try to find the dates
return (lhs_mask & rhs_mask).nonzero()[0]

@Appender(Index.get_indexer_non_unique.__doc__)
def get_indexer_non_unique(self, target):
target = ensure_index(target)
pself, ptarget = self._maybe_promote(target)
if pself is not self or ptarget is not target:
return pself.get_indexer_non_unique(ptarget)

if not self._is_comparable_dtype(target.dtype):
no_matches = -1 * np.ones(self.shape, dtype=np.intp)
return no_matches, no_matches

tgt_values = target.asi8
indexer, missing = self._engine.get_indexer_non_unique(tgt_values)
return ensure_platform_int(indexer), missing

# --------------------------------------------------------------------

__add__ = make_wrapped_arith_op("__add__")
Expand Down
24 changes: 22 additions & 2 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,18 @@

from pandas._libs import NaT, Period, Timestamp, index as libindex, lib, tslib as libts
from pandas._libs.tslibs import fields, parsing, timezones
from pandas._typing import Label
from pandas._typing import DtypeObj, Label
from pandas.util._decorators import cache_readonly

from pandas.core.dtypes.common import _NS_DTYPE, is_float, is_integer, is_scalar
from pandas.core.dtypes.common import (
_NS_DTYPE,
is_datetime64_any_dtype,
is_datetime64_dtype,
is_datetime64tz_dtype,
is_float,
is_integer,
is_scalar,
)
from pandas.core.dtypes.missing import is_valid_nat_for_dtype

from pandas.core.arrays.datetimes import DatetimeArray, tz_to_dtype
Expand Down Expand Up @@ -298,6 +306,18 @@ def _convert_for_op(self, value):
return Timestamp(value).asm8
raise ValueError("Passed item and index have different timezone")

def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
"""
Can we compare values of the given dtype to our own?
"""
if not is_datetime64_any_dtype(dtype):
return False
if self.tz is not None:
# If we have tz, we can compare to tzaware
return is_datetime64tz_dtype(dtype)
# if we dont have tz, we can only compare to tznaive
return is_datetime64_dtype(dtype)

# --------------------------------------------------------------------
# Rendering Methods

Expand Down
20 changes: 14 additions & 6 deletions pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from pandas._libs.tslibs import frequencies as libfrequencies, resolution
from pandas._libs.tslibs.parsing import parse_time_string
from pandas._libs.tslibs.period import Period
from pandas._typing import Label
from pandas._typing import DtypeObj, Label
from pandas.util._decorators import Appender, cache_readonly

from pandas.core.dtypes.common import (
Expand All @@ -23,6 +23,7 @@
is_scalar,
pandas_dtype,
)
from pandas.core.dtypes.dtypes import PeriodDtype

from pandas.core.arrays.period import (
PeriodArray,
Expand Down Expand Up @@ -298,6 +299,14 @@ def _maybe_convert_timedelta(self, other):
# raise when input doesn't have freq
raise raise_on_incompatible(self, None)

def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
"""
Can we compare values of the given dtype to our own?
"""
if not isinstance(dtype, PeriodDtype):
return False
return dtype.freq == self.freq

# ------------------------------------------------------------------------
# Rendering Methods

Expand Down Expand Up @@ -454,12 +463,11 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
def get_indexer_non_unique(self, target):
target = ensure_index(target)

if isinstance(target, PeriodIndex):
if target.freq != self.freq:
no_matches = -1 * np.ones(self.shape, dtype=np.intp)
return no_matches, no_matches
if not self._is_comparable_dtype(target.dtype):
no_matches = -1 * np.ones(self.shape, dtype=np.intp)
return no_matches, no_matches

target = target.asi8
target = target.asi8

indexer, missing = self._int64index.get_indexer_non_unique(target)
return ensure_platform_int(indexer), missing
Expand Down
8 changes: 7 additions & 1 deletion pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
""" implement the TimedeltaIndex """

from pandas._libs import NaT, Timedelta, index as libindex
from pandas._typing import Label
from pandas._typing import DtypeObj, Label
from pandas.util._decorators import Appender

from pandas.core.dtypes.common import (
Expand Down Expand Up @@ -213,6 +213,12 @@ def _maybe_promote(self, other):
other = TimedeltaIndex(other)
return self, other

def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
"""
Can we compare values of the given dtype to our own?
"""
return is_timedelta64_dtype(dtype)

def get_loc(self, key, method=None, tolerance=None):
"""
Get integer location for requested label
Expand Down
37 changes: 37 additions & 0 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2616,3 +2616,40 @@ def test_convert_almost_null_slice(indices):
msg = "'>=' not supported between instances of 'str' and 'int'"
with pytest.raises(TypeError, match=msg):
idx._convert_slice_indexer(key, "loc")


dtlike_dtypes = [
np.dtype("timedelta64[ns]"),
np.dtype("datetime64[ns]"),
pd.DatetimeTZDtype("ns", "Asia/Tokyo"),
pd.PeriodDtype("ns"),
]


@pytest.mark.parametrize("ldtype", dtlike_dtypes)
@pytest.mark.parametrize("rdtype", dtlike_dtypes)
def test_get_indexer_non_unique_wrong_dtype(ldtype, rdtype):

vals = np.tile(3600 * 10 ** 9 * np.arange(3), 2)

def construct(dtype):
if dtype is dtlike_dtypes[-1]:
# PeriodArray will try to cast ints to strings
return pd.DatetimeIndex(vals).astype(dtype)
return pd.Index(vals, dtype=dtype)

left = construct(ldtype)
right = construct(rdtype)

result = left.get_indexer_non_unique(right)

if ldtype is rdtype:
ex1 = np.array([0, 3, 1, 4, 2, 5] * 2, dtype=np.intp)
ex2 = np.array([], dtype=np.intp)
tm.assert_numpy_array_equal(result[0], ex1)
tm.assert_numpy_array_equal(result[1], ex2.astype(np.int64))

else:
no_matches = np.array([-1] * 6, dtype=np.intp)
tm.assert_numpy_array_equal(result[0], no_matches)
tm.assert_numpy_array_equal(result[1], no_matches)
22 changes: 22 additions & 0 deletions pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -1073,3 +1073,25 @@ def test_loc_slice_disallows_positional():
with tm.assert_produces_warning(FutureWarning):
# GH#31840 deprecated incorrect behavior
df.loc[1:3, 1] = 2


def test_loc_datetimelike_mismatched_dtypes():
# GH#32650 dont mix and match datetime/timedelta/period dtypes

df = pd.DataFrame(
np.random.randn(5, 3),
columns=["a", "b", "c"],
index=pd.date_range("2012", freq="H", periods=5),
)
# create dataframe with non-unique DatetimeIndex
df = df.iloc[[0, 2, 2, 3]].copy()

dti = df.index
tdi = pd.TimedeltaIndex(dti.asi8) # matching i8 values

msg = r"None of \[TimedeltaIndex.* are in the \[index\]"
with pytest.raises(KeyError, match=msg):
df.loc[tdi]

with pytest.raises(KeyError, match=msg):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i would test the reverse as well (e.g. TDI is the index with a DTI as indexer). This should also raise for Period, I think? (these can be handled as followons)

df["a"].loc[tdi]