pandas-dev · jreback · Mar 17, 2020 · Mar 12, 2020 · Mar 12, 2020 · Mar 12, 2020
diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
@@ -301,6 +301,7 @@ Indexing
 - Bug in :meth:`DataFrame.iat` incorrectly returning ``Timestamp`` instead of ``datetime`` in some object-dtype cases (:issue:`32809`)
 - Bug in :meth:`Series.loc` and :meth:`DataFrame.loc` when indexing with an integer key on a object-dtype :class:`Index` that is not all-integers (:issue:`31905`)
 - Bug in :meth:`DataFrame.iloc.__setitem__` on a :class:`DataFrame` with duplicate columns incorrectly setting values for all matching columns (:issue:`15686`, :issue:`22036`)
+- Bug in :meth:`DataFrame.loc:` and :meth:`Series.loc` with a :class:`DatetimeIndex`, :class:`TimedeltaIndex`, or :class:`PeriodIndex` incorrectly allowing lookups of non-matching datetime-like dtypes (:issue:`32650`)
 
 Missing
 ^^^^^^^

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -29,7 +29,6 @@
     ensure_platform_int,
     is_bool,
     is_bool_dtype,
-    is_categorical,
     is_categorical_dtype,
     is_datetime64_any_dtype,
     is_dtype_equal,
@@ -532,6 +531,9 @@ def _shallow_copy_with_infer(self, values, **kwargs):
                 return self._constructor(values, **attributes)
             except (TypeError, ValueError):
                 pass
+
+        # Remove tz so Index will try non-DatetimeIndex inference
+        attributes.pop("tz", None)
         return Index(values, **attributes)
 
     def _update_inplace(self, result, **kwargs):
@@ -4657,10 +4659,8 @@ def get_indexer_non_unique(self, target):
         if pself is not self or ptarget is not target:
             return pself.get_indexer_non_unique(ptarget)
 
-        if is_categorical(target):
+        if is_categorical_dtype(target.dtype):
             tgt_values = np.asarray(target)
-        elif self.is_all_dates and target.is_all_dates:  # GH 30399
-            tgt_values = target.asi8
         else:
             tgt_values = target._get_engine_target()
 

diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
@@ -8,13 +8,14 @@
 
 from pandas._libs import NaT, iNaT, join as libjoin, lib
 from pandas._libs.tslibs import timezones
-from pandas._typing import Label
+from pandas._typing import DtypeObj, Label
 from pandas.compat.numpy import function as nv
 from pandas.errors import AbstractMethodError
 from pandas.util._decorators import Appender, cache_readonly
 
 from pandas.core.dtypes.common import (
     ensure_int64,
+    ensure_platform_int,
     is_bool_dtype,
     is_categorical_dtype,
     is_dtype_equal,
@@ -32,7 +33,7 @@
 from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin
 from pandas.core.base import _shared_docs
 import pandas.core.indexes.base as ibase
-from pandas.core.indexes.base import Index, _index_shared_docs
+from pandas.core.indexes.base import Index, _index_shared_docs, ensure_index
 from pandas.core.indexes.extension import (
     ExtensionIndex,
     inherit_names,
@@ -101,6 +102,12 @@ class DatetimeIndexOpsMixin(ExtensionIndex):
     def is_all_dates(self) -> bool:
         return True
 
+    def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
+        """
+        Can we compare values of the given dtype to our own?
+        """
+        raise AbstractMethodError(self)
+
     # ------------------------------------------------------------------------
     # Abstract data attributes
 
@@ -426,6 +433,21 @@ def _partial_date_slice(
             # try to find the dates
             return (lhs_mask & rhs_mask).nonzero()[0]
 
+    @Appender(Index.get_indexer_non_unique.__doc__)
+    def get_indexer_non_unique(self, target):
+        target = ensure_index(target)
+        pself, ptarget = self._maybe_promote(target)
+        if pself is not self or ptarget is not target:
+            return pself.get_indexer_non_unique(ptarget)
+
+        if not self._is_comparable_dtype(target.dtype):
+            no_matches = -1 * np.ones(self.shape, dtype=np.intp)
+            return no_matches, no_matches
+
+        tgt_values = target.asi8
+        indexer, missing = self._engine.get_indexer_non_unique(tgt_values)
+        return ensure_platform_int(indexer), missing
+
     # --------------------------------------------------------------------
 
     __add__ = make_wrapped_arith_op("__add__")

diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
@@ -7,10 +7,18 @@
 
 from pandas._libs import NaT, Period, Timestamp, index as libindex, lib, tslib as libts
 from pandas._libs.tslibs import fields, parsing, timezones
-from pandas._typing import Label
+from pandas._typing import DtypeObj, Label
 from pandas.util._decorators import cache_readonly
 
-from pandas.core.dtypes.common import _NS_DTYPE, is_float, is_integer, is_scalar
+from pandas.core.dtypes.common import (
+    _NS_DTYPE,
+    is_datetime64_any_dtype,
+    is_datetime64_dtype,
+    is_datetime64tz_dtype,
+    is_float,
+    is_integer,
+    is_scalar,
+)
 from pandas.core.dtypes.missing import is_valid_nat_for_dtype
 
 from pandas.core.arrays.datetimes import DatetimeArray, tz_to_dtype
@@ -298,6 +306,18 @@ def _convert_for_op(self, value):
             return Timestamp(value).asm8
         raise ValueError("Passed item and index have different timezone")
 
+    def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
+        """
+        Can we compare values of the given dtype to our own?
+        """
+        if not is_datetime64_any_dtype(dtype):
+            return False
+        if self.tz is not None:
+            # If we have tz, we can compare to tzaware
+            return is_datetime64tz_dtype(dtype)
+        # if we dont have tz, we can only compare to tznaive
+        return is_datetime64_dtype(dtype)
+
     # --------------------------------------------------------------------
     # Rendering Methods
 

diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py
@@ -9,7 +9,7 @@
 from pandas._libs.tslibs import frequencies as libfrequencies, resolution
 from pandas._libs.tslibs.parsing import parse_time_string
 from pandas._libs.tslibs.period import Period
-from pandas._typing import Label
+from pandas._typing import DtypeObj, Label
 from pandas.util._decorators import Appender, cache_readonly
 
 from pandas.core.dtypes.common import (
@@ -23,6 +23,7 @@
     is_scalar,
     pandas_dtype,
 )
+from pandas.core.dtypes.dtypes import PeriodDtype
 
 from pandas.core.arrays.period import (
     PeriodArray,
@@ -298,6 +299,14 @@ def _maybe_convert_timedelta(self, other):
         # raise when input doesn't have freq
         raise raise_on_incompatible(self, None)
 
+    def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
+        """
+        Can we compare values of the given dtype to our own?
+        """
+        if not isinstance(dtype, PeriodDtype):
+            return False
+        return dtype.freq == self.freq
+
     # ------------------------------------------------------------------------
     # Rendering Methods
 
@@ -454,12 +463,11 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
     def get_indexer_non_unique(self, target):
         target = ensure_index(target)
 
-        if isinstance(target, PeriodIndex):
-            if target.freq != self.freq:
-                no_matches = -1 * np.ones(self.shape, dtype=np.intp)
-                return no_matches, no_matches
+        if not self._is_comparable_dtype(target.dtype):
+            no_matches = -1 * np.ones(self.shape, dtype=np.intp)
+            return no_matches, no_matches
 
-            target = target.asi8
+        target = target.asi8
 
         indexer, missing = self._int64index.get_indexer_non_unique(target)
         return ensure_platform_int(indexer), missing

diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py
@@ -1,7 +1,7 @@
 """ implement the TimedeltaIndex """
 
 from pandas._libs import NaT, Timedelta, index as libindex
-from pandas._typing import Label
+from pandas._typing import DtypeObj, Label
 from pandas.util._decorators import Appender
 
 from pandas.core.dtypes.common import (
@@ -213,6 +213,12 @@ def _maybe_promote(self, other):
             other = TimedeltaIndex(other)
         return self, other
 
+    def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
+        """
+        Can we compare values of the given dtype to our own?
+        """
+        return is_timedelta64_dtype(dtype)
+
     def get_loc(self, key, method=None, tolerance=None):
         """
         Get integer location for requested label

diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
@@ -2616,3 +2616,40 @@ def test_convert_almost_null_slice(indices):
         msg = "'>=' not supported between instances of 'str' and 'int'"
         with pytest.raises(TypeError, match=msg):
             idx._convert_slice_indexer(key, "loc")
+
+
+dtlike_dtypes = [
+    np.dtype("timedelta64[ns]"),
+    np.dtype("datetime64[ns]"),
+    pd.DatetimeTZDtype("ns", "Asia/Tokyo"),
+    pd.PeriodDtype("ns"),
+]
+
+
+@pytest.mark.parametrize("ldtype", dtlike_dtypes)
+@pytest.mark.parametrize("rdtype", dtlike_dtypes)
+def test_get_indexer_non_unique_wrong_dtype(ldtype, rdtype):
+
+    vals = np.tile(3600 * 10 ** 9 * np.arange(3), 2)
+
+    def construct(dtype):
+        if dtype is dtlike_dtypes[-1]:
+            # PeriodArray will try to cast ints to strings
+            return pd.DatetimeIndex(vals).astype(dtype)
+        return pd.Index(vals, dtype=dtype)
+
+    left = construct(ldtype)
+    right = construct(rdtype)
+
+    result = left.get_indexer_non_unique(right)
+
+    if ldtype is rdtype:
+        ex1 = np.array([0, 3, 1, 4, 2, 5] * 2, dtype=np.intp)
+        ex2 = np.array([], dtype=np.intp)
+        tm.assert_numpy_array_equal(result[0], ex1)
+        tm.assert_numpy_array_equal(result[1], ex2.astype(np.int64))
+
+    else:
+        no_matches = np.array([-1] * 6, dtype=np.intp)
+        tm.assert_numpy_array_equal(result[0], no_matches)
+        tm.assert_numpy_array_equal(result[1], no_matches)
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
@@ -1073,3 +1073,25 @@ def test_loc_slice_disallows_positional():
     with tm.assert_produces_warning(FutureWarning):
         # GH#31840 deprecated incorrect behavior
         df.loc[1:3, 1] = 2
+
+
+def test_loc_datetimelike_mismatched_dtypes():
+    # GH#32650 dont mix and match datetime/timedelta/period dtypes
+
+    df = pd.DataFrame(
+        np.random.randn(5, 3),
+        columns=["a", "b", "c"],
+        index=pd.date_range("2012", freq="H", periods=5),
+    )
+    # create dataframe with non-unique DatetimeIndex
+    df = df.iloc[[0, 2, 2, 3]].copy()
+
+    dti = df.index
+    tdi = pd.TimedeltaIndex(dti.asi8)  # matching i8 values
+
+    msg = r"None of \[TimedeltaIndex.* are in the \[index\]"
+    with pytest.raises(KeyError, match=msg):
+        df.loc[tdi]
+
+    with pytest.raises(KeyError, match=msg):
+        df["a"].loc[tdi]