Skip to content

BUG: .isin on datetimelike indexes does not validate input of level parameter #26677

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jun 9, 2019
Merged
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,7 @@ Datetimelike
- Bug in :func:`to_datetime` which does not replace the invalid argument with ``NaT`` when error is set to coerce (:issue:`26122`)
- Bug in adding :class:`DateOffset` with nonzero month to :class:`DatetimeIndex` would raise ``ValueError`` (:issue:`26258`)
- Bug in :func:`to_datetime` which raises unhandled ``OverflowError`` when called with mix of invalid dates and ``NaN`` values with ``format='%Y%m%d'`` and ``errors='coerce'`` (:issue:`25512`)
- Bug in :meth:`isin` for datetimelike indexes; :class:`DatetimeIndex`, :class:`TimedeltaIndex` and :class:`PeriodIndex` where the ``level`` parameter was ignored. (:issue:`26675`)
- Bug in :func:`to_datetime` which raises ``TypeError`` for ``format='%Y%m%d'`` when called for invalid integer dates with length >= 6 digits with ``errors='ignore'``

Timedelta
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,9 @@ def isin(self, values, level=None):
-------
is_contained : ndarray (boolean dtype)
"""
if level is not None:
self._validate_index_level(level)

if not isinstance(values, type(self)):
try:
values = type(self)(values)
Expand Down
36 changes: 35 additions & 1 deletion pandas/tests/indexes/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import pytest

import pandas as pd
from pandas.core.indexes.api import Index, MultiIndex
from pandas.core.indexes.api import Index, MultiIndex, RangeIndex, PeriodIndex
import pandas.util.testing as tm

indices_list = [tm.makeUnicodeIndex(100),
Expand Down Expand Up @@ -47,3 +47,37 @@ def zero(request):
# For testing division by (or of) zero for Index with length 5, this
# gives several scalar-zeros and length-5 vector-zeros
return request.param


def _get_subclasses(cls):
for subclass in cls.__subclasses__():
yield from _get_subclasses(subclass)
yield subclass


# ``Index`` itself followed by all of its (direct and indirect) subclasses,
# keeping only the classes whose name is also an attribute of the top-level
# ``pandas`` namespace.
# The subclasses are sorted by name: iterating a ``set`` of classes has no
# guaranteed order, and this list is used to parametrize a fixture, so an
# unstable order would make the collected test order differ from run to run
# (and between pytest-xdist workers).
all_indexes = [
    index
    for index in [Index] + sorted(
        set(_get_subclasses(Index)),
        key=lambda klass: (klass.__name__, klass.__module__))
    if getattr(pd, index.__name__, None) is not None
]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the name check for?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can just simplify to if getattr(pd, index.__name__, False)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

missed that. thanks.



@pytest.fixture(params=all_indexes)
def all_index_types(request):
    """
    Fixture parametrized over the index classes listed in ``all_indexes``.

    Each parametrized run receives one class (not an instance): ``Index``
    or one of its subclasses available in the pandas namespace.
    """
    index_cls = request.param
    return index_cls


@pytest.fixture
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shouldn't this just work off of the indices_list? why creating extra machinery here?

def all_index_empty(all_index_types):
    """
    Fixture returning an empty instance of the given index class.

    Parameters
    ----------
    all_index_types : type
        An index class, as supplied by the ``all_index_types`` fixture.

    Returns
    -------
    An empty index of that class. It is named 'foo', except for MultiIndex
    subclasses, which get two empty levels named 'foo' and 'bar'.
    """
    index_cls = all_index_types

    # These classes are not built from ``cls([], name=...)`` like the rest,
    # so each one gets its own construction call.
    if issubclass(index_cls, RangeIndex):
        return index_cls(0, name='foo')

    if issubclass(index_cls, MultiIndex):
        return index_cls.from_arrays([[], []], names=['foo', 'bar'])

    if issubclass(index_cls, PeriodIndex):
        return index_cls([], freq='M', name='foo')

    return index_cls([], name='foo')
26 changes: 13 additions & 13 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1791,22 +1791,22 @@ def test_isin_level_kwarg(self, level, index):
tm.assert_numpy_array_equal(expected,
index.isin(values, level='foobar'))

@pytest.mark.parametrize("level", [1, 10, -2])
@pytest.mark.parametrize("index", [
Index(['qux', 'baz', 'foo', 'bar']),
# Float64Index overrides isin, so must be checked separately
Float64Index([1.0, 2.0, 3.0, 4.0])])
def test_isin_level_kwarg_raises_bad_index(self, level, index):
@pytest.mark.parametrize("level", [2, 10, -3])
def test_isin_level_kwarg_bad_level_raises(self, level, all_index_empty):
index = all_index_empty
with pytest.raises(IndexError, match='Too many levels'):
index.isin([], level=level)

@pytest.mark.parametrize("level", [1.0, 'foobar', 'xyzzy', np.nan])
@pytest.mark.parametrize("index", [
Index(['qux', 'baz', 'foo', 'bar']),
Float64Index([1.0, 2.0, 3.0, 4.0])])
def test_isin_level_kwarg_raises_key(self, level, index):
with pytest.raises(KeyError, match='must be same as name'):
index.isin([], level=level)
@pytest.mark.parametrize("label", [1.0, 'foobar', 'xyzzy', np.nan])
def test_isin_level_kwarg_bad_label_raises(
self, label, all_index_empty):
index = all_index_empty
if isinstance(index, pd.MultiIndex):
msg = "'Level {} not found'"
else:
msg = r"'Level {} must be same as name \(foo\)'"
with pytest.raises(KeyError, match=msg.format(label)):
index.isin([], level=label)

@pytest.mark.parametrize("empty", [[], Series(), np.array([])])
def test_isin_empty(self, empty):
Expand Down