diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index f888648a9363e..3c01a6d330071 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1529,6 +1529,7 @@ Reshaping - Bug in :func:`pandas.melt` when passing column names that are not present in ``DataFrame`` (:issue:`23575`) - Bug in :meth:`DataFrame.append` with a :class:`Series` with a dateutil timezone would raise a ``TypeError`` (:issue:`23682`) - Bug in ``Series`` construction when passing no data and ``dtype=str`` (:issue:`22477`) +- Bug in :func:`cut` with ``bins`` as an overlapping ``IntervalIndex`` where multiple bins were returned per item instead of raising a ``ValueError`` (:issue:`23980`) .. _whatsnew_0240.bug_fixes.sparse: diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 8ad2a48e8767c..5d5f6cf8102be 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -43,7 +43,8 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, and maximum values of `x`. * sequence of scalars : Defines the bin edges allowing for non-uniform width. No extension of the range of `x` is done. - * IntervalIndex : Defines the exact bins to be used. + * IntervalIndex : Defines the exact bins to be used. Note that + IntervalIndex for `bins` must be non-overlapping. right : bool, default True Indicates whether `bins` includes the rightmost edge or not. 
If @@ -217,7 +218,9 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, bins[-1] += adj elif isinstance(bins, IntervalIndex): - pass + if bins.is_overlapping: + raise ValueError('Overlapping IntervalIndex is not accepted.') + else: bins = np.asarray(bins) bins = _convert_bin_to_numeric_type(bins, dtype) diff --git a/pandas/tests/reshape/test_tile.py b/pandas/tests/reshape/test_tile.py index f04e9a55a6c8d..b0445f5a9e2d5 100644 --- a/pandas/tests/reshape/test_tile.py +++ b/pandas/tests/reshape/test_tile.py @@ -91,6 +91,12 @@ def test_bins_from_intervalindex(self): tm.assert_numpy_array_equal(result.codes, np.array([1, 1, 2], dtype='int8')) + def test_bins_not_overlapping_from_intervalindex(self): + # GH 23980: overlapping IntervalIndex bins must raise ValueError + ii = IntervalIndex.from_tuples([(0, 10), (2, 12), (4, 14)]) + with pytest.raises(ValueError): + cut([5, 6], bins=ii) + def test_bins_not_monotonic(self): data = [.2, 1.4, 2.5, 6.2, 9.7, 2.1] pytest.raises(ValueError, cut, data, [0.1, 1.5, 1, 10])