From e9d664af628ea399393554a602a279c5986c066d Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Thu, 29 Nov 2018 19:34:37 +0100 Subject: [PATCH 1/6] raise ValueError when IntervalIndex is overlapping --- pandas/core/reshape/tile.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 8ad2a48e8767c..74839b81dab01 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -217,6 +217,8 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, bins[-1] += adj elif isinstance(bins, IntervalIndex): + if bins.is_overlapping: + raise ValueError('Overlapping IntervalIndex is not accepted.') pass else: bins = np.asarray(bins) From 4d4027bbe68945e5c22ad0402dd96672b350d9b8 Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Thu, 29 Nov 2018 19:35:21 +0100 Subject: [PATCH 2/6] pytest for it --- pandas/tests/reshape/test_tile.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/tests/reshape/test_tile.py b/pandas/tests/reshape/test_tile.py index f04e9a55a6c8d..c167de4d78ae2 100644 --- a/pandas/tests/reshape/test_tile.py +++ b/pandas/tests/reshape/test_tile.py @@ -91,6 +91,11 @@ def test_bins_from_intervalindex(self): tm.assert_numpy_array_equal(result.codes, np.array([1, 1, 2], dtype='int8')) + def test_bins_not_overlapping_from_intervalindex(self): + ii = pd.IntervalIndex.from_tuples([(0, 10), (2, 12), (4, 14)]) + with pytest.raises(ValueError): + pd.cut([5, 6], bins=ii) + def test_bins_not_monotonic(self): data = [.2, 1.4, 2.5, 6.2, 9.7, 2.1] pytest.raises(ValueError, cut, data, [0.1, 1.5, 1, 10]) From 2e27412d2d3731f5fff76bd723d4d22981f39688 Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Thu, 29 Nov 2018 19:40:02 +0100 Subject: [PATCH 3/6] remove pass --- pandas/core/reshape/tile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 74839b81dab01..a1a62a425ac39 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -219,7 +219,7 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, elif isinstance(bins, IntervalIndex): if bins.is_overlapping: raise ValueError('Overlapping IntervalIndex is not accepted.') - pass + else: bins = np.asarray(bins) bins = _convert_bin_to_numeric_type(bins, dtype) From fe0fc43f26c6d75b639d72608245c84cbca1a3a3 Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Thu, 29 Nov 2018 22:52:23 +0100 Subject: [PATCH 4/6] changes based on reviews --- pandas/core/reshape/tile.py | 3 ++- pandas/tests/reshape/test_tile.py | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index a1a62a425ac39..31766737593a0 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -43,7 +43,8 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, and maximum values of `x`. * sequence of scalars : Defines the bin edges allowing for non-uniform width. No extension of the range of `x` is done. - * IntervalIndex : Defines the exact bins to be used. + * IntervalIndex : Defines the exact bins to be used. Note that IntervalIndex + for `bins` must be non-overlapping. right : bool, default True Indicates whether `bins` includes the rightmost edge or not. If diff --git a/pandas/tests/reshape/test_tile.py b/pandas/tests/reshape/test_tile.py index c167de4d78ae2..b0445f5a9e2d5 100644 --- a/pandas/tests/reshape/test_tile.py +++ b/pandas/tests/reshape/test_tile.py @@ -92,9 +92,10 @@ def test_bins_from_intervalindex(self): np.array([1, 1, 2], dtype='int8')) def test_bins_not_overlapping_from_intervalindex(self): - ii = pd.IntervalIndex.from_tuples([(0, 10), (2, 12), (4, 14)]) + # verify if issue 23980 is properly solved. + ii = IntervalIndex.from_tuples([(0, 10), (2, 12), (4, 14)]) with pytest.raises(ValueError): - pd.cut([5, 6], bins=ii) + cut([5, 6], bins=ii) def test_bins_not_monotonic(self): data = [.2, 1.4, 2.5, 6.2, 9.7, 2.1] From fa019c6da58d651f735688b85cbded6a46997c3f Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Thu, 29 Nov 2018 23:22:56 +0100 Subject: [PATCH 5/6] add issue explanation in whatsnew --- doc/source/whatsnew/v0.24.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index f888648a9363e..3c01a6d330071 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1529,6 +1529,7 @@ Reshaping - Bug in :func:`pandas.melt` when passing column names that are not present in ``DataFrame`` (:issue:`23575`) - Bug in :meth:`DataFrame.append` with a :class:`Series` with a dateutil timezone would raise a ``TypeError`` (:issue:`23682`) - Bug in ``Series`` construction when passing no data and ``dtype=str`` (:issue:`22477`) +- Bug in :func:`cut` with ``bins`` as an overlapping ``IntervalIndex`` where multiple bins were returned per item instead of raising a ``ValueError`` (:issue:`23980`) .. _whatsnew_0240.bug_fixes.sparse: From 4bfcea2601d2199225c8002c6e7c82af0f178694 Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Fri, 30 Nov 2018 09:16:53 +0100 Subject: [PATCH 6/6] fix linting failure --- pandas/core/reshape/tile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 31766737593a0..5d5f6cf8102be 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -43,8 +43,8 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, and maximum values of `x`. * sequence of scalars : Defines the bin edges allowing for non-uniform width. No extension of the range of `x` is done. - * IntervalIndex : Defines the exact bins to be used. Note that IntervalIndex - for `bins` must be non-overlapping. + * IntervalIndex : Defines the exact bins to be used. Note that + IntervalIndex for `bins` must be non-overlapping. right : bool, default True Indicates whether `bins` includes the rightmost edge or not. If