From 11876a34faaaa96c1e57040f8b7e11171223e2ed Mon Sep 17 00:00:00 2001 From: raj-thapa Date: Fri, 11 Aug 2023 14:30:11 +0000 Subject: [PATCH 01/10] #51826_Fixed bug when np.nan is used as index value with .reindex on pd.Series with pd.IntervalIndex --- pandas/_libs/intervaltree.pxi.in | 4 ++++ .../tests/indexing/interval/test_interval.py | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/pandas/_libs/intervaltree.pxi.in b/pandas/_libs/intervaltree.pxi.in index 0b99aebbd3816..3de7f8e025c95 100644 --- a/pandas/_libs/intervaltree.pxi.in +++ b/pandas/_libs/intervaltree.pxi.in @@ -391,6 +391,10 @@ cdef class {{dtype_title}}Closed{{closed_title}}IntervalNode(IntervalNode): """Recursively query this node and its sub-nodes for intervals that overlap with the query point. """ + # GH 51826: ensures nan is handled properly during reindexing + if np.isnan(point): + return + cdef: int64_t[:] indices {{dtype}}_t[:] values diff --git a/pandas/tests/indexing/interval/test_interval.py b/pandas/tests/indexing/interval/test_interval.py index 717cb7de42021..267d1ce420385 100644 --- a/pandas/tests/indexing/interval/test_interval.py +++ b/pandas/tests/indexing/interval/test_interval.py @@ -172,3 +172,21 @@ def test_mi_intervalindex_slicing_with_scalar(self): ) expected = Series([1, 6, 2, 8, 7], index=expected_index, name="value") tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "base, expected_result", + [ + (10, Series([np.nan, 0], index=[np.nan, 1.0], dtype=float)), + (100, Series([np.nan, 0], index=[np.nan, 1.0], dtype=float)), + (101, Series([np.nan, 0], index=[np.nan, 1.0], dtype=float)), + (1010, Series([np.nan, 0], index=[np.nan, 1.0], dtype=float)), + ], + ) + def test_interval_index_reindex_behavior(self, base, expected_result): + # GH 51826 + d = Series( + range(base), + index=IntervalIndex.from_arrays(range(base), range(1, base + 1)), + ) + result = d.reindex(index=[np.nan, 1.0]) + tm.assert_series_equal(result, expected_result) 
From d141774d5d1870fa83800871bdbfce5b839f178d Mon Sep 17 00:00:00 2001 From: raj-thapa Date: Fri, 11 Aug 2023 19:20:03 +0000 Subject: [PATCH 02/10] Fix casting-related error in test_interval_index_reindex_behavior --- pandas/tests/indexing/interval/test_interval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexing/interval/test_interval.py b/pandas/tests/indexing/interval/test_interval.py index 267d1ce420385..2cb7ffff4710d 100644 --- a/pandas/tests/indexing/interval/test_interval.py +++ b/pandas/tests/indexing/interval/test_interval.py @@ -189,4 +189,4 @@ def test_interval_index_reindex_behavior(self, base, expected_result): index=IntervalIndex.from_arrays(range(base), range(1, base + 1)), ) result = d.reindex(index=[np.nan, 1.0]) - tm.assert_series_equal(result, expected_result) + tm.assert_series_equal(result, expected_result, check_dtype=False) From 1a16cb30b9f67ecd1e976aafded8c07331cb92a7 Mon Sep 17 00:00:00 2001 From: raj-thapa Date: Fri, 11 Aug 2023 20:45:26 +0000 Subject: [PATCH 03/10] test: Added specific dtype on test cases --- pandas/tests/indexing/interval/test_interval.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/tests/indexing/interval/test_interval.py b/pandas/tests/indexing/interval/test_interval.py index 2cb7ffff4710d..9a03fee32b23f 100644 --- a/pandas/tests/indexing/interval/test_interval.py +++ b/pandas/tests/indexing/interval/test_interval.py @@ -176,10 +176,10 @@ def test_mi_intervalindex_slicing_with_scalar(self): @pytest.mark.parametrize( "base, expected_result", [ - (10, Series([np.nan, 0], index=[np.nan, 1.0], dtype=float)), - (100, Series([np.nan, 0], index=[np.nan, 1.0], dtype=float)), - (101, Series([np.nan, 0], index=[np.nan, 1.0], dtype=float)), - (1010, Series([np.nan, 0], index=[np.nan, 1.0], dtype=float)), + (10, Series([np.nan, 0], index=[np.nan, 1.0], dtype=np.float64)), + (100, Series([np.nan, 0], index=[np.nan, 1.0], dtype=np.float64)), + (101, 
Series([np.nan, 0], index=[np.nan, 1.0], dtype=np.float64)), + (1010, Series([np.nan, 0], index=[np.nan, 1.0], dtype=np.float64)), ], ) def test_interval_index_reindex_behavior(self, base, expected_result): @@ -189,4 +189,4 @@ def test_interval_index_reindex_behavior(self, base, expected_result): index=IntervalIndex.from_arrays(range(base), range(1, base + 1)), ) result = d.reindex(index=[np.nan, 1.0]) - tm.assert_series_equal(result, expected_result, check_dtype=False) + tm.assert_series_equal(result, expected_result) From 82e7c0be1aeeff6ccd88f793cbd66e95984b11c4 Mon Sep 17 00:00:00 2001 From: raj-thapa Date: Sat, 12 Aug 2023 01:44:21 +0000 Subject: [PATCH 04/10] Tried to fix unexpected casting from (int64) to (int32) --- pandas/_libs/intervaltree.pxi.in | 4 ++-- pandas/tests/indexing/interval/test_interval.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/intervaltree.pxi.in b/pandas/_libs/intervaltree.pxi.in index 3de7f8e025c95..435df49eba68f 100644 --- a/pandas/_libs/intervaltree.pxi.in +++ b/pandas/_libs/intervaltree.pxi.in @@ -193,13 +193,13 @@ cdef class IntervalTree(IntervalMixin): cdef take(ndarray source, ndarray indices): """Take the given positions from a 1D ndarray """ - return PyArray_Take(source, indices, 0) + return PyArray_Take(source, indices, np.int64(0)) cdef sort_values_and_indices(all_values, all_indices, subset): indices = take(all_indices, subset) values = take(all_values, subset) - sorter = PyArray_ArgSort(values, 0, NPY_QUICKSORT) + sorter = PyArray_ArgSort(values, np.int64(0), NPY_QUICKSORT) sorted_values = take(values, sorter) sorted_indices = take(indices, sorter) return sorted_values, sorted_indices diff --git a/pandas/tests/indexing/interval/test_interval.py b/pandas/tests/indexing/interval/test_interval.py index 9a03fee32b23f..dba8f2f0eaa75 100644 --- a/pandas/tests/indexing/interval/test_interval.py +++ b/pandas/tests/indexing/interval/test_interval.py @@ -176,10 +176,10 @@ def 
test_mi_intervalindex_slicing_with_scalar(self): @pytest.mark.parametrize( "base, expected_result", [ - (10, Series([np.nan, 0], index=[np.nan, 1.0], dtype=np.float64)), - (100, Series([np.nan, 0], index=[np.nan, 1.0], dtype=np.float64)), - (101, Series([np.nan, 0], index=[np.nan, 1.0], dtype=np.float64)), - (1010, Series([np.nan, 0], index=[np.nan, 1.0], dtype=np.float64)), + (10, Series([np.nan, 0.0], index=[np.nan, 1.0], dtype=np.float64)), + (100, Series([np.nan, 0.0], index=[np.nan, 1.0], dtype=np.float64)), + (101, Series([np.nan, 0.0], index=[np.nan, 1.0], dtype=np.float64)), + (1010, Series([np.nan, 0.0], index=[np.nan, 1.0], dtype=np.float64)), ], ) def test_interval_index_reindex_behavior(self, base, expected_result): From 012edcf73ad58a521bff9f55084f04844c10597f Mon Sep 17 00:00:00 2001 From: raj-thapa Date: Sun, 13 Aug 2023 18:50:31 +0000 Subject: [PATCH 05/10] Revert "Tried to fix unexpected casting from (int64) to (int32)" This reverts commit 82e7c0be1aeeff6ccd88f793cbd66e95984b11c4. 
--- pandas/_libs/intervaltree.pxi.in | 4 ++-- pandas/tests/indexing/interval/test_interval.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/intervaltree.pxi.in b/pandas/_libs/intervaltree.pxi.in index 435df49eba68f..3de7f8e025c95 100644 --- a/pandas/_libs/intervaltree.pxi.in +++ b/pandas/_libs/intervaltree.pxi.in @@ -193,13 +193,13 @@ cdef class IntervalTree(IntervalMixin): cdef take(ndarray source, ndarray indices): """Take the given positions from a 1D ndarray """ - return PyArray_Take(source, indices, np.int64(0)) + return PyArray_Take(source, indices, 0) cdef sort_values_and_indices(all_values, all_indices, subset): indices = take(all_indices, subset) values = take(all_values, subset) - sorter = PyArray_ArgSort(values, np.int64(0), NPY_QUICKSORT) + sorter = PyArray_ArgSort(values, 0, NPY_QUICKSORT) sorted_values = take(values, sorter) sorted_indices = take(indices, sorter) return sorted_values, sorted_indices diff --git a/pandas/tests/indexing/interval/test_interval.py b/pandas/tests/indexing/interval/test_interval.py index dba8f2f0eaa75..9a03fee32b23f 100644 --- a/pandas/tests/indexing/interval/test_interval.py +++ b/pandas/tests/indexing/interval/test_interval.py @@ -176,10 +176,10 @@ def test_mi_intervalindex_slicing_with_scalar(self): @pytest.mark.parametrize( "base, expected_result", [ - (10, Series([np.nan, 0.0], index=[np.nan, 1.0], dtype=np.float64)), - (100, Series([np.nan, 0.0], index=[np.nan, 1.0], dtype=np.float64)), - (101, Series([np.nan, 0.0], index=[np.nan, 1.0], dtype=np.float64)), - (1010, Series([np.nan, 0.0], index=[np.nan, 1.0], dtype=np.float64)), + (10, Series([np.nan, 0], index=[np.nan, 1.0], dtype=np.float64)), + (100, Series([np.nan, 0], index=[np.nan, 1.0], dtype=np.float64)), + (101, Series([np.nan, 0], index=[np.nan, 1.0], dtype=np.float64)), + (1010, Series([np.nan, 0], index=[np.nan, 1.0], dtype=np.float64)), ], ) def test_interval_index_reindex_behavior(self, base, expected_result): From 
98052ffa831bbd389930d95b0758d6a0d065507e Mon Sep 17 00:00:00 2001 From: raj-thapa Date: Sun, 13 Aug 2023 19:00:31 +0000 Subject: [PATCH 06/10] Tried to fix unexpected casting --- pandas/_libs/intervaltree.pxi.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/intervaltree.pxi.in b/pandas/_libs/intervaltree.pxi.in index 3de7f8e025c95..235e634307413 100644 --- a/pandas/_libs/intervaltree.pxi.in +++ b/pandas/_libs/intervaltree.pxi.in @@ -193,7 +193,7 @@ cdef class IntervalTree(IntervalMixin): cdef take(ndarray source, ndarray indices): """Take the given positions from a 1D ndarray """ - return PyArray_Take(source, indices, 0) + return source[indices] cdef sort_values_and_indices(all_values, all_indices, subset): From 1d6f58c8937b522900fe47d2dfc2a69f4c2c5f69 Mon Sep 17 00:00:00 2001 From: raj-thapa Date: Sun, 13 Aug 2023 19:57:46 +0000 Subject: [PATCH 07/10] Revert Tried to fix unexpected casting --- pandas/_libs/intervaltree.pxi.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/intervaltree.pxi.in b/pandas/_libs/intervaltree.pxi.in index 235e634307413..3de7f8e025c95 100644 --- a/pandas/_libs/intervaltree.pxi.in +++ b/pandas/_libs/intervaltree.pxi.in @@ -193,7 +193,7 @@ cdef class IntervalTree(IntervalMixin): cdef take(ndarray source, ndarray indices): """Take the given positions from a 1D ndarray """ - return source[indices] + return PyArray_Take(source, indices, 0) cdef sort_values_and_indices(all_values, all_indices, subset): From 7166af916c9f0571067f27d3a0e5cc467ad796b2 Mon Sep 17 00:00:00 2001 From: raj-thapa Date: Mon, 14 Aug 2023 14:15:52 +0000 Subject: [PATCH 08/10] #51826_Fixed unexpected casting error --- pandas/tests/indexing/interval/test_interval.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexing/interval/test_interval.py b/pandas/tests/indexing/interval/test_interval.py index 9a03fee32b23f..64b2a67640d1a 100644 --- 
a/pandas/tests/indexing/interval/test_interval.py +++ b/pandas/tests/indexing/interval/test_interval.py @@ -184,9 +184,11 @@ def test_mi_intervalindex_slicing_with_scalar(self): ) def test_interval_index_reindex_behavior(self, base, expected_result): # GH 51826 + left = np.arange(base, dtype=np.int64) + right = np.arange(1, base + 1, dtype=np.int64) d = Series( range(base), - index=IntervalIndex.from_arrays(range(base), range(1, base + 1)), + index=IntervalIndex.from_arrays(left, right), ) result = d.reindex(index=[np.nan, 1.0]) tm.assert_series_equal(result, expected_result) From e1abb0d0148e4d6ecd368c277808697bb0402efe Mon Sep 17 00:00:00 2001 From: raj-thapa Date: Mon, 14 Aug 2023 20:46:43 +0000 Subject: [PATCH 09/10] Set dtype as int32 --- pandas/tests/indexing/interval/test_interval.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/pandas/tests/indexing/interval/test_interval.py b/pandas/tests/indexing/interval/test_interval.py index 64b2a67640d1a..e9c7d34a2c87e 100644 --- a/pandas/tests/indexing/interval/test_interval.py +++ b/pandas/tests/indexing/interval/test_interval.py @@ -184,11 +184,8 @@ def test_mi_intervalindex_slicing_with_scalar(self): ) def test_interval_index_reindex_behavior(self, base, expected_result): # GH 51826 - left = np.arange(base, dtype=np.int64) - right = np.arange(1, base + 1, dtype=np.int64) - d = Series( - range(base), - index=IntervalIndex.from_arrays(left, right), - ) + left = np.arange(base, dtype=np.int32) + right = np.arange(1, base + 1, dtype=np.int32) + d = Series(range(base), index=IntervalIndex.from_arrays(left, right)) result = d.reindex(index=[np.nan, 1.0]) tm.assert_series_equal(result, expected_result) From 5c96289cee2eb3640d5e9220202dd5c05f62e4d7 Mon Sep 17 00:00:00 2001 From: raj-thapa Date: Tue, 15 Aug 2023 00:04:34 +0000 Subject: [PATCH 10/10] Removing dtypes from arrays --- pandas/tests/indexing/interval/test_interval.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/pandas/tests/indexing/interval/test_interval.py b/pandas/tests/indexing/interval/test_interval.py index e9c7d34a2c87e..d9428113b297c 100644 --- a/pandas/tests/indexing/interval/test_interval.py +++ b/pandas/tests/indexing/interval/test_interval.py @@ -184,8 +184,8 @@ def test_mi_intervalindex_slicing_with_scalar(self): ) def test_interval_index_reindex_behavior(self, base, expected_result): # GH 51826 - left = np.arange(base, dtype=np.int32) - right = np.arange(1, base + 1, dtype=np.int32) + left = np.arange(base) + right = np.arange(1, base + 1) d = Series(range(base), index=IntervalIndex.from_arrays(left, right)) result = d.reindex(index=[np.nan, 1.0]) tm.assert_series_equal(result, expected_result)