diff --git a/pandas/tests/arrays/sparse/test_libsparse.py b/pandas/tests/arrays/sparse/test_libsparse.py index 527ae3ce37d1e..35d58c29b1e03 100644 --- a/pandas/tests/arrays/sparse/test_libsparse.py +++ b/pandas/tests/arrays/sparse/test_libsparse.py @@ -16,183 +16,134 @@ TEST_LENGTH = 20 -plain_case = { - "xloc": [0, 7, 15], - "xlen": [3, 5, 5], - "yloc": [2, 9, 14], - "ylen": [2, 3, 5], - "intersect_loc": [2, 9, 15], - "intersect_len": [1, 3, 4], -} -delete_blocks = { - "xloc": [0, 5], - "xlen": [4, 4], - "yloc": [1], - "ylen": [4], - "intersect_loc": [1], - "intersect_len": [3], -} -split_blocks = { - "xloc": [0], - "xlen": [10], - "yloc": [0, 5], - "ylen": [3, 7], - "intersect_loc": [0, 5], - "intersect_len": [3, 5], -} -skip_block = { - "xloc": [10], - "xlen": [5], - "yloc": [0, 12], - "ylen": [5, 3], - "intersect_loc": [12], - "intersect_len": [3], -} - -no_intersect = { - "xloc": [0, 10], - "xlen": [4, 6], - "yloc": [5, 17], - "ylen": [4, 2], - "intersect_loc": [], - "intersect_len": [], -} - - -def check_cases(_check_case): - def _check_case_dict(case): - _check_case( - case["xloc"], - case["xlen"], - case["yloc"], - case["ylen"], - case["intersect_loc"], - case["intersect_len"], - ) - - _check_case_dict(plain_case) - _check_case_dict(delete_blocks) - _check_case_dict(split_blocks) - _check_case_dict(skip_block) - _check_case_dict(no_intersect) - - # one or both is empty - _check_case([0], [5], [], [], [], []) - _check_case([], [], [], [], [], []) +plain_case = [ + [0, 7, 15], + [3, 5, 5], + [2, 9, 14], + [2, 3, 5], + [2, 9, 15], + [1, 3, 4], +] +delete_blocks = [ + [0, 5], + [4, 4], + [1], + [4], + [1], + [3], +] +split_blocks = [ + [0], + [10], + [0, 5], + [3, 7], + [0, 5], + [3, 5], +] +skip_block = [ + [10], + [5], + [0, 12], + [5, 3], + [12], + [3], +] + +no_intersect = [ + [0, 10], + [4, 6], + [5, 17], + [4, 2], + [], + [], +] + +one_empty = [ + [0], + [5], + [], + [], + [], + [], +] + +both_empty = [ # type: ignore[var-annotated] + [], + [], + [], + [], + [], + [], +] + +CASES = [plain_case, delete_blocks, split_blocks, skip_block, no_intersect, one_empty] +IDS = [ + "plain_case", + "delete_blocks", + "split_blocks", + "skip_block", + "no_intersect", + "one_empty", +] class TestSparseIndexUnion: - def test_index_make_union(self): - def _check_case(xloc, xlen, yloc, ylen, eloc, elen): - xindex = BlockIndex(TEST_LENGTH, xloc, xlen) - yindex = BlockIndex(TEST_LENGTH, yloc, ylen) - bresult = xindex.make_union(yindex) - assert isinstance(bresult, BlockIndex) - tm.assert_numpy_array_equal(bresult.blocs, np.array(eloc, dtype=np.int32)) - tm.assert_numpy_array_equal( - bresult.blengths, np.array(elen, dtype=np.int32) - ) - - ixindex = xindex.to_int_index() - iyindex = yindex.to_int_index() - iresult = ixindex.make_union(iyindex) - assert isinstance(iresult, IntIndex) - tm.assert_numpy_array_equal(iresult.indices, bresult.to_int_index().indices) - - """ - x: ---- - y: ---- - r: -------- - """ - xloc = [0] - xlen = [5] - yloc = [5] - ylen = [4] - eloc = [0] - elen = [9] - _check_case(xloc, xlen, yloc, ylen, eloc, elen) - """ - x: ----- ----- - y: ----- -- - """ - xloc = [0, 10] - xlen = [5, 5] - yloc = [2, 17] - ylen = [5, 2] - eloc = [0, 10, 17] - elen = [7, 5, 2] - _check_case(xloc, xlen, yloc, ylen, eloc, elen) - """ - x: ------ - y: ------- - r: ---------- - """ - xloc = [1] - xlen = [5] - yloc = [3] - ylen = [5] - eloc = [1] - elen = [7] - _check_case(xloc, xlen, yloc, ylen, eloc, elen) - """ - x: ------ ----- - y: ------- - r: ------------- - """ - xloc = [2, 10] - xlen = [4, 4] - yloc = [4] - ylen = [8] - eloc = [2] - elen = [12] - _check_case(xloc, xlen, yloc, ylen, eloc, elen) - """ - x: --- ----- - y: ------- - r: ------------- - """ - xloc = [0, 5] - xlen = [3, 5] - yloc = [0] - ylen = [7] - eloc = [0] - elen = [10] - _check_case(xloc, xlen, yloc, ylen, eloc, elen) - """ - x: ------ ----- - y: ------- --- - r: ------------- - """ - xloc = [2, 10] - xlen = [4, 4] - yloc = [4, 13] - ylen = [8, 4] - eloc = [2] - elen = [15] - _check_case(xloc, xlen, yloc, ylen, eloc, elen) - """ - x: ---------------------- - y: ---- ---- --- - r: ---------------------- - """ - xloc = [2] - xlen = [15] - yloc = [4, 9, 14] - ylen = [3, 2, 2] - eloc = [2] - elen = [15] - _check_case(xloc, xlen, yloc, ylen, eloc, elen) - """ - x: ---- --- - y: --- --- - """ - xloc = [0, 10] - xlen = [3, 3] - yloc = [5, 15] - ylen = [2, 2] - eloc = [0, 5, 10, 15] - elen = [3, 2, 3, 2] - _check_case(xloc, xlen, yloc, ylen, eloc, elen) + @pytest.mark.parametrize( + "xloc, xlen, yloc, ylen, eloc, elen", + [ + [[0], [5], [5], [4], [0], [9]], + [[0, 10], [5, 5], [2, 17], [5, 2], [0, 10, 17], [7, 5, 2]], + [[1], [5], [3], [5], [1], [7]], + [[2, 10], [4, 4], [4], [8], [2], [12]], + [[0, 5], [3, 5], [0], [7], [0], [10]], + [[2, 10], [4, 4], [4, 13], [8, 4], [2], [15]], + [[2], [15], [4, 9, 14], [3, 2, 2], [2], [15]], + [[0, 10], [3, 3], [5, 15], [2, 2], [0, 5, 10, 15], [3, 2, 3, 2]], + ], + ) + def test_index_make_union(self, xloc, xlen, yloc, ylen, eloc, elen): + # Case 1 + # x: ---- + # y: ---- + # r: -------- + # Case 2 + # x: ----- ----- + # y: ----- -- + # Case 3 + # x: ------ + # y: ------- + # r: ---------- + # Case 4 + # x: ------ ----- + # y: ------- + # r: ------------- + # Case 5 + # x: --- ----- + # y: ------- + # r: ------------- + # Case 6 + # x: ------ ----- + # y: ------- --- + # r: ------------- + # Case 7 + # x: ---------------------- + # y: ---- ---- --- + # r: ---------------------- + # Case 8 + # x: ---- --- + # y: --- --- + xindex = BlockIndex(TEST_LENGTH, xloc, xlen) + yindex = BlockIndex(TEST_LENGTH, yloc, ylen) + bresult = xindex.make_union(yindex) + assert isinstance(bresult, BlockIndex) + tm.assert_numpy_array_equal(bresult.blocs, np.array(eloc, dtype=np.int32)) + tm.assert_numpy_array_equal(bresult.blengths, np.array(elen, dtype=np.int32)) + + ixindex = xindex.to_int_index() + iyindex = yindex.to_int_index() + iresult = ixindex.make_union(iyindex) + assert isinstance(iresult, IntIndex) + tm.assert_numpy_array_equal(iresult.indices, bresult.to_int_index().indices) def test_int_index_make_union(self): a = IntIndex(5, np.array([0, 3, 4], dtype=np.int32)) @@ -229,31 +180,23 @@ def test_int_index_make_union(self): class TestSparseIndexIntersect: @td.skip_if_windows - def test_intersect(self): - def _check_correct(a, b, expected): - result = a.intersect(b) - assert result.equals(expected) + @pytest.mark.parametrize("xloc, xlen, yloc, ylen, eloc, elen", CASES, ids=IDS) + def test_intersect(self, xloc, xlen, yloc, ylen, eloc, elen): + xindex = BlockIndex(TEST_LENGTH, xloc, xlen) + yindex = BlockIndex(TEST_LENGTH, yloc, ylen) + expected = BlockIndex(TEST_LENGTH, eloc, elen) + longer_index = BlockIndex(TEST_LENGTH + 1, yloc, ylen) + + result = xindex.intersect(yindex) + assert result.equals(expected) + result = xindex.to_int_index().intersect(yindex.to_int_index()) + assert result.equals(expected.to_int_index()) - def _check_length_exc(a, longer): - msg = "Indices must reference same underlying length" - with pytest.raises(Exception, match=msg): - a.intersect(longer) - - def _check_case(xloc, xlen, yloc, ylen, eloc, elen): - xindex = BlockIndex(TEST_LENGTH, xloc, xlen) - yindex = BlockIndex(TEST_LENGTH, yloc, ylen) - expected = BlockIndex(TEST_LENGTH, eloc, elen) - longer_index = BlockIndex(TEST_LENGTH + 1, yloc, ylen) - - _check_correct(xindex, yindex, expected) - _check_correct( - xindex.to_int_index(), yindex.to_int_index(), expected.to_int_index() - ) - - _check_length_exc(xindex, longer_index) - _check_length_exc(xindex.to_int_index(), longer_index.to_int_index()) - - check_cases(_check_case) + msg = "Indices must reference same underlying length" + with pytest.raises(Exception, match=msg): + xindex.intersect(longer_index) + with pytest.raises(Exception, match=msg): + xindex.to_int_index().intersect(longer_index.to_int_index()) def test_intersect_empty(self): xindex = IntIndex(4, np.array([], dtype=np.int32)) @@ -266,18 +209,19 @@ def test_intersect_empty(self): assert xindex.intersect(yindex).equals(xindex) assert yindex.intersect(xindex).equals(xindex) - def test_intersect_identical(self): - cases = [ - IntIndex(5, np.array([1, 2], dtype=np.int32)), - IntIndex(5, np.array([0, 2, 4], dtype=np.int32)), - IntIndex(0, np.array([], dtype=np.int32)), - IntIndex(5, np.array([], dtype=np.int32)), - ] - - for case in cases: - assert case.intersect(case).equals(case) - case = case.to_block_index() - assert case.intersect(case).equals(case) + @pytest.mark.parametrize( + "case", + [ + IntIndex(5, np.array([1, 2], dtype=np.int32)), # type: ignore[arg-type] + IntIndex(5, np.array([0, 2, 4], dtype=np.int32)), # type: ignore[arg-type] + IntIndex(0, np.array([], dtype=np.int32)), # type: ignore[arg-type] + IntIndex(5, np.array([], dtype=np.int32)), # type: ignore[arg-type] + ], + ) + def test_intersect_identical(self, case): + assert case.intersect(case).equals(case) + case = case.to_block_index() + assert case.intersect(case).equals(case) class TestSparseIndexCommon: @@ -324,74 +268,71 @@ def test_block_internal(self): tm.assert_numpy_array_equal(idx.blocs, np.array([0, 2], dtype=np.int32)) tm.assert_numpy_array_equal(idx.blengths, np.array([1, 2], dtype=np.int32)) - def test_lookup(self): - for kind in ["integer", "block"]: - idx = make_sparse_index(4, np.array([2, 3], dtype=np.int32), kind=kind) - assert idx.lookup(-1) == -1 - assert idx.lookup(0) == -1 - assert idx.lookup(1) == -1 - assert idx.lookup(2) == 0 - assert idx.lookup(3) == 1 - assert idx.lookup(4) == -1 - - idx = make_sparse_index(4, np.array([], dtype=np.int32), kind=kind) - - for i in range(-1, 5): - assert idx.lookup(i) == -1 - - idx = make_sparse_index( - 4, np.array([0, 1, 2, 3], dtype=np.int32), kind=kind - ) - assert idx.lookup(-1) == -1 - assert idx.lookup(0) == 0 - assert idx.lookup(1) == 1 - assert idx.lookup(2) == 2 - assert idx.lookup(3) == 3 - assert idx.lookup(4) == -1 - - idx = make_sparse_index(4, np.array([0, 2, 3], dtype=np.int32), kind=kind) - assert idx.lookup(-1) == -1 - assert idx.lookup(0) == 0 - assert idx.lookup(1) == -1 - assert idx.lookup(2) == 1 - assert idx.lookup(3) == 2 - assert idx.lookup(4) == -1 - - def test_lookup_array(self): - for kind in ["integer", "block"]: - idx = make_sparse_index(4, np.array([2, 3], dtype=np.int32), kind=kind) - - res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32)) - exp = np.array([-1, -1, 0], dtype=np.int32) - tm.assert_numpy_array_equal(res, exp) - - res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32)) - exp = np.array([-1, 0, -1, 1], dtype=np.int32) - tm.assert_numpy_array_equal(res, exp) - - idx = make_sparse_index(4, np.array([], dtype=np.int32), kind=kind) - res = idx.lookup_array(np.array([-1, 0, 2, 4], dtype=np.int32)) - exp = np.array([-1, -1, -1, -1], dtype=np.int32) - - idx = make_sparse_index( - 4, np.array([0, 1, 2, 3], dtype=np.int32), kind=kind - ) - res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32)) - exp = np.array([-1, 0, 2], dtype=np.int32) - tm.assert_numpy_array_equal(res, exp) - - res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32)) - exp = np.array([-1, 2, 1, 3], dtype=np.int32) - tm.assert_numpy_array_equal(res, exp) - - idx = make_sparse_index(4, np.array([0, 2, 3], dtype=np.int32), kind=kind) - res = idx.lookup_array(np.array([2, 1, 3, 0], dtype=np.int32)) - exp = np.array([1, -1, 2, 0], dtype=np.int32) - tm.assert_numpy_array_equal(res, exp) - - res = idx.lookup_array(np.array([1, 4, 2, 5], dtype=np.int32)) - exp = np.array([-1, -1, 1, -1], dtype=np.int32) - tm.assert_numpy_array_equal(res, exp) + @pytest.mark.parametrize("kind", ["integer", "block"]) + def test_lookup(self, kind): + idx = make_sparse_index(4, np.array([2, 3], dtype=np.int32), kind=kind) + assert idx.lookup(-1) == -1 + assert idx.lookup(0) == -1 + assert idx.lookup(1) == -1 + assert idx.lookup(2) == 0 + assert idx.lookup(3) == 1 + assert idx.lookup(4) == -1 + + idx = make_sparse_index(4, np.array([], dtype=np.int32), kind=kind) + + for i in range(-1, 5): + assert idx.lookup(i) == -1 + + idx = make_sparse_index(4, np.array([0, 1, 2, 3], dtype=np.int32), kind=kind) + assert idx.lookup(-1) == -1 + assert idx.lookup(0) == 0 + assert idx.lookup(1) == 1 + assert idx.lookup(2) == 2 + assert idx.lookup(3) == 3 + assert idx.lookup(4) == -1 + + idx = make_sparse_index(4, np.array([0, 2, 3], dtype=np.int32), kind=kind) + assert idx.lookup(-1) == -1 + assert idx.lookup(0) == 0 + assert idx.lookup(1) == -1 + assert idx.lookup(2) == 1 + assert idx.lookup(3) == 2 + assert idx.lookup(4) == -1 + + @pytest.mark.parametrize("kind", ["integer", "block"]) + def test_lookup_array(self, kind): + idx = make_sparse_index(4, np.array([2, 3], dtype=np.int32), kind=kind) + + res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32)) + exp = np.array([-1, -1, 0], dtype=np.int32) + tm.assert_numpy_array_equal(res, exp) + + res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32)) + exp = np.array([-1, 0, -1, 1], dtype=np.int32) + tm.assert_numpy_array_equal(res, exp) + + idx = make_sparse_index(4, np.array([], dtype=np.int32), kind=kind) + res = idx.lookup_array(np.array([-1, 0, 2, 4], dtype=np.int32)) + exp = np.array([-1, -1, -1, -1], dtype=np.int32) + tm.assert_numpy_array_equal(res, exp) + + idx = make_sparse_index(4, np.array([0, 1, 2, 3], dtype=np.int32), kind=kind) + res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32)) + exp = np.array([-1, 0, 2], dtype=np.int32) + tm.assert_numpy_array_equal(res, exp) + + res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32)) + exp = np.array([-1, 2, 1, 3], dtype=np.int32) + tm.assert_numpy_array_equal(res, exp) + + idx = make_sparse_index(4, np.array([0, 2, 3], dtype=np.int32), kind=kind) + res = idx.lookup_array(np.array([2, 1, 3, 0], dtype=np.int32)) + exp = np.array([1, -1, 2, 0], dtype=np.int32) + tm.assert_numpy_array_equal(res, exp) + + res = idx.lookup_array(np.array([1, 4, 2, 5], dtype=np.int32)) + exp = np.array([-1, -1, 1, -1], dtype=np.int32) + tm.assert_numpy_array_equal(res, exp) @pytest.mark.parametrize( "idx, expected", @@ -442,13 +383,13 @@ def test_block_internal(self): tm.assert_numpy_array_equal(idx.blocs, np.array([0, 2], dtype=np.int32)) tm.assert_numpy_array_equal(idx.blengths, np.array([1, 2], dtype=np.int32)) - def test_make_block_boundary(self): - for i in [5, 10, 100, 101]: - idx = make_sparse_index(i, np.arange(0, i, 2, dtype=np.int32), kind="block") + @pytest.mark.parametrize("i", [5, 10, 100, 101]) + def test_make_block_boundary(self, i): + idx = make_sparse_index(i, np.arange(0, i, 2, dtype=np.int32), kind="block") - exp = np.arange(0, i, 2, dtype=np.int32) - tm.assert_numpy_array_equal(idx.blocs, exp) - tm.assert_numpy_array_equal(idx.blengths, np.ones(len(exp), dtype=np.int32)) + exp = np.arange(0, i, 2, dtype=np.int32) + tm.assert_numpy_array_equal(idx.blocs, exp) + tm.assert_numpy_array_equal(idx.blengths, np.ones(len(exp), dtype=np.int32)) def test_equals(self): index = BlockIndex(10, [0, 4], [2, 5]) @@ -551,19 +492,17 @@ def test_equals(self): assert index.equals(index) assert not index.equals(IntIndex(10, [0, 1, 2, 3])) - def test_to_block_index(self): - def _check_case(xloc, xlen, yloc, ylen, eloc, elen): - xindex = BlockIndex(TEST_LENGTH, xloc, xlen) - yindex = BlockIndex(TEST_LENGTH, yloc, ylen) + @pytest.mark.parametrize("xloc, xlen, yloc, ylen, eloc, elen", CASES, ids=IDS) + def test_to_block_index(self, xloc, xlen, yloc, ylen, eloc, elen): + xindex = BlockIndex(TEST_LENGTH, xloc, xlen) + yindex = BlockIndex(TEST_LENGTH, yloc, ylen) - # see if survive the round trip - xbindex = xindex.to_int_index().to_block_index() - ybindex = yindex.to_int_index().to_block_index() - assert isinstance(xbindex, BlockIndex) - assert xbindex.equals(xindex) - assert ybindex.equals(yindex) - - check_cases(_check_case) + # see if survive the round trip + xbindex = xindex.to_int_index().to_block_index() + ybindex = yindex.to_int_index().to_block_index() + assert isinstance(xbindex, BlockIndex) + assert xbindex.equals(xindex) + assert ybindex.equals(yindex) def test_to_int_index(self): index = IntIndex(10, [2, 3, 4, 5, 6]) @@ -571,48 +510,44 @@ def test_to_int_index(self): class TestSparseOperators: - def _op_tests(self, sparse_op, python_op): - def _check_case(xloc, xlen, yloc, ylen, eloc, elen): - xindex = BlockIndex(TEST_LENGTH, xloc, xlen) - yindex = BlockIndex(TEST_LENGTH, yloc, ylen) - - xdindex = xindex.to_int_index() - ydindex = yindex.to_int_index() + @pytest.mark.parametrize("opname", ["add", "sub", "mul", "truediv", "floordiv"]) + @pytest.mark.parametrize("xloc, xlen, yloc, ylen, eloc, elen", CASES, ids=IDS) + def test_op(self, opname, xloc, xlen, yloc, ylen, eloc, elen): + sparse_op = getattr(splib, f"sparse_{opname}_float64") + python_op = getattr(operator, opname) - x = np.arange(xindex.npoints) * 10.0 + 1 - y = np.arange(yindex.npoints) * 100.0 + 1 + xindex = BlockIndex(TEST_LENGTH, xloc, xlen) + yindex = BlockIndex(TEST_LENGTH, yloc, ylen) - xfill = 0 - yfill = 2 + xdindex = xindex.to_int_index() + ydindex = yindex.to_int_index() - result_block_vals, rb_index, bfill = sparse_op( - x, xindex, xfill, y, yindex, yfill - ) - result_int_vals, ri_index, ifill = sparse_op( - x, xdindex, xfill, y, ydindex, yfill - ) + x = np.arange(xindex.npoints) * 10.0 + 1 + y = np.arange(yindex.npoints) * 100.0 + 1 - assert rb_index.to_int_index().equals(ri_index) - tm.assert_numpy_array_equal(result_block_vals, result_int_vals) - assert bfill == ifill + xfill = 0 + yfill = 2 - # check versus Series... - xseries = Series(x, xdindex.indices) - xseries = xseries.reindex(np.arange(TEST_LENGTH)).fillna(xfill) + result_block_vals, rb_index, bfill = sparse_op( + x, xindex, xfill, y, yindex, yfill + ) + result_int_vals, ri_index, ifill = sparse_op( + x, xdindex, xfill, y, ydindex, yfill + ) - yseries = Series(y, ydindex.indices) - yseries = yseries.reindex(np.arange(TEST_LENGTH)).fillna(yfill) + assert rb_index.to_int_index().equals(ri_index) + tm.assert_numpy_array_equal(result_block_vals, result_int_vals) + assert bfill == ifill - series_result = python_op(xseries, yseries) - series_result = series_result.reindex(ri_index.indices) + # check versus Series... + xseries = Series(x, xdindex.indices) + xseries = xseries.reindex(np.arange(TEST_LENGTH)).fillna(xfill) - tm.assert_numpy_array_equal(result_block_vals, series_result.values) - tm.assert_numpy_array_equal(result_int_vals, series_result.values) + yseries = Series(y, ydindex.indices) + yseries = yseries.reindex(np.arange(TEST_LENGTH)).fillna(yfill) - check_cases(_check_case) + series_result = python_op(xseries, yseries) + series_result = series_result.reindex(ri_index.indices) - @pytest.mark.parametrize("opname", ["add", "sub", "mul", "truediv", "floordiv"]) - def test_op(self, opname): - sparse_op = getattr(splib, f"sparse_{opname}_float64") - python_op = getattr(operator, opname) - self._op_tests(sparse_op, python_op) + tm.assert_numpy_array_equal(result_block_vals, series_result.values) + tm.assert_numpy_array_equal(result_int_vals, series_result.values)