diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index e5917c9176c54..d940d564b8df2 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -33,6 +33,7 @@ Other enhancements - :func:`DataFrame.to_excel` now raises an ``UserWarning`` when the character count in a cell exceeds Excel's limitation of 32767 characters (:issue:`56954`) - :func:`read_stata` now returns ``datetime64`` resolutions better matching those natively stored in the stata format (:issue:`55642`) - :meth:`DataFrame.agg` called with ``axis=1`` and a ``func`` which relabels the result index now raises a ``NotImplementedError`` (:issue:`58807`). +- :meth:`Index.get_loc` now also accepts subclasses of ``tuple`` as keys (:issue:`57922`) - :meth:`Styler.set_tooltips` provides alternative method to storing tooltips by using title attribute of td elements. (:issue:`56981`) - Allow dictionaries to be passed to :meth:`pandas.Series.str.replace` via ``pat`` parameter (:issue:`51748`) - Support passing a :class:`Series` input to :func:`json_normalize` that retains the :class:`Series` :class:`Index` (:issue:`51452`) @@ -231,6 +232,7 @@ Other API changes ^^^^^^^^^^^^^^^^^ - 3rd party ``py.path`` objects are no longer explicitly supported in IO methods. 
Use :py:class:`pathlib.Path` objects instead (:issue:`57091`) - :func:`read_table`'s ``parse_dates`` argument defaults to ``None`` to improve consistency with :func:`read_csv` (:issue:`57476`) +- All classes inheriting from builtin ``tuple`` (including types created with :func:`collections.namedtuple`) are now hashed and compared as builtin ``tuple`` during indexing operations (:issue:`57922`) - Made ``dtype`` a required argument in :meth:`ExtensionArray._from_sequence_of_strings` (:issue:`56519`) - Passing a :class:`Series` input to :func:`json_normalize` will now retain the :class:`Series` :class:`Index`, previously output had a new :class:`RangeIndex` (:issue:`51452`) - Updated :meth:`DataFrame.to_excel` so that the output spreadsheet has no styling. Custom styling can still be done using :meth:`Styler.to_excel` (:issue:`54154`) diff --git a/pandas/_libs/include/pandas/vendored/klib/khash_python.h b/pandas/_libs/include/pandas/vendored/klib/khash_python.h index 8d4c382241d39..2fa61642968cf 100644 --- a/pandas/_libs/include/pandas/vendored/klib/khash_python.h +++ b/pandas/_libs/include/pandas/vendored/klib/khash_python.h @@ -207,7 +207,8 @@ static inline int pyobject_cmp(PyObject *a, PyObject *b) { if (PyComplex_CheckExact(a)) { return complexobject_cmp((PyComplexObject *)a, (PyComplexObject *)b); } - if (PyTuple_CheckExact(a)) { + if (PyTuple_Check(a)) { + // compare tuple subclasses as builtin tuples return tupleobject_cmp((PyTupleObject *)a, (PyTupleObject *)b); } // frozenset isn't yet supported @@ -311,7 +312,8 @@ static inline khuint32_t kh_python_hash_func(PyObject *key) { // because complex(k,0) == k holds for any int-object k // and kh_complex128_hash_func doesn't respect it hash = complexobject_hash((PyComplexObject *)key); - } else if (PyTuple_CheckExact(key)) { + } else if (PyTuple_Check(key)) { + // hash tuple subclasses as builtin tuples hash = tupleobject_hash((PyTupleObject *)key); } else { hash = PyObject_Hash(key); diff --git 
a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index f08a7625e7f8a..d82203a53a60f 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -1,3 +1,4 @@ +from collections import namedtuple from datetime import timedelta import re @@ -1006,3 +1007,26 @@ def test_get_indexer_for_multiindex_with_nans(nulls_fixture): result = idx1.get_indexer(idx2) expected = np.array([-1, 1], dtype=np.intp) tm.assert_numpy_array_equal(result, expected) + + +def test_get_loc_namedtuple_behaves_like_tuple(): + # GH57922 + NamedIndex = namedtuple("NamedIndex", ("a", "b")) + multi_idx = MultiIndex.from_tuples( + [NamedIndex("i1", "i2"), NamedIndex("i3", "i4"), NamedIndex("i5", "i6")] + ) + for idx in (multi_idx, multi_idx.to_flat_index()): + assert idx.get_loc(NamedIndex("i1", "i2")) == 0 + assert idx.get_loc(NamedIndex("i3", "i4")) == 1 + assert idx.get_loc(NamedIndex("i5", "i6")) == 2 + assert idx.get_loc(("i1", "i2")) == 0 + assert idx.get_loc(("i3", "i4")) == 1 + assert idx.get_loc(("i5", "i6")) == 2 + multi_idx = MultiIndex.from_tuples([("i1", "i2"), ("i3", "i4"), ("i5", "i6")]) + for idx in (multi_idx, multi_idx.to_flat_index()): + assert idx.get_loc(NamedIndex("i1", "i2")) == 0 + assert idx.get_loc(NamedIndex("i3", "i4")) == 1 + assert idx.get_loc(NamedIndex("i5", "i6")) == 2 + assert idx.get_loc(("i1", "i2")) == 0 + assert idx.get_loc(("i3", "i4")) == 1 + assert idx.get_loc(("i5", "i6")) == 2 diff --git a/pandas/tests/libs/test_hashtable.py b/pandas/tests/libs/test_hashtable.py index b70386191d9d9..50b561aefcf49 100644 --- a/pandas/tests/libs/test_hashtable.py +++ b/pandas/tests/libs/test_hashtable.py @@ -1,3 +1,4 @@ +from collections import namedtuple from collections.abc import Generator from contextlib import contextmanager import re @@ -405,9 +406,8 @@ def test_nan_complex_real(self): table = ht.PyObjectHashTable() table.set_item(nan1, 42) assert table.get_item(nan2) == 42 - with 
pytest.raises(KeyError, match=None) as error: + with pytest.raises(KeyError, match=re.escape(repr(other))): table.get_item(other) - assert str(error.value) == str(other) def test_nan_complex_imag(self): nan1 = complex(1, float("nan")) @@ -417,9 +417,8 @@ def test_nan_complex_imag(self): table = ht.PyObjectHashTable() table.set_item(nan1, 42) assert table.get_item(nan2) == 42 - with pytest.raises(KeyError, match=None) as error: + with pytest.raises(KeyError, match=re.escape(repr(other))): table.get_item(other) - assert str(error.value) == str(other) def test_nan_in_tuple(self): nan1 = (float("nan"),) @@ -436,9 +435,28 @@ def test_nan_in_nested_tuple(self): table = ht.PyObjectHashTable() table.set_item(nan1, 42) assert table.get_item(nan2) == 42 - with pytest.raises(KeyError, match=None) as error: + with pytest.raises(KeyError, match=re.escape(repr(other))): + table.get_item(other) + + def test_nan_in_namedtuple(self): + T = namedtuple("T", ["x"]) + nan1 = T(float("nan")) + nan2 = T(float("nan")) + assert nan1.x is not nan2.x + table = ht.PyObjectHashTable() + table.set_item(nan1, 42) + assert table.get_item(nan2) == 42 + + def test_nan_in_nested_namedtuple(self): + T = namedtuple("T", ["x", "y"]) + nan1 = T(1, (2, (float("nan"),))) + nan2 = T(1, (2, (float("nan"),))) + other = T(1, 2) + table = ht.PyObjectHashTable() + table.set_item(nan1, 42) + assert table.get_item(nan2) == 42 + with pytest.raises(KeyError, match=re.escape(repr(other))): table.get_item(other) - assert str(error.value) == str(other) def test_hash_equal_tuple_with_nans(): @@ -448,6 +466,22 @@ def test_hash_equal_tuple_with_nans(): assert ht.objects_are_equal(a, b) +def test_hash_equal_namedtuple_with_nans(): + T = namedtuple("T", ["x", "y"]) + a = T(float("nan"), (float("nan"), float("nan"))) + b = T(float("nan"), (float("nan"), float("nan"))) + assert ht.object_hash(a) == ht.object_hash(b) + assert ht.objects_are_equal(a, b) + + +def test_hash_equal_namedtuple_and_tuple(): + T = namedtuple("T", 
["x", "y"]) + a = T(1, (2, 3)) + b = (1, (2, 3)) + assert ht.object_hash(a) == ht.object_hash(b) + assert ht.objects_are_equal(a, b) + + def test_get_labels_groupby_for_Int64(writable): table = ht.Int64HashTable() vals = np.array([1, 2, -1, 2, 1, -1], dtype=np.int64)