From 3b8ebf08268646a16fdd389a1b2a2c42c501b102 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Sat, 13 Feb 2021 19:18:29 +1100 Subject: [PATCH 01/18] Add failing test --- pandas/_libs/testing.pyx | 4 ++- pandas/tests/dtypes/test_inference.py | 52 +++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/testing.pyx b/pandas/_libs/testing.pyx index cfe9f40f12452..4dd15a0384052 100644 --- a/pandas/_libs/testing.pyx +++ b/pandas/_libs/testing.pyx @@ -105,7 +105,9 @@ cpdef assert_almost_equal(a, b, return True a_is_ndarray = is_array(a) + a_has_size_and_shape = hasattr(a, "size") and hasattr(a, "shape") b_is_ndarray = is_array(b) + b_has_size_and_shape = hasattr(b, "size") and hasattr(b, "shape") if obj is None: if a_is_ndarray or b_is_ndarray: @@ -125,7 +127,7 @@ cpdef assert_almost_equal(a, b, f"Can't compare objects without length, one or both is invalid: ({a}, {b})" ) - if a_is_ndarray and b_is_ndarray: + if (a_is_ndarray and b_is_ndarray) or (a_has_size_and_shape and b_has_size_and_shape): na, nb = a.size, b.size if a.shape != b.shape: from pandas._testing import raise_assert_detail diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 5936248456ca7..1986eca56aa77 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -75,6 +75,50 @@ def coerce(request): return request.param +class MockNumpyLikeArray: + """ + A class which is numpy-like (e.g. Pint's Quantity) but not actually numpy + + The key is that it is not actually a numpy array so + ``util.is_array(mock_numpy_like_array_instance)`` returns ``False``. Other + important properties are that the class defines a :meth:`__iter__` method + (so that ``isinstance(abc.Iterable)`` returns ``True``) and has a + :meth:`ndim` property which can be used as a check for whether it is a + scalar or not. 
+ """ + + def __init__(self, values): + self._values = values + + def __iter__(self): + iter_values = iter(self._values) + + def it_outer(): + for element in iter_values: + yield element + + return it_outer() + + def __len__(self): + return len(self._values) + + @property + def ndim(self): + return self._values.ndim + + @property + def dtype(self): + return self._values.dtype + + @property + def size(self): + return self._values.size + + @property + def shape(self): + return self._values.shape + + # collect all objects to be tested for list-like-ness; use tuples of objects, # whether they are list-like or not (special casing for sets), and their ID ll_params = [ @@ -193,6 +237,14 @@ class DtypeList(list): assert not inference.is_array_like(123) +@pytest.mark.parametrize("eg", ( + np.array(2), + MockNumpyLikeArray(np.array(2)), +)) +def test_assert_almost_equal(eg): + tm.assert_almost_equal(eg, eg) + + @pytest.mark.parametrize( "inner", [ From 764a44edacb6af01900a9c42955488ee4718a326 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Sat, 13 Feb 2021 19:38:07 +1100 Subject: [PATCH 02/18] Add __array__ method to mock numpy-like --- pandas/tests/dtypes/test_inference.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 1986eca56aa77..405afb5aa46c5 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -94,14 +94,16 @@ def __iter__(self): iter_values = iter(self._values) def it_outer(): - for element in iter_values: - yield element + yield from iter_values return it_outer() def __len__(self): return len(self._values) + def __array__(self, t=None): + return self._values + @property def ndim(self): return self._values.ndim @@ -237,10 +239,13 @@ class DtypeList(list): assert not inference.is_array_like(123) -@pytest.mark.parametrize("eg", ( - np.array(2), - MockNumpyLikeArray(np.array(2)), -)) +@pytest.mark.parametrize( + "eg", + ( + np.array(2), + MockNumpyLikeArray(np.array(2)), + ), +) def test_assert_almost_equal(eg): tm.assert_almost_equal(eg, eg) From dabcf625a3af24f8c541515a5a36ed29f63cd52b Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Sun, 5 Jul 2020 15:23:26 +1000 Subject: [PATCH 03/18] TST: GH35131 Add failing test of numpy-like array handling --- pandas/tests/dtypes/test_inference.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 405afb5aa46c5..c202349341f2f 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -155,6 +155,15 @@ def shape(self): (np.ndarray((2,) * 4), True, "ndarray-4d"), (np.array([[[[]]]]), True, "ndarray-4d-empty"), (np.array(2), False, "ndarray-0d"), + (MockNumpyLikeArray(np.ndarray((2,) * 1)), True, "duck-ndarray-1d"), + (MockNumpyLikeArray(np.array([])), True, "duck-ndarray-1d-empty"), + (MockNumpyLikeArray(np.ndarray((2,) * 2)), True, "duck-ndarray-2d"), + (MockNumpyLikeArray(np.array([[]])), True, "duck-ndarray-2d-empty"), + (MockNumpyLikeArray(np.ndarray((2,) * 3)), True, "duck-ndarray-3d"), + (MockNumpyLikeArray(np.array([[[]]])), True, "duck-ndarray-3d-empty"), + (MockNumpyLikeArray(np.ndarray((2,) * 4)), True, "duck-ndarray-4d"), + (MockNumpyLikeArray(np.array([[[[]]]])), True, "duck-ndarray-4d-empty"), + (MockNumpyLikeArray(np.array(2)), False, "duck-ndarray-0d"), (1, False, "int"), (b"123", False, "bytes"), (b"", False, "bytes-empty"), From 
c0ce501b37dfb2b81d81e6b1a53ccd842460099c Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Sun, 5 Jul 2020 15:26:06 +1000 Subject: [PATCH 04/18] ENH: GH35131 Implement fix which allows numpy-like handling Now pd.core.dtypes.inference.is_list_like correctly identifies numpy-like scalars as not being iterable --- pandas/_libs/lib.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index f527882a9dc9d..3d48ded92c7f9 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1103,8 +1103,8 @@ cdef inline bint c_is_list_like(object obj, bint allow_sets) except -1: getattr(obj, "__iter__", None) is not None and not isinstance(obj, type) # we do not count strings/unicode/bytes as list-like and not isinstance(obj, (str, bytes)) - # exclude zero-dimensional numpy arrays, effectively scalars - and not cnp.PyArray_IsZeroDim(obj) + # exclude zero-dimensional duck arrays, effectively scalars + and not (hasattr(obj, "ndim") and obj.ndim == 0) # exclude sets if allow_sets is False and not (allow_sets is False and isinstance(obj, abc.Set)) ) From 4ffe216d86705c8be9d51367be9cf2137d06118f Mon Sep 17 00:00:00 2001 From: Zeb Nicholls Date: Mon, 6 Jul 2020 09:43:34 +1000 Subject: [PATCH 05/18] Simplify ndim check Co-authored-by: keewis --- pandas/_libs/lib.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 3d48ded92c7f9..eb4a918a5375f 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1104,7 +1104,7 @@ cdef inline bint c_is_list_like(object obj, bint allow_sets) except -1: # we do not count strings/unicode/bytes as list-like and not isinstance(obj, (str, bytes)) # exclude zero-dimensional duck arrays, effectively scalars - and not (hasattr(obj, "ndim") and obj.ndim == 0) + and getattr(obj, "ndim", 0) != 0 # exclude sets if allow_sets is False and not (allow_sets is False and isinstance(obj, abc.Set)) ) From d340a809b6ee0239185b41e64a80178767a5a8d2 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Mon, 6 Jul 2020 10:01:36 +1000 Subject: [PATCH 06/18] Revert change because it broke tests I'm not completely sure why, but reverting here for simplicity --- pandas/_libs/lib.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index eb4a918a5375f..3d48ded92c7f9 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1104,7 +1104,7 @@ cdef inline bint c_is_list_like(object obj, bint allow_sets) except -1: # we do not count strings/unicode/bytes as list-like and not isinstance(obj, (str, bytes)) # exclude zero-dimensional duck arrays, effectively scalars - and getattr(obj, "ndim", 0) != 0 + and not (hasattr(obj, "ndim") and obj.ndim == 0) # exclude sets if allow_sets is False and not (allow_sets is False and isinstance(obj, abc.Set)) ) From ad86e5d80a2145965a7b72db4d0c9e362dc1eb45 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Mon, 6 Jul 2020 10:20:06 +1000 Subject: [PATCH 07/18] Use slightly clearer logic --- pandas/_libs/lib.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 3d48ded92c7f9..6e172ce61d6f8 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1103,8 +1103,8 @@ cdef inline bint c_is_list_like(object obj, bint allow_sets) except -1: getattr(obj, "__iter__", None) is not None and not isinstance(obj, type) # we do not count strings/unicode/bytes as list-like and not isinstance(obj, (str, 
bytes)) - # exclude zero-dimensional duck arrays, effectively scalars - and not (hasattr(obj, "ndim") and obj.ndim == 0) + # assume not a 0d array unless there's evidence otherwise + and getattr(obj, "ndim", 1) != 0 # exclude sets if allow_sets is False and not (allow_sets is False and isinstance(obj, abc.Set)) ) From 9a2eb94b5c7ac5211915eaaed065ba796c0d4443 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Tue, 7 Jul 2020 10:21:51 +1000 Subject: [PATCH 08/18] Update to use numpy iterable --- pandas/_libs/lib.pyx | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 6e172ce61d6f8..42208a4af3839 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1099,12 +1099,9 @@ def is_list_like(obj: object, allow_sets: bool = True) -> bool: cdef inline bint c_is_list_like(object obj, bint allow_sets) except -1: return ( - # equiv: `isinstance(obj, abc.Iterable)` - getattr(obj, "__iter__", None) is not None and not isinstance(obj, type) + np.iterable(obj) # we do not count strings/unicode/bytes as list-like and not isinstance(obj, (str, bytes)) - # assume not a 0d array unless there's evidence otherwise - and getattr(obj, "ndim", 1) != 0 # exclude sets if allow_sets is False and not (allow_sets is False and isinstance(obj, abc.Set)) ) From 465dd37a1f92f607c2b77a5070d59eabdacf11bf Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Tue, 7 Jul 2020 11:02:09 +1000 Subject: [PATCH 09/18] Add failing is_scalar tests --- pandas/tests/dtypes/test_inference.py | 72 +++++++++++++++++---------- 1 file changed, 45 insertions(+), 27 deletions(-) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index c202349341f2f..ed4e593a8001e 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -1852,34 +1852,52 @@ def test_is_scalar_builtin_nonscalars(self): assert not is_scalar(slice(None)) assert not is_scalar(Ellipsis) - def test_is_scalar_numpy_array_scalars(self): - assert is_scalar(np.int64(1)) - assert is_scalar(np.float64(1.0)) - assert is_scalar(np.int32(1)) - assert is_scalar(np.complex64(2)) - assert is_scalar(np.object_("foobar")) - assert is_scalar(np.str_("foobar")) - assert is_scalar(np.unicode_("foobar")) - assert is_scalar(np.bytes_(b"foobar")) - assert is_scalar(np.datetime64("2014-01-01")) - assert is_scalar(np.timedelta64(1, "h")) - - def test_is_scalar_numpy_zerodim_arrays(self): - for zerodim in [ - np.array(1), - np.array("foobar"), - np.array(np.datetime64("2014-01-01")), - np.array(np.timedelta64(1, "h")), - np.array(np.datetime64("NaT")), - ]: - assert not is_scalar(zerodim) - assert is_scalar(lib.item_from_zerodim(zerodim)) - + @pytest.mark.parametrize("start", ( + np.int64(1), + np.float64(1.0), + np.int32(1), + np.complex64(2), + np.object_("foobar"), + np.str_("foobar"), + np.unicode_("foobar"), + np.bytes_(b"foobar"), + np.datetime64("2014-01-01"), + np.timedelta64(1, "h"), + )) + @pytest.mark.parametrize("numpy_like", (True, False)) + def test_is_scalar_numpy_array_scalars(self, start, numpy_like): + if numpy_like: + start = MockNumpyLikeArray(start) + + assert is_scalar(start) + + @pytest.mark.parametrize("zerodim", ( + np.array(1), + np.array("foobar"), + np.array(np.datetime64("2014-01-01")), + np.array(np.timedelta64(1, "h")), + np.array(np.datetime64("NaT")), + )) + @pytest.mark.parametrize("numpy_like", (True, False)) + def test_is_scalar_numpy_zerodim_arrays(self, zerodim, numpy_like): + if numpy_like: + zerodim = 
MockNumpyLikeArray(zerodim) + + assert not is_scalar(zerodim) + assert is_scalar(lib.item_from_zerodim(zerodim)) + + @pytest.mark.parametrize("start", ( + np.array([]), + np.array([[]]), + np.matrix("1; 2"), + )) + @pytest.mark.parametrize("numpy_like", (True, False)) @pytest.mark.filterwarnings("ignore::PendingDeprecationWarning") - def test_is_scalar_numpy_arrays(self): - assert not is_scalar(np.array([])) - assert not is_scalar(np.array([[]])) - assert not is_scalar(np.matrix("1; 2")) + def test_is_scalar_numpy_arrays(self, start, numpy_like): + if numpy_like: + start = MockNumpyLikeArray(start) + + assert not is_scalar(start) def test_is_scalar_pandas_scalars(self): assert is_scalar(Timestamp("2014-01-01")) From b83109e64922461a7bb9a9f3d1ae39a25dff8ab8 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Wed, 8 Jul 2020 08:53:42 +1000 Subject: [PATCH 10/18] Revert to relying on python's shortcircuit operators Also avoid np.iterable --- pandas/_libs/lib.pyx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 42208a4af3839..5d3f4618cdbe3 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1099,7 +1099,9 @@ def is_list_like(obj: object, allow_sets: bool = True) -> bool: cdef inline bint c_is_list_like(object obj, bint allow_sets) except -1: return ( - np.iterable(obj) + isinstance(obj, abc.Iterable) + # avoid numpy-style scalars + and not (hasattr(obj, "ndim") and obj.ndim == 0) # we do not count strings/unicode/bytes as list-like and not isinstance(obj, (str, bytes)) # exclude sets if allow_sets is False From 2e08233ac2615322ca837dccf36f354140b60147 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Sat, 13 Feb 2021 19:13:01 +1100 Subject: [PATCH 11/18] Make a mess --- pandas/_libs/testing.pyx | 6 ++---- pandas/core/dtypes/missing.py | 9 ++++++--- pandas/tests/dtypes/test_inference.py | 8 ++++++++ 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/pandas/_libs/testing.pyx b/pandas/_libs/testing.pyx index 4dd15a0384052..a7b4289e79570 100644 --- a/pandas/_libs/testing.pyx +++ b/pandas/_libs/testing.pyx @@ -14,10 +14,8 @@ from pandas._libs.util cimport ( ) from pandas.core.dtypes.common import is_dtype_equal -from pandas.core.dtypes.missing import ( - array_equivalent, - isna, -) +from pandas.core.dtypes.inference import is_array_like +from pandas.core.dtypes.missing import array_equivalent, isna cdef bint isiterable(obj): diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index d2733cddf8cee..01b037e2a7363 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -436,9 +436,12 @@ def array_equivalent( # Slow path when we allow comparing different dtypes. # Object arrays can contain None, NaN and NaT. # string dtypes must be come to this path for NumPy 1.7.1 compat - if left.dtype.kind in "OSU" or right.dtype.kind in "OSU": - # Note: `in "OSU"` is non-trivially faster than `in ["O", "S", "U"]` - # or `in ("O", "S", "U")` + try: + return np.array_equal(left, right) + except: + pass + + if is_string_dtype(left.dtype) or is_string_dtype(right.dtype): return _array_equivalent_object(left, right, strict_nan) # NaNs can occur in float and complex arrays. 
diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index ed4e593a8001e..ae2d9cccd2264 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -236,6 +236,8 @@ def test_is_array_like(): assert inference.is_array_like(Series([1, 2])) assert inference.is_array_like(np.array(["a", "b"])) assert inference.is_array_like(Index(["2016-01-01"])) + assert inference.is_array_like(np.array([2, 3])) + assert inference.is_array_like(MockNumpyLikeArray(np.array([2, 3]))) class DtypeList(list): dtype = "special" @@ -248,6 +250,12 @@ class DtypeList(list): assert not inference.is_array_like(123) +def test_assert_almost_equal(): + tm.assert_almost_equal(np.array(2), np.array(2)) + eg = MockNumpyLikeArray(np.array(2)) + tm.assert_almost_equal(eg, eg) + + @pytest.mark.parametrize( "eg", ( From 8941c28f04f6b836102795f32b4edfbd12ba46d7 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Sat, 13 Feb 2021 19:42:55 +1100 Subject: [PATCH 12/18] Fix missing module --- pandas/core/dtypes/missing.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 01b037e2a7363..7a1284b26911b 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -436,11 +436,6 @@ def array_equivalent( # Slow path when we allow comparing different dtypes. # Object arrays can contain None, NaN and NaT. # string dtypes must be come to this path for NumPy 1.7.1 compat - try: - return np.array_equal(left, right) - except: - pass - if is_string_dtype(left.dtype) or is_string_dtype(right.dtype): return _array_equivalent_object(left, right, strict_nan) From 2c58795e9baf6e0aa692e3dc0562d66f1f601339 Mon Sep 17 00:00:00 2001 From: Yves Delley Date: Fri, 26 Nov 2021 11:58:30 +0100 Subject: [PATCH 13/18] rebased to current master; reverted changes to assert_almost_equal --- pandas/_libs/lib.pyx | 7 +-- pandas/_libs/testing.pyx | 10 ++-- pandas/core/dtypes/missing.py | 4 +- pandas/tests/dtypes/test_inference.py | 74 ++++++++++++--------------- 4 files changed, 45 insertions(+), 50 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 5d3f4618cdbe3..21e9bacf372e8 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1099,11 +1099,12 @@ def is_list_like(obj: object, allow_sets: bool = True) -> bool: cdef inline bint c_is_list_like(object obj, bint allow_sets) except -1: return ( - isinstance(obj, abc.Iterable) - # avoid numpy-style scalars - and not (hasattr(obj, "ndim") and obj.ndim == 0) + # equiv: `isinstance(obj, abc.Iterable)` + getattr(obj, "__iter__", None) is not None and not isinstance(obj, type) # we do not count strings/unicode/bytes as list-like and not isinstance(obj, (str, bytes)) + # avoid numpy-style scalars + and not (hasattr(obj, "ndim") and obj.ndim == 0) # exclude sets if allow_sets is False and not (allow_sets is False and isinstance(obj, abc.Set)) ) diff --git a/pandas/_libs/testing.pyx b/pandas/_libs/testing.pyx index a7b4289e79570..cfe9f40f12452 100644 --- a/pandas/_libs/testing.pyx +++ b/pandas/_libs/testing.pyx @@ -14,8 +14,10 @@ from pandas._libs.util cimport ( ) from pandas.core.dtypes.common import is_dtype_equal -from pandas.core.dtypes.inference import is_array_like -from pandas.core.dtypes.missing import array_equivalent, isna +from pandas.core.dtypes.missing import ( + array_equivalent, + isna, +) cdef bint isiterable(obj): @@ -103,9 +105,7 @@ cpdef assert_almost_equal(a, b, return True a_is_ndarray = is_array(a) - 
a_has_size_and_shape = hasattr(a, "size") and hasattr(a, "shape") b_is_ndarray = is_array(b) - b_has_size_and_shape = hasattr(b, "size") and hasattr(b, "shape") if obj is None: if a_is_ndarray or b_is_ndarray: @@ -125,7 +125,7 @@ cpdef assert_almost_equal(a, b, f"Can't compare objects without length, one or both is invalid: ({a}, {b})" ) - if (a_is_ndarray and b_is_ndarray) or (a_has_size_and_shape and b_has_size_and_shape): + if a_is_ndarray and b_is_ndarray: na, nb = a.size, b.size if a.shape != b.shape: from pandas._testing import raise_assert_detail diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 7a1284b26911b..d2733cddf8cee 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -436,7 +436,9 @@ def array_equivalent( # Slow path when we allow comparing different dtypes. # Object arrays can contain None, NaN and NaT. # string dtypes must be come to this path for NumPy 1.7.1 compat - if is_string_dtype(left.dtype) or is_string_dtype(right.dtype): + if left.dtype.kind in "OSU" or right.dtype.kind in "OSU": + # Note: `in "OSU"` is non-trivially faster than `in ["O", "S", "U"]` + # or `in ("O", "S", "U")` return _array_equivalent_object(left, right, strict_nan) # NaNs can occur in float and complex arrays. diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index ae2d9cccd2264..9d9638c06fc20 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -250,23 +250,6 @@ class DtypeList(list): assert not inference.is_array_like(123) -def test_assert_almost_equal(): - tm.assert_almost_equal(np.array(2), np.array(2)) - eg = MockNumpyLikeArray(np.array(2)) - tm.assert_almost_equal(eg, eg) - - -@pytest.mark.parametrize( - "eg", - ( - np.array(2), - MockNumpyLikeArray(np.array(2)), - ), -) -def test_assert_almost_equal(eg): - tm.assert_almost_equal(eg, eg) - - @pytest.mark.parametrize( "inner", [ @@ -1860,18 +1843,21 @@ def test_is_scalar_builtin_nonscalars(self): assert not is_scalar(slice(None)) assert not is_scalar(Ellipsis) - @pytest.mark.parametrize("start", ( - np.int64(1), - np.float64(1.0), - np.int32(1), - np.complex64(2), - np.object_("foobar"), - np.str_("foobar"), - np.unicode_("foobar"), - np.bytes_(b"foobar"), - np.datetime64("2014-01-01"), - np.timedelta64(1, "h"), - )) + @pytest.mark.parametrize( + "start", + ( + np.int64(1), + np.float64(1.0), + np.int32(1), + np.complex64(2), + np.object_("foobar"), + np.str_("foobar"), + np.unicode_("foobar"), + np.bytes_(b"foobar"), + np.datetime64("2014-01-01"), + np.timedelta64(1, "h"), + ), + ) @pytest.mark.parametrize("numpy_like", (True, False)) def test_is_scalar_numpy_array_scalars(self, start, numpy_like): if numpy_like: @@ -1879,13 +1865,16 @@ def test_is_scalar_numpy_array_scalars(self, start, numpy_like): assert is_scalar(start) - @pytest.mark.parametrize("zerodim", ( - np.array(1), - np.array("foobar"), - np.array(np.datetime64("2014-01-01")), - np.array(np.timedelta64(1, "h")), - np.array(np.datetime64("NaT")), - )) + @pytest.mark.parametrize( + "zerodim", + ( + np.array(1), + np.array("foobar"), + np.array(np.datetime64("2014-01-01")), + np.array(np.timedelta64(1, "h")), + np.array(np.datetime64("NaT")), + ), + ) @pytest.mark.parametrize("numpy_like", (True, False)) def test_is_scalar_numpy_zerodim_arrays(self, zerodim, numpy_like): if numpy_like: @@ -1894,11 +1883,14 @@ def test_is_scalar_numpy_zerodim_arrays(self, zerodim, numpy_like): assert not is_scalar(zerodim) assert 
is_scalar(lib.item_from_zerodim(zerodim)) - @pytest.mark.parametrize("start", ( - np.array([]), - np.array([[]]), - np.matrix("1; 2"), - )) + @pytest.mark.parametrize( + "start", + ( + np.array([]), + np.array([[]]), + np.matrix("1; 2"), + ), + ) @pytest.mark.parametrize("numpy_like", (True, False)) @pytest.mark.filterwarnings("ignore::PendingDeprecationWarning") def test_is_scalar_numpy_arrays(self, start, numpy_like): From e7bcee07a4f85075564c9a6621fdf48783d519fb Mon Sep 17 00:00:00 2001 From: Yves Delley Date: Fri, 26 Nov 2021 12:07:32 +0100 Subject: [PATCH 14/18] fix tests and clarify that a 0-dimensional duck-array is *NOT* a scalar (neither are 0-dimensional numpy arrays) --- pandas/tests/dtypes/test_inference.py | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 9d9638c06fc20..e6dd858c00c66 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -83,8 +83,12 @@ class MockNumpyLikeArray: ``util.is_array(mock_numpy_like_array_instance)`` returns ``False``. Other important properties are that the class defines a :meth:`__iter__` method (so that ``isinstance(abc.Iterable)`` returns ``True``) and has a - :meth:`ndim` property which can be used as a check for whether it is a - scalar or not. + :meth:`ndim` property, as pandas special-cases 0-dimensional arrays in some + cases. + + We expect pandas to behave with respect to such duck arrays exactly as + with real numpy arrays. In particular, a 0-dimensional duck array is *NOT* + a scalar (`is_scalar(np.array(1)) == False`), but it is not list-like either. """ def __init__(self, values): @@ -102,7 +106,7 @@ def __len__(self): return len(self._values) def __array__(self, t=None): - return self._values + return np.asarray(self._values, dtype=t) @property def ndim(self): @@ -1858,11 +1862,7 @@ def test_is_scalar_builtin_nonscalars(self): np.timedelta64(1, "h"), ), ) - @pytest.mark.parametrize("numpy_like", (True, False)) - def test_is_scalar_numpy_array_scalars(self, start, numpy_like): - if numpy_like: - start = MockNumpyLikeArray(start) - + def test_is_scalar_numpy_array_scalars(self, start): assert is_scalar(start) @pytest.mark.parametrize( @@ -1875,11 +1875,7 @@ def test_is_scalar_numpy_array_scalars(self, start, numpy_like): np.array(np.datetime64("NaT")), ), ) - @pytest.mark.parametrize("numpy_like", (True, False)) - def test_is_scalar_numpy_zerodim_arrays(self, zerodim, numpy_like): - if numpy_like: - zerodim = MockNumpyLikeArray(zerodim) - + def test_is_scalar_numpy_zerodim_arrays(self, zerodim): assert not is_scalar(zerodim) assert is_scalar(lib.item_from_zerodim(zerodim)) From c4c348b40cc521c680762d19a95730249012121c Mon Sep 17 00:00:00 2001 From: Yves Delley Date: Fri, 26 Nov 2021 13:02:22 +0100 Subject: [PATCH 15/18] performance short-cuts for np.ndarray and list --- pandas/_libs/lib.pyx | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 21e9bacf372e8..704db3d16b8cc 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -14,6 +14,7 @@ from cpython.datetime cimport ( PyTime_Check, ) from cpython.iterator cimport PyIter_Check +from cpython.list cimport PyList_Check from cpython.number cimport PyNumber_Check from cpython.object cimport ( Py_EQ, @@ -1098,6 +1099,12 @@ def is_list_like(obj: object, allow_sets: bool = True) -> bool: cdef inline bint c_is_list_like(object obj, bint allow_sets) except -1: + # 
first, performance short-cuts for the most common cases + if cnp.PyArray_Check(obj): + return not cnp.PyArray_IsZeroDim(obj) + if PyList_Check(obj): + return True + # then the generic implementation return ( # equiv: `isinstance(obj, abc.Iterable)` getattr(obj, "__iter__", None) is not None and not isinstance(obj, type) From b28529350dece8e04eaf5d05dcaf27360dcfa24f Mon Sep 17 00:00:00 2001 From: Yves Delley Date: Sat, 27 Nov 2021 09:17:00 +0100 Subject: [PATCH 16/18] changes requested during review by @jbrockmendel --- pandas/_libs/lib.pyx | 5 +-- pandas/tests/dtypes/test_inference.py | 61 ++++++++++----------------- 2 files changed, 25 insertions(+), 41 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 704db3d16b8cc..fb53fa1b67fe5 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -14,7 +14,6 @@ from cpython.datetime cimport ( PyTime_Check, ) from cpython.iterator cimport PyIter_Check -from cpython.list cimport PyList_Check from cpython.number cimport PyNumber_Check from cpython.object cimport ( Py_EQ, @@ -1100,9 +1099,9 @@ def is_list_like(obj: object, allow_sets: bool = True) -> bool: cdef inline bint c_is_list_like(object obj, bint allow_sets) except -1: # first, performance short-cuts for the most common cases - if cnp.PyArray_Check(obj): + if util.is_array(obj): return not cnp.PyArray_IsZeroDim(obj) - if PyList_Check(obj): + if isinstance(obj, list): return True # then the generic implementation return ( diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index e6dd858c00c66..7953d650636be 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -1847,53 +1847,38 @@ def test_is_scalar_builtin_nonscalars(self): assert not is_scalar(slice(None)) assert not is_scalar(Ellipsis) - @pytest.mark.parametrize( - "start", - ( - np.int64(1), - np.float64(1.0), - np.int32(1), - np.complex64(2), - np.object_("foobar"), - np.str_("foobar"), - np.unicode_("foobar"), - np.bytes_(b"foobar"), - np.datetime64("2014-01-01"), - np.timedelta64(1, "h"), - ), - ) - def test_is_scalar_numpy_array_scalars(self, start): - assert is_scalar(start) - - @pytest.mark.parametrize( - "zerodim", - ( + def test_is_scalar_numpy_array_scalars(self): + assert is_scalar(np.int64(1)) + assert is_scalar(np.float64(1.0)) + assert is_scalar(np.int32(1)) + assert is_scalar(np.complex64(2)) + assert is_scalar(np.object_("foobar")) + assert is_scalar(np.str_("foobar")) + assert is_scalar(np.unicode_("foobar")) + assert is_scalar(np.bytes_(b"foobar")) + assert is_scalar(np.datetime64("2014-01-01")) + assert is_scalar(np.timedelta64(1, "h")) + + def test_is_scalar_numpy_zerodim_arrays(self): + for zerodim in [ np.array(1), np.array("foobar"), np.array(np.datetime64("2014-01-01")), np.array(np.timedelta64(1, "h")), np.array(np.datetime64("NaT")), - ), - ) - def test_is_scalar_numpy_zerodim_arrays(self, zerodim): - assert not is_scalar(zerodim) - assert is_scalar(lib.item_from_zerodim(zerodim)) + ]: + assert not is_scalar(zerodim) + assert is_scalar(lib.item_from_zerodim(zerodim)) - @pytest.mark.parametrize( - "start", - ( + @pytest.mark.filterwarnings("ignore::PendingDeprecationWarning") + def test_is_scalar_numpy_arrays(self): + for a in [ np.array([]), np.array([[]]), np.matrix("1; 2"), - ), - ) - @pytest.mark.parametrize("numpy_like", (True, False)) - @pytest.mark.filterwarnings("ignore::PendingDeprecationWarning") - def test_is_scalar_numpy_arrays(self, start, numpy_like): - if numpy_like: - start = 
MockNumpyLikeArray(start) - - assert not is_scalar(start) + ]: + assert not is_scalar(a) + assert not is_scalar(MockNumpyLikeArray(a)) def test_is_scalar_pandas_scalars(self): assert is_scalar(Timestamp("2014-01-01")) From 99af0bd9f245a93bcb053b321aad86c9ecd7d984 Mon Sep 17 00:00:00 2001 From: Yves Delley Date: Sat, 18 Dec 2021 10:59:48 +0100 Subject: [PATCH 17/18] change requested in review by jreback --- doc/source/whatsnew/v1.4.0.rst | 1 + pandas/_libs/lib.pyx | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 413dbb9cd0850..55ab1db7137e8 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -231,6 +231,7 @@ Other enhancements - :meth:`UInt64Index.map` now retains ``dtype`` where possible (:issue:`44609`) - :meth:`read_json` can now parse unsigned long long integers (:issue:`26068`) - :meth:`DataFrame.take` now raises a ``TypeError`` when passed a scalar for the indexer (:issue:`42875`) +- :meth:`is_list_like` now identifies duck-arrays as list-like unless ``.ndim == 0`` (:issue:`35131`) - diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index fb53fa1b67fe5..a31b5a7237058 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1101,7 +1101,7 @@ cdef inline bint c_is_list_like(object obj, bint allow_sets) except -1: # first, performance short-cuts for the most common cases if util.is_array(obj): return not cnp.PyArray_IsZeroDim(obj) - if isinstance(obj, list): + elif isinstance(obj, list): return True # then the generic implementation return ( From 8fff19038b00124eaf00d52671d39f648baee551 Mon Sep 17 00:00:00 2001 From: Yves Delley Date: Sat, 18 Dec 2021 11:13:26 +0100 Subject: [PATCH 18/18] made code comments a little bit more explicit --- pandas/_libs/lib.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index a31b5a7237058..0814a3a1354f0 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1100,6 +1100,7 @@ def is_list_like(obj: object, allow_sets: bool = True) -> bool: cdef inline bint c_is_list_like(object obj, bint allow_sets) except -1: # first, performance short-cuts for the most common cases if util.is_array(obj): + # exclude zero-dimensional numpy arrays, effectively scalars return not cnp.PyArray_IsZeroDim(obj) elif isinstance(obj, list): return True @@ -1109,7 +1110,7 @@ cdef inline bint c_is_list_like(object obj, bint allow_sets) except -1: getattr(obj, "__iter__", None) is not None and not isinstance(obj, type) # we do not count strings/unicode/bytes as list-like and not isinstance(obj, (str, bytes)) - # avoid numpy-style scalars + # exclude zero-dimensional duck-arrays, effectively scalars and not (hasattr(obj, "ndim") and obj.ndim == 0) # exclude sets if allow_sets is False and not (allow_sets is False and isinstance(obj, abc.Set))
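
For reference, a minimal sketch (not part of the patches above) of the behaviour this series converges on, using a stripped-down duck array in the spirit of the MockNumpyLikeArray test helper. The Duck class and the asserts are illustrative assumptions rather than code from the PR, and the imports assume the public pandas.api.types entry points:

    import numpy as np

    from pandas.api.types import is_list_like, is_scalar


    class Duck:
        """Numpy-like wrapper that is deliberately not a real ndarray."""

        def __init__(self, values):
            self._values = values

        def __iter__(self):
            # having __iter__ is what previously made 0-d duck arrays
            # look list-like to c_is_list_like
            return iter(self._values)

        @property
        def ndim(self):
            return self._values.ndim


    # 0-dimensional duck arrays are no longer list-like, matching 0-d ndarrays ...
    assert not is_list_like(np.array(2))
    assert not is_list_like(Duck(np.array(2)))

    # ... but, like real 0-d ndarrays, they are not scalars either
    assert not is_scalar(np.array(2))
    assert not is_scalar(Duck(np.array(2)))

    # 1-d and higher duck arrays stay list-like, same as real ndarrays
    assert is_list_like(np.array([1, 2]))
    assert is_list_like(Duck(np.array([1, 2])))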