diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi index f91b96dc1b1dc..0c8f2baabc804 100644 --- a/pandas/_libs/lib.pyi +++ b/pandas/_libs/lib.pyi @@ -190,11 +190,7 @@ def maybe_indices_to_slice( max_len: int, ) -> slice | np.ndarray: ... # np.ndarray[np.uint8] -def clean_index_list(obj: list) -> tuple[ - list | np.ndarray, # np.ndarray[object | np.int64 | np.uint64] - bool, -]: ... - +def is_all_arraylike(obj: list) -> bool: ... # ----------------------------------------------------------------- # Functions which in reality take memoryviews diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 4b5ef3e909a00..1a07b76583fca 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -740,19 +740,15 @@ cpdef ndarray[object] ensure_string_array( return result -@cython.wraparound(False) -@cython.boundscheck(False) -def clean_index_list(obj: list): +def is_all_arraylike(obj: list) -> bool: """ - Utility used in ``pandas.core.indexes.api.ensure_index``. + Should we treat these as levels of a MultiIndex, as opposed to Index items? """ cdef: Py_ssize_t i, n = len(obj) object val bint all_arrays = True - # First check if we have a list of arraylikes, in which case we will - # pass them to MultiIndex.from_arrays for i in range(n): val = obj[i] if not (isinstance(val, list) or @@ -762,31 +758,7 @@ def clean_index_list(obj: list): all_arrays = False break - if all_arrays: - return obj, all_arrays - - # don't force numpy coerce with nan's - inferred = infer_dtype(obj, skipna=False) - if inferred in ['string', 'bytes', 'mixed', 'mixed-integer']: - return np.asarray(obj, dtype=object), 0 - elif inferred in ['integer']: - # we infer an integer but it *could* be a uint64 - - arr = np.asarray(obj) - if arr.dtype.kind not in ["i", "u"]: - # eg [0, uint64max] gets cast to float64, - # but then we know we have either uint64 or object - if (arr < 0).any(): - # TODO: similar to maybe_cast_to_integer_array - return np.asarray(obj, dtype="object"), 0 - - # GH#35481 - guess = np.asarray(obj, dtype="uint64") - return guess, 0 - - return arr, 0 - - return np.asarray(obj), 0 + return all_arrays # ------------------------------------------------------------------------------ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 124903446220d..96278f5686b57 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1,6 +1,5 @@ from __future__ import annotations -from copy import copy as copy_func from datetime import datetime import functools from itertools import zip_longest @@ -6312,21 +6311,15 @@ def ensure_index(index_like: AnyArrayLike | Sequence, copy: bool = False) -> Ind # check in clean_index_list index_like = list(index_like) - converted, all_arrays = lib.clean_index_list(index_like) - - if len(converted) > 0 and all_arrays: + if len(index_like) and lib.is_all_arraylike(index_like): from pandas.core.indexes.multi import MultiIndex - return MultiIndex.from_arrays(converted) + return MultiIndex.from_arrays(index_like) else: - index_like = converted + return Index(index_like, copy=copy, tupleize_cols=False) else: - # clean_index_list does the equivalent of copying - # so only need to do this if not list instance - if copy: - index_like = copy_func(index_like) - return Index(index_like) + return Index(index_like, copy=copy) def ensure_has_len(seq): diff --git a/pandas/tests/indexes/numeric/test_numeric.py b/pandas/tests/indexes/numeric/test_numeric.py index c796a25faf0a6..9572aeaf41c91 100644 --- a/pandas/tests/indexes/numeric/test_numeric.py +++ b/pandas/tests/indexes/numeric/test_numeric.py @@ -531,6 +531,14 @@ def test_constructor(self, dtype): res = Index([1, 2 ** 63 + 1], dtype=dtype) tm.assert_index_equal(res, idx) + @pytest.mark.xfail(reason="https://github.com/numpy/numpy/issues/19146") + def test_constructor_does_not_cast_to_float(self): + # https://github.com/numpy/numpy/issues/19146 + values = [0, np.iinfo(np.uint64).max] + + result = UInt64Index(values) + assert list(result) == values + @pytest.mark.parametrize( "box", diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index f75e4af888643..d7abaf0b5dfbe 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1622,6 +1622,18 @@ def test_ensure_index_mixed_closed_intervals(self): expected = Index(intervals, dtype=object) tm.assert_index_equal(result, expected) + def test_ensure_index_uint64(self): + # with both 0 and a large-uint64, np.array will infer to float64 + # https://github.com/numpy/numpy/issues/19146 + # but a more accurate choice would be uint64 + values = [0, np.iinfo(np.uint64).max] + + result = ensure_index(values) + assert list(result) == values + + expected = Index(values, dtype="uint64") + tm.assert_index_equal(result, expected) + def test_get_combined_index(self): result = _get_combined_index([]) expected = Index([]) diff --git a/pandas/tests/libs/test_lib.py b/pandas/tests/libs/test_lib.py index 0b1f807f2da63..5b7e90fe16d8f 100644 --- a/pandas/tests/libs/test_lib.py +++ b/pandas/tests/libs/test_lib.py @@ -206,15 +206,3 @@ def test_no_default_pickle(): # GH#40397 obj = tm.round_trip_pickle(lib.no_default) assert obj is lib.no_default - - -def test_clean_index_list(): - # with both 0 and a large-uint64, np.array will infer to float64 - # https://github.com/numpy/numpy/issues/19146 - # but a more accurate choice would be uint64 - values = [0, np.iinfo(np.uint64).max] - - result, _ = lib.clean_index_list(values) - - expected = np.array(values, dtype="uint64") - tm.assert_numpy_array_equal(result, expected, check_dtype=True)