From ee8911815ba164655d7b47324906882ae6c13fab Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Wed, 2 Jun 2021 06:45:06 -0700
Subject: [PATCH] BUG: clean_index_list handle uint64 case

---
 pandas/_libs/lib.pyi          |  2 +-
 pandas/_libs/lib.pyx          | 28 ++++++++++++++++++++--------
 pandas/core/indexes/base.py   | 28 ++++++----------------------
 pandas/core/indexing.py       |  4 +++-
 pandas/tests/libs/test_lib.py | 12 ++++++++++++
 5 files changed, 42 insertions(+), 32 deletions(-)

diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi
index 06620c2ad0dca..92daad2d6a5d7 100644
--- a/pandas/_libs/lib.pyi
+++ b/pandas/_libs/lib.pyi
@@ -185,7 +185,7 @@ def maybe_indices_to_slice(
 ) -> slice | np.ndarray: ...  # np.ndarray[np.uint8]
 
 def clean_index_list(obj: list) -> tuple[
-    list | np.ndarray,  # np.ndarray[object] | np.ndarray[np.int64]
+    list | np.ndarray,  # np.ndarray[object | np.int64 | np.uint64]
     bool,
 ]: ...
 
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 4d184ee13e3db..cbe5a556d55b0 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -747,10 +747,14 @@ def clean_index_list(obj: list):
         object val
         bint all_arrays = True
 
+    # First check if we have a list of arraylikes, in which case we will
+    #  pass them to MultiIndex.from_arrays
     for i in range(n):
         val = obj[i]
         if not (isinstance(val, list) or
                 util.is_array(val) or hasattr(val, '_data')):
+            # TODO: should ExtensionArrays (EA) also count as arraylike here?
+            # exclude tuples, frozensets as they may be contained in an Index
             all_arrays = False
             break
 
@@ -762,11 +766,21 @@ def clean_index_list(obj: list):
     if inferred in ['string', 'bytes', 'mixed', 'mixed-integer']:
         return np.asarray(obj, dtype=object), 0
     elif inferred in ['integer']:
-        # TODO: we infer an integer but it *could* be a uint64
-        try:
-            return np.asarray(obj, dtype='int64'), 0
-        except OverflowError:
-            return np.asarray(obj, dtype='object'), 0
+        # we infer an integer but it *could* be a uint64
+
+        arr = np.asarray(obj)
+        if arr.dtype.kind not in ["i", "u"]:
+            # e.g. [0, uint64max] is inferred as float64 by np.asarray,
+            #  in which case the result must be either uint64 or object
+            if (arr < 0).any():
+                # TODO: similar to maybe_cast_to_integer_array
+                return np.asarray(obj, dtype="object"), 0
+
+            # GH#35481
+            guess = np.asarray(obj, dtype="uint64")
+            return guess, 0
+
+        return arr, 0
 
     return np.asarray(obj), 0
 
@@ -1552,9 +1566,7 @@ def infer_dtype(value: object, skipna: bool = True) -> str:
 
     for i in range(n):
         val = values[i]
-        if (util.is_integer_object(val) and
-                not util.is_timedelta64_object(val) and
-                not util.is_datetime64_object(val)):
+        if util.is_integer_object(val):
             return "mixed-integer"
 
     return "mixed"
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 02fd680775141..14ec3d6009b61 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -6299,27 +6299,18 @@ def ensure_index(index_like: AnyArrayLike | Sequence, copy: bool = False) -> Ind
         if copy:
             index_like = index_like.copy()
         return index_like
-    if hasattr(index_like, "name"):
-        # https://github.com/python/mypy/issues/1424
-        # error: Item "ExtensionArray" of "Union[ExtensionArray,
-        # Sequence[Any]]" has no attribute "name"
-        # error: Item "Sequence[Any]" of "Union[ExtensionArray, Sequence[Any]]"
-        # has no attribute "name"
-        # error: "Sequence[Any]" has no attribute "name"
-        # error: Item "Sequence[Any]" of "Union[Series, Sequence[Any]]" has no
-        # attribute "name"
-        # error: Item "Sequence[Any]" of "Union[Any, Sequence[Any]]" has no
-        # attribute "name"
-        name = index_like.name  # type: ignore[union-attr, attr-defined]
+
+    if isinstance(index_like, ABCSeries):
+        name = index_like.name
         return Index(index_like, name=name, copy=copy)
 
     if is_iterator(index_like):
         index_like = list(index_like)
 
-    # must check for exactly list here because of strict type
-    # check in clean_index_list
     if isinstance(index_like, list):
-        if type(index_like) != list:
+        if type(index_like) is not list:
+            # must check for exactly list here because of strict type
+            # check in clean_index_list
             index_like = list(index_like)
 
         converted, all_arrays = lib.clean_index_list(index_like)
@@ -6329,13 +6320,6 @@ def ensure_index(index_like: AnyArrayLike | Sequence, copy: bool = False) -> Ind
 
             return MultiIndex.from_arrays(converted)
         else:
-            if isinstance(converted, np.ndarray) and converted.dtype == np.int64:
-                # Check for overflows if we should actually be uint64
-                # xref GH#35481
-                alt = np.asarray(index_like)
-                if alt.dtype == np.uint64:
-                    converted = alt
-
             index_like = converted
     else:
         # clean_index_list does the equivalent of copying
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index be5b89f08b5ca..d5555561088eb 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -1934,7 +1934,9 @@ def _setitem_with_indexer_missing(self, indexer, value):
             # e.g. 0.0 -> 0
             # GH#12246
             if index.is_unique:
-                new_indexer = index.get_indexer([new_index[-1]])
+                # pass new_index[-1:] instead of [new_index[-1]]
+                #  so that we retain dtype
+                new_indexer = index.get_indexer(new_index[-1:])
                 if (new_indexer != -1).any():
                     # We get only here with loc, so can hard code
                     return self._setitem_with_indexer(new_indexer, value, "loc")
diff --git a/pandas/tests/libs/test_lib.py b/pandas/tests/libs/test_lib.py
index 5b7e90fe16d8f..0b1f807f2da63 100644
--- a/pandas/tests/libs/test_lib.py
+++ b/pandas/tests/libs/test_lib.py
@@ -206,3 +206,15 @@ def test_no_default_pickle():
     # GH#40397
     obj = tm.round_trip_pickle(lib.no_default)
     assert obj is lib.no_default
+
+
+def test_clean_index_list():
+    # with both 0 and a large-uint64, np.array will infer to float64
+    #  https://github.com/numpy/numpy/issues/19146
+    #  but a more accurate choice would be uint64
+    values = [0, np.iinfo(np.uint64).max]
+
+    result, _ = lib.clean_index_list(values)
+
+    expected = np.array(values, dtype="uint64")
+    tm.assert_numpy_array_equal(result, expected, check_dtype=True)