Skip to content

API: add EA._from_scalars / stricter casting of result values back to EA dtype #38315

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ def _reconstruct_data(
if isinstance(values, cls) and values.dtype == dtype:
return values

values = cls._from_sequence(values)
values = cls._from_scalars(values, dtype=dtype)
elif is_bool_dtype(dtype):
values = values.astype(dtype, copy=False)

Expand Down
16 changes: 11 additions & 5 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,12 @@ class ExtensionArray:
# Constructors
# ------------------------------------------------------------------------

@classmethod
def _from_scalars(cls, data, dtype):
if not all(isinstance(v, dtype.type) or isna(v) for v in data):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

isna -> is_valid_nat_for_dtype? (still need to rename to is_valid_na_for_dtype)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this has been renamed to the clearer is_valid_na_for_dtype

raise TypeError("Requires dtype scalars")
return cls._from_sequence(data, dtype=dtype)

@classmethod
def _from_sequence(cls, scalars, *, dtype: Optional[Dtype] = None, copy=False):
"""
Expand Down Expand Up @@ -688,7 +694,7 @@ def fillna(self, value=None, method=None, limit=None):
if method is not None:
func = get_fill_func(method)
new_values = func(self.astype(object), limit=limit, mask=mask)
new_values = self._from_sequence(new_values, dtype=self.dtype)
new_values = self._from_scalars(new_values, dtype=self.dtype)
else:
# fill with value
new_values = self.copy()
Expand Down Expand Up @@ -750,7 +756,7 @@ def shift(self, periods: int = 1, fill_value: object = None) -> ExtensionArray:
if isna(fill_value):
fill_value = self.dtype.na_value

empty = self._from_sequence(
empty = self._from_scalars(
[fill_value] * min(abs(periods), len(self)), dtype=self.dtype
)
if periods > 0:
Expand All @@ -770,7 +776,7 @@ def unique(self):
uniques : ExtensionArray
"""
uniques = unique(self.astype(object))
return self._from_sequence(uniques, dtype=self.dtype)
return self._from_scalars(uniques, dtype=self.dtype)

def searchsorted(self, value, side="left", sorter=None):
"""
Expand Down Expand Up @@ -1080,7 +1086,7 @@ def take(self, indices, allow_fill=False, fill_value=None):

result = take(data, indices, fill_value=fill_value,
allow_fill=allow_fill)
return self._from_sequence(result, dtype=self.dtype)
return self._from_scalars(result, dtype=self.dtype)
"""
# Implementer note: The `fill_value` parameter should be a user-facing
# value, an instance of self.dtype.type. When passed `fill_value=None`,
Expand Down Expand Up @@ -1420,7 +1426,7 @@ def _maybe_convert(arr):
# https://github.com/pandas-dev/pandas/issues/22850
# We catch all regular exceptions here, and fall back
# to an ndarray.
res = maybe_cast_to_extension_array(type(self), arr)
res = maybe_cast_to_extension_array(type(self), arr, self.dtype)
if not isinstance(res, type(self)):
# exception raised in _from_sequence; ensure we have ndarray
res = np.asarray(arr)
Expand Down
7 changes: 7 additions & 0 deletions pandas/core/arrays/boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,13 @@ def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
def dtype(self) -> BooleanDtype:
return self._dtype

@classmethod
def _from_scalars(cls, data, dtype) -> BooleanArray:
    # Validate before delegating: ``dtype.type`` is only the numpy scalar
    # (np.bool_), so the builtin ``bool`` must be accepted explicitly too.
    for scalar in data:
        if isinstance(scalar, (bool, np.bool_)) or isna(scalar):
            continue
        raise TypeError("Requires dtype scalars")
    return cls._from_sequence(data, dtype=dtype)

@classmethod
def _from_sequence(
cls, scalars, *, dtype: Optional[Dtype] = None, copy: bool = False
Expand Down
7 changes: 7 additions & 0 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,13 @@ def _constructor(self) -> Type[Categorical]:
return Categorical

@classmethod
def _from_scalars(cls, data, dtype):
# if not all(
# isinstance(v, dtype.categories.dtype.type) or isna(v) for v in data
# ):
# raise TypeError("Requires dtype scalars")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if not all(x in dtype.categories or is_valid_nat_for_dtype(x, dtype.categories.dtype) for x in data)?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, as mentioned somewhere in #38315 (comment), for categorical we probably want to check if the values are valid categories.

We might already have some functionality for this? Like the main constructor, but then raising an error instead of coercing unknown values to NaN:

In [17]: pd.Categorical(["a", "b", "c"], categories=["a", "b"])
Out[17]: 
['a', 'b', NaN]
Categories (2, object): ['a', 'b']

The above is basically done by _get_codes_for_values, so we might want a version of that which is strict instead of coercing to NaN.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think Categorical._validate_setitem_value does what you're describing

return cls._from_sequence(data, dtype=dtype)

def _from_sequence(cls, scalars, *, dtype: Optional[Dtype] = None, copy=False):
return Categorical(scalars, dtype=dtype, copy=copy)

Expand Down
7 changes: 7 additions & 0 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,13 @@ def _simple_new(
result._dtype = dtype
return result

@classmethod
def _from_scalars(cls, data, dtype):
    # Validate before delegating: ``dtype.type`` is not always Timestamp,
    # so check against Timestamp (or a missing value) explicitly.
    has_invalid = any(
        not (isinstance(value, Timestamp) or isna(value)) for value in data
    )
    if has_invalid:
        raise TypeError("Requires timestamp scalars")
    return cls._from_sequence(data, dtype=dtype)

@classmethod
def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False):
return cls._from_sequence_not_strict(scalars, dtype=dtype, copy=copy)
Expand Down
7 changes: 7 additions & 0 deletions pandas/core/arrays/floating.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,13 @@ def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
)
super().__init__(values, mask, copy=copy)

@classmethod
def _from_scalars(cls, data, dtype):
    # Validate before delegating: ``dtype.type`` is only the numpy scalar,
    # so the builtin ``float`` must be accepted alongside it.
    allowed = (float, dtype.type)
    if any(not isinstance(value, allowed) and not isna(value) for value in data):
        raise TypeError("Requires dtype scalars")
    return cls._from_sequence(data, dtype=dtype)

@classmethod
def _from_sequence(
cls, scalars, *, dtype=None, copy: bool = False
Expand Down
10 changes: 10 additions & 0 deletions pandas/core/arrays/integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,16 @@ def __pos__(self):
def __abs__(self):
return type(self)(np.abs(self._data), self._mask)

@classmethod
def _from_scalars(cls, data, dtype):
# override because dtype.type is only the numpy scalar
# TODO accept float here?
if not all(
isinstance(v, (int, dtype.type, float, np.float_)) or isna(v) for v in data
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for floats require that v.is_integer()?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The coerce_to_array function that is used by _from_sequence already checks for this as well (so we can pass any float through here, as it will be caught later)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DTA/TDA/PA have a _recognized_scalars attribute that could be useful

):
raise TypeError("Requires dtype scalars")
return cls._from_sequence(data, dtype=dtype)

@classmethod
def _from_sequence(
cls, scalars, *, dtype: Optional[Dtype] = None, copy: bool = False
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -882,7 +882,7 @@ def shift(self, periods: int = 1, fill_value: object = None) -> IntervalArray:
fill_value = Index(self._left, copy=False)._na_value
empty = IntervalArray.from_breaks([fill_value] * (empty_len + 1))
else:
empty = self._from_sequence([fill_value] * empty_len)
empty = self._from_scalars([fill_value] * empty_len, self.dtype)

if periods > 0:
a = empty
Expand Down
7 changes: 7 additions & 0 deletions pandas/core/arrays/numpy_.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,13 @@ def __init__(self, values: Union[np.ndarray, PandasArray], copy: bool = False):
self._ndarray = values
self._dtype = PandasDtype(values.dtype)

@classmethod
def _from_scalars(cls, data, dtype):
    # Strict scalar validation is deliberately skipped here: for object
    # dtype, an ``isinstance(v, dtype.type)`` check would reject valid
    # values, so this simply delegates to ``_from_sequence``.
    # doesn't work for object dtype
    # if not all(isinstance(v, dtype.type) or isna(v) for v in data):
    #     raise TypeError("Requires dtype scalars")
    return cls._from_sequence(data, dtype=dtype)

@classmethod
def _from_sequence(
cls, scalars, *, dtype: Optional[Dtype] = None, copy: bool = False
Expand Down
13 changes: 10 additions & 3 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,12 @@ def maybe_cast_result(
"""
dtype = obj.dtype
dtype = maybe_cast_result_dtype(dtype, how)
# result_dtype = maybe_cast_result_dtype(dtype, how)
# if result_dtype is not None:
# # we know what the result dtypes needs to be -> be more permissive in casting
# # (eg ints with nans became floats)
# cls = result_dtype.construct_array_type()
# return cls._from_sequence(obj, dtype=result_dtype)
Comment on lines +348 to +353
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is related to the first case mentioned (when we know the resulting dtype, we should maybe force the result back, instead of relying on strict casting).
So the above code is one possibility: we can change maybe_cast_result_dtype to only return a dtype if it knows what dtype should be and otherwise return None (instead of passing through the original type). This way, we can take a different path for "known" dtypes, vs when guessing the dtype.


assert not is_scalar(result)

Expand Down Expand Up @@ -395,19 +401,20 @@ def maybe_cast_result_dtype(dtype: DtypeObj, how: str) -> DtypeObj:
):
return Float64Dtype()
return dtype
# return None


def maybe_cast_to_extension_array(
cls: Type[ExtensionArray], obj: ArrayLike, dtype: Optional[ExtensionDtype] = None
) -> ArrayLike:
"""
Call to `_from_sequence` that returns the object unchanged on Exception.
Call to `_from_scalars` that returns the object unchanged on Exception.

Parameters
----------
cls : class, subclass of ExtensionArray
obj : arraylike
Values to pass to cls._from_sequence
Values to pass to cls._from_scalars
dtype : ExtensionDtype, optional

Returns
Expand All @@ -429,7 +436,7 @@ def maybe_cast_to_extension_array(
return obj

try:
result = cls._from_sequence(obj, dtype=dtype)
result = cls._from_scalars(obj, dtype=dtype)
except Exception:
# We can't predict what downstream EA constructors may raise
result = obj
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2951,7 +2951,7 @@ def transpose(self, *args, copy: bool = False) -> DataFrame:
arr_type = dtype.construct_array_type()
values = self.values

new_values = [arr_type._from_sequence(row, dtype=dtype) for row in values]
new_values = [arr_type._from_scalars(row, dtype=dtype) for row in values]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in this case we already know we have the correct types, so can't we go directly to _from_sequence?

result = self._constructor(
dict(zip(self.index, new_values)), index=self.columns
)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -957,7 +957,7 @@ def fast_xs(self, loc: int) -> ArrayLike:
result[rl] = blk.iget((i, loc))

if isinstance(dtype, ExtensionDtype):
result = dtype.construct_array_type()._from_sequence(result, dtype=dtype)
result = dtype.construct_array_type()._from_scalars(result, dtype=dtype)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

here too, shouldn't we know we have the correct types?


return result

Expand Down
4 changes: 3 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2913,7 +2913,9 @@ def combine(self, other, func, fill_value=None) -> Series:
# TODO: can we do this for only SparseDtype?
# The function can return something of any type, so check
# if the type is compatible with the calling EA.
new_values = maybe_cast_to_extension_array(type(self._values), new_values)
new_values = maybe_cast_to_extension_array(
type(self._values), new_values, self.dtype
)
return self._constructor(new_values, index=new_index, name=new_name)

def combine_first(self, other) -> Series:
Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/extension/decimal/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,13 @@ def __init__(self, values, dtype=None, copy=False, context=None):
def dtype(self):
return self._dtype

@classmethod
def _from_scalars(cls, data, dtype):
    # TODO not needed if we keep the base class method
    # Reject anything that is neither a dtype scalar nor a missing value.
    for value in data:
        if isinstance(value, dtype.type) or pd.isna(value):
            continue
        raise TypeError("Requires dtype scalars")
    return cls._from_sequence(data, dtype=dtype)

@classmethod
def _from_sequence(cls, scalars, dtype=None, copy=False):
return cls(scalars)
Expand Down
9 changes: 8 additions & 1 deletion pandas/tests/indexes/interval/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,15 @@ def test_astype_object(self, index):

def test_astype_category(self, index):
result = index.astype("category")
expected = CategoricalIndex(index.values)
# TODO astype doesn't preserve the exact interval dtype (eg uint64)
# while the CategoricalIndex constructor does -> temporarily also
# here convert to object dtype numpy array.
# Once this is fixed, the commented code can be uncommented
# -> https://github.com/pandas-dev/pandas/issues/38316
# expected = CategoricalIndex(index.values)
expected = CategoricalIndex(np.asarray(index.values))
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I opened an issue for this -> #38316 (it's also mentioned in the comment above)

tm.assert_index_equal(result, expected)
# assert result.dtype.categories.dtype == index.dtype

result = index.astype(CategoricalDtype())
tm.assert_index_equal(result, expected)
Expand Down