pandas-dev · jorisvandenbossche · Apr 24, 2020 · Apr 22, 2020 · Apr 22, 2020 · Apr 22, 2020
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -330,7 +330,7 @@ def __init__(
                 values = _convert_to_list_like(values)
 
                 # By convention, empty lists result in object dtype:
-                sanitize_dtype = "object" if len(values) == 0 else None
+                sanitize_dtype = np.dtype("object") if len(values) == 0 else None
                 null_mask = isna(values)
                 if null_mask.any():
                     values = [values[idx] for idx in np.where(~null_mask)[0]]

diff --git a/pandas/core/construction.py b/pandas/core/construction.py
@@ -13,7 +13,7 @@
 
 from pandas._libs import lib
 from pandas._libs.tslibs import IncompatibleFrequency, OutOfBoundsDatetime
-from pandas._typing import ArrayLike, Dtype
+from pandas._typing import ArrayLike, Dtype, DtypeObj
 
 from pandas.core.dtypes.cast import (
     construct_1d_arraylike_from_scalar,
@@ -36,7 +36,6 @@
     is_list_like,
     is_object_dtype,
     is_timedelta64_ns_dtype,
-    pandas_dtype,
 )
 from pandas.core.dtypes.dtypes import CategoricalDtype, ExtensionDtype, registry
 from pandas.core.dtypes.generic import (
@@ -52,13 +51,12 @@
 if TYPE_CHECKING:
     from pandas.core.series import Series  # noqa: F401
     from pandas.core.indexes.api import Index  # noqa: F401
+    from pandas.core.arrays import ExtensionArray  # noqa: F401
 
 
 def array(
-    data: Sequence[object],
-    dtype: Optional[Union[str, np.dtype, ExtensionDtype]] = None,
-    copy: bool = True,
-) -> ABCExtensionArray:
+    data: Sequence[object], dtype: Optional[Dtype] = None, copy: bool = True,
+) -> "ExtensionArray":
     """
     Create an array.
 
@@ -388,14 +386,16 @@ def extract_array(obj, extract_numpy: bool = False):
 
 
 def sanitize_array(
-    data, index, dtype=None, copy: bool = False, raise_cast_failure: bool = False
-):
+    data,
+    index: Optional["Index"],
+    dtype: Optional[DtypeObj] = None,
+    copy: bool = False,
+    raise_cast_failure: bool = False,
+) -> ArrayLike:
     """
     Sanitize input data to an ndarray, copy if specified, coerce to the
     dtype if specified.
     """
-    if dtype is not None:
-        dtype = pandas_dtype(dtype)
 
     if isinstance(data, ma.MaskedArray):
         mask = ma.getmaskarray(data)
@@ -508,10 +508,7 @@ def sanitize_array(
 
 
 def _try_cast(
-    arr,
-    dtype: Optional[Union[np.dtype, "ExtensionDtype"]],
-    copy: bool,
-    raise_cast_failure: bool,
+    arr, dtype: Optional[DtypeObj], copy: bool, raise_cast_failure: bool,
 ):
     """
     Convert input to numpy ndarray and optionally cast to a given dtype.

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -3,7 +3,7 @@
 """
 
 from datetime import date, datetime, timedelta
-from typing import TYPE_CHECKING, Type
+from typing import TYPE_CHECKING, Any, Optional, Tuple, Type
 
 import numpy as np
 
@@ -17,7 +17,7 @@
     iNaT,
 )
 from pandas._libs.tslibs.timezones import tz_compare
-from pandas._typing import Dtype, DtypeObj
+from pandas._typing import ArrayLike, Dtype, DtypeObj
 from pandas.util._validators import validate_bool_kwarg
 
 from pandas.core.dtypes.common import (
@@ -613,7 +613,7 @@ def _ensure_dtype_type(value, dtype):
     return dtype.type(value)
 
 
-def infer_dtype_from(val, pandas_dtype: bool = False):
+def infer_dtype_from(val, pandas_dtype: bool = False) -> Tuple[DtypeObj, Any]:
     """
     Interpret the dtype from a scalar or array.
 
@@ -630,7 +630,7 @@ def infer_dtype_from(val, pandas_dtype: bool = False):
     return infer_dtype_from_array(val, pandas_dtype=pandas_dtype)
 
 
-def infer_dtype_from_scalar(val, pandas_dtype: bool = False):
+def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> Tuple[DtypeObj, Any]:
     """
     Interpret the dtype from a scalar.
 
@@ -641,7 +641,7 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False):
         If False, scalar belongs to pandas extension types is inferred as
         object
     """
-    dtype = np.object_
+    dtype = np.dtype(object)
 
     # a 1-element ndarray
     if isinstance(val, np.ndarray):
@@ -660,7 +660,7 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False):
         # instead of np.empty (but then you still don't want things
         # coming out as np.str_!
 
-        dtype = np.object_
+        dtype = np.dtype(object)
 
     elif isinstance(val, (np.datetime64, datetime)):
         val = tslibs.Timestamp(val)
@@ -671,30 +671,30 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False):
                 dtype = DatetimeTZDtype(unit="ns", tz=val.tz)
             else:
                 # return datetimetz as object
-                return np.object_, val
+                return np.dtype(object), val
         val = val.value
 
     elif isinstance(val, (np.timedelta64, timedelta)):
         val = tslibs.Timedelta(val).value
         dtype = np.dtype("m8[ns]")
 
     elif is_bool(val):
-        dtype = np.bool_
+        dtype = np.dtype(np.bool_)
 
     elif is_integer(val):
         if isinstance(val, np.integer):
-            dtype = type(val)
+            dtype = np.dtype(type(val))
         else:
-            dtype = np.int64
+            dtype = np.dtype(np.int64)
 
     elif is_float(val):
         if isinstance(val, np.floating):
-            dtype = type(val)
+            dtype = np.dtype(type(val))
         else:
-            dtype = np.float64
+            dtype = np.dtype(np.float64)
 
     elif is_complex(val):
-        dtype = np.complex_
+        dtype = np.dtype(np.complex_)
 
     elif pandas_dtype:
         if lib.is_period(val):
@@ -707,7 +707,7 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False):
     return dtype, val
 
 
-def infer_dtype_from_array(arr, pandas_dtype: bool = False):
+def infer_dtype_from_array(arr, pandas_dtype: bool = False) -> Tuple[DtypeObj, Any]:
     """
     Infer the dtype from an array.
 
@@ -738,7 +738,7 @@ def infer_dtype_from_array(arr, pandas_dtype: bool = False):
     array(['1', '1'], dtype='<U21')
 
     >>> infer_dtype_from_array([1, '1'])
-    (<class 'numpy.object_'>, [1, '1'])
+    (dtype('O'), [1, '1'])
     """
     if isinstance(arr, np.ndarray):
         return arr.dtype, arr
@@ -755,7 +755,7 @@ def infer_dtype_from_array(arr, pandas_dtype: bool = False):
     # don't force numpy coerce with nan's
     inferred = lib.infer_dtype(arr, skipna=False)
     if inferred in ["string", "bytes", "mixed", "mixed-integer"]:
-        return (np.object_, arr)
+        return (np.dtype(np.object_), arr)
 
     arr = np.asarray(arr)
     return arr.dtype, arr
@@ -1469,7 +1469,7 @@ def find_common_type(types):
     return np.find_common_type(types, [])
 
 
-def cast_scalar_to_array(shape, value, dtype=None):
+def cast_scalar_to_array(shape, value, dtype: Optional[DtypeObj] = None) -> np.ndarray:
     """
     Create np.ndarray of specified shape and dtype, filled with values.
 
@@ -1496,7 +1496,9 @@ def cast_scalar_to_array(shape, value, dtype=None):
     return values
 
 
-def construct_1d_arraylike_from_scalar(value, length: int, dtype):
+def construct_1d_arraylike_from_scalar(
+    value, length: int, dtype: DtypeObj
+) -> ArrayLike:
     """
     create a np.ndarray / pandas type of specified shape and dtype
     filled with values
@@ -1505,7 +1507,7 @@ def construct_1d_arraylike_from_scalar(value, length: int, dtype):
     ----------
     value : scalar value
     length : int
-    dtype : pandas_dtype / np.dtype
+    dtype : np.dtype or ExtensionDtype
 
     Returns
     -------
@@ -1517,8 +1519,6 @@ def construct_1d_arraylike_from_scalar(value, length: int, dtype):
         subarr = cls._from_sequence([value] * length, dtype=dtype)
 
     else:
-        if not isinstance(dtype, (np.dtype, type(np.dtype))):
-            dtype = dtype.dtype
 
         if length and is_integer_dtype(dtype) and isna(value):
             # coerce if we have nan for an integer dtype
@@ -1536,7 +1536,7 @@ def construct_1d_arraylike_from_scalar(value, length: int, dtype):
     return subarr
 
 
-def construct_1d_object_array_from_listlike(values):
+def construct_1d_object_array_from_listlike(values) -> np.ndarray:
     """
     Transform any list-like object in a 1-dimensional numpy array of object
     dtype.
@@ -1561,7 +1561,9 @@ def construct_1d_object_array_from_listlike(values):
     return result
 
 
-def construct_1d_ndarray_preserving_na(values, dtype=None, copy: bool = False):
+def construct_1d_ndarray_preserving_na(
+    values, dtype: Optional[DtypeObj] = None, copy: bool = False
+) -> np.ndarray:
     """
     Construct a new ndarray, coercing `values` to `dtype`, preserving NA.
 

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -104,6 +104,7 @@
     is_scalar,
     is_sequence,
     needs_i8_conversion,
+    pandas_dtype,
 )
 from pandas.core.dtypes.generic import (
     ABCDataFrame,
@@ -1943,6 +1944,9 @@ def _from_arrays(
         -------
         DataFrame
         """
+        if dtype is not None:
+            dtype = pandas_dtype(dtype)
+
         mgr = arrays_to_mgr(
             arrays,
             columns,