From e471c496ca3a0631727ad69c79dbbc825d4a4b48 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 20 May 2021 15:52:34 -0700 Subject: [PATCH 1/3] REF: _try_cast; go through fastpath more often, closes #28145 --- pandas/core/construction.py | 33 ++++++++++++++++++++++++--------- pandas/core/dtypes/cast.py | 17 ----------------- pandas/core/dtypes/common.py | 15 --------------- 3 files changed, 24 insertions(+), 41 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 0fef02b1489ac..d0ad05554ad34 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -39,9 +39,9 @@ construct_1d_object_array_from_listlike, maybe_cast_to_datetime, maybe_cast_to_integer_array, - maybe_castable, maybe_convert_platform, maybe_upcast, + sanitize_to_nanoseconds, ) from pandas.core.dtypes.common import ( is_datetime64_ns_dtype, @@ -664,30 +664,45 @@ def _try_cast( # perf shortcut as this is the most common case if ( isinstance(arr, np.ndarray) - and maybe_castable(arr.dtype) + and arr.dtype != object and not copy and dtype is None ): - return arr + return sanitize_to_nanoseconds(arr) - if isinstance(dtype, ExtensionDtype) and not isinstance(dtype, DatetimeTZDtype): + if isinstance(dtype, ExtensionDtype): # create an extension array from its dtype # DatetimeTZ case needs to go through maybe_cast_to_datetime but # SparseDtype does not + if isinstance(dtype, DatetimeTZDtype): + # We can't go through _from_sequence because it handles dt64naive + # data differently; _from_sequence treats naive as wall times, + # while maybe_cast_to_datetime treats it as UTC + # see test_maybe_promote_any_numpy_dtype_with_datetimetz + + # error: Incompatible return value type (got "Union[ExtensionArray, + # ndarray, List[Any]]", expected "Union[ExtensionArray, ndarray]") + return maybe_cast_to_datetime(arr, dtype) # type: ignore[return-value] + # TODO: copy? + array_type = dtype.construct_array_type()._from_sequence subarr = array_type(arr, dtype=dtype, copy=copy) return subarr - if is_object_dtype(dtype) and not isinstance(arr, np.ndarray): - subarr = construct_1d_object_array_from_listlike(arr) - return subarr + elif is_object_dtype(dtype): + if not isinstance(arr, np.ndarray): + subarr = construct_1d_object_array_from_listlike(arr) + return subarr + return ensure_wrapped_if_datetimelike(arr).astype(dtype, copy=copy) - if dtype is None and isinstance(arr, list): + elif dtype is None and isinstance(arr, list): # filter out cases that we _dont_ want to go through maybe_cast_to_datetime varr = np.array(arr, copy=False) if varr.dtype != object or varr.size == 0: return varr - arr = varr + # error: Incompatible return value type (got "Union[ExtensionArray, + # ndarray, List[Any]]", expected "Union[ExtensionArray, ndarray]") + return maybe_cast_to_datetime(varr, None) # type: ignore[return-value] try: # GH#15832: Check if we are requesting a numeric dtype and diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 94cffe8fb840d..e35026205f25f 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -45,7 +45,6 @@ from pandas.core.dtypes.common import ( DT64NS_DTYPE, - POSSIBLY_CAST_DTYPES, TD64NS_DTYPE, ensure_int8, ensure_int16, @@ -58,7 +57,6 @@ is_complex, is_complex_dtype, is_datetime64_dtype, - is_datetime64_ns_dtype, is_datetime64tz_dtype, is_datetime_or_timedelta_dtype, is_dtype_equal, @@ -73,7 +71,6 @@ is_sparse, is_string_dtype, is_timedelta64_dtype, - is_timedelta64_ns_dtype, is_unsigned_integer_dtype, pandas_dtype, ) @@ -1466,20 +1463,6 @@ def convert_dtypes( return inferred_dtype -def maybe_castable(dtype: np.dtype) -> bool: - # return False to force a non-fastpath - - # check datetime64[ns]/timedelta64[ns] are valid - # otherwise try to coerce - kind = dtype.kind - if kind == "M": - return is_datetime64_ns_dtype(dtype) - elif kind == "m": - return is_timedelta64_ns_dtype(dtype) - - return dtype.name not in POSSIBLY_CAST_DTYPES - - def maybe_infer_to_datetimelike( value: np.ndarray, ) -> np.ndarray | DatetimeArray | TimedeltaArray: diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 593e42f7ed749..3f43681687945 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -58,21 +58,6 @@ is_sequence, ) -POSSIBLY_CAST_DTYPES = { - np.dtype(t).name - for t in [ - "O", - "int8", - "uint8", - "int16", - "uint16", - "int32", - "uint32", - "int64", - "uint64", - ] -} - DT64NS_DTYPE = conversion.DT64NS_DTYPE TD64NS_DTYPE = conversion.TD64NS_DTYPE INT64_DTYPE = np.dtype(np.int64) From 59ddbc096abb5dc2ec6b81876a6e832bbc187878 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 21 May 2021 10:15:17 -0700 Subject: [PATCH 2/3] PERF: single isinstance check --- pandas/core/construction.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index e3cb6c63768c4..e67ca16506687 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -656,13 +656,10 @@ def _try_cast( ------- np.ndarray or ExtensionArray """ + is_ndarray = isinstance(arr, np.ndarray) + # perf shortcut as this is the most common case - if ( - isinstance(arr, np.ndarray) - and arr.dtype != object - and not copy - and dtype is None - ): + if is_ndarray and arr.dtype != object and not copy and dtype is None: return sanitize_to_nanoseconds(arr) if isinstance(dtype, ExtensionDtype): @@ -685,12 +682,12 @@ def _try_cast( return subarr elif is_object_dtype(dtype): - if not isinstance(arr, np.ndarray): + if not is_ndarray: subarr = construct_1d_object_array_from_listlike(arr) return subarr return ensure_wrapped_if_datetimelike(arr).astype(dtype, copy=copy) - elif dtype is None and isinstance(arr, list): + elif dtype is None and not is_ndarray: # filter out cases that we _dont_ want to go through maybe_cast_to_datetime varr = np.array(arr, copy=False) if varr.dtype != object or varr.size == 0: From fabbc423edb008d2298e1faf0a3cfe469e16fffa Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 21 May 2021 13:43:11 -0700 Subject: [PATCH 3/3] mypy fixup --- pandas/core/construction.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index e67ca16506687..51b9ed5fd22c7 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -659,8 +659,16 @@ def _try_cast( is_ndarray = isinstance(arr, np.ndarray) # perf shortcut as this is the most common case - if is_ndarray and arr.dtype != object and not copy and dtype is None: - return sanitize_to_nanoseconds(arr) + # Item "List[Any]" of "Union[List[Any], ndarray]" has no attribute "dtype" + if ( + is_ndarray + and arr.dtype != object # type: ignore[union-attr] + and not copy + and dtype is None + ): + # Argument 1 to "sanitize_to_nanoseconds" has incompatible type + # "Union[List[Any], ndarray]"; expected "ndarray" + return sanitize_to_nanoseconds(arr) # type: ignore[arg-type] if isinstance(dtype, ExtensionDtype): # create an extension array from its dtype