diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index a7379376c2f78..1dce5c2be809b 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -3,7 +3,18 @@ """ from datetime import date, datetime, timedelta -from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Type +from typing import ( + TYPE_CHECKING, + Any, + List, + Optional, + Sequence, + Set, + Sized, + Tuple, + Type, + Union, +) import numpy as np @@ -18,7 +29,7 @@ ints_to_pydatetime, ) from pandas._libs.tslibs.timezones import tz_compare -from pandas._typing import ArrayLike, Dtype, DtypeObj +from pandas._typing import AnyArrayLike, ArrayLike, Dtype, DtypeObj, Scalar from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.common import ( @@ -83,6 +94,8 @@ if TYPE_CHECKING: from pandas import Series from pandas.core.arrays import ExtensionArray + from pandas.core.indexes.base import Index + from pandas.core.indexes.datetimes import DatetimeIndex _int8_max = np.iinfo(np.int8).max _int16_max = np.iinfo(np.int16).max @@ -118,7 +131,7 @@ def is_nested_object(obj) -> bool: return False -def maybe_downcast_to_dtype(result, dtype): +def maybe_downcast_to_dtype(result, dtype: Dtype): """ try to cast to the specified dtype (e.g. convert back to bool/int or could be an astype of float64->float32 @@ -186,7 +199,7 @@ def maybe_downcast_to_dtype(result, dtype): return result -def maybe_downcast_numeric(result, dtype, do_round: bool = False): +def maybe_downcast_numeric(result, dtype: DtypeObj, do_round: bool = False): """ Subset of maybe_downcast_to_dtype restricted to numeric dtypes. @@ -329,7 +342,9 @@ def maybe_cast_result_dtype(dtype: DtypeObj, how: str) -> DtypeObj: return dtype -def maybe_cast_to_extension_array(cls: Type["ExtensionArray"], obj, dtype=None): +def maybe_cast_to_extension_array( + cls: Type["ExtensionArray"], obj: ArrayLike, dtype: Optional[ExtensionDtype] = None +) -> ArrayLike: """ Call to `_from_sequence` that returns the object unchanged on Exception. @@ -362,7 +377,9 @@ def maybe_cast_to_extension_array(cls: Type["ExtensionArray"], obj, dtype=None): return result -def maybe_upcast_putmask(result: np.ndarray, mask: np.ndarray, other): +def maybe_upcast_putmask( + result: np.ndarray, mask: np.ndarray, other: Scalar +) -> Tuple[np.ndarray, bool]: """ A safe version of putmask that potentially upcasts the result. @@ -444,7 +461,9 @@ def changeit(): return result, False -def maybe_casted_values(index, codes=None): +def maybe_casted_values( + index: "Index", codes: Optional[np.ndarray] = None +) -> ArrayLike: """ Convert an index, given directly or as a pair (level, code), to a 1D array. @@ -468,7 +487,7 @@ def maybe_casted_values(index, codes=None): # if we have the codes, extract the values with a mask if codes is not None: - mask = codes == -1 + mask: np.ndarray = codes == -1 # we can have situations where the whole mask is -1, # meaning there is nothing found in codes, so make all nan's @@ -660,7 +679,7 @@ def maybe_promote(dtype, fill_value=np.nan): return dtype, fill_value -def _ensure_dtype_type(value, dtype): +def _ensure_dtype_type(value, dtype: DtypeObj): """ Ensure that the given value is an instance of the given dtype. @@ -786,8 +805,9 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> Tuple[DtypeObj, return dtype, val -# TODO: try to make the Any in the return annotation more specific -def infer_dtype_from_array(arr, pandas_dtype: bool = False) -> Tuple[DtypeObj, Any]: +def infer_dtype_from_array( + arr, pandas_dtype: bool = False +) -> Tuple[DtypeObj, ArrayLike]: """ Infer the dtype from an array. @@ -875,7 +895,12 @@ def maybe_infer_dtype_type(element): return tipo -def maybe_upcast(values, fill_value=np.nan, dtype=None, copy: bool = False): +def maybe_upcast( + values: ArrayLike, + fill_value: Scalar = np.nan, + dtype: Dtype = None, + copy: bool = False, +) -> Tuple[ArrayLike, Scalar]: """ Provide explicit type promotion and coercion. @@ -887,6 +912,13 @@ def maybe_upcast(values, fill_value=np.nan, dtype=None, copy: bool = False): dtype : if None, then use the dtype of the values, else coerce to this type copy : bool, default True If True always make a copy even if no upcast is required. + + Returns + ------- + values: ndarray or ExtensionArray + the original array, possibly upcast + fill_value: + the fill value, possibly upcast """ if not is_scalar(fill_value) and not is_object_dtype(values.dtype): # We allow arbitrary fill values for object dtype @@ -907,7 +939,7 @@ def maybe_upcast(values, fill_value=np.nan, dtype=None, copy: bool = False): return values, fill_value -def invalidate_string_dtypes(dtype_set): +def invalidate_string_dtypes(dtype_set: Set[DtypeObj]): """ Change string like dtypes to object for ``DataFrame.select_dtypes()``. @@ -929,7 +961,7 @@ def coerce_indexer_dtype(indexer, categories): return ensure_int64(indexer) -def coerce_to_dtypes(result, dtypes): +def coerce_to_dtypes(result: Sequence[Scalar], dtypes: Sequence[Dtype]) -> List[Scalar]: """ given a dtypes and a result set, coerce the result elements to the dtypes @@ -959,7 +991,9 @@ def conv(r, dtype): return [conv(r, dtype) for r, dtype in zip(result, dtypes)] -def astype_nansafe(arr, dtype, copy: bool = True, skipna: bool = False): +def astype_nansafe( + arr, dtype: DtypeObj, copy: bool = True, skipna: bool = False +) -> ArrayLike: """ Cast the elements of an array to a given dtype a nan-safe manner. @@ -1063,7 +1097,9 @@ def astype_nansafe(arr, dtype, copy: bool = True, skipna: bool = False): return arr.view(dtype) -def maybe_convert_objects(values: np.ndarray, convert_numeric: bool = True): +def maybe_convert_objects( + values: np.ndarray, convert_numeric: bool = True +) -> Union[np.ndarray, "DatetimeIndex"]: """ If we have an object dtype array, try to coerce dates and/or numbers. @@ -1184,7 +1220,7 @@ def soft_convert_objects( def convert_dtypes( - input_array, + input_array: AnyArrayLike, convert_string: bool = True, convert_integer: bool = True, convert_boolean: bool = True, @@ -1195,7 +1231,7 @@ def convert_dtypes( Parameters ---------- - input_array : ExtensionArray or PandasArray + input_array : ExtensionArray, Index, Series or np.ndarray convert_string : bool, default True Whether object dtypes should be converted to ``StringDtype()``. convert_integer : bool, default True @@ -1250,9 +1286,11 @@ def convert_dtypes( return inferred_dtype -def maybe_castable(arr) -> bool: +def maybe_castable(arr: np.ndarray) -> bool: # return False to force a non-fastpath + assert isinstance(arr, np.ndarray) # GH 37024 + # check datetime64[ns]/timedelta64[ns] are valid # otherwise try to coerce kind = arr.dtype.kind @@ -1264,7 +1302,9 @@ def maybe_castable(arr) -> bool: return arr.dtype.name not in POSSIBLY_CAST_DTYPES -def maybe_infer_to_datetimelike(value, convert_dates: bool = False): +def maybe_infer_to_datetimelike( + value: Union[ArrayLike, Scalar], convert_dates: bool = False +): """ we might have a array (or single object) that is datetime like, and no dtype is passed don't change the value unless we find a @@ -1373,7 +1413,7 @@ def try_timedelta(v): return value -def maybe_cast_to_datetime(value, dtype, errors: str = "raise"): +def maybe_cast_to_datetime(value, dtype: DtypeObj, errors: str = "raise"): """ try to cast the array/value to a datetimelike dtype, converting float nan to iNaT @@ -1566,7 +1606,9 @@ def find_common_type(types: List[DtypeObj]) -> DtypeObj: return np.find_common_type(types, []) -def cast_scalar_to_array(shape, value, dtype: Optional[DtypeObj] = None) -> np.ndarray: +def cast_scalar_to_array( + shape: Tuple, value: Scalar, dtype: Optional[DtypeObj] = None +) -> np.ndarray: """ Create np.ndarray of specified shape and dtype, filled with values. @@ -1594,7 +1636,7 @@ def cast_scalar_to_array(shape, value, dtype: Optional[DtypeObj] = None) -> np.n def construct_1d_arraylike_from_scalar( - value, length: int, dtype: DtypeObj + value: Scalar, length: int, dtype: DtypeObj ) -> ArrayLike: """ create a np.ndarray / pandas type of specified shape and dtype @@ -1638,7 +1680,7 @@ def construct_1d_arraylike_from_scalar( return subarr -def construct_1d_object_array_from_listlike(values) -> np.ndarray: +def construct_1d_object_array_from_listlike(values: Sized) -> np.ndarray: """ Transform any list-like object in a 1-dimensional numpy array of object dtype. @@ -1664,7 +1706,7 @@ def construct_1d_object_array_from_listlike(values) -> np.ndarray: def construct_1d_ndarray_preserving_na( - values, dtype: Optional[DtypeObj] = None, copy: bool = False + values: Sequence, dtype: Optional[DtypeObj] = None, copy: bool = False ) -> np.ndarray: """ Construct a new ndarray, coercing `values` to `dtype`, preserving NA. @@ -1698,7 +1740,7 @@ def construct_1d_ndarray_preserving_na( return subarr -def maybe_cast_to_integer_array(arr, dtype, copy: bool = False): +def maybe_cast_to_integer_array(arr, dtype: Dtype, copy: bool = False): """ Takes any dtype and returns the casted version, raising for when data is incompatible with integer/unsigned integer dtypes. @@ -1768,7 +1810,7 @@ def maybe_cast_to_integer_array(arr, dtype, copy: bool = False): raise ValueError("Trying to coerce float values to integers") -def convert_scalar_for_putitemlike(scalar, dtype: np.dtype): +def convert_scalar_for_putitemlike(scalar: Scalar, dtype: np.dtype) -> Scalar: """ Convert datetimelike scalar if we are setting into a datetime64 or timedelta64 ndarray. @@ -1799,7 +1841,7 @@ def convert_scalar_for_putitemlike(scalar, dtype: np.dtype): return scalar -def validate_numeric_casting(dtype: np.dtype, value): +def validate_numeric_casting(dtype: np.dtype, value: Scalar) -> None: """ Check that we can losslessly insert the given value into an array with the given dtype.