From c4774ffea22b6a1abad61ce3b213ef80c1dfd05b Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Mon, 27 Jan 2020 15:38:51 +0000 Subject: [PATCH 1/3] TYP: core.arrays.numpy_ --- pandas/core/arrays/numpy_.py | 107 +++++++++++++++++++++++------------ 1 file changed, 71 insertions(+), 36 deletions(-) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 075096f6cfb54..a8ff61baa8253 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -1,5 +1,5 @@ import numbers -from typing import Union +from typing import Optional, Tuple, Type, TypeVar, Union import numpy as np from numpy.lib.mixins import NDArrayOperatorsMixin @@ -35,52 +35,68 @@ class PandasDtype(ExtensionDtype): Parameters ---------- - dtype : numpy.dtype + dtype : object + Object to be converted to a NumPy data type object. + + See Also + -------- + numpy.dtype """ _metadata = ("_dtype",) - def __init__(self, dtype): - dtype = np.dtype(dtype) - self._dtype = dtype - self._type = dtype.type + def __init__(self, dtype: object): + self._dtype = np.dtype(dtype) def __repr__(self) -> str: return f"PandasDtype({repr(self.name)})" @property - def numpy_dtype(self): - """The NumPy dtype this PandasDtype wraps.""" + def numpy_dtype(self) -> np.dtype: + """ + The NumPy dtype this PandasDtype wraps. + """ return self._dtype @property - def name(self): + def name(self) -> str: + """ + A bit-width name for this data-type. + + Un-sized flexible data-type objects do not have this attribute. + """ return self._dtype.name @property - def type(self): - return self._type + def type(self) -> Type[np.generic]: + """ + The type object used to instantiate a scalar of this NumPy data-type. + """ + return self._dtype.type @property - def _is_numeric(self): + def _is_numeric(self) -> bool: # exclude object, str, unicode, void. return self.kind in set("biufc") @property - def _is_boolean(self): + def _is_boolean(self) -> bool: return self.kind == "b" @classmethod - def construct_from_string(cls, string): + def construct_from_string(cls, string: str) -> "PandasDtype": try: - return cls(np.dtype(string)) + dtype = np.dtype(string) except TypeError as err: - raise TypeError( - f"Cannot construct a 'PandasDtype' from '{string}'" - ) from err + if not isinstance(string, str): + msg = f"'construct_from_string' expects a string, got {type(string)}" + else: + msg = f"Cannot construct a 'PandasDtype' from '{string}'" + raise TypeError(msg) from err + return cls(dtype) @classmethod - def construct_array_type(cls): + def construct_array_type(cls) -> Type["PandasArray"]: """ Return the array type associated with this dtype. @@ -91,15 +107,23 @@ def construct_array_type(cls): return PandasArray @property - def kind(self): + def kind(self) -> str: + """ + A character code (one of 'biufcmMOSUV') identifying the general kind of data. + """ return self._dtype.kind @property - def itemsize(self): - """The element size of this data-type object.""" + def itemsize(self) -> int: + """ + The element size of this data-type object. + """ return self._dtype.itemsize +PandasArrayT = TypeVar("PandasArrayT", bound="PandasArray") + + class PandasArray(ExtensionArray, ExtensionOpsMixin, NDArrayOperatorsMixin): """ A pandas ExtensionArray for NumPy data. @@ -136,7 +160,9 @@ class PandasArray(ExtensionArray, ExtensionOpsMixin, NDArrayOperatorsMixin): # ------------------------------------------------------------------------ # Constructors - def __init__(self, values: Union[np.ndarray, "PandasArray"], copy: bool = False): + def __init__( + self: PandasArrayT, values: Union[np.ndarray, PandasArrayT], copy: bool = False + ): if isinstance(values, type(self)): values = values._ndarray if not isinstance(values, np.ndarray): @@ -154,7 +180,9 @@ def __init__(self, values: Union[np.ndarray, "PandasArray"], copy: bool = False) self._dtype = PandasDtype(values.dtype) @classmethod - def _from_sequence(cls, scalars, dtype=None, copy=False): + def _from_sequence( + cls: Type[PandasArrayT], scalars, dtype=None, copy: bool = False + ) -> PandasArrayT: if isinstance(dtype, PandasDtype): dtype = dtype._dtype @@ -164,18 +192,18 @@ def _from_sequence(cls, scalars, dtype=None, copy=False): return cls(result) @classmethod - def _from_factorized(cls, values, original): + def _from_factorized(cls: Type[PandasArrayT], values, original) -> PandasArrayT: return cls(values) @classmethod - def _concat_same_type(cls, to_concat): + def _concat_same_type(cls: Type[PandasArrayT], to_concat) -> PandasArrayT: return cls(np.concatenate(to_concat)) # ------------------------------------------------------------------------ # Data @property - def dtype(self): + def dtype(self) -> PandasDtype: return self._dtype # ------------------------------------------------------------------------ @@ -186,7 +214,7 @@ def __array__(self, dtype=None) -> np.ndarray: _HANDLED_TYPES = (np.ndarray, numbers.Number) - def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + def __array_ufunc__(self, ufunc, method: str, *inputs, **kwargs): # Lightly modified version of # https://docs.scipy.org/doc/numpy-1.15.1/reference/generated/\ # numpy.lib.mixins.NDArrayOperatorsMixin.html @@ -242,7 +270,7 @@ def __getitem__(self, item): result = type(self)(result) return result - def __setitem__(self, key, value): + def __setitem__(self, key, value) -> None: value = extract_array(value, extract_numpy=True) scalar_key = lib.is_scalar(key) @@ -263,10 +291,15 @@ def __len__(self) -> int: def nbytes(self) -> int: return self._ndarray.nbytes - def isna(self): + def isna(self) -> np.ndarray: return isna(self._ndarray) - def fillna(self, value=None, method=None, limit=None): + def fillna( + self: PandasArrayT, + value=None, + method: Optional[str] = None, + limit: Optional[int] = None, + ) -> PandasArrayT: # TODO(_values_for_fillna): remove this value, method = validate_fillna_kwargs(value, method) @@ -293,7 +326,9 @@ def fillna(self, value=None, method=None, limit=None): new_values = self.copy() return new_values - def take(self, indices, allow_fill=False, fill_value=None): + def take( + self: PandasArrayT, indices, allow_fill=False, fill_value=None + ) -> PandasArrayT: if fill_value is None: # Primarily for subclasses fill_value = self.dtype.na_value @@ -302,16 +337,16 @@ def take(self, indices, allow_fill=False, fill_value=None): ) return type(self)(result) - def copy(self): + def copy(self: PandasArrayT) -> PandasArrayT: return type(self)(self._ndarray.copy()) - def _values_for_argsort(self): + def _values_for_argsort(self) -> np.ndarray: return self._ndarray - def _values_for_factorize(self): + def _values_for_factorize(self) -> Tuple[np.ndarray, int]: return self._ndarray, -1 - def unique(self): + def unique(self: PandasArrayT) -> PandasArrayT: return type(self)(unique(self._ndarray)) # ------------------------------------------------------------------------ From 3dfed64934b41de3443013f014c3aba355a0dbdb Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Mon, 27 Jan 2020 19:33:46 +0000 Subject: [PATCH 2/3] update docstring --- pandas/core/arrays/numpy_.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index a8ff61baa8253..b8ed72cc88ed8 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -62,8 +62,6 @@ def numpy_dtype(self) -> np.dtype: def name(self) -> str: """ A bit-width name for this data-type. - - Un-sized flexible data-type objects do not have this attribute. """ return self._dtype.name From 41e16ed213305d078b640895a63d54e35afcc555 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Mon, 27 Jan 2020 19:43:54 +0000 Subject: [PATCH 3/3] remove typevar --- pandas/core/arrays/numpy_.py | 32 ++++++++++---------------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index b8ed72cc88ed8..e63153f3435bb 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -1,5 +1,5 @@ import numbers -from typing import Optional, Tuple, Type, TypeVar, Union +from typing import Optional, Tuple, Type, Union import numpy as np from numpy.lib.mixins import NDArrayOperatorsMixin @@ -119,9 +119,6 @@ def itemsize(self) -> int: return self._dtype.itemsize -PandasArrayT = TypeVar("PandasArrayT", bound="PandasArray") - - class PandasArray(ExtensionArray, ExtensionOpsMixin, NDArrayOperatorsMixin): """ A pandas ExtensionArray for NumPy data. @@ -158,9 +155,7 @@ class PandasArray(ExtensionArray, ExtensionOpsMixin, NDArrayOperatorsMixin): # ------------------------------------------------------------------------ # Constructors - def __init__( - self: PandasArrayT, values: Union[np.ndarray, PandasArrayT], copy: bool = False - ): + def __init__(self, values: Union[np.ndarray, "PandasArray"], copy: bool = False): if isinstance(values, type(self)): values = values._ndarray if not isinstance(values, np.ndarray): @@ -178,9 +173,7 @@ def __init__( self._dtype = PandasDtype(values.dtype) @classmethod - def _from_sequence( - cls: Type[PandasArrayT], scalars, dtype=None, copy: bool = False - ) -> PandasArrayT: + def _from_sequence(cls, scalars, dtype=None, copy: bool = False) -> "PandasArray": if isinstance(dtype, PandasDtype): dtype = dtype._dtype @@ -190,11 +183,11 @@ def _from_sequence( return cls(result) @classmethod - def _from_factorized(cls: Type[PandasArrayT], values, original) -> PandasArrayT: + def _from_factorized(cls, values, original) -> "PandasArray": return cls(values) @classmethod - def _concat_same_type(cls: Type[PandasArrayT], to_concat) -> PandasArrayT: + def _concat_same_type(cls, to_concat) -> "PandasArray": return cls(np.concatenate(to_concat)) # ------------------------------------------------------------------------ @@ -293,11 +286,8 @@ def isna(self) -> np.ndarray: return isna(self._ndarray) def fillna( - self: PandasArrayT, - value=None, - method: Optional[str] = None, - limit: Optional[int] = None, - ) -> PandasArrayT: + self, value=None, method: Optional[str] = None, limit: Optional[int] = None, + ) -> "PandasArray": # TODO(_values_for_fillna): remove this value, method = validate_fillna_kwargs(value, method) @@ -324,9 +314,7 @@ def fillna( new_values = self.copy() return new_values - def take( - self: PandasArrayT, indices, allow_fill=False, fill_value=None - ) -> PandasArrayT: + def take(self, indices, allow_fill=False, fill_value=None) -> "PandasArray": if fill_value is None: # Primarily for subclasses fill_value = self.dtype.na_value @@ -335,7 +323,7 @@ def take( ) return type(self)(result) - def copy(self: PandasArrayT) -> PandasArrayT: + def copy(self) -> "PandasArray": return type(self)(self._ndarray.copy()) def _values_for_argsort(self) -> np.ndarray: @@ -344,7 +332,7 @@ def _values_for_argsort(self) -> np.ndarray: def _values_for_factorize(self) -> Tuple[np.ndarray, int]: return self._ndarray, -1 - def unique(self: PandasArrayT) -> PandasArrayT: + def unique(self) -> "PandasArray": return type(self)(unique(self._ndarray)) # ------------------------------------------------------------------------