Skip to content

REF: Share NumericArray/NumericDtype methods #45997

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Feb 16, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 4 additions & 36 deletions pandas/core/arrays/floating.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
import numpy as np

from pandas._typing import DtypeObj
from pandas.util._decorators import cache_readonly

from pandas.core.dtypes.common import is_float_dtype
from pandas.core.dtypes.dtypes import register_extension_dtype

from pandas.core.arrays.numeric import (
Expand All @@ -24,13 +24,7 @@ class FloatingDtype(NumericDtype):
"""

_default_np_dtype = np.dtype(np.float64)

def __repr__(self) -> str:
return f"{self.name}Dtype()"

@property
def _is_numeric(self) -> bool:
return True
_checker = is_float_dtype

@classmethod
def construct_array_type(cls) -> type[FloatingArray]:
Expand Down Expand Up @@ -58,18 +52,8 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
return None

@classmethod
def _standardize_dtype(cls, dtype) -> FloatingDtype:
if isinstance(dtype, str) and dtype.startswith("Float"):
# Avoid DeprecationWarning from NumPy about np.dtype("Float64")
# https://github.com/numpy/numpy/pull/7476
dtype = dtype.lower()

if not issubclass(type(dtype), FloatingDtype):
try:
dtype = FLOAT_STR_TO_DTYPE[str(np.dtype(dtype))]
except KeyError as err:
raise ValueError(f"invalid dtype specified {dtype}") from err
return dtype
def _str_to_dtype_mapping(cls):
return FLOAT_STR_TO_DTYPE

@classmethod
def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarray:
Expand Down Expand Up @@ -151,22 +135,6 @@ class FloatingArray(NumericArray):
_truthy_value = 1.0
_falsey_value = 0.0

@cache_readonly
def dtype(self) -> FloatingDtype:
return FLOAT_STR_TO_DTYPE[str(self._data.dtype)]

def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
if not (isinstance(values, np.ndarray) and values.dtype.kind == "f"):
raise TypeError(
"values should be floating numpy array. Use "
"the 'pd.array' function instead"
)
if values.dtype == np.float16:
# If we don't raise here, then accessing self.dtype would raise
raise TypeError("FloatingArray does not support np.float16 dtype.")

super().__init__(values, mask, copy=copy)


_dtype_docstring = """
An ExtensionDtype for {dtype} data.
Expand Down
71 changes: 16 additions & 55 deletions pandas/core/arrays/integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
import numpy as np

from pandas._typing import DtypeObj
from pandas.util._decorators import cache_readonly

from pandas.core.dtypes.base import register_extension_dtype
from pandas.core.dtypes.common import is_integer_dtype

from pandas.core.arrays.masked import BaseMaskedDtype
from pandas.core.arrays.numeric import (
Expand All @@ -14,33 +14,18 @@
)


class _IntegerDtype(NumericDtype):
class IntegerDtype(NumericDtype):
"""
An ExtensionDtype to hold a single size & kind of integer dtype.

These specific implementations are subclasses of the non-public
_IntegerDtype. For example we have Int8Dtype to represent signed int 8s.
IntegerDtype. For example we have Int8Dtype to represent signed int 8s.

The attributes name & type are set when these subclasses are created.
"""

_default_np_dtype = np.dtype(np.int64)

def __repr__(self) -> str:
sign = "U" if self.is_unsigned_integer else ""
return f"{sign}Int{8 * self.itemsize}Dtype()"

@cache_readonly
def is_signed_integer(self) -> bool:
return self.kind == "i"

@cache_readonly
def is_unsigned_integer(self) -> bool:
return self.kind == "u"

@property
def _is_numeric(self) -> bool:
return True
_checker = is_integer_dtype

@classmethod
def construct_array_type(cls) -> type[IntegerArray]:
Expand Down Expand Up @@ -86,20 +71,8 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
return None

@classmethod
def _standardize_dtype(cls, dtype) -> _IntegerDtype:
if isinstance(dtype, str) and (
dtype.startswith("Int") or dtype.startswith("UInt")
):
# Avoid DeprecationWarning from NumPy about np.dtype("Int64")
# https://github.com/numpy/numpy/pull/7476
dtype = dtype.lower()

if not issubclass(type(dtype), _IntegerDtype):
try:
dtype = INT_STR_TO_DTYPE[str(np.dtype(dtype))]
except KeyError as err:
raise ValueError(f"invalid dtype specified {dtype}") from err
return dtype
def _str_to_dtype_mapping(cls):
return INT_STR_TO_DTYPE

@classmethod
def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarray:
Expand Down Expand Up @@ -189,26 +162,14 @@ class IntegerArray(NumericArray):
Length: 3, dtype: UInt16
"""

_dtype_cls = _IntegerDtype
_dtype_cls = IntegerDtype

# The value used to fill '_data' to avoid upcasting
_internal_fill_value = 1
# Fill values used for any/all
_truthy_value = 1
_falsey_value = 0

@cache_readonly
def dtype(self) -> _IntegerDtype:
return INT_STR_TO_DTYPE[str(self._data.dtype)]

def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
if not (isinstance(values, np.ndarray) and values.dtype.kind in ["i", "u"]):
raise TypeError(
"values should be integer numpy array. Use "
"the 'pd.array' function instead"
)
super().__init__(values, mask, copy=copy)


_dtype_docstring = """
An ExtensionDtype for {dtype} integer data.
Expand All @@ -231,62 +192,62 @@ def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):


@register_extension_dtype
class Int8Dtype(_IntegerDtype):
class Int8Dtype(IntegerDtype):
type = np.int8
name = "Int8"
__doc__ = _dtype_docstring.format(dtype="int8")


@register_extension_dtype
class Int16Dtype(_IntegerDtype):
class Int16Dtype(IntegerDtype):
type = np.int16
name = "Int16"
__doc__ = _dtype_docstring.format(dtype="int16")


@register_extension_dtype
class Int32Dtype(_IntegerDtype):
class Int32Dtype(IntegerDtype):
type = np.int32
name = "Int32"
__doc__ = _dtype_docstring.format(dtype="int32")


@register_extension_dtype
class Int64Dtype(_IntegerDtype):
class Int64Dtype(IntegerDtype):
type = np.int64
name = "Int64"
__doc__ = _dtype_docstring.format(dtype="int64")


@register_extension_dtype
class UInt8Dtype(_IntegerDtype):
class UInt8Dtype(IntegerDtype):
type = np.uint8
name = "UInt8"
__doc__ = _dtype_docstring.format(dtype="uint8")


@register_extension_dtype
class UInt16Dtype(_IntegerDtype):
class UInt16Dtype(IntegerDtype):
type = np.uint16
name = "UInt16"
__doc__ = _dtype_docstring.format(dtype="uint16")


@register_extension_dtype
class UInt32Dtype(_IntegerDtype):
class UInt32Dtype(IntegerDtype):
type = np.uint32
name = "UInt32"
__doc__ = _dtype_docstring.format(dtype="uint32")


@register_extension_dtype
class UInt64Dtype(_IntegerDtype):
class UInt64Dtype(IntegerDtype):
type = np.uint64
name = "UInt64"
__doc__ = _dtype_docstring.format(dtype="uint64")


INT_STR_TO_DTYPE: dict[str, _IntegerDtype] = {
INT_STR_TO_DTYPE: dict[str, IntegerDtype] = {
"int8": Int8Dtype(),
"int16": Int16Dtype(),
"int32": Int32Dtype(),
Expand Down
64 changes: 59 additions & 5 deletions pandas/core/arrays/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import numbers
from typing import (
TYPE_CHECKING,
Any,
Callable,
TypeVar,
)

Expand All @@ -17,6 +19,7 @@
DtypeObj,
)
from pandas.errors import AbstractMethodError
from pandas.util._decorators import cache_readonly

from pandas.core.dtypes.common import (
is_bool_dtype,
Expand All @@ -41,6 +44,22 @@

class NumericDtype(BaseMaskedDtype):
_default_np_dtype: np.dtype
_checker: Callable[[Any], bool] # is_foo_dtype
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd be more partial to `_dtype_checker`, but won't die on that hill.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure. OK to handle that in a follow-up?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup, sounds good.


def __repr__(self) -> str:
return f"{self.name}Dtype()"

@cache_readonly
def is_signed_integer(self) -> bool:
return self.kind == "i"

@cache_readonly
def is_unsigned_integer(self) -> bool:
return self.kind == "u"

@property
def _is_numeric(self) -> bool:
return True

def __from_arrow__(
self, array: pyarrow.Array | pyarrow.ChunkedArray
Expand Down Expand Up @@ -90,12 +109,27 @@ def __from_arrow__(
else:
return array_class._concat_same_type(results)

@classmethod
def _str_to_dtype_mapping(cls):
raise AbstractMethodError(cls)

@classmethod
def _standardize_dtype(cls, dtype) -> NumericDtype:
"""
Convert a string representation or a numpy dtype to NumericDtype.
"""
raise AbstractMethodError(cls)
if isinstance(dtype, str) and (dtype.startswith(("Int", "UInt", "Float"))):
# Avoid DeprecationWarning from NumPy about np.dtype("Int64")
# https://github.com/numpy/numpy/pull/7476
dtype = dtype.lower()

if not issubclass(type(dtype), cls):
mapping = cls._str_to_dtype_mapping()
try:
dtype = mapping[str(np.dtype(dtype))]
except KeyError as err:
raise ValueError(f"invalid dtype specified {dtype}") from err
return dtype

@classmethod
def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarray:
Expand All @@ -108,10 +142,7 @@ def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarr


def _coerce_to_data_and_mask(values, mask, dtype, copy, dtype_cls, default_dtype):
if default_dtype.kind == "f":
checker = is_float_dtype
else:
checker = is_integer_dtype
checker = dtype_cls._checker

inferred_type = None

Expand Down Expand Up @@ -188,6 +219,29 @@ class NumericArray(BaseMaskedArray):

_dtype_cls: type[NumericDtype]

def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
checker = self._dtype_cls._checker
if not (isinstance(values, np.ndarray) and checker(values.dtype)):
descr = (
"floating"
if self._dtype_cls.kind == "f" # type: ignore[comparison-overlap]
else "integer"
)
raise TypeError(
f"values should be {descr} numpy array. Use "
"the 'pd.array' function instead"
)
if values.dtype == np.float16:
# If we don't raise here, then accessing self.dtype would raise
raise TypeError("FloatingArray does not support np.float16 dtype.")

super().__init__(values, mask, copy=copy)

@cache_readonly
def dtype(self) -> NumericDtype:
mapping = self._dtype_cls._str_to_dtype_mapping()
return mapping[str(self._data.dtype)]

@classmethod
def _coerce_to_array(
cls, value, *, dtype: DtypeObj, copy: bool = False
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/arrays/string_.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
)
from pandas.core.arrays.base import ExtensionArray
from pandas.core.arrays.floating import FloatingDtype
from pandas.core.arrays.integer import _IntegerDtype
from pandas.core.arrays.integer import IntegerDtype
from pandas.core.construction import extract_array
from pandas.core.indexers import check_array_indexer
from pandas.core.missing import isna
Expand Down Expand Up @@ -432,7 +432,7 @@ def astype(self, dtype, copy: bool = True):
return self.copy()
return self

elif isinstance(dtype, _IntegerDtype):
elif isinstance(dtype, IntegerDtype):
arr = self._ndarray.copy()
mask = self.isna()
arr[mask] = 0
Expand Down
Loading