Skip to content

Remove _DtypeOpsMixin #26331

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 12, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
257 changes: 122 additions & 135 deletions pandas/core/dtypes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,141 +8,7 @@
from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries


class _DtypeOpsMixin:
# Not all of pandas' extension dtypes are compatibile with
# the new ExtensionArray interface. This means PandasExtensionDtype
# can't subclass ExtensionDtype yet, as is_extension_array_dtype would
# incorrectly say that these types are extension types.
#
# In the interim, we put methods that are shared between the two base
# classes ExtensionDtype and PandasExtensionDtype here. Both those base
# classes will inherit from this Mixin. Once everything is compatible, this
# class's methods can be moved to ExtensionDtype and removed.

# na_value is the default NA value to use for this type. This is used in
# e.g. ExtensionArray.take. This should be the user-facing "boxed" version
# of the NA value, not the physical NA vaalue for storage.
# e.g. for JSONArray, this is an empty dictionary.
na_value = np.nan
_metadata = () # type: Tuple[str, ...]

def __eq__(self, other):
"""Check whether 'other' is equal to self.

By default, 'other' is considered equal if either

* it's a string matching 'self.name'.
* it's an instance of this type and all of the
the attributes in ``self._metadata`` are equal between
`self` and `other`.

Parameters
----------
other : Any

Returns
-------
bool
"""
if isinstance(other, str):
try:
other = self.construct_from_string(other)
except TypeError:
return False
if isinstance(other, type(self)):
return all(
getattr(self, attr) == getattr(other, attr)
for attr in self._metadata
)
return False

def __hash__(self):
return hash(tuple(getattr(self, attr) for attr in self._metadata))

def __ne__(self, other):
return not self.__eq__(other)

@property
def names(self) -> Optional[List[str]]:
"""Ordered list of field names, or None if there are no fields.

This is for compatibility with NumPy arrays, and may be removed in the
future.
"""
return None

@classmethod
def is_dtype(cls, dtype):
"""Check if we match 'dtype'.

Parameters
----------
dtype : object
The object to check.

Returns
-------
is_dtype : bool

Notes
-----
The default implementation is True if

1. ``cls.construct_from_string(dtype)`` is an instance
of ``cls``.
2. ``dtype`` is an object and is an instance of ``cls``
3. ``dtype`` has a ``dtype`` attribute, and any of the above
conditions is true for ``dtype.dtype``.
"""
dtype = getattr(dtype, 'dtype', dtype)

if isinstance(dtype, (ABCSeries, ABCIndexClass,
ABCDataFrame, np.dtype)):
# https://github.com/pandas-dev/pandas/issues/22960
# avoid passing data to `construct_from_string`. This could
# cause a FutureWarning from numpy about failing elementwise
# comparison from, e.g., comparing DataFrame == 'category'.
return False
elif dtype is None:
return False
elif isinstance(dtype, cls):
return True
try:
return cls.construct_from_string(dtype) is not None
except TypeError:
return False

@property
def _is_numeric(self) -> bool:
"""
Whether columns with this dtype should be considered numeric.

By default ExtensionDtypes are assumed to be non-numeric.
They'll be excluded from operations that exclude non-numeric
columns, like (groupby) reductions, plotting, etc.
"""
return False

@property
def _is_boolean(self) -> bool:
"""
Whether this dtype should be considered boolean.

By default, ExtensionDtypes are assumed to be non-numeric.
Setting this to True will affect the behavior of several places,
e.g.

* is_bool
* boolean indexing

Returns
-------
bool
"""
return False


class ExtensionDtype(_DtypeOpsMixin):
class ExtensionDtype:
"""
A custom data type, to be paired with an ExtensionArray.

Expand Down Expand Up @@ -202,10 +68,52 @@ class property**.
``pandas.errors.AbstractMethodError`` and no ``register`` method is
provided for registering virtual subclasses.
"""
# na_value is the default NA value to use for this type. This is used in
# e.g. ExtensionArray.take. This should be the user-facing "boxed" version
# of the NA value, not the physical NA value for storage.
# e.g. for JSONArray, this is an empty dictionary.
na_value = np.nan
_metadata = () # type: Tuple[str, ...]

def __str__(self):
return self.name

def __eq__(self, other):
"""Check whether 'other' is equal to self.

By default, 'other' is considered equal if either

* it's a string matching 'self.name'.
* it's an instance of this type and all of the
the attributes in ``self._metadata`` are equal between
`self` and `other`.

Parameters
----------
other : Any

Returns
-------
bool
"""
if isinstance(other, str):
try:
other = self.construct_from_string(other)
except TypeError:
return False
if isinstance(other, type(self)):
return all(
getattr(self, attr) == getattr(other, attr)
for attr in self._metadata
)
return False

def __hash__(self):
return hash(tuple(getattr(self, attr) for attr in self._metadata))

def __ne__(self, other):
return not self.__eq__(other)

@property
def type(self) -> Type:
"""
Expand Down Expand Up @@ -243,6 +151,15 @@ def name(self) -> str:
"""
raise AbstractMethodError(self)

@property
def names(self) -> Optional[List[str]]:
    """Ordered list of field names; the default is None (no fields).

    Exists for compatibility with NumPy arrays, and may be removed in
    the future.
    """
    field_names = None
    return field_names

@classmethod
def construct_array_type(cls):
"""
Expand Down Expand Up @@ -286,3 +203,73 @@ def construct_from_string(cls, string):
... "'{}'".format(cls, string))
"""
raise AbstractMethodError(cls)

@classmethod
def is_dtype(cls, dtype):
    """Check if we match 'dtype'.

    Parameters
    ----------
    dtype : object
        The object to check.

    Returns
    -------
    is_dtype : bool

    Notes
    -----
    The default implementation is True if

    1. ``cls.construct_from_string(dtype)`` is an instance
       of ``cls``.
    2. ``dtype`` is an object and is an instance of ``cls``
    3. ``dtype`` has a ``dtype`` attribute, and any of the above
       conditions is true for ``dtype.dtype``.
    """
    # Unwrap array-likes: check their ``dtype`` attribute instead.
    dtype = getattr(dtype, 'dtype', dtype)

    # https://github.com/pandas-dev/pandas/issues/22960
    # avoid passing data to `construct_from_string`. This could
    # cause a FutureWarning from numpy about failing elementwise
    # comparison from, e.g., comparing DataFrame == 'category'.
    containers = (ABCSeries, ABCIndexClass, ABCDataFrame, np.dtype)
    if isinstance(dtype, containers):
        return False
    if dtype is None:
        return False
    if isinstance(dtype, cls):
        return True

    try:
        parsed = cls.construct_from_string(dtype)
    except TypeError:
        return False
    return parsed is not None

@property
def _is_numeric(self) -> bool:
"""
Whether columns with this dtype should be considered numeric.

By default ExtensionDtypes are assumed to be non-numeric.
They'll be excluded from operations that exclude non-numeric
columns, like (groupby) reductions, plotting, etc.
"""
return False

@property
def _is_boolean(self) -> bool:
"""
Whether this dtype should be considered boolean.

By default, ExtensionDtypes are assumed to be non-numeric.
Setting this to True will affect the behavior of several places,
e.g.

* is_bool
* boolean indexing

Returns
-------
bool
"""
return False
6 changes: 2 additions & 4 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@
is_integer, is_integer_dtype, is_object_dtype, is_scalar, is_string_dtype,
is_timedelta64_dtype, is_timedelta64_ns_dtype, is_unsigned_integer_dtype,
pandas_dtype)
from .dtypes import (
DatetimeTZDtype, ExtensionDtype, PandasExtensionDtype, PeriodDtype)
from .dtypes import DatetimeTZDtype, ExtensionDtype, PeriodDtype
from .generic import (
ABCDatetimeArray, ABCDatetimeIndex, ABCPeriodArray, ABCPeriodIndex,
ABCSeries)
Expand Down Expand Up @@ -1108,8 +1107,7 @@ def find_common_type(types):
if all(is_dtype_equal(first, t) for t in types[1:]):
return first

if any(isinstance(t, (PandasExtensionDtype, ExtensionDtype))
for t in types):
if any(isinstance(t, ExtensionDtype) for t in types):
return np.object

# take lowest unit
Expand Down
8 changes: 4 additions & 4 deletions pandas/core/dtypes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from pandas.core.dtypes.dtypes import (
CategoricalDtype, DatetimeTZDtype, ExtensionDtype, IntervalDtype,
PandasExtensionDtype, PeriodDtype, registry)
PeriodDtype, registry)
from pandas.core.dtypes.generic import (
ABCCategorical, ABCDateOffset, ABCDatetimeIndex, ABCIndexClass,
ABCPeriodArray, ABCPeriodIndex, ABCSeries)
Expand Down Expand Up @@ -1888,7 +1888,7 @@ def _is_dtype_type(arr_or_dtype, condition):
if isinstance(arr_or_dtype, np.dtype):
return condition(arr_or_dtype.type)
elif isinstance(arr_or_dtype, type):
if issubclass(arr_or_dtype, (PandasExtensionDtype, ExtensionDtype)):
if issubclass(arr_or_dtype, ExtensionDtype):
arr_or_dtype = arr_or_dtype.type
return condition(np.dtype(arr_or_dtype).type)
elif arr_or_dtype is None:
Expand Down Expand Up @@ -1936,7 +1936,7 @@ def infer_dtype_from_object(dtype):
if isinstance(dtype, type) and issubclass(dtype, np.generic):
# Type object from a dtype
return dtype
elif isinstance(dtype, (np.dtype, PandasExtensionDtype, ExtensionDtype)):
elif isinstance(dtype, (np.dtype, ExtensionDtype)):
# dtype object
try:
_validate_date_like_dtype(dtype)
Expand Down Expand Up @@ -2021,7 +2021,7 @@ def pandas_dtype(dtype):
# short-circuit
if isinstance(dtype, np.ndarray):
return dtype.dtype
elif isinstance(dtype, (np.dtype, PandasExtensionDtype, ExtensionDtype)):
elif isinstance(dtype, (np.dtype, ExtensionDtype)):
return dtype

# registered extension types
Expand Down
12 changes: 6 additions & 6 deletions pandas/core/dtypes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from pandas.core.dtypes.generic import (
ABCCategoricalIndex, ABCDateOffset, ABCIndexClass)

from .base import ExtensionDtype, _DtypeOpsMixin
from .base import ExtensionDtype
from .inference import is_list_like

str_type = str
Expand Down Expand Up @@ -68,7 +68,7 @@ def register(self, dtype):
----------
dtype : ExtensionDtype
"""
if not issubclass(dtype, (PandasExtensionDtype, ExtensionDtype)):
if not issubclass(dtype, ExtensionDtype):
raise ValueError("can only register pandas extension dtypes")

self.dtypes.append(dtype)
Expand Down Expand Up @@ -104,7 +104,7 @@ def find(self, dtype):
registry = Registry()


class PandasExtensionDtype(_DtypeOpsMixin):
class PandasExtensionDtype(ExtensionDtype):
"""
A np.dtype duck-typed class, suitable for holding a custom dtype.

Expand Down Expand Up @@ -577,7 +577,7 @@ def _is_boolean(self):


@register_extension_dtype
class DatetimeTZDtype(PandasExtensionDtype, ExtensionDtype):
class DatetimeTZDtype(PandasExtensionDtype):
"""
An ExtensionDtype for timezone-aware datetime data.

Expand Down Expand Up @@ -737,7 +737,7 @@ def __setstate__(self, state):


@register_extension_dtype
class PeriodDtype(ExtensionDtype, PandasExtensionDtype):
class PeriodDtype(PandasExtensionDtype):
"""
An ExtensionDtype for Period data.

Expand Down Expand Up @@ -894,7 +894,7 @@ def construct_array_type(cls):


@register_extension_dtype
class IntervalDtype(PandasExtensionDtype, ExtensionDtype):
class IntervalDtype(PandasExtensionDtype):
"""
An ExtensionDtype for Interval data.

Expand Down
Loading