From 0a3994b4c32bd22f169940cf8c2169cd7024a349 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 23 Jul 2018 14:16:01 -0500 Subject: [PATCH 1/9] REF/API: Stricter extension checking. Removes is_extension_array_dtype's handling of both arrays and dtypes. Now it handles just arrays, and we provide `is_extension_dtype` for checking whether a dtype is an extension dtype. It's the caller's responsibility to know whether they have an array or dtype. Closes #22021 --- doc/source/api.rst | 2 + doc/source/whatsnew/v0.24.0.txt | 3 +- pandas/core/algorithms.py | 3 +- pandas/core/dtypes/api.py | 2 + pandas/core/dtypes/cast.py | 8 ++-- pandas/core/dtypes/common.py | 57 ++++++++++++++++++------ pandas/core/indexes/base.py | 5 ++- pandas/core/series.py | 3 +- pandas/tests/extension/base/interface.py | 10 +++-- pandas/tests/extension/test_common.py | 16 +++++-- 10 files changed, 80 insertions(+), 29 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 9056b1f47323a..4ec603c4df1aa 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -2501,6 +2501,8 @@ Dtype introspection api.types.is_datetime64_ns_dtype api.types.is_datetime64tz_dtype api.types.is_extension_type + api.types.is_extension_array_dtype + api.types.is_extension_dtype api.types.is_float_dtype api.types.is_int64_dtype api.types.is_integer_dtype diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 973b75f0e1451..8327193a7e78d 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -323,7 +323,8 @@ ExtensionType Changes - Bug in :meth:`Series.get` for ``Series`` using ``ExtensionArray`` and integer index (:issue:`21257`) - :meth:`Series.combine()` works correctly with :class:`~pandas.api.extensions.ExtensionArray` inside of :class:`Series` (:issue:`20825`) - :meth:`Series.combine()` with scalar argument now works for any function type (:issue:`21248`) -- +- Added :func:`pandas.api.types.is_extension_array_dtype` for testing whether an array is an ExtensionArray and :func:`pandas.api.types.is_extension_dtype` for testing whether a dtype is an ExtensionDtype (:issue:`22021`) + .. _whatsnew_0240.api.incompatibilities: diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 78c9113ce60de..a5c3c6af33def 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -19,6 +19,7 @@ is_integer_dtype, is_complex_dtype, is_object_dtype, is_extension_array_dtype, + is_extension_dtype, is_categorical_dtype, is_sparse, is_period_dtype, is_numeric_dtype, is_float_dtype, @@ -153,7 +154,7 @@ def _reconstruct_data(values, dtype, original): Index for extension types, otherwise ndarray casted to dtype """ from pandas import Index - if is_extension_array_dtype(dtype): + if is_extension_dtype(dtype): values = dtype.construct_array_type()._from_sequence(values) elif is_datetime64tz_dtype(dtype) or is_period_dtype(dtype): values = Index(original)._shallow_copy(values, name=None) diff --git a/pandas/core/dtypes/api.py b/pandas/core/dtypes/api.py index 738e1ea9062f6..6b9952b8bbe1b 100644 --- a/pandas/core/dtypes/api.py +++ b/pandas/core/dtypes/api.py @@ -4,6 +4,8 @@ from .common import (pandas_dtype, is_dtype_equal, + is_extension_dtype, + is_extension_array_dtype, is_extension_type, # categorical diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index ead7b39309f5e..71205daa1168e 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -12,7 +12,7 @@ is_complex, is_datetimetz, is_categorical_dtype, is_datetimelike, is_extension_type, - is_extension_array_dtype, + is_extension_dtype, is_object_dtype, is_datetime64tz_dtype, is_datetime64_dtype, is_datetime64_ns_dtype, @@ -294,7 +294,7 @@ def maybe_promote(dtype, fill_value=np.nan): elif is_datetimetz(dtype): if isna(fill_value): fill_value = iNaT - elif is_extension_array_dtype(dtype) and isna(fill_value): + elif is_extension_dtype(dtype) and isna(fill_value): fill_value = dtype.na_value elif is_float(fill_value): if issubclass(dtype.type, np.bool_): @@ -332,7 +332,7 @@ def maybe_promote(dtype, fill_value=np.nan): dtype = np.object_ # in case we have a string that looked like a number - if is_extension_array_dtype(dtype): + if is_extension_dtype(dtype): pass elif is_datetimetz(dtype): pass @@ -650,7 +650,7 @@ def astype_nansafe(arr, dtype, copy=True): need to be very careful as the result shape could change! """ # dispatch on extension dtype if needed - if is_extension_array_dtype(dtype): + if is_extension_dtype(dtype): return dtype.construct_array_type()._from_sequence( arr, dtype=dtype, copy=copy) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 355bf58540219..8a04d488a8df3 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1688,12 +1688,12 @@ def is_extension_type(arr): return False -def is_extension_array_dtype(arr_or_dtype): - """Check if an object is a pandas extension array type. +def is_extension_array_dtype(arr): + """Check if an array object is a pandas extension array type. Parameters ---------- - arr_or_dtype : object + arr : object Returns ------- @@ -1701,25 +1701,56 @@ def is_extension_array_dtype(arr_or_dtype): Notes ----- - This checks whether an object implements the pandas extension + This checks whether an array object implements the pandas extension array interface. In pandas, this includes: * Categorical + * Interval - Third-party libraries may implement arrays or types satisfying + Third-party libraries may implement arrays satisfying this interface as well. - """ - from pandas.core.arrays import ExtensionArray - if isinstance(arr_or_dtype, (ABCIndexClass, ABCSeries)): - arr_or_dtype = arr_or_dtype._values + See Also + -------- + is_extension_dtype : Similar method for dtypes. + """ + from pandas.core.dtypes.base import ExtensionDtype try: - arr_or_dtype = pandas_dtype(arr_or_dtype) - except TypeError: - pass + dtype = getattr(arr, 'dtype') + except AttributeError: + return False + + return isinstance(dtype, ExtensionDtype) - return isinstance(arr_or_dtype, (ExtensionDtype, ExtensionArray)) + +def is_extension_dtype(dtype): + """Check if a dtype object is a pandas extension dtype. + + Parameters + ---------- + arr : object + + Returns + ------- + bool + + Notes + ----- + This checks whether a dtype object implements the pandas extension + array interface. In pandas, this includes: + + * CategoricalDtype + * IntervalDtype + + Third-party libraries may implement dtypes satisfying + this interface as well. + + See Also + -------- + is_extension_array_dtype : Similar method for arrays. + """ + return isinstance(dtype, ExtensionDtype) def is_complex_dtype(arr_or_dtype): diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index cf4b4fe6bc084..b0145ed33693b 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -45,6 +45,7 @@ is_datetime64tz_dtype, is_timedelta64_dtype, is_extension_array_dtype, + is_extension_dtype, is_hashable, is_iterator, is_list_like, is_scalar) @@ -275,7 +276,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, closed=closed) # extension dtype - elif is_extension_array_dtype(data) or is_extension_array_dtype(dtype): + elif is_extension_array_dtype(data) or is_extension_dtype(dtype): data = np.asarray(data) if not (dtype is None or is_object_dtype(dtype)): @@ -1191,7 +1192,7 @@ def astype(self, dtype, copy=True): return CategoricalIndex(self.values, name=self.name, dtype=dtype, copy=copy) - elif is_extension_array_dtype(dtype): + elif is_extension_dtype(dtype): return Index(np.asarray(self), dtype=dtype, copy=copy) try: diff --git a/pandas/core/series.py b/pandas/core/series.py index 3571e908fc6a7..2deb0b4d816ec 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -22,6 +22,7 @@ is_float_dtype, is_extension_type, is_extension_array_dtype, + is_extension_dtype, is_datetimelike, is_datetime64tz_dtype, is_timedelta64_dtype, @@ -4088,7 +4089,7 @@ def _try_cast(arr, take_fast_path): # that Categorical is the only array type for 'category'. subarr = Categorical(arr, dtype.categories, ordered=dtype.ordered) - elif is_extension_array_dtype(dtype): + elif is_extension_dtype(dtype): # create an extension array from its dtype array_type = dtype.construct_array_type() subarr = array_type(subarr, dtype=dtype, copy=copy) diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py index 69de0e1900831..76300ce8d6532 100644 --- a/pandas/tests/extension/base/interface.py +++ b/pandas/tests/extension/base/interface.py @@ -2,8 +2,10 @@ import pandas as pd from pandas.compat import StringIO -from pandas.core.dtypes.common import is_extension_array_dtype -from pandas.core.dtypes.dtypes import ExtensionDtype +from pandas.api.types import ( + is_extension_array_dtype, is_extension_dtype +) +from pandas.api.extensions import ExtensionDtype from .base import BaseExtensionTests @@ -58,10 +60,12 @@ def test_dtype_name_in_info(self, data): def test_is_extension_array_dtype(self, data): assert is_extension_array_dtype(data) - assert is_extension_array_dtype(data.dtype) assert is_extension_array_dtype(pd.Series(data)) assert isinstance(data.dtype, ExtensionDtype) + def test_is_extension_dtype(self, data): + assert is_extension_dtype(data.dtype) + def test_no_values_attribute(self, data): # GH-20735: EA's with .values attribute give problems with internal # code, disallowing this for now until solved diff --git a/pandas/tests/extension/test_common.py b/pandas/tests/extension/test_common.py index b6223ea96d7dd..f70237acb64a8 100644 --- a/pandas/tests/extension/test_common.py +++ b/pandas/tests/extension/test_common.py @@ -4,7 +4,9 @@ import pandas as pd import pandas.util.testing as tm from pandas.core.arrays import ExtensionArray -from pandas.core.dtypes.common import is_extension_array_dtype +from pandas.core.dtypes.common import ( + is_extension_array_dtype, is_extension_dtype +) from pandas.core.dtypes import dtypes @@ -38,14 +40,20 @@ class TestExtensionArrayDtype(object): @pytest.mark.parametrize('values', [ pd.Categorical([]), - pd.Categorical([]).dtype, pd.Series(pd.Categorical([])), - DummyDtype(), + DummyArray(np.array([1, 2])), ]) def test_is_extension_array_dtype(self, values): assert is_extension_array_dtype(values) + @pytest.mark.parametrize('dtype', [ + pd.Categorical([]).dtype, + DummyDtype(), + ]) + def test_is_extension_dtype(self, dtype): + assert is_extension_dtype(dtype) + @pytest.mark.parametrize('values', [ np.array([]), pd.Series(np.array([])), @@ -91,4 +99,4 @@ def test_is_not_extension_array_dtype(dtype): ]) def test_is_extension_array_dtype(dtype): assert isinstance(dtype, dtypes.ExtensionDtype) - assert is_extension_array_dtype(dtype) + assert is_extension_dtype(dtype) From b908a2fa3e3be66b5687bb610d35fae9b634f27b Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 23 Jul 2018 16:00:33 -0500 Subject: [PATCH 2/9] Added to API test --- pandas/tests/api/test_types.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/api/test_types.py b/pandas/tests/api/test_types.py index bd4891326c751..86ab8b769e0a8 100644 --- a/pandas/tests/api/test_types.py +++ b/pandas/tests/api/test_types.py @@ -30,7 +30,9 @@ class TestTypes(Base): 'is_dict_like', 'is_iterator', 'is_file_like', 'is_list_like', 'is_hashable', 'is_array_like', 'is_named_tuple', - 'pandas_dtype', 'union_categoricals', 'infer_dtype'] + 'is_extension_dtype', + 'is_extension_array_dtype', + 'pandas_dtype', 'union_categoricals', 'infer_dtype',] deprecated = ['is_any_int_dtype', 'is_floating_dtype', 'is_sequence'] dtypes = ['CategoricalDtype', 'DatetimeTZDtype', 'PeriodDtype', 'IntervalDtype'] From 34ff2cd45eda7e5e58ac435c784620f436b1c573 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 23 Jul 2018 16:03:56 -0500 Subject: [PATCH 3/9] Linting --- pandas/tests/api/test_types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/api/test_types.py b/pandas/tests/api/test_types.py index 86ab8b769e0a8..7a24963d816db 100644 --- a/pandas/tests/api/test_types.py +++ b/pandas/tests/api/test_types.py @@ -32,7 +32,7 @@ class TestTypes(Base): 'is_named_tuple', 'is_extension_dtype', 'is_extension_array_dtype', - 'pandas_dtype', 'union_categoricals', 'infer_dtype',] + 'pandas_dtype', 'union_categoricals', 'infer_dtype'] deprecated = ['is_any_int_dtype', 'is_floating_dtype', 'is_sequence'] dtypes = ['CategoricalDtype', 'DatetimeTZDtype', 'PeriodDtype', 'IntervalDtype'] From ec31716540bc7630cbb799abecf15fe46873f510 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 24 Jul 2018 08:46:35 -0500 Subject: [PATCH 4/9] Rename --- doc/source/api.rst | 2 +- doc/source/whatsnew/v0.24.0.txt | 2 +- pandas/core/algorithms.py | 10 +++++----- pandas/core/arrays/categorical.py | 4 ++-- pandas/core/base.py | 6 +++--- pandas/core/dtypes/api.py | 2 +- pandas/core/dtypes/base.py | 4 ++-- pandas/core/dtypes/common.py | 4 ++-- pandas/core/dtypes/concat.py | 4 ++-- pandas/core/dtypes/missing.py | 8 ++++---- pandas/core/frame.py | 6 +++--- pandas/core/indexes/base.py | 4 ++-- pandas/core/internals/__init__.py | 4 ++-- pandas/core/internals/blocks.py | 6 +++--- pandas/core/ops.py | 14 +++++++------- pandas/core/series.py | 6 +++--- pandas/tests/api/test_types.py | 2 +- pandas/tests/extension/base/interface.py | 10 +++++----- pandas/tests/extension/test_common.py | 12 ++++++------ pandas/util/testing.py | 6 +++--- 20 files changed, 58 insertions(+), 58 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 4ec603c4df1aa..9fb3514a11a28 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -2501,7 +2501,7 @@ Dtype introspection api.types.is_datetime64_ns_dtype api.types.is_datetime64tz_dtype api.types.is_extension_type - api.types.is_extension_array_dtype + api.types.is_extension_array api.types.is_extension_dtype api.types.is_float_dtype api.types.is_int64_dtype diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 8327193a7e78d..99d2bc910c1a2 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -323,7 +323,7 @@ ExtensionType Changes - Bug in :meth:`Series.get` for ``Series`` using ``ExtensionArray`` and integer index (:issue:`21257`) - :meth:`Series.combine()` works correctly with :class:`~pandas.api.extensions.ExtensionArray` inside of :class:`Series` (:issue:`20825`) - :meth:`Series.combine()` with scalar argument now works for any function type (:issue:`21248`) -- Added :func:`pandas.api.types.is_extension_array_dtype` for testing whether an array is an ExtensionArray and :func:`pandas.api.types.is_extension_dtype` for testing whether a dtype is an ExtensionDtype (:issue:`22021`) +- Added :func:`pandas.api.types.is_extension_array` for testing whether an array is an ExtensionArray and :func:`pandas.api.types.is_extension_dtype` for testing whether a dtype is an ExtensionDtype (:issue:`22021`) .. _whatsnew_0240.api.incompatibilities: diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index a5c3c6af33def..1efcb13637a3d 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -18,7 +18,7 @@ is_unsigned_integer_dtype, is_signed_integer_dtype, is_integer_dtype, is_complex_dtype, is_object_dtype, - is_extension_array_dtype, + is_extension_array, is_extension_dtype, is_categorical_dtype, is_sparse, is_period_dtype, @@ -358,7 +358,7 @@ def unique(values): values = _ensure_arraylike(values) - if is_extension_array_dtype(values): + if is_extension_array(values): # Dispatch to extension dtype's unique. return values.unique() @@ -611,7 +611,7 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None): values = _ensure_arraylike(values) original = values - if is_extension_array_dtype(values): + if is_extension_array(values): values = getattr(values, '_values', values) labels, uniques = values.factorize(na_sentinel=na_sentinel) dtype = original.dtype @@ -706,7 +706,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False, else: - if is_extension_array_dtype(values) or is_sparse(values): + if is_extension_array(values) or is_sparse(values): # handle Categorical and sparse, result = Series(values)._values.value_counts(dropna=dropna) @@ -1592,7 +1592,7 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None, # TODO(EA): Remove these if / elifs as datetimeTZ, interval, become EAs # dispatch to internal type takes - if is_extension_array_dtype(arr): + if is_extension_array(arr): return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill) elif is_datetimetz(arr): return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 0d73b2c60d76d..9f2df23ba1c82 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -20,7 +20,7 @@ ensure_int64, ensure_object, ensure_platform_int, - is_extension_array_dtype, + is_extension_array, is_dtype_equal, is_datetimelike, is_datetime64_dtype, @@ -1244,7 +1244,7 @@ def __array__(self, dtype=None): ret = take_1d(self.categories.values, self._codes) if dtype and not is_dtype_equal(dtype, self.categories.dtype): return np.asarray(ret, dtype) - if is_extension_array_dtype(ret): + if is_extension_array(ret): # When we're a Categorical[ExtensionArray], like Interval, # we need to ensure __array__ get's all the way to an # ndarray. diff --git a/pandas/core/base.py b/pandas/core/base.py index 1226662824eb5..7f2699ccff0fa 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -15,7 +15,7 @@ is_list_like, is_scalar, is_extension_type, - is_extension_array_dtype) + is_extension_array) from pandas.util._validators import validate_bool_kwarg from pandas.errors import AbstractMethodError @@ -749,7 +749,7 @@ def _ndarray_values(self): - categorical -> codes """ - if is_extension_array_dtype(self): + if is_extension_array(self): return self.values._ndarray_values return self.values @@ -857,7 +857,7 @@ def tolist(self): """ if is_datetimelike(self._values): return [com._maybe_box_datetimelike(x) for x in self._values] - elif is_extension_array_dtype(self._values): + elif is_extension_array(self._values): return list(self._values) else: return self._values.tolist() diff --git a/pandas/core/dtypes/api.py b/pandas/core/dtypes/api.py index 6b9952b8bbe1b..ed3cf2f524c85 100644 --- a/pandas/core/dtypes/api.py +++ b/pandas/core/dtypes/api.py @@ -5,7 +5,7 @@ from .common import (pandas_dtype, is_dtype_equal, is_extension_dtype, - is_extension_array_dtype, + is_extension_array, is_extension_type, # categorical diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index 5f405e0d10657..2aeb8379954d2 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -6,9 +6,9 @@ class _DtypeOpsMixin(object): - # Not all of pandas' extension dtypes are compatibile with + # Not all of pandas' extension dtypes are compatible with # the new ExtensionArray interface. This means PandasExtensionDtype - # can't subclass ExtensionDtype yet, as is_extension_array_dtype would + # can't subclass ExtensionDtype yet, as is_extension_dtype would # incorrectly say that these types are extension types. # # In the interim, we put methods that are shared between the two base diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 8a04d488a8df3..54be5ba5e82fc 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1688,7 +1688,7 @@ def is_extension_type(arr): return False -def is_extension_array_dtype(arr): +def is_extension_array(arr): """Check if an array object is a pandas extension array type. Parameters @@ -1748,7 +1748,7 @@ def is_extension_dtype(dtype): See Also -------- - is_extension_array_dtype : Similar method for arrays. + is_extension_array : Similar method for arrays. """ return isinstance(dtype, ExtensionDtype) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 5768fd361c3db..273d3f044d09e 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -8,7 +8,7 @@ from pandas.core.dtypes.common import ( is_categorical_dtype, is_sparse, - is_extension_array_dtype, + is_extension_array, is_datetimetz, is_datetime64_dtype, is_timedelta64_dtype, @@ -177,7 +177,7 @@ def is_nonempty(x): elif 'sparse' in typs: return _concat_sparse(to_concat, axis=axis, typs=typs) - extensions = [is_extension_array_dtype(x) for x in to_concat] + extensions = [is_extension_array(x) for x in to_concat] if any(extensions) and axis == 1: to_concat = [np.atleast_2d(x.astype('object')) for x in to_concat] diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 66998aa6866f6..0b3c142c76913 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -15,7 +15,7 @@ is_complex_dtype, is_string_like_dtype, is_bool_dtype, is_integer_dtype, is_dtype_equal, - is_extension_array_dtype, + is_extension_array, needs_i8_conversion, ensure_object, pandas_dtype, is_scalar, @@ -190,7 +190,7 @@ def _isna_ndarraylike(obj): values = getattr(obj, 'values', obj) dtype = values.dtype - if is_extension_array_dtype(obj): + if is_extension_array(obj): if isinstance(obj, (ABCIndexClass, ABCSeries)): values = obj._values else: @@ -502,7 +502,7 @@ def na_value_for_dtype(dtype, compat=True): """ dtype = pandas_dtype(dtype) - if is_extension_array_dtype(dtype): + if is_extension_array(dtype): return dtype.na_value if (is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype) or is_timedelta64_dtype(dtype) or is_period_dtype(dtype)): @@ -522,7 +522,7 @@ def remove_na_arraylike(arr): """ Return array-like containing only true/non-NaN values, possibly empty. """ - if is_extension_array_dtype(arr): + if is_extension_array(arr): return arr[notna(arr)] else: return arr[notna(lib.values_from_object(arr))] diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 873170eb9813b..2aa63c284841b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -40,7 +40,7 @@ is_categorical_dtype, is_object_dtype, is_extension_type, - is_extension_array_dtype, + is_extension_array, is_datetimetz, is_datetime64_any_dtype, is_bool_dtype, @@ -517,7 +517,7 @@ def _get_axes(N, K, index=index, columns=columns): index, columns = _get_axes(len(values), 1) return _arrays_to_mgr([values], columns, index, columns, dtype=dtype) - elif (is_datetimetz(values) or is_extension_array_dtype(values)): + elif (is_datetimetz(values) or is_extension_array(values)): # GH19157 if columns is None: columns = [0] @@ -3507,7 +3507,7 @@ def reindexer(value): value = maybe_cast_to_datetime(value, value.dtype) # return internal types directly - if is_extension_type(value) or is_extension_array_dtype(value): + if is_extension_type(value) or is_extension_array(value): return value # broadcast across multiple columns if necessary diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index b0145ed33693b..c09ea3c6b4dd7 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -44,7 +44,7 @@ is_datetime64_any_dtype, is_datetime64tz_dtype, is_timedelta64_dtype, - is_extension_array_dtype, + is_extension_array, is_extension_dtype, is_hashable, is_iterator, is_list_like, @@ -276,7 +276,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, closed=closed) # extension dtype - elif is_extension_array_dtype(data) or is_extension_dtype(dtype): + elif is_extension_array(data) or is_extension_dtype(dtype): data = np.asarray(data) if not (dtype is None or is_object_dtype(dtype)): diff --git a/pandas/core/internals/__init__.py b/pandas/core/internals/__init__.py index a4cd301806569..f5ae16993d529 100644 --- a/pandas/core/internals/__init__.py +++ b/pandas/core/internals/__init__.py @@ -23,7 +23,7 @@ is_datetimelike_v_numeric, is_float_dtype, is_numeric_dtype, is_numeric_v_string_like, is_extension_type, - is_extension_array_dtype, + is_extension_array, is_scalar, _get_dtype) from pandas.core.dtypes.cast import ( @@ -1071,7 +1071,7 @@ def set(self, item, value, check=False): # TODO(EA): Remove an is_extension_ when all extension types satisfy # the interface value_is_extension_type = (is_extension_type(value) or - is_extension_array_dtype(value)) + is_extension_array(value)) # categorical/spares/datetimetz if value_is_extension_type: diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index ffa2267dd6877..e8467ad88c13b 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -32,7 +32,7 @@ is_object_dtype, is_float_dtype, is_numeric_v_string_like, is_extension_type, - is_extension_array_dtype, + is_extension_array, is_list_like, is_re, is_re_compilable, @@ -2416,7 +2416,7 @@ def should_store(self, value): # TODO(ExtensionArray): remove is_extension_type # when all extension arrays have been ported. is_extension_type(value) or - is_extension_array_dtype(value)) + is_extension_array(value)) def replace(self, to_replace, value, inplace=False, filter=None, regex=False, convert=True, mgr=None): @@ -3174,7 +3174,7 @@ def get_block_type(values, dtype=None): cls = ComplexBlock elif is_categorical(values): cls = CategoricalBlock - elif is_extension_array_dtype(values): + elif is_extension_array(values): cls = ExtensionBlock elif issubclass(vtype, np.datetime64): assert not is_datetimetz(values) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index a8c1b954a61b7..bb94ca68f0b46 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -33,7 +33,7 @@ is_bool_dtype, is_list_like, is_scalar, - is_extension_array_dtype, + is_extension_array, ensure_object) from pandas.core.dtypes.cast import ( maybe_upcast_putmask, find_common_type, @@ -1059,7 +1059,7 @@ def dispatch_to_extension_op(op, left, right): # we need to listify to avoid ndarray, or non-same-type extension array # dispatching - if is_extension_array_dtype(left): + if is_extension_array(left): new_left = left.values if isinstance(right, np.ndarray): @@ -1070,7 +1070,7 @@ def dispatch_to_extension_op(op, left, right): if is_scalar(new_right): new_right = [new_right] new_right = list(new_right) - elif is_extension_array_dtype(right) and type(left) != type(right): + elif is_extension_array(right) and type(left) != type(right): new_right = list(new_right) else: new_right = right @@ -1158,8 +1158,8 @@ def wrapper(left, right): raise TypeError("{typ} cannot perform the operation " "{op}".format(typ=type(left).__name__, op=str_rep)) - elif (is_extension_array_dtype(left) or - is_extension_array_dtype(right)): + elif (is_extension_array(left) or + is_extension_array(right)): return dispatch_to_extension_op(op, left, right) elif is_datetime64_dtype(left) or is_datetime64tz_dtype(left): @@ -1354,8 +1354,8 @@ def wrapper(self, other, axis=None): return self._constructor(res_values, index=self.index, name=res_name) - elif (is_extension_array_dtype(self) or - (is_extension_array_dtype(other) and + elif (is_extension_array(self) or + (is_extension_array(other) and not is_scalar(other))): return dispatch_to_extension_op(op, self, other) diff --git a/pandas/core/series.py b/pandas/core/series.py index 2deb0b4d816ec..2dcebc0045077 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -21,7 +21,7 @@ is_integer, is_integer_dtype, is_float_dtype, is_extension_type, - is_extension_array_dtype, + is_extension_array, is_extension_dtype, is_datetimelike, is_datetime64tz_dtype, @@ -237,7 +237,7 @@ def __init__(self, data=None, index=None, dtype=None, name=None, '`index` argument. `copy` must ' 'be False.') - elif is_extension_array_dtype(data): + elif is_extension_array(data): pass elif (isinstance(data, types.GeneratorType) or (compat.PY3 and isinstance(data, map))): @@ -2267,7 +2267,7 @@ def combine(self, other, func, fill_value=None): if is_categorical_dtype(self.values): pass - elif is_extension_array_dtype(self.values): + elif is_extension_array(self.values): # The function can return something of any type, so check # if the type is compatible with the calling EA try: diff --git a/pandas/tests/api/test_types.py b/pandas/tests/api/test_types.py index 7a24963d816db..65de412137482 100644 --- a/pandas/tests/api/test_types.py +++ b/pandas/tests/api/test_types.py @@ -31,7 +31,7 @@ class TestTypes(Base): 'is_list_like', 'is_hashable', 'is_array_like', 'is_named_tuple', 'is_extension_dtype', - 'is_extension_array_dtype', + 'is_extension_array', 'pandas_dtype', 'union_categoricals', 'infer_dtype'] deprecated = ['is_any_int_dtype', 'is_floating_dtype', 'is_sequence'] dtypes = ['CategoricalDtype', 'DatetimeTZDtype', diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py index 76300ce8d6532..e13bf58702ec6 100644 --- a/pandas/tests/extension/base/interface.py +++ b/pandas/tests/extension/base/interface.py @@ -3,7 +3,7 @@ import pandas as pd from pandas.compat import StringIO from pandas.api.types import ( - is_extension_array_dtype, is_extension_dtype + is_extension_array, is_extension_dtype ) from pandas.api.extensions import ExtensionDtype @@ -58,13 +58,13 @@ def test_dtype_name_in_info(self, data): result = buf.getvalue() assert data.dtype.name in result - def test_is_extension_array_dtype(self, data): - assert is_extension_array_dtype(data) - assert is_extension_array_dtype(pd.Series(data)) - assert isinstance(data.dtype, ExtensionDtype) + def test_is_extension_array(self, data): + assert is_extension_array(data) + assert is_extension_array(pd.Series(data)) def test_is_extension_dtype(self, data): assert is_extension_dtype(data.dtype) + assert isinstance(data.dtype, ExtensionDtype) def test_no_values_attribute(self, data): # GH-20735: EA's with .values attribute give problems with internal diff --git a/pandas/tests/extension/test_common.py b/pandas/tests/extension/test_common.py index f70237acb64a8..20c4cf586c25c 100644 --- a/pandas/tests/extension/test_common.py +++ b/pandas/tests/extension/test_common.py @@ -5,7 +5,7 @@ import pandas.util.testing as tm from pandas.core.arrays import ExtensionArray from pandas.core.dtypes.common import ( - is_extension_array_dtype, is_extension_dtype + is_extension_array, is_extension_dtype ) from pandas.core.dtypes import dtypes @@ -44,8 +44,8 @@ class TestExtensionArrayDtype(object): DummyArray(np.array([1, 2])), ]) - def test_is_extension_array_dtype(self, values): - assert is_extension_array_dtype(values) + def test_is_extension_array(self, values): + assert is_extension_array(values) @pytest.mark.parametrize('dtype', [ pd.Categorical([]).dtype, @@ -59,7 +59,7 @@ def test_is_extension_dtype(self, dtype): pd.Series(np.array([])), ]) def test_is_not_extension_array_dtype(self, values): - assert not is_extension_array_dtype(values) + assert not is_extension_array(values) def test_astype(): @@ -90,13 +90,13 @@ def test_astype_no_copy(): ]) def test_is_not_extension_array_dtype(dtype): assert not isinstance(dtype, dtypes.ExtensionDtype) - assert not is_extension_array_dtype(dtype) + assert not is_extension_array(dtype) @pytest.mark.parametrize('dtype', [ dtypes.CategoricalDtype(), dtypes.IntervalDtype(), ]) -def test_is_extension_array_dtype(dtype): +def test_is_extension_array(dtype): assert isinstance(dtype, dtypes.ExtensionDtype) assert is_extension_dtype(dtype) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 9697c991122dd..f5694fc9d867f 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -30,7 +30,7 @@ is_interval_dtype, is_sequence, is_list_like, - is_extension_array_dtype) + is_extension_array) from pandas.io.formats.printing import pprint_thing from pandas.core.algorithms import take_1d import pandas.core.common as com @@ -1278,8 +1278,8 @@ def assert_series_equal(left, right, check_dtype=True, elif is_interval_dtype(left) or is_interval_dtype(right): assert_interval_array_equal(left.values, right.values) - elif (is_extension_array_dtype(left) and not is_categorical_dtype(left) and - is_extension_array_dtype(right) and not is_categorical_dtype(right)): + elif (is_extension_array(left) and not is_categorical_dtype(left) and + is_extension_array(right) and not is_categorical_dtype(right)): return assert_extension_array_equal(left.values, right.values) else: From 94318e19f7b7664008d563df5a2dec42d62e8bfe Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 26 Jul 2018 06:00:21 -0500 Subject: [PATCH 5/9] Rename is_extension_array --- pandas/core/dtypes/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 54be5ba5e82fc..08cf73bad76f4 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1729,7 +1729,7 @@ def is_extension_dtype(dtype): Parameters ---------- - arr : object + dtype : dtype Returns ------- From e92d75fb6793e6a3b1022f09de17c4ae9081b283 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 26 Jul 2018 07:46:56 -0500 Subject: [PATCH 6/9] Take 2 --- pandas/core/dtypes/common.py | 18 +++++------------- pandas/core/dtypes/dtypes.py | 5 ++--- 2 files changed, 7 insertions(+), 16 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 355bf58540219..36d764e56e3d3 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -9,7 +9,7 @@ from pandas.core.dtypes.dtypes import ( registry, CategoricalDtype, CategoricalDtypeType, DatetimeTZDtype, DatetimeTZDtypeType, PeriodDtype, PeriodDtypeType, IntervalDtype, - IntervalDtypeType, ExtensionDtype) + IntervalDtypeType, PandasExtensionDtype, ExtensionDtype) from pandas.core.dtypes.generic import ( ABCCategorical, ABCPeriodIndex, ABCDatetimeIndex, ABCSeries, ABCSparseArray, ABCSparseSeries, ABCCategoricalIndex, ABCIndexClass, @@ -1709,17 +1709,9 @@ def is_extension_array_dtype(arr_or_dtype): Third-party libraries may implement arrays or types satisfying this interface as well. """ - from pandas.core.arrays import ExtensionArray - - if isinstance(arr_or_dtype, (ABCIndexClass, ABCSeries)): - arr_or_dtype = arr_or_dtype._values - - try: - arr_or_dtype = pandas_dtype(arr_or_dtype) - except TypeError: - pass - - return isinstance(arr_or_dtype, (ExtensionDtype, ExtensionArray)) + dtype = getattr(arr_or_dtype, 'dtype', arr_or_dtype) + return (isinstance(dtype, ExtensionDtype) or + registry.find(dtype) is not None) def is_complex_dtype(arr_or_dtype): @@ -2004,7 +1996,7 @@ def pandas_dtype(dtype): return result # un-registered extension types - elif isinstance(dtype, ExtensionDtype): + elif isinstance(dtype, (PandasExtensionDtype, ExtensionDtype)): return dtype # try a numpy dtype diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 57b1d81d94754..861ec6ab6be30 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -50,7 +50,7 @@ def find(self, dtype): dtype_type = dtype if not isinstance(dtype, type): dtype_type = type(dtype) - if issubclass(dtype_type, (PandasExtensionDtype, ExtensionDtype)): + if issubclass(dtype_type, ExtensionDtype): return dtype return None @@ -823,7 +823,6 @@ def is_dtype(cls, dtype): # register the dtypes in search order -registry.register(DatetimeTZDtype) -registry.register(PeriodDtype) registry.register(IntervalDtype) registry.register(CategoricalDtype) +# TODO(extension): Add DatetimeTZDtype and PeriodDtype From 2731c70c522ca6aa2168d932af05f69a0c90131a Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 26 Jul 2018 07:51:08 -0500 Subject: [PATCH 7/9] revert --- doc/source/api.rst | 2 - doc/source/whatsnew/v0.24.0.txt | 3 +- pandas/core/algorithms.py | 13 +++--- pandas/core/arrays/categorical.py | 4 +- pandas/core/base.py | 6 +-- pandas/core/dtypes/api.py | 2 - pandas/core/dtypes/base.py | 4 +- pandas/core/dtypes/cast.py | 8 ++-- pandas/core/dtypes/common.py | 57 ++++++------------------ pandas/core/dtypes/concat.py | 4 +- pandas/core/dtypes/missing.py | 8 ++-- pandas/core/frame.py | 6 +-- pandas/core/indexes/base.py | 7 ++- pandas/core/internals/blocks.py | 6 +-- pandas/core/ops.py | 14 +++--- pandas/core/series.py | 9 ++-- pandas/tests/api/test_types.py | 2 - pandas/tests/extension/base/interface.py | 16 +++---- pandas/tests/extension/test_common.py | 26 ++++------- pandas/util/testing.py | 6 +-- 20 files changed, 75 insertions(+), 128 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 9fb3514a11a28..9056b1f47323a 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -2501,8 +2501,6 @@ Dtype introspection api.types.is_datetime64_ns_dtype api.types.is_datetime64tz_dtype api.types.is_extension_type - api.types.is_extension_array - api.types.is_extension_dtype api.types.is_float_dtype api.types.is_int64_dtype api.types.is_integer_dtype diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 41b00d202d9e5..1ac6d075946dd 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -323,8 +323,7 @@ ExtensionType Changes - Bug in :meth:`Series.get` for ``Series`` using ``ExtensionArray`` and integer index (:issue:`21257`) - :meth:`Series.combine()` works correctly with :class:`~pandas.api.extensions.ExtensionArray` inside of :class:`Series` (:issue:`20825`) - :meth:`Series.combine()` with scalar argument now works for any function type (:issue:`21248`) -- Added :func:`pandas.api.types.is_extension_array` for testing whether an array is an ExtensionArray and :func:`pandas.api.types.is_extension_dtype` for testing whether a dtype is an ExtensionDtype (:issue:`22021`) - +- .. _whatsnew_0240.api.incompatibilities: diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 766dacb540cf8..49705cb6d9ad2 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -18,8 +18,7 @@ is_unsigned_integer_dtype, is_signed_integer_dtype, is_integer_dtype, is_complex_dtype, is_object_dtype, - is_extension_array, - is_extension_dtype, + is_extension_array_dtype, is_categorical_dtype, is_sparse, is_period_dtype, is_numeric_dtype, is_float_dtype, @@ -154,7 +153,7 @@ def _reconstruct_data(values, dtype, original): Index for extension types, otherwise ndarray casted to dtype """ from pandas import Index - if is_extension_dtype(dtype): + if is_extension_array_dtype(dtype): values = dtype.construct_array_type()._from_sequence(values) elif is_datetime64tz_dtype(dtype) or is_period_dtype(dtype): values = Index(original)._shallow_copy(values, name=None) @@ -358,7 +357,7 @@ def unique(values): values = _ensure_arraylike(values) - if is_extension_array(values): + if is_extension_array_dtype(values): # Dispatch to extension dtype's unique. return values.unique() @@ -611,7 +610,7 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None): values = _ensure_arraylike(values) original = values - if is_extension_array(values): + if is_extension_array_dtype(values): values = getattr(values, '_values', values) labels, uniques = values.factorize(na_sentinel=na_sentinel) dtype = original.dtype @@ -706,7 +705,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False, else: - if is_extension_array(values) or is_sparse(values): + if is_extension_array_dtype(values) or is_sparse(values): # handle Categorical and sparse, result = Series(values)._values.value_counts(dropna=dropna) @@ -1592,7 +1591,7 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None, # TODO(EA): Remove these if / elifs as datetimeTZ, interval, become EAs # dispatch to internal type takes - if is_extension_array(arr): + if is_extension_array_dtype(arr): return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill) elif is_datetimetz(arr): return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index c2f7d3577f653..4584e4694cdc5 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -20,7 +20,7 @@ ensure_int64, ensure_object, ensure_platform_int, - is_extension_array, + is_extension_array_dtype, is_dtype_equal, is_datetimelike, is_datetime64_dtype, @@ -1245,7 +1245,7 @@ def __array__(self, dtype=None): ret = take_1d(self.categories.values, self._codes) if dtype and not is_dtype_equal(dtype, self.categories.dtype): return np.asarray(ret, dtype) - if is_extension_array(ret): + if is_extension_array_dtype(ret): # When we're a Categorical[ExtensionArray], like Interval, # we need to ensure __array__ get's all the way to an # ndarray. diff --git a/pandas/core/base.py b/pandas/core/base.py index 8836e2c0045e9..5382315bad32b 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -15,7 +15,7 @@ is_list_like, is_scalar, is_extension_type, - is_extension_array) + is_extension_array_dtype) from pandas.util._validators import validate_bool_kwarg from pandas.errors import AbstractMethodError @@ -749,7 +749,7 @@ def _ndarray_values(self): - categorical -> codes """ - if is_extension_array(self): + if is_extension_array_dtype(self): return self.values._ndarray_values return self.values @@ -857,7 +857,7 @@ def tolist(self): """ if is_datetimelike(self._values): return [com.maybe_box_datetimelike(x) for x in self._values] - elif is_extension_array(self._values): + elif is_extension_array_dtype(self._values): return list(self._values) else: return self._values.tolist() diff --git a/pandas/core/dtypes/api.py b/pandas/core/dtypes/api.py index ed3cf2f524c85..738e1ea9062f6 100644 --- a/pandas/core/dtypes/api.py +++ b/pandas/core/dtypes/api.py @@ -4,8 +4,6 @@ from .common import (pandas_dtype, is_dtype_equal, - is_extension_dtype, - is_extension_array, is_extension_type, # categorical diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index 2aeb8379954d2..5f405e0d10657 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -6,9 +6,9 @@ class _DtypeOpsMixin(object): - # Not all of pandas' extension dtypes are compatible with + # Not all of pandas' extension dtypes are compatibile with # the new ExtensionArray interface. This means PandasExtensionDtype - # can't subclass ExtensionDtype yet, as is_extension_dtype would + # can't subclass ExtensionDtype yet, as is_extension_array_dtype would # incorrectly say that these types are extension types. # # In the interim, we put methods that are shared between the two base diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 71205daa1168e..ead7b39309f5e 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -12,7 +12,7 @@ is_complex, is_datetimetz, is_categorical_dtype, is_datetimelike, is_extension_type, - is_extension_dtype, + is_extension_array_dtype, is_object_dtype, is_datetime64tz_dtype, is_datetime64_dtype, is_datetime64_ns_dtype, @@ -294,7 +294,7 @@ def maybe_promote(dtype, fill_value=np.nan): elif is_datetimetz(dtype): if isna(fill_value): fill_value = iNaT - elif is_extension_dtype(dtype) and isna(fill_value): + elif is_extension_array_dtype(dtype) and isna(fill_value): fill_value = dtype.na_value elif is_float(fill_value): if issubclass(dtype.type, np.bool_): @@ -332,7 +332,7 @@ def maybe_promote(dtype, fill_value=np.nan): dtype = np.object_ # in case we have a string that looked like a number - if is_extension_dtype(dtype): + if is_extension_array_dtype(dtype): pass elif is_datetimetz(dtype): pass @@ -650,7 +650,7 @@ def astype_nansafe(arr, dtype, copy=True): need to be very careful as the result shape could change! """ # dispatch on extension dtype if needed - if is_extension_dtype(dtype): + if is_extension_array_dtype(dtype): return dtype.construct_array_type()._from_sequence( arr, dtype=dtype, copy=copy) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 08cf73bad76f4..355bf58540219 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1688,12 +1688,12 @@ def is_extension_type(arr): return False -def is_extension_array(arr): - """Check if an array object is a pandas extension array type. +def is_extension_array_dtype(arr_or_dtype): + """Check if an object is a pandas extension array type. Parameters ---------- - arr : object + arr_or_dtype : object Returns ------- @@ -1701,56 +1701,25 @@ def is_extension_array(arr): Notes ----- - This checks whether an array object implements the pandas extension + This checks whether an object implements the pandas extension array interface. In pandas, this includes: * Categorical - * Interval - Third-party libraries may implement arrays satisfying + Third-party libraries may implement arrays or types satisfying this interface as well. - - See Also - -------- - is_extension_dtype : Similar method for dtypes. """ - from pandas.core.dtypes.base import ExtensionDtype - - try: - dtype = getattr(arr, 'dtype') - except AttributeError: - return False - - return isinstance(dtype, ExtensionDtype) + from pandas.core.arrays import ExtensionArray + if isinstance(arr_or_dtype, (ABCIndexClass, ABCSeries)): + arr_or_dtype = arr_or_dtype._values -def is_extension_dtype(dtype): - """Check if a dtype object is a pandas extension dtype. - - Parameters - ---------- - dtype : dtype - - Returns - ------- - bool - - Notes - ----- - This checks whether a dtype object implements the pandas extension - array interface. In pandas, this includes: - - * CategoricalDtype - * IntervalDtype - - Third-party libraries may implement dtypes satisfying - this interface as well. + try: + arr_or_dtype = pandas_dtype(arr_or_dtype) + except TypeError: + pass - See Also - -------- - is_extension_array : Similar method for arrays. - """ - return isinstance(dtype, ExtensionDtype) + return isinstance(arr_or_dtype, (ExtensionDtype, ExtensionArray)) def is_complex_dtype(arr_or_dtype): diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 273d3f044d09e..5768fd361c3db 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -8,7 +8,7 @@ from pandas.core.dtypes.common import ( is_categorical_dtype, is_sparse, - is_extension_array, + is_extension_array_dtype, is_datetimetz, is_datetime64_dtype, is_timedelta64_dtype, @@ -177,7 +177,7 @@ def is_nonempty(x): elif 'sparse' in typs: return _concat_sparse(to_concat, axis=axis, typs=typs) - extensions = [is_extension_array(x) for x in to_concat] + extensions = [is_extension_array_dtype(x) for x in to_concat] if any(extensions) and axis == 1: to_concat = [np.atleast_2d(x.astype('object')) for x in to_concat] diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 0b3c142c76913..66998aa6866f6 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -15,7 +15,7 @@ is_complex_dtype, is_string_like_dtype, is_bool_dtype, is_integer_dtype, is_dtype_equal, - is_extension_array, + is_extension_array_dtype, needs_i8_conversion, ensure_object, pandas_dtype, is_scalar, @@ -190,7 +190,7 @@ def _isna_ndarraylike(obj): values = getattr(obj, 'values', obj) dtype = values.dtype - if is_extension_array(obj): + if is_extension_array_dtype(obj): if isinstance(obj, (ABCIndexClass, ABCSeries)): values = obj._values else: @@ -502,7 +502,7 @@ def na_value_for_dtype(dtype, compat=True): """ dtype = pandas_dtype(dtype) - if is_extension_array(dtype): + if is_extension_array_dtype(dtype): return dtype.na_value if (is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype) or is_timedelta64_dtype(dtype) or is_period_dtype(dtype)): @@ -522,7 +522,7 @@ def remove_na_arraylike(arr): """ Return array-like containing only true/non-NaN values, possibly empty. """ - if is_extension_array(arr): + if is_extension_array_dtype(arr): return arr[notna(arr)] else: return arr[notna(lib.values_from_object(arr))] diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7b7fe920cd2eb..078e176ff2b99 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -40,7 +40,7 @@ is_categorical_dtype, is_object_dtype, is_extension_type, - is_extension_array, + is_extension_array_dtype, is_datetimetz, is_datetime64_any_dtype, is_bool_dtype, @@ -517,7 +517,7 @@ def _get_axes(N, K, index=index, columns=columns): index, columns = _get_axes(len(values), 1) return _arrays_to_mgr([values], columns, index, columns, dtype=dtype) - elif (is_datetimetz(values) or is_extension_array(values)): + elif (is_datetimetz(values) or is_extension_array_dtype(values)): # GH19157 if columns is None: columns = [0] @@ -3507,7 +3507,7 @@ def reindexer(value): value = maybe_cast_to_datetime(value, value.dtype) # return internal types directly - if is_extension_type(value) or is_extension_array(value): + if is_extension_type(value) or is_extension_array_dtype(value): return value # broadcast across multiple columns if necessary diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 44bf9236001c3..20926ea5163af 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -44,8 +44,7 @@ is_datetime64_any_dtype, is_datetime64tz_dtype, is_timedelta64_dtype, - is_extension_array, - is_extension_dtype, + is_extension_array_dtype, is_hashable, is_iterator, is_list_like, is_scalar) @@ -276,7 +275,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, closed=closed) # extension dtype - elif is_extension_array(data) or is_extension_dtype(dtype): + elif is_extension_array_dtype(data) or is_extension_array_dtype(dtype): data = np.asarray(data) if not (dtype is None or is_object_dtype(dtype)): @@ -1192,7 +1191,7 @@ def astype(self, dtype, copy=True): return CategoricalIndex(self.values, name=self.name, dtype=dtype, copy=copy) - elif is_extension_dtype(dtype): + elif is_extension_array_dtype(dtype): return Index(np.asarray(self), dtype=dtype, copy=copy) try: diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index e8467ad88c13b..ffa2267dd6877 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -32,7 +32,7 @@ is_object_dtype, is_float_dtype, is_numeric_v_string_like, is_extension_type, - is_extension_array, + is_extension_array_dtype, is_list_like, is_re, is_re_compilable, @@ -2416,7 +2416,7 @@ def should_store(self, value): # TODO(ExtensionArray): remove is_extension_type # when all extension arrays have been ported. is_extension_type(value) or - is_extension_array(value)) + is_extension_array_dtype(value)) def replace(self, to_replace, value, inplace=False, filter=None, regex=False, convert=True, mgr=None): @@ -3174,7 +3174,7 @@ def get_block_type(values, dtype=None): cls = ComplexBlock elif is_categorical(values): cls = CategoricalBlock - elif is_extension_array(values): + elif is_extension_array_dtype(values): cls = ExtensionBlock elif issubclass(vtype, np.datetime64): assert not is_datetimetz(values) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 30acd2f4e91af..c65d2dcdc478c 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -33,7 +33,7 @@ is_bool_dtype, is_list_like, is_scalar, - is_extension_array, + is_extension_array_dtype, ensure_object) from pandas.core.dtypes.cast import ( maybe_upcast_putmask, find_common_type, @@ -1059,7 +1059,7 @@ def dispatch_to_extension_op(op, left, right): # we need to listify to avoid ndarray, or non-same-type extension array # dispatching - if is_extension_array(left): + if is_extension_array_dtype(left): new_left = left.values if isinstance(right, np.ndarray): @@ -1070,7 +1070,7 @@ def dispatch_to_extension_op(op, left, right): if is_scalar(new_right): new_right = [new_right] new_right = list(new_right) - elif is_extension_array(right) and type(left) != type(right): + elif is_extension_array_dtype(right) and type(left) != type(right): new_right = list(new_right) else: new_right = right @@ -1158,8 +1158,8 @@ def wrapper(left, right): raise TypeError("{typ} cannot perform the operation " "{op}".format(typ=type(left).__name__, op=str_rep)) - elif (is_extension_array(left) or - is_extension_array(right)): + elif (is_extension_array_dtype(left) or + is_extension_array_dtype(right)): return dispatch_to_extension_op(op, left, right) elif is_datetime64_dtype(left) or is_datetime64tz_dtype(left): @@ -1354,8 +1354,8 @@ def wrapper(self, other, axis=None): return self._constructor(res_values, index=self.index, name=res_name) - elif (is_extension_array(self) or - (is_extension_array(other) and + elif (is_extension_array_dtype(self) or + (is_extension_array_dtype(other) and not is_scalar(other))): return dispatch_to_extension_op(op, self, other) diff --git a/pandas/core/series.py b/pandas/core/series.py index 5089aa367a9ac..d4c11b19082ab 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -21,8 +21,7 @@ is_integer, is_integer_dtype, is_float_dtype, is_extension_type, - is_extension_array, - is_extension_dtype, + is_extension_array_dtype, is_datetimelike, is_datetime64tz_dtype, is_timedelta64_dtype, @@ -237,7 +236,7 @@ def __init__(self, data=None, index=None, dtype=None, name=None, '`index` argument. `copy` must ' 'be False.') - elif is_extension_array(data): + elif is_extension_array_dtype(data): pass elif (isinstance(data, types.GeneratorType) or (compat.PY3 and isinstance(data, map))): @@ -2272,7 +2271,7 @@ def combine(self, other, func, fill_value=None): if is_categorical_dtype(self.values): pass - elif is_extension_array(self.values): + elif is_extension_array_dtype(self.values): # The function can return something of any type, so check # if the type is compatible with the calling EA try: @@ -4094,7 +4093,7 @@ def _try_cast(arr, take_fast_path): # that Categorical is the only array type for 'category'. subarr = Categorical(arr, dtype.categories, ordered=dtype.ordered) - elif is_extension_dtype(dtype): + elif is_extension_array_dtype(dtype): # create an extension array from its dtype array_type = dtype.construct_array_type() subarr = array_type(subarr, dtype=dtype, copy=copy) diff --git a/pandas/tests/api/test_types.py b/pandas/tests/api/test_types.py index 65de412137482..bd4891326c751 100644 --- a/pandas/tests/api/test_types.py +++ b/pandas/tests/api/test_types.py @@ -30,8 +30,6 @@ class TestTypes(Base): 'is_dict_like', 'is_iterator', 'is_file_like', 'is_list_like', 'is_hashable', 'is_array_like', 'is_named_tuple', - 'is_extension_dtype', - 'is_extension_array', 'pandas_dtype', 'union_categoricals', 'infer_dtype'] deprecated = ['is_any_int_dtype', 'is_floating_dtype', 'is_sequence'] dtypes = ['CategoricalDtype', 'DatetimeTZDtype', diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py index e13bf58702ec6..69de0e1900831 100644 --- a/pandas/tests/extension/base/interface.py +++ b/pandas/tests/extension/base/interface.py @@ -2,10 +2,8 @@ import pandas as pd from pandas.compat import StringIO -from pandas.api.types import ( - is_extension_array, is_extension_dtype -) -from pandas.api.extensions import ExtensionDtype +from pandas.core.dtypes.common import is_extension_array_dtype +from pandas.core.dtypes.dtypes import ExtensionDtype from .base import BaseExtensionTests @@ -58,12 +56,10 @@ def test_dtype_name_in_info(self, data): result = buf.getvalue() assert data.dtype.name in result - def test_is_extension_array(self, data): - assert is_extension_array(data) - assert is_extension_array(pd.Series(data)) - - def test_is_extension_dtype(self, data): - assert is_extension_dtype(data.dtype) + def test_is_extension_array_dtype(self, data): + assert is_extension_array_dtype(data) + assert is_extension_array_dtype(data.dtype) + assert is_extension_array_dtype(pd.Series(data)) assert isinstance(data.dtype, ExtensionDtype) def test_no_values_attribute(self, data): diff --git a/pandas/tests/extension/test_common.py b/pandas/tests/extension/test_common.py index 20c4cf586c25c..b6223ea96d7dd 100644 --- a/pandas/tests/extension/test_common.py +++ b/pandas/tests/extension/test_common.py @@ -4,9 +4,7 @@ import pandas as pd import pandas.util.testing as tm from pandas.core.arrays import ExtensionArray -from pandas.core.dtypes.common import ( - is_extension_array, is_extension_dtype -) +from pandas.core.dtypes.common import is_extension_array_dtype from pandas.core.dtypes import dtypes @@ -40,26 +38,20 @@ class TestExtensionArrayDtype(object): @pytest.mark.parametrize('values', [ pd.Categorical([]), - pd.Series(pd.Categorical([])), - - DummyArray(np.array([1, 2])), - ]) - def test_is_extension_array(self, values): - assert is_extension_array(values) - - @pytest.mark.parametrize('dtype', [ pd.Categorical([]).dtype, + pd.Series(pd.Categorical([])), DummyDtype(), + DummyArray(np.array([1, 2])), ]) - def test_is_extension_dtype(self, dtype): - assert is_extension_dtype(dtype) + def test_is_extension_array_dtype(self, values): + assert is_extension_array_dtype(values) @pytest.mark.parametrize('values', [ np.array([]), pd.Series(np.array([])), ]) def test_is_not_extension_array_dtype(self, values): - assert not is_extension_array(values) + assert not is_extension_array_dtype(values) def test_astype(): @@ -90,13 +82,13 @@ def test_astype_no_copy(): ]) def test_is_not_extension_array_dtype(dtype): assert not isinstance(dtype, dtypes.ExtensionDtype) - assert not is_extension_array(dtype) + assert not is_extension_array_dtype(dtype) @pytest.mark.parametrize('dtype', [ dtypes.CategoricalDtype(), dtypes.IntervalDtype(), ]) -def test_is_extension_array(dtype): +def test_is_extension_array_dtype(dtype): assert isinstance(dtype, dtypes.ExtensionDtype) - assert is_extension_dtype(dtype) + assert is_extension_array_dtype(dtype) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index f5694fc9d867f..9697c991122dd 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -30,7 +30,7 @@ is_interval_dtype, is_sequence, is_list_like, - is_extension_array) + is_extension_array_dtype) from pandas.io.formats.printing import pprint_thing from pandas.core.algorithms import take_1d import pandas.core.common as com @@ -1278,8 +1278,8 @@ def assert_series_equal(left, right, check_dtype=True, elif is_interval_dtype(left) or is_interval_dtype(right): assert_interval_array_equal(left.values, right.values) - elif (is_extension_array(left) and not is_categorical_dtype(left) and - is_extension_array(right) and not is_categorical_dtype(right)): + elif (is_extension_array_dtype(left) and not is_categorical_dtype(left) and + is_extension_array_dtype(right) and not is_categorical_dtype(right)): return assert_extension_array_equal(left.values, right.values) else: From bc24cd741a082e91f973cc9874938c71ae87ca4a Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 26 Jul 2018 08:04:47 -0500 Subject: [PATCH 8/9] Split registries --- pandas/core/dtypes/common.py | 5 +++-- pandas/core/dtypes/dtypes.py | 10 +++++++--- pandas/tests/dtypes/test_dtypes.py | 23 ++++++++++++++++------- 3 files changed, 26 insertions(+), 12 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 36d764e56e3d3..6f03102899eef 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -9,7 +9,8 @@ from pandas.core.dtypes.dtypes import ( registry, CategoricalDtype, CategoricalDtypeType, DatetimeTZDtype, DatetimeTZDtypeType, PeriodDtype, PeriodDtypeType, IntervalDtype, - IntervalDtypeType, PandasExtensionDtype, ExtensionDtype) + IntervalDtypeType, PandasExtensionDtype, ExtensionDtype, + _pandas_registry) from pandas.core.dtypes.generic import ( ABCCategorical, ABCPeriodIndex, ABCDatetimeIndex, ABCSeries, ABCSparseArray, ABCSparseSeries, ABCCategoricalIndex, ABCIndexClass, @@ -1991,7 +1992,7 @@ def pandas_dtype(dtype): return dtype # registered extension types - result = registry.find(dtype) + result = _pandas_registry.find(dtype) or registry.find(dtype) if result is not None: return result diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 861ec6ab6be30..939c098b75cd6 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -22,9 +22,9 @@ class Registry(object): -------- registry.register(MyExtensionDtype) """ - dtypes = [] + def __init__(self): + self.dtypes = [] - @classmethod def register(self, dtype): """ Parameters @@ -65,6 +65,9 @@ def find(self, dtype): registry = Registry() +# TODO(Extension): remove the second registry once all internal extension +# dtypes are real extension dtypes. +_pandas_registry = Registry() class PandasExtensionDtype(_DtypeOpsMixin): @@ -825,4 +828,5 @@ def is_dtype(cls, dtype): # register the dtypes in search order registry.register(IntervalDtype) registry.register(CategoricalDtype) -# TODO(extension): Add DatetimeTZDtype and PeriodDtype +_pandas_registry.register(DatetimeTZDtype) +_pandas_registry.register(PeriodDtype) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 02ac7fc7d5ed7..8df21645a22f9 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -9,7 +9,7 @@ from pandas.core.dtypes.dtypes import ( DatetimeTZDtype, PeriodDtype, - IntervalDtype, CategoricalDtype, registry) + IntervalDtype, CategoricalDtype, registry, _pandas_registry) from pandas.core.dtypes.common import ( is_categorical_dtype, is_categorical, is_datetime64tz_dtype, is_datetimetz, @@ -775,11 +775,15 @@ def test_update_dtype_errors(self, bad_dtype): @pytest.mark.parametrize( 'dtype', - [DatetimeTZDtype, CategoricalDtype, - PeriodDtype, IntervalDtype]) + [CategoricalDtype, IntervalDtype]) def test_registry(dtype): assert dtype in registry.dtypes +@pytest.mark.parametrize('dtype', [DatetimeTZDtype, PeriodDtype]) +def test_pandas_registry(dtype): + assert dtype not in registry.dtypes + assert dtype in _pandas_registry.dtypes + @pytest.mark.parametrize( 'dtype, expected', @@ -787,9 +791,14 @@ def test_registry(dtype): ('interval', IntervalDtype()), ('interval[int64]', IntervalDtype()), ('interval[datetime64[ns]]', IntervalDtype('datetime64[ns]')), - ('category', CategoricalDtype()), - ('period[D]', PeriodDtype('D')), - ('datetime64[ns, US/Eastern]', DatetimeTZDtype('ns', 'US/Eastern'))]) + ('category', CategoricalDtype())]) def test_registry_find(dtype, expected): - assert registry.find(dtype) == expected + + +@pytest.mark.parametrize( + 'dtype, expected', + [('period[D]', PeriodDtype('D')), + ('datetime64[ns, US/Eastern]', DatetimeTZDtype('ns', 'US/Eastern'))]) +def test_pandas_registry_find(dtype, expected): + assert _pandas_registry.find(dtype) == expected From 08d1c40dcdc2a058a88bbbe66207dd512d047e66 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 26 Jul 2018 08:12:51 -0500 Subject: [PATCH 9/9] lint --- pandas/tests/dtypes/test_dtypes.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 8df21645a22f9..55c841ba1fc46 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -779,6 +779,7 @@ def test_update_dtype_errors(self, bad_dtype): def test_registry(dtype): assert dtype in registry.dtypes + @pytest.mark.parametrize('dtype', [DatetimeTZDtype, PeriodDtype]) def test_pandas_registry(dtype): assert dtype not in registry.dtypes