diff --git a/doc/source/development/extending.rst b/doc/source/development/extending.rst index c7286616672b9..b74713e7102dc 100644 --- a/doc/source/development/extending.rst +++ b/doc/source/development/extending.rst @@ -135,6 +135,110 @@ by some other storage type, like Python lists. See the `extension array source`_ for the interface definition. The docstrings and comments contain guidance for properly implementing the interface. +:class:`~pandas.api.extensions.NDArrayBackedExtensionArray` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +For ExtensionArrays backed by a single NumPy array, the +:class:`~pandas.api.extensions.NDArrayBackedExtensionArray` class can save you +some effort. It contains a private property ``_ndarray`` with the backing NumPy +array and implements the extension array interface. + +Implement the following: + +``_box_func`` + Convert from array values to the type you wish to expose to users. + +``_internal_fill_value`` + Scalar used to denote ``NA`` value inside our ``self._ndarray``, e.g. ``-1`` + for ``Categorical``, ``iNaT`` for ``Period``. + +``_validate_scalar`` + Convert from an object to a value which can be stored in the NumPy array. + +``_validate_setitem_value`` + Convert a value or values for use in setting a value or values in the backing + NumPy array. + +``_validate_searchsorted_value`` + Convert a value for use in searching for a value in the backing NumPy array. + Note: in most cases, the implementation can be identical to that of + ``_validate_setitem_value``. + +.. code-block:: python + + class DateArray(NDArrayBackedExtensionArray): + _internal_fill_value = numpy.datetime64("NaT") + + def __init__(self, values): + backing_array_dtype = " ArrayLike: # We handle datetime64, datetime64tz, timedelta64, and period # dtypes here. 
Everything else we pass through to the underlying @@ -152,6 +153,7 @@ def view(self, dtype: Dtype | None = None) -> ArrayLike: # Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]]" return arr.view(dtype=dtype) # type: ignore[arg-type] + @doc(ExtensionArray.take) def take( self: NDArrayBackedExtensionArrayT, indices: TakeIndexer, @@ -388,8 +390,9 @@ def insert( self: NDArrayBackedExtensionArrayT, loc: int, item ) -> NDArrayBackedExtensionArrayT: """ - Make new ExtensionArray inserting new item at location. Follows - Python list.append semantics for negative values. + Make new ExtensionArray inserting new item at location. + + Follows Python list.append semantics for negative values. Parameters ---------- diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 995b1668046d2..c5d5ac979cc4e 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -5,6 +5,7 @@ import pandas as pd from pandas import api import pandas._testing as tm +from pandas.api import extensions class Base: @@ -241,6 +242,33 @@ def test_api(self): self.check(api, self.allowed) +class TestExtensions(Base): + # top-level classes + classes = [ + "ExtensionDtype", + "ExtensionArray", + "ExtensionScalarOpsMixin", + "NDArrayBackedExtensionArray", + ] + + # top-level functions + funcs = [ + "register_extension_dtype", + "register_dataframe_accessor", + "register_index_accessor", + "register_series_accessor", + "take", + ] + + # misc + misc = ["no_default"] + + def test_api(self): + checkthese = self.classes + self.funcs + self.misc + + self.check(namespace=extensions, expected=checkthese) + + class TestTesting(Base): funcs = [ "assert_frame_equal",