From aa746425a4db9dd1d1c8a7aaceab1bdc31eb91cf Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 25 Jul 2022 15:01:42 -0700 Subject: [PATCH 01/10] Start arrow docs --- doc/source/reference/arrays.rst | 55 +++++++++++++++----- pandas/core/arrays/arrow/array.py | 21 +++++++- pandas/core/arrays/arrow/dtype.py | 31 +++++++++-- scripts/validate_rst_title_capitalization.py | 1 + 4 files changed, 90 insertions(+), 18 deletions(-) diff --git a/doc/source/reference/arrays.rst b/doc/source/reference/arrays.rst index cd0ce581519a8..8ec317a2bdca1 100644 --- a/doc/source/reference/arrays.rst +++ b/doc/source/reference/arrays.rst @@ -19,19 +19,20 @@ objects contained with a :class:`Index`, :class:`Series`, or For some data types, pandas extends NumPy's type system. String aliases for these types can be found at :ref:`basics.dtypes`. -=================== ========================= ================== ============================= -Kind of Data pandas Data Type Scalar Array -=================== ========================= ================== ============================= -TZ-aware datetime :class:`DatetimeTZDtype` :class:`Timestamp` :ref:`api.arrays.datetime` -Timedeltas (none) :class:`Timedelta` :ref:`api.arrays.timedelta` -Period (time spans) :class:`PeriodDtype` :class:`Period` :ref:`api.arrays.period` -Intervals :class:`IntervalDtype` :class:`Interval` :ref:`api.arrays.interval` -Nullable Integer :class:`Int64Dtype`, ... 
(none) :ref:`api.arrays.integer_na` -Categorical :class:`CategoricalDtype` (none) :ref:`api.arrays.categorical` -Sparse :class:`SparseDtype` (none) :ref:`api.arrays.sparse` -Strings :class:`StringDtype` :class:`str` :ref:`api.arrays.string` -Boolean (with NA) :class:`BooleanDtype` :class:`bool` :ref:`api.arrays.bool` -=================== ========================= ================== ============================= +=================== ========================= ============================= ============================= +Kind of Data pandas Data Type Scalar Array +=================== ========================= ============================= ============================= +PyArrow :class:`ArrowDtype` Python Scalars or :class:`NA` :ref:`api.arrays.arrow` +TZ-aware datetime :class:`DatetimeTZDtype` :class:`Timestamp` :ref:`api.arrays.datetime` +Timedeltas (none) :class:`Timedelta` :ref:`api.arrays.timedelta` +Period (time spans) :class:`PeriodDtype` :class:`Period` :ref:`api.arrays.period` +Intervals :class:`IntervalDtype` :class:`Interval` :ref:`api.arrays.interval` +Nullable Integer :class:`Int64Dtype`, ... (none) :ref:`api.arrays.integer_na` +Categorical :class:`CategoricalDtype` (none) :ref:`api.arrays.categorical` +Sparse :class:`SparseDtype` (none) :ref:`api.arrays.sparse` +Strings :class:`StringDtype` :class:`str` :ref:`api.arrays.string` +Boolean (with NA) :class:`BooleanDtype` :class:`bool` :ref:`api.arrays.bool` +=================== ========================= ============================= ============================= pandas and third-party libraries can extend NumPy's type system (see :ref:`extending.extension-types`). The top-level :meth:`array` method can be used to create a new array, which may be @@ -42,6 +43,34 @@ stored in a :class:`Series`, :class:`Index`, or as a column in a :class:`DataFra array +.. 
_api.arrays.arrow: + +PyArrow +------- + +The :class:`arrays.ArrowExtensionArray` is backed by a :external+pyarrow:py:class:`pyarrow.ChunkedArray` with a +:external+pyarrow:py:class:`pyarrow.DataType` instead of a NumPy array and data type. The ``.dtype`` of a :class:`arrays.ArrowExtensionArray` +is an :class:`ArrowDtype`. + +`Pyarrow `__ provides similar array and `data type `__ +support as NumPy including first-class nullability support for all data types, immutability and more. + +While individual values in an :class:`arrays.ArrowExtensionArray` are stored as a PyArrow objects, scalars are **returned** +as Python scalars corresponding to the data type, e.g. a PyArrow int64 will be returned as Python int, or :class:`NA` for missing +values. + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + arrays.ArrowExtensionArray + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + ArrowDtype + .. _api.arrays.datetime: Datetimes diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index a882d3a955469..6d910127ca2aa 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -155,8 +155,25 @@ def to_pyarrow_type( class ArrowExtensionArray(OpsMixin, ExtensionArray): """ - Base class for ExtensionArray backed by Arrow ChunkedArray. - """ + Pandas ExtensionArray backed by a PyArrow ChunkedArray. + + Parameters + ---------- + values: pyarrow.Array or pyarrow.ChunkedArray + + Returns + ------- + ArrowExtensionArray + + Notes + ----- + Most methods are implemented using `pyarrow compute functions. `__ + Some methods may either raise an exception or raise a ``PerformanceWarning`` if an + associated compute function is not available based on the installed version of PyArrow. + + Please install the latest version of PyArrow to enable this functionality and avoid + potential bugs in prior versions of PyArrow. 
+ """ # noqa: E501 _data: pa.ChunkedArray diff --git a/pandas/core/arrays/arrow/dtype.py b/pandas/core/arrays/arrow/dtype.py index 4a32663a68ed2..d36f9bf88858e 100644 --- a/pandas/core/arrays/arrow/dtype.py +++ b/pandas/core/arrays/arrow/dtype.py @@ -17,9 +17,34 @@ @register_extension_dtype class ArrowDtype(StorageExtensionDtype): """ - Base class for dtypes for ArrowExtensionArray. - Modeled after BaseMaskedDtype - """ + An ExtensionDtype for PyArrow data types. + + While most ``dtype`` arguments can accept the "string" + constructor, e.g. ``int64[pyarrow]``, ArrowDtype is useful + if the data type contains parameters like ``pyarrow.timestamp``. + + Parameters + ---------- + pyarrow_dtype : pa.DataType + An instance of a `pyarrow.DataType `__ + + Returns + ------- + ArrowDtype + + Example + ------- + >>> import pyarrow as pa + >>> pd.ArrowDtype(pa.int64()) + int64[pyarrow] + + Types with parameters must be constructed with ArrowDtype + + >>> pd.ArrowDtype(pa.timestamp("s", tz="America/New_York")) + timestamp[s, tz=America/New_York][pyarrow] + >>> pd.ArrowDtype(pa.list_(pa.int64())) + list<item: int64>[pyarrow] + """ # noqa: E501 _metadata = ("storage", "pyarrow_dtype") # type: ignore[assignment] diff --git a/scripts/validate_rst_title_capitalization.py b/scripts/validate_rst_title_capitalization.py index 9aca47dbddbf2..e7233484e16b6 100755 --- a/scripts/validate_rst_title_capitalization.py +++ b/scripts/validate_rst_title_capitalization.py @@ -150,6 +150,7 @@ "LZMA", "Numba", "Timestamp", + "PyArrow", } CAP_EXCEPTIONS_DICT = {word.lower(): word for word in CAPITALIZATION_EXCEPTIONS} From c207851dc1a76947a957ec92afc749dcb10da6df Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 25 Jul 2022 18:13:07 -0700 Subject: [PATCH 02/10] Add whatsnew example --- doc/source/whatsnew/v1.5.0.rst | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 7f07187e34c78..68ca6e4b43c26 100644
--- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -14,6 +14,37 @@ including other versions of pandas. Enhancements ~~~~~~~~~~~~ +.. _whatsnew_150.enhancements.arrow: + +Native PyArrow-backed ExtensionArray +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +With `Pyarrow `__ installed, users can now create pandas objects +that are backed by a ``pyarrow.ChunkedArray`` and ``pyarrow.DataType``. + +The ``dtype`` argument can accept a string of a `pyarrow data type `__ +with ``pyarrow`` in brackets e.g. ``int64[pyarrow]`` or, for pyarrow data types that take parameters, a :class:`ArrowDtype` +initialized with a ``pyarrow.DataType`` + +.. ipython:: python + + import pyarrow as pa + ser_float = pd.Series([1.0, 2.0, None], dtype="float32[pyarrow]") + ser_float + + ser_list = pd.Series([[1, 2]], dtype=pd.ArrowDtype(pa.list_(pa.int64()))) + ser_list + +Most operations are supported and have been implemented using `pyarrow compute `__ functions. +We recommend installing the latest version of PyArrow to access the most recently implemented compute functions. + +.. ipython:: python + + ser_list.take([0, 0]) + ser_float * 5 + ser_float.mean() + ser_float.dropna() + .. _whatsnew_150.enhancements.dataframe_exchange: DataFrame exchange protocol implementation From c63656e0f5f01599d67135c5fb12d477dcfeeb10 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 29 Jul 2022 11:46:32 -0700 Subject: [PATCH 03/10] Add note about strings --- doc/source/reference/arrays.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/source/reference/arrays.rst b/doc/source/reference/arrays.rst index 8ec317a2bdca1..08ad38cdc1311 100644 --- a/doc/source/reference/arrays.rst +++ b/doc/source/reference/arrays.rst @@ -55,6 +55,12 @@ is an :class:`ArrowDtype`. `Pyarrow `__ provides similar array and `data type `__ support as NumPy including first-class nullability support for all data types, immutability and more. +.. 
note:: + + For string types (``pyarrow.string()``, ``string[pyarrow]``), PyArrow support is still facilitated + by :class:`arrays.ArrowStringArray` and ``StringDtype("pyarrow")``. See the :ref:`string section <api.arrays.string>` + below. + While individual values in an :class:`arrays.ArrowExtensionArray` are stored as a PyArrow objects, scalars are **returned** as Python scalars corresponding to the data type, e.g. a PyArrow int64 will be returned as Python int, or :class:`NA` for missing values. From a9e858119c1ea1db0c1516c84038768aec33e140 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 1 Aug 2022 10:31:56 -0700 Subject: [PATCH 04/10] Address review --- doc/source/whatsnew/v1.5.0.rst | 5 +++-- pandas/core/arrays/arrow/array.py | 6 +++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 900557e1b285b..af777f32660bf 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -32,7 +32,8 @@ initialized with a ``pyarrow.DataType`` ser_float = pd.Series([1.0, 2.0, None], dtype="float32[pyarrow]") ser_float - ser_list = pd.Series([[1, 2]], dtype=pd.ArrowDtype(pa.list_(pa.int64()))) + list_of_int_type = pd.ArrowDtype(pa.list_(pa.int64())) + ser_list = pd.Series([[1, 2], [3, None]], dtype=list_of_int_type) ser_list Most operations are supported and have been implemented using `pyarrow compute <https://arrow.apache.org/docs/python/api/compute.html>`__ functions. @@ -40,7 +41,7 @@ We recommend installing the latest version of PyArrow to access the most recentl ..
ipython:: python - ser_list.take([0, 0]) + ser_list.take([1, 0]) ser_float * 5 ser_float.mean() ser_float.dropna() diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 3342cf816d5a9..85dff113c0d76 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -163,7 +163,7 @@ class ArrowExtensionArray(OpsMixin, ExtensionArray): Parameters ---------- - values: pyarrow.Array or pyarrow.ChunkedArray + values : pyarrow.Array or pyarrow.ChunkedArray Returns ------- @@ -175,9 +175,9 @@ class ArrowExtensionArray(OpsMixin, ExtensionArray): Some methods may either raise an exception or raise a ``PerformanceWarning`` if an associated compute function is not available based on the installed version of PyArrow. - Please install the latest version of PyArrow to enable this functionality and avoid + Please install the latest version of PyArrow to enable the best functionality and avoid potential bugs in prior versions of PyArrow. - """ # noqa: E501 + """ # noqa: E501 (http link too long) _data: pa.ChunkedArray _dtype: ArrowDtype From 02e3aa4c46dd5571298384b67d92d1f36f09d5a9 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 10 Aug 2022 13:27:03 -0700 Subject: [PATCH 05/10] modify whatsnew --- doc/source/whatsnew/v1.5.0.rst | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index a15c4168e5f42..04690bfa2a5f9 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -34,7 +34,7 @@ that are backed by a ``pyarrow.ChunkedArray`` and ``pyarrow.DataType``. The ``dtype`` argument can accept a string of a `pyarrow data type `__ with ``pyarrow`` in brackets e.g. ``int64[pyarrow]`` or, for pyarrow data types that take parameters, a :class:`ArrowDtype` -initialized with a ``pyarrow.DataType`` +initialized with a ``pyarrow.DataType``. .. 
ipython:: python @@ -46,16 +46,14 @@ initialized with a ``pyarrow.DataType`` ser_list = pd.Series([[1, 2], [3, None]], dtype=list_of_int_type) ser_list -Most operations are supported and have been implemented using `pyarrow compute `__ functions. -We recommend installing the latest version of PyArrow to access the most recently implemented compute functions. - -.. ipython:: python - ser_list.take([1, 0]) ser_float * 5 ser_float.mean() ser_float.dropna() +Most operations are supported and have been implemented using `pyarrow compute `__ functions. +We recommend installing the latest version of PyArrow to access the most recently implemented compute functions. + .. _whatsnew_150.enhancements.dataframe_interchange: DataFrame interchange protocol implementation From 4e3b3b993f250231285f69d951bdf5f131f58f13 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 10 Aug 2022 13:34:34 -0700 Subject: [PATCH 06/10] Add experimental note --- doc/source/whatsnew/v1.5.0.rst | 2 ++ pandas/core/arrays/arrow/array.py | 5 +++++ pandas/core/arrays/arrow/dtype.py | 5 +++++ 3 files changed, 12 insertions(+) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 04690bfa2a5f9..4abfbb3e30c49 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -54,6 +54,8 @@ initialized with a ``pyarrow.DataType``. Most operations are supported and have been implemented using `pyarrow compute `__ functions. We recommend installing the latest version of PyArrow to access the most recently implemented compute functions. +This feature is experimental and may change in a future release. + .. 
_whatsnew_150.enhancements.dataframe_interchange: DataFrame interchange protocol implementation diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 85dff113c0d76..8e971d32f2795 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -161,6 +161,11 @@ class ArrowExtensionArray(OpsMixin, ExtensionArray): """ Pandas ExtensionArray backed by a PyArrow ChunkedArray. + .. warning:: + + ArrowExtensionArray is considered experimental. The implementation and + parts of the API may change without warning. + Parameters ---------- values : pyarrow.Array or pyarrow.ChunkedArray diff --git a/pandas/core/arrays/arrow/dtype.py b/pandas/core/arrays/arrow/dtype.py index d36f9bf88858e..1a843622dd075 100644 --- a/pandas/core/arrays/arrow/dtype.py +++ b/pandas/core/arrays/arrow/dtype.py @@ -19,6 +19,11 @@ class ArrowDtype(StorageExtensionDtype): """ An ExtensionDtype for PyArrow data types. + .. warning:: + + ArrowDtype is considered experimental. The implementation and + parts of the API may change without warning. + While most ``dtype`` arguments can accept the "string" constructor, e.g. ``int64[pyarrow]``, ArrowDtype is useful if the data type contains parameters like ``pyarrow.timestamp``. From 7d84a5e7b4ecbde401e8bf0ee22400fd0ef9a2a3 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 15 Aug 2022 13:52:20 -0700 Subject: [PATCH 07/10] Address Joris' comments --- doc/source/reference/arrays.rst | 6 +++++- doc/source/whatsnew/v1.5.0.rst | 4 +++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/doc/source/reference/arrays.rst b/doc/source/reference/arrays.rst index 08ad38cdc1311..e05be4d9a2776 100644 --- a/doc/source/reference/arrays.rst +++ b/doc/source/reference/arrays.rst @@ -22,7 +22,6 @@ can be found at :ref:`basics.dtypes`. 
=================== ========================= ============================= ============================= Kind of Data pandas Data Type Scalar Array =================== ========================= ============================= ============================= -PyArrow :class:`ArrowDtype` Python Scalars or :class:`NA` :ref:`api.arrays.arrow` TZ-aware datetime :class:`DatetimeTZDtype` :class:`Timestamp` :ref:`api.arrays.datetime` Timedeltas (none) :class:`Timedelta` :ref:`api.arrays.timedelta` Period (time spans) :class:`PeriodDtype` :class:`Period` :ref:`api.arrays.period` @@ -32,6 +31,7 @@ Categorical :class:`CategoricalDtype` (none) :ref Sparse :class:`SparseDtype` (none) :ref:`api.arrays.sparse` Strings :class:`StringDtype` :class:`str` :ref:`api.arrays.string` Boolean (with NA) :class:`BooleanDtype` :class:`bool` :ref:`api.arrays.bool` +PyArrow :class:`ArrowDtype` Python Scalars or :class:`NA` :ref:`api.arrays.arrow` =================== ========================= ============================= ============================= pandas and third-party libraries can extend NumPy's type system (see :ref:`extending.extension-types`). @@ -48,6 +48,10 @@ stored in a :class:`Series`, :class:`Index`, or as a column in a :class:`DataFra PyArrow ------- +.. warning:: + + This feature is experimental, and the API can change in a future release without warning. + The :class:`arrays.ArrowExtensionArray` is backed by a :external+pyarrow:py:class:`pyarrow.ChunkedArray` with a :external+pyarrow:py:class:`pyarrow.DataType` instead of a NumPy array and data type. The ``.dtype`` of a :class:`arrays.ArrowExtensionArray` is an :class:`ArrowDtype`. diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 528b201d11e35..633affb79b435 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -54,7 +54,9 @@ initialized with a ``pyarrow.DataType``. Most operations are supported and have been implemented using `pyarrow compute `__ functions. 
We recommend installing the latest version of PyArrow to access the most recently implemented compute functions. -This feature is experimental and may change in a future release. +.. warning:: + + This feature is experimental, and the API can change in a future release without warning. .. _whatsnew_150.enhancements.dataframe_interchange: From 42b9002ff79a6a0c4e93bb954f49209c1e5c13f3 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Tue, 16 Aug 2022 18:32:46 -0700 Subject: [PATCH 08/10] Fix repr, and add missing import --- pandas/arrays/__init__.py | 2 ++ pandas/core/arrays/arrow/dtype.py | 3 +++ 2 files changed, 5 insertions(+) diff --git a/pandas/arrays/__init__.py b/pandas/arrays/__init__.py index 89d362eb77e68..3a8e80a6b5d2b 100644 --- a/pandas/arrays/__init__.py +++ b/pandas/arrays/__init__.py @@ -4,6 +4,7 @@ See :ref:`extending.extension-types` for more. """ from pandas.core.arrays import ( + ArrowExtensionArray, ArrowStringArray, BooleanArray, Categorical, @@ -19,6 +20,7 @@ ) __all__ = [ + "ArrowExtensionArray", "ArrowStringArray", "BooleanArray", "Categorical", diff --git a/pandas/core/arrays/arrow/dtype.py b/pandas/core/arrays/arrow/dtype.py index df9ff56db73fe..0ddbac43bad9b 100644 --- a/pandas/core/arrays/arrow/dtype.py +++ b/pandas/core/arrays/arrow/dtype.py @@ -67,6 +67,9 @@ def __init__(self, pyarrow_dtype: pa.DataType) -> None: ) self.pyarrow_dtype = pyarrow_dtype + def __repr__(self) -> str: + return self.name + @property def type(self): """ From 14422b70597a936706eb9f06805e9aa3ffb0e7a3 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 17 Aug 2022 11:33:11 -0700 Subject: [PATCH 09/10] Add extra sections --- pandas/core/arrays/arrow/array.py | 8 ++++++++ pandas/core/arrays/arrow/dtype.py | 16 ++++++++++++---- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 8e971d32f2795..3601881fb1c59 100644 --- a/pandas/core/arrays/arrow/array.py +++ 
b/pandas/core/arrays/arrow/array.py @@ -170,6 +170,14 @@ class ArrowExtensionArray(OpsMixin, ExtensionArray): ---------- values : pyarrow.Array or pyarrow.ChunkedArray + Attributes + ---------- + None + + Methods + ------- + None + Returns ------- ArrowExtensionArray diff --git a/pandas/core/arrays/arrow/dtype.py b/pandas/core/arrays/arrow/dtype.py index 0ddbac43bad9b..6e280f76a518c 100644 --- a/pandas/core/arrays/arrow/dtype.py +++ b/pandas/core/arrays/arrow/dtype.py @@ -34,19 +34,27 @@ class ArrowDtype(StorageExtensionDtype): Parameters ---------- pyarrow_dtype : pa.DataType - An instance of a `pyarrow.DataType `__ + An instance of a `pyarrow.DataType `__. + + Attributes + ---------- + pyarrow_dtype + + Methods + ------- + None Returns ------- ArrowDtype - Example - ------- + Examples + -------- >>> import pyarrow as pa >>> pd.ArrowDtype(pa.int64()) int64[pyarrow] - Types with parameters must be constructed with ArrowDtype + Types with parameters must be constructed with ArrowDtype. >>> pd.ArrowDtype(pa.timestamp("s", tz="America/New_York")) timestamp[s, tz=America/New_York][pyarrow] From 549f0ce98321eb2e119fe03eb7502d19f5b392ec Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 18 Aug 2022 11:00:47 -0700 Subject: [PATCH 10/10] Add quotes and example --- doc/source/whatsnew/v1.5.0.rst | 2 +- pandas/core/arrays/arrow/array.py | 9 +++++++++ pandas/core/arrays/arrow/dtype.py | 2 +- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 1b54f009313ad..f0eb73edda332 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -33,7 +33,7 @@ With `Pyarrow `__ installed, us that are backed by a ``pyarrow.ChunkedArray`` and ``pyarrow.DataType``. The ``dtype`` argument can accept a string of a `pyarrow data type `__ -with ``pyarrow`` in brackets e.g. 
``int64[pyarrow]`` or, for pyarrow data types that take parameters, a :class:`ArrowDtype` +with ``pyarrow`` in brackets e.g. ``"int64[pyarrow]"`` or, for pyarrow data types that take parameters, a :class:`ArrowDtype` initialized with a ``pyarrow.DataType``. .. ipython:: python diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index d977c016f009a..1f7939011a1f1 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -190,6 +190,15 @@ class ArrowExtensionArray(OpsMixin, ExtensionArray): Please install the latest version of PyArrow to enable the best functionality and avoid potential bugs in prior versions of PyArrow. + + Examples + -------- + Create an ArrowExtensionArray with :func:`pandas.array`: + + >>> pd.array([1, 1, None], dtype="int64[pyarrow]") + <ArrowExtensionArray> + [1, 1, <NA>] + Length: 3, dtype: int64[pyarrow] """ # noqa: E501 (http link too long) _data: pa.ChunkedArray diff --git a/pandas/core/arrays/arrow/dtype.py b/pandas/core/arrays/arrow/dtype.py index 6e280f76a518c..48e2c5bdda2f8 100644 --- a/pandas/core/arrays/arrow/dtype.py +++ b/pandas/core/arrays/arrow/dtype.py @@ -28,7 +28,7 @@ class ArrowDtype(StorageExtensionDtype): parts of the API may change without warning. While most ``dtype`` arguments can accept the "string" - constructor, e.g. ``int64[pyarrow]``, ArrowDtype is useful + constructor, e.g. ``"int64[pyarrow]"``, ArrowDtype is useful if the data type contains parameters like ``pyarrow.timestamp``. Parameters