From 73d23a17b2854f5534fdff76114e60f1370cb9f0 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 27 Oct 2022 09:44:23 -0700 Subject: [PATCH 1/8] start adding hooks --- pandas/io/parsers/arrow_parser_wrapper.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py index 49b71efbfb6ec..222fd1b97d4f5 100644 --- a/pandas/io/parsers/arrow_parser_wrapper.py +++ b/pandas/io/parsers/arrow_parser_wrapper.py @@ -1,16 +1,17 @@ from __future__ import annotations -from typing import TYPE_CHECKING - from pandas._typing import ReadBuffer from pandas.compat._optional import import_optional_dependency from pandas.core.dtypes.inference import is_integer -from pandas.io.parsers.base_parser import ParserBase +from pandas import ( + DataFrame, + arrays, + get_option, +) -if TYPE_CHECKING: - from pandas import DataFrame +from pandas.io.parsers.base_parser import ParserBase class ArrowParserWrapper(ParserBase): @@ -150,6 +151,16 @@ def read(self) -> DataFrame: parse_options=pyarrow_csv.ParseOptions(**self.parse_options), convert_options=pyarrow_csv.ConvertOptions(**self.convert_options), ) - + if ( + self.kwds["use_nullable_dtypes"] + and get_option("io.nullable_backend") == "pyarrow" + ): + result = DataFrame( + { + col_name: arrays.ArrowExtensionArray(pa_col) + for col_name, pa_col in zip(table.column_names, table.itercolumns()) + } + ) + return result frame = table.to_pandas() return self._finalize_output(frame) From 274f2f3d1a881acaa1a1227f8eb889f90c55cfd1 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 27 Oct 2022 21:47:44 -0700 Subject: [PATCH 2/8] add test and implementation for pyarrow engine --- doc/source/whatsnew/v2.0.0.rst | 2 +- pandas/io/parsers/arrow_parser_wrapper.py | 10 ++-- pandas/io/parsers/readers.py | 7 +++ 
.../io/parser/dtypes/test_dtypes_basic.py | 59 +++++++++++++++++-- 4 files changed, 66 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 252c444b2e60c..16f05d00ebce4 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -42,7 +42,7 @@ Other enhancements - :meth:`Series.add_suffix`, :meth:`DataFrame.add_suffix`, :meth:`Series.add_prefix` and :meth:`DataFrame.add_prefix` support an ``axis`` argument. If ``axis`` is set, the default behaviour of which axis to consider can be overwritten (:issue:`47819`) - :func:`assert_frame_equal` now shows the first element where the DataFrames differ, analogously to ``pytest``'s output (:issue:`47910`) - Added new argument ``use_nullable_dtypes`` to :func:`read_csv` and :func:`read_excel` to enable automatic conversion to nullable dtypes (:issue:`36712`) -- Added new global configuration, ``io.nullable_backend`` to allow ``use_nullable_dtypes=True`` to return pyarrow-backed dtypes when set to ``"pyarrow"`` in :func:`read_parquet` (:issue:`48957`) +- Added new global configuration, ``io.nullable_backend`` to allow ``use_nullable_dtypes=True`` to return pyarrow-backed dtypes when set to ``"pyarrow"`` in :func:`read_parquet`, :func:`read_csv` (with ``engine="pyarrow"``) (:issue:`48957`) - Added ``index`` parameter to :meth:`DataFrame.to_dict` (:issue:`46398`) - Added metadata propagation for binary operators on :class:`DataFrame` (:issue:`28283`) - :class:`.CategoricalConversionWarning`, :class:`.InvalidComparison`, :class:`.InvalidVersion`, :class:`.LossySetitemError`, and :class:`.NoBufferPresent` are now exposed in ``pandas.errors`` (:issue:`27656`) diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py index 222fd1b97d4f5..d2cb777d145e2 100644 --- a/pandas/io/parsers/arrow_parser_wrapper.py +++ b/pandas/io/parsers/arrow_parser_wrapper.py @@ -78,7 +78,7 @@ def _get_pyarrow_options(self) -> None: 
else self.kwds["skiprows"], } - def _finalize_output(self, frame: DataFrame) -> DataFrame: + def _finalize_pandas_output(self, frame: DataFrame) -> DataFrame: """ Processes data read in based on kwargs. @@ -155,12 +155,12 @@ def read(self) -> DataFrame: self.kwds["use_nullable_dtypes"] and get_option("io.nullable_backend") == "pyarrow" ): - result = DataFrame( + frame = DataFrame( { col_name: arrays.ArrowExtensionArray(pa_col) for col_name, pa_col in zip(table.column_names, table.itercolumns()) } ) - return result - frame = table.to_pandas() - return self._finalize_output(frame) + else: + frame = table.to_pandas() + return self._finalize_pandas_output(frame) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index abd1182214f5f..6822097a1043c 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -22,6 +22,8 @@ import numpy as np +from pandas._config import get_option + import pandas._libs.lib as lib from pandas._libs.parsers import STR_NA_VALUES from pandas._typing import ( @@ -601,6 +603,11 @@ def _read( raise ValueError( "The 'chunksize' option is not supported with the 'pyarrow' engine" ) + elif kwds["use_nullable_dtypes"] and get_option("io.nullable_backend") == "pyarrow": + raise NotImplementedError( + f"use_nullable_dtypes=True and engine={kwds['engine']} with " + "io.nullable_backend set to 'pyarrow' is not implemented." 
+ ) else: chunksize = validate_integer("chunksize", chunksize, 1) diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py index e2c7f77aae815..84d17e0949ba6 100644 --- a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py +++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py @@ -9,7 +9,6 @@ import pytest from pandas.errors import ParserWarning -import pandas.util._test_decorators as td import pandas as pd from pandas import ( @@ -22,13 +21,10 @@ StringArray, ) -# TODO(1.4): Change me into xfail at release time -# and xfail individual tests -pytestmark = pytest.mark.usefixtures("pyarrow_skip") - @pytest.mark.parametrize("dtype", [str, object]) @pytest.mark.parametrize("check_orig", [True, False]) +@pytest.mark.usefixtures("pyarrow_xfail") def test_dtype_all_columns(all_parsers, dtype, check_orig): # see gh-3795, gh-6607 parser = all_parsers @@ -53,6 +49,7 @@ def test_dtype_all_columns(all_parsers, dtype, check_orig): tm.assert_frame_equal(result, expected) +@pytest.mark.usefixtures("pyarrow_xfail") def test_dtype_per_column(all_parsers): parser = all_parsers data = """\ @@ -71,6 +68,7 @@ def test_dtype_per_column(all_parsers): tm.assert_frame_equal(result, expected) +@pytest.mark.usefixtures("pyarrow_xfail") def test_invalid_dtype_per_column(all_parsers): parser = all_parsers data = """\ @@ -84,6 +82,7 @@ def test_invalid_dtype_per_column(all_parsers): parser.read_csv(StringIO(data), dtype={"one": "foo", 1: "int"}) +@pytest.mark.usefixtures("pyarrow_xfail") def test_raise_on_passed_int_dtype_with_nas(all_parsers): # see gh-2631 parser = all_parsers @@ -101,6 +100,7 @@ def test_raise_on_passed_int_dtype_with_nas(all_parsers): parser.read_csv(StringIO(data), dtype={"DOY": np.int64}, skipinitialspace=True) +@pytest.mark.usefixtures("pyarrow_xfail") def test_dtype_with_converters(all_parsers): parser = all_parsers data = """a,b @@ -132,6 +132,7 @@ def test_numeric_dtype(all_parsers, dtype): 
tm.assert_frame_equal(expected, result) +@pytest.mark.usefixtures("pyarrow_xfail") def test_boolean_dtype(all_parsers): parser = all_parsers data = "\n".join( @@ -184,6 +185,7 @@ def test_boolean_dtype(all_parsers): tm.assert_frame_equal(result, expected) +@pytest.mark.usefixtures("pyarrow_xfail") def test_delimiter_with_usecols_and_parse_dates(all_parsers): # GH#35873 result = all_parsers.read_csv( @@ -264,6 +266,7 @@ def test_skip_whitespace(c_parser_only, float_precision): tm.assert_series_equal(df.iloc[:, 1], pd.Series([1.2, 2.1, 1.0, 1.2], name="num")) +@pytest.mark.usefixtures("pyarrow_xfail") def test_true_values_cast_to_bool(all_parsers): # GH#34655 text = """a,b @@ -286,6 +289,7 @@ def test_true_values_cast_to_bool(all_parsers): tm.assert_frame_equal(result, expected) +@pytest.mark.usefixtures("pyarrow_xfail") @pytest.mark.parametrize("dtypes, exp_value", [({}, "1"), ({"a.1": "int64"}, 1)]) def test_dtype_mangle_dup_cols(all_parsers, dtypes, exp_value): # GH#35211 @@ -300,6 +304,7 @@ def test_dtype_mangle_dup_cols(all_parsers, dtypes, exp_value): tm.assert_frame_equal(result, expected) +@pytest.mark.usefixtures("pyarrow_xfail") def test_dtype_mangle_dup_cols_single_dtype(all_parsers): # GH#42022 parser = all_parsers @@ -309,6 +314,7 @@ def test_dtype_mangle_dup_cols_single_dtype(all_parsers): tm.assert_frame_equal(result, expected) +@pytest.mark.usefixtures("pyarrow_xfail") def test_dtype_multi_index(all_parsers): # GH 42446 parser = all_parsers @@ -355,6 +361,7 @@ def test_nullable_int_dtype(all_parsers, any_int_ea_dtype): tm.assert_frame_equal(actual, expected) +@pytest.mark.usefixtures("pyarrow_xfail") @pytest.mark.parametrize("default", ["float", "float64"]) def test_dtypes_defaultdict(all_parsers, default): # GH#41574 @@ -368,6 +375,7 @@ def test_dtypes_defaultdict(all_parsers, default): tm.assert_frame_equal(result, expected) +@pytest.mark.usefixtures("pyarrow_xfail") def test_dtypes_defaultdict_mangle_dup_cols(all_parsers): # GH#41574 data = 
"""a,b,a,b,b.1 @@ -381,6 +389,7 @@ def test_dtypes_defaultdict_mangle_dup_cols(all_parsers): tm.assert_frame_equal(result, expected) +@pytest.mark.usefixtures("pyarrow_xfail") def test_dtypes_defaultdict_invalid(all_parsers): # GH#41574 data = """a,b @@ -392,6 +401,7 @@ def test_dtypes_defaultdict_invalid(all_parsers): parser.read_csv(StringIO(data), dtype=dtype) +@pytest.mark.usefixtures("pyarrow_xfail") def test_use_nullable_dtypes(all_parsers): # GH#36712 @@ -435,7 +445,7 @@ def test_use_nullabla_dtypes_and_dtype(all_parsers): tm.assert_frame_equal(result, expected) -@td.skip_if_no("pyarrow") +@pytest.mark.usefixtures("pyarrow_xfail") @pytest.mark.parametrize("storage", ["pyarrow", "python"]) def test_use_nullabla_dtypes_string(all_parsers, storage): # GH#36712 @@ -477,3 +487,40 @@ def test_use_nullable_dtypes_ea_dtype_specified(all_parsers): result = parser.read_csv(StringIO(data), dtype="Int64", use_nullable_dtypes=True) expected = DataFrame({"a": [1], "b": 2}, dtype="Int64") tm.assert_frame_equal(result, expected) + + +def test_use_nullable_dtypes_pyarrow_backend(all_parsers, request): + # GH#36712 + pa = pytest.importorskip("pyarrow") + parser = all_parsers + + data = """a,b,c,d,e,f,g,h,i,j +1,2.5,True,a,,,,,12-31-2019, +3,4.5,False,b,6,7.5,True,a,12-31-2019, +""" + with pd.option_context("io.nullable_backend", "pyarrow"): + if parser.engine != "pyarrow": + request.node.add_marker( + pytest.mark.xfail( + raises=NotImplementedError, + reason=f"Not implemented with engine={parser.engine}", + ) + ) + result = parser.read_csv( + StringIO(data), use_nullable_dtypes=True, parse_dates=["i"] + ) + expected = DataFrame( + { + "a": pd.Series([1, 3], dtype="int64[pyarrow]"), + "b": pd.Series([2.5, 4.5], dtype="float64[pyarrow]"), + "c": pd.Series([True, False], dtype="bool[pyarrow]"), + "d": pd.Series(["a", "b"], dtype=pd.ArrowDtype(pa.string())), + "e": pd.Series([pd.NA, 6], dtype="int64[pyarrow]"), + "f": pd.Series([pd.NA, 7.5], dtype="float64[pyarrow]"), + "g": 
pd.Series([pd.NA, True], dtype="bool[pyarrow]"), + "h": pd.Series(["", "a"], dtype=pd.ArrowDtype(pa.string())), + "i": pd.Series([Timestamp("2019-12-31")] * 2), + "j": pd.Series([pd.NA, pd.NA], dtype="null[pyarrow]"), + } + ) + tm.assert_frame_equal(result, expected) From 4b90df69059b4f7551930019a11680a5640cce0d Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 28 Oct 2022 12:16:15 -0700 Subject: [PATCH 3/8] Use get --- pandas/io/parsers/readers.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index dfb65bfa31544..75746dc9f7b75 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -602,7 +602,10 @@ def _read( raise ValueError( "The 'chunksize' option is not supported with the 'pyarrow' engine" ) - elif kwds["use_nullable_dtypes"] and get_option("io.nullable_backend") == "pyarrow": + elif ( + kwds.get("use_nullable_dtypes", False) + and get_option("io.nullable_backend") == "pyarrow" + ): raise NotImplementedError( f"use_nullable_dtypes=True and engine={kwds['engine']} with " "io.nullable_backend set to 'pyarrow' is not implemented." 
From 8bdabaa6f1540a84a72eefcc70b218eda600f45d Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 28 Oct 2022 16:28:22 -0700 Subject: [PATCH 4/8] Fix test --- pandas/tests/io/parser/dtypes/test_dtypes_basic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py index 84d17e0949ba6..f7b0fa7ccb3c3 100644 --- a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py +++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py @@ -449,7 +449,7 @@ def test_use_nullabla_dtypes_and_dtype(all_parsers): @pytest.mark.parametrize("storage", ["pyarrow", "python"]) def test_use_nullabla_dtypes_string(all_parsers, storage): # GH#36712 - import pyarrow as pa + pa = pytest.importorskip("pyarrow") with pd.option_context("mode.string_storage", storage): From 23a5289ccd0034b0a191fb435a049d9f444c089d Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 31 Oct 2022 12:46:50 -0700 Subject: [PATCH 5/8] change into section --- doc/source/whatsnew/v2.0.0.rst | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index da19318620460..756e9786faed1 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -28,10 +28,24 @@ Available optional dependencies (listed in order of appearance at `install guide ``[all, performance, computation, timezone, fss, aws, gcp, excel, parquet, feather, hdf5, spss, postgresql, mysql, sql-other, html, xml, plot, output_formatting, clipboard, compression, test]`` (:issue:`39164`). -.. _whatsnew_200.enhancements.enhancement2: +.. 
_whatsnew_200.enhancements.io_readers_nullable_pyarrow: -enhancement2 -^^^^^^^^^^^^ +Configuration option, ``io.nullable_backend`, to return pyarrow-backed dtypes from IO functions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A new global configuration, ``io.nullable_backend`` can now be used in conjunction with the parameter ``use_nullable_dtypes=True`` in :func:`read_parquet` and :func:`read_csv` (with ``engine="pyarrow"``) +to return pyarrow-backed dtypes when set to ``"pyarrow"`` (:issue:`48957`). + +.. ipython:: python + + import io + data = """a,b,c,d,e,f,g,h,i,j + 1,2.5,True,a,,,,,12-31-2019, + 3,4.5,False,b,6,7.5,True,a,12-31-2019, + """ + with pd.option_context("io.nullable_backend", "pyarrow"): + df = pd.read_csv(io.StringIO(data), use_nullable_dtypes=True, parse_dates=["i"]) + df .. _whatsnew_200.enhancements.other: @@ -42,7 +56,6 @@ Other enhancements - :meth:`Series.add_suffix`, :meth:`DataFrame.add_suffix`, :meth:`Series.add_prefix` and :meth:`DataFrame.add_prefix` support an ``axis`` argument. 
If ``axis`` is set, the default behaviour of which axis to consider can be overwritten (:issue:`47819`) - :func:`assert_frame_equal` now shows the first element where the DataFrames differ, analogously to ``pytest``'s output (:issue:`47910`) - Added new argument ``use_nullable_dtypes`` to :func:`read_csv` and :func:`read_excel` to enable automatic conversion to nullable dtypes (:issue:`36712`) -- Added new global configuration, ``io.nullable_backend`` to allow ``use_nullable_dtypes=True`` to return pyarrow-backed dtypes when set to ``"pyarrow"`` in :func:`read_parquet`, :func:`read_csv` (with ``engine="pyarrow"``) (:issue:`48957`) - Added ``index`` parameter to :meth:`DataFrame.to_dict` (:issue:`46398`) - Added metadata propagation for binary operators on :class:`DataFrame` (:issue:`28283`) - :class:`.CategoricalConversionWarning`, :class:`.InvalidComparison`, :class:`.InvalidVersion`, :class:`.LossySetitemError`, and :class:`.NoBufferPresent` are now exposed in ``pandas.errors`` (:issue:`27656`) From 2076effb1626d055506fdf1dc58106c2091a898d Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 1 Nov 2022 16:35:50 -0700 Subject: [PATCH 6/8] fix doc example --- doc/source/whatsnew/v2.0.0.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index fd113453f9964..12085983f18bb 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -39,12 +39,12 @@ to return pyarrow-backed dtypes when set to ``"pyarrow"`` (:issue:`48957`). .. 
ipython:: python import io - data = """a,b,c,d,e,f,g,h,i,j - 1,2.5,True,a,,,,,12-31-2019, - 3,4.5,False,b,6,7.5,True,a,12-31-2019, - """ + data = io.StringIO("""a,b,c,d,e,f,g,h,i + 1,2.5,True,a,,,,, + 3,4.5,False,b,6,7.5,True,a, + """) with pd.option_context("io.nullable_backend", "pyarrow"): - df = pd.read_csv(io.StringIO(data), use_nullable_dtypes=True, parse_dates=["i"]) + df = pd.read_csv(data, use_nullable_dtypes=True, engine="pyarrow") df .. _whatsnew_200.enhancements.other: From cc573ea73fd942a265550f010ec1baa97cdb27cc Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 2 Nov 2022 12:13:58 -0700 Subject: [PATCH 7/8] Fix backtick --- doc/source/whatsnew/v2.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 45f2dc4af623c..d5b0529347965 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -30,7 +30,7 @@ sql-other, html, xml, plot, output_formatting, clipboard, compression, test]`` ( .. 
_whatsnew_200.enhancements.io_readers_nullable_pyarrow: -Configuration option, ``io.nullable_backend`, to return pyarrow-backed dtypes from IO functions +Configuration option, ``io.nullable_backend``, to return pyarrow-backed dtypes from IO functions ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ A new global configuration, ``io.nullable_backend`` can now be used in conjunction with the parameter ``use_nullable_dtypes=True`` in :func:`read_parquet` and :func:`read_csv` (with ``engine="pyarrow"``) From 6c0c86c1eb073652f063364f967b515c8e2f0558 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 3 Nov 2022 10:53:23 -0700 Subject: [PATCH 8/8] caret --- doc/source/whatsnew/v2.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 3a17193f4827a..4a948c7276f5d 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -31,7 +31,7 @@ sql-other, html, xml, plot, output_formatting, clipboard, compression, test]`` ( .. _whatsnew_200.enhancements.io_readers_nullable_pyarrow: Configuration option, ``io.nullable_backend``, to return pyarrow-backed dtypes from IO functions -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ A new global configuration, ``io.nullable_backend`` can now be used in conjunction with the parameter ``use_nullable_dtypes=True`` in :func:`read_parquet` and :func:`read_csv` (with ``engine="pyarrow"``) to return pyarrow-backed dtypes when set to ``"pyarrow"`` (:issue:`48957`).