diff --git a/pandas-stubs/core/generic.pyi b/pandas-stubs/core/generic.pyi
index 266207682..58e080b68 100644
--- a/pandas-stubs/core/generic.pyi
+++ b/pandas-stubs/core/generic.pyi
@@ -29,6 +29,7 @@ from pandas._typing import (
     CSVQuoting,
     Dtype,
     DtypeArg,
+    DtypeBackend,
     FilePath,
     FileWriteMode,
     FillnaOptions,
@@ -373,6 +374,7 @@ class NDFrame(PandasObject, indexing.IndexingMixin):
         convert_string: _bool = ...,
         convert_integer: _bool = ...,
         convert_boolean: _bool = ...,
+        dtype_backend: DtypeBackend = ...,
     ) -> NDFrameT: ...
     def fillna(
         self,
diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi
index 227458764..37ed3d005 100644
--- a/pandas-stubs/core/series.pyi
+++ b/pandas-stubs/core/series.pyi
@@ -96,6 +96,7 @@ from pandas._typing import (
     CategoryDtypeArg,
     ComplexDtypeArg,
     CompressionOptions,
+    DtypeBackend,
     DtypeObj,
     FilePath,
     FillnaOptions,
@@ -1133,6 +1134,7 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]):
         convert_string: _bool = ...,
         convert_integer: _bool = ...,
         convert_boolean: _bool = ...,
+        dtype_backend: DtypeBackend = ...,
     ) -> Series[S1]: ...
     @overload
     def ffill(
diff --git a/pandas-stubs/core/tools/numeric.pyi b/pandas-stubs/core/tools/numeric.pyi
index 0b5ea71dc..7b998b517 100644
--- a/pandas-stubs/core/tools/numeric.pyi
+++ b/pandas-stubs/core/tools/numeric.pyi
@@ -7,7 +7,9 @@ import numpy as np
 import pandas as pd
 from typing_extensions import TypeAlias
 
+from pandas._libs.lib import NoDefault
 from pandas._typing import (
+    DtypeBackend,
     IgnoreRaiseCoerce,
     Scalar,
     npt,
@@ -20,22 +22,26 @@ def to_numeric(
     arg: Scalar,
     errors: Literal["raise", "coerce"] = ...,
     downcast: _Downcast = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> float: ...
 @overload
 def to_numeric(
     arg: Scalar,
     errors: Literal["ignore"],
     downcast: _Downcast = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> Scalar: ...
 @overload
 def to_numeric(
     arg: list | tuple | np.ndarray,
     errors: IgnoreRaiseCoerce = ...,
     downcast: _Downcast = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> npt.NDArray: ...
 @overload
 def to_numeric(
     arg: pd.Series,
     errors: IgnoreRaiseCoerce = ...,
     downcast: _Downcast = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> pd.Series: ...
diff --git a/pandas-stubs/io/clipboards.pyi b/pandas-stubs/io/clipboards.pyi
index 921f85926..25a675d51 100644
--- a/pandas-stubs/io/clipboards.pyi
+++ b/pandas-stubs/io/clipboards.pyi
@@ -12,11 +12,13 @@ from typing import (
 
 from pandas.core.frame import DataFrame
 
+from pandas._libs.lib import NoDefault
 from pandas._typing import (
     CompressionOptions,
     CSVEngine,
     CSVQuoting,
     DtypeArg,
+    DtypeBackend,
     ListLikeHashable,
     StorageOptions,
     UsecolsArgType,
@@ -28,6 +30,7 @@ from pandas.io.parsers import TextFileReader
 def read_clipboard(
     sep: str | None = ...,
     *,
+    dtype_backend: DtypeBackend | NoDefault = ...,
     delimiter: str | None = ...,
     header: int | Sequence[int] | Literal["infer"] | None = ...,
     names: ListLikeHashable | None = ...,
@@ -85,6 +88,7 @@ def read_clipboard(
     sep: str | None = ...,
     *,
+    dtype_backend: DtypeBackend | NoDefault = ...,
     delimiter: str | None = ...,
     header: int | Sequence[int] | Literal["infer"] | None = ...,
     names: ListLikeHashable | None = ...,
@@ -142,6 +146,7 @@ def read_clipboard(
     sep: str | None = ...,
     *,
+    dtype_backend: DtypeBackend | NoDefault = ...,
     delimiter: str | None = ...,
     header: int | Sequence[int] | Literal["infer"] | None = ...,
     names: ListLikeHashable | None = ...,
diff --git a/pandas-stubs/io/excel/_base.pyi b/pandas-stubs/io/excel/_base.pyi
index f75dd36b3..68425625e 100644
--- a/pandas-stubs/io/excel/_base.pyi
+++ b/pandas-stubs/io/excel/_base.pyi
@@ -19,8 +19,10 @@ import pyxlsb.workbook
 from typing_extensions import Self
 from xlrd.book import Book
 
+from pandas._libs.lib import NoDefault
 from pandas._typing import (
     Dtype,
+    DtypeBackend,
     FilePath,
     ListLikeHashable,
     ReadBuffer,
@@ -66,6 +68,7 @@ def read_excel(
     comment: str | None = ...,
     skipfooter: int = ...,
     storage_options: StorageOptions = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> dict[int | str, DataFrame]: ...
 @overload
 def read_excel(
@@ -104,6 +107,7 @@ def read_excel(
     comment: str | None = ...,
     skipfooter: int = ...,
     storage_options: StorageOptions = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> DataFrame: ...
 
 class ExcelWriter:
diff --git a/pandas-stubs/io/feather_format.pyi b/pandas-stubs/io/feather_format.pyi
index 32c8b1d5f..a54d16b74 100644
--- a/pandas-stubs/io/feather_format.pyi
+++ b/pandas-stubs/io/feather_format.pyi
@@ -1,6 +1,8 @@
 from pandas import DataFrame
 
+from pandas._libs.lib import NoDefault
 from pandas._typing import (
+    DtypeBackend,
     FilePath,
     HashableT,
     ReadBuffer,
@@ -12,4 +14,5 @@ def read_feather(
     columns: list[HashableT] | None = ...,
     use_threads: bool = ...,
     storage_options: StorageOptions = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> DataFrame: ...
diff --git a/pandas-stubs/io/html.pyi b/pandas-stubs/io/html.pyi
index 75789f06f..6f8f15c86 100644
--- a/pandas-stubs/io/html.pyi
+++ b/pandas-stubs/io/html.pyi
@@ -12,7 +12,9 @@ from typing import (
 
 from pandas.core.frame import DataFrame
 
+from pandas._libs.lib import NoDefault
 from pandas._typing import (
+    DtypeBackend,
     FilePath,
     HashableT1,
     HashableT2,
@@ -49,4 +51,5 @@ def read_html(
     keep_default_na: bool = ...,
     displayed_only: bool = ...,
     extract_links: Literal["header", "footer", "body", "all"] | None = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> list[DataFrame]: ...
diff --git a/pandas-stubs/io/json/_json.pyi b/pandas-stubs/io/json/_json.pyi
index 198054e71..068654a77 100644
--- a/pandas-stubs/io/json/_json.pyi
+++ b/pandas-stubs/io/json/_json.pyi
@@ -9,9 +9,11 @@ from typing import (
 from pandas.core.frame import DataFrame
 from pandas.core.series import Series
 
+from pandas._libs.lib import NoDefault
 from pandas._typing import (
     CompressionOptions,
     DtypeArg,
+    DtypeBackend,
     FilePath,
     HashableT,
     JsonFrameOrient,
@@ -43,6 +45,7 @@ def read_json(
     compression: CompressionOptions = ...,
     nrows: int | None = ...,
     storage_options: StorageOptions = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> JsonReader[Series]: ...
 @overload
 def read_json(
@@ -66,6 +69,7 @@ def read_json(
     compression: CompressionOptions = ...,
     nrows: int | None = ...,
     storage_options: StorageOptions = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> JsonReader[DataFrame]: ...
 @overload
 def read_json(
@@ -89,6 +93,7 @@ def read_json(
     compression: CompressionOptions = ...,
     nrows: int | None = ...,
     storage_options: StorageOptions = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> Series: ...
 @overload
 def read_json(
@@ -112,6 +117,7 @@ def read_json(
     compression: CompressionOptions = ...,
     nrows: int | None = ...,
     storage_options: StorageOptions = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> DataFrame: ...
 
 class JsonReader(abc.Iterator, Generic[NDFrameT]):
diff --git a/pandas-stubs/io/orc.pyi b/pandas-stubs/io/orc.pyi
index 964aaef72..b8fdc9506 100644
--- a/pandas-stubs/io/orc.pyi
+++ b/pandas-stubs/io/orc.pyi
@@ -2,7 +2,9 @@ from typing import Any
 
 from pandas import DataFrame
 
+from pandas._libs.lib import NoDefault
 from pandas._typing import (
+    DtypeBackend,
     FilePath,
     HashableT,
     ReadBuffer,
@@ -11,5 +13,6 @@ from pandas._typing import (
 def read_orc(
     path: FilePath | ReadBuffer[bytes],
     columns: list[HashableT] | None = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
     **kwargs: Any,
 ) -> DataFrame: ...
diff --git a/pandas-stubs/io/parsers/readers.pyi b/pandas-stubs/io/parsers/readers.pyi
index 4c580f256..f09f53b16 100644
--- a/pandas-stubs/io/parsers/readers.pyi
+++ b/pandas-stubs/io/parsers/readers.pyi
@@ -18,11 +18,13 @@ from typing import (
 from pandas.core.frame import DataFrame
 from typing_extensions import Self
 
+from pandas._libs.lib import NoDefault
 from pandas._typing import (
     CompressionOptions,
     CSVEngine,
     CSVQuoting,
     DtypeArg,
+    DtypeBackend,
     FilePath,
     ListLikeHashable,
     ReadCsvBuffer,
@@ -90,6 +92,7 @@ def read_csv(
     memory_map: bool = ...,
     float_precision: Literal["high", "legacy", "round_trip"] | None = ...,
     storage_options: StorageOptions | None = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> TextFileReader: ...
 @overload
 def read_csv(
@@ -149,6 +152,7 @@ def read_csv(
     memory_map: bool = ...,
     float_precision: Literal["high", "legacy", "round_trip"] | None = ...,
     storage_options: StorageOptions | None = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> TextFileReader: ...
 @overload
 def read_csv(
@@ -208,6 +212,7 @@ def read_csv(
     memory_map: bool = ...,
     float_precision: Literal["high", "legacy", "round_trip"] | None = ...,
     storage_options: StorageOptions | None = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> DataFrame: ...
 @overload
 def read_table(
@@ -393,6 +398,7 @@ def read_fwf(
     colspecs: Sequence[tuple[int, int]] | Literal["infer"] | None = ...,
     widths: Sequence[int] | None = ...,
     infer_nrows: int = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
     iterator: Literal[True],
     chunksize: int | None = ...,
     **kwds: Any,
@@ -404,6 +410,7 @@ def read_fwf(
     colspecs: Sequence[tuple[int, int]] | Literal["infer"] | None = ...,
     widths: Sequence[int] | None = ...,
     infer_nrows: int = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
     iterator: bool = ...,
     chunksize: int,
     **kwds: Any,
@@ -415,6 +422,7 @@ def read_fwf(
     colspecs: Sequence[tuple[int, int]] | Literal["infer"] | None = ...,
     widths: Sequence[int] | None = ...,
     infer_nrows: int = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
     iterator: Literal[False] = ...,
     chunksize: None = ...,
     **kwds: Any,
diff --git a/pandas-stubs/io/spss.pyi b/pandas-stubs/io/spss.pyi
index d042561be..d63089845 100644
--- a/pandas-stubs/io/spss.pyi
+++ b/pandas-stubs/io/spss.pyi
@@ -1,6 +1,8 @@
 from pandas.core.frame import DataFrame
 
+from pandas._libs.lib import NoDefault
 from pandas._typing import (
+    DtypeBackend,
     FilePath,
     HashableT,
 )
@@ -9,4 +11,5 @@ def read_spss(
     path: FilePath,
     usecols: list[HashableT] | None = ...,
     convert_categoricals: bool = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> DataFrame: ...
diff --git a/pandas-stubs/io/sql.pyi b/pandas-stubs/io/sql.pyi
index f26f9bece..df29721f8 100644
--- a/pandas-stubs/io/sql.pyi
+++ b/pandas-stubs/io/sql.pyi
@@ -40,6 +40,7 @@ def read_sql_table(
     columns: list[str] | None = ...,
     *,
     chunksize: int,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> Generator[DataFrame, None, None]: ...
 @overload
 def read_sql_table(
@@ -51,6 +52,7 @@ def read_sql_table(
     parse_dates: list[str] | dict[str, str] | dict[str, dict[str, Any]] | None = ...,
     columns: list[str] | None = ...,
     chunksize: None = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> DataFrame: ...
 @overload
 def read_sql_query(
@@ -63,6 +65,7 @@ def read_sql_query(
     *,
     chunksize: int,
     dtype: DtypeArg | None = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> Generator[DataFrame, None, None]: ...
 @overload
 def read_sql_query(
@@ -74,6 +77,7 @@ def read_sql_query(
     parse_dates: list[str] | dict[str, str] | dict[str, dict[str, Any]] | None = ...,
     chunksize: None = ...,
     dtype: DtypeArg | None = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> DataFrame: ...
 @overload
 def read_sql(
diff --git a/pandas-stubs/io/xml.pyi b/pandas-stubs/io/xml.pyi
index 1cfa3d6bf..cfbefd18b 100644
--- a/pandas-stubs/io/xml.pyi
+++ b/pandas-stubs/io/xml.pyi
@@ -2,10 +2,12 @@ from collections.abc import Sequence
 
 from pandas.core.frame import DataFrame
 
+from pandas._libs.lib import NoDefault
 from pandas._typing import (
     CompressionOptions,
     ConvertersArg,
     DtypeArg,
+    DtypeBackend,
     FilePath,
     ParseDatesArg,
     ReadBuffer,
@@ -31,4 +33,5 @@ def read_xml(
     iterparse: dict[str, list[str]] | None = ...,
     compression: CompressionOptions = ...,
     storage_options: StorageOptions = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
 ) -> DataFrame: ...
diff --git a/tests/test_frame.py b/tests/test_frame.py
index dc58f8610..eb34fddbd 100644
--- a/tests/test_frame.py
+++ b/tests/test_frame.py
@@ -2597,3 +2597,9 @@ def test_suffix_prefix_index() -> None:
     check(
         assert_type(df.add_prefix("_col", axis="columns"), pd.DataFrame), pd.DataFrame
     )
+
+
+def test_convert_dtypes_dtype_backend() -> None:
+    df = pd.DataFrame({"A": [1, 2, 3, 4], "B": [3, 4, 5, 6]})
+    dfn = df.convert_dtypes(dtype_backend="numpy_nullable")
+    check(assert_type(dfn, pd.DataFrame), pd.DataFrame)
diff --git a/tests/test_io.py b/tests/test_io.py
index 24ff74262..1df763789 100644
--- a/tests/test_io.py
+++ b/tests/test_io.py
@@ -17,6 +17,7 @@
 )
 
 import numpy as np
+import numpy.typing as npt
 import pandas as pd
 from pandas import (
     DataFrame,
@@ -1269,3 +1270,109 @@ def test_read_sql_dtype_backend() -> None:
         pd.DataFrame,
     )
     conn2.close()
+
+
+def test_all_read_without_lxml_dtype_backend() -> None:
+    with ensure_clean() as path:
+        check(assert_type(DF.to_csv(path), None), type(None))
+        s1 = read_csv(path, iterator=True, dtype_backend="pyarrow")
+        check(assert_type(s1, TextFileReader), TextFileReader)
+        s1.close()
+
+        DF.to_string(path, index=False)
+        check(
+            assert_type(read_fwf(path, dtype_backend="pyarrow"), DataFrame), DataFrame
+        )
+
+        check(assert_type(DF.to_json(path), None), type(None))
+        check(
+            assert_type(read_json(path, dtype_backend="pyarrow"), DataFrame), DataFrame
+        )
+
+    with ensure_clean() as path:
+        con = sqlite3.connect(path)
+        check(assert_type(DF.to_sql("test", con=con), Union[int, None]), int)
+        check(
+            assert_type(
+                read_sql_query(
+                    "select * from test",
+                    con=con,
+                    index_col="index",
+                    dtype_backend="pyarrow",
+                ),
+                DataFrame,
+            ),
+            DataFrame,
+        )
+        con.close()
+
+        if not WINDOWS:
+            check(assert_type(DF.to_orc(path), None), type(None))
+            check(
+                assert_type(read_orc(path, dtype_backend="numpy_nullable"), DataFrame),
+                DataFrame,
+            )
+
+        check(assert_type(DF.to_feather(path), None), type(None))
+        check(
+            assert_type(read_feather(path, dtype_backend="pyarrow"), DataFrame),
+            DataFrame,
+        )
+
+    check(
+        assert_type(
+            pd.to_numeric(
+                [1.0, 2.0, "blerg"], errors="ignore", dtype_backend="numpy_nullable"
+            ),
+            npt.NDArray,
+        ),
+        np.ndarray,
+    )
+
+    with ensure_clean(".xlsx") as path:
+        as_str: str = path
+        DF.to_excel(path)
+        check(
+            assert_type(pd.read_excel(as_str, dtype_backend="pyarrow"), pd.DataFrame),
+            pd.DataFrame,
+        )
+
+    try:
+        DF.to_clipboard()
+    except errors.PyperclipException:
+        pytest.skip("clipboard not available for testing")
+    check(
+        assert_type(
+            read_clipboard(iterator=True, dtype_backend="pyarrow"), TextFileReader
+        ),
+        TextFileReader,
+    )
+
+    if TYPE_CHECKING:
+        # sqlite3 doesn't support read_table, which is required for this function
+        # Could only run in pytest if SQLAlchemy was installed
+        with ensure_clean() as path:
+            co1 = sqlite3.connect(path)
+            assert_type(DF.to_sql("test", con=co1), Union[int, None])
+            assert_type(
+                read_sql_table("test", con=co1, dtype_backend="numpy_nullable"),
+                DataFrame,
+            )
+            co1.close()
+
+
+@lxml_skip
+def test_read_with_lxml_dtype_backend() -> None:
+    with ensure_clean() as path:
+        check(assert_type(DF.to_html(path), None), type(None))
+        check(
+            assert_type(
+                read_html(path, dtype_backend="numpy_nullable"), List[DataFrame]
+            ),
+            list,
+        )
+
+        check(assert_type(DF.to_xml(path), None), type(None))
+        check(
+            assert_type(read_xml(path, dtype_backend="pyarrow"), DataFrame), DataFrame
+        )
diff --git a/tests/test_series.py b/tests/test_series.py
index 863404737..c4c5c0f56 100644
--- a/tests/test_series.py
+++ b/tests/test_series.py
@@ -1830,3 +1830,9 @@ def test_prefix_summix_axis() -> None:
     if TYPE_CHECKING_INVALID_USAGE:
         check(assert_type(s.add_prefix("_item", axis=1), pd.Series), pd.Series)  # type: ignore[arg-type] # pyright: ignore[reportGeneralTypeIssues]
         check(assert_type(s.add_suffix("_item", axis="columns"), pd.Series), pd.Series)  # type: ignore[arg-type] # pyright: ignore[reportGeneralTypeIssues]
+
+
+def test_convert_dtypes_dtype_backend() -> None:
+    s = pd.Series([1, 2, 3, 4])
+    s1 = s.convert_dtypes(dtype_backend="numpy_nullable")
+    check(assert_type(s1, pd.Series), pd.Series)