diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi index a921bf44c..ac175a4b1 100644 --- a/pandas-stubs/_typing.pyi +++ b/pandas-stubs/_typing.pyi @@ -14,6 +14,7 @@ from typing import ( AnyStr, Callable, Hashable, + Iterator, Literal, Mapping, Optional, @@ -66,6 +67,12 @@ class BaseBuffer(Protocol): ... class ReadBuffer(BaseBuffer, Protocol[AnyStr_cov]): ... class WriteBuffer(BaseBuffer, Protocol[AnyStr_cov]): ... +class ReadCsvBuffer(ReadBuffer[AnyStr_cov], Protocol[AnyStr_cov]): + def __iter__(self) -> Iterator[AnyStr_cov]: ... + def readline(self) -> AnyStr_cov: ... + @property + def closed(self) -> bool: ... + class WriteExcelBuffer(WriteBuffer[bytes], Protocol): def truncate(self, size: Union[int, None] = ...) -> int: ... @@ -242,6 +249,7 @@ JsonSeriesOrient = Literal["split", "records", "index"] TimestampConvention = Literal["start", "end", "s", "e"] CSVEngine = Literal["c", "python", "pyarrow", "python-fwf"] +CSVQuoting = Literal[0, 1, 2, 3] HDFCompLib = Literal["zlib", "lzo", "bzip2", "blosc"] ParquetEngine = Literal["auto", "pyarrow", "fastparquet"] diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index 6ddde00e2..a0e46ba9e 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -1917,55 +1917,6 @@ class DataFrame(NDFrame, OpsMixin): self, excel: _bool = ..., sep: _str | None = ..., **kwargs ) -> None: ... 
@overload - def to_csv( - self, - path_or_buf: FilePathOrBuffer | None, - sep: _str = ..., - na_rep: _str = ..., - float_format: _str | None = ..., - columns: Sequence[Hashable] | None = ..., - header: _bool | list[_str] = ..., - index: _bool = ..., - index_label: _bool | _str | Sequence[Hashable] | None = ..., - mode: _str = ..., - encoding: _str | None = ..., - compression: _str | Mapping[_str, _str] = ..., - quoting: Literal[0, 1, 2, 3] | None = ..., - quotechar: _str = ..., - line_terminator: _str | None = ..., - chunksize: int | None = ..., - date_format: _str | None = ..., - doublequote: _bool = ..., - escapechar: _str | None = ..., - decimal: _str = ..., - errors: _str = ..., - storage_options: dict[_str, Any] | None = ..., - ) -> None: ... - @overload - def to_csv( - self, - sep: _str = ..., - na_rep: _str = ..., - float_format: _str | None = ..., - columns: Sequence[Hashable] | None = ..., - header: _bool | list[_str] = ..., - index: _bool = ..., - index_label: _bool | _str | Sequence[Hashable] | None = ..., - mode: _str = ..., - encoding: _str | None = ..., - compression: _str | Mapping[_str, _str] = ..., - quoting: Literal[0, 1, 2, 3] | None = ..., - quotechar: _str = ..., - line_terminator: _str | None = ..., - chunksize: int | None = ..., - date_format: _str | None = ..., - doublequote: _bool = ..., - escapechar: _str | None = ..., - decimal: _str = ..., - errors: _str = ..., - storage_options: dict[_str, Any] | None = ..., - ) -> _str: ... 
- @overload def to_json( self, path_or_buf: FilePathOrBuffer | None, diff --git a/pandas-stubs/core/generic.pyi b/pandas-stubs/core/generic.pyi index 945a35303..bd77eb279 100644 --- a/pandas-stubs/core/generic.pyi +++ b/pandas-stubs/core/generic.pyi @@ -22,6 +22,7 @@ from pandas._typing import ( ArrayLike, Axis, CompressionOptions, + CSVQuoting, Dtype, DtypeArg, FilePath, @@ -231,18 +232,18 @@ class NDFrame(PandasObject, indexing.IndexingMixin): @overload def to_csv( self, - path_or_buf: FilePathOrBuffer | None, + path_or_buf: FilePathOrBuffer, sep: _str = ..., na_rep: _str = ..., - float_format: _str | None = ..., - columns: Sequence[Hashable] | None = ..., + float_format: _str | Callable[[object], _str] | None = ..., + columns: list[HashableT] | None = ..., header: _bool | list[_str] = ..., index: _bool = ..., - index_label: _bool | _str | Sequence[Hashable] | None = ..., - mode: _str = ..., + index_label: Literal[False] | _str | list[HashableT] | None = ..., + mode: FileWriteMode = ..., encoding: _str | None = ..., - compression: _str | Mapping[_str, _str] = ..., - quoting: Literal[0, 1, 2, 3] | None = ..., + compression: CompressionOptions = ..., + quoting: CSVQuoting = ..., quotechar: _str = ..., line_terminator: _str | None = ..., chunksize: int | None = ..., @@ -251,22 +252,23 @@ class NDFrame(PandasObject, indexing.IndexingMixin): escapechar: _str | None = ..., decimal: _str = ..., errors: _str = ..., - storage_options: dict[_str, Any] | None = ..., + storage_options: StorageOptions = ..., ) -> None: ... 
@overload def to_csv( self, + path_or_buf: None = ..., sep: _str = ..., na_rep: _str = ..., - float_format: _str | None = ..., - columns: Sequence[Hashable] | None = ..., + float_format: _str | Callable[[object], _str] | None = ..., + columns: list[HashableT] | None = ..., header: _bool | list[_str] = ..., index: _bool = ..., - index_label: _bool | _str | Sequence[Hashable] | None = ..., - mode: _str = ..., + index_label: Literal[False] | _str | list[HashableT] | None = ..., + mode: FileWriteMode = ..., encoding: _str | None = ..., - compression: _str | Mapping[_str, _str] = ..., - quoting: Literal[0, 1, 2, 3] | None = ..., + compression: CompressionOptions = ..., + quoting: CSVQuoting = ..., quotechar: _str = ..., line_terminator: _str | None = ..., chunksize: int | None = ..., @@ -275,7 +277,7 @@ class NDFrame(PandasObject, indexing.IndexingMixin): escapechar: _str | None = ..., decimal: _str = ..., errors: _str = ..., - storage_options: dict[_str, Any] | None = ..., + storage_options: StorageOptions = ..., ) -> _str: ... 
def take( self, indices, axis=..., is_copy: _bool | None = ..., **kwargs diff --git a/pandas-stubs/io/clipboards.pyi b/pandas-stubs/io/clipboards.pyi index 62fbef460..fc83d80ba 100644 --- a/pandas-stubs/io/clipboards.pyi +++ b/pandas-stubs/io/clipboards.pyi @@ -15,6 +15,7 @@ from pandas.core.series import Series from pandas._typing import ( CompressionOptions, CSVEngine, + CSVQuoting, DtypeArg, StorageOptions, npt, @@ -71,7 +72,7 @@ def read_clipboard( decimal: str = ..., lineterminator: str | None = ..., quotechar: str = ..., - quoting: Literal[0, 1, 2, 3] | None = ..., + quoting: CSVQuoting = ..., doublequote: bool = ..., escapechar: str | None = ..., comment: str | None = ..., @@ -137,7 +138,7 @@ def read_clipboard( decimal: str = ..., lineterminator: str | None = ..., quotechar: str = ..., - quoting: Literal[0, 1, 2, 3] | None = ..., + quoting: CSVQuoting = ..., doublequote: bool = ..., escapechar: str | None = ..., comment: str | None = ..., @@ -203,7 +204,7 @@ def read_clipboard( decimal: str = ..., lineterminator: str | None = ..., quotechar: str = ..., - quoting: Literal[0, 1, 2, 3] | None = ..., + quoting: CSVQuoting = ..., doublequote: bool = ..., escapechar: str | None = ..., comment: str | None = ..., diff --git a/pandas-stubs/io/common.pyi b/pandas-stubs/io/common.pyi index b13ab06ec..ede6e40b3 100644 --- a/pandas-stubs/io/common.pyi +++ b/pandas-stubs/io/common.pyi @@ -1,17 +1,17 @@ -from typing import ( - IO, - AnyStr, - Generic, -) - -from pandas._typing import CompressionDict - -class IOHandles(Generic[AnyStr]): - handle: IO[AnyStr] - compression: CompressionDict - created_handles: list[IO[AnyStr]] - is_wrapped: bool - def close(self) -> None: ... - def __enter__(self) -> IOHandles[AnyStr]: ... - def __exit__(self, *args: object) -> None: ... - def __init__(self, handle, compression, created_handles, is_wrapped) -> None: ... 
+from typing import ( + IO, + AnyStr, + Generic, +) + +from pandas._typing import CompressionDict + +class IOHandles(Generic[AnyStr]): + handle: IO[AnyStr] + compression: CompressionDict + created_handles: list[IO[AnyStr]] + is_wrapped: bool + def close(self) -> None: ... + def __enter__(self) -> IOHandles[AnyStr]: ... + def __exit__(self, *args: object) -> None: ... + def __init__(self, handle, compression, created_handles, is_wrapped) -> None: ... diff --git a/pandas-stubs/io/parsers.pyi b/pandas-stubs/io/parsers.pyi deleted file mode 100644 index 7b34db73a..000000000 --- a/pandas-stubs/io/parsers.pyi +++ /dev/null @@ -1,618 +0,0 @@ -from collections import abc -from typing import ( - Callable, - Literal, - Protocol, - Sequence, - Union, - overload, -) - -import numpy as np -import pandas as pd -from pandas.core.frame import DataFrame - -from pandas._typing import ( - AnyStr_cov, - CompressionOptions, - DtypeArg, - FilePath, - FilePathOrBuffer, - ReadBuffer, - StorageOptions, -) - -ListLike = Union[ - list[Union[str, int]], - tuple[Union[str, int]], - set[Union[str, int]], - np.ndarray, - pd.Series, -] - -class ReadCsvBuffer(ReadBuffer[AnyStr_cov], Protocol): ... 
- -# read_csv engines -CSVEngine = Literal["c", "python", "pyarrow", "python-fwf"] - -# iterator=True -> TextFileReader -@overload -def read_csv( - filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], - *, - sep: str | None = ..., - delimiter: str | None = ..., - header: int | Sequence[int] | Literal["infer"] | None = ..., - names=..., - index_col=..., - usecols: ListLike | Callable | None = ..., - squeeze: bool | None = ..., - prefix: str | None = ..., - mangle_dupe_cols: bool = ..., - dtype: DtypeArg | None = ..., - engine: CSVEngine | None = ..., - converters=..., - true_values=..., - false_values=..., - skipinitialspace: bool = ..., - skiprows=..., - skipfooter: int = ..., - nrows: int | None = ..., - na_values=..., - keep_default_na: bool = ..., - na_filter: bool = ..., - verbose: bool = ..., - skip_blank_lines: bool = ..., - parse_dates=..., - infer_datetime_format: bool = ..., - keep_date_col: bool = ..., - date_parser=..., - dayfirst: bool = ..., - cache_dates: bool = ..., - iterator: Literal[True], - chunksize: int | None = ..., - compression: CompressionOptions = ..., - thousands: str | None = ..., - decimal: str = ..., - lineterminator: str | None = ..., - quotechar: str = ..., - quoting: int = ..., - doublequote: bool = ..., - escapechar: str | None = ..., - comment: str | None = ..., - encoding: str | None = ..., - encoding_errors: str | None = ..., - dialect=..., - error_bad_lines: bool | None = ..., - warn_bad_lines: bool | None = ..., - on_bad_lines=..., - delim_whitespace: bool = ..., - low_memory=..., - memory_map: bool = ..., - float_precision: Literal["high", "legacy"] | None = ..., - storage_options: StorageOptions | None = ..., -) -> TextFileReader: ... 
- -# chunksize=int -> TextFileReader -@overload -def read_csv( - filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], - *, - sep: str | None = ..., - delimiter: str | None = ..., - header: int | Sequence[int] | Literal["infer"] | None = ..., - names=..., - index_col=..., - usecols: ListLike | Callable | None = ..., - squeeze: bool | None = ..., - prefix: str | None = ..., - mangle_dupe_cols: bool = ..., - dtype: DtypeArg | None = ..., - engine: CSVEngine | None = ..., - converters=..., - true_values=..., - false_values=..., - skipinitialspace: bool = ..., - skiprows=..., - skipfooter: int = ..., - nrows: int | None = ..., - na_values=..., - keep_default_na: bool = ..., - na_filter: bool = ..., - verbose: bool = ..., - skip_blank_lines: bool = ..., - parse_dates=..., - infer_datetime_format: bool = ..., - keep_date_col: bool = ..., - date_parser=..., - dayfirst: bool = ..., - cache_dates: bool = ..., - iterator: bool = ..., - chunksize: int, - compression: CompressionOptions = ..., - thousands: str | None = ..., - decimal: str = ..., - lineterminator: str | None = ..., - quotechar: str = ..., - quoting: int = ..., - doublequote: bool = ..., - escapechar: str | None = ..., - comment: str | None = ..., - encoding: str | None = ..., - encoding_errors: str | None = ..., - dialect=..., - error_bad_lines: bool | None = ..., - warn_bad_lines: bool | None = ..., - on_bad_lines=..., - delim_whitespace: bool = ..., - low_memory=..., - memory_map: bool = ..., - float_precision: Literal["high", "legacy"] | None = ..., - storage_options: StorageOptions | None = ..., -) -> TextFileReader: ... 
- -# default case -> DataFrame -@overload -def read_csv( - filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], - *, - sep: str | None = ..., - delimiter: str | None = ..., - header: int | Sequence[int] | Literal["infer"] | None = ..., - names=..., - index_col=..., - usecols: ListLike | Callable | None = ..., - squeeze: bool | None = ..., - prefix: str | None = ..., - mangle_dupe_cols: bool = ..., - dtype: DtypeArg | None = ..., - engine: CSVEngine | None = ..., - converters=..., - true_values=..., - false_values=..., - skipinitialspace: bool = ..., - skiprows=..., - skipfooter: int = ..., - nrows: int | None = ..., - na_values=..., - keep_default_na: bool = ..., - na_filter: bool = ..., - verbose: bool = ..., - skip_blank_lines: bool = ..., - parse_dates=..., - infer_datetime_format: bool = ..., - keep_date_col: bool = ..., - date_parser=..., - dayfirst: bool = ..., - cache_dates: bool = ..., - iterator: Literal[False] = ..., - chunksize: None = ..., - compression: CompressionOptions = ..., - thousands: str | None = ..., - decimal: str = ..., - lineterminator: str | None = ..., - quotechar: str = ..., - quoting: int = ..., - doublequote: bool = ..., - escapechar: str | None = ..., - comment: str | None = ..., - encoding: str | None = ..., - encoding_errors: str | None = ..., - dialect=..., - error_bad_lines: bool | None = ..., - warn_bad_lines: bool | None = ..., - on_bad_lines=..., - delim_whitespace: bool = ..., - low_memory=..., - memory_map: bool = ..., - float_precision: Literal["high", "legacy"] | None = ..., - storage_options: StorageOptions | None = ..., -) -> DataFrame: ... 
- -# Unions -> DataFrame | TextFileReader -@overload -def read_csv( - filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], - *, - sep: str | None = ..., - delimiter: str | None = ..., - header: int | Sequence[int] | Literal["infer"] | None = ..., - names=..., - index_col=..., - usecols: ListLike | Callable | None = ..., - squeeze: bool | None = ..., - prefix: str | None = ..., - mangle_dupe_cols: bool = ..., - dtype: DtypeArg | None = ..., - engine: CSVEngine | None = ..., - converters=..., - true_values=..., - false_values=..., - skipinitialspace: bool = ..., - skiprows=..., - skipfooter: int = ..., - nrows: int | None = ..., - na_values=..., - keep_default_na: bool = ..., - na_filter: bool = ..., - verbose: bool = ..., - skip_blank_lines: bool = ..., - parse_dates=..., - infer_datetime_format: bool = ..., - keep_date_col: bool = ..., - date_parser=..., - dayfirst: bool = ..., - cache_dates: bool = ..., - iterator: bool = ..., - chunksize: int | None = ..., - compression: CompressionOptions = ..., - thousands: str | None = ..., - decimal: str = ..., - lineterminator: str | None = ..., - quotechar: str = ..., - quoting: int = ..., - doublequote: bool = ..., - escapechar: str | None = ..., - comment: str | None = ..., - encoding: str | None = ..., - encoding_errors: str | None = ..., - dialect=..., - error_bad_lines: bool | None = ..., - warn_bad_lines: bool | None = ..., - on_bad_lines=..., - delim_whitespace: bool = ..., - low_memory=..., - memory_map: bool = ..., - float_precision: Literal["high", "legacy"] | None = ..., - storage_options: StorageOptions | None = ..., -) -> DataFrame | TextFileReader: ... 
- -# iterator=True -> TextFileReader -@overload -def read_table( - filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], - *, - sep: str | None = ..., - delimiter: str | None = ..., - header: int | Sequence[int] | Literal["infer"] | None = ..., - names=..., - index_col=..., - usecols=..., - squeeze: bool | None = ..., - prefix: str | None = ..., - mangle_dupe_cols: bool = ..., - dtype: DtypeArg | None = ..., - engine: CSVEngine | None = ..., - converters=..., - true_values=..., - false_values=..., - skipinitialspace: bool = ..., - skiprows=..., - skipfooter: int = ..., - nrows: int | None = ..., - na_values=..., - keep_default_na: bool = ..., - na_filter: bool = ..., - verbose: bool = ..., - skip_blank_lines: bool = ..., - parse_dates=..., - infer_datetime_format: bool = ..., - keep_date_col: bool = ..., - date_parser=..., - dayfirst: bool = ..., - cache_dates: bool = ..., - iterator: Literal[True], - chunksize: int | None = ..., - compression: CompressionOptions = ..., - thousands: str | None = ..., - decimal: str = ..., - lineterminator: str | None = ..., - quotechar: str = ..., - quoting: int = ..., - doublequote: bool = ..., - escapechar: str | None = ..., - comment: str | None = ..., - encoding: str | None = ..., - encoding_errors: str | None = ..., - dialect=..., - error_bad_lines: bool | None = ..., - warn_bad_lines: bool | None = ..., - on_bad_lines=..., - delim_whitespace: bool = ..., - low_memory=..., - memory_map: bool = ..., - float_precision: Literal["high", "legacy"] | None = ..., - storage_options: StorageOptions | None = ..., -) -> TextFileReader: ... 
- -# chunksize=int -> TextFileReader -@overload -def read_table( - filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], - *, - sep: str | None = ..., - delimiter: str | None = ..., - header: int | Sequence[int] | Literal["infer"] | None = ..., - names=..., - index_col=..., - usecols=..., - squeeze: bool | None = ..., - prefix: str | None = ..., - mangle_dupe_cols: bool = ..., - dtype: DtypeArg | None = ..., - engine: CSVEngine | None = ..., - converters=..., - true_values=..., - false_values=..., - skipinitialspace: bool = ..., - skiprows=..., - skipfooter: int = ..., - nrows: int | None = ..., - na_values=..., - keep_default_na: bool = ..., - na_filter: bool = ..., - verbose: bool = ..., - skip_blank_lines: bool = ..., - parse_dates=..., - infer_datetime_format: bool = ..., - keep_date_col: bool = ..., - date_parser=..., - dayfirst: bool = ..., - cache_dates: bool = ..., - iterator: bool = ..., - chunksize: int, - compression: CompressionOptions = ..., - thousands: str | None = ..., - decimal: str = ..., - lineterminator: str | None = ..., - quotechar: str = ..., - quoting: int = ..., - doublequote: bool = ..., - escapechar: str | None = ..., - comment: str | None = ..., - encoding: str | None = ..., - encoding_errors: str | None = ..., - dialect=..., - error_bad_lines: bool | None = ..., - warn_bad_lines: bool | None = ..., - on_bad_lines=..., - delim_whitespace: bool = ..., - low_memory=..., - memory_map: bool = ..., - float_precision: Literal["high", "legacy"] | None = ..., - storage_options: StorageOptions | None = ..., -) -> TextFileReader: ... 
- -# default case -> DataFrame -@overload -def read_table( - filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], - *, - sep: str | None = ..., - delimiter: str | None = ..., - header: int | Sequence[int] | Literal["infer"] | None = ..., - names=..., - index_col=..., - usecols=..., - squeeze: bool | None = ..., - prefix: str | None = ..., - mangle_dupe_cols: bool = ..., - dtype: DtypeArg | None = ..., - engine: CSVEngine | None = ..., - converters=..., - true_values=..., - false_values=..., - skipinitialspace: bool = ..., - skiprows=..., - skipfooter: int = ..., - nrows: int | None = ..., - na_values=..., - keep_default_na: bool = ..., - na_filter: bool = ..., - verbose: bool = ..., - skip_blank_lines: bool = ..., - parse_dates=..., - infer_datetime_format: bool = ..., - keep_date_col: bool = ..., - date_parser=..., - dayfirst: bool = ..., - cache_dates: bool = ..., - iterator: Literal[False] = ..., - chunksize: None = ..., - compression: CompressionOptions = ..., - thousands: str | None = ..., - decimal: str = ..., - lineterminator: str | None = ..., - quotechar: str = ..., - quoting: int = ..., - doublequote: bool = ..., - escapechar: str | None = ..., - comment: str | None = ..., - encoding: str | None = ..., - encoding_errors: str | None = ..., - dialect=..., - error_bad_lines: bool | None = ..., - warn_bad_lines: bool | None = ..., - on_bad_lines=..., - delim_whitespace: bool = ..., - low_memory=..., - memory_map: bool = ..., - float_precision: Literal["high", "legacy"] | None = ..., - storage_options: StorageOptions | None = ..., -) -> DataFrame: ... 
- -# Unions -> DataFrame | TextFileReader -@overload -def read_table( - filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], - *, - sep: str | None = ..., - delimiter: str | None = ..., - header: int | Sequence[int] | Literal["infer"] | None = ..., - names=..., - index_col=..., - usecols=..., - squeeze: bool | None = ..., - prefix: str | None = ..., - mangle_dupe_cols: bool = ..., - dtype: DtypeArg | None = ..., - engine: CSVEngine | None = ..., - converters=..., - true_values=..., - false_values=..., - skipinitialspace: bool = ..., - skiprows=..., - skipfooter: int = ..., - nrows: int | None = ..., - na_values=..., - keep_default_na: bool = ..., - na_filter: bool = ..., - verbose: bool = ..., - skip_blank_lines: bool = ..., - parse_dates=..., - infer_datetime_format: bool = ..., - keep_date_col: bool = ..., - date_parser=..., - dayfirst: bool = ..., - cache_dates: bool = ..., - iterator: bool = ..., - chunksize: int | None = ..., - compression: CompressionOptions = ..., - thousands: str | None = ..., - decimal: str = ..., - lineterminator: str | None = ..., - quotechar: str = ..., - quoting: int = ..., - doublequote: bool = ..., - escapechar: str | None = ..., - comment: str | None = ..., - encoding: str | None = ..., - encoding_errors: str | None = ..., - dialect=..., - error_bad_lines: bool | None = ..., - warn_bad_lines: bool | None = ..., - on_bad_lines=..., - delim_whitespace: bool = ..., - low_memory=..., - memory_map: bool = ..., - float_precision: Literal["high", "legacy"] | None = ..., - storage_options: StorageOptions | None = ..., -) -> DataFrame | TextFileReader: ... -def read_fwf( - filepath_or_buffer: FilePathOrBuffer, - colspecs=..., - widths=..., - infer_nrows=..., - **kwds, -): ... - -class TextFileReader(abc.Iterator): - f = ... - orig_options = ... - engine = ... - chunksize = ... - nrows = ... - squeeze = ... - def __init__(self, f, engine=..., **kwds) -> None: ... - def close(self) -> None: ... - def __next__(self): ... 
- def __enter__(self) -> TextFileReader: ... - def __exit__(self, exc_type, exc_value, traceback) -> None: ... - def read(self, nrows=...): ... - def get_chunk(self, size=...): ... - -class ParserBase: - names = ... - orig_names = ... - prefix = ... - index_col = ... - unnamed_cols = ... - index_names = ... - col_names = ... - parse_dates = ... - date_parser = ... - dayfirst = ... - keep_date_col = ... - na_values = ... - na_fvalues = ... - na_filter = ... - keep_default_na = ... - true_values = ... - false_values = ... - mangle_dupe_cols = ... - infer_datetime_format = ... - cache_dates = ... - header = ... - handles = ... - def __init__(self, kwds) -> None: ... - def close(self) -> None: ... - -class CParserWrapper(ParserBase): - kwds = ... - unnamed_cols = ... - names = ... - orig_names = ... - index_names = ... - def __init__(self, src, **kwds) -> None: ... - def close(self) -> None: ... - def set_error_bad_lines(self, status) -> None: ... - def read(self, nrows=...): ... - -def TextParser(*args, **kwds): ... -def count_empty_vals(vals): ... - -class PythonParser(ParserBase): - data = ... - buf = ... - pos: int = ... - line_pos: int = ... - encoding = ... - compression = ... - memory_map = ... - skiprows = ... - skipfunc = ... - skipfooter = ... - delimiter = ... - quotechar = ... - escapechar = ... - doublequote = ... - skipinitialspace = ... - lineterminator = ... - quoting = ... - skip_blank_lines = ... - warn_bad_lines = ... - error_bad_lines = ... - names_passed = ... - has_index_names: bool = ... - verbose = ... - converters = ... - dtype = ... - thousands = ... - decimal = ... - comment = ... - num_original_columns = ... - columns = ... - orig_names = ... - index_names = ... - nonnum = ... - def __init__(self, f, **kwds): ... - def read(self, rows=...): ... - def get_chunk(self, size=...): ... - -class FixedWidthReader(abc.Iterator): - f = ... - buffer = ... - delimiter = ... - comment = ... - colspecs = ... 
- def __init__( - self, f, colspecs, delimiter, comment, skiprows=..., infer_nrows: int = ... - ) -> None: ... - def get_rows(self, infer_nrows, skiprows=...): ... - def detect_colspecs(self, infer_nrows: int = ..., skiprows=...): ... - def __next__(self): ... - -class FixedWidthFieldParser(PythonParser): - colspecs = ... - infer_nrows = ... - def __init__(self, f, **kwds) -> None: ... diff --git a/pandas-stubs/io/parsers/__init__.pyi b/pandas-stubs/io/parsers/__init__.pyi new file mode 100644 index 000000000..7b4412662 --- /dev/null +++ b/pandas-stubs/io/parsers/__init__.pyi @@ -0,0 +1,6 @@ +from pandas.io.parsers.readers import ( + TextFileReader as TextFileReader, + read_csv as read_csv, + read_fwf as read_fwf, + read_table as read_table, +) diff --git a/pandas-stubs/io/parsers/readers.pyi b/pandas-stubs/io/parsers/readers.pyi new file mode 100644 index 000000000..6c1fb327f --- /dev/null +++ b/pandas-stubs/io/parsers/readers.pyi @@ -0,0 +1,468 @@ +from collections import abc +import csv +from types import TracebackType +from typing import ( + Any, + Callable, + Literal, + Sequence, + overload, +) + +from pandas.core.frame import DataFrame +from pandas.core.indexes.base import Index +from pandas.core.series import Series + +import pandas._libs.lib as lib +from pandas._typing import ( + CompressionOptions, + CSVEngine, + CSVQuoting, + DtypeArg, + FilePath, + ReadCsvBuffer, + StorageOptions, + npt, +) + +from pandas.io.common import IOHandles + +@overload +def read_csv( + filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], + *, + sep: str | None = ..., + delimiter: str | None = ..., + header: int | Sequence[int] | Literal["infer"] | None = ..., + names: list[str] | None = ..., + index_col: int | str | Sequence[str | int] | Literal[False] | None = ..., + usecols: list[str] + | tuple[str, ...] 
+ | Sequence[int] + | Series + | Index + | npt.NDArray + | Callable[[str], bool] + | None = ..., + mangle_dupe_cols: bool = ..., + dtype: DtypeArg | None = ..., + engine: CSVEngine | None = ..., + converters: dict[int | str, Callable[[str], Any]] = ..., + true_values: list[str] = ..., + false_values: list[str] = ..., + skipinitialspace: bool = ..., + skiprows: int | Sequence[int] | Callable[[int], bool] = ..., + skipfooter: int = ..., + nrows: int | None = ..., + na_values: Sequence[str] | dict[str, Sequence[str]] = ..., + keep_default_na: bool = ..., + na_filter: bool = ..., + verbose: bool = ..., + skip_blank_lines: bool = ..., + parse_dates: bool + | Sequence[int] + | list[str] + | Sequence[Sequence[int]] + | dict[str, Sequence[int]] = ..., + infer_datetime_format: bool = ..., + keep_date_col: bool = ..., + date_parser: Callable = ..., + dayfirst: bool = ..., + cache_dates: bool = ..., + iterator: Literal[True], + chunksize: int | None = ..., + compression: CompressionOptions = ..., + thousands: str | None = ..., + decimal: str = ..., + lineterminator: str | None = ..., + quotechar: str = ..., + quoting: CSVQuoting = ..., + doublequote: bool = ..., + escapechar: str | None = ..., + comment: str | None = ..., + encoding: str | None = ..., + encoding_errors: str | None = ..., + dialect: str | csv.Dialect = ..., + on_bad_lines: Literal["error", "warn", "skip"] + | Callable[[list[str]], list[str] | None] = ..., + delim_whitespace: bool = ..., + low_memory: bool = ..., + memory_map: bool = ..., + float_precision: Literal["high", "legacy", "round_trip"] | None = ..., + storage_options: StorageOptions | None = ..., +) -> TextFileReader: ... 
+@overload +def read_csv( + filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], + *, + sep: str | None = ..., + delimiter: str | None = ..., + header: int | Sequence[int] | Literal["infer"] | None = ..., + names: list[str] | None = ..., + index_col: int | str | Sequence[str | int] | Literal[False] | None = ..., + usecols: list[str] + | tuple[str, ...] + | Sequence[int] + | Series + | Index + | npt.NDArray + | Callable[[str], bool] + | None = ..., + mangle_dupe_cols: bool = ..., + dtype: DtypeArg | None = ..., + engine: CSVEngine | None = ..., + converters: dict[int | str, Callable[[str], Any]] = ..., + true_values: list[str] = ..., + false_values: list[str] = ..., + skipinitialspace: bool = ..., + skiprows: int | Sequence[int] | Callable[[int], bool] = ..., + skipfooter: int = ..., + nrows: int | None = ..., + na_values: Sequence[str] | dict[str, Sequence[str]] = ..., + keep_default_na: bool = ..., + na_filter: bool = ..., + verbose: bool = ..., + skip_blank_lines: bool = ..., + parse_dates: bool + | Sequence[int] + | list[str] + | Sequence[Sequence[int]] + | dict[str, Sequence[int]] = ..., + infer_datetime_format: bool = ..., + keep_date_col: bool = ..., + date_parser: Callable = ..., + dayfirst: bool = ..., + cache_dates: bool = ..., + iterator: bool = ..., + chunksize: int, + compression: CompressionOptions = ..., + thousands: str | None = ..., + decimal: str = ..., + lineterminator: str | None = ..., + quotechar: str = ..., + quoting: CSVQuoting = ..., + doublequote: bool = ..., + escapechar: str | None = ..., + comment: str | None = ..., + encoding: str | None = ..., + encoding_errors: str | None = ..., + dialect: str | csv.Dialect = ..., + on_bad_lines: Literal["error", "warn", "skip"] + | Callable[[list[str]], list[str] | None] = ..., + delim_whitespace: bool = ..., + low_memory: bool = ..., + memory_map: bool = ..., + float_precision: Literal["high", "legacy", "round_trip"] | None = ..., + storage_options: StorageOptions | None = ..., 
+) -> TextFileReader: ... +@overload +def read_csv( + filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], + *, + sep: str | None = ..., + delimiter: str | None = ..., + header: int | Sequence[int] | Literal["infer"] | None = ..., + names: list[str] | None = ..., + index_col: int | str | Sequence[str | int] | Literal[False] | None = ..., + usecols: list[str] + | tuple[str, ...] + | Sequence[int] + | Series + | Index + | npt.NDArray + | Callable[[str], bool] + | None = ..., + mangle_dupe_cols: bool = ..., + dtype: DtypeArg | None = ..., + engine: CSVEngine | None = ..., + converters: dict[int | str, Callable[[str], Any]] = ..., + true_values: list[str] = ..., + false_values: list[str] = ..., + skipinitialspace: bool = ..., + skiprows: int | Sequence[int] | Callable[[int], bool] = ..., + skipfooter: int = ..., + nrows: int | None = ..., + na_values: Sequence[str] | dict[str, Sequence[str]] = ..., + keep_default_na: bool = ..., + na_filter: bool = ..., + verbose: bool = ..., + skip_blank_lines: bool = ..., + parse_dates: bool + | Sequence[int] + | list[str] + | Sequence[Sequence[int]] + | dict[str, Sequence[int]] = ..., + infer_datetime_format: bool = ..., + keep_date_col: bool = ..., + date_parser: Callable = ..., + dayfirst: bool = ..., + cache_dates: bool = ..., + iterator: Literal[False] = ..., + chunksize: None = ..., + compression: CompressionOptions = ..., + thousands: str | None = ..., + decimal: str = ..., + lineterminator: str | None = ..., + quotechar: str = ..., + quoting: CSVQuoting = ..., + doublequote: bool = ..., + escapechar: str | None = ..., + comment: str | None = ..., + encoding: str | None = ..., + encoding_errors: str | None = ..., + dialect: str | csv.Dialect = ..., + on_bad_lines: Literal["error", "warn", "skip"] + | Callable[[list[str]], list[str] | None] = ..., + delim_whitespace: bool = ..., + low_memory: bool = ..., + memory_map: bool = ..., + float_precision: Literal["high", "legacy", "round_trip"] | None = ..., + 
storage_options: StorageOptions | None = ..., +) -> DataFrame: ... +@overload +def read_table( + filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], + *, + sep: str | None = ..., + delimiter: str | None = ..., + header: int | Sequence[int] | Literal["infer"] | None = ..., + names: list[str] | None = ..., + index_col: int | str | Sequence[str | int] | Literal[False] | None = ..., + usecols: list[str] + | tuple[str, ...] + | Sequence[int] + | Series + | Index + | npt.NDArray + | Callable[[str], bool] + | None = ..., + dtype: DtypeArg | None = ..., + engine: CSVEngine | None = ..., + converters: dict[int | str, Callable[[str], Any]] = ..., + true_values: list[str] = ..., + false_values: list[str] = ..., + skipinitialspace: bool = ..., + skiprows: int | Sequence[int] | Callable[[int], bool] = ..., + skipfooter: int = ..., + nrows: int | None = ..., + na_values: Sequence[str] | dict[str, Sequence[str]] = ..., + keep_default_na: bool = ..., + na_filter: bool = ..., + verbose: bool = ..., + skip_blank_lines: bool = ..., + parse_dates: bool + | Sequence[int] + | list[str] + | Sequence[Sequence[int]] + | dict[str, Sequence[int]] = ..., + infer_datetime_format: bool = ..., + keep_date_col: bool = ..., + date_parser: Callable = ..., + dayfirst: bool = ..., + cache_dates: bool = ..., + iterator: Literal[True], + chunksize: int | None = ..., + compression: CompressionOptions = ..., + thousands: str | None = ..., + decimal: str = ..., + lineterminator: str | None = ..., + quotechar: str = ..., + quoting: CSVQuoting = ..., + doublequote: bool = ..., + escapechar: str | None = ..., + comment: str | None = ..., + encoding: str | None = ..., + encoding_errors: str | None = ..., + dialect: str | csv.Dialect = ..., + on_bad_lines: Literal["error", "warn", "skip"] + | Callable[[list[str]], list[str] | None] = ..., + delim_whitespace: bool = ..., + low_memory: bool = ..., + memory_map: bool = ..., + float_precision: Literal["high", "legacy", "round_trip"] | None = 
..., + storage_options: StorageOptions | None = ..., +) -> TextFileReader: ... +@overload +def read_table( + filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], + *, + sep: str | None = ..., + delimiter: str | None = ..., + header: int | Sequence[int] | Literal["infer"] | None = ..., + names: list[str] | None = ..., + index_col: int | str | Sequence[str | int] | Literal[False] | None = ..., + usecols: list[str] + | tuple[str, ...] + | Sequence[int] + | Series + | Index + | npt.NDArray + | Callable[[str], bool] + | None = ..., + dtype: DtypeArg | None = ..., + engine: CSVEngine | None = ..., + converters: dict[int | str, Callable[[str], Any]] = ..., + true_values: list[str] = ..., + false_values: list[str] = ..., + skipinitialspace: bool = ..., + skiprows: int | Sequence[int] | Callable[[int], bool] = ..., + skipfooter: int = ..., + nrows: int | None = ..., + na_values: Sequence[str] | dict[str, Sequence[str]] = ..., + keep_default_na: bool = ..., + na_filter: bool = ..., + verbose: bool = ..., + skip_blank_lines: bool = ..., + parse_dates: bool + | Sequence[int] + | list[str] + | Sequence[Sequence[int]] + | dict[str, Sequence[int]] = ..., + infer_datetime_format: bool = ..., + keep_date_col: bool = ..., + date_parser: Callable = ..., + dayfirst: bool = ..., + cache_dates: bool = ..., + iterator: bool = ..., + chunksize: int, + compression: CompressionOptions = ..., + thousands: str | None = ..., + decimal: str = ..., + lineterminator: str | None = ..., + quotechar: str = ..., + quoting: CSVQuoting = ..., + doublequote: bool = ..., + escapechar: str | None = ..., + comment: str | None = ..., + encoding: str | None = ..., + encoding_errors: str | None = ..., + dialect: str | csv.Dialect = ..., + on_bad_lines: Literal["error", "warn", "skip"] + | Callable[[list[str]], list[str] | None] = ..., + delim_whitespace: bool = ..., + low_memory: bool = ..., + memory_map: bool = ..., + float_precision: Literal["high", "legacy", "round_trip"] | None = ..., 
+ storage_options: StorageOptions | None = ..., +) -> TextFileReader: ... +@overload +def read_table( + filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], + *, + sep: str | None = ..., + delimiter: str | None = ..., + header: int | Sequence[int] | Literal["infer"] | None = ..., + names: list[str] | None = ..., + index_col: int | str | Sequence[str | int] | Literal[False] | None = ..., + usecols: list[str] + | tuple[str, ...] + | Sequence[int] + | Series + | Index + | npt.NDArray + | Callable[[str], bool] + | None = ..., + dtype: DtypeArg | None = ..., + engine: CSVEngine | None = ..., + converters: dict[int | str, Callable[[str], Any]] = ..., + true_values: list[str] = ..., + false_values: list[str] = ..., + skipinitialspace: bool = ..., + skiprows: int | Sequence[int] | Callable[[int], bool] = ..., + skipfooter: int = ..., + nrows: int | None = ..., + na_values: Sequence[str] | dict[str, Sequence[str]] = ..., + keep_default_na: bool = ..., + na_filter: bool = ..., + verbose: bool = ..., + skip_blank_lines: bool = ..., + parse_dates: bool + | Sequence[int] + | list[str] + | Sequence[Sequence[int]] + | dict[str, Sequence[int]] = ..., + infer_datetime_format: bool = ..., + keep_date_col: bool = ..., + date_parser: Callable = ..., + dayfirst: bool = ..., + cache_dates: bool = ..., + iterator: Literal[False] = ..., + chunksize: None = ..., + compression: CompressionOptions = ..., + thousands: str | None = ..., + decimal: str = ..., + lineterminator: str | None = ..., + quotechar: str = ..., + quoting: CSVQuoting = ..., + doublequote: bool = ..., + escapechar: str | None = ..., + comment: str | None = ..., + encoding: str | None = ..., + encoding_errors: str | None = ..., + dialect: str | csv.Dialect = ..., + on_bad_lines: Literal["error", "warn", "skip"] + | Callable[[list[str]], list[str] | None] = ..., + delim_whitespace: bool = ..., + low_memory: bool = ..., + memory_map: bool = ..., + float_precision: Literal["high", "legacy", "round_trip"] | 
None = ..., + storage_options: StorageOptions | None = ..., +) -> DataFrame: ... +@overload +def read_fwf( + filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], + colspecs: Sequence[tuple[int, int]] | Literal["infer"] | None = ..., + widths: Sequence[int] | None = ..., + infer_nrows: int = ..., + *, + iterator: Literal[True], + chunksize: int | None = ..., + **kwds: Any, +) -> TextFileReader: ... +@overload +def read_fwf( + filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], + colspecs: Sequence[tuple[int, int]] | Literal["infer"] | None = ..., + widths: Sequence[int] | None = ..., + infer_nrows: int = ..., + *, + iterator: bool = ..., + chunksize: int, + **kwds: Any, +) -> TextFileReader: ... +@overload +def read_fwf( + filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], + colspecs: Sequence[tuple[int, int]] | Literal["infer"] | None = ..., + widths: Sequence[int] | None = ..., + infer_nrows: int = ..., + *, + iterator: Literal[False] = ..., + chunksize: None = ..., + **kwds: Any, +) -> DataFrame: ... + +class TextFileReader(abc.Iterator[DataFrame]): + engine: CSVEngine + orig_options: dict[str, Any] + chunksize: int | None + nrows: int | None + squeeze: bool + handles: IOHandles | None + def __init__( + self, + f: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str] | list, + engine: CSVEngine | None = ..., + **kwds: Any, + ) -> None: ... + def close(self) -> None: ... + def read(self, nrows: int | None = ...) -> DataFrame: ... + def get_chunk(self, size: int | None = ...) -> DataFrame: ... + def __next__(self) -> DataFrame: ... + def __enter__(self) -> TextFileReader: ... + def __exit__( + self, + exc_type: type[BaseException] | None, + exc_value: BaseException | None, + traceback: TracebackType | None, + ) -> None: ... 
diff --git a/tests/test_io.py b/tests/test_io.py index 9fb7c175f..478add136 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -1,3 +1,4 @@ +import csv import io import os.path import pathlib @@ -11,6 +12,7 @@ Union, ) +import numpy as np from packaging.version import parse import pandas as pd from pandas import ( @@ -19,8 +21,10 @@ Series, __version__, read_clipboard, + read_csv, read_excel, read_feather, + read_fwf, read_hdf, read_html, read_json, @@ -32,6 +36,7 @@ read_sql_query, read_sql_table, read_stata, + read_table, read_xml, ) from pandas._testing import ensure_clean @@ -356,6 +361,143 @@ def test_feather(): check(assert_type(read_feather(bio), DataFrame), DataFrame) +def test_read_csv(): + with ensure_clean() as path: + check(assert_type(DF.to_csv(path), None), type(None)) + check(assert_type(read_csv(path), DataFrame), DataFrame) + with open(path) as csv_file: + check(assert_type(read_csv(csv_file), DataFrame), DataFrame) + with open(path) as csv_file: + sio = io.StringIO(csv_file.read()) + check(assert_type(read_csv(sio), DataFrame), DataFrame) + check(assert_type(read_csv(path, iterator=False), DataFrame), DataFrame) + check(assert_type(read_csv(path, chunksize=None), DataFrame), DataFrame) + + +def test_read_csv_iterator(): + with ensure_clean() as path: + check(assert_type(DF.to_csv(path), None), type(None)) + tfr = read_csv(path, iterator=True) + check(assert_type(tfr, TextFileReader), TextFileReader) + tfr.close() + tfr2 = read_csv(pathlib.Path(path), chunksize=1) + check( + assert_type(tfr2, TextFileReader), + TextFileReader, + ) + tfr2.close() + + +def test_types_read_csv() -> None: + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) + csv_df: str = df.to_csv() + + with ensure_clean() as path: + df.to_csv(path) + df2: pd.DataFrame = pd.read_csv(path) + df3: pd.DataFrame = pd.read_csv(path, sep="a") + df4: pd.DataFrame = pd.read_csv( + path, + header=None, + ) + df5: pd.DataFrame = pd.read_csv( + path, engine="python", 
true_values=["no", "No", "NO"], na_filter=False + ) + df6: pd.DataFrame = pd.read_csv( + path, + skiprows=lambda x: x in [0, 2], + skip_blank_lines=True, + dayfirst=False, + ) + df7: pd.DataFrame = pd.read_csv(path, nrows=2) + df8: pd.DataFrame = pd.read_csv(path, dtype={"a": float, "b": int}) + df9: pd.DataFrame = pd.read_csv(path, usecols=["col1"]) + df10: pd.DataFrame = pd.read_csv(path, usecols=[0]) + df11: pd.DataFrame = pd.read_csv(path, usecols=np.array([0])) + df12: pd.DataFrame = pd.read_csv(path, usecols=("col1",)) + df13: pd.DataFrame = pd.read_csv(path, usecols=pd.Series(data=["col1"])) + + tfr1: TextFileReader = pd.read_csv(path, nrows=2, iterator=True, chunksize=3) + tfr1.close() + + tfr2: TextFileReader = pd.read_csv(path, nrows=2, chunksize=1) + tfr2.close() + + tfr3: TextFileReader = pd.read_csv(path, nrows=2, iterator=False, chunksize=1) + tfr3.close() + + tfr4: TextFileReader = pd.read_csv(path, nrows=2, iterator=True) + tfr4.close() + + +def test_read_table(): + with ensure_clean() as path: + check(assert_type(DF.to_csv(path, sep="\t"), None), type(None)) + check(assert_type(read_table(path), DataFrame), DataFrame) + check(assert_type(read_table(path, iterator=False), DataFrame), DataFrame) + check(assert_type(read_table(path, chunksize=None), DataFrame), DataFrame) + + +def test_read_table_iterator(): + with ensure_clean() as path: + check(assert_type(DF.to_csv(path, sep="\t"), None), type(None)) + tfr = read_table(path, iterator=True) + check(assert_type(tfr, TextFileReader), TextFileReader) + tfr.close() + tfr2 = read_table(path, chunksize=1) + check(assert_type(tfr2, TextFileReader), TextFileReader) + tfr2.close() + + +def test_read_fwf(): + with ensure_clean() as path: + DF.to_string(path, index=False) + check(assert_type(read_fwf(path), DataFrame), DataFrame) + check(assert_type(read_fwf(pathlib.Path(path)), DataFrame), DataFrame) + + with open(path) as fwf_file: + check( + assert_type(read_fwf(fwf_file), DataFrame), + DataFrame, + ) + 
with open(path) as fwf_file: + sio = io.StringIO(fwf_file.read()) + check(assert_type(read_fwf(sio), DataFrame), DataFrame) + with open(path, "rb") as fwf_file: + bio = io.BytesIO(fwf_file.read()) + check(assert_type(read_fwf(bio), DataFrame), DataFrame) + fwf_iterator = read_fwf(path, iterator=True) + check(assert_type(fwf_iterator, TextFileReader), TextFileReader) + fwf_iterator.close() + fwf_iterator2 = read_fwf(path, chunksize=1) + check(assert_type(fwf_iterator2, TextFileReader), TextFileReader) + fwf_iterator2.close() + + +def test_text_file_reader(): + with ensure_clean() as path: + DF.to_string(path, index=False) + tfr = TextFileReader(path, engine="python") + check(assert_type(tfr, TextFileReader), TextFileReader) + check(assert_type(tfr.read(1), DataFrame), DataFrame) + check(assert_type(tfr.close(), None), type(None)) + with TextFileReader(path, engine="python") as tfr: + check(assert_type(tfr.read(1), DataFrame), DataFrame) + with TextFileReader(path, engine="python") as tfr: + check(assert_type(tfr.__next__(), DataFrame), DataFrame) + df_iter: DataFrame + for df_iter in tfr: + check(df_iter, DataFrame) + + +def test_to_csv_series(): + s: Series + s = DF.iloc[:, 0] + check(assert_type(s.to_csv(), str), str) + with ensure_clean() as path: + check(assert_type(s.to_csv(path), None), type(None)) + + def test_read_excel() -> None: with ensure_clean(".xlsx") as path: # https://github.com/pandas-dev/pandas-stubs/pull/33 @@ -550,3 +692,13 @@ def test_read_html(): with ensure_clean() as path: check(assert_type(DF.to_html(path), None), type(None)) check(assert_type(read_html(path), List[DataFrame]), list) + + +def test_csv_quoting(): + with ensure_clean() as path: + check(assert_type(DF.to_csv(path, quoting=csv.QUOTE_ALL), None), type(None)) + check(assert_type(DF.to_csv(path, quoting=csv.QUOTE_NONE), None), type(None)) + check( + assert_type(DF.to_csv(path, quoting=csv.QUOTE_NONNUMERIC), None), type(None) + ) + check(assert_type(DF.to_csv(path, 
quoting=csv.QUOTE_MINIMAL), None), type(None)) diff --git a/tests/test_pandas.py b/tests/test_pandas.py index e7817648b..d0a949812 100644 --- a/tests/test_pandas.py +++ b/tests/test_pandas.py @@ -8,15 +8,11 @@ import numpy as np import pandas as pd -from pandas._testing import ensure_clean from pandas.api.extensions import ExtensionArray -import pytest from typing_extensions import assert_type from tests import check -from pandas.io.parsers import TextFileReader - def test_types_to_datetime() -> None: df = pd.DataFrame({"year": [2015, 2016], "month": [2, 3], "day": [4, 5]}) @@ -115,54 +111,6 @@ def test_types_json_normalize() -> None: df5: pd.DataFrame = pd.json_normalize(data=data2) -def test_types_read_csv() -> None: - df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) - csv_df: str = df.to_csv() - - with ensure_clean() as path: - df.to_csv(path) - df2: pd.DataFrame = pd.read_csv(path) - with pytest.warns(FutureWarning, match="The squeeze argument"): - df3: pd.DataFrame = pd.read_csv(path, sep="a", squeeze=False) - with pytest.warns(FutureWarning, match="The prefix argument has been"): - df4: pd.DataFrame = pd.read_csv( - path, - header=None, - prefix="b", - mangle_dupe_cols=True, - keep_default_na=False, - ) - df5: pd.DataFrame = pd.read_csv( - path, engine="python", true_values=[0, 1, 3], na_filter=False - ) - df6: pd.DataFrame = pd.read_csv( - path, - skiprows=lambda x: x in [0, 2], - skip_blank_lines=True, - dayfirst=False, - ) - df7: pd.DataFrame = pd.read_csv(path, nrows=2) - df8: pd.DataFrame = pd.read_csv(path, dtype={"a": float, "b": int}) - df9: pd.DataFrame = pd.read_csv(path, usecols=["col1"]) - df10: pd.DataFrame = pd.read_csv(path, usecols={"col1"}) - df11: pd.DataFrame = pd.read_csv(path, usecols=[0]) - df12: pd.DataFrame = pd.read_csv(path, usecols=np.array([0])) - df13: pd.DataFrame = pd.read_csv(path, usecols=("col1",)) - df14: pd.DataFrame = pd.read_csv(path, usecols=pd.Series(data=["col1"])) - - tfr1: TextFileReader = pd.read_csv(path, 
nrows=2, iterator=True, chunksize=3) - tfr1.close() - - tfr2: TextFileReader = pd.read_csv(path, nrows=2, chunksize=1) - tfr2.close() - - tfr3: TextFileReader = pd.read_csv(path, nrows=2, iterator=False, chunksize=1) - tfr3.close() - - tfr4: TextFileReader = pd.read_csv(path, nrows=2, iterator=True) - tfr4.close() - - def test_isna() -> None: s = pd.Series([1, np.nan, 3.2]) check(assert_type(pd.isna(s), "pd.Series[bool]"), pd.Series, bool)