From 424ffd335b3e3287a25cbe28aaa3395de4f6340c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Sat, 23 Jul 2022 16:42:14 -0400 Subject: [PATCH] TYP: pandas/io annotations from pandas-stubs --- pandas/errors/__init__.py | 2 +- pandas/io/excel/_base.py | 53 +++++--- pandas/io/formats/excel.py | 6 +- pandas/io/formats/format.py | 5 +- pandas/io/formats/html.py | 3 +- pandas/io/formats/style.py | 2 +- pandas/io/html.py | 3 +- pandas/io/json/_json.py | 64 +++++----- pandas/io/parquet.py | 4 +- pandas/io/pytables.py | 54 ++++---- pandas/io/sas/sas_constants.py | 218 +++++++++++++++++---------------- pandas/io/sql.py | 73 ++++++----- pandas/io/stata.py | 21 ++-- pandas/util/_validators.py | 2 +- 14 files changed, 270 insertions(+), 240 deletions(-) diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 08ee5650e97a6..e3f7e9d454383 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -189,7 +189,7 @@ class AbstractMethodError(NotImplementedError): while keeping compatibility with Python 2 and Python 3. """ - def __init__(self, class_instance, methodtype="method") -> None: + def __init__(self, class_instance, methodtype: str = "method") -> None: types = {"method", "classmethod", "staticmethod", "property"} if methodtype not in types: raise ValueError( diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index a0abddc82e6c8..44152f100d390 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -359,13 +359,18 @@ def read_excel( # sheet name is str or int -> DataFrame sheet_name: str | int, header: int | Sequence[int] | None = ..., - names=..., + names: list[str] | None = ..., index_col: int | Sequence[int] | None = ..., - usecols=..., + usecols: int + | str + | Sequence[int] + | Sequence[str] + | Callable[[str], bool] + | None = ..., squeeze: bool | None = ..., dtype: DtypeArg | None = ..., engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb"] | None = ..., - converters=..., + converters: dict[str, Callable] | dict[int, Callable] | None = ..., true_values: Iterable[Hashable] | None = ..., false_values: Iterable[Hashable] | None = ..., skiprows: Sequence[int] | int | Callable[[int], object] | None = ..., @@ -374,8 +379,8 @@ def read_excel( keep_default_na: bool = ..., na_filter: bool = ..., verbose: bool = ..., - parse_dates=..., - date_parser=..., + parse_dates: list | dict | bool = ..., + date_parser: Callable | None = ..., thousands: str | None = ..., decimal: str = ..., comment: str | None = ..., @@ -393,13 +398,18 @@ def read_excel( # sheet name is list or None -> dict[IntStrT, DataFrame] sheet_name: list[IntStrT] | None, header: int | Sequence[int] | None = ..., - names=..., + names: list[str] | None = ..., index_col: int | Sequence[int] | None = ..., - usecols=..., + usecols: int + | str + | Sequence[int] + | Sequence[str] + | Callable[[str], bool] + | None = ..., squeeze: bool | None = ..., dtype: DtypeArg | None = ..., engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb"] | None = ..., - converters=..., + converters: dict[str, Callable] | dict[int, Callable] | None = ..., true_values: Iterable[Hashable] | None = ..., false_values: Iterable[Hashable] | None = ..., skiprows: Sequence[int] | int | Callable[[int], object] | None = ..., @@ -408,8 +418,8 @@ def read_excel( keep_default_na: bool = ..., na_filter: bool = ..., verbose: bool = ..., - parse_dates=..., - date_parser=..., + parse_dates: list | dict | bool = ..., + date_parser: Callable | None = ..., thousands: str | None = ..., decimal: str = ..., comment: str | None = ..., @@ -428,13 +438,18 @@ def read_excel( io, sheet_name: str | int | list[IntStrT] | None = 0, header: int | Sequence[int] | None = 0, - names=None, + names: list[str] | None = None, index_col: int | Sequence[int] | None = None, - usecols=None, + usecols: int + | str + | Sequence[int] + | Sequence[str] + | Callable[[str], bool] + | None = None, squeeze: bool | None = None, dtype: DtypeArg | None = None, engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb"] | None = None, - converters=None, + converters: dict[str, Callable] | dict[int, Callable] | None = None, true_values: Iterable[Hashable] | None = None, false_values: Iterable[Hashable] | None = None, skiprows: Sequence[int] | int | Callable[[int], object] | None = None, @@ -443,8 +458,8 @@ def read_excel( keep_default_na: bool = True, na_filter: bool = True, verbose: bool = False, - parse_dates=False, - date_parser=None, + parse_dates: list | dict | bool = False, + date_parser: Callable | None = None, thousands: str | None = None, decimal: str = ".", comment: str | None = None, @@ -687,8 +702,8 @@ def parse( nrows: int | None = None, na_values=None, verbose: bool = False, - parse_dates=False, - date_parser=None, + parse_dates: list | dict | bool = False, + date_parser: Callable | None = None, thousands: str | None = None, decimal: str = ".", comment: str | None = None, @@ -1665,8 +1680,8 @@ def parse( skiprows: Sequence[int] | int | Callable[[int], object] | None = None, nrows: int | None = None, na_values=None, - parse_dates=False, - date_parser=None, + parse_dates: list | dict | bool = False, + date_parser: Callable | None = None, thousands: str | None = None, comment: str | None = None, skipfooter: int = 0, diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index 811b079c3c693..3f9f6c7a5fee7 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -853,9 +853,9 @@ def get_formatted_cells(self) -> Iterable[ExcelCell]: def write( self, writer, - sheet_name="Sheet1", - startrow=0, - startcol=0, + sheet_name: str = "Sheet1", + startrow: int = 0, + startcol: int = 0, freeze_panes=None, engine=None, storage_options: StorageOptions = None, diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 6554b4c1f1afd..57bc534bd67c4 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -20,6 +20,7 @@ TYPE_CHECKING, Any, Callable, + Final, Hashable, Iterable, Iterator, @@ -117,7 +118,7 @@ ) -common_docstring = """ +common_docstring: Final = """ Parameters ---------- buf : str, Path or StringIO-like, optional, default None @@ -190,7 +191,7 @@ "unset", ) -return_docstring = """ +return_docstring: Final = """ Returns ------- str or None diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index 163e7dc7bde5e..e161c8ad16ab1 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -6,6 +6,7 @@ from textwrap import dedent from typing import ( Any, + Final, Hashable, Iterable, Mapping, @@ -39,7 +40,7 @@ class HTMLFormatter: and this class responsible for only producing html markup. """ - indent_delta = 2 + indent_delta: Final = 2 def __init__( self, diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index fbee64771cd9a..a557f9b5c0a0d 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -3332,7 +3332,7 @@ def highlight_null( color: str | None = None, subset: Subset | None = None, props: str | None = None, - null_color=lib.no_default, + null_color: str | lib.NoDefault = lib.no_default, ) -> Styler: """ Highlight missing values with a style. diff --git a/pandas/io/html.py b/pandas/io/html.py index ad92bb2447329..ad92883fe8572 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -10,6 +10,7 @@ import numbers import re from typing import ( + Iterable, Pattern, Sequence, cast, @@ -971,7 +972,7 @@ def read_html( encoding: str | None = None, decimal: str = ".", converters: dict | None = None, - na_values=None, + na_values: Iterable[object] | None = None, keep_default_na: bool = True, displayed_only: bool = True, ) -> list[DataFrame]: diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index c617828c91bd4..d40b0357049a1 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -365,16 +365,16 @@ def obj_to_write(self) -> NDFrame | Mapping[IndexLabel, Any]: def read_json( path_or_buf: FilePath | ReadBuffer[str] | ReadBuffer[bytes], *, - orient=..., + orient: str | None = ..., typ: Literal["frame"] = ..., dtype: DtypeArg | None = ..., convert_axes=..., - convert_dates=..., + convert_dates: bool | list[str] = ..., keep_default_dates: bool = ..., numpy: bool = ..., precise_float: bool = ..., - date_unit=..., - encoding=..., + date_unit: str | None = ..., + encoding: str | None = ..., encoding_errors: str | None = ..., lines: bool = ..., chunksize: int, @@ -389,16 +389,16 @@ def read_json( def read_json( path_or_buf: FilePath | ReadBuffer[str] | ReadBuffer[bytes], *, - orient=..., + orient: str | None = ..., typ: Literal["series"], dtype: DtypeArg | None = ..., convert_axes=..., - convert_dates=..., + convert_dates: bool | list[str] = ..., keep_default_dates: bool = ..., numpy: bool = ..., precise_float: bool = ..., - date_unit=..., - encoding=..., + date_unit: str | None = ..., + encoding: str | None = ..., encoding_errors: str | None = ..., lines: bool = ..., chunksize: int, @@ -413,16 +413,16 @@ def read_json( def read_json( path_or_buf: FilePath | ReadBuffer[str] | ReadBuffer[bytes], *, - orient=..., + orient: str | None = ..., typ: Literal["series"], dtype: DtypeArg | None = ..., convert_axes=..., - convert_dates=..., + convert_dates: bool | list[str] = ..., keep_default_dates: bool = ..., numpy: bool = ..., precise_float: bool = ..., - date_unit=..., - encoding=..., + date_unit: str | None = ..., + encoding: str | None = ..., encoding_errors: str | None = ..., lines: bool = ..., chunksize: None = ..., @@ -436,16 +436,16 @@ def read_json( @overload def read_json( path_or_buf: FilePath | ReadBuffer[str] | ReadBuffer[bytes], - orient=..., + orient: str | None = ..., typ: Literal["frame"] = ..., dtype: DtypeArg | None = ..., convert_axes=..., - convert_dates=..., + convert_dates: bool | list[str] = ..., keep_default_dates: bool = ..., numpy: bool = ..., precise_float: bool = ..., - date_unit=..., - encoding=..., + date_unit: str | None = ..., + encoding: str | None = ..., encoding_errors: str | None = ..., lines: bool = ..., chunksize: None = ..., @@ -464,16 +464,16 @@ def read_json( @deprecate_nonkeyword_arguments(version="2.0", allowed_args=["path_or_buf"]) def read_json( path_or_buf: FilePath | ReadBuffer[str] | ReadBuffer[bytes], - orient=None, + orient: str | None = None, typ: Literal["frame", "series"] = "frame", dtype: DtypeArg | None = None, convert_axes=None, - convert_dates=True, + convert_dates: bool | list[str] = True, keep_default_dates: bool = True, numpy: bool = False, precise_float: bool = False, - date_unit=None, - encoding=None, + date_unit: str | None = None, + encoding: str | None = None, encoding_errors: str | None = "strict", lines: bool = False, chunksize: int | None = None, @@ -1009,11 +1009,11 @@ def __init__( json, orient, dtype: DtypeArg | None = None, - convert_axes=True, - convert_dates=True, - keep_default_dates=False, - numpy=False, - precise_float=False, + convert_axes: bool = True, + convert_dates: bool | list[str] = True, + keep_default_dates: bool = False, + numpy: bool = False, + precise_float: bool = False, date_unit=None, ) -> None: self.json = json @@ -1093,7 +1093,11 @@ def _try_convert_types(self): raise AbstractMethodError(self) def _try_convert_data( - self, name, data, use_dtypes: bool = True, convert_dates: bool = True + self, + name, + data, + use_dtypes: bool = True, + convert_dates: bool | list[str] = True, ): """ Try to parse a ndarray like into a column by inferring dtype. @@ -1375,10 +1379,10 @@ def _try_convert_dates(self): return # our columns to parse - convert_dates = self.convert_dates - if convert_dates is True: - convert_dates = [] - convert_dates = set(convert_dates) + convert_dates_list_bool = self.convert_dates + if isinstance(convert_dates_list_bool, bool): + convert_dates_list_bool = [] + convert_dates = set(convert_dates_list_bool) def is_ok(col) -> bool: """ diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index ed0e0a99ec43b..378b158d1b0bb 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -444,9 +444,9 @@ def to_parquet( @doc(storage_options=_shared_docs["storage_options"]) def read_parquet( - path, + path: FilePath | ReadBuffer[bytes], engine: str = "auto", - columns=None, + columns: list[str] | None = None, storage_options: StorageOptions = None, use_nullable_dtypes: bool = False, **kwargs, diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 52a2883e70f93..5c773a424a1c9 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -18,6 +18,7 @@ TYPE_CHECKING, Any, Callable, + Final, Hashable, Iterator, Literal, @@ -43,6 +44,7 @@ AnyArrayLike, ArrayLike, DtypeArg, + FilePath, Shape, npt, ) @@ -175,18 +177,18 @@ def _ensure_term(where, scope_level: int): return where if where is None or len(where) else None -incompatibility_doc = """ +incompatibility_doc: Final = """ where criteria is being ignored as this version [%s] is too old (or not-defined), read the file in and write it out to a new file to upgrade (with the copy_to method) """ -attribute_conflict_doc = """ +attribute_conflict_doc: Final = """ the [%s] attribute of the existing index is [%s] which conflicts with the new [%s], resetting the attribute to None """ -performance_doc = """ +performance_doc: Final = """ your performance may suffer as PyTables will pickle object types that it cannot map directly to c-types [inferred_type->%s,key->%s] [items->%s] """ @@ -198,11 +200,11 @@ def _ensure_term(where, scope_level: int): _AXES_MAP = {DataFrame: [0]} # register our configuration options -dropna_doc = """ +dropna_doc: Final = """ : boolean drop ALL nan rows when appending to a table """ -format_doc = """ +format_doc: Final = """ : format default format writing format, if None, then put will default to 'fixed' and append will default to 'table' @@ -245,7 +247,7 @@ def _tables(): def to_hdf( - path_or_buf, + path_or_buf: FilePath | HDFStore, key: str, value: DataFrame | Series, mode: str = "a", @@ -301,15 +303,15 @@ def to_hdf( def read_hdf( - path_or_buf, + path_or_buf: FilePath | HDFStore, key=None, mode: str = "r", errors: str = "strict", - where=None, + where: str | list | None = None, start: int | None = None, stop: int | None = None, - columns=None, - iterator=False, + columns: list[str] | None = None, + iterator: bool = False, chunksize: int | None = None, **kwargs, ): @@ -669,7 +671,7 @@ def keys(self, include: str = "pandas") -> list[str]: def __iter__(self) -> Iterator[str]: return iter(self.keys()) - def items(self): + def items(self) -> Iterator[tuple[str, list]]: """ iterate on key->group """ @@ -1415,7 +1417,7 @@ def create_table_index( raise TypeError("cannot create table index on a Fixed format store") s.create_index(columns=columns, optlevel=optlevel, kind=kind) - def groups(self): + def groups(self) -> list: """ Return a list of all the top-level nodes. @@ -1443,7 +1445,7 @@ def groups(self): ) ] - def walk(self, where="/"): + def walk(self, where: str = "/") -> Iterator[tuple[str, list[str], list[str]]]: """ Walk the pytables group hierarchy for pandas objects. @@ -1959,8 +1961,8 @@ class IndexCol: """ - is_an_indexable = True - is_data_indexable = True + is_an_indexable: bool = True + is_data_indexable: bool = True _info_fields = ["freq", "tz", "index_name"] name: str @@ -2612,7 +2614,7 @@ class Fixed: parent: HDFStore group: Node errors: str - is_table = False + is_table: bool = False def __init__( self, @@ -2863,7 +2865,8 @@ def get_attrs(self) -> None: for n in self.attributes: setattr(self, n, _ensure_decoded(getattr(self.attrs, n, None))) - def write(self, obj, **kwargs): + # error: Signature of "write" incompatible with supertype "Fixed" + def write(self, obj, **kwargs) -> None: # type: ignore[override] self.set_attrs() def read_array(self, key: str, start: int | None = None, stop: int | None = None): @@ -3148,7 +3151,8 @@ def read( values = self.read_array("values", start=start, stop=stop) return Series(values, index=index, name=self.name) - def write(self, obj, **kwargs): + # error: Signature of "write" incompatible with supertype "Fixed" + def write(self, obj, **kwargs) -> None: # type: ignore[override] super().write(obj, **kwargs) self.write_index("index", obj.index) self.write_array("values", obj) @@ -3224,7 +3228,8 @@ def read( return DataFrame(columns=axes[0], index=axes[1]) - def write(self, obj, **kwargs): + # error: Signature of "write" incompatible with supertype "Fixed" + def write(self, obj, **kwargs) -> None: # type: ignore[override] super().write(obj, **kwargs) # TODO(ArrayManager) HDFStore relies on accessing the blocks @@ -4272,7 +4277,7 @@ def read( """ raise NotImplementedError("WORMTable needs to implement read") - def write(self, **kwargs): + def write(self, **kwargs) -> None: """ write in a format that we can search later on (but cannot append to): write out the indices and the values using _write_array @@ -4286,22 +4291,23 @@ class AppendableTable(Table): table_type = "appendable" - def write( + # error: Signature of "write" incompatible with supertype "Fixed" + def write( # type: ignore[override] self, obj, axes=None, - append=False, + append: bool = False, complib=None, complevel=None, fletcher32=None, min_itemsize=None, chunksize=None, expectedrows=None, - dropna=False, + dropna: bool = False, nan_rep=None, data_columns=None, track_times=True, - ): + ) -> None: if not append and self.is_exists: self._handle.remove_node(self.group, "table") diff --git a/pandas/io/sas/sas_constants.py b/pandas/io/sas/sas_constants.py index 366e6924a1e16..69bc16e6d294f 100644 --- a/pandas/io/sas/sas_constants.py +++ b/pandas/io/sas/sas_constants.py @@ -1,112 +1,114 @@ from __future__ import annotations -magic = ( +from typing import Final + +magic: Final = ( b"\x00\x00\x00\x00\x00\x00\x00\x00" + b"\x00\x00\x00\x00\xc2\xea\x81\x60" + b"\xb3\x14\x11\xcf\xbd\x92\x08\x00" + b"\x09\xc7\x31\x8c\x18\x1f\x10\x11" ) -align_1_checker_value = b"3" -align_1_offset = 32 -align_1_length = 1 -align_1_value = 4 -u64_byte_checker_value = b"3" -align_2_offset = 35 -align_2_length = 1 -align_2_value = 4 -endianness_offset = 37 -endianness_length = 1 -platform_offset = 39 -platform_length = 1 -encoding_offset = 70 -encoding_length = 1 -dataset_offset = 92 -dataset_length = 64 -file_type_offset = 156 -file_type_length = 8 -date_created_offset = 164 -date_created_length = 8 -date_modified_offset = 172 -date_modified_length = 8 -header_size_offset = 196 -header_size_length = 4 -page_size_offset = 200 -page_size_length = 4 -page_count_offset = 204 -page_count_length = 4 -sas_release_offset = 216 -sas_release_length = 8 -sas_server_type_offset = 224 -sas_server_type_length = 16 -os_version_number_offset = 240 -os_version_number_length = 16 -os_maker_offset = 256 -os_maker_length = 16 -os_name_offset = 272 -os_name_length = 16 -page_bit_offset_x86 = 16 -page_bit_offset_x64 = 32 -subheader_pointer_length_x86 = 12 -subheader_pointer_length_x64 = 24 -page_type_offset = 0 -page_type_length = 2 -block_count_offset = 2 -block_count_length = 2 -subheader_count_offset = 4 -subheader_count_length = 2 -page_type_mask = 0x0F00 +align_1_checker_value: Final = b"3" +align_1_offset: Final = 32 +align_1_length: Final = 1 +align_1_value: Final = 4 +u64_byte_checker_value: Final = b"3" +align_2_offset: Final = 35 +align_2_length: Final = 1 +align_2_value: Final = 4 +endianness_offset: Final = 37 +endianness_length: Final = 1 +platform_offset: Final = 39 +platform_length: Final = 1 +encoding_offset: Final = 70 +encoding_length: Final = 1 +dataset_offset: Final = 92 +dataset_length: Final = 64 +file_type_offset: Final = 156 +file_type_length: Final = 8 +date_created_offset: Final = 164 +date_created_length: Final = 8 +date_modified_offset: Final = 172 +date_modified_length: Final = 8 +header_size_offset: Final = 196 +header_size_length: Final = 4 +page_size_offset: Final = 200 +page_size_length: Final = 4 +page_count_offset: Final = 204 +page_count_length: Final = 4 +sas_release_offset: Final = 216 +sas_release_length: Final = 8 +sas_server_type_offset: Final = 224 +sas_server_type_length: Final = 16 +os_version_number_offset: Final = 240 +os_version_number_length: Final = 16 +os_maker_offset: Final = 256 +os_maker_length: Final = 16 +os_name_offset: Final = 272 +os_name_length: Final = 16 +page_bit_offset_x86: Final = 16 +page_bit_offset_x64: Final = 32 +subheader_pointer_length_x86: Final = 12 +subheader_pointer_length_x64: Final = 24 +page_type_offset: Final = 0 +page_type_length: Final = 2 +block_count_offset: Final = 2 +block_count_length: Final = 2 +subheader_count_offset: Final = 4 +subheader_count_length: Final = 2 +page_type_mask: Final = 0x0F00 # Keep "page_comp_type" bits -page_type_mask2 = 0xF000 | page_type_mask -page_meta_type = 0x0000 -page_data_type = 0x0100 -page_mix_type = 0x0200 -page_amd_type = 0x0400 -page_meta2_type = 0x4000 -page_comp_type = 0x9000 -page_meta_types = [page_meta_type, page_meta2_type] -subheader_pointers_offset = 8 -truncated_subheader_id = 1 -compressed_subheader_id = 4 -compressed_subheader_type = 1 -text_block_size_length = 2 -row_length_offset_multiplier = 5 -row_count_offset_multiplier = 6 -col_count_p1_multiplier = 9 -col_count_p2_multiplier = 10 -row_count_on_mix_page_offset_multiplier = 15 -column_name_pointer_length = 8 -column_name_text_subheader_offset = 0 -column_name_text_subheader_length = 2 -column_name_offset_offset = 2 -column_name_offset_length = 2 -column_name_length_offset = 4 -column_name_length_length = 2 -column_data_offset_offset = 8 -column_data_length_offset = 8 -column_data_length_length = 4 -column_type_offset = 14 -column_type_length = 1 -column_format_text_subheader_index_offset = 22 -column_format_text_subheader_index_length = 2 -column_format_offset_offset = 24 -column_format_offset_length = 2 -column_format_length_offset = 26 -column_format_length_length = 2 -column_label_text_subheader_index_offset = 28 -column_label_text_subheader_index_length = 2 -column_label_offset_offset = 30 -column_label_offset_length = 2 -column_label_length_offset = 32 -column_label_length_length = 2 -rle_compression = b"SASYZCRL" -rdc_compression = b"SASYZCR2" +page_type_mask2: Final = 0xF000 | page_type_mask +page_meta_type: Final = 0x0000 +page_data_type: Final = 0x0100 +page_mix_type: Final = 0x0200 +page_amd_type: Final = 0x0400 +page_meta2_type: Final = 0x4000 +page_comp_type: Final = 0x9000 +page_meta_types: Final = [page_meta_type, page_meta2_type] +subheader_pointers_offset: Final = 8 +truncated_subheader_id: Final = 1 +compressed_subheader_id: Final = 4 +compressed_subheader_type: Final = 1 +text_block_size_length: Final = 2 +row_length_offset_multiplier: Final = 5 +row_count_offset_multiplier: Final = 6 +col_count_p1_multiplier: Final = 9 +col_count_p2_multiplier: Final = 10 +row_count_on_mix_page_offset_multiplier: Final = 15 +column_name_pointer_length: Final = 8 +column_name_text_subheader_offset: Final = 0 +column_name_text_subheader_length: Final = 2 +column_name_offset_offset: Final = 2 +column_name_offset_length: Final = 2 +column_name_length_offset: Final = 4 +column_name_length_length: Final = 2 +column_data_offset_offset: Final = 8 +column_data_length_offset: Final = 8 +column_data_length_length: Final = 4 +column_type_offset: Final = 14 +column_type_length: Final = 1 +column_format_text_subheader_index_offset: Final = 22 +column_format_text_subheader_index_length: Final = 2 +column_format_offset_offset: Final = 24 +column_format_offset_length: Final = 2 +column_format_length_offset: Final = 26 +column_format_length_length: Final = 2 +column_label_text_subheader_index_offset: Final = 28 +column_label_text_subheader_index_length: Final = 2 +column_label_offset_offset: Final = 30 +column_label_offset_length: Final = 2 +column_label_length_offset: Final = 32 +column_label_length_length: Final = 2 +rle_compression: Final = b"SASYZCRL" +rdc_compression: Final = b"SASYZCR2" -compression_literals = [rle_compression, rdc_compression] +compression_literals: Final = [rle_compression, rdc_compression] # Incomplete list of encodings, using SAS nomenclature: # http://support.sas.com/documentation/cdl/en/nlsref/61893/HTML/default/viewer.htm#a002607278.htm -encoding_names = { +encoding_names: Final = { 29: "latin1", 20: "utf-8", 33: "cyrillic", @@ -118,18 +120,18 @@ class SASIndex: - row_size_index = 0 - column_size_index = 1 - subheader_counts_index = 2 - column_text_index = 3 - column_name_index = 4 - column_attributes_index = 5 - format_and_label_index = 6 - column_list_index = 7 - data_subheader_index = 8 + row_size_index: Final = 0 + column_size_index: Final = 1 + subheader_counts_index: Final = 2 + column_text_index: Final = 3 + column_name_index: Final = 4 + column_attributes_index: Final = 5 + format_and_label_index: Final = 6 + column_list_index: Final = 7 + data_subheader_index: Final = 8 -subheader_signature_to_index = { +subheader_signature_to_index: Final = { b"\xF7\xF7\xF7\xF7": SASIndex.row_size_index, b"\x00\x00\x00\x00\xF7\xF7\xF7\xF7": SASIndex.row_size_index, b"\xF7\xF7\xF7\xF7\x00\x00\x00\x00": SASIndex.row_size_index, @@ -166,7 +168,7 @@ class SASIndex: # List of frequently used SAS date and datetime formats # http://support.sas.com/documentation/cdl/en/etsug/60372/HTML/default/viewer.htm#etsug_intervals_sect009.htm # https://github.com/epam/parso/blob/master/src/main/java/com/epam/parso/impl/SasFileConstants.java -sas_date_formats = ( +sas_date_formats: Final = ( "DATE", "DAY", "DDMMYY", @@ -235,7 +237,7 @@ class SASIndex: "MINGUO", ) -sas_datetime_formats = ( +sas_datetime_formats: Final = ( "DATETIME", "DTWKDATX", "B8601DN", diff --git a/pandas/io/sql.py b/pandas/io/sql.py index f591e7b8676f6..086a60774ac4e 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -17,7 +17,6 @@ TYPE_CHECKING, Any, Iterator, - Sequence, cast, overload, ) @@ -189,10 +188,10 @@ def read_sql_table( table_name, con, schema=..., - index_col=..., + index_col: str | list[str] | None = ..., coerce_float=..., - parse_dates=..., - columns=..., + parse_dates: list[str] | dict[str, str] | None = ..., + columns: list[str] | None = ..., chunksize: None = ..., ) -> DataFrame: ... @@ -203,10 +202,10 @@ def read_sql_table( table_name, con, schema=..., - index_col=..., + index_col: str | list[str] | None = ..., coerce_float=..., - parse_dates=..., - columns=..., + parse_dates: list[str] | dict[str, str] | None = ..., + columns: list[str] | None = ..., chunksize: int = ..., ) -> Iterator[DataFrame]: ... @@ -216,10 +215,10 @@ def read_sql_table( table_name: str, con, schema: str | None = None, - index_col: str | Sequence[str] | None = None, + index_col: str | list[str] | None = None, coerce_float: bool = True, - parse_dates=None, - columns=None, + parse_dates: list[str] | dict[str, str] | None = None, + columns: list[str] | None = None, chunksize: int | None = None, ) -> DataFrame | Iterator[DataFrame]: """ @@ -302,10 +301,10 @@ def read_sql_table( def read_sql_query( sql, con, - index_col=..., + index_col: str | list[str] | None = ..., coerce_float=..., - params=..., - parse_dates=..., + params: list[str] | dict[str, str] | None = ..., + parse_dates: list[str] | dict[str, str] | None = ..., chunksize: None = ..., dtype: DtypeArg | None = ..., ) -> DataFrame: @@ -316,10 +315,10 @@ def read_sql_query( def read_sql_query( sql, con, - index_col=..., + index_col: str | list[str] | None = ..., coerce_float=..., - params=..., - parse_dates=..., + params: list[str] | dict[str, str] | None = ..., + parse_dates: list[str] | dict[str, str] | None = ..., chunksize: int = ..., dtype: DtypeArg | None = ..., ) -> Iterator[DataFrame]: @@ -329,10 +328,10 @@ def read_sql_query( def read_sql_query( sql, con, - index_col=None, + index_col: str | list[str] | None = None, coerce_float: bool = True, - params=None, - parse_dates=None, + params: list[str] | dict[str, str] | None = None, + parse_dates: list[str] | dict[str, str] | None = None, chunksize: int | None = None, dtype: DtypeArg | None = None, ) -> DataFrame | Iterator[DataFrame]: @@ -409,11 +408,11 @@ def read_sql_query( def read_sql( sql, con, - index_col=..., + index_col: str | list[str] | None = ..., coerce_float=..., params=..., parse_dates=..., - columns=..., + columns: list[str] = ..., chunksize: None = ..., ) -> DataFrame: ... @@ -423,11 +422,11 @@ def read_sql( def read_sql( sql, con, - index_col=..., + index_col: str | list[str] | None = ..., coerce_float=..., params=..., parse_dates=..., - columns=..., + columns: list[str] = ..., chunksize: int = ..., ) -> Iterator[DataFrame]: ... @@ -436,11 +435,11 @@ def read_sql( def read_sql( sql, con, - index_col: str | Sequence[str] | None = None, + index_col: str | list[str] | None = None, coerce_float: bool = True, params=None, parse_dates=None, - columns=None, + columns: list[str] | None = None, chunksize: int | None = None, ) -> DataFrame | Iterator[DataFrame]: """ @@ -781,9 +780,9 @@ def __init__( name: str, pandas_sql_engine, frame=None, - index=True, - if_exists="fail", - prefix="pandas", + index: bool | str | list[str] | None = True, + if_exists: str = "fail", + prefix: str = "pandas", index_label=None, schema=None, keys=None, @@ -984,7 +983,7 @@ def _query_iterator( def read( self, - coerce_float=True, + coerce_float: bool = True, parse_dates=None, columns=None, chunksize=None, @@ -1267,8 +1266,8 @@ def to_sql( self, frame, name, - if_exists="fail", - index=True, + if_exists: str = "fail", + index: bool = True, index_label=None, schema=None, chunksize=None, @@ -1406,7 +1405,7 @@ def execute(self, *args, **kwargs): def read_table( self, table_name: str, - index_col: str | Sequence[str] | None = None, + index_col: str | list[str] | None = None, coerce_float: bool = True, parse_dates=None, columns=None, @@ -1501,7 +1500,7 @@ def _query_iterator( def read_query( self, sql: str, - index_col: str | Sequence[str] | None = None, + index_col: str | list[str] | None = None, coerce_float: bool = True, parse_dates=None, params=None, @@ -1660,8 +1659,8 @@ def to_sql( self, frame, name, - if_exists="fail", - index=True, + if_exists: str = "fail", + index: bool = True, index_label=None, schema=None, chunksize=None, @@ -2107,8 +2106,8 @@ def to_sql( self, frame, name, - if_exists="fail", - index=True, + if_exists: str = "fail", + index: bool = True, index_label=None, schema=None, chunksize=None, diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 226a19e1f7599..2662df0361b55 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -22,6 +22,7 @@ TYPE_CHECKING, Any, AnyStr, + Final, Hashable, Sequence, cast, @@ -214,7 +215,7 @@ _date_formats = ["%tc", "%tC", "%td", "%d", "%tw", "%tm", "%tq", "%th", "%ty"] -stata_epoch = datetime.datetime(1960, 1, 1) +stata_epoch: Final = datetime.datetime(1960, 1, 1) # TODO: Add typing. As of January 2020 it is not possible to type this function since @@ -485,7 +486,7 @@ def g(x: datetime.datetime) -> int: return Series(conv_dates, index=index) -excessive_string_length_error = """ +excessive_string_length_error: Final = """ Fixed width strings in Stata .dta files are limited to 244 (or fewer) characters. Column '{0}' does not satisfy this restriction. Use the 'version=117' parameter to write the newer (Stata 13 and later) format. @@ -496,7 +497,7 @@ class PossiblePrecisionLoss(Warning): pass -precision_loss_doc = """ +precision_loss_doc: Final = """ Column converted from {0} to {1}, and some data are outside of the lossless conversion range. This may result in a loss of precision in the saved data. """ @@ -506,7 +507,7 @@ class ValueLabelTypeMismatch(Warning): pass -value_label_mismatch_doc = """ +value_label_mismatch_doc: Final = """ Stata value labels (pandas categories) must be strings. Column {0} contains non-string labels which will be converted to strings. Please check that the Stata data file created has not lost information due to duplicate labels. @@ -517,7 +518,7 @@ class InvalidColumnName(Warning): pass -invalid_name_doc = """ +invalid_name_doc: Final = """ Not all pandas column names were valid Stata variable names. The following replacements have been made: @@ -851,15 +852,15 @@ class StataMissingValue: # Construct a dictionary of missing values MISSING_VALUES: dict[float, str] = {} - bases = (101, 32741, 2147483621) + bases: Final = (101, 32741, 2147483621) for b in bases: # Conversion to long to avoid hash issues on 32 bit platforms #8968 MISSING_VALUES[b] = "." for i in range(1, 27): MISSING_VALUES[i + b] = "." + chr(96 + i) - float32_base = b"\x00\x00\x00\x7f" - increment = struct.unpack("