Add type annotations to read_fwf (#44678)

phofl · web-flow · commit 9f5e60920968 · 2021-12-01T18:01:36.000-05:00
diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py
@@ -9,9 +9,11 @@
 import re
 import sys
 from typing import (
+    IO,
     DefaultDict,
     Hashable,
     Iterator,
+    Literal,
     Mapping,
     Sequence,
     cast,
@@ -1135,9 +1137,17 @@ class FixedWidthReader(abc.Iterator):
     A reader of fixed-width lines.
     """
 
-    def __init__(self, f, colspecs, delimiter, comment, skiprows=None, infer_nrows=100):
+    def __init__(
+        self,
+        f: IO[str],
+        colspecs: list[tuple[int, int]] | Literal["infer"],
+        delimiter: str | None,
+        comment: str | None,
+        skiprows: set[int] | None = None,
+        infer_nrows: int = 100,
+    ) -> None:
         self.f = f
-        self.buffer = None
+        self.buffer: Iterator | None = None
         self.delimiter = "\r\n" + delimiter if delimiter else "\n\r\t "
         self.comment = comment
         if colspecs == "infer":
@@ -1165,7 +1175,7 @@ def __init__(self, f, colspecs, delimiter, comment, skiprows=None, infer_nrows=1
                     "2 element tuple or list of integers"
                 )
 
-    def get_rows(self, infer_nrows, skiprows=None):
+    def get_rows(self, infer_nrows: int, skiprows: set[int] | None = None) -> list[str]:
         """
         Read rows from self.f, skipping as specified.
 
@@ -1203,7 +1213,9 @@ def get_rows(self, infer_nrows, skiprows=None):
         self.buffer = iter(buffer_rows)
         return detect_rows
 
-    def detect_colspecs(self, infer_nrows=100, skiprows=None):
+    def detect_colspecs(
+        self, infer_nrows: int = 100, skiprows: set[int] | None = None
+    ) -> list[tuple[int, int]]:
         # Regex escape the delimiters
         delimiters = "".join([fr"\{x}" for x in self.delimiter])
         pattern = re.compile(f"([^{delimiters}]+)")
@@ -1223,7 +1235,7 @@ def detect_colspecs(self, infer_nrows=100, skiprows=None):
         edge_pairs = list(zip(edges[::2], edges[1::2]))
         return edge_pairs
 
-    def __next__(self):
+    def __next__(self) -> list[str]:
         if self.buffer is not None:
             try:
                 line = next(self.buffer)
@@ -1242,13 +1254,15 @@ class FixedWidthFieldParser(PythonParser):
     See PythonParser for details.
     """
 
-    def __init__(self, f, **kwds):
+    def __init__(
+        self, f: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], **kwds
+    ) -> None:
         # Support iterators, convert to a list.
         self.colspecs = kwds.pop("colspecs")
         self.infer_nrows = kwds.pop("infer_nrows")
         PythonParser.__init__(self, f, **kwds)
 
-    def _make_reader(self, f):
+    def _make_reader(self, f: IO[str]) -> None:
         self.data = FixedWidthReader(
             f,
             self.colspecs,
@@ -1258,7 +1272,7 @@ def _make_reader(self, f):
             self.infer_nrows,
         )
 
-    def _remove_empty_lines(self, lines) -> list:
+    def _remove_empty_lines(self, lines: list[list[Scalar]]) -> list[list[Scalar]]:
         """
         Returns the list of lines without the empty ones. With fixed-width
         fields, empty lines become arrays of empty strings.
diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
@@ -760,11 +760,11 @@ def read_table(
 
 def read_fwf(
     filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str],
-    colspecs="infer",
-    widths=None,
-    infer_nrows=100,
+    colspecs: list[tuple[int, int]] | str | None = "infer",
+    widths: list[int] | None = None,
+    infer_nrows: int = 100,
     **kwds,
-):
+) -> DataFrame | TextFileReader:
     r"""
     Read a table of fixed-width formatted lines into DataFrame.
 
@@ -799,7 +799,7 @@ def read_fwf(
 
     Returns
     -------
-    DataFrame or TextParser
+    DataFrame or TextFileReader
         A comma-separated values (csv) file is returned as two-dimensional
         data structure with labeled axes.
 
@@ -825,6 +825,9 @@ def read_fwf(
             colspecs.append((col, col + w))
             col += w
 
+    # for mypy
+    assert colspecs is not None
+
     # GH#40830
     # Ensure length of `colspecs` matches length of `names`
     names = kwds.get("names")