Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -655,6 +655,7 @@ I/O
- Column headers are dropped when constructing a :class:`DataFrame` from a sqlalchemy's ``Row`` object (:issue:`40682`)
- Bug in unpickling a :class:`Index` with object dtype incorrectly inferring numeric dtypes (:issue:`43188`)
- Bug in :func:`read_csv` where reading multi-header input with unequal lengths incorrectly raising uncontrolled ``IndexError`` (:issue:`43102`)
- Bug in :func:`read_csv` raising ``ParserError`` when reading file in chunks and some chunk blocks have fewer columns than header for ``engine="c"`` (:issue:`21211`)
- Bug in :func:`read_csv`, changed exception class when expecting a file path name or file-like object from ``OSError`` to ``TypeError`` (:issue:`43366`)
- Bug in :func:`read_csv` and :func:`read_fwf` ignoring all ``skiprows`` except first when ``nrows`` is specified for ``engine='python'`` (:issue:`44021`, :issue:`10261`)
- Bug in :func:`read_json` not handling non-numpy dtypes correctly (especially ``category``) (:issue:`21892`, :issue:`33205`)
Expand Down
9 changes: 6 additions & 3 deletions pandas/_libs/parsers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -927,9 +927,12 @@ cdef class TextReader:
(num_cols >= self.parser.line_fields[i]) * num_cols

if self.table_width - self.leading_cols > num_cols:
raise ParserError(f"Too many columns specified: expected "
f"{self.table_width - self.leading_cols} "
f"and found {num_cols}")
if ((not callable(self.usecols) and self.usecols and
self.table_width - self.leading_cols < len(self.usecols))
or (self.names and len(self.names) - self.leading_cols > num_cols)):
raise ParserError(f"Too many columns specified: expected "
f"{self.table_width - self.leading_cols} "
f"and found {num_cols}")

if (self.usecols is not None and not callable(self.usecols) and
all(isinstance(u, int) for u in self.usecols)):
Expand Down
45 changes: 45 additions & 0 deletions pandas/tests/io/parser/common/test_chunksize.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,3 +248,48 @@ def test_read_csv_memory_growth_chunksize(all_parsers):
with parser.read_csv(path, chunksize=20) as result:
for _ in result:
pass


def test_chunksize_with_usecols_second_block_shorter(all_parsers):
# GH#21211
parser = all_parsers
data = """1,2,3,4
5,6,7,8
9,10,11
"""

result_chunks = parser.read_csv(
StringIO(data),
names=["a", "b"],
chunksize=2,
usecols=[0, 1],
header=None,
)

expected_frames = [
DataFrame({"a": [1, 5], "b": [2, 6]}),
DataFrame({"a": [9], "b": [10]}, index=[2]),
]

for i, result in enumerate(result_chunks):
tm.assert_frame_equal(result, expected_frames[i])


def test_chunksize_second_block_shorter(all_parsers):
# GH#21211
parser = all_parsers
data = """a,b,c,d
1,2,3,4
5,6,7,8
9,10,11
"""

result_chunks = parser.read_csv(StringIO(data), chunksize=2)

expected_frames = [
DataFrame({"a": [1, 5], "b": [2, 6], "c": [3, 7], "d": [4, 8]}),
DataFrame({"a": [9], "b": [10], "c": [11], "d": [np.nan]}, index=[2]),
]

for i, result in enumerate(result_chunks):
tm.assert_frame_equal(result, expected_frames[i])