Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -655,6 +655,7 @@ I/O
- Column headers are dropped when constructing a :class:`DataFrame` from a sqlalchemy's ``Row`` object (:issue:`40682`)
- Bug in unpickling a :class:`Index` with object dtype incorrectly inferring numeric dtypes (:issue:`43188`)
- Bug in :func:`read_csv` where reading multi-header input with unequal lengths incorrectly raising uncontrolled ``IndexError`` (:issue:`43102`)
- Bug in :func:`read_csv` raising ``ParserError`` when reading file in chunks and some chunk blocks have fewer columns than header for ``engine="c"`` (:issue:`21211`)
- Bug in :func:`read_csv`, changed exception class when expecting a file path name or file-like object from ``OSError`` to ``TypeError`` (:issue:`43366`)
- Bug in :func:`read_csv` and :func:`read_fwf` ignoring all ``skiprows`` except first when ``nrows`` is specified for ``engine='python'`` (:issue:`44021`, :issue:`10261`)
- Bug in :func:`read_json` not handling non-numpy dtypes correctly (especially ``category``) (:issue:`21892`, :issue:`33205`)
Expand Down
9 changes: 6 additions & 3 deletions pandas/_libs/parsers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -927,9 +927,12 @@ cdef class TextReader:
(num_cols >= self.parser.line_fields[i]) * num_cols

if self.table_width - self.leading_cols > num_cols:
raise ParserError(f"Too many columns specified: expected "
f"{self.table_width - self.leading_cols} "
f"and found {num_cols}")
if ((not callable(self.usecols) and self.usecols and
self.table_width - self.leading_cols < len(self.usecols))
or (self.names and len(self.names) - self.leading_cols > num_cols)):
raise ParserError(f"Too many columns specified: expected "
f"{self.table_width - self.leading_cols} "
f"and found {num_cols}")

if (self.usecols is not None and not callable(self.usecols) and
all(isinstance(u, int) for u in self.usecols)):
Expand Down
45 changes: 45 additions & 0 deletions pandas/tests/io/parser/common/test_chunksize.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,3 +248,48 @@ def test_read_csv_memory_growth_chunksize(all_parsers):
with parser.read_csv(path, chunksize=20) as result:
for _ in result:
pass


def test_chunksize_with_usecols_second_block_shorter(all_parsers):
# GH#21211
parser = all_parsers
data = """1,2,3,4
5,6,7,8
9,10,11
"""

result_chunks = parser.read_csv(
StringIO(data),
names=["a", "b"],
chunksize=2,
usecols=[0, 1],
header=None,
)

expected_frames = [
DataFrame({"a": [1, 5], "b": [2, 6]}),
DataFrame({"a": [9], "b": [10]}, index=[2]),
]

for i, result in enumerate(result_chunks):
tm.assert_frame_equal(result, expected_frames[i])


def test_chunksize_second_block_shorter(all_parsers):
# GH#21211
parser = all_parsers
data = """a,b,c,d
1,2,3,4
5,6,7,8
9,10,11
"""

result_chunks = parser.read_csv(StringIO(data), chunksize=2)

expected_frames = [
DataFrame({"a": [1, 5], "b": [2, 6], "c": [3, 7], "d": [4, 8]}),
DataFrame({"a": [9], "b": [10], "c": [11], "d": [np.nan]}, index=[2]),
]

for i, result in enumerate(result_chunks):
tm.assert_frame_equal(result, expected_frames[i])