Fix read_csv raising TypeError when nrows and iterator are given without chunksize (GH 59079)

Within the `nrows is not None` branch of get_chunk, a `size` of None now
means "read the remaining nrows - _currow rows" instead of being passed to
min(). Adds a whatsnew entry and a regression test.

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 3d869bf31f372..73e939e7f4bb0 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -554,6 +554,7 @@ I/O
 - Bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
 - Bug in :meth:`HDFStore.get` was failing to save data of dtype datetime64[s] correctly (:issue:`59004`)
 - Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`)
+- Bug in :meth:`read_csv` raising ``TypeError`` when ``nrows`` and ``iterator`` are specified without specifying a ``chunksize``. (:issue:`59079`)
 - Bug in :meth:`read_stata` raising ``KeyError`` when input file is stored in big-endian format and contains strL data. (:issue:`58638`)
 
 Period
diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
index 8a07c99b0fe94..d00fc3b15976c 100644
--- a/pandas/io/parsers/readers.py
+++ b/pandas/io/parsers/readers.py
@@ -1534,7 +1534,10 @@ def get_chunk(self, size: int | None = None) -> DataFrame:
         if self.nrows is not None:
             if self._currow >= self.nrows:
                 raise StopIteration
-            size = min(size, self.nrows - self._currow)
+            if size is None:
+                size = self.nrows - self._currow
+            else:
+                size = min(size, self.nrows - self._currow)
         return self.read(nrows=size)
 
     def __enter__(self) -> Self:
diff --git a/pandas/tests/io/parser/common/test_iterator.py b/pandas/tests/io/parser/common/test_iterator.py
index 091edb67f6e19..668aab05b9fa4 100644
--- a/pandas/tests/io/parser/common/test_iterator.py
+++ b/pandas/tests/io/parser/common/test_iterator.py
@@ -98,6 +98,31 @@ def test_iterator_stop_on_chunksize(all_parsers):
     tm.assert_frame_equal(concat(result), expected)
 
 
+def test_nrows_iterator_without_chunksize(all_parsers):
+    # GH 59079
+    parser = all_parsers
+    data = """A,B,C
+foo,1,2,3
+bar,4,5,6
+baz,7,8,9
+"""
+    if parser.engine == "pyarrow":
+        msg = "The 'iterator' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(StringIO(data), iterator=True, nrows=2)
+        return
+
+    with parser.read_csv(StringIO(data), iterator=True, nrows=2) as reader:
+        result = reader.get_chunk()
+
+    expected = DataFrame(
+        [[1, 2, 3], [4, 5, 6]],
+        index=["foo", "bar"],
+        columns=["A", "B", "C"],
+    )
+    tm.assert_frame_equal(result, expected)
+
+
 @pytest.mark.parametrize(
     "kwargs", [{"iterator": True, "chunksize": 1}, {"iterator": True}, {"chunksize": 1}]
 )