diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 09cb024cbd95c..dde977c4350df 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -517,6 +517,7 @@ I/O - Bug in :meth:`DataFrame.to_hdf` was not dropping missing rows with ``dropna=True`` (:issue:`35719`) - Bug in :func:`read_html` was raising a ``TypeError`` when supplying a ``pathlib.Path`` argument to the ``io`` parameter (:issue:`37705`) - :meth:`to_excel` and :meth:`to_markdown` support writing to fsspec URLs such as S3 and Google Cloud Storage (:issue:`33987`) +- Bug in :func:`read_fwf` was not skipping blank lines (even with ``skip_blank_lines=True``) (:issue:`37758`) Plotting ^^^^^^^^ diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index d7930f35a1421..ad5385cd659ef 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -3750,6 +3750,19 @@ def _make_reader(self, f): self.infer_nrows, ) + def _remove_empty_lines(self, lines) -> List: + """ + Returns the list of lines without the empty ones. With fixed-width + fields, empty lines become arrays of empty strings. + + See PythonParser._remove_empty_lines. 
+ """ + return [ + line + for line in lines + if any(not isinstance(e, str) or e.strip() for e in line) + ] + def _refine_defaults_read( dialect: Union[str, csv.Dialect], diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py index 4796cf0b79fae..5e9609956183b 100644 --- a/pandas/tests/io/parser/test_read_fwf.py +++ b/pandas/tests/io/parser/test_read_fwf.py @@ -340,6 +340,51 @@ def test_fwf_comment(comment): tm.assert_almost_equal(result, expected) +def test_fwf_skip_blank_lines(): + data = """ + +A B C D + +201158 360.242940 149.910199 11950.7 +201159 444.953632 166.985655 11788.4 + + +201162 502.953953 173.237159 12468.3 + +""" + result = read_fwf(StringIO(data), skip_blank_lines=True) + expected = DataFrame( + [ + [201158, 360.242940, 149.910199, 11950.7], + [201159, 444.953632, 166.985655, 11788.4], + [201162, 502.953953, 173.237159, 12468.3], + ], + columns=["A", "B", "C", "D"], + ) + tm.assert_frame_equal(result, expected) + + data = """\ +A B C D +201158 360.242940 149.910199 11950.7 +201159 444.953632 166.985655 11788.4 + + +201162 502.953953 173.237159 12468.3 +""" + result = read_fwf(StringIO(data), skip_blank_lines=False) + expected = DataFrame( + [ + [201158, 360.242940, 149.910199, 11950.7], + [201159, 444.953632, 166.985655, 11788.4], + [np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan], + [201162, 502.953953, 173.237159, 12468.3], + ], + columns=["A", "B", "C", "D"], + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("thousands", [",", "#", "~"]) def test_fwf_thousands(thousands): data = """\