diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 6eb1b9e950dfd..8574c9ad1d425 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -133,8 +133,24 @@ + fill("', '".join(sorted(_NA_VALUES)), 70, subsequent_indent=" ") + """'. keep_default_na : bool, default True - If na_values are specified and keep_default_na is False the default NaN - values are overridden, otherwise they're appended to. + Whether or not to include the default NaN values when parsing the data. + Depending on whether `na_values` is passed in, the behavior is as follows: + + * If `keep_default_na` is True, and `na_values` are specified, `na_values` + is appended to the default NaN values used for parsing. + * If `keep_default_na` is True, and `na_values` are not specified, only + the default NaN values are used for parsing. + * If `keep_default_na` is False, and `na_values` are specified, only + the NaN values specified `na_values` are used for parsing. + * If `keep_default_na` is False, and `na_values` are not specified, no + strings will be parsed as NaN. + + Note that if `na_filter` is passed in as False, the `keep_default_na` and + `na_values` parameters will be ignored. +na_filter : bool, default True + Detect missing value markers (empty strings and the value of na_values). In + data without any NAs, passing na_filter=False can improve the performance + of reading a large file. verbose : bool, default False Indicate number of NA values placed in non-numeric columns. parse_dates : bool, list-like, or dict, default False diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 5326f2df68972..1d3653f685e1e 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -870,6 +870,27 @@ def test_excel_passes_na(self, read_ext): ) tm.assert_frame_equal(parsed, expected) + @pytest.mark.parametrize("na_filter", [None, True, False]) + def test_excel_passes_na_filter(self, read_ext, na_filter): + # gh-25453 + kwargs = {} + + if na_filter is not None: + kwargs["na_filter"] = na_filter + + with pd.ExcelFile("test5" + read_ext) as excel: + parsed = pd.read_excel( + excel, "Sheet1", keep_default_na=True, na_values=["apple"], **kwargs + ) + + if na_filter is False: + expected = [["1.#QNAN"], [1], ["nan"], ["apple"], ["rabbit"]] + else: + expected = [[np.nan], [1], [np.nan], [np.nan], ["rabbit"]] + + expected = DataFrame(expected, columns=["Test"]) + tm.assert_frame_equal(parsed, expected) + @pytest.mark.parametrize("arg", ["sheet", "sheetname", "parse_cols"]) def test_unexpected_kwargs_raises(self, read_ext, arg): # gh-17964