diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index ef561d50066d1..bce5c7927c72d 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -301,6 +301,7 @@ Bug fixes - Fixed bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`) - Fixed bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`) - Fixed bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`) +- Fixed bug in :func:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`) Categorical ^^^^^^^^^^^ diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 7b06c6b6b0d39..bb9f1db0d05e8 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -487,6 +487,8 @@ def _agg_index(self, index, try_parse_dates: bool = True) -> Index: col_na_values, col_na_fvalues = _get_na_values( col_name, self.na_values, self.na_fvalues, self.keep_default_na ) + else: + col_na_values, col_na_fvalues = set(), set() clean_dtypes = self._clean_mapping(self.dtype) diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index dbda47172f6ac..21dcf5f2f9310 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -356,14 +356,15 @@ def _convert_data( if isinstance(self.na_values, dict): for col in self.na_values: - na_value = self.na_values[col] - na_fvalue = self.na_fvalues[col] + if col is not None: + na_value = self.na_values[col] + na_fvalue = self.na_fvalues[col] - if isinstance(col, int) and col not in self.orig_names: - col = self.orig_names[col] + if isinstance(col, int) and col not in self.orig_names: + col = self.orig_names[col] - 
clean_na_values[col] = na_value - clean_na_fvalues[col] = na_fvalue + clean_na_values[col] = na_value + clean_na_fvalues[col] = na_fvalue else: clean_na_values = self.na_values clean_na_fvalues = self.na_fvalues diff --git a/pandas/tests/io/parser/test_na_values.py b/pandas/tests/io/parser/test_na_values.py index ba0e3033321e4..1e370f649aef8 100644 --- a/pandas/tests/io/parser/test_na_values.py +++ b/pandas/tests/io/parser/test_na_values.py @@ -532,6 +532,47 @@ def test_na_values_dict_aliasing(all_parsers): tm.assert_dict_equal(na_values, na_values_copy) +def test_na_values_dict_null_column_name(all_parsers): + # see gh-57547 + parser = all_parsers + data = ",x,y\n\nMA,1,2\nNA,2,1\nOA,,3" + names = [None, "x", "y"] + na_values = {name: STR_NA_VALUES for name in names} + dtype = {None: "object", "x": "float64", "y": "float64"} + + if parser.engine == "pyarrow": + msg = "The pyarrow engine doesn't support passing a dict for na_values" + with pytest.raises(ValueError, match=msg): + parser.read_csv( + StringIO(data), + index_col=0, + header=0, + dtype=dtype, + names=names, + na_values=na_values, + keep_default_na=False, + ) + return + + expected = DataFrame( + {None: ["MA", "NA", "OA"], "x": [1.0, 2.0, np.nan], "y": [2.0, 1.0, 3.0]} + ) + + expected = expected.set_index(None) + + result = parser.read_csv( + StringIO(data), + index_col=0, + header=0, + dtype=dtype, + names=names, + na_values=na_values, + keep_default_na=False, + ) + + tm.assert_frame_equal(result, expected) + + def test_na_values_dict_col_index(all_parsers): # see gh-14203 data = "a\nfoo\n1"