Skip to content

Commit 5376e2a

Browse files
authored
BUG: Fix na_values dict not working on index column (#57547) (#57965)
BUG: na_values dict not working on index column (#57547) * fix base_parser not setting col_na_values when na_values is a dict containing the key None * fix python_parser applying na_values to a column named None * add unit test to test_na_values.py * update whatsnew.
1 parent 6126b85 commit 5376e2a

File tree

4 files changed

+51
-6
lines changed

4 files changed

+51
-6
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,7 @@ Bug fixes
349349
- Fixed bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`)
350350
- Fixed bug in :meth:`Series.rank` that did not preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`)
351351
- Fixed bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`)
352+
- Fixed bug in :func:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`)
352353

353354
Categorical
354355
^^^^^^^^^^^

pandas/io/parsers/base_parser.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -487,6 +487,8 @@ def _agg_index(self, index, try_parse_dates: bool = True) -> Index:
487487
col_na_values, col_na_fvalues = _get_na_values(
488488
col_name, self.na_values, self.na_fvalues, self.keep_default_na
489489
)
490+
else:
491+
col_na_values, col_na_fvalues = set(), set()
490492

491493
clean_dtypes = self._clean_mapping(self.dtype)
492494

pandas/io/parsers/python_parser.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -354,14 +354,15 @@ def _convert_data(
354354

355355
if isinstance(self.na_values, dict):
356356
for col in self.na_values:
357-
na_value = self.na_values[col]
358-
na_fvalue = self.na_fvalues[col]
357+
if col is not None:
358+
na_value = self.na_values[col]
359+
na_fvalue = self.na_fvalues[col]
359360

360-
if isinstance(col, int) and col not in self.orig_names:
361-
col = self.orig_names[col]
361+
if isinstance(col, int) and col not in self.orig_names:
362+
col = self.orig_names[col]
362363

363-
clean_na_values[col] = na_value
364-
clean_na_fvalues[col] = na_fvalue
364+
clean_na_values[col] = na_value
365+
clean_na_fvalues[col] = na_fvalue
365366
else:
366367
clean_na_values = self.na_values
367368
clean_na_fvalues = self.na_fvalues

pandas/tests/io/parser/test_na_values.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -532,6 +532,47 @@ def test_na_values_dict_aliasing(all_parsers):
532532
tm.assert_dict_equal(na_values, na_values_copy)
533533

534534

535+
def test_na_values_dict_null_column_name(all_parsers):
536+
# see gh-57547
537+
parser = all_parsers
538+
data = ",x,y\n\nMA,1,2\nNA,2,1\nOA,,3"
539+
names = [None, "x", "y"]
540+
na_values = {name: STR_NA_VALUES for name in names}
541+
dtype = {None: "object", "x": "float64", "y": "float64"}
542+
543+
if parser.engine == "pyarrow":
544+
msg = "The pyarrow engine doesn't support passing a dict for na_values"
545+
with pytest.raises(ValueError, match=msg):
546+
parser.read_csv(
547+
StringIO(data),
548+
index_col=0,
549+
header=0,
550+
dtype=dtype,
551+
names=names,
552+
na_values=na_values,
553+
keep_default_na=False,
554+
)
555+
return
556+
557+
expected = DataFrame(
558+
{None: ["MA", "NA", "OA"], "x": [1.0, 2.0, np.nan], "y": [2.0, 1.0, 3.0]}
559+
)
560+
561+
expected = expected.set_index(None)
562+
563+
result = parser.read_csv(
564+
StringIO(data),
565+
index_col=0,
566+
header=0,
567+
dtype=dtype,
568+
names=names,
569+
na_values=na_values,
570+
keep_default_na=False,
571+
)
572+
573+
tm.assert_frame_equal(result, expected)
574+
575+
535576
def test_na_values_dict_col_index(all_parsers):
536577
# see gh-14203
537578
data = "a\nfoo\n1"

0 commit comments

Comments
 (0)