diff --git a/doc/source/whatsnew/v0.23.5.txt b/doc/source/whatsnew/v0.23.5.txt index 916a246355b5f..f298dac8ae6a7 100644 --- a/doc/source/whatsnew/v0.23.5.txt +++ b/doc/source/whatsnew/v0.23.5.txt @@ -40,3 +40,7 @@ Bug Fixes - - + +**I/O** + +- Bug in :func:`read_csv` that caused it to raise ``OverflowError`` when trying to use 'inf' as ``na_value`` with integer index column (:issue:`17128`) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 49705cb6d9ad2..4bf62b021cddc 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -95,7 +95,7 @@ def _ensure_data(values, dtype=None): values = ensure_float64(values) return values, 'float64', 'float64' - except (TypeError, ValueError): + except (TypeError, ValueError, OverflowError): # if we are trying to coerce to a dtype # and it is incompat this will fall thru to here return ensure_object(values), 'object', 'object' @@ -429,7 +429,7 @@ def isin(comps, values): values = values.astype('int64', copy=False) comps = comps.astype('int64', copy=False) f = lambda x, y: htable.ismember_int64(x, y) - except (TypeError, ValueError): + except (TypeError, ValueError, OverflowError): values = values.astype(object) comps = comps.astype(object) diff --git a/pandas/tests/io/parser/na_values.py b/pandas/tests/io/parser/na_values.py index 9c68770e06b02..880ab707cfd07 100644 --- a/pandas/tests/io/parser/na_values.py +++ b/pandas/tests/io/parser/na_values.py @@ -369,3 +369,14 @@ def test_no_na_filter_on_index(self): expected = DataFrame({"a": [1, 4], "c": [3, 6]}, index=Index([np.nan, 5.0], name="b")) tm.assert_frame_equal(out, expected) + + def test_inf_na_values_with_int_index(self): + # see gh-17128 + data = "idx,col1,col2\n1,3,4\n2,inf,-inf" + + # Don't fail with OverflowError with infs and integer index column + out = self.read_csv(StringIO(data), index_col=[0], + na_values=['inf', '-inf']) + expected = DataFrame({"col1": [3, np.nan], "col2": [4, np.nan]}, + index=Index([1, 2], name="idx")) + tm.assert_frame_equal(out, expected)