From 5e76a171f43f5164228e7889a20c3945a46d006b Mon Sep 17 00:00:00 2001
From: Will Ayd
Date: Thu, 2 May 2019 21:12:05 -0700
Subject: [PATCH 1/3] Added failing test

---
Review note (ignored by patch tools): `result` comes from `read_csv`, so
`result.iloc[i]` presumably selects a whole row rather than a scalar; the
`assert ... == ...` lines below would then evaluate the truth value of a
Series. Scalar access such as `.iloc[i, 0]` looks like the intent -- confirm.

 pandas/tests/io/parser/test_dtypes.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/pandas/tests/io/parser/test_dtypes.py b/pandas/tests/io/parser/test_dtypes.py
index 1d3c935e9101b..c7ab6cc077291 100644
--- a/pandas/tests/io/parser/test_dtypes.py
+++ b/pandas/tests/io/parser/test_dtypes.py
@@ -509,3 +509,27 @@ def test_numeric_dtype(all_parsers, dtype):
     result = parser.read_csv(StringIO(data), header=None, dtype=dtype)
 
     tm.assert_frame_equal(expected, result)
+
+
+def test_intna_precision(all_parsers):
+    parser = all_parsers
+    data = "1556559573141592653\n1556559573141592654\n\n1556559573141592655"
+    dtype = 'Int64'
+
+    expected = DataFrame([
+        [1556559573141592653],
+        [1556559573141592654],
+        [0],
+        [1556559573141592655],
+    ], dtype=dtype)
+    expected.iloc[2] = np.nan  # TODO: fix general bug on df construction
+
+    result = parser.read_csv(StringIO(data), header=None, dtype=dtype,
+                             skip_blank_lines=False)
+
+    tm.assert_frame_equal(result, expected)
+
+    # See why tm.assert_frame_equal doesn't fail...
+    for i in range(len(result)):
+        assert result.iloc[i] == expected.iloc[i]
+

From 6d98872b914ff78d6d6cf1c9bc6eb0020d8b5584 Mon Sep 17 00:00:00 2001
From: Will Ayd
Date: Thu, 2 May 2019 21:16:10 -0700
Subject: [PATCH 2/3] Improved hacking of test

---
Review note (ignored by patch tools): this replaces the loop with explicit
asserts for rows 0, 1 and 3 only; row 2 (the NA row) is no longer compared
here. The asserts still compare `.iloc[i]` results directly -- see the
row-vs-scalar question raised on the test as first added.

 pandas/tests/io/parser/test_dtypes.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/io/parser/test_dtypes.py b/pandas/tests/io/parser/test_dtypes.py
index c7ab6cc077291..92574c81b4fa2 100644
--- a/pandas/tests/io/parser/test_dtypes.py
+++ b/pandas/tests/io/parser/test_dtypes.py
@@ -530,6 +530,6 @@ def test_intna_precision(all_parsers):
     tm.assert_frame_equal(result, expected)
 
     # See why tm.assert_frame_equal doesn't fail...
-    for i in range(len(result)):
-        assert result.iloc[i] == expected.iloc[i]
-
+    assert result.iloc[0] == expected.iloc[0]
+    assert result.iloc[1] == expected.iloc[1]
+    assert result.iloc[3] == expected.iloc[3]

From 8132596b81bbe6b98a8dd5ca5f50305103de6483 Mon Sep 17 00:00:00 2001
From: Will Ayd
Date: Thu, 2 May 2019 21:17:05 -0700
Subject: [PATCH 3/3] Fixed construction issue

---
Review notes (ignored by patch tools):
 * The rewritten `_from_sequence_of_strings` masks NA entries with 0 before
   calling `to_numeric`, then builds the array from the parsed values plus
   the NA mask, so NA no longer forces a float intermediate.
 * The new body does not read the `dtype` or `copy` arguments, whereas the
   removed body forwarded both to `cls._from_sequence`. Confirm that a
   caller requesting a specific integer dtype still gets it.
 * The new body names `IntegerArray` directly instead of using `cls`;
   confirm this is intended for subclasses.

 pandas/core/arrays/integer.py | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
index 3f0a3590e24a3..ba01d7338a8d0 100644
--- a/pandas/core/arrays/integer.py
+++ b/pandas/core/arrays/integer.py
@@ -1,11 +1,12 @@
 import copy
 import sys
-from typing import Type
+from typing import Sequence, Type
 import warnings
 
 import numpy as np
 
 from pandas._libs import lib
+from pandas._typing import Dtype
 from pandas.compat import set_function_name
 from pandas.util._decorators import cache_readonly
 
@@ -304,9 +305,18 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
         return integer_array(scalars, dtype=dtype, copy=copy)
 
     @classmethod
-    def _from_sequence_of_strings(cls, strings, dtype=None, copy=False):
-        scalars = to_numeric(strings, errors="raise")
-        return cls._from_sequence(scalars, dtype, copy)
+    def _from_sequence_of_strings(cls,
+                                  strings: Sequence[str],
+                                  dtype: Dtype = None,
+                                  copy: bool = False) -> 'IntegerArray':
+        # Mask the NA location before sending to to_numeric to prevent
+        # undesirable cast to float which may lose precision
+        mask = isna(strings)
+        masked_strings = np.where(mask, 0, strings)
+
+        scalars = to_numeric(masked_strings, errors="raise")
+
+        return IntegerArray(scalars, mask)
 
     @classmethod
     def _from_factorized(cls, values, original):