diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index ffc6757b674ea..6bf1080f7c906 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -1567,7 +1567,7 @@ Bug Fixes - Bug in ``DataFrame.to_csv()`` with ``MultiIndex`` columns in which a stray empty line was added (:issue:`6618`) - Bug in ``DatetimeIndex``, ``TimedeltaIndex`` and ``PeriodIndex.equals()`` may return ``True`` when input isn't ``Index`` but contains the same values (:issue:`13107`) - Bug in assignment against datetime with timezone may not work if it contains datetime near DST boundary (:issue:`14146`) - +- Bug in ``pd.eval()`` and ``HDFStore`` queries where long float literals were truncated on Python 2 (:issue:`14241`) - Bug in ``Index`` raises ``KeyError`` displaying incorrect column when column is not in the df and columns contains duplicate values (:issue:`13822`) - Bug in ``Period`` and ``PeriodIndex`` creating wrong dates when frequency has combined offset aliases (:issue:`13874`) - Bug in ``.to_string()`` when called with an integer ``line_width`` and ``index=False`` raises an UnboundLocalError exception because ``idx`` referenced before assignment. 
diff --git a/pandas/computation/ops.py b/pandas/computation/ops.py index 9446e84d891c4..6ba2a21940d55 100644 --- a/pandas/computation/ops.py +++ b/pandas/computation/ops.py @@ -166,6 +166,11 @@ def _resolve_name(self): def name(self): return self.value + def __unicode__(self): + # in python 2 str() of float + # can truncate shorter than repr() + return repr(self.name) + _bool_op_map = {'not': '~', 'and': '&', 'or': '|'} diff --git a/pandas/computation/pytables.py b/pandas/computation/pytables.py index a4dd03a0fa7ee..9dc18284ec22c 100644 --- a/pandas/computation/pytables.py +++ b/pandas/computation/pytables.py @@ -611,10 +611,14 @@ def __init__(self, value, converted, kind): def tostring(self, encoding): """ quote the string if not encoded else encode and return """ - if self.kind == u('string'): + if self.kind == u'string': if encoding is not None: return self.converted return '"%s"' % self.converted + elif self.kind == u'float': + # python 2 str(float) is not always + # round-trippable so use repr() + return repr(self.converted) return self.converted diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py index 02ed11c65706c..72fbc3906cafb 100644 --- a/pandas/computation/tests/test_eval.py +++ b/pandas/computation/tests/test_eval.py @@ -678,6 +678,31 @@ def test_line_continuation(self): result = pd.eval(exp, engine=self.engine, parser=self.parser) self.assertEqual(result, 12) + def test_float_truncation(self): + # GH 14241 + exp = '1000000000.006' + result = pd.eval(exp, engine=self.engine, parser=self.parser) + expected = np.float64(exp) + self.assertEqual(result, expected) + + df = pd.DataFrame({'A': [1000000000.0009, + 1000000000.0011, + 1000000000.0015]}) + cutoff = 1000000000.0006 + result = df.query("A < %.4f" % cutoff) + self.assertTrue(result.empty) + + cutoff = 1000000000.0010 + result = df.query("A > %.4f" % cutoff) + expected = df.loc[[1, 2], :] + tm.assert_frame_equal(expected, result) + + exact = 1000000000.0011 + 
result = df.query('A == %.4f' % exact) + expected = df.loc[[1], :] + tm.assert_frame_equal(expected, result) + + class TestEvalNumexprPython(TestEvalNumexprPandas): diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 44ff9f8a5a1dd..213bc53e3aab4 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -5002,6 +5002,29 @@ def test_read_from_py_localpath(self): tm.assert_frame_equal(expected, actual) + def test_query_long_float_literal(self): + # GH 14241 + df = pd.DataFrame({'A': [1000000000.0009, + 1000000000.0011, + 1000000000.0015]}) + + with ensure_clean_store(self.path) as store: + store.append('test', df, format='table', data_columns=True) + + cutoff = 1000000000.0006 + result = store.select('test', "A < %.4f" % cutoff) + self.assertTrue(result.empty) + + cutoff = 1000000000.0010 + result = store.select('test', "A > %.4f" % cutoff) + expected = df.loc[[1, 2], :] + tm.assert_frame_equal(expected, result) + + exact = 1000000000.0011 + result = store.select('test', 'A == %.4f' % exact) + expected = df.loc[[1], :] + tm.assert_frame_equal(expected, result) + class TestHDFComplexValues(Base): # GH10447