From b386184203cd1f6aa0d608bc00d1f00ab675effa Mon Sep 17 00:00:00 2001 From: Chris Date: Mon, 19 Sep 2016 19:37:22 -0500 Subject: [PATCH 1/6] BUG: float trunc in eval with py 2 --- doc/source/whatsnew/v0.19.0.txt | 2 +- pandas/computation/ops.py | 5 +++++ pandas/computation/tests/test_eval.py | 12 ++++++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index ffc6757b674ea..017808d5f3f96 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -1567,7 +1567,7 @@ Bug Fixes - Bug in ``DataFrame.to_csv()`` with ``MultiIndex`` columns in which a stray empty line was added (:issue:`6618`) - Bug in ``DatetimeIndex``, ``TimedeltaIndex`` and ``PeriodIndex.equals()`` may return ``True`` when input isn't ``Index`` but contains the same values (:issue:`13107`) - Bug in assignment against datetime with timezone may not work if it contains datetime near DST boundary (:issue:`14146`) - +- Bug in ``pd.eval()`` truncating long float literals with python 2 (:issue:`14241`) - Bug in ``Index`` raises ``KeyError`` displaying incorrect column when column is not in the df and columns contains duplicate values (:issue:`13822`) - Bug in ``Period`` and ``PeriodIndex`` creating wrong dates when frequency has combined offset aliases (:issue:`13874`) - Bug in ``.to_string()`` when called with an integer ``line_width`` and ``index=False`` raises an UnboundLocalError exception because ``idx`` referenced before assignment. diff --git a/pandas/computation/ops.py b/pandas/computation/ops.py index 9446e84d891c4..6ba2a21940d55 100644 --- a/pandas/computation/ops.py +++ b/pandas/computation/ops.py @@ -166,6 +166,11 @@ def _resolve_name(self): def name(self): return self.value + def __unicode__(self): + # in python 2 str() of float + # can truncate shorter than repr() + return repr(self.name) + _bool_op_map = {'not': '~', 'and': '&', 'or': '|'} diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py index 02ed11c65706c..4fad51422cfb2 100644 --- a/pandas/computation/tests/test_eval.py +++ b/pandas/computation/tests/test_eval.py @@ -678,6 +678,18 @@ def test_line_continuation(self): result = pd.eval(exp, engine=self.engine, parser=self.parser) self.assertEqual(result, 12) + def test_float_truncation(self): + # GH 14241 + exp = '1000000000.006' + result = pd.eval(exp, engine=self.engine, parser=self.parser) + expected = np.float64(exp) + self.assertEqual(result, expected) + + df = pd.DataFrame([{"A": 1000000000.0099}]) + cutoff = 1000000000.006 + result = df.query("A < %.3f" % cutoff) + self.assertTrue(result.empty) + class TestEvalNumexprPython(TestEvalNumexprPandas): From 05de4aea40db9906737977e043c56124ce919b1f Mon Sep 17 00:00:00 2001 From: Chris Date: Tue, 20 Sep 2016 18:13:20 -0500 Subject: [PATCH 2/6] fix rounding in pytables query --- doc/source/whatsnew/v0.19.0.txt | 2 +- pandas/computation/pytables.py | 6 +++++- pandas/io/tests/test_pytables.py | 10 ++++++++++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 017808d5f3f96..6bf1080f7c906 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -1567,7 +1567,7 @@ Bug Fixes - Bug in ``DataFrame.to_csv()`` with ``MultiIndex`` columns in which a stray empty line was added (:issue:`6618`) - Bug in ``DatetimeIndex``, ``TimedeltaIndex`` and ``PeriodIndex.equals()`` may return ``True`` when input isn't ``Index`` but contains the same values (:issue:`13107`) - Bug in assignment against datetime with timezone may not work if it contains datetime near DST boundary (:issue:`14146`) -- Bug in ``pd.eval()`` truncating long float literals with python 2 (:issue:`14241`) +- Bug in ``pd.eval()`` and ``HDFStore`` query truncating long float literals with python 2 (:issue:`14241`) - Bug in ``Index`` raises ``KeyError`` displaying incorrect column when column is not in the df and columns contains duplicate values (:issue:`13822`) - Bug in ``Period`` and ``PeriodIndex`` creating wrong dates when frequency has combined offset aliases (:issue:`13874`) - Bug in ``.to_string()`` when called with an integer ``line_width`` and ``index=False`` raises an UnboundLocalError exception because ``idx`` referenced before assignment. diff --git a/pandas/computation/pytables.py b/pandas/computation/pytables.py index a4dd03a0fa7ee..9dc18284ec22c 100644 --- a/pandas/computation/pytables.py +++ b/pandas/computation/pytables.py @@ -611,10 +611,14 @@ def __init__(self, value, converted, kind): def tostring(self, encoding): """ quote the string if not encoded else encode and return """ - if self.kind == u('string'): + if self.kind == u'string': if encoding is not None: return self.converted return '"%s"' % self.converted + elif self.kind == u'float': + # python 2 str(float) is not always + # round-trippable so use repr() + return repr(self.converted) return self.converted diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 44ff9f8a5a1dd..df28488756c43 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -5003,6 +5003,16 @@ def test_read_from_py_localpath(self): tm.assert_frame_equal(expected, actual) + def test_query_long_float_literal(self): + # GH 14241 + df = pd.DataFrame([{"A": 1000000000.0099}]) + cutoff = 1000000000.006 + with ensure_clean_store(self.path) as store: + store.append('test', df, format='table', data_columns=True) + result = store.select('test', "A < %.3f" % cutoff) + self.assertTrue(result.empty) + + class TestHDFComplexValues(Base): # GH10447 From c1e77b6ca9115c48b7eb9dc13ec99c6e345d9c50 Mon Sep 17 00:00:00 2001 From: Chris Date: Tue, 20 Sep 2016 18:15:40 -0500 Subject: [PATCH 3/6] lint fixup --- pandas/io/tests/test_pytables.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index df28488756c43..371b4d88cc836 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -5002,7 +5002,6 @@ def test_read_from_py_localpath(self): tm.assert_frame_equal(expected, actual) - def test_query_long_float_literal(self): # GH 14241 df = pd.DataFrame([{"A": 1000000000.0099}]) From 87df38d32a445de79f5cc8e2e2652a2ac932064b Mon Sep 17 00:00:00 2001 From: Chris Date: Wed, 21 Sep 2016 06:10:48 -0500 Subject: [PATCH 4/6] better tests --- pandas/computation/tests/test_eval.py | 19 ++++++++++++++++--- pandas/io/tests/test_pytables.py | 22 ++++++++++++++++++---- 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py index 4fad51422cfb2..564e3606e0a69 100644 --- a/pandas/computation/tests/test_eval.py +++ b/pandas/computation/tests/test_eval.py @@ -685,11 +685,24 @@ def test_float_truncation(self): expected = np.float64(exp) self.assertEqual(result, expected) - df = pd.DataFrame([{"A": 1000000000.0099}]) - cutoff = 1000000000.006 - result = df.query("A < %.3f" % cutoff) + df = pd.DataFrame({'A': [1000000000.0009, + 1000000000.0011, + 1000000000.0015]}) + cutoff = 1000000000.0006 + result = df.query("A < %.4f" % cutoff) self.assertTrue(result.empty) + cutoff = 1000000000.0010 + result = df.query("A > %.4f" % cutoff) + expected = df.loc[[1,2], :] + tm.assert_frame_equal(expected, result) + + exact = 1000000000.0011 + result = df.query('A == %.4f' % exact) + expected = df.loc[[1], :] + tm.assert_frame_equal(expected, result) + + class TestEvalNumexprPython(TestEvalNumexprPandas): diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 371b4d88cc836..e35279a27a855 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -5004,12 +5004,26 @@ def test_read_from_py_localpath(self): def test_query_long_float_literal(self): # GH 14241 - df = pd.DataFrame([{"A": 1000000000.0099}]) - cutoff = 1000000000.006 + df = pd.DataFrame({'A': [1000000000.0009, + 1000000000.0011, + 1000000000.0015]}) + with ensure_clean_store(self.path) as store: store.append('test', df, format='table', data_columns=True) - result = store.select('test', "A < %.3f" % cutoff) - self.assertTrue(result.empty) + + cutoff = 1000000000.0006 + result = store.select('test', "A < %.4f" % cutoff) + self.assertTrue(result.empty) + + cutoff = 1000000000.0010 + result = store.select('test', "A > %.4f" % cutoff) + expected = df.loc[[1,2], :] + tm.assert_frame_equal(expected, result) + + exact = 1000000000.0011 + result = store.select('test', 'A == %.4f' % exact) + expected = df.loc[[1], :] + tm.assert_frame_equal(expected, result) class TestHDFComplexValues(Base): From 8b0cb57d2fa255950871fbc3a645d48bbeb79f7a Mon Sep 17 00:00:00 2001 From: Chris Date: Wed, 21 Sep 2016 07:00:05 -0500 Subject: [PATCH 5/6] lint --- pandas/computation/tests/test_eval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py index 564e3606e0a69..72fbc3906cafb 100644 --- a/pandas/computation/tests/test_eval.py +++ b/pandas/computation/tests/test_eval.py @@ -694,7 +694,7 @@ def test_float_truncation(self): cutoff = 1000000000.0010 result = df.query("A > %.4f" % cutoff) - expected = df.loc[[1,2], :] + expected = df.loc[[1, 2], :] tm.assert_frame_equal(expected, result) exact = 1000000000.0011 From d679aa34119a2e588ed70f3bf64795e3ec1d0f26 Mon Sep 17 00:00:00 2001 From: Chris Date: Wed, 21 Sep 2016 17:30:33 -0500 Subject: [PATCH 6/6] actual lint fix --- pandas/io/tests/test_pytables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index e35279a27a855..213bc53e3aab4 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -5017,7 +5017,7 @@ def test_query_long_float_literal(self): cutoff = 1000000000.0010 result = store.select('test', "A > %.4f" % cutoff) - expected = df.loc[[1,2], :] + expected = df.loc[[1, 2], :] tm.assert_frame_equal(expected, result) exact = 1000000000.0011