From da8b54e47c8d49f12859a88cbf91650480359f20 Mon Sep 17 00:00:00 2001 From: aschade Date: Sat, 2 Dec 2017 19:44:55 -0500 Subject: [PATCH 01/14] TST: Added test for asserting correct dtype coercion after math operation on a bool frame --- pandas/tests/frame/test_dtypes.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 7591f1f1459be..0ee8d4ee82567 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -669,6 +669,21 @@ def test_arg_for_errors_in_astype(self): df.astype(np.int8, errors='ignore') + @pytest.mark.parametrize("num, dtype", [ + (1.0, 'float64'), + (1, 'int64') + ]) + def test_assert_list_and_bool_coerce(self, num, dtype): + #issue 18549 + df = pd.DataFrame([True]) * num + assert df.dtypes[0] == type + df = pd.DataFrame([False]) + num + assert df.dtypes[0] == type + df = pd.DataFrame([True]) - num + assert df.dtypes[0] == type + df = pd.DataFrame([False]) / num + assert df.dtypes[0] == type + class TestDataFrameDatetimeWithTZ(TestData): From 25754bec1b6933680bf44111ae36f60c8a389800 Mon Sep 17 00:00:00 2001 From: aschade Date: Sat, 2 Dec 2017 19:46:11 -0500 Subject: [PATCH 02/14] BUG: Returning other dtype when first is bool and other is int or float --- pandas/core/internals.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index e537cb2edc1c4..c42d7ffa4efc7 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1042,8 +1042,10 @@ def coerce_to_target_dtype(self, other): return self if self.is_bool or is_object_dtype(dtype) or is_bool_dtype(dtype): - # we don't upcast to bool - return self.astype(object) + if is_float_dtype(dtype) or is_integer_dtype(dtype): + return self.astype(dtype) + else: + return self.astype(object) elif ((self.is_float or self.is_complex) and (is_integer_dtype(dtype) or is_float_dtype(dtype))): From 
a25b9c21153d42348c44f5d483ddcee536e33343 Mon Sep 17 00:00:00 2001 From: aschade Date: Sat, 2 Dec 2017 20:02:50 -0500 Subject: [PATCH 03/14] DOC: Updated doc --- doc/source/whatsnew/v0.22.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index 5549ba4e8f735..b8f060137287a 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -132,7 +132,7 @@ Conversion ^^^^^^^^^^ - Bug in :class:`Index` constructor with `dtype='uint64'` where int-like floats were not coerced to :class:`UInt64Index` (:issue:`18400`) -- +- Bug in :class:`Block` where math operations on a `DataFrame` containing `bool` elements were not coerced to the correct numeric dtype in :meth:`coerce_to_target_dtype` (:issue:`18549`) - Indexing From 6df5605fb96d859c64f7e64b7da245ad66ea46d6 Mon Sep 17 00:00:00 2001 From: aschade Date: Sat, 2 Dec 2017 20:04:15 -0500 Subject: [PATCH 04/14] TST: Updated test to check Series also --- pandas/tests/frame/test_dtypes.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 0ee8d4ee82567..5c35a2ef7ec23 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -676,13 +676,13 @@ def test_arg_for_errors_in_astype(self): def test_assert_list_and_bool_coerce(self, num, dtype): #issue 18549 df = pd.DataFrame([True]) * num - assert df.dtypes[0] == type + assert df.dtypes[0] == dtype df = pd.DataFrame([False]) + num - assert df.dtypes[0] == type - df = pd.DataFrame([True]) - num - assert df.dtypes[0] == type - df = pd.DataFrame([False]) / num - assert df.dtypes[0] == type + assert df.dtypes[0] == dtype + ser = pd.Series([True]) - num + assert ser.dtype == dtype + ser = pd.Series([False]) / num + assert ser.dtype == 'float64' class TestDataFrameDatetimeWithTZ(TestData): From 3187f0c18941a314991a2213dab325b936d42d9f Mon Sep 17 00:00:00 2001 From: aschade Date: Sat, 2 
Dec 2017 20:41:19 -0500 Subject: [PATCH 05/14] TST: Expanded tests dynamically. --- pandas/tests/frame/test_dtypes.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 5c35a2ef7ec23..ea397787ea031 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -669,20 +669,19 @@ def test_arg_for_errors_in_astype(self): df.astype(np.int8, errors='ignore') - @pytest.mark.parametrize("num, dtype", [ - (1.0, 'float64'), - (1, 'int64') - ]) - def test_assert_list_and_bool_coerce(self, num, dtype): + from operator import (add, mul, floordiv, sub) + + @pytest.mark.parametrize("num", [1.0, 1]) + @pytest.mark.parametrize("struct", [pd.Series, pd.DataFrame]) + @pytest.mark.parametrize('op', [add, mul, floordiv, sub]) + def test_assert_list_and_bool_coerce(self, num, struct, op): #issue 18549 - df = pd.DataFrame([True]) * num - assert df.dtypes[0] == dtype - df = pd.DataFrame([False]) + num - assert df.dtypes[0] == dtype - ser = pd.Series([True]) - num - assert ser.dtype == dtype - ser = pd.Series([False]) / num - assert ser.dtype == 'float64' + target_type = np.array([op(num, num)]).dtype + res = op(struct([True]), num).dtypes + if isinstance(res, pd.Series): + res = res[0] + assert target_type == res + class TestDataFrameDatetimeWithTZ(TestData): From d796a8207907f135f22cc30fc2fcc492852dd1d9 Mon Sep 17 00:00:00 2001 From: aschade Date: Sat, 2 Dec 2017 20:48:05 -0500 Subject: [PATCH 06/14] CLN: PEP 8 fix --- pandas/tests/frame/test_dtypes.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index ea397787ea031..58b6df8db3c00 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -675,7 +675,7 @@ def test_arg_for_errors_in_astype(self): @pytest.mark.parametrize("struct", [pd.Series, pd.DataFrame]) 
@pytest.mark.parametrize('op', [add, mul, floordiv, sub]) def test_assert_list_and_bool_coerce(self, num, struct, op): - #issue 18549 + # issue 18549 target_type = np.array([op(num, num)]).dtype res = op(struct([True]), num).dtypes if isinstance(res, pd.Series): @@ -683,7 +683,6 @@ def test_assert_list_and_bool_coerce(self, num, struct, op): assert target_type == res - class TestDataFrameDatetimeWithTZ(TestData): def test_interleave(self): From 3546010482e17dd206d078393ba0be44eee40b79 Mon Sep 17 00:00:00 2001 From: aschade Date: Sat, 2 Dec 2017 19:44:55 -0500 Subject: [PATCH 07/14] TST: Added test for asserting correct dtype coercion after math operation on a bool frame --- pandas/tests/frame/test_dtypes.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 7591f1f1459be..0ee8d4ee82567 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -669,6 +669,21 @@ def test_arg_for_errors_in_astype(self): df.astype(np.int8, errors='ignore') + @pytest.mark.parametrize("num, dtype", [ + (1.0, 'float64'), + (1, 'int64') + ]) + def test_assert_list_and_bool_coerce(self, num, dtype): + #issue 18549 + df = pd.DataFrame([True]) * num + assert df.dtypes[0] == type + df = pd.DataFrame([False]) + num + assert df.dtypes[0] == type + df = pd.DataFrame([True]) - num + assert df.dtypes[0] == type + df = pd.DataFrame([False]) / num + assert df.dtypes[0] == type + class TestDataFrameDatetimeWithTZ(TestData): From 264c66fcc20d2a0d9d0f3008cb13d87e478e5b3e Mon Sep 17 00:00:00 2001 From: aschade Date: Sat, 2 Dec 2017 19:46:11 -0500 Subject: [PATCH 08/14] BUG: Returning other dtype when first is bool and other is int or float --- pandas/core/internals.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 1d1d71be16c00..64bec05993305 100644 --- a/pandas/core/internals.py +++ 
b/pandas/core/internals.py @@ -1042,8 +1042,10 @@ def coerce_to_target_dtype(self, other): return self if self.is_bool or is_object_dtype(dtype) or is_bool_dtype(dtype): - # we don't upcast to bool - return self.astype(object) + if is_float_dtype(dtype) or is_integer_dtype(dtype): + return self.astype(dtype) + else: + return self.astype(object) elif ((self.is_float or self.is_complex) and (is_integer_dtype(dtype) or is_float_dtype(dtype))): From b0e2e5db071c1cdaea5fc2239429725f76206054 Mon Sep 17 00:00:00 2001 From: aschade Date: Sat, 2 Dec 2017 20:02:50 -0500 Subject: [PATCH 09/14] DOC: Updated doc --- doc/source/whatsnew/v0.22.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index 09b504cac5ed4..571dc2fe1ae55 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -169,7 +169,7 @@ Conversion ^^^^^^^^^^ - Bug in :class:`Index` constructor with `dtype='uint64'` where int-like floats were not coerced to :class:`UInt64Index` (:issue:`18400`) -- +- Bug in :class:`Block` where math operations on a `DataFrame` containing `bool` elements were not coerced to the correct numeric dtype in :meth:`coerce_to_target_dtype` (:issue:`18549`) 
- Indexing From ad6b10b6bd887dbdb82ac25f3c498edbcbf9688d Mon Sep 17 00:00:00 2001 From: aschade Date: Sat, 2 Dec 2017 20:04:15 -0500 Subject: [PATCH 10/14] TST: Updated test to check Series also --- pandas/tests/frame/test_dtypes.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 0ee8d4ee82567..5c35a2ef7ec23 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -676,13 +676,13 @@ def test_arg_for_errors_in_astype(self): def test_assert_list_and_bool_coerce(self, num, dtype): #issue 18549 df = pd.DataFrame([True]) * num - assert df.dtypes[0] == type + assert df.dtypes[0] == dtype df = pd.DataFrame([False]) + num - assert df.dtypes[0] == type - df = pd.DataFrame([True]) - num - assert df.dtypes[0] == type - df = pd.DataFrame([False]) / num - assert df.dtypes[0] == type + assert df.dtypes[0] == dtype + ser = pd.Series([True]) - num + assert ser.dtype == dtype + ser = pd.Series([False]) / num + assert ser.dtype == 'float64' class TestDataFrameDatetimeWithTZ(TestData): From 9c04498b6ca953cfb646a01a7aa1e7aa739ce69e Mon Sep 17 00:00:00 2001 From: aschade Date: Sat, 2 Dec 2017 20:41:19 -0500 Subject: [PATCH 11/14] TST: Expanded tests dynamically. 
--- pandas/tests/frame/test_dtypes.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 5c35a2ef7ec23..ea397787ea031 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -669,20 +669,19 @@ def test_arg_for_errors_in_astype(self): df.astype(np.int8, errors='ignore') - @pytest.mark.parametrize("num, dtype", [ - (1.0, 'float64'), - (1, 'int64') - ]) - def test_assert_list_and_bool_coerce(self, num, dtype): + from operator import (add, mul, floordiv, sub) + + @pytest.mark.parametrize("num", [1.0, 1]) + @pytest.mark.parametrize("struct", [pd.Series, pd.DataFrame]) + @pytest.mark.parametrize('op', [add, mul, floordiv, sub]) + def test_assert_list_and_bool_coerce(self, num, struct, op): #issue 18549 - df = pd.DataFrame([True]) * num - assert df.dtypes[0] == dtype - df = pd.DataFrame([False]) + num - assert df.dtypes[0] == dtype - ser = pd.Series([True]) - num - assert ser.dtype == dtype - ser = pd.Series([False]) / num - assert ser.dtype == 'float64' + target_type = np.array([op(num, num)]).dtype + res = op(struct([True]), num).dtypes + if isinstance(res, pd.Series): + res = res[0] + assert target_type == res + class TestDataFrameDatetimeWithTZ(TestData): From fbf0c7d7d43443bc50500371d4122eab724cd8ad Mon Sep 17 00:00:00 2001 From: aschade Date: Sat, 2 Dec 2017 20:48:05 -0500 Subject: [PATCH 12/14] CLN: PEP 8 fix --- pandas/tests/frame/test_dtypes.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index ea397787ea031..58b6df8db3c00 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -675,7 +675,7 @@ def test_arg_for_errors_in_astype(self): @pytest.mark.parametrize("struct", [pd.Series, pd.DataFrame]) @pytest.mark.parametrize('op', [add, mul, floordiv, sub]) def 
test_assert_list_and_bool_coerce(self, num, struct, op): - #issue 18549 + # issue 18549 target_type = np.array([op(num, num)]).dtype res = op(struct([True]), num).dtypes if isinstance(res, pd.Series): @@ -683,7 +683,6 @@ def test_assert_list_and_bool_coerce(self, num, struct, op): assert target_type == res - class TestDataFrameDatetimeWithTZ(TestData): def test_interleave(self): From 2cb084f01629cf30fdb43e050e94623b6d178145 Mon Sep 17 00:00:00 2001 From: aschade Date: Sun, 3 Dec 2017 00:24:21 -0500 Subject: [PATCH 13/14] TST: Fixed test failure --- pandas/core/internals.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index c42d7ffa4efc7..7a80b669c3376 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1026,7 +1026,7 @@ def f(m, v, i): return [self.make_block(new_values, fastpath=True)] - def coerce_to_target_dtype(self, other): + def coerce_to_target_dtype(self, other, force_coericion=False): """ coerce the current block to a dtype compat for other we will return a block, possibly object, and not raise @@ -1042,7 +1042,7 @@ def coerce_to_target_dtype(self, other): return self if self.is_bool or is_object_dtype(dtype) or is_bool_dtype(dtype): - if is_float_dtype(dtype) or is_integer_dtype(dtype): + if force_coericion and is_float_dtype(dtype) or is_integer_dtype(dtype): return self.astype(dtype) else: return self.astype(object) @@ -1326,7 +1326,7 @@ def eval(self, func, other, errors='raise', try_cast=False, mgr=None): values, values_mask, other, other_mask = self._try_coerce_args( transf(values), other) except TypeError: - block = self.coerce_to_target_dtype(orig_other) + block = self.coerce_to_target_dtype(orig_other, True) return block.eval(func, orig_other, errors=errors, try_cast=try_cast, mgr=mgr) From a85cee4778af5a712cfcbd77331ee4ddb5d37375 Mon Sep 17 00:00:00 2001 From: aschade Date: Sun, 3 Dec 2017 00:24:21 -0500 Subject: [PATCH 14/14] TST: Fixed test 
failure --- pandas/core/internals.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 64bec05993305..2bbdc06768767 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1026,7 +1026,7 @@ def f(m, v, i): return [self.make_block(new_values, fastpath=True)] - def coerce_to_target_dtype(self, other): + def coerce_to_target_dtype(self, other, force_coericion=False): """ coerce the current block to a dtype compat for other we will return a block, possibly object, and not raise @@ -1042,7 +1042,7 @@ def coerce_to_target_dtype(self, other): return self if self.is_bool or is_object_dtype(dtype) or is_bool_dtype(dtype): - if is_float_dtype(dtype) or is_integer_dtype(dtype): + if force_coericion and is_float_dtype(dtype) or is_integer_dtype(dtype): return self.astype(dtype) else: return self.astype(object) @@ -1326,7 +1326,7 @@ def eval(self, func, other, errors='raise', try_cast=False, mgr=None): values, values_mask, other, other_mask = self._try_coerce_args( transf(values), other) except TypeError: - block = self.coerce_to_target_dtype(orig_other) + block = self.coerce_to_target_dtype(orig_other, True) return block.eval(func, orig_other, errors=errors, try_cast=try_cast, mgr=mgr)