diff --git a/RELEASE.rst b/RELEASE.rst index 3e935879c197e..c92f9fcd698ee 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -124,6 +124,8 @@ pandas 0.11.0 knows how many columns to expect in the result) (GH2981_) - On a mixed DataFrame, allow setting with indexers with ndarray/DataFrame on rhs (GH3216_) + - Treat boolean values as integers (values 1 and 0) for numeric + operations. (GH2641_) **API Changes** @@ -350,6 +352,7 @@ pandas 0.11.0 .. _GH2747: https://github.com/pydata/pandas/issues/2747 .. _GH2816: https://github.com/pydata/pandas/issues/2816 .. _GH3216: https://github.com/pydata/pandas/issues/3216 +.. _GH2641: https://github.com/pydata/pandas/issues/2641 pandas 0.10.1 ============= diff --git a/doc/source/v0.11.0.txt b/doc/source/v0.11.0.txt index 0193714a5d30c..e299ba43ad9ee 100644 --- a/doc/source/v0.11.0.txt +++ b/doc/source/v0.11.0.txt @@ -304,6 +304,9 @@ Enhancements - added option `display.with_wmp_style` providing a sleeker visual style for plots. Based on https://gist.github.com/huyng/816622 (GH3075_). + - Treat boolean values as integers (values 1 and 0) for numeric + operations. (GH2641_) + See the `full release notes <https://github.com/pydata/pandas/blob/master/RELEASE.rst>`__ or issue tracker on GitHub for a complete list. @@ -328,3 +331,4 @@ on GitHub for a complete list. .. _GH3059: https://github.com/pydata/pandas/issues/3059 .. _GH3070: https://github.com/pydata/pandas/issues/3070 .. _GH3075: https://github.com/pydata/pandas/issues/3075 +.. 
_GH2641: https://github.com/pydata/pandas/issues/2641 diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 7441134aab351..a47d747216f49 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -628,7 +628,7 @@ def should_store(self, value): return com.is_integer_dtype(value) and value.dtype == self.dtype -class BoolBlock(Block): +class BoolBlock(NumericBlock): is_bool = True _can_hold_na = False @@ -641,9 +641,6 @@ def _try_cast(self, element): except: # pragma: no cover return element - def _try_cast_result(self, result): - return _possibly_downcast_to_dtype(result, self.dtype) - def should_store(self, value): return issubclass(value.dtype.type, np.bool_) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index e7b5e266ad09f..f4f04d5a53579 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -7899,15 +7899,14 @@ def test_dataframe_clip(self): def test_get_X_columns(self): # numeric and object columns - # Booleans get casted to float in DataFrame, so skip for now df = DataFrame({'a': [1, 2, 3], - # 'b' : [True, False, True], + 'b' : [True, False, True], 'c': ['foo', 'bar', 'baz'], 'd': [None, None, None], 'e': [3.14, 0.577, 2.773]}) self.assert_(np.array_equal(df._get_numeric_data().columns, - ['a', 'e'])) + ['a', 'b', 'e'])) def test_get_numeric_data(self): intname = np.dtype(np.int_).name @@ -7939,6 +7938,30 @@ def test_get_numeric_data(self): expected = df.ix[:, []] assert_frame_equal(result, expected) + def test_bool_describe_in_mixed_frame(self): + df = DataFrame({ + 'string_data': ['a', 'b', 'c', 'd', 'e'], + 'bool_data': [True, True, False, False, False], + 'int_data': [10, 20, 30, 40, 50], + }) + + # Boolean data and integer data is included in .describe() output, string data isn't + self.assert_(np.array_equal(df.describe().columns, ['bool_data', 'int_data'])) + + bool_describe = df.describe()['bool_data'] + + # Both the min and the max values should stay booleans + 
self.assert_(bool_describe['min'].dtype == np.bool_) + self.assert_(bool_describe['max'].dtype == np.bool_) + + self.assert_(bool_describe['min'] == False) + self.assert_(bool_describe['max'] == True) + + # For numeric operations, like mean or median, the values True/False are cast to + # the integer values 1 and 0 + assert_almost_equal(bool_describe['mean'], 0.4) + assert_almost_equal(bool_describe['50%'], 0) + def test_count(self): f = lambda s: notnull(s).sum() self._check_stat_op('count', f, diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py index 93e9b07558319..eec5f5632d36b 100644 --- a/pandas/tests/test_internals.py +++ b/pandas/tests/test_internals.py @@ -497,7 +497,7 @@ def test_get_numeric_data(self): 'bool': bool_ser, 'obj': obj_ser, 'dt': dt_ser}) xp = DataFrame({'int': int_ser, 'float': float_ser, - 'complex': complex_ser}) + 'complex': complex_ser, 'bool': bool_ser}) rs = DataFrame(df._data.get_numeric_data()) assert_frame_equal(xp, rs)