From 5afd4b4cfeae92c57866c26e7529979c4b810a6d Mon Sep 17 00:00:00 2001 From: jreback Date: Tue, 3 Sep 2013 17:58:15 -0400 Subject: [PATCH 1/2] API: allow single element boolean Series to mimic numpy behavior( related GH4657) TST: test for single element with null-like DOC: deprecation message for using bool(Series([True])) --- doc/source/release.rst | 3 ++- doc/source/v0.13.0.txt | 5 +++-- pandas/core/generic.py | 3 ++- pandas/core/series.py | 13 ++++++++++++- pandas/tests/test_generic.py | 19 +++++++++++++++++-- 5 files changed, 36 insertions(+), 7 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 801158a00b9ab..da3b9882c30f9 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -241,7 +241,8 @@ API Changes - Infer and downcast dtype if ``downcast='infer'`` is passed to ``fillna/ffill/bfill`` (:issue:`4604`) - ``__nonzero__`` for all NDFrame objects, will now raise a ``ValueError``, this reverts back to (:issue:`1073`, :issue:`4633`) - behavior. + behavior (except for a single-element boolean Series, which mimics ``numpy`` behavior and will evaluate + to the bool of the element) - ``DataFrame.update()`` no longer raises a ``DataConflictError``, it now will raise a ``ValueError`` instead (if necessary) (:issue:`4732`) - ``Series.isin()`` and ``DataFrame.isin()`` now raise a ``TypeError`` when diff --git a/doc/source/v0.13.0.txt b/doc/source/v0.13.0.txt index 9a4d644b12104..83bb5f1a2dfdb 100644 --- a/doc/source/v0.13.0.txt +++ b/doc/source/v0.13.0.txt @@ -56,9 +56,10 @@ API changes - Infer and downcast dtype if ``downcast='infer'`` is passed to ``fillna/ffill/bfill`` (:issue:`4604`) - ``__nonzero__`` for all NDFrame objects, will now raise a ``ValueError``, this reverts back to (:issue:`1073`, :issue:`4633`) - behavior. See :ref:`gotchas` for a more detailed discussion. + behavior (except for a single-element boolean Series, which mimics ``numpy`` behavior and will evaluate + to the bool of the element). 
See :ref:`gotchas` for a more detailed discussion. - This prevent behaviors like (which will now all raise ``ValueError``) + This prevents behaviors like (which will now all raise ``ValueError``) .. code-block:: python diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 18a03eb313dd2..3125995a1d86f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -596,7 +596,8 @@ def empty(self): return not all(len(self._get_axis(a)) > 0 for a in self._AXIS_ORDERS) def __nonzero__(self): - raise ValueError("The truth value of an array is ambiguous. Use a.empty, a.item(), a.any() or a.all().") + raise ValueError("The truth value of a {0} is ambiguous. " + "Use a.empty, a.item(), a.any() or a.all().".format(self.__class__.__name__)) __bool__ = __nonzero__ diff --git a/pandas/core/series.py b/pandas/core/series.py index 884e737f357a7..15dda702016fa 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -7,6 +7,7 @@ import operator import types +import warnings from numpy import nan, ndarray import numpy as np @@ -364,6 +365,17 @@ def imag(self, v): __long__ = _coerce_method(int) __int__ = _coerce_method(int) + def __nonzero__(self): + if len(self) == 1 and self.dtype == np.bool_: + warnings.warn("bool on a single-element boolean dtyped Series is deprecated,\n" + " please use a.empty, a.item(), a.any(), or a.all() instead\n", + UserWarning) + return bool(self.iloc[0]) + raise ValueError("The truth value of a {0} is ambiguous.\n" + "Use a.empty, a.item(), a.any() or a.all().\n" + "Currently, a boolean Series of length 1 is the exception\n".format(self.__class__.__name__)) + __bool__ = __nonzero__ + # we are preserving name here def __getstate__(self): return dict(_data=self._data, name=self.name) @@ -913,7 +925,6 @@ def to_string(self, buf=None, na_rep='NaN', float_format=None, """ if nanRep is not None: # pragma: no cover - import warnings warnings.warn("nanRep is deprecated, use na_rep", FutureWarning) na_rep = nanRep diff --git 
a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index 7f50cb2453a21..04977a9cc4cff 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -205,11 +205,26 @@ def test_get_numeric_data_preserve_dtype(self): def test_nonzero_single_element(self): + # single item to follow numpy s = Series([True]) - self.assertRaises(ValueError, lambda : bool(s)) + self.assert_(bool(s) == True) s = Series([False]) - self.assertRaises(ValueError, lambda : bool(s)) + self.assert_(bool(s) == False) + + # single item nan to raise + for s in [ Series([np.nan]), Series([pd.NaT]) ]: + self.assertRaises(ValueError, lambda : bool(s)) + + # multiple bool are still an error + for s in [Series([True,True]), Series([False, False])]: + self.assertRaises(ValueError, lambda : bool(s)) + + # single non-bool are an error + for s in [Series([1]), Series([0]), + Series(['a']), Series([0.0])]: + self.assertRaises(ValueError, lambda : bool(s)) + class TestDataFrame(unittest.TestCase, Generic): _typ = DataFrame From fe97e1a927d93f15b4596fa66a713508acf19e03 Mon Sep 17 00:00:00 2001 From: jreback Date: Mon, 23 Sep 2013 10:18:35 -0400 Subject: [PATCH 2/2] CLN: add .bool() method for single length evaluation --- doc/source/basics.rst | 13 +++++++++++-- doc/source/gotchas.rst | 9 +++++++++ doc/source/release.rst | 3 +-- doc/source/v0.13.0.txt | 17 +++++++++++++++-- pandas/core/generic.py | 16 +++++++++++++++- pandas/core/series.py | 11 ----------- pandas/io/tests/test_pytables.py | 10 +++++----- pandas/tests/test_generic.py | 26 ++++++++++++++++++++++---- 8 files changed, 78 insertions(+), 27 deletions(-) diff --git a/doc/source/basics.rst b/doc/source/basics.rst index b167b00b58ef1..9782967fd0a59 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -219,7 +219,7 @@ Boolean Reductions .. 
_basics.reductions: -Furthermore, you can apply the reduction functions: ``any()`` and ``all()`` to provide a +Furthermore, you can apply the reductions: ``empty``, ``any()``, ``all()``, and ``bool()`` to provide a way to summarize these results. .. ipython:: python @@ -233,7 +233,7 @@ You can reduce to a final boolean value. (df>0).any().any() -Finally you can test if a pandas object is empty, via the ``empty`` property. +You can test if a pandas object is empty, via the ``empty`` property. .. ipython:: python @@ -262,6 +262,15 @@ Finally you can test if a pandas object is empty, via the ``empty`` property. ValueError: The truth value of an array is ambiguous. Use a.empty, a.any() or a.all(). +To evaluate single-element pandas objects in a boolean context, use the method ``.bool()``: + + .. ipython:: python + + Series([True]).bool() + Series([False]).bool() + DataFrame([[True]]).bool() + DataFrame([[False]]).bool() + See :ref:`gotchas` for a more detailed discussion. diff --git a/doc/source/gotchas.rst b/doc/source/gotchas.rst index 58eb6dccfc967..6cef1d52a6cec 100644 --- a/doc/source/gotchas.rst +++ b/doc/source/gotchas.rst @@ -59,6 +59,15 @@ or return if ``any`` value is ``True``. print("I am any") >>> I am any +To evaluate single-element pandas objects in a boolean context, use the method ``.bool()``: + + .. ipython:: python + + Series([True]).bool() + Series([False]).bool() + DataFrame([[True]]).bool() + DataFrame([[False]]).bool() + See :ref:`boolean reductions` for more examples. 
Bitwise boolean diff --git a/doc/source/release.rst b/doc/source/release.rst index da3b9882c30f9..de81df9c15a42 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -241,8 +241,7 @@ API Changes - Infer and downcast dtype if ``downcast='infer'`` is passed to ``fillna/ffill/bfill`` (:issue:`4604`) - ``__nonzero__`` for all NDFrame objects, will now raise a ``ValueError``, this reverts back to (:issue:`1073`, :issue:`4633`) - behavior (except for a single-element boolean Series, which mimics ``numpy`` behavior and will evaluate - to the bool of the element) + behavior. Add ``.bool()`` method to ``NDFrame`` objects to facilitate evaluation of single-element boolean Series - ``DataFrame.update()`` no longer raises a ``DataConflictError``, it now will raise a ``ValueError`` instead (if necessary) (:issue:`4732`) - ``Series.isin()`` and ``DataFrame.isin()`` now raise a ``TypeError`` when diff --git a/doc/source/v0.13.0.txt b/doc/source/v0.13.0.txt index 83bb5f1a2dfdb..5ff7038d02e45 100644 --- a/doc/source/v0.13.0.txt +++ b/doc/source/v0.13.0.txt @@ -55,9 +55,12 @@ API changes index.set_names(["bob", "cranberry"], inplace=True) - Infer and downcast dtype if ``downcast='infer'`` is passed to ``fillna/ffill/bfill`` (:issue:`4604`) + - Remove deprecated ``Factor`` (:issue:`3650`) + - Remove deprecated ``set_printoptions/reset_printoptions`` (:issue:`3046`) + - Remove deprecated ``_verbose_info`` (:issue:`3215`) - ``__nonzero__`` for all NDFrame objects, will now raise a ``ValueError``, this reverts back to (:issue:`1073`, :issue:`4633`) - behavior (except for a single-element boolean Series, which mimics ``numpy`` behavior and will evaluate - to the bool of the element). See :ref:`gotchas` for a more detailed discussion. + behavior. Added the ``.bool()`` method to ``NDFrame`` objects to facilitate evaluation of single-element boolean Series. + See :ref:`gotchas` for a more detailed discussion. 
This prevents behaviors like (which will now all raise ``ValueError``) @@ -69,6 +72,16 @@ API changes df1 and df2 s1 and s2 + + To evaluate single-element pandas objects in a boolean context, use the method ``.bool()``: + + .. ipython:: python + + Series([True]).bool() + Series([False]).bool() + DataFrame([[True]]).bool() + DataFrame([[False]]).bool() + - All non-Index NDFrames (``Series``, ``DataFrame``, ``Panel``, ``Panel4D``, ``SparsePanel``, etc.), now support the entire set of arithmetic operators and arithmetic flex methods (add, sub, mul, etc.). ``SparsePanel`` does not diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 3125995a1d86f..7712d91ff4c71 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -597,10 +597,24 @@ def empty(self): def __nonzero__(self): raise ValueError("The truth value of a {0} is ambiguous. " - "Use a.empty, a.item(), a.any() or a.all().".format(self.__class__.__name__)) + "Use a.empty, a.bool(), a.item(), a.any() or a.all().".format(self.__class__.__name__)) __bool__ = __nonzero__ + def bool(self): + """ Return the bool of a single element PandasObject + This must be a boolean scalar value, either True or False + + Raise a ValueError if the PandasObject does not have exactly + 1 element, or that element is not boolean """ + v = self.squeeze() + if isinstance(v, (bool,np.bool_)): + return bool(v) + elif np.isscalar(v): + raise ValueError("bool cannot act on a non-boolean single element {0}".format(self.__class__.__name__)) + + self.__nonzero__() + def __abs__(self): return self.abs() diff --git a/pandas/core/series.py b/pandas/core/series.py index 15dda702016fa..3161fd0496f6c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -365,17 +365,6 @@ def imag(self, v): __long__ = _coerce_method(int) __int__ = _coerce_method(int) - def __nonzero__(self): - if len(self) == 1 and self.dtype == np.bool_: - warnings.warn("bool on a single-element boolean dtyped Series is deprecated,\n" - " please use 
a.empty, a.item(), a.any(), or a.all() instead\n", - UserWarning) - return bool(self.iloc[0]) - raise ValueError("The truth value of a {0} is ambiguous.\n" - "Use a.empty, a.item(), a.any() or a.all().\n" - "Currently, a boolean Series of length 1 is the exception\n".format(self.__class__.__name__)) - __bool__ = __nonzero__ - # we are preserving name here def __getstate__(self): return dict(_data=self._data, name=self.name) diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 35ecef2acf818..835198400cd5a 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -2662,18 +2662,18 @@ def test_select_dtypes(self): df = DataFrame(np.random.randn(5,2), columns =['A','B']) df['object'] = 'foo' df.ix[4:5,'object'] = 'bar' - df['bool'] = df['A'] > 0 + df['boolv'] = df['A'] > 0 _maybe_remove(store, 'df') store.append('df', df, data_columns = True) - expected = df[df.bool == True].reindex(columns=['A','bool']) + expected = df[df.boolv == True].reindex(columns=['A','boolv']) for v in [True,'true',1]: - result = store.select('df', Term('bool == %s' % str(v)), columns = ['A','bool']) + result = store.select('df', Term('boolv == %s' % str(v)), columns = ['A','boolv']) tm.assert_frame_equal(expected, result) - expected = df[df.bool == False ].reindex(columns=['A','bool']) + expected = df[df.boolv == False ].reindex(columns=['A','boolv']) for v in [False,'false',0]: - result = store.select('df', Term('bool == %s' % str(v)), columns = ['A','bool']) + result = store.select('df', Term('boolv == %s' % str(v)), columns = ['A','boolv']) tm.assert_frame_equal(expected, result) # integer index diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index 04977a9cc4cff..b8c143e10111d 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -205,25 +205,30 @@ def test_get_numeric_data_preserve_dtype(self): def test_nonzero_single_element(self): - # single item to follow numpy + # allow 
single item via bool method s = Series([True]) - self.assert_(bool(s) == True) + self.assert_(s.bool() is True) s = Series([False]) - self.assert_(bool(s) == False) + self.assert_(s.bool() is False) # single item nan to raise - for s in [ Series([np.nan]), Series([pd.NaT]) ]: + for s in [ Series([np.nan]), Series([pd.NaT]), Series([True]), Series([False]) ]: self.assertRaises(ValueError, lambda : bool(s)) + for s in [ Series([np.nan]), Series([pd.NaT])]: + self.assertRaises(ValueError, lambda : s.bool()) + # multiple bool are still an error for s in [Series([True,True]), Series([False, False])]: self.assertRaises(ValueError, lambda : bool(s)) + self.assertRaises(ValueError, lambda : s.bool()) # single non-bool are an error for s in [Series([1]), Series([0]), Series(['a']), Series([0.0])]: self.assertRaises(ValueError, lambda : bool(s)) + self.assertRaises(ValueError, lambda : s.bool()) class TestDataFrame(unittest.TestCase, Generic): @@ -235,6 +240,19 @@ def test_rename_mi(self): index=MultiIndex.from_tuples([("A",x) for x in ["a","B","c"]])) result = df.rename(str.lower) + def test_nonzero_single_element(self): + + # allow single item via bool method + df = DataFrame([[True]]) + self.assert_(df.bool() is True) + + df = DataFrame([[False]]) + self.assert_(df.bool() is False) + + df = DataFrame([[False, False]]) + self.assertRaises(ValueError, lambda : df.bool()) + self.assertRaises(ValueError, lambda : bool(df)) + def test_get_numeric_data_preserve_dtype(self): # get the numeric data