diff --git a/RELEASE.rst b/RELEASE.rst index e01ccad6db96a..a542a406fcfaa 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -292,6 +292,7 @@ pandas 0.11.0 spacing (GH3258_) - fixed pretty priniting of sets (GH3294_) - Panel() and Panel.from_dict() now respects ordering when give OrderedDict (GH3303_) + - DataFrame where with a datetimelike incorrectly selecting (GH3311_) .. _GH3294: https://github.com/pydata/pandas/issues/3294 .. _GH622: https://github.com/pydata/pandas/issues/622 @@ -400,6 +401,7 @@ pandas 0.11.0 .. _GH3258: https://github.com/pydata/pandas/issues/3258 .. _GH3283: https://github.com/pydata/pandas/issues/3283 .. _GH2919: https://github.com/pydata/pandas/issues/2919 +.. _GH3311: https://github.com/pydata/pandas/issues/3311 pandas 0.10.1 ============= diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 1ab6eb05b86d4..b44ef5d465bb9 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -284,6 +284,14 @@ def _try_cast_result(self, result): we may have roundtripped thru object in the mean-time """ return result + def _try_coerce_args(self, values, other): + """ provide coercion to our input arguments """ + return values, other + + def _try_coerce_result(self, result): + """ reverse of try_coerce_args """ + return result + def to_native_types(self, slicer=None, na_rep='', **kwargs): """ convert to our native types format, slicing if desired """ @@ -454,9 +462,10 @@ def eval(self, func, other, raise_on_error = True, try_cast = False): values = values.T is_transposed = True + values, other = self._try_coerce_args(values, other) args = [ values, other ] try: - result = func(*args) + result = self._try_coerce_result(func(*args)) except (Exception), detail: if raise_on_error: raise TypeError('Could not operate [%s] with block values [%s]' @@ -529,8 +538,9 @@ def func(c,v,o): if c.ravel().all(): return v + v, o = self._try_coerce_args(v, o) try: - return expressions.where(c, v, o, raise_on_error=True) + return 
self._try_coerce_result(expressions.where(c, v, o, raise_on_error=True)) except (Exception), detail: if raise_on_error: raise TypeError('Could not operate [%s] with block values [%s]' @@ -735,6 +745,29 @@ def _try_cast(self, element): except: return element + def _try_coerce_args(self, values, other): + """ provide coercion to our input arguments + we are going to compare vs i8, so coerce to integer + values is always ndarray-like, other may not be """ + values = values.view('i8') + if isinstance(other, datetime): + other = lib.Timestamp(other).asm8.view('i8') + elif isnull(other): + other = tslib.iNaT + else: + other = other.view('i8') + + return values, other + + def _try_coerce_result(self, result): + """ reverse of try_coerce_args """ + if isinstance(result, np.ndarray): + if result.dtype == 'i8': + result = tslib.array_to_datetime(result.astype(object).ravel()).reshape(result.shape) + elif isinstance(result, np.integer): + result = lib.Timestamp(result) + return result + def to_native_types(self, slicer=None, na_rep=None, **kwargs): """ convert to our native types format, slicing if desired """ diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 213547c4132b9..5e5b24bb6734d 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -22,6 +22,7 @@ import pandas.core.datetools as datetools from pandas.core.api import (DataFrame, Index, Series, notnull, isnull, MultiIndex, DatetimeIndex, Timestamp, Period) +from pandas import date_range from pandas.io.parsers import read_csv from pandas.util.testing import (assert_almost_equal, @@ -2197,7 +2198,6 @@ def test_constructor_error_msgs(self): self.assert_("Mixing dicts with non-Series may lead to ambiguous ordering."
in str(detail)) # wrong size ndarray, GH 3105 - from pandas import date_range try: DataFrame(np.arange(12).reshape((4, 3)), columns=['foo', 'bar', 'baz'], index=date_range('2000-01-01', periods=3)) @@ -2888,7 +2888,6 @@ def test_constructor_with_datetimes(self): assert_series_equal(result, expected) # GH 2809 - from pandas import date_range ind = date_range(start="2000-01-01", freq="D", periods=10) datetimes = [ts.to_pydatetime() for ts in ind] datetime_s = Series(datetimes) @@ -2975,7 +2974,6 @@ def test_constructor_for_list_with_dtypes(self): def test_timedeltas(self): - from pandas import date_range df = DataFrame(dict(A = Series(date_range('2012-1-1', periods=3, freq='D')), B = Series([ timedelta(days=i) for i in range(3) ]))) result = df.get_dtype_counts() @@ -3001,7 +2999,6 @@ def test_timedeltas(self): def test_operators_timedelta64(self): - from pandas import date_range from datetime import datetime, timedelta df = DataFrame(dict(A = date_range('2012-1-1', periods=3, freq='D'), B = date_range('2012-1-2', periods=3, freq='D'), @@ -6838,6 +6835,19 @@ def test_where_bug(self): result.where(result > 2, np.nan, inplace=True) assert_frame_equal(result, expected) + def test_where_datetime(self): + + # GH 3311 + df = DataFrame(dict(A = date_range('20130102',periods=5), + B = date_range('20130104',periods=5), + C = np.random.randn(5))) + + stamp = datetime(2013,1,3) + result = df[df>stamp] + expected = df.copy() + expected.loc[[0,1],'A'] = np.nan + assert_frame_equal(result,expected) + def test_mask(self): df = DataFrame(np.random.randn(5, 3)) cond = df > 0