From bb2fd4142d1bbeedac5e177f1e7c4c3cc812430d Mon Sep 17 00:00:00 2001 From: jreback Date: Fri, 27 Jun 2014 15:51:37 -0400 Subject: [PATCH] BUG: Bug in timedelta inference when assigning an incomplete Series (GH7592) --- doc/source/v0.14.1.txt | 2 +- pandas/core/common.py | 4 ++-- pandas/core/internals.py | 11 +++++++---- pandas/tests/test_frame.py | 29 ++++++++++++++++++++++++++--- 4 files changed, 36 insertions(+), 10 deletions(-) diff --git a/doc/source/v0.14.1.txt b/doc/source/v0.14.1.txt index 45a5d55ca047d..cf14e3696b90f 100644 --- a/doc/source/v0.14.1.txt +++ b/doc/source/v0.14.1.txt @@ -172,7 +172,7 @@ Bug Fixes - Regression in datetimelike slice indexing with a duplicated index and non-exact end-points (:issue:`7523`) - Bug in setitem with list-of-lists and single vs mixed types (:issue:`7551`:) - Bug in timeops with non-aligned Series (:issue:`7500`) - +- Bug in timedelta inference when assigning an incomplete Series (:issue:`7592`) - Bug in ``value_counts`` where ``NaT`` did not qualify as missing (``NaN``) (:issue:`7423`) diff --git a/pandas/core/common.py b/pandas/core/common.py index c0aab2ca1428a..c0432b53e346a 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -2603,7 +2603,7 @@ def check_main(): def in_qtconsole(): """ check if we're inside an IPython qtconsole - + DEPRECATED: This is no longer needed, or working, in IPython 3 and above. """ try: @@ -2622,7 +2622,7 @@ def in_qtconsole(): def in_ipnb(): """ check if we're inside an IPython Notebook - + DEPRECATED: This is no longer used in pandas, and won't work in IPython 3 and above. """ diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 8100b98d6e42d..accaf4ea5cd29 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1037,9 +1037,11 @@ class FloatBlock(FloatOrComplexBlock): def _can_hold_element(self, element): if is_list_like(element): element = np.array(element) - return issubclass(element.dtype.type, (np.floating, np.integer)) - return (isinstance(element, (float, int, np.float_, np.int_)) and - not isinstance(bool, np.bool_)) + tipo = element.dtype.type + return issubclass(tipo, (np.floating, np.integer)) and not issubclass( + tipo, (np.datetime64, np.timedelta64)) + return isinstance(element, (float, int, np.float_, np.int_)) and not isinstance( + element, (bool, np.bool_, datetime, timedelta, np.datetime64, np.timedelta64)) def _try_cast(self, element): try: @@ -1099,7 +1101,8 @@ class IntBlock(NumericBlock): def _can_hold_element(self, element): if is_list_like(element): element = np.array(element) - return issubclass(element.dtype.type, np.integer) + tipo = element.dtype.type + return issubclass(tipo, np.integer) and not issubclass(tipo, (np.datetime64, np.timedelta64)) return com.is_integer(element) def _try_cast(self, element): diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 4360e2b9066df..ef57256c7ee06 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -32,7 +32,8 @@ import pandas.core.format as fmt import pandas.core.datetools as datetools from pandas import (DataFrame, Index, Series, notnull, isnull, - MultiIndex, DatetimeIndex, Timestamp, date_range, read_csv) + MultiIndex, DatetimeIndex, Timestamp, date_range, read_csv, + _np_version_under1p7) import pandas as pd from pandas.parser import CParserError from pandas.util.misc import is_little_endian @@ -2180,11 +2181,11 @@ def test_set_index_cast_datetimeindex(self): # reset_index with single level for tz in ['UTC', 'Asia/Tokyo', 'US/Eastern']: idx = pd.date_range('1/1/2011', periods=5, freq='D', tz=tz, name='idx') - df = pd.DataFrame({'a': range(5), 'b': ['A', 'B', 'C', 'D', 'E']}, index=idx) + df = pd.DataFrame({'a': range(5), 'b': ['A', 'B', 'C', 'D', 'E']}, index=idx) expected = pd.DataFrame({'idx': [datetime(2011, 1, 1), datetime(2011, 1, 2), datetime(2011, 1, 3), datetime(2011, 1, 4), - datetime(2011, 1, 5)], + datetime(2011, 1, 5)], 'a': range(5), 'b': ['A', 'B', 'C', 'D', 'E']}, columns=['idx', 'a', 'b']) expected['idx'] = expected['idx'].apply(lambda d: pd.Timestamp(d, tz=tz)) @@ -3757,6 +3758,28 @@ def test_operators_timedelta64(self): self.assertTrue(df['off1'].dtype == 'timedelta64[ns]') self.assertTrue(df['off2'].dtype == 'timedelta64[ns]') + def test_datetimelike_setitem_with_inference(self): + if _np_version_under1p7: + raise nose.SkipTest("numpy < 1.7") + + # GH 7592 + # assignment of timedeltas with NaT + + one_hour = timedelta(hours=1) + df = DataFrame(index=date_range('20130101',periods=4)) + df['A'] = np.array([1*one_hour]*4, dtype='m8[ns]') + df.loc[:,'B'] = np.array([2*one_hour]*4, dtype='m8[ns]') + df.loc[:3,'C'] = np.array([3*one_hour]*3, dtype='m8[ns]') + df.ix[:,'D'] = np.array([4*one_hour]*4, dtype='m8[ns]') + df.ix[:3,'E'] = np.array([5*one_hour]*3, dtype='m8[ns]') + df['F'] = np.timedelta64('NaT') + df.ix[:-1,'F'] = np.array([6*one_hour]*3, dtype='m8[ns]') + df.ix[-3:,'G'] = date_range('20130101',periods=3) + df['H'] = np.datetime64('NaT') + result = df.dtypes + expected = Series([np.dtype('timedelta64[ns]')]*6+[np.dtype('datetime64[ns]')]*2,index=list('ABCDEFGH')) + assert_series_equal(result,expected) + def test_new_empty_index(self): df1 = DataFrame(randn(0, 3)) df2 = DataFrame(randn(0, 3))