From f883b7994b863fdf7c0511829b2b447b4ad44f44 Mon Sep 17 00:00:00 2001 From: Patrick Fournier Date: Fri, 17 Apr 2015 16:23:42 -0400 Subject: [PATCH 1/3] TST: Adding test for bug GH 9456 --- pandas/tests/test_series.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index c3b43f3ec70c0..1d5b2a199e806 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -894,6 +894,35 @@ def test_constructor_dtype_datetime64(self): dr = date_range('20130101',periods=3,tz='US/Eastern') self.assertTrue(str(Series(dr).iloc[0].tz) == 'US/Eastern') + # GH 9456 + d = {np.datetime64('2015-01-07T02:00:00.000000000+0200'): 4.2, + np.datetime64('2015-01-08T02:00:00.000000000+0200'): 4.0, + np.datetime64('2015-01-09T02:00:00.000000000+0200'): 3.9, + np.datetime64('2015-01-12T02:00:00.000000000+0200'): 3.5} + keys = list() + vals = list() + for k in sorted(d.keys()): + vals.append(d[k]) + keys.append(k) + expected = Series(vals, keys) + + s = Series(d) + assert_series_equal(s, expected) + + d = {datetime(2015,1,7): 4.2, + datetime(2015,1,8): 4.0, + datetime(2015,1,9): 3.9, + datetime(2015,1,12): 3.5} + s = Series(d) + assert_series_equal(s, expected) + + d = {datetime(2015,1,7): 4.2, + np.datetime64('2015-01-08T02:00:00.000000000+0200'): 4.0, + '20150109': 3.9, + np.datetime64('2015-01-12T02:00:00.000000000+0200'): 3.5} + s = Series(d) + assert_series_equal(s, expected) + def test_constructor_periodindex(self): # GH7932 # converting a PeriodIndex when put in a Series From a934792c4ec81120f42822f1f01095e50b65ef68 Mon Sep 17 00:00:00 2001 From: Patrick Fournier Date: Fri, 17 Apr 2015 16:26:32 -0400 Subject: [PATCH 2/3] BUG: GH 9456 Fixed Series.__init__ to better handle dict data - dict with datetime64 keys now working - when isinstance(index, DatetimeIndex), use lib.fast_multiget correctly to avoid raising a TypeError exception --- pandas/core/series.py | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index f9c56db018639..6cd264c75152b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -166,9 +166,9 @@ def __init__(self, data=None, index=None, dtype=None, name=None, else: index = Index(_try_sort(data)) try: - if isinstance(index, DatetimeIndex): + if isinstance(index, DatetimeIndex) and lib.infer_dtype(data) != 'datetime64': # coerce back to datetime objects for lookup - data = lib.fast_multiget(data, index.astype('O'), + data = lib.fast_multiget(data, index.astype('O').values, default=np.nan) elif isinstance(index, PeriodIndex): data = [data.get(i, nan) for i in index] From c59c4fec4442a739995c920c3e1fdc4f0ebb0d5b Mon Sep 17 00:00:00 2001 From: Patrick Fournier Date: Sat, 30 May 2015 23:05:22 -0400 Subject: [PATCH 3/3] BUG: GH 9456 Fixed Series.__init__ to better handle dict data - dict with datetime64 keys now working - use lib.fast_multiget correctly to avoid unnecessary exceptions --- pandas/core/series.py | 34 +++++++++++++++++++++------------- pandas/tests/test_series.py | 28 +++++++++++++++++++++------- 2 files changed, 42 insertions(+), 20 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 56c695f0e159b..fe1634170fc1f 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -99,7 +99,8 @@ class Series(base.IndexOpsMixin, generic.NDFrame): Parameters ---------- data : array-like, dict, or scalar value - Contains data stored in Series + Contains data stored in Series. If a dict and no index is provided, + an attempt will be made to sort the dict. index : array-like or Index (1d) Values must be unique and hashable, same length as data. 
Index object (or other iterable of same length as data) Will default to @@ -131,6 +132,7 @@ def __init__(self, data=None, index=None, dtype=None, name=None, else: + original_index = index if index is not None: index = _ensure_index(index) @@ -162,21 +164,27 @@ def __init__(self, data=None, index=None, dtype=None, name=None, elif isinstance(data, dict): if index is None: if isinstance(data, OrderedDict): - index = Index(data) + original_index = data.keys() else: - index = Index(_try_sort(data)) + original_index = _try_sort(data) + + index = Index(original_index) + try: - if isinstance(index, DatetimeIndex) and lib.infer_dtype(data) != 'datetime64': - # coerce back to datetime objects for lookup - data = lib.fast_multiget(data, index.astype('O').values, - default=np.nan) - elif isinstance(index, PeriodIndex): - data = [data.get(i, nan) for i in index] + # lib.fast_multiget raises TypeError if type(data) != dict + + if lib.infer_dtype(data) == lib.infer_dtype(index.values): + data = lib.fast_multiget(data, index.values, default=np.nan) else: - data = lib.fast_multiget(data, index.values, - default=np.nan) - except TypeError: - data = [data.get(i, nan) for i in index] + if isinstance(original_index, PeriodIndex): + data = [data.get(i, np.nan) for i in original_index] + else: + # np.array(['z', ('a', 'b')]) raises ValueError; + # this may happen with MultiIndex. 
+ data = lib.fast_multiget(data, np.array(original_index), + default=np.nan) + except (TypeError, ValueError) as e: + data = [data.get(i, np.nan) for i in index] elif isinstance(data, SingleBlockManager): if index is None: diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 8859cdbbb9fc7..3d800c4f04eaf 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -917,8 +917,8 @@ def test_constructor_dtype_datetime64(self): # GH 9456 d = {np.datetime64('2015-01-07T02:00:00.000000000+0200'): 4.2, - np.datetime64('2015-01-08T02:00:00.000000000+0200'): 4.0, - np.datetime64('2015-01-09T02:00:00.000000000+0200'): 3.9, + np.datetime64('2015-01-09T02:00:00.000000000+0200'): 4.0, + np.datetime64('2015-01-08T02:00:00.000000000+0200'): 3.9, np.datetime64('2015-01-12T02:00:00.000000000+0200'): 3.5} keys = list() vals = list() @@ -926,23 +926,37 @@ def test_constructor_dtype_datetime64(self): vals.append(d[k]) keys.append(k) expected = Series(vals, keys) + expected_unsorted = Series(d.values(), d.keys()) s = Series(d) assert_series_equal(s, expected) + s = Series(d, d.keys()) + assert_series_equal(s, expected_unsorted) + d = {datetime(2015,1,7): 4.2, - datetime(2015,1,8): 4.0, - datetime(2015,1,9): 3.9, + datetime(2015,1,9): 4.0, + datetime(2015,1,8): 3.9, datetime(2015,1,12): 3.5} + expected_unsorted = Series(d.values(), d.keys()) + s = Series(d) assert_series_equal(s, expected) + s = Series(d, d.keys()) + assert_series_equal(s, expected_unsorted) + d = {datetime(2015,1,7): 4.2, - np.datetime64('2015-01-08T02:00:00.000000000+0200'): 4.0, - '20150109': 3.9, + np.datetime64('2015-01-09T02:00:00.000000000+0200'): 4.0, + '20150108': 3.9, np.datetime64('2015-01-12T02:00:00.000000000+0200'): 3.5} + expected_unsorted = Series(d.values(), d.keys()) + s = Series(d) - assert_series_equal(s, expected) + assert_series_equal(s, expected_unsorted) + + s = Series(d, d.keys()) + assert_series_equal(s, expected_unsorted) def 
test_constructor_periodindex(self): # GH7932