From f31cb6b9cbbb4cba543fc12c862fa725f1d5bebd Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 29 Apr 2018 06:56:55 -0500 Subject: [PATCH 1/4] PERF: Fixed regression in Series(index=idx) constructor From https://github.com/pandas-dev/pandas/pull/18496/ Special cases empty series construction, since the reindex is not necessary. --- pandas/core/series.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 7abd95c68ea2b..82076f8251ed1 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -207,10 +207,20 @@ def __init__(self, data=None, index=None, dtype=None, name=None, else: data = data.reindex(index, copy=copy) data = data._data - elif isinstance(data, dict): + elif isinstance(data, dict) and (len(data) or index is None): + # Include the len(data) check here, since _init_dict contains + # a relatively expensive reindex. When called with + # Series(data=None, index=idx`, that is unnescessary. We know + # we're all NaN anyway, so we handle this in the next block. + # https://github.com/pandas-dev/pandas/pull/18496/ data, index = self._init_dict(data, index, dtype) dtype = None copy = False + elif isinstance(data, dict): + # Same as previous block, but special cased for data=None, + # for performance when creating empty arrays. + data = np.nan + elif isinstance(data, SingleBlockManager): if index is None: index = data.index From cf8c047da7a3240f2eaad5807a0f52326a1c6d71 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 29 Apr 2018 13:12:53 -0500 Subject: [PATCH 2/4] Handle dtype correctly --- pandas/core/series.py | 8 ++++++-- pandas/tests/series/test_constructors.py | 11 +++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 82076f8251ed1..b9e61dc470608 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -41,7 +41,11 @@ maybe_cast_to_datetime, maybe_castable, construct_1d_arraylike_from_scalar, construct_1d_object_array_from_listlike) -from pandas.core.dtypes.missing import isna, notna, remove_na_arraylike +from pandas.core.dtypes.missing import ( + isna, + notna, + remove_na_arraylike, + na_value_for_dtype) from pandas.core.index import (Index, MultiIndex, InvalidIndexError, Float64Index, _ensure_index) @@ -219,7 +223,7 @@ def __init__(self, data=None, index=None, dtype=None, name=None, elif isinstance(data, dict): # Same as previous block, but special cased for data=None, # for performance when creating empty arrays. - data = np.nan + data = na_value_for_dtype(dtype) elif isinstance(data, SingleBlockManager): if index is None: diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 82b5b1c10fa2d..4a5a6ab611e60 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -122,6 +122,17 @@ def test_constructor_nan(self, input_arg): assert_series_equal(empty, empty2, check_index_type=False) + @pytest.mark.parametrize('dtype', [ + 'f8', 'i8', 'M8[ns]', 'm8[ns]', 'category', 'object', + 'datetime64[ns, UTC]', + ]) + @pytest.mark.parametrize('index', [None, pd.Index([])]) + def test_constructor_dtype_only(self, dtype, index): + # GH-20865 + result = pd.Series(dtype=dtype, index=index) + assert result.dtype == dtype + assert len(result) == 0 + def test_constructor_series(self): index1 = ['d', 'b', 'a', 'c'] index2 = sorted(index1) From 0988df16964ea2107fe07b5e2dfa0821cc35aef9 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 30 Apr 2018 05:44:33 -0500 Subject: [PATCH 3/4] Moved to init_dict --- pandas/core/series.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index b9e61dc470608..3fd54d0d5ff29 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -211,20 +211,10 @@ def __init__(self, data=None, index=None, dtype=None, name=None, else: data = data.reindex(index, copy=copy) data = data._data - elif isinstance(data, dict) and (len(data) or index is None): - # Include the len(data) check here, since _init_dict contains - # a relatively expensive reindex. When called with - # Series(data=None, index=idx`, that is unnescessary. We know - # we're all NaN anyway, so we handle this in the next block. - # https://github.com/pandas-dev/pandas/pull/18496/ + elif isinstance(data, dict): data, index = self._init_dict(data, index, dtype) dtype = None copy = False - elif isinstance(data, dict): - # Same as previous block, but special cased for data=None, - # for performance when creating empty arrays. - data = na_value_for_dtype(dtype) - elif isinstance(data, SingleBlockManager): if index is None: index = data.index @@ -314,6 +304,11 @@ def _init_dict(self, data, index=None, dtype=None): if data: keys, values = zip(*compat.iteritems(data)) values = list(values) + elif index is not None: + # fastpath for Series(data=None). Just use broadcasting a scalar + # instead of reindexing. + values = na_value_for_dtype(dtype) + keys = index else: keys, values = [], [] @@ -321,7 +316,7 @@ def _init_dict(self, data, index=None, dtype=None): s = Series(values, index=keys, dtype=dtype) # Now we just make sure the order is respected, if any - if index is not None: + if data and index is not None: s = s.reindex(index, copy=False) elif not PY36 and not isinstance(data, OrderedDict): try: From d8b1312ab2a5c752d474c504245c84f3fcab32e3 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 30 Apr 2018 07:28:16 -0500 Subject: [PATCH 4/4] More special cases --- pandas/core/series.py | 4 +++- pandas/tests/series/test_constructors.py | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 3fd54d0d5ff29..1ef6f2d7eee22 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -318,7 +318,9 @@ def _init_dict(self, data, index=None, dtype=None): # Now we just make sure the order is respected, if any if data and index is not None: s = s.reindex(index, copy=False) - elif not PY36 and not isinstance(data, OrderedDict): + elif not PY36 and not isinstance(data, OrderedDict) and data: + # Need the `and data` to avoid sorting Series(None, index=[...]) + # since that isn't really dict-like try: s = s.sort_index() except TypeError: diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 4a5a6ab611e60..7e59325c32ddc 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -133,6 +133,10 @@ def test_constructor_dtype_only(self, dtype, index): assert result.dtype == dtype assert len(result) == 0 + def test_constructor_no_data_index_order(self): + result = pd.Series(index=['b', 'a', 'c']) + assert result.index.tolist() == ['b', 'a', 'c'] + def test_constructor_series(self): index1 = ['d', 'b', 'a', 'c'] index2 = sorted(index1)