diff --git a/doc/source/v0.14.1.txt b/doc/source/v0.14.1.txt index 1aaf77625cf7f..b22720d277873 100644 --- a/doc/source/v0.14.1.txt +++ b/doc/source/v0.14.1.txt @@ -145,7 +145,7 @@ Performance - Improvements in Series.transform for significant performance gains (:issue:`6496`) - Improvements in DataFrame.transform with ufuncs and built-in grouper functions for signifcant performance gains (:issue:`7383`) - Regression in groupby aggregation of datetime64 dtypes (:issue:`7555`) - +- Improvements in `MultiIndex.from_product` for large iterables (:issue:`7627`) diff --git a/pandas/core/index.py b/pandas/core/index.py index 2138ecfa5281f..4d7e14c9e026f 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -2875,10 +2875,14 @@ def from_product(cls, iterables, sortorder=None, names=None): MultiIndex.from_arrays : Convert list of arrays to MultiIndex MultiIndex.from_tuples : Convert list of tuples to MultiIndex """ + from pandas.core.categorical import Categorical from pandas.tools.util import cartesian_product - product = cartesian_product(iterables) - return MultiIndex.from_arrays(product, sortorder=sortorder, - names=names) + + categoricals = [Categorical.from_array(it) for it in iterables] + labels = cartesian_product([c.labels for c in categoricals]) + + return MultiIndex(levels=[c.levels for c in categoricals], + labels=labels, sortorder=sortorder, names=names) @property def nlevels(self): diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index e20b209b3b5dd..23a0f39ef3547 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -1861,6 +1861,15 @@ def test_from_product(self): assert_array_equal(result, expected) self.assertEqual(result.names, names) + def test_from_product_datetimeindex(self): + dt_index = pd.date_range('2000-01-01', periods=2) + mi = pd.MultiIndex.from_product([[1, 2], dt_index]) + etalon = pd.lib.list_to_object_array([(1, pd.Timestamp('2000-01-01')), + (1, pd.Timestamp('2000-01-02')), + (2, pd.Timestamp('2000-01-01')), + (2, pd.Timestamp('2000-01-02'))]) + assert_array_equal(mi.values, etalon) + def test_append(self): result = self.index[:3].append(self.index[3:]) self.assertTrue(result.equals(self.index)) diff --git a/vb_suite/index_object.py b/vb_suite/index_object.py index e6bd32737d567..5ddb2fb0ac7ec 100644 --- a/vb_suite/index_object.py +++ b/vb_suite/index_object.py @@ -105,3 +105,15 @@ start_date=datetime(2014, 4, 13)) index_float64_div = Benchmark('idx / 2', setup, name='index_float64_div', start_date=datetime(2014, 4, 13)) + + +# Constructing MultiIndex from cartesian product of iterables +# + +setup = common_setup + """ +iterables = [tm.makeStringIndex(10000), xrange(20)] +""" + +multiindex_from_product = Benchmark('MultiIndex.from_product(iterables)', + setup, name='multiindex_from_product', + start_date=datetime(2014, 6, 30))