diff --git a/doc/source/whatsnew/v0.16.0.txt b/doc/source/whatsnew/v0.16.0.txt
index 7433adaa4b738..c34218b434b1b 100644
--- a/doc/source/whatsnew/v0.16.0.txt
+++ b/doc/source/whatsnew/v0.16.0.txt
@@ -45,6 +45,7 @@ Performance
 
 .. _whatsnew_0160.performance:
 
+- Fixed a severe performance regression for ``.loc`` indexing with an array or list (:issue:`9126`).
 
 Bug Fixes
 ~~~~~~~~~
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index c9322a9371309..7202ed64e1c9c 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -1275,9 +1275,8 @@ def _has_valid_type(self, key, axis):
             if isinstance(key, tuple) and isinstance(ax, MultiIndex):
                 return True
 
-            # require at least 1 element in the index
-            idx = _ensure_index(key)
-            if len(idx) and not idx.isin(ax).any():
+            # TODO: don't check the entire key unless necessary
+            if len(key) and np.all(ax.get_indexer_for(key) < 0):
 
                 raise KeyError("None of [%s] are in the [%s]" %
                                (key, self.obj._get_axis_name(axis)))
diff --git a/vb_suite/indexing.py b/vb_suite/indexing.py
index 320f261050e07..f05ebc47d2e25 100644
--- a/vb_suite/indexing.py
+++ b/vb_suite/indexing.py
@@ -209,3 +209,30 @@ frame_iloc_big = Benchmark('df.iloc[:100,0]', setup,
                            start_date=datetime(2013, 1, 1))
 
 
+
+#----------------------------------------------------------------------
+# basic tests for [], .loc[], .iloc[] and .ix[]
+
+setup = common_setup + """
+s = Series(np.random.rand(1000000))
+"""
+
+series_getitem_scalar = Benchmark("s[800000]", setup)
+series_getitem_slice = Benchmark("s[:800000]", setup)
+series_getitem_list_like = Benchmark("s[[800000]]", setup)
+series_getitem_array = Benchmark("s[np.arange(10000)]", setup)
+
+series_loc_scalar = Benchmark("s.loc[800000]", setup)
+series_loc_slice = Benchmark("s.loc[:800000]", setup)
+series_loc_list_like = Benchmark("s.loc[[800000]]", setup)
+series_loc_array = Benchmark("s.loc[np.arange(10000)]", setup)
+
+series_iloc_scalar = Benchmark("s.iloc[800000]", setup)
+series_iloc_slice = Benchmark("s.iloc[:800000]", setup)
+series_iloc_list_like = Benchmark("s.iloc[[800000]]", setup)
+series_iloc_array = Benchmark("s.iloc[np.arange(10000)]", setup)
+
+series_ix_scalar = Benchmark("s.ix[800000]", setup)
+series_ix_slice = Benchmark("s.ix[:800000]", setup)
+series_ix_list_like = Benchmark("s.ix[[800000]]", setup)
+series_ix_array = Benchmark("s.ix[np.arange(10000)]", setup)