From 3b680bca9dc833293c5a8ef0ebc2dfa78679bdc3 Mon Sep 17 00:00:00 2001
From: Pietro Battiston <me@pietrobattiston.it>
Date: Tue, 22 May 2018 22:42:03 +0200
Subject: [PATCH 1/4] REF: deduplicate _NDFrameIndexer._multi_take code

---
 pandas/core/frame.py    |   3 +-
 pandas/core/indexing.py | 209 +++++++++++++++++++++-------------------
 2 files changed, 113 insertions(+), 99 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 02c86d2f4dcc8..383f129a713ed 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2723,7 +2723,8 @@ def _getitem_array(self, key):
             indexer = key.nonzero()[0]
             return self._take(indexer, axis=0)
         else:
-            indexer = self.loc._convert_to_indexer(key, axis=1)
+            indexer = self.loc._convert_to_indexer(key, axis=1,
+                                                   raise_missing=True)
             return self._take(indexer, axis=1)
 
     def _getitem_multilevel(self, key):
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 0e4f040253560..aa8de9d2baad8 100755
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -925,33 +925,10 @@ def _multi_take(self, tup):
         """ create the reindex map for our objects, raise the _exception if we
         can't create the indexer
         """
-        try:
-            o = self.obj
-            d = {}
-            for key, axis in zip(tup, o._AXIS_ORDERS):
-                ax = o._get_axis(axis)
-                # Have the index compute an indexer or return None
-                # if it cannot handle:
-                indexer, keyarr = ax._convert_listlike_indexer(key,
-                                                               kind=self.name)
-                # We only act on all found values:
-                if indexer is not None and (indexer != -1).all():
-                    self._validate_read_indexer(key, indexer, axis)
-                    d[axis] = (ax[indexer], indexer)
-                    continue
-
-                # If we are trying to get actual keys from empty Series, we
-                # patiently wait for a KeyError later on - otherwise, convert
-                if len(ax) or not len(key):
-                    key = self._convert_for_reindex(key, axis)
-                indexer = ax.get_indexer_for(key)
-                keyarr = ax.reindex(keyarr)[0]
-                self._validate_read_indexer(keyarr, indexer,
-                                            o._get_axis_number(axis))
-                d[axis] = (keyarr, indexer)
-            return o._reindex_with_indexers(d, copy=True, allow_dups=True)
-        except (KeyError, IndexingError) as detail:
-            raise self._exception(detail)
+        o = self.obj
+        d = {axis: self._get_listlike_indexer(key, axis)
+             for (key, axis) in zip(tup, o._AXIS_ORDERS)}
+        return o._reindex_with_indexers(d, copy=True, allow_dups=True)
 
     def _convert_for_reindex(self, key, axis=None):
         return key
@@ -1124,63 +1101,110 @@ def _getitem_axis(self, key, axis=None):
 
             return self._get_label(key, axis=axis)
 
-    def _getitem_iterable(self, key, axis=None):
-        if axis is None:
-            axis = self.axis or 0
+    def _get_listlike_indexer(self, key, axis, raise_missing=False):
+        """
+        Transform a list-like of keys into a new index and an indexer.
 
-        self._validate_key(key, axis)
+        Parameters
+        ----------
+        key : list-like
+            Target labels
+        axis: int
+            Dimension on which the indexing is being made
+        raise_missing: bool
+            Whether to raise a KeyError if some labels are not found. Will be
+            removed in the future, and then this method will always behave as
+            if raise_missing=True.
 
-        labels = self.obj._get_axis(axis)
+        Raises
+        ------
+        KeyError
+            If at least one key was requested but none was found, or if some
+            keys are missing and raise_missing=True (or self.name != 'loc').
 
-        if com.is_bool_indexer(key):
-            key = check_bool_indexer(labels, key)
-            inds, = key.nonzero()
-            return self.obj._take(inds, axis=axis)
-        else:
+        Returns
+        -------
+        keyarr: Index
+            New index (coinciding with 'key' if the axis is unique)
+        values : array-like
+            An indexer for the return object; -1 denotes keys not found
+        """
+        o = self.obj
+        ax = o._get_axis(axis)
+        try:
             # Have the index compute an indexer or return None
-            # if it cannot handle; we only act on all found values
-            indexer, keyarr = labels._convert_listlike_indexer(
-                key, kind=self.name)
+            # if it cannot handle:
+            indexer, keyarr = ax._convert_listlike_indexer(key,
+                                                           kind=self.name)
+            # We only act on all found values:
             if indexer is not None and (indexer != -1).all():
-                self._validate_read_indexer(key, indexer, axis)
-                return self.obj.take(indexer, axis=axis)
+                self._validate_read_indexer(key, indexer, axis,
+                                            raise_missing=raise_missing)
+                return ax[indexer], indexer
 
-            ax = self.obj._get_axis(axis)
-            # existing labels are unique and indexer are unique
-            if labels.is_unique and Index(keyarr).is_unique:
+            if ax.is_unique:
+                # If we are trying to get actual keys from empty Series, we
+                # patiently wait for a KeyError later on - otherwise, convert
+                if len(ax) or not len(key):
+                    key = self._convert_for_reindex(key, axis)
                 indexer = ax.get_indexer_for(key)
-                self._validate_read_indexer(key, indexer, axis)
+                keyarr = ax.reindex(keyarr)[0]
+            else:
+                keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr)
 
-                d = {axis: [ax.reindex(keyarr)[0], indexer]}
-                return self.obj._reindex_with_indexers(d, copy=True,
-                                                       allow_dups=True)
+            self._validate_read_indexer(keyarr, indexer,
+                                        o._get_axis_number(axis),
+                                        raise_missing=raise_missing)
+            return keyarr, indexer
+        except (KeyError, IndexingError) as detail:
+            raise self._exception(detail)
 
-            # existing labels are non-unique
-            else:
+    def _getitem_iterable(self, key, axis=None):
+        """
+        Index current object with an iterable key (which can be a boolean
+        indexer, or a collection of keys).
 
-                # reindex with the specified axis
-                if axis + 1 > self.obj.ndim:
-                    raise AssertionError("invalid indexing error with "
-                                         "non-unique index")
+        Parameters
+        ----------
+        key : iterable
+            Target labels, or boolean indexer
+        axis: int, default None
+            Dimension on which the indexing is being made
 
-                new_target, indexer, new_indexer = labels._reindex_non_unique(
-                    keyarr)
+        Raises
+        ------
+        KeyError
+            If no key was found. Will change in the future to raise if not all
+            keys were found.
+        IndexingError
+            If the boolean indexer is unalignable with the object being
+            indexed.
 
-                if new_indexer is not None:
-                    result = self.obj._take(indexer[indexer != -1], axis=axis)
+        Returns
+        -------
+        scalar, DataFrame, or Series: indexed value(s).
+        """
 
-                    self._validate_read_indexer(key, new_indexer, axis)
-                    result = result._reindex_with_indexers(
-                        {axis: [new_target, new_indexer]},
-                        copy=True, allow_dups=True)
+        if axis is None:
+            axis = self.axis or 0
 
-                else:
-                    self._validate_read_indexer(key, indexer, axis)
-                    result = self.obj._take(indexer, axis=axis)
+        self._validate_key(key, axis)
 
-                return result
+        labels = self.obj._get_axis(axis)
+
+        if com.is_bool_indexer(key):
+            # A boolean indexer
+            key = check_bool_indexer(labels, key)
+            inds, = key.nonzero()
+            return self.obj._take(inds, axis=axis)
+        else:
+            # A collection of keys
+            keyarr, indexer = self._get_listlike_indexer(key, axis,
+                                                         raise_missing=False)
+            return self.obj._reindex_with_indexers({axis: [keyarr, indexer]},
+                                                   copy=True, allow_dups=True)
 
-    def _validate_read_indexer(self, key, indexer, axis):
+    def _validate_read_indexer(self, key, indexer, axis, raise_missing=False):
         """
         Check that indexer can be used to return a result (e.g. at least one
         element was found, unless the list of keys was actually empty).
@@ -1193,11 +1217,16 @@ def _validate_read_indexer(self, key, indexer, axis):
             Indices corresponding to the key (with -1 indicating not found)
         axis: int
             Dimension on which the indexing is being made
+        raise_missing: bool
+            Whether to raise a KeyError if some labels are not found. Will be
+            removed in the future, and then this method will always behave as
+            if raise_missing=True.
 
         Raises
         ------
         KeyError
-            If at least one key was requested none was found.
+            If at least one key was requested but none was found, or if some
+            keys are missing and raise_missing=True (or self.name != 'loc').
         """
 
         ax = self.obj._get_axis(axis)
@@ -1214,6 +1243,12 @@ def _validate_read_indexer(self, key, indexer, axis):
                     u"None of [{key}] are in the [{axis}]".format(
                         key=key, axis=self.obj._get_axis_name(axis)))
 
+            # We (temporarily) allow for some missing keys with .loc, except in
+            # some cases (e.g. setting) in which "raise_missing" will be True
+            if not(self.name == 'loc' and not raise_missing):
+                not_found = list(set(key) - set(ax))
+                raise KeyError("{} not in index".format(not_found))
+
             # we skip the warning on Categorical/Interval
             # as this check is actually done (check for
             # non-missing values), but a bit later in the
@@ -1229,9 +1264,10 @@ def _validate_read_indexer(self, key, indexer, axis):
 
             if not (ax.is_categorical() or ax.is_interval()):
                 warnings.warn(_missing_key_warning,
-                              FutureWarning, stacklevel=5)
+                              FutureWarning, stacklevel=6)
 
-    def _convert_to_indexer(self, obj, axis=None, is_setter=False):
+    def _convert_to_indexer(self, obj, axis=None, is_setter=False,
+                            raise_missing=False):
         """
         Convert indexing key into something we can use to do actual fancy
         indexing on an ndarray
@@ -1310,33 +1346,10 @@ def _convert_to_indexer(self, obj, axis=None, is_setter=False):
                 inds, = obj.nonzero()
                 return inds
             else:
-
-                # Have the index compute an indexer or return None
-                # if it cannot handle
-                indexer, objarr = labels._convert_listlike_indexer(
-                    obj, kind=self.name)
-                if indexer is not None:
-                    return indexer
-
-                # unique index
-                if labels.is_unique:
-                    indexer = check = labels.get_indexer(objarr)
-
-                # non-unique (dups)
-                else:
-                    (indexer,
-                     missing) = labels.get_indexer_non_unique(objarr)
-                    # 'indexer' has dupes, create 'check' using 'missing'
-                    check = np.zeros(len(objarr), dtype=np.intp)
-                    check[missing] = -1
-
-                mask = check == -1
-                if mask.any():
-                    raise KeyError('{mask} not in index'
-                                   .format(mask=objarr[mask]))
-
-                return com._values_from_object(indexer)
-
+                # When setting, missing keys are not allowed, even with .loc:
+                kwargs = {'raise_missing': True if is_setter else
+                          raise_missing}
+                return self._get_listlike_indexer(obj, axis, **kwargs)[1]
         else:
             try:
                 return labels.get_loc(obj)

From 9bcbcfee15e92eda0855358cf0309aadd916a5ee Mon Sep 17 00:00:00 2001
From: Pietro Battiston <me@pietrobattiston.it>
Date: Fri, 15 Jun 2018 21:45:29 +0200
Subject: [PATCH 2/4] BUG: handling of missing values in
 Index._reindex_non_unique with non unique target

---
 pandas/core/indexes/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 6a56278b0da49..ccecb6d4d0713 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -3627,7 +3627,7 @@ def _reindex_non_unique(self, target):
             else:
 
                 # need to retake to have the same size as the indexer
-                indexer[~check] = 0
+                indexer[~check] = -1
 
                 # reset the new indexer to account for the new size
                 new_indexer = np.arange(len(self.take(indexer)))

From 2a64630762158578f28927eefdbac2622225a957 Mon Sep 17 00:00:00 2001
From: Pietro Battiston <me@pietrobattiston.it>
Date: Fri, 15 Jun 2018 21:49:49 +0200
Subject: [PATCH 3/4] CLN: transform lambda into def

---
 pandas/core/indexing.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index aa8de9d2baad8..ad538e2b85169 100755
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -688,7 +688,8 @@ def _align_series(self, indexer, ser, multiindex_indexer=False):
         if isinstance(indexer, tuple):
 
             # flatten np.ndarray indexers
-            ravel = lambda i: i.ravel() if isinstance(i, np.ndarray) else i
+            def ravel(i):
+                return i.ravel() if isinstance(i, np.ndarray) else i
             indexer = tuple(map(ravel, indexer))
 
             aligners = [not com.is_null_slice(idx) for idx in indexer]

From 2b07ab0f91f349875e342cbe495f583177d57aa2 Mon Sep 17 00:00:00 2001
From: Pietro Battiston <me@pietrobattiston.it>
Date: Tue, 19 Jun 2018 11:08:02 +0200
Subject: [PATCH 4/4] CLN: remove unneeded try... except

---
 pandas/core/indexing.py | 48 ++++++++++++++++++++---------------------
 1 file changed, 23 insertions(+), 25 deletions(-)

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index ad538e2b85169..d5e81105dd323 100755
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -1132,33 +1132,31 @@ def _get_listlike_indexer(self, key, axis, raise_missing=False):
         """
         o = self.obj
         ax = o._get_axis(axis)
-        try:
-            # Have the index compute an indexer or return None
-            # if it cannot handle:
-            indexer, keyarr = ax._convert_listlike_indexer(key,
-                                                           kind=self.name)
-            # We only act on all found values:
-            if indexer is not None and (indexer != -1).all():
-                self._validate_read_indexer(key, indexer, axis,
-                                            raise_missing=raise_missing)
-                return ax[indexer], indexer
-
-            if ax.is_unique:
-                # If we are trying to get actual keys from empty Series, we
-                # patiently wait for a KeyError later on - otherwise, convert
-                if len(ax) or not len(key):
-                    key = self._convert_for_reindex(key, axis)
-                indexer = ax.get_indexer_for(key)
-                keyarr = ax.reindex(keyarr)[0]
-            else:
-                keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr)
 
-            self._validate_read_indexer(keyarr, indexer,
-                                        o._get_axis_number(axis),
+        # Have the index compute an indexer or return None
+        # if it cannot handle:
+        indexer, keyarr = ax._convert_listlike_indexer(key,
+                                                       kind=self.name)
+        # We only act on all found values:
+        if indexer is not None and (indexer != -1).all():
+            self._validate_read_indexer(key, indexer, axis,
                                         raise_missing=raise_missing)
-            return keyarr, indexer
-        except (KeyError, IndexingError) as detail:
-            raise self._exception(detail)
+            return ax[indexer], indexer
+
+        if ax.is_unique:
+            # If we are trying to get actual keys from empty Series, we
+            # patiently wait for a KeyError later on - otherwise, convert
+            if len(ax) or not len(key):
+                key = self._convert_for_reindex(key, axis)
+            indexer = ax.get_indexer_for(key)
+            keyarr = ax.reindex(keyarr)[0]
+        else:
+            keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr)
+
+        self._validate_read_indexer(keyarr, indexer,
+                                    o._get_axis_number(axis),
+                                    raise_missing=raise_missing)
+        return keyarr, indexer
 
     def _getitem_iterable(self, key, axis=None):
         """