Skip to content

Commit a96a5f1

Browse files
committed
* add docstrings, fix comments
* simplify code and make get_indexer_and_fill method static
1 parent d900f1a commit a96a5f1

File tree

2 files changed

+53
-11
lines changed

2 files changed

+53
-11
lines changed

pandas/_libs/index.pyx

Lines changed: 52 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -606,15 +606,56 @@ cdef class BaseMultiIndexCodesEngine:
606606
in zip(self.levels, zip(*target))]
607607
return self._codes_to_ints(np.array(level_codes, dtype='uint64').T)
608608

609-
def get_indexer(self, object target, object limit=None) -> np.ndarray:
609+
def get_indexer(self, object target) -> np.ndarray:
610+
"""
611+
Gets an indexer, i.e. a set of indexes into `self`'s values for the
612+
values in `target`, where -1 represents a value in `target` not existing
613+
in (the cross-product of) `self.levels`
614+
615+
Parameters
616+
----------
617+
target : list-like of keys
618+
Each key is a tuple, with a label for each level of the index
619+
620+
Returns
621+
-------
622+
1-dimensional array of dtype int64 of the index
623+
"""
610624
lab_ints = self._extract_level_codes(target)
611625
return self._base.get_indexer(self, lab_ints)
612626

613-
def get_indexer_and_fill(self, object values, object target,
627+
@staticmethod
628+
def get_indexer_and_fill(object values, object target,
614629
object method, object limit = None) -> np.ndarray:
615-
""" get an indexer for `target`, a sortable, array-like collection of
616-
values which are themselves comparable to `values`, which should be the
617-
index values of the MultiIndex object for which `self` is the engine """
630+
"""
631+
Gets an indexer, i.e. a set of indexes into `values`, for the values in
632+
`target`, where the index of a value missing from `values` is filled as follows.
633+
634+
If method is "backfill" then the index for a value in `target` which
635+
does not exist in `values` is the index of the next match, or -1 if the
636+
value is larger than the largest value in `values`.
637+
638+
Similarly, if the method is "pad" then the index for a value in `target`
639+
which does not exist in `values` is the index of the previous match, or
640+
-1 if the value is smaller than the smallest value in `values`.
641+
642+
Parameters
643+
----------
644+
values : list-like of tuples
645+
must be sorted and all have the same length
646+
target: list-like of tuples
647+
need not be sorted, but all must have the same length, which must be
648+
the same as the length of all tuples in `values`
649+
method: string
650+
"backfill" or "pad"
651+
limit: int, optional
652+
if provided, limit the number of fills to this value
653+
654+
Returns
655+
-------
656+
np.ndarray[int64_t, ndim=1] of the indexer of `target` into `values`,
657+
filled with the `method` (and optionally `limit`) specified
658+
"""
618659
if method not in ("backfill", "pad"):
619660
raise ValueError(
620661
f"{method} is not a valid method value; only 'backfill' and "
@@ -636,8 +677,9 @@ cdef class BaseMultiIndexCodesEngine:
636677
np.empty((num_target_values,)).astype('int64')
637678

638679
# `values` and `target_values` are both sorted, so we walk through them
639-
# and memoize the set of indices in the (implicit) merged sorted list,
640-
# the effect of which is to create a factorization for the (sorted)
680+
# and memoize the (ordered) set of indices in the (implicit) merged-and
681+
# sorted list of the two which belong to each of them;
682+
# the effect of this is to create a factorization for the (sorted)
641683
# merger of the index values, where `new_codes` and `new_target_codes`
642684
# are the subset of the factors which appear in `values` and `target`,
643685
# respectively
@@ -665,9 +707,9 @@ cdef class BaseMultiIndexCodesEngine:
665707

666708
# get the indexer, and undo the sorting of `target.values`
667709
sorted_indexer = (
668-
algos.backfill(new_codes, new_target_codes, limit=limit)
669-
if method == "backfill" else
670-
algos.pad(new_codes, new_target_codes, limit=limit)
710+
(algos.backfill if method == "backfill" else algos.pad)(
711+
new_codes, new_target_codes, limit=limit
712+
)
671713
)
672714
return sorted_indexer[np.argsort(target_order)]
673715

pandas/core/indexes/multi.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2351,7 +2351,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
23512351
raise NotImplementedError(
23522352
"tolerance not implemented yet for MultiIndex"
23532353
)
2354-
indexer = self._engine.get_indexer_and_fill(
2354+
indexer = self._engine.__class__.get_indexer_and_fill(
23552355
self.values, target, method=method, limit=limit
23562356
)
23572357
elif method == "nearest":

0 commit comments

Comments
 (0)