|
13 | 13 | import numpy as np
|
14 | 14 |
|
15 | 15 | from pandas._libs import lib, tslibs
|
| 16 | +from pandas.compat import PY36, OrderedDict, iteritems |
| 17 | + |
16 | 18 | from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
|
17 |
| -from pandas import compat |
18 |
| -from pandas.compat import iteritems, PY36, OrderedDict |
19 |
| -from pandas.core.dtypes.generic import ABCSeries, ABCIndex, ABCIndexClass |
20 | 19 | from pandas.core.dtypes.common import (
|
21 |
| - is_integer, is_integer_dtype, is_bool_dtype, |
22 |
| - is_extension_array_dtype, is_array_like, is_object_dtype, |
23 |
| - is_categorical_dtype, is_numeric_dtype, is_scalar, ensure_platform_int) |
| 20 | + ensure_platform_int, is_array_like, is_bool_dtype, is_categorical_dtype, |
| 21 | + is_extension_array_dtype, is_integer, is_integer_dtype, is_numeric_dtype, |
| 22 | + is_object_dtype, is_scalar) |
| 23 | +from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries |
24 | 24 | from pandas.core.dtypes.inference import _iterable_not_string
|
25 | 25 | from pandas.core.dtypes.missing import isna, isnull, notnull # noqa
|
26 | 26 |
|
| 27 | +from pandas import compat |
| 28 | + |
27 | 29 |
|
28 | 30 | class SettingWithCopyError(ValueError):
|
29 | 31 | pass
|
@@ -485,87 +487,79 @@ def f(x):
|
485 | 487 | return f
|
486 | 488 |
|
487 | 489 |
|
488 |
| -def searchsorted_integer(arr, value, side="left", sorter=None): |
489 |
| - """ |
490 |
| - searchsorted implementation for searching integer arrays. |
491 |
| -
|
492 |
| - We get a speedup if we ensure the dtype of arr and value are the same |
493 |
| - (if possible) before searchingm as numpy implicitly converts the dtypes |
494 |
| - if they're different, which would cause a slowdown. |
495 |
| -
|
496 |
| - See :func:`searchsorted` for a more general searchsorted implementation. |
497 |
| -
|
498 |
| - Parameters |
499 |
| - ---------- |
500 |
| - arr : numpy.array |
501 |
| - a numpy array of integers |
502 |
| - value : int or numpy.array |
503 |
| - an integer or an array of integers that we want to find the |
504 |
| - location(s) for in `arr` |
505 |
| - side : str |
506 |
| - One of {'left', 'right'} |
507 |
| - sorter : numpy.array, optional |
508 |
| -
|
509 |
| - Returns |
510 |
| - ------- |
511 |
| - int or numpy.array |
512 |
| - The locations(s) of `value` in `arr`. |
513 |
| - """ |
514 |
| - from .arrays.array_ import array |
515 |
| - if sorter is not None: |
516 |
| - sorter = ensure_platform_int(sorter) |
517 |
| - |
518 |
| - # below we try to give `value` the same dtype as `arr`, while guarding |
519 |
| - # against integer overflows. If the value of `value` is outside of the |
520 |
| - # bound of `arr`, `arr` would be recast by numpy, causing a slower search. |
521 |
| - value_arr = np.array([value]) if is_scalar(value) else np.array(value) |
522 |
| - iinfo = np.iinfo(arr.dtype.type) |
523 |
| - if (value_arr >= iinfo.min).all() and (value_arr <= iinfo.max).all(): |
524 |
| - dtype = arr.dtype |
525 |
| - else: |
526 |
| - dtype = value_arr.dtype |
527 |
| - |
528 |
| - if is_scalar(value): |
529 |
| - value = dtype.type(value) |
530 |
| - else: |
531 |
| - value = array(value, dtype=dtype) |
532 |
| - |
533 |
| - return arr.searchsorted(value, side=side, sorter=sorter) |
534 |
| - |
535 |
| - |
536 | 490 | def searchsorted(arr, value, side="left", sorter=None):
|
537 | 491 | """
|
538 | 492 | Find indices where elements should be inserted to maintain order.
|
539 | 493 |
|
540 |
| - Find the indices into a sorted array-like `arr` such that, if the |
| 494 | + .. versionadded:: 0.25.0 |
| 495 | +
|
| 496 | + Find the indices into a sorted array `arr` such that, if the |
541 | 497 | corresponding elements in `value` were inserted before the indices,
|
542 |
| - the order of `arr` would be preserved. |
| 498 | + the order of `arr` would be preserved. |
| 499 | +
|
| 500 | + Assuming that `arr` is sorted: |
543 | 501 |
|
544 |
| - See :class:`IndexOpsMixin.searchsorted` for more details and examples. |
| 502 | + ====== ================================ |
| 503 | + `side` returned index `i` satisfies |
| 504 | + ====== ================================ |
| 505 | + left ``arr[i-1] < value <= arr[i]`` |
| 506 | + right ``arr[i-1] <= value < arr[i]`` |
| 507 | + ====== ================================ |
545 | 508 |
|
546 | 509 | Parameters
|
547 | 510 | ----------
|
548 |
| - arr : numpy.array or ExtensionArray |
549 |
| - value : scalar or numpy.array |
550 |
| - side : str |
551 |
| - One of {'left', 'right'} |
552 |
| - sorter : numpy.array, optional |
| 511 | + arr : numpy.array or ExtensionArray |
| 512 | + Array to search in. Cannot be Index, Series or PandasArray, as that |
| 513 | + would cause a RecursionError. |
| 514 | + value : array_like |
| 515 | + Values to insert into `arr`. |
| 516 | + side : {'left', 'right'}, optional |
| 517 | + If 'left', the index of the first suitable location found is given. |
| 518 | + If 'right', return the last such index. If there is no suitable |
| 519 | + index, return either 0 or N (where N is the length of `arr`). |
| 520 | + sorter : 1-D array_like, optional |
| 521 | + Optional array of integer indices that sort `arr` into ascending |
| 522 | + order. They are typically the result of argsort. |
553 | 523 |
|
554 | 524 | Returns
|
555 | 525 | -------
|
556 |
| - int or numpy.array |
557 |
| - The locations(s) of `value` in `arr`. |
| 526 | + int or array of ints |
| 527 | + Insertion point(s): a scalar for scalar `value`, else shaped like `value`. |
| 528 | +
|
| 529 | + See Also |
| 530 | + -------- |
| 531 | + numpy.searchsorted : Similar method from NumPy. |
558 | 532 | """
|
559 | 533 | if sorter is not None:
|
560 | 534 | sorter = ensure_platform_int(sorter)
|
561 | 535 |
|
562 | 536 | if is_integer_dtype(arr) and (
|
563 | 537 | is_integer(value) or is_integer_dtype(value)):
|
564 |
| - return searchsorted_integer(arr, value, side=side, sorter=sorter) |
565 |
| - if not (is_object_dtype(arr) or is_numeric_dtype(arr) or |
566 |
| - is_categorical_dtype(arr)): |
| 538 | + from .arrays.array_ import array |
| 539 | + # if `arr` and `value` have different dtypes, `arr` would be |
| 540 | + # recast by numpy, causing a slow search. |
| 541 | + # Before searching below, we therefore try to give `value` the |
| 542 | + # same dtype as `arr`, while guarding against integer overflows. |
| 543 | + iinfo = np.iinfo(arr.dtype.type) |
| 544 | + value_arr = np.array([value]) if is_scalar(value) else np.array(value) |
| 545 | + if (value_arr >= iinfo.min).all() and (value_arr <= iinfo.max).all(): |
| 546 | + # value within bounds, so no overflow, so can convert value dtype |
| 547 | + # to dtype of arr |
| 548 | + dtype = arr.dtype |
| 549 | + else: |
| 550 | + dtype = value_arr.dtype |
| 551 | + |
| 552 | + if is_scalar(value): |
| 553 | + value = dtype.type(value) |
| 554 | + else: |
| 555 | + value = array(value, dtype=dtype) |
| 556 | + elif not (is_object_dtype(arr) or is_numeric_dtype(arr) or |
| 557 | + is_categorical_dtype(arr)): |
| 558 | + from pandas.core.series import Series |
567 | 559 | # E.g. if `arr` is an array with dtype='datetime64[ns]'
|
568 | 560 | # and `value` is a pd.Timestamp, we may need to convert value
|
569 |
| - from pandas.core.series import Series |
570 |
| - value = Series(value)._values |
571 |
| - return arr.searchsorted(value, side=side, sorter=sorter) |
| 561 | + value_ser = Series(value)._values |
| 562 | + value = value_ser[0] if is_scalar(value) else value_ser |
| 563 | + |
| 564 | + result = arr.searchsorted(value, side=side, sorter=sorter) |
| 565 | + return result |
0 commit comments