Skip to content

Commit 093c2be

Browse files
committed
move searchsorted to algorithms.py
1 parent 3f841c7 commit 093c2be

File tree

5 files changed

+92
-89
lines changed

5 files changed

+92
-89
lines changed

pandas/core/algorithms.py

Lines changed: 83 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
ensure_float64, ensure_int64, ensure_object, ensure_platform_int,
2020
ensure_uint64, is_array_like, is_bool_dtype, is_categorical_dtype,
2121
is_complex_dtype, is_datetime64_any_dtype, is_datetime64tz_dtype,
22-
is_datetimelike, is_extension_array_dtype, is_float_dtype,
22+
is_datetimelike, is_extension_array_dtype, is_float_dtype, is_integer,
2323
is_integer_dtype, is_interval_dtype, is_list_like, is_numeric_dtype,
2424
is_object_dtype, is_period_dtype, is_scalar, is_signed_integer_dtype,
2525
is_sparse, is_timedelta64_dtype, is_unsigned_integer_dtype,
@@ -1724,6 +1724,88 @@ def func(arr, indexer, out, fill_value=np.nan):
17241724
return out
17251725

17261726

1727+
# ---- #
1728+
# searchsorted #
1729+
# ---- #
1730+
1731+
def searchsorted(arr, value, side="left", sorter=None):
1732+
"""
1733+
Find indices where elements should be inserted to maintain order.
1734+
1735+
.. versionadded:: 0.25.0
1736+
1737+
Find the indices into a sorted array `arr` such that, if the
1738+
corresponding elements in `value` were inserted before the indices,
1739+
the order of `arr` would be preserved.
1740+
1741+
Assuming that `arr` is sorted:
1742+
1743+
====== ================================
1744+
`side` returned index `i` satisfies
1745+
====== ================================
1746+
left ``arr[i-1] < value <= arr[i]``
1747+
right ``arr[i-1] <= value < arr[i]``
1748+
====== ================================
1749+
1750+
Parameters
1751+
----------
1752+
arr : numpy.ndarray or ExtensionArray
1753+
Array to search in. Cannot be Index, Series or PandasArray, as that
1754+
would cause a RecursionError.
1755+
value : array_like or scalar
1756+
Values to insert into `arr`.
1757+
side : {'left', 'right'}, optional
1758+
If 'left', the index of the first suitable location found is given.
1759+
If 'right', return the last such index. If there is no suitable
1760+
index, return either 0 or N (where N is the length of `arr`).
1761+
sorter : 1-D array_like, optional
1762+
Optional array of integer indices that sort `arr` into ascending
1763+
order. They are typically the result of argsort.
1764+
1765+
Returns
1766+
-------
1767+
array of ints
1768+
Array of insertion points with the same shape as `value`.
1769+
1770+
See Also
1771+
--------
1772+
numpy.searchsorted : Similar method from NumPy.
1773+
"""
1774+
if sorter is not None:
1775+
sorter = ensure_platform_int(sorter)
1776+
1777+
if is_integer_dtype(arr) and (
1778+
is_integer(value) or is_integer_dtype(value)):
1779+
from .arrays.array_ import array
1780+
# if `arr` and `value` have different dtypes, `arr` would be
1781+
# recast by numpy, causing a slow search.
1782+
# Before searching below, we therefore try to give `value` the
1783+
# same dtype as `arr`, while guarding against integer overflows.
1784+
iinfo = np.iinfo(arr.dtype.type)
1785+
value_arr = np.array([value]) if is_scalar(value) else np.array(value)
1786+
if (value_arr >= iinfo.min).all() and (value_arr <= iinfo.max).all():
1787+
# value within bounds, so no overflow, so can convert value dtype
1788+
# to dtype of arr
1789+
dtype = arr.dtype
1790+
else:
1791+
dtype = value_arr.dtype
1792+
1793+
if is_scalar(value):
1794+
value = dtype.type(value)
1795+
else:
1796+
value = array(value, dtype=dtype)
1797+
elif not (is_object_dtype(arr) or is_numeric_dtype(arr) or
1798+
is_categorical_dtype(arr)):
1799+
from pandas.core.series import Series
1800+
# E.g. if `arr` is an array with dtype='datetime64[ns]'
1801+
# and `value` is a pd.Timestamp, we may need to convert value
1802+
value_ser = Series(value)._values
1803+
value = value_ser[0] if is_scalar(value) else value_ser
1804+
1805+
result = arr.searchsorted(value, side=side, sorter=sorter)
1806+
return result
1807+
1808+
17271809
# ---- #
17281810
# diff #
17291811
# ---- #

pandas/core/arrays/numpy_.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@
1212
from pandas.core.dtypes.inference import is_array_like, is_list_like
1313

1414
from pandas import compat
15-
from pandas.core import common as com, nanops
15+
from pandas.core import nanops
16+
from pandas.core.algorithms import searchsorted
1617
from pandas.core.missing import backfill_1d, pad_1d
1718

1819
from .base import ExtensionArray, ExtensionOpsMixin
@@ -426,8 +427,8 @@ def to_numpy(self, dtype=None, copy=False):
426427

427428
@Appender(ExtensionArray.searchsorted.__doc__)
428429
def searchsorted(self, value, side='left', sorter=None):
429-
return com.searchsorted(self.to_numpy(), value,
430-
side=side, sorter=sorter)
430+
return searchsorted(self.to_numpy(), value,
431+
side=side, sorter=sorter)
431432

432433
# ------------------------------------------------------------------------
433434
# Ops

pandas/core/base.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1517,8 +1517,8 @@ def factorize(self, sort=False, na_sentinel=-1):
15171517
@Substitution(klass='Index')
15181518
@Appender(_shared_docs['searchsorted'])
15191519
def searchsorted(self, value, side='left', sorter=None):
1520-
return com.searchsorted(self._values, value,
1521-
side=side, sorter=sorter)
1520+
return algorithms.searchsorted(self._values, value,
1521+
side=side, sorter=sorter)
15221522

15231523
def drop_duplicates(self, keep='first', inplace=False):
15241524
inplace = validate_bool_kwarg(inplace, 'inplace')

pandas/core/common.py

Lines changed: 1 addition & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,7 @@
1717

1818
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
1919
from pandas.core.dtypes.common import (
20-
ensure_platform_int, is_array_like, is_bool_dtype, is_categorical_dtype,
21-
is_extension_array_dtype, is_integer, is_integer_dtype, is_numeric_dtype,
22-
is_object_dtype, is_scalar)
20+
is_array_like, is_bool_dtype, is_extension_array_dtype, is_integer)
2321
from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries
2422
from pandas.core.dtypes.inference import _iterable_not_string
2523
from pandas.core.dtypes.missing import isna, isnull, notnull # noqa
@@ -485,81 +483,3 @@ def f(x):
485483
f = mapper
486484

487485
return f
488-
489-
490-
def searchsorted(arr, value, side="left", sorter=None):
491-
"""
492-
Find indices where elements should be inserted to maintain order.
493-
494-
.. versionadded:: 0.25.0
495-
496-
Find the indices into a sorted array `self` (a) such that, if the
497-
corresponding elements in `value` were inserted before the indices,
498-
the order of `self` would be preserved.
499-
500-
Assuming that `self` is sorted:
501-
502-
====== ================================
503-
`side` returned index `i` satisfies
504-
====== ================================
505-
left ``self[i-1] < value <= self[i]``
506-
right ``self[i-1] <= value < self[i]``
507-
====== ================================
508-
509-
Parameters
510-
----------
511-
arr: numpy.array or ExtensionArray
512-
array to search in. Cannot be Index, Series or PandasArray, as that
513-
would cause a RecursionError.
514-
value : array_like
515-
Values to insert into `arr`.
516-
side : {'left', 'right'}, optional
517-
If 'left', the index of the first suitable location found is given.
518-
If 'right', return the last such index. If there is no suitable
519-
index, return either 0 or N (where N is the length of `self`).
520-
sorter : 1-D array_like, optional
521-
Optional array of integer indices that sort array a into ascending
522-
order. They are typically the result of argsort.
523-
524-
Returns
525-
-------
526-
array of ints
527-
Array of insertion points with the same shape as `value`.
528-
529-
See Also
530-
--------
531-
numpy.searchsorted : Similar method from NumPy.
532-
"""
533-
if sorter is not None:
534-
sorter = ensure_platform_int(sorter)
535-
536-
if is_integer_dtype(arr) and (
537-
is_integer(value) or is_integer_dtype(value)):
538-
from .arrays.array_ import array
539-
# if `arr` and `value` have different dtypes, `arr` would be
540-
# recast by numpy, causing a slow search.
541-
# Before searching below, we therefore try to give `value` the
542-
# same dtype as `arr`, while guarding against integer overflows.
543-
iinfo = np.iinfo(arr.dtype.type)
544-
value_arr = np.array([value]) if is_scalar(value) else np.array(value)
545-
if (value_arr >= iinfo.min).all() and (value_arr <= iinfo.max).all():
546-
# value within bounds, so no overflow, so can convert value dtype
547-
# to dtype of arr
548-
dtype = arr.dtype
549-
else:
550-
dtype = value_arr.dtype
551-
552-
if is_scalar(value):
553-
value = dtype.type(value)
554-
else:
555-
value = array(value, dtype=dtype)
556-
elif not (is_object_dtype(arr) or is_numeric_dtype(arr) or
557-
is_categorical_dtype(arr)):
558-
from pandas.core.series import Series
559-
# E.g. if `arr` is an array with dtype='datetime64[ns]'
560-
# and `value` is a pd.Timestamp, we may need to convert value
561-
value_ser = Series(value)._values
562-
value = value_ser[0] if is_scalar(value) else value_ser
563-
564-
result = arr.searchsorted(value, side=side, sorter=sorter)
565-
return result

pandas/core/series.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2388,8 +2388,8 @@ def __rmatmul__(self, other):
23882388
@Substitution(klass='Series')
23892389
@Appender(base._shared_docs['searchsorted'])
23902390
def searchsorted(self, value, side='left', sorter=None):
2391-
return com.searchsorted(self._values, value,
2392-
side=side, sorter=sorter)
2391+
return algorithms.searchsorted(self._values, value,
2392+
side=side, sorter=sorter)
23932393

23942394
# -------------------------------------------------------------------
23952395
# Combination

0 commit comments

Comments
 (0)