collect into one function

topper-123 · topper-123 · commit 3f841c77cdf6 · 2019-02-09T18:55:27.000Z
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
@@ -555,17 +555,17 @@ def searchsorted(self, value, side="left", sorter=None):
         .. versionadded:: 0.24.0
 
         Find the indices into a sorted array `self` (a) such that, if the
-        corresponding elements in `v` were inserted before the indices, the
-        order of `self` would be preserved.
+        corresponding elements in `value` were inserted before the indices,
+        the order of `self` would be preserved.
 
-        Assuming that `a` is sorted:
+        Assuming that `self` is sorted:
 
-        ======  ============================
+        ======  ================================
         `side`  returned index `i` satisfies
-        ======  ============================
-        left    ``self[i-1] < v <= self[i]``
-        right   ``self[i-1] <= v < self[i]``
-        ======  ============================
+        ======  ================================
+        left    ``self[i-1] < value <= self[i]``
+        right   ``self[i-1] <= value < self[i]``
+        ======  ================================
 
         Parameters
         ----------
@@ -581,7 +581,7 @@ def searchsorted(self, value, side="left", sorter=None):
 
         Returns
         -------
-        indices : array of ints
+        array of ints
             Array of insertion points with the same shape as `value`.
 
         See Also
diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py
@@ -4,14 +4,15 @@
 
 from pandas._libs import lib
 from pandas.compat.numpy import function as nv
+from pandas.util._decorators import Appender
 from pandas.util._validators import validate_fillna_kwargs
 
 from pandas.core.dtypes.dtypes import ExtensionDtype
 from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
 from pandas.core.dtypes.inference import is_array_like, is_list_like
 
 from pandas import compat
-from pandas.core import nanops
+from pandas.core import common as com, nanops
 from pandas.core.missing import backfill_1d, pad_1d
 
 from .base import ExtensionArray, ExtensionOpsMixin
@@ -423,6 +424,11 @@ def to_numpy(self, dtype=None, copy=False):
 
         return result
 
+    @Appender(ExtensionArray.searchsorted.__doc__)
+    def searchsorted(self, value, side='left', sorter=None):
+        return com.searchsorted(self.to_numpy(), value,
+                                side=side, sorter=sorter)
+
     # ------------------------------------------------------------------------
     # Ops
 
diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -1514,15 +1514,11 @@ def factorize(self, sort=False, na_sentinel=-1):
         array([3])
         """)
 
-    @Substitution(klass='IndexOpsMixin')
+    @Substitution(klass='Index')
     @Appender(_shared_docs['searchsorted'])
     def searchsorted(self, value, side='left', sorter=None):
-        result = com.searchsorted(self._values, value,
-                                  side=side, sorter=sorter)
-
-        if is_scalar(value):
-            return result if is_scalar(result) else result[0]
-        return result
+        return com.searchsorted(self._values, value,
+                                side=side, sorter=sorter)
 
     def drop_duplicates(self, keep='first', inplace=False):
         inplace = validate_bool_kwarg(inplace, 'inplace')
diff --git a/pandas/core/common.py b/pandas/core/common.py
@@ -13,17 +13,19 @@
 import numpy as np
 
 from pandas._libs import lib, tslibs
+from pandas.compat import PY36, OrderedDict, iteritems
+
 from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
-from pandas import compat
-from pandas.compat import iteritems, PY36, OrderedDict
-from pandas.core.dtypes.generic import ABCSeries, ABCIndex, ABCIndexClass
 from pandas.core.dtypes.common import (
-    is_integer, is_integer_dtype, is_bool_dtype,
-    is_extension_array_dtype, is_array_like, is_object_dtype,
-    is_categorical_dtype, is_numeric_dtype, is_scalar, ensure_platform_int)
+    ensure_platform_int, is_array_like, is_bool_dtype, is_categorical_dtype,
+    is_extension_array_dtype, is_integer, is_integer_dtype, is_numeric_dtype,
+    is_object_dtype, is_scalar)
+from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries
 from pandas.core.dtypes.inference import _iterable_not_string
 from pandas.core.dtypes.missing import isna, isnull, notnull  # noqa
 
+from pandas import compat
+
 
 class SettingWithCopyError(ValueError):
     pass
@@ -485,87 +487,79 @@ def f(x):
     return f
 
 
-def searchsorted_integer(arr, value, side="left", sorter=None):
-    """
-    searchsorted implementation for searching integer arrays.
-
-    We get a speedup if we ensure the dtype of arr and value are the same
-    (if possible) before searchingm as numpy implicitly converts the dtypes
-    if they're different, which would cause a slowdown.
-
-    See :func:`searchsorted` for a more general searchsorted implementation.
-
-    Parameters
-    ----------
-    arr : numpy.array
-        a numpy array of integers
-    value : int or numpy.array
-        an integer or an array of integers that we want to find the
-        location(s) for in `arr`
-    side : str
-        One of {'left', 'right'}
-    sorter : numpy.array, optional
-
-    Returns
-    -------
-    int or numpy.array
-        The locations(s) of `value` in `arr`.
-    """
-    from .arrays.array_ import array
-    if sorter is not None:
-        sorter = ensure_platform_int(sorter)
-
-    # below we try to give `value` the same dtype as `arr`, while guarding
-    # against integer overflows. If the value of `value` is outside of the
-    # bound of `arr`, `arr` would be recast by numpy, causing a slower search.
-    value_arr = np.array([value]) if is_scalar(value) else np.array(value)
-    iinfo = np.iinfo(arr.dtype.type)
-    if (value_arr >= iinfo.min).all() and (value_arr <= iinfo.max).all():
-        dtype = arr.dtype
-    else:
-        dtype = value_arr.dtype
-
-    if is_scalar(value):
-        value = dtype.type(value)
-    else:
-        value = array(value, dtype=dtype)
-
-    return arr.searchsorted(value, side=side, sorter=sorter)
-
-
 def searchsorted(arr, value, side="left", sorter=None):
     """
     Find indices where elements should be inserted to maintain order.
 
-    Find the indices into a sorted array-like `arr` such that, if the
+    .. versionadded:: 0.25.0
+
+    Find the indices into a sorted array `arr` such that, if the
     corresponding elements in `value` were inserted before the indices,
-    the order of `arr` would be preserved.
+    the order of `arr` would be preserved.
+
+    Assuming that `arr` is sorted:
 
-    See :class:`IndexOpsMixin.searchsorted` for more details and examples.
+    ======  ==============================
+    `side`  returned index `i` satisfies
+    ======  ==============================
+    left    ``arr[i-1] < value <= arr[i]``
+    right   ``arr[i-1] <= value < arr[i]``
+    ======  ==============================
 
     Parameters
     ----------
-    arr : numpy.array or ExtensionArray
-    value : scalar or numpy.array
-    side : str
-        One of {'left', 'right'}
-    sorter : numpy.array, optional
+    arr : numpy.ndarray or ExtensionArray
+        Array to search in. Cannot be Index, Series or PandasArray, as that
+        would cause a RecursionError.
+    value : array_like
+        Values to insert into `arr`.
+    side : {'left', 'right'}, optional
+        If 'left', the index of the first suitable location found is given.
+        If 'right', return the last such index.  If there is no suitable
+        index, return either 0 or N (where N is the length of `arr`).
+    sorter : 1-D array_like, optional
+        Optional array of integer indices that sort `arr` into ascending
+        order. They are typically the result of argsort.
 
     Returns
     -------
-    int or numpy.array
-        The locations(s) of `value` in `arr`.
+    int or array of ints
+        Scalar if `value` is a scalar, else an array shaped like `value`.
+
+    See Also
+    --------
+    numpy.searchsorted : Similar method from NumPy.
     """
     if sorter is not None:
         sorter = ensure_platform_int(sorter)
 
     if is_integer_dtype(arr) and (
             is_integer(value) or is_integer_dtype(value)):
-        return searchsorted_integer(arr, value, side=side, sorter=sorter)
-    if not (is_object_dtype(arr) or is_numeric_dtype(arr) or
-            is_categorical_dtype(arr)):
+        from .arrays.array_ import array
+        # if `arr` and `value` have different dtypes, `arr` would be
+        # recast by numpy, causing a slow search.
+        # Before searching below, we therefore try to give `value` the
+        # same dtype as `arr`, while guarding against integer overflows.
+        iinfo = np.iinfo(arr.dtype.type)
+        value_arr = np.array([value]) if is_scalar(value) else np.array(value)
+        if (value_arr >= iinfo.min).all() and (value_arr <= iinfo.max).all():
+            # value within bounds, so no overflow, so can convert value dtype
+            # to dtype of arr
+            dtype = arr.dtype
+        else:
+            dtype = value_arr.dtype
+
+        if is_scalar(value):
+            value = dtype.type(value)
+        else:
+            value = array(value, dtype=dtype)
+    elif not (is_object_dtype(arr) or is_numeric_dtype(arr) or
+              is_categorical_dtype(arr)):
+        from pandas.core.series import Series
         # E.g. if `arr` is an array with dtype='datetime64[ns]'
         # and `value` is a pd.Timestamp, we may need to convert value
-        from pandas.core.series import Series
-        value = Series(value)._values
-    return arr.searchsorted(value, side=side, sorter=sorter)
+        value_ser = Series(value)._values
+        value = value_ser[0] if is_scalar(value) else value_ser
+
+    result = arr.searchsorted(value, side=side, sorter=sorter)
+    return result
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -2388,12 +2388,8 @@ def __rmatmul__(self, other):
     @Substitution(klass='Series')
     @Appender(base._shared_docs['searchsorted'])
     def searchsorted(self, value, side='left', sorter=None):
-        result = com.searchsorted(self._values, value,
-                                  side=side, sorter=sorter)
-
-        if is_scalar(value):
-            return result if is_scalar(result) else result[0]
-        return result
+        return com.searchsorted(self._values, value,
+                                side=side, sorter=sorter)
 
     # -------------------------------------------------------------------
     # Combination
diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py
@@ -9,6 +9,7 @@
 
 import pandas as pd
 from pandas.api.extensions import register_extension_dtype
+from pandas.api.types import is_scalar
 from pandas.core.arrays import PandasArray, integer_array, period_array
 from pandas.tests.extension.decimal import (
     DecimalArray, DecimalDtype, to_decimal)
@@ -254,3 +255,45 @@ def test_array_not_registered(registry_without_decimal):
     result = pd.array(data, dtype=DecimalDtype)
     expected = DecimalArray._from_sequence(data)
     tm.assert_equal(result, expected)
+
+
+class TestArrayAnalytics(object):
+    def test_searchsorted(self, string_dtype):
+        arr = pd.array(['a', 'b', 'c'], dtype=string_dtype)
+
+        result = arr.searchsorted('a', side='left')
+        assert is_scalar(result)
+        assert result == 0
+
+        result = arr.searchsorted('a', side='right')
+        assert is_scalar(result)
+        assert result == 1
+
+    def test_searchsorted_numeric_dtypes_scalar(self, any_real_dtype):
+        arr = pd.array([1, 3, 90], dtype=any_real_dtype)
+        result = arr.searchsorted(30)
+        assert is_scalar(result)
+        assert result == 2
+
+        result = arr.searchsorted([30])
+        expected = np.array([2], dtype=np.intp)
+        tm.assert_numpy_array_equal(result, expected)
+
+    def test_searchsorted_numeric_dtypes_vector(self, any_real_dtype):
+        arr = pd.array([1, 3, 90], dtype=any_real_dtype)
+        result = arr.searchsorted([2, 30])
+        expected = np.array([1, 2], dtype=np.intp)
+        tm.assert_numpy_array_equal(result, expected)
+
+    def test_search_sorted_datetime64_scalar(self):
+        arr = pd.array(pd.date_range('20120101', periods=10, freq='2D'))
+        val = pd.Timestamp('20120102')
+        result = arr.searchsorted(val)
+        assert is_scalar(result)
+        assert result == 1
+
+    def test_searchsorted_sorter(self, any_real_dtype):
+        arr = pd.array([3, 1, 2], dtype=any_real_dtype)
+        result = arr.searchsorted([0, 3], sorter=np.argsort(arr))
+        expected = np.array([0, 2], dtype=np.intp)
+        tm.assert_numpy_array_equal(result, expected)