15
15
from pandas ._libs import lib , tslibs
16
16
from pandas .core .dtypes .cast import construct_1d_object_array_from_listlike
17
17
from pandas import compat
18
- from pandas .compat import iteritems , PY2 , PY36 , OrderedDict
18
+ from pandas .compat import iteritems , PY36 , OrderedDict
19
19
from pandas .core .dtypes .generic import ABCSeries , ABCIndex , ABCIndexClass
20
- from pandas .core .dtypes .common import (is_integer , is_integer_dtype ,
21
- is_bool_dtype , is_extension_array_dtype ,
22
- is_array_like ,
23
- is_float_dtype , is_object_dtype ,
24
- is_categorical_dtype , is_numeric_dtype ,
25
- is_scalar , ensure_platform_int )
20
+ from pandas .core .dtypes .common import (
21
+ is_integer , is_integer_dtype , is_bool_dtype ,
22
+ is_extension_array_dtype , is_array_like , is_object_dtype ,
23
+ is_categorical_dtype , is_numeric_dtype , is_scalar , ensure_platform_int )
26
24
from pandas .core .dtypes .inference import _iterable_not_string
27
25
from pandas .core .dtypes .missing import isna , isnull , notnull # noqa
28
26
@@ -487,58 +485,47 @@ def f(x):
487
485
return f
488
486
489
487
490
- def ensure_integer_dtype (arr , value ):
488
+ def searchsorted_integer (arr , value , side = "left" , sorter = None ):
491
489
"""
492
- Ensure optimal dtype for :func:`searchsorted_integer` is returned.
490
+ searchsorted implementation for searching integer arrays.
491
+
492
+ We get a speedup if we ensure the dtype of arr and value are the same
493
+ (if possible) before searching, as numpy implicitly converts the dtypes
494
+ if they're different, which would cause a slowdown.
495
+
496
+ See :func:`searchsorted` for a more general searchsorted implementation.
493
497
494
498
Parameters
495
499
----------
496
- arr : a numpy integer array
497
- value : a number or array of numbers
500
+ arr : numpy.array
501
+ a numpy array of integers
502
+ value : int or numpy.array
503
+ an integer or an array of integers that we want to find the
504
+ location(s) for in `arr`
505
+ side : str
506
+ One of {'left', 'right'}
507
+ sorter : numpy.array, optional
498
508
499
509
Returns
500
510
-------
501
- dtype : an numpy integer dtype
502
-
503
- Raises
504
- ------
505
- TypeError : if value is not a number
506
- """
507
- value_arr = np .array ([value ]) if is_scalar (value ) else np .array (value )
508
-
509
- if PY2 and not is_numeric_dtype (value_arr ):
510
- # python 2 allows "a" < 1, avoid such nonsense
511
- msg = "value must be numeric, was type {}"
512
- raise TypeError (msg .format (value ))
513
-
514
- iinfo = np .iinfo (arr .dtype )
515
- if not ((value_arr < iinfo .min ).any () or (value_arr > iinfo .max ).any ()):
516
- return arr .dtype
517
- else :
518
- return value_arr .dtype
519
-
520
-
521
- def searchsorted_integer (arr , value , side = "left" , sorter = None ):
522
- """
523
- searchsorted implementation, but only for integer arrays.
524
-
525
- We get a speedup if the dtype of arr and value is the same.
526
-
527
- See :func:`searchsorted` for a more general searchsorted implementation.
511
+ int or numpy.array
512
+ The location(s) of `value` in `arr`.
528
513
"""
529
514
if sorter is not None :
530
515
sorter = ensure_platform_int (sorter )
531
516
532
- dtype = ensure_integer_dtype (arr , value )
533
-
534
- if is_integer (value ) or is_integer_dtype (value ):
535
- value = np .asarray (value , dtype = dtype )
536
- elif hasattr (value , 'is_integer' ) and value .is_integer ():
537
- # float 2.0 can be converted to int 2 for better speed,
538
- # but float 2.2 should *not* be converted to int 2
539
- value = np .asarray (value , dtype = dtype )
517
+ # below we try to give `value` the same dtype as `arr`, while guarding
518
+ # against integer overflows. If the value of `value` is outside of the
519
+ # bounds of `arr`'s dtype, `arr` would be recast by numpy, causing a slower search.
520
+ value_arr = np .array ([value ]) if is_scalar (value ) else np .array (value )
521
+ iinfo = np .iinfo (arr .dtype )
522
+ if (value_arr >= iinfo .min ).all () and (value_arr <= iinfo .max ).all ():
523
+ dtype = arr .dtype
524
+ else :
525
+ dtype = value_arr .dtype
526
+ value = np .asarray (value , dtype = dtype )
540
527
541
- return np .searchsorted (arr , value , side = side , sorter = sorter )
528
+ return arr .searchsorted (value , side = side , sorter = sorter )
542
529
543
530
544
531
def searchsorted (arr , value , side = "left" , sorter = None ):
@@ -550,18 +537,30 @@ def searchsorted(arr, value, side="left", sorter=None):
550
537
the order of `arr` would be preserved.
551
538
552
539
See :class:`IndexOpsMixin.searchsorted` for more details and examples.
540
+
541
+ Parameters
542
+ ----------
543
+ arr : numpy.array or ExtensionArray
544
+ value : scalar or numpy.array
545
+ side : str
546
+ One of {'left', 'right'}
547
+ sorter : numpy.array, optional
548
+
549
+ Returns
550
+ -------
551
+ int or numpy.array
552
+ The location(s) of `value` in `arr`.
553
553
"""
554
554
if sorter is not None :
555
555
sorter = ensure_platform_int (sorter )
556
556
557
- if is_integer_dtype (arr ):
557
+ if is_integer_dtype (arr ) and (
558
+ is_integer (value ) or is_integer_dtype (value )):
558
559
return searchsorted_integer (arr , value , side = side , sorter = sorter )
559
- elif (is_object_dtype (arr ) or is_float_dtype (arr ) or
560
- is_categorical_dtype (arr )):
561
- return arr .searchsorted (value , side = side , sorter = sorter )
562
- else :
563
- # fallback solution. E.g. arr is an array with dtype='datetime64[ns]'
564
- # and value is a pd.Timestamp, need to convert value
560
+ if not (is_object_dtype (arr ) or is_numeric_dtype (arr ) or
561
+ is_categorical_dtype (arr )):
562
+ # E.g. if `arr` is an array with dtype='datetime64[ns]'
563
+ # and `value` is a pd.Timestamp, we may need to convert value
565
564
from pandas .core .series import Series
566
565
value = Series (value )._values
567
- return arr .searchsorted (value , side = side , sorter = sorter )
566
+ return arr .searchsorted (value , side = side , sorter = sorter )
0 commit comments