|
13 | 13 | from pandas._libs import lib, tslibs
|
14 | 14 |
|
15 | 15 | from pandas import compat
|
16 |
| -from pandas.compat import iteritems, PY36, OrderedDict |
| 16 | +from pandas.compat import iteritems, PY2, PY36, OrderedDict |
17 | 17 | from pandas.core.dtypes.generic import ABCSeries, ABCIndex, ABCIndexClass
|
18 |
| -from pandas.core.dtypes.common import is_integer |
| 18 | +from pandas.core.dtypes.common import (is_integer, is_integer_dtype, |
| 19 | + is_numeric_dtype, is_number, |
| 20 | + is_scalar, ensure_platform_int) |
19 | 21 | from pandas.core.dtypes.inference import _iterable_not_string
|
20 | 22 | from pandas.core.dtypes.missing import isna, isnull, notnull # noqa
|
21 | 23 | from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
|
@@ -430,3 +432,73 @@ def _pipe(obj, func, *args, **kwargs):
|
430 | 432 | return func(*args, **kwargs)
|
431 | 433 | else:
|
432 | 434 | return func(obj, *args, **kwargs)
|
| 435 | + |
| 436 | + |
def ensure_within_integer_bounds(value, dtype):
    """Ensure that ``value`` is within the integer bounds in ``dtype``.

    Parameters
    ----------
    value : a number or array of numbers
    dtype : a numpy integer dtype

    Raises
    ------
    ValueError
        If any element of ``value`` is outside the bounds given by
        ``np.iinfo(dtype)``. On Python 2 only, also raised when ``value``
        is neither a number nor of a numeric dtype.
    """
    if PY2:
        # python 2 allows e.g. "a" < 1, avoid this
        if not (is_number(value) or is_numeric_dtype(value)):
            # report the offending *type*, as the message text promises
            msg = "value must be a number, was type {}"
            raise ValueError(msg.format(type(value).__name__))

    # check if value is within integer bounds
    iinfo = np.iinfo(dtype)
    # scalars are wrapped so ``.any()`` is available on the comparison;
    # array-likes are only read, so ``asarray`` avoids a needless copy
    value_arr = np.array([value]) if is_scalar(value) else np.asarray(value)
    if (value_arr < iinfo.min).any() or (value_arr > iinfo.max).any():
        msg = "Value {} out of bound for dtype {}"
        raise ValueError(msg.format(value, dtype))
| 461 | + |
| 462 | + |
def searchsorted_integer(arr, value, side="left", sorter=None):
    """
    Run ``arr.searchsorted(value)`` for an integer-dtype ``arr``.

    The lookup is faster when ``value`` has the same dtype as ``arr``, so
    ``value`` is cast to ``arr.dtype`` whenever that loses no information.

    See :func:`searchsorted` for a more general searchsorted implementation.

    Parameters
    ----------
    arr : object exposing ``dtype`` and ``searchsorted``
        Presumably a numpy integer array; confirm against callers.
    value : a number or array of numbers
    side : passed through to ``arr.searchsorted``
    sorter : optional
        Converted with ``ensure_platform_int`` when given, then passed
        through to ``arr.searchsorted``.

    Returns
    -------
    Whatever ``arr.searchsorted`` returns.

    Raises
    ------
    ValueError
        From :func:`ensure_within_integer_bounds`, when ``value`` does not
        fit within the bounds of ``arr.dtype``.
    """
    ensure_within_integer_bounds(value, arr.dtype)

    if sorter is not None:
        sorter = ensure_platform_int(sorter)

    # Decide whether value can be cast to arr's dtype for a faster lookup:
    # integers and integer-dtype input always qualify ...
    castable = is_integer(value) or is_integer_dtype(value)
    if not castable:
        # ... and so does anything reporting itself as a whole number:
        # float 2.0 becomes int 2, but float 2.2 must stay a float
        castable = hasattr(value, 'is_integer') and value.is_integer()

    if castable:
        value = np.asarray(value, dtype=arr.dtype)

    return arr.searchsorted(value, side=side, sorter=sorter)
| 485 | + |
| 486 | + |
def searchsorted(arr, value, side="left", sorter=None):
    """
    Call ``arr.searchsorted(value)``, adjusting dtypes where that helps.

    :func:`numpy.searchsorted` is only fast when ``value`` has the same
    dtype as the searched array; otherwise numpy recasts ``arr`` to a higher
    dtype, which is slow. Integer-dtype arrays are therefore routed through
    :func:`searchsorted_integer`, which aligns the dtype of ``value`` when
    possible. Every other dtype goes straight to ``arr.searchsorted``.

    See :meth:`Index.searchsorted` for details on parameters and return value.
    """
    if sorter is not None:
        sorter = ensure_platform_int(sorter)

    # Non-integer arrays need no dtype alignment: delegate directly.
    if not is_integer_dtype(arr):
        return arr.searchsorted(value, side=side, sorter=sorter)

    return searchsorted_integer(arr, value, side=side, sorter=sorter)
0 commit comments