|
13 | 13 | import numpy as np |
14 | 14 |
|
15 | 15 | from pandas._libs import lib, tslibs |
| 16 | +from pandas.compat import PY36, OrderedDict, iteritems |
| 17 | + |
16 | 18 | from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike |
17 | | -from pandas import compat |
18 | | -from pandas.compat import iteritems, PY36, OrderedDict |
19 | | -from pandas.core.dtypes.generic import ABCSeries, ABCIndex, ABCIndexClass |
20 | 19 | from pandas.core.dtypes.common import ( |
21 | | - is_integer, is_integer_dtype, is_bool_dtype, |
22 | | - is_extension_array_dtype, is_array_like, is_object_dtype, |
23 | | - is_categorical_dtype, is_numeric_dtype, is_scalar, ensure_platform_int) |
| 20 | + ensure_platform_int, is_array_like, is_bool_dtype, is_categorical_dtype, |
| 21 | + is_extension_array_dtype, is_integer, is_integer_dtype, is_numeric_dtype, |
| 22 | + is_object_dtype, is_scalar) |
| 23 | +from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries |
24 | 24 | from pandas.core.dtypes.inference import _iterable_not_string |
25 | 25 | from pandas.core.dtypes.missing import isna, isnull, notnull # noqa |
26 | 26 |
|
| 27 | +from pandas import compat |
| 28 | + |
27 | 29 |
|
28 | 30 | class SettingWithCopyError(ValueError): |
29 | 31 | pass |
@@ -485,87 +487,79 @@ def f(x): |
485 | 487 | return f |
486 | 488 |
|
487 | 489 |
|
488 | | -def searchsorted_integer(arr, value, side="left", sorter=None): |
489 | | - """ |
490 | | - searchsorted implementation for searching integer arrays. |
491 | | -
|
492 | | - We get a speedup if we ensure the dtype of arr and value are the same |
493 | | - (if possible) before searching, as numpy implicitly converts the dtypes |
494 | | - if they're different, which would cause a slowdown. |
495 | | -
|
496 | | - See :func:`searchsorted` for a more general searchsorted implementation. |
497 | | -
|
498 | | - Parameters |
499 | | - ---------- |
500 | | - arr : numpy.array |
501 | | - a numpy array of integers |
502 | | - value : int or numpy.array |
503 | | - an integer or an array of integers that we want to find the |
504 | | - location(s) for in `arr` |
505 | | - side : str |
506 | | - One of {'left', 'right'} |
507 | | - sorter : numpy.array, optional |
508 | | -
|
509 | | - Returns |
510 | | - ------- |
511 | | - int or numpy.array |
512 | | - The location(s) of `value` in `arr`. |
513 | | - """ |
514 | | - from .arrays.array_ import array |
515 | | - if sorter is not None: |
516 | | - sorter = ensure_platform_int(sorter) |
517 | | - |
518 | | - # below we try to give `value` the same dtype as `arr`, while guarding |
519 | | - # against integer overflows. If the value of `value` is outside of the |
520 | | - # bound of `arr`, `arr` would be recast by numpy, causing a slower search. |
521 | | - value_arr = np.array([value]) if is_scalar(value) else np.array(value) |
522 | | - iinfo = np.iinfo(arr.dtype.type) |
523 | | - if (value_arr >= iinfo.min).all() and (value_arr <= iinfo.max).all(): |
524 | | - dtype = arr.dtype |
525 | | - else: |
526 | | - dtype = value_arr.dtype |
527 | | - |
528 | | - if is_scalar(value): |
529 | | - value = dtype.type(value) |
530 | | - else: |
531 | | - value = array(value, dtype=dtype) |
532 | | - |
533 | | - return arr.searchsorted(value, side=side, sorter=sorter) |
534 | | - |
535 | | - |
536 | 490 | def searchsorted(arr, value, side="left", sorter=None): |
537 | 491 | """ |
538 | 492 | Find indices where elements should be inserted to maintain order. |
539 | 493 |
|
540 | | - Find the indices into a sorted array-like `arr` such that, if the |
| 494 | + .. versionadded:: 0.25.0 |
| 495 | +
|
| 496 | + Find the indices into a sorted array `arr` (a) such that, if the |
541 | 497 | corresponding elements in `value` were inserted before the indices, |
542 | | - the order of `arr` would be preserved. |
| 498 | + the order of `arr` would be preserved. |
| 499 | +
|
| 500 | + Assuming that `arr` is sorted: |
543 | 501 |
|
544 | | - See :class:`IndexOpsMixin.searchsorted` for more details and examples. |
| 502 | + ====== ================================ |
| 503 | + `side` returned index `i` satisfies |
| 504 | + ====== ================================ |
| 505 | + left ``arr[i-1] < value <= arr[i]`` |
| 506 | + right ``arr[i-1] <= value < arr[i]`` |
| 507 | + ====== ================================ |
545 | 508 |
|
546 | 509 | Parameters |
547 | 510 | ---------- |
548 | | - arr : numpy.array or ExtensionArray |
549 | | - value : scalar or numpy.array |
550 | | - side : str |
551 | | - One of {'left', 'right'} |
552 | | - sorter : numpy.array, optional |
| 511 | + arr : numpy.array or ExtensionArray |
| 512 | + array to search in. Cannot be Index, Series or PandasArray, as that |
| 513 | + would cause a RecursionError. |
| 514 | + value : array_like |
| 515 | + Values to insert into `arr`. |
| 516 | + side : {'left', 'right'}, optional |
| 517 | + If 'left', the index of the first suitable location found is given. |
| 518 | + If 'right', return the last such index. If there is no suitable |
| 519 | + index, return either 0 or N (where N is the length of `arr`). |
| 520 | + sorter : 1-D array_like, optional |
| 521 | + Optional array of integer indices that sort array `arr` into ascending |
| 522 | + order. They are typically the result of argsort. |
553 | 523 |
|
554 | 524 | Returns |
555 | 525 | ------- |
556 | | - int or numpy.array |
557 | | - The location(s) of `value` in `arr`. |
| 526 | + array of ints |
| 527 | + Array of insertion points with the same shape as `value`. |
| 528 | +
|
| 529 | + See Also |
| 530 | + -------- |
| 531 | + numpy.searchsorted : Similar method from NumPy. |
558 | 532 | """ |
559 | 533 | if sorter is not None: |
560 | 534 | sorter = ensure_platform_int(sorter) |
561 | 535 |
|
562 | 536 | if is_integer_dtype(arr) and ( |
563 | 537 | is_integer(value) or is_integer_dtype(value)): |
564 | | - return searchsorted_integer(arr, value, side=side, sorter=sorter) |
565 | | - if not (is_object_dtype(arr) or is_numeric_dtype(arr) or |
566 | | - is_categorical_dtype(arr)): |
| 538 | + from .arrays.array_ import array |
| 539 | + # if `arr` and `value` have different dtypes, `arr` would be |
| 540 | + # recast by numpy, causing a slow search. |
| 541 | + # Before searching below, we therefore try to give `value` the |
| 542 | + # same dtype as `arr`, while guarding against integer overflows. |
| 543 | + iinfo = np.iinfo(arr.dtype.type) |
| 544 | + value_arr = np.array([value]) if is_scalar(value) else np.array(value) |
| 545 | + if (value_arr >= iinfo.min).all() and (value_arr <= iinfo.max).all(): |
| 546 | + # value within bounds, so no overflow, so can convert value dtype |
| 547 | + # to dtype of arr |
| 548 | + dtype = arr.dtype |
| 549 | + else: |
| 550 | + dtype = value_arr.dtype |
| 551 | + |
| 552 | + if is_scalar(value): |
| 553 | + value = dtype.type(value) |
| 554 | + else: |
| 555 | + value = array(value, dtype=dtype) |
| 556 | + elif not (is_object_dtype(arr) or is_numeric_dtype(arr) or |
| 557 | + is_categorical_dtype(arr)): |
| 558 | + from pandas.core.series import Series |
567 | 559 | # E.g. if `arr` is an array with dtype='datetime64[ns]' |
568 | 560 | # and `value` is a pd.Timestamp, we may need to convert value |
569 | | - from pandas.core.series import Series |
570 | | - value = Series(value)._values |
571 | | - return arr.searchsorted(value, side=side, sorter=sorter) |
| 561 | + value_ser = Series(value)._values |
| 562 | + value = value_ser[0] if is_scalar(value) else value_ser |
| 563 | + |
| 564 | + result = arr.searchsorted(value, side=side, sorter=sorter) |
| 565 | + return result |
0 commit comments