Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1063,6 +1063,7 @@ Performance Improvements
- Improved performance of :func:`DataFrame.median` with ``axis=1`` when bottleneck is not installed (:issue:`16468`)
- Improved performance of :func:`MultiIndex.get_loc` for large indexes, at the cost of a reduction in performance for small ones (:issue:`18519`)
- Improved performance of :func:`MultiIndex.remove_unused_levels` when there are no unused levels, at the cost of a reduction in performance when there are (:issue:`19289`)
- Improved performance of :func:`Index.get_loc` for non-unique indexes (:issue:`19478`)
- Improved performance of pairwise ``.rolling()`` and ``.expanding()`` with ``.cov()`` and ``.corr()`` operations (:issue:`17917`)
- Improved performance of :func:`pandas.core.groupby.GroupBy.rank` (:issue:`15779`)
- Improved performance of variable ``.rolling()`` on ``.min()`` and ``.max()`` (:issue:`19521`)
Expand Down
35 changes: 10 additions & 25 deletions pandas/_libs/index.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -183,32 +183,20 @@ cdef class IndexEngine:

cdef _maybe_get_bool_indexer(self, object val):
    """
    Locate ``val`` among the index values by elementwise comparison.

    Returns
    -------
    int or ndarray
        The integer position when exactly one index value equals
        ``val``; the boolean mask over all index values when more than
        one does.

    Raises
    ------
    KeyError
        If no index value equals ``val``.
    """
    cdef:
        ndarray[cnp.uint8_t, ndim=1, cast=True] indexer
        ndarray[int64_t, ndim=1] found
        int count

    # A single vectorized comparison builds the mask; np.where then gives
    # the positions of the matches, so no per-element loop is needed.
    indexer = self._get_index_values() == val
    found = np.where(indexer)[0]
    count = len(found)

    if count > 1:
        # Several matches: hand back the mask itself.
        return indexer
    if count == 1:
        # Unique match: return its position as a plain Python int.
        return int(found[0])

    raise KeyError(val)

def sizeof(self, deep=False):
""" return the sizeof our mapping """
Expand Down Expand Up @@ -542,9 +530,6 @@ cdef class PeriodEngine(Int64Engine):

return super(PeriodEngine, self).get_indexer_non_unique(ordinal_array)

cdef _get_index_values_for_bool_indexer(self):
    # Reinterpret the engine's index values as 'i8' through .view().
    index_values = self._get_index_values()
    return index_values.view('i8')


cpdef convert_scalar(ndarray arr, object value):
# we don't turn integers
Expand Down
33 changes: 11 additions & 22 deletions pandas/_libs/index_class_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -55,40 +55,29 @@ cdef class {{name}}Engine(IndexEngine):

cdef _maybe_get_bool_indexer(self, object val):
    """
    Locate ``val`` among the index values by elementwise comparison.

    Returns
    -------
    int or ndarray
        The integer position when exactly one index value equals
        ``val``; the boolean mask over all index values when more than
        one does.

    Raises
    ------
    KeyError
        If no index value equals ``val``, or (for every engine except
        Float64) if ``val`` is not an integer object.
    """
    cdef:
        ndarray[cnp.uint8_t, ndim=1, cast=True] indexer
        ndarray[int64_t, ndim=1] found
        ndarray[{{ctype}}] values
        int count = 0

    {{if name != 'Float64'}}
    # Non-float engines can only ever contain integer keys.
    if not util.is_integer_object(val):
        raise KeyError(val)
    {{endif}}

    # A view is needed for some subclasses, such as PeriodEngine:
    values = self._get_index_values().view('{{dtype}}')
    # A single vectorized comparison builds the mask; np.where then gives
    # the positions of the matches, so no per-element loop is needed.
    indexer = values == val
    found = np.where(indexer)[0]
    count = len(found)

    if count > 1:
        # Several matches: hand back the mask itself.
        return indexer
    if count == 1:
        # Unique match: return its position as a plain Python int.
        return int(found[0])

    raise KeyError(val)

cdef _get_index_values_for_bool_indexer(self):
    # Pass the engine's index values through unchanged.
    index_values = self._get_index_values()
    return index_values
{{endif}}

{{endfor}}