From f3c6fc18e0d143fa007204fd055df13510140454 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 25 Apr 2020 10:03:57 -0700 Subject: [PATCH] REF: remove need to override get_indexer_non_unique in DatetimeIndexOpsMixin --- pandas/_libs/index.pyx | 4 ++++ pandas/core/indexes/base.py | 27 ++++++++++++++++++++++++--- pandas/core/indexes/datetimelike.py | 26 ++------------------------ pandas/core/indexes/datetimes.py | 5 ----- pandas/core/indexes/timedeltas.py | 5 ----- 5 files changed, 30 insertions(+), 37 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index d8e0d9c6bd7ab..5f04c2c7cd14e 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -441,6 +441,10 @@ cdef class DatetimeEngine(Int64Engine): except KeyError: raise KeyError(val) + def get_indexer_non_unique(self, targets): + # we may get datetime64[ns] or timedelta64[ns], cast these to int64 + return super().get_indexer_non_unique(targets.view("i8")) + def get_indexer(self, values): self._ensure_mapping_populated() if values.dtype != self._get_box_dtype(): diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 5cad76fd18c58..69e9b77633b56 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -13,7 +13,7 @@ from pandas._libs.tslibs import OutOfBoundsDatetime, Timestamp from pandas._libs.tslibs.period import IncompatibleFrequency from pandas._libs.tslibs.timezones import tz_compare -from pandas._typing import Label +from pandas._typing import DtypeObj, Label from pandas.compat import set_function_name from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, Substitution, cache_readonly, doc @@ -4626,6 +4626,10 @@ def get_indexer_non_unique(self, target): if pself is not self or ptarget is not target: return pself.get_indexer_non_unique(ptarget) + if not self._is_comparable_dtype(target.dtype): + no_matches = -1 * np.ones(self.shape, dtype=np.intp) + return no_matches, no_matches + if 
is_categorical_dtype(target.dtype): tgt_values = np.asarray(target) else: @@ -4651,16 +4655,33 @@ def get_indexer_for(self, target, **kwargs): indexer, _ = self.get_indexer_non_unique(target, **kwargs) return indexer - def _maybe_promote(self, other): - # A hack, but it works + def _maybe_promote(self, other: "Index"): + """ + When dealing with an object-dtype Index and a non-object Index, see + if we can upcast the object-dtype one to improve performance. + """ if self.inferred_type == "date" and isinstance(other, ABCDatetimeIndex): return type(other)(self), other + elif self.inferred_type == "timedelta" and isinstance(other, ABCTimedeltaIndex): + # TODO: we don't have tests that get here + return type(other)(self), other elif self.inferred_type == "boolean": if not is_object_dtype(self.dtype): return self.astype("object"), other.astype("object") + + if not is_object_dtype(self.dtype) and is_object_dtype(other.dtype): + # Reverse op so we don't need to re-implement on the subclasses + other, self = other._maybe_promote(self) + return self, other + def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: + """ + Can we compare values of the given dtype to our own? + """ + return True + def groupby(self, values) -> PrettyDict[Hashable, np.ndarray]: """ Group the index labels by a given array of values. 
diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index cf653a6875a9c..f40fdc469bb92 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -8,14 +8,13 @@ from pandas._libs import NaT, iNaT, join as libjoin, lib from pandas._libs.tslibs import timezones -from pandas._typing import DtypeObj, Label +from pandas._typing import Label from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError from pandas.util._decorators import Appender, cache_readonly, doc from pandas.core.dtypes.common import ( ensure_int64, - ensure_platform_int, is_bool_dtype, is_dtype_equal, is_integer, @@ -31,7 +30,7 @@ from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin from pandas.core.base import IndexOpsMixin import pandas.core.indexes.base as ibase -from pandas.core.indexes.base import Index, _index_shared_docs, ensure_index +from pandas.core.indexes.base import Index, _index_shared_docs from pandas.core.indexes.extension import ( ExtensionIndex, inherit_names, @@ -99,12 +98,6 @@ class DatetimeIndexOpsMixin(ExtensionIndex): def is_all_dates(self) -> bool: return True - def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: - """ - Can we compare values of the given dtype to our own? 
- """ - raise AbstractMethodError(self) - # ------------------------------------------------------------------------ # Abstract data attributes @@ -430,21 +423,6 @@ def _partial_date_slice( # try to find the dates return (lhs_mask & rhs_mask).nonzero()[0] - @Appender(Index.get_indexer_non_unique.__doc__) - def get_indexer_non_unique(self, target): - target = ensure_index(target) - pself, ptarget = self._maybe_promote(target) - if pself is not self or ptarget is not target: - return pself.get_indexer_non_unique(ptarget) - - if not self._is_comparable_dtype(target.dtype): - no_matches = -1 * np.ones(self.shape, dtype=np.intp) - return no_matches, no_matches - - tgt_values = target.asi8 - indexer, missing = self._engine.get_indexer_non_unique(tgt_values) - return ensure_platform_int(indexer), missing - # -------------------------------------------------------------------- __add__ = make_wrapped_arith_op("__add__") diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 649d4e6dfc384..1b43176d4b3e3 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -538,11 +538,6 @@ def _validate_partial_date_slice(self, reso: str): # _parsed_string_to_bounds allows it. 
raise KeyError - def _maybe_promote(self, other): - if other.inferred_type == "date": - other = DatetimeIndex(other) - return self, other - def get_loc(self, key, method=None, tolerance=None): """ Get integer location for requested label diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index ad698afaedb59..741951d480d18 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -197,11 +197,6 @@ def astype(self, dtype, copy=True): return Index(result.astype("i8"), name=self.name) return DatetimeIndexOpsMixin.astype(self, dtype, copy=copy) - def _maybe_promote(self, other): - if other.inferred_type == "timedelta": - other = TimedeltaIndex(other) - return self, other - def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: """ Can we compare values of the given dtype to our own?