Skip to content
66 changes: 66 additions & 0 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4937,6 +4937,43 @@ def get_indexer_for(self, target, **kwargs):
indexer, _ = self.get_indexer_non_unique(target)
return indexer

def _get_indexer_non_comparable(self, target: "Index", method, unique: bool = True):
    """
    Produce the lookup result for a target whose dtype is not comparable.

    Called from get_indexer or get_indexer_non_unique once the caller has
    determined that ``target`` can never compare equal to our own values.

    With method=None the lookup is an _equality_ check, so a
    non-comparable target simply has no matches.

    With a method the lookup is an _inequality_ check, which cannot be
    evaluated across non-comparable dtypes, so we raise TypeError.

    Parameters
    ----------
    target : Index
    method : str or None
    unique : bool, default True
        * True if called from get_indexer.
        * False if called from get_indexer_non_unique.

    Returns
    -------
    np.ndarray[np.intp] or tuple of two np.ndarray[np.intp]
        If ``unique``, an array of -1 with the shape of ``target``.
        Otherwise that same array paired with the positions
        ``0 .. len(target) - 1``, all of which are missing.

    Raises
    ------
    TypeError
        If doing an inequality check, i.e. method is not None.
    """
    if method is not None:
        # Report the nested dtype (e.g. the categories' dtype) in the message.
        other = _unpack_nested_dtype(target)
        raise TypeError(f"Cannot compare dtypes {self.dtype} and {other.dtype}")

    # The result is sized by the target, not by self.
    no_matches = np.full(target.shape, -1, dtype=np.intp)
    if not unique:
        # get_indexer_non_unique: every target position is missing
        missing = np.arange(len(target), dtype=np.intp)
        return no_matches, missing

    # get_indexer
    return no_matches

@property
def _index_as_unique(self):
"""
Expand Down Expand Up @@ -4972,6 +5009,14 @@ def _maybe_promote(self, other: "Index"):

return self, other

def _should_compare(self, other: "Index") -> bool:
    """
    Report whether `self == other` could ever contain a non-False entry.

    The check is made against the nested dtype of ``other`` as returned by
    ``_unpack_nested_dtype``. Object dtype is always treated as potentially
    comparable.
    """
    unpacked = _unpack_nested_dtype(other)
    other_dtype = unpacked.dtype
    comparable = self._is_comparable_dtype(other_dtype)
    return comparable or is_object_dtype(other_dtype)

def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
"""
Can we compare values of the given dtype to our own?
Expand Down Expand Up @@ -6119,3 +6164,24 @@ def get_unanimous_names(*indexes: Index) -> Tuple[Label, ...]:
name_sets = [{*ns} for ns in zip_longest(*name_tups)]
names = tuple(ns.pop() if len(ns) == 1 else None for ns in name_sets)
return names


def _unpack_nested_dtype(other: Index) -> Index:
    """
    Return the Index whose dtype should be used for comparability checks.

    For a categorical dtype the relevant dtype is that of the categories,
    so the categories Index is returned; any other Index is returned as-is.

    Parameters
    ----------
    other : Index

    Returns
    -------
    Index
    """
    other_dtype = other.dtype
    if not is_categorical_dtype(other_dtype):
        return other

    # If there is ever a SparseIndex, this could get dispatched
    # here too.
    return other_dtype.categories
9 changes: 3 additions & 6 deletions pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,13 +452,10 @@ def join(self, other, how="left", level=None, return_indexers=False, sort=False)
def get_indexer(self, target, method=None, limit=None, tolerance=None):
target = ensure_index(target)

if isinstance(target, PeriodIndex):
if not self._is_comparable_dtype(target.dtype):
# i.e. target.freq != self.freq
# No matches
no_matches = -1 * np.ones(self.shape, dtype=np.intp)
return no_matches
if not self._should_compare(target):
return self._get_indexer_non_comparable(target, method, unique=True)

if isinstance(target, PeriodIndex):
target = target._get_engine_target() # i.e. target.asi8
self_index = self._int64index
else:
Expand Down
53 changes: 53 additions & 0 deletions pandas/tests/indexes/period/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,28 @@
)
import pandas._testing as tm

# Module-level inputs for the ``non_comparable_idx`` fixture below.
# ``dti4`` has 4 entries because IntervalIndex.from_breaks yields one fewer
# interval than breaks; ``dti`` drops the last entry to get length 3.
dti4 = date_range("2016-01-01", periods=4)
dti = dti4[:-1]
rng = pd.Index(range(3))


@pytest.fixture(
    params=[
        dti,
        dti.tz_localize("UTC"),
        dti.to_period("W"),
        dti - dti[0],
        rng,
        pd.Index([1, 2, 3]),
        pd.Index([2.0, 3.0, 4.0]),
        pd.Index([4, 5, 6], dtype="u8"),
        pd.IntervalIndex.from_breaks(dti4),
    ]
)
def non_comparable_idx(request):
    """
    Parametrized fixture yielding one index per entry in ``params``.

    The tests in this file use these as lookup targets against a PeriodIndex
    built with ``to_period("D")`` and expect them to be non-comparable with it.
    """
    # All have length 3
    return request.param


class TestGetItem:
def test_ellipsis(self):
Expand Down Expand Up @@ -438,6 +460,37 @@ def test_get_indexer_mismatched_dtype(self):
result = pi.get_indexer_non_unique(pi2)[0]
tm.assert_numpy_array_equal(result, expected)

def test_get_indexer_mismatched_dtype_different_length(self, non_comparable_idx):
    # Without a method we aren't checking inequalities, so we get
    # all-missing but do not raise.
    pi = date_range("2016-01-01", periods=3).to_period("D")
    other = non_comparable_idx

    # pi[:-1] is shorter than ``other``, so this pins that the result is
    # shaped like the target rather than like the index being searched.
    expected = np.full(other.shape, -1, dtype=np.intp)
    res = pi[:-1].get_indexer(other)
    tm.assert_numpy_array_equal(res, expected)

@pytest.mark.parametrize("method", ["pad", "backfill", "nearest"])
def test_get_indexer_mismatched_dtype_with_method(self, non_comparable_idx, method):
    # With a method we are checking inequalities, so a non-comparable
    # target is expected to raise TypeError.
    pi = date_range("2016-01-01", periods=3).to_period("D")
    other = non_comparable_idx

    msg = re.escape(f"Cannot compare dtypes {pi.dtype} and {other.dtype}")
    with pytest.raises(TypeError, match=msg):
        pi.get_indexer(other, method=method)

    for dtype in ["object", "category"]:
        other2 = other.astype(dtype)
        # NOTE(review): the object-dtype cast of a PeriodIndex is skipped;
        # presumably it does not raise in the same way -- confirm.
        skip = dtype == "object" and isinstance(other, PeriodIndex)
        if not skip:
            # For object dtype we are liable to get a different exception
            # message, so only the exception type is checked here.
            with pytest.raises(TypeError):
                pi.get_indexer(other2, method=method)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jreback note that in these cases we currently raise on master, because the scalar comparisons raise.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this new?

`get_indexer` is not super public, but I believe it never raises.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you're right that get_indexer with method=None should never raise (maybe with tzawareness corner cases), but with method="ffill" the following raises on master:

dti = pd.date_range("2016-01-01", periods=3)
rng = pd.Index(range(5))

>>> dti.get_indexer(rng, method="ffill")
TypeError: '<' not supported between instances of 'int' and 'Timestamp'


def test_get_indexer_non_unique(self):
# GH 17717
p1 = Period("2017-09-02")
Expand Down