From 6d62ea69e4c28bf54544c080f7144c9a1a696958 Mon Sep 17 00:00:00 2001
From: Brock
Date: Thu, 1 Jul 2021 08:52:07 -0700
Subject: [PATCH 1/2] PERF/REGR: restore IntervalIndex._intersection_non_unique

---
 pandas/core/indexes/interval.py | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
index 134583833daac..2d953cbcf01e2 100644
--- a/pandas/core/indexes/interval.py
+++ b/pandas/core/indexes/interval.py
@@ -803,7 +803,8 @@ def _intersection(self, other, sort):
             # multiple NaNs
             taken = other._intersection_unique(self)
         else:
-            return super()._intersection(other, sort)
+            # duplicates
+            taken = self._intersection_non_unique(other)
 
         if sort is None:
             taken = taken.sort_values()
@@ -832,6 +833,33 @@ def _intersection_unique(self, other: IntervalIndex) -> IntervalIndex:
 
         return self.take(indexer)
 
+    def _intersection_non_unique(self, other: IntervalIndex) -> IntervalIndex:
+        """
+        Used when the IntervalIndex does have some common endpoints,
+        on either sides.
+        Return the intersection with another IntervalIndex.
+        Parameters
+        ----------
+        other : IntervalIndex
+        Returns
+        -------
+        IntervalIndex
+        """
+        # Note: this is about 3.25x faster than super()._intersection(other)
+        #  in IntervalIndexMethod.time_intersection_both_duplicate(1000)
+        mask = np.zeros(len(self), dtype=bool)
+
+        if self.hasnans and other.hasnans:
+            first_nan_loc = np.arange(len(self))[self.isna()][0]
+            mask[first_nan_loc] = True
+
+        other_tups = set(zip(other.left, other.right))
+        for i, tup in enumerate(zip(self.left, self.right)):
+            if tup in other_tups:
+                mask[i] = True
+
+        return self[mask]
+
     # --------------------------------------------------------------------
 
     @property

From 3ff319c303c256f5813dfdce39dc74646d515c92 Mon Sep 17 00:00:00 2001
From: Brock
Date: Thu, 1 Jul 2021 08:52:58 -0700
Subject: [PATCH 2/2] copy/paste docstring fixup

---
 pandas/core/indexes/interval.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
index 2d953cbcf01e2..36779504d014b 100644
--- a/pandas/core/indexes/interval.py
+++ b/pandas/core/indexes/interval.py
@@ -838,9 +838,11 @@ def _intersection_non_unique(self, other: IntervalIndex) -> IntervalIndex:
         Used when the IntervalIndex does have some common endpoints,
         on either sides.
         Return the intersection with another IntervalIndex.
+
         Parameters
         ----------
         other : IntervalIndex
+
         Returns
         -------
         IntervalIndex