|
9 | 9 | ) |
10 | 10 | import datetime |
11 | 11 | from functools import partial |
12 | | -import string |
13 | 12 | from typing import ( |
14 | 13 | TYPE_CHECKING, |
15 | 14 | Literal, |
|
90 | 89 | BaseMaskedArray, |
91 | 90 | ExtensionArray, |
92 | 91 | ) |
93 | | -from pandas.core.arrays._mixins import NDArrayBackedExtensionArray |
94 | 92 | from pandas.core.arrays.string_ import StringDtype |
95 | 93 | import pandas.core.common as com |
96 | 94 | from pandas.core.construction import ( |
|
99 | 97 | ) |
100 | 98 | from pandas.core.frame import _merge_doc |
101 | 99 | from pandas.core.indexes.api import default_index |
102 | | -from pandas.core.sorting import is_int64_overflow_possible |
| 100 | +from pandas.core.sorting import ( |
| 101 | + get_group_index, |
| 102 | + is_int64_overflow_possible, |
| 103 | +) |
103 | 104 |
|
104 | 105 | if TYPE_CHECKING: |
105 | 106 | from pandas import DataFrame |
@@ -2117,34 +2118,6 @@ def _convert_values_for_libjoin( |
2117 | 2118 | def _get_join_indexers(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: |
2118 | 2119 | """return the join indexers""" |
2119 | 2120 |
|
2120 | | - def flip(xs: list[ArrayLike]) -> np.ndarray: |
2121 | | - """unlike np.transpose, this returns an array of tuples""" |
2122 | | - |
2123 | | - def injection(obj: ArrayLike): |
2124 | | - if not isinstance(obj.dtype, ExtensionDtype): |
2125 | | - # ndarray |
2126 | | - return obj |
2127 | | - obj = extract_array(obj) |
2128 | | - if isinstance(obj, NDArrayBackedExtensionArray): |
2129 | | - # fastpath for e.g. dt64tz, categorical |
2130 | | - return obj._ndarray |
2131 | | - # FIXME: returning obj._values_for_argsort() here doesn't |
2132 | | - # break in any existing test cases, but i (@jbrockmendel) |
2133 | | - # am pretty sure it should! |
2134 | | - # e.g. |
2135 | | - # arr = pd.array([0, pd.NA, 255], dtype="UInt8") |
2136 | | - # will have values_for_argsort (before GH#45434) |
2137 | | - # np.array([0, 255, 255], dtype=np.uint8) |
2138 | | - # and the non-injectivity should make a difference somehow |
2139 | | - # shouldn't it? |
2140 | | - return np.asarray(obj) |
2141 | | - |
2142 | | - xs = [injection(x) for x in xs] |
2143 | | - labels = list(string.ascii_lowercase[: len(xs)]) |
2144 | | - dtypes = [x.dtype for x in xs] |
2145 | | - labeled_dtypes = list(zip(labels, dtypes)) |
2146 | | - return np.array(list(zip(*xs)), labeled_dtypes) |
2147 | | - |
2148 | 2121 | # values to compare |
2149 | 2122 | left_values = ( |
2150 | 2123 | self.left.index._values if self.left_index else self.left_join_keys[-1] |
@@ -2197,11 +2170,23 @@ def injection(obj: ArrayLike): |
2197 | 2170 | else: |
2198 | 2171 | # We get here with non-ndarrays in test_merge_by_col_tz_aware |
2199 | 2172 | # and test_merge_groupby_multiple_column_with_categorical_column |
2200 | | - lbv = flip(left_by_values) |
2201 | | - rbv = flip(right_by_values) |
2202 | | - lbv = ensure_object(lbv) |
2203 | | - rbv = ensure_object(rbv) |
2204 | | - |
| 2173 | + mapped = [ |
| 2174 | + _factorize_keys( |
| 2175 | + left_by_values[n], |
| 2176 | + right_by_values[n], |
| 2177 | + sort=False, |
| 2178 | + how="left", |
| 2179 | + ) |
| 2180 | + for n in range(len(left_by_values)) |
| 2181 | + ] |
| 2182 | + arrs = [np.concatenate(m[:2]) for m in mapped] |
| 2183 | + shape = tuple(m[2] for m in mapped) |
| 2184 | + group_index = get_group_index( |
| 2185 | + arrs, shape=shape, sort=False, xnull=False |
| 2186 | + ) |
| 2187 | + left_len = len(left_by_values[0]) |
| 2188 | + lbv = group_index[:left_len] |
| 2189 | + rbv = group_index[left_len:] |
2205 | 2190 | # error: Incompatible types in assignment (expression has type |
2206 | 2191 | # "Union[ndarray[Any, dtype[Any]], ndarray[Any, dtype[object_]]]", |
2207 | 2192 | # variable has type "List[Union[Union[ExtensionArray, |
|
0 commit comments