diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 63c9ebb1dd98d..e3a643a38e24c 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -100,6 +100,7 @@ ) from pandas.core.arrays import ( DatetimeArray, + PandasArray, PeriodArray, TimedeltaArray, period_array, @@ -204,7 +205,11 @@ def box_expected(expected, box_cls, transpose=True): subclass of box_cls """ if box_cls is pd.array: - expected = pd.array(expected) + if isinstance(expected, RangeIndex): + # pd.array would return an IntegerArray + expected = PandasArray(np.asarray(expected._values)) + else: + expected = pd.array(expected) elif box_cls is Index: expected = Index(expected) elif box_cls is Series: diff --git a/pandas/core/construction.py b/pandas/core/construction.py index c20cca57afff1..f3133480108a6 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -58,6 +58,7 @@ ABCExtensionArray, ABCIndex, ABCPandasArray, + ABCRangeIndex, ABCSeries, ) from pandas.core.dtypes.missing import isna @@ -367,7 +368,9 @@ def array( return PandasArray._from_sequence(data, dtype=dtype, copy=copy) -def extract_array(obj: object, extract_numpy: bool = False) -> Any | ArrayLike: +def extract_array( + obj: object, extract_numpy: bool = False, extract_range: bool = False +) -> Any | ArrayLike: """ Extract the ndarray or ExtensionArray from a Series or Index. @@ -382,6 +385,10 @@ def extract_array(obj: object, extract_numpy: bool = False) -> Any | ArrayLike: extract_numpy : bool, default False Whether to extract the ndarray from a PandasArray + extract_range : bool, default False + If we have a RangeIndex, return range._values if True + (which is a materialized integer ndarray), otherwise return unchanged. + Returns ------- arr : object @@ -410,6 +417,11 @@ def extract_array(obj: object, extract_numpy: bool = False) -> Any | ArrayLike: array([1, 2, 3]) """ if isinstance(obj, (ABCIndex, ABCSeries)): + if isinstance(obj, ABCRangeIndex): + if extract_range: + return obj._values + return obj + obj = obj.array if extract_numpy and isinstance(obj, ABCPandasArray): diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 7a7a13ac94448..56bd11056b380 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -528,12 +528,17 @@ def argsort(self, *args, **kwargs) -> np.ndarray: -------- numpy.ndarray.argsort """ + ascending = kwargs.pop("ascending", True) # EA compat nv.validate_argsort(args, kwargs) if self._range.step > 0: - return np.arange(len(self)) + result = np.arange(len(self)) else: - return np.arange(len(self) - 1, -1, -1) + result = np.arange(len(self) - 1, -1, -1) + + if not ascending: + result = result[::-1] + return result def factorize( self, sort: bool = False, na_sentinel: int | None = -1 @@ -920,7 +925,8 @@ def _arith_method(self, other, op): if op in [operator.mul, ops.rmul, operator.truediv, ops.rtruediv]: step = op - other = extract_array(other, extract_numpy=True) + # TODO: if other is a RangeIndex we may have more efficient options + other = extract_array(other, extract_numpy=True, extract_range=True) attrs = self._get_attributes_dict() left, right = self, other diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 13e528f38f3bf..a167418ce2750 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -2080,8 +2080,9 @@ def _factorize_keys( (array([0, 1, 2]), array([0, 1]), 3) """ # Some pre-processing for non-ndarray lk / rk - lk = extract_array(lk, extract_numpy=True) - rk = extract_array(rk, extract_numpy=True) + lk = extract_array(lk, extract_numpy=True, extract_range=True) + rk = extract_array(rk, extract_numpy=True, extract_range=True) + # TODO: if either is a RangeIndex, we can likely factorize more efficiently? if is_datetime64tz_dtype(lk.dtype) and is_datetime64tz_dtype(rk.dtype): # Extract the ndarray (UTC-localized) values diff --git a/pandas/core/series.py b/pandas/core/series.py index 36623695d7569..8ed3bf682b46d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -5078,7 +5078,7 @@ def _cmp_method(self, other, op): raise ValueError("Can only compare identically-labeled Series objects") lvalues = self._values - rvalues = extract_array(other, extract_numpy=True) + rvalues = extract_array(other, extract_numpy=True, extract_range=True) with np.errstate(all="ignore"): res_values = ops.comparison_op(lvalues, rvalues, op) @@ -5090,7 +5090,7 @@ def _logical_method(self, other, op): self, other = ops.align_method_SERIES(self, other, align_asobject=True) lvalues = self._values - rvalues = extract_array(other, extract_numpy=True) + rvalues = extract_array(other, extract_numpy=True, extract_range=True) res_values = ops.logical_op(lvalues, rvalues, op) return self._construct_result(res_values, name=res_name) @@ -5100,7 +5100,8 @@ def _arith_method(self, other, op): self, other = ops.align_method_SERIES(self, other) lvalues = self._values - rvalues = extract_array(other, extract_numpy=True) + rvalues = extract_array(other, extract_numpy=True, extract_range=True) + with np.errstate(all="ignore"): result = ops.arithmetic_op(lvalues, rvalues, op) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 9de2563863b41..88a8b9a887f20 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -25,7 +25,10 @@ ensure_platform_int, is_extension_array_dtype, ) -from pandas.core.dtypes.generic import ABCMultiIndex +from pandas.core.dtypes.generic import ( + ABCMultiIndex, + ABCRangeIndex, +) from pandas.core.dtypes.missing import isna from pandas.core.construction import extract_array @@ -362,9 +365,12 @@ def nargsort( mask=mask, ) - items = extract_array(items) + if isinstance(items, ABCRangeIndex): + return items.argsort(ascending=ascending) # TODO: test coverage with key? + elif not isinstance(items, ABCMultiIndex): + items = extract_array(items) if mask is None: - mask = np.asarray(isna(items)) + mask = np.asarray(isna(items)) # TODO: does this exclude MultiIndex too? if is_extension_array_dtype(items): return items.argsort(ascending=ascending, kind=kind, na_position=na_position) diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index ef86a8e6a1cb0..bdd954c1e2222 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -311,6 +311,7 @@ def test_add_sub_datetimelike_invalid(self, numeric_idx, other, box_with_array): "Concatenation operation is not implemented for NumPy arrays", # pd.array vs np.datetime64 case r"operand type\(s\) all returned NotImplemented from __array_ufunc__", + "can only perform ops with numeric values", ] ) with pytest.raises(TypeError, match=msg): diff --git a/pandas/tests/construction/__init__.py b/pandas/tests/construction/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/construction/test_extract_array.py b/pandas/tests/construction/test_extract_array.py new file mode 100644 index 0000000000000..4dd3eda8c995c --- /dev/null +++ b/pandas/tests/construction/test_extract_array.py @@ -0,0 +1,18 @@ +from pandas import Index +import pandas._testing as tm +from pandas.core.construction import extract_array + + +def test_extract_array_rangeindex(): + ri = Index(range(5)) + + expected = ri._values + res = extract_array(ri, extract_numpy=True, extract_range=True) + tm.assert_numpy_array_equal(res, expected) + res = extract_array(ri, extract_numpy=False, extract_range=True) + tm.assert_numpy_array_equal(res, expected) + + res = extract_array(ri, extract_numpy=True, extract_range=False) + tm.assert_index_equal(res, ri) + res = extract_array(ri, extract_numpy=False, extract_range=False) + tm.assert_index_equal(res, ri)