From 09cbbe8170b811fdd717affe1e6adaaf9ce3822c Mon Sep 17 00:00:00 2001 From: richard Date: Sun, 21 Dec 2025 00:58:44 -0500 Subject: [PATCH 1/6] WIP --- pandas/core/algorithms.py | 2 +- pandas/core/apply.py | 1 + pandas/core/arrays/_mixins.py | 2 +- pandas/core/arrays/categorical.py | 3 +- pandas/core/base.py | 2 +- pandas/core/dtypes/dtypes.py | 2 +- pandas/core/groupby/groupby.py | 8 ++-- pandas/core/groupby/grouper.py | 6 ++- pandas/core/groupby/ops.py | 6 ++- pandas/core/indexes/base.py | 63 ++++++++++++++++++++++------- pandas/core/indexes/category.py | 2 +- pandas/core/indexes/datetimelike.py | 1 + pandas/core/indexes/datetimes.py | 2 +- pandas/core/indexes/extension.py | 4 +- pandas/core/indexes/interval.py | 2 +- pandas/core/indexes/multi.py | 21 ++++++---- pandas/core/indexes/period.py | 6 +-- pandas/core/indexes/range.py | 4 +- pandas/core/reshape/reshape.py | 2 +- pandas/core/reshape/tile.py | 4 +- pandas/core/tools/datetimes.py | 10 ++--- pandas/core/tools/timedeltas.py | 2 +- pandas/io/parsers/base_parser.py | 2 +- pandas/tests/io/test_stata.py | 4 +- pandas/tseries/frequencies.py | 2 +- 25 files changed, 107 insertions(+), 56 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 55a3022454e02..a6d14d6fbd152 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -926,7 +926,7 @@ def value_counts_internal( # Starting in 3.0, we no longer perform dtype inference on the # Index object we construct here, xref GH#56161 - idx = Index(keys, dtype=keys.dtype, name=index_name) + idx = Index(keys, dtype=keys.dtype, name=index_name, copy=False) if ( not sort diff --git a/pandas/core/apply.py b/pandas/core/apply.py index f719fa4e4c839..014584435b477 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1972,6 +1972,7 @@ def relabel_result( fun = [ com.get_callable_name(f) if not isinstance(f, str) else f for f in fun ] + # TODO: Does copy=False do anything here? col_idx_order = Index(s.index).get_indexer(fun) valid_idx = col_idx_order != -1 if valid_idx.any(): diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index ff01d4ac835ba..e4f4a6d242003 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -496,7 +496,7 @@ def value_counts(self, dropna: bool = True) -> Series: result = value_counts(values, sort=False, dropna=dropna) index_arr = self._from_backing_data(np.asarray(result.index._data)) - index = Index(index_arr, name=result.index.name) + index = Index(index_arr, name=result.index.name, copy=False) return Series(result._values, index=index, name=result.name, copy=False) def _quantile( diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 3cea95c81b7f2..705c6437bb848 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -670,6 +670,7 @@ def _from_inferred_categories( to_timedelta, ) + # TODO: Here cats = Index(inferred_categories) known_categories = ( isinstance(dtype, CategoricalDtype) and dtype.categories is not None @@ -2397,7 +2398,7 @@ def _validate_listlike(self, value): from pandas import Index # tupleize_cols=False for e.g. test_fillna_iterable_category GH#41914 - to_add = Index._with_infer(value, tupleize_cols=False).difference( + to_add = Index._with_infer(value, tupleize_cols=False, copy=False).difference( self.categories ) diff --git a/pandas/core/base.py b/pandas/core/base.py index 200b16b4b6b1a..e75e8c70dc437 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1312,7 +1312,7 @@ def factorize( from pandas import Index try: - uniques = Index(uniques, dtype=self.dtype) + uniques = Index(uniques, dtype=self.dtype, copy=False) except NotImplementedError: # not all dtypes are supported in Index that are allowed for Series # e.g. float16 or bytes diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 3b14c5348f9d6..50f9241ec7028 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -581,7 +581,7 @@ def validate_categories(categories, fastpath: bool = False) -> Index: f"Parameter 'categories' must be list-like, was {categories!r}" ) if not isinstance(categories, ABCIndex): - categories = Index._with_infer(categories, tupleize_cols=False) + categories = Index._with_infer(categories, tupleize_cols=False, copy=False) if not fastpath: if categories.hasnans: diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index b4bdaefbe34b9..93e9d16228eac 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1250,7 +1250,7 @@ def _set_result_index_ordered( return result # row order is scrambled => sort the rows by position in original index - original_positions = Index(self._grouper.result_ilocs) + original_positions = Index(self._grouper.result_ilocs, copy=False) result = result.set_axis(original_positions, axis=0) result = result.sort_index(axis=0) if self._grouper.has_dropped_na: @@ -1298,7 +1298,7 @@ def _insert_inaxis_grouper( if qs is None: result.insert(0, name, lev) else: - result.insert(0, name, Index(np.repeat(lev, len(qs)))) + result.insert(0, name, Index(np.repeat(lev, len(qs)), copy=False)) return result @@ -4392,7 +4392,7 @@ def _nth( # error: No overload variant of "where" matches argument types # "Any", "NAType", "Any" values = np.where(nulls, NA, grouper) # type: ignore[call-overload] - grouper = Index(values, dtype="Int64") + grouper = Index(values, dtype="Int64", copy=False) grb = dropped.groupby(grouper, as_index=self.as_index, sort=self.sort) return grb.nth(n) @@ -5806,7 +5806,7 @@ def _insert_quantile_level(idx: Index, qs: npt.NDArray[np.float64]) -> MultiInde MultiIndex """ nqs = len(qs) - lev_codes, lev = Index(qs).factorize() + lev_codes, lev = Index(qs, copy=False).factorize() lev_codes = coerce_indexer_dtype(lev_codes, lev) if idx._is_multi: diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index eacde2e9661a8..1f8ee634e7332 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -515,7 +515,9 @@ def __init__( # error: Cannot determine type of "grouping_vector" [has-type] ng = newgrouper.groupings[0].grouping_vector # type: ignore[has-type] # use Index instead of ndarray so we can recover the name - grouping_vector = Index(ng, name=newgrouper.result_index.name) + grouping_vector = Index( + ng, name=newgrouper.result_index.name, copy=False + ) elif not isinstance( grouping_vector, (Series, Index, ExtensionArray, np.ndarray) @@ -684,7 +686,7 @@ def _codes_and_uniques(self) -> tuple[npt.NDArray[np.signedinteger], ArrayLike]: @cache_readonly def groups(self) -> dict[Hashable, Index]: codes, uniques = self._codes_and_uniques - uniques = Index._with_infer(uniques, name=self.name) + uniques = Index._with_infer(uniques, name=self.name, copy=False) r, counts = libalgos.groupsort_indexer(ensure_platform_int(codes), len(uniques)) counts = ensure_int64(counts).cumsum() diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index d86264cb95dc5..636f8a7a7affc 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -730,7 +730,7 @@ def groups(self) -> dict[Hashable, Index]: @cache_readonly def is_monotonic(self) -> bool: # return if my group orderings are monotonic - return Index(self.ids).is_monotonic_increasing + return Index(self.ids, copy=False).is_monotonic_increasing @final @cache_readonly @@ -760,7 +760,9 @@ def ids(self) -> npt.NDArray[np.intp]: @cache_readonly def result_index_and_ids(self) -> tuple[Index, npt.NDArray[np.intp]]: - levels = [Index._with_infer(ping.uniques) for ping in self.groupings] + levels = [ + Index._with_infer(ping.uniques, copy=False) for ping in self.groupings + ] obs = [ ping._observed or not ping._passed_categorical for ping in self.groupings ] diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 7ea6fc253c60b..602facac86db0 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -321,6 +321,30 @@ def _new_Index(cls, d): return cls.__new__(cls, **d) +def called_from_tests() -> bool: + """ + Find the first place in the stack that is not inside pandas + (tests notwithstanding). + """ + import inspect + import os + + import pandas as pd + + pkg_dir = os.path.dirname(pd.__file__) + test_dir = os.path.join(pkg_dir, "tests") + + # https://stackoverflow.com/questions/17407119/python-inspect-stack-is-slow + frame = inspect.currentframe().f_back.f_back.f_back + try: + filename = inspect.getfile(frame) + return filename.startswith(test_dir) + finally: + # See note in + # https://docs.python.org/3/library/inspect.html#inspect.Traceback + del frame + + @set_module("pandas") class Index(IndexOpsMixin, PandasObject): """ @@ -692,7 +716,7 @@ def _with_infer(cls, *args, **kwargs): # "ndarray[Any, Any]" values = lib.maybe_convert_objects(result._values) # type: ignore[arg-type] if values.dtype.kind in "iufb": - return Index(values, name=result.name) + return Index(values, name=result.name, copy=False) return result @@ -982,6 +1006,7 @@ def __array_wrap__(self, result, context=None, return_scalar=False): # Reached in plotting tests with e.g. np.nonzero(index) return result + # TODO: Here? return Index(result, name=self.name) @cache_readonly @@ -2775,7 +2800,7 @@ def fillna(self, value): # no need to care metadata other than name # because it can't have freq if it has NaTs # _with_infer needed for test_fillna_categorical - return Index._with_infer(result, name=self.name) + return Index._with_infer(result, name=self.name, copy=False) return self._view() def dropna(self, how: AnyAll = "any") -> Self: @@ -3910,8 +3935,8 @@ def _get_fill_indexer( if not (self.is_monotonic_increasing or self.is_monotonic_decreasing): raise ValueError("index must be monotonic increasing or decreasing") encoded = self.append(target)._engine.values # type: ignore[union-attr] - self_encoded = Index(encoded[: len(self)]) - target_encoded = Index(encoded[len(self) :]) + self_encoded = Index(encoded[: len(self)], copy=False) + target_encoded = Index(encoded[len(self) :], copy=False) return self_encoded._get_fill_indexer( target_encoded, method, limit, tolerance ) @@ -4338,7 +4363,7 @@ def _reindex_non_unique( new_indexer[~check] = -1 if not isinstance(self, ABCMultiIndex): - new_index = Index(new_labels, name=self.name) + new_index = Index(new_labels, name=self.name, copy=False) else: new_index = type(self).from_tuples(new_labels, names=self.names) return new_index, indexer, new_indexer @@ -4487,7 +4512,7 @@ def join( and not self.categories.equals(other.categories) ): # dtypes are "equal" but categories are in different order - other = Index(other._values.reorder_categories(self.categories)) + other = Index(other._values.reorder_categories(self.categories), copy=False) _validate_join_method(how) @@ -4930,7 +4955,9 @@ def _wrap_join_result( elif ridx is None: join_index = other else: - join_index = self._constructor._with_infer(joined, dtype=self.dtype) + join_index = self._constructor._with_infer( + joined, dtype=self.dtype, copy=False + ) names = other.names if how == "right" else self.names if join_index.names != names: @@ -5203,6 +5230,13 @@ def _maybe_copy_array_input( if isinstance(data, (ExtensionArray, np.ndarray)): if copy is not False: if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)): + import os + + if not called_from_tests() and "PYTEST_CURRENT_TEST" in os.environ: + with open( + "/home/richard/dev/pandas/pytest.out", mode="a" + ) as fh: + fh.write(os.environ["PYTEST_CURRENT_TEST"] + "\n") data = data.copy() copy = False return data, bool(copy) @@ -6368,7 +6402,7 @@ def _maybe_downcast_for_indexing(self, other: Index) -> tuple[Index, Index]: other = type(self).from_tuples(other) # type: ignore[attr-defined] except (TypeError, ValueError): # let's instead try with a straight Index - self = Index(self._values) + self = Index(self._values, copy=False) if not is_object_dtype(self.dtype) and is_object_dtype(other.dtype): # Reverse op so we dont need to re-implement on the subclasses @@ -6747,6 +6781,7 @@ def _maybe_cast_listlike_indexer(self, target) -> Index: ): # If we started with a list-like, avoid inference to string dtype if self # is object dtype (coercing to string dtype will alter the missing values) + # TODO: Here? target_index = Index(target, dtype=self.dtype) elif ( not hasattr(target, "dtype") @@ -7124,7 +7159,7 @@ def insert(self, loc: int, item) -> Index: new_values[loc] = item # GH#51363 stopped doing dtype inference here - out = Index(new_values, dtype=new_values.dtype, name=self.name) + out = Index(new_values, dtype=new_values.dtype, name=self.name, copy=False) return out def drop( @@ -7220,7 +7255,7 @@ def infer_objects(self, copy: bool = True) -> Index: ) if copy and res_values is values: return self.copy() - result = Index(res_values, name=self.name) + result = Index(res_values, name=self.name, copy=False) if not copy and res_values is values and self._references is not None: result._references = self._references result._references.add_index_reference(result) @@ -7329,10 +7364,10 @@ def _logical_method(self, other, op): def _construct_result(self, result, name, other): if isinstance(result, tuple): return ( - Index(result[0], name=name, dtype=result[0].dtype), - Index(result[1], name=name, dtype=result[1].dtype), + Index(result[0], name=name, dtype=result[0].dtype, copy=False), + Index(result[1], name=name, dtype=result[1].dtype, copy=False), ) - return Index(result, name=name, dtype=result.dtype) + return Index(result, name=name, dtype=result.dtype, copy=False) def _arith_method(self, other, op): if ( @@ -7350,7 +7385,7 @@ def _arith_method(self, other, op): @final def _unary_method(self, op): result = op(self._values) - return Index(result, name=self.name) + return Index(result, name=self.name, copy=False) def __abs__(self) -> Index: return self._unary_method(operator.abs) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index cbefaac77dd82..aee63a6ba4582 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -517,4 +517,4 @@ def map(self, mapper, na_action: Literal["ignore"] | None = None): Index(['first', 'second', nan], dtype='object') """ mapped = self._values.map(mapper, na_action=na_action) - return Index(mapped, name=self.name) + return Index(mapped, name=self.name, copy=False) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index fd061666c1f00..f25d357ad7d0e 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -534,6 +534,7 @@ def _maybe_cast_listlike_indexer(self, keyarr): # TODO: com.asarray_tuplesafe shouldn't cast e.g. DatetimeArray else: res = keyarr + # TODO: Here? return Index(res, dtype=res.dtype) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 72b009a344193..4a8cc611b1284 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -323,7 +323,7 @@ def strftime(self, date_format) -> Index: dtype='str') """ arr = self._data.strftime(date_format) - return Index(arr, name=self.name, dtype=arr.dtype) + return Index(arr, name=self.name, dtype=arr.dtype, copy=False) def tz_convert(self, tz) -> Self: """ diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index 3b4fa372eec04..0c37aca23342f 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -74,7 +74,7 @@ def fget(self): return type(self)._simple_new(result, name=self.name) elif isinstance(result, ABCDataFrame): return result.set_index(self) - return Index(result, name=self.name, dtype=result.dtype) + return Index(result, name=self.name, dtype=result.dtype, copy=False) return result def fset(self, value) -> None: @@ -101,7 +101,7 @@ def method(self, *args, **kwargs): # type: ignore[misc] return type(self)._simple_new(result, name=self.name) elif isinstance(result, ABCDataFrame): return result.set_index(self) - return Index(result, name=self.name, dtype=result.dtype) + return Index(result, name=self.name, dtype=result.dtype, copy=False) return result # error: "property" has no attribute "__name__" diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 1def317bc1a88..3bfacde94eb20 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -691,7 +691,7 @@ def _maybe_convert_i8(self, key): key_i8 = key_i8.view("i8") else: # DatetimeIndex/TimedeltaIndex - key_dtype, key_i8 = key.dtype, Index(key.asi8) + key_dtype, key_i8 = key.dtype, Index(key.asi8, copy=False) if key.hasnans: # convert NaT from its i8 value to np.nan so it's not viewed # as a valid value, maybe causing errors (e.g. is_overlapping) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 3a0a1d8deacb3..7fef49025d415 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -564,7 +564,11 @@ def from_tuples( # handling the empty tuple cases if len(tuples) and all(isinstance(e, tuple) and not e for e in tuples): codes = [np.zeros(len(tuples))] - levels = [Index(com.asarray_tuplesafe(tuples, dtype=np.dtype("object")))] + levels = [ + Index( + com.asarray_tuplesafe(tuples, dtype=np.dtype("object")), copy=False + ) + ] return cls( levels=levels, codes=codes, @@ -1037,6 +1041,7 @@ def set_levels( if isinstance(levels, Index): pass elif is_array_like(levels): + # TODO: Here? levels = Index(levels) elif is_list_like(levels): levels = list(levels) @@ -1505,7 +1510,9 @@ def _get_values_for_csv( if len(new_levels) == 1: # a single-level multi-index - return Index(new_levels[0].take(new_codes[0]))._get_values_for_csv() + return Index( + new_levels[0].take(new_codes[0]), copy=False + )._get_values_for_csv() else: # reconstruct the multi-index mi = MultiIndex( @@ -1732,10 +1739,10 @@ def is_monotonic_increasing(self) -> bool: # int, float, complex, str, bytes, _NestedSequence[Union # [bool, int, float, complex, str, bytes]]]" sort_order = np.lexsort(values) # type: ignore[arg-type] - return Index(sort_order).is_monotonic_increasing + return Index(sort_order, copy=False).is_monotonic_increasing except TypeError: # we have mixed types and np.lexsort is not happy - return Index(self._values).is_monotonic_increasing + return Index(self._values, copy=False).is_monotonic_increasing @cache_readonly def is_monotonic_decreasing(self) -> bool: @@ -1996,7 +2003,7 @@ def to_flat_index(self) -> Index: # type: ignore[override] ('bar', 'baz'), ('bar', 'qux')], dtype='object') """ - return Index(self._values, tupleize_cols=False) + return Index(self._values, tupleize_cols=False, copy=False) def _is_lexsorted(self) -> bool: """ @@ -2448,7 +2455,7 @@ def append(self, other): # setting names to None automatically return MultiIndex.from_tuples(new_tuples) except (TypeError, IndexError): - return Index(new_tuples) + return Index(new_tuples, copy=False) def argsort( self, *args, na_position: NaPosition = "last", **kwargs @@ -3077,7 +3084,7 @@ def _get_indexer_level_0(self, target) -> npt.NDArray[np.intp]: lev = self.levels[0] codes = self._codes[0] cat = Categorical.from_codes(codes=codes, categories=lev, validate=False) - ci = Index(cat) + ci = Index(cat, copy=False) return ci.get_indexer_for(target) def get_slice_bound( diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index b8a25ab0da693..fadcd74afc0fe 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -205,17 +205,17 @@ def to_timestamp(self, freq=None, how: str = "start") -> DatetimeIndex: @property @doc(PeriodArray.hour.fget) def hour(self) -> Index: - return Index(self._data.hour, name=self.name) + return Index(self._data.hour, name=self.name, copy=False) @property @doc(PeriodArray.minute.fget) def minute(self) -> Index: - return Index(self._data.minute, name=self.name) + return Index(self._data.minute, name=self.name, copy=False) @property @doc(PeriodArray.second.fget) def second(self) -> Index: - return Index(self._data.second, name=self.name) + return Index(self._data.second, name=self.name, copy=False) # ------------------------------------------------------------------------ # Index Constructors diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 4a935c2afb43e..9635cbc91364e 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -604,7 +604,7 @@ def _shallow_copy(self, values, name: Hashable = no_default): name = self._name if name is no_default else name if values.dtype.kind == "f": - return Index(values, name=name, dtype=np.float64) + return Index(values, name=name, dtype=np.float64, copy=False) if values.dtype.kind == "i" and values.ndim == 1: # GH 46675 & 43885: If values is equally spaced, return a # more memory-compact RangeIndex instead of Index with 64-bit dtype @@ -1231,7 +1231,7 @@ def _concat(self, indexes: list[Index], name: Hashable) -> Index: ) else: values = np.concatenate([x._values for x in rng_indexes]) - result = self._constructor(values) + result = self._constructor(values, copy=False) return result.rename(name) step = rng.start - start diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index accb577d5345c..c3ac1ce0d0dc2 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -490,7 +490,7 @@ def _unstack_multiple( if not rlocs: # Everything is in clocs, so the dummy df has a regular index - dummy_index = Index(obs_ids, name="__placeholder__") + dummy_index = Index(obs_ids, name="__placeholder__", copy=False) else: dummy_index = MultiIndex( levels=rlevels + [obs_ids], diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 659e82d979a91..9ef8e783c8104 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -446,7 +446,7 @@ def _nbins_to_bins(x_idx: Index, nbins: int, right: bool) -> Index: else: bins[-1] += adj - return Index(bins) + return Index(bins, copy=False) def _bins_to_cuts( @@ -631,7 +631,7 @@ def _preprocess_for_cut(x) -> Index: if x.ndim != 1: raise ValueError("Input array must be 1 dimensional") - return Index(x) + return Index(x, copy=False) def _postprocess_for_cut(fac, bins, retbins: bool, original): diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index c5c0aa4d61187..7cbe021d7864b 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -288,8 +288,8 @@ def _box_as_indexlike( if lib.is_np_dtype(dt_array.dtype, "M"): tz = "utc" if utc else None - return DatetimeIndex(dt_array, tz=tz, name=name) - return Index(dt_array, name=name, dtype=dt_array.dtype) + return DatetimeIndex(dt_array, tz=tz, name=name, copy=False) + return Index(dt_array, name=name, dtype=dt_array.dtype, copy=False) def _convert_and_box_cache( @@ -476,13 +476,13 @@ def _array_strptime_with_fallback( dta = DatetimeArray._simple_new(result, dtype=dtype) if utc: dta = dta.tz_convert("UTC") - return Index(dta, name=name) + return Index(dta, name=name, copy=False) elif result.dtype != object and utc: unit = np.datetime_data(result.dtype)[0] unit = cast("TimeUnit", unit) - res = Index(result, dtype=f"M8[{unit}, UTC]", name=name) + res = Index(result, dtype=f"M8[{unit}, UTC]", name=name, copy=False) return res - return Index(result, dtype=result.dtype, name=name) + return Index(result, dtype=result.dtype, name=name, copy=False) def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index: diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index bbdbe363d07ee..c74b6b2e1badb 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -241,5 +241,5 @@ def _convert_listlike( from pandas import TimedeltaIndex - value = TimedeltaIndex(td64arr, name=name) + value = TimedeltaIndex(td64arr, name=name, copy=False) return value diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index a6a5a7c23b506..64fd2836e87d4 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -377,7 +377,7 @@ def _agg_index(self, index) -> Index: ) if cast_type is not None: # Don't perform RangeIndex inference - idx = Index(arr, name=name, dtype=cast_type) + idx = Index(arr, name=name, dtype=cast_type, copy=False) else: idx = ensure_index_from_sequences([arr], [name]) arrays.append(idx) diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index b44f595e73670..7ef6b6fe97e30 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -1330,7 +1330,9 @@ def _convert_categorical(from_frame: DataFrame) -> DataFrame: if isinstance(ser.dtype, CategoricalDtype): cat = ser._values.remove_unused_categories() if cat.categories.dtype == object: - categories = pd.Index._with_infer(cat.categories._values) + categories = pd.Index._with_infer( + cat.categories._values, copy=False + ) cat = cat.set_categories(categories) elif cat.categories.dtype == "string" and len(cat.categories) == 0: # if the read categories are empty, it comes back as object dtype diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index c4e6733b9a08d..196b3aadccaef 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -171,7 +171,7 @@ def infer_freq( ) if not isinstance(index, DatetimeIndex): - index = DatetimeIndex(index) + index = DatetimeIndex(index, copy=False) inferer = _FrequencyInferer(index) return inferer.get_freq() From 2991ebad538ec44a6f438c9c20b94c9c43653532 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 21 Dec 2025 11:01:00 -0500 Subject: [PATCH 2/6] More copy=False --- pandas/_testing/__init__.py | 2 +- pandas/conftest.py | 2 +- pandas/core/apply.py | 3 +- pandas/core/arrays/arrow/array.py | 2 +- pandas/core/arrays/categorical.py | 3 +- pandas/core/arrays/masked.py | 3 +- pandas/core/arrays/sparse/array.py | 2 +- pandas/core/base.py | 2 +- pandas/core/indexes/base.py | 44 ++++++++++++++++++++---- pandas/core/indexes/datetimelike.py | 1 - pandas/core/indexes/multi.py | 1 - pandas/core/indexes/period.py | 4 +-- pandas/core/indexing.py | 2 +- pandas/core/reshape/merge.py | 6 ++-- pandas/core/reshape/tile.py | 2 +- pandas/core/strings/accessor.py | 4 +-- pandas/core/tools/datetimes.py | 2 +- pandas/core/util/hashing.py | 4 ++- pandas/io/parsers/python_parser.py | 2 +- pandas/io/pytables.py | 13 +++---- pandas/plotting/_matplotlib/converter.py | 2 +- pandas/tests/tslibs/test_conversion.py | 1 + 22 files changed, 70 insertions(+), 37 deletions(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 230ebc3c10fad..9a6b531c6da5e 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -290,7 +290,7 @@ def box_expected(expected, box_cls, transpose: bool = True): else: expected = pd.array(expected, copy=False) elif box_cls is Index: - expected = Index(expected) + expected = Index(expected, copy=False) elif box_cls is Series: expected = Series(expected) elif box_cls is DataFrame: diff --git a/pandas/conftest.py b/pandas/conftest.py index 74c79c7025ec6..1e1d84e08dcd9 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -769,7 +769,7 @@ def index_with_missing(request): vals = ind.values.copy() vals[0] = None vals[-1] = None - return type(ind)(vals) + return type(ind)(vals, copy=False) # ---------------------------------------------------------------- diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 014584435b477..3f218b3813149 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1972,8 +1972,7 @@ def relabel_result( fun = [ com.get_callable_name(f) if not isinstance(f, str) else f for f in fun ] - # TODO: Does copy=False do anything here? - col_idx_order = Index(s.index).get_indexer(fun) + col_idx_order = Index(s.index, copy=False).get_indexer(fun) valid_idx = col_idx_order != -1 if valid_idx.any(): s = s.iloc[col_idx_order[valid_idx]] diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index d2d67dd644303..0e69ba9a1b778 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1804,7 +1804,7 @@ def value_counts(self, dropna: bool = True) -> Series: counts = ArrowExtensionArray(counts) - index = Index(self._from_pyarrow_array(values)) + index = Index(self._from_pyarrow_array(values), copy=False) return Series(counts, index=index, name="count", copy=False) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 705c6437bb848..c24ec494a84ce 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -670,8 +670,7 @@ def _from_inferred_categories( to_timedelta, ) - # TODO: Here - cats = Index(inferred_categories) + cats = Index(inferred_categories, copy=False) known_categories = ( isinstance(dtype, CategoricalDtype) and dtype.categories is not None ) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 9a0b29316d192..390c4d2a8bcdc 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -1422,7 +1422,8 @@ def value_counts(self, dropna: bool = True) -> Series: self.dtype.construct_array_type()( keys, # type: ignore[arg-type] mask_index, - ) + ), + copy=False, ) return Series(arr, index=index, name="count", copy=False) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 86140229b724e..4b1c05fe7be0b 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -961,7 +961,7 @@ def value_counts(self, dropna: bool = True) -> Series: counts = np.insert(counts, 0, fcounts) if not isinstance(keys, ABCIndex): - index = Index(keys) + index = Index(keys, copy=False) else: index = keys return Series(counts, index=index, copy=False) diff --git a/pandas/core/base.py b/pandas/core/base.py index e75e8c70dc437..8fb7b6590afb1 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1316,7 +1316,7 @@ def factorize( except NotImplementedError: # not all dtypes are supported in Index that are allowed for Series # e.g. float16 or bytes - uniques = Index(uniques) + uniques = Index(uniques, copy=False) return codes, uniques _shared_docs["searchsorted"] = """ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 602facac86db0..bffd83ea61709 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -300,6 +300,7 @@ def _new_Index(cls, d): """ # required for backward compat, because PI can't be instantiated with # ordinals through __new__ GH #13277 + d["copy"] = False if issubclass(cls, ABCPeriodIndex): from pandas.core.indexes.period import _new_PeriodIndex @@ -321,7 +322,7 @@ def _new_Index(cls, d): return cls.__new__(cls, **d) -def called_from_tests() -> bool: +def ignore_caller() -> bool: """ Find the first place in the stack that is not inside pandas (tests notwithstanding). @@ -338,7 +339,40 @@ def called_from_tests() -> bool: frame = inspect.currentframe().f_back.f_back.f_back try: filename = inspect.getfile(frame) - return filename.startswith(test_dir) + if filename.startswith(test_dir): + return True + caller = frame.f_code.co_name + if caller in [ + "sanitize_array", + "_maybe_cast_listlike_indexer", + "convert", + "_cast_pointwise_result", + "isin", + "array", + ]: + # Due to maybe_convert_object + return True + if frame.f_back.f_code.co_name in ["ndarray_to_mgr"]: + # Due to maybe_convert_object + list comp + return True + if caller in [ + "map", + "_to_datetime_with_unit", + "_convert_listlike_datetimes", + "to_numeric", + "quantile", + "ensure_index_from_sequences", + "infer_objects", + "cut", + "_set_grouper", + "_convert_can_do_setop", + "ensure_index", + "set_levels", + "from_records", + ]: + return True + + return False finally: # See note in # https://docs.python.org/3/library/inspect.html#inspect.Traceback @@ -1006,8 +1040,7 @@ def __array_wrap__(self, result, context=None, return_scalar=False): # Reached in plotting tests with e.g. np.nonzero(index) return result - # TODO: Here? - return Index(result, name=self.name) + return Index(result, name=self.name, copy=False) @cache_readonly def dtype(self) -> DtypeObj: @@ -5232,7 +5265,7 @@ def _maybe_copy_array_input( if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)): import os - if not called_from_tests() and "PYTEST_CURRENT_TEST" in os.environ: + if not ignore_caller() and "PYTEST_CURRENT_TEST" in os.environ: with open( "/home/richard/dev/pandas/pytest.out", mode="a" ) as fh: @@ -6781,7 +6814,6 @@ def _maybe_cast_listlike_indexer(self, target) -> Index: ): # If we started with a list-like, avoid inference to string dtype if self # is object dtype (coercing to string dtype will alter the missing values) - # TODO: Here? target_index = Index(target, dtype=self.dtype) elif ( not hasattr(target, "dtype") diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index f25d357ad7d0e..fd061666c1f00 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -534,7 +534,6 @@ def _maybe_cast_listlike_indexer(self, keyarr): # TODO: com.asarray_tuplesafe shouldn't cast e.g. DatetimeArray else: res = keyarr - # TODO: Here? return Index(res, dtype=res.dtype) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 7fef49025d415..66b74818507e5 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1041,7 +1041,6 @@ def set_levels( if isinstance(levels, Index): pass elif is_array_like(levels): - # TODO: Here? levels = Index(levels) elif is_list_like(levels): levels = list(levels) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index fadcd74afc0fe..a313a927a5d0b 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -410,7 +410,7 @@ def asof_locs(self, where: Index, mask: npt.NDArray[np.bool_]) -> np.ndarray: Array of booleans where data is not NA. """ if isinstance(where, DatetimeIndex): - where = PeriodIndex(where._values, freq=self.freq) + where = PeriodIndex(where._values, freq=self.freq, copy=False) elif not isinstance(where, PeriodIndex): raise TypeError("asof_locs `where` must be DatetimeIndex or PeriodIndex") @@ -625,4 +625,4 @@ def period_range( data, freq = PeriodArray._generate_range(start, end, periods, freq) dtype = PeriodDtype(freq) data = PeriodArray(data, dtype=dtype) - return PeriodIndex(data, name=name) + return PeriodIndex(data, name=name, copy=False) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index a476415d6c7c0..31145a552084b 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -894,7 +894,7 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None: ): # GH#38148 keys = self.obj.columns.union(key, sort=False) - diff = Index(key).difference(self.obj.columns, sort=False) + diff = Index(key, copy=False).difference(self.obj.columns, sort=False) if len(diff): # e.g. if we are doing df.loc[:, ["A", "B"]] = 7 and "B" diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index f7fd4da2968a7..467d6ba2da9af 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -2097,8 +2097,8 @@ def get_join_indexers( lkey = left_keys[0] rkey = right_keys[0] - left = Index(lkey) - right = Index(rkey) + left = Index(lkey, copy=False) + right = Index(rkey, copy=False) if ( left.is_monotonic_increasing @@ -2529,7 +2529,7 @@ def _convert_values_for_libjoin( self, values: AnyArrayLike, side: str ) -> np.ndarray: # we require sortedness and non-null values in the join keys - if not Index(values).is_monotonic_increasing: + if not Index(values, copy=False).is_monotonic_increasing: if isna(values).any(): raise ValueError(f"Merge keys contain null values on {side} side") raise ValueError(f"{side} keys must be sorted") diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 9ef8e783c8104..d7ed603bd4ad4 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -571,7 +571,7 @@ def _coerce_to_type(x: Index) -> tuple[Index, DtypeObj | None]: # https://github.com/pandas-dev/pandas/issues/31389 elif isinstance(x.dtype, ExtensionDtype) and is_numeric_dtype(x.dtype): x_arr = x.to_numpy(dtype=np.float64, na_value=np.nan) - x = Index(x_arr) + x = Index(x_arr, copy=False) return Index(x), dtype diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 386681ceb1cf0..78174b0606fcf 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -383,7 +383,7 @@ def cons_row(x): out = out.get_level_values(0) return out else: - return Index(result, name=name, dtype=dtype) + return Index(result, name=name, dtype=dtype, copy=False) else: index = self._orig.index # This is a mess. @@ -703,7 +703,7 @@ def cat( if isna(result).all(): dtype = object # type: ignore[assignment] - out = Index(result, dtype=dtype, name=self._orig.name) + out = Index(result, dtype=dtype, name=self._orig.name, copy=False) else: # Series res_ser = Series( result, dtype=dtype, index=data.index, name=self._orig.name, copy=False diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 7cbe021d7864b..8115915ebabdf 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -381,7 +381,7 @@ def _convert_listlike_datetimes( arg_array = arg_array._dt_tz_convert("UTC") else: arg_array = arg_array._dt_tz_localize("UTC") - arg = Index(arg_array) + arg = Index(arg_array, copy=False) else: # ArrowExtensionArray if arg_dtype.pyarrow_dtype.tz is not None: diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index ae36a9a6176e9..4f4a28e630e9c 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -324,7 +324,9 @@ def _hash_ndarray( ) codes, categories = factorize(vals, sort=False) - tdtype = CategoricalDtype(categories=Index(categories), ordered=False) + tdtype = CategoricalDtype( + categories=Index(categories, copy=False), ordered=False + ) cat = Categorical._simple_new(codes, tdtype) return cat._hash_pandas_object( encoding=encoding, hash_key=hash_key, categorize=False diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index 55a219eba1c61..66a67dc5d2b1d 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -509,7 +509,7 @@ def _cast_types(self, values: ArrayLike, cast_type: DtypeObj, column) -> ArrayLi values, skipna=False, convert_na_value=False ) - cats = Index(values).unique().dropna() + cats = Index(values, copy=False).unique().dropna() values = Categorical._from_inferred_categories( cats, cats.get_indexer(values), cast_type, true_values=self.true_values ) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index fa01fd5e4379c..4b4bc8708728a 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2990,6 +2990,7 @@ def _get_index_factory(self, attrs): factory: Callable + kwargs = {} if index_class == DatetimeIndex: def f(values, freq=None, tz=None): @@ -3013,8 +3014,8 @@ def f(values, freq=None, tz=None): factory = f else: factory = index_class + kwargs["copy"] = False - kwargs = {} if "freq" in attrs: kwargs["freq"] = attrs["freq"] if index_class is Index: @@ -4451,7 +4452,7 @@ def read_coordinates( ) coords = coords[op(data.iloc[coords - coords.min()], filt).values] - return Index(coords) + return Index(coords, copy=False) def read_column( self, @@ -5183,15 +5184,15 @@ def _unconvert_index(data, kind: str, encoding: str, errors: str) -> np.ndarray if kind.startswith("datetime64"): if kind == "datetime64": # created before we stored resolution information - index = DatetimeIndex(data) + index = DatetimeIndex(data, copy=False) else: - index = DatetimeIndex(data.view(kind)) + index = DatetimeIndex(data.view(kind), copy=False) elif kind.startswith("timedelta64"): if kind == "timedelta64": # created before we stored resolution information - index = TimedeltaIndex(data) + index = TimedeltaIndex(data, copy=False) else: - index = TimedeltaIndex(data.view(kind)) + index = TimedeltaIndex(data.view(kind), copy=False) elif kind == "date": try: index = np.asarray([date.fromordinal(v) for v in data], dtype=object) diff --git a/pandas/plotting/_matplotlib/converter.py b/pandas/plotting/_matplotlib/converter.py index 813bd984cf297..0ca1c398d2671 100644 --- a/pandas/plotting/_matplotlib/converter.py +++ b/pandas/plotting/_matplotlib/converter.py @@ -268,7 +268,7 @@ def _convert_1d(values, freq: BaseOffset): elif lib.infer_dtype(values, skipna=False) == "period": # https://github.com/pandas-dev/pandas/issues/24304 # convert ndarray[period] -> PeriodIndex - return PeriodIndex(values, freq=freq).asi8 + return PeriodIndex(values, freq=freq, copy=False).asi8 elif isinstance(values, (list, tuple, np.ndarray)): return [_get_datevalue(x, freq) for x in values] return values diff --git a/pandas/tests/tslibs/test_conversion.py b/pandas/tests/tslibs/test_conversion.py index 5a6d76d78df53..b5598cfded074 100644 --- a/pandas/tests/tslibs/test_conversion.py +++ b/pandas/tests/tslibs/test_conversion.py @@ -42,6 +42,7 @@ def _compare_local_to_utc(tz_didx, naive_didx): except Exception as err: err1 = err + expected = naive_didx.map(lambda x: x.tz_localize(tz_didx.tz)).asi8 try: expected = naive_didx.map(lambda x: x.tz_localize(tz_didx.tz)).asi8 except Exception as err: From 8d87f83a09051e04de5497a323a194c116987f00 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 21 Dec 2025 11:03:54 -0500 Subject: [PATCH 3/6] cleanup --- pandas/core/indexes/base.py | 64 -------------------------- pandas/tests/tslibs/test_conversion.py | 1 - 2 files changed, 65 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index bffd83ea61709..7654647fbdc00 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -322,63 +322,6 @@ def _new_Index(cls, d): return cls.__new__(cls, **d) -def ignore_caller() -> bool: - """ - Find the first place in the stack that is not inside pandas - (tests notwithstanding). - """ - import inspect - import os - - import pandas as pd - - pkg_dir = os.path.dirname(pd.__file__) - test_dir = os.path.join(pkg_dir, "tests") - - # https://stackoverflow.com/questions/17407119/python-inspect-stack-is-slow - frame = inspect.currentframe().f_back.f_back.f_back - try: - filename = inspect.getfile(frame) - if filename.startswith(test_dir): - return True - caller = frame.f_code.co_name - if caller in [ - "sanitize_array", - "_maybe_cast_listlike_indexer", - "convert", - "_cast_pointwise_result", - "isin", - "array", - ]: - # Due to maybe_convert_object - return True - if frame.f_back.f_code.co_name in ["ndarray_to_mgr"]: - # Due to maybe_convert_object + list comp - return True - if caller in [ - "map", - "_to_datetime_with_unit", - "_convert_listlike_datetimes", - "to_numeric", - "quantile", - "ensure_index_from_sequences", - "infer_objects", - "cut", - "_set_grouper", - "_convert_can_do_setop", - "ensure_index", - "set_levels", - "from_records", - ]: - return True - - return False - finally: - # See note in - # https://docs.python.org/3/library/inspect.html#inspect.Traceback - del frame - - @set_module("pandas") class Index(IndexOpsMixin, PandasObject): """ @@ -5263,13 +5206,6 @@ def _maybe_copy_array_input( if isinstance(data, (ExtensionArray, np.ndarray)): if copy is not False: if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)): - import os - - if not ignore_caller() and "PYTEST_CURRENT_TEST" in os.environ: - with open( - "/home/richard/dev/pandas/pytest.out", mode="a" - ) as fh: - fh.write(os.environ["PYTEST_CURRENT_TEST"] + "\n") data = data.copy() copy = False return data, bool(copy) diff --git a/pandas/tests/tslibs/test_conversion.py b/pandas/tests/tslibs/test_conversion.py index b5598cfded074..5a6d76d78df53 100644 --- a/pandas/tests/tslibs/test_conversion.py +++ b/pandas/tests/tslibs/test_conversion.py @@ -42,7 +42,6 @@ def _compare_local_to_utc(tz_didx, naive_didx): except Exception as err: err1 = err - expected = naive_didx.map(lambda x: x.tz_localize(tz_didx.tz)).asi8 try: expected = naive_didx.map(lambda x: x.tz_localize(tz_didx.tz)).asi8 except Exception as err: From 9226871dcb24104d91622ed899e8138e381c7761 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 21 Dec 2025 11:31:26 -0500 Subject: [PATCH 4/6] reverts --- pandas/core/dtypes/dtypes.py | 2 +- pandas/core/indexes/base.py | 2 +- pandas/core/indexes/multi.py | 6 +----- pandas/core/tools/datetimes.py | 4 ++-- 4 files changed, 5 insertions(+), 9 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 50f9241ec7028..3b14c5348f9d6 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -581,7 +581,7 @@ def validate_categories(categories, fastpath: bool = False) -> Index: f"Parameter 'categories' must be list-like, was {categories!r}" ) if not isinstance(categories, ABCIndex): - categories = Index._with_infer(categories, tupleize_cols=False, copy=False) + categories = Index._with_infer(categories, tupleize_cols=False) if not fastpath: if categories.hasnans: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 7654647fbdc00..e9eaff0e08bd2 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -983,7 +983,7 @@ def __array_wrap__(self, result, context=None, return_scalar=False): # Reached in plotting tests with e.g. np.nonzero(index) return result - return Index(result, name=self.name, copy=False) + return Index(result, name=self.name) @cache_readonly def dtype(self) -> DtypeObj: diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 66b74818507e5..8623a4bb061bb 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -564,11 +564,7 @@ def from_tuples( # handling the empty tuple cases if len(tuples) and all(isinstance(e, tuple) and not e for e in tuples): codes = [np.zeros(len(tuples))] - levels = [ - Index( - com.asarray_tuplesafe(tuples, dtype=np.dtype("object")), copy=False - ) - ] + levels = [Index(com.asarray_tuplesafe(tuples, dtype=np.dtype("object")))] return cls( levels=levels, codes=codes, diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 8115915ebabdf..fc38f0ef35492 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -288,8 +288,8 @@ def _box_as_indexlike( if lib.is_np_dtype(dt_array.dtype, "M"): tz = "utc" if utc else None - return DatetimeIndex(dt_array, tz=tz, name=name, copy=False) - return Index(dt_array, name=name, dtype=dt_array.dtype, copy=False) + return DatetimeIndex(dt_array, tz=tz, name=name) + return Index(dt_array, name=name, dtype=dt_array.dtype) def _convert_and_box_cache( From c1bf9d88b6d3a6990ecf915dc4c131aa3286de7f Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 21 Dec 2025 17:54:51 -0500 Subject: [PATCH 5/6] Refinements --- pandas/core/tools/timedeltas.py | 3 ++- pandas/plotting/_matplotlib/converter.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index c74b6b2e1badb..8ab2332efb6f8 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -241,5 +241,6 @@ def _convert_listlike( from pandas import TimedeltaIndex - value = TimedeltaIndex(td64arr, name=name, copy=False) + copy = td64arr is arg + value = TimedeltaIndex(td64arr, name=name, copy=copy) return value diff --git a/pandas/plotting/_matplotlib/converter.py b/pandas/plotting/_matplotlib/converter.py index 0ca1c398d2671..813bd984cf297 100644 --- a/pandas/plotting/_matplotlib/converter.py +++ b/pandas/plotting/_matplotlib/converter.py @@ -268,7 +268,7 @@ def _convert_1d(values, freq: BaseOffset): elif lib.infer_dtype(values, skipna=False) == "period": # https://github.com/pandas-dev/pandas/issues/24304 # convert ndarray[period] -> PeriodIndex - return PeriodIndex(values, freq=freq, copy=False).asi8 + return PeriodIndex(values, freq=freq).asi8 elif isinstance(values, (list, tuple, np.ndarray)): return [_get_datevalue(x, freq) for x in values] return values From bd8af678152943fa078aebdb6e172ef85eccf87c Mon Sep 17 00:00:00 2001 From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> Date: Mon, 22 Dec 2025 06:50:14 -0500 Subject: [PATCH 6/6] Update pandas/core/tools/timedeltas.py Co-authored-by: Joris Van den Bossche --- pandas/core/tools/timedeltas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index 8ab2332efb6f8..6789eb85b6c4b 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -241,6 +241,6 @@ def _convert_listlike( from pandas import TimedeltaIndex - copy = td64arr is arg + copy = td64arr is arg or np.may_share_memory(arg, td64arr) value = TimedeltaIndex(td64arr, name=name, copy=copy) return value