From 39ae2bf875a9b17d8ccdb6d62180eff74b581813 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 4 Apr 2021 09:49:29 -0700 Subject: [PATCH 1/2] CLN: ensure_int64->ensure_platform_int --- pandas/core/generic.py | 4 ++-- pandas/core/indexes/base.py | 4 ++-- pandas/core/internals/array_manager.py | 6 +++--- pandas/core/internals/managers.py | 5 ++--- pandas/core/reshape/tile.py | 4 ++-- 5 files changed, 11 insertions(+), 12 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6b4e3c7caef50..a19402433b2e9 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -79,8 +79,8 @@ ) from pandas.core.dtypes.common import ( - ensure_int64, ensure_object, + ensure_platform_int, ensure_str, is_bool, is_bool_dtype, @@ -4973,7 +4973,7 @@ def _reindex_with_indexers( index = ensure_index(index) if indexer is not None: - indexer = ensure_int64(indexer) + indexer = ensure_platform_int(indexer) # TODO: speed up on homogeneous DataFrame objects new_data = new_data.reindex_indexer( diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 5163c55036fd0..8c406d61dd437 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3828,9 +3828,9 @@ def _reindex_non_unique(self, target): missing = ensure_platform_int(missing) missing_labels = target.take(missing) - missing_indexer = ensure_int64(length[~check]) + missing_indexer = ensure_platform_int(length[~check]) cur_labels = self.take(indexer[check]).values - cur_indexer = ensure_int64(length[check]) + cur_indexer = ensure_platform_int(length[check]) new_labels = np.empty((len(indexer),), dtype=object) new_labels[cur_indexer] = cur_labels diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 294d1fd078b08..a7e7e59a2acf4 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -32,7 +32,7 @@ soft_convert_objects, ) from pandas.core.dtypes.common import ( - ensure_int64, + ensure_platform_int, is_datetime64_ns_dtype, is_dtype_equal, is_extension_array_dtype, @@ -1010,7 +1010,7 @@ def _reindex_indexer( else: validate_indices(indexer, len(self._axes[0])) - indexer = ensure_int64(indexer) + indexer = ensure_platform_int(indexer) if (indexer == -1).any(): allow_fill = True else: @@ -1101,7 +1101,7 @@ def unstack(self, unstacker, fill_value) -> ArrayManager: new_indexer[unstacker.mask] = indexer allow_fill = True new_indexer2D = new_indexer.reshape(*unstacker.full_shape) - new_indexer2D = ensure_int64(new_indexer2D) + new_indexer2D = ensure_platform_int(new_indexer2D) new_arrays = [] for arr in self.arrays: diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index b688f1b4fea5f..1fca464248ae0 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -35,7 +35,7 @@ from pandas.core.dtypes.cast import infer_dtype_from_scalar from pandas.core.dtypes.common import ( - ensure_int64, + ensure_platform_int, is_dtype_equal, is_extension_array_dtype, is_list_like, @@ -1979,8 +1979,7 @@ def _preprocess_slice_or_indexer( dtype = getattr(slice_or_indexer, "dtype", None) raise TypeError(type(slice_or_indexer), dtype) - # TODO: np.intp? - indexer = ensure_int64(slice_or_indexer) + indexer = ensure_platform_int(slice_or_indexer) if not allow_fill: indexer = maybe_convert_indices(indexer, length) return "fancy", indexer, len(indexer) diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 89eba5bf41c78..624dcf23e8c9a 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -11,7 +11,7 @@ from pandas.core.dtypes.common import ( DT64NS_DTYPE, - ensure_int64, + ensure_platform_int, is_bool_dtype, is_categorical_dtype, is_datetime64_dtype, @@ -413,7 +413,7 @@ def _bins_to_cuts( bins = unique_bins side = "left" if right else "right" - ids = ensure_int64(bins.searchsorted(x, side=side)) + ids = ensure_platform_int(bins.searchsorted(x, side=side)) if include_lowest: ids[x == bins[0]] = 1 From ec1b5669a2c753164136de3bf2760b960905e412 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 5 Apr 2021 11:39:01 -0700 Subject: [PATCH 2/2] troubleshoot 32 bit --- pandas/tests/reshape/test_cut.py | 2 +- pandas/tests/reshape/test_qcut.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/reshape/test_cut.py b/pandas/tests/reshape/test_cut.py index 06159cf70b1ab..944205c66c3e6 100644 --- a/pandas/tests/reshape/test_cut.py +++ b/pandas/tests/reshape/test_cut.py @@ -414,7 +414,7 @@ def test_single_bin(data, length): ser = Series([data] * length) result = cut(ser, 1, labels=False) - expected = Series([0] * length) + expected = Series([0] * length, dtype=np.intp) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/reshape/test_qcut.py b/pandas/tests/reshape/test_qcut.py index d48fde35f8561..7996c15ae8e64 100644 --- a/pandas/tests/reshape/test_qcut.py +++ b/pandas/tests/reshape/test_qcut.py @@ -202,7 +202,7 @@ def test_single_quantile(data, start, end, length, labels): intervals = IntervalIndex([Interval(start, end)] * length, closed="right") expected = Series(intervals).astype(CDT(ordered=True)) else: - expected = Series([0] * length) + expected = Series([0] * length, dtype=np.intp) tm.assert_series_equal(result, expected)