From 1001a16a3aa7977edb361a1d952e0e190817c5b1 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 1 Mar 2021 15:09:52 +0100 Subject: [PATCH 1/2] [ArrayManager] Ensure to store datetime/timedelta data as DatetimeArray/TimedeltaArray (and not ndarray) --- pandas/core/internals/array_manager.py | 14 +++++++++++++- pandas/tests/frame/methods/test_rename.py | 1 + 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 5001754017dda..e4356ff2fddfa 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -33,6 +33,7 @@ ) from pandas.core.dtypes.common import ( is_bool_dtype, + is_datetime64_ns_dtype, is_dtype_equal, is_extension_array_dtype, is_numeric_dtype, @@ -53,7 +54,11 @@ ) import pandas.core.algorithms as algos -from pandas.core.arrays import ExtensionArray +from pandas.core.arrays import ( + DatetimeArray, + ExtensionArray, + TimedeltaArray, +) from pandas.core.arrays.sparse import SparseDtype from pandas.core.construction import ( ensure_wrapped_if_datetimelike, @@ -113,6 +118,7 @@ def __init__( if verify_integrity: self._axes = [ensure_index(ax) for ax in axes] + self.arrays = [ensure_wrapped_if_datetimelike(arr) for arr in arrays] self._verify_integrity() def make_empty(self: T, axes=None) -> T: @@ -721,6 +727,8 @@ def fast_xs(self, loc: int) -> ArrayLike: temp_dtype = dtype.numpy_dtype elif is_extension_array_dtype(dtype): temp_dtype = "object" + elif is_datetime64_ns_dtype(dtype) or is_timedelta64_ns_dtype(dtype): + temp_dtype = "object" elif is_dtype_equal(dtype, str): temp_dtype = "object" else: @@ -729,6 +737,10 @@ def fast_xs(self, loc: int) -> ArrayLike: result = np.array([arr[loc] for arr in self.arrays], dtype=temp_dtype) if isinstance(dtype, ExtensionDtype): result = dtype.construct_array_type()._from_sequence(result, dtype=dtype) + elif is_datetime64_ns_dtype(dtype): + result = DatetimeArray._from_sequence(result, dtype=dtype)._data + elif is_timedelta64_ns_dtype(dtype): + result = TimedeltaArray._from_sequence(result, dtype=dtype)._data return result def iget(self, i: int) -> SingleBlockManager: diff --git a/pandas/tests/frame/methods/test_rename.py b/pandas/tests/frame/methods/test_rename.py index 677d862dfe077..462d588aff58f 100644 --- a/pandas/tests/frame/methods/test_rename.py +++ b/pandas/tests/frame/methods/test_rename.py @@ -170,6 +170,7 @@ def test_rename_multiindex(self): renamed = df.rename(index={"foo1": "foo3", "bar2": "bar3"}, level=0) tm.assert_index_equal(renamed.index, new_index) + @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) setitem copy/view def test_rename_nocopy(self, float_frame): renamed = float_frame.rename(columns={"C": "foo"}, copy=False) renamed["foo"] = 1.0 From d8482051fe77d41c1c267cc2e5b43e8c2e454b3c Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 2 Mar 2021 08:24:26 +0100 Subject: [PATCH 2/2] simplify --- pandas/core/internals/array_manager.py | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index e4356ff2fddfa..0556434c2c223 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -721,26 +721,16 @@ def fast_xs(self, loc: int) -> ArrayLike: """ dtype = _interleaved_dtype(self.arrays) - if isinstance(dtype, SparseDtype): - temp_dtype = dtype.subtype - elif isinstance(dtype, PandasDtype): - temp_dtype = dtype.numpy_dtype - elif is_extension_array_dtype(dtype): - temp_dtype = "object" - elif is_datetime64_ns_dtype(dtype) or is_timedelta64_ns_dtype(dtype): - temp_dtype = "object" - elif is_dtype_equal(dtype, str): - temp_dtype = "object" - else: - temp_dtype = dtype - - result = np.array([arr[loc] for arr in self.arrays], dtype=temp_dtype) + values = [arr[loc] for arr in self.arrays] if isinstance(dtype, ExtensionDtype): - result = dtype.construct_array_type()._from_sequence(result, dtype=dtype) + result = dtype.construct_array_type()._from_sequence(values, dtype=dtype) + # for datetime64/timedelta64, the np.ndarray constructor cannot handle pd.NaT elif is_datetime64_ns_dtype(dtype): - result = DatetimeArray._from_sequence(result, dtype=dtype)._data + result = DatetimeArray._from_sequence(values, dtype=dtype)._data elif is_timedelta64_ns_dtype(dtype): - result = TimedeltaArray._from_sequence(result, dtype=dtype)._data + result = TimedeltaArray._from_sequence(values, dtype=dtype)._data + else: + result = np.array(values, dtype=dtype) return result def iget(self, i: int) -> SingleBlockManager: