diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index bc66335d74f9b..3c9810598331e 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -157,9 +157,9 @@ Deprecations Performance improvements ~~~~~~~~~~~~~~~~~~~~~~~~ -- Performance improvement in :meth:`DataFrame.sort_index` and :meth:`Series.sort_index` when indexed by a :class:`MultiIndex` (:issue:`54835`, :issue:`54883`) +- Performance improvement in :meth:`DataFrame.to_dict` when converting a DataFrame to a dictionary (:issue:`50990`) +- Performance improvement in :meth:`DataFrame.sort_index` and :meth:`Series.sort_index` when indexed by a :class:`MultiIndex` (:issue:`54835`) - Performance improvement when indexing with more than 4 keys (:issue:`54550`) -- .. --------------------------------------------------------------------------- .. _whatsnew_220.bug_fixes: diff --git a/pandas/core/methods/to_dict.py b/pandas/core/methods/to_dict.py index e89f641e17296..f4e0dcddcd34a 100644 --- a/pandas/core/methods/to_dict.py +++ b/pandas/core/methods/to_dict.py @@ -106,13 +106,13 @@ def to_dict( return into_c((k, v.to_dict(into)) for k, v in df.items()) elif orient == "list": - object_dtype_indices_as_set = set(box_native_indices) + object_dtype_indices_as_set: set[int] = set(box_native_indices) return into_c( ( k, - list(map(maybe_box_native, v.tolist())) + list(map(maybe_box_native, v.to_numpy().tolist())) if i in object_dtype_indices_as_set - else v.tolist(), + else v.to_numpy().tolist(), ) for i, (k, v) in enumerate(df.items()) ) diff --git a/pandas/tests/frame/methods/test_to_dict.py b/pandas/tests/frame/methods/test_to_dict.py index 7bb9518f9b0f9..61f0ad30b4519 100644 --- a/pandas/tests/frame/methods/test_to_dict.py +++ b/pandas/tests/frame/methods/test_to_dict.py @@ -166,6 +166,21 @@ def test_to_dict_not_unique_warning(self): with tm.assert_produces_warning(UserWarning): df.to_dict() + @pytest.mark.filterwarnings("ignore::UserWarning") + @pytest.mark.parametrize( + 
"orient,expected", + [ + ("list", {"A": [2, 5], "B": [3, 6]}), + ("dict", {"A": {0: 2, 1: 5}, "B": {0: 3, 1: 6}}), + ], + ) + def test_to_dict_not_unique(self, orient, expected): + # GH#54824: This is to make sure that dataframes with non-unique column + # would have uniform behavior throughout different orients + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["A", "A", "B"]) + result = df.to_dict(orient) + assert result == expected + # orient - orient argument to to_dict function # item_getter - function for extracting value from # the resulting dict using column name and index