From b4f2062d6536bf4a422934735e34948bd41ed701 Mon Sep 17 00:00:00 2001 From: Damian Barabonkov Date: Wed, 2 Feb 2022 11:22:32 +0100 Subject: [PATCH 1/8] Reset the index before 'na_value' assignment in 'to_numpy' --- pandas/core/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 84bc6cb161bec..62f341f593d3c 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -541,7 +541,7 @@ def to_numpy( if copy or na_value is not lib.no_default: result = result.copy() if na_value is not lib.no_default: - result[self.isna()] = na_value + result[self.isna().reset_index(drop=True)] = na_value return result @property From 75af3bd990413b0990bf0a14ec0e400fb767a920 Mon Sep 17 00:00:00 2001 From: Damian Barabonkov Date: Wed, 2 Feb 2022 11:40:26 +0100 Subject: [PATCH 2/8] Slight performance improvement using 'to_numpy' instead of 'reset_index' --- pandas/core/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 62f341f593d3c..d7136c8013bcc 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -541,7 +541,7 @@ def to_numpy( if copy or na_value is not lib.no_default: result = result.copy() if na_value is not lib.no_default: - result[self.isna().reset_index(drop=True)] = na_value + result[self.isna().to_numpy()] = na_value return result @property From 2b9e691a2291d68c1c5e9014e4c7d10db86fcfc5 Mon Sep 17 00:00:00 2001 From: Damian Barabonkov Date: Wed, 2 Feb 2022 11:51:42 +0100 Subject: [PATCH 3/8] Added to whatsnew --- doc/source/whatsnew/v1.5.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 039a86da0541e..011c32bcbd5e1 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -286,7 +286,7 @@ Missing MultiIndex ^^^^^^^^^^ -- +- Bug in :meth:`IndexOpsMixin.to_numpy` where multiindexed Series could not be converted to numpy arrays when an ``na_value`` was supplied (:issue:`45774`) - I/O From 4ff0a992651bfcfe93fd381b03c86dde356143dd Mon Sep 17 00:00:00 2001 From: Damian Barabonkov Date: Thu, 3 Feb 2022 11:42:19 +0100 Subject: [PATCH 4/8] The 'to_numpy' mask supports both pandas and numpy inputs --- pandas/core/base.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index d7136c8013bcc..7a6462a834ef8 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -541,7 +541,9 @@ def to_numpy( if copy or na_value is not lib.no_default: result = result.copy() if na_value is not lib.no_default: - result[self.isna().to_numpy()] = na_value + # Convert the mask to numpy in order to prevent possible + # issues with multiindex compatibility (#45774) + result[np.asanyarray(self.isna())] = na_value return result @property From 9e39eee14d9c7f3e01e0684d7a99e8409646e397 Mon Sep 17 00:00:00 2001 From: Damian Barabonkov Date: Tue, 8 Feb 2022 12:07:15 +0100 Subject: [PATCH 5/8] Added test cases for MultiIndex Series 'to_numpy' in relation to PR #45775 --- pandas/tests/base/test_conversion.py | 36 ++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py index 216c9b1546b9d..104d172b9eafb 100644 --- a/pandas/tests/base/test_conversion.py +++ b/pandas/tests/base/test_conversion.py @@ -415,6 +415,42 @@ def test_to_numpy_na_value_numpy_dtype( tm.assert_numpy_array_equal(result, expected) +@pytest.mark.parametrize( + "data, multiindex, dtype, na_value, expected", + [ + ( + [1, 2, None, 4], + [(0, "a"), (0, "b"), (1, "b"), (1, "c")], + float, + np.nan, + [1.0, 2.0, np.nan, 4.0], + ), + ( + [1.0, 2.0, np.nan, 4.0], + [("a", 0), ("a", 1), ("a", 2), ("b", 0)], + int, + 0, + [1, 2, 0, 4], + ), + ( + [Timestamp("2000"), Timestamp("2000"), pd.NaT], + [(0, Timestamp("2021")), (0, Timestamp("2022")), (1, Timestamp("2000"))], + None, + Timestamp("2000"), + [np.datetime64("2000-01-01T00:00:00.000000000")] * 3, + ), + ], +) +def test_to_numpy_multiindex_series_na_value( + data, multiindex, dtype, na_value, expected +): + index = pd.MultiIndex.from_tuples(multiindex) + series = Series(data, index=index) + result = series.to_numpy(dtype=dtype, na_value=na_value) + expected = np.array(expected) + tm.assert_numpy_array_equal(result, expected) + + def test_to_numpy_kwargs_raises(): # numpy s = Series([1, 2, 3]) From c73ba48301ce1536fe2e300ccbe77410283a4d04 Mon Sep 17 00:00:00 2001 From: Damian Barabonkov Date: Tue, 22 Feb 2022 12:19:47 +0100 Subject: [PATCH 6/8] Changed entry in 'whatsnew' to refer to public facing features rather than internals --- doc/source/whatsnew/v1.5.0.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 596b035150dd1..3323e57b0b096 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -329,8 +329,7 @@ Missing MultiIndex ^^^^^^^^^^ -- Bug in :meth:`IndexOpsMixin.to_numpy` where multiindexed Series could not be converted to numpy arrays when an ``na_value`` was supplied (:issue:`45774`) -- +- Bug in :meth:`Series.to_numpy` where multiindexed Series could not be converted to numpy arrays when an ``na_value`` was supplied (:issue:`45774`) I/O ^^^ From 82d64c4d79fc09edaeb08f92ec2ff86411419f8c Mon Sep 17 00:00:00 2001 From: Damian Barabonkov Date: Thu, 3 Mar 2022 11:03:48 +0100 Subject: [PATCH 7/8] Added 'na_value=None' test case for multiindex series to numpy test --- pandas/tests/base/test_conversion.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py index 104d172b9eafb..599aaae4d3527 100644 --- a/pandas/tests/base/test_conversion.py +++ b/pandas/tests/base/test_conversion.py @@ -418,6 +418,13 @@ def test_to_numpy_na_value_numpy_dtype( @pytest.mark.parametrize( "data, multiindex, dtype, na_value, expected", [ + ( + [1, 2, None, 4], + [(0, "a"), (0, "b"), (1, "b"), (1, "c")], + float, + None, + [1.0, 2.0, np.nan, 4.0], + ), ( [1, 2, None, 4], [(0, "a"), (0, "b"), (1, "b"), (1, "c")], From e45ee10b78073d6390ee3d517f3fae87324e58f8 Mon Sep 17 00:00:00 2001 From: Damian Barabonkov Date: Thu, 3 Mar 2022 11:15:37 +0100 Subject: [PATCH 8/8] Removed comment related to issue #45774 --- pandas/core/base.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 7a6462a834ef8..74e119a42c974 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -541,8 +541,6 @@ def to_numpy( if copy or na_value is not lib.no_default: result = result.copy() if na_value is not lib.no_default: - # Convert the mask to numpy in order to prevent possible - # issues with multiindex compatibility (#45774) result[np.asanyarray(self.isna())] = na_value return result