From f868874e86f06dfeb62bd2ad6a060a5b188b347e Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Mon, 3 Feb 2020 08:06:10 -0600
Subject: [PATCH 1/6] REGR: Fixed AssertionError in groupby

Closes https://github.com/pandas-dev/pandas/issues/31522
---
 doc/source/whatsnew/v1.0.1.rst                   |  1 +
 pandas/core/groupby/generic.py                   | 12 ++++++++--
 .../tests/groupby/aggregate/test_aggregate.py    | 24 +++++++++++++++++++
 3 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v1.0.1.rst b/doc/source/whatsnew/v1.0.1.rst
index 180411afb117d..2566574fc06dc 100644
--- a/doc/source/whatsnew/v1.0.1.rst
+++ b/doc/source/whatsnew/v1.0.1.rst
@@ -19,6 +19,7 @@ Fixed regressions
 - Fixed regression when indexing a ``Series`` or ``DataFrame`` indexed by ``DatetimeIndex`` with a slice containing a :class:`datetime.date` (:issue:`31501`)
 - Fixed regression in :class:`Series` multiplication when multiplying a numeric :class:`Series` with >10000 elements with a timedelta-like scalar (:issue:`31457`)
 - Fixed regression in :meth:`GroupBy.apply` if called with a function which returned a non-pandas non-scalar object (e.g. a list or numpy array) (:issue:`31441`)
+- Fixed regression in ``.groupby().agg()`` raising an ``AssertionError`` for some reductions like ``min`` on object-dtype columns (:issue:`31522`)
 - Fixed regression in :meth:`to_datetime` when parsing non-nanosecond resolution datetimes (:issue:`31491`)
 - Fixed regression in :meth:`~DataFrame.to_csv` where specifying an ``na_rep`` might truncate the values written (:issue:`31447`)
 - Fixed regression where setting :attr:`pd.options.display.max_colwidth` was not accepting negative integers. In addition, this behavior has been deprecated in favor of using ``None`` (:issue:`31532`)
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 27dd6e953c219..aa9a936e8a5f6 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1061,8 +1061,16 @@ def _cython_agg_blocks(
                 else:
                     result = cast(DataFrame, result)
                     # unwrap DataFrame to get array
-                    assert len(result._data.blocks) == 1
-                    result = result._data.blocks[0].values
+                    if len(result._data.blocks) != 1:
+                        # An input (P, N)-shape block was split into P
+                        # (1, n_groups) blocks. This is problematic since it breaks
+                        # the assumption that one input block is aggregated
+                        # to one output block. We should be OK as long as
+                        # the split output can be put back into a single block below
+                        assert len(result._data.blocks) == result.shape[1]
+                        result = np.asarray(result)
+                    else:
+                        result = result._data.blocks[0].values
 
                 if isinstance(result, np.ndarray) and result.ndim == 1:
                     result = result.reshape(1, -1)
diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index 2d31996a8a964..f16d7e7cfa447 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -377,6 +377,30 @@ def test_agg_index_has_complex_internals(index):
     tm.assert_frame_equal(result, expected)
 
 
+def test_agg_split_block():
+    # https://github.com/pandas-dev/pandas/issues/31522
+    df = pd.DataFrame(
+        {
+            "key1": ["a", "a", "b", "b", "a"],
+            "key2": ["one", "two", "one", "two", "one"],
+            "key3": ["three", "three", "three", "six", "six"],
+            "data1": [0.0, 1, 2, 3, 4],
+            "data2": [0.0, 1, 2, 3, 4],
+        }
+    )
+    result = df.groupby("key1").min()
+    expected = pd.DataFrame(
+        {
+            "key2": ["one", "six"],
+            "key3": ["one", "six"],
+            "data1": [0.0, 2.0],
+            "data2": [0.0, 2.0],
+        },
+        index=pd.Index(["a", "b"], name="key1"),
+    )
+    tm.assert_frame_equal(result, expected)
+
+
 class TestNamedAggregationSeries:
     def test_series_named_agg(self):
         df = pd.Series([1, 2, 3, 4])
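A minimal reproducer for the regression this commit targets, mirroring the new test (a sketch against pandas 1.0.0, not part of the patch itself):

    import pandas as pd

    # On pandas 1.0.0, a reduction like ``min`` on object-dtype columns falls
    # back from cython to a python-level aggregation that returns a DataFrame.
    # That DataFrame can come back holding more than one block, which tripped
    # the ``assert len(result._data.blocks) == 1`` seen above (GH 31522).
    df = pd.DataFrame(
        {
            "key1": ["a", "a", "b", "b", "a"],
            "key2": ["one", "two", "one", "two", "one"],
            "key3": ["three", "three", "three", "six", "six"],
        }
    )
    df.groupby("key1").min()  # raised AssertionError before this patch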
From 70608cfba409e5c287850132e2eabda1947e2b53 Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Mon, 3 Feb 2020 10:42:52 -0600
Subject: [PATCH 2/6] consolidate if needed

---
 pandas/core/groupby/generic.py                   | 13 ++++---------
 pandas/tests/groupby/aggregate/test_aggregate.py |  4 ++--
 2 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index aa9a936e8a5f6..9db26501406ff 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1062,15 +1062,10 @@ def _cython_agg_blocks(
                     result = cast(DataFrame, result)
                     # unwrap DataFrame to get array
                     if len(result._data.blocks) != 1:
-                        # An input (P, N)-shape block was split into P
-                        # (1, n_groups) blocks. This is problematic since it breaks
-                        # the assumption that one input block is aggregated
-                        # to one output block. We should be OK as long as
-                        # the split output can be put back into a single block below
-                        assert len(result._data.blocks) == result.shape[1]
-                        result = np.asarray(result)
-                    else:
-                        result = result._data.blocks[0].values
+                        result._consolidate_inplace()
+
+                    assert len(result._data.blocks) == 1
+                    result = result._data.blocks[0].values
 
                 if isinstance(result, np.ndarray) and result.ndim == 1:
                     result = result.reshape(1, -1)
diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index f16d7e7cfa447..ae6e9f255bfca 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -391,8 +391,8 @@ def test_agg_split_block():
     result = df.groupby("key1").min()
     expected = pd.DataFrame(
         {
-            "key2": ["one", "six"],
-            "key3": ["one", "six"],
+            "key2": ["one", "one"],
+            "key3": ["six", "six"],
             "data1": [0.0, 2.0],
             "data2": [0.0, 2.0],
         },
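The consolidation approach works because the fallback's DataFrame is often merely unconsolidated: each aggregated column sits in its own block even when the dtypes are identical. A small illustration of that internal state (pandas 1.0.x internals, illustration only):

    import pandas as pd

    # Building a frame column-by-column can leave one block per column even
    # though both columns are object dtype.
    df = pd.DataFrame({"x": ["a", "b"]})
    df["y"] = ["c", "d"]            # column insertion adds a second object block
    print(len(df._data.blocks))     # 2
    df._consolidate_inplace()       # merges blocks of the same dtype
    print(len(df._data.blocks))     # 1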
From 6eeda4262bdef24482d90facb58f390354e24fdc Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Tue, 4 Feb 2020 07:35:38 -0600
Subject: [PATCH 3/6] add test

---
 pandas/tests/groupby/aggregate/test_aggregate.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index ae6e9f255bfca..9a9e15a529ed3 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -401,6 +401,21 @@ def test_agg_split_block():
     tm.assert_frame_equal(result, expected)
 
 
+def test_agg_split_object_part_datetime():
+    # https://github.com/pandas-dev/pandas/pull/31616
+    df = pd.DataFrame(
+        {
+            "A": pd.date_range("2000", periods=4),
+            "B": ["a", "b", "c", "d"],
+            "C": ["b", "c", "d", "e"],
+            "D": pd.date_range("2000", periods=4),
+        }
+    ).astype(object)
+    result = df.groupby([0, 0, 0, 0]).min()
+    expected = pd.DataFrame({"A": [pd.Timestamp("2000")], "B": ["a"]})
+    tm.assert_frame_equal(result, expected)
+
+
 class TestNamedAggregationSeries:
     def test_series_named_agg(self):
         df = pd.Series([1, 2, 3, 4])

From 04d2c72b1f5b727990bfff18d56ae1dc2935ca0e Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Tue, 4 Feb 2020 09:40:06 -0600
Subject: [PATCH 4/6] dedent

---
 pandas/core/groupby/generic.py | 44 ++++++++++++++++++----------------
 1 file changed, 23 insertions(+), 21 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 9db26501406ff..db143441fd1ff 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1022,6 +1022,9 @@ def _cython_agg_blocks(
         agg_blocks: List[Block] = []
         new_items: List[np.ndarray] = []
         deleted_items: List[np.ndarray] = []
+        # Some object-dtype blocks might be split into List[Block[T], Block[U]]
+        # split_items: List[np.ndarray] = []
+
         no_result = object()
         for block in data.blocks:
             # Avoid inheriting result from earlier in the loop
@@ -1069,28 +1072,27 @@ def _cython_agg_blocks(
 
                 if isinstance(result, np.ndarray) and result.ndim == 1:
                     result = result.reshape(1, -1)
-            finally:
-                assert not isinstance(result, DataFrame)
-
-                if result is not no_result:
-                    # see if we can cast the block back to the original dtype
-                    result = maybe_downcast_numeric(result, block.dtype)
-
-                    if block.is_extension and isinstance(result, np.ndarray):
-                        # e.g. block.values was an IntegerArray
-                        # (1, N) case can occur if block.values was Categorical
-                        # and result is ndarray[object]
-                        assert result.ndim == 1 or result.shape[0] == 1
-                        try:
-                            # Cast back if feasible
-                            result = type(block.values)._from_sequence(
-                                result.ravel(), dtype=block.values.dtype
-                            )
-                        except ValueError:
-                            # reshape to be valid for non-Extension Block
-                            result = result.reshape(1, -1)
+            assert not isinstance(result, DataFrame)
+
+            if result is not no_result:
+                # see if we can cast the block back to the original dtype
+                result = maybe_downcast_numeric(result, block.dtype)
+
+                if block.is_extension and isinstance(result, np.ndarray):
+                    # e.g. block.values was an IntegerArray
+                    # (1, N) case can occur if block.values was Categorical
+                    # and result is ndarray[object]
+                    assert result.ndim == 1 or result.shape[0] == 1
+                    try:
+                        # Cast back if feasible
+                        result = type(block.values)._from_sequence(
+                            result.ravel(), dtype=block.values.dtype
+                        )
+                    except ValueError:
+                        # reshape to be valid for non-Extension Block
+                        result = result.reshape(1, -1)
 
-                agg_block: Block = block.make_block(result)
+            agg_block: Block = block.make_block(result)
 
             new_items.append(locs)
             agg_blocks.append(agg_block)
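The new test pins down the case consolidation cannot repair: a single object block whose per-column reductions come back with different dtypes, so the result blocks can never be merged into one. A short illustration (the result dtypes are an assumption about how ``min`` handles datetime-like objects):

    import pandas as pd

    # Both columns live in one object block, but ``min`` may return Timestamps
    # for "A" and strings for "B". A datetime64 block and an object block
    # cannot be consolidated together, so the one-block-in/one-block-out
    # assumption still breaks after the previous commit.
    df = pd.DataFrame(
        {
            "A": pd.date_range("2000", periods=4),
            "B": ["a", "b", "c", "d"],
        }
    ).astype(object)
    result = df.groupby([0, 0, 0, 0]).min()
    print(result.dtypes)  # "A" may come back as datetime64[ns], "B" as object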
From 8a5db1231f368538bb3dfe0c685e76087af6a993 Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Tue, 4 Feb 2020 10:01:21 -0600
Subject: [PATCH 5/6] fixup

---
 pandas/core/groupby/generic.py                   | 62 ++++++++++++++++------
 .../tests/groupby/aggregate/test_aggregate.py    | 17 ++++++++++---
 2 files changed, 53 insertions(+), 26 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index db143441fd1ff..0493f09c233d6 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1023,7 +1023,8 @@ def _cython_agg_blocks(
         new_items: List[np.ndarray] = []
         deleted_items: List[np.ndarray] = []
         # Some object-dtype blocks might be split into List[Block[T], Block[U]]
-        # split_items: List[np.ndarray] = []
+        split_items: List[np.ndarray] = []
+        split_frames: List[DataFrame] = []
 
         no_result = object()
         for block in data.blocks:
@@ -1065,41 +1066,56 @@ def _cython_agg_blocks(
                     result = cast(DataFrame, result)
                     # unwrap DataFrame to get array
                     if len(result._data.blocks) != 1:
-                        result._consolidate_inplace()
+                        # We've split an object block! Everything we've assumed
+                        # about a single block input returning a single block output
+                        # is a lie. To keep the code-path for the typical non-split case
+                        # clean, we choose to clean up this mess later on.
+                        split_items.append(locs)
+                        split_frames.append(result)
+                        continue
 
                     assert len(result._data.blocks) == 1
                     result = result._data.blocks[0].values
 
                 if isinstance(result, np.ndarray) and result.ndim == 1:
                     result = result.reshape(1, -1)
-            assert not isinstance(result, DataFrame)
-
-            if result is not no_result:
-                # see if we can cast the block back to the original dtype
-                result = maybe_downcast_numeric(result, block.dtype)
-
-                if block.is_extension and isinstance(result, np.ndarray):
-                    # e.g. block.values was an IntegerArray
-                    # (1, N) case can occur if block.values was Categorical
-                    # and result is ndarray[object]
-                    assert result.ndim == 1 or result.shape[0] == 1
-                    try:
-                        # Cast back if feasible
-                        result = type(block.values)._from_sequence(
-                            result.ravel(), dtype=block.values.dtype
-                        )
-                    except ValueError:
-                        # reshape to be valid for non-Extension Block
-                        result = result.reshape(1, -1)
+            finally:
+                assert not isinstance(result, DataFrame)
+
+                if result is not no_result:
+                    # see if we can cast the block back to the original dtype
+                    result = maybe_downcast_numeric(result, block.dtype)
+
+                    if block.is_extension and isinstance(result, np.ndarray):
+                        # e.g. block.values was an IntegerArray
+                        # (1, N) case can occur if block.values was Categorical
+                        # and result is ndarray[object]
+                        assert result.ndim == 1 or result.shape[0] == 1
+                        try:
+                            # Cast back if feasible
+                            result = type(block.values)._from_sequence(
+                                result.ravel(), dtype=block.values.dtype
+                            )
+                        except ValueError:
+                            # reshape to be valid for non-Extension Block
+                            result = result.reshape(1, -1)
 
-            agg_block: Block = block.make_block(result)
+                agg_block: Block = block.make_block(result)
 
             new_items.append(locs)
             agg_blocks.append(agg_block)
 
-        if not agg_blocks:
+        if not (agg_blocks or split_frames):
             raise DataError("No numeric types to aggregate")
 
+        if split_items:
+            # Clean up the mess left over from split blocks.
+            for locs, result in zip(split_items, split_frames):
+                assert len(locs) == result.shape[1]
+                for i, loc in enumerate(locs):
+                    new_items.append(np.array([loc], dtype=locs.dtype))
+                    agg_blocks.append(result.iloc[:, [i]]._data.blocks[0])
+
         # reset the locs in the blocks to correspond to our
         # current ordering
         indexer = np.concatenate(new_items)
diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index 9a9e15a529ed3..cdb24549008b9 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -407,12 +407,23 @@ def test_agg_split_object_part_datetime():
     # https://github.com/pandas-dev/pandas/pull/31616
     df = pd.DataFrame(
         {
             "A": pd.date_range("2000", periods=4),
             "B": ["a", "b", "c", "d"],
-            "C": ["b", "c", "d", "e"],
-            "D": pd.date_range("2000", periods=4),
+            "C": [1, 2, 3, 4],
+            "D": ["b", "c", "d", "e"],
+            "E": pd.date_range("2000", periods=4),
+            "F": [1, 2, 3, 4],
         }
     ).astype(object)
     result = df.groupby([0, 0, 0, 0]).min()
-    expected = pd.DataFrame({"A": [pd.Timestamp("2000")], "B": ["a"]})
+    expected = pd.DataFrame(
+        {
+            "A": [pd.Timestamp("2000")],
+            "B": ["a"],
+            "C": [1],
+            "D": ["b"],
+            "E": [pd.Timestamp("2000")],
+            "F": [1],
+        }
+    )
     tm.assert_frame_equal(result, expected)
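The clean-up loop introduced here re-blocks each split frame column by column, tagging every single-column block with the manager location of its source column. A standalone sketch of that mapping (pandas 1.0.x internals; the ``locs`` values are hypothetical):

    import numpy as np
    import pandas as pd

    # A "split" result frame: one datetime-like column, one object column.
    split_result = pd.DataFrame({"A": [pd.Timestamp("2000")], "B": ["a"]})
    locs = np.array([2, 3])  # hypothetical manager positions of "A" and "B"

    new_items, agg_blocks = [], []
    for i, loc in enumerate(locs):
        # one single-column block per original column, keyed by its location
        new_items.append(np.array([loc], dtype=locs.dtype))
        agg_blocks.append(split_result.iloc[:, [i]]._data.blocks[0])

    print([blk.dtype for blk in agg_blocks])  # e.g. [datetime64[ns], object]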
From 6eb1cfd3396b8eff1afe2fe4c091c3e8b077c914 Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Tue, 4 Feb 2020 10:10:38 -0600
Subject: [PATCH 6/6] all split

---
 pandas/core/groupby/generic.py                   | 41 +++++++++----------
 pandas/tests/groupby/aggregate/test_aggregate.py |  9 +--------
 2 files changed, 21 insertions(+), 29 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 0493f09c233d6..f194c774cf329 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1079,28 +1079,27 @@ def _cython_agg_blocks(
 
                 if isinstance(result, np.ndarray) and result.ndim == 1:
                     result = result.reshape(1, -1)
-            finally:
-                assert not isinstance(result, DataFrame)
-
-                if result is not no_result:
-                    # see if we can cast the block back to the original dtype
-                    result = maybe_downcast_numeric(result, block.dtype)
-
-                    if block.is_extension and isinstance(result, np.ndarray):
-                        # e.g. block.values was an IntegerArray
-                        # (1, N) case can occur if block.values was Categorical
-                        # and result is ndarray[object]
-                        assert result.ndim == 1 or result.shape[0] == 1
-                        try:
-                            # Cast back if feasible
-                            result = type(block.values)._from_sequence(
-                                result.ravel(), dtype=block.values.dtype
-                            )
-                        except ValueError:
-                            # reshape to be valid for non-Extension Block
-                            result = result.reshape(1, -1)
+            assert not isinstance(result, DataFrame)
+
+            if result is not no_result:
+                # see if we can cast the block back to the original dtype
+                result = maybe_downcast_numeric(result, block.dtype)
+
+                if block.is_extension and isinstance(result, np.ndarray):
+                    # e.g. block.values was an IntegerArray
+                    # (1, N) case can occur if block.values was Categorical
+                    # and result is ndarray[object]
+                    assert result.ndim == 1 or result.shape[0] == 1
+                    try:
+                        # Cast back if feasible
+                        result = type(block.values)._from_sequence(
+                            result.ravel(), dtype=block.values.dtype
+                        )
+                    except ValueError:
+                        # reshape to be valid for non-Extension Block
+                        result = result.reshape(1, -1)
 
-                agg_block: Block = block.make_block(result)
+            agg_block: Block = block.make_block(result)
 
             new_items.append(locs)
             agg_blocks.append(agg_block)
diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index cdb24549008b9..47c7f2dc0b51a 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -384,18 +384,11 @@ def test_agg_split_block():
             "key1": ["a", "a", "b", "b", "a"],
             "key2": ["one", "two", "one", "two", "one"],
             "key3": ["three", "three", "three", "six", "six"],
-            "data1": [0.0, 1, 2, 3, 4],
-            "data2": [0.0, 1, 2, 3, 4],
         }
     )
     result = df.groupby("key1").min()
     expected = pd.DataFrame(
-        {
-            "key2": ["one", "one"],
-            "key3": ["six", "six"],
-            "data1": [0.0, 2.0],
-            "data2": [0.0, 2.0],
-        },
+        {"key2": ["one", "one"], "key3": ["six", "six"]},
         index=pd.Index(["a", "b"], name="key1"),
     )
     tm.assert_frame_equal(result, expected)
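With the full series applied (released in pandas 1.0.1), the reproducer from the first commit round-trips cleanly instead of raising:

    import pandas as pd

    df = pd.DataFrame(
        {
            "key1": ["a", "a", "b", "b", "a"],
            "key2": ["one", "two", "one", "two", "one"],
            "key3": ["three", "three", "three", "six", "six"],
        }
    )
    print(df.groupby("key1").min())
    #      key2 key3
    # key1
    # a     one  six
    # b     one  six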