From ffd4ac44f927711cbb460dd9530dcb0eac4bc823 Mon Sep 17 00:00:00 2001
From: Brock
Date: Thu, 23 Sep 2021 18:19:40 -0700
Subject: [PATCH 1/5] REF: de-duplicate warning message

---
 pandas/core/groupby/generic.py | 28 ++++------------------
 pandas/core/groupby/groupby.py | 43 ++++++++++++++--------------------
 2 files changed, 21 insertions(+), 50 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index f0408db7f4ef8..77db3123dee8c 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -78,6 +78,7 @@
     _apply_docs,
     _transform_template,
     group_selection_context,
+    warn_dropping_nuisance_columns_deprecated,
 )
 from pandas.core.indexes.api import (
     Index,
@@ -999,14 +1000,7 @@ def array_func(values: ArrayLike) -> ArrayLike:
         new_mgr = data.grouped_reduce(array_func, ignore_failures=True)

         if len(new_mgr) < len(data):
-            warnings.warn(
-                f"Dropping invalid columns in {type(self).__name__}.{how} "
-                "is deprecated. In a future version, a TypeError will be raised. "
-                f"Before calling .{how}, select only columns which should be "
-                "valid for the function.",
-                FutureWarning,
-                stacklevel=4,
-            )
+            warn_dropping_nuisance_columns_deprecated(type(self), how)

         return self._wrap_agged_manager(new_mgr)

@@ -1195,14 +1189,7 @@ def arr_func(bvalues: ArrayLike) -> ArrayLike:
         res_mgr.set_axis(1, mgr.axes[1])

         if len(res_mgr) < len(mgr):
-            warnings.warn(
-                f"Dropping invalid columns in {type(self).__name__}.{how} "
-                "is deprecated. In a future version, a TypeError will be raised. "
-                f"Before calling .{how}, select only columns which should be "
-                "valid for the transforming function.",
-                FutureWarning,
-                stacklevel=4,
-            )
+            warn_dropping_nuisance_columns_deprecated(type(self), how)

         res_df = self.obj._constructor(res_mgr)
         if self.axis == 1:
@@ -1314,14 +1301,7 @@ def _transform_item_by_item(self, obj: DataFrame, wrapper) -> DataFrame:
                 output[i] = sgb.transform(wrapper)
             except TypeError:
                 # e.g. trying to call nanmean with string values
-                warnings.warn(
-                    f"Dropping invalid columns in {type(self).__name__}.transform "
-                    "is deprecated. In a future version, a TypeError will be raised. "
-                    "Before calling .transform, select only columns which should be "
-                    "valid for the transforming function.",
-                    FutureWarning,
-                    stacklevel=5,
-                )
+                warn_dropping_nuisance_columns_deprecated(type(self), "transform")
             else:
                 inds.append(i)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index b41935902b9cf..cb21c50692ac3 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1422,14 +1422,7 @@ def _python_agg_general(self, func, *args, **kwargs):
                 # if this function is invalid for this dtype, we will ignore it.
                 result = self.grouper.agg_series(obj, f)
             except TypeError:
-                warnings.warn(
-                    f"Dropping invalid columns in {type(self).__name__}.agg "
-                    "is deprecated. In a future version, a TypeError will be raised. "
-                    "Before calling .agg, select only columns which should be "
-                    "valid for the aggregating function.",
-                    FutureWarning,
-                    stacklevel=3,
-                )
+                warn_dropping_nuisance_columns_deprecated(type(self), "agg")
                 continue

             key = base.OutputKey(label=name, position=idx)
@@ -2709,15 +2702,8 @@ def blk_func(values: ArrayLike) -> ArrayLike:
         res_mgr = mgr.grouped_reduce(blk_func, ignore_failures=True)

         if not is_ser and len(res_mgr.items) != len(mgr.items):
-            warnings.warn(
-                "Dropping invalid columns in "
-                f"{type(self).__name__}.quantile is deprecated. "
-                "In a future version, a TypeError will be raised. "
-                "Before calling .quantile, select only columns which "
-                "should be valid for the function.",
-                FutureWarning,
-                stacklevel=find_stack_level(),
-            )
+            warn_dropping_nuisance_columns_deprecated(type(self), "quantile")
+
             if len(res_mgr.items) == 0:
                 # re-call grouped_reduce to get the desired exception message
                 mgr.grouped_reduce(blk_func, ignore_failures=False)
@@ -3150,15 +3136,8 @@ def blk_func(values: ArrayLike) -> ArrayLike:

         if not is_ser and len(res_mgr.items) != len(mgr.items):
             howstr = how.replace("group_", "")
-            warnings.warn(
-                "Dropping invalid columns in "
-                f"{type(self).__name__}.{howstr} is deprecated. "
-                "In a future version, a TypeError will be raised. "
-                f"Before calling .{howstr}, select only columns which "
-                "should be valid for the function.",
-                FutureWarning,
-                stacklevel=3,
-            )
+            warn_dropping_nuisance_columns_deprecated(type(self), howstr)
+
             if len(res_mgr.items) == 0:
                 # We re-call grouped_reduce to get the right exception message
                 try:
@@ -3627,3 +3606,15 @@ def _insert_quantile_level(idx: Index, qs: npt.NDArray[np.float64]) -> MultiInde
     else:
         mi = MultiIndex.from_product([idx, qs])
     return mi
+
+
+def warn_dropping_nuisance_columns_deprecated(cls, how: str) -> None:
+    warnings.warn(
+        "Dropping invalid columns in "
+        f"{cls.__name__}.{how} is deprecated. "
+        "In a future version, a TypeError will be raised. "
+        f"Before calling .{how}, select only columns which "
+        "should be valid for the function.",
+        FutureWarning,
+        stacklevel=find_stack_level(),
+    )
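
Note (illustration, not part of the patch): patch 1 above replaces five slightly divergent copies of the same deprecation message with one module-level helper. A minimal standalone sketch of the resulting call pattern; the class below is a stand-in, and the real helper computes the warning's stacklevel with find_stack_level() instead of hard-coding it.

import warnings


def warn_dropping_nuisance_columns_deprecated(cls, how):
    # Same message the helper in the patch builds from cls and the method name.
    warnings.warn(
        f"Dropping invalid columns in {cls.__name__}.{how} is deprecated. "
        "In a future version, a TypeError will be raised. "
        f"Before calling .{how}, select only columns which "
        "should be valid for the function.",
        FutureWarning,
        stacklevel=2,
    )


class DataFrameGroupBy:  # stand-in, only to show the per-call-site arguments
    def agg(self):
        warn_dropping_nuisance_columns_deprecated(type(self), "agg")

    def quantile(self):
        warn_dropping_nuisance_columns_deprecated(type(self), "quantile")


warnings.simplefilter("always")
DataFrameGroupBy().agg()       # FutureWarning mentions DataFrameGroupBy.agg
DataFrameGroupBy().quantile()  # FutureWarning mentions DataFrameGroupBy.quantile
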
" - "Before calling .quantile, select only columns which " - "should be valid for the function.", - FutureWarning, - stacklevel=find_stack_level(), - ) + warn_dropping_nuisance_columns_deprecated(type(self), "quantile") + if len(res_mgr.items) == 0: # re-call grouped_reduce to get the desired exception message mgr.grouped_reduce(blk_func, ignore_failures=False) @@ -3150,15 +3136,8 @@ def blk_func(values: ArrayLike) -> ArrayLike: if not is_ser and len(res_mgr.items) != len(mgr.items): howstr = how.replace("group_", "") - warnings.warn( - "Dropping invalid columns in " - f"{type(self).__name__}.{howstr} is deprecated. " - "In a future version, a TypeError will be raised. " - f"Before calling .{howstr}, select only columns which " - "should be valid for the function.", - FutureWarning, - stacklevel=3, - ) + warn_dropping_nuisance_columns_deprecated(type(self), howstr) + if len(res_mgr.items) == 0: # We re-call grouped_reduce to get the right exception message try: @@ -3627,3 +3606,15 @@ def _insert_quantile_level(idx: Index, qs: npt.NDArray[np.float64]) -> MultiInde else: mi = MultiIndex.from_product([idx, qs]) return mi + + +def warn_dropping_nuisance_columns_deprecated(cls, how: str) -> None: + warnings.warn( + "Dropping invalid columns in " + f"{cls.__name__}.{how} is deprecated. " + "In a future version, a TypeError will be raised. " + f"Before calling .{how}, select only columns which " + "should be valid for the function.", + FutureWarning, + stacklevel=find_stack_level(), + ) From 3b4564888ad91b7ef03a7618b976dff604e6e181 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 24 Sep 2021 10:26:50 -0700 Subject: [PATCH 2/5] standardize re-calling --- pandas/core/groupby/groupby.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index cb21c50692ac3..5218419988110 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2707,6 +2707,10 @@ def blk_func(values: ArrayLike) -> ArrayLike: if len(res_mgr.items) == 0: # re-call grouped_reduce to get the desired exception message mgr.grouped_reduce(blk_func, ignore_failures=False) + # grouped_reduce _should_ raise, so this should not be reached + raise TypeError( # pragma: no cover + "All columns were dropped in grouped_reduce" + ) if is_ser: res = self._wrap_agged_manager(res_mgr) @@ -3140,13 +3144,11 @@ def blk_func(values: ArrayLike) -> ArrayLike: if len(res_mgr.items) == 0: # We re-call grouped_reduce to get the right exception message - try: - mgr.grouped_reduce(blk_func, ignore_failures=False) - except Exception as err: - error_msg = str(err) - raise TypeError(error_msg) - # We should never get here - raise TypeError("All columns were dropped in grouped_reduce") + mgr.grouped_reduce(blk_func, ignore_failures=False) + # grouped_reduce _should_ raise, so this should not be reached + raise TypeError( # pragma: no cover + "All columns were dropped in grouped_reduce" + ) if is_ser: out = self._wrap_agged_manager(res_mgr) From 3b3a13bc490bb86ea35b25a2a2ca13303aa9dadc Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 24 Sep 2021 14:47:41 -0700 Subject: [PATCH 3/5] tighten wrapping --- pandas/core/groupby/groupby.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 5218419988110..2ddb0afca3283 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1762,8 +1762,9 @@ def hfunc(bvalues: ArrayLike) -> ArrayLike: # 
From 3b3a13bc490bb86ea35b25a2a2ca13303aa9dadc Mon Sep 17 00:00:00 2001
From: Brock
Date: Fri, 24 Sep 2021 14:47:41 -0700
Subject: [PATCH 3/5] tighten wrapping

---
 pandas/core/groupby/groupby.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 5218419988110..2ddb0afca3283 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1762,8 +1762,9 @@ def hfunc(bvalues: ArrayLike) -> ArrayLike:
         #  _wrap_agged_manager() returns. GH 35028
         with com.temp_setattr(self, "observed", True):
             result = self._wrap_agged_manager(new_mgr)
-            if result.ndim == 1:
-                result.index = self.grouper.result_index
+
+        if result.ndim == 1:
+            result.index = self.grouper.result_index

         return self._reindex_output(result, fill_value=0)

@@ -3152,9 +3153,8 @@ def blk_func(values: ArrayLike) -> ArrayLike:

         if is_ser:
             out = self._wrap_agged_manager(res_mgr)
-            out.index = self.grouper.result_index
         else:
-            out = type(obj)(res_mgr)
+            out = obj._constructor(res_mgr)

         return self._wrap_aggregated_output(out)

From 17c1bb66ed6a60e378facaf25ec2e73afb091546 Mon Sep 17 00:00:00 2001
From: Brock
Date: Sun, 26 Sep 2021 16:20:18 -0700
Subject: [PATCH 4/5] REF: use grouped_reduce in _cython_agg_general

---
 pandas/core/groupby/generic.py | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 77db3123dee8c..1dbf5781637cb 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -340,8 +340,7 @@ def _cython_agg_general(
     ):

         obj = self._selected_obj
-        objvals = obj._values
-        data = obj._mgr
+        data = self._get_data_to_aggregate()

         if numeric_only and not is_numeric_dtype(obj.dtype):
             # GH#41291 match Series behavior
@@ -365,12 +364,10 @@ def array_func(values: ArrayLike) -> ArrayLike:

             return result

-        result = array_func(objvals)
-
-        ser = self.obj._constructor(
-            result, index=self.grouper.result_index, name=obj.name
-        )
-        return self._reindex_output(ser)
+        new_mgr = data.grouped_reduce(array_func, ignore_failures=True)
+        res = self._wrap_agged_manager(new_mgr)
+        res.index = self.grouper.result_index
+        return self._reindex_output(res)

     def _indexed_output_to_ndframe(
         self, output: Mapping[base.OutputKey, ArrayLike]
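
Note (illustration, not part of the patch): patch 3 above wraps the reduced manager with obj._constructor(res_mgr) instead of type(obj)(res_mgr), and patch 4 routes SeriesGroupBy._cython_agg_general through grouped_reduce as well. _constructor is the hook pandas uses when an operation needs to build "an object of the same kind", and it is what subclasses override; a small sketch with a made-up subclass, where the groupby line shows the behavior this wrapping is expected to preserve on versions that build results through _constructor.

import pandas as pd


class SnapshotFrame(pd.DataFrame):
    # A made-up subclass that opts out of propagating itself: anything pandas
    # builds from it via _constructor should be a plain DataFrame.
    @property
    def _constructor(self):
        return pd.DataFrame


df = SnapshotFrame({"key": ["a", "a", "b"], "x": [1, 2, 3]})
print(type(df))                       # SnapshotFrame
print(type(df[["x"]]))                # DataFrame, built through _constructor
print(type(df.groupby("key").sum()))  # expected: DataFrame, since results are
                                      # wrapped via obj._constructor as in patch 3
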
From a69a7be8d5d43982005f4023c50167e1603d2ca3 Mon Sep 17 00:00:00 2001
From: Brock
Date: Sun, 26 Sep 2021 19:53:59 -0700
Subject: [PATCH 5/5] REF: share _cython_agg_general

---
 pandas/core/groupby/generic.py | 68 ----------------------------------
 pandas/core/groupby/groupby.py | 44 +++++++++++++++++++++-
 2 files changed, 43 insertions(+), 69 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 1dbf5781637cb..b1f7f56aaad8c 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -48,7 +48,6 @@
     is_dict_like,
     is_integer_dtype,
     is_interval_dtype,
-    is_numeric_dtype,
     is_scalar,
 )
 from pandas.core.dtypes.missing import (
@@ -335,40 +334,6 @@ def _aggregate_multiple_funcs(self, arg) -> DataFrame:
         output = self._reindex_output(output)
         return output

-    def _cython_agg_general(
-        self, how: str, alt: Callable, numeric_only: bool, min_count: int = -1
-    ):
-
-        obj = self._selected_obj
-        data = self._get_data_to_aggregate()
-
-        if numeric_only and not is_numeric_dtype(obj.dtype):
-            # GH#41291 match Series behavior
-            raise NotImplementedError(
-                f"{type(self).__name__}.{how} does not implement numeric_only."
-            )
-
-        # This is overkill because it is only called once, but is here to
-        # mirror the array_func used in DataFrameGroupBy._cython_agg_general
-        def array_func(values: ArrayLike) -> ArrayLike:
-            try:
-                result = self.grouper._cython_operation(
-                    "aggregate", values, how, axis=data.ndim - 1, min_count=min_count
-                )
-            except NotImplementedError:
-                # generally if we have numeric_only=False
-                # and non-applicable functions
-                # try to python agg
-                # TODO: shouldn't min_count matter?
-                result = self._agg_py_fallback(values, ndim=data.ndim, alt=alt)
-
-            return result
-
-        new_mgr = data.grouped_reduce(array_func, ignore_failures=True)
-        res = self._wrap_agged_manager(new_mgr)
-        res.index = self.grouper.result_index
-        return self._reindex_output(res)
-
     def _indexed_output_to_ndframe(
         self, output: Mapping[base.OutputKey, ArrayLike]
     ) -> Series:
@@ -968,39 +933,6 @@ def _iterate_slices(self) -> Iterable[Series]:

             yield values

-    def _cython_agg_general(
-        self, how: str, alt: Callable, numeric_only: bool, min_count: int = -1
-    ) -> DataFrame:
-        # Note: we never get here with how="ohlc"; that goes through SeriesGroupBy
-
-        data: Manager2D = self._get_data_to_aggregate()
-
-        if numeric_only:
-            data = data.get_numeric_data(copy=False)
-
-        def array_func(values: ArrayLike) -> ArrayLike:
-            try:
-                result = self.grouper._cython_operation(
-                    "aggregate", values, how, axis=data.ndim - 1, min_count=min_count
-                )
-            except NotImplementedError:
-                # generally if we have numeric_only=False
-                # and non-applicable functions
-                # try to python agg
-                # TODO: shouldn't min_count matter?
-                result = self._agg_py_fallback(values, ndim=data.ndim, alt=alt)
-
-            return result
-
-        # TypeError -> we may have an exception in trying to aggregate
-        # continue and exclude the block
-        new_mgr = data.grouped_reduce(array_func, ignore_failures=True)
-
-        if len(new_mgr) < len(data):
-            warn_dropping_nuisance_columns_deprecated(type(self), how)
-
-        return self._wrap_agged_manager(new_mgr)
-
     def _aggregate_frame(self, func, *args, **kwargs) -> DataFrame:
         if self.grouper.nkeys != 1:
             raise AssertionError("Number of keys must be 1")
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 2ddb0afca3283..3720da80ad34d 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1493,10 +1493,52 @@ def _agg_py_fallback(
         # test_groupby_duplicate_columns with object dtype values
         return ensure_block_shape(res_values, ndim=ndim)

+    @final
     def _cython_agg_general(
         self, how: str, alt: Callable, numeric_only: bool, min_count: int = -1
     ):
-        raise AbstractMethodError(self)
+        # Note: we never get here with how="ohlc" for DataFrameGroupBy;
+        # that goes through SeriesGroupBy
+
+        data = self._get_data_to_aggregate()
+        is_ser = data.ndim == 1
+
+        if numeric_only:
+            if is_ser and not is_numeric_dtype(self._selected_obj.dtype):
+                # GH#41291 match Series behavior
+                raise NotImplementedError(
+                    f"{type(self).__name__}.{how} does not implement numeric_only."
+                )
+            elif not is_ser:
+                data = data.get_numeric_data(copy=False)
+
+        def array_func(values: ArrayLike) -> ArrayLike:
+            try:
+                result = self.grouper._cython_operation(
+                    "aggregate", values, how, axis=data.ndim - 1, min_count=min_count
+                )
+            except NotImplementedError:
+                # generally if we have numeric_only=False
+                # and non-applicable functions
+                # try to python agg
+                # TODO: shouldn't min_count matter?
+                result = self._agg_py_fallback(values, ndim=data.ndim, alt=alt)
+
+            return result
+
+        # TypeError -> we may have an exception in trying to aggregate
+        # continue and exclude the block
+        new_mgr = data.grouped_reduce(array_func, ignore_failures=True)
+
+        if not is_ser and len(new_mgr) < len(data):
+            warn_dropping_nuisance_columns_deprecated(type(self), how)
+
+        res = self._wrap_agged_manager(new_mgr)
+        if is_ser:
+            res.index = self.grouper.result_index
+            return self._reindex_output(res)
+        else:
+            return res

     def _cython_transform(
         self, how: str, numeric_only: bool = True, axis: int = 0, **kwargs