From cb43d75b9669f5ff835929f67dbe4c263e5baee3 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Fri, 4 Mar 2022 18:57:55 +0530 Subject: [PATCH 01/32] updating submodule --- testing | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing b/testing index 53b49804710..634739c6644 160000 --- a/testing +++ b/testing @@ -1 +1 @@ -Subproject commit 53b498047109d9940fcfab388bd9d6aeb8c57425 +Subproject commit 634739c664433cec366b4b9a81d1e1044a8c5eda From b1b25b7c053260bfbe97c749959606208599630a Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Sat, 19 Mar 2022 19:14:39 +0530 Subject: [PATCH 02/32] temp commit to remove files in submodule --- .gitmodules | 3 --- testing | 1 - 2 files changed, 4 deletions(-) delete mode 160000 testing diff --git a/.gitmodules b/.gitmodules index 6efc4871542..71722b21777 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,3 @@ [submodule "cpp/submodules/parquet-testing"] path = cpp/submodules/parquet-testing url = https://github.com/apache/parquet-testing.git -[submodule "testing"] - path = testing - url = https://github.com/apache/arrow-testing diff --git a/testing b/testing deleted file mode 160000 index 634739c6644..00000000000 --- a/testing +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 634739c664433cec366b4b9a81d1e1044a8c5eda From 394fe82f704aaa44aa65b0224c7756b13d8d47e3 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Sat, 19 Mar 2022 19:15:36 +0530 Subject: [PATCH 03/32] adding submodule --- .gitmodules | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitmodules b/.gitmodules index 71722b21777..6efc4871542 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "cpp/submodules/parquet-testing"] path = cpp/submodules/parquet-testing url = https://github.com/apache/parquet-testing.git +[submodule "testing"] + path = testing + url = https://github.com/apache/arrow-testing From 3c93575589c5bb96d23bd7e4134ed5702a16b90f Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Sun, 20 Mar 2022 23:09:21 +0530 Subject: [PATCH 04/32] updating testing submodule --- testing | 1 + 1 file changed, 1 insertion(+) create mode 160000 testing diff --git a/testing b/testing new file mode 160000 index 00000000000..d315f798520 --- /dev/null +++ b/testing @@ -0,0 +1 @@ +Subproject commit d315f7985207d2d67fc2c8e41053e9d97d573f4b From b926c5546ab3b74e6a369740d28428e2dd7c7c64 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Sun, 20 Mar 2022 23:11:24 +0530 Subject: [PATCH 05/32] revert to upstream version --- testing | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing b/testing index d315f798520..53b49804710 160000 --- a/testing +++ b/testing @@ -1 +1 @@ -Subproject commit d315f7985207d2d67fc2c8e41053e9d97d573f4b +Subproject commit 53b498047109d9940fcfab388bd9d6aeb8c57425 From f63db74a68fdc2f2ff439a059396124d146233e4 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Fri, 13 May 2022 18:37:24 +0530 Subject: [PATCH 06/32] initial commit for agg option refactor --- .../execution_plan_documentation_examples.cc | 9 +- cpp/src/arrow/compute/api_aggregate.h | 3 + cpp/src/arrow/compute/exec/aggregate_node.cc | 9 +- cpp/src/arrow/compute/exec/ir_consumer.cc | 5 +- cpp/src/arrow/compute/exec/ir_test.cc | 10 +- cpp/src/arrow/compute/exec/options.h | 6 +- cpp/src/arrow/compute/exec/plan_test.cc | 99 ++-- cpp/src/arrow/compute/exec/test_util.cc | 12 +- cpp/src/arrow/compute/exec/tpch_benchmark.cc | 17 +- .../compute/kernels/hash_aggregate_test.cc | 495 +++++++++--------- cpp/src/arrow/dataset/scanner.cc | 3 +-
cpp/src/arrow/dataset/scanner_test.cc | 21 +- 12 files changed, 325 insertions(+), 364 deletions(-) diff --git a/cpp/examples/arrow/execution_plan_documentation_examples.cc b/cpp/examples/arrow/execution_plan_documentation_examples.cc index 5f80119bbba..3063431b5f9 100644 --- a/cpp/examples/arrow/execution_plan_documentation_examples.cc +++ b/cpp/examples/arrow/execution_plan_documentation_examples.cc @@ -502,9 +502,9 @@ arrow::Status SourceScalarAggregateSinkExample(cp::ExecContext& exec_context) { ARROW_ASSIGN_OR_RAISE(cp::ExecNode * source, cp::MakeExecNode("source", plan.get(), {}, source_node_options)); - auto aggregate_options = cp::AggregateNodeOptions{/*aggregates=*/{{"sum", nullptr}}, - /*targets=*/{"a"}, - /*names=*/{"sum(a)"}}; + auto aggregate_options = + cp::AggregateNodeOptions{/*aggregates=*/{{"sum", nullptr, "a"}}, + /*names=*/{"sum(a)"}}; ARROW_ASSIGN_OR_RAISE( cp::ExecNode * aggregate, cp::MakeExecNode("aggregate", plan.get(), {source}, std::move(aggregate_options))); @@ -541,8 +541,7 @@ arrow::Status SourceGroupAggregateSinkExample(cp::ExecContext& exec_context) { cp::MakeExecNode("source", plan.get(), {}, source_node_options)); auto options = std::make_shared(cp::CountOptions::ONLY_VALID); auto aggregate_options = - cp::AggregateNodeOptions{/*aggregates=*/{{"hash_count", options}}, - /*targets=*/{"a"}, + cp::AggregateNodeOptions{/*aggregates=*/{{"hash_count", &options, "a"}}, /*names=*/{"count(a)"}, /*keys=*/{"b"}}; ARROW_ASSIGN_OR_RAISE( diff --git a/cpp/src/arrow/compute/api_aggregate.h b/cpp/src/arrow/compute/api_aggregate.h index becd5a74149..ff994178fc1 100644 --- a/cpp/src/arrow/compute/api_aggregate.h +++ b/cpp/src/arrow/compute/api_aggregate.h @@ -402,6 +402,9 @@ struct ARROW_EXPORT Aggregate { /// options for the aggregation function std::shared_ptr options; + + // fields to which aggregations will be applied + FieldRef target; }; } // namespace internal diff --git a/cpp/src/arrow/compute/exec/aggregate_node.cc b/cpp/src/arrow/compute/exec/aggregate_node.cc index c5c5d3efcfc..ace6d9aa0ee 100644 --- a/cpp/src/arrow/compute/exec/aggregate_node.cc +++ b/cpp/src/arrow/compute/exec/aggregate_node.cc @@ -93,8 +93,9 @@ class ScalarAggregateNode : public ExecNode { std::vector target_field_ids(kernels.size()); for (size_t i = 0; i < kernels.size(); ++i) { - ARROW_ASSIGN_OR_RAISE(auto match, - FieldRef(aggregate_options.targets[i]).FindOne(input_schema)); + ARROW_ASSIGN_OR_RAISE( + auto match, + FieldRef(aggregate_options.aggregates[i].target).FindOne(input_schema)); target_field_ids[i] = match[0]; ARROW_ASSIGN_OR_RAISE( @@ -310,13 +311,11 @@ class GroupByNode : public ExecNode { // Find input field indices for aggregates std::vector agg_src_field_ids(aggs.size()); for (size_t i = 0; i < aggs.size(); ++i) { - ARROW_ASSIGN_OR_RAISE(auto match, - aggregate_options.targets[i].FindOne(*input_schema)); + ARROW_ASSIGN_OR_RAISE(auto match, aggs[i].target.FindOne(*input_schema)); agg_src_field_ids[i] = match[0]; } // Build vector of aggregate source field data types - DCHECK_EQ(aggregate_options.targets.size(), aggs.size()); std::vector agg_src_descrs(aggs.size()); for (size_t i = 0; i < aggs.size(); ++i) { auto agg_src_field_id = agg_src_field_ids[i]; diff --git a/cpp/src/arrow/compute/exec/ir_consumer.cc b/cpp/src/arrow/compute/exec/ir_consumer.cc index 0aafa2c2819..4f539c91fca 100644 --- a/cpp/src/arrow/compute/exec/ir_consumer.cc +++ b/cpp/src/arrow/compute/exec/ir_consumer.cc @@ -531,7 +531,7 @@ Result Convert(const ir::Relation& rel) { ARROW_ASSIGN_OR_RAISE(auto arg, 
Convert(*aggregate->rel()).As()); - AggregateNodeOptions opts{{}, {}, {}}; + AggregateNodeOptions opts{{}, {}}; if (!aggregate->measures()) return UnexpectedNullField("Aggregate.measures"); for (const ir::Expression* m : *aggregate->measures()) { @@ -550,8 +550,7 @@ Result Convert(const ir::Relation& rel) { "Support for non-FieldRef arguments to Aggregate.measures"); } - opts.aggregates.push_back({call->function_name, nullptr}); - opts.targets.push_back(*target); + opts.aggregates.push_back({call->function_name, nullptr, *target}); opts.names.push_back(call->function_name + " " + target->ToString()); } diff --git a/cpp/src/arrow/compute/exec/ir_test.cc b/cpp/src/arrow/compute/exec/ir_test.cc index 847f555c69a..9f3e3098cc0 100644 --- a/cpp/src/arrow/compute/exec/ir_test.cc +++ b/cpp/src/arrow/compute/exec/ir_test.cc @@ -357,10 +357,9 @@ TEST(Relation, AggregateSimple) { "0"}, {"aggregate", AggregateNodeOptions{/*aggregates=*/{ - {"sum", nullptr}, - {"mean", nullptr}, + {"sum", nullptr, 1}, + {"mean", nullptr, 2}, }, - /*targets=*/{1, 2}, /*names=*/ { "sum FieldRef.FieldPath(1)", @@ -564,10 +563,9 @@ TEST(Relation, AggregateWithHaving) { {"filter", FilterNodeOptions{less(field_ref(0), literal(3))}, "1"}, {"aggregate", AggregateNodeOptions{/*aggregates=*/{ - {"sum", nullptr}, - {"mean", nullptr}, + {"sum", nullptr, 1}, + {"mean", nullptr, 2}, }, - /*targets=*/{1, 2}, /*names=*/ { "sum FieldRef.FieldPath(1)", diff --git a/cpp/src/arrow/compute/exec/options.h b/cpp/src/arrow/compute/exec/options.h index 4691ad65a96..7132bd64784 100644 --- a/cpp/src/arrow/compute/exec/options.h +++ b/cpp/src/arrow/compute/exec/options.h @@ -112,17 +112,13 @@ class ARROW_EXPORT ProjectNodeOptions : public ExecNodeOptions { class ARROW_EXPORT AggregateNodeOptions : public ExecNodeOptions { public: AggregateNodeOptions(std::vector aggregates, - std::vector targets, std::vector names, - std::vector keys = {}) + std::vector names, std::vector keys = {}) : aggregates(std::move(aggregates)), - targets(std::move(targets)), names(std::move(names)), keys(std::move(keys)) {} // aggregations which will be applied to the targetted fields std::vector aggregates; - // fields to which aggregations will be applied - std::vector targets; // output field names for aggregations std::vector names; // keys by which aggregations will be grouped diff --git a/cpp/src/arrow/compute/exec/plan_test.cc b/cpp/src/arrow/compute/exec/plan_test.cc index 2df3c5e915e..4d1905fc8e7 100644 --- a/cpp/src/arrow/compute/exec/plan_test.cc +++ b/cpp/src/arrow/compute/exec/plan_test.cc @@ -391,8 +391,8 @@ TEST(ExecPlan, ToString) { }}}, {"aggregate", AggregateNodeOptions{ - /*aggregates=*/{{"hash_sum", nullptr}, {"hash_count", options}}, - /*targets=*/{"multiply(i32, 2)", "multiply(i32, 2)"}, + /*aggregates=*/{{"hash_sum", nullptr, "multiply(i32, 2)"}, + {"hash_count", options, "multiply(i32, 2)"}}, /*names=*/{"sum(multiply(i32, 2))", "count(multiply(i32, 2))"}, /*keys=*/{"bool"}}}, {"filter", FilterNodeOptions{greater(field_ref("sum(multiply(i32, 2))"), @@ -433,8 +433,7 @@ custom_sink_label:OrderBySinkNode{by={sort_keys=[FieldRef.Name(sum(multiply(i32, { union_node, {"aggregate", - AggregateNodeOptions{/*aggregates=*/{{"count", std::move(options)}}, - /*targets=*/{"i32"}, + AggregateNodeOptions{/*aggregates=*/{{"count", options, "i32"}}, /*names=*/{"count(i32)"}, /*keys=*/{}}}, {"sink", SinkNodeOptions{&sink_gen}}, @@ -770,8 +769,8 @@ TEST(ExecPlanExecution, StressSourceGroupedSumStop) { {"source", SourceNodeOptions{random_data.schema, 
random_data.gen(parallel, slow)}}, {"aggregate", - AggregateNodeOptions{/*aggregates=*/{{"hash_sum", nullptr}}, - /*targets=*/{"a"}, /*names=*/{"sum(a)"}, + AggregateNodeOptions{/*aggregates=*/{{"hash_sum", nullptr, "a"}}, + /*names=*/{"sum(a)"}, /*keys=*/{"b"}}}, {"sink", SinkNodeOptions{&sink_gen}}, }) @@ -918,8 +917,8 @@ TEST(ExecPlanExecution, SourceGroupedSum) { {"source", SourceNodeOptions{input.schema, input.gen(parallel, /*slow=*/false)}}, {"aggregate", - AggregateNodeOptions{/*aggregates=*/{{"hash_sum", nullptr}}, - /*targets=*/{"i32"}, /*names=*/{"sum(i32)"}, + AggregateNodeOptions{/*aggregates=*/{{"hash_sum", nullptr, "i32"}}, + /*names=*/{"sum(i32)"}, /*keys=*/{"str"}}}, {"sink", SinkNodeOptions{&sink_gen}}, }) @@ -987,10 +986,10 @@ TEST(ExecPlanExecution, NestedSourceProjectGroupedSum) { field_ref(FieldRef("struct", "bool")), }, {"i32", "bool"}}}, - {"aggregate", AggregateNodeOptions{/*aggregates=*/{{"hash_sum", nullptr}}, - /*targets=*/{"i32"}, - /*names=*/{"sum(i32)"}, - /*keys=*/{"bool"}}}, + {"aggregate", + AggregateNodeOptions{/*aggregates=*/{{"hash_sum", nullptr, "i32"}}, + /*names=*/{"sum(i32)"}, + /*keys=*/{"bool"}}}, {"sink", SinkNodeOptions{&sink_gen}}, }) .AddToPlan(plan.get())); @@ -1021,8 +1020,8 @@ TEST(ExecPlanExecution, SourceFilterProjectGroupedSumFilter) { field_ref("str"), call("multiply", {field_ref("i32"), literal(2)}), }}}, - {"aggregate", AggregateNodeOptions{/*aggregates=*/{{"hash_sum", nullptr}}, - /*targets=*/{"multiply(i32, 2)"}, + {"aggregate", AggregateNodeOptions{/*aggregates=*/{{"hash_sum", nullptr, + "multiply(i32, 2)"}}, /*names=*/{"sum(multiply(i32, 2))"}, /*keys=*/{"str"}}}, {"filter", FilterNodeOptions{greater(field_ref("sum(multiply(i32, 2))"), @@ -1060,8 +1059,8 @@ TEST(ExecPlanExecution, SourceFilterProjectGroupedSumOrderBy) { field_ref("str"), call("multiply", {field_ref("i32"), literal(2)}), }}}, - {"aggregate", AggregateNodeOptions{/*aggregates=*/{{"hash_sum", nullptr}}, - /*targets=*/{"multiply(i32, 2)"}, + {"aggregate", AggregateNodeOptions{/*aggregates=*/{{"hash_sum", nullptr, + "multiply(i32, 2)"}}, /*names=*/{"sum(multiply(i32, 2))"}, /*keys=*/{"str"}}}, {"filter", FilterNodeOptions{greater(field_ref("sum(multiply(i32, 2))"), @@ -1097,8 +1096,8 @@ TEST(ExecPlanExecution, SourceFilterProjectGroupedSumTopK) { field_ref("str"), call("multiply", {field_ref("i32"), literal(2)}), }}}, - {"aggregate", AggregateNodeOptions{/*aggregates=*/{{"hash_sum", nullptr}}, - /*targets=*/{"multiply(i32, 2)"}, + {"aggregate", AggregateNodeOptions{/*aggregates=*/{{"hash_sum", nullptr, + "multiply(i32, 2)"}}, /*names=*/{"sum(multiply(i32, 2))"}, /*keys=*/{"str"}}}, {"select_k_sink", SelectKSinkNodeOptions{options, &sink_gen}}, @@ -1123,10 +1122,10 @@ TEST(ExecPlanExecution, SourceScalarAggSink) { {"source", SourceNodeOptions{basic_data.schema, basic_data.gen(/*parallel=*/false, /*slow=*/false)}}, - {"aggregate", AggregateNodeOptions{ - /*aggregates=*/{{"sum", nullptr}, {"any", nullptr}}, - /*targets=*/{"i32", "bool"}, - /*names=*/{"sum(i32)", "any(bool)"}}}, + {"aggregate", + AggregateNodeOptions{/*aggregates=*/{{"sum", nullptr, "i32"}, + {"any", nullptr, "bool"}}, + /*names=*/{"sum(i32)", "any(bool)"}}}, {"sink", SinkNodeOptions{&sink_gen}}, }) .AddToPlan(plan.get())); @@ -1150,18 +1149,18 @@ TEST(ExecPlanExecution, AggregationPreservesOptions) { { auto options = std::make_shared(TDigestOptions::Defaults()); - ASSERT_OK(Declaration::Sequence( - { - {"source", SourceNodeOptions{basic_data.schema, - basic_data.gen(/*parallel=*/false, - /*slow=*/false)}}, 
- {"aggregate", - AggregateNodeOptions{/*aggregates=*/{{"tdigest", options}}, - /*targets=*/{"i32"}, - /*names=*/{"tdigest(i32)"}}}, - {"sink", SinkNodeOptions{&sink_gen}}, - }) - .AddToPlan(plan.get())); + ASSERT_OK( + Declaration::Sequence( + { + {"source", + SourceNodeOptions{basic_data.schema, basic_data.gen(/*parallel=*/false, + /*slow=*/false)}}, + {"aggregate", AggregateNodeOptions{ + /*aggregates=*/{{"tdigest", options, "i32"}}, + /*names=*/{"tdigest(i32)"}}}, + {"sink", SinkNodeOptions{&sink_gen}}, + }) + .AddToPlan(plan.get())); } ASSERT_THAT(StartAndCollect(plan.get(), sink_gen), @@ -1182,11 +1181,10 @@ TEST(ExecPlanExecution, AggregationPreservesOptions) { { {"source", SourceNodeOptions{data.schema, data.gen(/*parallel=*/false, /*slow=*/false)}}, - {"aggregate", - AggregateNodeOptions{/*aggregates=*/{{"hash_count", options}}, - /*targets=*/{"i32"}, - /*names=*/{"count(i32)"}, - /*keys=*/{"str"}}}, + {"aggregate", AggregateNodeOptions{ + /*aggregates=*/{{"hash_count", options, "i32"}}, + /*names=*/{"count(i32)"}, + /*keys=*/{"str"}}}, {"sink", SinkNodeOptions{&sink_gen}}, }) .AddToPlan(plan.get())); @@ -1222,16 +1220,15 @@ TEST(ExecPlanExecution, ScalarSourceScalarAggSink) { SourceNodeOptions{scalar_data.schema, scalar_data.gen(/*parallel=*/false, /*slow=*/false)}}, {"aggregate", AggregateNodeOptions{ - /*aggregates=*/{{"all", nullptr}, - {"any", nullptr}, - {"count", nullptr}, - {"mean", nullptr}, - {"product", nullptr}, - {"stddev", nullptr}, - {"sum", nullptr}, - {"tdigest", nullptr}, - {"variance", nullptr}}, - /*targets=*/{"b", "b", "a", "a", "a", "a", "a", "a", "a"}, + /*aggregates=*/{{"all", nullptr, "b"}, + {"any", nullptr, "b"}, + {"count", nullptr, "a"}, + {"mean", nullptr, "a"}, + {"product", nullptr, "a"}, + {"stddev", nullptr, "a"}, + {"sum", nullptr, "a"}, + {"tdigest", nullptr, "a"}, + {"variance", nullptr, "a"}}, /*names=*/ {"all(b)", "any(b)", "count(a)", "mean(a)", "product(a)", "stddev(a)", "sum(a)", "tdigest(a)", "variance(a)"}}}, @@ -1273,8 +1270,8 @@ TEST(ExecPlanExecution, ScalarSourceGroupedSum) { scalar_data.gen(/*parallel=*/false, /*slow=*/false)}}, {"aggregate", - AggregateNodeOptions{/*aggregates=*/{{"hash_sum", nullptr}}, - /*targets=*/{"a"}, /*names=*/{"hash_sum(a)"}, + AggregateNodeOptions{/*aggregates=*/{{"hash_sum", nullptr, "a"}}, + /*names=*/{"hash_sum(a)"}, /*keys=*/{"b"}}}, {"order_by_sink", OrderBySinkNodeOptions{options, &sink_gen}}, }) diff --git a/cpp/src/arrow/compute/exec/test_util.cc b/cpp/src/arrow/compute/exec/test_util.cc index 3f5d094774c..e336b245bb7 100644 --- a/cpp/src/arrow/compute/exec/test_util.cc +++ b/cpp/src/arrow/compute/exec/test_util.cc @@ -313,10 +313,11 @@ bool operator==(const Declaration& l, const Declaration& r) { if (l_agg->options == nullptr || r_agg->options == nullptr) return false; if (!l_agg->options->Equals(*r_agg->options)) return false; + + if (l_agg->target != r_agg->target) return false; } - return l_opts->targets == r_opts->targets && l_opts->names == r_opts->names && - l_opts->keys == r_opts->keys; + return l_opts->names == r_opts->names && l_opts->keys == r_opts->keys; } if (l.factory_name == "order_by_sink") { @@ -378,16 +379,11 @@ static inline void PrintToImpl(const std::string& factory_name, for (const auto& agg : o->aggregates) { *os << agg.function << "<"; if (agg.options) PrintTo(*agg.options, os); + *os << agg.target.ToString() << "<"; *os << ">,"; } *os << "},"; - *os << "targets={"; - for (const auto& target : o->targets) { - *os << target.ToString() << ","; - } - *os << "},"; - *os << 
"names={"; for (const auto& name : o->names) { *os << name << ","; diff --git a/cpp/src/arrow/compute/exec/tpch_benchmark.cc b/cpp/src/arrow/compute/exec/tpch_benchmark.cc index 82584f58e93..f348eaf3eb1 100644 --- a/cpp/src/arrow/compute/exec/tpch_benchmark.cc +++ b/cpp/src/arrow/compute/exec/tpch_benchmark.cc @@ -78,20 +78,21 @@ std::shared_ptr Plan_Q1(AsyncGenerator>* sin std::make_shared(ScalarAggregateOptions::Defaults()); auto count_opts = std::make_shared(CountOptions::CountMode::ALL); std::vector aggs = { - {"hash_sum", sum_opts}, {"hash_sum", sum_opts}, {"hash_sum", sum_opts}, - {"hash_sum", sum_opts}, {"hash_mean", sum_opts}, {"hash_mean", sum_opts}, - {"hash_mean", sum_opts}, {"hash_count", count_opts}}; - - std::vector to_aggregate = {"sum_qty", "sum_base_price", "sum_disc_price", - "sum_charge", "avg_qty", "avg_price", - "avg_disc", "sum_qty"}; + {"hash_sum", sum_opts, "sum_qty"}, + {"hash_sum", sum_opts, "sum_base_price"}, + {"hash_sum", sum_opts, "sum_disc_price"}, + {"hash_sum", sum_opts, "sum_charge"}, + {"hash_mean", sum_opts, "avg_qty"}, + {"hash_mean", sum_opts, "avg_price"}, + {"hash_mean", sum_opts, "avg_disc"}, + {"hash_count", count_opts, "sum_qty"}}; std::vector names = {"sum_qty", "sum_base_price", "sum_disc_price", "sum_charge", "avg_qty", "avg_price", "avg_disc", "count_order"}; std::vector keys = {"l_returnflag", "l_linestatus"}; - AggregateNodeOptions agg_opts(aggs, to_aggregate, names, keys); + AggregateNodeOptions agg_opts(aggs, names, keys); SortKey l_returnflag_key("l_returnflag"); SortKey l_linestatus_key("l_linestatus"); diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc b/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc index 7b47845f234..51c7aea1f37 100644 --- a/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc +++ b/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc @@ -123,15 +123,12 @@ Result NaiveGroupBy(std::vector arguments, std::vector keys Result GroupByUsingExecPlan(const BatchesWithSchema& input, const std::vector& key_names, - const std::vector& arg_names, const std::vector& aggregates, bool use_threads, ExecContext* ctx) { std::vector keys(key_names.size()); - std::vector targets(aggregates.size()); std::vector names(aggregates.size()); for (size_t i = 0; i < aggregates.size(); ++i) { names[i] = aggregates[i].function; - targets[i] = FieldRef(arg_names[i]); } for (size_t i = 0; i < key_names.size(); ++i) { keys[i] = FieldRef(key_names[i]); @@ -144,9 +141,8 @@ Result GroupByUsingExecPlan(const BatchesWithSchema& input, { {"source", SourceNodeOptions{input.schema, input.gen(use_threads, /*slow=*/false)}}, - {"aggregate", - AggregateNodeOptions{std::move(aggregates), std::move(targets), - std::move(names), std::move(keys)}}, + {"aggregate", AggregateNodeOptions{std::move(aggregates), std::move(names), + std::move(keys)}}, {"sink", SinkNodeOptions{&sink_gen}}, }) .AddToPlan(plan.get())); @@ -197,11 +193,9 @@ Result GroupByUsingExecPlan(const std::vector& arguments, FieldVector scan_fields(arguments.size() + keys.size()); std::vector key_names(keys.size()); - std::vector arg_names(arguments.size()); for (size_t i = 0; i < arguments.size(); ++i) { auto name = std::string("agg_") + std::to_string(i); scan_fields[i] = field(name, arguments[i].type()); - arg_names[i] = std::move(name); } for (size_t i = 0; i < keys.size(); ++i) { auto name = std::string("key_") + std::to_string(i); @@ -223,7 +217,7 @@ Result GroupByUsingExecPlan(const std::vector& arguments, input.batches.push_back(std::move(batch)); } - return 
GroupByUsingExecPlan(input, key_names, arg_names, aggregates, use_threads, ctx); + return GroupByUsingExecPlan(input, key_names, aggregates, use_threads, ctx); } void ValidateGroupBy(const std::vector& aggregates, @@ -756,7 +750,7 @@ TEST(GroupBy, NoBatches) { Datum aggregated_and_grouped, GroupByTest({table->GetColumnByName("argument")}, {table->GetColumnByName("key")}, { - {"hash_count", nullptr}, + {"hash_count", nullptr, {"agg_0"}}, }, /*use_threads=*/true, /*use_exec_plan=*/true)); AssertDatumsEqual(ArrayFromJSON(struct_({ @@ -821,7 +815,7 @@ TEST(GroupBy, CountOnly) { GroupByTest({table->GetColumnByName("argument")}, {table->GetColumnByName("key")}, { - {"hash_count", nullptr}, + {"hash_count", nullptr, "agg_0"}, }, use_threads, use_exec_plan)); SortBy({"key_0"}, &aggregated_and_grouped); @@ -858,15 +852,14 @@ TEST(GroupBy, CountScalar) { auto count_all = std::make_shared(CountOptions::ALL); for (bool use_threads : {true, false}) { SCOPED_TRACE(use_threads ? "parallel/merged" : "serial"); - ASSERT_OK_AND_ASSIGN( - Datum actual, - GroupByUsingExecPlan(input, {"key"}, {"argument", "argument", "argument"}, - { - {"hash_count", skip_nulls}, - {"hash_count", keep_nulls}, - {"hash_count", count_all}, - }, - use_threads, default_exec_context())); + ASSERT_OK_AND_ASSIGN(Datum actual, + GroupByUsingExecPlan(input, {"key"}, + { + {"hash_count", skip_nulls, "argument"}, + {"hash_count", keep_nulls, "argument"}, + {"hash_count", count_all, "argument"}, + }, + use_threads, default_exec_context())); Datum expected = ArrayFromJSON(struct_({ field("hash_count", int64()), field("hash_count", int64()), @@ -909,7 +902,7 @@ TEST(GroupBy, SumOnly) { GroupByTest({table->GetColumnByName("argument")}, {table->GetColumnByName("key")}, { - {"hash_sum", nullptr}, + {"hash_sum", nullptr, "agg_0"}, }, use_threads, use_exec_plan)); SortBy({"key_0"}, &aggregated_and_grouped); @@ -974,12 +967,12 @@ TEST(GroupBy, SumMeanProductDecimal) { }, {table->GetColumnByName("key")}, { - {"hash_sum", nullptr}, - {"hash_sum", nullptr}, - {"hash_mean", nullptr}, - {"hash_mean", nullptr}, - {"hash_product", nullptr}, - {"hash_product", nullptr}, + {"hash_sum", nullptr, "agg_0"}, + {"hash_sum", nullptr, "agg_1"}, + {"hash_mean", nullptr, "agg_2"}, + {"hash_mean", nullptr, "agg_3"}, + {"hash_product", nullptr, "agg_4"}, + {"hash_product", nullptr, "agg_5"}, }, use_threads, use_exec_plan)); SortBy({"key_0"}, &aggregated_and_grouped); @@ -1035,8 +1028,8 @@ TEST(GroupBy, MeanOnly) { table->GetColumnByName("argument")}, {table->GetColumnByName("key")}, { - {"hash_mean", nullptr}, - {"hash_mean", min_count}, + {"hash_mean", nullptr, "agg_0"}, + {"hash_mean", min_count, "agg_1"}, }, use_threads)); SortBy({"key_0"}, &aggregated_and_grouped); @@ -1070,15 +1063,14 @@ TEST(GroupBy, SumMeanProductScalar) { for (bool use_threads : {true, false}) { SCOPED_TRACE(use_threads ? 
"parallel/merged" : "serial"); - ASSERT_OK_AND_ASSIGN( - Datum actual, - GroupByUsingExecPlan(input, {"key"}, {"argument", "argument", "argument"}, - { - {"hash_sum", nullptr}, - {"hash_mean", nullptr}, - {"hash_product", nullptr}, - }, - use_threads, default_exec_context())); + ASSERT_OK_AND_ASSIGN(Datum actual, + GroupByUsingExecPlan(input, {"key"}, + { + {"hash_sum", nullptr, "argument"}, + {"hash_mean", nullptr, "argument"}, + {"hash_product", nullptr, "argument"}, + }, + use_threads, default_exec_context())); Datum expected = ArrayFromJSON(struct_({ field("hash_sum", int64()), field("hash_mean", float64()), @@ -1119,8 +1111,8 @@ TEST(GroupBy, VarianceAndStddev) { batch->GetColumnByName("key"), }, { - {"hash_variance", nullptr}, - {"hash_stddev", nullptr}, + {"hash_variance", nullptr, "agg_0"}, + {"hash_stddev", nullptr, "agg_1"}, })); AssertDatumsApproxEqual(ArrayFromJSON(struct_({ @@ -1151,18 +1143,19 @@ TEST(GroupBy, VarianceAndStddev) { [null, 3] ])"); - ASSERT_OK_AND_ASSIGN(aggregated_and_grouped, internal::GroupBy( - { - batch->GetColumnByName("argument"), - batch->GetColumnByName("argument"), - }, - { - batch->GetColumnByName("key"), - }, - { - {"hash_variance", nullptr}, - {"hash_stddev", nullptr}, - })); + ASSERT_OK_AND_ASSIGN(aggregated_and_grouped, + internal::GroupBy( + { + batch->GetColumnByName("argument"), + batch->GetColumnByName("argument"), + }, + { + batch->GetColumnByName("key"), + }, + { + {"hash_variance", nullptr, "agg_0"}, + {"hash_stddev", nullptr, "agg_1"}, + })); AssertDatumsApproxEqual(ArrayFromJSON(struct_({ field("hash_variance", float64()), @@ -1190,8 +1183,8 @@ TEST(GroupBy, VarianceAndStddev) { batch->GetColumnByName("key"), }, { - {"hash_variance", variance_options}, - {"hash_stddev", variance_options}, + {"hash_variance", variance_options, "agg_0"}, + {"hash_stddev", variance_options, "agg_1"}, })); AssertDatumsApproxEqual(ArrayFromJSON(struct_({ @@ -1236,10 +1229,10 @@ TEST(GroupBy, VarianceAndStddevDecimal) { batch->GetColumnByName("key"), }, { - {"hash_variance", nullptr}, - {"hash_stddev", nullptr}, - {"hash_variance", nullptr}, - {"hash_stddev", nullptr}, + {"hash_variance", nullptr, "agg_0"}, + {"hash_stddev", nullptr, "agg_1"}, + {"hash_variance", nullptr, "agg_2"}, + {"hash_stddev", nullptr, "agg_3"}, })); AssertDatumsApproxEqual(ArrayFromJSON(struct_({ @@ -1304,12 +1297,12 @@ TEST(GroupBy, TDigest) { batch->GetColumnByName("key"), }, { - {"hash_tdigest", nullptr}, - {"hash_tdigest", options1}, - {"hash_tdigest", options2}, - {"hash_tdigest", keep_nulls}, - {"hash_tdigest", min_count}, - {"hash_tdigest", keep_nulls_min_count}, + {"hash_tdigest", nullptr, "agg_0"}, + {"hash_tdigest", options1, "agg_1"}, + {"hash_tdigest", options2, "agg_2"}, + {"hash_tdigest", keep_nulls, "agg_3"}, + {"hash_tdigest", min_count, "agg_4"}, + {"hash_tdigest", keep_nulls_min_count, "agg_5"}, })); AssertDatumsApproxEqual( @@ -1356,8 +1349,8 @@ TEST(GroupBy, TDigestDecimal) { }, {batch->GetColumnByName("key")}, { - {"hash_tdigest", nullptr}, - {"hash_tdigest", nullptr}, + {"hash_tdigest", nullptr, "agg_0"}, + {"hash_tdigest", nullptr, "agg_1"}, })); AssertDatumsApproxEqual( @@ -1402,23 +1395,24 @@ TEST(GroupBy, ApproximateMedian) { /*skip_nulls=*/true, /*min_count=*/3); auto keep_nulls_min_count = std::make_shared( /*skip_nulls=*/false, /*min_count=*/3); - ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped, - internal::GroupBy( - { - batch->GetColumnByName("argument"), - batch->GetColumnByName("argument"), - batch->GetColumnByName("argument"), - 
batch->GetColumnByName("argument"), - }, - { - batch->GetColumnByName("key"), - }, - { - {"hash_approximate_median", options}, - {"hash_approximate_median", keep_nulls}, - {"hash_approximate_median", min_count}, - {"hash_approximate_median", keep_nulls_min_count}, - })); + ASSERT_OK_AND_ASSIGN( + Datum aggregated_and_grouped, + internal::GroupBy( + { + batch->GetColumnByName("argument"), + batch->GetColumnByName("argument"), + batch->GetColumnByName("argument"), + batch->GetColumnByName("argument"), + }, + { + batch->GetColumnByName("key"), + }, + { + {"hash_approximate_median", options, "agg_0"}, + {"hash_approximate_median", keep_nulls, "agg_1"}, + {"hash_approximate_median", min_count, "agg_2"}, + {"hash_approximate_median", keep_nulls_min_count, "agg_3"}, + })); AssertDatumsApproxEqual(ArrayFromJSON(struct_({ field("hash_approximate_median", float64()), @@ -1458,15 +1452,13 @@ TEST(GroupBy, StddevVarianceTDigestScalar) { SCOPED_TRACE(use_threads ? "parallel/merged" : "serial"); ASSERT_OK_AND_ASSIGN(Datum actual, GroupByUsingExecPlan(input, {"key"}, - {"argument", "argument", "argument", - "argument1", "argument1", "argument1"}, { - {"hash_stddev", nullptr}, - {"hash_variance", nullptr}, - {"hash_tdigest", nullptr}, - {"hash_stddev", nullptr}, - {"hash_variance", nullptr}, - {"hash_tdigest", nullptr}, + {"hash_stddev", nullptr, "argument"}, + {"hash_variance", nullptr, "argument"}, + {"hash_tdigest", nullptr, "argument"}, + {"hash_stddev", nullptr, "argument1"}, + {"hash_variance", nullptr, "argument1"}, + {"hash_tdigest", nullptr, "argument1"}, }, use_threads, default_exec_context())); Datum expected = @@ -1516,25 +1508,18 @@ TEST(GroupBy, VarianceOptions) { for (bool use_threads : {false}) { SCOPED_TRACE(use_threads ? "parallel/merged" : "serial"); - ASSERT_OK_AND_ASSIGN(Datum actual, - GroupByUsingExecPlan(input, {"key"}, - { - "argument", - "argument", - "argument", - "argument", - "argument", - "argument", - }, - { - {"hash_stddev", keep_nulls}, - {"hash_stddev", min_count}, - {"hash_stddev", keep_nulls_min_count}, - {"hash_variance", keep_nulls}, - {"hash_variance", min_count}, - {"hash_variance", keep_nulls_min_count}, - }, - use_threads, default_exec_context())); + ASSERT_OK_AND_ASSIGN( + Datum actual, + GroupByUsingExecPlan(input, {"key"}, + { + {"hash_stddev", keep_nulls, "argument"}, + {"hash_stddev", min_count, "argument"}, + {"hash_stddev", keep_nulls_min_count, "argument"}, + {"hash_variance", keep_nulls, "argument"}, + {"hash_variance", min_count, "argument"}, + {"hash_variance", keep_nulls_min_count, "argument"}, + }, + use_threads, default_exec_context())); Datum expected = ArrayFromJSON(struct_({ field("hash_stddev", float64()), field("hash_stddev", float64()), @@ -1553,25 +1538,18 @@ TEST(GroupBy, VarianceOptions) { ValidateOutput(expected); AssertDatumsApproxEqual(expected, actual, /*verbose=*/true); - ASSERT_OK_AND_ASSIGN(actual, - GroupByUsingExecPlan(input, {"key"}, - { - "argument1", - "argument1", - "argument1", - "argument1", - "argument1", - "argument1", - }, - { - {"hash_stddev", keep_nulls}, - {"hash_stddev", min_count}, - {"hash_stddev", keep_nulls_min_count}, - {"hash_variance", keep_nulls}, - {"hash_variance", min_count}, - {"hash_variance", keep_nulls_min_count}, - }, - use_threads, default_exec_context())); + ASSERT_OK_AND_ASSIGN( + actual, + GroupByUsingExecPlan(input, {"key"}, + { + {"hash_stddev", keep_nulls, "argument1"}, + {"hash_stddev", min_count, "argument1"}, + {"hash_stddev", keep_nulls_min_count, "argument1"}, + {"hash_variance", keep_nulls, 
"argument1"}, + {"hash_variance", min_count, "argument1"}, + {"hash_variance", keep_nulls_min_count, "argument1"}, + }, + use_threads, default_exec_context())); expected = ArrayFromJSON(struct_({ field("hash_stddev", float64()), field("hash_stddev", float64()), @@ -1629,9 +1607,9 @@ TEST(GroupBy, MinMaxOnly) { }, {table->GetColumnByName("key")}, { - {"hash_min_max", nullptr}, - {"hash_min_max", nullptr}, - {"hash_min_max", nullptr}, + {"hash_min_max", nullptr, "agg_0"}, + {"hash_min_max", nullptr, "agg_1"}, + {"hash_min_max", nullptr, "agg_2"}, }, use_threads, use_exec_plan)); ValidateOutput(aggregated_and_grouped); @@ -1736,7 +1714,7 @@ TEST(GroupBy, MinMaxTypes) { ASSERT_OK_AND_ASSIGN( Datum aggregated_and_grouped, GroupByTest({table->GetColumnByName("argument0")}, - {table->GetColumnByName("key")}, {{"hash_min_max", nullptr}}, + {table->GetColumnByName("key")}, {{"hash_min_max", nullptr, "agg_0"}}, /*use_threads=*/true, /*use_exec_plan=*/true)); ValidateOutput(aggregated_and_grouped); SortBy({"key_0"}, &aggregated_and_grouped); @@ -1790,8 +1768,8 @@ TEST(GroupBy, MinMaxDecimal) { }, {table->GetColumnByName("key")}, { - {"hash_min_max", nullptr}, - {"hash_min_max", nullptr}, + {"hash_min_max", nullptr, "agg_0"}, + {"hash_min_max", nullptr, "agg_1"}, }, use_threads, use_exec_plan)); ValidateOutput(aggregated_and_grouped); @@ -1849,11 +1827,11 @@ TEST(GroupBy, MinMaxBinary) { [null, 3] ])"}); - ASSERT_OK_AND_ASSIGN( - Datum aggregated_and_grouped, - GroupByTest({table->GetColumnByName("argument0")}, - {table->GetColumnByName("key")}, {{"hash_min_max", nullptr}}, - use_threads, use_exec_plan)); + ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped, + GroupByTest({table->GetColumnByName("argument0")}, + {table->GetColumnByName("key")}, + {{"hash_min_max", nullptr, "agg_0"}}, + use_threads, use_exec_plan)); ValidateOutput(aggregated_and_grouped); SortBy({"key_0"}, &aggregated_and_grouped); @@ -1906,8 +1884,8 @@ TEST(GroupBy, MinMaxFixedSizeBinary) { ASSERT_OK_AND_ASSIGN( Datum aggregated_and_grouped, GroupByTest({table->GetColumnByName("argument0")}, - {table->GetColumnByName("key")}, {{"hash_min_max", nullptr}}, - use_threads, use_exec_plan)); + {table->GetColumnByName("key")}, + {{"hash_min_max", nullptr, "agg_0"}}, use_threads, use_exec_plan)); ValidateOutput(aggregated_and_grouped); SortBy({"key_0"}, &aggregated_and_grouped); @@ -1960,8 +1938,8 @@ TEST(GroupBy, MinOrMax) { table->GetColumnByName("argument")}, {table->GetColumnByName("key")}, { - {"hash_min", nullptr}, - {"hash_max", nullptr}, + {"hash_min", nullptr, "agg_0"}, + {"hash_max", nullptr, "agg_1"}, }, /*use_threads=*/true, /*use_exec_plan=*/true)); SortBy({"key_0"}, &aggregated_and_grouped); @@ -1997,9 +1975,8 @@ TEST(GroupBy, MinMaxScalar) { SCOPED_TRACE(use_threads ? 
"parallel/merged" : "serial"); ASSERT_OK_AND_ASSIGN( Datum actual, - GroupByUsingExecPlan(input, {"key"}, {"argument", "argument", "argument"}, - {{"hash_min_max", nullptr}}, use_threads, - default_exec_context())); + GroupByUsingExecPlan(input, {"key"}, {{"hash_min_max", nullptr, "argument"}}, + use_threads, default_exec_context())); Datum expected = ArrayFromJSON(struct_({ field("hash_min_max", @@ -2062,14 +2039,14 @@ TEST(GroupBy, AnyAndAll) { }, {table->GetColumnByName("key")}, { - {"hash_any", no_min}, - {"hash_any", min_count}, - {"hash_any", keep_nulls}, - {"hash_any", keep_nulls_min_count}, - {"hash_all", no_min}, - {"hash_all", min_count}, - {"hash_all", keep_nulls}, - {"hash_all", keep_nulls_min_count}, + {"hash_any", no_min, "agg_0"}, + {"hash_any", min_count, "agg_1"}, + {"hash_any", keep_nulls, "agg_2"}, + {"hash_any", keep_nulls_min_count, "agg_3"}, + {"hash_all", no_min, "agg_4"}, + {"hash_all", min_count, "agg_5"}, + {"hash_all", keep_nulls, "agg_6"}, + {"hash_all", keep_nulls_min_count, "agg_7"}, }, use_threads)); SortBy({"key_0"}, &aggregated_and_grouped); @@ -2119,17 +2096,15 @@ TEST(GroupBy, AnyAllScalar) { std::make_shared(/*skip_nulls=*/false, /*min_count=*/0); for (bool use_threads : {true, false}) { SCOPED_TRACE(use_threads ? "parallel/merged" : "serial"); - ASSERT_OK_AND_ASSIGN( - Datum actual, - GroupByUsingExecPlan(input, {"key"}, - {"argument", "argument", "argument", "argument"}, - { - {"hash_any", nullptr}, - {"hash_all", nullptr}, - {"hash_any", keep_nulls}, - {"hash_all", keep_nulls}, - }, - use_threads, default_exec_context())); + ASSERT_OK_AND_ASSIGN(Datum actual, + GroupByUsingExecPlan(input, {"key"}, + { + {"hash_any", nullptr, "argument"}, + {"hash_all", nullptr, "argument"}, + {"hash_any", keep_nulls, "argument"}, + {"hash_all", keep_nulls, "argument"}, + }, + use_threads, default_exec_context())); Datum expected = ArrayFromJSON(struct_({ field("hash_any", boolean()), field("hash_all", boolean()), @@ -2195,9 +2170,9 @@ TEST(GroupBy, CountDistinct) { table->GetColumnByName("key"), }, { - {"hash_count_distinct", all}, - {"hash_count_distinct", only_valid}, - {"hash_count_distinct", only_null}, + {"hash_count_distinct", all, "agg_0"}, + {"hash_count_distinct", only_valid, "agg_1"}, + {"hash_count_distinct", only_null, "agg_2"}, }, use_threads)); SortBy({"key_0"}, &aggregated_and_grouped); @@ -2261,9 +2236,9 @@ TEST(GroupBy, CountDistinct) { table->GetColumnByName("key"), }, { - {"hash_count_distinct", all}, - {"hash_count_distinct", only_valid}, - {"hash_count_distinct", only_null}, + {"hash_count_distinct", all, "agg_0"}, + {"hash_count_distinct", only_valid, "agg_1"}, + {"hash_count_distinct", only_null, "agg_2"}, }, use_threads)); ValidateOutput(aggregated_and_grouped); @@ -2307,9 +2282,9 @@ TEST(GroupBy, CountDistinct) { table->GetColumnByName("key"), }, { - {"hash_count_distinct", all}, - {"hash_count_distinct", only_valid}, - {"hash_count_distinct", only_null}, + {"hash_count_distinct", all, "agg_0"}, + {"hash_count_distinct", only_valid, "agg_1"}, + {"hash_count_distinct", only_null, "agg_2"}, }, use_threads)); ValidateOutput(aggregated_and_grouped); @@ -2379,9 +2354,9 @@ TEST(GroupBy, Distinct) { table->GetColumnByName("key"), }, { - {"hash_distinct", all}, - {"hash_distinct", only_valid}, - {"hash_distinct", only_null}, + {"hash_distinct", all, "agg_0"}, + {"hash_distinct", only_valid, "agg_1"}, + {"hash_distinct", only_null, "agg_2"}, }, use_threads)); ValidateOutput(aggregated_and_grouped); @@ -2452,9 +2427,9 @@ TEST(GroupBy, Distinct) { 
table->GetColumnByName("key"), }, { - {"hash_distinct", all}, - {"hash_distinct", only_valid}, - {"hash_distinct", only_null}, + {"hash_distinct", all, "agg_0"}, + {"hash_distinct", only_valid, "agg_1"}, + {"hash_distinct", only_null, "agg_2"}, }, use_threads)); ValidateOutput(aggregated_and_grouped); @@ -2516,12 +2491,12 @@ TEST(GroupBy, OneMiscTypes) { }, {table->GetColumnByName("key")}, { - {"hash_one", nullptr}, - {"hash_one", nullptr}, - {"hash_one", nullptr}, - {"hash_one", nullptr}, - {"hash_one", nullptr}, - {"hash_one", nullptr}, + {"hash_one", nullptr, "agg_0"}, + {"hash_one", nullptr, "agg_1"}, + {"hash_one", nullptr, "agg_2"}, + {"hash_one", nullptr, "agg_3"}, + {"hash_one", nullptr, "agg_4"}, + {"hash_one", nullptr, "agg_5"}, }, use_threads, use_exec_plan)); ValidateOutput(aggregated_and_grouped); @@ -2647,7 +2622,7 @@ TEST(GroupBy, OneNumericTypes) { ASSERT_OK_AND_ASSIGN( Datum aggregated_and_grouped, GroupByTest({table->GetColumnByName("argument0")}, - {table->GetColumnByName("key")}, {{"hash_one", nullptr}}, + {table->GetColumnByName("key")}, {{"hash_one", nullptr, "agg_0"}}, use_threads, use_exec_plan)); ValidateOutput(aggregated_and_grouped); SortBy({"key_0"}, &aggregated_and_grouped); @@ -2710,7 +2685,7 @@ TEST(GroupBy, OneBinaryTypes) { ASSERT_OK_AND_ASSIGN( Datum aggregated_and_grouped, GroupByTest({table->GetColumnByName("argument0")}, - {table->GetColumnByName("key")}, {{"hash_one", nullptr}}, + {table->GetColumnByName("key")}, {{"hash_one", nullptr, "agg_0"}}, use_threads, use_exec_plan)); ValidateOutput(aggregated_and_grouped); SortBy({"key_0"}, &aggregated_and_grouped); @@ -2743,9 +2718,9 @@ TEST(GroupBy, OneScalar) { for (bool use_threads : {true, false}) { SCOPED_TRACE(use_threads ? "parallel/merged" : "serial"); ASSERT_OK_AND_ASSIGN( - Datum actual, GroupByUsingExecPlan( - input, {"key"}, {"argument", "argument", "argument"}, - {{"hash_one", nullptr}}, use_threads, default_exec_context())); + Datum actual, + GroupByUsingExecPlan(input, {"key"}, {{"hash_one", nullptr, "argument"}}, + use_threads, default_exec_context())); const auto& struct_arr = actual.array_as(); // Check the key column @@ -2805,7 +2780,7 @@ TEST(GroupBy, ListNumeric) { table->GetColumnByName("key"), }, { - {"hash_list", nullptr}, + {"hash_list", nullptr, "agg_0"}, }, use_threads)); ValidateOutput(aggregated_and_grouped); @@ -2876,7 +2851,7 @@ TEST(GroupBy, ListNumeric) { table->GetColumnByName("key"), }, { - {"hash_list", nullptr}, + {"hash_list", nullptr, "agg_0"}, }, use_threads)); ValidateOutput(aggregated_and_grouped); @@ -2945,7 +2920,7 @@ TEST(GroupBy, ListBinaryTypes) { table->GetColumnByName("key"), }, { - {"hash_list", nullptr}, + {"hash_list", nullptr, "agg_0"}, }, use_threads)); ValidateOutput(aggregated_and_grouped); @@ -3007,7 +2982,7 @@ TEST(GroupBy, ListBinaryTypes) { table->GetColumnByName("key"), }, { - {"hash_list", nullptr}, + {"hash_list", nullptr, "agg_0"}, }, use_threads)); ValidateOutput(aggregated_and_grouped); @@ -3085,12 +3060,12 @@ TEST(GroupBy, ListMiscTypes) { }, {table->GetColumnByName("key")}, { - {"hash_list", nullptr}, - {"hash_list", nullptr}, - {"hash_list", nullptr}, - {"hash_list", nullptr}, - {"hash_list", nullptr}, - {"hash_list", nullptr}, + {"hash_list", nullptr, "agg_0"}, + {"hash_list", nullptr, "agg_1"}, + {"hash_list", nullptr, "agg_2"}, + {"hash_list", nullptr, "agg_3"}, + {"hash_list", nullptr, "agg_4"}, + {"hash_list", nullptr, "agg_5"}, }, use_threads, use_exec_plan)); ValidateOutput(aggregated_and_grouped); @@ -3238,12 +3213,12 @@ 
TEST(GroupBy, CountAndSum) { batch->GetColumnByName("key"), }, { - {"hash_count", count_options}, - {"hash_count", count_nulls}, - {"hash_count", count_all}, - {"hash_sum", nullptr}, - {"hash_sum", min_count}, - {"hash_sum", nullptr}, + {"hash_count", count_options, "agg_0"}, + {"hash_count", count_nulls, "agg_1"}, + {"hash_count", count_all, "agg_2"}, + {"hash_sum", nullptr, "agg_3"}, + {"hash_sum", min_count, "agg_4"}, + {"hash_sum", nullptr, "agg_5"}, })); AssertDatumsEqual( @@ -3295,9 +3270,9 @@ TEST(GroupBy, Product) { batch->GetColumnByName("key"), }, { - {"hash_product", nullptr}, - {"hash_product", nullptr}, - {"hash_product", min_count}, + {"hash_product", nullptr, "agg_0"}, + {"hash_product", nullptr, "agg_1"}, + {"hash_product", min_count, "agg_2"}, })); AssertDatumsApproxEqual(ArrayFromJSON(struct_({ @@ -3330,7 +3305,7 @@ TEST(GroupBy, Product) { batch->GetColumnByName("key"), }, { - {"hash_product", nullptr}, + {"hash_product", nullptr, "agg_0"}, })); AssertDatumsApproxEqual(ArrayFromJSON(struct_({ @@ -3374,12 +3349,12 @@ TEST(GroupBy, SumMeanProductKeepNulls) { batch->GetColumnByName("key"), }, { - {"hash_sum", keep_nulls}, - {"hash_sum", min_count}, - {"hash_mean", keep_nulls}, - {"hash_mean", min_count}, - {"hash_product", keep_nulls}, - {"hash_product", min_count}, + {"hash_sum", keep_nulls, "agg_0"}, + {"hash_sum", min_count, "agg_1"}, + {"hash_mean", keep_nulls, "agg_2"}, + {"hash_mean", min_count, "agg_3"}, + {"hash_product", keep_nulls, "agg_4"}, + {"hash_product", min_count, "agg_5"}, })); AssertDatumsApproxEqual(ArrayFromJSON(struct_({ @@ -3423,7 +3398,7 @@ TEST(GroupBy, SumOnlyStringAndDictKeys) { internal::GroupBy({batch->GetColumnByName("argument")}, {batch->GetColumnByName("key")}, { - {"hash_sum", nullptr}, + {"hash_sum", nullptr, "agg_0"}, })); SortBy({"key_0"}, &aggregated_and_grouped); @@ -3466,11 +3441,11 @@ TEST(GroupBy, ConcreteCaseWithValidateGroupBy) { using internal::Aggregate; for (auto agg : { - Aggregate{"hash_sum", nullptr}, - Aggregate{"hash_count", non_null}, - Aggregate{"hash_count", nulls}, - Aggregate{"hash_min_max", nullptr}, - Aggregate{"hash_min_max", keepna}, + Aggregate{"hash_sum", nullptr, "agg_0"}, + Aggregate{"hash_count", non_null, "agg_1"}, + Aggregate{"hash_count", nulls, "agg_2"}, + Aggregate{"hash_min_max", nullptr, "agg_3"}, + Aggregate{"hash_min_max", keepna, "agg_4"}, }) { SCOPED_TRACE(agg.function); ValidateGroupBy({agg}, {batch->GetColumnByName("argument")}, @@ -3494,8 +3469,8 @@ TEST(GroupBy, CountNull) { using internal::Aggregate; for (auto agg : { - Aggregate{"hash_count", keepna}, - Aggregate{"hash_count", skipna}, + Aggregate{"hash_count", keepna, "agg_0"}, + Aggregate{"hash_count", skipna, "agg_1"}, }) { SCOPED_TRACE(agg.function); ValidateGroupBy({agg}, {batch->GetColumnByName("argument")}, @@ -3519,7 +3494,7 @@ TEST(GroupBy, RandomArraySum) { ValidateGroupBy( { - {"hash_sum", options}, + {"hash_sum", options, "agg_0"}, }, {batch->GetColumnByName("argument")}, {batch->GetColumnByName("key")}); } @@ -3552,9 +3527,9 @@ TEST(GroupBy, WithChunkedArray) { table->GetColumnByName("key"), }, { - {"hash_count", nullptr}, - {"hash_sum", nullptr}, - {"hash_min_max", nullptr}, + {"hash_count", nullptr, "agg_0"}, + {"hash_sum", nullptr, "agg_1"}, + {"hash_min_max", nullptr, "agg_2"}, })); AssertDatumsEqual(ArrayFromJSON(struct_({ @@ -3590,7 +3565,7 @@ TEST(GroupBy, MinMaxWithNewGroupsInChunkedArray) { table->GetColumnByName("key"), }, { - {"hash_min_max", nullptr}, + {"hash_min_max", nullptr, "agg_1"}, })); 
AssertDatumsEqual(ArrayFromJSON(struct_({ @@ -3626,7 +3601,7 @@ TEST(GroupBy, SmallChunkSizeSumOnly) { internal::GroupBy({batch->GetColumnByName("argument")}, {batch->GetColumnByName("key")}, { - {"hash_sum", nullptr}, + {"hash_sum", nullptr, "agg_0"}, }, small_chunksize_context())); AssertDatumsEqual(ArrayFromJSON(struct_({ @@ -3678,9 +3653,9 @@ TEST(GroupBy, CountWithNullType) { }, {table->GetColumnByName("key")}, { - {"hash_count", all}, - {"hash_count", only_valid}, - {"hash_count", only_null}, + {"hash_count", all, "agg_0"}, + {"hash_count", only_valid, "agg_1"}, + {"hash_count", only_null, "agg_2"}, }, use_threads, use_exec_plan)); SortBy({"key_0"}, &aggregated_and_grouped); @@ -3723,9 +3698,9 @@ TEST(GroupBy, CountWithNullTypeEmptyTable) { }, {table->GetColumnByName("key")}, { - {"hash_count", all}, - {"hash_count", only_valid}, - {"hash_count", only_null}, + {"hash_count", all, "agg_0"}, + {"hash_count", only_valid, "agg_1"}, + {"hash_count", only_null, "agg_2"}, }, use_threads, use_exec_plan)); auto struct_arr = aggregated_and_grouped.array_as(); @@ -3768,10 +3743,10 @@ TEST(GroupBy, SingleNullTypeKey) { }, {table->GetColumnByName("key")}, { - {"hash_count", nullptr}, - {"hash_sum", nullptr}, - {"hash_mean", nullptr}, - {"hash_min_max", nullptr}, + {"hash_count", nullptr, "agg_0"}, + {"hash_sum", nullptr, "agg_1"}, + {"hash_mean", nullptr, "agg_2"}, + {"hash_min_max", nullptr, "agg_3"}, }, use_threads, use_exec_plan)); SortBy({"key_0"}, &aggregated_and_grouped); @@ -3828,9 +3803,9 @@ TEST(GroupBy, MultipleKeysIncludesNullType) { }, {table->GetColumnByName("key_0"), table->GetColumnByName("key_1")}, { - {"hash_count", nullptr}, - {"hash_sum", nullptr}, - {"hash_min_max", nullptr}, + {"hash_count", nullptr, "agg_0"}, + {"hash_sum", nullptr, "agg_1"}, + {"hash_min_max", nullptr, "agg_2"}, }, use_threads, use_exec_plan)); SortBy({"key_0"}, &aggregated_and_grouped); @@ -3899,10 +3874,10 @@ TEST(GroupBy, SumNullType) { }, {table->GetColumnByName("key")}, { - {"hash_sum", no_min}, - {"hash_sum", keep_nulls}, - {"hash_sum", min_count}, - {"hash_sum", keep_nulls_min_count}, + {"hash_sum", no_min, "agg_0"}, + {"hash_sum", keep_nulls, "agg_1"}, + {"hash_sum", min_count, "agg_2"}, + {"hash_sum", keep_nulls_min_count, "agg_3"}, }, use_threads, use_exec_plan)); SortBy({"key_0"}, &aggregated_and_grouped); @@ -3967,10 +3942,10 @@ TEST(GroupBy, ProductNullType) { }, {table->GetColumnByName("key")}, { - {"hash_product", no_min}, - {"hash_product", keep_nulls}, - {"hash_product", min_count}, - {"hash_product", keep_nulls_min_count}, + {"hash_product", no_min, "agg_0"}, + {"hash_product", keep_nulls, "agg_1"}, + {"hash_product", min_count, "agg_2"}, + {"hash_product", keep_nulls_min_count, "agg_3"}, }, use_threads, use_exec_plan)); SortBy({"key_0"}, &aggregated_and_grouped); @@ -4035,10 +4010,10 @@ TEST(GroupBy, MeanNullType) { }, {table->GetColumnByName("key")}, { - {"hash_mean", no_min}, - {"hash_mean", keep_nulls}, - {"hash_mean", min_count}, - {"hash_mean", keep_nulls_min_count}, + {"hash_mean", no_min, "agg_0"}, + {"hash_mean", keep_nulls, "agg_1"}, + {"hash_mean", min_count, "agg_2"}, + {"hash_mean", keep_nulls_min_count, "agg_3"}, }, use_threads, use_exec_plan)); SortBy({"key_0"}, &aggregated_and_grouped); @@ -4087,9 +4062,9 @@ TEST(GroupBy, NullTypeEmptyTable) { }, {table->GetColumnByName("key")}, { - {"hash_sum", no_min}, - {"hash_product", min_count}, - {"hash_mean", keep_nulls}, + {"hash_sum", no_min, "agg_0"}, + {"hash_product", min_count, "agg_1"}, + {"hash_mean", keep_nulls, "agg_2"}, 
}, use_threads, use_exec_plan)); auto struct_arr = aggregated_and_grouped.array_as(); diff --git a/cpp/src/arrow/dataset/scanner.cc b/cpp/src/arrow/dataset/scanner.cc index 02f658181c0..b19e787f94d 100644 --- a/cpp/src/arrow/dataset/scanner.cc +++ b/cpp/src/arrow/dataset/scanner.cc @@ -676,8 +676,7 @@ Result AsyncScanner::CountRows() { options}}, {"project", compute::ProjectNodeOptions{{options->filter}, {"mask"}}}, {"aggregate", compute::AggregateNodeOptions{{compute::internal::Aggregate{ - "sum", nullptr}}, - /*targets=*/{"mask"}, + "sum", nullptr, "mask"}}, /*names=*/{"selected_count"}}}, {"sink", compute::SinkNodeOptions{&sink_gen}}, }) diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc index b7dcb8b18d2..1d3e0b403ef 100644 --- a/cpp/src/arrow/dataset/scanner_test.cc +++ b/cpp/src/arrow/dataset/scanner_test.cc @@ -1837,11 +1837,10 @@ TEST(ScanNode, MinimalScalarAggEndToEnd) { // pipe the projection into a scalar aggregate node ASSERT_OK_AND_ASSIGN( compute::ExecNode * aggregate, - compute::MakeExecNode( - "aggregate", plan.get(), {project}, - compute::AggregateNodeOptions{{compute::internal::Aggregate{"sum", nullptr}}, - /*targets=*/{"a * 2"}, - /*names=*/{"sum(a * 2)"}})); + compute::MakeExecNode("aggregate", plan.get(), {project}, + compute::AggregateNodeOptions{ + {compute::internal::Aggregate{"sum", nullptr, "a * 2"}}, + /*names=*/{"sum(a * 2)"}})); // finally, pipe the aggregate node into a sink node AsyncGenerator> sink_gen; @@ -1927,12 +1926,12 @@ TEST(ScanNode, MinimalGroupedAggEndToEnd) { // pipe the projection into a grouped aggregate node ASSERT_OK_AND_ASSIGN( compute::ExecNode * aggregate, - compute::MakeExecNode("aggregate", plan.get(), {project}, - compute::AggregateNodeOptions{ - {compute::internal::Aggregate{"hash_sum", nullptr}}, - /*targets=*/{"a * 2"}, - /*names=*/{"sum(a * 2)"}, - /*keys=*/{"b"}})); + compute::MakeExecNode( + "aggregate", plan.get(), {project}, + compute::AggregateNodeOptions{ + {compute::internal::Aggregate{"hash_sum", nullptr, "a * 2"}}, + /*names=*/{"sum(a * 2)"}, + /*keys=*/{"b"}})); // finally, pipe the aggregate node into a sink node AsyncGenerator> sink_gen; From 97b0848f29fc279bb78812fa45d122e3bc82ef34 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Thu, 19 May 2022 12:43:27 +0530 Subject: [PATCH 07/32] adding modifications to R bindings (wip-test) --- r/R/arrowExports.R | 5 +++-- r/R/query-engine.R | 17 ++++++++++------- r/src/arrowExports.cpp | 9 ++++----- r/src/compute-exec.cpp | 27 +++++++++++++++++++-------- 4 files changed, 36 insertions(+), 22 deletions(-) diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index 4c579840e49..53c0f8ac86b 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -432,8 +432,8 @@ ExecNode_Project <- function(input, exprs, names) { .Call(`_arrow_ExecNode_Project`, input, exprs, names) } -ExecNode_Aggregate <- function(input, options, target_names, out_field_names, key_names) { - .Call(`_arrow_ExecNode_Aggregate`, input, options, target_names, out_field_names, key_names) +ExecNode_Aggregate <- function(input, options, out_field_names, key_names) { + .Call(`_arrow_ExecNode_Aggregate`, input, options, out_field_names, key_names) } ExecNode_Join <- function(input, type, right_data, left_keys, right_keys, left_output, right_output, output_suffix_for_left, output_suffix_for_right) { @@ -2007,3 +2007,4 @@ SetIOThreadPoolCapacity <- function(threads) { Array__infer_type <- function(x) { .Call(`_arrow_Array__infer_type`, x) } + diff --git a/r/R/query-engine.R 
b/r/R/query-engine.R index c40c61e98a7..2c2d96063bd 100644 --- a/r/R/query-engine.R +++ b/r/R/query-engine.R @@ -74,7 +74,8 @@ ExecPlan <- R6Class("ExecPlan", grouped <- length(group_vars) > 0 # Collect the target names first because we have to add back the group vars - target_names <- names(.data) + # TODO : remove upon discussion + # target_names <- names(.data) .data <- ensure_group_vars(.data) .data <- ensure_arrange_vars(.data) # this sets .data$temp_columns @@ -114,10 +115,13 @@ ExecPlan <- R6Class("ExecPlan", x }) } + target_names <- names(.data$aggregations); + for (i in seq_len(length(target_names))) { + .data$aggregations[[i]][["target"]] <- target_names[i] + } node <- node$Aggregate( - options = map(.data$aggregations, ~ .[c("fun", "options")]), - target_names = names(.data$aggregations), + options = map(.data$aggregations, ~ .[c("fun", "options", "target")]), out_field_names = names(.data$aggregations), key_names = group_vars ) @@ -179,7 +183,6 @@ ExecPlan <- R6Class("ExecPlan", temp_columns = names(.data$temp_columns) ) } - # This is only safe because we are going to evaluate queries that end # with head/tail first, then evaluate any subsequent query as a new query if (!is.null(.data$head)) { @@ -304,9 +307,9 @@ ExecNode <- R6Class("ExecNode", assert_is(expr, "Expression") self$preserve_extras(ExecNode_Filter(self, expr)) }, - Aggregate = function(options, target_names, out_field_names, key_names) { - out <- self$preserve_extras( - ExecNode_Aggregate(self, options, target_names, out_field_names, key_names) + Aggregate = function(options, key_names) { + self$preserve_sort( + ExecNode_Aggregate(self, options, key_names) ) # dplyr drops top-level attributes when you call summarize() out$extras$source_schema$metadata[["r"]]$attributes <- NULL diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index 887327d48f9..3bfed5f0af7 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -964,15 +964,14 @@ BEGIN_CPP11 END_CPP11 } // compute-exec.cpp -std::shared_ptr ExecNode_Aggregate(const std::shared_ptr& input, cpp11::list options, std::vector target_names, std::vector out_field_names, std::vector key_names); -extern "C" SEXP _arrow_ExecNode_Aggregate(SEXP input_sexp, SEXP options_sexp, SEXP target_names_sexp, SEXP out_field_names_sexp, SEXP key_names_sexp){ +std::shared_ptr ExecNode_Aggregate(const std::shared_ptr& input, cpp11::list options, std::vector out_field_names, std::vector key_names); +extern "C" SEXP _arrow_ExecNode_Aggregate(SEXP input_sexp, SEXP options_sexp, SEXP out_field_names_sexp, SEXP key_names_sexp){ BEGIN_CPP11 arrow::r::Input&>::type input(input_sexp); arrow::r::Input::type options(options_sexp); - arrow::r::Input>::type target_names(target_names_sexp); arrow::r::Input>::type out_field_names(out_field_names_sexp); arrow::r::Input>::type key_names(key_names_sexp); - return cpp11::as_sexp(ExecNode_Aggregate(input, options, target_names, out_field_names, key_names)); + return cpp11::as_sexp(ExecNode_Aggregate(input, options, out_field_names, key_names)); END_CPP11 } // compute-exec.cpp @@ -5222,7 +5221,7 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_ExecPlan_Write", (DL_FUNC) &_arrow_ExecPlan_Write, 14}, { "_arrow_ExecNode_Filter", (DL_FUNC) &_arrow_ExecNode_Filter, 2}, { "_arrow_ExecNode_Project", (DL_FUNC) &_arrow_ExecNode_Project, 3}, - { "_arrow_ExecNode_Aggregate", (DL_FUNC) &_arrow_ExecNode_Aggregate, 5}, + { "_arrow_ExecNode_Aggregate", (DL_FUNC) &_arrow_ExecNode_Aggregate, 4}, { "_arrow_ExecNode_Join", (DL_FUNC) 
&_arrow_ExecNode_Join, 9}, { "_arrow_ExecNode_Union", (DL_FUNC) &_arrow_ExecNode_Union, 2}, { "_arrow_ExecNode_SourceNode", (DL_FUNC) &_arrow_ExecNode_SourceNode, 2}, diff --git a/r/src/compute-exec.cpp b/r/src/compute-exec.cpp index 089d1e71eba..addea6efa54 100644 --- a/r/src/compute-exec.cpp +++ b/r/src/compute-exec.cpp @@ -228,28 +228,39 @@ std::shared_ptr ExecNode_Project( // [[arrow::export]] std::shared_ptr ExecNode_Aggregate( const std::shared_ptr& input, cpp11::list options, - std::vector target_names, std::vector out_field_names, + std::vector out_field_names, std::vector key_names) { std::vector aggregates; + // aggregates.push_back( + // arrow::compute::internal::Aggregate{std::move(name), opts.get()}); + // keep_alives.push_back(std::move(opts)); + // } for (cpp11::list name_opts : options) { auto name = cpp11::as_cpp(name_opts[0]); auto opts = make_compute_options(name, name_opts[1]); - + auto target = cpp11::as_cpp(name_opts[2]); + aggregates.push_back( - arrow::compute::internal::Aggregate{std::move(name), std::move(opts)}); + arrow::compute::internal::Aggregate{std::move(name), opts, std::move(target)}); + keep_alives.push_back(std::move(opts)); } - std::vector targets, keys; - for (auto&& name : target_names) { - targets.emplace_back(std::move(name)); - } + // std::vector targets, keys; + // for (auto&& name : target_names) { + // targets.emplace_back(std::move(name)); + // } + std::vector keys; for (auto&& name : key_names) { keys.emplace_back(std::move(name)); } + // return MakeExecNodeOrStop( + // "aggregate", input->plan(), {input.get()}, + // compute::AggregateNodeOptions{std::move(aggregates), std::move(targets), + // std::move(out_field_names), std::move(keys)}); return MakeExecNodeOrStop( "aggregate", input->plan(), {input.get()}, - compute::AggregateNodeOptions{std::move(aggregates), std::move(targets), + compute::AggregateNodeOptions{std::move(aggregates), std::move(out_field_names), std::move(keys)}); } From 93196d23982a6bab41c649cc41cc61514b8449af Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Thu, 19 May 2022 20:46:17 +0530 Subject: [PATCH 08/32] reformat file and clean up --- r/src/compute-exec.cpp | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/r/src/compute-exec.cpp b/r/src/compute-exec.cpp index addea6efa54..e4438d08d28 100644 --- a/r/src/compute-exec.cpp +++ b/r/src/compute-exec.cpp @@ -228,14 +228,9 @@ std::shared_ptr ExecNode_Project( // [[arrow::export]] std::shared_ptr ExecNode_Aggregate( const std::shared_ptr& input, cpp11::list options, - std::vector out_field_names, - std::vector key_names) { + std::vector out_field_names, std::vector key_names) { std::vector aggregates; - // aggregates.push_back( - // arrow::compute::internal::Aggregate{std::move(name), opts.get()}); - // keep_alives.push_back(std::move(opts)); - // } for (cpp11::list name_opts : options) { auto name = cpp11::as_cpp(name_opts[0]); auto opts = make_compute_options(name, name_opts[1]); @@ -246,22 +241,14 @@ std::shared_ptr ExecNode_Aggregate( keep_alives.push_back(std::move(opts)); } - // std::vector targets, keys; - // for (auto&& name : target_names) { - // targets.emplace_back(std::move(name)); - // } std::vector keys; for (auto&& name : key_names) { keys.emplace_back(std::move(name)); } - // return MakeExecNodeOrStop( - // "aggregate", input->plan(), {input.get()}, - // compute::AggregateNodeOptions{std::move(aggregates), std::move(targets), - // std::move(out_field_names), std::move(keys)}); return MakeExecNodeOrStop( "aggregate", 
input->plan(), {input.get()}, - compute::AggregateNodeOptions{std::move(aggregates), - std::move(out_field_names), std::move(keys)}); + compute::AggregateNodeOptions{std::move(aggregates), std::move(out_field_names), + std::move(keys)}); } // [[arrow::export]] From 7bd02343afd77cf613e5e5a271ba0c5523ec5857 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Thu, 19 May 2022 23:08:53 +0530 Subject: [PATCH 09/32] [c++] inclusion of name within aggregation --- .../execution_plan_documentation_examples.cc | 6 +- cpp/src/arrow/compute/api_aggregate.h | 3 + cpp/src/arrow/compute/exec/aggregate_node.cc | 7 +- cpp/src/arrow/compute/exec/ir_consumer.cc | 4 +- cpp/src/arrow/compute/exec/ir_test.cc | 49 +- cpp/src/arrow/compute/exec/options.h | 8 +- cpp/src/arrow/compute/exec/plan_test.cc | 232 +++--- cpp/src/arrow/compute/exec/test_util.cc | 10 +- .../compute/kernels/hash_aggregate_test.cc | 754 +++++++++--------- cpp/src/arrow/dataset/scanner.cc | 3 +- cpp/src/arrow/dataset/scanner_test.cc | 8 +- 11 files changed, 538 insertions(+), 546 deletions(-) diff --git a/cpp/examples/arrow/execution_plan_documentation_examples.cc b/cpp/examples/arrow/execution_plan_documentation_examples.cc index 3063431b5f9..44bff56e00c 100644 --- a/cpp/examples/arrow/execution_plan_documentation_examples.cc +++ b/cpp/examples/arrow/execution_plan_documentation_examples.cc @@ -503,8 +503,7 @@ arrow::Status SourceScalarAggregateSinkExample(cp::ExecContext& exec_context) { ARROW_ASSIGN_OR_RAISE(cp::ExecNode * source, cp::MakeExecNode("source", plan.get(), {}, source_node_options)); auto aggregate_options = - cp::AggregateNodeOptions{/*aggregates=*/{{"sum", nullptr, "a"}}, - /*names=*/{"sum(a)"}}; + cp::AggregateNodeOptions{/*aggregates=*/{{"sum", nullptr, "a", "sum(a)"}}}; ARROW_ASSIGN_OR_RAISE( cp::ExecNode * aggregate, cp::MakeExecNode("aggregate", plan.get(), {source}, std::move(aggregate_options))); @@ -541,8 +540,7 @@ arrow::Status SourceGroupAggregateSinkExample(cp::ExecContext& exec_context) { cp::MakeExecNode("source", plan.get(), {}, source_node_options)); auto options = std::make_shared(cp::CountOptions::ONLY_VALID); auto aggregate_options = - cp::AggregateNodeOptions{/*aggregates=*/{{"hash_count", &options, "a"}}, - /*names=*/{"count(a)"}, + cp::AggregateNodeOptions{/*aggregates=*/{{"hash_count", &options, "a", "count(a)"}}, /*keys=*/{"b"}}; ARROW_ASSIGN_OR_RAISE( cp::ExecNode * aggregate, diff --git a/cpp/src/arrow/compute/api_aggregate.h b/cpp/src/arrow/compute/api_aggregate.h index ff994178fc1..9cd06424299 100644 --- a/cpp/src/arrow/compute/api_aggregate.h +++ b/cpp/src/arrow/compute/api_aggregate.h @@ -405,6 +405,9 @@ struct ARROW_EXPORT Aggregate { // fields to which aggregations will be applied FieldRef target; + + // output field name for aggregations + std::string name; }; } // namespace internal diff --git a/cpp/src/arrow/compute/exec/aggregate_node.cc b/cpp/src/arrow/compute/exec/aggregate_node.cc index ace6d9aa0ee..3632f2da0b6 100644 --- a/cpp/src/arrow/compute/exec/aggregate_node.cc +++ b/cpp/src/arrow/compute/exec/aggregate_node.cc @@ -89,7 +89,6 @@ class ScalarAggregateNode : public ExecNode { std::vector kernels(aggregates.size()); std::vector>> states(kernels.size()); FieldVector fields(kernels.size()); - const auto& field_names = aggregate_options.names; std::vector target_field_ids(kernels.size()); for (size_t i = 0; i < kernels.size(); ++i) { @@ -130,7 +129,7 @@ class ScalarAggregateNode : public ExecNode { ARROW_ASSIGN_OR_RAISE( auto descr, 
kernels[i]->signature->out_type().Resolve(&kernel_ctx, {in_type})); - fields[i] = field(field_names[i], std::move(descr.type)); + fields[i] = field(aggregate_options.aggregates[i].name, std::move(descr.type)); } return plan->EmplaceNode( @@ -296,7 +295,6 @@ class GroupByNode : public ExecNode { const auto& keys = aggregate_options.keys; // Copy (need to modify options pointer below) auto aggs = aggregate_options.aggregates; - const auto& field_names = aggregate_options.names; // Get input schema auto input_schema = input->output_schema(); @@ -341,7 +339,8 @@ class GroupByNode : public ExecNode { // Aggregate fields come before key fields to match the behavior of GroupBy function for (size_t i = 0; i < aggs.size(); ++i) { - output_fields[i] = agg_result_fields[i]->WithName(field_names[i]); + output_fields[i] = + agg_result_fields[i]->WithName(aggregate_options.aggregates[i].name); } size_t base = aggs.size(); for (size_t i = 0; i < keys.size(); ++i) { diff --git a/cpp/src/arrow/compute/exec/ir_consumer.cc b/cpp/src/arrow/compute/exec/ir_consumer.cc index 4f539c91fca..f17dbf1ed79 100644 --- a/cpp/src/arrow/compute/exec/ir_consumer.cc +++ b/cpp/src/arrow/compute/exec/ir_consumer.cc @@ -550,8 +550,8 @@ Result Convert(const ir::Relation& rel) { "Support for non-FieldRef arguments to Aggregate.measures"); } - opts.aggregates.push_back({call->function_name, nullptr, *target}); - opts.names.push_back(call->function_name + " " + target->ToString()); + opts.aggregates.push_back({call->function_name, nullptr, *target, + call->function_name + " " + target->ToString()}); } if (!aggregate->groupings()) return UnexpectedNullField("Aggregate.groupings"); diff --git a/cpp/src/arrow/compute/exec/ir_test.cc b/cpp/src/arrow/compute/exec/ir_test.cc index 9f3e3098cc0..d7eb37c185e 100644 --- a/cpp/src/arrow/compute/exec/ir_test.cc +++ b/cpp/src/arrow/compute/exec/ir_test.cc @@ -249,7 +249,8 @@ TEST(Relation, Filter) { } TEST(Relation, AggregateSimple) { - ASSERT_THAT(ConvertJSON(R"({ + ASSERT_THAT( + ConvertJSON(R"({ "impl": { id: {id: 1}, "groupings": [ @@ -347,27 +348,22 @@ TEST(Relation, AggregateSimple) { }, "impl_type": "Aggregate" })"), - ResultWith(Eq(Declaration::Sequence({ - {"catalog_source", - CatalogSourceNodeOptions{"tbl", schema({ - field("foo", int32()), - field("bar", int64()), - field("baz", float64()), - })}, - "0"}, - {"aggregate", - AggregateNodeOptions{/*aggregates=*/{ - {"sum", nullptr, 1}, - {"mean", nullptr, 2}, - }, - /*names=*/ - { - "sum FieldRef.FieldPath(1)", - "mean FieldRef.FieldPath(2)", - }, - /*keys=*/{0}}, - "1"}, - })))); + ResultWith(Eq(Declaration::Sequence({ + {"catalog_source", + CatalogSourceNodeOptions{"tbl", schema({ + field("foo", int32()), + field("bar", int64()), + field("baz", float64()), + })}, + "0"}, + {"aggregate", + AggregateNodeOptions{/*aggregates=*/{ + {"sum", nullptr, 1, "sum FieldRef.FieldPath(1)"}, + {"mean", nullptr, 2, "mean FieldRef.FieldPath(2)"}, + }, + /*keys=*/{0}}, + "1"}, + })))); } TEST(Relation, AggregateWithHaving) { @@ -563,13 +559,8 @@ TEST(Relation, AggregateWithHaving) { {"filter", FilterNodeOptions{less(field_ref(0), literal(3))}, "1"}, {"aggregate", AggregateNodeOptions{/*aggregates=*/{ - {"sum", nullptr, 1}, - {"mean", nullptr, 2}, - }, - /*names=*/ - { - "sum FieldRef.FieldPath(1)", - "mean FieldRef.FieldPath(2)", + {"sum", nullptr, 1, "sum FieldRef.FieldPath(1)"}, + {"mean", nullptr, 2, "mean FieldRef.FieldPath(2)"}, }, /*keys=*/{0}}, "2"}, diff --git a/cpp/src/arrow/compute/exec/options.h b/cpp/src/arrow/compute/exec/options.h index 
7132bd64784..84254ed5ad9 100644 --- a/cpp/src/arrow/compute/exec/options.h +++ b/cpp/src/arrow/compute/exec/options.h @@ -112,15 +112,11 @@ class ARROW_EXPORT ProjectNodeOptions : public ExecNodeOptions { class ARROW_EXPORT AggregateNodeOptions : public ExecNodeOptions { public: AggregateNodeOptions(std::vector aggregates, - std::vector names, std::vector keys = {}) - : aggregates(std::move(aggregates)), - names(std::move(names)), - keys(std::move(keys)) {} + std::vector keys = {}) + : aggregates(std::move(aggregates)), keys(std::move(keys)) {} // aggregations which will be applied to the targetted fields std::vector aggregates; - // output field names for aggregations - std::vector names; // keys by which aggregations will be grouped std::vector keys; }; diff --git a/cpp/src/arrow/compute/exec/plan_test.cc b/cpp/src/arrow/compute/exec/plan_test.cc index 4d1905fc8e7..4ffee8bdc13 100644 --- a/cpp/src/arrow/compute/exec/plan_test.cc +++ b/cpp/src/arrow/compute/exec/plan_test.cc @@ -391,9 +391,10 @@ TEST(ExecPlan, ToString) { }}}, {"aggregate", AggregateNodeOptions{ - /*aggregates=*/{{"hash_sum", nullptr, "multiply(i32, 2)"}, - {"hash_count", options, "multiply(i32, 2)"}}, - /*names=*/{"sum(multiply(i32, 2))", "count(multiply(i32, 2))"}, + /*aggregates=*/{ + {"hash_sum", nullptr, "multiply(i32, 2)", "sum(multiply(i32, 2))"}, + {"hash_count", options, "multiply(i32, 2)", + "count(multiply(i32, 2))"}}, /*keys=*/{"bool"}}}, {"filter", FilterNodeOptions{greater(field_ref("sum(multiply(i32, 2))"), literal(10))}}, @@ -429,16 +430,16 @@ custom_sink_label:OrderBySinkNode{by={sort_keys=[FieldRef.Name(sum(multiply(i32, rhs.label = "rhs"; union_node.inputs.emplace_back(lhs); union_node.inputs.emplace_back(rhs); - ASSERT_OK(Declaration::Sequence( - { - union_node, - {"aggregate", - AggregateNodeOptions{/*aggregates=*/{{"count", options, "i32"}}, - /*names=*/{"count(i32)"}, - /*keys=*/{}}}, - {"sink", SinkNodeOptions{&sink_gen}}, - }) - .AddToPlan(plan.get())); + ASSERT_OK( + Declaration::Sequence( + { + union_node, + {"aggregate", AggregateNodeOptions{ + /*aggregates=*/{{"count", options, "i32", "count(i32)"}}, + /*keys=*/{}}}, + {"sink", SinkNodeOptions{&sink_gen}}, + }) + .AddToPlan(plan.get())); EXPECT_EQ(plan->ToString(), R"a(ExecPlan with 5 nodes: :SinkNode{} :ScalarAggregateNode{aggregates=[ @@ -764,17 +765,17 @@ TEST(ExecPlanExecution, StressSourceGroupedSumStop) { auto random_data = MakeRandomBatches(input_schema, num_batches); SortOptions options({SortKey("a", SortOrder::Ascending)}); - ASSERT_OK(Declaration::Sequence( - { - {"source", SourceNodeOptions{random_data.schema, - random_data.gen(parallel, slow)}}, - {"aggregate", - AggregateNodeOptions{/*aggregates=*/{{"hash_sum", nullptr, "a"}}, - /*names=*/{"sum(a)"}, - /*keys=*/{"b"}}}, - {"sink", SinkNodeOptions{&sink_gen}}, - }) - .AddToPlan(plan.get())); + ASSERT_OK( + Declaration::Sequence( + { + {"source", SourceNodeOptions{random_data.schema, + random_data.gen(parallel, slow)}}, + {"aggregate", AggregateNodeOptions{ + /*aggregates=*/{{"hash_sum", nullptr, "a", "sum(a)"}}, + /*keys=*/{"b"}}}, + {"sink", SinkNodeOptions{&sink_gen}}, + }) + .AddToPlan(plan.get())); ASSERT_OK(plan->Validate()); ASSERT_OK(plan->StartProducing()); @@ -912,17 +913,17 @@ TEST(ExecPlanExecution, SourceGroupedSum) { ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make()); AsyncGenerator> sink_gen; - ASSERT_OK(Declaration::Sequence( - { - {"source", SourceNodeOptions{input.schema, - input.gen(parallel, /*slow=*/false)}}, - {"aggregate", - 
AggregateNodeOptions{/*aggregates=*/{{"hash_sum", nullptr, "i32"}}, - /*names=*/{"sum(i32)"}, - /*keys=*/{"str"}}}, - {"sink", SinkNodeOptions{&sink_gen}}, - }) - .AddToPlan(plan.get())); + ASSERT_OK( + Declaration::Sequence( + { + {"source", + SourceNodeOptions{input.schema, input.gen(parallel, /*slow=*/false)}}, + {"aggregate", AggregateNodeOptions{/*aggregates=*/{{"hash_sum", nullptr, + "i32", "sum(i32)"}}, + /*keys=*/{"str"}}}, + {"sink", SinkNodeOptions{&sink_gen}}, + }) + .AddToPlan(plan.get())); ASSERT_THAT(StartAndCollect(plan.get(), sink_gen), Finishes(ResultWith(UnorderedElementsAreArray({ExecBatchFromJSON( @@ -986,10 +987,9 @@ TEST(ExecPlanExecution, NestedSourceProjectGroupedSum) { field_ref(FieldRef("struct", "bool")), }, {"i32", "bool"}}}, - {"aggregate", - AggregateNodeOptions{/*aggregates=*/{{"hash_sum", nullptr, "i32"}}, - /*names=*/{"sum(i32)"}, - /*keys=*/{"bool"}}}, + {"aggregate", AggregateNodeOptions{/*aggregates=*/{{"hash_sum", nullptr, + "i32", "sum(i32)"}}, + /*keys=*/{"bool"}}}, {"sink", SinkNodeOptions{&sink_gen}}, }) .AddToPlan(plan.get())); @@ -1020,10 +1020,11 @@ TEST(ExecPlanExecution, SourceFilterProjectGroupedSumFilter) { field_ref("str"), call("multiply", {field_ref("i32"), literal(2)}), }}}, - {"aggregate", AggregateNodeOptions{/*aggregates=*/{{"hash_sum", nullptr, - "multiply(i32, 2)"}}, - /*names=*/{"sum(multiply(i32, 2))"}, - /*keys=*/{"str"}}}, + {"aggregate", + AggregateNodeOptions{ + /*aggregates=*/{{"hash_sum", nullptr, "multiply(i32, 2)", + "sum(multiply(i32, 2))"}}, + /*keys=*/{"str"}}}, {"filter", FilterNodeOptions{greater(field_ref("sum(multiply(i32, 2))"), literal(10 * batch_multiplicity))}}, {"sink", SinkNodeOptions{&sink_gen}}, @@ -1059,10 +1060,11 @@ TEST(ExecPlanExecution, SourceFilterProjectGroupedSumOrderBy) { field_ref("str"), call("multiply", {field_ref("i32"), literal(2)}), }}}, - {"aggregate", AggregateNodeOptions{/*aggregates=*/{{"hash_sum", nullptr, - "multiply(i32, 2)"}}, - /*names=*/{"sum(multiply(i32, 2))"}, - /*keys=*/{"str"}}}, + {"aggregate", + AggregateNodeOptions{ + /*aggregates=*/{{"hash_sum", nullptr, "multiply(i32, 2)", + "sum(multiply(i32, 2))"}}, + /*keys=*/{"str"}}}, {"filter", FilterNodeOptions{greater(field_ref("sum(multiply(i32, 2))"), literal(10 * batch_multiplicity))}}, {"order_by_sink", OrderBySinkNodeOptions{options, &sink_gen}}, @@ -1087,22 +1089,22 @@ TEST(ExecPlanExecution, SourceFilterProjectGroupedSumTopK) { AsyncGenerator> sink_gen; SelectKOptions options = SelectKOptions::TopKDefault(/*k=*/1, {"str"}); - ASSERT_OK( - Declaration::Sequence( - { - {"source", - SourceNodeOptions{input.schema, input.gen(parallel, /*slow=*/false)}}, - {"project", ProjectNodeOptions{{ - field_ref("str"), - call("multiply", {field_ref("i32"), literal(2)}), - }}}, - {"aggregate", AggregateNodeOptions{/*aggregates=*/{{"hash_sum", nullptr, - "multiply(i32, 2)"}}, - /*names=*/{"sum(multiply(i32, 2))"}, - /*keys=*/{"str"}}}, - {"select_k_sink", SelectKSinkNodeOptions{options, &sink_gen}}, - }) - .AddToPlan(plan.get())); + ASSERT_OK(Declaration::Sequence( + { + {"source", SourceNodeOptions{input.schema, + input.gen(parallel, /*slow=*/false)}}, + {"project", ProjectNodeOptions{{ + field_ref("str"), + call("multiply", {field_ref("i32"), literal(2)}), + }}}, + {"aggregate", + AggregateNodeOptions{ + /*aggregates=*/{{"hash_sum", nullptr, "multiply(i32, 2)", + "sum(multiply(i32, 2))"}}, + /*keys=*/{"str"}}}, + {"select_k_sink", SelectKSinkNodeOptions{options, &sink_gen}}, + }) + .AddToPlan(plan.get())); ASSERT_THAT( 
StartAndCollect(plan.get(), sink_gen), @@ -1117,18 +1119,19 @@ TEST(ExecPlanExecution, SourceScalarAggSink) { auto basic_data = MakeBasicBatches(); - ASSERT_OK(Declaration::Sequence( - { - {"source", SourceNodeOptions{basic_data.schema, - basic_data.gen(/*parallel=*/false, - /*slow=*/false)}}, - {"aggregate", - AggregateNodeOptions{/*aggregates=*/{{"sum", nullptr, "i32"}, - {"any", nullptr, "bool"}}, - /*names=*/{"sum(i32)", "any(bool)"}}}, - {"sink", SinkNodeOptions{&sink_gen}}, - }) - .AddToPlan(plan.get())); + ASSERT_OK( + Declaration::Sequence( + { + {"source", + SourceNodeOptions{basic_data.schema, basic_data.gen(/*parallel=*/false, + /*slow=*/false)}}, + {"aggregate", AggregateNodeOptions{ + /*aggregates=*/{{"sum", nullptr, "i32", "sum(i32)"}, + {"any", nullptr, "bool", "any(bool)"}}, + }}, + {"sink", SinkNodeOptions{&sink_gen}}, + }) + .AddToPlan(plan.get())); ASSERT_THAT( StartAndCollect(plan.get(), sink_gen), @@ -1155,9 +1158,10 @@ TEST(ExecPlanExecution, AggregationPreservesOptions) { {"source", SourceNodeOptions{basic_data.schema, basic_data.gen(/*parallel=*/false, /*slow=*/false)}}, - {"aggregate", AggregateNodeOptions{ - /*aggregates=*/{{"tdigest", options, "i32"}}, - /*names=*/{"tdigest(i32)"}}}, + {"aggregate", + AggregateNodeOptions{ + /*aggregates=*/{{"tdigest", options, "i32", "tdigest(i32)"}}, + }}, {"sink", SinkNodeOptions{&sink_gen}}, }) .AddToPlan(plan.get())); @@ -1181,10 +1185,10 @@ TEST(ExecPlanExecution, AggregationPreservesOptions) { { {"source", SourceNodeOptions{data.schema, data.gen(/*parallel=*/false, /*slow=*/false)}}, - {"aggregate", AggregateNodeOptions{ - /*aggregates=*/{{"hash_count", options, "i32"}}, - /*names=*/{"count(i32)"}, - /*keys=*/{"str"}}}, + {"aggregate", + AggregateNodeOptions{/*aggregates=*/{{"hash_count", options, + "i32", "count(i32)"}}, + /*keys=*/{"str"}}}, {"sink", SinkNodeOptions{&sink_gen}}, }) .AddToPlan(plan.get())); @@ -1213,28 +1217,24 @@ TEST(ExecPlanExecution, ScalarSourceScalarAggSink) { // index can't be tested as it's order-dependent // mode/quantile can't be tested as they're technically vector kernels - ASSERT_OK( - Declaration::Sequence( - { - {"source", - SourceNodeOptions{scalar_data.schema, scalar_data.gen(/*parallel=*/false, - /*slow=*/false)}}, - {"aggregate", AggregateNodeOptions{ - /*aggregates=*/{{"all", nullptr, "b"}, - {"any", nullptr, "b"}, - {"count", nullptr, "a"}, - {"mean", nullptr, "a"}, - {"product", nullptr, "a"}, - {"stddev", nullptr, "a"}, - {"sum", nullptr, "a"}, - {"tdigest", nullptr, "a"}, - {"variance", nullptr, "a"}}, - /*names=*/ - {"all(b)", "any(b)", "count(a)", "mean(a)", "product(a)", - "stddev(a)", "sum(a)", "tdigest(a)", "variance(a)"}}}, - {"sink", SinkNodeOptions{&sink_gen}}, - }) - .AddToPlan(plan.get())); + ASSERT_OK(Declaration::Sequence( + { + {"source", SourceNodeOptions{scalar_data.schema, + scalar_data.gen(/*parallel=*/false, + /*slow=*/false)}}, + {"aggregate", AggregateNodeOptions{/*aggregates=*/{ + {"all", nullptr, "b", "all(b)"}, + {"any", nullptr, "b", "any(b)"}, + {"count", nullptr, "a", "count(a)"}, + {"mean", nullptr, "a", "mean(a)"}, + {"product", nullptr, "a", "product(a)"}, + {"stddev", nullptr, "a", "stddev(a)"}, + {"sum", nullptr, "a", "sum(a)"}, + {"tdigest", nullptr, "a", "tdigest(a)"}, + {"variance", nullptr, "a", "variance(a)"}}}}, + {"sink", SinkNodeOptions{&sink_gen}}, + }) + .AddToPlan(plan.get())); ASSERT_THAT( StartAndCollect(plan.get(), sink_gen), @@ -1264,18 +1264,18 @@ TEST(ExecPlanExecution, ScalarSourceGroupedSum) { scalar_data.schema = schema({field("a", 
int32()), field("b", boolean())}); SortOptions options({SortKey("b", SortOrder::Descending)}); - ASSERT_OK(Declaration::Sequence( - { - {"source", SourceNodeOptions{scalar_data.schema, - scalar_data.gen(/*parallel=*/false, - /*slow=*/false)}}, - {"aggregate", - AggregateNodeOptions{/*aggregates=*/{{"hash_sum", nullptr, "a"}}, - /*names=*/{"hash_sum(a)"}, - /*keys=*/{"b"}}}, - {"order_by_sink", OrderBySinkNodeOptions{options, &sink_gen}}, - }) - .AddToPlan(plan.get())); + ASSERT_OK( + Declaration::Sequence( + { + {"source", + SourceNodeOptions{scalar_data.schema, scalar_data.gen(/*parallel=*/false, + /*slow=*/false)}}, + {"aggregate", AggregateNodeOptions{/*aggregates=*/{{"hash_sum", nullptr, + "a", "hash_sum(a)"}}, + /*keys=*/{"b"}}}, + {"order_by_sink", OrderBySinkNodeOptions{options, &sink_gen}}, + }) + .AddToPlan(plan.get())); ASSERT_THAT(StartAndCollect(plan.get(), sink_gen), Finishes(ResultWith(UnorderedElementsAreArray({ diff --git a/cpp/src/arrow/compute/exec/test_util.cc b/cpp/src/arrow/compute/exec/test_util.cc index e336b245bb7..762bf320f03 100644 --- a/cpp/src/arrow/compute/exec/test_util.cc +++ b/cpp/src/arrow/compute/exec/test_util.cc @@ -315,9 +315,10 @@ bool operator==(const Declaration& l, const Declaration& r) { if (!l_agg->options->Equals(*r_agg->options)) return false; if (l_agg->target != r_agg->target) return false; + if (l_agg->name != r_agg->name) return false; } - return l_opts->names == r_opts->names && l_opts->keys == r_opts->keys; + return l_opts->keys == r_opts->keys; } if (l.factory_name == "order_by_sink") { @@ -380,16 +381,11 @@ static inline void PrintToImpl(const std::string& factory_name, *os << agg.function << "<"; if (agg.options) PrintTo(*agg.options, os); *os << agg.target.ToString() << "<"; + *os << agg.name << "<"; *os << ">,"; } *os << "},"; - *os << "names={"; - for (const auto& name : o->names) { - *os << name << ","; - } - *os << "}"; - if (!o->keys.empty()) { *os << ",keys={"; for (const auto& key : o->keys) { diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc b/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc index 51c7aea1f37..0c3e219d439 100644 --- a/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc +++ b/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc @@ -126,10 +126,6 @@ Result GroupByUsingExecPlan(const BatchesWithSchema& input, const std::vector& aggregates, bool use_threads, ExecContext* ctx) { std::vector keys(key_names.size()); - std::vector names(aggregates.size()); - for (size_t i = 0; i < aggregates.size(); ++i) { - names[i] = aggregates[i].function; - } for (size_t i = 0; i < key_names.size(); ++i) { keys[i] = FieldRef(key_names[i]); } @@ -141,8 +137,7 @@ Result GroupByUsingExecPlan(const BatchesWithSchema& input, { {"source", SourceNodeOptions{input.schema, input.gen(use_threads, /*slow=*/false)}}, - {"aggregate", AggregateNodeOptions{std::move(aggregates), std::move(names), - std::move(keys)}}, + {"aggregate", AggregateNodeOptions{std::move(aggregates), std::move(keys)}}, {"sink", SinkNodeOptions{&sink_gen}}, }) .AddToPlan(plan.get())); @@ -750,7 +745,7 @@ TEST(GroupBy, NoBatches) { Datum aggregated_and_grouped, GroupByTest({table->GetColumnByName("argument")}, {table->GetColumnByName("key")}, { - {"hash_count", nullptr, {"agg_0"}}, + {"hash_count", nullptr, "agg_0", "hash_count"}, }, /*use_threads=*/true, /*use_exec_plan=*/true)); AssertDatumsEqual(ArrayFromJSON(struct_({ @@ -815,7 +810,7 @@ TEST(GroupBy, CountOnly) { GroupByTest({table->GetColumnByName("argument")}, {table->GetColumnByName("key")}, { 
- {"hash_count", nullptr, "agg_0"}, + {"hash_count", nullptr, "agg_0", "hash_count"}, }, use_threads, use_exec_plan)); SortBy({"key_0"}, &aggregated_and_grouped); @@ -852,14 +847,15 @@ TEST(GroupBy, CountScalar) { auto count_all = std::make_shared(CountOptions::ALL); for (bool use_threads : {true, false}) { SCOPED_TRACE(use_threads ? "parallel/merged" : "serial"); - ASSERT_OK_AND_ASSIGN(Datum actual, - GroupByUsingExecPlan(input, {"key"}, - { - {"hash_count", skip_nulls, "argument"}, - {"hash_count", keep_nulls, "argument"}, - {"hash_count", count_all, "argument"}, - }, - use_threads, default_exec_context())); + ASSERT_OK_AND_ASSIGN( + Datum actual, + GroupByUsingExecPlan(input, {"key"}, + { + {"hash_count", skip_nulls, "argument", "hash_count"}, + {"hash_count", keep_nulls, "argument", "hash_count"}, + {"hash_count", count_all, "argument", "hash_count"}, + }, + use_threads, default_exec_context())); Datum expected = ArrayFromJSON(struct_({ field("hash_count", int64()), field("hash_count", int64()), @@ -902,7 +898,7 @@ TEST(GroupBy, SumOnly) { GroupByTest({table->GetColumnByName("argument")}, {table->GetColumnByName("key")}, { - {"hash_sum", nullptr, "agg_0"}, + {"hash_sum", nullptr, "agg_0", "hash_sum"}, }, use_threads, use_exec_plan)); SortBy({"key_0"}, &aggregated_and_grouped); @@ -967,12 +963,12 @@ TEST(GroupBy, SumMeanProductDecimal) { }, {table->GetColumnByName("key")}, { - {"hash_sum", nullptr, "agg_0"}, - {"hash_sum", nullptr, "agg_1"}, - {"hash_mean", nullptr, "agg_2"}, - {"hash_mean", nullptr, "agg_3"}, - {"hash_product", nullptr, "agg_4"}, - {"hash_product", nullptr, "agg_5"}, + {"hash_sum", nullptr, "agg_0", "hash_sum"}, + {"hash_sum", nullptr, "agg_1", "hash_sum"}, + {"hash_mean", nullptr, "agg_2", "hash_mean"}, + {"hash_mean", nullptr, "agg_3", "hash_mean"}, + {"hash_product", nullptr, "agg_4", "hash_product"}, + {"hash_product", nullptr, "agg_5", "hash_product"}, }, use_threads, use_exec_plan)); SortBy({"key_0"}, &aggregated_and_grouped); @@ -1021,17 +1017,17 @@ TEST(GroupBy, MeanOnly) { [null, 3] ])"}); - auto min_count = - std::make_shared(/*skip_nulls=*/true, /*min_count=*/3); - ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped, - internal::GroupBy({table->GetColumnByName("argument"), - table->GetColumnByName("argument")}, - {table->GetColumnByName("key")}, - { - {"hash_mean", nullptr, "agg_0"}, - {"hash_mean", min_count, "agg_1"}, - }, - use_threads)); + ScalarAggregateOptions min_count(/*skip_nulls=*/true, /*min_count=*/3); + ASSERT_OK_AND_ASSIGN( + Datum aggregated_and_grouped, + internal::GroupBy( + {table->GetColumnByName("argument"), table->GetColumnByName("argument")}, + {table->GetColumnByName("key")}, + { + {"hash_mean", nullptr, "agg_0", "hash_mean"}, + {"hash_mean", min_count, "agg_1", "hash_mean"}, + }, + use_threads)); SortBy({"key_0"}, &aggregated_and_grouped); AssertDatumsApproxEqual(ArrayFromJSON(struct_({ @@ -1063,14 +1059,15 @@ TEST(GroupBy, SumMeanProductScalar) { for (bool use_threads : {true, false}) { SCOPED_TRACE(use_threads ? 
"parallel/merged" : "serial"); - ASSERT_OK_AND_ASSIGN(Datum actual, - GroupByUsingExecPlan(input, {"key"}, - { - {"hash_sum", nullptr, "argument"}, - {"hash_mean", nullptr, "argument"}, - {"hash_product", nullptr, "argument"}, - }, - use_threads, default_exec_context())); + ASSERT_OK_AND_ASSIGN( + Datum actual, + GroupByUsingExecPlan(input, {"key"}, + { + {"hash_sum", nullptr, "argument", "hash_sum"}, + {"hash_mean", nullptr, "argument", "hash_mean"}, + {"hash_product", nullptr, "argument", "hash_product"}, + }, + use_threads, default_exec_context())); Datum expected = ArrayFromJSON(struct_({ field("hash_sum", int64()), field("hash_mean", float64()), @@ -1111,8 +1108,8 @@ TEST(GroupBy, VarianceAndStddev) { batch->GetColumnByName("key"), }, { - {"hash_variance", nullptr, "agg_0"}, - {"hash_stddev", nullptr, "agg_1"}, + {"hash_variance", nullptr, "agg_0", "hash_variance_0"}, + {"hash_stddev", nullptr, "agg_1", "hash_stddev_1"}, })); AssertDatumsApproxEqual(ArrayFromJSON(struct_({ @@ -1153,8 +1150,8 @@ TEST(GroupBy, VarianceAndStddev) { batch->GetColumnByName("key"), }, { - {"hash_variance", nullptr, "agg_0"}, - {"hash_stddev", nullptr, "agg_1"}, + {"hash_variance", nullptr, "agg_0", "hash_variance_0"}, + {"hash_stddev", nullptr, "agg_1", "hash_stddev_1"}, })); AssertDatumsApproxEqual(ArrayFromJSON(struct_({ @@ -1172,20 +1169,21 @@ TEST(GroupBy, VarianceAndStddev) { /*verbose=*/true); // Test ddof - auto variance_options = std::make_shared(/*ddof=*/2); - ASSERT_OK_AND_ASSIGN(aggregated_and_grouped, - internal::GroupBy( - { - batch->GetColumnByName("argument"), - batch->GetColumnByName("argument"), - }, - { - batch->GetColumnByName("key"), - }, - { - {"hash_variance", variance_options, "agg_0"}, - {"hash_stddev", variance_options, "agg_1"}, - })); + VarianceOptions variance_options(/*ddof=*/2); + ASSERT_OK_AND_ASSIGN( + aggregated_and_grouped, + internal::GroupBy( + { + batch->GetColumnByName("argument"), + batch->GetColumnByName("argument"), + }, + { + batch->GetColumnByName("key"), + }, + { + {"hash_variance", variance_options, "agg_0", "hash_variance_0"}, + {"hash_stddev", variance_options, "agg_1", "hash_stddev_0"}, + })); AssertDatumsApproxEqual(ArrayFromJSON(struct_({ field("hash_variance", float64()), @@ -1229,10 +1227,10 @@ TEST(GroupBy, VarianceAndStddevDecimal) { batch->GetColumnByName("key"), }, { - {"hash_variance", nullptr, "agg_0"}, - {"hash_stddev", nullptr, "agg_1"}, - {"hash_variance", nullptr, "agg_2"}, - {"hash_stddev", nullptr, "agg_3"}, + {"hash_variance", nullptr, "agg_0", "hash_variance_0"}, + {"hash_stddev", nullptr, "agg_1", "hash_stddev_1"}, + {"hash_variance", nullptr, "agg_2", "hash_variance_2"}, + {"hash_stddev", nullptr, "agg_3", "hash_stddev_3"}, })); AssertDatumsApproxEqual(ArrayFromJSON(struct_({ @@ -1270,40 +1268,37 @@ TEST(GroupBy, TDigest) { [null, 4] ])"); - auto options1 = std::make_shared(std::vector{0.5, 0.9, 0.99}); - auto options2 = - std::make_shared(std::vector{0.5, 0.9, 0.99}, /*delta=*/50, - /*buffer_size=*/1024); - auto keep_nulls = - std::make_shared(/*q=*/0.5, /*delta=*/100, /*buffer_size=*/500, - /*skip_nulls=*/false, /*min_count=*/0); - auto min_count = - std::make_shared(/*q=*/0.5, /*delta=*/100, /*buffer_size=*/500, - /*skip_nulls=*/true, /*min_count=*/3); - auto keep_nulls_min_count = - std::make_shared(/*q=*/0.5, /*delta=*/100, /*buffer_size=*/500, - /*skip_nulls=*/false, /*min_count=*/3); - ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped, - internal::GroupBy( - { - batch->GetColumnByName("argument"), - 
batch->GetColumnByName("argument"), - batch->GetColumnByName("argument"), - batch->GetColumnByName("argument"), - batch->GetColumnByName("argument"), - batch->GetColumnByName("argument"), - }, - { - batch->GetColumnByName("key"), - }, - { - {"hash_tdigest", nullptr, "agg_0"}, - {"hash_tdigest", options1, "agg_1"}, - {"hash_tdigest", options2, "agg_2"}, - {"hash_tdigest", keep_nulls, "agg_3"}, - {"hash_tdigest", min_count, "agg_4"}, - {"hash_tdigest", keep_nulls_min_count, "agg_5"}, - })); + TDigestOptions options1(std::vector{0.5, 0.9, 0.99}); + TDigestOptions options2(std::vector{0.5, 0.9, 0.99}, /*delta=*/50, + /*buffer_size=*/1024); + TDigestOptions keep_nulls(/*q=*/0.5, /*delta=*/100, /*buffer_size=*/500, + /*skip_nulls=*/false, /*min_count=*/0); + TDigestOptions min_count(/*q=*/0.5, /*delta=*/100, /*buffer_size=*/500, + /*skip_nulls=*/true, /*min_count=*/3); + TDigestOptions keep_nulls_min_count(/*q=*/0.5, /*delta=*/100, /*buffer_size=*/500, + /*skip_nulls=*/false, /*min_count=*/3); + ASSERT_OK_AND_ASSIGN( + Datum aggregated_and_grouped, + internal::GroupBy( + { + batch->GetColumnByName("argument"), + batch->GetColumnByName("argument"), + batch->GetColumnByName("argument"), + batch->GetColumnByName("argument"), + batch->GetColumnByName("argument"), + batch->GetColumnByName("argument"), + }, + { + batch->GetColumnByName("key"), + }, + { + {"hash_tdigest", nullptr, "agg_0", "hash_tdigest_0"}, + {"hash_tdigest", options1, "agg_1", "hash_tdigest_1"}, + {"hash_tdigest", options2, "agg_2", "hash_tdigest_2"}, + {"hash_tdigest", keep_nulls, "agg_3", "hash_tdigest_3"}, + {"hash_tdigest", min_count, "agg_4", "hash_tdigest_4"}, + {"hash_tdigest", keep_nulls_min_count, "agg_5", "hash_tdigest_5"}, + })); AssertDatumsApproxEqual( ArrayFromJSON(struct_({ @@ -1349,8 +1344,8 @@ TEST(GroupBy, TDigestDecimal) { }, {batch->GetColumnByName("key")}, { - {"hash_tdigest", nullptr, "agg_0"}, - {"hash_tdigest", nullptr, "agg_1"}, + {"hash_tdigest", nullptr, "agg_0", "hash_tdigest_0"}, + {"hash_tdigest", nullptr, "agg_1", "hash_tdigest_1"}, })); AssertDatumsApproxEqual( @@ -1395,24 +1390,27 @@ TEST(GroupBy, ApproximateMedian) { /*skip_nulls=*/true, /*min_count=*/3); auto keep_nulls_min_count = std::make_shared( /*skip_nulls=*/false, /*min_count=*/3); - ASSERT_OK_AND_ASSIGN( - Datum aggregated_and_grouped, - internal::GroupBy( - { - batch->GetColumnByName("argument"), - batch->GetColumnByName("argument"), - batch->GetColumnByName("argument"), - batch->GetColumnByName("argument"), - }, - { - batch->GetColumnByName("key"), - }, - { - {"hash_approximate_median", options, "agg_0"}, - {"hash_approximate_median", keep_nulls, "agg_1"}, - {"hash_approximate_median", min_count, "agg_2"}, - {"hash_approximate_median", keep_nulls_min_count, "agg_3"}, - })); + ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped, + internal::GroupBy( + { + batch->GetColumnByName("argument"), + batch->GetColumnByName("argument"), + batch->GetColumnByName("argument"), + batch->GetColumnByName("argument"), + }, + { + batch->GetColumnByName("key"), + }, + { + {"hash_approximate_median", options, "agg_0", + "hash_approximate_median_0"}, + {"hash_approximate_median", keep_nulls, "agg_1", + "hash_approximate_median_1"}, + {"hash_approximate_median", min_count, "agg_2", + "hash_approximate_median_2"}, + {"hash_approximate_median", keep_nulls_min_count, + "agg_3", "hash_approximate_median_3"}, + })); AssertDatumsApproxEqual(ArrayFromJSON(struct_({ field("hash_approximate_median", float64()), @@ -1450,17 +1448,18 @@ TEST(GroupBy, 
StddevVarianceTDigestScalar) { for (bool use_threads : {false}) { SCOPED_TRACE(use_threads ? "parallel/merged" : "serial"); - ASSERT_OK_AND_ASSIGN(Datum actual, - GroupByUsingExecPlan(input, {"key"}, - { - {"hash_stddev", nullptr, "argument"}, - {"hash_variance", nullptr, "argument"}, - {"hash_tdigest", nullptr, "argument"}, - {"hash_stddev", nullptr, "argument1"}, - {"hash_variance", nullptr, "argument1"}, - {"hash_tdigest", nullptr, "argument1"}, - }, - use_threads, default_exec_context())); + ASSERT_OK_AND_ASSIGN( + Datum actual, + GroupByUsingExecPlan(input, {"key"}, + { + {"hash_stddev", nullptr, "argument", "hash_stddev"}, + {"hash_variance", nullptr, "argument", "hash_variance"}, + {"hash_tdigest", nullptr, "argument", "hash_tdigest"}, + {"hash_stddev", nullptr, "argument1", "hash_stddev"}, + {"hash_variance", nullptr, "argument1", "hash_variance"}, + {"hash_tdigest", nullptr, "argument1", "hash_tdigest"}, + }, + use_threads, default_exec_context())); Datum expected = ArrayFromJSON(struct_({ field("hash_stddev", float64()), @@ -1510,16 +1509,17 @@ TEST(GroupBy, VarianceOptions) { SCOPED_TRACE(use_threads ? "parallel/merged" : "serial"); ASSERT_OK_AND_ASSIGN( Datum actual, - GroupByUsingExecPlan(input, {"key"}, - { - {"hash_stddev", keep_nulls, "argument"}, - {"hash_stddev", min_count, "argument"}, - {"hash_stddev", keep_nulls_min_count, "argument"}, - {"hash_variance", keep_nulls, "argument"}, - {"hash_variance", min_count, "argument"}, - {"hash_variance", keep_nulls_min_count, "argument"}, - }, - use_threads, default_exec_context())); + GroupByUsingExecPlan( + input, {"key"}, + { + {"hash_stddev", keep_nulls, "argument", "hash_stddev"}, + {"hash_stddev", min_count, "argument", "hash_stddev"}, + {"hash_stddev", keep_nulls_min_count, "argument", "hash_stddev"}, + {"hash_variance", keep_nulls, "argument", "hash_variance"}, + {"hash_variance", min_count, "argument", "hash_variance"}, + {"hash_variance", keep_nulls_min_count, "argument", "hash_variance"}, + }, + use_threads, default_exec_context())); Datum expected = ArrayFromJSON(struct_({ field("hash_stddev", float64()), field("hash_stddev", float64()), @@ -1540,16 +1540,17 @@ TEST(GroupBy, VarianceOptions) { ASSERT_OK_AND_ASSIGN( actual, - GroupByUsingExecPlan(input, {"key"}, - { - {"hash_stddev", keep_nulls, "argument1"}, - {"hash_stddev", min_count, "argument1"}, - {"hash_stddev", keep_nulls_min_count, "argument1"}, - {"hash_variance", keep_nulls, "argument1"}, - {"hash_variance", min_count, "argument1"}, - {"hash_variance", keep_nulls_min_count, "argument1"}, - }, - use_threads, default_exec_context())); + GroupByUsingExecPlan( + input, {"key"}, + { + {"hash_stddev", keep_nulls, "argument1", "hash_stddev"}, + {"hash_stddev", min_count, "argument1", "hash_stddev"}, + {"hash_stddev", keep_nulls_min_count, "argument1", "hash_stddev"}, + {"hash_variance", keep_nulls, "argument1", "hash_variance"}, + {"hash_variance", min_count, "argument1", "hash_variance"}, + {"hash_variance", keep_nulls_min_count, "argument1", "hash_variance"}, + }, + use_threads, default_exec_context())); expected = ArrayFromJSON(struct_({ field("hash_stddev", float64()), field("hash_stddev", float64()), @@ -1607,9 +1608,9 @@ TEST(GroupBy, MinMaxOnly) { }, {table->GetColumnByName("key")}, { - {"hash_min_max", nullptr, "agg_0"}, - {"hash_min_max", nullptr, "agg_1"}, - {"hash_min_max", nullptr, "agg_2"}, + {"hash_min_max", nullptr, "agg_0", "hash_min_max"}, + {"hash_min_max", nullptr, "agg_1", "hash_min_max"}, + {"hash_min_max", nullptr, "agg_2", "hash_min_max"}, }, 
use_threads, use_exec_plan)); ValidateOutput(aggregated_and_grouped); @@ -1711,11 +1712,11 @@ TEST(GroupBy, MinMaxTypes) { auto table = TableFromJSON(in_schema, (ty->name() == "date64") ? date64_table : default_table); - ASSERT_OK_AND_ASSIGN( - Datum aggregated_and_grouped, - GroupByTest({table->GetColumnByName("argument0")}, - {table->GetColumnByName("key")}, {{"hash_min_max", nullptr, "agg_0"}}, - /*use_threads=*/true, /*use_exec_plan=*/true)); + ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped, + GroupByTest({table->GetColumnByName("argument0")}, + {table->GetColumnByName("key")}, + {{"hash_min_max", nullptr, "agg_0", "hash_min_max"}}, + /*use_threads=*/true, /*use_exec_plan=*/true)); ValidateOutput(aggregated_and_grouped); SortBy({"key_0"}, &aggregated_and_grouped); @@ -1768,8 +1769,8 @@ TEST(GroupBy, MinMaxDecimal) { }, {table->GetColumnByName("key")}, { - {"hash_min_max", nullptr, "agg_0"}, - {"hash_min_max", nullptr, "agg_1"}, + {"hash_min_max", nullptr, "agg_0", "hash_min_max"}, + {"hash_min_max", nullptr, "agg_1", "hash_min_max"}, }, use_threads, use_exec_plan)); ValidateOutput(aggregated_and_grouped); @@ -1827,11 +1828,12 @@ TEST(GroupBy, MinMaxBinary) { [null, 3] ])"}); - ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped, - GroupByTest({table->GetColumnByName("argument0")}, - {table->GetColumnByName("key")}, - {{"hash_min_max", nullptr, "agg_0"}}, - use_threads, use_exec_plan)); + ASSERT_OK_AND_ASSIGN( + Datum aggregated_and_grouped, + GroupByTest({table->GetColumnByName("argument0")}, + {table->GetColumnByName("key")}, + {{"hash_min_max", nullptr, "agg_0", "hash_min_max"}}, use_threads, + use_exec_plan)); ValidateOutput(aggregated_and_grouped); SortBy({"key_0"}, &aggregated_and_grouped); @@ -1885,7 +1887,8 @@ TEST(GroupBy, MinMaxFixedSizeBinary) { Datum aggregated_and_grouped, GroupByTest({table->GetColumnByName("argument0")}, {table->GetColumnByName("key")}, - {{"hash_min_max", nullptr, "agg_0"}}, use_threads, use_exec_plan)); + {{"hash_min_max", nullptr, "agg_0", "hash_min_max"}}, use_threads, + use_exec_plan)); ValidateOutput(aggregated_and_grouped); SortBy({"key_0"}, &aggregated_and_grouped); @@ -1938,8 +1941,8 @@ TEST(GroupBy, MinOrMax) { table->GetColumnByName("argument")}, {table->GetColumnByName("key")}, { - {"hash_min", nullptr, "agg_0"}, - {"hash_max", nullptr, "agg_1"}, + {"hash_min", nullptr, "agg_0", "hash_min"}, + {"hash_max", nullptr, "agg_1", "hash_max"}, }, /*use_threads=*/true, /*use_exec_plan=*/true)); SortBy({"key_0"}, &aggregated_and_grouped); @@ -1975,7 +1978,8 @@ TEST(GroupBy, MinMaxScalar) { SCOPED_TRACE(use_threads ? 
"parallel/merged" : "serial"); ASSERT_OK_AND_ASSIGN( Datum actual, - GroupByUsingExecPlan(input, {"key"}, {{"hash_min_max", nullptr, "argument"}}, + GroupByUsingExecPlan(input, {"key"}, + {{"hash_min_max", nullptr, "argument", "hash_min_max"}}, use_threads, default_exec_context())); Datum expected = ArrayFromJSON(struct_({ @@ -2039,14 +2043,14 @@ TEST(GroupBy, AnyAndAll) { }, {table->GetColumnByName("key")}, { - {"hash_any", no_min, "agg_0"}, - {"hash_any", min_count, "agg_1"}, - {"hash_any", keep_nulls, "agg_2"}, - {"hash_any", keep_nulls_min_count, "agg_3"}, - {"hash_all", no_min, "agg_4"}, - {"hash_all", min_count, "agg_5"}, - {"hash_all", keep_nulls, "agg_6"}, - {"hash_all", keep_nulls_min_count, "agg_7"}, + {"hash_any", no_min, "agg_0", "hash_any"}, + {"hash_any", min_count, "agg_1", "hash_any"}, + {"hash_any", keep_nulls, "agg_2", "hash_any"}, + {"hash_any", keep_nulls_min_count, "agg_3", "hash_any"}, + {"hash_all", no_min, "agg_4", "hash_all"}, + {"hash_all", min_count, "agg_5", "hash_all"}, + {"hash_all", keep_nulls, "agg_6", "hash_all"}, + {"hash_all", keep_nulls_min_count, "agg_7", "hash_all"}, }, use_threads)); SortBy({"key_0"}, &aggregated_and_grouped); @@ -2096,15 +2100,16 @@ TEST(GroupBy, AnyAllScalar) { std::make_shared(/*skip_nulls=*/false, /*min_count=*/0); for (bool use_threads : {true, false}) { SCOPED_TRACE(use_threads ? "parallel/merged" : "serial"); - ASSERT_OK_AND_ASSIGN(Datum actual, - GroupByUsingExecPlan(input, {"key"}, - { - {"hash_any", nullptr, "argument"}, - {"hash_all", nullptr, "argument"}, - {"hash_any", keep_nulls, "argument"}, - {"hash_all", keep_nulls, "argument"}, - }, - use_threads, default_exec_context())); + ASSERT_OK_AND_ASSIGN( + Datum actual, + GroupByUsingExecPlan(input, {"key"}, + { + {"hash_any", nullptr, "argument", "hash_any"}, + {"hash_all", nullptr, "argument", "hash_all"}, + {"hash_any", keep_nulls, "argument", "hash_any"}, + {"hash_all", keep_nulls, "argument", "hash_all"}, + }, + use_threads, default_exec_context())); Datum expected = ArrayFromJSON(struct_({ field("hash_any", boolean()), field("hash_all", boolean()), @@ -2159,22 +2164,23 @@ TEST(GroupBy, CountDistinct) { [3, null] ])"}); - ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped, - internal::GroupBy( - { - table->GetColumnByName("argument"), - table->GetColumnByName("argument"), - table->GetColumnByName("argument"), - }, - { - table->GetColumnByName("key"), - }, - { - {"hash_count_distinct", all, "agg_0"}, - {"hash_count_distinct", only_valid, "agg_1"}, - {"hash_count_distinct", only_null, "agg_2"}, - }, - use_threads)); + ASSERT_OK_AND_ASSIGN( + Datum aggregated_and_grouped, + internal::GroupBy( + { + table->GetColumnByName("argument"), + table->GetColumnByName("argument"), + table->GetColumnByName("argument"), + }, + { + table->GetColumnByName("key"), + }, + { + {"hash_count_distinct", all, "agg_0", "hash_count_distinct"}, + {"hash_count_distinct", only_valid, "agg_1", "hash_count_distinct"}, + {"hash_count_distinct", only_null, "agg_2", "hash_count_distinct"}, + }, + use_threads)); SortBy({"key_0"}, &aggregated_and_grouped); ValidateOutput(aggregated_and_grouped); @@ -2225,22 +2231,23 @@ TEST(GroupBy, CountDistinct) { ["b", null] ])"}); - ASSERT_OK_AND_ASSIGN(aggregated_and_grouped, - internal::GroupBy( - { - table->GetColumnByName("argument"), - table->GetColumnByName("argument"), - table->GetColumnByName("argument"), - }, - { - table->GetColumnByName("key"), - }, - { - {"hash_count_distinct", all, "agg_0"}, - {"hash_count_distinct", only_valid, "agg_1"}, - 
{"hash_count_distinct", only_null, "agg_2"}, - }, - use_threads)); + ASSERT_OK_AND_ASSIGN( + aggregated_and_grouped, + internal::GroupBy( + { + table->GetColumnByName("argument"), + table->GetColumnByName("argument"), + table->GetColumnByName("argument"), + }, + { + table->GetColumnByName("key"), + }, + { + {"hash_count_distinct", all, "agg_0", "hash_count_distinct"}, + {"hash_count_distinct", only_valid, "agg_1", "hash_count_distinct"}, + {"hash_count_distinct", only_null, "agg_2", "hash_count_distinct"}, + }, + use_threads)); ValidateOutput(aggregated_and_grouped); SortBy({"key_0"}, &aggregated_and_grouped); @@ -2271,22 +2278,23 @@ TEST(GroupBy, CountDistinct) { ])", }); - ASSERT_OK_AND_ASSIGN(aggregated_and_grouped, - internal::GroupBy( - { - table->GetColumnByName("argument"), - table->GetColumnByName("argument"), - table->GetColumnByName("argument"), - }, - { - table->GetColumnByName("key"), - }, - { - {"hash_count_distinct", all, "agg_0"}, - {"hash_count_distinct", only_valid, "agg_1"}, - {"hash_count_distinct", only_null, "agg_2"}, - }, - use_threads)); + ASSERT_OK_AND_ASSIGN( + aggregated_and_grouped, + internal::GroupBy( + { + table->GetColumnByName("argument"), + table->GetColumnByName("argument"), + table->GetColumnByName("argument"), + }, + { + table->GetColumnByName("key"), + }, + { + {"hash_count_distinct", all, "agg_0", "hash_count_distinct"}, + {"hash_count_distinct", only_valid, "agg_1", "hash_count_distinct"}, + {"hash_count_distinct", only_null, "agg_2", "hash_count_distinct"}, + }, + use_threads)); ValidateOutput(aggregated_and_grouped); SortBy({"key_0"}, &aggregated_and_grouped); @@ -2354,9 +2362,9 @@ TEST(GroupBy, Distinct) { table->GetColumnByName("key"), }, { - {"hash_distinct", all, "agg_0"}, - {"hash_distinct", only_valid, "agg_1"}, - {"hash_distinct", only_null, "agg_2"}, + {"hash_distinct", all, "agg_0", "hash_distinct"}, + {"hash_distinct", only_valid, "agg_1", "hash_distinct"}, + {"hash_distinct", only_null, "agg_2", "hash_distinct"}, }, use_threads)); ValidateOutput(aggregated_and_grouped); @@ -2427,9 +2435,9 @@ TEST(GroupBy, Distinct) { table->GetColumnByName("key"), }, { - {"hash_distinct", all, "agg_0"}, - {"hash_distinct", only_valid, "agg_1"}, - {"hash_distinct", only_null, "agg_2"}, + {"hash_distinct", all, "agg_0", "hash_distinct"}, + {"hash_distinct", only_valid, "agg_1", "hash_distinct"}, + {"hash_distinct", only_null, "agg_2", "hash_distinct"}, }, use_threads)); ValidateOutput(aggregated_and_grouped); @@ -2491,12 +2499,12 @@ TEST(GroupBy, OneMiscTypes) { }, {table->GetColumnByName("key")}, { - {"hash_one", nullptr, "agg_0"}, - {"hash_one", nullptr, "agg_1"}, - {"hash_one", nullptr, "agg_2"}, - {"hash_one", nullptr, "agg_3"}, - {"hash_one", nullptr, "agg_4"}, - {"hash_one", nullptr, "agg_5"}, + {"hash_one", nullptr, "agg_0", "hash_one"}, + {"hash_one", nullptr, "agg_1", "hash_one"}, + {"hash_one", nullptr, "agg_2", "hash_one"}, + {"hash_one", nullptr, "agg_3", "hash_one"}, + {"hash_one", nullptr, "agg_4", "hash_one"}, + {"hash_one", nullptr, "agg_5", "hash_one"}, }, use_threads, use_exec_plan)); ValidateOutput(aggregated_and_grouped); @@ -2619,11 +2627,11 @@ TEST(GroupBy, OneNumericTypes) { auto table = TableFromJSON(in_schema, (type->name() == "date64") ? 
temporal_table_json : numeric_table_json); - ASSERT_OK_AND_ASSIGN( - Datum aggregated_and_grouped, - GroupByTest({table->GetColumnByName("argument0")}, - {table->GetColumnByName("key")}, {{"hash_one", nullptr, "agg_0"}}, - use_threads, use_exec_plan)); + ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped, + GroupByTest({table->GetColumnByName("argument0")}, + {table->GetColumnByName("key")}, + {{"hash_one", nullptr, "agg_0", "hash_one"}}, + use_threads, use_exec_plan)); ValidateOutput(aggregated_and_grouped); SortBy({"key_0"}, &aggregated_and_grouped); @@ -2682,11 +2690,11 @@ TEST(GroupBy, OneBinaryTypes) { [null, 3] ])"}); - ASSERT_OK_AND_ASSIGN( - Datum aggregated_and_grouped, - GroupByTest({table->GetColumnByName("argument0")}, - {table->GetColumnByName("key")}, {{"hash_one", nullptr, "agg_0"}}, - use_threads, use_exec_plan)); + ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped, + GroupByTest({table->GetColumnByName("argument0")}, + {table->GetColumnByName("key")}, + {{"hash_one", nullptr, "agg_0", "hash_one"}}, + use_threads, use_exec_plan)); ValidateOutput(aggregated_and_grouped); SortBy({"key_0"}, &aggregated_and_grouped); @@ -2718,9 +2726,9 @@ TEST(GroupBy, OneScalar) { for (bool use_threads : {true, false}) { SCOPED_TRACE(use_threads ? "parallel/merged" : "serial"); ASSERT_OK_AND_ASSIGN( - Datum actual, - GroupByUsingExecPlan(input, {"key"}, {{"hash_one", nullptr, "argument"}}, - use_threads, default_exec_context())); + Datum actual, GroupByUsingExecPlan( + input, {"key"}, {{"hash_one", nullptr, "argument", "hash_one"}}, + use_threads, default_exec_context())); const auto& struct_arr = actual.array_as(); // Check the key column @@ -2780,7 +2788,7 @@ TEST(GroupBy, ListNumeric) { table->GetColumnByName("key"), }, { - {"hash_list", nullptr, "agg_0"}, + {"hash_list", nullptr, "agg_0", "hash_list"}, }, use_threads)); ValidateOutput(aggregated_and_grouped); @@ -2851,7 +2859,7 @@ TEST(GroupBy, ListNumeric) { table->GetColumnByName("key"), }, { - {"hash_list", nullptr, "agg_0"}, + {"hash_list", nullptr, "agg_0", "hash_list"}, }, use_threads)); ValidateOutput(aggregated_and_grouped); @@ -2920,7 +2928,7 @@ TEST(GroupBy, ListBinaryTypes) { table->GetColumnByName("key"), }, { - {"hash_list", nullptr, "agg_0"}, + {"hash_list", nullptr, "agg_0", "hash_list"}, }, use_threads)); ValidateOutput(aggregated_and_grouped); @@ -2982,7 +2990,7 @@ TEST(GroupBy, ListBinaryTypes) { table->GetColumnByName("key"), }, { - {"hash_list", nullptr, "agg_0"}, + {"hash_list", nullptr, "agg_0", "hash_list"}, }, use_threads)); ValidateOutput(aggregated_and_grouped); @@ -3060,12 +3068,12 @@ TEST(GroupBy, ListMiscTypes) { }, {table->GetColumnByName("key")}, { - {"hash_list", nullptr, "agg_0"}, - {"hash_list", nullptr, "agg_1"}, - {"hash_list", nullptr, "agg_2"}, - {"hash_list", nullptr, "agg_3"}, - {"hash_list", nullptr, "agg_4"}, - {"hash_list", nullptr, "agg_5"}, + {"hash_list", nullptr, "agg_0", "hash_list"}, + {"hash_list", nullptr, "agg_1", "hash_list"}, + {"hash_list", nullptr, "agg_2", "hash_list"}, + {"hash_list", nullptr, "agg_3", "hash_list"}, + {"hash_list", nullptr, "agg_4", "hash_list"}, + {"hash_list", nullptr, "agg_5", "hash_list"}, }, use_threads, use_exec_plan)); ValidateOutput(aggregated_and_grouped); @@ -3213,12 +3221,12 @@ TEST(GroupBy, CountAndSum) { batch->GetColumnByName("key"), }, { - {"hash_count", count_options, "agg_0"}, - {"hash_count", count_nulls, "agg_1"}, - {"hash_count", count_all, "agg_2"}, - {"hash_sum", nullptr, "agg_3"}, - {"hash_sum", min_count, "agg_4"}, - {"hash_sum", nullptr, 
"agg_5"}, + {"hash_count", count_options, "agg_0", "hash_count"}, + {"hash_count", count_nulls, "agg_1", "hash_count"}, + {"hash_count", count_all, "agg_2", "hash_count"}, + {"hash_sum", nullptr, "agg_3", "hash_sum"}, + {"hash_sum", min_count, "agg_4", "hash_sum"}, + {"hash_sum", nullptr, "agg_5", "hash_sum"}, })); AssertDatumsEqual( @@ -3270,9 +3278,9 @@ TEST(GroupBy, Product) { batch->GetColumnByName("key"), }, { - {"hash_product", nullptr, "agg_0"}, - {"hash_product", nullptr, "agg_1"}, - {"hash_product", min_count, "agg_2"}, + {"hash_product", nullptr, "agg_0", "hash_product"}, + {"hash_product", nullptr, "agg_1", "hash_product"}, + {"hash_product", min_count, "agg_2", "hash_product"}, })); AssertDatumsApproxEqual(ArrayFromJSON(struct_({ @@ -3297,16 +3305,17 @@ TEST(GroupBy, Product) { [8589934593, 1] ])"); - ASSERT_OK_AND_ASSIGN(aggregated_and_grouped, internal::GroupBy( - { - batch->GetColumnByName("argument"), - }, - { - batch->GetColumnByName("key"), - }, - { - {"hash_product", nullptr, "agg_0"}, - })); + ASSERT_OK_AND_ASSIGN(aggregated_and_grouped, + internal::GroupBy( + { + batch->GetColumnByName("argument"), + }, + { + batch->GetColumnByName("key"), + }, + { + {"hash_product", nullptr, "agg_0", "hash_product"}, + })); AssertDatumsApproxEqual(ArrayFromJSON(struct_({ field("hash_product", int64()), @@ -3349,12 +3358,12 @@ TEST(GroupBy, SumMeanProductKeepNulls) { batch->GetColumnByName("key"), }, { - {"hash_sum", keep_nulls, "agg_0"}, - {"hash_sum", min_count, "agg_1"}, - {"hash_mean", keep_nulls, "agg_2"}, - {"hash_mean", min_count, "agg_3"}, - {"hash_product", keep_nulls, "agg_4"}, - {"hash_product", min_count, "agg_5"}, + {"hash_sum", keep_nulls, "agg_0", "hash_sum"}, + {"hash_sum", min_count, "agg_1", "hash_sum"}, + {"hash_mean", keep_nulls, "agg_2", "hash_mean"}, + {"hash_mean", min_count, "agg_3", "hash_mean"}, + {"hash_product", keep_nulls, "agg_4", "hash_product"}, + {"hash_product", min_count, "agg_5", "hash_product"}, })); AssertDatumsApproxEqual(ArrayFromJSON(struct_({ @@ -3398,7 +3407,7 @@ TEST(GroupBy, SumOnlyStringAndDictKeys) { internal::GroupBy({batch->GetColumnByName("argument")}, {batch->GetColumnByName("key")}, { - {"hash_sum", nullptr, "agg_0"}, + {"hash_sum", nullptr, "agg_0", "hash_sum"}, })); SortBy({"key_0"}, &aggregated_and_grouped); @@ -3441,11 +3450,11 @@ TEST(GroupBy, ConcreteCaseWithValidateGroupBy) { using internal::Aggregate; for (auto agg : { - Aggregate{"hash_sum", nullptr, "agg_0"}, - Aggregate{"hash_count", non_null, "agg_1"}, - Aggregate{"hash_count", nulls, "agg_2"}, - Aggregate{"hash_min_max", nullptr, "agg_3"}, - Aggregate{"hash_min_max", keepna, "agg_4"}, + Aggregate{"hash_sum", nullptr, "agg_0", "hash_sum"}, + Aggregate{"hash_count", non_null, "agg_1", "hash_count"}, + Aggregate{"hash_count", nulls, "agg_2", "hash_count"}, + Aggregate{"hash_min_max", nullptr, "agg_3", "hash_min_max"}, + Aggregate{"hash_min_max", keepna, "agg_4", "hash_min_max"}, }) { SCOPED_TRACE(agg.function); ValidateGroupBy({agg}, {batch->GetColumnByName("argument")}, @@ -3469,8 +3478,8 @@ TEST(GroupBy, CountNull) { using internal::Aggregate; for (auto agg : { - Aggregate{"hash_count", keepna, "agg_0"}, - Aggregate{"hash_count", skipna, "agg_1"}, + Aggregate{"hash_count", keepna, "agg_0", "hash_count"}, + Aggregate{"hash_count", skipna, "agg_1", "hash_count"}, }) { SCOPED_TRACE(agg.function); ValidateGroupBy({agg}, {batch->GetColumnByName("argument")}, @@ -3494,7 +3503,7 @@ TEST(GroupBy, RandomArraySum) { ValidateGroupBy( { - {"hash_sum", options, "agg_0"}, + {"hash_sum", 
options, "agg_0", "hash_sum"}, }, {batch->GetColumnByName("argument")}, {batch->GetColumnByName("key")}); } @@ -3527,9 +3536,9 @@ TEST(GroupBy, WithChunkedArray) { table->GetColumnByName("key"), }, { - {"hash_count", nullptr, "agg_0"}, - {"hash_sum", nullptr, "agg_1"}, - {"hash_min_max", nullptr, "agg_2"}, + {"hash_count", nullptr, "agg_0", "hash_count"}, + {"hash_sum", nullptr, "agg_1", "hash_sum"}, + {"hash_min_max", nullptr, "agg_2", "hash_min_max"}, })); AssertDatumsEqual(ArrayFromJSON(struct_({ @@ -3565,7 +3574,7 @@ TEST(GroupBy, MinMaxWithNewGroupsInChunkedArray) { table->GetColumnByName("key"), }, { - {"hash_min_max", nullptr, "agg_1"}, + {"hash_min_max", nullptr, "agg_1", "hash_min_max"}, })); AssertDatumsEqual(ArrayFromJSON(struct_({ @@ -3601,7 +3610,7 @@ TEST(GroupBy, SmallChunkSizeSumOnly) { internal::GroupBy({batch->GetColumnByName("argument")}, {batch->GetColumnByName("key")}, { - {"hash_sum", nullptr, "agg_0"}, + {"hash_sum", nullptr, "agg_0", "hash_sum"}, }, small_chunksize_context())); AssertDatumsEqual(ArrayFromJSON(struct_({ @@ -3653,9 +3662,9 @@ TEST(GroupBy, CountWithNullType) { }, {table->GetColumnByName("key")}, { - {"hash_count", all, "agg_0"}, - {"hash_count", only_valid, "agg_1"}, - {"hash_count", only_null, "agg_2"}, + {"hash_count", all, "agg_0", "hash_count"}, + {"hash_count", only_valid, "agg_1", "hash_count"}, + {"hash_count", only_null, "agg_2", "hash_count"}, }, use_threads, use_exec_plan)); SortBy({"key_0"}, &aggregated_and_grouped); @@ -3698,9 +3707,9 @@ TEST(GroupBy, CountWithNullTypeEmptyTable) { }, {table->GetColumnByName("key")}, { - {"hash_count", all, "agg_0"}, - {"hash_count", only_valid, "agg_1"}, - {"hash_count", only_null, "agg_2"}, + {"hash_count", all, "agg_0", "hash_count"}, + {"hash_count", only_valid, "agg_1", "hash_count"}, + {"hash_count", only_null, "agg_2", "hash_count"}, }, use_threads, use_exec_plan)); auto struct_arr = aggregated_and_grouped.array_as(); @@ -3743,10 +3752,10 @@ TEST(GroupBy, SingleNullTypeKey) { }, {table->GetColumnByName("key")}, { - {"hash_count", nullptr, "agg_0"}, - {"hash_sum", nullptr, "agg_1"}, - {"hash_mean", nullptr, "agg_2"}, - {"hash_min_max", nullptr, "agg_3"}, + {"hash_count", nullptr, "agg_0", "hash_count"}, + {"hash_sum", nullptr, "agg_1", "hash_sum"}, + {"hash_mean", nullptr, "agg_2", "hash_mean"}, + {"hash_min_max", nullptr, "agg_3", "hash_min_max"}, }, use_threads, use_exec_plan)); SortBy({"key_0"}, &aggregated_and_grouped); @@ -3803,9 +3812,9 @@ TEST(GroupBy, MultipleKeysIncludesNullType) { }, {table->GetColumnByName("key_0"), table->GetColumnByName("key_1")}, { - {"hash_count", nullptr, "agg_0"}, - {"hash_sum", nullptr, "agg_1"}, - {"hash_min_max", nullptr, "agg_2"}, + {"hash_count", nullptr, "agg_0", "hash_count"}, + {"hash_sum", nullptr, "agg_1", "hash_sum"}, + {"hash_min_max", nullptr, "agg_2", "hash_min_max"}, }, use_threads, use_exec_plan)); SortBy({"key_0"}, &aggregated_and_grouped); @@ -3864,22 +3873,23 @@ TEST(GroupBy, SumNullType) { for (bool use_exec_plan : {false, true}) { for (bool use_threads : {true, false}) { SCOPED_TRACE(use_threads ? 
"parallel/merged" : "serial"); - ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped, - GroupByTest( - { - table->GetColumnByName("argument"), - table->GetColumnByName("argument"), - table->GetColumnByName("argument"), - table->GetColumnByName("argument"), - }, - {table->GetColumnByName("key")}, - { - {"hash_sum", no_min, "agg_0"}, - {"hash_sum", keep_nulls, "agg_1"}, - {"hash_sum", min_count, "agg_2"}, - {"hash_sum", keep_nulls_min_count, "agg_3"}, - }, - use_threads, use_exec_plan)); + ASSERT_OK_AND_ASSIGN( + Datum aggregated_and_grouped, + GroupByTest( + { + table->GetColumnByName("argument"), + table->GetColumnByName("argument"), + table->GetColumnByName("argument"), + table->GetColumnByName("argument"), + }, + {table->GetColumnByName("key")}, + { + {"hash_sum", no_min, "agg_0", "hash_sum"}, + {"hash_sum", keep_nulls, "agg_1", "hash_sum"}, + {"hash_sum", min_count, "agg_2", "hash_sum"}, + {"hash_sum", keep_nulls_min_count, "agg_3", "hash_sum"}, + }, + use_threads, use_exec_plan)); SortBy({"key_0"}, &aggregated_and_grouped); AssertDatumsEqual(ArrayFromJSON(struct_({ @@ -3932,22 +3942,23 @@ TEST(GroupBy, ProductNullType) { for (bool use_exec_plan : {false, true}) { for (bool use_threads : {true, false}) { SCOPED_TRACE(use_threads ? "parallel/merged" : "serial"); - ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped, - GroupByTest( - { - table->GetColumnByName("argument"), - table->GetColumnByName("argument"), - table->GetColumnByName("argument"), - table->GetColumnByName("argument"), - }, - {table->GetColumnByName("key")}, - { - {"hash_product", no_min, "agg_0"}, - {"hash_product", keep_nulls, "agg_1"}, - {"hash_product", min_count, "agg_2"}, - {"hash_product", keep_nulls_min_count, "agg_3"}, - }, - use_threads, use_exec_plan)); + ASSERT_OK_AND_ASSIGN( + Datum aggregated_and_grouped, + GroupByTest( + { + table->GetColumnByName("argument"), + table->GetColumnByName("argument"), + table->GetColumnByName("argument"), + table->GetColumnByName("argument"), + }, + {table->GetColumnByName("key")}, + { + {"hash_product", no_min, "agg_0", "hash_product"}, + {"hash_product", keep_nulls, "agg_1", "hash_product"}, + {"hash_product", min_count, "agg_2", "hash_product"}, + {"hash_product", keep_nulls_min_count, "agg_3", "hash_product"}, + }, + use_threads, use_exec_plan)); SortBy({"key_0"}, &aggregated_and_grouped); AssertDatumsEqual(ArrayFromJSON(struct_({ @@ -4000,22 +4011,23 @@ TEST(GroupBy, MeanNullType) { for (bool use_exec_plan : {false, true}) { for (bool use_threads : {true, false}) { SCOPED_TRACE(use_threads ? 
"parallel/merged" : "serial"); - ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped, - GroupByTest( - { - table->GetColumnByName("argument"), - table->GetColumnByName("argument"), - table->GetColumnByName("argument"), - table->GetColumnByName("argument"), - }, - {table->GetColumnByName("key")}, - { - {"hash_mean", no_min, "agg_0"}, - {"hash_mean", keep_nulls, "agg_1"}, - {"hash_mean", min_count, "agg_2"}, - {"hash_mean", keep_nulls_min_count, "agg_3"}, - }, - use_threads, use_exec_plan)); + ASSERT_OK_AND_ASSIGN( + Datum aggregated_and_grouped, + GroupByTest( + { + table->GetColumnByName("argument"), + table->GetColumnByName("argument"), + table->GetColumnByName("argument"), + table->GetColumnByName("argument"), + }, + {table->GetColumnByName("key")}, + { + {"hash_mean", no_min, "agg_0", "hash_mean"}, + {"hash_mean", keep_nulls, "agg_1", "hash_mean"}, + {"hash_mean", min_count, "agg_2", "hash_mean"}, + {"hash_mean", keep_nulls_min_count, "agg_3", "hash_mean"}, + }, + use_threads, use_exec_plan)); SortBy({"key_0"}, &aggregated_and_grouped); AssertDatumsEqual(ArrayFromJSON(struct_({ @@ -4062,9 +4074,9 @@ TEST(GroupBy, NullTypeEmptyTable) { }, {table->GetColumnByName("key")}, { - {"hash_sum", no_min, "agg_0"}, - {"hash_product", min_count, "agg_1"}, - {"hash_mean", keep_nulls, "agg_2"}, + {"hash_sum", no_min, "agg_0", "hash_sum"}, + {"hash_product", min_count, "agg_1", "hash_product"}, + {"hash_mean", keep_nulls, "agg_2", "hash_mean"}, }, use_threads, use_exec_plan)); auto struct_arr = aggregated_and_grouped.array_as(); diff --git a/cpp/src/arrow/dataset/scanner.cc b/cpp/src/arrow/dataset/scanner.cc index b19e787f94d..2295665b161 100644 --- a/cpp/src/arrow/dataset/scanner.cc +++ b/cpp/src/arrow/dataset/scanner.cc @@ -676,8 +676,7 @@ Result AsyncScanner::CountRows() { options}}, {"project", compute::ProjectNodeOptions{{options->filter}, {"mask"}}}, {"aggregate", compute::AggregateNodeOptions{{compute::internal::Aggregate{ - "sum", nullptr, "mask"}}, - /*names=*/{"selected_count"}}}, + "sum", nullptr, "mask", "selected_count"}}}}, {"sink", compute::SinkNodeOptions{&sink_gen}}, }) .AddToPlan(plan.get())); diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc index 1d3e0b403ef..6dd75db114a 100644 --- a/cpp/src/arrow/dataset/scanner_test.cc +++ b/cpp/src/arrow/dataset/scanner_test.cc @@ -1838,9 +1838,8 @@ TEST(ScanNode, MinimalScalarAggEndToEnd) { ASSERT_OK_AND_ASSIGN( compute::ExecNode * aggregate, compute::MakeExecNode("aggregate", plan.get(), {project}, - compute::AggregateNodeOptions{ - {compute::internal::Aggregate{"sum", nullptr, "a * 2"}}, - /*names=*/{"sum(a * 2)"}})); + compute::AggregateNodeOptions{{compute::internal::Aggregate{ + "sum", nullptr, "a * 2", "sum(a * 2)"}}})); // finally, pipe the aggregate node into a sink node AsyncGenerator> sink_gen; @@ -1929,8 +1928,7 @@ TEST(ScanNode, MinimalGroupedAggEndToEnd) { compute::MakeExecNode( "aggregate", plan.get(), {project}, compute::AggregateNodeOptions{ - {compute::internal::Aggregate{"hash_sum", nullptr, "a * 2"}}, - /*names=*/{"sum(a * 2)"}, + {compute::internal::Aggregate{"hash_sum", nullptr, "a * 2", "sum(a * 2)"}}, /*keys=*/{"b"}})); // finally, pipe the aggregate node into a sink node From fd2e78d18180b065c2f075d1f32759d9bc1aa9e9 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Thu, 19 May 2022 23:31:24 +0530 Subject: [PATCH 10/32] [r] fix r bindings --- r/R/arrowExports.R | 5 ++--- r/R/query-engine.R | 9 +++------ r/src/arrowExports.cpp | 9 ++++----- r/src/compute-exec.cpp | 17 
++++++++--------- 4 files changed, 17 insertions(+), 23 deletions(-) diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index 53c0f8ac86b..3a14730e535 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -432,8 +432,8 @@ ExecNode_Project <- function(input, exprs, names) { .Call(`_arrow_ExecNode_Project`, input, exprs, names) } -ExecNode_Aggregate <- function(input, options, out_field_names, key_names) { - .Call(`_arrow_ExecNode_Aggregate`, input, options, out_field_names, key_names) +ExecNode_Aggregate <- function(input, options, key_names) { + .Call(`_arrow_ExecNode_Aggregate`, input, options, key_names) } ExecNode_Join <- function(input, type, right_data, left_keys, right_keys, left_output, right_output, output_suffix_for_left, output_suffix_for_right) { @@ -2007,4 +2007,3 @@ SetIOThreadPoolCapacity <- function(threads) { Array__infer_type <- function(x) { .Call(`_arrow_Array__infer_type`, x) } - diff --git a/r/R/query-engine.R b/r/R/query-engine.R index 2c2d96063bd..7c7d25b9029 100644 --- a/r/R/query-engine.R +++ b/r/R/query-engine.R @@ -74,8 +74,6 @@ ExecPlan <- R6Class("ExecPlan", grouped <- length(group_vars) > 0 # Collect the target names first because we have to add back the group vars - # TODO : remove upon discussion - # target_names <- names(.data) .data <- ensure_group_vars(.data) .data <- ensure_arrange_vars(.data) # this sets .data$temp_columns @@ -115,14 +113,13 @@ ExecPlan <- R6Class("ExecPlan", x }) } - target_names <- names(.data$aggregations); + target_names <- names(.data$aggregations) for (i in seq_len(length(target_names))) { - .data$aggregations[[i]][["target"]] <- target_names[i] + .data$aggregations[[i]][["name"]] <- .data$aggregations[[i]][["target"]] <- target_names[i] } node <- node$Aggregate( - options = map(.data$aggregations, ~ .[c("fun", "options", "target")]), - out_field_names = names(.data$aggregations), + options = map(.data$aggregations, ~ .[c("fun", "options", "target", "name")]), key_names = group_vars ) diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index 3bfed5f0af7..6ff771fdf5a 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -964,14 +964,13 @@ BEGIN_CPP11 END_CPP11 } // compute-exec.cpp -std::shared_ptr ExecNode_Aggregate(const std::shared_ptr& input, cpp11::list options, std::vector out_field_names, std::vector key_names); -extern "C" SEXP _arrow_ExecNode_Aggregate(SEXP input_sexp, SEXP options_sexp, SEXP out_field_names_sexp, SEXP key_names_sexp){ +std::shared_ptr ExecNode_Aggregate(const std::shared_ptr& input, cpp11::list options, std::vector key_names); +extern "C" SEXP _arrow_ExecNode_Aggregate(SEXP input_sexp, SEXP options_sexp, SEXP key_names_sexp){ BEGIN_CPP11 arrow::r::Input&>::type input(input_sexp); arrow::r::Input::type options(options_sexp); - arrow::r::Input>::type out_field_names(out_field_names_sexp); arrow::r::Input>::type key_names(key_names_sexp); - return cpp11::as_sexp(ExecNode_Aggregate(input, options, out_field_names, key_names)); + return cpp11::as_sexp(ExecNode_Aggregate(input, options, key_names)); END_CPP11 } // compute-exec.cpp @@ -5221,7 +5220,7 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_ExecPlan_Write", (DL_FUNC) &_arrow_ExecPlan_Write, 14}, { "_arrow_ExecNode_Filter", (DL_FUNC) &_arrow_ExecNode_Filter, 2}, { "_arrow_ExecNode_Project", (DL_FUNC) &_arrow_ExecNode_Project, 3}, - { "_arrow_ExecNode_Aggregate", (DL_FUNC) &_arrow_ExecNode_Aggregate, 4}, + { "_arrow_ExecNode_Aggregate", (DL_FUNC) &_arrow_ExecNode_Aggregate, 3}, { "_arrow_ExecNode_Join", (DL_FUNC) 
&_arrow_ExecNode_Join, 9}, { "_arrow_ExecNode_Union", (DL_FUNC) &_arrow_ExecNode_Union, 2}, { "_arrow_ExecNode_SourceNode", (DL_FUNC) &_arrow_ExecNode_SourceNode, 2}, diff --git a/r/src/compute-exec.cpp b/r/src/compute-exec.cpp index e4438d08d28..5359065d8d8 100644 --- a/r/src/compute-exec.cpp +++ b/r/src/compute-exec.cpp @@ -228,17 +228,17 @@ std::shared_ptr ExecNode_Project( // [[arrow::export]] std::shared_ptr ExecNode_Aggregate( const std::shared_ptr& input, cpp11::list options, - std::vector out_field_names, std::vector key_names) { + std::vector key_names) { std::vector aggregates; for (cpp11::list name_opts : options) { - auto name = cpp11::as_cpp(name_opts[0]); - auto opts = make_compute_options(name, name_opts[1]); + auto function = cpp11::as_cpp(name_opts[0]); + auto opts = make_compute_options(function, name_opts[1]); auto target = cpp11::as_cpp(name_opts[2]); - - aggregates.push_back( - arrow::compute::internal::Aggregate{std::move(name), opts, std::move(target)}); - keep_alives.push_back(std::move(opts)); + auto name = cpp11::as_cpp(name_opts[3]); + + aggregates.push_back(arrow::compute::internal::Aggregate{ + std::move(function), opts.get(), std::move(target), std::move(name)}); } std::vector keys; @@ -247,8 +247,7 @@ std::shared_ptr ExecNode_Aggregate( } return MakeExecNodeOrStop( "aggregate", input->plan(), {input.get()}, - compute::AggregateNodeOptions{std::move(aggregates), std::move(out_field_names), - std::move(keys)}); + compute::AggregateNodeOptions{std::move(aggregates), std::move(keys)}); } // [[arrow::export]] From 9eecd8c35068572ae2d5458d07debafa7172b3fd Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Fri, 20 May 2022 15:36:39 +0530 Subject: [PATCH 11/32] fixing tpch benchmarks for agg op refactor --- cpp/src/arrow/compute/exec/tpch_benchmark.cc | 22 ++++++++------------ 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/cpp/src/arrow/compute/exec/tpch_benchmark.cc b/cpp/src/arrow/compute/exec/tpch_benchmark.cc index f348eaf3eb1..b998da5eded 100644 --- a/cpp/src/arrow/compute/exec/tpch_benchmark.cc +++ b/cpp/src/arrow/compute/exec/tpch_benchmark.cc @@ -78,21 +78,17 @@ std::shared_ptr Plan_Q1(AsyncGenerator>* sin std::make_shared(ScalarAggregateOptions::Defaults()); auto count_opts = std::make_shared(CountOptions::CountMode::ALL); std::vector aggs = { - {"hash_sum", sum_opts, "sum_qty"}, - {"hash_sum", sum_opts, "sum_base_price"}, - {"hash_sum", sum_opts, "sum_disc_price"}, - {"hash_sum", sum_opts, "sum_charge"}, - {"hash_mean", sum_opts, "avg_qty"}, - {"hash_mean", sum_opts, "avg_price"}, - {"hash_mean", sum_opts, "avg_disc"}, - {"hash_count", count_opts, "sum_qty"}}; - - std::vector names = {"sum_qty", "sum_base_price", "sum_disc_price", - "sum_charge", "avg_qty", "avg_price", - "avg_disc", "count_order"}; + {"hash_sum", sum_opts, "sum_qty", "sum_qty"}, + {"hash_sum", sum_opts, "sum_base_price", "sum_base_price"}, + {"hash_sum", sum_opts, "sum_disc_price", "sum_disc_price"}, + {"hash_sum", sum_opts, "sum_charge", "sum_charge"}, + {"hash_mean", sum_opts, "avg_qty", "avg_qty"}, + {"hash_mean", sum_opts, "avg_price", "avg_price"}, + {"hash_mean", sum_opts, "avg_disc", "avg_disc"}, + {"hash_count", count_opts, "sum_qty", "count_order"}}; std::vector keys = {"l_returnflag", "l_linestatus"}; - AggregateNodeOptions agg_opts(aggs, names, keys); + AggregateNodeOptions agg_opts(aggs, keys); SortKey l_returnflag_key("l_returnflag"); SortKey l_linestatus_key("l_linestatus"); From e9c70d7196003d61be842c26ee3f8ca179a884fb Mon Sep 17 00:00:00 2001 From: 
Vibhatha Abeykoon Date: Tue, 24 May 2022 16:12:25 +0530 Subject: [PATCH 12/32] minor changes --- r/R/query-engine.R | 3 +-- r/src/compute-exec.cpp | 8 ++++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/r/R/query-engine.R b/r/R/query-engine.R index 7c7d25b9029..69fa9e742ec 100644 --- a/r/R/query-engine.R +++ b/r/R/query-engine.R @@ -73,7 +73,6 @@ ExecPlan <- R6Class("ExecPlan", group_vars <- dplyr::group_vars(.data) grouped <- length(group_vars) > 0 - # Collect the target names first because we have to add back the group vars .data <- ensure_group_vars(.data) .data <- ensure_arrange_vars(.data) # this sets .data$temp_columns @@ -119,7 +118,7 @@ ExecPlan <- R6Class("ExecPlan", } node <- node$Aggregate( - options = map(.data$aggregations, ~ .[c("fun", "options", "target", "name")]), + options = .data$aggregations, key_names = group_vars ) diff --git a/r/src/compute-exec.cpp b/r/src/compute-exec.cpp index 5359065d8d8..7b28a700c2b 100644 --- a/r/src/compute-exec.cpp +++ b/r/src/compute-exec.cpp @@ -232,10 +232,10 @@ std::shared_ptr ExecNode_Aggregate( std::vector aggregates; for (cpp11::list name_opts : options) { - auto function = cpp11::as_cpp(name_opts[0]); - auto opts = make_compute_options(function, name_opts[1]); - auto target = cpp11::as_cpp(name_opts[2]); - auto name = cpp11::as_cpp(name_opts[3]); + auto function = cpp11::as_cpp(name_opts["fun"]); + auto opts = make_compute_options(function, name_opts["options"]); + auto target = cpp11::as_cpp(name_opts["target"]); + auto name = cpp11::as_cpp(name_opts["name"]); aggregates.push_back(arrow::compute::internal::Aggregate{ std::move(function), opts.get(), std::move(target), std::move(name)}); From 7d8b4f7cf419a483a68002e055ed1e37c5cb8cad Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Mon, 30 May 2022 17:03:43 +0530 Subject: [PATCH 13/32] fix print format --- cpp/src/arrow/compute/exec/test_util.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/compute/exec/test_util.cc b/cpp/src/arrow/compute/exec/test_util.cc index 762bf320f03..4e8f57afcf9 100644 --- a/cpp/src/arrow/compute/exec/test_util.cc +++ b/cpp/src/arrow/compute/exec/test_util.cc @@ -378,11 +378,11 @@ static inline void PrintToImpl(const std::string& factory_name, *os << "aggregates={"; for (const auto& agg : o->aggregates) { - *os << agg.function << "<"; + *os << "function=" << agg.function << "<"; if (agg.options) PrintTo(*agg.options, os); - *os << agg.target.ToString() << "<"; - *os << agg.name << "<"; *os << ">,"; + *os << "target=" << agg.target.ToString() << ","; + *os << "name=" << agg.name; } *os << "},"; From 2e2df66711728a169cc45b254988b7a9e69033d0 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Mon, 30 May 2022 17:35:00 +0530 Subject: [PATCH 14/32] adding test aggregate option --- .../compute/kernels/hash_aggregate_test.cc | 134 ++++++++++-------- 1 file changed, 72 insertions(+), 62 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc b/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc index 0c3e219d439..0478cba98a1 100644 --- a/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc +++ b/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc @@ -235,15 +235,27 @@ ExecContext* small_chunksize_context(bool use_threads = false) { return use_threads ? 
&ctx_with_threads : &ctx; } -Result GroupByTest( - const std::vector& arguments, const std::vector& keys, - const std::vector<::arrow::compute::internal::Aggregate>& aggregates, - bool use_threads, bool use_exec_plan) { +struct TestAggregate { + std::string function; + const FunctionOptions* options; +}; + +Result GroupByTest(const std::vector& arguments, + const std::vector& keys, + const std::vector& aggregates, bool use_threads, + bool use_exec_plan) { + std::vector internal_aggregates; + int idx = 0; + for (auto t_agg : aggregates) { + internal_aggregates.push_back( + {t_agg.function, t_agg.options, "agg_" + std::to_string(idx), t_agg.function}); + idx = idx + 1; + } if (use_exec_plan) { - return GroupByUsingExecPlan(arguments, keys, aggregates, use_threads, + return GroupByUsingExecPlan(arguments, keys, internal_aggregates, use_threads, small_chunksize_context(use_threads)); } else { - return internal::GroupBy(arguments, keys, aggregates, use_threads, + return internal::GroupBy(arguments, keys, internal_aggregates, use_threads, default_exec_context()); } } @@ -745,7 +757,7 @@ TEST(GroupBy, NoBatches) { Datum aggregated_and_grouped, GroupByTest({table->GetColumnByName("argument")}, {table->GetColumnByName("key")}, { - {"hash_count", nullptr, "agg_0", "hash_count"}, + {"hash_count", nullptr}, }, /*use_threads=*/true, /*use_exec_plan=*/true)); AssertDatumsEqual(ArrayFromJSON(struct_({ @@ -810,7 +822,7 @@ TEST(GroupBy, CountOnly) { GroupByTest({table->GetColumnByName("argument")}, {table->GetColumnByName("key")}, { - {"hash_count", nullptr, "agg_0", "hash_count"}, + {"hash_count", nullptr}, }, use_threads, use_exec_plan)); SortBy({"key_0"}, &aggregated_and_grouped); @@ -898,7 +910,7 @@ TEST(GroupBy, SumOnly) { GroupByTest({table->GetColumnByName("argument")}, {table->GetColumnByName("key")}, { - {"hash_sum", nullptr, "agg_0", "hash_sum"}, + {"hash_sum", nullptr}, }, use_threads, use_exec_plan)); SortBy({"key_0"}, &aggregated_and_grouped); @@ -963,12 +975,12 @@ TEST(GroupBy, SumMeanProductDecimal) { }, {table->GetColumnByName("key")}, { - {"hash_sum", nullptr, "agg_0", "hash_sum"}, - {"hash_sum", nullptr, "agg_1", "hash_sum"}, - {"hash_mean", nullptr, "agg_2", "hash_mean"}, - {"hash_mean", nullptr, "agg_3", "hash_mean"}, - {"hash_product", nullptr, "agg_4", "hash_product"}, - {"hash_product", nullptr, "agg_5", "hash_product"}, + {"hash_sum", nullptr}, + {"hash_sum", nullptr}, + {"hash_mean", nullptr}, + {"hash_mean", nullptr}, + {"hash_product", nullptr}, + {"hash_product", nullptr}, }, use_threads, use_exec_plan)); SortBy({"key_0"}, &aggregated_and_grouped); @@ -1608,9 +1620,9 @@ TEST(GroupBy, MinMaxOnly) { }, {table->GetColumnByName("key")}, { - {"hash_min_max", nullptr, "agg_0", "hash_min_max"}, - {"hash_min_max", nullptr, "agg_1", "hash_min_max"}, - {"hash_min_max", nullptr, "agg_2", "hash_min_max"}, + {"hash_min_max", nullptr}, + {"hash_min_max", nullptr}, + {"hash_min_max", nullptr}, }, use_threads, use_exec_plan)); ValidateOutput(aggregated_and_grouped); @@ -1712,11 +1724,11 @@ TEST(GroupBy, MinMaxTypes) { auto table = TableFromJSON(in_schema, (ty->name() == "date64") ? 
date64_table : default_table); - ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped, - GroupByTest({table->GetColumnByName("argument0")}, - {table->GetColumnByName("key")}, - {{"hash_min_max", nullptr, "agg_0", "hash_min_max"}}, - /*use_threads=*/true, /*use_exec_plan=*/true)); + ASSERT_OK_AND_ASSIGN( + Datum aggregated_and_grouped, + GroupByTest({table->GetColumnByName("argument0")}, + {table->GetColumnByName("key")}, {{"hash_min_max", nullptr}}, + /*use_threads=*/true, /*use_exec_plan=*/true)); ValidateOutput(aggregated_and_grouped); SortBy({"key_0"}, &aggregated_and_grouped); @@ -1769,8 +1781,8 @@ TEST(GroupBy, MinMaxDecimal) { }, {table->GetColumnByName("key")}, { - {"hash_min_max", nullptr, "agg_0", "hash_min_max"}, - {"hash_min_max", nullptr, "agg_1", "hash_min_max"}, + {"hash_min_max", nullptr}, + {"hash_min_max", nullptr}, }, use_threads, use_exec_plan)); ValidateOutput(aggregated_and_grouped); @@ -1831,9 +1843,8 @@ TEST(GroupBy, MinMaxBinary) { ASSERT_OK_AND_ASSIGN( Datum aggregated_and_grouped, GroupByTest({table->GetColumnByName("argument0")}, - {table->GetColumnByName("key")}, - {{"hash_min_max", nullptr, "agg_0", "hash_min_max"}}, use_threads, - use_exec_plan)); + {table->GetColumnByName("key")}, {{"hash_min_max", nullptr}}, + use_threads, use_exec_plan)); ValidateOutput(aggregated_and_grouped); SortBy({"key_0"}, &aggregated_and_grouped); @@ -1886,9 +1897,8 @@ TEST(GroupBy, MinMaxFixedSizeBinary) { ASSERT_OK_AND_ASSIGN( Datum aggregated_and_grouped, GroupByTest({table->GetColumnByName("argument0")}, - {table->GetColumnByName("key")}, - {{"hash_min_max", nullptr, "agg_0", "hash_min_max"}}, use_threads, - use_exec_plan)); + {table->GetColumnByName("key")}, {{"hash_min_max", nullptr}}, + use_threads, use_exec_plan)); ValidateOutput(aggregated_and_grouped); SortBy({"key_0"}, &aggregated_and_grouped); @@ -1941,8 +1951,8 @@ TEST(GroupBy, MinOrMax) { table->GetColumnByName("argument")}, {table->GetColumnByName("key")}, { - {"hash_min", nullptr, "agg_0", "hash_min"}, - {"hash_max", nullptr, "agg_1", "hash_max"}, + {"hash_min", nullptr}, + {"hash_max", nullptr}, }, /*use_threads=*/true, /*use_exec_plan=*/true)); SortBy({"key_0"}, &aggregated_and_grouped); @@ -2499,12 +2509,12 @@ TEST(GroupBy, OneMiscTypes) { }, {table->GetColumnByName("key")}, { - {"hash_one", nullptr, "agg_0", "hash_one"}, - {"hash_one", nullptr, "agg_1", "hash_one"}, - {"hash_one", nullptr, "agg_2", "hash_one"}, - {"hash_one", nullptr, "agg_3", "hash_one"}, - {"hash_one", nullptr, "agg_4", "hash_one"}, - {"hash_one", nullptr, "agg_5", "hash_one"}, + {"hash_one", nullptr}, + {"hash_one", nullptr}, + {"hash_one", nullptr}, + {"hash_one", nullptr}, + {"hash_one", nullptr}, + {"hash_one", nullptr}, }, use_threads, use_exec_plan)); ValidateOutput(aggregated_and_grouped); @@ -2627,11 +2637,11 @@ TEST(GroupBy, OneNumericTypes) { auto table = TableFromJSON(in_schema, (type->name() == "date64") ? 
temporal_table_json : numeric_table_json); - ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped, - GroupByTest({table->GetColumnByName("argument0")}, - {table->GetColumnByName("key")}, - {{"hash_one", nullptr, "agg_0", "hash_one"}}, - use_threads, use_exec_plan)); + ASSERT_OK_AND_ASSIGN( + Datum aggregated_and_grouped, + GroupByTest({table->GetColumnByName("argument0")}, + {table->GetColumnByName("key")}, {{"hash_one", nullptr}}, + use_threads, use_exec_plan)); ValidateOutput(aggregated_and_grouped); SortBy({"key_0"}, &aggregated_and_grouped); @@ -2690,11 +2700,11 @@ TEST(GroupBy, OneBinaryTypes) { [null, 3] ])"}); - ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped, - GroupByTest({table->GetColumnByName("argument0")}, - {table->GetColumnByName("key")}, - {{"hash_one", nullptr, "agg_0", "hash_one"}}, - use_threads, use_exec_plan)); + ASSERT_OK_AND_ASSIGN( + Datum aggregated_and_grouped, + GroupByTest({table->GetColumnByName("argument0")}, + {table->GetColumnByName("key")}, {{"hash_one", nullptr}}, + use_threads, use_exec_plan)); ValidateOutput(aggregated_and_grouped); SortBy({"key_0"}, &aggregated_and_grouped); @@ -3068,12 +3078,12 @@ TEST(GroupBy, ListMiscTypes) { }, {table->GetColumnByName("key")}, { - {"hash_list", nullptr, "agg_0", "hash_list"}, - {"hash_list", nullptr, "agg_1", "hash_list"}, - {"hash_list", nullptr, "agg_2", "hash_list"}, - {"hash_list", nullptr, "agg_3", "hash_list"}, - {"hash_list", nullptr, "agg_4", "hash_list"}, - {"hash_list", nullptr, "agg_5", "hash_list"}, + {"hash_list", nullptr}, + {"hash_list", nullptr}, + {"hash_list", nullptr}, + {"hash_list", nullptr}, + {"hash_list", nullptr}, + {"hash_list", nullptr}, }, use_threads, use_exec_plan)); ValidateOutput(aggregated_and_grouped); @@ -3752,10 +3762,10 @@ TEST(GroupBy, SingleNullTypeKey) { }, {table->GetColumnByName("key")}, { - {"hash_count", nullptr, "agg_0", "hash_count"}, - {"hash_sum", nullptr, "agg_1", "hash_sum"}, - {"hash_mean", nullptr, "agg_2", "hash_mean"}, - {"hash_min_max", nullptr, "agg_3", "hash_min_max"}, + {"hash_count", nullptr}, + {"hash_sum", nullptr}, + {"hash_mean", nullptr}, + {"hash_min_max", nullptr}, }, use_threads, use_exec_plan)); SortBy({"key_0"}, &aggregated_and_grouped); @@ -3812,9 +3822,9 @@ TEST(GroupBy, MultipleKeysIncludesNullType) { }, {table->GetColumnByName("key_0"), table->GetColumnByName("key_1")}, { - {"hash_count", nullptr, "agg_0", "hash_count"}, - {"hash_sum", nullptr, "agg_1", "hash_sum"}, - {"hash_min_max", nullptr, "agg_2", "hash_min_max"}, + {"hash_count", nullptr}, + {"hash_sum", nullptr}, + {"hash_min_max", nullptr}, }, use_threads, use_exec_plan)); SortBy({"key_0"}, &aggregated_and_grouped); From 56551acee9ee5c38125700302953937c382059bf Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Tue, 31 May 2022 13:46:31 +0530 Subject: [PATCH 15/32] fixing typo in rebase --- r/R/query-engine.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/query-engine.R b/r/R/query-engine.R index 69fa9e742ec..1a53298fd47 100644 --- a/r/R/query-engine.R +++ b/r/R/query-engine.R @@ -304,7 +304,7 @@ ExecNode <- R6Class("ExecNode", self$preserve_extras(ExecNode_Filter(self, expr)) }, Aggregate = function(options, key_names) { - self$preserve_sort( + out <- self$preserve_extras( ExecNode_Aggregate(self, options, key_names) ) # dplyr drops top-level attributes when you call summarize() From 1227a133cd793617c78ee6de9e2b250d128c4928 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Tue, 31 May 2022 18:18:54 +0530 Subject: [PATCH 16/32] adding patch --- 
c_glib/arrow-glib/compute.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index ca5bc1a76f5..1595c745bb9 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -1255,8 +1255,6 @@ garrow_aggregate_node_options_new(GList *aggregations, GError **error) { std::vector arrow_aggregates; - std::vector arrow_targets; - std::vector arrow_names; std::vector arrow_keys; for (auto node = aggregations; node; node = node->next) { auto aggregation_priv = GARROW_AGGREGATION_GET_PRIVATE(node->data); @@ -1279,7 +1277,12 @@ garrow_aggregate_node_options_new(GList *aggregations, "[aggregate-node-options][new][input]")) { return NULL; } - arrow_names.emplace_back(aggregation_priv->output); + arrow_aggregates.push_back({ + aggregation_priv->function, + function_options, + arrow_targets[0], + aggregation_priv->output + }); } for (gsize i = 0; i < n_keys; ++i) { if (!garrow_field_refs_add(arrow_keys, @@ -1291,8 +1294,6 @@ garrow_aggregate_node_options_new(GList *aggregations, } auto arrow_options = new arrow::compute::AggregateNodeOptions(std::move(arrow_aggregates), - std::move(arrow_targets), - std::move(arrow_names), std::move(arrow_keys)); auto options = g_object_new(GARROW_TYPE_AGGREGATE_NODE_OPTIONS, "options", arrow_options, From ab9c7d5fe4ec70f99ca83c3bdd77cc24a81e2765 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Wed, 8 Jun 2022 23:15:52 +0530 Subject: [PATCH 17/32] rebase --- cpp/src/arrow/compute/api_aggregate.h | 113 +++++++++++++++--- cpp/src/arrow/compute/exec/aggregate_node.cc | 27 ++++- cpp/src/arrow/compute/exec/options.h | 4 +- .../compute/kernels/aggregate_benchmark.cc | 2 +- .../compute/kernels/hash_aggregate_test.cc | 14 +-- cpp/src/arrow/dataset/scanner.cc | 2 +- cpp/src/arrow/dataset/scanner_test.cc | 4 +- python/pyarrow/includes/libarrow.pxd | 2 +- r/src/compute-exec.cpp | 4 +- 9 files changed, 137 insertions(+), 35 deletions(-) diff --git a/cpp/src/arrow/compute/api_aggregate.h b/cpp/src/arrow/compute/api_aggregate.h index 9cd06424299..dd3bd591a5f 100644 --- a/cpp/src/arrow/compute/api_aggregate.h +++ b/cpp/src/arrow/compute/api_aggregate.h @@ -56,6 +56,21 @@ class ARROW_EXPORT ScalarAggregateOptions : public FunctionOptions { uint32_t min_count; }; +/// \brief Configure a grouped aggregation +struct ARROW_EXPORT Aggregate { + /// the name of the aggregation function + std::string function; + + /// options for the aggregation function + const FunctionOptions* options; + + // fields to which aggregations will be applied + FieldRef target; + + // output field name for aggregations + std::string name; +}; + /// \brief Control count aggregate kernel behavior. /// /// By default, only non-null values are counted. @@ -393,23 +408,93 @@ ARROW_EXPORT Result Index(const Datum& value, const IndexOptions& options, ExecContext* ctx = NULLPTR); -namespace internal { -/// \brief Configure a grouped aggregation -struct ARROW_EXPORT Aggregate { - /// the name of the aggregation function - std::string function; - - /// options for the aggregation function - std::shared_ptr options; - - // fields to which aggregations will be applied - FieldRef target; - - // output field name for aggregations - std::string name; +namespace internal { +/// Internal use only: streaming group identifier. +/// Consumes batches of keys and yields batches of the group ids. 
+class ARROW_EXPORT Grouper {
+ public:
+  virtual ~Grouper() = default;
+
+  /// Construct a Grouper which receives the specified key types
+  static Result<std::unique_ptr<Grouper>> Make(const std::vector<ValueDescr>& descrs,
+                                               ExecContext* ctx = default_exec_context());
+
+  /// Consume a batch of keys, producing the corresponding group ids as an integer array.
+  /// Currently only uint32 indices will be produced, eventually the bit width will only
+  /// be as wide as necessary.
+  virtual Result<Datum> Consume(const ExecBatch& batch) = 0;
+
+  /// Get current unique keys. May be called multiple times.
+  virtual Result<ExecBatch> GetUniques() = 0;
+
+  /// Get the current number of groups.
+  virtual uint32_t num_groups() const = 0;
+
+  /// \brief Assemble lists of indices of identical elements.
+  ///
+  /// \param[in] ids An unsigned, all-valid integral array which will be
+  ///               used as grouping criteria.
+  /// \param[in] num_groups An upper bound for the elements of ids
+  /// \return A num_groups-long ListArray where the slot at i contains a
+  ///     list of indices where i appears in ids.
+  ///
+  /// MakeGroupings([
+  ///   2,
+  ///   2,
+  ///   5,
+  ///   5,
+  ///   2,
+  ///   3
+  /// ], 8) == [
+  ///   [],
+  ///   [],
+  ///   [0, 1, 4],
+  ///   [5],
+  ///   [],
+  ///   [2, 3],
+  ///   [],
+  ///   []
+  /// ]
+  static Result<std::shared_ptr<ListArray>> MakeGroupings(
+      const UInt32Array& ids, uint32_t num_groups,
+      ExecContext* ctx = default_exec_context());
+
+  /// \brief Produce a ListArray whose slots are selections of `array` which correspond to
+  /// the provided groupings.
+  ///
+  /// For example,
+  /// ApplyGroupings([
+  ///   [],
+  ///   [],
+  ///   [0, 1, 4],
+  ///   [5],
+  ///   [],
+  ///   [2, 3],
+  ///   [],
+  ///   []
+  /// ], [2, 2, 5, 5, 2, 3]) == [
+  ///   [],
+  ///   [],
+  ///   [2, 2, 2],
+  ///   [3],
+  ///   [],
+  ///   [5, 5],
+  ///   [],
+  ///   []
+  /// ]
+  static Result<std::shared_ptr<ListArray>> ApplyGroupings(
+      const ListArray& groupings, const Array& array,
+      ExecContext* ctx = default_exec_context());
+};
+
+/// Internal use only: helper function for testing HashAggregateKernels.
+/// This will be replaced by streaming execution operators.
+ARROW_EXPORT
+Result<Datum> GroupBy(const std::vector<Datum>& arguments, const std::vector<Datum>& keys,
+                      const std::vector<Aggregate>& aggregates, bool use_threads = false,
+                      ExecContext* ctx = default_exec_context());
+
 } // namespace internal
 } // namespace compute
 } // namespace arrow
diff --git a/cpp/src/arrow/compute/exec/aggregate_node.cc b/cpp/src/arrow/compute/exec/aggregate_node.cc
index 3632f2da0b6..5d71e23546f 100644
--- a/cpp/src/arrow/compute/exec/aggregate_node.cc
+++ b/cpp/src/arrow/compute/exec/aggregate_node.cc
@@ -41,6 +41,25 @@ using internal::checked_cast;
 
 namespace compute {
 
+namespace internal {
+
+Result<std::vector<const HashAggregateKernel*>> GetKernels(
+    ExecContext* ctx, const std::vector<Aggregate>& aggregates,
+    const std::vector<ValueDescr>& in_descrs);
+
+Result<std::vector<std::unique_ptr<KernelState>>> InitKernels(
+    const std::vector<const HashAggregateKernel*>& kernels, ExecContext* ctx,
+    const std::vector<Aggregate>& aggregates,
+    const std::vector<ValueDescr>& in_descrs);
+
+Result<FieldVector> ResolveKernels(
+    const std::vector<Aggregate>& aggregates,
+    const std::vector<const HashAggregateKernel*>& kernels,
+    const std::vector<std::unique_ptr<KernelState>>& states, ExecContext* ctx,
+    const std::vector<ValueDescr>& descrs);
+
+} // namespace internal
+
 namespace {
 
 void AggregatesToString(std::stringstream* ss, const Schema& input_schema,
-                        const std::vector<internal::Aggregate>& aggs,
+                        const std::vector<Aggregate>& aggs,
                         const std::vector<int>& target_field_ids, int indent = 0) {
   *ss << "aggregates=[" << std::endl;
   for (size_t i = 0; i < aggs.size(); i++) {
     for (int j = 0; j < indent; ++j) *ss << "  ";
@@ -65,7 +84,7 @@ class ScalarAggregateNode : public ExecNode {
  public:
   ScalarAggregateNode(ExecPlan* plan, std::vector<ExecNode*> inputs,
                       std::shared_ptr<Schema> output_schema,
                       std::vector<int> target_field_ids,
-                      std::vector<internal::Aggregate> aggs,
+                      std::vector<Aggregate> aggs,
                       std::vector<const ScalarAggregateKernel*> kernels,
                       std::vector<std::vector<std::unique_ptr<KernelState>>> states)
       : ExecNode(plan, std::move(inputs), {"target"},
@@ -263,7 +282,7 @@ class ScalarAggregateNode : public ExecNode {
   }
 
   const std::vector<int> target_field_ids_;
-  const std::vector<internal::Aggregate> aggs_;
+  const std::vector<Aggregate> aggs_;
   const std::vector<const ScalarAggregateKernel*> kernels_;
   std::vector<std::vector<std::unique_ptr<KernelState>>> states_;
@@ -276,7 +295,7 @@ class GroupByNode : public ExecNode {
  public:
   GroupByNode(ExecNode* input, std::shared_ptr<Schema> output_schema, ExecContext* ctx,
               std::vector<int> key_field_ids, std::vector<int> agg_src_field_ids,
-              std::vector<internal::Aggregate> aggs,
+              std::vector<Aggregate> aggs,
               std::vector<const HashAggregateKernel*> agg_kernels)
       : ExecNode(input->plan(), {input}, {"groupby"}, std::move(output_schema),
                  /*num_outputs=*/1),
@@ -658,7 +677,7 @@ class GroupByNode : public ExecNode {
   const std::vector<int> key_field_ids_;
   const std::vector<int> agg_src_field_ids_;
-  const std::vector<internal::Aggregate> aggs_;
+  const std::vector<Aggregate> aggs_;
   const std::vector<const HashAggregateKernel*> agg_kernels_;
 
   ThreadIndexer get_thread_index_;
diff --git a/cpp/src/arrow/compute/exec/options.h b/cpp/src/arrow/compute/exec/options.h
index 84254ed5ad9..48448b69f0f 100644
--- a/cpp/src/arrow/compute/exec/options.h
+++ b/cpp/src/arrow/compute/exec/options.h
@@ -111,12 +111,12 @@ class ARROW_EXPORT ProjectNodeOptions : public ExecNodeOptions {
 /// \brief Make a node which aggregates input batches, optionally grouped by keys.
class ARROW_EXPORT AggregateNodeOptions : public ExecNodeOptions { public: - AggregateNodeOptions(std::vector aggregates, + AggregateNodeOptions(std::vector aggregates, std::vector keys = {}) : aggregates(std::move(aggregates)), keys(std::move(keys)) {} // aggregations which will be applied to the targetted fields - std::vector aggregates; + std::vector aggregates; // keys by which aggregations will be grouped std::vector keys; }; diff --git a/cpp/src/arrow/compute/kernels/aggregate_benchmark.cc b/cpp/src/arrow/compute/kernels/aggregate_benchmark.cc index c2712854345..dfd008de1e5 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_benchmark.cc +++ b/cpp/src/arrow/compute/kernels/aggregate_benchmark.cc @@ -307,7 +307,7 @@ BENCHMARK_TEMPLATE(ReferenceSum, SumBitmapVectorizeUnroll) // static void BenchmarkGroupBy(benchmark::State& state, - std::vector aggregates, + std::vector aggregates, std::vector arguments, std::vector keys) { for (auto _ : state) { ABORT_NOT_OK(GroupBy(arguments, keys, aggregates).status()); diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc b/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc index 0478cba98a1..c6e87e5e368 100644 --- a/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc +++ b/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc @@ -72,7 +72,7 @@ namespace compute { namespace { Result NaiveGroupBy(std::vector arguments, std::vector keys, - const std::vector& aggregates) { + const std::vector& aggregates) { ARROW_ASSIGN_OR_RAISE(auto key_batch, ExecBatch::Make(std::move(keys))); ARROW_ASSIGN_OR_RAISE(auto grouper, Grouper::Make(key_batch.GetDescriptors())); @@ -123,7 +123,7 @@ Result NaiveGroupBy(std::vector arguments, std::vector keys Result GroupByUsingExecPlan(const BatchesWithSchema& input, const std::vector& key_names, - const std::vector& aggregates, + const std::vector& aggregates, bool use_threads, ExecContext* ctx) { std::vector keys(key_names.size()); for (size_t i = 0; i < key_names.size(); ++i) { @@ -182,7 +182,7 @@ Result GroupByUsingExecPlan(const BatchesWithSchema& input, /// Simpler overload where you can give the columns as datums Result GroupByUsingExecPlan(const std::vector& arguments, const std::vector& keys, - const std::vector& aggregates, + const std::vector& aggregates, bool use_threads, ExecContext* ctx) { using arrow::compute::detail::ExecBatchIterator; @@ -215,11 +215,11 @@ Result GroupByUsingExecPlan(const std::vector& arguments, return GroupByUsingExecPlan(input, key_names, aggregates, use_threads, ctx); } -void ValidateGroupBy(const std::vector& aggregates, +void ValidateGroupBy(const std::vector& aggregates, std::vector arguments, std::vector keys) { ASSERT_OK_AND_ASSIGN(Datum expected, NaiveGroupBy(arguments, keys, aggregates)); - ASSERT_OK_AND_ASSIGN(Datum actual, GroupBy(arguments, keys, aggregates)); + ASSERT_OK_AND_ASSIGN(Datum actual, internal::GroupBy(arguments, keys, aggregates)); ASSERT_OK(expected.make_array()->ValidateFull()); ValidateOutput(actual); @@ -244,7 +244,7 @@ Result GroupByTest(const std::vector& arguments, const std::vector& keys, const std::vector& aggregates, bool use_threads, bool use_exec_plan) { - std::vector internal_aggregates; + std::vector internal_aggregates; int idx = 0; for (auto t_agg : aggregates) { internal_aggregates.push_back( @@ -3458,7 +3458,6 @@ TEST(GroupBy, ConcreteCaseWithValidateGroupBy) { std::shared_ptr non_null = std::make_shared(CountOptions::ONLY_VALID); - using internal::Aggregate; for (auto agg : { Aggregate{"hash_sum", nullptr, "agg_0", "hash_sum"}, 
Aggregate{"hash_count", non_null, "agg_1", "hash_count"}, @@ -3486,7 +3485,6 @@ TEST(GroupBy, CountNull) { std::shared_ptr skipna = std::make_shared(CountOptions::ONLY_VALID); - using internal::Aggregate; for (auto agg : { Aggregate{"hash_count", keepna, "agg_0", "hash_count"}, Aggregate{"hash_count", skipna, "agg_1", "hash_count"}, diff --git a/cpp/src/arrow/dataset/scanner.cc b/cpp/src/arrow/dataset/scanner.cc index 2295665b161..3cd5f1fcc26 100644 --- a/cpp/src/arrow/dataset/scanner.cc +++ b/cpp/src/arrow/dataset/scanner.cc @@ -675,7 +675,7 @@ Result AsyncScanner::CountRows() { std::move(fragment_gen)), options}}, {"project", compute::ProjectNodeOptions{{options->filter}, {"mask"}}}, - {"aggregate", compute::AggregateNodeOptions{{compute::internal::Aggregate{ + {"aggregate", compute::AggregateNodeOptions{{compute::Aggregate{ "sum", nullptr, "mask", "selected_count"}}}}, {"sink", compute::SinkNodeOptions{&sink_gen}}, }) diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc index 6dd75db114a..5316f63d080 100644 --- a/cpp/src/arrow/dataset/scanner_test.cc +++ b/cpp/src/arrow/dataset/scanner_test.cc @@ -1838,7 +1838,7 @@ TEST(ScanNode, MinimalScalarAggEndToEnd) { ASSERT_OK_AND_ASSIGN( compute::ExecNode * aggregate, compute::MakeExecNode("aggregate", plan.get(), {project}, - compute::AggregateNodeOptions{{compute::internal::Aggregate{ + compute::AggregateNodeOptions{{compute::Aggregate{ "sum", nullptr, "a * 2", "sum(a * 2)"}}})); // finally, pipe the aggregate node into a sink node @@ -1928,7 +1928,7 @@ TEST(ScanNode, MinimalGroupedAggEndToEnd) { compute::MakeExecNode( "aggregate", plan.get(), {project}, compute::AggregateNodeOptions{ - {compute::internal::Aggregate{"hash_sum", nullptr, "a * 2", "sum(a * 2)"}}, + {compute::Aggregate{"hash_sum", nullptr, "a * 2", "sum(a * 2)"}}, /*keys=*/{"b"}})); // finally, pipe the aggregate node into a sink node diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 302ac99c36a..9e43eb4eb9c 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -2408,7 +2408,7 @@ cdef extern from * namespace "arrow::compute": cdef extern from "arrow/compute/exec/aggregate.h" namespace \ "arrow::compute::internal" nogil: - cdef cppclass CAggregate "arrow::compute::internal::Aggregate": + cdef cppclass CAggregate "arrow::compute::Aggregate": c_string function shared_ptr[CFunctionOptions] options diff --git a/r/src/compute-exec.cpp b/r/src/compute-exec.cpp index 7b28a700c2b..fc4a41ab03a 100644 --- a/r/src/compute-exec.cpp +++ b/r/src/compute-exec.cpp @@ -229,7 +229,7 @@ std::shared_ptr ExecNode_Project( std::shared_ptr ExecNode_Aggregate( const std::shared_ptr& input, cpp11::list options, std::vector key_names) { - std::vector aggregates; + std::vector aggregates; for (cpp11::list name_opts : options) { auto function = cpp11::as_cpp(name_opts["fun"]); @@ -237,7 +237,7 @@ std::shared_ptr ExecNode_Aggregate( auto target = cpp11::as_cpp(name_opts["target"]); auto name = cpp11::as_cpp(name_opts["name"]); - aggregates.push_back(arrow::compute::internal::Aggregate{ + aggregates.push_back(arrow::compute::Aggregate{ std::move(function), opts.get(), std::move(target), std::move(name)}); } From 3aa44a3ecec0f181cdf0876b86130c3a7fa3e67f Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Fri, 10 Jun 2022 13:14:24 +0530 Subject: [PATCH 18/32] test commit on fixing cglib --- c_glib/arrow-glib/compute.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index 1595c745bb9..4044a4955dc 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -1254,7 +1254,7 @@ garrow_aggregate_node_options_new(GList *aggregations, gsize n_keys, GError **error) { - std::vector arrow_aggregates; + std::vector arrow_aggregates; std::vector arrow_keys; for (auto node = aggregations; node; node = node->next) { auto aggregation_priv = GARROW_AGGREGATION_GET_PRIVATE(node->data); From c0d72b470b2b983f0a8e521f6c2de92b766308d1 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Sat, 11 Jun 2022 10:56:29 +0530 Subject: [PATCH 19/32] format and updated the aggregate_benchmark script --- cpp/src/arrow/compute/exec/aggregate_node.cc | 13 ++++++------- cpp/src/arrow/compute/exec/options.h | 4 ++-- .../arrow/compute/kernels/aggregate_benchmark.cc | 5 +++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/cpp/src/arrow/compute/exec/aggregate_node.cc b/cpp/src/arrow/compute/exec/aggregate_node.cc index 5d71e23546f..c36da8c13f8 100644 --- a/cpp/src/arrow/compute/exec/aggregate_node.cc +++ b/cpp/src/arrow/compute/exec/aggregate_node.cc @@ -49,8 +49,7 @@ Result> GetKernels( Result>> InitKernels( const std::vector& kernels, ExecContext* ctx, - const std::vector& aggregates, - const std::vector& in_descrs); + const std::vector& aggregates, const std::vector& in_descrs); Result ResolveKernels( const std::vector& aggregates, @@ -62,9 +61,10 @@ Result ResolveKernels( namespace { -void AggregatesToString(std::stringstream* ss, const Schema& input_schema, - const std::vector& aggs, - const std::vector& target_field_ids, int indent = 0) { +void AggregatesToString( + std::stringstream* ss, const Schema& input_schema, const std::vector& aggs, + const std::vector& target_field_ids, + int indent = 0) { *ss << "aggregates=[" << std::endl; for (size_t i = 0; i < aggs.size(); i++) { for (int j = 0; j < indent; ++j) *ss << " "; @@ -83,8 +83,7 @@ class ScalarAggregateNode : public ExecNode { public: ScalarAggregateNode(ExecPlan* plan, std::vector inputs, std::shared_ptr output_schema, - std::vector target_field_ids, - std::vector aggs, + std::vector target_field_ids, std::vector aggs, std::vector kernels, std::vector>> states) : ExecNode(plan, std::move(inputs), {"target"}, diff --git a/cpp/src/arrow/compute/exec/options.h b/cpp/src/arrow/compute/exec/options.h index 48448b69f0f..0e0314eedb4 100644 --- a/cpp/src/arrow/compute/exec/options.h +++ b/cpp/src/arrow/compute/exec/options.h @@ -111,8 +111,8 @@ class ARROW_EXPORT ProjectNodeOptions : public ExecNodeOptions { /// \brief Make a node which aggregates input batches, optionally grouped by keys. 
class ARROW_EXPORT AggregateNodeOptions : public ExecNodeOptions { public: - AggregateNodeOptions(std::vector aggregates, - std::vector keys = {}) + explicit AggregateNodeOptions(std::vector aggregates, + std::vector keys = {}) : aggregates(std::move(aggregates)), keys(std::move(keys)) {} // aggregations which will be applied to the targetted fields diff --git a/cpp/src/arrow/compute/kernels/aggregate_benchmark.cc b/cpp/src/arrow/compute/kernels/aggregate_benchmark.cc index dfd008de1e5..a8cae5b50ca 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_benchmark.cc +++ b/cpp/src/arrow/compute/kernels/aggregate_benchmark.cc @@ -306,8 +306,9 @@ BENCHMARK_TEMPLATE(ReferenceSum, SumBitmapVectorizeUnroll) // GroupBy // -static void BenchmarkGroupBy(benchmark::State& state, - std::vector aggregates, +using arrow::compute::internal::GroupBy; + +static void BenchmarkGroupBy(benchmark::State& state, std::vector aggregates, std::vector arguments, std::vector keys) { for (auto _ : state) { ABORT_NOT_OK(GroupBy(arguments, keys, aggregates).status()); From 2533d42140af16ffc22fbf4d138ff6ba9aa46c03 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Fri, 17 Jun 2022 06:28:26 +0530 Subject: [PATCH 20/32] temp commit --- cpp/src/arrow/compute/api_aggregate.h | 88 -------------------------- cpp/src/arrow/compute/exec/aggregate.h | 6 +- 2 files changed, 3 insertions(+), 91 deletions(-) diff --git a/cpp/src/arrow/compute/api_aggregate.h b/cpp/src/arrow/compute/api_aggregate.h index dd3bd591a5f..4d16447b8b2 100644 --- a/cpp/src/arrow/compute/api_aggregate.h +++ b/cpp/src/arrow/compute/api_aggregate.h @@ -408,93 +408,5 @@ ARROW_EXPORT Result Index(const Datum& value, const IndexOptions& options, ExecContext* ctx = NULLPTR); - -namespace internal { -/// Internal use only: streaming group identifier. -/// Consumes batches of keys and yields batches of the group ids. -class ARROW_EXPORT Grouper { - public: - virtual ~Grouper() = default; - - /// Construct a Grouper which receives the specified key types - static Result> Make(const std::vector& descrs, - ExecContext* ctx = default_exec_context()); - - /// Consume a batch of keys, producing the corresponding group ids as an integer array. - /// Currently only uint32 indices will be produced, eventually the bit width will only - /// be as wide as necessary. - virtual Result Consume(const ExecBatch& batch) = 0; - - /// Get current unique keys. May be called multiple times. - virtual Result GetUniques() = 0; - - /// Get the current number of groups. - virtual uint32_t num_groups() const = 0; - - /// \brief Assemble lists of indices of identical elements. - /// - /// \param[in] ids An unsigned, all-valid integral array which will be - /// used as grouping criteria. - /// \param[in] num_groups An upper bound for the elements of ids - /// \return A num_groups-long ListArray where the slot at i contains a - /// list of indices where i appears in ids. - /// - /// MakeGroupings([ - /// 2, - /// 2, - /// 5, - /// 5, - /// 2, - /// 3 - /// ], 8) == [ - /// [], - /// [], - /// [0, 1, 4], - /// [5], - /// [], - /// [2, 3], - /// [], - /// [] - /// ] - static Result> MakeGroupings( - const UInt32Array& ids, uint32_t num_groups, - ExecContext* ctx = default_exec_context()); - - /// \brief Produce a ListArray whose slots are selections of `array` which correspond to - /// the provided groupings. 
- /// - /// For example, - /// ApplyGroupings([ - /// [], - /// [], - /// [0, 1, 4], - /// [5], - /// [], - /// [2, 3], - /// [], - /// [] - /// ], [2, 2, 5, 5, 2, 3]) == [ - /// [], - /// [], - /// [2, 2, 2], - /// [3], - /// [], - /// [5, 5], - /// [], - /// [] - /// ] - static Result> ApplyGroupings( - const ListArray& groupings, const Array& array, - ExecContext* ctx = default_exec_context()); -}; - -/// Internal use only: helper function for testing HashAggregateKernels. -/// This will be replaced by streaming execution operators. -ARROW_EXPORT -Result GroupBy(const std::vector& arguments, const std::vector& keys, - const std::vector& aggregates, bool use_threads = false, - ExecContext* ctx = default_exec_context()); - -} // namespace internal } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/exec/aggregate.h b/cpp/src/arrow/compute/exec/aggregate.h index 2c62acf2319..67d22d0df41 100644 --- a/cpp/src/arrow/compute/exec/aggregate.h +++ b/cpp/src/arrow/compute/exec/aggregate.h @@ -41,16 +41,16 @@ Result GroupBy(const std::vector& arguments, const std::vector> GetKernels( - ExecContext* ctx, const std::vector& aggregates, + ExecContext* ctx, const std::vector& aggregates, const std::vector& in_descrs); Result>> InitKernels( const std::vector& kernels, ExecContext* ctx, - const std::vector& aggregates, + const std::vector& aggregates, const std::vector& in_descrs); Result ResolveKernels( - const std::vector& aggregates, + const std::vector& aggregates, const std::vector& kernels, const std::vector>& states, ExecContext* ctx, const std::vector& descrs); From dda8b851a627bbae8dbe79fd73e91fd5e884c623 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Fri, 17 Jun 2022 07:00:16 +0530 Subject: [PATCH 21/32] rebase --- cpp/src/arrow/compute/api_aggregate.h | 30 ++++++++++---------- cpp/src/arrow/compute/exec/aggregate.h | 3 +- cpp/src/arrow/compute/exec/aggregate_node.cc | 18 ------------ 3 files changed, 16 insertions(+), 35 deletions(-) diff --git a/cpp/src/arrow/compute/api_aggregate.h b/cpp/src/arrow/compute/api_aggregate.h index 4d16447b8b2..9b71994e5e4 100644 --- a/cpp/src/arrow/compute/api_aggregate.h +++ b/cpp/src/arrow/compute/api_aggregate.h @@ -56,21 +56,6 @@ class ARROW_EXPORT ScalarAggregateOptions : public FunctionOptions { uint32_t min_count; }; -/// \brief Configure a grouped aggregation -struct ARROW_EXPORT Aggregate { - /// the name of the aggregation function - std::string function; - - /// options for the aggregation function - const FunctionOptions* options; - - // fields to which aggregations will be applied - FieldRef target; - - // output field name for aggregations - std::string name; -}; - /// \brief Control count aggregate kernel behavior. /// /// By default, only non-null values are counted. 
@@ -408,5 +393,20 @@ ARROW_EXPORT Result Index(const Datum& value, const IndexOptions& options, ExecContext* ctx = NULLPTR); +/// \brief Configure a grouped aggregation +struct ARROW_EXPORT Aggregate { + /// the name of the aggregation function + std::string function; + + /// options for the aggregation function + const FunctionOptions* options; + + // fields to which aggregations will be applied + FieldRef target; + + // output field name for aggregations + std::string name; +}; + } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/exec/aggregate.h b/cpp/src/arrow/compute/exec/aggregate.h index 67d22d0df41..753b0a8c47e 100644 --- a/cpp/src/arrow/compute/exec/aggregate.h +++ b/cpp/src/arrow/compute/exec/aggregate.h @@ -46,8 +46,7 @@ Result> GetKernels( Result>> InitKernels( const std::vector& kernels, ExecContext* ctx, - const std::vector& aggregates, - const std::vector& in_descrs); + const std::vector& aggregates, const std::vector& in_descrs); Result ResolveKernels( const std::vector& aggregates, diff --git a/cpp/src/arrow/compute/exec/aggregate_node.cc b/cpp/src/arrow/compute/exec/aggregate_node.cc index c36da8c13f8..7ada036fcc4 100644 --- a/cpp/src/arrow/compute/exec/aggregate_node.cc +++ b/cpp/src/arrow/compute/exec/aggregate_node.cc @@ -41,24 +41,6 @@ using internal::checked_cast; namespace compute { -namespace internal { - -Result> GetKernels( - ExecContext* ctx, const std::vector& aggregates, - const std::vector& in_descrs); - -Result>> InitKernels( - const std::vector& kernels, ExecContext* ctx, - const std::vector& aggregates, const std::vector& in_descrs); - -Result ResolveKernels( - const std::vector& aggregates, - const std::vector& kernels, - const std::vector>& states, ExecContext* ctx, - const std::vector& descrs); - -} // namespace internal - namespace { void AggregatesToString( From c7a5e54009bdef8b730e64c45903aa65ad9f146a Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Tue, 21 Jun 2022 13:08:44 +0530 Subject: [PATCH 22/32] tmp check on function refactor --- r/R/query-engine.R | 84 +++++++++++++++++++++++++--------------------- 1 file changed, 45 insertions(+), 39 deletions(-) diff --git a/r/R/query-engine.R b/r/R/query-engine.R index 1a53298fd47..7aaf32ff0cc 100644 --- a/r/R/query-engine.R +++ b/r/R/query-engine.R @@ -14,7 +14,6 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. - ExecPlan <- R6Class("ExecPlan", inherit = ArrowObject, public = list( @@ -99,44 +98,8 @@ ExecPlan <- R6Class("ExecPlan", } if (!is.null(.data$aggregations)) { - # Project to include just the data required for each aggregation, - # plus group_by_vars (last) - # TODO: validate that none of names(aggregations) are the same as names(group_by_vars) - # dplyr does not error on this but the result it gives isn't great - node <- node$Project(summarize_projection(.data)) - - if (grouped) { - # We need to prefix all of the aggregation function names with "hash_" - .data$aggregations <- lapply(.data$aggregations, function(x) { - x[["fun"]] <- paste0("hash_", x[["fun"]]) - x - }) - } - target_names <- names(.data$aggregations) - for (i in seq_len(length(target_names))) { - .data$aggregations[[i]][["name"]] <- .data$aggregations[[i]][["target"]] <- target_names[i] - } - - node <- node$Aggregate( - options = .data$aggregations, - key_names = group_vars - ) - - if (grouped) { - # The result will have result columns first then the grouping cols. 
- # dplyr orders group cols first, so adapt the result to meet that expectation. - node <- node$Project( - make_field_refs(c(group_vars, names(.data$aggregations))) - ) - if (getOption("arrow.summarise.sort", FALSE)) { - # Add sorting instructions for the rows too to match dplyr - # (see below about why sorting isn't itself a Node) - node$extras$sort <- list( - names = group_vars, - orders = rep(0L, length(group_vars)) - ) - } - } + list[node, data] <- private$.config_aggregation(node, .data, grouped, group_vars) + .data <- data } else { # If any columns are derived, reordered, or renamed we need to Project # If there are aggregations, the projection was already handled above @@ -258,6 +221,49 @@ ExecPlan <- R6Class("ExecPlan", ) }, Stop = function() ExecPlan_StopProducing(self) + ), + private = list( + .config_aggregation = function(node, data, grouped, group_vars) { + # Project to include just the data required for each aggregation, + # plus group_by_vars (last) + # TODO: validate that none of names(aggregations) are the same as names(group_by_vars) + # dplyr does not error on this but the result it gives isn't great + node <- node$Project(summarize_projection(data)) + + if (grouped) { + # We need to prefix all of the aggregation function names with "hash_" + data$aggregations <- lapply(data$aggregations, function(x) { + x[["fun"]] <- paste0("hash_", x[["fun"]]) + x + }) + } + target_names <- names(data$aggregations) + for (i in seq_len(length(target_names))) { + data$aggregations[[i]][["name"]] <- data$aggregations[[i]][["target"]] <- target_names[i] + } + + node <- node$Aggregate( + options = data$aggregations, + key_names = group_vars + ) + + if (grouped) { + # The result will have result columns first then the grouping cols. + # dplyr orders group cols first, so adapt the result to meet that expectation. 
+ node <- node$Project( + make_field_refs(c(group_vars, names(data$aggregations))) + ) + if (getOption("arrow.summarise.sort", FALSE)) { + # Add sorting instructions for the rows too to match dplyr + # (see below about why sorting isn't itself a Node) + node$extras$sort <- list( + names = group_vars, + orders = rep(0L, length(group_vars)) + ) + } + } + return(list(node = node, data = data)) + } ) ) ExecPlan$create <- function(use_threads = option_use_threads()) { From fd443e0fd2739e2adbd68b627b921453bb85dae0 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Wed, 22 Jun 2022 07:26:51 +0530 Subject: [PATCH 23/32] fixed the test issue --- r/R/query-engine.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/r/R/query-engine.R b/r/R/query-engine.R index 7aaf32ff0cc..8d87fa1be16 100644 --- a/r/R/query-engine.R +++ b/r/R/query-engine.R @@ -98,8 +98,9 @@ ExecPlan <- R6Class("ExecPlan", } if (!is.null(.data$aggregations)) { - list[node, data] <- private$.config_aggregation(node, .data, grouped, group_vars) - .data <- data + config_agg <- private$.set_aggregation(node, .data, grouped, group_vars) + node <- config_agg$node + .data <- config_agg$data } else { # If any columns are derived, reordered, or renamed we need to Project # If there are aggregations, the projection was already handled above @@ -223,7 +224,7 @@ ExecPlan <- R6Class("ExecPlan", Stop = function() ExecPlan_StopProducing(self) ), private = list( - .config_aggregation = function(node, data, grouped, group_vars) { + .set_aggregation = function(node, data, grouped, group_vars) { # Project to include just the data required for each aggregation, # plus group_by_vars (last) # TODO: validate that none of names(aggregations) are the same as names(group_by_vars) From d53c68520001fac74331ce44a8eeb755e3332768 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Wed, 22 Jun 2022 10:52:41 +0530 Subject: [PATCH 24/32] rebase --- .../execution_plan_documentation_examples.cc | 2 +- cpp/src/arrow/compute/api_aggregate.h | 2 +- cpp/src/arrow/compute/exec/aggregate.cc | 5 +- cpp/src/arrow/compute/exec/aggregate_node.cc | 7 +- cpp/src/arrow/compute/exec/plan_test.cc | 31 +- .../compute/kernels/hash_aggregate_test.cc | 313 +++++++++--------- r/src/compute-exec.cpp | 2 +- 7 files changed, 182 insertions(+), 180 deletions(-) diff --git a/cpp/examples/arrow/execution_plan_documentation_examples.cc b/cpp/examples/arrow/execution_plan_documentation_examples.cc index 44bff56e00c..331388a5291 100644 --- a/cpp/examples/arrow/execution_plan_documentation_examples.cc +++ b/cpp/examples/arrow/execution_plan_documentation_examples.cc @@ -540,7 +540,7 @@ arrow::Status SourceGroupAggregateSinkExample(cp::ExecContext& exec_context) { cp::MakeExecNode("source", plan.get(), {}, source_node_options)); auto options = std::make_shared(cp::CountOptions::ONLY_VALID); auto aggregate_options = - cp::AggregateNodeOptions{/*aggregates=*/{{"hash_count", &options, "a", "count(a)"}}, + cp::AggregateNodeOptions{/*aggregates=*/{{"hash_count", options, "a", "count(a)"}}, /*keys=*/{"b"}}; ARROW_ASSIGN_OR_RAISE( cp::ExecNode * aggregate, diff --git a/cpp/src/arrow/compute/api_aggregate.h b/cpp/src/arrow/compute/api_aggregate.h index 9b71994e5e4..55f434c6659 100644 --- a/cpp/src/arrow/compute/api_aggregate.h +++ b/cpp/src/arrow/compute/api_aggregate.h @@ -399,7 +399,7 @@ struct ARROW_EXPORT Aggregate { std::string function; /// options for the aggregation function - const FunctionOptions* options; + std::shared_ptr options; // fields to which aggregations 
will be applied FieldRef target; diff --git a/cpp/src/arrow/compute/exec/aggregate.cc b/cpp/src/arrow/compute/exec/aggregate.cc index 934cdd4d1fd..41b5bb75b66 100644 --- a/cpp/src/arrow/compute/exec/aggregate.cc +++ b/cpp/src/arrow/compute/exec/aggregate.cc @@ -22,6 +22,7 @@ #include "arrow/compute/exec_internal.h" #include "arrow/compute/registry.h" #include "arrow/compute/row/grouper.h" +#include "arrow/util/checked_cast.h" #include "arrow/util/task_group.h" namespace arrow { @@ -55,7 +56,9 @@ Result>> InitKernels( std::vector> states(kernels.size()); for (size_t i = 0; i < aggregates.size(); ++i) { - const FunctionOptions* options = aggregates[i].options.get(); + const FunctionOptions* options = + arrow::internal::checked_cast( + aggregates[i].options.get()); if (options == nullptr) { // use known default options for the named function if possible diff --git a/cpp/src/arrow/compute/exec/aggregate_node.cc b/cpp/src/arrow/compute/exec/aggregate_node.cc index 7ada036fcc4..8c7899c41ec 100644 --- a/cpp/src/arrow/compute/exec/aggregate_node.cc +++ b/cpp/src/arrow/compute/exec/aggregate_node.cc @@ -43,10 +43,9 @@ namespace compute { namespace { -void AggregatesToString( - std::stringstream* ss, const Schema& input_schema, const std::vector& aggs, - const std::vector& target_field_ids, - int indent = 0) { +void AggregatesToString(std::stringstream* ss, const Schema& input_schema, + const std::vector& aggs, + const std::vector& target_field_ids, int indent = 0) { *ss << "aggregates=[" << std::endl; for (size_t i = 0; i < aggs.size(); i++) { for (int j = 0; j < indent; ++j) *ss << " "; diff --git a/cpp/src/arrow/compute/exec/plan_test.cc b/cpp/src/arrow/compute/exec/plan_test.cc index 4ffee8bdc13..9efa6623e5a 100644 --- a/cpp/src/arrow/compute/exec/plan_test.cc +++ b/cpp/src/arrow/compute/exec/plan_test.cc @@ -1152,19 +1152,18 @@ TEST(ExecPlanExecution, AggregationPreservesOptions) { { auto options = std::make_shared(TDigestOptions::Defaults()); - ASSERT_OK( - Declaration::Sequence( - { - {"source", - SourceNodeOptions{basic_data.schema, basic_data.gen(/*parallel=*/false, - /*slow=*/false)}}, - {"aggregate", - AggregateNodeOptions{ - /*aggregates=*/{{"tdigest", options, "i32", "tdigest(i32)"}}, - }}, - {"sink", SinkNodeOptions{&sink_gen}}, - }) - .AddToPlan(plan.get())); + ASSERT_OK(Declaration::Sequence( + { + {"source", SourceNodeOptions{basic_data.schema, + basic_data.gen(/*parallel=*/false, + /*slow=*/false)}}, + {"aggregate", + AggregateNodeOptions{ + /*aggregates=*/{{"tdigest", options, "i32", "tdigest(i32)"}}, + }}, + {"sink", SinkNodeOptions{&sink_gen}}, + }) + .AddToPlan(plan.get())); } ASSERT_THAT(StartAndCollect(plan.get(), sink_gen), @@ -1186,9 +1185,9 @@ TEST(ExecPlanExecution, AggregationPreservesOptions) { {"source", SourceNodeOptions{data.schema, data.gen(/*parallel=*/false, /*slow=*/false)}}, {"aggregate", - AggregateNodeOptions{/*aggregates=*/{{"hash_count", options, - "i32", "count(i32)"}}, - /*keys=*/{"str"}}}, + AggregateNodeOptions{ + /*aggregates=*/{{"hash_count", options, "i32", "count(i32)"}}, + /*keys=*/{"str"}}}, {"sink", SinkNodeOptions{&sink_gen}}, }) .AddToPlan(plan.get())); diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc b/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc index c6e87e5e368..82d40aba948 100644 --- a/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc +++ b/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc @@ -237,13 +237,13 @@ ExecContext* small_chunksize_context(bool use_threads = false) { struct TestAggregate { std::string 
function; - const FunctionOptions* options; + std::shared_ptr options; }; Result GroupByTest(const std::vector& arguments, const std::vector& keys, const std::vector& aggregates, bool use_threads, - bool use_exec_plan) { + bool use_exec_plan = false) { std::vector internal_aggregates; int idx = 0; for (auto t_agg : aggregates) { @@ -1029,17 +1029,17 @@ TEST(GroupBy, MeanOnly) { [null, 3] ])"}); - ScalarAggregateOptions min_count(/*skip_nulls=*/true, /*min_count=*/3); - ASSERT_OK_AND_ASSIGN( - Datum aggregated_and_grouped, - internal::GroupBy( - {table->GetColumnByName("argument"), table->GetColumnByName("argument")}, - {table->GetColumnByName("key")}, - { - {"hash_mean", nullptr, "agg_0", "hash_mean"}, - {"hash_mean", min_count, "agg_1", "hash_mean"}, - }, - use_threads)); + auto min_count = + std::make_shared(/*skip_nulls=*/true, /*min_count=*/3); + ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped, + GroupByTest({table->GetColumnByName("argument"), + table->GetColumnByName("argument")}, + {table->GetColumnByName("key")}, + { + {"hash_mean", nullptr}, + {"hash_mean", min_count}, + }, + use_threads)); SortBy({"key_0"}, &aggregated_and_grouped); AssertDatumsApproxEqual(ArrayFromJSON(struct_({ @@ -1111,7 +1111,7 @@ TEST(GroupBy, VarianceAndStddev) { ])"); ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped, - internal::GroupBy( + GroupByTest( { batch->GetColumnByName("argument"), batch->GetColumnByName("argument"), @@ -1120,9 +1120,10 @@ TEST(GroupBy, VarianceAndStddev) { batch->GetColumnByName("key"), }, { - {"hash_variance", nullptr, "agg_0", "hash_variance_0"}, - {"hash_stddev", nullptr, "agg_1", "hash_stddev_1"}, - })); + {"hash_variance", nullptr}, + {"hash_stddev", nullptr}, + }, + false)); AssertDatumsApproxEqual(ArrayFromJSON(struct_({ field("hash_variance", float64()), @@ -1152,19 +1153,19 @@ TEST(GroupBy, VarianceAndStddev) { [null, 3] ])"); - ASSERT_OK_AND_ASSIGN(aggregated_and_grouped, - internal::GroupBy( - { - batch->GetColumnByName("argument"), - batch->GetColumnByName("argument"), - }, - { - batch->GetColumnByName("key"), - }, - { - {"hash_variance", nullptr, "agg_0", "hash_variance_0"}, - {"hash_stddev", nullptr, "agg_1", "hash_stddev_1"}, - })); + ASSERT_OK_AND_ASSIGN(aggregated_and_grouped, GroupByTest( + { + batch->GetColumnByName("argument"), + batch->GetColumnByName("argument"), + }, + { + batch->GetColumnByName("key"), + }, + { + {"hash_variance", nullptr}, + {"hash_stddev", nullptr}, + }, + false)); AssertDatumsApproxEqual(ArrayFromJSON(struct_({ field("hash_variance", float64()), @@ -1181,21 +1182,21 @@ TEST(GroupBy, VarianceAndStddev) { /*verbose=*/true); // Test ddof - VarianceOptions variance_options(/*ddof=*/2); - ASSERT_OK_AND_ASSIGN( - aggregated_and_grouped, - internal::GroupBy( - { - batch->GetColumnByName("argument"), - batch->GetColumnByName("argument"), - }, - { - batch->GetColumnByName("key"), - }, - { - {"hash_variance", variance_options, "agg_0", "hash_variance_0"}, - {"hash_stddev", variance_options, "agg_1", "hash_stddev_0"}, - })); + auto variance_options = std::make_shared(/*ddof=*/2); + ASSERT_OK_AND_ASSIGN(aggregated_and_grouped, + GroupByTest( + { + batch->GetColumnByName("argument"), + batch->GetColumnByName("argument"), + }, + { + batch->GetColumnByName("key"), + }, + { + {"hash_variance", variance_options}, + {"hash_stddev", variance_options}, + }, + false)); AssertDatumsApproxEqual(ArrayFromJSON(struct_({ field("hash_variance", float64()), @@ -1228,7 +1229,7 @@ TEST(GroupBy, VarianceAndStddevDecimal) { ])"); ASSERT_OK_AND_ASSIGN(Datum 
aggregated_and_grouped, - internal::GroupBy( + GroupByTest( { batch->GetColumnByName("argument0"), batch->GetColumnByName("argument0"), @@ -1239,11 +1240,12 @@ TEST(GroupBy, VarianceAndStddevDecimal) { batch->GetColumnByName("key"), }, { - {"hash_variance", nullptr, "agg_0", "hash_variance_0"}, - {"hash_stddev", nullptr, "agg_1", "hash_stddev_1"}, - {"hash_variance", nullptr, "agg_2", "hash_variance_2"}, - {"hash_stddev", nullptr, "agg_3", "hash_stddev_3"}, - })); + {"hash_variance", nullptr}, + {"hash_stddev", nullptr}, + {"hash_variance", nullptr}, + {"hash_stddev", nullptr}, + }, + false)); AssertDatumsApproxEqual(ArrayFromJSON(struct_({ field("hash_variance", float64()), @@ -1280,37 +1282,41 @@ TEST(GroupBy, TDigest) { [null, 4] ])"); - TDigestOptions options1(std::vector{0.5, 0.9, 0.99}); - TDigestOptions options2(std::vector{0.5, 0.9, 0.99}, /*delta=*/50, - /*buffer_size=*/1024); - TDigestOptions keep_nulls(/*q=*/0.5, /*delta=*/100, /*buffer_size=*/500, - /*skip_nulls=*/false, /*min_count=*/0); - TDigestOptions min_count(/*q=*/0.5, /*delta=*/100, /*buffer_size=*/500, - /*skip_nulls=*/true, /*min_count=*/3); - TDigestOptions keep_nulls_min_count(/*q=*/0.5, /*delta=*/100, /*buffer_size=*/500, - /*skip_nulls=*/false, /*min_count=*/3); - ASSERT_OK_AND_ASSIGN( - Datum aggregated_and_grouped, - internal::GroupBy( - { - batch->GetColumnByName("argument"), - batch->GetColumnByName("argument"), - batch->GetColumnByName("argument"), - batch->GetColumnByName("argument"), - batch->GetColumnByName("argument"), - batch->GetColumnByName("argument"), - }, - { - batch->GetColumnByName("key"), - }, - { - {"hash_tdigest", nullptr, "agg_0", "hash_tdigest_0"}, - {"hash_tdigest", options1, "agg_1", "hash_tdigest_1"}, - {"hash_tdigest", options2, "agg_2", "hash_tdigest_2"}, - {"hash_tdigest", keep_nulls, "agg_3", "hash_tdigest_3"}, - {"hash_tdigest", min_count, "agg_4", "hash_tdigest_4"}, - {"hash_tdigest", keep_nulls_min_count, "agg_5", "hash_tdigest_5"}, - })); + auto options1 = std::make_shared(std::vector{0.5, 0.9, 0.99}); + auto options2 = + std::make_shared(std::vector{0.5, 0.9, 0.99}, /*delta=*/50, + /*buffer_size=*/1024); + auto keep_nulls = + std::make_shared(/*q=*/0.5, /*delta=*/100, /*buffer_size=*/500, + /*skip_nulls=*/false, /*min_count=*/0); + auto min_count = + std::make_shared(/*q=*/0.5, /*delta=*/100, /*buffer_size=*/500, + /*skip_nulls=*/true, /*min_count=*/3); + auto keep_nulls_min_count = + std::make_shared(/*q=*/0.5, /*delta=*/100, /*buffer_size=*/500, + /*skip_nulls=*/false, /*min_count=*/3); + ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped, + GroupByTest( + { + batch->GetColumnByName("argument"), + batch->GetColumnByName("argument"), + batch->GetColumnByName("argument"), + batch->GetColumnByName("argument"), + batch->GetColumnByName("argument"), + batch->GetColumnByName("argument"), + }, + { + batch->GetColumnByName("key"), + }, + { + {"hash_tdigest", nullptr}, + {"hash_tdigest", options1}, + {"hash_tdigest", options2}, + {"hash_tdigest", keep_nulls}, + {"hash_tdigest", min_count}, + {"hash_tdigest", keep_nulls_min_count}, + }, + false)); AssertDatumsApproxEqual( ArrayFromJSON(struct_({ @@ -1349,16 +1355,17 @@ TEST(GroupBy, TDigestDecimal) { ])"); ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped, - internal::GroupBy( + GroupByTest( { batch->GetColumnByName("argument0"), batch->GetColumnByName("argument1"), }, {batch->GetColumnByName("key")}, { - {"hash_tdigest", nullptr, "agg_0", "hash_tdigest_0"}, - {"hash_tdigest", nullptr, "agg_1", "hash_tdigest_1"}, - })); + {"hash_tdigest", 
nullptr}, + {"hash_tdigest", nullptr}, + }, + false)); AssertDatumsApproxEqual( ArrayFromJSON(struct_({ @@ -1403,7 +1410,7 @@ TEST(GroupBy, ApproximateMedian) { auto keep_nulls_min_count = std::make_shared( /*skip_nulls=*/false, /*min_count=*/3); ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped, - internal::GroupBy( + GroupByTest( { batch->GetColumnByName("argument"), batch->GetColumnByName("argument"), @@ -1414,15 +1421,12 @@ TEST(GroupBy, ApproximateMedian) { batch->GetColumnByName("key"), }, { - {"hash_approximate_median", options, "agg_0", - "hash_approximate_median_0"}, - {"hash_approximate_median", keep_nulls, "agg_1", - "hash_approximate_median_1"}, - {"hash_approximate_median", min_count, "agg_2", - "hash_approximate_median_2"}, - {"hash_approximate_median", keep_nulls_min_count, - "agg_3", "hash_approximate_median_3"}, - })); + {"hash_approximate_median", options}, + {"hash_approximate_median", keep_nulls}, + {"hash_approximate_median", min_count}, + {"hash_approximate_median", keep_nulls_min_count}, + }, + false)); AssertDatumsApproxEqual(ArrayFromJSON(struct_({ field("hash_approximate_median", float64()), @@ -3670,9 +3674,9 @@ TEST(GroupBy, CountWithNullType) { }, {table->GetColumnByName("key")}, { - {"hash_count", all, "agg_0", "hash_count"}, - {"hash_count", only_valid, "agg_1", "hash_count"}, - {"hash_count", only_null, "agg_2", "hash_count"}, + {"hash_count", all}, + {"hash_count", only_valid}, + {"hash_count", only_null}, }, use_threads, use_exec_plan)); SortBy({"key_0"}, &aggregated_and_grouped); @@ -3715,9 +3719,9 @@ TEST(GroupBy, CountWithNullTypeEmptyTable) { }, {table->GetColumnByName("key")}, { - {"hash_count", all, "agg_0", "hash_count"}, - {"hash_count", only_valid, "agg_1", "hash_count"}, - {"hash_count", only_null, "agg_2", "hash_count"}, + {"hash_count", all}, + {"hash_count", only_valid}, + {"hash_count", only_null}, }, use_threads, use_exec_plan)); auto struct_arr = aggregated_and_grouped.array_as(); @@ -3881,23 +3885,22 @@ TEST(GroupBy, SumNullType) { for (bool use_exec_plan : {false, true}) { for (bool use_threads : {true, false}) { SCOPED_TRACE(use_threads ? "parallel/merged" : "serial"); - ASSERT_OK_AND_ASSIGN( - Datum aggregated_and_grouped, - GroupByTest( - { - table->GetColumnByName("argument"), - table->GetColumnByName("argument"), - table->GetColumnByName("argument"), - table->GetColumnByName("argument"), - }, - {table->GetColumnByName("key")}, - { - {"hash_sum", no_min, "agg_0", "hash_sum"}, - {"hash_sum", keep_nulls, "agg_1", "hash_sum"}, - {"hash_sum", min_count, "agg_2", "hash_sum"}, - {"hash_sum", keep_nulls_min_count, "agg_3", "hash_sum"}, - }, - use_threads, use_exec_plan)); + ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped, + GroupByTest( + { + table->GetColumnByName("argument"), + table->GetColumnByName("argument"), + table->GetColumnByName("argument"), + table->GetColumnByName("argument"), + }, + {table->GetColumnByName("key")}, + { + {"hash_sum", no_min}, + {"hash_sum", keep_nulls}, + {"hash_sum", min_count}, + {"hash_sum", keep_nulls_min_count}, + }, + use_threads, use_exec_plan)); SortBy({"key_0"}, &aggregated_and_grouped); AssertDatumsEqual(ArrayFromJSON(struct_({ @@ -3950,23 +3953,22 @@ TEST(GroupBy, ProductNullType) { for (bool use_exec_plan : {false, true}) { for (bool use_threads : {true, false}) { SCOPED_TRACE(use_threads ? 
"parallel/merged" : "serial"); - ASSERT_OK_AND_ASSIGN( - Datum aggregated_and_grouped, - GroupByTest( - { - table->GetColumnByName("argument"), - table->GetColumnByName("argument"), - table->GetColumnByName("argument"), - table->GetColumnByName("argument"), - }, - {table->GetColumnByName("key")}, - { - {"hash_product", no_min, "agg_0", "hash_product"}, - {"hash_product", keep_nulls, "agg_1", "hash_product"}, - {"hash_product", min_count, "agg_2", "hash_product"}, - {"hash_product", keep_nulls_min_count, "agg_3", "hash_product"}, - }, - use_threads, use_exec_plan)); + ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped, + GroupByTest( + { + table->GetColumnByName("argument"), + table->GetColumnByName("argument"), + table->GetColumnByName("argument"), + table->GetColumnByName("argument"), + }, + {table->GetColumnByName("key")}, + { + {"hash_product", no_min}, + {"hash_product", keep_nulls}, + {"hash_product", min_count}, + {"hash_product", keep_nulls_min_count}, + }, + use_threads, use_exec_plan)); SortBy({"key_0"}, &aggregated_and_grouped); AssertDatumsEqual(ArrayFromJSON(struct_({ @@ -4019,23 +4021,22 @@ TEST(GroupBy, MeanNullType) { for (bool use_exec_plan : {false, true}) { for (bool use_threads : {true, false}) { SCOPED_TRACE(use_threads ? "parallel/merged" : "serial"); - ASSERT_OK_AND_ASSIGN( - Datum aggregated_and_grouped, - GroupByTest( - { - table->GetColumnByName("argument"), - table->GetColumnByName("argument"), - table->GetColumnByName("argument"), - table->GetColumnByName("argument"), - }, - {table->GetColumnByName("key")}, - { - {"hash_mean", no_min, "agg_0", "hash_mean"}, - {"hash_mean", keep_nulls, "agg_1", "hash_mean"}, - {"hash_mean", min_count, "agg_2", "hash_mean"}, - {"hash_mean", keep_nulls_min_count, "agg_3", "hash_mean"}, - }, - use_threads, use_exec_plan)); + ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped, + GroupByTest( + { + table->GetColumnByName("argument"), + table->GetColumnByName("argument"), + table->GetColumnByName("argument"), + table->GetColumnByName("argument"), + }, + {table->GetColumnByName("key")}, + { + {"hash_mean", no_min}, + {"hash_mean", keep_nulls}, + {"hash_mean", min_count}, + {"hash_mean", keep_nulls_min_count}, + }, + use_threads, use_exec_plan)); SortBy({"key_0"}, &aggregated_and_grouped); AssertDatumsEqual(ArrayFromJSON(struct_({ @@ -4082,9 +4083,9 @@ TEST(GroupBy, NullTypeEmptyTable) { }, {table->GetColumnByName("key")}, { - {"hash_sum", no_min, "agg_0", "hash_sum"}, - {"hash_product", min_count, "agg_1", "hash_product"}, - {"hash_mean", keep_nulls, "agg_2", "hash_mean"}, + {"hash_sum", no_min}, + {"hash_product", min_count}, + {"hash_mean", keep_nulls}, }, use_threads, use_exec_plan)); auto struct_arr = aggregated_and_grouped.array_as(); diff --git a/r/src/compute-exec.cpp b/r/src/compute-exec.cpp index fc4a41ab03a..c725c3379a6 100644 --- a/r/src/compute-exec.cpp +++ b/r/src/compute-exec.cpp @@ -238,7 +238,7 @@ std::shared_ptr ExecNode_Aggregate( auto name = cpp11::as_cpp(name_opts["name"]); aggregates.push_back(arrow::compute::Aggregate{ - std::move(function), opts.get(), std::move(target), std::move(name)}); + std::move(function), opts, std::move(target), std::move(name)}); } std::vector keys; From 3ebc09a5243ca9c4b98526f682cc3db7444cdf37 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Wed, 22 Jun 2022 11:16:13 +0530 Subject: [PATCH 25/32] fix R format issue --- r/src/compute-exec.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/r/src/compute-exec.cpp b/r/src/compute-exec.cpp index 
c725c3379a6..76112b4cefd 100644 --- a/r/src/compute-exec.cpp +++ b/r/src/compute-exec.cpp @@ -237,8 +237,8 @@ std::shared_ptr ExecNode_Aggregate( auto target = cpp11::as_cpp(name_opts["target"]); auto name = cpp11::as_cpp(name_opts["name"]); - aggregates.push_back(arrow::compute::Aggregate{ - std::move(function), opts, std::move(target), std::move(name)}); + aggregates.push_back(arrow::compute::Aggregate{std::move(function), opts, + std::move(target), std::move(name)}); } std::vector keys; From 5754ebb7e8dce91aff14fd88de2a7ef376e7b08e Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Wed, 22 Jun 2022 13:50:21 +0530 Subject: [PATCH 26/32] test fix for cglib --- c_glib/arrow-glib/compute.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index 4044a4955dc..1fff429b5a2 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -1271,7 +1271,7 @@ garrow_aggregate_node_options_new(GList *aggregations, } else { arrow_aggregates.push_back({aggregation_priv->function, nullptr}); }; - if (!garrow_field_refs_add(arrow_targets, + if (!garrow_field_refs_add(aggregation_priv->output, aggregation_priv->input, error, "[aggregate-node-options][new][input]")) { @@ -1280,7 +1280,7 @@ garrow_aggregate_node_options_new(GList *aggregations, arrow_aggregates.push_back({ aggregation_priv->function, function_options, - arrow_targets[0], + aggregation_priv->output, aggregation_priv->output }); } From dfef8eceb1319cc31db764c7a8c50bdfad44609b Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Wed, 22 Jun 2022 19:30:23 +0530 Subject: [PATCH 27/32] typo fix in tpch bench script --- cpp/src/arrow/compute/exec/tpch_benchmark.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/compute/exec/tpch_benchmark.cc b/cpp/src/arrow/compute/exec/tpch_benchmark.cc index b998da5eded..54ac7cbdbf5 100644 --- a/cpp/src/arrow/compute/exec/tpch_benchmark.cc +++ b/cpp/src/arrow/compute/exec/tpch_benchmark.cc @@ -77,7 +77,7 @@ std::shared_ptr Plan_Q1(AsyncGenerator>* sin auto sum_opts = std::make_shared(ScalarAggregateOptions::Defaults()); auto count_opts = std::make_shared(CountOptions::CountMode::ALL); - std::vector aggs = { + std::vector aggs = { {"hash_sum", sum_opts, "sum_qty", "sum_qty"}, {"hash_sum", sum_opts, "sum_base_price", "sum_base_price"}, {"hash_sum", sum_opts, "sum_disc_price", "sum_disc_price"}, From 038595ba499fc2de4338b8000b4dc7d3012ff142 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Thu, 23 Jun 2022 12:59:13 +0530 Subject: [PATCH 28/32] partition the function further --- r/R/query-engine.R | 42 +++++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/r/R/query-engine.R b/r/R/query-engine.R index 8d87fa1be16..860d947cd48 100644 --- a/r/R/query-engine.R +++ b/r/R/query-engine.R @@ -224,13 +224,7 @@ ExecPlan <- R6Class("ExecPlan", Stop = function() ExecPlan_StopProducing(self) ), private = list( - .set_aggregation = function(node, data, grouped, group_vars) { - # Project to include just the data required for each aggregation, - # plus group_by_vars (last) - # TODO: validate that none of names(aggregations) are the same as names(group_by_vars) - # dplyr does not error on this but the result it gives isn't great - node <- node$Project(summarize_projection(data)) - + .set_aggregate_func_names = function(data, grouped) { if (grouped) { # We need to prefix all of the aggregation function names with "hash_" data$aggregations 
<- lapply(data$aggregations, function(x) { @@ -238,16 +232,9 @@ ExecPlan <- R6Class("ExecPlan", x }) } - target_names <- names(data$aggregations) - for (i in seq_len(length(target_names))) { - data$aggregations[[i]][["name"]] <- data$aggregations[[i]][["target"]] <- target_names[i] - } - - node <- node$Aggregate( - options = data$aggregations, - key_names = group_vars - ) - + data + }, + .set_group_by = function(node, data, group_vars, grouped) { if (grouped) { # The result will have result columns first then the grouping cols. # dplyr orders group cols first, so adapt the result to meet that expectation. @@ -264,6 +251,27 @@ ExecPlan <- R6Class("ExecPlan", } } return(list(node = node, data = data)) + }, + .set_aggregation = function(node, data, grouped, group_vars) { + # Project to include just the data required for each aggregation, + # plus group_by_vars (last) + # TODO: validate that none of names(aggregations) are the same as names(group_by_vars) + # dplyr does not error on this but the result it gives isn't great + node <- node$Project(summarize_projection(data)) + + data <- private$.set_aggregate_func_names(data, grouped) + + target_names <- names(data$aggregations) + for (i in seq_len(length(target_names))) { + data$aggregations[[i]][["name"]] <- data$aggregations[[i]][["target"]] <- target_names[i] + } + + node <- node$Aggregate( + options = data$aggregations, + key_names = group_vars + ) + group_config <- private$.set_group_by(node, data, group_vars, grouped) + return(list(node = group_config$node, data = group_config$data)) } ) ) From 764927a3b58fc7238a8a8cec85971582aca91b8d Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Thu, 23 Jun 2022 18:25:49 +0530 Subject: [PATCH 29/32] added the overwritten patch --- c_glib/arrow-glib/compute.cpp | 21 +++++++-------------- c_glib/arrow-glib/compute.h | 2 +- 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index 1fff429b5a2..193ba2837b9 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -1263,26 +1263,19 @@ garrow_aggregate_node_options_new(GList *aggregations, function_options = garrow_function_options_get_raw(aggregation_priv->options); }; - if (function_options) { - arrow_aggregates.push_back({ - aggregation_priv->function, - function_options->Copy(), - }); - } else { - arrow_aggregates.push_back({aggregation_priv->function, nullptr}); - }; - if (!garrow_field_refs_add(aggregation_priv->output, + std::vector arrow_targets; + if (!garrow_field_refs_add(arrow_targets, aggregation_priv->input, error, "[aggregate-node-options][new][input]")) { return NULL; } arrow_aggregates.push_back({ - aggregation_priv->function, - function_options, - aggregation_priv->output, - aggregation_priv->output - }); + aggregation_priv->function, + function_options ? 
function_options->Copy() : nullptr, + arrow_targets[0], + aggregation_priv->output, + }); } for (gsize i = 0; i < n_keys; ++i) { if (!garrow_field_refs_add(arrow_keys, diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h index 818b76ed72c..32db15be8b0 100644 --- a/c_glib/arrow-glib/compute.h +++ b/c_glib/arrow-glib/compute.h @@ -755,7 +755,7 @@ garrow_quantile_options_get_qs(GArrowQuantileOptions *options, GARROW_AVAILABLE_IN_9_0 void garrow_quantile_options_set_q(GArrowQuantileOptions *options, - gdouble quantile); + gdouble q); GARROW_AVAILABLE_IN_9_0 void garrow_quantile_options_set_qs(GArrowQuantileOptions *options, From 4ab7b83dbd2985952585462ababf8d77a60f69ff Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Thu, 23 Jun 2022 21:17:49 +0530 Subject: [PATCH 30/32] removing the splitted function --- r/R/query-engine.R | 93 +++++++++++++++++++--------------------------- 1 file changed, 39 insertions(+), 54 deletions(-) diff --git a/r/R/query-engine.R b/r/R/query-engine.R index 860d947cd48..b033d86604e 100644 --- a/r/R/query-engine.R +++ b/r/R/query-engine.R @@ -98,9 +98,45 @@ ExecPlan <- R6Class("ExecPlan", } if (!is.null(.data$aggregations)) { - config_agg <- private$.set_aggregation(node, .data, grouped, group_vars) - node <- config_agg$node - .data <- config_agg$data + # Project to include just the data required for each aggregation, + # plus group_by_vars (last) + # TODO: validate that none of names(aggregations) are the same as names(group_by_vars) + # dplyr does not error on this but the result it gives isn't great + node <- node$Project(summarize_projection(.data)) + + if (grouped) { + # We need to prefix all of the aggregation function names with "hash_" + .data$aggregations <- lapply(.data$aggregations, function(x) { + x[["fun"]] <- paste0("hash_", x[["fun"]]) + x + }) + } + + target_names <- names(.data$aggregations) + for (i in seq_len(length(target_names))) { + .data$aggregations[[i]][["name"]] <- .data$aggregations[[i]][["target"]] <- target_names[i] + } + + node <- node$Aggregate( + options = .data$aggregations, + key_names = group_vars + ) + + if (grouped) { + # The result will have result columns first then the grouping cols. + # dplyr orders group cols first, so adapt the result to meet that expectation. + node <- node$Project( + make_field_refs(c(group_vars, names(.data$aggregations))) + ) + if (getOption("arrow.summarise.sort", FALSE)) { + # Add sorting instructions for the rows too to match dplyr + # (see below about why sorting isn't itself a Node) + node$extras$sort <- list( + names = group_vars, + orders = rep(0L, length(group_vars)) + ) + } + } } else { # If any columns are derived, reordered, or renamed we need to Project # If there are aggregations, the projection was already handled above @@ -222,57 +258,6 @@ ExecPlan <- R6Class("ExecPlan", ) }, Stop = function() ExecPlan_StopProducing(self) - ), - private = list( - .set_aggregate_func_names = function(data, grouped) { - if (grouped) { - # We need to prefix all of the aggregation function names with "hash_" - data$aggregations <- lapply(data$aggregations, function(x) { - x[["fun"]] <- paste0("hash_", x[["fun"]]) - x - }) - } - data - }, - .set_group_by = function(node, data, group_vars, grouped) { - if (grouped) { - # The result will have result columns first then the grouping cols. - # dplyr orders group cols first, so adapt the result to meet that expectation. 
- node <- node$Project( - make_field_refs(c(group_vars, names(data$aggregations))) - ) - if (getOption("arrow.summarise.sort", FALSE)) { - # Add sorting instructions for the rows too to match dplyr - # (see below about why sorting isn't itself a Node) - node$extras$sort <- list( - names = group_vars, - orders = rep(0L, length(group_vars)) - ) - } - } - return(list(node = node, data = data)) - }, - .set_aggregation = function(node, data, grouped, group_vars) { - # Project to include just the data required for each aggregation, - # plus group_by_vars (last) - # TODO: validate that none of names(aggregations) are the same as names(group_by_vars) - # dplyr does not error on this but the result it gives isn't great - node <- node$Project(summarize_projection(data)) - - data <- private$.set_aggregate_func_names(data, grouped) - - target_names <- names(data$aggregations) - for (i in seq_len(length(target_names))) { - data$aggregations[[i]][["name"]] <- data$aggregations[[i]][["target"]] <- target_names[i] - } - - node <- node$Aggregate( - options = data$aggregations, - key_names = group_vars - ) - group_config <- private$.set_group_by(node, data, group_vars, grouped) - return(list(node = group_config$node, data = group_config$data)) - } ) ) ExecPlan$create <- function(use_threads = option_use_threads()) { From 5c367b566ba2829c6758bda497454557afdca4d6 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Thu, 23 Jun 2022 14:42:44 -0400 Subject: [PATCH 31/32] Apply suggested imap() patch to reduce (detected) complexity. Update lintr config for 3.0.0 --- r/.lintr | 6 +++--- r/R/query-engine.R | 10 ++++++---- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/r/.lintr b/r/.lintr index 0298fd7f995..619339afca3 100644 --- a/r/.lintr +++ b/r/.lintr @@ -14,7 +14,7 @@ license: # Licensed to the Apache Software Foundation (ASF) under one # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -linters: with_defaults( +linters: linters_with_defaults( line_length_linter = line_length_linter(120), object_name_linter = NULL, # Even with a liberal definition of name styles, some of our names cause issues due to `.`s for s3 classes or NA in the name @@ -22,8 +22,8 @@ linters: with_defaults( # object_name_linter = object_name_linter(styles = c("snake_case", "camelCase", "CamelCase", "symbols", "dotted.case", "UPPERCASE", "SNAKE_CASE")), object_length_linter = object_length_linter(40), object_usage_linter = NULL, # R6 methods are flagged, - cyclocomp_linter = cyclocomp_linter(26), # TODO: reduce to default of 15 - open_curly_linter = NULL # styler and lintr conflict on this (https://github.com/r-lib/styler/issues/549#issuecomment-537191536) + cyclocomp_linter = cyclocomp_linter(26) # TODO: reduce to default of 15 + # See also https://github.com/r-lib/lintr/issues/804 for cyclocomp issues with R6 ) exclusions: list( "R/arrowExports.R", diff --git a/r/R/query-engine.R b/r/R/query-engine.R index b033d86604e..513b861d414 100644 --- a/r/R/query-engine.R +++ b/r/R/query-engine.R @@ -112,10 +112,12 @@ ExecPlan <- R6Class("ExecPlan", }) } - target_names <- names(.data$aggregations) - for (i in seq_len(length(target_names))) { - .data$aggregations[[i]][["name"]] <- .data$aggregations[[i]][["target"]] <- target_names[i] - } + .data$aggregations <- imap(.data$aggregations, function(x, name) { + # Embed the name inside the aggregation objects. 
`target` and `name` + # are the same because we just Project()ed the data that way above + x[["name"]] <- x[["target"]] <- name + x + }) node <- node$Aggregate( options = .data$aggregations, From 0a09d22c0d0be40dbf24e3017677b7eef88d7b0b Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Fri, 24 Jun 2022 08:19:05 -0400 Subject: [PATCH 32/32] Back out lintr changes (will PR separately) --- r/.lintr | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/r/.lintr b/r/.lintr index 619339afca3..0298fd7f995 100644 --- a/r/.lintr +++ b/r/.lintr @@ -14,7 +14,7 @@ license: # Licensed to the Apache Software Foundation (ASF) under one # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -linters: linters_with_defaults( +linters: with_defaults( line_length_linter = line_length_linter(120), object_name_linter = NULL, # Even with a liberal definition of name styles, some of our names cause issues due to `.`s for s3 classes or NA in the name @@ -22,8 +22,8 @@ linters: linters_with_defaults( # object_name_linter = object_name_linter(styles = c("snake_case", "camelCase", "CamelCase", "symbols", "dotted.case", "UPPERCASE", "SNAKE_CASE")), object_length_linter = object_length_linter(40), object_usage_linter = NULL, # R6 methods are flagged, - cyclocomp_linter = cyclocomp_linter(26) # TODO: reduce to default of 15 - # See also https://github.com/r-lib/lintr/issues/804 for cyclocomp issues with R6 + cyclocomp_linter = cyclocomp_linter(26), # TODO: reduce to default of 15 + open_curly_linter = NULL # styler and lintr conflict on this (https://github.com/r-lib/styler/issues/549#issuecomment-537191536) ) exclusions: list( "R/arrowExports.R",
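Taken together, the C++, R, and GLib changes in this series converge on a single shape for an aggregation: arrow::compute::Aggregate carries the function name, a shared_ptr to its FunctionOptions, the target FieldRef, and the output name, so AggregateNodeOptions only needs the list of aggregates plus the grouping keys. The short sketch below illustrates that usage; the header paths, the MakeCountByKeyOptions helper, and the exact constructor signatures are illustrative assumptions inferred from the tests touched above (plan_test.cc, tpch_benchmark.cc, compute-exec.cpp), not verbatim excerpts from the patches.

    #include <memory>
    #include <utility>
    #include <vector>

    // Assumed header locations for Aggregate / CountOptions and AggregateNodeOptions.
    #include "arrow/compute/api_aggregate.h"
    #include "arrow/compute/exec/options.h"

    namespace cp = arrow::compute;

    // Build options for a grouped count, mirroring the brace-initializer style
    // used by the refactored tests in this series.
    cp::AggregateNodeOptions MakeCountByKeyOptions() {
      auto count_opts =
          std::make_shared<cp::CountOptions>(cp::CountOptions::CountMode::ALL);
      // Each Aggregate now carries: function name, options, target field, output name.
      std::vector<cp::Aggregate> aggregates = {
          {"hash_count", count_opts, /*target=*/"a", /*name=*/"count(a)"},
      };
      // Grouping keys are passed alongside the aggregates; no separate targets vector.
      return cp::AggregateNodeOptions{std::move(aggregates), /*keys=*/{"b"}};
    }

The returned options object would then be handed to MakeExecNode("aggregate", ...) in the usual way; because the target and output name travel with each Aggregate, callers no longer need to keep parallel targets and names vectors in sync with the aggregate list.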