From e62c77c9e2b4ef9154972f58376cb2a94ff3ff61 Mon Sep 17 00:00:00 2001
From: Neal Richardson
Date: Wed, 14 Jul 2021 14:17:46 -0400
Subject: [PATCH 01/24] Progress commit

---
 r/R/query-engine.R                      | 29 ++++++++++
 r/src/compute-exec.cpp                  | 74 +++++++++++++++++++++++++
 r/tests/testthat/test-dplyr-aggregate.R | 36 ++++++++++++
 3 files changed, 139 insertions(+)
 create mode 100644 r/R/query-engine.R
 create mode 100644 r/src/compute-exec.cpp
 create mode 100644 r/tests/testthat/test-dplyr-aggregate.R

diff --git a/r/R/query-engine.R b/r/R/query-engine.R
new file mode 100644
index 00000000000..32670192241
--- /dev/null
+++ b/r/R/query-engine.R
@@ -0,0 +1,29 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

ExecNode <- R6Class("ExecNode", inherit = ArrowObject,
  public = list(
    Project = function(cols) {
      assert_is_list_of(cols, "Expression")
      ExecNode_Project(self, cols, names(cols))
    },
    Filter = function(expr) {
      assert_is(expr, "Expression")
      ExecNode_Filter(self, expr)
    }
  )
)
\ No newline at end of file
diff --git a/r/src/compute-exec.cpp b/r/src/compute-exec.cpp
new file mode 100644
index 00000000000..eeecb6e2db7
--- /dev/null
+++ b/r/src/compute-exec.cpp
@@ -0,0 +1,74 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "./arrow_types.h"

#if defined(ARROW_R_WITH_ARROW)

#include <arrow/compute/api.h>
#include <arrow/compute/exec/exec_plan.h>

namespace compute = ::arrow::compute;

#if defined(ARROW_R_WITH_DATASET)

#include <arrow/dataset/scanner.h>

std::shared_ptr<compute::ExecNode> StartExecPlan(
    std::shared_ptr<arrow::dataset::Dataset> dataset) {
  auto plan = ValueOrStop(compute::ExecPlan::Make());
  // TODO: pass in ScanOptions by file type
  auto options = std::make_shared<arrow::dataset::ScanOptions>();
  return std::shared_ptr<compute::ExecNode>(
      ValueOrStop(arrow::dataset::MakeScanNode(plan.get(), dataset, options)));
}

#endif

std::shared_ptr<compute::ExecNode> ExecNode_Filter(
    std::shared_ptr<compute::ExecNode> input,
    std::shared_ptr<compute::Expression> filter) {
  return std::shared_ptr<compute::ExecNode>(
      ValueOrStop(compute::MakeFilterNode(input.get(), /*label=*/"filter", *filter)),
      /* empty destructor: ExecNode lifetime is managed by an ExecPlan */
      [](...) {});
}

std::shared_ptr<compute::ExecNode> ExecNode_Project(
    std::shared_ptr<compute::ExecNode> input,
    std::vector<std::shared_ptr<compute::Expression>> exprs,
    std::vector<std::string> names = {}) {
  // We have shared_ptrs of expressions but need the Expressions
  std::vector<compute::Expression> expressions;
  for (auto expr : exprs) {
    expressions.push_back(*expr);
  }
  return std::shared_ptr<compute::ExecNode>(
      ValueOrStop(
          compute::MakeProjectNode(input.get(), /*label=*/"project", expressions, names)),
      [](...) {});
}

std::shared_ptr<compute::ExecNode> ExecNode_ScalarAggregate(
    std::shared_ptr<compute::ExecNode> input,
    std::vector<compute::internal::Aggregate> aggregates);

// ARROW_EXPORT
// Result<ExecNode*> MakeScalarAggregateNode(ExecNode* input, std::string label,
//                                           std::vector<internal::Aggregate> aggregates);

#endif
diff --git a/r/tests/testthat/test-dplyr-aggregate.R b/r/tests/testthat/test-dplyr-aggregate.R
new file mode 100644
index 00000000000..7186acb9aed
--- /dev/null
+++ b/r/tests/testthat/test-dplyr-aggregate.R
@@ -0,0 +1,36 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

skip_if_not_available("dataset")

library(dplyr)
library(stringr)

tbl <- example_data
# Add some better string data
tbl$verses <- verses[[1]]
# c(" a ", " b ", " c ", ...) increasing padding
# nchar = 3 5 7 9 11 13 15 17 19 21
tbl$padded_strings <- stringr::str_pad(letters[1:10], width = 2*(1:10)+1, side = "both")

test_that("Can aggregate", {
  expect_dplyr_equal(
    input %>%
      summarize(total = sum(int)),
    tbl
  )
})
\ No newline at end of file
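A note on the test helper: expect_dplyr_equal() is an existing helper in the
arrow test suite that evaluates the same dplyr pipeline twice, once with
`input` bound to the plain data.frame and once with it bound to an Arrow
Table or RecordBatch, then checks that both evaluations agree. Roughly, as a
simplified sketch rather than the helper's actual implementation:

  via_df    <- tbl %>% summarize(total = sum(int))
  via_arrow <- Table$create(tbl) %>% summarize(total = sum(int)) %>% collect()
  expect_equal(via_arrow, via_df)
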
From 051074e68f7baf4d499979872bd1b0f43f4fc2d8 Mon Sep 17 00:00:00 2001
From: Neal Richardson
Date: Wed, 14 Jul 2021 19:46:23 -0400
Subject: [PATCH 02/24] Apply Ben's patch and sketch R side

---
 r/R/query-engine.R     |  24 +++++++++-
 r/src/arrow_types.h    |   1 +
 r/src/compute-exec.cpp | 104 ++++++++++++++++++++++++++++++++-------
 r/src/compute.cpp      |   4 +-
 4 files changed, 113 insertions(+), 20 deletions(-)

diff --git a/r/R/query-engine.R b/r/R/query-engine.R
index 32670192241..5d14264b90c 100644
--- a/r/R/query-engine.R
+++ b/r/R/query-engine.R
@@ -15,6 +15,21 @@
 # specific language governing permissions and limitations
 # under the License.

ExecPlan <- R6Class("ExecPlan", inherit = ArrowObject,
  public = list(
    Scan = function(dataset) {
      # Handle arrow_dplyr_query
      # TODO: why do I need to filter/project here?
      ExecNode_Scan(self, dataset, filter, colnames)
    },
    Run = function(node) {
      assert_is(node, "ExecNode")
      ExecPlan_run(self, node)
    }
  )
)
ExecPlan$create <- ExecPlan_create

ExecNode <- R6Class("ExecNode", inherit = ArrowObject,
  public = list(
    Project = function(cols) {
      assert_is_list_of(cols, "Expression")
      ExecNode_Project(self, cols, names(cols))
    },
    Filter = function(expr) {
      assert_is(expr, "Expression")
      ExecNode_Filter(self, expr)
    },
    ScalarAggregate = function(options, targets, out_field_names) {
      ExecNode_ScalarAggregate(self, options, targets, out_field_names)
    }
  )
)

# plan <- ExecPlan$create()
# final_node <- plan$Scan(dataset)$Filter(expr)$Project(exprs)$ScalarAggregate(something)
# plan$Run(final_node)
\ No newline at end of file
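Spelled out with concrete arguments, the pipeline sketched in those trailing
comments would look something like this (a sketch only: `ds` stands for an
opened Dataset, and the options/targets/out_field_names shapes follow what
the C++ below expects):

  plan <- ExecPlan$create()
  scanned <- plan$Scan(ds)
  aggregated <- scanned$ScalarAggregate(
    options = list(list("sum", NULL)),  # one list(function_name, options) per aggregate
    targets = "int",                    # field(s) to aggregate
    out_field_names = "total"           # column name(s) in the output schema
  )
  plan$Run(aggregated)                  # drains the plan into a Table
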
diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h
index b5a8914d432..49bdefb6f44 100644
--- a/r/src/arrow_types.h
+++ b/r/src/arrow_types.h
@@ -60,6 +60,7 @@ namespace fs = ::arrow::fs;
 std::shared_ptr<arrow::RecordBatch> RecordBatch__from_arrays(SEXP, SEXP);
 arrow::MemoryPool* gc_memory_pool();
+arrow::compute::ExecContext* gc_context();

diff --git a/r/src/compute-exec.cpp b/r/src/compute-exec.cpp
index eeecb6e2db7..e9e9cc04a4c 100644
--- a/r/src/compute-exec.cpp
+++ b/r/src/compute-exec.cpp
@@ -21,33 +21,86 @@

#include <arrow/compute/api.h>
#include <arrow/compute/exec/exec_plan.h>
#include <arrow/table.h>
#include <arrow/util/async_generator.h>

namespace compute = ::arrow::compute;

// [[arrow::export]]
std::shared_ptr<compute::ExecPlan> ExecPlan_create() {
  return ValueOrStop(compute::ExecPlan::Make(gc_context()));
}

// [[arrow::export]]
std::shared_ptr<arrow::Table> ExecPlan_run(
    std::shared_ptr<compute::ExecPlan> plan,
    std::shared_ptr<compute::ExecNode> final_node) {
  // For now, don't require R to construct SinkNodes.
  // Instead, just pass the node we should collect as an argument.
  auto sink_gen = compute::MakeSinkNode(final_node.get(), "sink");

  StopIfNotOk(plan->Validate());
  StopIfNotOk(plan->StartProducing());

  std::shared_ptr<arrow::RecordBatchReader> sink_reader = compute::MakeGeneratorReader(
      final_node->output_schema(), std::move(sink_gen), gc_memory_pool());

  plan->finished().Wait();
  return ValueOrStop(arrow::Table::FromRecordBatchReader(sink_reader.get()));
}

std::shared_ptr<compute::ExecNode> ExecNodeOrStop(
    arrow::Result<compute::ExecNode*> maybe_node) {
  return std::shared_ptr<compute::ExecNode>(ValueOrStop(maybe_node), [](...) {
    // empty destructor: ExecNode lifetime is managed by an ExecPlan
  });
}

#if defined(ARROW_R_WITH_DATASET)

#include <arrow/dataset/scanner.h>

// [[arrow::export]]
std::shared_ptr<compute::ExecNode> ExecNode_Scan(
    std::shared_ptr<compute::ExecPlan> plan,
    std::shared_ptr<arrow::dataset::Dataset> dataset,
    std::shared_ptr<compute::Expression> filter,
    std::vector<std::string> materialized_field_names) {
  // TODO: pass in ScanOptions by file type
  auto options = std::make_shared<arrow::dataset::ScanOptions>();

  options->use_async = true;

  options->dataset_schema = dataset->schema();

  // ScanNode needs the filter to do predicate pushdown and skip partitions
  options->filter = ValueOrStop(filter->Bind(*dataset->schema()));

  // ScanNode needs to know which fields to materialize (and which are unnecessary)
  std::vector<compute::Expression> exprs;
  for (const auto& name : materialized_field_names) {
    exprs.push_back(compute::field_ref(name));
  }

  options->projection =
      ValueOrStop(compute::call("project", std::move(exprs),
                                compute::ProjectOptions{std::move(materialized_field_names)})
                      .Bind(*dataset->schema()));

  return ExecNodeOrStop(arrow::dataset::MakeScanNode(plan.get(), dataset, options));
}

#endif

// [[arrow::export]]
std::shared_ptr<compute::ExecNode> ExecNode_Filter(
    std::shared_ptr<compute::ExecNode> input,
    std::shared_ptr<compute::Expression> filter) {
  return ExecNodeOrStop(
      compute::MakeFilterNode(input.get(), /*label=*/"filter", *filter));
}

// [[arrow::export]]
std::shared_ptr<compute::ExecNode> ExecNode_Project(
    std::shared_ptr<compute::ExecNode> input,
    std::vector<std::shared_ptr<compute::Expression>> exprs,
    std::vector<std::string> names = {}) {
  // We have shared_ptrs of expressions but need the Expressions
  std::vector<compute::Expression> expressions;
  for (auto expr : exprs) {
    expressions.push_back(*expr);
  }
  return ExecNodeOrStop(compute::MakeProjectNode(
      input.get(), /*label=*/"project", std::move(expressions), std::move(names)));
}

std::shared_ptr<compute::ExecNode> ExecNode_ScalarAggregate(
    std::shared_ptr<compute::ExecNode> input, cpp11::list options,
    std::vector<std::string> targets, std::vector<std::string> out_field_names) {
  // PROBLEM: need to keep these alive as long as the plan somehow.
  std::vector<std::shared_ptr<arrow::compute::FunctionOptions>> keep_alives;
  std::vector<arrow::compute::internal::Aggregate> aggregates;

  for (cpp11::list name_opts : options) {
    auto name = cpp11::as_cpp<std::string>(name_opts[0]);
    auto opts = make_compute_options(name, name_opts[1]);

    aggregates.push_back(
        arrow::compute::internal::Aggregate{std::move(name), opts.get()});
    keep_alives.push_back(std::move(opts));
  }

  auto scalar_agg = ValueOrStop(MakeScalarAggregateNode(
      input.get(), /*label=*/"scalar_agg", aggregates, targets, out_field_names));

  return std::shared_ptr<compute::ExecNode>(scalar_agg, [keep_alives](...) {
    // empty destructor: ExecNode lifetime is managed by an ExecPlan
    // also carries the function options
  });
}

#endif
diff --git a/r/src/compute.cpp b/r/src/compute.cpp
index 30821137383..142a460d2eb 100644
--- a/r/src/compute.cpp
+++ b/r/src/compute.cpp
@@ -22,11 +22,13 @@
 #include
 #include
 #include
+#include <arrow/util/thread_pool.h>

 std::shared_ptr<arrow::compute::CastOptions> make_cast_options(cpp11::list options);

 arrow::compute::ExecContext* gc_context() {
-  static arrow::compute::ExecContext context(gc_memory_pool());
+  static arrow::compute::ExecContext context(gc_memory_pool(),
+                                             arrow::internal::GetCpuThreadPool());
   return &context;
 }
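Note the ownership dance in ExecNode_ScalarAggregate() above: each Aggregate
struct holds only the raw opts.get() pointer, so the shared_ptr FunctionOptions
are parked in keep_alives, and capturing that vector in the returned
shared_ptr's deleter keeps them alive for as long as R holds the node, though,
as the PROBLEM comment says, nothing yet ties them to the plan itself. The
`options` argument it parses is expected to arrive from R as one
list(function_name, function_options) pair per aggregate, for example
(hypothetical values):

  agg_options <- list(
    list("sum", NULL),   # name_opts[0] = function name; name_opts[1] -> make_compute_options()
    list("mean", NULL)
  )
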
From 41c0826ac5442ab2956677991fa715207171fede Mon Sep 17 00:00:00 2001
From: Neal Richardson
Date: Thu, 15 Jul 2021 07:16:00 -0400
Subject: [PATCH 03/24] More R; try to get C++ to compile

---
 r/R/arrowExports.R                      | 1771 ++++++++++++-----------
 r/R/dplyr-summarize.R                   |   78 +-
 r/R/query-engine.R                      |   23 +-
 r/src/arrowExports.cpp                  |   86 ++
 r/src/arrow_types.h                     |    9 +
 r/src/compute-exec.cpp                  |    9 +-
 r/tests/testthat/test-dplyr-aggregate.R |    1 +
 7 files changed, 1090 insertions(+), 887 deletions(-)

diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index e54f88e9d4e..250fd53f1a0 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -1,1749 +1,1772 @@
# Generated by using data-raw/codegen.R -> do not edit by hand

is_altrep_int_nonull <- function(x){
  .Call(`_arrow_is_altrep_int_nonull`, x)
}

is_altrep_dbl_nonull <- function(x){
  .Call(`_arrow_is_altrep_dbl_nonull`, x)
}

Array__Slice1 <- function(array, offset){
  .Call(`_arrow_Array__Slice1`, array, offset)
}

Array__Slice2 <- function(array, offset, length){
  .Call(`_arrow_Array__Slice2`, array, offset, length)
}

Array__IsNull <- function(x, i){
  .Call(`_arrow_Array__IsNull`, x, i)
}

Array__IsValid <- function(x, i){
  .Call(`_arrow_Array__IsValid`, x, i)
}

Array__length <- function(x){
  .Call(`_arrow_Array__length`, x)
}

Array__offset <- function(x){
  .Call(`_arrow_Array__offset`, x)
}

Array__null_count <- function(x){
  .Call(`_arrow_Array__null_count`, x)
}

Array__type <- function(x){
  .Call(`_arrow_Array__type`, x)
}

Array__ToString <- function(x){
  .Call(`_arrow_Array__ToString`, x)
}

Array__type_id <- function(x){
  .Call(`_arrow_Array__type_id`, x)
}

Array__Equals <- function(lhs, rhs){
  .Call(`_arrow_Array__Equals`, lhs, rhs)
}

Array__ApproxEquals <- function(lhs, rhs){
  .Call(`_arrow_Array__ApproxEquals`, lhs, rhs)
}

Array__Diff <- function(lhs, rhs){
- .Call(`_arrow_Array__Diff`, lhs, rhs) +Array__Diff <- function(lhs, rhs){ + .Call(`_arrow_Array__Diff`, lhs, rhs) } -Array__data <- function(array) { - .Call(`_arrow_Array__data`, array) +Array__data <- function(array){ + .Call(`_arrow_Array__data`, array) } -Array__RangeEquals <- function(self, other, start_idx, end_idx, other_start_idx) { - .Call(`_arrow_Array__RangeEquals`, self, other, start_idx, end_idx, other_start_idx) +Array__RangeEquals <- function(self, other, start_idx, end_idx, other_start_idx){ + .Call(`_arrow_Array__RangeEquals`, self, other, start_idx, end_idx, other_start_idx) } -Array__View <- function(array, type) { - .Call(`_arrow_Array__View`, array, type) +Array__View <- function(array, type){ + .Call(`_arrow_Array__View`, array, type) } -Array__Validate <- function(array) { - invisible(.Call(`_arrow_Array__Validate`, array)) +Array__Validate <- function(array){ + invisible(.Call(`_arrow_Array__Validate`, array)) } -DictionaryArray__indices <- function(array) { - .Call(`_arrow_DictionaryArray__indices`, array) +DictionaryArray__indices <- function(array){ + .Call(`_arrow_DictionaryArray__indices`, array) } -DictionaryArray__dictionary <- function(array) { - .Call(`_arrow_DictionaryArray__dictionary`, array) +DictionaryArray__dictionary <- function(array){ + .Call(`_arrow_DictionaryArray__dictionary`, array) } -StructArray__field <- function(array, i) { - .Call(`_arrow_StructArray__field`, array, i) +StructArray__field <- function(array, i){ + .Call(`_arrow_StructArray__field`, array, i) } -StructArray__GetFieldByName <- function(array, name) { - .Call(`_arrow_StructArray__GetFieldByName`, array, name) +StructArray__GetFieldByName <- function(array, name){ + .Call(`_arrow_StructArray__GetFieldByName`, array, name) } -StructArray__Flatten <- function(array) { - .Call(`_arrow_StructArray__Flatten`, array) +StructArray__Flatten <- function(array){ + .Call(`_arrow_StructArray__Flatten`, array) } -ListArray__value_type <- function(array) { - .Call(`_arrow_ListArray__value_type`, array) +ListArray__value_type <- function(array){ + .Call(`_arrow_ListArray__value_type`, array) } -LargeListArray__value_type <- function(array) { - .Call(`_arrow_LargeListArray__value_type`, array) +LargeListArray__value_type <- function(array){ + .Call(`_arrow_LargeListArray__value_type`, array) } -ListArray__values <- function(array) { - .Call(`_arrow_ListArray__values`, array) +ListArray__values <- function(array){ + .Call(`_arrow_ListArray__values`, array) } -LargeListArray__values <- function(array) { - .Call(`_arrow_LargeListArray__values`, array) +LargeListArray__values <- function(array){ + .Call(`_arrow_LargeListArray__values`, array) } -ListArray__value_length <- function(array, i) { - .Call(`_arrow_ListArray__value_length`, array, i) +ListArray__value_length <- function(array, i){ + .Call(`_arrow_ListArray__value_length`, array, i) } -LargeListArray__value_length <- function(array, i) { - .Call(`_arrow_LargeListArray__value_length`, array, i) +LargeListArray__value_length <- function(array, i){ + .Call(`_arrow_LargeListArray__value_length`, array, i) } -FixedSizeListArray__value_length <- function(array, i) { - .Call(`_arrow_FixedSizeListArray__value_length`, array, i) +FixedSizeListArray__value_length <- function(array, i){ + .Call(`_arrow_FixedSizeListArray__value_length`, array, i) } -ListArray__value_offset <- function(array, i) { - .Call(`_arrow_ListArray__value_offset`, array, i) +ListArray__value_offset <- function(array, i){ + .Call(`_arrow_ListArray__value_offset`, array, i) } 
-LargeListArray__value_offset <- function(array, i) { - .Call(`_arrow_LargeListArray__value_offset`, array, i) +LargeListArray__value_offset <- function(array, i){ + .Call(`_arrow_LargeListArray__value_offset`, array, i) } -FixedSizeListArray__value_offset <- function(array, i) { - .Call(`_arrow_FixedSizeListArray__value_offset`, array, i) +FixedSizeListArray__value_offset <- function(array, i){ + .Call(`_arrow_FixedSizeListArray__value_offset`, array, i) } -ListArray__raw_value_offsets <- function(array) { - .Call(`_arrow_ListArray__raw_value_offsets`, array) +ListArray__raw_value_offsets <- function(array){ + .Call(`_arrow_ListArray__raw_value_offsets`, array) } -LargeListArray__raw_value_offsets <- function(array) { - .Call(`_arrow_LargeListArray__raw_value_offsets`, array) +LargeListArray__raw_value_offsets <- function(array){ + .Call(`_arrow_LargeListArray__raw_value_offsets`, array) } -Array__as_vector <- function(array) { - .Call(`_arrow_Array__as_vector`, array) +Array__as_vector <- function(array){ + .Call(`_arrow_Array__as_vector`, array) } -ChunkedArray__as_vector <- function(chunked_array, use_threads) { - .Call(`_arrow_ChunkedArray__as_vector`, chunked_array, use_threads) +ChunkedArray__as_vector <- function(chunked_array, use_threads){ + .Call(`_arrow_ChunkedArray__as_vector`, chunked_array, use_threads) } -RecordBatch__to_dataframe <- function(batch, use_threads) { - .Call(`_arrow_RecordBatch__to_dataframe`, batch, use_threads) +RecordBatch__to_dataframe <- function(batch, use_threads){ + .Call(`_arrow_RecordBatch__to_dataframe`, batch, use_threads) } -Table__to_dataframe <- function(table, use_threads) { - .Call(`_arrow_Table__to_dataframe`, table, use_threads) +Table__to_dataframe <- function(table, use_threads){ + .Call(`_arrow_Table__to_dataframe`, table, use_threads) } -ArrayData__get_type <- function(x) { - .Call(`_arrow_ArrayData__get_type`, x) +ArrayData__get_type <- function(x){ + .Call(`_arrow_ArrayData__get_type`, x) } -ArrayData__get_length <- function(x) { - .Call(`_arrow_ArrayData__get_length`, x) +ArrayData__get_length <- function(x){ + .Call(`_arrow_ArrayData__get_length`, x) } -ArrayData__get_null_count <- function(x) { - .Call(`_arrow_ArrayData__get_null_count`, x) +ArrayData__get_null_count <- function(x){ + .Call(`_arrow_ArrayData__get_null_count`, x) } -ArrayData__get_offset <- function(x) { - .Call(`_arrow_ArrayData__get_offset`, x) +ArrayData__get_offset <- function(x){ + .Call(`_arrow_ArrayData__get_offset`, x) } -ArrayData__buffers <- function(x) { - .Call(`_arrow_ArrayData__buffers`, x) +ArrayData__buffers <- function(x){ + .Call(`_arrow_ArrayData__buffers`, x) } -Buffer__is_mutable <- function(buffer) { - .Call(`_arrow_Buffer__is_mutable`, buffer) +Buffer__is_mutable <- function(buffer){ + .Call(`_arrow_Buffer__is_mutable`, buffer) } -Buffer__ZeroPadding <- function(buffer) { - invisible(.Call(`_arrow_Buffer__ZeroPadding`, buffer)) +Buffer__ZeroPadding <- function(buffer){ + invisible(.Call(`_arrow_Buffer__ZeroPadding`, buffer)) } -Buffer__capacity <- function(buffer) { - .Call(`_arrow_Buffer__capacity`, buffer) +Buffer__capacity <- function(buffer){ + .Call(`_arrow_Buffer__capacity`, buffer) } -Buffer__size <- function(buffer) { - .Call(`_arrow_Buffer__size`, buffer) +Buffer__size <- function(buffer){ + .Call(`_arrow_Buffer__size`, buffer) } -r___RBuffer__initialize <- function(x) { - .Call(`_arrow_r___RBuffer__initialize`, x) +r___RBuffer__initialize <- function(x){ + .Call(`_arrow_r___RBuffer__initialize`, x) } -Buffer__data <- function(buffer) 
{ - .Call(`_arrow_Buffer__data`, buffer) +Buffer__data <- function(buffer){ + .Call(`_arrow_Buffer__data`, buffer) } -Buffer__Equals <- function(x, y) { - .Call(`_arrow_Buffer__Equals`, x, y) +Buffer__Equals <- function(x, y){ + .Call(`_arrow_Buffer__Equals`, x, y) } -ChunkedArray__length <- function(chunked_array) { - .Call(`_arrow_ChunkedArray__length`, chunked_array) +ChunkedArray__length <- function(chunked_array){ + .Call(`_arrow_ChunkedArray__length`, chunked_array) } -ChunkedArray__null_count <- function(chunked_array) { - .Call(`_arrow_ChunkedArray__null_count`, chunked_array) +ChunkedArray__null_count <- function(chunked_array){ + .Call(`_arrow_ChunkedArray__null_count`, chunked_array) } -ChunkedArray__num_chunks <- function(chunked_array) { - .Call(`_arrow_ChunkedArray__num_chunks`, chunked_array) +ChunkedArray__num_chunks <- function(chunked_array){ + .Call(`_arrow_ChunkedArray__num_chunks`, chunked_array) } -ChunkedArray__chunk <- function(chunked_array, i) { - .Call(`_arrow_ChunkedArray__chunk`, chunked_array, i) +ChunkedArray__chunk <- function(chunked_array, i){ + .Call(`_arrow_ChunkedArray__chunk`, chunked_array, i) } -ChunkedArray__chunks <- function(chunked_array) { - .Call(`_arrow_ChunkedArray__chunks`, chunked_array) +ChunkedArray__chunks <- function(chunked_array){ + .Call(`_arrow_ChunkedArray__chunks`, chunked_array) } -ChunkedArray__type <- function(chunked_array) { - .Call(`_arrow_ChunkedArray__type`, chunked_array) +ChunkedArray__type <- function(chunked_array){ + .Call(`_arrow_ChunkedArray__type`, chunked_array) } -ChunkedArray__Slice1 <- function(chunked_array, offset) { - .Call(`_arrow_ChunkedArray__Slice1`, chunked_array, offset) +ChunkedArray__Slice1 <- function(chunked_array, offset){ + .Call(`_arrow_ChunkedArray__Slice1`, chunked_array, offset) } -ChunkedArray__Slice2 <- function(chunked_array, offset, length) { - .Call(`_arrow_ChunkedArray__Slice2`, chunked_array, offset, length) +ChunkedArray__Slice2 <- function(chunked_array, offset, length){ + .Call(`_arrow_ChunkedArray__Slice2`, chunked_array, offset, length) } -ChunkedArray__View <- function(array, type) { - .Call(`_arrow_ChunkedArray__View`, array, type) +ChunkedArray__View <- function(array, type){ + .Call(`_arrow_ChunkedArray__View`, array, type) } -ChunkedArray__Validate <- function(chunked_array) { - invisible(.Call(`_arrow_ChunkedArray__Validate`, chunked_array)) +ChunkedArray__Validate <- function(chunked_array){ + invisible(.Call(`_arrow_ChunkedArray__Validate`, chunked_array)) } -ChunkedArray__Equals <- function(x, y) { - .Call(`_arrow_ChunkedArray__Equals`, x, y) +ChunkedArray__Equals <- function(x, y){ + .Call(`_arrow_ChunkedArray__Equals`, x, y) } -ChunkedArray__ToString <- function(x) { - .Call(`_arrow_ChunkedArray__ToString`, x) +ChunkedArray__ToString <- function(x){ + .Call(`_arrow_ChunkedArray__ToString`, x) } -ChunkedArray__from_list <- function(chunks, s_type) { - .Call(`_arrow_ChunkedArray__from_list`, chunks, s_type) +ChunkedArray__from_list <- function(chunks, s_type){ + .Call(`_arrow_ChunkedArray__from_list`, chunks, s_type) } -util___Codec__Create <- function(codec, compression_level) { - .Call(`_arrow_util___Codec__Create`, codec, compression_level) +util___Codec__Create <- function(codec, compression_level){ + .Call(`_arrow_util___Codec__Create`, codec, compression_level) } -util___Codec__name <- function(codec) { - .Call(`_arrow_util___Codec__name`, codec) +util___Codec__name <- function(codec){ + .Call(`_arrow_util___Codec__name`, codec) } -util___Codec__IsAvailable <- 
function(codec) { - .Call(`_arrow_util___Codec__IsAvailable`, codec) +util___Codec__IsAvailable <- function(codec){ + .Call(`_arrow_util___Codec__IsAvailable`, codec) } -io___CompressedOutputStream__Make <- function(codec, raw) { - .Call(`_arrow_io___CompressedOutputStream__Make`, codec, raw) +io___CompressedOutputStream__Make <- function(codec, raw){ + .Call(`_arrow_io___CompressedOutputStream__Make`, codec, raw) } -io___CompressedInputStream__Make <- function(codec, raw) { - .Call(`_arrow_io___CompressedInputStream__Make`, codec, raw) +io___CompressedInputStream__Make <- function(codec, raw){ + .Call(`_arrow_io___CompressedInputStream__Make`, codec, raw) } -RecordBatch__cast <- function(batch, schema, options) { - .Call(`_arrow_RecordBatch__cast`, batch, schema, options) +ExecPlan_create <- function(){ + .Call(`_arrow_ExecPlan_create`) } -Table__cast <- function(table, schema, options) { - .Call(`_arrow_Table__cast`, table, schema, options) +ExecPlan_run <- function(plan, final_node){ + .Call(`_arrow_ExecPlan_run`, plan, final_node) } -compute__CallFunction <- function(func_name, args, options) { - .Call(`_arrow_compute__CallFunction`, func_name, args, options) +ExecNode_Scan <- function(plan, dataset, filter, materialized_field_names){ + .Call(`_arrow_ExecNode_Scan`, plan, dataset, filter, materialized_field_names) } -compute__GroupBy <- function(arguments, keys, options) { - .Call(`_arrow_compute__GroupBy`, arguments, keys, options) +ExecNode_Filter <- function(input, filter){ + .Call(`_arrow_ExecNode_Filter`, input, filter) } -compute__GetFunctionNames <- function() { - .Call(`_arrow_compute__GetFunctionNames`) +ExecNode_Project <- function(input, exprs, names){ + .Call(`_arrow_ExecNode_Project`, input, exprs, names) } -build_info <- function() { - .Call(`_arrow_build_info`) +RecordBatch__cast <- function(batch, schema, options){ + .Call(`_arrow_RecordBatch__cast`, batch, schema, options) } -runtime_info <- function() { - .Call(`_arrow_runtime_info`) +Table__cast <- function(table, schema, options){ + .Call(`_arrow_Table__cast`, table, schema, options) } -csv___WriteOptions__initialize <- function(options) { - .Call(`_arrow_csv___WriteOptions__initialize`, options) +compute__CallFunction <- function(func_name, args, options){ + .Call(`_arrow_compute__CallFunction`, func_name, args, options) } -csv___ReadOptions__initialize <- function(options) { - .Call(`_arrow_csv___ReadOptions__initialize`, options) +compute__GroupBy <- function(arguments, keys, options){ + .Call(`_arrow_compute__GroupBy`, arguments, keys, options) } -csv___ParseOptions__initialize <- function(options) { - .Call(`_arrow_csv___ParseOptions__initialize`, options) +compute__GetFunctionNames <- function(){ + .Call(`_arrow_compute__GetFunctionNames`) } -csv___ReadOptions__column_names <- function(options) { - .Call(`_arrow_csv___ReadOptions__column_names`, options) +build_info <- function(){ + .Call(`_arrow_build_info`) } -csv___ConvertOptions__initialize <- function(options) { - .Call(`_arrow_csv___ConvertOptions__initialize`, options) +runtime_info <- function(){ + .Call(`_arrow_runtime_info`) } -csv___TableReader__Make <- function(input, read_options, parse_options, convert_options) { - .Call(`_arrow_csv___TableReader__Make`, input, read_options, parse_options, convert_options) +csv___WriteOptions__initialize <- function(options){ + .Call(`_arrow_csv___WriteOptions__initialize`, options) } -csv___TableReader__Read <- function(table_reader) { - .Call(`_arrow_csv___TableReader__Read`, table_reader) 
+csv___ReadOptions__initialize <- function(options){ + .Call(`_arrow_csv___ReadOptions__initialize`, options) } -TimestampParser__kind <- function(parser) { - .Call(`_arrow_TimestampParser__kind`, parser) +csv___ParseOptions__initialize <- function(options){ + .Call(`_arrow_csv___ParseOptions__initialize`, options) } -TimestampParser__format <- function(parser) { - .Call(`_arrow_TimestampParser__format`, parser) +csv___ReadOptions__column_names <- function(options){ + .Call(`_arrow_csv___ReadOptions__column_names`, options) } -TimestampParser__MakeStrptime <- function(format) { - .Call(`_arrow_TimestampParser__MakeStrptime`, format) +csv___ConvertOptions__initialize <- function(options){ + .Call(`_arrow_csv___ConvertOptions__initialize`, options) } -TimestampParser__MakeISO8601 <- function() { - .Call(`_arrow_TimestampParser__MakeISO8601`) +csv___TableReader__Make <- function(input, read_options, parse_options, convert_options){ + .Call(`_arrow_csv___TableReader__Make`, input, read_options, parse_options, convert_options) } -csv___WriteCSV__Table <- function(table, write_options, stream) { - invisible(.Call(`_arrow_csv___WriteCSV__Table`, table, write_options, stream)) +csv___TableReader__Read <- function(table_reader){ + .Call(`_arrow_csv___TableReader__Read`, table_reader) } -csv___WriteCSV__RecordBatch <- function(record_batch, write_options, stream) { - invisible(.Call(`_arrow_csv___WriteCSV__RecordBatch`, record_batch, write_options, stream)) +TimestampParser__kind <- function(parser){ + .Call(`_arrow_TimestampParser__kind`, parser) } -dataset___Dataset__NewScan <- function(ds) { - .Call(`_arrow_dataset___Dataset__NewScan`, ds) +TimestampParser__format <- function(parser){ + .Call(`_arrow_TimestampParser__format`, parser) } -dataset___Dataset__schema <- function(dataset) { - .Call(`_arrow_dataset___Dataset__schema`, dataset) +TimestampParser__MakeStrptime <- function(format){ + .Call(`_arrow_TimestampParser__MakeStrptime`, format) } -dataset___Dataset__type_name <- function(dataset) { - .Call(`_arrow_dataset___Dataset__type_name`, dataset) +TimestampParser__MakeISO8601 <- function(){ + .Call(`_arrow_TimestampParser__MakeISO8601`) } -dataset___Dataset__ReplaceSchema <- function(dataset, schm) { - .Call(`_arrow_dataset___Dataset__ReplaceSchema`, dataset, schm) +csv___WriteCSV__Table <- function(table, write_options, stream){ + invisible(.Call(`_arrow_csv___WriteCSV__Table`, table, write_options, stream)) } -dataset___UnionDataset__create <- function(datasets, schm) { - .Call(`_arrow_dataset___UnionDataset__create`, datasets, schm) +csv___WriteCSV__RecordBatch <- function(record_batch, write_options, stream){ + invisible(.Call(`_arrow_csv___WriteCSV__RecordBatch`, record_batch, write_options, stream)) } -dataset___InMemoryDataset__create <- function(table) { - .Call(`_arrow_dataset___InMemoryDataset__create`, table) +dataset___Dataset__NewScan <- function(ds){ + .Call(`_arrow_dataset___Dataset__NewScan`, ds) } -dataset___UnionDataset__children <- function(ds) { - .Call(`_arrow_dataset___UnionDataset__children`, ds) +dataset___Dataset__schema <- function(dataset){ + .Call(`_arrow_dataset___Dataset__schema`, dataset) } -dataset___FileSystemDataset__format <- function(dataset) { - .Call(`_arrow_dataset___FileSystemDataset__format`, dataset) +dataset___Dataset__type_name <- function(dataset){ + .Call(`_arrow_dataset___Dataset__type_name`, dataset) } -dataset___FileSystemDataset__filesystem <- function(dataset) { - .Call(`_arrow_dataset___FileSystemDataset__filesystem`, dataset) 
+dataset___Dataset__ReplaceSchema <- function(dataset, schm){ + .Call(`_arrow_dataset___Dataset__ReplaceSchema`, dataset, schm) } -dataset___FileSystemDataset__files <- function(dataset) { - .Call(`_arrow_dataset___FileSystemDataset__files`, dataset) +dataset___UnionDataset__create <- function(datasets, schm){ + .Call(`_arrow_dataset___UnionDataset__create`, datasets, schm) } -dataset___DatasetFactory__Finish1 <- function(factory, unify_schemas) { - .Call(`_arrow_dataset___DatasetFactory__Finish1`, factory, unify_schemas) +dataset___InMemoryDataset__create <- function(table){ + .Call(`_arrow_dataset___InMemoryDataset__create`, table) } -dataset___DatasetFactory__Finish2 <- function(factory, schema) { - .Call(`_arrow_dataset___DatasetFactory__Finish2`, factory, schema) +dataset___UnionDataset__children <- function(ds){ + .Call(`_arrow_dataset___UnionDataset__children`, ds) } -dataset___DatasetFactory__Inspect <- function(factory, unify_schemas) { - .Call(`_arrow_dataset___DatasetFactory__Inspect`, factory, unify_schemas) +dataset___FileSystemDataset__format <- function(dataset){ + .Call(`_arrow_dataset___FileSystemDataset__format`, dataset) } -dataset___UnionDatasetFactory__Make <- function(children) { - .Call(`_arrow_dataset___UnionDatasetFactory__Make`, children) +dataset___FileSystemDataset__filesystem <- function(dataset){ + .Call(`_arrow_dataset___FileSystemDataset__filesystem`, dataset) } -dataset___FileSystemDatasetFactory__Make0 <- function(fs, paths, format) { - .Call(`_arrow_dataset___FileSystemDatasetFactory__Make0`, fs, paths, format) +dataset___FileSystemDataset__files <- function(dataset){ + .Call(`_arrow_dataset___FileSystemDataset__files`, dataset) } -dataset___FileSystemDatasetFactory__Make2 <- function(fs, selector, format, partitioning) { - .Call(`_arrow_dataset___FileSystemDatasetFactory__Make2`, fs, selector, format, partitioning) +dataset___DatasetFactory__Finish1 <- function(factory, unify_schemas){ + .Call(`_arrow_dataset___DatasetFactory__Finish1`, factory, unify_schemas) } -dataset___FileSystemDatasetFactory__Make1 <- function(fs, selector, format) { - .Call(`_arrow_dataset___FileSystemDatasetFactory__Make1`, fs, selector, format) +dataset___DatasetFactory__Finish2 <- function(factory, schema){ + .Call(`_arrow_dataset___DatasetFactory__Finish2`, factory, schema) } -dataset___FileSystemDatasetFactory__Make3 <- function(fs, selector, format, factory) { - .Call(`_arrow_dataset___FileSystemDatasetFactory__Make3`, fs, selector, format, factory) +dataset___DatasetFactory__Inspect <- function(factory, unify_schemas){ + .Call(`_arrow_dataset___DatasetFactory__Inspect`, factory, unify_schemas) } -dataset___FileFormat__type_name <- function(format) { - .Call(`_arrow_dataset___FileFormat__type_name`, format) +dataset___UnionDatasetFactory__Make <- function(children){ + .Call(`_arrow_dataset___UnionDatasetFactory__Make`, children) } -dataset___FileFormat__DefaultWriteOptions <- function(fmt) { - .Call(`_arrow_dataset___FileFormat__DefaultWriteOptions`, fmt) +dataset___FileSystemDatasetFactory__Make0 <- function(fs, paths, format){ + .Call(`_arrow_dataset___FileSystemDatasetFactory__Make0`, fs, paths, format) } -dataset___ParquetFileFormat__Make <- function(options, dict_columns) { - .Call(`_arrow_dataset___ParquetFileFormat__Make`, options, dict_columns) +dataset___FileSystemDatasetFactory__Make2 <- function(fs, selector, format, partitioning){ + .Call(`_arrow_dataset___FileSystemDatasetFactory__Make2`, fs, selector, format, partitioning) } 
-dataset___FileWriteOptions__type_name <- function(options) { - .Call(`_arrow_dataset___FileWriteOptions__type_name`, options) +dataset___FileSystemDatasetFactory__Make1 <- function(fs, selector, format){ + .Call(`_arrow_dataset___FileSystemDatasetFactory__Make1`, fs, selector, format) } -dataset___ParquetFileWriteOptions__update <- function(options, writer_props, arrow_writer_props) { - invisible(.Call(`_arrow_dataset___ParquetFileWriteOptions__update`, options, writer_props, arrow_writer_props)) +dataset___FileSystemDatasetFactory__Make3 <- function(fs, selector, format, factory){ + .Call(`_arrow_dataset___FileSystemDatasetFactory__Make3`, fs, selector, format, factory) } -dataset___IpcFileWriteOptions__update2 <- function(ipc_options, use_legacy_format, codec, metadata_version) { - invisible(.Call(`_arrow_dataset___IpcFileWriteOptions__update2`, ipc_options, use_legacy_format, codec, metadata_version)) +dataset___FileFormat__type_name <- function(format){ + .Call(`_arrow_dataset___FileFormat__type_name`, format) } -dataset___IpcFileWriteOptions__update1 <- function(ipc_options, use_legacy_format, metadata_version) { - invisible(.Call(`_arrow_dataset___IpcFileWriteOptions__update1`, ipc_options, use_legacy_format, metadata_version)) +dataset___FileFormat__DefaultWriteOptions <- function(fmt){ + .Call(`_arrow_dataset___FileFormat__DefaultWriteOptions`, fmt) } -dataset___CsvFileWriteOptions__update <- function(csv_options, write_options) { - invisible(.Call(`_arrow_dataset___CsvFileWriteOptions__update`, csv_options, write_options)) +dataset___ParquetFileFormat__Make <- function(options, dict_columns){ + .Call(`_arrow_dataset___ParquetFileFormat__Make`, options, dict_columns) } -dataset___IpcFileFormat__Make <- function() { - .Call(`_arrow_dataset___IpcFileFormat__Make`) +dataset___FileWriteOptions__type_name <- function(options){ + .Call(`_arrow_dataset___FileWriteOptions__type_name`, options) } -dataset___CsvFileFormat__Make <- function(parse_options, convert_options, read_options) { - .Call(`_arrow_dataset___CsvFileFormat__Make`, parse_options, convert_options, read_options) +dataset___ParquetFileWriteOptions__update <- function(options, writer_props, arrow_writer_props){ + invisible(.Call(`_arrow_dataset___ParquetFileWriteOptions__update`, options, writer_props, arrow_writer_props)) } -dataset___FragmentScanOptions__type_name <- function(fragment_scan_options) { - .Call(`_arrow_dataset___FragmentScanOptions__type_name`, fragment_scan_options) +dataset___IpcFileWriteOptions__update2 <- function(ipc_options, use_legacy_format, codec, metadata_version){ + invisible(.Call(`_arrow_dataset___IpcFileWriteOptions__update2`, ipc_options, use_legacy_format, codec, metadata_version)) } -dataset___CsvFragmentScanOptions__Make <- function(convert_options, read_options) { - .Call(`_arrow_dataset___CsvFragmentScanOptions__Make`, convert_options, read_options) +dataset___IpcFileWriteOptions__update1 <- function(ipc_options, use_legacy_format, metadata_version){ + invisible(.Call(`_arrow_dataset___IpcFileWriteOptions__update1`, ipc_options, use_legacy_format, metadata_version)) } -dataset___ParquetFragmentScanOptions__Make <- function(use_buffered_stream, buffer_size, pre_buffer) { - .Call(`_arrow_dataset___ParquetFragmentScanOptions__Make`, use_buffered_stream, buffer_size, pre_buffer) +dataset___CsvFileWriteOptions__update <- function(csv_options, write_options){ + invisible(.Call(`_arrow_dataset___CsvFileWriteOptions__update`, csv_options, write_options)) } -dataset___DirectoryPartitioning <- 
function(schm, segment_encoding) { - .Call(`_arrow_dataset___DirectoryPartitioning`, schm, segment_encoding) +dataset___IpcFileFormat__Make <- function(){ + .Call(`_arrow_dataset___IpcFileFormat__Make`) } -dataset___DirectoryPartitioning__MakeFactory <- function(field_names, segment_encoding) { - .Call(`_arrow_dataset___DirectoryPartitioning__MakeFactory`, field_names, segment_encoding) +dataset___CsvFileFormat__Make <- function(parse_options, convert_options, read_options){ + .Call(`_arrow_dataset___CsvFileFormat__Make`, parse_options, convert_options, read_options) } -dataset___HivePartitioning <- function(schm, null_fallback, segment_encoding) { - .Call(`_arrow_dataset___HivePartitioning`, schm, null_fallback, segment_encoding) +dataset___FragmentScanOptions__type_name <- function(fragment_scan_options){ + .Call(`_arrow_dataset___FragmentScanOptions__type_name`, fragment_scan_options) } -dataset___HivePartitioning__MakeFactory <- function(null_fallback, segment_encoding) { - .Call(`_arrow_dataset___HivePartitioning__MakeFactory`, null_fallback, segment_encoding) +dataset___CsvFragmentScanOptions__Make <- function(convert_options, read_options){ + .Call(`_arrow_dataset___CsvFragmentScanOptions__Make`, convert_options, read_options) } -dataset___ScannerBuilder__ProjectNames <- function(sb, cols) { - invisible(.Call(`_arrow_dataset___ScannerBuilder__ProjectNames`, sb, cols)) +dataset___ParquetFragmentScanOptions__Make <- function(use_buffered_stream, buffer_size, pre_buffer){ + .Call(`_arrow_dataset___ParquetFragmentScanOptions__Make`, use_buffered_stream, buffer_size, pre_buffer) } -dataset___ScannerBuilder__ProjectExprs <- function(sb, exprs, names) { - invisible(.Call(`_arrow_dataset___ScannerBuilder__ProjectExprs`, sb, exprs, names)) +dataset___DirectoryPartitioning <- function(schm, segment_encoding){ + .Call(`_arrow_dataset___DirectoryPartitioning`, schm, segment_encoding) } -dataset___ScannerBuilder__Filter <- function(sb, expr) { - invisible(.Call(`_arrow_dataset___ScannerBuilder__Filter`, sb, expr)) +dataset___DirectoryPartitioning__MakeFactory <- function(field_names, segment_encoding){ + .Call(`_arrow_dataset___DirectoryPartitioning__MakeFactory`, field_names, segment_encoding) } -dataset___ScannerBuilder__UseThreads <- function(sb, threads) { - invisible(.Call(`_arrow_dataset___ScannerBuilder__UseThreads`, sb, threads)) +dataset___HivePartitioning <- function(schm, null_fallback, segment_encoding){ + .Call(`_arrow_dataset___HivePartitioning`, schm, null_fallback, segment_encoding) } -dataset___ScannerBuilder__UseAsync <- function(sb, use_async) { - invisible(.Call(`_arrow_dataset___ScannerBuilder__UseAsync`, sb, use_async)) +dataset___HivePartitioning__MakeFactory <- function(null_fallback, segment_encoding){ + .Call(`_arrow_dataset___HivePartitioning__MakeFactory`, null_fallback, segment_encoding) } -dataset___ScannerBuilder__BatchSize <- function(sb, batch_size) { - invisible(.Call(`_arrow_dataset___ScannerBuilder__BatchSize`, sb, batch_size)) +dataset___ScannerBuilder__ProjectNames <- function(sb, cols){ + invisible(.Call(`_arrow_dataset___ScannerBuilder__ProjectNames`, sb, cols)) } -dataset___ScannerBuilder__FragmentScanOptions <- function(sb, options) { - invisible(.Call(`_arrow_dataset___ScannerBuilder__FragmentScanOptions`, sb, options)) +dataset___ScannerBuilder__ProjectExprs <- function(sb, exprs, names){ + invisible(.Call(`_arrow_dataset___ScannerBuilder__ProjectExprs`, sb, exprs, names)) } -dataset___ScannerBuilder__schema <- function(sb) { - 
.Call(`_arrow_dataset___ScannerBuilder__schema`, sb) +dataset___ScannerBuilder__Filter <- function(sb, expr){ + invisible(.Call(`_arrow_dataset___ScannerBuilder__Filter`, sb, expr)) } -dataset___ScannerBuilder__Finish <- function(sb) { - .Call(`_arrow_dataset___ScannerBuilder__Finish`, sb) +dataset___ScannerBuilder__UseThreads <- function(sb, threads){ + invisible(.Call(`_arrow_dataset___ScannerBuilder__UseThreads`, sb, threads)) } -dataset___Scanner__ToTable <- function(scanner) { - .Call(`_arrow_dataset___Scanner__ToTable`, scanner) +dataset___ScannerBuilder__UseAsync <- function(sb, use_async){ + invisible(.Call(`_arrow_dataset___ScannerBuilder__UseAsync`, sb, use_async)) } -dataset___Scanner__ScanBatches <- function(scanner) { - .Call(`_arrow_dataset___Scanner__ScanBatches`, scanner) +dataset___ScannerBuilder__BatchSize <- function(sb, batch_size){ + invisible(.Call(`_arrow_dataset___ScannerBuilder__BatchSize`, sb, batch_size)) } -dataset___Scanner__ToRecordBatchReader <- function(scanner) { - .Call(`_arrow_dataset___Scanner__ToRecordBatchReader`, scanner) +dataset___ScannerBuilder__FragmentScanOptions <- function(sb, options){ + invisible(.Call(`_arrow_dataset___ScannerBuilder__FragmentScanOptions`, sb, options)) } -dataset___Scanner__head <- function(scanner, n) { - .Call(`_arrow_dataset___Scanner__head`, scanner, n) +dataset___ScannerBuilder__schema <- function(sb){ + .Call(`_arrow_dataset___ScannerBuilder__schema`, sb) } -dataset___Scanner__schema <- function(sc) { - .Call(`_arrow_dataset___Scanner__schema`, sc) +dataset___ScannerBuilder__Finish <- function(sb){ + .Call(`_arrow_dataset___ScannerBuilder__Finish`, sb) } -dataset___ScanTask__get_batches <- function(scan_task) { - .Call(`_arrow_dataset___ScanTask__get_batches`, scan_task) +dataset___Scanner__ToTable <- function(scanner){ + .Call(`_arrow_dataset___Scanner__ToTable`, scanner) } -dataset___Dataset__Write <- function(file_write_options, filesystem, base_dir, partitioning, basename_template, scanner) { - invisible(.Call(`_arrow_dataset___Dataset__Write`, file_write_options, filesystem, base_dir, partitioning, basename_template, scanner)) +dataset___Scanner__ScanBatches <- function(scanner){ + .Call(`_arrow_dataset___Scanner__ScanBatches`, scanner) } -dataset___Scanner__TakeRows <- function(scanner, indices) { - .Call(`_arrow_dataset___Scanner__TakeRows`, scanner, indices) +dataset___Scanner__ToRecordBatchReader <- function(scanner){ + .Call(`_arrow_dataset___Scanner__ToRecordBatchReader`, scanner) } -dataset___Scanner__CountRows <- function(scanner) { - .Call(`_arrow_dataset___Scanner__CountRows`, scanner) +dataset___Scanner__head <- function(scanner, n){ + .Call(`_arrow_dataset___Scanner__head`, scanner, n) } -Int8__initialize <- function() { - .Call(`_arrow_Int8__initialize`) +dataset___Scanner__schema <- function(sc){ + .Call(`_arrow_dataset___Scanner__schema`, sc) } -Int16__initialize <- function() { - .Call(`_arrow_Int16__initialize`) +dataset___ScanTask__get_batches <- function(scan_task){ + .Call(`_arrow_dataset___ScanTask__get_batches`, scan_task) } -Int32__initialize <- function() { - .Call(`_arrow_Int32__initialize`) +dataset___Dataset__Write <- function(file_write_options, filesystem, base_dir, partitioning, basename_template, scanner){ + invisible(.Call(`_arrow_dataset___Dataset__Write`, file_write_options, filesystem, base_dir, partitioning, basename_template, scanner)) } -Int64__initialize <- function() { - .Call(`_arrow_Int64__initialize`) +dataset___Scanner__TakeRows <- function(scanner, indices){ + 
.Call(`_arrow_dataset___Scanner__TakeRows`, scanner, indices) } -UInt8__initialize <- function() { - .Call(`_arrow_UInt8__initialize`) +dataset___Scanner__CountRows <- function(scanner){ + .Call(`_arrow_dataset___Scanner__CountRows`, scanner) } -UInt16__initialize <- function() { - .Call(`_arrow_UInt16__initialize`) +Int8__initialize <- function(){ + .Call(`_arrow_Int8__initialize`) } -UInt32__initialize <- function() { - .Call(`_arrow_UInt32__initialize`) +Int16__initialize <- function(){ + .Call(`_arrow_Int16__initialize`) } -UInt64__initialize <- function() { - .Call(`_arrow_UInt64__initialize`) +Int32__initialize <- function(){ + .Call(`_arrow_Int32__initialize`) } -Float16__initialize <- function() { - .Call(`_arrow_Float16__initialize`) +Int64__initialize <- function(){ + .Call(`_arrow_Int64__initialize`) } -Float32__initialize <- function() { - .Call(`_arrow_Float32__initialize`) +UInt8__initialize <- function(){ + .Call(`_arrow_UInt8__initialize`) } -Float64__initialize <- function() { - .Call(`_arrow_Float64__initialize`) +UInt16__initialize <- function(){ + .Call(`_arrow_UInt16__initialize`) } -Boolean__initialize <- function() { - .Call(`_arrow_Boolean__initialize`) +UInt32__initialize <- function(){ + .Call(`_arrow_UInt32__initialize`) } -Utf8__initialize <- function() { - .Call(`_arrow_Utf8__initialize`) +UInt64__initialize <- function(){ + .Call(`_arrow_UInt64__initialize`) } -LargeUtf8__initialize <- function() { - .Call(`_arrow_LargeUtf8__initialize`) +Float16__initialize <- function(){ + .Call(`_arrow_Float16__initialize`) } -Binary__initialize <- function() { - .Call(`_arrow_Binary__initialize`) +Float32__initialize <- function(){ + .Call(`_arrow_Float32__initialize`) } -LargeBinary__initialize <- function() { - .Call(`_arrow_LargeBinary__initialize`) +Float64__initialize <- function(){ + .Call(`_arrow_Float64__initialize`) } -Date32__initialize <- function() { - .Call(`_arrow_Date32__initialize`) +Boolean__initialize <- function(){ + .Call(`_arrow_Boolean__initialize`) } -Date64__initialize <- function() { - .Call(`_arrow_Date64__initialize`) +Utf8__initialize <- function(){ + .Call(`_arrow_Utf8__initialize`) } -Null__initialize <- function() { - .Call(`_arrow_Null__initialize`) +LargeUtf8__initialize <- function(){ + .Call(`_arrow_LargeUtf8__initialize`) } -Decimal128Type__initialize <- function(precision, scale) { - .Call(`_arrow_Decimal128Type__initialize`, precision, scale) +Binary__initialize <- function(){ + .Call(`_arrow_Binary__initialize`) } -FixedSizeBinary__initialize <- function(byte_width) { - .Call(`_arrow_FixedSizeBinary__initialize`, byte_width) +LargeBinary__initialize <- function(){ + .Call(`_arrow_LargeBinary__initialize`) } -Timestamp__initialize <- function(unit, timezone) { - .Call(`_arrow_Timestamp__initialize`, unit, timezone) +Date32__initialize <- function(){ + .Call(`_arrow_Date32__initialize`) } -Time32__initialize <- function(unit) { - .Call(`_arrow_Time32__initialize`, unit) +Date64__initialize <- function(){ + .Call(`_arrow_Date64__initialize`) } -Time64__initialize <- function(unit) { - .Call(`_arrow_Time64__initialize`, unit) +Null__initialize <- function(){ + .Call(`_arrow_Null__initialize`) } -list__ <- function(x) { - .Call(`_arrow_list__`, x) +Decimal128Type__initialize <- function(precision, scale){ + .Call(`_arrow_Decimal128Type__initialize`, precision, scale) } -large_list__ <- function(x) { - .Call(`_arrow_large_list__`, x) +FixedSizeBinary__initialize <- function(byte_width){ + .Call(`_arrow_FixedSizeBinary__initialize`, 
byte_width) } -fixed_size_list__ <- function(x, list_size) { - .Call(`_arrow_fixed_size_list__`, x, list_size) +Timestamp__initialize <- function(unit, timezone){ + .Call(`_arrow_Timestamp__initialize`, unit, timezone) } -struct__ <- function(fields) { - .Call(`_arrow_struct__`, fields) +Time32__initialize <- function(unit){ + .Call(`_arrow_Time32__initialize`, unit) } -DataType__ToString <- function(type) { - .Call(`_arrow_DataType__ToString`, type) +Time64__initialize <- function(unit){ + .Call(`_arrow_Time64__initialize`, unit) } -DataType__name <- function(type) { - .Call(`_arrow_DataType__name`, type) +list__ <- function(x){ + .Call(`_arrow_list__`, x) } -DataType__Equals <- function(lhs, rhs) { - .Call(`_arrow_DataType__Equals`, lhs, rhs) +large_list__ <- function(x){ + .Call(`_arrow_large_list__`, x) } -DataType__num_fields <- function(type) { - .Call(`_arrow_DataType__num_fields`, type) +fixed_size_list__ <- function(x, list_size){ + .Call(`_arrow_fixed_size_list__`, x, list_size) } -DataType__fields <- function(type) { - .Call(`_arrow_DataType__fields`, type) +struct__ <- function(fields){ + .Call(`_arrow_struct__`, fields) } -DataType__id <- function(type) { - .Call(`_arrow_DataType__id`, type) +DataType__ToString <- function(type){ + .Call(`_arrow_DataType__ToString`, type) } -ListType__ToString <- function(type) { - .Call(`_arrow_ListType__ToString`, type) +DataType__name <- function(type){ + .Call(`_arrow_DataType__name`, type) } -FixedWidthType__bit_width <- function(type) { - .Call(`_arrow_FixedWidthType__bit_width`, type) +DataType__Equals <- function(lhs, rhs){ + .Call(`_arrow_DataType__Equals`, lhs, rhs) } -DateType__unit <- function(type) { - .Call(`_arrow_DateType__unit`, type) +DataType__num_fields <- function(type){ + .Call(`_arrow_DataType__num_fields`, type) } -TimeType__unit <- function(type) { - .Call(`_arrow_TimeType__unit`, type) +DataType__fields <- function(type){ + .Call(`_arrow_DataType__fields`, type) } -DecimalType__precision <- function(type) { - .Call(`_arrow_DecimalType__precision`, type) +DataType__id <- function(type){ + .Call(`_arrow_DataType__id`, type) } -DecimalType__scale <- function(type) { - .Call(`_arrow_DecimalType__scale`, type) +ListType__ToString <- function(type){ + .Call(`_arrow_ListType__ToString`, type) } -TimestampType__timezone <- function(type) { - .Call(`_arrow_TimestampType__timezone`, type) +FixedWidthType__bit_width <- function(type){ + .Call(`_arrow_FixedWidthType__bit_width`, type) } -TimestampType__unit <- function(type) { - .Call(`_arrow_TimestampType__unit`, type) +DateType__unit <- function(type){ + .Call(`_arrow_DateType__unit`, type) } -DictionaryType__initialize <- function(index_type, value_type, ordered) { - .Call(`_arrow_DictionaryType__initialize`, index_type, value_type, ordered) +TimeType__unit <- function(type){ + .Call(`_arrow_TimeType__unit`, type) } -DictionaryType__index_type <- function(type) { - .Call(`_arrow_DictionaryType__index_type`, type) +DecimalType__precision <- function(type){ + .Call(`_arrow_DecimalType__precision`, type) } -DictionaryType__value_type <- function(type) { - .Call(`_arrow_DictionaryType__value_type`, type) +DecimalType__scale <- function(type){ + .Call(`_arrow_DecimalType__scale`, type) } -DictionaryType__name <- function(type) { - .Call(`_arrow_DictionaryType__name`, type) +TimestampType__timezone <- function(type){ + .Call(`_arrow_TimestampType__timezone`, type) } -DictionaryType__ordered <- function(type) { - .Call(`_arrow_DictionaryType__ordered`, type) +TimestampType__unit <- 
function(type){ + .Call(`_arrow_TimestampType__unit`, type) } -StructType__GetFieldByName <- function(type, name) { - .Call(`_arrow_StructType__GetFieldByName`, type, name) +DictionaryType__initialize <- function(index_type, value_type, ordered){ + .Call(`_arrow_DictionaryType__initialize`, index_type, value_type, ordered) } -StructType__GetFieldIndex <- function(type, name) { - .Call(`_arrow_StructType__GetFieldIndex`, type, name) +DictionaryType__index_type <- function(type){ + .Call(`_arrow_DictionaryType__index_type`, type) } -StructType__field_names <- function(type) { - .Call(`_arrow_StructType__field_names`, type) +DictionaryType__value_type <- function(type){ + .Call(`_arrow_DictionaryType__value_type`, type) } -ListType__value_field <- function(type) { - .Call(`_arrow_ListType__value_field`, type) +DictionaryType__name <- function(type){ + .Call(`_arrow_DictionaryType__name`, type) } -ListType__value_type <- function(type) { - .Call(`_arrow_ListType__value_type`, type) +DictionaryType__ordered <- function(type){ + .Call(`_arrow_DictionaryType__ordered`, type) } -LargeListType__value_field <- function(type) { - .Call(`_arrow_LargeListType__value_field`, type) +StructType__GetFieldByName <- function(type, name){ + .Call(`_arrow_StructType__GetFieldByName`, type, name) } -LargeListType__value_type <- function(type) { - .Call(`_arrow_LargeListType__value_type`, type) +StructType__GetFieldIndex <- function(type, name){ + .Call(`_arrow_StructType__GetFieldIndex`, type, name) } -FixedSizeListType__value_field <- function(type) { - .Call(`_arrow_FixedSizeListType__value_field`, type) +StructType__field_names <- function(type){ + .Call(`_arrow_StructType__field_names`, type) } -FixedSizeListType__value_type <- function(type) { - .Call(`_arrow_FixedSizeListType__value_type`, type) +ListType__value_field <- function(type){ + .Call(`_arrow_ListType__value_field`, type) } -FixedSizeListType__list_size <- function(type) { - .Call(`_arrow_FixedSizeListType__list_size`, type) +ListType__value_type <- function(type){ + .Call(`_arrow_ListType__value_type`, type) } -compute___expr__call <- function(func_name, argument_list, options) { - .Call(`_arrow_compute___expr__call`, func_name, argument_list, options) +LargeListType__value_field <- function(type){ + .Call(`_arrow_LargeListType__value_field`, type) } -compute___expr__field_ref <- function(name) { - .Call(`_arrow_compute___expr__field_ref`, name) +LargeListType__value_type <- function(type){ + .Call(`_arrow_LargeListType__value_type`, type) } -compute___expr__get_field_ref_name <- function(x) { - .Call(`_arrow_compute___expr__get_field_ref_name`, x) +FixedSizeListType__value_field <- function(type){ + .Call(`_arrow_FixedSizeListType__value_field`, type) } -compute___expr__scalar <- function(x) { - .Call(`_arrow_compute___expr__scalar`, x) +FixedSizeListType__value_type <- function(type){ + .Call(`_arrow_FixedSizeListType__value_type`, type) } -compute___expr__ToString <- function(x) { - .Call(`_arrow_compute___expr__ToString`, x) +FixedSizeListType__list_size <- function(type){ + .Call(`_arrow_FixedSizeListType__list_size`, type) } -compute___expr__type <- function(x, schema) { - .Call(`_arrow_compute___expr__type`, x, schema) +compute___expr__call <- function(func_name, argument_list, options){ + .Call(`_arrow_compute___expr__call`, func_name, argument_list, options) } -compute___expr__type_id <- function(x, schema) { - .Call(`_arrow_compute___expr__type_id`, x, schema) +compute___expr__field_ref <- function(name){ + 
.Call(`_arrow_compute___expr__field_ref`, name) } -ipc___WriteFeather__Table <- function(stream, table, version, chunk_size, compression, compression_level) { - invisible(.Call(`_arrow_ipc___WriteFeather__Table`, stream, table, version, chunk_size, compression, compression_level)) +compute___expr__get_field_ref_name <- function(x){ + .Call(`_arrow_compute___expr__get_field_ref_name`, x) } -ipc___feather___Reader__version <- function(reader) { - .Call(`_arrow_ipc___feather___Reader__version`, reader) +compute___expr__scalar <- function(x){ + .Call(`_arrow_compute___expr__scalar`, x) } -ipc___feather___Reader__Read <- function(reader, columns) { - .Call(`_arrow_ipc___feather___Reader__Read`, reader, columns) +compute___expr__ToString <- function(x){ + .Call(`_arrow_compute___expr__ToString`, x) } -ipc___feather___Reader__Open <- function(stream) { - .Call(`_arrow_ipc___feather___Reader__Open`, stream) +compute___expr__type <- function(x, schema){ + .Call(`_arrow_compute___expr__type`, x, schema) } -ipc___feather___Reader__schema <- function(reader) { - .Call(`_arrow_ipc___feather___Reader__schema`, reader) +compute___expr__type_id <- function(x, schema){ + .Call(`_arrow_compute___expr__type_id`, x, schema) } -Field__initialize <- function(name, field, nullable) { - .Call(`_arrow_Field__initialize`, name, field, nullable) +ipc___WriteFeather__Table <- function(stream, table, version, chunk_size, compression, compression_level){ + invisible(.Call(`_arrow_ipc___WriteFeather__Table`, stream, table, version, chunk_size, compression, compression_level)) } -Field__ToString <- function(field) { - .Call(`_arrow_Field__ToString`, field) +ipc___feather___Reader__version <- function(reader){ + .Call(`_arrow_ipc___feather___Reader__version`, reader) } -Field__name <- function(field) { - .Call(`_arrow_Field__name`, field) +ipc___feather___Reader__Read <- function(reader, columns){ + .Call(`_arrow_ipc___feather___Reader__Read`, reader, columns) } -Field__Equals <- function(field, other) { - .Call(`_arrow_Field__Equals`, field, other) +ipc___feather___Reader__Open <- function(stream){ + .Call(`_arrow_ipc___feather___Reader__Open`, stream) } -Field__nullable <- function(field) { - .Call(`_arrow_Field__nullable`, field) +ipc___feather___Reader__schema <- function(reader){ + .Call(`_arrow_ipc___feather___Reader__schema`, reader) } -Field__type <- function(field) { - .Call(`_arrow_Field__type`, field) +Field__initialize <- function(name, field, nullable){ + .Call(`_arrow_Field__initialize`, name, field, nullable) } -fs___FileInfo__type <- function(x) { - .Call(`_arrow_fs___FileInfo__type`, x) +Field__ToString <- function(field){ + .Call(`_arrow_Field__ToString`, field) } -fs___FileInfo__set_type <- function(x, type) { - invisible(.Call(`_arrow_fs___FileInfo__set_type`, x, type)) +Field__name <- function(field){ + .Call(`_arrow_Field__name`, field) } -fs___FileInfo__path <- function(x) { - .Call(`_arrow_fs___FileInfo__path`, x) +Field__Equals <- function(field, other){ + .Call(`_arrow_Field__Equals`, field, other) } -fs___FileInfo__set_path <- function(x, path) { - invisible(.Call(`_arrow_fs___FileInfo__set_path`, x, path)) +Field__nullable <- function(field){ + .Call(`_arrow_Field__nullable`, field) } -fs___FileInfo__size <- function(x) { - .Call(`_arrow_fs___FileInfo__size`, x) +Field__type <- function(field){ + .Call(`_arrow_Field__type`, field) } -fs___FileInfo__set_size <- function(x, size) { - invisible(.Call(`_arrow_fs___FileInfo__set_size`, x, size)) +fs___FileInfo__type <- function(x){ + 
.Call(`_arrow_fs___FileInfo__type`, x) } -fs___FileInfo__base_name <- function(x) { - .Call(`_arrow_fs___FileInfo__base_name`, x) +fs___FileInfo__set_type <- function(x, type){ + invisible(.Call(`_arrow_fs___FileInfo__set_type`, x, type)) } -fs___FileInfo__extension <- function(x) { - .Call(`_arrow_fs___FileInfo__extension`, x) +fs___FileInfo__path <- function(x){ + .Call(`_arrow_fs___FileInfo__path`, x) } -fs___FileInfo__mtime <- function(x) { - .Call(`_arrow_fs___FileInfo__mtime`, x) +fs___FileInfo__set_path <- function(x, path){ + invisible(.Call(`_arrow_fs___FileInfo__set_path`, x, path)) } -fs___FileInfo__set_mtime <- function(x, time) { - invisible(.Call(`_arrow_fs___FileInfo__set_mtime`, x, time)) +fs___FileInfo__size <- function(x){ + .Call(`_arrow_fs___FileInfo__size`, x) } -fs___FileSelector__base_dir <- function(selector) { - .Call(`_arrow_fs___FileSelector__base_dir`, selector) +fs___FileInfo__set_size <- function(x, size){ + invisible(.Call(`_arrow_fs___FileInfo__set_size`, x, size)) } -fs___FileSelector__allow_not_found <- function(selector) { - .Call(`_arrow_fs___FileSelector__allow_not_found`, selector) +fs___FileInfo__base_name <- function(x){ + .Call(`_arrow_fs___FileInfo__base_name`, x) } -fs___FileSelector__recursive <- function(selector) { - .Call(`_arrow_fs___FileSelector__recursive`, selector) +fs___FileInfo__extension <- function(x){ + .Call(`_arrow_fs___FileInfo__extension`, x) } -fs___FileSelector__create <- function(base_dir, allow_not_found, recursive) { - .Call(`_arrow_fs___FileSelector__create`, base_dir, allow_not_found, recursive) +fs___FileInfo__mtime <- function(x){ + .Call(`_arrow_fs___FileInfo__mtime`, x) } -fs___FileSystem__GetTargetInfos_Paths <- function(file_system, paths) { - .Call(`_arrow_fs___FileSystem__GetTargetInfos_Paths`, file_system, paths) +fs___FileInfo__set_mtime <- function(x, time){ + invisible(.Call(`_arrow_fs___FileInfo__set_mtime`, x, time)) } -fs___FileSystem__GetTargetInfos_FileSelector <- function(file_system, selector) { - .Call(`_arrow_fs___FileSystem__GetTargetInfos_FileSelector`, file_system, selector) +fs___FileSelector__base_dir <- function(selector){ + .Call(`_arrow_fs___FileSelector__base_dir`, selector) } -fs___FileSystem__CreateDir <- function(file_system, path, recursive) { - invisible(.Call(`_arrow_fs___FileSystem__CreateDir`, file_system, path, recursive)) +fs___FileSelector__allow_not_found <- function(selector){ + .Call(`_arrow_fs___FileSelector__allow_not_found`, selector) } -fs___FileSystem__DeleteDir <- function(file_system, path) { - invisible(.Call(`_arrow_fs___FileSystem__DeleteDir`, file_system, path)) +fs___FileSelector__recursive <- function(selector){ + .Call(`_arrow_fs___FileSelector__recursive`, selector) } -fs___FileSystem__DeleteDirContents <- function(file_system, path) { - invisible(.Call(`_arrow_fs___FileSystem__DeleteDirContents`, file_system, path)) +fs___FileSelector__create <- function(base_dir, allow_not_found, recursive){ + .Call(`_arrow_fs___FileSelector__create`, base_dir, allow_not_found, recursive) } -fs___FileSystem__DeleteFile <- function(file_system, path) { - invisible(.Call(`_arrow_fs___FileSystem__DeleteFile`, file_system, path)) +fs___FileSystem__GetTargetInfos_Paths <- function(file_system, paths){ + .Call(`_arrow_fs___FileSystem__GetTargetInfos_Paths`, file_system, paths) } -fs___FileSystem__DeleteFiles <- function(file_system, paths) { - invisible(.Call(`_arrow_fs___FileSystem__DeleteFiles`, file_system, paths)) +fs___FileSystem__GetTargetInfos_FileSelector <- 
function(file_system, selector){ + .Call(`_arrow_fs___FileSystem__GetTargetInfos_FileSelector`, file_system, selector) } -fs___FileSystem__Move <- function(file_system, src, dest) { - invisible(.Call(`_arrow_fs___FileSystem__Move`, file_system, src, dest)) +fs___FileSystem__CreateDir <- function(file_system, path, recursive){ + invisible(.Call(`_arrow_fs___FileSystem__CreateDir`, file_system, path, recursive)) } -fs___FileSystem__CopyFile <- function(file_system, src, dest) { - invisible(.Call(`_arrow_fs___FileSystem__CopyFile`, file_system, src, dest)) +fs___FileSystem__DeleteDir <- function(file_system, path){ + invisible(.Call(`_arrow_fs___FileSystem__DeleteDir`, file_system, path)) } -fs___FileSystem__OpenInputStream <- function(file_system, path) { - .Call(`_arrow_fs___FileSystem__OpenInputStream`, file_system, path) +fs___FileSystem__DeleteDirContents <- function(file_system, path){ + invisible(.Call(`_arrow_fs___FileSystem__DeleteDirContents`, file_system, path)) } -fs___FileSystem__OpenInputFile <- function(file_system, path) { - .Call(`_arrow_fs___FileSystem__OpenInputFile`, file_system, path) +fs___FileSystem__DeleteFile <- function(file_system, path){ + invisible(.Call(`_arrow_fs___FileSystem__DeleteFile`, file_system, path)) } -fs___FileSystem__OpenOutputStream <- function(file_system, path) { - .Call(`_arrow_fs___FileSystem__OpenOutputStream`, file_system, path) +fs___FileSystem__DeleteFiles <- function(file_system, paths){ + invisible(.Call(`_arrow_fs___FileSystem__DeleteFiles`, file_system, paths)) } -fs___FileSystem__OpenAppendStream <- function(file_system, path) { - .Call(`_arrow_fs___FileSystem__OpenAppendStream`, file_system, path) +fs___FileSystem__Move <- function(file_system, src, dest){ + invisible(.Call(`_arrow_fs___FileSystem__Move`, file_system, src, dest)) } -fs___FileSystem__type_name <- function(file_system) { - .Call(`_arrow_fs___FileSystem__type_name`, file_system) +fs___FileSystem__CopyFile <- function(file_system, src, dest){ + invisible(.Call(`_arrow_fs___FileSystem__CopyFile`, file_system, src, dest)) } -fs___LocalFileSystem__create <- function() { - .Call(`_arrow_fs___LocalFileSystem__create`) +fs___FileSystem__OpenInputStream <- function(file_system, path){ + .Call(`_arrow_fs___FileSystem__OpenInputStream`, file_system, path) } -fs___SubTreeFileSystem__create <- function(base_path, base_fs) { - .Call(`_arrow_fs___SubTreeFileSystem__create`, base_path, base_fs) +fs___FileSystem__OpenInputFile <- function(file_system, path){ + .Call(`_arrow_fs___FileSystem__OpenInputFile`, file_system, path) } -fs___SubTreeFileSystem__base_fs <- function(file_system) { - .Call(`_arrow_fs___SubTreeFileSystem__base_fs`, file_system) +fs___FileSystem__OpenOutputStream <- function(file_system, path){ + .Call(`_arrow_fs___FileSystem__OpenOutputStream`, file_system, path) } -fs___SubTreeFileSystem__base_path <- function(file_system) { - .Call(`_arrow_fs___SubTreeFileSystem__base_path`, file_system) +fs___FileSystem__OpenAppendStream <- function(file_system, path){ + .Call(`_arrow_fs___FileSystem__OpenAppendStream`, file_system, path) } -fs___FileSystemFromUri <- function(path) { - .Call(`_arrow_fs___FileSystemFromUri`, path) +fs___FileSystem__type_name <- function(file_system){ + .Call(`_arrow_fs___FileSystem__type_name`, file_system) } -fs___CopyFiles <- function(source_fs, source_sel, destination_fs, destination_base_dir, chunk_size, use_threads) { - invisible(.Call(`_arrow_fs___CopyFiles`, source_fs, source_sel, destination_fs, destination_base_dir, chunk_size, 
use_threads)) +fs___LocalFileSystem__create <- function(){ + .Call(`_arrow_fs___LocalFileSystem__create`) } -fs___S3FileSystem__create <- function(anonymous, access_key, secret_key, session_token, role_arn, session_name, external_id, load_frequency, region, endpoint_override, scheme, background_writes) { - .Call(`_arrow_fs___S3FileSystem__create`, anonymous, access_key, secret_key, session_token, role_arn, session_name, external_id, load_frequency, region, endpoint_override, scheme, background_writes) +fs___SubTreeFileSystem__create <- function(base_path, base_fs){ + .Call(`_arrow_fs___SubTreeFileSystem__create`, base_path, base_fs) } -fs___S3FileSystem__region <- function(fs) { - .Call(`_arrow_fs___S3FileSystem__region`, fs) +fs___SubTreeFileSystem__base_fs <- function(file_system){ + .Call(`_arrow_fs___SubTreeFileSystem__base_fs`, file_system) } -io___Readable__Read <- function(x, nbytes) { - .Call(`_arrow_io___Readable__Read`, x, nbytes) +fs___SubTreeFileSystem__base_path <- function(file_system){ + .Call(`_arrow_fs___SubTreeFileSystem__base_path`, file_system) } -io___InputStream__Close <- function(x) { - invisible(.Call(`_arrow_io___InputStream__Close`, x)) +fs___FileSystemFromUri <- function(path){ + .Call(`_arrow_fs___FileSystemFromUri`, path) } -io___OutputStream__Close <- function(x) { - invisible(.Call(`_arrow_io___OutputStream__Close`, x)) +fs___CopyFiles <- function(source_fs, source_sel, destination_fs, destination_base_dir, chunk_size, use_threads){ + invisible(.Call(`_arrow_fs___CopyFiles`, source_fs, source_sel, destination_fs, destination_base_dir, chunk_size, use_threads)) } -io___RandomAccessFile__GetSize <- function(x) { - .Call(`_arrow_io___RandomAccessFile__GetSize`, x) +fs___S3FileSystem__create <- function(anonymous, access_key, secret_key, session_token, role_arn, session_name, external_id, load_frequency, region, endpoint_override, scheme, background_writes){ + .Call(`_arrow_fs___S3FileSystem__create`, anonymous, access_key, secret_key, session_token, role_arn, session_name, external_id, load_frequency, region, endpoint_override, scheme, background_writes) } -io___RandomAccessFile__supports_zero_copy <- function(x) { - .Call(`_arrow_io___RandomAccessFile__supports_zero_copy`, x) +fs___S3FileSystem__region <- function(fs){ + .Call(`_arrow_fs___S3FileSystem__region`, fs) } -io___RandomAccessFile__Seek <- function(x, position) { - invisible(.Call(`_arrow_io___RandomAccessFile__Seek`, x, position)) +io___Readable__Read <- function(x, nbytes){ + .Call(`_arrow_io___Readable__Read`, x, nbytes) } -io___RandomAccessFile__Tell <- function(x) { - .Call(`_arrow_io___RandomAccessFile__Tell`, x) +io___InputStream__Close <- function(x){ + invisible(.Call(`_arrow_io___InputStream__Close`, x)) } -io___RandomAccessFile__Read0 <- function(x) { - .Call(`_arrow_io___RandomAccessFile__Read0`, x) +io___OutputStream__Close <- function(x){ + invisible(.Call(`_arrow_io___OutputStream__Close`, x)) } -io___RandomAccessFile__ReadAt <- function(x, position, nbytes) { - .Call(`_arrow_io___RandomAccessFile__ReadAt`, x, position, nbytes) +io___RandomAccessFile__GetSize <- function(x){ + .Call(`_arrow_io___RandomAccessFile__GetSize`, x) } -io___MemoryMappedFile__Create <- function(path, size) { - .Call(`_arrow_io___MemoryMappedFile__Create`, path, size) +io___RandomAccessFile__supports_zero_copy <- function(x){ + .Call(`_arrow_io___RandomAccessFile__supports_zero_copy`, x) } -io___MemoryMappedFile__Open <- function(path, mode) { - .Call(`_arrow_io___MemoryMappedFile__Open`, path, mode) 
+io___RandomAccessFile__Seek <- function(x, position){ + invisible(.Call(`_arrow_io___RandomAccessFile__Seek`, x, position)) } -io___MemoryMappedFile__Resize <- function(x, size) { - invisible(.Call(`_arrow_io___MemoryMappedFile__Resize`, x, size)) +io___RandomAccessFile__Tell <- function(x){ + .Call(`_arrow_io___RandomAccessFile__Tell`, x) } -io___ReadableFile__Open <- function(path) { - .Call(`_arrow_io___ReadableFile__Open`, path) +io___RandomAccessFile__Read0 <- function(x){ + .Call(`_arrow_io___RandomAccessFile__Read0`, x) } -io___BufferReader__initialize <- function(buffer) { - .Call(`_arrow_io___BufferReader__initialize`, buffer) +io___RandomAccessFile__ReadAt <- function(x, position, nbytes){ + .Call(`_arrow_io___RandomAccessFile__ReadAt`, x, position, nbytes) } -io___Writable__write <- function(stream, buf) { - invisible(.Call(`_arrow_io___Writable__write`, stream, buf)) +io___MemoryMappedFile__Create <- function(path, size){ + .Call(`_arrow_io___MemoryMappedFile__Create`, path, size) } -io___OutputStream__Tell <- function(stream) { - .Call(`_arrow_io___OutputStream__Tell`, stream) +io___MemoryMappedFile__Open <- function(path, mode){ + .Call(`_arrow_io___MemoryMappedFile__Open`, path, mode) } -io___FileOutputStream__Open <- function(path) { - .Call(`_arrow_io___FileOutputStream__Open`, path) +io___MemoryMappedFile__Resize <- function(x, size){ + invisible(.Call(`_arrow_io___MemoryMappedFile__Resize`, x, size)) } -io___BufferOutputStream__Create <- function(initial_capacity) { - .Call(`_arrow_io___BufferOutputStream__Create`, initial_capacity) +io___ReadableFile__Open <- function(path){ + .Call(`_arrow_io___ReadableFile__Open`, path) } -io___BufferOutputStream__capacity <- function(stream) { - .Call(`_arrow_io___BufferOutputStream__capacity`, stream) +io___BufferReader__initialize <- function(buffer){ + .Call(`_arrow_io___BufferReader__initialize`, buffer) } -io___BufferOutputStream__Finish <- function(stream) { - .Call(`_arrow_io___BufferOutputStream__Finish`, stream) +io___Writable__write <- function(stream, buf){ + invisible(.Call(`_arrow_io___Writable__write`, stream, buf)) } -io___BufferOutputStream__Tell <- function(stream) { - .Call(`_arrow_io___BufferOutputStream__Tell`, stream) +io___OutputStream__Tell <- function(stream){ + .Call(`_arrow_io___OutputStream__Tell`, stream) } -io___BufferOutputStream__Write <- function(stream, bytes) { - invisible(.Call(`_arrow_io___BufferOutputStream__Write`, stream, bytes)) +io___FileOutputStream__Open <- function(path){ + .Call(`_arrow_io___FileOutputStream__Open`, path) } -json___ReadOptions__initialize <- function(use_threads, block_size) { - .Call(`_arrow_json___ReadOptions__initialize`, use_threads, block_size) +io___BufferOutputStream__Create <- function(initial_capacity){ + .Call(`_arrow_io___BufferOutputStream__Create`, initial_capacity) } -json___ParseOptions__initialize1 <- function(newlines_in_values) { - .Call(`_arrow_json___ParseOptions__initialize1`, newlines_in_values) +io___BufferOutputStream__capacity <- function(stream){ + .Call(`_arrow_io___BufferOutputStream__capacity`, stream) } -json___ParseOptions__initialize2 <- function(newlines_in_values, explicit_schema) { - .Call(`_arrow_json___ParseOptions__initialize2`, newlines_in_values, explicit_schema) +io___BufferOutputStream__Finish <- function(stream){ + .Call(`_arrow_io___BufferOutputStream__Finish`, stream) } -json___TableReader__Make <- function(input, read_options, parse_options) { - .Call(`_arrow_json___TableReader__Make`, input, read_options, parse_options) 
+io___BufferOutputStream__Tell <- function(stream){ + .Call(`_arrow_io___BufferOutputStream__Tell`, stream) } -json___TableReader__Read <- function(table_reader) { - .Call(`_arrow_json___TableReader__Read`, table_reader) +io___BufferOutputStream__Write <- function(stream, bytes){ + invisible(.Call(`_arrow_io___BufferOutputStream__Write`, stream, bytes)) } -MemoryPool__default <- function() { - .Call(`_arrow_MemoryPool__default`) +json___ReadOptions__initialize <- function(use_threads, block_size){ + .Call(`_arrow_json___ReadOptions__initialize`, use_threads, block_size) } -MemoryPool__bytes_allocated <- function(pool) { - .Call(`_arrow_MemoryPool__bytes_allocated`, pool) +json___ParseOptions__initialize1 <- function(newlines_in_values){ + .Call(`_arrow_json___ParseOptions__initialize1`, newlines_in_values) } -MemoryPool__max_memory <- function(pool) { - .Call(`_arrow_MemoryPool__max_memory`, pool) +json___ParseOptions__initialize2 <- function(newlines_in_values, explicit_schema){ + .Call(`_arrow_json___ParseOptions__initialize2`, newlines_in_values, explicit_schema) } -MemoryPool__backend_name <- function(pool) { - .Call(`_arrow_MemoryPool__backend_name`, pool) +json___TableReader__Make <- function(input, read_options, parse_options){ + .Call(`_arrow_json___TableReader__Make`, input, read_options, parse_options) } -supported_memory_backends <- function() { - .Call(`_arrow_supported_memory_backends`) +json___TableReader__Read <- function(table_reader){ + .Call(`_arrow_json___TableReader__Read`, table_reader) } -ipc___Message__body_length <- function(message) { - .Call(`_arrow_ipc___Message__body_length`, message) +MemoryPool__default <- function(){ + .Call(`_arrow_MemoryPool__default`) } -ipc___Message__metadata <- function(message) { - .Call(`_arrow_ipc___Message__metadata`, message) +MemoryPool__bytes_allocated <- function(pool){ + .Call(`_arrow_MemoryPool__bytes_allocated`, pool) } -ipc___Message__body <- function(message) { - .Call(`_arrow_ipc___Message__body`, message) +MemoryPool__max_memory <- function(pool){ + .Call(`_arrow_MemoryPool__max_memory`, pool) } -ipc___Message__Verify <- function(message) { - .Call(`_arrow_ipc___Message__Verify`, message) +MemoryPool__backend_name <- function(pool){ + .Call(`_arrow_MemoryPool__backend_name`, pool) } -ipc___Message__type <- function(message) { - .Call(`_arrow_ipc___Message__type`, message) +supported_memory_backends <- function(){ + .Call(`_arrow_supported_memory_backends`) } -ipc___Message__Equals <- function(x, y) { - .Call(`_arrow_ipc___Message__Equals`, x, y) +ipc___Message__body_length <- function(message){ + .Call(`_arrow_ipc___Message__body_length`, message) } -ipc___ReadRecordBatch__Message__Schema <- function(message, schema) { - .Call(`_arrow_ipc___ReadRecordBatch__Message__Schema`, message, schema) +ipc___Message__metadata <- function(message){ + .Call(`_arrow_ipc___Message__metadata`, message) } -ipc___ReadSchema_InputStream <- function(stream) { - .Call(`_arrow_ipc___ReadSchema_InputStream`, stream) +ipc___Message__body <- function(message){ + .Call(`_arrow_ipc___Message__body`, message) } -ipc___ReadSchema_Message <- function(message) { - .Call(`_arrow_ipc___ReadSchema_Message`, message) +ipc___Message__Verify <- function(message){ + .Call(`_arrow_ipc___Message__Verify`, message) } -ipc___MessageReader__Open <- function(stream) { - .Call(`_arrow_ipc___MessageReader__Open`, stream) +ipc___Message__type <- function(message){ + .Call(`_arrow_ipc___Message__type`, message) } -ipc___MessageReader__ReadNextMessage <- 
function(reader) { - .Call(`_arrow_ipc___MessageReader__ReadNextMessage`, reader) +ipc___Message__Equals <- function(x, y){ + .Call(`_arrow_ipc___Message__Equals`, x, y) } -ipc___ReadMessage <- function(stream) { - .Call(`_arrow_ipc___ReadMessage`, stream) +ipc___ReadRecordBatch__Message__Schema <- function(message, schema){ + .Call(`_arrow_ipc___ReadRecordBatch__Message__Schema`, message, schema) } -parquet___arrow___ArrowReaderProperties__Make <- function(use_threads) { - .Call(`_arrow_parquet___arrow___ArrowReaderProperties__Make`, use_threads) +ipc___ReadSchema_InputStream <- function(stream){ + .Call(`_arrow_ipc___ReadSchema_InputStream`, stream) } -parquet___arrow___ArrowReaderProperties__set_use_threads <- function(properties, use_threads) { - invisible(.Call(`_arrow_parquet___arrow___ArrowReaderProperties__set_use_threads`, properties, use_threads)) +ipc___ReadSchema_Message <- function(message){ + .Call(`_arrow_ipc___ReadSchema_Message`, message) } -parquet___arrow___ArrowReaderProperties__get_use_threads <- function(properties, use_threads) { - .Call(`_arrow_parquet___arrow___ArrowReaderProperties__get_use_threads`, properties, use_threads) +ipc___MessageReader__Open <- function(stream){ + .Call(`_arrow_ipc___MessageReader__Open`, stream) } -parquet___arrow___ArrowReaderProperties__get_read_dictionary <- function(properties, column_index) { - .Call(`_arrow_parquet___arrow___ArrowReaderProperties__get_read_dictionary`, properties, column_index) +ipc___MessageReader__ReadNextMessage <- function(reader){ + .Call(`_arrow_ipc___MessageReader__ReadNextMessage`, reader) } -parquet___arrow___ArrowReaderProperties__set_read_dictionary <- function(properties, column_index, read_dict) { - invisible(.Call(`_arrow_parquet___arrow___ArrowReaderProperties__set_read_dictionary`, properties, column_index, read_dict)) +ipc___ReadMessage <- function(stream){ + .Call(`_arrow_ipc___ReadMessage`, stream) } -parquet___arrow___FileReader__OpenFile <- function(file, props) { - .Call(`_arrow_parquet___arrow___FileReader__OpenFile`, file, props) +parquet___arrow___ArrowReaderProperties__Make <- function(use_threads){ + .Call(`_arrow_parquet___arrow___ArrowReaderProperties__Make`, use_threads) } -parquet___arrow___FileReader__ReadTable1 <- function(reader) { - .Call(`_arrow_parquet___arrow___FileReader__ReadTable1`, reader) +parquet___arrow___ArrowReaderProperties__set_use_threads <- function(properties, use_threads){ + invisible(.Call(`_arrow_parquet___arrow___ArrowReaderProperties__set_use_threads`, properties, use_threads)) } -parquet___arrow___FileReader__ReadTable2 <- function(reader, column_indices) { - .Call(`_arrow_parquet___arrow___FileReader__ReadTable2`, reader, column_indices) +parquet___arrow___ArrowReaderProperties__get_use_threads <- function(properties, use_threads){ + .Call(`_arrow_parquet___arrow___ArrowReaderProperties__get_use_threads`, properties, use_threads) } -parquet___arrow___FileReader__ReadRowGroup1 <- function(reader, i) { - .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroup1`, reader, i) +parquet___arrow___ArrowReaderProperties__get_read_dictionary <- function(properties, column_index){ + .Call(`_arrow_parquet___arrow___ArrowReaderProperties__get_read_dictionary`, properties, column_index) } -parquet___arrow___FileReader__ReadRowGroup2 <- function(reader, i, column_indices) { - .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroup2`, reader, i, column_indices) +parquet___arrow___ArrowReaderProperties__set_read_dictionary <- function(properties, column_index, 
read_dict){ + invisible(.Call(`_arrow_parquet___arrow___ArrowReaderProperties__set_read_dictionary`, properties, column_index, read_dict)) } -parquet___arrow___FileReader__ReadRowGroups1 <- function(reader, row_groups) { - .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroups1`, reader, row_groups) +parquet___arrow___FileReader__OpenFile <- function(file, props){ + .Call(`_arrow_parquet___arrow___FileReader__OpenFile`, file, props) } -parquet___arrow___FileReader__ReadRowGroups2 <- function(reader, row_groups, column_indices) { - .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroups2`, reader, row_groups, column_indices) +parquet___arrow___FileReader__ReadTable1 <- function(reader){ + .Call(`_arrow_parquet___arrow___FileReader__ReadTable1`, reader) } -parquet___arrow___FileReader__num_rows <- function(reader) { - .Call(`_arrow_parquet___arrow___FileReader__num_rows`, reader) +parquet___arrow___FileReader__ReadTable2 <- function(reader, column_indices){ + .Call(`_arrow_parquet___arrow___FileReader__ReadTable2`, reader, column_indices) } -parquet___arrow___FileReader__num_columns <- function(reader) { - .Call(`_arrow_parquet___arrow___FileReader__num_columns`, reader) +parquet___arrow___FileReader__ReadRowGroup1 <- function(reader, i){ + .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroup1`, reader, i) } -parquet___arrow___FileReader__num_row_groups <- function(reader) { - .Call(`_arrow_parquet___arrow___FileReader__num_row_groups`, reader) +parquet___arrow___FileReader__ReadRowGroup2 <- function(reader, i, column_indices){ + .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroup2`, reader, i, column_indices) } -parquet___arrow___FileReader__ReadColumn <- function(reader, i) { - .Call(`_arrow_parquet___arrow___FileReader__ReadColumn`, reader, i) +parquet___arrow___FileReader__ReadRowGroups1 <- function(reader, row_groups){ + .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroups1`, reader, row_groups) } -parquet___ArrowWriterProperties___create <- function(allow_truncated_timestamps, use_deprecated_int96_timestamps, timestamp_unit) { - .Call(`_arrow_parquet___ArrowWriterProperties___create`, allow_truncated_timestamps, use_deprecated_int96_timestamps, timestamp_unit) +parquet___arrow___FileReader__ReadRowGroups2 <- function(reader, row_groups, column_indices){ + .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroups2`, reader, row_groups, column_indices) } -parquet___WriterProperties___Builder__create <- function() { - .Call(`_arrow_parquet___WriterProperties___Builder__create`) +parquet___arrow___FileReader__num_rows <- function(reader){ + .Call(`_arrow_parquet___arrow___FileReader__num_rows`, reader) } -parquet___WriterProperties___Builder__version <- function(builder, version) { - invisible(.Call(`_arrow_parquet___WriterProperties___Builder__version`, builder, version)) +parquet___arrow___FileReader__num_columns <- function(reader){ + .Call(`_arrow_parquet___arrow___FileReader__num_columns`, reader) } -parquet___ArrowWriterProperties___Builder__set_compressions <- function(builder, paths, types) { - invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_compressions`, builder, paths, types)) +parquet___arrow___FileReader__num_row_groups <- function(reader){ + .Call(`_arrow_parquet___arrow___FileReader__num_row_groups`, reader) } -parquet___ArrowWriterProperties___Builder__set_compression_levels <- function(builder, paths, levels) { - invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_compression_levels`, builder, paths, levels)) 
+parquet___arrow___FileReader__ReadColumn <- function(reader, i){ + .Call(`_arrow_parquet___arrow___FileReader__ReadColumn`, reader, i) } -parquet___ArrowWriterProperties___Builder__set_use_dictionary <- function(builder, paths, use_dictionary) { - invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_use_dictionary`, builder, paths, use_dictionary)) +parquet___ArrowWriterProperties___create <- function(allow_truncated_timestamps, use_deprecated_int96_timestamps, timestamp_unit){ + .Call(`_arrow_parquet___ArrowWriterProperties___create`, allow_truncated_timestamps, use_deprecated_int96_timestamps, timestamp_unit) } -parquet___ArrowWriterProperties___Builder__set_write_statistics <- function(builder, paths, write_statistics) { - invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_write_statistics`, builder, paths, write_statistics)) +parquet___WriterProperties___Builder__create <- function(){ + .Call(`_arrow_parquet___WriterProperties___Builder__create`) } -parquet___ArrowWriterProperties___Builder__data_page_size <- function(builder, data_page_size) { - invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__data_page_size`, builder, data_page_size)) +parquet___WriterProperties___Builder__version <- function(builder, version){ + invisible(.Call(`_arrow_parquet___WriterProperties___Builder__version`, builder, version)) } -parquet___WriterProperties___Builder__build <- function(builder) { - .Call(`_arrow_parquet___WriterProperties___Builder__build`, builder) +parquet___ArrowWriterProperties___Builder__set_compressions <- function(builder, paths, types){ + invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_compressions`, builder, paths, types)) } -parquet___arrow___ParquetFileWriter__Open <- function(schema, sink, properties, arrow_properties) { - .Call(`_arrow_parquet___arrow___ParquetFileWriter__Open`, schema, sink, properties, arrow_properties) +parquet___ArrowWriterProperties___Builder__set_compression_levels <- function(builder, paths, levels){ + invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_compression_levels`, builder, paths, levels)) } -parquet___arrow___FileWriter__WriteTable <- function(writer, table, chunk_size) { - invisible(.Call(`_arrow_parquet___arrow___FileWriter__WriteTable`, writer, table, chunk_size)) +parquet___ArrowWriterProperties___Builder__set_use_dictionary <- function(builder, paths, use_dictionary){ + invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_use_dictionary`, builder, paths, use_dictionary)) } -parquet___arrow___FileWriter__Close <- function(writer) { - invisible(.Call(`_arrow_parquet___arrow___FileWriter__Close`, writer)) +parquet___ArrowWriterProperties___Builder__set_write_statistics <- function(builder, paths, write_statistics){ + invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_write_statistics`, builder, paths, write_statistics)) } -parquet___arrow___WriteTable <- function(table, sink, properties, arrow_properties) { - invisible(.Call(`_arrow_parquet___arrow___WriteTable`, table, sink, properties, arrow_properties)) +parquet___ArrowWriterProperties___Builder__data_page_size <- function(builder, data_page_size){ + invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__data_page_size`, builder, data_page_size)) } -parquet___arrow___FileReader__GetSchema <- function(reader) { - .Call(`_arrow_parquet___arrow___FileReader__GetSchema`, reader) +parquet___WriterProperties___Builder__build <- function(builder){ + 
.Call(`_arrow_parquet___WriterProperties___Builder__build`, builder) } -allocate_arrow_schema <- function() { - .Call(`_arrow_allocate_arrow_schema`) +parquet___arrow___ParquetFileWriter__Open <- function(schema, sink, properties, arrow_properties){ + .Call(`_arrow_parquet___arrow___ParquetFileWriter__Open`, schema, sink, properties, arrow_properties) } -delete_arrow_schema <- function(ptr) { - invisible(.Call(`_arrow_delete_arrow_schema`, ptr)) +parquet___arrow___FileWriter__WriteTable <- function(writer, table, chunk_size){ + invisible(.Call(`_arrow_parquet___arrow___FileWriter__WriteTable`, writer, table, chunk_size)) } -allocate_arrow_array <- function() { - .Call(`_arrow_allocate_arrow_array`) +parquet___arrow___FileWriter__Close <- function(writer){ + invisible(.Call(`_arrow_parquet___arrow___FileWriter__Close`, writer)) } -delete_arrow_array <- function(ptr) { - invisible(.Call(`_arrow_delete_arrow_array`, ptr)) +parquet___arrow___WriteTable <- function(table, sink, properties, arrow_properties){ + invisible(.Call(`_arrow_parquet___arrow___WriteTable`, table, sink, properties, arrow_properties)) } -allocate_arrow_array_stream <- function() { - .Call(`_arrow_allocate_arrow_array_stream`) +parquet___arrow___FileReader__GetSchema <- function(reader){ + .Call(`_arrow_parquet___arrow___FileReader__GetSchema`, reader) } -delete_arrow_array_stream <- function(ptr) { - invisible(.Call(`_arrow_delete_arrow_array_stream`, ptr)) +allocate_arrow_schema <- function(){ + .Call(`_arrow_allocate_arrow_schema`) } -ImportArray <- function(array, schema) { - .Call(`_arrow_ImportArray`, array, schema) +delete_arrow_schema <- function(ptr){ + invisible(.Call(`_arrow_delete_arrow_schema`, ptr)) } -ImportRecordBatch <- function(array, schema) { - .Call(`_arrow_ImportRecordBatch`, array, schema) +allocate_arrow_array <- function(){ + .Call(`_arrow_allocate_arrow_array`) } -ImportSchema <- function(schema) { - .Call(`_arrow_ImportSchema`, schema) +delete_arrow_array <- function(ptr){ + invisible(.Call(`_arrow_delete_arrow_array`, ptr)) } -ImportField <- function(field) { - .Call(`_arrow_ImportField`, field) +allocate_arrow_array_stream <- function(){ + .Call(`_arrow_allocate_arrow_array_stream`) } -ImportType <- function(type) { - .Call(`_arrow_ImportType`, type) +delete_arrow_array_stream <- function(ptr){ + invisible(.Call(`_arrow_delete_arrow_array_stream`, ptr)) } -ImportRecordBatchReader <- function(stream) { - .Call(`_arrow_ImportRecordBatchReader`, stream) +ImportArray <- function(array, schema){ + .Call(`_arrow_ImportArray`, array, schema) } -ExportType <- function(type, ptr) { - invisible(.Call(`_arrow_ExportType`, type, ptr)) +ImportRecordBatch <- function(array, schema){ + .Call(`_arrow_ImportRecordBatch`, array, schema) } -ExportField <- function(field, ptr) { - invisible(.Call(`_arrow_ExportField`, field, ptr)) +ImportSchema <- function(schema){ + .Call(`_arrow_ImportSchema`, schema) } -ExportSchema <- function(schema, ptr) { - invisible(.Call(`_arrow_ExportSchema`, schema, ptr)) +ImportField <- function(field){ + .Call(`_arrow_ImportField`, field) } -ExportArray <- function(array, array_ptr, schema_ptr) { - invisible(.Call(`_arrow_ExportArray`, array, array_ptr, schema_ptr)) +ImportType <- function(type){ + .Call(`_arrow_ImportType`, type) } -ExportRecordBatch <- function(batch, array_ptr, schema_ptr) { - invisible(.Call(`_arrow_ExportRecordBatch`, batch, array_ptr, schema_ptr)) +ImportRecordBatchReader <- function(stream){ + .Call(`_arrow_ImportRecordBatchReader`, stream) } 
-ExportRecordBatchReader <- function(reader, stream_ptr) { - invisible(.Call(`_arrow_ExportRecordBatchReader`, reader, stream_ptr)) +ExportType <- function(type, ptr){ + invisible(.Call(`_arrow_ExportType`, type, ptr)) } -Table__from_dots <- function(lst, schema_sxp, use_threads) { - .Call(`_arrow_Table__from_dots`, lst, schema_sxp, use_threads) +ExportField <- function(field, ptr){ + invisible(.Call(`_arrow_ExportField`, field, ptr)) } -vec_to_arrow <- function(x, s_type) { - .Call(`_arrow_vec_to_arrow`, x, s_type) +ExportSchema <- function(schema, ptr){ + invisible(.Call(`_arrow_ExportSchema`, schema, ptr)) } -DictionaryArray__FromArrays <- function(type, indices, dict) { - .Call(`_arrow_DictionaryArray__FromArrays`, type, indices, dict) +ExportArray <- function(array, array_ptr, schema_ptr){ + invisible(.Call(`_arrow_ExportArray`, array, array_ptr, schema_ptr)) } -RecordBatch__num_columns <- function(x) { - .Call(`_arrow_RecordBatch__num_columns`, x) +ExportRecordBatch <- function(batch, array_ptr, schema_ptr){ + invisible(.Call(`_arrow_ExportRecordBatch`, batch, array_ptr, schema_ptr)) } -RecordBatch__num_rows <- function(x) { - .Call(`_arrow_RecordBatch__num_rows`, x) +ExportRecordBatchReader <- function(reader, stream_ptr){ + invisible(.Call(`_arrow_ExportRecordBatchReader`, reader, stream_ptr)) } -RecordBatch__schema <- function(x) { - .Call(`_arrow_RecordBatch__schema`, x) +Table__from_dots <- function(lst, schema_sxp, use_threads){ + .Call(`_arrow_Table__from_dots`, lst, schema_sxp, use_threads) } -RecordBatch__RenameColumns <- function(batch, names) { - .Call(`_arrow_RecordBatch__RenameColumns`, batch, names) +vec_to_arrow <- function(x, s_type){ + .Call(`_arrow_vec_to_arrow`, x, s_type) } -RecordBatch__ReplaceSchemaMetadata <- function(x, metadata) { - .Call(`_arrow_RecordBatch__ReplaceSchemaMetadata`, x, metadata) +DictionaryArray__FromArrays <- function(type, indices, dict){ + .Call(`_arrow_DictionaryArray__FromArrays`, type, indices, dict) } -RecordBatch__columns <- function(batch) { - .Call(`_arrow_RecordBatch__columns`, batch) +RecordBatch__num_columns <- function(x){ + .Call(`_arrow_RecordBatch__num_columns`, x) } -RecordBatch__column <- function(batch, i) { - .Call(`_arrow_RecordBatch__column`, batch, i) +RecordBatch__num_rows <- function(x){ + .Call(`_arrow_RecordBatch__num_rows`, x) } -RecordBatch__GetColumnByName <- function(batch, name) { - .Call(`_arrow_RecordBatch__GetColumnByName`, batch, name) +RecordBatch__schema <- function(x){ + .Call(`_arrow_RecordBatch__schema`, x) } -RecordBatch__SelectColumns <- function(batch, indices) { - .Call(`_arrow_RecordBatch__SelectColumns`, batch, indices) +RecordBatch__RenameColumns <- function(batch, names){ + .Call(`_arrow_RecordBatch__RenameColumns`, batch, names) } -RecordBatch__Equals <- function(self, other, check_metadata) { - .Call(`_arrow_RecordBatch__Equals`, self, other, check_metadata) +RecordBatch__ReplaceSchemaMetadata <- function(x, metadata){ + .Call(`_arrow_RecordBatch__ReplaceSchemaMetadata`, x, metadata) } -RecordBatch__AddColumn <- function(batch, i, field, column) { - .Call(`_arrow_RecordBatch__AddColumn`, batch, i, field, column) +RecordBatch__columns <- function(batch){ + .Call(`_arrow_RecordBatch__columns`, batch) } -RecordBatch__SetColumn <- function(batch, i, field, column) { - .Call(`_arrow_RecordBatch__SetColumn`, batch, i, field, column) +RecordBatch__column <- function(batch, i){ + .Call(`_arrow_RecordBatch__column`, batch, i) } -RecordBatch__RemoveColumn <- function(batch, i) { - 
.Call(`_arrow_RecordBatch__RemoveColumn`, batch, i) +RecordBatch__GetColumnByName <- function(batch, name){ + .Call(`_arrow_RecordBatch__GetColumnByName`, batch, name) } -RecordBatch__column_name <- function(batch, i) { - .Call(`_arrow_RecordBatch__column_name`, batch, i) +RecordBatch__SelectColumns <- function(batch, indices){ + .Call(`_arrow_RecordBatch__SelectColumns`, batch, indices) } -RecordBatch__names <- function(batch) { - .Call(`_arrow_RecordBatch__names`, batch) +RecordBatch__Equals <- function(self, other, check_metadata){ + .Call(`_arrow_RecordBatch__Equals`, self, other, check_metadata) } -RecordBatch__Slice1 <- function(self, offset) { - .Call(`_arrow_RecordBatch__Slice1`, self, offset) +RecordBatch__AddColumn <- function(batch, i, field, column){ + .Call(`_arrow_RecordBatch__AddColumn`, batch, i, field, column) } -RecordBatch__Slice2 <- function(self, offset, length) { - .Call(`_arrow_RecordBatch__Slice2`, self, offset, length) +RecordBatch__SetColumn <- function(batch, i, field, column){ + .Call(`_arrow_RecordBatch__SetColumn`, batch, i, field, column) } -ipc___SerializeRecordBatch__Raw <- function(batch) { - .Call(`_arrow_ipc___SerializeRecordBatch__Raw`, batch) +RecordBatch__RemoveColumn <- function(batch, i){ + .Call(`_arrow_RecordBatch__RemoveColumn`, batch, i) } -ipc___ReadRecordBatch__InputStream__Schema <- function(stream, schema) { - .Call(`_arrow_ipc___ReadRecordBatch__InputStream__Schema`, stream, schema) +RecordBatch__column_name <- function(batch, i){ + .Call(`_arrow_RecordBatch__column_name`, batch, i) } -RecordBatch__from_arrays <- function(schema_sxp, lst) { - .Call(`_arrow_RecordBatch__from_arrays`, schema_sxp, lst) +RecordBatch__names <- function(batch){ + .Call(`_arrow_RecordBatch__names`, batch) } -RecordBatchReader__schema <- function(reader) { - .Call(`_arrow_RecordBatchReader__schema`, reader) +RecordBatch__Slice1 <- function(self, offset){ + .Call(`_arrow_RecordBatch__Slice1`, self, offset) } -RecordBatchReader__ReadNext <- function(reader) { - .Call(`_arrow_RecordBatchReader__ReadNext`, reader) +RecordBatch__Slice2 <- function(self, offset, length){ + .Call(`_arrow_RecordBatch__Slice2`, self, offset, length) } -RecordBatchReader__batches <- function(reader) { - .Call(`_arrow_RecordBatchReader__batches`, reader) +ipc___SerializeRecordBatch__Raw <- function(batch){ + .Call(`_arrow_ipc___SerializeRecordBatch__Raw`, batch) } -Table__from_RecordBatchReader <- function(reader) { - .Call(`_arrow_Table__from_RecordBatchReader`, reader) +ipc___ReadRecordBatch__InputStream__Schema <- function(stream, schema){ + .Call(`_arrow_ipc___ReadRecordBatch__InputStream__Schema`, stream, schema) } -ipc___RecordBatchStreamReader__Open <- function(stream) { - .Call(`_arrow_ipc___RecordBatchStreamReader__Open`, stream) +RecordBatch__from_arrays <- function(schema_sxp, lst){ + .Call(`_arrow_RecordBatch__from_arrays`, schema_sxp, lst) } -ipc___RecordBatchFileReader__schema <- function(reader) { - .Call(`_arrow_ipc___RecordBatchFileReader__schema`, reader) +RecordBatchReader__schema <- function(reader){ + .Call(`_arrow_RecordBatchReader__schema`, reader) } -ipc___RecordBatchFileReader__num_record_batches <- function(reader) { - .Call(`_arrow_ipc___RecordBatchFileReader__num_record_batches`, reader) +RecordBatchReader__ReadNext <- function(reader){ + .Call(`_arrow_RecordBatchReader__ReadNext`, reader) } -ipc___RecordBatchFileReader__ReadRecordBatch <- function(reader, i) { - .Call(`_arrow_ipc___RecordBatchFileReader__ReadRecordBatch`, reader, i) +RecordBatchReader__batches <- 
function(reader){ + .Call(`_arrow_RecordBatchReader__batches`, reader) } -ipc___RecordBatchFileReader__Open <- function(file) { - .Call(`_arrow_ipc___RecordBatchFileReader__Open`, file) +Table__from_RecordBatchReader <- function(reader){ + .Call(`_arrow_Table__from_RecordBatchReader`, reader) } -Table__from_RecordBatchFileReader <- function(reader) { - .Call(`_arrow_Table__from_RecordBatchFileReader`, reader) +ipc___RecordBatchStreamReader__Open <- function(stream){ + .Call(`_arrow_ipc___RecordBatchStreamReader__Open`, stream) } -ipc___RecordBatchFileReader__batches <- function(reader) { - .Call(`_arrow_ipc___RecordBatchFileReader__batches`, reader) +ipc___RecordBatchFileReader__schema <- function(reader){ + .Call(`_arrow_ipc___RecordBatchFileReader__schema`, reader) } -ipc___RecordBatchWriter__WriteRecordBatch <- function(batch_writer, batch) { - invisible(.Call(`_arrow_ipc___RecordBatchWriter__WriteRecordBatch`, batch_writer, batch)) +ipc___RecordBatchFileReader__num_record_batches <- function(reader){ + .Call(`_arrow_ipc___RecordBatchFileReader__num_record_batches`, reader) } -ipc___RecordBatchWriter__WriteTable <- function(batch_writer, table) { - invisible(.Call(`_arrow_ipc___RecordBatchWriter__WriteTable`, batch_writer, table)) +ipc___RecordBatchFileReader__ReadRecordBatch <- function(reader, i){ + .Call(`_arrow_ipc___RecordBatchFileReader__ReadRecordBatch`, reader, i) } -ipc___RecordBatchWriter__Close <- function(batch_writer) { - invisible(.Call(`_arrow_ipc___RecordBatchWriter__Close`, batch_writer)) +ipc___RecordBatchFileReader__Open <- function(file){ + .Call(`_arrow_ipc___RecordBatchFileReader__Open`, file) } -ipc___RecordBatchFileWriter__Open <- function(stream, schema, use_legacy_format, metadata_version) { - .Call(`_arrow_ipc___RecordBatchFileWriter__Open`, stream, schema, use_legacy_format, metadata_version) +Table__from_RecordBatchFileReader <- function(reader){ + .Call(`_arrow_Table__from_RecordBatchFileReader`, reader) } -ipc___RecordBatchStreamWriter__Open <- function(stream, schema, use_legacy_format, metadata_version) { - .Call(`_arrow_ipc___RecordBatchStreamWriter__Open`, stream, schema, use_legacy_format, metadata_version) +ipc___RecordBatchFileReader__batches <- function(reader){ + .Call(`_arrow_ipc___RecordBatchFileReader__batches`, reader) } -Array__GetScalar <- function(x, i) { - .Call(`_arrow_Array__GetScalar`, x, i) +ipc___RecordBatchWriter__WriteRecordBatch <- function(batch_writer, batch){ + invisible(.Call(`_arrow_ipc___RecordBatchWriter__WriteRecordBatch`, batch_writer, batch)) } -Scalar__ToString <- function(s) { - .Call(`_arrow_Scalar__ToString`, s) +ipc___RecordBatchWriter__WriteTable <- function(batch_writer, table){ + invisible(.Call(`_arrow_ipc___RecordBatchWriter__WriteTable`, batch_writer, table)) } -StructScalar__field <- function(s, i) { - .Call(`_arrow_StructScalar__field`, s, i) +ipc___RecordBatchWriter__Close <- function(batch_writer){ + invisible(.Call(`_arrow_ipc___RecordBatchWriter__Close`, batch_writer)) } -StructScalar__GetFieldByName <- function(s, name) { - .Call(`_arrow_StructScalar__GetFieldByName`, s, name) +ipc___RecordBatchFileWriter__Open <- function(stream, schema, use_legacy_format, metadata_version){ + .Call(`_arrow_ipc___RecordBatchFileWriter__Open`, stream, schema, use_legacy_format, metadata_version) } -Scalar__as_vector <- function(scalar) { - .Call(`_arrow_Scalar__as_vector`, scalar) +ipc___RecordBatchStreamWriter__Open <- function(stream, schema, use_legacy_format, metadata_version){ + 
.Call(`_arrow_ipc___RecordBatchStreamWriter__Open`, stream, schema, use_legacy_format, metadata_version) } -MakeArrayFromScalar <- function(scalar, n) { - .Call(`_arrow_MakeArrayFromScalar`, scalar, n) +Array__GetScalar <- function(x, i){ + .Call(`_arrow_Array__GetScalar`, x, i) } -Scalar__is_valid <- function(s) { - .Call(`_arrow_Scalar__is_valid`, s) +Scalar__ToString <- function(s){ + .Call(`_arrow_Scalar__ToString`, s) } -Scalar__type <- function(s) { - .Call(`_arrow_Scalar__type`, s) +StructScalar__field <- function(s, i){ + .Call(`_arrow_StructScalar__field`, s, i) } -Scalar__Equals <- function(lhs, rhs) { - .Call(`_arrow_Scalar__Equals`, lhs, rhs) +StructScalar__GetFieldByName <- function(s, name){ + .Call(`_arrow_StructScalar__GetFieldByName`, s, name) } -Scalar__ApproxEquals <- function(lhs, rhs) { - .Call(`_arrow_Scalar__ApproxEquals`, lhs, rhs) +Scalar__as_vector <- function(scalar){ + .Call(`_arrow_Scalar__as_vector`, scalar) } -schema_ <- function(fields) { - .Call(`_arrow_schema_`, fields) +MakeArrayFromScalar <- function(scalar, n){ + .Call(`_arrow_MakeArrayFromScalar`, scalar, n) } -Schema__ToString <- function(s) { - .Call(`_arrow_Schema__ToString`, s) +Scalar__is_valid <- function(s){ + .Call(`_arrow_Scalar__is_valid`, s) } -Schema__num_fields <- function(s) { - .Call(`_arrow_Schema__num_fields`, s) +Scalar__type <- function(s){ + .Call(`_arrow_Scalar__type`, s) } -Schema__field <- function(s, i) { - .Call(`_arrow_Schema__field`, s, i) +Scalar__Equals <- function(lhs, rhs){ + .Call(`_arrow_Scalar__Equals`, lhs, rhs) } -Schema__AddField <- function(s, i, field) { - .Call(`_arrow_Schema__AddField`, s, i, field) +Scalar__ApproxEquals <- function(lhs, rhs){ + .Call(`_arrow_Scalar__ApproxEquals`, lhs, rhs) } -Schema__SetField <- function(s, i, field) { - .Call(`_arrow_Schema__SetField`, s, i, field) +schema_ <- function(fields){ + .Call(`_arrow_schema_`, fields) } -Schema__RemoveField <- function(s, i) { - .Call(`_arrow_Schema__RemoveField`, s, i) +Schema__ToString <- function(s){ + .Call(`_arrow_Schema__ToString`, s) } -Schema__GetFieldByName <- function(s, x) { - .Call(`_arrow_Schema__GetFieldByName`, s, x) +Schema__num_fields <- function(s){ + .Call(`_arrow_Schema__num_fields`, s) } -Schema__fields <- function(schema) { - .Call(`_arrow_Schema__fields`, schema) +Schema__field <- function(s, i){ + .Call(`_arrow_Schema__field`, s, i) } -Schema__field_names <- function(schema) { - .Call(`_arrow_Schema__field_names`, schema) +Schema__AddField <- function(s, i, field){ + .Call(`_arrow_Schema__AddField`, s, i, field) } -Schema__HasMetadata <- function(schema) { - .Call(`_arrow_Schema__HasMetadata`, schema) +Schema__SetField <- function(s, i, field){ + .Call(`_arrow_Schema__SetField`, s, i, field) } -Schema__metadata <- function(schema) { - .Call(`_arrow_Schema__metadata`, schema) +Schema__RemoveField <- function(s, i){ + .Call(`_arrow_Schema__RemoveField`, s, i) } -Schema__WithMetadata <- function(schema, metadata) { - .Call(`_arrow_Schema__WithMetadata`, schema, metadata) +Schema__GetFieldByName <- function(s, x){ + .Call(`_arrow_Schema__GetFieldByName`, s, x) } -Schema__serialize <- function(schema) { - .Call(`_arrow_Schema__serialize`, schema) +Schema__fields <- function(schema){ + .Call(`_arrow_Schema__fields`, schema) } -Schema__Equals <- function(schema, other, check_metadata) { - .Call(`_arrow_Schema__Equals`, schema, other, check_metadata) +Schema__field_names <- function(schema){ + .Call(`_arrow_Schema__field_names`, schema) } -arrow__UnifySchemas <- function(schemas) { - 
.Call(`_arrow_arrow__UnifySchemas`, schemas) +Schema__HasMetadata <- function(schema){ + .Call(`_arrow_Schema__HasMetadata`, schema) } -Table__num_columns <- function(x) { - .Call(`_arrow_Table__num_columns`, x) +Schema__metadata <- function(schema){ + .Call(`_arrow_Schema__metadata`, schema) } -Table__num_rows <- function(x) { - .Call(`_arrow_Table__num_rows`, x) +Schema__WithMetadata <- function(schema, metadata){ + .Call(`_arrow_Schema__WithMetadata`, schema, metadata) } -Table__schema <- function(x) { - .Call(`_arrow_Table__schema`, x) +Schema__serialize <- function(schema){ + .Call(`_arrow_Schema__serialize`, schema) } -Table__ReplaceSchemaMetadata <- function(x, metadata) { - .Call(`_arrow_Table__ReplaceSchemaMetadata`, x, metadata) +Schema__Equals <- function(schema, other, check_metadata){ + .Call(`_arrow_Schema__Equals`, schema, other, check_metadata) } -Table__column <- function(table, i) { - .Call(`_arrow_Table__column`, table, i) +arrow__UnifySchemas <- function(schemas){ + .Call(`_arrow_arrow__UnifySchemas`, schemas) } -Table__field <- function(table, i) { - .Call(`_arrow_Table__field`, table, i) +Table__num_columns <- function(x){ + .Call(`_arrow_Table__num_columns`, x) } -Table__columns <- function(table) { - .Call(`_arrow_Table__columns`, table) +Table__num_rows <- function(x){ + .Call(`_arrow_Table__num_rows`, x) } -Table__ColumnNames <- function(table) { - .Call(`_arrow_Table__ColumnNames`, table) +Table__schema <- function(x){ + .Call(`_arrow_Table__schema`, x) } -Table__RenameColumns <- function(table, names) { - .Call(`_arrow_Table__RenameColumns`, table, names) +Table__ReplaceSchemaMetadata <- function(x, metadata){ + .Call(`_arrow_Table__ReplaceSchemaMetadata`, x, metadata) } -Table__Slice1 <- function(table, offset) { - .Call(`_arrow_Table__Slice1`, table, offset) +Table__column <- function(table, i){ + .Call(`_arrow_Table__column`, table, i) } -Table__Slice2 <- function(table, offset, length) { - .Call(`_arrow_Table__Slice2`, table, offset, length) +Table__field <- function(table, i){ + .Call(`_arrow_Table__field`, table, i) } -Table__Equals <- function(lhs, rhs, check_metadata) { - .Call(`_arrow_Table__Equals`, lhs, rhs, check_metadata) +Table__columns <- function(table){ + .Call(`_arrow_Table__columns`, table) } -Table__Validate <- function(table) { - .Call(`_arrow_Table__Validate`, table) +Table__ColumnNames <- function(table){ + .Call(`_arrow_Table__ColumnNames`, table) } -Table__ValidateFull <- function(table) { - .Call(`_arrow_Table__ValidateFull`, table) +Table__RenameColumns <- function(table, names){ + .Call(`_arrow_Table__RenameColumns`, table, names) } -Table__GetColumnByName <- function(table, name) { - .Call(`_arrow_Table__GetColumnByName`, table, name) +Table__Slice1 <- function(table, offset){ + .Call(`_arrow_Table__Slice1`, table, offset) } -Table__RemoveColumn <- function(table, i) { - .Call(`_arrow_Table__RemoveColumn`, table, i) +Table__Slice2 <- function(table, offset, length){ + .Call(`_arrow_Table__Slice2`, table, offset, length) } -Table__AddColumn <- function(table, i, field, column) { - .Call(`_arrow_Table__AddColumn`, table, i, field, column) +Table__Equals <- function(lhs, rhs, check_metadata){ + .Call(`_arrow_Table__Equals`, lhs, rhs, check_metadata) } -Table__SetColumn <- function(table, i, field, column) { - .Call(`_arrow_Table__SetColumn`, table, i, field, column) +Table__Validate <- function(table){ + .Call(`_arrow_Table__Validate`, table) } -Table__SelectColumns <- function(table, indices) { - .Call(`_arrow_Table__SelectColumns`, 
table, indices) +Table__ValidateFull <- function(table){ + .Call(`_arrow_Table__ValidateFull`, table) } -all_record_batches <- function(lst) { - .Call(`_arrow_all_record_batches`, lst) +Table__GetColumnByName <- function(table, name){ + .Call(`_arrow_Table__GetColumnByName`, table, name) } -Table__from_record_batches <- function(batches, schema_sxp) { - .Call(`_arrow_Table__from_record_batches`, batches, schema_sxp) +Table__RemoveColumn <- function(table, i){ + .Call(`_arrow_Table__RemoveColumn`, table, i) } -GetCpuThreadPoolCapacity <- function() { - .Call(`_arrow_GetCpuThreadPoolCapacity`) +Table__AddColumn <- function(table, i, field, column){ + .Call(`_arrow_Table__AddColumn`, table, i, field, column) } -SetCpuThreadPoolCapacity <- function(threads) { - invisible(.Call(`_arrow_SetCpuThreadPoolCapacity`, threads)) +Table__SetColumn <- function(table, i, field, column){ + .Call(`_arrow_Table__SetColumn`, table, i, field, column) } -GetIOThreadPoolCapacity <- function() { - .Call(`_arrow_GetIOThreadPoolCapacity`) +Table__SelectColumns <- function(table, indices){ + .Call(`_arrow_Table__SelectColumns`, table, indices) } -SetIOThreadPoolCapacity <- function(threads) { - invisible(.Call(`_arrow_SetIOThreadPoolCapacity`, threads)) +all_record_batches <- function(lst){ + .Call(`_arrow_all_record_batches`, lst) } -Array__infer_type <- function(x) { - .Call(`_arrow_Array__infer_type`, x) +Table__from_record_batches <- function(batches, schema_sxp){ + .Call(`_arrow_Table__from_record_batches`, batches, schema_sxp) } + +GetCpuThreadPoolCapacity <- function(){ + .Call(`_arrow_GetCpuThreadPoolCapacity`) +} + +SetCpuThreadPoolCapacity <- function(threads){ + invisible(.Call(`_arrow_SetCpuThreadPoolCapacity`, threads)) +} + +GetIOThreadPoolCapacity <- function(){ + .Call(`_arrow_GetIOThreadPoolCapacity`) +} + +SetIOThreadPoolCapacity <- function(threads){ + invisible(.Call(`_arrow_SetIOThreadPoolCapacity`, threads)) +} + +Array__infer_type <- function(x){ + .Call(`_arrow_Array__infer_type`, x) +} + + + diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R index 26db190099f..2db8a954918 100644 --- a/r/R/dplyr-summarize.R +++ b/r/R/dplyr-summarize.R @@ -28,14 +28,82 @@ summarise.arrow_dplyr_query <- function(.data, ..., .engine = c("arrow", "duckdb dplyr::group_vars(.data) # vars needed for grouping )) .data <- dplyr::select(.data, vars_to_keep) - if (match.arg(.engine) == "duckdb") { - dplyr::summarise(to_duckdb(.data), ...) - } else { - if (query_on_dataset(.data)) { - not_implemented_for_dataset("summarize()") + dplyr::summarise(to_duckdb(.data), ...) + } else if (isTRUE(getOption("arrow.summarize", FALSE))) { + # Try summarizing in Arrow; if that succeeds, return the result + out <- try(do_arrow_summarize(.data, ...), silent = TRUE) + if (inherits(out, "try-error")) { + return(abandon_ship(call, .data, format(out))) + } else { + return(out) } + } else { + # If unsuccessful or if option not set, do the work in R dplyr::summarise(dplyr::collect(.data), ...) } } summarise.Dataset <- summarise.ArrowTabular <- summarise.arrow_dplyr_query + +do_arrow_summarize <- function(.data, ...) { + if (length(dplyr::group_vars(.data))) { + stop("Grouped aggregation not supported in Arrow", call. = FALSE) + } + + exprs <- quos(...)
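+  # e.g. quos(total = sum(int), sum(dbl)) comes in with names c("total", ""), + # so the fix-up below should label the unnamed entry "sum(dbl)" via as_label()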
+ # Check for unnamed expressions and fix if any + unnamed <- !nzchar(names(exprs)) + # Deparse and take the first element in case they're long expressions + names(exprs)[unnamed] <- map_chr(exprs[unnamed], as_label) + + mask <- arrow_mask(.data) + # Add aggregation wrappers to arrow_mask somehow + # (this is not ideal, would overwrite same-named objects) + mask$sum <- function(x, na.rm = FALSE) { + list( + fun = "sum", + data = x, + options = list(na.rm = na.rm) + ) + } + results <- list() + for (i in seq_along(exprs)) { + # Iterate over the indices and not the names because names may be repeated + # (which overwrites the previous name) + new_var <- names(exprs)[i] + results[[new_var]] <- arrow_eval(exprs[[i]], mask) + if (inherits(results[[new_var]], "try-error")) { + msg <- paste('Expression', as_label(exprs[[i]]), 'not supported in Arrow') + stop(msg, call. = FALSE) + } + # Put it in the data mask too? + #mask[[new_var]] <- mask$.data[[new_var]] <- results[[new_var]] + } + + # Now, from that, split out the data (expressions) and options + .data$aggregations <- lapply(results, function(x) x[c("fun", "options")]) + + inputs <- lapply(results, function(x) x$data) + # This is essentially a projection, and the column names don't matter + # (but must exist) + names(inputs) <- as.character(seq_along(inputs)) + .data$selected_columns <- inputs + + # Eventually, we will return .data here if (dataset) but do it eagerly now + do_exec_plan(.data) +} + +do_exec_plan <- function(.data) { + plan <- ExecPlan$create() + # Scan also will filter and select columns, so we don't need to Filter + start_node <- plan$Scan(.data) + # If any columns are derived we need to Project (otherwise this may be no-op) + project_node <- start_node$Project(.data$selected_columns) + + final_node <- project_node$ScalarAggregate( + options = .data$aggregations, + targets = names(.data), + out_field_names = names(.data$aggregations) + ) + plan$Run(final_node) +} \ No newline at end of file diff --git a/r/R/query-engine.R b/r/R/query-engine.R index 5d14264b90c..fa12396740e 100644 --- a/r/R/query-engine.R +++ b/r/R/query-engine.R @@ -19,7 +19,24 @@ ExecPlan <- R6Class("ExecPlan", inherit = ArrowObject, public = list( Scan = function(dataset) { # Handle arrow_dplyr_query - # TODO: why do I need to filter/project here? + if (inherits(dataset, "arrow_dplyr_query")) { + filter <- dataset$filtered_rows + if (isTRUE(filter)) { + filter <- Expression$scalar(TRUE) + } + # TODO: use FieldsInExpression to find all from dataset$selected_columns + colnames <- names(dataset$.data) + dataset <- dataset$.data + } else { + if (inherits(dataset, "ArrowTabular")) { + dataset <- InMemoryDataset$create(dataset) + } + assert_is(dataset, "Dataset") + # Set some defaults + filter <- Expression$scalar(TRUE) + colnames <- names(dataset) + } + # TODO: why do I _need_ to filter/project here?
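+      # (Presumably because the scan node pushes the filter down to the dataset + # fragments and uses the column list to decide which fields to materialize, + # so both must be supplied even when nothing is filtered or projected away.)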
ExecNode_Scan(self, dataset, filter, colnames) }, Run = function(node) { @@ -45,7 +62,3 @@ ExecNode <- R6Class("ExecNode", inherit = ArrowObject, } ) ) - -# plan <- ExecPlan$create() -# final_node <- plan$Scan(dataset)$Filter(expr)$Project(exprs)$ScalarAggregate(something) -# plan$Run(final_node) \ No newline at end of file diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index 19095a4cbde..86418634226 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -1092,6 +1092,87 @@ extern "C" SEXP _arrow_io___CompressedInputStream__Make(SEXP codec_sexp, SEXP ra } #endif +// compute-exec.cpp +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr ExecPlan_create(); +extern "C" SEXP _arrow_ExecPlan_create(){ +BEGIN_CPP11 + return cpp11::as_sexp(ExecPlan_create()); +END_CPP11 +} +#else +extern "C" SEXP _arrow_ExecPlan_create(){ + Rf_error("Cannot call ExecPlan_create(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + +// compute-exec.cpp +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr ExecPlan_run(std::shared_ptr plan, std::shared_ptr final_node); +extern "C" SEXP _arrow_ExecPlan_run(SEXP plan_sexp, SEXP final_node_sexp){ +BEGIN_CPP11 + arrow::r::Input>::type plan(plan_sexp); + arrow::r::Input>::type final_node(final_node_sexp); + return cpp11::as_sexp(ExecPlan_run(plan, final_node)); +END_CPP11 +} +#else +extern "C" SEXP _arrow_ExecPlan_run(SEXP plan_sexp, SEXP final_node_sexp){ + Rf_error("Cannot call ExecPlan_run(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + +// compute-exec.cpp +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr ExecNode_Scan(std::shared_ptr plan, std::shared_ptr dataset, std::shared_ptr filter, std::vector materialized_field_names); +extern "C" SEXP _arrow_ExecNode_Scan(SEXP plan_sexp, SEXP dataset_sexp, SEXP filter_sexp, SEXP materialized_field_names_sexp){ +BEGIN_CPP11 + arrow::r::Input>::type plan(plan_sexp); + arrow::r::Input>::type dataset(dataset_sexp); + arrow::r::Input>::type filter(filter_sexp); + arrow::r::Input>::type materialized_field_names(materialized_field_names_sexp); + return cpp11::as_sexp(ExecNode_Scan(plan, dataset, filter, materialized_field_names)); +END_CPP11 +} +#else +extern "C" SEXP _arrow_ExecNode_Scan(SEXP plan_sexp, SEXP dataset_sexp, SEXP filter_sexp, SEXP materialized_field_names_sexp){ + Rf_error("Cannot call ExecNode_Scan(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + +// compute-exec.cpp +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr ExecNode_Filter(std::shared_ptr input, std::shared_ptr filter); +extern "C" SEXP _arrow_ExecNode_Filter(SEXP input_sexp, SEXP filter_sexp){ +BEGIN_CPP11 + arrow::r::Input>::type input(input_sexp); + arrow::r::Input>::type filter(filter_sexp); + return cpp11::as_sexp(ExecNode_Filter(input, filter)); +END_CPP11 +} +#else +extern "C" SEXP _arrow_ExecNode_Filter(SEXP input_sexp, SEXP filter_sexp){ + Rf_error("Cannot call ExecNode_Filter(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. 
"); +} +#endif + +// compute-exec.cpp +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr ExecNode_Project(std::shared_ptr input, std::vector> exprs, std::vector names); +extern "C" SEXP _arrow_ExecNode_Project(SEXP input_sexp, SEXP exprs_sexp, SEXP names_sexp){ +BEGIN_CPP11 + arrow::r::Input>::type input(input_sexp); + arrow::r::Input>>::type exprs(exprs_sexp); + arrow::r::Input>::type names(names_sexp); + return cpp11::as_sexp(ExecNode_Project(input, exprs, names)); +END_CPP11 +} +#else +extern "C" SEXP _arrow_ExecNode_Project(SEXP input_sexp, SEXP exprs_sexp, SEXP names_sexp){ + Rf_error("Cannot call ExecNode_Project(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // compute.cpp #if defined(ARROW_R_WITH_ARROW) std::shared_ptr RecordBatch__cast(const std::shared_ptr& batch, const std::shared_ptr& schema, cpp11::list options); @@ -7011,6 +7092,11 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_util___Codec__IsAvailable", (DL_FUNC) &_arrow_util___Codec__IsAvailable, 1}, { "_arrow_io___CompressedOutputStream__Make", (DL_FUNC) &_arrow_io___CompressedOutputStream__Make, 2}, { "_arrow_io___CompressedInputStream__Make", (DL_FUNC) &_arrow_io___CompressedInputStream__Make, 2}, + { "_arrow_ExecPlan_create", (DL_FUNC) &_arrow_ExecPlan_create, 0}, + { "_arrow_ExecPlan_run", (DL_FUNC) &_arrow_ExecPlan_run, 2}, + { "_arrow_ExecNode_Scan", (DL_FUNC) &_arrow_ExecNode_Scan, 4}, + { "_arrow_ExecNode_Filter", (DL_FUNC) &_arrow_ExecNode_Filter, 2}, + { "_arrow_ExecNode_Project", (DL_FUNC) &_arrow_ExecNode_Project, 3}, { "_arrow_RecordBatch__cast", (DL_FUNC) &_arrow_RecordBatch__cast, 3}, { "_arrow_Table__cast", (DL_FUNC) &_arrow_Table__cast, 3}, { "_arrow_compute__CallFunction", (DL_FUNC) &_arrow_compute__CallFunction, 3}, diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h index 49bdefb6f44..4ecb99174b5 100644 --- a/r/src/arrow_types.h +++ b/r/src/arrow_types.h @@ -47,6 +47,15 @@ #include #include +namespace arrow { +namespace compute { + +class ExecPlan; +class ExecNode; + +} // namespace compute +} // namespace arrow + #if defined(ARROW_R_WITH_PARQUET) #include #endif diff --git a/r/src/compute-exec.cpp b/r/src/compute-exec.cpp index e9e9cc04a4c..f9b1c6e2818 100644 --- a/r/src/compute-exec.cpp +++ b/r/src/compute-exec.cpp @@ -26,6 +26,9 @@ namespace compute = ::arrow::compute; +std::shared_ptr make_compute_options(std::string func_name, + cpp11::list options); + // [[arrow::export]] std::shared_ptr ExecPlan_create() { return ValueOrStop(compute::ExecPlan::Make(gc_context())); @@ -104,7 +107,7 @@ std::shared_ptr ExecNode_Filter( std::shared_ptr ExecNode_Project( std::shared_ptr input, std::vector> exprs, - std::vector names = {}) { + std::vector names) { // We have shared_ptrs of expressions but need the Expressions std::vector expressions; for (auto expr : exprs) { @@ -130,8 +133,8 @@ std::shared_ptr ExecNode_ScalarAggregate( keep_alives.push_back(std::move(opts)); } - auto scalar_agg = ValueOrStop(MakeScalarAggregateNode( - source, /*label=*/"scalar_agg", aggregates, targets, out_field_names)); + auto scalar_agg = ValueOrStop(compute::MakeScalarAggregateNode( + input, /*label=*/"scalar_agg", aggregates, targets, out_field_names)); return std::shared_ptr(scalar_agg, [keep_alives](...) 
{ // empty destructor: ExecNode lifetime is managed by an ExecPlan diff --git a/r/tests/testthat/test-dplyr-aggregate.R b/r/tests/testthat/test-dplyr-aggregate.R index 7186acb9aed..279e5448753 100644 --- a/r/tests/testthat/test-dplyr-aggregate.R +++ b/r/tests/testthat/test-dplyr-aggregate.R @@ -28,6 +28,7 @@ tbl$verses <- verses[[1]] tbl$padded_strings <- stringr::str_pad(letters[1:10], width = 2*(1:10)+1, side = "both") test_that("Can aggregate", { + withr::local_options(list(arrow.summarize = TRUE)) expect_dplyr_equal( input %>% summarize(total = sum(int)), From 1947e156a014b0c2bcaede1ace7547addc5a6586 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Thu, 15 Jul 2021 10:15:41 -0400 Subject: [PATCH 04/24] const --- r/src/arrowExports.cpp | 26 +++++++++++++------------- r/src/compute-exec.cpp | 20 ++++++++++---------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index 86418634226..371aae96703 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -1108,11 +1108,11 @@ extern "C" SEXP _arrow_ExecPlan_create(){ // compute-exec.cpp #if defined(ARROW_R_WITH_ARROW) -std::shared_ptr ExecPlan_run(std::shared_ptr plan, std::shared_ptr final_node); +std::shared_ptr ExecPlan_run(const std::shared_ptr& plan, const std::shared_ptr& final_node); extern "C" SEXP _arrow_ExecPlan_run(SEXP plan_sexp, SEXP final_node_sexp){ BEGIN_CPP11 - arrow::r::Input>::type plan(plan_sexp); - arrow::r::Input>::type final_node(final_node_sexp); + arrow::r::Input&>::type plan(plan_sexp); + arrow::r::Input&>::type final_node(final_node_sexp); return cpp11::as_sexp(ExecPlan_run(plan, final_node)); END_CPP11 } @@ -1124,12 +1124,12 @@ extern "C" SEXP _arrow_ExecPlan_run(SEXP plan_sexp, SEXP final_node_sexp){ // compute-exec.cpp #if defined(ARROW_R_WITH_ARROW) -std::shared_ptr ExecNode_Scan(std::shared_ptr plan, std::shared_ptr dataset, std::shared_ptr filter, std::vector materialized_field_names); +std::shared_ptr ExecNode_Scan(const std::shared_ptr& plan, const std::shared_ptr& dataset, const std::shared_ptr& filter, std::vector materialized_field_names); extern "C" SEXP _arrow_ExecNode_Scan(SEXP plan_sexp, SEXP dataset_sexp, SEXP filter_sexp, SEXP materialized_field_names_sexp){ BEGIN_CPP11 - arrow::r::Input>::type plan(plan_sexp); - arrow::r::Input>::type dataset(dataset_sexp); - arrow::r::Input>::type filter(filter_sexp); + arrow::r::Input&>::type plan(plan_sexp); + arrow::r::Input&>::type dataset(dataset_sexp); + arrow::r::Input&>::type filter(filter_sexp); arrow::r::Input>::type materialized_field_names(materialized_field_names_sexp); return cpp11::as_sexp(ExecNode_Scan(plan, dataset, filter, materialized_field_names)); END_CPP11 @@ -1142,11 +1142,11 @@ extern "C" SEXP _arrow_ExecNode_Scan(SEXP plan_sexp, SEXP dataset_sexp, SEXP fil // compute-exec.cpp #if defined(ARROW_R_WITH_ARROW) -std::shared_ptr ExecNode_Filter(std::shared_ptr input, std::shared_ptr filter); +std::shared_ptr ExecNode_Filter(const std::shared_ptr& input, const std::shared_ptr& filter); extern "C" SEXP _arrow_ExecNode_Filter(SEXP input_sexp, SEXP filter_sexp){ BEGIN_CPP11 - arrow::r::Input>::type input(input_sexp); - arrow::r::Input>::type filter(filter_sexp); + arrow::r::Input&>::type input(input_sexp); + arrow::r::Input&>::type filter(filter_sexp); return cpp11::as_sexp(ExecNode_Filter(input, filter)); END_CPP11 } @@ -1158,11 +1158,11 @@ extern "C" SEXP _arrow_ExecNode_Filter(SEXP input_sexp, SEXP filter_sexp){ // compute-exec.cpp #if defined(ARROW_R_WITH_ARROW) 
-std::shared_ptr ExecNode_Project(std::shared_ptr input, std::vector> exprs, std::vector names); +std::shared_ptr ExecNode_Project(const std::shared_ptr& input, const std::vector>& exprs, std::vector names); extern "C" SEXP _arrow_ExecNode_Project(SEXP input_sexp, SEXP exprs_sexp, SEXP names_sexp){ BEGIN_CPP11 - arrow::r::Input>::type input(input_sexp); - arrow::r::Input>>::type exprs(exprs_sexp); + arrow::r::Input&>::type input(input_sexp); + arrow::r::Input>&>::type exprs(exprs_sexp); arrow::r::Input>::type names(names_sexp); return cpp11::as_sexp(ExecNode_Project(input, exprs, names)); END_CPP11 diff --git a/r/src/compute-exec.cpp b/r/src/compute-exec.cpp index f9b1c6e2818..dd341784899 100644 --- a/r/src/compute-exec.cpp +++ b/r/src/compute-exec.cpp @@ -36,8 +36,8 @@ std::shared_ptr ExecPlan_create() { // [[arrow::export]] std::shared_ptr ExecPlan_run( - std::shared_ptr plan, - std::shared_ptr final_node) { + const std::shared_ptr& plan, + const std::shared_ptr& final_node) { // For now, don't require R to construct SinkNodes. // Instead, just pass the node we should collect as an argument. auto sink_gen = compute::MakeSinkNode(final_node.get(), "sink"); @@ -65,9 +65,9 @@ std::shared_ptr ExecNodeOrStop( // [[arrow::export]] std::shared_ptr ExecNode_Scan( - std::shared_ptr plan, - std::shared_ptr dataset, - std::shared_ptr filter, + const std::shared_ptr& plan, + const std::shared_ptr& dataset, + const std::shared_ptr& filter, std::vector materialized_field_names) { // TODO: pass in ScanOptions by file type auto options = std::make_shared(); @@ -97,16 +97,16 @@ std::shared_ptr ExecNode_Scan( // [[arrow::export]] std::shared_ptr ExecNode_Filter( - std::shared_ptr input, - std::shared_ptr filter) { + const std::shared_ptr& input, + const std::shared_ptr& filter) { return ExecNodeOrStop( compute::MakeFilterNode(input.get(), /*label=*/"filter", *filter)); } // [[arrow::export]] std::shared_ptr ExecNode_Project( - std::shared_ptr input, - std::vector> exprs, + const std::shared_ptr& input, + const std::vector>& exprs, std::vector names) { // We have shared_ptrs of expressions but need the Expressions std::vector expressions; @@ -118,7 +118,7 @@ std::shared_ptr ExecNode_Project( } std::shared_ptr ExecNode_ScalarAggregate( - std::shared_ptr input, cpp11::list options, + const std::shared_ptr& input, cpp11::list options, std::vector targets, std::vector out_field_names) { // PROBLEM: need to keep these alive as long as the plan somehow. 
std::vector> keep_alives; From 1bc0789f6af3434a7ab61e49df2a3900724bda5b Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Thu, 15 Jul 2021 11:09:56 -0400 Subject: [PATCH 05/24] improve keepalive pattern --- cpp/src/arrow/compute/exec/exec_plan.cc | 35 +++++++++++++++------ cpp/src/arrow/compute/exec/exec_plan.h | 4 ++- cpp/src/arrow/compute/exec/plan_test.cc | 8 +++-- r/DESCRIPTION | 1 + r/R/arrowExports.R | 4 +-- r/R/query-engine.R | 7 +++-- r/src/arrowExports.cpp | 11 ++++--- r/src/compute-exec.cpp | 42 ++++++++++++++++--------- r/src/compute.cpp | 4 +-- 9 files changed, 77 insertions(+), 39 deletions(-) diff --git a/cpp/src/arrow/compute/exec/exec_plan.cc b/cpp/src/arrow/compute/exec/exec_plan.cc index 20c8c347cc1..4a4758c8471 100644 --- a/cpp/src/arrow/compute/exec/exec_plan.cc +++ b/cpp/src/arrow/compute/exec/exec_plan.cc @@ -719,11 +719,13 @@ struct ScalarAggregateNode : ExecNode { ScalarAggregateNode(ExecNode* input, std::string label, std::shared_ptr output_schema, std::vector kernels, + std::vector argument_indices, std::vector>> states) : ExecNode(input->plan(), std::move(label), {input}, {"target"}, /*output_schema=*/std::move(output_schema), /*num_outputs=*/1), kernels_(std::move(kernels)), + argument_indices_(std::move(argument_indices)), states_(std::move(states)) {} const char* kind_name() override { return "ScalarAggregateNode"; } @@ -733,7 +735,7 @@ struct ScalarAggregateNode : ExecNode { KernelContext batch_ctx{plan()->exec_context()}; batch_ctx.SetState(states_[i][thread_index].get()); - ExecBatch single_column_batch{{batch.values[i]}, batch.length}; + ExecBatch single_column_batch{{batch[argument_indices_[i]]}, batch.length}; RETURN_NOT_OK(kernels_[i]->consume(&batch_ctx, single_column_batch)); } return Status::OK(); @@ -807,7 +809,8 @@ struct ScalarAggregateNode : ExecNode { } Future<> finished_ = Future<>::MakeFinished(); - std::vector kernels_; + const std::vector kernels_; + const std::vector argument_indices_; std::vector>> states_; @@ -816,11 +819,17 @@ struct ScalarAggregateNode : ExecNode { }; Result MakeScalarAggregateNode(ExecNode* input, std::string label, - std::vector aggregates) { - if (input->output_schema()->num_fields() != static_cast(aggregates.size())) { - return Status::Invalid("Provided ", aggregates.size(), - " aggregates, expected one for each field of ", - input->output_schema()->ToString()); + std::vector aggregates, + std::vector arguments, + std::vector out_field_names) { + if (aggregates.size() != arguments.size()) { + return Status::Invalid("Provided ", aggregates.size(), " aggregates but ", + arguments.size(), " arguments."); + } + + if (aggregates.size() != out_field_names.size()) { + return Status::Invalid("Provided ", aggregates.size(), " aggregates but ", + out_field_names.size(), " field names for the output."); } auto exec_ctx = input->plan()->exec_context(); @@ -828,8 +837,16 @@ Result MakeScalarAggregateNode(ExecNode* input, std::string label, std::vector kernels(aggregates.size()); std::vector>> states(kernels.size()); FieldVector fields(kernels.size()); + std::vector argument_indices(kernels.size()); for (size_t i = 0; i < kernels.size(); ++i) { + if (!arguments[i].IsName()) { + return Status::NotImplemented("Non name field refs"); + } + ARROW_ASSIGN_OR_RAISE(auto match, + arguments[i].FindOneOrNone(*input->output_schema())); + argument_indices[i] = match[0]; + ARROW_ASSIGN_OR_RAISE(auto function, exec_ctx->func_registry()->GetFunction(aggregates[i].function)); @@ -862,12 +879,12 @@ Result MakeScalarAggregateNode(ExecNode* 
input, std::string label, ARROW_ASSIGN_OR_RAISE( auto descr, kernels[i]->signature->out_type().Resolve(&kernel_ctx, {in_type})); - fields[i] = field(aggregates[i].function, std::move(descr.type)); + fields[i] = field(std::move(out_field_names[i]), std::move(descr.type)); } return input->plan()->EmplaceNode( input, std::move(label), schema(std::move(fields)), std::move(kernels), - std::move(states)); + std::move(argument_indices), std::move(states)); } namespace internal { diff --git a/cpp/src/arrow/compute/exec/exec_plan.h b/cpp/src/arrow/compute/exec/exec_plan.h index 07bb365bbc7..fc3af92af4a 100644 --- a/cpp/src/arrow/compute/exec/exec_plan.h +++ b/cpp/src/arrow/compute/exec/exec_plan.h @@ -285,7 +285,9 @@ Result MakeProjectNode(ExecNode* input, std::string label, ARROW_EXPORT Result MakeScalarAggregateNode(ExecNode* input, std::string label, - std::vector aggregates); + std::vector aggregates, + std::vector arguments, + std::vector out_field_names); /// \brief Make a node which groups input rows based on key fields and computes /// aggregates for each group diff --git a/cpp/src/arrow/compute/exec/plan_test.cc b/cpp/src/arrow/compute/exec/plan_test.cc index aa807468bcb..f7fce4dddef 100644 --- a/cpp/src/arrow/compute/exec/plan_test.cc +++ b/cpp/src/arrow/compute/exec/plan_test.cc @@ -531,9 +531,11 @@ TEST(ExecPlanExecution, SourceScalarAggSink) { MakeTestSourceNode(plan.get(), "source", basic_data, /*parallel=*/false, /*slow=*/false)); - ASSERT_OK_AND_ASSIGN(auto scalar_agg, - MakeScalarAggregateNode(source, "scalar_agg", - {{"sum", nullptr}, {"any", nullptr}})); + ASSERT_OK_AND_ASSIGN( + auto scalar_agg, + MakeScalarAggregateNode(source, "scalar_agg", {{"sum", nullptr}, {"any", nullptr}}, + /*targets=*/{"i32", "bool"}, + /*out_field_names=*/{"sum(i32)", "any(bool)"})); auto sink_gen = MakeSinkNode(scalar_agg, "sink"); diff --git a/r/DESCRIPTION b/r/DESCRIPTION index a0c4b61b7a0..3d10aa4745e 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -109,6 +109,7 @@ Collate: 'metadata.R' 'parquet.R' 'python.R' + 'query-engine.R' 'record-batch-reader.R' 'record-batch-writer.R' 'reexports-bit64.R' diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index 250fd53f1a0..91553754672 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -280,8 +280,8 @@ io___CompressedInputStream__Make <- function(codec, raw){ .Call(`_arrow_io___CompressedInputStream__Make`, codec, raw) } -ExecPlan_create <- function(){ - .Call(`_arrow_ExecPlan_create`) +ExecPlan_create <- function(use_threads){ + .Call(`_arrow_ExecPlan_create`, use_threads) } ExecPlan_run <- function(plan, final_node){ diff --git a/r/R/query-engine.R b/r/R/query-engine.R index fa12396740e..614811e26a4 100644 --- a/r/R/query-engine.R +++ b/r/R/query-engine.R @@ -36,7 +36,8 @@ ExecPlan <- R6Class("ExecPlan", inherit = ArrowObject, filter <- Expression$scalar(TRUE) colnames <- names(dataset) } - # TODO: why do I _need_ to filter/project here? 
+ # ScanNode needs the filter to do predicate pushdown and skip partitions, + # and it needs to know which fields to materialize (and which are unnecessary) ExecNode_Scan(self, dataset, filter, colnames) }, Run = function(node) { @@ -45,7 +46,9 @@ ExecPlan <- R6Class("ExecPlan", inherit = ArrowObject, } ) ) -ExecPlan$create <- ExecPlan_create +ExecPlan$create <- function(use_threads = option_use_threads()) { + ExecPlan_create(use_threads) +} ExecNode <- R6Class("ExecNode", inherit = ArrowObject, public = list( diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index 371aae96703..ff0acd50953 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -1094,14 +1094,15 @@ extern "C" SEXP _arrow_io___CompressedInputStream__Make(SEXP codec_sexp, SEXP ra // compute-exec.cpp #if defined(ARROW_R_WITH_ARROW) -std::shared_ptr ExecPlan_create(); -extern "C" SEXP _arrow_ExecPlan_create(){ +std::shared_ptr ExecPlan_create(bool use_threads); +extern "C" SEXP _arrow_ExecPlan_create(SEXP use_threads_sexp){ BEGIN_CPP11 - return cpp11::as_sexp(ExecPlan_create()); + arrow::r::Input::type use_threads(use_threads_sexp); + return cpp11::as_sexp(ExecPlan_create(use_threads)); END_CPP11 } #else -extern "C" SEXP _arrow_ExecPlan_create(){ +extern "C" SEXP _arrow_ExecPlan_create(SEXP use_threads_sexp){ Rf_error("Cannot call ExecPlan_create(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); } #endif @@ -7092,7 +7093,7 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_util___Codec__IsAvailable", (DL_FUNC) &_arrow_util___Codec__IsAvailable, 1}, { "_arrow_io___CompressedOutputStream__Make", (DL_FUNC) &_arrow_io___CompressedOutputStream__Make, 2}, { "_arrow_io___CompressedInputStream__Make", (DL_FUNC) &_arrow_io___CompressedInputStream__Make, 2}, - { "_arrow_ExecPlan_create", (DL_FUNC) &_arrow_ExecPlan_create, 0}, + { "_arrow_ExecPlan_create", (DL_FUNC) &_arrow_ExecPlan_create, 1}, { "_arrow_ExecPlan_run", (DL_FUNC) &_arrow_ExecPlan_run, 2}, { "_arrow_ExecNode_Scan", (DL_FUNC) &_arrow_ExecNode_Scan, 4}, { "_arrow_ExecNode_Filter", (DL_FUNC) &_arrow_ExecNode_Filter, 2}, diff --git a/r/src/compute-exec.cpp b/r/src/compute-exec.cpp index dd341784899..e3fb08cfb0f 100644 --- a/r/src/compute-exec.cpp +++ b/r/src/compute-exec.cpp @@ -21,17 +21,32 @@ #include #include +#include #include #include +#include namespace compute = ::arrow::compute; std::shared_ptr make_compute_options(std::string func_name, cpp11::list options); +template +void AddKeepalive(compute::ExecPlan* plan, T keepalive) { + struct Callback { + void operator()(const arrow::Status&) && {} + T keepalive; + }; + plan->finished().AddCallback(Callback{std::move(keepalive)}); +} + // [[arrow::export]] -std::shared_ptr ExecPlan_create() { - return ValueOrStop(compute::ExecPlan::Make(gc_context())); +std::shared_ptr ExecPlan_create(bool use_threads) { + auto executor = use_threads ? 
arrow::internal::GetCpuThreadPool() : nullptr; + auto context = std::make_shared(gc_memory_pool(), executor); + auto plan = ValueOrStop(compute::ExecPlan::Make(context.get())); + AddKeepalive(plan.get(), std::move(context)); + return plan; } // [[arrow::export]] @@ -69,7 +84,7 @@ std::shared_ptr ExecNode_Scan( const std::shared_ptr& dataset, const std::shared_ptr& filter, std::vector materialized_field_names) { - // TODO: pass in ScanOptions by file type + // TODO: pass in FragmentScanOptions auto options = std::make_shared(); options->use_async = true; @@ -119,9 +134,7 @@ std::shared_ptr ExecNode_Project( std::shared_ptr ExecNode_ScalarAggregate( const std::shared_ptr& input, cpp11::list options, - std::vector targets, std::vector out_field_names) { - // PROBLEM: need to keep these alive as long as the plan somehow. - std::vector> keep_alives; + std::vector target_names, std::vector out_field_names) { std::vector aggregates; for (cpp11::list name_opts : options) { @@ -130,16 +143,17 @@ std::shared_ptr ExecNode_ScalarAggregate( aggregates.push_back( arrow::compute::internal::Aggregate{std::move(name), opts.get()}); - keep_alives.push_back(std::move(opts)); - } - auto scalar_agg = ValueOrStop(compute::MakeScalarAggregateNode( - input, /*label=*/"scalar_agg", aggregates, targets, out_field_names)); + AddKeepalive(input->plan(), std::move(opts)); + } - return std::shared_ptr(scalar_agg, [keep_alives](...) { - // empty destructor: ExecNode lifetime is managed by an ExecPlan - // also carries the function options - }); + std::vector targets; + for (auto&& name : target_names) { + targets.emplace_back(std::move(name)); + } + return ExecNodeOrStop(compute::MakeScalarAggregateNode( + input.get(), /*label=*/"scalar_agg", std::move(aggregates), std::move(targets), + std::move(out_field_names))); } #endif diff --git a/r/src/compute.cpp b/r/src/compute.cpp index 142a460d2eb..30821137383 100644 --- a/r/src/compute.cpp +++ b/r/src/compute.cpp @@ -22,13 +22,11 @@ #include #include #include -#include std::shared_ptr make_cast_options(cpp11::list options); arrow::compute::ExecContext* gc_context() { - static arrow::compute::ExecContext context(gc_memory_pool(), - arrow::internal::GetCpuThreadPool()); + static arrow::compute::ExecContext context(gc_memory_pool()); return &context; } From b5b41a3ad357a1c4f7d643d4f9d55dd1e7f84f99 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Thu, 15 Jul 2021 13:58:16 -0400 Subject: [PATCH 06/24] Compiles but segfaults --- r/R/arrowExports.R | 4 ++++ r/R/dplyr-summarize.R | 10 +++++----- r/R/query-engine.R | 4 ++-- r/src/arrowExports.cpp | 21 ++++++++++++++++++++- r/src/compute-exec.cpp | 3 ++- 5 files changed, 33 insertions(+), 9 deletions(-) diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index 91553754672..a870e7fb372 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -300,6 +300,10 @@ ExecNode_Project <- function(input, exprs, names){ .Call(`_arrow_ExecNode_Project`, input, exprs, names) } +ExecNode_ScalarAggregate <- function(input, options, target_names, out_field_names){ + .Call(`_arrow_ExecNode_ScalarAggregate`, input, options, target_names, out_field_names) +} + RecordBatch__cast <- function(batch, schema, options){ .Call(`_arrow_RecordBatch__cast`, batch, schema, options) } diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R index 2db8a954918..05933a62b22 100644 --- a/r/R/dplyr-summarize.R +++ b/r/R/dplyr-summarize.R @@ -32,7 +32,7 @@ summarise.arrow_dplyr_query <- function(.data, ..., .engine = c("arrow", "duckdb 
dplyr::summarise(to_duckdb(.data), ...) } else if (isTRUE(getOption("arrow.summarize", FALSE))) { # Try stuff, if successful return() - out <- try(do_arrow_summarize(.data, ...), silent = TRUE) + out <- do_arrow_summarize(.data, ...) if (inherits(out, "try-error")) { return(abandon_ship(call, .data, format(out))) } else { @@ -63,7 +63,7 @@ do_arrow_summarize <- function(.data, ...) { list( fun = "sum", data = x, - options = list(na.rm = na.rm) + options = list(na.rm = na.rm, na.min_count = 0L) ) } results <- list() @@ -101,9 +101,9 @@ do_exec_plan <- function(.data) { project_node <- start_node$Project(.data$selected_columns) final_node <- project_node$ScalarAggregate( - options = .data$aggregates, - targets = names(.data), - out_field_names = names(.data$aggregates) + options = .data$aggregations, + target_names = names(.data), + out_field_names = names(.data$aggregations) ) plan$Run(final_node) } \ No newline at end of file diff --git a/r/R/query-engine.R b/r/R/query-engine.R index 614811e26a4..c358b1de396 100644 --- a/r/R/query-engine.R +++ b/r/R/query-engine.R @@ -60,8 +60,8 @@ ExecNode <- R6Class("ExecNode", inherit = ArrowObject, assert_is(expr, "Expression") ExecNode_Filter(self, expr) }, - ScalarAggregate = function(options, targets, out_field_names) { - ExecNode_ScalarAggregate(self, options, targets, out_field_names) + ScalarAggregate = function(options, target_names, out_field_names) { + ExecNode_ScalarAggregate(self, options, target_names, out_field_names) } ) ) diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index ff0acd50953..874361a2d8a 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -1124,7 +1124,7 @@ extern "C" SEXP _arrow_ExecPlan_run(SEXP plan_sexp, SEXP final_node_sexp){ #endif // compute-exec.cpp -#if defined(ARROW_R_WITH_ARROW) +#if defined(ARROW_R_WITH_DATASET) std::shared_ptr ExecNode_Scan(const std::shared_ptr& plan, const std::shared_ptr& dataset, const std::shared_ptr& filter, std::vector materialized_field_names); extern "C" SEXP _arrow_ExecNode_Scan(SEXP plan_sexp, SEXP dataset_sexp, SEXP filter_sexp, SEXP materialized_field_names_sexp){ BEGIN_CPP11 @@ -1174,6 +1174,24 @@ extern "C" SEXP _arrow_ExecNode_Project(SEXP input_sexp, SEXP exprs_sexp, SEXP n } #endif +// compute-exec.cpp +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr ExecNode_ScalarAggregate(const std::shared_ptr& input, cpp11::list options, std::vector target_names, std::vector out_field_names); +extern "C" SEXP _arrow_ExecNode_ScalarAggregate(SEXP input_sexp, SEXP options_sexp, SEXP target_names_sexp, SEXP out_field_names_sexp){ +BEGIN_CPP11 + arrow::r::Input&>::type input(input_sexp); + arrow::r::Input::type options(options_sexp); + arrow::r::Input>::type target_names(target_names_sexp); + arrow::r::Input>::type out_field_names(out_field_names_sexp); + return cpp11::as_sexp(ExecNode_ScalarAggregate(input, options, target_names, out_field_names)); +END_CPP11 +} +#else +extern "C" SEXP _arrow_ExecNode_ScalarAggregate(SEXP input_sexp, SEXP options_sexp, SEXP target_names_sexp, SEXP out_field_names_sexp){ + Rf_error("Cannot call ExecNode_ScalarAggregate(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. 
"); +} +#endif + // compute.cpp #if defined(ARROW_R_WITH_ARROW) std::shared_ptr RecordBatch__cast(const std::shared_ptr& batch, const std::shared_ptr& schema, cpp11::list options); @@ -7098,6 +7116,7 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_ExecNode_Scan", (DL_FUNC) &_arrow_ExecNode_Scan, 4}, { "_arrow_ExecNode_Filter", (DL_FUNC) &_arrow_ExecNode_Filter, 2}, { "_arrow_ExecNode_Project", (DL_FUNC) &_arrow_ExecNode_Project, 3}, + { "_arrow_ExecNode_ScalarAggregate", (DL_FUNC) &_arrow_ExecNode_ScalarAggregate, 4}, { "_arrow_RecordBatch__cast", (DL_FUNC) &_arrow_RecordBatch__cast, 3}, { "_arrow_Table__cast", (DL_FUNC) &_arrow_Table__cast, 3}, { "_arrow_compute__CallFunction", (DL_FUNC) &_arrow_compute__CallFunction, 3}, diff --git a/r/src/compute-exec.cpp b/r/src/compute-exec.cpp index e3fb08cfb0f..932566fc696 100644 --- a/r/src/compute-exec.cpp +++ b/r/src/compute-exec.cpp @@ -78,7 +78,7 @@ std::shared_ptr ExecNodeOrStop( #include -// [[arrow::export]] +// [[dataset::export]] std::shared_ptr ExecNode_Scan( const std::shared_ptr& plan, const std::shared_ptr& dataset, @@ -132,6 +132,7 @@ std::shared_ptr ExecNode_Project( input.get(), /*label=*/"project", std::move(expressions), std::move(names))); } +// [[arrow::export]] std::shared_ptr ExecNode_ScalarAggregate( const std::shared_ptr& input, cpp11::list options, std::vector target_names, std::vector out_field_names) { From f34c932be0be67dd3ec848c4c1bc08de4d92d056 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Thu, 15 Jul 2021 14:39:08 -0400 Subject: [PATCH 07/24] revert keepalives --- r/src/compute-exec.cpp | 23 ++++++++--------------- r/tests/testthat/test-dplyr-aggregate.R | 3 +-- 2 files changed, 9 insertions(+), 17 deletions(-) diff --git a/r/src/compute-exec.cpp b/r/src/compute-exec.cpp index 932566fc696..f5a734db510 100644 --- a/r/src/compute-exec.cpp +++ b/r/src/compute-exec.cpp @@ -19,6 +19,8 @@ #if defined(ARROW_R_WITH_ARROW) +#include + #include #include #include @@ -31,21 +33,12 @@ namespace compute = ::arrow::compute; std::shared_ptr make_compute_options(std::string func_name, cpp11::list options); -template -void AddKeepalive(compute::ExecPlan* plan, T keepalive) { - struct Callback { - void operator()(const arrow::Status&) && {} - T keepalive; - }; - plan->finished().AddCallback(Callback{std::move(keepalive)}); -} - // [[arrow::export]] std::shared_ptr ExecPlan_create(bool use_threads) { - auto executor = use_threads ? arrow::internal::GetCpuThreadPool() : nullptr; - auto context = std::make_shared(gc_memory_pool(), executor); - auto plan = ValueOrStop(compute::ExecPlan::Make(context.get())); - AddKeepalive(plan.get(), std::move(context)); + static compute::ExecContext threaded_context{gc_memory_pool(), + arrow::internal::GetCpuThreadPool()}; + auto plan = ValueOrStop( + compute::ExecPlan::Make(use_threads ? 
&threaded_context : gc_context())); return plan; } @@ -137,6 +130,7 @@ std::shared_ptr ExecNode_ScalarAggregate( const std::shared_ptr& input, cpp11::list options, std::vector target_names, std::vector out_field_names) { std::vector aggregates; + std::vector> keep_alives; for (cpp11::list name_opts : options) { auto name = cpp11::as_cpp(name_opts[0]); @@ -144,8 +138,7 @@ std::shared_ptr ExecNode_ScalarAggregate( aggregates.push_back( arrow::compute::internal::Aggregate{std::move(name), opts.get()}); - - AddKeepalive(input->plan(), std::move(opts)); + keep_alives.push_back(std::move(opts)); } std::vector targets; diff --git a/r/tests/testthat/test-dplyr-aggregate.R b/r/tests/testthat/test-dplyr-aggregate.R index 279e5448753..eec5357fe47 100644 --- a/r/tests/testthat/test-dplyr-aggregate.R +++ b/r/tests/testthat/test-dplyr-aggregate.R @@ -28,10 +28,9 @@ tbl$verses <- verses[[1]] tbl$padded_strings <- stringr::str_pad(letters[1:10], width = 2*(1:10)+1, side = "both") test_that("Can aggregate", { - withr::local_options(list(arrow.summarize = TRUE)) expect_dplyr_equal( input %>% summarize(total = sum(int)), tbl ) -}) \ No newline at end of file +}) From 683dbcc3535f231f52855d7fcbda2d6f8087f325 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Thu, 15 Jul 2021 14:59:02 -0400 Subject: [PATCH 08/24] Actually run the tests --- r/tests/testthat/test-dplyr-aggregate.R | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/r/tests/testthat/test-dplyr-aggregate.R b/r/tests/testthat/test-dplyr-aggregate.R index eec5357fe47..b615384ebea 100644 --- a/r/tests/testthat/test-dplyr-aggregate.R +++ b/r/tests/testthat/test-dplyr-aggregate.R @@ -28,9 +28,19 @@ tbl$verses <- verses[[1]] tbl$padded_strings <- stringr::str_pad(letters[1:10], width = 2*(1:10)+1, side = "both") test_that("Can aggregate", { + withr::local_options(list(arrow.summarize = TRUE)) expect_dplyr_equal( input %>% - summarize(total = sum(int)), + summarize(total = sum(int, na.rm = TRUE)) %>% + collect(), tbl ) + # This is failing because the default is na.rm = FALSE + expect_dplyr_equal( + input %>% + summarize(total = sum(int)) %>% + collect(), + tbl + ) + }) From a1f676d40ead2fbbc3665f6f3ceb04c720f62390 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Thu, 15 Jul 2021 15:01:38 -0400 Subject: [PATCH 09/24] Restore docs --- r/man/ChunkedArray.Rd | 22 +++++++++++++++ r/man/Field.Rd | 5 ++++ r/man/FileFormat.Rd | 15 ++++++++++ r/man/ParquetFileReader.Rd | 12 ++++++++ r/man/RecordBatch.Rd | 11 ++++++++ r/man/RecordBatchReader.Rd | 37 +++++++++++++++++++++++++ r/man/RecordBatchWriter.Rd | 37 +++++++++++++++++++++++++ r/man/Scalar.Rd | 17 ++++++++++++ r/man/Schema.Rd | 9 ++++++ r/man/Table.Rd | 11 ++++++++ r/man/array.Rd | 23 ++++++++++++++++ r/man/buffer.Rd | 9 ++++++ r/man/call_function.Rd | 10 +++++++ r/man/codec_is_available.Rd | 5 ++++ r/man/copy_files.Rd | 10 +++++++ r/man/data-type.Rd | 8 ++++++ r/man/hive_partition.Rd | 5 ++++ r/man/list_compute_functions.Rd | 7 +++++ r/man/load_flight_server.Rd | 5 ++++ r/man/match_arrow.Rd | 25 +++++++++++++++++ r/man/open_dataset.Rd | 49 +++++++++++++++++++++++++++++++++ r/man/read_delim_arrow.Rd | 11 ++++++++ r/man/read_feather.Rd | 11 ++++++++ r/man/read_json_arrow.Rd | 12 ++++++++ r/man/read_parquet.Rd | 9 ++++++ r/man/s3_bucket.Rd | 5 ++++ r/man/type.Rd | 10 +++++++ r/man/unify_schemas.Rd | 7 +++++ r/man/value_counts.Rd | 6 ++++ r/man/write_csv_arrow.Rd | 7 +++++ r/man/write_feather.Rd | 7 +++++ r/man/write_ipc_stream.Rd | 7 +++++ r/man/write_parquet.Rd | 12 ++++++++ 
r/man/write_to_raw.Rd | 7 +++++ 34 files changed, 443 insertions(+) diff --git a/r/man/ChunkedArray.Rd b/r/man/ChunkedArray.Rd index 486b6222af7..3a504f01466 100644 --- a/r/man/ChunkedArray.Rd +++ b/r/man/ChunkedArray.Rd @@ -53,6 +53,28 @@ within the array's internal data. This can be an expensive check, potentially \c } } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +# Pass items into chunked_array as separate objects to create chunks +class_scores <- chunked_array(c(87, 88, 89), c(94, 93, 92), c(71, 72, 73)) +class_scores$num_chunks + +# When taking a Slice from a chunked_array, chunks are preserved +class_scores$Slice(2, length = 5) + +# You can combine Take and SortIndices to return a ChunkedArray with 1 chunk +# containing all values, ordered. +class_scores$Take(class_scores$SortIndices(descending = TRUE)) + +# If you pass a list into chunked_array, you get a list of length 1 +list_scores <- chunked_array(list(c(9.9, 9.6, 9.5), c(8.2, 8.3, 8.4), c(10.0, 9.9, 9.8))) +list_scores$num_chunks + +# When constructing a ChunkedArray, the first chunk is used to infer type. +doubles <- chunked_array(c(1, 2, 3), c(5L, 6L, 7L)) +doubles$type +\dontshow{\}) # examplesIf} +} \seealso{ \link{Array} } diff --git a/r/man/Field.Rd b/r/man/Field.Rd index 03dffd11ca9..77d31fa637a 100644 --- a/r/man/Field.Rd +++ b/r/man/Field.Rd @@ -28,3 +28,8 @@ field(name, type, metadata) } } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +field("x", int32()) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/FileFormat.Rd b/r/man/FileFormat.Rd index b8d4dc01bad..5bc9475b408 100644 --- a/r/man/FileFormat.Rd +++ b/r/man/FileFormat.Rd @@ -51,3 +51,18 @@ From \link{CsvFragmentScanOptions} (these values can be overridden at scan time) It returns the appropriate subclass of \code{FileFormat} (e.g. \code{ParquetFileFormat}) } +\examples{ +\dontshow{if (arrow_with_dataset()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +## Semi-colon delimited files +# Set up directory for examples +tf <- tempfile() +dir.create(tf) +on.exit(unlink(tf)) +write.table(mtcars, file.path(tf, "file1.txt"), sep = ";", row.names = FALSE) + +# Create FileFormat object +format <- FileFormat$create(format = "text", delimiter = ";") + +open_dataset(tf, format = format) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/ParquetFileReader.Rd b/r/man/ParquetFileReader.Rd index 0b49df79d6b..31de9ead104 100644 --- a/r/man/ParquetFileReader.Rd +++ b/r/man/ParquetFileReader.Rd @@ -45,3 +45,15 @@ The optional \verb{column_indices=} argument is a 0-based integer vector indicat } } +\examples{ +\dontshow{if (arrow_with_parquet()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +f <- system.file("v0.7.1.parquet", package="arrow") +pq <- ParquetFileReader$create(f) +pq$GetSchema() +if (codec_is_available("snappy")) { + # This file has compressed data columns + tab <- pq$ReadTable() + tab$schema +} +\dontshow{\}) # examplesIf} +} diff --git a/r/man/RecordBatch.Rd b/r/man/RecordBatch.Rd index e3024b91b7a..ff08c215853 100644 --- a/r/man/RecordBatch.Rd +++ b/r/man/RecordBatch.Rd @@ -79,3 +79,14 @@ All list elements are coerced to string. 
See \code{schema()} for more information. } } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +batch <- record_batch(name = rownames(mtcars), mtcars) +dim(batch) +dim(head(batch)) +names(batch) +batch$mpg +batch[["cyl"]] +as.data.frame(batch[4:8, c("gear", "hp", "wt")]) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/RecordBatchReader.Rd b/r/man/RecordBatchReader.Rd index a206c30c8fb..90c796a6693 100644 --- a/r/man/RecordBatchReader.Rd +++ b/r/man/RecordBatchReader.Rd @@ -43,6 +43,43 @@ are in the file. } } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +tf <- tempfile() +on.exit(unlink(tf)) + +batch <- record_batch(chickwts) + +# This opens a connection to the file in Arrow +file_obj <- FileOutputStream$create(tf) +# Pass that to a RecordBatchWriter to write data conforming to a schema +writer <- RecordBatchFileWriter$create(file_obj, batch$schema) +writer$write(batch) +# You may write additional batches to the stream, provided that they have +# the same schema. +# Call "close" on the writer to indicate end-of-file/stream +writer$close() +# Then, close the connection--closing the IPC message does not close the file +file_obj$close() + +# Now, we have a file we can read from. Same pattern: open file connection, +# then pass it to a RecordBatchReader +read_file_obj <- ReadableFile$create(tf) +reader <- RecordBatchFileReader$create(read_file_obj) +# RecordBatchFileReader knows how many batches it has (StreamReader does not) +reader$num_record_batches +# We could consume the Reader by calling $read_next_batch() until all are +# consumed, or we can call $read_table() to pull them all into a Table +tab <- reader$read_table() +# Call as.data.frame to turn that Table into an R data.frame +df <- as.data.frame(tab) +# This should be the same data we sent +all.equal(df, chickwts, check.attributes = FALSE) +# Unlike the Writers, we don't have to close RecordBatchReaders, +# but we do still need to close the file connection +read_file_obj$close() +\dontshow{\}) # examplesIf} +} \seealso{ \code{\link[=read_ipc_stream]{read_ipc_stream()}} and \code{\link[=read_feather]{read_feather()}} provide a much simpler interface for reading data from these formats and are sufficient for many use cases. diff --git a/r/man/RecordBatchWriter.Rd b/r/man/RecordBatchWriter.Rd index cc6d2feb3ac..219c150e6a4 100644 --- a/r/man/RecordBatchWriter.Rd +++ b/r/man/RecordBatchWriter.Rd @@ -45,6 +45,43 @@ to be closed separately. } } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +tf <- tempfile() +on.exit(unlink(tf)) + +batch <- record_batch(chickwts) + +# This opens a connection to the file in Arrow +file_obj <- FileOutputStream$create(tf) +# Pass that to a RecordBatchWriter to write data conforming to a schema +writer <- RecordBatchFileWriter$create(file_obj, batch$schema) +writer$write(batch) +# You may write additional batches to the stream, provided that they have +# the same schema. +# Call "close" on the writer to indicate end-of-file/stream +writer$close() +# Then, close the connection--closing the IPC message does not close the file +file_obj$close() + +# Now, we have a file we can read from.
Same pattern: open file connection, +# then pass it to a RecordBatchReader +read_file_obj <- ReadableFile$create(tf) +reader <- RecordBatchFileReader$create(read_file_obj) +# RecordBatchFileReader knows how many batches it has (StreamReader does not) +reader$num_record_batches +# We could consume the Reader by calling $read_next_batch() until all are +# consumed, or we can call $read_table() to pull them all into a Table +tab <- reader$read_table() +# Call as.data.frame to turn that Table into an R data.frame +df <- as.data.frame(tab) +# This should be the same data we sent +all.equal(df, chickwts, check.attributes = FALSE) +# Unlike the Writers, we don't have to close RecordBatchReaders, +# but we do still need to close the file connection +read_file_obj$close() +\dontshow{\}) # examplesIf} +} \seealso{ \code{\link[=write_ipc_stream]{write_ipc_stream()}} and \code{\link[=write_feather]{write_feather()}} provide a much simpler interface for writing data to these formats and are sufficient for many use diff --git a/r/man/Scalar.Rd b/r/man/Scalar.Rd index 9128988d11c..21e04c12e08 100644 --- a/r/man/Scalar.Rd +++ b/r/man/Scalar.Rd @@ -19,3 +19,20 @@ A \code{Scalar} holds a single value of an Arrow type. \verb{$type}: Scalar type } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +Scalar$create(pi) +Scalar$create(404) +# If you pass a vector into Scalar$create, you get a list containing your items +Scalar$create(c(1, 2, 3)) + +# Comparisons +my_scalar <- Scalar$create(99) +my_scalar$ApproxEquals(Scalar$create(99.00001)) # FALSE +my_scalar$ApproxEquals(Scalar$create(99.000009)) # TRUE +my_scalar$Equals(Scalar$create(99.000009)) # FALSE +my_scalar$Equals(Scalar$create(99L)) # FALSE (types don't match) + +my_scalar$ToString() +\dontshow{\}) # examplesIf} +} diff --git a/r/man/Schema.Rd b/r/man/Schema.Rd index 0c66e5c2a42..6e385bb804e 100644 --- a/r/man/Schema.Rd +++ b/r/man/Schema.Rd @@ -74,3 +74,12 @@ Files with compressed metadata are readable by older versions of arrow, but the metadata is dropped. } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +df <- data.frame(col1 = 2:4, col2 = c(0.1, 0.3, 0.5)) +tab1 <- Table$create(df) +tab1$schema +tab2 <- Table$create(df, schema = schema(col1 = int8(), col2 = float32())) +tab2$schema +\dontshow{\}) # examplesIf} +} diff --git a/r/man/Table.Rd b/r/man/Table.Rd index d955b0f5a29..2675943e572 100644 --- a/r/man/Table.Rd +++ b/r/man/Table.Rd @@ -79,3 +79,14 @@ All list elements are coerced to string. See \code{schema()} for more information. } } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +tab <- Table$create(name = rownames(mtcars), mtcars) +dim(tab) +dim(head(tab)) +names(tab) +tab$mpg +tab[["cyl"]] +as.data.frame(tab[4:8, c("gear", "hp", "wt")]) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/array.Rd b/r/man/array.Rd index ed25a2b0a34..71957aff90c 100644 --- a/r/man/array.Rd +++ b/r/man/array.Rd @@ -82,3 +82,26 @@ within the array's internal data.
This can be an expensive check, potentially \c } } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +my_array <- Array$create(1:10) +my_array$type +my_array$cast(int8()) + +# Check if value is null; zero-indexed +na_array <- Array$create(c(1:5, NA)) +na_array$IsNull(0) +na_array$IsNull(5) +na_array$IsValid(5) +na_array$null_count + +# zero-copy slicing; the offset of the new Array will be the same as the index passed to $Slice +new_array <- na_array$Slice(5) +new_array$offset + +# Compare 2 arrays +na_array2 = na_array +na_array2 == na_array # element-wise comparison +na_array2$Equals(na_array) # overall comparison +\dontshow{\}) # examplesIf} +} diff --git a/r/man/buffer.Rd b/r/man/buffer.Rd index 99b636da3c7..a3ca1fc2fcb 100644 --- a/r/man/buffer.Rd +++ b/r/man/buffer.Rd @@ -33,3 +33,12 @@ contiguous memory with a particular size. } } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +my_buffer <- buffer(c(1, 2, 3, 4)) +my_buffer$is_mutable +my_buffer$ZeroPadding() +my_buffer$size +my_buffer$capacity +\dontshow{\}) # examplesIf} +} diff --git a/r/man/call_function.Rd b/r/man/call_function.Rd index 7e9b7e50ea0..bef89f10b18 100644 --- a/r/man/call_function.Rd +++ b/r/man/call_function.Rd @@ -35,6 +35,16 @@ are callable with an \code{arrow_} prefix. When passing indices in \code{...}, \code{args}, or \code{options}, express them as 0-based integers (consistent with C++). } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +a <- Array$create(c(1L, 2L, 3L, NA, 5L)) +s <- Scalar$create(4L) +call_function("fill_null", a, s) + +a <- Array$create(rnorm(10000)) +call_function("quantile", a, options = list(q = seq(0, 1, 0.25))) +\dontshow{\}) # examplesIf} +} \seealso{ \href{https://arrow.apache.org/docs/cpp/compute.html}{Arrow C++ documentation} for the functions and their respective options. diff --git a/r/man/codec_is_available.Rd b/r/man/codec_is_available.Rd index 1b5e8278fa9..b3238ff1dca 100644 --- a/r/man/codec_is_available.Rd +++ b/r/man/codec_is_available.Rd @@ -18,3 +18,8 @@ Support for compression libraries depends on the build-time settings of the Arrow C++ library. This function lets you know which are available for use. 
} +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +codec_is_available("gzip") +\dontshow{\}) # examplesIf} +} diff --git a/r/man/copy_files.Rd b/r/man/copy_files.Rd index 75cc4405d8a..1b83703f19f 100644 --- a/r/man/copy_files.Rd +++ b/r/man/copy_files.Rd @@ -23,3 +23,13 @@ Nothing: called for side effects in the file system \description{ Copy files between FileSystems } +\examples{ +\dontshow{if (FALSE) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +# Copy an S3 bucket's files to a local directory: +copy_files("s3://your-bucket-name", "local-directory") +# Using a FileSystem object +copy_files(s3_bucket("your-bucket-name"), "local-directory") +# Or go the other way, from local to S3 +copy_files("local-directory", s3_bucket("your-bucket-name")) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/data-type.Rd b/r/man/data-type.Rd index 101702a2fb2..a0631897573 100644 --- a/r/man/data-type.Rd +++ b/r/man/data-type.Rd @@ -150,6 +150,14 @@ are translated to R objects, \code{uint32} and \code{uint64} are converted to \c types, this conversion can be disabled (so that \code{int64} always yields a \code{bit64::integer64} object) by setting \code{options(arrow.int64_downcast = FALSE)}. } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +bool() +struct(a = int32(), b = double()) +timestamp("ms", timezone = "CEST") +time64("ns") +\dontshow{\}) # examplesIf} +} \seealso{ \code{\link[=dictionary]{dictionary()}} for creating a dictionary (factor-like) type. } diff --git a/r/man/hive_partition.Rd b/r/man/hive_partition.Rd index 39d5d8d0ae2..eef9f9157ea 100644 --- a/r/man/hive_partition.Rd +++ b/r/man/hive_partition.Rd @@ -28,3 +28,8 @@ Hive partitioning embeds field names and values in path segments, such as Because fields are named in the path segments, order of fields passed to \code{hive_partition()} does not matter. } +\examples{ +\dontshow{if (arrow_with_dataset()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +hive_partition(year = int16(), month = int8()) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/list_compute_functions.Rd b/r/man/list_compute_functions.Rd index ba17688d833..668e090c0ca 100644 --- a/r/man/list_compute_functions.Rd +++ b/r/man/list_compute_functions.Rd @@ -37,3 +37,10 @@ The package includes Arrow methods for many base R functions that can be called directly on Arrow objects, as well as some tidyverse-flavored versions available inside \code{dplyr} verbs. 
} +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +list_compute_functions() +list_compute_functions(pattern = "^UTF8", ignore.case = TRUE) +list_compute_functions(pattern = "^is", invert = TRUE) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/load_flight_server.Rd b/r/man/load_flight_server.Rd index 7e2000a9ca2..66d30f39147 100644 --- a/r/man/load_flight_server.Rd +++ b/r/man/load_flight_server.Rd @@ -15,3 +15,8 @@ to look in the \verb{inst/} directory for included modules.} \description{ Load a Python Flight server } +\examples{ +\dontshow{if (FALSE) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +load_flight_server("demo_flight_server") +\dontshow{\}) # examplesIf} +} diff --git a/r/man/match_arrow.Rd b/r/man/match_arrow.Rd index 21481af4c6b..d63ef3eed87 100644 --- a/r/man/match_arrow.Rd +++ b/r/man/match_arrow.Rd @@ -26,3 +26,28 @@ per element of \code{x} if it is present in \code{table}. \code{base::match()} is not a generic, so we can't just define Arrow methods for it. This function exposes the analogous functions in the Arrow C++ library. } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +# note that the returned value is 0-indexed +cars_tbl <- Table$create(name = rownames(mtcars), mtcars) +match_arrow(Scalar$create("Mazda RX4 Wag"), cars_tbl$name) + +is_in(Array$create("Mazda RX4 Wag"), cars_tbl$name) + +# Although there are multiple matches, you are returned the index of the first +# match, as with the base R equivalent +match(4, mtcars$cyl) # 1-indexed +match_arrow(Scalar$create(4), cars_tbl$cyl) # 0-indexed + +# If `x` contains multiple values, you are returned the indices of the first +# match for each value. +match(c(4, 6, 8), mtcars$cyl) +match_arrow(Array$create(c(4, 6, 8)), cars_tbl$cyl) + +# Return type matches type of `x` +is_in(c(4, 6, 8), mtcars$cyl) # returns vector +is_in(Scalar$create(4), mtcars$cyl) # returns Scalar +is_in(Array$create(c(4, 6, 8)), cars_tbl$cyl) # returns Array +is_in(ChunkedArray$create(c(4, 6), 8), cars_tbl$cyl) # returns ChunkedArray +\dontshow{\}) # examplesIf} +} diff --git a/r/man/open_dataset.Rd b/r/man/open_dataset.Rd index 974d4286f59..1ca3d661880 100644 --- a/r/man/open_dataset.Rd +++ b/r/man/open_dataset.Rd @@ -90,6 +90,55 @@ can accelerate queries that only touch some partitions (files). Call \code{open_dataset()} to point to a directory of data files and return a \code{Dataset}, then use \code{dplyr} methods to query it. } +\examples{ +\dontshow{if (arrow_with_dataset() & arrow_with_parquet() ) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +# Set up directory for examples +tf <- tempfile() +dir.create(tf) +on.exit(unlink(tf)) + +data <- dplyr::group_by(mtcars, cyl) +write_dataset(data, tf) + +# You can specify a directory containing the files for your dataset and +# open_dataset will scan all files in your directory. +open_dataset(tf) + +# You can also supply a vector of paths +open_dataset(c(file.path(tf, "cyl=4/part-1.parquet"), file.path(tf,"cyl=8/part-2.parquet"))) + +## You must specify the file format if using a format other than parquet.
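# A variation on the IPC example that follows: the format can also be passed
# as a FileFormat object instead of a string (tf_ipc is a scratch directory
# introduced only for this sketch):
tf_ipc <- tempfile()
dir.create(tf_ipc)
on.exit(unlink(tf_ipc))
write_dataset(data, tf_ipc, format = "ipc")
open_dataset(tf_ipc, format = FileFormat$create("ipc"))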
+tf2 <- tempfile() +dir.create(tf2) +on.exit(unlink(tf2)) +write_dataset(data, tf2, format = "ipc") +# This line will result in errors when you try to work with the data +\dontrun{open_dataset(tf2)} +# This line will work +open_dataset(tf2, format = "ipc") + +## You can specify file partitioning to include it as a field in your dataset +# Create a temporary directory and write example dataset +tf3 <- tempfile() +dir.create(tf3) +on.exit(unlink(tf3)) +write_dataset(airquality, tf3, partitioning = c("Month", "Day"), hive_style = FALSE) + +# View files - you can see the partitioning means that files have been written +# to folders based on Month/Day values +list.files(tf3, recursive = TRUE) + +# With no partitioning specified, dataset contains all files but doesn't include +# directory names as field names +open_dataset(tf3) + +# Now that partitioning has been specified, your dataset contains columns for Month and Day +open_dataset(tf3, partitioning = c("Month", "Day")) + +# If you want to specify the data types for your fields, you can pass in a Schema +open_dataset(tf3, partitioning = schema(Month = int8(), Day = int8())) +\dontshow{\}) # examplesIf} +} \seealso{ \code{vignette("dataset", package = "arrow")} } diff --git a/r/man/read_delim_arrow.Rd b/r/man/read_delim_arrow.Rd index d9c80306931..71394e547c9 100644 --- a/r/man/read_delim_arrow.Rd +++ b/r/man/read_delim_arrow.Rd @@ -205,3 +205,14 @@ Note that if you are specifying column names, whether by \code{schema} or to identify column names, you'll need to add \code{skip = 1} to skip that row. } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} + tf <- tempfile() + on.exit(unlink(tf)) + write.csv(mtcars, file = tf) + df <- read_csv_arrow(tf) + dim(df) + # Can select columns + df <- read_csv_arrow(tf, col_select = starts_with("d")) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/read_feather.Rd b/r/man/read_feather.Rd index fa18e3f7844..95f4d1d12c6 100644 --- a/r/man/read_feather.Rd +++ b/r/man/read_feather.Rd @@ -34,6 +34,17 @@ and to make sharing data across data analysis languages easy. This function reads both the original, limited specification of the format and the version 2 specification, which is the Apache Arrow IPC file format. } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +tf <- tempfile() +on.exit(unlink(tf)) +write_feather(mtcars, tf) +df <- read_feather(tf) +dim(df) +# Can select columns +df <- read_feather(tf, col_select = starts_with("d")) +\dontshow{\}) # examplesIf} +} \seealso{ \link{FeatherReader} and \link{RecordBatchReader} for lower-level access to reading Arrow IPC data. } diff --git a/r/man/read_json_arrow.Rd b/r/man/read_json_arrow.Rd index 476c99fe4de..4806b4ad1f0 100644 --- a/r/man/read_json_arrow.Rd +++ b/r/man/read_json_arrow.Rd @@ -38,3 +38,15 @@ A \code{data.frame}, or a Table if \code{as_data_frame = FALSE}.
\description{ Using \link{JsonTableReader} } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} + tf <- tempfile() + on.exit(unlink(tf)) + writeLines(' + { "hello": 3.5, "world": false, "yo": "thing" } + { "hello": 3.25, "world": null } + { "hello": 0.0, "world": true, "yo": null } + ', tf, useBytes=TRUE) + df <- read_json_arrow(tf) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/read_parquet.Rd b/r/man/read_parquet.Rd index ffb2cf7109f..056e8644747 100644 --- a/r/man/read_parquet.Rd +++ b/r/man/read_parquet.Rd @@ -39,3 +39,12 @@ A \link[=Table]{arrow::Table}, or a \code{data.frame} if \code{as_data_frame} is '\href{https://parquet.apache.org/}{Parquet}' is a columnar storage file format. This function enables you to read Parquet files into R. } +\examples{ +\dontshow{if (arrow_with_parquet()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +tf <- tempfile() +on.exit(unlink(tf)) +write_parquet(mtcars, tf) +df <- read_parquet(tf, col_select = starts_with("d")) +head(df) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/s3_bucket.Rd b/r/man/s3_bucket.Rd index 78d527a56c4..95a086deae5 100644 --- a/r/man/s3_bucket.Rd +++ b/r/man/s3_bucket.Rd @@ -21,3 +21,8 @@ are authorized to access the bucket's contents. that automatically detects the bucket's AWS region and holds onto its relative path. } +\examples{ +\dontshow{if (arrow_with_s3()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +bucket <- s3_bucket("ursa-labs-taxi-data") +\dontshow{\}) # examplesIf} +} diff --git a/r/man/type.Rd b/r/man/type.Rd index 2f85e4a6ac6..d55bbe24bd5 100644 --- a/r/man/type.Rd +++ b/r/man/type.Rd @@ -15,3 +15,13 @@ an arrow logical type \description{ infer the arrow Array type from an R vector } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +type(1:10) +type(1L:10L) +type(c(1, 1.5, 2)) +type(c("A", "B", "C")) +type(mtcars) +type(Sys.Date()) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/unify_schemas.Rd b/r/man/unify_schemas.Rd index 709e33a5e74..50c80c2dda9 100644 --- a/r/man/unify_schemas.Rd +++ b/r/man/unify_schemas.Rd @@ -18,3 +18,10 @@ A \code{Schema} with the union of fields contained in the inputs, or \description{ Combine and harmonize schemas } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +a <- schema(b = double(), c = bool()) +z <- schema(b = double(), k = utf8()) +unify_schemas(a, z) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/value_counts.Rd b/r/man/value_counts.Rd index 139af8edc63..6ef77cd4727 100644 --- a/r/man/value_counts.Rd +++ b/r/man/value_counts.Rd @@ -16,3 +16,9 @@ A \code{StructArray} containing "values" (same type as \code{x}) and "counts" \description{ This function tabulates the values in the array and returns a table of counts. } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +cyl_vals <- Array$create(mtcars$cyl) +value_counts(cyl_vals) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/write_csv_arrow.Rd b/r/man/write_csv_arrow.Rd index d6df2bcd08e..55a239ca998 100644 --- a/r/man/write_csv_arrow.Rd +++ b/r/man/write_csv_arrow.Rd @@ -23,3 +23,10 @@ the stream will be left open.
\description{ Write CSV file to disk } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +tf <- tempfile() +on.exit(unlink(tf)) +write_csv_arrow(mtcars, tf) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/write_feather.Rd b/r/man/write_feather.Rd index 0cc8c591369..c6273b61be8 100644 --- a/r/man/write_feather.Rd +++ b/r/man/write_feather.Rd @@ -47,6 +47,13 @@ and to make sharing data across data analysis languages easy. This function writes both the original, limited specification of the format and the version 2 specification, which is the Apache Arrow IPC file format. } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +tf <- tempfile() +on.exit(unlink(tf)) +write_feather(mtcars, tf) +\dontshow{\}) # examplesIf} +} \seealso{ \link{RecordBatchWriter} for lower-level access to writing Arrow IPC data. diff --git a/r/man/write_ipc_stream.Rd b/r/man/write_ipc_stream.Rd index 4f742ce9178..888d947eb99 100644 --- a/r/man/write_ipc_stream.Rd +++ b/r/man/write_ipc_stream.Rd @@ -31,6 +31,13 @@ with some nonstandard behavior, is deprecated. You should explicitly choose the function that will write the desired IPC format (stream or file) since either can be written to a file or \code{OutputStream}. } +\examples{ +\dontshow{if (arrow_available() ) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +tf <- tempfile() +on.exit(unlink(tf)) +write_ipc_stream(mtcars, tf) +\dontshow{\}) # examplesIf} +} \seealso{ \code{\link[=write_feather]{write_feather()}} for writing IPC files. \code{\link[=write_to_raw]{write_to_raw()}} to serialize data to a buffer. diff --git a/r/man/write_parquet.Rd b/r/man/write_parquet.Rd index 823a6038e84..d7147f7e8e6 100644 --- a/r/man/write_parquet.Rd +++ b/r/man/write_parquet.Rd @@ -94,3 +94,15 @@ The default "snappy" is used if available, otherwise "uncompressed". To disable compression, set \code{compression = "uncompressed"}. Note that "uncompressed" columns may still have dictionary encoding. } +\examples{ +\dontshow{if (arrow_with_parquet()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +tf1 <- tempfile(fileext = ".parquet") +write_parquet(data.frame(x = 1:5), tf1) + +# using compression +if (codec_is_available("gzip")) { + tf2 <- tempfile(fileext = ".gz.parquet") + write_parquet(data.frame(x = 1:5), tf2, compression = "gzip", compression_level = 5) +} +\dontshow{\}) # examplesIf} +} diff --git a/r/man/write_to_raw.Rd b/r/man/write_to_raw.Rd index 46af09a96e8..1f507e384c3 100644 --- a/r/man/write_to_raw.Rd +++ b/r/man/write_to_raw.Rd @@ -20,3 +20,10 @@ the data (\code{data.frame}, \code{RecordBatch}, or \code{Table}) they were give This function wraps those so that you can serialize data to a buffer and access that buffer as a \code{raw} vector in R. 
} +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +# The default format is "stream" +write_to_raw(mtcars) +write_to_raw(mtcars, format = "file") +\dontshow{\}) # examplesIf} +} From 100a178f1626636ada6637ab1584f5f97333eacd Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Thu, 15 Jul 2021 15:02:27 -0400 Subject: [PATCH 10/24] Restore try() --- r/R/dplyr-summarize.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R index 05933a62b22..6f6a5f31f31 100644 --- a/r/R/dplyr-summarize.R +++ b/r/R/dplyr-summarize.R @@ -32,7 +32,7 @@ summarise.arrow_dplyr_query <- function(.data, ..., .engine = c("arrow", "duckdb dplyr::summarise(to_duckdb(.data), ...) } else if (isTRUE(getOption("arrow.summarize", FALSE))) { # Try stuff, if successful return() - out <- do_arrow_summarize(.data, ...) + out <- try(do_arrow_summarize(.data, ...), silent = TRUE) if (inherits(out, "try-error")) { return(abandon_ship(call, .data, format(out))) } else { From d3190a23203c5865a00ee0f36a451b6364d65ac6 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Thu, 15 Jul 2021 15:34:34 -0400 Subject: [PATCH 11/24] Use FieldsInExpression to project in Scan --- r/R/arrowExports.R | 8 ++++++-- r/R/query-engine.R | 5 +++-- r/src/arrowExports.cpp | 30 +++++++++++++++++++++++------- r/src/expression.cpp | 15 +++++++++++++-- 4 files changed, 45 insertions(+), 13 deletions(-) diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index a870e7fb372..e30a4d35d72 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -828,14 +828,18 @@ compute___expr__call <- function(func_name, argument_list, options){ .Call(`_arrow_compute___expr__call`, func_name, argument_list, options) } -compute___expr__field_ref <- function(name){ - .Call(`_arrow_compute___expr__field_ref`, name) +field_names_in_expression <- function(x){ + .Call(`_arrow_field_names_in_expression`, x) } compute___expr__get_field_ref_name <- function(x){ .Call(`_arrow_compute___expr__get_field_ref_name`, x) } +compute___expr__field_ref <- function(name){ + .Call(`_arrow_compute___expr__field_ref`, name) +} + compute___expr__scalar <- function(x){ .Call(`_arrow_compute___expr__scalar`, x) } diff --git a/r/R/query-engine.R b/r/R/query-engine.R index c358b1de396..6822bc0be9b 100644 --- a/r/R/query-engine.R +++ b/r/R/query-engine.R @@ -24,8 +24,9 @@ ExecPlan <- R6Class("ExecPlan", inherit = ArrowObject, if (isTRUE(filter)) { filter <- Expression$scalar(TRUE) } - # TODO: use FieldsInExpression to find all from dataset$selected_columns - colnames <- names(dataset$.data) + # Use FieldsInExpression to find all from dataset$selected_columns + colnames <- unique(unlist(map(dataset$selected_columns, + field_names_in_expression))) dataset <- dataset$.data } else { if (inherits(dataset, "ArrowTabular")) { diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index 874361a2d8a..336d3f3824a 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -3223,16 +3223,16 @@ extern "C" SEXP _arrow_compute___expr__call(SEXP func_name_sexp, SEXP argument_l // expression.cpp #if defined(ARROW_R_WITH_ARROW) -std::shared_ptr compute___expr__field_ref(std::string name); -extern "C" SEXP _arrow_compute___expr__field_ref(SEXP name_sexp){ +std::vector field_names_in_expression(const std::shared_ptr& x); +extern "C" SEXP _arrow_field_names_in_expression(SEXP x_sexp){ BEGIN_CPP11 - arrow::r::Input::type name(name_sexp); - return 
cpp11::as_sexp(compute___expr__field_ref(name)); + arrow::r::Input&>::type x(x_sexp); + return cpp11::as_sexp(field_names_in_expression(x)); END_CPP11 } #else -extern "C" SEXP _arrow_compute___expr__field_ref(SEXP name_sexp){ - Rf_error("Cannot call compute___expr__field_ref(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +extern "C" SEXP _arrow_field_names_in_expression(SEXP x_sexp){ + Rf_error("Cannot call field_names_in_expression(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); } #endif @@ -3251,6 +3251,21 @@ extern "C" SEXP _arrow_compute___expr__get_field_ref_name(SEXP x_sexp){ } #endif +// expression.cpp +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr compute___expr__field_ref(std::string name); +extern "C" SEXP _arrow_compute___expr__field_ref(SEXP name_sexp){ +BEGIN_CPP11 + arrow::r::Input::type name(name_sexp); + return cpp11::as_sexp(compute___expr__field_ref(name)); +END_CPP11 +} +#else +extern "C" SEXP _arrow_compute___expr__field_ref(SEXP name_sexp){ + Rf_error("Cannot call compute___expr__field_ref(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // expression.cpp #if defined(ARROW_R_WITH_ARROW) std::shared_ptr compute___expr__scalar(const std::shared_ptr& x); @@ -7248,8 +7263,9 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_FixedSizeListType__value_type", (DL_FUNC) &_arrow_FixedSizeListType__value_type, 1}, { "_arrow_FixedSizeListType__list_size", (DL_FUNC) &_arrow_FixedSizeListType__list_size, 1}, { "_arrow_compute___expr__call", (DL_FUNC) &_arrow_compute___expr__call, 3}, - { "_arrow_compute___expr__field_ref", (DL_FUNC) &_arrow_compute___expr__field_ref, 1}, + { "_arrow_field_names_in_expression", (DL_FUNC) &_arrow_field_names_in_expression, 1}, { "_arrow_compute___expr__get_field_ref_name", (DL_FUNC) &_arrow_compute___expr__get_field_ref_name, 1}, + { "_arrow_compute___expr__field_ref", (DL_FUNC) &_arrow_compute___expr__field_ref, 1}, { "_arrow_compute___expr__scalar", (DL_FUNC) &_arrow_compute___expr__scalar, 1}, { "_arrow_compute___expr__ToString", (DL_FUNC) &_arrow_compute___expr__ToString, 1}, { "_arrow_compute___expr__type", (DL_FUNC) &_arrow_compute___expr__type, 2}, diff --git a/r/src/expression.cpp b/r/src/expression.cpp index 4b671cb99dd..3fcba46e911 100644 --- a/r/src/expression.cpp +++ b/r/src/expression.cpp @@ -44,8 +44,14 @@ std::shared_ptr compute___expr__call(std::string func_name, } // [[arrow::export]] -std::shared_ptr compute___expr__field_ref(std::string name) { - return std::make_shared(compute::field_ref(std::move(name))); +std::vector field_names_in_expression( + const std::shared_ptr& x) { + std::vector out; + auto field_refs = FieldsInExpression(*x); + for (auto f : field_refs) { + out.push_back(*f.name()); + } + return out; } // [[arrow::export]] @@ -57,6 +63,11 @@ std::string compute___expr__get_field_ref_name( return ""; } +// [[arrow::export]] +std::shared_ptr compute___expr__field_ref(std::string name) { + return std::make_shared(compute::field_ref(std::move(name))); +} + // [[arrow::export]] std::shared_ptr compute___expr__scalar( const std::shared_ptr& x) { From 1107cd25bcccad735d6904d0da00889721304043 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Mon, 26 Jul 2021 13:50:59 -0400 Subject: [PATCH 12/24] repair merge error --- cpp/src/arrow/compute/exec/plan_test.cc | 3 ++- cpp/src/arrow/dataset/scanner.cc | 9 +++++---- 
cpp/src/arrow/dataset/scanner_test.cc | 14 ++++++++------ 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/cpp/src/arrow/compute/exec/plan_test.cc b/cpp/src/arrow/compute/exec/plan_test.cc index f7fce4dddef..101257f5de8 100644 --- a/cpp/src/arrow/compute/exec/plan_test.cc +++ b/cpp/src/arrow/compute/exec/plan_test.cc @@ -567,7 +567,8 @@ TEST(ExecPlanExecution, ScalarSourceScalarAggSink) { ASSERT_OK_AND_ASSIGN( auto scalar_agg, MakeScalarAggregateNode(source, "scalar_agg", - {{"count", nullptr}, {"sum", nullptr}, {"mean", nullptr}})); + {{"count", nullptr}, {"sum", nullptr}, {"mean", nullptr}}, + {"a", "b", "c"}, {"sum a", "sum b", "sum c"})); auto sink_gen = MakeSinkNode(scalar_agg, "sink"); diff --git a/cpp/src/arrow/dataset/scanner.cc b/cpp/src/arrow/dataset/scanner.cc index 192f84f46df..d81b9cd1c5c 100644 --- a/cpp/src/arrow/dataset/scanner.cc +++ b/cpp/src/arrow/dataset/scanner.cc @@ -816,14 +816,15 @@ Result AsyncScanner::CountRows() { ARROW_ASSIGN_OR_RAISE(auto scan, MakeScanNode(plan.get(), std::move(fragment_gen), options)); - ARROW_ASSIGN_OR_RAISE( - auto get_selection, - compute::MakeProjectNode(scan, "get_selection", {options->filter})); + ARROW_ASSIGN_OR_RAISE(auto get_selection, + compute::MakeProjectNode(scan, "get_selection", {options->filter}, + {"selection_mask"})); ARROW_ASSIGN_OR_RAISE( auto sum_selection, compute::MakeScalarAggregateNode(get_selection, "sum_selection", - {compute::internal::Aggregate{"sum", nullptr}})); + {compute::internal::Aggregate{"sum", nullptr}}, + {"selection_mask"}, {"sum"})); AsyncGenerator> sink_gen = compute::MakeSinkNode(sum_selection, "sink"); diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc index de7f780183a..34fa1486ef2 100644 --- a/cpp/src/arrow/dataset/scanner_test.cc +++ b/cpp/src/arrow/dataset/scanner_test.cc @@ -1471,14 +1471,16 @@ TEST(ScanNode, MinimalScalarAggEndToEnd) { ASSERT_OK_AND_ASSIGN( compute::ExecNode * sum, compute::MakeScalarAggregateNode(project, "scalar_agg", - {compute::internal::Aggregate{"sum", nullptr}})); + {compute::internal::Aggregate{"sum", nullptr}}, + {a_times_2.ToString()}, {"a*2 sum"})); // finally, pipe the project node into a sink node auto sink_gen = compute::MakeSinkNode(sum, "sink"); // translate sink_gen (async) to sink_reader (sync) - std::shared_ptr sink_reader = compute::MakeGeneratorReader( - schema({field("sum", int64())}), std::move(sink_gen), exec_context.memory_pool()); + std::shared_ptr sink_reader = + compute::MakeGeneratorReader(schema({field("a*2 sum", int64())}), + std::move(sink_gen), exec_context.memory_pool()); // start the ExecPlan ASSERT_OK(plan->StartProducing()); @@ -1489,9 +1491,9 @@ TEST(ScanNode, MinimalScalarAggEndToEnd) { // wait 1s for completion ASSERT_TRUE(plan->finished().Wait(/*seconds=*/1)) << "ExecPlan didn't finish within 1s"; - auto expected = TableFromJSON(schema({field("sum", int64())}), { - R"([ - {"sum": 4} + auto expected = TableFromJSON(schema({field("a*2 sum", int64())}), { + R"([ + {"a*2 sum": 4} ])"}); AssertTablesEqual(*expected, *collected, /*same_chunk_layout=*/false); } From 2576f59e8f4d73189733b4c5220d7f0097872de1 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Mon, 26 Jul 2021 15:00:25 -0400 Subject: [PATCH 13/24] Basic exercise of GroupByNode --- r/R/arrowExports.R | 4 +++ r/R/dplyr-summarize.R | 47 +++++++++++++++++-------- r/R/query-engine.R | 3 ++ r/src/arrowExports.cpp | 19 ++++++++++ r/src/compute-exec.cpp | 32 ++++++++++++++--- r/tests/testthat/test-dplyr-aggregate.R | 15 ++++++-- 6 
files changed, 99 insertions(+), 21 deletions(-) diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index e30a4d35d72..268a17ef4f4 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -304,6 +304,10 @@ ExecNode_ScalarAggregate <- function(input, options, target_names, out_field_nam .Call(`_arrow_ExecNode_ScalarAggregate`, input, options, target_names, out_field_names) } +ExecNode_GroupByAggregate <- function(input, group_vars, agg_srcs, aggregations){ + .Call(`_arrow_ExecNode_GroupByAggregate`, input, group_vars, agg_srcs, aggregations) +} + RecordBatch__cast <- function(batch, schema, options){ .Call(`_arrow_RecordBatch__cast`, batch, schema, options) } diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R index 6f6a5f31f31..217230d9ab2 100644 --- a/r/R/dplyr-summarize.R +++ b/r/R/dplyr-summarize.R @@ -46,10 +46,6 @@ summarise.arrow_dplyr_query <- function(.data, ..., .engine = c("arrow", "duckdb summarise.Dataset <- summarise.ArrowTabular <- summarise.arrow_dplyr_query do_arrow_summarize <- function(.data, ...) { - if (length(dplyr::group_vars(.data))) { - stop("Grouped aggregation not supprted in Arrow", call. = FALSE) - } - exprs <- quos(...) # Check for unnamed expressions and fix if any unnamed <- !nzchar(names(exprs)) @@ -90,20 +86,41 @@ do_arrow_summarize <- function(.data, ...) { .data$selected_columns <- inputs # Eventually, we will return .data here if (dataset) but do it eagerly now - do_exec_plan(.data) + do_exec_plan(.data, group_vars = dplyr::group_vars(.data)) } -do_exec_plan <- function(.data) { +do_exec_plan <- function(.data, group_vars = NULL) { plan <- ExecPlan$create() - # Scan also will filter and select columns, so we don't need to Filter - start_node <- plan$Scan(.data) - # If any columns are derived we need to Project (otherwise this may be no-op) - project_node <- start_node$Project(.data$selected_columns) - final_node <- project_node$ScalarAggregate( - options = .data$aggregations, - target_names = names(.data), - out_field_names = names(.data$aggregations) - ) + if (length(group_vars) == 0) { + # Scan also will filter and select columns, so we don't need to Filter + start_node <- plan$Scan(.data) + # If any columns are derived we need to Project (otherwise this may be no-op) + project_node <- start_node$Project(.data$selected_columns) + final_node <- project_node$ScalarAggregate( + options = .data$aggregations, + target_names = names(.data), + out_field_names = names(.data$aggregations) + ) + } else { + # Collect the target names first because we have to add back the group vars + target_names <- names(.data) + .data <- ensure_group_vars(.data) + + # We also need to prefix all of the aggregation function names with "hash_" + .data$aggregations <- lapply(.data$aggregations, function(x) { + x[["fun"]] <- paste0("hash_", x[["fun"]]) + x + }) + # Scan also will filter and select columns, so we don't need to Filter + start_node <- plan$Scan(.data) + # If any columns are derived we need to Project (otherwise this may be no-op) + project_node <- start_node$Project(.data$selected_columns) + final_node <- project_node$GroupByAggregate( + group_vars, + target_names = target_names, + aggregations = .data$aggregations + ) + } plan$Run(final_node) } \ No newline at end of file diff --git a/r/R/query-engine.R b/r/R/query-engine.R index 6822bc0be9b..1d1125628e1 100644 --- a/r/R/query-engine.R +++ b/r/R/query-engine.R @@ -63,6 +63,9 @@ ExecNode <- R6Class("ExecNode", inherit = ArrowObject, }, ScalarAggregate = function(options, target_names, out_field_names) { 
ExecNode_ScalarAggregate(self, options, target_names, out_field_names) + }, + GroupByAggregate = function(group_vars, target_names, aggregations) { + ExecNode_GroupByAggregate(self, group_vars, target_names, aggregations) } ) ) diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index 336d3f3824a..92ddbae23fd 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -1192,6 +1192,24 @@ extern "C" SEXP _arrow_ExecNode_ScalarAggregate(SEXP input_sexp, SEXP options_se } #endif +// compute-exec.cpp +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr ExecNode_GroupByAggregate(const std::shared_ptr& input, std::vector group_vars, std::vector agg_srcs, cpp11::list aggregations); +extern "C" SEXP _arrow_ExecNode_GroupByAggregate(SEXP input_sexp, SEXP group_vars_sexp, SEXP agg_srcs_sexp, SEXP aggregations_sexp){ +BEGIN_CPP11 + arrow::r::Input&>::type input(input_sexp); + arrow::r::Input>::type group_vars(group_vars_sexp); + arrow::r::Input>::type agg_srcs(agg_srcs_sexp); + arrow::r::Input::type aggregations(aggregations_sexp); + return cpp11::as_sexp(ExecNode_GroupByAggregate(input, group_vars, agg_srcs, aggregations)); +END_CPP11 +} +#else +extern "C" SEXP _arrow_ExecNode_GroupByAggregate(SEXP input_sexp, SEXP group_vars_sexp, SEXP agg_srcs_sexp, SEXP aggregations_sexp){ + Rf_error("Cannot call ExecNode_GroupByAggregate(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // compute.cpp #if defined(ARROW_R_WITH_ARROW) std::shared_ptr RecordBatch__cast(const std::shared_ptr& batch, const std::shared_ptr& schema, cpp11::list options); @@ -7132,6 +7150,7 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_ExecNode_Filter", (DL_FUNC) &_arrow_ExecNode_Filter, 2}, { "_arrow_ExecNode_Project", (DL_FUNC) &_arrow_ExecNode_Project, 3}, { "_arrow_ExecNode_ScalarAggregate", (DL_FUNC) &_arrow_ExecNode_ScalarAggregate, 4}, + { "_arrow_ExecNode_GroupByAggregate", (DL_FUNC) &_arrow_ExecNode_GroupByAggregate, 4}, { "_arrow_RecordBatch__cast", (DL_FUNC) &_arrow_RecordBatch__cast, 3}, { "_arrow_Table__cast", (DL_FUNC) &_arrow_Table__cast, 3}, { "_arrow_compute__CallFunction", (DL_FUNC) &_arrow_compute__CallFunction, 3}, diff --git a/r/src/compute-exec.cpp b/r/src/compute-exec.cpp index f5a734db510..61a79bf462e 100644 --- a/r/src/compute-exec.cpp +++ b/r/src/compute-exec.cpp @@ -19,8 +19,6 @@ #if defined(ARROW_R_WITH_ARROW) -#include - #include #include #include @@ -28,6 +26,8 @@ #include #include +#include + namespace compute = ::arrow::compute; std::shared_ptr make_compute_options(std::string func_name, @@ -94,8 +94,8 @@ std::shared_ptr ExecNode_Scan( } options->projection = - ValueOrStop(call("project", std::move(exprs), - compute::ProjectOptions{std::move(materialized_field_names)}) + ValueOrStop(call("make_struct", std::move(exprs), + compute::MakeStructOptions{std::move(materialized_field_names)}) .Bind(*dataset->schema())); return ExecNodeOrStop(arrow::dataset::MakeScanNode(plan.get(), dataset, options)); @@ -150,4 +150,28 @@ std::shared_ptr ExecNode_ScalarAggregate( std::move(out_field_names))); } +// [[arrow::export]] +std::shared_ptr ExecNode_GroupByAggregate( + const std::shared_ptr& input, std::vector group_vars, + std::vector agg_srcs, cpp11::list aggregations) { + std::vector aggs; + std::vector> keep_alives; + + for (cpp11::list name_opts : aggregations) { + auto name = cpp11::as_cpp(name_opts[0]); + auto opts = make_compute_options(name, name_opts[1]); + + 
aggs.push_back(arrow::compute::internal::Aggregate{std::move(name), opts.get()}); + keep_alives.push_back(std::move(opts)); + } + + return ExecNodeOrStop(compute::MakeGroupByNode(input.get(), /*label=*/"group_agg", + /*keys=*/std::move(group_vars), + std::move(agg_srcs), std::move(aggs))); +} + +// Result MakeGroupByNode(ExecNode* input, std::string label, +// std::vector keys, +// std::vector agg_srcs, +// std::vector aggs); #endif diff --git a/r/tests/testthat/test-dplyr-aggregate.R b/r/tests/testthat/test-dplyr-aggregate.R index b615384ebea..1f258b2f736 100644 --- a/r/tests/testthat/test-dplyr-aggregate.R +++ b/r/tests/testthat/test-dplyr-aggregate.R @@ -26,6 +26,7 @@ tbl$verses <- verses[[1]] # c(" a ", " b ", " c ", ...) increasing padding # nchar = 3 5 7 9 11 13 15 17 19 21 tbl$padded_strings <- stringr::str_pad(letters[1:10], width = 2*(1:10)+1, side = "both") +tbl$some_grouping <- rep(c(1, 2), 5) test_that("Can aggregate", { withr::local_options(list(arrow.summarize = TRUE)) @@ -35,12 +36,22 @@ test_that("Can aggregate", { collect(), tbl ) - # This is failing because the default is na.rm = FALSE + skip("This is failing because the default is na.rm = FALSE") expect_dplyr_equal( input %>% summarize(total = sum(int)) %>% collect(), tbl ) - }) + +test_that("Group by aggregate on dataset", { + withr::local_options(list(arrow.summarize = TRUE)) + expect_dplyr_equal( + input %>% + group_by(some_grouping) %>% + summarize(total = sum(int, na.rm = TRUE)) %>% + collect(), + tbl + ) +}) \ No newline at end of file From 1b423a0a4620bd76ab34a0bc21bd9897b6b87060 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Tue, 27 Jul 2021 07:38:27 -0400 Subject: [PATCH 14/24] fix ExecBatch slicing --- cpp/src/arrow/compute/exec.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/compute/exec.cc b/cpp/src/arrow/compute/exec.cc index 2a32c96ed3b..7d6db9f58db 100644 --- a/cpp/src/arrow/compute/exec.cc +++ b/cpp/src/arrow/compute/exec.cc @@ -115,7 +115,7 @@ ExecBatch ExecBatch::Slice(int64_t offset, int64_t length) const { if (value.is_scalar()) continue; value = value.array()->Slice(offset, length); } - out.length = length; + out.length = std::min(length, this->length - offset); return out; } From 1816f2cfe11e2388165fd15eb810cee94415765c Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Tue, 27 Jul 2021 14:38:56 -0400 Subject: [PATCH 15/24] Adapt result to meet dplyr expectation --- r/R/dplyr-summarize.R | 48 +++++++++++++++---------- r/tests/testthat/test-dplyr-aggregate.R | 11 ++++++ 2 files changed, 40 insertions(+), 19 deletions(-) diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R index 217230d9ab2..ba9fe8be046 100644 --- a/r/R/dplyr-summarize.R +++ b/r/R/dplyr-summarize.R @@ -92,35 +92,45 @@ do_arrow_summarize <- function(.data, ...) 
{ do_exec_plan <- function(.data, group_vars = NULL) { plan <- ExecPlan$create() - if (length(group_vars) == 0) { - # Scan also will filter and select columns, so we don't need to Filter - start_node <- plan$Scan(.data) - # If any columns are derived we need to Project (otherwise this may be no-op) - project_node <- start_node$Project(.data$selected_columns) - final_node <- project_node$ScalarAggregate( - options = .data$aggregations, - target_names = names(.data), - out_field_names = names(.data$aggregations) - ) - } else { - # Collect the target names first because we have to add back the group vars - target_names <- names(.data) - .data <- ensure_group_vars(.data) + grouped <- length(group_vars) > 0 + + # Collect the target names first because we have to add back the group vars + target_names <- names(.data) + if (grouped) { + .data <- ensure_group_vars(.data) # We also need to prefix all of the aggregation function names with "hash_" .data$aggregations <- lapply(.data$aggregations, function(x) { x[["fun"]] <- paste0("hash_", x[["fun"]]) x }) - # Scan also will filter and select columns, so we don't need to Filter - start_node <- plan$Scan(.data) - # If any columns are derived we need to Project (otherwise this may be no-op) - project_node <- start_node$Project(.data$selected_columns) + } + + # Scan also will filter and select columns, so we don't need to Filter + start_node <- plan$Scan(.data) + # If any columns are derived we need to Project (otherwise this may be no-op) + project_node <- start_node$Project(.data$selected_columns) + + if (grouped) { final_node <- project_node$GroupByAggregate( group_vars, target_names = target_names, aggregations = .data$aggregations ) + out <- plan$Run(final_node) + # The result will have result columns first (named by their function) + # then the grouping cols. dplyr orders group cols first, and it accepts + # names for the result cols. Adapt the result to meet that expectation. 
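+    # (e.g. a grouped sum comes back as (total, some_grouping) and is
+    # reshaped below to (some_grouping, total), matching the tests)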
+ n_results <- length(.data$aggregations) + names(out)[seq_along(.data$aggregations)] <- names(.data$aggregations) + out <- out[c((n_results + 1):ncol(out), seq_along(.data$aggregations))] + } else { + final_node <- project_node$ScalarAggregate( + options = .data$aggregations, + target_names = target_names, + out_field_names = names(.data$aggregations) + ) + out <- plan$Run(final_node) } - plan$Run(final_node) + out } \ No newline at end of file diff --git a/r/tests/testthat/test-dplyr-aggregate.R b/r/tests/testthat/test-dplyr-aggregate.R index 1f258b2f736..d444f1bf391 100644 --- a/r/tests/testthat/test-dplyr-aggregate.R +++ b/r/tests/testthat/test-dplyr-aggregate.R @@ -51,6 +51,17 @@ test_that("Group by aggregate on dataset", { input %>% group_by(some_grouping) %>% summarize(total = sum(int, na.rm = TRUE)) %>% + arrange(some_grouping) %>% + collect(), + tbl + ) + + skip("This is failing because the default is na.rm = FALSE") + expect_dplyr_equal( + input %>% + group_by(some_grouping) %>% + summarize(total = sum(int)) %>% + arrange(some_grouping) %>% collect(), tbl ) From 776e1f52236810b594cbad25a9f5cf95a8f43002 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Thu, 29 Jul 2021 10:45:38 -0400 Subject: [PATCH 16/24] Remove some tests for features not implemented for datasets since that's no longer a thing :tada: --- r/R/dplyr.R | 21 +++------------------ r/tests/testthat/test-dataset.R | 11 ----------- 2 files changed, 3 insertions(+), 29 deletions(-) diff --git a/r/R/dplyr.R b/r/R/dplyr.R index 88accac24e9..00443c7834d 100644 --- a/r/R/dplyr.R +++ b/r/R/dplyr.R @@ -216,31 +216,16 @@ restore_dplyr_features <- function(df, query) { # Helper to handle unsupported dplyr features # * For Table/RecordBatch, we collect() and then call the dplyr method in R # * For Dataset, we just error -abandon_ship <- function(call, .data, msg = NULL) { +abandon_ship <- function(call, .data, msg) { dplyr_fun_name <- sub("^(.*?)\\..*", "\\1", as.character(call[[1]])) if (query_on_dataset(.data)) { - if (is.null(msg)) { - # Default message: function not implemented - not_implemented_for_dataset(paste0(dplyr_fun_name, "()")) - } else { - stop(msg, "\nCall collect() first to pull data into R.", call. = FALSE) - } + stop(msg, "\nCall collect() first to pull data into R.", call. = FALSE) } # else, collect and call dplyr method - if (!is.null(msg)) { - warning(msg, "; pulling data into R", immediate. = TRUE, call. = FALSE) - } + warning(msg, "; pulling data into R", immediate. = TRUE, call. = FALSE) call$.data <- dplyr::collect(.data) call[[1]] <- get(dplyr_fun_name, envir = asNamespace("dplyr")) eval.parent(call, 2) } query_on_dataset <- function(x) !inherits(x$.data, "InMemoryDataset") - -not_implemented_for_dataset <- function(method) { - stop( - method, " is not currently implemented for Arrow Datasets. ", - "Call collect() first to pull data into R.", - call. = FALSE - ) -} diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R index 4711cacfcd0..793ba06c4a8 100644 --- a/r/tests/testthat/test-dataset.R +++ b/r/tests/testthat/test-dataset.R @@ -986,17 +986,6 @@ test_that("dplyr method not implemented messages", { "Filter expression not supported for Arrow Datasets: dbl > max(dbl)\nCall collect() first to pull data into R.", fixed = TRUE ) - # One explicit test of the full message - expect_error( - ds %>% summarize(mean(int)), - "summarize() is not currently implemented for Arrow Datasets. 
Call collect() first to pull data into R.", - fixed = TRUE - ) - # Helper for everything else - expect_not_implemented <- function(x) { - expect_error(x, "is not currently implemented for Arrow Datasets") - } - expect_not_implemented(ds %>% filter(int == 1) %>% summarize(n())) }) test_that("Dataset and query print methods", { From 58f4930b6a3d770546c306e7f4e62103588fd5d5 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Thu, 29 Jul 2021 11:55:42 -0400 Subject: [PATCH 17/24] Refactor agg function definition and registry and add any/all --- r/R/dplyr-eval.R | 8 ++- r/R/dplyr-functions.R | 31 ++++++++++++ r/R/dplyr-summarize.R | 17 +++---- r/tests/testthat/test-dplyr-aggregate.R | 65 +++++++++++++++++++++++-- 4 files changed, 105 insertions(+), 16 deletions(-) diff --git a/r/R/dplyr-eval.R b/r/R/dplyr-eval.R index 57497e41cd2..3a1261602a3 100644 --- a/r/R/dplyr-eval.R +++ b/r/R/dplyr-eval.R @@ -39,7 +39,7 @@ arrow_eval <- function(expr, mask) { } out <- structure(msg, class = "try-error", condition = e) - if (grepl("not supported.*Arrow", msg)) { + if (grepl("not supported.*Arrow", msg) || getOption("arrow.debug", FALSE)) { # One of ours. Mark it so that consumers can handle it differently class(out) <- c("arrow-try-error", class(out)) } @@ -75,7 +75,7 @@ arrow_not_supported <- function(msg) { } # Create a data mask for evaluating a dplyr expression -arrow_mask <- function(.data) { +arrow_mask <- function(.data, aggregation = FALSE) { f_env <- new_environment(.cache$functions) # Add functions that need to error hard and clear. @@ -86,6 +86,10 @@ arrow_mask <- function(.data) { f_env[[f]] <- fail } + if (aggregation) { + f_env <- new_environment(agg_funcs, parent = f_env) + } + # Assign the schema to the expressions map(.data$selected_columns, ~ (.$schema <- .data$.data$schema)) diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index 40e4cd4776b..83cf2d8ab88 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -777,3 +777,34 @@ nse_funcs$case_when <- function(...) { ) ) } + +# Aggregation functions +# These all return a list of: +#' @param fun string function name +#' @param data Expression (these are all currently a single field) +#' @parma options list of function options, as passed to call_function +#' For group-by aggregation, `hash_` gets prepended to the function name. +#' So to see a list of available hash aggregation functions, do +#' list_compute_functions("^hash_") +agg_funcs <- list() +agg_funcs$sum <- function(x, na.rm = FALSE) { + list( + fun = "sum", + data = x, + options = list(na.rm = na.rm, na.min_count = 0L) + ) +} +agg_funcs$any <- function(x, na.rm = FALSE) { + list( + fun = "any", + data = x, + options = list(na.rm = na.rm, na.min_count = 0L) + ) +} +agg_funcs$all <- function(x, na.rm = FALSE) { + list( + fun = "all", + data = x, + options = list(na.rm = na.rm, na.min_count = 0L) + ) +} \ No newline at end of file diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R index ba9fe8be046..7c8788f786f 100644 --- a/r/R/dplyr-summarize.R +++ b/r/R/dplyr-summarize.R @@ -52,16 +52,8 @@ do_arrow_summarize <- function(.data, ...) 
{ # Deparse and take the first element in case they're long expressions names(exprs)[unnamed] <- map_chr(exprs[unnamed], as_label) - mask <- arrow_mask(.data) - # Add aggregation wrappers to arrow_mask somehow - # (this is not ideal, would overwrite same-named objects) - mask$sum <- function(x, na.rm = FALSE) { - list( - fun = "sum", - data = x, - options = list(na.rm = na.rm, na.min_count = 0L) - ) - } + mask <- arrow_mask(.data, aggregation = TRUE) + results <- list() for (i in seq_along(exprs)) { # Iterate over the indices and not the names because names may be repeated @@ -69,7 +61,10 @@ do_arrow_summarize <- function(.data, ...) { new_var <- names(exprs)[i] results[[new_var]] <- arrow_eval(exprs[[i]], mask) if (inherits(results[[new_var]], "try-error")) { - msg <- paste('Expression', as_label(exprs[[i]]), 'not supported in Arrow') + msg <- handle_arrow_not_supported( + results[[new_var]], + as_label(exprs[[i]]) + ) stop(msg, call. = FALSE) } # Put it in the data mask too? diff --git a/r/tests/testthat/test-dplyr-aggregate.R b/r/tests/testthat/test-dplyr-aggregate.R index d444f1bf391..bf3870cbc47 100644 --- a/r/tests/testthat/test-dplyr-aggregate.R +++ b/r/tests/testthat/test-dplyr-aggregate.R @@ -17,6 +17,8 @@ skip_if_not_available("dataset") +withr::local_options(list(arrow.summarize = TRUE)) + library(dplyr) library(stringr) @@ -29,7 +31,6 @@ tbl$padded_strings <- stringr::str_pad(letters[1:10], width = 2*(1:10)+1, side = tbl$some_grouping <- rep(c(1, 2), 5) test_that("Can aggregate", { - withr::local_options(list(arrow.summarize = TRUE)) expect_dplyr_equal( input %>% summarize(total = sum(int, na.rm = TRUE)) %>% @@ -45,8 +46,7 @@ test_that("Can aggregate", { ) }) -test_that("Group by aggregate on dataset", { - withr::local_options(list(arrow.summarize = TRUE)) +test_that("Group by sum on dataset", { expect_dplyr_equal( input %>% group_by(some_grouping) %>% @@ -56,6 +56,15 @@ test_that("Group by aggregate on dataset", { tbl ) + expect_dplyr_equal( + input %>% + group_by(some_grouping) %>% + summarize(total = sum(int * 4, na.rm = TRUE)) %>% + arrange(some_grouping) %>% + collect(), + tbl + ) + skip("This is failing because the default is na.rm = FALSE") expect_dplyr_equal( input %>% @@ -65,4 +74,54 @@ test_that("Group by aggregate on dataset", { collect(), tbl ) +}) + +test_that("Group by any/all", { + withr::local_options(list(arrow.debug = TRUE)) + + expect_dplyr_equal( + input %>% + group_by(some_grouping) %>% + summarize(any(lgl, na.rm = TRUE)) %>% + arrange(some_grouping) %>% + collect(), + tbl + ) + expect_dplyr_equal( + input %>% + group_by(some_grouping) %>% + summarize(all(lgl, na.rm = TRUE)) %>% + arrange(some_grouping) %>% + collect(), + tbl + ) + # na.rm option also is not being passed/received to any/all + + expect_dplyr_equal( + input %>% + mutate(has_words = nchar(verses) < 0) %>% + group_by(some_grouping) %>% + summarize(any(has_words)) %>% + arrange(some_grouping) %>% + collect(), + tbl + ) + expect_dplyr_equal( + input %>% + mutate(has_words = nchar(verses) < 0) %>% + group_by(some_grouping) %>% + summarize(all(has_words)) %>% + arrange(some_grouping) %>% + collect(), + tbl + ) + skip("This seems to be calling base::nchar") + expect_dplyr_equal( + input %>% + group_by(some_grouping) %>% + summarize(has_words = all(nchar(verses) < 0)) %>% + arrange(some_grouping) %>% + collect(), + tbl + ) }) \ No newline at end of file From aeb0bf88907be202aeaba86374a772cca0867e93 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Fri, 30 Jul 2021 12:08:06 -0400 Subject: [PATCH 
18/24] Add jira references --- r/tests/testthat/test-dplyr-aggregate.R | 45 +++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 3 deletions(-) diff --git a/r/tests/testthat/test-dplyr-aggregate.R b/r/tests/testthat/test-dplyr-aggregate.R index bf3870cbc47..76088536982 100644 --- a/r/tests/testthat/test-dplyr-aggregate.R +++ b/r/tests/testthat/test-dplyr-aggregate.R @@ -37,7 +37,7 @@ test_that("Can aggregate", { collect(), tbl ) - skip("This is failing because the default is na.rm = FALSE") + skip("ARROW-13497: This is failing because the default is na.rm = FALSE") expect_dplyr_equal( input %>% summarize(total = sum(int)) %>% @@ -65,7 +65,7 @@ test_that("Group by sum on dataset", { tbl ) - skip("This is failing because the default is na.rm = FALSE") + skip("ARROW-13497: This is failing because the default is na.rm = FALSE") expect_dplyr_equal( input %>% group_by(some_grouping) %>% @@ -95,7 +95,7 @@ test_that("Group by any/all", { collect(), tbl ) - # na.rm option also is not being passed/received to any/all + # ARROW-13497: na.rm option also is not being passed/received to any/all expect_dplyr_equal( input %>% @@ -124,4 +124,43 @@ test_that("Group by any/all", { collect(), tbl ) +}) + +test_that("Filter and aggregate", { + skip("ARROW-13498") + expect_dplyr_equal( + input %>% + filter(some_grouping == 2) %>% + summarize(total = sum(int, na.rm = TRUE)) %>% + collect(), + tbl + ) + + expect_dplyr_equal( + input %>% + filter(int > 5) %>% + summarize(total = sum(int, na.rm = TRUE)) %>% + collect(), + tbl + ) + + expect_dplyr_equal( + input %>% + filter(some_grouping == 2) %>% + group_by(some_grouping) %>% + summarize(total = sum(int, na.rm = TRUE)) %>% + arrange(some_grouping) %>% + collect(), + tbl + ) + + expect_dplyr_equal( + input %>% + filter(int > 5) %>% + group_by(some_grouping) %>% + summarize(total = sum(int, na.rm = TRUE)) %>% + arrange(some_grouping) %>% + collect(), + tbl + ) }) \ No newline at end of file From a7f5cde6b2bdecf1beb3b1baa16377b92d13e1d5 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Fri, 30 Jul 2021 12:54:52 -0400 Subject: [PATCH 19/24] Use filter node to actually filter --- r/R/dplyr-summarize.R | 5 ++++- r/tests/testthat/test-dplyr-aggregate.R | 1 - 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R index 7c8788f786f..366fb5d0f24 100644 --- a/r/R/dplyr-summarize.R +++ b/r/R/dplyr-summarize.R @@ -101,8 +101,11 @@ do_exec_plan <- function(.data, group_vars = NULL) { }) } - # Scan also will filter and select columns, so we don't need to Filter start_node <- plan$Scan(.data) + # ARROW-13498: Even though Scan takes the filter, apparently we have to do it again + if (inherits(.data$filtered_rows, "Expression")) { + start_node <- start_node$Filter(.data$filtered_rows) + } # If any columns are derived we need to Project (otherwise this may be no-op) project_node <- start_node$Project(.data$selected_columns) diff --git a/r/tests/testthat/test-dplyr-aggregate.R b/r/tests/testthat/test-dplyr-aggregate.R index 76088536982..2774c3a4db4 100644 --- a/r/tests/testthat/test-dplyr-aggregate.R +++ b/r/tests/testthat/test-dplyr-aggregate.R @@ -127,7 +127,6 @@ test_that("Group by any/all", { }) test_that("Filter and aggregate", { - skip("ARROW-13498") expect_dplyr_equal( input %>% filter(some_grouping == 2) %>% From eab89e808aae815eba9fd1e8fcef7f92459313b9 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Tue, 3 Aug 2021 11:23:42 -0400 Subject: [PATCH 20/24] Format and re-doc --- r/R/dplyr-eval.R | 2 +- 
r/R/dplyr-functions.R | 12 ++++++------ r/R/dplyr-summarize.R | 4 ++-- r/R/dplyr.R | 2 +- r/R/duckdb.R | 7 +++++-- r/R/query-engine.R | 14 +++++++++----- r/man/FileFormat.Rd | 2 +- r/man/ParquetFileReader.Rd | 2 +- r/man/array.Rd | 2 +- r/man/list_compute_functions.Rd | 2 +- r/man/match_arrow.Rd | 4 ++-- r/man/open_dataset.Rd | 12 +++++++----- r/man/read_delim_arrow.Rd | 14 +++++++------- r/man/read_json_arrow.Rd | 10 +++++----- r/man/to_duckdb.Rd | 19 +++++++++++++++++++ r/man/write_ipc_stream.Rd | 2 +- r/tests/testthat/test-dplyr-aggregate.R | 2 +- 17 files changed, 70 insertions(+), 42 deletions(-) diff --git a/r/R/dplyr-eval.R b/r/R/dplyr-eval.R index 3a1261602a3..a60d97657bc 100644 --- a/r/R/dplyr-eval.R +++ b/r/R/dplyr-eval.R @@ -103,4 +103,4 @@ arrow_mask <- function(.data, aggregation = FALSE) { # (because if we do we get `Error: Can't modify the data pronoun` in mutate()) out$.data <- .data$selected_columns out -} +} \ No newline at end of file diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index 83cf2d8ab88..c65a286d2cc 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -780,12 +780,12 @@ nse_funcs$case_when <- function(...) { # Aggregation functions # These all return a list of: -#' @param fun string function name -#' @param data Expression (these are all currently a single field) -#' @parma options list of function options, as passed to call_function -#' For group-by aggregation, `hash_` gets prepended to the function name. -#' So to see a list of available hash aggregation functions, do -#' list_compute_functions("^hash_") +# @param fun string function name +# @param data Expression (these are all currently a single field) +# @param options list of function options, as passed to call_function +# For group-by aggregation, `hash_` gets prepended to the function name. +# So to see a list of available hash aggregation functions, do +# list_compute_functions("^hash_") agg_funcs <- list() agg_funcs$sum <- function(x, na.rm = FALSE) { list( diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R index 366fb5d0f24..126b949164d 100644 --- a/r/R/dplyr-summarize.R +++ b/r/R/dplyr-summarize.R @@ -29,7 +29,7 @@ summarise.arrow_dplyr_query <- function(.data, ..., .engine = c("arrow", "duckdb )) .data <- dplyr::select(.data, vars_to_keep) if (match.arg(.engine) == "duckdb") { - dplyr::summarise(to_duckdb(.data), ...) + dplyr::summarise(to_duckdb(.data), ...) } else if (isTRUE(getOption("arrow.summarize", FALSE))) { # Try stuff, if successful return() out <- try(do_arrow_summarize(.data, ...), silent = TRUE) @@ -68,7 +68,7 @@ do_arrow_summarize <- function(.data, ...) { stop(msg, call. = FALSE) } # Put it in the data mask too? 
- #mask[[new_var]] <- mask$.data[[new_var]] <- results[[new_var]] + # mask[[new_var]] <- mask$.data[[new_var]] <- results[[new_var]] } # Now, from that, split out the data (expressions) and options diff --git a/r/R/dplyr.R b/r/R/dplyr.R index 00443c7834d..aa1d10439b4 100644 --- a/r/R/dplyr.R +++ b/r/R/dplyr.R @@ -228,4 +228,4 @@ abandon_ship <- function(call, .data, msg) { eval.parent(call, 2) } -query_on_dataset <- function(x) !inherits(x$.data, "InMemoryDataset") +query_on_dataset <- function(x) !inherits(x$.data, "InMemoryDataset") \ No newline at end of file diff --git a/r/R/duckdb.R b/r/R/duckdb.R index 6ed1df3d826..ba9c4469fea 100644 --- a/r/R/duckdb.R +++ b/r/R/duckdb.R @@ -40,8 +40,7 @@ #' #' @name to_duckdb #' @export -#' @examplesIf { arrow_with_dataset() && requireNamespace("duckdb", quietly = TRUE) && -#' packageVersion("duckdb") > "0.2.7" && requireNamespace("dplyr", quietly = TRUE) } +#' @examplesIf getFromNamespace("run_duckdb_examples", "arrow")() #' library(dplyr) #' #' ds <- InMemoryDataset$create(mtcars) @@ -113,3 +112,7 @@ duckdb_disconnector <- function(con, tbl_name) { }) environment() } + +run_duckdb_examples <- function() { + arrow_with_dataset() && requireNamespace("duckdb", quietly = TRUE) && packageVersion("duckdb") > "0.2.7" && requireNamespace("dplyr", quietly = TRUE) +} \ No newline at end of file diff --git a/r/R/query-engine.R b/r/R/query-engine.R index 1d1125628e1..cb6dc292707 100644 --- a/r/R/query-engine.R +++ b/r/R/query-engine.R @@ -15,7 +15,8 @@ # specific language governing permissions and limitations # under the License. -ExecPlan <- R6Class("ExecPlan", inherit = ArrowObject, +ExecPlan <- R6Class("ExecPlan", + inherit = ArrowObject, public = list( Scan = function(dataset) { # Handle arrow_dplyr_query @@ -25,8 +26,10 @@ ExecPlan <- R6Class("ExecPlan", inherit = ArrowObject, filter <- Expression$scalar(TRUE) } # Use FieldsInExpression to find all from dataset$selected_columns - colnames <- unique(unlist(map(dataset$selected_columns, - field_names_in_expression))) + colnames <- unique(unlist(map( + dataset$selected_columns, + field_names_in_expression + ))) dataset <- dataset$.data } else { if (inherits(dataset, "ArrowTabular")) { @@ -51,7 +54,8 @@ ExecPlan$create <- function(use_threads = option_use_threads()) { ExecPlan_create(use_threads) } -ExecNode <- R6Class("ExecNode", inherit = ArrowObject, +ExecNode <- R6Class("ExecNode", + inherit = ArrowObject, public = list( Project = function(cols) { assert_is_list_of(cols, "Expression") @@ -68,4 +72,4 @@ ExecNode <- R6Class("ExecNode", inherit = ArrowObject, ExecNode_GroupByAggregate(self, group_vars, target_names, aggregations) } ) -) +) \ No newline at end of file diff --git a/r/man/FileFormat.Rd b/r/man/FileFormat.Rd index 5bc9475b408..cabacc93755 100644 --- a/r/man/FileFormat.Rd +++ b/r/man/FileFormat.Rd @@ -52,7 +52,7 @@ It returns the appropriate subclass of \code{FileFormat} (e.g. 
\code{ParquetFile } \examples{ -\dontshow{if (arrow_with_dataset()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (arrow_with_dataset() && tolower(Sys.info()[["sysname"]]) != "windows") (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} ## Semi-colon delimited files # Set up directory for examples tf <- tempfile() diff --git a/r/man/ParquetFileReader.Rd b/r/man/ParquetFileReader.Rd index 31de9ead104..30d0725a498 100644 --- a/r/man/ParquetFileReader.Rd +++ b/r/man/ParquetFileReader.Rd @@ -47,7 +47,7 @@ The optional \verb{column_indices=} argument is a 0-based integer vector indicat \examples{ \dontshow{if (arrow_with_parquet()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -f <- system.file("v0.7.1.parquet", package="arrow") +f <- system.file("v0.7.1.parquet", package = "arrow") pq <- ParquetFileReader$create(f) pq$GetSchema() if (codec_is_available("snappy")) { diff --git a/r/man/array.Rd b/r/man/array.Rd index 71957aff90c..78d3eaff6ea 100644 --- a/r/man/array.Rd +++ b/r/man/array.Rd @@ -100,7 +100,7 @@ new_array <- na_array$Slice(5) new_array$offset # Compare 2 arrays -na_array2 = na_array +na_array2 <- na_array na_array2 == na_array # element-wise comparison na_array2$Equals(na_array) # overall comparison \dontshow{\}) # examplesIf} diff --git a/r/man/list_compute_functions.Rd b/r/man/list_compute_functions.Rd index 668e090c0ca..4ca0e518f13 100644 --- a/r/man/list_compute_functions.Rd +++ b/r/man/list_compute_functions.Rd @@ -39,7 +39,7 @@ available inside \code{dplyr} verbs. } \examples{ \dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -list_compute_functions() +list_compute_functions() list_compute_functions(pattern = "^UTF8", ignore.case = TRUE) list_compute_functions(pattern = "^is", invert = TRUE) \dontshow{\}) # examplesIf} diff --git a/r/man/match_arrow.Rd b/r/man/match_arrow.Rd index d63ef3eed87..c2343361c6e 100644 --- a/r/man/match_arrow.Rd +++ b/r/man/match_arrow.Rd @@ -34,12 +34,12 @@ match_arrow(Scalar$create("Mazda RX4 Wag"), cars_tbl$name) is_in(Array$create("Mazda RX4 Wag"), cars_tbl$name) -# Although there are multiple matches, you are returned the index of the first +# Although there are multiple matches, you are returned the index of the first # match, as with the base R equivalent match(4, mtcars$cyl) # 1-indexed match_arrow(Scalar$create(4), cars_tbl$cyl) # 0-indexed -# If `x` contains multiple values, you are returned the indices of the first +# If `x` contains multiple values, you are returned the indices of the first # match for each value. match(c(4, 6, 8), mtcars$cyl) match_arrow(Array$create(c(4, 6, 8)), cars_tbl$cyl) diff --git a/r/man/open_dataset.Rd b/r/man/open_dataset.Rd index 1ca3d661880..53eade595be 100644 --- a/r/man/open_dataset.Rd +++ b/r/man/open_dataset.Rd @@ -91,7 +91,7 @@ can accelerate queries that only touch some partitions (files). Call \code{Dataset}, then use \code{dplyr} methods to query it. 
 }
 \examples{
-\dontshow{if (arrow_with_dataset() & arrow_with_parquet() ) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (arrow_with_dataset() & arrow_with_parquet()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 # Set up directory for examples
 tf <- tempfile()
 dir.create(tf)
@@ -105,7 +105,7 @@ write_dataset(data, tf)
 open_dataset(tf)
 
 # You can also supply a vector of paths
-open_dataset(c(file.path(tf, "cyl=4/part-1.parquet"), file.path(tf,"cyl=8/part-2.parquet")))
+open_dataset(c(file.path(tf, "cyl=4/part-1.parquet"), file.path(tf, "cyl=8/part-2.parquet")))
 
 ## You must specify the file format if using a format other than parquet.
 tf2 <- tempfile()
@@ -113,9 +113,11 @@ dir.create(tf2)
 on.exit(unlink(tf2))
 write_dataset(data, tf2, format = "ipc")
 # This line will result in errors when you try to work with the data
-\dontrun{open_dataset(tf2)}
+\dontrun{
+open_dataset(tf2)
+}
 # This line will work
-open_dataset(tf2, format = "ipc") 
+open_dataset(tf2, format = "ipc")
 
 ## You can specify file partitioning to include it as a field in your dataset
 # Create a temporary directory and write example dataset
@@ -124,7 +126,7 @@ dir.create(tf3)
 on.exit(unlink(tf3))
 write_dataset(airquality, tf3, partitioning = c("Month", "Day"), hive_style = FALSE)
 
-# View files - you can see the partitioning means that files have been written 
+# View files - you can see the partitioning means that files have been written
 # to folders based on Month/Day values
 list.files(tf3, recursive = TRUE)
diff --git a/r/man/read_delim_arrow.Rd b/r/man/read_delim_arrow.Rd
index d9c80306931..30b146a4fee 100644
--- a/r/man/read_delim_arrow.Rd
+++ b/r/man/read_delim_arrow.Rd
@@ -207,12 +207,12 @@ to identify column names, you'll need to add \code{skip = 1} to skip that row.
 }
 \examples{
 \dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-  tf <- tempfile()
-  on.exit(unlink(tf))
-  write.csv(mtcars, file = tf)
-  df <- read_csv_arrow(tf)
-  dim(df)
-  # Can select columns
-  df <- read_csv_arrow(tf, col_select = starts_with("d"))
+tf <- tempfile()
+on.exit(unlink(tf))
+write.csv(mtcars, file = tf)
+df <- read_csv_arrow(tf)
+dim(df)
+# Can select columns
+df <- read_csv_arrow(tf, col_select = starts_with("d"))
 \dontshow{\}) # examplesIf}
 }
diff --git a/r/man/read_json_arrow.Rd b/r/man/read_json_arrow.Rd
index 4806b4ad1f0..53d7107ae81 100644
--- a/r/man/read_json_arrow.Rd
+++ b/r/man/read_json_arrow.Rd
@@ -40,13 +40,13 @@ Using \link{JsonTableReader}
 }
 \examples{
 \dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-  tf <- tempfile()
-  on.exit(unlink(tf))
-  writeLines('
+tf <- tempfile()
+on.exit(unlink(tf))
+writeLines('
     { "hello": 3.5, "world": false, "yo": "thing" }
     { "hello": 3.25, "world": null }
     { "hello": 0.0, "world": true, "yo": null }
-  ', tf, useBytes=TRUE)
-  df <- read_json_arrow(tf)
+  ', tf, useBytes = TRUE)
+df <- read_json_arrow(tf)
 \dontshow{\}) # examplesIf}
 }
diff --git a/r/man/to_duckdb.Rd b/r/man/to_duckdb.Rd
index c273a7520d5..ffde91f14f2 100644
--- a/r/man/to_duckdb.Rd
+++ b/r/man/to_duckdb.Rd
@@ -39,3 +39,22 @@ that starts with an Arrow object to use DuckDB to calculate the
 summarization step. Internally, this calls \code{to_duckdb()} with all of the
 default argument values.
} +\examples{ +\dontshow{if (getFromNamespace("run_duckdb_examples", "arrow")()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +library(dplyr) + +ds <- InMemoryDataset$create(mtcars) + +ds \%>\% + filter(mpg < 30) \%>\% + to_duckdb() \%>\% + group_by(cyl) \%>\% + summarize(mean_mpg = mean(mpg, na.rm = TRUE)) + +# the same query can be simplified using .engine = "duckdb" +ds \%>\% + filter(mpg < 30) \%>\% + group_by(cyl) \%>\% + summarize(mean_mpg = mean(mpg, na.rm = TRUE), .engine = "duckdb") +\dontshow{\}) # examplesIf} +} diff --git a/r/man/write_ipc_stream.Rd b/r/man/write_ipc_stream.Rd index 888d947eb99..2f215f25fd7 100644 --- a/r/man/write_ipc_stream.Rd +++ b/r/man/write_ipc_stream.Rd @@ -32,7 +32,7 @@ the function that will write the desired IPC format (stream or file) since either can be written to a file or \code{OutputStream}. } \examples{ -\dontshow{if (arrow_available() ) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} tf <- tempfile() on.exit(unlink(tf)) write_ipc_stream(mtcars, tf) diff --git a/r/tests/testthat/test-dplyr-aggregate.R b/r/tests/testthat/test-dplyr-aggregate.R index 2774c3a4db4..413c6afdf71 100644 --- a/r/tests/testthat/test-dplyr-aggregate.R +++ b/r/tests/testthat/test-dplyr-aggregate.R @@ -27,7 +27,7 @@ tbl <- example_data tbl$verses <- verses[[1]] # c(" a ", " b ", " c ", ...) increasing padding # nchar = 3 5 7 9 11 13 15 17 19 21 -tbl$padded_strings <- stringr::str_pad(letters[1:10], width = 2*(1:10)+1, side = "both") +tbl$padded_strings <- stringr::str_pad(letters[1:10], width = 2 * (1:10) + 1, side = "both") tbl$some_grouping <- rep(c(1, 2), 5) test_that("Can aggregate", { From da43f5ce10f3f15a6fccf08a717998122849bc2e Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Wed, 4 Aug 2021 09:04:54 -0400 Subject: [PATCH 21/24] Remove feature flag --- r/R/dplyr-summarize.R | 5 +---- r/R/dplyr.R | 1 + r/tests/testthat/test-dplyr-aggregate.R | 24 +++++++++++++++++++++--- r/tests/testthat/test-dplyr-group-by.R | 6 ++++-- r/tests/testthat/test-dplyr.R | 18 ------------------ 5 files changed, 27 insertions(+), 27 deletions(-) diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R index 126b949164d..1320f67c66c 100644 --- a/r/R/dplyr-summarize.R +++ b/r/R/dplyr-summarize.R @@ -30,7 +30,7 @@ summarise.arrow_dplyr_query <- function(.data, ..., .engine = c("arrow", "duckdb .data <- dplyr::select(.data, vars_to_keep) if (match.arg(.engine) == "duckdb") { dplyr::summarise(to_duckdb(.data), ...) - } else if (isTRUE(getOption("arrow.summarize", FALSE))) { + } else { # Try stuff, if successful return() out <- try(do_arrow_summarize(.data, ...), silent = TRUE) if (inherits(out, "try-error")) { @@ -38,9 +38,6 @@ summarise.arrow_dplyr_query <- function(.data, ..., .engine = c("arrow", "duckdb } else { return(out) } - } else { - # If unsuccessful or if option not set, do the work in R - dplyr::summarise(dplyr::collect(.data), ...) } } summarise.Dataset <- summarise.ArrowTabular <- summarise.arrow_dplyr_query diff --git a/r/R/dplyr.R b/r/R/dplyr.R index aa1d10439b4..ff5e30e66c5 100644 --- a/r/R/dplyr.R +++ b/r/R/dplyr.R @@ -222,6 +222,7 @@ abandon_ship <- function(call, .data, msg) { stop(msg, "\nCall collect() first to pull data into R.", call. = FALSE) } # else, collect and call dplyr method + msg <- sub("\\n$", "", msg) warning(msg, "; pulling data into R", immediate. = TRUE, call. 
= FALSE) call$.data <- dplyr::collect(.data) call[[1]] <- get(dplyr_fun_name, envir = asNamespace("dplyr")) diff --git a/r/tests/testthat/test-dplyr-aggregate.R b/r/tests/testthat/test-dplyr-aggregate.R index 413c6afdf71..1a53a1b23b5 100644 --- a/r/tests/testthat/test-dplyr-aggregate.R +++ b/r/tests/testthat/test-dplyr-aggregate.R @@ -17,8 +17,6 @@ skip_if_not_available("dataset") -withr::local_options(list(arrow.summarize = TRUE)) - library(dplyr) library(stringr) @@ -30,7 +28,27 @@ tbl$verses <- verses[[1]] tbl$padded_strings <- stringr::str_pad(letters[1:10], width = 2 * (1:10) + 1, side = "both") tbl$some_grouping <- rep(c(1, 2), 5) -test_that("Can aggregate", { +test_that("summarize", { + expect_dplyr_equal( + input %>% + select(int, chr) %>% + filter(int > 5) %>% + summarize(min_int = min(int)), + tbl, + warning = TRUE + ) + + expect_dplyr_equal( + input %>% + select(int, chr) %>% + filter(int > 5) %>% + summarize(min_int = min(int) / 2), + tbl, + warning = TRUE + ) +}) + +test_that("Can aggregate in Arrow", { expect_dplyr_equal( input %>% summarize(total = sum(int, na.rm = TRUE)) %>% diff --git a/r/tests/testthat/test-dplyr-group-by.R b/r/tests/testthat/test-dplyr-group-by.R index fe0394bc636..18be2a9304a 100644 --- a/r/tests/testthat/test-dplyr-group-by.R +++ b/r/tests/testthat/test-dplyr-group-by.R @@ -29,7 +29,8 @@ test_that("group_by groupings are recorded", { select(int, chr) %>% filter(int > 5) %>% summarize(min_int = min(int)), - tbl + tbl, + warning = TRUE ) }) @@ -62,7 +63,8 @@ test_that("ungroup", { ungroup() %>% filter(int > 5) %>% summarize(min_int = min(int)), - tbl + tbl, + warning = TRUE ) }) diff --git a/r/tests/testthat/test-dplyr.R b/r/tests/testthat/test-dplyr.R index da21ccd9ed1..9297df09490 100644 --- a/r/tests/testthat/test-dplyr.R +++ b/r/tests/testthat/test-dplyr.R @@ -69,24 +69,6 @@ See $.data for the source Arrow object', ) }) -test_that("summarize", { - expect_dplyr_equal( - input %>% - select(int, chr) %>% - filter(int > 5) %>% - summarize(min_int = min(int)), - tbl - ) - - expect_dplyr_equal( - input %>% - select(int, chr) %>% - filter(int > 5) %>% - summarize(min_int = min(int) / 2), - tbl - ) -}) - test_that("Empty select returns no columns", { expect_dplyr_equal( input %>% select() %>% collect(), From 56df2d3c875ea577b29f780adde2fef6b30c743d Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Wed, 4 Aug 2021 09:07:36 -0400 Subject: [PATCH 22/24] handle .groups argument Co-authored-by: Ian Cook --- r/R/dplyr-summarize.R | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R index 1320f67c66c..5677afb904a 100644 --- a/r/R/dplyr-summarize.R +++ b/r/R/dplyr-summarize.R @@ -42,7 +42,11 @@ summarise.arrow_dplyr_query <- function(.data, ..., .engine = c("arrow", "duckdb } summarise.Dataset <- summarise.ArrowTabular <- summarise.arrow_dplyr_query -do_arrow_summarize <- function(.data, ...) { +do_arrow_summarize <- function(.data, ..., .groups = NULL) { + if (!is.null(.groups)) { + # ARROW-13550 + abort("`summarize()` with `.groups` argument not supported in Arrow") + } exprs <- quos(...) 
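   # (each captured quosure is evaluated against the Arrow data mask via arrow_eval() below)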
# Check for unnamed expressions and fix if any unnamed <- !nzchar(names(exprs)) @@ -128,4 +132,4 @@ do_exec_plan <- function(.data, group_vars = NULL) { out <- plan$Run(final_node) } out -} \ No newline at end of file +} From f5d5d30b1c1361829b2a2def9b33ea0c7fbe9499 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Wed, 4 Aug 2021 09:50:21 -0400 Subject: [PATCH 23/24] Prevent na.rm = FALSE aggregation because it's wrong --- r/R/dplyr-functions.R | 18 +++++++++++++----- r/tests/testthat/test-dplyr-aggregate.R | 14 ++++++++------ r/tests/testthat/test-dplyr.R | 2 +- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index c65a286d2cc..607be82c36b 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -713,7 +713,7 @@ nse_funcs$log <- nse_funcs$logb <- function(x, base = exp(1)) { return(Expression$create("log10_checked", x)) } # ARROW-13345 - stop("`base` values other than exp(1), 2 and 10 not supported in Arrow", call. = FALSE) + arrow_not_supported("`base` values other than exp(1), 2 and 10") } nse_funcs$if_else <- function(condition, true, false, missing = NULL) { @@ -791,20 +791,28 @@ agg_funcs$sum <- function(x, na.rm = FALSE) { list( fun = "sum", data = x, - options = list(na.rm = na.rm, na.min_count = 0L) + options = arrow_na_rm(na.rm = na.rm) ) } agg_funcs$any <- function(x, na.rm = FALSE) { list( fun = "any", data = x, - options = list(na.rm = na.rm, na.min_count = 0L) + options = arrow_na_rm(na.rm) ) } agg_funcs$all <- function(x, na.rm = FALSE) { list( fun = "all", data = x, - options = list(na.rm = na.rm, na.min_count = 0L) + options = arrow_na_rm(na.rm) ) -} \ No newline at end of file +} + +arrow_na_rm <- function(na.rm) { + if (!isTRUE(na.rm)) { + # TODO: ARROW-13497 + arrow_not_supported(paste("na.rm =", na.rm)) + } + list(na.rm = na.rm, na.min_count = 0L) +} diff --git a/r/tests/testthat/test-dplyr-aggregate.R b/r/tests/testthat/test-dplyr-aggregate.R index 1a53a1b23b5..2807f0053fa 100644 --- a/r/tests/testthat/test-dplyr-aggregate.R +++ b/r/tests/testthat/test-dplyr-aggregate.R @@ -55,12 +55,13 @@ test_that("Can aggregate in Arrow", { collect(), tbl ) - skip("ARROW-13497: This is failing because the default is na.rm = FALSE") expect_dplyr_equal( input %>% summarize(total = sum(int)) %>% collect(), - tbl + tbl, + # ARROW-13497: This is failing because the default is na.rm = FALSE + warning = TRUE ) }) @@ -83,14 +84,15 @@ test_that("Group by sum on dataset", { tbl ) - skip("ARROW-13497: This is failing because the default is na.rm = FALSE") expect_dplyr_equal( input %>% group_by(some_grouping) %>% summarize(total = sum(int)) %>% arrange(some_grouping) %>% collect(), - tbl + tbl, + # ARROW-13497: This is failing because the default is na.rm = FALSE + warning = TRUE ) }) @@ -119,7 +121,7 @@ test_that("Group by any/all", { input %>% mutate(has_words = nchar(verses) < 0) %>% group_by(some_grouping) %>% - summarize(any(has_words)) %>% + summarize(any(has_words, na.rm = TRUE)) %>% arrange(some_grouping) %>% collect(), tbl @@ -128,7 +130,7 @@ test_that("Group by any/all", { input %>% mutate(has_words = nchar(verses) < 0) %>% group_by(some_grouping) %>% - summarize(all(has_words)) %>% + summarize(all(has_words, na.rm = TRUE)) %>% arrange(some_grouping) %>% collect(), tbl diff --git a/r/tests/testthat/test-dplyr.R b/r/tests/testthat/test-dplyr.R index 9297df09490..ed03c58a884 100644 --- a/r/tests/testthat/test-dplyr.R +++ b/r/tests/testthat/test-dplyr.R @@ -1036,7 +1036,7 @@ test_that("log functions", { 
expect_error( nse_funcs$log(Expression$scalar(x), base = 5), - "`base` values other than exp(1), 2 and 10 not supported in Arrow", + "`base` values other than exp(1), 2 and 10 not supported by Arrow", fixed = TRUE ) From 69228153f34052ad7fed0038bed5ddecfd8722cd Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Wed, 4 Aug 2021 14:47:13 -0400 Subject: [PATCH 24/24] Suppress warning and style files --- r/R/dplyr-eval.R | 2 +- r/R/dplyr.R | 2 +- r/R/duckdb.R | 2 +- r/R/query-engine.R | 2 +- r/tests/testthat/test-dataset.R | 15 +++++++++------ r/tests/testthat/test-dplyr-aggregate.R | 2 +- 6 files changed, 14 insertions(+), 11 deletions(-) diff --git a/r/R/dplyr-eval.R b/r/R/dplyr-eval.R index a60d97657bc..3a1261602a3 100644 --- a/r/R/dplyr-eval.R +++ b/r/R/dplyr-eval.R @@ -103,4 +103,4 @@ arrow_mask <- function(.data, aggregation = FALSE) { # (because if we do we get `Error: Can't modify the data pronoun` in mutate()) out$.data <- .data$selected_columns out -} \ No newline at end of file +} diff --git a/r/R/dplyr.R b/r/R/dplyr.R index ff5e30e66c5..b2793bdb3c3 100644 --- a/r/R/dplyr.R +++ b/r/R/dplyr.R @@ -229,4 +229,4 @@ abandon_ship <- function(call, .data, msg) { eval.parent(call, 2) } -query_on_dataset <- function(x) !inherits(x$.data, "InMemoryDataset") \ No newline at end of file +query_on_dataset <- function(x) !inherits(x$.data, "InMemoryDataset") diff --git a/r/R/duckdb.R b/r/R/duckdb.R index ba9c4469fea..bc003a6ea8f 100644 --- a/r/R/duckdb.R +++ b/r/R/duckdb.R @@ -115,4 +115,4 @@ duckdb_disconnector <- function(con, tbl_name) { run_duckdb_examples <- function() { arrow_with_dataset() && requireNamespace("duckdb", quietly = TRUE) && packageVersion("duckdb") > "0.2.7" && requireNamespace("dplyr", quietly = TRUE) -} \ No newline at end of file +} diff --git a/r/R/query-engine.R b/r/R/query-engine.R index cb6dc292707..72c35c515db 100644 --- a/r/R/query-engine.R +++ b/r/R/query-engine.R @@ -72,4 +72,4 @@ ExecNode <- R6Class("ExecNode", ExecNode_GroupByAggregate(self, group_vars, target_names, aggregations) } ) -) \ No newline at end of file +) diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R index 793ba06c4a8..1a71fea86c7 100644 --- a/r/tests/testthat/test-dataset.R +++ b/r/tests/testthat/test-dataset.R @@ -638,12 +638,15 @@ test_that("Creating UnionDataset", { test_that("map_batches", { skip_if_not_available("parquet") ds <- open_dataset(dataset_dir, partitioning = "part") - expect_equivalent( - ds %>% - filter(int > 5) %>% - select(int, lgl) %>% - map_batches(~ summarize(., min_int = min(int))), - tibble(min_int = c(6L, 101L)) + expect_warning( + expect_equivalent( + ds %>% + filter(int > 5) %>% + select(int, lgl) %>% + map_batches(~ summarize(., min_int = min(int))), + tibble(min_int = c(6L, 101L)) + ), + "pulling data into R" # ARROW-13502 ) }) diff --git a/r/tests/testthat/test-dplyr-aggregate.R b/r/tests/testthat/test-dplyr-aggregate.R index 2807f0053fa..8235ef29948 100644 --- a/r/tests/testthat/test-dplyr-aggregate.R +++ b/r/tests/testthat/test-dplyr-aggregate.R @@ -182,4 +182,4 @@ test_that("Filter and aggregate", { collect(), tbl ) -}) \ No newline at end of file +})
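
Taken together, the final patches in this series leave summarise() with a
single code path: try the aggregation in Arrow, and if translation fails,
abandon_ship() pulls the data into R with a warning instead of erroring.
A minimal sketch of the resulting behavior, for orientation only, assuming
a build with the dataset capability enabled (every name used here appears
in the patches above; exact return types are not shown):

library(arrow)
library(dplyr)

ds <- InMemoryDataset$create(mtcars)

# Translatable: sum() with na.rm = TRUE becomes an Arrow aggregation and
# runs through the ExecPlan machinery in query-engine.R, no warning.
ds %>%
  group_by(cyl) %>%
  summarize(total = sum(mpg, na.rm = TRUE)) %>%
  collect()

# Not yet translatable: R's default na.rm = FALSE is rejected by
# arrow_na_rm() (ARROW-13497), so abandon_ship() warns
# "pulling data into R" and evaluates the summarize() in dplyr instead.
ds %>%
  group_by(cyl) %>%
  summarize(total = sum(mpg)) %>%
  collect()

Gating na.rm rather than mapping it through is deliberate: per the patch
title, passing na.rm = FALSE to the Arrow aggregation gave wrong answers,
so until ARROW-13497 is resolved arrow_na_rm() refuses the translation and
lets the try()/abandon_ship() fallback compute a correct result in R.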