From 867e147e4e1812859c9846d91d6a8c95fb938277 Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Tue, 24 Aug 2021 15:00:12 -0400
Subject: [PATCH 01/27] Refactor ExecPlan building; use it in collect()

---
 r/R/dplyr-collect.R                     |  8 ++--
 r/R/dplyr-summarize.R                   | 46 +++------------------
 r/R/query-engine.R                      | 55 +++++++++++++++++++++++--
 r/tests/testthat/test-dplyr-aggregate.R | 47 ++++++++++++++++++++-
 4 files changed, 104 insertions(+), 52 deletions(-)

diff --git a/r/R/dplyr-collect.R b/r/R/dplyr-collect.R
index cec56ab9110..d17b0ddfca2 100644
--- a/r/R/dplyr-collect.R
+++ b/r/R/dplyr-collect.R
@@ -19,11 +19,9 @@
 # The following S3 methods are registered on load if dplyr is present
 
 collect.arrow_dplyr_query <- function(x, as_data_frame = TRUE, ...) {
-  x <- ensure_group_vars(x)
-  x <- ensure_arrange_vars(x) # this sets x$temp_columns
   # Pull only the selected rows and cols into R
-  # See dataset.R for Dataset and Scanner(Builder) classes
-  tab <- Scanner$create(x)$ToTable()
+  # See query-engine.R for ExecPlan/Nodes
+  tab <- do_exec_plan(x)
   # Arrange rows
   if (length(x$arrange_vars) > 0) {
     tab <- tab[
@@ -59,4 +57,4 @@ pull.arrow_dplyr_query <- function(.data, var = -1) {
   .data$selected_columns <- set_names(.data$selected_columns[var], var)
   dplyr::collect(.data)[[1]]
 }
-pull.Dataset <- pull.ArrowTabular <- pull.arrow_dplyr_query
+pull.Dataset <- pull.ArrowTabular <- pull.arrow_dplyr_query
\ No newline at end of file
diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R
index 394e5fe2ac9..a4d6db9cf38 100644
--- a/r/R/dplyr-summarize.R
+++ b/r/R/dplyr-summarize.R
@@ -82,47 +82,11 @@ do_arrow_summarize <- function(.data, ..., .groups = NULL) {
   .data$selected_columns <- inputs
 
   # Eventually, we will return .data here if (dataset) but do it eagerly now
-  do_exec_plan(.data, group_vars = dplyr::group_vars(.data))
+  do_exec_plan(.data)
 }
 
-do_exec_plan <- function(.data, group_vars = NULL) {
+do_exec_plan <- function(.data) {
   plan <- ExecPlan$create()
-
-  grouped <- length(group_vars) > 0
-
-  # Collect the target names first because we have to add back the group vars
-  target_names <- names(.data)
-
-  if (grouped) {
-    .data <- ensure_group_vars(.data)
-    # We also need to prefix all of the aggregation function names with "hash_"
-    .data$aggregations <- lapply(.data$aggregations, function(x) {
-      x[["fun"]] <- paste0("hash_", x[["fun"]])
-      x
-    })
-  }
-
-  start_node <- plan$Scan(.data)
-  # ARROW-13498: Even though Scan takes the filter, apparently we have to do it again
-  if (inherits(.data$filtered_rows, "Expression")) {
-    start_node <- start_node$Filter(.data$filtered_rows)
-  }
-  # If any columns are derived we need to Project (otherwise this may be no-op)
-  project_node <- start_node$Project(.data$selected_columns)
-
-  final_node <- project_node$Aggregate(
-    options = .data$aggregations,
-    target_names = target_names,
-    out_field_names = names(.data$aggregations),
-    key_names = group_vars
-  )
-
-  out <- plan$Run(final_node)
-  if (grouped) {
-    # The result will have result columns first then the grouping cols.
-    # dplyr orders group cols first, so adapt the result to meet that expectation.
-    n_results <- length(.data$aggregations)
-    out <- out[c((n_results + 1):ncol(out), seq_along(.data$aggregations))]
-  }
-  out
-}
+  final_node <- plan$Build(.data)
+  plan$Run(final_node)
+}
\ No newline at end of file
diff --git a/r/R/query-engine.R b/r/R/query-engine.R
index 4de2f87165b..2aaaa2c6597 100644
--- a/r/R/query-engine.R
+++ b/r/R/query-engine.R
@@ -42,7 +42,50 @@ ExecPlan <- R6Class("ExecPlan",
       }
       # ScanNode needs the filter to do predicate pushdown and skip partitions,
       # and it needs to know which fields to materialize (and which are unnecessary)
-      ExecNode_Scan(self, dataset, filter, colnames)
+      ExecNode_Scan(self, dataset, filter, colnames %||% character(0))
+    },
+    Build = function(.data) {
+      group_vars <- dplyr::group_vars(.data)
+      grouped <- length(group_vars) > 0
+
+      # Collect the target names first because we have to add back the group vars
+      target_names <- names(.data)
+      .data <- ensure_group_vars(.data)
+      .data <- ensure_arrange_vars(.data) # this sets x$temp_columns
+
+      node <- self$Scan(.data)
+      # ARROW-13498: Even though Scan takes the filter, apparently we have to do it again
+      if (inherits(.data$filtered_rows, "Expression")) {
+        node <- node$Filter(.data$filtered_rows)
+      }
+      # If any columns are derived we need to Project (otherwise this may be no-op)
+      node <- node$Project(c(.data$selected_columns, .data$temp_columns))
+
+      if (length(.data$aggregations)) {
+        if (grouped) {
+          # We need to prefix all of the aggregation function names with "hash_"
+          .data$aggregations <- lapply(.data$aggregations, function(x) {
+            x[["fun"]] <- paste0("hash_", x[["fun"]])
+            x
+          })
+        }
+
+        node <- node$Aggregate(
+          options = .data$aggregations,
+          target_names = target_names,
+          out_field_names = names(.data$aggregations),
+          key_names = group_vars
+        )
+
+        if (grouped) {
+          # The result will have result columns first then the grouping cols.
+          # dplyr orders group cols first, so adapt the result to meet that expectation.
+          node <- node$Project(
+            make_field_refs(c(group_vars, names(.data$aggregations)))
+          )
+        }
+      }
+      node
     },
     Run = function(node) {
       assert_is(node, "ExecNode")
@@ -58,8 +101,12 @@ ExecNode <- R6Class("ExecNode",
   inherit = ArrowObject,
   public = list(
     Project = function(cols) {
-      assert_is_list_of(cols, "Expression")
-      ExecNode_Project(self, cols, names(cols))
+      if (length(cols)) {
+        assert_is_list_of(cols, "Expression")
+        ExecNode_Project(self, cols, names(cols))
+      } else {
+        ExecNode_Project(self, character(0), character(0))
+      }
     },
     Filter = function(expr) {
       assert_is(expr, "Expression")
@@ -69,4 +116,4 @@ ExecNode <- R6Class("ExecNode",
       ExecNode_Aggregate(self, options, target_names, out_field_names, key_names)
     }
   )
-)
+)
\ No newline at end of file
diff --git a/r/tests/testthat/test-dplyr-aggregate.R b/r/tests/testthat/test-dplyr-aggregate.R
index 3a04b6d2314..1aa30654495 100644
--- a/r/tests/testthat/test-dplyr-aggregate.R
+++ b/r/tests/testthat/test-dplyr-aggregate.R
@@ -33,7 +33,8 @@ test_that("summarize", {
     input %>%
       select(int, chr) %>%
       filter(int > 5) %>%
-      summarize(min_int = min(int)),
+      summarize(min_int = min(int)) %>%
+      collect(),
     tbl,
     warning = TRUE
   )
@@ -42,12 +43,28 @@ test_that("summarize", {
     input %>%
       select(int, chr) %>%
       filter(int > 5) %>%
-      summarize(min_int = min(int) / 2),
+      summarize(min_int = min(int) / 2) %>%
+      collect(),
     tbl,
     warning = TRUE
   )
 })
 
+test_that("summarize() doesn't evaluate eagerly", {
+  skip("TODO")
+  expect_s3_class(
+    Table$create(tbl) %>%
+      summarize(total = sum(int)),
+    "arrow_dplyr_query"
+  )
+  expect_r6_class(
+    Table$create(tbl) %>%
+      summarize(total = sum(int)) %>%
+      collect(),
+    "ArrowTabular"
+  )
+})
+
 test_that("Can aggregate in Arrow", {
   expect_dplyr_equal(
     input %>%
@@ -289,3 +306,29 @@ test_that("Filter and aggregate", {
     tbl
   )
 })
+
+test_that("Expressions on aggregations", {
+  # This is what it effectively is
+  expect_dplyr_equal(
+    input %>%
+      group_by(some_grouping) %>%
+      summarize(
+        any = any(lgl),
+        all = all(lgl)
+      ) %>%
+      arrange(some_grouping) %>%
+      transmute(some = any & !all) %>%
+      collect(),
+    tbl
+  )
+  # More concisely:
+  skip("Not implemented")
+  expect_dplyr_equal(
+    input %>%
+      group_by(some_grouping) %>%
+      summarize(any(lgl) & !all(lgl)) %>%
+      arrange(some_grouping) %>%
+      collect(),
+    tbl
+  )
+})
\ No newline at end of file

From 1bf8a074b60386e093ee8210834161edf8402d3d Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Tue, 24 Aug 2021 16:51:26 -0400
Subject: [PATCH 02/27] Implement order_by_sink and sort results of summarize

---
 r/R/arrowExports.R                      |   5 +-
 r/R/dplyr-collect.R                     |  32 +-
 r/R/dplyr-summarize.R                   |   6 -
 r/R/dplyr.R                             |  22 -
 r/R/query-engine.R                      |  38 +-
 r/src/arrowExports.cpp                  | 899 ++++++++++++------------
 r/src/compute-exec.cpp                  |  16 +-
 r/tests/testthat/test-dataset.R         |   4 +-
 r/tests/testthat/test-dplyr-aggregate.R |  15 +-
 9 files changed, 530 insertions(+), 507 deletions(-)

diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index 72a5e455858..ce6d2e872d4 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -280,8 +280,8 @@ ExecPlan_create <- function(use_threads) {
   .Call(`_arrow_ExecPlan_create`, use_threads)
 }
 
-ExecPlan_run <- function(plan, final_node) {
-  .Call(`_arrow_ExecPlan_run`, plan, final_node)
+ExecPlan_run <- function(plan, final_node, sort_options) {
+  .Call(`_arrow_ExecPlan_run`, plan, final_node, sort_options)
 }
 
 ExecNode_Scan <- function(plan, dataset, filter, materialized_field_names) {
@@ -1767,3 +1767,4 @@ SetIOThreadPoolCapacity <- function(threads) {
 Array__infer_type <- function(x) {
   .Call(`_arrow_Array__infer_type`, x)
 }
+
diff --git a/r/R/dplyr-collect.R b/r/R/dplyr-collect.R
index d17b0ddfca2..7db1b682305 100644
--- a/r/R/dplyr-collect.R
+++ b/r/R/dplyr-collect.R
@@ -22,14 +22,6 @@ collect.arrow_dplyr_query <- function(x, as_data_frame = TRUE, ...) {
   # Pull only the selected rows and cols into R
   # See query-engine.R for ExecPlan/Nodes
   tab <- do_exec_plan(x)
-  # Arrange rows
-  if (length(x$arrange_vars) > 0) {
-    tab <- tab[
-      tab$SortIndices(names(x$arrange_vars), x$arrange_desc),
-      names(x$selected_columns), # this omits x$temp_columns from the result
-      drop = FALSE
-    ]
-  }
   if (as_data_frame) {
     df <- as.data.frame(tab)
     tab$invalidate()
@@ -57,4 +49,26 @@ pull.arrow_dplyr_query <- function(.data, var = -1) {
   .data$selected_columns <- set_names(.data$selected_columns[var], var)
   dplyr::collect(.data)[[1]]
 }
-pull.Dataset <- pull.ArrowTabular <- pull.arrow_dplyr_query
\ No newline at end of file
+pull.Dataset <- pull.ArrowTabular <- pull.arrow_dplyr_query
+
+restore_dplyr_features <- function(df, query) {
+  # An arrow_dplyr_query holds some attributes that Arrow doesn't know about
+  # After calling collect(), make sure these features are carried over
+
+  if (length(query$group_by_vars) > 0) {
+    # Preserve groupings, if present
+    if (is.data.frame(df)) {
+      df <- dplyr::grouped_df(
+        df,
+        dplyr::group_vars(query),
+        drop = dplyr::group_by_drop_default(query)
+      )
+    } else {
+      # This is a Table, via compute() or collect(as_data_frame = FALSE)
+      df <- arrow_dplyr_query(df)
+      df$group_by_vars <- query$group_by_vars
+      df$drop_empty_groups <- query$drop_empty_groups
+    }
+  }
+  df
+}
diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R
index a4d6db9cf38..9a0cfc5bf91 100644
--- a/r/R/dplyr-summarize.R
+++ b/r/R/dplyr-summarize.R
@@ -84,9 +84,3 @@ do_arrow_summarize <- function(.data, ..., .groups = NULL) {
   # Eventually, we will return .data here if (dataset) but do it eagerly now
   do_exec_plan(.data)
 }
-
-do_exec_plan <- function(.data) {
-  plan <- ExecPlan$create()
-  final_node <- plan$Build(.data)
-  plan$Run(final_node)
-}
\ No newline at end of file
diff --git a/r/R/dplyr.R b/r/R/dplyr.R
index b2793bdb3c3..c3029a114c3 100644
--- a/r/R/dplyr.R
+++ b/r/R/dplyr.R
@@ -191,28 +191,6 @@ ensure_arrange_vars <- function(x) {
   x
 }
 
-restore_dplyr_features <- function(df, query) {
-  # An arrow_dplyr_query holds some attributes that Arrow doesn't know about
-  # After calling collect(), make sure these features are carried over
-
-  if (length(query$group_by_vars) > 0) {
-    # Preserve groupings, if present
-    if (is.data.frame(df)) {
-      df <- dplyr::grouped_df(
-        df,
-        dplyr::group_vars(query),
-        drop = dplyr::group_by_drop_default(query)
-      )
-    } else {
-      # This is a Table, via compute() or collect(as_data_frame = FALSE)
-      df <- arrow_dplyr_query(df)
-      df$group_by_vars <- query$group_by_vars
-      df$drop_empty_groups <- query$drop_empty_groups
-    }
-  }
-  df
-}
-
 # Helper to handle unsupported dplyr features
 # * For Table/RecordBatch, we collect() and then call the dplyr method in R
 # * For Dataset, we just error
diff --git a/r/R/query-engine.R b/r/R/query-engine.R
index 2aaaa2c6597..c595dd27df3 100644
--- a/r/R/query-engine.R
+++ b/r/R/query-engine.R
@@ -15,6 +15,19 @@
 # specific language governing permissions and limitations
 # under the License.
 
+do_exec_plan <- function(.data) {
+  plan <- ExecPlan$create()
+  final_node <- plan$Build(.data)
+  tab <- plan$Run(final_node)
+
+  if (length(final_node$sort$temp_columns) > 0) {
+    # If arrange() created $temp_columns, make sure to omit them from the result
+    tab <- tab[, setdiff(names(tab), final_node$sort$temp_columns), drop = FALSE]
+  }
+
+  tab
+}
+
 ExecPlan <- R6Class("ExecPlan",
   inherit = ArrowObject,
   public = list(
@@ -85,11 +98,33 @@ ExecPlan <- R6Class("ExecPlan",
           )
         }
       }
+
+      # tab <- tab[
+      #   tab$SortIndices(names(x$arrange_vars), x$arrange_desc),
+      #   names(x$selected_columns), # this omits x$temp_columns from the result
+      #   drop = FALSE
+      # ]
+
+      # Apply sorting: this is currently not an ExecNode itself, it is a
+      # sink node option.
+      # TODO: error if doing a subsequent operation that would throw away sorting!
+      if (length(.data$arrange_vars)) {
+        node$sort <- list(
+          names = names(.data$arrange_vars),
+          orders = as.integer(.data$arrange_desc),
+          temp_columns = names(.data$temp_columns)
+        )
+      } else if (length(.data$aggregations) && grouped) {
+        node$sort <- list(
+          names = group_vars,
+          orders = rep(0L, length(group_vars))
+        )
+      }
       node
     },
     Run = function(node) {
       assert_is(node, "ExecNode")
-      ExecPlan_run(self, node)
+      ExecPlan_run(self, node, node$sort %||% list())
     }
   )
 )
@@ -100,6 +135,7 @@ ExecPlan$create <- function(use_threads = option_use_threads()) {
 ExecNode <- R6Class("ExecNode",
   inherit = ArrowObject,
   public = list(
+    sort = NULL,
     Project = function(cols) {
       if (length(cols)) {
         assert_is_list_of(cols, "Expression")
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index cb69ce17442..de8ca36af6c 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -1094,16 +1094,17 @@ extern "C" SEXP _arrow_ExecPlan_create(SEXP use_threads_sexp){
 
 // compute-exec.cpp
 #if defined(ARROW_R_WITH_ARROW)
-std::shared_ptr<arrow::Table> ExecPlan_run(const std::shared_ptr<compute::ExecPlan>& plan, const std::shared_ptr<compute::ExecNode>& final_node);
-extern "C" SEXP _arrow_ExecPlan_run(SEXP plan_sexp, SEXP final_node_sexp){
+std::shared_ptr<arrow::Table> ExecPlan_run(const std::shared_ptr<compute::ExecPlan>& plan, const std::shared_ptr<compute::ExecNode>& final_node, cpp11::list sort_options);
+extern "C" SEXP _arrow_ExecPlan_run(SEXP plan_sexp, SEXP final_node_sexp, SEXP sort_options_sexp){
 BEGIN_CPP11
 	arrow::r::Input<const std::shared_ptr<compute::ExecPlan>&>::type plan(plan_sexp);
 	arrow::r::Input<const std::shared_ptr<compute::ExecNode>&>::type final_node(final_node_sexp);
-	return cpp11::as_sexp(ExecPlan_run(plan, final_node));
+	arrow::r::Input<cpp11::list>::type sort_options(sort_options_sexp);
+	return cpp11::as_sexp(ExecPlan_run(plan, final_node, sort_options));
 END_CPP11
 }
 #else
-extern "C" SEXP _arrow_ExecPlan_run(SEXP plan_sexp, SEXP final_node_sexp){
+extern "C" SEXP _arrow_ExecPlan_run(SEXP plan_sexp, SEXP final_node_sexp, SEXP sort_options_sexp){
 	Rf_error("Cannot call ExecPlan_run(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
 }
 #endif
@@ -7035,450 +7036,450 @@ static const R_CallMethodDef CallEntries[] = {
 		{ "_parquet_available", (DL_FUNC)& _parquet_available, 0 },
 		{ "_s3_available", (DL_FUNC)& _s3_available, 0 },
 		{ "_json_available", (DL_FUNC)& _json_available, 0 },
-		{ "_arrow_is_altrep", (DL_FUNC) &_arrow_is_altrep, 1},
-		{ "_arrow_Array__Slice1", (DL_FUNC) &_arrow_Array__Slice1, 2},
-		{ "_arrow_Array__Slice2", (DL_FUNC) &_arrow_Array__Slice2, 3},
-		{ "_arrow_Array__IsNull", (DL_FUNC) &_arrow_Array__IsNull, 2},
-		{ "_arrow_Array__IsValid", (DL_FUNC) &_arrow_Array__IsValid, 2},
-		{ "_arrow_Array__length", (DL_FUNC) &_arrow_Array__length, 1},
-		{ "_arrow_Array__offset", (DL_FUNC) &_arrow_Array__offset, 1},
-		{ "_arrow_Array__null_count", (DL_FUNC) &_arrow_Array__null_count, 1},
-		{ "_arrow_Array__type", (DL_FUNC) &_arrow_Array__type, 1},
-		{ "_arrow_Array__ToString", (DL_FUNC) &_arrow_Array__ToString, 1},
-		{ "_arrow_Array__type_id", (DL_FUNC) &_arrow_Array__type_id, 1},
-		{ "_arrow_Array__Equals", (DL_FUNC) &_arrow_Array__Equals, 2},
-		{ "_arrow_Array__ApproxEquals", (DL_FUNC) &_arrow_Array__ApproxEquals, 2},
-		{ "_arrow_Array__Diff", (DL_FUNC) &_arrow_Array__Diff, 2},
-		{ "_arrow_Array__data", (DL_FUNC) &_arrow_Array__data, 1},
-		{ "_arrow_Array__RangeEquals", (DL_FUNC) &_arrow_Array__RangeEquals, 5},
-		{ "_arrow_Array__View", (DL_FUNC) &_arrow_Array__View, 2},
-		{ "_arrow_Array__Validate", (DL_FUNC) &_arrow_Array__Validate, 1},
-		{ "_arrow_DictionaryArray__indices", (DL_FUNC) &_arrow_DictionaryArray__indices, 1},
-		{ "_arrow_DictionaryArray__dictionary", (DL_FUNC) &_arrow_DictionaryArray__dictionary, 1},
-		{ "_arrow_StructArray__field", (DL_FUNC) &_arrow_StructArray__field, 2},
-		{ "_arrow_StructArray__GetFieldByName", (DL_FUNC) &_arrow_StructArray__GetFieldByName, 2},
-		{ "_arrow_StructArray__Flatten", (DL_FUNC) &_arrow_StructArray__Flatten, 1},
-		{ "_arrow_ListArray__value_type", (DL_FUNC) &_arrow_ListArray__value_type, 1},
-		{ "_arrow_LargeListArray__value_type", (DL_FUNC) &_arrow_LargeListArray__value_type, 1},
-		{ "_arrow_ListArray__values", (DL_FUNC) &_arrow_ListArray__values, 1},
-		{ "_arrow_LargeListArray__values", (DL_FUNC) &_arrow_LargeListArray__values, 1},
-		{ "_arrow_ListArray__value_length", (DL_FUNC) &_arrow_ListArray__value_length, 2},
-		{ "_arrow_LargeListArray__value_length", (DL_FUNC) &_arrow_LargeListArray__value_length, 2},
-		{ "_arrow_FixedSizeListArray__value_length", (DL_FUNC) &_arrow_FixedSizeListArray__value_length, 2},
-		{ "_arrow_ListArray__value_offset", (DL_FUNC) &_arrow_ListArray__value_offset, 2},
-		{ "_arrow_LargeListArray__value_offset", (DL_FUNC) &_arrow_LargeListArray__value_offset, 2},
-		{ "_arrow_FixedSizeListArray__value_offset", (DL_FUNC) &_arrow_FixedSizeListArray__value_offset, 2},
-		{ "_arrow_ListArray__raw_value_offsets", (DL_FUNC) &_arrow_ListArray__raw_value_offsets, 1},
-		{ "_arrow_LargeListArray__raw_value_offsets", (DL_FUNC) &_arrow_LargeListArray__raw_value_offsets, 1},
-		{ "_arrow_Array__as_vector", (DL_FUNC) &_arrow_Array__as_vector, 1},
-		{ "_arrow_ChunkedArray__as_vector", (DL_FUNC) &_arrow_ChunkedArray__as_vector, 2},
-		{ "_arrow_RecordBatch__to_dataframe", (DL_FUNC) &_arrow_RecordBatch__to_dataframe, 2},
-		{ "_arrow_Table__to_dataframe", (DL_FUNC) &_arrow_Table__to_dataframe, 2},
-		{ "_arrow_ArrayData__get_type", (DL_FUNC) &_arrow_ArrayData__get_type, 1},
-		{ "_arrow_ArrayData__get_length", (DL_FUNC) &_arrow_ArrayData__get_length, 1},
-		{ "_arrow_ArrayData__get_null_count", (DL_FUNC) &_arrow_ArrayData__get_null_count, 1},
-		{ "_arrow_ArrayData__get_offset", (DL_FUNC) &_arrow_ArrayData__get_offset, 1},
-		{ "_arrow_ArrayData__buffers", (DL_FUNC) &_arrow_ArrayData__buffers, 1},
-		{ "_arrow_Buffer__is_mutable", (DL_FUNC) &_arrow_Buffer__is_mutable, 1},
-		{ "_arrow_Buffer__ZeroPadding", (DL_FUNC) &_arrow_Buffer__ZeroPadding, 1},
-		{ "_arrow_Buffer__capacity", (DL_FUNC) &_arrow_Buffer__capacity, 1},
-		{ "_arrow_Buffer__size", (DL_FUNC) &_arrow_Buffer__size, 1},
-		{ "_arrow_r___RBuffer__initialize", (DL_FUNC) &_arrow_r___RBuffer__initialize, 1},
-		{ "_arrow_Buffer__data", (DL_FUNC) &_arrow_Buffer__data, 1},
-		{ "_arrow_Buffer__Equals", (DL_FUNC) &_arrow_Buffer__Equals, 2},
-		{ "_arrow_ChunkedArray__length", (DL_FUNC) &_arrow_ChunkedArray__length, 1},
-		{ "_arrow_ChunkedArray__null_count", (DL_FUNC) &_arrow_ChunkedArray__null_count, 1},
-		{ "_arrow_ChunkedArray__num_chunks", (DL_FUNC) &_arrow_ChunkedArray__num_chunks, 1},
-		{ "_arrow_ChunkedArray__chunk", (DL_FUNC) &_arrow_ChunkedArray__chunk, 2},
-		{ "_arrow_ChunkedArray__chunks", (DL_FUNC) &_arrow_ChunkedArray__chunks, 1},
-		{ "_arrow_ChunkedArray__type", (DL_FUNC) &_arrow_ChunkedArray__type, 1},
-		{ "_arrow_ChunkedArray__Slice1", (DL_FUNC) &_arrow_ChunkedArray__Slice1, 2},
-		{ "_arrow_ChunkedArray__Slice2", (DL_FUNC) &_arrow_ChunkedArray__Slice2, 3},
-		{ "_arrow_ChunkedArray__View", (DL_FUNC) &_arrow_ChunkedArray__View, 2},
-		{ "_arrow_ChunkedArray__Validate", (DL_FUNC) &_arrow_ChunkedArray__Validate, 1},
-		{ "_arrow_ChunkedArray__Equals", (DL_FUNC) &_arrow_ChunkedArray__Equals, 2},
-		{ "_arrow_ChunkedArray__ToString", (DL_FUNC) &_arrow_ChunkedArray__ToString, 1},
-		{ "_arrow_ChunkedArray__from_list", (DL_FUNC) &_arrow_ChunkedArray__from_list, 2},
-		{ "_arrow_util___Codec__Create", (DL_FUNC) &_arrow_util___Codec__Create, 2},
-		{ "_arrow_util___Codec__name", (DL_FUNC) &_arrow_util___Codec__name, 1},
-		{ "_arrow_util___Codec__IsAvailable", (DL_FUNC) &_arrow_util___Codec__IsAvailable, 1},
-		{ "_arrow_io___CompressedOutputStream__Make", (DL_FUNC) &_arrow_io___CompressedOutputStream__Make, 2},
-		{ "_arrow_io___CompressedInputStream__Make", (DL_FUNC) &_arrow_io___CompressedInputStream__Make, 2},
-		{ "_arrow_ExecPlan_create", (DL_FUNC) &_arrow_ExecPlan_create, 1},
-		{ "_arrow_ExecPlan_run", (DL_FUNC) &_arrow_ExecPlan_run, 2},
-		{ "_arrow_ExecNode_Scan", (DL_FUNC) &_arrow_ExecNode_Scan, 4},
-		{ "_arrow_ExecNode_Filter", (DL_FUNC) &_arrow_ExecNode_Filter, 2},
-		{ "_arrow_ExecNode_Project", (DL_FUNC) &_arrow_ExecNode_Project, 3},
-		{ "_arrow_ExecNode_Aggregate", (DL_FUNC) &_arrow_ExecNode_Aggregate, 5},
-		{ "_arrow_RecordBatch__cast", (DL_FUNC) &_arrow_RecordBatch__cast, 3},
-		{ "_arrow_Table__cast", (DL_FUNC) &_arrow_Table__cast, 3},
-		{ "_arrow_compute__CallFunction", (DL_FUNC) &_arrow_compute__CallFunction, 3},
-		{ "_arrow_compute__GetFunctionNames", (DL_FUNC) &_arrow_compute__GetFunctionNames, 0},
-		{ "_arrow_build_info", (DL_FUNC) &_arrow_build_info, 0},
-		{ "_arrow_runtime_info", (DL_FUNC) &_arrow_runtime_info, 0},
-		{ "_arrow_csv___WriteOptions__initialize", (DL_FUNC) &_arrow_csv___WriteOptions__initialize, 1},
-		{ "_arrow_csv___ReadOptions__initialize", (DL_FUNC) &_arrow_csv___ReadOptions__initialize, 1},
-		{ "_arrow_csv___ParseOptions__initialize", (DL_FUNC) &_arrow_csv___ParseOptions__initialize, 1},
-		{ "_arrow_csv___ReadOptions__column_names", (DL_FUNC) &_arrow_csv___ReadOptions__column_names, 1},
-		{ "_arrow_csv___ConvertOptions__initialize", (DL_FUNC) &_arrow_csv___ConvertOptions__initialize, 1},
-		{ "_arrow_csv___TableReader__Make", (DL_FUNC) &_arrow_csv___TableReader__Make, 4},
-		{ "_arrow_csv___TableReader__Read", (DL_FUNC) &_arrow_csv___TableReader__Read, 1},
-		{ "_arrow_TimestampParser__kind", (DL_FUNC) &_arrow_TimestampParser__kind, 1},
-		{ "_arrow_TimestampParser__format", (DL_FUNC) &_arrow_TimestampParser__format, 1},
-		{ "_arrow_TimestampParser__MakeStrptime", (DL_FUNC) &_arrow_TimestampParser__MakeStrptime, 1},
-		{ "_arrow_TimestampParser__MakeISO8601", (DL_FUNC) &_arrow_TimestampParser__MakeISO8601, 0},
-		{ "_arrow_csv___WriteCSV__Table", (DL_FUNC) &_arrow_csv___WriteCSV__Table, 3},
-		{ "_arrow_csv___WriteCSV__RecordBatch", (DL_FUNC) &_arrow_csv___WriteCSV__RecordBatch, 3},
-		{ "_arrow_dataset___Dataset__NewScan", (DL_FUNC) &_arrow_dataset___Dataset__NewScan, 1},
-		{ "_arrow_dataset___Dataset__schema", (DL_FUNC) &_arrow_dataset___Dataset__schema, 1},
-		{ "_arrow_dataset___Dataset__type_name", (DL_FUNC) &_arrow_dataset___Dataset__type_name, 1},
-		{ "_arrow_dataset___Dataset__ReplaceSchema", (DL_FUNC) &_arrow_dataset___Dataset__ReplaceSchema, 2},
-		{ "_arrow_dataset___UnionDataset__create", (DL_FUNC) &_arrow_dataset___UnionDataset__create, 2},
-		{ "_arrow_dataset___InMemoryDataset__create", (DL_FUNC) &_arrow_dataset___InMemoryDataset__create, 1},
-		{ "_arrow_dataset___UnionDataset__children", (DL_FUNC) &_arrow_dataset___UnionDataset__children, 1},
-		{ "_arrow_dataset___FileSystemDataset__format", (DL_FUNC) &_arrow_dataset___FileSystemDataset__format, 1},
-		{ "_arrow_dataset___FileSystemDataset__filesystem", (DL_FUNC) &_arrow_dataset___FileSystemDataset__filesystem, 1},
-		{ "_arrow_dataset___FileSystemDataset__files", (DL_FUNC) &_arrow_dataset___FileSystemDataset__files, 1},
-		{ "_arrow_dataset___DatasetFactory__Finish1", (DL_FUNC) &_arrow_dataset___DatasetFactory__Finish1, 2},
-		{ "_arrow_dataset___DatasetFactory__Finish2", (DL_FUNC) &_arrow_dataset___DatasetFactory__Finish2, 2},
-		{ "_arrow_dataset___DatasetFactory__Inspect", (DL_FUNC) &_arrow_dataset___DatasetFactory__Inspect, 2},
-		{ "_arrow_dataset___UnionDatasetFactory__Make", (DL_FUNC) &_arrow_dataset___UnionDatasetFactory__Make, 1},
-		{ "_arrow_dataset___FileSystemDatasetFactory__Make0", (DL_FUNC) &_arrow_dataset___FileSystemDatasetFactory__Make0, 3},
-		{ "_arrow_dataset___FileSystemDatasetFactory__Make2", (DL_FUNC) &_arrow_dataset___FileSystemDatasetFactory__Make2, 4},
-		{ "_arrow_dataset___FileSystemDatasetFactory__Make1", (DL_FUNC) &_arrow_dataset___FileSystemDatasetFactory__Make1, 3},
-		{ "_arrow_dataset___FileSystemDatasetFactory__Make3", (DL_FUNC) &_arrow_dataset___FileSystemDatasetFactory__Make3, 4},
-		{ "_arrow_dataset___FileFormat__type_name", (DL_FUNC) &_arrow_dataset___FileFormat__type_name, 1},
-		{ "_arrow_dataset___FileFormat__DefaultWriteOptions", (DL_FUNC) &_arrow_dataset___FileFormat__DefaultWriteOptions, 1},
-		{ "_arrow_dataset___ParquetFileFormat__Make", (DL_FUNC) &_arrow_dataset___ParquetFileFormat__Make, 2},
-		{ "_arrow_dataset___FileWriteOptions__type_name", (DL_FUNC) &_arrow_dataset___FileWriteOptions__type_name, 1},
-		{ "_arrow_dataset___ParquetFileWriteOptions__update", (DL_FUNC) &_arrow_dataset___ParquetFileWriteOptions__update, 3},
-		{ "_arrow_dataset___IpcFileWriteOptions__update2", (DL_FUNC) &_arrow_dataset___IpcFileWriteOptions__update2, 4},
-		{ "_arrow_dataset___IpcFileWriteOptions__update1", (DL_FUNC) &_arrow_dataset___IpcFileWriteOptions__update1, 3},
-		{ "_arrow_dataset___CsvFileWriteOptions__update", (DL_FUNC) &_arrow_dataset___CsvFileWriteOptions__update, 2},
-		{ "_arrow_dataset___IpcFileFormat__Make", (DL_FUNC) &_arrow_dataset___IpcFileFormat__Make, 0},
-		{ "_arrow_dataset___CsvFileFormat__Make", (DL_FUNC) &_arrow_dataset___CsvFileFormat__Make, 3},
-		{ "_arrow_dataset___FragmentScanOptions__type_name", (DL_FUNC) &_arrow_dataset___FragmentScanOptions__type_name, 1},
-		{ "_arrow_dataset___CsvFragmentScanOptions__Make", (DL_FUNC) &_arrow_dataset___CsvFragmentScanOptions__Make, 2},
-		{ "_arrow_dataset___ParquetFragmentScanOptions__Make", (DL_FUNC) &_arrow_dataset___ParquetFragmentScanOptions__Make, 3},
-		{ "_arrow_dataset___DirectoryPartitioning", (DL_FUNC) &_arrow_dataset___DirectoryPartitioning, 2},
-		{ "_arrow_dataset___DirectoryPartitioning__MakeFactory", (DL_FUNC) &_arrow_dataset___DirectoryPartitioning__MakeFactory, 2},
-		{ "_arrow_dataset___HivePartitioning", (DL_FUNC) &_arrow_dataset___HivePartitioning, 3},
-		{ "_arrow_dataset___HivePartitioning__MakeFactory", (DL_FUNC) &_arrow_dataset___HivePartitioning__MakeFactory, 2},
-		{ "_arrow_dataset___ScannerBuilder__ProjectNames", (DL_FUNC) &_arrow_dataset___ScannerBuilder__ProjectNames, 2},
-		{ "_arrow_dataset___ScannerBuilder__ProjectExprs", (DL_FUNC) &_arrow_dataset___ScannerBuilder__ProjectExprs, 3},
-		{ "_arrow_dataset___ScannerBuilder__Filter", (DL_FUNC) &_arrow_dataset___ScannerBuilder__Filter, 2},
-		{ "_arrow_dataset___ScannerBuilder__UseThreads", (DL_FUNC) &_arrow_dataset___ScannerBuilder__UseThreads, 2},
-		{ "_arrow_dataset___ScannerBuilder__UseAsync", (DL_FUNC) &_arrow_dataset___ScannerBuilder__UseAsync, 2},
-		{ "_arrow_dataset___ScannerBuilder__BatchSize", (DL_FUNC) &_arrow_dataset___ScannerBuilder__BatchSize, 2},
-		{ "_arrow_dataset___ScannerBuilder__FragmentScanOptions", (DL_FUNC) &_arrow_dataset___ScannerBuilder__FragmentScanOptions, 2},
-		{ "_arrow_dataset___ScannerBuilder__schema", (DL_FUNC) &_arrow_dataset___ScannerBuilder__schema, 1},
-		{ "_arrow_dataset___ScannerBuilder__Finish", (DL_FUNC) &_arrow_dataset___ScannerBuilder__Finish, 1},
-		{ "_arrow_dataset___Scanner__ToTable", (DL_FUNC) &_arrow_dataset___Scanner__ToTable, 1},
-		{ "_arrow_dataset___Scanner__ScanBatches", (DL_FUNC) &_arrow_dataset___Scanner__ScanBatches, 1},
-		{ "_arrow_dataset___Scanner__ToRecordBatchReader", (DL_FUNC) &_arrow_dataset___Scanner__ToRecordBatchReader, 1},
-		{ "_arrow_dataset___Scanner__head", (DL_FUNC) &_arrow_dataset___Scanner__head, 2},
-		{ "_arrow_dataset___Scanner__schema", (DL_FUNC) &_arrow_dataset___Scanner__schema, 1},
-		{ "_arrow_dataset___ScanTask__get_batches", (DL_FUNC) &_arrow_dataset___ScanTask__get_batches, 1},
-		{ "_arrow_dataset___Dataset__Write", (DL_FUNC) &_arrow_dataset___Dataset__Write, 6},
-		{ "_arrow_dataset___Scanner__TakeRows", (DL_FUNC) &_arrow_dataset___Scanner__TakeRows, 2},
-		{ "_arrow_dataset___Scanner__CountRows", (DL_FUNC) &_arrow_dataset___Scanner__CountRows, 1},
-		{ "_arrow_Int8__initialize", (DL_FUNC) &_arrow_Int8__initialize, 0},
-		{ "_arrow_Int16__initialize", (DL_FUNC) &_arrow_Int16__initialize, 0},
-		{ "_arrow_Int32__initialize", (DL_FUNC) &_arrow_Int32__initialize, 0},
-		{ "_arrow_Int64__initialize", (DL_FUNC) &_arrow_Int64__initialize, 0},
-		{ "_arrow_UInt8__initialize", (DL_FUNC) &_arrow_UInt8__initialize, 0},
-		{ "_arrow_UInt16__initialize", (DL_FUNC) &_arrow_UInt16__initialize, 0},
-		{ "_arrow_UInt32__initialize", (DL_FUNC) &_arrow_UInt32__initialize, 0},
-		{ "_arrow_UInt64__initialize", (DL_FUNC) &_arrow_UInt64__initialize, 0},
-		{ "_arrow_Float16__initialize", (DL_FUNC) &_arrow_Float16__initialize, 0},
-		{ "_arrow_Float32__initialize", (DL_FUNC) &_arrow_Float32__initialize, 0},
-		{ "_arrow_Float64__initialize", (DL_FUNC) &_arrow_Float64__initialize, 0},
-		{ "_arrow_Boolean__initialize", (DL_FUNC) &_arrow_Boolean__initialize, 0},
-		{ "_arrow_Utf8__initialize", (DL_FUNC) &_arrow_Utf8__initialize, 0},
-		{ "_arrow_LargeUtf8__initialize", (DL_FUNC) &_arrow_LargeUtf8__initialize, 0},
-		{ "_arrow_Binary__initialize", (DL_FUNC) &_arrow_Binary__initialize, 0},
-		{ "_arrow_LargeBinary__initialize", (DL_FUNC) &_arrow_LargeBinary__initialize, 0},
-		{ "_arrow_Date32__initialize", (DL_FUNC) &_arrow_Date32__initialize, 0},
-		{ "_arrow_Date64__initialize", (DL_FUNC) &_arrow_Date64__initialize, 0},
-		{ "_arrow_Null__initialize", (DL_FUNC) &_arrow_Null__initialize, 0},
-		{ "_arrow_Decimal128Type__initialize", (DL_FUNC) &_arrow_Decimal128Type__initialize, 2},
-		{ "_arrow_FixedSizeBinary__initialize", (DL_FUNC) &_arrow_FixedSizeBinary__initialize, 1},
-		{ "_arrow_Timestamp__initialize", (DL_FUNC) &_arrow_Timestamp__initialize, 2},
-		{ "_arrow_Time32__initialize", (DL_FUNC) &_arrow_Time32__initialize, 1},
-		{ "_arrow_Time64__initialize", (DL_FUNC) &_arrow_Time64__initialize, 1},
-		{ "_arrow_list__", (DL_FUNC) &_arrow_list__, 1},
-		{ "_arrow_large_list__", (DL_FUNC) &_arrow_large_list__, 1},
-		{ "_arrow_fixed_size_list__", (DL_FUNC) &_arrow_fixed_size_list__, 2},
-		{ "_arrow_struct__", (DL_FUNC) &_arrow_struct__, 1},
-		{ "_arrow_DataType__ToString", (DL_FUNC) &_arrow_DataType__ToString, 1},
-		{ "_arrow_DataType__name", (DL_FUNC) &_arrow_DataType__name, 1},
-		{ "_arrow_DataType__Equals", (DL_FUNC) &_arrow_DataType__Equals, 2},
-		{ "_arrow_DataType__num_fields", (DL_FUNC) &_arrow_DataType__num_fields, 1},
-		{ "_arrow_DataType__fields", (DL_FUNC) &_arrow_DataType__fields, 1},
-		{ "_arrow_DataType__id", (DL_FUNC) &_arrow_DataType__id, 1},
-		{ "_arrow_ListType__ToString", (DL_FUNC) &_arrow_ListType__ToString, 1},
-		{ "_arrow_FixedWidthType__bit_width", (DL_FUNC) &_arrow_FixedWidthType__bit_width, 1},
-		{ "_arrow_DateType__unit", (DL_FUNC) &_arrow_DateType__unit, 1},
-		{ "_arrow_TimeType__unit", (DL_FUNC) &_arrow_TimeType__unit, 1},
-		{ "_arrow_DecimalType__precision", (DL_FUNC) &_arrow_DecimalType__precision, 1},
-		{ "_arrow_DecimalType__scale", (DL_FUNC) &_arrow_DecimalType__scale, 1},
-		{ "_arrow_TimestampType__timezone", (DL_FUNC) &_arrow_TimestampType__timezone, 1},
-		{ "_arrow_TimestampType__unit", (DL_FUNC) &_arrow_TimestampType__unit, 1},
-		{ "_arrow_DictionaryType__initialize", (DL_FUNC) &_arrow_DictionaryType__initialize, 3},
-		{ "_arrow_DictionaryType__index_type", (DL_FUNC) &_arrow_DictionaryType__index_type, 1},
-		{ "_arrow_DictionaryType__value_type", (DL_FUNC) &_arrow_DictionaryType__value_type, 1},
-		{ "_arrow_DictionaryType__name", (DL_FUNC) &_arrow_DictionaryType__name, 1},
-		{ "_arrow_DictionaryType__ordered", (DL_FUNC) &_arrow_DictionaryType__ordered, 1},
-		{ "_arrow_StructType__GetFieldByName", (DL_FUNC) &_arrow_StructType__GetFieldByName, 2},
-		{ "_arrow_StructType__GetFieldIndex", (DL_FUNC) &_arrow_StructType__GetFieldIndex, 2},
-		{ "_arrow_StructType__field_names", (DL_FUNC) &_arrow_StructType__field_names, 1},
-		{ "_arrow_ListType__value_field", (DL_FUNC) &_arrow_ListType__value_field, 1},
-		{ "_arrow_ListType__value_type", (DL_FUNC) &_arrow_ListType__value_type, 1},
-		{ "_arrow_LargeListType__value_field", (DL_FUNC) &_arrow_LargeListType__value_field, 1},
-		{ "_arrow_LargeListType__value_type", (DL_FUNC) &_arrow_LargeListType__value_type, 1},
-		{ "_arrow_FixedSizeListType__value_field", (DL_FUNC) &_arrow_FixedSizeListType__value_field, 1},
-		{ "_arrow_FixedSizeListType__value_type", (DL_FUNC) &_arrow_FixedSizeListType__value_type, 1},
-		{ "_arrow_FixedSizeListType__list_size", (DL_FUNC) &_arrow_FixedSizeListType__list_size, 1},
-		{ "_arrow_compute___expr__call", (DL_FUNC) &_arrow_compute___expr__call, 3},
-		{ "_arrow_field_names_in_expression", (DL_FUNC) &_arrow_field_names_in_expression, 1},
-		{ "_arrow_compute___expr__get_field_ref_name", (DL_FUNC) &_arrow_compute___expr__get_field_ref_name, 1},
-		{ "_arrow_compute___expr__field_ref", (DL_FUNC) &_arrow_compute___expr__field_ref, 1},
-		{ "_arrow_compute___expr__scalar", (DL_FUNC) &_arrow_compute___expr__scalar, 1},
-		{ "_arrow_compute___expr__ToString", (DL_FUNC) &_arrow_compute___expr__ToString, 1},
-		{ "_arrow_compute___expr__type", (DL_FUNC) &_arrow_compute___expr__type, 2},
-		{ "_arrow_compute___expr__type_id", (DL_FUNC) &_arrow_compute___expr__type_id, 2},
-		{ "_arrow_ipc___WriteFeather__Table", (DL_FUNC) &_arrow_ipc___WriteFeather__Table, 6},
-		{ "_arrow_ipc___feather___Reader__version", (DL_FUNC) &_arrow_ipc___feather___Reader__version, 1},
-		{ "_arrow_ipc___feather___Reader__Read", (DL_FUNC) &_arrow_ipc___feather___Reader__Read, 2},
-		{ "_arrow_ipc___feather___Reader__Open", (DL_FUNC) &_arrow_ipc___feather___Reader__Open, 1},
-		{ "_arrow_ipc___feather___Reader__schema", (DL_FUNC) &_arrow_ipc___feather___Reader__schema, 1},
-		{ "_arrow_Field__initialize", (DL_FUNC) &_arrow_Field__initialize, 3},
-		{ "_arrow_Field__ToString", (DL_FUNC) &_arrow_Field__ToString, 1},
-		{ "_arrow_Field__name", (DL_FUNC) &_arrow_Field__name, 1},
-		{ "_arrow_Field__Equals", (DL_FUNC) &_arrow_Field__Equals, 2},
-		{ "_arrow_Field__nullable", (DL_FUNC) &_arrow_Field__nullable, 1},
-		{ "_arrow_Field__type", (DL_FUNC) &_arrow_Field__type, 1},
-		{ "_arrow_fs___FileInfo__type", (DL_FUNC) &_arrow_fs___FileInfo__type, 1},
-		{ "_arrow_fs___FileInfo__set_type", (DL_FUNC) &_arrow_fs___FileInfo__set_type, 2},
-		{ "_arrow_fs___FileInfo__path", (DL_FUNC) &_arrow_fs___FileInfo__path, 1},
-		{ "_arrow_fs___FileInfo__set_path", (DL_FUNC) &_arrow_fs___FileInfo__set_path, 2},
-		{ "_arrow_fs___FileInfo__size", (DL_FUNC) &_arrow_fs___FileInfo__size, 1},
-		{ "_arrow_fs___FileInfo__set_size", (DL_FUNC) &_arrow_fs___FileInfo__set_size, 2},
-		{ "_arrow_fs___FileInfo__base_name", (DL_FUNC) &_arrow_fs___FileInfo__base_name, 1},
-		{ "_arrow_fs___FileInfo__extension", (DL_FUNC) &_arrow_fs___FileInfo__extension, 1},
-		{ "_arrow_fs___FileInfo__mtime", (DL_FUNC) &_arrow_fs___FileInfo__mtime, 1},
-		{ "_arrow_fs___FileInfo__set_mtime", (DL_FUNC) &_arrow_fs___FileInfo__set_mtime, 2},
-		{ "_arrow_fs___FileSelector__base_dir", (DL_FUNC) &_arrow_fs___FileSelector__base_dir, 1},
-		{ "_arrow_fs___FileSelector__allow_not_found", (DL_FUNC) &_arrow_fs___FileSelector__allow_not_found, 1},
-		{ "_arrow_fs___FileSelector__recursive", (DL_FUNC) &_arrow_fs___FileSelector__recursive, 1},
-		{ "_arrow_fs___FileSelector__create", (DL_FUNC) &_arrow_fs___FileSelector__create, 3},
-		{ "_arrow_fs___FileSystem__GetTargetInfos_Paths", (DL_FUNC) &_arrow_fs___FileSystem__GetTargetInfos_Paths, 2},
-		{ "_arrow_fs___FileSystem__GetTargetInfos_FileSelector", (DL_FUNC) &_arrow_fs___FileSystem__GetTargetInfos_FileSelector, 2},
-		{ "_arrow_fs___FileSystem__CreateDir", (DL_FUNC) &_arrow_fs___FileSystem__CreateDir, 3},
-		{ "_arrow_fs___FileSystem__DeleteDir", (DL_FUNC) &_arrow_fs___FileSystem__DeleteDir, 2},
-		{ "_arrow_fs___FileSystem__DeleteDirContents", (DL_FUNC) &_arrow_fs___FileSystem__DeleteDirContents, 2},
-		{ "_arrow_fs___FileSystem__DeleteFile", (DL_FUNC) &_arrow_fs___FileSystem__DeleteFile, 2},
-		{ "_arrow_fs___FileSystem__DeleteFiles", (DL_FUNC) &_arrow_fs___FileSystem__DeleteFiles, 2},
-		{ "_arrow_fs___FileSystem__Move", (DL_FUNC) &_arrow_fs___FileSystem__Move, 3},
-		{ "_arrow_fs___FileSystem__CopyFile", (DL_FUNC) &_arrow_fs___FileSystem__CopyFile, 3},
-		{ "_arrow_fs___FileSystem__OpenInputStream", (DL_FUNC) &_arrow_fs___FileSystem__OpenInputStream, 2},
-		{ "_arrow_fs___FileSystem__OpenInputFile", (DL_FUNC) &_arrow_fs___FileSystem__OpenInputFile, 2},
-		{ "_arrow_fs___FileSystem__OpenOutputStream", (DL_FUNC) &_arrow_fs___FileSystem__OpenOutputStream, 2},
-		{ "_arrow_fs___FileSystem__OpenAppendStream", (DL_FUNC) &_arrow_fs___FileSystem__OpenAppendStream, 2},
-		{ "_arrow_fs___FileSystem__type_name", (DL_FUNC) &_arrow_fs___FileSystem__type_name, 1},
-		{ "_arrow_fs___LocalFileSystem__create", (DL_FUNC) &_arrow_fs___LocalFileSystem__create, 0},
-		{ "_arrow_fs___SubTreeFileSystem__create", (DL_FUNC) &_arrow_fs___SubTreeFileSystem__create, 2},
-		{ "_arrow_fs___SubTreeFileSystem__base_fs", (DL_FUNC) &_arrow_fs___SubTreeFileSystem__base_fs, 1},
-		{ "_arrow_fs___SubTreeFileSystem__base_path", (DL_FUNC) &_arrow_fs___SubTreeFileSystem__base_path, 1},
-		{ "_arrow_fs___FileSystemFromUri", (DL_FUNC) &_arrow_fs___FileSystemFromUri, 1},
-		{ "_arrow_fs___CopyFiles", (DL_FUNC) &_arrow_fs___CopyFiles, 6},
-		{ "_arrow_fs___S3FileSystem__create", (DL_FUNC) &_arrow_fs___S3FileSystem__create, 12},
-		{ "_arrow_fs___S3FileSystem__region", (DL_FUNC) &_arrow_fs___S3FileSystem__region, 1},
-		{ "_arrow_io___Readable__Read", (DL_FUNC) &_arrow_io___Readable__Read, 2},
-		{ "_arrow_io___InputStream__Close", (DL_FUNC) &_arrow_io___InputStream__Close, 1},
-		{ "_arrow_io___OutputStream__Close", (DL_FUNC) &_arrow_io___OutputStream__Close, 1},
-		{ "_arrow_io___RandomAccessFile__GetSize", (DL_FUNC) &_arrow_io___RandomAccessFile__GetSize, 1},
-		{ "_arrow_io___RandomAccessFile__supports_zero_copy", (DL_FUNC) &_arrow_io___RandomAccessFile__supports_zero_copy, 1},
-		{ "_arrow_io___RandomAccessFile__Seek", (DL_FUNC) &_arrow_io___RandomAccessFile__Seek, 2},
-		{ "_arrow_io___RandomAccessFile__Tell", (DL_FUNC) &_arrow_io___RandomAccessFile__Tell, 1},
-		{ "_arrow_io___RandomAccessFile__Read0", (DL_FUNC) &_arrow_io___RandomAccessFile__Read0, 1},
-		{ "_arrow_io___RandomAccessFile__ReadAt", (DL_FUNC) &_arrow_io___RandomAccessFile__ReadAt, 3},
-		{ "_arrow_io___MemoryMappedFile__Create", (DL_FUNC) &_arrow_io___MemoryMappedFile__Create, 2},
-		{ "_arrow_io___MemoryMappedFile__Open", (DL_FUNC) &_arrow_io___MemoryMappedFile__Open, 2},
-		{ "_arrow_io___MemoryMappedFile__Resize", (DL_FUNC) &_arrow_io___MemoryMappedFile__Resize, 2},
-		{ "_arrow_io___ReadableFile__Open", (DL_FUNC) &_arrow_io___ReadableFile__Open, 1},
-		{ "_arrow_io___BufferReader__initialize", (DL_FUNC) &_arrow_io___BufferReader__initialize, 1},
-		{ "_arrow_io___Writable__write", (DL_FUNC) &_arrow_io___Writable__write, 2},
-		{ "_arrow_io___OutputStream__Tell", (DL_FUNC) &_arrow_io___OutputStream__Tell, 1},
-		{ "_arrow_io___FileOutputStream__Open", (DL_FUNC) &_arrow_io___FileOutputStream__Open, 1},
-		{ "_arrow_io___BufferOutputStream__Create", (DL_FUNC) &_arrow_io___BufferOutputStream__Create, 1},
-		{ "_arrow_io___BufferOutputStream__capacity", (DL_FUNC) &_arrow_io___BufferOutputStream__capacity, 1},
-		{ "_arrow_io___BufferOutputStream__Finish", (DL_FUNC) &_arrow_io___BufferOutputStream__Finish, 1},
-		{ "_arrow_io___BufferOutputStream__Tell", (DL_FUNC) &_arrow_io___BufferOutputStream__Tell, 1},
-		{ "_arrow_io___BufferOutputStream__Write", (DL_FUNC) &_arrow_io___BufferOutputStream__Write, 2},
-		{ "_arrow_json___ReadOptions__initialize", (DL_FUNC) &_arrow_json___ReadOptions__initialize, 2},
-		{ "_arrow_json___ParseOptions__initialize1", (DL_FUNC) &_arrow_json___ParseOptions__initialize1, 1},
-		{ "_arrow_json___ParseOptions__initialize2", (DL_FUNC) &_arrow_json___ParseOptions__initialize2, 2},
-		{ "_arrow_json___TableReader__Make", (DL_FUNC) &_arrow_json___TableReader__Make, 3},
-		{ "_arrow_json___TableReader__Read", (DL_FUNC) &_arrow_json___TableReader__Read, 1},
-		{ "_arrow_MemoryPool__default", (DL_FUNC) &_arrow_MemoryPool__default, 0},
-		{ "_arrow_MemoryPool__bytes_allocated", (DL_FUNC) &_arrow_MemoryPool__bytes_allocated, 1},
-		{ "_arrow_MemoryPool__max_memory", (DL_FUNC) &_arrow_MemoryPool__max_memory, 1},
-		{ "_arrow_MemoryPool__backend_name", (DL_FUNC) &_arrow_MemoryPool__backend_name, 1},
-		{ "_arrow_supported_memory_backends", (DL_FUNC) &_arrow_supported_memory_backends, 0},
-		{ "_arrow_ipc___Message__body_length", (DL_FUNC) &_arrow_ipc___Message__body_length, 1},
-		{ "_arrow_ipc___Message__metadata", (DL_FUNC) &_arrow_ipc___Message__metadata, 1},
-		{ "_arrow_ipc___Message__body", (DL_FUNC) &_arrow_ipc___Message__body, 1},
-		{ "_arrow_ipc___Message__Verify", (DL_FUNC) &_arrow_ipc___Message__Verify, 1},
-		{ "_arrow_ipc___Message__type", (DL_FUNC) &_arrow_ipc___Message__type, 1},
-		{ "_arrow_ipc___Message__Equals", (DL_FUNC) &_arrow_ipc___Message__Equals, 2},
-		{ "_arrow_ipc___ReadRecordBatch__Message__Schema", (DL_FUNC) &_arrow_ipc___ReadRecordBatch__Message__Schema, 2},
-		{ "_arrow_ipc___ReadSchema_InputStream", (DL_FUNC) &_arrow_ipc___ReadSchema_InputStream, 1},
-		{ "_arrow_ipc___ReadSchema_Message", (DL_FUNC) &_arrow_ipc___ReadSchema_Message, 1},
-		{ "_arrow_ipc___MessageReader__Open", (DL_FUNC) &_arrow_ipc___MessageReader__Open, 1},
-		{ "_arrow_ipc___MessageReader__ReadNextMessage", (DL_FUNC) &_arrow_ipc___MessageReader__ReadNextMessage, 1},
-		{ "_arrow_ipc___ReadMessage", (DL_FUNC) &_arrow_ipc___ReadMessage, 1},
-		{ "_arrow_parquet___arrow___ArrowReaderProperties__Make", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__Make, 1},
-		{ "_arrow_parquet___arrow___ArrowReaderProperties__set_use_threads", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__set_use_threads, 2},
-		{ "_arrow_parquet___arrow___ArrowReaderProperties__get_use_threads", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__get_use_threads, 2},
-		{ "_arrow_parquet___arrow___ArrowReaderProperties__get_read_dictionary", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__get_read_dictionary, 2},
-		{ "_arrow_parquet___arrow___ArrowReaderProperties__set_read_dictionary", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__set_read_dictionary, 3},
-		{ "_arrow_parquet___arrow___FileReader__OpenFile", (DL_FUNC) &_arrow_parquet___arrow___FileReader__OpenFile, 2},
-		{ "_arrow_parquet___arrow___FileReader__ReadTable1", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadTable1, 1},
-		{ "_arrow_parquet___arrow___FileReader__ReadTable2", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadTable2, 2},
-		{ "_arrow_parquet___arrow___FileReader__ReadRowGroup1", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadRowGroup1, 2},
-		{ "_arrow_parquet___arrow___FileReader__ReadRowGroup2", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadRowGroup2, 3},
-		{ "_arrow_parquet___arrow___FileReader__ReadRowGroups1", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadRowGroups1, 2},
-		{ "_arrow_parquet___arrow___FileReader__ReadRowGroups2", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadRowGroups2, 3},
-		{ "_arrow_parquet___arrow___FileReader__num_rows", (DL_FUNC) &_arrow_parquet___arrow___FileReader__num_rows, 1},
-		{ "_arrow_parquet___arrow___FileReader__num_columns", (DL_FUNC) &_arrow_parquet___arrow___FileReader__num_columns, 1},
-		{ "_arrow_parquet___arrow___FileReader__num_row_groups", (DL_FUNC) &_arrow_parquet___arrow___FileReader__num_row_groups, 1},
-		{ "_arrow_parquet___arrow___FileReader__ReadColumn", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadColumn, 2},
-		{ "_arrow_parquet___ArrowWriterProperties___create", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___create, 3},
-		{ "_arrow_parquet___WriterProperties___Builder__create", (DL_FUNC) &_arrow_parquet___WriterProperties___Builder__create, 0},
-		{ "_arrow_parquet___WriterProperties___Builder__version", (DL_FUNC) &_arrow_parquet___WriterProperties___Builder__version, 2},
-		{ "_arrow_parquet___ArrowWriterProperties___Builder__set_compressions", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__set_compressions, 3},
-		{ "_arrow_parquet___ArrowWriterProperties___Builder__set_compression_levels", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__set_compression_levels, 3},
-		{ "_arrow_parquet___ArrowWriterProperties___Builder__set_use_dictionary", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__set_use_dictionary, 3},
-		{ "_arrow_parquet___ArrowWriterProperties___Builder__set_write_statistics", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__set_write_statistics, 3},
-		{ "_arrow_parquet___ArrowWriterProperties___Builder__data_page_size", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__data_page_size, 2},
-		{ "_arrow_parquet___WriterProperties___Builder__build", (DL_FUNC) &_arrow_parquet___WriterProperties___Builder__build, 1},
-		{ "_arrow_parquet___arrow___ParquetFileWriter__Open", (DL_FUNC) &_arrow_parquet___arrow___ParquetFileWriter__Open, 4},
-		{ "_arrow_parquet___arrow___FileWriter__WriteTable", (DL_FUNC) &_arrow_parquet___arrow___FileWriter__WriteTable, 3},
-		{ "_arrow_parquet___arrow___FileWriter__Close", (DL_FUNC) &_arrow_parquet___arrow___FileWriter__Close, 1},
-		{ "_arrow_parquet___arrow___WriteTable", (DL_FUNC) &_arrow_parquet___arrow___WriteTable, 4},
-		{ "_arrow_parquet___arrow___FileReader__GetSchema", (DL_FUNC) &_arrow_parquet___arrow___FileReader__GetSchema, 1},
-		{ "_arrow_allocate_arrow_schema", (DL_FUNC) &_arrow_allocate_arrow_schema, 0},
-		{ "_arrow_delete_arrow_schema", (DL_FUNC) &_arrow_delete_arrow_schema, 1},
-		{ "_arrow_allocate_arrow_array", (DL_FUNC) &_arrow_allocate_arrow_array, 0},
-		{ "_arrow_delete_arrow_array", (DL_FUNC) &_arrow_delete_arrow_array, 1},
-		{ "_arrow_allocate_arrow_array_stream", (DL_FUNC) &_arrow_allocate_arrow_array_stream, 0},
-		{ "_arrow_delete_arrow_array_stream", (DL_FUNC) &_arrow_delete_arrow_array_stream, 1},
-		{ "_arrow_ImportArray", (DL_FUNC) &_arrow_ImportArray, 2},
-		{ "_arrow_ImportRecordBatch", (DL_FUNC) &_arrow_ImportRecordBatch, 2},
-		{ "_arrow_ImportSchema", (DL_FUNC) &_arrow_ImportSchema, 1},
-		{ "_arrow_ImportField", (DL_FUNC) &_arrow_ImportField, 1},
-		{ "_arrow_ImportType", (DL_FUNC) &_arrow_ImportType, 1},
-		{ "_arrow_ImportRecordBatchReader", (DL_FUNC) &_arrow_ImportRecordBatchReader, 1},
-		{ "_arrow_ExportType", (DL_FUNC) &_arrow_ExportType, 2},
-		{ "_arrow_ExportField", (DL_FUNC) &_arrow_ExportField, 2},
-		{ "_arrow_ExportSchema", (DL_FUNC) &_arrow_ExportSchema, 2},
-		{ "_arrow_ExportArray", (DL_FUNC) &_arrow_ExportArray, 3},
-		{ "_arrow_ExportRecordBatch", (DL_FUNC) &_arrow_ExportRecordBatch, 3},
-		{ "_arrow_ExportRecordBatchReader", (DL_FUNC) &_arrow_ExportRecordBatchReader, 2},
-		{ "_arrow_Table__from_dots", (DL_FUNC) &_arrow_Table__from_dots, 3},
-		{ "_arrow_vec_to_arrow", (DL_FUNC) &_arrow_vec_to_arrow, 2},
-		{ "_arrow_DictionaryArray__FromArrays", (DL_FUNC) &_arrow_DictionaryArray__FromArrays, 3},
-		{ "_arrow_RecordBatch__num_columns", (DL_FUNC) &_arrow_RecordBatch__num_columns, 1},
-		{ "_arrow_RecordBatch__num_rows", (DL_FUNC) &_arrow_RecordBatch__num_rows, 1},
-		{ "_arrow_RecordBatch__schema", (DL_FUNC) &_arrow_RecordBatch__schema, 1},
-		{ "_arrow_RecordBatch__RenameColumns", (DL_FUNC) &_arrow_RecordBatch__RenameColumns, 2},
-		{ "_arrow_RecordBatch__ReplaceSchemaMetadata", (DL_FUNC) &_arrow_RecordBatch__ReplaceSchemaMetadata, 2},
-		{ "_arrow_RecordBatch__columns", (DL_FUNC) &_arrow_RecordBatch__columns, 1},
-		{ "_arrow_RecordBatch__column", (DL_FUNC) &_arrow_RecordBatch__column, 2},
-		{ "_arrow_RecordBatch__GetColumnByName", (DL_FUNC) &_arrow_RecordBatch__GetColumnByName, 2},
-		{ "_arrow_RecordBatch__SelectColumns", (DL_FUNC) &_arrow_RecordBatch__SelectColumns, 2},
-		{ "_arrow_RecordBatch__Equals", (DL_FUNC) &_arrow_RecordBatch__Equals, 3},
-		{ "_arrow_RecordBatch__AddColumn", (DL_FUNC) &_arrow_RecordBatch__AddColumn, 4},
-		{ "_arrow_RecordBatch__SetColumn", (DL_FUNC) &_arrow_RecordBatch__SetColumn, 4},
-		{ "_arrow_RecordBatch__RemoveColumn", (DL_FUNC) &_arrow_RecordBatch__RemoveColumn, 2},
-		{ "_arrow_RecordBatch__column_name", (DL_FUNC) &_arrow_RecordBatch__column_name, 2},
-		{ "_arrow_RecordBatch__names", (DL_FUNC) &_arrow_RecordBatch__names, 1},
-		{ "_arrow_RecordBatch__Slice1", (DL_FUNC) &_arrow_RecordBatch__Slice1, 2},
-		{ "_arrow_RecordBatch__Slice2", (DL_FUNC) &_arrow_RecordBatch__Slice2, 3},
-		{ "_arrow_ipc___SerializeRecordBatch__Raw", (DL_FUNC) &_arrow_ipc___SerializeRecordBatch__Raw, 1},
-		{ "_arrow_ipc___ReadRecordBatch__InputStream__Schema", (DL_FUNC) &_arrow_ipc___ReadRecordBatch__InputStream__Schema, 2},
-		{ "_arrow_RecordBatch__from_arrays", (DL_FUNC) &_arrow_RecordBatch__from_arrays, 2},
-		{ "_arrow_RecordBatchReader__schema", (DL_FUNC) &_arrow_RecordBatchReader__schema, 1},
-		{ "_arrow_RecordBatchReader__ReadNext", (DL_FUNC) &_arrow_RecordBatchReader__ReadNext, 1},
-		{ "_arrow_RecordBatchReader__batches", (DL_FUNC) &_arrow_RecordBatchReader__batches, 1},
-		{ "_arrow_Table__from_RecordBatchReader", (DL_FUNC) &_arrow_Table__from_RecordBatchReader, 1},
-		{ "_arrow_ipc___RecordBatchStreamReader__Open", (DL_FUNC) &_arrow_ipc___RecordBatchStreamReader__Open, 1},
-		{ "_arrow_ipc___RecordBatchFileReader__schema", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__schema, 1},
-		{ "_arrow_ipc___RecordBatchFileReader__num_record_batches", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__num_record_batches, 1},
-		{ "_arrow_ipc___RecordBatchFileReader__ReadRecordBatch", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__ReadRecordBatch, 2},
-		{ "_arrow_ipc___RecordBatchFileReader__Open", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__Open, 1},
-		{ "_arrow_Table__from_RecordBatchFileReader", (DL_FUNC) &_arrow_Table__from_RecordBatchFileReader, 1},
-		{ "_arrow_ipc___RecordBatchFileReader__batches", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__batches, 1},
-		{ "_arrow_ipc___RecordBatchWriter__WriteRecordBatch", (DL_FUNC) &_arrow_ipc___RecordBatchWriter__WriteRecordBatch, 2},
-		{ "_arrow_ipc___RecordBatchWriter__WriteTable", (DL_FUNC) &_arrow_ipc___RecordBatchWriter__WriteTable, 2},
-		{ "_arrow_ipc___RecordBatchWriter__Close", (DL_FUNC) &_arrow_ipc___RecordBatchWriter__Close, 1},
-		{ "_arrow_ipc___RecordBatchFileWriter__Open", (DL_FUNC) &_arrow_ipc___RecordBatchFileWriter__Open, 4},
-		{ "_arrow_ipc___RecordBatchStreamWriter__Open", (DL_FUNC) &_arrow_ipc___RecordBatchStreamWriter__Open, 4},
-		{ "_arrow_Array__GetScalar", (DL_FUNC) &_arrow_Array__GetScalar, 2},
-		{ "_arrow_Scalar__ToString", (DL_FUNC) &_arrow_Scalar__ToString, 1},
-		{ "_arrow_StructScalar__field", (DL_FUNC) &_arrow_StructScalar__field, 2},
-		{ "_arrow_StructScalar__GetFieldByName", (DL_FUNC) &_arrow_StructScalar__GetFieldByName, 2},
-		{ "_arrow_Scalar__as_vector", (DL_FUNC) &_arrow_Scalar__as_vector, 1},
-		{ "_arrow_MakeArrayFromScalar", (DL_FUNC) &_arrow_MakeArrayFromScalar, 2},
-		{ "_arrow_Scalar__is_valid", (DL_FUNC) &_arrow_Scalar__is_valid, 1},
-		{ "_arrow_Scalar__type", (DL_FUNC) &_arrow_Scalar__type, 1},
-		{ "_arrow_Scalar__Equals", (DL_FUNC) &_arrow_Scalar__Equals, 2},
-		{ "_arrow_Scalar__ApproxEquals", (DL_FUNC) &_arrow_Scalar__ApproxEquals, 2},
-		{ "_arrow_schema_", (DL_FUNC) &_arrow_schema_, 1},
-		{ "_arrow_Schema__ToString", (DL_FUNC) &_arrow_Schema__ToString, 1},
-		{ "_arrow_Schema__num_fields", (DL_FUNC) &_arrow_Schema__num_fields, 1},
-		{ "_arrow_Schema__field", (DL_FUNC) &_arrow_Schema__field, 2},
-		{ "_arrow_Schema__AddField", (DL_FUNC) &_arrow_Schema__AddField, 3},
-		{ "_arrow_Schema__SetField", (DL_FUNC) &_arrow_Schema__SetField, 3},
-		{ "_arrow_Schema__RemoveField", (DL_FUNC) &_arrow_Schema__RemoveField, 2},
-		{ "_arrow_Schema__GetFieldByName", (DL_FUNC) &_arrow_Schema__GetFieldByName, 2},
-		{ "_arrow_Schema__fields", (DL_FUNC) &_arrow_Schema__fields, 1},
-		{ "_arrow_Schema__field_names", (DL_FUNC) &_arrow_Schema__field_names, 1},
-		{ "_arrow_Schema__HasMetadata", (DL_FUNC) &_arrow_Schema__HasMetadata, 1},
-		{ "_arrow_Schema__metadata", (DL_FUNC) &_arrow_Schema__metadata, 1},
-		{ "_arrow_Schema__WithMetadata", (DL_FUNC) &_arrow_Schema__WithMetadata, 2},
-		{ "_arrow_Schema__serialize", (DL_FUNC) &_arrow_Schema__serialize, 1},
-		{ "_arrow_Schema__Equals", (DL_FUNC) &_arrow_Schema__Equals, 3},
-		{ "_arrow_arrow__UnifySchemas", (DL_FUNC) &_arrow_arrow__UnifySchemas, 1},
-		{ "_arrow_Table__num_columns", (DL_FUNC) &_arrow_Table__num_columns, 1},
-		{ "_arrow_Table__num_rows", (DL_FUNC) &_arrow_Table__num_rows, 1},
-		{ "_arrow_Table__schema", (DL_FUNC) &_arrow_Table__schema, 1},
-		{ "_arrow_Table__ReplaceSchemaMetadata", (DL_FUNC) &_arrow_Table__ReplaceSchemaMetadata, 2},
-		{ "_arrow_Table__column", (DL_FUNC) &_arrow_Table__column, 2},
-		{ "_arrow_Table__field", (DL_FUNC) &_arrow_Table__field, 2},
-		{ "_arrow_Table__columns", (DL_FUNC) &_arrow_Table__columns, 1},
-		{ "_arrow_Table__ColumnNames", (DL_FUNC) &_arrow_Table__ColumnNames, 1},
-		{ "_arrow_Table__RenameColumns", (DL_FUNC) &_arrow_Table__RenameColumns, 2},
-		{ "_arrow_Table__Slice1", (DL_FUNC) &_arrow_Table__Slice1, 2},
-		{ "_arrow_Table__Slice2", (DL_FUNC) &_arrow_Table__Slice2, 3},
-		{ "_arrow_Table__Equals", (DL_FUNC) &_arrow_Table__Equals, 3},
-		{ "_arrow_Table__Validate", (DL_FUNC) &_arrow_Table__Validate, 1},
-		{ "_arrow_Table__ValidateFull", (DL_FUNC) &_arrow_Table__ValidateFull, 1},
-		{ "_arrow_Table__GetColumnByName", (DL_FUNC) &_arrow_Table__GetColumnByName, 2},
-		{ "_arrow_Table__RemoveColumn", (DL_FUNC) &_arrow_Table__RemoveColumn, 2},
-		{ "_arrow_Table__AddColumn", (DL_FUNC) &_arrow_Table__AddColumn, 4},
-		{ "_arrow_Table__SetColumn", (DL_FUNC) &_arrow_Table__SetColumn, 4},
-		{ "_arrow_Table__SelectColumns", (DL_FUNC) &_arrow_Table__SelectColumns, 2},
-		{ "_arrow_all_record_batches", (DL_FUNC) &_arrow_all_record_batches, 1},
-		{ "_arrow_Table__from_record_batches", (DL_FUNC) &_arrow_Table__from_record_batches, 2},
-		{ "_arrow_GetCpuThreadPoolCapacity", (DL_FUNC) &_arrow_GetCpuThreadPoolCapacity, 0},
-		{ "_arrow_SetCpuThreadPoolCapacity", (DL_FUNC) &_arrow_SetCpuThreadPoolCapacity, 1},
-		{ "_arrow_GetIOThreadPoolCapacity", (DL_FUNC) &_arrow_GetIOThreadPoolCapacity, 0},
-		{ "_arrow_SetIOThreadPoolCapacity", (DL_FUNC) &_arrow_SetIOThreadPoolCapacity, 1},
-		{ "_arrow_Array__infer_type", (DL_FUNC) &_arrow_Array__infer_type, 1},
-		{ "_arrow_Table__Reset", (DL_FUNC) &_arrow_Table__Reset, 1},
-		{ "_arrow_RecordBatch__Reset", (DL_FUNC) &_arrow_RecordBatch__Reset, 1},
+		{ "_arrow_is_altrep", (DL_FUNC) &_arrow_is_altrep, 1}, 
+		{ "_arrow_Array__Slice1", (DL_FUNC) &_arrow_Array__Slice1, 2}, 
+		{ "_arrow_Array__Slice2", (DL_FUNC) &_arrow_Array__Slice2, 3}, 
+		{ "_arrow_Array__IsNull", (DL_FUNC) &_arrow_Array__IsNull, 2}, 
+		{ "_arrow_Array__IsValid", (DL_FUNC) &_arrow_Array__IsValid, 2}, 
+		{ "_arrow_Array__length", (DL_FUNC) &_arrow_Array__length, 1}, 
+		{ "_arrow_Array__offset", (DL_FUNC) &_arrow_Array__offset, 1}, 
+		{ "_arrow_Array__null_count", (DL_FUNC) &_arrow_Array__null_count, 1}, 
+		{ "_arrow_Array__type", (DL_FUNC) &_arrow_Array__type, 1}, 
+		{ "_arrow_Array__ToString", (DL_FUNC) &_arrow_Array__ToString, 1}, 
+		{ "_arrow_Array__type_id", (DL_FUNC) &_arrow_Array__type_id, 1}, 
+		{ "_arrow_Array__Equals", (DL_FUNC) &_arrow_Array__Equals, 2}, 
+		{ "_arrow_Array__ApproxEquals", (DL_FUNC) &_arrow_Array__ApproxEquals, 2}, 
+		{ "_arrow_Array__Diff", (DL_FUNC) &_arrow_Array__Diff, 2}, 
+		{ "_arrow_Array__data", (DL_FUNC) &_arrow_Array__data, 1}, 
+		{ "_arrow_Array__RangeEquals", (DL_FUNC) &_arrow_Array__RangeEquals, 5}, 
+		{ "_arrow_Array__View", (DL_FUNC) &_arrow_Array__View, 2}, 
+		{ "_arrow_Array__Validate", (DL_FUNC) &_arrow_Array__Validate, 1}, 
+		{ "_arrow_DictionaryArray__indices", (DL_FUNC) &_arrow_DictionaryArray__indices, 1}, 
+		{ "_arrow_DictionaryArray__dictionary", (DL_FUNC) &_arrow_DictionaryArray__dictionary, 1}, 
+		{ "_arrow_StructArray__field", (DL_FUNC) &_arrow_StructArray__field, 2}, 
+		{ "_arrow_StructArray__GetFieldByName", (DL_FUNC) &_arrow_StructArray__GetFieldByName, 2}, 
+		{ "_arrow_StructArray__Flatten", (DL_FUNC) &_arrow_StructArray__Flatten, 1}, 
+		{ "_arrow_ListArray__value_type", (DL_FUNC) &_arrow_ListArray__value_type, 1}, 
+		{ "_arrow_LargeListArray__value_type", (DL_FUNC) &_arrow_LargeListArray__value_type, 1}, 
+		{ "_arrow_ListArray__values", (DL_FUNC) &_arrow_ListArray__values, 1}, 
+		{ "_arrow_LargeListArray__values", (DL_FUNC) &_arrow_LargeListArray__values, 1}, 
+		{ "_arrow_ListArray__value_length", (DL_FUNC) &_arrow_ListArray__value_length, 2}, 
+		{ "_arrow_LargeListArray__value_length", (DL_FUNC) &_arrow_LargeListArray__value_length, 2}, 
+		{ "_arrow_FixedSizeListArray__value_length", (DL_FUNC) &_arrow_FixedSizeListArray__value_length, 2}, 
+		{ "_arrow_ListArray__value_offset", (DL_FUNC) &_arrow_ListArray__value_offset, 2}, 
+		{ "_arrow_LargeListArray__value_offset", (DL_FUNC) &_arrow_LargeListArray__value_offset, 2}, 
+		{ "_arrow_FixedSizeListArray__value_offset", (DL_FUNC) &_arrow_FixedSizeListArray__value_offset, 2}, 
+		{ "_arrow_ListArray__raw_value_offsets", (DL_FUNC) &_arrow_ListArray__raw_value_offsets, 1}, 
+		{ "_arrow_LargeListArray__raw_value_offsets", (DL_FUNC) &_arrow_LargeListArray__raw_value_offsets, 1}, 
+		{ "_arrow_Array__as_vector", (DL_FUNC) &_arrow_Array__as_vector, 1}, 
+		{ "_arrow_ChunkedArray__as_vector", (DL_FUNC) &_arrow_ChunkedArray__as_vector, 2}, 
+		{ "_arrow_RecordBatch__to_dataframe", (DL_FUNC) &_arrow_RecordBatch__to_dataframe, 2}, 
+		{ "_arrow_Table__to_dataframe", (DL_FUNC) &_arrow_Table__to_dataframe, 2}, 
+		{ "_arrow_ArrayData__get_type", (DL_FUNC) &_arrow_ArrayData__get_type, 1}, 
+		{ "_arrow_ArrayData__get_length", (DL_FUNC) &_arrow_ArrayData__get_length, 1}, 
+		{ "_arrow_ArrayData__get_null_count", (DL_FUNC) &_arrow_ArrayData__get_null_count, 1}, 
+		{ "_arrow_ArrayData__get_offset", (DL_FUNC) &_arrow_ArrayData__get_offset, 1}, 
+		{ "_arrow_ArrayData__buffers", (DL_FUNC) &_arrow_ArrayData__buffers, 1}, 
+		{ "_arrow_Buffer__is_mutable", (DL_FUNC) &_arrow_Buffer__is_mutable, 1}, 
+		{ "_arrow_Buffer__ZeroPadding", (DL_FUNC) &_arrow_Buffer__ZeroPadding, 1}, 
+		{ "_arrow_Buffer__capacity", (DL_FUNC) &_arrow_Buffer__capacity, 1}, 
+		{ "_arrow_Buffer__size", (DL_FUNC) &_arrow_Buffer__size, 1}, 
+		{ "_arrow_r___RBuffer__initialize", (DL_FUNC) &_arrow_r___RBuffer__initialize, 1}, 
+		{ "_arrow_Buffer__data", (DL_FUNC) &_arrow_Buffer__data, 1}, 
+		{ "_arrow_Buffer__Equals", (DL_FUNC) &_arrow_Buffer__Equals, 2}, 
+		{ "_arrow_ChunkedArray__length", (DL_FUNC) &_arrow_ChunkedArray__length, 1}, 
+		{ "_arrow_ChunkedArray__null_count", (DL_FUNC) &_arrow_ChunkedArray__null_count, 1}, 
+		{ "_arrow_ChunkedArray__num_chunks", (DL_FUNC) &_arrow_ChunkedArray__num_chunks, 1}, 
+		{ "_arrow_ChunkedArray__chunk", (DL_FUNC) &_arrow_ChunkedArray__chunk, 2}, 
+		{ "_arrow_ChunkedArray__chunks", (DL_FUNC) &_arrow_ChunkedArray__chunks, 1}, 
+		{ "_arrow_ChunkedArray__type", (DL_FUNC) &_arrow_ChunkedArray__type, 1}, 
+		{ "_arrow_ChunkedArray__Slice1", (DL_FUNC) &_arrow_ChunkedArray__Slice1, 2}, 
+		{ "_arrow_ChunkedArray__Slice2", (DL_FUNC) &_arrow_ChunkedArray__Slice2, 3}, 
+		{ "_arrow_ChunkedArray__View", (DL_FUNC) &_arrow_ChunkedArray__View, 2}, 
+		{ "_arrow_ChunkedArray__Validate", (DL_FUNC) &_arrow_ChunkedArray__Validate, 1}, 
+		{ "_arrow_ChunkedArray__Equals", (DL_FUNC) &_arrow_ChunkedArray__Equals, 2}, 
+		{ "_arrow_ChunkedArray__ToString", (DL_FUNC) &_arrow_ChunkedArray__ToString, 1}, 
+		{ "_arrow_ChunkedArray__from_list", (DL_FUNC) &_arrow_ChunkedArray__from_list, 2}, 
+		{ "_arrow_util___Codec__Create", (DL_FUNC) &_arrow_util___Codec__Create, 2}, 
+		{ "_arrow_util___Codec__name", (DL_FUNC) &_arrow_util___Codec__name, 1}, 
+		{ "_arrow_util___Codec__IsAvailable", (DL_FUNC) &_arrow_util___Codec__IsAvailable, 1}, 
+		{ "_arrow_io___CompressedOutputStream__Make", (DL_FUNC) &_arrow_io___CompressedOutputStream__Make, 2}, 
+		{ "_arrow_io___CompressedInputStream__Make", (DL_FUNC) &_arrow_io___CompressedInputStream__Make, 2}, 
+		{ "_arrow_ExecPlan_create", (DL_FUNC) &_arrow_ExecPlan_create, 1}, 
+		{ "_arrow_ExecPlan_run", (DL_FUNC) &_arrow_ExecPlan_run, 3}, 
+		{ "_arrow_ExecNode_Scan", (DL_FUNC) &_arrow_ExecNode_Scan, 4}, 
+		{ "_arrow_ExecNode_Filter", (DL_FUNC) &_arrow_ExecNode_Filter, 2}, 
+		{ "_arrow_ExecNode_Project", (DL_FUNC) &_arrow_ExecNode_Project, 3}, 
+		{ "_arrow_ExecNode_Aggregate", (DL_FUNC) &_arrow_ExecNode_Aggregate, 5}, 
+		{ "_arrow_RecordBatch__cast", (DL_FUNC) &_arrow_RecordBatch__cast, 3}, 
+		{ "_arrow_Table__cast", (DL_FUNC) &_arrow_Table__cast, 3}, 
+		{ "_arrow_compute__CallFunction", (DL_FUNC) &_arrow_compute__CallFunction, 3}, 
+		{ "_arrow_compute__GetFunctionNames", (DL_FUNC) &_arrow_compute__GetFunctionNames, 0}, 
+		{ "_arrow_build_info", (DL_FUNC) &_arrow_build_info, 0}, 
+		{ "_arrow_runtime_info", (DL_FUNC) &_arrow_runtime_info, 0}, 
+		{ "_arrow_csv___WriteOptions__initialize", (DL_FUNC) &_arrow_csv___WriteOptions__initialize, 1}, 
+		{ "_arrow_csv___ReadOptions__initialize", (DL_FUNC) &_arrow_csv___ReadOptions__initialize, 1}, 
+		{ "_arrow_csv___ParseOptions__initialize", (DL_FUNC) &_arrow_csv___ParseOptions__initialize, 1}, 
+		{ "_arrow_csv___ReadOptions__column_names", (DL_FUNC) &_arrow_csv___ReadOptions__column_names, 1}, 
+		{ "_arrow_csv___ConvertOptions__initialize", (DL_FUNC) &_arrow_csv___ConvertOptions__initialize, 1}, 
+		{ "_arrow_csv___TableReader__Make", (DL_FUNC) &_arrow_csv___TableReader__Make, 4}, 
+		{ "_arrow_csv___TableReader__Read", (DL_FUNC) &_arrow_csv___TableReader__Read, 1}, 
+		{ "_arrow_TimestampParser__kind", (DL_FUNC) &_arrow_TimestampParser__kind, 1}, 
+		{ "_arrow_TimestampParser__format", (DL_FUNC) &_arrow_TimestampParser__format, 1}, 
+		{ "_arrow_TimestampParser__MakeStrptime", (DL_FUNC) &_arrow_TimestampParser__MakeStrptime, 1}, 
+		{ "_arrow_TimestampParser__MakeISO8601", (DL_FUNC) &_arrow_TimestampParser__MakeISO8601, 0}, 
+		{ "_arrow_csv___WriteCSV__Table", (DL_FUNC) &_arrow_csv___WriteCSV__Table, 3}, 
+		{ "_arrow_csv___WriteCSV__RecordBatch", (DL_FUNC) &_arrow_csv___WriteCSV__RecordBatch, 3}, 
+		{ "_arrow_dataset___Dataset__NewScan", (DL_FUNC) &_arrow_dataset___Dataset__NewScan, 1}, 
+		{ "_arrow_dataset___Dataset__schema", (DL_FUNC) &_arrow_dataset___Dataset__schema, 1}, 
+		{ "_arrow_dataset___Dataset__type_name", (DL_FUNC) &_arrow_dataset___Dataset__type_name, 1}, 
+		{ "_arrow_dataset___Dataset__ReplaceSchema", (DL_FUNC) &_arrow_dataset___Dataset__ReplaceSchema, 2}, 
+		{ "_arrow_dataset___UnionDataset__create", (DL_FUNC) &_arrow_dataset___UnionDataset__create, 2}, 
+		{ "_arrow_dataset___InMemoryDataset__create", (DL_FUNC) &_arrow_dataset___InMemoryDataset__create, 1}, 
+		{ "_arrow_dataset___UnionDataset__children", (DL_FUNC) &_arrow_dataset___UnionDataset__children, 1}, 
+		{ "_arrow_dataset___FileSystemDataset__format", (DL_FUNC) &_arrow_dataset___FileSystemDataset__format, 1}, 
+		{ "_arrow_dataset___FileSystemDataset__filesystem", (DL_FUNC) &_arrow_dataset___FileSystemDataset__filesystem, 1}, 
+		{ "_arrow_dataset___FileSystemDataset__files", (DL_FUNC) &_arrow_dataset___FileSystemDataset__files, 1}, 
+		{ "_arrow_dataset___DatasetFactory__Finish1", (DL_FUNC) &_arrow_dataset___DatasetFactory__Finish1, 2}, 
+		{ "_arrow_dataset___DatasetFactory__Finish2", (DL_FUNC) &_arrow_dataset___DatasetFactory__Finish2, 2}, 
+		{ "_arrow_dataset___DatasetFactory__Inspect", (DL_FUNC) &_arrow_dataset___DatasetFactory__Inspect, 2}, 
+		{ "_arrow_dataset___UnionDatasetFactory__Make", (DL_FUNC) &_arrow_dataset___UnionDatasetFactory__Make, 1}, 
+		{ "_arrow_dataset___FileSystemDatasetFactory__Make0", (DL_FUNC) &_arrow_dataset___FileSystemDatasetFactory__Make0, 3}, 
+		{ "_arrow_dataset___FileSystemDatasetFactory__Make2", (DL_FUNC) &_arrow_dataset___FileSystemDatasetFactory__Make2, 4}, 
+		{ "_arrow_dataset___FileSystemDatasetFactory__Make1", (DL_FUNC) &_arrow_dataset___FileSystemDatasetFactory__Make1, 3}, 
+		{ "_arrow_dataset___FileSystemDatasetFactory__Make3", (DL_FUNC) &_arrow_dataset___FileSystemDatasetFactory__Make3, 4}, 
+		{ "_arrow_dataset___FileFormat__type_name", (DL_FUNC) &_arrow_dataset___FileFormat__type_name, 1}, 
+		{ "_arrow_dataset___FileFormat__DefaultWriteOptions", (DL_FUNC) &_arrow_dataset___FileFormat__DefaultWriteOptions, 1}, 
+		{ "_arrow_dataset___ParquetFileFormat__Make", (DL_FUNC) &_arrow_dataset___ParquetFileFormat__Make, 2}, 
+		{ "_arrow_dataset___FileWriteOptions__type_name", (DL_FUNC) &_arrow_dataset___FileWriteOptions__type_name, 1}, 
+		{ "_arrow_dataset___ParquetFileWriteOptions__update", (DL_FUNC) &_arrow_dataset___ParquetFileWriteOptions__update, 3}, 
+		{ "_arrow_dataset___IpcFileWriteOptions__update2", (DL_FUNC) &_arrow_dataset___IpcFileWriteOptions__update2, 4}, 
+		{ "_arrow_dataset___IpcFileWriteOptions__update1", (DL_FUNC) &_arrow_dataset___IpcFileWriteOptions__update1, 3}, 
+		{ "_arrow_dataset___CsvFileWriteOptions__update", (DL_FUNC) &_arrow_dataset___CsvFileWriteOptions__update, 2}, 
+		{ "_arrow_dataset___IpcFileFormat__Make", (DL_FUNC) &_arrow_dataset___IpcFileFormat__Make, 0}, 
+		{ "_arrow_dataset___CsvFileFormat__Make", (DL_FUNC) &_arrow_dataset___CsvFileFormat__Make, 3}, 
+		{ "_arrow_dataset___FragmentScanOptions__type_name", (DL_FUNC) &_arrow_dataset___FragmentScanOptions__type_name, 1}, 
+		{ "_arrow_dataset___CsvFragmentScanOptions__Make", (DL_FUNC) &_arrow_dataset___CsvFragmentScanOptions__Make, 2}, 
+		{ "_arrow_dataset___ParquetFragmentScanOptions__Make", (DL_FUNC) &_arrow_dataset___ParquetFragmentScanOptions__Make, 3}, 
+		{ "_arrow_dataset___DirectoryPartitioning", (DL_FUNC) &_arrow_dataset___DirectoryPartitioning, 2}, 
+		{ "_arrow_dataset___DirectoryPartitioning__MakeFactory", (DL_FUNC) &_arrow_dataset___DirectoryPartitioning__MakeFactory, 2}, 
+		{ "_arrow_dataset___HivePartitioning", (DL_FUNC) &_arrow_dataset___HivePartitioning, 3}, 
+		{ "_arrow_dataset___HivePartitioning__MakeFactory", (DL_FUNC) &_arrow_dataset___HivePartitioning__MakeFactory, 2}, 
+		{ "_arrow_dataset___ScannerBuilder__ProjectNames", (DL_FUNC) &_arrow_dataset___ScannerBuilder__ProjectNames, 2}, 
+		{ "_arrow_dataset___ScannerBuilder__ProjectExprs", (DL_FUNC) &_arrow_dataset___ScannerBuilder__ProjectExprs, 3}, 
+		{ "_arrow_dataset___ScannerBuilder__Filter", (DL_FUNC) &_arrow_dataset___ScannerBuilder__Filter, 2}, 
+		{ "_arrow_dataset___ScannerBuilder__UseThreads", (DL_FUNC) &_arrow_dataset___ScannerBuilder__UseThreads, 2}, 
+		{ "_arrow_dataset___ScannerBuilder__UseAsync", (DL_FUNC) &_arrow_dataset___ScannerBuilder__UseAsync, 2}, 
+		{ "_arrow_dataset___ScannerBuilder__BatchSize", (DL_FUNC) &_arrow_dataset___ScannerBuilder__BatchSize, 2}, 
+		{ "_arrow_dataset___ScannerBuilder__FragmentScanOptions", (DL_FUNC) &_arrow_dataset___ScannerBuilder__FragmentScanOptions, 2}, 
+		{ "_arrow_dataset___ScannerBuilder__schema", (DL_FUNC) &_arrow_dataset___ScannerBuilder__schema, 1}, 
+		{ "_arrow_dataset___ScannerBuilder__Finish", (DL_FUNC) &_arrow_dataset___ScannerBuilder__Finish, 1}, 
+		{ "_arrow_dataset___Scanner__ToTable", (DL_FUNC) &_arrow_dataset___Scanner__ToTable, 1}, 
+		{ "_arrow_dataset___Scanner__ScanBatches", (DL_FUNC) &_arrow_dataset___Scanner__ScanBatches, 1}, 
+		{ "_arrow_dataset___Scanner__ToRecordBatchReader", (DL_FUNC) &_arrow_dataset___Scanner__ToRecordBatchReader, 1}, 
+		{ "_arrow_dataset___Scanner__head", (DL_FUNC) &_arrow_dataset___Scanner__head, 2}, 
+		{ "_arrow_dataset___Scanner__schema", (DL_FUNC) &_arrow_dataset___Scanner__schema, 1}, 
+		{ "_arrow_dataset___ScanTask__get_batches", (DL_FUNC) &_arrow_dataset___ScanTask__get_batches, 1}, 
+		{ "_arrow_dataset___Dataset__Write", (DL_FUNC) &_arrow_dataset___Dataset__Write, 6}, 
+		{ "_arrow_dataset___Scanner__TakeRows", (DL_FUNC) &_arrow_dataset___Scanner__TakeRows, 2}, 
+		{ "_arrow_dataset___Scanner__CountRows", (DL_FUNC) &_arrow_dataset___Scanner__CountRows, 1}, 
+		{ "_arrow_Int8__initialize", (DL_FUNC) &_arrow_Int8__initialize, 0}, 
+		{ "_arrow_Int16__initialize", (DL_FUNC) &_arrow_Int16__initialize, 0}, 
+		{ "_arrow_Int32__initialize", (DL_FUNC) &_arrow_Int32__initialize, 0}, 
+		{ "_arrow_Int64__initialize", (DL_FUNC) &_arrow_Int64__initialize, 0}, 
+		{ "_arrow_UInt8__initialize", (DL_FUNC) &_arrow_UInt8__initialize, 0}, 
+		{ "_arrow_UInt16__initialize", (DL_FUNC) &_arrow_UInt16__initialize, 0}, 
+		{ "_arrow_UInt32__initialize", (DL_FUNC) &_arrow_UInt32__initialize, 0}, 
+		{ "_arrow_UInt64__initialize", (DL_FUNC) &_arrow_UInt64__initialize, 0}, 
+		{ "_arrow_Float16__initialize", (DL_FUNC) &_arrow_Float16__initialize, 0}, 
+		{ "_arrow_Float32__initialize", (DL_FUNC) &_arrow_Float32__initialize, 0}, 
+		{ "_arrow_Float64__initialize", (DL_FUNC) &_arrow_Float64__initialize, 0}, 
+		{ "_arrow_Boolean__initialize", (DL_FUNC) &_arrow_Boolean__initialize, 0}, 
+		{ "_arrow_Utf8__initialize", (DL_FUNC) &_arrow_Utf8__initialize, 0}, 
+		{ "_arrow_LargeUtf8__initialize", (DL_FUNC) &_arrow_LargeUtf8__initialize, 0}, 
+		{ "_arrow_Binary__initialize", (DL_FUNC) &_arrow_Binary__initialize, 0}, 
+		{ "_arrow_LargeBinary__initialize", (DL_FUNC) &_arrow_LargeBinary__initialize, 0}, 
+		{ "_arrow_Date32__initialize", (DL_FUNC) &_arrow_Date32__initialize, 0}, 
+		{ "_arrow_Date64__initialize", (DL_FUNC) &_arrow_Date64__initialize, 0}, 
+		{ "_arrow_Null__initialize", (DL_FUNC) &_arrow_Null__initialize, 0}, 
+		{ "_arrow_Decimal128Type__initialize", (DL_FUNC) &_arrow_Decimal128Type__initialize, 2}, 
+		{ "_arrow_FixedSizeBinary__initialize", (DL_FUNC) &_arrow_FixedSizeBinary__initialize, 1}, 
+		{ "_arrow_Timestamp__initialize", (DL_FUNC) &_arrow_Timestamp__initialize, 2}, 
+		{ "_arrow_Time32__initialize", (DL_FUNC) &_arrow_Time32__initialize, 1}, 
+		{ "_arrow_Time64__initialize", (DL_FUNC) &_arrow_Time64__initialize, 1}, 
+		{ "_arrow_list__", (DL_FUNC) &_arrow_list__, 1}, 
+		{ "_arrow_large_list__", (DL_FUNC) &_arrow_large_list__, 1}, 
+		{ "_arrow_fixed_size_list__", (DL_FUNC) &_arrow_fixed_size_list__, 2}, 
+		{ "_arrow_struct__", (DL_FUNC) &_arrow_struct__, 1}, 
+		{ "_arrow_DataType__ToString", (DL_FUNC) &_arrow_DataType__ToString, 1}, 
+		{ "_arrow_DataType__name", (DL_FUNC) &_arrow_DataType__name, 1}, 
+		{ "_arrow_DataType__Equals", (DL_FUNC) &_arrow_DataType__Equals, 2}, 
+		{ "_arrow_DataType__num_fields", (DL_FUNC) &_arrow_DataType__num_fields, 1}, 
+		{ "_arrow_DataType__fields", (DL_FUNC) &_arrow_DataType__fields, 1}, 
+		{ "_arrow_DataType__id", (DL_FUNC) &_arrow_DataType__id, 1}, 
+		{ "_arrow_ListType__ToString", (DL_FUNC) &_arrow_ListType__ToString, 1}, 
+		{ "_arrow_FixedWidthType__bit_width", (DL_FUNC) &_arrow_FixedWidthType__bit_width, 1}, 
+		{ "_arrow_DateType__unit", (DL_FUNC) &_arrow_DateType__unit, 1}, 
+		{ "_arrow_TimeType__unit", (DL_FUNC) &_arrow_TimeType__unit, 1}, 
+		{ "_arrow_DecimalType__precision", (DL_FUNC) &_arrow_DecimalType__precision, 1}, 
+		{ "_arrow_DecimalType__scale", (DL_FUNC) &_arrow_DecimalType__scale, 1}, 
+		{ "_arrow_TimestampType__timezone", (DL_FUNC) &_arrow_TimestampType__timezone, 1}, 
+		{ "_arrow_TimestampType__unit", (DL_FUNC) &_arrow_TimestampType__unit, 1}, 
+		{ "_arrow_DictionaryType__initialize", (DL_FUNC) &_arrow_DictionaryType__initialize, 3}, 
+		{ "_arrow_DictionaryType__index_type", (DL_FUNC) &_arrow_DictionaryType__index_type, 1}, 
+		{ "_arrow_DictionaryType__value_type", (DL_FUNC) &_arrow_DictionaryType__value_type, 1}, 
+		{ "_arrow_DictionaryType__name", (DL_FUNC) &_arrow_DictionaryType__name, 1}, 
+		{ "_arrow_DictionaryType__ordered", (DL_FUNC) &_arrow_DictionaryType__ordered, 1}, 
+		{ "_arrow_StructType__GetFieldByName", (DL_FUNC) &_arrow_StructType__GetFieldByName, 2}, 
+		{ "_arrow_StructType__GetFieldIndex", (DL_FUNC) &_arrow_StructType__GetFieldIndex, 2}, 
+		{ "_arrow_StructType__field_names", (DL_FUNC) &_arrow_StructType__field_names, 1}, 
+		{ "_arrow_ListType__value_field", (DL_FUNC) &_arrow_ListType__value_field, 1}, 
+		{ "_arrow_ListType__value_type", (DL_FUNC) &_arrow_ListType__value_type, 1}, 
+		{ "_arrow_LargeListType__value_field", (DL_FUNC) &_arrow_LargeListType__value_field, 1}, 
+		{ "_arrow_LargeListType__value_type", (DL_FUNC) &_arrow_LargeListType__value_type, 1}, 
+		{ "_arrow_FixedSizeListType__value_field", (DL_FUNC) &_arrow_FixedSizeListType__value_field, 1}, 
+		{ "_arrow_FixedSizeListType__value_type", (DL_FUNC) &_arrow_FixedSizeListType__value_type, 1}, 
+		{ "_arrow_FixedSizeListType__list_size", (DL_FUNC) &_arrow_FixedSizeListType__list_size, 1}, 
+		{ "_arrow_compute___expr__call", (DL_FUNC) &_arrow_compute___expr__call, 3}, 
+		{ "_arrow_field_names_in_expression", (DL_FUNC) &_arrow_field_names_in_expression, 1}, 
+		{ "_arrow_compute___expr__get_field_ref_name", (DL_FUNC) &_arrow_compute___expr__get_field_ref_name, 1}, 
+		{ "_arrow_compute___expr__field_ref", (DL_FUNC) &_arrow_compute___expr__field_ref, 1}, 
+		{ "_arrow_compute___expr__scalar", (DL_FUNC) &_arrow_compute___expr__scalar, 1}, 
+		{ "_arrow_compute___expr__ToString", (DL_FUNC) &_arrow_compute___expr__ToString, 1}, 
+		{ "_arrow_compute___expr__type", (DL_FUNC) &_arrow_compute___expr__type, 2}, 
+		{ "_arrow_compute___expr__type_id", (DL_FUNC) &_arrow_compute___expr__type_id, 2}, 
+		{ "_arrow_ipc___WriteFeather__Table", (DL_FUNC) &_arrow_ipc___WriteFeather__Table, 6}, 
+		{ "_arrow_ipc___feather___Reader__version", (DL_FUNC) &_arrow_ipc___feather___Reader__version, 1}, 
+		{ "_arrow_ipc___feather___Reader__Read", (DL_FUNC) &_arrow_ipc___feather___Reader__Read, 2}, 
+		{ "_arrow_ipc___feather___Reader__Open", (DL_FUNC) &_arrow_ipc___feather___Reader__Open, 1}, 
+		{ "_arrow_ipc___feather___Reader__schema", (DL_FUNC) &_arrow_ipc___feather___Reader__schema, 1}, 
+		{ "_arrow_Field__initialize", (DL_FUNC) &_arrow_Field__initialize, 3}, 
+		{ "_arrow_Field__ToString", (DL_FUNC) &_arrow_Field__ToString, 1}, 
+		{ "_arrow_Field__name", (DL_FUNC) &_arrow_Field__name, 1}, 
+		{ "_arrow_Field__Equals", (DL_FUNC) &_arrow_Field__Equals, 2}, 
+		{ "_arrow_Field__nullable", (DL_FUNC) &_arrow_Field__nullable, 1}, 
+		{ "_arrow_Field__type", (DL_FUNC) &_arrow_Field__type, 1}, 
+		{ "_arrow_fs___FileInfo__type", (DL_FUNC) &_arrow_fs___FileInfo__type, 1}, 
+		{ "_arrow_fs___FileInfo__set_type", (DL_FUNC) &_arrow_fs___FileInfo__set_type, 2}, 
+		{ "_arrow_fs___FileInfo__path", (DL_FUNC) &_arrow_fs___FileInfo__path, 1}, 
+		{ "_arrow_fs___FileInfo__set_path", (DL_FUNC) &_arrow_fs___FileInfo__set_path, 2}, 
+		{ "_arrow_fs___FileInfo__size", (DL_FUNC) &_arrow_fs___FileInfo__size, 1}, 
+		{ "_arrow_fs___FileInfo__set_size", (DL_FUNC) &_arrow_fs___FileInfo__set_size, 2}, 
+		{ "_arrow_fs___FileInfo__base_name", (DL_FUNC) &_arrow_fs___FileInfo__base_name, 1}, 
+		{ "_arrow_fs___FileInfo__extension", (DL_FUNC) &_arrow_fs___FileInfo__extension, 1}, 
+		{ "_arrow_fs___FileInfo__mtime", (DL_FUNC) &_arrow_fs___FileInfo__mtime, 1}, 
+		{ "_arrow_fs___FileInfo__set_mtime", (DL_FUNC) &_arrow_fs___FileInfo__set_mtime, 2}, 
+		{ "_arrow_fs___FileSelector__base_dir", (DL_FUNC) &_arrow_fs___FileSelector__base_dir, 1}, 
+		{ "_arrow_fs___FileSelector__allow_not_found", (DL_FUNC) &_arrow_fs___FileSelector__allow_not_found, 1}, 
+		{ "_arrow_fs___FileSelector__recursive", (DL_FUNC) &_arrow_fs___FileSelector__recursive, 1}, 
+		{ "_arrow_fs___FileSelector__create", (DL_FUNC) &_arrow_fs___FileSelector__create, 3}, 
+		{ "_arrow_fs___FileSystem__GetTargetInfos_Paths", (DL_FUNC) &_arrow_fs___FileSystem__GetTargetInfos_Paths, 2}, 
+		{ "_arrow_fs___FileSystem__GetTargetInfos_FileSelector", (DL_FUNC) &_arrow_fs___FileSystem__GetTargetInfos_FileSelector, 2}, 
+		{ "_arrow_fs___FileSystem__CreateDir", (DL_FUNC) &_arrow_fs___FileSystem__CreateDir, 3}, 
+		{ "_arrow_fs___FileSystem__DeleteDir", (DL_FUNC) &_arrow_fs___FileSystem__DeleteDir, 2}, 
+		{ "_arrow_fs___FileSystem__DeleteDirContents", (DL_FUNC) &_arrow_fs___FileSystem__DeleteDirContents, 2}, 
+		{ "_arrow_fs___FileSystem__DeleteFile", (DL_FUNC) &_arrow_fs___FileSystem__DeleteFile, 2}, 
+		{ "_arrow_fs___FileSystem__DeleteFiles", (DL_FUNC) &_arrow_fs___FileSystem__DeleteFiles, 2}, 
+		{ "_arrow_fs___FileSystem__Move", (DL_FUNC) &_arrow_fs___FileSystem__Move, 3}, 
+		{ "_arrow_fs___FileSystem__CopyFile", (DL_FUNC) &_arrow_fs___FileSystem__CopyFile, 3}, 
+		{ "_arrow_fs___FileSystem__OpenInputStream", (DL_FUNC) &_arrow_fs___FileSystem__OpenInputStream, 2}, 
+		{ "_arrow_fs___FileSystem__OpenInputFile", (DL_FUNC) &_arrow_fs___FileSystem__OpenInputFile, 2}, 
+		{ "_arrow_fs___FileSystem__OpenOutputStream", (DL_FUNC) &_arrow_fs___FileSystem__OpenOutputStream, 2}, 
+		{ "_arrow_fs___FileSystem__OpenAppendStream", (DL_FUNC) &_arrow_fs___FileSystem__OpenAppendStream, 2}, 
+		{ "_arrow_fs___FileSystem__type_name", (DL_FUNC) &_arrow_fs___FileSystem__type_name, 1}, 
+		{ "_arrow_fs___LocalFileSystem__create", (DL_FUNC) &_arrow_fs___LocalFileSystem__create, 0}, 
+		{ "_arrow_fs___SubTreeFileSystem__create", (DL_FUNC) &_arrow_fs___SubTreeFileSystem__create, 2}, 
+		{ "_arrow_fs___SubTreeFileSystem__base_fs", (DL_FUNC) &_arrow_fs___SubTreeFileSystem__base_fs, 1}, 
+		{ "_arrow_fs___SubTreeFileSystem__base_path", (DL_FUNC) &_arrow_fs___SubTreeFileSystem__base_path, 1}, 
+		{ "_arrow_fs___FileSystemFromUri", (DL_FUNC) &_arrow_fs___FileSystemFromUri, 1}, 
+		{ "_arrow_fs___CopyFiles", (DL_FUNC) &_arrow_fs___CopyFiles, 6}, 
+		{ "_arrow_fs___S3FileSystem__create", (DL_FUNC) &_arrow_fs___S3FileSystem__create, 12}, 
+		{ "_arrow_fs___S3FileSystem__region", (DL_FUNC) &_arrow_fs___S3FileSystem__region, 1}, 
+		{ "_arrow_io___Readable__Read", (DL_FUNC) &_arrow_io___Readable__Read, 2}, 
+		{ "_arrow_io___InputStream__Close", (DL_FUNC) &_arrow_io___InputStream__Close, 1}, 
+		{ "_arrow_io___OutputStream__Close", (DL_FUNC) &_arrow_io___OutputStream__Close, 1}, 
+		{ "_arrow_io___RandomAccessFile__GetSize", (DL_FUNC) &_arrow_io___RandomAccessFile__GetSize, 1}, 
+		{ "_arrow_io___RandomAccessFile__supports_zero_copy", (DL_FUNC) &_arrow_io___RandomAccessFile__supports_zero_copy, 1}, 
+		{ "_arrow_io___RandomAccessFile__Seek", (DL_FUNC) &_arrow_io___RandomAccessFile__Seek, 2}, 
+		{ "_arrow_io___RandomAccessFile__Tell", (DL_FUNC) &_arrow_io___RandomAccessFile__Tell, 1}, 
+		{ "_arrow_io___RandomAccessFile__Read0", (DL_FUNC) &_arrow_io___RandomAccessFile__Read0, 1}, 
+		{ "_arrow_io___RandomAccessFile__ReadAt", (DL_FUNC) &_arrow_io___RandomAccessFile__ReadAt, 3}, 
+		{ "_arrow_io___MemoryMappedFile__Create", (DL_FUNC) &_arrow_io___MemoryMappedFile__Create, 2}, 
+		{ "_arrow_io___MemoryMappedFile__Open", (DL_FUNC) &_arrow_io___MemoryMappedFile__Open, 2}, 
+		{ "_arrow_io___MemoryMappedFile__Resize", (DL_FUNC) &_arrow_io___MemoryMappedFile__Resize, 2}, 
+		{ "_arrow_io___ReadableFile__Open", (DL_FUNC) &_arrow_io___ReadableFile__Open, 1}, 
+		{ "_arrow_io___BufferReader__initialize", (DL_FUNC) &_arrow_io___BufferReader__initialize, 1}, 
+		{ "_arrow_io___Writable__write", (DL_FUNC) &_arrow_io___Writable__write, 2}, 
+		{ "_arrow_io___OutputStream__Tell", (DL_FUNC) &_arrow_io___OutputStream__Tell, 1}, 
+		{ "_arrow_io___FileOutputStream__Open", (DL_FUNC) &_arrow_io___FileOutputStream__Open, 1}, 
+		{ "_arrow_io___BufferOutputStream__Create", (DL_FUNC) &_arrow_io___BufferOutputStream__Create, 1}, 
+		{ "_arrow_io___BufferOutputStream__capacity", (DL_FUNC) &_arrow_io___BufferOutputStream__capacity, 1}, 
+		{ "_arrow_io___BufferOutputStream__Finish", (DL_FUNC) &_arrow_io___BufferOutputStream__Finish, 1}, 
+		{ "_arrow_io___BufferOutputStream__Tell", (DL_FUNC) &_arrow_io___BufferOutputStream__Tell, 1}, 
+		{ "_arrow_io___BufferOutputStream__Write", (DL_FUNC) &_arrow_io___BufferOutputStream__Write, 2}, 
+		{ "_arrow_json___ReadOptions__initialize", (DL_FUNC) &_arrow_json___ReadOptions__initialize, 2}, 
+		{ "_arrow_json___ParseOptions__initialize1", (DL_FUNC) &_arrow_json___ParseOptions__initialize1, 1}, 
+		{ "_arrow_json___ParseOptions__initialize2", (DL_FUNC) &_arrow_json___ParseOptions__initialize2, 2}, 
+		{ "_arrow_json___TableReader__Make", (DL_FUNC) &_arrow_json___TableReader__Make, 3}, 
+		{ "_arrow_json___TableReader__Read", (DL_FUNC) &_arrow_json___TableReader__Read, 1}, 
+		{ "_arrow_MemoryPool__default", (DL_FUNC) &_arrow_MemoryPool__default, 0}, 
+		{ "_arrow_MemoryPool__bytes_allocated", (DL_FUNC) &_arrow_MemoryPool__bytes_allocated, 1}, 
+		{ "_arrow_MemoryPool__max_memory", (DL_FUNC) &_arrow_MemoryPool__max_memory, 1}, 
+		{ "_arrow_MemoryPool__backend_name", (DL_FUNC) &_arrow_MemoryPool__backend_name, 1}, 
+		{ "_arrow_supported_memory_backends", (DL_FUNC) &_arrow_supported_memory_backends, 0}, 
+		{ "_arrow_ipc___Message__body_length", (DL_FUNC) &_arrow_ipc___Message__body_length, 1}, 
+		{ "_arrow_ipc___Message__metadata", (DL_FUNC) &_arrow_ipc___Message__metadata, 1}, 
+		{ "_arrow_ipc___Message__body", (DL_FUNC) &_arrow_ipc___Message__body, 1}, 
+		{ "_arrow_ipc___Message__Verify", (DL_FUNC) &_arrow_ipc___Message__Verify, 1}, 
+		{ "_arrow_ipc___Message__type", (DL_FUNC) &_arrow_ipc___Message__type, 1}, 
+		{ "_arrow_ipc___Message__Equals", (DL_FUNC) &_arrow_ipc___Message__Equals, 2}, 
+		{ "_arrow_ipc___ReadRecordBatch__Message__Schema", (DL_FUNC) &_arrow_ipc___ReadRecordBatch__Message__Schema, 2}, 
+		{ "_arrow_ipc___ReadSchema_InputStream", (DL_FUNC) &_arrow_ipc___ReadSchema_InputStream, 1}, 
+		{ "_arrow_ipc___ReadSchema_Message", (DL_FUNC) &_arrow_ipc___ReadSchema_Message, 1}, 
+		{ "_arrow_ipc___MessageReader__Open", (DL_FUNC) &_arrow_ipc___MessageReader__Open, 1}, 
+		{ "_arrow_ipc___MessageReader__ReadNextMessage", (DL_FUNC) &_arrow_ipc___MessageReader__ReadNextMessage, 1}, 
+		{ "_arrow_ipc___ReadMessage", (DL_FUNC) &_arrow_ipc___ReadMessage, 1}, 
+		{ "_arrow_parquet___arrow___ArrowReaderProperties__Make", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__Make, 1}, 
+		{ "_arrow_parquet___arrow___ArrowReaderProperties__set_use_threads", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__set_use_threads, 2}, 
+		{ "_arrow_parquet___arrow___ArrowReaderProperties__get_use_threads", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__get_use_threads, 2}, 
+		{ "_arrow_parquet___arrow___ArrowReaderProperties__get_read_dictionary", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__get_read_dictionary, 2}, 
+		{ "_arrow_parquet___arrow___ArrowReaderProperties__set_read_dictionary", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__set_read_dictionary, 3}, 
+		{ "_arrow_parquet___arrow___FileReader__OpenFile", (DL_FUNC) &_arrow_parquet___arrow___FileReader__OpenFile, 2}, 
+		{ "_arrow_parquet___arrow___FileReader__ReadTable1", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadTable1, 1}, 
+		{ "_arrow_parquet___arrow___FileReader__ReadTable2", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadTable2, 2}, 
+		{ "_arrow_parquet___arrow___FileReader__ReadRowGroup1", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadRowGroup1, 2}, 
+		{ "_arrow_parquet___arrow___FileReader__ReadRowGroup2", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadRowGroup2, 3}, 
+		{ "_arrow_parquet___arrow___FileReader__ReadRowGroups1", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadRowGroups1, 2}, 
+		{ "_arrow_parquet___arrow___FileReader__ReadRowGroups2", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadRowGroups2, 3}, 
+		{ "_arrow_parquet___arrow___FileReader__num_rows", (DL_FUNC) &_arrow_parquet___arrow___FileReader__num_rows, 1}, 
+		{ "_arrow_parquet___arrow___FileReader__num_columns", (DL_FUNC) &_arrow_parquet___arrow___FileReader__num_columns, 1}, 
+		{ "_arrow_parquet___arrow___FileReader__num_row_groups", (DL_FUNC) &_arrow_parquet___arrow___FileReader__num_row_groups, 1}, 
+		{ "_arrow_parquet___arrow___FileReader__ReadColumn", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadColumn, 2}, 
+		{ "_arrow_parquet___ArrowWriterProperties___create", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___create, 3}, 
+		{ "_arrow_parquet___WriterProperties___Builder__create", (DL_FUNC) &_arrow_parquet___WriterProperties___Builder__create, 0}, 
+		{ "_arrow_parquet___WriterProperties___Builder__version", (DL_FUNC) &_arrow_parquet___WriterProperties___Builder__version, 2}, 
+		{ "_arrow_parquet___ArrowWriterProperties___Builder__set_compressions", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__set_compressions, 3}, 
+		{ "_arrow_parquet___ArrowWriterProperties___Builder__set_compression_levels", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__set_compression_levels, 3}, 
+		{ "_arrow_parquet___ArrowWriterProperties___Builder__set_use_dictionary", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__set_use_dictionary, 3}, 
+		{ "_arrow_parquet___ArrowWriterProperties___Builder__set_write_statistics", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__set_write_statistics, 3}, 
+		{ "_arrow_parquet___ArrowWriterProperties___Builder__data_page_size", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__data_page_size, 2}, 
+		{ "_arrow_parquet___WriterProperties___Builder__build", (DL_FUNC) &_arrow_parquet___WriterProperties___Builder__build, 1}, 
+		{ "_arrow_parquet___arrow___ParquetFileWriter__Open", (DL_FUNC) &_arrow_parquet___arrow___ParquetFileWriter__Open, 4}, 
+		{ "_arrow_parquet___arrow___FileWriter__WriteTable", (DL_FUNC) &_arrow_parquet___arrow___FileWriter__WriteTable, 3}, 
+		{ "_arrow_parquet___arrow___FileWriter__Close", (DL_FUNC) &_arrow_parquet___arrow___FileWriter__Close, 1}, 
+		{ "_arrow_parquet___arrow___WriteTable", (DL_FUNC) &_arrow_parquet___arrow___WriteTable, 4}, 
+		{ "_arrow_parquet___arrow___FileReader__GetSchema", (DL_FUNC) &_arrow_parquet___arrow___FileReader__GetSchema, 1}, 
+		{ "_arrow_allocate_arrow_schema", (DL_FUNC) &_arrow_allocate_arrow_schema, 0}, 
+		{ "_arrow_delete_arrow_schema", (DL_FUNC) &_arrow_delete_arrow_schema, 1}, 
+		{ "_arrow_allocate_arrow_array", (DL_FUNC) &_arrow_allocate_arrow_array, 0}, 
+		{ "_arrow_delete_arrow_array", (DL_FUNC) &_arrow_delete_arrow_array, 1}, 
+		{ "_arrow_allocate_arrow_array_stream", (DL_FUNC) &_arrow_allocate_arrow_array_stream, 0}, 
+		{ "_arrow_delete_arrow_array_stream", (DL_FUNC) &_arrow_delete_arrow_array_stream, 1}, 
+		{ "_arrow_ImportArray", (DL_FUNC) &_arrow_ImportArray, 2}, 
+		{ "_arrow_ImportRecordBatch", (DL_FUNC) &_arrow_ImportRecordBatch, 2}, 
+		{ "_arrow_ImportSchema", (DL_FUNC) &_arrow_ImportSchema, 1}, 
+		{ "_arrow_ImportField", (DL_FUNC) &_arrow_ImportField, 1}, 
+		{ "_arrow_ImportType", (DL_FUNC) &_arrow_ImportType, 1}, 
+		{ "_arrow_ImportRecordBatchReader", (DL_FUNC) &_arrow_ImportRecordBatchReader, 1}, 
+		{ "_arrow_ExportType", (DL_FUNC) &_arrow_ExportType, 2}, 
+		{ "_arrow_ExportField", (DL_FUNC) &_arrow_ExportField, 2}, 
+		{ "_arrow_ExportSchema", (DL_FUNC) &_arrow_ExportSchema, 2}, 
+		{ "_arrow_ExportArray", (DL_FUNC) &_arrow_ExportArray, 3}, 
+		{ "_arrow_ExportRecordBatch", (DL_FUNC) &_arrow_ExportRecordBatch, 3}, 
+		{ "_arrow_ExportRecordBatchReader", (DL_FUNC) &_arrow_ExportRecordBatchReader, 2}, 
+		{ "_arrow_Table__from_dots", (DL_FUNC) &_arrow_Table__from_dots, 3}, 
+		{ "_arrow_vec_to_arrow", (DL_FUNC) &_arrow_vec_to_arrow, 2}, 
+		{ "_arrow_DictionaryArray__FromArrays", (DL_FUNC) &_arrow_DictionaryArray__FromArrays, 3}, 
+		{ "_arrow_RecordBatch__num_columns", (DL_FUNC) &_arrow_RecordBatch__num_columns, 1}, 
+		{ "_arrow_RecordBatch__num_rows", (DL_FUNC) &_arrow_RecordBatch__num_rows, 1}, 
+		{ "_arrow_RecordBatch__schema", (DL_FUNC) &_arrow_RecordBatch__schema, 1}, 
+		{ "_arrow_RecordBatch__RenameColumns", (DL_FUNC) &_arrow_RecordBatch__RenameColumns, 2}, 
+		{ "_arrow_RecordBatch__ReplaceSchemaMetadata", (DL_FUNC) &_arrow_RecordBatch__ReplaceSchemaMetadata, 2}, 
+		{ "_arrow_RecordBatch__columns", (DL_FUNC) &_arrow_RecordBatch__columns, 1}, 
+		{ "_arrow_RecordBatch__column", (DL_FUNC) &_arrow_RecordBatch__column, 2}, 
+		{ "_arrow_RecordBatch__GetColumnByName", (DL_FUNC) &_arrow_RecordBatch__GetColumnByName, 2}, 
+		{ "_arrow_RecordBatch__SelectColumns", (DL_FUNC) &_arrow_RecordBatch__SelectColumns, 2}, 
+		{ "_arrow_RecordBatch__Equals", (DL_FUNC) &_arrow_RecordBatch__Equals, 3}, 
+		{ "_arrow_RecordBatch__AddColumn", (DL_FUNC) &_arrow_RecordBatch__AddColumn, 4}, 
+		{ "_arrow_RecordBatch__SetColumn", (DL_FUNC) &_arrow_RecordBatch__SetColumn, 4}, 
+		{ "_arrow_RecordBatch__RemoveColumn", (DL_FUNC) &_arrow_RecordBatch__RemoveColumn, 2}, 
+		{ "_arrow_RecordBatch__column_name", (DL_FUNC) &_arrow_RecordBatch__column_name, 2}, 
+		{ "_arrow_RecordBatch__names", (DL_FUNC) &_arrow_RecordBatch__names, 1}, 
+		{ "_arrow_RecordBatch__Slice1", (DL_FUNC) &_arrow_RecordBatch__Slice1, 2}, 
+		{ "_arrow_RecordBatch__Slice2", (DL_FUNC) &_arrow_RecordBatch__Slice2, 3}, 
+		{ "_arrow_ipc___SerializeRecordBatch__Raw", (DL_FUNC) &_arrow_ipc___SerializeRecordBatch__Raw, 1}, 
+		{ "_arrow_ipc___ReadRecordBatch__InputStream__Schema", (DL_FUNC) &_arrow_ipc___ReadRecordBatch__InputStream__Schema, 2}, 
+		{ "_arrow_RecordBatch__from_arrays", (DL_FUNC) &_arrow_RecordBatch__from_arrays, 2}, 
+		{ "_arrow_RecordBatchReader__schema", (DL_FUNC) &_arrow_RecordBatchReader__schema, 1}, 
+		{ "_arrow_RecordBatchReader__ReadNext", (DL_FUNC) &_arrow_RecordBatchReader__ReadNext, 1}, 
+		{ "_arrow_RecordBatchReader__batches", (DL_FUNC) &_arrow_RecordBatchReader__batches, 1}, 
+		{ "_arrow_Table__from_RecordBatchReader", (DL_FUNC) &_arrow_Table__from_RecordBatchReader, 1}, 
+		{ "_arrow_ipc___RecordBatchStreamReader__Open", (DL_FUNC) &_arrow_ipc___RecordBatchStreamReader__Open, 1}, 
+		{ "_arrow_ipc___RecordBatchFileReader__schema", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__schema, 1}, 
+		{ "_arrow_ipc___RecordBatchFileReader__num_record_batches", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__num_record_batches, 1}, 
+		{ "_arrow_ipc___RecordBatchFileReader__ReadRecordBatch", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__ReadRecordBatch, 2}, 
+		{ "_arrow_ipc___RecordBatchFileReader__Open", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__Open, 1}, 
+		{ "_arrow_Table__from_RecordBatchFileReader", (DL_FUNC) &_arrow_Table__from_RecordBatchFileReader, 1}, 
+		{ "_arrow_ipc___RecordBatchFileReader__batches", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__batches, 1}, 
+		{ "_arrow_ipc___RecordBatchWriter__WriteRecordBatch", (DL_FUNC) &_arrow_ipc___RecordBatchWriter__WriteRecordBatch, 2}, 
+		{ "_arrow_ipc___RecordBatchWriter__WriteTable", (DL_FUNC) &_arrow_ipc___RecordBatchWriter__WriteTable, 2}, 
+		{ "_arrow_ipc___RecordBatchWriter__Close", (DL_FUNC) &_arrow_ipc___RecordBatchWriter__Close, 1}, 
+		{ "_arrow_ipc___RecordBatchFileWriter__Open", (DL_FUNC) &_arrow_ipc___RecordBatchFileWriter__Open, 4}, 
+		{ "_arrow_ipc___RecordBatchStreamWriter__Open", (DL_FUNC) &_arrow_ipc___RecordBatchStreamWriter__Open, 4}, 
+		{ "_arrow_Array__GetScalar", (DL_FUNC) &_arrow_Array__GetScalar, 2}, 
+		{ "_arrow_Scalar__ToString", (DL_FUNC) &_arrow_Scalar__ToString, 1}, 
+		{ "_arrow_StructScalar__field", (DL_FUNC) &_arrow_StructScalar__field, 2}, 
+		{ "_arrow_StructScalar__GetFieldByName", (DL_FUNC) &_arrow_StructScalar__GetFieldByName, 2}, 
+		{ "_arrow_Scalar__as_vector", (DL_FUNC) &_arrow_Scalar__as_vector, 1}, 
+		{ "_arrow_MakeArrayFromScalar", (DL_FUNC) &_arrow_MakeArrayFromScalar, 2}, 
+		{ "_arrow_Scalar__is_valid", (DL_FUNC) &_arrow_Scalar__is_valid, 1}, 
+		{ "_arrow_Scalar__type", (DL_FUNC) &_arrow_Scalar__type, 1}, 
+		{ "_arrow_Scalar__Equals", (DL_FUNC) &_arrow_Scalar__Equals, 2}, 
+		{ "_arrow_Scalar__ApproxEquals", (DL_FUNC) &_arrow_Scalar__ApproxEquals, 2}, 
+		{ "_arrow_schema_", (DL_FUNC) &_arrow_schema_, 1}, 
+		{ "_arrow_Schema__ToString", (DL_FUNC) &_arrow_Schema__ToString, 1}, 
+		{ "_arrow_Schema__num_fields", (DL_FUNC) &_arrow_Schema__num_fields, 1}, 
+		{ "_arrow_Schema__field", (DL_FUNC) &_arrow_Schema__field, 2}, 
+		{ "_arrow_Schema__AddField", (DL_FUNC) &_arrow_Schema__AddField, 3}, 
+		{ "_arrow_Schema__SetField", (DL_FUNC) &_arrow_Schema__SetField, 3}, 
+		{ "_arrow_Schema__RemoveField", (DL_FUNC) &_arrow_Schema__RemoveField, 2}, 
+		{ "_arrow_Schema__GetFieldByName", (DL_FUNC) &_arrow_Schema__GetFieldByName, 2}, 
+		{ "_arrow_Schema__fields", (DL_FUNC) &_arrow_Schema__fields, 1}, 
+		{ "_arrow_Schema__field_names", (DL_FUNC) &_arrow_Schema__field_names, 1}, 
+		{ "_arrow_Schema__HasMetadata", (DL_FUNC) &_arrow_Schema__HasMetadata, 1}, 
+		{ "_arrow_Schema__metadata", (DL_FUNC) &_arrow_Schema__metadata, 1}, 
+		{ "_arrow_Schema__WithMetadata", (DL_FUNC) &_arrow_Schema__WithMetadata, 2}, 
+		{ "_arrow_Schema__serialize", (DL_FUNC) &_arrow_Schema__serialize, 1}, 
+		{ "_arrow_Schema__Equals", (DL_FUNC) &_arrow_Schema__Equals, 3}, 
+		{ "_arrow_arrow__UnifySchemas", (DL_FUNC) &_arrow_arrow__UnifySchemas, 1}, 
+		{ "_arrow_Table__num_columns", (DL_FUNC) &_arrow_Table__num_columns, 1}, 
+		{ "_arrow_Table__num_rows", (DL_FUNC) &_arrow_Table__num_rows, 1}, 
+		{ "_arrow_Table__schema", (DL_FUNC) &_arrow_Table__schema, 1}, 
+		{ "_arrow_Table__ReplaceSchemaMetadata", (DL_FUNC) &_arrow_Table__ReplaceSchemaMetadata, 2}, 
+		{ "_arrow_Table__column", (DL_FUNC) &_arrow_Table__column, 2}, 
+		{ "_arrow_Table__field", (DL_FUNC) &_arrow_Table__field, 2}, 
+		{ "_arrow_Table__columns", (DL_FUNC) &_arrow_Table__columns, 1}, 
+		{ "_arrow_Table__ColumnNames", (DL_FUNC) &_arrow_Table__ColumnNames, 1}, 
+		{ "_arrow_Table__RenameColumns", (DL_FUNC) &_arrow_Table__RenameColumns, 2}, 
+		{ "_arrow_Table__Slice1", (DL_FUNC) &_arrow_Table__Slice1, 2}, 
+		{ "_arrow_Table__Slice2", (DL_FUNC) &_arrow_Table__Slice2, 3}, 
+		{ "_arrow_Table__Equals", (DL_FUNC) &_arrow_Table__Equals, 3}, 
+		{ "_arrow_Table__Validate", (DL_FUNC) &_arrow_Table__Validate, 1}, 
+		{ "_arrow_Table__ValidateFull", (DL_FUNC) &_arrow_Table__ValidateFull, 1}, 
+		{ "_arrow_Table__GetColumnByName", (DL_FUNC) &_arrow_Table__GetColumnByName, 2}, 
+		{ "_arrow_Table__RemoveColumn", (DL_FUNC) &_arrow_Table__RemoveColumn, 2}, 
+		{ "_arrow_Table__AddColumn", (DL_FUNC) &_arrow_Table__AddColumn, 4}, 
+		{ "_arrow_Table__SetColumn", (DL_FUNC) &_arrow_Table__SetColumn, 4}, 
+		{ "_arrow_Table__SelectColumns", (DL_FUNC) &_arrow_Table__SelectColumns, 2}, 
+		{ "_arrow_all_record_batches", (DL_FUNC) &_arrow_all_record_batches, 1}, 
+		{ "_arrow_Table__from_record_batches", (DL_FUNC) &_arrow_Table__from_record_batches, 2}, 
+		{ "_arrow_GetCpuThreadPoolCapacity", (DL_FUNC) &_arrow_GetCpuThreadPoolCapacity, 0}, 
+		{ "_arrow_SetCpuThreadPoolCapacity", (DL_FUNC) &_arrow_SetCpuThreadPoolCapacity, 1}, 
+		{ "_arrow_GetIOThreadPoolCapacity", (DL_FUNC) &_arrow_GetIOThreadPoolCapacity, 0}, 
+		{ "_arrow_SetIOThreadPoolCapacity", (DL_FUNC) &_arrow_SetIOThreadPoolCapacity, 1}, 
+		{ "_arrow_Array__infer_type", (DL_FUNC) &_arrow_Array__infer_type, 1}, 
+		{ "_arrow_Table__Reset", (DL_FUNC) &_arrow_Table__Reset, 1}, 
+		{ "_arrow_RecordBatch__Reset", (DL_FUNC) &_arrow_RecordBatch__Reset, 1}, 
 		{NULL, NULL, 0}
 };
 extern "C" void R_init_arrow(DllInfo* dll){
@@ -7490,3 +7491,5 @@ extern "C" void R_init_arrow(DllInfo* dll){
   #endif
 
 }
+
+
diff --git a/r/src/compute-exec.cpp b/r/src/compute-exec.cpp
index 3d0bbca63d2..cab1a09c6ae 100644
--- a/r/src/compute-exec.cpp
+++ b/r/src/compute-exec.cpp
@@ -57,12 +57,22 @@ std::shared_ptr<compute::ExecNode> MakeExecNodeOrStop(
 // [[arrow::export]]
 std::shared_ptr<arrow::Table> ExecPlan_run(
     const std::shared_ptr<compute::ExecPlan>& plan,
-    const std::shared_ptr<compute::ExecNode>& final_node) {
+    const std::shared_ptr<compute::ExecNode>& final_node, cpp11::list sort_options) {
   // For now, don't require R to construct SinkNodes.
   // Instead, just pass the node we should collect as an argument.
   arrow::AsyncGenerator<arrow::util::optional<compute::ExecBatch>> sink_gen;
-  MakeExecNodeOrStop("sink", plan.get(), {final_node.get()},
-                     compute::SinkNodeOptions{&sink_gen});
+
+  // Sorting uses a different sink node; there is no general sort yet
+  if (sort_options.size() > 0) {
+    MakeExecNodeOrStop("order_by_sink", plan.get(), {final_node.get()},
+                       compute::OrderBySinkNodeOptions{
+                           *std::dynamic_pointer_cast<compute::SortOptions>(
+                               make_compute_options("sort_indices", sort_options)),
+                           &sink_gen});
+  } else {
+    MakeExecNodeOrStop("sink", plan.get(), {final_node.get()},
+                       compute::SinkNodeOptions{&sink_gen});
+  }
 
   StopIfNotOk(plan->Validate());
   StopIfNotOk(plan->StartProducing());
diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R
index b027dc98702..57569be50fe 100644
--- a/r/tests/testthat/test-dataset.R
+++ b/r/tests/testthat/test-dataset.R
@@ -133,7 +133,7 @@ test_that("Simple interface for datasets", {
 
   # Collecting virtual partition column works
   expect_equal(
-    collect(ds) %>% pull(part),
+    ds %>% pull(part),
     c(rep(1, 10), rep(2, 10))
   )
 })
@@ -1728,4 +1728,4 @@ test_that("Error if no format specified and files are not parquet", {
       "Did you mean to specify a 'format'"
     )
   )
-})
+})
\ No newline at end of file
diff --git a/r/tests/testthat/test-dplyr-aggregate.R b/r/tests/testthat/test-dplyr-aggregate.R
index 1aa30654495..e0c5b10d5be 100644
--- a/r/tests/testthat/test-dplyr-aggregate.R
+++ b/r/tests/testthat/test-dplyr-aggregate.R
@@ -85,7 +85,6 @@ test_that("Group by sum on dataset", {
     input %>%
       group_by(some_grouping) %>%
       summarize(total = sum(int, na.rm = TRUE)) %>%
-      arrange(some_grouping) %>%
       collect(),
     tbl
   )
@@ -94,7 +93,6 @@ test_that("Group by sum on dataset", {
     input %>%
       group_by(some_grouping) %>%
       summarize(total = sum(int * 4, na.rm = TRUE)) %>%
-      arrange(some_grouping) %>%
       collect(),
     tbl
   )
@@ -103,7 +101,6 @@ test_that("Group by sum on dataset", {
     input %>%
       group_by(some_grouping) %>%
       summarize(total = sum(int)) %>%
-      arrange(some_grouping) %>%
       collect(),
     tbl,
   )
@@ -195,7 +192,6 @@ test_that("Group by any/all", {
     input %>%
       group_by(some_grouping) %>%
       summarize(any(lgl, na.rm = TRUE)) %>%
-      arrange(some_grouping) %>%
       collect(),
     tbl
   )
@@ -203,7 +199,6 @@ test_that("Group by any/all", {
     input %>%
       group_by(some_grouping) %>%
       summarize(all(lgl, na.rm = TRUE)) %>%
-      arrange(some_grouping) %>%
       collect(),
     tbl
   )
@@ -211,7 +206,6 @@ test_that("Group by any/all", {
     input %>%
       group_by(some_grouping) %>%
       summarize(any(lgl, na.rm = FALSE)) %>%
-      arrange(some_grouping) %>%
       collect(),
     tbl
   )
@@ -219,7 +213,6 @@ test_that("Group by any/all", {
     input %>%
       group_by(some_grouping) %>%
       summarize(all(lgl, na.rm = FALSE)) %>%
-      arrange(some_grouping) %>%
       collect(),
     tbl
   )
@@ -229,7 +222,6 @@ test_that("Group by any/all", {
       mutate(has_words = nchar(verses) < 0) %>%
       group_by(some_grouping) %>%
       summarize(any(has_words, na.rm = TRUE)) %>%
-      arrange(some_grouping) %>%
       collect(),
     tbl
   )
@@ -238,7 +230,6 @@ test_that("Group by any/all", {
       mutate(has_words = nchar(verses) < 0) %>%
       group_by(some_grouping) %>%
       summarize(all(has_words, na.rm = TRUE)) %>%
-      arrange(some_grouping) %>%
       collect(),
     tbl
   )
@@ -246,7 +237,6 @@ test_that("Group by any/all", {
     input %>%
       group_by(some_grouping) %>%
       summarize(has_words = all(nchar(verses) < 0, na.rm = TRUE)) %>%
-      arrange(some_grouping) %>%
       collect(),
     tbl
   )
@@ -291,7 +281,6 @@ test_that("Filter and aggregate", {
       filter(some_grouping == 2) %>%
       group_by(some_grouping) %>%
       summarize(total = sum(int, na.rm = TRUE)) %>%
-      arrange(some_grouping) %>%
       collect(),
     tbl
   )
@@ -301,7 +290,6 @@ test_that("Filter and aggregate", {
       filter(int > 5) %>%
       group_by(some_grouping) %>%
       summarize(total = sum(int, na.rm = TRUE)) %>%
-      arrange(some_grouping) %>%
       collect(),
     tbl
   )
@@ -316,7 +304,7 @@ test_that("Expressions on aggregations", {
         any = any(lgl),
         all = all(lgl)
       ) %>%
-      arrange(some_grouping) %>%
+      collect() %>%
       transmute(some = any & !all) %>%
       collect(),
     tbl
@@ -327,7 +315,6 @@ test_that("Expressions on aggregations", {
     input %>%
       group_by(some_grouping) %>%
       summarize(any(lgl) & !all(lgl)) %>%
-      arrange(some_grouping) %>%
       collect(),
     tbl
   )

From d75f4bdb43bfb2f38e1f9acc9737d0930b7bdda5 Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Tue, 24 Aug 2021 16:53:05 -0400
Subject: [PATCH 03/27] Clean up: remove commented-out sort code

---
 r/R/query-engine.R | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/r/R/query-engine.R b/r/R/query-engine.R
index c595dd27df3..ec5ff637211 100644
--- a/r/R/query-engine.R
+++ b/r/R/query-engine.R
@@ -99,12 +99,6 @@ ExecPlan <- R6Class("ExecPlan",
         }
       }
 
-      # tab <- tab[
-      #   tab$SortIndices(names(x$arrange_vars), x$arrange_desc),
-      #   names(x$selected_columns), # this omits x$temp_columns from the result
-      #   drop = FALSE
-      # ]
-
       # Apply sorting: this is currently not an ExecNode itself, it is a
       # sink node option.
       # TODO: error if doing a subsequent operation that would throw away sorting!
@@ -152,4 +146,4 @@ ExecNode <- R6Class("ExecNode",
       ExecNode_Aggregate(self, options, target_names, out_field_names, key_names)
     }
   )
-)
\ No newline at end of file
+)

From 7e9aa033685649fc66e3508047253918169768be Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Tue, 24 Aug 2021 17:02:57 -0400
Subject: [PATCH 04/27] Add some comments

---
 r/R/query-engine.R | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/r/R/query-engine.R b/r/R/query-engine.R
index ec5ff637211..824ad2ea22c 100644
--- a/r/R/query-engine.R
+++ b/r/R/query-engine.R
@@ -58,13 +58,15 @@ ExecPlan <- R6Class("ExecPlan",
       ExecNode_Scan(self, dataset, filter, colnames %||% character(0))
     },
     Build = function(.data) {
+      # This method takes an arrow_dplyr_query and chains together the
+      # ExecNodes that they produce. It does not evaluate them--that is Run().
       group_vars <- dplyr::group_vars(.data)
       grouped <- length(group_vars) > 0
 
       # Collect the target names first because we have to add back the group vars
       target_names <- names(.data)
       .data <- ensure_group_vars(.data)
-      .data <- ensure_arrange_vars(.data) # this sets x$temp_columns
+      .data <- ensure_arrange_vars(.data) # this sets .data$temp_columns
 
       node <- self$Scan(.data)
       # ARROW-13498: Even though Scan takes the filter, apparently we have to do it again
@@ -96,6 +98,12 @@ ExecPlan <- R6Class("ExecPlan",
           node <- node$Project(
             make_field_refs(c(group_vars, names(.data$aggregations)))
           )
+          # Add sorting instructions for the rows too to match dplyr
+          # (see below about why sorting isn't itself a Node)
+          node$sort <- list(
+            names = group_vars,
+            orders = rep(0L, length(group_vars))
+          )
         }
       }
 
@@ -108,11 +116,6 @@ ExecPlan <- R6Class("ExecPlan",
           orders = as.integer(.data$arrange_desc),
           temp_columns = names(.data$temp_columns)
         )
-      } else if (length(.data$aggregations) && grouped) {
-        node$sort <- list(
-          names = group_vars,
-          orders = rep(0L, length(group_vars))
-        )
       }
       node
     },
@@ -129,6 +132,9 @@ ExecPlan$create <- function(use_threads = option_use_threads()) {
 ExecNode <- R6Class("ExecNode",
   inherit = ArrowObject,
   public = list(
+    # `sort` is a slight hack to be able to keep around arrange() params,
+    # which don't currently yield their own ExecNode but rather are consumed
+    # in the SinkNode (in ExecPlan$run())
     sort = NULL,
     Project = function(cols) {
       if (length(cols)) {
@@ -146,4 +152,4 @@ ExecNode <- R6Class("ExecNode",
       ExecNode_Aggregate(self, options, target_names, out_field_names, key_names)
     }
   )
-)
+)
\ No newline at end of file

From 2cb3ea600a9ea5da77e716c85d427e015601befb Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Thu, 26 Aug 2021 16:03:47 -0400
Subject: [PATCH 05/27] summarize() no longer evaluates eagerly

---
 r/R/dplyr-summarize.R                   |  5 ++---
 r/tests/testthat/test-dplyr-aggregate.R | 15 +++++----------
 2 files changed, 7 insertions(+), 13 deletions(-)

diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R
index 9a0cfc5bf91..5ff2ffc5399 100644
--- a/r/R/dplyr-summarize.R
+++ b/r/R/dplyr-summarize.R
@@ -81,6 +81,5 @@ do_arrow_summarize <- function(.data, ..., .groups = NULL) {
   names(inputs) <- as.character(seq_along(inputs))
   .data$selected_columns <- inputs
 
-  # Eventually, we will return .data here if (dataset) but do it eagerly now
-  do_exec_plan(.data)
-}
+  .data
+}
\ No newline at end of file
diff --git a/r/tests/testthat/test-dplyr-aggregate.R b/r/tests/testthat/test-dplyr-aggregate.R
index e0c5b10d5be..3f848f22a1e 100644
--- a/r/tests/testthat/test-dplyr-aggregate.R
+++ b/r/tests/testthat/test-dplyr-aggregate.R
@@ -51,7 +51,6 @@ test_that("summarize", {
 })
 
 test_that("summarize() doesn't evaluate eagerly", {
-  skip("TODO")
   expect_s3_class(
     Table$create(tbl) %>%
       summarize(total = sum(int)),
@@ -60,7 +59,7 @@ test_that("summarize() doesn't evaluate eagerly", {
   expect_r6_class(
     Table$create(tbl) %>%
       summarize(total = sum(int)) %>%
-      collect(),
+      compute(),
     "ArrowTabular"
   )
 })
@@ -111,7 +110,6 @@ test_that("Group by mean on dataset", {
     input %>%
       group_by(some_grouping) %>%
       summarize(mean = mean(int, na.rm = TRUE)) %>%
-      arrange(some_grouping) %>%
       collect(),
     tbl
   )
@@ -120,7 +118,6 @@ test_that("Group by mean on dataset", {
     input %>%
       group_by(some_grouping) %>%
       summarize(mean = mean(int, na.rm = FALSE)) %>%
-      arrange(some_grouping) %>%
       collect(),
     tbl
   )
@@ -131,7 +128,6 @@ test_that("Group by sd on dataset", {
     input %>%
       group_by(some_grouping) %>%
       summarize(sd = sd(int, na.rm = TRUE)) %>%
-      arrange(some_grouping) %>%
       collect(),
     tbl
   )
@@ -141,7 +137,6 @@ test_that("Group by sd on dataset", {
     input %>%
       group_by(some_grouping) %>%
       summarize(sd = sd(int, na.rm = FALSE)) %>%
-      arrange(some_grouping) %>%
       collect(),
     tbl
   )
@@ -152,7 +147,6 @@ test_that("Group by var on dataset", {
     input %>%
       group_by(some_grouping) %>%
       summarize(var = var(int, na.rm = TRUE)) %>%
-      arrange(some_grouping) %>%
       collect(),
     tbl
   )
@@ -162,7 +156,6 @@ test_that("Group by var on dataset", {
     input %>%
       group_by(some_grouping) %>%
       summarize(var = var(int, na.rm = FALSE)) %>%
-      arrange(some_grouping) %>%
       collect(),
     tbl
   )
@@ -304,8 +297,10 @@ test_that("Expressions on aggregations", {
         any = any(lgl),
         all = all(lgl)
       ) %>%
-      collect() %>%
-      transmute(some = any & !all) %>%
+      compute() %>%
+      ungroup() %>% # TODO: loosen the restriction on mutate after group_by
+      mutate(some = any & !all) %>%
+      select(some_grouping, some) %>%
       collect(),
     tbl
   )

From 9a2cde5a45a65b5b8d072dc035dff724bde16077 Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Thu, 26 Aug 2021 16:24:49 -0400
Subject: [PATCH 06/27] Make dataset tests not assume row order

---
 r/tests/testthat/test-dataset.R | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R
index 57569be50fe..f7a41434e03 100644
--- a/r/tests/testthat/test-dataset.R
+++ b/r/tests/testthat/test-dataset.R
@@ -133,7 +133,7 @@ test_that("Simple interface for datasets", {
 
   # Collecting virtual partition column works
   expect_equal(
-    ds %>% pull(part),
+    ds %>% arrange(part) %>% pull(part),
     c(rep(1, 10), rep(2, 10))
   )
 })
@@ -348,7 +348,7 @@ test_that("IPC/Feather format data", {
 
   # Collecting virtual partition column works
   expect_equal(
-    collect(ds) %>% pull(part),
+    ds %>% arrange(part) %>% pull(part),
     c(rep(3, 10), rep(4, 10))
   )
 })
@@ -376,7 +376,7 @@ test_that("CSV dataset", {
   )
   # Collecting virtual partition column works
   expect_equal(
-    collect(ds) %>% pull(part),
+    collect(ds) %>% arrange(part) %>% pull(part),
     c(rep(5, 10), rep(6, 10))
   )
 })
@@ -804,7 +804,7 @@ test_that("filter scalar validation doesn't crash (ARROW-7772)", {
 test_that("collect() on Dataset works (if fits in memory)", {
   skip_if_not_available("parquet")
   expect_equal(
-    collect(open_dataset(dataset_dir)),
+    collect(open_dataset(dataset_dir)) %>% arrange(int),
     rbind(df1, df2)
   )
 })

From a1cd90f5e1de2c2c2e6dbb6e7981b10af68e8baa Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Thu, 26 Aug 2021 16:52:51 -0400
Subject: [PATCH 07/27] Add support for derived grouping columns in summarize

---
 r/R/dplyr-summarize.R                   |  2 +-
 r/tests/testthat/test-dplyr-aggregate.R | 40 ++++++++++++++++++++++++-
 2 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R
index 5ff2ffc5399..764db6d7146 100644
--- a/r/R/dplyr-summarize.R
+++ b/r/R/dplyr-summarize.R
@@ -79,7 +79,7 @@ do_arrow_summarize <- function(.data, ..., .groups = NULL) {
   # This is essentially a projection, and the column names don't matter
   # (but must exist)
   names(inputs) <- as.character(seq_along(inputs))
-  .data$selected_columns <- inputs
+  .data$selected_columns <- c(inputs, .data$selected_columns[.data$group_by_vars])
 
   .data
 }
\ No newline at end of file
diff --git a/r/tests/testthat/test-dplyr-aggregate.R b/r/tests/testthat/test-dplyr-aggregate.R
index 3f848f22a1e..21b4501e3cd 100644
--- a/r/tests/testthat/test-dplyr-aggregate.R
+++ b/r/tests/testthat/test-dplyr-aggregate.R
@@ -161,7 +161,7 @@ test_that("Group by var on dataset", {
   )
 })
 
-test_that("n()", {
+test_that("Group by any/all", {
   withr::local_options(list(arrow.debug = TRUE))
   expect_dplyr_equal(
     input %>%
@@ -288,6 +288,44 @@ test_that("Filter and aggregate", {
   )
 })
 
+test_that("Group by edge cases", {
+  expect_dplyr_equal(
+    input %>%
+      group_by(some_grouping * 2) %>%
+      summarize(total = sum(int, na.rm = TRUE)) %>%
+      collect(),
+    tbl
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      group_by(alt = some_grouping * 2) %>%
+      summarize(total = sum(int, na.rm = TRUE)) %>%
+      collect(),
+    tbl
+  )
+})
+
+test_that("Do things after summarize", {
+  group2_sum <- tbl %>%
+    group_by(some_grouping) %>%
+    filter(int > 5) %>%
+    summarize(total = sum(int, na.rm = TRUE)) %>%
+    pull() %>%
+    tail(1)
+
+  skip("WIP")
+  expect_dplyr_equal(
+    input %>%
+      group_by(some_grouping) %>%
+      filter(int > 5) %>%
+      summarize(total = sum(int, na.rm = TRUE)) %>%
+      filter(total == group2_sum) %>%
+      collect() %>% print(),
+    tbl
+  )
+})
+
 test_that("Expressions on aggregations", {
   # This is what it effectively is
   expect_dplyr_equal(

From 90612b5f6c77b55400ef623b9411a3db2b8c7482 Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Fri, 27 Aug 2021 13:24:55 -0400
Subject: [PATCH 08/27] summarize() collapses the query so that further
 verbs can be applied to it

---
 r/NAMESPACE                             |  1 +
 r/R/arrow-package.R                     |  2 +-
 r/R/arrowExports.R                      |  5 +-
 r/R/dplyr-functions.R                   | 15 +++++
 r/R/dplyr-summarize.R                   |  9 ++-
 r/R/dplyr.R                             | 38 +++++++++--
 r/R/expression.R                        |  3 +
 r/R/query-engine.R                      | 15 ++++-
 r/src/arrowExports.cpp                  | 17 +++++
 r/src/expression.cpp                    |  5 ++
 r/tests/testthat/test-dplyr-aggregate.R |  4 +-
 r/tests/testthat/test-dplyr-collapse.R  | 84 +++++++++++++++++++++++++
 12 files changed, 180 insertions(+), 18 deletions(-)
 create mode 100644 r/tests/testthat/test-dplyr-collapse.R

diff --git a/r/NAMESPACE b/r/NAMESPACE
index 8ce6d162eb0..5e78d04de52 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -291,6 +291,7 @@ importFrom(bit64,print.integer64)
 importFrom(bit64,str.integer64)
 importFrom(methods,as)
 importFrom(purrr,as_mapper)
+importFrom(purrr,imap)
 importFrom(purrr,imap_chr)
 importFrom(purrr,keep)
 importFrom(purrr,map)
diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R
index 537eebb1b1d..04f01faf268 100644
--- a/r/R/arrow-package.R
+++ b/r/R/arrow-package.R
@@ -17,7 +17,7 @@
 
 #' @importFrom stats quantile median na.omit na.exclude na.pass na.fail
 #' @importFrom R6 R6Class
-#' @importFrom purrr as_mapper map map2 map_chr map2_chr map_dfr map_int map_lgl keep imap_chr
+#' @importFrom purrr as_mapper map map2 map_chr map2_chr map_dfr map_int map_lgl keep imap imap_chr
 #' @importFrom assertthat assert_that is.string
 #' @importFrom rlang list2 %||% is_false abort dots_n warn enquo quo_is_null enquos is_integerish quos
 #' @importFrom rlang eval_tidy new_data_mask syms env new_environment env_bind as_label set_names exec
diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index ce6d2e872d4..b852a3d8ca9 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -816,6 +816,10 @@ FixedSizeListType__list_size <- function(type) {
   .Call(`_arrow_FixedSizeListType__list_size`, type)
 }
 
+compute___expr__equals <- function(lhs, rhs) {
+  .Call(`_arrow_compute___expr__equals`, lhs, rhs)
+}
+
 compute___expr__call <- function(func_name, argument_list, options) {
   .Call(`_arrow_compute___expr__call`, func_name, argument_list, options)
 }
@@ -1767,4 +1771,3 @@ SetIOThreadPoolCapacity <- function(threads) {
 Array__infer_type <- function(x) {
   .Call(`_arrow_Array__infer_type`, x)
 }
-
diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R
index e535546dd1b..2247d22f368 100644
--- a/r/R/dplyr-functions.R
+++ b/r/R/dplyr-functions.R
@@ -840,3 +840,18 @@ agg_funcs$n <- function() {
     options = list()
   )
 }
+
+output_type <- function(fun, input_type) {
+    # These are quick and dirty heuristics.
+    if (fun %in% c("any", "all")) {
+        bool()
+    } else if (fun %in% "sum") {
+        # It may upcast to a bigger type but this is close enough
+        input_type
+    } else if (fun %in% c("mean", "stddev", "variance")) {
+        float64()
+    } else {
+        # Just so things don't error, assume the resulting type is the same
+        input_type
+    }
+}
\ No newline at end of file
diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R
index 764db6d7146..8ca4757a4e2 100644
--- a/r/R/dplyr-summarize.R
+++ b/r/R/dplyr-summarize.R
@@ -72,14 +72,13 @@ do_arrow_summarize <- function(.data, ..., .groups = NULL) {
     # Should we: mask[[new_var]] <- mask$.data[[new_var]] <- results[[new_var]]
   }
 
+  # TODO: Should summarize just record the aggregations and leave this projection etc. to do_exec_plan?
   # Now, from that, split out the data (expressions) and options
   .data$aggregations <- lapply(results, function(x) x[c("fun", "options")])
-
   inputs <- lapply(results, function(x) x$data)
-  # This is essentially a projection, and the column names don't matter
-  # (but must exist)
-  names(inputs) <- as.character(seq_along(inputs))
+  # TODO: validate that none of names(inputs) are the same as names(group_by_vars)
+  # dplyr does not error on this but the result it gives isn't great
   .data$selected_columns <- c(inputs, .data$selected_columns[.data$group_by_vars])
 
-  .data
+  do_collapse(.data)
 }
\ No newline at end of file
diff --git a/r/R/dplyr.R b/r/R/dplyr.R
index c3029a114c3..f7e170415d1 100644
--- a/r/R/dplyr.R
+++ b/r/R/dplyr.R
@@ -43,18 +43,21 @@ arrow_dplyr_query <- function(.data) {
     ))
   }
 
+  .adq(.data)
+}
+
+.adq <- function(.data) {
+  if (!inherits(.data, c("Dataset", "arrow_dplyr_query"))) {
+    .data <- InMemoryDataset$create(.data)
+  }
   structure(
     list(
-      .data = if (inherits(.data, "Dataset")) {
-        .data$clone()
-      } else {
-        InMemoryDataset$create(.data)
-      },
+      .data = .data,
       # selected_columns is a named list:
       # * contents are references/expressions pointing to the data
       # * names are the names they should be in the end (i.e. this
       #   records any renaming)
-      selected_columns = make_field_refs(names(.data)),
+      selected_columns = make_field_refs(names(.data$schema)),
       # filtered_rows will be an Expression
       filtered_rows = TRUE,
       # group_by_vars is a character vector of columns (as renamed)
@@ -75,6 +78,29 @@ arrow_dplyr_query <- function(.data) {
   )
 }
 
+do_collapse <- function(.data) {
+  .data$schema <- implicit_schema(.data)
+  .adq(.data)
+}
+
+implicit_schema <- function(.data) {
+  # c(.data$group_by_vars, names(.data$aggregations))
+  .data <- ensure_group_vars(.data)
+  old_schm <- .data$.data$schema
+  new_fields <- map(.data$selected_columns, ~ .$type(old_schm))
+  if (is.null(.data$aggregations)) {
+    return(schema(!!!new_fields))
+  }
+  # * Put group_by_vars first (this can't be done by summarize, they have to be last per the aggregate node signature, and they get projected to this order after aggregation)
+  # * Infer the output types from the aggregations
+  group_fields <- new_fields[.data$group_by_vars]
+  agg_fields <- imap(
+    new_fields[setdiff(names(new_fields), .data$group_by_vars)],
+    ~ output_type(.data$aggregations[[.y]][["fun"]], .x)
+  )
+  schema(!!!c(group_fields, agg_fields))
+}
+
 make_field_refs <- function(field_names) {
   set_names(lapply(field_names, Expression$field_ref), field_names)
 }
diff --git a/r/R/expression.R b/r/R/expression.R
index aa9af9270c9..82e21ccf2e1 100644
--- a/r/R/expression.R
+++ b/r/R/expression.R
@@ -125,6 +125,9 @@ Expression <- R6Class("Expression",
   inherit = ArrowObject,
   public = list(
     ToString = function() compute___expr__ToString(self),
+    Equals = function(other, ...) {
+      inherits(other, "Expression") && compute___expr__equals(self, other)
+    },
     # TODO: Implement type determination without storing
     # schemas in Expression objects (ARROW-13186)
     schema = NULL,
diff --git a/r/R/query-engine.R b/r/R/query-engine.R
index 824ad2ea22c..58b3f1ab28e 100644
--- a/r/R/query-engine.R
+++ b/r/R/query-engine.R
@@ -68,13 +68,22 @@ ExecPlan <- R6Class("ExecPlan",
       .data <- ensure_group_vars(.data)
       .data <- ensure_arrange_vars(.data) # this sets .data$temp_columns
 
-      node <- self$Scan(.data)
+      if (inherits(.data$.data, "arrow_dplyr_query")) {
+        # We have a nested query. Recurse.
+        node <- self$Build(.data$.data)
+      } else {
+        node <- self$Scan(.data)
+      }
+
       # ARROW-13498: Even though Scan takes the filter, apparently we have to do it again
       if (inherits(.data$filtered_rows, "Expression")) {
         node <- node$Filter(.data$filtered_rows)
       }
-      # If any columns are derived we need to Project (otherwise this may be no-op)
-      node <- node$Project(c(.data$selected_columns, .data$temp_columns))
+      # If any columns are derived, reordered, or renamed we need to Project
+      projection <- c(.data$selected_columns, .data$temp_columns)
+      if (!isTRUE(all.equal(projection, make_field_refs(names(.data$.data$schema))))) {
+        node <- node$Project(projection)
+      }
 
       if (length(.data$aggregations)) {
         if (grouped) {
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index de8ca36af6c..f33b81c08f0 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -3174,6 +3174,22 @@ extern "C" SEXP _arrow_FixedSizeListType__list_size(SEXP type_sexp){
 }
 #endif
 
+// expression.cpp
+#if defined(ARROW_R_WITH_ARROW)
+bool compute___expr__equals(const std::shared_ptr<compute::Expression>& lhs, const std::shared_ptr<compute::Expression>& rhs);
+extern "C" SEXP _arrow_compute___expr__equals(SEXP lhs_sexp, SEXP rhs_sexp){
+BEGIN_CPP11
+	arrow::r::Input<const std::shared_ptr<compute::Expression>&>::type lhs(lhs_sexp);
+	arrow::r::Input<const std::shared_ptr<compute::Expression>&>::type rhs(rhs_sexp);
+	return cpp11::as_sexp(compute___expr__equals(lhs, rhs));
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_compute___expr__equals(SEXP lhs_sexp, SEXP rhs_sexp){
+	Rf_error("Cannot call compute___expr__equals(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
 // expression.cpp
 #if defined(ARROW_R_WITH_ARROW)
 std::shared_ptr<compute::Expression> compute___expr__call(std::string func_name, cpp11::list argument_list, cpp11::list options);
@@ -7240,6 +7256,7 @@ static const R_CallMethodDef CallEntries[] = {
 		{ "_arrow_FixedSizeListType__value_field", (DL_FUNC) &_arrow_FixedSizeListType__value_field, 1}, 
 		{ "_arrow_FixedSizeListType__value_type", (DL_FUNC) &_arrow_FixedSizeListType__value_type, 1}, 
 		{ "_arrow_FixedSizeListType__list_size", (DL_FUNC) &_arrow_FixedSizeListType__list_size, 1}, 
+		{ "_arrow_compute___expr__equals", (DL_FUNC) &_arrow_compute___expr__equals, 2}, 
 		{ "_arrow_compute___expr__call", (DL_FUNC) &_arrow_compute___expr__call, 3}, 
 		{ "_arrow_field_names_in_expression", (DL_FUNC) &_arrow_field_names_in_expression, 1}, 
 		{ "_arrow_compute___expr__get_field_ref_name", (DL_FUNC) &_arrow_compute___expr__get_field_ref_name, 1}, 
diff --git a/r/src/expression.cpp b/r/src/expression.cpp
index 3fcba46e911..97a8a746bba 100644
--- a/r/src/expression.cpp
+++ b/r/src/expression.cpp
@@ -27,6 +27,11 @@ namespace compute = ::arrow::compute;
 std::shared_ptr<compute::FunctionOptions> make_compute_options(std::string func_name,
                                                                cpp11::list options);
 
+// [[arrow::export]]
+bool compute___expr__equals(const std::shared_ptr<compute::Expression>& lhs,
+                            const std::shared_ptr<compute::Expression>& rhs) {
+  return lhs->Equals(*rhs);
+}
 // [[arrow::export]]
 std::shared_ptr<compute::Expression> compute___expr__call(std::string func_name,
                                                           cpp11::list argument_list,
diff --git a/r/tests/testthat/test-dplyr-aggregate.R b/r/tests/testthat/test-dplyr-aggregate.R
index 21b4501e3cd..32ee3cd20ac 100644
--- a/r/tests/testthat/test-dplyr-aggregate.R
+++ b/r/tests/testthat/test-dplyr-aggregate.R
@@ -314,14 +314,14 @@ test_that("Do things after summarize", {
     pull() %>%
     tail(1)
 
-  skip("WIP")
   expect_dplyr_equal(
     input %>%
       group_by(some_grouping) %>%
       filter(int > 5) %>%
       summarize(total = sum(int, na.rm = TRUE)) %>%
       filter(total == group2_sum) %>%
-      collect() %>% print(),
+      mutate(extra = total * 5) %>%
+      collect(),
     tbl
   )
 })
diff --git a/r/tests/testthat/test-dplyr-collapse.R b/r/tests/testthat/test-dplyr-collapse.R
new file mode 100644
index 00000000000..84287976ced
--- /dev/null
+++ b/r/tests/testthat/test-dplyr-collapse.R
@@ -0,0 +1,84 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+skip_if_not_available("dataset")
+
+library(dplyr)
+library(stringr)
+
+tbl <- example_data
+# Add some better string data
+tbl$verses <- verses[[1]]
+# c(" a ", "  b  ", "   c   ", ...) increasing padding
+# nchar =   3  5  7  9 11 13 15 17 19 21
+tbl$padded_strings <- stringr::str_pad(letters[1:10], width = 2 * (1:10) + 1, side = "both")
+tbl$some_grouping <- rep(c(1, 2), 5)
+
+tab <- Table$create(tbl)
+
+test_that("implicit_schema with select", {
+  expect_equal(
+    tab %>%
+      select(int, lgl) %>%
+      implicit_schema(),
+    schema(int = int32(), lgl = bool())
+  )
+})
+
+test_that("implicit_schema with rename", {
+  expect_equal(
+    tab %>%
+      select(numbers = int, lgl) %>%
+      implicit_schema(),
+    schema(numbers = int32(), lgl = bool())
+  )
+})
+
+test_that("implicit_schema with mutate", {
+  expect_equal(
+    tab %>%
+      transmute(
+        numbers = int * 4,
+        words = as.character(int)
+      ) %>%
+      implicit_schema(),
+    schema(numbers = float64(), words = utf8())
+  )
+})
+
+test_that("implicit_schema with summarize", {
+  expect_equal(
+    tab %>%
+      summarize(
+        avg = mean(int)
+      ) %>%
+      implicit_schema(),
+    schema(avg = float64())
+  )
+})
+
+test_that("implicit_schema with group_by summarize", {
+  expect_equal(
+    tab %>%
+      group_by(some_grouping) %>%
+      summarize(
+        avg = mean(int * 5L)
+      ) %>%
+      implicit_schema(),
+    schema(some_grouping = float64(), avg = float64())
+  )
+})
\ No newline at end of file

From f6cf638cd2cec9789bfa7c0a4e3bcec6e36ea65e Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Fri, 27 Aug 2021 13:32:03 -0400
Subject: [PATCH 09/27] Rename test file

---
 .../testthat/{test-dplyr-aggregate.R => test-dplyr-summarize.R} | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 rename r/tests/testthat/{test-dplyr-aggregate.R => test-dplyr-summarize.R} (99%)

diff --git a/r/tests/testthat/test-dplyr-aggregate.R b/r/tests/testthat/test-dplyr-summarize.R
similarity index 99%
rename from r/tests/testthat/test-dplyr-aggregate.R
rename to r/tests/testthat/test-dplyr-summarize.R
index 32ee3cd20ac..ec9d281e053 100644
--- a/r/tests/testthat/test-dplyr-aggregate.R
+++ b/r/tests/testthat/test-dplyr-summarize.R
@@ -343,7 +343,7 @@ test_that("Expressions on aggregations", {
     tbl
   )
   # More concisely:
-  skip("Not implemented")
+  skip("TODO: ARROW-13778")
   expect_dplyr_equal(
     input %>%
       group_by(some_grouping) %>%

From bd6e3632f7183fc00987634e4e701a03458360e4 Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Fri, 27 Aug 2021 14:11:07 -0400
Subject: [PATCH 10/27] Refactor and fix tests

---
 r/R/dplyr-mutate.R                   | 15 ++++++----
 r/R/dplyr-summarize.R                | 24 ++++++----------
 r/R/dplyr.R                          | 25 +++++++++--------
 r/R/query-engine.R                   | 41 ++++++++++++++++++++--------
 r/tests/testthat/test-dplyr-filter.R |  2 +-
 5 files changed, 62 insertions(+), 45 deletions(-)

diff --git a/r/R/dplyr-mutate.R b/r/R/dplyr-mutate.R
index f19505c1958..f2df4a078a4 100644
--- a/r/R/dplyr-mutate.R
+++ b/r/R/dplyr-mutate.R
@@ -24,7 +24,7 @@ mutate.arrow_dplyr_query <- function(.data,
                                      .before = NULL,
                                      .after = NULL) {
   call <- match.call()
-  exprs <- quos(...)
+  exprs <- ensure_named_exprs(quos(...))
 
   .keep <- match.arg(.keep)
   .before <- enquo(.before)
@@ -45,11 +45,6 @@ mutate.arrow_dplyr_query <- function(.data,
     return(abandon_ship(call, .data, "mutate() on grouped data not supported in Arrow"))
   }
 
-  # Check for unnamed expressions and fix if any
-  unnamed <- !nzchar(names(exprs))
-  # Deparse and take the first element in case they're long expressions
-  names(exprs)[unnamed] <- map_chr(exprs[unnamed], as_label)
-
   mask <- arrow_mask(.data)
   results <- list()
   for (i in seq_along(exprs)) {
@@ -133,3 +128,11 @@ check_transmute_args <- function(..., .keep, .before, .after) {
   }
   enquos(...)
 }
+
+ensure_named_exprs <- function(exprs) {
+  # Check for unnamed expressions and fix if any
+  unnamed <- !nzchar(names(exprs))
+  # Deparse and take the first element in case they're long expressions
+  names(exprs)[unnamed] <- map_chr(exprs[unnamed], as_label)
+  exprs
+}
\ No newline at end of file
diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R
index 8ca4757a4e2..5fb22200a04 100644
--- a/r/R/dplyr-summarize.R
+++ b/r/R/dplyr-summarize.R
@@ -47,11 +47,7 @@ do_arrow_summarize <- function(.data, ..., .groups = NULL) {
     # ARROW-13550
     abort("`summarize()` with `.groups` argument not supported in Arrow")
   }
-  exprs <- quos(...)
-  # Check for unnamed expressions and fix if any
-  unnamed <- !nzchar(names(exprs))
-  # Deparse and take the first element in case they're long expressions
-  names(exprs)[unnamed] <- map_chr(exprs[unnamed], as_label)
+  exprs <- ensure_named_exprs(quos(...))
 
   mask <- arrow_mask(.data, aggregation = TRUE)
 
@@ -68,17 +64,15 @@ do_arrow_summarize <- function(.data, ..., .groups = NULL) {
       )
       stop(msg, call. = FALSE)
     }
-    # Put it in the data mask too?
-    # Should we: mask[[new_var]] <- mask$.data[[new_var]] <- results[[new_var]]
   }
 
-  # TODO: Should summarize just record the aggregations and leave this projection etc. to do_exec_plan?
-  # Now, from that, split out the data (expressions) and options
-  .data$aggregations <- lapply(results, function(x) x[c("fun", "options")])
-  inputs <- lapply(results, function(x) x$data)
-  # TODO: validate that none of names(inputs) are the same as names(group_by_vars)
-  # dplyr does not error on this but the result it gives isn't great
-  .data$selected_columns <- c(inputs, .data$selected_columns[.data$group_by_vars])
-
+  .data$aggregations <- results
   do_collapse(.data)
+}
+
+summarize_projection <- function(.data) {
+  c(
+    map(.data$aggregations, ~ .$data),
+    .data$selected_columns[.data$group_by_vars]
+  )
 }
\ No newline at end of file
diff --git a/r/R/dplyr.R b/r/R/dplyr.R
index f7e170415d1..a93a0020f82 100644
--- a/r/R/dplyr.R
+++ b/r/R/dplyr.R
@@ -87,18 +87,21 @@ implicit_schema <- function(.data) {
   # c(.data$group_by_vars, names(.data$aggregations))
   .data <- ensure_group_vars(.data)
   old_schm <- .data$.data$schema
-  new_fields <- map(.data$selected_columns, ~ .$type(old_schm))
+
   if (is.null(.data$aggregations)) {
-    return(schema(!!!new_fields))
+    new_fields <- map(.data$selected_columns, ~ .$type(old_schm))
+  } else {
+    new_fields <- map(summarize_projection(.data), ~ .$type(old_schm))
+    # * Put group_by_vars first (this can't be done by summarize, they have to be last per the aggregate node signature, and they get projected to this order after aggregation)
+    # * Infer the output types from the aggregations
+    group_fields <- new_fields[.data$group_by_vars]
+    agg_fields <- imap(
+      new_fields[setdiff(names(new_fields), .data$group_by_vars)],
+      ~ output_type(.data$aggregations[[.y]][["fun"]], .x)
+    )
+    new_fields <- c(group_fields, agg_fields)
   }
-  # * Put group_by_vars first (this can't be done by summarize, they have to be last per the aggregate node signature, and they get projected to this order after aggregation)
-  # * Infer the output types from the aggregations
-  group_fields <- new_fields[.data$group_by_vars]
-  agg_fields <- imap(
-    new_fields[setdiff(names(new_fields), .data$group_by_vars)],
-    ~ output_type(.data$aggregations[[.y]][["fun"]], .x)
-  )
-  schema(!!!c(group_fields, agg_fields))
+  schema(!!!new_fields)
 }
 
 make_field_refs <- function(field_names) {
@@ -233,4 +236,4 @@ abandon_ship <- function(call, .data, msg) {
   eval.parent(call, 2)
 }
 
-query_on_dataset <- function(x) !inherits(x$.data, "InMemoryDataset")
+query_on_dataset <- function(x) !inherits(x$.data, "InMemoryDataset")
\ No newline at end of file
diff --git a/r/R/query-engine.R b/r/R/query-engine.R
index 58b3f1ab28e..462d58cece0 100644
--- a/r/R/query-engine.R
+++ b/r/R/query-engine.R
@@ -79,13 +79,14 @@ ExecPlan <- R6Class("ExecPlan",
       if (inherits(.data$filtered_rows, "Expression")) {
         node <- node$Filter(.data$filtered_rows)
       }
-      # If any columns are derived, reordered, or renamed we need to Project
-      projection <- c(.data$selected_columns, .data$temp_columns)
-      if (!isTRUE(all.equal(projection, make_field_refs(names(.data$.data$schema))))) {
-        node <- node$Project(projection)
-      }
 
-      if (length(.data$aggregations)) {
+      if (!is.null(.data$aggregations)) {
+        # Project to include just the data required for each aggregation,
+        # plus group_by_vars (last)
+        # TODO: validate that none of names(aggregations) are the same as names(group_by_vars)
+        # dplyr does not error on this but the result it gives isn't great
+        node <- node$Project(summarize_projection(.data))
+
         if (grouped) {
           # We need to prefix all of the aggregation function names with "hash_"
           .data$aggregations <- lapply(.data$aggregations, function(x) {
@@ -95,8 +96,8 @@ ExecPlan <- R6Class("ExecPlan",
         }
 
         node <- node$Aggregate(
-          options = .data$aggregations,
-          target_names = target_names,
+          options = map(.data$aggregations, ~ .[c("fun", "options")]),
+          target_names = names(.data$aggregations),
           out_field_names = names(.data$aggregations),
           key_names = group_vars
         )
@@ -114,6 +115,16 @@ ExecPlan <- R6Class("ExecPlan",
             orders = rep(0L, length(group_vars))
           )
         }
+      } else {
+        # If any columns are derived, reordered, or renamed we need to Project
+        # If there are aggregations, the projection was already handled above
+        # We have to project at least once to eliminate some junk columns
+        # that the ExecPlan adds:
+        # __fragment_index, __batch_index, __last_in_fragment
+        # Presumably extraneous repeated projection of the same thing
+        # (as when we've done collapse() and not projected after) is cheap/no-op
+        projection <- c(.data$selected_columns, .data$temp_columns)
+        node <- node$Project(projection)
       }
 
       # Apply sorting: this is currently not an ExecNode itself, it is a
@@ -145,20 +156,26 @@ ExecNode <- R6Class("ExecNode",
     # which don't currently yield their own ExecNode but rather are consumed
     # in the SinkNode (in ExecPlan$run())
     sort = NULL,
+    preserve_sort = function(new_node) {
+      new_node$sort <- self$sort
+      new_node
+    },
     Project = function(cols) {
       if (length(cols)) {
         assert_is_list_of(cols, "Expression")
-        ExecNode_Project(self, cols, names(cols))
+        self$preserve_sort(ExecNode_Project(self, cols, names(cols)))
       } else {
-        ExecNode_Project(self, character(0), character(0))
+        self$preserve_sort(ExecNode_Project(self, character(0), character(0)))
       }
     },
     Filter = function(expr) {
       assert_is(expr, "Expression")
-      ExecNode_Filter(self, expr)
+      self$preserve_sort(ExecNode_Filter(self, expr))
     },
     Aggregate = function(options, target_names, out_field_names, key_names) {
-      ExecNode_Aggregate(self, options, target_names, out_field_names, key_names)
+      self$preserve_sort(
+        ExecNode_Aggregate(self, options, target_names, out_field_names, key_names)
+      )
     }
   )
 )
\ No newline at end of file
diff --git a/r/tests/testthat/test-dplyr-filter.R b/r/tests/testthat/test-dplyr-filter.R
index e56ee4be462..ea511e88f94 100644
--- a/r/tests/testthat/test-dplyr-filter.R
+++ b/r/tests/testthat/test-dplyr-filter.R
@@ -396,4 +396,4 @@ test_that("filter() with .data pronoun", {
       collect(),
     tbl
   )
-})
+})
\ No newline at end of file

From bcea9c8f953670d2744ac3bee4250c67a3c35af1 Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Fri, 27 Aug 2021 14:24:58 -0400
Subject: [PATCH 11/27] Clarify comments and add todos for the collapse() work

---
 r/R/dataset-scan.R | 1 +
 r/R/dplyr.R        | 9 +++++++--
 r/R/query-engine.R | 7 +++++--
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/r/R/dataset-scan.R b/r/R/dataset-scan.R
index 615b0f945a8..3ea501ea9ec 100644
--- a/r/R/dataset-scan.R
+++ b/r/R/dataset-scan.R
@@ -82,6 +82,7 @@ Scanner$create <- function(dataset,
   }
 
   if (inherits(dataset, "arrow_dplyr_query")) {
+    # TODO: update for collapse()
     if (inherits(dataset$.data, "ArrowTabular")) {
       # To handle mutate() on Table/RecordBatch, we need to collect(as_data_frame=FALSE) now
       dataset <- dplyr::collect(dataset, as_data_frame = FALSE)
diff --git a/r/R/dplyr.R b/r/R/dplyr.R
index a93a0020f82..904792b59cc 100644
--- a/r/R/dplyr.R
+++ b/r/R/dplyr.R
@@ -78,13 +78,13 @@ arrow_dplyr_query <- function(.data) {
   )
 }
 
+# TODO: move to dplyr-collect.R
 do_collapse <- function(.data) {
   .data$schema <- implicit_schema(.data)
   .adq(.data)
 }
 
 implicit_schema <- function(.data) {
-  # c(.data$group_by_vars, names(.data$aggregations))
   .data <- ensure_group_vars(.data)
   old_schm <- .data$.data$schema
 
@@ -111,6 +111,7 @@ make_field_refs <- function(field_names) {
 #' @export
 print.arrow_dplyr_query <- function(x, ...) {
   schm <- x$.data$schema
+  # TODO: refactor this to use implicit_schema(x)
   types <- map_chr(x$selected_columns, function(expr) {
     name <- expr$field_name
     if (nzchar(name)) {
@@ -125,6 +126,7 @@ print.arrow_dplyr_query <- function(x, ...) {
     }
   })
   fields <- paste(names(types), types, sep = ": ", collapse = "\n")
+  # TODO: update for collapse()
   cat(class(x$.data)[1], " (query)\n", sep = "")
   cat(fields, "\n", sep = "")
   cat("\n")
@@ -150,6 +152,7 @@ print.arrow_dplyr_query <- function(x, ...) {
       sep = ""
     )
   }
+  # TODO: update for collapse()
   cat("See $.data for the source Arrow object\n")
   invisible(x)
 }
@@ -163,6 +166,7 @@ dim.arrow_dplyr_query <- function(x) {
   cols <- length(names(x))
 
   if (isTRUE(x$filtered)) {
+    # TODO: update for collapse()
     rows <- x$.data$num_rows
   } else {
     rows <- Scanner$create(x)$CountRows()
@@ -236,4 +240,5 @@ abandon_ship <- function(call, .data, msg) {
   eval.parent(call, 2)
 }
 
-query_on_dataset <- function(x) !inherits(x$.data, "InMemoryDataset")
\ No newline at end of file
+# TODO: update for collapse()
+query_on_dataset <- function(x) !inherits(x$.data, "InMemoryDataset")
diff --git a/r/R/query-engine.R b/r/R/query-engine.R
index 462d58cece0..f7a5c2888d2 100644
--- a/r/R/query-engine.R
+++ b/r/R/query-engine.R
@@ -43,6 +43,7 @@ ExecPlan <- R6Class("ExecPlan",
           dataset$selected_columns,
           field_names_in_expression
         )))
+        # TODO: update for collapse() (assert that is Dataset now?)
         dataset <- dataset$.data
       } else {
         if (inherits(dataset, "ArrowTabular")) {
@@ -129,7 +130,9 @@ ExecPlan <- R6Class("ExecPlan",
 
       # Apply sorting: this is currently not an ExecNode itself, it is a
       # sink node option.
-      # TODO: error if doing a subsequent operation that would throw away sorting!
+      # TODO: handle some cases:
+      # (1) arrange > summarize > arrange
+      # (2) ARROW-13779: arrange then operation where order matters (e.g. cumsum)
       if (length(.data$arrange_vars)) {
         node$sort <- list(
           names = names(.data$arrange_vars),
@@ -178,4 +181,4 @@ ExecNode <- R6Class("ExecNode",
       )
     }
   )
-)
\ No newline at end of file
+)

From 11c7066dc96343cb854e628dfd686bd950a42150 Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Mon, 30 Aug 2021 12:24:11 -0400
Subject: [PATCH 12/27] Add collapse()

---
 r/R/arrow-package.R                     |   2 +-
 r/R/dataset-scan.R                      |  17 ++--
 r/R/dplyr-arrange.R                     |   2 +-
 r/R/dplyr-collect.R                     |  38 +++++++-
 r/R/dplyr-filter.R                      |   2 +-
 r/R/dplyr-group-by.R                    |   2 +-
 r/R/dplyr-mutate.R                      |   4 +-
 r/R/dplyr-select.R                      |   6 +-
 r/R/dplyr-summarize.R                   |   7 +-
 r/R/dplyr.R                             |  71 ++++++---------
 r/R/duckdb.R                            |   2 +-
 r/R/query-engine.R                      |   4 +-
 r/tests/testthat/test-dplyr-collapse.R  | 111 ++++++++++++++++++++++++
 r/tests/testthat/test-dplyr-summarize.R |  16 ++++
 14 files changed, 210 insertions(+), 74 deletions(-)

diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R
index 04f01faf268..c09b8f05319 100644
--- a/r/R/arrow-package.R
+++ b/r/R/arrow-package.R
@@ -35,7 +35,7 @@
     c(
       "select", "filter", "collect", "summarise", "group_by", "groups",
       "group_vars", "group_by_drop_default", "ungroup", "mutate", "transmute",
-      "arrange", "rename", "pull", "relocate", "compute"
+      "arrange", "rename", "pull", "relocate", "compute", "collapse"
     )
   )
   for (cl in c("Dataset", "ArrowTabular", "arrow_dplyr_query")) {
diff --git a/r/R/dataset-scan.R b/r/R/dataset-scan.R
index 3ea501ea9ec..dc0686ee605 100644
--- a/r/R/dataset-scan.R
+++ b/r/R/dataset-scan.R
@@ -73,19 +73,14 @@ Scanner$create <- function(dataset,
                            projection = NULL,
                            filter = TRUE,
                            use_threads = option_use_threads(),
-                           use_async = NULL,
+                           use_async = getOption("arrow.use_async", FALSE),
                            batch_size = NULL,
                            fragment_scan_options = NULL,
                            ...) {
-  if (is.null(use_async)) {
-    use_async <- getOption("arrow.use_async", FALSE)
-  }
-
   if (inherits(dataset, "arrow_dplyr_query")) {
-    # TODO: update for collapse()
-    if (inherits(dataset$.data, "ArrowTabular")) {
-      # To handle mutate() on Table/RecordBatch, we need to collect(as_data_frame=FALSE) now
-      dataset <- dplyr::collect(dataset, as_data_frame = FALSE)
+    if (is_collapsed(dataset)) {
+      # TODO: Is there a way to get a RecordBatchReader rather than evaluating?
+      dataset$.data <- as_adq(dplyr::compute(dataset$.data))$.data
     }
 
     proj <- c(dataset$selected_columns, dataset$temp_columns)
@@ -118,7 +113,7 @@ Scanner$create <- function(dataset,
       ...
     ))
   }
-  if (inherits(dataset, c("data.frame", "RecordBatch", "Table"))) {
+  if (inherits(dataset, c("data.frame", "ArrowTabular"))) {
     dataset <- InMemoryDataset$create(dataset)
   }
   assert_is(dataset, "Dataset")
@@ -236,4 +231,4 @@ ScannerBuilder <- R6Class("ScannerBuilder",
 )
 
 #' @export
-names.ScannerBuilder <- function(x) names(x$schema)
+names.ScannerBuilder <- function(x) names(x$schema)
\ No newline at end of file
diff --git a/r/R/dplyr-arrange.R b/r/R/dplyr-arrange.R
index 345fc183295..017e1d6b302 100644
--- a/r/R/dplyr-arrange.R
+++ b/r/R/dplyr-arrange.R
@@ -30,7 +30,7 @@ arrange.arrow_dplyr_query <- function(.data, ..., .by_group = FALSE) {
     # Nothing to do
     return(.data)
   }
-  .data <- arrow_dplyr_query(.data)
+  .data <- as_adq(.data)
   # find and remove any dplyr::desc() and tidy-eval
   # the arrange expressions inside an Arrow data_mask
   sorts <- vector("list", length(exprs))
diff --git a/r/R/dplyr-collect.R b/r/R/dplyr-collect.R
index 7db1b682305..f810a1bd57c 100644
--- a/r/R/dplyr-collect.R
+++ b/r/R/dplyr-collect.R
@@ -19,7 +19,6 @@
 # The following S3 methods are registered on load if dplyr is present
 
 collect.arrow_dplyr_query <- function(x, as_data_frame = TRUE, ...) {
-  # Pull only the selected rows and cols into R
   # See query-engine.R for ExecPlan/Nodes
   tab <- do_exec_plan(x)
   if (as_data_frame) {
@@ -37,20 +36,21 @@ collect.ArrowTabular <- function(x, as_data_frame = TRUE, ...) {
     x
   }
 }
-collect.Dataset <- function(x, ...) dplyr::collect(arrow_dplyr_query(x), ...)
+collect.Dataset <- function(x, ...) dplyr::collect(as_adq(x), ...)
 
 compute.arrow_dplyr_query <- function(x, ...) dplyr::collect(x, as_data_frame = FALSE)
 compute.ArrowTabular <- function(x, ...) x
 compute.Dataset <- compute.arrow_dplyr_query
 
 pull.arrow_dplyr_query <- function(.data, var = -1) {
-  .data <- arrow_dplyr_query(.data)
+  .data <- as_adq(.data)
   var <- vars_pull(names(.data), !!enquo(var))
   .data$selected_columns <- set_names(.data$selected_columns[var], var)
   dplyr::collect(.data)[[1]]
 }
 pull.Dataset <- pull.ArrowTabular <- pull.arrow_dplyr_query
 
+# TODO: Correctly handle group_vars after summarize; also in collapse()
 restore_dplyr_features <- function(df, query) {
   # An arrow_dplyr_query holds some attributes that Arrow doesn't know about
   # After calling collect(), make sure these features are carried over
@@ -65,10 +65,40 @@ restore_dplyr_features <- function(df, query) {
       )
     } else {
       # This is a Table, via compute() or collect(as_data_frame = FALSE)
-      df <- arrow_dplyr_query(df)
+      df <- as_adq(df)
       df$group_by_vars <- query$group_by_vars
       df$drop_empty_groups <- query$drop_empty_groups
     }
   }
   df
 }
+
+collapse.arrow_dplyr_query <- function(x, ...) {
+  # Store on the query the schema that implicit_schema() infers for its result
+  x$schema <- implicit_schema(x)
+  # Wrap it as the `.data` of a new arrow_dplyr_query (cf. is_collapsed())
+  arrow_dplyr_query(x)
+}
+collapse.Dataset <- collapse.ArrowTabular <- function(x, ...) {
+  arrow_dplyr_query(x) # no query to nest yet: just start a new one
+}
+
+implicit_schema <- function(.data) {
+  .data <- ensure_group_vars(.data)
+  old_schm <- .data$.data$schema
+  # Derive each output field's type from its expression and the source schema
+  if (is.null(.data$aggregations)) {
+    new_fields <- map(.data$selected_columns, ~ .$type(old_schm))
+  } else {
+    new_fields <- map(summarize_projection(.data), ~ .$type(old_schm))
+    # Group fields go first (the aggregate node takes them last; they are
+    # re-projected first afterwards); aggregations determine the other types
+    group_fields <- new_fields[.data$group_by_vars]
+    agg_fields <- imap(
+      new_fields[setdiff(names(new_fields), .data$group_by_vars)],
+      ~ output_type(.data$aggregations[[.y]][["fun"]], .x)
+    )
+    new_fields <- c(group_fields, agg_fields)
+  }
+  schema(!!!new_fields)
+}
\ No newline at end of file
diff --git a/r/R/dplyr-filter.R b/r/R/dplyr-filter.R
index a44750a9c81..61f27010e77 100644
--- a/r/R/dplyr-filter.R
+++ b/r/R/dplyr-filter.R
@@ -26,7 +26,7 @@ filter.arrow_dplyr_query <- function(.data, ..., .preserve = FALSE) {
     return(.data)
   }
 
-  .data <- arrow_dplyr_query(.data)
+  .data <- as_adq(.data)
   # tidy-eval the filter expressions inside an Arrow data_mask
   filters <- lapply(filts, arrow_eval, arrow_mask(.data))
   bad_filters <- map_lgl(filters, ~ inherits(., "try-error"))
diff --git a/r/R/dplyr-group-by.R b/r/R/dplyr-group-by.R
index 42cca039022..a89144d6c4a 100644
--- a/r/R/dplyr-group-by.R
+++ b/r/R/dplyr-group-by.R
@@ -23,7 +23,7 @@ group_by.arrow_dplyr_query <- function(.data,
                                        .add = FALSE,
                                        add = .add,
                                        .drop = dplyr::group_by_drop_default(.data)) {
-  .data <- arrow_dplyr_query(.data)
+  .data <- as_adq(.data)
   new_groups <- enquos(...)
   # ... can contain expressions (i.e. can add (or rename?) columns) and so we
   # need to identify those and add them on to the query with mutate. Specifically,
diff --git a/r/R/dplyr-mutate.R b/r/R/dplyr-mutate.R
index f2df4a078a4..051c5254e50 100644
--- a/r/R/dplyr-mutate.R
+++ b/r/R/dplyr-mutate.R
@@ -35,7 +35,7 @@ mutate.arrow_dplyr_query <- function(.data,
     return(.data)
   }
 
-  .data <- arrow_dplyr_query(.data)
+  .data <- as_adq(.data)
 
   # Restrict the cases we support for now
   if (length(dplyr::group_vars(.data)) > 0) {
@@ -135,4 +135,4 @@ ensure_named_exprs <- function(exprs) {
   # Deparse and take the first element in case they're long expressions
   names(exprs)[unnamed] <- map_chr(exprs[unnamed], as_label)
   exprs
-}
\ No newline at end of file
+}
diff --git a/r/R/dplyr-select.R b/r/R/dplyr-select.R
index ee740db4cfb..9a867ced964 100644
--- a/r/R/dplyr-select.R
+++ b/r/R/dplyr-select.R
@@ -22,13 +22,13 @@ tbl_vars.arrow_dplyr_query <- function(x) names(x$selected_columns)
 
 select.arrow_dplyr_query <- function(.data, ...) {
   check_select_helpers(enexprs(...))
-  column_select(arrow_dplyr_query(.data), !!!enquos(...))
+  column_select(as_adq(.data), !!!enquos(...))
 }
 select.Dataset <- select.ArrowTabular <- select.arrow_dplyr_query
 
 rename.arrow_dplyr_query <- function(.data, ...) {
   check_select_helpers(enexprs(...))
-  column_select(arrow_dplyr_query(.data), !!!enquos(...), .FUN = vars_rename)
+  column_select(as_adq(.data), !!!enquos(...), .FUN = vars_rename)
 }
 rename.Dataset <- rename.ArrowTabular <- rename.arrow_dplyr_query
 
@@ -60,7 +60,7 @@ relocate.arrow_dplyr_query <- function(.data, ..., .before = NULL, .after = NULL
   # at https://github.com/tidyverse/dplyr/blob/master/R/relocate.R
   # TODO: revisit this after https://github.com/tidyverse/dplyr/issues/5829
 
-  .data <- arrow_dplyr_query(.data)
+  .data <- as_adq(.data)
 
   # Assign the schema to the expressions
   map(.data$selected_columns, ~ (.$schema <- .data$.data$schema))
diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R
index 5fb22200a04..c6806b27e3b 100644
--- a/r/R/dplyr-summarize.R
+++ b/r/R/dplyr-summarize.R
@@ -20,7 +20,7 @@
 
 summarise.arrow_dplyr_query <- function(.data, ..., .engine = c("arrow", "duckdb")) {
   call <- match.call()
-  .data <- arrow_dplyr_query(.data)
+  .data <- as_adq(.data)
   exprs <- quos(...)
   # Only retain the columns we need to do our aggregations
   vars_to_keep <- unique(c(
@@ -67,7 +67,8 @@ do_arrow_summarize <- function(.data, ..., .groups = NULL) {
   }
 
   .data$aggregations <- results
-  do_collapse(.data)
+  # TODO: should in-memory query evaluate eagerly?
+  collapse.arrow_dplyr_query(.data)
 }
 
 summarize_projection <- function(.data) {
@@ -75,4 +76,4 @@ summarize_projection <- function(.data) {
     map(.data$aggregations, ~ .$data),
     .data$selected_columns[.data$group_by_vars]
   )
-}
\ No newline at end of file
+}
diff --git a/r/R/dplyr.R b/r/R/dplyr.R
index 904792b59cc..8010ae8e6fa 100644
--- a/r/R/dplyr.R
+++ b/r/R/dplyr.R
@@ -23,14 +23,10 @@ arrow_dplyr_query <- function(.data) {
   # An arrow_dplyr_query is a container for an Arrow data object (Table,
   # RecordBatch, or Dataset) and the state of the user's dplyr query--things
   # like selected columns, filters, and group vars.
-
-  # For most dplyr methods,
-  # method.Table == method.RecordBatch == method.Dataset == method.arrow_dplyr_query
-  # This works because the functions all pass .data through arrow_dplyr_query()
-  if (inherits(.data, "arrow_dplyr_query")) {
-    return(.data)
+  # An arrow_dplyr_query can contain another arrow_dplyr_query in .data
+  if (!inherits(.data, c("Dataset", "arrow_dplyr_query"))) {
+    .data <- InMemoryDataset$create(.data)
   }
-
   # Evaluating expressions on a dataset with duplicated fieldnames will error
   dupes <- duplicated(names(.data))
   if (any(dupes)) {
@@ -42,14 +38,6 @@ arrow_dplyr_query <- function(.data) {
       )
     ))
   }
-
-  .adq(.data)
-}
-
-.adq <- function(.data) {
-  if (!inherits(.data, c("Dataset", "arrow_dplyr_query"))) {
-    .data <- InMemoryDataset$create(.data)
-  }
   structure(
     list(
       .data = .data,
@@ -78,30 +66,14 @@ arrow_dplyr_query <- function(.data) {
   )
 }
 
-# TODO: move to dplyr-collect.R
-do_collapse <- function(.data) {
-  .data$schema <- implicit_schema(.data)
-  .adq(.data)
-}
-
-implicit_schema <- function(.data) {
-  .data <- ensure_group_vars(.data)
-  old_schm <- .data$.data$schema
-
-  if (is.null(.data$aggregations)) {
-    new_fields <- map(.data$selected_columns, ~ .$type(old_schm))
-  } else {
-    new_fields <- map(summarize_projection(.data), ~ .$type(old_schm))
-    # * Put group_by_vars first (this can't be done by summarize, they have to be last per the aggregate node signature, and they get projected to this order after aggregation)
-    # * Infer the output types from the aggregations
-    group_fields <- new_fields[.data$group_by_vars]
-    agg_fields <- imap(
-      new_fields[setdiff(names(new_fields), .data$group_by_vars)],
-      ~ output_type(.data$aggregations[[.y]][["fun"]], .x)
-    )
-    new_fields <- c(group_fields, agg_fields)
+as_adq <- function(.data) {
+  # For most dplyr methods,
+  # method.Table == method.RecordBatch == method.Dataset == method.arrow_dplyr_query
+  # This works because the functions all pass .data through as_adq()
+  if (inherits(.data, "arrow_dplyr_query")) {
+    return(.data)
   }
-  schema(!!!new_fields)
+  arrow_dplyr_query(.data)
 }
 
 make_field_refs <- function(field_names) {
@@ -111,7 +83,6 @@ make_field_refs <- function(field_names) {
 #' @export
 print.arrow_dplyr_query <- function(x, ...) {
   schm <- x$.data$schema
-  # TODO: refactor this to use implicit_schema(x)
   types <- map_chr(x$selected_columns, function(expr) {
     name <- expr$field_name
     if (nzchar(name)) {
@@ -165,8 +136,10 @@ names.arrow_dplyr_query <- function(x) names(x$selected_columns)
 dim.arrow_dplyr_query <- function(x) {
   cols <- length(names(x))
 
-  if (isTRUE(x$filtered)) {
-    # TODO: update for collapse()
+  if (is_collapsed(x)) {
+    # Don't evaluate just for nrow
+    rows <- NA_integer_
+  } else if (isTRUE(x$filtered)) {
     rows <- x$.data$num_rows
   } else {
     rows <- Scanner$create(x)$CountRows()
@@ -181,12 +154,14 @@ as.data.frame.arrow_dplyr_query <- function(x, row.names = NULL, optional = FALS
 
 #' @export
 head.arrow_dplyr_query <- function(x, n = 6L, ...) {
+  # TODO: refactor/rename
   out <- head.Dataset(x, n, ...)
   restore_dplyr_features(out, x)
 }
 
 #' @export
 tail.arrow_dplyr_query <- function(x, n = 6L, ...) {
+  # TODO: refactor/rename
   out <- tail.Dataset(x, n, ...)
   restore_dplyr_features(out, x)
 }
@@ -194,6 +169,7 @@ tail.arrow_dplyr_query <- function(x, n = 6L, ...) {
 #' @export
 `[.arrow_dplyr_query` <- `[.Dataset`
 # TODO: ^ should also probably restore_dplyr_features, and/or that should be moved down
+# TODO: refactor/rename
 
 ensure_group_vars <- function(x) {
   if (inherits(x, "arrow_dplyr_query")) {
@@ -228,17 +204,24 @@ ensure_arrange_vars <- function(x) {
 # * For Table/RecordBatch, we collect() and then call the dplyr method in R
 # * For Dataset, we just error
 abandon_ship <- function(call, .data, msg) {
+  msg <- trimws(msg)
   dplyr_fun_name <- sub("^(.*?)\\..*", "\\1", as.character(call[[1]]))
   if (query_on_dataset(.data)) {
     stop(msg, "\nCall collect() first to pull data into R.", call. = FALSE)
   }
   # else, collect and call dplyr method
-  msg <- sub("\\n$", "", msg)
   warning(msg, "; pulling data into R", immediate. = TRUE, call. = FALSE)
   call$.data <- dplyr::collect(.data)
   call[[1]] <- get(dplyr_fun_name, envir = asNamespace("dplyr"))
   eval.parent(call, 2)
 }
 
-# TODO: update for collapse()
-query_on_dataset <- function(x) !inherits(x$.data, "InMemoryDataset")
+query_on_dataset <- function(x) {
+  if (is_collapsed(x)) {
+    query_on_dataset(x$.data) # recurse into the nested query to find its source
+  } else {
+    !inherits(x$.data, "InMemoryDataset") # anything else counts as a Dataset
+  }
+}
+
+is_collapsed <- function(x) inherits(x$.data, "arrow_dplyr_query")
\ No newline at end of file
diff --git a/r/R/duckdb.R b/r/R/duckdb.R
index edef5cdc143..87d1b2cfad6 100644
--- a/r/R/duckdb.R
+++ b/r/R/duckdb.R
@@ -60,7 +60,7 @@ to_duckdb <- function(.data,
                       con = arrow_duck_connection(),
                       table_name = unique_arrow_tablename(),
                       auto_disconnect = TRUE) {
-  .data <- arrow_dplyr_query(.data)
+  .data <- as_adq(.data)
   duckdb::duckdb_register_arrow(con, table_name, .data)
 
   tbl <- tbl(con, table_name)
diff --git a/r/R/query-engine.R b/r/R/query-engine.R
index f7a5c2888d2..534cd05e299 100644
--- a/r/R/query-engine.R
+++ b/r/R/query-engine.R
@@ -43,8 +43,8 @@ ExecPlan <- R6Class("ExecPlan",
           dataset$selected_columns,
           field_names_in_expression
         )))
-        # TODO: update for collapse() (assert that is Dataset now?)
         dataset <- dataset$.data
+        assert_is(dataset, "Dataset")
       } else {
         if (inherits(dataset, "ArrowTabular")) {
           dataset <- InMemoryDataset$create(dataset)
@@ -181,4 +181,4 @@ ExecNode <- R6Class("ExecNode",
       )
     }
   )
-)
+)
\ No newline at end of file
diff --git a/r/tests/testthat/test-dplyr-collapse.R b/r/tests/testthat/test-dplyr-collapse.R
index 84287976ced..674bcd249d4 100644
--- a/r/tests/testthat/test-dplyr-collapse.R
+++ b/r/tests/testthat/test-dplyr-collapse.R
@@ -81,4 +81,115 @@ test_that("implicit_schema with group_by summarize", {
       implicit_schema(),
     schema(some_grouping = float64(), avg = float64())
   )
+})
+
+test_that("collapse", {
+  q <- tab %>%
+    filter(dbl > 2, chr == "d" | chr == "f") %>%
+    select(chr, int, lgl) %>%
+    mutate(twice = int * 2L)
+  expect_false(is_collapsed(q))
+  expect_true(is_collapsed(collapse(q)))
+
+  expect_dplyr_equal(
+    input %>%
+      filter(dbl > 2, chr == "d" | chr == "f") %>%
+      select(chr, int, lgl) %>%
+      mutate(twice = int * 2L) %>%
+      collapse() %>%
+      filter(int < 5) %>%
+      select(int, twice) %>%
+      collect(),
+    tbl
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      filter(dbl > 2, chr == "d" | chr == "f") %>%
+      collapse() %>%
+      select(chr, int, lgl) %>%
+      collapse() %>%
+      filter(int < 5) %>%
+      select(int, chr) %>%
+      collect(),
+    tbl
+  )
+})
+
+test_that("Properties of collapsed query", {
+  q <- tab %>%
+    filter(dbl > 2) %>%
+    select(chr, int, lgl) %>%
+    mutate(twice = int * 2L) %>%
+    group_by(lgl) %>%
+    summarize(total = sum(int, na.rm = TRUE)) %>%
+    mutate(extra = total * 5)
+
+  # print(tbl %>%
+  #   filter(dbl > 2) %>%
+  #   select(chr, int, lgl) %>%
+  #   mutate(twice = int * 2L) %>%
+  #   group_by(lgl) %>%
+  #   summarize(total = sum(int, na.rm = TRUE)) %>%
+  #   mutate(extra = total * 5))
+
+  #   # A tibble: 3 × 3
+  #   lgl   total extra
+  #   <lgl> <int> <dbl>
+  # 1 FALSE     8    40
+  # 2 TRUE      8    40
+  # 3 NA       25   125
+
+  # Avoid evaluating just for nrow
+  expect_identical(dim(q), c(NA_integer_, 3L))
+
+
+  # TODO: improve print method
+  #   expect_output(print(q),
+  # "arrow_dplyr_query (query)
+  # lgl: bool
+  # total: int32
+  # extra: double (multiply_checked(total, 5))
+
+  # See $.data for the source Arrow object"
+  #   )
+
+  expect_equal(
+    head(q, 1) %>% collect(),
+    tibble::tibble(lgl = FALSE, total = 8L, extra = 40)
+  )
+  expect_equal(
+    tail(q, 1) %>% collect(),
+    tibble::tibble(lgl = NA, total = 25L, extra = 125)
+  )
+})
+
+test_that("query_on_dataset handles collapse()", {
+  expect_false(query_on_dataset(
+    tab %>%
+      select(int, chr)
+  ))
+  expect_false(query_on_dataset(
+    tab %>%
+      select(int, chr) %>%
+      collapse() %>%
+      select(int)
+  ))
+
+  ds_dir <- tempfile()
+  dir.create(ds_dir)
+  on.exit(unlink(ds_dir))
+  write_parquet(tab, file.path(ds_dir, "file.parquet"))
+  ds <- open_dataset(ds_dir)
+
+  expect_true(query_on_dataset(
+    ds %>%
+      select(int, chr)
+  ))
+  expect_true(query_on_dataset(
+    ds %>%
+      select(int, chr) %>%
+      collapse() %>%
+      select(int)
+  ))
 })
\ No newline at end of file
diff --git a/r/tests/testthat/test-dplyr-summarize.R b/r/tests/testthat/test-dplyr-summarize.R
index ec9d281e053..5d7fc999beb 100644
--- a/r/tests/testthat/test-dplyr-summarize.R
+++ b/r/tests/testthat/test-dplyr-summarize.R
@@ -324,6 +324,22 @@ test_that("Do things after summarize", {
       collect(),
     tbl
   )
+
+  skip("ARROW-13501")
+  expect_dplyr_equal(
+    input %>%
+      filter(dbl > 2) %>%
+      select(chr, int, lgl) %>%
+      mutate(twice = int * 2L) %>%
+      group_by(lgl) %>%
+      summarize(
+        count = n(),
+        total = sum(twice, na.rm = TRUE)
+      ) %>%
+      mutate(mean = total / count) %>%
+      collect(),
+    tbl
+  )
 })
 
 test_that("Expressions on aggregations", {

From cc2f0d7bb43d8dc5cbba89a6c48da1d1a75a4b03 Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Mon, 30 Aug 2021 14:02:12 -0400
Subject: [PATCH 13/27] Style and unskip test

---
 r/R/dataset-scan.R                      |  2 +-
 r/R/dplyr-collect.R                     |  2 +-
 r/R/dplyr-functions.R                   | 26 ++++++++++++-------------
 r/R/dplyr.R                             |  2 +-
 r/R/query-engine.R                      |  2 +-
 r/tests/testthat/test-dataset.R         |  2 +-
 r/tests/testthat/test-dplyr-collapse.R  |  2 +-
 r/tests/testthat/test-dplyr-filter.R    |  2 +-
 r/tests/testthat/test-dplyr-summarize.R |  3 +--
 9 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/r/R/dataset-scan.R b/r/R/dataset-scan.R
index dc0686ee605..75108df1052 100644
--- a/r/R/dataset-scan.R
+++ b/r/R/dataset-scan.R
@@ -231,4 +231,4 @@ ScannerBuilder <- R6Class("ScannerBuilder",
 )
 
 #' @export
-names.ScannerBuilder <- function(x) names(x$schema)
\ No newline at end of file
+names.ScannerBuilder <- function(x) names(x$schema)
diff --git a/r/R/dplyr-collect.R b/r/R/dplyr-collect.R
index f810a1bd57c..94210276417 100644
--- a/r/R/dplyr-collect.R
+++ b/r/R/dplyr-collect.R
@@ -101,4 +101,4 @@ implicit_schema <- function(.data) {
     new_fields <- c(group_fields, agg_fields)
   }
   schema(!!!new_fields)
-}
\ No newline at end of file
+}
diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R
index 2247d22f368..72731216f50 100644
--- a/r/R/dplyr-functions.R
+++ b/r/R/dplyr-functions.R
@@ -842,16 +842,16 @@ agg_funcs$n <- function() {
 }
 
 output_type <- function(fun, input_type) {
-    # These are quick and dirty heuristics.
-    if (fun %in% c("any", "all")) {
-        bool()
-    } else if (fun %in% "sum") {
-        # It may upcast to a bigger type but this is close enough
-        input_type
-    } else if (fun %in% c("mean", "stddev", "variance")) {
-        float64()
-    } else {
-        # Just so things don't error, assume the resulting type is the same
-        input_type
-    }
-}
\ No newline at end of file
+  # These are quick and dirty heuristics.
+  if (fun %in% c("any", "all")) {
+    bool()
+  } else if (fun %in% "sum") {
+    # It may upcast to a bigger type but this is close enough
+    input_type
+  } else if (fun %in% c("mean", "stddev", "variance")) {
+    float64()
+  } else {
+    # Just so things don't error, assume the resulting type is the same
+    input_type
+  }
+}
diff --git a/r/R/dplyr.R b/r/R/dplyr.R
index 8010ae8e6fa..20e976d7ce8 100644
--- a/r/R/dplyr.R
+++ b/r/R/dplyr.R
@@ -224,4 +224,4 @@ query_on_dataset <- function(x) {
   }
 }
 
-is_collapsed <- function(x) inherits(x$.data, "arrow_dplyr_query")
\ No newline at end of file
+is_collapsed <- function(x) inherits(x$.data, "arrow_dplyr_query")
diff --git a/r/R/query-engine.R b/r/R/query-engine.R
index 534cd05e299..c3621093a8d 100644
--- a/r/R/query-engine.R
+++ b/r/R/query-engine.R
@@ -181,4 +181,4 @@ ExecNode <- R6Class("ExecNode",
       )
     }
   )
-)
\ No newline at end of file
+)
diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R
index f7a41434e03..d1f4c98a28f 100644
--- a/r/tests/testthat/test-dataset.R
+++ b/r/tests/testthat/test-dataset.R
@@ -1728,4 +1728,4 @@ test_that("Error if no format specified and files are not parquet", {
       "Did you mean to specify a 'format'"
     )
   )
-})
\ No newline at end of file
+})
diff --git a/r/tests/testthat/test-dplyr-collapse.R b/r/tests/testthat/test-dplyr-collapse.R
index 674bcd249d4..57e03001d91 100644
--- a/r/tests/testthat/test-dplyr-collapse.R
+++ b/r/tests/testthat/test-dplyr-collapse.R
@@ -192,4 +192,4 @@ test_that("query_on_dataset handles collapse()", {
       collapse() %>%
       select(int)
   ))
-})
\ No newline at end of file
+})
diff --git a/r/tests/testthat/test-dplyr-filter.R b/r/tests/testthat/test-dplyr-filter.R
index ea511e88f94..e56ee4be462 100644
--- a/r/tests/testthat/test-dplyr-filter.R
+++ b/r/tests/testthat/test-dplyr-filter.R
@@ -396,4 +396,4 @@ test_that("filter() with .data pronoun", {
       collect(),
     tbl
   )
-})
\ No newline at end of file
+})
diff --git a/r/tests/testthat/test-dplyr-summarize.R b/r/tests/testthat/test-dplyr-summarize.R
index 5d7fc999beb..daf11767bfe 100644
--- a/r/tests/testthat/test-dplyr-summarize.R
+++ b/r/tests/testthat/test-dplyr-summarize.R
@@ -325,7 +325,6 @@ test_that("Do things after summarize", {
     tbl
   )
 
-  skip("ARROW-13501")
   expect_dplyr_equal(
     input %>%
       filter(dbl > 2) %>%
@@ -367,4 +366,4 @@ test_that("Expressions on aggregations", {
       collect(),
     tbl
   )
-})
\ No newline at end of file
+})

From 2ea6d04c24e9a427dc546534bb21ac6670c19a9a Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Tue, 31 Aug 2021 09:33:20 -0500
Subject: [PATCH 14/27] use arrange instead of hardcoding

---
 r/tests/testthat/test-metadata.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/r/tests/testthat/test-metadata.R b/r/tests/testthat/test-metadata.R
index bc6d285b333..9a540f47b02 100644
--- a/r/tests/testthat/test-metadata.R
+++ b/r/tests/testthat/test-metadata.R
@@ -241,7 +241,7 @@ test_that("metadata of list elements (ARROW-10386)", {
     df_from_ds <- collect(ds),
     "Row-level metadata is not compatible with this operation and has been ignored"
   )
-  expect_equal(df_from_ds[c(1, 4, 3, 2), ], df, check.attributes = FALSE)
+  expect_equal(arrange(df_from_ds, int), arrange(df, int), check.attributes = FALSE)
 
   # however there is *no* warning if we don't select the metadata column
   expect_warning(

From 9e26457aa62eb9e22b0fbf6690fd468d9e29f1ca Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Wed, 1 Sep 2021 15:09:50 -0400
Subject: [PATCH 15/27] Skip column metadata warning test

---
 r/tests/testthat/test-metadata.R | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/r/tests/testthat/test-metadata.R b/r/tests/testthat/test-metadata.R
index 9a540f47b02..6ebdcd93283 100644
--- a/r/tests/testthat/test-metadata.R
+++ b/r/tests/testthat/test-metadata.R
@@ -239,7 +239,8 @@ test_that("metadata of list elements (ARROW-10386)", {
   ds <- open_dataset(dst_dir)
   expect_warning(
     df_from_ds <- collect(ds),
-    "Row-level metadata is not compatible with this operation and has been ignored"
+    NA # TODO: ARROW-13852
+    # "Row-level metadata is not compatible with this operation and has been ignored"
   )
   expect_equal(arrange(df_from_ds, int), arrange(df, int), check.attributes = FALSE)
 
@@ -248,4 +249,4 @@ test_that("metadata of list elements (ARROW-10386)", {
     df_from_ds <- ds %>% select(int) %>% collect(),
     NA
   )
-})
+})
\ No newline at end of file

From 92d8d3fb490260f33a01a6fa3dae8958e668088c Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Wed, 1 Sep 2021 16:31:41 -0400
Subject: [PATCH 16/27] Note breaking changes before I forget

---
 r/NEWS.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/r/NEWS.md b/r/NEWS.md
index 2a22681e457..0421457e067 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -19,6 +19,11 @@
 
 # arrow 5.0.0.9000
 
+## Breaking changes
+
+* `dplyr::summarize()` on an in-memory Arrow Table or RecordBatch no longer eagerly evaluates. Call `compute()` or `collect()` to evaluate the query.
+* Row order of data from a Dataset query is no longer deterministic. If you need a stable sort order, you should explicitly `arrange()` the query.
+
 # arrow 5.0.0
 
 ## More dplyr

From 88d07bb35a1ddca274fd6c866ef49e8cbb748cc9 Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Thu, 2 Sep 2021 11:20:30 -0400
Subject: [PATCH 17/27] Add options(arrow.summarise.sort), default FALSE

---
 r/R/query-engine.R                      | 14 ++++++++------
 r/tests/testthat/test-dplyr-collapse.R  |  2 ++
 r/tests/testthat/test-dplyr-summarize.R |  2 ++
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/r/R/query-engine.R b/r/R/query-engine.R
index c3621093a8d..a96378671af 100644
--- a/r/R/query-engine.R
+++ b/r/R/query-engine.R
@@ -109,12 +109,14 @@ ExecPlan <- R6Class("ExecPlan",
           node <- node$Project(
             make_field_refs(c(group_vars, names(.data$aggregations)))
           )
-          # Add sorting instructions for the rows too to match dplyr
-          # (see below about why sorting isn't itself a Node)
-          node$sort <- list(
-            names = group_vars,
-            orders = rep(0L, length(group_vars))
-          )
+          if (getOption("arrow.summarise.sort", FALSE)) {
+            # Add sorting instructions for the rows too to match dplyr
+            # (see below about why sorting isn't itself a Node)
+            node$sort <- list(
+              names = group_vars,
+              orders = rep(0L, length(group_vars))
+            )
+          }
         }
       } else {
         # If any columns are derived, reordered, or renamed we need to Project
diff --git a/r/tests/testthat/test-dplyr-collapse.R b/r/tests/testthat/test-dplyr-collapse.R
index 57e03001d91..4d3cab86add 100644
--- a/r/tests/testthat/test-dplyr-collapse.R
+++ b/r/tests/testthat/test-dplyr-collapse.R
@@ -17,6 +17,8 @@
 
 skip_if_not_available("dataset")
 
+withr::local_options(list(arrow.summarise.sort = TRUE))
+
 library(dplyr)
 library(stringr)
 
diff --git a/r/tests/testthat/test-dplyr-summarize.R b/r/tests/testthat/test-dplyr-summarize.R
index daf11767bfe..213b48e6543 100644
--- a/r/tests/testthat/test-dplyr-summarize.R
+++ b/r/tests/testthat/test-dplyr-summarize.R
@@ -17,6 +17,8 @@
 
 skip_if_not_available("dataset")
 
+# withr::local_options(list(arrow.summarise.sort = TRUE))
+
 library(dplyr)
 library(stringr)
 

From b7d6313c68c11c5d733ed408ed0b839229b605f6 Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Thu, 2 Sep 2021 11:27:34 -0400
Subject: [PATCH 18/27] Skip all dataset tests on 32-bit windows rtools35

---
 r/tests/testthat/helper-skip.R                 |  9 +++++++++
 r/tests/testthat/test-dataset.R                | 10 +---------
 r/tests/testthat/test-dplyr-arrange.R          |  1 +
 r/tests/testthat/test-dplyr-collapse.R         |  1 +
 r/tests/testthat/test-dplyr-filter.R           |  1 +
 r/tests/testthat/test-dplyr-group-by.R         |  1 +
 r/tests/testthat/test-dplyr-lubridate.R        |  1 +
 r/tests/testthat/test-dplyr-mutate.R           |  1 +
 r/tests/testthat/test-dplyr-string-functions.R |  1 +
 r/tests/testthat/test-dplyr-summarize.R        |  1 +
 r/tests/testthat/test-dplyr.R                  |  1 +
 r/tests/testthat/test-duckdb.R                 |  2 ++
 12 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/r/tests/testthat/helper-skip.R b/r/tests/testthat/helper-skip.R
index 906963e38d1..9a6bba60f91 100644
--- a/r/tests/testthat/helper-skip.R
+++ b/r/tests/testthat/helper-skip.R
@@ -68,6 +68,15 @@ skip_on_valgrind <- function() {
   }
 }
 
+skip_if_multithreading_disabled <- function() {
+  is_32bit <- .Machine$sizeof.pointer < 8
+  is_old_r <- getRversion() < "4.0.0"
+  is_windows <- tolower(Sys.info()[["sysname"]]) == "windows"
+  if (is_32bit && is_old_r && is_windows) {
+    skip("Multithreading does not work properly on this system")
+  }
+}
+
 process_is_running <- function(x) {
   cmd <- sprintf("ps aux | grep '%s' | grep -v grep", x)
   tryCatch(system(cmd, ignore.stdout = TRUE) == 0, error = function(e) FALSE)
diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R
index d1f4c98a28f..411eb2ef36e 100644
--- a/r/tests/testthat/test-dataset.R
+++ b/r/tests/testthat/test-dataset.R
@@ -16,6 +16,7 @@
 # under the License.
 
 skip_if_not_available("dataset")
+skip_if_multithreading_disabled()
 
 context("Dataset")
 
@@ -27,15 +28,6 @@ ipc_dir <- make_temp_dir()
 csv_dir <- make_temp_dir()
 tsv_dir <- make_temp_dir()
 
-skip_if_multithreading_disabled <- function() {
-  is_32bit <- .Machine$sizeof.pointer < 8
-  is_old_r <- getRversion() < "4.0.0"
-  is_windows <- tolower(Sys.info()[["sysname"]]) == "windows"
-  if (is_32bit && is_old_r && is_windows) {
-    skip("Multithreading does not work properly on this system")
-  }
-}
-
 
 first_date <- lubridate::ymd_hms("2015-04-29 03:12:39")
 df1 <- tibble(
diff --git a/r/tests/testthat/test-dplyr-arrange.R b/r/tests/testthat/test-dplyr-arrange.R
index fc24df58ca7..f131b8444f5 100644
--- a/r/tests/testthat/test-dplyr-arrange.R
+++ b/r/tests/testthat/test-dplyr-arrange.R
@@ -16,6 +16,7 @@
 # under the License.
 
 skip_if_not_available("dataset")
+skip_if_multithreading_disabled()
 
 library(dplyr)
 
diff --git a/r/tests/testthat/test-dplyr-collapse.R b/r/tests/testthat/test-dplyr-collapse.R
index 4d3cab86add..776988f870c 100644
--- a/r/tests/testthat/test-dplyr-collapse.R
+++ b/r/tests/testthat/test-dplyr-collapse.R
@@ -16,6 +16,7 @@
 # under the License.
 
 skip_if_not_available("dataset")
+skip_if_multithreading_disabled()
 
 withr::local_options(list(arrow.summarise.sort = TRUE))
 
diff --git a/r/tests/testthat/test-dplyr-filter.R b/r/tests/testthat/test-dplyr-filter.R
index e56ee4be462..fb4951ac45e 100644
--- a/r/tests/testthat/test-dplyr-filter.R
+++ b/r/tests/testthat/test-dplyr-filter.R
@@ -16,6 +16,7 @@
 # under the License.
 
 skip_if_not_available("dataset")
+skip_if_multithreading_disabled()
 
 library(dplyr)
 library(stringr)
diff --git a/r/tests/testthat/test-dplyr-group-by.R b/r/tests/testthat/test-dplyr-group-by.R
index 18be2a9304a..0a8c76a920d 100644
--- a/r/tests/testthat/test-dplyr-group-by.R
+++ b/r/tests/testthat/test-dplyr-group-by.R
@@ -16,6 +16,7 @@
 # under the License.
 
 skip_if_not_available("dataset")
+skip_if_multithreading_disabled()
 
 library(dplyr)
 library(stringr)
diff --git a/r/tests/testthat/test-dplyr-lubridate.R b/r/tests/testthat/test-dplyr-lubridate.R
index 64bb42a0ecf..e3281436379 100644
--- a/r/tests/testthat/test-dplyr-lubridate.R
+++ b/r/tests/testthat/test-dplyr-lubridate.R
@@ -16,6 +16,7 @@
 # under the License.
 
 skip_if_not_available("dataset")
+skip_if_multithreading_disabled()
 
 library(lubridate)
 library(dplyr)
diff --git a/r/tests/testthat/test-dplyr-mutate.R b/r/tests/testthat/test-dplyr-mutate.R
index 44127839108..3f2bb526f0c 100644
--- a/r/tests/testthat/test-dplyr-mutate.R
+++ b/r/tests/testthat/test-dplyr-mutate.R
@@ -16,6 +16,7 @@
 # under the License.
 
 skip_if_not_available("dataset")
+skip_if_multithreading_disabled()
 
 library(dplyr)
 library(stringr)
diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R
index b6b8f5a714a..5383bcb2376 100644
--- a/r/tests/testthat/test-dplyr-string-functions.R
+++ b/r/tests/testthat/test-dplyr-string-functions.R
@@ -17,6 +17,7 @@
 
 skip_if_not_available("dataset")
 skip_if_not_available("utf8proc")
+skip_if_multithreading_disabled()
 
 library(dplyr)
 library(stringr)
diff --git a/r/tests/testthat/test-dplyr-summarize.R b/r/tests/testthat/test-dplyr-summarize.R
index 213b48e6543..8c60f844e5b 100644
--- a/r/tests/testthat/test-dplyr-summarize.R
+++ b/r/tests/testthat/test-dplyr-summarize.R
@@ -16,6 +16,7 @@
 # under the License.
 
 skip_if_not_available("dataset")
+skip_if_multithreading_disabled()
 
 # withr::local_options(list(arrow.summarise.sort = TRUE))
 
diff --git a/r/tests/testthat/test-dplyr.R b/r/tests/testthat/test-dplyr.R
index d3a9994b5f1..295d7778dc0 100644
--- a/r/tests/testthat/test-dplyr.R
+++ b/r/tests/testthat/test-dplyr.R
@@ -16,6 +16,7 @@
 # under the License.
 
 skip_if_not_available("dataset")
+skip_if_multithreading_disabled()
 
 library(dplyr)
 library(stringr)
diff --git a/r/tests/testthat/test-duckdb.R b/r/tests/testthat/test-duckdb.R
index cdfcb62d02d..c4fad4f5148 100644
--- a/r/tests/testthat/test-duckdb.R
+++ b/r/tests/testthat/test-duckdb.R
@@ -18,6 +18,8 @@
 skip_if_not_installed("duckdb", minimum_version = "0.2.8")
 skip_if_not_installed("dbplyr")
 skip_if_not_available("dataset")
+skip_if_multithreading_disabled()
+
 # when we remove this, we should also remove the FALSE in run_duckdb_examples
 skip("These tests are flaking: https://github.com/duckdb/duckdb/issues/2100")
 library(duckdb)

From 31ec558c9ef52964fb1292f78d5aedbb7ae4a467 Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Thu, 2 Sep 2021 13:26:21 -0400
Subject: [PATCH 19/27] Correct but not super satisfying print method

---
 r/R/dplyr-summarize.R                  |  4 ++++
 r/R/dplyr.R                            | 17 ++++++++-----
 r/tests/testthat/test-dplyr-collapse.R | 33 ++++++++++++++++++--------
 3 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R
index c6806b27e3b..cd93e28f07e 100644
--- a/r/R/dplyr-summarize.R
+++ b/r/R/dplyr-summarize.R
@@ -77,3 +77,7 @@ summarize_projection <- function(.data) {
     .data$selected_columns[.data$group_by_vars]
   )
 }
+
+format_aggregation <- function(x) {
+  paste0(x$fun, "(", x$data$ToString(), ")")
+}
diff --git a/r/R/dplyr.R b/r/R/dplyr.R
index 20e976d7ce8..c650d7ddb0f 100644
--- a/r/R/dplyr.R
+++ b/r/R/dplyr.R
@@ -97,10 +97,14 @@ print.arrow_dplyr_query <- function(x, ...) {
     }
   })
   fields <- paste(names(types), types, sep = ": ", collapse = "\n")
-  # TODO: update for collapse()
-  cat(class(x$.data)[1], " (query)\n", sep = "")
+  cat(class(source_data(x))[1], " (query)\n", sep = "")
   cat(fields, "\n", sep = "")
   cat("\n")
+  if (length(x$aggregations)) {
+    cat("* Aggregations:\n")
+    aggs <- paste0(names(x$aggregations), ": ", map_chr(x$aggregations, format_aggregation), collapse = "\n")
+    cat(aggs, "\n", sep = "")
+  }
   if (!isTRUE(x$filtered_rows)) {
     filter_string <- x$filtered_rows$ToString()
     cat("* Filter: ", filter_string, "\n", sep = "")
@@ -123,7 +127,6 @@ print.arrow_dplyr_query <- function(x, ...) {
       sep = ""
     )
   }
-  # TODO: update for collapse()
   cat("See $.data for the source Arrow object\n")
   invisible(x)
 }
@@ -216,11 +219,13 @@ abandon_ship <- function(call, .data, msg) {
   eval.parent(call, 2)
 }
 
-query_on_dataset <- function(x) {
+query_on_dataset <- function(x) !inherits(source_data(x), "InMemoryDataset")
+
+source_data <- function(x) {
   if (is_collapsed(x)) {
-    query_on_dataset((x$.data))
+    source_data(x$.data)
   } else {
-    !inherits(x$.data, "InMemoryDataset")
+    x$.data
   }
 }
 
diff --git a/r/tests/testthat/test-dplyr-collapse.R b/r/tests/testthat/test-dplyr-collapse.R
index 776988f870c..eb62e967449 100644
--- a/r/tests/testthat/test-dplyr-collapse.R
+++ b/r/tests/testthat/test-dplyr-collapse.R
@@ -146,16 +146,29 @@ test_that("Properties of collapsed query", {
   # Avoid evaluating just for nrow
   expect_identical(dim(q), c(NA_integer_, 3L))
 
-
-  # TODO: improve print method
-  #   expect_output(print(q),
-  # "arrow_dplyr_query (query)
-  # lgl: bool
-  # total: int32
-  # extra: double (multiply_checked(total, 5))
-
-  # See $.data for the source Arrow object"
-  #   )
+  expect_output(
+    print(q),
+    "InMemoryDataset (query)
+lgl: bool
+total: int32
+extra: double (multiply_checked(total, 5))
+
+See $.data for the source Arrow object",
+    fixed = TRUE
+  )
+  expect_output(
+    print(q$.data),
+    "InMemoryDataset (query)
+int: int32
+lgl: bool
+
+* Aggregations:
+total: sum(int)
+* Filter: (dbl > 2)
+* Grouped by lgl
+See $.data for the source Arrow object",
+    fixed = TRUE
+  )
 
   expect_equal(
     head(q, 1) %>% collect(),

From bd251357ec17055ee3ed1fed818b1ebacf61c438 Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Thu, 2 Sep 2021 13:30:30 -0400
Subject: [PATCH 20/27] sort more tests

---
 r/tests/testthat/test-s3-minio.R | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/r/tests/testthat/test-s3-minio.R b/r/tests/testthat/test-s3-minio.R
index 94451e5351a..2ec67ce6290 100644
--- a/r/tests/testthat/test-s3-minio.R
+++ b/r/tests/testthat/test-s3-minio.R
@@ -86,7 +86,7 @@ if (arrow_with_s3() && process_is_running("minio server")) {
     test_that("open_dataset with an S3 file (not directory) URI", {
       skip_if_not_available("parquet")
       expect_identical(
-        open_dataset(minio_uri("test.parquet")) %>% collect(),
+        open_dataset(minio_uri("test.parquet")) %>% arrange(int) %>% collect(),
         example_data
       )
     })
@@ -96,7 +96,9 @@ if (arrow_with_s3() && process_is_running("minio server")) {
         open_dataset(
           c(minio_uri("test.feather"), minio_uri("test2.feather")),
           format = "feather"
-        ) %>% collect(),
+        ) %>%
+          arrange(int) %>%
+          collect(),
         rbind(example_data, example_data)
       )
     })
@@ -153,8 +155,8 @@ if (arrow_with_s3() && process_is_running("minio server")) {
     test_that("open_dataset with fs", {
       ds <- open_dataset(fs$path(minio_path("hive_dir")))
       expect_identical(
-        ds %>% select(dbl, lgl) %>% collect(),
-        rbind(df1[, c("dbl", "lgl")], df2[, c("dbl", "lgl")])
+        ds %>% select(int, dbl, lgl) %>% collect() %>% arrange(int),
+        rbind(df1[, c("int", "dbl", "lgl")], df2[, c("int", "dbl", "lgl")])
       )
     })
 
@@ -170,16 +172,16 @@ if (arrow_with_s3() && process_is_running("minio server")) {
       expect_length(dir(td), 2)
       ds <- open_dataset(td)
       expect_identical(
-        ds %>% select(dbl, lgl) %>% collect(),
-        rbind(df1[, c("dbl", "lgl")], df2[, c("dbl", "lgl")])
+        ds %>% select(int, dbl, lgl) %>% collect() %>% arrange(int),
+        rbind(df1[, c("int", "dbl", "lgl")], df2[, c("int", "dbl", "lgl")])
       )
 
       # Let's copy the other way and use a SubTreeFileSystem rather than URI
       copy_files(td, fs$path(minio_path("hive_dir2")))
       ds2 <- open_dataset(fs$path(minio_path("hive_dir2")))
       expect_identical(
-        ds2 %>% select(dbl, lgl) %>% collect(),
-        rbind(df1[, c("dbl", "lgl")], df2[, c("dbl", "lgl")])
+        ds2 %>% select(int, dbl, lgl) %>% collect() %>% arrange(int),
+        rbind(df1[, c("int", "dbl", "lgl")], df2[, c("int", "dbl", "lgl")])
       )
     })
   }

From f07b420f28ee404fd7afff6a38ebebbc88bb2111 Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Thu, 2 Sep 2021 13:58:24 -0400
Subject: [PATCH 21/27] More sort

---
 r/tests/testthat/test-s3-minio.R | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/r/tests/testthat/test-s3-minio.R b/r/tests/testthat/test-s3-minio.R
index 2ec67ce6290..a2a13cbf887 100644
--- a/r/tests/testthat/test-s3-minio.R
+++ b/r/tests/testthat/test-s3-minio.R
@@ -86,8 +86,8 @@ if (arrow_with_s3() && process_is_running("minio server")) {
     test_that("open_dataset with an S3 file (not directory) URI", {
       skip_if_not_available("parquet")
       expect_identical(
-        open_dataset(minio_uri("test.parquet")) %>% arrange(int) %>% collect(),
-        example_data
+        open_dataset(minio_uri("test.parquet")) %>% collect() %>% arrange(int),
+        example_data %>% arrange(int)
       )
     })
 
@@ -99,7 +99,7 @@ if (arrow_with_s3() && process_is_running("minio server")) {
         ) %>%
           arrange(int) %>%
           collect(),
-        rbind(example_data, example_data)
+        rbind(example_data, example_data) %>% arrange(int)
       )
     })
 
@@ -156,7 +156,7 @@ if (arrow_with_s3() && process_is_running("minio server")) {
       ds <- open_dataset(fs$path(minio_path("hive_dir")))
       expect_identical(
         ds %>% select(int, dbl, lgl) %>% collect() %>% arrange(int),
-        rbind(df1[, c("int", "dbl", "lgl")], df2[, c("int", "dbl", "lgl")])
+        rbind(df1[, c("int", "dbl", "lgl")], df2[, c("int", "dbl", "lgl")]) %>% arrange(int)
       )
     })
 
@@ -173,7 +173,7 @@ if (arrow_with_s3() && process_is_running("minio server")) {
       ds <- open_dataset(td)
       expect_identical(
         ds %>% select(int, dbl, lgl) %>% collect() %>% arrange(int),
-        rbind(df1[, c("int", "dbl", "lgl")], df2[, c("int", "dbl", "lgl")])
+        rbind(df1[, c("int", "dbl", "lgl")], df2[, c("int", "dbl", "lgl")]) %>% arrange(int)
       )
 
       # Let's copy the other way and use a SubTreeFileSystem rather than URI
@@ -181,7 +181,7 @@ if (arrow_with_s3() && process_is_running("minio server")) {
       ds2 <- open_dataset(fs$path(minio_path("hive_dir2")))
       expect_identical(
         ds2 %>% select(int, dbl, lgl) %>% collect() %>% arrange(int),
-        rbind(df1[, c("int", "dbl", "lgl")], df2[, c("int", "dbl", "lgl")])
+        rbind(df1[, c("int", "dbl", "lgl")], df2[, c("int", "dbl", "lgl")]) %>% arrange(int)
       )
     })
   }

From be2499eb22077143fced9da56e44fe0a2cae2d0b Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Fri, 3 Sep 2021 08:17:38 -0400
Subject: [PATCH 22/27] Apply suggestions from code review

Co-authored-by: Ian Cook <ianmcook@gmail.com>
Co-authored-by: Jonathan Keane <jkeane@gmail.com>
---
 r/R/dplyr.R                             | 5 +++++
 r/tests/testthat/test-dplyr-summarize.R | 2 +-
 r/tests/testthat/test-metadata.R        | 2 +-
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/r/R/dplyr.R b/r/R/dplyr.R
index c650d7ddb0f..7d7c748f0a3 100644
--- a/r/R/dplyr.R
+++ b/r/R/dplyr.R
@@ -66,6 +66,11 @@ arrow_dplyr_query <- function(.data) {
   )
 }
 
+# The only difference between `arrow_dplyr_query()` and `as_adq()` is that if
+# `.data` is already an `arrow_dplyr_query`, `as_adq()` will return it as is, but
+# `arrow_dplyr_query()` will nest it inside a new `arrow_dplyr_query`. The only
+# place where `arrow_dplyr_query()` should be called directly is inside
+# `collapse()` methods; everywhere else, call `as_adq()`.
 as_adq <- function(.data) {
   # For most dplyr methods,
   # method.Table == method.RecordBatch == method.Dataset == method.arrow_dplyr_query
diff --git a/r/tests/testthat/test-dplyr-summarize.R b/r/tests/testthat/test-dplyr-summarize.R
index 8c60f844e5b..dd55108ec05 100644
--- a/r/tests/testthat/test-dplyr-summarize.R
+++ b/r/tests/testthat/test-dplyr-summarize.R
@@ -164,7 +164,7 @@ test_that("Group by var on dataset", {
   )
 })
 
-test_that("Group by any/all", {
+test_that("n()", {
   withr::local_options(list(arrow.debug = TRUE))
   expect_dplyr_equal(
     input %>%
diff --git a/r/tests/testthat/test-metadata.R b/r/tests/testthat/test-metadata.R
index 6ebdcd93283..6ae5b54fbf3 100644
--- a/r/tests/testthat/test-metadata.R
+++ b/r/tests/testthat/test-metadata.R
@@ -249,4 +249,4 @@ test_that("metadata of list elements (ARROW-10386)", {
     df_from_ds <- ds %>% select(int) %>% collect(),
     NA
   )
-})
\ No newline at end of file
+})

From f7e3e546f475faa6bbd02120d7d72080d836b941 Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Fri, 3 Sep 2021 08:41:13 -0400
Subject: [PATCH 23/27] Cleanups

---
 r/NEWS.md                               | 2 +-
 r/tests/testthat/test-dplyr-summarize.R | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/r/NEWS.md b/r/NEWS.md
index 0421457e067..eb8001d4718 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -22,7 +22,7 @@
 ## Breaking changes
 
 * `dplyr::summarize()` on an in-memory Arrow Table or RecordBatch no longer eagerly evaluates. Call `compute()` or `collect()` to evaluate the query.
-* Row order of data from a Dataset query is no longer deterministic. If you need a stable sort order, you should explicitly `arrange()` the query.
+* Row order of data from a Dataset query is no longer deterministic. If you need a stable sort order, you should explicitly `arrange()` the query. For calls to `summarize()`, you can set `options(arrow.summarise.sort = TRUE)` to match the current `dplyr` behavior of sorting on the grouping columns.
 
 # arrow 5.0.0
 
diff --git a/r/tests/testthat/test-dplyr-summarize.R b/r/tests/testthat/test-dplyr-summarize.R
index dd55108ec05..d6ccbc90c04 100644
--- a/r/tests/testthat/test-dplyr-summarize.R
+++ b/r/tests/testthat/test-dplyr-summarize.R
@@ -18,7 +18,7 @@
 skip_if_not_available("dataset")
 skip_if_multithreading_disabled()
 
-# withr::local_options(list(arrow.summarise.sort = TRUE))
+withr::local_options(list(arrow.summarise.sort = TRUE))
 
 library(dplyr)
 library(stringr)
@@ -369,4 +369,4 @@ test_that("Expressions on aggregations", {
       collect(),
     tbl
   )
-})
+})
\ No newline at end of file

From a63acb9606f006523f19ffd185e6e1790616ee1a Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Fri, 3 Sep 2021 08:44:58 -0400
Subject: [PATCH 24/27] Improve test verbosity on windows

---
 .github/workflows/r.yml | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml
index 5acb47a0ae0..8eba1895b8b 100644
--- a/.github/workflows/r.yml
+++ b/.github/workflows/r.yml
@@ -247,6 +247,7 @@ jobs:
           Sys.setenv(
             RWINLIB_LOCAL = file.path(Sys.getenv("GITHUB_WORKSPACE"), "libarrow.zip"),
             MAKEFLAGS = paste0("-j", parallel::detectCores()),
+            ARROW_R_DEV = TRUE,
             "_R_CHECK_FORCE_SUGGESTS_" = FALSE
           )
           rcmdcheck::rcmdcheck("r",
@@ -260,6 +261,15 @@ jobs:
         shell: cmd
         run: cat check/arrow.Rcheck/00install.out
         if: always()
+      - name: Dump test logs
+        run: cat r/check/arrow.Rcheck/tests/testthat.Rout*
+        if: always()
+      - name: Save the test output
+        if: always()
+        uses: actions/upload-artifact@v2
+        with:
+          name: test-output
+          path: r/check/arrow.Rcheck/tests/testthat.Rout*
       # We can remove this when we drop support for Rtools 3.5.
       - name: Ensure using system tar in actions/cache
         run: |

From 3462b24dce8ecddd19340f1bdc0c235e86736d01 Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Fri, 3 Sep 2021 09:44:04 -0400
Subject: [PATCH 25/27] Skip all dataset tests on old 32-bit windows

---
 .github/workflows/r.yml                        | 9 ---------
 r/tests/testthat/helper-skip.R                 | 4 ++++
 r/tests/testthat/test-dataset.R                | 8 --------
 r/tests/testthat/test-dplyr-arrange.R          | 1 -
 r/tests/testthat/test-dplyr-collapse.R         | 1 -
 r/tests/testthat/test-dplyr-filter.R           | 1 -
 r/tests/testthat/test-dplyr-group-by.R         | 1 -
 r/tests/testthat/test-dplyr-lubridate.R        | 1 -
 r/tests/testthat/test-dplyr-mutate.R           | 1 -
 r/tests/testthat/test-dplyr-string-functions.R | 1 -
 r/tests/testthat/test-dplyr-summarize.R        | 3 +--
 r/tests/testthat/test-dplyr.R                  | 1 -
 r/tests/testthat/test-duckdb.R                 | 1 -
 13 files changed, 5 insertions(+), 28 deletions(-)

diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml
index 8eba1895b8b..e160ba8128a 100644
--- a/.github/workflows/r.yml
+++ b/.github/workflows/r.yml
@@ -261,15 +261,6 @@ jobs:
         shell: cmd
         run: cat check/arrow.Rcheck/00install.out
         if: always()
-      - name: Dump test logs
-        run: cat r/check/arrow.Rcheck/tests/testthat.Rout*
-        if: always()
-      - name: Save the test output
-        if: always()
-        uses: actions/upload-artifact@v2
-        with:
-          name: test-output
-          path: r/check/arrow.Rcheck/tests/testthat.Rout*
       # We can remove this when we drop support for Rtools 3.5.
       - name: Ensure using system tar in actions/cache
         run: |
diff --git a/r/tests/testthat/helper-skip.R b/r/tests/testthat/helper-skip.R
index 9a6bba60f91..9efa1be85b5 100644
--- a/r/tests/testthat/helper-skip.R
+++ b/r/tests/testthat/helper-skip.R
@@ -25,6 +25,10 @@ skip_if_not_available <- function(feature) {
   if (feature == "re2") {
     # RE2 does not support valgrind (on purpose): https://github.com/google/re2/issues/177
     skip_on_valgrind()
+  } else if (feature == "dataset") {
+    # These tests often hang on 32-bit windows rtools35, and we haven't been
+    # able to figure out how to make them work safely
+    skip_if_multithreading_disabled()
   }
 
   yes <- feature %in% names(build_features) && build_features[feature]
diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R
index 411eb2ef36e..41265f0e638 100644
--- a/r/tests/testthat/test-dataset.R
+++ b/r/tests/testthat/test-dataset.R
@@ -16,7 +16,6 @@
 # under the License.
 
 skip_if_not_available("dataset")
-skip_if_multithreading_disabled()
 
 context("Dataset")
 
@@ -346,7 +345,6 @@ test_that("IPC/Feather format data", {
 })
 
 test_that("CSV dataset", {
-  skip_if_multithreading_disabled()
   ds <- open_dataset(csv_dir, partitioning = "part", format = "csv")
   expect_r6_class(ds$format, "CsvFileFormat")
   expect_r6_class(ds$filesystem, "LocalFileSystem")
@@ -374,7 +372,6 @@ test_that("CSV dataset", {
 })
 
 test_that("CSV scan options", {
-  skip_if_multithreading_disabled()
   options <- FragmentScanOptions$create("text")
   expect_equal(options$type, "csv")
   options <- FragmentScanOptions$create("csv",
@@ -421,7 +418,6 @@ test_that("CSV scan options", {
 })
 
 test_that("compressed CSV dataset", {
-  skip_if_multithreading_disabled()
   skip_if_not_available("gzip")
   dst_dir <- make_temp_dir()
   dst_file <- file.path(dst_dir, "data.csv.gz")
@@ -445,7 +441,6 @@ test_that("compressed CSV dataset", {
 })
 
 test_that("CSV dataset options", {
-  skip_if_multithreading_disabled()
   dst_dir <- make_temp_dir()
   dst_file <- file.path(dst_dir, "data.csv")
   df <- tibble(chr = letters[1:10])
@@ -473,7 +468,6 @@ test_that("CSV dataset options", {
 })
 
 test_that("Other text delimited dataset", {
-  skip_if_multithreading_disabled()
   ds1 <- open_dataset(tsv_dir, partitioning = "part", format = "tsv")
   expect_equivalent(
     ds1 %>%
@@ -502,7 +496,6 @@ test_that("Other text delimited dataset", {
 })
 
 test_that("readr parse options", {
-  skip_if_multithreading_disabled()
   arrow_opts <- names(formals(CsvParseOptions$create))
   readr_opts <- names(formals(readr_to_csv_parse_options))
 
@@ -1654,7 +1647,6 @@ test_that("Writing a dataset: Parquet format options", {
 })
 
 test_that("Writing a dataset: CSV format options", {
-  skip_if_multithreading_disabled()
   df <- tibble(
     int = 1:10,
     dbl = as.numeric(1:10),
diff --git a/r/tests/testthat/test-dplyr-arrange.R b/r/tests/testthat/test-dplyr-arrange.R
index f131b8444f5..fc24df58ca7 100644
--- a/r/tests/testthat/test-dplyr-arrange.R
+++ b/r/tests/testthat/test-dplyr-arrange.R
@@ -16,7 +16,6 @@
 # under the License.
 
 skip_if_not_available("dataset")
-skip_if_multithreading_disabled()
 
 library(dplyr)
 
diff --git a/r/tests/testthat/test-dplyr-collapse.R b/r/tests/testthat/test-dplyr-collapse.R
index eb62e967449..331f7b7b62c 100644
--- a/r/tests/testthat/test-dplyr-collapse.R
+++ b/r/tests/testthat/test-dplyr-collapse.R
@@ -16,7 +16,6 @@
 # under the License.
 
 skip_if_not_available("dataset")
-skip_if_multithreading_disabled()
 
 withr::local_options(list(arrow.summarise.sort = TRUE))
 
diff --git a/r/tests/testthat/test-dplyr-filter.R b/r/tests/testthat/test-dplyr-filter.R
index fb4951ac45e..e56ee4be462 100644
--- a/r/tests/testthat/test-dplyr-filter.R
+++ b/r/tests/testthat/test-dplyr-filter.R
@@ -16,7 +16,6 @@
 # under the License.
 
 skip_if_not_available("dataset")
-skip_if_multithreading_disabled()
 
 library(dplyr)
 library(stringr)
diff --git a/r/tests/testthat/test-dplyr-group-by.R b/r/tests/testthat/test-dplyr-group-by.R
index 0a8c76a920d..18be2a9304a 100644
--- a/r/tests/testthat/test-dplyr-group-by.R
+++ b/r/tests/testthat/test-dplyr-group-by.R
@@ -16,7 +16,6 @@
 # under the License.
 
 skip_if_not_available("dataset")
-skip_if_multithreading_disabled()
 
 library(dplyr)
 library(stringr)
diff --git a/r/tests/testthat/test-dplyr-lubridate.R b/r/tests/testthat/test-dplyr-lubridate.R
index e3281436379..64bb42a0ecf 100644
--- a/r/tests/testthat/test-dplyr-lubridate.R
+++ b/r/tests/testthat/test-dplyr-lubridate.R
@@ -16,7 +16,6 @@
 # under the License.
 
 skip_if_not_available("dataset")
-skip_if_multithreading_disabled()
 
 library(lubridate)
 library(dplyr)
diff --git a/r/tests/testthat/test-dplyr-mutate.R b/r/tests/testthat/test-dplyr-mutate.R
index 3f2bb526f0c..44127839108 100644
--- a/r/tests/testthat/test-dplyr-mutate.R
+++ b/r/tests/testthat/test-dplyr-mutate.R
@@ -16,7 +16,6 @@
 # under the License.
 
 skip_if_not_available("dataset")
-skip_if_multithreading_disabled()
 
 library(dplyr)
 library(stringr)
diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R
index 5383bcb2376..b6b8f5a714a 100644
--- a/r/tests/testthat/test-dplyr-string-functions.R
+++ b/r/tests/testthat/test-dplyr-string-functions.R
@@ -17,7 +17,6 @@
 
 skip_if_not_available("dataset")
 skip_if_not_available("utf8proc")
-skip_if_multithreading_disabled()
 
 library(dplyr)
 library(stringr)
diff --git a/r/tests/testthat/test-dplyr-summarize.R b/r/tests/testthat/test-dplyr-summarize.R
index d6ccbc90c04..78d36630e56 100644
--- a/r/tests/testthat/test-dplyr-summarize.R
+++ b/r/tests/testthat/test-dplyr-summarize.R
@@ -16,7 +16,6 @@
 # under the License.
 
 skip_if_not_available("dataset")
-skip_if_multithreading_disabled()
 
 withr::local_options(list(arrow.summarise.sort = TRUE))
 
@@ -369,4 +368,4 @@ test_that("Expressions on aggregations", {
       collect(),
     tbl
   )
-})
\ No newline at end of file
+})
diff --git a/r/tests/testthat/test-dplyr.R b/r/tests/testthat/test-dplyr.R
index 295d7778dc0..d3a9994b5f1 100644
--- a/r/tests/testthat/test-dplyr.R
+++ b/r/tests/testthat/test-dplyr.R
@@ -16,7 +16,6 @@
 # under the License.
 
 skip_if_not_available("dataset")
-skip_if_multithreading_disabled()
 
 library(dplyr)
 library(stringr)
diff --git a/r/tests/testthat/test-duckdb.R b/r/tests/testthat/test-duckdb.R
index c4fad4f5148..56343ad729e 100644
--- a/r/tests/testthat/test-duckdb.R
+++ b/r/tests/testthat/test-duckdb.R
@@ -18,7 +18,6 @@
 skip_if_not_installed("duckdb", minimum_version = "0.2.8")
 skip_if_not_installed("dbplyr")
 skip_if_not_available("dataset")
-skip_if_multithreading_disabled()
 
 # when we remove this, we should also remove the FALSE in run_duckdb_examples
 skip("These tests are flaking: https://github.com/duckdb/duckdb/issues/2100")

From 4fa268489e7bc1c4e7b5354cb72dd748a58678ce Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Fri, 3 Sep 2021 11:12:31 -0400
Subject: [PATCH 26/27] Final final tweaks

---
 r/R/dplyr-collect.R            | 4 +++-
 r/R/dplyr.R                    | 8 ++++----
 r/tests/testthat/helper-skip.R | 1 +
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/r/R/dplyr-collect.R b/r/R/dplyr-collect.R
index 94210276417..8a5488bf599 100644
--- a/r/R/dplyr-collect.R
+++ b/r/R/dplyr-collect.R
@@ -91,7 +91,9 @@ implicit_schema <- function(.data) {
     new_fields <- map(.data$selected_columns, ~ .$type(old_schm))
   } else {
     new_fields <- map(summarize_projection(.data), ~ .$type(old_schm))
-    # * Put group_by_vars first (this can't be done by summarize, they have to be last per the aggregate node signature, and they get projected to this order after aggregation)
+    # * Put group_by_vars first (this can't be done by summarize,
+    #   they have to be last per the aggregate node signature,
+    #   and they get projected to this order after aggregation)
     # * Infer the output types from the aggregations
     group_fields <- new_fields[.data$group_by_vars]
     agg_fields <- imap(
diff --git a/r/R/dplyr.R b/r/R/dplyr.R
index 7d7c748f0a3..199120887b9 100644
--- a/r/R/dplyr.R
+++ b/r/R/dplyr.R
@@ -67,7 +67,7 @@ arrow_dplyr_query <- function(.data) {
 }
 
 # The only difference between `arrow_dplyr_query()` and `as_adq()` is that if
-# `.data` is already an `arrow_dplyr_query`, `as_adq()`, will return it as is, but 
+# `.data` is already an `arrow_dplyr_query`, `as_adq()`, will return it as is, but
 # `arrow_dplyr_query()` will nest it inside a new `arrow_dplyr_query`. The only
 # place where `arrow_dplyr_query()` should be called directly is inside
 # `collapse()` methods; everywhere else, call `as_adq()`.
@@ -162,14 +162,14 @@ as.data.frame.arrow_dplyr_query <- function(x, row.names = NULL, optional = FALS
 
 #' @export
 head.arrow_dplyr_query <- function(x, n = 6L, ...) {
-  # TODO: refactor/rename
+  # TODO (ARROW-13893): refactor
   out <- head.Dataset(x, n, ...)
   restore_dplyr_features(out, x)
 }
 
 #' @export
 tail.arrow_dplyr_query <- function(x, n = 6L, ...) {
-  # TODO: refactor/rename
+  # TODO (ARROW-13893): refactor
   out <- tail.Dataset(x, n, ...)
   restore_dplyr_features(out, x)
 }
@@ -177,7 +177,7 @@ tail.arrow_dplyr_query <- function(x, n = 6L, ...) {
 #' @export
 `[.arrow_dplyr_query` <- `[.Dataset`
 # TODO: ^ should also probably restore_dplyr_features, and/or that should be moved down
-# TODO: refactor/rename
+# TODO (ARROW-13893): refactor
 
 ensure_group_vars <- function(x) {
   if (inherits(x, "arrow_dplyr_query")) {
diff --git a/r/tests/testthat/helper-skip.R b/r/tests/testthat/helper-skip.R
index 9efa1be85b5..3ec18a63019 100644
--- a/r/tests/testthat/helper-skip.R
+++ b/r/tests/testthat/helper-skip.R
@@ -39,6 +39,7 @@ skip_if_not_available <- function(feature) {
 
 skip_if_no_pyarrow <- function() {
   skip_on_valgrind()
+  skip_on_os("windows")
 
   skip_if_not_installed("reticulate")
   if (!reticulate::py_module_available("pyarrow")) {

From ceecc8f79b54ea412e3d00063302e325d580b186 Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Fri, 3 Sep 2021 11:59:50 -0400
Subject: [PATCH 27/27] Fix python skip

---
 r/tests/testthat/test-python.R | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/r/tests/testthat/test-python.R b/r/tests/testthat/test-python.R
index 9e67219e19a..d5815247d51 100644
--- a/r/tests/testthat/test-python.R
+++ b/r/tests/testthat/test-python.R
@@ -20,9 +20,10 @@ context("To/from Python")
 test_that("install_pyarrow", {
   skip_on_cran()
   skip_if_not_dev_mode()
-  # Python problems on Apple M1 still
-  skip_if(grepl("arm-apple|aarch64.*darwin", R.Version()$platform))
+  # Windows CI doesn't seem to pick up the right Python installation
+  skip_on_os("windows")
   skip_if_not_installed("reticulate")
+
   venv <- try(reticulate::virtualenv_create("arrow-test"))
   # Bail out if virtualenv isn't available
   skip_if(inherits(venv, "try-error"))