diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml index 5acb47a0ae0..e160ba8128a 100644 --- a/.github/workflows/r.yml +++ b/.github/workflows/r.yml @@ -247,6 +247,7 @@ jobs: Sys.setenv( RWINLIB_LOCAL = file.path(Sys.getenv("GITHUB_WORKSPACE"), "libarrow.zip"), MAKEFLAGS = paste0("-j", parallel::detectCores()), + ARROW_R_DEV = TRUE, "_R_CHECK_FORCE_SUGGESTS_" = FALSE ) rcmdcheck::rcmdcheck("r", diff --git a/r/NAMESPACE b/r/NAMESPACE index 8ce6d162eb0..5e78d04de52 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -291,6 +291,7 @@ importFrom(bit64,print.integer64) importFrom(bit64,str.integer64) importFrom(methods,as) importFrom(purrr,as_mapper) +importFrom(purrr,imap) importFrom(purrr,imap_chr) importFrom(purrr,keep) importFrom(purrr,map) diff --git a/r/NEWS.md b/r/NEWS.md index 2a22681e457..eb8001d4718 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -19,6 +19,11 @@ # arrow 5.0.0.9000 +## Breaking changes + +* `dplyr::summarize()` on an in-memory Arrow Table or RecordBatch no longer eagerly evaluates. Call `compute()` or `collect()` to evaluate the query. +* Row order of data from a Dataset query is no longer deterministic. If you need a stable sort order, you should explicitly `arrange()` the query. For calls to `summarize()`, you can set `options(arrow.summarise.sort = TRUE)` to match the current `dplyr` behavior of sorting on the grouping columns. + # arrow 5.0.0 ## More dplyr diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R index 537eebb1b1d..c09b8f05319 100644 --- a/r/R/arrow-package.R +++ b/r/R/arrow-package.R @@ -17,7 +17,7 @@ #' @importFrom stats quantile median na.omit na.exclude na.pass na.fail #' @importFrom R6 R6Class -#' @importFrom purrr as_mapper map map2 map_chr map2_chr map_dfr map_int map_lgl keep imap_chr +#' @importFrom purrr as_mapper map map2 map_chr map2_chr map_dfr map_int map_lgl keep imap imap_chr #' @importFrom assertthat assert_that is.string #' @importFrom rlang list2 %||% is_false abort dots_n warn enquo quo_is_null enquos is_integerish quos #' @importFrom rlang eval_tidy new_data_mask syms env new_environment env_bind as_label set_names exec @@ -35,7 +35,7 @@ c( "select", "filter", "collect", "summarise", "group_by", "groups", "group_vars", "group_by_drop_default", "ungroup", "mutate", "transmute", - "arrange", "rename", "pull", "relocate", "compute" + "arrange", "rename", "pull", "relocate", "compute", "collapse" ) ) for (cl in c("Dataset", "ArrowTabular", "arrow_dplyr_query")) { diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index 72a5e455858..b852a3d8ca9 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -280,8 +280,8 @@ ExecPlan_create <- function(use_threads) { .Call(`_arrow_ExecPlan_create`, use_threads) } -ExecPlan_run <- function(plan, final_node) { - .Call(`_arrow_ExecPlan_run`, plan, final_node) +ExecPlan_run <- function(plan, final_node, sort_options) { + .Call(`_arrow_ExecPlan_run`, plan, final_node, sort_options) } ExecNode_Scan <- function(plan, dataset, filter, materialized_field_names) { @@ -816,6 +816,10 @@ FixedSizeListType__list_size <- function(type) { .Call(`_arrow_FixedSizeListType__list_size`, type) } +compute___expr__equals <- function(lhs, rhs) { + .Call(`_arrow_compute___expr__equals`, lhs, rhs) +} + compute___expr__call <- function(func_name, argument_list, options) { .Call(`_arrow_compute___expr__call`, func_name, argument_list, options) } diff --git a/r/R/dataset-scan.R b/r/R/dataset-scan.R index 615b0f945a8..75108df1052 100644 --- a/r/R/dataset-scan.R +++ b/r/R/dataset-scan.R @@ -73,18 +73,14 @@ Scanner$create <- function(dataset, projection = NULL, filter = TRUE, use_threads = option_use_threads(), - use_async = NULL, + use_async = getOption("arrow.use_async", FALSE), batch_size = NULL, fragment_scan_options = NULL, ...) { - if (is.null(use_async)) { - use_async <- getOption("arrow.use_async", FALSE) - } - if (inherits(dataset, "arrow_dplyr_query")) { - if (inherits(dataset$.data, "ArrowTabular")) { - # To handle mutate() on Table/RecordBatch, we need to collect(as_data_frame=FALSE) now - dataset <- dplyr::collect(dataset, as_data_frame = FALSE) + if (is_collapsed(dataset)) { + # TODO: Is there a way to get a RecordBatchReader rather than evaluating? + dataset$.data <- as_adq(dplyr::compute(dataset$.data))$.data } proj <- c(dataset$selected_columns, dataset$temp_columns) @@ -117,7 +113,7 @@ Scanner$create <- function(dataset, ... )) } - if (inherits(dataset, c("data.frame", "RecordBatch", "Table"))) { + if (inherits(dataset, c("data.frame", "ArrowTabular"))) { dataset <- InMemoryDataset$create(dataset) } assert_is(dataset, "Dataset") diff --git a/r/R/dplyr-arrange.R b/r/R/dplyr-arrange.R index 345fc183295..017e1d6b302 100644 --- a/r/R/dplyr-arrange.R +++ b/r/R/dplyr-arrange.R @@ -30,7 +30,7 @@ arrange.arrow_dplyr_query <- function(.data, ..., .by_group = FALSE) { # Nothing to do return(.data) } - .data <- arrow_dplyr_query(.data) + .data <- as_adq(.data) # find and remove any dplyr::desc() and tidy-eval # the arrange expressions inside an Arrow data_mask sorts <- vector("list", length(exprs)) diff --git a/r/R/dplyr-collect.R b/r/R/dplyr-collect.R index cec56ab9110..8a5488bf599 100644 --- a/r/R/dplyr-collect.R +++ b/r/R/dplyr-collect.R @@ -19,19 +19,8 @@ # The following S3 methods are registered on load if dplyr is present collect.arrow_dplyr_query <- function(x, as_data_frame = TRUE, ...) { - x <- ensure_group_vars(x) - x <- ensure_arrange_vars(x) # this sets x$temp_columns - # Pull only the selected rows and cols into R - # See dataset.R for Dataset and Scanner(Builder) classes - tab <- Scanner$create(x)$ToTable() - # Arrange rows - if (length(x$arrange_vars) > 0) { - tab <- tab[ - tab$SortIndices(names(x$arrange_vars), x$arrange_desc), - names(x$selected_columns), # this omits x$temp_columns from the result - drop = FALSE - ] - } + # See query-engine.R for ExecPlan/Nodes + tab <- do_exec_plan(x) if (as_data_frame) { df <- as.data.frame(tab) tab$invalidate() @@ -47,16 +36,71 @@ collect.ArrowTabular <- function(x, as_data_frame = TRUE, ...) { x } } -collect.Dataset <- function(x, ...) dplyr::collect(arrow_dplyr_query(x), ...) +collect.Dataset <- function(x, ...) dplyr::collect(as_adq(x), ...) compute.arrow_dplyr_query <- function(x, ...) dplyr::collect(x, as_data_frame = FALSE) compute.ArrowTabular <- function(x, ...) x compute.Dataset <- compute.arrow_dplyr_query pull.arrow_dplyr_query <- function(.data, var = -1) { - .data <- arrow_dplyr_query(.data) + .data <- as_adq(.data) var <- vars_pull(names(.data), !!enquo(var)) .data$selected_columns <- set_names(.data$selected_columns[var], var) dplyr::collect(.data)[[1]] } pull.Dataset <- pull.ArrowTabular <- pull.arrow_dplyr_query + +# TODO: Correctly handle group_vars after summarize; also in collapse() +restore_dplyr_features <- function(df, query) { + # An arrow_dplyr_query holds some attributes that Arrow doesn't know about + # After calling collect(), make sure these features are carried over + + if (length(query$group_by_vars) > 0) { + # Preserve groupings, if present + if (is.data.frame(df)) { + df <- dplyr::grouped_df( + df, + dplyr::group_vars(query), + drop = dplyr::group_by_drop_default(query) + ) + } else { + # This is a Table, via compute() or collect(as_data_frame = FALSE) + df <- as_adq(df) + df$group_by_vars <- query$group_by_vars + df$drop_empty_groups <- query$drop_empty_groups + } + } + df +} + +collapse.arrow_dplyr_query <- function(x, ...) { + # Figure out what schema will result from the query + x$schema <- implicit_schema(x) + # Nest inside a new arrow_dplyr_query + arrow_dplyr_query(x) +} +collapse.Dataset <- collapse.ArrowTabular <- function(x, ...) { + arrow_dplyr_query(x) +} + +implicit_schema <- function(.data) { + .data <- ensure_group_vars(.data) + old_schm <- .data$.data$schema + + if (is.null(.data$aggregations)) { + new_fields <- map(.data$selected_columns, ~ .$type(old_schm)) + } else { + new_fields <- map(summarize_projection(.data), ~ .$type(old_schm)) + # * Put group_by_vars first (this can't be done by summarize, + # they have to be last per the aggregate node signature, + # and they get projected to this order after aggregation) + # * Infer the output types from the aggregations + group_fields <- new_fields[.data$group_by_vars] + agg_fields <- imap( + new_fields[setdiff(names(new_fields), .data$group_by_vars)], + ~ output_type(.data$aggregations[[.y]][["fun"]], .x) + ) + new_fields <- c(group_fields, agg_fields) + } + schema(!!!new_fields) +} diff --git a/r/R/dplyr-filter.R b/r/R/dplyr-filter.R index a44750a9c81..61f27010e77 100644 --- a/r/R/dplyr-filter.R +++ b/r/R/dplyr-filter.R @@ -26,7 +26,7 @@ filter.arrow_dplyr_query <- function(.data, ..., .preserve = FALSE) { return(.data) } - .data <- arrow_dplyr_query(.data) + .data <- as_adq(.data) # tidy-eval the filter expressions inside an Arrow data_mask filters <- lapply(filts, arrow_eval, arrow_mask(.data)) bad_filters <- map_lgl(filters, ~ inherits(., "try-error")) diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index e535546dd1b..72731216f50 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -840,3 +840,18 @@ agg_funcs$n <- function() { options = list() ) } + +output_type <- function(fun, input_type) { + # These are quick and dirty heuristics. + if (fun %in% c("any", "all")) { + bool() + } else if (fun %in% "sum") { + # It may upcast to a bigger type but this is close enough + input_type + } else if (fun %in% c("mean", "stddev", "variance")) { + float64() + } else { + # Just so things don't error, assume the resulting type is the same + input_type + } +} diff --git a/r/R/dplyr-group-by.R b/r/R/dplyr-group-by.R index 42cca039022..a89144d6c4a 100644 --- a/r/R/dplyr-group-by.R +++ b/r/R/dplyr-group-by.R @@ -23,7 +23,7 @@ group_by.arrow_dplyr_query <- function(.data, .add = FALSE, add = .add, .drop = dplyr::group_by_drop_default(.data)) { - .data <- arrow_dplyr_query(.data) + .data <- as_adq(.data) new_groups <- enquos(...) # ... can contain expressions (i.e. can add (or rename?) columns) and so we # need to identify those and add them on to the query with mutate. Specifically, diff --git a/r/R/dplyr-mutate.R b/r/R/dplyr-mutate.R index f19505c1958..051c5254e50 100644 --- a/r/R/dplyr-mutate.R +++ b/r/R/dplyr-mutate.R @@ -24,7 +24,7 @@ mutate.arrow_dplyr_query <- function(.data, .before = NULL, .after = NULL) { call <- match.call() - exprs <- quos(...) + exprs <- ensure_named_exprs(quos(...)) .keep <- match.arg(.keep) .before <- enquo(.before) @@ -35,7 +35,7 @@ mutate.arrow_dplyr_query <- function(.data, return(.data) } - .data <- arrow_dplyr_query(.data) + .data <- as_adq(.data) # Restrict the cases we support for now if (length(dplyr::group_vars(.data)) > 0) { @@ -45,11 +45,6 @@ mutate.arrow_dplyr_query <- function(.data, return(abandon_ship(call, .data, "mutate() on grouped data not supported in Arrow")) } - # Check for unnamed expressions and fix if any - unnamed <- !nzchar(names(exprs)) - # Deparse and take the first element in case they're long expressions - names(exprs)[unnamed] <- map_chr(exprs[unnamed], as_label) - mask <- arrow_mask(.data) results <- list() for (i in seq_along(exprs)) { @@ -133,3 +128,11 @@ check_transmute_args <- function(..., .keep, .before, .after) { } enquos(...) } + +ensure_named_exprs <- function(exprs) { + # Check for unnamed expressions and fix if any + unnamed <- !nzchar(names(exprs)) + # Deparse and take the first element in case they're long expressions + names(exprs)[unnamed] <- map_chr(exprs[unnamed], as_label) + exprs +} diff --git a/r/R/dplyr-select.R b/r/R/dplyr-select.R index ee740db4cfb..9a867ced964 100644 --- a/r/R/dplyr-select.R +++ b/r/R/dplyr-select.R @@ -22,13 +22,13 @@ tbl_vars.arrow_dplyr_query <- function(x) names(x$selected_columns) select.arrow_dplyr_query <- function(.data, ...) { check_select_helpers(enexprs(...)) - column_select(arrow_dplyr_query(.data), !!!enquos(...)) + column_select(as_adq(.data), !!!enquos(...)) } select.Dataset <- select.ArrowTabular <- select.arrow_dplyr_query rename.arrow_dplyr_query <- function(.data, ...) { check_select_helpers(enexprs(...)) - column_select(arrow_dplyr_query(.data), !!!enquos(...), .FUN = vars_rename) + column_select(as_adq(.data), !!!enquos(...), .FUN = vars_rename) } rename.Dataset <- rename.ArrowTabular <- rename.arrow_dplyr_query @@ -60,7 +60,7 @@ relocate.arrow_dplyr_query <- function(.data, ..., .before = NULL, .after = NULL # at https://github.com/tidyverse/dplyr/blob/master/R/relocate.R # TODO: revisit this after https://github.com/tidyverse/dplyr/issues/5829 - .data <- arrow_dplyr_query(.data) + .data <- as_adq(.data) # Assign the schema to the expressions map(.data$selected_columns, ~ (.$schema <- .data$.data$schema)) diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R index 394e5fe2ac9..cd93e28f07e 100644 --- a/r/R/dplyr-summarize.R +++ b/r/R/dplyr-summarize.R @@ -20,7 +20,7 @@ summarise.arrow_dplyr_query <- function(.data, ..., .engine = c("arrow", "duckdb")) { call <- match.call() - .data <- arrow_dplyr_query(.data) + .data <- as_adq(.data) exprs <- quos(...) # Only retain the columns we need to do our aggregations vars_to_keep <- unique(c( @@ -47,11 +47,7 @@ do_arrow_summarize <- function(.data, ..., .groups = NULL) { # ARROW-13550 abort("`summarize()` with `.groups` argument not supported in Arrow") } - exprs <- quos(...) - # Check for unnamed expressions and fix if any - unnamed <- !nzchar(names(exprs)) - # Deparse and take the first element in case they're long expressions - names(exprs)[unnamed] <- map_chr(exprs[unnamed], as_label) + exprs <- ensure_named_exprs(quos(...)) mask <- arrow_mask(.data, aggregation = TRUE) @@ -68,61 +64,20 @@ do_arrow_summarize <- function(.data, ..., .groups = NULL) { ) stop(msg, call. = FALSE) } - # Put it in the data mask too? - # Should we: mask[[new_var]] <- mask$.data[[new_var]] <- results[[new_var]] } - # Now, from that, split out the data (expressions) and options - .data$aggregations <- lapply(results, function(x) x[c("fun", "options")]) - - inputs <- lapply(results, function(x) x$data) - # This is essentially a projection, and the column names don't matter - # (but must exist) - names(inputs) <- as.character(seq_along(inputs)) - .data$selected_columns <- inputs - - # Eventually, we will return .data here if (dataset) but do it eagerly now - do_exec_plan(.data, group_vars = dplyr::group_vars(.data)) + .data$aggregations <- results + # TODO: should in-memory query evaluate eagerly? + collapse.arrow_dplyr_query(.data) } -do_exec_plan <- function(.data, group_vars = NULL) { - plan <- ExecPlan$create() - - grouped <- length(group_vars) > 0 - - # Collect the target names first because we have to add back the group vars - target_names <- names(.data) - - if (grouped) { - .data <- ensure_group_vars(.data) - # We also need to prefix all of the aggregation function names with "hash_" - .data$aggregations <- lapply(.data$aggregations, function(x) { - x[["fun"]] <- paste0("hash_", x[["fun"]]) - x - }) - } - - start_node <- plan$Scan(.data) - # ARROW-13498: Even though Scan takes the filter, apparently we have to do it again - if (inherits(.data$filtered_rows, "Expression")) { - start_node <- start_node$Filter(.data$filtered_rows) - } - # If any columns are derived we need to Project (otherwise this may be no-op) - project_node <- start_node$Project(.data$selected_columns) - - final_node <- project_node$Aggregate( - options = .data$aggregations, - target_names = target_names, - out_field_names = names(.data$aggregations), - key_names = group_vars +summarize_projection <- function(.data) { + c( + map(.data$aggregations, ~ .$data), + .data$selected_columns[.data$group_by_vars] ) +} - out <- plan$Run(final_node) - if (grouped) { - # The result will have result columns first then the grouping cols. - # dplyr orders group cols first, so adapt the result to meet that expectation. - n_results <- length(.data$aggregations) - out <- out[c((n_results + 1):ncol(out), seq_along(.data$aggregations))] - } - out +format_aggregation <- function(x) { + paste0(x$fun, "(", x$data$ToString(), ")") } diff --git a/r/R/dplyr.R b/r/R/dplyr.R index b2793bdb3c3..199120887b9 100644 --- a/r/R/dplyr.R +++ b/r/R/dplyr.R @@ -23,14 +23,10 @@ arrow_dplyr_query <- function(.data) { # An arrow_dplyr_query is a container for an Arrow data object (Table, # RecordBatch, or Dataset) and the state of the user's dplyr query--things # like selected columns, filters, and group vars. - - # For most dplyr methods, - # method.Table == method.RecordBatch == method.Dataset == method.arrow_dplyr_query - # This works because the functions all pass .data through arrow_dplyr_query() - if (inherits(.data, "arrow_dplyr_query")) { - return(.data) + # An arrow_dplyr_query can contain another arrow_dplyr_query in .data + if (!inherits(.data, c("Dataset", "arrow_dplyr_query"))) { + .data <- InMemoryDataset$create(.data) } - # Evaluating expressions on a dataset with duplicated fieldnames will error dupes <- duplicated(names(.data)) if (any(dupes)) { @@ -42,19 +38,14 @@ arrow_dplyr_query <- function(.data) { ) )) } - structure( list( - .data = if (inherits(.data, "Dataset")) { - .data$clone() - } else { - InMemoryDataset$create(.data) - }, + .data = .data, # selected_columns is a named list: # * contents are references/expressions pointing to the data # * names are the names they should be in the end (i.e. this # records any renaming) - selected_columns = make_field_refs(names(.data)), + selected_columns = make_field_refs(names(.data$schema)), # filtered_rows will be an Expression filtered_rows = TRUE, # group_by_vars is a character vector of columns (as renamed) @@ -75,6 +66,21 @@ arrow_dplyr_query <- function(.data) { ) } +# The only difference between `arrow_dplyr_query()` and `as_adq()` is that if +# `.data` is already an `arrow_dplyr_query`, `as_adq()`, will return it as is, but +# `arrow_dplyr_query()` will nest it inside a new `arrow_dplyr_query`. The only +# place where `arrow_dplyr_query()` should be called directly is inside +# `collapse()` methods; everywhere else, call `as_adq()`. +as_adq <- function(.data) { + # For most dplyr methods, + # method.Table == method.RecordBatch == method.Dataset == method.arrow_dplyr_query + # This works because the functions all pass .data through as_adq() + if (inherits(.data, "arrow_dplyr_query")) { + return(.data) + } + arrow_dplyr_query(.data) +} + make_field_refs <- function(field_names) { set_names(lapply(field_names, Expression$field_ref), field_names) } @@ -96,9 +102,14 @@ print.arrow_dplyr_query <- function(x, ...) { } }) fields <- paste(names(types), types, sep = ": ", collapse = "\n") - cat(class(x$.data)[1], " (query)\n", sep = "") + cat(class(source_data(x))[1], " (query)\n", sep = "") cat(fields, "\n", sep = "") cat("\n") + if (length(x$aggregations)) { + cat("* Aggregations:\n") + aggs <- paste0(names(x$aggregations), ": ", map_chr(x$aggregations, format_aggregation), collapse = "\n") + cat(aggs, "\n", sep = "") + } if (!isTRUE(x$filtered_rows)) { filter_string <- x$filtered_rows$ToString() cat("* Filter: ", filter_string, "\n", sep = "") @@ -133,7 +144,10 @@ names.arrow_dplyr_query <- function(x) names(x$selected_columns) dim.arrow_dplyr_query <- function(x) { cols <- length(names(x)) - if (isTRUE(x$filtered)) { + if (is_collapsed(x)) { + # Don't evaluate just for nrow + rows <- NA_integer_ + } else if (isTRUE(x$filtered)) { rows <- x$.data$num_rows } else { rows <- Scanner$create(x)$CountRows() @@ -148,12 +162,14 @@ as.data.frame.arrow_dplyr_query <- function(x, row.names = NULL, optional = FALS #' @export head.arrow_dplyr_query <- function(x, n = 6L, ...) { + # TODO (ARROW-13893): refactor out <- head.Dataset(x, n, ...) restore_dplyr_features(out, x) } #' @export tail.arrow_dplyr_query <- function(x, n = 6L, ...) { + # TODO (ARROW-13893): refactor out <- tail.Dataset(x, n, ...) restore_dplyr_features(out, x) } @@ -161,6 +177,7 @@ tail.arrow_dplyr_query <- function(x, n = 6L, ...) { #' @export `[.arrow_dplyr_query` <- `[.Dataset` # TODO: ^ should also probably restore_dplyr_features, and/or that should be moved down +# TODO (ARROW-13893): refactor ensure_group_vars <- function(x) { if (inherits(x, "arrow_dplyr_query")) { @@ -191,42 +208,30 @@ ensure_arrange_vars <- function(x) { x } -restore_dplyr_features <- function(df, query) { - # An arrow_dplyr_query holds some attributes that Arrow doesn't know about - # After calling collect(), make sure these features are carried over - - if (length(query$group_by_vars) > 0) { - # Preserve groupings, if present - if (is.data.frame(df)) { - df <- dplyr::grouped_df( - df, - dplyr::group_vars(query), - drop = dplyr::group_by_drop_default(query) - ) - } else { - # This is a Table, via compute() or collect(as_data_frame = FALSE) - df <- arrow_dplyr_query(df) - df$group_by_vars <- query$group_by_vars - df$drop_empty_groups <- query$drop_empty_groups - } - } - df -} - # Helper to handle unsupported dplyr features # * For Table/RecordBatch, we collect() and then call the dplyr method in R # * For Dataset, we just error abandon_ship <- function(call, .data, msg) { + msg <- trimws(msg) dplyr_fun_name <- sub("^(.*?)\\..*", "\\1", as.character(call[[1]])) if (query_on_dataset(.data)) { stop(msg, "\nCall collect() first to pull data into R.", call. = FALSE) } # else, collect and call dplyr method - msg <- sub("\\n$", "", msg) warning(msg, "; pulling data into R", immediate. = TRUE, call. = FALSE) call$.data <- dplyr::collect(.data) call[[1]] <- get(dplyr_fun_name, envir = asNamespace("dplyr")) eval.parent(call, 2) } -query_on_dataset <- function(x) !inherits(x$.data, "InMemoryDataset") +query_on_dataset <- function(x) !inherits(source_data(x), "InMemoryDataset") + +source_data <- function(x) { + if (is_collapsed(x)) { + source_data(x$.data) + } else { + x$.data + } +} + +is_collapsed <- function(x) inherits(x$.data, "arrow_dplyr_query") diff --git a/r/R/duckdb.R b/r/R/duckdb.R index edef5cdc143..87d1b2cfad6 100644 --- a/r/R/duckdb.R +++ b/r/R/duckdb.R @@ -60,7 +60,7 @@ to_duckdb <- function(.data, con = arrow_duck_connection(), table_name = unique_arrow_tablename(), auto_disconnect = TRUE) { - .data <- arrow_dplyr_query(.data) + .data <- as_adq(.data) duckdb::duckdb_register_arrow(con, table_name, .data) tbl <- tbl(con, table_name) diff --git a/r/R/expression.R b/r/R/expression.R index aa9af9270c9..82e21ccf2e1 100644 --- a/r/R/expression.R +++ b/r/R/expression.R @@ -125,6 +125,9 @@ Expression <- R6Class("Expression", inherit = ArrowObject, public = list( ToString = function() compute___expr__ToString(self), + Equals = function(other, ...) { + inherits(other, "Expression") && compute___expr__equals(self, other) + }, # TODO: Implement type determination without storing # schemas in Expression objects (ARROW-13186) schema = NULL, diff --git a/r/R/query-engine.R b/r/R/query-engine.R index 4de2f87165b..a96378671af 100644 --- a/r/R/query-engine.R +++ b/r/R/query-engine.R @@ -15,6 +15,19 @@ # specific language governing permissions and limitations # under the License. +do_exec_plan <- function(.data) { + plan <- ExecPlan$create() + final_node <- plan$Build(.data) + tab <- plan$Run(final_node) + + if (length(final_node$sort$temp_columns) > 0) { + # If arrange() created $temp_columns, make sure to omit them from the result + tab <- tab[, setdiff(names(tab), final_node$sort$temp_columns), drop = FALSE] + } + + tab +} + ExecPlan <- R6Class("ExecPlan", inherit = ArrowObject, public = list( @@ -31,6 +44,7 @@ ExecPlan <- R6Class("ExecPlan", field_names_in_expression ))) dataset <- dataset$.data + assert_is(dataset, "Dataset") } else { if (inherits(dataset, "ArrowTabular")) { dataset <- InMemoryDataset$create(dataset) @@ -42,11 +56,97 @@ ExecPlan <- R6Class("ExecPlan", } # ScanNode needs the filter to do predicate pushdown and skip partitions, # and it needs to know which fields to materialize (and which are unnecessary) - ExecNode_Scan(self, dataset, filter, colnames) + ExecNode_Scan(self, dataset, filter, colnames %||% character(0)) + }, + Build = function(.data) { + # This method takes an arrow_dplyr_query and chains together the + # ExecNodes that they produce. It does not evaluate them--that is Run(). + group_vars <- dplyr::group_vars(.data) + grouped <- length(group_vars) > 0 + + # Collect the target names first because we have to add back the group vars + target_names <- names(.data) + .data <- ensure_group_vars(.data) + .data <- ensure_arrange_vars(.data) # this sets .data$temp_columns + + if (inherits(.data$.data, "arrow_dplyr_query")) { + # We have a nested query. Recurse. + node <- self$Build(.data$.data) + } else { + node <- self$Scan(.data) + } + + # ARROW-13498: Even though Scan takes the filter, apparently we have to do it again + if (inherits(.data$filtered_rows, "Expression")) { + node <- node$Filter(.data$filtered_rows) + } + + if (!is.null(.data$aggregations)) { + # Project to include just the data required for each aggregation, + # plus group_by_vars (last) + # TODO: validate that none of names(aggregations) are the same as names(group_by_vars) + # dplyr does not error on this but the result it gives isn't great + node <- node$Project(summarize_projection(.data)) + + if (grouped) { + # We need to prefix all of the aggregation function names with "hash_" + .data$aggregations <- lapply(.data$aggregations, function(x) { + x[["fun"]] <- paste0("hash_", x[["fun"]]) + x + }) + } + + node <- node$Aggregate( + options = map(.data$aggregations, ~ .[c("fun", "options")]), + target_names = names(.data$aggregations), + out_field_names = names(.data$aggregations), + key_names = group_vars + ) + + if (grouped) { + # The result will have result columns first then the grouping cols. + # dplyr orders group cols first, so adapt the result to meet that expectation. + node <- node$Project( + make_field_refs(c(group_vars, names(.data$aggregations))) + ) + if (getOption("arrow.summarise.sort", FALSE)) { + # Add sorting instructions for the rows too to match dplyr + # (see below about why sorting isn't itself a Node) + node$sort <- list( + names = group_vars, + orders = rep(0L, length(group_vars)) + ) + } + } + } else { + # If any columns are derived, reordered, or renamed we need to Project + # If there are aggregations, the projection was already handled above + # We have to project at least once to eliminate some junk columns + # that the ExecPlan adds: + # __fragment_index, __batch_index, __last_in_fragment + # Presumably extraneous repeated projection of the same thing + # (as when we've done collapse() and not projected after) is cheap/no-op + projection <- c(.data$selected_columns, .data$temp_columns) + node <- node$Project(projection) + } + + # Apply sorting: this is currently not an ExecNode itself, it is a + # sink node option. + # TODO: handle some cases: + # (1) arrange > summarize > arrange + # (2) ARROW-13779: arrange then operation where order matters (e.g. cumsum) + if (length(.data$arrange_vars)) { + node$sort <- list( + names = names(.data$arrange_vars), + orders = as.integer(.data$arrange_desc), + temp_columns = names(.data$temp_columns) + ) + } + node }, Run = function(node) { assert_is(node, "ExecNode") - ExecPlan_run(self, node) + ExecPlan_run(self, node, node$sort %||% list()) } ) ) @@ -57,16 +157,30 @@ ExecPlan$create <- function(use_threads = option_use_threads()) { ExecNode <- R6Class("ExecNode", inherit = ArrowObject, public = list( + # `sort` is a slight hack to be able to keep around arrange() params, + # which don't currently yield their own ExecNode but rather are consumed + # in the SinkNode (in ExecPlan$run()) + sort = NULL, + preserve_sort = function(new_node) { + new_node$sort <- self$sort + new_node + }, Project = function(cols) { - assert_is_list_of(cols, "Expression") - ExecNode_Project(self, cols, names(cols)) + if (length(cols)) { + assert_is_list_of(cols, "Expression") + self$preserve_sort(ExecNode_Project(self, cols, names(cols))) + } else { + self$preserve_sort(ExecNode_Project(self, character(0), character(0))) + } }, Filter = function(expr) { assert_is(expr, "Expression") - ExecNode_Filter(self, expr) + self$preserve_sort(ExecNode_Filter(self, expr)) }, Aggregate = function(options, target_names, out_field_names, key_names) { - ExecNode_Aggregate(self, options, target_names, out_field_names, key_names) + self$preserve_sort( + ExecNode_Aggregate(self, options, target_names, out_field_names, key_names) + ) } ) ) diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index cb69ce17442..f33b81c08f0 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -1094,16 +1094,17 @@ extern "C" SEXP _arrow_ExecPlan_create(SEXP use_threads_sexp){ // compute-exec.cpp #if defined(ARROW_R_WITH_ARROW) -std::shared_ptr ExecPlan_run(const std::shared_ptr& plan, const std::shared_ptr& final_node); -extern "C" SEXP _arrow_ExecPlan_run(SEXP plan_sexp, SEXP final_node_sexp){ +std::shared_ptr ExecPlan_run(const std::shared_ptr& plan, const std::shared_ptr& final_node, cpp11::list sort_options); +extern "C" SEXP _arrow_ExecPlan_run(SEXP plan_sexp, SEXP final_node_sexp, SEXP sort_options_sexp){ BEGIN_CPP11 arrow::r::Input&>::type plan(plan_sexp); arrow::r::Input&>::type final_node(final_node_sexp); - return cpp11::as_sexp(ExecPlan_run(plan, final_node)); + arrow::r::Input::type sort_options(sort_options_sexp); + return cpp11::as_sexp(ExecPlan_run(plan, final_node, sort_options)); END_CPP11 } #else -extern "C" SEXP _arrow_ExecPlan_run(SEXP plan_sexp, SEXP final_node_sexp){ +extern "C" SEXP _arrow_ExecPlan_run(SEXP plan_sexp, SEXP final_node_sexp, SEXP sort_options_sexp){ Rf_error("Cannot call ExecPlan_run(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); } #endif @@ -3173,6 +3174,22 @@ extern "C" SEXP _arrow_FixedSizeListType__list_size(SEXP type_sexp){ } #endif +// expression.cpp +#if defined(ARROW_R_WITH_ARROW) +bool compute___expr__equals(const std::shared_ptr& lhs, const std::shared_ptr& rhs); +extern "C" SEXP _arrow_compute___expr__equals(SEXP lhs_sexp, SEXP rhs_sexp){ +BEGIN_CPP11 + arrow::r::Input&>::type lhs(lhs_sexp); + arrow::r::Input&>::type rhs(rhs_sexp); + return cpp11::as_sexp(compute___expr__equals(lhs, rhs)); +END_CPP11 +} +#else +extern "C" SEXP _arrow_compute___expr__equals(SEXP lhs_sexp, SEXP rhs_sexp){ + Rf_error("Cannot call compute___expr__equals(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // expression.cpp #if defined(ARROW_R_WITH_ARROW) std::shared_ptr compute___expr__call(std::string func_name, cpp11::list argument_list, cpp11::list options); @@ -7035,450 +7052,451 @@ static const R_CallMethodDef CallEntries[] = { { "_parquet_available", (DL_FUNC)& _parquet_available, 0 }, { "_s3_available", (DL_FUNC)& _s3_available, 0 }, { "_json_available", (DL_FUNC)& _json_available, 0 }, - { "_arrow_is_altrep", (DL_FUNC) &_arrow_is_altrep, 1}, - { "_arrow_Array__Slice1", (DL_FUNC) &_arrow_Array__Slice1, 2}, - { "_arrow_Array__Slice2", (DL_FUNC) &_arrow_Array__Slice2, 3}, - { "_arrow_Array__IsNull", (DL_FUNC) &_arrow_Array__IsNull, 2}, - { "_arrow_Array__IsValid", (DL_FUNC) &_arrow_Array__IsValid, 2}, - { "_arrow_Array__length", (DL_FUNC) &_arrow_Array__length, 1}, - { "_arrow_Array__offset", (DL_FUNC) &_arrow_Array__offset, 1}, - { "_arrow_Array__null_count", (DL_FUNC) &_arrow_Array__null_count, 1}, - { "_arrow_Array__type", (DL_FUNC) &_arrow_Array__type, 1}, - { "_arrow_Array__ToString", (DL_FUNC) &_arrow_Array__ToString, 1}, - { "_arrow_Array__type_id", (DL_FUNC) &_arrow_Array__type_id, 1}, - { "_arrow_Array__Equals", (DL_FUNC) &_arrow_Array__Equals, 2}, - { "_arrow_Array__ApproxEquals", (DL_FUNC) &_arrow_Array__ApproxEquals, 2}, - { "_arrow_Array__Diff", (DL_FUNC) &_arrow_Array__Diff, 2}, - { "_arrow_Array__data", (DL_FUNC) &_arrow_Array__data, 1}, - { "_arrow_Array__RangeEquals", (DL_FUNC) &_arrow_Array__RangeEquals, 5}, - { "_arrow_Array__View", (DL_FUNC) &_arrow_Array__View, 2}, - { "_arrow_Array__Validate", (DL_FUNC) &_arrow_Array__Validate, 1}, - { "_arrow_DictionaryArray__indices", (DL_FUNC) &_arrow_DictionaryArray__indices, 1}, - { "_arrow_DictionaryArray__dictionary", (DL_FUNC) &_arrow_DictionaryArray__dictionary, 1}, - { "_arrow_StructArray__field", (DL_FUNC) &_arrow_StructArray__field, 2}, - { "_arrow_StructArray__GetFieldByName", (DL_FUNC) &_arrow_StructArray__GetFieldByName, 2}, - { "_arrow_StructArray__Flatten", (DL_FUNC) &_arrow_StructArray__Flatten, 1}, - { "_arrow_ListArray__value_type", (DL_FUNC) &_arrow_ListArray__value_type, 1}, - { "_arrow_LargeListArray__value_type", (DL_FUNC) &_arrow_LargeListArray__value_type, 1}, - { "_arrow_ListArray__values", (DL_FUNC) &_arrow_ListArray__values, 1}, - { "_arrow_LargeListArray__values", (DL_FUNC) &_arrow_LargeListArray__values, 1}, - { "_arrow_ListArray__value_length", (DL_FUNC) &_arrow_ListArray__value_length, 2}, - { "_arrow_LargeListArray__value_length", (DL_FUNC) &_arrow_LargeListArray__value_length, 2}, - { "_arrow_FixedSizeListArray__value_length", (DL_FUNC) &_arrow_FixedSizeListArray__value_length, 2}, - { "_arrow_ListArray__value_offset", (DL_FUNC) &_arrow_ListArray__value_offset, 2}, - { "_arrow_LargeListArray__value_offset", (DL_FUNC) &_arrow_LargeListArray__value_offset, 2}, - { "_arrow_FixedSizeListArray__value_offset", (DL_FUNC) &_arrow_FixedSizeListArray__value_offset, 2}, - { "_arrow_ListArray__raw_value_offsets", (DL_FUNC) &_arrow_ListArray__raw_value_offsets, 1}, - { "_arrow_LargeListArray__raw_value_offsets", (DL_FUNC) &_arrow_LargeListArray__raw_value_offsets, 1}, - { "_arrow_Array__as_vector", (DL_FUNC) &_arrow_Array__as_vector, 1}, - { "_arrow_ChunkedArray__as_vector", (DL_FUNC) &_arrow_ChunkedArray__as_vector, 2}, - { "_arrow_RecordBatch__to_dataframe", (DL_FUNC) &_arrow_RecordBatch__to_dataframe, 2}, - { "_arrow_Table__to_dataframe", (DL_FUNC) &_arrow_Table__to_dataframe, 2}, - { "_arrow_ArrayData__get_type", (DL_FUNC) &_arrow_ArrayData__get_type, 1}, - { "_arrow_ArrayData__get_length", (DL_FUNC) &_arrow_ArrayData__get_length, 1}, - { "_arrow_ArrayData__get_null_count", (DL_FUNC) &_arrow_ArrayData__get_null_count, 1}, - { "_arrow_ArrayData__get_offset", (DL_FUNC) &_arrow_ArrayData__get_offset, 1}, - { "_arrow_ArrayData__buffers", (DL_FUNC) &_arrow_ArrayData__buffers, 1}, - { "_arrow_Buffer__is_mutable", (DL_FUNC) &_arrow_Buffer__is_mutable, 1}, - { "_arrow_Buffer__ZeroPadding", (DL_FUNC) &_arrow_Buffer__ZeroPadding, 1}, - { "_arrow_Buffer__capacity", (DL_FUNC) &_arrow_Buffer__capacity, 1}, - { "_arrow_Buffer__size", (DL_FUNC) &_arrow_Buffer__size, 1}, - { "_arrow_r___RBuffer__initialize", (DL_FUNC) &_arrow_r___RBuffer__initialize, 1}, - { "_arrow_Buffer__data", (DL_FUNC) &_arrow_Buffer__data, 1}, - { "_arrow_Buffer__Equals", (DL_FUNC) &_arrow_Buffer__Equals, 2}, - { "_arrow_ChunkedArray__length", (DL_FUNC) &_arrow_ChunkedArray__length, 1}, - { "_arrow_ChunkedArray__null_count", (DL_FUNC) &_arrow_ChunkedArray__null_count, 1}, - { "_arrow_ChunkedArray__num_chunks", (DL_FUNC) &_arrow_ChunkedArray__num_chunks, 1}, - { "_arrow_ChunkedArray__chunk", (DL_FUNC) &_arrow_ChunkedArray__chunk, 2}, - { "_arrow_ChunkedArray__chunks", (DL_FUNC) &_arrow_ChunkedArray__chunks, 1}, - { "_arrow_ChunkedArray__type", (DL_FUNC) &_arrow_ChunkedArray__type, 1}, - { "_arrow_ChunkedArray__Slice1", (DL_FUNC) &_arrow_ChunkedArray__Slice1, 2}, - { "_arrow_ChunkedArray__Slice2", (DL_FUNC) &_arrow_ChunkedArray__Slice2, 3}, - { "_arrow_ChunkedArray__View", (DL_FUNC) &_arrow_ChunkedArray__View, 2}, - { "_arrow_ChunkedArray__Validate", (DL_FUNC) &_arrow_ChunkedArray__Validate, 1}, - { "_arrow_ChunkedArray__Equals", (DL_FUNC) &_arrow_ChunkedArray__Equals, 2}, - { "_arrow_ChunkedArray__ToString", (DL_FUNC) &_arrow_ChunkedArray__ToString, 1}, - { "_arrow_ChunkedArray__from_list", (DL_FUNC) &_arrow_ChunkedArray__from_list, 2}, - { "_arrow_util___Codec__Create", (DL_FUNC) &_arrow_util___Codec__Create, 2}, - { "_arrow_util___Codec__name", (DL_FUNC) &_arrow_util___Codec__name, 1}, - { "_arrow_util___Codec__IsAvailable", (DL_FUNC) &_arrow_util___Codec__IsAvailable, 1}, - { "_arrow_io___CompressedOutputStream__Make", (DL_FUNC) &_arrow_io___CompressedOutputStream__Make, 2}, - { "_arrow_io___CompressedInputStream__Make", (DL_FUNC) &_arrow_io___CompressedInputStream__Make, 2}, - { "_arrow_ExecPlan_create", (DL_FUNC) &_arrow_ExecPlan_create, 1}, - { "_arrow_ExecPlan_run", (DL_FUNC) &_arrow_ExecPlan_run, 2}, - { "_arrow_ExecNode_Scan", (DL_FUNC) &_arrow_ExecNode_Scan, 4}, - { "_arrow_ExecNode_Filter", (DL_FUNC) &_arrow_ExecNode_Filter, 2}, - { "_arrow_ExecNode_Project", (DL_FUNC) &_arrow_ExecNode_Project, 3}, - { "_arrow_ExecNode_Aggregate", (DL_FUNC) &_arrow_ExecNode_Aggregate, 5}, - { "_arrow_RecordBatch__cast", (DL_FUNC) &_arrow_RecordBatch__cast, 3}, - { "_arrow_Table__cast", (DL_FUNC) &_arrow_Table__cast, 3}, - { "_arrow_compute__CallFunction", (DL_FUNC) &_arrow_compute__CallFunction, 3}, - { "_arrow_compute__GetFunctionNames", (DL_FUNC) &_arrow_compute__GetFunctionNames, 0}, - { "_arrow_build_info", (DL_FUNC) &_arrow_build_info, 0}, - { "_arrow_runtime_info", (DL_FUNC) &_arrow_runtime_info, 0}, - { "_arrow_csv___WriteOptions__initialize", (DL_FUNC) &_arrow_csv___WriteOptions__initialize, 1}, - { "_arrow_csv___ReadOptions__initialize", (DL_FUNC) &_arrow_csv___ReadOptions__initialize, 1}, - { "_arrow_csv___ParseOptions__initialize", (DL_FUNC) &_arrow_csv___ParseOptions__initialize, 1}, - { "_arrow_csv___ReadOptions__column_names", (DL_FUNC) &_arrow_csv___ReadOptions__column_names, 1}, - { "_arrow_csv___ConvertOptions__initialize", (DL_FUNC) &_arrow_csv___ConvertOptions__initialize, 1}, - { "_arrow_csv___TableReader__Make", (DL_FUNC) &_arrow_csv___TableReader__Make, 4}, - { "_arrow_csv___TableReader__Read", (DL_FUNC) &_arrow_csv___TableReader__Read, 1}, - { "_arrow_TimestampParser__kind", (DL_FUNC) &_arrow_TimestampParser__kind, 1}, - { "_arrow_TimestampParser__format", (DL_FUNC) &_arrow_TimestampParser__format, 1}, - { "_arrow_TimestampParser__MakeStrptime", (DL_FUNC) &_arrow_TimestampParser__MakeStrptime, 1}, - { "_arrow_TimestampParser__MakeISO8601", (DL_FUNC) &_arrow_TimestampParser__MakeISO8601, 0}, - { "_arrow_csv___WriteCSV__Table", (DL_FUNC) &_arrow_csv___WriteCSV__Table, 3}, - { "_arrow_csv___WriteCSV__RecordBatch", (DL_FUNC) &_arrow_csv___WriteCSV__RecordBatch, 3}, - { "_arrow_dataset___Dataset__NewScan", (DL_FUNC) &_arrow_dataset___Dataset__NewScan, 1}, - { "_arrow_dataset___Dataset__schema", (DL_FUNC) &_arrow_dataset___Dataset__schema, 1}, - { "_arrow_dataset___Dataset__type_name", (DL_FUNC) &_arrow_dataset___Dataset__type_name, 1}, - { "_arrow_dataset___Dataset__ReplaceSchema", (DL_FUNC) &_arrow_dataset___Dataset__ReplaceSchema, 2}, - { "_arrow_dataset___UnionDataset__create", (DL_FUNC) &_arrow_dataset___UnionDataset__create, 2}, - { "_arrow_dataset___InMemoryDataset__create", (DL_FUNC) &_arrow_dataset___InMemoryDataset__create, 1}, - { "_arrow_dataset___UnionDataset__children", (DL_FUNC) &_arrow_dataset___UnionDataset__children, 1}, - { "_arrow_dataset___FileSystemDataset__format", (DL_FUNC) &_arrow_dataset___FileSystemDataset__format, 1}, - { "_arrow_dataset___FileSystemDataset__filesystem", (DL_FUNC) &_arrow_dataset___FileSystemDataset__filesystem, 1}, - { "_arrow_dataset___FileSystemDataset__files", (DL_FUNC) &_arrow_dataset___FileSystemDataset__files, 1}, - { "_arrow_dataset___DatasetFactory__Finish1", (DL_FUNC) &_arrow_dataset___DatasetFactory__Finish1, 2}, - { "_arrow_dataset___DatasetFactory__Finish2", (DL_FUNC) &_arrow_dataset___DatasetFactory__Finish2, 2}, - { "_arrow_dataset___DatasetFactory__Inspect", (DL_FUNC) &_arrow_dataset___DatasetFactory__Inspect, 2}, - { "_arrow_dataset___UnionDatasetFactory__Make", (DL_FUNC) &_arrow_dataset___UnionDatasetFactory__Make, 1}, - { "_arrow_dataset___FileSystemDatasetFactory__Make0", (DL_FUNC) &_arrow_dataset___FileSystemDatasetFactory__Make0, 3}, - { "_arrow_dataset___FileSystemDatasetFactory__Make2", (DL_FUNC) &_arrow_dataset___FileSystemDatasetFactory__Make2, 4}, - { "_arrow_dataset___FileSystemDatasetFactory__Make1", (DL_FUNC) &_arrow_dataset___FileSystemDatasetFactory__Make1, 3}, - { "_arrow_dataset___FileSystemDatasetFactory__Make3", (DL_FUNC) &_arrow_dataset___FileSystemDatasetFactory__Make3, 4}, - { "_arrow_dataset___FileFormat__type_name", (DL_FUNC) &_arrow_dataset___FileFormat__type_name, 1}, - { "_arrow_dataset___FileFormat__DefaultWriteOptions", (DL_FUNC) &_arrow_dataset___FileFormat__DefaultWriteOptions, 1}, - { "_arrow_dataset___ParquetFileFormat__Make", (DL_FUNC) &_arrow_dataset___ParquetFileFormat__Make, 2}, - { "_arrow_dataset___FileWriteOptions__type_name", (DL_FUNC) &_arrow_dataset___FileWriteOptions__type_name, 1}, - { "_arrow_dataset___ParquetFileWriteOptions__update", (DL_FUNC) &_arrow_dataset___ParquetFileWriteOptions__update, 3}, - { "_arrow_dataset___IpcFileWriteOptions__update2", (DL_FUNC) &_arrow_dataset___IpcFileWriteOptions__update2, 4}, - { "_arrow_dataset___IpcFileWriteOptions__update1", (DL_FUNC) &_arrow_dataset___IpcFileWriteOptions__update1, 3}, - { "_arrow_dataset___CsvFileWriteOptions__update", (DL_FUNC) &_arrow_dataset___CsvFileWriteOptions__update, 2}, - { "_arrow_dataset___IpcFileFormat__Make", (DL_FUNC) &_arrow_dataset___IpcFileFormat__Make, 0}, - { "_arrow_dataset___CsvFileFormat__Make", (DL_FUNC) &_arrow_dataset___CsvFileFormat__Make, 3}, - { "_arrow_dataset___FragmentScanOptions__type_name", (DL_FUNC) &_arrow_dataset___FragmentScanOptions__type_name, 1}, - { "_arrow_dataset___CsvFragmentScanOptions__Make", (DL_FUNC) &_arrow_dataset___CsvFragmentScanOptions__Make, 2}, - { "_arrow_dataset___ParquetFragmentScanOptions__Make", (DL_FUNC) &_arrow_dataset___ParquetFragmentScanOptions__Make, 3}, - { "_arrow_dataset___DirectoryPartitioning", (DL_FUNC) &_arrow_dataset___DirectoryPartitioning, 2}, - { "_arrow_dataset___DirectoryPartitioning__MakeFactory", (DL_FUNC) &_arrow_dataset___DirectoryPartitioning__MakeFactory, 2}, - { "_arrow_dataset___HivePartitioning", (DL_FUNC) &_arrow_dataset___HivePartitioning, 3}, - { "_arrow_dataset___HivePartitioning__MakeFactory", (DL_FUNC) &_arrow_dataset___HivePartitioning__MakeFactory, 2}, - { "_arrow_dataset___ScannerBuilder__ProjectNames", (DL_FUNC) &_arrow_dataset___ScannerBuilder__ProjectNames, 2}, - { "_arrow_dataset___ScannerBuilder__ProjectExprs", (DL_FUNC) &_arrow_dataset___ScannerBuilder__ProjectExprs, 3}, - { "_arrow_dataset___ScannerBuilder__Filter", (DL_FUNC) &_arrow_dataset___ScannerBuilder__Filter, 2}, - { "_arrow_dataset___ScannerBuilder__UseThreads", (DL_FUNC) &_arrow_dataset___ScannerBuilder__UseThreads, 2}, - { "_arrow_dataset___ScannerBuilder__UseAsync", (DL_FUNC) &_arrow_dataset___ScannerBuilder__UseAsync, 2}, - { "_arrow_dataset___ScannerBuilder__BatchSize", (DL_FUNC) &_arrow_dataset___ScannerBuilder__BatchSize, 2}, - { "_arrow_dataset___ScannerBuilder__FragmentScanOptions", (DL_FUNC) &_arrow_dataset___ScannerBuilder__FragmentScanOptions, 2}, - { "_arrow_dataset___ScannerBuilder__schema", (DL_FUNC) &_arrow_dataset___ScannerBuilder__schema, 1}, - { "_arrow_dataset___ScannerBuilder__Finish", (DL_FUNC) &_arrow_dataset___ScannerBuilder__Finish, 1}, - { "_arrow_dataset___Scanner__ToTable", (DL_FUNC) &_arrow_dataset___Scanner__ToTable, 1}, - { "_arrow_dataset___Scanner__ScanBatches", (DL_FUNC) &_arrow_dataset___Scanner__ScanBatches, 1}, - { "_arrow_dataset___Scanner__ToRecordBatchReader", (DL_FUNC) &_arrow_dataset___Scanner__ToRecordBatchReader, 1}, - { "_arrow_dataset___Scanner__head", (DL_FUNC) &_arrow_dataset___Scanner__head, 2}, - { "_arrow_dataset___Scanner__schema", (DL_FUNC) &_arrow_dataset___Scanner__schema, 1}, - { "_arrow_dataset___ScanTask__get_batches", (DL_FUNC) &_arrow_dataset___ScanTask__get_batches, 1}, - { "_arrow_dataset___Dataset__Write", (DL_FUNC) &_arrow_dataset___Dataset__Write, 6}, - { "_arrow_dataset___Scanner__TakeRows", (DL_FUNC) &_arrow_dataset___Scanner__TakeRows, 2}, - { "_arrow_dataset___Scanner__CountRows", (DL_FUNC) &_arrow_dataset___Scanner__CountRows, 1}, - { "_arrow_Int8__initialize", (DL_FUNC) &_arrow_Int8__initialize, 0}, - { "_arrow_Int16__initialize", (DL_FUNC) &_arrow_Int16__initialize, 0}, - { "_arrow_Int32__initialize", (DL_FUNC) &_arrow_Int32__initialize, 0}, - { "_arrow_Int64__initialize", (DL_FUNC) &_arrow_Int64__initialize, 0}, - { "_arrow_UInt8__initialize", (DL_FUNC) &_arrow_UInt8__initialize, 0}, - { "_arrow_UInt16__initialize", (DL_FUNC) &_arrow_UInt16__initialize, 0}, - { "_arrow_UInt32__initialize", (DL_FUNC) &_arrow_UInt32__initialize, 0}, - { "_arrow_UInt64__initialize", (DL_FUNC) &_arrow_UInt64__initialize, 0}, - { "_arrow_Float16__initialize", (DL_FUNC) &_arrow_Float16__initialize, 0}, - { "_arrow_Float32__initialize", (DL_FUNC) &_arrow_Float32__initialize, 0}, - { "_arrow_Float64__initialize", (DL_FUNC) &_arrow_Float64__initialize, 0}, - { "_arrow_Boolean__initialize", (DL_FUNC) &_arrow_Boolean__initialize, 0}, - { "_arrow_Utf8__initialize", (DL_FUNC) &_arrow_Utf8__initialize, 0}, - { "_arrow_LargeUtf8__initialize", (DL_FUNC) &_arrow_LargeUtf8__initialize, 0}, - { "_arrow_Binary__initialize", (DL_FUNC) &_arrow_Binary__initialize, 0}, - { "_arrow_LargeBinary__initialize", (DL_FUNC) &_arrow_LargeBinary__initialize, 0}, - { "_arrow_Date32__initialize", (DL_FUNC) &_arrow_Date32__initialize, 0}, - { "_arrow_Date64__initialize", (DL_FUNC) &_arrow_Date64__initialize, 0}, - { "_arrow_Null__initialize", (DL_FUNC) &_arrow_Null__initialize, 0}, - { "_arrow_Decimal128Type__initialize", (DL_FUNC) &_arrow_Decimal128Type__initialize, 2}, - { "_arrow_FixedSizeBinary__initialize", (DL_FUNC) &_arrow_FixedSizeBinary__initialize, 1}, - { "_arrow_Timestamp__initialize", (DL_FUNC) &_arrow_Timestamp__initialize, 2}, - { "_arrow_Time32__initialize", (DL_FUNC) &_arrow_Time32__initialize, 1}, - { "_arrow_Time64__initialize", (DL_FUNC) &_arrow_Time64__initialize, 1}, - { "_arrow_list__", (DL_FUNC) &_arrow_list__, 1}, - { "_arrow_large_list__", (DL_FUNC) &_arrow_large_list__, 1}, - { "_arrow_fixed_size_list__", (DL_FUNC) &_arrow_fixed_size_list__, 2}, - { "_arrow_struct__", (DL_FUNC) &_arrow_struct__, 1}, - { "_arrow_DataType__ToString", (DL_FUNC) &_arrow_DataType__ToString, 1}, - { "_arrow_DataType__name", (DL_FUNC) &_arrow_DataType__name, 1}, - { "_arrow_DataType__Equals", (DL_FUNC) &_arrow_DataType__Equals, 2}, - { "_arrow_DataType__num_fields", (DL_FUNC) &_arrow_DataType__num_fields, 1}, - { "_arrow_DataType__fields", (DL_FUNC) &_arrow_DataType__fields, 1}, - { "_arrow_DataType__id", (DL_FUNC) &_arrow_DataType__id, 1}, - { "_arrow_ListType__ToString", (DL_FUNC) &_arrow_ListType__ToString, 1}, - { "_arrow_FixedWidthType__bit_width", (DL_FUNC) &_arrow_FixedWidthType__bit_width, 1}, - { "_arrow_DateType__unit", (DL_FUNC) &_arrow_DateType__unit, 1}, - { "_arrow_TimeType__unit", (DL_FUNC) &_arrow_TimeType__unit, 1}, - { "_arrow_DecimalType__precision", (DL_FUNC) &_arrow_DecimalType__precision, 1}, - { "_arrow_DecimalType__scale", (DL_FUNC) &_arrow_DecimalType__scale, 1}, - { "_arrow_TimestampType__timezone", (DL_FUNC) &_arrow_TimestampType__timezone, 1}, - { "_arrow_TimestampType__unit", (DL_FUNC) &_arrow_TimestampType__unit, 1}, - { "_arrow_DictionaryType__initialize", (DL_FUNC) &_arrow_DictionaryType__initialize, 3}, - { "_arrow_DictionaryType__index_type", (DL_FUNC) &_arrow_DictionaryType__index_type, 1}, - { "_arrow_DictionaryType__value_type", (DL_FUNC) &_arrow_DictionaryType__value_type, 1}, - { "_arrow_DictionaryType__name", (DL_FUNC) &_arrow_DictionaryType__name, 1}, - { "_arrow_DictionaryType__ordered", (DL_FUNC) &_arrow_DictionaryType__ordered, 1}, - { "_arrow_StructType__GetFieldByName", (DL_FUNC) &_arrow_StructType__GetFieldByName, 2}, - { "_arrow_StructType__GetFieldIndex", (DL_FUNC) &_arrow_StructType__GetFieldIndex, 2}, - { "_arrow_StructType__field_names", (DL_FUNC) &_arrow_StructType__field_names, 1}, - { "_arrow_ListType__value_field", (DL_FUNC) &_arrow_ListType__value_field, 1}, - { "_arrow_ListType__value_type", (DL_FUNC) &_arrow_ListType__value_type, 1}, - { "_arrow_LargeListType__value_field", (DL_FUNC) &_arrow_LargeListType__value_field, 1}, - { "_arrow_LargeListType__value_type", (DL_FUNC) &_arrow_LargeListType__value_type, 1}, - { "_arrow_FixedSizeListType__value_field", (DL_FUNC) &_arrow_FixedSizeListType__value_field, 1}, - { "_arrow_FixedSizeListType__value_type", (DL_FUNC) &_arrow_FixedSizeListType__value_type, 1}, - { "_arrow_FixedSizeListType__list_size", (DL_FUNC) &_arrow_FixedSizeListType__list_size, 1}, - { "_arrow_compute___expr__call", (DL_FUNC) &_arrow_compute___expr__call, 3}, - { "_arrow_field_names_in_expression", (DL_FUNC) &_arrow_field_names_in_expression, 1}, - { "_arrow_compute___expr__get_field_ref_name", (DL_FUNC) &_arrow_compute___expr__get_field_ref_name, 1}, - { "_arrow_compute___expr__field_ref", (DL_FUNC) &_arrow_compute___expr__field_ref, 1}, - { "_arrow_compute___expr__scalar", (DL_FUNC) &_arrow_compute___expr__scalar, 1}, - { "_arrow_compute___expr__ToString", (DL_FUNC) &_arrow_compute___expr__ToString, 1}, - { "_arrow_compute___expr__type", (DL_FUNC) &_arrow_compute___expr__type, 2}, - { "_arrow_compute___expr__type_id", (DL_FUNC) &_arrow_compute___expr__type_id, 2}, - { "_arrow_ipc___WriteFeather__Table", (DL_FUNC) &_arrow_ipc___WriteFeather__Table, 6}, - { "_arrow_ipc___feather___Reader__version", (DL_FUNC) &_arrow_ipc___feather___Reader__version, 1}, - { "_arrow_ipc___feather___Reader__Read", (DL_FUNC) &_arrow_ipc___feather___Reader__Read, 2}, - { "_arrow_ipc___feather___Reader__Open", (DL_FUNC) &_arrow_ipc___feather___Reader__Open, 1}, - { "_arrow_ipc___feather___Reader__schema", (DL_FUNC) &_arrow_ipc___feather___Reader__schema, 1}, - { "_arrow_Field__initialize", (DL_FUNC) &_arrow_Field__initialize, 3}, - { "_arrow_Field__ToString", (DL_FUNC) &_arrow_Field__ToString, 1}, - { "_arrow_Field__name", (DL_FUNC) &_arrow_Field__name, 1}, - { "_arrow_Field__Equals", (DL_FUNC) &_arrow_Field__Equals, 2}, - { "_arrow_Field__nullable", (DL_FUNC) &_arrow_Field__nullable, 1}, - { "_arrow_Field__type", (DL_FUNC) &_arrow_Field__type, 1}, - { "_arrow_fs___FileInfo__type", (DL_FUNC) &_arrow_fs___FileInfo__type, 1}, - { "_arrow_fs___FileInfo__set_type", (DL_FUNC) &_arrow_fs___FileInfo__set_type, 2}, - { "_arrow_fs___FileInfo__path", (DL_FUNC) &_arrow_fs___FileInfo__path, 1}, - { "_arrow_fs___FileInfo__set_path", (DL_FUNC) &_arrow_fs___FileInfo__set_path, 2}, - { "_arrow_fs___FileInfo__size", (DL_FUNC) &_arrow_fs___FileInfo__size, 1}, - { "_arrow_fs___FileInfo__set_size", (DL_FUNC) &_arrow_fs___FileInfo__set_size, 2}, - { "_arrow_fs___FileInfo__base_name", (DL_FUNC) &_arrow_fs___FileInfo__base_name, 1}, - { "_arrow_fs___FileInfo__extension", (DL_FUNC) &_arrow_fs___FileInfo__extension, 1}, - { "_arrow_fs___FileInfo__mtime", (DL_FUNC) &_arrow_fs___FileInfo__mtime, 1}, - { "_arrow_fs___FileInfo__set_mtime", (DL_FUNC) &_arrow_fs___FileInfo__set_mtime, 2}, - { "_arrow_fs___FileSelector__base_dir", (DL_FUNC) &_arrow_fs___FileSelector__base_dir, 1}, - { "_arrow_fs___FileSelector__allow_not_found", (DL_FUNC) &_arrow_fs___FileSelector__allow_not_found, 1}, - { "_arrow_fs___FileSelector__recursive", (DL_FUNC) &_arrow_fs___FileSelector__recursive, 1}, - { "_arrow_fs___FileSelector__create", (DL_FUNC) &_arrow_fs___FileSelector__create, 3}, - { "_arrow_fs___FileSystem__GetTargetInfos_Paths", (DL_FUNC) &_arrow_fs___FileSystem__GetTargetInfos_Paths, 2}, - { "_arrow_fs___FileSystem__GetTargetInfos_FileSelector", (DL_FUNC) &_arrow_fs___FileSystem__GetTargetInfos_FileSelector, 2}, - { "_arrow_fs___FileSystem__CreateDir", (DL_FUNC) &_arrow_fs___FileSystem__CreateDir, 3}, - { "_arrow_fs___FileSystem__DeleteDir", (DL_FUNC) &_arrow_fs___FileSystem__DeleteDir, 2}, - { "_arrow_fs___FileSystem__DeleteDirContents", (DL_FUNC) &_arrow_fs___FileSystem__DeleteDirContents, 2}, - { "_arrow_fs___FileSystem__DeleteFile", (DL_FUNC) &_arrow_fs___FileSystem__DeleteFile, 2}, - { "_arrow_fs___FileSystem__DeleteFiles", (DL_FUNC) &_arrow_fs___FileSystem__DeleteFiles, 2}, - { "_arrow_fs___FileSystem__Move", (DL_FUNC) &_arrow_fs___FileSystem__Move, 3}, - { "_arrow_fs___FileSystem__CopyFile", (DL_FUNC) &_arrow_fs___FileSystem__CopyFile, 3}, - { "_arrow_fs___FileSystem__OpenInputStream", (DL_FUNC) &_arrow_fs___FileSystem__OpenInputStream, 2}, - { "_arrow_fs___FileSystem__OpenInputFile", (DL_FUNC) &_arrow_fs___FileSystem__OpenInputFile, 2}, - { "_arrow_fs___FileSystem__OpenOutputStream", (DL_FUNC) &_arrow_fs___FileSystem__OpenOutputStream, 2}, - { "_arrow_fs___FileSystem__OpenAppendStream", (DL_FUNC) &_arrow_fs___FileSystem__OpenAppendStream, 2}, - { "_arrow_fs___FileSystem__type_name", (DL_FUNC) &_arrow_fs___FileSystem__type_name, 1}, - { "_arrow_fs___LocalFileSystem__create", (DL_FUNC) &_arrow_fs___LocalFileSystem__create, 0}, - { "_arrow_fs___SubTreeFileSystem__create", (DL_FUNC) &_arrow_fs___SubTreeFileSystem__create, 2}, - { "_arrow_fs___SubTreeFileSystem__base_fs", (DL_FUNC) &_arrow_fs___SubTreeFileSystem__base_fs, 1}, - { "_arrow_fs___SubTreeFileSystem__base_path", (DL_FUNC) &_arrow_fs___SubTreeFileSystem__base_path, 1}, - { "_arrow_fs___FileSystemFromUri", (DL_FUNC) &_arrow_fs___FileSystemFromUri, 1}, - { "_arrow_fs___CopyFiles", (DL_FUNC) &_arrow_fs___CopyFiles, 6}, - { "_arrow_fs___S3FileSystem__create", (DL_FUNC) &_arrow_fs___S3FileSystem__create, 12}, - { "_arrow_fs___S3FileSystem__region", (DL_FUNC) &_arrow_fs___S3FileSystem__region, 1}, - { "_arrow_io___Readable__Read", (DL_FUNC) &_arrow_io___Readable__Read, 2}, - { "_arrow_io___InputStream__Close", (DL_FUNC) &_arrow_io___InputStream__Close, 1}, - { "_arrow_io___OutputStream__Close", (DL_FUNC) &_arrow_io___OutputStream__Close, 1}, - { "_arrow_io___RandomAccessFile__GetSize", (DL_FUNC) &_arrow_io___RandomAccessFile__GetSize, 1}, - { "_arrow_io___RandomAccessFile__supports_zero_copy", (DL_FUNC) &_arrow_io___RandomAccessFile__supports_zero_copy, 1}, - { "_arrow_io___RandomAccessFile__Seek", (DL_FUNC) &_arrow_io___RandomAccessFile__Seek, 2}, - { "_arrow_io___RandomAccessFile__Tell", (DL_FUNC) &_arrow_io___RandomAccessFile__Tell, 1}, - { "_arrow_io___RandomAccessFile__Read0", (DL_FUNC) &_arrow_io___RandomAccessFile__Read0, 1}, - { "_arrow_io___RandomAccessFile__ReadAt", (DL_FUNC) &_arrow_io___RandomAccessFile__ReadAt, 3}, - { "_arrow_io___MemoryMappedFile__Create", (DL_FUNC) &_arrow_io___MemoryMappedFile__Create, 2}, - { "_arrow_io___MemoryMappedFile__Open", (DL_FUNC) &_arrow_io___MemoryMappedFile__Open, 2}, - { "_arrow_io___MemoryMappedFile__Resize", (DL_FUNC) &_arrow_io___MemoryMappedFile__Resize, 2}, - { "_arrow_io___ReadableFile__Open", (DL_FUNC) &_arrow_io___ReadableFile__Open, 1}, - { "_arrow_io___BufferReader__initialize", (DL_FUNC) &_arrow_io___BufferReader__initialize, 1}, - { "_arrow_io___Writable__write", (DL_FUNC) &_arrow_io___Writable__write, 2}, - { "_arrow_io___OutputStream__Tell", (DL_FUNC) &_arrow_io___OutputStream__Tell, 1}, - { "_arrow_io___FileOutputStream__Open", (DL_FUNC) &_arrow_io___FileOutputStream__Open, 1}, - { "_arrow_io___BufferOutputStream__Create", (DL_FUNC) &_arrow_io___BufferOutputStream__Create, 1}, - { "_arrow_io___BufferOutputStream__capacity", (DL_FUNC) &_arrow_io___BufferOutputStream__capacity, 1}, - { "_arrow_io___BufferOutputStream__Finish", (DL_FUNC) &_arrow_io___BufferOutputStream__Finish, 1}, - { "_arrow_io___BufferOutputStream__Tell", (DL_FUNC) &_arrow_io___BufferOutputStream__Tell, 1}, - { "_arrow_io___BufferOutputStream__Write", (DL_FUNC) &_arrow_io___BufferOutputStream__Write, 2}, - { "_arrow_json___ReadOptions__initialize", (DL_FUNC) &_arrow_json___ReadOptions__initialize, 2}, - { "_arrow_json___ParseOptions__initialize1", (DL_FUNC) &_arrow_json___ParseOptions__initialize1, 1}, - { "_arrow_json___ParseOptions__initialize2", (DL_FUNC) &_arrow_json___ParseOptions__initialize2, 2}, - { "_arrow_json___TableReader__Make", (DL_FUNC) &_arrow_json___TableReader__Make, 3}, - { "_arrow_json___TableReader__Read", (DL_FUNC) &_arrow_json___TableReader__Read, 1}, - { "_arrow_MemoryPool__default", (DL_FUNC) &_arrow_MemoryPool__default, 0}, - { "_arrow_MemoryPool__bytes_allocated", (DL_FUNC) &_arrow_MemoryPool__bytes_allocated, 1}, - { "_arrow_MemoryPool__max_memory", (DL_FUNC) &_arrow_MemoryPool__max_memory, 1}, - { "_arrow_MemoryPool__backend_name", (DL_FUNC) &_arrow_MemoryPool__backend_name, 1}, - { "_arrow_supported_memory_backends", (DL_FUNC) &_arrow_supported_memory_backends, 0}, - { "_arrow_ipc___Message__body_length", (DL_FUNC) &_arrow_ipc___Message__body_length, 1}, - { "_arrow_ipc___Message__metadata", (DL_FUNC) &_arrow_ipc___Message__metadata, 1}, - { "_arrow_ipc___Message__body", (DL_FUNC) &_arrow_ipc___Message__body, 1}, - { "_arrow_ipc___Message__Verify", (DL_FUNC) &_arrow_ipc___Message__Verify, 1}, - { "_arrow_ipc___Message__type", (DL_FUNC) &_arrow_ipc___Message__type, 1}, - { "_arrow_ipc___Message__Equals", (DL_FUNC) &_arrow_ipc___Message__Equals, 2}, - { "_arrow_ipc___ReadRecordBatch__Message__Schema", (DL_FUNC) &_arrow_ipc___ReadRecordBatch__Message__Schema, 2}, - { "_arrow_ipc___ReadSchema_InputStream", (DL_FUNC) &_arrow_ipc___ReadSchema_InputStream, 1}, - { "_arrow_ipc___ReadSchema_Message", (DL_FUNC) &_arrow_ipc___ReadSchema_Message, 1}, - { "_arrow_ipc___MessageReader__Open", (DL_FUNC) &_arrow_ipc___MessageReader__Open, 1}, - { "_arrow_ipc___MessageReader__ReadNextMessage", (DL_FUNC) &_arrow_ipc___MessageReader__ReadNextMessage, 1}, - { "_arrow_ipc___ReadMessage", (DL_FUNC) &_arrow_ipc___ReadMessage, 1}, - { "_arrow_parquet___arrow___ArrowReaderProperties__Make", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__Make, 1}, - { "_arrow_parquet___arrow___ArrowReaderProperties__set_use_threads", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__set_use_threads, 2}, - { "_arrow_parquet___arrow___ArrowReaderProperties__get_use_threads", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__get_use_threads, 2}, - { "_arrow_parquet___arrow___ArrowReaderProperties__get_read_dictionary", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__get_read_dictionary, 2}, - { "_arrow_parquet___arrow___ArrowReaderProperties__set_read_dictionary", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__set_read_dictionary, 3}, - { "_arrow_parquet___arrow___FileReader__OpenFile", (DL_FUNC) &_arrow_parquet___arrow___FileReader__OpenFile, 2}, - { "_arrow_parquet___arrow___FileReader__ReadTable1", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadTable1, 1}, - { "_arrow_parquet___arrow___FileReader__ReadTable2", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadTable2, 2}, - { "_arrow_parquet___arrow___FileReader__ReadRowGroup1", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadRowGroup1, 2}, - { "_arrow_parquet___arrow___FileReader__ReadRowGroup2", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadRowGroup2, 3}, - { "_arrow_parquet___arrow___FileReader__ReadRowGroups1", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadRowGroups1, 2}, - { "_arrow_parquet___arrow___FileReader__ReadRowGroups2", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadRowGroups2, 3}, - { "_arrow_parquet___arrow___FileReader__num_rows", (DL_FUNC) &_arrow_parquet___arrow___FileReader__num_rows, 1}, - { "_arrow_parquet___arrow___FileReader__num_columns", (DL_FUNC) &_arrow_parquet___arrow___FileReader__num_columns, 1}, - { "_arrow_parquet___arrow___FileReader__num_row_groups", (DL_FUNC) &_arrow_parquet___arrow___FileReader__num_row_groups, 1}, - { "_arrow_parquet___arrow___FileReader__ReadColumn", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadColumn, 2}, - { "_arrow_parquet___ArrowWriterProperties___create", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___create, 3}, - { "_arrow_parquet___WriterProperties___Builder__create", (DL_FUNC) &_arrow_parquet___WriterProperties___Builder__create, 0}, - { "_arrow_parquet___WriterProperties___Builder__version", (DL_FUNC) &_arrow_parquet___WriterProperties___Builder__version, 2}, - { "_arrow_parquet___ArrowWriterProperties___Builder__set_compressions", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__set_compressions, 3}, - { "_arrow_parquet___ArrowWriterProperties___Builder__set_compression_levels", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__set_compression_levels, 3}, - { "_arrow_parquet___ArrowWriterProperties___Builder__set_use_dictionary", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__set_use_dictionary, 3}, - { "_arrow_parquet___ArrowWriterProperties___Builder__set_write_statistics", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__set_write_statistics, 3}, - { "_arrow_parquet___ArrowWriterProperties___Builder__data_page_size", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__data_page_size, 2}, - { "_arrow_parquet___WriterProperties___Builder__build", (DL_FUNC) &_arrow_parquet___WriterProperties___Builder__build, 1}, - { "_arrow_parquet___arrow___ParquetFileWriter__Open", (DL_FUNC) &_arrow_parquet___arrow___ParquetFileWriter__Open, 4}, - { "_arrow_parquet___arrow___FileWriter__WriteTable", (DL_FUNC) &_arrow_parquet___arrow___FileWriter__WriteTable, 3}, - { "_arrow_parquet___arrow___FileWriter__Close", (DL_FUNC) &_arrow_parquet___arrow___FileWriter__Close, 1}, - { "_arrow_parquet___arrow___WriteTable", (DL_FUNC) &_arrow_parquet___arrow___WriteTable, 4}, - { "_arrow_parquet___arrow___FileReader__GetSchema", (DL_FUNC) &_arrow_parquet___arrow___FileReader__GetSchema, 1}, - { "_arrow_allocate_arrow_schema", (DL_FUNC) &_arrow_allocate_arrow_schema, 0}, - { "_arrow_delete_arrow_schema", (DL_FUNC) &_arrow_delete_arrow_schema, 1}, - { "_arrow_allocate_arrow_array", (DL_FUNC) &_arrow_allocate_arrow_array, 0}, - { "_arrow_delete_arrow_array", (DL_FUNC) &_arrow_delete_arrow_array, 1}, - { "_arrow_allocate_arrow_array_stream", (DL_FUNC) &_arrow_allocate_arrow_array_stream, 0}, - { "_arrow_delete_arrow_array_stream", (DL_FUNC) &_arrow_delete_arrow_array_stream, 1}, - { "_arrow_ImportArray", (DL_FUNC) &_arrow_ImportArray, 2}, - { "_arrow_ImportRecordBatch", (DL_FUNC) &_arrow_ImportRecordBatch, 2}, - { "_arrow_ImportSchema", (DL_FUNC) &_arrow_ImportSchema, 1}, - { "_arrow_ImportField", (DL_FUNC) &_arrow_ImportField, 1}, - { "_arrow_ImportType", (DL_FUNC) &_arrow_ImportType, 1}, - { "_arrow_ImportRecordBatchReader", (DL_FUNC) &_arrow_ImportRecordBatchReader, 1}, - { "_arrow_ExportType", (DL_FUNC) &_arrow_ExportType, 2}, - { "_arrow_ExportField", (DL_FUNC) &_arrow_ExportField, 2}, - { "_arrow_ExportSchema", (DL_FUNC) &_arrow_ExportSchema, 2}, - { "_arrow_ExportArray", (DL_FUNC) &_arrow_ExportArray, 3}, - { "_arrow_ExportRecordBatch", (DL_FUNC) &_arrow_ExportRecordBatch, 3}, - { "_arrow_ExportRecordBatchReader", (DL_FUNC) &_arrow_ExportRecordBatchReader, 2}, - { "_arrow_Table__from_dots", (DL_FUNC) &_arrow_Table__from_dots, 3}, - { "_arrow_vec_to_arrow", (DL_FUNC) &_arrow_vec_to_arrow, 2}, - { "_arrow_DictionaryArray__FromArrays", (DL_FUNC) &_arrow_DictionaryArray__FromArrays, 3}, - { "_arrow_RecordBatch__num_columns", (DL_FUNC) &_arrow_RecordBatch__num_columns, 1}, - { "_arrow_RecordBatch__num_rows", (DL_FUNC) &_arrow_RecordBatch__num_rows, 1}, - { "_arrow_RecordBatch__schema", (DL_FUNC) &_arrow_RecordBatch__schema, 1}, - { "_arrow_RecordBatch__RenameColumns", (DL_FUNC) &_arrow_RecordBatch__RenameColumns, 2}, - { "_arrow_RecordBatch__ReplaceSchemaMetadata", (DL_FUNC) &_arrow_RecordBatch__ReplaceSchemaMetadata, 2}, - { "_arrow_RecordBatch__columns", (DL_FUNC) &_arrow_RecordBatch__columns, 1}, - { "_arrow_RecordBatch__column", (DL_FUNC) &_arrow_RecordBatch__column, 2}, - { "_arrow_RecordBatch__GetColumnByName", (DL_FUNC) &_arrow_RecordBatch__GetColumnByName, 2}, - { "_arrow_RecordBatch__SelectColumns", (DL_FUNC) &_arrow_RecordBatch__SelectColumns, 2}, - { "_arrow_RecordBatch__Equals", (DL_FUNC) &_arrow_RecordBatch__Equals, 3}, - { "_arrow_RecordBatch__AddColumn", (DL_FUNC) &_arrow_RecordBatch__AddColumn, 4}, - { "_arrow_RecordBatch__SetColumn", (DL_FUNC) &_arrow_RecordBatch__SetColumn, 4}, - { "_arrow_RecordBatch__RemoveColumn", (DL_FUNC) &_arrow_RecordBatch__RemoveColumn, 2}, - { "_arrow_RecordBatch__column_name", (DL_FUNC) &_arrow_RecordBatch__column_name, 2}, - { "_arrow_RecordBatch__names", (DL_FUNC) &_arrow_RecordBatch__names, 1}, - { "_arrow_RecordBatch__Slice1", (DL_FUNC) &_arrow_RecordBatch__Slice1, 2}, - { "_arrow_RecordBatch__Slice2", (DL_FUNC) &_arrow_RecordBatch__Slice2, 3}, - { "_arrow_ipc___SerializeRecordBatch__Raw", (DL_FUNC) &_arrow_ipc___SerializeRecordBatch__Raw, 1}, - { "_arrow_ipc___ReadRecordBatch__InputStream__Schema", (DL_FUNC) &_arrow_ipc___ReadRecordBatch__InputStream__Schema, 2}, - { "_arrow_RecordBatch__from_arrays", (DL_FUNC) &_arrow_RecordBatch__from_arrays, 2}, - { "_arrow_RecordBatchReader__schema", (DL_FUNC) &_arrow_RecordBatchReader__schema, 1}, - { "_arrow_RecordBatchReader__ReadNext", (DL_FUNC) &_arrow_RecordBatchReader__ReadNext, 1}, - { "_arrow_RecordBatchReader__batches", (DL_FUNC) &_arrow_RecordBatchReader__batches, 1}, - { "_arrow_Table__from_RecordBatchReader", (DL_FUNC) &_arrow_Table__from_RecordBatchReader, 1}, - { "_arrow_ipc___RecordBatchStreamReader__Open", (DL_FUNC) &_arrow_ipc___RecordBatchStreamReader__Open, 1}, - { "_arrow_ipc___RecordBatchFileReader__schema", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__schema, 1}, - { "_arrow_ipc___RecordBatchFileReader__num_record_batches", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__num_record_batches, 1}, - { "_arrow_ipc___RecordBatchFileReader__ReadRecordBatch", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__ReadRecordBatch, 2}, - { "_arrow_ipc___RecordBatchFileReader__Open", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__Open, 1}, - { "_arrow_Table__from_RecordBatchFileReader", (DL_FUNC) &_arrow_Table__from_RecordBatchFileReader, 1}, - { "_arrow_ipc___RecordBatchFileReader__batches", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__batches, 1}, - { "_arrow_ipc___RecordBatchWriter__WriteRecordBatch", (DL_FUNC) &_arrow_ipc___RecordBatchWriter__WriteRecordBatch, 2}, - { "_arrow_ipc___RecordBatchWriter__WriteTable", (DL_FUNC) &_arrow_ipc___RecordBatchWriter__WriteTable, 2}, - { "_arrow_ipc___RecordBatchWriter__Close", (DL_FUNC) &_arrow_ipc___RecordBatchWriter__Close, 1}, - { "_arrow_ipc___RecordBatchFileWriter__Open", (DL_FUNC) &_arrow_ipc___RecordBatchFileWriter__Open, 4}, - { "_arrow_ipc___RecordBatchStreamWriter__Open", (DL_FUNC) &_arrow_ipc___RecordBatchStreamWriter__Open, 4}, - { "_arrow_Array__GetScalar", (DL_FUNC) &_arrow_Array__GetScalar, 2}, - { "_arrow_Scalar__ToString", (DL_FUNC) &_arrow_Scalar__ToString, 1}, - { "_arrow_StructScalar__field", (DL_FUNC) &_arrow_StructScalar__field, 2}, - { "_arrow_StructScalar__GetFieldByName", (DL_FUNC) &_arrow_StructScalar__GetFieldByName, 2}, - { "_arrow_Scalar__as_vector", (DL_FUNC) &_arrow_Scalar__as_vector, 1}, - { "_arrow_MakeArrayFromScalar", (DL_FUNC) &_arrow_MakeArrayFromScalar, 2}, - { "_arrow_Scalar__is_valid", (DL_FUNC) &_arrow_Scalar__is_valid, 1}, - { "_arrow_Scalar__type", (DL_FUNC) &_arrow_Scalar__type, 1}, - { "_arrow_Scalar__Equals", (DL_FUNC) &_arrow_Scalar__Equals, 2}, - { "_arrow_Scalar__ApproxEquals", (DL_FUNC) &_arrow_Scalar__ApproxEquals, 2}, - { "_arrow_schema_", (DL_FUNC) &_arrow_schema_, 1}, - { "_arrow_Schema__ToString", (DL_FUNC) &_arrow_Schema__ToString, 1}, - { "_arrow_Schema__num_fields", (DL_FUNC) &_arrow_Schema__num_fields, 1}, - { "_arrow_Schema__field", (DL_FUNC) &_arrow_Schema__field, 2}, - { "_arrow_Schema__AddField", (DL_FUNC) &_arrow_Schema__AddField, 3}, - { "_arrow_Schema__SetField", (DL_FUNC) &_arrow_Schema__SetField, 3}, - { "_arrow_Schema__RemoveField", (DL_FUNC) &_arrow_Schema__RemoveField, 2}, - { "_arrow_Schema__GetFieldByName", (DL_FUNC) &_arrow_Schema__GetFieldByName, 2}, - { "_arrow_Schema__fields", (DL_FUNC) &_arrow_Schema__fields, 1}, - { "_arrow_Schema__field_names", (DL_FUNC) &_arrow_Schema__field_names, 1}, - { "_arrow_Schema__HasMetadata", (DL_FUNC) &_arrow_Schema__HasMetadata, 1}, - { "_arrow_Schema__metadata", (DL_FUNC) &_arrow_Schema__metadata, 1}, - { "_arrow_Schema__WithMetadata", (DL_FUNC) &_arrow_Schema__WithMetadata, 2}, - { "_arrow_Schema__serialize", (DL_FUNC) &_arrow_Schema__serialize, 1}, - { "_arrow_Schema__Equals", (DL_FUNC) &_arrow_Schema__Equals, 3}, - { "_arrow_arrow__UnifySchemas", (DL_FUNC) &_arrow_arrow__UnifySchemas, 1}, - { "_arrow_Table__num_columns", (DL_FUNC) &_arrow_Table__num_columns, 1}, - { "_arrow_Table__num_rows", (DL_FUNC) &_arrow_Table__num_rows, 1}, - { "_arrow_Table__schema", (DL_FUNC) &_arrow_Table__schema, 1}, - { "_arrow_Table__ReplaceSchemaMetadata", (DL_FUNC) &_arrow_Table__ReplaceSchemaMetadata, 2}, - { "_arrow_Table__column", (DL_FUNC) &_arrow_Table__column, 2}, - { "_arrow_Table__field", (DL_FUNC) &_arrow_Table__field, 2}, - { "_arrow_Table__columns", (DL_FUNC) &_arrow_Table__columns, 1}, - { "_arrow_Table__ColumnNames", (DL_FUNC) &_arrow_Table__ColumnNames, 1}, - { "_arrow_Table__RenameColumns", (DL_FUNC) &_arrow_Table__RenameColumns, 2}, - { "_arrow_Table__Slice1", (DL_FUNC) &_arrow_Table__Slice1, 2}, - { "_arrow_Table__Slice2", (DL_FUNC) &_arrow_Table__Slice2, 3}, - { "_arrow_Table__Equals", (DL_FUNC) &_arrow_Table__Equals, 3}, - { "_arrow_Table__Validate", (DL_FUNC) &_arrow_Table__Validate, 1}, - { "_arrow_Table__ValidateFull", (DL_FUNC) &_arrow_Table__ValidateFull, 1}, - { "_arrow_Table__GetColumnByName", (DL_FUNC) &_arrow_Table__GetColumnByName, 2}, - { "_arrow_Table__RemoveColumn", (DL_FUNC) &_arrow_Table__RemoveColumn, 2}, - { "_arrow_Table__AddColumn", (DL_FUNC) &_arrow_Table__AddColumn, 4}, - { "_arrow_Table__SetColumn", (DL_FUNC) &_arrow_Table__SetColumn, 4}, - { "_arrow_Table__SelectColumns", (DL_FUNC) &_arrow_Table__SelectColumns, 2}, - { "_arrow_all_record_batches", (DL_FUNC) &_arrow_all_record_batches, 1}, - { "_arrow_Table__from_record_batches", (DL_FUNC) &_arrow_Table__from_record_batches, 2}, - { "_arrow_GetCpuThreadPoolCapacity", (DL_FUNC) &_arrow_GetCpuThreadPoolCapacity, 0}, - { "_arrow_SetCpuThreadPoolCapacity", (DL_FUNC) &_arrow_SetCpuThreadPoolCapacity, 1}, - { "_arrow_GetIOThreadPoolCapacity", (DL_FUNC) &_arrow_GetIOThreadPoolCapacity, 0}, - { "_arrow_SetIOThreadPoolCapacity", (DL_FUNC) &_arrow_SetIOThreadPoolCapacity, 1}, - { "_arrow_Array__infer_type", (DL_FUNC) &_arrow_Array__infer_type, 1}, - { "_arrow_Table__Reset", (DL_FUNC) &_arrow_Table__Reset, 1}, - { "_arrow_RecordBatch__Reset", (DL_FUNC) &_arrow_RecordBatch__Reset, 1}, + { "_arrow_is_altrep", (DL_FUNC) &_arrow_is_altrep, 1}, + { "_arrow_Array__Slice1", (DL_FUNC) &_arrow_Array__Slice1, 2}, + { "_arrow_Array__Slice2", (DL_FUNC) &_arrow_Array__Slice2, 3}, + { "_arrow_Array__IsNull", (DL_FUNC) &_arrow_Array__IsNull, 2}, + { "_arrow_Array__IsValid", (DL_FUNC) &_arrow_Array__IsValid, 2}, + { "_arrow_Array__length", (DL_FUNC) &_arrow_Array__length, 1}, + { "_arrow_Array__offset", (DL_FUNC) &_arrow_Array__offset, 1}, + { "_arrow_Array__null_count", (DL_FUNC) &_arrow_Array__null_count, 1}, + { "_arrow_Array__type", (DL_FUNC) &_arrow_Array__type, 1}, + { "_arrow_Array__ToString", (DL_FUNC) &_arrow_Array__ToString, 1}, + { "_arrow_Array__type_id", (DL_FUNC) &_arrow_Array__type_id, 1}, + { "_arrow_Array__Equals", (DL_FUNC) &_arrow_Array__Equals, 2}, + { "_arrow_Array__ApproxEquals", (DL_FUNC) &_arrow_Array__ApproxEquals, 2}, + { "_arrow_Array__Diff", (DL_FUNC) &_arrow_Array__Diff, 2}, + { "_arrow_Array__data", (DL_FUNC) &_arrow_Array__data, 1}, + { "_arrow_Array__RangeEquals", (DL_FUNC) &_arrow_Array__RangeEquals, 5}, + { "_arrow_Array__View", (DL_FUNC) &_arrow_Array__View, 2}, + { "_arrow_Array__Validate", (DL_FUNC) &_arrow_Array__Validate, 1}, + { "_arrow_DictionaryArray__indices", (DL_FUNC) &_arrow_DictionaryArray__indices, 1}, + { "_arrow_DictionaryArray__dictionary", (DL_FUNC) &_arrow_DictionaryArray__dictionary, 1}, + { "_arrow_StructArray__field", (DL_FUNC) &_arrow_StructArray__field, 2}, + { "_arrow_StructArray__GetFieldByName", (DL_FUNC) &_arrow_StructArray__GetFieldByName, 2}, + { "_arrow_StructArray__Flatten", (DL_FUNC) &_arrow_StructArray__Flatten, 1}, + { "_arrow_ListArray__value_type", (DL_FUNC) &_arrow_ListArray__value_type, 1}, + { "_arrow_LargeListArray__value_type", (DL_FUNC) &_arrow_LargeListArray__value_type, 1}, + { "_arrow_ListArray__values", (DL_FUNC) &_arrow_ListArray__values, 1}, + { "_arrow_LargeListArray__values", (DL_FUNC) &_arrow_LargeListArray__values, 1}, + { "_arrow_ListArray__value_length", (DL_FUNC) &_arrow_ListArray__value_length, 2}, + { "_arrow_LargeListArray__value_length", (DL_FUNC) &_arrow_LargeListArray__value_length, 2}, + { "_arrow_FixedSizeListArray__value_length", (DL_FUNC) &_arrow_FixedSizeListArray__value_length, 2}, + { "_arrow_ListArray__value_offset", (DL_FUNC) &_arrow_ListArray__value_offset, 2}, + { "_arrow_LargeListArray__value_offset", (DL_FUNC) &_arrow_LargeListArray__value_offset, 2}, + { "_arrow_FixedSizeListArray__value_offset", (DL_FUNC) &_arrow_FixedSizeListArray__value_offset, 2}, + { "_arrow_ListArray__raw_value_offsets", (DL_FUNC) &_arrow_ListArray__raw_value_offsets, 1}, + { "_arrow_LargeListArray__raw_value_offsets", (DL_FUNC) &_arrow_LargeListArray__raw_value_offsets, 1}, + { "_arrow_Array__as_vector", (DL_FUNC) &_arrow_Array__as_vector, 1}, + { "_arrow_ChunkedArray__as_vector", (DL_FUNC) &_arrow_ChunkedArray__as_vector, 2}, + { "_arrow_RecordBatch__to_dataframe", (DL_FUNC) &_arrow_RecordBatch__to_dataframe, 2}, + { "_arrow_Table__to_dataframe", (DL_FUNC) &_arrow_Table__to_dataframe, 2}, + { "_arrow_ArrayData__get_type", (DL_FUNC) &_arrow_ArrayData__get_type, 1}, + { "_arrow_ArrayData__get_length", (DL_FUNC) &_arrow_ArrayData__get_length, 1}, + { "_arrow_ArrayData__get_null_count", (DL_FUNC) &_arrow_ArrayData__get_null_count, 1}, + { "_arrow_ArrayData__get_offset", (DL_FUNC) &_arrow_ArrayData__get_offset, 1}, + { "_arrow_ArrayData__buffers", (DL_FUNC) &_arrow_ArrayData__buffers, 1}, + { "_arrow_Buffer__is_mutable", (DL_FUNC) &_arrow_Buffer__is_mutable, 1}, + { "_arrow_Buffer__ZeroPadding", (DL_FUNC) &_arrow_Buffer__ZeroPadding, 1}, + { "_arrow_Buffer__capacity", (DL_FUNC) &_arrow_Buffer__capacity, 1}, + { "_arrow_Buffer__size", (DL_FUNC) &_arrow_Buffer__size, 1}, + { "_arrow_r___RBuffer__initialize", (DL_FUNC) &_arrow_r___RBuffer__initialize, 1}, + { "_arrow_Buffer__data", (DL_FUNC) &_arrow_Buffer__data, 1}, + { "_arrow_Buffer__Equals", (DL_FUNC) &_arrow_Buffer__Equals, 2}, + { "_arrow_ChunkedArray__length", (DL_FUNC) &_arrow_ChunkedArray__length, 1}, + { "_arrow_ChunkedArray__null_count", (DL_FUNC) &_arrow_ChunkedArray__null_count, 1}, + { "_arrow_ChunkedArray__num_chunks", (DL_FUNC) &_arrow_ChunkedArray__num_chunks, 1}, + { "_arrow_ChunkedArray__chunk", (DL_FUNC) &_arrow_ChunkedArray__chunk, 2}, + { "_arrow_ChunkedArray__chunks", (DL_FUNC) &_arrow_ChunkedArray__chunks, 1}, + { "_arrow_ChunkedArray__type", (DL_FUNC) &_arrow_ChunkedArray__type, 1}, + { "_arrow_ChunkedArray__Slice1", (DL_FUNC) &_arrow_ChunkedArray__Slice1, 2}, + { "_arrow_ChunkedArray__Slice2", (DL_FUNC) &_arrow_ChunkedArray__Slice2, 3}, + { "_arrow_ChunkedArray__View", (DL_FUNC) &_arrow_ChunkedArray__View, 2}, + { "_arrow_ChunkedArray__Validate", (DL_FUNC) &_arrow_ChunkedArray__Validate, 1}, + { "_arrow_ChunkedArray__Equals", (DL_FUNC) &_arrow_ChunkedArray__Equals, 2}, + { "_arrow_ChunkedArray__ToString", (DL_FUNC) &_arrow_ChunkedArray__ToString, 1}, + { "_arrow_ChunkedArray__from_list", (DL_FUNC) &_arrow_ChunkedArray__from_list, 2}, + { "_arrow_util___Codec__Create", (DL_FUNC) &_arrow_util___Codec__Create, 2}, + { "_arrow_util___Codec__name", (DL_FUNC) &_arrow_util___Codec__name, 1}, + { "_arrow_util___Codec__IsAvailable", (DL_FUNC) &_arrow_util___Codec__IsAvailable, 1}, + { "_arrow_io___CompressedOutputStream__Make", (DL_FUNC) &_arrow_io___CompressedOutputStream__Make, 2}, + { "_arrow_io___CompressedInputStream__Make", (DL_FUNC) &_arrow_io___CompressedInputStream__Make, 2}, + { "_arrow_ExecPlan_create", (DL_FUNC) &_arrow_ExecPlan_create, 1}, + { "_arrow_ExecPlan_run", (DL_FUNC) &_arrow_ExecPlan_run, 3}, + { "_arrow_ExecNode_Scan", (DL_FUNC) &_arrow_ExecNode_Scan, 4}, + { "_arrow_ExecNode_Filter", (DL_FUNC) &_arrow_ExecNode_Filter, 2}, + { "_arrow_ExecNode_Project", (DL_FUNC) &_arrow_ExecNode_Project, 3}, + { "_arrow_ExecNode_Aggregate", (DL_FUNC) &_arrow_ExecNode_Aggregate, 5}, + { "_arrow_RecordBatch__cast", (DL_FUNC) &_arrow_RecordBatch__cast, 3}, + { "_arrow_Table__cast", (DL_FUNC) &_arrow_Table__cast, 3}, + { "_arrow_compute__CallFunction", (DL_FUNC) &_arrow_compute__CallFunction, 3}, + { "_arrow_compute__GetFunctionNames", (DL_FUNC) &_arrow_compute__GetFunctionNames, 0}, + { "_arrow_build_info", (DL_FUNC) &_arrow_build_info, 0}, + { "_arrow_runtime_info", (DL_FUNC) &_arrow_runtime_info, 0}, + { "_arrow_csv___WriteOptions__initialize", (DL_FUNC) &_arrow_csv___WriteOptions__initialize, 1}, + { "_arrow_csv___ReadOptions__initialize", (DL_FUNC) &_arrow_csv___ReadOptions__initialize, 1}, + { "_arrow_csv___ParseOptions__initialize", (DL_FUNC) &_arrow_csv___ParseOptions__initialize, 1}, + { "_arrow_csv___ReadOptions__column_names", (DL_FUNC) &_arrow_csv___ReadOptions__column_names, 1}, + { "_arrow_csv___ConvertOptions__initialize", (DL_FUNC) &_arrow_csv___ConvertOptions__initialize, 1}, + { "_arrow_csv___TableReader__Make", (DL_FUNC) &_arrow_csv___TableReader__Make, 4}, + { "_arrow_csv___TableReader__Read", (DL_FUNC) &_arrow_csv___TableReader__Read, 1}, + { "_arrow_TimestampParser__kind", (DL_FUNC) &_arrow_TimestampParser__kind, 1}, + { "_arrow_TimestampParser__format", (DL_FUNC) &_arrow_TimestampParser__format, 1}, + { "_arrow_TimestampParser__MakeStrptime", (DL_FUNC) &_arrow_TimestampParser__MakeStrptime, 1}, + { "_arrow_TimestampParser__MakeISO8601", (DL_FUNC) &_arrow_TimestampParser__MakeISO8601, 0}, + { "_arrow_csv___WriteCSV__Table", (DL_FUNC) &_arrow_csv___WriteCSV__Table, 3}, + { "_arrow_csv___WriteCSV__RecordBatch", (DL_FUNC) &_arrow_csv___WriteCSV__RecordBatch, 3}, + { "_arrow_dataset___Dataset__NewScan", (DL_FUNC) &_arrow_dataset___Dataset__NewScan, 1}, + { "_arrow_dataset___Dataset__schema", (DL_FUNC) &_arrow_dataset___Dataset__schema, 1}, + { "_arrow_dataset___Dataset__type_name", (DL_FUNC) &_arrow_dataset___Dataset__type_name, 1}, + { "_arrow_dataset___Dataset__ReplaceSchema", (DL_FUNC) &_arrow_dataset___Dataset__ReplaceSchema, 2}, + { "_arrow_dataset___UnionDataset__create", (DL_FUNC) &_arrow_dataset___UnionDataset__create, 2}, + { "_arrow_dataset___InMemoryDataset__create", (DL_FUNC) &_arrow_dataset___InMemoryDataset__create, 1}, + { "_arrow_dataset___UnionDataset__children", (DL_FUNC) &_arrow_dataset___UnionDataset__children, 1}, + { "_arrow_dataset___FileSystemDataset__format", (DL_FUNC) &_arrow_dataset___FileSystemDataset__format, 1}, + { "_arrow_dataset___FileSystemDataset__filesystem", (DL_FUNC) &_arrow_dataset___FileSystemDataset__filesystem, 1}, + { "_arrow_dataset___FileSystemDataset__files", (DL_FUNC) &_arrow_dataset___FileSystemDataset__files, 1}, + { "_arrow_dataset___DatasetFactory__Finish1", (DL_FUNC) &_arrow_dataset___DatasetFactory__Finish1, 2}, + { "_arrow_dataset___DatasetFactory__Finish2", (DL_FUNC) &_arrow_dataset___DatasetFactory__Finish2, 2}, + { "_arrow_dataset___DatasetFactory__Inspect", (DL_FUNC) &_arrow_dataset___DatasetFactory__Inspect, 2}, + { "_arrow_dataset___UnionDatasetFactory__Make", (DL_FUNC) &_arrow_dataset___UnionDatasetFactory__Make, 1}, + { "_arrow_dataset___FileSystemDatasetFactory__Make0", (DL_FUNC) &_arrow_dataset___FileSystemDatasetFactory__Make0, 3}, + { "_arrow_dataset___FileSystemDatasetFactory__Make2", (DL_FUNC) &_arrow_dataset___FileSystemDatasetFactory__Make2, 4}, + { "_arrow_dataset___FileSystemDatasetFactory__Make1", (DL_FUNC) &_arrow_dataset___FileSystemDatasetFactory__Make1, 3}, + { "_arrow_dataset___FileSystemDatasetFactory__Make3", (DL_FUNC) &_arrow_dataset___FileSystemDatasetFactory__Make3, 4}, + { "_arrow_dataset___FileFormat__type_name", (DL_FUNC) &_arrow_dataset___FileFormat__type_name, 1}, + { "_arrow_dataset___FileFormat__DefaultWriteOptions", (DL_FUNC) &_arrow_dataset___FileFormat__DefaultWriteOptions, 1}, + { "_arrow_dataset___ParquetFileFormat__Make", (DL_FUNC) &_arrow_dataset___ParquetFileFormat__Make, 2}, + { "_arrow_dataset___FileWriteOptions__type_name", (DL_FUNC) &_arrow_dataset___FileWriteOptions__type_name, 1}, + { "_arrow_dataset___ParquetFileWriteOptions__update", (DL_FUNC) &_arrow_dataset___ParquetFileWriteOptions__update, 3}, + { "_arrow_dataset___IpcFileWriteOptions__update2", (DL_FUNC) &_arrow_dataset___IpcFileWriteOptions__update2, 4}, + { "_arrow_dataset___IpcFileWriteOptions__update1", (DL_FUNC) &_arrow_dataset___IpcFileWriteOptions__update1, 3}, + { "_arrow_dataset___CsvFileWriteOptions__update", (DL_FUNC) &_arrow_dataset___CsvFileWriteOptions__update, 2}, + { "_arrow_dataset___IpcFileFormat__Make", (DL_FUNC) &_arrow_dataset___IpcFileFormat__Make, 0}, + { "_arrow_dataset___CsvFileFormat__Make", (DL_FUNC) &_arrow_dataset___CsvFileFormat__Make, 3}, + { "_arrow_dataset___FragmentScanOptions__type_name", (DL_FUNC) &_arrow_dataset___FragmentScanOptions__type_name, 1}, + { "_arrow_dataset___CsvFragmentScanOptions__Make", (DL_FUNC) &_arrow_dataset___CsvFragmentScanOptions__Make, 2}, + { "_arrow_dataset___ParquetFragmentScanOptions__Make", (DL_FUNC) &_arrow_dataset___ParquetFragmentScanOptions__Make, 3}, + { "_arrow_dataset___DirectoryPartitioning", (DL_FUNC) &_arrow_dataset___DirectoryPartitioning, 2}, + { "_arrow_dataset___DirectoryPartitioning__MakeFactory", (DL_FUNC) &_arrow_dataset___DirectoryPartitioning__MakeFactory, 2}, + { "_arrow_dataset___HivePartitioning", (DL_FUNC) &_arrow_dataset___HivePartitioning, 3}, + { "_arrow_dataset___HivePartitioning__MakeFactory", (DL_FUNC) &_arrow_dataset___HivePartitioning__MakeFactory, 2}, + { "_arrow_dataset___ScannerBuilder__ProjectNames", (DL_FUNC) &_arrow_dataset___ScannerBuilder__ProjectNames, 2}, + { "_arrow_dataset___ScannerBuilder__ProjectExprs", (DL_FUNC) &_arrow_dataset___ScannerBuilder__ProjectExprs, 3}, + { "_arrow_dataset___ScannerBuilder__Filter", (DL_FUNC) &_arrow_dataset___ScannerBuilder__Filter, 2}, + { "_arrow_dataset___ScannerBuilder__UseThreads", (DL_FUNC) &_arrow_dataset___ScannerBuilder__UseThreads, 2}, + { "_arrow_dataset___ScannerBuilder__UseAsync", (DL_FUNC) &_arrow_dataset___ScannerBuilder__UseAsync, 2}, + { "_arrow_dataset___ScannerBuilder__BatchSize", (DL_FUNC) &_arrow_dataset___ScannerBuilder__BatchSize, 2}, + { "_arrow_dataset___ScannerBuilder__FragmentScanOptions", (DL_FUNC) &_arrow_dataset___ScannerBuilder__FragmentScanOptions, 2}, + { "_arrow_dataset___ScannerBuilder__schema", (DL_FUNC) &_arrow_dataset___ScannerBuilder__schema, 1}, + { "_arrow_dataset___ScannerBuilder__Finish", (DL_FUNC) &_arrow_dataset___ScannerBuilder__Finish, 1}, + { "_arrow_dataset___Scanner__ToTable", (DL_FUNC) &_arrow_dataset___Scanner__ToTable, 1}, + { "_arrow_dataset___Scanner__ScanBatches", (DL_FUNC) &_arrow_dataset___Scanner__ScanBatches, 1}, + { "_arrow_dataset___Scanner__ToRecordBatchReader", (DL_FUNC) &_arrow_dataset___Scanner__ToRecordBatchReader, 1}, + { "_arrow_dataset___Scanner__head", (DL_FUNC) &_arrow_dataset___Scanner__head, 2}, + { "_arrow_dataset___Scanner__schema", (DL_FUNC) &_arrow_dataset___Scanner__schema, 1}, + { "_arrow_dataset___ScanTask__get_batches", (DL_FUNC) &_arrow_dataset___ScanTask__get_batches, 1}, + { "_arrow_dataset___Dataset__Write", (DL_FUNC) &_arrow_dataset___Dataset__Write, 6}, + { "_arrow_dataset___Scanner__TakeRows", (DL_FUNC) &_arrow_dataset___Scanner__TakeRows, 2}, + { "_arrow_dataset___Scanner__CountRows", (DL_FUNC) &_arrow_dataset___Scanner__CountRows, 1}, + { "_arrow_Int8__initialize", (DL_FUNC) &_arrow_Int8__initialize, 0}, + { "_arrow_Int16__initialize", (DL_FUNC) &_arrow_Int16__initialize, 0}, + { "_arrow_Int32__initialize", (DL_FUNC) &_arrow_Int32__initialize, 0}, + { "_arrow_Int64__initialize", (DL_FUNC) &_arrow_Int64__initialize, 0}, + { "_arrow_UInt8__initialize", (DL_FUNC) &_arrow_UInt8__initialize, 0}, + { "_arrow_UInt16__initialize", (DL_FUNC) &_arrow_UInt16__initialize, 0}, + { "_arrow_UInt32__initialize", (DL_FUNC) &_arrow_UInt32__initialize, 0}, + { "_arrow_UInt64__initialize", (DL_FUNC) &_arrow_UInt64__initialize, 0}, + { "_arrow_Float16__initialize", (DL_FUNC) &_arrow_Float16__initialize, 0}, + { "_arrow_Float32__initialize", (DL_FUNC) &_arrow_Float32__initialize, 0}, + { "_arrow_Float64__initialize", (DL_FUNC) &_arrow_Float64__initialize, 0}, + { "_arrow_Boolean__initialize", (DL_FUNC) &_arrow_Boolean__initialize, 0}, + { "_arrow_Utf8__initialize", (DL_FUNC) &_arrow_Utf8__initialize, 0}, + { "_arrow_LargeUtf8__initialize", (DL_FUNC) &_arrow_LargeUtf8__initialize, 0}, + { "_arrow_Binary__initialize", (DL_FUNC) &_arrow_Binary__initialize, 0}, + { "_arrow_LargeBinary__initialize", (DL_FUNC) &_arrow_LargeBinary__initialize, 0}, + { "_arrow_Date32__initialize", (DL_FUNC) &_arrow_Date32__initialize, 0}, + { "_arrow_Date64__initialize", (DL_FUNC) &_arrow_Date64__initialize, 0}, + { "_arrow_Null__initialize", (DL_FUNC) &_arrow_Null__initialize, 0}, + { "_arrow_Decimal128Type__initialize", (DL_FUNC) &_arrow_Decimal128Type__initialize, 2}, + { "_arrow_FixedSizeBinary__initialize", (DL_FUNC) &_arrow_FixedSizeBinary__initialize, 1}, + { "_arrow_Timestamp__initialize", (DL_FUNC) &_arrow_Timestamp__initialize, 2}, + { "_arrow_Time32__initialize", (DL_FUNC) &_arrow_Time32__initialize, 1}, + { "_arrow_Time64__initialize", (DL_FUNC) &_arrow_Time64__initialize, 1}, + { "_arrow_list__", (DL_FUNC) &_arrow_list__, 1}, + { "_arrow_large_list__", (DL_FUNC) &_arrow_large_list__, 1}, + { "_arrow_fixed_size_list__", (DL_FUNC) &_arrow_fixed_size_list__, 2}, + { "_arrow_struct__", (DL_FUNC) &_arrow_struct__, 1}, + { "_arrow_DataType__ToString", (DL_FUNC) &_arrow_DataType__ToString, 1}, + { "_arrow_DataType__name", (DL_FUNC) &_arrow_DataType__name, 1}, + { "_arrow_DataType__Equals", (DL_FUNC) &_arrow_DataType__Equals, 2}, + { "_arrow_DataType__num_fields", (DL_FUNC) &_arrow_DataType__num_fields, 1}, + { "_arrow_DataType__fields", (DL_FUNC) &_arrow_DataType__fields, 1}, + { "_arrow_DataType__id", (DL_FUNC) &_arrow_DataType__id, 1}, + { "_arrow_ListType__ToString", (DL_FUNC) &_arrow_ListType__ToString, 1}, + { "_arrow_FixedWidthType__bit_width", (DL_FUNC) &_arrow_FixedWidthType__bit_width, 1}, + { "_arrow_DateType__unit", (DL_FUNC) &_arrow_DateType__unit, 1}, + { "_arrow_TimeType__unit", (DL_FUNC) &_arrow_TimeType__unit, 1}, + { "_arrow_DecimalType__precision", (DL_FUNC) &_arrow_DecimalType__precision, 1}, + { "_arrow_DecimalType__scale", (DL_FUNC) &_arrow_DecimalType__scale, 1}, + { "_arrow_TimestampType__timezone", (DL_FUNC) &_arrow_TimestampType__timezone, 1}, + { "_arrow_TimestampType__unit", (DL_FUNC) &_arrow_TimestampType__unit, 1}, + { "_arrow_DictionaryType__initialize", (DL_FUNC) &_arrow_DictionaryType__initialize, 3}, + { "_arrow_DictionaryType__index_type", (DL_FUNC) &_arrow_DictionaryType__index_type, 1}, + { "_arrow_DictionaryType__value_type", (DL_FUNC) &_arrow_DictionaryType__value_type, 1}, + { "_arrow_DictionaryType__name", (DL_FUNC) &_arrow_DictionaryType__name, 1}, + { "_arrow_DictionaryType__ordered", (DL_FUNC) &_arrow_DictionaryType__ordered, 1}, + { "_arrow_StructType__GetFieldByName", (DL_FUNC) &_arrow_StructType__GetFieldByName, 2}, + { "_arrow_StructType__GetFieldIndex", (DL_FUNC) &_arrow_StructType__GetFieldIndex, 2}, + { "_arrow_StructType__field_names", (DL_FUNC) &_arrow_StructType__field_names, 1}, + { "_arrow_ListType__value_field", (DL_FUNC) &_arrow_ListType__value_field, 1}, + { "_arrow_ListType__value_type", (DL_FUNC) &_arrow_ListType__value_type, 1}, + { "_arrow_LargeListType__value_field", (DL_FUNC) &_arrow_LargeListType__value_field, 1}, + { "_arrow_LargeListType__value_type", (DL_FUNC) &_arrow_LargeListType__value_type, 1}, + { "_arrow_FixedSizeListType__value_field", (DL_FUNC) &_arrow_FixedSizeListType__value_field, 1}, + { "_arrow_FixedSizeListType__value_type", (DL_FUNC) &_arrow_FixedSizeListType__value_type, 1}, + { "_arrow_FixedSizeListType__list_size", (DL_FUNC) &_arrow_FixedSizeListType__list_size, 1}, + { "_arrow_compute___expr__equals", (DL_FUNC) &_arrow_compute___expr__equals, 2}, + { "_arrow_compute___expr__call", (DL_FUNC) &_arrow_compute___expr__call, 3}, + { "_arrow_field_names_in_expression", (DL_FUNC) &_arrow_field_names_in_expression, 1}, + { "_arrow_compute___expr__get_field_ref_name", (DL_FUNC) &_arrow_compute___expr__get_field_ref_name, 1}, + { "_arrow_compute___expr__field_ref", (DL_FUNC) &_arrow_compute___expr__field_ref, 1}, + { "_arrow_compute___expr__scalar", (DL_FUNC) &_arrow_compute___expr__scalar, 1}, + { "_arrow_compute___expr__ToString", (DL_FUNC) &_arrow_compute___expr__ToString, 1}, + { "_arrow_compute___expr__type", (DL_FUNC) &_arrow_compute___expr__type, 2}, + { "_arrow_compute___expr__type_id", (DL_FUNC) &_arrow_compute___expr__type_id, 2}, + { "_arrow_ipc___WriteFeather__Table", (DL_FUNC) &_arrow_ipc___WriteFeather__Table, 6}, + { "_arrow_ipc___feather___Reader__version", (DL_FUNC) &_arrow_ipc___feather___Reader__version, 1}, + { "_arrow_ipc___feather___Reader__Read", (DL_FUNC) &_arrow_ipc___feather___Reader__Read, 2}, + { "_arrow_ipc___feather___Reader__Open", (DL_FUNC) &_arrow_ipc___feather___Reader__Open, 1}, + { "_arrow_ipc___feather___Reader__schema", (DL_FUNC) &_arrow_ipc___feather___Reader__schema, 1}, + { "_arrow_Field__initialize", (DL_FUNC) &_arrow_Field__initialize, 3}, + { "_arrow_Field__ToString", (DL_FUNC) &_arrow_Field__ToString, 1}, + { "_arrow_Field__name", (DL_FUNC) &_arrow_Field__name, 1}, + { "_arrow_Field__Equals", (DL_FUNC) &_arrow_Field__Equals, 2}, + { "_arrow_Field__nullable", (DL_FUNC) &_arrow_Field__nullable, 1}, + { "_arrow_Field__type", (DL_FUNC) &_arrow_Field__type, 1}, + { "_arrow_fs___FileInfo__type", (DL_FUNC) &_arrow_fs___FileInfo__type, 1}, + { "_arrow_fs___FileInfo__set_type", (DL_FUNC) &_arrow_fs___FileInfo__set_type, 2}, + { "_arrow_fs___FileInfo__path", (DL_FUNC) &_arrow_fs___FileInfo__path, 1}, + { "_arrow_fs___FileInfo__set_path", (DL_FUNC) &_arrow_fs___FileInfo__set_path, 2}, + { "_arrow_fs___FileInfo__size", (DL_FUNC) &_arrow_fs___FileInfo__size, 1}, + { "_arrow_fs___FileInfo__set_size", (DL_FUNC) &_arrow_fs___FileInfo__set_size, 2}, + { "_arrow_fs___FileInfo__base_name", (DL_FUNC) &_arrow_fs___FileInfo__base_name, 1}, + { "_arrow_fs___FileInfo__extension", (DL_FUNC) &_arrow_fs___FileInfo__extension, 1}, + { "_arrow_fs___FileInfo__mtime", (DL_FUNC) &_arrow_fs___FileInfo__mtime, 1}, + { "_arrow_fs___FileInfo__set_mtime", (DL_FUNC) &_arrow_fs___FileInfo__set_mtime, 2}, + { "_arrow_fs___FileSelector__base_dir", (DL_FUNC) &_arrow_fs___FileSelector__base_dir, 1}, + { "_arrow_fs___FileSelector__allow_not_found", (DL_FUNC) &_arrow_fs___FileSelector__allow_not_found, 1}, + { "_arrow_fs___FileSelector__recursive", (DL_FUNC) &_arrow_fs___FileSelector__recursive, 1}, + { "_arrow_fs___FileSelector__create", (DL_FUNC) &_arrow_fs___FileSelector__create, 3}, + { "_arrow_fs___FileSystem__GetTargetInfos_Paths", (DL_FUNC) &_arrow_fs___FileSystem__GetTargetInfos_Paths, 2}, + { "_arrow_fs___FileSystem__GetTargetInfos_FileSelector", (DL_FUNC) &_arrow_fs___FileSystem__GetTargetInfos_FileSelector, 2}, + { "_arrow_fs___FileSystem__CreateDir", (DL_FUNC) &_arrow_fs___FileSystem__CreateDir, 3}, + { "_arrow_fs___FileSystem__DeleteDir", (DL_FUNC) &_arrow_fs___FileSystem__DeleteDir, 2}, + { "_arrow_fs___FileSystem__DeleteDirContents", (DL_FUNC) &_arrow_fs___FileSystem__DeleteDirContents, 2}, + { "_arrow_fs___FileSystem__DeleteFile", (DL_FUNC) &_arrow_fs___FileSystem__DeleteFile, 2}, + { "_arrow_fs___FileSystem__DeleteFiles", (DL_FUNC) &_arrow_fs___FileSystem__DeleteFiles, 2}, + { "_arrow_fs___FileSystem__Move", (DL_FUNC) &_arrow_fs___FileSystem__Move, 3}, + { "_arrow_fs___FileSystem__CopyFile", (DL_FUNC) &_arrow_fs___FileSystem__CopyFile, 3}, + { "_arrow_fs___FileSystem__OpenInputStream", (DL_FUNC) &_arrow_fs___FileSystem__OpenInputStream, 2}, + { "_arrow_fs___FileSystem__OpenInputFile", (DL_FUNC) &_arrow_fs___FileSystem__OpenInputFile, 2}, + { "_arrow_fs___FileSystem__OpenOutputStream", (DL_FUNC) &_arrow_fs___FileSystem__OpenOutputStream, 2}, + { "_arrow_fs___FileSystem__OpenAppendStream", (DL_FUNC) &_arrow_fs___FileSystem__OpenAppendStream, 2}, + { "_arrow_fs___FileSystem__type_name", (DL_FUNC) &_arrow_fs___FileSystem__type_name, 1}, + { "_arrow_fs___LocalFileSystem__create", (DL_FUNC) &_arrow_fs___LocalFileSystem__create, 0}, + { "_arrow_fs___SubTreeFileSystem__create", (DL_FUNC) &_arrow_fs___SubTreeFileSystem__create, 2}, + { "_arrow_fs___SubTreeFileSystem__base_fs", (DL_FUNC) &_arrow_fs___SubTreeFileSystem__base_fs, 1}, + { "_arrow_fs___SubTreeFileSystem__base_path", (DL_FUNC) &_arrow_fs___SubTreeFileSystem__base_path, 1}, + { "_arrow_fs___FileSystemFromUri", (DL_FUNC) &_arrow_fs___FileSystemFromUri, 1}, + { "_arrow_fs___CopyFiles", (DL_FUNC) &_arrow_fs___CopyFiles, 6}, + { "_arrow_fs___S3FileSystem__create", (DL_FUNC) &_arrow_fs___S3FileSystem__create, 12}, + { "_arrow_fs___S3FileSystem__region", (DL_FUNC) &_arrow_fs___S3FileSystem__region, 1}, + { "_arrow_io___Readable__Read", (DL_FUNC) &_arrow_io___Readable__Read, 2}, + { "_arrow_io___InputStream__Close", (DL_FUNC) &_arrow_io___InputStream__Close, 1}, + { "_arrow_io___OutputStream__Close", (DL_FUNC) &_arrow_io___OutputStream__Close, 1}, + { "_arrow_io___RandomAccessFile__GetSize", (DL_FUNC) &_arrow_io___RandomAccessFile__GetSize, 1}, + { "_arrow_io___RandomAccessFile__supports_zero_copy", (DL_FUNC) &_arrow_io___RandomAccessFile__supports_zero_copy, 1}, + { "_arrow_io___RandomAccessFile__Seek", (DL_FUNC) &_arrow_io___RandomAccessFile__Seek, 2}, + { "_arrow_io___RandomAccessFile__Tell", (DL_FUNC) &_arrow_io___RandomAccessFile__Tell, 1}, + { "_arrow_io___RandomAccessFile__Read0", (DL_FUNC) &_arrow_io___RandomAccessFile__Read0, 1}, + { "_arrow_io___RandomAccessFile__ReadAt", (DL_FUNC) &_arrow_io___RandomAccessFile__ReadAt, 3}, + { "_arrow_io___MemoryMappedFile__Create", (DL_FUNC) &_arrow_io___MemoryMappedFile__Create, 2}, + { "_arrow_io___MemoryMappedFile__Open", (DL_FUNC) &_arrow_io___MemoryMappedFile__Open, 2}, + { "_arrow_io___MemoryMappedFile__Resize", (DL_FUNC) &_arrow_io___MemoryMappedFile__Resize, 2}, + { "_arrow_io___ReadableFile__Open", (DL_FUNC) &_arrow_io___ReadableFile__Open, 1}, + { "_arrow_io___BufferReader__initialize", (DL_FUNC) &_arrow_io___BufferReader__initialize, 1}, + { "_arrow_io___Writable__write", (DL_FUNC) &_arrow_io___Writable__write, 2}, + { "_arrow_io___OutputStream__Tell", (DL_FUNC) &_arrow_io___OutputStream__Tell, 1}, + { "_arrow_io___FileOutputStream__Open", (DL_FUNC) &_arrow_io___FileOutputStream__Open, 1}, + { "_arrow_io___BufferOutputStream__Create", (DL_FUNC) &_arrow_io___BufferOutputStream__Create, 1}, + { "_arrow_io___BufferOutputStream__capacity", (DL_FUNC) &_arrow_io___BufferOutputStream__capacity, 1}, + { "_arrow_io___BufferOutputStream__Finish", (DL_FUNC) &_arrow_io___BufferOutputStream__Finish, 1}, + { "_arrow_io___BufferOutputStream__Tell", (DL_FUNC) &_arrow_io___BufferOutputStream__Tell, 1}, + { "_arrow_io___BufferOutputStream__Write", (DL_FUNC) &_arrow_io___BufferOutputStream__Write, 2}, + { "_arrow_json___ReadOptions__initialize", (DL_FUNC) &_arrow_json___ReadOptions__initialize, 2}, + { "_arrow_json___ParseOptions__initialize1", (DL_FUNC) &_arrow_json___ParseOptions__initialize1, 1}, + { "_arrow_json___ParseOptions__initialize2", (DL_FUNC) &_arrow_json___ParseOptions__initialize2, 2}, + { "_arrow_json___TableReader__Make", (DL_FUNC) &_arrow_json___TableReader__Make, 3}, + { "_arrow_json___TableReader__Read", (DL_FUNC) &_arrow_json___TableReader__Read, 1}, + { "_arrow_MemoryPool__default", (DL_FUNC) &_arrow_MemoryPool__default, 0}, + { "_arrow_MemoryPool__bytes_allocated", (DL_FUNC) &_arrow_MemoryPool__bytes_allocated, 1}, + { "_arrow_MemoryPool__max_memory", (DL_FUNC) &_arrow_MemoryPool__max_memory, 1}, + { "_arrow_MemoryPool__backend_name", (DL_FUNC) &_arrow_MemoryPool__backend_name, 1}, + { "_arrow_supported_memory_backends", (DL_FUNC) &_arrow_supported_memory_backends, 0}, + { "_arrow_ipc___Message__body_length", (DL_FUNC) &_arrow_ipc___Message__body_length, 1}, + { "_arrow_ipc___Message__metadata", (DL_FUNC) &_arrow_ipc___Message__metadata, 1}, + { "_arrow_ipc___Message__body", (DL_FUNC) &_arrow_ipc___Message__body, 1}, + { "_arrow_ipc___Message__Verify", (DL_FUNC) &_arrow_ipc___Message__Verify, 1}, + { "_arrow_ipc___Message__type", (DL_FUNC) &_arrow_ipc___Message__type, 1}, + { "_arrow_ipc___Message__Equals", (DL_FUNC) &_arrow_ipc___Message__Equals, 2}, + { "_arrow_ipc___ReadRecordBatch__Message__Schema", (DL_FUNC) &_arrow_ipc___ReadRecordBatch__Message__Schema, 2}, + { "_arrow_ipc___ReadSchema_InputStream", (DL_FUNC) &_arrow_ipc___ReadSchema_InputStream, 1}, + { "_arrow_ipc___ReadSchema_Message", (DL_FUNC) &_arrow_ipc___ReadSchema_Message, 1}, + { "_arrow_ipc___MessageReader__Open", (DL_FUNC) &_arrow_ipc___MessageReader__Open, 1}, + { "_arrow_ipc___MessageReader__ReadNextMessage", (DL_FUNC) &_arrow_ipc___MessageReader__ReadNextMessage, 1}, + { "_arrow_ipc___ReadMessage", (DL_FUNC) &_arrow_ipc___ReadMessage, 1}, + { "_arrow_parquet___arrow___ArrowReaderProperties__Make", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__Make, 1}, + { "_arrow_parquet___arrow___ArrowReaderProperties__set_use_threads", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__set_use_threads, 2}, + { "_arrow_parquet___arrow___ArrowReaderProperties__get_use_threads", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__get_use_threads, 2}, + { "_arrow_parquet___arrow___ArrowReaderProperties__get_read_dictionary", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__get_read_dictionary, 2}, + { "_arrow_parquet___arrow___ArrowReaderProperties__set_read_dictionary", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__set_read_dictionary, 3}, + { "_arrow_parquet___arrow___FileReader__OpenFile", (DL_FUNC) &_arrow_parquet___arrow___FileReader__OpenFile, 2}, + { "_arrow_parquet___arrow___FileReader__ReadTable1", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadTable1, 1}, + { "_arrow_parquet___arrow___FileReader__ReadTable2", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadTable2, 2}, + { "_arrow_parquet___arrow___FileReader__ReadRowGroup1", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadRowGroup1, 2}, + { "_arrow_parquet___arrow___FileReader__ReadRowGroup2", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadRowGroup2, 3}, + { "_arrow_parquet___arrow___FileReader__ReadRowGroups1", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadRowGroups1, 2}, + { "_arrow_parquet___arrow___FileReader__ReadRowGroups2", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadRowGroups2, 3}, + { "_arrow_parquet___arrow___FileReader__num_rows", (DL_FUNC) &_arrow_parquet___arrow___FileReader__num_rows, 1}, + { "_arrow_parquet___arrow___FileReader__num_columns", (DL_FUNC) &_arrow_parquet___arrow___FileReader__num_columns, 1}, + { "_arrow_parquet___arrow___FileReader__num_row_groups", (DL_FUNC) &_arrow_parquet___arrow___FileReader__num_row_groups, 1}, + { "_arrow_parquet___arrow___FileReader__ReadColumn", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadColumn, 2}, + { "_arrow_parquet___ArrowWriterProperties___create", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___create, 3}, + { "_arrow_parquet___WriterProperties___Builder__create", (DL_FUNC) &_arrow_parquet___WriterProperties___Builder__create, 0}, + { "_arrow_parquet___WriterProperties___Builder__version", (DL_FUNC) &_arrow_parquet___WriterProperties___Builder__version, 2}, + { "_arrow_parquet___ArrowWriterProperties___Builder__set_compressions", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__set_compressions, 3}, + { "_arrow_parquet___ArrowWriterProperties___Builder__set_compression_levels", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__set_compression_levels, 3}, + { "_arrow_parquet___ArrowWriterProperties___Builder__set_use_dictionary", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__set_use_dictionary, 3}, + { "_arrow_parquet___ArrowWriterProperties___Builder__set_write_statistics", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__set_write_statistics, 3}, + { "_arrow_parquet___ArrowWriterProperties___Builder__data_page_size", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__data_page_size, 2}, + { "_arrow_parquet___WriterProperties___Builder__build", (DL_FUNC) &_arrow_parquet___WriterProperties___Builder__build, 1}, + { "_arrow_parquet___arrow___ParquetFileWriter__Open", (DL_FUNC) &_arrow_parquet___arrow___ParquetFileWriter__Open, 4}, + { "_arrow_parquet___arrow___FileWriter__WriteTable", (DL_FUNC) &_arrow_parquet___arrow___FileWriter__WriteTable, 3}, + { "_arrow_parquet___arrow___FileWriter__Close", (DL_FUNC) &_arrow_parquet___arrow___FileWriter__Close, 1}, + { "_arrow_parquet___arrow___WriteTable", (DL_FUNC) &_arrow_parquet___arrow___WriteTable, 4}, + { "_arrow_parquet___arrow___FileReader__GetSchema", (DL_FUNC) &_arrow_parquet___arrow___FileReader__GetSchema, 1}, + { "_arrow_allocate_arrow_schema", (DL_FUNC) &_arrow_allocate_arrow_schema, 0}, + { "_arrow_delete_arrow_schema", (DL_FUNC) &_arrow_delete_arrow_schema, 1}, + { "_arrow_allocate_arrow_array", (DL_FUNC) &_arrow_allocate_arrow_array, 0}, + { "_arrow_delete_arrow_array", (DL_FUNC) &_arrow_delete_arrow_array, 1}, + { "_arrow_allocate_arrow_array_stream", (DL_FUNC) &_arrow_allocate_arrow_array_stream, 0}, + { "_arrow_delete_arrow_array_stream", (DL_FUNC) &_arrow_delete_arrow_array_stream, 1}, + { "_arrow_ImportArray", (DL_FUNC) &_arrow_ImportArray, 2}, + { "_arrow_ImportRecordBatch", (DL_FUNC) &_arrow_ImportRecordBatch, 2}, + { "_arrow_ImportSchema", (DL_FUNC) &_arrow_ImportSchema, 1}, + { "_arrow_ImportField", (DL_FUNC) &_arrow_ImportField, 1}, + { "_arrow_ImportType", (DL_FUNC) &_arrow_ImportType, 1}, + { "_arrow_ImportRecordBatchReader", (DL_FUNC) &_arrow_ImportRecordBatchReader, 1}, + { "_arrow_ExportType", (DL_FUNC) &_arrow_ExportType, 2}, + { "_arrow_ExportField", (DL_FUNC) &_arrow_ExportField, 2}, + { "_arrow_ExportSchema", (DL_FUNC) &_arrow_ExportSchema, 2}, + { "_arrow_ExportArray", (DL_FUNC) &_arrow_ExportArray, 3}, + { "_arrow_ExportRecordBatch", (DL_FUNC) &_arrow_ExportRecordBatch, 3}, + { "_arrow_ExportRecordBatchReader", (DL_FUNC) &_arrow_ExportRecordBatchReader, 2}, + { "_arrow_Table__from_dots", (DL_FUNC) &_arrow_Table__from_dots, 3}, + { "_arrow_vec_to_arrow", (DL_FUNC) &_arrow_vec_to_arrow, 2}, + { "_arrow_DictionaryArray__FromArrays", (DL_FUNC) &_arrow_DictionaryArray__FromArrays, 3}, + { "_arrow_RecordBatch__num_columns", (DL_FUNC) &_arrow_RecordBatch__num_columns, 1}, + { "_arrow_RecordBatch__num_rows", (DL_FUNC) &_arrow_RecordBatch__num_rows, 1}, + { "_arrow_RecordBatch__schema", (DL_FUNC) &_arrow_RecordBatch__schema, 1}, + { "_arrow_RecordBatch__RenameColumns", (DL_FUNC) &_arrow_RecordBatch__RenameColumns, 2}, + { "_arrow_RecordBatch__ReplaceSchemaMetadata", (DL_FUNC) &_arrow_RecordBatch__ReplaceSchemaMetadata, 2}, + { "_arrow_RecordBatch__columns", (DL_FUNC) &_arrow_RecordBatch__columns, 1}, + { "_arrow_RecordBatch__column", (DL_FUNC) &_arrow_RecordBatch__column, 2}, + { "_arrow_RecordBatch__GetColumnByName", (DL_FUNC) &_arrow_RecordBatch__GetColumnByName, 2}, + { "_arrow_RecordBatch__SelectColumns", (DL_FUNC) &_arrow_RecordBatch__SelectColumns, 2}, + { "_arrow_RecordBatch__Equals", (DL_FUNC) &_arrow_RecordBatch__Equals, 3}, + { "_arrow_RecordBatch__AddColumn", (DL_FUNC) &_arrow_RecordBatch__AddColumn, 4}, + { "_arrow_RecordBatch__SetColumn", (DL_FUNC) &_arrow_RecordBatch__SetColumn, 4}, + { "_arrow_RecordBatch__RemoveColumn", (DL_FUNC) &_arrow_RecordBatch__RemoveColumn, 2}, + { "_arrow_RecordBatch__column_name", (DL_FUNC) &_arrow_RecordBatch__column_name, 2}, + { "_arrow_RecordBatch__names", (DL_FUNC) &_arrow_RecordBatch__names, 1}, + { "_arrow_RecordBatch__Slice1", (DL_FUNC) &_arrow_RecordBatch__Slice1, 2}, + { "_arrow_RecordBatch__Slice2", (DL_FUNC) &_arrow_RecordBatch__Slice2, 3}, + { "_arrow_ipc___SerializeRecordBatch__Raw", (DL_FUNC) &_arrow_ipc___SerializeRecordBatch__Raw, 1}, + { "_arrow_ipc___ReadRecordBatch__InputStream__Schema", (DL_FUNC) &_arrow_ipc___ReadRecordBatch__InputStream__Schema, 2}, + { "_arrow_RecordBatch__from_arrays", (DL_FUNC) &_arrow_RecordBatch__from_arrays, 2}, + { "_arrow_RecordBatchReader__schema", (DL_FUNC) &_arrow_RecordBatchReader__schema, 1}, + { "_arrow_RecordBatchReader__ReadNext", (DL_FUNC) &_arrow_RecordBatchReader__ReadNext, 1}, + { "_arrow_RecordBatchReader__batches", (DL_FUNC) &_arrow_RecordBatchReader__batches, 1}, + { "_arrow_Table__from_RecordBatchReader", (DL_FUNC) &_arrow_Table__from_RecordBatchReader, 1}, + { "_arrow_ipc___RecordBatchStreamReader__Open", (DL_FUNC) &_arrow_ipc___RecordBatchStreamReader__Open, 1}, + { "_arrow_ipc___RecordBatchFileReader__schema", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__schema, 1}, + { "_arrow_ipc___RecordBatchFileReader__num_record_batches", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__num_record_batches, 1}, + { "_arrow_ipc___RecordBatchFileReader__ReadRecordBatch", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__ReadRecordBatch, 2}, + { "_arrow_ipc___RecordBatchFileReader__Open", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__Open, 1}, + { "_arrow_Table__from_RecordBatchFileReader", (DL_FUNC) &_arrow_Table__from_RecordBatchFileReader, 1}, + { "_arrow_ipc___RecordBatchFileReader__batches", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__batches, 1}, + { "_arrow_ipc___RecordBatchWriter__WriteRecordBatch", (DL_FUNC) &_arrow_ipc___RecordBatchWriter__WriteRecordBatch, 2}, + { "_arrow_ipc___RecordBatchWriter__WriteTable", (DL_FUNC) &_arrow_ipc___RecordBatchWriter__WriteTable, 2}, + { "_arrow_ipc___RecordBatchWriter__Close", (DL_FUNC) &_arrow_ipc___RecordBatchWriter__Close, 1}, + { "_arrow_ipc___RecordBatchFileWriter__Open", (DL_FUNC) &_arrow_ipc___RecordBatchFileWriter__Open, 4}, + { "_arrow_ipc___RecordBatchStreamWriter__Open", (DL_FUNC) &_arrow_ipc___RecordBatchStreamWriter__Open, 4}, + { "_arrow_Array__GetScalar", (DL_FUNC) &_arrow_Array__GetScalar, 2}, + { "_arrow_Scalar__ToString", (DL_FUNC) &_arrow_Scalar__ToString, 1}, + { "_arrow_StructScalar__field", (DL_FUNC) &_arrow_StructScalar__field, 2}, + { "_arrow_StructScalar__GetFieldByName", (DL_FUNC) &_arrow_StructScalar__GetFieldByName, 2}, + { "_arrow_Scalar__as_vector", (DL_FUNC) &_arrow_Scalar__as_vector, 1}, + { "_arrow_MakeArrayFromScalar", (DL_FUNC) &_arrow_MakeArrayFromScalar, 2}, + { "_arrow_Scalar__is_valid", (DL_FUNC) &_arrow_Scalar__is_valid, 1}, + { "_arrow_Scalar__type", (DL_FUNC) &_arrow_Scalar__type, 1}, + { "_arrow_Scalar__Equals", (DL_FUNC) &_arrow_Scalar__Equals, 2}, + { "_arrow_Scalar__ApproxEquals", (DL_FUNC) &_arrow_Scalar__ApproxEquals, 2}, + { "_arrow_schema_", (DL_FUNC) &_arrow_schema_, 1}, + { "_arrow_Schema__ToString", (DL_FUNC) &_arrow_Schema__ToString, 1}, + { "_arrow_Schema__num_fields", (DL_FUNC) &_arrow_Schema__num_fields, 1}, + { "_arrow_Schema__field", (DL_FUNC) &_arrow_Schema__field, 2}, + { "_arrow_Schema__AddField", (DL_FUNC) &_arrow_Schema__AddField, 3}, + { "_arrow_Schema__SetField", (DL_FUNC) &_arrow_Schema__SetField, 3}, + { "_arrow_Schema__RemoveField", (DL_FUNC) &_arrow_Schema__RemoveField, 2}, + { "_arrow_Schema__GetFieldByName", (DL_FUNC) &_arrow_Schema__GetFieldByName, 2}, + { "_arrow_Schema__fields", (DL_FUNC) &_arrow_Schema__fields, 1}, + { "_arrow_Schema__field_names", (DL_FUNC) &_arrow_Schema__field_names, 1}, + { "_arrow_Schema__HasMetadata", (DL_FUNC) &_arrow_Schema__HasMetadata, 1}, + { "_arrow_Schema__metadata", (DL_FUNC) &_arrow_Schema__metadata, 1}, + { "_arrow_Schema__WithMetadata", (DL_FUNC) &_arrow_Schema__WithMetadata, 2}, + { "_arrow_Schema__serialize", (DL_FUNC) &_arrow_Schema__serialize, 1}, + { "_arrow_Schema__Equals", (DL_FUNC) &_arrow_Schema__Equals, 3}, + { "_arrow_arrow__UnifySchemas", (DL_FUNC) &_arrow_arrow__UnifySchemas, 1}, + { "_arrow_Table__num_columns", (DL_FUNC) &_arrow_Table__num_columns, 1}, + { "_arrow_Table__num_rows", (DL_FUNC) &_arrow_Table__num_rows, 1}, + { "_arrow_Table__schema", (DL_FUNC) &_arrow_Table__schema, 1}, + { "_arrow_Table__ReplaceSchemaMetadata", (DL_FUNC) &_arrow_Table__ReplaceSchemaMetadata, 2}, + { "_arrow_Table__column", (DL_FUNC) &_arrow_Table__column, 2}, + { "_arrow_Table__field", (DL_FUNC) &_arrow_Table__field, 2}, + { "_arrow_Table__columns", (DL_FUNC) &_arrow_Table__columns, 1}, + { "_arrow_Table__ColumnNames", (DL_FUNC) &_arrow_Table__ColumnNames, 1}, + { "_arrow_Table__RenameColumns", (DL_FUNC) &_arrow_Table__RenameColumns, 2}, + { "_arrow_Table__Slice1", (DL_FUNC) &_arrow_Table__Slice1, 2}, + { "_arrow_Table__Slice2", (DL_FUNC) &_arrow_Table__Slice2, 3}, + { "_arrow_Table__Equals", (DL_FUNC) &_arrow_Table__Equals, 3}, + { "_arrow_Table__Validate", (DL_FUNC) &_arrow_Table__Validate, 1}, + { "_arrow_Table__ValidateFull", (DL_FUNC) &_arrow_Table__ValidateFull, 1}, + { "_arrow_Table__GetColumnByName", (DL_FUNC) &_arrow_Table__GetColumnByName, 2}, + { "_arrow_Table__RemoveColumn", (DL_FUNC) &_arrow_Table__RemoveColumn, 2}, + { "_arrow_Table__AddColumn", (DL_FUNC) &_arrow_Table__AddColumn, 4}, + { "_arrow_Table__SetColumn", (DL_FUNC) &_arrow_Table__SetColumn, 4}, + { "_arrow_Table__SelectColumns", (DL_FUNC) &_arrow_Table__SelectColumns, 2}, + { "_arrow_all_record_batches", (DL_FUNC) &_arrow_all_record_batches, 1}, + { "_arrow_Table__from_record_batches", (DL_FUNC) &_arrow_Table__from_record_batches, 2}, + { "_arrow_GetCpuThreadPoolCapacity", (DL_FUNC) &_arrow_GetCpuThreadPoolCapacity, 0}, + { "_arrow_SetCpuThreadPoolCapacity", (DL_FUNC) &_arrow_SetCpuThreadPoolCapacity, 1}, + { "_arrow_GetIOThreadPoolCapacity", (DL_FUNC) &_arrow_GetIOThreadPoolCapacity, 0}, + { "_arrow_SetIOThreadPoolCapacity", (DL_FUNC) &_arrow_SetIOThreadPoolCapacity, 1}, + { "_arrow_Array__infer_type", (DL_FUNC) &_arrow_Array__infer_type, 1}, + { "_arrow_Table__Reset", (DL_FUNC) &_arrow_Table__Reset, 1}, + { "_arrow_RecordBatch__Reset", (DL_FUNC) &_arrow_RecordBatch__Reset, 1}, {NULL, NULL, 0} }; extern "C" void R_init_arrow(DllInfo* dll){ @@ -7490,3 +7508,5 @@ extern "C" void R_init_arrow(DllInfo* dll){ #endif } + + diff --git a/r/src/compute-exec.cpp b/r/src/compute-exec.cpp index 3d0bbca63d2..cab1a09c6ae 100644 --- a/r/src/compute-exec.cpp +++ b/r/src/compute-exec.cpp @@ -57,12 +57,22 @@ std::shared_ptr MakeExecNodeOrStop( // [[arrow::export]] std::shared_ptr ExecPlan_run( const std::shared_ptr& plan, - const std::shared_ptr& final_node) { + const std::shared_ptr& final_node, cpp11::list sort_options) { // For now, don't require R to construct SinkNodes. // Instead, just pass the node we should collect as an argument. arrow::AsyncGenerator> sink_gen; - MakeExecNodeOrStop("sink", plan.get(), {final_node.get()}, - compute::SinkNodeOptions{&sink_gen}); + + // Sorting uses a different sink node; there is no general sort yet + if (sort_options.size() > 0) { + MakeExecNodeOrStop("order_by_sink", plan.get(), {final_node.get()}, + compute::OrderBySinkNodeOptions{ + *std::dynamic_pointer_cast( + make_compute_options("sort_indices", sort_options)), + &sink_gen}); + } else { + MakeExecNodeOrStop("sink", plan.get(), {final_node.get()}, + compute::SinkNodeOptions{&sink_gen}); + } StopIfNotOk(plan->Validate()); StopIfNotOk(plan->StartProducing()); diff --git a/r/src/expression.cpp b/r/src/expression.cpp index 3fcba46e911..97a8a746bba 100644 --- a/r/src/expression.cpp +++ b/r/src/expression.cpp @@ -27,6 +27,11 @@ namespace compute = ::arrow::compute; std::shared_ptr make_compute_options(std::string func_name, cpp11::list options); +// [[arrow::export]] +bool compute___expr__equals(const std::shared_ptr& lhs, + const std::shared_ptr& rhs) { + return lhs->Equals(*rhs); +} // [[arrow::export]] std::shared_ptr compute___expr__call(std::string func_name, cpp11::list argument_list, diff --git a/r/tests/testthat/helper-skip.R b/r/tests/testthat/helper-skip.R index 906963e38d1..3ec18a63019 100644 --- a/r/tests/testthat/helper-skip.R +++ b/r/tests/testthat/helper-skip.R @@ -25,6 +25,10 @@ skip_if_not_available <- function(feature) { if (feature == "re2") { # RE2 does not support valgrind (on purpose): https://github.com/google/re2/issues/177 skip_on_valgrind() + } else if (feature == "dataset") { + # These tests often hang on 32-bit windows rtools35, and we haven't been + # able to figure out how to make them work safely + skip_if_multithreading_disabled() } yes <- feature %in% names(build_features) && build_features[feature] @@ -35,6 +39,7 @@ skip_if_not_available <- function(feature) { skip_if_no_pyarrow <- function() { skip_on_valgrind() + skip_on_os("windows") skip_if_not_installed("reticulate") if (!reticulate::py_module_available("pyarrow")) { @@ -68,6 +73,15 @@ skip_on_valgrind <- function() { } } +skip_if_multithreading_disabled <- function() { + is_32bit <- .Machine$sizeof.pointer < 8 + is_old_r <- getRversion() < "4.0.0" + is_windows <- tolower(Sys.info()[["sysname"]]) == "windows" + if (is_32bit && is_old_r && is_windows) { + skip("Multithreading does not work properly on this system") + } +} + process_is_running <- function(x) { cmd <- sprintf("ps aux | grep '%s' | grep -v grep", x) tryCatch(system(cmd, ignore.stdout = TRUE) == 0, error = function(e) FALSE) diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R index b027dc98702..41265f0e638 100644 --- a/r/tests/testthat/test-dataset.R +++ b/r/tests/testthat/test-dataset.R @@ -27,15 +27,6 @@ ipc_dir <- make_temp_dir() csv_dir <- make_temp_dir() tsv_dir <- make_temp_dir() -skip_if_multithreading_disabled <- function() { - is_32bit <- .Machine$sizeof.pointer < 8 - is_old_r <- getRversion() < "4.0.0" - is_windows <- tolower(Sys.info()[["sysname"]]) == "windows" - if (is_32bit && is_old_r && is_windows) { - skip("Multithreading does not work properly on this system") - } -} - first_date <- lubridate::ymd_hms("2015-04-29 03:12:39") df1 <- tibble( @@ -133,7 +124,7 @@ test_that("Simple interface for datasets", { # Collecting virtual partition column works expect_equal( - collect(ds) %>% pull(part), + ds %>% arrange(part) %>% pull(part), c(rep(1, 10), rep(2, 10)) ) }) @@ -348,13 +339,12 @@ test_that("IPC/Feather format data", { # Collecting virtual partition column works expect_equal( - collect(ds) %>% pull(part), + ds %>% arrange(part) %>% pull(part), c(rep(3, 10), rep(4, 10)) ) }) test_that("CSV dataset", { - skip_if_multithreading_disabled() ds <- open_dataset(csv_dir, partitioning = "part", format = "csv") expect_r6_class(ds$format, "CsvFileFormat") expect_r6_class(ds$filesystem, "LocalFileSystem") @@ -376,13 +366,12 @@ test_that("CSV dataset", { ) # Collecting virtual partition column works expect_equal( - collect(ds) %>% pull(part), + collect(ds) %>% arrange(part) %>% pull(part), c(rep(5, 10), rep(6, 10)) ) }) test_that("CSV scan options", { - skip_if_multithreading_disabled() options <- FragmentScanOptions$create("text") expect_equal(options$type, "csv") options <- FragmentScanOptions$create("csv", @@ -429,7 +418,6 @@ test_that("CSV scan options", { }) test_that("compressed CSV dataset", { - skip_if_multithreading_disabled() skip_if_not_available("gzip") dst_dir <- make_temp_dir() dst_file <- file.path(dst_dir, "data.csv.gz") @@ -453,7 +441,6 @@ test_that("compressed CSV dataset", { }) test_that("CSV dataset options", { - skip_if_multithreading_disabled() dst_dir <- make_temp_dir() dst_file <- file.path(dst_dir, "data.csv") df <- tibble(chr = letters[1:10]) @@ -481,7 +468,6 @@ test_that("CSV dataset options", { }) test_that("Other text delimited dataset", { - skip_if_multithreading_disabled() ds1 <- open_dataset(tsv_dir, partitioning = "part", format = "tsv") expect_equivalent( ds1 %>% @@ -510,7 +496,6 @@ test_that("Other text delimited dataset", { }) test_that("readr parse options", { - skip_if_multithreading_disabled() arrow_opts <- names(formals(CsvParseOptions$create)) readr_opts <- names(formals(readr_to_csv_parse_options)) @@ -804,7 +789,7 @@ test_that("filter scalar validation doesn't crash (ARROW-7772)", { test_that("collect() on Dataset works (if fits in memory)", { skip_if_not_available("parquet") expect_equal( - collect(open_dataset(dataset_dir)), + collect(open_dataset(dataset_dir)) %>% arrange(int), rbind(df1, df2) ) }) @@ -1662,7 +1647,6 @@ test_that("Writing a dataset: Parquet format options", { }) test_that("Writing a dataset: CSV format options", { - skip_if_multithreading_disabled() df <- tibble( int = 1:10, dbl = as.numeric(1:10), diff --git a/r/tests/testthat/test-dplyr-collapse.R b/r/tests/testthat/test-dplyr-collapse.R new file mode 100644 index 00000000000..331f7b7b62c --- /dev/null +++ b/r/tests/testthat/test-dplyr-collapse.R @@ -0,0 +1,210 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +skip_if_not_available("dataset") + +withr::local_options(list(arrow.summarise.sort = TRUE)) + +library(dplyr) +library(stringr) + +tbl <- example_data +# Add some better string data +tbl$verses <- verses[[1]] +# c(" a ", " b ", " c ", ...) increasing padding +# nchar = 3 5 7 9 11 13 15 17 19 21 +tbl$padded_strings <- stringr::str_pad(letters[1:10], width = 2 * (1:10) + 1, side = "both") +tbl$some_grouping <- rep(c(1, 2), 5) + +tab <- Table$create(tbl) + +test_that("implicit_schema with select", { + expect_equal( + tab %>% + select(int, lgl) %>% + implicit_schema(), + schema(int = int32(), lgl = bool()) + ) +}) + +test_that("implicit_schema with rename", { + expect_equal( + tab %>% + select(numbers = int, lgl) %>% + implicit_schema(), + schema(numbers = int32(), lgl = bool()) + ) +}) + +test_that("implicit_schema with mutate", { + expect_equal( + tab %>% + transmute( + numbers = int * 4, + words = as.character(int) + ) %>% + implicit_schema(), + schema(numbers = float64(), words = utf8()) + ) +}) + +test_that("implicit_schema with summarize", { + expect_equal( + tab %>% + summarize( + avg = mean(int) + ) %>% + implicit_schema(), + schema(avg = float64()) + ) +}) + +test_that("implicit_schema with group_by summarize", { + expect_equal( + tab %>% + group_by(some_grouping) %>% + summarize( + avg = mean(int * 5L) + ) %>% + implicit_schema(), + schema(some_grouping = float64(), avg = float64()) + ) +}) + +test_that("collapse", { + q <- tab %>% + filter(dbl > 2, chr == "d" | chr == "f") %>% + select(chr, int, lgl) %>% + mutate(twice = int * 2L) + expect_false(is_collapsed(q)) + expect_true(is_collapsed(collapse(q))) + + expect_dplyr_equal( + input %>% + filter(dbl > 2, chr == "d" | chr == "f") %>% + select(chr, int, lgl) %>% + mutate(twice = int * 2L) %>% + collapse() %>% + filter(int < 5) %>% + select(int, twice) %>% + collect(), + tbl + ) + + expect_dplyr_equal( + input %>% + filter(dbl > 2, chr == "d" | chr == "f") %>% + collapse() %>% + select(chr, int, lgl) %>% + collapse() %>% + filter(int < 5) %>% + select(int, chr) %>% + collect(), + tbl + ) +}) + +test_that("Properties of collapsed query", { + q <- tab %>% + filter(dbl > 2) %>% + select(chr, int, lgl) %>% + mutate(twice = int * 2L) %>% + group_by(lgl) %>% + summarize(total = sum(int, na.rm = TRUE)) %>% + mutate(extra = total * 5) + + # print(tbl %>% + # filter(dbl > 2) %>% + # select(chr, int, lgl) %>% + # mutate(twice = int * 2L) %>% + # group_by(lgl) %>% + # summarize(total = sum(int, na.rm = TRUE)) %>% + # mutate(extra = total * 5)) + + # # A tibble: 3 × 3 + # lgl total extra + # + # 1 FALSE 8 40 + # 2 TRUE 8 40 + # 3 NA 25 125 + + # Avoid evaluating just for nrow + expect_identical(dim(q), c(NA_integer_, 3L)) + + expect_output( + print(q), + "InMemoryDataset (query) +lgl: bool +total: int32 +extra: double (multiply_checked(total, 5)) + +See $.data for the source Arrow object", + fixed = TRUE + ) + expect_output( + print(q$.data), + "InMemoryDataset (query) +int: int32 +lgl: bool + +* Aggregations: +total: sum(int) +* Filter: (dbl > 2) +* Grouped by lgl +See $.data for the source Arrow object", + fixed = TRUE + ) + + expect_equal( + head(q, 1) %>% collect(), + tibble::tibble(lgl = FALSE, total = 8L, extra = 40) + ) + expect_equal( + tail(q, 1) %>% collect(), + tibble::tibble(lgl = NA, total = 25L, extra = 125) + ) +}) + +test_that("query_on_dataset handles collapse()", { + expect_false(query_on_dataset( + tab %>% + select(int, chr) + )) + expect_false(query_on_dataset( + tab %>% + select(int, chr) %>% + collapse() %>% + select(int) + )) + + ds_dir <- tempfile() + dir.create(ds_dir) + on.exit(unlink(ds_dir)) + write_parquet(tab, file.path(ds_dir, "file.parquet")) + ds <- open_dataset(ds_dir) + + expect_true(query_on_dataset( + ds %>% + select(int, chr) + )) + expect_true(query_on_dataset( + ds %>% + select(int, chr) %>% + collapse() %>% + select(int) + )) +}) diff --git a/r/tests/testthat/test-dplyr-aggregate.R b/r/tests/testthat/test-dplyr-summarize.R similarity index 72% rename from r/tests/testthat/test-dplyr-aggregate.R rename to r/tests/testthat/test-dplyr-summarize.R index 3a04b6d2314..78d36630e56 100644 --- a/r/tests/testthat/test-dplyr-aggregate.R +++ b/r/tests/testthat/test-dplyr-summarize.R @@ -17,6 +17,8 @@ skip_if_not_available("dataset") +withr::local_options(list(arrow.summarise.sort = TRUE)) + library(dplyr) library(stringr) @@ -33,7 +35,8 @@ test_that("summarize", { input %>% select(int, chr) %>% filter(int > 5) %>% - summarize(min_int = min(int)), + summarize(min_int = min(int)) %>% + collect(), tbl, warning = TRUE ) @@ -42,12 +45,27 @@ test_that("summarize", { input %>% select(int, chr) %>% filter(int > 5) %>% - summarize(min_int = min(int) / 2), + summarize(min_int = min(int) / 2) %>% + collect(), tbl, warning = TRUE ) }) +test_that("summarize() doesn't evaluate eagerly", { + expect_s3_class( + Table$create(tbl) %>% + summarize(total = sum(int)), + "arrow_dplyr_query" + ) + expect_r6_class( + Table$create(tbl) %>% + summarize(total = sum(int)) %>% + compute(), + "ArrowTabular" + ) +}) + test_that("Can aggregate in Arrow", { expect_dplyr_equal( input %>% @@ -68,7 +86,6 @@ test_that("Group by sum on dataset", { input %>% group_by(some_grouping) %>% summarize(total = sum(int, na.rm = TRUE)) %>% - arrange(some_grouping) %>% collect(), tbl ) @@ -77,7 +94,6 @@ test_that("Group by sum on dataset", { input %>% group_by(some_grouping) %>% summarize(total = sum(int * 4, na.rm = TRUE)) %>% - arrange(some_grouping) %>% collect(), tbl ) @@ -86,7 +102,6 @@ test_that("Group by sum on dataset", { input %>% group_by(some_grouping) %>% summarize(total = sum(int)) %>% - arrange(some_grouping) %>% collect(), tbl, ) @@ -97,7 +112,6 @@ test_that("Group by mean on dataset", { input %>% group_by(some_grouping) %>% summarize(mean = mean(int, na.rm = TRUE)) %>% - arrange(some_grouping) %>% collect(), tbl ) @@ -106,7 +120,6 @@ test_that("Group by mean on dataset", { input %>% group_by(some_grouping) %>% summarize(mean = mean(int, na.rm = FALSE)) %>% - arrange(some_grouping) %>% collect(), tbl ) @@ -117,7 +130,6 @@ test_that("Group by sd on dataset", { input %>% group_by(some_grouping) %>% summarize(sd = sd(int, na.rm = TRUE)) %>% - arrange(some_grouping) %>% collect(), tbl ) @@ -127,7 +139,6 @@ test_that("Group by sd on dataset", { input %>% group_by(some_grouping) %>% summarize(sd = sd(int, na.rm = FALSE)) %>% - arrange(some_grouping) %>% collect(), tbl ) @@ -138,7 +149,6 @@ test_that("Group by var on dataset", { input %>% group_by(some_grouping) %>% summarize(var = var(int, na.rm = TRUE)) %>% - arrange(some_grouping) %>% collect(), tbl ) @@ -148,7 +158,6 @@ test_that("Group by var on dataset", { input %>% group_by(some_grouping) %>% summarize(var = var(int, na.rm = FALSE)) %>% - arrange(some_grouping) %>% collect(), tbl ) @@ -178,7 +187,6 @@ test_that("Group by any/all", { input %>% group_by(some_grouping) %>% summarize(any(lgl, na.rm = TRUE)) %>% - arrange(some_grouping) %>% collect(), tbl ) @@ -186,7 +194,6 @@ test_that("Group by any/all", { input %>% group_by(some_grouping) %>% summarize(all(lgl, na.rm = TRUE)) %>% - arrange(some_grouping) %>% collect(), tbl ) @@ -194,7 +201,6 @@ test_that("Group by any/all", { input %>% group_by(some_grouping) %>% summarize(any(lgl, na.rm = FALSE)) %>% - arrange(some_grouping) %>% collect(), tbl ) @@ -202,7 +208,6 @@ test_that("Group by any/all", { input %>% group_by(some_grouping) %>% summarize(all(lgl, na.rm = FALSE)) %>% - arrange(some_grouping) %>% collect(), tbl ) @@ -212,7 +217,6 @@ test_that("Group by any/all", { mutate(has_words = nchar(verses) < 0) %>% group_by(some_grouping) %>% summarize(any(has_words, na.rm = TRUE)) %>% - arrange(some_grouping) %>% collect(), tbl ) @@ -221,7 +225,6 @@ test_that("Group by any/all", { mutate(has_words = nchar(verses) < 0) %>% group_by(some_grouping) %>% summarize(all(has_words, na.rm = TRUE)) %>% - arrange(some_grouping) %>% collect(), tbl ) @@ -229,7 +232,6 @@ test_that("Group by any/all", { input %>% group_by(some_grouping) %>% summarize(has_words = all(nchar(verses) < 0, na.rm = TRUE)) %>% - arrange(some_grouping) %>% collect(), tbl ) @@ -274,7 +276,6 @@ test_that("Filter and aggregate", { filter(some_grouping == 2) %>% group_by(some_grouping) %>% summarize(total = sum(int, na.rm = TRUE)) %>% - arrange(some_grouping) %>% collect(), tbl ) @@ -284,7 +285,86 @@ test_that("Filter and aggregate", { filter(int > 5) %>% group_by(some_grouping) %>% summarize(total = sum(int, na.rm = TRUE)) %>% - arrange(some_grouping) %>% + collect(), + tbl + ) +}) + +test_that("Group by edge cases", { + expect_dplyr_equal( + input %>% + group_by(some_grouping * 2) %>% + summarize(total = sum(int, na.rm = TRUE)) %>% + collect(), + tbl + ) + + expect_dplyr_equal( + input %>% + group_by(alt = some_grouping * 2) %>% + summarize(total = sum(int, na.rm = TRUE)) %>% + collect(), + tbl + ) +}) + +test_that("Do things after summarize", { + group2_sum <- tbl %>% + group_by(some_grouping) %>% + filter(int > 5) %>% + summarize(total = sum(int, na.rm = TRUE)) %>% + pull() %>% + tail(1) + + expect_dplyr_equal( + input %>% + group_by(some_grouping) %>% + filter(int > 5) %>% + summarize(total = sum(int, na.rm = TRUE)) %>% + filter(total == group2_sum) %>% + mutate(extra = total * 5) %>% + collect(), + tbl + ) + + expect_dplyr_equal( + input %>% + filter(dbl > 2) %>% + select(chr, int, lgl) %>% + mutate(twice = int * 2L) %>% + group_by(lgl) %>% + summarize( + count = n(), + total = sum(twice, na.rm = TRUE) + ) %>% + mutate(mean = total / count) %>% + collect(), + tbl + ) +}) + +test_that("Expressions on aggregations", { + # This is what it effectively is + expect_dplyr_equal( + input %>% + group_by(some_grouping) %>% + summarize( + any = any(lgl), + all = all(lgl) + ) %>% + compute() %>% + ungroup() %>% # TODO: loosen the restriction on mutate after group_by + mutate(some = any & !all) %>% + select(some_grouping, some) %>% + collect(), + tbl + ) + # More concisely: + skip("TODO: ARROW-13778") + expect_dplyr_equal( + input %>% + group_by(some_grouping) %>% + summarize(any(lgl) & !all(lgl)) %>% collect(), tbl ) diff --git a/r/tests/testthat/test-duckdb.R b/r/tests/testthat/test-duckdb.R index cdfcb62d02d..56343ad729e 100644 --- a/r/tests/testthat/test-duckdb.R +++ b/r/tests/testthat/test-duckdb.R @@ -18,6 +18,7 @@ skip_if_not_installed("duckdb", minimum_version = "0.2.8") skip_if_not_installed("dbplyr") skip_if_not_available("dataset") + # when we remove this, we should also remove the FALSE in run_duckdb_examples skip("These tests are flaking: https://github.com/duckdb/duckdb/issues/2100") library(duckdb) diff --git a/r/tests/testthat/test-metadata.R b/r/tests/testthat/test-metadata.R index bc6d285b333..6ae5b54fbf3 100644 --- a/r/tests/testthat/test-metadata.R +++ b/r/tests/testthat/test-metadata.R @@ -239,9 +239,10 @@ test_that("metadata of list elements (ARROW-10386)", { ds <- open_dataset(dst_dir) expect_warning( df_from_ds <- collect(ds), - "Row-level metadata is not compatible with this operation and has been ignored" + NA # TODO: ARROW-13852 + # "Row-level metadata is not compatible with this operation and has been ignored" ) - expect_equal(df_from_ds[c(1, 4, 3, 2), ], df, check.attributes = FALSE) + expect_equal(arrange(df_from_ds, int), arrange(df, int), check.attributes = FALSE) # however there is *no* warning if we don't select the metadata column expect_warning( diff --git a/r/tests/testthat/test-python.R b/r/tests/testthat/test-python.R index 9e67219e19a..d5815247d51 100644 --- a/r/tests/testthat/test-python.R +++ b/r/tests/testthat/test-python.R @@ -20,9 +20,10 @@ context("To/from Python") test_that("install_pyarrow", { skip_on_cran() skip_if_not_dev_mode() - # Python problems on Apple M1 still - skip_if(grepl("arm-apple|aarch64.*darwin", R.Version()$platform)) + # Windows CI machine doesn't pick up the right python or something + skip_on_os("windows") skip_if_not_installed("reticulate") + venv <- try(reticulate::virtualenv_create("arrow-test")) # Bail out if virtualenv isn't available skip_if(inherits(venv, "try-error")) diff --git a/r/tests/testthat/test-s3-minio.R b/r/tests/testthat/test-s3-minio.R index 94451e5351a..a2a13cbf887 100644 --- a/r/tests/testthat/test-s3-minio.R +++ b/r/tests/testthat/test-s3-minio.R @@ -86,8 +86,8 @@ if (arrow_with_s3() && process_is_running("minio server")) { test_that("open_dataset with an S3 file (not directory) URI", { skip_if_not_available("parquet") expect_identical( - open_dataset(minio_uri("test.parquet")) %>% collect(), - example_data + open_dataset(minio_uri("test.parquet")) %>% collect() %>% arrange(int), + example_data %>% arrange(int) ) }) @@ -96,8 +96,10 @@ if (arrow_with_s3() && process_is_running("minio server")) { open_dataset( c(minio_uri("test.feather"), minio_uri("test2.feather")), format = "feather" - ) %>% collect(), - rbind(example_data, example_data) + ) %>% + arrange(int) %>% + collect(), + rbind(example_data, example_data) %>% arrange(int) ) }) @@ -153,8 +155,8 @@ if (arrow_with_s3() && process_is_running("minio server")) { test_that("open_dataset with fs", { ds <- open_dataset(fs$path(minio_path("hive_dir"))) expect_identical( - ds %>% select(dbl, lgl) %>% collect(), - rbind(df1[, c("dbl", "lgl")], df2[, c("dbl", "lgl")]) + ds %>% select(int, dbl, lgl) %>% collect() %>% arrange(int), + rbind(df1[, c("int", "dbl", "lgl")], df2[, c("int", "dbl", "lgl")]) %>% arrange(int) ) }) @@ -170,16 +172,16 @@ if (arrow_with_s3() && process_is_running("minio server")) { expect_length(dir(td), 2) ds <- open_dataset(td) expect_identical( - ds %>% select(dbl, lgl) %>% collect(), - rbind(df1[, c("dbl", "lgl")], df2[, c("dbl", "lgl")]) + ds %>% select(int, dbl, lgl) %>% collect() %>% arrange(int), + rbind(df1[, c("int", "dbl", "lgl")], df2[, c("int", "dbl", "lgl")]) %>% arrange(int) ) # Let's copy the other way and use a SubTreeFileSystem rather than URI copy_files(td, fs$path(minio_path("hive_dir2"))) ds2 <- open_dataset(fs$path(minio_path("hive_dir2"))) expect_identical( - ds2 %>% select(dbl, lgl) %>% collect(), - rbind(df1[, c("dbl", "lgl")], df2[, c("dbl", "lgl")]) + ds2 %>% select(int, dbl, lgl) %>% collect() %>% arrange(int), + rbind(df1[, c("int", "dbl", "lgl")], df2[, c("int", "dbl", "lgl")]) %>% arrange(int) ) }) }