diff --git a/cpp/src/arrow/compute/exec.cc b/cpp/src/arrow/compute/exec.cc index 2a32c96ed3b..7d6db9f58db 100644 --- a/cpp/src/arrow/compute/exec.cc +++ b/cpp/src/arrow/compute/exec.cc @@ -115,7 +115,7 @@ ExecBatch ExecBatch::Slice(int64_t offset, int64_t length) const { if (value.is_scalar()) continue; value = value.array()->Slice(offset, length); } - out.length = length; + out.length = std::min(length, this->length - offset); return out; } diff --git a/cpp/src/arrow/compute/exec/exec_plan.cc b/cpp/src/arrow/compute/exec/exec_plan.cc index 20c8c347cc1..4a4758c8471 100644 --- a/cpp/src/arrow/compute/exec/exec_plan.cc +++ b/cpp/src/arrow/compute/exec/exec_plan.cc @@ -719,11 +719,13 @@ struct ScalarAggregateNode : ExecNode { ScalarAggregateNode(ExecNode* input, std::string label, std::shared_ptr output_schema, std::vector kernels, + std::vector argument_indices, std::vector>> states) : ExecNode(input->plan(), std::move(label), {input}, {"target"}, /*output_schema=*/std::move(output_schema), /*num_outputs=*/1), kernels_(std::move(kernels)), + argument_indices_(std::move(argument_indices)), states_(std::move(states)) {} const char* kind_name() override { return "ScalarAggregateNode"; } @@ -733,7 +735,7 @@ struct ScalarAggregateNode : ExecNode { KernelContext batch_ctx{plan()->exec_context()}; batch_ctx.SetState(states_[i][thread_index].get()); - ExecBatch single_column_batch{{batch.values[i]}, batch.length}; + ExecBatch single_column_batch{{batch[argument_indices_[i]]}, batch.length}; RETURN_NOT_OK(kernels_[i]->consume(&batch_ctx, single_column_batch)); } return Status::OK(); @@ -807,7 +809,8 @@ struct ScalarAggregateNode : ExecNode { } Future<> finished_ = Future<>::MakeFinished(); - std::vector kernels_; + const std::vector kernels_; + const std::vector argument_indices_; std::vector>> states_; @@ -816,11 +819,17 @@ struct ScalarAggregateNode : ExecNode { }; Result MakeScalarAggregateNode(ExecNode* input, std::string label, - std::vector aggregates) { - if 
(input->output_schema()->num_fields() != static_cast(aggregates.size())) { - return Status::Invalid("Provided ", aggregates.size(), - " aggregates, expected one for each field of ", - input->output_schema()->ToString()); + std::vector aggregates, + std::vector arguments, + std::vector out_field_names) { + if (aggregates.size() != arguments.size()) { + return Status::Invalid("Provided ", aggregates.size(), " aggregates but ", + arguments.size(), " arguments."); + } + + if (aggregates.size() != out_field_names.size()) { + return Status::Invalid("Provided ", aggregates.size(), " aggregates but ", + out_field_names.size(), " field names for the output."); } auto exec_ctx = input->plan()->exec_context(); @@ -828,8 +837,16 @@ Result MakeScalarAggregateNode(ExecNode* input, std::string label, std::vector kernels(aggregates.size()); std::vector>> states(kernels.size()); FieldVector fields(kernels.size()); + std::vector argument_indices(kernels.size()); for (size_t i = 0; i < kernels.size(); ++i) { + if (!arguments[i].IsName()) { + return Status::NotImplemented("Non name field refs"); + } + ARROW_ASSIGN_OR_RAISE(auto match, + arguments[i].FindOneOrNone(*input->output_schema())); + argument_indices[i] = match[0]; + ARROW_ASSIGN_OR_RAISE(auto function, exec_ctx->func_registry()->GetFunction(aggregates[i].function)); @@ -862,12 +879,12 @@ Result MakeScalarAggregateNode(ExecNode* input, std::string label, ARROW_ASSIGN_OR_RAISE( auto descr, kernels[i]->signature->out_type().Resolve(&kernel_ctx, {in_type})); - fields[i] = field(aggregates[i].function, std::move(descr.type)); + fields[i] = field(std::move(out_field_names[i]), std::move(descr.type)); } return input->plan()->EmplaceNode( input, std::move(label), schema(std::move(fields)), std::move(kernels), - std::move(states)); + std::move(argument_indices), std::move(states)); } namespace internal { diff --git a/cpp/src/arrow/compute/exec/exec_plan.h b/cpp/src/arrow/compute/exec/exec_plan.h index 07bb365bbc7..fc3af92af4a 100644 
--- a/cpp/src/arrow/compute/exec/exec_plan.h +++ b/cpp/src/arrow/compute/exec/exec_plan.h @@ -285,7 +285,9 @@ Result MakeProjectNode(ExecNode* input, std::string label, ARROW_EXPORT Result MakeScalarAggregateNode(ExecNode* input, std::string label, - std::vector aggregates); + std::vector aggregates, + std::vector arguments, + std::vector out_field_names); /// \brief Make a node which groups input rows based on key fields and computes /// aggregates for each group diff --git a/cpp/src/arrow/compute/exec/plan_test.cc b/cpp/src/arrow/compute/exec/plan_test.cc index aa807468bcb..101257f5de8 100644 --- a/cpp/src/arrow/compute/exec/plan_test.cc +++ b/cpp/src/arrow/compute/exec/plan_test.cc @@ -531,9 +531,11 @@ TEST(ExecPlanExecution, SourceScalarAggSink) { MakeTestSourceNode(plan.get(), "source", basic_data, /*parallel=*/false, /*slow=*/false)); - ASSERT_OK_AND_ASSIGN(auto scalar_agg, - MakeScalarAggregateNode(source, "scalar_agg", - {{"sum", nullptr}, {"any", nullptr}})); + ASSERT_OK_AND_ASSIGN( + auto scalar_agg, + MakeScalarAggregateNode(source, "scalar_agg", {{"sum", nullptr}, {"any", nullptr}}, + /*targets=*/{"i32", "bool"}, + /*out_field_names=*/{"sum(i32)", "any(bool)"})); auto sink_gen = MakeSinkNode(scalar_agg, "sink"); @@ -565,7 +567,8 @@ TEST(ExecPlanExecution, ScalarSourceScalarAggSink) { ASSERT_OK_AND_ASSIGN( auto scalar_agg, MakeScalarAggregateNode(source, "scalar_agg", - {{"count", nullptr}, {"sum", nullptr}, {"mean", nullptr}})); + {{"count", nullptr}, {"sum", nullptr}, {"mean", nullptr}}, + {"a", "b", "c"}, {"sum a", "sum b", "sum c"})); auto sink_gen = MakeSinkNode(scalar_agg, "sink"); diff --git a/cpp/src/arrow/dataset/scanner.cc b/cpp/src/arrow/dataset/scanner.cc index 192f84f46df..d81b9cd1c5c 100644 --- a/cpp/src/arrow/dataset/scanner.cc +++ b/cpp/src/arrow/dataset/scanner.cc @@ -816,14 +816,15 @@ Result AsyncScanner::CountRows() { ARROW_ASSIGN_OR_RAISE(auto scan, MakeScanNode(plan.get(), std::move(fragment_gen), options)); - ARROW_ASSIGN_OR_RAISE( 
- auto get_selection, - compute::MakeProjectNode(scan, "get_selection", {options->filter})); + ARROW_ASSIGN_OR_RAISE(auto get_selection, + compute::MakeProjectNode(scan, "get_selection", {options->filter}, + {"selection_mask"})); ARROW_ASSIGN_OR_RAISE( auto sum_selection, compute::MakeScalarAggregateNode(get_selection, "sum_selection", - {compute::internal::Aggregate{"sum", nullptr}})); + {compute::internal::Aggregate{"sum", nullptr}}, + {"selection_mask"}, {"sum"})); AsyncGenerator> sink_gen = compute::MakeSinkNode(sum_selection, "sink"); diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc index de7f780183a..34fa1486ef2 100644 --- a/cpp/src/arrow/dataset/scanner_test.cc +++ b/cpp/src/arrow/dataset/scanner_test.cc @@ -1471,14 +1471,16 @@ TEST(ScanNode, MinimalScalarAggEndToEnd) { ASSERT_OK_AND_ASSIGN( compute::ExecNode * sum, compute::MakeScalarAggregateNode(project, "scalar_agg", - {compute::internal::Aggregate{"sum", nullptr}})); + {compute::internal::Aggregate{"sum", nullptr}}, + {a_times_2.ToString()}, {"a*2 sum"})); // finally, pipe the project node into a sink node auto sink_gen = compute::MakeSinkNode(sum, "sink"); // translate sink_gen (async) to sink_reader (sync) - std::shared_ptr sink_reader = compute::MakeGeneratorReader( - schema({field("sum", int64())}), std::move(sink_gen), exec_context.memory_pool()); + std::shared_ptr sink_reader = + compute::MakeGeneratorReader(schema({field("a*2 sum", int64())}), + std::move(sink_gen), exec_context.memory_pool()); // start the ExecPlan ASSERT_OK(plan->StartProducing()); @@ -1489,9 +1491,9 @@ TEST(ScanNode, MinimalScalarAggEndToEnd) { // wait 1s for completion ASSERT_TRUE(plan->finished().Wait(/*seconds=*/1)) << "ExecPlan didn't finish within 1s"; - auto expected = TableFromJSON(schema({field("sum", int64())}), { - R"([ - {"sum": 4} + auto expected = TableFromJSON(schema({field("a*2 sum", int64())}), { + R"([ + {"a*2 sum": 4} ])"}); AssertTablesEqual(*expected, *collected, 
/*same_chunk_layout=*/false); } diff --git a/r/DESCRIPTION b/r/DESCRIPTION index a0c4b61b7a0..3d10aa4745e 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -109,6 +109,7 @@ Collate: 'metadata.R' 'parquet.R' 'python.R' + 'query-engine.R' 'record-batch-reader.R' 'record-batch-writer.R' 'reexports-bit64.R' diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index e54f88e9d4e..268a17ef4f4 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -1,1749 +1,1784 @@ # Generated by using data-raw/codegen.R -> do not edit by hand -is_altrep_int_nonull <- function(x) { - .Call(`_arrow_is_altrep_int_nonull`, x) +is_altrep_int_nonull <- function(x){ + .Call(`_arrow_is_altrep_int_nonull`, x) } -is_altrep_dbl_nonull <- function(x) { - .Call(`_arrow_is_altrep_dbl_nonull`, x) +is_altrep_dbl_nonull <- function(x){ + .Call(`_arrow_is_altrep_dbl_nonull`, x) } -Array__Slice1 <- function(array, offset) { - .Call(`_arrow_Array__Slice1`, array, offset) +Array__Slice1 <- function(array, offset){ + .Call(`_arrow_Array__Slice1`, array, offset) } -Array__Slice2 <- function(array, offset, length) { - .Call(`_arrow_Array__Slice2`, array, offset, length) +Array__Slice2 <- function(array, offset, length){ + .Call(`_arrow_Array__Slice2`, array, offset, length) } -Array__IsNull <- function(x, i) { - .Call(`_arrow_Array__IsNull`, x, i) +Array__IsNull <- function(x, i){ + .Call(`_arrow_Array__IsNull`, x, i) } -Array__IsValid <- function(x, i) { - .Call(`_arrow_Array__IsValid`, x, i) +Array__IsValid <- function(x, i){ + .Call(`_arrow_Array__IsValid`, x, i) } -Array__length <- function(x) { - .Call(`_arrow_Array__length`, x) +Array__length <- function(x){ + .Call(`_arrow_Array__length`, x) } -Array__offset <- function(x) { - .Call(`_arrow_Array__offset`, x) +Array__offset <- function(x){ + .Call(`_arrow_Array__offset`, x) } -Array__null_count <- function(x) { - .Call(`_arrow_Array__null_count`, x) +Array__null_count <- function(x){ + .Call(`_arrow_Array__null_count`, x) } -Array__type <- 
function(x) { - .Call(`_arrow_Array__type`, x) +Array__type <- function(x){ + .Call(`_arrow_Array__type`, x) } -Array__ToString <- function(x) { - .Call(`_arrow_Array__ToString`, x) +Array__ToString <- function(x){ + .Call(`_arrow_Array__ToString`, x) } -Array__type_id <- function(x) { - .Call(`_arrow_Array__type_id`, x) +Array__type_id <- function(x){ + .Call(`_arrow_Array__type_id`, x) } -Array__Equals <- function(lhs, rhs) { - .Call(`_arrow_Array__Equals`, lhs, rhs) +Array__Equals <- function(lhs, rhs){ + .Call(`_arrow_Array__Equals`, lhs, rhs) } -Array__ApproxEquals <- function(lhs, rhs) { - .Call(`_arrow_Array__ApproxEquals`, lhs, rhs) +Array__ApproxEquals <- function(lhs, rhs){ + .Call(`_arrow_Array__ApproxEquals`, lhs, rhs) } -Array__Diff <- function(lhs, rhs) { - .Call(`_arrow_Array__Diff`, lhs, rhs) +Array__Diff <- function(lhs, rhs){ + .Call(`_arrow_Array__Diff`, lhs, rhs) } -Array__data <- function(array) { - .Call(`_arrow_Array__data`, array) +Array__data <- function(array){ + .Call(`_arrow_Array__data`, array) } -Array__RangeEquals <- function(self, other, start_idx, end_idx, other_start_idx) { - .Call(`_arrow_Array__RangeEquals`, self, other, start_idx, end_idx, other_start_idx) +Array__RangeEquals <- function(self, other, start_idx, end_idx, other_start_idx){ + .Call(`_arrow_Array__RangeEquals`, self, other, start_idx, end_idx, other_start_idx) } -Array__View <- function(array, type) { - .Call(`_arrow_Array__View`, array, type) +Array__View <- function(array, type){ + .Call(`_arrow_Array__View`, array, type) } -Array__Validate <- function(array) { - invisible(.Call(`_arrow_Array__Validate`, array)) +Array__Validate <- function(array){ + invisible(.Call(`_arrow_Array__Validate`, array)) } -DictionaryArray__indices <- function(array) { - .Call(`_arrow_DictionaryArray__indices`, array) +DictionaryArray__indices <- function(array){ + .Call(`_arrow_DictionaryArray__indices`, array) } -DictionaryArray__dictionary <- function(array) { - 
.Call(`_arrow_DictionaryArray__dictionary`, array) +DictionaryArray__dictionary <- function(array){ + .Call(`_arrow_DictionaryArray__dictionary`, array) } -StructArray__field <- function(array, i) { - .Call(`_arrow_StructArray__field`, array, i) +StructArray__field <- function(array, i){ + .Call(`_arrow_StructArray__field`, array, i) } -StructArray__GetFieldByName <- function(array, name) { - .Call(`_arrow_StructArray__GetFieldByName`, array, name) +StructArray__GetFieldByName <- function(array, name){ + .Call(`_arrow_StructArray__GetFieldByName`, array, name) } -StructArray__Flatten <- function(array) { - .Call(`_arrow_StructArray__Flatten`, array) +StructArray__Flatten <- function(array){ + .Call(`_arrow_StructArray__Flatten`, array) } -ListArray__value_type <- function(array) { - .Call(`_arrow_ListArray__value_type`, array) +ListArray__value_type <- function(array){ + .Call(`_arrow_ListArray__value_type`, array) } -LargeListArray__value_type <- function(array) { - .Call(`_arrow_LargeListArray__value_type`, array) +LargeListArray__value_type <- function(array){ + .Call(`_arrow_LargeListArray__value_type`, array) } -ListArray__values <- function(array) { - .Call(`_arrow_ListArray__values`, array) +ListArray__values <- function(array){ + .Call(`_arrow_ListArray__values`, array) } -LargeListArray__values <- function(array) { - .Call(`_arrow_LargeListArray__values`, array) +LargeListArray__values <- function(array){ + .Call(`_arrow_LargeListArray__values`, array) } -ListArray__value_length <- function(array, i) { - .Call(`_arrow_ListArray__value_length`, array, i) +ListArray__value_length <- function(array, i){ + .Call(`_arrow_ListArray__value_length`, array, i) } -LargeListArray__value_length <- function(array, i) { - .Call(`_arrow_LargeListArray__value_length`, array, i) +LargeListArray__value_length <- function(array, i){ + .Call(`_arrow_LargeListArray__value_length`, array, i) } -FixedSizeListArray__value_length <- function(array, i) { - 
.Call(`_arrow_FixedSizeListArray__value_length`, array, i) +FixedSizeListArray__value_length <- function(array, i){ + .Call(`_arrow_FixedSizeListArray__value_length`, array, i) } -ListArray__value_offset <- function(array, i) { - .Call(`_arrow_ListArray__value_offset`, array, i) +ListArray__value_offset <- function(array, i){ + .Call(`_arrow_ListArray__value_offset`, array, i) } -LargeListArray__value_offset <- function(array, i) { - .Call(`_arrow_LargeListArray__value_offset`, array, i) +LargeListArray__value_offset <- function(array, i){ + .Call(`_arrow_LargeListArray__value_offset`, array, i) } -FixedSizeListArray__value_offset <- function(array, i) { - .Call(`_arrow_FixedSizeListArray__value_offset`, array, i) +FixedSizeListArray__value_offset <- function(array, i){ + .Call(`_arrow_FixedSizeListArray__value_offset`, array, i) } -ListArray__raw_value_offsets <- function(array) { - .Call(`_arrow_ListArray__raw_value_offsets`, array) +ListArray__raw_value_offsets <- function(array){ + .Call(`_arrow_ListArray__raw_value_offsets`, array) } -LargeListArray__raw_value_offsets <- function(array) { - .Call(`_arrow_LargeListArray__raw_value_offsets`, array) +LargeListArray__raw_value_offsets <- function(array){ + .Call(`_arrow_LargeListArray__raw_value_offsets`, array) } -Array__as_vector <- function(array) { - .Call(`_arrow_Array__as_vector`, array) +Array__as_vector <- function(array){ + .Call(`_arrow_Array__as_vector`, array) } -ChunkedArray__as_vector <- function(chunked_array, use_threads) { - .Call(`_arrow_ChunkedArray__as_vector`, chunked_array, use_threads) +ChunkedArray__as_vector <- function(chunked_array, use_threads){ + .Call(`_arrow_ChunkedArray__as_vector`, chunked_array, use_threads) } -RecordBatch__to_dataframe <- function(batch, use_threads) { - .Call(`_arrow_RecordBatch__to_dataframe`, batch, use_threads) +RecordBatch__to_dataframe <- function(batch, use_threads){ + .Call(`_arrow_RecordBatch__to_dataframe`, batch, use_threads) } -Table__to_dataframe <- 
function(table, use_threads) { - .Call(`_arrow_Table__to_dataframe`, table, use_threads) +Table__to_dataframe <- function(table, use_threads){ + .Call(`_arrow_Table__to_dataframe`, table, use_threads) } -ArrayData__get_type <- function(x) { - .Call(`_arrow_ArrayData__get_type`, x) +ArrayData__get_type <- function(x){ + .Call(`_arrow_ArrayData__get_type`, x) } -ArrayData__get_length <- function(x) { - .Call(`_arrow_ArrayData__get_length`, x) +ArrayData__get_length <- function(x){ + .Call(`_arrow_ArrayData__get_length`, x) } -ArrayData__get_null_count <- function(x) { - .Call(`_arrow_ArrayData__get_null_count`, x) +ArrayData__get_null_count <- function(x){ + .Call(`_arrow_ArrayData__get_null_count`, x) } -ArrayData__get_offset <- function(x) { - .Call(`_arrow_ArrayData__get_offset`, x) +ArrayData__get_offset <- function(x){ + .Call(`_arrow_ArrayData__get_offset`, x) } -ArrayData__buffers <- function(x) { - .Call(`_arrow_ArrayData__buffers`, x) +ArrayData__buffers <- function(x){ + .Call(`_arrow_ArrayData__buffers`, x) } -Buffer__is_mutable <- function(buffer) { - .Call(`_arrow_Buffer__is_mutable`, buffer) +Buffer__is_mutable <- function(buffer){ + .Call(`_arrow_Buffer__is_mutable`, buffer) } -Buffer__ZeroPadding <- function(buffer) { - invisible(.Call(`_arrow_Buffer__ZeroPadding`, buffer)) +Buffer__ZeroPadding <- function(buffer){ + invisible(.Call(`_arrow_Buffer__ZeroPadding`, buffer)) } -Buffer__capacity <- function(buffer) { - .Call(`_arrow_Buffer__capacity`, buffer) +Buffer__capacity <- function(buffer){ + .Call(`_arrow_Buffer__capacity`, buffer) } -Buffer__size <- function(buffer) { - .Call(`_arrow_Buffer__size`, buffer) +Buffer__size <- function(buffer){ + .Call(`_arrow_Buffer__size`, buffer) } -r___RBuffer__initialize <- function(x) { - .Call(`_arrow_r___RBuffer__initialize`, x) +r___RBuffer__initialize <- function(x){ + .Call(`_arrow_r___RBuffer__initialize`, x) } -Buffer__data <- function(buffer) { - .Call(`_arrow_Buffer__data`, buffer) +Buffer__data <- 
function(buffer){ + .Call(`_arrow_Buffer__data`, buffer) } -Buffer__Equals <- function(x, y) { - .Call(`_arrow_Buffer__Equals`, x, y) +Buffer__Equals <- function(x, y){ + .Call(`_arrow_Buffer__Equals`, x, y) } -ChunkedArray__length <- function(chunked_array) { - .Call(`_arrow_ChunkedArray__length`, chunked_array) +ChunkedArray__length <- function(chunked_array){ + .Call(`_arrow_ChunkedArray__length`, chunked_array) } -ChunkedArray__null_count <- function(chunked_array) { - .Call(`_arrow_ChunkedArray__null_count`, chunked_array) +ChunkedArray__null_count <- function(chunked_array){ + .Call(`_arrow_ChunkedArray__null_count`, chunked_array) } -ChunkedArray__num_chunks <- function(chunked_array) { - .Call(`_arrow_ChunkedArray__num_chunks`, chunked_array) +ChunkedArray__num_chunks <- function(chunked_array){ + .Call(`_arrow_ChunkedArray__num_chunks`, chunked_array) } -ChunkedArray__chunk <- function(chunked_array, i) { - .Call(`_arrow_ChunkedArray__chunk`, chunked_array, i) +ChunkedArray__chunk <- function(chunked_array, i){ + .Call(`_arrow_ChunkedArray__chunk`, chunked_array, i) } -ChunkedArray__chunks <- function(chunked_array) { - .Call(`_arrow_ChunkedArray__chunks`, chunked_array) +ChunkedArray__chunks <- function(chunked_array){ + .Call(`_arrow_ChunkedArray__chunks`, chunked_array) } -ChunkedArray__type <- function(chunked_array) { - .Call(`_arrow_ChunkedArray__type`, chunked_array) +ChunkedArray__type <- function(chunked_array){ + .Call(`_arrow_ChunkedArray__type`, chunked_array) } -ChunkedArray__Slice1 <- function(chunked_array, offset) { - .Call(`_arrow_ChunkedArray__Slice1`, chunked_array, offset) +ChunkedArray__Slice1 <- function(chunked_array, offset){ + .Call(`_arrow_ChunkedArray__Slice1`, chunked_array, offset) } -ChunkedArray__Slice2 <- function(chunked_array, offset, length) { - .Call(`_arrow_ChunkedArray__Slice2`, chunked_array, offset, length) +ChunkedArray__Slice2 <- function(chunked_array, offset, length){ + .Call(`_arrow_ChunkedArray__Slice2`, 
chunked_array, offset, length) } -ChunkedArray__View <- function(array, type) { - .Call(`_arrow_ChunkedArray__View`, array, type) +ChunkedArray__View <- function(array, type){ + .Call(`_arrow_ChunkedArray__View`, array, type) } -ChunkedArray__Validate <- function(chunked_array) { - invisible(.Call(`_arrow_ChunkedArray__Validate`, chunked_array)) +ChunkedArray__Validate <- function(chunked_array){ + invisible(.Call(`_arrow_ChunkedArray__Validate`, chunked_array)) } -ChunkedArray__Equals <- function(x, y) { - .Call(`_arrow_ChunkedArray__Equals`, x, y) +ChunkedArray__Equals <- function(x, y){ + .Call(`_arrow_ChunkedArray__Equals`, x, y) } -ChunkedArray__ToString <- function(x) { - .Call(`_arrow_ChunkedArray__ToString`, x) +ChunkedArray__ToString <- function(x){ + .Call(`_arrow_ChunkedArray__ToString`, x) } -ChunkedArray__from_list <- function(chunks, s_type) { - .Call(`_arrow_ChunkedArray__from_list`, chunks, s_type) +ChunkedArray__from_list <- function(chunks, s_type){ + .Call(`_arrow_ChunkedArray__from_list`, chunks, s_type) } -util___Codec__Create <- function(codec, compression_level) { - .Call(`_arrow_util___Codec__Create`, codec, compression_level) +util___Codec__Create <- function(codec, compression_level){ + .Call(`_arrow_util___Codec__Create`, codec, compression_level) } -util___Codec__name <- function(codec) { - .Call(`_arrow_util___Codec__name`, codec) +util___Codec__name <- function(codec){ + .Call(`_arrow_util___Codec__name`, codec) } -util___Codec__IsAvailable <- function(codec) { - .Call(`_arrow_util___Codec__IsAvailable`, codec) +util___Codec__IsAvailable <- function(codec){ + .Call(`_arrow_util___Codec__IsAvailable`, codec) } -io___CompressedOutputStream__Make <- function(codec, raw) { - .Call(`_arrow_io___CompressedOutputStream__Make`, codec, raw) +io___CompressedOutputStream__Make <- function(codec, raw){ + .Call(`_arrow_io___CompressedOutputStream__Make`, codec, raw) } -io___CompressedInputStream__Make <- function(codec, raw) { - 
.Call(`_arrow_io___CompressedInputStream__Make`, codec, raw) +io___CompressedInputStream__Make <- function(codec, raw){ + .Call(`_arrow_io___CompressedInputStream__Make`, codec, raw) } -RecordBatch__cast <- function(batch, schema, options) { - .Call(`_arrow_RecordBatch__cast`, batch, schema, options) +ExecPlan_create <- function(use_threads){ + .Call(`_arrow_ExecPlan_create`, use_threads) } -Table__cast <- function(table, schema, options) { - .Call(`_arrow_Table__cast`, table, schema, options) +ExecPlan_run <- function(plan, final_node){ + .Call(`_arrow_ExecPlan_run`, plan, final_node) } -compute__CallFunction <- function(func_name, args, options) { - .Call(`_arrow_compute__CallFunction`, func_name, args, options) +ExecNode_Scan <- function(plan, dataset, filter, materialized_field_names){ + .Call(`_arrow_ExecNode_Scan`, plan, dataset, filter, materialized_field_names) } -compute__GroupBy <- function(arguments, keys, options) { - .Call(`_arrow_compute__GroupBy`, arguments, keys, options) +ExecNode_Filter <- function(input, filter){ + .Call(`_arrow_ExecNode_Filter`, input, filter) } -compute__GetFunctionNames <- function() { - .Call(`_arrow_compute__GetFunctionNames`) +ExecNode_Project <- function(input, exprs, names){ + .Call(`_arrow_ExecNode_Project`, input, exprs, names) } -build_info <- function() { - .Call(`_arrow_build_info`) +ExecNode_ScalarAggregate <- function(input, options, target_names, out_field_names){ + .Call(`_arrow_ExecNode_ScalarAggregate`, input, options, target_names, out_field_names) } -runtime_info <- function() { - .Call(`_arrow_runtime_info`) +ExecNode_GroupByAggregate <- function(input, group_vars, agg_srcs, aggregations){ + .Call(`_arrow_ExecNode_GroupByAggregate`, input, group_vars, agg_srcs, aggregations) } -csv___WriteOptions__initialize <- function(options) { - .Call(`_arrow_csv___WriteOptions__initialize`, options) +RecordBatch__cast <- function(batch, schema, options){ + .Call(`_arrow_RecordBatch__cast`, batch, schema, options) } 
-csv___ReadOptions__initialize <- function(options) { - .Call(`_arrow_csv___ReadOptions__initialize`, options) +Table__cast <- function(table, schema, options){ + .Call(`_arrow_Table__cast`, table, schema, options) } -csv___ParseOptions__initialize <- function(options) { - .Call(`_arrow_csv___ParseOptions__initialize`, options) +compute__CallFunction <- function(func_name, args, options){ + .Call(`_arrow_compute__CallFunction`, func_name, args, options) } -csv___ReadOptions__column_names <- function(options) { - .Call(`_arrow_csv___ReadOptions__column_names`, options) +compute__GroupBy <- function(arguments, keys, options){ + .Call(`_arrow_compute__GroupBy`, arguments, keys, options) } -csv___ConvertOptions__initialize <- function(options) { - .Call(`_arrow_csv___ConvertOptions__initialize`, options) +compute__GetFunctionNames <- function(){ + .Call(`_arrow_compute__GetFunctionNames`) } -csv___TableReader__Make <- function(input, read_options, parse_options, convert_options) { - .Call(`_arrow_csv___TableReader__Make`, input, read_options, parse_options, convert_options) +build_info <- function(){ + .Call(`_arrow_build_info`) } -csv___TableReader__Read <- function(table_reader) { - .Call(`_arrow_csv___TableReader__Read`, table_reader) +runtime_info <- function(){ + .Call(`_arrow_runtime_info`) } -TimestampParser__kind <- function(parser) { - .Call(`_arrow_TimestampParser__kind`, parser) +csv___WriteOptions__initialize <- function(options){ + .Call(`_arrow_csv___WriteOptions__initialize`, options) } -TimestampParser__format <- function(parser) { - .Call(`_arrow_TimestampParser__format`, parser) +csv___ReadOptions__initialize <- function(options){ + .Call(`_arrow_csv___ReadOptions__initialize`, options) } -TimestampParser__MakeStrptime <- function(format) { - .Call(`_arrow_TimestampParser__MakeStrptime`, format) +csv___ParseOptions__initialize <- function(options){ + .Call(`_arrow_csv___ParseOptions__initialize`, options) } -TimestampParser__MakeISO8601 <- function() 
{ - .Call(`_arrow_TimestampParser__MakeISO8601`) +csv___ReadOptions__column_names <- function(options){ + .Call(`_arrow_csv___ReadOptions__column_names`, options) } -csv___WriteCSV__Table <- function(table, write_options, stream) { - invisible(.Call(`_arrow_csv___WriteCSV__Table`, table, write_options, stream)) +csv___ConvertOptions__initialize <- function(options){ + .Call(`_arrow_csv___ConvertOptions__initialize`, options) } -csv___WriteCSV__RecordBatch <- function(record_batch, write_options, stream) { - invisible(.Call(`_arrow_csv___WriteCSV__RecordBatch`, record_batch, write_options, stream)) +csv___TableReader__Make <- function(input, read_options, parse_options, convert_options){ + .Call(`_arrow_csv___TableReader__Make`, input, read_options, parse_options, convert_options) } -dataset___Dataset__NewScan <- function(ds) { - .Call(`_arrow_dataset___Dataset__NewScan`, ds) +csv___TableReader__Read <- function(table_reader){ + .Call(`_arrow_csv___TableReader__Read`, table_reader) } -dataset___Dataset__schema <- function(dataset) { - .Call(`_arrow_dataset___Dataset__schema`, dataset) +TimestampParser__kind <- function(parser){ + .Call(`_arrow_TimestampParser__kind`, parser) } -dataset___Dataset__type_name <- function(dataset) { - .Call(`_arrow_dataset___Dataset__type_name`, dataset) +TimestampParser__format <- function(parser){ + .Call(`_arrow_TimestampParser__format`, parser) } -dataset___Dataset__ReplaceSchema <- function(dataset, schm) { - .Call(`_arrow_dataset___Dataset__ReplaceSchema`, dataset, schm) +TimestampParser__MakeStrptime <- function(format){ + .Call(`_arrow_TimestampParser__MakeStrptime`, format) } -dataset___UnionDataset__create <- function(datasets, schm) { - .Call(`_arrow_dataset___UnionDataset__create`, datasets, schm) +TimestampParser__MakeISO8601 <- function(){ + .Call(`_arrow_TimestampParser__MakeISO8601`) } -dataset___InMemoryDataset__create <- function(table) { - .Call(`_arrow_dataset___InMemoryDataset__create`, table) +csv___WriteCSV__Table 
<- function(table, write_options, stream){ + invisible(.Call(`_arrow_csv___WriteCSV__Table`, table, write_options, stream)) } -dataset___UnionDataset__children <- function(ds) { - .Call(`_arrow_dataset___UnionDataset__children`, ds) +csv___WriteCSV__RecordBatch <- function(record_batch, write_options, stream){ + invisible(.Call(`_arrow_csv___WriteCSV__RecordBatch`, record_batch, write_options, stream)) } -dataset___FileSystemDataset__format <- function(dataset) { - .Call(`_arrow_dataset___FileSystemDataset__format`, dataset) +dataset___Dataset__NewScan <- function(ds){ + .Call(`_arrow_dataset___Dataset__NewScan`, ds) } -dataset___FileSystemDataset__filesystem <- function(dataset) { - .Call(`_arrow_dataset___FileSystemDataset__filesystem`, dataset) +dataset___Dataset__schema <- function(dataset){ + .Call(`_arrow_dataset___Dataset__schema`, dataset) } -dataset___FileSystemDataset__files <- function(dataset) { - .Call(`_arrow_dataset___FileSystemDataset__files`, dataset) +dataset___Dataset__type_name <- function(dataset){ + .Call(`_arrow_dataset___Dataset__type_name`, dataset) } -dataset___DatasetFactory__Finish1 <- function(factory, unify_schemas) { - .Call(`_arrow_dataset___DatasetFactory__Finish1`, factory, unify_schemas) +dataset___Dataset__ReplaceSchema <- function(dataset, schm){ + .Call(`_arrow_dataset___Dataset__ReplaceSchema`, dataset, schm) } -dataset___DatasetFactory__Finish2 <- function(factory, schema) { - .Call(`_arrow_dataset___DatasetFactory__Finish2`, factory, schema) +dataset___UnionDataset__create <- function(datasets, schm){ + .Call(`_arrow_dataset___UnionDataset__create`, datasets, schm) } -dataset___DatasetFactory__Inspect <- function(factory, unify_schemas) { - .Call(`_arrow_dataset___DatasetFactory__Inspect`, factory, unify_schemas) +dataset___InMemoryDataset__create <- function(table){ + .Call(`_arrow_dataset___InMemoryDataset__create`, table) } -dataset___UnionDatasetFactory__Make <- function(children) { - 
.Call(`_arrow_dataset___UnionDatasetFactory__Make`, children) +dataset___UnionDataset__children <- function(ds){ + .Call(`_arrow_dataset___UnionDataset__children`, ds) } -dataset___FileSystemDatasetFactory__Make0 <- function(fs, paths, format) { - .Call(`_arrow_dataset___FileSystemDatasetFactory__Make0`, fs, paths, format) +dataset___FileSystemDataset__format <- function(dataset){ + .Call(`_arrow_dataset___FileSystemDataset__format`, dataset) } -dataset___FileSystemDatasetFactory__Make2 <- function(fs, selector, format, partitioning) { - .Call(`_arrow_dataset___FileSystemDatasetFactory__Make2`, fs, selector, format, partitioning) +dataset___FileSystemDataset__filesystem <- function(dataset){ + .Call(`_arrow_dataset___FileSystemDataset__filesystem`, dataset) } -dataset___FileSystemDatasetFactory__Make1 <- function(fs, selector, format) { - .Call(`_arrow_dataset___FileSystemDatasetFactory__Make1`, fs, selector, format) +dataset___FileSystemDataset__files <- function(dataset){ + .Call(`_arrow_dataset___FileSystemDataset__files`, dataset) } -dataset___FileSystemDatasetFactory__Make3 <- function(fs, selector, format, factory) { - .Call(`_arrow_dataset___FileSystemDatasetFactory__Make3`, fs, selector, format, factory) +dataset___DatasetFactory__Finish1 <- function(factory, unify_schemas){ + .Call(`_arrow_dataset___DatasetFactory__Finish1`, factory, unify_schemas) } -dataset___FileFormat__type_name <- function(format) { - .Call(`_arrow_dataset___FileFormat__type_name`, format) +dataset___DatasetFactory__Finish2 <- function(factory, schema){ + .Call(`_arrow_dataset___DatasetFactory__Finish2`, factory, schema) } -dataset___FileFormat__DefaultWriteOptions <- function(fmt) { - .Call(`_arrow_dataset___FileFormat__DefaultWriteOptions`, fmt) +dataset___DatasetFactory__Inspect <- function(factory, unify_schemas){ + .Call(`_arrow_dataset___DatasetFactory__Inspect`, factory, unify_schemas) } -dataset___ParquetFileFormat__Make <- function(options, dict_columns) { - 
.Call(`_arrow_dataset___ParquetFileFormat__Make`, options, dict_columns) +dataset___UnionDatasetFactory__Make <- function(children){ + .Call(`_arrow_dataset___UnionDatasetFactory__Make`, children) } -dataset___FileWriteOptions__type_name <- function(options) { - .Call(`_arrow_dataset___FileWriteOptions__type_name`, options) +dataset___FileSystemDatasetFactory__Make0 <- function(fs, paths, format){ + .Call(`_arrow_dataset___FileSystemDatasetFactory__Make0`, fs, paths, format) } -dataset___ParquetFileWriteOptions__update <- function(options, writer_props, arrow_writer_props) { - invisible(.Call(`_arrow_dataset___ParquetFileWriteOptions__update`, options, writer_props, arrow_writer_props)) +dataset___FileSystemDatasetFactory__Make2 <- function(fs, selector, format, partitioning){ + .Call(`_arrow_dataset___FileSystemDatasetFactory__Make2`, fs, selector, format, partitioning) } -dataset___IpcFileWriteOptions__update2 <- function(ipc_options, use_legacy_format, codec, metadata_version) { - invisible(.Call(`_arrow_dataset___IpcFileWriteOptions__update2`, ipc_options, use_legacy_format, codec, metadata_version)) +dataset___FileSystemDatasetFactory__Make1 <- function(fs, selector, format){ + .Call(`_arrow_dataset___FileSystemDatasetFactory__Make1`, fs, selector, format) } -dataset___IpcFileWriteOptions__update1 <- function(ipc_options, use_legacy_format, metadata_version) { - invisible(.Call(`_arrow_dataset___IpcFileWriteOptions__update1`, ipc_options, use_legacy_format, metadata_version)) +dataset___FileSystemDatasetFactory__Make3 <- function(fs, selector, format, factory){ + .Call(`_arrow_dataset___FileSystemDatasetFactory__Make3`, fs, selector, format, factory) } -dataset___CsvFileWriteOptions__update <- function(csv_options, write_options) { - invisible(.Call(`_arrow_dataset___CsvFileWriteOptions__update`, csv_options, write_options)) +dataset___FileFormat__type_name <- function(format){ + .Call(`_arrow_dataset___FileFormat__type_name`, format) } 
-dataset___IpcFileFormat__Make <- function() { - .Call(`_arrow_dataset___IpcFileFormat__Make`) +dataset___FileFormat__DefaultWriteOptions <- function(fmt){ + .Call(`_arrow_dataset___FileFormat__DefaultWriteOptions`, fmt) } -dataset___CsvFileFormat__Make <- function(parse_options, convert_options, read_options) { - .Call(`_arrow_dataset___CsvFileFormat__Make`, parse_options, convert_options, read_options) +dataset___ParquetFileFormat__Make <- function(options, dict_columns){ + .Call(`_arrow_dataset___ParquetFileFormat__Make`, options, dict_columns) } -dataset___FragmentScanOptions__type_name <- function(fragment_scan_options) { - .Call(`_arrow_dataset___FragmentScanOptions__type_name`, fragment_scan_options) +dataset___FileWriteOptions__type_name <- function(options){ + .Call(`_arrow_dataset___FileWriteOptions__type_name`, options) } -dataset___CsvFragmentScanOptions__Make <- function(convert_options, read_options) { - .Call(`_arrow_dataset___CsvFragmentScanOptions__Make`, convert_options, read_options) +dataset___ParquetFileWriteOptions__update <- function(options, writer_props, arrow_writer_props){ + invisible(.Call(`_arrow_dataset___ParquetFileWriteOptions__update`, options, writer_props, arrow_writer_props)) } -dataset___ParquetFragmentScanOptions__Make <- function(use_buffered_stream, buffer_size, pre_buffer) { - .Call(`_arrow_dataset___ParquetFragmentScanOptions__Make`, use_buffered_stream, buffer_size, pre_buffer) +dataset___IpcFileWriteOptions__update2 <- function(ipc_options, use_legacy_format, codec, metadata_version){ + invisible(.Call(`_arrow_dataset___IpcFileWriteOptions__update2`, ipc_options, use_legacy_format, codec, metadata_version)) } -dataset___DirectoryPartitioning <- function(schm, segment_encoding) { - .Call(`_arrow_dataset___DirectoryPartitioning`, schm, segment_encoding) +dataset___IpcFileWriteOptions__update1 <- function(ipc_options, use_legacy_format, metadata_version){ + invisible(.Call(`_arrow_dataset___IpcFileWriteOptions__update1`, 
ipc_options, use_legacy_format, metadata_version)) } -dataset___DirectoryPartitioning__MakeFactory <- function(field_names, segment_encoding) { - .Call(`_arrow_dataset___DirectoryPartitioning__MakeFactory`, field_names, segment_encoding) +dataset___CsvFileWriteOptions__update <- function(csv_options, write_options){ + invisible(.Call(`_arrow_dataset___CsvFileWriteOptions__update`, csv_options, write_options)) } -dataset___HivePartitioning <- function(schm, null_fallback, segment_encoding) { - .Call(`_arrow_dataset___HivePartitioning`, schm, null_fallback, segment_encoding) +dataset___IpcFileFormat__Make <- function(){ + .Call(`_arrow_dataset___IpcFileFormat__Make`) } -dataset___HivePartitioning__MakeFactory <- function(null_fallback, segment_encoding) { - .Call(`_arrow_dataset___HivePartitioning__MakeFactory`, null_fallback, segment_encoding) +dataset___CsvFileFormat__Make <- function(parse_options, convert_options, read_options){ + .Call(`_arrow_dataset___CsvFileFormat__Make`, parse_options, convert_options, read_options) } -dataset___ScannerBuilder__ProjectNames <- function(sb, cols) { - invisible(.Call(`_arrow_dataset___ScannerBuilder__ProjectNames`, sb, cols)) +dataset___FragmentScanOptions__type_name <- function(fragment_scan_options){ + .Call(`_arrow_dataset___FragmentScanOptions__type_name`, fragment_scan_options) } -dataset___ScannerBuilder__ProjectExprs <- function(sb, exprs, names) { - invisible(.Call(`_arrow_dataset___ScannerBuilder__ProjectExprs`, sb, exprs, names)) +dataset___CsvFragmentScanOptions__Make <- function(convert_options, read_options){ + .Call(`_arrow_dataset___CsvFragmentScanOptions__Make`, convert_options, read_options) } -dataset___ScannerBuilder__Filter <- function(sb, expr) { - invisible(.Call(`_arrow_dataset___ScannerBuilder__Filter`, sb, expr)) +dataset___ParquetFragmentScanOptions__Make <- function(use_buffered_stream, buffer_size, pre_buffer){ + .Call(`_arrow_dataset___ParquetFragmentScanOptions__Make`, use_buffered_stream, 
buffer_size, pre_buffer) } -dataset___ScannerBuilder__UseThreads <- function(sb, threads) { - invisible(.Call(`_arrow_dataset___ScannerBuilder__UseThreads`, sb, threads)) +dataset___DirectoryPartitioning <- function(schm, segment_encoding){ + .Call(`_arrow_dataset___DirectoryPartitioning`, schm, segment_encoding) } -dataset___ScannerBuilder__UseAsync <- function(sb, use_async) { - invisible(.Call(`_arrow_dataset___ScannerBuilder__UseAsync`, sb, use_async)) +dataset___DirectoryPartitioning__MakeFactory <- function(field_names, segment_encoding){ + .Call(`_arrow_dataset___DirectoryPartitioning__MakeFactory`, field_names, segment_encoding) } -dataset___ScannerBuilder__BatchSize <- function(sb, batch_size) { - invisible(.Call(`_arrow_dataset___ScannerBuilder__BatchSize`, sb, batch_size)) +dataset___HivePartitioning <- function(schm, null_fallback, segment_encoding){ + .Call(`_arrow_dataset___HivePartitioning`, schm, null_fallback, segment_encoding) } -dataset___ScannerBuilder__FragmentScanOptions <- function(sb, options) { - invisible(.Call(`_arrow_dataset___ScannerBuilder__FragmentScanOptions`, sb, options)) +dataset___HivePartitioning__MakeFactory <- function(null_fallback, segment_encoding){ + .Call(`_arrow_dataset___HivePartitioning__MakeFactory`, null_fallback, segment_encoding) } -dataset___ScannerBuilder__schema <- function(sb) { - .Call(`_arrow_dataset___ScannerBuilder__schema`, sb) +dataset___ScannerBuilder__ProjectNames <- function(sb, cols){ + invisible(.Call(`_arrow_dataset___ScannerBuilder__ProjectNames`, sb, cols)) } -dataset___ScannerBuilder__Finish <- function(sb) { - .Call(`_arrow_dataset___ScannerBuilder__Finish`, sb) +dataset___ScannerBuilder__ProjectExprs <- function(sb, exprs, names){ + invisible(.Call(`_arrow_dataset___ScannerBuilder__ProjectExprs`, sb, exprs, names)) } -dataset___Scanner__ToTable <- function(scanner) { - .Call(`_arrow_dataset___Scanner__ToTable`, scanner) +dataset___ScannerBuilder__Filter <- function(sb, expr){ + 
invisible(.Call(`_arrow_dataset___ScannerBuilder__Filter`, sb, expr)) } -dataset___Scanner__ScanBatches <- function(scanner) { - .Call(`_arrow_dataset___Scanner__ScanBatches`, scanner) +dataset___ScannerBuilder__UseThreads <- function(sb, threads){ + invisible(.Call(`_arrow_dataset___ScannerBuilder__UseThreads`, sb, threads)) } -dataset___Scanner__ToRecordBatchReader <- function(scanner) { - .Call(`_arrow_dataset___Scanner__ToRecordBatchReader`, scanner) +dataset___ScannerBuilder__UseAsync <- function(sb, use_async){ + invisible(.Call(`_arrow_dataset___ScannerBuilder__UseAsync`, sb, use_async)) } -dataset___Scanner__head <- function(scanner, n) { - .Call(`_arrow_dataset___Scanner__head`, scanner, n) +dataset___ScannerBuilder__BatchSize <- function(sb, batch_size){ + invisible(.Call(`_arrow_dataset___ScannerBuilder__BatchSize`, sb, batch_size)) } -dataset___Scanner__schema <- function(sc) { - .Call(`_arrow_dataset___Scanner__schema`, sc) +dataset___ScannerBuilder__FragmentScanOptions <- function(sb, options){ + invisible(.Call(`_arrow_dataset___ScannerBuilder__FragmentScanOptions`, sb, options)) } -dataset___ScanTask__get_batches <- function(scan_task) { - .Call(`_arrow_dataset___ScanTask__get_batches`, scan_task) +dataset___ScannerBuilder__schema <- function(sb){ + .Call(`_arrow_dataset___ScannerBuilder__schema`, sb) } -dataset___Dataset__Write <- function(file_write_options, filesystem, base_dir, partitioning, basename_template, scanner) { - invisible(.Call(`_arrow_dataset___Dataset__Write`, file_write_options, filesystem, base_dir, partitioning, basename_template, scanner)) +dataset___ScannerBuilder__Finish <- function(sb){ + .Call(`_arrow_dataset___ScannerBuilder__Finish`, sb) } -dataset___Scanner__TakeRows <- function(scanner, indices) { - .Call(`_arrow_dataset___Scanner__TakeRows`, scanner, indices) +dataset___Scanner__ToTable <- function(scanner){ + .Call(`_arrow_dataset___Scanner__ToTable`, scanner) } -dataset___Scanner__CountRows <- function(scanner) { - 
.Call(`_arrow_dataset___Scanner__CountRows`, scanner) +dataset___Scanner__ScanBatches <- function(scanner){ + .Call(`_arrow_dataset___Scanner__ScanBatches`, scanner) } -Int8__initialize <- function() { - .Call(`_arrow_Int8__initialize`) +dataset___Scanner__ToRecordBatchReader <- function(scanner){ + .Call(`_arrow_dataset___Scanner__ToRecordBatchReader`, scanner) } -Int16__initialize <- function() { - .Call(`_arrow_Int16__initialize`) +dataset___Scanner__head <- function(scanner, n){ + .Call(`_arrow_dataset___Scanner__head`, scanner, n) } -Int32__initialize <- function() { - .Call(`_arrow_Int32__initialize`) +dataset___Scanner__schema <- function(sc){ + .Call(`_arrow_dataset___Scanner__schema`, sc) } -Int64__initialize <- function() { - .Call(`_arrow_Int64__initialize`) +dataset___ScanTask__get_batches <- function(scan_task){ + .Call(`_arrow_dataset___ScanTask__get_batches`, scan_task) } -UInt8__initialize <- function() { - .Call(`_arrow_UInt8__initialize`) +dataset___Dataset__Write <- function(file_write_options, filesystem, base_dir, partitioning, basename_template, scanner){ + invisible(.Call(`_arrow_dataset___Dataset__Write`, file_write_options, filesystem, base_dir, partitioning, basename_template, scanner)) } -UInt16__initialize <- function() { - .Call(`_arrow_UInt16__initialize`) +dataset___Scanner__TakeRows <- function(scanner, indices){ + .Call(`_arrow_dataset___Scanner__TakeRows`, scanner, indices) } -UInt32__initialize <- function() { - .Call(`_arrow_UInt32__initialize`) +dataset___Scanner__CountRows <- function(scanner){ + .Call(`_arrow_dataset___Scanner__CountRows`, scanner) } -UInt64__initialize <- function() { - .Call(`_arrow_UInt64__initialize`) +Int8__initialize <- function(){ + .Call(`_arrow_Int8__initialize`) } -Float16__initialize <- function() { - .Call(`_arrow_Float16__initialize`) +Int16__initialize <- function(){ + .Call(`_arrow_Int16__initialize`) } -Float32__initialize <- function() { - .Call(`_arrow_Float32__initialize`) +Int32__initialize 
<- function(){ + .Call(`_arrow_Int32__initialize`) } -Float64__initialize <- function() { - .Call(`_arrow_Float64__initialize`) +Int64__initialize <- function(){ + .Call(`_arrow_Int64__initialize`) } -Boolean__initialize <- function() { - .Call(`_arrow_Boolean__initialize`) +UInt8__initialize <- function(){ + .Call(`_arrow_UInt8__initialize`) } -Utf8__initialize <- function() { - .Call(`_arrow_Utf8__initialize`) +UInt16__initialize <- function(){ + .Call(`_arrow_UInt16__initialize`) } -LargeUtf8__initialize <- function() { - .Call(`_arrow_LargeUtf8__initialize`) +UInt32__initialize <- function(){ + .Call(`_arrow_UInt32__initialize`) } -Binary__initialize <- function() { - .Call(`_arrow_Binary__initialize`) +UInt64__initialize <- function(){ + .Call(`_arrow_UInt64__initialize`) } -LargeBinary__initialize <- function() { - .Call(`_arrow_LargeBinary__initialize`) +Float16__initialize <- function(){ + .Call(`_arrow_Float16__initialize`) } -Date32__initialize <- function() { - .Call(`_arrow_Date32__initialize`) +Float32__initialize <- function(){ + .Call(`_arrow_Float32__initialize`) } -Date64__initialize <- function() { - .Call(`_arrow_Date64__initialize`) +Float64__initialize <- function(){ + .Call(`_arrow_Float64__initialize`) } -Null__initialize <- function() { - .Call(`_arrow_Null__initialize`) +Boolean__initialize <- function(){ + .Call(`_arrow_Boolean__initialize`) } -Decimal128Type__initialize <- function(precision, scale) { - .Call(`_arrow_Decimal128Type__initialize`, precision, scale) +Utf8__initialize <- function(){ + .Call(`_arrow_Utf8__initialize`) } -FixedSizeBinary__initialize <- function(byte_width) { - .Call(`_arrow_FixedSizeBinary__initialize`, byte_width) +LargeUtf8__initialize <- function(){ + .Call(`_arrow_LargeUtf8__initialize`) } -Timestamp__initialize <- function(unit, timezone) { - .Call(`_arrow_Timestamp__initialize`, unit, timezone) +Binary__initialize <- function(){ + .Call(`_arrow_Binary__initialize`) } -Time32__initialize <- function(unit) 
{ - .Call(`_arrow_Time32__initialize`, unit) +LargeBinary__initialize <- function(){ + .Call(`_arrow_LargeBinary__initialize`) } -Time64__initialize <- function(unit) { - .Call(`_arrow_Time64__initialize`, unit) +Date32__initialize <- function(){ + .Call(`_arrow_Date32__initialize`) } -list__ <- function(x) { - .Call(`_arrow_list__`, x) +Date64__initialize <- function(){ + .Call(`_arrow_Date64__initialize`) } -large_list__ <- function(x) { - .Call(`_arrow_large_list__`, x) +Null__initialize <- function(){ + .Call(`_arrow_Null__initialize`) } -fixed_size_list__ <- function(x, list_size) { - .Call(`_arrow_fixed_size_list__`, x, list_size) +Decimal128Type__initialize <- function(precision, scale){ + .Call(`_arrow_Decimal128Type__initialize`, precision, scale) } -struct__ <- function(fields) { - .Call(`_arrow_struct__`, fields) +FixedSizeBinary__initialize <- function(byte_width){ + .Call(`_arrow_FixedSizeBinary__initialize`, byte_width) } -DataType__ToString <- function(type) { - .Call(`_arrow_DataType__ToString`, type) +Timestamp__initialize <- function(unit, timezone){ + .Call(`_arrow_Timestamp__initialize`, unit, timezone) } -DataType__name <- function(type) { - .Call(`_arrow_DataType__name`, type) +Time32__initialize <- function(unit){ + .Call(`_arrow_Time32__initialize`, unit) } -DataType__Equals <- function(lhs, rhs) { - .Call(`_arrow_DataType__Equals`, lhs, rhs) +Time64__initialize <- function(unit){ + .Call(`_arrow_Time64__initialize`, unit) } -DataType__num_fields <- function(type) { - .Call(`_arrow_DataType__num_fields`, type) +list__ <- function(x){ + .Call(`_arrow_list__`, x) } -DataType__fields <- function(type) { - .Call(`_arrow_DataType__fields`, type) +large_list__ <- function(x){ + .Call(`_arrow_large_list__`, x) } -DataType__id <- function(type) { - .Call(`_arrow_DataType__id`, type) +fixed_size_list__ <- function(x, list_size){ + .Call(`_arrow_fixed_size_list__`, x, list_size) } -ListType__ToString <- function(type) { - 
.Call(`_arrow_ListType__ToString`, type) +struct__ <- function(fields){ + .Call(`_arrow_struct__`, fields) } -FixedWidthType__bit_width <- function(type) { - .Call(`_arrow_FixedWidthType__bit_width`, type) +DataType__ToString <- function(type){ + .Call(`_arrow_DataType__ToString`, type) } -DateType__unit <- function(type) { - .Call(`_arrow_DateType__unit`, type) +DataType__name <- function(type){ + .Call(`_arrow_DataType__name`, type) } -TimeType__unit <- function(type) { - .Call(`_arrow_TimeType__unit`, type) +DataType__Equals <- function(lhs, rhs){ + .Call(`_arrow_DataType__Equals`, lhs, rhs) } -DecimalType__precision <- function(type) { - .Call(`_arrow_DecimalType__precision`, type) +DataType__num_fields <- function(type){ + .Call(`_arrow_DataType__num_fields`, type) } -DecimalType__scale <- function(type) { - .Call(`_arrow_DecimalType__scale`, type) +DataType__fields <- function(type){ + .Call(`_arrow_DataType__fields`, type) } -TimestampType__timezone <- function(type) { - .Call(`_arrow_TimestampType__timezone`, type) +DataType__id <- function(type){ + .Call(`_arrow_DataType__id`, type) } -TimestampType__unit <- function(type) { - .Call(`_arrow_TimestampType__unit`, type) +ListType__ToString <- function(type){ + .Call(`_arrow_ListType__ToString`, type) } -DictionaryType__initialize <- function(index_type, value_type, ordered) { - .Call(`_arrow_DictionaryType__initialize`, index_type, value_type, ordered) +FixedWidthType__bit_width <- function(type){ + .Call(`_arrow_FixedWidthType__bit_width`, type) } -DictionaryType__index_type <- function(type) { - .Call(`_arrow_DictionaryType__index_type`, type) +DateType__unit <- function(type){ + .Call(`_arrow_DateType__unit`, type) } -DictionaryType__value_type <- function(type) { - .Call(`_arrow_DictionaryType__value_type`, type) +TimeType__unit <- function(type){ + .Call(`_arrow_TimeType__unit`, type) } -DictionaryType__name <- function(type) { - .Call(`_arrow_DictionaryType__name`, type) +DecimalType__precision <- 
function(type){ + .Call(`_arrow_DecimalType__precision`, type) } -DictionaryType__ordered <- function(type) { - .Call(`_arrow_DictionaryType__ordered`, type) +DecimalType__scale <- function(type){ + .Call(`_arrow_DecimalType__scale`, type) } -StructType__GetFieldByName <- function(type, name) { - .Call(`_arrow_StructType__GetFieldByName`, type, name) +TimestampType__timezone <- function(type){ + .Call(`_arrow_TimestampType__timezone`, type) } -StructType__GetFieldIndex <- function(type, name) { - .Call(`_arrow_StructType__GetFieldIndex`, type, name) +TimestampType__unit <- function(type){ + .Call(`_arrow_TimestampType__unit`, type) } -StructType__field_names <- function(type) { - .Call(`_arrow_StructType__field_names`, type) +DictionaryType__initialize <- function(index_type, value_type, ordered){ + .Call(`_arrow_DictionaryType__initialize`, index_type, value_type, ordered) } -ListType__value_field <- function(type) { - .Call(`_arrow_ListType__value_field`, type) +DictionaryType__index_type <- function(type){ + .Call(`_arrow_DictionaryType__index_type`, type) } -ListType__value_type <- function(type) { - .Call(`_arrow_ListType__value_type`, type) +DictionaryType__value_type <- function(type){ + .Call(`_arrow_DictionaryType__value_type`, type) } -LargeListType__value_field <- function(type) { - .Call(`_arrow_LargeListType__value_field`, type) +DictionaryType__name <- function(type){ + .Call(`_arrow_DictionaryType__name`, type) } -LargeListType__value_type <- function(type) { - .Call(`_arrow_LargeListType__value_type`, type) +DictionaryType__ordered <- function(type){ + .Call(`_arrow_DictionaryType__ordered`, type) } -FixedSizeListType__value_field <- function(type) { - .Call(`_arrow_FixedSizeListType__value_field`, type) +StructType__GetFieldByName <- function(type, name){ + .Call(`_arrow_StructType__GetFieldByName`, type, name) } -FixedSizeListType__value_type <- function(type) { - .Call(`_arrow_FixedSizeListType__value_type`, type) +StructType__GetFieldIndex <- 
function(type, name){ + .Call(`_arrow_StructType__GetFieldIndex`, type, name) } -FixedSizeListType__list_size <- function(type) { - .Call(`_arrow_FixedSizeListType__list_size`, type) +StructType__field_names <- function(type){ + .Call(`_arrow_StructType__field_names`, type) } -compute___expr__call <- function(func_name, argument_list, options) { - .Call(`_arrow_compute___expr__call`, func_name, argument_list, options) +ListType__value_field <- function(type){ + .Call(`_arrow_ListType__value_field`, type) } -compute___expr__field_ref <- function(name) { - .Call(`_arrow_compute___expr__field_ref`, name) +ListType__value_type <- function(type){ + .Call(`_arrow_ListType__value_type`, type) } -compute___expr__get_field_ref_name <- function(x) { - .Call(`_arrow_compute___expr__get_field_ref_name`, x) +LargeListType__value_field <- function(type){ + .Call(`_arrow_LargeListType__value_field`, type) } -compute___expr__scalar <- function(x) { - .Call(`_arrow_compute___expr__scalar`, x) +LargeListType__value_type <- function(type){ + .Call(`_arrow_LargeListType__value_type`, type) } -compute___expr__ToString <- function(x) { - .Call(`_arrow_compute___expr__ToString`, x) +FixedSizeListType__value_field <- function(type){ + .Call(`_arrow_FixedSizeListType__value_field`, type) } -compute___expr__type <- function(x, schema) { - .Call(`_arrow_compute___expr__type`, x, schema) +FixedSizeListType__value_type <- function(type){ + .Call(`_arrow_FixedSizeListType__value_type`, type) } -compute___expr__type_id <- function(x, schema) { - .Call(`_arrow_compute___expr__type_id`, x, schema) +FixedSizeListType__list_size <- function(type){ + .Call(`_arrow_FixedSizeListType__list_size`, type) } -ipc___WriteFeather__Table <- function(stream, table, version, chunk_size, compression, compression_level) { - invisible(.Call(`_arrow_ipc___WriteFeather__Table`, stream, table, version, chunk_size, compression, compression_level)) +compute___expr__call <- function(func_name, argument_list, options){ + 
.Call(`_arrow_compute___expr__call`, func_name, argument_list, options) } -ipc___feather___Reader__version <- function(reader) { - .Call(`_arrow_ipc___feather___Reader__version`, reader) +field_names_in_expression <- function(x){ + .Call(`_arrow_field_names_in_expression`, x) } -ipc___feather___Reader__Read <- function(reader, columns) { - .Call(`_arrow_ipc___feather___Reader__Read`, reader, columns) +compute___expr__get_field_ref_name <- function(x){ + .Call(`_arrow_compute___expr__get_field_ref_name`, x) } -ipc___feather___Reader__Open <- function(stream) { - .Call(`_arrow_ipc___feather___Reader__Open`, stream) +compute___expr__field_ref <- function(name){ + .Call(`_arrow_compute___expr__field_ref`, name) } -ipc___feather___Reader__schema <- function(reader) { - .Call(`_arrow_ipc___feather___Reader__schema`, reader) +compute___expr__scalar <- function(x){ + .Call(`_arrow_compute___expr__scalar`, x) } -Field__initialize <- function(name, field, nullable) { - .Call(`_arrow_Field__initialize`, name, field, nullable) +compute___expr__ToString <- function(x){ + .Call(`_arrow_compute___expr__ToString`, x) } -Field__ToString <- function(field) { - .Call(`_arrow_Field__ToString`, field) +compute___expr__type <- function(x, schema){ + .Call(`_arrow_compute___expr__type`, x, schema) } -Field__name <- function(field) { - .Call(`_arrow_Field__name`, field) +compute___expr__type_id <- function(x, schema){ + .Call(`_arrow_compute___expr__type_id`, x, schema) } -Field__Equals <- function(field, other) { - .Call(`_arrow_Field__Equals`, field, other) +ipc___WriteFeather__Table <- function(stream, table, version, chunk_size, compression, compression_level){ + invisible(.Call(`_arrow_ipc___WriteFeather__Table`, stream, table, version, chunk_size, compression, compression_level)) } -Field__nullable <- function(field) { - .Call(`_arrow_Field__nullable`, field) +ipc___feather___Reader__version <- function(reader){ + .Call(`_arrow_ipc___feather___Reader__version`, reader) } 
-Field__type <- function(field) { - .Call(`_arrow_Field__type`, field) +ipc___feather___Reader__Read <- function(reader, columns){ + .Call(`_arrow_ipc___feather___Reader__Read`, reader, columns) } -fs___FileInfo__type <- function(x) { - .Call(`_arrow_fs___FileInfo__type`, x) +ipc___feather___Reader__Open <- function(stream){ + .Call(`_arrow_ipc___feather___Reader__Open`, stream) } -fs___FileInfo__set_type <- function(x, type) { - invisible(.Call(`_arrow_fs___FileInfo__set_type`, x, type)) +ipc___feather___Reader__schema <- function(reader){ + .Call(`_arrow_ipc___feather___Reader__schema`, reader) } -fs___FileInfo__path <- function(x) { - .Call(`_arrow_fs___FileInfo__path`, x) +Field__initialize <- function(name, field, nullable){ + .Call(`_arrow_Field__initialize`, name, field, nullable) } -fs___FileInfo__set_path <- function(x, path) { - invisible(.Call(`_arrow_fs___FileInfo__set_path`, x, path)) +Field__ToString <- function(field){ + .Call(`_arrow_Field__ToString`, field) } -fs___FileInfo__size <- function(x) { - .Call(`_arrow_fs___FileInfo__size`, x) +Field__name <- function(field){ + .Call(`_arrow_Field__name`, field) } -fs___FileInfo__set_size <- function(x, size) { - invisible(.Call(`_arrow_fs___FileInfo__set_size`, x, size)) +Field__Equals <- function(field, other){ + .Call(`_arrow_Field__Equals`, field, other) } -fs___FileInfo__base_name <- function(x) { - .Call(`_arrow_fs___FileInfo__base_name`, x) +Field__nullable <- function(field){ + .Call(`_arrow_Field__nullable`, field) } -fs___FileInfo__extension <- function(x) { - .Call(`_arrow_fs___FileInfo__extension`, x) +Field__type <- function(field){ + .Call(`_arrow_Field__type`, field) } -fs___FileInfo__mtime <- function(x) { - .Call(`_arrow_fs___FileInfo__mtime`, x) +fs___FileInfo__type <- function(x){ + .Call(`_arrow_fs___FileInfo__type`, x) } -fs___FileInfo__set_mtime <- function(x, time) { - invisible(.Call(`_arrow_fs___FileInfo__set_mtime`, x, time)) +fs___FileInfo__set_type <- function(x, type){ + 
invisible(.Call(`_arrow_fs___FileInfo__set_type`, x, type)) } -fs___FileSelector__base_dir <- function(selector) { - .Call(`_arrow_fs___FileSelector__base_dir`, selector) +fs___FileInfo__path <- function(x){ + .Call(`_arrow_fs___FileInfo__path`, x) } -fs___FileSelector__allow_not_found <- function(selector) { - .Call(`_arrow_fs___FileSelector__allow_not_found`, selector) +fs___FileInfo__set_path <- function(x, path){ + invisible(.Call(`_arrow_fs___FileInfo__set_path`, x, path)) } -fs___FileSelector__recursive <- function(selector) { - .Call(`_arrow_fs___FileSelector__recursive`, selector) +fs___FileInfo__size <- function(x){ + .Call(`_arrow_fs___FileInfo__size`, x) } -fs___FileSelector__create <- function(base_dir, allow_not_found, recursive) { - .Call(`_arrow_fs___FileSelector__create`, base_dir, allow_not_found, recursive) +fs___FileInfo__set_size <- function(x, size){ + invisible(.Call(`_arrow_fs___FileInfo__set_size`, x, size)) } -fs___FileSystem__GetTargetInfos_Paths <- function(file_system, paths) { - .Call(`_arrow_fs___FileSystem__GetTargetInfos_Paths`, file_system, paths) +fs___FileInfo__base_name <- function(x){ + .Call(`_arrow_fs___FileInfo__base_name`, x) } -fs___FileSystem__GetTargetInfos_FileSelector <- function(file_system, selector) { - .Call(`_arrow_fs___FileSystem__GetTargetInfos_FileSelector`, file_system, selector) +fs___FileInfo__extension <- function(x){ + .Call(`_arrow_fs___FileInfo__extension`, x) } -fs___FileSystem__CreateDir <- function(file_system, path, recursive) { - invisible(.Call(`_arrow_fs___FileSystem__CreateDir`, file_system, path, recursive)) +fs___FileInfo__mtime <- function(x){ + .Call(`_arrow_fs___FileInfo__mtime`, x) } -fs___FileSystem__DeleteDir <- function(file_system, path) { - invisible(.Call(`_arrow_fs___FileSystem__DeleteDir`, file_system, path)) +fs___FileInfo__set_mtime <- function(x, time){ + invisible(.Call(`_arrow_fs___FileInfo__set_mtime`, x, time)) } -fs___FileSystem__DeleteDirContents <- function(file_system, 
path) { - invisible(.Call(`_arrow_fs___FileSystem__DeleteDirContents`, file_system, path)) +fs___FileSelector__base_dir <- function(selector){ + .Call(`_arrow_fs___FileSelector__base_dir`, selector) } -fs___FileSystem__DeleteFile <- function(file_system, path) { - invisible(.Call(`_arrow_fs___FileSystem__DeleteFile`, file_system, path)) +fs___FileSelector__allow_not_found <- function(selector){ + .Call(`_arrow_fs___FileSelector__allow_not_found`, selector) } -fs___FileSystem__DeleteFiles <- function(file_system, paths) { - invisible(.Call(`_arrow_fs___FileSystem__DeleteFiles`, file_system, paths)) +fs___FileSelector__recursive <- function(selector){ + .Call(`_arrow_fs___FileSelector__recursive`, selector) } -fs___FileSystem__Move <- function(file_system, src, dest) { - invisible(.Call(`_arrow_fs___FileSystem__Move`, file_system, src, dest)) +fs___FileSelector__create <- function(base_dir, allow_not_found, recursive){ + .Call(`_arrow_fs___FileSelector__create`, base_dir, allow_not_found, recursive) } -fs___FileSystem__CopyFile <- function(file_system, src, dest) { - invisible(.Call(`_arrow_fs___FileSystem__CopyFile`, file_system, src, dest)) +fs___FileSystem__GetTargetInfos_Paths <- function(file_system, paths){ + .Call(`_arrow_fs___FileSystem__GetTargetInfos_Paths`, file_system, paths) } -fs___FileSystem__OpenInputStream <- function(file_system, path) { - .Call(`_arrow_fs___FileSystem__OpenInputStream`, file_system, path) +fs___FileSystem__GetTargetInfos_FileSelector <- function(file_system, selector){ + .Call(`_arrow_fs___FileSystem__GetTargetInfos_FileSelector`, file_system, selector) } -fs___FileSystem__OpenInputFile <- function(file_system, path) { - .Call(`_arrow_fs___FileSystem__OpenInputFile`, file_system, path) +fs___FileSystem__CreateDir <- function(file_system, path, recursive){ + invisible(.Call(`_arrow_fs___FileSystem__CreateDir`, file_system, path, recursive)) } -fs___FileSystem__OpenOutputStream <- function(file_system, path) { - 
.Call(`_arrow_fs___FileSystem__OpenOutputStream`, file_system, path) +fs___FileSystem__DeleteDir <- function(file_system, path){ + invisible(.Call(`_arrow_fs___FileSystem__DeleteDir`, file_system, path)) } -fs___FileSystem__OpenAppendStream <- function(file_system, path) { - .Call(`_arrow_fs___FileSystem__OpenAppendStream`, file_system, path) +fs___FileSystem__DeleteDirContents <- function(file_system, path){ + invisible(.Call(`_arrow_fs___FileSystem__DeleteDirContents`, file_system, path)) } -fs___FileSystem__type_name <- function(file_system) { - .Call(`_arrow_fs___FileSystem__type_name`, file_system) +fs___FileSystem__DeleteFile <- function(file_system, path){ + invisible(.Call(`_arrow_fs___FileSystem__DeleteFile`, file_system, path)) } -fs___LocalFileSystem__create <- function() { - .Call(`_arrow_fs___LocalFileSystem__create`) +fs___FileSystem__DeleteFiles <- function(file_system, paths){ + invisible(.Call(`_arrow_fs___FileSystem__DeleteFiles`, file_system, paths)) } -fs___SubTreeFileSystem__create <- function(base_path, base_fs) { - .Call(`_arrow_fs___SubTreeFileSystem__create`, base_path, base_fs) +fs___FileSystem__Move <- function(file_system, src, dest){ + invisible(.Call(`_arrow_fs___FileSystem__Move`, file_system, src, dest)) } -fs___SubTreeFileSystem__base_fs <- function(file_system) { - .Call(`_arrow_fs___SubTreeFileSystem__base_fs`, file_system) +fs___FileSystem__CopyFile <- function(file_system, src, dest){ + invisible(.Call(`_arrow_fs___FileSystem__CopyFile`, file_system, src, dest)) } -fs___SubTreeFileSystem__base_path <- function(file_system) { - .Call(`_arrow_fs___SubTreeFileSystem__base_path`, file_system) +fs___FileSystem__OpenInputStream <- function(file_system, path){ + .Call(`_arrow_fs___FileSystem__OpenInputStream`, file_system, path) } -fs___FileSystemFromUri <- function(path) { - .Call(`_arrow_fs___FileSystemFromUri`, path) +fs___FileSystem__OpenInputFile <- function(file_system, path){ + .Call(`_arrow_fs___FileSystem__OpenInputFile`, 
file_system, path) } -fs___CopyFiles <- function(source_fs, source_sel, destination_fs, destination_base_dir, chunk_size, use_threads) { - invisible(.Call(`_arrow_fs___CopyFiles`, source_fs, source_sel, destination_fs, destination_base_dir, chunk_size, use_threads)) +fs___FileSystem__OpenOutputStream <- function(file_system, path){ + .Call(`_arrow_fs___FileSystem__OpenOutputStream`, file_system, path) } -fs___S3FileSystem__create <- function(anonymous, access_key, secret_key, session_token, role_arn, session_name, external_id, load_frequency, region, endpoint_override, scheme, background_writes) { - .Call(`_arrow_fs___S3FileSystem__create`, anonymous, access_key, secret_key, session_token, role_arn, session_name, external_id, load_frequency, region, endpoint_override, scheme, background_writes) +fs___FileSystem__OpenAppendStream <- function(file_system, path){ + .Call(`_arrow_fs___FileSystem__OpenAppendStream`, file_system, path) } -fs___S3FileSystem__region <- function(fs) { - .Call(`_arrow_fs___S3FileSystem__region`, fs) +fs___FileSystem__type_name <- function(file_system){ + .Call(`_arrow_fs___FileSystem__type_name`, file_system) } -io___Readable__Read <- function(x, nbytes) { - .Call(`_arrow_io___Readable__Read`, x, nbytes) +fs___LocalFileSystem__create <- function(){ + .Call(`_arrow_fs___LocalFileSystem__create`) } -io___InputStream__Close <- function(x) { - invisible(.Call(`_arrow_io___InputStream__Close`, x)) +fs___SubTreeFileSystem__create <- function(base_path, base_fs){ + .Call(`_arrow_fs___SubTreeFileSystem__create`, base_path, base_fs) } -io___OutputStream__Close <- function(x) { - invisible(.Call(`_arrow_io___OutputStream__Close`, x)) +fs___SubTreeFileSystem__base_fs <- function(file_system){ + .Call(`_arrow_fs___SubTreeFileSystem__base_fs`, file_system) } -io___RandomAccessFile__GetSize <- function(x) { - .Call(`_arrow_io___RandomAccessFile__GetSize`, x) +fs___SubTreeFileSystem__base_path <- function(file_system){ + 
.Call(`_arrow_fs___SubTreeFileSystem__base_path`, file_system) } -io___RandomAccessFile__supports_zero_copy <- function(x) { - .Call(`_arrow_io___RandomAccessFile__supports_zero_copy`, x) +fs___FileSystemFromUri <- function(path){ + .Call(`_arrow_fs___FileSystemFromUri`, path) } -io___RandomAccessFile__Seek <- function(x, position) { - invisible(.Call(`_arrow_io___RandomAccessFile__Seek`, x, position)) +fs___CopyFiles <- function(source_fs, source_sel, destination_fs, destination_base_dir, chunk_size, use_threads){ + invisible(.Call(`_arrow_fs___CopyFiles`, source_fs, source_sel, destination_fs, destination_base_dir, chunk_size, use_threads)) } -io___RandomAccessFile__Tell <- function(x) { - .Call(`_arrow_io___RandomAccessFile__Tell`, x) +fs___S3FileSystem__create <- function(anonymous, access_key, secret_key, session_token, role_arn, session_name, external_id, load_frequency, region, endpoint_override, scheme, background_writes){ + .Call(`_arrow_fs___S3FileSystem__create`, anonymous, access_key, secret_key, session_token, role_arn, session_name, external_id, load_frequency, region, endpoint_override, scheme, background_writes) } -io___RandomAccessFile__Read0 <- function(x) { - .Call(`_arrow_io___RandomAccessFile__Read0`, x) +fs___S3FileSystem__region <- function(fs){ + .Call(`_arrow_fs___S3FileSystem__region`, fs) } -io___RandomAccessFile__ReadAt <- function(x, position, nbytes) { - .Call(`_arrow_io___RandomAccessFile__ReadAt`, x, position, nbytes) +io___Readable__Read <- function(x, nbytes){ + .Call(`_arrow_io___Readable__Read`, x, nbytes) } -io___MemoryMappedFile__Create <- function(path, size) { - .Call(`_arrow_io___MemoryMappedFile__Create`, path, size) +io___InputStream__Close <- function(x){ + invisible(.Call(`_arrow_io___InputStream__Close`, x)) } -io___MemoryMappedFile__Open <- function(path, mode) { - .Call(`_arrow_io___MemoryMappedFile__Open`, path, mode) +io___OutputStream__Close <- function(x){ + invisible(.Call(`_arrow_io___OutputStream__Close`, x)) } 
-io___MemoryMappedFile__Resize <- function(x, size) { - invisible(.Call(`_arrow_io___MemoryMappedFile__Resize`, x, size)) +io___RandomAccessFile__GetSize <- function(x){ + .Call(`_arrow_io___RandomAccessFile__GetSize`, x) } -io___ReadableFile__Open <- function(path) { - .Call(`_arrow_io___ReadableFile__Open`, path) +io___RandomAccessFile__supports_zero_copy <- function(x){ + .Call(`_arrow_io___RandomAccessFile__supports_zero_copy`, x) } -io___BufferReader__initialize <- function(buffer) { - .Call(`_arrow_io___BufferReader__initialize`, buffer) +io___RandomAccessFile__Seek <- function(x, position){ + invisible(.Call(`_arrow_io___RandomAccessFile__Seek`, x, position)) } -io___Writable__write <- function(stream, buf) { - invisible(.Call(`_arrow_io___Writable__write`, stream, buf)) +io___RandomAccessFile__Tell <- function(x){ + .Call(`_arrow_io___RandomAccessFile__Tell`, x) } -io___OutputStream__Tell <- function(stream) { - .Call(`_arrow_io___OutputStream__Tell`, stream) +io___RandomAccessFile__Read0 <- function(x){ + .Call(`_arrow_io___RandomAccessFile__Read0`, x) } -io___FileOutputStream__Open <- function(path) { - .Call(`_arrow_io___FileOutputStream__Open`, path) +io___RandomAccessFile__ReadAt <- function(x, position, nbytes){ + .Call(`_arrow_io___RandomAccessFile__ReadAt`, x, position, nbytes) } -io___BufferOutputStream__Create <- function(initial_capacity) { - .Call(`_arrow_io___BufferOutputStream__Create`, initial_capacity) +io___MemoryMappedFile__Create <- function(path, size){ + .Call(`_arrow_io___MemoryMappedFile__Create`, path, size) } -io___BufferOutputStream__capacity <- function(stream) { - .Call(`_arrow_io___BufferOutputStream__capacity`, stream) +io___MemoryMappedFile__Open <- function(path, mode){ + .Call(`_arrow_io___MemoryMappedFile__Open`, path, mode) } -io___BufferOutputStream__Finish <- function(stream) { - .Call(`_arrow_io___BufferOutputStream__Finish`, stream) +io___MemoryMappedFile__Resize <- function(x, size){ + 
invisible(.Call(`_arrow_io___MemoryMappedFile__Resize`, x, size)) } -io___BufferOutputStream__Tell <- function(stream) { - .Call(`_arrow_io___BufferOutputStream__Tell`, stream) +io___ReadableFile__Open <- function(path){ + .Call(`_arrow_io___ReadableFile__Open`, path) } -io___BufferOutputStream__Write <- function(stream, bytes) { - invisible(.Call(`_arrow_io___BufferOutputStream__Write`, stream, bytes)) +io___BufferReader__initialize <- function(buffer){ + .Call(`_arrow_io___BufferReader__initialize`, buffer) } -json___ReadOptions__initialize <- function(use_threads, block_size) { - .Call(`_arrow_json___ReadOptions__initialize`, use_threads, block_size) +io___Writable__write <- function(stream, buf){ + invisible(.Call(`_arrow_io___Writable__write`, stream, buf)) } -json___ParseOptions__initialize1 <- function(newlines_in_values) { - .Call(`_arrow_json___ParseOptions__initialize1`, newlines_in_values) +io___OutputStream__Tell <- function(stream){ + .Call(`_arrow_io___OutputStream__Tell`, stream) } -json___ParseOptions__initialize2 <- function(newlines_in_values, explicit_schema) { - .Call(`_arrow_json___ParseOptions__initialize2`, newlines_in_values, explicit_schema) +io___FileOutputStream__Open <- function(path){ + .Call(`_arrow_io___FileOutputStream__Open`, path) } -json___TableReader__Make <- function(input, read_options, parse_options) { - .Call(`_arrow_json___TableReader__Make`, input, read_options, parse_options) +io___BufferOutputStream__Create <- function(initial_capacity){ + .Call(`_arrow_io___BufferOutputStream__Create`, initial_capacity) } -json___TableReader__Read <- function(table_reader) { - .Call(`_arrow_json___TableReader__Read`, table_reader) +io___BufferOutputStream__capacity <- function(stream){ + .Call(`_arrow_io___BufferOutputStream__capacity`, stream) } -MemoryPool__default <- function() { - .Call(`_arrow_MemoryPool__default`) +io___BufferOutputStream__Finish <- function(stream){ + .Call(`_arrow_io___BufferOutputStream__Finish`, stream) } 
-MemoryPool__bytes_allocated <- function(pool) { - .Call(`_arrow_MemoryPool__bytes_allocated`, pool) +io___BufferOutputStream__Tell <- function(stream){ + .Call(`_arrow_io___BufferOutputStream__Tell`, stream) } -MemoryPool__max_memory <- function(pool) { - .Call(`_arrow_MemoryPool__max_memory`, pool) +io___BufferOutputStream__Write <- function(stream, bytes){ + invisible(.Call(`_arrow_io___BufferOutputStream__Write`, stream, bytes)) } -MemoryPool__backend_name <- function(pool) { - .Call(`_arrow_MemoryPool__backend_name`, pool) +json___ReadOptions__initialize <- function(use_threads, block_size){ + .Call(`_arrow_json___ReadOptions__initialize`, use_threads, block_size) } -supported_memory_backends <- function() { - .Call(`_arrow_supported_memory_backends`) +json___ParseOptions__initialize1 <- function(newlines_in_values){ + .Call(`_arrow_json___ParseOptions__initialize1`, newlines_in_values) } -ipc___Message__body_length <- function(message) { - .Call(`_arrow_ipc___Message__body_length`, message) +json___ParseOptions__initialize2 <- function(newlines_in_values, explicit_schema){ + .Call(`_arrow_json___ParseOptions__initialize2`, newlines_in_values, explicit_schema) } -ipc___Message__metadata <- function(message) { - .Call(`_arrow_ipc___Message__metadata`, message) +json___TableReader__Make <- function(input, read_options, parse_options){ + .Call(`_arrow_json___TableReader__Make`, input, read_options, parse_options) } -ipc___Message__body <- function(message) { - .Call(`_arrow_ipc___Message__body`, message) +json___TableReader__Read <- function(table_reader){ + .Call(`_arrow_json___TableReader__Read`, table_reader) } -ipc___Message__Verify <- function(message) { - .Call(`_arrow_ipc___Message__Verify`, message) +MemoryPool__default <- function(){ + .Call(`_arrow_MemoryPool__default`) } -ipc___Message__type <- function(message) { - .Call(`_arrow_ipc___Message__type`, message) +MemoryPool__bytes_allocated <- function(pool){ + .Call(`_arrow_MemoryPool__bytes_allocated`, 
pool) } -ipc___Message__Equals <- function(x, y) { - .Call(`_arrow_ipc___Message__Equals`, x, y) +MemoryPool__max_memory <- function(pool){ + .Call(`_arrow_MemoryPool__max_memory`, pool) } -ipc___ReadRecordBatch__Message__Schema <- function(message, schema) { - .Call(`_arrow_ipc___ReadRecordBatch__Message__Schema`, message, schema) +MemoryPool__backend_name <- function(pool){ + .Call(`_arrow_MemoryPool__backend_name`, pool) } -ipc___ReadSchema_InputStream <- function(stream) { - .Call(`_arrow_ipc___ReadSchema_InputStream`, stream) +supported_memory_backends <- function(){ + .Call(`_arrow_supported_memory_backends`) } -ipc___ReadSchema_Message <- function(message) { - .Call(`_arrow_ipc___ReadSchema_Message`, message) +ipc___Message__body_length <- function(message){ + .Call(`_arrow_ipc___Message__body_length`, message) } -ipc___MessageReader__Open <- function(stream) { - .Call(`_arrow_ipc___MessageReader__Open`, stream) +ipc___Message__metadata <- function(message){ + .Call(`_arrow_ipc___Message__metadata`, message) } -ipc___MessageReader__ReadNextMessage <- function(reader) { - .Call(`_arrow_ipc___MessageReader__ReadNextMessage`, reader) +ipc___Message__body <- function(message){ + .Call(`_arrow_ipc___Message__body`, message) } -ipc___ReadMessage <- function(stream) { - .Call(`_arrow_ipc___ReadMessage`, stream) +ipc___Message__Verify <- function(message){ + .Call(`_arrow_ipc___Message__Verify`, message) } -parquet___arrow___ArrowReaderProperties__Make <- function(use_threads) { - .Call(`_arrow_parquet___arrow___ArrowReaderProperties__Make`, use_threads) +ipc___Message__type <- function(message){ + .Call(`_arrow_ipc___Message__type`, message) } -parquet___arrow___ArrowReaderProperties__set_use_threads <- function(properties, use_threads) { - invisible(.Call(`_arrow_parquet___arrow___ArrowReaderProperties__set_use_threads`, properties, use_threads)) +ipc___Message__Equals <- function(x, y){ + .Call(`_arrow_ipc___Message__Equals`, x, y) } 
-parquet___arrow___ArrowReaderProperties__get_use_threads <- function(properties, use_threads) { - .Call(`_arrow_parquet___arrow___ArrowReaderProperties__get_use_threads`, properties, use_threads) +ipc___ReadRecordBatch__Message__Schema <- function(message, schema){ + .Call(`_arrow_ipc___ReadRecordBatch__Message__Schema`, message, schema) } -parquet___arrow___ArrowReaderProperties__get_read_dictionary <- function(properties, column_index) { - .Call(`_arrow_parquet___arrow___ArrowReaderProperties__get_read_dictionary`, properties, column_index) +ipc___ReadSchema_InputStream <- function(stream){ + .Call(`_arrow_ipc___ReadSchema_InputStream`, stream) } -parquet___arrow___ArrowReaderProperties__set_read_dictionary <- function(properties, column_index, read_dict) { - invisible(.Call(`_arrow_parquet___arrow___ArrowReaderProperties__set_read_dictionary`, properties, column_index, read_dict)) +ipc___ReadSchema_Message <- function(message){ + .Call(`_arrow_ipc___ReadSchema_Message`, message) } -parquet___arrow___FileReader__OpenFile <- function(file, props) { - .Call(`_arrow_parquet___arrow___FileReader__OpenFile`, file, props) +ipc___MessageReader__Open <- function(stream){ + .Call(`_arrow_ipc___MessageReader__Open`, stream) } -parquet___arrow___FileReader__ReadTable1 <- function(reader) { - .Call(`_arrow_parquet___arrow___FileReader__ReadTable1`, reader) +ipc___MessageReader__ReadNextMessage <- function(reader){ + .Call(`_arrow_ipc___MessageReader__ReadNextMessage`, reader) } -parquet___arrow___FileReader__ReadTable2 <- function(reader, column_indices) { - .Call(`_arrow_parquet___arrow___FileReader__ReadTable2`, reader, column_indices) +ipc___ReadMessage <- function(stream){ + .Call(`_arrow_ipc___ReadMessage`, stream) } -parquet___arrow___FileReader__ReadRowGroup1 <- function(reader, i) { - .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroup1`, reader, i) +parquet___arrow___ArrowReaderProperties__Make <- function(use_threads){ + 
.Call(`_arrow_parquet___arrow___ArrowReaderProperties__Make`, use_threads) } -parquet___arrow___FileReader__ReadRowGroup2 <- function(reader, i, column_indices) { - .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroup2`, reader, i, column_indices) +parquet___arrow___ArrowReaderProperties__set_use_threads <- function(properties, use_threads){ + invisible(.Call(`_arrow_parquet___arrow___ArrowReaderProperties__set_use_threads`, properties, use_threads)) } -parquet___arrow___FileReader__ReadRowGroups1 <- function(reader, row_groups) { - .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroups1`, reader, row_groups) +parquet___arrow___ArrowReaderProperties__get_use_threads <- function(properties, use_threads){ + .Call(`_arrow_parquet___arrow___ArrowReaderProperties__get_use_threads`, properties, use_threads) } -parquet___arrow___FileReader__ReadRowGroups2 <- function(reader, row_groups, column_indices) { - .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroups2`, reader, row_groups, column_indices) +parquet___arrow___ArrowReaderProperties__get_read_dictionary <- function(properties, column_index){ + .Call(`_arrow_parquet___arrow___ArrowReaderProperties__get_read_dictionary`, properties, column_index) } -parquet___arrow___FileReader__num_rows <- function(reader) { - .Call(`_arrow_parquet___arrow___FileReader__num_rows`, reader) +parquet___arrow___ArrowReaderProperties__set_read_dictionary <- function(properties, column_index, read_dict){ + invisible(.Call(`_arrow_parquet___arrow___ArrowReaderProperties__set_read_dictionary`, properties, column_index, read_dict)) } -parquet___arrow___FileReader__num_columns <- function(reader) { - .Call(`_arrow_parquet___arrow___FileReader__num_columns`, reader) +parquet___arrow___FileReader__OpenFile <- function(file, props){ + .Call(`_arrow_parquet___arrow___FileReader__OpenFile`, file, props) } -parquet___arrow___FileReader__num_row_groups <- function(reader) { - .Call(`_arrow_parquet___arrow___FileReader__num_row_groups`, 
reader) +parquet___arrow___FileReader__ReadTable1 <- function(reader){ + .Call(`_arrow_parquet___arrow___FileReader__ReadTable1`, reader) } -parquet___arrow___FileReader__ReadColumn <- function(reader, i) { - .Call(`_arrow_parquet___arrow___FileReader__ReadColumn`, reader, i) +parquet___arrow___FileReader__ReadTable2 <- function(reader, column_indices){ + .Call(`_arrow_parquet___arrow___FileReader__ReadTable2`, reader, column_indices) } -parquet___ArrowWriterProperties___create <- function(allow_truncated_timestamps, use_deprecated_int96_timestamps, timestamp_unit) { - .Call(`_arrow_parquet___ArrowWriterProperties___create`, allow_truncated_timestamps, use_deprecated_int96_timestamps, timestamp_unit) +parquet___arrow___FileReader__ReadRowGroup1 <- function(reader, i){ + .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroup1`, reader, i) } -parquet___WriterProperties___Builder__create <- function() { - .Call(`_arrow_parquet___WriterProperties___Builder__create`) +parquet___arrow___FileReader__ReadRowGroup2 <- function(reader, i, column_indices){ + .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroup2`, reader, i, column_indices) } -parquet___WriterProperties___Builder__version <- function(builder, version) { - invisible(.Call(`_arrow_parquet___WriterProperties___Builder__version`, builder, version)) +parquet___arrow___FileReader__ReadRowGroups1 <- function(reader, row_groups){ + .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroups1`, reader, row_groups) } -parquet___ArrowWriterProperties___Builder__set_compressions <- function(builder, paths, types) { - invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_compressions`, builder, paths, types)) +parquet___arrow___FileReader__ReadRowGroups2 <- function(reader, row_groups, column_indices){ + .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroups2`, reader, row_groups, column_indices) } -parquet___ArrowWriterProperties___Builder__set_compression_levels <- function(builder, paths, levels) 
{ - invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_compression_levels`, builder, paths, levels)) +parquet___arrow___FileReader__num_rows <- function(reader){ + .Call(`_arrow_parquet___arrow___FileReader__num_rows`, reader) } -parquet___ArrowWriterProperties___Builder__set_use_dictionary <- function(builder, paths, use_dictionary) { - invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_use_dictionary`, builder, paths, use_dictionary)) +parquet___arrow___FileReader__num_columns <- function(reader){ + .Call(`_arrow_parquet___arrow___FileReader__num_columns`, reader) } -parquet___ArrowWriterProperties___Builder__set_write_statistics <- function(builder, paths, write_statistics) { - invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_write_statistics`, builder, paths, write_statistics)) +parquet___arrow___FileReader__num_row_groups <- function(reader){ + .Call(`_arrow_parquet___arrow___FileReader__num_row_groups`, reader) } -parquet___ArrowWriterProperties___Builder__data_page_size <- function(builder, data_page_size) { - invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__data_page_size`, builder, data_page_size)) +parquet___arrow___FileReader__ReadColumn <- function(reader, i){ + .Call(`_arrow_parquet___arrow___FileReader__ReadColumn`, reader, i) } -parquet___WriterProperties___Builder__build <- function(builder) { - .Call(`_arrow_parquet___WriterProperties___Builder__build`, builder) +parquet___ArrowWriterProperties___create <- function(allow_truncated_timestamps, use_deprecated_int96_timestamps, timestamp_unit){ + .Call(`_arrow_parquet___ArrowWriterProperties___create`, allow_truncated_timestamps, use_deprecated_int96_timestamps, timestamp_unit) } -parquet___arrow___ParquetFileWriter__Open <- function(schema, sink, properties, arrow_properties) { - .Call(`_arrow_parquet___arrow___ParquetFileWriter__Open`, schema, sink, properties, arrow_properties) 
+parquet___WriterProperties___Builder__create <- function(){ + .Call(`_arrow_parquet___WriterProperties___Builder__create`) } -parquet___arrow___FileWriter__WriteTable <- function(writer, table, chunk_size) { - invisible(.Call(`_arrow_parquet___arrow___FileWriter__WriteTable`, writer, table, chunk_size)) +parquet___WriterProperties___Builder__version <- function(builder, version){ + invisible(.Call(`_arrow_parquet___WriterProperties___Builder__version`, builder, version)) } -parquet___arrow___FileWriter__Close <- function(writer) { - invisible(.Call(`_arrow_parquet___arrow___FileWriter__Close`, writer)) +parquet___ArrowWriterProperties___Builder__set_compressions <- function(builder, paths, types){ + invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_compressions`, builder, paths, types)) } -parquet___arrow___WriteTable <- function(table, sink, properties, arrow_properties) { - invisible(.Call(`_arrow_parquet___arrow___WriteTable`, table, sink, properties, arrow_properties)) +parquet___ArrowWriterProperties___Builder__set_compression_levels <- function(builder, paths, levels){ + invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_compression_levels`, builder, paths, levels)) } -parquet___arrow___FileReader__GetSchema <- function(reader) { - .Call(`_arrow_parquet___arrow___FileReader__GetSchema`, reader) +parquet___ArrowWriterProperties___Builder__set_use_dictionary <- function(builder, paths, use_dictionary){ + invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_use_dictionary`, builder, paths, use_dictionary)) } -allocate_arrow_schema <- function() { - .Call(`_arrow_allocate_arrow_schema`) +parquet___ArrowWriterProperties___Builder__set_write_statistics <- function(builder, paths, write_statistics){ + invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_write_statistics`, builder, paths, write_statistics)) } -delete_arrow_schema <- function(ptr) { - 
invisible(.Call(`_arrow_delete_arrow_schema`, ptr)) +parquet___ArrowWriterProperties___Builder__data_page_size <- function(builder, data_page_size){ + invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__data_page_size`, builder, data_page_size)) } -allocate_arrow_array <- function() { - .Call(`_arrow_allocate_arrow_array`) +parquet___WriterProperties___Builder__build <- function(builder){ + .Call(`_arrow_parquet___WriterProperties___Builder__build`, builder) } -delete_arrow_array <- function(ptr) { - invisible(.Call(`_arrow_delete_arrow_array`, ptr)) +parquet___arrow___ParquetFileWriter__Open <- function(schema, sink, properties, arrow_properties){ + .Call(`_arrow_parquet___arrow___ParquetFileWriter__Open`, schema, sink, properties, arrow_properties) } -allocate_arrow_array_stream <- function() { - .Call(`_arrow_allocate_arrow_array_stream`) +parquet___arrow___FileWriter__WriteTable <- function(writer, table, chunk_size){ + invisible(.Call(`_arrow_parquet___arrow___FileWriter__WriteTable`, writer, table, chunk_size)) } -delete_arrow_array_stream <- function(ptr) { - invisible(.Call(`_arrow_delete_arrow_array_stream`, ptr)) +parquet___arrow___FileWriter__Close <- function(writer){ + invisible(.Call(`_arrow_parquet___arrow___FileWriter__Close`, writer)) } -ImportArray <- function(array, schema) { - .Call(`_arrow_ImportArray`, array, schema) +parquet___arrow___WriteTable <- function(table, sink, properties, arrow_properties){ + invisible(.Call(`_arrow_parquet___arrow___WriteTable`, table, sink, properties, arrow_properties)) } -ImportRecordBatch <- function(array, schema) { - .Call(`_arrow_ImportRecordBatch`, array, schema) +parquet___arrow___FileReader__GetSchema <- function(reader){ + .Call(`_arrow_parquet___arrow___FileReader__GetSchema`, reader) } -ImportSchema <- function(schema) { - .Call(`_arrow_ImportSchema`, schema) +allocate_arrow_schema <- function(){ + .Call(`_arrow_allocate_arrow_schema`) } -ImportField <- function(field) { - 
.Call(`_arrow_ImportField`, field) +delete_arrow_schema <- function(ptr){ + invisible(.Call(`_arrow_delete_arrow_schema`, ptr)) } -ImportType <- function(type) { - .Call(`_arrow_ImportType`, type) +allocate_arrow_array <- function(){ + .Call(`_arrow_allocate_arrow_array`) } -ImportRecordBatchReader <- function(stream) { - .Call(`_arrow_ImportRecordBatchReader`, stream) +delete_arrow_array <- function(ptr){ + invisible(.Call(`_arrow_delete_arrow_array`, ptr)) } -ExportType <- function(type, ptr) { - invisible(.Call(`_arrow_ExportType`, type, ptr)) +allocate_arrow_array_stream <- function(){ + .Call(`_arrow_allocate_arrow_array_stream`) } -ExportField <- function(field, ptr) { - invisible(.Call(`_arrow_ExportField`, field, ptr)) +delete_arrow_array_stream <- function(ptr){ + invisible(.Call(`_arrow_delete_arrow_array_stream`, ptr)) } -ExportSchema <- function(schema, ptr) { - invisible(.Call(`_arrow_ExportSchema`, schema, ptr)) +ImportArray <- function(array, schema){ + .Call(`_arrow_ImportArray`, array, schema) } -ExportArray <- function(array, array_ptr, schema_ptr) { - invisible(.Call(`_arrow_ExportArray`, array, array_ptr, schema_ptr)) +ImportRecordBatch <- function(array, schema){ + .Call(`_arrow_ImportRecordBatch`, array, schema) } -ExportRecordBatch <- function(batch, array_ptr, schema_ptr) { - invisible(.Call(`_arrow_ExportRecordBatch`, batch, array_ptr, schema_ptr)) +ImportSchema <- function(schema){ + .Call(`_arrow_ImportSchema`, schema) } -ExportRecordBatchReader <- function(reader, stream_ptr) { - invisible(.Call(`_arrow_ExportRecordBatchReader`, reader, stream_ptr)) +ImportField <- function(field){ + .Call(`_arrow_ImportField`, field) } -Table__from_dots <- function(lst, schema_sxp, use_threads) { - .Call(`_arrow_Table__from_dots`, lst, schema_sxp, use_threads) +ImportType <- function(type){ + .Call(`_arrow_ImportType`, type) } -vec_to_arrow <- function(x, s_type) { - .Call(`_arrow_vec_to_arrow`, x, s_type) +ImportRecordBatchReader <- function(stream){ + 
.Call(`_arrow_ImportRecordBatchReader`, stream) } -DictionaryArray__FromArrays <- function(type, indices, dict) { - .Call(`_arrow_DictionaryArray__FromArrays`, type, indices, dict) +ExportType <- function(type, ptr){ + invisible(.Call(`_arrow_ExportType`, type, ptr)) } -RecordBatch__num_columns <- function(x) { - .Call(`_arrow_RecordBatch__num_columns`, x) +ExportField <- function(field, ptr){ + invisible(.Call(`_arrow_ExportField`, field, ptr)) } -RecordBatch__num_rows <- function(x) { - .Call(`_arrow_RecordBatch__num_rows`, x) +ExportSchema <- function(schema, ptr){ + invisible(.Call(`_arrow_ExportSchema`, schema, ptr)) } -RecordBatch__schema <- function(x) { - .Call(`_arrow_RecordBatch__schema`, x) +ExportArray <- function(array, array_ptr, schema_ptr){ + invisible(.Call(`_arrow_ExportArray`, array, array_ptr, schema_ptr)) } -RecordBatch__RenameColumns <- function(batch, names) { - .Call(`_arrow_RecordBatch__RenameColumns`, batch, names) +ExportRecordBatch <- function(batch, array_ptr, schema_ptr){ + invisible(.Call(`_arrow_ExportRecordBatch`, batch, array_ptr, schema_ptr)) } -RecordBatch__ReplaceSchemaMetadata <- function(x, metadata) { - .Call(`_arrow_RecordBatch__ReplaceSchemaMetadata`, x, metadata) +ExportRecordBatchReader <- function(reader, stream_ptr){ + invisible(.Call(`_arrow_ExportRecordBatchReader`, reader, stream_ptr)) } -RecordBatch__columns <- function(batch) { - .Call(`_arrow_RecordBatch__columns`, batch) +Table__from_dots <- function(lst, schema_sxp, use_threads){ + .Call(`_arrow_Table__from_dots`, lst, schema_sxp, use_threads) } -RecordBatch__column <- function(batch, i) { - .Call(`_arrow_RecordBatch__column`, batch, i) +vec_to_arrow <- function(x, s_type){ + .Call(`_arrow_vec_to_arrow`, x, s_type) } -RecordBatch__GetColumnByName <- function(batch, name) { - .Call(`_arrow_RecordBatch__GetColumnByName`, batch, name) +DictionaryArray__FromArrays <- function(type, indices, dict){ + .Call(`_arrow_DictionaryArray__FromArrays`, type, indices, dict) } 
-RecordBatch__SelectColumns <- function(batch, indices) { - .Call(`_arrow_RecordBatch__SelectColumns`, batch, indices) +RecordBatch__num_columns <- function(x){ + .Call(`_arrow_RecordBatch__num_columns`, x) } -RecordBatch__Equals <- function(self, other, check_metadata) { - .Call(`_arrow_RecordBatch__Equals`, self, other, check_metadata) +RecordBatch__num_rows <- function(x){ + .Call(`_arrow_RecordBatch__num_rows`, x) } -RecordBatch__AddColumn <- function(batch, i, field, column) { - .Call(`_arrow_RecordBatch__AddColumn`, batch, i, field, column) +RecordBatch__schema <- function(x){ + .Call(`_arrow_RecordBatch__schema`, x) } -RecordBatch__SetColumn <- function(batch, i, field, column) { - .Call(`_arrow_RecordBatch__SetColumn`, batch, i, field, column) +RecordBatch__RenameColumns <- function(batch, names){ + .Call(`_arrow_RecordBatch__RenameColumns`, batch, names) } -RecordBatch__RemoveColumn <- function(batch, i) { - .Call(`_arrow_RecordBatch__RemoveColumn`, batch, i) +RecordBatch__ReplaceSchemaMetadata <- function(x, metadata){ + .Call(`_arrow_RecordBatch__ReplaceSchemaMetadata`, x, metadata) } -RecordBatch__column_name <- function(batch, i) { - .Call(`_arrow_RecordBatch__column_name`, batch, i) +RecordBatch__columns <- function(batch){ + .Call(`_arrow_RecordBatch__columns`, batch) } -RecordBatch__names <- function(batch) { - .Call(`_arrow_RecordBatch__names`, batch) +RecordBatch__column <- function(batch, i){ + .Call(`_arrow_RecordBatch__column`, batch, i) } -RecordBatch__Slice1 <- function(self, offset) { - .Call(`_arrow_RecordBatch__Slice1`, self, offset) +RecordBatch__GetColumnByName <- function(batch, name){ + .Call(`_arrow_RecordBatch__GetColumnByName`, batch, name) } -RecordBatch__Slice2 <- function(self, offset, length) { - .Call(`_arrow_RecordBatch__Slice2`, self, offset, length) +RecordBatch__SelectColumns <- function(batch, indices){ + .Call(`_arrow_RecordBatch__SelectColumns`, batch, indices) } -ipc___SerializeRecordBatch__Raw <- function(batch) { - 
.Call(`_arrow_ipc___SerializeRecordBatch__Raw`, batch) +RecordBatch__Equals <- function(self, other, check_metadata){ + .Call(`_arrow_RecordBatch__Equals`, self, other, check_metadata) } -ipc___ReadRecordBatch__InputStream__Schema <- function(stream, schema) { - .Call(`_arrow_ipc___ReadRecordBatch__InputStream__Schema`, stream, schema) +RecordBatch__AddColumn <- function(batch, i, field, column){ + .Call(`_arrow_RecordBatch__AddColumn`, batch, i, field, column) } -RecordBatch__from_arrays <- function(schema_sxp, lst) { - .Call(`_arrow_RecordBatch__from_arrays`, schema_sxp, lst) +RecordBatch__SetColumn <- function(batch, i, field, column){ + .Call(`_arrow_RecordBatch__SetColumn`, batch, i, field, column) } -RecordBatchReader__schema <- function(reader) { - .Call(`_arrow_RecordBatchReader__schema`, reader) +RecordBatch__RemoveColumn <- function(batch, i){ + .Call(`_arrow_RecordBatch__RemoveColumn`, batch, i) } -RecordBatchReader__ReadNext <- function(reader) { - .Call(`_arrow_RecordBatchReader__ReadNext`, reader) +RecordBatch__column_name <- function(batch, i){ + .Call(`_arrow_RecordBatch__column_name`, batch, i) } -RecordBatchReader__batches <- function(reader) { - .Call(`_arrow_RecordBatchReader__batches`, reader) +RecordBatch__names <- function(batch){ + .Call(`_arrow_RecordBatch__names`, batch) } -Table__from_RecordBatchReader <- function(reader) { - .Call(`_arrow_Table__from_RecordBatchReader`, reader) +RecordBatch__Slice1 <- function(self, offset){ + .Call(`_arrow_RecordBatch__Slice1`, self, offset) } -ipc___RecordBatchStreamReader__Open <- function(stream) { - .Call(`_arrow_ipc___RecordBatchStreamReader__Open`, stream) +RecordBatch__Slice2 <- function(self, offset, length){ + .Call(`_arrow_RecordBatch__Slice2`, self, offset, length) } -ipc___RecordBatchFileReader__schema <- function(reader) { - .Call(`_arrow_ipc___RecordBatchFileReader__schema`, reader) +ipc___SerializeRecordBatch__Raw <- function(batch){ + .Call(`_arrow_ipc___SerializeRecordBatch__Raw`, 
batch) } -ipc___RecordBatchFileReader__num_record_batches <- function(reader) { - .Call(`_arrow_ipc___RecordBatchFileReader__num_record_batches`, reader) +ipc___ReadRecordBatch__InputStream__Schema <- function(stream, schema){ + .Call(`_arrow_ipc___ReadRecordBatch__InputStream__Schema`, stream, schema) } -ipc___RecordBatchFileReader__ReadRecordBatch <- function(reader, i) { - .Call(`_arrow_ipc___RecordBatchFileReader__ReadRecordBatch`, reader, i) +RecordBatch__from_arrays <- function(schema_sxp, lst){ + .Call(`_arrow_RecordBatch__from_arrays`, schema_sxp, lst) } -ipc___RecordBatchFileReader__Open <- function(file) { - .Call(`_arrow_ipc___RecordBatchFileReader__Open`, file) +RecordBatchReader__schema <- function(reader){ + .Call(`_arrow_RecordBatchReader__schema`, reader) } -Table__from_RecordBatchFileReader <- function(reader) { - .Call(`_arrow_Table__from_RecordBatchFileReader`, reader) +RecordBatchReader__ReadNext <- function(reader){ + .Call(`_arrow_RecordBatchReader__ReadNext`, reader) } -ipc___RecordBatchFileReader__batches <- function(reader) { - .Call(`_arrow_ipc___RecordBatchFileReader__batches`, reader) +RecordBatchReader__batches <- function(reader){ + .Call(`_arrow_RecordBatchReader__batches`, reader) } -ipc___RecordBatchWriter__WriteRecordBatch <- function(batch_writer, batch) { - invisible(.Call(`_arrow_ipc___RecordBatchWriter__WriteRecordBatch`, batch_writer, batch)) +Table__from_RecordBatchReader <- function(reader){ + .Call(`_arrow_Table__from_RecordBatchReader`, reader) } -ipc___RecordBatchWriter__WriteTable <- function(batch_writer, table) { - invisible(.Call(`_arrow_ipc___RecordBatchWriter__WriteTable`, batch_writer, table)) +ipc___RecordBatchStreamReader__Open <- function(stream){ + .Call(`_arrow_ipc___RecordBatchStreamReader__Open`, stream) } -ipc___RecordBatchWriter__Close <- function(batch_writer) { - invisible(.Call(`_arrow_ipc___RecordBatchWriter__Close`, batch_writer)) +ipc___RecordBatchFileReader__schema <- function(reader){ + 
.Call(`_arrow_ipc___RecordBatchFileReader__schema`, reader) } -ipc___RecordBatchFileWriter__Open <- function(stream, schema, use_legacy_format, metadata_version) { - .Call(`_arrow_ipc___RecordBatchFileWriter__Open`, stream, schema, use_legacy_format, metadata_version) +ipc___RecordBatchFileReader__num_record_batches <- function(reader){ + .Call(`_arrow_ipc___RecordBatchFileReader__num_record_batches`, reader) } -ipc___RecordBatchStreamWriter__Open <- function(stream, schema, use_legacy_format, metadata_version) { - .Call(`_arrow_ipc___RecordBatchStreamWriter__Open`, stream, schema, use_legacy_format, metadata_version) +ipc___RecordBatchFileReader__ReadRecordBatch <- function(reader, i){ + .Call(`_arrow_ipc___RecordBatchFileReader__ReadRecordBatch`, reader, i) } -Array__GetScalar <- function(x, i) { - .Call(`_arrow_Array__GetScalar`, x, i) +ipc___RecordBatchFileReader__Open <- function(file){ + .Call(`_arrow_ipc___RecordBatchFileReader__Open`, file) } -Scalar__ToString <- function(s) { - .Call(`_arrow_Scalar__ToString`, s) +Table__from_RecordBatchFileReader <- function(reader){ + .Call(`_arrow_Table__from_RecordBatchFileReader`, reader) } -StructScalar__field <- function(s, i) { - .Call(`_arrow_StructScalar__field`, s, i) +ipc___RecordBatchFileReader__batches <- function(reader){ + .Call(`_arrow_ipc___RecordBatchFileReader__batches`, reader) } -StructScalar__GetFieldByName <- function(s, name) { - .Call(`_arrow_StructScalar__GetFieldByName`, s, name) +ipc___RecordBatchWriter__WriteRecordBatch <- function(batch_writer, batch){ + invisible(.Call(`_arrow_ipc___RecordBatchWriter__WriteRecordBatch`, batch_writer, batch)) } -Scalar__as_vector <- function(scalar) { - .Call(`_arrow_Scalar__as_vector`, scalar) +ipc___RecordBatchWriter__WriteTable <- function(batch_writer, table){ + invisible(.Call(`_arrow_ipc___RecordBatchWriter__WriteTable`, batch_writer, table)) } -MakeArrayFromScalar <- function(scalar, n) { - .Call(`_arrow_MakeArrayFromScalar`, scalar, n) 
+ipc___RecordBatchWriter__Close <- function(batch_writer){ + invisible(.Call(`_arrow_ipc___RecordBatchWriter__Close`, batch_writer)) } -Scalar__is_valid <- function(s) { - .Call(`_arrow_Scalar__is_valid`, s) +ipc___RecordBatchFileWriter__Open <- function(stream, schema, use_legacy_format, metadata_version){ + .Call(`_arrow_ipc___RecordBatchFileWriter__Open`, stream, schema, use_legacy_format, metadata_version) } -Scalar__type <- function(s) { - .Call(`_arrow_Scalar__type`, s) +ipc___RecordBatchStreamWriter__Open <- function(stream, schema, use_legacy_format, metadata_version){ + .Call(`_arrow_ipc___RecordBatchStreamWriter__Open`, stream, schema, use_legacy_format, metadata_version) } -Scalar__Equals <- function(lhs, rhs) { - .Call(`_arrow_Scalar__Equals`, lhs, rhs) +Array__GetScalar <- function(x, i){ + .Call(`_arrow_Array__GetScalar`, x, i) } -Scalar__ApproxEquals <- function(lhs, rhs) { - .Call(`_arrow_Scalar__ApproxEquals`, lhs, rhs) +Scalar__ToString <- function(s){ + .Call(`_arrow_Scalar__ToString`, s) } -schema_ <- function(fields) { - .Call(`_arrow_schema_`, fields) +StructScalar__field <- function(s, i){ + .Call(`_arrow_StructScalar__field`, s, i) } -Schema__ToString <- function(s) { - .Call(`_arrow_Schema__ToString`, s) +StructScalar__GetFieldByName <- function(s, name){ + .Call(`_arrow_StructScalar__GetFieldByName`, s, name) } -Schema__num_fields <- function(s) { - .Call(`_arrow_Schema__num_fields`, s) +Scalar__as_vector <- function(scalar){ + .Call(`_arrow_Scalar__as_vector`, scalar) } -Schema__field <- function(s, i) { - .Call(`_arrow_Schema__field`, s, i) +MakeArrayFromScalar <- function(scalar, n){ + .Call(`_arrow_MakeArrayFromScalar`, scalar, n) } -Schema__AddField <- function(s, i, field) { - .Call(`_arrow_Schema__AddField`, s, i, field) +Scalar__is_valid <- function(s){ + .Call(`_arrow_Scalar__is_valid`, s) } -Schema__SetField <- function(s, i, field) { - .Call(`_arrow_Schema__SetField`, s, i, field) +Scalar__type <- function(s){ + 
.Call(`_arrow_Scalar__type`, s) } -Schema__RemoveField <- function(s, i) { - .Call(`_arrow_Schema__RemoveField`, s, i) +Scalar__Equals <- function(lhs, rhs){ + .Call(`_arrow_Scalar__Equals`, lhs, rhs) } -Schema__GetFieldByName <- function(s, x) { - .Call(`_arrow_Schema__GetFieldByName`, s, x) +Scalar__ApproxEquals <- function(lhs, rhs){ + .Call(`_arrow_Scalar__ApproxEquals`, lhs, rhs) } -Schema__fields <- function(schema) { - .Call(`_arrow_Schema__fields`, schema) +schema_ <- function(fields){ + .Call(`_arrow_schema_`, fields) } -Schema__field_names <- function(schema) { - .Call(`_arrow_Schema__field_names`, schema) +Schema__ToString <- function(s){ + .Call(`_arrow_Schema__ToString`, s) } -Schema__HasMetadata <- function(schema) { - .Call(`_arrow_Schema__HasMetadata`, schema) +Schema__num_fields <- function(s){ + .Call(`_arrow_Schema__num_fields`, s) } -Schema__metadata <- function(schema) { - .Call(`_arrow_Schema__metadata`, schema) +Schema__field <- function(s, i){ + .Call(`_arrow_Schema__field`, s, i) } -Schema__WithMetadata <- function(schema, metadata) { - .Call(`_arrow_Schema__WithMetadata`, schema, metadata) +Schema__AddField <- function(s, i, field){ + .Call(`_arrow_Schema__AddField`, s, i, field) } -Schema__serialize <- function(schema) { - .Call(`_arrow_Schema__serialize`, schema) +Schema__SetField <- function(s, i, field){ + .Call(`_arrow_Schema__SetField`, s, i, field) } -Schema__Equals <- function(schema, other, check_metadata) { - .Call(`_arrow_Schema__Equals`, schema, other, check_metadata) +Schema__RemoveField <- function(s, i){ + .Call(`_arrow_Schema__RemoveField`, s, i) } -arrow__UnifySchemas <- function(schemas) { - .Call(`_arrow_arrow__UnifySchemas`, schemas) +Schema__GetFieldByName <- function(s, x){ + .Call(`_arrow_Schema__GetFieldByName`, s, x) } -Table__num_columns <- function(x) { - .Call(`_arrow_Table__num_columns`, x) +Schema__fields <- function(schema){ + .Call(`_arrow_Schema__fields`, schema) } -Table__num_rows <- function(x) { - 
.Call(`_arrow_Table__num_rows`, x) +Schema__field_names <- function(schema){ + .Call(`_arrow_Schema__field_names`, schema) } -Table__schema <- function(x) { - .Call(`_arrow_Table__schema`, x) +Schema__HasMetadata <- function(schema){ + .Call(`_arrow_Schema__HasMetadata`, schema) } -Table__ReplaceSchemaMetadata <- function(x, metadata) { - .Call(`_arrow_Table__ReplaceSchemaMetadata`, x, metadata) +Schema__metadata <- function(schema){ + .Call(`_arrow_Schema__metadata`, schema) } -Table__column <- function(table, i) { - .Call(`_arrow_Table__column`, table, i) +Schema__WithMetadata <- function(schema, metadata){ + .Call(`_arrow_Schema__WithMetadata`, schema, metadata) } -Table__field <- function(table, i) { - .Call(`_arrow_Table__field`, table, i) +Schema__serialize <- function(schema){ + .Call(`_arrow_Schema__serialize`, schema) } -Table__columns <- function(table) { - .Call(`_arrow_Table__columns`, table) +Schema__Equals <- function(schema, other, check_metadata){ + .Call(`_arrow_Schema__Equals`, schema, other, check_metadata) } -Table__ColumnNames <- function(table) { - .Call(`_arrow_Table__ColumnNames`, table) +arrow__UnifySchemas <- function(schemas){ + .Call(`_arrow_arrow__UnifySchemas`, schemas) } -Table__RenameColumns <- function(table, names) { - .Call(`_arrow_Table__RenameColumns`, table, names) +Table__num_columns <- function(x){ + .Call(`_arrow_Table__num_columns`, x) } -Table__Slice1 <- function(table, offset) { - .Call(`_arrow_Table__Slice1`, table, offset) +Table__num_rows <- function(x){ + .Call(`_arrow_Table__num_rows`, x) } -Table__Slice2 <- function(table, offset, length) { - .Call(`_arrow_Table__Slice2`, table, offset, length) +Table__schema <- function(x){ + .Call(`_arrow_Table__schema`, x) } -Table__Equals <- function(lhs, rhs, check_metadata) { - .Call(`_arrow_Table__Equals`, lhs, rhs, check_metadata) +Table__ReplaceSchemaMetadata <- function(x, metadata){ + .Call(`_arrow_Table__ReplaceSchemaMetadata`, x, metadata) } -Table__Validate <- 
function(table) { - .Call(`_arrow_Table__Validate`, table) +Table__column <- function(table, i){ + .Call(`_arrow_Table__column`, table, i) } -Table__ValidateFull <- function(table) { - .Call(`_arrow_Table__ValidateFull`, table) +Table__field <- function(table, i){ + .Call(`_arrow_Table__field`, table, i) } -Table__GetColumnByName <- function(table, name) { - .Call(`_arrow_Table__GetColumnByName`, table, name) +Table__columns <- function(table){ + .Call(`_arrow_Table__columns`, table) } -Table__RemoveColumn <- function(table, i) { - .Call(`_arrow_Table__RemoveColumn`, table, i) +Table__ColumnNames <- function(table){ + .Call(`_arrow_Table__ColumnNames`, table) } -Table__AddColumn <- function(table, i, field, column) { - .Call(`_arrow_Table__AddColumn`, table, i, field, column) +Table__RenameColumns <- function(table, names){ + .Call(`_arrow_Table__RenameColumns`, table, names) } -Table__SetColumn <- function(table, i, field, column) { - .Call(`_arrow_Table__SetColumn`, table, i, field, column) +Table__Slice1 <- function(table, offset){ + .Call(`_arrow_Table__Slice1`, table, offset) } -Table__SelectColumns <- function(table, indices) { - .Call(`_arrow_Table__SelectColumns`, table, indices) +Table__Slice2 <- function(table, offset, length){ + .Call(`_arrow_Table__Slice2`, table, offset, length) } -all_record_batches <- function(lst) { - .Call(`_arrow_all_record_batches`, lst) +Table__Equals <- function(lhs, rhs, check_metadata){ + .Call(`_arrow_Table__Equals`, lhs, rhs, check_metadata) } -Table__from_record_batches <- function(batches, schema_sxp) { - .Call(`_arrow_Table__from_record_batches`, batches, schema_sxp) +Table__Validate <- function(table){ + .Call(`_arrow_Table__Validate`, table) } -GetCpuThreadPoolCapacity <- function() { - .Call(`_arrow_GetCpuThreadPoolCapacity`) +Table__ValidateFull <- function(table){ + .Call(`_arrow_Table__ValidateFull`, table) } -SetCpuThreadPoolCapacity <- function(threads) { - invisible(.Call(`_arrow_SetCpuThreadPoolCapacity`, 
threads)) +Table__GetColumnByName <- function(table, name){ + .Call(`_arrow_Table__GetColumnByName`, table, name) } -GetIOThreadPoolCapacity <- function() { - .Call(`_arrow_GetIOThreadPoolCapacity`) +Table__RemoveColumn <- function(table, i){ + .Call(`_arrow_Table__RemoveColumn`, table, i) } -SetIOThreadPoolCapacity <- function(threads) { - invisible(.Call(`_arrow_SetIOThreadPoolCapacity`, threads)) +Table__AddColumn <- function(table, i, field, column){ + .Call(`_arrow_Table__AddColumn`, table, i, field, column) } -Array__infer_type <- function(x) { - .Call(`_arrow_Array__infer_type`, x) +Table__SetColumn <- function(table, i, field, column){ + .Call(`_arrow_Table__SetColumn`, table, i, field, column) } + +Table__SelectColumns <- function(table, indices){ + .Call(`_arrow_Table__SelectColumns`, table, indices) +} + +all_record_batches <- function(lst){ + .Call(`_arrow_all_record_batches`, lst) +} + +Table__from_record_batches <- function(batches, schema_sxp){ + .Call(`_arrow_Table__from_record_batches`, batches, schema_sxp) +} + +GetCpuThreadPoolCapacity <- function(){ + .Call(`_arrow_GetCpuThreadPoolCapacity`) +} + +SetCpuThreadPoolCapacity <- function(threads){ + invisible(.Call(`_arrow_SetCpuThreadPoolCapacity`, threads)) +} + +GetIOThreadPoolCapacity <- function(){ + .Call(`_arrow_GetIOThreadPoolCapacity`) +} + +SetIOThreadPoolCapacity <- function(threads){ + invisible(.Call(`_arrow_SetIOThreadPoolCapacity`, threads)) +} + +Array__infer_type <- function(x){ + .Call(`_arrow_Array__infer_type`, x) +} + + + diff --git a/r/R/dplyr-eval.R b/r/R/dplyr-eval.R index 57497e41cd2..3a1261602a3 100644 --- a/r/R/dplyr-eval.R +++ b/r/R/dplyr-eval.R @@ -39,7 +39,7 @@ arrow_eval <- function(expr, mask) { } out <- structure(msg, class = "try-error", condition = e) - if (grepl("not supported.*Arrow", msg)) { + if (grepl("not supported.*Arrow", msg) || getOption("arrow.debug", FALSE)) { # One of ours. 
Mark it so that consumers can handle it differently class(out) <- c("arrow-try-error", class(out)) } @@ -75,7 +75,7 @@ arrow_not_supported <- function(msg) { } # Create a data mask for evaluating a dplyr expression -arrow_mask <- function(.data) { +arrow_mask <- function(.data, aggregation = FALSE) { f_env <- new_environment(.cache$functions) # Add functions that need to error hard and clear. @@ -86,6 +86,10 @@ arrow_mask <- function(.data) { f_env[[f]] <- fail } + if (aggregation) { + f_env <- new_environment(agg_funcs, parent = f_env) + } + # Assign the schema to the expressions map(.data$selected_columns, ~ (.$schema <- .data$.data$schema)) diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index 40e4cd4776b..607be82c36b 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -713,7 +713,7 @@ nse_funcs$log <- nse_funcs$logb <- function(x, base = exp(1)) { return(Expression$create("log10_checked", x)) } # ARROW-13345 - stop("`base` values other than exp(1), 2 and 10 not supported in Arrow", call. = FALSE) + arrow_not_supported("`base` values other than exp(1), 2 and 10") } nse_funcs$if_else <- function(condition, true, false, missing = NULL) { @@ -777,3 +777,42 @@ nse_funcs$case_when <- function(...) { ) ) } + +# Aggregation functions +# These all return a list of: +# @param fun string function name +# @param data Expression (these are all currently a single field) +# @param options list of function options, as passed to call_function +# For group-by aggregation, `hash_` gets prepended to the function name. 
+# So to see a list of available hash aggregation functions, do +# list_compute_functions("^hash_") +agg_funcs <- list() +agg_funcs$sum <- function(x, na.rm = FALSE) { + list( + fun = "sum", + data = x, + options = arrow_na_rm(na.rm = na.rm) + ) +} +agg_funcs$any <- function(x, na.rm = FALSE) { + list( + fun = "any", + data = x, + options = arrow_na_rm(na.rm) + ) +} +agg_funcs$all <- function(x, na.rm = FALSE) { + list( + fun = "all", + data = x, + options = arrow_na_rm(na.rm) + ) +} + +arrow_na_rm <- function(na.rm) { + if (!isTRUE(na.rm)) { + # TODO: ARROW-13497 + arrow_not_supported(paste("na.rm =", na.rm)) + } + list(na.rm = na.rm, na.min_count = 0L) +} diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R index 26db190099f..5677afb904a 100644 --- a/r/R/dplyr-summarize.R +++ b/r/R/dplyr-summarize.R @@ -28,14 +28,108 @@ summarise.arrow_dplyr_query <- function(.data, ..., .engine = c("arrow", "duckdb dplyr::group_vars(.data) # vars needed for grouping )) .data <- dplyr::select(.data, vars_to_keep) - if (match.arg(.engine) == "duckdb") { dplyr::summarise(to_duckdb(.data), ...) } else { - if (query_on_dataset(.data)) { - not_implemented_for_dataset("summarize()") + # Try stuff, if successful return() + out <- try(do_arrow_summarize(.data, ...), silent = TRUE) + if (inherits(out, "try-error")) { + return(abandon_ship(call, .data, format(out))) + } else { + return(out) } - dplyr::summarise(dplyr::collect(.data), ...) } } summarise.Dataset <- summarise.ArrowTabular <- summarise.arrow_dplyr_query + +do_arrow_summarize <- function(.data, ..., .groups = NULL) { + if (!is.null(.groups)) { + # ARROW-13550 + abort("`summarize()` with `.groups` argument not supported in Arrow") + } + exprs <- quos(...) 
+ # Check for unnamed expressions and fix if any + unnamed <- !nzchar(names(exprs)) + # Deparse and take the first element in case they're long expressions + names(exprs)[unnamed] <- map_chr(exprs[unnamed], as_label) + + mask <- arrow_mask(.data, aggregation = TRUE) + + results <- list() + for (i in seq_along(exprs)) { + # Iterate over the indices and not the names because names may be repeated + # (which overwrites the previous name) + new_var <- names(exprs)[i] + results[[new_var]] <- arrow_eval(exprs[[i]], mask) + if (inherits(results[[new_var]], "try-error")) { + msg <- handle_arrow_not_supported( + results[[new_var]], + as_label(exprs[[i]]) + ) + stop(msg, call. = FALSE) + } + # Put it in the data mask too? + # mask[[new_var]] <- mask$.data[[new_var]] <- results[[new_var]] + } + + # Now, from that, split out the data (expressions) and options + .data$aggregations <- lapply(results, function(x) x[c("fun", "options")]) + + inputs <- lapply(results, function(x) x$data) + # This is essentially a projection, and the column names don't matter + # (but must exist) + names(inputs) <- as.character(seq_along(inputs)) + .data$selected_columns <- inputs + + # Eventually, we will return .data here if (dataset) but do it eagerly now + do_exec_plan(.data, group_vars = dplyr::group_vars(.data)) +} + +do_exec_plan <- function(.data, group_vars = NULL) { + plan <- ExecPlan$create() + + grouped <- length(group_vars) > 0 + + # Collect the target names first because we have to add back the group vars + target_names <- names(.data) + + if (grouped) { + .data <- ensure_group_vars(.data) + # We also need to prefix all of the aggregation function names with "hash_" + .data$aggregations <- lapply(.data$aggregations, function(x) { + x[["fun"]] <- paste0("hash_", x[["fun"]]) + x + }) + } + + start_node <- plan$Scan(.data) + # ARROW-13498: Even though Scan takes the filter, apparently we have to do it again + if (inherits(.data$filtered_rows, "Expression")) { + start_node <- 
start_node$Filter(.data$filtered_rows) + } + # If any columns are derived we need to Project (otherwise this may be no-op) + project_node <- start_node$Project(.data$selected_columns) + + if (grouped) { + final_node <- project_node$GroupByAggregate( + group_vars, + target_names = target_names, + aggregations = .data$aggregations + ) + out <- plan$Run(final_node) + # The result will have result columns first (named by their function) + # then the grouping cols. dplyr orders group cols first, and it accepts + # names for the result cols. Adapt the result to meet that expectation. + n_results <- length(.data$aggregations) + names(out)[seq_along(.data$aggregations)] <- names(.data$aggregations) + out <- out[c((n_results + 1):ncol(out), seq_along(.data$aggregations))] + } else { + final_node <- project_node$ScalarAggregate( + options = .data$aggregations, + target_names = target_names, + out_field_names = names(.data$aggregations) + ) + out <- plan$Run(final_node) + } + out +} diff --git a/r/R/dplyr.R b/r/R/dplyr.R index 88accac24e9..b2793bdb3c3 100644 --- a/r/R/dplyr.R +++ b/r/R/dplyr.R @@ -216,31 +216,17 @@ restore_dplyr_features <- function(df, query) { # Helper to handle unsupported dplyr features # * For Table/RecordBatch, we collect() and then call the dplyr method in R # * For Dataset, we just error -abandon_ship <- function(call, .data, msg = NULL) { +abandon_ship <- function(call, .data, msg) { dplyr_fun_name <- sub("^(.*?)\\..*", "\\1", as.character(call[[1]])) if (query_on_dataset(.data)) { - if (is.null(msg)) { - # Default message: function not implemented - not_implemented_for_dataset(paste0(dplyr_fun_name, "()")) - } else { - stop(msg, "\nCall collect() first to pull data into R.", call. = FALSE) - } + stop(msg, "\nCall collect() first to pull data into R.", call. = FALSE) } # else, collect and call dplyr method - if (!is.null(msg)) { - warning(msg, "; pulling data into R", immediate. = TRUE, call. 
= FALSE) - } + msg <- sub("\\n$", "", msg) + warning(msg, "; pulling data into R", immediate. = TRUE, call. = FALSE) call$.data <- dplyr::collect(.data) call[[1]] <- get(dplyr_fun_name, envir = asNamespace("dplyr")) eval.parent(call, 2) } query_on_dataset <- function(x) !inherits(x$.data, "InMemoryDataset") - -not_implemented_for_dataset <- function(method) { - stop( - method, " is not currently implemented for Arrow Datasets. ", - "Call collect() first to pull data into R.", - call. = FALSE - ) -} diff --git a/r/R/duckdb.R b/r/R/duckdb.R index 6ed1df3d826..bc003a6ea8f 100644 --- a/r/R/duckdb.R +++ b/r/R/duckdb.R @@ -40,8 +40,7 @@ #' #' @name to_duckdb #' @export -#' @examplesIf { arrow_with_dataset() && requireNamespace("duckdb", quietly = TRUE) && -#' packageVersion("duckdb") > "0.2.7" && requireNamespace("dplyr", quietly = TRUE) } +#' @examplesIf getFromNamespace("run_duckdb_examples", "arrow")() #' library(dplyr) #' #' ds <- InMemoryDataset$create(mtcars) @@ -113,3 +112,7 @@ duckdb_disconnector <- function(con, tbl_name) { }) environment() } + +run_duckdb_examples <- function() { + arrow_with_dataset() && requireNamespace("duckdb", quietly = TRUE) && packageVersion("duckdb") > "0.2.7" && requireNamespace("dplyr", quietly = TRUE) +} diff --git a/r/R/query-engine.R b/r/R/query-engine.R new file mode 100644 index 00000000000..72c35c515db --- /dev/null +++ b/r/R/query-engine.R @@ -0,0 +1,75 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ExecPlan <- R6Class("ExecPlan", + inherit = ArrowObject, + public = list( + Scan = function(dataset) { + # Handle arrow_dplyr_query + if (inherits(dataset, "arrow_dplyr_query")) { + filter <- dataset$filtered_rows + if (isTRUE(filter)) { + filter <- Expression$scalar(TRUE) + } + # Use FieldsInExpression to find all from dataset$selected_columns + colnames <- unique(unlist(map( + dataset$selected_columns, + field_names_in_expression + ))) + dataset <- dataset$.data + } else { + if (inherits(dataset, "ArrowTabular")) { + dataset <- InMemoryDataset$create(dataset) + } + assert_is(dataset, "Dataset") + # Set some defaults + filter <- Expression$scalar(TRUE) + colnames <- names(dataset) + } + # ScanNode needs the filter to do predicate pushdown and skip partitions, + # and it needs to know which fields to materialize (and which are unnecessary) + ExecNode_Scan(self, dataset, filter, colnames) + }, + Run = function(node) { + assert_is(node, "ExecNode") + ExecPlan_run(self, node) + } + ) +) +ExecPlan$create <- function(use_threads = option_use_threads()) { + ExecPlan_create(use_threads) +} + +ExecNode <- R6Class("ExecNode", + inherit = ArrowObject, + public = list( + Project = function(cols) { + assert_is_list_of(cols, "Expression") + ExecNode_Project(self, cols, names(cols)) + }, + Filter = function(expr) { + assert_is(expr, "Expression") + ExecNode_Filter(self, expr) + }, + ScalarAggregate = function(options, target_names, out_field_names) { + ExecNode_ScalarAggregate(self, options, target_names, out_field_names) + }, + GroupByAggregate = function(group_vars, 
target_names, aggregations) { + ExecNode_GroupByAggregate(self, group_vars, target_names, aggregations) + } + ) +) diff --git a/r/man/ChunkedArray.Rd b/r/man/ChunkedArray.Rd index 486b6222af7..3a504f01466 100644 --- a/r/man/ChunkedArray.Rd +++ b/r/man/ChunkedArray.Rd @@ -53,6 +53,28 @@ within the array's internal data. This can be an expensive check, potentially \c } } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +# Pass items into chunked_array as separate objects to create chunks +class_scores <- chunked_array(c(87, 88, 89), c(94, 93, 92), c(71, 72, 73)) +class_scores$num_chunks + +# When taking a Slice from a chunked_array, chunks are preserved +class_scores$Slice(2, length = 5) + +# You can combine Take and SortIndices to return a ChunkedArray with 1 chunk +# containing all values, ordered. +class_scores$Take(class_scores$SortIndices(descending = TRUE)) + +# If you pass a list into chunked_array, you get a list of length 1 +list_scores <- chunked_array(list(c(9.9, 9.6, 9.5), c(8.2, 8.3, 8.4), c(10.0, 9.9, 9.8))) +list_scores$num_chunks + +# When constructing a ChunkedArray, the first chunk is used to infer type. +doubles <- chunked_array(c(1, 2, 3), c(5L, 6L, 7L)) +doubles$type +\dontshow{\}) # examplesIf} +} \seealso{ \link{Array} } diff --git a/r/man/Field.Rd b/r/man/Field.Rd index 03dffd11ca9..77d31fa637a 100644 --- a/r/man/Field.Rd +++ b/r/man/Field.Rd @@ -28,3 +28,8 @@ field(name, type, metadata) } } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +field("x", int32()) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/FileFormat.Rd b/r/man/FileFormat.Rd index b8d4dc01bad..cabacc93755 100644 --- a/r/man/FileFormat.Rd +++ b/r/man/FileFormat.Rd @@ -51,3 +51,18 @@ From \link{CsvFragmentScanOptions} (these values can be overridden at scan time) It returns the appropriate subclass of \code{FileFormat} (e.g. 
\code{ParquetFileFormat}) } +\examples{ +\dontshow{if (arrow_with_dataset() && tolower(Sys.info()[["sysname"]]) != "windows") (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +## Semi-colon delimited files +# Set up directory for examples +tf <- tempfile() +dir.create(tf) +on.exit(unlink(tf)) +write.table(mtcars, file.path(tf, "file1.txt"), sep = ";", row.names = FALSE) + +# Create FileFormat object +format <- FileFormat$create(format = "text", delimiter = ";") + +open_dataset(tf, format = format) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/ParquetFileReader.Rd b/r/man/ParquetFileReader.Rd index 0b49df79d6b..30d0725a498 100644 --- a/r/man/ParquetFileReader.Rd +++ b/r/man/ParquetFileReader.Rd @@ -45,3 +45,15 @@ The optional \verb{column_indices=} argument is a 0-based integer vector indicat } } +\examples{ +\dontshow{if (arrow_with_parquet()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +f <- system.file("v0.7.1.parquet", package = "arrow") +pq <- ParquetFileReader$create(f) +pq$GetSchema() +if (codec_is_available("snappy")) { + # This file has compressed data columns + tab <- pq$ReadTable() + tab$schema +} +\dontshow{\}) # examplesIf} +} diff --git a/r/man/RecordBatch.Rd b/r/man/RecordBatch.Rd index e3024b91b7a..ff08c215853 100644 --- a/r/man/RecordBatch.Rd +++ b/r/man/RecordBatch.Rd @@ -79,3 +79,14 @@ All list elements are coerced to string. 
See \code{schema()} for more informatio } } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +batch <- record_batch(name = rownames(mtcars), mtcars) +dim(batch) +dim(head(batch)) +names(batch) +batch$mpg +batch[["cyl"]] +as.data.frame(batch[4:8, c("gear", "hp", "wt")]) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/RecordBatchReader.Rd b/r/man/RecordBatchReader.Rd index a206c30c8fb..90c796a6693 100644 --- a/r/man/RecordBatchReader.Rd +++ b/r/man/RecordBatchReader.Rd @@ -43,6 +43,43 @@ are in the file. } } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +tf <- tempfile() +on.exit(unlink(tf)) + +batch <- record_batch(chickwts) + +# This opens a connection to the file in Arrow +file_obj <- FileOutputStream$create(tf) +# Pass that to a RecordBatchWriter to write data conforming to a schema +writer <- RecordBatchFileWriter$create(file_obj, batch$schema) +writer$write(batch) +# You may write additional batches to the stream, provided that they have +# the same schema. +# Call "close" on the writer to indicate end-of-file/stream +writer$close() +# Then, close the connection--closing the IPC message does not close the file +file_obj$close() + +# Now, we have a file we can read from. 
Same pattern: open file connection, +# then pass it to a RecordBatchReader +read_file_obj <- ReadableFile$create(tf) +reader <- RecordBatchFileReader$create(read_file_obj) +# RecordBatchFileReader knows how many batches it has (StreamReader does not) +reader$num_record_batches +# We could consume the Reader by calling $read_next_batch() until all are +# consumed, or we can call $read_table() to pull them all into a Table +tab <- reader$read_table() +# Call as.data.frame to turn that Table into an R data.frame +df <- as.data.frame(tab) +# This should be the same data we sent +all.equal(df, chickwts, check.attributes = FALSE) +# Unlike the Writers, we don't have to close RecordBatchReaders, +# but we do still need to close the file connection +read_file_obj$close() +\dontshow{\}) # examplesIf} +} \seealso{ \code{\link[=read_ipc_stream]{read_ipc_stream()}} and \code{\link[=read_feather]{read_feather()}} provide a much simpler interface for reading data from these formats and are sufficient for many use cases. diff --git a/r/man/RecordBatchWriter.Rd b/r/man/RecordBatchWriter.Rd index cc6d2feb3ac..219c150e6a4 100644 --- a/r/man/RecordBatchWriter.Rd +++ b/r/man/RecordBatchWriter.Rd @@ -45,6 +45,43 @@ to be closed separately. } } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +tf <- tempfile() +on.exit(unlink(tf)) + +batch <- record_batch(chickwts) + +# This opens a connection to the file in Arrow +file_obj <- FileOutputStream$create(tf) +# Pass that to a RecordBatchWriter to write data conforming to a schema +writer <- RecordBatchFileWriter$create(file_obj, batch$schema) +writer$write(batch) +# You may write additional batches to the stream, provided that they have +# the same schema. +# Call "close" on the writer to indicate end-of-file/stream +writer$close() +# Then, close the connection--closing the IPC message does not close the file +file_obj$close() + +# Now, we have a file we can read from.
Same pattern: open file connection, +# then pass it to a RecordBatchReader +read_file_obj <- ReadableFile$create(tf) +reader <- RecordBatchFileReader$create(read_file_obj) +# RecordBatchFileReader knows how many batches it has (StreamReader does not) +reader$num_record_batches +# We could consume the Reader by calling $read_next_batch() until all are +# consumed, or we can call $read_table() to pull them all into a Table +tab <- reader$read_table() +# Call as.data.frame to turn that Table into an R data.frame +df <- as.data.frame(tab) +# This should be the same data we sent +all.equal(df, chickwts, check.attributes = FALSE) +# Unlike the Writers, we don't have to close RecordBatchReaders, +# but we do still need to close the file connection +read_file_obj$close() +\dontshow{\}) # examplesIf} +} \seealso{ \code{\link[=write_ipc_stream]{write_ipc_stream()}} and \code{\link[=write_feather]{write_feather()}} provide a much simpler interface for writing data to these formats and are sufficient for many use diff --git a/r/man/Scalar.Rd b/r/man/Scalar.Rd index 9128988d11c..21e04c12e08 100644 --- a/r/man/Scalar.Rd +++ b/r/man/Scalar.Rd @@ -19,3 +19,20 @@ A \code{Scalar} holds a single value of an Arrow type.
\verb{$type}: Scalar type } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +Scalar$create(pi) +Scalar$create(404) +# If you pass a vector into Scalar$create, you get a list containing your items +Scalar$create(c(1, 2, 3)) + +# Comparisons +my_scalar <- Scalar$create(99) +my_scalar$ApproxEquals(Scalar$create(99.00001)) # FALSE +my_scalar$ApproxEquals(Scalar$create(99.000009)) # TRUE +my_scalar$Equals(Scalar$create(99.000009)) # FALSE +my_scalar$Equals(Scalar$create(99L)) # FALSE (types don't match) + +my_scalar$ToString() +\dontshow{\}) # examplesIf} +} diff --git a/r/man/Schema.Rd b/r/man/Schema.Rd index 0c66e5c2a42..6e385bb804e 100644 --- a/r/man/Schema.Rd +++ b/r/man/Schema.Rd @@ -74,3 +74,12 @@ Files with compressed metadata are readable by older versions of arrow, but the metadata is dropped. } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +df <- data.frame(col1 = 2:4, col2 = c(0.1, 0.3, 0.5)) +tab1 <- Table$create(df) +tab1$schema +tab2 <- Table$create(df, schema = schema(col1 = int8(), col2 = float32())) +tab2$schema +\dontshow{\}) # examplesIf} +} diff --git a/r/man/Table.Rd b/r/man/Table.Rd index d955b0f5a29..2675943e572 100644 --- a/r/man/Table.Rd +++ b/r/man/Table.Rd @@ -79,3 +79,14 @@ All list elements are coerced to string. See \code{schema()} for more informatio } } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +tab <- Table$create(name = rownames(mtcars), mtcars) +dim(tab) +dim(head(tab)) +names(tab) +tab$mpg +tab[["cyl"]] +as.data.frame(tab[4:8, c("gear", "hp", "wt")]) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/array.Rd b/r/man/array.Rd index ed25a2b0a34..78d3eaff6ea 100644 --- a/r/man/array.Rd +++ b/r/man/array.Rd @@ -82,3 +82,26 @@ within the array's internal data. 
This can be an expensive check, potentially \c } } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +my_array <- Array$create(1:10) +my_array$type +my_array$cast(int8()) + +# Check if value is null; zero-indexed +na_array <- Array$create(c(1:5, NA)) +na_array$IsNull(0) +na_array$IsNull(5) +na_array$IsValid(5) +na_array$null_count + +# zero-copy slicing; the offset of the new Array will be the same as the index passed to $Slice +new_array <- na_array$Slice(5) +new_array$offset + +# Compare 2 arrays +na_array2 <- na_array +na_array2 == na_array # element-wise comparison +na_array2$Equals(na_array) # overall comparison +\dontshow{\}) # examplesIf} +} diff --git a/r/man/buffer.Rd b/r/man/buffer.Rd index 99b636da3c7..a3ca1fc2fcb 100644 --- a/r/man/buffer.Rd +++ b/r/man/buffer.Rd @@ -33,3 +33,12 @@ contiguous memory with a particular size. } } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +my_buffer <- buffer(c(1, 2, 3, 4)) +my_buffer$is_mutable +my_buffer$ZeroPadding() +my_buffer$size +my_buffer$capacity +\dontshow{\}) # examplesIf} +} diff --git a/r/man/call_function.Rd b/r/man/call_function.Rd index 7e9b7e50ea0..bef89f10b18 100644 --- a/r/man/call_function.Rd +++ b/r/man/call_function.Rd @@ -35,6 +35,16 @@ are callable with an \code{arrow_} prefix. When passing indices in \code{...}, \code{args}, or \code{options}, express them as 0-based integers (consistent with C++). 
} +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +a <- Array$create(c(1L, 2L, 3L, NA, 5L)) +s <- Scalar$create(4L) +call_function("fill_null", a, s) + +a <- Array$create(rnorm(10000)) +call_function("quantile", a, options = list(q = seq(0, 1, 0.25))) +\dontshow{\}) # examplesIf} +} \seealso{ \href{https://arrow.apache.org/docs/cpp/compute.html}{Arrow C++ documentation} for the functions and their respective options. diff --git a/r/man/codec_is_available.Rd b/r/man/codec_is_available.Rd index 1b5e8278fa9..b3238ff1dca 100644 --- a/r/man/codec_is_available.Rd +++ b/r/man/codec_is_available.Rd @@ -18,3 +18,8 @@ Support for compression libraries depends on the build-time settings of the Arrow C++ library. This function lets you know which are available for use. } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +codec_is_available("gzip") +\dontshow{\}) # examplesIf} +} diff --git a/r/man/copy_files.Rd b/r/man/copy_files.Rd index 75cc4405d8a..1b83703f19f 100644 --- a/r/man/copy_files.Rd +++ b/r/man/copy_files.Rd @@ -23,3 +23,13 @@ Nothing: called for side effects in the file system \description{ Copy files between FileSystems } +\examples{ +\dontshow{if (FALSE) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +# Copy an S3 bucket's files to a local directory: +copy_files("s3://your-bucket-name", "local-directory") +# Using a FileSystem object +copy_files(s3_bucket("your-bucket-name"), "local-directory") +# Or go the other way, from local to S3 +copy_files("local-directory", s3_bucket("your-bucket-name")) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/data-type.Rd b/r/man/data-type.Rd index 101702a2fb2..a0631897573 100644 --- a/r/man/data-type.Rd +++ b/r/man/data-type.Rd @@ -150,6 +150,14 @@ are translated to R objects, \code{uint32} and \code{uint64} are converted to \c types, this conversion can be 
disabled (so that \code{int64} always yields a \code{bit64::integer64} object) by setting \code{options(arrow.int64_downcast = FALSE)}. } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +bool() +struct(a = int32(), b = double()) +timestamp("ms", timezone = "CEST") +time64("ns") +\dontshow{\}) # examplesIf} +} \seealso{ \code{\link[=dictionary]{dictionary()}} for creating a dictionary (factor-like) type. } diff --git a/r/man/hive_partition.Rd b/r/man/hive_partition.Rd index 39d5d8d0ae2..eef9f9157ea 100644 --- a/r/man/hive_partition.Rd +++ b/r/man/hive_partition.Rd @@ -28,3 +28,8 @@ Hive partitioning embeds field names and values in path segments, such as Because fields are named in the path segments, order of fields passed to \code{hive_partition()} does not matter. } +\examples{ +\dontshow{if (arrow_with_dataset()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +hive_partition(year = int16(), month = int8()) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/list_compute_functions.Rd b/r/man/list_compute_functions.Rd index ba17688d833..4ca0e518f13 100644 --- a/r/man/list_compute_functions.Rd +++ b/r/man/list_compute_functions.Rd @@ -37,3 +37,10 @@ The package includes Arrow methods for many base R functions that can be called directly on Arrow objects, as well as some tidyverse-flavored versions available inside \code{dplyr} verbs. 
} +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +list_compute_functions() +list_compute_functions(pattern = "^UTF8", ignore.case = TRUE) +list_compute_functions(pattern = "^is", invert = TRUE) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/load_flight_server.Rd b/r/man/load_flight_server.Rd index 7e2000a9ca2..66d30f39147 100644 --- a/r/man/load_flight_server.Rd +++ b/r/man/load_flight_server.Rd @@ -15,3 +15,8 @@ to look in the \verb{inst/} directory for included modules.} \description{ Load a Python Flight server } +\examples{ +\dontshow{if (FALSE) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +load_flight_server("demo_flight_server") +\dontshow{\}) # examplesIf} +} diff --git a/r/man/match_arrow.Rd b/r/man/match_arrow.Rd index 21481af4c6b..c2343361c6e 100644 --- a/r/man/match_arrow.Rd +++ b/r/man/match_arrow.Rd @@ -26,3 +26,28 @@ per element of \code{x} if it is present in \code{table}. \code{base::match()} is not a generic, so we can't just define Arrow methods for it. This function exposes the analogous functions in the Arrow C++ library. } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +# note that the returned value is 0-indexed +cars_tbl <- Table$create(name = rownames(mtcars), mtcars) +match_arrow(Scalar$create("Mazda RX4 Wag"), cars_tbl$name) + +is_in(Array$create("Mazda RX4 Wag"), cars_tbl$name) + +# Although there are multiple matches, you are returned the index of the first +# match, as with the base R equivalent +match(4, mtcars$cyl) # 1-indexed +match_arrow(Scalar$create(4), cars_tbl$cyl) # 0-indexed + +# If `x` contains multiple values, you are returned the indices of the first +# match for each value. 
+match(c(4, 6, 8), mtcars$cyl) +match_arrow(Array$create(c(4, 6, 8)), cars_tbl$cyl) + +# Return type matches type of `x` +is_in(c(4, 6, 8), mtcars$cyl) # returns vector +is_in(Scalar$create(4), mtcars$cyl) # returns Scalar +is_in(Array$create(c(4, 6, 8)), cars_tbl$cyl) # returns Array +is_in(ChunkedArray$create(c(4, 6), 8), cars_tbl$cyl) # returns ChunkedArray +\dontshow{\}) # examplesIf} +} diff --git a/r/man/open_dataset.Rd b/r/man/open_dataset.Rd index 974d4286f59..53eade595be 100644 --- a/r/man/open_dataset.Rd +++ b/r/man/open_dataset.Rd @@ -90,6 +90,57 @@ can accelerate queries that only touch some partitions (files). Call \code{open_dataset()} to point to a directory of data files and return a \code{Dataset}, then use \code{dplyr} methods to query it. } +\examples{ +\dontshow{if (arrow_with_dataset() & arrow_with_parquet()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +# Set up directory for examples +tf <- tempfile() +dir.create(tf) +on.exit(unlink(tf)) + +data <- dplyr::group_by(mtcars, cyl) +write_dataset(data, tf) + +# You can specify a directory containing the files for your dataset and +# open_dataset will scan all files in your directory. +open_dataset(tf) + +# You can also supply a vector of paths +open_dataset(c(file.path(tf, "cyl=4/part-1.parquet"), file.path(tf, "cyl=8/part-2.parquet"))) + +## You must specify the file format if using a format other than parquet. 
+tf2 <- tempfile() +dir.create(tf2) +on.exit(unlink(tf2)) +write_dataset(data, tf2, format = "ipc") +# This line will result in errors when you try to work with the data +\dontrun{ +open_dataset(tf2) +} +# This line will work +open_dataset(tf2, format = "ipc") + +## You can specify file partitioning to include it as a field in your dataset +# Create a temporary directory and write example dataset +tf3 <- tempfile() +dir.create(tf3) +on.exit(unlink(tf3)) +write_dataset(airquality, tf3, partitioning = c("Month", "Day"), hive_style = FALSE) + +# View files - you can see the partitioning means that files have been written +# to folders based on Month/Day values +list.files(tf3, recursive = TRUE) + +# With no partitioning specified, dataset contains all files but doesn't include +# directory names as field names +open_dataset(tf3) + +# Now that partitioning has been specified, your dataset contains columns for Month and Day +open_dataset(tf3, partitioning = c("Month", "Day")) + +# If you want to specify the data types for your fields, you can pass in a Schema +open_dataset(tf3, partitioning = schema(Month = int8(), Day = int8())) +\dontshow{\}) # examplesIf} +} \seealso{ \code{vignette("dataset", package = "arrow")} } diff --git a/r/man/read_delim_arrow.Rd b/r/man/read_delim_arrow.Rd index d9c80306931..30b146a4fee 100644 --- a/r/man/read_delim_arrow.Rd +++ b/r/man/read_delim_arrow.Rd @@ -205,3 +205,14 @@ Note that if you are specifying column names, whether by \code{schema} or to identify column names, you'll need to add \code{skip = 1} to skip that row. 
} +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +tf <- tempfile() +on.exit(unlink(tf)) +write.csv(mtcars, file = tf) +df <- read_csv_arrow(tf) +dim(df) +# Can select columns +df <- read_csv_arrow(tf, col_select = starts_with("d")) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/read_feather.Rd b/r/man/read_feather.Rd index fa18e3f7844..95f4d1d12c6 100644 --- a/r/man/read_feather.Rd +++ b/r/man/read_feather.Rd @@ -34,6 +34,17 @@ and to make sharing data across data analysis languages easy. This function reads both the original, limited specification of the format and the version 2 specification, which is the Apache Arrow IPC file format. } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +tf <- tempfile() +on.exit(unlink(tf)) +write_feather(mtcars, tf) +df <- read_feather(tf) +dim(df) +# Can select columns +df <- read_feather(tf, col_select = starts_with("d")) +\dontshow{\}) # examplesIf} +} \seealso{ \link{FeatherReader} and \link{RecordBatchReader} for lower-level access to reading Arrow IPC data. } diff --git a/r/man/read_json_arrow.Rd b/r/man/read_json_arrow.Rd index 476c99fe4de..53d7107ae81 100644 --- a/r/man/read_json_arrow.Rd +++ b/r/man/read_json_arrow.Rd @@ -38,3 +38,15 @@ A \code{data.frame}, or a Table if \code{as_data_frame = FALSE}. 
\description{ Using \link{JsonTableReader} } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +tf <- tempfile() +on.exit(unlink(tf)) +writeLines(' + { "hello": 3.5, "world": false, "yo": "thing" } + { "hello": 3.25, "world": null } + { "hello": 0.0, "world": true, "yo": null } + ', tf, useBytes = TRUE) +df <- read_json_arrow(tf) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/read_parquet.Rd b/r/man/read_parquet.Rd index ffb2cf7109f..056e8644747 100644 --- a/r/man/read_parquet.Rd +++ b/r/man/read_parquet.Rd @@ -39,3 +39,12 @@ A \link[=Table]{arrow::Table}, or a \code{data.frame} if \code{as_data_frame} is '\href{https://parquet.apache.org/}{Parquet}' is a columnar storage file format. This function enables you to read Parquet files into R. } +\examples{ +\dontshow{if (arrow_with_parquet()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +tf <- tempfile() +on.exit(unlink(tf)) +write_parquet(mtcars, tf) +df <- read_parquet(tf, col_select = starts_with("d")) +head(df) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/s3_bucket.Rd b/r/man/s3_bucket.Rd index 78d527a56c4..95a086deae5 100644 --- a/r/man/s3_bucket.Rd +++ b/r/man/s3_bucket.Rd @@ -21,3 +21,8 @@ are authorized to access the bucket's contents. that automatically detects the bucket's AWS region and holding onto its relative path. } +\examples{ +\dontshow{if (arrow_with_s3()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +bucket <- s3_bucket("ursa-labs-taxi-data") +\dontshow{\}) # examplesIf} +} diff --git a/r/man/to_duckdb.Rd b/r/man/to_duckdb.Rd index c273a7520d5..ffde91f14f2 100644 --- a/r/man/to_duckdb.Rd +++ b/r/man/to_duckdb.Rd @@ -39,3 +39,22 @@ that starts with an Arrow object to use DuckDB to calculate the summarization step. Internally, this calls \code{to_duckdb()} with all of the default argument values. 
} +\examples{ +\dontshow{if (getFromNamespace("run_duckdb_examples", "arrow")()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +library(dplyr) + +ds <- InMemoryDataset$create(mtcars) + +ds \%>\% + filter(mpg < 30) \%>\% + to_duckdb() \%>\% + group_by(cyl) \%>\% + summarize(mean_mpg = mean(mpg, na.rm = TRUE)) + +# the same query can be simplified using .engine = "duckdb" +ds \%>\% + filter(mpg < 30) \%>\% + group_by(cyl) \%>\% + summarize(mean_mpg = mean(mpg, na.rm = TRUE), .engine = "duckdb") +\dontshow{\}) # examplesIf} +} diff --git a/r/man/type.Rd b/r/man/type.Rd index 2f85e4a6ac6..d55bbe24bd5 100644 --- a/r/man/type.Rd +++ b/r/man/type.Rd @@ -15,3 +15,13 @@ an arrow logical type \description{ infer the arrow Array type from an R vector } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +type(1:10) +type(1L:10L) +type(c(1, 1.5, 2)) +type(c("A", "B", "C")) +type(mtcars) +type(Sys.Date()) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/unify_schemas.Rd b/r/man/unify_schemas.Rd index 709e33a5e74..50c80c2dda9 100644 --- a/r/man/unify_schemas.Rd +++ b/r/man/unify_schemas.Rd @@ -18,3 +18,10 @@ A \code{Schema} with the union of fields contained in the inputs, or \description{ Combine and harmonize schemas } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +a <- schema(b = double(), c = bool()) +z <- schema(b = double(), k = utf8()) +unify_schemas(a, z) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/value_counts.Rd b/r/man/value_counts.Rd index 139af8edc63..6ef77cd4727 100644 --- a/r/man/value_counts.Rd +++ b/r/man/value_counts.Rd @@ -16,3 +16,9 @@ A \code{StructArray} containing "values" (same type as \code{x}) and "counts" \description{ This function tabulates the values in the array and returns a table of counts. 
} +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +cyl_vals <- Array$create(mtcars$cyl) +value_counts(cyl_vals) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/write_csv_arrow.Rd b/r/man/write_csv_arrow.Rd index d6df2bcd08e..55a239ca998 100644 --- a/r/man/write_csv_arrow.Rd +++ b/r/man/write_csv_arrow.Rd @@ -23,3 +23,10 @@ the stream will be left open. \description{ Write CSV file to disk } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +tf <- tempfile() +on.exit(unlink(tf)) +write_csv_arrow(mtcars, tf) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/write_feather.Rd b/r/man/write_feather.Rd index 0cc8c591369..c6273b61be8 100644 --- a/r/man/write_feather.Rd +++ b/r/man/write_feather.Rd @@ -47,6 +47,13 @@ and to make sharing data across data analysis languages easy. This function writes both the original, limited specification of the format and the version 2 specification, which is the Apache Arrow IPC file format. } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +tf <- tempfile() +on.exit(unlink(tf)) +write_feather(mtcars, tf) +\dontshow{\}) # examplesIf} +} \seealso{ \link{RecordBatchWriter} for lower-level access to writing Arrow IPC data. diff --git a/r/man/write_ipc_stream.Rd b/r/man/write_ipc_stream.Rd index 4f742ce9178..2f215f25fd7 100644 --- a/r/man/write_ipc_stream.Rd +++ b/r/man/write_ipc_stream.Rd @@ -31,6 +31,13 @@ with some nonstandard behavior, is deprecated. You should explicitly choose the function that will write the desired IPC format (stream or file) since either can be written to a file or \code{OutputStream}. 
} +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +tf <- tempfile() +on.exit(unlink(tf)) +write_ipc_stream(mtcars, tf) +\dontshow{\}) # examplesIf} +} \seealso{ \code{\link[=write_feather]{write_feather()}} for writing IPC files. \code{\link[=write_to_raw]{write_to_raw()}} to serialize data to a buffer. diff --git a/r/man/write_parquet.Rd b/r/man/write_parquet.Rd index 823a6038e84..d7147f7e8e6 100644 --- a/r/man/write_parquet.Rd +++ b/r/man/write_parquet.Rd @@ -94,3 +94,15 @@ The default "snappy" is used if available, otherwise "uncompressed". To disable compression, set \code{compression = "uncompressed"}. Note that "uncompressed" columns may still have dictionary encoding. } +\examples{ +\dontshow{if (arrow_with_parquet()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +tf1 <- tempfile(fileext = ".parquet") +write_parquet(data.frame(x = 1:5), tf1) + +# using compression +if (codec_is_available("gzip")) { + tf2 <- tempfile(fileext = ".gz.parquet") + write_parquet(data.frame(x = 1:5), tf2, compression = "gzip", compression_level = 5) +} +\dontshow{\}) # examplesIf} +} diff --git a/r/man/write_to_raw.Rd b/r/man/write_to_raw.Rd index 46af09a96e8..1f507e384c3 100644 --- a/r/man/write_to_raw.Rd +++ b/r/man/write_to_raw.Rd @@ -20,3 +20,10 @@ the data (\code{data.frame}, \code{RecordBatch}, or \code{Table}) they were give This function wraps those so that you can serialize data to a buffer and access that buffer as a \code{raw} vector in R. 
} +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +# The default format is "stream" +write_to_raw(mtcars) +write_to_raw(mtcars, format = "file") +\dontshow{\}) # examplesIf} +} diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index 19095a4cbde..92ddbae23fd 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -1092,6 +1092,124 @@ extern "C" SEXP _arrow_io___CompressedInputStream__Make(SEXP codec_sexp, SEXP ra } #endif +// compute-exec.cpp +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr ExecPlan_create(bool use_threads); +extern "C" SEXP _arrow_ExecPlan_create(SEXP use_threads_sexp){ +BEGIN_CPP11 + arrow::r::Input::type use_threads(use_threads_sexp); + return cpp11::as_sexp(ExecPlan_create(use_threads)); +END_CPP11 +} +#else +extern "C" SEXP _arrow_ExecPlan_create(SEXP use_threads_sexp){ + Rf_error("Cannot call ExecPlan_create(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + +// compute-exec.cpp +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr ExecPlan_run(const std::shared_ptr& plan, const std::shared_ptr& final_node); +extern "C" SEXP _arrow_ExecPlan_run(SEXP plan_sexp, SEXP final_node_sexp){ +BEGIN_CPP11 + arrow::r::Input&>::type plan(plan_sexp); + arrow::r::Input&>::type final_node(final_node_sexp); + return cpp11::as_sexp(ExecPlan_run(plan, final_node)); +END_CPP11 +} +#else +extern "C" SEXP _arrow_ExecPlan_run(SEXP plan_sexp, SEXP final_node_sexp){ + Rf_error("Cannot call ExecPlan_run(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. 
"); +} +#endif + +// compute-exec.cpp +#if defined(ARROW_R_WITH_DATASET) +std::shared_ptr ExecNode_Scan(const std::shared_ptr& plan, const std::shared_ptr& dataset, const std::shared_ptr& filter, std::vector materialized_field_names); +extern "C" SEXP _arrow_ExecNode_Scan(SEXP plan_sexp, SEXP dataset_sexp, SEXP filter_sexp, SEXP materialized_field_names_sexp){ +BEGIN_CPP11 + arrow::r::Input&>::type plan(plan_sexp); + arrow::r::Input&>::type dataset(dataset_sexp); + arrow::r::Input&>::type filter(filter_sexp); + arrow::r::Input>::type materialized_field_names(materialized_field_names_sexp); + return cpp11::as_sexp(ExecNode_Scan(plan, dataset, filter, materialized_field_names)); +END_CPP11 +} +#else +extern "C" SEXP _arrow_ExecNode_Scan(SEXP plan_sexp, SEXP dataset_sexp, SEXP filter_sexp, SEXP materialized_field_names_sexp){ + Rf_error("Cannot call ExecNode_Scan(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + +// compute-exec.cpp +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr ExecNode_Filter(const std::shared_ptr& input, const std::shared_ptr& filter); +extern "C" SEXP _arrow_ExecNode_Filter(SEXP input_sexp, SEXP filter_sexp){ +BEGIN_CPP11 + arrow::r::Input&>::type input(input_sexp); + arrow::r::Input&>::type filter(filter_sexp); + return cpp11::as_sexp(ExecNode_Filter(input, filter)); +END_CPP11 +} +#else +extern "C" SEXP _arrow_ExecNode_Filter(SEXP input_sexp, SEXP filter_sexp){ + Rf_error("Cannot call ExecNode_Filter(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. 
"); +} +#endif + +// compute-exec.cpp +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr ExecNode_Project(const std::shared_ptr& input, const std::vector>& exprs, std::vector names); +extern "C" SEXP _arrow_ExecNode_Project(SEXP input_sexp, SEXP exprs_sexp, SEXP names_sexp){ +BEGIN_CPP11 + arrow::r::Input&>::type input(input_sexp); + arrow::r::Input>&>::type exprs(exprs_sexp); + arrow::r::Input>::type names(names_sexp); + return cpp11::as_sexp(ExecNode_Project(input, exprs, names)); +END_CPP11 +} +#else +extern "C" SEXP _arrow_ExecNode_Project(SEXP input_sexp, SEXP exprs_sexp, SEXP names_sexp){ + Rf_error("Cannot call ExecNode_Project(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + +// compute-exec.cpp +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr ExecNode_ScalarAggregate(const std::shared_ptr& input, cpp11::list options, std::vector target_names, std::vector out_field_names); +extern "C" SEXP _arrow_ExecNode_ScalarAggregate(SEXP input_sexp, SEXP options_sexp, SEXP target_names_sexp, SEXP out_field_names_sexp){ +BEGIN_CPP11 + arrow::r::Input&>::type input(input_sexp); + arrow::r::Input::type options(options_sexp); + arrow::r::Input>::type target_names(target_names_sexp); + arrow::r::Input>::type out_field_names(out_field_names_sexp); + return cpp11::as_sexp(ExecNode_ScalarAggregate(input, options, target_names, out_field_names)); +END_CPP11 +} +#else +extern "C" SEXP _arrow_ExecNode_ScalarAggregate(SEXP input_sexp, SEXP options_sexp, SEXP target_names_sexp, SEXP out_field_names_sexp){ + Rf_error("Cannot call ExecNode_ScalarAggregate(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. 
"); +} +#endif + +// compute-exec.cpp +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr ExecNode_GroupByAggregate(const std::shared_ptr& input, std::vector group_vars, std::vector agg_srcs, cpp11::list aggregations); +extern "C" SEXP _arrow_ExecNode_GroupByAggregate(SEXP input_sexp, SEXP group_vars_sexp, SEXP agg_srcs_sexp, SEXP aggregations_sexp){ +BEGIN_CPP11 + arrow::r::Input&>::type input(input_sexp); + arrow::r::Input>::type group_vars(group_vars_sexp); + arrow::r::Input>::type agg_srcs(agg_srcs_sexp); + arrow::r::Input::type aggregations(aggregations_sexp); + return cpp11::as_sexp(ExecNode_GroupByAggregate(input, group_vars, agg_srcs, aggregations)); +END_CPP11 +} +#else +extern "C" SEXP _arrow_ExecNode_GroupByAggregate(SEXP input_sexp, SEXP group_vars_sexp, SEXP agg_srcs_sexp, SEXP aggregations_sexp){ + Rf_error("Cannot call ExecNode_GroupByAggregate(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // compute.cpp #if defined(ARROW_R_WITH_ARROW) std::shared_ptr RecordBatch__cast(const std::shared_ptr& batch, const std::shared_ptr& schema, cpp11::list options); @@ -3123,16 +3241,16 @@ extern "C" SEXP _arrow_compute___expr__call(SEXP func_name_sexp, SEXP argument_l // expression.cpp #if defined(ARROW_R_WITH_ARROW) -std::shared_ptr compute___expr__field_ref(std::string name); -extern "C" SEXP _arrow_compute___expr__field_ref(SEXP name_sexp){ +std::vector field_names_in_expression(const std::shared_ptr& x); +extern "C" SEXP _arrow_field_names_in_expression(SEXP x_sexp){ BEGIN_CPP11 - arrow::r::Input::type name(name_sexp); - return cpp11::as_sexp(compute___expr__field_ref(name)); + arrow::r::Input&>::type x(x_sexp); + return cpp11::as_sexp(field_names_in_expression(x)); END_CPP11 } #else -extern "C" SEXP _arrow_compute___expr__field_ref(SEXP name_sexp){ - Rf_error("Cannot call compute___expr__field_ref(). 
See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +extern "C" SEXP _arrow_field_names_in_expression(SEXP x_sexp){ + Rf_error("Cannot call field_names_in_expression(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); } #endif @@ -3151,6 +3269,21 @@ extern "C" SEXP _arrow_compute___expr__get_field_ref_name(SEXP x_sexp){ } #endif +// expression.cpp +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr compute___expr__field_ref(std::string name); +extern "C" SEXP _arrow_compute___expr__field_ref(SEXP name_sexp){ +BEGIN_CPP11 + arrow::r::Input::type name(name_sexp); + return cpp11::as_sexp(compute___expr__field_ref(name)); +END_CPP11 +} +#else +extern "C" SEXP _arrow_compute___expr__field_ref(SEXP name_sexp){ + Rf_error("Cannot call compute___expr__field_ref(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // expression.cpp #if defined(ARROW_R_WITH_ARROW) std::shared_ptr compute___expr__scalar(const std::shared_ptr& x); @@ -7011,6 +7144,13 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_util___Codec__IsAvailable", (DL_FUNC) &_arrow_util___Codec__IsAvailable, 1}, { "_arrow_io___CompressedOutputStream__Make", (DL_FUNC) &_arrow_io___CompressedOutputStream__Make, 2}, { "_arrow_io___CompressedInputStream__Make", (DL_FUNC) &_arrow_io___CompressedInputStream__Make, 2}, + { "_arrow_ExecPlan_create", (DL_FUNC) &_arrow_ExecPlan_create, 1}, + { "_arrow_ExecPlan_run", (DL_FUNC) &_arrow_ExecPlan_run, 2}, + { "_arrow_ExecNode_Scan", (DL_FUNC) &_arrow_ExecNode_Scan, 4}, + { "_arrow_ExecNode_Filter", (DL_FUNC) &_arrow_ExecNode_Filter, 2}, + { "_arrow_ExecNode_Project", (DL_FUNC) &_arrow_ExecNode_Project, 3}, + { "_arrow_ExecNode_ScalarAggregate", (DL_FUNC) &_arrow_ExecNode_ScalarAggregate, 4}, + { "_arrow_ExecNode_GroupByAggregate", (DL_FUNC) &_arrow_ExecNode_GroupByAggregate, 4}, { 
"_arrow_RecordBatch__cast", (DL_FUNC) &_arrow_RecordBatch__cast, 3}, { "_arrow_Table__cast", (DL_FUNC) &_arrow_Table__cast, 3}, { "_arrow_compute__CallFunction", (DL_FUNC) &_arrow_compute__CallFunction, 3}, @@ -7142,8 +7282,9 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_FixedSizeListType__value_type", (DL_FUNC) &_arrow_FixedSizeListType__value_type, 1}, { "_arrow_FixedSizeListType__list_size", (DL_FUNC) &_arrow_FixedSizeListType__list_size, 1}, { "_arrow_compute___expr__call", (DL_FUNC) &_arrow_compute___expr__call, 3}, - { "_arrow_compute___expr__field_ref", (DL_FUNC) &_arrow_compute___expr__field_ref, 1}, + { "_arrow_field_names_in_expression", (DL_FUNC) &_arrow_field_names_in_expression, 1}, { "_arrow_compute___expr__get_field_ref_name", (DL_FUNC) &_arrow_compute___expr__get_field_ref_name, 1}, + { "_arrow_compute___expr__field_ref", (DL_FUNC) &_arrow_compute___expr__field_ref, 1}, { "_arrow_compute___expr__scalar", (DL_FUNC) &_arrow_compute___expr__scalar, 1}, { "_arrow_compute___expr__ToString", (DL_FUNC) &_arrow_compute___expr__ToString, 1}, { "_arrow_compute___expr__type", (DL_FUNC) &_arrow_compute___expr__type, 2}, diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h index b5a8914d432..4ecb99174b5 100644 --- a/r/src/arrow_types.h +++ b/r/src/arrow_types.h @@ -47,6 +47,15 @@ #include #include +namespace arrow { +namespace compute { + +class ExecPlan; +class ExecNode; + +} // namespace compute +} // namespace arrow + #if defined(ARROW_R_WITH_PARQUET) #include #endif @@ -60,6 +69,7 @@ namespace fs = ::arrow::fs; std::shared_ptr RecordBatch__from_arrays(SEXP, SEXP); arrow::MemoryPool* gc_memory_pool(); +arrow::compute::ExecContext* gc_context(); #if (R_VERSION < R_Version(3, 5, 0)) #define LOGICAL_RO(x) ((const int*)LOGICAL(x)) diff --git a/r/src/compute-exec.cpp b/r/src/compute-exec.cpp new file mode 100644 index 00000000000..61a79bf462e --- /dev/null +++ b/r/src/compute-exec.cpp @@ -0,0 +1,177 @@ +// Licensed to the Apache Software Foundation 
(ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "./arrow_types.h" + +#if defined(ARROW_R_WITH_ARROW) + +#include +#include +#include +#include +#include +#include + +#include + +namespace compute = ::arrow::compute; + +std::shared_ptr make_compute_options(std::string func_name, + cpp11::list options); + +// [[arrow::export]] +std::shared_ptr ExecPlan_create(bool use_threads) { + static compute::ExecContext threaded_context{gc_memory_pool(), + arrow::internal::GetCpuThreadPool()}; + auto plan = ValueOrStop( + compute::ExecPlan::Make(use_threads ? &threaded_context : gc_context())); + return plan; +} + +// [[arrow::export]] +std::shared_ptr ExecPlan_run( + const std::shared_ptr& plan, + const std::shared_ptr& final_node) { + // For now, don't require R to construct SinkNodes. + // Instead, just pass the node we should collect as an argument. 
+ auto sink_gen = compute::MakeSinkNode(final_node.get(), "sink"); + + StopIfNotOk(plan->Validate()); + StopIfNotOk(plan->StartProducing()); + + std::shared_ptr sink_reader = compute::MakeGeneratorReader( + final_node->output_schema(), std::move(sink_gen), gc_memory_pool()); + + plan->finished().Wait(); + return ValueOrStop(arrow::Table::FromRecordBatchReader(sink_reader.get())); +} + +std::shared_ptr ExecNodeOrStop( + arrow::Result maybe_node) { + return std::shared_ptr(ValueOrStop(maybe_node), [](...) { + // empty destructor: ExecNode lifetime is managed by an ExecPlan + }); +} + +#if defined(ARROW_R_WITH_DATASET) + +#include + +// [[dataset::export]] +std::shared_ptr ExecNode_Scan( + const std::shared_ptr& plan, + const std::shared_ptr& dataset, + const std::shared_ptr& filter, + std::vector materialized_field_names) { + // TODO: pass in FragmentScanOptions + auto options = std::make_shared(); + + options->use_async = true; + + options->dataset_schema = dataset->schema(); + + // ScanNode needs the filter to do predicate pushdown and skip partitions + options->filter = ValueOrStop(filter->Bind(*dataset->schema())); + + // ScanNode needs to know which fields to materialize (and which are unnecessary) + std::vector exprs; + for (const auto& name : materialized_field_names) { + exprs.push_back(compute::field_ref(name)); + } + + options->projection = + ValueOrStop(call("make_struct", std::move(exprs), + compute::MakeStructOptions{std::move(materialized_field_names)}) + .Bind(*dataset->schema())); + + return ExecNodeOrStop(arrow::dataset::MakeScanNode(plan.get(), dataset, options)); +} + +#endif + +// [[arrow::export]] +std::shared_ptr ExecNode_Filter( + const std::shared_ptr& input, + const std::shared_ptr& filter) { + return ExecNodeOrStop( + compute::MakeFilterNode(input.get(), /*label=*/"filter", *filter)); +} + +// [[arrow::export]] +std::shared_ptr ExecNode_Project( + const std::shared_ptr& input, + const std::vector>& exprs, + std::vector names) { + // We have 
shared_ptrs of expressions but need the Expressions + std::vector expressions; + for (auto expr : exprs) { + expressions.push_back(*expr); + } + return ExecNodeOrStop(compute::MakeProjectNode( + input.get(), /*label=*/"project", std::move(expressions), std::move(names))); +} + +// [[arrow::export]] +std::shared_ptr ExecNode_ScalarAggregate( + const std::shared_ptr& input, cpp11::list options, + std::vector target_names, std::vector out_field_names) { + std::vector aggregates; + std::vector> keep_alives; + + for (cpp11::list name_opts : options) { + auto name = cpp11::as_cpp(name_opts[0]); + auto opts = make_compute_options(name, name_opts[1]); + + aggregates.push_back( + arrow::compute::internal::Aggregate{std::move(name), opts.get()}); + keep_alives.push_back(std::move(opts)); + } + + std::vector targets; + for (auto&& name : target_names) { + targets.emplace_back(std::move(name)); + } + return ExecNodeOrStop(compute::MakeScalarAggregateNode( + input.get(), /*label=*/"scalar_agg", std::move(aggregates), std::move(targets), + std::move(out_field_names))); +} + +// [[arrow::export]] +std::shared_ptr ExecNode_GroupByAggregate( + const std::shared_ptr& input, std::vector group_vars, + std::vector agg_srcs, cpp11::list aggregations) { + std::vector aggs; + std::vector> keep_alives; + + for (cpp11::list name_opts : aggregations) { + auto name = cpp11::as_cpp(name_opts[0]); + auto opts = make_compute_options(name, name_opts[1]); + + aggs.push_back(arrow::compute::internal::Aggregate{std::move(name), opts.get()}); + keep_alives.push_back(std::move(opts)); + } + + return ExecNodeOrStop(compute::MakeGroupByNode(input.get(), /*label=*/"group_agg", + /*keys=*/std::move(group_vars), + std::move(agg_srcs), std::move(aggs))); +} + +// Result MakeGroupByNode(ExecNode* input, std::string label, +// std::vector keys, +// std::vector agg_srcs, +// std::vector aggs); +#endif diff --git a/r/src/expression.cpp b/r/src/expression.cpp index 4b671cb99dd..3fcba46e911 100644 --- 
a/r/src/expression.cpp +++ b/r/src/expression.cpp @@ -44,8 +44,14 @@ std::shared_ptr compute___expr__call(std::string func_name, } // [[arrow::export]] -std::shared_ptr compute___expr__field_ref(std::string name) { - return std::make_shared(compute::field_ref(std::move(name))); +std::vector field_names_in_expression( + const std::shared_ptr& x) { + std::vector out; + auto field_refs = FieldsInExpression(*x); + for (auto f : field_refs) { + out.push_back(*f.name()); + } + return out; } // [[arrow::export]] @@ -57,6 +63,11 @@ std::string compute___expr__get_field_ref_name( return ""; } +// [[arrow::export]] +std::shared_ptr compute___expr__field_ref(std::string name) { + return std::make_shared(compute::field_ref(std::move(name))); +} + // [[arrow::export]] std::shared_ptr compute___expr__scalar( const std::shared_ptr& x) { diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R index 4711cacfcd0..1a71fea86c7 100644 --- a/r/tests/testthat/test-dataset.R +++ b/r/tests/testthat/test-dataset.R @@ -638,12 +638,15 @@ test_that("Creating UnionDataset", { test_that("map_batches", { skip_if_not_available("parquet") ds <- open_dataset(dataset_dir, partitioning = "part") - expect_equivalent( - ds %>% - filter(int > 5) %>% - select(int, lgl) %>% - map_batches(~ summarize(., min_int = min(int))), - tibble(min_int = c(6L, 101L)) + expect_warning( + expect_equivalent( + ds %>% + filter(int > 5) %>% + select(int, lgl) %>% + map_batches(~ summarize(., min_int = min(int))), + tibble(min_int = c(6L, 101L)) + ), + "pulling data into R" # ARROW-13502 ) }) @@ -986,17 +989,6 @@ test_that("dplyr method not implemented messages", { "Filter expression not supported for Arrow Datasets: dbl > max(dbl)\nCall collect() first to pull data into R.", fixed = TRUE ) - # One explicit test of the full message - expect_error( - ds %>% summarize(mean(int)), - "summarize() is not currently implemented for Arrow Datasets. 
Call collect() first to pull data into R.",
-    fixed = TRUE
-  )
-  # Helper for everything else
-  expect_not_implemented <- function(x) {
-    expect_error(x, "is not currently implemented for Arrow Datasets")
-  }
-  expect_not_implemented(ds %>% filter(int == 1) %>% summarize(n()))
 })
 
 test_that("Dataset and query print methods", {
diff --git a/r/tests/testthat/test-dplyr-aggregate.R b/r/tests/testthat/test-dplyr-aggregate.R
new file mode 100644
index 00000000000..8235ef29948
--- /dev/null
+++ b/r/tests/testthat/test-dplyr-aggregate.R
@@ -0,0 +1,185 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+skip_if_not_available("dataset")
+
+library(dplyr)
+library(stringr)
+
+tbl <- example_data
+# Add some better string data
+tbl$verses <- verses[[1]]
+# c(" a ", "  b  ", "   c   ", ...) increasing padding
+# nchar =   3  5  7  9 11 13 15 17 19 21
+tbl$padded_strings <- stringr::str_pad(letters[1:10], width = 2 * (1:10) + 1, side = "both")
+tbl$some_grouping <- rep(c(1, 2), 5)
+
+test_that("summarize", {
+  expect_dplyr_equal(
+    input %>%
+      select(int, chr) %>%
+      filter(int > 5) %>%
+      summarize(min_int = min(int)),
+    tbl,
+    warning = TRUE
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      select(int, chr) %>%
+      filter(int > 5) %>%
+      summarize(min_int = min(int) / 2),
+    tbl,
+    warning = TRUE
+  )
+})
+
+test_that("Can aggregate in Arrow", {
+  expect_dplyr_equal(
+    input %>%
+      summarize(total = sum(int, na.rm = TRUE)) %>%
+      collect(),
+    tbl
+  )
+  expect_dplyr_equal(
+    input %>%
+      summarize(total = sum(int)) %>%
+      collect(),
+    tbl,
+    # ARROW-13497: This is failing because the default is na.rm = FALSE
+    warning = TRUE
+  )
+})
+
+test_that("Group by sum on dataset", {
+  expect_dplyr_equal(
+    input %>%
+      group_by(some_grouping) %>%
+      summarize(total = sum(int, na.rm = TRUE)) %>%
+      arrange(some_grouping) %>%
+      collect(),
+    tbl
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      group_by(some_grouping) %>%
+      summarize(total = sum(int * 4, na.rm = TRUE)) %>%
+      arrange(some_grouping) %>%
+      collect(),
+    tbl
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      group_by(some_grouping) %>%
+      summarize(total = sum(int)) %>%
+      arrange(some_grouping) %>%
+      collect(),
+    tbl,
+    # ARROW-13497: This is failing because the default is na.rm = FALSE
+    warning = TRUE
+  )
+})
+
+test_that("Group by any/all", {
+  withr::local_options(list(arrow.debug = TRUE))
+
+  expect_dplyr_equal(
+    input %>%
+      group_by(some_grouping) %>%
+      summarize(any(lgl, na.rm = TRUE)) %>%
+      arrange(some_grouping) %>%
+      collect(),
+    tbl
+  )
+  expect_dplyr_equal(
+    input %>%
+      group_by(some_grouping) %>%
+      summarize(all(lgl, na.rm = TRUE)) %>%
+      arrange(some_grouping) %>%
+      collect(),
+    tbl
+  )
+  # ARROW-13497: na.rm option also is not being passed/received to any/all
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(has_words = nchar(verses) < 0) %>%
+      group_by(some_grouping) %>%
+      summarize(any(has_words, na.rm = TRUE)) %>%
+      arrange(some_grouping) %>%
+      collect(),
+    tbl
+  )
+  expect_dplyr_equal(
+    input %>%
+      mutate(has_words = nchar(verses) < 0) %>%
+      group_by(some_grouping) %>%
+      summarize(all(has_words, na.rm = TRUE)) %>%
+      arrange(some_grouping) %>%
+      collect(),
+    tbl
+  )
+  skip("This seems to be calling base::nchar")
+  expect_dplyr_equal(
+    input %>%
+      group_by(some_grouping) %>%
+      summarize(has_words = all(nchar(verses) < 0)) %>%
+      arrange(some_grouping) %>%
+      collect(),
+    tbl
+  )
+})
+
+test_that("Filter and aggregate", {
+  expect_dplyr_equal(
+    input %>%
+      filter(some_grouping == 2) %>%
+      summarize(total = sum(int, na.rm = TRUE)) %>%
+      collect(),
+    tbl
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      filter(int > 5) %>%
+      summarize(total = sum(int, na.rm = TRUE)) %>%
+      collect(),
+    tbl
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      filter(some_grouping == 2) %>%
+      group_by(some_grouping) %>%
+      summarize(total = sum(int, na.rm = TRUE)) %>%
+      arrange(some_grouping) %>%
+      collect(),
+    tbl
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      filter(int > 5) %>%
+      group_by(some_grouping) %>%
+      summarize(total = sum(int, na.rm = TRUE)) %>%
+      arrange(some_grouping) %>%
+      collect(),
+    tbl
+  )
+})
diff --git a/r/tests/testthat/test-dplyr-group-by.R b/r/tests/testthat/test-dplyr-group-by.R
index fe0394bc636..18be2a9304a 100644
--- a/r/tests/testthat/test-dplyr-group-by.R
+++ b/r/tests/testthat/test-dplyr-group-by.R
@@ -29,7 +29,8 @@ test_that("group_by groupings are recorded", {
       select(int, chr) %>%
       filter(int > 5) %>%
       summarize(min_int = min(int)),
-    tbl
+    tbl,
+    warning = TRUE
   )
 })
 
@@ -62,7 +63,8 @@ test_that("ungroup", {
       ungroup() %>%
       filter(int > 5) %>%
       summarize(min_int = min(int)),
-    tbl
+    tbl,
+    warning = TRUE
   )
 })
 
diff --git a/r/tests/testthat/test-dplyr.R b/r/tests/testthat/test-dplyr.R
index da21ccd9ed1..ed03c58a884 100644
--- a/r/tests/testthat/test-dplyr.R
+++ b/r/tests/testthat/test-dplyr.R
@@ -69,24 +69,6 @@ See $.data for the source Arrow object',
   )
 })
 
-test_that("summarize", {
-  expect_dplyr_equal(
-    input %>%
-      select(int, chr) %>%
-      filter(int > 5) %>%
-      summarize(min_int = min(int)),
-    tbl
-  )
-
-  expect_dplyr_equal(
-    input %>%
-      select(int, chr) %>%
-      filter(int > 5) %>%
-      summarize(min_int = min(int) / 2),
-    tbl
-  )
-})
-
 test_that("Empty select returns no columns", {
   expect_dplyr_equal(
     input %>% select() %>% collect(),
@@ -1054,7 +1036,7 @@ test_that("log functions", {
 
   expect_error(
     nse_funcs$log(Expression$scalar(x), base = 5),
-    "`base` values other than exp(1), 2 and 10 not supported in Arrow",
+    "`base` values other than exp(1), 2 and 10 not supported by Arrow",
     fixed = TRUE
   )