From 740352d35afedff69e31d43e2c77fe1c35b86a3a Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 23 Nov 2018 09:02:29 +0100 Subject: [PATCH 01/17] refactoring --- r/DESCRIPTION | 3 + r/NAMESPACE | 71 ++- r/R/ArrayData.R | 24 + r/R/ChunkedArray.R | 14 +- r/R/Column.R | 12 + r/R/Field.R | 15 +- r/R/R6.R | 24 + r/R/RcppExports.R | 28 +- r/R/RecordBatch.R | 17 +- r/R/RecordBatchReader.R | 183 ++---- r/R/RecordBatchWriter.R | 244 ++++---- r/R/Schema.R | 39 +- r/R/array.R | 52 +- r/R/buffer.R | 25 +- r/R/dictionary.R | 15 +- r/R/feather.R | 4 +- r/R/io.R | 205 +++++-- r/R/memory_pool.R | 18 +- r/R/message.R | 57 +- r/R/read_record_batch.R | 52 ++ r/R/read_table.R | 66 +++ r/R/write_table.R | 93 +++ r/README.Rmd | 8 +- r/data-raw/test.R | 85 --- r/doc/arrow.R | 90 +++ r/doc/arrow.Rmd | 236 ++++++++ r/doc/arrow.html | 540 ++++++++++++++++++ r/man/BufferOutputStream.Rd | 17 + r/man/BufferReader.Rd | 14 + r/man/FileOutputStream.Rd | 17 + r/man/FixedSizeBufferWriter.Rd | 17 + r/man/{message_reader.Rd => MessageReader.Rd} | 6 +- r/man/MockOutputStream.Rd | 14 + r/man/ReadableFile.Rd | 17 + r/man/RecordBatchFileReader.Rd | 14 + r/man/RecordBatchFileWriter.Rd | 24 + r/man/RecordBatchStreamReader.Rd | 14 + r/man/RecordBatchStreamWriter.Rd | 24 + r/man/array.Rd | 4 +- r/man/arrow__Array.Rd | 57 ++ r/man/arrow__ArrayData.Rd | 28 + r/man/arrow__Buffer.Rd | 21 + r/man/arrow__ChunkedArray.Rd | 17 + r/man/arrow__Column.Rd | 17 + r/man/arrow__DataType.Rd | 17 + r/man/arrow__DictionaryType.Rd | 17 + r/man/arrow__Field.Rd | 17 + r/man/arrow__FixedWidthType.Rd | 17 + r/man/arrow__RecordBatch.Rd | 17 + r/man/arrow__RecordBatchReader.Rd | 17 + r/man/arrow__Schema.Rd | 29 + r/man/arrow___MemoryPool.Rd | 17 + r/man/arrow__io__BufferOutputStream.Rd | 21 + r/man/arrow__io__BufferReader.Rd | 21 + r/man/arrow__io__FileOutputStream.Rd | 21 + r/man/arrow__io__FixedSizeBufferWriter.Rd | 21 + r/man/arrow__io__InputStream.Rd | 21 + r/man/arrow__io__MemoryMappedFile.Rd | 24 + 
r/man/arrow__io__MockOutputStream.Rd | 21 + r/man/arrow__io__OutputStream.Rd | 23 + r/man/arrow__io__RandomAccessFile.Rd | 21 + r/man/arrow__io__Readable.Rd | 21 + r/man/arrow__io__ReadableFile.Rd | 21 + r/man/arrow__ipc__Message.Rd | 17 + r/man/arrow__ipc__MessageReader.Rd | 17 + r/man/arrow__ipc__RecordBatchFileReader.Rd | 17 + r/man/arrow__ipc__RecordBatchFileWriter.Rd | 40 ++ r/man/arrow__ipc__RecordBatchStreamReader.Rd | 17 + r/man/arrow__ipc__RecordBatchStreamWriter.Rd | 40 ++ r/man/arrow__ipc__RecordBatchWriter.Rd | 28 + r/man/buffer.Rd | 8 +- r/man/buffer_reader.Rd | 14 - r/man/chunked_array.Rd | 4 +- r/man/default_memory_pool.Rd | 14 + r/man/dictionary.Rd | 3 + r/man/field.Rd | 4 +- r/man/io.Rd | 40 -- r/man/mmap_create.Rd | 19 + r/man/mmap_open.Rd | 16 + r/man/read_record_batch.Rd | 18 +- r/man/read_table.Rd | 4 +- r/man/record_batch.Rd | 7 +- r/man/record_batch_file_reader.Rd | 14 - r/man/record_batch_file_writer.Rd | 19 - r/man/record_batch_stream_reader.Rd | 14 - r/man/record_batch_stream_writer.Rd | 16 - r/man/schema.Rd | 6 +- r/man/write_arrow.Rd | 2 +- r/man/write_arrow.arrow-colon-colon-Table.Rd | 17 + r/man/write_record_batch.Rd | 18 - r/man/write_table.Rd | 2 +- r/src/RcppExports.cpp | 86 ++- r/src/recordbatch.cpp | 27 + r/src/recordbatchwriter.cpp | 43 +- r/tests/testthat/test-RecordBatch.R | 61 -- r/tests/testthat/test-Table.R | 8 +- r/tests/testthat/test-bufferreader.R | 8 +- r/tests/testthat/test-feather.R | 4 +- r/tests/testthat/test-message.R | 10 +- r/tests/testthat/test-messagereader.R | 23 +- r/tests/testthat/test-read_record_batch.R | 73 +++ r/tests/testthat/test-schema.R | 34 +- 102 files changed, 2940 insertions(+), 798 deletions(-) create mode 100644 r/R/read_record_batch.R create mode 100644 r/R/read_table.R create mode 100644 r/R/write_table.R delete mode 100644 r/data-raw/test.R create mode 100644 r/doc/arrow.R create mode 100644 r/doc/arrow.Rmd create mode 100644 r/doc/arrow.html create mode 100644 r/man/BufferOutputStream.Rd 
create mode 100644 r/man/BufferReader.Rd create mode 100644 r/man/FileOutputStream.Rd create mode 100644 r/man/FixedSizeBufferWriter.Rd rename r/man/{message_reader.Rd => MessageReader.Rd} (79%) create mode 100644 r/man/MockOutputStream.Rd create mode 100644 r/man/ReadableFile.Rd create mode 100644 r/man/RecordBatchFileReader.Rd create mode 100644 r/man/RecordBatchFileWriter.Rd create mode 100644 r/man/RecordBatchStreamReader.Rd create mode 100644 r/man/RecordBatchStreamWriter.Rd create mode 100644 r/man/arrow__Array.Rd create mode 100644 r/man/arrow__ArrayData.Rd create mode 100644 r/man/arrow__Buffer.Rd create mode 100644 r/man/arrow__ChunkedArray.Rd create mode 100644 r/man/arrow__Column.Rd create mode 100644 r/man/arrow__DataType.Rd create mode 100644 r/man/arrow__DictionaryType.Rd create mode 100644 r/man/arrow__Field.Rd create mode 100644 r/man/arrow__FixedWidthType.Rd create mode 100644 r/man/arrow__RecordBatch.Rd create mode 100644 r/man/arrow__RecordBatchReader.Rd create mode 100644 r/man/arrow__Schema.Rd create mode 100644 r/man/arrow___MemoryPool.Rd create mode 100644 r/man/arrow__io__BufferOutputStream.Rd create mode 100644 r/man/arrow__io__BufferReader.Rd create mode 100644 r/man/arrow__io__FileOutputStream.Rd create mode 100644 r/man/arrow__io__FixedSizeBufferWriter.Rd create mode 100644 r/man/arrow__io__InputStream.Rd create mode 100644 r/man/arrow__io__MemoryMappedFile.Rd create mode 100644 r/man/arrow__io__MockOutputStream.Rd create mode 100644 r/man/arrow__io__OutputStream.Rd create mode 100644 r/man/arrow__io__RandomAccessFile.Rd create mode 100644 r/man/arrow__io__Readable.Rd create mode 100644 r/man/arrow__io__ReadableFile.Rd create mode 100644 r/man/arrow__ipc__Message.Rd create mode 100644 r/man/arrow__ipc__MessageReader.Rd create mode 100644 r/man/arrow__ipc__RecordBatchFileReader.Rd create mode 100644 r/man/arrow__ipc__RecordBatchFileWriter.Rd create mode 100644 r/man/arrow__ipc__RecordBatchStreamReader.Rd create mode 100644 
r/man/arrow__ipc__RecordBatchStreamWriter.Rd create mode 100644 r/man/arrow__ipc__RecordBatchWriter.Rd delete mode 100644 r/man/buffer_reader.Rd create mode 100644 r/man/default_memory_pool.Rd delete mode 100644 r/man/io.Rd create mode 100644 r/man/mmap_create.Rd create mode 100644 r/man/mmap_open.Rd delete mode 100644 r/man/record_batch_file_reader.Rd delete mode 100644 r/man/record_batch_file_writer.Rd delete mode 100644 r/man/record_batch_stream_reader.Rd delete mode 100644 r/man/record_batch_stream_writer.Rd create mode 100644 r/man/write_arrow.arrow-colon-colon-Table.Rd delete mode 100644 r/man/write_record_batch.Rd create mode 100644 r/tests/testthat/test-read_record_batch.R diff --git a/r/DESCRIPTION b/r/DESCRIPTION index 0250023e8fb..e5368427844 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -61,6 +61,9 @@ Collate: 'memory_pool.R' 'message.R' 'on_exit.R' + 'read_record_batch.R' + 'read_table.R' 'reexports-bit64.R' 'reexports-tibble.R' + 'write_table.R' 'zzz.R' diff --git a/r/NAMESPACE b/r/NAMESPACE index 490d2118c58..7d5c95d9844 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -6,41 +6,47 @@ S3method("==","arrow::DataType") S3method("==","arrow::Field") S3method("==","arrow::RecordBatch") S3method("==","arrow::ipc::Message") +S3method(BufferReader,"arrow::Buffer") +S3method(BufferReader,default) +S3method(FixedSizeBufferWriter,"arrow::Buffer") +S3method(FixedSizeBufferWriter,default) +S3method(MessageReader,"arrow::io::InputStream") +S3method(MessageReader,default) +S3method(RecordBatchFileReader,"arrow::io::RandomAccessFile") +S3method(RecordBatchFileReader,character) +S3method(RecordBatchFileReader,fs_path) +S3method(RecordBatchFileWriter,"arrow::io::OutputStream") +S3method(RecordBatchFileWriter,character) +S3method(RecordBatchFileWriter,fs_path) +S3method(RecordBatchStreamReader,"arrow::io::InputStream") +S3method(RecordBatchStreamReader,raw) +S3method(RecordBatchStreamWriter,"arrow::io::OutputStream") +S3method(RecordBatchStreamWriter,character) 
+S3method(RecordBatchStreamWriter,fs_path) S3method(as_tibble,"arrow::RecordBatch") S3method(as_tibble,"arrow::Table") +S3method(buffer,complex) S3method(buffer,default) S3method(buffer,integer) S3method(buffer,numeric) S3method(buffer,raw) -S3method(buffer_reader,"arrow::Buffer") -S3method(buffer_reader,default) S3method(feather_table_reader,"arrow::io::RandomAccessFile") S3method(feather_table_reader,"arrow::ipc::feather::TableReader") S3method(feather_table_reader,character) S3method(feather_table_reader,default) S3method(feather_table_reader,fs_path) S3method(feather_table_writer,"arrow::io::OutputStream") -S3method(fixed_size_buffer_writer,"arrow::Buffer") -S3method(fixed_size_buffer_writer,default) S3method(length,"arrow::Array") -S3method(message_reader,"arrow::io::InputStream") -S3method(message_reader,default) -S3method(message_reader,raw) S3method(names,"arrow::RecordBatch") S3method(print,"arrow-enum") S3method(read_message,"arrow::io::InputStream") -S3method(read_message,default) -S3method(read_record_batch,"arrow::io::BufferReader") -S3method(read_record_batch,"arrow::io::RandomAccessFile") +S3method(read_message,"arrow::ipc::MessageReader") +S3method(read_record_batch,"arrow::Buffer") +S3method(read_record_batch,"arrow::io::InputStream") S3method(read_record_batch,"arrow::ipc::Message") -S3method(read_record_batch,"arrow::ipc::RecordBatchFileReader") -S3method(read_record_batch,"arrow::ipc::RecordBatchStreamReader") -S3method(read_record_batch,character) -S3method(read_record_batch,fs_path) S3method(read_record_batch,raw) S3method(read_schema,"arrow::Buffer") S3method(read_schema,"arrow::io::InputStream") -S3method(read_schema,default) S3method(read_schema,raw) S3method(read_table,"arrow::io::BufferReader") S3method(read_table,"arrow::io::RandomAccessFile") @@ -49,12 +55,6 @@ S3method(read_table,"arrow::ipc::RecordBatchStreamReader") S3method(read_table,character) S3method(read_table,fs_path) S3method(read_table,raw) 
-S3method(record_batch_file_reader,"arrow::io::RandomAccessFile") -S3method(record_batch_file_reader,character) -S3method(record_batch_file_reader,fs_path) -S3method(record_batch_stream_reader,"arrow::io::InputStream") -S3method(record_batch_stream_reader,raw) -S3method(write_arrow,"arrow::RecordBatch") S3method(write_arrow,"arrow::Table") S3method(write_arrow,data.frame) S3method(write_feather,"arrow::RecordBatch") @@ -64,19 +64,25 @@ S3method(write_feather_RecordBatch,"arrow::io::OutputStream") S3method(write_feather_RecordBatch,character) S3method(write_feather_RecordBatch,default) S3method(write_feather_RecordBatch,fs_path) -S3method(write_record_batch,"arrow::io::OutputStream") -S3method(write_record_batch,"arrow::ipc::RecordBatchWriter") -S3method(write_record_batch,character) -S3method(write_record_batch,fs_path) -S3method(write_record_batch,raw) S3method(write_table,"arrow::io::OutputStream") S3method(write_table,"arrow::ipc::RecordBatchWriter") S3method(write_table,character) S3method(write_table,fs_path) S3method(write_table,raw) +export(BufferOutputStream) +export(BufferReader) export(DateUnit) export(FileMode) +export(FileOutputStream) +export(FixedSizeBufferWriter) +export(MessageReader) export(MessageType) +export(MockOutputStream) +export(ReadableFile) +export(RecordBatchFileReader) +export(RecordBatchFileWriter) +export(RecordBatchStreamReader) +export(RecordBatchStreamWriter) export(StatusCode) export(TimeUnit) export(Type) @@ -84,20 +90,16 @@ export(array) export(as_tibble) export(boolean) export(buffer) -export(buffer_output_stream) -export(buffer_reader) export(cast_options) export(chunked_array) export(date32) export(date64) export(decimal) +export(default_memory_pool) export(dictionary) export(feather_table_reader) export(feather_table_writer) export(field) -export(file_open) -export(file_output_stream) -export(fixed_size_buffer_writer) export(float16) export(float32) export(float64) @@ -106,10 +108,8 @@ export(int32) export(int64) 
export(int8) export(list_of) -export(message_reader) export(mmap_create) export(mmap_open) -export(mock_output_stream) export(null) export(print.integer64) export(read_arrow) @@ -119,10 +119,6 @@ export(read_record_batch) export(read_schema) export(read_table) export(record_batch) -export(record_batch_file_reader) -export(record_batch_file_writer) -export(record_batch_stream_reader) -export(record_batch_stream_writer) export(schema) export(str.integer64) export(struct) @@ -138,7 +134,6 @@ export(utf8) export(write_arrow) export(write_feather) export(write_feather_RecordBatch) -export(write_record_batch) export(write_table) importFrom(R6,R6Class) importFrom(Rcpp,sourceCpp) diff --git a/r/R/ArrayData.R b/r/R/ArrayData.R index 47b858d589f..765971b405b 100644 --- a/r/R/ArrayData.R +++ b/r/R/ArrayData.R @@ -17,6 +17,30 @@ #' @include R6.R +#' @title class arrow::ArrayData +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' @section Usage: +#' +#' ``` +#' data <- array(...)$data() +#' +#' data$type() +#' data$length() +#' data$null_count() +#' data$offset() +#' data$buffers() +#' ``` +#' +#' @section Methods: +#' +#' ... 
+#' +#' @rdname arrow__ArrayData +#' @name arrow__ArrayData `arrow::ArrayData` <- R6Class("arrow::ArrayData", inherit = `arrow::Object`, active = list( diff --git a/r/R/ChunkedArray.R b/r/R/ChunkedArray.R index 338438f578d..188ef35cf16 100644 --- a/r/R/ChunkedArray.R +++ b/r/R/ChunkedArray.R @@ -17,6 +17,18 @@ #' @include R6.R +#' @title class arrow::ChunkedArray +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__ChunkedArray +#' @name arrow__ChunkedArray `arrow::ChunkedArray` <- R6Class("arrow::ChunkedArray", inherit = `arrow::Object`, public = list( length = function() ChunkedArray__length(self), @@ -41,7 +53,7 @@ ) ) -#' create an arrow::Array from an R vector +#' create an [arrow::ChunkedArray][arrow__ChunkedArray] from various R vectors #' #' @param \dots Vectors to coerce #' @param type currently ignored diff --git a/r/R/Column.R b/r/R/Column.R index bf3fe0a0e10..cf37eb0956c 100644 --- a/r/R/Column.R +++ b/r/R/Column.R @@ -17,6 +17,18 @@ #' @include R6.R +#' @title class arrow::Column +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__Column +#' @name arrow__Column `arrow::Column` <- R6Class("arrow::Column", inherit = `arrow::Object`, public = list( length = function() Column__length(self), diff --git a/r/R/Field.R b/r/R/Field.R index 79c0f33be68..224866409cb 100644 --- a/r/R/Field.R +++ b/r/R/Field.R @@ -17,7 +17,20 @@ #' @include R6.R -`arrow::Field` <- R6Class("arrow::Field", inherit = `arrow::Object`, +#' @title class arrow::Field +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__Field +#' @name arrow__Field +`arrow::Field` <- R6Class("arrow::Field", + inherit = `arrow::Object`, public = list( ToString = function() { Field__ToString(self) diff --git a/r/R/R6.R b/r/R/R6.R index 1caa885d90c..a01e886a620 100644 --- a/r/R/R6.R +++ b/r/R/R6.R @@ -54,6 +54,18 @@ 
unique_ptr <- function(class, xp) { !(lhs == rhs) } +#' @title class arrow::DataType +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__DataType +#' @name arrow__DataType `arrow::DataType` <- R6Class("arrow::DataType", inherit = `arrow::Object`, public = list( @@ -116,6 +128,18 @@ unique_ptr <- function(class, xp) { #----- metadata +#' @title class arrow::FixedWidthType +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__FixedWidthType +#' @name arrow__FixedWidthType `arrow::FixedWidthType` <- R6Class("arrow::FixedWidthType", inherit = `arrow::DataType`, public = list( diff --git a/r/R/RcppExports.R b/r/R/RcppExports.R index 324510cf1b6..54763e58f01 100644 --- a/r/R/RcppExports.R +++ b/r/R/RcppExports.R @@ -665,6 +665,14 @@ RecordBatch__Slice2 <- function(self, offset, length) { .Call(`_arrow_RecordBatch__Slice2`, self, offset, length) } +ipc___SerializeRecordBatch__Raw <- function(batch) { + .Call(`_arrow_ipc___SerializeRecordBatch__Raw`, batch) +} + +ipc___ReadRecordBatch__InputStream__Schema <- function(stream, schema) { + .Call(`_arrow_ipc___ReadRecordBatch__InputStream__Schema`, stream, schema) +} + RecordBatchReader__schema <- function(reader) { .Call(`_arrow_RecordBatchReader__schema`, reader) } @@ -701,16 +709,8 @@ Table__from_RecordBatchStreamReader <- function(reader) { .Call(`_arrow_Table__from_RecordBatchStreamReader`, reader) } -ipc___RecordBatchFileWriter__Open <- function(stream, schema) { - .Call(`_arrow_ipc___RecordBatchFileWriter__Open`, stream, schema) -} - -ipc___RecordBatchStreamWriter__Open <- function(stream, schema) { - .Call(`_arrow_ipc___RecordBatchStreamWriter__Open`, stream, schema) -} - -ipc___RecordBatchWriter__WriteRecordBatch <- function(batch_writer, batch, allow_64bit) { - invisible(.Call(`_arrow_ipc___RecordBatchWriter__WriteRecordBatch`, batch_writer, batch, allow_64bit)) 
+ipc___RecordBatchWriter__WriteRecordBatch <- function(batch_writer, batch) { + invisible(.Call(`_arrow_ipc___RecordBatchWriter__WriteRecordBatch`, batch_writer, batch)) } ipc___RecordBatchWriter__WriteTable <- function(batch_writer, table) { @@ -721,6 +721,14 @@ ipc___RecordBatchWriter__Close <- function(batch_writer) { invisible(.Call(`_arrow_ipc___RecordBatchWriter__Close`, batch_writer)) } +ipc___RecordBatchFileWriter__Open <- function(stream, schema) { + .Call(`_arrow_ipc___RecordBatchFileWriter__Open`, stream, schema) +} + +ipc___RecordBatchStreamWriter__Open <- function(stream, schema) { + .Call(`_arrow_ipc___RecordBatchStreamWriter__Open`, stream, schema) +} + Table__from_dataframe <- function(tbl) { .Call(`_arrow_Table__from_dataframe`, tbl) } diff --git a/r/R/RecordBatch.R b/r/R/RecordBatch.R index c606d12143b..22618a4ad74 100644 --- a/r/R/RecordBatch.R +++ b/r/R/RecordBatch.R @@ -17,6 +17,18 @@ #' @include R6.R +#' @title class arrow::RecordBatch +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__RecordBatch +#' @name arrow__RecordBatch `arrow::RecordBatch` <- R6Class("arrow::RecordBatch", inherit = `arrow::Object`, public = list( num_columns = function() RecordBatch__num_columns(self), @@ -29,9 +41,11 @@ assert_that(inherits(other, "arrow::RecordBatch")) RecordBatch__Equals(self, other) }, + RemoveColumn = function(i){ shared_ptr(`arrow::RecordBatch`, RecordBatch__RemoveColumn(self, i)) }, + Slice = function(offset, length = NULL) { if (is.null(length)) { shared_ptr(`arrow::RecordBatch`, RecordBatch__Slice1(self, offset)) @@ -66,10 +80,11 @@ RecordBatch__to_dataframe(x) } -#' Create an arrow::RecordBatch from a data frame +#' Create an [arrow::RecordBatch][arrow__RecordBatch] from a data frame #' #' @param .data a data frame #' +#' @return a [arrow::RecordBatch][arrow__RecordBatch] #' @export record_batch <- function(.data){ shared_ptr(`arrow::RecordBatch`, 
RecordBatch__from_dataframe(.data)) diff --git a/r/R/RecordBatchReader.R b/r/R/RecordBatchReader.R index 35037538426..85bf35221a4 100644 --- a/r/R/RecordBatchReader.R +++ b/r/R/RecordBatchReader.R @@ -17,6 +17,18 @@ #' @include R6.R +#' @title class arrow::RecordBatchReader +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__RecordBatchReader +#' @name arrow__RecordBatchReader `arrow::RecordBatchReader` <- R6Class("arrow::RecordBatchReader", inherit = `arrow::Object`, public = list( schema = function() shared_ptr(`arrow::Schema`, RecordBatchReader__schema(self)), @@ -26,8 +38,32 @@ ) ) +#' @title class arrow::ipc::RecordBatchStreamReader +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__ipc__RecordBatchStreamReader +#' @name arrow__ipc__RecordBatchStreamReader `arrow::ipc::RecordBatchStreamReader` <- R6Class("arrow::ipc::RecordBatchStreamReader", inherit = `arrow::RecordBatchReader`) +#' @title class arrow::ipc::RecordBatchFileReader +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__ipc__RecordBatchFileReader +#' @name arrow__ipc__RecordBatchFileReader `arrow::ipc::RecordBatchFileReader` <- R6Class("arrow::ipc::RecordBatchFileReader", inherit = `arrow::Object`, public = list( schema = function() shared_ptr(`arrow::Schema`, ipc___RecordBatchFileReader__schema(self)), @@ -36,160 +72,47 @@ ) ) - -#' Create a `arrow::ipc::RecordBatchStreamReader` from an input stream +#' Create a [arrow::ipc::RecordBatchStreamReader][arrow__ipc__RecordBatchStreamReader] from an input stream +#' +#' @param stream input stream, an [arrow::io::InputStream][arrow__io__InputStream] or a raw vector #' -#' @param stream input stream #' @export -record_batch_stream_reader <- function(stream){ - UseMethod("record_batch_stream_reader") +RecordBatchStreamReader <- function(stream){ + 
UseMethod("RecordBatchStreamReader") } #' @export -`record_batch_stream_reader.arrow::io::InputStream` <- function(stream) { +`RecordBatchStreamReader.arrow::io::InputStream` <- function(stream) { shared_ptr(`arrow::ipc::RecordBatchStreamReader`, ipc___RecordBatchStreamReader__Open(stream)) } #' @export -`record_batch_stream_reader.raw` <- function(stream) { - record_batch_stream_reader(buffer_reader(stream)) +`RecordBatchStreamReader.raw` <- function(stream) { + RecordBatchStreamReader(BufferReader(stream)) } -#' Create an `arrow::ipc::RecordBatchFileReader` from a file +#' Create an [arrow::ipc::RecordBatchFileReader][arrow__ipc__RecordBatchFileReader] from a file #' -#' @param file The file to read from +#' @param file The file to read from. A file path, or an [arrow::io::RandomAccessFile][arrow__ipc__RecordBatchFileReader] #' #' @export -record_batch_file_reader <- function(file) { - UseMethod("record_batch_file_reader") +RecordBatchFileReader <- function(file) { + UseMethod("RecordBatchFileReader") } #' @export -`record_batch_file_reader.arrow::io::RandomAccessFile` <- function(file) { +`RecordBatchFileReader.arrow::io::RandomAccessFile` <- function(file) { shared_ptr(`arrow::ipc::RecordBatchFileReader`, ipc___RecordBatchFileReader__Open(file)) } #' @export -`record_batch_file_reader.character` <- function(file) { +`RecordBatchFileReader.character` <- function(file) { assert_that(length(file) == 1L) - record_batch_file_reader(fs::path_abs(file)) -} - -#' @export -`record_batch_file_reader.fs_path` <- function(file) { - record_batch_file_reader(file_open(file)) -} - -#-------- read_record_batch - -#' Read a single record batch from a stream -#' -#' @param stream input stream -#' @param ... additional parameters -#' -#' @details `stream` can be a `arrow::io::RandomAccessFile` stream as created by [file_open()] or [mmap_open()] or a path. 
-#' -#' @export -read_record_batch <- function(stream, ...){ - UseMethod("read_record_batch") -} - -#' @export -read_record_batch.character <- function(stream, ...){ - assert_that(length(stream) == 1L) - read_record_batch(fs::path_abs(stream)) -} - -#' @export -read_record_batch.fs_path <- function(stream, ...){ - stream <- close_on_exit(file_open(stream)) - read_record_batch(stream) -} - -#' @export -`read_record_batch.arrow::io::RandomAccessFile` <- function(stream, ...){ - reader <- record_batch_file_reader(stream) - reader$ReadRecordBatch(0) -} - -#' @export -`read_record_batch.arrow::io::BufferReader` <- function(stream, ...){ - reader <- record_batch_stream_reader(stream) - reader$ReadNext() -} - -#' @export -read_record_batch.raw <- function(stream, ...){ - stream <- close_on_exit(buffer_reader(stream)) - read_record_batch(stream) -} - -#' @export -`read_record_batch.arrow::ipc::RecordBatchStreamReader` <- function(stream, ...) { - stream$ReadNext() + RecordBatchFileReader(fs::path_abs(file)) } #' @export -`read_record_batch.arrow::ipc::RecordBatchFileReader` <- function(stream, i = 0, ...) { - stream$ReadRecordBatch(i) +`RecordBatchFileReader.fs_path` <- function(file) { + RecordBatchFileReader(ReadableFile(file)) } - -#' @export -`read_record_batch.arrow::ipc::Message` <- function(stream, schema, ...) { - assert_that(inherits(schema, "arrow::Schema")) - shared_ptr(`arrow::RecordBatch`, ipc___ReadRecordBatch__Message__Schema(stream, schema)) -} - - -#--------- read_table - -#' Read an arrow::Table from a stream -#' -#' @param stream stream. Either a stream created by [file_open()] or [mmap_open()] or a file path. 
-#' -#' @export -read_table <- function(stream){ - UseMethod("read_table") -} - -#' @export -read_table.character <- function(stream){ - assert_that(length(stream) == 1L) - read_table(fs::path_abs(stream)) -} - -#' @export -read_table.fs_path <- function(stream) { - stream <- close_on_exit(file_open(stream)) - read_table(stream) -} - -#' @export -`read_table.arrow::io::RandomAccessFile` <- function(stream) { - reader <- record_batch_file_reader(stream) - read_table(reader) -} - -#' @export -`read_table.arrow::ipc::RecordBatchFileReader` <- function(stream) { - shared_ptr(`arrow::Table`, Table__from_RecordBatchFileReader(stream)) -} - -#' @export -`read_table.arrow::ipc::RecordBatchStreamReader` <- function(stream) { - shared_ptr(`arrow::Table`, Table__from_RecordBatchStreamReader(stream)) -} - -#' @export -`read_table.arrow::io::BufferReader` <- function(stream) { - reader <- record_batch_stream_reader(stream) - read_table(reader) -} - -#' @export -`read_table.raw` <- function(stream) { - stream <- close_on_exit(buffer_reader(stream)) - read_table(stream) -} - diff --git a/r/R/RecordBatchWriter.R b/r/R/RecordBatchWriter.R index 515b6986b94..6e97bfea8cc 100644 --- a/r/R/RecordBatchWriter.R +++ b/r/R/RecordBatchWriter.R @@ -17,175 +17,161 @@ #' @include R6.R +#' @title class arrow::ipc::RecordBatchWriter +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' @section Methods: +#' +#' - `$WriteRecordBatch(batch)`: Write record batch to stream +#' - `$WriteTable(table)`: write Table to stream +#' - `$Close()`: close stream +#' +#' @section Derived classes: +#' +#' - [arrow::ipc::RecordBatchStreamWriter][arrow__ipc__RecordBatchStreamWriter] implements the streaming binary format +#' - [arrow::ipc::RecordBatchFileWriter][arrow__ipc__RecordBatchFileWriter] implements the binary file format +#' +#' @rdname arrow__ipc__RecordBatchWriter +#' @name arrow__ipc__RecordBatchWriter `arrow::ipc::RecordBatchWriter` <- R6Class("arrow::ipc::RecordBatchWriter", inherit = 
`arrow::Object`, public = list( - WriteRecordBatch = function(batch, allow_64bit) ipc___RecordBatchWriter__WriteRecordBatch(self, batch, allow_64bit), + WriteRecordBatch = function(batch) ipc___RecordBatchWriter__WriteRecordBatch(self, batch), WriteTable = function(table) ipc___RecordBatchWriter__WriteTable(self, table), Close = function() ipc___RecordBatchWriter__Close(self) ) ) -`arrow::ipc::RecordBatchStreamWriter` <- R6Class("arrow::ipc::RecordBatchStreamWriter", inherit = `arrow::ipc::RecordBatchWriter`) -`arrow::ipc::RecordBatchFileWriter` <- R6Class("arrow::ipc::RecordBatchFileWriter", inherit = `arrow::ipc::RecordBatchStreamWriter`) - -#' Create a record batch file writer from a stream +#' @title class arrow::ipc::RecordBatchStreamWriter #' -#' @param stream a stream -#' @param schema the schema of the batches +#' Writer for the Arrow streaming binary format #' -#' @return an `arrow::ipc::RecordBatchWriter` object +#' @usage NULL +#' @format NULL +#' @docType class #' -#' @export -record_batch_file_writer <- function(stream, schema) { - assert_that( - inherits(stream, "arrow::io::OutputStream"), - inherits(schema, "arrow::Schema") - ) - shared_ptr(`arrow::ipc::RecordBatchFileWriter`, ipc___RecordBatchFileWriter__Open(stream, schema)) -} - -#' Create a record batch stream writer +#' @section usage: #' -#' @param stream a stream -#' @param schema a schema +#' ``` +#' writer <- RecordBatchStreamWriter(sink, schema) #' -#' @export -record_batch_stream_writer <- function(stream, schema) { - assert_that( - inherits(stream, "arrow::io::OutputStream"), - inherits(schema, "arrow::Schema") - ) - shared_ptr(`arrow::ipc::RecordBatchStreamWriter`, ipc___RecordBatchStreamWriter__Open(stream, schema)) -} - -#-------- write_record_batch - -#' write a record batch +#' writer$WriteRecordBatch(batch) +#' writer$WriteTable(table) +#' writer$Close() +#' ``` #' -#' @param x a `arrow::RecordBatch` -#' @param stream where to stream the record batch -#' @param ... 
extra parameters +#' @section Factory: #' -#' @export -write_record_batch <- function(x, stream, ...){ - UseMethod("write_record_batch", stream) -} - -#' @export -`write_record_batch.arrow::io::OutputStream` <- function(x, stream, ...) { - stream_writer <- close_on_exit(record_batch_stream_writer(stream, x$schema())) - write_record_batch(x, stream_writer) -} - -#' @export -`write_record_batch.arrow::ipc::RecordBatchWriter` <- function(x, stream, allow_64bit = TRUE, ...){ - stream$WriteRecordBatch(x, allow_64bit) -} - -#' @export -`write_record_batch.character` <- function(x, stream, ...) { - assert_that(length(stream) == 1L) - write_record_batch(x, fs::path_abs(stream), ...) -} - -#' @export -`write_record_batch.fs_path` <- function(x, stream, ...) { - assert_that(length(stream) == 1L) - file_stream <- close_on_exit(file_output_stream(stream)) - file_writer <- close_on_exit(record_batch_file_writer(file_stream, x$schema())) - write_record_batch(x, file_writer, ...) -} - -#' @export -`write_record_batch.raw` <- function(x, stream, ...) { - # how many bytes do we need - mock <- mock_output_stream() - write_record_batch(x, mock) - n <- mock$GetExtentBytesWritten() - - bytes <- raw(n) - buffer <- buffer(bytes) - buffer_writer <- fixed_size_buffer_writer(buffer) - write_record_batch(x, buffer_writer) - - bytes -} - -#-------- stream Table - -#' write an arrow::Table +#' The [RecordBatchStreamWriter()] function creates a record batch stream writer. #' -#' @param x an `arrow::Table` -#' @param stream where to stream the record batch -#' @param ... extra parameters +#' @section Methods: +#' inherited from [arrow::ipc::RecordBatchWriter][arrow__ipc__RecordBatchWriter] #' -#' @export -write_table <- function(x, stream, ...) 
{ - UseMethod("write_table", stream) -} +#' - `$WriteRecordBatch(batch)`: Write record batch to stream +#' - `$WriteTable(table)`: write Table to stream +#' - `$Close()`: close stream +#' +#' @rdname arrow__ipc__RecordBatchStreamWriter +#' @name arrow__ipc__RecordBatchStreamWriter +`arrow::ipc::RecordBatchStreamWriter` <- R6Class("arrow::ipc::RecordBatchStreamWriter", inherit = `arrow::ipc::RecordBatchWriter`) +#' Writer for the Arrow streaming binary format +#' +#' @param sink Where to write. Can either be: +#' +#' - A string, meant as a file path, passed to [fs::path_ab] +#' - a [file path][fs::path_abs()] +#' - [arrow::io::OutputStream][arrow__io__OutputStream] +#' +#' @param schema The [arrow::Schema][arrow__Schema] for data to be written. +#' +#' @return a [arrow::ipc::RecordBatchStreamWriter][arrow__ipc__RecordBatchStreamWriter] +#' #' @export -`write_table.arrow::io::OutputStream` <- function(x, stream, ...) { - stream_writer <- close_on_exit(record_batch_stream_writer(stream, x$schema())) - write_table(x, stream_writer) +RecordBatchStreamWriter <- function(sink, schema) { + UseMethod("RecordBatchStreamWriter") } #' @export -`write_table.arrow::ipc::RecordBatchWriter` <- function(x, stream, ...){ - stream$WriteTable(x) +RecordBatchStreamWriter.character <- function(sink, schema){ + RecordBatchStreamWriter(fs::path_abs(sink), schema) } #' @export -`write_table.character` <- function(x, stream, ...) { - assert_that(length(stream) == 1L) - write_table(x, fs::path_abs(stream), ...) +RecordBatchStreamWriter.fs_path <- function(sink, schema){ + RecordBatchStreamWriter(FileOutputStream(sink), schema) } #' @export -`write_table.fs_path` <- function(x, stream, ...) { - assert_that(length(stream) == 1L) - file_stream <- close_on_exit(file_output_stream(stream)) - file_writer <- close_on_exit(record_batch_file_writer(file_stream, x$schema())) - write_table(x, file_writer, ...) 
+`RecordBatchStreamWriter.arrow::io::OutputStream` <- function(sink, schema){
+  assert_that(inherits(schema, "arrow::Schema"))
+  shared_ptr(`arrow::ipc::RecordBatchStreamWriter`, ipc___RecordBatchStreamWriter__Open(sink, schema))
 }
 
-#' @export
-`write_table.raw` <- function(x, stream, ...) {
-  # how many bytes do we need
-  mock <- mock_output_stream()
-  write_table(x, mock)
-  n <- mock$GetExtentBytesWritten()
-
-  bytes <- raw(n)
-  buffer <- buffer(bytes)
-  buffer_writer <- fixed_size_buffer_writer(buffer)
-  write_table(x, buffer_writer)
-
-  bytes
-}
+#' @title class arrow::ipc::RecordBatchFileWriter
+#'
+#' Writer for the Arrow binary file format
+#'
+#' @usage NULL
+#' @format NULL
+#' @docType class
+#'
+#' @section Usage:
+#'
+#' ```
+#' writer <- RecordBatchFileWriter(sink, schema)
+#'
+#' writer$WriteRecordBatch(batch)
+#' writer$WriteTable(table)
+#' writer$Close()
+#' ```
+#'
+#' @section Factory:
+#'
+#' The [RecordBatchFileWriter()] function creates a record batch file writer.
+#'
+#' @section Methods:
+#' inherited from [arrow::ipc::RecordBatchWriter][arrow__ipc__RecordBatchWriter]
+#'
+#' - `$WriteRecordBatch(batch)`: Write record batch to stream
+#' - `$WriteTable(table)`: write Table to stream
+#' - `$Close()`: close stream
+#'
+#' @rdname arrow__ipc__RecordBatchFileWriter
+#' @name arrow__ipc__RecordBatchFileWriter
+`arrow::ipc::RecordBatchFileWriter` <- R6Class("arrow::ipc::RecordBatchFileWriter", inherit = `arrow::ipc::RecordBatchStreamWriter`)
 
-#' Write an object to a stream
+#' Create a record batch file writer from a stream
 #'
-#' @param x An object to stream
-#' @param stream A stream
-#' @param ... additional parameters
+#' @param sink Where to write. Can either be:
+#'
+#' - character vector of length one
+#' - a [file path][fs::path_abs()]
+#' - [arrow::io::OutputStream][arrow__io__OutputStream]
+#'
+#' @param schema The [arrow::Schema][arrow__Schema] for data to be written.
+#' +#' @return an `arrow::ipc::RecordBatchWriter` object #' #' @export -write_arrow <- function(x, stream, ...){ - UseMethod("write_arrow") +RecordBatchFileWriter <- function(sink, schema) { + UseMethod("RecordBatchFileWriter") } #' @export -`write_arrow.arrow::RecordBatch` <- function(x, stream, ...) { - write_record_batch(x, stream, ...) +RecordBatchFileWriter.character <- function(sink, schema){ + RecordBatchFileWriter(fs::path_abs(sink), schema) } #' @export -`write_arrow.arrow::Table` <- function(x, stream, ...) { - write_table(x, stream, ...) +RecordBatchFileWriter.fs_path <- function(sink, schema){ + RecordBatchFileWriter(FileOutputStream(sink), schema) } #' @export -`write_arrow.data.frame` <- function(x, stream, ...) { - write_record_batch(record_batch(x), stream, ...) +`RecordBatchFileWriter.arrow::io::OutputStream` <- function(sink, schema){ + assert_that(inherits(schema, "arrow::Schema")) + shared_ptr(`arrow::ipc::RecordBatchFileWriter`, ipc___RecordBatchFileWriter__Open(sink, schema)) } diff --git a/r/R/Schema.R b/r/R/Schema.R index b158fee169d..08047a3b11f 100644 --- a/r/R/Schema.R +++ b/r/R/Schema.R @@ -17,6 +17,30 @@ #' @include R6.R +#' @title class arrow::Schema +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' @section Usage: +#' +#' ``` +#' s <- schema(...) +#' +#' s$ToString() +#' s$num_fields() +#' s$field(i) +#' ``` +#' +#' @section Methods: +#' +#' - `$ToString()`: convert to a string +#' - `$num_fields()`: returns the number of fields +#' - `$field(i)`: returns the field at index `i` (0-based) +#' +#' @rdname arrow__Schema +#' @name arrow__Schema `arrow::Schema` <- R6Class("arrow::Schema", inherit = `arrow::Object`, public = list( @@ -29,11 +53,11 @@ ) ) -#' Schema functions +#' Schema factory #' #' @param ... named list of data types #' -#' @return a Schema +#' @return a [schema][arrow__Schema] #' #' @export schema <- function(...){ @@ -50,11 +74,6 @@ read_schema <- function(stream, ...) 
{ UseMethod("read_schema") } -#' @export -read_schema.default <- function(stream, ...) { - stop("unsupported") -} - #' @export `read_schema.arrow::io::InputStream` <- function(stream, ...) { shared_ptr(`arrow::Schema`, ipc___ReadSchema_InputStream(stream)) @@ -62,10 +81,12 @@ read_schema.default <- function(stream, ...) { #' @export `read_schema.arrow::Buffer` <- function(stream, ...) { - read_schema(buffer_reader(stream), ...) + stream <- close_on_exit(BufferReader(stream)) + shared_ptr(`arrow::Schema`, ipc___ReadSchema_InputStream(stream)) } #' @export `read_schema.raw` <- function(stream, ...) { - read_schema(buffer(stream), ...) + stream <- close_on_exit(BufferReader(stream)) + shared_ptr(`arrow::Schema`, ipc___ReadSchema_InputStream(stream)) } diff --git a/r/R/array.R b/r/R/array.R index 2d434f9a221..26a2e6f11f2 100644 --- a/r/R/array.R +++ b/r/R/array.R @@ -17,6 +17,56 @@ #' @include R6.R +#' @title class arrow::Array +#' +#' Array base type. Immutable data array with some logical type and some length. +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' @section Usage: +#' +#' ``` +#' a <- array(...) +#' +#' a$IsNull(i) +#' a$IsValid(i) +#' a$length() or length(a) +#' a$offset() +#' a$null_count() +#' a$type() +#' a$type_id() +#' a$Equals(b) +#' a$ApproxEquals(b) +#' a$as_vector() +#' a$ToString() +#' a$Slice(offset, length = NULL) +#' a$RangeEquals(other, start_idx, end_idx, other_start_idx) +#' +#' print(a) +#' a == a +#' ``` +#' +#' @section Methods: +#' +#' - `$IsNull(i)`: Return true if value at index is null. Does not boundscheck +#' - `$IsValid(i)`: Return true if value at index is valid. 
Does not boundscheck +#' - `$length()`: Size in the number of elements this array contains +#' - `$offset()`: A relative position into another array's data, to enable zero-copy slicing +#' - `$null_count()`: The number of null entries in the array +#' - `$type()`: logical type of data +#' - `$type_id()`: type id +#' - `$Equals(other)` : is this array equal to `other` +#' - `$ApproxEquals(other)` : +#' - `$data()`: return the underlying [arrow::ArrayData][arrow__ArrayData] +#' - `$as_vector()`: convert to an R vector +#' - `$ToString()`: string representation of the array +#' - `$Slice(offset, length = NULL)` : Construct a zero-copy slice of the array with the indicated offset and length. If length is `NULL`, the slice goes until the end of the array. +#' - `$RangeEquals(other, start_idx, end_idx, other_start_idx)` : +#' +#' @rdname arrow__Array +#' @name arrow__Array `arrow::Array` <- R6Class("arrow::Array", inherit = `arrow::Object`, public = list( @@ -65,7 +115,7 @@ #' @export `==.arrow::Array` <- function(x, y) x$Equals(y) -#' create an arrow::Array from an R vector +#' create an [arrow::Array][arrow__Array] from an R vector #' #' @param \dots Vectors to coerce #' @param type currently ignored diff --git a/r/R/buffer.R b/r/R/buffer.R index 9684a972913..c0cadbe8805 100644 --- a/r/R/buffer.R +++ b/r/R/buffer.R @@ -18,6 +18,21 @@ #' @include R6.R #' @include enums.R +#' @title class arrow::Buffer +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' @section Methods: +#' +#' - `$is_mutable()` : +#' - `$ZeroPadding()` : +#' - `$size()` : +#' - `$capacity()`: +#' +#' @rdname arrow__Buffer +#' @name arrow__Buffer `arrow::Buffer` <- R6Class("arrow::Buffer", inherit = `arrow::Object`, public = list( is_mutable = function() Buffer__is_mutable(self), @@ -27,12 +42,11 @@ ) ) -`arrow::MutableBuffer` <- R6Class("arrow::Buffer", inherit = `arrow::Buffer`) - -#' Create a buffer from an R object +#' Create a [arrow::Buffer][arrow__Buffer] from an R object +#' +#' 
@param x R object. Only raw, numeric and integer vectors are currently supported #' -#' @param x R object -#' @return an instance of `arrow::Buffer` that borrows memory from `x` +#' @return an instance of [arrow::Buffer][arrow__Buffer] that borrows memory from `x` #' #' @export buffer <- function(x){ @@ -44,7 +58,6 @@ buffer.default <- function(x) { stop("cannot convert to Buffer") } - #' @export buffer.raw <- function(x) { shared_ptr(`arrow::Buffer`, r___RBuffer__initialize(x)) diff --git a/r/R/dictionary.R b/r/R/dictionary.R index d8a71d92a9f..28f86a87b92 100644 --- a/r/R/dictionary.R +++ b/r/R/dictionary.R @@ -17,6 +17,18 @@ #' @include R6.R +#' @title class arrow::DictionaryType +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__DictionaryType +#' @name arrow__DictionaryType `arrow::DictionaryType` <- R6Class("arrow::DictionaryType", inherit = `arrow::FixedWidthType`, public = list( @@ -25,7 +37,6 @@ dictionary = function() shared_ptr(`arrow::Array`, DictionaryType__dictionary(self)), ordered = function() DictionaryType__ordered(self) ) - ) #' dictionary type factory @@ -34,6 +45,8 @@ #' @param values values array, typically an arrow array of strings #' @param ordered Is this an ordered dictionary #' +#' @return a [arrow::DictionaryType][arrow__DictionaryType] +#' #' @export dictionary <- function(type, values, ordered = FALSE) { assert_that( diff --git a/r/R/feather.R b/r/R/feather.R index c36c571bd4b..bae71d31bc1 100644 --- a/r/R/feather.R +++ b/r/R/feather.R @@ -100,7 +100,7 @@ write_feather_RecordBatch <- function(data, stream) { #' @export #' @method write_feather_RecordBatch fs_path `write_feather_RecordBatch.fs_path` <- function(data, stream) { - file_stream <- close_on_exit(file_output_stream(stream)) + file_stream <- close_on_exit(FileOutputStream(stream)) `write_feather_RecordBatch.arrow::io::OutputStream`(data, file_stream) } @@ -133,7 +133,7 @@ feather_table_reader.character <- 
function(file, mmap = TRUE, ...) {
 
 #' @export
 feather_table_reader.fs_path <- function(file, mmap = TRUE, ...) {
-  stream <- if(isTRUE(mmap)) mmap_open(file, ...) else file_open(file, ...)
+  stream <- if(isTRUE(mmap)) mmap_open(file, ...) else ReadableFile(file, ...)
   feather_table_reader(stream)
 }
 
diff --git a/r/R/io.R b/r/R/io.R
index d4534927412..f1f2f4bfae5 100644
--- a/r/R/io.R
+++ b/r/R/io.R
@@ -19,34 +19,57 @@
 #' @include enums.R
 #' @include buffer.R
 
-`arrow::io::Readable` <- R6Class("arrow::io::Readable", inherit = `arrow::Object`,
-  public = list(
-    Read = function(nbytes) shared_ptr(`arrow::Buffer`, io___Readable__Read(self, nbytes))
-  )
-)
-
-`arrow::io::InputStream` <- R6Class("arrow::io::InputStream", inherit = `arrow::io::Readable`,
-  public = list(
-    Close = function() io___InputStream__Close(self)
-  )
-)
+# OutputStream ------------------------------------------------------------
 
 `arrow::io::Writable` <- R6Class("arrow::io::Writable", inherit = `arrow::Object`)
 
+#' @title class arrow::io::OutputStream
+#'
+#' @section Methods:
+#'
+#' - `void` `Close`(): close the stream
+#'
+#' @rdname arrow__io__OutputStream
+#' @name arrow__io__OutputStream
 `arrow::io::OutputStream` <- R6Class("arrow::io::OutputStream", inherit = `arrow::io::Writable`,
   public = list(
     Close = function() io___OutputStream__Close(self)
   )
 )
 
+#' @title class arrow::io::FileOutputStream
+#'
+#' @section Methods:
+#'
+#' TODO
+#'
+#' @rdname arrow__io__FileOutputStream
+#' @name arrow__io__FileOutputStream
 `arrow::io::FileOutputStream` <- R6Class("arrow::io::FileOutputStream", inherit = `arrow::io::OutputStream`)
 
+#' @title class arrow::io::MockOutputStream
+#'
+#' @section Methods:
+#'
+#' TODO
+#'
+#' @rdname arrow__io__MockOutputStream
+#' @name arrow__io__MockOutputStream
 `arrow::io::MockOutputStream` <- R6Class("arrow::io::MockOutputStream", inherit = `arrow::io::OutputStream`,
   public = list(
     GetExtentBytesWritten = function()
io___MockOutputStream__GetExtentBytesWritten(self) ) ) +#' @title class arrow::io::BufferOutputStream +#' +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__io__BufferOutputStream +#' @name arrow__io__BufferOutputStream `arrow::io::BufferOutputStream` <- R6Class("arrow::io::BufferOutputStream", inherit = `arrow::io::OutputStream`, public = list( capacity = function() io___BufferOutputStream__capacity(self), @@ -56,8 +79,55 @@ ) ) +#' @title class arrow::io::FixedSizeBufferWriter +#' +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__io__FixedSizeBufferWriter +#' @name arrow__io__FixedSizeBufferWriter `arrow::io::FixedSizeBufferWriter` <- R6Class("arrow::io::FixedSizeBufferWriter", inherit = `arrow::io::OutputStream`) + +# InputStream ------------------------------------------------------------- + +#' @title class arrow::io::Readable +#' +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__io__Readable +#' @name arrow__io__Readable +`arrow::io::Readable` <- R6Class("arrow::io::Readable", inherit = `arrow::Object`, + public = list( + Read = function(nbytes) shared_ptr(`arrow::Buffer`, io___Readable__Read(self, nbytes)) + ) +) + +#' @title class arrow::io::InputStream +#' +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__io__InputStream +#' @name arrow__io__InputStream +`arrow::io::InputStream` <- R6Class("arrow::io::InputStream", inherit = `arrow::io::Readable`, + public = list( + Close = function() io___InputStream__Close(self) + ) +) + +#' @title class arrow::io::RandomAccessFile +#' +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__io__RandomAccessFile +#' @name arrow__io__RandomAccessFile `arrow::io::RandomAccessFile` <- R6Class("arrow::io::RandomAccessFile", inherit = `arrow::io::InputStream`, public = list( GetSize = function() io___RandomAccessFile__GetSize(self), @@ -67,94 +137,145 @@ ) ) +#' @title class arrow::io::MemoryMappedFile +#' +#' @section Methods: +#' +#' TODO +#' +#' @seealso [mmap_open()], [mmap_create()] +#' 
+#' +#' @rdname arrow__io__MemoryMappedFile +#' @name arrow__io__MemoryMappedFile `arrow::io::MemoryMappedFile` <- R6Class("arrow::io::MemoryMappedFile", inherit = `arrow::io::RandomAccessFile`, public = list( Resize = function(size) io___MemoryMappedFile__Resize(self, size) ) ) +#' @title class arrow::io::ReadableFile +#' +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__io__ReadableFile +#' @name arrow__io__ReadableFile `arrow::io::ReadableFile` <- R6Class("arrow::io::ReadableFile", inherit = `arrow::io::RandomAccessFile`) -`arrow::io::BufferReader` <- R6Class("arrow::io::BufferReader", inherit = `arrow::io::RandomAccessFile`) +#' @title class arrow::io::BufferReader +#' +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__io__BufferReader +#' @name arrow__io__BufferReader +`arrow::io::BufferReader` <- R6Class("arrow::io::BufferReader", inherit = `arrow::io::RandomAccessFile`) #' Create a new read/write memory mapped file of a given size #' #' @param path file path #' @param size size in bytes -#' @param mode file mode (read/write/readwrite) -#' @param buffer an `arrow::Buffer`, typically created by [buffer()] -#' @param initial_capacity initial capacity for the buffer output stream #' -#' @rdname io +#' @return a [arrow::io::MemoryMappedFile][arrow__io__MemoryMappedFile] +#' #' @export -mmap_create <- `arrow::io::MemoryMappedFile`$create <- function(path, size) { +mmap_create <- function(path, size) { shared_ptr(`arrow::io::MemoryMappedFile`, io___MemoryMappedFile__Create(fs::path_abs(path), size)) } -#' @rdname io +#' Open a memory mapped file +#' +#' @param path file path +#' @param mode file mode (read/write/readwrite) +#' #' @export -mmap_open <- `arrow::io::MemoryMappedFile`$open <- function(path, mode = c("read", "write", "readwrite")) { +mmap_open <- function(path, mode = c("read", "write", "readwrite")) { mode <- match(match.arg(mode), c("read", "write", "readwrite")) - 1L shared_ptr(`arrow::io::MemoryMappedFile`, 
io___MemoryMappedFile__Open(fs::path_abs(path), mode))
 }
 
-#' @rdname io
+#' Open a [arrow::io::ReadableFile][arrow__io__ReadableFile]
+#'
+#' @param path file path
+#'
+#' @return a [arrow::io::ReadableFile][arrow__io__ReadableFile]
+#'
 #' @export
-file_open <- `arrow::io::ReadableFile`$open <- function(path) {
+ReadableFile <- function(path) {
   shared_ptr(`arrow::io::ReadableFile`, io___ReadableFile__Open(fs::path_abs(path)))
 }
 
-#' @rdname io
+#' Open a [arrow::io::FileOutputStream][arrow__io__FileOutputStream]
+#'
+#' @param path file path
+#'
+#' @return a [arrow::io::FileOutputStream][arrow__io__FileOutputStream]
+#'
 #' @export
-file_output_stream <- function(path) {
+FileOutputStream <- function(path) {
   shared_ptr(`arrow::io::FileOutputStream`, io___FileOutputStream__Open(path))
 }
 
-#' @rdname io
+#' Open a [arrow::io::MockOutputStream][arrow__io__MockOutputStream]
+#'
+#' @return a [arrow::io::MockOutputStream][arrow__io__MockOutputStream]
+#'
 #' @export
-mock_output_stream <- function() {
+MockOutputStream <- function() {
   shared_ptr(`arrow::io::MockOutputStream`, io___MockOutputStream__initialize())
 }
 
-#' @rdname io
+#' Open a [arrow::io::BufferOutputStream][arrow__io__BufferOutputStream]
+#'
+#' @param initial_capacity initial capacity
+#'
+#' @return a [arrow::io::BufferOutputStream][arrow__io__BufferOutputStream]
+#'
 #' @export
-buffer_output_stream <- function(initial_capacity = 0L) {
+BufferOutputStream <- function(initial_capacity = 0L) {
   shared_ptr(`arrow::io::BufferOutputStream`, io___BufferOutputStream__Create(initial_capacity))
 }
 
-#' @rdname io
+#' Open a [arrow::io::FixedSizeBufferWriter][arrow__io__FixedSizeBufferWriter]
+#'
+#' @param buffer [arrow::Buffer][arrow__Buffer] or something [buffer()] can handle
+#'
+#' @return a [arrow::io::FixedSizeBufferWriter][arrow__io__FixedSizeBufferWriter]
+#'
 #' @export
-fixed_size_buffer_writer <- function(buffer){
-  UseMethod("fixed_size_buffer_writer")
+FixedSizeBufferWriter <- function(buffer){
+  UseMethod("FixedSizeBufferWriter")
 }
 
 #' @export
-fixed_size_buffer_writer.default <- function(buffer){
-  fixed_size_buffer_writer(buffer(buffer))
+FixedSizeBufferWriter.default <- function(buffer){
+  FixedSizeBufferWriter(buffer(buffer))
 }
 
 #' @export
-`fixed_size_buffer_writer.arrow::Buffer` <- function(buffer){
+`FixedSizeBufferWriter.arrow::Buffer` <- function(buffer){
   assert_that(buffer$is_mutable())
   shared_ptr(`arrow::io::FixedSizeBufferWriter`, io___FixedSizeBufferWriter__initialize(buffer))
 }
 
-#' Create a `arrow::BufferReader`
+#' Create a [arrow::io::BufferReader][arrow__io__BufferReader]
 #'
-#' @param x R object to treat as a buffer or a buffer created by [buffer()]
+#' @param x R object to treat as a buffer or a buffer created by [buffer()]
 #'
 #' @export
-buffer_reader <- function(x) {
-  UseMethod("buffer_reader")
+BufferReader <- function(x) {
+  UseMethod("BufferReader")
 }
 
 #' @export
-`buffer_reader.arrow::Buffer` <- function(x) {
-  shared_ptr(`arrow::io::BufferReader`, io___BufferReader__initialize(x))
+BufferReader.default <- function(x) {
+  BufferReader(buffer(x))
 }
 
 #' @export
-buffer_reader.default <- function(x) {
-  buffer_reader(buffer(x))
+`BufferReader.arrow::Buffer` <- function(x) {
+  shared_ptr(`arrow::io::BufferReader`, io___BufferReader__initialize(x))
 }
-
diff --git a/r/R/memory_pool.R b/r/R/memory_pool.R
index 49f65d2a1f3..88c2c7bc198 100644
--- a/r/R/memory_pool.R
+++ b/r/R/memory_pool.R
@@ -16,7 +16,19 @@
 # under the License.
 #' @include R6.R
-
+#'
+#' @title class arrow::MemoryPool
+#'
+#' @usage NULL
+#' @format NULL
+#' @docType class
+#'
+#' @section Methods:
+#'
+#' TODO
+#'
+#' @rdname arrow__MemoryPool
+#' @name arrow__MemoryPool
 `arrow::MemoryPool` <- R6Class("arrow::MemoryPool",
   inherit = `arrow::Object`,
   public = list(
@@ -28,6 +40,10 @@
   )
 )
 
+#' default [arrow::MemoryPool][arrow__MemoryPool]
+#'
+#' @return the default [arrow::MemoryPool][arrow__MemoryPool]
+#' @export
 default_memory_pool <- function() {
   shared_ptr(`arrow::MemoryPool`, MemoryPool__default())
 }
diff --git a/r/R/message.R b/r/R/message.R
index f31fb9a53b7..d808e65963b 100644
--- a/r/R/message.R
+++ b/r/R/message.R
@@ -17,6 +17,18 @@
 
 #' @include R6.R
 
+#' @title class arrow::ipc::Message
+#'
+#' @usage NULL
+#' @format NULL
+#' @docType class
+#'
+#' @section Methods:
+#'
+#' TODO
+#'
+#' @rdname arrow__ipc__Message
+#' @name arrow__ipc__Message
 `arrow::ipc::Message` <- R6Class("arrow::ipc::Message", inherit = `arrow::Object`,
   public = list(
     Equals = function(other){
@@ -36,51 +48,58 @@
 #' @export
 `==.arrow::ipc::Message` <- function(x, y) x$Equals(y)
 
+#' @title class arrow::ipc::MessageReader
+#'
+#' @usage NULL
+#' @format NULL
+#' @docType class
+#'
+#' @section Methods:
+#'
+#' TODO
+#'
+#' @rdname arrow__ipc__MessageReader
+#' @name arrow__ipc__MessageReader
 `arrow::ipc::MessageReader` <- R6Class("arrow::ipc::MessageReader", inherit = `arrow::Object`,
   public = list(
     ReadNextMessage = function() unique_ptr(`arrow::ipc::Message`, ipc___MessageReader__ReadNextMessage(self))
   )
 )
 
-#' Read a Message from a stream
+#' Open a MessageReader that reads from a stream
 #'
 #' @param stream an InputStream
 #'
 #' @export
-read_message <- function(stream) {
-  UseMethod("read_message")
+MessageReader <- function(stream) {
+  UseMethod("MessageReader")
 }
 
 #' @export
-read_message.default <- function(stream) {
-  stop("unsupported")
+MessageReader.default <- function(stream) {
+  MessageReader(BufferReader(stream))
 }
 
 #' @export
-`read_message.arrow::io::InputStream` <- function(stream) { - unique_ptr(`arrow::ipc::Message`, ipc___ReadMessage(stream) ) +`MessageReader.arrow::io::InputStream` <- function(stream) { + unique_ptr(`arrow::ipc::MessageReader`, ipc___MessageReader__Open(stream)) } -#' Open a MessageReader that reads from a stream +#' Read a Message from a stream #' #' @param stream an InputStream #' #' @export -message_reader <- function(stream) { - UseMethod("message_reader") -} - -#' @export -message_reader.default <- function(stream) { - stop("unsupported") +read_message <- function(stream) { + UseMethod("read_message") } #' @export -message_reader.raw <- function(stream) { - message_reader(buffer_reader(stream)) +`read_message.arrow::io::InputStream` <- function(stream) { + unique_ptr(`arrow::ipc::Message`, ipc___ReadMessage(stream) ) } #' @export -`message_reader.arrow::io::InputStream` <- function(stream) { - unique_ptr(`arrow::ipc::MessageReader`, ipc___MessageReader__Open(stream)) +`read_message.arrow::ipc::MessageReader` <- function(stream) { + stream$ReadNextMessage() } diff --git a/r/R/read_record_batch.R b/r/R/read_record_batch.R new file mode 100644 index 00000000000..967ac5b7650 --- /dev/null +++ b/r/R/read_record_batch.R @@ -0,0 +1,52 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +#' read [arrow::RecordBatch][arrow__RecordBatch] as encapsulated IPC message, given a known [arrow::Schema][arrow__Schema] +#' +#' @param obj a [arrow::ipc::Message][arrow__ipc__Message], a [arrow::io::InputStream][arrow__io__InputStream], a [arrow::Buffer][arrow__Buffer], or a raw vector +#' @param schema a [arrow::Schema][arrow__Schema] +#' +#' @return a [arrow::RecordBatch][arrow__RecordBatch] +#' +#' @export +read_record_batch <- function(obj, schema){ + UseMethod("read_record_batch") +} + +#' @export +`read_record_batch.arrow::ipc::Message` <- function(obj, schema) { + assert_that(inherits(schema, "arrow::Schema")) + shared_ptr(`arrow::RecordBatch`, ipc___ReadRecordBatch__Message__Schema(obj, schema)) +} + +#' @export +`read_record_batch.arrow::io::InputStream` <- function(obj, schema) { + assert_that(inherits(schema, "arrow::Schema")) + shared_ptr(`arrow::RecordBatch`, ipc___ReadRecordBatch__InputStream__Schema(obj, schema)) +} + +#' @export +read_record_batch.raw <- function(obj, schema){ + stream <- close_on_exit(BufferReader(obj)) + read_record_batch(stream, schema) +} + +#' @export +`read_record_batch.arrow::Buffer` <- function(obj, schema){ + stream <- close_on_exit(BufferReader(obj)) + read_record_batch(stream, schema) +} diff --git a/r/R/read_table.R b/r/R/read_table.R new file mode 100644 index 00000000000..11e57ba89ae --- /dev/null +++ b/r/R/read_table.R @@ -0,0 +1,66 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +#' Read an arrow::Table from a stream +#' +#' @param stream stream. Either a stream created by [ReadableFile()] or [mmap_open()] or a file path. +#' +#' @export +read_table <- function(stream){ + UseMethod("read_table") +} + +#' @export +read_table.character <- function(stream){ + assert_that(length(stream) == 1L) + read_table(fs::path_abs(stream)) +} + +#' @export +read_table.fs_path <- function(stream) { + stream <- close_on_exit(ReadableFile(stream)) + read_table(stream) +} + +#' @export +`read_table.arrow::io::RandomAccessFile` <- function(stream) { + reader <- RecordBatchFileReader(stream) + read_table(reader) +} + +#' @export +`read_table.arrow::ipc::RecordBatchFileReader` <- function(stream) { + shared_ptr(`arrow::Table`, Table__from_RecordBatchFileReader(stream)) +} + +#' @export +`read_table.arrow::ipc::RecordBatchStreamReader` <- function(stream) { + shared_ptr(`arrow::Table`, Table__from_RecordBatchStreamReader(stream)) +} + +#' @export +`read_table.arrow::io::BufferReader` <- function(stream) { + reader <- RecordBatchStreamReader(stream) + read_table(reader) +} + +#' @export +`read_table.raw` <- function(stream) { + stream <- close_on_exit(BufferReader(stream)) + read_table(stream) +} + diff --git a/r/R/write_table.R b/r/R/write_table.R new file mode 100644 index 00000000000..44d0037212e --- /dev/null +++ b/r/R/write_table.R @@ -0,0 +1,93 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +#' write an arrow::Table +#' +#' @param x an `arrow::Table` +#' @param stream where to stream the record batch +#' @param ... extra parameters +#' +#' @export +write_table <- function(x, stream, ...) { + UseMethod("write_table", stream) +} + +#' @export +`write_table.arrow::io::OutputStream` <- function(x, stream, ...) { + stream_writer <- close_on_exit(RecordBatchStreamWriter(stream, x$schema())) + write_table(x, stream_writer) +} + +#' @export +`write_table.arrow::ipc::RecordBatchWriter` <- function(x, stream, ...){ + stream$WriteTable(x) +} + +#' @export +`write_table.character` <- function(x, stream, ...) { + assert_that(length(stream) == 1L) + write_table(x, fs::path_abs(stream), ...) +} + +#' @export +`write_table.fs_path` <- function(x, stream, ...) { + assert_that(length(stream) == 1L) + file_stream <- close_on_exit(FileOutputStream(stream)) + file_writer <- close_on_exit(RecordBatchFileWriter(file_stream, x$schema())) + write_table(x, file_writer, ...) +} + +#' @export +`write_table.raw` <- function(x, stream, ...) 
{ + # how many bytes do we need + mock <- MockOutputStream() + write_table(x, mock) + n <- mock$GetExtentBytesWritten() + + bytes <- raw(n) + buffer <- buffer(bytes) + buffer_writer <- FixedSizeBufferWriter(buffer) + write_table(x, buffer_writer) + + bytes +} + +#' Write an object to a stream +#' +#' @param x An object to stream +#' @param stream A stream +#' @param ... additional parameters +#' +#' @export +write_arrow <- function(x, stream, ...){ + UseMethod("write_arrow") +} + +#' #' @export +#' `write_arrow.arrow::RecordBatch` <- function(x, stream, ...) { +#' write_record_batch(x, stream, ...) +#' } + +#' @export +`write_arrow.arrow::Table` <- function(x, stream, ...) { + write_table(x, stream, ...) +} + +#' @export +`write_arrow.data.frame` <- function(x, stream, ...) { + write_table(table(x), stream, ...) +} diff --git a/r/README.Rmd b/r/README.Rmd index 204a9f9d566..2c51d01c0f0 100644 --- a/r/README.Rmd +++ b/r/README.Rmd @@ -46,9 +46,9 @@ tf <- tempfile() # write arrow::Table to file (tib <- tibble(x = 1:10, y = rnorm(10))) -arrow::write_arrow(tib, tf) +# arrow::write_arrow(tib, tf) -# read it back with pyarrow -pa <- import("pyarrow") -as_tibble(pa$open_file(tf)$read_pandas()) +# # read it back with pyarrow +# pa <- import("pyarrow") +# as_tibble(pa$open_file(tf)$read_pandas()) ``` diff --git a/r/data-raw/test.R b/r/data-raw/test.R deleted file mode 100644 index 516af58616e..00000000000 --- a/r/data-raw/test.R +++ /dev/null @@ -1,85 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -library(tidyverse) -library(arrow) - -# meta data -(t1 <- int32()) -(t2 <- utf8()) -(t5 <- timestamp(unit = TimeUnit$MILLI)) - -# lists -list_of(t1) - -# shema -schema(x = int32(), y = float64()) - -# :scream_cat: -# -# pa.schema( -# [ -# pa.field('x', pa.int32()), -# pa.field('y', pa.float64()) -# ] -# ) -# - -schema(x = int32(), y = list_of(float64())) - -#------- arrays - -# arr = pa.array([1, 2, 3]) -arr <- array(1:3, 5:80) -arr -arr$as_vector() - -#------- read_arrow / stream -tbl <- tibble(x=1:10, y=rnorm(10)) -write_arrow(tbl, "/tmp/test.arrow") -readr::write_rds(tbl, "/tmp/test.rds") -fs::file_info(c("/tmp/test.arrow", "/tmp/test.rds")) - -(data <- read_arrow("/tmp/test.arrow")) - -# tibble <-> arrow::RecordBatch -(batch <- record_batch(tbl)) -batch$num_columns() -batch$num_rows() -write_arrow(batch, "/tmp/test") -readBin("/tmp/test", what = raw(), n = 1000) -batch$schema() -all.equal(tbl, data) - -batch <- read_record_batch("/tmp/test") -batch$schema() -batch$column(0) -batch$column(0)$as_vector() - -as_tibble(batch) - -# tibble <-> arrow::Table -tab <- arrow::table(tbl) -tab -tab$schema() -tab$num_columns() -tab$num_rows() - -# read_arrow, stream -tbl <- tibble(x = rnorm(20), y = seq_len(20)) -write_arrow(tbl, tf) - diff --git a/r/doc/arrow.R b/r/doc/arrow.R new file mode 100644 index 00000000000..4599092e7d7 --- /dev/null +++ b/r/doc/arrow.R @@ -0,0 +1,90 @@ +## ----setup, include = FALSE---------------------------------------------- +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +library(arrow, warn.conflicts = FALSE) + +## 
---- eval = FALSE------------------------------------------------------- +# # install.packages("remotes") +# remotes::install_github("apache/arrow/r") + +## ------------------------------------------------------------------------ +library(arrow, warn.conflicts = FALSE) +t1 <- int32() +t2 <- utf8() +t5 <- timestamp(TimeUnit$MILLI) + +t1 +t2 +t5 + +## ------------------------------------------------------------------------ +t6 <- list_of(t1) +t6 + +## ------------------------------------------------------------------------ +t7 <- struct(s0 = int32(), s3 = list_of(int16())) +t7 + +## ------------------------------------------------------------------------ +s <- schema( + field0 = int32(), + field1 = utf8(), + field3 = list_of(int32()) +) +s + +## ------------------------------------------------------------------------ +a <- array(1:10) +a + +## ------------------------------------------------------------------------ +# TODO: should this be an active like in python ? +# a$type rather than a$type() +a$type() + +## ------------------------------------------------------------------------ +a$length() +length(a) + +# TODO: should this be an active like in python ? 
+# a$null_count rather than a$null_count() +a$null_count() + +## ------------------------------------------------------------------------ +f <- factor(c("a", "b"), levels = c("a", "b", "c")) +a <- array(f) +a$type() +a$indices() +a$dictionary() +a + +## ------------------------------------------------------------------------ +tbl <- tibble::tibble( + f0 = 1:4, + f1 = c("foo", "bar", "baz", NA), + f2 = c(TRUE, NA, FALSE, NA) +) +batch <- record_batch(tbl) +batch$num_columns() +batch$num_rows() + +# convert a record batch back to a tibble +as_tibble(batch) + +## ------------------------------------------------------------------------ +batch$Slice(2) +batch$Slice(2, 1) + +## ------------------------------------------------------------------------ +tab <- table(tbl) +tab +tab$num_columns() +tab$num_rows() + +## ------------------------------------------------------------------------ +tab$column(0L) +tab$column(0L)$data() +tab$column(0L)$data()$chunks() + diff --git a/r/doc/arrow.Rmd b/r/doc/arrow.Rmd new file mode 100644 index 00000000000..91ffe04213e --- /dev/null +++ b/r/doc/arrow.Rmd @@ -0,0 +1,236 @@ +--- +title: "arrow" +author: "Romain François" +date: "`r Sys.Date()`" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{arrow} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +library(arrow, warn.conflicts = FALSE) +``` + +# Install arrow + +## CRAN + +`arrow` is not yet available from CRAN + +## Installing from source + +The `arrow` package is currently only available from github. To install the development version, you first need to +install the C++ library so that `pkg-config` finds it: + +```shell +git clone https://github.com/apache/arrow.git +cd arrow/cpp && mkdir release && cd release + +# It is important to statically link to boost libraries +cmake .. 
-DCMAKE_BUILD_TYPE=Release -DARROW_BOOST_USE_SHARED:BOOL=Off
+make install
+```
+
+Then you can install the R package with [remotes::install_github()](https://remotes.r-lib.org/reference/install_github.html)
+
+```{r, eval = FALSE}
+# install.packages("remotes")
+remotes::install_github("apache/arrow/r")
+```
+
+This will install the correct versions of the packages `arrow` depends on.
+
+# Development
+
+## System Requirements
+
+## Environment Setup and Build
+
+## Build and test
+
+## Developing on Windows
+
+# Memory and IO Interfaces
+
+# Data Type and In-Memory Data Model
+
+Apache Arrow defines columnar array data structures by composing type metadata with memory buffers. These data
+structures are exposed in R as a set of interrelated [R6](https://r6.r-lib.org) classes.
+
+ - Type metadata: Instances of `arrow::DataType`, which describe a logical array type
+ - Schemas: Instances of `arrow::Schema`, which describes a named collection of types. These
+   can be thought of as the column types in a table-like object.
+ - Arrays: Instances of `arrow::Array`, which are atomic, contiguous columnar data structures
+   composed from `arrow::Buffer` objects.
+ - Record Batches: Instances of `arrow::RecordBatch` which are a collection of `Array` objects
+   with a particular schema
+ - Tables: Instances of `arrow::Table`, a logical table data structure in which each column
+   consists of one or more `Array` objects of the same logical type.
+
+## Type metadata
+
+Apache Arrow defines language agnostic column-oriented data structures for array data. These include:
+
+ - Fixed-length primitive types: numbers, booleans, dates and times, fixed size binary and other values
+   that fit into a given number of bits
+ - Variable length primitive types: binary, string
+ - Nested types: list, struct and union
+ - Dictionary type: An encoded categorical type
+
+Each logical type in `arrow` has a corresponding factory function for creating an instance of that type object in R.
+
+```{r}
+library(arrow, warn.conflicts = FALSE)
+t1 <- int32()
+t2 <- utf8()
+t5 <- timestamp(TimeUnit$MILLI)
+
+t1
+t2
+t5
+```
+
+We use the name *logical* type because the *physical* storage may be the same for one or more types. For
+example `int64`, `float64` and `timestamp[ms]` all occupy 64 bits per value.
+
+These objects are *metadata*, they are used for describing the data in arrays, schemas and record batches.
+
+
+
+Arrow supports *nested value types* like list, struct, and union. The `list_of()` function is the
+factory for list types.
+
+```{r}
+t6 <- list_of(t1)
+t6
+```
+
+A `struct` is a collection of named fields:
+
+```{r}
+t7 <- struct(s0 = int32(), s3 = list_of(int16()))
+t7
+```
+
+## Schemas
+
+The `arrow::Schema` type is similar to the `struct` array type, it defines the column names and types in a record
+batch or table data structure. The `schema()` factory function makes new `arrow::Schema` objects in R:
+
+```{r}
+s <- schema(
+  field0 = int32(),
+  field1 = utf8(),
+  field3 = list_of(int32())
+)
+s
+```
+
+It is fairly rare to create schemas directly.
+
+## Arrays
+
+For each data type, there is an accompanying array data structure for holding memory buffers
+that define a single contiguous chunk of columnar array data.
+
+The `array()` function can be used to create `arrow::Array` instances, although you would
+typically manipulate arrays from record batches and tables.
+
+```{r}
+a <- array(1:10)
+a
+```
+
+The `$type()` method gives the corresponding piece of type metadata:
+
+```{r}
+# TODO: should this be an active like in python ?
+# a$type rather than a$type()
+a$type()
+```
+
+Each in-memory array has a known length and null count:
+
+```{r}
+a$length()
+length(a)
+
+# TODO: should this be an active like in python ?
+# a$null_count rather than a$null_count() +a$null_count() +``` + +## Handling of missing values + +## List Arrays + +## Struct Arrays + +## Union Arrays + +## Dictionary Arrays + +The `arrow::Dictionary` type is a special array that is similar to an R factor. The +`array()` factory converts R factors to the appropriate type of dictionary array: + +```{r} +f <- factor(c("a", "b"), levels = c("a", "b", "c")) +a <- array(f) +a$type() +a$indices() +a$dictionary() +a +``` + +## RecordBatch + +A *Record Batch* in Apache Arrow is a collection of equal length array instances. The `record_batch()` function +may be used to convert a data frame to a RecordBatch. + +```{r} +tbl <- tibble::tibble( + f0 = 1:4, + f1 = c("foo", "bar", "baz", NA), + f2 = c(TRUE, NA, FALSE, NA) +) +batch <- record_batch(tbl) +batch$num_columns() +batch$num_rows() + +# convert a record batch back to a tibble +as_tibble(batch) +``` + +A record batch can be sliced (0-based) : + +```{r} +batch$Slice(2) +batch$Slice(2, 1) +``` + +## Table + +A *Table* is a set of record batches of the same schema, it can be created with the `arrow::table()` function: + +```{r} +tab <- table(tbl) +tab +tab$num_columns() +tab$num_rows() +``` + +The table columns are instances of `arrow::Column`, which is a container for one or more arrays +of the same type. + +```{r} +tab$column(0L) +tab$column(0L)$data() +tab$column(0L)$data()$chunks() +``` + diff --git a/r/doc/arrow.html b/r/doc/arrow.html new file mode 100644 index 00000000000..c54d7f3b65c --- /dev/null +++ b/r/doc/arrow.html @@ -0,0 +1,540 @@ + + + + + + + + + + + + + + + + +arrow + + + + + + + + + + + + + + + + + +

arrow

+

Romain François

+

2018-11-16

+ + + +
+

Install arrow

+
+

CRAN

+

arrow is not yet available from CRAN

+
+
+

Installing from source

+

The arrow package is currently only available from github. To install the development version, you first need to install the C++ library so that pkg-config finds it:

+
git clone https://github.com/apache/arrow.git
+cd arrow/cpp && mkdir release && cd release
+
+# It is important to statically link to boost libraries
+cmake .. -DCMAKE_BUILD_TYPE=Release -DARROW_BOOST_USE_SHARED:BOOL=Off
+make install
+

Then you can install the R package with remotes::install_github()

+ +

This will install the correct versions of the packages arrow depends on.

+
+
+
+

Development

+
+

System Requirements

+
+
+

Environment Setup and Build

+
+
+

Build and test

+
+
+

Developing on Windows

+
+
+
+

Memory and IO Interfaces

+
+
+

Data Type and In-Memory Data Model

+

Apache Arrow defines columnar array data structures by composing type metadata with memory buffers. These data structures are exposed in R as a set of interrelated R6 classes.

+ +
+

Type metadata

+

Apache Arrow defines language agnostic column-oriented data structures for array data. These include:

+
    +
  • Fixed-length primitive types: numbers, booleans, dates and times, fixed size binary and other values that fit into a given number of bits
  • +
  • Variable length primitive types: binary, string
  • +
  • Nested types: list, struct and union
  • +
  • Dictionary type: An encoded categorical type
  • +
+

Each logical type in arrow has a corresponding factory function for creating an instance of that type object in R.

+ +

We use the name logical type because the physical storage may be the same for one or more types. For example int64, float64 and timestamp[ms] all occupy 64 bits per value.

+

These objects are metadata, they are used for describing the data in arrays, schemas and record batches.

+ +

Arrow supports nested value types like list, struct, and union. The list_of() function is the factory for list types.

+ +

A struct is a collection of named fields:

+ +
+
+

Schemas

+

The arrow::Schema type is similar to the struct array type, it defines the column names and types in a record batch or table data structure. The schema() factory function makes new arrow::Schema objects in R:

+ +

It is fairly rare to create schemas directly.

+
+
+

Arrays

+

For each data type, there is an accompanying array data structure for holding memory buffers that define a single contiguous chunk of columnar array data.

+

The array() function can be used to create arrow::Array instances, although you would typically manipulate arrays from record batches and tables.

+ +

The $type() method gives the corresponding piece of type metadata:

+ +

Each in-memory array has a known length and null count:

+ +
+
+

Handling of missing values

+
+
+

List Arrays

+
+
+

Struct Arrays

+
+
+

Union Arrays

+
+
+

Dictionary Arrays

+

The arrow::Dictionary type is a special array that is similar to an R factor. The array() factory converts R factors to the appropriate type of dictionary array:

+ +
+ +
+

Table

+

A Table is a set of record batches of the same schema, it can be created with the arrow::table() function:

+ +

The table columns are instances of arrow::Column, which is a container for one or more arrays of the same type.

+ +
+
+ + + + + + + + diff --git a/r/man/BufferOutputStream.Rd b/r/man/BufferOutputStream.Rd new file mode 100644 index 00000000000..1776f995930 --- /dev/null +++ b/r/man/BufferOutputStream.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R +\name{BufferOutputStream} +\alias{BufferOutputStream} +\title{Open a \link[=arrow__io__BufferOutputStream]{arrow::io::BufferOutputStream}} +\usage{ +BufferOutputStream(initial_capacity = 0L) +} +\arguments{ +\item{initial_capacity}{initial capacity} +} +\value{ +a \link[=arrow__io__BufferOutputStream]{arrow::io::BufferOutputStream} +} +\description{ +Open a \link[=arrow__io__BufferOutputStream]{arrow::io::BufferOutputStream} +} diff --git a/r/man/BufferReader.Rd b/r/man/BufferReader.Rd new file mode 100644 index 00000000000..d8f9d536270 --- /dev/null +++ b/r/man/BufferReader.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R +\name{BufferReader} +\alias{BufferReader} +\title{Create a \link[=arrow__io__BufferReader]{arrow::io::BufferReader}} +\usage{ +BufferReader(x) +} +\arguments{ +\item{buffer}{R object to treat as a buffer or a buffer created by \code{\link[=buffer]{buffer()}}} +} +\description{ +Create a \link[=arrow__io__BufferReader]{arrow::io::BufferReader} +} diff --git a/r/man/FileOutputStream.Rd b/r/man/FileOutputStream.Rd new file mode 100644 index 00000000000..4155d349d1a --- /dev/null +++ b/r/man/FileOutputStream.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R +\name{FileOutputStream} +\alias{FileOutputStream} +\title{Open a \link[=arrow__io__FileOutputStream]{arrow::io::FileOutputStream}} +\usage{ +FileOutputStream(path) +} +\arguments{ +\item{path}{file path} +} +\value{ +a \link[=arrow__io__FileOutputStream]{arrow::io::FileOutputStream} +} +\description{ +Open a \link[=arrow__io__FileOutputStream]{arrow::io::FileOutputStream} +} diff --git 
a/r/man/FixedSizeBufferWriter.Rd b/r/man/FixedSizeBufferWriter.Rd new file mode 100644 index 00000000000..553d61b76e1 --- /dev/null +++ b/r/man/FixedSizeBufferWriter.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R +\name{FixedSizeBufferWriter} +\alias{FixedSizeBufferWriter} +\title{Open a \link[=arrow__io__FixedSizeBufferWriter]{arrow::io::FixedSizeBufferWriter}} +\usage{ +FixedSizeBufferWriter(buffer) +} +\arguments{ +\item{buffer}{\link[=arrow__Buffer]{arrow::Buffer} or something \code{\link[=buffer]{buffer()}} can handle} +} +\value{ +a \link[=arrow__io__BufferOutputStream]{arrow::io::BufferOutputStream} +} +\description{ +Open a \link[=arrow__io__FixedSizeBufferWriter]{arrow::io::FixedSizeBufferWriter} +} diff --git a/r/man/message_reader.Rd b/r/man/MessageReader.Rd similarity index 79% rename from r/man/message_reader.Rd rename to r/man/MessageReader.Rd index 0d8b1e7ff63..01589f5d078 100644 --- a/r/man/message_reader.Rd +++ b/r/man/MessageReader.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/message.R -\name{message_reader} -\alias{message_reader} +\name{MessageReader} +\alias{MessageReader} \title{Open a MessageReader that reads from a stream} \usage{ -message_reader(stream) +MessageReader(stream) } \arguments{ \item{stream}{an InputStream} diff --git a/r/man/MockOutputStream.Rd b/r/man/MockOutputStream.Rd new file mode 100644 index 00000000000..2e3c0b6d3e3 --- /dev/null +++ b/r/man/MockOutputStream.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R +\name{MockOutputStream} +\alias{MockOutputStream} +\title{Open a \link[=arrow__io__MockOutputStream]{arrow::io::MockOutputStream}} +\usage{ +MockOutputStream() +} +\value{ +a \link[=arrow__io__MockOutputStream]{arrow::io::MockOutputStream} +} +\description{ +Open a \link[=arrow__io__MockOutputStream]{arrow::io::MockOutputStream} +} diff --git 
a/r/man/ReadableFile.Rd b/r/man/ReadableFile.Rd new file mode 100644 index 00000000000..11535321bfb --- /dev/null +++ b/r/man/ReadableFile.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R +\name{ReadableFile} +\alias{ReadableFile} +\title{open a \link[=arrow__io__ReadableFile]{arrow::io::ReadableFile}} +\usage{ +ReadableFile(path) +} +\arguments{ +\item{path}{file path} +} +\value{ +a \link[=arrow__io__ReadableFile]{arrow::io::ReadableFile} +} +\description{ +open a \link[=arrow__io__ReadableFile]{arrow::io::ReadableFile} +} diff --git a/r/man/RecordBatchFileReader.Rd b/r/man/RecordBatchFileReader.Rd new file mode 100644 index 00000000000..3ea04817e0e --- /dev/null +++ b/r/man/RecordBatchFileReader.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RecordBatchReader.R +\name{RecordBatchFileReader} +\alias{RecordBatchFileReader} +\title{Create an \link[=arrow__ipc__RecordBatchFileReader]{arrow::ipc::RecordBatchFileReader} from a file} +\usage{ +RecordBatchFileReader(file) +} +\arguments{ +\item{file}{The file to read from. A file path, or an \link[=arrow__ipc__RecordBatchFileReader]{arrow::io::RandomAccessFile}} +} +\description{ +Create an \link[=arrow__ipc__RecordBatchFileReader]{arrow::ipc::RecordBatchFileReader} from a file +} diff --git a/r/man/RecordBatchFileWriter.Rd b/r/man/RecordBatchFileWriter.Rd new file mode 100644 index 00000000000..90858304b0b --- /dev/null +++ b/r/man/RecordBatchFileWriter.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RecordBatchWriter.R +\name{RecordBatchFileWriter} +\alias{RecordBatchFileWriter} +\title{Create a record batch file writer from a stream} +\usage{ +RecordBatchFileWriter(sink, schema) +} +\arguments{ +\item{sink}{Where to write. 
Can either be: +\itemize{ +\item character vector of length one +\item a \link[fs:path_abs]{file path} +\item \link[=arrow__io__OutputStream]{arrow::io::OutputStream} +}} + +\item{schema}{The \link[=arrow__Schema]{arrow::Schema} for data to be written.} +} +\value{ +an \code{arrow::ipc::RecordBatchWriter} object +} +\description{ +Create a record batch file writer from a stream +} diff --git a/r/man/RecordBatchStreamReader.Rd b/r/man/RecordBatchStreamReader.Rd new file mode 100644 index 00000000000..4bd0e8ccdc5 --- /dev/null +++ b/r/man/RecordBatchStreamReader.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RecordBatchReader.R +\name{RecordBatchStreamReader} +\alias{RecordBatchStreamReader} +\title{Create a \link[=arrow__ipc__RecordBatchStreamReader]{arrow::ipc::RecordBatchStreamReader} from an input stream} +\usage{ +RecordBatchStreamReader(stream) +} +\arguments{ +\item{stream}{input stream, an \link[=arrow__io__InputStream]{arrow::io::InputStream} or a raw vector} +} +\description{ +Create a \link[=arrow__ipc__RecordBatchStreamReader]{arrow::ipc::RecordBatchStreamReader} from an input stream +} diff --git a/r/man/RecordBatchStreamWriter.Rd b/r/man/RecordBatchStreamWriter.Rd new file mode 100644 index 00000000000..693e8046a0b --- /dev/null +++ b/r/man/RecordBatchStreamWriter.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RecordBatchWriter.R +\name{RecordBatchStreamWriter} +\alias{RecordBatchStreamWriter} +\title{Writer for the Arrow streaming binary format} +\usage{ +RecordBatchStreamWriter(sink, schema) +} +\arguments{ +\item{sink}{Where to write. 
Can either be: +\itemize{ +\item A string, meant as a file path, passed to \link[fs:path_ab]{fs::path_ab} +\item a \link[fs:path_abs]{file path} +\item \link[=arrow__io__OutputStream]{arrow::io::OutputStream} +}} + +\item{schema}{The \link[=arrow__Schema]{arrow::Schema} for data to be written.} +} +\value{ +a \link[=arrow__ipc__RecordBatchStreamWriter]{arrow::ipc::RecordBatchStreamWriter} +} +\description{ +Writer for the Arrow streaming binary format +} diff --git a/r/man/array.Rd b/r/man/array.Rd index 38bd773be92..ccdba181db8 100644 --- a/r/man/array.Rd +++ b/r/man/array.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/array.R \name{array} \alias{array} -\title{create an arrow::Array from an R vector} +\title{create an \link[=arrow__Array]{arrow::Array} from an R vector} \usage{ array(..., type) } @@ -12,5 +12,5 @@ array(..., type) \item{type}{currently ignored} } \description{ -create an arrow::Array from an R vector +create an \link[=arrow__Array]{arrow::Array} from an R vector } diff --git a/r/man/arrow__Array.Rd b/r/man/arrow__Array.Rd new file mode 100644 index 00000000000..b11373d26b3 --- /dev/null +++ b/r/man/arrow__Array.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/array.R +\docType{class} +\name{arrow__Array} +\alias{arrow__Array} +\alias{arrow::Array} +\title{class arrow::Array + +Array base type. Immutable data array with some logical type and some length.} +\description{ +class arrow::Array + +Array base type. Immutable data array with some logical type and some length. +} +\section{Usage}{ +\preformatted{a <- array(...) 
+ +a$IsNull(i) +a$IsValid(i) +a$length() or length(a) +a$offset() +a$null_count() +a$type() +a$type_id() +a$Equals(b) +a$ApproxEquals(b) +a$as_vector() +a$ToString() +a$Slice(offset, length = NULL) +a$RangeEquals(other, start_idx, end_idx, other_start_idx) + +print(a) +a == a +} +} + +\section{Methods}{ + +\itemize{ +\item \code{$IsNull(i)}: Return true if value at index is null. Does not boundscheck +\item \code{$IsValid(i)}: Return true if value at index is valid. Does not boundscheck +\item \code{$length()}: Size in the number of elements this array contains +\item \code{$offset()}: A relative position into another array's data, to enable zero-copy slicing +\item \code{$null_count()}: The number of null entries in the array +\item \code{$type()}: logical type of data +\item \code{$type_id()}: type id +\item \code{$Equals(other)} : is this array equal to \code{other} +\item \code{$ApproxEquals(other)} : +\item \code{$data()}: return the underlying \link[=arrow__ArrayData]{arrow::ArrayData} +\item \code{$as_vector()}: convert to an R vector +\item \code{$ToString()}: string representation of the array +\item \code{$Slice(offset, length = NULL)} : Construct a zero-copy slice of the array with the indicated offset and length. If length is \code{NULL}, the slice goes until the end of the array. 
+\item \code{$RangeEquals(other, start_idx, end_idx, other_start_idx)} : +} +} + +\keyword{datasets} diff --git a/r/man/arrow__ArrayData.Rd b/r/man/arrow__ArrayData.Rd new file mode 100644 index 00000000000..bdf996605c5 --- /dev/null +++ b/r/man/arrow__ArrayData.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ArrayData.R +\docType{class} +\name{arrow__ArrayData} +\alias{arrow__ArrayData} +\alias{arrow::ArrayData} +\title{class arrow::ArrayData} +\description{ +class arrow::ArrayData +} +\section{Usage}{ +\preformatted{data <- array(...)$data() + +data$type() +data$length() +data$null_count() +data$offset() +data$buffers() +} +} + +\section{Methods}{ + + +... +} + +\keyword{datasets} diff --git a/r/man/arrow__Buffer.Rd b/r/man/arrow__Buffer.Rd new file mode 100644 index 00000000000..135da7a20e7 --- /dev/null +++ b/r/man/arrow__Buffer.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/buffer.R +\docType{class} +\name{arrow__Buffer} +\alias{arrow__Buffer} +\alias{arrow::Buffer} +\title{class arrow::Buffer} +\description{ +class arrow::Buffer +} +\section{Methods}{ + +\itemize{ +\item \code{$is_mutable()} : +\item \code{$ZeroPadding()} : +\item \code{$size()} : +\item \code{$capacity()}: +} +} + +\keyword{datasets} diff --git a/r/man/arrow__ChunkedArray.Rd b/r/man/arrow__ChunkedArray.Rd new file mode 100644 index 00000000000..a87bf1c0dcc --- /dev/null +++ b/r/man/arrow__ChunkedArray.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ChunkedArray.R +\docType{class} +\name{arrow__ChunkedArray} +\alias{arrow__ChunkedArray} +\alias{arrow::ChunkedArray} +\title{class arrow::ChunkedArray} +\description{ +class arrow::ChunkedArray +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__Column.Rd b/r/man/arrow__Column.Rd new file mode 100644 index 00000000000..6a0ee6a40a5 --- /dev/null +++ 
b/r/man/arrow__Column.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Column.R +\docType{class} +\name{arrow__Column} +\alias{arrow__Column} +\alias{arrow::Column} +\title{class arrow::Column} +\description{ +class arrow::Column +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__DataType.Rd b/r/man/arrow__DataType.Rd new file mode 100644 index 00000000000..53bd6327d91 --- /dev/null +++ b/r/man/arrow__DataType.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/R6.R +\docType{class} +\name{arrow__DataType} +\alias{arrow__DataType} +\alias{arrow::DataType} +\title{class arrow::DataType} +\description{ +class arrow::DataType +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__DictionaryType.Rd b/r/man/arrow__DictionaryType.Rd new file mode 100644 index 00000000000..ba462ee0114 --- /dev/null +++ b/r/man/arrow__DictionaryType.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dictionary.R +\docType{class} +\name{arrow__DictionaryType} +\alias{arrow__DictionaryType} +\alias{arrow::DictionaryType} +\title{class arrow::DictionaryType} +\description{ +class arrow::DictionaryType +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__Field.Rd b/r/man/arrow__Field.Rd new file mode 100644 index 00000000000..893a65aa08e --- /dev/null +++ b/r/man/arrow__Field.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Field.R +\docType{class} +\name{arrow__Field} +\alias{arrow__Field} +\alias{arrow::Field} +\title{class arrow::Field} +\description{ +class arrow::Field +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__FixedWidthType.Rd b/r/man/arrow__FixedWidthType.Rd new file mode 100644 index 00000000000..610a4003429 --- /dev/null +++ b/r/man/arrow__FixedWidthType.Rd 
@@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/R6.R +\docType{class} +\name{arrow__FixedWidthType} +\alias{arrow__FixedWidthType} +\alias{arrow::FixedWidthType} +\title{class arrow::FixedWidthType} +\description{ +class arrow::FixedWidthType +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__RecordBatch.Rd b/r/man/arrow__RecordBatch.Rd new file mode 100644 index 00000000000..40ba6323ee0 --- /dev/null +++ b/r/man/arrow__RecordBatch.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RecordBatch.R +\docType{class} +\name{arrow__RecordBatch} +\alias{arrow__RecordBatch} +\alias{arrow::RecordBatch} +\title{class arrow::RecordBatch} +\description{ +class arrow::RecordBatch +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__RecordBatchReader.Rd b/r/man/arrow__RecordBatchReader.Rd new file mode 100644 index 00000000000..b3ccd3f1749 --- /dev/null +++ b/r/man/arrow__RecordBatchReader.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RecordBatchReader.R +\docType{class} +\name{arrow__RecordBatchReader} +\alias{arrow__RecordBatchReader} +\alias{arrow::RecordBatchReader} +\title{class arrow::RecordBatchReader} +\description{ +class arrow::RecordBatchReader +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__Schema.Rd b/r/man/arrow__Schema.Rd new file mode 100644 index 00000000000..b657ff2c4a8 --- /dev/null +++ b/r/man/arrow__Schema.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Schema.R +\docType{class} +\name{arrow__Schema} +\alias{arrow__Schema} +\alias{arrow::Schema} +\title{class arrow::Schema} +\description{ +class arrow::Schema +} +\section{Usage}{ +\preformatted{s <- schema(...) 
+ +s$ToString() +s$num_fields() +s$field(i) +} +} + +\section{Methods}{ + +\itemize{ +\item \code{$ToString()}: convert to a string +\item \code{$num_fields()}: returns the number of fields +\item \code{$field(i)}: returns the field at index \code{i} (0-based) +} +} + +\keyword{datasets} diff --git a/r/man/arrow___MemoryPool.Rd b/r/man/arrow___MemoryPool.Rd new file mode 100644 index 00000000000..9189e8be4a3 --- /dev/null +++ b/r/man/arrow___MemoryPool.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/memory_pool.R +\docType{class} +\name{arrow__MemoryPool} +\alias{arrow__MemoryPool} +\alias{arrow::MemoryPool} +\title{class arrow::MemoryPool} +\description{ +class arrow::MemoryPool +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__io__BufferOutputStream.Rd b/r/man/arrow__io__BufferOutputStream.Rd new file mode 100644 index 00000000000..ade78ca276d --- /dev/null +++ b/r/man/arrow__io__BufferOutputStream.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R +\docType{data} +\name{arrow__io__BufferOutputStream} +\alias{arrow__io__BufferOutputStream} +\alias{arrow::io::BufferOutputStream} +\title{class arrow::io::BufferOutputStream} +\format{An object of class \code{R6ClassGenerator} of length 24.} +\usage{ +arrow::io::BufferOutputStream +} +\description{ +class arrow::io::BufferOutputStream +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__io__BufferReader.Rd b/r/man/arrow__io__BufferReader.Rd new file mode 100644 index 00000000000..42ee6bba94c --- /dev/null +++ b/r/man/arrow__io__BufferReader.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R +\docType{data} +\name{arrow__io__BufferReader} +\alias{arrow__io__BufferReader} +\alias{arrow::io::BufferReader} +\title{class arrow::io::BufferReader} +\format{An object of class \code{R6ClassGenerator} of 
length 24.} +\usage{ +arrow::io::BufferReader +} +\description{ +class arrow::io::BufferReader +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__io__FileOutputStream.Rd b/r/man/arrow__io__FileOutputStream.Rd new file mode 100644 index 00000000000..429ab8dadbd --- /dev/null +++ b/r/man/arrow__io__FileOutputStream.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R +\docType{data} +\name{arrow__io__FileOutputStream} +\alias{arrow__io__FileOutputStream} +\alias{arrow::io::FileOutputStream} +\title{class arrow::io::FileOutputStream} +\format{An object of class \code{R6ClassGenerator} of length 24.} +\usage{ +arrow::io::FileOutputStream +} +\description{ +class arrow::io::FileOutputStream +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__io__FixedSizeBufferWriter.Rd b/r/man/arrow__io__FixedSizeBufferWriter.Rd new file mode 100644 index 00000000000..ca0ec4a5b01 --- /dev/null +++ b/r/man/arrow__io__FixedSizeBufferWriter.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R +\docType{data} +\name{arrow__io__FixedSizeBufferWriter} +\alias{arrow__io__FixedSizeBufferWriter} +\alias{arrow::io::FixedSizeBufferWriter} +\title{class arrow::io::FixedSizeBufferWriter} +\format{An object of class \code{R6ClassGenerator} of length 24.} +\usage{ +arrow::io::FixedSizeBufferWriter +} +\description{ +class arrow::io::FixedSizeBufferWriter +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__io__InputStream.Rd b/r/man/arrow__io__InputStream.Rd new file mode 100644 index 00000000000..a18fe7af344 --- /dev/null +++ b/r/man/arrow__io__InputStream.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R +\docType{data} +\name{arrow__io__InputStream} +\alias{arrow__io__InputStream} +\alias{arrow::io::InputStream} +\title{class 
arrow::io::InputStream} +\format{An object of class \code{R6ClassGenerator} of length 24.} +\usage{ +arrow::io::InputStream +} +\description{ +class arrow::io::InputStream +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__io__MemoryMappedFile.Rd b/r/man/arrow__io__MemoryMappedFile.Rd new file mode 100644 index 00000000000..cbec49e52a6 --- /dev/null +++ b/r/man/arrow__io__MemoryMappedFile.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R +\docType{data} +\name{arrow__io__MemoryMappedFile} +\alias{arrow__io__MemoryMappedFile} +\alias{arrow::io::MemoryMappedFile} +\title{class arrow::io::MemoryMappedFile} +\format{An object of class \code{R6ClassGenerator} of length 24.} +\usage{ +arrow::io::MemoryMappedFile +} +\description{ +class arrow::io::MemoryMappedFile +} +\section{Methods}{ + + +TODO +} + +\seealso{ +\code{\link[=mmap_open]{mmap_open()}}, \code{\link[=mmap_create]{mmap_create()}} +} +\keyword{datasets} diff --git a/r/man/arrow__io__MockOutputStream.Rd b/r/man/arrow__io__MockOutputStream.Rd new file mode 100644 index 00000000000..aa1fe0df55b --- /dev/null +++ b/r/man/arrow__io__MockOutputStream.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R +\docType{data} +\name{arrow__io__MockOutputStream} +\alias{arrow__io__MockOutputStream} +\alias{arrow::io::MockOutputStream} +\title{class arrow::io::MockOutputStream} +\format{An object of class \code{R6ClassGenerator} of length 24.} +\usage{ +arrow::io::MockOutputStream +} +\description{ +class arrow::io::MockOutputStream +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__io__OutputStream.Rd b/r/man/arrow__io__OutputStream.Rd new file mode 100644 index 00000000000..5ff980491e9 --- /dev/null +++ b/r/man/arrow__io__OutputStream.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R +\docType{data} 
+\name{arrow__io__OutputStream} +\alias{arrow__io__OutputStream} +\alias{arrow::io::OutputStream} +\title{OutputStream} +\format{An object of class \code{R6ClassGenerator} of length 24.} +\usage{ +arrow::io::OutputStream +} +\description{ +OutputStream +} +\section{Methods}{ + +\itemize{ +\item \code{void} \code{Write}(\code{arrow::Buffer} data): write \code{data} to the stream +\item \code{void} \code{Close}(): close the stream +} +} + +\keyword{datasets} diff --git a/r/man/arrow__io__RandomAccessFile.Rd b/r/man/arrow__io__RandomAccessFile.Rd new file mode 100644 index 00000000000..55a308e0994 --- /dev/null +++ b/r/man/arrow__io__RandomAccessFile.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R +\docType{data} +\name{arrow__io__RandomAccessFile} +\alias{arrow__io__RandomAccessFile} +\alias{arrow::io::RandomAccessFile} +\title{class arrow::io::RandomAccessFile} +\format{An object of class \code{R6ClassGenerator} of length 24.} +\usage{ +arrow::io::RandomAccessFile +} +\description{ +class arrow::io::RandomAccessFile +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__io__Readable.Rd b/r/man/arrow__io__Readable.Rd new file mode 100644 index 00000000000..a0b6af95eec --- /dev/null +++ b/r/man/arrow__io__Readable.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R +\docType{data} +\name{arrow__io__Readable} +\alias{arrow__io__Readable} +\alias{arrow::io::Readable} +\title{class arrow::io::Readable} +\format{An object of class \code{R6ClassGenerator} of length 24.} +\usage{ +arrow::io::Readable +} +\description{ +class arrow::io::Readable +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__io__ReadableFile.Rd b/r/man/arrow__io__ReadableFile.Rd new file mode 100644 index 00000000000..a8a6783eef6 --- /dev/null +++ b/r/man/arrow__io__ReadableFile.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by
hand +% Please edit documentation in R/io.R +\docType{data} +\name{arrow__io__ReadableFile} +\alias{arrow__io__ReadableFile} +\alias{arrow::io::ReadableFile} +\title{class arrow::io::ReadableFile} +\format{An object of class \code{R6ClassGenerator} of length 24.} +\usage{ +arrow::io::ReadableFile +} +\description{ +class arrow::io::ReadableFile +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__ipc__Message.Rd b/r/man/arrow__ipc__Message.Rd new file mode 100644 index 00000000000..d3811f8f4c1 --- /dev/null +++ b/r/man/arrow__ipc__Message.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/message.R +\docType{class} +\name{arrow__ipc__Message} +\alias{arrow__ipc__Message} +\alias{arrow::ipc::Message} +\title{class arrow::ipc::Message} +\description{ +class arrow::ipc::Message +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__ipc__MessageReader.Rd b/r/man/arrow__ipc__MessageReader.Rd new file mode 100644 index 00000000000..883e9e0618b --- /dev/null +++ b/r/man/arrow__ipc__MessageReader.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/message.R +\docType{class} +\name{arrow__ipc__MessageReader} +\alias{arrow__ipc__MessageReader} +\alias{arrow::ipc::MessageReader} +\title{class arrow::ipc::MessageReader} +\description{ +class arrow::ipc::MessageReader +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__ipc__RecordBatchFileReader.Rd b/r/man/arrow__ipc__RecordBatchFileReader.Rd new file mode 100644 index 00000000000..675f636b365 --- /dev/null +++ b/r/man/arrow__ipc__RecordBatchFileReader.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RecordBatchReader.R +\docType{class} +\name{arrow__ipc__RecordBatchFileReader} +\alias{arrow__ipc__RecordBatchFileReader} +\alias{arrow::ipc::RecordBatchFileReader} +\title{class 
arrow::ipc::RecordBatchFileReader} +\description{ +class arrow::ipc::RecordBatchFileReader +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__ipc__RecordBatchFileWriter.Rd b/r/man/arrow__ipc__RecordBatchFileWriter.Rd new file mode 100644 index 00000000000..9f51fe4bfea --- /dev/null +++ b/r/man/arrow__ipc__RecordBatchFileWriter.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RecordBatchWriter.R +\docType{class} +\name{arrow__ipc__RecordBatchFileWriter} +\alias{arrow__ipc__RecordBatchFileWriter} +\alias{arrow::ipc::RecordBatchFileWriter} +\title{class arrow::ipc::RecordBatchFileWriter + +Writer for the Arrow binary file format} +\description{ +class arrow::ipc::RecordBatchFileWriter + +Writer for the Arrow binary file format +} +\section{Usage}{ +\preformatted{writer <- RecordBatchFileWriter(sink, schema) + +writer$WriteRecordBatch(batch) +writer$WriteTable(table) +writer$Close() +} +} + +\section{Factory}{ + + +The \code{\link[=RecordBatchFileWriter]{RecordBatchFileWriter()}} function creates a record batch file writer.
+} + +\section{Methods}{ + +inherited from \link[=arrow__ipc__RecordBatchWriter]{arrow::ipc::RecordBatchWriter} +\itemize{ +\item \code{$WriteRecordBatch(batch)}: Write record batch to stream +\item \code{$WriteTable(table)}: write Table to stream +\item \code{$Close()}: close stream +} +} + +\keyword{datasets} diff --git a/r/man/arrow__ipc__RecordBatchStreamReader.Rd b/r/man/arrow__ipc__RecordBatchStreamReader.Rd new file mode 100644 index 00000000000..49f57cce057 --- /dev/null +++ b/r/man/arrow__ipc__RecordBatchStreamReader.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RecordBatchReader.R +\docType{class} +\name{arrow__ipc__RecordBatchStreamReader} +\alias{arrow__ipc__RecordBatchStreamReader} +\alias{arrow::ipc::RecordBatchStreamReader} +\title{class arrow::ipc::RecordBatchStreamReader} +\description{ +class arrow::ipc::RecordBatchStreamReader +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__ipc__RecordBatchStreamWriter.Rd b/r/man/arrow__ipc__RecordBatchStreamWriter.Rd new file mode 100644 index 00000000000..f84a27bc19a --- /dev/null +++ b/r/man/arrow__ipc__RecordBatchStreamWriter.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RecordBatchWriter.R +\docType{class} +\name{arrow__ipc__RecordBatchStreamWriter} +\alias{arrow__ipc__RecordBatchStreamWriter} +\alias{arrow::ipc::RecordBatchStreamWriter} +\title{class arrow::ipc::RecordBatchStreamWriter + +Writer for the Arrow streaming binary format} +\description{ +class arrow::ipc::RecordBatchStreamWriter + +Writer for the Arrow streaming binary format +} +\section{Usage}{ +\preformatted{writer <- RecordBatchStreamWriter(sink, schema) + +writer$WriteRecordBatch(batch) +writer$WriteTable(table) +writer$Close() +} +} + +\section{Factory}{ + + +The \code{\link[=RecordBatchStreamWriter]{RecordBatchStreamWriter()}} function creates a record batch stream writer.
+} + +\section{Methods}{ + +inherited from \link[=arrow__ipc__RecordBatchWriter]{arrow::ipc::RecordBatchWriter} +\itemize{ +\item \code{$WriteRecordBatch(batch)}: Write record batch to stream +\item \code{$WriteTable(table)}: write Table to stream +\item \code{$Close()}: close stream +} +} + +\keyword{datasets} diff --git a/r/man/arrow__ipc__RecordBatchWriter.Rd b/r/man/arrow__ipc__RecordBatchWriter.Rd new file mode 100644 index 00000000000..632fc6b3f8b --- /dev/null +++ b/r/man/arrow__ipc__RecordBatchWriter.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RecordBatchWriter.R +\docType{class} +\name{arrow__ipc__RecordBatchWriter} +\alias{arrow__ipc__RecordBatchWriter} +\alias{arrow::ipc::RecordBatchWriter} +\title{class arrow::ipc::RecordBatchWriter} +\description{ +class arrow::ipc::RecordBatchWriter +} +\section{Methods}{ + +\itemize{ +\item \code{$WriteRecordBatch(batch)}: Write record batch to stream +\item \code{$WriteTable(table)}: write Table to stream +\item \code{$Close()}: close stream +} +} + +\section{Derived classes}{ + +\itemize{ +\item \link[=arrow__ipc__RecordBatchStreamWriter]{arrow::ipc::RecordBatchStreamWriter} implements the streaming binary format +\item \link[=arrow__ipc__RecordBatchFileWriter]{arrow::ipc::RecordBatchFileWriter} implements the binary file format +} +} + +\keyword{datasets} diff --git a/r/man/buffer.Rd b/r/man/buffer.Rd index 4d4e97e47d8..60fd25d4bf1 100644 --- a/r/man/buffer.Rd +++ b/r/man/buffer.Rd @@ -2,16 +2,16 @@ % Please edit documentation in R/buffer.R \name{buffer} \alias{buffer} -\title{Create a buffer from an R object} +\title{Create a \link[=arrow__Buffer]{arrow::Buffer} from an R object} \usage{ buffer(x) } \arguments{ -\item{x}{R object} +\item{x}{R object. 
Only raw, numeric and integer vectors are currently supported} } \value{ -an instance of \code{arrow::Buffer} that borrows memory from \code{x} +an instance of \link[=arrow__Buffer]{arrow::Buffer} that borrows memory from \code{x} } \description{ -Create a buffer from an R object +Create a \link[=arrow__Buffer]{arrow::Buffer} from an R object } diff --git a/r/man/buffer_reader.Rd b/r/man/buffer_reader.Rd deleted file mode 100644 index 3b814fb00b1..00000000000 --- a/r/man/buffer_reader.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/io.R -\name{buffer_reader} -\alias{buffer_reader} -\title{Create a \code{arrow::BufferReader}} -\usage{ -buffer_reader(x) -} -\arguments{ -\item{x}{R object to treat as a buffer or a buffer created by \code{\link[=buffer]{buffer()}}} -} -\description{ -Create a \code{arrow::BufferReader} -} diff --git a/r/man/chunked_array.Rd b/r/man/chunked_array.Rd index 1f4fb836143..c6973be7210 100644 --- a/r/man/chunked_array.Rd +++ b/r/man/chunked_array.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/ChunkedArray.R \name{chunked_array} \alias{chunked_array} -\title{create an arrow::Array from an R vector} +\title{create an \link[=arrow__ChunkedArray]{arrow::ChunkedArray} from various R vectors} \usage{ chunked_array(..., type) } @@ -12,5 +12,5 @@ chunked_array(..., type) \item{type}{currently ignored} } \description{ -create an arrow::Array from an R vector +create an \link[=arrow__ChunkedArray]{arrow::ChunkedArray} from various R vectors } diff --git a/r/man/default_memory_pool.Rd b/r/man/default_memory_pool.Rd new file mode 100644 index 00000000000..1725ff0e10a --- /dev/null +++ b/r/man/default_memory_pool.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/memory_pool.R +\name{default_memory_pool} +\alias{default_memory_pool} +\title{default \link[=arrow__MemoryPool]{arrow::MemoryPool}} +\usage{ +default_memory_pool() +} +\value{ 
+the default \link[=arrow__MemoryPool]{arrow::MemoryPool} +} +\description{ +default \link[=arrow__MemoryPool]{arrow::MemoryPool} +} diff --git a/r/man/dictionary.Rd b/r/man/dictionary.Rd index 2a7989648b0..340283ec4da 100644 --- a/r/man/dictionary.Rd +++ b/r/man/dictionary.Rd @@ -13,6 +13,9 @@ dictionary(type, values, ordered = FALSE) \item{ordered}{Is this an ordered dictionary} } +\value{ +a \link[=arrow__DictionaryType]{arrow::DictionaryType} +} \description{ dictionary type factory } diff --git a/r/man/field.Rd b/r/man/field.Rd index e7af66db290..5cbd8033875 100644 --- a/r/man/field.Rd +++ b/r/man/field.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/Field.R \name{field} \alias{field} -\title{Factor for a \code{arrow::Field}} +\title{Factory for a \code{arrow::Field}} \usage{ field(name, type, metadata) } @@ -14,7 +14,7 @@ field(name, type, metadata) \item{metadata}{currently ignored} } \description{ -Factor for a \code{arrow::Field} +Factory for a \code{arrow::Field} } \examples{ field("x", int32()) diff --git a/r/man/io.Rd b/r/man/io.Rd deleted file mode 100644 index 74817bf88a3..00000000000 --- a/r/man/io.Rd +++ /dev/null @@ -1,40 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/io.R -\name{mmap_create} -\alias{mmap_create} -\alias{mmap_open} -\alias{file_open} -\alias{file_output_stream} -\alias{mock_output_stream} -\alias{buffer_output_stream} -\alias{fixed_size_buffer_writer} -\title{Create a new read/write memory mapped file of a given size} -\usage{ -mmap_create(path, size) - -mmap_open(path, mode = c("read", "write", "readwrite")) - -file_open(path) - -file_output_stream(path) - -mock_output_stream() - -buffer_output_stream(initial_capacity = 0L) - -fixed_size_buffer_writer(buffer) -} -\arguments{ -\item{path}{file path} - -\item{size}{size in bytes} - -\item{mode}{file mode (read/write/readwrite)} - -\item{initial_capacity}{initial capacity for the buffer output stream} - -\item{buffer}{an 
\code{arrow::Buffer}, typically created by \code{\link[=buffer]{buffer()}}} -} -\description{ -Create a new read/write memory mapped file of a given size -} diff --git a/r/man/mmap_create.Rd b/r/man/mmap_create.Rd new file mode 100644 index 00000000000..050ae18c76f --- /dev/null +++ b/r/man/mmap_create.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R +\name{mmap_create} +\alias{mmap_create} +\title{Create a new read/write memory mapped file of a given size} +\usage{ +mmap_create(path, size) +} +\arguments{ +\item{path}{file path} + +\item{size}{size in bytes} +} +\value{ +a \link[=arrow__io__MemoryMappedFile]{arrow::io::MemoryMappedFile} +} +\description{ +Create a new read/write memory mapped file of a given size +} diff --git a/r/man/mmap_open.Rd b/r/man/mmap_open.Rd new file mode 100644 index 00000000000..d0047a72c38 --- /dev/null +++ b/r/man/mmap_open.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R +\name{mmap_open} +\alias{mmap_open} +\title{Open a memory mapped file} +\usage{ +mmap_open(path, mode = c("read", "write", "readwrite")) +} +\arguments{ +\item{path}{file path} + +\item{mode}{file mode (read/write/readwrite)} +} +\description{ +Open a memory mapped file +} diff --git a/r/man/read_record_batch.Rd b/r/man/read_record_batch.Rd index 4ca048f28ec..fef12cbac4a 100644 --- a/r/man/read_record_batch.Rd +++ b/r/man/read_record_batch.Rd @@ -1,19 +1,19 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatchReader.R +% Please edit documentation in R/read_record_batch.R \name{read_record_batch} \alias{read_record_batch} -\title{Read a single record batch from a stream} +\title{read \link[=arrow__RecordBatch]{arrow::RecordBatch} as encapsulated IPC message, given a known \link[=arrow__Schema]{arrow::Schema}} \usage{ -read_record_batch(stream, ...) 
+read_record_batch(obj, schema) } \arguments{ -\item{stream}{input stream} +\item{obj}{a \link[=arrow__ipc__Message]{arrow::ipc::Message}, a \link[=arrow__io__InputStream]{arrow::io::InputStream}, a \link[=arrow__Buffer]{arrow::Buffer}, or a raw vector} -\item{...}{additional parameters} +\item{schema}{a \link[=arrow__Schema]{arrow::Schema}} } -\description{ -Read a single record batch from a stream +\value{ +a \link[=arrow__RecordBatch]{arrow::RecordBatch} } -\details{ -\code{stream} can be a \code{arrow::io::RandomAccessFile} stream as created by \code{\link[=file_open]{file_open()}} or \code{\link[=mmap_open]{mmap_open()}} or a path. +\description{ +read \link[=arrow__RecordBatch]{arrow::RecordBatch} as encapsulated IPC message, given a known \link[=arrow__Schema]{arrow::Schema} } diff --git a/r/man/read_table.Rd b/r/man/read_table.Rd index f851057e8a7..a9fc3078740 100644 --- a/r/man/read_table.Rd +++ b/r/man/read_table.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatchReader.R +% Please edit documentation in R/read_table.R \name{read_table} \alias{read_table} \title{Read an arrow::Table from a stream} @@ -7,7 +7,7 @@ read_table(stream) } \arguments{ -\item{stream}{stream. Either a stream created by \code{\link[=file_open]{file_open()}} or \code{\link[=mmap_open]{mmap_open()}} or a file path.} +\item{stream}{stream. 
Either a stream created by \code{\link[=ReadableFile]{ReadableFile()}} or \code{\link[=mmap_open]{mmap_open()}} or a file path.} } \description{ Read an arrow::Table from a stream diff --git a/r/man/record_batch.Rd b/r/man/record_batch.Rd index e108d64b46a..4567a9ab763 100644 --- a/r/man/record_batch.Rd +++ b/r/man/record_batch.Rd @@ -2,13 +2,16 @@ % Please edit documentation in R/RecordBatch.R \name{record_batch} \alias{record_batch} -\title{Create an arrow::RecordBatch from a data frame} +\title{Create an \link[=arrow__RecordBatch]{arrow::RecordBatch} from a data frame} \usage{ record_batch(.data) } \arguments{ \item{.data}{a data frame} } +\value{ +a \link[=arrow__RecordBatch]{arrow::RecordBatch} +} \description{ -Create an arrow::RecordBatch from a data frame +Create an \link[=arrow__RecordBatch]{arrow::RecordBatch} from a data frame } diff --git a/r/man/record_batch_file_reader.Rd b/r/man/record_batch_file_reader.Rd deleted file mode 100644 index b7e211dfbc2..00000000000 --- a/r/man/record_batch_file_reader.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatchReader.R -\name{record_batch_file_reader} -\alias{record_batch_file_reader} -\title{Create an \code{arrow::ipc::RecordBatchFileReader} from a file} -\usage{ -record_batch_file_reader(file) -} -\arguments{ -\item{file}{The file to read from} -} -\description{ -Create an \code{arrow::ipc::RecordBatchFileReader} from a file -} diff --git a/r/man/record_batch_file_writer.Rd b/r/man/record_batch_file_writer.Rd deleted file mode 100644 index b7dcb0c39e4..00000000000 --- a/r/man/record_batch_file_writer.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatchWriter.R -\name{record_batch_file_writer} -\alias{record_batch_file_writer} -\title{Create a record batch file writer from a stream} -\usage{ -record_batch_file_writer(stream, schema) -} -\arguments{ -\item{stream}{a 
stream} - -\item{schema}{the schema of the batches} -} -\value{ -an \code{arrow::ipc::RecordBatchWriter} object -} -\description{ -Create a record batch file writer from a stream -} diff --git a/r/man/record_batch_stream_reader.Rd b/r/man/record_batch_stream_reader.Rd deleted file mode 100644 index 018045f6a32..00000000000 --- a/r/man/record_batch_stream_reader.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatchReader.R -\name{record_batch_stream_reader} -\alias{record_batch_stream_reader} -\title{Create a \code{arrow::ipc::RecordBatchStreamReader} from an input stream} -\usage{ -record_batch_stream_reader(stream) -} -\arguments{ -\item{stream}{input stream} -} -\description{ -Create a \code{arrow::ipc::RecordBatchStreamReader} from an input stream -} diff --git a/r/man/record_batch_stream_writer.Rd b/r/man/record_batch_stream_writer.Rd deleted file mode 100644 index d720d50d3a7..00000000000 --- a/r/man/record_batch_stream_writer.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatchWriter.R -\name{record_batch_stream_writer} -\alias{record_batch_stream_writer} -\title{Create a record batch stream writer} -\usage{ -record_batch_stream_writer(stream, schema) -} -\arguments{ -\item{stream}{a stream} - -\item{schema}{a schema} -} -\description{ -Create a record batch stream writer -} diff --git a/r/man/schema.Rd b/r/man/schema.Rd index 9b77d47b613..ad3bcb1f4e0 100644 --- a/r/man/schema.Rd +++ b/r/man/schema.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/Schema.R \name{schema} \alias{schema} -\title{Schema functions} +\title{Schema factory} \usage{ schema(...) } @@ -10,8 +10,8 @@ schema(...) 
\item{...}{named list of data types} } \value{ -a Schema +a \link[=arrow__Schema]{schema} } \description{ -Schema functions +Schema factory } diff --git a/r/man/write_arrow.Rd b/r/man/write_arrow.Rd index 42b39f1d051..5e0d13f304d 100644 --- a/r/man/write_arrow.Rd +++ b/r/man/write_arrow.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatchWriter.R +% Please edit documentation in R/write_table.R \name{write_arrow} \alias{write_arrow} \title{Write an object to a stream} diff --git a/r/man/write_arrow.arrow-colon-colon-Table.Rd b/r/man/write_arrow.arrow-colon-colon-Table.Rd new file mode 100644 index 00000000000..b499f97e4bc --- /dev/null +++ b/r/man/write_arrow.arrow-colon-colon-Table.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/write_table.R +\name{write_arrow.arrow::Table} +\alias{write_arrow.arrow::Table} +\title{#' @export +\code{write_arrow.arrow::RecordBatch} <- function(x, stream, ...) { +write_record_batch(x, stream, ...) +}} +\usage{ +\method{write_arrow}{arrow::Table}(x, stream, ...) +} +\description{ +#' @export +\code{write_arrow.arrow::RecordBatch} <- function(x, stream, ...) { +write_record_batch(x, stream, ...) +} +} diff --git a/r/man/write_record_batch.Rd b/r/man/write_record_batch.Rd deleted file mode 100644 index afc3363f0df..00000000000 --- a/r/man/write_record_batch.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatchWriter.R -\name{write_record_batch} -\alias{write_record_batch} -\title{write a record batch} -\usage{ -write_record_batch(x, stream, ...) 
-} -\arguments{ -\item{x}{a \code{arrow::RecordBatch}} - -\item{stream}{where to stream the record batch} - -\item{...}{extra parameters} -} -\description{ -write a record batch -} diff --git a/r/man/write_table.Rd b/r/man/write_table.Rd index a247870ec01..33633fd5a39 100644 --- a/r/man/write_table.Rd +++ b/r/man/write_table.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatchWriter.R +% Please edit documentation in R/write_table.R \name{write_table} \alias{write_table} \title{write an arrow::Table} diff --git a/r/src/RcppExports.cpp b/r/src/RcppExports.cpp index 2c549ad1b90..95e5dc7b5d0 100644 --- a/r/src/RcppExports.cpp +++ b/r/src/RcppExports.cpp @@ -1859,6 +1859,29 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// ipc___SerializeRecordBatch__Raw +RawVector ipc___SerializeRecordBatch__Raw(const std::shared_ptr& batch); +RcppExport SEXP _arrow_ipc___SerializeRecordBatch__Raw(SEXP batchSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr& >::type batch(batchSEXP); + rcpp_result_gen = Rcpp::wrap(ipc___SerializeRecordBatch__Raw(batch)); + return rcpp_result_gen; +END_RCPP +} +// ipc___ReadRecordBatch__InputStream__Schema +std::shared_ptr ipc___ReadRecordBatch__InputStream__Schema(const std::shared_ptr& stream, const std::shared_ptr& schema); +RcppExport SEXP _arrow_ipc___ReadRecordBatch__InputStream__Schema(SEXP streamSEXP, SEXP schemaSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr& >::type stream(streamSEXP); + Rcpp::traits::input_parameter< const std::shared_ptr& >::type schema(schemaSEXP); + rcpp_result_gen = Rcpp::wrap(ipc___ReadRecordBatch__InputStream__Schema(stream, schema)); + return rcpp_result_gen; +END_RCPP +} // RecordBatchReader__schema std::shared_ptr RecordBatchReader__schema(const std::shared_ptr& reader); 
RcppExport SEXP _arrow_RecordBatchReader__schema(SEXP readerSEXP) { @@ -1959,39 +1982,14 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } -// ipc___RecordBatchFileWriter__Open -std::shared_ptr ipc___RecordBatchFileWriter__Open(const std::shared_ptr& stream, const std::shared_ptr& schema); -RcppExport SEXP _arrow_ipc___RecordBatchFileWriter__Open(SEXP streamSEXP, SEXP schemaSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const std::shared_ptr& >::type stream(streamSEXP); - Rcpp::traits::input_parameter< const std::shared_ptr& >::type schema(schemaSEXP); - rcpp_result_gen = Rcpp::wrap(ipc___RecordBatchFileWriter__Open(stream, schema)); - return rcpp_result_gen; -END_RCPP -} -// ipc___RecordBatchStreamWriter__Open -std::shared_ptr ipc___RecordBatchStreamWriter__Open(const std::shared_ptr& stream, const std::shared_ptr& schema); -RcppExport SEXP _arrow_ipc___RecordBatchStreamWriter__Open(SEXP streamSEXP, SEXP schemaSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const std::shared_ptr& >::type stream(streamSEXP); - Rcpp::traits::input_parameter< const std::shared_ptr& >::type schema(schemaSEXP); - rcpp_result_gen = Rcpp::wrap(ipc___RecordBatchStreamWriter__Open(stream, schema)); - return rcpp_result_gen; -END_RCPP -} // ipc___RecordBatchWriter__WriteRecordBatch -void ipc___RecordBatchWriter__WriteRecordBatch(const std::shared_ptr& batch_writer, const std::shared_ptr& batch, bool allow_64bit); -RcppExport SEXP _arrow_ipc___RecordBatchWriter__WriteRecordBatch(SEXP batch_writerSEXP, SEXP batchSEXP, SEXP allow_64bitSEXP) { +void ipc___RecordBatchWriter__WriteRecordBatch(const std::shared_ptr& batch_writer, const std::shared_ptr& batch); +RcppExport SEXP _arrow_ipc___RecordBatchWriter__WriteRecordBatch(SEXP batch_writerSEXP, SEXP batchSEXP) { BEGIN_RCPP Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const 
std::shared_ptr& >::type batch_writer(batch_writerSEXP); Rcpp::traits::input_parameter< const std::shared_ptr& >::type batch(batchSEXP); - Rcpp::traits::input_parameter< bool >::type allow_64bit(allow_64bitSEXP); - ipc___RecordBatchWriter__WriteRecordBatch(batch_writer, batch, allow_64bit); + ipc___RecordBatchWriter__WriteRecordBatch(batch_writer, batch); return R_NilValue; END_RCPP } @@ -2016,6 +2014,30 @@ BEGIN_RCPP return R_NilValue; END_RCPP } +// ipc___RecordBatchFileWriter__Open +std::shared_ptr ipc___RecordBatchFileWriter__Open(const std::shared_ptr& stream, const std::shared_ptr& schema); +RcppExport SEXP _arrow_ipc___RecordBatchFileWriter__Open(SEXP streamSEXP, SEXP schemaSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr& >::type stream(streamSEXP); + Rcpp::traits::input_parameter< const std::shared_ptr& >::type schema(schemaSEXP); + rcpp_result_gen = Rcpp::wrap(ipc___RecordBatchFileWriter__Open(stream, schema)); + return rcpp_result_gen; +END_RCPP +} +// ipc___RecordBatchStreamWriter__Open +std::shared_ptr ipc___RecordBatchStreamWriter__Open(const std::shared_ptr& stream, const std::shared_ptr& schema); +RcppExport SEXP _arrow_ipc___RecordBatchStreamWriter__Open(SEXP streamSEXP, SEXP schemaSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr& >::type stream(streamSEXP); + Rcpp::traits::input_parameter< const std::shared_ptr& >::type schema(schemaSEXP); + rcpp_result_gen = Rcpp::wrap(ipc___RecordBatchStreamWriter__Open(stream, schema)); + return rcpp_result_gen; +END_RCPP +} // Table__from_dataframe std::shared_ptr Table__from_dataframe(DataFrame tbl); RcppExport SEXP _arrow_Table__from_dataframe(SEXP tblSEXP) { @@ -2251,6 +2273,8 @@ static const R_CallMethodDef CallEntries[] = { {"_arrow_RecordBatch__names", (DL_FUNC) &_arrow_RecordBatch__names, 1}, 
{"_arrow_RecordBatch__Slice1", (DL_FUNC) &_arrow_RecordBatch__Slice1, 2}, {"_arrow_RecordBatch__Slice2", (DL_FUNC) &_arrow_RecordBatch__Slice2, 3}, + {"_arrow_ipc___SerializeRecordBatch__Raw", (DL_FUNC) &_arrow_ipc___SerializeRecordBatch__Raw, 1}, + {"_arrow_ipc___ReadRecordBatch__InputStream__Schema", (DL_FUNC) &_arrow_ipc___ReadRecordBatch__InputStream__Schema, 2}, {"_arrow_RecordBatchReader__schema", (DL_FUNC) &_arrow_RecordBatchReader__schema, 1}, {"_arrow_RecordBatchReader__ReadNext", (DL_FUNC) &_arrow_RecordBatchReader__ReadNext, 1}, {"_arrow_ipc___RecordBatchStreamReader__Open", (DL_FUNC) &_arrow_ipc___RecordBatchStreamReader__Open, 1}, @@ -2260,11 +2284,11 @@ static const R_CallMethodDef CallEntries[] = { {"_arrow_ipc___RecordBatchFileReader__Open", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__Open, 1}, {"_arrow_Table__from_RecordBatchFileReader", (DL_FUNC) &_arrow_Table__from_RecordBatchFileReader, 1}, {"_arrow_Table__from_RecordBatchStreamReader", (DL_FUNC) &_arrow_Table__from_RecordBatchStreamReader, 1}, - {"_arrow_ipc___RecordBatchFileWriter__Open", (DL_FUNC) &_arrow_ipc___RecordBatchFileWriter__Open, 2}, - {"_arrow_ipc___RecordBatchStreamWriter__Open", (DL_FUNC) &_arrow_ipc___RecordBatchStreamWriter__Open, 2}, - {"_arrow_ipc___RecordBatchWriter__WriteRecordBatch", (DL_FUNC) &_arrow_ipc___RecordBatchWriter__WriteRecordBatch, 3}, + {"_arrow_ipc___RecordBatchWriter__WriteRecordBatch", (DL_FUNC) &_arrow_ipc___RecordBatchWriter__WriteRecordBatch, 2}, {"_arrow_ipc___RecordBatchWriter__WriteTable", (DL_FUNC) &_arrow_ipc___RecordBatchWriter__WriteTable, 2}, {"_arrow_ipc___RecordBatchWriter__Close", (DL_FUNC) &_arrow_ipc___RecordBatchWriter__Close, 1}, + {"_arrow_ipc___RecordBatchFileWriter__Open", (DL_FUNC) &_arrow_ipc___RecordBatchFileWriter__Open, 2}, + {"_arrow_ipc___RecordBatchStreamWriter__Open", (DL_FUNC) &_arrow_ipc___RecordBatchStreamWriter__Open, 2}, {"_arrow_Table__from_dataframe", (DL_FUNC) &_arrow_Table__from_dataframe, 1}, 
{"_arrow_Table__num_columns", (DL_FUNC) &_arrow_Table__num_columns, 1}, {"_arrow_Table__num_rows", (DL_FUNC) &_arrow_Table__num_rows, 1}, diff --git a/r/src/recordbatch.cpp b/r/src/recordbatch.cpp index 829ad45eadb..2787cf2aa54 100644 --- a/r/src/recordbatch.cpp +++ b/r/src/recordbatch.cpp @@ -120,3 +120,30 @@ std::shared_ptr RecordBatch__Slice2( const std::shared_ptr& self, int offset, int length) { return self->Slice(offset, length); } + +// [[Rcpp::export]] +RawVector ipc___SerializeRecordBatch__Raw(const std::shared_ptr& batch) { + // how many bytes do we need ? + int64_t size; + STOP_IF_NOT_OK(arrow::ipc::GetRecordBatchSize(*batch, &size)); + + // allocate the result raw vector + RawVector out(no_init(size)); + + // serialize into the bytes of the raw vector + auto buffer = std::make_shared>(out); + arrow::io::FixedSizeBufferWriter stream(buffer); + STOP_IF_NOT_OK(arrow::ipc::SerializeRecordBatch(*batch, arrow::default_memory_pool(), &stream)); + STOP_IF_NOT_OK(stream.Close()); + + return out; +} + +// [[Rcpp::export]] +std::shared_ptr ipc___ReadRecordBatch__InputStream__Schema( + const std::shared_ptr& stream, + const std::shared_ptr& schema) { + std::shared_ptr batch; + STOP_IF_NOT_OK(arrow::ipc::ReadRecordBatch(schema, stream.get(), &batch)); + return batch; +} diff --git a/r/src/recordbatchwriter.cpp b/r/src/recordbatchwriter.cpp index f86c474fec3..f766d7cfadc 100644 --- a/r/src/recordbatchwriter.cpp +++ b/r/src/recordbatchwriter.cpp @@ -17,6 +17,29 @@ #include "arrow_types.h" + + +// [[Rcpp::export]] +void ipc___RecordBatchWriter__WriteRecordBatch( + const std::shared_ptr& batch_writer, + const std::shared_ptr& batch) { + STOP_IF_NOT_OK(batch_writer->WriteRecordBatch(*batch, true)); +} + +// [[Rcpp::export]] +void ipc___RecordBatchWriter__WriteTable( + const std::shared_ptr& batch_writer, + const std::shared_ptr& table) { + STOP_IF_NOT_OK(batch_writer->WriteTable(*table)); +} + +// [[Rcpp::export]] +void ipc___RecordBatchWriter__Close( + const 
std::shared_ptr& batch_writer) { + STOP_IF_NOT_OK(batch_writer->Close()); +} + + // [[Rcpp::export]] std::shared_ptr ipc___RecordBatchFileWriter__Open( const std::shared_ptr& stream, @@ -27,6 +50,7 @@ std::shared_ptr ipc___RecordBatchFileWriter__Open return file_writer; } + // [[Rcpp::export]] std::shared_ptr ipc___RecordBatchStreamWriter__Open( const std::shared_ptr& stream, @@ -37,22 +61,3 @@ std::shared_ptr ipc___RecordBatchStreamWriter__Op return stream_writer; } -// [[Rcpp::export]] -void ipc___RecordBatchWriter__WriteRecordBatch( - const std::shared_ptr& batch_writer, - const std::shared_ptr& batch, bool allow_64bit) { - STOP_IF_NOT_OK(batch_writer->WriteRecordBatch(*batch, allow_64bit)); -} - -// [[Rcpp::export]] -void ipc___RecordBatchWriter__WriteTable( - const std::shared_ptr& batch_writer, - const std::shared_ptr& table) { - STOP_IF_NOT_OK(batch_writer->WriteTable(*table)); -} - -// [[Rcpp::export]] -void ipc___RecordBatchWriter__Close( - const std::shared_ptr& batch_writer) { - STOP_IF_NOT_OK(batch_writer->Close()); -} diff --git a/r/tests/testthat/test-RecordBatch.R b/r/tests/testthat/test-RecordBatch.R index 348327783fd..a2f1218243a 100644 --- a/r/tests/testthat/test-RecordBatch.R +++ b/r/tests/testthat/test-RecordBatch.R @@ -107,67 +107,6 @@ test_that("RecordBatch with 0 rows are supported", { fct = dictionary(int32(), array(c("a", "b"))) ) ) - - tf <- local_tempfile() - write_record_batch(batch, tf) - res <- read_record_batch(tf) - expect_equal(res, batch) -}) - -test_that("read_record_batch handles various streams (ARROW-3450, ARROW-3505)", { - tbl <- tibble::tibble( - int = 1:10, dbl = as.numeric(1:10), - lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE), - chr = letters[1:10] - ) - batch <- record_batch(tbl) - tf <- local_tempfile() - write_record_batch(batch, tf) - - bytes <- write_record_batch(batch, raw()) - buf_reader <- buffer_reader(bytes) - - batch1 <- read_record_batch(tf) - batch2 <- read_record_batch(fs::path_abs(tf)) - - 
readable_file <- close_on_exit(file_open(tf)) - batch3 <- read_record_batch(readable_file) - - mmap_file <- close_on_exit(mmap_open(tf)) - batch4 <- read_record_batch(mmap_file) - batch5 <- read_record_batch(bytes) - batch6 <- read_record_batch(buf_reader) - - stream_reader <- record_batch_stream_reader(bytes) - batch7 <- read_record_batch(stream_reader) - expect_null(read_record_batch(stream_reader)) - - file_reader <- record_batch_file_reader(tf) - batch8 <- read_record_batch(file_reader) - expect_null(read_record_batch(file_reader, i = 2)) - - expect_equal(batch, batch1) - expect_equal(batch, batch2) - expect_equal(batch, batch3) - expect_equal(batch, batch4) - expect_equal(batch, batch5) - expect_equal(batch, batch6) - expect_equal(batch, batch7) - expect_equal(batch, batch8) -}) - -test_that("read_record_batch can handle Message, Schema parameters (ARROW-3499)", { - batch <- record_batch(tibble::tibble(x = 1:10)) - stream <- buffer_reader(write_record_batch(batch, raw())) - - # schema - message <- read_message(stream) - - # batch - message <- read_message(stream) - schema <- batch$schema() - batch2 <- read_record_batch(message, schema) - expect_equal(batch, batch2) }) test_that("RecordBatch cast (ARROW-3741)", { diff --git a/r/tests/testthat/test-Table.R b/r/tests/testthat/test-Table.R index d5db9de2406..78cc529abd0 100644 --- a/r/tests/testthat/test-Table.R +++ b/r/tests/testthat/test-Table.R @@ -28,12 +28,12 @@ test_that("read_table handles various input streams (ARROW-3450, ARROW-3505)", { write_table(tab, tf) bytes <- write_table(tab, raw()) - buf_reader <- buffer_reader(bytes) + buf_reader <- BufferReader(bytes) tab1 <- read_table(tf) tab2 <- read_table(fs::path_abs(tf)) - readable_file <- close_on_exit(file_open(tf)) + readable_file <- close_on_exit(ReadableFile(tf)) tab3 <- read_table(readable_file) mmap_file <- close_on_exit(mmap_open(tf)) @@ -42,10 +42,10 @@ test_that("read_table handles various input streams (ARROW-3450, ARROW-3505)", { tab5 <- 
read_table(bytes) tab6 <- read_table(buf_reader) - stream_reader <- record_batch_stream_reader(bytes) + stream_reader <- RecordBatchStreamReader(bytes) tab7 <- read_table(stream_reader) - file_reader <- record_batch_file_reader(tf) + file_reader <- RecordBatchFileReader(tf) tab8 <- read_table(file_reader) expect_equal(tab, tab1) diff --git a/r/tests/testthat/test-bufferreader.R b/r/tests/testthat/test-bufferreader.R index e7680a493fc..72d257101fa 100644 --- a/r/tests/testthat/test-bufferreader.R +++ b/r/tests/testthat/test-bufferreader.R @@ -18,9 +18,9 @@ context("arrow::BufferReader") test_that("BufferReader can be created from R objects", { - num <- buffer_reader(numeric(13)) - int <- buffer_reader(integer(13)) - raw <- buffer_reader(raw(16)) + num <- BufferReader(numeric(13)) + int <- BufferReader(integer(13)) + raw <- BufferReader(raw(16)) expect_is(num, "arrow::io::BufferReader") expect_is(int, "arrow::io::BufferReader") @@ -33,7 +33,7 @@ test_that("BufferReader can be created from R objects", { test_that("BufferReader can be created from Buffer", { buf <- buffer(raw(76)) - reader <- buffer_reader(buf) + reader <- BufferReader(buf) expect_is(reader, "arrow::io::BufferReader") expect_equal(reader$GetSize(), 76) diff --git a/r/tests/testthat/test-feather.R b/r/tests/testthat/test-feather.R index f6d9bee581d..715017fb586 100644 --- a/r/tests/testthat/test-feather.R +++ b/r/tests/testthat/test-feather.R @@ -29,7 +29,7 @@ test_that("feather read/write round trip", { expect_true(fs::file_exists(tf2)) tf3 <- local_tempfile() - stream <- close_on_exit(file_output_stream(tf3)) + stream <- close_on_exit(FileOutputStream(tf3)) write_feather(tib, stream) expect_true(fs::file_exists(tf3)) @@ -47,7 +47,7 @@ test_that("feather read/write round trip", { expect_is(tab4, "arrow::Table") # reading directly from arrow::io::ReadableFile - tab5 <- read_feather(file_open(tf3)) + tab5 <- read_feather(ReadableFile(tf3)) expect_is(tab5, "arrow::Table") expect_equal(tib, 
as_tibble(tab1)) diff --git a/r/tests/testthat/test-message.R b/r/tests/testthat/test-message.R index fd05b860568..6f9e94da2ad 100644 --- a/r/tests/testthat/test-message.R +++ b/r/tests/testthat/test-message.R @@ -19,15 +19,11 @@ context("arrow::ipc::Message") test_that("read_message can read from input stream", { batch <- record_batch(tibble::tibble(x = 1:10)) - bytes <- write_record_batch(batch, raw()) - stream <- buffer_reader(bytes) - - message <- read_message(stream) - expect_equal(message$type(), MessageType$SCHEMA) - expect_is(message$body, "arrow::Buffer") - expect_is(message$metadata, "arrow::Buffer") + bytes <- batch$serialize() + stream <- BufferReader(bytes) message <- read_message(stream) + expect_is(message, "arrow::ipc::Message") expect_equal(message$type(), MessageType$RECORD_BATCH) expect_is(message$body, "arrow::Buffer") expect_is(message$metadata, "arrow::Buffer") diff --git a/r/tests/testthat/test-messagereader.R b/r/tests/testthat/test-messagereader.R index 4527a2882f0..69c2036cf71 100644 --- a/r/tests/testthat/test-messagereader.R +++ b/r/tests/testthat/test-messagereader.R @@ -19,15 +19,12 @@ context("arrow::ipc::MessageReader") test_that("MessageReader can be created from raw vectors", { batch <- record_batch(tibble::tibble(x = 1:10)) - bytes <- write_record_batch(batch, raw()) + bytes <- batch$serialize() - reader <- message_reader(bytes) - message <- reader$ReadNextMessage() - expect_equal(message$type(), MessageType$SCHEMA) - expect_is(message$body, "arrow::Buffer") - expect_is(message$metadata, "arrow::Buffer") + reader <- MessageReader(bytes) message <- reader$ReadNextMessage() + expect_is(message, "arrow::ipc::Message") expect_equal(message$type(), MessageType$RECORD_BATCH) expect_is(message$body, "arrow::Buffer") expect_is(message$metadata, "arrow::Buffer") @@ -38,16 +35,16 @@ test_that("MessageReader can be created from raw vectors", { test_that("MessageReader can be created from input stream", { batch <- 
record_batch(tibble::tibble(x = 1:10)) - bytes <- write_record_batch(batch, raw()) - stream <- buffer_reader(bytes) + bytes <- batch$serialize() - reader <- message_reader(stream) - message <- reader$ReadNextMessage() - expect_equal(message$type(), MessageType$SCHEMA) - expect_is(message$body, "arrow::Buffer") - expect_is(message$metadata, "arrow::Buffer") + stream <- BufferReader(bytes) + expect_is(stream, "arrow::io::BufferReader") + + reader <- MessageReader(stream) + expect_is(reader, "arrow::ipc::MessageReader") message <- reader$ReadNextMessage() + expect_is(message, "arrow::ipc::Message") expect_equal(message$type(), MessageType$RECORD_BATCH) expect_is(message$body, "arrow::Buffer") expect_is(message$metadata, "arrow::Buffer") diff --git a/r/tests/testthat/test-read_record_batch.R b/r/tests/testthat/test-read_record_batch.R new file mode 100644 index 00000000000..f6243149765 --- /dev/null +++ b/r/tests/testthat/test-read_record_batch.R @@ -0,0 +1,73 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +context("read_record_batch()") + +test_that("RecordBatchFileWriter / RecordBatchFileReader roundtrips", { + tab <- table(tibble::tibble( + int = 1:10, dbl = as.numeric(1:10), + lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE), + chr = letters[1:10] + )) + tf <- local_tempfile() + + writer <- RecordBatchFileWriter(tf, tab$schema()) + expect_is(writer, "arrow::ipc::RecordBatchFileWriter") + writer$WriteTable(tab) + writer$Close() + tab2 <- read_table(tf) + expect_equal(tab, tab2) + + stream <- FileOutputStream(tf) + writer <- RecordBatchFileWriter(stream, tab$schema()) + expect_is(writer, "arrow::ipc::RecordBatchFileWriter") + writer$WriteTable(tab) + writer$Close() + tab3 <- read_table(tf) + expect_equal(tab, tab3) +}) + +test_that("read_record_batch() handles (raw|Buffer|InputStream, Schema) (ARROW-3450, ARROW-3505)", { + tbl <- tibble::tibble( + int = 1:10, dbl = as.numeric(1:10), + lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE), + chr = letters[1:10] + ) + batch <- record_batch(tbl) + schema <- batch$schema() + + raw <- batch$serialize() + batch2 <- read_record_batch(raw, schema) + batch3 <- read_record_batch(buffer(raw), schema) + batch4 <- read_record_batch(close_on_exit(BufferReader(raw)), schema) + + expect_equal(batch, batch2) + expect_equal(batch, batch3) + expect_equal(batch, batch4) +}) + +test_that("read_record_batch() can handle (Message, Schema) parameters (ARROW-3499)", { + batch <- record_batch(tibble::tibble(x = 1:10)) + schema <- batch$schema() + + raw <- batch$serialize() + stream <- close_on_exit(BufferReader(raw)) + + message <- read_message(stream) + batch2 <- read_record_batch(message, schema) + expect_equal(batch, batch2) +}) diff --git a/r/tests/testthat/test-schema.R b/r/tests/testthat/test-schema.R index d40fbfa36bc..349892f6481 100644 --- a/r/tests/testthat/test-schema.R +++ b/r/tests/testthat/test-schema.R @@ -17,18 +17,30 @@ context("arrow::Schema") -test_that("reading schema from raw vector", { +test_that("reading schema 
from Buffer", { + # TODO: this uses the streaming format, i.e. from RecordBatchStreamWriter + # maybe there is an easier way to serialize a schema batch <- record_batch(tibble::tibble(x = 1:10)) - bytes <- write_record_batch(batch, raw()) - schema <- read_schema(bytes) - expect_equal(schema, batch$schema()) -}) + expect_is(batch, "arrow::RecordBatch") -test_that("reading schema from streams", { - batch <- record_batch(tibble::tibble(x = 1:10)) - bytes <- write_record_batch(batch, raw()) - stream <- buffer_reader(bytes) + stream <- BufferOutputStream() + writer <- RecordBatchStreamWriter(stream, batch$schema()) + expect_is(writer, "arrow::ipc::RecordBatchStreamWriter") + writer$Close() + + buffer <- stream$Finish() + expect_is(buffer, "arrow::Buffer") + + reader <- MessageReader(buffer) + expect_is(reader, "arrow::ipc::MessageReader") + + message <- reader$ReadNextMessage() + expect_is(message, "arrow::ipc::Message") + expect_equal(message$type(), MessageType$SCHEMA) - schema <- read_schema(stream) - expect_equal(schema, batch$schema()) + stream <- BufferReader(buffer) + expect_is(stream, "arrow::io::BufferReader") + message <- read_message(stream) + expect_is(message, "arrow::ipc::Message") + expect_equal(message$type(), MessageType$SCHEMA) }) From f1af42e843cecff30acdc094d0c0f58efa3ac103 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 23 Nov 2018 11:12:09 +0100 Subject: [PATCH 02/17] using lower case methods, like in pyarrow --- r/R/RecordBatchWriter.R | 37 ++++++++++---------- r/R/io.R | 6 ++-- r/R/on_exit.R | 2 +- r/R/write_table.R | 2 +- r/man/arrow__io__OutputStream.Rd | 2 +- r/man/arrow__ipc__RecordBatchFileWriter.Rd | 12 +++---- r/man/arrow__ipc__RecordBatchStreamWriter.Rd | 12 +++---- r/man/arrow__ipc__RecordBatchWriter.Rd | 6 ++-- r/src/arrow_types.h | 1 + r/src/recordbatchwriter.cpp | 2 -- r/tests/testthat/test-read_record_batch.R | 8 ++--- r/tests/testthat/test-schema.R | 2 +- 12 files changed, 46 insertions(+), 46 deletions(-) diff --git 
a/r/R/RecordBatchWriter.R b/r/R/RecordBatchWriter.R index 6e97bfea8cc..a5fe43df0e7 100644 --- a/r/R/RecordBatchWriter.R +++ b/r/R/RecordBatchWriter.R @@ -25,9 +25,9 @@ #' #' @section Methods: #' -#' - `$WriteRecordBatch(batch)`: Write record batch to stream -#' - `$WriteTable(table)`: write Table to stream -#' - `$Close()`: close stream +#' - `$write_batch(batch)`: Write record batch to stream +#' - `$write_table(table)`: write Table to stream +#' - `$close()`: close stream #' #' @section Derived classes: #' @@ -38,9 +38,10 @@ #' @name arrow__ipc__RecordBatchWriter `arrow::ipc::RecordBatchWriter` <- R6Class("arrow::ipc::RecordBatchWriter", inherit = `arrow::Object`, public = list( - WriteRecordBatch = function(batch) ipc___RecordBatchWriter__WriteRecordBatch(self, batch), - WriteTable = function(table) ipc___RecordBatchWriter__WriteTable(self, table), - Close = function() ipc___RecordBatchWriter__Close(self) + write_batch = function(batch) ipc___RecordBatchWriter__WriteRecordBatch(self, batch), + write_table = function(table) ipc___RecordBatchWriter__WriteTable(self, table), + + close = function() ipc___RecordBatchWriter__Close(self) ) ) @@ -57,9 +58,9 @@ #' ``` #' writer <- RecordBatchStreamWriter(sink, schema) #' -#' writer$WriteRecordBatch(batch) -#' writer$WriteTable(table) -#' writer$Close() +#' writer$write_batch(batch) +#' writer$write_table(table) +#' writer$close() #' ``` #' #' @section Factory: @@ -69,9 +70,9 @@ #' @section Methods: #' inherited from [arrow::ipc::RecordBatchWriter][arrow__ipc__RecordBatchWriter] #' -#' - `$WriteRecordBatch(batch)`: Write record batch to stream -#' - `$WriteTable(table)`: write Table to stream -#' - `$Close()`: close stream +#' - `$write_batch(batch)`: Write record batch to stream +#' - `$write_table(table)`: write Table to stream +#' - `$close()`: close stream #' #' @rdname arrow__ipc__RecordBatchStreamWriter #' @name arrow__ipc__RecordBatchStreamWriter @@ -123,9 +124,9 @@ RecordBatchStreamWriter.fs_path <- function(sink, 
schema){ #' ``` #' writer <- RecordBatchFileWriter(sink, schema) #' -#' writer$WriteRecordBatch(batch) -#' writer$WriteTable(table) -#' writer$Close() +#' writer$write_batch(batch) +#' writer$write_table(table) +#' writer$close() #' ``` #' #' @section Factory: @@ -135,9 +136,9 @@ RecordBatchStreamWriter.fs_path <- function(sink, schema){ #' @section Methods: #' inherited from [arrow::ipc::RecordBatchWriter][arrow__ipc__RecordBatchWriter] #' -#' - `$WriteRecordBatch(batch)`: Write record batch to stream -#' - `$WriteTable(table)`: write Table to stream -#' - `$Close()`: close stream +#' - `$write_batch(batch)`: Write record batch to stream +#' - `$write_table(table)`: write Table to stream +#' - `$close()`: close stream #' #' @rdname arrow__ipc__RecordBatchFileWriter #' @name arrow__ipc__RecordBatchFileWriter diff --git a/r/R/io.R b/r/R/io.R index f1f2f4bfae5..b8998029277 100644 --- a/r/R/io.R +++ b/r/R/io.R @@ -28,13 +28,13 @@ #' @section Methods: #' #' - `arrow::Buffer` `Read`(`int` nbytes): Read `nbytes` bytes -#' - `void` `Close`(): close the stream +#' - `void` `close`(): close the stream #' #' @rdname arrow__io__OutputStream #' @name arrow__io__OutputStream `arrow::io::OutputStream` <- R6Class("arrow::io::OutputStream", inherit = `arrow::io::Writable`, public = list( - Close = function() io___OutputStream__Close(self) + close = function() io___OutputStream__Close(self) ) ) @@ -116,7 +116,7 @@ #' @name arrow__io__InputStream `arrow::io::InputStream` <- R6Class("arrow::io::InputStream", inherit = `arrow::io::Readable`, public = list( - Close = function() io___InputStream__Close(self) + close = function() io___InputStream__Close(self) ) ) diff --git a/r/R/on_exit.R b/r/R/on_exit.R index 9387169b8be..52b017404de 100644 --- a/r/R/on_exit.R +++ b/r/R/on_exit.R @@ -17,7 +17,7 @@ #' @importFrom withr defer_parent close_on_exit <- function(x, ...){ - defer_parent(x$Close(), ...) + defer_parent(x$close(), ...) 
x } diff --git a/r/R/write_table.R b/r/R/write_table.R index 44d0037212e..e06410c5031 100644 --- a/r/R/write_table.R +++ b/r/R/write_table.R @@ -34,7 +34,7 @@ write_table <- function(x, stream, ...) { #' @export `write_table.arrow::ipc::RecordBatchWriter` <- function(x, stream, ...){ - stream$WriteTable(x) + stream$write_table(x) } #' @export diff --git a/r/man/arrow__io__OutputStream.Rd b/r/man/arrow__io__OutputStream.Rd index 5ff980491e9..94b6a73e9a7 100644 --- a/r/man/arrow__io__OutputStream.Rd +++ b/r/man/arrow__io__OutputStream.Rd @@ -16,7 +16,7 @@ OutputStream \itemize{ \item \code{arrow::Buffer} \code{Read}(\code{int} nbytes): Read \code{nbytes} bytes -\item \code{void} \code{Close}(): close the stream +\item \code{void} \code{close}(): close the stream } } diff --git a/r/man/arrow__ipc__RecordBatchFileWriter.Rd b/r/man/arrow__ipc__RecordBatchFileWriter.Rd index 9f51fe4bfea..a80b55941fb 100644 --- a/r/man/arrow__ipc__RecordBatchFileWriter.Rd +++ b/r/man/arrow__ipc__RecordBatchFileWriter.Rd @@ -15,9 +15,9 @@ Writer for the Arrow binary file format \section{usage}{ \preformatted{writer <- RecordBatchFileWriter(sink, schema) -writer$WriteRecordBatch(batch) -writer$WriteTable(table) -writer$Close() +writer$write_batch(batch) +writer$write_table(table) +writer$close() } } @@ -31,9 +31,9 @@ The \code{\link[=RecordBatchFileWriter]{RecordBatchFileWriter()}} function creat inherited from \link[=arrow__ipc__RecordBatchWriter]{arrow::ipc::RecordBatchWriter} \itemize{ -\item \code{$WriteRecordBatch(batch)}: Write record batch to stream -\item \code{$WriteTable(table)}: write Table to stream -\item \code{$Close()}: close stream +\item \code{$write_batch(batch)}: Write record batch to stream +\item \code{$write_table(table)}: write Table to stream +\item \code{$close()}: close stream } } diff --git a/r/man/arrow__ipc__RecordBatchStreamWriter.Rd b/r/man/arrow__ipc__RecordBatchStreamWriter.Rd index f84a27bc19a..3d2030287d1 100644 --- 
a/r/man/arrow__ipc__RecordBatchStreamWriter.Rd +++ b/r/man/arrow__ipc__RecordBatchStreamWriter.Rd @@ -15,9 +15,9 @@ Writer for the Arrow streaming binary format \section{usage}{ \preformatted{writer <- RecordBatchStreamWriter(sink, schema) -writer$WriteRecordBatch(batch) -writer$WriteTable(table) -writer$Close() +writer$write_batch(batch) +writer$write_table(table) +writer$close() } } @@ -31,9 +31,9 @@ The \code{\link[=RecordBatchStreamWriter]{RecordBatchStreamWriter()}} function c inherited from \link[=arrow__ipc__RecordBatchWriter]{arrow::ipc::RecordBatchWriter} \itemize{ -\item \code{$WriteRecordBatch(batch)}: Write record batch to stream -\item \code{$WriteTable(table)}: write Table to stream -\item \code{$Close()}: close stream +\item \code{$write_batch(batch)}: Write record batch to stream +\item \code{$write_table(table)}: write Table to stream +\item \code{$close()}: close stream } } diff --git a/r/man/arrow__ipc__RecordBatchWriter.Rd b/r/man/arrow__ipc__RecordBatchWriter.Rd index 632fc6b3f8b..08593df8524 100644 --- a/r/man/arrow__ipc__RecordBatchWriter.Rd +++ b/r/man/arrow__ipc__RecordBatchWriter.Rd @@ -11,9 +11,9 @@ class arrow::ipc::RecordBatchWriter \section{Methods}{ \itemize{ -\item \code{$WriteRecordBatch(batch)}: Write record batch to stream -\item \code{$WriteTable(table)}: write Table to stream -\item \code{$Close()}: close stream +\item \code{$write_batch(batch)}: Write record batch to stream +\item \code{$write_table(table)}: write Table to stream +\item \code{$close()}: close stream } } diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h index 419705f9fcd..9ebc558d0d4 100644 --- a/r/src/arrow_types.h +++ b/r/src/arrow_types.h @@ -152,6 +152,7 @@ using LogicalVector_ = Rcpp::Vector; using StringVector_ = Rcpp::Vector; using CharacterVector_ = StringVector_; using RawVector_ = Rcpp::Vector; +using List_ = Rcpp::Vector; template inline typename Rcpp::Vector::stored_type default_value() { diff --git a/r/src/recordbatchwriter.cpp 
b/r/src/recordbatchwriter.cpp index f766d7cfadc..bb410359a80 100644 --- a/r/src/recordbatchwriter.cpp +++ b/r/src/recordbatchwriter.cpp @@ -18,7 +18,6 @@ #include "arrow_types.h" - // [[Rcpp::export]] void ipc___RecordBatchWriter__WriteRecordBatch( const std::shared_ptr& batch_writer, @@ -39,7 +38,6 @@ void ipc___RecordBatchWriter__Close( STOP_IF_NOT_OK(batch_writer->Close()); } - // [[Rcpp::export]] std::shared_ptr ipc___RecordBatchFileWriter__Open( const std::shared_ptr& stream, diff --git a/r/tests/testthat/test-read_record_batch.R b/r/tests/testthat/test-read_record_batch.R index f6243149765..69c49b278d4 100644 --- a/r/tests/testthat/test-read_record_batch.R +++ b/r/tests/testthat/test-read_record_batch.R @@ -27,16 +27,16 @@ test_that("RecordBatchFileWriter / RecordBatchFileReader roundtrips", { writer <- RecordBatchFileWriter(tf, tab$schema()) expect_is(writer, "arrow::ipc::RecordBatchFileWriter") - writer$WriteTable(tab) - writer$Close() + writer$write_table(tab) + writer$close() tab2 <- read_table(tf) expect_equal(tab, tab2) stream <- FileOutputStream(tf) writer <- RecordBatchFileWriter(stream, tab$schema()) expect_is(writer, "arrow::ipc::RecordBatchFileWriter") - writer$WriteTable(tab) - writer$Close() + writer$write_table(tab) + writer$close() tab3 <- read_table(tf) expect_equal(tab, tab3) }) diff --git a/r/tests/testthat/test-schema.R b/r/tests/testthat/test-schema.R index 349892f6481..525a76ec4da 100644 --- a/r/tests/testthat/test-schema.R +++ b/r/tests/testthat/test-schema.R @@ -26,7 +26,7 @@ test_that("reading schema from Buffer", { stream <- BufferOutputStream() writer <- RecordBatchStreamWriter(stream, batch$schema()) expect_is(writer, "arrow::ipc::RecordBatchStreamWriter") - writer$Close() + writer$close() buffer <- stream$Finish() expect_is(buffer, "arrow::Buffer") From 79192cb4b0ad27a64bb3979160085182fef9b52b Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 23 Nov 2018 11:19:01 +0100 Subject: [PATCH 03/17] rename BufferOutputStream$Finish 
to $getvalue, as in pyarrow --- r/R/io.R | 3 ++- r/tests/testthat/test-schema.R | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/r/R/io.R b/r/R/io.R index b8998029277..2e0d40be96a 100644 --- a/r/R/io.R +++ b/r/R/io.R @@ -73,7 +73,8 @@ `arrow::io::BufferOutputStream` <- R6Class("arrow::io::BufferOutputStream", inherit = `arrow::io::OutputStream`, public = list( capacity = function() io___BufferOutputStream__capacity(self), - Finish = function() shared_ptr(`arrow::Buffer`, io___BufferOutputStream__Finish(self)), + getvalue = function() shared_ptr(`arrow::Buffer`, io___BufferOutputStream__Finish(self)), + Write = function(bytes) io___BufferOutputStream__Write(self, bytes), Tell = function() io___BufferOutputStream__Tell(self) ) diff --git a/r/tests/testthat/test-schema.R b/r/tests/testthat/test-schema.R index 525a76ec4da..5c9be332809 100644 --- a/r/tests/testthat/test-schema.R +++ b/r/tests/testthat/test-schema.R @@ -28,7 +28,7 @@ test_that("reading schema from Buffer", { expect_is(writer, "arrow::ipc::RecordBatchStreamWriter") writer$close() - buffer <- stream$Finish() + buffer <- stream$getvalue() expect_is(buffer, "arrow::Buffer") reader <- MessageReader(buffer) From 1ea3b558f60f606b6aafccd0700603173637ab7f Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 23 Nov 2018 11:41:09 +0100 Subject: [PATCH 04/17] + method $batches() in RecordBatch(Stream|File)Reader --- r/R/RcppExports.R | 8 ++++++++ r/R/RecordBatchReader.R | 10 ++++++++-- r/src/RcppExports.cpp | 24 ++++++++++++++++++++++++ r/src/recordbatchreader.cpp | 27 +++++++++++++++++++++++++++ 4 files changed, 67 insertions(+), 2 deletions(-) diff --git a/r/R/RcppExports.R b/r/R/RcppExports.R index 54763e58f01..9188b1dc2bf 100644 --- a/r/R/RcppExports.R +++ b/r/R/RcppExports.R @@ -685,6 +685,10 @@ ipc___RecordBatchStreamReader__Open <- function(stream) { .Call(`_arrow_ipc___RecordBatchStreamReader__Open`, stream) } +ipc___RecordBatchStreamReader__batches <- function(reader) { + 
.Call(`_arrow_ipc___RecordBatchStreamReader__batches`, reader) +} + ipc___RecordBatchFileReader__schema <- function(reader) { .Call(`_arrow_ipc___RecordBatchFileReader__schema`, reader) } @@ -709,6 +713,10 @@ Table__from_RecordBatchStreamReader <- function(reader) { .Call(`_arrow_Table__from_RecordBatchStreamReader`, reader) } +ipc___RecordBatchFileReader__batches <- function(reader) { + .Call(`_arrow_ipc___RecordBatchFileReader__batches`, reader) +} + ipc___RecordBatchWriter__WriteRecordBatch <- function(batch_writer, batch) { invisible(.Call(`_arrow_ipc___RecordBatchWriter__WriteRecordBatch`, batch_writer, batch)) } diff --git a/r/R/RecordBatchReader.R b/r/R/RecordBatchReader.R index 85bf35221a4..222f05586c1 100644 --- a/r/R/RecordBatchReader.R +++ b/r/R/RecordBatchReader.R @@ -50,7 +50,11 @@ #' #' @rdname arrow__ipc__RecordBatchStreamReader #' @name arrow__ipc__RecordBatchStreamReader -`arrow::ipc::RecordBatchStreamReader` <- R6Class("arrow::ipc::RecordBatchStreamReader", inherit = `arrow::RecordBatchReader`) +`arrow::ipc::RecordBatchStreamReader` <- R6Class("arrow::ipc::RecordBatchStreamReader", inherit = `arrow::RecordBatchReader`, + public = list( + batches = function() map(ipc___RecordBatchStreamReader__batches(self), shared_ptr, class = `arrow::RecordBatch`) + ) +) #' @title class arrow::ipc::RecordBatchFileReader #' @@ -68,7 +72,9 @@ public = list( schema = function() shared_ptr(`arrow::Schema`, ipc___RecordBatchFileReader__schema(self)), num_record_batches = function() ipc___RecordBatchFileReader__num_record_batches(self), - ReadRecordBatch = function(i) shared_ptr(`arrow::RecordBatch`, ipc___RecordBatchFileReader__ReadRecordBatch(self, i)) + ReadRecordBatch = function(i) shared_ptr(`arrow::RecordBatch`, ipc___RecordBatchFileReader__ReadRecordBatch(self, i)), + + batches = function() map(ipc___RecordBatchFileReader__batches(self), shared_ptr, class = `arrow::RecordBatch`) ) ) diff --git a/r/src/RcppExports.cpp b/r/src/RcppExports.cpp index 
95e5dc7b5d0..5a619ae9c35 100644 --- a/r/src/RcppExports.cpp +++ b/r/src/RcppExports.cpp @@ -1915,6 +1915,17 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// ipc___RecordBatchStreamReader__batches +std::vector> ipc___RecordBatchStreamReader__batches(const std::shared_ptr& reader); +RcppExport SEXP _arrow_ipc___RecordBatchStreamReader__batches(SEXP readerSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr& >::type reader(readerSEXP); + rcpp_result_gen = Rcpp::wrap(ipc___RecordBatchStreamReader__batches(reader)); + return rcpp_result_gen; +END_RCPP +} // ipc___RecordBatchFileReader__schema std::shared_ptr ipc___RecordBatchFileReader__schema(const std::shared_ptr& reader); RcppExport SEXP _arrow_ipc___RecordBatchFileReader__schema(SEXP readerSEXP) { @@ -1982,6 +1993,17 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// ipc___RecordBatchFileReader__batches +std::vector> ipc___RecordBatchFileReader__batches(const std::shared_ptr& reader); +RcppExport SEXP _arrow_ipc___RecordBatchFileReader__batches(SEXP readerSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr& >::type reader(readerSEXP); + rcpp_result_gen = Rcpp::wrap(ipc___RecordBatchFileReader__batches(reader)); + return rcpp_result_gen; +END_RCPP +} // ipc___RecordBatchWriter__WriteRecordBatch void ipc___RecordBatchWriter__WriteRecordBatch(const std::shared_ptr& batch_writer, const std::shared_ptr& batch); RcppExport SEXP _arrow_ipc___RecordBatchWriter__WriteRecordBatch(SEXP batch_writerSEXP, SEXP batchSEXP) { @@ -2278,12 +2300,14 @@ static const R_CallMethodDef CallEntries[] = { {"_arrow_RecordBatchReader__schema", (DL_FUNC) &_arrow_RecordBatchReader__schema, 1}, {"_arrow_RecordBatchReader__ReadNext", (DL_FUNC) &_arrow_RecordBatchReader__ReadNext, 1}, {"_arrow_ipc___RecordBatchStreamReader__Open", (DL_FUNC) 
&_arrow_ipc___RecordBatchStreamReader__Open, 1}, + {"_arrow_ipc___RecordBatchStreamReader__batches", (DL_FUNC) &_arrow_ipc___RecordBatchStreamReader__batches, 1}, {"_arrow_ipc___RecordBatchFileReader__schema", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__schema, 1}, {"_arrow_ipc___RecordBatchFileReader__num_record_batches", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__num_record_batches, 1}, {"_arrow_ipc___RecordBatchFileReader__ReadRecordBatch", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__ReadRecordBatch, 2}, {"_arrow_ipc___RecordBatchFileReader__Open", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__Open, 1}, {"_arrow_Table__from_RecordBatchFileReader", (DL_FUNC) &_arrow_Table__from_RecordBatchFileReader, 1}, {"_arrow_Table__from_RecordBatchStreamReader", (DL_FUNC) &_arrow_Table__from_RecordBatchStreamReader, 1}, + {"_arrow_ipc___RecordBatchFileReader__batches", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__batches, 1}, {"_arrow_ipc___RecordBatchWriter__WriteRecordBatch", (DL_FUNC) &_arrow_ipc___RecordBatchWriter__WriteRecordBatch, 2}, {"_arrow_ipc___RecordBatchWriter__WriteTable", (DL_FUNC) &_arrow_ipc___RecordBatchWriter__WriteTable, 2}, {"_arrow_ipc___RecordBatchWriter__Close", (DL_FUNC) &_arrow_ipc___RecordBatchWriter__Close, 1}, diff --git a/r/src/recordbatchreader.cpp b/r/src/recordbatchreader.cpp index 65a1c9baf3b..fae757575a5 100644 --- a/r/src/recordbatchreader.cpp +++ b/r/src/recordbatchreader.cpp @@ -41,6 +41,21 @@ std::shared_ptr ipc___RecordBatchStreamReader__Open( return reader; } +// [[Rcpp::export]] +std::vector> ipc___RecordBatchStreamReader__batches(const std::shared_ptr& reader) { + std::vector> res; + + while(true) { + std::shared_ptr batch; + STOP_IF_NOT_OK(reader->ReadNext(&batch)); + if (!batch) break; + + res.push_back(batch); + } + + return res; +} + // -------- RecordBatchFileReader // [[Rcpp::export]] @@ -104,3 +119,15 @@ std::shared_ptr Table__from_RecordBatchStreamReader( return table; } + +// [[Rcpp::export]] +std::vector> 
ipc___RecordBatchFileReader__batches( const std::shared_ptr& reader) { + auto n = reader->num_record_batches(); + std::vector> res(n); + + for(int i = 0; iReadRecordBatch(i, &res[i])); + } + + return res; +} From 2dcca0345bf20c6f5cc4943fd69b3427c967b9b4 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 23 Nov 2018 14:13:08 +0100 Subject: [PATCH 05/17] write_arrow replaces write_table and handles Table, RecordBatch and data.frame, thanks to the unexported dispatcher to_arrow --- r/DESCRIPTION | 2 +- r/NAMESPACE | 12 +-- r/R/RecordBatchWriter.R | 12 +++ r/R/write_arrow.R | 94 ++++++++++++++++++++ r/R/write_table.R | 93 ------------------- r/man/write_arrow.Rd | 28 ++++-- r/man/write_arrow.arrow-colon-colon-Table.Rd | 17 ---- r/man/write_table.Rd | 18 ---- r/tests/testthat/test-Table.R | 5 +- 9 files changed, 136 insertions(+), 145 deletions(-) create mode 100644 r/R/write_arrow.R delete mode 100644 r/R/write_table.R delete mode 100644 r/man/write_arrow.arrow-colon-colon-Table.Rd delete mode 100644 r/man/write_table.Rd diff --git a/r/DESCRIPTION b/r/DESCRIPTION index e5368427844..5f93c83f236 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -65,5 +65,5 @@ Collate: 'read_table.R' 'reexports-bit64.R' 'reexports-tibble.R' - 'write_table.R' + 'write_arrow.R' 'zzz.R' diff --git a/r/NAMESPACE b/r/NAMESPACE index 7d5c95d9844..2898720f73b 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -55,8 +55,10 @@ S3method(read_table,"arrow::ipc::RecordBatchStreamReader") S3method(read_table,character) S3method(read_table,fs_path) S3method(read_table,raw) -S3method(write_arrow,"arrow::Table") -S3method(write_arrow,data.frame) +S3method(write_arrow,"arrow::ipc::RecordBatchWriter") +S3method(write_arrow,character) +S3method(write_arrow,fs_path) +S3method(write_arrow,raw) S3method(write_feather,"arrow::RecordBatch") S3method(write_feather,data.frame) S3method(write_feather,default) @@ -64,11 +66,6 @@ S3method(write_feather_RecordBatch,"arrow::io::OutputStream") 
S3method(write_feather_RecordBatch,character) S3method(write_feather_RecordBatch,default) S3method(write_feather_RecordBatch,fs_path) -S3method(write_table,"arrow::io::OutputStream") -S3method(write_table,"arrow::ipc::RecordBatchWriter") -S3method(write_table,character) -S3method(write_table,fs_path) -S3method(write_table,raw) export(BufferOutputStream) export(BufferReader) export(DateUnit) @@ -134,7 +131,6 @@ export(utf8) export(write_arrow) export(write_feather) export(write_feather_RecordBatch) -export(write_table) importFrom(R6,R6Class) importFrom(Rcpp,sourceCpp) importFrom(assertthat,assert_that) diff --git a/r/R/RecordBatchWriter.R b/r/R/RecordBatchWriter.R index a5fe43df0e7..78d1e81c54b 100644 --- a/r/R/RecordBatchWriter.R +++ b/r/R/RecordBatchWriter.R @@ -41,6 +41,18 @@ write_batch = function(batch) ipc___RecordBatchWriter__WriteRecordBatch(self, batch), write_table = function(table) ipc___RecordBatchWriter__WriteTable(self, table), + write = function(x) { + if (inherits(x, "arrow::RecordBatch")) { + self$write_batch(x) + } else if(inherits(x, "arrow::Table")) { + self$write_table(x) + } else if (inherits(x, "data.frame")) { + self$write_table(table(x)) + } else { + abort("unexpected type for RecordBatchWriter$write(), must be an arrow::RecordBatch or an arrow::Table") + } + }, + close = function() ipc___RecordBatchWriter__Close(self) ) ) diff --git a/r/R/write_arrow.R b/r/R/write_arrow.R new file mode 100644 index 00000000000..42d5d85ff3d --- /dev/null +++ b/r/R/write_arrow.R @@ -0,0 +1,94 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +to_arrow <- function(x) { + UseMethod("to_arrow") +} + +`to_arrow.arrow::RecordBatch` <- function(x) x +`to_arrow.arrow::Table` <- function(x) x +`to_arrow.data.frame` <- function(x) table(x) + +#' serialize an [arrow::Table][arrow__Table], an [arrow::RecordBatch][arrow__RecordBatch], or a +#' data frame to either the streaming format or the binary file format +#' +#' @param x an [arrow::Table][arrow__Table], an [arrow::RecordBatch][arrow__RecordBatch] or a data.frame +#' +#' @param stream where to serialize to +#' +#' - A [arrow::ipc::RecordBatchWriter][arrow__ipc__RecordBatchWriter]: the `$write()` +#' of `x` is used. The stream is left open. This uses the streaming format +#' or the binary file format depending on the type of the writer. +#' +#' - A string or [file path][fs::path_abs()]: `x` is serialized with +#' a [arrow::ipc::RecordBatchFileWriter][arrow__ipc__RecordBatchFileWriter], i.e. +#' using the binary file format. +#' +#' - A raw vector: typically of length zero (its data is ignored, and only used for +#' dispatch). `x` is serialized using the streaming format, i.e. using the +#' [arrow::ipc::RecordBatchStreamWriter][arrow__ipc__RecordBatchStreamWriter] +#' +#' @param ... extra parameters, currently ignored +#' +#' `write_arrow` is a convenience function, the classes [arrow::ipc::RecordBatchFileWriter][arrow__ipc__RecordBatchFileWriter] +#' and [arrow::ipc::RecordBatchStreamWriter][arrow__ipc__RecordBatchStreamWriter] can be used for more flexibility. +#' +#' @export +write_arrow <- function(x, stream, ...) 
{ + UseMethod("write_arrow", stream) +} + +#' @export +`write_arrow.arrow::ipc::RecordBatchWriter` <- function(x, stream, ...){ + stream$write(x) +} + +#' @export +`write_arrow.character` <- function(x, stream, ...) { + write_arrow(x, fs::path_abs(stream), ...) +} + +#' @export +`write_arrow.fs_path` <- function(x, stream, ...) { + assert_that(length(stream) == 1L) + x <- to_arrow(x) + file_stream <- close_on_exit(FileOutputStream(stream)) + file_writer <- close_on_exit(RecordBatchFileWriter(file_stream, x$schema())) + write_arrow(x, file_writer, ...) +} + +#' @export +`write_arrow.raw` <- function(x, stream, ...) { + x <- to_arrow(x) + schema <- x$schema() + + # how many bytes do we need + mock_stream <- MockOutputStream() + writer <- RecordBatchStreamWriter(mock_stream, schema) + writer$write(x) + writer$close() + n <- mock_stream$GetExtentBytesWritten() + + # now that we know the size, stream in a buffer backed by an R raw vector + bytes <- raw(n) + buffer_writer <- FixedSizeBufferWriter(buffer(bytes)) + writer <- RecordBatchStreamWriter(buffer_writer, schema) + writer$write(x) + writer$close() + + bytes +} diff --git a/r/R/write_table.R b/r/R/write_table.R deleted file mode 100644 index e06410c5031..00000000000 --- a/r/R/write_table.R +++ /dev/null @@ -1,93 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -#' write an arrow::Table -#' -#' @param x an `arrow::Table` -#' @param stream where to stream the record batch -#' @param ... extra parameters -#' -#' @export -write_table <- function(x, stream, ...) { - UseMethod("write_table", stream) -} - -#' @export -`write_table.arrow::io::OutputStream` <- function(x, stream, ...) { - stream_writer <- close_on_exit(RecordBatchStreamWriter(stream, x$schema())) - write_table(x, stream_writer) -} - -#' @export -`write_table.arrow::ipc::RecordBatchWriter` <- function(x, stream, ...){ - stream$write_table(x) -} - -#' @export -`write_table.character` <- function(x, stream, ...) { - assert_that(length(stream) == 1L) - write_table(x, fs::path_abs(stream), ...) -} - -#' @export -`write_table.fs_path` <- function(x, stream, ...) { - assert_that(length(stream) == 1L) - file_stream <- close_on_exit(FileOutputStream(stream)) - file_writer <- close_on_exit(RecordBatchFileWriter(file_stream, x$schema())) - write_table(x, file_writer, ...) -} - -#' @export -`write_table.raw` <- function(x, stream, ...) { - # how many bytes do we need - mock <- MockOutputStream() - write_table(x, mock) - n <- mock$GetExtentBytesWritten() - - bytes <- raw(n) - buffer <- buffer(bytes) - buffer_writer <- FixedSizeBufferWriter(buffer) - write_table(x, buffer_writer) - - bytes -} - -#' Write an object to a stream -#' -#' @param x An object to stream -#' @param stream A stream -#' @param ... additional parameters -#' -#' @export -write_arrow <- function(x, stream, ...){ - UseMethod("write_arrow") -} - -#' #' @export -#' `write_arrow.arrow::RecordBatch` <- function(x, stream, ...) { -#' write_record_batch(x, stream, ...) -#' } - -#' @export -`write_arrow.arrow::Table` <- function(x, stream, ...) { - write_table(x, stream, ...) -} - -#' @export -`write_arrow.data.frame` <- function(x, stream, ...) { - write_table(table(x), stream, ...) 
-} diff --git a/r/man/write_arrow.Rd b/r/man/write_arrow.Rd index 5e0d13f304d..4296bcbd899 100644 --- a/r/man/write_arrow.Rd +++ b/r/man/write_arrow.Rd @@ -1,18 +1,34 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/write_table.R +% Please edit documentation in R/write_arrow.R \name{write_arrow} \alias{write_arrow} -\title{Write an object to a stream} +\title{serialize an \link[=arrow__Table]{arrow::Table}, an \link[=arrow__RecordBatch]{arrow::RecordBatch}, or a +data frame to either the streaming format or the binary file format} \usage{ write_arrow(x, stream, ...) } \arguments{ -\item{x}{An object to stream} +\item{x}{an \link[=arrow__Table]{arrow::Table}, an \link[=arrow__RecordBatch]{arrow::RecordBatch} or a data.frame} -\item{stream}{A stream} +\item{stream}{where to serialize to +\itemize{ +\item A \link[=arrow__ipc__RecordBatchWriter]{arrow::ipc::RecordBatchWriter}: the \code{$write()} +of \code{x} is used. The stream is left open. This uses the streaming format +or the binary file format depending on the type of the writer. +\item A string or \link[fs:path_abs]{file path}: \code{x} is serialized with +a \link[=arrow__ipc__RecordBatchFileWriter]{arrow::ipc::RecordBatchFileWriter}, i.e. +using the binary file format. +\item A raw vector: typically of length zero (its data is ignored, and only used for +dispatch). \code{x} is serialized using the streaming format, i.e. 
using the +\link[=arrow__ipc__RecordBatchStreamWriter]{arrow::ipc::RecordBatchStreamWriter} +}} -\item{...}{additional parameters} +\item{...}{extra parameters, currently ignored + +\code{write_arrow} is a convenience function, the classes \link[=arrow__ipc__RecordBatchFileWriter]{arrow::ipc::RecordBatchFileWriter} +and \link[=arrow__ipc__RecordBatchStreamWriter]{arrow::ipc::RecordBatchStreamWriter} can be used for more flexibility.} } \description{ -Write an object to a stream +serialize an \link[=arrow__Table]{arrow::Table}, an \link[=arrow__RecordBatch]{arrow::RecordBatch}, or a +data frame to either the streaming format or the binary file format } diff --git a/r/man/write_arrow.arrow-colon-colon-Table.Rd b/r/man/write_arrow.arrow-colon-colon-Table.Rd deleted file mode 100644 index b499f97e4bc..00000000000 --- a/r/man/write_arrow.arrow-colon-colon-Table.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/write_table.R -\name{write_arrow.arrow::Table} -\alias{write_arrow.arrow::Table} -\title{#' @export -\code{write_arrow.arrow::RecordBatch} <- function(x, stream, ...) { -write_record_batch(x, stream, ...) -}} -\usage{ -\method{write_arrow}{arrow::Table}(x, stream, ...) -} -\description{ -#' @export -\code{write_arrow.arrow::RecordBatch} <- function(x, stream, ...) { -write_record_batch(x, stream, ...) -} -} diff --git a/r/man/write_table.Rd b/r/man/write_table.Rd deleted file mode 100644 index 33633fd5a39..00000000000 --- a/r/man/write_table.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/write_table.R -\name{write_table} -\alias{write_table} -\title{write an arrow::Table} -\usage{ -write_table(x, stream, ...) 
-} -\arguments{ -\item{x}{an \code{arrow::Table}} - -\item{stream}{where to stream the record batch} - -\item{...}{extra parameters} -} -\description{ -write an arrow::Table -} diff --git a/r/tests/testthat/test-Table.R b/r/tests/testthat/test-Table.R index 78cc529abd0..dcc213d9663 100644 --- a/r/tests/testthat/test-Table.R +++ b/r/tests/testthat/test-Table.R @@ -24,10 +24,11 @@ test_that("read_table handles various input streams (ARROW-3450, ARROW-3505)", { chr = letters[1:10] ) tab <- arrow::table(tbl) + tf <- local_tempfile() - write_table(tab, tf) + write_arrow(tab, tf) - bytes <- write_table(tab, raw()) + bytes <- write_arrow(tab, raw()) buf_reader <- BufferReader(bytes) tab1 <- read_table(tf) From f16f1267a4b6e1db78d45ed12a6795ced8c8e94a Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 23 Nov 2018 14:40:06 +0100 Subject: [PATCH 06/17] update documentation for read_table --- r/NAMESPACE | 2 -- r/R/read_table.R | 60 +++++++++++++++++++++-------------- r/man/read_table.Rd | 26 +++++++++++++-- r/tests/testthat/test-Table.R | 11 +++---- 4 files changed, 63 insertions(+), 36 deletions(-) diff --git a/r/NAMESPACE b/r/NAMESPACE index 2898720f73b..10677b43f85 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -48,8 +48,6 @@ S3method(read_record_batch,raw) S3method(read_schema,"arrow::Buffer") S3method(read_schema,"arrow::io::InputStream") S3method(read_schema,raw) -S3method(read_table,"arrow::io::BufferReader") -S3method(read_table,"arrow::io::RandomAccessFile") S3method(read_table,"arrow::ipc::RecordBatchFileReader") S3method(read_table,"arrow::ipc::RecordBatchStreamReader") S3method(read_table,character) diff --git a/r/R/read_table.R b/r/R/read_table.R index 11e57ba89ae..c99d175feef 100644 --- a/r/R/read_table.R +++ b/r/R/read_table.R @@ -15,33 +15,37 @@ # specific language governing permissions and limitations # under the License. -#' Read an arrow::Table from a stream +#' Read an [arrow::Table][arrow__Table] from a stream #' -#' @param stream stream. 
Either a stream created by [ReadableFile()] or [mmap_open()] or a file path. +#' @param stream stream. +#' +#' - a [arrow::ipc::RecordBatchFileReader][arrow__ipc__RecordBatchFileReader]: +#' read an [arrow::Table][arrow__Table] +#' from all the record batches in the reader +#' +#' - a [arrow::ipc::RecordBatchStreamReader][arrow__ipc__RecordBatchStreamReader]: +#' read an [arrow::Table][arrow__Table] from the remaining record batches +#' in the reader +#' +#' - a string or [file path][fs::path_abs()]: interpret the file as an arrow +#' binary file format, and uses a [arrow::ipc::RecordBatchFileReader][arrow__ipc__RecordBatchFileReader] +#' to process it. +#' +#' - a raw vector: read using a [arrow::ipc::RecordBatchStreamReader][arrow__ipc__RecordBatchStreamReader] +#' +#' @return an [arrow::Table][arrow__Table] +#' +#' @details +#' +#' The methods using [arrow::ipc::RecordBatchFileReader][arrow__ipc__RecordBatchFileReader] and +#' [arrow::ipc::RecordBatchStreamReader][arrow__ipc__RecordBatchStreamReader] offer the most +#' flexibility. The other methods are for convenience. 
#' #' @export read_table <- function(stream){ UseMethod("read_table") } -#' @export -read_table.character <- function(stream){ - assert_that(length(stream) == 1L) - read_table(fs::path_abs(stream)) -} - -#' @export -read_table.fs_path <- function(stream) { - stream <- close_on_exit(ReadableFile(stream)) - read_table(stream) -} - -#' @export -`read_table.arrow::io::RandomAccessFile` <- function(stream) { - reader <- RecordBatchFileReader(stream) - read_table(reader) -} - #' @export `read_table.arrow::ipc::RecordBatchFileReader` <- function(stream) { shared_ptr(`arrow::Table`, Table__from_RecordBatchFileReader(stream)) @@ -53,14 +57,22 @@ read_table.fs_path <- function(stream) { } #' @export -`read_table.arrow::io::BufferReader` <- function(stream) { - reader <- RecordBatchStreamReader(stream) - read_table(reader) +read_table.character <- function(stream){ + assert_that(length(stream) == 1L) + read_table(fs::path_abs(stream)) +} + +#' @export +read_table.fs_path <- function(stream) { + stream <- close_on_exit(ReadableFile(stream)) + batch_reader <- close_on_exit(RecordBatchFileReader(stream)) + shared_ptr(`arrow::Table`, Table__from_RecordBatchFileReader(batch_reader)) } #' @export `read_table.raw` <- function(stream) { stream <- close_on_exit(BufferReader(stream)) - read_table(stream) + batch_reader <- close_on_exit(RecordBatchStreamReader(stream)) + shared_ptr(`arrow::Table`, Table__from_RecordBatchStreamReader(batch_reader)) } diff --git a/r/man/read_table.Rd b/r/man/read_table.Rd index a9fc3078740..e5227a5f3d4 100644 --- a/r/man/read_table.Rd +++ b/r/man/read_table.Rd @@ -2,13 +2,33 @@ % Please edit documentation in R/read_table.R \name{read_table} \alias{read_table} -\title{Read an arrow::Table from a stream} +\title{Read an \link[=arrow__Table]{arrow::Table} from a stream} \usage{ read_table(stream) } \arguments{ -\item{stream}{stream. 
Either a stream created by \code{\link[=ReadableFile]{ReadableFile()}} or \code{\link[=mmap_open]{mmap_open()}} or a file path.} +\item{stream}{stream. +\itemize{ +\item a \link[=arrow__ipc__RecordBatchFileReader]{arrow::ipc::RecordBatchFileReader}: +read an \link[=arrow__Table]{arrow::Table} +from all the record batches in the reader +\item a \link[=arrow__ipc__RecordBatchStreamReader]{arrow::ipc::RecordBatchStreamReader}: +read an \link[=arrow__Table]{arrow::Table} from the remaining record batches +in the reader +\item a string or \link[fs:path_abs]{file path}: interpret the file as an arrow +binary file format, and uses a \link[=arrow__ipc__RecordBatchFileReader]{arrow::ipc::RecordBatchFileReader} +to process it. +\item a raw vector: read using a \link[=arrow__ipc__RecordBatchStreamReader]{arrow::ipc::RecordBatchStreamReader} +}} +} +\value{ +an \link[=arrow__Table]{arrow::Table} } \description{ -Read an arrow::Table from a stream +Read an \link[=arrow__Table]{arrow::Table} from a stream +} +\details{ +The methods using \link[=arrow__ipc__RecordBatchFileReader]{arrow::ipc::RecordBatchFileReader} and +\link[=arrow__ipc__RecordBatchStreamReader]{arrow::ipc::RecordBatchStreamReader} offer the most +flexibility. The other methods are for convenience. 
} diff --git a/r/tests/testthat/test-Table.R b/r/tests/testthat/test-Table.R index dcc213d9663..c38b124bf65 100644 --- a/r/tests/testthat/test-Table.R +++ b/r/tests/testthat/test-Table.R @@ -29,25 +29,23 @@ test_that("read_table handles various input streams (ARROW-3450, ARROW-3505)", { write_arrow(tab, tf) bytes <- write_arrow(tab, raw()) - buf_reader <- BufferReader(bytes) tab1 <- read_table(tf) tab2 <- read_table(fs::path_abs(tf)) readable_file <- close_on_exit(ReadableFile(tf)) - tab3 <- read_table(readable_file) + tab3 <- read_table(close_on_exit(RecordBatchFileReader(readable_file))) mmap_file <- close_on_exit(mmap_open(tf)) - tab4 <- read_table(mmap_file) + tab4 <- read_table(close_on_exit(RecordBatchFileReader(mmap_file))) tab5 <- read_table(bytes) - tab6 <- read_table(buf_reader) stream_reader <- RecordBatchStreamReader(bytes) - tab7 <- read_table(stream_reader) + tab6 <- read_table(stream_reader) file_reader <- RecordBatchFileReader(tf) - tab8 <- read_table(file_reader) + tab7 <- read_table(file_reader) expect_equal(tab, tab1) expect_equal(tab, tab2) @@ -56,7 +54,6 @@ test_that("read_table handles various input streams (ARROW-3450, ARROW-3505)", { expect_equal(tab, tab5) expect_equal(tab, tab6) expect_equal(tab, tab7) - expect_equal(tab, tab8) }) test_that("Table cast (ARROW-3741)", { From 6f5db727c34022b9509c48a3e3fdccd3a502f973 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 23 Nov 2018 14:42:11 +0100 Subject: [PATCH 07/17] document read_arrow with read_table --- r/R/Table.R | 11 ----------- r/R/read_table.R | 10 +++++++++- r/man/read_arrow.Rd | 17 ----------------- r/man/read_table.Rd | 8 +++++++- r/src/recordbatch.cpp | 6 ++++-- r/src/recordbatchreader.cpp | 10 ++++++---- r/src/recordbatchwriter.cpp | 3 --- 7 files changed, 26 insertions(+), 39 deletions(-) delete mode 100644 r/man/read_arrow.Rd diff --git a/r/R/Table.R b/r/R/Table.R index e7d4545c1f6..7518e9d01be 100644 --- a/r/R/Table.R +++ b/r/R/Table.R @@ -48,14 +48,3 @@ table <- 
function(.data){ `as_tibble.arrow::Table` <- function(x, ...){ Table__to_dataframe(x) } - -#' Read an tibble from an arrow::Table on disk -#' -#' @param stream input stream -#' -#' @return a [tibble::tibble] -#' -#' @export -read_arrow <- function(stream){ - as_tibble(read_table(stream)) -} diff --git a/r/R/read_table.R b/r/R/read_table.R index c99d175feef..a540a421735 100644 --- a/r/R/read_table.R +++ b/r/R/read_table.R @@ -33,7 +33,10 @@ #' #' - a raw vector: read using a [arrow::ipc::RecordBatchStreamReader][arrow__ipc__RecordBatchStreamReader] #' -#' @return an [arrow::Table][arrow__Table] +#' @return +#' +#' - `read_table` returns an [arrow::Table][arrow__Table] +#' - `read_arrow` returns a [tibble::tibble()] #' #' @details #' @@ -76,3 +79,8 @@ read_table.fs_path <- function(stream) { shared_ptr(`arrow::Table`, Table__from_RecordBatchStreamReader(batch_reader)) } +#' @rdname read_table +#' @export +read_arrow <- function(stream){ + as_tibble(read_table(stream)) +} diff --git a/r/man/read_arrow.Rd b/r/man/read_arrow.Rd deleted file mode 100644 index 362ee7adc1a..00000000000 --- a/r/man/read_arrow.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Table.R -\name{read_arrow} -\alias{read_arrow} -\title{Read an tibble from an arrow::Table on disk} -\usage{ -read_arrow(stream) -} -\arguments{ -\item{stream}{input stream} -} -\value{ -a \link[tibble:tibble]{tibble::tibble} -} -\description{ -Read an tibble from an arrow::Table on disk -} diff --git a/r/man/read_table.Rd b/r/man/read_table.Rd index e5227a5f3d4..3231b26da26 100644 --- a/r/man/read_table.Rd +++ b/r/man/read_table.Rd @@ -2,9 +2,12 @@ % Please edit documentation in R/read_table.R \name{read_table} \alias{read_table} +\alias{read_arrow} \title{Read an \link[=arrow__Table]{arrow::Table} from a stream} \usage{ read_table(stream) + +read_arrow(stream) } \arguments{ \item{stream}{stream. @@ -22,7 +25,10 @@ to process it. 
}} } \value{ -an \link[=arrow__Table]{arrow::Table} +\itemize{ +\item \code{read_table} returns an \link[=arrow__Table]{arrow::Table} +\item \code{read_arrow} returns a \code{\link[tibble:tibble]{tibble::tibble()}} +} } \description{ Read an \link[=arrow__Table]{arrow::Table} from a stream diff --git a/r/src/recordbatch.cpp b/r/src/recordbatch.cpp index 2787cf2aa54..db03fabfd94 100644 --- a/r/src/recordbatch.cpp +++ b/r/src/recordbatch.cpp @@ -122,7 +122,8 @@ std::shared_ptr RecordBatch__Slice2( } // [[Rcpp::export]] -RawVector ipc___SerializeRecordBatch__Raw(const std::shared_ptr& batch) { +RawVector ipc___SerializeRecordBatch__Raw( + const std::shared_ptr& batch) { // how many bytes do we need ? int64_t size; STOP_IF_NOT_OK(arrow::ipc::GetRecordBatchSize(*batch, &size)); @@ -133,7 +134,8 @@ RawVector ipc___SerializeRecordBatch__Raw(const std::shared_ptr>(out); arrow::io::FixedSizeBufferWriter stream(buffer); - STOP_IF_NOT_OK(arrow::ipc::SerializeRecordBatch(*batch, arrow::default_memory_pool(), &stream)); + STOP_IF_NOT_OK( + arrow::ipc::SerializeRecordBatch(*batch, arrow::default_memory_pool(), &stream)); STOP_IF_NOT_OK(stream.Close()); return out; diff --git a/r/src/recordbatchreader.cpp b/r/src/recordbatchreader.cpp index fae757575a5..f3e90228d3c 100644 --- a/r/src/recordbatchreader.cpp +++ b/r/src/recordbatchreader.cpp @@ -42,10 +42,11 @@ std::shared_ptr ipc___RecordBatchStreamReader__Open( } // [[Rcpp::export]] -std::vector> ipc___RecordBatchStreamReader__batches(const std::shared_ptr& reader) { +std::vector> ipc___RecordBatchStreamReader__batches( + const std::shared_ptr& reader) { std::vector> res; - while(true) { + while (true) { std::shared_ptr batch; STOP_IF_NOT_OK(reader->ReadNext(&batch)); if (!batch) break; @@ -121,11 +122,12 @@ std::shared_ptr Table__from_RecordBatchStreamReader( } // [[Rcpp::export]] -std::vector> ipc___RecordBatchFileReader__batches( const std::shared_ptr& reader) { +std::vector> ipc___RecordBatchFileReader__batches( + const 
std::shared_ptr& reader) { auto n = reader->num_record_batches(); std::vector> res(n); - for(int i = 0; iReadRecordBatch(i, &res[i])); } diff --git a/r/src/recordbatchwriter.cpp b/r/src/recordbatchwriter.cpp index bb410359a80..d4dd212a9bd 100644 --- a/r/src/recordbatchwriter.cpp +++ b/r/src/recordbatchwriter.cpp @@ -17,7 +17,6 @@ #include "arrow_types.h" - // [[Rcpp::export]] void ipc___RecordBatchWriter__WriteRecordBatch( const std::shared_ptr& batch_writer, @@ -48,7 +47,6 @@ std::shared_ptr ipc___RecordBatchFileWriter__Open return file_writer; } - // [[Rcpp::export]] std::shared_ptr ipc___RecordBatchStreamWriter__Open( const std::shared_ptr& stream, @@ -58,4 +56,3 @@ std::shared_ptr ipc___RecordBatchStreamWriter__Op arrow::ipc::RecordBatchStreamWriter::Open(stream.get(), schema, &stream_writer)); return stream_writer; } - From 77916c5d74b5ddf43902c43d1d656a2602722725 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 23 Nov 2018 14:47:50 +0100 Subject: [PATCH 08/17] -doc directory --- r/.gitignore | 3 + r/doc/arrow.R | 90 -------- r/doc/arrow.Rmd | 236 --------------------- r/doc/arrow.html | 540 ----------------------------------------------- 4 files changed, 3 insertions(+), 866 deletions(-) delete mode 100644 r/doc/arrow.R delete mode 100644 r/doc/arrow.Rmd delete mode 100644 r/doc/arrow.html diff --git a/r/.gitignore b/r/.gitignore index 85c986810bd..0f405f57136 100644 --- a/r/.gitignore +++ b/r/.gitignore @@ -1,3 +1,6 @@ +Meta +doc +inst/doc *.o *.o-* *.d diff --git a/r/doc/arrow.R b/r/doc/arrow.R deleted file mode 100644 index 4599092e7d7..00000000000 --- a/r/doc/arrow.R +++ /dev/null @@ -1,90 +0,0 @@ -## ----setup, include = FALSE---------------------------------------------- -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) -library(arrow, warn.conflicts = FALSE) - -## ---- eval = FALSE------------------------------------------------------- -# # install.packages("remotes") -# remotes::install_github("apache/arrow/r") - -## 
------------------------------------------------------------------------ -library(arrow, warn.conflicts = FALSE) -t1 <- int32() -t2 <- utf8() -t5 <- timestamp(TimeUnit$MILLI) - -t1 -t2 -t5 - -## ------------------------------------------------------------------------ -t6 <- list_of(t1) -t6 - -## ------------------------------------------------------------------------ -t7 <- struct(s0 = int32(), s3 = list_of(int16())) -t7 - -## ------------------------------------------------------------------------ -s <- schema( - field0 = int32(), - field1 = utf8(), - field3 = list_of(int32()) -) -s - -## ------------------------------------------------------------------------ -a <- array(1:10) -a - -## ------------------------------------------------------------------------ -# TODO: should this be an active like in python ? -# a$type rather than a$type() -a$type() - -## ------------------------------------------------------------------------ -a$length() -length(a) - -# TODO: should this be an active like in python ? 
-# a$null_count rather than a$null_count() -a$null_count() - -## ------------------------------------------------------------------------ -f <- factor(c("a", "b"), levels = c("a", "b", "c")) -a <- array(f) -a$type() -a$indices() -a$dictionary() -a - -## ------------------------------------------------------------------------ -tbl <- tibble::tibble( - f0 = 1:4, - f1 = c("foo", "bar", "baz", NA), - f2 = c(TRUE, NA, FALSE, NA) -) -batch <- record_batch(tbl) -batch$num_columns() -batch$num_rows() - -# convert a record batch back to a tibble -as_tibble(batch) - -## ------------------------------------------------------------------------ -batch$Slice(2) -batch$Slice(2, 1) - -## ------------------------------------------------------------------------ -tab <- table(tbl) -tab -tab$num_columns() -tab$num_rows() - -## ------------------------------------------------------------------------ -tab$column(0L) -tab$column(0L)$data() -tab$column(0L)$data()$chunks() - diff --git a/r/doc/arrow.Rmd b/r/doc/arrow.Rmd deleted file mode 100644 index 91ffe04213e..00000000000 --- a/r/doc/arrow.Rmd +++ /dev/null @@ -1,236 +0,0 @@ ---- -title: "arrow" -author: "Romain François" -date: "`r Sys.Date()`" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{arrow} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -```{r setup, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) -library(arrow, warn.conflicts = FALSE) -``` - -# Install arrow - -## CRAN - -`arrow` is not yet available from CRAN - -## Installing from source - -The `arrow` package is currently only available from github. To install the development version, you first need to -install the C++ library so that `pkg-config` finds it: - -```shell -git clone https://github.com/apache/arrow.git -cd arrow/cpp && mkdir release && cd release - -# It is important to statically link to boost libraries -cmake .. 
-DCMAKE_BUILD_TYPE=Release -DARROW_BOOST_USE_SHARED:BOOL=Off -make install -``` - -Then you can install the R package with [remotes::install_github()](https://remotes.r-lib.org/reference/install_github.html) - -```{r, eval = FALSE} -# install.packages("remotes") -remotes::install_github("apache/arrow/r") -``` - -This will install the correct versions of packages `arrow` depend on. - -# Development - -## System Requirements - -## Environment Setup and Build - -## Build and test - -## Developing on Windows - -# Memory and IO Interfaces - -# Data Type and In-Memory Data Model - -Apache Arrow defines columnar array data structures by composing type metadata with memory buffers. These data -structures are exposed in R as a set of interrelated [R6](https://r6.r-lib.org) classes. - - - Type metadata: Instances of `arrow::DataType`, which describe a logical array type - - Schemas: Instances of `arrow::Schema`, which describes a named collection of types. These - can be thought of as the column types in a table-like object. - - Arrays: Instances of `arrow::Array`, which are atomic, contiguous columnar data structures - composed from `arrow::Buffer` objects. - - Record Batches: Instances of `arrow::RecordBatch` which are a collection of `Array` objects - with a particular schema - - Tables: Instances of `arrow::Table`, a logical table data structures in which each column - consists of one or more `Array` objects of the same logical type. - -## Type metadata - -Apache Arrow defines language agnostic column-oriented data structures for array data. These include: - - - Fixed-length primitive types: numbers, booleans, data and times, fixed size binary and other values - that fit into a given number - - Variable length primitive types: binary, string - - Nested types: list, struct and union - - Dictionary type: An encoded categorical type - -Each logical type in `arrow` has a corresponding factory function for creating an instance of that type object in R. 
- -```{r} -library(arrow, warn.conflicts = FALSE) -t1 <- int32() -t2 <- utf8() -t5 <- timestamp(TimeUnit$MILLI) - -t1 -t2 -t5 -``` - -We use the name *logical* type because the *physical* storage may be the same for one or more types. For -example `int64`, `float64` and `timestamp[ms]` all occupy 64 bites per value. - -These objects are *metadata*, they are used for describing the data in arrays, schemas and record batches. - - - -Arrow supports *nested value types* like list, struct, and union. The `list_of()` function is the -factory for list types. - -```{r} -t6 <- list_of(t1) -t6 -``` - -A `struct` is a collection of named fields : - -```{r} -t7 <- struct(s0 = int32(), s3 = list_of(int16())) -t7 -``` - -## Schemas - -The `arrow::Schema` type is similar to the `struct` array type, it defines the column names and types in a record -batch or table data structure. The `schema()` factory function makes new `arrow::Schema` objects in R: - -```{r} -s <- schema( - field0 = int32(), - field1 = utf8(), - field3 = list_of(int32()) -) -s -``` - -It is fairly rare to create schemas directly. - -## Arrays - -For each data type, there is an accompanying array data structure for holding memory buffers -that define a single contiguous chunk of columnar array data. - -The `array()` function can be used to create `arrow::Array` instances, although you would -typically manipulate arrays from record batches and tables. - -```{r} -a <- array(1:10) -a -``` - -The `$type()` method gives the corresponding piece of type metadata: - -```{r} -# TODO: should this be an active like in python ? -# a$type rather than a$type() -a$type() -``` - -Each in-memory array has a known length and null count: - -```{r} -a$length() -length(a) - -# TODO: should this be an active like in python ? 
-# a$null_count rather than a$null_count() -a$null_count() -``` - -## Handling of missing values - -## List Arrays - -## Struct Arrays - -## Union Arrays - -## Dictionary Arrays - -The `arrow::Dictionary` type is a special array that is similar to an R factor. The -`array()` factory converts R factors to the appropriate type of dictionary array: - -```{r} -f <- factor(c("a", "b"), levels = c("a", "b", "c")) -a <- array(f) -a$type() -a$indices() -a$dictionary() -a -``` - -## RecordBatch - -A *Record Batch* in Apache Arrow is a collection of equal length array instances. The `record_batch()` function -may be used to convert a data frame to a RecordBatch. - -```{r} -tbl <- tibble::tibble( - f0 = 1:4, - f1 = c("foo", "bar", "baz", NA), - f2 = c(TRUE, NA, FALSE, NA) -) -batch <- record_batch(tbl) -batch$num_columns() -batch$num_rows() - -# convert a record batch back to a tibble -as_tibble(batch) -``` - -A record batch can be sliced (0-based) : - -```{r} -batch$Slice(2) -batch$Slice(2, 1) -``` - -## Table - -A *Table* is a set of record batches of the same schema, it can be created with the `arrow::table()` function: - -```{r} -tab <- table(tbl) -tab -tab$num_columns() -tab$num_rows() -``` - -The table columns are instances of `arrow::Column`, which is a container for one or more arrays -of the same type. - -```{r} -tab$column(0L) -tab$column(0L)$data() -tab$column(0L)$data()$chunks() -``` - diff --git a/r/doc/arrow.html b/r/doc/arrow.html deleted file mode 100644 index c54d7f3b65c..00000000000 --- a/r/doc/arrow.html +++ /dev/null @@ -1,540 +0,0 @@ - - - - - - - - - - - - - - - - -arrow - - - - - - - - - - - - - - - - - -

arrow

-

Romain François

-

2018-11-16

- - - -
-

Install arrow

-
-

CRAN

-

arrow is not yet available from CRAN

-
-
-

Installing from source

-

The arrow package is currently only available from github. To install the development version, you first need to install the C++ library so that pkg-config finds it:

-
git clone https://github.com/apache/arrow.git
-cd arrow/cpp && mkdir release && cd release
-
-# It is important to statically link to boost libraries
-cmake .. -DCMAKE_BUILD_TYPE=Release -DARROW_BOOST_USE_SHARED:BOOL=Off
-make install
-

Then you can install the R package with remotes::install_github()

- -

This will install the correct versions of packages arrow depend on.

-
-
-
-

Development

-
-

System Requirements

-
-
-

Environment Setup and Build

-
-
-

Build and test

-
-
-

Developing on Windows

-
-
-
-

Memory and IO Interfaces

-
-
-

Data Type and In-Memory Data Model

-

Apache Arrow defines columnar array data structures by composing type metadata with memory buffers. These data structures are exposed in R as a set of interrelated R6 classes.

-
    -
  • Type metadata: Instances of arrow::DataType, which describe a logical array type
  • -
  • Schemas: Instances of arrow::Schema, which describes a named collection of types. These can be thought of as the column types in a table-like object.
  • -
  • Arrays: Instances of arrow::Array, which are atomic, contiguous columnar data structures composed from arrow::Buffer objects.
  • -
  • Record Batches: Instances of arrow::RecordBatch which are a collection of Array objects with a particular schema
  • -
  • Tables: Instances of arrow::Table, a logical table data structures in which each column consists of one or more Array objects of the same logical type.
  • -
-
-

Type metadata

-

Apache Arrow defines language agnostic column-oriented data structures for array data. These include:

-
    -
  • Fixed-length primitive types: numbers, booleans, data and times, fixed size binary and other values that fit into a given number
  • -
  • Variable length primitive types: binary, string
  • -
  • Nested types: list, struct and union
  • -
  • Dictionary type: An encoded categorical type
  • -
-

Each logical type in arrow has a corresponding factory function for creating an instance of that type object in R.

- -

We use the name logical type because the physical storage may be the same for one or more types. For example int64, float64 and timestamp[ms] all occupy 64 bites per value.

-

These objects are metadata, they are used for describing the data in arrays, schemas and record batches.

- -

Arrow supports nested value types like list, struct, and union. The list_of() function is the factory for list types.

- -

A struct is a collection of named fields :

- -
-
-

Schemas

-

The arrow::Schema type is similar to the struct array type, it defines the column names and types in a record batch or table data structure. The schema() factory function makes new arrow::Schema objects in R:

- -

It is fairly rare to create schemas directly.

-
-
-

Arrays

-

For each data type, there is an accompanying array data structure for holding memory buffers that define a single contiguous chunk of columnar array data.

-

The array() function can be used to create arrow::Array instances, although you would typically manipulate arrays from record batches and tables.

- -

The $type() method gives the corresponding piece of type metadata:

- -

Each in-memory array has a known length and null count:

- -
-
-

Handling of missing values

-
-
-

List Arrays

-
-
-

Struct Arrays

-
-
-

Union Arrays

-
-
-

Dictionary Arrays

-

The arrow::Dictionary type is a special array that is similar to an R factor. The array() factory converts R factors to the appropriate type of dictionary array:

- -
- -
-

Table

-

A Table is a set of record batches of the same schema, it can be created with the arrow::table() function:

- -

The table columns are instances of arrow::Column, which is a container for one or more arrays of the same type.

- -
-
- - - - - - - - From cc9669519db8ce5b03194c1181c02914545470f5 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 23 Nov 2018 15:40:24 +0100 Subject: [PATCH 09/17] num_rows, num_columns, and schema become active rather than methods (as in pyarrow api) --- r/R/RecordBatch.R | 10 +++++++--- r/R/Table.R | 9 ++++++--- r/R/write_arrow.R | 4 ++-- r/tests/testthat/test-RecordBatch.R | 14 +++++++------- r/tests/testthat/test-read-write.R | 8 ++++---- r/tests/testthat/test-read_record_batch.R | 8 ++++---- r/tests/testthat/test-schema.R | 2 +- 7 files changed, 31 insertions(+), 24 deletions(-) diff --git a/r/R/RecordBatch.R b/r/R/RecordBatch.R index 22618a4ad74..9c78684768f 100644 --- a/r/R/RecordBatch.R +++ b/r/R/RecordBatch.R @@ -31,9 +31,6 @@ #' @name arrow__RecordBatch `arrow::RecordBatch` <- R6Class("arrow::RecordBatch", inherit = `arrow::Object`, public = list( - num_columns = function() RecordBatch__num_columns(self), - num_rows = function() RecordBatch__num_rows(self), - schema = function() shared_ptr(`arrow::Schema`, RecordBatch__schema(self)), column = function(i) shared_ptr(`arrow::Array`, RecordBatch__column(self, i)), column_name = function(i) RecordBatch__column_name(self, i), names = function() RecordBatch__names(self), @@ -62,6 +59,13 @@ assert_that(identical(self$schema()$names, target_schema$names), msg = "incompatible schemas") shared_ptr(`arrow::RecordBatch`, RecordBatch__cast(self, target_schema, options)) } + ), + + active = list( + num_columns = function() RecordBatch__num_columns(self), + num_rows = function() RecordBatch__num_rows(self), + schema = function() shared_ptr(`arrow::Schema`, RecordBatch__schema(self)) + # TODO: columns ) ) diff --git a/r/R/Table.R b/r/R/Table.R index 7518e9d01be..709ab1fcab0 100644 --- a/r/R/Table.R +++ b/r/R/Table.R @@ -19,9 +19,6 @@ `arrow::Table` <- R6Class("arrow::Table", inherit = `arrow::Object`, public = list( - num_columns = function() Table__num_columns(self), - num_rows = function() Table__num_rows(self), 
- schema = function() shared_ptr(`arrow::Schema`, Table__schema(self)), column = function(i) shared_ptr(`arrow::Column`, Table__column(self, i)), serialize = function(output_stream, ...) write_table(self, output_stream, ...), @@ -32,6 +29,12 @@ assert_that(identical(self$schema()$names, target_schema$names), msg = "incompatible schemas") shared_ptr(`arrow::Table`, Table__cast(self, target_schema, options)) } + ), + + active = list( + num_columns = function() Table__num_columns(self), + num_rows = function() Table__num_rows(self), + schema = function() shared_ptr(`arrow::Schema`, Table__schema(self)) ) ) diff --git a/r/R/write_arrow.R b/r/R/write_arrow.R index 42d5d85ff3d..5fc684771e5 100644 --- a/r/R/write_arrow.R +++ b/r/R/write_arrow.R @@ -67,14 +67,14 @@ write_arrow <- function(x, stream, ...) { assert_that(length(stream) == 1L) x <- to_arrow(x) file_stream <- close_on_exit(FileOutputStream(stream)) - file_writer <- close_on_exit(RecordBatchFileWriter(file_stream, x$schema())) + file_writer <- close_on_exit(RecordBatchFileWriter(file_stream, x$schema)) write_arrow(x, file_writer, ...) } #' @export `write_arrow.raw` <- function(x, stream, ...) 
{ x <- to_arrow(x) - schema <- x$schema() + schema <- x$schema # how many bytes do we need mock_stream <- MockOutputStream() diff --git a/r/tests/testthat/test-RecordBatch.R b/r/tests/testthat/test-RecordBatch.R index a2f1218243a..19f2ef28c84 100644 --- a/r/tests/testthat/test-RecordBatch.R +++ b/r/tests/testthat/test-RecordBatch.R @@ -28,15 +28,15 @@ test_that("RecordBatch", { expect_true(batch == batch) expect_equal( - batch$schema(), + batch$schema, schema( int = int32(), dbl = float64(), lgl = boolean(), chr = utf8(), fct = dictionary(int32(), array(letters[1:10])) ) ) - expect_equal(batch$num_columns(), 5L) - expect_equal(batch$num_rows(), 10L) + expect_equal(batch$num_columns, 5L) + expect_equal(batch$num_rows, 10L) expect_equal(batch$column_name(0), "int") expect_equal(batch$column_name(1), "dbl") expect_equal(batch$column_name(2), "lgl") @@ -72,7 +72,7 @@ test_that("RecordBatch", { batch2 <- batch$RemoveColumn(0) expect_equal( - batch2$schema(), + batch2$schema, schema(dbl = float64(), lgl = boolean(), chr = utf8(), fct = dictionary(int32(), array(letters[1:10]))) ) expect_equal(batch2$column(0), batch$column(1)) @@ -95,10 +95,10 @@ test_that("RecordBatch with 0 rows are supported", { ) batch <- record_batch(tbl) - expect_equal(batch$num_columns(), 5L) - expect_equal(batch$num_rows(), 0L) + expect_equal(batch$num_columns, 5L) + expect_equal(batch$num_rows, 0L) expect_equal( - batch$schema(), + batch$schema, schema( int = int32(), dbl = float64(), diff --git a/r/tests/testthat/test-read-write.R b/r/tests/testthat/test-read-write.R index 2af718ebe56..47268d71562 100644 --- a/r/tests/testthat/test-read-write.R +++ b/r/tests/testthat/test-read-write.R @@ -25,8 +25,8 @@ test_that("arrow::table round trip", { ) tab <- arrow::table(tbl) - expect_equal(tab$num_columns(), 3L) - expect_equal(tab$num_rows(), 10L) + expect_equal(tab$num_columns, 3L) + expect_equal(tab$num_rows, 10L) # arrow::Column col_int <- tab$column(0) @@ -99,8 +99,8 @@ test_that("arrow::table 
round trip handles NA in integer and numeric", { ) tab <- arrow::table(tbl) - expect_equal(tab$num_columns(), 3L) - expect_equal(tab$num_rows(), 10L) + expect_equal(tab$num_columns, 3L) + expect_equal(tab$num_rows, 10L) expect_equal(tab$column(0)$length(), 10L) expect_equal(tab$column(1)$length(), 10L) diff --git a/r/tests/testthat/test-read_record_batch.R b/r/tests/testthat/test-read_record_batch.R index 69c49b278d4..8477b7a4c3d 100644 --- a/r/tests/testthat/test-read_record_batch.R +++ b/r/tests/testthat/test-read_record_batch.R @@ -25,7 +25,7 @@ test_that("RecordBatchFileWriter / RecordBatchFileReader roundtrips", { )) tf <- local_tempfile() - writer <- RecordBatchFileWriter(tf, tab$schema()) + writer <- RecordBatchFileWriter(tf, tab$schema) expect_is(writer, "arrow::ipc::RecordBatchFileWriter") writer$write_table(tab) writer$close() @@ -33,7 +33,7 @@ test_that("RecordBatchFileWriter / RecordBatchFileReader roundtrips", { expect_equal(tab, tab2) stream <- FileOutputStream(tf) - writer <- RecordBatchFileWriter(stream, tab$schema()) + writer <- RecordBatchFileWriter(stream, tab$schema) expect_is(writer, "arrow::ipc::RecordBatchFileWriter") writer$write_table(tab) writer$close() @@ -48,7 +48,7 @@ test_that("read_record_batch() handles (raw|Buffer|InputStream, Schema) (ARROW-3 chr = letters[1:10] ) batch <- record_batch(tbl) - schema <- batch$schema() + schema <- batch$schema raw <- batch$serialize() batch2 <- read_record_batch(raw, schema) @@ -62,7 +62,7 @@ test_that("read_record_batch() handles (raw|Buffer|InputStream, Schema) (ARROW-3 test_that("read_record_batch() can handle (Message, Schema) parameters (ARROW-3499)", { batch <- record_batch(tibble::tibble(x = 1:10)) - schema <- batch$schema() + schema <- batch$schema raw <- batch$serialize() stream <- close_on_exit(BufferReader(raw)) diff --git a/r/tests/testthat/test-schema.R b/r/tests/testthat/test-schema.R index 5c9be332809..42a8548cec3 100644 --- a/r/tests/testthat/test-schema.R +++ 
b/r/tests/testthat/test-schema.R @@ -24,7 +24,7 @@ test_that("reading schema from Buffer", { expect_is(batch, "arrow::RecordBatch") stream <- BufferOutputStream() - writer <- RecordBatchStreamWriter(stream, batch$schema()) + writer <- RecordBatchStreamWriter(stream, batch$schema) expect_is(writer, "arrow::ipc::RecordBatchStreamWriter") writer$close() From 59000207949d902ea30aba1de3666c55a0cc8d1e Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 23 Nov 2018 16:11:26 +0100 Subject: [PATCH 10/17] + Table$columns(), RecordBatch$columns() --- r/R/RcppExports.R | 8 ++++++++ r/R/RecordBatch.R | 4 ++-- r/R/Table.R | 3 ++- r/src/RcppExports.cpp | 24 ++++++++++++++++++++++++ r/src/recordbatch.cpp | 11 +++++++++++ r/src/table.cpp | 11 +++++++++++ 6 files changed, 58 insertions(+), 3 deletions(-) diff --git a/r/R/RcppExports.R b/r/R/RcppExports.R index 9188b1dc2bf..ccf854927b7 100644 --- a/r/R/RcppExports.R +++ b/r/R/RcppExports.R @@ -629,6 +629,10 @@ RecordBatch__schema <- function(x) { .Call(`_arrow_RecordBatch__schema`, x) } +RecordBatch__columns <- function(batch) { + .Call(`_arrow_RecordBatch__columns`, batch) +} + RecordBatch__column <- function(batch, i) { .Call(`_arrow_RecordBatch__column`, batch, i) } @@ -761,3 +765,7 @@ Table__column <- function(table, i) { .Call(`_arrow_Table__column`, table, i) } +Table__columns <- function(table) { + .Call(`_arrow_Table__columns`, table) +} + diff --git a/r/R/RecordBatch.R b/r/R/RecordBatch.R index 9c78684768f..6089427faec 100644 --- a/r/R/RecordBatch.R +++ b/r/R/RecordBatch.R @@ -64,8 +64,8 @@ active = list( num_columns = function() RecordBatch__num_columns(self), num_rows = function() RecordBatch__num_rows(self), - schema = function() shared_ptr(`arrow::Schema`, RecordBatch__schema(self)) - # TODO: columns + schema = function() shared_ptr(`arrow::Schema`, RecordBatch__schema(self)), + columns = function() map(RecordBatch__columns(self), shared_ptr, `arrow::Array`) ) ) diff --git a/r/R/Table.R b/r/R/Table.R index 
709ab1fcab0..9ec5f614dbb 100644 --- a/r/R/Table.R +++ b/r/R/Table.R @@ -34,7 +34,8 @@ active = list( num_columns = function() Table__num_columns(self), num_rows = function() Table__num_rows(self), - schema = function() shared_ptr(`arrow::Schema`, Table__schema(self)) + schema = function() shared_ptr(`arrow::Schema`, Table__schema(self)), + columns = function() map(Table__columns(self), shared_ptr, class = `arrow::Column`) ) ) diff --git a/r/src/RcppExports.cpp b/r/src/RcppExports.cpp index 5a619ae9c35..bca4eafdee4 100644 --- a/r/src/RcppExports.cpp +++ b/r/src/RcppExports.cpp @@ -1753,6 +1753,17 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// RecordBatch__columns +arrow::ArrayVector RecordBatch__columns(const std::shared_ptr& batch); +RcppExport SEXP _arrow_RecordBatch__columns(SEXP batchSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr& >::type batch(batchSEXP); + rcpp_result_gen = Rcpp::wrap(RecordBatch__columns(batch)); + return rcpp_result_gen; +END_RCPP +} // RecordBatch__column std::shared_ptr RecordBatch__column(const std::shared_ptr& batch, int i); RcppExport SEXP _arrow_RecordBatch__column(SEXP batchSEXP, SEXP iSEXP) { @@ -2127,6 +2138,17 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// Table__columns +std::vector> Table__columns(const std::shared_ptr& table); +RcppExport SEXP _arrow_Table__columns(SEXP tableSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr& >::type table(tableSEXP); + rcpp_result_gen = Rcpp::wrap(Table__columns(table)); + return rcpp_result_gen; +END_RCPP +} static const R_CallMethodDef CallEntries[] = { {"_arrow_Array__from_vector", (DL_FUNC) &_arrow_Array__from_vector, 1}, @@ -2286,6 +2308,7 @@ static const R_CallMethodDef CallEntries[] = { {"_arrow_RecordBatch__num_columns", (DL_FUNC) &_arrow_RecordBatch__num_columns, 1}, 
{"_arrow_RecordBatch__num_rows", (DL_FUNC) &_arrow_RecordBatch__num_rows, 1}, {"_arrow_RecordBatch__schema", (DL_FUNC) &_arrow_RecordBatch__schema, 1}, + {"_arrow_RecordBatch__columns", (DL_FUNC) &_arrow_RecordBatch__columns, 1}, {"_arrow_RecordBatch__column", (DL_FUNC) &_arrow_RecordBatch__column, 2}, {"_arrow_RecordBatch__to_dataframe", (DL_FUNC) &_arrow_RecordBatch__to_dataframe, 1}, {"_arrow_RecordBatch__from_dataframe", (DL_FUNC) &_arrow_RecordBatch__from_dataframe, 1}, @@ -2319,6 +2342,7 @@ static const R_CallMethodDef CallEntries[] = { {"_arrow_Table__schema", (DL_FUNC) &_arrow_Table__schema, 1}, {"_arrow_Table__to_dataframe", (DL_FUNC) &_arrow_Table__to_dataframe, 1}, {"_arrow_Table__column", (DL_FUNC) &_arrow_Table__column, 2}, + {"_arrow_Table__columns", (DL_FUNC) &_arrow_Table__columns, 1}, {NULL, NULL, 0} }; diff --git a/r/src/recordbatch.cpp b/r/src/recordbatch.cpp index db03fabfd94..b6bee7ae539 100644 --- a/r/src/recordbatch.cpp +++ b/r/src/recordbatch.cpp @@ -40,6 +40,17 @@ std::shared_ptr RecordBatch__schema( return x->schema(); } +// [[Rcpp::export]] +arrow::ArrayVector RecordBatch__columns( + const std::shared_ptr& batch) { + auto nc = batch->num_columns(); + ArrayVector res(nc); + for (int i = 0; i < nc; i++) { + res[i] = batch->column(i); + } + return res; +} + // [[Rcpp::export]] std::shared_ptr RecordBatch__column( const std::shared_ptr& batch, int i) { diff --git a/r/src/table.cpp b/r/src/table.cpp index 4bdff167db9..f4ebd0466b9 100644 --- a/r/src/table.cpp +++ b/r/src/table.cpp @@ -67,3 +67,14 @@ std::shared_ptr Table__column(const std::shared_ptr int i) { return table->column(i); } + +// [[Rcpp::export]] +std::vector> Table__columns( + const std::shared_ptr& table) { + auto nc = table->num_columns(); + std::vector> res(nc); + for (int i = 0; i < nc; i++) { + res[i] = table->column(i); + } + return res; +} From f62a22d311836f1b377fa30de132e1977f28eb1f Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 23 Nov 2018 16:39:50 +0100 
Subject: [PATCH 11/17] Move more things to active rather than methods, to match pyarrow --- r/R/ChunkedArray.R | 10 +++-- r/R/Column.R | 7 +++- r/R/array.R | 8 ++-- r/R/message.R | 4 +- r/tests/testthat/test-Array.R | 54 ++++++++++++------------- r/tests/testthat/test-RecordBatch.R | 10 ++--- r/tests/testthat/test-chunkedarray.R | 58 +++++++++++++-------------- r/tests/testthat/test-message.R | 2 +- r/tests/testthat/test-messagereader.R | 4 +- r/tests/testthat/test-read-write.R | 42 +++++++++---------- r/tests/testthat/test-schema.R | 4 +- 11 files changed, 105 insertions(+), 98 deletions(-) diff --git a/r/R/ChunkedArray.R b/r/R/ChunkedArray.R index 188ef35cf16..46e40766290 100644 --- a/r/R/ChunkedArray.R +++ b/r/R/ChunkedArray.R @@ -32,11 +32,7 @@ `arrow::ChunkedArray` <- R6Class("arrow::ChunkedArray", inherit = `arrow::Object`, public = list( length = function() ChunkedArray__length(self), - null_count = function() ChunkedArray__null_count(self), - num_chunks = function() ChunkedArray__num_chunks(self), chunk = function(i) shared_ptr(`arrow::Array`, ChunkedArray__chunk(self, i)), - chunks = function() purrr::map(ChunkedArray__chunks(self), shared_ptr, class = `arrow::Array`), - type = function() `arrow::DataType`$dispatch(ChunkedArray__type(self)), as_vector = function() ChunkedArray__as_vector(self), Slice = function(offset, length = NULL){ if (is.null(length)) { @@ -50,6 +46,12 @@ assert_that(inherits(options, "arrow::compute::CastOptions")) shared_ptr(`arrow::ChunkedArray`, ChunkedArray__cast(self, target_type, options)) } + ), + active = list( + null_count = function() ChunkedArray__null_count(self), + num_chunks = function() ChunkedArray__num_chunks(self), + chunks = function() map(ChunkedArray__chunks(self), shared_ptr, class = `arrow::Array`), + type = function() `arrow::DataType`$dispatch(ChunkedArray__type(self)) ) ) diff --git a/r/R/Column.R b/r/R/Column.R index cf37eb0956c..fb8af1ea315 100644 --- a/r/R/Column.R +++ b/r/R/Column.R @@ -32,8 +32,11 @@ 
`arrow::Column` <- R6Class("arrow::Column", inherit = `arrow::Object`, public = list( length = function() Column__length(self), - null_count = function() Column__null_count(self), - type = function() `arrow::DataType`$dispatch(Column__type(self)), data = function() shared_ptr(`arrow::ChunkedArray`, Column__data(self)) + ), + + active = list( + null_count = function() Column__null_count(self), + type = function() `arrow::DataType`$dispatch(Column__type(self)) ) ) diff --git a/r/R/array.R b/r/R/array.R index 26a2e6f11f2..5d70e59093b 100644 --- a/r/R/array.R +++ b/r/R/array.R @@ -73,9 +73,6 @@ IsNull = function(i) Array__IsNull(self, i), IsValid = function(i) Array__IsValid(self, i), length = function() Array__length(self), - offset = function() Array__offset(self), - null_count = function() Array__null_count(self), - type = function() `arrow::DataType`$dispatch(Array__type(self)), type_id = function() Array__type_id(self), Equals = function(other) Array__Equals(self, other), ApproxEquals = function(othet) Array__ApproxEquals(self, other), @@ -98,6 +95,11 @@ assert_that(inherits(options, "arrow::compute::CastOptions")) `arrow::Array`$dispatch(Array__cast(self, target_type, options)) } + ), + active = list( + null_count = function() Array__null_count(self), + type = function() `arrow::DataType`$dispatch(Array__type(self)), + offset = function() Array__offset(self) ) ) diff --git a/r/R/message.R b/r/R/message.R index d808e65963b..93c90c09763 100644 --- a/r/R/message.R +++ b/r/R/message.R @@ -36,10 +36,10 @@ ipc___Message__Equals(self, other) }, body_length = function() ipc___Message__body_length(self), - Verify = function() ipc___Message__Verify(self), - type = function() ipc___Message__type(self) + Verify = function() ipc___Message__Verify(self) ), active = list( + type = function() ipc___Message__type(self), metadata = function() shared_ptr(`arrow::Buffer`, ipc___Message__metadata(self)), body = function() shared_ptr(`arrow::Buffer`, ipc___Message__body(self)) ) diff 
--git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R index cbf67e711d1..e2612e42f0d 100644 --- a/r/tests/testthat/test-Array.R +++ b/r/tests/testthat/test-Array.R @@ -19,35 +19,35 @@ context("arrow::Array") test_that("Array", { x <- array(1:10, 1:10, 1:5) - expect_equal(x$type(), int32()) + expect_equal(x$type, int32()) expect_equal(x$length(), 25L) expect_equal(x$as_vector(), c(1:10, 1:10, 1:5)) y <- x$Slice(10) - expect_equal(y$type(), int32()) + expect_equal(y$type, int32()) expect_equal(y$length(), 15L) expect_equal(y$as_vector(), c(1:10, 1:5)) expect_true(x$RangeEquals(y, 10, 24, 0)) z <- x$Slice(10, 5) - expect_equal(z$type(), int32()) + expect_equal(z$type, int32()) expect_equal(z$length(), 5L) expect_equal(z$as_vector(), c(1:5)) expect_true(x$RangeEquals(z, 10, 15, 0)) x_dbl <- array(c(1,2,3), c(4,5,6)) - expect_equal(x_dbl$type(), float64()) + expect_equal(x_dbl$type, float64()) expect_equal(x_dbl$length(), 6L) expect_equal(x_dbl$as_vector(), as.numeric(1:6)) y_dbl <- x_dbl$Slice(3) - expect_equal(y_dbl$type(), float64()) + expect_equal(y_dbl$type, float64()) expect_equal(y_dbl$length(), 3L) - expect_equal(y_dbl$offset(), 3L) + expect_equal(y_dbl$offset, 3L) expect_equal(y_dbl$as_vector(), as.numeric(4:6)) z_dbl <- x_dbl$Slice(3, 2) - expect_equal(z_dbl$type(), float64()) + expect_equal(z_dbl$type, float64()) expect_equal(z_dbl$length(), 2L) expect_equal(z_dbl$as_vector(), as.numeric(4:5)) }) @@ -138,7 +138,7 @@ test_that("Array supports unordered factors (ARROW-3355)", { f <- factor(c("itsy", "bitsy", "spider", "spider")) arr_fac <- array(f) expect_equal(arr_fac$length(), 4L) - expect_equal(arr_fac$type()$index_type(), int8()) + expect_equal(arr_fac$type$index_type(), int8()) expect_identical(arr_fac$as_vector(), f) expect_true(arr_fac$IsValid(0)) expect_true(arr_fac$IsValid(1)) @@ -147,7 +147,7 @@ test_that("Array supports unordered factors (ARROW-3355)", { sl <- arr_fac$Slice(1) expect_equal(sl$length(), 3L) - 
expect_equal(arr_fac$type()$index_type(), int8()) + expect_equal(arr_fac$type$index_type(), int8()) expect_equal(sl$as_vector(), f[2:4]) # with NA @@ -155,7 +155,7 @@ test_that("Array supports unordered factors (ARROW-3355)", { # TODO: rm the suppressWarnings when https://github.com/r-lib/vctrs/issues/109 arr_fac <- suppressWarnings(array(f)) expect_equal(arr_fac$length(), 5L) - expect_equal(arr_fac$type()$index_type(), int8()) + expect_equal(arr_fac$type$index_type(), int8()) expect_identical(arr_fac$as_vector(), f) expect_true(arr_fac$IsValid(0)) expect_true(arr_fac$IsValid(1)) @@ -165,7 +165,7 @@ test_that("Array supports unordered factors (ARROW-3355)", { sl <- arr_fac$Slice(1) expect_equal(sl$length(), 4L) - expect_equal(arr_fac$type()$index_type(), int8()) + expect_equal(arr_fac$type$index_type(), int8()) expect_equal(sl$as_vector(), f[2:5]) }) @@ -174,7 +174,7 @@ test_that("Array supports ordered factors (ARROW-3355)", { f <- ordered(c("itsy", "bitsy", "spider", "spider")) arr_fac <- array(f) expect_equal(arr_fac$length(), 4L) - expect_equal(arr_fac$type()$index_type(), int8()) + expect_equal(arr_fac$type$index_type(), int8()) expect_identical(arr_fac$as_vector(), f) expect_true(arr_fac$IsValid(0)) expect_true(arr_fac$IsValid(1)) @@ -183,7 +183,7 @@ test_that("Array supports ordered factors (ARROW-3355)", { sl <- arr_fac$Slice(1) expect_equal(sl$length(), 3L) - expect_equal(arr_fac$type()$index_type(), int8()) + expect_equal(arr_fac$type$index_type(), int8()) expect_equal(sl$as_vector(), f[2:4]) # with NA @@ -191,7 +191,7 @@ test_that("Array supports ordered factors (ARROW-3355)", { # TODO: rm the suppressWarnings when https://github.com/r-lib/vctrs/issues/109 arr_fac <- suppressWarnings(array(f)) expect_equal(arr_fac$length(), 5L) - expect_equal(arr_fac$type()$index_type(), int8()) + expect_equal(arr_fac$type$index_type(), int8()) expect_identical(arr_fac$as_vector(), f) expect_true(arr_fac$IsValid(0)) expect_true(arr_fac$IsValid(1)) @@ -201,27 +201,27 @@ 
test_that("Array supports ordered factors (ARROW-3355)", { sl <- arr_fac$Slice(1) expect_equal(sl$length(), 4L) - expect_equal(arr_fac$type()$index_type(), int8()) + expect_equal(arr_fac$type$index_type(), int8()) expect_equal(sl$as_vector(), f[2:5]) }) test_that("array supports Date (ARROW-3340)", { d <- Sys.Date() + 1:10 a <- array(d) - expect_equal(a$type(), date32()) + expect_equal(a$type, date32()) expect_equal(a$length(), 10L) expect_equal(a$as_vector(), d) d[5] <- NA a <- array(d) - expect_equal(a$type(), date32()) + expect_equal(a$type, date32()) expect_equal(a$length(), 10L) expect_equal(a$as_vector(), d) expect_true(a$IsNull(4)) d2 <- d + .5 a <- array(d2) - expect_equal(a$type(), date32()) + expect_equal(a$type, date32()) expect_equal(a$length(), 10L) expect_equal(a$as_vector(), d) expect_true(a$IsNull(4)) @@ -230,15 +230,15 @@ test_that("array supports Date (ARROW-3340)", { test_that("array supports POSIXct (ARROW-3340)", { times <- lubridate::ymd_hms("2018-10-07 19:04:05") + 1:10 a <- array(times) - expect_equal(a$type()$name(), "timestamp") - expect_equal(a$type()$unit(), unclass(TimeUnit$MICRO)) + expect_equal(a$type$name(), "timestamp") + expect_equal(a$type$unit(), unclass(TimeUnit$MICRO)) expect_equal(a$length(), 10L) expect_equal(as.numeric(a$as_vector()), as.numeric(times)) times[5] <- NA a <- array(times) - expect_equal(a$type()$name(), "timestamp") - expect_equal(a$type()$unit(), unclass(TimeUnit$MICRO)) + expect_equal(a$type$name(), "timestamp") + expect_equal(a$type$unit(), unclass(TimeUnit$MICRO)) expect_equal(a$length(), 10L) expect_equal(as.numeric(a$as_vector()), as.numeric(times)) expect_true(a$IsNull(4)) @@ -247,13 +247,13 @@ test_that("array supports POSIXct (ARROW-3340)", { test_that("array supports integer64", { x <- bit64::as.integer64(1:10) a <- array(x) - expect_equal(a$type(), int64()) + expect_equal(a$type, int64()) expect_equal(a$length(), 10L) expect_equal(a$as_vector(), x) x[4] <- NA a <- array(x) - expect_equal(a$type(), 
int64()) + expect_equal(a$type, int64()) expect_equal(a$length(), 10L) expect_equal(a$as_vector(), x) expect_true(a$IsNull(3L)) @@ -268,12 +268,12 @@ test_that("array$as_vector() correctly handles all NA inte64 (ARROW-3795)", { test_that("array supports difftime", { time <- hms::hms(56, 34, 12) a <- array(time, time) - expect_equal(a$type(), time32(unit = TimeUnit$SECOND)) + expect_equal(a$type, time32(unit = TimeUnit$SECOND)) expect_equal(a$length(), 2L) expect_equal(a$as_vector(), c(time, time)) a <- array(time, NA) - expect_equal(a$type(), time32(unit = TimeUnit$SECOND)) + expect_equal(a$type, time32(unit = TimeUnit$SECOND)) expect_equal(a$length(), 2L) expect_true(a$IsNull(1)) expect_equal(a$as_vector()[1], time) @@ -284,7 +284,7 @@ test_that("support for NaN (ARROW-3615)", { x <- c(1, NA, NaN, -1) y <- array(x) expect_true(y$IsValid(2)) - expect_equal(y$null_count(), 1L) + expect_equal(y$null_count, 1L) }) test_that("array ignores the type argument (ARROW-3784)", { diff --git a/r/tests/testthat/test-RecordBatch.R b/r/tests/testthat/test-RecordBatch.R index 19f2ef28c84..c8e91e78081 100644 --- a/r/tests/testthat/test-RecordBatch.R +++ b/r/tests/testthat/test-RecordBatch.R @@ -47,27 +47,27 @@ test_that("RecordBatch", { col_int <- batch$column(0) expect_true(inherits(col_int, 'arrow::Array')) expect_equal(col_int$as_vector(), tbl$int) - expect_equal(col_int$type(), int32()) + expect_equal(col_int$type, int32()) col_dbl <- batch$column(1) expect_true(inherits(col_dbl, 'arrow::Array')) expect_equal(col_dbl$as_vector(), tbl$dbl) - expect_equal(col_dbl$type(), float64()) + expect_equal(col_dbl$type, float64()) col_lgl <- batch$column(2) expect_true(inherits(col_dbl, 'arrow::Array')) expect_equal(col_lgl$as_vector(), tbl$lgl) - expect_equal(col_lgl$type(), boolean()) + expect_equal(col_lgl$type, boolean()) col_chr <- batch$column(3) expect_true(inherits(col_chr, 'arrow::Array')) expect_equal(col_chr$as_vector(), tbl$chr) - expect_equal(col_chr$type(), utf8()) + 
expect_equal(col_chr$type, utf8()) col_fct <- batch$column(4) expect_true(inherits(col_fct, 'arrow::Array')) expect_equal(col_fct$as_vector(), tbl$fct) - expect_equal(col_fct$type(), dictionary(int32(), array(letters[1:10]))) + expect_equal(col_fct$type, dictionary(int32(), array(letters[1:10]))) batch2 <- batch$RemoveColumn(0) diff --git a/r/tests/testthat/test-chunkedarray.R b/r/tests/testthat/test-chunkedarray.R index 8bca6201477..188b32b272c 100644 --- a/r/tests/testthat/test-chunkedarray.R +++ b/r/tests/testthat/test-chunkedarray.R @@ -19,38 +19,38 @@ context("arrow::ChunkedArray") test_that("ChunkedArray", { x <- chunked_array(1:10, 1:10, 1:5) - expect_equal(x$type(), int32()) - expect_equal(x$num_chunks(), 3L) + expect_equal(x$type, int32()) + expect_equal(x$num_chunks, 3L) expect_equal(x$length(), 25L) expect_equal(x$as_vector(), c(1:10, 1:10, 1:5)) y <- x$Slice(8) - expect_equal(y$type(), int32()) - expect_equal(y$num_chunks(), 3L) + expect_equal(y$type, int32()) + expect_equal(y$num_chunks, 3L) expect_equal(y$length(), 17L) expect_equal(y$as_vector(), c(9:10, 1:10, 1:5)) z <- x$Slice(8, 5) - expect_equal(z$type(), int32()) - expect_equal(z$num_chunks(), 2L) + expect_equal(z$type, int32()) + expect_equal(z$num_chunks, 2L) expect_equal(z$length(), 5L) expect_equal(z$as_vector(), c(9:10, 1:3)) x_dbl <- chunked_array(c(1,2,3), c(4,5,6)) - expect_equal(x_dbl$type(), float64()) - expect_equal(x_dbl$num_chunks(), 2L) + expect_equal(x_dbl$type, float64()) + expect_equal(x_dbl$num_chunks, 2L) expect_equal(x_dbl$length(), 6L) expect_equal(x_dbl$as_vector(), as.numeric(1:6)) y_dbl <- x_dbl$Slice(2) - expect_equal(y_dbl$type(), float64()) - expect_equal(y_dbl$num_chunks(), 2L) + expect_equal(y_dbl$type, float64()) + expect_equal(y_dbl$num_chunks, 2L) expect_equal(y_dbl$length(), 4L) expect_equal(y_dbl$as_vector(), as.numeric(3:6)) z_dbl <- x_dbl$Slice(2, 2) - expect_equal(z_dbl$type(), float64()) - expect_equal(z_dbl$num_chunks(), 2L) + expect_equal(z_dbl$type, 
float64()) + expect_equal(z_dbl$num_chunks, 2L) expect_equal(z_dbl$length(), 2L) expect_equal(z_dbl$as_vector(), as.numeric(3:4)) }) @@ -58,19 +58,19 @@ test_that("ChunkedArray", { test_that("ChunkedArray handles !!! splicing", { data <- list(1, 2, 3) x <- chunked_array(!!!data) - expect_equal(x$type(), float64()) - expect_equal(x$num_chunks(), 3L) + expect_equal(x$type, float64()) + expect_equal(x$num_chunks, 3L) }) test_that("ChunkedArray handles NA", { data <- list(1:10, c(NA, 2:10), c(1:3, NA, 5L)) x <- chunked_array(!!!data) - expect_equal(x$type(), int32()) - expect_equal(x$num_chunks(), 3L) + expect_equal(x$type, int32()) + expect_equal(x$num_chunks, 3L) expect_equal(x$length(), 25L) expect_equal(x$as_vector(), c(1:10, c(NA, 2:10), c(1:3, NA, 5))) - chunks <- x$chunks() + chunks <- x$chunks expect_equal(Array__Mask(chunks[[1]]), !is.na(data[[1]])) expect_equal(Array__Mask(chunks[[2]]), !is.na(data[[2]])) expect_equal(Array__Mask(chunks[[3]]), !is.na(data[[3]])) @@ -81,10 +81,10 @@ test_that("ChunkedArray supports logical vectors (ARROW-3341)", { data <- purrr::rerun(3, sample(c(TRUE, FALSE, NA), 100, replace = TRUE)) arr_lgl <- chunked_array(!!!data) expect_equal(arr_lgl$length(), 300L) - expect_equal(arr_lgl$null_count(), sum(unlist(map(data, is.na)))) + expect_equal(arr_lgl$null_count, sum(unlist(map(data, is.na)))) expect_identical(arr_lgl$as_vector(), purrr::flatten_lgl(data)) - chunks <- arr_lgl$chunks() + chunks <- arr_lgl$chunks expect_identical(data[[1]], chunks[[1]]$as_vector()) expect_identical(data[[2]], chunks[[2]]$as_vector()) expect_identical(data[[3]], chunks[[3]]$as_vector()) @@ -94,10 +94,10 @@ test_that("ChunkedArray supports logical vectors (ARROW-3341)", { data <- purrr::rerun(3, sample(c(TRUE, FALSE), 100, replace = TRUE)) arr_lgl <- chunked_array(!!!data) expect_equal(arr_lgl$length(), 300L) - expect_equal(arr_lgl$null_count(), sum(unlist(map(data, is.na)))) + expect_equal(arr_lgl$null_count, sum(unlist(map(data, is.na)))) 
expect_identical(arr_lgl$as_vector(), purrr::flatten_lgl(data)) - chunks <- arr_lgl$chunks() + chunks <- arr_lgl$chunks expect_identical(data[[1]], chunks[[1]]$as_vector()) expect_identical(data[[2]], chunks[[2]]$as_vector()) expect_identical(data[[3]], chunks[[3]]$as_vector()) @@ -112,10 +112,10 @@ test_that("ChunkedArray supports character vectors (ARROW-3339)", { ) arr_chr <- chunked_array(!!!data) expect_equal(arr_chr$length(), length(unlist(data))) - expect_equal(arr_chr$null_count(), 1L) + expect_equal(arr_chr$null_count, 1L) expect_equal(arr_chr$as_vector(), purrr::flatten_chr(data)) - chunks <- arr_chr$chunks() + chunks <- arr_chr$chunks expect_equal(data, purrr::map(chunks, ~.$as_vector())) }) @@ -123,14 +123,14 @@ test_that("ChunkedArray supports factors (ARROW-3716)", { f <- factor(c("itsy", "bitsy", "spider", "spider")) arr_fac <- chunked_array(f, f, f) expect_equal(arr_fac$length(), 12L) - expect_equal(arr_fac$type()$index_type(), int8()) + expect_equal(arr_fac$type$index_type(), int8()) expect_identical(arr_fac$as_vector(), vctrs::vec_c(f, f, f)) }) test_that("ChunkedArray supports dates (ARROW-3716)", { d <- Sys.Date() + 1:10 a <- chunked_array(d, d) - expect_equal(a$type(), date32()) + expect_equal(a$type, date32()) expect_equal(a$length(), 20L) expect_equal(a$as_vector(), c(d, d)) }) @@ -138,8 +138,8 @@ test_that("ChunkedArray supports dates (ARROW-3716)", { test_that("ChunkedArray supports POSIXct (ARROW-3716)", { times <- lubridate::ymd_hms("2018-10-07 19:04:05") + 1:10 a <- chunked_array(times, times) - expect_equal(a$type()$name(), "timestamp") - expect_equal(a$type()$unit(), unclass(TimeUnit$MICRO)) + expect_equal(a$type$name(), "timestamp") + expect_equal(a$type$unit(), unclass(TimeUnit$MICRO)) expect_equal(a$length(), 20L) expect_equal(as.numeric(a$as_vector()), as.numeric(c(times, times))) }) @@ -147,7 +147,7 @@ test_that("ChunkedArray supports POSIXct (ARROW-3716)", { test_that("ChunkedArray supports integer64 (ARROW-3716)", { x <- 
bit64::as.integer64(1:10) a <- chunked_array(x, x) - expect_equal(a$type(), int64()) + expect_equal(a$type, int64()) expect_equal(a$length(), 20L) expect_equal(a$as_vector(), c(x,x)) }) @@ -155,7 +155,7 @@ test_that("ChunkedArray supports integer64 (ARROW-3716)", { test_that("ChunkedArray supports difftime", { time <- hms::hms(56, 34, 12) a <- chunked_array(time, time) - expect_equal(a$type(), time32(unit = TimeUnit$SECOND)) + expect_equal(a$type, time32(unit = TimeUnit$SECOND)) expect_equal(a$length(), 2L) expect_equal(a$as_vector(), c(time, time)) }) diff --git a/r/tests/testthat/test-message.R b/r/tests/testthat/test-message.R index 6f9e94da2ad..3fe5829f869 100644 --- a/r/tests/testthat/test-message.R +++ b/r/tests/testthat/test-message.R @@ -24,7 +24,7 @@ test_that("read_message can read from input stream", { message <- read_message(stream) expect_is(message, "arrow::ipc::Message") - expect_equal(message$type(), MessageType$RECORD_BATCH) + expect_equal(message$type, MessageType$RECORD_BATCH) expect_is(message$body, "arrow::Buffer") expect_is(message$metadata, "arrow::Buffer") diff --git a/r/tests/testthat/test-messagereader.R b/r/tests/testthat/test-messagereader.R index 69c2036cf71..5ff8277625d 100644 --- a/r/tests/testthat/test-messagereader.R +++ b/r/tests/testthat/test-messagereader.R @@ -25,7 +25,7 @@ test_that("MessageReader can be created from raw vectors", { message <- reader$ReadNextMessage() expect_is(message, "arrow::ipc::Message") - expect_equal(message$type(), MessageType$RECORD_BATCH) + expect_equal(message$type, MessageType$RECORD_BATCH) expect_is(message$body, "arrow::Buffer") expect_is(message$metadata, "arrow::Buffer") @@ -45,7 +45,7 @@ test_that("MessageReader can be created from input stream", { message <- reader$ReadNextMessage() expect_is(message, "arrow::ipc::Message") - expect_equal(message$type(), MessageType$RECORD_BATCH) + expect_equal(message$type, MessageType$RECORD_BATCH) expect_is(message$body, "arrow::Buffer") 
expect_is(message$metadata, "arrow::Buffer") diff --git a/r/tests/testthat/test-read-write.R b/r/tests/testthat/test-read-write.R index 47268d71562..ffc14eba72b 100644 --- a/r/tests/testthat/test-read-write.R +++ b/r/tests/testthat/test-read-write.R @@ -31,18 +31,18 @@ test_that("arrow::table round trip", { # arrow::Column col_int <- tab$column(0) expect_equal(col_int$length(), 10L) - expect_equal(col_int$null_count(), 0L) - expect_equal(col_int$type(), int32()) + expect_equal(col_int$null_count, 0L) + expect_equal(col_int$type, int32()) # arrow::ChunkedArray chunked_array_int <- col_int$data() expect_equal(chunked_array_int$length(), 10L) - expect_equal(chunked_array_int$null_count(), 0L) + expect_equal(chunked_array_int$null_count, 0L) expect_equal(chunked_array_int$as_vector(), tbl$int) # arrow::Array - chunks_int <- chunked_array_int$chunks() - expect_equal(length(chunks_int), chunked_array_int$num_chunks()) + chunks_int <- chunked_array_int$chunks + expect_equal(length(chunks_int), chunked_array_int$num_chunks) for( i in seq_along(chunks_int)){ expect_equal(chunked_array_int$chunk(i-1L), chunks_int[[i]]) } @@ -50,18 +50,18 @@ test_that("arrow::table round trip", { # arrow::Column col_dbl <- tab$column(1) expect_equal(col_dbl$length(), 10L) - expect_equal(col_dbl$null_count(), 0L) - expect_equal(col_dbl$type(), float64()) + expect_equal(col_dbl$null_count, 0L) + expect_equal(col_dbl$type, float64()) # arrow::ChunkedArray chunked_array_dbl <- col_dbl$data() expect_equal(chunked_array_dbl$length(), 10L) - expect_equal(chunked_array_dbl$null_count(), 0L) + expect_equal(chunked_array_dbl$null_count, 0L) expect_equal(chunked_array_dbl$as_vector(), tbl$dbl) # arrow::Array - chunks_dbl <- chunked_array_dbl$chunks() - expect_equal(length(chunks_dbl), chunked_array_dbl$num_chunks()) + chunks_dbl <- chunked_array_dbl$chunks + expect_equal(length(chunks_dbl), chunked_array_dbl$num_chunks) for( i in seq_along(chunks_dbl)){ expect_equal(chunked_array_dbl$chunk(i-1L), 
chunks_dbl[[i]]) } @@ -69,18 +69,18 @@ test_that("arrow::table round trip", { # arrow::Colmumn col_raw <- tab$column(2) expect_equal(col_raw$length(), 10L) - expect_equal(col_raw$null_count(), 0L) - expect_equal(col_raw$type(), int8()) + expect_equal(col_raw$null_count, 0L) + expect_equal(col_raw$type, int8()) # arrow::ChunkedArray chunked_array_raw <- col_raw$data() expect_equal(chunked_array_raw$length(), 10L) - expect_equal(chunked_array_raw$null_count(), 0L) + expect_equal(chunked_array_raw$null_count, 0L) expect_equal(chunked_array_raw$as_vector(), tbl$raw) # arrow::Array - chunks_raw <- chunked_array_raw$chunks() - expect_equal(length(chunks_raw), chunked_array_raw$num_chunks()) + chunks_raw <- chunked_array_raw$chunks + expect_equal(length(chunks_raw), chunked_array_raw$num_chunks) for( i in seq_along(chunks_raw)){ expect_equal(chunked_array_raw$chunk(i-1L), chunks_raw[[i]]) } @@ -106,13 +106,13 @@ test_that("arrow::table round trip handles NA in integer and numeric", { expect_equal(tab$column(1)$length(), 10L) expect_equal(tab$column(2)$length(), 10L) - expect_equal(tab$column(0)$null_count(), 1L) - expect_equal(tab$column(1)$null_count(), 2L) - expect_equal(tab$column(2)$null_count(), 0L) + expect_equal(tab$column(0)$null_count, 1L) + expect_equal(tab$column(1)$null_count, 2L) + expect_equal(tab$column(2)$null_count, 0L) - expect_equal(tab$column(0)$type(), int32()) - expect_equal(tab$column(1)$type(), float64()) - expect_equal(tab$column(2)$type(), int8()) + expect_equal(tab$column(0)$type, int32()) + expect_equal(tab$column(1)$type, float64()) + expect_equal(tab$column(2)$type, int8()) tf <- local_tempfile() write_arrow(tbl, tf) diff --git a/r/tests/testthat/test-schema.R b/r/tests/testthat/test-schema.R index 42a8548cec3..2f2d3ee84e7 100644 --- a/r/tests/testthat/test-schema.R +++ b/r/tests/testthat/test-schema.R @@ -36,11 +36,11 @@ test_that("reading schema from Buffer", { message <- reader$ReadNextMessage() expect_is(message, "arrow::ipc::Message") - 
expect_equal(message$type(), MessageType$SCHEMA) + expect_equal(message$type, MessageType$SCHEMA) stream <- BufferReader(buffer) expect_is(stream, "arrow::io::BufferReader") message <- read_message(stream) expect_is(message, "arrow::ipc::Message") - expect_equal(message$type(), MessageType$SCHEMA) + expect_equal(message$type, MessageType$SCHEMA) }) From a7d73066b1b35e3ee30c7e2cce02a68b273c69ad Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 23 Nov 2018 16:48:55 +0100 Subject: [PATCH 12/17] DataType$id and FixedWidthDataType$bit_width become active (as in pyarrow) --- r/R/R6.R | 14 +++-- r/tests/testthat/test-DataType.R | 96 ++++++++++++++++---------------- 2 files changed, 57 insertions(+), 53 deletions(-) diff --git a/r/R/R6.R b/r/R/R6.R index a01e886a620..207371e2277 100644 --- a/r/R/R6.R +++ b/r/R/R6.R @@ -85,11 +85,9 @@ unique_ptr <- function(class, xp) { children = function() { map(DataType__children_pointer(self), shared_ptr, class= `arrow::Field`) }, - id = function(){ - DataType__id(self) - }, + ..dispatch = function(){ - switch(names(Type)[self$id()+1], + switch(names(Type)[self$id + 1], "NA" = null(), BOOL = boolean(), UINT8 = uint8(), @@ -119,6 +117,12 @@ unique_ptr <- function(class, xp) { MAP = stop("Type MAP not implemented yet") ) } + ), + + active = list( + id = function(){ + DataType__id(self) + } ) ) @@ -142,7 +146,7 @@ unique_ptr <- function(class, xp) { #' @name arrow__FixedWidthType `arrow::FixedWidthType` <- R6Class("arrow::FixedWidthType", inherit = `arrow::DataType`, - public = list( + active = list( bit_width = function() FixedWidthType__bit_width(self) ) ) diff --git a/r/tests/testthat/test-DataType.R b/r/tests/testthat/test-DataType.R index b479e5a3f67..4feb4be9945 100644 --- a/r/tests/testthat/test-DataType.R +++ b/r/tests/testthat/test-DataType.R @@ -19,7 +19,7 @@ context("arrow::DataType") test_that("null type works as expected",{ x <- null() - expect_equal(x$id(), 0L) + expect_equal(x$id, 0L) expect_equal(x$name(), "null") 
expect_equal(x$ToString(), "null") expect_true(x == x) @@ -30,133 +30,133 @@ test_that("null type works as expected",{ test_that("boolean type work as expected",{ x <- boolean() - expect_equal(x$id(), 1L) + expect_equal(x$id, 1L) expect_equal(x$name(), "bool") expect_equal(x$ToString(), "bool") expect_true(x == x) expect_false(x == null()) expect_equal(x$num_children(), 0L) expect_equal(x$children(), list()) - expect_equal(x$bit_width(), 1L) + expect_equal(x$bit_width, 1L) }) test_that("int types works as expected",{ x <- uint8() - expect_equal(x$id(), 2L) + expect_equal(x$id, 2L) expect_equal(x$name(), "uint8") expect_equal(x$ToString(), "uint8") expect_true(x == x) expect_false(x == null()) expect_equal(x$num_children(), 0L) expect_equal(x$children(), list()) - expect_equal(x$bit_width(), 8L) + expect_equal(x$bit_width, 8L) x <- int8() - expect_equal(x$id(), 3L) + expect_equal(x$id, 3L) expect_equal(x$name(), "int8") expect_equal(x$ToString(), "int8") expect_true(x == x) expect_false(x == null()) expect_equal(x$num_children(), 0L) expect_equal(x$children(), list()) - expect_equal(x$bit_width(), 8L) + expect_equal(x$bit_width, 8L) x <- uint16() - expect_equal(x$id(), 4L) + expect_equal(x$id, 4L) expect_equal(x$name(), "uint16") expect_equal(x$ToString(), "uint16") expect_true(x == x) expect_false(x == null()) expect_equal(x$num_children(), 0L) expect_equal(x$children(), list()) - expect_equal(x$bit_width(), 16L) + expect_equal(x$bit_width, 16L) x <- int16() - expect_equal(x$id(), 5L) + expect_equal(x$id, 5L) expect_equal(x$name(), "int16") expect_equal(x$ToString(), "int16") expect_true(x == x) expect_false(x == null()) expect_equal(x$num_children(), 0L) expect_equal(x$children(), list()) - expect_equal(x$bit_width(), 16L) + expect_equal(x$bit_width, 16L) x <- uint32() - expect_equal(x$id(), 6L) + expect_equal(x$id, 6L) expect_equal(x$name(), "uint32") expect_equal(x$ToString(), "uint32") expect_true(x == x) expect_false(x == null()) expect_equal(x$num_children(), 
0L) expect_equal(x$children(), list()) - expect_equal(x$bit_width(), 32L) + expect_equal(x$bit_width, 32L) x <- int32() - expect_equal(x$id(), 7L) + expect_equal(x$id, 7L) expect_equal(x$name(), "int32") expect_equal(x$ToString(), "int32") expect_true(x == x) expect_false(x == null()) expect_equal(x$num_children(), 0L) expect_equal(x$children(), list()) - expect_equal(x$bit_width(), 32L) + expect_equal(x$bit_width, 32L) x <- uint64() - expect_equal(x$id(), 8L) + expect_equal(x$id, 8L) expect_equal(x$name(), "uint64") expect_equal(x$ToString(), "uint64") expect_true(x == x) expect_false(x == null()) expect_equal(x$num_children(), 0L) expect_equal(x$children(), list()) - expect_equal(x$bit_width(), 64L) + expect_equal(x$bit_width, 64L) x <- int64() - expect_equal(x$id(), 9L) + expect_equal(x$id, 9L) expect_equal(x$name(), "int64") expect_equal(x$ToString(), "int64") expect_true(x == x) expect_false(x == null()) expect_equal(x$num_children(), 0L) expect_equal(x$children(), list()) - expect_equal(x$bit_width(), 64L) + expect_equal(x$bit_width, 64L) }) test_that("float types work as expected",{ x <- float16() - expect_equal(x$id(), 10L) + expect_equal(x$id, 10L) expect_equal(x$name(), "halffloat") expect_equal(x$ToString(), "halffloat") expect_true(x == x) expect_false(x == null()) expect_equal(x$num_children(), 0L) expect_equal(x$children(), list()) - expect_equal(x$bit_width(), 16L) + expect_equal(x$bit_width, 16L) x <- float32() - expect_equal(x$id(), 11L) + expect_equal(x$id, 11L) expect_equal(x$name(), "float") expect_equal(x$ToString(), "float") expect_true(x == x) expect_false(x == null()) expect_equal(x$num_children(), 0L) expect_equal(x$children(), list()) - expect_equal(x$bit_width(), 32L) + expect_equal(x$bit_width, 32L) x <- float64() - expect_equal(x$id(), 12L) + expect_equal(x$id, 12L) expect_equal(x$name(), "double") expect_equal(x$ToString(), "double") expect_true(x == x) expect_false(x == null()) expect_equal(x$num_children(), 0L) 
expect_equal(x$children(), list()) - expect_equal(x$bit_width(), 64L) + expect_equal(x$bit_width, 64L) }) test_that("utf8 type works as expected",{ x <- utf8() - expect_equal(x$id(), 13L) + expect_equal(x$id, 13L) expect_equal(x$name(), "utf8") expect_equal(x$ToString(), "string") expect_true(x == x) @@ -167,7 +167,7 @@ test_that("utf8 type works as expected",{ test_that("date types work as expected", { x <- date32() - expect_equal(x$id(), 16L) + expect_equal(x$id, 16L) expect_equal(x$name(), "date32") expect_equal(x$ToString(), "date32[day]") expect_true(x == x) @@ -177,7 +177,7 @@ test_that("date types work as expected", { expect_equal(x$unit(), unclass(DateUnit$DAY)) x <- date64() - expect_equal(x$id(), 17L) + expect_equal(x$id, 17L) expect_equal(x$name(), "date64") expect_equal(x$ToString(), "date64[ms]") expect_true(x == x) @@ -189,105 +189,105 @@ test_that("date types work as expected", { test_that("timestamp type works as expected", { x <- timestamp(TimeUnit$SECOND) - expect_equal(x$id(), 18L) + expect_equal(x$id, 18L) expect_equal(x$name(), "timestamp") expect_equal(x$ToString(), "timestamp[s]") expect_true(x == x) expect_false(x == null()) expect_equal(x$num_children(), 0L) expect_equal(x$children(), list()) - expect_equal(x$bit_width(), 64L) + expect_equal(x$bit_width, 64L) expect_equal(x$timezone(), "") expect_equal(x$unit(), unclass(TimeUnit$SECOND)) x <- timestamp(TimeUnit$MILLI) - expect_equal(x$id(), 18L) + expect_equal(x$id, 18L) expect_equal(x$name(), "timestamp") expect_equal(x$ToString(), "timestamp[ms]") expect_true(x == x) expect_false(x == null()) expect_equal(x$num_children(), 0L) expect_equal(x$children(), list()) - expect_equal(x$bit_width(), 64L) + expect_equal(x$bit_width, 64L) expect_equal(x$timezone(), "") expect_equal(x$unit(), unclass(TimeUnit$MILLI)) x <- timestamp(TimeUnit$MICRO) - expect_equal(x$id(), 18L) + expect_equal(x$id, 18L) expect_equal(x$name(), "timestamp") expect_equal(x$ToString(), "timestamp[us]") expect_true(x == x) 
expect_false(x == null()) expect_equal(x$num_children(), 0L) expect_equal(x$children(), list()) - expect_equal(x$bit_width(), 64L) + expect_equal(x$bit_width, 64L) expect_equal(x$timezone(), "") expect_equal(x$unit(), unclass(TimeUnit$MICRO)) x <- timestamp(TimeUnit$NANO) - expect_equal(x$id(), 18L) + expect_equal(x$id, 18L) expect_equal(x$name(), "timestamp") expect_equal(x$ToString(), "timestamp[ns]") expect_true(x == x) expect_false(x == null()) expect_equal(x$num_children(), 0L) expect_equal(x$children(), list()) - expect_equal(x$bit_width(), 64L) + expect_equal(x$bit_width, 64L) expect_equal(x$timezone(), "") expect_equal(x$unit(), unclass(TimeUnit$NANO)) }) test_that("time32 types work as expected", { x <- time32(TimeUnit$SECOND) - expect_equal(x$id(), 19L) + expect_equal(x$id, 19L) expect_equal(x$name(), "time32") expect_equal(x$ToString(), "time32[s]") expect_true(x == x) expect_false(x == null()) expect_equal(x$num_children(), 0L) expect_equal(x$children(), list()) - expect_equal(x$bit_width(), 32L) + expect_equal(x$bit_width, 32L) expect_equal(x$unit(), unclass(TimeUnit$SECOND)) x <- time32(TimeUnit$MILLI) - expect_equal(x$id(), 19L) + expect_equal(x$id, 19L) expect_equal(x$name(), "time32") expect_equal(x$ToString(), "time32[ms]") expect_true(x == x) expect_false(x == null()) expect_equal(x$num_children(), 0L) expect_equal(x$children(), list()) - expect_equal(x$bit_width(), 32L) + expect_equal(x$bit_width, 32L) expect_equal(x$unit(), unclass(TimeUnit$MILLI)) }) test_that("time64 types work as expected", { x <- time64(TimeUnit$MICRO) - expect_equal(x$id(), 20L) + expect_equal(x$id, 20L) expect_equal(x$name(), "time64") expect_equal(x$ToString(), "time64[us]") expect_true(x == x) expect_false(x == null()) expect_equal(x$num_children(), 0L) expect_equal(x$children(), list()) - expect_equal(x$bit_width(), 64L) + expect_equal(x$bit_width, 64L) expect_equal(x$unit(), unclass(TimeUnit$MICRO)) x <- time64(TimeUnit$NANO) - expect_equal(x$id(), 20L) + 
expect_equal(x$id, 20L) expect_equal(x$name(), "time64") expect_equal(x$ToString(), "time64[ns]") expect_true(x == x) expect_false(x == null()) expect_equal(x$num_children(), 0L) expect_equal(x$children(), list()) - expect_equal(x$bit_width(), 64L) + expect_equal(x$bit_width, 64L) expect_equal(x$unit(), unclass(TimeUnit$NANO)) }) test_that("list type works as expected", { x <- list_of(int32()) - expect_equal(x$id(), 23L) + expect_equal(x$id, 23L) expect_equal(x$name(), "list") expect_equal(x$ToString(), "list") expect_true(x == x) @@ -301,7 +301,7 @@ test_that("list type works as expected", { test_that("struct type works as expected", { x <- struct(x = int32(), y = boolean()) - expect_equal(x$id(), 24L) + expect_equal(x$id, 24L) expect_equal(x$name(), "struct") expect_equal(x$ToString(), "struct") expect_true(x == x) @@ -318,8 +318,8 @@ test_that("DictionaryType works as expected (ARROW-3355)", { expect_equal(d, d) expect_true(d == d) expect_false(d == int32()) - expect_equal(d$id(), Type$DICTIONARY) - expect_equal(d$bit_width(), 32L) + expect_equal(d$id, Type$DICTIONARY) + expect_equal(d$bit_width, 32L) expect_equal(d$ToString(), "dictionary") expect_equal(d$index_type(), int32()) expect_equal(d$dictionary(), array(c("foo", "bar", "baz"))) From ac0528d3c163bd35f19dbc2ad1acaf128ca590cb Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 23 Nov 2018 16:58:16 +0100 Subject: [PATCH 13/17] methods -> active --- r/R/Field.R | 10 +++++++--- r/R/buffer.R | 5 ++++- r/R/io.R | 2 +- r/tests/testthat/test-arraydata.R | 2 +- r/tests/testthat/test-buffer.R | 6 +++--- 5 files changed, 16 insertions(+), 9 deletions(-) diff --git a/r/R/Field.R b/r/R/Field.R index 224866409cb..6b21c8bab82 100644 --- a/r/R/Field.R +++ b/r/R/Field.R @@ -35,15 +35,19 @@ ToString = function() { Field__ToString(self) }, + Equals = function(other) { + inherits(other, "arrow::Field") && Field__Equals(self, other) + } + ), + + active = list( name = function() { Field__name(self) }, + nullable = 
function() { Field__nullable(self) }, - Equals = function(other) { - inherits(other, "arrow::Field") && Field__Equals(self, other) - }, type = function() { `arrow::DataType`$dispatch(Field__type(self)) } diff --git a/r/R/buffer.R b/r/R/buffer.R index c0cadbe8805..2fecd0e4fc6 100644 --- a/r/R/buffer.R +++ b/r/R/buffer.R @@ -35,8 +35,11 @@ #' @name arrow__Buffer `arrow::Buffer` <- R6Class("arrow::Buffer", inherit = `arrow::Object`, public = list( + ZeroPadding = function() Buffer__ZeroPadding(self) + ), + + active = list( is_mutable = function() Buffer__is_mutable(self), - ZeroPadding = function() Buffer__ZeroPadding(self), size = function() Buffer__size(self), capacity = function() Buffer__capacity(self) ) diff --git a/r/R/io.R b/r/R/io.R index 2e0d40be96a..dd4d173ad48 100644 --- a/r/R/io.R +++ b/r/R/io.R @@ -258,7 +258,7 @@ FixedSizeBufferWriter.default <- function(buffer){ #' @export `FixedSizeBufferWriter.arrow::Buffer` <- function(buffer){ - assert_that(buffer$is_mutable()) + assert_that(buffer$is_mutable) shared_ptr(`arrow::io::FixedSizeBufferWriter`, io___FixedSizeBufferWriter__initialize(buffer)) } diff --git a/r/tests/testthat/test-arraydata.R b/r/tests/testthat/test-arraydata.R index 5d8f8f1dcaa..02ca9b85625 100644 --- a/r/tests/testthat/test-arraydata.R +++ b/r/tests/testthat/test-arraydata.R @@ -24,5 +24,5 @@ test_that("string vectors with only empty strings and nulls don't allocate a dat buffers <- a$data()$buffers expect_null(buffers[[1]]) expect_null(buffers[[3]]) - expect_equal(buffers[[2]]$size(), 8L) + expect_equal(buffers[[2]]$size, 8L) }) diff --git a/r/tests/testthat/test-buffer.R b/r/tests/testthat/test-buffer.R index aa712b02680..a071c35304c 100644 --- a/r/tests/testthat/test-buffer.R +++ b/r/tests/testthat/test-buffer.R @@ -21,21 +21,21 @@ test_that("arrow::Buffer can be created from raw vector", { vec <- raw(123) buf <- buffer(vec) expect_is(buf, "arrow::Buffer") - expect_equal(buf$size(), 123) + expect_equal(buf$size, 123) }) 
test_that("arrow::Buffer can be created from integer vector", { vec <- integer(17) buf <- buffer(vec) expect_is(buf, "arrow::Buffer") - expect_equal(buf$size(), 17 * 4) + expect_equal(buf$size, 17 * 4) }) test_that("arrow::Buffer can be created from numeric vector", { vec <- numeric(17) buf <- buffer(vec) expect_is(buf, "arrow::Buffer") - expect_equal(buf$size(), 17 * 8) + expect_equal(buf$size, 17 * 8) }) test_that("arrow::Buffer can be created from complex vector", { From eed49297e13ffe4b56fddb955033d3a237b391c1 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 23 Nov 2018 17:05:15 +0100 Subject: [PATCH 14/17] doc fixes --- r/R/io.R | 45 +++++++++++++++++++++++ r/man/arrow__io__BufferOutputStream.Rd | 5 +-- r/man/arrow__io__FileOutputStream.Rd | 6 +-- r/man/arrow__io__FixedSizeBufferWriter.Rd | 6 +-- r/man/arrow__io__InputStream.Rd | 6 +-- r/man/arrow__io__MemoryMappedFile.Rd | 6 +-- r/man/arrow__io__MockOutputStream.Rd | 6 +-- r/man/arrow__io__OutputStream.Rd | 6 +-- r/man/arrow__io__RandomAccessFile.Rd | 6 +-- r/man/arrow__io__Readable.Rd | 6 +-- r/man/arrow__io__ReadableFile.Rd | 6 +-- 11 files changed, 55 insertions(+), 49 deletions(-) diff --git a/r/R/io.R b/r/R/io.R index dd4d173ad48..8164c16407b 100644 --- a/r/R/io.R +++ b/r/R/io.R @@ -25,6 +25,10 @@ #' @title OutputStream #' +#' @usage NULL +#' @format NULL +#' @docType class +#' #' @section Methods: #' #' - `arrow::Buffer` `Read`(`int` nbytes): Read `nbytes` bytes @@ -40,6 +44,10 @@ #' @title class arrow::io::FileOutputStream #' +#' @usage NULL +#' @format NULL +#' @docType class +#' #' @section Methods: #' #' TODO @@ -50,6 +58,11 @@ #' @title class arrow::io::MockOutputStream #' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' #' @section Methods: #' #' TODO @@ -64,6 +77,8 @@ #' @title class arrow::io::BufferOutputStream #' +#' @usage NULL +#' @docType class #' @section Methods: #' #' TODO @@ -82,6 +97,11 @@ #' @title class arrow::io::FixedSizeBufferWriter #' +#' @usage NULL +#' 
@format NULL +#' @docType class +#' +#' #' @section Methods: #' #' TODO @@ -95,6 +115,11 @@ #' @title class arrow::io::Readable #' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' #' @section Methods: #' #' TODO @@ -109,6 +134,11 @@ #' @title class arrow::io::InputStream #' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' #' @section Methods: #' #' TODO @@ -123,6 +153,11 @@ #' @title class arrow::io::RandomAccessFile #' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' #' @section Methods: #' #' TODO @@ -140,6 +175,11 @@ #' @title class arrow::io::MemoryMappedFile #' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' #' @section Methods: #' #' TODO @@ -157,6 +197,11 @@ #' @title class arrow::io::ReadableFile #' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' #' @section Methods: #' #' TODO diff --git a/r/man/arrow__io__BufferOutputStream.Rd b/r/man/arrow__io__BufferOutputStream.Rd index ade78ca276d..e90d1cc0ed8 100644 --- a/r/man/arrow__io__BufferOutputStream.Rd +++ b/r/man/arrow__io__BufferOutputStream.Rd @@ -1,14 +1,11 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/io.R -\docType{data} +\docType{class} \name{arrow__io__BufferOutputStream} \alias{arrow__io__BufferOutputStream} \alias{arrow::io::BufferOutputStream} \title{class arrow::io::BufferOutputStream} \format{An object of class \code{R6ClassGenerator} of length 24.} -\usage{ -arrow::io::BufferOutputStream -} \description{ class arrow::io::BufferOutputStream } diff --git a/r/man/arrow__io__FileOutputStream.Rd b/r/man/arrow__io__FileOutputStream.Rd index 429ab8dadbd..92eaac13c9f 100644 --- a/r/man/arrow__io__FileOutputStream.Rd +++ b/r/man/arrow__io__FileOutputStream.Rd @@ -1,14 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/io.R -\docType{data} +\docType{class} \name{arrow__io__FileOutputStream} \alias{arrow__io__FileOutputStream} \alias{arrow::io::FileOutputStream} \title{class 
arrow::io::FileOutputStream} -\format{An object of class \code{R6ClassGenerator} of length 24.} -\usage{ -arrow::io::FileOutputStream -} \description{ class arrow::io::FileOutputStream } diff --git a/r/man/arrow__io__FixedSizeBufferWriter.Rd b/r/man/arrow__io__FixedSizeBufferWriter.Rd index ca0ec4a5b01..39d8bb69c25 100644 --- a/r/man/arrow__io__FixedSizeBufferWriter.Rd +++ b/r/man/arrow__io__FixedSizeBufferWriter.Rd @@ -1,14 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/io.R -\docType{data} +\docType{class} \name{arrow__io__FixedSizeBufferWriter} \alias{arrow__io__FixedSizeBufferWriter} \alias{arrow::io::FixedSizeBufferWriter} \title{class arrow::io::FixedSizeBufferWriter} -\format{An object of class \code{R6ClassGenerator} of length 24.} -\usage{ -arrow::io::FixedSizeBufferWriter -} \description{ class arrow::io::FixedSizeBufferWriter } diff --git a/r/man/arrow__io__InputStream.Rd b/r/man/arrow__io__InputStream.Rd index a18fe7af344..37f83308b64 100644 --- a/r/man/arrow__io__InputStream.Rd +++ b/r/man/arrow__io__InputStream.Rd @@ -1,14 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/io.R -\docType{data} +\docType{class} \name{arrow__io__InputStream} \alias{arrow__io__InputStream} \alias{arrow::io::InputStream} \title{class arrow::io::InputStream} -\format{An object of class \code{R6ClassGenerator} of length 24.} -\usage{ -arrow::io::InputStream -} \description{ class arrow::io::InputStream } diff --git a/r/man/arrow__io__MemoryMappedFile.Rd b/r/man/arrow__io__MemoryMappedFile.Rd index cbec49e52a6..409bb17302a 100644 --- a/r/man/arrow__io__MemoryMappedFile.Rd +++ b/r/man/arrow__io__MemoryMappedFile.Rd @@ -1,14 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/io.R -\docType{data} +\docType{class} \name{arrow__io__MemoryMappedFile} \alias{arrow__io__MemoryMappedFile} \alias{arrow::io::MemoryMappedFile} \title{class arrow::io::MemoryMappedFile} 
-\format{An object of class \code{R6ClassGenerator} of length 24.} -\usage{ -arrow::io::MemoryMappedFile -} \description{ class arrow::io::MemoryMappedFile } diff --git a/r/man/arrow__io__MockOutputStream.Rd b/r/man/arrow__io__MockOutputStream.Rd index aa1fe0df55b..f0b2c06d7a5 100644 --- a/r/man/arrow__io__MockOutputStream.Rd +++ b/r/man/arrow__io__MockOutputStream.Rd @@ -1,14 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/io.R -\docType{data} +\docType{class} \name{arrow__io__MockOutputStream} \alias{arrow__io__MockOutputStream} \alias{arrow::io::MockOutputStream} \title{class arrow::io::MockOutputStream} -\format{An object of class \code{R6ClassGenerator} of length 24.} -\usage{ -arrow::io::MockOutputStream -} \description{ class arrow::io::MockOutputStream } diff --git a/r/man/arrow__io__OutputStream.Rd b/r/man/arrow__io__OutputStream.Rd index 94b6a73e9a7..c41b815c021 100644 --- a/r/man/arrow__io__OutputStream.Rd +++ b/r/man/arrow__io__OutputStream.Rd @@ -1,14 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/io.R -\docType{data} +\docType{class} \name{arrow__io__OutputStream} \alias{arrow__io__OutputStream} \alias{arrow::io::OutputStream} \title{OutputStream} -\format{An object of class \code{R6ClassGenerator} of length 24.} -\usage{ -arrow::io::OutputStream -} \description{ OutputStream } diff --git a/r/man/arrow__io__RandomAccessFile.Rd b/r/man/arrow__io__RandomAccessFile.Rd index 55a308e0994..f8cb86abda6 100644 --- a/r/man/arrow__io__RandomAccessFile.Rd +++ b/r/man/arrow__io__RandomAccessFile.Rd @@ -1,14 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/io.R -\docType{data} +\docType{class} \name{arrow__io__RandomAccessFile} \alias{arrow__io__RandomAccessFile} \alias{arrow::io::RandomAccessFile} \title{class arrow::io::RandomAccessFile} -\format{An object of class \code{R6ClassGenerator} of length 24.} -\usage{ -arrow::io::RandomAccessFile -} 
\description{ class arrow::io::RandomAccessFile } diff --git a/r/man/arrow__io__Readable.Rd b/r/man/arrow__io__Readable.Rd index a0b6af95eec..b0b30a42302 100644 --- a/r/man/arrow__io__Readable.Rd +++ b/r/man/arrow__io__Readable.Rd @@ -1,14 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/io.R -\docType{data} +\docType{class} \name{arrow__io__Readable} \alias{arrow__io__Readable} \alias{arrow::io::Readable} \title{class arrow::io::Readable} -\format{An object of class \code{R6ClassGenerator} of length 24.} -\usage{ -arrow::io::Readable -} \description{ class arrow::io::Readable } diff --git a/r/man/arrow__io__ReadableFile.Rd b/r/man/arrow__io__ReadableFile.Rd index a8a6783eef6..440149fbbb4 100644 --- a/r/man/arrow__io__ReadableFile.Rd +++ b/r/man/arrow__io__ReadableFile.Rd @@ -1,14 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/io.R -\docType{data} +\docType{class} \name{arrow__io__ReadableFile} \alias{arrow__io__ReadableFile} \alias{arrow::io::ReadableFile} \title{class arrow::io::ReadableFile} -\format{An object of class \code{R6ClassGenerator} of length 24.} -\usage{ -arrow::io::ReadableFile -} \description{ class arrow::io::ReadableFile } From d11c175c62bd5abdc3aaf340658cf3ffca546f40 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 23 Nov 2018 18:39:02 +0100 Subject: [PATCH 15/17] doc fixes --- r/R/RecordBatchWriter.R | 2 +- r/R/Table.R | 14 +++++++++++++- r/R/io.R | 6 +++++- r/man/BufferReader.Rd | 2 +- r/man/RecordBatchStreamWriter.Rd | 2 +- r/man/arrow__Table.Rd | 17 +++++++++++++++++ r/man/arrow__io__BufferReader.Rd | 6 +----- 7 files changed, 39 insertions(+), 10 deletions(-) create mode 100644 r/man/arrow__Table.Rd diff --git a/r/R/RecordBatchWriter.R b/r/R/RecordBatchWriter.R index 78d1e81c54b..77305114d33 100644 --- a/r/R/RecordBatchWriter.R +++ b/r/R/RecordBatchWriter.R @@ -94,7 +94,7 @@ #' #' @param sink Where to write. 
Can either be: #' -#' - A string, meant as a file path, passed to [fs::path_ab] +#' - A string, meant as a file path, passed to [fs::path_abs()] #' - a [file path][fs::path_abs()] #' - [arrow::io::OutputStream][arrow__io__OutputStream] #' diff --git a/r/R/Table.R b/r/R/Table.R index 9ec5f614dbb..5f1502e5616 100644 --- a/r/R/Table.R +++ b/r/R/Table.R @@ -16,7 +16,19 @@ # under the License. #' @include R6.R - +#' +#' @title class arrow::Table +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__Table +#' @name arrow__Table `arrow::Table` <- R6Class("arrow::Table", inherit = `arrow::Object`, public = list( column = function(i) shared_ptr(`arrow::Column`, Table__column(self, i)), diff --git a/r/R/io.R b/r/R/io.R index 8164c16407b..b772be30acf 100644 --- a/r/R/io.R +++ b/r/R/io.R @@ -212,6 +212,10 @@ #' @title class arrow::io::BufferReader #' +#' @usage NULL +#' @format NULL +#' @docType class +#' #' @section Methods: #' #' TODO @@ -309,7 +313,7 @@ FixedSizeBufferWriter.default <- function(buffer){ #' Create a [arrow::io::BufferReader][arrow__io__BufferReader] #' -#' @param buffer R object to treat as a buffer or a buffer created by [buffer()] +#' @param x R object to treat as a buffer or a buffer created by [buffer()] #' #' @export BufferReader <- function(x) { diff --git a/r/man/BufferReader.Rd b/r/man/BufferReader.Rd index d8f9d536270..ea5dd790cdd 100644 --- a/r/man/BufferReader.Rd +++ b/r/man/BufferReader.Rd @@ -7,7 +7,7 @@ BufferReader(x) } \arguments{ -\item{buffer}{R object to treat as a buffer or a buffer created by \code{\link[=buffer]{buffer()}}} +\item{x}{R object to treat as a buffer or a buffer created by \code{\link[=buffer]{buffer()}}} } \description{ Create a \link[=arrow__io__BufferReader]{arrow::io::BufferReader} diff --git a/r/man/RecordBatchStreamWriter.Rd b/r/man/RecordBatchStreamWriter.Rd index 693e8046a0b..b9183a80719 100644 --- a/r/man/RecordBatchStreamWriter.Rd +++ 
b/r/man/RecordBatchStreamWriter.Rd @@ -9,7 +9,7 @@ RecordBatchStreamWriter(sink, schema) \arguments{ \item{sink}{Where to write. Can either be: \itemize{ -\item A string, meant as a file path, passed to \link[fs:path_ab]{fs::path_ab} +\item A string, meant as a file path, passed to \code{\link[fs:path_abs]{fs::path_abs()}} \item a \link[fs:path_abs]{file path} \item \link[=arrow__io__OutputStream]{arrow::io::OutputStream} }} diff --git a/r/man/arrow__Table.Rd b/r/man/arrow__Table.Rd new file mode 100644 index 00000000000..139db980acf --- /dev/null +++ b/r/man/arrow__Table.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Table.R +\docType{class} +\name{arrow__Table} +\alias{arrow__Table} +\alias{arrow::Table} +\title{class arrow::Table} +\description{ +class arrow::Table +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__io__BufferReader.Rd b/r/man/arrow__io__BufferReader.Rd index 42ee6bba94c..609fec5b6d4 100644 --- a/r/man/arrow__io__BufferReader.Rd +++ b/r/man/arrow__io__BufferReader.Rd @@ -1,14 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/io.R -\docType{data} +\docType{class} \name{arrow__io__BufferReader} \alias{arrow__io__BufferReader} \alias{arrow::io::BufferReader} \title{class arrow::io::BufferReader} -\format{An object of class \code{R6ClassGenerator} of length 24.} -\usage{ -arrow::io::BufferReader -} \description{ class arrow::io::BufferReader } From e9889114db1e0b0b24ef12eb7d25b9f23f6518e6 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Tue, 27 Nov 2018 16:14:43 +0100 Subject: [PATCH 16/17] more converting public to active --- r/R/Field.R | 4 +- r/R/R6.R | 6 +-- r/R/RecordBatch.R | 4 +- r/R/Table.R | 2 +- r/R/array.R | 6 +-- r/R/dictionary.R | 5 ++- r/man/field.Rd | 1 - r/tests/testthat/test-Array.R | 42 ++++++++++----------- r/tests/testthat/test-DataType.R | 56 ++++++++++++++-------------- 
r/tests/testthat/test-RecordBatch.R | 6 +-- r/tests/testthat/test-Table.R | 6 +-- r/tests/testthat/test-chunkedarray.R | 20 +++++----- r/tests/testthat/test-field.R | 4 +- 13 files changed, 80 insertions(+), 82 deletions(-) diff --git a/r/R/Field.R b/r/R/Field.R index 6b21c8bab82..4f5636fbfff 100644 --- a/r/R/Field.R +++ b/r/R/Field.R @@ -29,8 +29,7 @@ #' #' @rdname arrow__Field #' @name arrow__Field -`arrow::Field` <- R6Class("arrow::Field", - inherit = `arrow::Object`, +`arrow::Field` <- R6Class("arrow::Field", inherit = `arrow::Object`, public = list( ToString = function() { Field__ToString(self) @@ -44,7 +43,6 @@ name = function() { Field__name(self) }, - nullable = function() { Field__nullable(self) }, diff --git a/r/R/R6.R b/r/R/R6.R index 207371e2277..69d58e0c136 100644 --- a/r/R/R6.R +++ b/r/R/R6.R @@ -72,9 +72,6 @@ unique_ptr <- function(class, xp) { ToString = function() { DataType__ToString(self) }, - name = function() { - DataType__name(self) - }, Equals = function(other) { assert_that(inherits(other, "arrow::DataType")) DataType__Equals(self, other) @@ -122,6 +119,9 @@ unique_ptr <- function(class, xp) { active = list( id = function(){ DataType__id(self) + }, + name = function() { + DataType__name(self) } ) ) diff --git a/r/R/RecordBatch.R b/r/R/RecordBatch.R index 6089427faec..fed10abee76 100644 --- a/r/R/RecordBatch.R +++ b/r/R/RecordBatch.R @@ -51,12 +51,12 @@ } }, - serialize = function(output_stream, ...) 
write_record_batch(self, output_stream, ...), + serialize = function() ipc___SerializeRecordBatch__Raw(self), cast = function(target_schema, safe = TRUE, options = cast_options(safe)) { assert_that(inherits(target_schema, "arrow::Schema")) assert_that(inherits(options, "arrow::compute::CastOptions")) - assert_that(identical(self$schema()$names, target_schema$names), msg = "incompatible schemas") + assert_that(identical(self$schema$names, target_schema$names), msg = "incompatible schemas") shared_ptr(`arrow::RecordBatch`, RecordBatch__cast(self, target_schema, options)) } ), diff --git a/r/R/Table.R b/r/R/Table.R index 5f1502e5616..8972634d59f 100644 --- a/r/R/Table.R +++ b/r/R/Table.R @@ -38,7 +38,7 @@ cast = function(target_schema, safe = TRUE, options = cast_options(safe)) { assert_that(inherits(target_schema, "arrow::Schema")) assert_that(inherits(options, "arrow::compute::CastOptions")) - assert_that(identical(self$schema()$names, target_schema$names), msg = "incompatible schemas") + assert_that(identical(self$schema$names, target_schema$names), msg = "incompatible schemas") shared_ptr(`arrow::Table`, Table__cast(self, target_schema, options)) } ), diff --git a/r/R/array.R b/r/R/array.R index 5d70e59093b..63fdb4e0f61 100644 --- a/r/R/array.R +++ b/r/R/array.R @@ -75,7 +75,7 @@ length = function() Array__length(self), type_id = function() Array__type_id(self), Equals = function(other) Array__Equals(self, other), - ApproxEquals = function(othet) Array__ApproxEquals(self, other), + ApproxEquals = function(other) Array__ApproxEquals(self, other), data = function() shared_ptr(`arrow::ArrayData`, Array__data(self)), as_vector = function() Array__as_vector(self), ToString = function() Array__ToString(self), @@ -98,8 +98,8 @@ ), active = list( null_count = function() Array__null_count(self), - type = function() `arrow::DataType`$dispatch(Array__type(self)), - offset = function() Array__offset(self) + offset = function() Array__offset(self), + type = function() 
`arrow::DataType`$dispatch(Array__type(self)) ) ) diff --git a/r/R/dictionary.R b/r/R/dictionary.R index 28f86a87b92..3c3758df303 100644 --- a/r/R/dictionary.R +++ b/r/R/dictionary.R @@ -31,10 +31,11 @@ #' @name arrow__DictionaryType `arrow::DictionaryType` <- R6Class("arrow::DictionaryType", inherit = `arrow::FixedWidthType`, - public = list( + + active = list( index_type = function() `arrow::DataType`$dispatch(DictionaryType__index_type(self)), - name = function() DictionaryType__name(self), dictionary = function() shared_ptr(`arrow::Array`, DictionaryType__dictionary(self)), + name = function() DictionaryType__name(self), ordered = function() DictionaryType__ordered(self) ) ) diff --git a/r/man/field.Rd b/r/man/field.Rd index 5cbd8033875..1350a1259e5 100644 --- a/r/man/field.Rd +++ b/r/man/field.Rd @@ -18,5 +18,4 @@ Factory for a \code{arrow::Field} } \examples{ field("x", int32()) - } diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R index e2612e42f0d..e456fe88654 100644 --- a/r/tests/testthat/test-Array.R +++ b/r/tests/testthat/test-Array.R @@ -138,7 +138,7 @@ test_that("Array supports unordered factors (ARROW-3355)", { f <- factor(c("itsy", "bitsy", "spider", "spider")) arr_fac <- array(f) expect_equal(arr_fac$length(), 4L) - expect_equal(arr_fac$type$index_type(), int8()) + expect_equal(arr_fac$type$index_type, int8()) expect_identical(arr_fac$as_vector(), f) expect_true(arr_fac$IsValid(0)) expect_true(arr_fac$IsValid(1)) @@ -147,7 +147,7 @@ test_that("Array supports unordered factors (ARROW-3355)", { sl <- arr_fac$Slice(1) expect_equal(sl$length(), 3L) - expect_equal(arr_fac$type$index_type(), int8()) + expect_equal(arr_fac$type$index_type, int8()) expect_equal(sl$as_vector(), f[2:4]) # with NA @@ -155,7 +155,7 @@ test_that("Array supports unordered factors (ARROW-3355)", { # TODO: rm the suppressWarnings when https://github.com/r-lib/vctrs/issues/109 arr_fac <- suppressWarnings(array(f)) expect_equal(arr_fac$length(), 5L) - 
expect_equal(arr_fac$type$index_type(), int8()) + expect_equal(arr_fac$type$index_type, int8()) expect_identical(arr_fac$as_vector(), f) expect_true(arr_fac$IsValid(0)) expect_true(arr_fac$IsValid(1)) @@ -165,7 +165,7 @@ test_that("Array supports unordered factors (ARROW-3355)", { sl <- arr_fac$Slice(1) expect_equal(sl$length(), 4L) - expect_equal(arr_fac$type$index_type(), int8()) + expect_equal(arr_fac$type$index_type, int8()) expect_equal(sl$as_vector(), f[2:5]) }) @@ -174,7 +174,7 @@ test_that("Array supports ordered factors (ARROW-3355)", { f <- ordered(c("itsy", "bitsy", "spider", "spider")) arr_fac <- array(f) expect_equal(arr_fac$length(), 4L) - expect_equal(arr_fac$type$index_type(), int8()) + expect_equal(arr_fac$type$index_type, int8()) expect_identical(arr_fac$as_vector(), f) expect_true(arr_fac$IsValid(0)) expect_true(arr_fac$IsValid(1)) @@ -183,7 +183,7 @@ test_that("Array supports ordered factors (ARROW-3355)", { sl <- arr_fac$Slice(1) expect_equal(sl$length(), 3L) - expect_equal(arr_fac$type$index_type(), int8()) + expect_equal(arr_fac$type$index_type, int8()) expect_equal(sl$as_vector(), f[2:4]) # with NA @@ -191,7 +191,7 @@ test_that("Array supports ordered factors (ARROW-3355)", { # TODO: rm the suppressWarnings when https://github.com/r-lib/vctrs/issues/109 arr_fac <- suppressWarnings(array(f)) expect_equal(arr_fac$length(), 5L) - expect_equal(arr_fac$type$index_type(), int8()) + expect_equal(arr_fac$type$index_type, int8()) expect_identical(arr_fac$as_vector(), f) expect_true(arr_fac$IsValid(0)) expect_true(arr_fac$IsValid(1)) @@ -201,7 +201,7 @@ test_that("Array supports ordered factors (ARROW-3355)", { sl <- arr_fac$Slice(1) expect_equal(sl$length(), 4L) - expect_equal(arr_fac$type$index_type(), int8()) + expect_equal(arr_fac$type$index_type, int8()) expect_equal(sl$as_vector(), f[2:5]) }) @@ -230,14 +230,14 @@ test_that("array supports Date (ARROW-3340)", { test_that("array supports POSIXct (ARROW-3340)", { times <- 
lubridate::ymd_hms("2018-10-07 19:04:05") + 1:10 a <- array(times) - expect_equal(a$type$name(), "timestamp") + expect_equal(a$type$name, "timestamp") expect_equal(a$type$unit(), unclass(TimeUnit$MICRO)) expect_equal(a$length(), 10L) expect_equal(as.numeric(a$as_vector()), as.numeric(times)) times[5] <- NA a <- array(times) - expect_equal(a$type$name(), "timestamp") + expect_equal(a$type$name, "timestamp") expect_equal(a$type$unit(), unclass(TimeUnit$MICRO)) expect_equal(a$length(), 10L) expect_equal(as.numeric(a$as_vector()), as.numeric(times)) @@ -300,10 +300,10 @@ test_that("integer types casts (ARROW-3741)", { a_int32 <- a$cast(int32()) a_int64 <- a$cast(int64()) - expect_equal(a_int8$type(), int8()) - expect_equal(a_int16$type(), int16()) - expect_equal(a_int32$type(), int32()) - expect_equal(a_int64$type(), int64()) + expect_equal(a_int8$type, int8()) + expect_equal(a_int16$type, int16()) + expect_equal(a_int32$type, int32()) + expect_equal(a_int64$type, int64()) expect_true(a_int8$IsNull(10L)) expect_true(a_int16$IsNull(10L)) expect_true(a_int32$IsNull(10L)) @@ -314,10 +314,10 @@ test_that("integer types casts (ARROW-3741)", { a_uint32 <- a$cast(uint32()) a_uint64 <- a$cast(uint64()) - expect_equal(a_uint8$type(), uint8()) - expect_equal(a_uint16$type(), uint16()) - expect_equal(a_uint32$type(), uint32()) - expect_equal(a_uint64$type(), uint64()) + expect_equal(a_uint8$type, uint8()) + expect_equal(a_uint16$type, uint16()) + expect_equal(a_uint32$type, uint32()) + expect_equal(a_uint64$type, uint64()) expect_true(a_uint8$IsNull(10L)) expect_true(a_uint16$IsNull(10L)) expect_true(a_uint32$IsNull(10L)) @@ -345,8 +345,8 @@ test_that("float types casts (ARROW-3741)", { a_f32 <- a$cast(float32()) a_f64 <- a$cast(float64()) - expect_equal(a_f32$type(), float32()) - expect_equal(a_f64$type(), float64()) + expect_equal(a_f32$type, float32()) + expect_equal(a_f64$type, float64()) expect_true(a_f32$IsNull(3L)) expect_true(a_f64$IsNull(3L)) @@ -359,5 +359,5 @@ 
test_that("cast to half float works", { skip("until https://issues.apache.org/jira/browse/ARROW-3802") a <- array(1:4) a_f16 <- a$cast(float16()) - expect_equal(a_16$type(), float16()) + expect_equal(a_16$type, float16()) }) diff --git a/r/tests/testthat/test-DataType.R b/r/tests/testthat/test-DataType.R index 4feb4be9945..fc9fc896eae 100644 --- a/r/tests/testthat/test-DataType.R +++ b/r/tests/testthat/test-DataType.R @@ -20,7 +20,7 @@ context("arrow::DataType") test_that("null type works as expected",{ x <- null() expect_equal(x$id, 0L) - expect_equal(x$name(), "null") + expect_equal(x$name, "null") expect_equal(x$ToString(), "null") expect_true(x == x) expect_false(x == int8()) @@ -31,7 +31,7 @@ test_that("null type works as expected",{ test_that("boolean type work as expected",{ x <- boolean() expect_equal(x$id, 1L) - expect_equal(x$name(), "bool") + expect_equal(x$name, "bool") expect_equal(x$ToString(), "bool") expect_true(x == x) expect_false(x == null()) @@ -43,7 +43,7 @@ test_that("boolean type work as expected",{ test_that("int types works as expected",{ x <- uint8() expect_equal(x$id, 2L) - expect_equal(x$name(), "uint8") + expect_equal(x$name, "uint8") expect_equal(x$ToString(), "uint8") expect_true(x == x) expect_false(x == null()) @@ -53,7 +53,7 @@ test_that("int types works as expected",{ x <- int8() expect_equal(x$id, 3L) - expect_equal(x$name(), "int8") + expect_equal(x$name, "int8") expect_equal(x$ToString(), "int8") expect_true(x == x) expect_false(x == null()) @@ -63,7 +63,7 @@ test_that("int types works as expected",{ x <- uint16() expect_equal(x$id, 4L) - expect_equal(x$name(), "uint16") + expect_equal(x$name, "uint16") expect_equal(x$ToString(), "uint16") expect_true(x == x) expect_false(x == null()) @@ -73,7 +73,7 @@ test_that("int types works as expected",{ x <- int16() expect_equal(x$id, 5L) - expect_equal(x$name(), "int16") + expect_equal(x$name, "int16") expect_equal(x$ToString(), "int16") expect_true(x == x) expect_false(x == null()) @@ 
-83,7 +83,7 @@ test_that("int types works as expected",{ x <- uint32() expect_equal(x$id, 6L) - expect_equal(x$name(), "uint32") + expect_equal(x$name, "uint32") expect_equal(x$ToString(), "uint32") expect_true(x == x) expect_false(x == null()) @@ -93,7 +93,7 @@ test_that("int types works as expected",{ x <- int32() expect_equal(x$id, 7L) - expect_equal(x$name(), "int32") + expect_equal(x$name, "int32") expect_equal(x$ToString(), "int32") expect_true(x == x) expect_false(x == null()) @@ -103,7 +103,7 @@ test_that("int types works as expected",{ x <- uint64() expect_equal(x$id, 8L) - expect_equal(x$name(), "uint64") + expect_equal(x$name, "uint64") expect_equal(x$ToString(), "uint64") expect_true(x == x) expect_false(x == null()) @@ -113,7 +113,7 @@ test_that("int types works as expected",{ x <- int64() expect_equal(x$id, 9L) - expect_equal(x$name(), "int64") + expect_equal(x$name, "int64") expect_equal(x$ToString(), "int64") expect_true(x == x) expect_false(x == null()) @@ -125,7 +125,7 @@ test_that("int types works as expected",{ test_that("float types work as expected",{ x <- float16() expect_equal(x$id, 10L) - expect_equal(x$name(), "halffloat") + expect_equal(x$name, "halffloat") expect_equal(x$ToString(), "halffloat") expect_true(x == x) expect_false(x == null()) @@ -135,7 +135,7 @@ test_that("float types work as expected",{ x <- float32() expect_equal(x$id, 11L) - expect_equal(x$name(), "float") + expect_equal(x$name, "float") expect_equal(x$ToString(), "float") expect_true(x == x) expect_false(x == null()) @@ -145,7 +145,7 @@ test_that("float types work as expected",{ x <- float64() expect_equal(x$id, 12L) - expect_equal(x$name(), "double") + expect_equal(x$name, "double") expect_equal(x$ToString(), "double") expect_true(x == x) expect_false(x == null()) @@ -157,7 +157,7 @@ test_that("float types work as expected",{ test_that("utf8 type works as expected",{ x <- utf8() expect_equal(x$id, 13L) - expect_equal(x$name(), "utf8") + expect_equal(x$name, "utf8") 
expect_equal(x$ToString(), "string") expect_true(x == x) expect_false(x == null()) @@ -168,7 +168,7 @@ test_that("utf8 type works as expected",{ test_that("date types work as expected", { x <- date32() expect_equal(x$id, 16L) - expect_equal(x$name(), "date32") + expect_equal(x$name, "date32") expect_equal(x$ToString(), "date32[day]") expect_true(x == x) expect_false(x == null()) @@ -178,7 +178,7 @@ test_that("date types work as expected", { x <- date64() expect_equal(x$id, 17L) - expect_equal(x$name(), "date64") + expect_equal(x$name, "date64") expect_equal(x$ToString(), "date64[ms]") expect_true(x == x) expect_false(x == null()) @@ -190,7 +190,7 @@ test_that("date types work as expected", { test_that("timestamp type works as expected", { x <- timestamp(TimeUnit$SECOND) expect_equal(x$id, 18L) - expect_equal(x$name(), "timestamp") + expect_equal(x$name, "timestamp") expect_equal(x$ToString(), "timestamp[s]") expect_true(x == x) expect_false(x == null()) @@ -202,7 +202,7 @@ test_that("timestamp type works as expected", { x <- timestamp(TimeUnit$MILLI) expect_equal(x$id, 18L) - expect_equal(x$name(), "timestamp") + expect_equal(x$name, "timestamp") expect_equal(x$ToString(), "timestamp[ms]") expect_true(x == x) expect_false(x == null()) @@ -214,7 +214,7 @@ test_that("timestamp type works as expected", { x <- timestamp(TimeUnit$MICRO) expect_equal(x$id, 18L) - expect_equal(x$name(), "timestamp") + expect_equal(x$name, "timestamp") expect_equal(x$ToString(), "timestamp[us]") expect_true(x == x) expect_false(x == null()) @@ -226,7 +226,7 @@ test_that("timestamp type works as expected", { x <- timestamp(TimeUnit$NANO) expect_equal(x$id, 18L) - expect_equal(x$name(), "timestamp") + expect_equal(x$name, "timestamp") expect_equal(x$ToString(), "timestamp[ns]") expect_true(x == x) expect_false(x == null()) @@ -240,7 +240,7 @@ test_that("timestamp type works as expected", { test_that("time32 types work as expected", { x <- time32(TimeUnit$SECOND) expect_equal(x$id, 19L) - 
expect_equal(x$name(), "time32") + expect_equal(x$name, "time32") expect_equal(x$ToString(), "time32[s]") expect_true(x == x) expect_false(x == null()) @@ -251,7 +251,7 @@ test_that("time32 types work as expected", { x <- time32(TimeUnit$MILLI) expect_equal(x$id, 19L) - expect_equal(x$name(), "time32") + expect_equal(x$name, "time32") expect_equal(x$ToString(), "time32[ms]") expect_true(x == x) expect_false(x == null()) @@ -264,7 +264,7 @@ test_that("time32 types work as expected", { test_that("time64 types work as expected", { x <- time64(TimeUnit$MICRO) expect_equal(x$id, 20L) - expect_equal(x$name(), "time64") + expect_equal(x$name, "time64") expect_equal(x$ToString(), "time64[us]") expect_true(x == x) expect_false(x == null()) @@ -275,7 +275,7 @@ test_that("time64 types work as expected", { x <- time64(TimeUnit$NANO) expect_equal(x$id, 20L) - expect_equal(x$name(), "time64") + expect_equal(x$name, "time64") expect_equal(x$ToString(), "time64[ns]") expect_true(x == x) expect_false(x == null()) @@ -288,7 +288,7 @@ test_that("time64 types work as expected", { test_that("list type works as expected", { x <- list_of(int32()) expect_equal(x$id, 23L) - expect_equal(x$name(), "list") + expect_equal(x$name, "list") expect_equal(x$ToString(), "list") expect_true(x == x) expect_false(x == null()) @@ -302,7 +302,7 @@ test_that("list type works as expected", { test_that("struct type works as expected", { x <- struct(x = int32(), y = boolean()) expect_equal(x$id, 24L) - expect_equal(x$name(), "struct") + expect_equal(x$name, "struct") expect_equal(x$ToString(), "struct") expect_true(x == x) expect_false(x == null()) @@ -321,6 +321,6 @@ test_that("DictionaryType works as expected (ARROW-3355)", { expect_equal(d$id, Type$DICTIONARY) expect_equal(d$bit_width, 32L) expect_equal(d$ToString(), "dictionary") - expect_equal(d$index_type(), int32()) - expect_equal(d$dictionary(), array(c("foo", "bar", "baz"))) + expect_equal(d$index_type, int32()) + expect_equal(d$dictionary, 
array(c("foo", "bar", "baz"))) }) diff --git a/r/tests/testthat/test-RecordBatch.R b/r/tests/testthat/test-RecordBatch.R index c8e91e78081..f40bd8387ad 100644 --- a/r/tests/testthat/test-RecordBatch.R +++ b/r/tests/testthat/test-RecordBatch.R @@ -117,7 +117,7 @@ test_that("RecordBatch cast (ARROW-3741)", { s2 <- schema(x = int16(), y = int64()) batch2 <- batch$cast(s2) - expect_equal(batch2$schema(), s2) - expect_equal(batch2$column(0L)$type(), int16()) - expect_equal(batch2$column(1L)$type(), int64()) + expect_equal(batch2$schema, s2) + expect_equal(batch2$column(0L)$type, int16()) + expect_equal(batch2$column(1L)$type, int64()) }) diff --git a/r/tests/testthat/test-Table.R b/r/tests/testthat/test-Table.R index c38b124bf65..ec1be9b2348 100644 --- a/r/tests/testthat/test-Table.R +++ b/r/tests/testthat/test-Table.R @@ -64,7 +64,7 @@ test_that("Table cast (ARROW-3741)", { s2 <- schema(x = int16(), y = int64()) tab2 <- tab$cast(s2) - expect_equal(tab2$schema(), s2) - expect_equal(tab2$column(0L)$type(), int16()) - expect_equal(tab2$column(1L)$type(), int64()) + expect_equal(tab2$schema, s2) + expect_equal(tab2$column(0L)$type, int16()) + expect_equal(tab2$column(1L)$type, int64()) }) diff --git a/r/tests/testthat/test-chunkedarray.R b/r/tests/testthat/test-chunkedarray.R index 188b32b272c..11a196d039d 100644 --- a/r/tests/testthat/test-chunkedarray.R +++ b/r/tests/testthat/test-chunkedarray.R @@ -123,7 +123,7 @@ test_that("ChunkedArray supports factors (ARROW-3716)", { f <- factor(c("itsy", "bitsy", "spider", "spider")) arr_fac <- chunked_array(f, f, f) expect_equal(arr_fac$length(), 12L) - expect_equal(arr_fac$type$index_type(), int8()) + expect_equal(arr_fac$type$index_type, int8()) expect_identical(arr_fac$as_vector(), vctrs::vec_c(f, f, f)) }) @@ -138,7 +138,7 @@ test_that("ChunkedArray supports dates (ARROW-3716)", { test_that("ChunkedArray supports POSIXct (ARROW-3716)", { times <- lubridate::ymd_hms("2018-10-07 19:04:05") + 1:10 a <- chunked_array(times, times) 
- expect_equal(a$type$name(), "timestamp") + expect_equal(a$type$name, "timestamp") expect_equal(a$type$unit(), unclass(TimeUnit$MICRO)) expect_equal(a$length(), 20L) expect_equal(as.numeric(a$as_vector()), as.numeric(c(times, times))) @@ -177,10 +177,10 @@ test_that("integer types casts for ChunkedArray (ARROW-3741)", { expect_is(a_int16, "arrow::ChunkedArray") expect_is(a_int32, "arrow::ChunkedArray") expect_is(a_int64, "arrow::ChunkedArray") - expect_equal(a_int8$type(), int8()) - expect_equal(a_int16$type(), int16()) - expect_equal(a_int32$type(), int32()) - expect_equal(a_int64$type(), int64()) + expect_equal(a_int8$type, int8()) + expect_equal(a_int16$type, int16()) + expect_equal(a_int32$type, int32()) + expect_equal(a_int64$type, int64()) a_uint8 <- a$cast(uint8()) a_uint16 <- a$cast(uint16()) @@ -192,8 +192,8 @@ test_that("integer types casts for ChunkedArray (ARROW-3741)", { expect_is(a_uint32, "arrow::ChunkedArray") expect_is(a_uint64, "arrow::ChunkedArray") - expect_equal(a_uint8$type(), uint8()) - expect_equal(a_uint16$type(), uint16()) - expect_equal(a_uint32$type(), uint32()) - expect_equal(a_uint64$type(), uint64()) + expect_equal(a_uint8$type, uint8()) + expect_equal(a_uint16$type, uint16()) + expect_equal(a_uint32$type, uint32()) + expect_equal(a_uint64$type, uint64()) }) diff --git a/r/tests/testthat/test-field.R b/r/tests/testthat/test-field.R index 08bf4db36a5..aaa2875510a 100644 --- a/r/tests/testthat/test-field.R +++ b/r/tests/testthat/test-field.R @@ -19,8 +19,8 @@ context("arrow::Field") test_that("field() factory", { x <- field("x", int32()) - expect_equal(x$type(), int32()) - expect_equal(x$name(), "x") + expect_equal(x$type, int32()) + expect_equal(x$name, "x") expect_true(x == x) expect_false(x == field("x", int64())) }) From 01517e753a3c248b1febe89aa683924d35bf8696 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Wed, 28 Nov 2018 11:51:48 +0100 Subject: [PATCH 17/17] handle C_VISIBILITY at configure time. 
--- r/configure | 2 +- r/man/field.Rd | 1 + r/src/Makevars.in | 2 +- r/tests/testthat/test-buffer.R | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/r/configure b/r/configure index 69f04632a2f..28f6a73ac7e 100755 --- a/r/configure +++ b/r/configure @@ -91,7 +91,7 @@ if [ $? -ne 0 ]; then fi # Write to Makevars -sed -e "s|@cflags@|$PKG_CFLAGS|" -e "s|@libs@|$PKG_LIBS|" src/Makevars.in > src/Makevars +sed -e "s|@cflags@|$PKG_CFLAGS|" -e "s|@libs@|$PKG_LIBS|" -e "s|@visibility@|$C_VISIBILITY|" src/Makevars.in > src/Makevars # Success exit 0 diff --git a/r/man/field.Rd b/r/man/field.Rd index 1350a1259e5..5cbd8033875 100644 --- a/r/man/field.Rd +++ b/r/man/field.Rd @@ -18,4 +18,5 @@ Factory for a \code{arrow::Field} } \examples{ field("x", int32()) + } diff --git a/r/src/Makevars.in b/r/src/Makevars.in index 5e285518f24..a0d5fed10ba 100644 --- a/r/src/Makevars.in +++ b/r/src/Makevars.in @@ -16,7 +16,7 @@ # under the License. PKG_CPPFLAGS=@cflags@ -PKG_CXXFLAGS+=$(C_VISIBILITY) +PKG_CXXFLAGS=@visibility@ CXX_STD=CXX11 PKG_LIBS=@libs@ -Wl,-rpath,/usr/local/lib #CXXFLAGS="-D_GLIBCXX_USE_CXX11_ABI=0" diff --git a/r/tests/testthat/test-buffer.R b/r/tests/testthat/test-buffer.R index a071c35304c..26ec8dfde0a 100644 --- a/r/tests/testthat/test-buffer.R +++ b/r/tests/testthat/test-buffer.R @@ -42,5 +42,5 @@ test_that("arrow::Buffer can be created from complex vector", { vec <- complex(3) buf <- buffer(vec) expect_is(buf, "arrow::Buffer") - expect_equal(buf$size(), 3 * 16) + expect_equal(buf$size, 3 * 16) })