From 9f52490a0c1c9c4d0b1a7e255d1dc7499942e219 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Wed, 4 Sep 2019 13:22:14 -0700 Subject: [PATCH 01/37] Progress commit renaming Array --- r/NAMESPACE | 6 ++-- r/R/ArrayData.R | 4 +-- r/R/ChunkedArray.R | 4 +-- r/R/RecordBatch.R | 6 ++-- r/R/array.R | 46 ++++++++++++++--------------- r/R/feather.R | 2 +- r/R/type.R | 2 +- r/man/array.Rd | 4 +-- r/man/arrow__Array.Rd | 8 ++--- r/man/arrow__ArrayData.Rd | 6 ++-- r/man/record_batch.Rd | 2 +- r/src/array_from_vector.cpp | 4 +-- r/src/table.cpp | 4 +-- r/tests/testthat/test-RecordBatch.R | 13 ++++---- 14 files changed, 55 insertions(+), 56 deletions(-) diff --git a/r/NAMESPACE b/r/NAMESPACE index 57445482c96..eb053a26ea4 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -1,12 +1,12 @@ # Generated by roxygen2: do not edit by hand S3method("!=","arrow::Object") -S3method("==","arrow::Array") S3method("==","arrow::DataType") S3method("==","arrow::Field") S3method("==","arrow::RecordBatch") S3method("==","arrow::Schema") S3method("==","arrow::ipc::Message") +S3method("==",Array) S3method(BufferReader,"arrow::Buffer") S3method(BufferReader,default) S3method(CompressedInputStream,"arrow::io::InputStream") @@ -52,7 +52,7 @@ S3method(json_table_reader,"arrow::io::InputStream") S3method(json_table_reader,"arrow::json::TableReader") S3method(json_table_reader,character) S3method(json_table_reader,default) -S3method(length,"arrow::Array") +S3method(length,Array) S3method(names,"arrow::RecordBatch") S3method(parquet_file_reader,"arrow::io::RandomAccessFile") S3method(parquet_file_reader,character) @@ -73,9 +73,9 @@ S3method(read_table,"arrow::ipc::RecordBatchFileReader") S3method(read_table,"arrow::ipc::RecordBatchStreamReader") S3method(read_table,character) S3method(read_table,raw) -S3method(type,"arrow::Array") S3method(type,"arrow::ChunkedArray") S3method(type,"arrow::Column") +S3method(type,Array) S3method(type,default) S3method(write_arrow,"arrow::ipc::RecordBatchWriter") 
S3method(write_arrow,character) diff --git a/r/R/ArrayData.R b/r/R/ArrayData.R index d9f307bf540..adf7c113c30 100644 --- a/r/R/ArrayData.R +++ b/r/R/ArrayData.R @@ -17,7 +17,7 @@ #' @include type.R -#' @title class arrow::ArrayData +#' @title class ArrayData #' #' @usage NULL #' @format NULL @@ -41,7 +41,7 @@ #' #' @rdname arrow__ArrayData #' @name arrow__ArrayData -`arrow::ArrayData` <- R6Class("arrow::ArrayData", +`ArrayData` <- R6Class("ArrayData", inherit = `arrow::Object`, active = list( type = function() `arrow::DataType`$dispatch(ArrayData__get_type(self)), diff --git a/r/R/ChunkedArray.R b/r/R/ChunkedArray.R index e407a494065..4ce2a841613 100644 --- a/r/R/ChunkedArray.R +++ b/r/R/ChunkedArray.R @@ -32,7 +32,7 @@ `arrow::ChunkedArray` <- R6Class("arrow::ChunkedArray", inherit = `arrow::Object`, public = list( length = function() ChunkedArray__length(self), - chunk = function(i) `arrow::Array`$dispatch(ChunkedArray__chunk(self, i)), + chunk = function(i) `Array`$dispatch(ChunkedArray__chunk(self, i)), as_vector = function() ChunkedArray__as_vector(self), Slice = function(offset, length = NULL){ if (is.null(length)) { @@ -50,7 +50,7 @@ active = list( null_count = function() ChunkedArray__null_count(self), num_chunks = function() ChunkedArray__num_chunks(self), - chunks = function() map(ChunkedArray__chunks(self), ~ `arrow::Array`$dispatch(.x)), + chunks = function() map(ChunkedArray__chunks(self), ~ `Array`$dispatch(.x)), type = function() `arrow::DataType`$dispatch(ChunkedArray__type(self)) ) ) diff --git a/r/R/RecordBatch.R b/r/R/RecordBatch.R index de8b01ef180..40ede348cd4 100644 --- a/r/R/RecordBatch.R +++ b/r/R/RecordBatch.R @@ -31,7 +31,7 @@ #' @name arrow__RecordBatch `arrow::RecordBatch` <- R6Class("arrow::RecordBatch", inherit = `arrow::Object`, public = list( - column = function(i) shared_ptr(`arrow::Array`, RecordBatch__column(self, i)), + column = function(i) shared_ptr(`Array`, RecordBatch__column(self, i)), column_name = function(i) 
RecordBatch__column_name(self, i), names = function() RecordBatch__names(self), Equals = function(other) { @@ -65,7 +65,7 @@ num_columns = function() RecordBatch__num_columns(self), num_rows = function() RecordBatch__num_rows(self), schema = function() shared_ptr(`arrow::Schema`, RecordBatch__schema(self)), - columns = function() map(RecordBatch__columns(self), shared_ptr, `arrow::Array`) + columns = function() map(RecordBatch__columns(self), shared_ptr, `Array`) ) ) @@ -91,7 +91,7 @@ #' Create an [arrow::RecordBatch][arrow__RecordBatch] from a data frame #' -#' @param ... A variable number of arrow::Array +#' @param ... A variable number of Array #' @param schema a arrow::Schema #' #' @return a [arrow::RecordBatch][arrow__RecordBatch] diff --git a/r/R/array.R b/r/R/array.R index fd7c6ef7c8d..5c0b86c3877 100644 --- a/r/R/array.R +++ b/r/R/array.R @@ -17,7 +17,7 @@ #' @include arrow-package.R -#' @title class arrow::Array +#' @title class Array #' #' Array base type. Immutable data array with some logical type and some length. #' @@ -59,7 +59,7 @@ #' - `$type_id()`: type id #' - `$Equals(other)` : is this array equal to `other` #' - `$ApproxEquals(other)` : -#' - `$data()`: return the underlying [arrow::ArrayData][arrow__ArrayData] +#' - `$data()`: return the underlying [ArrayData][arrow__ArrayData] #' - `$as_vector()`: convert to an R vector #' - `$ToString()`: string representation of the array #' - `$Slice(offset, length = NULL)` : Construct a zero-copy slice of the array with the indicated offset and length. If length is `NULL`, the slice goes until the end of the array. 
@@ -67,7 +67,7 @@ #' #' @rdname arrow__Array #' @name arrow__Array -`arrow::Array` <- R6Class("arrow::Array", +`Array` <- R6Class("Array", inherit = `arrow::Object`, public = list( IsNull = function(i) Array__IsNull(self, i), @@ -76,24 +76,24 @@ type_id = function() Array__type_id(self), Equals = function(other) Array__Equals(self, other), ApproxEquals = function(other) Array__ApproxEquals(self, other), - data = function() shared_ptr(`arrow::ArrayData`, Array__data(self)), + data = function() shared_ptr(`ArrayData`, Array__data(self)), as_vector = function() Array__as_vector(self), ToString = function() Array__ToString(self), Slice = function(offset, length = NULL){ if (is.null(length)) { - shared_ptr(`arrow::Array`, Array__Slice1(self, offset)) + shared_ptr(`Array`, Array__Slice1(self, offset)) } else { - shared_ptr(`arrow::Array`, Array__Slice2(self, offset, length)) + shared_ptr(`Array`, Array__Slice2(self, offset, length)) } }, RangeEquals = function(other, start_idx, end_idx, other_start_idx) { - assert_that(inherits(other, "arrow::Array")) + assert_that(inherits(other, "Array")) Array__RangeEquals(self, other, start_idx, end_idx, other_start_idx) }, cast = function(target_type, safe = TRUE, options = cast_options(safe)) { assert_that(inherits(target_type, "arrow::DataType")) assert_that(inherits(options, "arrow::compute::CastOptions")) - `arrow::Array`$dispatch(Array__cast(self, target_type, options)) + `Array`$dispatch(Array__cast(self, target_type, options)) } ), active = list( @@ -103,24 +103,24 @@ ) ) -`arrow::DictionaryArray` <- R6Class("arrow::DictionaryArray", inherit = `arrow::Array`, +`arrow::DictionaryArray` <- R6Class("arrow::DictionaryArray", inherit = `Array`, public = list( - indices = function() `arrow::Array`$dispatch(DictionaryArray__indices(self)), - dictionary = function() `arrow::Array`$dispatch(DictionaryArray__dictionary(self)) + indices = function() `Array`$dispatch(DictionaryArray__indices(self)), + dictionary = function() 
`Array`$dispatch(DictionaryArray__dictionary(self)) ) ) -`arrow::StructArray` <- R6Class("arrow::StructArray", inherit = `arrow::Array`, +`arrow::StructArray` <- R6Class("arrow::StructArray", inherit = `Array`, public = list( - field = function(i) `arrow::Array`$dispatch(StructArray__field(self, i)), - GetFieldByName = function(name) `arrow::Array`$dispatch(StructArray__GetFieldByName(self, name)), - Flatten = function() map(StructArray__Flatten(self), ~ `arrow::Array`$dispatch(.x)) + field = function(i) `Array`$dispatch(StructArray__field(self, i)), + GetFieldByName = function(name) `Array`$dispatch(StructArray__GetFieldByName(self, name)), + Flatten = function() map(StructArray__Flatten(self), ~ `Array`$dispatch(.x)) ) ) -`arrow::ListArray` <- R6Class("arrow::ListArray", inherit = `arrow::Array`, +`arrow::ListArray` <- R6Class("arrow::ListArray", inherit = `Array`, public = list( - values = function() `arrow::Array`$dispatch(ListArray__values(self)), + values = function() `Array`$dispatch(ListArray__values(self)), value_length = function(i) ListArray__value_length(self, i), value_offset = function(i) ListArray__value_offset(self, i), raw_value_offsets = function() ListArray__raw_value_offsets(self) @@ -130,8 +130,8 @@ ) ) -`arrow::Array`$dispatch <- function(xp){ - a <- shared_ptr(`arrow::Array`, xp) +`Array`$dispatch <- function(xp){ + a <- shared_ptr(`Array`, xp) if (a$type_id() == Type$DICTIONARY){ a <- shared_ptr(`arrow::DictionaryArray`, xp) } else if (a$type_id() == Type$STRUCT) { @@ -143,17 +143,17 @@ } #' @export -`length.arrow::Array` <- function(x) x$length() +`length.Array` <- function(x) x$length() #' @export -`==.arrow::Array` <- function(x, y) x$Equals(y) +`==.Array` <- function(x, y) x$Equals(y) -#' create an [arrow::Array][arrow__Array] from an R vector +#' create an [Array][arrow__Array] from an R vector #' #' @param x R object #' @param type Explicit [type][arrow__DataType], or NULL (the default) to infer from the data #' #' @export array <- 
function(x, type = NULL){ - `arrow::Array`$dispatch(Array__from_vector(x, type)) + `Array`$dispatch(Array__from_vector(x, type)) } diff --git a/r/R/feather.R b/r/R/feather.R index 46c3f5ff2c3..30a7fb2c55e 100644 --- a/r/R/feather.R +++ b/r/R/feather.R @@ -34,7 +34,7 @@ num_rows = function() ipc___feather___TableReader__num_rows(self), num_columns = function() ipc___feather___TableReader__num_columns(self), GetColumnName = function(i) ipc___feather___TableReader__GetColumnName(self, i), - GetColumn = function(i) shared_ptr(`arrow::Array`, ipc___feather___TableReader__GetColumn(self, i)), + GetColumn = function(i) shared_ptr(`Array`, ipc___feather___TableReader__GetColumn(self, i)), Read = function(columns) { shared_ptr(`arrow::Table`, ipc___feather___TableReader__Read(self, columns)) } diff --git a/r/R/type.R b/r/R/type.R index 86b888d1cce..f70851f0986 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -114,7 +114,7 @@ type.default <- function(x) { } #' @export -`type.arrow::Array` <- function(x) x$type +`type.Array` <- function(x) x$type #' @export `type.arrow::ChunkedArray` <- function(x) x$type diff --git a/r/man/array.Rd b/r/man/array.Rd index 2b784caf9a1..5920a3d83c9 100644 --- a/r/man/array.Rd +++ b/r/man/array.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/array.R \name{array} \alias{array} -\title{create an \link[=arrow__Array]{arrow::Array} from an R vector} +\title{create an \link[=arrow__Array]{Array} from an R vector} \usage{ array(x, type = NULL) } @@ -12,5 +12,5 @@ array(x, type = NULL) \item{type}{Explicit \link[=arrow__DataType]{type}, or NULL (the default) to infer from the data} } \description{ -create an \link[=arrow__Array]{arrow::Array} from an R vector +create an \link[=arrow__Array]{Array} from an R vector } diff --git a/r/man/arrow__Array.Rd b/r/man/arrow__Array.Rd index dabed1f6fa2..940524907f6 100644 --- a/r/man/arrow__Array.Rd +++ b/r/man/arrow__Array.Rd @@ -3,12 +3,12 @@ \docType{class} \name{arrow__Array} \alias{arrow__Array} 
-\alias{arrow::Array} -\title{class arrow::Array +\alias{Array} +\title{class Array Array base type. Immutable data array with some logical type and some length.} \description{ -class arrow::Array +class Array Array base type. Immutable data array with some logical type and some length. } @@ -46,7 +46,7 @@ a == a \item \code{$type_id()}: type id \item \code{$Equals(other)} : is this array equal to \code{other} \item \code{$ApproxEquals(other)} : -\item \code{$data()}: return the underlying \link[=arrow__ArrayData]{arrow::ArrayData} +\item \code{$data()}: return the underlying \link[=arrow__ArrayData]{ArrayData} \item \code{$as_vector()}: convert to an R vector \item \code{$ToString()}: string representation of the array \item \code{$Slice(offset, length = NULL)} : Construct a zero-copy slice of the array with the indicated offset and length. If length is \code{NULL}, the slice goes until the end of the array. diff --git a/r/man/arrow__ArrayData.Rd b/r/man/arrow__ArrayData.Rd index af48dd334a5..bd4e2363c9a 100644 --- a/r/man/arrow__ArrayData.Rd +++ b/r/man/arrow__ArrayData.Rd @@ -3,10 +3,10 @@ \docType{class} \name{arrow__ArrayData} \alias{arrow__ArrayData} -\alias{arrow::ArrayData} -\title{class arrow::ArrayData} +\alias{ArrayData} +\title{class ArrayData} \description{ -class arrow::ArrayData +class ArrayData } \section{Usage}{ \preformatted{data <- array(x)$data() diff --git a/r/man/record_batch.Rd b/r/man/record_batch.Rd index a9680bf3735..44efe2e2217 100644 --- a/r/man/record_batch.Rd +++ b/r/man/record_batch.Rd @@ -7,7 +7,7 @@ record_batch(..., schema = NULL) } \arguments{ -\item{...}{A variable number of arrow::Array} +\item{...}{A variable number of Array} \item{schema}{a arrow::Schema} } diff --git a/r/src/array_from_vector.cpp b/r/src/array_from_vector.cpp index 6d9c8dcfc19..08686983909 100644 --- a/r/src/array_from_vector.cpp +++ b/r/src/array_from_vector.cpp @@ -793,7 +793,7 @@ std::shared_ptr GetFactorType(SEXP factor) { std::shared_ptr InferType(SEXP 
x) { switch (TYPEOF(x)) { case ENVSXP: - if (Rf_inherits(x, "arrow::Array")) { + if (Rf_inherits(x, "Array")) { Rcpp::ConstReferenceSmartPtrInputParameter> array( x); return static_cast>(array)->type(); @@ -976,7 +976,7 @@ arrow::Status CheckCompatibleStruct(SEXP obj, std::shared_ptr Array__from_vector( SEXP x, const std::shared_ptr& type, bool type_infered) { // short circuit if `x` is already an Array - if (Rf_inherits(x, "arrow::Array")) { + if (Rf_inherits(x, "Array")) { return Rcpp::ConstReferenceSmartPtrInputParameter>(x); } diff --git a/r/src/table.cpp b/r/src/table.cpp index b179b0c626d..f024c959a69 100644 --- a/r/src/table.cpp +++ b/r/src/table.cpp @@ -139,7 +139,7 @@ std::shared_ptr Table__from_dots(SEXP lst, SEXP schema_sxp) { auto chunked_array = arrow::r::extract(x); fields[j] = arrow::field(CHAR(name), chunked_array->type()); columns[j] = chunked_array; - } else if (Rf_inherits(x, "arrow::Array")) { + } else if (Rf_inherits(x, "Array")) { auto array = arrow::r::extract(x); fields[j] = arrow::field(CHAR(name), array->type()); columns[j] = std::make_shared(array); @@ -174,7 +174,7 @@ std::shared_ptr Table__from_dots(SEXP lst, SEXP schema_sxp) { if (Rf_inherits(x, "arrow::ChunkedArray")) { auto chunked_array = arrow::r::extract(x); columns[j] = chunked_array; - } else if (Rf_inherits(x, "arrow::Array")) { + } else if (Rf_inherits(x, "Array")) { auto array = arrow::r::extract(x); columns[j] = std::make_shared(array); } else { diff --git a/r/tests/testthat/test-RecordBatch.R b/r/tests/testthat/test-RecordBatch.R index d5a141c87ff..bf37ed7f107 100644 --- a/r/tests/testthat/test-RecordBatch.R +++ b/r/tests/testthat/test-RecordBatch.R @@ -45,27 +45,27 @@ test_that("RecordBatch", { expect_equal(names(batch), c("int", "dbl", "lgl", "chr", "fct")) col_int <- batch$column(0) - expect_true(inherits(col_int, 'arrow::Array')) + expect_true(inherits(col_int, 'Array')) expect_equal(col_int$as_vector(), tbl$int) expect_equal(col_int$type, int32()) col_dbl <- 
batch$column(1) - expect_true(inherits(col_dbl, 'arrow::Array')) + expect_true(inherits(col_dbl, 'Array')) expect_equal(col_dbl$as_vector(), tbl$dbl) expect_equal(col_dbl$type, float64()) col_lgl <- batch$column(2) - expect_true(inherits(col_dbl, 'arrow::Array')) + expect_true(inherits(col_dbl, 'Array')) expect_equal(col_lgl$as_vector(), tbl$lgl) expect_equal(col_lgl$type, boolean()) col_chr <- batch$column(3) - expect_true(inherits(col_chr, 'arrow::Array')) + expect_true(inherits(col_chr, 'Array')) expect_equal(col_chr$as_vector(), tbl$chr) expect_equal(col_chr$type, utf8()) col_fct <- batch$column(4) - expect_true(inherits(col_fct, 'arrow::Array')) + expect_true(inherits(col_fct, 'Array')) expect_equal(col_fct$as_vector(), tbl$fct) expect_equal(col_fct$type, dictionary(int32(), array(letters[1:10]))) @@ -146,7 +146,7 @@ test_that("RecordBatch dim() and nrow() (ARROW-3816)", { expect_equal(nrow(batch), 10L) }) -test_that("record_batch() handles arrow::Array", { +test_that("record_batch() handles Array", { batch <- record_batch(x = 1:10, y = arrow::array(1:10)) expect_equal(batch$schema, schema(x = int32(), y = int32())) }) @@ -220,4 +220,3 @@ test_that("record_batch() only auto splice data frames", { regexp = "only data frames are allowed as unnamed arguments to be auto spliced" ) }) - From 1f6d154e4baba7231ca847b52fcdb2bc8b2573b7 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Wed, 4 Sep 2019 15:09:57 -0700 Subject: [PATCH 02/37] Replace array() with Array() --- r/NAMESPACE | 2 +- r/R/ArrayData.R | 6 +- r/R/ChunkedArray.R | 4 +- r/R/array.R | 51 ++++--- r/man/{arrow__ArrayData.Rd => ArrayData.Rd} | 5 +- r/man/array.Rd | 60 ++++++-- r/man/arrow__Array.Rd | 57 -------- r/tests/testthat/test-Array.R | 143 ++++++++++---------- r/tests/testthat/test-RecordBatch.R | 10 +- r/tests/testthat/test-Table.R | 2 +- r/tests/testthat/test-arraydata.R | 2 +- r/tests/testthat/test-chunkedarray.R | 4 +- r/tests/testthat/test-json.R | 4 +- r/tests/testthat/test-type.R | 4 +- 
14 files changed, 165 insertions(+), 189 deletions(-) rename r/man/{arrow__ArrayData.Rd => ArrayData.Rd} (78%) delete mode 100644 r/man/arrow__Array.Rd diff --git a/r/NAMESPACE b/r/NAMESPACE index eb053a26ea4..5aac994770a 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -86,6 +86,7 @@ S3method(write_feather,default) S3method(write_feather_RecordBatch,"arrow::io::OutputStream") S3method(write_feather_RecordBatch,character) S3method(write_feather_RecordBatch,default) +export(Array) export(BufferOutputStream) export(BufferReader) export(CompressedInputStream) @@ -108,7 +109,6 @@ export(RecordBatchStreamWriter) export(StatusCode) export(TimeUnit) export(Type) -export(array) export(arrow_available) export(bool) export(boolean) diff --git a/r/R/ArrayData.R b/r/R/ArrayData.R index adf7c113c30..3849b8928e2 100644 --- a/r/R/ArrayData.R +++ b/r/R/ArrayData.R @@ -26,7 +26,7 @@ #' @section Usage: #' #' ``` -#' data <- array(x)$data() +#' data <- Array$create(x)$data() #' #' data$type() #' data$length() @@ -39,8 +39,8 @@ #' #' ... 
#' -#' @rdname arrow__ArrayData -#' @name arrow__ArrayData +#' @rdname ArrayData +#' @name ArrayData `ArrayData` <- R6Class("ArrayData", inherit = `arrow::Object`, active = list( diff --git a/r/R/ChunkedArray.R b/r/R/ChunkedArray.R index 4ce2a841613..b2f4bd76177 100644 --- a/r/R/ChunkedArray.R +++ b/r/R/ChunkedArray.R @@ -32,7 +32,7 @@ `arrow::ChunkedArray` <- R6Class("arrow::ChunkedArray", inherit = `arrow::Object`, public = list( length = function() ChunkedArray__length(self), - chunk = function(i) `Array`$dispatch(ChunkedArray__chunk(self, i)), + chunk = function(i) Array$create(ChunkedArray__chunk(self, i)), as_vector = function() ChunkedArray__as_vector(self), Slice = function(offset, length = NULL){ if (is.null(length)) { @@ -50,7 +50,7 @@ active = list( null_count = function() ChunkedArray__null_count(self), num_chunks = function() ChunkedArray__num_chunks(self), - chunks = function() map(ChunkedArray__chunks(self), ~ `Array`$dispatch(.x)), + chunks = function() map(ChunkedArray__chunks(self), ~ Array$create(.x)), type = function() `arrow::DataType`$dispatch(ChunkedArray__type(self)) ) ) diff --git a/r/R/array.R b/r/R/array.R index 5c0b86c3877..70fdca50923 100644 --- a/r/R/array.R +++ b/r/R/array.R @@ -28,7 +28,7 @@ #' @section Usage: #' #' ``` -#' a <- array(x) +#' a <- Array$create(x) #' #' a$IsNull(i) #' a$IsValid(i) @@ -59,15 +59,16 @@ #' - `$type_id()`: type id #' - `$Equals(other)` : is this array equal to `other` #' - `$ApproxEquals(other)` : -#' - `$data()`: return the underlying [ArrayData][arrow__ArrayData] +#' - `$data()`: return the underlying [ArrayData][ArrayData] #' - `$as_vector()`: convert to an R vector #' - `$ToString()`: string representation of the array #' - `$Slice(offset, length = NULL)` : Construct a zero-copy slice of the array with the indicated offset and length. If length is `NULL`, the slice goes until the end of the array. 
#' - `$RangeEquals(other, start_idx, end_idx, other_start_idx)` : #' -#' @rdname arrow__Array -#' @name arrow__Array -`Array` <- R6Class("Array", +#' @rdname Array +#' @name Array +#' @export +Array <- R6Class("Array", inherit = `arrow::Object`, public = list( IsNull = function(i) Array__IsNull(self, i), @@ -93,7 +94,7 @@ cast = function(target_type, safe = TRUE, options = cast_options(safe)) { assert_that(inherits(target_type, "arrow::DataType")) assert_that(inherits(options, "arrow::compute::CastOptions")) - `Array`$dispatch(Array__cast(self, target_type, options)) + Array$create(Array__cast(self, target_type, options)) } ), active = list( @@ -105,22 +106,22 @@ `arrow::DictionaryArray` <- R6Class("arrow::DictionaryArray", inherit = `Array`, public = list( - indices = function() `Array`$dispatch(DictionaryArray__indices(self)), - dictionary = function() `Array`$dispatch(DictionaryArray__dictionary(self)) + indices = function() Array$create(DictionaryArray__indices(self)), + dictionary = function() Array$create(DictionaryArray__dictionary(self)) ) ) `arrow::StructArray` <- R6Class("arrow::StructArray", inherit = `Array`, public = list( - field = function(i) `Array`$dispatch(StructArray__field(self, i)), - GetFieldByName = function(name) `Array`$dispatch(StructArray__GetFieldByName(self, name)), - Flatten = function() map(StructArray__Flatten(self), ~ `Array`$dispatch(.x)) + field = function(i) Array$create(StructArray__field(self, i)), + GetFieldByName = function(name) Array$create(StructArray__GetFieldByName(self, name)), + Flatten = function() map(StructArray__Flatten(self), ~ Array$create(.x)) ) ) `arrow::ListArray` <- R6Class("arrow::ListArray", inherit = `Array`, public = list( - values = function() `Array`$dispatch(ListArray__values(self)), + values = function() Array$create(ListArray__values(self)), value_length = function(i) ListArray__value_length(self, i), value_offset = function(i) ListArray__value_offset(self, i), raw_value_offsets = function() 
ListArray__raw_value_offsets(self) @@ -130,30 +131,24 @@ ) ) -`Array`$dispatch <- function(xp){ - a <- shared_ptr(`Array`, xp) +# Add a class method +Array$create <- function(x, type = NULL) { + if (!inherits(x, "externalptr")) { + x <- Array__from_vector(x, type) + } + a <- shared_ptr(Array, x) if (a$type_id() == Type$DICTIONARY){ - a <- shared_ptr(`arrow::DictionaryArray`, xp) + a <- shared_ptr(`arrow::DictionaryArray`, x) } else if (a$type_id() == Type$STRUCT) { - a <- shared_ptr(`arrow::StructArray`, xp) + a <- shared_ptr(`arrow::StructArray`, x) } else if (a$type_id() == Type$LIST) { - a <- shared_ptr(`arrow::ListArray`, xp) + a <- shared_ptr(`arrow::ListArray`, x) } a } #' @export -`length.Array` <- function(x) x$length() +length.Array <- function(x) x$length() #' @export `==.Array` <- function(x, y) x$Equals(y) - -#' create an [Array][arrow__Array] from an R vector -#' -#' @param x R object -#' @param type Explicit [type][arrow__DataType], or NULL (the default) to infer from the data -#' -#' @export -array <- function(x, type = NULL){ - `Array`$dispatch(Array__from_vector(x, type)) -} diff --git a/r/man/arrow__ArrayData.Rd b/r/man/ArrayData.Rd similarity index 78% rename from r/man/arrow__ArrayData.Rd rename to r/man/ArrayData.Rd index bd4e2363c9a..2cca881da44 100644 --- a/r/man/arrow__ArrayData.Rd +++ b/r/man/ArrayData.Rd @@ -1,15 +1,14 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/ArrayData.R \docType{class} -\name{arrow__ArrayData} -\alias{arrow__ArrayData} +\name{ArrayData} \alias{ArrayData} \title{class ArrayData} \description{ class ArrayData } \section{Usage}{ -\preformatted{data <- array(x)$data() +\preformatted{data <- Array$create(x)$data() data$type() data$length() diff --git a/r/man/array.Rd b/r/man/array.Rd index 5920a3d83c9..8dd07a9bb74 100644 --- a/r/man/array.Rd +++ b/r/man/array.Rd @@ -1,16 +1,56 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/array.R -\name{array} 
-\alias{array} -\title{create an \link[=arrow__Array]{Array} from an R vector} -\usage{ -array(x, type = NULL) +\docType{class} +\name{Array} +\alias{Array} +\title{class Array + +Array base type. Immutable data array with some logical type and some length.} +\description{ +class Array + +Array base type. Immutable data array with some logical type and some length. } -\arguments{ -\item{x}{R object} +\section{Usage}{ +\preformatted{a <- Array$create(x) -\item{type}{Explicit \link[=arrow__DataType]{type}, or NULL (the default) to infer from the data} +a$IsNull(i) +a$IsValid(i) +a$length() or length(a) +a$offset() +a$null_count() +a$type() +a$type_id() +a$Equals(b) +a$ApproxEquals(b) +a$as_vector() +a$ToString() +a$Slice(offset, length = NULL) +a$RangeEquals(other, start_idx, end_idx, other_start_idx) + +print(a) +a == a } -\description{ -create an \link[=arrow__Array]{Array} from an R vector } + +\section{Methods}{ + +\itemize{ +\item \code{$IsNull(i)}: Return true if value at index is null. Does not boundscheck +\item \code{$IsValid(i)}: Return true if value at index is valid. Does not boundscheck +\item \code{$length()}: Size in the number of elements this array contains +\item \code{$offset()}: A relative position into another array's data, to enable zero-copy slicing +\item \code{$null_count()}: The number of null entries in the array +\item \code{$type()}: logical type of data +\item \code{$type_id()}: type id +\item \code{$Equals(other)} : is this array equal to \code{other} +\item \code{$ApproxEquals(other)} : +\item \code{$data()}: return the underlying \link{ArrayData} +\item \code{$as_vector()}: convert to an R vector +\item \code{$ToString()}: string representation of the array +\item \code{$Slice(offset, length = NULL)} : Construct a zero-copy slice of the array with the indicated offset and length. If length is \code{NULL}, the slice goes until the end of the array. 
+\item \code{$RangeEquals(other, start_idx, end_idx, other_start_idx)} : +} +} + +\keyword{datasets} diff --git a/r/man/arrow__Array.Rd b/r/man/arrow__Array.Rd deleted file mode 100644 index 940524907f6..00000000000 --- a/r/man/arrow__Array.Rd +++ /dev/null @@ -1,57 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/array.R -\docType{class} -\name{arrow__Array} -\alias{arrow__Array} -\alias{Array} -\title{class Array - -Array base type. Immutable data array with some logical type and some length.} -\description{ -class Array - -Array base type. Immutable data array with some logical type and some length. -} -\section{Usage}{ -\preformatted{a <- array(x) - -a$IsNull(i) -a$IsValid(i) -a$length() or length(a) -a$offset() -a$null_count() -a$type() -a$type_id() -a$Equals(b) -a$ApproxEquals(b) -a$as_vector() -a$ToString() -a$Slice(offset, length = NULL) -a$RangeEquals(other, start_idx, end_idx, other_start_idx) - -print(a) -a == a -} -} - -\section{Methods}{ - -\itemize{ -\item \code{$IsNull(i)}: Return true if value at index is null. Does not boundscheck -\item \code{$IsValid(i)}: Return true if value at index is valid. Does not boundscheck -\item \code{$length()}: Size in the number of elements this array contains -\item \code{$offset()}: A relative position into another array's data, to enable zero-copy slicing -\item \code{$null_count()}: The number of null entries in the array -\item \code{$type()}: logical type of data -\item \code{$type_id()}: type id -\item \code{$Equals(other)} : is this array equal to \code{other} -\item \code{$ApproxEquals(other)} : -\item \code{$data()}: return the underlying \link[=arrow__ArrayData]{ArrayData} -\item \code{$as_vector()}: convert to an R vector -\item \code{$ToString()}: string representation of the array -\item \code{$Slice(offset, length = NULL)} : Construct a zero-copy slice of the array with the indicated offset and length. 
If length is \code{NULL}, the slice goes until the end of the array. -\item \code{$RangeEquals(other, start_idx, end_idx, other_start_idx)} : -} -} - -\keyword{datasets} diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R index 4a903fac923..b0bec91b2c8 100644 --- a/r/tests/testthat/test-Array.R +++ b/r/tests/testthat/test-Array.R @@ -18,14 +18,14 @@ context("arrow::Array") test_that("Array", { - x <- array(c(1:10, 1:10, 1:5)) + x <- Array$create(c(1:10, 1:10, 1:5)) expect_equal(x$type, int32()) - expect_equal(x$length(), 25L) + expect_equal(length(x), 25L) expect_equal(x$as_vector(), c(1:10, 1:10, 1:5)) y <- x$Slice(10) expect_equal(y$type, int32()) - expect_equal(y$length(), 15L) + expect_equal(length(y), 15L) expect_equal(y$as_vector(), c(1:10, 1:5)) expect_true(x$RangeEquals(y, 10, 24, 0)) @@ -35,7 +35,7 @@ test_that("Array", { expect_equal(z$as_vector(), c(1:5)) expect_true(x$RangeEquals(z, 10, 15, 0)) - x_dbl <- array(c(1,2,3,4,5,6)) + x_dbl <- Array$create(c(1,2,3,4,5,6)) expect_equal(x_dbl$type, float64()) expect_equal(x_dbl$length(), 6L) expect_equal(x_dbl$as_vector(), as.numeric(1:6)) @@ -53,8 +53,8 @@ test_that("Array", { }) test_that("Array supports NA", { - x_int <- array(as.integer(c(1:10, NA))) - x_dbl <- array(as.numeric(c(1:10, NA))) + x_int <- Array$create(as.integer(c(1:10, NA))) + x_dbl <- Array$create(as.numeric(c(1:10, NA))) expect_true(x_int$IsValid(0L)) expect_true(x_dbl$IsValid(0L)) expect_true(x_int$IsNull(10L)) @@ -68,19 +68,19 @@ test_that("Array supports NA", { test_that("Array supports logical vectors (ARROW-3341)", { # with NA x <- sample(c(TRUE, FALSE, NA), 1000, replace = TRUE) - arr_lgl <- array(x) + arr_lgl <- Array$create(x) expect_identical(x, arr_lgl$as_vector()) # without NA x <- sample(c(TRUE, FALSE), 1000, replace = TRUE) - arr_lgl <- array(x) + arr_lgl <- Array$create(x) expect_identical(x, arr_lgl$as_vector()) }) test_that("Array supports character vectors (ARROW-3339)", { # with NA x <- c("itsy", NA, 
"spider") - arr_chr <- array(x) + arr_chr <- Array$create(x) expect_equal(arr_chr$length(), 3L) expect_identical(arr_chr$as_vector(), x) expect_true(arr_chr$IsValid(0)) @@ -92,51 +92,51 @@ test_that("Array supports character vectors (ARROW-3339)", { # without NA x <- c("itsy", "bitsy", "spider") - arr_chr <- array(x) + arr_chr <- Array$create(x) expect_equal(arr_chr$length(), 3L) expect_identical(arr_chr$as_vector(), x) }) test_that("empty arrays are supported", { x <- character() - expect_equal(array(x)$as_vector(), x) + expect_equal(Array$create(x)$as_vector(), x) x <- integer() - expect_equal(array(x)$as_vector(), x) + expect_equal(Array$create(x)$as_vector(), x) x <- numeric() - expect_equal(array(x)$as_vector(), x) + expect_equal(Array$create(x)$as_vector(), x) x <- factor(character()) - expect_equal(array(x)$as_vector(), x) + expect_equal(Array$create(x)$as_vector(), x) x <- logical() - expect_equal(array(x)$as_vector(), x) + expect_equal(Array$create(x)$as_vector(), x) }) test_that("array with all nulls are supported", { nas <- c(NA, NA) x <- as.logical(nas) - expect_equal(array(x)$as_vector(), x) + expect_equal(Array$create(x)$as_vector(), x) x <- as.integer(nas) - expect_equal(array(x)$as_vector(), x) + expect_equal(Array$create(x)$as_vector(), x) x <- as.numeric(nas) - expect_equal(array(x)$as_vector(), x) + expect_equal(Array$create(x)$as_vector(), x) x <- as.character(nas) - expect_equal(array(x)$as_vector(), x) + expect_equal(Array$create(x)$as_vector(), x) x <- as.factor(nas) - expect_equal(array(x)$as_vector(), x) + expect_equal(Array$create(x)$as_vector(), x) }) test_that("Array supports unordered factors (ARROW-3355)", { # without NA f <- factor(c("itsy", "bitsy", "spider", "spider")) - arr_fac <- array(f) + arr_fac <- Array$create(f) expect_equal(arr_fac$length(), 4L) expect_equal(arr_fac$type$index_type, int8()) expect_identical(arr_fac$as_vector(), f) @@ -152,7 +152,7 @@ test_that("Array supports unordered factors (ARROW-3355)", { # with NA f <- 
factor(c("itsy", "bitsy", NA, "spider", "spider")) - arr_fac <- array(f) + arr_fac <- Array$create(f) expect_equal(arr_fac$length(), 5L) expect_equal(arr_fac$type$index_type, int8()) expect_identical(arr_fac$as_vector(), f) @@ -171,7 +171,7 @@ test_that("Array supports unordered factors (ARROW-3355)", { test_that("Array supports ordered factors (ARROW-3355)", { # without NA f <- ordered(c("itsy", "bitsy", "spider", "spider")) - arr_fac <- array(f) + arr_fac <- Array$create(f) expect_equal(arr_fac$length(), 4L) expect_equal(arr_fac$type$index_type, int8()) expect_identical(arr_fac$as_vector(), f) @@ -187,7 +187,7 @@ test_that("Array supports ordered factors (ARROW-3355)", { # with NA f <- ordered(c("itsy", "bitsy", NA, "spider", "spider")) - arr_fac <- array(f) + arr_fac <- Array$create(f) expect_equal(arr_fac$length(), 5L) expect_equal(arr_fac$type$index_type, int8()) expect_identical(arr_fac$as_vector(), f) @@ -205,20 +205,20 @@ test_that("Array supports ordered factors (ARROW-3355)", { test_that("array supports Date (ARROW-3340)", { d <- Sys.Date() + 1:10 - a <- array(d) + a <- Array$create(d) expect_equal(a$type, date32()) expect_equal(a$length(), 10L) expect_equal(a$as_vector(), d) d[5] <- NA - a <- array(d) + a <- Array$create(d) expect_equal(a$type, date32()) expect_equal(a$length(), 10L) expect_equal(a$as_vector(), d) expect_true(a$IsNull(4)) d2 <- d + .5 - a <- array(d2) + a <- Array$create(d2) expect_equal(a$type, date32()) expect_equal(a$length(), 10L) expect_equal(a$as_vector(), d) @@ -227,14 +227,14 @@ test_that("array supports Date (ARROW-3340)", { test_that("array supports POSIXct (ARROW-3340)", { times <- lubridate::ymd_hms("2018-10-07 19:04:05") + 1:10 - a <- array(times) + a <- Array$create(times) expect_equal(a$type$name, "timestamp") expect_equal(a$type$unit(), unclass(TimeUnit$MICRO)) expect_equal(a$length(), 10L) expect_equal(as.numeric(a$as_vector()), as.numeric(times)) times[5] <- NA - a <- array(times) + a <- Array$create(times) 
expect_equal(a$type$name, "timestamp") expect_equal(a$type$unit(), unclass(TimeUnit$MICRO)) expect_equal(a$length(), 10L) @@ -244,13 +244,13 @@ test_that("array supports POSIXct (ARROW-3340)", { test_that("array supports integer64", { x <- bit64::as.integer64(1:10) - a <- array(x) + a <- Array$create(x) expect_equal(a$type, int64()) expect_equal(a$length(), 10L) expect_equal(a$as_vector(), x) x[4] <- NA - a <- array(x) + a <- Array$create(x) expect_equal(a$type, int64()) expect_equal(a$length(), 10L) expect_equal(a$as_vector(), x) @@ -259,18 +259,18 @@ test_that("array supports integer64", { test_that("array$as_vector() correctly handles all NA inte64 (ARROW-3795)", { x <- bit64::as.integer64(NA) - a <- array(x) + a <- Array$create(x) expect_true(is.na(a$as_vector())) }) test_that("array supports difftime", { time <- hms::hms(56, 34, 12) - a <- array(c(time, time)) + a <- Array$create(c(time, time)) expect_equal(a$type, time32(unit = TimeUnit$SECOND)) expect_equal(a$length(), 2L) expect_equal(a$as_vector(), c(time, time)) - a <- array(vctrs::vec_c(time, NA)) + a <- Array$create(vctrs::vec_c(time, NA)) expect_equal(a$type, time32(unit = TimeUnit$SECOND)) expect_equal(a$length(), 2L) expect_true(a$IsNull(1)) @@ -280,13 +280,13 @@ test_that("array supports difftime", { test_that("support for NaN (ARROW-3615)", { x <- c(1, NA, NaN, -1) - y <- array(x) + y <- Array$create(x) expect_true(y$IsValid(2)) expect_equal(y$null_count, 1L) }) test_that("integer types casts (ARROW-3741)", { - a <- array(c(1:10, NA)) + a <- Array$create(c(1:10, NA)) a_int8 <- a$cast(int8()) a_int16 <- a$cast(int16()) a_int32 <- a$cast(int32()) @@ -317,7 +317,7 @@ test_that("integer types casts (ARROW-3741)", { }) test_that("integer types cast safety (ARROW-3741, ARROW-5541)", { - a <- array(-(1:10)) + a <- Array$create(-(1:10)) expect_error(a$cast(uint8()), regexp = "Integer value out of bounds") expect_error(a$cast(uint16()), regexp = "Integer value out of bounds") expect_error(a$cast(uint32()), 
regexp = "Integer value out of bounds") @@ -331,7 +331,7 @@ test_that("integer types cast safety (ARROW-3741, ARROW-5541)", { test_that("float types casts (ARROW-3741)", { x <- c(1, 2, 3, NA) - a <- array(x) + a <- Array$create(x) a_f32 <- a$cast(float32()) a_f64 <- a$cast(float64()) @@ -347,12 +347,12 @@ test_that("float types casts (ARROW-3741)", { test_that("cast to half float works", { skip("until https://issues.apache.org/jira/browse/ARROW-3802") - a <- array(1:4) + a <- Array$create(1:4) a_f16 <- a$cast(float16()) expect_equal(a_16$type, float16()) }) -test_that("array() supports the type= argument. conversion from INTSXP and int64 to all int types", { +test_that("Array$create() supports the type= argument. conversion from INTSXP and int64 to all int types", { num_int32 <- 12L num_int64 <- bit64::as.integer64(10) @@ -362,38 +362,38 @@ test_that("array() supports the type= argument. conversion from INTSXP and int64 float32(), float64() ) for(type in types) { - expect_equal(array(num_int32, type = type)$type, type) - expect_equal(array(num_int64, type = type)$type, type) + expect_equal(Array$create(num_int32, type = type)$type, type) + expect_equal(Array$create(num_int64, type = type)$type, type) } }) -test_that("array() aborts on overflow", { - expect_error(array(128L, type = int8())$type, "Invalid.*downsize") - expect_error(array(-129L, type = int8())$type, "Invalid.*downsize") +test_that("Array$create() aborts on overflow", { + expect_error(Array$create(128L, type = int8())$type, "Invalid.*downsize") + expect_error(Array$create(-129L, type = int8())$type, "Invalid.*downsize") - expect_error(array(256L, type = uint8())$type, "Invalid.*downsize") - expect_error(array(-1L, type = uint8())$type, "Invalid.*downsize") + expect_error(Array$create(256L, type = uint8())$type, "Invalid.*downsize") + expect_error(Array$create(-1L, type = uint8())$type, "Invalid.*downsize") - expect_error(array(32768L, type = int16())$type, "Invalid.*downsize") - 
expect_error(array(-32769L, type = int16())$type, "Invalid.*downsize") + expect_error(Array$create(32768L, type = int16())$type, "Invalid.*downsize") + expect_error(Array$create(-32769L, type = int16())$type, "Invalid.*downsize") - expect_error(array(65536L, type = uint16())$type, "Invalid.*downsize") - expect_error(array(-1L, type = uint16())$type, "Invalid.*downsize") + expect_error(Array$create(65536L, type = uint16())$type, "Invalid.*downsize") + expect_error(Array$create(-1L, type = uint16())$type, "Invalid.*downsize") - expect_error(array(65536L, type = uint16())$type, "Invalid.*downsize") - expect_error(array(-1L, type = uint16())$type, "Invalid.*downsize") + expect_error(Array$create(65536L, type = uint16())$type, "Invalid.*downsize") + expect_error(Array$create(-1L, type = uint16())$type, "Invalid.*downsize") - expect_error(array(bit64::as.integer64(2^31), type = int32()), "Invalid.*downsize") - expect_error(array(bit64::as.integer64(2^32), type = uint32()), "Invalid.*downsize") + expect_error(Array$create(bit64::as.integer64(2^31), type = int32()), "Invalid.*downsize") + expect_error(Array$create(bit64::as.integer64(2^32), type = uint32()), "Invalid.*downsize") }) -test_that("array() can convert doubles to integer", { +test_that("Array$create() does not convert doubles to integer", { types <- list( int8(), int16(), int32(), int64(), uint8(), uint16(), uint32(), uint64() ) for(type in types) { - a <- array(10, type = type) + a <- Array$create(10, type = type) expect_equal(a$type, type) # exception for now because we cannot handle @@ -404,45 +404,44 @@ test_that("array() can convert doubles to integer", { } }) -test_that("array() converts raw vectors to uint8 arrays (ARROW-3794)", { - expect_equal(array(as.raw(1:10))$type, uint8()) +test_that("Array$create() converts raw vectors to uint8 arrays (ARROW-3794)", { + expect_equal(Array$create(as.raw(1:10))$type, uint8()) }) test_that("Array$as_vector() converts to integer (ARROW-3794)", { - a <- 
array((-128):127)$cast(int8()) + a <- Array$create((-128):127)$cast(int8()) expect_equal(a$type, int8()) expect_equal(a$as_vector(), (-128):127) - a <- array(0:255)$cast(uint8()) + a <- Array$create(0:255)$cast(uint8()) expect_equal(a$type, uint8()) expect_equal(a$as_vector(), 0:255) }) -test_that("array() recognise arrow::Array (ARROW-3815)", { - a <- array(1:10) - expect_equal(a, array(a)) +test_that("Array$create() recognise arrow::Array (ARROW-3815)", { + a <- Array$create(1:10) + expect_equal(a, Array$create(a)) }) -test_that("array() handles data frame -> struct arrays (ARROW-3811)", { +test_that("Array$create() handles data frame -> struct arrays (ARROW-3811)", { df <- tibble::tibble(x = 1:10, y = x / 2, z = letters[1:10]) - a <- array(df) + a <- Array$create(df) expect_equal(a$type, struct(x = int32(), y = float64(), z = utf8())) expect_equivalent(a$as_vector(), df) }) -test_that("array() can handle data frame with custom struct type (not infered)", { +test_that("Array$create() can handle data frame with custom struct type (not infered)", { df <- tibble::tibble(x = 1:10, y = 1:10) type <- struct(x = float64(), y = int16()) - a <- array(df, type = type) + a <- Array$create(df, type = type) expect_equal(a$type, type) type <- struct(x = float64(), y = int16(), z = int32()) - expect_error(array(df, type = type), regexp = "Number of fields in struct.* incompatible with number of columns in the data frame") + expect_error(Array$create(df, type = type), regexp = "Number of fields in struct.* incompatible with number of columns in the data frame") type <- struct(y = int16(), x = float64()) - expect_error(array(df, type = type), regexp = "Field name in position.*does not match the name of the column of the data frame") + expect_error(Array$create(df, type = type), regexp = "Field name in position.*does not match the name of the column of the data frame") type <- struct(x = float64(), y = utf8()) - expect_error(array(df, type = type), regexp = "Cannot convert R 
object to string array") + expect_error(Array$create(df, type = type), regexp = "Cannot convert R object to string array") }) - diff --git a/r/tests/testthat/test-RecordBatch.R b/r/tests/testthat/test-RecordBatch.R index bf37ed7f107..d8ba8da0b59 100644 --- a/r/tests/testthat/test-RecordBatch.R +++ b/r/tests/testthat/test-RecordBatch.R @@ -32,7 +32,7 @@ test_that("RecordBatch", { schema( int = int32(), dbl = float64(), lgl = boolean(), chr = utf8(), - fct = dictionary(int32(), array(letters[1:10])) + fct = dictionary(int32(), Array$create(letters[1:10])) ) ) expect_equal(batch$num_columns, 5L) @@ -67,12 +67,12 @@ test_that("RecordBatch", { col_fct <- batch$column(4) expect_true(inherits(col_fct, 'Array')) expect_equal(col_fct$as_vector(), tbl$fct) - expect_equal(col_fct$type, dictionary(int32(), array(letters[1:10]))) + expect_equal(col_fct$type, dictionary(int32(), Array$create(letters[1:10]))) batch2 <- batch$RemoveColumn(0) expect_equal( batch2$schema, - schema(dbl = float64(), lgl = boolean(), chr = utf8(), fct = dictionary(int32(), array(letters[1:10]))) + schema(dbl = float64(), lgl = boolean(), chr = utf8(), fct = dictionary(int32(), Array$create(letters[1:10]))) ) expect_equal(batch2$column(0), batch$column(1)) expect_identical(as.data.frame(batch2), tbl[,-1]) @@ -103,7 +103,7 @@ test_that("RecordBatch with 0 rows are supported", { dbl = float64(), lgl = boolean(), chr = utf8(), - fct = dictionary(int32(), array(c("a", "b"))) + fct = dictionary(int32(), Array$create(c("a", "b"))) ) ) }) @@ -147,7 +147,7 @@ test_that("RecordBatch dim() and nrow() (ARROW-3816)", { }) test_that("record_batch() handles Array", { - batch <- record_batch(x = 1:10, y = arrow::array(1:10)) + batch <- record_batch(x = 1:10, y = Array$create(1:10)) expect_equal(batch$schema, schema(x = int32(), y = int32())) }) diff --git a/r/tests/testthat/test-Table.R b/r/tests/testthat/test-Table.R index fb04bdefdfd..a22c9928615 100644 --- a/r/tests/testthat/test-Table.R +++ 
b/r/tests/testthat/test-Table.R @@ -103,7 +103,7 @@ test_that("table() handles record batches with splicing", { }) test_that("table() handles ... of arrays, chunked arrays, vectors", { - a <- array(1:10) + a <- Array$create(1:10) ca <- chunked_array(1:5, 6:10) v <- rnorm(10) tbl <- tibble::tibble(x = 1:10, y = letters[1:10]) diff --git a/r/tests/testthat/test-arraydata.R b/r/tests/testthat/test-arraydata.R index 02ca9b85625..e05a8306005 100644 --- a/r/tests/testthat/test-arraydata.R +++ b/r/tests/testthat/test-arraydata.R @@ -18,7 +18,7 @@ context("arrow::ArrayData") test_that("string vectors with only empty strings and nulls don't allocate a data buffer (ARROW-3693)", { - a <- array("") + a <- Array$create("") expect_equal(a$length(), 1L) buffers <- a$data()$buffers diff --git a/r/tests/testthat/test-chunkedarray.R b/r/tests/testthat/test-chunkedarray.R index 2e6b7306be1..fa0a45b2456 100644 --- a/r/tests/testthat/test-chunkedarray.R +++ b/r/tests/testthat/test-chunkedarray.R @@ -207,7 +207,7 @@ test_that("chunked_array() supports the type= argument. 
conversion from INTSXP a } }) -test_that("array() aborts on overflow", { +test_that("Array$create() aborts on overflow", { expect_error(chunked_array(128L, type = int8())$type, "Invalid.*downsize") expect_error(chunked_array(-129L, type = int8())$type, "Invalid.*downsize") @@ -276,7 +276,7 @@ test_that("chunked_array() handles 0 chunks if given a type", { test_that("chunked_array() can ingest arrays (ARROW-3815)", { expect_equal( - chunked_array(1:5, array(6:10))$as_vector(), + chunked_array(1:5, Array$create(6:10))$as_vector(), 1:10 ) }) diff --git a/r/tests/testthat/test-json.R b/r/tests/testthat/test-json.R index b3e7d5638f5..cea7b65cccb 100644 --- a/r/tests/testthat/test-json.R +++ b/r/tests/testthat/test-json.R @@ -114,8 +114,8 @@ test_that("Can read json file with nested columns (ARROW-5503)", { ) struct_array <- tab1$column(1)$chunk(0) - ps <- array(c(NA, NA, 78, 90, NA, 19)) - hello <- array(c(NA, NA, "hi", "bonjour", "ciao", NA)) + ps <- Array$create(c(NA, NA, 78, 90, NA, 19)) + hello <- Array$create(c(NA, NA, "hi", "bonjour", "ciao", NA)) expect_equal(struct_array$field(0L), ps) expect_equal(struct_array$GetFieldByName("ps"), ps) expect_equal(struct_array$Flatten(), list(ps, hello)) diff --git a/r/tests/testthat/test-type.R b/r/tests/testthat/test-type.R index 70f8df63159..9f60b5f0f83 100644 --- a/r/tests/testthat/test-type.R +++ b/r/tests/testthat/test-type.R @@ -18,7 +18,7 @@ context("test-type") test_that("type() gets the right type for arrow::Array", { - a <- array(1:10) + a <- Array$create(1:10) expect_equal(type(a), a$type) }) @@ -35,7 +35,7 @@ test_that("type() infers from R type", { expect_equal(type(""), utf8()) expect_equal( type(iris$Species), - dictionary(int8(), array(levels(iris$Species)), FALSE) + dictionary(int8(), Array$create(levels(iris$Species)), FALSE) ) expect_equal( type(lubridate::ymd_hms("2019-02-14 13:55:05")), From 8edf085628dc56c09339647d8ad831ef7f85a5f1 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Wed, 4 Sep 2019 
15:18:06 -0700 Subject: [PATCH 03/37] Remove more backticks --- r/R/ArrayData.R | 2 +- r/R/RecordBatch.R | 4 ++-- r/R/array.R | 16 ++++++++-------- r/R/feather.R | 2 +- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/r/R/ArrayData.R b/r/R/ArrayData.R index 3849b8928e2..25ec39fdd7d 100644 --- a/r/R/ArrayData.R +++ b/r/R/ArrayData.R @@ -41,7 +41,7 @@ #' #' @rdname ArrayData #' @name ArrayData -`ArrayData` <- R6Class("ArrayData", +ArrayData <- R6Class("ArrayData", inherit = `arrow::Object`, active = list( type = function() `arrow::DataType`$dispatch(ArrayData__get_type(self)), diff --git a/r/R/RecordBatch.R b/r/R/RecordBatch.R index 40ede348cd4..8e5ed83c0e3 100644 --- a/r/R/RecordBatch.R +++ b/r/R/RecordBatch.R @@ -31,7 +31,7 @@ #' @name arrow__RecordBatch `arrow::RecordBatch` <- R6Class("arrow::RecordBatch", inherit = `arrow::Object`, public = list( - column = function(i) shared_ptr(`Array`, RecordBatch__column(self, i)), + column = function(i) shared_ptr(Array, RecordBatch__column(self, i)), column_name = function(i) RecordBatch__column_name(self, i), names = function() RecordBatch__names(self), Equals = function(other) { @@ -65,7 +65,7 @@ num_columns = function() RecordBatch__num_columns(self), num_rows = function() RecordBatch__num_rows(self), schema = function() shared_ptr(`arrow::Schema`, RecordBatch__schema(self)), - columns = function() map(RecordBatch__columns(self), shared_ptr, `Array`) + columns = function() map(RecordBatch__columns(self), shared_ptr, Array) ) ) diff --git a/r/R/array.R b/r/R/array.R index 70fdca50923..18eecc21aa7 100644 --- a/r/R/array.R +++ b/r/R/array.R @@ -82,9 +82,9 @@ Array <- R6Class("Array", ToString = function() Array__ToString(self), Slice = function(offset, length = NULL){ if (is.null(length)) { - shared_ptr(`Array`, Array__Slice1(self, offset)) + shared_ptr(Array, Array__Slice1(self, offset)) } else { - shared_ptr(`Array`, Array__Slice2(self, offset, length)) + shared_ptr(Array, Array__Slice2(self, offset, 
length)) } }, RangeEquals = function(other, start_idx, end_idx, other_start_idx) { @@ -104,14 +104,14 @@ Array <- R6Class("Array", ) ) -`arrow::DictionaryArray` <- R6Class("arrow::DictionaryArray", inherit = `Array`, +DictionaryArray <- R6Class("DictionaryArray", inherit = Array, public = list( indices = function() Array$create(DictionaryArray__indices(self)), dictionary = function() Array$create(DictionaryArray__dictionary(self)) ) ) -`arrow::StructArray` <- R6Class("arrow::StructArray", inherit = `Array`, +StructArray <- R6Class("StructArray", inherit = Array, public = list( field = function(i) Array$create(StructArray__field(self, i)), GetFieldByName = function(name) Array$create(StructArray__GetFieldByName(self, name)), @@ -119,7 +119,7 @@ Array <- R6Class("Array", ) ) -`arrow::ListArray` <- R6Class("arrow::ListArray", inherit = `Array`, +ListArray <- R6Class("ListArray", inherit = Array, public = list( values = function() Array$create(ListArray__values(self)), value_length = function(i) ListArray__value_length(self, i), @@ -138,11 +138,11 @@ Array$create <- function(x, type = NULL) { } a <- shared_ptr(Array, x) if (a$type_id() == Type$DICTIONARY){ - a <- shared_ptr(`arrow::DictionaryArray`, x) + a <- shared_ptr(DictionaryArray, x) } else if (a$type_id() == Type$STRUCT) { - a <- shared_ptr(`arrow::StructArray`, x) + a <- shared_ptr(StructArray, x) } else if (a$type_id() == Type$LIST) { - a <- shared_ptr(`arrow::ListArray`, x) + a <- shared_ptr(ListArray, x) } a } diff --git a/r/R/feather.R b/r/R/feather.R index 30a7fb2c55e..3319225ce48 100644 --- a/r/R/feather.R +++ b/r/R/feather.R @@ -34,7 +34,7 @@ num_rows = function() ipc___feather___TableReader__num_rows(self), num_columns = function() ipc___feather___TableReader__num_columns(self), GetColumnName = function(i) ipc___feather___TableReader__GetColumnName(self, i), - GetColumn = function(i) shared_ptr(`Array`, ipc___feather___TableReader__GetColumn(self, i)), + GetColumn = function(i) shared_ptr(Array, 
ipc___feather___TableReader__GetColumn(self, i)), Read = function(columns) { shared_ptr(`arrow::Table`, ipc___feather___TableReader__Read(self, columns)) } From 9fbecda4678c9ddf718f82c78bdcc14be93e1493 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Wed, 4 Sep 2019 16:01:25 -0700 Subject: [PATCH 04/37] A few more backticks --- r/R/array.R | 2 +- r/R/type.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/r/R/array.R b/r/R/array.R index 18eecc21aa7..1e3ab51efc1 100644 --- a/r/R/array.R +++ b/r/R/array.R @@ -77,7 +77,7 @@ Array <- R6Class("Array", type_id = function() Array__type_id(self), Equals = function(other) Array__Equals(self, other), ApproxEquals = function(other) Array__ApproxEquals(self, other), - data = function() shared_ptr(`ArrayData`, Array__data(self)), + data = function() shared_ptr(ArrayData, Array__data(self)), as_vector = function() Array__as_vector(self), ToString = function() Array__ToString(self), Slice = function(offset, length = NULL){ diff --git a/r/R/type.R b/r/R/type.R index f70851f0986..9d9df7875be 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -114,7 +114,7 @@ type.default <- function(x) { } #' @export -`type.Array` <- function(x) x$type +type.Array <- function(x) x$type #' @export `type.arrow::ChunkedArray` <- function(x) x$type From 3f1cd7184b1556b3393aa08cc430968f2b7892d1 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Thu, 5 Sep 2019 10:00:22 -0700 Subject: [PATCH 05/37] Object --- r/NAMESPACE | 2 +- r/R/ArrayData.R | 2 +- r/R/ChunkedArray.R | 2 +- r/R/Column.R | 2 +- r/R/Field.R | 2 +- r/R/RecordBatch.R | 2 +- r/R/RecordBatchReader.R | 4 ++-- r/R/RecordBatchWriter.R | 2 +- r/R/Schema.R | 2 +- r/R/Table.R | 2 +- r/R/array.R | 6 +++--- r/R/arrow-package.R | 2 +- r/R/buffer.R | 2 +- r/R/compression.R | 2 +- r/R/compute.R | 2 +- r/R/csv.R | 8 ++++---- r/R/feather.R | 4 ++-- r/R/io.R | 4 ++-- r/R/json.R | 6 +++--- r/R/memory_pool.R | 2 +- r/R/message.R | 4 ++-- r/R/parquet.R | 4 ++-- r/R/type.R | 4 ++-- 
r/man/array.Rd | 3 ++- 24 files changed, 38 insertions(+), 37 deletions(-) diff --git a/r/NAMESPACE b/r/NAMESPACE index 5aac994770a..77750c393cc 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -1,6 +1,6 @@ # Generated by roxygen2: do not edit by hand -S3method("!=","arrow::Object") +S3method("!=",Object) S3method("==","arrow::DataType") S3method("==","arrow::Field") S3method("==","arrow::RecordBatch") diff --git a/r/R/ArrayData.R b/r/R/ArrayData.R index 25ec39fdd7d..84c454191c1 100644 --- a/r/R/ArrayData.R +++ b/r/R/ArrayData.R @@ -42,7 +42,7 @@ #' @rdname ArrayData #' @name ArrayData ArrayData <- R6Class("ArrayData", - inherit = `arrow::Object`, + inherit = Object, active = list( type = function() `arrow::DataType`$dispatch(ArrayData__get_type(self)), length = function() ArrayData__get_length(self), diff --git a/r/R/ChunkedArray.R b/r/R/ChunkedArray.R index b2f4bd76177..8cab186b974 100644 --- a/r/R/ChunkedArray.R +++ b/r/R/ChunkedArray.R @@ -29,7 +29,7 @@ #' #' @rdname arrow__ChunkedArray #' @name arrow__ChunkedArray -`arrow::ChunkedArray` <- R6Class("arrow::ChunkedArray", inherit = `arrow::Object`, +`arrow::ChunkedArray` <- R6Class("arrow::ChunkedArray", inherit = Object, public = list( length = function() ChunkedArray__length(self), chunk = function(i) Array$create(ChunkedArray__chunk(self, i)), diff --git a/r/R/Column.R b/r/R/Column.R index 0487425140d..cd7612eb462 100644 --- a/r/R/Column.R +++ b/r/R/Column.R @@ -29,7 +29,7 @@ #' #' @rdname arrow__Column #' @name arrow__Column -`arrow::Column` <- R6Class("arrow::Column", inherit = `arrow::Object`, +`arrow::Column` <- R6Class("arrow::Column", inherit = Object, public = list( length = function() Column__length(self), data = function() shared_ptr(`arrow::ChunkedArray`, Column__data(self)) diff --git a/r/R/Field.R b/r/R/Field.R index cc2f6cd185d..56c8942f23e 100644 --- a/r/R/Field.R +++ b/r/R/Field.R @@ -29,7 +29,7 @@ #' #' @rdname arrow__Field #' @name arrow__Field -`arrow::Field` <- R6Class("arrow::Field", inherit 
= `arrow::Object`, +`arrow::Field` <- R6Class("arrow::Field", inherit = Object, public = list( ToString = function() { Field__ToString(self) diff --git a/r/R/RecordBatch.R b/r/R/RecordBatch.R index 8e5ed83c0e3..43464ea99fd 100644 --- a/r/R/RecordBatch.R +++ b/r/R/RecordBatch.R @@ -29,7 +29,7 @@ #' #' @rdname arrow__RecordBatch #' @name arrow__RecordBatch -`arrow::RecordBatch` <- R6Class("arrow::RecordBatch", inherit = `arrow::Object`, +`arrow::RecordBatch` <- R6Class("arrow::RecordBatch", inherit = Object, public = list( column = function(i) shared_ptr(Array, RecordBatch__column(self, i)), column_name = function(i) RecordBatch__column_name(self, i), diff --git a/r/R/RecordBatchReader.R b/r/R/RecordBatchReader.R index 6593b0bb0e6..1bb95b954f4 100644 --- a/r/R/RecordBatchReader.R +++ b/r/R/RecordBatchReader.R @@ -29,7 +29,7 @@ #' #' @rdname arrow__RecordBatchReader #' @name arrow__RecordBatchReader -`arrow::RecordBatchReader` <- R6Class("arrow::RecordBatchReader", inherit = `arrow::Object`, +`arrow::RecordBatchReader` <- R6Class("arrow::RecordBatchReader", inherit = Object, public = list( read_next_batch = function() { shared_ptr(`arrow::RecordBatch`, RecordBatchReader__ReadNext(self)) @@ -70,7 +70,7 @@ #' #' @rdname arrow__ipc__RecordBatchFileReader #' @name arrow__ipc__RecordBatchFileReader -`arrow::ipc::RecordBatchFileReader` <- R6Class("arrow::ipc::RecordBatchFileReader", inherit = `arrow::Object`, +`arrow::ipc::RecordBatchFileReader` <- R6Class("arrow::ipc::RecordBatchFileReader", inherit = Object, public = list( get_batch = function(i) shared_ptr(`arrow::RecordBatch`, ipc___RecordBatchFileReader__ReadRecordBatch(self, i)), diff --git a/r/R/RecordBatchWriter.R b/r/R/RecordBatchWriter.R index 7185dc2a9a5..fcbcc478258 100644 --- a/r/R/RecordBatchWriter.R +++ b/r/R/RecordBatchWriter.R @@ -36,7 +36,7 @@ #' #' @rdname arrow__ipc__RecordBatchWriter #' @name arrow__ipc__RecordBatchWriter -`arrow::ipc::RecordBatchWriter` <- R6Class("arrow::ipc::RecordBatchWriter", 
inherit = `arrow::Object`, +`arrow::ipc::RecordBatchWriter` <- R6Class("arrow::ipc::RecordBatchWriter", inherit = Object, public = list( write_batch = function(batch) ipc___RecordBatchWriter__WriteRecordBatch(self, batch), write_table = function(table) ipc___RecordBatchWriter__WriteTable(self, table), diff --git a/r/R/Schema.R b/r/R/Schema.R index 906841b1ccf..be7cfacc8b9 100644 --- a/r/R/Schema.R +++ b/r/R/Schema.R @@ -42,7 +42,7 @@ #' @rdname arrow__Schema #' @name arrow__Schema `arrow::Schema` <- R6Class("arrow::Schema", - inherit = `arrow::Object`, + inherit = Object, public = list( ToString = function() Schema__ToString(self), num_fields = function() Schema__num_fields(self), diff --git a/r/R/Table.R b/r/R/Table.R index 1255c8a0e1b..d322dc5e1ed 100644 --- a/r/R/Table.R +++ b/r/R/Table.R @@ -29,7 +29,7 @@ #' #' @rdname arrow__Table #' @name arrow__Table -`arrow::Table` <- R6Class("arrow::Table", inherit = `arrow::Object`, +`arrow::Table` <- R6Class("arrow::Table", inherit = Object, public = list( column = function(i) shared_ptr(`arrow::ChunkedArray`, Table__column(self, i)), field = function(i) shared_ptr(`arrow::Field`, Table__field(self, i)), diff --git a/r/R/array.R b/r/R/array.R index 1e3ab51efc1..bce8b3c9f6d 100644 --- a/r/R/array.R +++ b/r/R/array.R @@ -65,11 +65,11 @@ #' - `$Slice(offset, length = NULL)` : Construct a zero-copy slice of the array with the indicated offset and length. If length is `NULL`, the slice goes until the end of the array. 
#' - `$RangeEquals(other, start_idx, end_idx, other_start_idx)` : #' -#' @rdname Array -#' @name Array +#' @rdname array +#' @name array #' @export Array <- R6Class("Array", - inherit = `arrow::Object`, + inherit = Object, public = list( IsNull = function(i) Array__IsNull(self, i), IsValid = function(i) Array__IsValid(self, i), diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R index 0f0a26b0d81..00a911bbe25 100644 --- a/r/R/arrow-package.R +++ b/r/R/arrow-package.R @@ -44,7 +44,7 @@ option_use_threads <- function() { } #' @include enums.R -`arrow::Object` <- R6Class("arrow::Object", +Object <- R6Class("Object", public = list( initialize = function(xp) self$set_pointer(xp), diff --git a/r/R/buffer.R b/r/R/buffer.R index 12d0699762d..cfd244c5c7d 100644 --- a/r/R/buffer.R +++ b/r/R/buffer.R @@ -33,7 +33,7 @@ #' #' @rdname arrow__Buffer #' @name arrow__Buffer -`arrow::Buffer` <- R6Class("arrow::Buffer", inherit = `arrow::Object`, +`arrow::Buffer` <- R6Class("arrow::Buffer", inherit = Object, public = list( ZeroPadding = function() Buffer__ZeroPadding(self), data = function() Buffer__data(self) diff --git a/r/R/compression.R b/r/R/compression.R index 6e56a76bbf9..7550b39c5d6 100644 --- a/r/R/compression.R +++ b/r/R/compression.R @@ -19,7 +19,7 @@ #' @include arrow-package.R #' @include io.R -`arrow::util::Codec` <- R6Class("arrow::util::Codec", inherit = `arrow::Object`) +`arrow::util::Codec` <- R6Class("arrow::util::Codec", inherit = Object) `arrow::io::CompressedOutputStream` <- R6Class("arrow::io::CompressedOutputStream", inherit = `arrow::io::OutputStream`) `arrow::io::CompressedInputStream` <- R6Class("arrow::io::CompressedInputStream", inherit = `arrow::io::InputStream`) diff --git a/r/R/compute.R b/r/R/compute.R index 6cf73139d86..18a2493fe55 100644 --- a/r/R/compute.R +++ b/r/R/compute.R @@ -17,7 +17,7 @@ #' @include array.R -`arrow::compute::CastOptions` <- R6Class("arrow::compute::CastOptions", inherit = `arrow::Object`) +`arrow::compute::CastOptions` <- 
R6Class("arrow::compute::CastOptions", inherit = Object) #' Cast options #' diff --git a/r/R/csv.R b/r/R/csv.R index f2d9fab7403..71e9823b930 100644 --- a/r/R/csv.R +++ b/r/R/csv.R @@ -181,15 +181,15 @@ read_tsv_arrow <- function(file, #' @include arrow-package.R -`arrow::csv::TableReader` <- R6Class("arrow::csv::TableReader", inherit = `arrow::Object`, +`arrow::csv::TableReader` <- R6Class("arrow::csv::TableReader", inherit = Object, public = list( Read = function() shared_ptr(`arrow::Table`, csv___TableReader__Read(self)) ) ) -`arrow::csv::ReadOptions` <- R6Class("arrow::csv::ReadOptions", inherit = `arrow::Object`) -`arrow::csv::ParseOptions` <- R6Class("arrow::csv::ParseOptions", inherit = `arrow::Object`) -`arrow::csv::ConvertOptions` <- R6Class("arrow::csv::ConvertOptions", inherit = `arrow::Object`) +`arrow::csv::ReadOptions` <- R6Class("arrow::csv::ReadOptions", inherit = Object) +`arrow::csv::ParseOptions` <- R6Class("arrow::csv::ParseOptions", inherit = Object) +`arrow::csv::ConvertOptions` <- R6Class("arrow::csv::ConvertOptions", inherit = Object) #' Read options for the Arrow file readers #' diff --git a/r/R/feather.R b/r/R/feather.R index 3319225ce48..6f77ffb1fe2 100644 --- a/r/R/feather.R +++ b/r/R/feather.R @@ -17,7 +17,7 @@ #' @include arrow-package.R -`arrow::ipc::feather::TableWriter` <- R6Class("arrow::ipc::feather::TableWriter", inherit = `arrow::Object`, +`arrow::ipc::feather::TableWriter` <- R6Class("arrow::ipc::feather::TableWriter", inherit = Object, public = list( SetDescription = function(description) ipc___feather___TableWriter__SetDescription(self, description), SetNumRows = function(num_rows) ipc___feather___TableWriter__SetNumRows(self, num_rows), @@ -26,7 +26,7 @@ ) ) -`arrow::ipc::feather::TableReader` <- R6Class("arrow::ipc::feather::TableReader", inherit = `arrow::Object`, +`arrow::ipc::feather::TableReader` <- R6Class("arrow::ipc::feather::TableReader", inherit = Object, public = list( GetDescription = function() 
ipc___feather___TableReader__GetDescription(self), HasDescription = function() ipc__feather___TableReader__HasDescription(self), diff --git a/r/R/io.R b/r/R/io.R index 59d573f3b14..182876ff91e 100644 --- a/r/R/io.R +++ b/r/R/io.R @@ -21,7 +21,7 @@ # OutputStream ------------------------------------------------------------ -`arrow::io::Writable` <- R6Class("arrow::io::Writable", inherit = `arrow::Object`, +`arrow::io::Writable` <- R6Class("arrow::io::Writable", inherit = Object, public = list( write = function(x) io___Writable__write(self, buffer(x)) ) @@ -131,7 +131,7 @@ #' #' @rdname arrow__io__Readable #' @name arrow__io__Readable -`arrow::io::Readable` <- R6Class("arrow::io::Readable", inherit = `arrow::Object`, +`arrow::io::Readable` <- R6Class("arrow::io::Readable", inherit = Object, public = list( Read = function(nbytes) shared_ptr(`arrow::Buffer`, io___Readable__Read(self, nbytes)) ) diff --git a/r/R/json.R b/r/R/json.R index cd43231e422..7ad67c7c028 100644 --- a/r/R/json.R +++ b/r/R/json.R @@ -31,14 +31,14 @@ #' #' @rdname arrow__json__TableReader #' @name arrow__json__TableReader -`arrow::json::TableReader` <- R6Class("arrow::json::TableReader", inherit = `arrow::Object`, +`arrow::json::TableReader` <- R6Class("arrow::json::TableReader", inherit = Object, public = list( Read = function() shared_ptr(`arrow::Table`, json___TableReader__Read(self)) ) ) -`arrow::json::ReadOptions` <- R6Class("arrow::json::ReadOptions", inherit = `arrow::Object`) -`arrow::json::ParseOptions` <- R6Class("arrow::json::ParseOptions", inherit = `arrow::Object`) +`arrow::json::ReadOptions` <- R6Class("arrow::json::ReadOptions", inherit = Object) +`arrow::json::ParseOptions` <- R6Class("arrow::json::ParseOptions", inherit = Object) #' @rdname csv_read_options #' @export diff --git a/r/R/memory_pool.R b/r/R/memory_pool.R index 771e05bebf5..4148f2de8e0 100644 --- a/r/R/memory_pool.R +++ b/r/R/memory_pool.R @@ -30,7 +30,7 @@ #' @rdname arrow___MemoryPool #' @name arrow__MemoryPool 
`arrow::MemoryPool` <- R6Class("arrow::MemoryPool", - inherit = `arrow::Object`, + inherit = Object, public = list( # TODO: Allocate # TODO: Reallocate diff --git a/r/R/message.R b/r/R/message.R index e0add59ac53..27d1b5da67d 100644 --- a/r/R/message.R +++ b/r/R/message.R @@ -29,7 +29,7 @@ #' #' @rdname arrow__ipc__Message #' @name arrow__ipc__Message -`arrow::ipc::Message` <- R6Class("arrow::ipc::Message", inherit = `arrow::Object`, +`arrow::ipc::Message` <- R6Class("arrow::ipc::Message", inherit = Object, public = list( Equals = function(other){ assert_that(inherits(other, "arrow::ipc::Message")) @@ -60,7 +60,7 @@ #' #' @rdname arrow__ipc__MessageReader #' @name arrow__ipc__MessageReader -`arrow::ipc::MessageReader` <- R6Class("arrow::ipc::MessageReader", inherit = `arrow::Object`, +`arrow::ipc::MessageReader` <- R6Class("arrow::ipc::MessageReader", inherit = Object, public = list( ReadNextMessage = function() unique_ptr(`arrow::ipc::Message`, ipc___MessageReader__ReadNextMessage(self)) ) diff --git a/r/R/parquet.R b/r/R/parquet.R index 6f122e57fa3..fe9d25194b7 100644 --- a/r/R/parquet.R +++ b/r/R/parquet.R @@ -18,7 +18,7 @@ #' @include arrow-package.R `parquet::arrow::FileReader` <- R6Class("parquet::arrow::FileReader", - inherit = `arrow::Object`, + inherit = Object, public = list( ReadTable = function(col_select = NULL) { col_select <- enquo(col_select) @@ -37,7 +37,7 @@ ) `parquet::arrow::ArrowReaderProperties` <- R6Class("parquet::arrow::ArrowReaderProperties", - inherit = `arrow::Object`, + inherit = Object, public = list( read_dictionary = function(column_index) { parquet___arrow___ArrowReaderProperties__get_read_dictionary(self, column_index) diff --git a/r/R/type.R b/r/R/type.R index 9d9df7875be..b5a24ada036 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -18,7 +18,7 @@ #' @include arrow-package.R #' @export -`!=.arrow::Object` <- function(lhs, rhs){ +`!=.Object` <- function(lhs, rhs){ !(lhs == rhs) } @@ -35,7 +35,7 @@ #' @rdname arrow__DataType #' @name 
arrow__DataType `arrow::DataType` <- R6Class("arrow::DataType", - inherit = `arrow::Object`, + inherit = Object, public = list( ToString = function() { DataType__ToString(self) diff --git a/r/man/array.Rd b/r/man/array.Rd index 8dd07a9bb74..9179174f1c4 100644 --- a/r/man/array.Rd +++ b/r/man/array.Rd @@ -1,7 +1,8 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/array.R \docType{class} -\name{Array} +\name{array} +\alias{array} \alias{Array} \title{class Array From bbf07993ced80d807709d5d750b00d7bec25e4fb Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Thu, 5 Sep 2019 10:08:16 -0700 Subject: [PATCH 06/37] Buffer --- r/NAMESPACE | 16 ++++++------- r/R/ArrayData.R | 6 ++--- r/R/RecordBatchReader.R | 4 ++-- r/R/Schema.R | 2 +- r/R/buffer.R | 29 ++++++++++------------- r/R/io.R | 18 +++++++------- r/R/message.R | 4 ++-- r/R/read_record_batch.R | 4 ++-- r/man/FixedSizeBufferWriter.Rd | 2 +- r/man/{ArrayData.Rd => array-data.Rd} | 3 ++- r/man/arrow__Buffer.Rd | 21 ---------------- r/man/arrow__io__OutputStream.Rd | 2 +- r/man/buffer.Rd | 21 +++++++++++++--- r/man/read_record_batch.Rd | 2 +- r/tests/testthat/test-buffer.R | 20 ++++++++-------- r/tests/testthat/test-bufferreader.R | 2 +- r/tests/testthat/test-message.R | 8 +++---- r/tests/testthat/test-messagereader.R | 16 ++++++------- r/tests/testthat/test-recordbatchreader.R | 4 ++-- r/tests/testthat/test-schema.R | 2 +- 20 files changed, 89 insertions(+), 97 deletions(-) rename r/man/{ArrayData.Rd => array-data.Rd} (90%) delete mode 100644 r/man/arrow__Buffer.Rd diff --git a/r/NAMESPACE b/r/NAMESPACE index 77750c393cc..85c855677b9 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -7,7 +7,7 @@ S3method("==","arrow::RecordBatch") S3method("==","arrow::Schema") S3method("==","arrow::ipc::Message") S3method("==",Array) -S3method(BufferReader,"arrow::Buffer") +S3method(BufferReader,Buffer) S3method(BufferReader,default) S3method(CompressedInputStream,"arrow::io::InputStream") 
S3method(CompressedInputStream,character) @@ -18,25 +18,25 @@ S3method(FeatherTableReader,"arrow::ipc::feather::TableReader") S3method(FeatherTableReader,character) S3method(FeatherTableReader,raw) S3method(FeatherTableWriter,"arrow::io::OutputStream") -S3method(FixedSizeBufferWriter,"arrow::Buffer") +S3method(FixedSizeBufferWriter,Buffer) S3method(FixedSizeBufferWriter,default) S3method(MessageReader,"arrow::io::InputStream") S3method(MessageReader,default) -S3method(RecordBatchFileReader,"arrow::Buffer") S3method(RecordBatchFileReader,"arrow::io::RandomAccessFile") +S3method(RecordBatchFileReader,Buffer) S3method(RecordBatchFileReader,character) S3method(RecordBatchFileReader,raw) S3method(RecordBatchFileWriter,"arrow::io::OutputStream") S3method(RecordBatchFileWriter,character) -S3method(RecordBatchStreamReader,"arrow::Buffer") S3method(RecordBatchStreamReader,"arrow::io::InputStream") +S3method(RecordBatchStreamReader,Buffer) S3method(RecordBatchStreamReader,raw) S3method(RecordBatchStreamWriter,"arrow::io::OutputStream") S3method(RecordBatchStreamWriter,character) S3method(as.data.frame,"arrow::RecordBatch") S3method(as.data.frame,"arrow::Table") -S3method(as.raw,"arrow::Buffer") -S3method(buffer,"arrow::Buffer") +S3method(as.raw,Buffer) +S3method(buffer,Buffer) S3method(buffer,complex) S3method(buffer,default) S3method(buffer,integer) @@ -61,13 +61,13 @@ S3method(print,"arrow-enum") S3method(read_message,"arrow::io::InputStream") S3method(read_message,"arrow::ipc::MessageReader") S3method(read_message,default) -S3method(read_record_batch,"arrow::Buffer") S3method(read_record_batch,"arrow::io::InputStream") S3method(read_record_batch,"arrow::ipc::Message") +S3method(read_record_batch,Buffer) S3method(read_record_batch,raw) -S3method(read_schema,"arrow::Buffer") S3method(read_schema,"arrow::io::InputStream") S3method(read_schema,"arrow::ipc::Message") +S3method(read_schema,Buffer) S3method(read_schema,raw) 
S3method(read_table,"arrow::ipc::RecordBatchFileReader") S3method(read_table,"arrow::ipc::RecordBatchStreamReader") diff --git a/r/R/ArrayData.R b/r/R/ArrayData.R index 84c454191c1..e69666070a8 100644 --- a/r/R/ArrayData.R +++ b/r/R/ArrayData.R @@ -39,8 +39,8 @@ #' #' ... #' -#' @rdname ArrayData -#' @name ArrayData +#' @rdname array-data +#' @name array-data ArrayData <- R6Class("ArrayData", inherit = Object, active = list( @@ -48,6 +48,6 @@ ArrayData <- R6Class("ArrayData", length = function() ArrayData__get_length(self), null_count = function() ArrayData__get_null_count(self), offset = function() ArrayData__get_offset(self), - buffers = function() map(ArrayData__buffers(self), shared_ptr, class = `arrow::Buffer`) + buffers = function() map(ArrayData__buffers(self), shared_ptr, class = Buffer) ) ) diff --git a/r/R/RecordBatchReader.R b/r/R/RecordBatchReader.R index 1bb95b954f4..13913fa8ba0 100644 --- a/r/R/RecordBatchReader.R +++ b/r/R/RecordBatchReader.R @@ -102,7 +102,7 @@ RecordBatchStreamReader <- function(stream){ } #' @export -`RecordBatchStreamReader.arrow::Buffer` <- function(stream) { +`RecordBatchStreamReader.Buffer` <- function(stream) { RecordBatchStreamReader(BufferReader(stream)) } @@ -128,7 +128,7 @@ RecordBatchFileReader <- function(file) { } #' @export -`RecordBatchFileReader.arrow::Buffer` <- function(file) { +`RecordBatchFileReader.Buffer` <- function(file) { RecordBatchFileReader(BufferReader(file)) } diff --git a/r/R/Schema.R b/r/R/Schema.R index be7cfacc8b9..4e524c4288a 100644 --- a/r/R/Schema.R +++ b/r/R/Schema.R @@ -93,7 +93,7 @@ read_schema <- function(stream, ...) { } #' @export -`read_schema.arrow::Buffer` <- function(stream, ...) { +`read_schema.Buffer` <- function(stream, ...) 
{ stream <- BufferReader(stream) on.exit(stream$close()) shared_ptr(`arrow::Schema`, ipc___ReadSchema_InputStream(stream)) diff --git a/r/R/buffer.R b/r/R/buffer.R index cfd244c5c7d..564f0393655 100644 --- a/r/R/buffer.R +++ b/r/R/buffer.R @@ -18,7 +18,7 @@ #' @include arrow-package.R #' @include enums.R -#' @title class arrow::Buffer +#' @title class Buffer #' #' @usage NULL #' @format NULL @@ -31,9 +31,9 @@ #' - `$size()` : #' - `$capacity()`: #' -#' @rdname arrow__Buffer -#' @name arrow__Buffer -`arrow::Buffer` <- R6Class("arrow::Buffer", inherit = Object, +#' @rdname buffer +#' @name buffer +Buffer <- R6Class("Buffer", inherit = Object, public = list( ZeroPadding = function() Buffer__ZeroPadding(self), data = function() Buffer__data(self) @@ -47,16 +47,16 @@ ) #' @export -`as.raw.arrow::Buffer` <- function(x) x$data() +as.raw.Buffer <- function(x) x$data() -#' Create a [arrow::Buffer][arrow__Buffer] from an R object +#' Create a [Buffer][buffer] from an R object #' #' @param x R object. 
Only raw, numeric and integer vectors are currently supported #' -#' @return an instance of [arrow::Buffer][arrow__Buffer] that borrows memory from `x` +#' @return an instance of [Buffer][buffer] that borrows memory from `x` #' #' @export -buffer <- function(x){ +buffer <- function(x) { UseMethod("buffer") } @@ -67,26 +67,23 @@ buffer.default <- function(x) { #' @export buffer.raw <- function(x) { - shared_ptr(`arrow::Buffer`, r___RBuffer__initialize(x)) + shared_ptr(Buffer, r___RBuffer__initialize(x)) } #' @export buffer.numeric <- function(x) { - shared_ptr(`arrow::Buffer`, r___RBuffer__initialize(x)) + shared_ptr(Buffer, r___RBuffer__initialize(x)) } #' @export buffer.integer <- function(x) { - shared_ptr(`arrow::Buffer`, r___RBuffer__initialize(x)) + shared_ptr(Buffer, r___RBuffer__initialize(x)) } #' @export buffer.complex <- function(x) { - shared_ptr(`arrow::Buffer`, r___RBuffer__initialize(x)) + shared_ptr(Buffer, r___RBuffer__initialize(x)) } #' @export -`buffer.arrow::Buffer` <- function(x) { - x -} - +buffer.Buffer <- function(x) x diff --git a/r/R/io.R b/r/R/io.R index 182876ff91e..796d8415efd 100644 --- a/r/R/io.R +++ b/r/R/io.R @@ -35,7 +35,7 @@ #' #' @section Methods: #' -#' - `arrow::Buffer` `Read`(`int` nbytes): Read `nbytes` bytes +#' - Buffer `Read`(`int` nbytes): Read `nbytes` bytes #' - `void` `close`(): close the stream #' #' @rdname arrow__io__OutputStream @@ -93,7 +93,7 @@ `arrow::io::BufferOutputStream` <- R6Class("arrow::io::BufferOutputStream", inherit = `arrow::io::OutputStream`, public = list( capacity = function() io___BufferOutputStream__capacity(self), - getvalue = function() shared_ptr(`arrow::Buffer`, io___BufferOutputStream__Finish(self)), + getvalue = function() shared_ptr(Buffer, io___BufferOutputStream__Finish(self)), Write = function(bytes) io___BufferOutputStream__Write(self, bytes), Tell = function() io___BufferOutputStream__Tell(self) @@ -133,7 +133,7 @@ #' @name arrow__io__Readable `arrow::io::Readable` <- 
R6Class("arrow::io::Readable", inherit = Object, public = list( - Read = function(nbytes) shared_ptr(`arrow::Buffer`, io___Readable__Read(self, nbytes)) + Read = function(nbytes) shared_ptr(Buffer, io___Readable__Read(self, nbytes)) ) ) @@ -178,9 +178,9 @@ Read = function(nbytes = NULL) { if (is.null(nbytes)) { - shared_ptr(`arrow::Buffer`, io___RandomAccessFile__Read0(self)) + shared_ptr(Buffer, io___RandomAccessFile__Read0(self)) } else { - shared_ptr(`arrow::Buffer`, io___Readable__Read(self, nbytes)) + shared_ptr(Buffer, io___Readable__Read(self, nbytes)) } }, @@ -188,7 +188,7 @@ if (is.null(nbytes)) { nbytes <- self$GetSize() - position } - shared_ptr(`arrow::Buffer`, io___RandomAccessFile__ReadAt(self, position, nbytes)) + shared_ptr(Buffer, io___RandomAccessFile__ReadAt(self, position, nbytes)) } ) ) @@ -311,7 +311,7 @@ BufferOutputStream <- function(initial_capacity = 0L) { #' Open a [arrow::io::FixedSizeBufferWriter][arrow__io__FixedSizeBufferWriter] #' -#' @param buffer [arrow::Buffer][arrow__Buffer] or something [buffer()] can handle +#' @param buffer [Buffer][buffer] or something [buffer()] can handle #' #' @return a [arrow::io::BufferOutputStream][arrow__io__BufferOutputStream] #' @@ -326,7 +326,7 @@ FixedSizeBufferWriter.default <- function(buffer){ } #' @export -`FixedSizeBufferWriter.arrow::Buffer` <- function(buffer){ +`FixedSizeBufferWriter.Buffer` <- function(buffer){ assert_that(buffer$is_mutable) shared_ptr(`arrow::io::FixedSizeBufferWriter`, io___FixedSizeBufferWriter__initialize(buffer)) } @@ -346,6 +346,6 @@ BufferReader.default <- function(x) { } #' @export -`BufferReader.arrow::Buffer` <- function(x) { +`BufferReader.Buffer` <- function(x) { shared_ptr(`arrow::io::BufferReader`, io___BufferReader__initialize(x)) } diff --git a/r/R/message.R b/r/R/message.R index 27d1b5da67d..3d2e2d532d8 100644 --- a/r/R/message.R +++ b/r/R/message.R @@ -40,8 +40,8 @@ ), active = list( type = function() ipc___Message__type(self), - metadata = function() 
shared_ptr(`arrow::Buffer`, ipc___Message__metadata(self)), - body = function() shared_ptr(`arrow::Buffer`, ipc___Message__body(self)) + metadata = function() shared_ptr(Buffer, ipc___Message__metadata(self)), + body = function() shared_ptr(Buffer, ipc___Message__body(self)) ) ) diff --git a/r/R/read_record_batch.R b/r/R/read_record_batch.R index cc57b447985..99304ef6183 100644 --- a/r/R/read_record_batch.R +++ b/r/R/read_record_batch.R @@ -17,7 +17,7 @@ #' read [arrow::RecordBatch][arrow__RecordBatch] as encapsulated IPC message, given a known [arrow::Schema][arrow__Schema] #' -#' @param obj a [arrow::ipc::Message][arrow__ipc__Message], a [arrow::io::InputStream][arrow__io__InputStream], a [arrow::Buffer][arrow__Buffer], or a raw vector +#' @param obj a [arrow::ipc::Message][arrow__ipc__Message], a [arrow::io::InputStream][arrow__io__InputStream], a [Buffer][buffer], or a raw vector #' @param schema a [arrow::Schema][arrow__Schema] #' #' @return a [arrow::RecordBatch][arrow__RecordBatch] @@ -47,7 +47,7 @@ read_record_batch.raw <- function(obj, schema){ } #' @export -`read_record_batch.arrow::Buffer` <- function(obj, schema){ +`read_record_batch.Buffer` <- function(obj, schema){ stream <- BufferReader(obj) on.exit(stream$close()) read_record_batch(stream, schema) diff --git a/r/man/FixedSizeBufferWriter.Rd b/r/man/FixedSizeBufferWriter.Rd index 553d61b76e1..ede2124de98 100644 --- a/r/man/FixedSizeBufferWriter.Rd +++ b/r/man/FixedSizeBufferWriter.Rd @@ -7,7 +7,7 @@ FixedSizeBufferWriter(buffer) } \arguments{ -\item{buffer}{\link[=arrow__Buffer]{arrow::Buffer} or something \code{\link[=buffer]{buffer()}} can handle} +\item{buffer}{\link[=buffer]{Buffer} or something \code{\link[=buffer]{buffer()}} can handle} } \value{ a \link[=arrow__io__BufferOutputStream]{arrow::io::BufferOutputStream} diff --git a/r/man/ArrayData.Rd b/r/man/array-data.Rd similarity index 90% rename from r/man/ArrayData.Rd rename to r/man/array-data.Rd index 2cca881da44..095a474dfe7 100644 --- 
a/r/man/ArrayData.Rd +++ b/r/man/array-data.Rd @@ -1,7 +1,8 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/ArrayData.R \docType{class} -\name{ArrayData} +\name{array-data} +\alias{array-data} \alias{ArrayData} \title{class ArrayData} \description{ diff --git a/r/man/arrow__Buffer.Rd b/r/man/arrow__Buffer.Rd deleted file mode 100644 index 135da7a20e7..00000000000 --- a/r/man/arrow__Buffer.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/buffer.R -\docType{class} -\name{arrow__Buffer} -\alias{arrow__Buffer} -\alias{arrow::Buffer} -\title{class arrow::Buffer} -\description{ -class arrow::Buffer -} -\section{Methods}{ - -\itemize{ -\item \code{$is_mutable()} : -\item \code{$ZeroPadding()} : -\item \code{$size()} : -\item \code{$capacity()}: -} -} - -\keyword{datasets} diff --git a/r/man/arrow__io__OutputStream.Rd b/r/man/arrow__io__OutputStream.Rd index c41b815c021..a73271b57e3 100644 --- a/r/man/arrow__io__OutputStream.Rd +++ b/r/man/arrow__io__OutputStream.Rd @@ -11,7 +11,7 @@ OutputStream \section{Methods}{ \itemize{ -\item \code{arrow::Buffer} \code{Read}(\code{int} nbytes): Read \code{nbytes} bytes +\item Buffer \code{Read}(\code{int} nbytes): Read \code{nbytes} bytes \item \code{void} \code{close}(): close the stream } } diff --git a/r/man/buffer.Rd b/r/man/buffer.Rd index 60fd25d4bf1..2099668563a 100644 --- a/r/man/buffer.Rd +++ b/r/man/buffer.Rd @@ -1,8 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/buffer.R +\docType{class} \name{buffer} \alias{buffer} -\title{Create a \link[=arrow__Buffer]{arrow::Buffer} from an R object} +\alias{Buffer} +\title{class Buffer} \usage{ buffer(x) } @@ -10,8 +12,21 @@ buffer(x) \item{x}{R object. 
Only raw, numeric and integer vectors are currently supported} } \value{ -an instance of \link[=arrow__Buffer]{arrow::Buffer} that borrows memory from \code{x} +an instance of \link[=buffer]{Buffer} that borrows memory from \code{x} } \description{ -Create a \link[=arrow__Buffer]{arrow::Buffer} from an R object +class Buffer + +Create a \link[=buffer]{Buffer} from an R object } +\section{Methods}{ + +\itemize{ +\item \code{$is_mutable()} : +\item \code{$ZeroPadding()} : +\item \code{$size()} : +\item \code{$capacity()}: +} +} + +\keyword{datasets} diff --git a/r/man/read_record_batch.Rd b/r/man/read_record_batch.Rd index fef12cbac4a..71e555b04af 100644 --- a/r/man/read_record_batch.Rd +++ b/r/man/read_record_batch.Rd @@ -7,7 +7,7 @@ read_record_batch(obj, schema) } \arguments{ -\item{obj}{a \link[=arrow__ipc__Message]{arrow::ipc::Message}, a \link[=arrow__io__InputStream]{arrow::io::InputStream}, a \link[=arrow__Buffer]{arrow::Buffer}, or a raw vector} +\item{obj}{a \link[=arrow__ipc__Message]{arrow::ipc::Message}, a \link[=arrow__io__InputStream]{arrow::io::InputStream}, a \link[=buffer]{Buffer}, or a raw vector} \item{schema}{a \link[=arrow__Schema]{arrow::Schema}} } diff --git a/r/tests/testthat/test-buffer.R b/r/tests/testthat/test-buffer.R index 4dfbecaf1f5..152b3b2558d 100644 --- a/r/tests/testthat/test-buffer.R +++ b/r/tests/testthat/test-buffer.R @@ -15,37 +15,37 @@ # specific language governing permissions and limitations # under the License. 
-context("arrow::Buffer") +context("Buffer") -test_that("arrow::Buffer can be created from raw vector", { +test_that("Buffer can be created from raw vector", { vec <- raw(123) buf <- buffer(vec) - expect_is(buf, "arrow::Buffer") + expect_is(buf, "Buffer") expect_equal(buf$size, 123) }) -test_that("arrow::Buffer can be created from integer vector", { +test_that("Buffer can be created from integer vector", { vec <- integer(17) buf <- buffer(vec) - expect_is(buf, "arrow::Buffer") + expect_is(buf, "Buffer") expect_equal(buf$size, 17 * 4) }) -test_that("arrow::Buffer can be created from numeric vector", { +test_that("Buffer can be created from numeric vector", { vec <- numeric(17) buf <- buffer(vec) - expect_is(buf, "arrow::Buffer") + expect_is(buf, "Buffer") expect_equal(buf$size, 17 * 8) }) -test_that("arrow::Buffer can be created from complex vector", { +test_that("Buffer can be created from complex vector", { vec <- complex(3) buf <- buffer(vec) - expect_is(buf, "arrow::Buffer") + expect_is(buf, "Buffer") expect_equal(buf$size, 3 * 16) }) -test_that("can convert arrow::Buffer to raw", { +test_that("can convert Buffer to raw", { buf <- buffer(rnorm(10)) expect_equal(buf$data(), as.raw(buf)) }) diff --git a/r/tests/testthat/test-bufferreader.R b/r/tests/testthat/test-bufferreader.R index 72d257101fa..225eb9a8c3d 100644 --- a/r/tests/testthat/test-bufferreader.R +++ b/r/tests/testthat/test-bufferreader.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
-context("arrow::BufferReader") +context("BufferReader") test_that("BufferReader can be created from R objects", { num <- BufferReader(numeric(13)) diff --git a/r/tests/testthat/test-message.R b/r/tests/testthat/test-message.R index 5ddff018974..fb58f0305e9 100644 --- a/r/tests/testthat/test-message.R +++ b/r/tests/testthat/test-message.R @@ -25,8 +25,8 @@ test_that("read_message can read from input stream", { message <- read_message(stream) expect_is(message, "arrow::ipc::Message") expect_equal(message$type, MessageType$RECORD_BATCH) - expect_is(message$body, "arrow::Buffer") - expect_is(message$metadata, "arrow::Buffer") + expect_is(message$body, "Buffer") + expect_is(message$metadata, "Buffer") message <- read_message(stream) expect_null(read_message(stream)) @@ -39,8 +39,8 @@ test_that("read_message() can read Schema messages", { expect_is(message, "arrow::ipc::Message") expect_equal(message$type, MessageType$SCHEMA) - expect_is(message$body, "arrow::Buffer") - expect_is(message$metadata, "arrow::Buffer") + expect_is(message$body, "Buffer") + expect_is(message$metadata, "Buffer") message <- read_message(stream) expect_null(read_message(stream)) diff --git a/r/tests/testthat/test-messagereader.R b/r/tests/testthat/test-messagereader.R index c7260fed169..8eadb9d8d37 100644 --- a/r/tests/testthat/test-messagereader.R +++ b/r/tests/testthat/test-messagereader.R @@ -26,8 +26,8 @@ test_that("MessageReader can be created from raw vectors", { message <- reader$ReadNextMessage() expect_is(message, "arrow::ipc::Message") expect_equal(message$type, MessageType$RECORD_BATCH) - expect_is(message$body, "arrow::Buffer") - expect_is(message$metadata, "arrow::Buffer") + expect_is(message$body, "Buffer") + expect_is(message$metadata, "Buffer") message <- reader$ReadNextMessage() expect_null(message) @@ -40,8 +40,8 @@ test_that("MessageReader can be created from raw vectors", { message <- reader$ReadNextMessage() expect_is(message, "arrow::ipc::Message") 
expect_equal(message$type, MessageType$SCHEMA) - expect_is(message$body, "arrow::Buffer") - expect_is(message$metadata, "arrow::Buffer") + expect_is(message$body, "Buffer") + expect_is(message$metadata, "Buffer") message <- reader$ReadNextMessage() expect_null(message) @@ -60,8 +60,8 @@ test_that("MessageReader can be created from input stream", { message <- reader$ReadNextMessage() expect_is(message, "arrow::ipc::Message") expect_equal(message$type, MessageType$RECORD_BATCH) - expect_is(message$body, "arrow::Buffer") - expect_is(message$metadata, "arrow::Buffer") + expect_is(message$body, "Buffer") + expect_is(message$metadata, "Buffer") message <- reader$ReadNextMessage() expect_null(message) @@ -78,8 +78,8 @@ test_that("MessageReader can be created from input stream", { message <- reader$ReadNextMessage() expect_is(message, "arrow::ipc::Message") expect_equal(message$type, MessageType$SCHEMA) - expect_is(message$body, "arrow::Buffer") - expect_is(message$metadata, "arrow::Buffer") + expect_is(message$body, "Buffer") + expect_is(message$metadata, "Buffer") message <- reader$ReadNextMessage() expect_null(message) diff --git a/r/tests/testthat/test-recordbatchreader.R b/r/tests/testthat/test-recordbatchreader.R index 65f7933b42d..bb6df846632 100644 --- a/r/tests/testthat/test-recordbatchreader.R +++ b/r/tests/testthat/test-recordbatchreader.R @@ -30,7 +30,7 @@ test_that("RecordBatchStreamReader / Writer", { writer$close() buf <- sink$getvalue() - expect_is(buf, "arrow::Buffer") + expect_is(buf, "Buffer") reader <- RecordBatchStreamReader(buf) expect_is(reader, "arrow::ipc::RecordBatchStreamReader") @@ -55,7 +55,7 @@ test_that("RecordBatchFileReader / Writer", { writer$close() buf <- sink$getvalue() - expect_is(buf, "arrow::Buffer") + expect_is(buf, "Buffer") reader <- RecordBatchFileReader(buf) expect_is(reader, "arrow::ipc::RecordBatchFileReader") diff --git a/r/tests/testthat/test-schema.R b/r/tests/testthat/test-schema.R index 10e784f49ec..eec27be5f9c 100644 --- 
a/r/tests/testthat/test-schema.R +++ b/r/tests/testthat/test-schema.R @@ -36,7 +36,7 @@ test_that("reading schema from Buffer", { writer$close() buffer <- stream$getvalue() - expect_is(buffer, "arrow::Buffer") + expect_is(buffer, "Buffer") reader <- MessageReader(buffer) expect_is(reader, "arrow::ipc::MessageReader") From 3b4b4921820b945da3ff99c5c3daad91118b759b Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Thu, 5 Sep 2019 10:19:50 -0700 Subject: [PATCH 07/37] ChunkedArray --- r/NAMESPACE | 2 +- r/R/ChunkedArray.R | 20 +++++++++---------- r/R/Column.R | 2 +- r/R/Table.R | 2 +- r/R/type.R | 2 +- ...rrow__ChunkedArray.Rd => chunked-array.Rd} | 10 +++++----- r/man/chunked_array.Rd | 4 ++-- r/src/table.cpp | 4 ++-- r/tests/testthat/test-chunkedarray.R | 18 ++++++++--------- r/tests/testthat/test-read-write.R | 6 +++--- r/tests/testthat/test-type.R | 2 +- 11 files changed, 36 insertions(+), 36 deletions(-) rename r/man/{arrow__ChunkedArray.Rd => chunked-array.Rd} (54%) diff --git a/r/NAMESPACE b/r/NAMESPACE index 85c855677b9..47e77af305f 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -73,9 +73,9 @@ S3method(read_table,"arrow::ipc::RecordBatchFileReader") S3method(read_table,"arrow::ipc::RecordBatchStreamReader") S3method(read_table,character) S3method(read_table,raw) -S3method(type,"arrow::ChunkedArray") S3method(type,"arrow::Column") S3method(type,Array) +S3method(type,ChunkedArray) S3method(type,default) S3method(write_arrow,"arrow::ipc::RecordBatchWriter") S3method(write_arrow,character) diff --git a/r/R/ChunkedArray.R b/r/R/ChunkedArray.R index 8cab186b974..3e92411102c 100644 --- a/r/R/ChunkedArray.R +++ b/r/R/ChunkedArray.R @@ -17,7 +17,7 @@ #' @include arrow-package.R -#' @title class arrow::ChunkedArray +#' @title class ChunkedArray #' #' @usage NULL #' @format NULL @@ -27,24 +27,24 @@ #' #' TODO #' -#' @rdname arrow__ChunkedArray -#' @name arrow__ChunkedArray -`arrow::ChunkedArray` <- R6Class("arrow::ChunkedArray", inherit = Object, +#' @rdname 
chunked-array +#' @name chunked-array +ChunkedArray <- R6Class("ChunkedArray", inherit = Object, public = list( length = function() ChunkedArray__length(self), chunk = function(i) Array$create(ChunkedArray__chunk(self, i)), as_vector = function() ChunkedArray__as_vector(self), Slice = function(offset, length = NULL){ if (is.null(length)) { - shared_ptr(`arrow::ChunkedArray`, ChunkArray__Slice1(self, offset)) + shared_ptr(ChunkedArray, ChunkArray__Slice1(self, offset)) } else { - shared_ptr(`arrow::ChunkedArray`, ChunkArray__Slice2(self, offset, length)) + shared_ptr(ChunkedArray, ChunkArray__Slice2(self, offset, length)) } }, cast = function(target_type, safe = TRUE, options = cast_options(safe)) { assert_that(inherits(target_type, "arrow::DataType")) assert_that(inherits(options, "arrow::compute::CastOptions")) - shared_ptr(`arrow::ChunkedArray`, ChunkedArray__cast(self, target_type, options)) + shared_ptr(ChunkedArray, ChunkedArray__cast(self, target_type, options)) } ), active = list( @@ -55,12 +55,12 @@ ) ) -#' create an [arrow::ChunkedArray][arrow__ChunkedArray] from various R vectors +#' Create a [ChunkedArray][chunked-array] from various R vectors #' #' @param \dots Vectors to coerce #' @param type currently ignored #' #' @export -chunked_array <- function(..., type = NULL){ - shared_ptr(`arrow::ChunkedArray`, ChunkedArray__from_list(list2(...), type)) +chunked_array <- function(..., type = NULL) { + shared_ptr(ChunkedArray, ChunkedArray__from_list(list2(...), type)) } diff --git a/r/R/Column.R b/r/R/Column.R index cd7612eb462..c0babd934fd 100644 --- a/r/R/Column.R +++ b/r/R/Column.R @@ -32,7 +32,7 @@ `arrow::Column` <- R6Class("arrow::Column", inherit = Object, public = list( length = function() Column__length(self), - data = function() shared_ptr(`arrow::ChunkedArray`, Column__data(self)) + data = function() shared_ptr(ChunkedArray, Column__data(self)) ), active = list( diff --git a/r/R/Table.R b/r/R/Table.R index d322dc5e1ed..942248695cb 100644 --- 
a/r/R/Table.R +++ b/r/R/Table.R @@ -31,7 +31,7 @@ #' @name arrow__Table `arrow::Table` <- R6Class("arrow::Table", inherit = Object, public = list( - column = function(i) shared_ptr(`arrow::ChunkedArray`, Table__column(self, i)), + column = function(i) shared_ptr(ChunkedArray, Table__column(self, i)), field = function(i) shared_ptr(`arrow::Field`, Table__field(self, i)), serialize = function(output_stream, ...) write_table(self, output_stream, ...), diff --git a/r/R/type.R b/r/R/type.R index b5a24ada036..4e88dd59c04 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -117,7 +117,7 @@ type.default <- function(x) { type.Array <- function(x) x$type #' @export -`type.arrow::ChunkedArray` <- function(x) x$type +type.ChunkedArray <- function(x) x$type #' @export `type.arrow::Column` <- function(x) x$type diff --git a/r/man/arrow__ChunkedArray.Rd b/r/man/chunked-array.Rd similarity index 54% rename from r/man/arrow__ChunkedArray.Rd rename to r/man/chunked-array.Rd index a87bf1c0dcc..3a68759b1a1 100644 --- a/r/man/arrow__ChunkedArray.Rd +++ b/r/man/chunked-array.Rd @@ -1,12 +1,12 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/ChunkedArray.R \docType{class} -\name{arrow__ChunkedArray} -\alias{arrow__ChunkedArray} -\alias{arrow::ChunkedArray} -\title{class arrow::ChunkedArray} +\name{chunked-array} +\alias{chunked-array} +\alias{ChunkedArray} +\title{class ChunkedArray} \description{ -class arrow::ChunkedArray +class ChunkedArray } \section{Methods}{ diff --git a/r/man/chunked_array.Rd b/r/man/chunked_array.Rd index 07dac8a841d..21a890cf5ca 100644 --- a/r/man/chunked_array.Rd +++ b/r/man/chunked_array.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/ChunkedArray.R \name{chunked_array} \alias{chunked_array} -\title{create an \link[=arrow__ChunkedArray]{arrow::ChunkedArray} from various R vectors} +\title{Create a \link[=chunked-array]{ChunkedArray} from various R vectors} \usage{ chunked_array(..., type = NULL) } @@ -12,5 +12,5 @@ 
chunked_array(..., type = NULL) \item{type}{currently ignored} } \description{ -create an \link[=arrow__ChunkedArray]{arrow::ChunkedArray} from various R vectors +Create a \link[=chunked-array]{ChunkedArray} from various R vectors } diff --git a/r/src/table.cpp b/r/src/table.cpp index f024c959a69..e8d08983f58 100644 --- a/r/src/table.cpp +++ b/r/src/table.cpp @@ -135,7 +135,7 @@ std::shared_ptr Table__from_dots(SEXP lst, SEXP schema_sxp) { SEXP names = Rf_getAttrib(lst, R_NamesSymbol); auto fill_one_column = [&columns, &fields](int j, SEXP x, SEXP name) { - if (Rf_inherits(x, "arrow::ChunkedArray")) { + if (Rf_inherits(x, "ChunkedArray")) { auto chunked_array = arrow::r::extract(x); fields[j] = arrow::field(CHAR(name), chunked_array->type()); columns[j] = chunked_array; @@ -171,7 +171,7 @@ std::shared_ptr Table__from_dots(SEXP lst, SEXP schema_sxp) { schema = arrow::r::extract(schema_sxp); auto fill_one_column = [&columns, &schema](int j, SEXP x) { - if (Rf_inherits(x, "arrow::ChunkedArray")) { + if (Rf_inherits(x, "ChunkedArray")) { auto chunked_array = arrow::r::extract(x); columns[j] = chunked_array; } else if (Rf_inherits(x, "Array")) { diff --git a/r/tests/testthat/test-chunkedarray.R b/r/tests/testthat/test-chunkedarray.R index fa0a45b2456..0d25bc353df 100644 --- a/r/tests/testthat/test-chunkedarray.R +++ b/r/tests/testthat/test-chunkedarray.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
-context("arrow::ChunkedArray") +context("ChunkedArray") test_that("ChunkedArray", { x <- chunked_array(1:10, 1:10, 1:5) @@ -167,10 +167,10 @@ test_that("integer types casts for ChunkedArray (ARROW-3741)", { a_int32 <- a$cast(int32()) a_int64 <- a$cast(int64()) - expect_is(a_int8, "arrow::ChunkedArray") - expect_is(a_int16, "arrow::ChunkedArray") - expect_is(a_int32, "arrow::ChunkedArray") - expect_is(a_int64, "arrow::ChunkedArray") + expect_is(a_int8, "ChunkedArray") + expect_is(a_int16, "ChunkedArray") + expect_is(a_int32, "ChunkedArray") + expect_is(a_int64, "ChunkedArray") expect_equal(a_int8$type, int8()) expect_equal(a_int16$type, int16()) expect_equal(a_int32$type, int32()) @@ -181,10 +181,10 @@ test_that("integer types casts for ChunkedArray (ARROW-3741)", { a_uint32 <- a$cast(uint32()) a_uint64 <- a$cast(uint64()) - expect_is(a_uint8, "arrow::ChunkedArray") - expect_is(a_uint16, "arrow::ChunkedArray") - expect_is(a_uint32, "arrow::ChunkedArray") - expect_is(a_uint64, "arrow::ChunkedArray") + expect_is(a_uint8, "ChunkedArray") + expect_is(a_uint16, "ChunkedArray") + expect_is(a_uint32, "ChunkedArray") + expect_is(a_uint64, "ChunkedArray") expect_equal(a_uint8$type, uint8()) expect_equal(a_uint16$type, uint16()) diff --git a/r/tests/testthat/test-read-write.R b/r/tests/testthat/test-read-write.R index 17d994deab2..cfa78232d68 100644 --- a/r/tests/testthat/test-read-write.R +++ b/r/tests/testthat/test-read-write.R @@ -28,7 +28,7 @@ test_that("arrow::table round trip", { expect_equal(tab$num_columns, 3L) expect_equal(tab$num_rows, 10L) - # arrow::ChunkedArray + # ChunkedArray chunked_array_int <- tab$column(0) expect_equal(chunked_array_int$length(), 10L) expect_equal(chunked_array_int$null_count, 0L) @@ -41,7 +41,7 @@ test_that("arrow::table round trip", { expect_equal(chunked_array_int$chunk(i-1L), chunks_int[[i]]) } - # arrow::ChunkedArray + # ChunkedArray chunked_array_dbl <- tab$column(1) expect_equal(chunked_array_dbl$length(), 10L) 
expect_equal(chunked_array_dbl$null_count, 0L) @@ -54,7 +54,7 @@ test_that("arrow::table round trip", { expect_equal(chunked_array_dbl$chunk(i-1L), chunks_dbl[[i]]) } - # arrow::ChunkedArray + # ChunkedArray chunked_array_raw <- tab$column(2) expect_equal(chunked_array_raw$length(), 10L) expect_equal(chunked_array_raw$null_count, 0L) diff --git a/r/tests/testthat/test-type.R b/r/tests/testthat/test-type.R index 9f60b5f0f83..19934c6e472 100644 --- a/r/tests/testthat/test-type.R +++ b/r/tests/testthat/test-type.R @@ -22,7 +22,7 @@ test_that("type() gets the right type for arrow::Array", { expect_equal(type(a), a$type) }) -test_that("type() gets the right type for arrow::ChunkedArray", { +test_that("type() gets the right type for ChunkedArray", { a <- chunked_array(1:10, 1:10) expect_equal(type(a), a$type) }) From 4075897675b861a729f98c1f883808e42dbcd5a6 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Thu, 5 Sep 2019 10:44:51 -0700 Subject: [PATCH 08/37] compression --- r/NAMESPACE | 8 +-- r/R/compression.R | 58 +++++++++---------- ...utStream.Rd => compressed_input_stream.Rd} | 6 +- ...tStream.Rd => compressed_output_stream.Rd} | 6 +- r/tests/testthat/test-compressed.R | 11 ++-- 5 files changed, 40 insertions(+), 49 deletions(-) rename r/man/{CompressedInputStream.Rd => compressed_input_stream.Rd} (67%) rename r/man/{CompressedOutputStream.Rd => compressed_output_stream.Rd} (71%) diff --git a/r/NAMESPACE b/r/NAMESPACE index 47e77af305f..25ef3ce11ae 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -9,10 +9,6 @@ S3method("==","arrow::ipc::Message") S3method("==",Array) S3method(BufferReader,Buffer) S3method(BufferReader,default) -S3method(CompressedInputStream,"arrow::io::InputStream") -S3method(CompressedInputStream,character) -S3method(CompressedOutputStream,"arrow::io::OutputStream") -S3method(CompressedOutputStream,character) S3method(FeatherTableReader,"arrow::io::RandomAccessFile") S3method(FeatherTableReader,"arrow::ipc::feather::TableReader") 
S3method(FeatherTableReader,character) @@ -89,8 +85,6 @@ S3method(write_feather_RecordBatch,default) export(Array) export(BufferOutputStream) export(BufferReader) -export(CompressedInputStream) -export(CompressedOutputStream) export(CompressionType) export(DateUnit) export(FeatherTableReader) @@ -115,6 +109,8 @@ export(boolean) export(buffer) export(cast_options) export(chunked_array) +export(compressed_input_stream) +export(compressed_output_stream) export(compression_codec) export(contains) export(csv_convert_options) diff --git a/r/R/compression.R b/r/R/compression.R index 7550b39c5d6..131d5bb5b07 100644 --- a/r/R/compression.R +++ b/r/R/compression.R @@ -19,10 +19,7 @@ #' @include arrow-package.R #' @include io.R -`arrow::util::Codec` <- R6Class("arrow::util::Codec", inherit = Object) - -`arrow::io::CompressedOutputStream` <- R6Class("arrow::io::CompressedOutputStream", inherit = `arrow::io::OutputStream`) -`arrow::io::CompressedInputStream` <- R6Class("arrow::io::CompressedInputStream", inherit = `arrow::io::InputStream`) +Codec <- R6Class("Codec", inherit = Object) #' codec #' @@ -31,10 +28,24 @@ #' @export compression_codec <- function(type = "GZIP") { type <- CompressionType[[match.arg(type, names(CompressionType))]] - unique_ptr(`arrow::util::Codec`, util___Codec__Create(type)) + unique_ptr(Codec, util___Codec__Create(type)) } +CompressedOutputStream <- R6Class("CompressedOutputStream", inherit = `arrow::io::OutputStream`) + +CompressedOutputStream$create <- function(stream, codec = compression_codec()){ + if (.Platform$OS.type == "windows") { + stop("'CompressedOutputStream' is unsupported in Windows.") + } + assert_that(inherits(codec, "Codec")) + if (is.character(stream)) { + stream <- FileOutputStream(stream) + } + assert_that(inherits(stream, "arrow::io::OutputStream")) + shared_ptr(CompressedOutputStream, io___CompressedOutputStream__Make(codec, stream)) +} + #' Compressed output stream #' #' @details This function is not supported in Windows. 
@@ -42,21 +53,19 @@ compression_codec <- function(type = "GZIP") { #' @param stream Underlying raw output stream #' @param codec a codec #' @export -CompressedOutputStream <- function(stream, codec = compression_codec("GZIP")){ - if (.Platform$OS.type == "windows") stop("'CompressedOutputStream' is unsupported in Windows.") +compressed_output_stream <- CompressedOutputStream$create - UseMethod("CompressedOutputStream") -} -#' @export -CompressedOutputStream.character <- function(stream, codec = compression_codec("GZIP")){ - CompressedOutputStream(FileOutputStream(stream), codec = codec) -} +CompressedInputStream <- R6Class("CompressedInputStream", inherit = `arrow::io::InputStream`) -#' @export -`CompressedOutputStream.arrow::io::OutputStream` <- function(stream, codec = compression_codec("GZIP")) { - assert_that(inherits(codec, "arrow::util::Codec")) - shared_ptr(`arrow::io::CompressedOutputStream`, io___CompressedOutputStream__Make(codec, stream)) +CompressedInputStream$create <- function(stream, codec = compression_codec()){ + # TODO (npr): why would CompressedInputStream work on Windows if CompressedOutputStream doesn't? (and is it still the case that it does not?) 
+ assert_that(inherits(codec, "Codec")) + if (is.character(stream)) { + stream <- ReadableFile(stream) + } + assert_that(inherits(stream, "arrow::io::InputStream")) + shared_ptr(CompressedInputStream, io___CompressedInputStream__Make(codec, stream)) } #' Compressed input stream @@ -64,17 +73,4 @@ CompressedOutputStream.character <- function(stream, codec = compression_codec(" #' @param stream Underlying raw input stream #' @param codec a codec #' @export -CompressedInputStream <- function(stream, codec = codec("GZIP")){ - UseMethod("CompressedInputStream") -} - -#' @export -CompressedInputStream.character <- function(stream, codec = compression_codec("GZIP")){ - CompressedInputStream(ReadableFile(stream), codec = codec) -} - -#' @export -`CompressedInputStream.arrow::io::InputStream` <- function(stream, codec = compression_codec("GZIP")) { - assert_that(inherits(codec, "arrow::util::Codec")) - shared_ptr(`arrow::io::CompressedInputStream`, io___CompressedInputStream__Make(codec, stream)) -} +compressed_input_stream <- CompressedInputStream$create diff --git a/r/man/CompressedInputStream.Rd b/r/man/compressed_input_stream.Rd similarity index 67% rename from r/man/CompressedInputStream.Rd rename to r/man/compressed_input_stream.Rd index cfff053083d..6ac740b28d3 100644 --- a/r/man/CompressedInputStream.Rd +++ b/r/man/compressed_input_stream.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/compression.R -\name{CompressedInputStream} -\alias{CompressedInputStream} +\name{compressed_input_stream} +\alias{compressed_input_stream} \title{Compressed input stream} \usage{ -CompressedInputStream(stream, codec = codec("GZIP")) +compressed_input_stream(stream, codec = compression_codec()) } \arguments{ \item{stream}{Underlying raw input stream} diff --git a/r/man/CompressedOutputStream.Rd b/r/man/compressed_output_stream.Rd similarity index 71% rename from r/man/CompressedOutputStream.Rd rename to 
r/man/compressed_output_stream.Rd index d32070ebfd1..7da746d78f9 100644 --- a/r/man/CompressedOutputStream.Rd +++ b/r/man/compressed_output_stream.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/compression.R -\name{CompressedOutputStream} -\alias{CompressedOutputStream} +\name{compressed_output_stream} +\alias{compressed_output_stream} \title{Compressed output stream} \usage{ -CompressedOutputStream(stream, codec = compression_codec("GZIP")) +compressed_output_stream(stream, codec = compression_codec()) } \arguments{ \item{stream}{Underlying raw output stream} diff --git a/r/tests/testthat/test-compressed.R b/r/tests/testthat/test-compressed.R index 583f342e6cb..1cde9b80f3b 100644 --- a/r/tests/testthat/test-compressed.R +++ b/r/tests/testthat/test-compressed.R @@ -18,12 +18,12 @@ context("arrow::io::Compressed.*Stream") test_that("can write Buffer to CompressedOutputStream and read back in CompressedInputStream", { - if (.Platform$OS.type == "windows") skip("Unsupported") + skip_on_os("windows") buf <- buffer(as.raw(sample(0:255, size = 1024, replace = TRUE))) tf1 <- tempfile() - stream1 <- CompressedOutputStream(tf1) + stream1 <- CompressedOutputStream$create(tf1) expect_equal(stream1$tell(), 0) stream1$write(buf) expect_equal(stream1$tell(), buf$size) @@ -31,7 +31,7 @@ test_that("can write Buffer to CompressedOutputStream and read back in Compresse tf2 <- tempfile() sink2 <- FileOutputStream(tf2) - stream2 <- CompressedOutputStream(sink2) + stream2 <- compressed_output_stream(sink2) expect_equal(stream2$tell(), 0) stream2$write(buf) expect_equal(stream2$tell(), buf$size) @@ -39,11 +39,11 @@ test_that("can write Buffer to CompressedOutputStream and read back in Compresse sink2$close() - input1 <- CompressedInputStream(tf1) + input1 <- CompressedInputStream$create(tf1) buf1 <- input1$Read(1024L) file2 <- ReadableFile(tf2) - input2 <- CompressedInputStream(file2) + input2 <- compressed_input_stream(file2) buf2 <- 
input2$Read(1024L) expect_equal(buf, buf1) @@ -52,4 +52,3 @@ test_that("can write Buffer to CompressedOutputStream and read back in Compresse unlink(tf1) unlink(tf2) }) - From 12031adda23b4d3751fd8802d70021298921c6bd Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Thu, 5 Sep 2019 10:54:45 -0700 Subject: [PATCH 09/37] Backfill some methods --- r/DESCRIPTION | 4 +-- r/NAMESPACE | 6 ---- r/R/{ArrayData.R => array-data.R} | 0 r/R/buffer.R | 42 +++++++------------------ r/R/{ChunkedArray.R => chunked-array.R} | 8 +++-- r/man/array-data.Rd | 2 +- r/man/chunked-array.Rd | 2 +- r/man/chunked_array.Rd | 2 +- r/tests/testthat/test-buffer.R | 11 +++++++ 9 files changed, 32 insertions(+), 45 deletions(-) rename r/R/{ArrayData.R => array-data.R} (100%) rename r/R/{ChunkedArray.R => chunked-array.R} (96%) diff --git a/r/DESCRIPTION b/r/DESCRIPTION index 7f695e23992..4849c4991ed 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -49,8 +49,6 @@ Collate: 'enums.R' 'arrow-package.R' 'type.R' - 'ArrayData.R' - 'ChunkedArray.R' 'Column.R' 'Field.R' 'List.R' @@ -60,9 +58,11 @@ Collate: 'Schema.R' 'Struct.R' 'Table.R' + 'array-data.R' 'array.R' 'arrowExports.R' 'buffer.R' + 'chunked-array.R' 'io.R' 'compression.R' 'compute.R' diff --git a/r/NAMESPACE b/r/NAMESPACE index 25ef3ce11ae..3cc90c04f77 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -32,12 +32,6 @@ S3method(RecordBatchStreamWriter,character) S3method(as.data.frame,"arrow::RecordBatch") S3method(as.data.frame,"arrow::Table") S3method(as.raw,Buffer) -S3method(buffer,Buffer) -S3method(buffer,complex) -S3method(buffer,default) -S3method(buffer,integer) -S3method(buffer,numeric) -S3method(buffer,raw) S3method(csv_table_reader,"arrow::csv::TableReader") S3method(csv_table_reader,"arrow::io::InputStream") S3method(csv_table_reader,character) diff --git a/r/R/ArrayData.R b/r/R/array-data.R similarity index 100% rename from r/R/ArrayData.R rename to r/R/array-data.R diff --git a/r/R/buffer.R b/r/R/buffer.R index 
564f0393655..f616f635fe4 100644 --- a/r/R/buffer.R +++ b/r/R/buffer.R @@ -46,8 +46,15 @@ Buffer <- R6Class("Buffer", inherit = Object, ) ) -#' @export -as.raw.Buffer <- function(x) x$data() +Buffer$create <- function(x) { + if (inherits(x, "Buffer")) { + return(x) + } else if (inherits(x, c("raw", "numeric", "integer", "complex"))) { + return(shared_ptr(Buffer, r___RBuffer__initialize(x))) + } else { + stop("Cannot convert object of class ", class(x), " to arrow::Buffer") + } +} #' Create a [Buffer][buffer] from an R object #' @@ -56,34 +63,7 @@ as.raw.Buffer <- function(x) x$data() #' @return an instance of [Buffer][buffer] that borrows memory from `x` #' #' @export -buffer <- function(x) { - UseMethod("buffer") -} - -#' @export -buffer.default <- function(x) { - stop("cannot convert to Buffer") -} - -#' @export -buffer.raw <- function(x) { - shared_ptr(Buffer, r___RBuffer__initialize(x)) -} - -#' @export -buffer.numeric <- function(x) { - shared_ptr(Buffer, r___RBuffer__initialize(x)) -} - -#' @export -buffer.integer <- function(x) { - shared_ptr(Buffer, r___RBuffer__initialize(x)) -} +buffer <- Buffer$create #' @export -buffer.complex <- function(x) { - shared_ptr(Buffer, r___RBuffer__initialize(x)) -} - -#' @export -buffer.Buffer <- function(x) x +as.raw.Buffer <- function(x) x$data() diff --git a/r/R/ChunkedArray.R b/r/R/chunked-array.R similarity index 96% rename from r/R/ChunkedArray.R rename to r/R/chunked-array.R index 3e92411102c..ac34e347d27 100644 --- a/r/R/ChunkedArray.R +++ b/r/R/chunked-array.R @@ -55,12 +55,14 @@ ChunkedArray <- R6Class("ChunkedArray", inherit = Object, ) ) +ChunkedArray$create <- function(..., type = NULL) { + shared_ptr(ChunkedArray, ChunkedArray__from_list(list2(...), type)) +} + #' Create a [ChunkedArray][chunked-array] from various R vectors #' #' @param \dots Vectors to coerce #' @param type currently ignored #' #' @export -chunked_array <- function(..., type = NULL) { - shared_ptr(ChunkedArray, 
ChunkedArray__from_list(list2(...), type)) -} +chunked_array <- ChunkedArray$create diff --git a/r/man/array-data.Rd b/r/man/array-data.Rd index 095a474dfe7..5deeb6c04dd 100644 --- a/r/man/array-data.Rd +++ b/r/man/array-data.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ArrayData.R +% Please edit documentation in R/array-data.R \docType{class} \name{array-data} \alias{array-data} diff --git a/r/man/chunked-array.Rd b/r/man/chunked-array.Rd index 3a68759b1a1..24d962cc18e 100644 --- a/r/man/chunked-array.Rd +++ b/r/man/chunked-array.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ChunkedArray.R +% Please edit documentation in R/chunked-array.R \docType{class} \name{chunked-array} \alias{chunked-array} diff --git a/r/man/chunked_array.Rd b/r/man/chunked_array.Rd index 21a890cf5ca..42193fb57be 100644 --- a/r/man/chunked_array.Rd +++ b/r/man/chunked_array.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ChunkedArray.R +% Please edit documentation in R/chunked-array.R \name{chunked_array} \alias{chunked_array} \title{Create a \link[=chunked-array]{ChunkedArray} from various R vectors} diff --git a/r/tests/testthat/test-buffer.R b/r/tests/testthat/test-buffer.R index 152b3b2558d..73253c6e36b 100644 --- a/r/tests/testthat/test-buffer.R +++ b/r/tests/testthat/test-buffer.R @@ -45,6 +45,17 @@ test_that("Buffer can be created from complex vector", { expect_equal(buf$size, 3 * 16) }) +test_that("buffer buffer buffers buffers", { + expect_is(buffer(buffer(42)), "Buffer") +}) + +test_that("Other types can't be converted to Buffers", { + expect_error( + buffer(data.frame(a="asdf")), + "Cannot convert object of class data.frame to arrow::Buffer" + ) +}) + test_that("can convert Buffer to raw", { buf <- buffer(rnorm(10)) expect_equal(buf$data(), as.raw(buf)) From 1711d3e08378140ee7e88aa573eca2249b5b186f Mon Sep 17 00:00:00 2001 
From: Neal Richardson Date: Thu, 5 Sep 2019 10:59:56 -0700 Subject: [PATCH 10/37] CastOptions --- r/R/RecordBatch.R | 2 +- r/R/Table.R | 2 +- r/R/array.R | 2 +- r/R/chunked-array.R | 2 +- r/R/compute.R | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/r/R/RecordBatch.R b/r/R/RecordBatch.R index 43464ea99fd..abc774842b3 100644 --- a/r/R/RecordBatch.R +++ b/r/R/RecordBatch.R @@ -55,7 +55,7 @@ cast = function(target_schema, safe = TRUE, options = cast_options(safe)) { assert_that(inherits(target_schema, "arrow::Schema")) - assert_that(inherits(options, "arrow::compute::CastOptions")) + assert_that(inherits(options, "CastOptions")) assert_that(identical(self$schema$names, target_schema$names), msg = "incompatible schemas") shared_ptr(`arrow::RecordBatch`, RecordBatch__cast(self, target_schema, options)) } diff --git a/r/R/Table.R b/r/R/Table.R index 942248695cb..1660349571b 100644 --- a/r/R/Table.R +++ b/r/R/Table.R @@ -38,7 +38,7 @@ cast = function(target_schema, safe = TRUE, options = cast_options(safe)) { assert_that(inherits(target_schema, "arrow::Schema")) - assert_that(inherits(options, "arrow::compute::CastOptions")) + assert_that(inherits(options, "CastOptions")) assert_that(identical(self$schema$names, target_schema$names), msg = "incompatible schemas") shared_ptr(`arrow::Table`, Table__cast(self, target_schema, options)) }, diff --git a/r/R/array.R b/r/R/array.R index bce8b3c9f6d..02ca0656782 100644 --- a/r/R/array.R +++ b/r/R/array.R @@ -93,7 +93,7 @@ Array <- R6Class("Array", }, cast = function(target_type, safe = TRUE, options = cast_options(safe)) { assert_that(inherits(target_type, "arrow::DataType")) - assert_that(inherits(options, "arrow::compute::CastOptions")) + assert_that(inherits(options, "CastOptions")) Array$create(Array__cast(self, target_type, options)) } ), diff --git a/r/R/chunked-array.R b/r/R/chunked-array.R index ac34e347d27..cc6a112a7bd 100644 --- a/r/R/chunked-array.R +++ b/r/R/chunked-array.R @@ -43,7 +43,7 @@ 
ChunkedArray <- R6Class("ChunkedArray", inherit = Object, }, cast = function(target_type, safe = TRUE, options = cast_options(safe)) { assert_that(inherits(target_type, "arrow::DataType")) - assert_that(inherits(options, "arrow::compute::CastOptions")) + assert_that(inherits(options, "CastOptions")) shared_ptr(ChunkedArray, ChunkedArray__cast(self, target_type, options)) } ), diff --git a/r/R/compute.R b/r/R/compute.R index 18a2493fe55..f2eded54c52 100644 --- a/r/R/compute.R +++ b/r/R/compute.R @@ -17,7 +17,7 @@ #' @include array.R -`arrow::compute::CastOptions` <- R6Class("arrow::compute::CastOptions", inherit = Object) +CastOptions <- R6Class("CastOptions", inherit = Object) #' Cast options #' @@ -33,7 +33,7 @@ cast_options <- function( allow_time_truncate = !safe, allow_float_truncate = !safe ){ - shared_ptr(`arrow::compute::CastOptions`, + shared_ptr(CastOptions, compute___CastOptions__initialize(allow_int_overflow, allow_time_truncate, allow_float_truncate) ) } From fbebf2734b87ed00bdd635d0daf0fd0b06bfae2c Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Thu, 5 Sep 2019 13:03:45 -0700 Subject: [PATCH 11/37] io --- r/NAMESPACE | 38 +++--- r/R/RecordBatchReader.R | 14 +-- r/R/RecordBatchWriter.R | 8 +- r/R/Schema.R | 6 +- r/R/compression.R | 12 +- r/R/csv.R | 2 +- r/R/dictionary.R | 8 +- r/R/enums.R | 2 +- r/R/feather.R | 23 ++-- r/R/io.R | 140 +++++++--------------- r/R/json.R | 4 +- r/R/message.R | 8 +- r/R/parquet.R | 6 +- r/R/read_record_batch.R | 6 +- r/R/read_table.R | 4 +- r/R/type.R | 2 +- r/R/write_arrow.R | 6 +- r/man/BufferOutputStream.Rd | 17 --- r/man/BufferReader.Rd | 14 --- r/man/FeatherTableReader.Rd | 2 +- r/man/FileOutputStream.Rd | 17 --- r/man/FixedSizeBufferWriter.Rd | 17 --- r/man/MockOutputStream.Rd | 14 --- r/man/ReadableFile.Rd | 17 --- r/man/arrow__DictionaryType.Rd | 6 +- r/man/arrow__io__BufferOutputStream.Rd | 4 +- r/man/arrow__io__BufferReader.Rd | 2 +- r/man/arrow__io__FileOutputStream.Rd | 2 +- 
r/man/arrow__io__FixedSizeBufferWriter.Rd | 2 +- r/man/arrow__io__InputStream.Rd | 2 +- r/man/arrow__io__MemoryMappedFile.Rd | 2 +- r/man/arrow__io__MockOutputStream.Rd | 2 +- r/man/arrow__io__OutputStream.Rd | 2 +- r/man/arrow__io__RandomAccessFile.Rd | 2 +- r/man/arrow__io__Readable.Rd | 2 +- r/man/arrow__io__ReadableFile.Rd | 2 +- r/man/dictionary.Rd | 2 +- r/man/write_feather.Rd | 2 +- r/man/write_feather_RecordBatch.Rd | 2 +- r/tests/testthat/test-Table.R | 2 +- r/tests/testthat/test-buffer.R | 6 +- r/tests/testthat/test-bufferreader.R | 16 +-- r/tests/testthat/test-compressed.R | 6 +- r/tests/testthat/test-csv.R | 4 +- r/tests/testthat/test-feather.R | 4 +- r/tests/testthat/test-json.R | 6 +- r/tests/testthat/test-message.R | 8 +- r/tests/testthat/test-messagereader.R | 8 +- r/tests/testthat/test-read_record_batch.R | 6 +- r/tests/testthat/test-recordbatchreader.R | 4 +- r/tests/testthat/test-schema.R | 6 +- 51 files changed, 168 insertions(+), 331 deletions(-) delete mode 100644 r/man/BufferOutputStream.Rd delete mode 100644 r/man/BufferReader.Rd delete mode 100644 r/man/FileOutputStream.Rd delete mode 100644 r/man/FixedSizeBufferWriter.Rd delete mode 100644 r/man/MockOutputStream.Rd delete mode 100644 r/man/ReadableFile.Rd diff --git a/r/NAMESPACE b/r/NAMESPACE index 3cc90c04f77..1d689a765be 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -7,57 +7,53 @@ S3method("==","arrow::RecordBatch") S3method("==","arrow::Schema") S3method("==","arrow::ipc::Message") S3method("==",Array) -S3method(BufferReader,Buffer) -S3method(BufferReader,default) -S3method(FeatherTableReader,"arrow::io::RandomAccessFile") S3method(FeatherTableReader,"arrow::ipc::feather::TableReader") +S3method(FeatherTableReader,RandomAccessFile) S3method(FeatherTableReader,character) S3method(FeatherTableReader,raw) -S3method(FeatherTableWriter,"arrow::io::OutputStream") -S3method(FixedSizeBufferWriter,Buffer) -S3method(FixedSizeBufferWriter,default) -S3method(MessageReader,"arrow::io::InputStream") 
+S3method(FeatherTableWriter,OutputStream) +S3method(MessageReader,InputStream) S3method(MessageReader,default) -S3method(RecordBatchFileReader,"arrow::io::RandomAccessFile") S3method(RecordBatchFileReader,Buffer) +S3method(RecordBatchFileReader,RandomAccessFile) S3method(RecordBatchFileReader,character) S3method(RecordBatchFileReader,raw) -S3method(RecordBatchFileWriter,"arrow::io::OutputStream") +S3method(RecordBatchFileWriter,OutputStream) S3method(RecordBatchFileWriter,character) -S3method(RecordBatchStreamReader,"arrow::io::InputStream") S3method(RecordBatchStreamReader,Buffer) +S3method(RecordBatchStreamReader,InputStream) S3method(RecordBatchStreamReader,raw) -S3method(RecordBatchStreamWriter,"arrow::io::OutputStream") +S3method(RecordBatchStreamWriter,OutputStream) S3method(RecordBatchStreamWriter,character) S3method(as.data.frame,"arrow::RecordBatch") S3method(as.data.frame,"arrow::Table") S3method(as.raw,Buffer) S3method(csv_table_reader,"arrow::csv::TableReader") -S3method(csv_table_reader,"arrow::io::InputStream") +S3method(csv_table_reader,InputStream) S3method(csv_table_reader,character) S3method(csv_table_reader,default) S3method(dim,"arrow::RecordBatch") S3method(dim,"arrow::Table") -S3method(json_table_reader,"arrow::io::InputStream") S3method(json_table_reader,"arrow::json::TableReader") +S3method(json_table_reader,InputStream) S3method(json_table_reader,character) S3method(json_table_reader,default) S3method(length,Array) S3method(names,"arrow::RecordBatch") -S3method(parquet_file_reader,"arrow::io::RandomAccessFile") +S3method(parquet_file_reader,RandomAccessFile) S3method(parquet_file_reader,character) S3method(parquet_file_reader,raw) S3method(print,"arrow-enum") -S3method(read_message,"arrow::io::InputStream") S3method(read_message,"arrow::ipc::MessageReader") +S3method(read_message,InputStream) S3method(read_message,default) -S3method(read_record_batch,"arrow::io::InputStream") S3method(read_record_batch,"arrow::ipc::Message") 
S3method(read_record_batch,Buffer) +S3method(read_record_batch,InputStream) S3method(read_record_batch,raw) -S3method(read_schema,"arrow::io::InputStream") S3method(read_schema,"arrow::ipc::Message") S3method(read_schema,Buffer) +S3method(read_schema,InputStream) S3method(read_schema,raw) S3method(read_table,"arrow::ipc::RecordBatchFileReader") S3method(read_table,"arrow::ipc::RecordBatchStreamReader") @@ -73,23 +69,17 @@ S3method(write_arrow,raw) S3method(write_feather,"arrow::RecordBatch") S3method(write_feather,data.frame) S3method(write_feather,default) -S3method(write_feather_RecordBatch,"arrow::io::OutputStream") +S3method(write_feather_RecordBatch,OutputStream) S3method(write_feather_RecordBatch,character) S3method(write_feather_RecordBatch,default) export(Array) -export(BufferOutputStream) -export(BufferReader) export(CompressionType) export(DateUnit) export(FeatherTableReader) export(FeatherTableWriter) export(FileMode) -export(FileOutputStream) -export(FixedSizeBufferWriter) export(MessageReader) export(MessageType) -export(MockOutputStream) -export(ReadableFile) export(RecordBatchFileReader) export(RecordBatchFileWriter) export(RecordBatchStreamReader) diff --git a/r/R/RecordBatchReader.R b/r/R/RecordBatchReader.R index 13913fa8ba0..b5339dceea9 100644 --- a/r/R/RecordBatchReader.R +++ b/r/R/RecordBatchReader.R @@ -92,18 +92,18 @@ RecordBatchStreamReader <- function(stream){ } #' @export -`RecordBatchStreamReader.arrow::io::InputStream` <- function(stream) { +RecordBatchStreamReader.InputStream <- function(stream) { shared_ptr(`arrow::ipc::RecordBatchStreamReader`, ipc___RecordBatchStreamReader__Open(stream)) } #' @export `RecordBatchStreamReader.raw` <- function(stream) { - RecordBatchStreamReader(BufferReader(stream)) + RecordBatchStreamReader(BufferReader$create(stream)) } #' @export `RecordBatchStreamReader.Buffer` <- function(stream) { - RecordBatchStreamReader(BufferReader(stream)) + RecordBatchStreamReader(BufferReader$create(stream)) } @@ -117,22 
+117,22 @@ RecordBatchFileReader <- function(file) { } #' @export -`RecordBatchFileReader.arrow::io::RandomAccessFile` <- function(file) { +RecordBatchFileReader.RandomAccessFile <- function(file) { shared_ptr(`arrow::ipc::RecordBatchFileReader`, ipc___RecordBatchFileReader__Open(file)) } #' @export `RecordBatchFileReader.character` <- function(file) { assert_that(length(file) == 1L) - RecordBatchFileReader(ReadableFile(file)) + RecordBatchFileReader(ReadableFile$create(file)) } #' @export `RecordBatchFileReader.Buffer` <- function(file) { - RecordBatchFileReader(BufferReader(file)) + RecordBatchFileReader(BufferReader$create(file)) } #' @export `RecordBatchFileReader.raw` <- function(file) { - RecordBatchFileReader(BufferReader(file)) + RecordBatchFileReader(BufferReader$create(file)) } diff --git a/r/R/RecordBatchWriter.R b/r/R/RecordBatchWriter.R index fcbcc478258..130e7f189f9 100644 --- a/r/R/RecordBatchWriter.R +++ b/r/R/RecordBatchWriter.R @@ -108,11 +108,11 @@ RecordBatchStreamWriter <- function(sink, schema) { #' @export RecordBatchStreamWriter.character <- function(sink, schema){ - RecordBatchStreamWriter(FileOutputStream(sink), schema) + RecordBatchStreamWriter(FileOutputStream$create(sink), schema) } #' @export -`RecordBatchStreamWriter.arrow::io::OutputStream` <- function(sink, schema){ +RecordBatchStreamWriter.OutputStream <- function(sink, schema){ assert_that(inherits(schema, "arrow::Schema")) shared_ptr(`arrow::ipc::RecordBatchStreamWriter`, ipc___RecordBatchStreamWriter__Open(sink, schema)) } @@ -168,11 +168,11 @@ RecordBatchFileWriter <- function(sink, schema) { #' @export RecordBatchFileWriter.character <- function(sink, schema){ - RecordBatchFileWriter(FileOutputStream(sink), schema) + RecordBatchFileWriter(FileOutputStream$create(sink), schema) } #' @export -`RecordBatchFileWriter.arrow::io::OutputStream` <- function(sink, schema){ +RecordBatchFileWriter.OutputStream <- function(sink, schema){ assert_that(inherits(schema, "arrow::Schema")) 
shared_ptr(`arrow::ipc::RecordBatchFileWriter`, ipc___RecordBatchFileWriter__Open(sink, schema)) } diff --git a/r/R/Schema.R b/r/R/Schema.R index 4e524c4288a..6721b3b4476 100644 --- a/r/R/Schema.R +++ b/r/R/Schema.R @@ -88,20 +88,20 @@ read_schema <- function(stream, ...) { } #' @export -`read_schema.arrow::io::InputStream` <- function(stream, ...) { +read_schema.InputStream <- function(stream, ...) { shared_ptr(`arrow::Schema`, ipc___ReadSchema_InputStream(stream)) } #' @export `read_schema.Buffer` <- function(stream, ...) { - stream <- BufferReader(stream) + stream <- BufferReader$create(stream) on.exit(stream$close()) shared_ptr(`arrow::Schema`, ipc___ReadSchema_InputStream(stream)) } #' @export `read_schema.raw` <- function(stream, ...) { - stream <- BufferReader(stream) + stream <- BufferReader$create(stream) on.exit(stream$close()) shared_ptr(`arrow::Schema`, ipc___ReadSchema_InputStream(stream)) } diff --git a/r/R/compression.R b/r/R/compression.R index 131d5bb5b07..dbf6a1a5c8a 100644 --- a/r/R/compression.R +++ b/r/R/compression.R @@ -32,7 +32,7 @@ compression_codec <- function(type = "GZIP") { } -CompressedOutputStream <- R6Class("CompressedOutputStream", inherit = `arrow::io::OutputStream`) +CompressedOutputStream <- R6Class("CompressedOutputStream", inherit = OutputStream) CompressedOutputStream$create <- function(stream, codec = compression_codec()){ if (.Platform$OS.type == "windows") { @@ -40,9 +40,9 @@ CompressedOutputStream$create <- function(stream, codec = compression_codec()){ } assert_that(inherits(codec, "Codec")) if (is.character(stream)) { - stream <- FileOutputStream(stream) + stream <- FileOutputStream$create(stream) } - assert_that(inherits(stream, "arrow::io::OutputStream")) + assert_that(inherits(stream, "OutputStream")) shared_ptr(CompressedOutputStream, io___CompressedOutputStream__Make(codec, stream)) } @@ -56,15 +56,15 @@ CompressedOutputStream$create <- function(stream, codec = compression_codec()){ compressed_output_stream <- 
CompressedOutputStream$create -CompressedInputStream <- R6Class("CompressedInputStream", inherit = `arrow::io::InputStream`) +CompressedInputStream <- R6Class("CompressedInputStream", inherit = InputStream) CompressedInputStream$create <- function(stream, codec = compression_codec()){ # TODO (npr): why would CompressedInputStream work on Windows if CompressedOutputStream doesn't? (and is it still the case that it does not?) assert_that(inherits(codec, "Codec")) if (is.character(stream)) { - stream <- ReadableFile(stream) + stream <- ReadableFile$create(stream) } - assert_that(inherits(stream, "arrow::io::InputStream")) + assert_that(inherits(stream, "InputStream")) shared_ptr(CompressedInputStream, io___CompressedInputStream__Make(codec, stream)) } diff --git a/r/R/csv.R b/r/R/csv.R index 71e9823b930..2eb4e8d424e 100644 --- a/r/R/csv.R +++ b/r/R/csv.R @@ -364,7 +364,7 @@ csv_table_reader.default <- function(file, } #' @export -`csv_table_reader.arrow::io::InputStream` <- function(file, +csv_table_reader.InputStream <- function(file, read_options = csv_read_options(), parse_options = csv_parse_options(), convert_options = csv_convert_options(), diff --git a/r/R/dictionary.R b/r/R/dictionary.R index 9262a514b5a..b12c19e23e3 100644 --- a/r/R/dictionary.R +++ b/r/R/dictionary.R @@ -17,7 +17,7 @@ #' @include type.R -#' @title class arrow::DictionaryType +#' @title class DictionaryType #' #' @usage NULL #' @format NULL @@ -29,7 +29,7 @@ #' #' @rdname arrow__DictionaryType #' @name arrow__DictionaryType -`arrow::DictionaryType` <- R6Class("arrow::DictionaryType", +DictionaryType <- R6Class("DictionaryType", inherit = `arrow::FixedWidthType`, active = list( @@ -46,7 +46,7 @@ #' @param value_type value type, probably [utf8()] #' @param ordered Is this an ordered dictionary ? 
#' -#' @return An [arrow::DictionaryType][arrow__DictionaryType] +#' @return A [DictionaryType][arrow__DictionaryType] #' @seealso [Other Arrow data types][data-type] #' @export dictionary <- function(index_type, value_type, ordered = FALSE) { @@ -54,5 +54,5 @@ dictionary <- function(index_type, value_type, ordered = FALSE) { inherits(index_type, "arrow::DataType"), inherits(index_type, "arrow::DataType") ) - shared_ptr(`arrow::DictionaryType`, DictionaryType__initialize(index_type, value_type, ordered)) + shared_ptr(DictionaryType, DictionaryType__initialize(index_type, value_type, ordered)) } diff --git a/r/R/enums.R b/r/R/enums.R index 5c24ce8e6e3..0a909accc4c 100644 --- a/r/R/enums.R +++ b/r/R/enums.R @@ -63,7 +63,7 @@ StatusCode <- enum("arrow::StatusCode", #' @rdname enums #' @export -FileMode <- enum("arrow::io::FileMode", +FileMode <- enum("FileMode", READ = 0L, WRITE = 1L, READWRITE = 2L ) diff --git a/r/R/feather.R b/r/R/feather.R index 6f77ffb1fe2..0cc41dffdc3 100644 --- a/r/R/feather.R +++ b/r/R/feather.R @@ -51,14 +51,14 @@ FeatherTableWriter <- function(stream) { } #' @export -`FeatherTableWriter.arrow::io::OutputStream` <- function(stream){ +FeatherTableWriter.OutputStream <- function(stream){ unique_ptr(`arrow::ipc::feather::TableWriter`, ipc___feather___TableWriter__Open(stream)) } #' Write data in the Feather format #' #' @param data `data.frame` or `arrow::RecordBatch` -#' @param stream A file path or an `arrow::io::OutputStream` +#' @param stream A file path or an OutputStream #' #' @export #' @examples @@ -92,7 +92,7 @@ write_feather.data.frame <- function(data, stream) { #' Write a record batch in the feather format #' #' @param data `data.frame` or `arrow::RecordBatch` -#' @param stream A file path or an `arrow::io::OutputStream` +#' @param stream A file path or an OutputStream #' #' @export #' @keywords internal @@ -101,28 +101,25 @@ write_feather_RecordBatch <- function(data, stream) { } #' @export -#' @method write_feather_RecordBatch 
default `write_feather_RecordBatch.default` <- function(data, stream) { stop("unsupported") } #' @export -#' @method write_feather_RecordBatch character write_feather_RecordBatch.character <- function(data, stream) { - file_stream <- FileOutputStream(stream) + file_stream <- FileOutputStream$create(stream) on.exit(file_stream$close()) - `write_feather_RecordBatch.arrow::io::OutputStream`(data, file_stream) + write_feather_RecordBatch.OutputStream(data, file_stream) } #' @export -#' @method write_feather_RecordBatch arrow::io::OutputStream -`write_feather_RecordBatch.arrow::io::OutputStream` <- function(data, stream) { +write_feather_RecordBatch.OutputStream <- function(data, stream) { ipc___TableWriter__RecordBatch__WriteFeather(FeatherTableWriter(stream), data) } #' A `arrow::ipc::feather::TableReader` to read from a file #' -#' @param file A file path or `arrow::io::RandomAccessFile` +#' @param file A file path or RandomAccessFile #' @param mmap Is the file memory mapped (applicable to the `character` method) #' @param ... extra parameters #' @@ -136,18 +133,18 @@ FeatherTableReader.character <- function(file, mmap = TRUE, ...) { if (isTRUE(mmap)) { stream <- mmap_open(file, ...) } else { - stream <- ReadableFile(file, ...) + stream <- ReadableFile$create(file, ...) } FeatherTableReader(stream) } #' @export FeatherTableReader.raw <- function(file, mmap = TRUE, ...) { - FeatherTableReader(BufferReader(file), mmap = mmap, ...) + FeatherTableReader(BufferReader$create(file), mmap = mmap, ...) 
} #' @export -`FeatherTableReader.arrow::io::RandomAccessFile` <- function(file, mmap = TRUE, ...){ +FeatherTableReader.RandomAccessFile <- function(file, mmap = TRUE, ...){ unique_ptr(`arrow::ipc::feather::TableReader`, ipc___feather___TableReader__Open(file)) } diff --git a/r/R/io.R b/r/R/io.R index 796d8415efd..ac89b2cbd46 100644 --- a/r/R/io.R +++ b/r/R/io.R @@ -21,7 +21,7 @@ # OutputStream ------------------------------------------------------------ -`arrow::io::Writable` <- R6Class("arrow::io::Writable", inherit = Object, +Writable <- R6Class("Writable", inherit = Object, public = list( write = function(x) io___Writable__write(self, buffer(x)) ) @@ -40,7 +40,7 @@ #' #' @rdname arrow__io__OutputStream #' @name arrow__io__OutputStream -`arrow::io::OutputStream` <- R6Class("arrow::io::OutputStream", inherit = `arrow::io::Writable`, +OutputStream <- R6Class("OutputStream", inherit = Writable, public = list( close = function() io___OutputStream__Close(self), tell = function() io___OutputStream__Tell(self) @@ -59,7 +59,12 @@ #' #' @rdname arrow__io__FileOutputStream #' @name arrow__io__FileOutputStream -`arrow::io::FileOutputStream` <- R6Class("arrow::io::FileOutputStream", inherit = `arrow::io::OutputStream`) +FileOutputStream <- R6Class("FileOutputStream", inherit = OutputStream) + +FileOutputStream$create <- function(path) { + path <- normalizePath(path, mustWork = FALSE) + shared_ptr(FileOutputStream, io___FileOutputStream__Open(path)) +} #' @title class arrow::io::MockOutputStream #' @@ -74,12 +79,16 @@ #' #' @rdname arrow__io__MockOutputStream #' @name arrow__io__MockOutputStream -`arrow::io::MockOutputStream` <- R6Class("arrow::io::MockOutputStream", inherit = `arrow::io::OutputStream`, +MockOutputStream <- R6Class("MockOutputStream", inherit = OutputStream, public = list( GetExtentBytesWritten = function() io___MockOutputStream__GetExtentBytesWritten(self) ) ) +MockOutputStream$create <- function() { + shared_ptr(MockOutputStream, 
io___MockOutputStream__initialize()) +} + #' @title class arrow::io::BufferOutputStream #' #' @usage NULL @@ -90,7 +99,7 @@ #' #' @rdname arrow__io__BufferOutputStream #' @name arrow__io__BufferOutputStream -`arrow::io::BufferOutputStream` <- R6Class("arrow::io::BufferOutputStream", inherit = `arrow::io::OutputStream`, +BufferOutputStream <- R6Class("BufferOutputStream", inherit = OutputStream, public = list( capacity = function() io___BufferOutputStream__capacity(self), getvalue = function() shared_ptr(Buffer, io___BufferOutputStream__Finish(self)), @@ -100,6 +109,10 @@ ) ) +BufferOutputStream$create <- function(initial_capacity = 0L) { + shared_ptr(BufferOutputStream, io___BufferOutputStream__Create(initial_capacity)) +} + #' @title class arrow::io::FixedSizeBufferWriter #' #' @usage NULL @@ -113,8 +126,13 @@ #' #' @rdname arrow__io__FixedSizeBufferWriter #' @name arrow__io__FixedSizeBufferWriter -`arrow::io::FixedSizeBufferWriter` <- R6Class("arrow::io::FixedSizeBufferWriter", inherit = `arrow::io::OutputStream`) +FixedSizeBufferWriter <- R6Class("FixedSizeBufferWriter", inherit = OutputStream) +FixedSizeBufferWriter$create <- function(x) { + x <- buffer(x) + assert_that(x$is_mutable) + shared_ptr(FixedSizeBufferWriter, io___FixedSizeBufferWriter__initialize(x)) +} # InputStream ------------------------------------------------------------- @@ -131,7 +149,7 @@ #' #' @rdname arrow__io__Readable #' @name arrow__io__Readable -`arrow::io::Readable` <- R6Class("arrow::io::Readable", inherit = Object, +Readable <- R6Class("Readable", inherit = Object, public = list( Read = function(nbytes) shared_ptr(Buffer, io___Readable__Read(self, nbytes)) ) @@ -150,7 +168,7 @@ #' #' @rdname arrow__io__InputStream #' @name arrow__io__InputStream -`arrow::io::InputStream` <- R6Class("arrow::io::InputStream", inherit = `arrow::io::Readable`, +InputStream <- R6Class("InputStream", inherit = Readable, public = list( close = function() io___InputStream__Close(self) ) @@ -169,7 +187,7 @@ 
#' #' @rdname arrow__io__RandomAccessFile #' @name arrow__io__RandomAccessFile -`arrow::io::RandomAccessFile` <- R6Class("arrow::io::RandomAccessFile", inherit = `arrow::io::InputStream`, +RandomAccessFile <- R6Class("RandomAccessFile", inherit = InputStream, public = list( GetSize = function() io___RandomAccessFile__GetSize(self), supports_zero_copy = function() io___RandomAccessFile__supports_zero_copy(self), @@ -209,7 +227,7 @@ #' #' @rdname arrow__io__MemoryMappedFile #' @name arrow__io__MemoryMappedFile -`arrow::io::MemoryMappedFile` <- R6Class("arrow::io::MemoryMappedFile", inherit = `arrow::io::RandomAccessFile`, +MemoryMappedFile <- R6Class("MemoryMappedFile", inherit = RandomAccessFile, public = list( Resize = function(size) io___MemoryMappedFile__Resize(self, size) ) @@ -228,7 +246,11 @@ #' #' @rdname arrow__io__ReadableFile #' @name arrow__io__ReadableFile -`arrow::io::ReadableFile` <- R6Class("arrow::io::ReadableFile", inherit = `arrow::io::RandomAccessFile`) +ReadableFile <- R6Class("ReadableFile", inherit = RandomAccessFile) + +ReadableFile$create <- function(path) { + shared_ptr(ReadableFile, io___ReadableFile__Open(normalizePath(path))) +} #' @title class arrow::io::BufferReader #' @@ -242,7 +264,12 @@ #' #' @rdname arrow__io__BufferReader #' @name arrow__io__BufferReader -`arrow::io::BufferReader` <- R6Class("arrow::io::BufferReader", inherit = `arrow::io::RandomAccessFile`) +BufferReader <- R6Class("BufferReader", inherit = RandomAccessFile) + +BufferReader$create <- function(x) { + x <- buffer(x) + shared_ptr(BufferReader, io___BufferReader__initialize(x)) +} #' Create a new read/write memory mapped file of a given size #' @@ -253,7 +280,8 @@ #' #' @export mmap_create <- function(path, size) { - shared_ptr(`arrow::io::MemoryMappedFile`, io___MemoryMappedFile__Create(normalizePath(path, mustWork = FALSE), size)) + path <- normalizePath(path, mustWork = FALSE) + shared_ptr(MemoryMappedFile, io___MemoryMappedFile__Create(path, size)) } #' Open a 
memory mapped file @@ -264,88 +292,6 @@ mmap_create <- function(path, size) { #' @export mmap_open <- function(path, mode = c("read", "write", "readwrite")) { mode <- match(match.arg(mode), c("read", "write", "readwrite")) - 1L - shared_ptr(`arrow::io::MemoryMappedFile`, io___MemoryMappedFile__Open(normalizePath(path), mode)) -} - -#' open a [arrow::io::ReadableFile][arrow__io__ReadableFile] -#' -#' @param path file path -#' -#' @return a [arrow::io::ReadableFile][arrow__io__ReadableFile] -#' -#' @export -ReadableFile <- function(path) { - shared_ptr(`arrow::io::ReadableFile`, io___ReadableFile__Open(normalizePath(path))) -} - -#' Open a [arrow::io::FileOutputStream][arrow__io__FileOutputStream] -#' -#' @param path file path -#' -#' @return a [arrow::io::FileOutputStream][arrow__io__FileOutputStream] -#' -#' @export -FileOutputStream <- function(path) { - shared_ptr(`arrow::io::FileOutputStream`, io___FileOutputStream__Open(normalizePath(path, mustWork = FALSE))) -} - -#' Open a [arrow::io::MockOutputStream][arrow__io__MockOutputStream] -#' -#' @return a [arrow::io::MockOutputStream][arrow__io__MockOutputStream] -#' -#' @export -MockOutputStream <- function() { - shared_ptr(`arrow::io::MockOutputStream`, io___MockOutputStream__initialize()) -} - -#' Open a [arrow::io::BufferOutputStream][arrow__io__BufferOutputStream] -#' -#' @param initial_capacity initial capacity -#' -#' @return a [arrow::io::BufferOutputStream][arrow__io__BufferOutputStream] -#' -#' @export -BufferOutputStream <- function(initial_capacity = 0L) { - shared_ptr(`arrow::io::BufferOutputStream`, io___BufferOutputStream__Create(initial_capacity)) -} - -#' Open a [arrow::io::FixedSizeBufferWriter][arrow__io__FixedSizeBufferWriter] -#' -#' @param buffer [Buffer][buffer] or something [buffer()] can handle -#' -#' @return a [arrow::io::BufferOutputStream][arrow__io__BufferOutputStream] -#' -#' @export -FixedSizeBufferWriter <- function(buffer){ - UseMethod("FixedSizeBufferWriter") -} - -#' @export 
-FixedSizeBufferWriter.default <- function(buffer){ - FixedSizeBufferWriter(buffer(buffer)) -} - -#' @export -`FixedSizeBufferWriter.Buffer` <- function(buffer){ - assert_that(buffer$is_mutable) - shared_ptr(`arrow::io::FixedSizeBufferWriter`, io___FixedSizeBufferWriter__initialize(buffer)) -} - -#' Create a [arrow::io::BufferReader][arrow__io__BufferReader] -#' -#' @param x R object to treat as a buffer or a buffer created by [buffer()] -#' -#' @export -BufferReader <- function(x) { - UseMethod("BufferReader") -} - -#' @export -BufferReader.default <- function(x) { - BufferReader(buffer(x)) -} - -#' @export -`BufferReader.Buffer` <- function(x) { - shared_ptr(`arrow::io::BufferReader`, io___BufferReader__initialize(x)) + path <- normalizePath(path) + shared_ptr(MemoryMappedFile, io___MemoryMappedFile__Open(path, mode)) } diff --git a/r/R/json.R b/r/R/json.R index 7ad67c7c028..e24a9a8e345 100644 --- a/r/R/json.R +++ b/r/R/json.R @@ -87,7 +87,7 @@ json_table_reader.default <- function(file, parse_options = json_parse_options(), ... ){ - json_table_reader(ReadableFile(file), + json_table_reader(ReadableFile$create(file), read_options = read_options, parse_options = parse_options, ... @@ -95,7 +95,7 @@ json_table_reader.default <- function(file, } #' @export -`json_table_reader.arrow::io::InputStream` <- function(file, +json_table_reader.InputStream <- function(file, read_options = json_read_options(), parse_options = json_parse_options(), ... 
diff --git a/r/R/message.R b/r/R/message.R index 3d2e2d532d8..0105388628d 100644 --- a/r/R/message.R +++ b/r/R/message.R @@ -77,11 +77,11 @@ MessageReader <- function(stream) { #' @export MessageReader.default <- function(stream) { - MessageReader(BufferReader(stream)) + MessageReader(BufferReader$create(stream)) } #' @export -`MessageReader.arrow::io::InputStream` <- function(stream) { +MessageReader.InputStream <- function(stream) { unique_ptr(`arrow::ipc::MessageReader`, ipc___MessageReader__Open(stream)) } @@ -96,11 +96,11 @@ read_message <- function(stream) { #' @export read_message.default<- function(stream) { - read_message(BufferReader(stream)) + read_message(BufferReader$create(stream)) } #' @export -`read_message.arrow::io::InputStream` <- function(stream) { +read_message.InputStream <- function(stream) { unique_ptr(`arrow::ipc::Message`, ipc___ReadMessage(stream) ) } diff --git a/r/R/parquet.R b/r/R/parquet.R index fe9d25194b7..fef9ce1c14e 100644 --- a/r/R/parquet.R +++ b/r/R/parquet.R @@ -80,7 +80,7 @@ parquet_file_reader <- function(file, props = parquet_arrow_reader_properties(), } #' @export -`parquet_file_reader.arrow::io::RandomAccessFile` <- function(file, props = parquet_arrow_reader_properties(), ...) { +parquet_file_reader.RandomAccessFile <- function(file, props = parquet_arrow_reader_properties(), ...) { unique_ptr(`parquet::arrow::FileReader`, parquet___arrow___FileReader__OpenFile(file, props)) } @@ -93,13 +93,13 @@ parquet_file_reader.character <- function(file, if (isTRUE(memory_map)) { parquet_file_reader(mmap_open(file), props = props, ...) } else { - parquet_file_reader(ReadableFile(file), props = props, ...) + parquet_file_reader(ReadableFile$create(file), props = props, ...) } } #' @export parquet_file_reader.raw <- function(file, props = parquet_arrow_reader_properties(), ...) { - parquet_file_reader(BufferReader(file), props = props, ...) + parquet_file_reader(BufferReader$create(file), props = props, ...) 
} #' Read a Parquet file diff --git a/r/R/read_record_batch.R b/r/R/read_record_batch.R index 99304ef6183..fddf1415a6a 100644 --- a/r/R/read_record_batch.R +++ b/r/R/read_record_batch.R @@ -34,21 +34,21 @@ read_record_batch <- function(obj, schema){ } #' @export -`read_record_batch.arrow::io::InputStream` <- function(obj, schema) { +read_record_batch.InputStream <- function(obj, schema) { assert_that(inherits(schema, "arrow::Schema")) shared_ptr(`arrow::RecordBatch`, ipc___ReadRecordBatch__InputStream__Schema(obj, schema)) } #' @export read_record_batch.raw <- function(obj, schema){ - stream <- BufferReader(obj) + stream <- BufferReader$create(obj) on.exit(stream$close()) read_record_batch(stream, schema) } #' @export `read_record_batch.Buffer` <- function(obj, schema){ - stream <- BufferReader(obj) + stream <- BufferReader$create(obj) on.exit(stream$close()) read_record_batch(stream, schema) } diff --git a/r/R/read_table.R b/r/R/read_table.R index a05d15dff56..848a2607cfe 100644 --- a/r/R/read_table.R +++ b/r/R/read_table.R @@ -62,7 +62,7 @@ read_table <- function(stream){ #' @export read_table.character <- function(stream) { assert_that(length(stream) == 1L) - stream <- ReadableFile(stream) + stream <- ReadableFile$create(stream) on.exit(stream$close()) batch_reader <- RecordBatchFileReader(stream) shared_ptr(`arrow::Table`, Table__from_RecordBatchFileReader(batch_reader)) @@ -70,7 +70,7 @@ read_table.character <- function(stream) { #' @export `read_table.raw` <- function(stream) { - stream <- BufferReader(stream) + stream <- BufferReader$create(stream) on.exit(stream$close()) batch_reader <- RecordBatchStreamReader(stream) shared_ptr(`arrow::Table`, Table__from_RecordBatchStreamReader(batch_reader)) diff --git a/r/R/type.R b/r/R/type.R index 4e88dd59c04..769067b8819 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -78,7 +78,7 @@ LIST = shared_ptr(`arrow::ListType`, self$pointer()), STRUCT = shared_ptr(`arrow::StructType`, self$pointer()), UNION = stop("Type UNION not 
implemented yet"), - DICTIONARY = shared_ptr(`arrow::DictionaryType`, self$pointer()), + DICTIONARY = shared_ptr(DictionaryType, self$pointer()), MAP = stop("Type MAP not implemented yet") ) } diff --git a/r/R/write_arrow.R b/r/R/write_arrow.R index f57eff36c57..ae9320e1ef6 100644 --- a/r/R/write_arrow.R +++ b/r/R/write_arrow.R @@ -63,7 +63,7 @@ write_arrow <- function(x, stream, ...) { `write_arrow.character` <- function(x, stream, ...) { assert_that(length(stream) == 1L) x <- to_arrow(x) - file_stream <- FileOutputStream(stream) + file_stream <- FileOutputStream$create(stream) on.exit(file_stream$close()) file_writer <- RecordBatchFileWriter(file_stream, x$schema) on.exit({ @@ -82,7 +82,7 @@ write_arrow <- function(x, stream, ...) { schema <- x$schema # how many bytes do we need - mock_stream <- MockOutputStream() + mock_stream <- MockOutputStream$create() writer <- RecordBatchStreamWriter(mock_stream, schema) writer$write(x) writer$close() @@ -90,7 +90,7 @@ write_arrow <- function(x, stream, ...) 
{ # now that we know the size, stream in a buffer backed by an R raw vector bytes <- raw(n) - buffer_writer <- FixedSizeBufferWriter(buffer(bytes)) + buffer_writer <- FixedSizeBufferWriter$create(buffer(bytes)) writer <- RecordBatchStreamWriter(buffer_writer, schema) writer$write(x) writer$close() diff --git a/r/man/BufferOutputStream.Rd b/r/man/BufferOutputStream.Rd deleted file mode 100644 index 1776f995930..00000000000 --- a/r/man/BufferOutputStream.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/io.R -\name{BufferOutputStream} -\alias{BufferOutputStream} -\title{Open a \link[=arrow__io__BufferOutputStream]{arrow::io::BufferOutputStream}} -\usage{ -BufferOutputStream(initial_capacity = 0L) -} -\arguments{ -\item{initial_capacity}{initial capacity} -} -\value{ -a \link[=arrow__io__BufferOutputStream]{arrow::io::BufferOutputStream} -} -\description{ -Open a \link[=arrow__io__BufferOutputStream]{arrow::io::BufferOutputStream} -} diff --git a/r/man/BufferReader.Rd b/r/man/BufferReader.Rd deleted file mode 100644 index ea5dd790cdd..00000000000 --- a/r/man/BufferReader.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/io.R -\name{BufferReader} -\alias{BufferReader} -\title{Create a \link[=arrow__io__BufferReader]{arrow::io::BufferReader}} -\usage{ -BufferReader(x) -} -\arguments{ -\item{x}{R object to treat as a buffer or a buffer created by \code{\link[=buffer]{buffer()}}} -} -\description{ -Create a \link[=arrow__io__BufferReader]{arrow::io::BufferReader} -} diff --git a/r/man/FeatherTableReader.Rd b/r/man/FeatherTableReader.Rd index 3276628d50e..1e5be02d369 100644 --- a/r/man/FeatherTableReader.Rd +++ b/r/man/FeatherTableReader.Rd @@ -7,7 +7,7 @@ FeatherTableReader(file, mmap = TRUE, ...) 
} \arguments{ -\item{file}{A file path or \code{arrow::io::RandomAccessFile}} +\item{file}{A file path or RandomAccessFile} \item{mmap}{Is the file memory mapped (applicable to the \code{character} method)} diff --git a/r/man/FileOutputStream.Rd b/r/man/FileOutputStream.Rd deleted file mode 100644 index 4155d349d1a..00000000000 --- a/r/man/FileOutputStream.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/io.R -\name{FileOutputStream} -\alias{FileOutputStream} -\title{Open a \link[=arrow__io__FileOutputStream]{arrow::io::FileOutputStream}} -\usage{ -FileOutputStream(path) -} -\arguments{ -\item{path}{file path} -} -\value{ -a \link[=arrow__io__FileOutputStream]{arrow::io::FileOutputStream} -} -\description{ -Open a \link[=arrow__io__FileOutputStream]{arrow::io::FileOutputStream} -} diff --git a/r/man/FixedSizeBufferWriter.Rd b/r/man/FixedSizeBufferWriter.Rd deleted file mode 100644 index ede2124de98..00000000000 --- a/r/man/FixedSizeBufferWriter.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/io.R -\name{FixedSizeBufferWriter} -\alias{FixedSizeBufferWriter} -\title{Open a \link[=arrow__io__FixedSizeBufferWriter]{arrow::io::FixedSizeBufferWriter}} -\usage{ -FixedSizeBufferWriter(buffer) -} -\arguments{ -\item{buffer}{\link[=buffer]{Buffer} or something \code{\link[=buffer]{buffer()}} can handle} -} -\value{ -a \link[=arrow__io__BufferOutputStream]{arrow::io::BufferOutputStream} -} -\description{ -Open a \link[=arrow__io__FixedSizeBufferWriter]{arrow::io::FixedSizeBufferWriter} -} diff --git a/r/man/MockOutputStream.Rd b/r/man/MockOutputStream.Rd deleted file mode 100644 index 2e3c0b6d3e3..00000000000 --- a/r/man/MockOutputStream.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/io.R -\name{MockOutputStream} -\alias{MockOutputStream} -\title{Open a 
\link[=arrow__io__MockOutputStream]{arrow::io::MockOutputStream}} -\usage{ -MockOutputStream() -} -\value{ -a \link[=arrow__io__MockOutputStream]{arrow::io::MockOutputStream} -} -\description{ -Open a \link[=arrow__io__MockOutputStream]{arrow::io::MockOutputStream} -} diff --git a/r/man/ReadableFile.Rd b/r/man/ReadableFile.Rd deleted file mode 100644 index 11535321bfb..00000000000 --- a/r/man/ReadableFile.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/io.R -\name{ReadableFile} -\alias{ReadableFile} -\title{open a \link[=arrow__io__ReadableFile]{arrow::io::ReadableFile}} -\usage{ -ReadableFile(path) -} -\arguments{ -\item{path}{file path} -} -\value{ -a \link[=arrow__io__ReadableFile]{arrow::io::ReadableFile} -} -\description{ -open a \link[=arrow__io__ReadableFile]{arrow::io::ReadableFile} -} diff --git a/r/man/arrow__DictionaryType.Rd b/r/man/arrow__DictionaryType.Rd index ba462ee0114..df9ac8b2f3f 100644 --- a/r/man/arrow__DictionaryType.Rd +++ b/r/man/arrow__DictionaryType.Rd @@ -3,10 +3,10 @@ \docType{class} \name{arrow__DictionaryType} \alias{arrow__DictionaryType} -\alias{arrow::DictionaryType} -\title{class arrow::DictionaryType} +\alias{DictionaryType} +\title{class DictionaryType} \description{ -class arrow::DictionaryType +class DictionaryType } \section{Methods}{ diff --git a/r/man/arrow__io__BufferOutputStream.Rd b/r/man/arrow__io__BufferOutputStream.Rd index e90d1cc0ed8..877d596601a 100644 --- a/r/man/arrow__io__BufferOutputStream.Rd +++ b/r/man/arrow__io__BufferOutputStream.Rd @@ -3,9 +3,9 @@ \docType{class} \name{arrow__io__BufferOutputStream} \alias{arrow__io__BufferOutputStream} -\alias{arrow::io::BufferOutputStream} +\alias{BufferOutputStream} \title{class arrow::io::BufferOutputStream} -\format{An object of class \code{R6ClassGenerator} of length 24.} +\format{An object of class \code{R6ClassGenerator} of length 25.} \description{ class arrow::io::BufferOutputStream } diff --git 
a/r/man/arrow__io__BufferReader.Rd b/r/man/arrow__io__BufferReader.Rd index 609fec5b6d4..47aa0b951b2 100644 --- a/r/man/arrow__io__BufferReader.Rd +++ b/r/man/arrow__io__BufferReader.Rd @@ -3,7 +3,7 @@ \docType{class} \name{arrow__io__BufferReader} \alias{arrow__io__BufferReader} -\alias{arrow::io::BufferReader} +\alias{BufferReader} \title{class arrow::io::BufferReader} \description{ class arrow::io::BufferReader diff --git a/r/man/arrow__io__FileOutputStream.Rd b/r/man/arrow__io__FileOutputStream.Rd index 92eaac13c9f..582ed09a7bc 100644 --- a/r/man/arrow__io__FileOutputStream.Rd +++ b/r/man/arrow__io__FileOutputStream.Rd @@ -3,7 +3,7 @@ \docType{class} \name{arrow__io__FileOutputStream} \alias{arrow__io__FileOutputStream} -\alias{arrow::io::FileOutputStream} +\alias{FileOutputStream} \title{class arrow::io::FileOutputStream} \description{ class arrow::io::FileOutputStream diff --git a/r/man/arrow__io__FixedSizeBufferWriter.Rd b/r/man/arrow__io__FixedSizeBufferWriter.Rd index 39d8bb69c25..69f069aa62e 100644 --- a/r/man/arrow__io__FixedSizeBufferWriter.Rd +++ b/r/man/arrow__io__FixedSizeBufferWriter.Rd @@ -3,7 +3,7 @@ \docType{class} \name{arrow__io__FixedSizeBufferWriter} \alias{arrow__io__FixedSizeBufferWriter} -\alias{arrow::io::FixedSizeBufferWriter} +\alias{FixedSizeBufferWriter} \title{class arrow::io::FixedSizeBufferWriter} \description{ class arrow::io::FixedSizeBufferWriter diff --git a/r/man/arrow__io__InputStream.Rd b/r/man/arrow__io__InputStream.Rd index 37f83308b64..bc539c4262c 100644 --- a/r/man/arrow__io__InputStream.Rd +++ b/r/man/arrow__io__InputStream.Rd @@ -3,7 +3,7 @@ \docType{class} \name{arrow__io__InputStream} \alias{arrow__io__InputStream} -\alias{arrow::io::InputStream} +\alias{InputStream} \title{class arrow::io::InputStream} \description{ class arrow::io::InputStream diff --git a/r/man/arrow__io__MemoryMappedFile.Rd b/r/man/arrow__io__MemoryMappedFile.Rd index 409bb17302a..6b21cebd0e8 100644 --- a/r/man/arrow__io__MemoryMappedFile.Rd +++ 
b/r/man/arrow__io__MemoryMappedFile.Rd @@ -3,7 +3,7 @@ \docType{class} \name{arrow__io__MemoryMappedFile} \alias{arrow__io__MemoryMappedFile} -\alias{arrow::io::MemoryMappedFile} +\alias{MemoryMappedFile} \title{class arrow::io::MemoryMappedFile} \description{ class arrow::io::MemoryMappedFile diff --git a/r/man/arrow__io__MockOutputStream.Rd b/r/man/arrow__io__MockOutputStream.Rd index f0b2c06d7a5..68b6b7d616f 100644 --- a/r/man/arrow__io__MockOutputStream.Rd +++ b/r/man/arrow__io__MockOutputStream.Rd @@ -3,7 +3,7 @@ \docType{class} \name{arrow__io__MockOutputStream} \alias{arrow__io__MockOutputStream} -\alias{arrow::io::MockOutputStream} +\alias{MockOutputStream} \title{class arrow::io::MockOutputStream} \description{ class arrow::io::MockOutputStream diff --git a/r/man/arrow__io__OutputStream.Rd b/r/man/arrow__io__OutputStream.Rd index a73271b57e3..0e84b3e9181 100644 --- a/r/man/arrow__io__OutputStream.Rd +++ b/r/man/arrow__io__OutputStream.Rd @@ -3,7 +3,7 @@ \docType{class} \name{arrow__io__OutputStream} \alias{arrow__io__OutputStream} -\alias{arrow::io::OutputStream} +\alias{OutputStream} \title{OutputStream} \description{ OutputStream diff --git a/r/man/arrow__io__RandomAccessFile.Rd b/r/man/arrow__io__RandomAccessFile.Rd index f8cb86abda6..0ff85917b25 100644 --- a/r/man/arrow__io__RandomAccessFile.Rd +++ b/r/man/arrow__io__RandomAccessFile.Rd @@ -3,7 +3,7 @@ \docType{class} \name{arrow__io__RandomAccessFile} \alias{arrow__io__RandomAccessFile} -\alias{arrow::io::RandomAccessFile} +\alias{RandomAccessFile} \title{class arrow::io::RandomAccessFile} \description{ class arrow::io::RandomAccessFile diff --git a/r/man/arrow__io__Readable.Rd b/r/man/arrow__io__Readable.Rd index b0b30a42302..323f6fd20ea 100644 --- a/r/man/arrow__io__Readable.Rd +++ b/r/man/arrow__io__Readable.Rd @@ -3,7 +3,7 @@ \docType{class} \name{arrow__io__Readable} \alias{arrow__io__Readable} -\alias{arrow::io::Readable} +\alias{Readable} \title{class arrow::io::Readable} \description{ class 
arrow::io::Readable diff --git a/r/man/arrow__io__ReadableFile.Rd b/r/man/arrow__io__ReadableFile.Rd index 440149fbbb4..b40d9c017a2 100644 --- a/r/man/arrow__io__ReadableFile.Rd +++ b/r/man/arrow__io__ReadableFile.Rd @@ -3,7 +3,7 @@ \docType{class} \name{arrow__io__ReadableFile} \alias{arrow__io__ReadableFile} -\alias{arrow::io::ReadableFile} +\alias{ReadableFile} \title{class arrow::io::ReadableFile} \description{ class arrow::io::ReadableFile diff --git a/r/man/dictionary.Rd b/r/man/dictionary.Rd index 334d67e937d..5eb4c802159 100644 --- a/r/man/dictionary.Rd +++ b/r/man/dictionary.Rd @@ -14,7 +14,7 @@ dictionary(index_type, value_type, ordered = FALSE) \item{ordered}{Is this an ordered dictionary ?} } \value{ -An \link[=arrow__DictionaryType]{arrow::DictionaryType} +An \link[=arrow__DictionaryType]{DictionaryType} } \description{ Create a dictionary type diff --git a/r/man/write_feather.Rd b/r/man/write_feather.Rd index 9eb20021caf..66dfa678d37 100644 --- a/r/man/write_feather.Rd +++ b/r/man/write_feather.Rd @@ -9,7 +9,7 @@ write_feather(data, stream) \arguments{ \item{data}{\code{data.frame} or \code{arrow::RecordBatch}} -\item{stream}{A file path or an \code{arrow::io::OutputStream}} +\item{stream}{A file path or an OutputStream} } \description{ Write data in the Feather format diff --git a/r/man/write_feather_RecordBatch.Rd b/r/man/write_feather_RecordBatch.Rd index b234f7fda4e..998e57a93fe 100644 --- a/r/man/write_feather_RecordBatch.Rd +++ b/r/man/write_feather_RecordBatch.Rd @@ -9,7 +9,7 @@ write_feather_RecordBatch(data, stream) \arguments{ \item{data}{\code{data.frame} or \code{arrow::RecordBatch}} -\item{stream}{A file path or an \code{arrow::io::OutputStream}} +\item{stream}{A file path or an OutputStream} } \description{ Write a record batch in the feather format diff --git a/r/tests/testthat/test-Table.R b/r/tests/testthat/test-Table.R index a22c9928615..e262000d35e 100644 --- a/r/tests/testthat/test-Table.R +++ b/r/tests/testthat/test-Table.R @@ 
-33,7 +33,7 @@ test_that("read_table handles various input streams (ARROW-3450, ARROW-3505)", { tab1 <- read_table(tf) tab2 <- read_table(fs::path_abs(tf)) - readable_file <- ReadableFile(tf) + readable_file <- ReadableFile$create(tf) file_reader1 <- RecordBatchFileReader(readable_file) tab3 <- read_table(file_reader1) readable_file$close() diff --git a/r/tests/testthat/test-buffer.R b/r/tests/testthat/test-buffer.R index 73253c6e36b..19184604e1c 100644 --- a/r/tests/testthat/test-buffer.R +++ b/r/tests/testthat/test-buffer.R @@ -72,14 +72,14 @@ test_that("can read remaining bytes of a RandomAccessFile", { tf <- tempfile() all_bytes <- write_arrow(tab, tf) - file <- ReadableFile(tf) + file <- ReadableFile$create(tf) x <- file$Read(20)$data() y <- file$Read()$data() - file <- ReadableFile(tf) + file <- ReadableFile$create(tf) z <- file$Read()$data() - file <- ReadableFile(tf) + file <- ReadableFile$create(tf) a <- file$ReadAt(20)$data() expect_equal(file$GetSize(), length(x) + length(y)) diff --git a/r/tests/testthat/test-bufferreader.R b/r/tests/testthat/test-bufferreader.R index 225eb9a8c3d..94be16ad569 100644 --- a/r/tests/testthat/test-bufferreader.R +++ b/r/tests/testthat/test-bufferreader.R @@ -18,13 +18,13 @@ context("BufferReader") test_that("BufferReader can be created from R objects", { - num <- BufferReader(numeric(13)) - int <- BufferReader(integer(13)) - raw <- BufferReader(raw(16)) + num <- BufferReader$create(numeric(13)) + int <- BufferReader$create(integer(13)) + raw <- BufferReader$create(raw(16)) - expect_is(num, "arrow::io::BufferReader") - expect_is(int, "arrow::io::BufferReader") - expect_is(raw, "arrow::io::BufferReader") + expect_is(num, "BufferReader") + expect_is(int, "BufferReader") + expect_is(raw, "BufferReader") expect_equal(num$GetSize(), 13*8) expect_equal(int$GetSize(), 13*4) @@ -33,8 +33,8 @@ test_that("BufferReader can be created from R objects", { test_that("BufferReader can be created from Buffer", { buf <- buffer(raw(76)) - 
reader <- BufferReader(buf) + reader <- BufferReader$create(buf) - expect_is(reader, "arrow::io::BufferReader") + expect_is(reader, "BufferReader") expect_equal(reader$GetSize(), 76) }) diff --git a/r/tests/testthat/test-compressed.R b/r/tests/testthat/test-compressed.R index 1cde9b80f3b..3d0dfdc20e4 100644 --- a/r/tests/testthat/test-compressed.R +++ b/r/tests/testthat/test-compressed.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -context("arrow::io::Compressed.*Stream") +context("Compressed.*Stream") test_that("can write Buffer to CompressedOutputStream and read back in CompressedInputStream", { skip_on_os("windows") @@ -30,7 +30,7 @@ test_that("can write Buffer to CompressedOutputStream and read back in Compresse stream1$close() tf2 <- tempfile() - sink2 <- FileOutputStream(tf2) + sink2 <- FileOutputStream$create(tf2) stream2 <- compressed_output_stream(sink2) expect_equal(stream2$tell(), 0) stream2$write(buf) @@ -42,7 +42,7 @@ test_that("can write Buffer to CompressedOutputStream and read back in Compresse input1 <- CompressedInputStream$create(tf1) buf1 <- input1$Read(1024L) - file2 <- ReadableFile(tf2) + file2 <- ReadableFile$create(tf2) input2 <- compressed_input_stream(file2) buf2 <- input2$Read(1024L) diff --git a/r/tests/testthat/test-csv.R b/r/tests/testthat/test-csv.R index a2ef4149eab..5904c5efc03 100644 --- a/r/tests/testthat/test-csv.R +++ b/r/tests/testthat/test-csv.R @@ -25,7 +25,7 @@ test_that("Can read csv file", { tab1 <- read_csv_arrow(tf, as_tibble = FALSE) tab2 <- read_csv_arrow(mmap_open(tf), as_tibble = FALSE) - tab3 <- read_csv_arrow(ReadableFile(tf), as_tibble = FALSE) + tab3 <- read_csv_arrow(ReadableFile$create(tf), as_tibble = FALSE) iris$Species <- as.character(iris$Species) tab0 <- table(!!!iris) @@ -42,7 +42,7 @@ test_that("read_csv_arrow(as_tibble=TRUE)", { tab1 <- read_csv_arrow(tf, as_tibble = TRUE) tab2 <- read_csv_arrow(mmap_open(tf), as_tibble = TRUE) - tab3 <- 
read_csv_arrow(ReadableFile(tf), as_tibble = TRUE) + tab3 <- read_csv_arrow(ReadableFile$create(tf), as_tibble = TRUE) iris$Species <- as.character(iris$Species) expect_equivalent(iris, tab1) diff --git a/r/tests/testthat/test-feather.R b/r/tests/testthat/test-feather.R index 4fe058eadc6..f5e48820440 100644 --- a/r/tests/testthat/test-feather.R +++ b/r/tests/testthat/test-feather.R @@ -31,7 +31,7 @@ test_that("feather read/write round trip", { expect_true(fs::file_exists(tf2)) tf3 <- tempfile() - stream <- FileOutputStream(tf3) + stream <- FileOutputStream$create(tf3) write_feather(tib, stream) stream$close() expect_true(fs::file_exists(tf3)) @@ -50,7 +50,7 @@ test_that("feather read/write round trip", { expect_is(tab4, "data.frame") # reading directly from arrow::io::ReadableFile - tab5 <- read_feather(ReadableFile(tf3)) + tab5 <- read_feather(ReadableFile$create(tf3)) expect_is(tab5, "data.frame") expect_equal(tib, tab1) diff --git a/r/tests/testthat/test-json.R b/r/tests/testthat/test-json.R index cea7b65cccb..8967982357e 100644 --- a/r/tests/testthat/test-json.R +++ b/r/tests/testthat/test-json.R @@ -29,7 +29,7 @@ test_that("Can read json file with scalars columns (ARROW-5503)", { tab1 <- read_json_arrow(tf, as_tibble = FALSE) tab2 <- read_json_arrow(mmap_open(tf), as_tibble = FALSE) - tab3 <- read_json_arrow(ReadableFile(tf), as_tibble = FALSE) + tab3 <- read_json_arrow(ReadableFile$create(tf), as_tibble = FALSE) expect_equal(tab1, tab2) expect_equal(tab1, tab3) @@ -56,7 +56,7 @@ test_that("read_json_arrow() converts to tibble", { tab1 <- read_json_arrow(tf) tab2 <- read_json_arrow(mmap_open(tf)) - tab3 <- read_json_arrow(ReadableFile(tf)) + tab3 <- read_json_arrow(ReadableFile$create(tf)) expect_is(tab1, "tbl_df") expect_is(tab2, "tbl_df") @@ -100,7 +100,7 @@ test_that("Can read json file with nested columns (ARROW-5503)", { tab1 <- read_json_arrow(tf, as_tibble = FALSE) tab2 <- read_json_arrow(mmap_open(tf), as_tibble = FALSE) - tab3 <- 
read_json_arrow(ReadableFile(tf), as_tibble = FALSE) + tab3 <- read_json_arrow(ReadableFile$create(tf), as_tibble = FALSE) expect_equal(tab1, tab2) expect_equal(tab1, tab3) diff --git a/r/tests/testthat/test-message.R b/r/tests/testthat/test-message.R index fb58f0305e9..9500fcfa638 100644 --- a/r/tests/testthat/test-message.R +++ b/r/tests/testthat/test-message.R @@ -20,7 +20,7 @@ context("arrow::ipc::Message") test_that("read_message can read from input stream", { batch <- record_batch(x = 1:10) bytes <- batch$serialize() - stream <- BufferReader(bytes) + stream <- BufferReader$create(bytes) message <- read_message(stream) expect_is(message, "arrow::ipc::Message") @@ -34,7 +34,7 @@ test_that("read_message can read from input stream", { test_that("read_message() can read Schema messages", { bytes <- schema(x=int32())$serialize() - stream <- BufferReader(bytes) + stream <- BufferReader$create(bytes) message <- read_message(stream) expect_is(message, "arrow::ipc::Message") @@ -49,14 +49,14 @@ test_that("read_message() can read Schema messages", { test_that("read_message() can handle raw vectors", { batch <- record_batch(x = 1:10) bytes <- batch$serialize() - stream <- BufferReader(bytes) + stream <- BufferReader$create(bytes) message_stream <- read_message(stream) message_raw <- read_message(bytes) expect_equal(message_stream, message_raw) bytes <- schema(x=int32())$serialize() - stream <- BufferReader(bytes) + stream <- BufferReader$create(bytes) message_stream <- read_message(stream) message_raw <- read_message(bytes) diff --git a/r/tests/testthat/test-messagereader.R b/r/tests/testthat/test-messagereader.R index 8eadb9d8d37..582d38d6cdd 100644 --- a/r/tests/testthat/test-messagereader.R +++ b/r/tests/testthat/test-messagereader.R @@ -51,8 +51,8 @@ test_that("MessageReader can be created from input stream", { batch <- record_batch(x = 1:10) bytes <- batch$serialize() - stream <- BufferReader(bytes) - expect_is(stream, "arrow::io::BufferReader") + stream <- 
BufferReader$create(bytes) + expect_is(stream, "BufferReader") reader <- MessageReader(stream) expect_is(reader, "arrow::ipc::MessageReader") @@ -69,8 +69,8 @@ test_that("MessageReader can be created from input stream", { schema <- schema(x = int32()) bytes <- schema$serialize() - stream <- BufferReader(bytes) - expect_is(stream, "arrow::io::BufferReader") + stream <- BufferReader$create(bytes) + expect_is(stream, "BufferReader") reader <- MessageReader(stream) expect_is(reader, "arrow::ipc::MessageReader") diff --git a/r/tests/testthat/test-read_record_batch.R b/r/tests/testthat/test-read_record_batch.R index adbb192fa59..ad2c116d866 100644 --- a/r/tests/testthat/test-read_record_batch.R +++ b/r/tests/testthat/test-read_record_batch.R @@ -33,7 +33,7 @@ test_that("RecordBatchFileWriter / RecordBatchFileReader roundtrips", { tab2 <- read_table(tf) expect_equal(tab, tab2) - stream <- FileOutputStream(tf) + stream <- FileOutputStream$create(tf) writer <- RecordBatchFileWriter(stream, tab$schema) expect_is(writer, "arrow::ipc::RecordBatchFileWriter") writer$write_table(tab) @@ -55,7 +55,7 @@ test_that("read_record_batch() handles (raw|Buffer|InputStream, Schema) (ARROW-3 raw <- batch$serialize() batch2 <- read_record_batch(raw, schema) batch3 <- read_record_batch(buffer(raw), schema) - stream <- BufferReader(raw) + stream <- BufferReader$create(raw) batch4 <- read_record_batch(stream, schema) stream$close() @@ -69,7 +69,7 @@ test_that("read_record_batch() can handle (Message, Schema) parameters (ARROW-34 schema <- batch$schema raw <- batch$serialize() - stream <- BufferReader(raw) + stream <- BufferReader$create(raw) message <- read_message(stream) batch2 <- read_record_batch(message, schema) diff --git a/r/tests/testthat/test-recordbatchreader.R b/r/tests/testthat/test-recordbatchreader.R index bb6df846632..1e5adce0902 100644 --- a/r/tests/testthat/test-recordbatchreader.R +++ b/r/tests/testthat/test-recordbatchreader.R @@ -23,7 +23,7 @@ 
test_that("RecordBatchStreamReader / Writer", { y = letters[1:10] ) - sink <- BufferOutputStream() + sink <- BufferOutputStream$create() writer <- RecordBatchStreamWriter(sink, batch$schema) expect_is(writer, "arrow::ipc::RecordBatchStreamWriter") writer$write_batch(batch) @@ -48,7 +48,7 @@ test_that("RecordBatchFileReader / Writer", { y = letters[1:10] ) - sink <- BufferOutputStream() + sink <- BufferOutputStream$create() writer <- RecordBatchFileWriter(sink, batch$schema) expect_is(writer, "arrow::ipc::RecordBatchFileWriter") writer$write_batch(batch) diff --git a/r/tests/testthat/test-schema.R b/r/tests/testthat/test-schema.R index eec27be5f9c..fb16aeceffd 100644 --- a/r/tests/testthat/test-schema.R +++ b/r/tests/testthat/test-schema.R @@ -30,7 +30,7 @@ test_that("reading schema from Buffer", { batch <- record_batch(x = 1:10) expect_is(batch, "arrow::RecordBatch") - stream <- BufferOutputStream() + stream <- BufferOutputStream$create() writer <- RecordBatchStreamWriter(stream, batch$schema) expect_is(writer, "arrow::ipc::RecordBatchStreamWriter") writer$close() @@ -45,8 +45,8 @@ test_that("reading schema from Buffer", { expect_is(message, "arrow::ipc::Message") expect_equal(message$type, MessageType$SCHEMA) - stream <- BufferReader(buffer) - expect_is(stream, "arrow::io::BufferReader") + stream <- BufferReader$create(buffer) + expect_is(stream, "BufferReader") message <- read_message(stream) expect_is(message, "arrow::ipc::Message") expect_equal(message$type, MessageType$SCHEMA) From 9bd708fd05777729bdfee8727d3a61ad51f530c7 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Thu, 5 Sep 2019 13:19:07 -0700 Subject: [PATCH 12/37] csv --- r/NAMESPACE | 4 - r/R/csv.R | 237 ++++++++++++++++-------------------- r/man/csv_table_reader.Rd | 2 +- r/tests/testthat/test-csv.R | 2 +- 4 files changed, 108 insertions(+), 137 deletions(-) diff --git a/r/NAMESPACE b/r/NAMESPACE index 1d689a765be..e5c85e89b5f 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -28,10 +28,6 @@ 
S3method(RecordBatchStreamWriter,character) S3method(as.data.frame,"arrow::RecordBatch") S3method(as.data.frame,"arrow::Table") S3method(as.raw,Buffer) -S3method(csv_table_reader,"arrow::csv::TableReader") -S3method(csv_table_reader,InputStream) -S3method(csv_table_reader,character) -S3method(csv_table_reader,default) S3method(dim,"arrow::RecordBatch") S3method(dim,"arrow::Table") S3method(json_table_reader,"arrow::json::TableReader") diff --git a/r/R/csv.R b/r/R/csv.R index 2eb4e8d424e..a853a9fba57 100644 --- a/r/R/csv.R +++ b/r/R/csv.R @@ -181,15 +181,62 @@ read_tsv_arrow <- function(file, #' @include arrow-package.R -`arrow::csv::TableReader` <- R6Class("arrow::csv::TableReader", inherit = Object, +CsvTableReader <- R6Class("CsvTableReader", inherit = Object, public = list( Read = function() shared_ptr(`arrow::Table`, csv___TableReader__Read(self)) ) ) -`arrow::csv::ReadOptions` <- R6Class("arrow::csv::ReadOptions", inherit = Object) -`arrow::csv::ParseOptions` <- R6Class("arrow::csv::ParseOptions", inherit = Object) -`arrow::csv::ConvertOptions` <- R6Class("arrow::csv::ConvertOptions", inherit = Object) +CsvTableReader$create <- function(file, + read_options = csv_read_options(), + parse_options = csv_parse_options(), + convert_options = csv_convert_options(), + ...) { + if (is.character(file)) { + file <- mmap_open(file) + } + if (inherits(file, "InputStream")) { + file <- shared_ptr(CsvTableReader, + csv___TableReader__Make(file, read_options, parse_options, convert_options) + ) + } + assert_that(inherits(file, c("CsvTableReader", "TableReader"))) + file +} + +#' Arrow CSV and JSON table readers +#' +#' These methods wrap the Arrow C++ CSV and JSON table readers. 
+#' For an interface to the CSV reader that's more familiar for R users, see +#' [read_csv_arrow()] +#' +#' @param file A character path to a local file, or an Arrow input stream +#' @param read_options see [csv_read_options()] +#' @param parse_options see [csv_parse_options()] +#' @param convert_options see [csv_convert_options()] +#' @param ... additional parameters. +#' +#' @return An CsvTableReader or `arrow::json::TableReader` R6 +#' object. Call `$Read()` on it to get an Arrow Table. +#' @export +csv_table_reader <- CsvTableReader$create + +CsvReadOptions <- R6Class("CsvReadOptions", inherit = Object) +CsvReadOptions$create <- function(use_threads = option_use_threads(), + block_size = 1048576L, + skip_rows = 0L, + column_names = character(0), + autogenerate_column_names = FALSE) { + shared_ptr(CsvReadOptions, csv___ReadOptions__initialize( + list( + use_threads = use_threads, + block_size = block_size, + skip_rows = skip_rows, + column_names = column_names, + autogenerate_column_names = autogenerate_column_names + ) + )) +} #' Read options for the Arrow file readers #' @@ -206,21 +253,7 @@ read_tsv_arrow <- function(file, #' be "f0", "f1", ..., "fN". 
#' #' @export -csv_read_options <- function(use_threads = option_use_threads(), - block_size = 1048576L, - skip_rows = 0L, - column_names = character(0), - autogenerate_column_names = FALSE) { - shared_ptr(`arrow::csv::ReadOptions`, csv___ReadOptions__initialize( - list( - use_threads = use_threads, - block_size = block_size, - skip_rows = skip_rows, - column_names = column_names, - autogenerate_column_names = autogenerate_column_names - ) - )) -} +csv_read_options <- CsvReadOptions$create readr_to_csv_read_options <- function(skip, col_names) { if (isTRUE(col_names)) { @@ -234,6 +267,44 @@ readr_to_csv_read_options <- function(skip, col_names) { } } +CsvParseOptions <- R6Class("CsvParseOptions", inherit = Object) +CsvParseOptions$create <- function(delimiter = ",", + quoting = TRUE, + quote_char = '"', + double_quote = TRUE, + escaping = FALSE, + escape_char = '\\', + newlines_in_values = FALSE, + ignore_empty_lines = TRUE) { + + shared_ptr(CsvParseOptions, csv___ParseOptions__initialize( + list( + delimiter = delimiter, + quoting = quoting, + quote_char = quote_char, + double_quote = double_quote, + escaping = escaping, + escape_char = escape_char, + newlines_in_values = newlines_in_values, + ignore_empty_lines = ignore_empty_lines + ) + )) +} + +#' Parsing options for Arrow file readers +#' +#' @param delimiter Field delimiter +#' @param quoting Whether quoting is used +#' @param quote_char Quoting character (if `quoting` is `TRUE`) +#' @param double_quote Whether a quote inside a value is double-quoted +#' @param escaping Whether escaping is used +#' @param escape_char Escaping character (if `escaping` is `TRUE`) +#' @param newlines_in_values Whether values are allowed to contain CR (`0x0d`) and LF (`0x0a`) characters +#' @param ignore_empty_lines Whether empty lines are ignored. 
If `FALSE`, an empty line represents +#' +#' @export +csv_parse_options <- CsvParseOptions$create + readr_to_csv_parse_options <- function(delim = ",", quote = '"', escape_double = TRUE, @@ -253,37 +324,22 @@ readr_to_csv_parse_options <- function(delim = ",", ) } -#' Parsing options for Arrow file readers -#' -#' @param delimiter Field delimiter -#' @param quoting Whether quoting is used -#' @param quote_char Quoting character (if `quoting` is `TRUE`) -#' @param double_quote Whether a quote inside a value is double-quoted -#' @param escaping Whether escaping is used -#' @param escape_char Escaping character (if `escaping` is `TRUE`) -#' @param newlines_in_values Whether values are allowed to contain CR (`0x0d`) and LF (`0x0a`) characters -#' @param ignore_empty_lines Whether empty lines are ignored. If `FALSE`, an empty line represents -#' -#' @export -csv_parse_options <- function(delimiter = ",", - quoting = TRUE, - quote_char = '"', - double_quote = TRUE, - escaping = FALSE, - escape_char = '\\', - newlines_in_values = FALSE, - ignore_empty_lines = TRUE) { +CsvConvertOptions <- R6Class("CsvConvertOptions", inherit = Object) +CsvConvertOptions$create <- function(check_utf8 = TRUE, + null_values = c("", "NA"), + strings_can_be_null = FALSE) { + # TODO: there are more conversion options available: + # // Optional per-column types (disabling type inference on those columns) + # std::unordered_map> column_types; + # // Recognized spellings for boolean values + # std::vector true_values; + # std::vector false_values; - shared_ptr(`arrow::csv::ParseOptions`, csv___ParseOptions__initialize( + shared_ptr(CsvConvertOptions, csv___ConvertOptions__initialize( list( - delimiter = delimiter, - quoting = quoting, - quote_char = quote_char, - double_quote = double_quote, - escaping = escaping, - escape_char = escape_char, - newlines_in_values = newlines_in_values, - ignore_empty_lines = ignore_empty_lines + check_utf8 = check_utf8, + null_values = null_values, + 
strings_can_be_null = strings_can_be_null ) )) } @@ -298,89 +354,8 @@ csv_parse_options <- function(delimiter = ",", #' null values. Similar to the `quoted_na` argument to `readr::read_csv()`. #' #' @export -csv_convert_options <- function(check_utf8 = TRUE, - null_values = c("", "NA"), - strings_can_be_null = FALSE) { - shared_ptr(`arrow::csv::ConvertOptions`, csv___ConvertOptions__initialize( - list( - check_utf8 = check_utf8, - null_values = null_values, - strings_can_be_null = strings_can_be_null - ) - )) -} +csv_convert_options <- CsvConvertOptions$create readr_to_csv_convert_options <- function(na, quoted_na) { - csv_convert_options(null_values = na, strings_can_be_null = quoted_na) -} - -#' Arrow CSV and JSON table readers -#' -#' These methods wrap the Arrow C++ CSV and JSON table readers. -#' For an interface to the CSV reader that's more familiar for R users, see -#' [read_csv_arrow()] -#' -#' @param file A character path to a local file, or an Arrow input stream -#' @param read_options see [csv_read_options()] -#' @param parse_options see [csv_parse_options()] -#' @param convert_options see [csv_convert_options()] -#' @param ... additional parameters. -#' -#' @return An `arrow::csv::TableReader` or `arrow::json::TableReader` R6 -#' object. Call `$Read()` on it to get an Arrow Table. -#' @export -csv_table_reader <- function(file, - read_options = csv_read_options(), - parse_options = csv_parse_options(), - convert_options = csv_convert_options(), - ... -){ - UseMethod("csv_table_reader") -} - -#' @export -csv_table_reader.default <- function(file, - read_options = csv_read_options(), - parse_options = csv_parse_options(), - convert_options = csv_convert_options(), - ... -) { - abort("unsupported") -} - -#' @export -`csv_table_reader.character` <- function(file, - read_options = csv_read_options(), - parse_options = csv_parse_options(), - convert_options = csv_convert_options(), - ... 
-){ - csv_table_reader(mmap_open(file), - read_options = read_options, - parse_options = parse_options, - convert_options = convert_options, - ... - ) -} - -#' @export -csv_table_reader.InputStream <- function(file, - read_options = csv_read_options(), - parse_options = csv_parse_options(), - convert_options = csv_convert_options(), - ... -){ - shared_ptr(`arrow::csv::TableReader`, - csv___TableReader__Make(file, read_options, parse_options, convert_options) - ) -} - -#' @export -`csv_table_reader.arrow::csv::TableReader` <- function(file, - read_options = csv_read_options(), - parse_options = csv_parse_options(), - convert_options = csv_convert_options(), - ... -){ - file + csv_convert_options(null_values = na, strings_can_be_null = quoted_na) } diff --git a/r/man/csv_table_reader.Rd b/r/man/csv_table_reader.Rd index 1377a63a670..e59b71d99c6 100644 --- a/r/man/csv_table_reader.Rd +++ b/r/man/csv_table_reader.Rd @@ -24,7 +24,7 @@ json_table_reader(file, read_options = json_read_options(), \item{...}{additional parameters.} } \value{ -An \code{arrow::csv::TableReader} or \code{arrow::json::TableReader} R6 +An CsvTableReader or \code{arrow::json::TableReader} R6 object. Call \code{$Read()} on it to get an Arrow Table. } \description{ diff --git a/r/tests/testthat/test-csv.R b/r/tests/testthat/test-csv.R index 5904c5efc03..fc392675637 100644 --- a/r/tests/testthat/test-csv.R +++ b/r/tests/testthat/test-csv.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
-context("arrow::csv::TableReader") +context("CsvTableReader") test_that("Can read csv file", { tf <- tempfile() From 55607a6a8d439368bc0b88481f4bdfcb7ffcc615 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Thu, 5 Sep 2019 13:28:21 -0700 Subject: [PATCH 13/37] json --- r/NAMESPACE | 4 - r/R/csv.R | 2 +- r/R/json.R | 153 ++++++++++++------------------ r/man/arrow__json__TableReader.Rd | 6 +- r/man/csv_table_reader.Rd | 2 +- r/man/read_json_arrow.Rd | 2 +- r/tests/testthat/test-json.R | 2 +- 7 files changed, 68 insertions(+), 103 deletions(-) diff --git a/r/NAMESPACE b/r/NAMESPACE index e5c85e89b5f..d5fd53902aa 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -30,10 +30,6 @@ S3method(as.data.frame,"arrow::Table") S3method(as.raw,Buffer) S3method(dim,"arrow::RecordBatch") S3method(dim,"arrow::Table") -S3method(json_table_reader,"arrow::json::TableReader") -S3method(json_table_reader,InputStream) -S3method(json_table_reader,character) -S3method(json_table_reader,default) S3method(length,Array) S3method(names,"arrow::RecordBatch") S3method(parquet_file_reader,RandomAccessFile) diff --git a/r/R/csv.R b/r/R/csv.R index a853a9fba57..a1ff66aae16 100644 --- a/r/R/csv.R +++ b/r/R/csv.R @@ -216,7 +216,7 @@ CsvTableReader$create <- function(file, #' @param convert_options see [csv_convert_options()] #' @param ... additional parameters. #' -#' @return An CsvTableReader or `arrow::json::TableReader` R6 +#' @return An CsvTableReader or JsonTableReader R6 #' object. Call `$Read()` on it to get an Arrow Table. #' @export csv_table_reader <- CsvTableReader$create diff --git a/r/R/json.R b/r/R/json.R index e24a9a8e345..875b83dfe70 100644 --- a/r/R/json.R +++ b/r/R/json.R @@ -15,11 +15,40 @@ # specific language governing permissions and limitations # under the License. -#' @include arrow-package.R +#' Read a JSON file +#' +#' Using [JsonTableReader][arrow__json__TableReader] +#' +#' @inheritParams read_delim_arrow +#' @param ... 
Additional options, passed to `json_table_reader()` +#' +#' @return A `data.frame`, or an `arrow::Table` if `as_tibble = FALSE`. +#' @export +#' @examples +#' \donttest{ +#' try({ +#' tf <- tempfile() +#' on.exit(unlink(tf)) +#' writeLines(' +#' { "hello": 3.5, "world": false, "yo": "thing" } +#' { "hello": 3.25, "world": null } +#' { "hello": 0.0, "world": true, "yo": null } +#' ', tf, useBytes=TRUE) +#' df <- read_json_arrow(tf) +#' }) +#' } +read_json_arrow <- function(file, col_select = NULL, as_tibble = TRUE, ...) { + tab <- json_table_reader(file, ...)$Read()$select(!!enquo(col_select)) + + if (isTRUE(as_tibble)) { + tab <- as.data.frame(tab) + } + tab +} #' @include arrow-package.R #' -#' @title class arrow::json::TableReader +#' @title class JsonTableReader #' #' @usage NULL #' @format NULL @@ -31,19 +60,35 @@ #' #' @rdname arrow__json__TableReader #' @name arrow__json__TableReader -`arrow::json::TableReader` <- R6Class("arrow::json::TableReader", inherit = Object, +JsonTableReader <- R6Class("JsonTableReader", inherit = Object, public = list( Read = function() shared_ptr(`arrow::Table`, json___TableReader__Read(self)) ) ) +JsonTableReader$create <- function(file, + read_options = json_read_options(), + parse_options = json_parse_options(), + ...) 
{ -`arrow::json::ReadOptions` <- R6Class("arrow::json::ReadOptions", inherit = Object) -`arrow::json::ParseOptions` <- R6Class("arrow::json::ParseOptions", inherit = Object) + if (is.character(file)) { + file <- mmap_open(file) + } + if (inherits(file, "InputStream")) { + file <- shared_ptr(JsonTableReader, + json___TableReader__Make(file, read_options, parse_options) + ) + } + assert_that(inherits(file, c("JsonTableReader", "TableReader"))) + file +} -#' @rdname csv_read_options +#' @rdname csv_table_reader #' @export -json_read_options <- function(use_threads = TRUE, block_size = 1048576L) { - shared_ptr(`arrow::json::ReadOptions`, json___ReadOptions__initialize( +json_table_reader <- JsonTableReader$create + +JsonReadOptions <- R6Class("JsonReadOptions", inherit = Object) +JsonReadOptions$create <- function(use_threads = TRUE, block_size = 1048576L) { + shared_ptr(JsonReadOptions, json___ReadOptions__initialize( list( use_threads = use_threads, block_size = block_size @@ -51,96 +96,20 @@ json_read_options <- function(use_threads = TRUE, block_size = 1048576L) { )) } -#' @rdname csv_parse_options +#' @rdname csv_read_options #' @export -json_parse_options <- function(newlines_in_values = FALSE) { - shared_ptr(`arrow::json::ParseOptions`, json___ParseOptions__initialize( +json_read_options <- JsonReadOptions$create + +JsonParseOptions <- R6Class("JsonParseOptions", inherit = Object) +JsonParseOptions$create <- function(newlines_in_values = FALSE) { + shared_ptr(JsonParseOptions, json___ParseOptions__initialize( list( newlines_in_values = newlines_in_values ) )) } -#' @rdname csv_table_reader -#' @export -json_table_reader <- function(file, - read_options = json_read_options(), - parse_options = json_parse_options(), - ... -){ - UseMethod("json_table_reader") -} - -#' @importFrom rlang abort -#' @export -json_table_reader.default <- function(file, - read_options = json_read_options(), - parse_options = json_parse_options(), - ... 
-) { - abort("unsupported") -} - -#' @export -`json_table_reader.character` <- function(file, - read_options = json_read_options(), - parse_options = json_parse_options(), - ... -){ - json_table_reader(ReadableFile$create(file), - read_options = read_options, - parse_options = parse_options, - ... - ) -} - -#' @export -json_table_reader.InputStream <- function(file, - read_options = json_read_options(), - parse_options = json_parse_options(), - ... -){ - shared_ptr(`arrow::json::TableReader`, - json___TableReader__Make(file, read_options, parse_options) - ) -} - -#' @export -`json_table_reader.arrow::json::TableReader` <- function(file, - read_options = json_read_options(), - parse_options = json_parse_options(), - ... -){ - file -} -#' Read a JSON file -#' -#' Use [arrow::json::TableReader][arrow__json__TableReader] from [json_table_reader()] -#' -#' @inheritParams read_delim_arrow -#' @param ... Additional options, passed to `json_table_reader()` -#' -#' @return A `data.frame`, or an `arrow::Table` if `as_tibble = FALSE`. +#' @rdname csv_parse_options #' @export -#' @examples -#' \donttest{ -#' try({ -#' tf <- tempfile() -#' on.exit(unlink(tf)) -#' writeLines(' -#' { "hello": 3.5, "world": false, "yo": "thing" } -#' { "hello": 3.25, "world": null } -#' { "hello": 0.0, "world": true, "yo": null } -#' ', tf, useBytes=TRUE) -#' df <- read_json_arrow(tf) -#' }) -#' } -read_json_arrow <- function(file, col_select = NULL, as_tibble = TRUE, ...) 
{ - tab <- json_table_reader(file, ...)$Read()$select(!!enquo(col_select)) - - if (isTRUE(as_tibble)) { - tab <- as.data.frame(tab) - } - tab -} +json_parse_options <- JsonParseOptions$create diff --git a/r/man/arrow__json__TableReader.Rd b/r/man/arrow__json__TableReader.Rd index 69b588f3f23..387a9db3039 100644 --- a/r/man/arrow__json__TableReader.Rd +++ b/r/man/arrow__json__TableReader.Rd @@ -3,10 +3,10 @@ \docType{class} \name{arrow__json__TableReader} \alias{arrow__json__TableReader} -\alias{arrow::json::TableReader} -\title{class arrow::json::TableReader} +\alias{JsonTableReader} +\title{class JsonTableReader} \description{ -class arrow::json::TableReader +class JsonTableReader } \section{Methods}{ diff --git a/r/man/csv_table_reader.Rd b/r/man/csv_table_reader.Rd index e59b71d99c6..a825b7b86c8 100644 --- a/r/man/csv_table_reader.Rd +++ b/r/man/csv_table_reader.Rd @@ -24,7 +24,7 @@ json_table_reader(file, read_options = json_read_options(), \item{...}{additional parameters.} } \value{ -An CsvTableReader or \code{arrow::json::TableReader} R6 +An CsvTableReader or JsonTableReader R6 object. Call \code{$Read()} on it to get an Arrow Table. } \description{ diff --git a/r/man/read_json_arrow.Rd b/r/man/read_json_arrow.Rd index a3879aceccd..4c2f66869ca 100644 --- a/r/man/read_json_arrow.Rd +++ b/r/man/read_json_arrow.Rd @@ -23,7 +23,7 @@ of columns, as used in \code{dplyr::select()}.} A \code{data.frame}, or an \code{arrow::Table} if \code{as_tibble = FALSE}. } \description{ -Use \link[=arrow__json__TableReader]{arrow::json::TableReader} from \code{\link[=json_table_reader]{json_table_reader()}} +Using \link[=arrow__json__TableReader]{JsonTableReader} } \examples{ \donttest{ diff --git a/r/tests/testthat/test-json.R b/r/tests/testthat/test-json.R index 8967982357e..c436cb72596 100644 --- a/r/tests/testthat/test-json.R +++ b/r/tests/testthat/test-json.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
-context("arrow::json::TableReader") +context("JsonTableReader") test_that("Can read json file with scalars columns (ARROW-5503)", { tf <- tempfile() From 0e7877b71c955f47fd96a54549d2a4b722ff3738 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Thu, 5 Sep 2019 13:31:12 -0700 Subject: [PATCH 14/37] Drop ::ipc:: --- r/NAMESPACE | 16 +++++------ r/R/RecordBatchReader.R | 16 +++++------ r/R/RecordBatchWriter.R | 28 ++++++++++---------- r/R/Schema.R | 2 +- r/R/enums.R | 2 +- r/R/feather.R | 14 +++++----- r/R/message.R | 20 +++++++------- r/R/read_record_batch.R | 4 +-- r/R/read_table.R | 16 +++++------ r/R/write_arrow.R | 12 ++++----- r/man/FeatherTableReader.Rd | 4 +-- r/man/RecordBatchFileReader.Rd | 4 +-- r/man/RecordBatchFileWriter.Rd | 2 +- r/man/RecordBatchStreamReader.Rd | 4 +-- r/man/RecordBatchStreamWriter.Rd | 2 +- r/man/arrow__ipc__Message.Rd | 6 ++--- r/man/arrow__ipc__MessageReader.Rd | 6 ++--- r/man/arrow__ipc__RecordBatchFileReader.Rd | 6 ++--- r/man/arrow__ipc__RecordBatchFileWriter.Rd | 8 +++--- r/man/arrow__ipc__RecordBatchStreamReader.Rd | 6 ++--- r/man/arrow__ipc__RecordBatchStreamWriter.Rd | 8 +++--- r/man/arrow__ipc__RecordBatchWriter.Rd | 10 +++---- r/man/read_feather.Rd | 2 +- r/man/read_record_batch.Rd | 2 +- r/man/read_table.Rd | 12 ++++----- r/man/write_arrow.Rd | 10 +++---- r/tests/testthat/test-message.R | 6 ++--- r/tests/testthat/test-messagereader.R | 14 +++++----- r/tests/testthat/test-read_record_batch.R | 4 +-- r/tests/testthat/test-recordbatchreader.R | 8 +++--- r/tests/testthat/test-schema.R | 8 +++--- 31 files changed, 131 insertions(+), 131 deletions(-) diff --git a/r/NAMESPACE b/r/NAMESPACE index d5fd53902aa..ef2af783b48 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -3,11 +3,11 @@ S3method("!=",Object) S3method("==","arrow::DataType") S3method("==","arrow::Field") +S3method("==","arrow::Message") S3method("==","arrow::RecordBatch") S3method("==","arrow::Schema") -S3method("==","arrow::ipc::Message") S3method("==",Array) 
-S3method(FeatherTableReader,"arrow::ipc::feather::TableReader") +S3method(FeatherTableReader,"arrow::feather::TableReader") S3method(FeatherTableReader,RandomAccessFile) S3method(FeatherTableReader,character) S3method(FeatherTableReader,raw) @@ -36,26 +36,26 @@ S3method(parquet_file_reader,RandomAccessFile) S3method(parquet_file_reader,character) S3method(parquet_file_reader,raw) S3method(print,"arrow-enum") -S3method(read_message,"arrow::ipc::MessageReader") +S3method(read_message,"arrow::MessageReader") S3method(read_message,InputStream) S3method(read_message,default) -S3method(read_record_batch,"arrow::ipc::Message") +S3method(read_record_batch,"arrow::Message") S3method(read_record_batch,Buffer) S3method(read_record_batch,InputStream) S3method(read_record_batch,raw) -S3method(read_schema,"arrow::ipc::Message") +S3method(read_schema,"arrow::Message") S3method(read_schema,Buffer) S3method(read_schema,InputStream) S3method(read_schema,raw) -S3method(read_table,"arrow::ipc::RecordBatchFileReader") -S3method(read_table,"arrow::ipc::RecordBatchStreamReader") +S3method(read_table,"arrow::RecordBatchFileReader") +S3method(read_table,"arrow::RecordBatchStreamReader") S3method(read_table,character) S3method(read_table,raw) S3method(type,"arrow::Column") S3method(type,Array) S3method(type,ChunkedArray) S3method(type,default) -S3method(write_arrow,"arrow::ipc::RecordBatchWriter") +S3method(write_arrow,"arrow::RecordBatchWriter") S3method(write_arrow,character) S3method(write_arrow,raw) S3method(write_feather,"arrow::RecordBatch") diff --git a/r/R/RecordBatchReader.R b/r/R/RecordBatchReader.R index b5339dceea9..8de62143745 100644 --- a/r/R/RecordBatchReader.R +++ b/r/R/RecordBatchReader.R @@ -40,7 +40,7 @@ ) ) -#' @title class arrow::ipc::RecordBatchStreamReader +#' @title class arrow::RecordBatchStreamReader #' #' @usage NULL #' @format NULL @@ -52,13 +52,13 @@ #' #' @rdname arrow__ipc__RecordBatchStreamReader #' @name arrow__ipc__RecordBatchStreamReader 
-`arrow::ipc::RecordBatchStreamReader` <- R6Class("arrow::ipc::RecordBatchStreamReader", inherit = `arrow::RecordBatchReader`, +`arrow::RecordBatchStreamReader` <- R6Class("arrow::RecordBatchStreamReader", inherit = `arrow::RecordBatchReader`, public = list( batches = function() map(ipc___RecordBatchStreamReader__batches(self), shared_ptr, class = `arrow::RecordBatch`) ) ) -#' @title class arrow::ipc::RecordBatchFileReader +#' @title class arrow::RecordBatchFileReader #' #' @usage NULL #' @format NULL @@ -70,7 +70,7 @@ #' #' @rdname arrow__ipc__RecordBatchFileReader #' @name arrow__ipc__RecordBatchFileReader -`arrow::ipc::RecordBatchFileReader` <- R6Class("arrow::ipc::RecordBatchFileReader", inherit = Object, +`arrow::RecordBatchFileReader` <- R6Class("arrow::RecordBatchFileReader", inherit = Object, public = list( get_batch = function(i) shared_ptr(`arrow::RecordBatch`, ipc___RecordBatchFileReader__ReadRecordBatch(self, i)), @@ -82,7 +82,7 @@ ) ) -#' Create a [arrow::ipc::RecordBatchStreamReader][arrow__ipc__RecordBatchStreamReader] from an input stream +#' Create a [arrow::RecordBatchStreamReader][arrow__ipc__RecordBatchStreamReader] from an input stream #' #' @param stream input stream, an [arrow::io::InputStream][arrow__io__InputStream] or a raw vector #' @@ -93,7 +93,7 @@ RecordBatchStreamReader <- function(stream){ #' @export RecordBatchStreamReader.InputStream <- function(stream) { - shared_ptr(`arrow::ipc::RecordBatchStreamReader`, ipc___RecordBatchStreamReader__Open(stream)) + shared_ptr(`arrow::RecordBatchStreamReader`, ipc___RecordBatchStreamReader__Open(stream)) } #' @export @@ -107,7 +107,7 @@ RecordBatchStreamReader.InputStream <- function(stream) { } -#' Create an [arrow::ipc::RecordBatchFileReader][arrow__ipc__RecordBatchFileReader] from a file +#' Create an [arrow::RecordBatchFileReader][arrow__ipc__RecordBatchFileReader] from a file #' #' @param file The file to read from. 
A file path, or an [arrow::io::RandomAccessFile][arrow__ipc__RecordBatchFileReader] #' @@ -118,7 +118,7 @@ RecordBatchFileReader <- function(file) { #' @export RecordBatchFileReader.RandomAccessFile <- function(file) { - shared_ptr(`arrow::ipc::RecordBatchFileReader`, ipc___RecordBatchFileReader__Open(file)) + shared_ptr(`arrow::RecordBatchFileReader`, ipc___RecordBatchFileReader__Open(file)) } #' @export diff --git a/r/R/RecordBatchWriter.R b/r/R/RecordBatchWriter.R index 130e7f189f9..b83b390ba36 100644 --- a/r/R/RecordBatchWriter.R +++ b/r/R/RecordBatchWriter.R @@ -17,7 +17,7 @@ #' @include arrow-package.R -#' @title class arrow::ipc::RecordBatchWriter +#' @title class arrow::RecordBatchWriter #' #' @usage NULL #' @format NULL @@ -31,12 +31,12 @@ #' #' @section Derived classes: #' -#' - [arrow::ipc::RecordBatchStreamWriter][arrow__ipc__RecordBatchStreamWriter] implements the streaming binary format -#' - [arrow::ipc::RecordBatchFileWriter][arrow__ipc__RecordBatchFileWriter] implements the binary file format +#' - [arrow::RecordBatchStreamWriter][arrow__ipc__RecordBatchStreamWriter] implements the streaming binary format +#' - [arrow::RecordBatchFileWriter][arrow__ipc__RecordBatchFileWriter] implements the binary file format #' #' @rdname arrow__ipc__RecordBatchWriter #' @name arrow__ipc__RecordBatchWriter -`arrow::ipc::RecordBatchWriter` <- R6Class("arrow::ipc::RecordBatchWriter", inherit = Object, +`arrow::RecordBatchWriter` <- R6Class("arrow::RecordBatchWriter", inherit = Object, public = list( write_batch = function(batch) ipc___RecordBatchWriter__WriteRecordBatch(self, batch), write_table = function(table) ipc___RecordBatchWriter__WriteTable(self, table), @@ -57,7 +57,7 @@ ) ) -#' @title class arrow::ipc::RecordBatchStreamWriter +#' @title class arrow::RecordBatchStreamWriter #' #' Writer for the Arrow streaming binary format #' @@ -80,7 +80,7 @@ #' The [RecordBatchStreamWriter()] function creates a record batch stream writer. 
#' #' @section Methods: -#' inherited from [arrow::ipc::RecordBatchWriter][arrow__ipc__RecordBatchWriter] +#' inherited from [arrow::RecordBatchWriter][arrow__ipc__RecordBatchWriter] #' #' - `$write_batch(batch)`: Write record batch to stream #' - `$write_table(table)`: write Table to stream @@ -88,7 +88,7 @@ #' #' @rdname arrow__ipc__RecordBatchStreamWriter #' @name arrow__ipc__RecordBatchStreamWriter -`arrow::ipc::RecordBatchStreamWriter` <- R6Class("arrow::ipc::RecordBatchStreamWriter", inherit = `arrow::ipc::RecordBatchWriter`) +`arrow::RecordBatchStreamWriter` <- R6Class("arrow::RecordBatchStreamWriter", inherit = `arrow::RecordBatchWriter`) #' Writer for the Arrow streaming binary format #' @@ -99,7 +99,7 @@ #' #' @param schema The [arrow::Schema][arrow__Schema] for data to be written. #' -#' @return a [arrow::ipc::RecordBatchStreamWriter][arrow__ipc__RecordBatchStreamWriter] +#' @return a [arrow::RecordBatchStreamWriter][arrow__ipc__RecordBatchStreamWriter] #' #' @export RecordBatchStreamWriter <- function(sink, schema) { @@ -114,10 +114,10 @@ RecordBatchStreamWriter.character <- function(sink, schema){ #' @export RecordBatchStreamWriter.OutputStream <- function(sink, schema){ assert_that(inherits(schema, "arrow::Schema")) - shared_ptr(`arrow::ipc::RecordBatchStreamWriter`, ipc___RecordBatchStreamWriter__Open(sink, schema)) + shared_ptr(`arrow::RecordBatchStreamWriter`, ipc___RecordBatchStreamWriter__Open(sink, schema)) } -#' @title class arrow::ipc::RecordBatchFileWriter +#' @title class arrow::RecordBatchFileWriter #' #' Writer for the Arrow binary file format #' @@ -140,7 +140,7 @@ RecordBatchStreamWriter.OutputStream <- function(sink, schema){ #' The [RecordBatchFileWriter()] function creates a record batch stream writer. 
#' #' @section Methods: -#' inherited from [arrow::ipc::RecordBatchWriter][arrow__ipc__RecordBatchWriter] +#' inherited from [arrow::RecordBatchWriter][arrow__ipc__RecordBatchWriter] #' #' - `$write_batch(batch)`: Write record batch to stream #' - `$write_table(table)`: write Table to stream @@ -148,7 +148,7 @@ RecordBatchStreamWriter.OutputStream <- function(sink, schema){ #' #' @rdname arrow__ipc__RecordBatchFileWriter #' @name arrow__ipc__RecordBatchFileWriter -`arrow::ipc::RecordBatchFileWriter` <- R6Class("arrow::ipc::RecordBatchFileWriter", inherit = `arrow::ipc::RecordBatchStreamWriter`) +`arrow::RecordBatchFileWriter` <- R6Class("arrow::RecordBatchFileWriter", inherit = `arrow::RecordBatchStreamWriter`) #' Create a record batch file writer from a stream #' @@ -159,7 +159,7 @@ RecordBatchStreamWriter.OutputStream <- function(sink, schema){ #' #' @param schema The [arrow::Schema][arrow__Schema] for data to be written. #' -#' @return an `arrow::ipc::RecordBatchWriter` object +#' @return an `arrow::RecordBatchWriter` object #' #' @export RecordBatchFileWriter <- function(sink, schema) { @@ -174,5 +174,5 @@ RecordBatchFileWriter.character <- function(sink, schema){ #' @export RecordBatchFileWriter.OutputStream <- function(sink, schema){ assert_that(inherits(schema, "arrow::Schema")) - shared_ptr(`arrow::ipc::RecordBatchFileWriter`, ipc___RecordBatchFileWriter__Open(sink, schema)) + shared_ptr(`arrow::RecordBatchFileWriter`, ipc___RecordBatchFileWriter__Open(sink, schema)) } diff --git a/r/R/Schema.R b/r/R/Schema.R index 6721b3b4476..61746ba49a7 100644 --- a/r/R/Schema.R +++ b/r/R/Schema.R @@ -107,6 +107,6 @@ read_schema.InputStream <- function(stream, ...) { } #' @export -`read_schema.arrow::ipc::Message` <- function(stream, ...) { +`read_schema.arrow::Message` <- function(stream, ...) 
{ shared_ptr(`arrow::Schema`, ipc___ReadSchema_Message(stream)) } diff --git a/r/R/enums.R b/r/R/enums.R index 0a909accc4c..9db2dc333e9 100644 --- a/r/R/enums.R +++ b/r/R/enums.R @@ -69,7 +69,7 @@ FileMode <- enum("FileMode", #' @rdname enums #' @export -MessageType <- enum("arrow::ipc::Message::Type", +MessageType <- enum("arrow::Message::Type", NONE = 0L, SCHEMA = 1L, DICTIONARY_BATCH = 2L, RECORD_BATCH = 3L, TENSOR = 4L ) diff --git a/r/R/feather.R b/r/R/feather.R index 0cc41dffdc3..69f010db1e0 100644 --- a/r/R/feather.R +++ b/r/R/feather.R @@ -17,7 +17,7 @@ #' @include arrow-package.R -`arrow::ipc::feather::TableWriter` <- R6Class("arrow::ipc::feather::TableWriter", inherit = Object, +`arrow::feather::TableWriter` <- R6Class("arrow::feather::TableWriter", inherit = Object, public = list( SetDescription = function(description) ipc___feather___TableWriter__SetDescription(self, description), SetNumRows = function(num_rows) ipc___feather___TableWriter__SetNumRows(self, num_rows), @@ -26,7 +26,7 @@ ) ) -`arrow::ipc::feather::TableReader` <- R6Class("arrow::ipc::feather::TableReader", inherit = Object, +`arrow::feather::TableReader` <- R6Class("arrow::feather::TableReader", inherit = Object, public = list( GetDescription = function() ipc___feather___TableReader__GetDescription(self), HasDescription = function() ipc__feather___TableReader__HasDescription(self), @@ -52,7 +52,7 @@ FeatherTableWriter <- function(stream) { #' @export FeatherTableWriter.OutputStream <- function(stream){ - unique_ptr(`arrow::ipc::feather::TableWriter`, ipc___feather___TableWriter__Open(stream)) + unique_ptr(`arrow::feather::TableWriter`, ipc___feather___TableWriter__Open(stream)) } #' Write data in the Feather format @@ -117,7 +117,7 @@ write_feather_RecordBatch.OutputStream <- function(data, stream) { ipc___TableWriter__RecordBatch__WriteFeather(FeatherTableWriter(stream), data) } -#' A `arrow::ipc::feather::TableReader` to read from a file +#' A `arrow::feather::TableReader` to read from 
a file #' #' @param file A file path or RandomAccessFile #' @param mmap Is the file memory mapped (applicable to the `character` method) @@ -145,17 +145,17 @@ FeatherTableReader.raw <- function(file, mmap = TRUE, ...) { #' @export FeatherTableReader.RandomAccessFile <- function(file, mmap = TRUE, ...){ - unique_ptr(`arrow::ipc::feather::TableReader`, ipc___feather___TableReader__Open(file)) + unique_ptr(`arrow::feather::TableReader`, ipc___feather___TableReader__Open(file)) } #' @export -`FeatherTableReader.arrow::ipc::feather::TableReader` <- function(file, mmap = TRUE, ...){ +`FeatherTableReader.arrow::feather::TableReader` <- function(file, mmap = TRUE, ...){ file } #' Read a Feather file #' -#' @param file an `arrow::ipc::feather::TableReader` or whatever the [FeatherTableReader()] function can handle +#' @param file an `arrow::feather::TableReader` or whatever the [FeatherTableReader()] function can handle #' @inheritParams read_delim_arrow #' @param ... additional parameters #' diff --git a/r/R/message.R b/r/R/message.R index 0105388628d..8583133f397 100644 --- a/r/R/message.R +++ b/r/R/message.R @@ -17,7 +17,7 @@ #' @include arrow-package.R -#' @title class arrow::ipc::Message +#' @title class arrow::Message #' #' @usage NULL #' @format NULL @@ -29,10 +29,10 @@ #' #' @rdname arrow__ipc__Message #' @name arrow__ipc__Message -`arrow::ipc::Message` <- R6Class("arrow::ipc::Message", inherit = Object, +`arrow::Message` <- R6Class("arrow::Message", inherit = Object, public = list( Equals = function(other){ - assert_that(inherits(other, "arrow::ipc::Message")) + assert_that(inherits(other, "arrow::Message")) ipc___Message__Equals(self, other) }, body_length = function() ipc___Message__body_length(self), @@ -46,9 +46,9 @@ ) #' @export -`==.arrow::ipc::Message` <- function(x, y) x$Equals(y) +`==.arrow::Message` <- function(x, y) x$Equals(y) -#' @title class arrow::ipc::MessageReader +#' @title class arrow::MessageReader #' #' @usage NULL #' @format NULL @@ -60,9 
+60,9 @@ #' #' @rdname arrow__ipc__MessageReader #' @name arrow__ipc__MessageReader -`arrow::ipc::MessageReader` <- R6Class("arrow::ipc::MessageReader", inherit = Object, +`arrow::MessageReader` <- R6Class("arrow::MessageReader", inherit = Object, public = list( - ReadNextMessage = function() unique_ptr(`arrow::ipc::Message`, ipc___MessageReader__ReadNextMessage(self)) + ReadNextMessage = function() unique_ptr(`arrow::Message`, ipc___MessageReader__ReadNextMessage(self)) ) ) @@ -82,7 +82,7 @@ MessageReader.default <- function(stream) { #' @export MessageReader.InputStream <- function(stream) { - unique_ptr(`arrow::ipc::MessageReader`, ipc___MessageReader__Open(stream)) + unique_ptr(`arrow::MessageReader`, ipc___MessageReader__Open(stream)) } #' Read a Message from a stream @@ -101,10 +101,10 @@ read_message.default<- function(stream) { #' @export read_message.InputStream <- function(stream) { - unique_ptr(`arrow::ipc::Message`, ipc___ReadMessage(stream) ) + unique_ptr(`arrow::Message`, ipc___ReadMessage(stream) ) } #' @export -`read_message.arrow::ipc::MessageReader` <- function(stream) { +`read_message.arrow::MessageReader` <- function(stream) { stream$ReadNextMessage() } diff --git a/r/R/read_record_batch.R b/r/R/read_record_batch.R index fddf1415a6a..100452f5c23 100644 --- a/r/R/read_record_batch.R +++ b/r/R/read_record_batch.R @@ -17,7 +17,7 @@ #' read [arrow::RecordBatch][arrow__RecordBatch] as encapsulated IPC message, given a known [arrow::Schema][arrow__Schema] #' -#' @param obj a [arrow::ipc::Message][arrow__ipc__Message], a [arrow::io::InputStream][arrow__io__InputStream], a [Buffer][buffer], or a raw vector +#' @param obj a [arrow::Message][arrow__ipc__Message], a [arrow::io::InputStream][arrow__io__InputStream], a [Buffer][buffer], or a raw vector #' @param schema a [arrow::Schema][arrow__Schema] #' #' @return a [arrow::RecordBatch][arrow__RecordBatch] @@ -28,7 +28,7 @@ read_record_batch <- function(obj, schema){ } #' @export 
-`read_record_batch.arrow::ipc::Message` <- function(obj, schema) { +`read_record_batch.arrow::Message` <- function(obj, schema) { assert_that(inherits(schema, "arrow::Schema")) shared_ptr(`arrow::RecordBatch`, ipc___ReadRecordBatch__Message__Schema(obj, schema)) } diff --git a/r/R/read_table.R b/r/R/read_table.R index 848a2607cfe..d52d639ceba 100644 --- a/r/R/read_table.R +++ b/r/R/read_table.R @@ -19,19 +19,19 @@ #' #' @param stream stream. #' -#' - a [arrow::ipc::RecordBatchFileReader][arrow__ipc__RecordBatchFileReader]: +#' - a [arrow::RecordBatchFileReader][arrow__ipc__RecordBatchFileReader]: #' read an [arrow::Table][arrow__Table] #' from all the record batches in the reader #' -#' - a [arrow::ipc::RecordBatchStreamReader][arrow__ipc__RecordBatchStreamReader]: +#' - a [arrow::RecordBatchStreamReader][arrow__ipc__RecordBatchStreamReader]: #' read an [arrow::Table][arrow__Table] from the remaining record batches #' in the reader #' #' - a string file path: interpret the file as an arrow -#' binary file format, and uses a [arrow::ipc::RecordBatchFileReader][arrow__ipc__RecordBatchFileReader] +#' binary file format, and uses a [arrow::RecordBatchFileReader][arrow__ipc__RecordBatchFileReader] #' to process it. #' -#' - a raw vector: read using a [arrow::ipc::RecordBatchStreamReader][arrow__ipc__RecordBatchStreamReader] +#' - a raw vector: read using a [arrow::RecordBatchStreamReader][arrow__ipc__RecordBatchStreamReader] #' #' @return #' @@ -40,8 +40,8 @@ #' #' @details #' -#' The methods using [arrow::ipc::RecordBatchFileReader][arrow__ipc__RecordBatchFileReader] and -#' [arrow::ipc::RecordBatchStreamReader][arrow__ipc__RecordBatchStreamReader] offer the most +#' The methods using [arrow::RecordBatchFileReader][arrow__ipc__RecordBatchFileReader] and +#' [arrow::RecordBatchStreamReader][arrow__ipc__RecordBatchStreamReader] offer the most #' flexibility. The other methods are for convenience. 
#' #' @export @@ -50,12 +50,12 @@ read_table <- function(stream){ } #' @export -`read_table.arrow::ipc::RecordBatchFileReader` <- function(stream) { +`read_table.arrow::RecordBatchFileReader` <- function(stream) { shared_ptr(`arrow::Table`, Table__from_RecordBatchFileReader(stream)) } #' @export -`read_table.arrow::ipc::RecordBatchStreamReader` <- function(stream) { +`read_table.arrow::RecordBatchStreamReader` <- function(stream) { shared_ptr(`arrow::Table`, Table__from_RecordBatchStreamReader(stream)) } diff --git a/r/R/write_arrow.R b/r/R/write_arrow.R index ae9320e1ef6..4b259ae85db 100644 --- a/r/R/write_arrow.R +++ b/r/R/write_arrow.R @@ -32,22 +32,22 @@ to_arrow <- function(x) { #' #' @param stream where to serialize to #' -#' - A [arrow::ipc::RecordBatchWriter][arrow__ipc__RecordBatchWriter]: the `$write()` +#' - A [arrow::RecordBatchWriter][arrow__ipc__RecordBatchWriter]: the `$write()` #' of `x` is used. The stream is left open. This uses the streaming format #' or the binary file format depending on the type of the writer. #' #' - A string file path: `x` is serialized with -#' a [arrow::ipc::RecordBatchFileWriter][arrow__ipc__RecordBatchFileWriter], i.e. +#' a [arrow::RecordBatchFileWriter][arrow__ipc__RecordBatchFileWriter], i.e. #' using the binary file format. #' #' - A raw vector: typically of length zero (its data is ignored, and only used for #' dispatch). `x` is serialized using the streaming format, i.e. using the -#' [arrow::ipc::RecordBatchStreamWriter][arrow__ipc__RecordBatchStreamWriter] +#' [arrow::RecordBatchStreamWriter][arrow__ipc__RecordBatchStreamWriter] #' #' @param ... extra parameters, currently ignored #' -#' `write_arrow` is a convenience function, the classes [arrow::ipc::RecordBatchFileWriter][arrow__ipc__RecordBatchFileWriter] -#' and [arrow::ipc::RecordBatchStreamWriter][arrow__ipc__RecordBatchStreamWriter] can be used for more flexibility. 
+#' `write_arrow` is a convenience function, the classes [arrow::RecordBatchFileWriter][arrow__ipc__RecordBatchFileWriter] +#' and [arrow::RecordBatchStreamWriter][arrow__ipc__RecordBatchStreamWriter] can be used for more flexibility. #' #' @export write_arrow <- function(x, stream, ...) { @@ -55,7 +55,7 @@ write_arrow <- function(x, stream, ...) { } #' @export -`write_arrow.arrow::ipc::RecordBatchWriter` <- function(x, stream, ...){ +`write_arrow.arrow::RecordBatchWriter` <- function(x, stream, ...){ stream$write(x) } diff --git a/r/man/FeatherTableReader.Rd b/r/man/FeatherTableReader.Rd index 1e5be02d369..cabf8f65200 100644 --- a/r/man/FeatherTableReader.Rd +++ b/r/man/FeatherTableReader.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/feather.R \name{FeatherTableReader} \alias{FeatherTableReader} -\title{A \code{arrow::ipc::feather::TableReader} to read from a file} +\title{A \code{arrow::feather::TableReader} to read from a file} \usage{ FeatherTableReader(file, mmap = TRUE, ...) } @@ -14,5 +14,5 @@ FeatherTableReader(file, mmap = TRUE, ...) \item{...}{extra parameters} } \description{ -A \code{arrow::ipc::feather::TableReader} to read from a file +A \code{arrow::feather::TableReader} to read from a file } diff --git a/r/man/RecordBatchFileReader.Rd b/r/man/RecordBatchFileReader.Rd index 3ea04817e0e..63dee6d068d 100644 --- a/r/man/RecordBatchFileReader.Rd +++ b/r/man/RecordBatchFileReader.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/RecordBatchReader.R \name{RecordBatchFileReader} \alias{RecordBatchFileReader} -\title{Create an \link[=arrow__ipc__RecordBatchFileReader]{arrow::ipc::RecordBatchFileReader} from a file} +\title{Create an \link[=arrow__ipc__RecordBatchFileReader]{arrow::RecordBatchFileReader} from a file} \usage{ RecordBatchFileReader(file) } @@ -10,5 +10,5 @@ RecordBatchFileReader(file) \item{file}{The file to read from. 
A file path, or an \link[=arrow__ipc__RecordBatchFileReader]{arrow::io::RandomAccessFile}} } \description{ -Create an \link[=arrow__ipc__RecordBatchFileReader]{arrow::ipc::RecordBatchFileReader} from a file +Create an \link[=arrow__ipc__RecordBatchFileReader]{arrow::RecordBatchFileReader} from a file } diff --git a/r/man/RecordBatchFileWriter.Rd b/r/man/RecordBatchFileWriter.Rd index d89578f97be..481852345f8 100644 --- a/r/man/RecordBatchFileWriter.Rd +++ b/r/man/RecordBatchFileWriter.Rd @@ -16,7 +16,7 @@ RecordBatchFileWriter(sink, schema) \item{schema}{The \link[=arrow__Schema]{arrow::Schema} for data to be written.} } \value{ -an \code{arrow::ipc::RecordBatchWriter} object +an \code{arrow::RecordBatchWriter} object } \description{ Create a record batch file writer from a stream diff --git a/r/man/RecordBatchStreamReader.Rd b/r/man/RecordBatchStreamReader.Rd index 4bd0e8ccdc5..491adb162b2 100644 --- a/r/man/RecordBatchStreamReader.Rd +++ b/r/man/RecordBatchStreamReader.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/RecordBatchReader.R \name{RecordBatchStreamReader} \alias{RecordBatchStreamReader} -\title{Create a \link[=arrow__ipc__RecordBatchStreamReader]{arrow::ipc::RecordBatchStreamReader} from an input stream} +\title{Create a \link[=arrow__ipc__RecordBatchStreamReader]{arrow::RecordBatchStreamReader} from an input stream} \usage{ RecordBatchStreamReader(stream) } @@ -10,5 +10,5 @@ RecordBatchStreamReader(stream) \item{stream}{input stream, an \link[=arrow__io__InputStream]{arrow::io::InputStream} or a raw vector} } \description{ -Create a \link[=arrow__ipc__RecordBatchStreamReader]{arrow::ipc::RecordBatchStreamReader} from an input stream +Create a \link[=arrow__ipc__RecordBatchStreamReader]{arrow::RecordBatchStreamReader} from an input stream } diff --git a/r/man/RecordBatchStreamWriter.Rd b/r/man/RecordBatchStreamWriter.Rd index 9d9bbc9ceb0..0f783c1e5db 100644 --- a/r/man/RecordBatchStreamWriter.Rd +++ b/r/man/RecordBatchStreamWriter.Rd @@ -16,7 +16,7 
@@ RecordBatchStreamWriter(sink, schema) \item{schema}{The \link[=arrow__Schema]{arrow::Schema} for data to be written.} } \value{ -a \link[=arrow__ipc__RecordBatchStreamWriter]{arrow::ipc::RecordBatchStreamWriter} +a \link[=arrow__ipc__RecordBatchStreamWriter]{arrow::RecordBatchStreamWriter} } \description{ Writer for the Arrow streaming binary format diff --git a/r/man/arrow__ipc__Message.Rd b/r/man/arrow__ipc__Message.Rd index d3811f8f4c1..ccff140d559 100644 --- a/r/man/arrow__ipc__Message.Rd +++ b/r/man/arrow__ipc__Message.Rd @@ -3,10 +3,10 @@ \docType{class} \name{arrow__ipc__Message} \alias{arrow__ipc__Message} -\alias{arrow::ipc::Message} -\title{class arrow::ipc::Message} +\alias{arrow::Message} +\title{class arrow::Message} \description{ -class arrow::ipc::Message +class arrow::Message } \section{Methods}{ diff --git a/r/man/arrow__ipc__MessageReader.Rd b/r/man/arrow__ipc__MessageReader.Rd index 883e9e0618b..c3641b56215 100644 --- a/r/man/arrow__ipc__MessageReader.Rd +++ b/r/man/arrow__ipc__MessageReader.Rd @@ -3,10 +3,10 @@ \docType{class} \name{arrow__ipc__MessageReader} \alias{arrow__ipc__MessageReader} -\alias{arrow::ipc::MessageReader} -\title{class arrow::ipc::MessageReader} +\alias{arrow::MessageReader} +\title{class arrow::MessageReader} \description{ -class arrow::ipc::MessageReader +class arrow::MessageReader } \section{Methods}{ diff --git a/r/man/arrow__ipc__RecordBatchFileReader.Rd b/r/man/arrow__ipc__RecordBatchFileReader.Rd index 675f636b365..56eea2645d2 100644 --- a/r/man/arrow__ipc__RecordBatchFileReader.Rd +++ b/r/man/arrow__ipc__RecordBatchFileReader.Rd @@ -3,10 +3,10 @@ \docType{class} \name{arrow__ipc__RecordBatchFileReader} \alias{arrow__ipc__RecordBatchFileReader} -\alias{arrow::ipc::RecordBatchFileReader} -\title{class arrow::ipc::RecordBatchFileReader} +\alias{arrow::RecordBatchFileReader} +\title{class arrow::RecordBatchFileReader} \description{ -class arrow::ipc::RecordBatchFileReader +class arrow::RecordBatchFileReader } 
\section{Methods}{ diff --git a/r/man/arrow__ipc__RecordBatchFileWriter.Rd b/r/man/arrow__ipc__RecordBatchFileWriter.Rd index a80b55941fb..162a8081f25 100644 --- a/r/man/arrow__ipc__RecordBatchFileWriter.Rd +++ b/r/man/arrow__ipc__RecordBatchFileWriter.Rd @@ -3,12 +3,12 @@ \docType{class} \name{arrow__ipc__RecordBatchFileWriter} \alias{arrow__ipc__RecordBatchFileWriter} -\alias{arrow::ipc::RecordBatchFileWriter} -\title{class arrow::ipc::RecordBatchFileWriter +\alias{arrow::RecordBatchFileWriter} +\title{class arrow::RecordBatchFileWriter Writer for the Arrow binary file format} \description{ -class arrow::ipc::RecordBatchFileWriter +class arrow::RecordBatchFileWriter Writer for the Arrow binary file format } @@ -29,7 +29,7 @@ The \code{\link[=RecordBatchFileWriter]{RecordBatchFileWriter()}} function creat \section{Methods}{ -inherited from \link[=arrow__ipc__RecordBatchWriter]{arrow::ipc::RecordBatchWriter} +inherited from \link[=arrow__ipc__RecordBatchWriter]{arrow::RecordBatchWriter} \itemize{ \item \code{$write_batch(batch)}: Write record batch to stream \item \code{$write_table(table)}: write Table to stream diff --git a/r/man/arrow__ipc__RecordBatchStreamReader.Rd b/r/man/arrow__ipc__RecordBatchStreamReader.Rd index 49f57cce057..397198aeb31 100644 --- a/r/man/arrow__ipc__RecordBatchStreamReader.Rd +++ b/r/man/arrow__ipc__RecordBatchStreamReader.Rd @@ -3,10 +3,10 @@ \docType{class} \name{arrow__ipc__RecordBatchStreamReader} \alias{arrow__ipc__RecordBatchStreamReader} -\alias{arrow::ipc::RecordBatchStreamReader} -\title{class arrow::ipc::RecordBatchStreamReader} +\alias{arrow::RecordBatchStreamReader} +\title{class arrow::RecordBatchStreamReader} \description{ -class arrow::ipc::RecordBatchStreamReader +class arrow::RecordBatchStreamReader } \section{Methods}{ diff --git a/r/man/arrow__ipc__RecordBatchStreamWriter.Rd b/r/man/arrow__ipc__RecordBatchStreamWriter.Rd index 3d2030287d1..79055c7a35a 100644 --- a/r/man/arrow__ipc__RecordBatchStreamWriter.Rd +++ 
b/r/man/arrow__ipc__RecordBatchStreamWriter.Rd @@ -3,12 +3,12 @@ \docType{class} \name{arrow__ipc__RecordBatchStreamWriter} \alias{arrow__ipc__RecordBatchStreamWriter} -\alias{arrow::ipc::RecordBatchStreamWriter} -\title{class arrow::ipc::RecordBatchStreamWriter +\alias{arrow::RecordBatchStreamWriter} +\title{class arrow::RecordBatchStreamWriter Writer for the Arrow streaming binary format} \description{ -class arrow::ipc::RecordBatchStreamWriter +class arrow::RecordBatchStreamWriter Writer for the Arrow streaming binary format } @@ -29,7 +29,7 @@ The \code{\link[=RecordBatchStreamWriter]{RecordBatchStreamWriter()}} function c \section{Methods}{ -inherited from \link[=arrow__ipc__RecordBatchWriter]{arrow::ipc::RecordBatchWriter} +inherited from \link[=arrow__ipc__RecordBatchWriter]{arrow::RecordBatchWriter} \itemize{ \item \code{$write_batch(batch)}: Write record batch to stream \item \code{$write_table(table)}: write Table to stream diff --git a/r/man/arrow__ipc__RecordBatchWriter.Rd b/r/man/arrow__ipc__RecordBatchWriter.Rd index 08593df8524..fb51cc5bae0 100644 --- a/r/man/arrow__ipc__RecordBatchWriter.Rd +++ b/r/man/arrow__ipc__RecordBatchWriter.Rd @@ -3,10 +3,10 @@ \docType{class} \name{arrow__ipc__RecordBatchWriter} \alias{arrow__ipc__RecordBatchWriter} -\alias{arrow::ipc::RecordBatchWriter} -\title{class arrow::ipc::RecordBatchWriter} +\alias{arrow::RecordBatchWriter} +\title{class arrow::RecordBatchWriter} \description{ -class arrow::ipc::RecordBatchWriter +class arrow::RecordBatchWriter } \section{Methods}{ @@ -20,8 +20,8 @@ class arrow::ipc::RecordBatchWriter \section{Derived classes}{ \itemize{ -\item \link[=arrow__ipc__RecordBatchStreamWriter]{arrow::ipc::RecordBatchStreamWriter} implements the streaming binary format -\item \link[=arrow__ipc__RecordBatchFileWriter]{arrow::ipc::RecordBatchFileWriter} implements the binary file format +\item \link[=arrow__ipc__RecordBatchStreamWriter]{arrow::RecordBatchStreamWriter} implements the streaming binary format 
+\item \link[=arrow__ipc__RecordBatchFileWriter]{arrow::RecordBatchFileWriter} implements the binary file format } } diff --git a/r/man/read_feather.Rd b/r/man/read_feather.Rd index 07a54246eac..e89632caac1 100644 --- a/r/man/read_feather.Rd +++ b/r/man/read_feather.Rd @@ -7,7 +7,7 @@ read_feather(file, col_select = NULL, as_tibble = TRUE, ...) } \arguments{ -\item{file}{an \code{arrow::ipc::feather::TableReader} or whatever the \code{\link[=FeatherTableReader]{FeatherTableReader()}} function can handle} +\item{file}{an \code{arrow::feather::TableReader} or whatever the \code{\link[=FeatherTableReader]{FeatherTableReader()}} function can handle} \item{col_select}{A character vector of column names to keep, as in the "select" argument to \code{data.table::fread()}, or a diff --git a/r/man/read_record_batch.Rd b/r/man/read_record_batch.Rd index 71e555b04af..1514bbdf077 100644 --- a/r/man/read_record_batch.Rd +++ b/r/man/read_record_batch.Rd @@ -7,7 +7,7 @@ read_record_batch(obj, schema) } \arguments{ -\item{obj}{a \link[=arrow__ipc__Message]{arrow::ipc::Message}, a \link[=arrow__io__InputStream]{arrow::io::InputStream}, a \link[=buffer]{Buffer}, or a raw vector} +\item{obj}{a \link[=arrow__ipc__Message]{arrow::Message}, a \link[=arrow__io__InputStream]{arrow::io::InputStream}, a \link[=buffer]{Buffer}, or a raw vector} \item{schema}{a \link[=arrow__Schema]{arrow::Schema}} } diff --git a/r/man/read_table.Rd b/r/man/read_table.Rd index e556b8b0773..421139df1b9 100644 --- a/r/man/read_table.Rd +++ b/r/man/read_table.Rd @@ -12,16 +12,16 @@ read_arrow(stream) \arguments{ \item{stream}{stream. 
\itemize{ -\item a \link[=arrow__ipc__RecordBatchFileReader]{arrow::ipc::RecordBatchFileReader}: +\item a \link[=arrow__ipc__RecordBatchFileReader]{arrow::RecordBatchFileReader}: read an \link[=arrow__Table]{arrow::Table} from all the record batches in the reader -\item a \link[=arrow__ipc__RecordBatchStreamReader]{arrow::ipc::RecordBatchStreamReader}: +\item a \link[=arrow__ipc__RecordBatchStreamReader]{arrow::RecordBatchStreamReader}: read an \link[=arrow__Table]{arrow::Table} from the remaining record batches in the reader \item a string file path: interpret the file as an arrow -binary file format, and uses a \link[=arrow__ipc__RecordBatchFileReader]{arrow::ipc::RecordBatchFileReader} +binary file format, and uses a \link[=arrow__ipc__RecordBatchFileReader]{arrow::RecordBatchFileReader} to process it. -\item a raw vector: read using a \link[=arrow__ipc__RecordBatchStreamReader]{arrow::ipc::RecordBatchStreamReader} +\item a raw vector: read using a \link[=arrow__ipc__RecordBatchStreamReader]{arrow::RecordBatchStreamReader} }} } \value{ @@ -34,7 +34,7 @@ to process it. Read an \link[=arrow__Table]{arrow::Table} from a stream } \details{ -The methods using \link[=arrow__ipc__RecordBatchFileReader]{arrow::ipc::RecordBatchFileReader} and -\link[=arrow__ipc__RecordBatchStreamReader]{arrow::ipc::RecordBatchStreamReader} offer the most +The methods using \link[=arrow__ipc__RecordBatchFileReader]{arrow::RecordBatchFileReader} and +\link[=arrow__ipc__RecordBatchStreamReader]{arrow::RecordBatchStreamReader} offer the most flexibility. The other methods are for convenience. } diff --git a/r/man/write_arrow.Rd b/r/man/write_arrow.Rd index 9ba65cb18f3..815111e763b 100644 --- a/r/man/write_arrow.Rd +++ b/r/man/write_arrow.Rd @@ -11,21 +11,21 @@ write_arrow(x, stream, ...) 
\item{stream}{where to serialize to \itemize{ -\item A \link[=arrow__ipc__RecordBatchWriter]{arrow::ipc::RecordBatchWriter}: the \code{$write()} +\item A \link[=arrow__ipc__RecordBatchWriter]{arrow::RecordBatchWriter}: the \code{$write()} of \code{x} is used. The stream is left open. This uses the streaming format or the binary file format depending on the type of the writer. \item A string file path: \code{x} is serialized with -a \link[=arrow__ipc__RecordBatchFileWriter]{arrow::ipc::RecordBatchFileWriter}, i.e. +a \link[=arrow__ipc__RecordBatchFileWriter]{arrow::RecordBatchFileWriter}, i.e. using the binary file format. \item A raw vector: typically of length zero (its data is ignored, and only used for dispatch). \code{x} is serialized using the streaming format, i.e. using the -\link[=arrow__ipc__RecordBatchStreamWriter]{arrow::ipc::RecordBatchStreamWriter} +\link[=arrow__ipc__RecordBatchStreamWriter]{arrow::RecordBatchStreamWriter} }} \item{...}{extra parameters, currently ignored -\code{write_arrow} is a convenience function, the classes \link[=arrow__ipc__RecordBatchFileWriter]{arrow::ipc::RecordBatchFileWriter} -and \link[=arrow__ipc__RecordBatchStreamWriter]{arrow::ipc::RecordBatchStreamWriter} can be used for more flexibility.} +\code{write_arrow} is a convenience function, the classes \link[=arrow__ipc__RecordBatchFileWriter]{arrow::RecordBatchFileWriter} +and \link[=arrow__ipc__RecordBatchStreamWriter]{arrow::RecordBatchStreamWriter} can be used for more flexibility.} } \description{ Write Arrow formatted data diff --git a/r/tests/testthat/test-message.R b/r/tests/testthat/test-message.R index 9500fcfa638..0f873c0d800 100644 --- a/r/tests/testthat/test-message.R +++ b/r/tests/testthat/test-message.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
-context("arrow::ipc::Message") +context("arrow::Message") test_that("read_message can read from input stream", { batch <- record_batch(x = 1:10) @@ -23,7 +23,7 @@ test_that("read_message can read from input stream", { stream <- BufferReader$create(bytes) message <- read_message(stream) - expect_is(message, "arrow::ipc::Message") + expect_is(message, "arrow::Message") expect_equal(message$type, MessageType$RECORD_BATCH) expect_is(message$body, "Buffer") expect_is(message$metadata, "Buffer") @@ -37,7 +37,7 @@ test_that("read_message() can read Schema messages", { stream <- BufferReader$create(bytes) message <- read_message(stream) - expect_is(message, "arrow::ipc::Message") + expect_is(message, "arrow::Message") expect_equal(message$type, MessageType$SCHEMA) expect_is(message$body, "Buffer") expect_is(message$metadata, "Buffer") diff --git a/r/tests/testthat/test-messagereader.R b/r/tests/testthat/test-messagereader.R index 582d38d6cdd..eda991ad840 100644 --- a/r/tests/testthat/test-messagereader.R +++ b/r/tests/testthat/test-messagereader.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
-context("arrow::ipc::MessageReader") +context("arrow::MessageReader") test_that("MessageReader can be created from raw vectors", { batch <- record_batch(x = 1:10) @@ -24,7 +24,7 @@ test_that("MessageReader can be created from raw vectors", { reader <- MessageReader(bytes) message <- reader$ReadNextMessage() - expect_is(message, "arrow::ipc::Message") + expect_is(message, "arrow::Message") expect_equal(message$type, MessageType$RECORD_BATCH) expect_is(message$body, "Buffer") expect_is(message$metadata, "Buffer") @@ -38,7 +38,7 @@ test_that("MessageReader can be created from raw vectors", { reader <- MessageReader(bytes) message <- reader$ReadNextMessage() - expect_is(message, "arrow::ipc::Message") + expect_is(message, "arrow::Message") expect_equal(message$type, MessageType$SCHEMA) expect_is(message$body, "Buffer") expect_is(message$metadata, "Buffer") @@ -55,10 +55,10 @@ test_that("MessageReader can be created from input stream", { expect_is(stream, "BufferReader") reader <- MessageReader(stream) - expect_is(reader, "arrow::ipc::MessageReader") + expect_is(reader, "arrow::MessageReader") message <- reader$ReadNextMessage() - expect_is(message, "arrow::ipc::Message") + expect_is(message, "arrow::Message") expect_equal(message$type, MessageType$RECORD_BATCH) expect_is(message$body, "Buffer") expect_is(message$metadata, "Buffer") @@ -73,10 +73,10 @@ test_that("MessageReader can be created from input stream", { expect_is(stream, "BufferReader") reader <- MessageReader(stream) - expect_is(reader, "arrow::ipc::MessageReader") + expect_is(reader, "arrow::MessageReader") message <- reader$ReadNextMessage() - expect_is(message, "arrow::ipc::Message") + expect_is(message, "arrow::Message") expect_equal(message$type, MessageType$SCHEMA) expect_is(message$body, "Buffer") expect_is(message$metadata, "Buffer") diff --git a/r/tests/testthat/test-read_record_batch.R b/r/tests/testthat/test-read_record_batch.R index ad2c116d866..de0c6944d6c 100644 --- 
a/r/tests/testthat/test-read_record_batch.R +++ b/r/tests/testthat/test-read_record_batch.R @@ -27,7 +27,7 @@ test_that("RecordBatchFileWriter / RecordBatchFileReader roundtrips", { tf <- tempfile() writer <- RecordBatchFileWriter(tf, tab$schema) - expect_is(writer, "arrow::ipc::RecordBatchFileWriter") + expect_is(writer, "arrow::RecordBatchFileWriter") writer$write_table(tab) writer$close() tab2 <- read_table(tf) @@ -35,7 +35,7 @@ test_that("RecordBatchFileWriter / RecordBatchFileReader roundtrips", { stream <- FileOutputStream$create(tf) writer <- RecordBatchFileWriter(stream, tab$schema) - expect_is(writer, "arrow::ipc::RecordBatchFileWriter") + expect_is(writer, "arrow::RecordBatchFileWriter") writer$write_table(tab) writer$close() tab3 <- read_table(tf) diff --git a/r/tests/testthat/test-recordbatchreader.R b/r/tests/testthat/test-recordbatchreader.R index 1e5adce0902..5d74a7846ed 100644 --- a/r/tests/testthat/test-recordbatchreader.R +++ b/r/tests/testthat/test-recordbatchreader.R @@ -25,7 +25,7 @@ test_that("RecordBatchStreamReader / Writer", { sink <- BufferOutputStream$create() writer <- RecordBatchStreamWriter(sink, batch$schema) - expect_is(writer, "arrow::ipc::RecordBatchStreamWriter") + expect_is(writer, "arrow::RecordBatchStreamWriter") writer$write_batch(batch) writer$close() @@ -33,7 +33,7 @@ test_that("RecordBatchStreamReader / Writer", { expect_is(buf, "Buffer") reader <- RecordBatchStreamReader(buf) - expect_is(reader, "arrow::ipc::RecordBatchStreamReader") + expect_is(reader, "arrow::RecordBatchStreamReader") batch1 <- reader$read_next_batch() expect_is(batch1, "arrow::RecordBatch") @@ -50,7 +50,7 @@ test_that("RecordBatchFileReader / Writer", { sink <- BufferOutputStream$create() writer <- RecordBatchFileWriter(sink, batch$schema) - expect_is(writer, "arrow::ipc::RecordBatchFileWriter") + expect_is(writer, "arrow::RecordBatchFileWriter") writer$write_batch(batch) writer$close() @@ -58,7 +58,7 @@ test_that("RecordBatchFileReader / Writer", { 
expect_is(buf, "Buffer") reader <- RecordBatchFileReader(buf) - expect_is(reader, "arrow::ipc::RecordBatchFileReader") + expect_is(reader, "arrow::RecordBatchFileReader") batch1 <- reader$get_batch(0L) expect_is(batch1, "arrow::RecordBatch") diff --git a/r/tests/testthat/test-schema.R b/r/tests/testthat/test-schema.R index fb16aeceffd..c186b5f4ef2 100644 --- a/r/tests/testthat/test-schema.R +++ b/r/tests/testthat/test-schema.R @@ -32,23 +32,23 @@ test_that("reading schema from Buffer", { stream <- BufferOutputStream$create() writer <- RecordBatchStreamWriter(stream, batch$schema) - expect_is(writer, "arrow::ipc::RecordBatchStreamWriter") + expect_is(writer, "arrow::RecordBatchStreamWriter") writer$close() buffer <- stream$getvalue() expect_is(buffer, "Buffer") reader <- MessageReader(buffer) - expect_is(reader, "arrow::ipc::MessageReader") + expect_is(reader, "arrow::MessageReader") message <- reader$ReadNextMessage() - expect_is(message, "arrow::ipc::Message") + expect_is(message, "arrow::Message") expect_equal(message$type, MessageType$SCHEMA) stream <- BufferReader$create(buffer) expect_is(stream, "BufferReader") message <- read_message(stream) - expect_is(message, "arrow::ipc::Message") + expect_is(message, "arrow::Message") expect_equal(message$type, MessageType$SCHEMA) }) From 365fedc4f411d67b7235a1b392af604c812a7496 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Thu, 5 Sep 2019 13:55:21 -0700 Subject: [PATCH 15/37] feather --- r/NAMESPACE | 14 --- r/R/feather.R | 171 +++++++++-------------------- r/man/FeatherTableReader.Rd | 18 --- r/man/FeatherTableWriter.Rd | 14 --- r/man/read_feather.Rd | 2 +- r/man/write_feather_RecordBatch.Rd | 17 --- 6 files changed, 55 insertions(+), 181 deletions(-) delete mode 100644 r/man/FeatherTableReader.Rd delete mode 100644 r/man/FeatherTableWriter.Rd delete mode 100644 r/man/write_feather_RecordBatch.Rd diff --git a/r/NAMESPACE b/r/NAMESPACE index ef2af783b48..a89f1bd51e7 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ 
-7,11 +7,6 @@ S3method("==","arrow::Message") S3method("==","arrow::RecordBatch") S3method("==","arrow::Schema") S3method("==",Array) -S3method(FeatherTableReader,"arrow::feather::TableReader") -S3method(FeatherTableReader,RandomAccessFile) -S3method(FeatherTableReader,character) -S3method(FeatherTableReader,raw) -S3method(FeatherTableWriter,OutputStream) S3method(MessageReader,InputStream) S3method(MessageReader,default) S3method(RecordBatchFileReader,Buffer) @@ -58,17 +53,9 @@ S3method(type,default) S3method(write_arrow,"arrow::RecordBatchWriter") S3method(write_arrow,character) S3method(write_arrow,raw) -S3method(write_feather,"arrow::RecordBatch") -S3method(write_feather,data.frame) -S3method(write_feather,default) -S3method(write_feather_RecordBatch,OutputStream) -S3method(write_feather_RecordBatch,character) -S3method(write_feather_RecordBatch,default) export(Array) export(CompressionType) export(DateUnit) -export(FeatherTableReader) -export(FeatherTableWriter) export(FileMode) export(MessageReader) export(MessageType) @@ -152,7 +139,6 @@ export(uint8) export(utf8) export(write_arrow) export(write_feather) -export(write_feather_RecordBatch) export(write_parquet) importFrom(R6,R6Class) importFrom(Rcpp,sourceCpp) diff --git a/r/R/feather.R b/r/R/feather.R index 69f010db1e0..d538c3e297b 100644 --- a/r/R/feather.R +++ b/r/R/feather.R @@ -15,46 +15,6 @@ # specific language governing permissions and limitations # under the License. 
-#' @include arrow-package.R - -`arrow::feather::TableWriter` <- R6Class("arrow::feather::TableWriter", inherit = Object, - public = list( - SetDescription = function(description) ipc___feather___TableWriter__SetDescription(self, description), - SetNumRows = function(num_rows) ipc___feather___TableWriter__SetNumRows(self, num_rows), - Append = function(name, values) ipc___feather___TableWriter__Append(self, name, values), - Finalize = function() ipc___feather___TableWriter__Finalize(self) - ) -) - -`arrow::feather::TableReader` <- R6Class("arrow::feather::TableReader", inherit = Object, - public = list( - GetDescription = function() ipc___feather___TableReader__GetDescription(self), - HasDescription = function() ipc__feather___TableReader__HasDescription(self), - version = function() ipc___feather___TableReader__version(self), - num_rows = function() ipc___feather___TableReader__num_rows(self), - num_columns = function() ipc___feather___TableReader__num_columns(self), - GetColumnName = function(i) ipc___feather___TableReader__GetColumnName(self, i), - GetColumn = function(i) shared_ptr(Array, ipc___feather___TableReader__GetColumn(self, i)), - Read = function(columns) { - shared_ptr(`arrow::Table`, ipc___feather___TableReader__Read(self, columns)) - } - ) -) - -#' Create `TableWriter` that writes into a stream -#' -#' @param stream an `OutputStream` -#' -#' @export -FeatherTableWriter <- function(stream) { - UseMethod("FeatherTableWriter") -} - -#' @export -FeatherTableWriter.OutputStream <- function(stream){ - unique_ptr(`arrow::feather::TableWriter`, ipc___feather___TableWriter__Open(stream)) -} - #' Write data in the Feather format #' #' @param data `data.frame` or `arrow::RecordBatch` @@ -69,93 +29,40 @@ FeatherTableWriter.OutputStream <- function(stream){ #' write_feather(mtcars, tf) #' }) #' } +#' @include arrow-package.R write_feather <- function(data, stream) { - UseMethod("write_feather", data) -} - -#' @export -write_feather.default <- function(data, 
stream) { - stop("unsupported") -} - -#' @export -write_feather.data.frame <- function(data, stream) { - write_feather(record_batch(data), stream) -} - -#' @method write_feather arrow::RecordBatch -#' @export -`write_feather.arrow::RecordBatch` <- function(data, stream) { - write_feather_RecordBatch(data, stream) -} - -#' Write a record batch in the feather format -#' -#' @param data `data.frame` or `arrow::RecordBatch` -#' @param stream A file path or an OutputStream -#' -#' @export -#' @keywords internal -write_feather_RecordBatch <- function(data, stream) { - UseMethod("write_feather_RecordBatch", stream) -} - -#' @export -`write_feather_RecordBatch.default` <- function(data, stream) { - stop("unsupported") -} - -#' @export -write_feather_RecordBatch.character <- function(data, stream) { - file_stream <- FileOutputStream$create(stream) - on.exit(file_stream$close()) - write_feather_RecordBatch.OutputStream(data, file_stream) -} - -#' @export -write_feather_RecordBatch.OutputStream <- function(data, stream) { - ipc___TableWriter__RecordBatch__WriteFeather(FeatherTableWriter(stream), data) -} - -#' A `arrow::feather::TableReader` to read from a file -#' -#' @param file A file path or RandomAccessFile -#' @param mmap Is the file memory mapped (applicable to the `character` method) -#' @param ... extra parameters -#' -#' @export -FeatherTableReader <- function(file, mmap = TRUE, ...){ - UseMethod("FeatherTableReader") -} + if (is.data.frame(data)) { + data <- record_batch(data) + } + assert_that(inherits(data, "arrow::RecordBatch")) -#' @export -FeatherTableReader.character <- function(file, mmap = TRUE, ...) { - if (isTRUE(mmap)) { - stream <- mmap_open(file, ...) - } else { - stream <- ReadableFile$create(file, ...) 
+ if (is.character(stream)) { + stream <- FileOutputStream$create(stream) + on.exit(stream$close()) } - FeatherTableReader(stream) -} + assert_that(inherits(stream, "OutputStream")) -#' @export -FeatherTableReader.raw <- function(file, mmap = TRUE, ...) { - FeatherTableReader(BufferReader$create(file), mmap = mmap, ...) + writer <- FeatherTableWriter$create(stream) + ipc___TableWriter__RecordBatch__WriteFeather(writer, data) } -#' @export -FeatherTableReader.RandomAccessFile <- function(file, mmap = TRUE, ...){ - unique_ptr(`arrow::feather::TableReader`, ipc___feather___TableReader__Open(file)) -} +FeatherTableWriter <- R6Class("FeatherTableWriter", inherit = Object, + public = list( + SetDescription = function(description) ipc___feather___TableWriter__SetDescription(self, description), + SetNumRows = function(num_rows) ipc___feather___TableWriter__SetNumRows(self, num_rows), + Append = function(name, values) ipc___feather___TableWriter__Append(self, name, values), + Finalize = function() ipc___feather___TableWriter__Finalize(self) + ) +) -#' @export -`FeatherTableReader.arrow::feather::TableReader` <- function(file, mmap = TRUE, ...){ - file +FeatherTableWriter$create <- function(stream) { + assert_that(inherits(stream, "OutputStream")) + unique_ptr(FeatherTableWriter, ipc___feather___TableWriter__Open(stream)) } #' Read a Feather file #' -#' @param file an `arrow::feather::TableReader` or whatever the [FeatherTableReader()] function can handle +#' @param file an FeatherTableReader or whatever the [FeatherTableReader()] function can handle #' @inheritParams read_delim_arrow #' @param ... additional parameters #' @@ -175,7 +82,7 @@ FeatherTableReader.RandomAccessFile <- function(file, mmap = TRUE, ...){ #' }) #' } read_feather <- function(file, col_select = NULL, as_tibble = TRUE, ...){ - reader <- FeatherTableReader(file, ...) + reader <- FeatherTableReader$create(file, ...) 
all_columns <- ipc___feather___TableReader__column_names(reader) col_select <- enquo(col_select) @@ -189,3 +96,33 @@ read_feather <- function(file, col_select = NULL, as_tibble = TRUE, ...){ } out } + +FeatherTableReader <- R6Class("FeatherTableReader", inherit = Object, + public = list( + GetDescription = function() ipc___feather___TableReader__GetDescription(self), + HasDescription = function() ipc__feather___TableReader__HasDescription(self), + version = function() ipc___feather___TableReader__version(self), + num_rows = function() ipc___feather___TableReader__num_rows(self), + num_columns = function() ipc___feather___TableReader__num_columns(self), + GetColumnName = function(i) ipc___feather___TableReader__GetColumnName(self, i), + GetColumn = function(i) shared_ptr(Array, ipc___feather___TableReader__GetColumn(self, i)), + Read = function(columns) { + shared_ptr(`arrow::Table`, ipc___feather___TableReader__Read(self, columns)) + } + ) +) + +FeatherTableReader$create <- function(stream, mmap = TRUE, ...) { + if (is.character(stream)) { + if (isTRUE(mmap)) { + stream <- mmap_open(stream, ...) + } else { + stream <- ReadableFile$create(stream, ...) + } + } else if (is.raw(stream)) { + stream <- BufferReader$create(stream) + } + + assert_that(inherits(stream, "InputStream")) + unique_ptr(FeatherTableReader, ipc___feather___TableReader__Open(stream)) +} diff --git a/r/man/FeatherTableReader.Rd b/r/man/FeatherTableReader.Rd deleted file mode 100644 index cabf8f65200..00000000000 --- a/r/man/FeatherTableReader.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/feather.R -\name{FeatherTableReader} -\alias{FeatherTableReader} -\title{A \code{arrow::feather::TableReader} to read from a file} -\usage{ -FeatherTableReader(file, mmap = TRUE, ...) 
-} -\arguments{ -\item{file}{A file path or RandomAccessFile} - -\item{mmap}{Is the file memory mapped (applicable to the \code{character} method)} - -\item{...}{extra parameters} -} -\description{ -A \code{arrow::feather::TableReader} to read from a file -} diff --git a/r/man/FeatherTableWriter.Rd b/r/man/FeatherTableWriter.Rd deleted file mode 100644 index 0db0a884fb4..00000000000 --- a/r/man/FeatherTableWriter.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/feather.R -\name{FeatherTableWriter} -\alias{FeatherTableWriter} -\title{Create \code{TableWriter} that writes into a stream} -\usage{ -FeatherTableWriter(stream) -} -\arguments{ -\item{stream}{an \code{OutputStream}} -} -\description{ -Create \code{TableWriter} that writes into a stream -} diff --git a/r/man/read_feather.Rd b/r/man/read_feather.Rd index e89632caac1..3662ae770b4 100644 --- a/r/man/read_feather.Rd +++ b/r/man/read_feather.Rd @@ -7,7 +7,7 @@ read_feather(file, col_select = NULL, as_tibble = TRUE, ...) 
} \arguments{ -\item{file}{an \code{arrow::feather::TableReader} or whatever the \code{\link[=FeatherTableReader]{FeatherTableReader()}} function can handle} +\item{file}{an FeatherTableReader or whatever the \code{\link[=FeatherTableReader]{FeatherTableReader()}} function can handle} \item{col_select}{A character vector of column names to keep, as in the "select" argument to \code{data.table::fread()}, or a diff --git a/r/man/write_feather_RecordBatch.Rd b/r/man/write_feather_RecordBatch.Rd deleted file mode 100644 index 998e57a93fe..00000000000 --- a/r/man/write_feather_RecordBatch.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/feather.R -\name{write_feather_RecordBatch} -\alias{write_feather_RecordBatch} -\title{Write a record batch in the feather format} -\usage{ -write_feather_RecordBatch(data, stream) -} -\arguments{ -\item{data}{\code{data.frame} or \code{arrow::RecordBatch}} - -\item{stream}{A file path or an OutputStream} -} -\description{ -Write a record batch in the feather format -} -\keyword{internal} From 702a0b162f182da70f53981ee4fcda0b15494b16 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Thu, 5 Sep 2019 14:04:37 -0700 Subject: [PATCH 16/37] Message --- r/NAMESPACE | 6 ++--- r/R/enums.R | 2 +- r/R/message.R | 37 ++++++++++----------------- r/R/read_record_batch.R | 4 +-- r/man/MessageReader.Rd | 14 ---------- r/man/arrow__ipc__Message.Rd | 2 +- r/man/arrow__ipc__MessageReader.Rd | 2 +- r/tests/testthat/test-message.R | 6 ++--- r/tests/testthat/test-messagereader.R | 22 ++++++++-------- r/tests/testthat/test-schema.R | 8 +++--- 10 files changed, 38 insertions(+), 65 deletions(-) delete mode 100644 r/man/MessageReader.Rd diff --git a/r/NAMESPACE b/r/NAMESPACE index a89f1bd51e7..ab6a3d49bd8 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -7,8 +7,6 @@ S3method("==","arrow::Message") S3method("==","arrow::RecordBatch") S3method("==","arrow::Schema") S3method("==",Array) 
-S3method(MessageReader,InputStream) -S3method(MessageReader,default) S3method(RecordBatchFileReader,Buffer) S3method(RecordBatchFileReader,RandomAccessFile) S3method(RecordBatchFileReader,character) @@ -31,12 +29,12 @@ S3method(parquet_file_reader,RandomAccessFile) S3method(parquet_file_reader,character) S3method(parquet_file_reader,raw) S3method(print,"arrow-enum") -S3method(read_message,"arrow::MessageReader") S3method(read_message,InputStream) +S3method(read_message,MessageReader) S3method(read_message,default) -S3method(read_record_batch,"arrow::Message") S3method(read_record_batch,Buffer) S3method(read_record_batch,InputStream) +S3method(read_record_batch,Message) S3method(read_record_batch,raw) S3method(read_schema,"arrow::Message") S3method(read_schema,Buffer) diff --git a/r/R/enums.R b/r/R/enums.R index 9db2dc333e9..21ea3eaf109 100644 --- a/r/R/enums.R +++ b/r/R/enums.R @@ -69,7 +69,7 @@ FileMode <- enum("FileMode", #' @rdname enums #' @export -MessageType <- enum("arrow::Message::Type", +MessageType <- enum("Message::Type", NONE = 0L, SCHEMA = 1L, DICTIONARY_BATCH = 2L, RECORD_BATCH = 3L, TENSOR = 4L ) diff --git a/r/R/message.R b/r/R/message.R index 8583133f397..21373f9c812 100644 --- a/r/R/message.R +++ b/r/R/message.R @@ -29,10 +29,10 @@ #' #' @rdname arrow__ipc__Message #' @name arrow__ipc__Message -`arrow::Message` <- R6Class("arrow::Message", inherit = Object, +Message <- R6Class("Message", inherit = Object, public = list( Equals = function(other){ - assert_that(inherits(other, "arrow::Message")) + assert_that(inherits(other, "Message")) ipc___Message__Equals(self, other) }, body_length = function() ipc___Message__body_length(self), @@ -60,29 +60,18 @@ #' #' @rdname arrow__ipc__MessageReader #' @name arrow__ipc__MessageReader -`arrow::MessageReader` <- R6Class("arrow::MessageReader", inherit = Object, +#' @export +MessageReader <- R6Class("MessageReader", inherit = Object, public = list( - ReadNextMessage = function() unique_ptr(`arrow::Message`, 
ipc___MessageReader__ReadNextMessage(self)) + ReadNextMessage = function() unique_ptr(Message, ipc___MessageReader__ReadNextMessage(self)) ) ) -#' Open a MessageReader that reads from a stream -#' -#' @param stream an InputStream -#' -#' @export -MessageReader <- function(stream) { - UseMethod("MessageReader") -} - -#' @export -MessageReader.default <- function(stream) { - MessageReader(BufferReader$create(stream)) -} - -#' @export -MessageReader.InputStream <- function(stream) { - unique_ptr(`arrow::MessageReader`, ipc___MessageReader__Open(stream)) +MessageReader$create <- function(stream) { + if (!inherits(stream, "InputStream")) { + stream <- BufferReader$create(stream) + } + unique_ptr(MessageReader, ipc___MessageReader__Open(stream)) } #' Read a Message from a stream @@ -95,16 +84,16 @@ read_message <- function(stream) { } #' @export -read_message.default<- function(stream) { +read_message.default <- function(stream) { read_message(BufferReader$create(stream)) } #' @export read_message.InputStream <- function(stream) { - unique_ptr(`arrow::Message`, ipc___ReadMessage(stream) ) + unique_ptr(Message, ipc___ReadMessage(stream) ) } #' @export -`read_message.arrow::MessageReader` <- function(stream) { +read_message.MessageReader <- function(stream) { stream$ReadNextMessage() } diff --git a/r/R/read_record_batch.R b/r/R/read_record_batch.R index 100452f5c23..e01a9aee209 100644 --- a/r/R/read_record_batch.R +++ b/r/R/read_record_batch.R @@ -28,7 +28,7 @@ read_record_batch <- function(obj, schema){ } #' @export -`read_record_batch.arrow::Message` <- function(obj, schema) { +read_record_batch.Message <- function(obj, schema) { assert_that(inherits(schema, "arrow::Schema")) shared_ptr(`arrow::RecordBatch`, ipc___ReadRecordBatch__Message__Schema(obj, schema)) } @@ -47,7 +47,7 @@ read_record_batch.raw <- function(obj, schema){ } #' @export -`read_record_batch.Buffer` <- function(obj, schema){ +read_record_batch.Buffer <- function(obj, schema){ stream <- 
BufferReader$create(obj) on.exit(stream$close()) read_record_batch(stream, schema) diff --git a/r/man/MessageReader.Rd b/r/man/MessageReader.Rd deleted file mode 100644 index 01589f5d078..00000000000 --- a/r/man/MessageReader.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/message.R -\name{MessageReader} -\alias{MessageReader} -\title{Open a MessageReader that reads from a stream} -\usage{ -MessageReader(stream) -} -\arguments{ -\item{stream}{an InputStream} -} -\description{ -Open a MessageReader that reads from a stream -} diff --git a/r/man/arrow__ipc__Message.Rd b/r/man/arrow__ipc__Message.Rd index ccff140d559..86b0539019c 100644 --- a/r/man/arrow__ipc__Message.Rd +++ b/r/man/arrow__ipc__Message.Rd @@ -3,7 +3,7 @@ \docType{class} \name{arrow__ipc__Message} \alias{arrow__ipc__Message} -\alias{arrow::Message} +\alias{Message} \title{class arrow::Message} \description{ class arrow::Message diff --git a/r/man/arrow__ipc__MessageReader.Rd b/r/man/arrow__ipc__MessageReader.Rd index c3641b56215..7417033444e 100644 --- a/r/man/arrow__ipc__MessageReader.Rd +++ b/r/man/arrow__ipc__MessageReader.Rd @@ -3,7 +3,7 @@ \docType{class} \name{arrow__ipc__MessageReader} \alias{arrow__ipc__MessageReader} -\alias{arrow::MessageReader} +\alias{MessageReader} \title{class arrow::MessageReader} \description{ class arrow::MessageReader diff --git a/r/tests/testthat/test-message.R b/r/tests/testthat/test-message.R index 0f873c0d800..c6cd9fe4b09 100644 --- a/r/tests/testthat/test-message.R +++ b/r/tests/testthat/test-message.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
-context("arrow::Message") +context("Message") test_that("read_message can read from input stream", { batch <- record_batch(x = 1:10) @@ -23,7 +23,7 @@ test_that("read_message can read from input stream", { stream <- BufferReader$create(bytes) message <- read_message(stream) - expect_is(message, "arrow::Message") + expect_is(message, "Message") expect_equal(message$type, MessageType$RECORD_BATCH) expect_is(message$body, "Buffer") expect_is(message$metadata, "Buffer") @@ -37,7 +37,7 @@ test_that("read_message() can read Schema messages", { stream <- BufferReader$create(bytes) message <- read_message(stream) - expect_is(message, "arrow::Message") + expect_is(message, "Message") expect_equal(message$type, MessageType$SCHEMA) expect_is(message$body, "Buffer") expect_is(message$metadata, "Buffer") diff --git a/r/tests/testthat/test-messagereader.R b/r/tests/testthat/test-messagereader.R index eda991ad840..0bd6d66c544 100644 --- a/r/tests/testthat/test-messagereader.R +++ b/r/tests/testthat/test-messagereader.R @@ -15,16 +15,16 @@ # specific language governing permissions and limitations # under the License. 
-context("arrow::MessageReader") +context("MessageReader") test_that("MessageReader can be created from raw vectors", { batch <- record_batch(x = 1:10) bytes <- batch$serialize() - reader <- MessageReader(bytes) + reader <- MessageReader$create(bytes) message <- reader$ReadNextMessage() - expect_is(message, "arrow::Message") + expect_is(message, "Message") expect_equal(message$type, MessageType$RECORD_BATCH) expect_is(message$body, "Buffer") expect_is(message$metadata, "Buffer") @@ -35,10 +35,10 @@ test_that("MessageReader can be created from raw vectors", { schema <- schema(x = int32()) bytes <- schema$serialize() - reader <- MessageReader(bytes) + reader <- MessageReader$create(bytes) message <- reader$ReadNextMessage() - expect_is(message, "arrow::Message") + expect_is(message, "Message") expect_equal(message$type, MessageType$SCHEMA) expect_is(message$body, "Buffer") expect_is(message$metadata, "Buffer") @@ -54,11 +54,11 @@ test_that("MessageReader can be created from input stream", { stream <- BufferReader$create(bytes) expect_is(stream, "BufferReader") - reader <- MessageReader(stream) - expect_is(reader, "arrow::MessageReader") + reader <- MessageReader$create(stream) + expect_is(reader, "MessageReader") message <- reader$ReadNextMessage() - expect_is(message, "arrow::Message") + expect_is(message, "Message") expect_equal(message$type, MessageType$RECORD_BATCH) expect_is(message$body, "Buffer") expect_is(message$metadata, "Buffer") @@ -72,11 +72,11 @@ test_that("MessageReader can be created from input stream", { stream <- BufferReader$create(bytes) expect_is(stream, "BufferReader") - reader <- MessageReader(stream) - expect_is(reader, "arrow::MessageReader") + reader <- MessageReader$create(stream) + expect_is(reader, "MessageReader") message <- reader$ReadNextMessage() - expect_is(message, "arrow::Message") + expect_is(message, "Message") expect_equal(message$type, MessageType$SCHEMA) expect_is(message$body, "Buffer") expect_is(message$metadata, "Buffer") 
diff --git a/r/tests/testthat/test-schema.R b/r/tests/testthat/test-schema.R index c186b5f4ef2..6656da2cddc 100644 --- a/r/tests/testthat/test-schema.R +++ b/r/tests/testthat/test-schema.R @@ -38,17 +38,17 @@ test_that("reading schema from Buffer", { buffer <- stream$getvalue() expect_is(buffer, "Buffer") - reader <- MessageReader(buffer) - expect_is(reader, "arrow::MessageReader") + reader <- MessageReader$create(buffer) + expect_is(reader, "MessageReader") message <- reader$ReadNextMessage() - expect_is(message, "arrow::Message") + expect_is(message, "Message") expect_equal(message$type, MessageType$SCHEMA) stream <- BufferReader$create(buffer) expect_is(stream, "BufferReader") message <- read_message(stream) - expect_is(message, "arrow::Message") + expect_is(message, "Message") expect_equal(message$type, MessageType$SCHEMA) }) From 730313e3a72888599593c728dc57aa17c3a53cc1 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Thu, 5 Sep 2019 14:32:59 -0700 Subject: [PATCH 17/37] One more find/replace, esp. 
RecordBatch* --- r/NAMESPACE | 45 ++--- r/R/Column.R | 4 +- r/R/Field.R | 16 +- r/R/List.R | 10 +- r/R/RecordBatch.R | 44 ++--- r/R/RecordBatchReader.R | 88 +++------- r/R/RecordBatchWriter.R | 76 +++----- r/R/Schema.R | 18 +- r/R/Struct.R | 8 +- r/R/Table.R | 20 +-- r/R/array-data.R | 2 +- r/R/array.R | 6 +- r/R/chunked-array.R | 4 +- r/R/csv.R | 4 +- r/R/dictionary.R | 10 +- r/R/enums.R | 10 +- r/R/feather.R | 11 +- r/R/json.R | 4 +- r/R/memory_pool.R | 4 +- r/R/message.R | 2 +- r/R/parquet.R | 8 +- r/R/read_record_batch.R | 8 +- r/R/read_table.R | 16 +- r/R/type.R | 172 +++++++++---------- r/R/write_arrow.R | 12 +- r/README.md | 4 +- r/man/RecordBatchFileReader.Rd | 14 -- r/man/RecordBatchFileWriter.Rd | 23 --- r/man/RecordBatchStreamReader.Rd | 14 -- r/man/RecordBatchStreamWriter.Rd | 23 --- r/man/arrow__Column.Rd | 2 +- r/man/arrow__DataType.Rd | 2 +- r/man/arrow__Field.Rd | 2 +- r/man/arrow__FixedWidthType.Rd | 2 +- r/man/arrow__RecordBatch.Rd | 2 +- r/man/arrow__RecordBatchReader.Rd | 2 +- r/man/arrow__Schema.Rd | 2 +- r/man/arrow__Table.Rd | 2 +- r/man/arrow___MemoryPool.Rd | 2 +- r/man/arrow__ipc__RecordBatchFileReader.Rd | 2 +- r/man/arrow__ipc__RecordBatchFileWriter.Rd | 4 +- r/man/arrow__ipc__RecordBatchStreamReader.Rd | 2 +- r/man/arrow__ipc__RecordBatchStreamWriter.Rd | 4 +- r/man/arrow__ipc__RecordBatchWriter.Rd | 2 +- r/man/data-type.Rd | 2 +- r/man/enums.Rd | 2 +- r/man/field.Rd | 6 +- r/man/read_delim_arrow.Rd | 2 +- r/man/read_feather.Rd | 3 +- r/man/read_json_arrow.Rd | 2 +- r/man/write_feather.Rd | 2 +- r/src/datatype.cpp | 4 +- r/src/recordbatch.cpp | 2 +- r/src/table.cpp | 6 +- r/tests/testthat/test-Array.R | 2 +- r/tests/testthat/test-RecordBatch.R | 2 +- r/tests/testthat/test-Table.R | 10 +- r/tests/testthat/test-arraydata.R | 2 +- r/tests/testthat/test-data-type.R | 2 +- r/tests/testthat/test-feather.R | 2 +- r/tests/testthat/test-field.R | 2 +- r/tests/testthat/test-read-write.R | 4 +- r/tests/testthat/test-read_record_batch.R | 8 +- 
r/tests/testthat/test-recordbatchreader.R | 22 +-- r/tests/testthat/test-schema.R | 8 +- 65 files changed, 327 insertions(+), 480 deletions(-) delete mode 100644 r/man/RecordBatchFileReader.Rd delete mode 100644 r/man/RecordBatchFileWriter.Rd delete mode 100644 r/man/RecordBatchStreamReader.Rd delete mode 100644 r/man/RecordBatchStreamWriter.Rd diff --git a/r/NAMESPACE b/r/NAMESPACE index ab6a3d49bd8..a7058304f20 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -1,30 +1,19 @@ # Generated by roxygen2: do not edit by hand S3method("!=",Object) -S3method("==","arrow::DataType") -S3method("==","arrow::Field") -S3method("==","arrow::Message") -S3method("==","arrow::RecordBatch") -S3method("==","arrow::Schema") S3method("==",Array) -S3method(RecordBatchFileReader,Buffer) -S3method(RecordBatchFileReader,RandomAccessFile) -S3method(RecordBatchFileReader,character) -S3method(RecordBatchFileReader,raw) -S3method(RecordBatchFileWriter,OutputStream) -S3method(RecordBatchFileWriter,character) -S3method(RecordBatchStreamReader,Buffer) -S3method(RecordBatchStreamReader,InputStream) -S3method(RecordBatchStreamReader,raw) -S3method(RecordBatchStreamWriter,OutputStream) -S3method(RecordBatchStreamWriter,character) -S3method(as.data.frame,"arrow::RecordBatch") -S3method(as.data.frame,"arrow::Table") +S3method("==",DataType) +S3method("==",Field) +S3method("==",Message) +S3method("==",RecordBatch) +S3method("==",Schema) +S3method(as.data.frame,RecordBatch) +S3method(as.data.frame,Table) S3method(as.raw,Buffer) -S3method(dim,"arrow::RecordBatch") -S3method(dim,"arrow::Table") +S3method(dim,RecordBatch) +S3method(dim,Table) S3method(length,Array) -S3method(names,"arrow::RecordBatch") +S3method(names,RecordBatch) S3method(parquet_file_reader,RandomAccessFile) S3method(parquet_file_reader,character) S3method(parquet_file_reader,raw) @@ -36,19 +25,19 @@ S3method(read_record_batch,Buffer) S3method(read_record_batch,InputStream) S3method(read_record_batch,Message) 
S3method(read_record_batch,raw) -S3method(read_schema,"arrow::Message") S3method(read_schema,Buffer) S3method(read_schema,InputStream) +S3method(read_schema,Message) S3method(read_schema,raw) -S3method(read_table,"arrow::RecordBatchFileReader") -S3method(read_table,"arrow::RecordBatchStreamReader") +S3method(read_table,RecordBatchFileReader) +S3method(read_table,RecordBatchStreamReader) S3method(read_table,character) S3method(read_table,raw) -S3method(type,"arrow::Column") S3method(type,Array) S3method(type,ChunkedArray) +S3method(type,Column) S3method(type,default) -S3method(write_arrow,"arrow::RecordBatchWriter") +S3method(write_arrow,RecordBatchWriter) S3method(write_arrow,character) S3method(write_arrow,raw) export(Array) @@ -57,10 +46,6 @@ export(DateUnit) export(FileMode) export(MessageReader) export(MessageType) -export(RecordBatchFileReader) -export(RecordBatchFileWriter) -export(RecordBatchStreamReader) -export(RecordBatchStreamWriter) export(StatusCode) export(TimeUnit) export(Type) diff --git a/r/R/Column.R b/r/R/Column.R index c0babd934fd..f6c49945eec 100644 --- a/r/R/Column.R +++ b/r/R/Column.R @@ -29,7 +29,7 @@ #' #' @rdname arrow__Column #' @name arrow__Column -`arrow::Column` <- R6Class("arrow::Column", inherit = Object, +Column <- R6Class("Column", inherit = Object, public = list( length = function() Column__length(self), data = function() shared_ptr(ChunkedArray, Column__data(self)) @@ -37,6 +37,6 @@ active = list( null_count = function() Column__null_count(self), - type = function() `arrow::DataType`$dispatch(Column__type(self)) + type = function() DataType$dispatch(Column__type(self)) ) ) diff --git a/r/R/Field.R b/r/R/Field.R index 56c8942f23e..3f5ff1f9cfe 100644 --- a/r/R/Field.R +++ b/r/R/Field.R @@ -29,13 +29,13 @@ #' #' @rdname arrow__Field #' @name arrow__Field -`arrow::Field` <- R6Class("arrow::Field", inherit = Object, +Field <- R6Class("Field", inherit = Object, public = list( ToString = function() { Field__ToString(self) }, Equals = 
function(other) { - inherits(other, "arrow::Field") && Field__Equals(self, other) + inherits(other, "Field") && Field__Equals(self, other) } ), @@ -47,20 +47,20 @@ Field__nullable(self) }, type = function() { - `arrow::DataType`$dispatch(Field__type(self)) + DataType$dispatch(Field__type(self)) } ) ) #' @export -`==.arrow::Field` <- function(lhs, rhs){ +`==.Field` <- function(lhs, rhs){ lhs$Equals(rhs) } -#' Factory for a `arrow::Field` +#' Factory for a Field #' #' @param name field name -#' @param type logical type, instance of `arrow::DataType` +#' @param type logical type, instance of DataType #' @param metadata currently ignored #' #' @examples @@ -72,7 +72,7 @@ #' @export field <- function(name, type, metadata) { assert_that(inherits(name, "character"), length(name) == 1L) - if (!inherits(type, "arrow::DataType")) { + if (!inherits(type, "DataType")) { if (identical(type, double())) { # Magic so that we don't have to mask this base function type <- float64() @@ -81,7 +81,7 @@ field <- function(name, type, metadata) { } } assert_that(missing(metadata), msg = "metadata= is currently ignored") - shared_ptr(`arrow::Field`, Field__initialize(name, type, TRUE)) + shared_ptr(Field, Field__initialize(name, type, TRUE)) } .fields <- function(.list){ diff --git a/r/R/List.R b/r/R/List.R index a970fb895a9..0d50ff2a19d 100644 --- a/r/R/List.R +++ b/r/R/List.R @@ -17,14 +17,14 @@ #' @include type.R -`arrow::ListType` <- R6Class("arrow::ListType", - inherit = `arrow::NestedType`, +ListType <- R6Class("ListType", + inherit = NestedType, active = list( - value_field = function() shared_ptr(`arrow::Field`, ListType__value_field(self)), - value_type = function() `arrow::DataType`$dispatch(ListType__value_type(self)) + value_field = function() shared_ptr(Field, ListType__value_field(self)), + value_type = function() DataType$dispatch(ListType__value_type(self)) ) ) #' @rdname data-type #' @export -list_of <- function(type) shared_ptr(`arrow::ListType`, list__(type)) +list_of <- 
function(type) shared_ptr(ListType, list__(type)) diff --git a/r/R/RecordBatch.R b/r/R/RecordBatch.R index abc774842b3..906b51fe8ae 100644 --- a/r/R/RecordBatch.R +++ b/r/R/RecordBatch.R @@ -29,63 +29,73 @@ #' #' @rdname arrow__RecordBatch #' @name arrow__RecordBatch -`arrow::RecordBatch` <- R6Class("arrow::RecordBatch", inherit = Object, +RecordBatch <- R6Class("RecordBatch", inherit = Object, public = list( column = function(i) shared_ptr(Array, RecordBatch__column(self, i)), column_name = function(i) RecordBatch__column_name(self, i), names = function() RecordBatch__names(self), Equals = function(other) { - assert_that(inherits(other, "arrow::RecordBatch")) + assert_that(inherits(other, "RecordBatch")) RecordBatch__Equals(self, other) }, RemoveColumn = function(i){ - shared_ptr(`arrow::RecordBatch`, RecordBatch__RemoveColumn(self, i)) + shared_ptr(RecordBatch, RecordBatch__RemoveColumn(self, i)) }, Slice = function(offset, length = NULL) { if (is.null(length)) { - shared_ptr(`arrow::RecordBatch`, RecordBatch__Slice1(self, offset)) + shared_ptr(RecordBatch, RecordBatch__Slice1(self, offset)) } else { - shared_ptr(`arrow::RecordBatch`, RecordBatch__Slice2(self, offset, length)) + shared_ptr(RecordBatch, RecordBatch__Slice2(self, offset, length)) } }, serialize = function() ipc___SerializeRecordBatch__Raw(self), cast = function(target_schema, safe = TRUE, options = cast_options(safe)) { - assert_that(inherits(target_schema, "arrow::Schema")) + assert_that(inherits(target_schema, "Schema")) assert_that(inherits(options, "CastOptions")) assert_that(identical(self$schema$names, target_schema$names), msg = "incompatible schemas") - shared_ptr(`arrow::RecordBatch`, RecordBatch__cast(self, target_schema, options)) + shared_ptr(RecordBatch, RecordBatch__cast(self, target_schema, options)) } ), active = list( num_columns = function() RecordBatch__num_columns(self), num_rows = function() RecordBatch__num_rows(self), - schema = function() shared_ptr(`arrow::Schema`, 
RecordBatch__schema(self)), + schema = function() shared_ptr(Schema, RecordBatch__schema(self)), columns = function() map(RecordBatch__columns(self), shared_ptr, Array) ) ) +RecordBatch$create <- function(..., schema = NULL){ + arrays <- list2(...) + # making sure there are always names + if (is.null(names(arrays))) { + names(arrays) <- rep_len("", length(arrays)) + } + stopifnot(length(arrays) > 0) + shared_ptr(RecordBatch, RecordBatch__from_arrays(schema, arrays)) +} + #' @export -`names.arrow::RecordBatch` <- function(x) { +names.RecordBatch <- function(x) { x$names() } #' @export -`==.arrow::RecordBatch` <- function(x, y) { +`==.RecordBatch` <- function(x, y) { x$Equals(y) } #' @export -`dim.arrow::RecordBatch` <- function(x) { +dim.RecordBatch <- function(x) { c(x$num_rows, x$num_columns) } #' @export -`as.data.frame.arrow::RecordBatch` <- function(x, row.names = NULL, optional = FALSE, use_threads = TRUE, ...){ +as.data.frame.RecordBatch <- function(x, row.names = NULL, optional = FALSE, use_threads = TRUE, ...){ RecordBatch__to_dataframe(x, use_threads = option_use_threads()) } @@ -96,12 +106,4 @@ #' #' @return a [arrow::RecordBatch][arrow__RecordBatch] #' @export -record_batch <- function(..., schema = NULL){ - arrays <- list2(...) 
- # making sure there are always names - if (is.null(names(arrays))) { - names(arrays) <- rep_len("", length(arrays)) - } - stopifnot(length(arrays) > 0) - shared_ptr(`arrow::RecordBatch`, RecordBatch__from_arrays(schema, arrays)) -} +record_batch <- RecordBatch$create diff --git a/r/R/RecordBatchReader.R b/r/R/RecordBatchReader.R index 8de62143745..323ef1f1405 100644 --- a/r/R/RecordBatchReader.R +++ b/r/R/RecordBatchReader.R @@ -29,14 +29,14 @@ #' #' @rdname arrow__RecordBatchReader #' @name arrow__RecordBatchReader -`arrow::RecordBatchReader` <- R6Class("arrow::RecordBatchReader", inherit = Object, +RecordBatchReader <- R6Class("RecordBatchReader", inherit = Object, public = list( read_next_batch = function() { - shared_ptr(`arrow::RecordBatch`, RecordBatchReader__ReadNext(self)) + shared_ptr(RecordBatch, RecordBatchReader__ReadNext(self)) } ), active = list( - schema = function() shared_ptr(`arrow::Schema`, RecordBatchReader__schema(self)) + schema = function() shared_ptr(Schema, RecordBatchReader__schema(self)) ) ) @@ -52,11 +52,19 @@ #' #' @rdname arrow__ipc__RecordBatchStreamReader #' @name arrow__ipc__RecordBatchStreamReader -`arrow::RecordBatchStreamReader` <- R6Class("arrow::RecordBatchStreamReader", inherit = `arrow::RecordBatchReader`, +RecordBatchStreamReader <- R6Class("RecordBatchStreamReader", inherit = RecordBatchReader, public = list( - batches = function() map(ipc___RecordBatchStreamReader__batches(self), shared_ptr, class = `arrow::RecordBatch`) + batches = function() map(ipc___RecordBatchStreamReader__batches(self), shared_ptr, class = RecordBatch) ) ) +RecordBatchStreamReader$create <- function(stream){ + if (inherits(stream, c("raw", "Buffer"))) { + stream <- BufferReader$create(stream) + } + assert_that(inherits(stream, "InputStream")) + + shared_ptr(RecordBatchStreamReader, ipc___RecordBatchStreamReader__Open(stream)) +} #' @title class arrow::RecordBatchFileReader #' @@ -70,69 +78,27 @@ #' #' @rdname arrow__ipc__RecordBatchFileReader #' 
@name arrow__ipc__RecordBatchFileReader -`arrow::RecordBatchFileReader` <- R6Class("arrow::RecordBatchFileReader", inherit = Object, +RecordBatchFileReader <- R6Class("RecordBatchFileReader", inherit = Object, + # Why doesn't this inherit from RecordBatchReader? public = list( - get_batch = function(i) shared_ptr(`arrow::RecordBatch`, ipc___RecordBatchFileReader__ReadRecordBatch(self, i)), + get_batch = function(i) shared_ptr(RecordBatch, ipc___RecordBatchFileReader__ReadRecordBatch(self, i)), - batches = function() map(ipc___RecordBatchFileReader__batches(self), shared_ptr, class = `arrow::RecordBatch`) + batches = function() map(ipc___RecordBatchFileReader__batches(self), shared_ptr, class = RecordBatch) ), active = list( num_record_batches = function() ipc___RecordBatchFileReader__num_record_batches(self), - schema = function() shared_ptr(`arrow::Schema`, ipc___RecordBatchFileReader__schema(self)) + schema = function() shared_ptr(Schema, ipc___RecordBatchFileReader__schema(self)) ) ) -#' Create a [arrow::RecordBatchStreamReader][arrow__ipc__RecordBatchStreamReader] from an input stream -#' -#' @param stream input stream, an [arrow::io::InputStream][arrow__io__InputStream] or a raw vector -#' -#' @export -RecordBatchStreamReader <- function(stream){ - UseMethod("RecordBatchStreamReader") -} - -#' @export -RecordBatchStreamReader.InputStream <- function(stream) { - shared_ptr(`arrow::RecordBatchStreamReader`, ipc___RecordBatchStreamReader__Open(stream)) -} - -#' @export -`RecordBatchStreamReader.raw` <- function(stream) { - RecordBatchStreamReader(BufferReader$create(stream)) -} - -#' @export -`RecordBatchStreamReader.Buffer` <- function(stream) { - RecordBatchStreamReader(BufferReader$create(stream)) -} - - -#' Create an [arrow::RecordBatchFileReader][arrow__ipc__RecordBatchFileReader] from a file -#' -#' @param file The file to read from. 
A file path, or an [arrow::io::RandomAccessFile][arrow__ipc__RecordBatchFileReader] -#' -#' @export -RecordBatchFileReader <- function(file) { - UseMethod("RecordBatchFileReader") -} - -#' @export -RecordBatchFileReader.RandomAccessFile <- function(file) { - shared_ptr(`arrow::RecordBatchFileReader`, ipc___RecordBatchFileReader__Open(file)) -} - -#' @export -`RecordBatchFileReader.character` <- function(file) { - assert_that(length(file) == 1L) - RecordBatchFileReader(ReadableFile$create(file)) -} - -#' @export -`RecordBatchFileReader.Buffer` <- function(file) { - RecordBatchFileReader(BufferReader$create(file)) -} +RecordBatchFileReader$create <- function(file) { + if (inherits(file, c("raw", "Buffer"))) { + file <- BufferReader$create(file) + } else if (is.character(file)) { + assert_that(length(file) == 1L) + file <- ReadableFile$create(file) + } + assert_that(inherits(file, "RandomAccessFile")) -#' @export -`RecordBatchFileReader.raw` <- function(file) { - RecordBatchFileReader(BufferReader$create(file)) + shared_ptr(RecordBatchFileReader, ipc___RecordBatchFileReader__Open(file)) } diff --git a/r/R/RecordBatchWriter.R b/r/R/RecordBatchWriter.R index b83b390ba36..8600cbd33b7 100644 --- a/r/R/RecordBatchWriter.R +++ b/r/R/RecordBatchWriter.R @@ -36,15 +36,15 @@ #' #' @rdname arrow__ipc__RecordBatchWriter #' @name arrow__ipc__RecordBatchWriter -`arrow::RecordBatchWriter` <- R6Class("arrow::RecordBatchWriter", inherit = Object, +RecordBatchWriter <- R6Class("RecordBatchWriter", inherit = Object, public = list( write_batch = function(batch) ipc___RecordBatchWriter__WriteRecordBatch(self, batch), write_table = function(table) ipc___RecordBatchWriter__WriteTable(self, table), write = function(x) { - if (inherits(x, "arrow::RecordBatch")) { + if (inherits(x, "RecordBatch")) { self$write_batch(x) - } else if (inherits(x, "arrow::Table")) { + } else if (inherits(x, "Table")) { self$write_table(x) } else if (inherits(x, "data.frame")) { self$write_table(table(x)) @@ -68,7 
+68,7 @@ #' @section usage: #' #' ``` -#' writer <- RecordBatchStreamWriter(sink, schema) +#' writer <- RecordBatchStreamWriter$create(sink, schema) #' #' writer$write_batch(batch) #' writer$write_table(table) @@ -88,33 +88,16 @@ #' #' @rdname arrow__ipc__RecordBatchStreamWriter #' @name arrow__ipc__RecordBatchStreamWriter -`arrow::RecordBatchStreamWriter` <- R6Class("arrow::RecordBatchStreamWriter", inherit = `arrow::RecordBatchWriter`) +RecordBatchStreamWriter <- R6Class("RecordBatchStreamWriter", inherit = RecordBatchWriter) -#' Writer for the Arrow streaming binary format -#' -#' @param sink Where to write. Can either be: -#' -#' - A string file path -#' - [arrow::io::OutputStream][arrow__io__OutputStream] -#' -#' @param schema The [arrow::Schema][arrow__Schema] for data to be written. -#' -#' @return a [arrow::RecordBatchStreamWriter][arrow__ipc__RecordBatchStreamWriter] -#' -#' @export -RecordBatchStreamWriter <- function(sink, schema) { - UseMethod("RecordBatchStreamWriter") -} - -#' @export -RecordBatchStreamWriter.character <- function(sink, schema){ - RecordBatchStreamWriter(FileOutputStream$create(sink), schema) -} +RecordBatchStreamWriter$create <- function(sink, schema) { + if (is.character(sink)) { + sink <- FileOutputStream$create(sink) + } + assert_that(inherits(sink, "OutputStream")) + assert_that(inherits(schema, "Schema")) -#' @export -RecordBatchStreamWriter.OutputStream <- function(sink, schema){ - assert_that(inherits(schema, "arrow::Schema")) - shared_ptr(`arrow::RecordBatchStreamWriter`, ipc___RecordBatchStreamWriter__Open(sink, schema)) + shared_ptr(RecordBatchStreamWriter, ipc___RecordBatchStreamWriter__Open(sink, schema)) } #' @title class arrow::RecordBatchFileWriter @@ -128,7 +111,7 @@ RecordBatchStreamWriter.OutputStream <- function(sink, schema){ #' @section usage: #' #' ``` -#' writer <- RecordBatchFileWriter(sink, schema) +#' writer <- RecordBatchFileWriter$create(sink, schema) #' #' writer$write_batch(batch) #' 
writer$write_table(table) @@ -148,31 +131,14 @@ RecordBatchStreamWriter.OutputStream <- function(sink, schema){ #' #' @rdname arrow__ipc__RecordBatchFileWriter #' @name arrow__ipc__RecordBatchFileWriter -`arrow::RecordBatchFileWriter` <- R6Class("arrow::RecordBatchFileWriter", inherit = `arrow::RecordBatchStreamWriter`) +RecordBatchFileWriter <- R6Class("RecordBatchFileWriter", inherit = RecordBatchStreamWriter) -#' Create a record batch file writer from a stream -#' -#' @param sink Where to write. Can either be: -#' -#' - a string file path -#' - [arrow::io::OutputStream][arrow__io__OutputStream] -#' -#' @param schema The [arrow::Schema][arrow__Schema] for data to be written. -#' -#' @return an `arrow::RecordBatchWriter` object -#' -#' @export -RecordBatchFileWriter <- function(sink, schema) { - UseMethod("RecordBatchFileWriter") -} - -#' @export -RecordBatchFileWriter.character <- function(sink, schema){ - RecordBatchFileWriter(FileOutputStream$create(sink), schema) -} +RecordBatchFileWriter$create <- function(sink, schema) { + if (is.character(sink)) { + sink <- FileOutputStream$create(sink) + } + assert_that(inherits(sink, "OutputStream")) + assert_that(inherits(schema, "Schema")) -#' @export -RecordBatchFileWriter.OutputStream <- function(sink, schema){ - assert_that(inherits(schema, "arrow::Schema")) - shared_ptr(`arrow::RecordBatchFileWriter`, ipc___RecordBatchFileWriter__Open(sink, schema)) + shared_ptr(RecordBatchFileWriter, ipc___RecordBatchFileWriter__Open(sink, schema)) } diff --git a/r/R/Schema.R b/r/R/Schema.R index 61746ba49a7..57e585daec1 100644 --- a/r/R/Schema.R +++ b/r/R/Schema.R @@ -41,12 +41,12 @@ #' #' @rdname arrow__Schema #' @name arrow__Schema -`arrow::Schema` <- R6Class("arrow::Schema", +Schema <- R6Class("Schema", inherit = Object, public = list( ToString = function() Schema__ToString(self), num_fields = function() Schema__num_fields(self), - field = function(i) shared_ptr(`arrow::Field`, Schema__field(self, i)), + field = function(i) 
shared_ptr(Field, Schema__field(self, i)), serialize = function() Schema__serialize(self), Equals = function(other, check_metadata = TRUE) Schema__Equals(self, other, isTRUE(check_metadata)) ), @@ -56,7 +56,7 @@ ) #' @export -`==.arrow::Schema` <- function(lhs, rhs){ +`==.Schema` <- function(lhs, rhs){ lhs$Equals(rhs) } @@ -74,7 +74,7 @@ #' @export # TODO (npr): add examples once ARROW-5505 merges schema <- function(...){ - shared_ptr(`arrow::Schema`, schema_(.fields(list2(...)))) + shared_ptr(Schema, schema_(.fields(list2(...)))) } #' read a Schema from a stream @@ -89,24 +89,24 @@ read_schema <- function(stream, ...) { #' @export read_schema.InputStream <- function(stream, ...) { - shared_ptr(`arrow::Schema`, ipc___ReadSchema_InputStream(stream)) + shared_ptr(Schema, ipc___ReadSchema_InputStream(stream)) } #' @export `read_schema.Buffer` <- function(stream, ...) { stream <- BufferReader$create(stream) on.exit(stream$close()) - shared_ptr(`arrow::Schema`, ipc___ReadSchema_InputStream(stream)) + shared_ptr(Schema, ipc___ReadSchema_InputStream(stream)) } #' @export `read_schema.raw` <- function(stream, ...) { stream <- BufferReader$create(stream) on.exit(stream$close()) - shared_ptr(`arrow::Schema`, ipc___ReadSchema_InputStream(stream)) + shared_ptr(Schema, ipc___ReadSchema_InputStream(stream)) } #' @export -`read_schema.arrow::Message` <- function(stream, ...) { - shared_ptr(`arrow::Schema`, ipc___ReadSchema_Message(stream)) +read_schema.Message <- function(stream, ...) 
{ + shared_ptr(Schema, ipc___ReadSchema_Message(stream)) } diff --git a/r/R/Struct.R b/r/R/Struct.R index fa35b7ec0f2..ab72d4757bf 100644 --- a/r/R/Struct.R +++ b/r/R/Struct.R @@ -17,10 +17,10 @@ #' @include type.R -`arrow::StructType` <- R6Class("arrow::StructType", - inherit = `arrow::NestedType`, +StructType <- R6Class("StructType", + inherit = NestedType, public = list( - GetFieldByName = function(name) shared_ptr(`arrow::Field`, StructType__GetFieldByName(self, name)), + GetFieldByName = function(name) shared_ptr(Field, StructType__GetFieldByName(self, name)), GetFieldIndex = function(name) StructType__GetFieldIndex(self, name) ) ) @@ -29,5 +29,5 @@ #' @export struct <- function(...){ xp <- struct_(.fields(list(...))) - shared_ptr(`arrow::StructType`, xp) + shared_ptr(StructType, xp) } diff --git a/r/R/Table.R b/r/R/Table.R index 1660349571b..fc8a6418c80 100644 --- a/r/R/Table.R +++ b/r/R/Table.R @@ -29,18 +29,18 @@ #' #' @rdname arrow__Table #' @name arrow__Table -`arrow::Table` <- R6Class("arrow::Table", inherit = Object, +Table <- R6Class("Table", inherit = Object, public = list( column = function(i) shared_ptr(ChunkedArray, Table__column(self, i)), - field = function(i) shared_ptr(`arrow::Field`, Table__field(self, i)), + field = function(i) shared_ptr(Field, Table__field(self, i)), serialize = function(output_stream, ...) 
write_table(self, output_stream, ...), cast = function(target_schema, safe = TRUE, options = cast_options(safe)) { - assert_that(inherits(target_schema, "arrow::Schema")) + assert_that(inherits(target_schema, "Schema")) assert_that(inherits(options, "CastOptions")) assert_that(identical(self$schema$names, target_schema$names), msg = "incompatible schemas") - shared_ptr(`arrow::Table`, Table__cast(self, target_schema, options)) + shared_ptr(Table, Table__cast(self, target_schema, options)) }, select = function(spec) { @@ -51,7 +51,7 @@ all_vars <- Table__column_names(self) vars <- vars_select(all_vars, !!spec) indices <- match(vars, all_vars) - shared_ptr(`arrow::Table`, Table__select(self, indices)) + shared_ptr(Table, Table__select(self, indices)) } } @@ -60,8 +60,8 @@ active = list( num_columns = function() Table__num_columns(self), num_rows = function() Table__num_rows(self), - schema = function() shared_ptr(`arrow::Schema`, Table__schema(self)), - columns = function() map(Table__columns(self), shared_ptr, class = `arrow::Column`) + schema = function() shared_ptr(Schema, Table__schema(self)), + columns = function() map(Table__columns(self), shared_ptr, class = Column) ) ) @@ -80,15 +80,15 @@ table <- function(..., schema = NULL){ names(dots) <- rep_len("", length(dots)) } stopifnot(length(dots) > 0) - shared_ptr(`arrow::Table`, Table__from_dots(dots, schema)) + shared_ptr(Table, Table__from_dots(dots, schema)) } #' @export -`as.data.frame.arrow::Table` <- function(x, row.names = NULL, optional = FALSE, use_threads = TRUE, ...){ +as.data.frame.Table <- function(x, row.names = NULL, optional = FALSE, use_threads = TRUE, ...){ Table__to_dataframe(x, use_threads = option_use_threads()) } #' @export -`dim.arrow::Table` <- function(x) { +dim.Table <- function(x) { c(x$num_rows, x$num_columns) } diff --git a/r/R/array-data.R b/r/R/array-data.R index e69666070a8..041e3c6a32b 100644 --- a/r/R/array-data.R +++ b/r/R/array-data.R @@ -44,7 +44,7 @@ ArrayData <- 
R6Class("ArrayData", inherit = Object, active = list( - type = function() `arrow::DataType`$dispatch(ArrayData__get_type(self)), + type = function() DataType$dispatch(ArrayData__get_type(self)), length = function() ArrayData__get_length(self), null_count = function() ArrayData__get_null_count(self), offset = function() ArrayData__get_offset(self), diff --git a/r/R/array.R b/r/R/array.R index 02ca0656782..f643a8ddbdf 100644 --- a/r/R/array.R +++ b/r/R/array.R @@ -92,7 +92,7 @@ Array <- R6Class("Array", Array__RangeEquals(self, other, start_idx, end_idx, other_start_idx) }, cast = function(target_type, safe = TRUE, options = cast_options(safe)) { - assert_that(inherits(target_type, "arrow::DataType")) + assert_that(inherits(target_type, "DataType")) assert_that(inherits(options, "CastOptions")) Array$create(Array__cast(self, target_type, options)) } @@ -100,7 +100,7 @@ Array <- R6Class("Array", active = list( null_count = function() Array__null_count(self), offset = function() Array__offset(self), - type = function() `arrow::DataType`$dispatch(Array__type(self)) + type = function() DataType$dispatch(Array__type(self)) ) ) @@ -127,7 +127,7 @@ ListArray <- R6Class("ListArray", inherit = Array, raw_value_offsets = function() ListArray__raw_value_offsets(self) ), active = list( - value_type = function() `arrow::DataType`$dispatch(ListArray__value_type(self)) + value_type = function() DataType$dispatch(ListArray__value_type(self)) ) ) diff --git a/r/R/chunked-array.R b/r/R/chunked-array.R index cc6a112a7bd..f9e2034b377 100644 --- a/r/R/chunked-array.R +++ b/r/R/chunked-array.R @@ -42,7 +42,7 @@ ChunkedArray <- R6Class("ChunkedArray", inherit = Object, } }, cast = function(target_type, safe = TRUE, options = cast_options(safe)) { - assert_that(inherits(target_type, "arrow::DataType")) + assert_that(inherits(target_type, "DataType")) assert_that(inherits(options, "CastOptions")) shared_ptr(ChunkedArray, ChunkedArray__cast(self, target_type, options)) } @@ -51,7 +51,7 @@ 
ChunkedArray <- R6Class("ChunkedArray", inherit = Object, null_count = function() ChunkedArray__null_count(self), num_chunks = function() ChunkedArray__num_chunks(self), chunks = function() map(ChunkedArray__chunks(self), ~ Array$create(.x)), - type = function() `arrow::DataType`$dispatch(ChunkedArray__type(self)) + type = function() DataType$dispatch(ChunkedArray__type(self)) ) ) diff --git a/r/R/csv.R b/r/R/csv.R index a1ff66aae16..7b557ed0c17 100644 --- a/r/R/csv.R +++ b/r/R/csv.R @@ -66,7 +66,7 @@ #' @param as_tibble Should the function return a `data.frame` or an #' [arrow::Table][arrow__Table]? #' -#' @return A `data.frame`, or an `arrow::Table` if `as_tibble = FALSE`. +#' @return A `data.frame`, or an Table if `as_tibble = FALSE`. #' @export #' @examples #' \donttest{ @@ -183,7 +183,7 @@ read_tsv_arrow <- function(file, CsvTableReader <- R6Class("CsvTableReader", inherit = Object, public = list( - Read = function() shared_ptr(`arrow::Table`, csv___TableReader__Read(self)) + Read = function() shared_ptr(Table, csv___TableReader__Read(self)) ) ) diff --git a/r/R/dictionary.R b/r/R/dictionary.R index b12c19e23e3..ea2c676a4bf 100644 --- a/r/R/dictionary.R +++ b/r/R/dictionary.R @@ -30,11 +30,11 @@ #' @rdname arrow__DictionaryType #' @name arrow__DictionaryType DictionaryType <- R6Class("DictionaryType", - inherit = `arrow::FixedWidthType`, + inherit = FixedWidthType, active = list( - index_type = function() `arrow::DataType`$dispatch(DictionaryType__index_type(self)), - value_type = function() `arrow::DataType`$dispatch(DictionaryType__value_type(self)), + index_type = function() DataType$dispatch(DictionaryType__index_type(self)), + value_type = function() DataType$dispatch(DictionaryType__value_type(self)), name = function() DictionaryType__name(self), ordered = function() DictionaryType__ordered(self) ) @@ -51,8 +51,8 @@ DictionaryType <- R6Class("DictionaryType", #' @export dictionary <- function(index_type, value_type, ordered = FALSE) { assert_that( - 
inherits(index_type, "arrow::DataType"), - inherits(index_type, "arrow::DataType") + inherits(index_type, "DataType"), + inherits(index_type, "DataType") ) shared_ptr(DictionaryType, DictionaryType__initialize(index_type, value_type, ordered)) } diff --git a/r/R/enums.R b/r/R/enums.R index 21ea3eaf109..5a5cdfa3d25 100644 --- a/r/R/enums.R +++ b/r/R/enums.R @@ -31,17 +31,17 @@ enum <- function(class, ..., .list = list(...)){ #' @name enums #' @export #' @keywords internal -TimeUnit <- enum("arrow::TimeUnit::type", +TimeUnit <- enum("TimeUnit::type", SECOND = 0L, MILLI = 1L, MICRO = 2L, NANO = 3L ) #' @rdname enums #' @export -DateUnit <- enum("arrow::DateUnit", DAY = 0L, MILLI = 1L) +DateUnit <- enum("DateUnit", DAY = 0L, MILLI = 1L) #' @rdname enums #' @export -Type <- enum("arrow::Type::type", +Type <- enum("Type::type", "NA" = 0L, BOOL = 1L, UINT8 = 2L, INT8 = 3L, UINT16 = 4L, INT16 = 5L, UINT32 = 6L, INT32 = 7L, UINT64 = 8L, INT64 = 9L, HALF_FLOAT = 10L, FLOAT = 11L, DOUBLE = 12L, STRING = 13L, @@ -52,7 +52,7 @@ Type <- enum("arrow::Type::type", #' @rdname enums #' @export -StatusCode <- enum("arrow::StatusCode", +StatusCode <- enum("StatusCode", OK = 0L, OutOfMemory = 1L, KeyError = 2L, TypeError = 3L, Invalid = 4L, IOError = 5L, CapacityError = 6L, IndexError = 7L, UnknownError = 9L, NotImplemented = 10L, SerializationError = 11L, @@ -75,6 +75,6 @@ MessageType <- enum("Message::Type", #' @rdname enums #' @export -CompressionType <- enum("arrow::Compression::type", +CompressionType <- enum("Compression::type", UNCOMPRESSED = 0L, SNAPPY = 1L, GZIP = 2L, BROTLI = 3L, ZSTD = 4L, LZ4 = 5L, LZO = 6L, BZ2 = 7L ) diff --git a/r/R/feather.R b/r/R/feather.R index d538c3e297b..269d16811cb 100644 --- a/r/R/feather.R +++ b/r/R/feather.R @@ -17,7 +17,7 @@ #' Write data in the Feather format #' -#' @param data `data.frame` or `arrow::RecordBatch` +#' @param data `data.frame` or RecordBatch #' @param stream A file path or an OutputStream #' #' @export @@ -34,7 +34,7 @@ 
write_feather <- function(data, stream) { if (is.data.frame(data)) { data <- record_batch(data) } - assert_that(inherits(data, "arrow::RecordBatch")) + assert_that(inherits(data, "RecordBatch")) if (is.character(stream)) { stream <- FileOutputStream$create(stream) @@ -62,7 +62,8 @@ FeatherTableWriter$create <- function(stream) { #' Read a Feather file #' -#' @param file an FeatherTableReader or whatever the [FeatherTableReader()] function can handle +#' @param file A character file path, a raw vector, or `InputStream`, passed to +#' `FeatherTableReader$create()`. #' @inheritParams read_delim_arrow #' @param ... additional parameters #' @@ -81,7 +82,7 @@ FeatherTableWriter$create <- function(stream) { #' df <- read_feather(tf, col_select = starts_with("Sepal")) #' }) #' } -read_feather <- function(file, col_select = NULL, as_tibble = TRUE, ...){ +read_feather <- function(file, col_select = NULL, as_tibble = TRUE, ...) { reader <- FeatherTableReader$create(file, ...) all_columns <- ipc___feather___TableReader__column_names(reader) @@ -107,7 +108,7 @@ FeatherTableReader <- R6Class("FeatherTableReader", inherit = Object, GetColumnName = function(i) ipc___feather___TableReader__GetColumnName(self, i), GetColumn = function(i) shared_ptr(Array, ipc___feather___TableReader__GetColumn(self, i)), Read = function(columns) { - shared_ptr(`arrow::Table`, ipc___feather___TableReader__Read(self, columns)) + shared_ptr(Table, ipc___feather___TableReader__Read(self, columns)) } ) ) diff --git a/r/R/json.R b/r/R/json.R index 875b83dfe70..6c3ca3dd970 100644 --- a/r/R/json.R +++ b/r/R/json.R @@ -22,7 +22,7 @@ #' @inheritParams read_delim_arrow #' @param ... Additional options, passed to `json_table_reader()` #' -#' @return A `data.frame`, or an `arrow::Table` if `as_tibble = FALSE`. +#' @return A `data.frame`, or an Table if `as_tibble = FALSE`. #' @export #' @examples #' \donttest{ @@ -62,7 +62,7 @@ read_json_arrow <- function(file, col_select = NULL, as_tibble = TRUE, ...) 
{ #' @name arrow__json__TableReader JsonTableReader <- R6Class("JsonTableReader", inherit = Object, public = list( - Read = function() shared_ptr(`arrow::Table`, json___TableReader__Read(self)) + Read = function() shared_ptr(Table, json___TableReader__Read(self)) ) ) JsonTableReader$create <- function(file, diff --git a/r/R/memory_pool.R b/r/R/memory_pool.R index 4148f2de8e0..f96ee64eb43 100644 --- a/r/R/memory_pool.R +++ b/r/R/memory_pool.R @@ -29,7 +29,7 @@ #' #' @rdname arrow___MemoryPool #' @name arrow__MemoryPool -`arrow::MemoryPool` <- R6Class("arrow::MemoryPool", +MemoryPool <- R6Class("MemoryPool", inherit = Object, public = list( # TODO: Allocate @@ -45,5 +45,5 @@ #' @return the default [arrow::MemoryPool][arrow__MemoryPool] #' @export default_memory_pool <- function() { - shared_ptr(`arrow::MemoryPool`, MemoryPool__default()) + shared_ptr(MemoryPool, MemoryPool__default()) } diff --git a/r/R/message.R b/r/R/message.R index 21373f9c812..60cff08da34 100644 --- a/r/R/message.R +++ b/r/R/message.R @@ -46,7 +46,7 @@ Message <- R6Class("Message", inherit = Object, ) #' @export -`==.arrow::Message` <- function(x, y) x$Equals(y) +`==.Message` <- function(x, y) x$Equals(y) #' @title class arrow::MessageReader #' diff --git a/r/R/parquet.R b/r/R/parquet.R index fef9ce1c14e..d93001b4cbf 100644 --- a/r/R/parquet.R +++ b/r/R/parquet.R @@ -23,15 +23,15 @@ ReadTable = function(col_select = NULL) { col_select <- enquo(col_select) if(quo_is_null(col_select)) { - shared_ptr(`arrow::Table`, parquet___arrow___FileReader__ReadTable1(self)) + shared_ptr(Table, parquet___arrow___FileReader__ReadTable1(self)) } else { - all_vars <- shared_ptr(`arrow::Schema`, parquet___arrow___FileReader__GetSchema(self))$names + all_vars <- shared_ptr(Schema, parquet___arrow___FileReader__GetSchema(self))$names indices <- match(vars_select(all_vars, !!col_select), all_vars) - 1L - shared_ptr(`arrow::Table`, parquet___arrow___FileReader__ReadTable2(self, indices)) + shared_ptr(Table, 
parquet___arrow___FileReader__ReadTable2(self, indices)) } }, GetSchema = function() { - shared_ptr(`arrow::Schema`, parquet___arrow___FileReader__GetSchema(self)) + shared_ptr(Schema, parquet___arrow___FileReader__GetSchema(self)) } ) ) diff --git a/r/R/read_record_batch.R b/r/R/read_record_batch.R index e01a9aee209..11ac43b67f4 100644 --- a/r/R/read_record_batch.R +++ b/r/R/read_record_batch.R @@ -29,14 +29,14 @@ read_record_batch <- function(obj, schema){ #' @export read_record_batch.Message <- function(obj, schema) { - assert_that(inherits(schema, "arrow::Schema")) - shared_ptr(`arrow::RecordBatch`, ipc___ReadRecordBatch__Message__Schema(obj, schema)) + assert_that(inherits(schema, "Schema")) + shared_ptr(RecordBatch, ipc___ReadRecordBatch__Message__Schema(obj, schema)) } #' @export read_record_batch.InputStream <- function(obj, schema) { - assert_that(inherits(schema, "arrow::Schema")) - shared_ptr(`arrow::RecordBatch`, ipc___ReadRecordBatch__InputStream__Schema(obj, schema)) + assert_that(inherits(schema, "Schema")) + shared_ptr(RecordBatch, ipc___ReadRecordBatch__InputStream__Schema(obj, schema)) } #' @export diff --git a/r/R/read_table.R b/r/R/read_table.R index d52d639ceba..5b4bdc535d9 100644 --- a/r/R/read_table.R +++ b/r/R/read_table.R @@ -50,13 +50,13 @@ read_table <- function(stream){ } #' @export -`read_table.arrow::RecordBatchFileReader` <- function(stream) { - shared_ptr(`arrow::Table`, Table__from_RecordBatchFileReader(stream)) +read_table.RecordBatchFileReader <- function(stream) { + shared_ptr(Table, Table__from_RecordBatchFileReader(stream)) } #' @export -`read_table.arrow::RecordBatchStreamReader` <- function(stream) { - shared_ptr(`arrow::Table`, Table__from_RecordBatchStreamReader(stream)) +read_table.RecordBatchStreamReader <- function(stream) { + shared_ptr(Table, Table__from_RecordBatchStreamReader(stream)) } #' @export @@ -64,16 +64,16 @@ read_table.character <- function(stream) { assert_that(length(stream) == 1L) stream <- 
ReadableFile$create(stream) on.exit(stream$close()) - batch_reader <- RecordBatchFileReader(stream) - shared_ptr(`arrow::Table`, Table__from_RecordBatchFileReader(batch_reader)) + batch_reader <- RecordBatchFileReader$create(stream) + shared_ptr(Table, Table__from_RecordBatchFileReader(batch_reader)) } #' @export `read_table.raw` <- function(stream) { stream <- BufferReader$create(stream) on.exit(stream$close()) - batch_reader <- RecordBatchStreamReader(stream) - shared_ptr(`arrow::Table`, Table__from_RecordBatchStreamReader(batch_reader)) + batch_reader <- RecordBatchStreamReader$create(stream) + shared_ptr(Table, Table__from_RecordBatchStreamReader(batch_reader)) } #' @rdname read_table diff --git a/r/R/type.R b/r/R/type.R index 769067b8819..7352e14e048 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -34,21 +34,21 @@ #' #' @rdname arrow__DataType #' @name arrow__DataType -`arrow::DataType` <- R6Class("arrow::DataType", +DataType <- R6Class("DataType", inherit = Object, public = list( ToString = function() { DataType__ToString(self) }, Equals = function(other) { - assert_that(inherits(other, "arrow::DataType")) + assert_that(inherits(other, "DataType")) DataType__Equals(self, other) }, num_children = function() { DataType__num_children(self) }, children = function() { - map(DataType__children_pointer(self), shared_ptr, class= `arrow::Field`) + map(DataType__children_pointer(self), shared_ptr, class = Field) }, ..dispatch = function(){ @@ -70,13 +70,13 @@ BINARY = stop("Type BINARY not implemented yet"), DATE32 = date32(), DATE64 = date64(), - TIMESTAMP = shared_ptr(`arrow::Timestamp`,self$pointer()), - TIME32 = shared_ptr(`arrow::Time32`,self$pointer()), - TIME64 = shared_ptr(`arrow::Time64`,self$pointer()), + TIMESTAMP = shared_ptr(Timestamp,self$pointer()), + TIME32 = shared_ptr(Time32,self$pointer()), + TIME64 = shared_ptr(Time64,self$pointer()), INTERVAL = stop("Type INTERVAL not implemented yet"), - DECIMAL = shared_ptr(`arrow::Decimal128Type`, self$pointer()), - 
LIST = shared_ptr(`arrow::ListType`, self$pointer()), - STRUCT = shared_ptr(`arrow::StructType`, self$pointer()), + DECIMAL = shared_ptr(Decimal128Type, self$pointer()), + LIST = shared_ptr(ListType, self$pointer()), + STRUCT = shared_ptr(StructType, self$pointer()), UNION = stop("Type UNION not implemented yet"), DICTIONARY = shared_ptr(DictionaryType, self$pointer()), MAP = stop("Type MAP not implemented yet") @@ -94,8 +94,8 @@ ) ) -`arrow::DataType`$dispatch <- function(xp){ - shared_ptr(`arrow::DataType`, xp)$..dispatch() +DataType$dispatch <- function(xp){ + shared_ptr(DataType, xp)$..dispatch() } #' infer the arrow Array type from an R vector @@ -110,7 +110,7 @@ type <- function(x) { #' @export type.default <- function(x) { - `arrow::DataType`$dispatch(Array__infer_type(x)) + DataType$dispatch(Array__infer_type(x)) } #' @export @@ -120,7 +120,7 @@ type.Array <- function(x) x$type type.ChunkedArray <- function(x) x$type #' @export -`type.arrow::Column` <- function(x) x$type +type.Column <- function(x) x$type #----- metadata @@ -137,119 +137,119 @@ type.ChunkedArray <- function(x) x$type #' #' @rdname arrow__FixedWidthType #' @name arrow__FixedWidthType -`arrow::FixedWidthType` <- R6Class("arrow::FixedWidthType", - inherit = `arrow::DataType`, +FixedWidthType <- R6Class("FixedWidthType", + inherit = DataType, active = list( bit_width = function() FixedWidthType__bit_width(self) ) ) #' @export -`==.arrow::DataType` <- function(lhs, rhs){ +`==.DataType` <- function(lhs, rhs){ lhs$Equals(rhs) } -"arrow::Int8" <- R6Class("arrow::Int8", - inherit = `arrow::FixedWidthType` +"Int8" <- R6Class("Int8", + inherit = FixedWidthType ) -"arrow::Int16" <- R6Class("arrow::Int16", - inherit = `arrow::FixedWidthType` +"Int16" <- R6Class("Int16", + inherit = FixedWidthType ) -"arrow::Int32" <- R6Class("arrow::Int32", - inherit = `arrow::FixedWidthType` +"Int32" <- R6Class("Int32", + inherit = FixedWidthType ) -"arrow::Int64" <- R6Class("arrow::Int64", - inherit = 
`arrow::FixedWidthType` +"Int64" <- R6Class("Int64", + inherit = FixedWidthType ) -"arrow::UInt8" <- R6Class("arrow::UInt8", - inherit = `arrow::FixedWidthType` +"UInt8" <- R6Class("UInt8", + inherit = FixedWidthType ) -"arrow::UInt16" <- R6Class("arrow::UInt16", - inherit = `arrow::FixedWidthType` +"UInt16" <- R6Class("UInt16", + inherit = FixedWidthType ) -"arrow::UInt32" <- R6Class("arrow::UInt32", - inherit = `arrow::FixedWidthType` +"UInt32" <- R6Class("UInt32", + inherit = FixedWidthType ) -"arrow::UInt64" <- R6Class("arrow::UInt64", - inherit = `arrow::FixedWidthType` +"UInt64" <- R6Class("UInt64", + inherit = FixedWidthType ) -"arrow::Float16" <- R6Class("arrow::Float16", - inherit = `arrow::FixedWidthType` +"Float16" <- R6Class("Float16", + inherit = FixedWidthType ) -"arrow::Float32" <- R6Class("arrow::Float32", - inherit = `arrow::FixedWidthType` +"Float32" <- R6Class("Float32", + inherit = FixedWidthType ) -"arrow::Float64" <- R6Class("arrow::Float64", - inherit = `arrow::FixedWidthType` +"Float64" <- R6Class("Float64", + inherit = FixedWidthType ) -"arrow::Boolean" <- R6Class("arrow::Boolean", - inherit = `arrow::FixedWidthType` +"Boolean" <- R6Class("Boolean", + inherit = FixedWidthType ) -"arrow::Utf8" <- R6Class("arrow::Utf8", - inherit = `arrow::DataType` +"Utf8" <- R6Class("Utf8", + inherit = DataType ) -`arrow::DateType` <- R6Class("arrow::DateType", - inherit = `arrow::FixedWidthType`, +DateType <- R6Class("DateType", + inherit = FixedWidthType, public = list( unit = function() DateType__unit(self) ) ) -"arrow::Date32" <- R6Class("arrow::Date32", - inherit = `arrow::DateType` +"Date32" <- R6Class("Date32", + inherit = DateType ) -"arrow::Date64" <- R6Class("arrow::Date64", - inherit = `arrow::DateType` +"Date64" <- R6Class("Date64", + inherit = DateType ) -"arrow::TimeType" <- R6Class("arrow::TimeType", - inherit = `arrow::FixedWidthType`, +"TimeType" <- R6Class("TimeType", + inherit = FixedWidthType, public = list( unit = function() 
TimeType__unit(self) ) ) -"arrow::Time32" <- R6Class("arrow::Time32", - inherit = `arrow::TimeType` +"Time32" <- R6Class("Time32", + inherit = TimeType ) -"arrow::Time64" <- R6Class("arrow::Time64", - inherit = `arrow::TimeType` +"Time64" <- R6Class("Time64", + inherit = TimeType ) -"arrow::Null" <- R6Class("arrow::Null", - inherit = `arrow::DataType` +"Null" <- R6Class("Null", + inherit = DataType ) -`arrow::Timestamp` <- R6Class( - "arrow::Timestamp", - inherit = `arrow::FixedWidthType` , +Timestamp <- R6Class( + "Timestamp", + inherit = FixedWidthType , public = list( timezone = function() TimestampType__timezone(self), unit = function() TimestampType__unit(self) ) ) -`arrow::DecimalType` <- R6Class("arrow:::DecimalType", - inherit = `arrow::FixedWidthType`, +DecimalType <- R6Class("DecimalType", + inherit = FixedWidthType, public = list( precision = function() DecimalType__precision(self), scale = function() DecimalType__scale(self) ) ) -"arrow::Decimal128Type" <- R6Class("arrow::Decimal128Type", - inherit = `arrow::DecimalType` +"Decimal128Type" <- R6Class("Decimal128Type", + inherit = DecimalType ) #' Apache Arrow data types @@ -280,7 +280,7 @@ type.ChunkedArray <- function(x) x$type #' @param ... For `struct()`, a named list of types to define the struct columns #' #' @name data-type -#' @return An Arrow type object inheriting from `arrow::DataType`. +#' @return An Arrow type object inheriting from DataType. #' @export #' @seealso [dictionary()] for creating a dictionary (factor-like) type. 
#' @examples @@ -290,39 +290,39 @@ type.ChunkedArray <- function(x) x$type #' timestamp("ms", timezone = "CEST") #' time64("ns") #' } -int8 <- function() shared_ptr(`arrow::Int8`, Int8__initialize()) +int8 <- function() shared_ptr(Int8, Int8__initialize()) #' @rdname data-type #' @export -int16 <- function() shared_ptr(`arrow::Int16`, Int16__initialize()) +int16 <- function() shared_ptr(Int16, Int16__initialize()) #' @rdname data-type #' @export -int32 <- function() shared_ptr(`arrow::Int32`, Int32__initialize()) +int32 <- function() shared_ptr(Int32, Int32__initialize()) #' @rdname data-type #' @export -int64 <- function() shared_ptr(`arrow::Int64`, Int64__initialize()) +int64 <- function() shared_ptr(Int64, Int64__initialize()) #' @rdname data-type #' @export -uint8 <- function() shared_ptr(`arrow::UInt8`, UInt8__initialize()) +uint8 <- function() shared_ptr(UInt8, UInt8__initialize()) #' @rdname data-type #' @export -uint16 <- function() shared_ptr(`arrow::UInt16`, UInt16__initialize()) +uint16 <- function() shared_ptr(UInt16, UInt16__initialize()) #' @rdname data-type #' @export -uint32 <- function() shared_ptr(`arrow::UInt32`, UInt32__initialize()) +uint32 <- function() shared_ptr(UInt32, UInt32__initialize()) #' @rdname data-type #' @export -uint64 <- function() shared_ptr(`arrow::UInt64`, UInt64__initialize()) +uint64 <- function() shared_ptr(UInt64, UInt64__initialize()) #' @rdname data-type #' @export -float16 <- function() shared_ptr(`arrow::Float16`, Float16__initialize()) +float16 <- function() shared_ptr(Float16, Float16__initialize()) #' @rdname data-type #' @export @@ -330,7 +330,7 @@ halffloat <- float16 #' @rdname data-type #' @export -float32 <- function() shared_ptr(`arrow::Float32`, Float32__initialize()) +float32 <- function() shared_ptr(Float32, Float32__initialize()) #' @rdname data-type #' @export @@ -338,11 +338,11 @@ float <- float32 #' @rdname data-type #' @export -float64 <- function() shared_ptr(`arrow::Float64`, Float64__initialize()) 
+float64 <- function() shared_ptr(Float64, Float64__initialize()) #' @rdname data-type #' @export -boolean <- function() shared_ptr(`arrow::Boolean`, Boolean__initialize()) +boolean <- function() shared_ptr(Boolean, Boolean__initialize()) #' @rdname data-type #' @export @@ -350,7 +350,7 @@ bool <- boolean #' @rdname data-type #' @export -utf8 <- function() shared_ptr(`arrow::Utf8`, Utf8__initialize()) +utf8 <- function() shared_ptr(Utf8, Utf8__initialize()) #' @rdname data-type #' @export @@ -358,11 +358,11 @@ string <- utf8 #' @rdname data-type #' @export -date32 <- function() shared_ptr(`arrow::Date32`, Date32__initialize()) +date32 <- function() shared_ptr(Date32, Date32__initialize()) #' @rdname data-type #' @export -date64 <- function() shared_ptr(`arrow::Date64`, Date64__initialize()) +date64 <- function() shared_ptr(Date64, Date64__initialize()) #' @rdname data-type #' @export @@ -371,7 +371,7 @@ time32 <- function(unit = c("ms", "s")) { unit <- match.arg(unit) } unit <- make_valid_time_unit(unit, valid_time32_units) - shared_ptr(`arrow::Time32`, Time32__initialize(unit)) + shared_ptr(Time32, Time32__initialize(unit)) } valid_time32_units <- c( @@ -422,12 +422,12 @@ time64 <- function(unit = c("ns", "us")) { unit <- match.arg(unit) } unit <- make_valid_time_unit(unit, valid_time64_units) - shared_ptr(`arrow::Time64`, Time64__initialize(unit)) + shared_ptr(Time64, Time64__initialize(unit)) } #' @rdname data-type #' @export -null <- function() shared_ptr(`arrow::Null`, Null__initialize()) +null <- function() shared_ptr(Null, Null__initialize()) #' @rdname data-type #' @export @@ -437,15 +437,15 @@ timestamp <- function(unit = c("s", "ms", "us", "ns"), timezone) { } unit <- make_valid_time_unit(unit, c(valid_time64_units, valid_time32_units)) if (missing(timezone)) { - shared_ptr(`arrow::Timestamp`, Timestamp__initialize1(unit)) + shared_ptr(Timestamp, Timestamp__initialize1(unit)) } else { assert_that(is.character(timezone), length(timezone) == 1) - 
shared_ptr(`arrow::Timestamp`, Timestamp__initialize2(unit, timezone)) + shared_ptr(Timestamp, Timestamp__initialize2(unit, timezone)) } } #' @rdname data-type #' @export -decimal <- function(precision, scale) shared_ptr(`arrow::Decimal128Type`, Decimal128Type__initialize(precision, scale)) +decimal <- function(precision, scale) shared_ptr(Decimal128Type, Decimal128Type__initialize(precision, scale)) -`arrow::NestedType` <- R6Class("arrow::NestedType", inherit = `arrow::DataType`) +NestedType <- R6Class("NestedType", inherit = DataType) diff --git a/r/R/write_arrow.R b/r/R/write_arrow.R index 4b259ae85db..0e3ab840931 100644 --- a/r/R/write_arrow.R +++ b/r/R/write_arrow.R @@ -19,8 +19,8 @@ to_arrow <- function(x) { UseMethod("to_arrow") } -`to_arrow.arrow::RecordBatch` <- function(x) x -`to_arrow.arrow::Table` <- function(x) x +to_arrow.RecordBatch <- function(x) x +to_arrow.Table <- function(x) x # splice the data frame as arguments of table() # see ?rlang::list2() @@ -55,7 +55,7 @@ write_arrow <- function(x, stream, ...) { } #' @export -`write_arrow.arrow::RecordBatchWriter` <- function(x, stream, ...){ +write_arrow.RecordBatchWriter <- function(x, stream, ...){ stream$write(x) } @@ -65,7 +65,7 @@ write_arrow <- function(x, stream, ...) { x <- to_arrow(x) file_stream <- FileOutputStream$create(stream) on.exit(file_stream$close()) - file_writer <- RecordBatchFileWriter(file_stream, x$schema) + file_writer <- RecordBatchFileWriter$create(file_stream, x$schema) on.exit({ # Re-set the exit code to close both connections, LIFO file_writer$close() @@ -83,7 +83,7 @@ write_arrow <- function(x, stream, ...) { # how many bytes do we need mock_stream <- MockOutputStream$create() - writer <- RecordBatchStreamWriter(mock_stream, schema) + writer <- RecordBatchStreamWriter$create(mock_stream, schema) writer$write(x) writer$close() n <- mock_stream$GetExtentBytesWritten() @@ -91,7 +91,7 @@ write_arrow <- function(x, stream, ...) 
{ # now that we know the size, stream in a buffer backed by an R raw vector bytes <- raw(n) buffer_writer <- FixedSizeBufferWriter$create(buffer(bytes)) - writer <- RecordBatchStreamWriter(buffer_writer, schema) + writer <- RecordBatchStreamWriter$create(buffer_writer, schema) writer$write(x) writer$close() diff --git a/r/README.md b/r/README.md index 4a6af748faf..8fc13331785 100644 --- a/r/README.md +++ b/r/README.md @@ -71,11 +71,11 @@ set.seed(24) tab <- arrow::table(x = 1:10, y = rnorm(10)) tab$schema -#> arrow::Schema +#> Schema #> x: int32 #> y: double tab -#> arrow::Table +#> Table as.data.frame(tab) #> x y #> 1 1 -0.545880758 diff --git a/r/man/RecordBatchFileReader.Rd b/r/man/RecordBatchFileReader.Rd deleted file mode 100644 index 63dee6d068d..00000000000 --- a/r/man/RecordBatchFileReader.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatchReader.R -\name{RecordBatchFileReader} -\alias{RecordBatchFileReader} -\title{Create an \link[=arrow__ipc__RecordBatchFileReader]{arrow::RecordBatchFileReader} from a file} -\usage{ -RecordBatchFileReader(file) -} -\arguments{ -\item{file}{The file to read from. A file path, or an \link[=arrow__ipc__RecordBatchFileReader]{arrow::io::RandomAccessFile}} -} -\description{ -Create an \link[=arrow__ipc__RecordBatchFileReader]{arrow::RecordBatchFileReader} from a file -} diff --git a/r/man/RecordBatchFileWriter.Rd b/r/man/RecordBatchFileWriter.Rd deleted file mode 100644 index 481852345f8..00000000000 --- a/r/man/RecordBatchFileWriter.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatchWriter.R -\name{RecordBatchFileWriter} -\alias{RecordBatchFileWriter} -\title{Create a record batch file writer from a stream} -\usage{ -RecordBatchFileWriter(sink, schema) -} -\arguments{ -\item{sink}{Where to write. 
Can either be: -\itemize{ -\item a string file path -\item \link[=arrow__io__OutputStream]{arrow::io::OutputStream} -}} - -\item{schema}{The \link[=arrow__Schema]{arrow::Schema} for data to be written.} -} -\value{ -an \code{arrow::RecordBatchWriter} object -} -\description{ -Create a record batch file writer from a stream -} diff --git a/r/man/RecordBatchStreamReader.Rd b/r/man/RecordBatchStreamReader.Rd deleted file mode 100644 index 491adb162b2..00000000000 --- a/r/man/RecordBatchStreamReader.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatchReader.R -\name{RecordBatchStreamReader} -\alias{RecordBatchStreamReader} -\title{Create a \link[=arrow__ipc__RecordBatchStreamReader]{arrow::RecordBatchStreamReader} from an input stream} -\usage{ -RecordBatchStreamReader(stream) -} -\arguments{ -\item{stream}{input stream, an \link[=arrow__io__InputStream]{arrow::io::InputStream} or a raw vector} -} -\description{ -Create a \link[=arrow__ipc__RecordBatchStreamReader]{arrow::RecordBatchStreamReader} from an input stream -} diff --git a/r/man/RecordBatchStreamWriter.Rd b/r/man/RecordBatchStreamWriter.Rd deleted file mode 100644 index 0f783c1e5db..00000000000 --- a/r/man/RecordBatchStreamWriter.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatchWriter.R -\name{RecordBatchStreamWriter} -\alias{RecordBatchStreamWriter} -\title{Writer for the Arrow streaming binary format} -\usage{ -RecordBatchStreamWriter(sink, schema) -} -\arguments{ -\item{sink}{Where to write. 
Can either be: -\itemize{ -\item A string file path -\item \link[=arrow__io__OutputStream]{arrow::io::OutputStream} -}} - -\item{schema}{The \link[=arrow__Schema]{arrow::Schema} for data to be written.} -} -\value{ -a \link[=arrow__ipc__RecordBatchStreamWriter]{arrow::RecordBatchStreamWriter} -} -\description{ -Writer for the Arrow streaming binary format -} diff --git a/r/man/arrow__Column.Rd b/r/man/arrow__Column.Rd index 6a0ee6a40a5..73fd7075076 100644 --- a/r/man/arrow__Column.Rd +++ b/r/man/arrow__Column.Rd @@ -3,7 +3,7 @@ \docType{class} \name{arrow__Column} \alias{arrow__Column} -\alias{arrow::Column} +\alias{Column} \title{class arrow::Column} \description{ class arrow::Column diff --git a/r/man/arrow__DataType.Rd b/r/man/arrow__DataType.Rd index 4eeb05110d2..81c64142043 100644 --- a/r/man/arrow__DataType.Rd +++ b/r/man/arrow__DataType.Rd @@ -3,7 +3,7 @@ \docType{class} \name{arrow__DataType} \alias{arrow__DataType} -\alias{arrow::DataType} +\alias{DataType} \title{class arrow::DataType} \description{ class arrow::DataType diff --git a/r/man/arrow__Field.Rd b/r/man/arrow__Field.Rd index 893a65aa08e..9dfb114ba8f 100644 --- a/r/man/arrow__Field.Rd +++ b/r/man/arrow__Field.Rd @@ -3,7 +3,7 @@ \docType{class} \name{arrow__Field} \alias{arrow__Field} -\alias{arrow::Field} +\alias{Field} \title{class arrow::Field} \description{ class arrow::Field diff --git a/r/man/arrow__FixedWidthType.Rd b/r/man/arrow__FixedWidthType.Rd index 075c0eeeb14..414e6a06f41 100644 --- a/r/man/arrow__FixedWidthType.Rd +++ b/r/man/arrow__FixedWidthType.Rd @@ -3,7 +3,7 @@ \docType{class} \name{arrow__FixedWidthType} \alias{arrow__FixedWidthType} -\alias{arrow::FixedWidthType} +\alias{FixedWidthType} \title{class arrow::FixedWidthType} \description{ class arrow::FixedWidthType diff --git a/r/man/arrow__RecordBatch.Rd b/r/man/arrow__RecordBatch.Rd index 40ba6323ee0..04468c185d5 100644 --- a/r/man/arrow__RecordBatch.Rd +++ b/r/man/arrow__RecordBatch.Rd @@ -3,7 +3,7 @@ \docType{class} 
\name{arrow__RecordBatch} \alias{arrow__RecordBatch} -\alias{arrow::RecordBatch} +\alias{RecordBatch} \title{class arrow::RecordBatch} \description{ class arrow::RecordBatch diff --git a/r/man/arrow__RecordBatchReader.Rd b/r/man/arrow__RecordBatchReader.Rd index b3ccd3f1749..d66e4c833e0 100644 --- a/r/man/arrow__RecordBatchReader.Rd +++ b/r/man/arrow__RecordBatchReader.Rd @@ -3,7 +3,7 @@ \docType{class} \name{arrow__RecordBatchReader} \alias{arrow__RecordBatchReader} -\alias{arrow::RecordBatchReader} +\alias{RecordBatchReader} \title{class arrow::RecordBatchReader} \description{ class arrow::RecordBatchReader diff --git a/r/man/arrow__Schema.Rd b/r/man/arrow__Schema.Rd index b657ff2c4a8..6279a768091 100644 --- a/r/man/arrow__Schema.Rd +++ b/r/man/arrow__Schema.Rd @@ -3,7 +3,7 @@ \docType{class} \name{arrow__Schema} \alias{arrow__Schema} -\alias{arrow::Schema} +\alias{Schema} \title{class arrow::Schema} \description{ class arrow::Schema diff --git a/r/man/arrow__Table.Rd b/r/man/arrow__Table.Rd index 139db980acf..0800bdce7a5 100644 --- a/r/man/arrow__Table.Rd +++ b/r/man/arrow__Table.Rd @@ -3,7 +3,7 @@ \docType{class} \name{arrow__Table} \alias{arrow__Table} -\alias{arrow::Table} +\alias{Table} \title{class arrow::Table} \description{ class arrow::Table diff --git a/r/man/arrow___MemoryPool.Rd b/r/man/arrow___MemoryPool.Rd index 9189e8be4a3..0189a98c13c 100644 --- a/r/man/arrow___MemoryPool.Rd +++ b/r/man/arrow___MemoryPool.Rd @@ -3,7 +3,7 @@ \docType{class} \name{arrow__MemoryPool} \alias{arrow__MemoryPool} -\alias{arrow::MemoryPool} +\alias{MemoryPool} \title{class arrow::MemoryPool} \description{ class arrow::MemoryPool diff --git a/r/man/arrow__ipc__RecordBatchFileReader.Rd b/r/man/arrow__ipc__RecordBatchFileReader.Rd index 56eea2645d2..7dc2b48da6b 100644 --- a/r/man/arrow__ipc__RecordBatchFileReader.Rd +++ b/r/man/arrow__ipc__RecordBatchFileReader.Rd @@ -3,7 +3,7 @@ \docType{class} \name{arrow__ipc__RecordBatchFileReader} 
\alias{arrow__ipc__RecordBatchFileReader} -\alias{arrow::RecordBatchFileReader} +\alias{RecordBatchFileReader} \title{class arrow::RecordBatchFileReader} \description{ class arrow::RecordBatchFileReader diff --git a/r/man/arrow__ipc__RecordBatchFileWriter.Rd b/r/man/arrow__ipc__RecordBatchFileWriter.Rd index 162a8081f25..6f0f5e70f29 100644 --- a/r/man/arrow__ipc__RecordBatchFileWriter.Rd +++ b/r/man/arrow__ipc__RecordBatchFileWriter.Rd @@ -3,7 +3,7 @@ \docType{class} \name{arrow__ipc__RecordBatchFileWriter} \alias{arrow__ipc__RecordBatchFileWriter} -\alias{arrow::RecordBatchFileWriter} +\alias{RecordBatchFileWriter} \title{class arrow::RecordBatchFileWriter Writer for the Arrow binary file format} @@ -13,7 +13,7 @@ class arrow::RecordBatchFileWriter Writer for the Arrow binary file format } \section{usage}{ -\preformatted{writer <- RecordBatchFileWriter(sink, schema) +\preformatted{writer <- RecordBatchFileWriter$create(sink, schema) writer$write_batch(batch) writer$write_table(table) diff --git a/r/man/arrow__ipc__RecordBatchStreamReader.Rd b/r/man/arrow__ipc__RecordBatchStreamReader.Rd index 397198aeb31..feb24ea9add 100644 --- a/r/man/arrow__ipc__RecordBatchStreamReader.Rd +++ b/r/man/arrow__ipc__RecordBatchStreamReader.Rd @@ -3,7 +3,7 @@ \docType{class} \name{arrow__ipc__RecordBatchStreamReader} \alias{arrow__ipc__RecordBatchStreamReader} -\alias{arrow::RecordBatchStreamReader} +\alias{RecordBatchStreamReader} \title{class arrow::RecordBatchStreamReader} \description{ class arrow::RecordBatchStreamReader diff --git a/r/man/arrow__ipc__RecordBatchStreamWriter.Rd b/r/man/arrow__ipc__RecordBatchStreamWriter.Rd index 79055c7a35a..12308e4688e 100644 --- a/r/man/arrow__ipc__RecordBatchStreamWriter.Rd +++ b/r/man/arrow__ipc__RecordBatchStreamWriter.Rd @@ -3,7 +3,7 @@ \docType{class} \name{arrow__ipc__RecordBatchStreamWriter} \alias{arrow__ipc__RecordBatchStreamWriter} -\alias{arrow::RecordBatchStreamWriter} +\alias{RecordBatchStreamWriter} \title{class 
arrow::RecordBatchStreamWriter Writer for the Arrow streaming binary format} @@ -13,7 +13,7 @@ class arrow::RecordBatchStreamWriter Writer for the Arrow streaming binary format } \section{usage}{ -\preformatted{writer <- RecordBatchStreamWriter(sink, schema) +\preformatted{writer <- RecordBatchStreamWriter$create(sink, schema) writer$write_batch(batch) writer$write_table(table) diff --git a/r/man/arrow__ipc__RecordBatchWriter.Rd b/r/man/arrow__ipc__RecordBatchWriter.Rd index fb51cc5bae0..de83e9b8488 100644 --- a/r/man/arrow__ipc__RecordBatchWriter.Rd +++ b/r/man/arrow__ipc__RecordBatchWriter.Rd @@ -3,7 +3,7 @@ \docType{class} \name{arrow__ipc__RecordBatchWriter} \alias{arrow__ipc__RecordBatchWriter} -\alias{arrow::RecordBatchWriter} +\alias{RecordBatchWriter} \title{class arrow::RecordBatchWriter} \description{ class arrow::RecordBatchWriter diff --git a/r/man/data-type.Rd b/r/man/data-type.Rd index a0355cdb1fa..45219f22641 100644 --- a/r/man/data-type.Rd +++ b/r/man/data-type.Rd @@ -98,7 +98,7 @@ take any of those four values.} \item{...}{For \code{struct()}, a named list of types to define the struct columns} } \value{ -An Arrow type object inheriting from \code{arrow::DataType}. +An Arrow type object inheriting from DataType. } \description{ These functions create type objects corresponding to Arrow types. 
Use them diff --git a/r/man/enums.Rd b/r/man/enums.Rd index c55170e1ec0..f7f76c3c035 100644 --- a/r/man/enums.Rd +++ b/r/man/enums.Rd @@ -11,7 +11,7 @@ \alias{MessageType} \alias{CompressionType} \title{Arrow enums} -\format{An object of class \code{arrow::TimeUnit::type} (inherits from \code{arrow-enum}) of length 4.} +\format{An object of class \code{TimeUnit::type} (inherits from \code{arrow-enum}) of length 4.} \usage{ TimeUnit diff --git a/r/man/field.Rd b/r/man/field.Rd index 8cf260a08f1..8fa371736eb 100644 --- a/r/man/field.Rd +++ b/r/man/field.Rd @@ -2,19 +2,19 @@ % Please edit documentation in R/Field.R \name{field} \alias{field} -\title{Factory for a \code{arrow::Field}} +\title{Factory for a Field} \usage{ field(name, type, metadata) } \arguments{ \item{name}{field name} -\item{type}{logical type, instance of \code{arrow::DataType}} +\item{type}{logical type, instance of DataType} \item{metadata}{currently ignored} } \description{ -Factory for a \code{arrow::Field} +Factory for a Field } \examples{ \donttest{ diff --git a/r/man/read_delim_arrow.Rd b/r/man/read_delim_arrow.Rd index 4af5e9a027e..7e4ef997d93 100644 --- a/r/man/read_delim_arrow.Rd +++ b/r/man/read_delim_arrow.Rd @@ -74,7 +74,7 @@ parsing options provided in other arguments (e.g. \code{delim}, \code{quote}, et \link[=arrow__Table]{arrow::Table}?} } \value{ -A \code{data.frame}, or an \code{arrow::Table} if \code{as_tibble = FALSE}. +A \code{data.frame}, or a Table if \code{as_tibble = FALSE}. } \description{ These functions uses the Arrow C++ CSV reader to read into a \code{data.frame}. diff --git a/r/man/read_feather.Rd b/r/man/read_feather.Rd index 3662ae770b4..48ff908721e 100644 --- a/r/man/read_feather.Rd +++ b/r/man/read_feather.Rd @@ -7,7 +7,8 @@ read_feather(file, col_select = NULL, as_tibble = TRUE, ...) 
} \arguments{ -\item{file}{an FeatherTableReader or whatever the \code{\link[=FeatherTableReader]{FeatherTableReader()}} function can handle} +\item{file}{A character file path, a raw vector, or \code{InputStream}, passed to +\code{FeatherTableReader$create()}.} \item{col_select}{A character vector of column names to keep, as in the "select" argument to \code{data.table::fread()}, or a diff --git a/r/man/read_json_arrow.Rd b/r/man/read_json_arrow.Rd index 4c2f66869ca..06bc09cf431 100644 --- a/r/man/read_json_arrow.Rd +++ b/r/man/read_json_arrow.Rd @@ -20,7 +20,7 @@ of columns, as used in \code{dplyr::select()}.} \item{...}{Additional options, passed to \code{json_table_reader()}} } \value{ -A \code{data.frame}, or an \code{arrow::Table} if \code{as_tibble = FALSE}. +A \code{data.frame}, or a Table if \code{as_tibble = FALSE}. } \description{ Using \link[=arrow__json__TableReader]{JsonTableReader} diff --git a/r/man/write_feather.Rd b/r/man/write_feather.Rd index 66dfa678d37..24636a09cb0 100644 --- a/r/man/write_feather.Rd +++ b/r/man/write_feather.Rd @@ -7,7 +7,7 @@ write_feather(data, stream) } \arguments{ -\item{data}{\code{data.frame} or \code{arrow::RecordBatch}} +\item{data}{\code{data.frame} or RecordBatch} \item{stream}{A file path or an OutputStream} } diff --git a/r/src/datatype.cpp b/r/src/datatype.cpp index f625c89c9e7..19ba471787f 100644 --- a/r/src/datatype.cpp +++ b/r/src/datatype.cpp @@ -116,12 +116,12 @@ std::shared_ptr Time64__initialize(arrow::TimeUnit::type unit) // [[arrow::export]] SEXP list__(SEXP x) { - if (Rf_inherits(x, "arrow::Field")) { + if (Rf_inherits(x, "Field")) { Rcpp::ConstReferenceSmartPtrInputParameter> field(x); return wrap(arrow::list(field)); } - if (Rf_inherits(x, "arrow::DataType")) { + if (Rf_inherits(x, "DataType")) { Rcpp::ConstReferenceSmartPtrInputParameter> type(x); return wrap(arrow::list(type)); } diff --git a/r/src/recordbatch.cpp b/r/src/recordbatch.cpp index 11d3d6ab163..706851a4d11 100644 --- 
a/r/src/recordbatch.cpp +++ b/r/src/recordbatch.cpp @@ -234,7 +234,7 @@ std::shared_ptr RecordBatch__from_arrays__known_schema( // [[arrow::export]] std::shared_ptr RecordBatch__from_arrays(SEXP schema_sxp, SEXP lst) { - if (Rf_inherits(schema_sxp, "arrow::Schema")) { + if (Rf_inherits(schema_sxp, "Schema")) { return RecordBatch__from_arrays__known_schema( arrow::r::extract(schema_sxp), lst); } diff --git a/r/src/table.cpp b/r/src/table.cpp index e8d08983f58..cc1e81df4eb 100644 --- a/r/src/table.cpp +++ b/r/src/table.cpp @@ -100,7 +100,7 @@ std::shared_ptr Table__select(const std::shared_ptr& bool all_record_batches(SEXP lst) { R_xlen_t n = XLENGTH(lst); for (R_xlen_t i = 0; i < n; i++) { - if (!Rf_inherits(VECTOR_ELT(lst, i), "arrow::RecordBatch")) return false; + if (!Rf_inherits(VECTOR_ELT(lst, i), "RecordBatch")) return false; } return true; } @@ -114,7 +114,7 @@ std::shared_ptr Table__from_dots(SEXP lst, SEXP schema_sxp) { auto batches = arrow::r::List_to_shared_ptr_vector(lst); std::shared_ptr tab; - if (Rf_inherits(schema_sxp, "arrow::Schema")) { + if (Rf_inherits(schema_sxp, "Schema")) { auto schema = arrow::r::extract(schema_sxp); STOP_IF_NOT_OK(arrow::Table::FromRecordBatches(schema, batches, &tab)); } else { @@ -166,7 +166,7 @@ std::shared_ptr Table__from_dots(SEXP lst, SEXP schema_sxp) { } schema = std::make_shared(std::move(fields)); - } else if (Rf_inherits(schema_sxp, "arrow::Schema")) { + } else if (Rf_inherits(schema_sxp, "Schema")) { // use the schema that is given schema = arrow::r::extract(schema_sxp); diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R index b0bec91b2c8..a50dbbbe9ca 100644 --- a/r/tests/testthat/test-Array.R +++ b/r/tests/testthat/test-Array.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
-context("arrow::Array") +context("Array") test_that("Array", { x <- Array$create(c(1:10, 1:10, 1:5)) diff --git a/r/tests/testthat/test-RecordBatch.R b/r/tests/testthat/test-RecordBatch.R index d8ba8da0b59..e9eff464818 100644 --- a/r/tests/testthat/test-RecordBatch.R +++ b/r/tests/testthat/test-RecordBatch.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -context("arrow::RecordBatch") +context("RecordBatch") test_that("RecordBatch", { tbl <- tibble::tibble( diff --git a/r/tests/testthat/test-Table.R b/r/tests/testthat/test-Table.R index e262000d35e..f675c7aa688 100644 --- a/r/tests/testthat/test-Table.R +++ b/r/tests/testthat/test-Table.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -context("arrow::Table") +context("Table") test_that("read_table handles various input streams (ARROW-3450, ARROW-3505)", { tbl <- tibble::tibble( @@ -34,21 +34,21 @@ test_that("read_table handles various input streams (ARROW-3450, ARROW-3505)", { tab2 <- read_table(fs::path_abs(tf)) readable_file <- ReadableFile$create(tf) - file_reader1 <- RecordBatchFileReader(readable_file) + file_reader1 <- RecordBatchFileReader$create(readable_file) tab3 <- read_table(file_reader1) readable_file$close() mmap_file <- mmap_open(tf) - file_reader2 <- RecordBatchFileReader(mmap_file) + file_reader2 <- RecordBatchFileReader$create(mmap_file) tab4 <- read_table(file_reader2) mmap_file$close() tab5 <- read_table(bytes) - stream_reader <- RecordBatchStreamReader(bytes) + stream_reader <- RecordBatchStreamReader$create(bytes) tab6 <- read_table(stream_reader) - file_reader <- RecordBatchFileReader(tf) + file_reader <- RecordBatchFileReader$create(tf) tab7 <- read_table(file_reader) expect_equal(tab, tab1) diff --git a/r/tests/testthat/test-arraydata.R b/r/tests/testthat/test-arraydata.R index e05a8306005..78904823d89 100644 --- a/r/tests/testthat/test-arraydata.R +++ b/r/tests/testthat/test-arraydata.R @@ 
-15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -context("arrow::ArrayData") +context("ArrayData") test_that("string vectors with only empty strings and nulls don't allocate a data buffer (ARROW-3693)", { a <- Array$create("") diff --git a/r/tests/testthat/test-data-type.R b/r/tests/testthat/test-data-type.R index b16da2cca1b..fd8bef1fc79 100644 --- a/r/tests/testthat/test-data-type.R +++ b/r/tests/testthat/test-data-type.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -context("arrow::DataType") +context("DataType") test_that("null type works as expected",{ x <- null() diff --git a/r/tests/testthat/test-feather.R b/r/tests/testthat/test-feather.R index f5e48820440..0ab20bff546 100644 --- a/r/tests/testthat/test-feather.R +++ b/r/tests/testthat/test-feather.R @@ -95,7 +95,7 @@ test_that("feather handles col_select = ", { test_that("feather read/write round trip", { tab1 <- read_feather(feather_file, as_tibble = FALSE) - expect_is(tab1, "arrow::Table") + expect_is(tab1, "Table") expect_equal(tib, as.data.frame(tab1)) }) diff --git a/r/tests/testthat/test-field.R b/r/tests/testthat/test-field.R index 5d63a7f45fe..d7de087d12f 100644 --- a/r/tests/testthat/test-field.R +++ b/r/tests/testthat/test-field.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
-context("arrow::Field") +context("Field") test_that("field() factory", { x <- field("x", int32()) diff --git a/r/tests/testthat/test-read-write.R b/r/tests/testthat/test-read-write.R index cfa78232d68..94ca28a2265 100644 --- a/r/tests/testthat/test-read-write.R +++ b/r/tests/testthat/test-read-write.R @@ -17,7 +17,7 @@ context("read-write") -test_that("arrow::table round trip", { +test_that("table round trip", { tbl <- tibble::tibble( int = 1:10, dbl = as.numeric(1:10), @@ -76,7 +76,7 @@ test_that("arrow::table round trip", { unlink(tf) }) -test_that("arrow::table round trip handles NA in integer and numeric", { +test_that("table round trip handles NA in integer and numeric", { tbl <- tibble::tibble( int = c(NA, 2:10), dbl = as.numeric(c(1:5, NA, 7:9, NA)), diff --git a/r/tests/testthat/test-read_record_batch.R b/r/tests/testthat/test-read_record_batch.R index de0c6944d6c..f64625d9e17 100644 --- a/r/tests/testthat/test-read_record_batch.R +++ b/r/tests/testthat/test-read_record_batch.R @@ -26,16 +26,16 @@ test_that("RecordBatchFileWriter / RecordBatchFileReader roundtrips", { ) tf <- tempfile() - writer <- RecordBatchFileWriter(tf, tab$schema) - expect_is(writer, "arrow::RecordBatchFileWriter") + writer <- RecordBatchFileWriter$create(tf, tab$schema) + expect_is(writer, "RecordBatchFileWriter") writer$write_table(tab) writer$close() tab2 <- read_table(tf) expect_equal(tab, tab2) stream <- FileOutputStream$create(tf) - writer <- RecordBatchFileWriter(stream, tab$schema) - expect_is(writer, "arrow::RecordBatchFileWriter") + writer <- RecordBatchFileWriter$create(stream, tab$schema) + expect_is(writer, "RecordBatchFileWriter") writer$write_table(tab) writer$close() tab3 <- read_table(tf) diff --git a/r/tests/testthat/test-recordbatchreader.R b/r/tests/testthat/test-recordbatchreader.R index 5d74a7846ed..67aca97602e 100644 --- a/r/tests/testthat/test-recordbatchreader.R +++ b/r/tests/testthat/test-recordbatchreader.R @@ -15,7 +15,7 @@ # specific language governing 
permissions and limitations # under the License. -context("arrow::RecordBatch.*(Reader|Writer)") +context("RecordBatch.*(Reader|Writer)") test_that("RecordBatchStreamReader / Writer", { batch <- record_batch( @@ -24,19 +24,19 @@ test_that("RecordBatchStreamReader / Writer", { ) sink <- BufferOutputStream$create() - writer <- RecordBatchStreamWriter(sink, batch$schema) - expect_is(writer, "arrow::RecordBatchStreamWriter") + writer <- RecordBatchStreamWriter$create(sink, batch$schema) + expect_is(writer, "RecordBatchStreamWriter") writer$write_batch(batch) writer$close() buf <- sink$getvalue() expect_is(buf, "Buffer") - reader <- RecordBatchStreamReader(buf) - expect_is(reader, "arrow::RecordBatchStreamReader") + reader <- RecordBatchStreamReader$create(buf) + expect_is(reader, "RecordBatchStreamReader") batch1 <- reader$read_next_batch() - expect_is(batch1, "arrow::RecordBatch") + expect_is(batch1, "RecordBatch") expect_equal(batch, batch1) expect_null(reader$read_next_batch()) @@ -49,19 +49,19 @@ test_that("RecordBatchFileReader / Writer", { ) sink <- BufferOutputStream$create() - writer <- RecordBatchFileWriter(sink, batch$schema) - expect_is(writer, "arrow::RecordBatchFileWriter") + writer <- RecordBatchFileWriter$create(sink, batch$schema) + expect_is(writer, "RecordBatchFileWriter") writer$write_batch(batch) writer$close() buf <- sink$getvalue() expect_is(buf, "Buffer") - reader <- RecordBatchFileReader(buf) - expect_is(reader, "arrow::RecordBatchFileReader") + reader <- RecordBatchFileReader$create(buf) + expect_is(reader, "RecordBatchFileReader") batch1 <- reader$get_batch(0L) - expect_is(batch1, "arrow::RecordBatch") + expect_is(batch1, "RecordBatch") expect_equal(batch, batch1) expect_equal(reader$num_record_batches, 1L) diff --git a/r/tests/testthat/test-schema.R b/r/tests/testthat/test-schema.R index 6656da2cddc..f50adc22a42 100644 --- a/r/tests/testthat/test-schema.R +++ b/r/tests/testthat/test-schema.R @@ -15,7 +15,7 @@ # specific language governing 
permissions and limitations # under the License. -context("arrow::Schema") +context("Schema") test_that("Alternate type names are supported", { expect_equal( @@ -28,11 +28,11 @@ test_that("reading schema from Buffer", { # TODO: this uses the streaming format, i.e. from RecordBatchStreamWriter # maybe there is an easier way to serialize a schema batch <- record_batch(x = 1:10) - expect_is(batch, "arrow::RecordBatch") + expect_is(batch, "RecordBatch") stream <- BufferOutputStream$create() - writer <- RecordBatchStreamWriter(stream, batch$schema) - expect_is(writer, "arrow::RecordBatchStreamWriter") + writer <- RecordBatchStreamWriter$create(stream, batch$schema) + expect_is(writer, "RecordBatchStreamWriter") writer$close() buffer <- stream$getvalue() From b6945114ffa245ec50b8b003b41870981476dfa9 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Thu, 5 Sep 2019 14:46:02 -0700 Subject: [PATCH 18/37] Remove defunct Column class --- r/DESCRIPTION | 3 +-- r/R/Column.R | 42 ------------------------------------------ r/man/arrow__Column.Rd | 17 ----------------- 3 files changed, 1 insertion(+), 61 deletions(-) delete mode 100644 r/R/Column.R delete mode 100644 r/man/arrow__Column.Rd diff --git a/r/DESCRIPTION b/r/DESCRIPTION index 4849c4991ed..e31f2c9bd36 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -48,9 +48,8 @@ Suggests: Collate: 'enums.R' 'arrow-package.R' - 'type.R' - 'Column.R' 'Field.R' + 'type.R' 'List.R' 'RecordBatch.R' 'RecordBatchReader.R' diff --git a/r/R/Column.R b/r/R/Column.R deleted file mode 100644 index f6c49945eec..00000000000 --- a/r/R/Column.R +++ /dev/null @@ -1,42 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -#' @include type.R - -#' @title class arrow::Column -#' -#' @usage NULL -#' @format NULL -#' @docType class -#' -#' @section Methods: -#' -#' TODO -#' -#' @rdname arrow__Column -#' @name arrow__Column -Column <- R6Class("Column", inherit = Object, - public = list( - length = function() Column__length(self), - data = function() shared_ptr(ChunkedArray, Column__data(self)) - ), - - active = list( - null_count = function() Column__null_count(self), - type = function() DataType$dispatch(Column__type(self)) - ) -) diff --git a/r/man/arrow__Column.Rd b/r/man/arrow__Column.Rd deleted file mode 100644 index 73fd7075076..00000000000 --- a/r/man/arrow__Column.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Column.R -\docType{class} -\name{arrow__Column} -\alias{arrow__Column} -\alias{Column} -\title{class arrow::Column} -\description{ -class arrow::Column -} -\section{Methods}{ - - -TODO -} - -\keyword{datasets} From 2d1b73875f4cb531b1d38bb2ed28db7ccd837d9a Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Thu, 5 Sep 2019 15:12:29 -0700 Subject: [PATCH 19/37] Replace table() with Table() --- r/NAMESPACE | 4 +- r/R/Schema.R | 10 +- r/R/Table.R | 18 +-- r/R/buffer.R | 1 + r/R/chunked-array.R | 1 + r/R/type.R | 138 ++++++---------------- r/R/write_arrow.R | 4 +- r/README.Rmd | 2 +- r/README.md | 2 +- r/man/arrow__Table.Rd | 10 ++ r/man/table.Rd | 19 --- r/tests/testthat/test-Table.R | 20 ++-- r/tests/testthat/test-buffer.R | 2 +- r/tests/testthat/test-csv.R | 4 +- r/tests/testthat/test-read-write.R 
| 4 +- r/tests/testthat/test-read_record_batch.R | 2 +- 16 files changed, 86 insertions(+), 155 deletions(-) delete mode 100644 r/man/table.Rd diff --git a/r/NAMESPACE b/r/NAMESPACE index a7058304f20..de29b9c34b9 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -41,12 +41,15 @@ S3method(write_arrow,RecordBatchWriter) S3method(write_arrow,character) S3method(write_arrow,raw) export(Array) +export(Buffer) +export(ChunkedArray) export(CompressionType) export(DateUnit) export(FileMode) export(MessageReader) export(MessageType) export(StatusCode) +export(Table) export(TimeUnit) export(Type) export(arrow_available) @@ -110,7 +113,6 @@ export(schema) export(starts_with) export(string) export(struct) -export(table) export(time32) export(time64) export(timestamp) diff --git a/r/R/Schema.R b/r/R/Schema.R index 57e585daec1..4a7041cce12 100644 --- a/r/R/Schema.R +++ b/r/R/Schema.R @@ -55,10 +55,10 @@ Schema <- R6Class("Schema", ) ) +Schema$create <- function(...) shared_ptr(Schema, schema_(.fields(list2(...)))) + #' @export -`==.Schema` <- function(lhs, rhs){ - lhs$Equals(rhs) -} +`==.Schema` <- function(lhs, rhs) lhs$Equals(rhs) #' Create a schema #' @@ -73,9 +73,7 @@ Schema <- R6Class("Schema", #' #' @export # TODO (npr): add examples once ARROW-5505 merges -schema <- function(...){ - shared_ptr(Schema, schema_(.fields(list2(...)))) -} +schema <- Schema$create #' read a Schema from a stream #' diff --git a/r/R/Table.R b/r/R/Table.R index fc8a6418c80..33fcfa12f6e 100644 --- a/r/R/Table.R +++ b/r/R/Table.R @@ -23,12 +23,20 @@ #' @format NULL #' @docType class #' +#' @section Factory: +#' +#' The `Table$create()` function takes the following arguments: +#' +#' * `...`` arrays, chunked arrays, or R vectors +#' * `schema` a schema. 
The default (`NULL`) infers the schema from the `...` +#' #' @section Methods: #' #' TODO #' #' @rdname arrow__Table #' @name arrow__Table +#' @export Table <- R6Class("Table", inherit = Object, public = list( column = function(i) shared_ptr(ChunkedArray, Table__column(self, i)), @@ -65,15 +73,7 @@ Table <- R6Class("Table", inherit = Object, ) ) -#' Create an arrow::Table from a data frame -#' -#' @param ... arrays, chunked arrays, or R vectors -#' @param schema a schema. The default (`NULL`) infers the schema from the `...` -#' -#' @return an arrow::Table -#' -#' @export -table <- function(..., schema = NULL){ +Table$create <- function(..., schema = NULL){ dots <- list2(...) # making sure there are always names if (is.null(names(dots))) { diff --git a/r/R/buffer.R b/r/R/buffer.R index f616f635fe4..d4a5df616b7 100644 --- a/r/R/buffer.R +++ b/r/R/buffer.R @@ -33,6 +33,7 @@ #' #' @rdname buffer #' @name buffer +#' @export Buffer <- R6Class("Buffer", inherit = Object, public = list( ZeroPadding = function() Buffer__ZeroPadding(self), diff --git a/r/R/chunked-array.R b/r/R/chunked-array.R index f9e2034b377..4fa60805f98 100644 --- a/r/R/chunked-array.R +++ b/r/R/chunked-array.R @@ -29,6 +29,7 @@ #' #' @rdname chunked-array #' @name chunked-array +#' @export ChunkedArray <- R6Class("ChunkedArray", inherit = Object, public = list( length = function() ChunkedArray__length(self), diff --git a/r/R/type.R b/r/R/type.R index 7352e14e048..a379f81fcaa 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -51,7 +51,7 @@ DataType <- R6Class("DataType", map(DataType__children_pointer(self), shared_ptr, class = Field) }, - ..dispatch = function(){ + ..dispatch = function() { switch(names(Type)[self$id + 1], "NA" = null(), BOOL = boolean(), @@ -70,9 +70,9 @@ DataType <- R6Class("DataType", BINARY = stop("Type BINARY not implemented yet"), DATE32 = date32(), DATE64 = date64(), - TIMESTAMP = shared_ptr(Timestamp,self$pointer()), - TIME32 = shared_ptr(Time32,self$pointer()), - TIME64 = 
shared_ptr(Time64,self$pointer()), + TIMESTAMP = shared_ptr(Timestamp, self$pointer()), + TIME32 = shared_ptr(Time32, self$pointer()), + TIME64 = shared_ptr(Time64, self$pointer()), INTERVAL = stop("Type INTERVAL not implemented yet"), DECIMAL = shared_ptr(Decimal128Type, self$pointer()), LIST = shared_ptr(ListType, self$pointer()), @@ -85,18 +85,12 @@ DataType <- R6Class("DataType", ), active = list( - id = function(){ - DataType__id(self) - }, - name = function() { - DataType__name(self) - } + id = function() DataType__id(self), + name = function() DataType__name(self) ) ) -DataType$dispatch <- function(xp){ - shared_ptr(DataType, xp)$..dispatch() -} +DataType$dispatch <- function(xp) shared_ptr(DataType, xp)$..dispatch() #' infer the arrow Array type from an R vector #' @@ -104,14 +98,10 @@ DataType$dispatch <- function(xp){ #' #' @return an arrow logical type #' @export -type <- function(x) { - UseMethod("type") -} +type <- function(x) UseMethod("type") #' @export -type.default <- function(x) { - DataType$dispatch(Array__infer_type(x)) -} +type.default <- function(x) DataType$dispatch(Array__infer_type(x)) #' @export type.Array <- function(x) x$type @@ -145,60 +135,21 @@ FixedWidthType <- R6Class("FixedWidthType", ) #' @export -`==.DataType` <- function(lhs, rhs){ - lhs$Equals(rhs) -} - -"Int8" <- R6Class("Int8", - inherit = FixedWidthType -) - -"Int16" <- R6Class("Int16", - inherit = FixedWidthType -) - -"Int32" <- R6Class("Int32", - inherit = FixedWidthType -) - -"Int64" <- R6Class("Int64", - inherit = FixedWidthType -) - - -"UInt8" <- R6Class("UInt8", - inherit = FixedWidthType -) - -"UInt16" <- R6Class("UInt16", - inherit = FixedWidthType -) - -"UInt32" <- R6Class("UInt32", - inherit = FixedWidthType -) - -"UInt64" <- R6Class("UInt64", - inherit = FixedWidthType -) - -"Float16" <- R6Class("Float16", - inherit = FixedWidthType -) -"Float32" <- R6Class("Float32", - inherit = FixedWidthType -) -"Float64" <- R6Class("Float64", - inherit = FixedWidthType -) - 
-"Boolean" <- R6Class("Boolean", - inherit = FixedWidthType -) - -"Utf8" <- R6Class("Utf8", - inherit = DataType -) +`==.DataType` <- function(lhs, rhs) lhs$Equals(rhs) + +Int8 <- R6Class("Int8", inherit = FixedWidthType) +Int16 <- R6Class("Int16", inherit = FixedWidthType) +Int32 <- R6Class("Int32", inherit = FixedWidthType) +Int64 <- R6Class("Int64", inherit = FixedWidthType) +UInt8 <- R6Class("UInt8", inherit = FixedWidthType) +UInt16 <- R6Class("UInt16", inherit = FixedWidthType) +UInt32 <- R6Class("UInt32", inherit = FixedWidthType) +UInt64 <- R6Class("UInt64", inherit = FixedWidthType) +Float16 <- R6Class("Float16", inherit = FixedWidthType) +Float32 <- R6Class("Float32", inherit = FixedWidthType) +Float64 <- R6Class("Float64", inherit = FixedWidthType) +Boolean <- R6Class("Boolean", inherit = FixedWidthType) +Utf8 <- R6Class("Utf8", inherit = DataType) DateType <- R6Class("DateType", inherit = FixedWidthType, @@ -206,51 +157,38 @@ DateType <- R6Class("DateType", unit = function() DateType__unit(self) ) ) +Date32 <- R6Class("Date32", inherit = DateType) +Date64 <- R6Class("Date64", inherit = DateType) -"Date32" <- R6Class("Date32", - inherit = DateType -) -"Date64" <- R6Class("Date64", - inherit = DateType -) - -"TimeType" <- R6Class("TimeType", +TimeType <- R6Class("TimeType", inherit = FixedWidthType, public = list( unit = function() TimeType__unit(self) ) ) -"Time32" <- R6Class("Time32", - inherit = TimeType -) -"Time64" <- R6Class("Time64", - inherit = TimeType -) +Time32 <- R6Class("Time32", inherit = TimeType) +Time64 <- R6Class("Time64", inherit = TimeType) -"Null" <- R6Class("Null", - inherit = DataType -) +Null <- R6Class("Null", inherit = DataType) -Timestamp <- R6Class( - "Timestamp", - inherit = FixedWidthType , +Timestamp <- R6Class("Timestamp", + inherit = FixedWidthType, public = list( timezone = function() TimestampType__timezone(self), unit = function() TimestampType__unit(self) ) ) -DecimalType <- R6Class(":DecimalType", +DecimalType <- 
R6Class("DecimalType", inherit = FixedWidthType, public = list( precision = function() DecimalType__precision(self), scale = function() DecimalType__scale(self) ) ) +Decimal128Type <- R6Class("Decimal128Type", inherit = DecimalType) -"Decimal128Type" <- R6Class("Decimal128Type", - inherit = DecimalType -) +NestedType <- R6Class("NestedType", inherit = DataType) #' Apache Arrow data types #' @@ -446,6 +384,6 @@ timestamp <- function(unit = c("s", "ms", "us", "ns"), timezone) { #' @rdname data-type #' @export -decimal <- function(precision, scale) shared_ptr(Decimal128Type, Decimal128Type__initialize(precision, scale)) - -NestedType <- R6Class("NestedType", inherit = DataType) +decimal <- function(precision, scale) { + shared_ptr(Decimal128Type, Decimal128Type__initialize(precision, scale)) +} diff --git a/r/R/write_arrow.R b/r/R/write_arrow.R index 0e3ab840931..7eca2dcd1fa 100644 --- a/r/R/write_arrow.R +++ b/r/R/write_arrow.R @@ -22,9 +22,9 @@ to_arrow <- function(x) { to_arrow.RecordBatch <- function(x) x to_arrow.Table <- function(x) x -# splice the data frame as arguments of table() +# splice the data frame as arguments of Table$create() # see ?rlang::list2() -`to_arrow.data.frame` <- function(x) table(!!!x) +`to_arrow.data.frame` <- function(x) Table$create(!!!x) #' Write Arrow formatted data #' diff --git a/r/README.Rmd b/r/README.Rmd index 0fad65925d8..1460f21be37 100644 --- a/r/README.Rmd +++ b/r/README.Rmd @@ -54,7 +54,7 @@ When installing from source, if the R and C++ library versions do not match, ins library(arrow) set.seed(24) -tab <- arrow::table(x = 1:10, y = rnorm(10)) +tab <- Table$create(x = 1:10, y = rnorm(10)) tab$schema tab as.data.frame(tab) diff --git a/r/README.md b/r/README.md index 8fc13331785..9733da91a95 100644 --- a/r/README.md +++ b/r/README.md @@ -69,7 +69,7 @@ Arrow C++ library first. 
library(arrow) set.seed(24) -tab <- arrow::table(x = 1:10, y = rnorm(10)) +tab <- Table$create(x = 1:10, y = rnorm(10)) tab$schema #> Schema #> x: int32 diff --git a/r/man/arrow__Table.Rd b/r/man/arrow__Table.Rd index 0800bdce7a5..cb70c7c6d06 100644 --- a/r/man/arrow__Table.Rd +++ b/r/man/arrow__Table.Rd @@ -8,6 +8,16 @@ \description{ class arrow::Table } +\section{Factory}{ + + +The \code{Table$create()} function takes the following arguments: +\itemize{ +\item `...`` arrays, chunked arrays, or R vectors +\item \code{schema} a schema. The default (\code{NULL}) infers the schema from the \code{...} +} +} + \section{Methods}{ diff --git a/r/man/table.Rd b/r/man/table.Rd deleted file mode 100644 index fbf9632a03a..00000000000 --- a/r/man/table.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Table.R -\name{table} -\alias{table} -\title{Create an arrow::Table from a data frame} -\usage{ -table(..., schema = NULL) -} -\arguments{ -\item{...}{arrays, chunked arrays, or R vectors} - -\item{schema}{a schema. 
The default (\code{NULL}) infers the schema from the \code{...}} -} -\value{ -an arrow::Table -} -\description{ -Create an arrow::Table from a data frame -} diff --git a/r/tests/testthat/test-Table.R b/r/tests/testthat/test-Table.R index f675c7aa688..674a4d09c75 100644 --- a/r/tests/testthat/test-Table.R +++ b/r/tests/testthat/test-Table.R @@ -23,7 +23,7 @@ test_that("read_table handles various input streams (ARROW-3450, ARROW-3505)", { lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE), chr = letters[1:10] ) - tab <- arrow::table(!!!tbl) + tab <- Table$create(!!!tbl) tf <- tempfile() write_arrow(tab, tf) @@ -64,7 +64,7 @@ test_that("read_table handles various input streams (ARROW-3450, ARROW-3505)", { }) test_that("Table cast (ARROW-3741)", { - tab <- table(x = 1:10, y = 1:10) + tab <- Table$create(x = 1:10, y = 1:10) expect_error(tab$cast(schema(x = int32()))) expect_error(tab$cast(schema(x = int32(), z = int32()))) @@ -77,14 +77,14 @@ test_that("Table cast (ARROW-3741)", { }) test_that("Table dim() and nrow() (ARROW-3816)", { - tab <- table(x = 1:10, y = 1:10) + tab <- Table$create(x = 1:10, y = 1:10) expect_equal(dim(tab), c(10L, 2L)) expect_equal(nrow(tab), 10L) }) test_that("table() handles record batches with splicing", { batch <- record_batch(x = 1:2, y = letters[1:2]) - tab <- table(batch, batch, batch) + tab <- Table$create(batch, batch, batch) expect_equal(tab$schema, batch$schema) expect_equal(tab$num_rows, 6L) expect_equivalent( @@ -93,7 +93,7 @@ test_that("table() handles record batches with splicing", { ) batches <- list(batch, batch, batch) - tab <- table(!!!batches) + tab <- Table$create(!!!batches) expect_equal(tab$schema, batch$schema) expect_equal(tab$num_rows, 6L) expect_equivalent( @@ -108,7 +108,7 @@ test_that("table() handles ... 
of arrays, chunked arrays, vectors", { v <- rnorm(10) tbl <- tibble::tibble(x = 1:10, y = letters[1:10]) - tab <- table(a = a, b = ca, c = v, !!!tbl) + tab <- Table$create(a = a, b = ca, c = v, !!!tbl) expect_equal( tab$schema, schema(a = int32(), b = int32(), c = float64(), x = int32(), y = utf8()) @@ -123,15 +123,15 @@ test_that("table() handles ... of arrays, chunked arrays, vectors", { test_that("table() auto splices (ARROW-5718)", { df <- tibble::tibble(x = 1:10, y = letters[1:10]) - tab1 <- table(df) - tab2 <- table(!!!df) + tab1 <- Table$create(df) + tab2 <- Table$create(!!!df) expect_equal(tab1, tab2) expect_equal(tab1$schema, schema(x = int32(), y = utf8())) expect_equivalent(as.data.frame(tab1), df) s <- schema(x = float64(), y = utf8()) - tab3 <- table(df, schema = s) - tab4 <- table(!!!df, schema = s) + tab3 <- Table$create(df, schema = s) + tab4 <- Table$create(!!!df, schema = s) expect_equal(tab3, tab4) expect_equal(tab3$schema, s) expect_equivalent(as.data.frame(tab3), df) diff --git a/r/tests/testthat/test-buffer.R b/r/tests/testthat/test-buffer.R index 19184604e1c..d8baa5c5bee 100644 --- a/r/tests/testthat/test-buffer.R +++ b/r/tests/testthat/test-buffer.R @@ -67,7 +67,7 @@ test_that("can read remaining bytes of a RandomAccessFile", { lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE), chr = letters[1:10] ) - tab <- arrow::table(!!!tbl) + tab <- Table$create(!!!tbl) tf <- tempfile() all_bytes <- write_arrow(tab, tf) diff --git a/r/tests/testthat/test-csv.R b/r/tests/testthat/test-csv.R index fc392675637..24354a14186 100644 --- a/r/tests/testthat/test-csv.R +++ b/r/tests/testthat/test-csv.R @@ -28,7 +28,7 @@ test_that("Can read csv file", { tab3 <- read_csv_arrow(ReadableFile$create(tf), as_tibble = FALSE) iris$Species <- as.character(iris$Species) - tab0 <- table(!!!iris) + tab0 <- Table$create(!!!iris) expect_equal(tab0, tab1) expect_equal(tab0, tab2) expect_equal(tab0, tab3) @@ -171,7 +171,7 @@ test_that("read_csv_arrow() respects col_select", 
{ write.csv(iris, tf, row.names = FALSE, quote = FALSE) tab <- read_csv_arrow(tf, col_select = starts_with("Sepal"), as_tibble = FALSE) - expect_equal(tab, table(Sepal.Length = iris$Sepal.Length, Sepal.Width = iris$Sepal.Width)) + expect_equal(tab, Table$create(Sepal.Length = iris$Sepal.Length, Sepal.Width = iris$Sepal.Width)) tib <- read_csv_arrow(tf, col_select = starts_with("Sepal"), as_tibble = TRUE) expect_equal(tib, tibble::tibble(Sepal.Length = iris$Sepal.Length, Sepal.Width = iris$Sepal.Width)) diff --git a/r/tests/testthat/test-read-write.R b/r/tests/testthat/test-read-write.R index 94ca28a2265..ec56d6a783b 100644 --- a/r/tests/testthat/test-read-write.R +++ b/r/tests/testthat/test-read-write.R @@ -24,7 +24,7 @@ test_that("table round trip", { raw = as.raw(1:10) ) - tab <- arrow::table(!!!tbl) + tab <- Table$create(!!!tbl) expect_equal(tab$num_columns, 3L) expect_equal(tab$num_rows, 10L) @@ -83,7 +83,7 @@ test_that("table round trip handles NA in integer and numeric", { raw = as.raw(1:10) ) - tab <- arrow::table(!!!tbl) + tab <- Table$create(!!!tbl) expect_equal(tab$num_columns, 3L) expect_equal(tab$num_rows, 10L) diff --git a/r/tests/testthat/test-read_record_batch.R b/r/tests/testthat/test-read_record_batch.R index f64625d9e17..222be775db1 100644 --- a/r/tests/testthat/test-read_record_batch.R +++ b/r/tests/testthat/test-read_record_batch.R @@ -18,7 +18,7 @@ context("read_record_batch()") test_that("RecordBatchFileWriter / RecordBatchFileReader roundtrips", { - tab <- table( + tab <- Table$create( int = 1:10, dbl = as.numeric(1:10), lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE), From 71cac57aaec14ac4eaf10636e6bfd03d1a10f0c4 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Thu, 5 Sep 2019 16:07:55 -0700 Subject: [PATCH 20/37] Clean up Rd file names, experiment with documenting constructors, and start updating pkgdown --- r/R/Field.R | 20 ++---- r/R/RecordBatch.R | 8 +-- r/R/RecordBatchReader.R | 12 ++-- r/R/RecordBatchWriter.R | 20 +++--- 
r/R/Schema.R | 21 +++--- r/R/Table.R | 4 +- r/R/csv.R | 2 +- r/R/dictionary.R | 6 +- r/R/feather.R | 2 +- r/R/io.R | 46 ++++++------ r/R/json.R | 8 +-- r/R/memory_pool.R | 8 +-- r/R/message.R | 8 +-- r/R/parquet.R | 4 +- r/R/read_record_batch.R | 8 +-- r/R/read_table.R | 20 +++--- r/R/type.R | 8 +-- r/R/write_arrow.R | 12 ++-- r/_pkgdown.yml | 72 +++++++------------ ...rOutputStream.Rd => BufferOutputStream.Rd} | 3 +- ...w__io__BufferReader.Rd => BufferReader.Rd} | 3 +- r/man/{arrow__DataType.Rd => DataType.Rd} | 3 +- ...w__DictionaryType.Rd => DictionaryType.Rd} | 3 +- ...ileOutputStream.Rd => FileOutputStream.Rd} | 3 +- ...fferWriter.Rd => FixedSizeBufferWriter.Rd} | 3 +- ...w__FixedWidthType.Rd => FixedWidthType.Rd} | 3 +- ...row__io__InputStream.Rd => InputStream.Rd} | 3 +- ...son__TableReader.Rd => JsonTableReader.Rd} | 5 +- ...emoryMappedFile.Rd => MemoryMappedFile.Rd} | 3 +- .../{arrow___MemoryPool.Rd => MemoryPool.Rd} | 3 +- r/man/{arrow__ipc__Message.Rd => Message.Rd} | 3 +- ...ipc__MessageReader.Rd => MessageReader.Rd} | 3 +- ...ockOutputStream.Rd => MockOutputStream.Rd} | 3 +- ...w__io__OutputStream.Rd => OutputStream.Rd} | 3 +- ...andomAccessFile.Rd => RandomAccessFile.Rd} | 3 +- r/man/{arrow__io__Readable.Rd => Readable.Rd} | 3 +- ...w__io__ReadableFile.Rd => ReadableFile.Rd} | 3 +- .../{arrow__RecordBatch.Rd => RecordBatch.Rd} | 3 +- ...FileReader.Rd => RecordBatchFileReader.Rd} | 3 +- ...FileWriter.Rd => RecordBatchFileWriter.Rd} | 5 +- ...ordBatchReader.Rd => RecordBatchReader.Rd} | 3 +- ...amReader.Rd => RecordBatchStreamReader.Rd} | 3 +- ...amWriter.Rd => RecordBatchStreamWriter.Rd} | 5 +- ...ordBatchWriter.Rd => RecordBatchWriter.Rd} | 7 +- r/man/{arrow__Table.Rd => Table.Rd} | 3 +- r/man/arrow__Field.Rd | 17 ----- r/man/arrow__Schema.Rd | 29 -------- r/man/default_memory_pool.Rd | 6 +- r/man/dictionary.Rd | 2 +- r/man/field.Rd | 26 +++++-- r/man/mmap_create.Rd | 2 +- r/man/read_delim_arrow.Rd | 2 +- r/man/read_feather.Rd | 4 +- 
r/man/read_json_arrow.Rd | 4 +- r/man/read_parquet.Rd | 4 +- r/man/read_record_batch.Rd | 10 +-- r/man/read_table.Rd | 22 +++--- r/man/record_batch.Rd | 6 +- r/man/schema.Rd | 30 ++++++-- r/man/write_arrow.Rd | 12 ++-- r/man/write_parquet.Rd | 2 +- 61 files changed, 242 insertions(+), 313 deletions(-) rename r/man/{arrow__io__BufferOutputStream.Rd => BufferOutputStream.Rd} (81%) rename r/man/{arrow__io__BufferReader.Rd => BufferReader.Rd} (80%) rename r/man/{arrow__DataType.Rd => DataType.Rd} (83%) rename r/man/{arrow__DictionaryType.Rd => DictionaryType.Rd} (80%) rename r/man/{arrow__io__FileOutputStream.Rd => FileOutputStream.Rd} (78%) rename r/man/{arrow__io__FixedSizeBufferWriter.Rd => FixedSizeBufferWriter.Rd} (77%) rename r/man/{arrow__FixedWidthType.Rd => FixedWidthType.Rd} (81%) rename r/man/{arrow__io__InputStream.Rd => InputStream.Rd} (80%) rename r/man/{arrow__json__TableReader.Rd => JsonTableReader.Rd} (62%) rename r/man/{arrow__io__MemoryMappedFile.Rd => MemoryMappedFile.Rd} (83%) rename r/man/{arrow___MemoryPool.Rd => MemoryPool.Rd} (82%) rename r/man/{arrow__ipc__Message.Rd => Message.Rd} (80%) rename r/man/{arrow__ipc__MessageReader.Rd => MessageReader.Rd} (78%) rename r/man/{arrow__io__MockOutputStream.Rd => MockOutputStream.Rd} (78%) rename r/man/{arrow__io__OutputStream.Rd => OutputStream.Rd} (84%) rename r/man/{arrow__io__RandomAccessFile.Rd => RandomAccessFile.Rd} (78%) rename r/man/{arrow__io__Readable.Rd => Readable.Rd} (81%) rename r/man/{arrow__io__ReadableFile.Rd => ReadableFile.Rd} (80%) rename r/man/{arrow__RecordBatch.Rd => RecordBatch.Rd} (82%) rename r/man/{arrow__ipc__RecordBatchFileReader.Rd => RecordBatchFileReader.Rd} (77%) rename r/man/{arrow__ipc__RecordBatchFileWriter.Rd => RecordBatchFileWriter.Rd} (83%) rename r/man/{arrow__RecordBatchReader.Rd => RecordBatchReader.Rd} (80%) rename r/man/{arrow__ipc__RecordBatchStreamReader.Rd => RecordBatchStreamReader.Rd} (77%) rename r/man/{arrow__ipc__RecordBatchStreamWriter.Rd => 
RecordBatchStreamWriter.Rd} (83%) rename r/man/{arrow__ipc__RecordBatchWriter.Rd => RecordBatchWriter.Rd} (60%) rename r/man/{arrow__Table.Rd => Table.Rd} (92%) delete mode 100644 r/man/arrow__Field.Rd delete mode 100644 r/man/arrow__Schema.Rd diff --git a/r/R/Field.R b/r/R/Field.R index 3f5ff1f9cfe..77c59f794f5 100644 --- a/r/R/Field.R +++ b/r/R/Field.R @@ -16,19 +16,16 @@ # under the License. #' @include arrow-package.R - #' @title class arrow::Field -#' -#' @usage NULL -#' @format NULL #' @docType class #' #' @section Methods: #' -#' TODO +#' - `f$ToString()`: convert to a string +#' - `f$Equals(other)`: test for equality. More naturally called as `f == other` #' -#' @rdname arrow__Field -#' @name arrow__Field +#' @rdname Field +#' @name Field Field <- R6Class("Field", inherit = Object, public = list( ToString = function() { @@ -57,18 +54,15 @@ Field <- R6Class("Field", inherit = Object, lhs$Equals(rhs) } -#' Factory for a Field -#' #' @param name field name -#' @param type logical type, instance of DataType +#' @param type logical type, instance of [DataType] #' @param metadata currently ignored #' #' @examples #' \donttest{ -#' try({ -#' field("x", int32()) -#' }) +#' field("x", int32()) #' } +#' @rdname Field #' @export field <- function(name, type, metadata) { assert_that(inherits(name, "character"), length(name) == 1L) diff --git a/r/R/RecordBatch.R b/r/R/RecordBatch.R index 906b51fe8ae..b9f64dd834e 100644 --- a/r/R/RecordBatch.R +++ b/r/R/RecordBatch.R @@ -27,8 +27,8 @@ #' #' TODO #' -#' @rdname arrow__RecordBatch -#' @name arrow__RecordBatch +#' @rdname RecordBatch +#' @name RecordBatch RecordBatch <- R6Class("RecordBatch", inherit = Object, public = list( column = function(i) shared_ptr(Array, RecordBatch__column(self, i)), @@ -99,11 +99,11 @@ as.data.frame.RecordBatch <- function(x, row.names = NULL, optional = FALSE, use RecordBatch__to_dataframe(x, use_threads = option_use_threads()) } -#' Create an [arrow::RecordBatch][arrow__RecordBatch] from a data 
frame +#' Create an [arrow::RecordBatch][RecordBatch] from a data frame #' #' @param ... A variable number of Array #' @param schema a arrow::Schema #' -#' @return a [arrow::RecordBatch][arrow__RecordBatch] +#' @return a [arrow::RecordBatch][RecordBatch] #' @export record_batch <- RecordBatch$create diff --git a/r/R/RecordBatchReader.R b/r/R/RecordBatchReader.R index 323ef1f1405..1527a49e1aa 100644 --- a/r/R/RecordBatchReader.R +++ b/r/R/RecordBatchReader.R @@ -27,8 +27,8 @@ #' #' TODO #' -#' @rdname arrow__RecordBatchReader -#' @name arrow__RecordBatchReader +#' @rdname RecordBatchReader +#' @name RecordBatchReader RecordBatchReader <- R6Class("RecordBatchReader", inherit = Object, public = list( read_next_batch = function() { @@ -50,8 +50,8 @@ RecordBatchReader <- R6Class("RecordBatchReader", inherit = Object, #' #' TODO #' -#' @rdname arrow__ipc__RecordBatchStreamReader -#' @name arrow__ipc__RecordBatchStreamReader +#' @rdname RecordBatchStreamReader +#' @name RecordBatchStreamReader RecordBatchStreamReader <- R6Class("RecordBatchStreamReader", inherit = RecordBatchReader, public = list( batches = function() map(ipc___RecordBatchStreamReader__batches(self), shared_ptr, class = RecordBatch) @@ -76,8 +76,8 @@ RecordBatchStreamReader$create <- function(stream){ #' #' TODO #' -#' @rdname arrow__ipc__RecordBatchFileReader -#' @name arrow__ipc__RecordBatchFileReader +#' @rdname RecordBatchFileReader +#' @name RecordBatchFileReader RecordBatchFileReader <- R6Class("RecordBatchFileReader", inherit = Object, # Why doesn't this inherit from RecordBatchReader? 
public = list( diff --git a/r/R/RecordBatchWriter.R b/r/R/RecordBatchWriter.R index 8600cbd33b7..acf5588e9ee 100644 --- a/r/R/RecordBatchWriter.R +++ b/r/R/RecordBatchWriter.R @@ -31,11 +31,11 @@ #' #' @section Derived classes: #' -#' - [arrow::RecordBatchStreamWriter][arrow__ipc__RecordBatchStreamWriter] implements the streaming binary format -#' - [arrow::RecordBatchFileWriter][arrow__ipc__RecordBatchFileWriter] implements the binary file format +#' - [arrow::RecordBatchStreamWriter][RecordBatchStreamWriter] implements the streaming binary format +#' - [arrow::RecordBatchFileWriter][RecordBatchFileWriter] implements the binary file format #' -#' @rdname arrow__ipc__RecordBatchWriter -#' @name arrow__ipc__RecordBatchWriter +#' @rdname RecordBatchWriter +#' @name RecordBatchWriter RecordBatchWriter <- R6Class("RecordBatchWriter", inherit = Object, public = list( write_batch = function(batch) ipc___RecordBatchWriter__WriteRecordBatch(self, batch), @@ -80,14 +80,14 @@ RecordBatchWriter <- R6Class("RecordBatchWriter", inherit = Object, #' The [RecordBatchStreamWriter()] function creates a record batch stream writer. #' #' @section Methods: -#' inherited from [arrow::RecordBatchWriter][arrow__ipc__RecordBatchWriter] +#' inherited from [arrow::RecordBatchWriter][RecordBatchWriter] #' #' - `$write_batch(batch)`: Write record batch to stream #' - `$write_table(table)`: write Table to stream #' - `$close()`: close stream #' -#' @rdname arrow__ipc__RecordBatchStreamWriter -#' @name arrow__ipc__RecordBatchStreamWriter +#' @rdname RecordBatchStreamWriter +#' @name RecordBatchStreamWriter RecordBatchStreamWriter <- R6Class("RecordBatchStreamWriter", inherit = RecordBatchWriter) RecordBatchStreamWriter$create <- function(sink, schema) { @@ -123,14 +123,14 @@ RecordBatchStreamWriter$create <- function(sink, schema) { #' The [RecordBatchFileWriter()] function creates a record batch stream writer. 
#' #' @section Methods: -#' inherited from [arrow::RecordBatchWriter][arrow__ipc__RecordBatchWriter] +#' inherited from [arrow::RecordBatchWriter][RecordBatchWriter] #' #' - `$write_batch(batch)`: Write record batch to stream #' - `$write_table(table)`: write Table to stream #' - `$close()`: close stream #' -#' @rdname arrow__ipc__RecordBatchFileWriter -#' @name arrow__ipc__RecordBatchFileWriter +#' @rdname RecordBatchFileWriter +#' @name RecordBatchFileWriter RecordBatchFileWriter <- R6Class("RecordBatchFileWriter", inherit = RecordBatchStreamWriter) RecordBatchFileWriter$create <- function(sink, schema) { diff --git a/r/R/Schema.R b/r/R/Schema.R index 4a7041cce12..1016279770f 100644 --- a/r/R/Schema.R +++ b/r/R/Schema.R @@ -16,9 +16,13 @@ # under the License. #' @include arrow-package.R - #' @title class arrow::Schema #' +#' @description Create a `Schema` when you +#' want to convert an R `data.frame` to Arrow but don't want to rely on the +#' default mapping of R types to Arrow types, such as when you want to choose a +#' specific numeric precision. +#' #' @usage NULL #' @format NULL #' @docType class @@ -39,8 +43,8 @@ #' - `$num_fields()`: returns the number of fields #' - `$field(i)`: returns the field at index `i` (0-based) #' -#' @rdname arrow__Schema -#' @name arrow__Schema +#' @rdname Schema +#' @name Schema Schema <- R6Class("Schema", inherit = Object, public = list( @@ -60,18 +64,9 @@ Schema$create <- function(...) shared_ptr(Schema, schema_(.fields(list2(...)))) #' @export `==.Schema` <- function(lhs, rhs) lhs$Equals(rhs) -#' Create a schema -#' -#' This function lets you define a schema for a table. This is useful when you -#' want to convert an R `data.frame` to Arrow but don't want to rely on the -#' default mapping of R types to Arrow types, such as when you want to choose a -#' specific numeric precision. -#' #' @param ... named list of [data types][data-type] -#' -#' @return A [schema][arrow__Schema] object. 
-#' #' @export +#' @rdname Schema # TODO (npr): add examples once ARROW-5505 merges schema <- Schema$create diff --git a/r/R/Table.R b/r/R/Table.R index 33fcfa12f6e..91764994d4a 100644 --- a/r/R/Table.R +++ b/r/R/Table.R @@ -34,8 +34,8 @@ #' #' TODO #' -#' @rdname arrow__Table -#' @name arrow__Table +#' @rdname Table +#' @name Table #' @export Table <- R6Class("Table", inherit = Object, public = list( diff --git a/r/R/csv.R b/r/R/csv.R index 7b557ed0c17..7c249ae0dd6 100644 --- a/r/R/csv.R +++ b/r/R/csv.R @@ -64,7 +64,7 @@ #' @param convert_options see [csv_convert_options()] #' @param read_options see [csv_read_options()] #' @param as_tibble Should the function return a `data.frame` or an -#' [arrow::Table][arrow__Table]? +#' [arrow::Table][Table]? #' #' @return A `data.frame`, or an Table if `as_tibble = FALSE`. #' @export diff --git a/r/R/dictionary.R b/r/R/dictionary.R index ea2c676a4bf..ce4a1b2f366 100644 --- a/r/R/dictionary.R +++ b/r/R/dictionary.R @@ -27,8 +27,8 @@ #' #' TODO #' -#' @rdname arrow__DictionaryType -#' @name arrow__DictionaryType +#' @rdname DictionaryType +#' @name DictionaryType DictionaryType <- R6Class("DictionaryType", inherit = FixedWidthType, @@ -46,7 +46,7 @@ DictionaryType <- R6Class("DictionaryType", #' @param value_type value type, probably [utf8()] #' @param ordered Is this an ordered dictionary ? #' -#' @return An [DictionaryType][arrow__DictionaryType] +#' @return A [DictionaryType] #' @seealso [Other Arrow data types][data-type] #' @export dictionary <- function(index_type, value_type, ordered = FALSE) { diff --git a/r/R/feather.R b/r/R/feather.R index 269d16811cb..34902cf5ce4 100644 --- a/r/R/feather.R +++ b/r/R/feather.R @@ -67,7 +67,7 @@ FeatherTableWriter$create <- function(stream) { #' @inheritParams read_delim_arrow #' @param ... 
additional parameters #' -#' @return A `data.frame` if `as_tibble` is `TRUE` (the default), or a [arrow::Table][arrow__Table] otherwise +#' @return A `data.frame` if `as_tibble` is `TRUE` (the default), or a [arrow::Table][Table] otherwise #' #' @export #' @examples diff --git a/r/R/io.R b/r/R/io.R index ac89b2cbd46..bb82d17a6bd 100644 --- a/r/R/io.R +++ b/r/R/io.R @@ -38,8 +38,8 @@ Writable <- R6Class("Writable", inherit = Object, #' - Buffer `Read`(`int` nbytes): Read `nbytes` bytes #' - `void` `close`(): close the stream #' -#' @rdname arrow__io__OutputStream -#' @name arrow__io__OutputStream +#' @rdname OutputStream +#' @name OutputStream OutputStream <- R6Class("OutputStream", inherit = Writable, public = list( close = function() io___OutputStream__Close(self), @@ -57,8 +57,8 @@ OutputStream <- R6Class("OutputStream", inherit = Writable, #' #' TODO #' -#' @rdname arrow__io__FileOutputStream -#' @name arrow__io__FileOutputStream +#' @rdname FileOutputStream +#' @name FileOutputStream FileOutputStream <- R6Class("FileOutputStream", inherit = OutputStream) FileOutputStream$create <- function(path) { @@ -77,8 +77,8 @@ FileOutputStream$create <- function(path) { #' #' TODO #' -#' @rdname arrow__io__MockOutputStream -#' @name arrow__io__MockOutputStream +#' @rdname MockOutputStream +#' @name MockOutputStream MockOutputStream <- R6Class("MockOutputStream", inherit = OutputStream, public = list( GetExtentBytesWritten = function() io___MockOutputStream__GetExtentBytesWritten(self) @@ -97,8 +97,8 @@ MockOutputStream$create <- function() { #' #' TODO #' -#' @rdname arrow__io__BufferOutputStream -#' @name arrow__io__BufferOutputStream +#' @rdname BufferOutputStream +#' @name BufferOutputStream BufferOutputStream <- R6Class("BufferOutputStream", inherit = OutputStream, public = list( capacity = function() io___BufferOutputStream__capacity(self), @@ -124,8 +124,8 @@ BufferOutputStream$create <- function(initial_capacity = 0L) { #' #' TODO #' -#' @rdname 
arrow__io__FixedSizeBufferWriter -#' @name arrow__io__FixedSizeBufferWriter +#' @rdname FixedSizeBufferWriter +#' @name FixedSizeBufferWriter FixedSizeBufferWriter <- R6Class("FixedSizeBufferWriter", inherit = OutputStream) FixedSizeBufferWriter$create <- function(x) { @@ -147,8 +147,8 @@ FixedSizeBufferWriter$create <- function(x) { #' #' TODO #' -#' @rdname arrow__io__Readable -#' @name arrow__io__Readable +#' @rdname Readable +#' @name Readable Readable <- R6Class("Readable", inherit = Object, public = list( Read = function(nbytes) shared_ptr(Buffer, io___Readable__Read(self, nbytes)) @@ -166,8 +166,8 @@ Readable <- R6Class("Readable", inherit = Object, #' #' TODO #' -#' @rdname arrow__io__InputStream -#' @name arrow__io__InputStream +#' @rdname InputStream +#' @name InputStream InputStream <- R6Class("InputStream", inherit = Readable, public = list( close = function() io___InputStream__Close(self) @@ -185,8 +185,8 @@ InputStream <- R6Class("InputStream", inherit = Readable, #' #' TODO #' -#' @rdname arrow__io__RandomAccessFile -#' @name arrow__io__RandomAccessFile +#' @rdname RandomAccessFile +#' @name RandomAccessFile RandomAccessFile <- R6Class("RandomAccessFile", inherit = InputStream, public = list( GetSize = function() io___RandomAccessFile__GetSize(self), @@ -225,8 +225,8 @@ RandomAccessFile <- R6Class("RandomAccessFile", inherit = InputStream, #' @seealso [mmap_open()], [mmap_create()] #' #' -#' @rdname arrow__io__MemoryMappedFile -#' @name arrow__io__MemoryMappedFile +#' @rdname MemoryMappedFile +#' @name MemoryMappedFile MemoryMappedFile <- R6Class("MemoryMappedFile", inherit = RandomAccessFile, public = list( Resize = function(size) io___MemoryMappedFile__Resize(self, size) @@ -244,8 +244,8 @@ MemoryMappedFile <- R6Class("MemoryMappedFile", inherit = RandomAccessFile, #' #' TODO #' -#' @rdname arrow__io__ReadableFile -#' @name arrow__io__ReadableFile +#' @rdname ReadableFile +#' @name ReadableFile ReadableFile <- R6Class("ReadableFile", inherit = 
RandomAccessFile) ReadableFile$create <- function(path) { @@ -262,8 +262,8 @@ ReadableFile$create <- function(path) { #' #' TODO #' -#' @rdname arrow__io__BufferReader -#' @name arrow__io__BufferReader +#' @rdname BufferReader +#' @name BufferReader BufferReader <- R6Class("BufferReader", inherit = RandomAccessFile) BufferReader$create <- function(x) { @@ -276,7 +276,7 @@ BufferReader$create <- function(x) { #' @param path file path #' @param size size in bytes #' -#' @return a [arrow::io::MemoryMappedFile][arrow__io__MemoryMappedFile] +#' @return a [arrow::io::MemoryMappedFile][MemoryMappedFile] #' #' @export mmap_create <- function(path, size) { diff --git a/r/R/json.R b/r/R/json.R index 6c3ca3dd970..5bf0e124874 100644 --- a/r/R/json.R +++ b/r/R/json.R @@ -17,7 +17,7 @@ #' Read a JSON file #' -#' Using [JsonTableReader][arrow__json__TableReader] +#' Using [JsonTableReader] #' #' @inheritParams read_delim_arrow #' @param ... Additional options, passed to `json_table_reader()` @@ -56,10 +56,10 @@ read_json_arrow <- function(file, col_select = NULL, as_tibble = TRUE, ...) 
{ #' #' @section Methods: #' -#' - `Read()` : read the JSON file as an [arrow::Table][arrow__Table] +#' - `Read()` : read the JSON file as an [arrow::Table][Table] #' -#' @rdname arrow__json__TableReader -#' @name arrow__json__TableReader +#' @rdname JsonTableReader +#' @name JsonTableReader JsonTableReader <- R6Class("JsonTableReader", inherit = Object, public = list( Read = function() shared_ptr(Table, json___TableReader__Read(self)) diff --git a/r/R/memory_pool.R b/r/R/memory_pool.R index f96ee64eb43..67b5428e089 100644 --- a/r/R/memory_pool.R +++ b/r/R/memory_pool.R @@ -27,8 +27,8 @@ #' #' TODO #' -#' @rdname arrow___MemoryPool -#' @name arrow__MemoryPool +#' @rdname MemoryPool +#' @name MemoryPool MemoryPool <- R6Class("MemoryPool", inherit = Object, public = list( @@ -40,9 +40,9 @@ MemoryPool <- R6Class("MemoryPool", ) ) -#' default [arrow::MemoryPool][arrow__MemoryPool] +#' default [arrow::MemoryPool][MemoryPool] #' -#' @return the default [arrow::MemoryPool][arrow__MemoryPool] +#' @return the default [arrow::MemoryPool][MemoryPool] #' @export default_memory_pool <- function() { shared_ptr(MemoryPool, MemoryPool__default()) diff --git a/r/R/message.R b/r/R/message.R index 60cff08da34..ff2e949a1a6 100644 --- a/r/R/message.R +++ b/r/R/message.R @@ -27,8 +27,8 @@ #' #' TODO #' -#' @rdname arrow__ipc__Message -#' @name arrow__ipc__Message +#' @rdname Message +#' @name Message Message <- R6Class("Message", inherit = Object, public = list( Equals = function(other){ @@ -58,8 +58,8 @@ Message <- R6Class("Message", inherit = Object, #' #' TODO #' -#' @rdname arrow__ipc__MessageReader -#' @name arrow__ipc__MessageReader +#' @rdname MessageReader +#' @name MessageReader #' @export MessageReader <- R6Class("MessageReader", inherit = Object, public = list( diff --git a/r/R/parquet.R b/r/R/parquet.R index d93001b4cbf..f9779b7607a 100644 --- a/r/R/parquet.R +++ b/r/R/parquet.R @@ -110,7 +110,7 @@ parquet_file_reader.raw <- function(file, props = 
parquet_arrow_reader_propertie #' @inheritParams read_delim_arrow #' @inheritParams parquet_file_reader #' -#' @return A [arrow::Table][arrow__Table], or a `data.frame` if `as_tibble` is +#' @return A [arrow::Table][Table], or a `data.frame` if `as_tibble` is #' `TRUE`. #' @examples #' \donttest{ @@ -134,7 +134,7 @@ read_parquet <- function(file, col_select = NULL, as_tibble = TRUE, props = parq #' [Parquet](https://parquet.apache.org/) is a columnar storage file format. #' This function enables you to write Parquet files from R. #' -#' @param table An [arrow::Table][arrow__Table], or an object convertible to it +#' @param table An [arrow::Table][Table], or an object convertible to it #' @param file a file path #' #' @examples diff --git a/r/R/read_record_batch.R b/r/R/read_record_batch.R index 11ac43b67f4..65754a0a063 100644 --- a/r/R/read_record_batch.R +++ b/r/R/read_record_batch.R @@ -15,12 +15,12 @@ # specific language governing permissions and limitations # under the License. -#' read [arrow::RecordBatch][arrow__RecordBatch] as encapsulated IPC message, given a known [arrow::Schema][arrow__Schema] +#' read [arrow::RecordBatch][RecordBatch] as encapsulated IPC message, given a known [arrow::Schema][schema] #' -#' @param obj a [arrow::Message][arrow__ipc__Message], a [arrow::io::InputStream][arrow__io__InputStream], a [Buffer][buffer], or a raw vector -#' @param schema a [arrow::Schema][arrow__Schema] +#' @param obj a [arrow::Message][Message], a [arrow::io::InputStream][InputStream], a [Buffer][buffer], or a raw vector +#' @param schema a [arrow::Schema][schema] #' -#' @return a [arrow::RecordBatch][arrow__RecordBatch] +#' @return a [arrow::RecordBatch][RecordBatch] #' #' @export read_record_batch <- function(obj, schema){ diff --git a/r/R/read_table.R b/r/R/read_table.R index 5b4bdc535d9..c58bd5b7be8 100644 --- a/r/R/read_table.R +++ b/r/R/read_table.R @@ -15,33 +15,33 @@ # specific language governing permissions and limitations # under the License. 
-#' Read an [arrow::Table][arrow__Table] from a stream +#' Read an [arrow::Table][Table] from a stream #' #' @param stream stream. #' -#' - a [arrow::RecordBatchFileReader][arrow__ipc__RecordBatchFileReader]: -#' read an [arrow::Table][arrow__Table] +#' - a [arrow::RecordBatchFileReader][RecordBatchFileReader]: +#' read an [arrow::Table][Table] #' from all the record batches in the reader #' -#' - a [arrow::RecordBatchStreamReader][arrow__ipc__RecordBatchStreamReader]: -#' read an [arrow::Table][arrow__Table] from the remaining record batches +#' - a [arrow::RecordBatchStreamReader][RecordBatchStreamReader]: +#' read an [arrow::Table][Table] from the remaining record batches #' in the reader #' #' - a string file path: interpret the file as an arrow -#' binary file format, and uses a [arrow::RecordBatchFileReader][arrow__ipc__RecordBatchFileReader] +#' binary file format, and uses a [arrow::RecordBatchFileReader][RecordBatchFileReader] #' to process it. #' -#' - a raw vector: read using a [arrow::RecordBatchStreamReader][arrow__ipc__RecordBatchStreamReader] +#' - a raw vector: read using a [arrow::RecordBatchStreamReader][RecordBatchStreamReader] #' #' @return #' -#' - `read_table` returns an [arrow::Table][arrow__Table] +#' - `read_table` returns an [arrow::Table][Table] #' - `read_arrow` returns a `data.frame` #' #' @details #' -#' The methods using [arrow::RecordBatchFileReader][arrow__ipc__RecordBatchFileReader] and -#' [arrow::RecordBatchStreamReader][arrow__ipc__RecordBatchStreamReader] offer the most +#' The methods using [arrow::RecordBatchFileReader][RecordBatchFileReader] and +#' [arrow::RecordBatchStreamReader][RecordBatchStreamReader] offer the most #' flexibility. The other methods are for convenience. 
#' #' @export diff --git a/r/R/type.R b/r/R/type.R index a379f81fcaa..987121fab81 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -32,8 +32,8 @@ #' #' TODO #' -#' @rdname arrow__DataType -#' @name arrow__DataType +#' @rdname DataType +#' @name DataType DataType <- R6Class("DataType", inherit = Object, public = list( @@ -125,8 +125,8 @@ type.Column <- function(x) x$type #' #' TODO #' -#' @rdname arrow__FixedWidthType -#' @name arrow__FixedWidthType +#' @rdname FixedWidthType +#' @name FixedWidthType FixedWidthType <- R6Class("FixedWidthType", inherit = DataType, active = list( diff --git a/r/R/write_arrow.R b/r/R/write_arrow.R index 7eca2dcd1fa..b74a8945013 100644 --- a/r/R/write_arrow.R +++ b/r/R/write_arrow.R @@ -28,26 +28,26 @@ to_arrow.Table <- function(x) x #' Write Arrow formatted data #' -#' @param x an [arrow::Table][arrow__Table], an [arrow::RecordBatch][arrow__RecordBatch] or a data.frame +#' @param x an [arrow::Table][Table], an [arrow::RecordBatch][RecordBatch] or a data.frame #' #' @param stream where to serialize to #' -#' - A [arrow::RecordBatchWriter][arrow__ipc__RecordBatchWriter]: the `$write()` +#' - A [arrow::RecordBatchWriter][RecordBatchWriter]: the `$write()` #' of `x` is used. The stream is left open. This uses the streaming format #' or the binary file format depending on the type of the writer. #' #' - A string file path: `x` is serialized with -#' a [arrow::RecordBatchFileWriter][arrow__ipc__RecordBatchFileWriter], i.e. +#' a [arrow::RecordBatchFileWriter][RecordBatchFileWriter], i.e. #' using the binary file format. #' #' - A raw vector: typically of length zero (its data is ignored, and only used for #' dispatch). `x` is serialized using the streaming format, i.e. using the -#' [arrow::RecordBatchStreamWriter][arrow__ipc__RecordBatchStreamWriter] +#' [arrow::RecordBatchStreamWriter][RecordBatchStreamWriter] #' #' @param ... 
extra parameters, currently ignored #' -#' `write_arrow` is a convenience function, the classes [arrow::RecordBatchFileWriter][arrow__ipc__RecordBatchFileWriter] -#' and [arrow::RecordBatchStreamWriter][arrow__ipc__RecordBatchStreamWriter] can be used for more flexibility. +#' `write_arrow` is a convenience function, the classes [arrow::RecordBatchFileWriter][RecordBatchFileWriter] +#' and [arrow::RecordBatchStreamWriter][RecordBatchStreamWriter] can be used for more flexibility. #' #' @export write_arrow <- function(x, stream, ...) { diff --git a/r/_pkgdown.yml b/r/_pkgdown.yml index 98baa35fafc..6283c5000a9 100644 --- a/r/_pkgdown.yml +++ b/r/_pkgdown.yml @@ -60,77 +60,59 @@ reference: - json_read_options - json_table_reader - parquet_file_reader + - FeatherTableReader + - FeatherTableWriter + - JsonTableReader - title: Arrow data containers contents: - buffer - array + - ArrayData - chunked_array + - ChunkedArray - record_batch - - table + - RecordBatch + - Table - read_message - read_record_batch - read_table - title: Arrow data types and schema contents: - - schema + - Schema - type - dictionary - - field + - Field - read_schema - data-type -- title: R6 classes - contents: - - arrow__Array - - arrow__ArrayData - - arrow__Buffer - - arrow__ChunkedArray - - arrow__Column - - arrow__DataType - - arrow__DictionaryType - - arrow__Field - - arrow__FixedWidthType - - arrow__RecordBatch - - arrow__RecordBatchReader - - arrow__Schema - - arrow__Table - - arrow__MemoryPool - - arrow__io__BufferOutputStream - - arrow__io__BufferReader - - arrow__io__FileOutputStream - - arrow__io__FixedSizeBufferWriter - - arrow__io__InputStream - - arrow__io__MemoryMappedFile - - arrow__io__MockOutputStream - - arrow__io__OutputStream - - arrow__io__RandomAccessFile - - arrow__io__Readable - - arrow__io__ReadableFile - - arrow__json__TableReader - - arrow__ipc__Message - - arrow__ipc__MessageReader - - arrow__ipc__RecordBatchFileReader - - arrow__ipc__RecordBatchFileWriter - - 
arrow__ipc__RecordBatchStreamReader - - arrow__ipc__RecordBatchStreamWriter - - arrow__ipc__RecordBatchWriter -- title: Other functions + - DataType + - DictionaryType + - FixedWidthType + - cast_options +- title: Input/Output contents: + - Buffer + - RecordBatchReader + - MemoryPool - BufferOutputStream - BufferReader - - CompressedInputStream - - CompressedOutputStream - - FeatherTableReader - - FeatherTableWriter - FileOutputStream - FixedSizeBufferWriter - - MessageReader + - InputStream + - MemoryMappedFile - MockOutputStream + - OutputStream + - RandomAccessFile + - Readable - ReadableFile + - Message + - MessageReader - RecordBatchFileReader - RecordBatchFileWriter - RecordBatchStreamReader - RecordBatchStreamWriter - - cast_options + - RecordBatchWriter + - CompressedInputStream + - CompressedOutputStream - compression_codec - default_memory_pool - mmap_create diff --git a/r/man/arrow__io__BufferOutputStream.Rd b/r/man/BufferOutputStream.Rd similarity index 81% rename from r/man/arrow__io__BufferOutputStream.Rd rename to r/man/BufferOutputStream.Rd index 877d596601a..b9d5fed1279 100644 --- a/r/man/arrow__io__BufferOutputStream.Rd +++ b/r/man/BufferOutputStream.Rd @@ -1,8 +1,7 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/io.R \docType{class} -\name{arrow__io__BufferOutputStream} -\alias{arrow__io__BufferOutputStream} +\name{BufferOutputStream} \alias{BufferOutputStream} \title{class arrow::io::BufferOutputStream} \format{An object of class \code{R6ClassGenerator} of length 25.} diff --git a/r/man/arrow__io__BufferReader.Rd b/r/man/BufferReader.Rd similarity index 80% rename from r/man/arrow__io__BufferReader.Rd rename to r/man/BufferReader.Rd index 47aa0b951b2..5c1ed335d35 100644 --- a/r/man/arrow__io__BufferReader.Rd +++ b/r/man/BufferReader.Rd @@ -1,8 +1,7 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/io.R \docType{class} -\name{arrow__io__BufferReader} -\alias{arrow__io__BufferReader} 
+\name{BufferReader} \alias{BufferReader} \title{class arrow::io::BufferReader} \description{ diff --git a/r/man/arrow__DataType.Rd b/r/man/DataType.Rd similarity index 83% rename from r/man/arrow__DataType.Rd rename to r/man/DataType.Rd index 81c64142043..64755c7d691 100644 --- a/r/man/arrow__DataType.Rd +++ b/r/man/DataType.Rd @@ -1,8 +1,7 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/type.R \docType{class} -\name{arrow__DataType} -\alias{arrow__DataType} +\name{DataType} \alias{DataType} \title{class arrow::DataType} \description{ diff --git a/r/man/arrow__DictionaryType.Rd b/r/man/DictionaryType.Rd similarity index 80% rename from r/man/arrow__DictionaryType.Rd rename to r/man/DictionaryType.Rd index df9ac8b2f3f..4d64cb1d986 100644 --- a/r/man/arrow__DictionaryType.Rd +++ b/r/man/DictionaryType.Rd @@ -1,8 +1,7 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/dictionary.R \docType{class} -\name{arrow__DictionaryType} -\alias{arrow__DictionaryType} +\name{DictionaryType} \alias{DictionaryType} \title{class DictionaryType} \description{ diff --git a/r/man/arrow__io__FileOutputStream.Rd b/r/man/FileOutputStream.Rd similarity index 78% rename from r/man/arrow__io__FileOutputStream.Rd rename to r/man/FileOutputStream.Rd index 582ed09a7bc..bd37eb7d6dc 100644 --- a/r/man/arrow__io__FileOutputStream.Rd +++ b/r/man/FileOutputStream.Rd @@ -1,8 +1,7 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/io.R \docType{class} -\name{arrow__io__FileOutputStream} -\alias{arrow__io__FileOutputStream} +\name{FileOutputStream} \alias{FileOutputStream} \title{class arrow::io::FileOutputStream} \description{ diff --git a/r/man/arrow__io__FixedSizeBufferWriter.Rd b/r/man/FixedSizeBufferWriter.Rd similarity index 77% rename from r/man/arrow__io__FixedSizeBufferWriter.Rd rename to r/man/FixedSizeBufferWriter.Rd index 69f069aa62e..89f61e1397d 100644 --- 
a/r/man/arrow__io__FixedSizeBufferWriter.Rd +++ b/r/man/FixedSizeBufferWriter.Rd @@ -1,8 +1,7 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/io.R \docType{class} -\name{arrow__io__FixedSizeBufferWriter} -\alias{arrow__io__FixedSizeBufferWriter} +\name{FixedSizeBufferWriter} \alias{FixedSizeBufferWriter} \title{class arrow::io::FixedSizeBufferWriter} \description{ diff --git a/r/man/arrow__FixedWidthType.Rd b/r/man/FixedWidthType.Rd similarity index 81% rename from r/man/arrow__FixedWidthType.Rd rename to r/man/FixedWidthType.Rd index 414e6a06f41..e06e8a47cc5 100644 --- a/r/man/arrow__FixedWidthType.Rd +++ b/r/man/FixedWidthType.Rd @@ -1,8 +1,7 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/type.R \docType{class} -\name{arrow__FixedWidthType} -\alias{arrow__FixedWidthType} +\name{FixedWidthType} \alias{FixedWidthType} \title{class arrow::FixedWidthType} \description{ diff --git a/r/man/arrow__io__InputStream.Rd b/r/man/InputStream.Rd similarity index 80% rename from r/man/arrow__io__InputStream.Rd rename to r/man/InputStream.Rd index bc539c4262c..d1fb12c6929 100644 --- a/r/man/arrow__io__InputStream.Rd +++ b/r/man/InputStream.Rd @@ -1,8 +1,7 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/io.R \docType{class} -\name{arrow__io__InputStream} -\alias{arrow__io__InputStream} +\name{InputStream} \alias{InputStream} \title{class arrow::io::InputStream} \description{ diff --git a/r/man/arrow__json__TableReader.Rd b/r/man/JsonTableReader.Rd similarity index 62% rename from r/man/arrow__json__TableReader.Rd rename to r/man/JsonTableReader.Rd index 387a9db3039..80867df82c7 100644 --- a/r/man/arrow__json__TableReader.Rd +++ b/r/man/JsonTableReader.Rd @@ -1,8 +1,7 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/json.R \docType{class} -\name{arrow__json__TableReader} -\alias{arrow__json__TableReader} +\name{JsonTableReader} \alias{JsonTableReader} 
\title{class JsonTableReader} \description{ @@ -11,7 +10,7 @@ class JsonTableReader \section{Methods}{ \itemize{ -\item \code{Read()} : read the JSON file as an \link[=arrow__Table]{arrow::Table} +\item \code{Read()} : read the JSON file as an \link[=Table]{arrow::Table} } } diff --git a/r/man/arrow__io__MemoryMappedFile.Rd b/r/man/MemoryMappedFile.Rd similarity index 83% rename from r/man/arrow__io__MemoryMappedFile.Rd rename to r/man/MemoryMappedFile.Rd index 6b21cebd0e8..02f7120a614 100644 --- a/r/man/arrow__io__MemoryMappedFile.Rd +++ b/r/man/MemoryMappedFile.Rd @@ -1,8 +1,7 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/io.R \docType{class} -\name{arrow__io__MemoryMappedFile} -\alias{arrow__io__MemoryMappedFile} +\name{MemoryMappedFile} \alias{MemoryMappedFile} \title{class arrow::io::MemoryMappedFile} \description{ diff --git a/r/man/arrow___MemoryPool.Rd b/r/man/MemoryPool.Rd similarity index 82% rename from r/man/arrow___MemoryPool.Rd rename to r/man/MemoryPool.Rd index 0189a98c13c..c9d49657f9a 100644 --- a/r/man/arrow___MemoryPool.Rd +++ b/r/man/MemoryPool.Rd @@ -1,8 +1,7 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/memory_pool.R \docType{class} -\name{arrow__MemoryPool} -\alias{arrow__MemoryPool} +\name{MemoryPool} \alias{MemoryPool} \title{class arrow::MemoryPool} \description{ diff --git a/r/man/arrow__ipc__Message.Rd b/r/man/Message.Rd similarity index 80% rename from r/man/arrow__ipc__Message.Rd rename to r/man/Message.Rd index 86b0539019c..f699d513b72 100644 --- a/r/man/arrow__ipc__Message.Rd +++ b/r/man/Message.Rd @@ -1,8 +1,7 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/message.R \docType{class} -\name{arrow__ipc__Message} -\alias{arrow__ipc__Message} +\name{Message} \alias{Message} \title{class arrow::Message} \description{ diff --git a/r/man/arrow__ipc__MessageReader.Rd b/r/man/MessageReader.Rd similarity index 78% rename from 
r/man/arrow__ipc__MessageReader.Rd rename to r/man/MessageReader.Rd index 7417033444e..cabfa66eb86 100644 --- a/r/man/arrow__ipc__MessageReader.Rd +++ b/r/man/MessageReader.Rd @@ -1,8 +1,7 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/message.R \docType{class} -\name{arrow__ipc__MessageReader} -\alias{arrow__ipc__MessageReader} +\name{MessageReader} \alias{MessageReader} \title{class arrow::MessageReader} \description{ diff --git a/r/man/arrow__io__MockOutputStream.Rd b/r/man/MockOutputStream.Rd similarity index 78% rename from r/man/arrow__io__MockOutputStream.Rd rename to r/man/MockOutputStream.Rd index 68b6b7d616f..35392155399 100644 --- a/r/man/arrow__io__MockOutputStream.Rd +++ b/r/man/MockOutputStream.Rd @@ -1,8 +1,7 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/io.R \docType{class} -\name{arrow__io__MockOutputStream} -\alias{arrow__io__MockOutputStream} +\name{MockOutputStream} \alias{MockOutputStream} \title{class arrow::io::MockOutputStream} \description{ diff --git a/r/man/arrow__io__OutputStream.Rd b/r/man/OutputStream.Rd similarity index 84% rename from r/man/arrow__io__OutputStream.Rd rename to r/man/OutputStream.Rd index 0e84b3e9181..87a79ac578e 100644 --- a/r/man/arrow__io__OutputStream.Rd +++ b/r/man/OutputStream.Rd @@ -1,8 +1,7 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/io.R \docType{class} -\name{arrow__io__OutputStream} -\alias{arrow__io__OutputStream} +\name{OutputStream} \alias{OutputStream} \title{OutputStream} \description{ diff --git a/r/man/arrow__io__RandomAccessFile.Rd b/r/man/RandomAccessFile.Rd similarity index 78% rename from r/man/arrow__io__RandomAccessFile.Rd rename to r/man/RandomAccessFile.Rd index 0ff85917b25..ac53ac5b98c 100644 --- a/r/man/arrow__io__RandomAccessFile.Rd +++ b/r/man/RandomAccessFile.Rd @@ -1,8 +1,7 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/io.R \docType{class} 
-\name{arrow__io__RandomAccessFile} -\alias{arrow__io__RandomAccessFile} +\name{RandomAccessFile} \alias{RandomAccessFile} \title{class arrow::io::RandomAccessFile} \description{ diff --git a/r/man/arrow__io__Readable.Rd b/r/man/Readable.Rd similarity index 81% rename from r/man/arrow__io__Readable.Rd rename to r/man/Readable.Rd index 323f6fd20ea..5f46c7ec479 100644 --- a/r/man/arrow__io__Readable.Rd +++ b/r/man/Readable.Rd @@ -1,8 +1,7 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/io.R \docType{class} -\name{arrow__io__Readable} -\alias{arrow__io__Readable} +\name{Readable} \alias{Readable} \title{class arrow::io::Readable} \description{ diff --git a/r/man/arrow__io__ReadableFile.Rd b/r/man/ReadableFile.Rd similarity index 80% rename from r/man/arrow__io__ReadableFile.Rd rename to r/man/ReadableFile.Rd index b40d9c017a2..8cd3960b918 100644 --- a/r/man/arrow__io__ReadableFile.Rd +++ b/r/man/ReadableFile.Rd @@ -1,8 +1,7 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/io.R \docType{class} -\name{arrow__io__ReadableFile} -\alias{arrow__io__ReadableFile} +\name{ReadableFile} \alias{ReadableFile} \title{class arrow::io::ReadableFile} \description{ diff --git a/r/man/arrow__RecordBatch.Rd b/r/man/RecordBatch.Rd similarity index 82% rename from r/man/arrow__RecordBatch.Rd rename to r/man/RecordBatch.Rd index 04468c185d5..a2b333c2944 100644 --- a/r/man/arrow__RecordBatch.Rd +++ b/r/man/RecordBatch.Rd @@ -1,8 +1,7 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/RecordBatch.R \docType{class} -\name{arrow__RecordBatch} -\alias{arrow__RecordBatch} +\name{RecordBatch} \alias{RecordBatch} \title{class arrow::RecordBatch} \description{ diff --git a/r/man/arrow__ipc__RecordBatchFileReader.Rd b/r/man/RecordBatchFileReader.Rd similarity index 77% rename from r/man/arrow__ipc__RecordBatchFileReader.Rd rename to r/man/RecordBatchFileReader.Rd index 7dc2b48da6b..574d7fc3252 100644 --- 
a/r/man/arrow__ipc__RecordBatchFileReader.Rd +++ b/r/man/RecordBatchFileReader.Rd @@ -1,8 +1,7 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/RecordBatchReader.R \docType{class} -\name{arrow__ipc__RecordBatchFileReader} -\alias{arrow__ipc__RecordBatchFileReader} +\name{RecordBatchFileReader} \alias{RecordBatchFileReader} \title{class arrow::RecordBatchFileReader} \description{ diff --git a/r/man/arrow__ipc__RecordBatchFileWriter.Rd b/r/man/RecordBatchFileWriter.Rd similarity index 83% rename from r/man/arrow__ipc__RecordBatchFileWriter.Rd rename to r/man/RecordBatchFileWriter.Rd index 6f0f5e70f29..8a80e25d1fc 100644 --- a/r/man/arrow__ipc__RecordBatchFileWriter.Rd +++ b/r/man/RecordBatchFileWriter.Rd @@ -1,8 +1,7 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/RecordBatchWriter.R \docType{class} -\name{arrow__ipc__RecordBatchFileWriter} -\alias{arrow__ipc__RecordBatchFileWriter} +\name{RecordBatchFileWriter} \alias{RecordBatchFileWriter} \title{class arrow::RecordBatchFileWriter @@ -29,7 +28,7 @@ The \code{\link[=RecordBatchFileWriter]{RecordBatchFileWriter()}} function creat \section{Methods}{ -inherited from \link[=arrow__ipc__RecordBatchWriter]{arrow::RecordBatchWriter} +inherited from \link[=RecordBatchWriter]{arrow::RecordBatchWriter} \itemize{ \item \code{$write_batch(batch)}: Write record batch to stream \item \code{$write_table(table)}: write Table to stream diff --git a/r/man/arrow__RecordBatchReader.Rd b/r/man/RecordBatchReader.Rd similarity index 80% rename from r/man/arrow__RecordBatchReader.Rd rename to r/man/RecordBatchReader.Rd index d66e4c833e0..c2121cd7014 100644 --- a/r/man/arrow__RecordBatchReader.Rd +++ b/r/man/RecordBatchReader.Rd @@ -1,8 +1,7 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/RecordBatchReader.R \docType{class} -\name{arrow__RecordBatchReader} -\alias{arrow__RecordBatchReader} +\name{RecordBatchReader} \alias{RecordBatchReader} 
\title{class arrow::RecordBatchReader} \description{ diff --git a/r/man/arrow__ipc__RecordBatchStreamReader.Rd b/r/man/RecordBatchStreamReader.Rd similarity index 77% rename from r/man/arrow__ipc__RecordBatchStreamReader.Rd rename to r/man/RecordBatchStreamReader.Rd index feb24ea9add..f20c78fe2d6 100644 --- a/r/man/arrow__ipc__RecordBatchStreamReader.Rd +++ b/r/man/RecordBatchStreamReader.Rd @@ -1,8 +1,7 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/RecordBatchReader.R \docType{class} -\name{arrow__ipc__RecordBatchStreamReader} -\alias{arrow__ipc__RecordBatchStreamReader} +\name{RecordBatchStreamReader} \alias{RecordBatchStreamReader} \title{class arrow::RecordBatchStreamReader} \description{ diff --git a/r/man/arrow__ipc__RecordBatchStreamWriter.Rd b/r/man/RecordBatchStreamWriter.Rd similarity index 83% rename from r/man/arrow__ipc__RecordBatchStreamWriter.Rd rename to r/man/RecordBatchStreamWriter.Rd index 12308e4688e..169dd988616 100644 --- a/r/man/arrow__ipc__RecordBatchStreamWriter.Rd +++ b/r/man/RecordBatchStreamWriter.Rd @@ -1,8 +1,7 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/RecordBatchWriter.R \docType{class} -\name{arrow__ipc__RecordBatchStreamWriter} -\alias{arrow__ipc__RecordBatchStreamWriter} +\name{RecordBatchStreamWriter} \alias{RecordBatchStreamWriter} \title{class arrow::RecordBatchStreamWriter @@ -29,7 +28,7 @@ The \code{\link[=RecordBatchStreamWriter]{RecordBatchStreamWriter()}} function c \section{Methods}{ -inherited from \link[=arrow__ipc__RecordBatchWriter]{arrow::RecordBatchWriter} +inherited from \link[=RecordBatchWriter]{arrow::RecordBatchWriter} \itemize{ \item \code{$write_batch(batch)}: Write record batch to stream \item \code{$write_table(table)}: write Table to stream diff --git a/r/man/arrow__ipc__RecordBatchWriter.Rd b/r/man/RecordBatchWriter.Rd similarity index 60% rename from r/man/arrow__ipc__RecordBatchWriter.Rd rename to r/man/RecordBatchWriter.Rd index 
de83e9b8488..e48aad74b3d 100644 --- a/r/man/arrow__ipc__RecordBatchWriter.Rd +++ b/r/man/RecordBatchWriter.Rd @@ -1,8 +1,7 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/RecordBatchWriter.R \docType{class} -\name{arrow__ipc__RecordBatchWriter} -\alias{arrow__ipc__RecordBatchWriter} +\name{RecordBatchWriter} \alias{RecordBatchWriter} \title{class arrow::RecordBatchWriter} \description{ @@ -20,8 +19,8 @@ class arrow::RecordBatchWriter \section{Derived classes}{ \itemize{ -\item \link[=arrow__ipc__RecordBatchStreamWriter]{arrow::RecordBatchStreamWriter} implements the streaming binary format -\item \link[=arrow__ipc__RecordBatchFileWriter]{arrow::RecordBatchFileWriter} implements the binary file format +\item \link[=RecordBatchStreamWriter]{arrow::RecordBatchStreamWriter} implements the streaming binary format +\item \link[=RecordBatchFileWriter]{arrow::RecordBatchFileWriter} implements the binary file format } } diff --git a/r/man/arrow__Table.Rd b/r/man/Table.Rd similarity index 92% rename from r/man/arrow__Table.Rd rename to r/man/Table.Rd index cb70c7c6d06..19747ea7de5 100644 --- a/r/man/arrow__Table.Rd +++ b/r/man/Table.Rd @@ -1,8 +1,7 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/Table.R \docType{class} -\name{arrow__Table} -\alias{arrow__Table} +\name{Table} \alias{Table} \title{class arrow::Table} \description{ diff --git a/r/man/arrow__Field.Rd b/r/man/arrow__Field.Rd deleted file mode 100644 index 9dfb114ba8f..00000000000 --- a/r/man/arrow__Field.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Field.R -\docType{class} -\name{arrow__Field} -\alias{arrow__Field} -\alias{Field} -\title{class arrow::Field} -\description{ -class arrow::Field -} -\section{Methods}{ - - -TODO -} - -\keyword{datasets} diff --git a/r/man/arrow__Schema.Rd b/r/man/arrow__Schema.Rd deleted file mode 100644 index 6279a768091..00000000000 --- 
a/r/man/arrow__Schema.Rd +++ /dev/null @@ -1,29 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Schema.R -\docType{class} -\name{arrow__Schema} -\alias{arrow__Schema} -\alias{Schema} -\title{class arrow::Schema} -\description{ -class arrow::Schema -} -\section{Usage}{ -\preformatted{s <- schema(...) - -s$ToString() -s$num_fields() -s$field(i) -} -} - -\section{Methods}{ - -\itemize{ -\item \code{$ToString()}: convert to a string -\item \code{$num_fields()}: returns the number of fields -\item \code{$field(i)}: returns the field at index \code{i} (0-based) -} -} - -\keyword{datasets} diff --git a/r/man/default_memory_pool.Rd b/r/man/default_memory_pool.Rd index 1725ff0e10a..e33c07bb50a 100644 --- a/r/man/default_memory_pool.Rd +++ b/r/man/default_memory_pool.Rd @@ -2,13 +2,13 @@ % Please edit documentation in R/memory_pool.R \name{default_memory_pool} \alias{default_memory_pool} -\title{default \link[=arrow__MemoryPool]{arrow::MemoryPool}} +\title{default \link[=MemoryPool]{arrow::MemoryPool}} \usage{ default_memory_pool() } \value{ -the default \link[=arrow__MemoryPool]{arrow::MemoryPool} +the default \link[=MemoryPool]{arrow::MemoryPool} } \description{ -default \link[=arrow__MemoryPool]{arrow::MemoryPool} +default \link[=MemoryPool]{arrow::MemoryPool} } diff --git a/r/man/dictionary.Rd b/r/man/dictionary.Rd index 5eb4c802159..183513e8c47 100644 --- a/r/man/dictionary.Rd +++ b/r/man/dictionary.Rd @@ -14,7 +14,7 @@ dictionary(index_type, value_type, ordered = FALSE) \item{ordered}{Is this an ordered dictionary ?} } \value{ -An \link[=arrow__DictionaryType]{DictionaryType} +A \link{DictionaryType} } \description{ Create a dictionary type diff --git a/r/man/field.Rd b/r/man/field.Rd index 8fa371736eb..846a0b0b1b8 100644 --- a/r/man/field.Rd +++ b/r/man/field.Rd @@ -1,25 +1,37 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/Field.R -\name{field} +\docType{class} +\name{Field} +\alias{Field} 
\alias{field} -\title{Factory for a Field} +\title{class arrow::Field} +\format{An object of class \code{R6ClassGenerator} of length 24.} \usage{ +Field + field(name, type, metadata) } \arguments{ \item{name}{field name} -\item{type}{logical type, instance of DataType} +\item{type}{logical type, instance of \link{DataType}} \item{metadata}{currently ignored} } \description{ -Factory for a Field +class arrow::Field +} +\section{Methods}{ + +\itemize{ +\item \code{f$ToString()}: convert to a string +\item \code{f$Equals(other)}: test for equality. More naturally called as \code{f == other} } +} + \examples{ \donttest{ -try({ - field("x", int32()) -}) +field("x", int32()) } } +\keyword{datasets} diff --git a/r/man/mmap_create.Rd b/r/man/mmap_create.Rd index 050ae18c76f..b8551934808 100644 --- a/r/man/mmap_create.Rd +++ b/r/man/mmap_create.Rd @@ -12,7 +12,7 @@ mmap_create(path, size) \item{size}{size in bytes} } \value{ -a \link[=arrow__io__MemoryMappedFile]{arrow::io::MemoryMappedFile} +a \link[=MemoryMappedFile]{arrow::io::MemoryMappedFile} } \description{ Create a new read/write memory mapped file of a given size diff --git a/r/man/read_delim_arrow.Rd b/r/man/read_delim_arrow.Rd index 7e4ef997d93..4e3522cbbf1 100644 --- a/r/man/read_delim_arrow.Rd +++ b/r/man/read_delim_arrow.Rd @@ -71,7 +71,7 @@ parsing options provided in other arguments (e.g. \code{delim}, \code{quote}, et \item{read_options}{see \code{\link[=csv_read_options]{csv_read_options()}}} \item{as_tibble}{Should the function return a \code{data.frame} or an -\link[=arrow__Table]{arrow::Table}?} +\link[=Table]{arrow::Table}?} } \value{ A \code{data.frame}, or an Table if \code{as_tibble = FALSE}. diff --git a/r/man/read_feather.Rd b/r/man/read_feather.Rd index 48ff908721e..ead6e32e22e 100644 --- a/r/man/read_feather.Rd +++ b/r/man/read_feather.Rd @@ -16,12 +16,12 @@ read_feather(file, col_select = NULL, as_tibble = TRUE, ...) 
of columns, as used in \code{dplyr::select()}.} \item{as_tibble}{Should the function return a \code{data.frame} or an -\link[=arrow__Table]{arrow::Table}?} +\link[=Table]{arrow::Table}?} \item{...}{additional parameters} } \value{ -A \code{data.frame} if \code{as_tibble} is \code{TRUE} (the default), or a \link[=arrow__Table]{arrow::Table} otherwise +A \code{data.frame} if \code{as_tibble} is \code{TRUE} (the default), or a \link[=Table]{arrow::Table} otherwise } \description{ Read a Feather file diff --git a/r/man/read_json_arrow.Rd b/r/man/read_json_arrow.Rd index 06bc09cf431..e870b4174c9 100644 --- a/r/man/read_json_arrow.Rd +++ b/r/man/read_json_arrow.Rd @@ -15,7 +15,7 @@ read_json_arrow(file, col_select = NULL, as_tibble = TRUE, ...) of columns, as used in \code{dplyr::select()}.} \item{as_tibble}{Should the function return a \code{data.frame} or an -\link[=arrow__Table]{arrow::Table}?} +\link[=Table]{arrow::Table}?} \item{...}{Additional options, passed to \code{json_table_reader()}} } @@ -23,7 +23,7 @@ of columns, as used in \code{dplyr::select()}.} A \code{data.frame}, or an Table if \code{as_tibble = FALSE}. 
} \description{ -Using \link[=arrow__json__TableReader]{JsonTableReader} +Using \link{JsonTableReader} } \examples{ \donttest{ diff --git a/r/man/read_parquet.Rd b/r/man/read_parquet.Rd index e63e30cc297..8bf07fb0f4c 100644 --- a/r/man/read_parquet.Rd +++ b/r/man/read_parquet.Rd @@ -16,14 +16,14 @@ read_parquet(file, col_select = NULL, as_tibble = TRUE, of columns, as used in \code{dplyr::select()}.} \item{as_tibble}{Should the function return a \code{data.frame} or an -\link[=arrow__Table]{arrow::Table}?} +\link[=Table]{arrow::Table}?} \item{props}{reader file properties, as created by \code{\link[=parquet_arrow_reader_properties]{parquet_arrow_reader_properties()}}} \item{...}{additional parameters} } \value{ -A \link[=arrow__Table]{arrow::Table}, or a \code{data.frame} if \code{as_tibble} is +A \link[=Table]{arrow::Table}, or a \code{data.frame} if \code{as_tibble} is \code{TRUE}. } \description{ diff --git a/r/man/read_record_batch.Rd b/r/man/read_record_batch.Rd index 1514bbdf077..ba91ce0d66f 100644 --- a/r/man/read_record_batch.Rd +++ b/r/man/read_record_batch.Rd @@ -2,18 +2,18 @@ % Please edit documentation in R/read_record_batch.R \name{read_record_batch} \alias{read_record_batch} -\title{read \link[=arrow__RecordBatch]{arrow::RecordBatch} as encapsulated IPC message, given a known \link[=arrow__Schema]{arrow::Schema}} +\title{read \link[=RecordBatch]{arrow::RecordBatch} as encapsulated IPC message, given a known \link[=schema]{arrow::Schema}} \usage{ read_record_batch(obj, schema) } \arguments{ -\item{obj}{a \link[=arrow__ipc__Message]{arrow::Message}, a \link[=arrow__io__InputStream]{arrow::io::InputStream}, a \link[=buffer]{Buffer}, or a raw vector} +\item{obj}{a \link[=Message]{arrow::Message}, a \link[=InputStream]{arrow::io::InputStream}, a \link[=buffer]{Buffer}, or a raw vector} -\item{schema}{a \link[=arrow__Schema]{arrow::Schema}} +\item{schema}{a \link[=schema]{arrow::Schema}} } \value{ -a \link[=arrow__RecordBatch]{arrow::RecordBatch} +a 
\link[=RecordBatch]{arrow::RecordBatch} } \description{ -read \link[=arrow__RecordBatch]{arrow::RecordBatch} as encapsulated IPC message, given a known \link[=arrow__Schema]{arrow::Schema} +read \link[=RecordBatch]{arrow::RecordBatch} as encapsulated IPC message, given a known \link[=schema]{arrow::Schema} } diff --git a/r/man/read_table.Rd b/r/man/read_table.Rd index 421139df1b9..9f76fec5421 100644 --- a/r/man/read_table.Rd +++ b/r/man/read_table.Rd @@ -3,7 +3,7 @@ \name{read_table} \alias{read_table} \alias{read_arrow} -\title{Read an \link[=arrow__Table]{arrow::Table} from a stream} +\title{Read an \link[=Table]{arrow::Table} from a stream} \usage{ read_table(stream) @@ -12,29 +12,29 @@ read_arrow(stream) \arguments{ \item{stream}{stream. \itemize{ -\item a \link[=arrow__ipc__RecordBatchFileReader]{arrow::RecordBatchFileReader}: -read an \link[=arrow__Table]{arrow::Table} +\item a \link[=RecordBatchFileReader]{arrow::RecordBatchFileReader}: +read an \link[=Table]{arrow::Table} from all the record batches in the reader -\item a \link[=arrow__ipc__RecordBatchStreamReader]{arrow::RecordBatchStreamReader}: -read an \link[=arrow__Table]{arrow::Table} from the remaining record batches +\item a \link[=RecordBatchStreamReader]{arrow::RecordBatchStreamReader}: +read an \link[=Table]{arrow::Table} from the remaining record batches in the reader \item a string file path: interpret the file as an arrow -binary file format, and uses a \link[=arrow__ipc__RecordBatchFileReader]{arrow::RecordBatchFileReader} +binary file format, and uses a \link[=RecordBatchFileReader]{arrow::RecordBatchFileReader} to process it. 
-\item a raw vector: read using a \link[=arrow__ipc__RecordBatchStreamReader]{arrow::RecordBatchStreamReader} +\item a raw vector: read using a \link[=RecordBatchStreamReader]{arrow::RecordBatchStreamReader} }} } \value{ \itemize{ -\item \code{read_table} returns an \link[=arrow__Table]{arrow::Table} +\item \code{read_table} returns an \link[=Table]{arrow::Table} \item \code{read_arrow} returns a \code{data.frame} } } \description{ -Read an \link[=arrow__Table]{arrow::Table} from a stream +Read an \link[=Table]{arrow::Table} from a stream } \details{ -The methods using \link[=arrow__ipc__RecordBatchFileReader]{arrow::RecordBatchFileReader} and -\link[=arrow__ipc__RecordBatchStreamReader]{arrow::RecordBatchStreamReader} offer the most +The methods using \link[=RecordBatchFileReader]{arrow::RecordBatchFileReader} and +\link[=RecordBatchStreamReader]{arrow::RecordBatchStreamReader} offer the most flexibility. The other methods are for convenience. } diff --git a/r/man/record_batch.Rd b/r/man/record_batch.Rd index 44efe2e2217..ab91e543725 100644 --- a/r/man/record_batch.Rd +++ b/r/man/record_batch.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/RecordBatch.R \name{record_batch} \alias{record_batch} -\title{Create an \link[=arrow__RecordBatch]{arrow::RecordBatch} from a data frame} +\title{Create an \link[=RecordBatch]{arrow::RecordBatch} from a data frame} \usage{ record_batch(..., schema = NULL) } @@ -12,8 +12,8 @@ record_batch(..., schema = NULL) \item{schema}{a arrow::Schema} } \value{ -a \link[=arrow__RecordBatch]{arrow::RecordBatch} +a \link[=RecordBatch]{arrow::RecordBatch} } \description{ -Create an \link[=arrow__RecordBatch]{arrow::RecordBatch} from a data frame +Create an \link[=RecordBatch]{arrow::RecordBatch} from a data frame } diff --git a/r/man/schema.Rd b/r/man/schema.Rd index 622e5a7e94c..7af930db132 100644 --- a/r/man/schema.Rd +++ b/r/man/schema.Rd @@ -1,20 +1,38 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in 
R/Schema.R -\name{schema} +\docType{class} +\name{Schema} +\alias{Schema} \alias{schema} -\title{Create a schema} +\title{class arrow::Schema} \usage{ schema(...) } \arguments{ \item{...}{named list of \link[=data-type]{data types}} } -\value{ -A \link[=arrow__Schema]{schema} object. -} \description{ -This function lets you define a schema for a table. This is useful when you +Create a \code{Schema} when you want to convert an R \code{data.frame} to Arrow but don't want to rely on the default mapping of R types to Arrow types, such as when you want to choose a specific numeric precision. } +\section{Usage}{ +\preformatted{s <- schema(...) + +s$ToString() +s$num_fields() +s$field(i) +} +} + +\section{Methods}{ + +\itemize{ +\item \code{$ToString()}: convert to a string +\item \code{$num_fields()}: returns the number of fields +\item \code{$field(i)}: returns the field at index \code{i} (0-based) +} +} + +\keyword{datasets} diff --git a/r/man/write_arrow.Rd b/r/man/write_arrow.Rd index 815111e763b..a1091a34e61 100644 --- a/r/man/write_arrow.Rd +++ b/r/man/write_arrow.Rd @@ -7,25 +7,25 @@ write_arrow(x, stream, ...) } \arguments{ -\item{x}{an \link[=arrow__Table]{arrow::Table}, an \link[=arrow__RecordBatch]{arrow::RecordBatch} or a data.frame} +\item{x}{an \link[=Table]{arrow::Table}, an \link[=RecordBatch]{arrow::RecordBatch} or a data.frame} \item{stream}{where to serialize to \itemize{ -\item A \link[=arrow__ipc__RecordBatchWriter]{arrow::RecordBatchWriter}: the \code{$write()} +\item A \link[=RecordBatchWriter]{arrow::RecordBatchWriter}: the \code{$write()} of \code{x} is used. The stream is left open. This uses the streaming format or the binary file format depending on the type of the writer. \item A string file path: \code{x} is serialized with -a \link[=arrow__ipc__RecordBatchFileWriter]{arrow::RecordBatchFileWriter}, i.e. +a \link[=RecordBatchFileWriter]{arrow::RecordBatchFileWriter}, i.e. using the binary file format. 
\item A raw vector: typically of length zero (its data is ignored, and only used for dispatch). \code{x} is serialized using the streaming format, i.e. using the -\link[=arrow__ipc__RecordBatchStreamWriter]{arrow::RecordBatchStreamWriter} +\link[=RecordBatchStreamWriter]{arrow::RecordBatchStreamWriter} }} \item{...}{extra parameters, currently ignored -\code{write_arrow} is a convenience function, the classes \link[=arrow__ipc__RecordBatchFileWriter]{arrow::RecordBatchFileWriter} -and \link[=arrow__ipc__RecordBatchStreamWriter]{arrow::RecordBatchStreamWriter} can be used for more flexibility.} +\code{write_arrow} is a convenience function, the classes \link[=RecordBatchFileWriter]{arrow::RecordBatchFileWriter} +and \link[=RecordBatchStreamWriter]{arrow::RecordBatchStreamWriter} can be used for more flexibility.} } \description{ Write Arrow formatted data diff --git a/r/man/write_parquet.Rd b/r/man/write_parquet.Rd index 192d950d82a..d24901a292a 100644 --- a/r/man/write_parquet.Rd +++ b/r/man/write_parquet.Rd @@ -7,7 +7,7 @@ write_parquet(table, file) } \arguments{ -\item{table}{An \link[=arrow__Table]{arrow::Table}, or an object convertible to it} +\item{table}{An \link[=Table]{arrow::Table}, or an object convertible to it} \item{file}{a file path} } From 85a8d3631127f7fa681b3c34dc8f5e1299c39351 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Thu, 5 Sep 2019 20:06:56 -0700 Subject: [PATCH 21/37] Start vignette draft explaining the class and naming conventions --- r/vignettes/arrow.Rmd | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 r/vignettes/arrow.Rmd diff --git a/r/vignettes/arrow.Rmd b/r/vignettes/arrow.Rmd new file mode 100644 index 00000000000..9ce29c59b69 --- /dev/null +++ b/r/vignettes/arrow.Rmd @@ -0,0 +1,21 @@ +--- +title: "Using the Arrow C++ Library in R" +description: "This document describes the low-level interface to the Apache Arrow C++ library in R and reviews the patterns and conventions of the R package." 
+output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Using the Arrow C++ Library in R} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +The Apache Arrow C++ library provides rich, powerful features for working with columnar data. The `arrow` R package provides both a low-level interface to the C++ library and some higher-level, R-flavored tools for working with it. This vignette provides an overview of how the pieces fit together, and it describes the conventions that the classes and methods follow in R. + +# Class structure + +C++ is an object-oriented language, so the core logic of the Arrow library is encapsulated in classes and methods. In the R package, these classes are implemented as `R6` reference classes, most of which are exported from the namespace. + +In order to match the C++ naming conventions, the `R6` classes are in TitleCase, e.g. `RecordBatch`. This makes it easy to look up the relevant C++ implementations in the [code](https://github.com/apache/arrow/tree/master/cpp) or [documentation](https://arrow.apache.org/docs/cpp/). To simplify things in R, the C++ library namespaces are generally dropped or flattened; that is, where the C++ library has `arrow::io::FileOutputStream`, it is just `FileOutputStream` in the R package. One exception is for the file readers, where the namespace is necessary to disambiguate. So `arrow::csv::TableReader` becomes `CsvTableReader`, and `arrow::json::TableReader` becomes `JsonTableReader`. + +Some of these classes are not meant to be instantiated directly; they may be base classes or other kinds of helpers. For those that you should be able to create, use the `$create()` method to instantiate an object. For example, `rb <- RecordBatch$create(int = 1:10, dbl = as.numeric(1:10))` will create a `RecordBatch`. Many of these factory methods that an R user might most often encounter also have a `snake_case` alias, in order to be more familiar for contemporary R users. 
So `record_batch(int = 1:10, dbl = as.numeric(1:10))` would do the same as `RecordBatch$create()` above. + +The typical user of the `arrow` R package may never deal directly with the `R6` objects. We provide more R-friendly wrapper functions as a higher-level interface to the C++ library. An R user can call `read_parquet()` without knowing or caring that they're instantiating a `ParquetFileReader` object and calling the `$ReadFile()` method on it. The classes are there and available to the advanced programmer who wants fine-grained control over how the C++ library is used. From 3e4cfe71cdb1c0cfe5166b38878f2ea0adefad71 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Fri, 6 Sep 2019 09:25:32 -0700 Subject: [PATCH 22/37] Clean up parquet classes and document the R6 --- dev/release/rat_exclude_files.txt | 1 + r/NAMESPACE | 7 +- r/R/Table.R | 2 +- r/R/parquet.R | 187 ++++++++++++++--------- r/_pkgdown.yml | 5 +- r/man/ParquetFileReader.Rd | 43 ++++++ r/man/ParquetReaderProperties.Rd | 27 ++++ r/man/Table.Rd | 2 +- r/man/parquet_arrow_reader_properties.Rd | 15 -- r/man/parquet_file_reader.Rd | 18 --- r/man/read_parquet.Rd | 11 +- 11 files changed, 197 insertions(+), 121 deletions(-) create mode 100644 r/man/ParquetFileReader.Rd create mode 100644 r/man/ParquetReaderProperties.Rd delete mode 100644 r/man/parquet_arrow_reader_properties.Rd delete mode 100644 r/man/parquet_file_reader.Rd diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index 32e0b72af60..8ba64d19217 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -213,6 +213,7 @@ r/README.md r/README.Rmd r/man/*.Rd r/cran-comments.md +r/vignettes/*.Rmd .gitattributes ruby/red-arrow/.yardopts rust/arrow/test/data/*.csv diff --git a/r/NAMESPACE b/r/NAMESPACE index de29b9c34b9..d5904e0022c 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -14,9 +14,6 @@ S3method(dim,RecordBatch) S3method(dim,Table) S3method(length,Array) S3method(names,RecordBatch) 
-S3method(parquet_file_reader,RandomAccessFile) -S3method(parquet_file_reader,character) -S3method(parquet_file_reader,raw) S3method(print,"arrow-enum") S3method(read_message,InputStream) S3method(read_message,MessageReader) @@ -48,6 +45,8 @@ export(DateUnit) export(FileMode) export(MessageReader) export(MessageType) +export(ParquetFileReader) +export(ParquetReaderProperties) export(StatusCode) export(Table) export(TimeUnit) @@ -95,8 +94,6 @@ export(mmap_open) export(null) export(num_range) export(one_of) -export(parquet_arrow_reader_properties) -export(parquet_file_reader) export(read_arrow) export(read_csv_arrow) export(read_delim_arrow) diff --git a/r/R/Table.R b/r/R/Table.R index 91764994d4a..8e0e2320db7 100644 --- a/r/R/Table.R +++ b/r/R/Table.R @@ -27,7 +27,7 @@ #' #' The `Table$create()` function takes the following arguments: #' -#' * `...`` arrays, chunked arrays, or R vectors +#' * `...` arrays, chunked arrays, or R vectors #' * `schema` a schema. The default (`NULL`) infers the schema from the `...` #' #' @section Methods: diff --git a/r/R/parquet.R b/r/R/parquet.R index f9779b7607a..a77c5facdcd 100644 --- a/r/R/parquet.R +++ b/r/R/parquet.R @@ -15,14 +15,79 @@ # specific language governing permissions and limitations # under the License. -#' @include arrow-package.R -`parquet::arrow::FileReader` <- R6Class("parquet::arrow::FileReader", +#' Read a Parquet file +#' +#' '[Parquet](https://parquet.apache.org/)' is a columnar storage file format. +#' This function enables you to read Parquet files into R. +#' +#' @inheritParams read_delim_arrow +#' @param props [ParquetReaderProperties] +#' +#' @return A [arrow::Table][Table], or a `data.frame` if `as_tibble` is +#' `TRUE`. +#' @examples +#' \donttest{ +#' df <- read_parquet(system.file("v0.7.1.parquet", package="arrow")) +#' head(df) +#' } +#' @export +read_parquet <- function(file, + col_select = NULL, + as_tibble = TRUE, + props = ParquetReaderProperties$create(), + ...) 
{ + reader <- ParquetFileReader$create(file, props = props, ...) + tab <- reader$ReadTable(!!enquo(col_select)) + + if (as_tibble) { + tab <- as.data.frame(tab) + } + tab +} + +#' @title ParquetFileReader class +#' @rdname ParquetFileReader +#' @name ParquetFileReader +#' @docType class +#' @usage NULL +#' @format NULL +#' @description This class enables you to interact with Parquet files. +#' +#' @section Factory: +#' +#' The `ParquetFileReader$create()` factor method instantiates the object and +#' takes the following arguments: +#' +#' - `file` A character file name, raw vector, or Arrow file connection object +#' (e.g. `RandomAccessFile`). +#' - `props` Optional [ParquetReaderProperties] +#' - `mmap` Logical: whether to memory-map the file (default `TRUE`) +#' - `...` Additional arguments, currently ignored +#' +#' @section Methods: +#' +#' - `$ReadTable(col_select)`: get an `arrow::Table` from the file, possibly +#' with columns filtered by a character vector of column names or a +#' `tidyselect` specification. +#' - `$GetSchema()`: get the `arrow::Schema` of the data in the file +#' +#' @export +#' @examples +#' \donttest{ +#' f <- system.file("v0.7.1.parquet", package="arrow") +#' pq <- ParquetFileReader$create(f) +#' pq$GetSchema() +#' tab <- pq$ReadTable() +#' tab$schema +#' } +#' @include arrow-package.R +ParquetFileReader <- R6Class("ParquetFileReader", inherit = Object, public = list( ReadTable = function(col_select = NULL) { col_select <- enquo(col_select) - if(quo_is_null(col_select)) { + if (quo_is_null(col_select)) { shared_ptr(Table, parquet___arrow___FileReader__ReadTable1(self)) } else { all_vars <- shared_ptr(Schema, parquet___arrow___FileReader__GetSchema(self))$names @@ -36,7 +101,47 @@ ) ) -`parquet::arrow::ArrowReaderProperties` <- R6Class("parquet::arrow::ArrowReaderProperties", +ParquetFileReader$create <- function(file, + props = ParquetReaderProperties$create(), + mmap = TRUE, + ...) 
{ + if (is.character(file)) { + if (isTRUE(mmap)) { + file <- mmap_open(file) + } else { + file <- ReadableFile$create(file) + } + } else if (is.raw(file)) { + file <- BufferReader$create(file) + } + assert_that(inherits(file, "RandomAccessFile")) + assert_that(inherits(props, "ParquetReaderProperties")) + + unique_ptr(ParquetFileReader, parquet___arrow___FileReader__OpenFile(file, props)) +} + +#' @title ParquetReaderProperties class +#' @rdname ParquetReaderProperties +#' @name ParquetReaderProperties +#' @docType class +#' @usage NULL +#' @format NULL +#' @description This class holds settings to control how a Parquet file is read +#' by [ParquetFileReader]. +#' +#' @section Factory: +#' +#' The `ParquetReaderProperties$create()` factor method instantiates the object +#' and takes the following arguments: +#' +#' - `use_threads` Logical: whether to use multithreading (default `TRUE`) +#' +#' @section Methods: +#' +#' TODO +#' +#' @export +ParquetReaderProperties <- R6Class("ParquetReaderProperties", inherit = Object, public = list( read_dictionary = function(column_index) { @@ -57,77 +162,13 @@ ) ) -#' Create a new ArrowReaderProperties instance -#' -#' @param use_threads use threads? -#' -#' @export -#' @keywords internal -parquet_arrow_reader_properties <- function(use_threads = option_use_threads()) { - shared_ptr(`parquet::arrow::ArrowReaderProperties`, parquet___arrow___ArrowReaderProperties__Make(isTRUE(use_threads))) -} - -#' Parquet file reader -#' -#' @inheritParams read_delim_arrow -#' @param props reader file properties, as created by [parquet_arrow_reader_properties()] -#' -#' @param ... additional parameters -#' -#' @export -parquet_file_reader <- function(file, props = parquet_arrow_reader_properties(), ...) { - UseMethod("parquet_file_reader") -} - -#' @export -parquet_file_reader.RandomAccessFile <- function(file, props = parquet_arrow_reader_properties(), ...) 
{ - unique_ptr(`parquet::arrow::FileReader`, parquet___arrow___FileReader__OpenFile(file, props)) -} - -#' @export -parquet_file_reader.character <- function(file, - props = parquet_arrow_reader_properties(), - memory_map = TRUE, - ...) { - file <- normalizePath(file) - if (isTRUE(memory_map)) { - parquet_file_reader(mmap_open(file), props = props, ...) - } else { - parquet_file_reader(ReadableFile$create(file), props = props, ...) - } -} - -#' @export -parquet_file_reader.raw <- function(file, props = parquet_arrow_reader_properties(), ...) { - parquet_file_reader(BufferReader$create(file), props = props, ...) +ParquetReaderProperties$create <- function(use_threads = option_use_threads()) { + shared_ptr( + ParquetReaderProperties, + parquet___arrow___ArrowReaderProperties__Make(isTRUE(use_threads)) + ) } -#' Read a Parquet file -#' -#' '[Parquet](https://parquet.apache.org/)' is a columnar storage file format. -#' This function enables you to read Parquet files into R. -#' -#' @inheritParams read_delim_arrow -#' @inheritParams parquet_file_reader -#' -#' @return A [arrow::Table][Table], or a `data.frame` if `as_tibble` is -#' `TRUE`. -#' @examples -#' \donttest{ -#' try({ -#' df <- read_parquet(system.file("v0.7.1.parquet", package="arrow")) -#' }) -#' } -#' @export -read_parquet <- function(file, col_select = NULL, as_tibble = TRUE, props = parquet_arrow_reader_properties(), ...) { - reader <- parquet_file_reader(file, props = props, ...) 
- tab <- reader$ReadTable(!!enquo(col_select)) - - if (as_tibble) { - tab <- as.data.frame(tab) - } - tab -} #' Write Parquet file to disk #' diff --git a/r/_pkgdown.yml b/r/_pkgdown.yml index 6283c5000a9..9f64189f70b 100644 --- a/r/_pkgdown.yml +++ b/r/_pkgdown.yml @@ -52,6 +52,8 @@ reference: - write_arrow - write_feather - write_parquet +- title: C++ reader/writer interface + contents: - csv_convert_options - csv_parse_options - csv_read_options @@ -59,7 +61,8 @@ reference: - json_parse_options - json_read_options - json_table_reader - - parquet_file_reader + - ParquetFileReader + - ParquetReaderProperties - FeatherTableReader - FeatherTableWriter - JsonTableReader diff --git a/r/man/ParquetFileReader.Rd b/r/man/ParquetFileReader.Rd new file mode 100644 index 00000000000..27d7a1554e1 --- /dev/null +++ b/r/man/ParquetFileReader.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parquet.R +\docType{class} +\name{ParquetFileReader} +\alias{ParquetFileReader} +\title{ParquetFileReader class} +\description{ +This class enables you to interact with Parquet files. +} +\section{Factory}{ + + +The \code{ParquetFileReader$create()} factor method instantiates the object and +takes the following arguments: +\itemize{ +\item \code{file} A character file name, raw vector, or Arrow file connection object +(e.g. \code{RandomAccessFile}). +\item \code{props} Optional \link{ParquetReaderProperties} +\item \code{mmap} Logical: whether to memory-map the file (default \code{TRUE}) +\item \code{...} Additional arguments, currently ignored +} +} + +\section{Methods}{ + +\itemize{ +\item \code{$ReadTable(col_select)}: get an \code{arrow::Table} from the file, possibly +with columns filtered by a character vector of column names or a +\code{tidyselect} specification. 
+\item \code{$GetSchema()}: get the \code{arrow::Schema} of the data in the file +} +} + +\examples{ +\donttest{ +f <- system.file("v0.7.1.parquet", package="arrow") +pq <- ParquetFileReader$create(f) +pq$GetSchema() +tab <- pq$ReadTable() +tab$schema +} +} +\keyword{datasets} diff --git a/r/man/ParquetReaderProperties.Rd b/r/man/ParquetReaderProperties.Rd new file mode 100644 index 00000000000..4929b4a0160 --- /dev/null +++ b/r/man/ParquetReaderProperties.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parquet.R +\docType{class} +\name{ParquetReaderProperties} +\alias{ParquetReaderProperties} +\title{ParquetReaderProperties class} +\description{ +This class holds settings to control how a Parquet file is read +by \link{ParquetFileReader}. +} +\section{Factory}{ + + +The \code{ParquetReaderProperties$create()} factor method instantiates the object +and takes the following arguments: +\itemize{ +\item \code{use_threads} Logical: whether to use multithreading (default \code{TRUE}) +} +} + +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/Table.Rd b/r/man/Table.Rd index 19747ea7de5..38f86916fcc 100644 --- a/r/man/Table.Rd +++ b/r/man/Table.Rd @@ -12,7 +12,7 @@ class arrow::Table The \code{Table$create()} function takes the following arguments: \itemize{ -\item `...`` arrays, chunked arrays, or R vectors +\item \code{...} arrays, chunked arrays, or R vectors \item \code{schema} a schema. 
The default (\code{NULL}) infers the schema from the \code{...} } } diff --git a/r/man/parquet_arrow_reader_properties.Rd b/r/man/parquet_arrow_reader_properties.Rd deleted file mode 100644 index eed75669b1c..00000000000 --- a/r/man/parquet_arrow_reader_properties.Rd +++ /dev/null @@ -1,15 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/parquet.R -\name{parquet_arrow_reader_properties} -\alias{parquet_arrow_reader_properties} -\title{Create a new ArrowReaderProperties instance} -\usage{ -parquet_arrow_reader_properties(use_threads = option_use_threads()) -} -\arguments{ -\item{use_threads}{use threads?} -} -\description{ -Create a new ArrowReaderProperties instance -} -\keyword{internal} diff --git a/r/man/parquet_file_reader.Rd b/r/man/parquet_file_reader.Rd deleted file mode 100644 index 6c42855d1d4..00000000000 --- a/r/man/parquet_file_reader.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/parquet.R -\name{parquet_file_reader} -\alias{parquet_file_reader} -\title{Parquet file reader} -\usage{ -parquet_file_reader(file, props = parquet_arrow_reader_properties(), ...) -} -\arguments{ -\item{file}{A character path to a local file, or an Arrow input stream} - -\item{props}{reader file properties, as created by \code{\link[=parquet_arrow_reader_properties]{parquet_arrow_reader_properties()}}} - -\item{...}{additional parameters} -} -\description{ -Parquet file reader -} diff --git a/r/man/read_parquet.Rd b/r/man/read_parquet.Rd index 8bf07fb0f4c..42ffed44ae4 100644 --- a/r/man/read_parquet.Rd +++ b/r/man/read_parquet.Rd @@ -5,7 +5,7 @@ \title{Read a Parquet file} \usage{ read_parquet(file, col_select = NULL, as_tibble = TRUE, - props = parquet_arrow_reader_properties(), ...) + props = ParquetReaderProperties$create(), ...) 
} \arguments{ \item{file}{A character path to a local file, or an Arrow input stream} @@ -18,9 +18,7 @@ of columns, as used in \code{dplyr::select()}.} \item{as_tibble}{Should the function return a \code{data.frame} or an \link[=Table]{arrow::Table}?} -\item{props}{reader file properties, as created by \code{\link[=parquet_arrow_reader_properties]{parquet_arrow_reader_properties()}}} - -\item{...}{additional parameters} +\item{props}{\link{ParquetReaderProperties}} } \value{ A \link[=Table]{arrow::Table}, or a \code{data.frame} if \code{as_tibble} is @@ -32,8 +30,7 @@ This function enables you to read Parquet files into R. } \examples{ \donttest{ -try({ - df <- read_parquet(system.file("v0.7.1.parquet", package="arrow")) -}) +df <- read_parquet(system.file("v0.7.1.parquet", package="arrow")) +head(df) } } From 96873e1cdbc4a180f9f8f3d3fadc6f96e79d73d5 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Fri, 6 Sep 2019 09:44:43 -0700 Subject: [PATCH 23/37] Factor out make_readable_file --- r/R/RecordBatchReader.R | 9 +-------- r/R/csv.R | 17 ++++++----------- r/R/feather.R | 16 +++------------- r/R/io.R | 20 ++++++++++++++++++++ r/R/json.R | 15 +++++---------- r/R/parquet.R | 11 +---------- r/R/read_table.R | 4 ++-- r/man/make_readable_file.Rd | 20 ++++++++++++++++++++ r/man/read_delim_arrow.Rd | 2 +- r/man/read_json_arrow.Rd | 2 +- r/man/read_parquet.Rd | 2 +- 11 files changed, 61 insertions(+), 57 deletions(-) create mode 100644 r/man/make_readable_file.Rd diff --git a/r/R/RecordBatchReader.R b/r/R/RecordBatchReader.R index 1527a49e1aa..056de939342 100644 --- a/r/R/RecordBatchReader.R +++ b/r/R/RecordBatchReader.R @@ -92,13 +92,6 @@ RecordBatchFileReader <- R6Class("RecordBatchFileReader", inherit = Object, ) RecordBatchFileReader$create <- function(file) { - if (inherits(file, c("raw", "Buffer"))) { - file <- BufferReader$create(file) - } else if (is.character(file)) { - assert_that(length(file) == 1L) - file <- ReadableFile$create(file) - } - 
assert_that(inherits(file, "RandomAccessFile")) - + file <- make_readable_file(file) shared_ptr(RecordBatchFileReader, ipc___RecordBatchFileReader__Open(file)) } diff --git a/r/R/csv.R b/r/R/csv.R index 7c249ae0dd6..7acb667c9e0 100644 --- a/r/R/csv.R +++ b/r/R/csv.R @@ -32,7 +32,7 @@ #' `parse_options`, `convert_options`, or `read_options` arguments, or you can #' call [csv_table_reader()] directly for lower-level access. #' -#' @param file A character path to a local file, or an Arrow input stream +#' @inheritParams make_readable_file #' @param delim Single character used to separate fields within a record. #' @param quote Single character used to quote strings. #' @param escape_double Does the file escape quotes by doubling them? @@ -192,16 +192,11 @@ CsvTableReader$create <- function(file, parse_options = csv_parse_options(), convert_options = csv_convert_options(), ...) { - if (is.character(file)) { - file <- mmap_open(file) - } - if (inherits(file, "InputStream")) { - file <- shared_ptr(CsvTableReader, - csv___TableReader__Make(file, read_options, parse_options, convert_options) - ) - } - assert_that(inherits(file, c("CsvTableReader", "TableReader"))) - file + file <- make_readable_file(file) + shared_ptr( + CsvTableReader, + csv___TableReader__Make(file, read_options, parse_options, convert_options) + ) } #' Arrow CSV and JSON table readers diff --git a/r/R/feather.R b/r/R/feather.R index 34902cf5ce4..add70bdc606 100644 --- a/r/R/feather.R +++ b/r/R/feather.R @@ -113,17 +113,7 @@ FeatherTableReader <- R6Class("FeatherTableReader", inherit = Object, ) ) -FeatherTableReader$create <- function(stream, mmap = TRUE, ...) { - if (is.character(stream)) { - if (isTRUE(mmap)) { - stream <- mmap_open(stream, ...) - } else { - stream <- ReadableFile$create(stream, ...) 
- } - } else if (is.raw(stream)) { - stream <- BufferReader$create(stream) - } - - assert_that(inherits(stream, "InputStream")) - unique_ptr(FeatherTableReader, ipc___feather___TableReader__Open(stream)) +FeatherTableReader$create <- function(file, mmap = TRUE, ...) { + file <- make_readable_file(file, mmap) + unique_ptr(FeatherTableReader, ipc___feather___TableReader__Open(file)) } diff --git a/r/R/io.R b/r/R/io.R index bb82d17a6bd..e13899a693c 100644 --- a/r/R/io.R +++ b/r/R/io.R @@ -295,3 +295,23 @@ mmap_open <- function(path, mode = c("read", "write", "readwrite")) { path <- normalizePath(path) shared_ptr(MemoryMappedFile, io___MemoryMappedFile__Open(path, mode)) } + +#' Handle a range of possible input sources +#' @param file A character file name, raw vector, or an Arrow input stream +#' @param mmap Logical: whether to memory-map the file (default `TRUE`) +#' @return An `InputStream` or a subclass of one. +#' @keywords internal +make_readable_file <- function(file, mmap = TRUE) { + if (is.character(file)) { + assert_that(length(file) == 1L) + if (isTRUE(mmap)) { + file <- mmap_open(file) + } else { + file <- ReadableFile$create(file) + } + } else if (inherits(file, c("raw", "Buffer"))) { + file <- BufferReader$create(file) + } + assert_that(inherits(file, "InputStream")) + file +} diff --git a/r/R/json.R b/r/R/json.R index 5bf0e124874..bf038a18fa9 100644 --- a/r/R/json.R +++ b/r/R/json.R @@ -70,16 +70,11 @@ JsonTableReader$create <- function(file, parse_options = json_parse_options(), ...) 
{ - if (is.character(file)) { - file <- mmap_open(file) - } - if (inherits(file, "InputStream")) { - file <- shared_ptr(JsonTableReader, - json___TableReader__Make(file, read_options, parse_options) - ) - } - assert_that(inherits(file, c("JsonTableReader", "TableReader"))) - file + file <- make_readable_file(file) + shared_ptr( + JsonTableReader, + json___TableReader__Make(file, read_options, parse_options) + ) } #' @rdname csv_table_reader diff --git a/r/R/parquet.R b/r/R/parquet.R index a77c5facdcd..f9ff9939b47 100644 --- a/r/R/parquet.R +++ b/r/R/parquet.R @@ -105,16 +105,7 @@ ParquetFileReader$create <- function(file, props = ParquetReaderProperties$create(), mmap = TRUE, ...) { - if (is.character(file)) { - if (isTRUE(mmap)) { - file <- mmap_open(file) - } else { - file <- ReadableFile$create(file) - } - } else if (is.raw(file)) { - file <- BufferReader$create(file) - } - assert_that(inherits(file, "RandomAccessFile")) + file <- make_readable_file(file, mmap) assert_that(inherits(props, "ParquetReaderProperties")) unique_ptr(ParquetFileReader, parquet___arrow___FileReader__OpenFile(file, props)) diff --git a/r/R/read_table.R b/r/R/read_table.R index c58bd5b7be8..f372d035b90 100644 --- a/r/R/read_table.R +++ b/r/R/read_table.R @@ -69,7 +69,7 @@ read_table.character <- function(stream) { } #' @export -`read_table.raw` <- function(stream) { +read_table.raw <- function(stream) { stream <- BufferReader$create(stream) on.exit(stream$close()) batch_reader <- RecordBatchStreamReader$create(stream) @@ -78,6 +78,6 @@ read_table.character <- function(stream) { #' @rdname read_table #' @export -read_arrow <- function(stream){ +read_arrow <- function(stream) { as.data.frame(read_table(stream)) } diff --git a/r/man/make_readable_file.Rd b/r/man/make_readable_file.Rd new file mode 100644 index 00000000000..4163cdddd19 --- /dev/null +++ b/r/man/make_readable_file.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R 
+\name{make_readable_file} +\alias{make_readable_file} +\title{Handle a range of possible input sources} +\usage{ +make_readable_file(file, mmap = TRUE) +} +\arguments{ +\item{file}{A character file name, raw vector, or an Arrow input stream} + +\item{mmap}{Logical: whether to memory-map the file (default \code{TRUE})} +} +\value{ +An \code{InputStream} or a subclass of one. +} +\description{ +Handle a range of possible input sources +} +\keyword{internal} diff --git a/r/man/read_delim_arrow.Rd b/r/man/read_delim_arrow.Rd index 4e3522cbbf1..d0cd4fb2e2e 100644 --- a/r/man/read_delim_arrow.Rd +++ b/r/man/read_delim_arrow.Rd @@ -25,7 +25,7 @@ read_tsv_arrow(file, quote = "\\"", escape_double = TRUE, read_options = NULL, as_tibble = TRUE) } \arguments{ -\item{file}{A character path to a local file, or an Arrow input stream} +\item{file}{A character file name, raw vector, or an Arrow input stream} \item{delim}{Single character used to separate fields within a record.} diff --git a/r/man/read_json_arrow.Rd b/r/man/read_json_arrow.Rd index e870b4174c9..7fcad2c4509 100644 --- a/r/man/read_json_arrow.Rd +++ b/r/man/read_json_arrow.Rd @@ -7,7 +7,7 @@ read_json_arrow(file, col_select = NULL, as_tibble = TRUE, ...) } \arguments{ -\item{file}{A character path to a local file, or an Arrow input stream} +\item{file}{A character file name, raw vector, or an Arrow input stream} \item{col_select}{A character vector of column names to keep, as in the "select" argument to \code{data.table::fread()}, or a diff --git a/r/man/read_parquet.Rd b/r/man/read_parquet.Rd index 42ffed44ae4..3984f318424 100644 --- a/r/man/read_parquet.Rd +++ b/r/man/read_parquet.Rd @@ -8,7 +8,7 @@ read_parquet(file, col_select = NULL, as_tibble = TRUE, props = ParquetReaderProperties$create(), ...) 
} \arguments{ -\item{file}{A character path to a local file, or an Arrow input stream} +\item{file}{A character file name, raw vector, or an Arrow input stream} \item{col_select}{A character vector of column names to keep, as in the "select" argument to \code{data.table::fread()}, or a From 5fd49ef4b2b231fa74121f70b10d28bef72eeb92 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Fri, 6 Sep 2019 10:20:45 -0700 Subject: [PATCH 24/37] Fix check failures --- r/DESCRIPTION | 2 ++ r/R/parquet.R | 1 + r/man/read_parquet.Rd | 2 ++ 3 files changed, 5 insertions(+) diff --git a/r/DESCRIPTION b/r/DESCRIPTION index e31f2c9bd36..f0414c079cd 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -36,10 +36,12 @@ Imports: utils Roxygen: list(markdown = TRUE) RoxygenNote: 6.1.1 +VignetteBuilder: knitr Suggests: covr, fs, hms, + knitr, lubridate, rmarkdown, testthat, diff --git a/r/R/parquet.R b/r/R/parquet.R index f9ff9939b47..5cd6a79bb4c 100644 --- a/r/R/parquet.R +++ b/r/R/parquet.R @@ -23,6 +23,7 @@ #' #' @inheritParams read_delim_arrow #' @param props [ParquetReaderProperties] +#' @param ... Additional arguments passed to `ParquetFileReader$create()` #' #' @return A [arrow::Table][Table], or a `data.frame` if `as_tibble` is #' `TRUE`. 
diff --git a/r/man/read_parquet.Rd b/r/man/read_parquet.Rd index 3984f318424..5a12e7199f2 100644 --- a/r/man/read_parquet.Rd +++ b/r/man/read_parquet.Rd @@ -19,6 +19,8 @@ of columns, as used in \code{dplyr::select()}.} \link[=Table]{arrow::Table}?} \item{props}{\link{ParquetReaderProperties}} + +\item{...}{Additional arguments passed to \code{ParquetFileReader$create()}} } \value{ A \link[=Table]{arrow::Table}, or a \code{data.frame} if \code{as_tibble} is From 495abf663a24c635555b7cc8600000e148932e62 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Fri, 6 Sep 2019 11:25:20 -0700 Subject: [PATCH 25/37] Fill in documentation and standardize file naming --- r/DESCRIPTION | 16 ++--- r/NAMESPACE | 8 ++- r/R/Field.R | 32 +++++---- r/R/Schema.R | 3 +- r/R/array-data.R | 12 ++-- r/R/buffer.R | 14 ++-- r/R/compression.R | 46 +++++++------ r/R/feather.R | 65 ++++++++++++++++++- r/R/parquet.R | 4 +- ...ordBatchReader.R => record-batch-reader.R} | 0 ...ordBatchWriter.R => record-batch-writer.R} | 0 r/R/{RecordBatch.R => record-batch.R} | 0 r/_pkgdown.yml | 3 +- r/man/{array-data.Rd => ArrayData.Rd} | 8 +-- r/man/FeatherTableReader.Rd | 39 +++++++++++ r/man/FeatherTableWriter.Rd | 35 ++++++++++ r/man/ParquetFileReader.Rd | 2 +- r/man/ParquetReaderProperties.Rd | 2 +- r/man/RecordBatch.Rd | 2 +- r/man/RecordBatchFileReader.Rd | 2 +- r/man/RecordBatchFileWriter.Rd | 2 +- r/man/RecordBatchReader.Rd | 2 +- r/man/RecordBatchStreamReader.Rd | 2 +- r/man/RecordBatchStreamWriter.Rd | 2 +- r/man/RecordBatchWriter.Rd | 2 +- r/man/Table.Rd | 2 +- r/man/buffer.Rd | 6 +- r/man/compressed_input_stream.Rd | 16 ----- r/man/compressed_output_stream.Rd | 19 ------ r/man/compression.Rd | 31 +++++++++ r/man/data-type.Rd | 2 +- r/man/field.Rd | 8 ++- r/man/read_feather.Rd | 3 +- r/man/read_schema.Rd | 2 +- r/man/record_batch.Rd | 2 +- r/man/schema.Rd | 4 +- r/tests/testthat/test-compressed.R | 4 +- 37 files changed, 273 insertions(+), 129 deletions(-) rename r/R/{RecordBatchReader.R => 
record-batch-reader.R} (100%) rename r/R/{RecordBatchWriter.R => record-batch-writer.R} (100%) rename r/R/{RecordBatch.R => record-batch.R} (100%) rename r/man/{array-data.Rd => ArrayData.Rd} (70%) create mode 100644 r/man/FeatherTableReader.Rd create mode 100644 r/man/FeatherTableWriter.Rd delete mode 100644 r/man/compressed_input_stream.Rd delete mode 100644 r/man/compressed_output_stream.Rd create mode 100644 r/man/compression.Rd diff --git a/r/DESCRIPTION b/r/DESCRIPTION index f0414c079cd..ef44196f0ae 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -50,15 +50,7 @@ Suggests: Collate: 'enums.R' 'arrow-package.R' - 'Field.R' 'type.R' - 'List.R' - 'RecordBatch.R' - 'RecordBatchReader.R' - 'RecordBatchWriter.R' - 'Schema.R' - 'Struct.R' - 'Table.R' 'array-data.R' 'array.R' 'arrowExports.R' @@ -70,13 +62,21 @@ Collate: 'csv.R' 'dictionary.R' 'feather.R' + 'field.R' 'install-arrow.R' 'json.R' + 'list.R' 'memory_pool.R' 'message.R' 'parquet.R' 'read_record_batch.R' 'read_table.R' + 'record-batch-reader.R' + 'record-batch-writer.R' + 'record-batch.R' 'reexports-bit64.R' 'reexports-tidyselect.R' + 'schema.R' + 'struct.R' + 'table.R' 'write_arrow.R' diff --git a/r/NAMESPACE b/r/NAMESPACE index d5904e0022c..9509a23839e 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -40,13 +40,19 @@ S3method(write_arrow,raw) export(Array) export(Buffer) export(ChunkedArray) +export(CompressedInputStream) +export(CompressedOutputStream) export(CompressionType) export(DateUnit) +export(FeatherTableReader) +export(FeatherTableWriter) +export(Field) export(FileMode) export(MessageReader) export(MessageType) export(ParquetFileReader) export(ParquetReaderProperties) +export(Schema) export(StatusCode) export(Table) export(TimeUnit) @@ -57,8 +63,6 @@ export(boolean) export(buffer) export(cast_options) export(chunked_array) -export(compressed_input_stream) -export(compressed_output_stream) export(compression_codec) export(contains) export(csv_convert_options) diff --git a/r/R/Field.R b/r/R/Field.R 
index 77c59f794f5..4533b4d94c2 100644 --- a/r/R/Field.R +++ b/r/R/Field.R @@ -18,7 +18,9 @@ #' @include arrow-package.R #' @title class arrow::Field #' @docType class -#' +#' @description `field()` lets you create an `arrow::Field` that maps a +#' [DataType][data-type] to a column name. Fields are contained in +#' [Schemas][Schema]. #' @section Methods: #' #' - `f$ToString()`: convert to a string @@ -26,6 +28,7 @@ #' #' @rdname Field #' @name Field +#' @export Field <- R6Class("Field", inherit = Object, public = list( ToString = function() { @@ -48,6 +51,19 @@ Field <- R6Class("Field", inherit = Object, } ) ) +Field$create <- function(name, type, metadata) { + assert_that(inherits(name, "character"), length(name) == 1L) + if (!inherits(type, "DataType")) { + if (identical(type, double())) { + # Magic so that we don't have to mask this base function + type <- float64() + } else { + stop(name, " must be arrow::DataType, not ", class(type), call. = FALSE) + } + } + assert_that(missing(metadata), msg = "metadata= is currently ignored") + shared_ptr(Field, Field__initialize(name, type, TRUE)) +} #' @export `==.Field` <- function(lhs, rhs){ @@ -64,19 +80,7 @@ Field <- R6Class("Field", inherit = Object, #' } #' @rdname Field #' @export -field <- function(name, type, metadata) { - assert_that(inherits(name, "character"), length(name) == 1L) - if (!inherits(type, "DataType")) { - if (identical(type, double())) { - # Magic so that we don't have to mask this base function - type <- float64() - } else { - stop(name, " must be arrow::DataType, not ", class(type), call. = FALSE) - } - } - assert_that(missing(metadata), msg = "metadata= is currently ignored") - shared_ptr(Field, Field__initialize(name, type, TRUE)) -} +field <- Field$create .fields <- function(.list){ assert_that(!is.null(nms <- names(.list))) diff --git a/r/R/Schema.R b/r/R/Schema.R index 1016279770f..1886277a52b 100644 --- a/r/R/Schema.R +++ b/r/R/Schema.R @@ -16,7 +16,7 @@ # under the License. 
#' @include arrow-package.R -#' @title class arrow::Schema +#' @title Schema class #' #' @description Create a `Schema` when you #' want to convert an R `data.frame` to Arrow but don't want to rely on the @@ -45,6 +45,7 @@ #' #' @rdname Schema #' @name Schema +#' @export Schema <- R6Class("Schema", inherit = Object, public = list( diff --git a/r/R/array-data.R b/r/R/array-data.R index 041e3c6a32b..f0797f50d0b 100644 --- a/r/R/array-data.R +++ b/r/R/array-data.R @@ -15,13 +15,12 @@ # specific language governing permissions and limitations # under the License. -#' @include type.R - -#' @title class ArrayData -#' +#' @title ArrayData class #' @usage NULL #' @format NULL #' @docType class +#' @description The `ArrayData` class allows you to get and inspect the data +#' inside an `arrow::Array`. #' #' @section Usage: #' @@ -39,8 +38,9 @@ #' #' ... #' -#' @rdname array-data -#' @name array-data +#' @rdname ArrayData +#' @name ArrayData +#' @include type.R ArrayData <- R6Class("ArrayData", inherit = Object, active = list( diff --git a/r/R/buffer.R b/r/R/buffer.R index d4a5df616b7..2edd3213437 100644 --- a/r/R/buffer.R +++ b/r/R/buffer.R @@ -15,15 +15,11 @@ # specific language governing permissions and limitations # under the License. -#' @include arrow-package.R -#' @include enums.R - #' @title class Buffer -#' #' @usage NULL #' @format NULL #' @docType class -#' +#' @description `buffer()` lets you create an `arrow::Buffer` from an R object #' @section Methods: #' #' - `$is_mutable()` : @@ -34,6 +30,8 @@ #' @rdname buffer #' @name buffer #' @export +#' @include arrow-package.R +#' @include enums.R Buffer <- R6Class("Buffer", inherit = Object, public = list( ZeroPadding = function() Buffer__ZeroPadding(self), @@ -57,12 +55,8 @@ Buffer$create <- function(x) { } } -#' Create a [Buffer][buffer] from an R object -#' #' @param x R object. 
Only raw, numeric and integer vectors are currently supported -#' -#' @return an instance of [Buffer][buffer] that borrows memory from `x` -#' +#' @return an instance of `Buffer` that borrows memory from `x` #' @export buffer <- Buffer$create diff --git a/r/R/compression.R b/r/R/compression.R index dbf6a1a5c8a..028b919effd 100644 --- a/r/R/compression.R +++ b/r/R/compression.R @@ -31,9 +31,31 @@ compression_codec <- function(type = "GZIP") { unique_ptr(Codec, util___Codec__Create(type)) } - +#' @title Compressed stream classes +#' @rdname compression +#' @name compression +#' @aliases CompressedInputStream CompressedOutputStream +#' @docType class +#' @usage NULL +#' @format NULL +#' @description `CompressedInputStream` and `CompressedOutputStream` +#' allow you to apply a [compression_codec()] to an +#' input or output stream. +#' +#' @section Factory: +#' +#' The `CompressedInputStream$create()` and `CompressedOutputStream$create()` +#' factory methods instantiate the object and take the following arguments: +#' +#' - `stream` An `InputStream` or `OutputStream`, respectively +#' - `codec` A `Codec` +#' +#' @section Methods: +#' +#' Methods are inherited from [InputStream] and [OutputStream], respectively +#' @export +#' @include arrow-package.R CompressedOutputStream <- R6Class("CompressedOutputStream", inherit = OutputStream) - CompressedOutputStream$create <- function(stream, codec = compression_codec()){ if (.Platform$OS.type == "windows") { stop("'CompressedOutputStream' is unsupported in Windows.") @@ -46,18 +68,11 @@ CompressedOutputStream$create <- function(stream, codec = compression_codec()){ shared_ptr(CompressedOutputStream, io___CompressedOutputStream__Make(codec, stream)) } -#' Compressed output stream -#' -#' @details This function is not supported in Windows. 
-#' -#' @param stream Underlying raw output stream -#' @param codec a codec +#' @rdname compression +#' @usage NULL +#' @format NULL #' @export -compressed_output_stream <- CompressedOutputStream$create - - CompressedInputStream <- R6Class("CompressedInputStream", inherit = InputStream) - CompressedInputStream$create <- function(stream, codec = compression_codec()){ # TODO (npr): why would CompressedInputStream work on Windows if CompressedOutputStream doesn't? (and is it still the case that it does not?) assert_that(inherits(codec, "Codec")) @@ -67,10 +82,3 @@ CompressedInputStream$create <- function(stream, codec = compression_codec()){ assert_that(inherits(stream, "InputStream")) shared_ptr(CompressedInputStream, io___CompressedInputStream__Make(codec, stream)) } - -#' Compressed input stream -#' -#' @param stream Underlying raw input stream -#' @param codec a codec -#' @export -compressed_input_stream <- CompressedInputStream$create diff --git a/r/R/feather.R b/r/R/feather.R index add70bdc606..d8b309da091 100644 --- a/r/R/feather.R +++ b/r/R/feather.R @@ -46,6 +46,35 @@ write_feather <- function(data, stream) { ipc___TableWriter__RecordBatch__WriteFeather(writer, data) } +#' @title FeatherTableWriter class +#' @rdname FeatherTableWriter +#' @name FeatherTableWriter +#' @docType class +#' @usage NULL +#' @format NULL +#' @description This class enables you to write Feather files. See its usage in +#' [write_feather()]. 
+#' +#' @section Factory: +#' +#' The `FeatherTableWriter$create()` factory method instantiates the object and +#' takes the following argument: +#' +#' - `stream` An `OutputStream` +#' +#' @section Methods: +#' +#' - `$GetDescription()` +#' - `$HasDescription()` +#' - `$version()` +#' - `$num_rows()` +#' - `$num_columns()` +#' - `$GetColumnName()` +#' - `$GetColumn()` +#' - `$Read(columns)` +#' +#' @export +#' @include arrow-package.R FeatherTableWriter <- R6Class("FeatherTableWriter", inherit = Object, public = list( SetDescription = function(description) ipc___feather___TableWriter__SetDescription(self, description), @@ -67,7 +96,8 @@ FeatherTableWriter$create <- function(stream) { #' @inheritParams read_delim_arrow #' @param ... additional parameters #' -#' @return A `data.frame` if `as_tibble` is `TRUE` (the default), or a [arrow::Table][Table] otherwise +#' @return A `data.frame` if `as_tibble` is `TRUE` (the default), or an +#' [arrow::Table][Table] otherwise #' #' @export #' @examples @@ -98,6 +128,39 @@ read_feather <- function(file, col_select = NULL, as_tibble = TRUE, ...) { out } +#' @title FeatherTableReader class +#' @rdname FeatherTableReader +#' @name FeatherTableReader +#' @docType class +#' @usage NULL +#' @format NULL +#' @description This class enables you to interact with Feather files. Create +#' one to connect to a file or other InputStream, and call `Read()` on it to +#' make an `arrow::Table`. See its usage in [`read_feather()`]. +#' +#' @section Factory: +#' +#' The `FeatherTableReader$create()` factory method instantiates the object and +#' takes the following arguments: +#' +#' - `file` A character file name, raw vector, or Arrow file connection object +#' (e.g. `RandomAccessFile`). 
+#' - `mmap` Logical: whether to memory-map the file (default `TRUE`) +#' - `...` Additional arguments, currently ignored +#' +#' @section Methods: +#' +#' - `$GetDescription()` +#' - `$HasDescription()` +#' - `$version()` +#' - `$num_rows()` +#' - `$num_columns()` +#' - `$GetColumnName()` +#' - `$GetColumn()` +#' - `$Read(columns)` +#' +#' @export +#' @include arrow-package.R FeatherTableReader <- R6Class("FeatherTableReader", inherit = Object, public = list( GetDescription = function() ipc___feather___TableReader__GetDescription(self), diff --git a/r/R/parquet.R b/r/R/parquet.R index 5cd6a79bb4c..b680a4da5bf 100644 --- a/r/R/parquet.R +++ b/r/R/parquet.R @@ -57,7 +57,7 @@ read_parquet <- function(file, #' #' @section Factory: #' -#' The `ParquetFileReader$create()` factor method instantiates the object and +#' The `ParquetFileReader$create()` factory method instantiates the object and #' takes the following arguments: #' #' - `file` A character file name, raw vector, or Arrow file connection object @@ -123,7 +123,7 @@ ParquetFileReader$create <- function(file, #' #' @section Factory: #' -#' The `ParquetReaderProperties$create()` factor method instantiates the object +#' The `ParquetReaderProperties$create()` factory method instantiates the object #' and takes the following arguments: #' #' - `use_threads` Logical: whether to use multithreading (default `TRUE`) diff --git a/r/R/RecordBatchReader.R b/r/R/record-batch-reader.R similarity index 100% rename from r/R/RecordBatchReader.R rename to r/R/record-batch-reader.R diff --git a/r/R/RecordBatchWriter.R b/r/R/record-batch-writer.R similarity index 100% rename from r/R/RecordBatchWriter.R rename to r/R/record-batch-writer.R diff --git a/r/R/RecordBatch.R b/r/R/record-batch.R similarity index 100% rename from r/R/RecordBatch.R rename to r/R/record-batch.R diff --git a/r/_pkgdown.yml b/r/_pkgdown.yml index 9f64189f70b..b915c2f130e 100644 --- a/r/_pkgdown.yml +++ b/r/_pkgdown.yml @@ -114,8 +114,7 @@ reference: - 
RecordBatchStreamReader - RecordBatchStreamWriter - RecordBatchWriter - - CompressedInputStream - - CompressedOutputStream + - compression - compression_codec - default_memory_pool - mmap_create diff --git a/r/man/array-data.Rd b/r/man/ArrayData.Rd similarity index 70% rename from r/man/array-data.Rd rename to r/man/ArrayData.Rd index 5deeb6c04dd..035fee8ac17 100644 --- a/r/man/array-data.Rd +++ b/r/man/ArrayData.Rd @@ -1,12 +1,12 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/array-data.R \docType{class} -\name{array-data} -\alias{array-data} +\name{ArrayData} \alias{ArrayData} -\title{class ArrayData} +\title{ArrayData class} \description{ -class ArrayData +The \code{ArrayData} class allows you to get and inspect the data +inside an \code{arrow::Array}. } \section{Usage}{ \preformatted{data <- Array$create(x)$data() diff --git a/r/man/FeatherTableReader.Rd b/r/man/FeatherTableReader.Rd new file mode 100644 index 00000000000..c0956d4c106 --- /dev/null +++ b/r/man/FeatherTableReader.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/feather.R +\docType{class} +\name{FeatherTableReader} +\alias{FeatherTableReader} +\title{FeatherTableReader class} +\description{ +This class enables you to interact with Feather files. Create +one to connect to a file or other InputStream, and call \code{Read()} on it to +make an \code{arrow::Table}. See its usage in \code{\link[=read_feather]{read_feather()}}. +} +\section{Factory}{ + + +The \code{FeatherTableReader$create()} factory method instantiates the object and +takes the following arguments: +\itemize{ +\item \code{file} A character file name, raw vector, or Arrow file connection object +(e.g. \code{RandomAccessFile}). 
+\item \code{mmap} Logical: whether to memory-map the file (default \code{TRUE}) +\item \code{...} Additional arguments, currently ignored +} +} + +\section{Methods}{ + +\itemize{ +\item \code{$GetDescription()} +\item \code{$HasDescription()} +\item \code{$version()} +\item \code{$num_rows()} +\item \code{$num_columns()} +\item \code{$GetColumnName()} +\item \code{$GetColumn()} +\item \code{$Read(columns)} +} +} + +\keyword{datasets} diff --git a/r/man/FeatherTableWriter.Rd b/r/man/FeatherTableWriter.Rd new file mode 100644 index 00000000000..e127bd89443 --- /dev/null +++ b/r/man/FeatherTableWriter.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/feather.R +\docType{class} +\name{FeatherTableWriter} +\alias{FeatherTableWriter} +\title{FeatherTableWriter class} +\description{ +This class enables you to write Feather files. See its usage in +\code{\link[=write_feather]{write_feather()}}. +} +\section{Factory}{ + + +The \code{FeatherTableWriter$create()} factory method instantiates the object and +takes the following argument: +\itemize{ +\item \code{stream} An \code{OutputStream} +} +} + +\section{Methods}{ + +\itemize{ +\item \code{$GetDescription()} +\item \code{$HasDescription()} +\item \code{$version()} +\item \code{$num_rows()} +\item \code{$num_columns()} +\item \code{$GetColumnName()} +\item \code{$GetColumn()} +\item \code{$Read(columns)} +} +} + +\keyword{datasets} diff --git a/r/man/ParquetFileReader.Rd b/r/man/ParquetFileReader.Rd index 27d7a1554e1..a9b6115c604 100644 --- a/r/man/ParquetFileReader.Rd +++ b/r/man/ParquetFileReader.Rd @@ -10,7 +10,7 @@ This class enables you to interact with Parquet files. 
\section{Factory}{ -The \code{ParquetFileReader$create()} factor method instantiates the object and +The \code{ParquetFileReader$create()} factory method instantiates the object and takes the following arguments: \itemize{ \item \code{file} A character file name, raw vector, or Arrow file connection object diff --git a/r/man/ParquetReaderProperties.Rd b/r/man/ParquetReaderProperties.Rd index 4929b4a0160..c183b2a2609 100644 --- a/r/man/ParquetReaderProperties.Rd +++ b/r/man/ParquetReaderProperties.Rd @@ -11,7 +11,7 @@ by \link{ParquetFileReader}. \section{Factory}{ -The \code{ParquetReaderProperties$create()} factor method instantiates the object +The \code{ParquetReaderProperties$create()} factory method instantiates the object and takes the following arguments: \itemize{ \item \code{use_threads} Logical: whether to use multithreading (default \code{TRUE}) diff --git a/r/man/RecordBatch.Rd b/r/man/RecordBatch.Rd index a2b333c2944..1e99e4d42bf 100644 --- a/r/man/RecordBatch.Rd +++ b/r/man/RecordBatch.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatch.R +% Please edit documentation in R/record-batch.R \docType{class} \name{RecordBatch} \alias{RecordBatch} diff --git a/r/man/RecordBatchFileReader.Rd b/r/man/RecordBatchFileReader.Rd index 574d7fc3252..c4814ad7656 100644 --- a/r/man/RecordBatchFileReader.Rd +++ b/r/man/RecordBatchFileReader.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatchReader.R +% Please edit documentation in R/record-batch-reader.R \docType{class} \name{RecordBatchFileReader} \alias{RecordBatchFileReader} diff --git a/r/man/RecordBatchFileWriter.Rd b/r/man/RecordBatchFileWriter.Rd index 8a80e25d1fc..bbb103dddf0 100644 --- a/r/man/RecordBatchFileWriter.Rd +++ b/r/man/RecordBatchFileWriter.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatchWriter.R +% Please edit documentation 
in R/record-batch-writer.R \docType{class} \name{RecordBatchFileWriter} \alias{RecordBatchFileWriter} diff --git a/r/man/RecordBatchReader.Rd b/r/man/RecordBatchReader.Rd index c2121cd7014..06e0b9686f3 100644 --- a/r/man/RecordBatchReader.Rd +++ b/r/man/RecordBatchReader.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatchReader.R +% Please edit documentation in R/record-batch-reader.R \docType{class} \name{RecordBatchReader} \alias{RecordBatchReader} diff --git a/r/man/RecordBatchStreamReader.Rd b/r/man/RecordBatchStreamReader.Rd index f20c78fe2d6..825f29a72f9 100644 --- a/r/man/RecordBatchStreamReader.Rd +++ b/r/man/RecordBatchStreamReader.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatchReader.R +% Please edit documentation in R/record-batch-reader.R \docType{class} \name{RecordBatchStreamReader} \alias{RecordBatchStreamReader} diff --git a/r/man/RecordBatchStreamWriter.Rd b/r/man/RecordBatchStreamWriter.Rd index 169dd988616..2f9d627fa17 100644 --- a/r/man/RecordBatchStreamWriter.Rd +++ b/r/man/RecordBatchStreamWriter.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatchWriter.R +% Please edit documentation in R/record-batch-writer.R \docType{class} \name{RecordBatchStreamWriter} \alias{RecordBatchStreamWriter} diff --git a/r/man/RecordBatchWriter.Rd b/r/man/RecordBatchWriter.Rd index e48aad74b3d..db8d8e7980f 100644 --- a/r/man/RecordBatchWriter.Rd +++ b/r/man/RecordBatchWriter.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatchWriter.R +% Please edit documentation in R/record-batch-writer.R \docType{class} \name{RecordBatchWriter} \alias{RecordBatchWriter} diff --git a/r/man/Table.Rd b/r/man/Table.Rd index 38f86916fcc..21dc8371eef 100644 --- a/r/man/Table.Rd +++ b/r/man/Table.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by 
hand -% Please edit documentation in R/Table.R +% Please edit documentation in R/table.R \docType{class} \name{Table} \alias{Table} diff --git a/r/man/buffer.Rd b/r/man/buffer.Rd index 2099668563a..49712dc409e 100644 --- a/r/man/buffer.Rd +++ b/r/man/buffer.Rd @@ -12,12 +12,10 @@ buffer(x) \item{x}{R object. Only raw, numeric and integer vectors are currently supported} } \value{ -an instance of \link[=buffer]{Buffer} that borrows memory from \code{x} +an instance of \code{Buffer} that borrows memory from \code{x} } \description{ -class Buffer - -Create a \link[=buffer]{Buffer} from an R object +\code{buffer()} lets you create an \code{arrow::Buffer} from an R object } \section{Methods}{ diff --git a/r/man/compressed_input_stream.Rd b/r/man/compressed_input_stream.Rd deleted file mode 100644 index 6ac740b28d3..00000000000 --- a/r/man/compressed_input_stream.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/compression.R -\name{compressed_input_stream} -\alias{compressed_input_stream} -\title{Compressed input stream} -\usage{ -compressed_input_stream(stream, codec = compression_codec()) -} -\arguments{ -\item{stream}{Underlying raw input stream} - -\item{codec}{a codec} -} -\description{ -Compressed input stream -} diff --git a/r/man/compressed_output_stream.Rd b/r/man/compressed_output_stream.Rd deleted file mode 100644 index 7da746d78f9..00000000000 --- a/r/man/compressed_output_stream.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/compression.R -\name{compressed_output_stream} -\alias{compressed_output_stream} -\title{Compressed output stream} -\usage{ -compressed_output_stream(stream, codec = compression_codec()) -} -\arguments{ -\item{stream}{Underlying raw output stream} - -\item{codec}{a codec} -} -\description{ -Compressed output stream -} -\details{ -This function is not supported in Windows. 
-} diff --git a/r/man/compression.Rd b/r/man/compression.Rd new file mode 100644 index 00000000000..34cc90df34a --- /dev/null +++ b/r/man/compression.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/compression.R +\docType{class} +\name{compression} +\alias{compression} +\alias{CompressedOutputStream} +\alias{CompressedInputStream} +\title{Compressed stream classes} +\description{ +\code{CompressedInputStream} and \code{CompressedOutputStream} +allow you to apply a \code{\link[=compression_codec]{compression_codec()}} to an +input or output stream. +} +\section{Factory}{ + + +The \code{CompressedInputStream$create()} and \code{CompressedOutputStream$create()} +factory methods instantiate the object and take the following arguments: +\itemize{ +\item \code{stream} An \code{InputStream} or \code{OutputStream}, respectively +\item \code{codec} A \code{Codec} +} +} + +\section{Methods}{ + + +Methods are inherited from \link{InputStream} and \link{OutputStream}, respectively +} + +\keyword{datasets} diff --git a/r/man/data-type.Rd b/r/man/data-type.Rd index 45219f22641..9280738194f 100644 --- a/r/man/data-type.Rd +++ b/r/man/data-type.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/type.R, R/List.R, R/Struct.R +% Please edit documentation in R/type.R, R/list.R, R/struct.R \name{data-type} \alias{data-type} \alias{int8} diff --git a/r/man/field.Rd b/r/man/field.Rd index 846a0b0b1b8..c4325f4c821 100644 --- a/r/man/field.Rd +++ b/r/man/field.Rd @@ -1,11 +1,11 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Field.R +% Please edit documentation in R/field.R \docType{class} \name{Field} \alias{Field} \alias{field} \title{class arrow::Field} -\format{An object of class \code{R6ClassGenerator} of length 24.} +\format{An object of class \code{R6ClassGenerator} of length 25.} \usage{ Field @@ -19,7 +19,9 @@ field(name, type, metadata) 
\item{metadata}{currently ignored} } \description{ -class arrow::Field +\code{field()} lets you create an \code{arrow::Field} that maps a +\link[=data-type]{DataType} to a column name. Fields are contained in +\link[=Schema]{Schemas}. } \section{Methods}{ diff --git a/r/man/read_feather.Rd b/r/man/read_feather.Rd index ead6e32e22e..7e720059af7 100644 --- a/r/man/read_feather.Rd +++ b/r/man/read_feather.Rd @@ -21,7 +21,8 @@ of columns, as used in \code{dplyr::select()}.} \item{...}{additional parameters} } \value{ -A \code{data.frame} if \code{as_tibble} is \code{TRUE} (the default), or a \link[=Table]{arrow::Table} otherwise +A \code{data.frame} if \code{as_tibble} is \code{TRUE} (the default), or an +\link[=Table]{arrow::Table} otherwise } \description{ Read a Feather file diff --git a/r/man/read_schema.Rd b/r/man/read_schema.Rd index 408fd1baaa5..1573be2bd5b 100644 --- a/r/man/read_schema.Rd +++ b/r/man/read_schema.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Schema.R +% Please edit documentation in R/schema.R \name{read_schema} \alias{read_schema} \title{read a Schema from a stream} diff --git a/r/man/record_batch.Rd b/r/man/record_batch.Rd index ab91e543725..bf53abd7092 100644 --- a/r/man/record_batch.Rd +++ b/r/man/record_batch.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatch.R +% Please edit documentation in R/record-batch.R \name{record_batch} \alias{record_batch} \title{Create an \link[=RecordBatch]{arrow::RecordBatch} from a data frame} diff --git a/r/man/schema.Rd b/r/man/schema.Rd index 7af930db132..2f960dbce6c 100644 --- a/r/man/schema.Rd +++ b/r/man/schema.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Schema.R +% Please edit documentation in R/schema.R \docType{class} \name{Schema} \alias{Schema} \alias{schema} -\title{class arrow::Schema} +\title{Schema class} \usage{ schema(...) 
} diff --git a/r/tests/testthat/test-compressed.R b/r/tests/testthat/test-compressed.R index 3d0dfdc20e4..008f974215b 100644 --- a/r/tests/testthat/test-compressed.R +++ b/r/tests/testthat/test-compressed.R @@ -31,7 +31,7 @@ test_that("can write Buffer to CompressedOutputStream and read back in Compresse tf2 <- tempfile() sink2 <- FileOutputStream$create(tf2) - stream2 <- compressed_output_stream(sink2) + stream2 <- CompressedOutputStream$create(sink2) expect_equal(stream2$tell(), 0) stream2$write(buf) expect_equal(stream2$tell(), buf$size) @@ -43,7 +43,7 @@ test_that("can write Buffer to CompressedOutputStream and read back in Compresse buf1 <- input1$Read(1024L) file2 <- ReadableFile$create(tf2) - input2 <- compressed_input_stream(file2) + input2 <- CompressedInputStream$create(file2) buf2 <- input2$Read(1024L) expect_equal(buf, buf1) From e6b75f4e02ea1c065d647e6084cb5c956d8b010e Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Fri, 6 Sep 2019 15:46:32 -0700 Subject: [PATCH 26/37] Consolidate and document reader/writer classes; also fix ARROW-6449 --- r/NAMESPACE | 12 ++ r/R/compression.R | 4 +- r/R/io.R | 188 ++++++++-------------- r/R/parquet.R | 14 +- r/R/record-batch-reader.R | 49 +++--- r/R/record-batch-writer.R | 101 ++++-------- r/_pkgdown.yml | 26 +-- r/man/BufferOutputStream.Rd | 17 -- r/man/BufferReader.Rd | 16 -- r/man/FileOutputStream.Rd | 16 -- r/man/FixedSizeBufferWriter.Rd | 16 -- r/man/InputStream.Rd | 38 ++++- r/man/MemoryMappedFile.Rd | 19 --- r/man/MockOutputStream.Rd | 16 -- r/man/OutputStream.Rd | 37 ++++- r/man/ParquetFileReader.Rd | 2 +- r/man/ParquetReaderProperties.Rd | 7 +- r/man/RandomAccessFile.Rd | 16 -- r/man/Readable.Rd | 16 -- r/man/ReadableFile.Rd | 16 -- r/man/RecordBatchFileReader.Rd | 16 -- r/man/RecordBatchFileWriter.Rd | 39 ----- r/man/RecordBatchReader.Rd | 31 +++- r/man/RecordBatchStreamReader.Rd | 16 -- r/man/RecordBatchStreamWriter.Rd | 39 ----- r/man/RecordBatchWriter.Rd | 37 +++-- r/man/compression.Rd | 2 +- 
r/man/write_parquet.Rd | 8 +- r/tests/testthat/test-buffer.R | 2 + r/tests/testthat/test-recordbatchreader.R | 6 +- 30 files changed, 289 insertions(+), 533 deletions(-) delete mode 100644 r/man/BufferOutputStream.Rd delete mode 100644 r/man/BufferReader.Rd delete mode 100644 r/man/FileOutputStream.Rd delete mode 100644 r/man/FixedSizeBufferWriter.Rd delete mode 100644 r/man/MemoryMappedFile.Rd delete mode 100644 r/man/MockOutputStream.Rd delete mode 100644 r/man/RandomAccessFile.Rd delete mode 100644 r/man/Readable.Rd delete mode 100644 r/man/ReadableFile.Rd delete mode 100644 r/man/RecordBatchFileReader.Rd delete mode 100644 r/man/RecordBatchFileWriter.Rd delete mode 100644 r/man/RecordBatchStreamReader.Rd delete mode 100644 r/man/RecordBatchStreamWriter.Rd diff --git a/r/NAMESPACE b/r/NAMESPACE index 9509a23839e..936d9170a2d 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -39,6 +39,8 @@ S3method(write_arrow,character) S3method(write_arrow,raw) export(Array) export(Buffer) +export(BufferOutputStream) +export(BufferReader) export(ChunkedArray) export(CompressedInputStream) export(CompressedOutputStream) @@ -48,10 +50,20 @@ export(FeatherTableReader) export(FeatherTableWriter) export(Field) export(FileMode) +export(FileOutputStream) +export(FixedSizeBufferWriter) +export(MemoryMappedFile) export(MessageReader) export(MessageType) +export(MockOutputStream) export(ParquetFileReader) export(ParquetReaderProperties) +export(RandomAccessFile) +export(ReadableFile) +export(RecordBatchFileReader) +export(RecordBatchFileWriter) +export(RecordBatchStreamReader) +export(RecordBatchStreamWriter) export(Schema) export(StatusCode) export(Table) diff --git a/r/R/compression.R b/r/R/compression.R index 028b919effd..11e56d53f05 100644 --- a/r/R/compression.R +++ b/r/R/compression.R @@ -38,7 +38,7 @@ compression_codec <- function(type = "GZIP") { #' @docType class #' @usage NULL #' @format NULL -#' @description `CompressedInputStream` and `CompressedOutputStream` +#' @description 
`CompressedInputStream` and `CompressedOutputStream` #' allow you to apply a [compression_codec()] to an #' input or output stream. #' @@ -47,7 +47,7 @@ compression_codec <- function(type = "GZIP") { #' The `CompressedInputStream$create()` and `CompressedOutputStream$create()` #' factory methods instantiate the object and take the following arguments: #' -#' - `stream` An `InputStream` or `OutputStream`, respectively +#' - `stream` An [InputStream] or [OutputStream], respectively #' - `codec` A `Codec` #' #' @section Methods: diff --git a/r/R/io.R b/r/R/io.R index e13899a693c..db0b4e35c5f 100644 --- a/r/R/io.R +++ b/r/R/io.R @@ -27,16 +27,37 @@ Writable <- R6Class("Writable", inherit = Object, ) ) -#' @title OutputStream -#' +#' @title OutputStream classes +#' @description `FileOutputStream` is for writing to a file; +#' `BufferOutputStream` and `FixedSizeBufferWriter` write to buffers; +#' `MockOutputStream` just reports back how many bytes it received, for testing +#' purposes. You can create one and pass it to any of the table writers, for +#' example. #' @usage NULL #' @format NULL #' @docType class +#' @section Factory: +#' +#' The `$create()` factory methods instantiate the `OutputStream` object and +#' take the following arguments, depending on the subclass: +#' +#' - `path` For `FileOutputStream`, a character file name +#' - `initial_capacity` For `BufferOutputStream`, the size in bytes of the +#' buffer. +#' - `x` For `FixedSizeBufferWriter`, a [Buffer] or an object that can be +#' made into a buffer via `buffer()`. +#' +#' `MockOutputStream$create()` does not take any arguments. 
#' #' @section Methods: #' -#' - Buffer `Read`(`int` nbytes): Read `nbytes` bytes -#' - `void` `close`(): close the stream +#' - `$tell()`: return the position in the stream +#' - `$close()`: close the stream +#' - `$write(x)`: send `x` to the stream +#' - `$capacity()`: for `BufferOutputStream` +#' - `$getvalue()`: for `BufferOutputStream` +#' - `$GetExtentBytesWritten()`: for `MockOutputStream`, report how many bytes +#' were sent. #' #' @rdname OutputStream #' @name OutputStream @@ -47,87 +68,50 @@ OutputStream <- R6Class("OutputStream", inherit = Writable, ) ) -#' @title class arrow::io::FileOutputStream -#' #' @usage NULL #' @format NULL -#' @docType class -#' -#' @section Methods: -#' -#' TODO -#' -#' @rdname FileOutputStream -#' @name FileOutputStream +#' @rdname OutputStream +#' @export FileOutputStream <- R6Class("FileOutputStream", inherit = OutputStream) - FileOutputStream$create <- function(path) { path <- normalizePath(path, mustWork = FALSE) shared_ptr(FileOutputStream, io___FileOutputStream__Open(path)) } -#' @title class arrow::io::MockOutputStream -#' #' @usage NULL #' @format NULL -#' @docType class -#' -#' -#' @section Methods: -#' -#' TODO -#' -#' @rdname MockOutputStream -#' @name MockOutputStream +#' @rdname OutputStream +#' @export MockOutputStream <- R6Class("MockOutputStream", inherit = OutputStream, public = list( GetExtentBytesWritten = function() io___MockOutputStream__GetExtentBytesWritten(self) ) ) - MockOutputStream$create <- function() { shared_ptr(MockOutputStream, io___MockOutputStream__initialize()) } -#' @title class arrow::io::BufferOutputStream -#' #' @usage NULL -#' @docType class -#' @section Methods: -#' -#' TODO -#' -#' @rdname BufferOutputStream -#' @name BufferOutputStream +#' @format NULL +#' @rdname OutputStream +#' @export BufferOutputStream <- R6Class("BufferOutputStream", inherit = OutputStream, public = list( capacity = function() io___BufferOutputStream__capacity(self), getvalue = function() shared_ptr(Buffer, 
io___BufferOutputStream__Finish(self)), - - Write = function(bytes) io___BufferOutputStream__Write(self, bytes), - Tell = function() io___BufferOutputStream__Tell(self) + write = function(bytes) io___BufferOutputStream__Write(self, bytes), + tell = function() io___BufferOutputStream__Tell(self) ) ) - BufferOutputStream$create <- function(initial_capacity = 0L) { shared_ptr(BufferOutputStream, io___BufferOutputStream__Create(initial_capacity)) } -#' @title class arrow::io::FixedSizeBufferWriter -#' #' @usage NULL #' @format NULL -#' @docType class -#' -#' -#' @section Methods: -#' -#' TODO -#' -#' @rdname FixedSizeBufferWriter -#' @name FixedSizeBufferWriter +#' @rdname OutputStream +#' @export FixedSizeBufferWriter <- R6Class("FixedSizeBufferWriter", inherit = OutputStream) - FixedSizeBufferWriter$create <- function(x) { x <- buffer(x) assert_that(x$is_mutable) @@ -136,35 +120,43 @@ FixedSizeBufferWriter$create <- function(x) { # InputStream ------------------------------------------------------------- -#' @title class arrow::io::Readable -#' -#' @usage NULL -#' @format NULL -#' @docType class -#' -#' -#' @section Methods: -#' -#' TODO -#' -#' @rdname Readable -#' @name Readable + Readable <- R6Class("Readable", inherit = Object, public = list( Read = function(nbytes) shared_ptr(Buffer, io___Readable__Read(self, nbytes)) ) ) -#' @title class arrow::io::InputStream -#' +#' @title InputStream classes +#' @description `RandomAccessFile` inherits from `InputStream` and is a base +#' class for: `ReadableFile` for reading from a file; `MemoryMappedFile` for +#' the same but with memory mapping; and `BufferReader` for reading from a +#' buffer. Use these with the various table readers. 
#' @usage NULL #' @format NULL #' @docType class +#' @section Factory: #' +#' The `$create()` factory methods instantiate the `InputStream` object and +#' take the following arguments, depending on the subclass: +#' +#' - `path` For `ReadableFile`, a character file name +#' - `x` For `BufferReader`, a [Buffer] or an object that can be +#' made into a buffer via `buffer()`. +#' +#' To instantiate a `MemoryMappedFile`, call [mmap_open()]. #' #' @section Methods: #' -#' TODO +#' - `$GetSize()`: +#' - `$supports_zero_copy()`: Logical +#' - `$seek(position)`: go to that position in the stream +#' - `$tell()`: return the position in the stream +#' - `$close()`: close the stream +#' - `$Read(nbytes)`: read data from the stream, either a specified `nbytes` or +#' all, if `nbytes` is not provided +#' - `$ReadAt(position, nbytes)`: similar to `$seek(position)$Read(nbytes)` +#' - `$Resize(size)`: for a `MemoryMappedFile` that is writeable #' #' @rdname InputStream #' @name InputStream @@ -174,25 +166,16 @@ InputStream <- R6Class("InputStream", inherit = Readable, ) ) -#' @title class arrow::io::RandomAccessFile -#' #' @usage NULL #' @format NULL -#' @docType class -#' -#' -#' @section Methods: -#' -#' TODO -#' -#' @rdname RandomAccessFile -#' @name RandomAccessFile +#' @rdname InputStream +#' @export RandomAccessFile <- R6Class("RandomAccessFile", inherit = InputStream, public = list( GetSize = function() io___RandomAccessFile__GetSize(self), supports_zero_copy = function() io___RandomAccessFile__supports_zero_copy(self), - Seek = function(position) io___RandomAccessFile__Seek(self, position), - Tell = function() io___RandomAccessFile__Tell(self), + seek = function(position) io___RandomAccessFile__Seek(self, position), + tell = function() io___RandomAccessFile__Tell(self), Read = function(nbytes = NULL) { if (is.null(nbytes)) { @@ -211,61 +194,30 @@ RandomAccessFile <- R6Class("RandomAccessFile", inherit = InputStream, ) ) -#' @title class arrow::io::MemoryMappedFile -#' #' 
@usage NULL #' @format NULL -#' @docType class -#' -#' -#' @section Methods: -#' -#' TODO -#' -#' @seealso [mmap_open()], [mmap_create()] -#' -#' -#' @rdname MemoryMappedFile -#' @name MemoryMappedFile +#' @rdname InputStream +#' @export MemoryMappedFile <- R6Class("MemoryMappedFile", inherit = RandomAccessFile, public = list( Resize = function(size) io___MemoryMappedFile__Resize(self, size) ) ) -#' @title class arrow::io::ReadableFile -#' #' @usage NULL #' @format NULL -#' @docType class -#' -#' -#' @section Methods: -#' -#' TODO -#' -#' @rdname ReadableFile -#' @name ReadableFile +#' @rdname InputStream +#' @export ReadableFile <- R6Class("ReadableFile", inherit = RandomAccessFile) - ReadableFile$create <- function(path) { shared_ptr(ReadableFile, io___ReadableFile__Open(normalizePath(path))) } -#' @title class arrow::io::BufferReader -#' #' @usage NULL #' @format NULL -#' @docType class -#' -#' @section Methods: -#' -#' TODO -#' -#' @rdname BufferReader -#' @name BufferReader +#' @rdname InputStream +#' @export BufferReader <- R6Class("BufferReader", inherit = RandomAccessFile) - BufferReader$create <- function(x) { x <- buffer(x) shared_ptr(BufferReader, io___BufferReader__initialize(x)) diff --git a/r/R/parquet.R b/r/R/parquet.R index b680a4da5bf..65128f7c872 100644 --- a/r/R/parquet.R +++ b/r/R/parquet.R @@ -79,7 +79,7 @@ read_parquet <- function(file, #' f <- system.file("v0.7.1.parquet", package="arrow") #' pq <- ParquetFileReader$create(f) #' pq$GetSchema() -#' tab <- pq$ReadTable() +#' tab <- pq$ReadTable(starts_with("c")) #' tab$schema #' } #' @include arrow-package.R @@ -130,7 +130,9 @@ ParquetFileReader$create <- function(file, #' #' @section Methods: #' -#' TODO +#' - `$read_dictionary(column_index)` +#' - `$set_read_dictionary(column_index, read_dict)` +#' - `$use_threads(use_threads)` #' #' @export ParquetReaderProperties <- R6Class("ParquetReaderProperties", @@ -172,11 +174,9 @@ ParquetReaderProperties$create <- function(use_threads = 
option_use_threads()) { #' #' @examples #' \donttest{ -#' try({ -#' tf <- tempfile(fileext = ".parquet") -#' on.exit(unlink(tf)) -#' write_parquet(tibble::tibble(x = 1:5), tf) -#' }) +#' tf <- tempfile(fileext = ".parquet") +#' on.exit(unlink(tf)) +#' write_parquet(tibble::tibble(x = 1:5), tf) #' } #' @export write_parquet <- function(table, file) { diff --git a/r/R/record-batch-reader.R b/r/R/record-batch-reader.R index 056de939342..4122d3ddae1 100644 --- a/r/R/record-batch-reader.R +++ b/r/R/record-batch-reader.R @@ -15,20 +15,36 @@ # specific language governing permissions and limitations # under the License. -#' @include arrow-package.R -#' @title class arrow::RecordBatchReader -#' +#' @title RecordBatchReader classes +#' @description `RecordBatchFileReader` and `RecordBatchStreamReader` are +#' interfaces for generating record batches from different input sources. #' @usage NULL #' @format NULL #' @docType class +#' @section Factory: +#' +#' The `RecordBatchFileReader$create()` and `RecordBatchStreamReader$create()` +#' factory methods instantiate the object and +#' take a single argument, named according to the class: +#' +#' - `file` A character file name, raw vector, or Arrow file connection object +#' (e.g. `RandomAccessFile`). +#' - `stream` A raw vector, [Buffer], or `InputStream`. #' #' @section Methods: #' -#' TODO +#' - `$read_next_batch()`: Returns a `RecordBatch` +#' - `$schema()`: Returns a [Schema] +#' - `$batches()`: Returns a list of `RecordBatch`es +#' - `$get_batch(i)`: For `RecordBatchFileReader`, return a particular batch +#' by an integer index. +#' - `$num_record_batches()`: For `RecordBatchFileReader`, see how many batches +#' are in the file. 
#' #' @rdname RecordBatchReader #' @name RecordBatchReader +#' @include arrow-package.R RecordBatchReader <- R6Class("RecordBatchReader", inherit = Object, public = list( read_next_batch = function() { @@ -40,18 +56,10 @@ RecordBatchReader <- R6Class("RecordBatchReader", inherit = Object, ) ) -#' @title class arrow::RecordBatchStreamReader -#' +#' @rdname RecordBatchReader #' @usage NULL #' @format NULL -#' @docType class -#' -#' @section Methods: -#' -#' TODO -#' -#' @rdname RecordBatchStreamReader -#' @name RecordBatchStreamReader +#' @export RecordBatchStreamReader <- R6Class("RecordBatchStreamReader", inherit = RecordBatchReader, public = list( batches = function() map(ipc___RecordBatchStreamReader__batches(self), shared_ptr, class = RecordBatch) @@ -66,18 +74,10 @@ RecordBatchStreamReader$create <- function(stream){ shared_ptr(RecordBatchStreamReader, ipc___RecordBatchStreamReader__Open(stream)) } -#' @title class arrow::RecordBatchFileReader -#' +#' @rdname RecordBatchReader #' @usage NULL #' @format NULL -#' @docType class -#' -#' @section Methods: -#' -#' TODO -#' -#' @rdname RecordBatchFileReader -#' @name RecordBatchFileReader +#' @export RecordBatchFileReader <- R6Class("RecordBatchFileReader", inherit = Object, # Why doesn't this inherit from RecordBatchReader? public = list( @@ -90,7 +90,6 @@ RecordBatchFileReader <- R6Class("RecordBatchFileReader", inherit = Object, schema = function() shared_ptr(Schema, ipc___RecordBatchFileReader__schema(self)) ) ) - RecordBatchFileReader$create <- function(file) { file <- make_readable_file(file) shared_ptr(RecordBatchFileReader, ipc___RecordBatchFileReader__Open(file)) diff --git a/r/R/record-batch-writer.R b/r/R/record-batch-writer.R index acf5588e9ee..d58efbb5a11 100644 --- a/r/R/record-batch-writer.R +++ b/r/R/record-batch-writer.R @@ -15,27 +15,42 @@ # specific language governing permissions and limitations # under the License. 
-#' @include arrow-package.R - -#' @title class arrow::RecordBatchWriter -#' +#' @title RecordBatchWriter classes +#' @description `RecordBatchFileWriter` and `RecordBatchStreamWriter` are +#' interfaces for writing record batches to either the binary file or streaming +#' format. #' @usage NULL #' @format NULL #' @docType class +#' @section Usage: #' -#' @section Methods: +#' ``` +#' writer <- RecordBatchStreamWriter$create(sink, schema) #' -#' - `$write_batch(batch)`: Write record batch to stream -#' - `$write_table(table)`: write Table to stream -#' - `$close()`: close stream +#' writer$write_batch(batch) +#' writer$write_table(table) +#' writer$close() +#' ``` +#' @section Factory: #' -#' @section Derived classes: +#' The `RecordBatchFileWriter$create()` and `RecordBatchStreamWriter$create()` +#' factory methods instantiate the object and +#' take a single argument, named according to the class: +#' +#' - `sink` A character file name or an `OutputStream`. +#' - `schema` A [Schema] for the data to be written. 
+#' +#' @section Methods: #' -#' - [arrow::RecordBatchStreamWriter][RecordBatchStreamWriter] implements the streaming binary format -#' - [arrow::RecordBatchFileWriter][RecordBatchFileWriter] implements the binary file format +#' - `$write(x)`: Write a [RecordBatch], [Table], or `data.frame`, dispatching +#' to the methods below appropriately +#' - `$write_batch(batch)`: Write a `RecordBatch` to stream +#' - `$write_table(table)`: Write a `Table` to stream +#' - `$close()`: close stream #' #' @rdname RecordBatchWriter #' @name RecordBatchWriter +#' @include arrow-package.R RecordBatchWriter <- R6Class("RecordBatchWriter", inherit = Object, public = list( write_batch = function(batch) ipc___RecordBatchWriter__WriteRecordBatch(self, batch), @@ -57,39 +72,11 @@ RecordBatchWriter <- R6Class("RecordBatchWriter", inherit = Object, ) ) -#' @title class arrow::RecordBatchStreamWriter -#' -#' Writer for the Arrow streaming binary format -#' #' @usage NULL #' @format NULL -#' @docType class -#' -#' @section usage: -#' -#' ``` -#' writer <- RecordBatchStreamWriter$create(sink, schema) -#' -#' writer$write_batch(batch) -#' writer$write_table(table) -#' writer$close() -#' ``` -#' -#' @section Factory: -#' -#' The [RecordBatchStreamWriter()] function creates a record batch stream writer. 
-#' -#' @section Methods: -#' inherited from [arrow::RecordBatchWriter][RecordBatchWriter] -#' -#' - `$write_batch(batch)`: Write record batch to stream -#' - `$write_table(table)`: write Table to stream -#' - `$close()`: close stream -#' -#' @rdname RecordBatchStreamWriter -#' @name RecordBatchStreamWriter +#' @rdname RecordBatchWriter +#' @export RecordBatchStreamWriter <- R6Class("RecordBatchStreamWriter", inherit = RecordBatchWriter) - RecordBatchStreamWriter$create <- function(sink, schema) { if (is.character(sink)) { sink <- FileOutputStream$create(sink) @@ -100,39 +87,11 @@ RecordBatchStreamWriter$create <- function(sink, schema) { shared_ptr(RecordBatchStreamWriter, ipc___RecordBatchStreamWriter__Open(sink, schema)) } -#' @title class arrow::RecordBatchFileWriter -#' -#' Writer for the Arrow binary file format -#' #' @usage NULL #' @format NULL -#' @docType class -#' -#' @section usage: -#' -#' ``` -#' writer <- RecordBatchFileWriter$create(sink, schema) -#' -#' writer$write_batch(batch) -#' writer$write_table(table) -#' writer$close() -#' ``` -#' -#' @section Factory: -#' -#' The [RecordBatchFileWriter()] function creates a record batch stream writer. 
-#' -#' @section Methods: -#' inherited from [arrow::RecordBatchWriter][RecordBatchWriter] -#' -#' - `$write_batch(batch)`: Write record batch to stream -#' - `$write_table(table)`: write Table to stream -#' - `$close()`: close stream -#' -#' @rdname RecordBatchFileWriter -#' @name RecordBatchFileWriter +#' @rdname RecordBatchWriter +#' @export RecordBatchFileWriter <- R6Class("RecordBatchFileWriter", inherit = RecordBatchStreamWriter) - RecordBatchFileWriter$create <- function(sink, schema) { if (is.character(sink)) { sink <- FileOutputStream$create(sink) diff --git a/r/_pkgdown.yml b/r/_pkgdown.yml index b915c2f130e..bad95d2e90a 100644 --- a/r/_pkgdown.yml +++ b/r/_pkgdown.yml @@ -49,6 +49,7 @@ reference: - read_json_arrow - read_feather - read_parquet + - read_table - write_arrow - write_feather - write_parquet @@ -66,6 +67,8 @@ reference: - FeatherTableReader - FeatherTableWriter - JsonTableReader + - RecordBatchReader + - RecordBatchWriter - title: Arrow data containers contents: - buffer @@ -78,7 +81,6 @@ reference: - Table - read_message - read_record_batch - - read_table - title: Arrow data types and schema contents: - Schema @@ -93,29 +95,13 @@ reference: - cast_options - title: Input/Output contents: - - Buffer - - RecordBatchReader - - MemoryPool - - BufferOutputStream - - BufferReader - - FileOutputStream - - FixedSizeBufferWriter - InputStream - - MemoryMappedFile - - MockOutputStream + - mmap_open + - mmap_create - OutputStream - - RandomAccessFile - - Readable - - ReadableFile - Message - MessageReader - - RecordBatchFileReader - - RecordBatchFileWriter - - RecordBatchStreamReader - - RecordBatchStreamWriter - - RecordBatchWriter - compression - compression_codec + - MemoryPool - default_memory_pool - - mmap_create - - mmap_open diff --git a/r/man/BufferOutputStream.Rd b/r/man/BufferOutputStream.Rd deleted file mode 100644 index b9d5fed1279..00000000000 --- a/r/man/BufferOutputStream.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not 
edit by hand -% Please edit documentation in R/io.R -\docType{class} -\name{BufferOutputStream} -\alias{BufferOutputStream} -\title{class arrow::io::BufferOutputStream} -\format{An object of class \code{R6ClassGenerator} of length 25.} -\description{ -class arrow::io::BufferOutputStream -} -\section{Methods}{ - - -TODO -} - -\keyword{datasets} diff --git a/r/man/BufferReader.Rd b/r/man/BufferReader.Rd deleted file mode 100644 index 5c1ed335d35..00000000000 --- a/r/man/BufferReader.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/io.R -\docType{class} -\name{BufferReader} -\alias{BufferReader} -\title{class arrow::io::BufferReader} -\description{ -class arrow::io::BufferReader -} -\section{Methods}{ - - -TODO -} - -\keyword{datasets} diff --git a/r/man/FileOutputStream.Rd b/r/man/FileOutputStream.Rd deleted file mode 100644 index bd37eb7d6dc..00000000000 --- a/r/man/FileOutputStream.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/io.R -\docType{class} -\name{FileOutputStream} -\alias{FileOutputStream} -\title{class arrow::io::FileOutputStream} -\description{ -class arrow::io::FileOutputStream -} -\section{Methods}{ - - -TODO -} - -\keyword{datasets} diff --git a/r/man/FixedSizeBufferWriter.Rd b/r/man/FixedSizeBufferWriter.Rd deleted file mode 100644 index 89f61e1397d..00000000000 --- a/r/man/FixedSizeBufferWriter.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/io.R -\docType{class} -\name{FixedSizeBufferWriter} -\alias{FixedSizeBufferWriter} -\title{class arrow::io::FixedSizeBufferWriter} -\description{ -class arrow::io::FixedSizeBufferWriter -} -\section{Methods}{ - - -TODO -} - -\keyword{datasets} diff --git a/r/man/InputStream.Rd b/r/man/InputStream.Rd index d1fb12c6929..57b49c99dd8 100644 --- a/r/man/InputStream.Rd +++ b/r/man/InputStream.Rd @@ -3,14 +3,44 @@ 
\docType{class} \name{InputStream} \alias{InputStream} -\title{class arrow::io::InputStream} +\alias{RandomAccessFile} +\alias{MemoryMappedFile} +\alias{ReadableFile} +\alias{BufferReader} +\title{InputStream classes} \description{ -class arrow::io::InputStream +\code{RandomAccessFile} inherits from \code{InputStream} and is a base +class for: \code{ReadableFile} for reading from a file; \code{MemoryMappedFile} for +the same but with memory mapping; and \code{BufferReader} for reading from a +buffer. Use these with the various table readers. +} +\section{Factory}{ + + +The \code{$create()} factory methods instantiate the \code{InputStream} object and +take the following arguments, depending on the subclass: +\itemize{ +\item \code{path} For \code{ReadableFile}, a character file name +\item \code{x} For \code{BufferReader}, a \link{Buffer} or an object that can be +made into a buffer via \code{buffer()}. } -\section{Methods}{ +To instantiate a \code{MemoryMappedFile}, call \code{\link[=mmap_open]{mmap_open()}}. 
+} + +\section{Methods}{ -TODO +\itemize{ +\item \code{$GetSize()}: +\item \code{$supports_zero_copy()}: Logical +\item \code{$seek(position)}: go to that position in the stream +\item \code{$tell()}: return the position in the stream +\item \code{$close()}: close the stream +\item \code{$Read(nbytes)}: read data from the stream, either a specified \code{nbytes} or +all, if \code{nbytes} is not provided +\item \code{$ReadAt(position, nbytes)}: similar to \code{$seek(position)$Read(nbytes)} +\item \code{$Resize(size)}: for a \code{MemoryMappedFile} that is writeable +} } \keyword{datasets} diff --git a/r/man/MemoryMappedFile.Rd b/r/man/MemoryMappedFile.Rd deleted file mode 100644 index 02f7120a614..00000000000 --- a/r/man/MemoryMappedFile.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/io.R -\docType{class} -\name{MemoryMappedFile} -\alias{MemoryMappedFile} -\title{class arrow::io::MemoryMappedFile} -\description{ -class arrow::io::MemoryMappedFile -} -\section{Methods}{ - - -TODO -} - -\seealso{ -\code{\link[=mmap_open]{mmap_open()}}, \code{\link[=mmap_create]{mmap_create()}} -} -\keyword{datasets} diff --git a/r/man/MockOutputStream.Rd b/r/man/MockOutputStream.Rd deleted file mode 100644 index 35392155399..00000000000 --- a/r/man/MockOutputStream.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/io.R -\docType{class} -\name{MockOutputStream} -\alias{MockOutputStream} -\title{class arrow::io::MockOutputStream} -\description{ -class arrow::io::MockOutputStream -} -\section{Methods}{ - - -TODO -} - -\keyword{datasets} diff --git a/r/man/OutputStream.Rd b/r/man/OutputStream.Rd index 87a79ac578e..95661d172d3 100644 --- a/r/man/OutputStream.Rd +++ b/r/man/OutputStream.Rd @@ -3,15 +3,44 @@ \docType{class} \name{OutputStream} \alias{OutputStream} -\title{OutputStream} +\alias{FileOutputStream} +\alias{MockOutputStream} 
+\alias{BufferOutputStream} +\alias{FixedSizeBufferWriter} +\title{OutputStream classes} \description{ -OutputStream +\code{FileOutputStream} is for writing to a file; +\code{BufferOutputStream} and \code{FixedSizeBufferWriter} write to buffers; +\code{MockOutputStream} just reports back how many bytes it received, for testing +purposes. You can create one and pass it to any of the table writers, for +example. } +\section{Factory}{ + + +The \code{$create()} factory methods instantiate the \code{OutputStream} object and +take the following arguments, depending on the subclass: +\itemize{ +\item \code{path} For \code{FileOutputStream}, a character file name +\item \code{initial_capacity} For \code{BufferOutputStream}, the size in bytes of the +buffer. +\item \code{x} For \code{FixedSizeBufferWriter}, a \link{Buffer} or an object that can be +made into a buffer via \code{buffer()}. +} + +\code{MockOutputStream$create()} does not take any arguments. +} + \section{Methods}{ \itemize{ -\item Buffer \code{Read}(\code{int} nbytes): Read \code{nbytes} bytes -\item \code{void} \code{close}(): close the stream +\item \code{$tell()}: return the position in the stream +\item \code{$close()}: close the stream +\item \code{$write(x)}: send \code{x} to the stream +\item \code{$capacity()}: for \code{BufferOutputStream} +\item \code{$getvalue()}: for \code{BufferOutputStream} +\item \code{$GetExtentBytesWritten()}: for \code{MockOutputStream}, report how many bytes +were sent. 
} } diff --git a/r/man/ParquetFileReader.Rd b/r/man/ParquetFileReader.Rd index a9b6115c604..1ebc20cddc1 100644 --- a/r/man/ParquetFileReader.Rd +++ b/r/man/ParquetFileReader.Rd @@ -36,7 +36,7 @@ with columns filtered by a character vector of column names or a f <- system.file("v0.7.1.parquet", package="arrow") pq <- ParquetFileReader$create(f) pq$GetSchema() -tab <- pq$ReadTable() +tab <- pq$ReadTable(starts_with("c")) tab$schema } } diff --git a/r/man/ParquetReaderProperties.Rd b/r/man/ParquetReaderProperties.Rd index c183b2a2609..90de601a6db 100644 --- a/r/man/ParquetReaderProperties.Rd +++ b/r/man/ParquetReaderProperties.Rd @@ -20,8 +20,11 @@ and takes the following arguments: \section{Methods}{ - -TODO +\itemize{ +\item \code{$read_dictionary(column_index)} +\item \code{$set_read_dictionary(column_index, read_dict)} +\item \code{$use_threads(use_threads)} +} } \keyword{datasets} diff --git a/r/man/RandomAccessFile.Rd b/r/man/RandomAccessFile.Rd deleted file mode 100644 index ac53ac5b98c..00000000000 --- a/r/man/RandomAccessFile.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/io.R -\docType{class} -\name{RandomAccessFile} -\alias{RandomAccessFile} -\title{class arrow::io::RandomAccessFile} -\description{ -class arrow::io::RandomAccessFile -} -\section{Methods}{ - - -TODO -} - -\keyword{datasets} diff --git a/r/man/Readable.Rd b/r/man/Readable.Rd deleted file mode 100644 index 5f46c7ec479..00000000000 --- a/r/man/Readable.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/io.R -\docType{class} -\name{Readable} -\alias{Readable} -\title{class arrow::io::Readable} -\description{ -class arrow::io::Readable -} -\section{Methods}{ - - -TODO -} - -\keyword{datasets} diff --git a/r/man/ReadableFile.Rd b/r/man/ReadableFile.Rd deleted file mode 100644 index 8cd3960b918..00000000000 --- a/r/man/ReadableFile.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% 
Generated by roxygen2: do not edit by hand -% Please edit documentation in R/io.R -\docType{class} -\name{ReadableFile} -\alias{ReadableFile} -\title{class arrow::io::ReadableFile} -\description{ -class arrow::io::ReadableFile -} -\section{Methods}{ - - -TODO -} - -\keyword{datasets} diff --git a/r/man/RecordBatchFileReader.Rd b/r/man/RecordBatchFileReader.Rd deleted file mode 100644 index c4814ad7656..00000000000 --- a/r/man/RecordBatchFileReader.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/record-batch-reader.R -\docType{class} -\name{RecordBatchFileReader} -\alias{RecordBatchFileReader} -\title{class arrow::RecordBatchFileReader} -\description{ -class arrow::RecordBatchFileReader -} -\section{Methods}{ - - -TODO -} - -\keyword{datasets} diff --git a/r/man/RecordBatchFileWriter.Rd b/r/man/RecordBatchFileWriter.Rd deleted file mode 100644 index bbb103dddf0..00000000000 --- a/r/man/RecordBatchFileWriter.Rd +++ /dev/null @@ -1,39 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/record-batch-writer.R -\docType{class} -\name{RecordBatchFileWriter} -\alias{RecordBatchFileWriter} -\title{class arrow::RecordBatchFileWriter - -Writer for the Arrow binary file format} -\description{ -class arrow::RecordBatchFileWriter - -Writer for the Arrow binary file format -} -\section{usage}{ -\preformatted{writer <- RecordBatchFileWriter$create(sink, schema) - -writer$write_batch(batch) -writer$write_table(table) -writer$close() -} -} - -\section{Factory}{ - - -The \code{\link[=RecordBatchFileWriter]{RecordBatchFileWriter()}} function creates a record batch stream writer. 
-} - -\section{Methods}{ - -inherited from \link[=RecordBatchWriter]{arrow::RecordBatchWriter} -\itemize{ -\item \code{$write_batch(batch)}: Write record batch to stream -\item \code{$write_table(table)}: write Table to stream -\item \code{$close()}: close stream -} -} - -\keyword{datasets} diff --git a/r/man/RecordBatchReader.Rd b/r/man/RecordBatchReader.Rd index 06e0b9686f3..5ed6ba4b4b9 100644 --- a/r/man/RecordBatchReader.Rd +++ b/r/man/RecordBatchReader.Rd @@ -3,14 +3,37 @@ \docType{class} \name{RecordBatchReader} \alias{RecordBatchReader} -\title{class arrow::RecordBatchReader} +\alias{RecordBatchStreamReader} +\alias{RecordBatchFileReader} +\title{RecordBatchReader classes} \description{ -class arrow::RecordBatchReader +\code{RecordBatchFileReader} and \code{RecordBatchStreamReader} are +interfaces for generating record batches from different input sources. } -\section{Methods}{ +\section{Factory}{ + +The \code{RecordBatchFileReader$create()} and \code{RecordBatchStreamReader$create()} +factory methods instantiate the object and +take a single argument, named according to the class: +\itemize{ +\item \code{file} A character file name, raw vector, or Arrow file connection object +(e.g. \code{RandomAccessFile}). +\item \code{stream} A raw vector, \link{Buffer}, or \code{InputStream}. +} +} -TODO +\section{Methods}{ + +\itemize{ +\item \code{$read_next_batch()}: Returns a \code{RecordBatch} +\item \code{$schema()}: Returns a \link{Schema} +\item \code{$batches()}: Returns a list of \code{RecordBatch}es +\item \code{$get_batch(i)}: For \code{RecordBatchFileReader}, return a particular batch +by an integer index. +\item \code{$num_record_batches()}: For \code{RecordBatchFileReader}, see how many batches +are in the file. 
+} } \keyword{datasets} diff --git a/r/man/RecordBatchStreamReader.Rd b/r/man/RecordBatchStreamReader.Rd deleted file mode 100644 index 825f29a72f9..00000000000 --- a/r/man/RecordBatchStreamReader.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/record-batch-reader.R -\docType{class} -\name{RecordBatchStreamReader} -\alias{RecordBatchStreamReader} -\title{class arrow::RecordBatchStreamReader} -\description{ -class arrow::RecordBatchStreamReader -} -\section{Methods}{ - - -TODO -} - -\keyword{datasets} diff --git a/r/man/RecordBatchStreamWriter.Rd b/r/man/RecordBatchStreamWriter.Rd deleted file mode 100644 index 2f9d627fa17..00000000000 --- a/r/man/RecordBatchStreamWriter.Rd +++ /dev/null @@ -1,39 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/record-batch-writer.R -\docType{class} -\name{RecordBatchStreamWriter} -\alias{RecordBatchStreamWriter} -\title{class arrow::RecordBatchStreamWriter - -Writer for the Arrow streaming binary format} -\description{ -class arrow::RecordBatchStreamWriter - -Writer for the Arrow streaming binary format -} -\section{usage}{ -\preformatted{writer <- RecordBatchStreamWriter$create(sink, schema) - -writer$write_batch(batch) -writer$write_table(table) -writer$close() -} -} - -\section{Factory}{ - - -The \code{\link[=RecordBatchStreamWriter]{RecordBatchStreamWriter()}} function creates a record batch stream writer. 
-} - -\section{Methods}{ - -inherited from \link[=RecordBatchWriter]{arrow::RecordBatchWriter} -\itemize{ -\item \code{$write_batch(batch)}: Write record batch to stream -\item \code{$write_table(table)}: write Table to stream -\item \code{$close()}: close stream -} -} - -\keyword{datasets} diff --git a/r/man/RecordBatchWriter.Rd b/r/man/RecordBatchWriter.Rd index db8d8e7980f..46e0b87ba45 100644 --- a/r/man/RecordBatchWriter.Rd +++ b/r/man/RecordBatchWriter.Rd @@ -3,24 +3,43 @@ \docType{class} \name{RecordBatchWriter} \alias{RecordBatchWriter} -\title{class arrow::RecordBatchWriter} +\alias{RecordBatchStreamWriter} +\alias{RecordBatchFileWriter} +\title{RecordBatchWriter classes} \description{ -class arrow::RecordBatchWriter +\code{RecordBatchFileWriter} and \code{RecordBatchStreamWriter} are +interfaces for writing record batches to either the binary file or streaming +format. } -\section{Methods}{ +\section{Usage}{ +\preformatted{writer <- RecordBatchStreamWriter$create(sink, schema) + +writer$write_batch(batch) +writer$write_table(table) +writer$close() +} +} + +\section{Factory}{ + +The \code{RecordBatchFileWriter$create()} and \code{RecordBatchStreamWriter$create()} +factory methods instantiate the object and +take a single argument, named according to the class: \itemize{ -\item \code{$write_batch(batch)}: Write record batch to stream -\item \code{$write_table(table)}: write Table to stream -\item \code{$close()}: close stream +\item \code{sink} A character file name or an \code{OutputStream}. +\item \code{schema} A \link{Schema} for the data to be written. 
} } -\section{Derived classes}{ +\section{Methods}{ \itemize{ -\item \link[=RecordBatchStreamWriter]{arrow::RecordBatchStreamWriter} implements the streaming binary format -\item \link[=RecordBatchFileWriter]{arrow::RecordBatchFileWriter} implements the binary file format +\item \code{$write(x)}: Write a \link{RecordBatch}, \link{Table}, or \code{data.frame}, dispatching +to the methods below appropriately +\item \code{$write_batch(batch)}: Write a \code{RecordBatch} to stream +\item \code{$write_table(table)}: Write a \code{Table} to stream +\item \code{$close()}: close stream } } diff --git a/r/man/compression.Rd b/r/man/compression.Rd index 34cc90df34a..e9d0ca5d493 100644 --- a/r/man/compression.Rd +++ b/r/man/compression.Rd @@ -17,7 +17,7 @@ input or output stream. The \code{CompressedInputStream$create()} and \code{CompressedOutputStream$create()} factory methods instantiate the object and take the following arguments: \itemize{ -\item \code{stream} An \code{InputStream} or \code{OutputStream}, respectively +\item \code{stream} An \link{InputStream} or \link{OutputStream}, respectively \item \code{codec} A \code{Codec} } } diff --git a/r/man/write_parquet.Rd b/r/man/write_parquet.Rd index d24901a292a..b0fb7bc6761 100644 --- a/r/man/write_parquet.Rd +++ b/r/man/write_parquet.Rd @@ -17,10 +17,8 @@ This function enables you to write Parquet files from R. 
} \examples{ \donttest{ -try({ - tf <- tempfile(fileext = ".parquet") - on.exit(unlink(tf)) - write_parquet(tibble::tibble(x = 1:5), tf) -}) +tf <- tempfile(fileext = ".parquet") +on.exit(unlink(tf)) +write_parquet(tibble::tibble(x = 1:5), tf) } } diff --git a/r/tests/testthat/test-buffer.R b/r/tests/testthat/test-buffer.R index d8baa5c5bee..948d5df70e3 100644 --- a/r/tests/testthat/test-buffer.R +++ b/r/tests/testthat/test-buffer.R @@ -73,7 +73,9 @@ test_that("can read remaining bytes of a RandomAccessFile", { all_bytes <- write_arrow(tab, tf) file <- ReadableFile$create(tf) + expect_equal(file$tell(), 0) x <- file$Read(20)$data() + expect_equal(file$tell(), 20) y <- file$Read()$data() file <- ReadableFile$create(tf) diff --git a/r/tests/testthat/test-recordbatchreader.R b/r/tests/testthat/test-recordbatchreader.R index 67aca97602e..b557f0669ac 100644 --- a/r/tests/testthat/test-recordbatchreader.R +++ b/r/tests/testthat/test-recordbatchreader.R @@ -24,9 +24,11 @@ test_that("RecordBatchStreamReader / Writer", { ) sink <- BufferOutputStream$create() + expect_equal(sink$tell(), 0) writer <- RecordBatchStreamWriter$create(sink, batch$schema) expect_is(writer, "RecordBatchStreamWriter") writer$write_batch(batch) + expect_true(sink$tell() > 0) writer$close() buf <- sink$getvalue() @@ -60,9 +62,9 @@ test_that("RecordBatchFileReader / Writer", { reader <- RecordBatchFileReader$create(buf) expect_is(reader, "RecordBatchFileReader") - batch1 <- reader$get_batch(0L) + batch1 <- reader$get_batch(0) expect_is(batch1, "RecordBatch") expect_equal(batch, batch1) - expect_equal(reader$num_record_batches, 1L) + expect_equal(reader$num_record_batches, 1) }) From 8683f100f2392cb9e25de25734084413318ca8fe Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Fri, 6 Sep 2019 16:10:00 -0700 Subject: [PATCH 27/37] Add content to vignette from blog post --- r/vignettes/arrow.Rmd | 67 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 66 insertions(+), 1 deletion(-) diff --git 
a/r/vignettes/arrow.Rmd b/r/vignettes/arrow.Rmd index 9ce29c59b69..e7ab08b03b2 100644 --- a/r/vignettes/arrow.Rmd +++ b/r/vignettes/arrow.Rmd @@ -10,7 +10,72 @@ vignette: > The Apache Arrow C++ library provides rich, powerful features for working with columnar data. The `arrow` R package provides both a low-level interface to the C++ library and some higher-level, R-flavored tools for working with it. This vignette provides an overview of how the pieces fit together, and it describes the conventions that the classes and methods follow in R. -# Class structure +# Reading and writing files + +The `arrow` package provides some simple functions for using the Arrow C++ library to read and write files. These functions are designed to drop into your normal R workflow without requiring any knowledge of the Arrow C++ library and use naming conventions and arguments that follow popular R packages, particularly `readr`. The readers return `data.frame`s (or if you use the `tibble` package, they will act like `tbl_df`s), and the writers take `data.frame`s. + +Importantly, `arrow` provides basic read and write support for the [Apache +Parquet](https://parquet.apache.org/) columnar data file format, without having to set up a database. + +```r +library(arrow) +df <- read_parquet("path/to/file.parquet") +``` + +This function, along with the other readers in the package, takes an optional +`col_select` argument, inspired by the +[`vroom`](https://vroom.r-lib.org/reference/vroom.html) package. This argument +lets you use the ["tidyselect" helper functions](https://tidyselect.r-lib.org/reference/select_helpers.html), as you can do in `dplyr::select()`, to specify that you only want to keep certain columns. You may also provide a character vector of column names to keep, as in the "select" argument to `data.table::fread()`. By narrowing your selection at read time, you can load a `data.frame` with less memory overhead. 
+ +For example, suppose you had written the `iris` dataset to Parquet. You could +read a `data.frame` with only the columns `c("Sepal.Length", "Sepal.Width")` by +doing + +```r +df <- read_parquet("iris.parquet", col_select = starts_with("Sepal")) +``` + +Just as you can read, you can write Parquet files: + +```r +write_parquet(df, "path/to/different_file.parquet") +``` + +The `arrow` package also includes a faster and more robust implementation of the +Feather file format, providing `read_feather()` and +`write_feather()`. [Feather](https://github.com/wesm/feather) was one of the +initial applications of Apache Arrow for Python and R, providing an efficient, +common file format language-agnostic data frame storage, along with +implementations in R and Python. + +As Arrow progressed, development of Feather moved to the +[`apache/arrow`](https://github.com/apache/arrow) project, and for the last two +years, the Python implementation of Feather has just been a wrapper around +`pyarrow`. This meant that as Arrow progressed and bugs were fixed, the Python +version of Feather got the improvements but sadly R did not. + +In the `arrow` package, the R implementation of Feather depends +on the same underlying C++ library as the Python version does. This should +result in more reliable and consistent behavior across the two languages, as +well as [improved performance](https://wesmckinney.com/blog/feather-arrow-future/). + +In addition to these readers and writers, the `arrow` package has wrappers for +other readers in the C++ library; see `?read_csv_arrow` and +`?read_json_arrow`. These readers are being developed to optimize for the +memory layout of the Arrow columnar format and are not intended as a direct +replacement for existing R CSV readers (`base::read.csv`, `readr::read_csv`, +`data.table::fread`) that return an R `data.frame`. 
+ +# Access to Arrow messages, buffers, and streams + +The `arrow` package also provides many lower-level bindings to the C++ library, which enable you +to access and manipulate Arrow objects. You can use these to build connectors +to other applications and services that use Arrow. One example is Spark: the +[`sparklyr`](https://spark.rstudio.com/) package has support for using Arrow to +move data to and from Spark, yielding [significant performance +gains](http://arrow.apache.org/blog/2019/01/25/r-spark-improvements/). + +# Class structure and package conventions C++ is an object-oriented language, so the core logic of the Arrow library is encapsulated in classes and methods. In the R package, these classes are implemented as `R6` reference classes, most of which are exported from the namespace. From 0150d9923a4236f74545760a4d8f6c1e7271dd74 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Fri, 6 Sep 2019 16:19:36 -0700 Subject: [PATCH 28/37] Rename Field.R to field.R --- r/R/{Field.R => field.R} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename r/R/{Field.R => field.R} (100%) diff --git a/r/R/Field.R b/r/R/field.R similarity index 100% rename from r/R/Field.R rename to r/R/field.R From 924edd1c43f292d354d302fe2b679652a1c131ca Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Fri, 6 Sep 2019 16:20:06 -0700 Subject: [PATCH 29/37] Rename List.R to list.R --- r/R/{List.R => list.R} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename r/R/{List.R => list.R} (100%) diff --git a/r/R/List.R b/r/R/list.R similarity index 100% rename from r/R/List.R rename to r/R/list.R From 358290bc6053e9863df1996bce424c6fb4516a43 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Fri, 6 Sep 2019 16:20:21 -0700 Subject: [PATCH 30/37] Rename Schema.R to schema.R --- r/R/{Schema.R => schema.R} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename r/R/{Schema.R => schema.R} (100%) diff --git a/r/R/Schema.R b/r/R/schema.R similarity index 100% rename from 
r/R/Schema.R rename to r/R/schema.R From 8bd52d722f9222a3e2070e844e718c76a9d8c99e Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Fri, 6 Sep 2019 16:20:36 -0700 Subject: [PATCH 31/37] Rename Struct.R to struct.R --- r/R/{Struct.R => struct.R} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename r/R/{Struct.R => struct.R} (100%) diff --git a/r/R/Struct.R b/r/R/struct.R similarity index 100% rename from r/R/Struct.R rename to r/R/struct.R From 35f00f52d196cd002cc244deac90a573e63888c8 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Fri, 6 Sep 2019 16:20:52 -0700 Subject: [PATCH 32/37] Rename Table.R to table.R --- r/R/{Table.R => table.R} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename r/R/{Table.R => table.R} (100%) diff --git a/r/R/Table.R b/r/R/table.R similarity index 100% rename from r/R/Table.R rename to r/R/table.R From adf1cf916b25c50563556d804e71f4c59592e93d Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Tue, 10 Sep 2019 11:14:29 -0700 Subject: [PATCH 33/37] File renaming (not case-sensitive) --- r/DESCRIPTION | 8 +- r/R/arrowExports.R | 24 ++-- r/R/{memory_pool.R => memory-pool.R} | 0 ...ead_record_batch.R => read-record-batch.R} | 0 r/R/{read_table.R => read-table.R} | 0 r/R/{write_arrow.R => write-arrow.R} | 0 r/man/MemoryPool.Rd | 2 +- r/man/default_memory_pool.Rd | 2 +- r/man/read_record_batch.Rd | 2 +- r/man/read_table.Rd | 2 +- r/man/write_arrow.Rd | 2 +- ...ray__to_vector.cpp => array_to_vector.cpp} | 0 r/src/arrowExports.cpp | 108 +++++++++--------- .../{test-arraydata.R => test-array-data.R} | 0 ...st-bufferreader.R => test-buffer-reader.R} | 0 ...st-chunkedarray.R => test-chunked-array.R} | 0 ...-messagereader.R => test-message-reader.R} | 0 ...ecord_batch.R => test-read-record-batch.R} | 0 ...tchreader.R => test-record-batch-reader.R} | 0 ...hreadpoolcapacity.R => test-thread-pool.R} | 0 20 files changed, 75 insertions(+), 75 deletions(-) rename r/R/{memory_pool.R => memory-pool.R} (100%) rename 
r/R/{read_record_batch.R => read-record-batch.R} (100%) rename r/R/{read_table.R => read-table.R} (100%) rename r/R/{write_arrow.R => write-arrow.R} (100%) rename r/src/{array__to_vector.cpp => array_to_vector.cpp} (100%) rename r/tests/testthat/{test-arraydata.R => test-array-data.R} (100%) rename r/tests/testthat/{test-bufferreader.R => test-buffer-reader.R} (100%) rename r/tests/testthat/{test-chunkedarray.R => test-chunked-array.R} (100%) rename r/tests/testthat/{test-messagereader.R => test-message-reader.R} (100%) rename r/tests/testthat/{test-read_record_batch.R => test-read-record-batch.R} (100%) rename r/tests/testthat/{test-recordbatchreader.R => test-record-batch-reader.R} (100%) rename r/tests/testthat/{test-cputhreadpoolcapacity.R => test-thread-pool.R} (100%) diff --git a/r/DESCRIPTION b/r/DESCRIPTION index ef44196f0ae..a4b72d94de0 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -66,11 +66,11 @@ Collate: 'install-arrow.R' 'json.R' 'list.R' - 'memory_pool.R' + 'memory-pool.R' 'message.R' 'parquet.R' - 'read_record_batch.R' - 'read_table.R' + 'read-record-batch.R' + 'read-table.R' 'record-batch-reader.R' 'record-batch-writer.R' 'record-batch.R' @@ -79,4 +79,4 @@ Collate: 'schema.R' 'struct.R' 'table.R' - 'write_arrow.R' + 'write-arrow.R' diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index 7af16a72539..d98a6b06c07 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -100,6 +100,18 @@ ListArray__raw_value_offsets <- function(array){ .Call(`_arrow_ListArray__raw_value_offsets` , array) } +Array__infer_type <- function(x){ + .Call(`_arrow_Array__infer_type` , x) +} + +Array__from_vector <- function(x, s_type){ + .Call(`_arrow_Array__from_vector` , x, s_type) +} + +ChunkedArray__from_list <- function(chunks, s_type){ + .Call(`_arrow_ChunkedArray__from_list` , chunks, s_type) +} + Array__as_vector <- function(array){ .Call(`_arrow_Array__as_vector` , array) } @@ -116,18 +128,6 @@ Table__to_dataframe <- function(table, use_threads){ 
.Call(`_arrow_Table__to_dataframe` , table, use_threads) } -Array__infer_type <- function(x){ - .Call(`_arrow_Array__infer_type` , x) -} - -Array__from_vector <- function(x, s_type){ - .Call(`_arrow_Array__from_vector` , x, s_type) -} - -ChunkedArray__from_list <- function(chunks, s_type){ - .Call(`_arrow_ChunkedArray__from_list` , chunks, s_type) -} - ArrayData__get_type <- function(x){ .Call(`_arrow_ArrayData__get_type` , x) } diff --git a/r/R/memory_pool.R b/r/R/memory-pool.R similarity index 100% rename from r/R/memory_pool.R rename to r/R/memory-pool.R diff --git a/r/R/read_record_batch.R b/r/R/read-record-batch.R similarity index 100% rename from r/R/read_record_batch.R rename to r/R/read-record-batch.R diff --git a/r/R/read_table.R b/r/R/read-table.R similarity index 100% rename from r/R/read_table.R rename to r/R/read-table.R diff --git a/r/R/write_arrow.R b/r/R/write-arrow.R similarity index 100% rename from r/R/write_arrow.R rename to r/R/write-arrow.R diff --git a/r/man/MemoryPool.Rd b/r/man/MemoryPool.Rd index c9d49657f9a..e69fc8b4db9 100644 --- a/r/man/MemoryPool.Rd +++ b/r/man/MemoryPool.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/memory_pool.R +% Please edit documentation in R/memory-pool.R \docType{class} \name{MemoryPool} \alias{MemoryPool} diff --git a/r/man/default_memory_pool.Rd b/r/man/default_memory_pool.Rd index e33c07bb50a..859b40631af 100644 --- a/r/man/default_memory_pool.Rd +++ b/r/man/default_memory_pool.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/memory_pool.R +% Please edit documentation in R/memory-pool.R \name{default_memory_pool} \alias{default_memory_pool} \title{default \link[=MemoryPool]{arrow::MemoryPool}} diff --git a/r/man/read_record_batch.Rd b/r/man/read_record_batch.Rd index ba91ce0d66f..f335bae2012 100644 --- a/r/man/read_record_batch.Rd +++ b/r/man/read_record_batch.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not 
edit by hand -% Please edit documentation in R/read_record_batch.R +% Please edit documentation in R/read-record-batch.R \name{read_record_batch} \alias{read_record_batch} \title{read \link[=RecordBatch]{arrow::RecordBatch} as encapsulated IPC message, given a known \link[=schema]{arrow::Schema}} diff --git a/r/man/read_table.Rd b/r/man/read_table.Rd index 9f76fec5421..9475fbe4abb 100644 --- a/r/man/read_table.Rd +++ b/r/man/read_table.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/read_table.R +% Please edit documentation in R/read-table.R \name{read_table} \alias{read_table} \alias{read_arrow} diff --git a/r/man/write_arrow.Rd b/r/man/write_arrow.Rd index a1091a34e61..1820e0e1536 100644 --- a/r/man/write_arrow.Rd +++ b/r/man/write_arrow.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/write_arrow.R +% Please edit documentation in R/write-arrow.R \name{write_arrow} \alias{write_arrow} \title{Write Arrow formatted data} diff --git a/r/src/array__to_vector.cpp b/r/src/array_to_vector.cpp similarity index 100% rename from r/src/array__to_vector.cpp rename to r/src/array_to_vector.cpp diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index 5af51c59677..0ebac0cc5a1 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -394,7 +394,54 @@ RcppExport SEXP _arrow_ListArray__raw_value_offsets(SEXP array_sexp){ } #endif -// array__to_vector.cpp +// array_from_vector.cpp +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr Array__infer_type(SEXP x); +RcppExport SEXP _arrow_Array__infer_type(SEXP x_sexp){ +BEGIN_RCPP + Rcpp::traits::input_parameter::type x(x_sexp); + return Rcpp::wrap(Array__infer_type(x)); +END_RCPP +} +#else +RcppExport SEXP _arrow_Array__infer_type(SEXP x_sexp){ + Rf_error("Cannot call Array__infer_type(). Please use arrow::install_arrow() to install required runtime libraries. 
"); +} +#endif + +// array_from_vector.cpp +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr Array__from_vector(SEXP x, SEXP s_type); +RcppExport SEXP _arrow_Array__from_vector(SEXP x_sexp, SEXP s_type_sexp){ +BEGIN_RCPP + Rcpp::traits::input_parameter::type x(x_sexp); + Rcpp::traits::input_parameter::type s_type(s_type_sexp); + return Rcpp::wrap(Array__from_vector(x, s_type)); +END_RCPP +} +#else +RcppExport SEXP _arrow_Array__from_vector(SEXP x_sexp, SEXP s_type_sexp){ + Rf_error("Cannot call Array__from_vector(). Please use arrow::install_arrow() to install required runtime libraries. "); +} +#endif + +// array_from_vector.cpp +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr ChunkedArray__from_list(Rcpp::List chunks, SEXP s_type); +RcppExport SEXP _arrow_ChunkedArray__from_list(SEXP chunks_sexp, SEXP s_type_sexp){ +BEGIN_RCPP + Rcpp::traits::input_parameter::type chunks(chunks_sexp); + Rcpp::traits::input_parameter::type s_type(s_type_sexp); + return Rcpp::wrap(ChunkedArray__from_list(chunks, s_type)); +END_RCPP +} +#else +RcppExport SEXP _arrow_ChunkedArray__from_list(SEXP chunks_sexp, SEXP s_type_sexp){ + Rf_error("Cannot call ChunkedArray__from_list(). Please use arrow::install_arrow() to install required runtime libraries. 
"); +} +#endif + +// array_to_vector.cpp #if defined(ARROW_R_WITH_ARROW) SEXP Array__as_vector(const std::shared_ptr& array); RcppExport SEXP _arrow_Array__as_vector(SEXP array_sexp){ @@ -409,7 +456,7 @@ RcppExport SEXP _arrow_Array__as_vector(SEXP array_sexp){ } #endif -// array__to_vector.cpp +// array_to_vector.cpp #if defined(ARROW_R_WITH_ARROW) SEXP ChunkedArray__as_vector(const std::shared_ptr& chunked_array); RcppExport SEXP _arrow_ChunkedArray__as_vector(SEXP chunked_array_sexp){ @@ -424,7 +471,7 @@ RcppExport SEXP _arrow_ChunkedArray__as_vector(SEXP chunked_array_sexp){ } #endif -// array__to_vector.cpp +// array_to_vector.cpp #if defined(ARROW_R_WITH_ARROW) Rcpp::List RecordBatch__to_dataframe(const std::shared_ptr& batch, bool use_threads); RcppExport SEXP _arrow_RecordBatch__to_dataframe(SEXP batch_sexp, SEXP use_threads_sexp){ @@ -440,7 +487,7 @@ RcppExport SEXP _arrow_RecordBatch__to_dataframe(SEXP batch_sexp, SEXP use_threa } #endif -// array__to_vector.cpp +// array_to_vector.cpp #if defined(ARROW_R_WITH_ARROW) Rcpp::List Table__to_dataframe(const std::shared_ptr& table, bool use_threads); RcppExport SEXP _arrow_Table__to_dataframe(SEXP table_sexp, SEXP use_threads_sexp){ @@ -456,53 +503,6 @@ RcppExport SEXP _arrow_Table__to_dataframe(SEXP table_sexp, SEXP use_threads_sex } #endif -// array_from_vector.cpp -#if defined(ARROW_R_WITH_ARROW) -std::shared_ptr Array__infer_type(SEXP x); -RcppExport SEXP _arrow_Array__infer_type(SEXP x_sexp){ -BEGIN_RCPP - Rcpp::traits::input_parameter::type x(x_sexp); - return Rcpp::wrap(Array__infer_type(x)); -END_RCPP -} -#else -RcppExport SEXP _arrow_Array__infer_type(SEXP x_sexp){ - Rf_error("Cannot call Array__infer_type(). Please use arrow::install_arrow() to install required runtime libraries. 
"); -} -#endif - -// array_from_vector.cpp -#if defined(ARROW_R_WITH_ARROW) -std::shared_ptr Array__from_vector(SEXP x, SEXP s_type); -RcppExport SEXP _arrow_Array__from_vector(SEXP x_sexp, SEXP s_type_sexp){ -BEGIN_RCPP - Rcpp::traits::input_parameter::type x(x_sexp); - Rcpp::traits::input_parameter::type s_type(s_type_sexp); - return Rcpp::wrap(Array__from_vector(x, s_type)); -END_RCPP -} -#else -RcppExport SEXP _arrow_Array__from_vector(SEXP x_sexp, SEXP s_type_sexp){ - Rf_error("Cannot call Array__from_vector(). Please use arrow::install_arrow() to install required runtime libraries. "); -} -#endif - -// array_from_vector.cpp -#if defined(ARROW_R_WITH_ARROW) -std::shared_ptr ChunkedArray__from_list(Rcpp::List chunks, SEXP s_type); -RcppExport SEXP _arrow_ChunkedArray__from_list(SEXP chunks_sexp, SEXP s_type_sexp){ -BEGIN_RCPP - Rcpp::traits::input_parameter::type chunks(chunks_sexp); - Rcpp::traits::input_parameter::type s_type(s_type_sexp); - return Rcpp::wrap(ChunkedArray__from_list(chunks, s_type)); -END_RCPP -} -#else -RcppExport SEXP _arrow_ChunkedArray__from_list(SEXP chunks_sexp, SEXP s_type_sexp){ - Rf_error("Cannot call ChunkedArray__from_list(). Please use arrow::install_arrow() to install required runtime libraries. 
"); -} -#endif - // arraydata.cpp #if defined(ARROW_R_WITH_ARROW) std::shared_ptr ArrayData__get_type(const std::shared_ptr& x); @@ -3725,13 +3725,13 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_ListArray__value_length", (DL_FUNC) &_arrow_ListArray__value_length, 2}, { "_arrow_ListArray__value_offset", (DL_FUNC) &_arrow_ListArray__value_offset, 2}, { "_arrow_ListArray__raw_value_offsets", (DL_FUNC) &_arrow_ListArray__raw_value_offsets, 1}, + { "_arrow_Array__infer_type", (DL_FUNC) &_arrow_Array__infer_type, 1}, + { "_arrow_Array__from_vector", (DL_FUNC) &_arrow_Array__from_vector, 2}, + { "_arrow_ChunkedArray__from_list", (DL_FUNC) &_arrow_ChunkedArray__from_list, 2}, { "_arrow_Array__as_vector", (DL_FUNC) &_arrow_Array__as_vector, 1}, { "_arrow_ChunkedArray__as_vector", (DL_FUNC) &_arrow_ChunkedArray__as_vector, 1}, { "_arrow_RecordBatch__to_dataframe", (DL_FUNC) &_arrow_RecordBatch__to_dataframe, 2}, { "_arrow_Table__to_dataframe", (DL_FUNC) &_arrow_Table__to_dataframe, 2}, - { "_arrow_Array__infer_type", (DL_FUNC) &_arrow_Array__infer_type, 1}, - { "_arrow_Array__from_vector", (DL_FUNC) &_arrow_Array__from_vector, 2}, - { "_arrow_ChunkedArray__from_list", (DL_FUNC) &_arrow_ChunkedArray__from_list, 2}, { "_arrow_ArrayData__get_type", (DL_FUNC) &_arrow_ArrayData__get_type, 1}, { "_arrow_ArrayData__get_length", (DL_FUNC) &_arrow_ArrayData__get_length, 1}, { "_arrow_ArrayData__get_null_count", (DL_FUNC) &_arrow_ArrayData__get_null_count, 1}, diff --git a/r/tests/testthat/test-arraydata.R b/r/tests/testthat/test-array-data.R similarity index 100% rename from r/tests/testthat/test-arraydata.R rename to r/tests/testthat/test-array-data.R diff --git a/r/tests/testthat/test-bufferreader.R b/r/tests/testthat/test-buffer-reader.R similarity index 100% rename from r/tests/testthat/test-bufferreader.R rename to r/tests/testthat/test-buffer-reader.R diff --git a/r/tests/testthat/test-chunkedarray.R b/r/tests/testthat/test-chunked-array.R similarity index 100% 
rename from r/tests/testthat/test-chunkedarray.R rename to r/tests/testthat/test-chunked-array.R diff --git a/r/tests/testthat/test-messagereader.R b/r/tests/testthat/test-message-reader.R similarity index 100% rename from r/tests/testthat/test-messagereader.R rename to r/tests/testthat/test-message-reader.R diff --git a/r/tests/testthat/test-read_record_batch.R b/r/tests/testthat/test-read-record-batch.R similarity index 100% rename from r/tests/testthat/test-read_record_batch.R rename to r/tests/testthat/test-read-record-batch.R diff --git a/r/tests/testthat/test-recordbatchreader.R b/r/tests/testthat/test-record-batch-reader.R similarity index 100% rename from r/tests/testthat/test-recordbatchreader.R rename to r/tests/testthat/test-record-batch-reader.R diff --git a/r/tests/testthat/test-cputhreadpoolcapacity.R b/r/tests/testthat/test-thread-pool.R similarity index 100% rename from r/tests/testthat/test-cputhreadpoolcapacity.R rename to r/tests/testthat/test-thread-pool.R From caf3265d3133e436297653cdd97eb2202339ac94 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Tue, 10 Sep 2019 12:42:37 -0700 Subject: [PATCH 34/37] PR feedback from romain --- r/R/array-data.R | 2 +- r/R/array.R | 4 ++-- r/R/chunked-array.R | 4 ++-- r/R/dictionary.R | 4 ++-- r/R/field.R | 2 +- r/R/list.R | 2 +- r/R/type.R | 4 ++-- 7 files changed, 11 insertions(+), 11 deletions(-) diff --git a/r/R/array-data.R b/r/R/array-data.R index f0797f50d0b..a141ba758c3 100644 --- a/r/R/array-data.R +++ b/r/R/array-data.R @@ -44,7 +44,7 @@ ArrayData <- R6Class("ArrayData", inherit = Object, active = list( - type = function() DataType$dispatch(ArrayData__get_type(self)), + type = function() DataType$create(ArrayData__get_type(self)), length = function() ArrayData__get_length(self), null_count = function() ArrayData__get_null_count(self), offset = function() ArrayData__get_offset(self), diff --git a/r/R/array.R b/r/R/array.R index f643a8ddbdf..9175c323488 100644 --- a/r/R/array.R +++ b/r/R/array.R @@ 
-100,7 +100,7 @@ Array <- R6Class("Array", active = list( null_count = function() Array__null_count(self), offset = function() Array__offset(self), - type = function() DataType$dispatch(Array__type(self)) + type = function() DataType$create(Array__type(self)) ) ) @@ -127,7 +127,7 @@ ListArray <- R6Class("ListArray", inherit = Array, raw_value_offsets = function() ListArray__raw_value_offsets(self) ), active = list( - value_type = function() DataType$dispatch(ListArray__value_type(self)) + value_type = function() DataType$create(ListArray__value_type(self)) ) ) diff --git a/r/R/chunked-array.R b/r/R/chunked-array.R index 4fa60805f98..16ec942b41f 100644 --- a/r/R/chunked-array.R +++ b/r/R/chunked-array.R @@ -51,8 +51,8 @@ ChunkedArray <- R6Class("ChunkedArray", inherit = Object, active = list( null_count = function() ChunkedArray__null_count(self), num_chunks = function() ChunkedArray__num_chunks(self), - chunks = function() map(ChunkedArray__chunks(self), ~ Array$create(.x)), - type = function() DataType$dispatch(ChunkedArray__type(self)) + chunks = function() map(ChunkedArray__chunks(self), Array$create), + type = function() DataType$create(ChunkedArray__type(self)) ) ) diff --git a/r/R/dictionary.R b/r/R/dictionary.R index ce4a1b2f366..ab33c3e1982 100644 --- a/r/R/dictionary.R +++ b/r/R/dictionary.R @@ -33,8 +33,8 @@ DictionaryType <- R6Class("DictionaryType", inherit = FixedWidthType, active = list( - index_type = function() DataType$dispatch(DictionaryType__index_type(self)), - value_type = function() DataType$dispatch(DictionaryType__value_type(self)), + index_type = function() DataType$create(DictionaryType__index_type(self)), + value_type = function() DataType$create(DictionaryType__value_type(self)), name = function() DictionaryType__name(self), ordered = function() DictionaryType__ordered(self) ) diff --git a/r/R/field.R b/r/R/field.R index 4533b4d94c2..89e3b2322f2 100644 --- a/r/R/field.R +++ b/r/R/field.R @@ -47,7 +47,7 @@ Field <- R6Class("Field", 
inherit = Object, Field__nullable(self) }, type = function() { - DataType$dispatch(Field__type(self)) + DataType$create(Field__type(self)) } ) ) diff --git a/r/R/list.R b/r/R/list.R index 0d50ff2a19d..31c72bb3034 100644 --- a/r/R/list.R +++ b/r/R/list.R @@ -21,7 +21,7 @@ ListType <- R6Class("ListType", inherit = NestedType, active = list( value_field = function() shared_ptr(Field, ListType__value_field(self)), - value_type = function() DataType$dispatch(ListType__value_type(self)) + value_type = function() DataType$create(ListType__value_type(self)) ) ) diff --git a/r/R/type.R b/r/R/type.R index 987121fab81..1604e58eac0 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -90,7 +90,7 @@ DataType <- R6Class("DataType", ) ) -DataType$dispatch <- function(xp) shared_ptr(DataType, xp)$..dispatch() +DataType$create <- function(xp) shared_ptr(DataType, xp)$..dispatch() #' infer the arrow Array type from an R vector #' @@ -101,7 +101,7 @@ DataType$dispatch <- function(xp) shared_ptr(DataType, xp)$..dispatch() type <- function(x) UseMethod("type") #' @export -type.default <- function(x) DataType$dispatch(Array__infer_type(x)) +type.default <- function(x) DataType$create(Array__infer_type(x)) #' @export type.Array <- function(x) x$type From 01084ce7d0d7e6163c38968bb4951803e5ff0606 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Tue, 10 Sep 2019 12:52:53 -0700 Subject: [PATCH 35/37] Factor out assert_is() --- r/DESCRIPTION | 1 + r/R/array.R | 6 +++--- r/R/chunked-array.R | 4 ++-- r/R/compression.R | 8 +++---- r/R/feather.R | 6 +++--- r/R/io.R | 2 +- r/R/message.R | 2 +- r/R/parquet.R | 2 +- r/R/read-record-batch.R | 4 ++-- r/R/record-batch-reader.R | 2 +- r/R/record-batch-writer.R | 8 +++---- r/R/record-batch.R | 6 +++--- r/R/table.R | 4 ++-- r/R/type.R | 17 +-------------- r/R/util.R | 19 +++++++++++++++++ r/man/field.Rd | 39 ----------------------------------- r/man/schema.Rd | 38 ---------------------------------- r/tests/testthat/test-arrow.R | 17 +++++++++++++++ 18 files 
changed, 65 insertions(+), 120 deletions(-) create mode 100644 r/R/util.R delete mode 100644 r/man/field.Rd delete mode 100644 r/man/schema.Rd diff --git a/r/DESCRIPTION b/r/DESCRIPTION index a4b72d94de0..7b68377d41f 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -79,4 +79,5 @@ Collate: 'schema.R' 'struct.R' 'table.R' + 'util.R' 'write-arrow.R' diff --git a/r/R/array.R b/r/R/array.R index 9175c323488..9aa8be9652b 100644 --- a/r/R/array.R +++ b/r/R/array.R @@ -88,12 +88,12 @@ Array <- R6Class("Array", } }, RangeEquals = function(other, start_idx, end_idx, other_start_idx) { - assert_that(inherits(other, "Array")) + assert_is(other, "Array") Array__RangeEquals(self, other, start_idx, end_idx, other_start_idx) }, cast = function(target_type, safe = TRUE, options = cast_options(safe)) { - assert_that(inherits(target_type, "DataType")) - assert_that(inherits(options, "CastOptions")) + assert_is(target_type, "DataType") + assert_is(options, "CastOptions") Array$create(Array__cast(self, target_type, options)) } ), diff --git a/r/R/chunked-array.R b/r/R/chunked-array.R index 16ec942b41f..ef8009b67f8 100644 --- a/r/R/chunked-array.R +++ b/r/R/chunked-array.R @@ -43,8 +43,8 @@ ChunkedArray <- R6Class("ChunkedArray", inherit = Object, } }, cast = function(target_type, safe = TRUE, options = cast_options(safe)) { - assert_that(inherits(target_type, "DataType")) - assert_that(inherits(options, "CastOptions")) + assert_is(target_type, "DataType") + assert_is(options, "CastOptions") shared_ptr(ChunkedArray, ChunkedArray__cast(self, target_type, options)) } ), diff --git a/r/R/compression.R b/r/R/compression.R index 11e56d53f05..a58defe640a 100644 --- a/r/R/compression.R +++ b/r/R/compression.R @@ -60,11 +60,11 @@ CompressedOutputStream$create <- function(stream, codec = compression_codec()){ if (.Platform$OS.type == "windows") { stop("'CompressedOutputStream' is unsupported in Windows.") } - assert_that(inherits(codec, "Codec")) + assert_is(codec, "Codec") if 
(is.character(stream)) { stream <- FileOutputStream$create(stream) } - assert_that(inherits(stream, "OutputStream")) + assert_is(stream, "OutputStream") shared_ptr(CompressedOutputStream, io___CompressedOutputStream__Make(codec, stream)) } @@ -75,10 +75,10 @@ CompressedOutputStream$create <- function(stream, codec = compression_codec()){ CompressedInputStream <- R6Class("CompressedInputStream", inherit = InputStream) CompressedInputStream$create <- function(stream, codec = compression_codec()){ # TODO (npr): why would CompressedInputStream work on Windows if CompressedOutputStream doesn't? (and is it still the case that it does not?) - assert_that(inherits(codec, "Codec")) + assert_is(codec, "Codec") if (is.character(stream)) { stream <- ReadableFile$create(stream) } - assert_that(inherits(stream, "InputStream")) + assert_is(stream, "InputStream") shared_ptr(CompressedInputStream, io___CompressedInputStream__Make(codec, stream)) } diff --git a/r/R/feather.R b/r/R/feather.R index d8b309da091..d835ae266ff 100644 --- a/r/R/feather.R +++ b/r/R/feather.R @@ -34,13 +34,13 @@ write_feather <- function(data, stream) { if (is.data.frame(data)) { data <- record_batch(data) } - assert_that(inherits(data, "RecordBatch")) + assert_is(data, "RecordBatch") if (is.character(stream)) { stream <- FileOutputStream$create(stream) on.exit(stream$close()) } - assert_that(inherits(stream, "OutputStream")) + assert_is(stream, "OutputStream") writer <- FeatherTableWriter$create(stream) ipc___TableWriter__RecordBatch__WriteFeather(writer, data) @@ -85,7 +85,7 @@ FeatherTableWriter <- R6Class("FeatherTableWriter", inherit = Object, ) FeatherTableWriter$create <- function(stream) { - assert_that(inherits(stream, "OutputStream")) + assert_is(stream, "OutputStream") unique_ptr(FeatherTableWriter, ipc___feather___TableWriter__Open(stream)) } diff --git a/r/R/io.R b/r/R/io.R index db0b4e35c5f..f5390e32b25 100644 --- a/r/R/io.R +++ b/r/R/io.R @@ -264,6 +264,6 @@ make_readable_file <- 
function(file, mmap = TRUE) { } else if (inherits(file, c("raw", "Buffer"))) { file <- BufferReader$create(file) } - assert_that(inherits(file, "InputStream")) + assert_is(file, "InputStream") file } diff --git a/r/R/message.R b/r/R/message.R index ff2e949a1a6..701d157fd43 100644 --- a/r/R/message.R +++ b/r/R/message.R @@ -32,7 +32,7 @@ Message <- R6Class("Message", inherit = Object, public = list( Equals = function(other){ - assert_that(inherits(other, "Message")) + assert_is(other, "Message") ipc___Message__Equals(self, other) }, body_length = function() ipc___Message__body_length(self), diff --git a/r/R/parquet.R b/r/R/parquet.R index 65128f7c872..3e85570ea64 100644 --- a/r/R/parquet.R +++ b/r/R/parquet.R @@ -107,7 +107,7 @@ ParquetFileReader$create <- function(file, mmap = TRUE, ...) { file <- make_readable_file(file, mmap) - assert_that(inherits(props, "ParquetReaderProperties")) + assert_is(props, "ParquetReaderProperties") unique_ptr(ParquetFileReader, parquet___arrow___FileReader__OpenFile(file, props)) } diff --git a/r/R/read-record-batch.R b/r/R/read-record-batch.R index 65754a0a063..112f6539b39 100644 --- a/r/R/read-record-batch.R +++ b/r/R/read-record-batch.R @@ -29,13 +29,13 @@ read_record_batch <- function(obj, schema){ #' @export read_record_batch.Message <- function(obj, schema) { - assert_that(inherits(schema, "Schema")) + assert_is(schema, "Schema") shared_ptr(RecordBatch, ipc___ReadRecordBatch__Message__Schema(obj, schema)) } #' @export read_record_batch.InputStream <- function(obj, schema) { - assert_that(inherits(schema, "Schema")) + assert_is(schema, "Schema") shared_ptr(RecordBatch, ipc___ReadRecordBatch__InputStream__Schema(obj, schema)) } diff --git a/r/R/record-batch-reader.R b/r/R/record-batch-reader.R index 4122d3ddae1..dadf15e815c 100644 --- a/r/R/record-batch-reader.R +++ b/r/R/record-batch-reader.R @@ -69,7 +69,7 @@ RecordBatchStreamReader$create <- function(stream){ if (inherits(stream, c("raw", "Buffer"))) { stream <- 
BufferReader$create(stream) } - assert_that(inherits(stream, "InputStream")) + assert_is(stream, "InputStream") shared_ptr(RecordBatchStreamReader, ipc___RecordBatchStreamReader__Open(stream)) } diff --git a/r/R/record-batch-writer.R b/r/R/record-batch-writer.R index d58efbb5a11..208ceb1c3c5 100644 --- a/r/R/record-batch-writer.R +++ b/r/R/record-batch-writer.R @@ -81,8 +81,8 @@ RecordBatchStreamWriter$create <- function(sink, schema) { if (is.character(sink)) { sink <- FileOutputStream$create(sink) } - assert_that(inherits(sink, "OutputStream")) - assert_that(inherits(schema, "Schema")) + assert_is(sink, "OutputStream") + assert_is(schema, "Schema") shared_ptr(RecordBatchStreamWriter, ipc___RecordBatchStreamWriter__Open(sink, schema)) } @@ -96,8 +96,8 @@ RecordBatchFileWriter$create <- function(sink, schema) { if (is.character(sink)) { sink <- FileOutputStream$create(sink) } - assert_that(inherits(sink, "OutputStream")) - assert_that(inherits(schema, "Schema")) + assert_is(sink, "OutputStream") + assert_is(schema, "Schema") shared_ptr(RecordBatchFileWriter, ipc___RecordBatchFileWriter__Open(sink, schema)) } diff --git a/r/R/record-batch.R b/r/R/record-batch.R index b9f64dd834e..fecba3abb69 100644 --- a/r/R/record-batch.R +++ b/r/R/record-batch.R @@ -35,7 +35,7 @@ RecordBatch <- R6Class("RecordBatch", inherit = Object, column_name = function(i) RecordBatch__column_name(self, i), names = function() RecordBatch__names(self), Equals = function(other) { - assert_that(inherits(other, "RecordBatch")) + assert_is(other, "RecordBatch") RecordBatch__Equals(self, other) }, @@ -54,8 +54,8 @@ RecordBatch <- R6Class("RecordBatch", inherit = Object, serialize = function() ipc___SerializeRecordBatch__Raw(self), cast = function(target_schema, safe = TRUE, options = cast_options(safe)) { - assert_that(inherits(target_schema, "Schema")) - assert_that(inherits(options, "CastOptions")) + assert_is(target_schema, "Schema") + assert_is(options, "CastOptions") 
assert_that(identical(self$schema$names, target_schema$names), msg = "incompatible schemas") shared_ptr(RecordBatch, RecordBatch__cast(self, target_schema, options)) } diff --git a/r/R/table.R b/r/R/table.R index 8e0e2320db7..6339ac6c8c2 100644 --- a/r/R/table.R +++ b/r/R/table.R @@ -45,8 +45,8 @@ Table <- R6Class("Table", inherit = Object, serialize = function(output_stream, ...) write_table(self, output_stream, ...), cast = function(target_schema, safe = TRUE, options = cast_options(safe)) { - assert_that(inherits(target_schema, "Schema")) - assert_that(inherits(options, "CastOptions")) + assert_is(target_schema, "Schema") + assert_is(options, "CastOptions") assert_that(identical(self$schema$names, target_schema$names), msg = "incompatible schemas") shared_ptr(Table, Table__cast(self, target_schema, options)) }, diff --git a/r/R/type.R b/r/R/type.R index 1604e58eac0..36d81e293c0 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -41,7 +41,7 @@ DataType <- R6Class("DataType", DataType__ToString(self) }, Equals = function(other) { - assert_that(inherits(other, "DataType")) + assert_is(other, "DataType") DataType__Equals(self, other) }, num_children = function() { @@ -338,21 +338,6 @@ make_valid_time_unit <- function(unit, valid_units) { unit } -oxford_paste <- function(x, conjunction = "and") { - if (is.character(x)) { - x <- paste0('"', x, '"') - } - if (length(x) < 2) { - return(x) - } - x[length(x)] <- paste(conjunction, x[length(x)]) - if (length(x) > 2) { - return(paste(x, collapse = ", ")) - } else { - return(paste(x, collapse = " ")) - } -} - #' @rdname data-type #' @export time64 <- function(unit = c("ns", "us")) { diff --git a/r/R/util.R b/r/R/util.R new file mode 100644 index 00000000000..19e77258e65 --- /dev/null +++ b/r/R/util.R @@ -0,0 +1,19 @@ +oxford_paste <- function(x, conjunction = "and") { + if (is.character(x)) { + x <- paste0('"', x, '"') + } + if (length(x) < 2) { + return(x) + } + x[length(x)] <- paste(conjunction, x[length(x)]) + if (length(x) > 2) 
{ + return(paste(x, collapse = ", ")) + } else { + return(paste(x, collapse = " ")) + } +} + +assert_is <- function(object, class) { + msg <- paste(substitute(object), "must be a", oxford_paste(class, "or")) + assert_that(inherits(object, class), msg = msg) +} diff --git a/r/man/field.Rd b/r/man/field.Rd deleted file mode 100644 index c4325f4c821..00000000000 --- a/r/man/field.Rd +++ /dev/null @@ -1,39 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/field.R -\docType{class} -\name{Field} -\alias{Field} -\alias{field} -\title{class arrow::Field} -\format{An object of class \code{R6ClassGenerator} of length 25.} -\usage{ -Field - -field(name, type, metadata) -} -\arguments{ -\item{name}{field name} - -\item{type}{logical type, instance of \link{DataType}} - -\item{metadata}{currently ignored} -} -\description{ -\code{field()} lets you create an \code{arrow::Field} that maps a -\link[=data-type]{DataType} to a column name. Fields are contained in -\link[=Schema]{Schemas}. -} -\section{Methods}{ - -\itemize{ -\item \code{f$ToString()}: convert to a string -\item \code{f$Equals(other)}: test for equality. More naturally called as \code{f == other} -} -} - -\examples{ -\donttest{ -field("x", int32()) -} -} -\keyword{datasets} diff --git a/r/man/schema.Rd b/r/man/schema.Rd deleted file mode 100644 index 2f960dbce6c..00000000000 --- a/r/man/schema.Rd +++ /dev/null @@ -1,38 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/schema.R -\docType{class} -\name{Schema} -\alias{Schema} -\alias{schema} -\title{Schema class} -\usage{ -schema(...) -} -\arguments{ -\item{...}{named list of \link[=data-type]{data types}} -} -\description{ -Create a \code{Schema} when you -want to convert an R \code{data.frame} to Arrow but don't want to rely on the -default mapping of R types to Arrow types, such as when you want to choose a -specific numeric precision. -} -\section{Usage}{ -\preformatted{s <- schema(...) 
- -s$ToString() -s$num_fields() -s$field(i) -} -} - -\section{Methods}{ - -\itemize{ -\item \code{$ToString()}: convert to a string -\item \code{$num_fields()}: returns the number of fields -\item \code{$field(i)}: returns the field at index \code{i} (0-based) -} -} - -\keyword{datasets} diff --git a/r/tests/testthat/test-arrow.R b/r/tests/testthat/test-arrow.R index 0685bd06c70..7856d4fb5c6 100644 --- a/r/tests/testthat/test-arrow.R +++ b/r/tests/testthat/test-arrow.R @@ -22,3 +22,20 @@ if (identical(Sys.getenv("TEST_R_WITH_ARROW"), "TRUE")) { expect_true(arrow_available()) }) } + +r_only({ + test_that("assert_is", { + x <- 42 + expect_true(assert_is(x, "numeric")) + expect_true(assert_is(x, c("numeric", "character"))) + expect_error(assert_is(x, "factor"), 'x must be a "factor"') + expect_error( + assert_is(x, c("factor", "list")), + 'x must be a "factor" or "list"' + ) + expect_error( + assert_is(x, c("factor", "character", "list")), + 'x must be a "factor", "character", or "list"' + ) + }) +}) From 22c9d0420bf5c0c354f0742f7cbc9eecee6054a5 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Tue, 10 Sep 2019 13:17:22 -0700 Subject: [PATCH 36/37] More doc cleaning --- r/R/array.R | 28 ++++++++++----------------- r/R/buffer.R | 7 +++++-- r/R/chunked-array.R | 27 ++++++++++++++++++-------- r/R/schema.R | 4 ++-- r/man/ChunkedArray.Rd | 44 ++++++++++++++++++++++++++++++++++++++++++ r/man/Field.Rd | 39 +++++++++++++++++++++++++++++++++++++ r/man/Schema.Rd | 38 ++++++++++++++++++++++++++++++++++++ r/man/array.Rd | 33 ++++++++++++++----------------- r/man/buffer.Rd | 8 +++++++- r/man/chunked-array.Rd | 17 ---------------- r/man/chunked_array.Rd | 16 --------------- 11 files changed, 178 insertions(+), 83 deletions(-) create mode 100644 r/man/ChunkedArray.Rd create mode 100644 r/man/Field.Rd create mode 100644 r/man/Schema.Rd delete mode 100644 r/man/chunked-array.Rd delete mode 100644 r/man/chunked_array.Rd diff --git a/r/R/array.R b/r/R/array.R index 
9aa8be9652b..ac5474cf0a6 100644 --- a/r/R/array.R +++ b/r/R/array.R @@ -17,32 +17,24 @@ #' @include arrow-package.R -#' @title class Array -#' -#' Array base type. Immutable data array with some logical type and some length. -#' +#' @title Array class +#' @description Array base type. Immutable data array with some logical type +#' and some length. #' @usage NULL #' @format NULL #' @docType class #' +#' @section Factory: +#' The `Array$create()` factory method instantiates an `Array` and +#' takes the following arguments: +#' * `x`: an R vector, list, or `data.frame` +#' * `type`: an optional [data type][data-type] for `x`. If omitted, the type +#' will be inferred from the data. #' @section Usage: #' #' ``` #' a <- Array$create(x) -#' -#' a$IsNull(i) -#' a$IsValid(i) -#' a$length() or length(a) -#' a$offset() -#' a$null_count() -#' a$type() -#' a$type_id() -#' a$Equals(b) -#' a$ApproxEquals(b) -#' a$as_vector() -#' a$ToString() -#' a$Slice(offset, length = NULL) -#' a$RangeEquals(other, start_idx, end_idx, other_start_idx) +#' length(a) #' #' print(a) #' a == a diff --git a/r/R/buffer.R b/r/R/buffer.R index 2edd3213437..d1f789175cc 100644 --- a/r/R/buffer.R +++ b/r/R/buffer.R @@ -15,11 +15,14 @@ # specific language governing permissions and limitations # under the License. -#' @title class Buffer +#' @title Buffer class #' @usage NULL #' @format NULL #' @docType class -#' @description `buffer()` lets you create an `arrow::Buffer` from an R object +#' @description A Buffer is an object containing a pointer to a piece of +#' contiguous memory with a particular size. 
+#' @section Factory: +#' `buffer()` lets you create an `arrow::Buffer` from an R object #' @section Methods: #' #' - `$is_mutable()` : diff --git a/r/R/chunked-array.R b/r/R/chunked-array.R index ef8009b67f8..5429efff4aa 100644 --- a/r/R/chunked-array.R +++ b/r/R/chunked-array.R @@ -17,18 +17,31 @@ #' @include arrow-package.R -#' @title class ChunkedArray -#' +#' @title ChunkedArray class #' @usage NULL #' @format NULL #' @docType class +#' @description A `ChunkedArray` is a data structure managing a list of +#' primitive Arrow [Arrays][Array] logically as one large array. +#' @section Factory: +#' The `ChunkedArray$create()` factory method instantiates the object from +#' various Arrays or R vectors. `chunked_array()` is an alias for it. #' #' @section Methods: #' -#' TODO +#' - `$length()` +#' - `$chunk(i)` +#' - `$as_vector()` +#' - `$Slice(offset, length = NULL)` +#' - `$cast(target_type, safe = TRUE, options = cast_options(safe))` +#' - `$null_count()` +#' - `$chunks()` +#' - `$num_chunks()` +#' - `$type()` #' -#' @rdname chunked-array -#' @name chunked-array +#' @rdname ChunkedArray +#' @name ChunkedArray +#' @seealso [Array] #' @export ChunkedArray <- R6Class("ChunkedArray", inherit = Object, public = list( @@ -60,10 +73,8 @@ ChunkedArray$create <- function(..., type = NULL) { shared_ptr(ChunkedArray, ChunkedArray__from_list(list2(...), type)) } -#' Create a [ChunkedArray][chunked-array] from various R vectors -#' #' @param \dots Vectors to coerce #' @param type currently ignored -#' +#' @rdname ChunkedArray #' @export chunked_array <- ChunkedArray$create diff --git a/r/R/schema.R b/r/R/schema.R index 1886277a52b..9f28fb53d17 100644 --- a/r/R/schema.R +++ b/r/R/schema.R @@ -87,14 +87,14 @@ read_schema.InputStream <- function(stream, ...) { } #' @export -`read_schema.Buffer` <- function(stream, ...) { +read_schema.Buffer <- function(stream, ...) 
{ stream <- BufferReader$create(stream) on.exit(stream$close()) shared_ptr(Schema, ipc___ReadSchema_InputStream(stream)) } #' @export -`read_schema.raw` <- function(stream, ...) { +read_schema.raw <- function(stream, ...) { stream <- BufferReader$create(stream) on.exit(stream$close()) shared_ptr(Schema, ipc___ReadSchema_InputStream(stream)) diff --git a/r/man/ChunkedArray.Rd b/r/man/ChunkedArray.Rd new file mode 100644 index 00000000000..7e617243594 --- /dev/null +++ b/r/man/ChunkedArray.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/chunked-array.R +\docType{class} +\name{ChunkedArray} +\alias{ChunkedArray} +\alias{chunked_array} +\title{ChunkedArray class} +\usage{ +chunked_array(..., type = NULL) +} +\arguments{ +\item{\dots}{Vectors to coerce} + +\item{type}{currently ignored} +} +\description{ +A \code{ChunkedArray} is a data structure managing a list of +primitive Arrow \link[=Array]{Arrays} logically as one large array. +} +\section{Factory}{ + +The \code{ChunkedArray$create()} factory method instantiates the object from +various Arrays or R vectors. \code{chunked_array()} is an alias for it. 
+} + +\section{Methods}{ + +\itemize{ +\item \code{$length()} +\item \code{$chunk(i)} +\item \code{$as_vector()} +\item \code{$Slice(offset, length = NULL)} +\item \code{$cast(target_type, safe = TRUE, options = cast_options(safe))} +\item \code{$null_count()} +\item \code{$chunks()} +\item \code{$num_chunks()} +\item \code{$type()} +} +} + +\seealso{ +\link{Array} +} +\keyword{datasets} diff --git a/r/man/Field.Rd b/r/man/Field.Rd new file mode 100644 index 00000000000..c4325f4c821 --- /dev/null +++ b/r/man/Field.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/field.R +\docType{class} +\name{Field} +\alias{Field} +\alias{field} +\title{class arrow::Field} +\format{An object of class \code{R6ClassGenerator} of length 25.} +\usage{ +Field + +field(name, type, metadata) +} +\arguments{ +\item{name}{field name} + +\item{type}{logical type, instance of \link{DataType}} + +\item{metadata}{currently ignored} +} +\description{ +\code{field()} lets you create an \code{arrow::Field} that maps a +\link[=data-type]{DataType} to a column name. Fields are contained in +\link[=Schema]{Schemas}. +} +\section{Methods}{ + +\itemize{ +\item \code{f$ToString()}: convert to a string +\item \code{f$Equals(other)}: test for equality. More naturally called as \code{f == other} +} +} + +\examples{ +\donttest{ +field("x", int32()) +} +} +\keyword{datasets} diff --git a/r/man/Schema.Rd b/r/man/Schema.Rd new file mode 100644 index 00000000000..2f960dbce6c --- /dev/null +++ b/r/man/Schema.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/schema.R +\docType{class} +\name{Schema} +\alias{Schema} +\alias{schema} +\title{Schema class} +\usage{ +schema(...) 
+} +\arguments{ +\item{...}{named list of \link[=data-type]{data types}} +} +\description{ +Create a \code{Schema} when you +want to convert an R \code{data.frame} to Arrow but don't want to rely on the +default mapping of R types to Arrow types, such as when you want to choose a +specific numeric precision. +} +\section{Usage}{ +\preformatted{s <- schema(...) + +s$ToString() +s$num_fields() +s$field(i) +} +} + +\section{Methods}{ + +\itemize{ +\item \code{$ToString()}: convert to a string +\item \code{$num_fields()}: returns the number of fields +\item \code{$field(i)}: returns the field at index \code{i} (0-based) +} +} + +\keyword{datasets} diff --git a/r/man/array.Rd b/r/man/array.Rd index 9179174f1c4..73234746f93 100644 --- a/r/man/array.Rd +++ b/r/man/array.Rd @@ -4,30 +4,25 @@ \name{array} \alias{array} \alias{Array} -\title{class Array - -Array base type. Immutable data array with some logical type and some length.} +\title{Array class} \description{ -class Array +Array base type. Immutable data array with some logical type +and some length. +} +\section{Factory}{ -Array base type. Immutable data array with some logical type and some length. +The \code{Array$create()} factory method instantiates an \code{Array} and +takes the following arguments: +\itemize{ +\item \code{x}: an R vector, list, or \code{data.frame} +\item \code{type}: an optional \link[=data-type]{data type} for \code{x}. If omitted, the type +will be inferred from the data. 
} +} + \section{Usage}{ \preformatted{a <- Array$create(x) - -a$IsNull(i) -a$IsValid(i) -a$length() or length(a) -a$offset() -a$null_count() -a$type() -a$type_id() -a$Equals(b) -a$ApproxEquals(b) -a$as_vector() -a$ToString() -a$Slice(offset, length = NULL) -a$RangeEquals(other, start_idx, end_idx, other_start_idx) +length(a) print(a) a == a diff --git a/r/man/buffer.Rd b/r/man/buffer.Rd index 49712dc409e..5481ca55964 100644 --- a/r/man/buffer.Rd +++ b/r/man/buffer.Rd @@ -4,7 +4,7 @@ \name{buffer} \alias{buffer} \alias{Buffer} -\title{class Buffer} +\title{Buffer class} \usage{ buffer(x) } @@ -15,8 +15,14 @@ buffer(x) an instance of \code{Buffer} that borrows memory from \code{x} } \description{ +A Buffer is an object containing a pointer to a piece of +contiguous memory with a particular size. +} +\section{Factory}{ + \code{buffer()} lets you create an \code{arrow::Buffer} from an R object } + \section{Methods}{ \itemize{ diff --git a/r/man/chunked-array.Rd b/r/man/chunked-array.Rd deleted file mode 100644 index 24d962cc18e..00000000000 --- a/r/man/chunked-array.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/chunked-array.R -\docType{class} -\name{chunked-array} -\alias{chunked-array} -\alias{ChunkedArray} -\title{class ChunkedArray} -\description{ -class ChunkedArray -} -\section{Methods}{ - - -TODO -} - -\keyword{datasets} diff --git a/r/man/chunked_array.Rd b/r/man/chunked_array.Rd deleted file mode 100644 index 42193fb57be..00000000000 --- a/r/man/chunked_array.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/chunked-array.R -\name{chunked_array} -\alias{chunked_array} -\title{Create a \link[=chunked-array]{ChunkedArray} from various R vectors} -\usage{ -chunked_array(..., type = NULL) -} -\arguments{ -\item{\dots}{Vectors to coerce} - -\item{type}{currently ignored} -} -\description{ -Create a \link[=chunked-array]{ChunkedArray} 
from various R vectors -} From 3c6f85bfb6020f54da5aee90a70fd5917381a1f6 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Tue, 10 Sep 2019 14:05:27 -0700 Subject: [PATCH 37/37] :rat: --- r/R/util.R | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/r/R/util.R b/r/R/util.R index 19e77258e65..0b122f9d224 100644 --- a/r/R/util.R +++ b/r/R/util.R @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + oxford_paste <- function(x, conjunction = "and") { if (is.character(x)) { x <- paste0('"', x, '"')