From 86afac8707dc13248663acf9f99872638edbdc6b Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Mon, 26 Aug 2019 09:48:19 -0700 Subject: [PATCH 1/6] mv R6.R type.R --- r/R/arrow-package.R | 20 ++++++++++++++++++++ r/R/{R6.R => type.R} | 20 -------------------- 2 files changed, 20 insertions(+), 20 deletions(-) rename r/R/{R6.R => type.R} (95%) diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R index 86e909e7329..0d3a7aec57b 100644 --- a/r/R/arrow-package.R +++ b/r/R/arrow-package.R @@ -42,3 +42,23 @@ arrow_available <- function() { option_use_threads <- function() { !is_false(getOption("arrow.use_threads")) } + +#' @include enums.R +`arrow::Object` <- R6Class("arrow::Object", + public = list( + initialize = function(xp) self$set_pointer(xp), + + pointer = function() self$`.:xp:.`, + `.:xp:.` = NULL, + set_pointer = function(xp){ + self$`.:xp:.` <- xp + }, + print = function(...){ + cat(class(self)[[1]], "\n") + if (!is.null(self$ToString)){ + cat(self$ToString(), "\n") + } + invisible(self) + } + ) +) diff --git a/r/R/R6.R b/r/R/type.R similarity index 95% rename from r/R/R6.R rename to r/R/type.R index 06dd6f0472b..9eee6cd0639 100644 --- a/r/R/R6.R +++ b/r/R/type.R @@ -15,26 +15,6 @@ # specific language governing permissions and limitations # under the License. -#' @include enums.R -`arrow::Object` <- R6Class("arrow::Object", - public = list( - initialize = function(xp) self$set_pointer(xp), - - pointer = function() self$`.:xp:.`, - `.:xp:.` = NULL, - set_pointer = function(xp){ - self$`.:xp:.` <- xp - }, - print = function(...){ - cat(class(self)[[1]], "\n") - if (!is.null(self$ToString)){ - cat(self$ToString(), "\n") - } - invisible(self) - } - ) -) - shared_ptr <- function(class, xp) { if (!shared_ptr_is_null(xp)) class$new(xp) } From 4dcf3d645c3865c2afcc81d3d86cd398f500fcd7 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Mon, 26 Aug 2019 10:05:16 -0700 Subject: [PATCH 2/6] Finish the move --- r/DESCRIPTION | 4 ++-- r/R/ArrayData.R | 2 +- r/R/ChunkedArray.R | 2 +- r/R/Column.R | 2 +- r/R/Field.R | 2 +- r/R/List.R | 2 +- r/R/RecordBatch.R | 2 +- r/R/RecordBatchReader.R | 2 +- r/R/RecordBatchWriter.R | 2 +- r/R/Schema.R | 2 +- r/R/Struct.R | 2 +- r/R/Table.R | 2 +- r/R/array.R | 2 +- r/R/buffer.R | 2 +- r/R/compression.R | 2 +- r/R/csv.R | 2 +- r/R/dictionary.R | 2 +- r/R/feather.R | 2 +- r/R/io.R | 2 +- r/R/json.R | 4 ++-- r/R/memory_pool.R | 2 +- r/R/message.R | 2 +- r/R/parquet.R | 2 +- r/_pkgdown.yml | 12 +++++++----- r/man/DataType.Rd | 22 +++++++++++----------- r/man/arrow__DataType.Rd | 2 +- r/man/arrow__FixedWidthType.Rd | 2 +- r/man/type.Rd | 2 +- r/tests/testthat/test-schema.R | 7 +++++++ 29 files changed, 53 insertions(+), 44 deletions(-) diff --git a/r/DESCRIPTION b/r/DESCRIPTION index 2df07e68744..0f6813c5350 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -47,7 +47,7 @@ Suggests: vctrs Collate: 'enums.R' - 'R6.R' + 'arrow-package.R' 'ArrayData.R' 'ChunkedArray.R' 'Column.R' @@ -60,7 +60,6 @@ Collate: 'Struct.R' 'Table.R' 'array.R' - 'arrow-package.R' 'arrowExports.R' 'buffer.R' 'io.R' @@ -78,4 +77,5 @@ Collate: 'read_table.R' 'reexports-bit64.R' 'reexports-tidyselect.R' + 'type.R' 'write_arrow.R' diff --git a/r/R/ArrayData.R b/r/R/ArrayData.R index f18317913ba..75d58a8516e 100644 --- a/r/R/ArrayData.R +++ b/r/R/ArrayData.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -#' @include R6.R +#' @include arrow-package.R #' @title class arrow::ArrayData #' diff --git a/r/R/ChunkedArray.R b/r/R/ChunkedArray.R index fa9aaee1ca3..e407a494065 100644 --- a/r/R/ChunkedArray.R +++ b/r/R/ChunkedArray.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -#' @include R6.R +#' @include arrow-package.R #' @title class arrow::ChunkedArray #' diff --git a/r/R/Column.R b/r/R/Column.R index fb8af1ea315..8c30b05feef 100644 --- a/r/R/Column.R +++ b/r/R/Column.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -#' @include R6.R +#' @include arrow-package.R #' @title class arrow::Column #' diff --git a/r/R/Field.R b/r/R/Field.R index d5bdf2250e1..644a701b666 100644 --- a/r/R/Field.R +++ b/r/R/Field.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -#' @include R6.R +#' @include arrow-package.R #' @title class arrow::Field #' diff --git a/r/R/List.R b/r/R/List.R index efd8839f39b..1376b07b75d 100644 --- a/r/R/List.R +++ b/r/R/List.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -#' @include R6.R +#' @include arrow-package.R `arrow::ListType` <- R6Class("arrow::ListType", inherit = `arrow::NestedType`, diff --git a/r/R/RecordBatch.R b/r/R/RecordBatch.R index 6446c95c931..de8b01ef180 100644 --- a/r/R/RecordBatch.R +++ b/r/R/RecordBatch.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -#' @include R6.R +#' @include arrow-package.R #' @title class arrow::RecordBatch #' diff --git a/r/R/RecordBatchReader.R b/r/R/RecordBatchReader.R index 6dab2d1ff76..922424e4eb3 100644 --- a/r/R/RecordBatchReader.R +++ b/r/R/RecordBatchReader.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -#' @include R6.R +#' @include arrow-package.R #' @title class arrow::RecordBatchReader #' diff --git a/r/R/RecordBatchWriter.R b/r/R/RecordBatchWriter.R index 59aa9847a1f..c654f178961 100644 --- a/r/R/RecordBatchWriter.R +++ b/r/R/RecordBatchWriter.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -#' @include R6.R +#' @include arrow-package.R #' @title class arrow::ipc::RecordBatchWriter #' diff --git a/r/R/Schema.R b/r/R/Schema.R index 0d471960256..c1bf51891df 100644 --- a/r/R/Schema.R +++ b/r/R/Schema.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -#' @include R6.R +#' @include arrow-package.R #' @title class arrow::Schema #' diff --git a/r/R/Struct.R b/r/R/Struct.R index 820e1a895ef..d4468db8741 100644 --- a/r/R/Struct.R +++ b/r/R/Struct.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -#' @include R6.R +#' @include arrow-package.R `arrow::StructType` <- R6Class("arrow::StructType", inherit = `arrow::NestedType`, diff --git a/r/R/Table.R b/r/R/Table.R index 15ea48fe7c1..1255c8a0e1b 100644 --- a/r/R/Table.R +++ b/r/R/Table.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -#' @include R6.R +#' @include arrow-package.R #' #' @title class arrow::Table #' diff --git a/r/R/array.R b/r/R/array.R index deb3bc53893..fd7c6ef7c8d 100644 --- a/r/R/array.R +++ b/r/R/array.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -#' @include R6.R +#' @include arrow-package.R #' @title class arrow::Array #' diff --git a/r/R/buffer.R b/r/R/buffer.R index 327d8535a14..12d0699762d 100644 --- a/r/R/buffer.R +++ b/r/R/buffer.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -#' @include R6.R +#' @include arrow-package.R #' @include enums.R #' @title class arrow::Buffer diff --git a/r/R/compression.R b/r/R/compression.R index e10fef1bd2e..db6c98b9665 100644 --- a/r/R/compression.R +++ b/r/R/compression.R @@ -16,7 +16,7 @@ # under the License. #' @include enums.R -#' @include R6.R +#' @include arrow-package.R #' @include io.R `arrow::util::Codec` <- R6Class("arrow::util::Codec", inherit = `arrow::Object`) diff --git a/r/R/csv.R b/r/R/csv.R index 5b5d36cbe0b..d6c6db06bd4 100644 --- a/r/R/csv.R +++ b/r/R/csv.R @@ -189,7 +189,7 @@ read_tsv_arrow <- function(file, eval.parent(mc) } -#' @include R6.R +#' @include arrow-package.R `arrow::csv::TableReader` <- R6Class("arrow::csv::TableReader", inherit = `arrow::Object`, public = list( diff --git a/r/R/dictionary.R b/r/R/dictionary.R index bfe2373aefe..acddd5176fb 100644 --- a/r/R/dictionary.R +++ b/r/R/dictionary.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -#' @include R6.R +#' @include arrow-package.R #' @title class arrow::DictionaryType #' diff --git a/r/R/feather.R b/r/R/feather.R index 8bcbe2b80e0..db905489eb4 100644 --- a/r/R/feather.R +++ b/r/R/feather.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -#' @include R6.R +#' @include arrow-package.R `arrow::ipc::feather::TableWriter` <- R6Class("arrow::ipc::feather::TableWriter", inherit = `arrow::Object`, public = list( diff --git a/r/R/io.R b/r/R/io.R index 5d7d99cb5e8..6bb1121236d 100644 --- a/r/R/io.R +++ b/r/R/io.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -#' @include R6.R +#' @include arrow-package.R #' @include enums.R #' @include buffer.R diff --git a/r/R/json.R b/r/R/json.R index dce130e61a1..35cdb1b3e78 100644 --- a/r/R/json.R +++ b/r/R/json.R @@ -15,9 +15,9 @@ # specific language governing permissions and limitations # under the License. -#' @include R6.R +#' @include arrow-package.R -#' @include R6.R +#' @include arrow-package.R #' #' @title class arrow::json::TableReader #' diff --git a/r/R/memory_pool.R b/r/R/memory_pool.R index 88c2c7bc198..771e05bebf5 100644 --- a/r/R/memory_pool.R +++ b/r/R/memory_pool.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -#' @include R6.R +#' @include arrow-package.R #' #' @title class arrow::MemoryPool #' diff --git a/r/R/message.R b/r/R/message.R index 98d9248a747..e0add59ac53 100644 --- a/r/R/message.R +++ b/r/R/message.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -#' @include R6.R +#' @include arrow-package.R #' @title class arrow::ipc::Message #' diff --git a/r/R/parquet.R b/r/R/parquet.R index 4fcff6b7b1b..d914a5df0a1 100644 --- a/r/R/parquet.R +++ b/r/R/parquet.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -#' @include R6.R +#' @include arrow-package.R `parquet::arrow::FileReader` <- R6Class("parquet::arrow::FileReader", inherit = `arrow::Object`, diff --git a/r/_pkgdown.yml b/r/_pkgdown.yml index 552eff97252..5ede310ee2f 100644 --- a/r/_pkgdown.yml +++ b/r/_pkgdown.yml @@ -66,15 +66,18 @@ reference: - array - chunked_array - record_batch - - schema - table + - read_message + - read_record_batch + - read_table +- title: Arrow data types and schema + contents: + - schema - type - dictionary - field - - read_message - - read_record_batch - read_schema - - read_table + - TimeUnit - title: R6 classes contents: - arrow__Array @@ -116,7 +119,6 @@ reference: - BufferReader - CompressedInputStream - CompressedOutputStream - - TimeUnit - FeatherTableReader - FeatherTableWriter - FileOutputStream diff --git a/r/man/DataType.Rd b/r/man/DataType.Rd index bf5f1d40456..10d797f5672 100644 --- a/r/man/DataType.Rd +++ b/r/man/DataType.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/enums.R, R/R6.R, R/List.R, R/Struct.R +% Please edit documentation in R/enums.R, R/List.R, R/Struct.R, R/type.R \docType{data} \name{TimeUnit} \alias{TimeUnit} @@ -9,6 +9,8 @@ \alias{FileMode} \alias{MessageType} \alias{CompressionType} +\alias{list_of} +\alias{struct} \alias{int8} \alias{int16} \alias{int32} @@ -29,8 +31,6 @@ \alias{null} \alias{timestamp} \alias{decimal} -\alias{list_of} -\alias{struct} \title{Apache Arrow data types} \format{An object of class \code{arrow::TimeUnit::type} (inherits from \code{arrow-enum}) of length 4.} \usage{ @@ -48,6 +48,10 @@ MessageType CompressionType +list_of(type) + +struct(...) + int8() int16() @@ -87,12 +91,12 @@ null() timestamp(unit, timezone) decimal(precision, scale) - -list_of(type) - -struct(...) } \arguments{ +\item{type}{type} + +\item{...}{...} + \item{unit}{time unit} \item{timezone}{time zone} @@ -100,10 +104,6 @@ struct(...) \item{precision}{precision} \item{scale}{scale} - -\item{type}{type} - -\item{...}{...} } \description{ Apache Arrow data types diff --git a/r/man/arrow__DataType.Rd b/r/man/arrow__DataType.Rd index 53bd6327d91..4eeb05110d2 100644 --- a/r/man/arrow__DataType.Rd +++ b/r/man/arrow__DataType.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/R6.R +% Please edit documentation in R/type.R \docType{class} \name{arrow__DataType} \alias{arrow__DataType} diff --git a/r/man/arrow__FixedWidthType.Rd b/r/man/arrow__FixedWidthType.Rd index 610a4003429..075c0eeeb14 100644 --- a/r/man/arrow__FixedWidthType.Rd +++ b/r/man/arrow__FixedWidthType.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/R6.R +% Please edit documentation in R/type.R \docType{class} \name{arrow__FixedWidthType} \alias{arrow__FixedWidthType} diff --git a/r/man/type.Rd b/r/man/type.Rd index 3e2b4f408a2..2f85e4a6ac6 100644 --- a/r/man/type.Rd +++ b/r/man/type.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/R6.R +% Please edit documentation in R/type.R \name{type} \alias{type} \title{infer the arrow Array type from an R vector} diff --git a/r/tests/testthat/test-schema.R b/r/tests/testthat/test-schema.R index ff40b816ea6..0608b178c74 100644 --- a/r/tests/testthat/test-schema.R +++ b/r/tests/testthat/test-schema.R @@ -17,6 +17,13 @@ context("arrow::Schema") +test_that("Alternate type names are supported", { + expect_equal( + schema(a = int32(), b = double(), c = bool(), d = string()), + schema(a = int32(), b = float64(), c = boolean(), d = utf8()) + ) +}) + test_that("reading schema from Buffer", { # TODO: this uses the streaming format, i.e. from RecordBatchStreamWriter # maybe there is an easier way to serialize a schema From d845cd3e06a72f617ded97385d54dfe722f24302 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Mon, 26 Aug 2019 10:31:48 -0700 Subject: [PATCH 3/6] Add aliases for creating types based on their printed names --- r/R/Field.R | 9 ++++++++- r/R/type.R | 16 ++++++++++++++++ r/tests/testthat/test-field.R | 4 ++++ r/tests/testthat/test-schema.R | 4 ++-- 4 files changed, 30 insertions(+), 3 deletions(-) diff --git a/r/R/Field.R b/r/R/Field.R index 644a701b666..cc2f6cd185d 100644 --- a/r/R/Field.R +++ b/r/R/Field.R @@ -72,7 +72,14 @@ #' @export field <- function(name, type, metadata) { assert_that(inherits(name, "character"), length(name) == 1L) - assert_that(inherits(type, "arrow::DataType")) + if (!inherits(type, "arrow::DataType")) { + if (identical(type, double())) { + # Magic so that we don't have to mask this base function + type <- float64() + } else { + stop(name, " must be arrow::DataType, not ", class(type), call. = FALSE) + } + } assert_that(missing(metadata), msg = "metadata= is currently ignored") shared_ptr(`arrow::Field`, Field__initialize(name, type, TRUE)) } diff --git a/r/R/type.R b/r/R/type.R index 9eee6cd0639..89109b33109 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -305,10 +305,18 @@ uint64 <- function() shared_ptr(`arrow::UInt64`, UInt64__initialize()) #' @export float16 <- function() shared_ptr(`arrow::Float16`, Float16__initialize()) +#' @rdname DataType +#' @export +halffloat <- float16 + #' @rdname DataType #' @export float32 <- function() shared_ptr(`arrow::Float32`, Float32__initialize()) +#' @rdname DataType +#' @export +float <- float32 + #' @rdname DataType #' @export float64 <- function() shared_ptr(`arrow::Float64`, Float64__initialize()) @@ -317,10 +325,18 @@ float64 <- function() shared_ptr(`arrow::Float64`, Float64__initialize()) #' @export boolean <- function() shared_ptr(`arrow::Boolean`, Boolean__initialize()) +#' @rdname DataType +#' @export +bool <- boolean + #' @rdname DataType #' @export utf8 <- function() shared_ptr(`arrow::Utf8`, Utf8__initialize()) +#' @rdname DataType +#' @export +string <- utf8 + #' @rdname DataType #' @export date32 <- function() shared_ptr(`arrow::Date32`, Date32__initialize()) diff --git a/r/tests/testthat/test-field.R b/r/tests/testthat/test-field.R index aaa2875510a..5d63a7f45fe 100644 --- a/r/tests/testthat/test-field.R +++ b/r/tests/testthat/test-field.R @@ -24,3 +24,7 @@ test_that("field() factory", { expect_true(x == x) expect_false(x == field("x", int64())) }) + +test_that("Field validation", { + expect_error(schema(b = 32), "b must be arrow::DataType, not numeric") +}) diff --git a/r/tests/testthat/test-schema.R b/r/tests/testthat/test-schema.R index 0608b178c74..387aee97eff 100644 --- a/r/tests/testthat/test-schema.R +++ b/r/tests/testthat/test-schema.R @@ -19,8 +19,8 @@ context("arrow::Schema") test_that("Alternate type names are supported", { expect_equal( - schema(a = int32(), b = double(), c = bool(), d = string()), - schema(a = int32(), b = float64(), c = boolean(), d = utf8()) + schema(b = double(), c = bool(), d = string(), e = float(), f = halffloat()), + schema(b = float64(), c = boolean(), d = utf8(), e = float32(), f = float16()) ) }) From 640e27d07276a102fe176698e4c0ccb0bf11cc8b Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Mon, 26 Aug 2019 10:43:54 -0700 Subject: [PATCH 4/6] Update namespace and collation --- r/DESCRIPTION | 2 +- r/NAMESPACE | 4 ++++ r/R/ArrayData.R | 2 +- r/R/Column.R | 2 +- r/R/List.R | 2 +- r/R/Struct.R | 2 +- r/R/arrow-package.R | 8 ++++++++ r/R/dictionary.R | 2 +- r/R/type.R | 8 +------- r/man/DataType.Rd | 34 +++++++++++++++++++++++----------- 10 files changed, 42 insertions(+), 24 deletions(-) diff --git a/r/DESCRIPTION b/r/DESCRIPTION index 0f6813c5350..2e1728ac353 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -48,6 +48,7 @@ Suggests: Collate: 'enums.R' 'arrow-package.R' + 'type.R' 'ArrayData.R' 'ChunkedArray.R' 'Column.R' @@ -77,5 +78,4 @@ Collate: 'read_table.R' 'reexports-bit64.R' 'reexports-tidyselect.R' - 'type.R' 'write_arrow.R' diff --git a/r/NAMESPACE b/r/NAMESPACE index 3a413c0e802..17fe78ee773 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -122,6 +122,7 @@ export(TimeUnit) export(Type) export(array) export(arrow_available) +export(bool) export(boolean) export(buffer) export(cast_options) @@ -140,9 +141,11 @@ export(dictionary) export(ends_with) export(everything) export(field) +export(float) export(float16) export(float32) export(float64) +export(halffloat) export(install_arrow) export(int16) export(int32) @@ -175,6 +178,7 @@ export(read_tsv_arrow) export(record_batch) export(schema) export(starts_with) +export(string) export(struct) export(table) export(time32) diff --git a/r/R/ArrayData.R b/r/R/ArrayData.R index 75d58a8516e..d9f307bf540 100644 --- a/r/R/ArrayData.R +++ b/r/R/ArrayData.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -#' @include arrow-package.R +#' @include type.R #' @title class arrow::ArrayData #' diff --git a/r/R/Column.R b/r/R/Column.R index 8c30b05feef..0487425140d 100644 --- a/r/R/Column.R +++ b/r/R/Column.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -#' @include arrow-package.R +#' @include type.R #' @title class arrow::Column #' diff --git a/r/R/List.R b/r/R/List.R index 1376b07b75d..b21c3921dcd 100644 --- a/r/R/List.R +++ b/r/R/List.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -#' @include arrow-package.R +#' @include type.R `arrow::ListType` <- R6Class("arrow::ListType", inherit = `arrow::NestedType`, diff --git a/r/R/Struct.R b/r/R/Struct.R index d4468db8741..331c1334901 100644 --- a/r/R/Struct.R +++ b/r/R/Struct.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -#' @include arrow-package.R +#' @include type.R `arrow::StructType` <- R6Class("arrow::StructType", inherit = `arrow::NestedType`, diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R index 0d3a7aec57b..0f0a26b0d81 100644 --- a/r/R/arrow-package.R +++ b/r/R/arrow-package.R @@ -62,3 +62,11 @@ option_use_threads <- function() { } ) ) + +shared_ptr <- function(class, xp) { + if (!shared_ptr_is_null(xp)) class$new(xp) +} + +unique_ptr <- function(class, xp) { + if (!unique_ptr_is_null(xp)) class$new(xp) +} diff --git a/r/R/dictionary.R b/r/R/dictionary.R index acddd5176fb..ff31c6a5326 100644 --- a/r/R/dictionary.R +++ b/r/R/dictionary.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -#' @include arrow-package.R +#' @include type.R #' @title class arrow::DictionaryType #' diff --git a/r/R/type.R b/r/R/type.R index 89109b33109..1eae6c9c4b0 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -15,13 +15,7 @@ # specific language governing permissions and limitations # under the License. -shared_ptr <- function(class, xp) { - if (!shared_ptr_is_null(xp)) class$new(xp) -} - -unique_ptr <- function(class, xp) { - if (!unique_ptr_is_null(xp)) class$new(xp) -} +#' @include arrow-package.R #' @export `!=.arrow::Object` <- function(lhs, rhs){ diff --git a/r/man/DataType.Rd b/r/man/DataType.Rd index 10d797f5672..2d4be4358c0 100644 --- a/r/man/DataType.Rd +++ b/r/man/DataType.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/enums.R, R/List.R, R/Struct.R, R/type.R +% Please edit documentation in R/enums.R, R/type.R, R/List.R, R/Struct.R \docType{data} \name{TimeUnit} \alias{TimeUnit} @@ -9,8 +9,6 @@ \alias{FileMode} \alias{MessageType} \alias{CompressionType} -\alias{list_of} -\alias{struct} \alias{int8} \alias{int16} \alias{int32} @@ -20,10 +18,14 @@ \alias{uint32} \alias{uint64} \alias{float16} +\alias{halffloat} \alias{float32} +\alias{float} \alias{float64} \alias{boolean} +\alias{bool} \alias{utf8} +\alias{string} \alias{date32} \alias{date64} \alias{time32} @@ -31,6 +33,8 @@ \alias{null} \alias{timestamp} \alias{decimal} +\alias{list_of} +\alias{struct} \title{Apache Arrow data types} \format{An object of class \code{arrow::TimeUnit::type} (inherits from \code{arrow-enum}) of length 4.} \usage{ @@ -48,10 +52,6 @@ MessageType CompressionType -list_of(type) - -struct(...) - int8() int16() @@ -70,14 +70,22 @@ uint64() float16() +halffloat() + float32() +float() + float64() boolean() +bool() + utf8() +string() + date32() date64() @@ -91,12 +99,12 @@ null() timestamp(unit, timezone) decimal(precision, scale) -} -\arguments{ -\item{type}{type} -\item{...}{...} +list_of(type) +struct(...) +} +\arguments{ \item{unit}{time unit} \item{timezone}{time zone} @@ -104,6 +112,10 @@ decimal(precision, scale) \item{precision}{precision} \item{scale}{scale} + +\item{type}{type} + +\item{...}{...} } \description{ Apache Arrow data types From 40c824616cbea07a856ae08ab1110b243b3f632e Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Mon, 26 Aug 2019 11:42:21 -0700 Subject: [PATCH 5/6] Add docs for types and schema --- r/R/List.R | 2 +- r/R/Schema.R | 12 +++-- r/R/Struct.R | 2 +- r/R/dictionary.R | 6 +-- r/R/enums.R | 16 +++--- r/R/type.R | 79 +++++++++++++++++----------- r/_pkgdown.yml | 2 +- r/man/DataType.Rd | 123 -------------------------------------------- r/man/data-type.Rd | 122 +++++++++++++++++++++++++++++++++++++++++++ r/man/dictionary.Rd | 9 ++-- r/man/enums.Rd | 34 ++++++++++++ r/man/schema.Rd | 11 ++-- 12 files changed, 241 insertions(+), 177 deletions(-) delete mode 100644 r/man/DataType.Rd create mode 100644 r/man/data-type.Rd create mode 100644 r/man/enums.Rd diff --git a/r/R/List.R b/r/R/List.R index b21c3921dcd..a970fb895a9 100644 --- a/r/R/List.R +++ b/r/R/List.R @@ -25,6 +25,6 @@ ) ) -#' @rdname DataType +#' @rdname data-type #' @export list_of <- function(type) shared_ptr(`arrow::ListType`, list__(type)) diff --git a/r/R/Schema.R b/r/R/Schema.R index c1bf51891df..906841b1ccf 100644 --- a/r/R/Schema.R +++ b/r/R/Schema.R @@ -60,13 +60,19 @@ lhs$Equals(rhs) } -#' Schema factory +#' Create a schema #' -#' @param ... named list of data types +#' This function lets you define a schema for a table. This is useful when you +#' want to convert an R `data.frame` to Arrow but don't want to rely on the +#' default mapping of R types to Arrow types, such as when you want to choose a +#' specific numeric precision. #' -#' @return a [schema][arrow__Schema] +#' @param ... named list of [data types][data-type] +#' +#' @return A [schema][arrow__Schema] object. #' #' @export +# TODO (npr): add examples once ARROW-5505 merges schema <- function(...){ shared_ptr(`arrow::Schema`, schema_(.fields(list2(...)))) } diff --git a/r/R/Struct.R b/r/R/Struct.R index 331c1334901..fa35b7ec0f2 100644 --- a/r/R/Struct.R +++ b/r/R/Struct.R @@ -25,7 +25,7 @@ ) ) -#' @rdname DataType +#' @rdname data-type #' @export struct <- function(...){ xp <- struct_(.fields(list(...))) diff --git a/r/R/dictionary.R b/r/R/dictionary.R index ff31c6a5326..9262a514b5a 100644 --- a/r/R/dictionary.R +++ b/r/R/dictionary.R @@ -40,14 +40,14 @@ ) ) -#' dictionary type factory +#' Create a dictionary type #' #' @param index_type index type, e.g. [int32()] #' @param value_type value type, probably [utf8()] #' @param ordered Is this an ordered dictionary ? #' -#' @return a [arrow::DictionaryType][arrow__DictionaryType] -#' +#' @return An [arrow::DictionaryType][arrow__DictionaryType] +#' @seealso [Other Arrow data types][data-type] #' @export dictionary <- function(index_type, value_type, ordered = FALSE) { assert_that( diff --git a/r/R/enums.R b/r/R/enums.R index e45277b0e6c..5c24ce8e6e3 100644 --- a/r/R/enums.R +++ b/r/R/enums.R @@ -27,17 +27,19 @@ enum <- function(class, ..., .list = list(...)){ ) } -#' @rdname DataType +#' Arrow enums +#' @name enums #' @export +#' @keywords internal TimeUnit <- enum("arrow::TimeUnit::type", SECOND = 0L, MILLI = 1L, MICRO = 2L, NANO = 3L ) -#' @rdname DataType +#' @rdname enums #' @export DateUnit <- enum("arrow::DateUnit", DAY = 0L, MILLI = 1L) -#' @rdname DataType +#' @rdname enums #' @export Type <- enum("arrow::Type::type", "NA" = 0L, BOOL = 1L, UINT8 = 2L, INT8 = 3L, UINT16 = 4L, INT16 = 5L, @@ -48,7 +50,7 @@ Type <- enum("arrow::Type::type", UNION = 25L, DICTIONARY = 26L, MAP = 27L ) -#' @rdname DataType +#' @rdname enums #' @export StatusCode <- enum("arrow::StatusCode", OK = 0L, OutOfMemory = 1L, KeyError = 2L, TypeError = 3L, @@ -59,19 +61,19 @@ StatusCode <- enum("arrow::StatusCode", PlasmaStoreFull = 22L, PlasmaObjectAlreadySealed = 23L ) -#' @rdname DataType +#' @rdname enums #' @export FileMode <- enum("arrow::io::FileMode", READ = 0L, WRITE = 1L, READWRITE = 2L ) -#' @rdname DataType +#' @rdname enums #' @export MessageType <- enum("arrow::ipc::Message::Type", NONE = 0L, SCHEMA = 1L, DICTIONARY_BATCH = 2L, RECORD_BATCH = 3L, TENSOR = 4L ) -#' @rdname DataType +#' @rdname enums #' @export CompressionType <- enum("arrow::Compression::type", UNCOMPRESSED = 0L, SNAPPY = 1L, GZIP = 2L, BROTLI = 3L, ZSTD = 4L, LZ4 = 5L, LZO = 6L, BZ2 = 7L diff --git a/r/R/type.R b/r/R/type.R index 1eae6c9c4b0..b763ed80ebc 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -254,104 +254,121 @@ type.default <- function(x) { #' Apache Arrow data types #' -#' Apache Arrow data types +#' These functions create type objects corresponding to Arrow types. Use them +#' when defining a [schema()] or as inputs to other types, like `struct`. Most +#' of these functions don't take arguments, but a few do. +#' +#' A few functions have aliases: +#' +#' * `utf8()` and `string()` +#' * `float16()` and `halffloat()` +#' * `float32()` and `float()` +#' * `bool()` and `boolean()` +#' * Called from `schema()` or `struct()`, `double()` also is supported as a +#' way of creating a `float64()` #' -#' @param unit time unit -#' @param timezone time zone -#' @param precision precision -#' @param scale scale -#' @param type type -#' @param ... ... +#' @param unit For date/time types, the time unit (day, second, millisecond, etc.) +#' @param timezone For `timestamp()`, an optional time zone. +#' @param precision For `decimal()`, precision +#' @param scale For `decimal()`, scale +#' @param type For `list_of()`, a data type to make a list-of-type +#' @param ... For `struct()`, a named list of types to define the struct columns #' -#' @rdname DataType +#' @name data-type #' @export +#' @seealso [dictionary()] for creating a dictionary (factor-like) type. +#' @examples +#' \donttest{ +#' bool() +#' struct(a = int32(), b = double()) +#' } int8 <- function() shared_ptr(`arrow::Int8`, Int8__initialize()) -#' @rdname DataType +#' @rdname data-type #' @export int16 <- function() shared_ptr(`arrow::Int16`, Int16__initialize()) -#' @rdname DataType +#' @rdname data-type #' @export int32 <- function() shared_ptr(`arrow::Int32`, Int32__initialize()) -#' @rdname DataType +#' @rdname data-type #' @export int64 <- function() shared_ptr(`arrow::Int64`, Int64__initialize()) -#' @rdname DataType +#' @rdname data-type #' @export uint8 <- function() shared_ptr(`arrow::UInt8`, UInt8__initialize()) -#' @rdname DataType +#' @rdname data-type #' @export uint16 <- function() shared_ptr(`arrow::UInt16`, UInt16__initialize()) -#' @rdname DataType +#' @rdname data-type #' @export uint32 <- function() shared_ptr(`arrow::UInt32`, UInt32__initialize()) -#' @rdname DataType +#' @rdname data-type #' @export uint64 <- function() shared_ptr(`arrow::UInt64`, UInt64__initialize()) -#' @rdname DataType +#' @rdname data-type #' @export float16 <- function() shared_ptr(`arrow::Float16`, Float16__initialize()) -#' @rdname DataType +#' @rdname data-type #' @export halffloat <- float16 -#' @rdname DataType +#' @rdname data-type #' @export float32 <- function() shared_ptr(`arrow::Float32`, Float32__initialize()) -#' @rdname DataType +#' @rdname data-type #' @export float <- float32 -#' @rdname DataType +#' @rdname data-type #' @export float64 <- function() shared_ptr(`arrow::Float64`, Float64__initialize()) -#' @rdname DataType +#' @rdname data-type #' @export boolean <- function() shared_ptr(`arrow::Boolean`, Boolean__initialize()) -#' @rdname DataType +#' @rdname data-type #' @export bool <- boolean -#' @rdname DataType +#' @rdname data-type #' @export utf8 <- function() shared_ptr(`arrow::Utf8`, Utf8__initialize()) -#' @rdname DataType +#' @rdname data-type #' @export string <- utf8 -#' @rdname DataType +#' @rdname data-type #' @export date32 <- function() shared_ptr(`arrow::Date32`, Date32__initialize()) -#' @rdname DataType +#' @rdname data-type #' @export date64 <- function() shared_ptr(`arrow::Date64`, Date64__initialize()) -#' @rdname DataType +#' @rdname data-type #' @export time32 <- function(unit) shared_ptr(`arrow::Time32`, Time32__initialize(unit)) -#' @rdname DataType +#' @rdname data-type #' @export time64 <- function(unit) shared_ptr(`arrow::Time64`, Time64__initialize(unit)) -#' @rdname DataType +#' @rdname data-type #' @export null <- function() shared_ptr(`arrow::Null`, Null__initialize()) -#' @rdname DataType +#' @rdname data-type #' @export timestamp <- function(unit, timezone) { if (missing(timezone)) { @@ -361,7 +378,7 @@ timestamp <- function(unit, timezone) { } } -#' @rdname DataType +#' @rdname data-type #' @export decimal <- function(precision, scale) shared_ptr(`arrow::Decimal128Type`, Decimal128Type__initialize(precision, scale)) diff --git a/r/_pkgdown.yml b/r/_pkgdown.yml index 5ede310ee2f..98baa35fafc 100644 --- a/r/_pkgdown.yml +++ b/r/_pkgdown.yml @@ -77,7 +77,7 @@ reference: - dictionary - field - read_schema - - TimeUnit + - data-type - title: R6 classes contents: - arrow__Array diff --git a/r/man/DataType.Rd b/r/man/DataType.Rd deleted file mode 100644 index 2d4be4358c0..00000000000 --- a/r/man/DataType.Rd +++ /dev/null @@ -1,123 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/enums.R, R/type.R, R/List.R, R/Struct.R -\docType{data} -\name{TimeUnit} -\alias{TimeUnit} -\alias{DateUnit} -\alias{Type} -\alias{StatusCode} -\alias{FileMode} -\alias{MessageType} -\alias{CompressionType} -\alias{int8} -\alias{int16} -\alias{int32} -\alias{int64} -\alias{uint8} -\alias{uint16} -\alias{uint32} -\alias{uint64} -\alias{float16} -\alias{halffloat} -\alias{float32} -\alias{float} -\alias{float64} -\alias{boolean} -\alias{bool} -\alias{utf8} -\alias{string} -\alias{date32} -\alias{date64} -\alias{time32} -\alias{time64} -\alias{null} -\alias{timestamp} -\alias{decimal} -\alias{list_of} -\alias{struct} -\title{Apache Arrow data types} -\format{An object of class \code{arrow::TimeUnit::type} (inherits from \code{arrow-enum}) of length 4.} -\usage{ -TimeUnit - -DateUnit - -Type - -StatusCode - -FileMode - -MessageType - -CompressionType - -int8() - -int16() - -int32() - -int64() - -uint8() - -uint16() - -uint32() - -uint64() - -float16() - -halffloat() - -float32() - -float() - -float64() - -boolean() - -bool() - -utf8() - -string() - -date32() - -date64() - -time32(unit) - -time64(unit) - -null() - -timestamp(unit, timezone) - -decimal(precision, scale) - -list_of(type) - -struct(...) -} -\arguments{ -\item{unit}{time unit} - -\item{timezone}{time zone} - -\item{precision}{precision} - -\item{scale}{scale} - -\item{type}{type} - -\item{...}{...} -} -\description{ -Apache Arrow data types -} -\keyword{datasets} diff --git a/r/man/data-type.Rd b/r/man/data-type.Rd new file mode 100644 index 00000000000..316ff495f2e --- /dev/null +++ b/r/man/data-type.Rd @@ -0,0 +1,122 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/type.R, R/List.R, R/Struct.R +\name{data-type} +\alias{data-type} +\alias{int8} +\alias{int16} +\alias{int32} +\alias{int64} +\alias{uint8} +\alias{uint16} +\alias{uint32} +\alias{uint64} +\alias{float16} +\alias{halffloat} +\alias{float32} +\alias{float} +\alias{float64} +\alias{boolean} +\alias{bool} +\alias{utf8} +\alias{string} +\alias{date32} +\alias{date64} +\alias{time32} +\alias{time64} +\alias{null} +\alias{timestamp} +\alias{decimal} +\alias{list_of} +\alias{struct} +\title{Apache Arrow data types} +\usage{ +int8() + +int16() + +int32() + +int64() + +uint8() + +uint16() + +uint32() + +uint64() + +float16() + +halffloat() + +float32() + +float() + +float64() + +boolean() + +bool() + +utf8() + +string() + +date32() + +date64() + +time32(unit) + +time64(unit) + +null() + +timestamp(unit, timezone) + +decimal(precision, scale) + +list_of(type) + +struct(...) +} +\arguments{ +\item{unit}{For date/time types, the time unit (day, second, millisecond, etc.)} + +\item{timezone}{For \code{timestamp()}, an optional time zone.} + +\item{precision}{For \code{decimal()}, precision} + +\item{scale}{For \code{decimal()}, scale} + +\item{type}{For \code{list_of()}, a data type to make a list-of-type} + +\item{...}{For \code{struct()}, a named list of types to define the struct columns} +} +\description{ +These functions create type objects corresponding to Arrow types. Use them +when defining a \code{\link[=schema]{schema()}} or as inputs to other types, like \code{struct}. Most +of these functions don't take arguments, but a few do. +} +\details{ +A few functions have aliases: +\itemize{ +\item \code{utf8()} and \code{string()} +\item \code{float16()} and \code{halffloat()} +\item \code{float32()} and \code{float()} +\item \code{bool()} and \code{boolean()} +\item Called from \code{schema()} or \code{struct()}, \code{double()} also is supported as a +way of creating a \code{float64()} +} +} +\examples{ +\donttest{ +bool() +struct(a = int32(), b = double()) +} +} +\seealso{ +\code{\link[=dictionary]{dictionary()}} for creating a dictionary (factor-like) type. +} diff --git a/r/man/dictionary.Rd b/r/man/dictionary.Rd index 9662328b11a..334d67e937d 100644 --- a/r/man/dictionary.Rd +++ b/r/man/dictionary.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/dictionary.R \name{dictionary} \alias{dictionary} -\title{dictionary type factory} +\title{Create a dictionary type} \usage{ dictionary(index_type, value_type, ordered = FALSE) } @@ -14,8 +14,11 @@ dictionary(index_type, value_type, ordered = FALSE) \item{ordered}{Is this an ordered dictionary ?} } \value{ -a \link[=arrow__DictionaryType]{arrow::DictionaryType} +An \link[=arrow__DictionaryType]{arrow::DictionaryType} } \description{ -dictionary type factory +Create a dictionary type +} +\seealso{ +\link[=data-type]{Other Arrow data types} } diff --git a/r/man/enums.Rd b/r/man/enums.Rd new file mode 100644 index 00000000000..c55170e1ec0 --- /dev/null +++ b/r/man/enums.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/enums.R +\docType{data} +\name{enums} +\alias{enums} +\alias{TimeUnit} +\alias{DateUnit} +\alias{Type} +\alias{StatusCode} +\alias{FileMode} +\alias{MessageType} +\alias{CompressionType} +\title{Arrow enums} +\format{An object of class \code{arrow::TimeUnit::type} (inherits from \code{arrow-enum}) of length 4.} +\usage{ +TimeUnit + +DateUnit + +Type + +StatusCode + +FileMode + +MessageType + +CompressionType +} +\description{ +Arrow enums +} +\keyword{datasets} +\keyword{internal} diff --git a/r/man/schema.Rd b/r/man/schema.Rd index ad3bcb1f4e0..622e5a7e94c 100644 --- a/r/man/schema.Rd +++ b/r/man/schema.Rd @@ -2,16 +2,19 @@ % Please edit documentation in R/Schema.R \name{schema} \alias{schema} -\title{Schema factory} +\title{Create a schema} \usage{ schema(...) } \arguments{ -\item{...}{named list of data types} +\item{...}{named list of \link[=data-type]{data types}} } \value{ -a \link[=arrow__Schema]{schema} +A \link[=arrow__Schema]{schema} object. } \description{ -Schema factory +This function lets you define a schema for a table. This is useful when you +want to convert an R \code{data.frame} to Arrow but don't want to rely on the +default mapping of R types to Arrow types, such as when you want to choose a +specific numeric precision. } From 087133db16c54fe717975dd014ee2b1fbe4afcef Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Mon, 26 Aug 2019 13:13:17 -0700 Subject: [PATCH 6/6] Rename test file --- r/tests/testthat/{test-DataType.R => test-data-type.R} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename r/tests/testthat/{test-DataType.R => test-data-type.R} (100%) diff --git a/r/tests/testthat/test-DataType.R b/r/tests/testthat/test-data-type.R similarity index 100% rename from r/tests/testthat/test-DataType.R rename to r/tests/testthat/test-data-type.R