From 754d4d2318895d5e74fc80140d986eed3c69459a Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Fri, 6 Mar 2020 11:10:29 -0800 Subject: [PATCH 1/4] Add constructor for binary and fixed size binary types; more validation --- r/NAMESPACE | 1 + r/R/arrowExports.R | 4 ++++ r/R/type.R | 12 ++++++++++++ r/man/data-type.Rd | 3 +++ r/src/arrowExports.cpp | 15 +++++++++++++++ r/src/datatype.cpp | 14 +++++++++++++- r/tests/testthat/test-data-type.R | 24 ++++++++++++++++++++++++ 7 files changed, 72 insertions(+), 1 deletion(-) diff --git a/r/NAMESPACE b/r/NAMESPACE index fea6813f95c..e3da0461af5 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -142,6 +142,7 @@ export(TimeUnit) export(Type) export(UnionDataset) export(arrow_available) +export(binary) export(bool) export(boolean) export(buffer) diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index 38d6688e8ec..a2f6560a0c6 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -516,6 +516,10 @@ Utf8__initialize <- function(){ .Call(`_arrow_Utf8__initialize` ) } +Binary__initialize <- function(){ + .Call(`_arrow_Binary__initialize` ) +} + Date32__initialize <- function(){ .Call(`_arrow_Date32__initialize` ) } diff --git a/r/R/type.R b/r/R/type.R index 69b6cd9d4d1..fb98148bb44 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -140,6 +140,8 @@ Float32 <- R6Class("Float32", inherit = FixedWidthType) Float64 <- R6Class("Float64", inherit = FixedWidthType) Boolean <- R6Class("Boolean", inherit = FixedWidthType) Utf8 <- R6Class("Utf8", inherit = DataType) +Binary <- R6Class("Binary", inherit = DataType) +FixedSizeBinary <- R6Class("FixedSizeBinary", inherit = FixedWidthType) DateType <- R6Class("DateType", inherit = FixedWidthType, @@ -280,6 +282,16 @@ bool <- boolean #' @export utf8 <- function() shared_ptr(Utf8, Utf8__initialize()) +#' @rdname data-type +#' @export +binary <- function(byte_width = NULL) { + if (is.null(byte_width)) { + shared_ptr(Binary, Binary__initialize()) + } else { + shared_ptr(FixedSizeBinary, FixedSizeBinary__initialize(byte_width)) + } +} + #' @rdname data-type #' @export string <- utf8 diff --git a/r/man/data-type.Rd b/r/man/data-type.Rd index 43cec048365..4b1bbd8935a 100644 --- a/r/man/data-type.Rd +++ b/r/man/data-type.Rd @@ -18,6 +18,7 @@ \alias{boolean} \alias{bool} \alias{utf8} +\alias{binary} \alias{string} \alias{date32} \alias{date64} @@ -62,6 +63,8 @@ bool() utf8() +binary(byte_width = NULL) + string() date32() diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index c22fb75854a..5aab8da0f48 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -2005,6 +2005,20 @@ RcppExport SEXP _arrow_Utf8__initialize(){ } #endif +// datatype.cpp +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr Binary__initialize(); +RcppExport SEXP _arrow_Binary__initialize(){ +BEGIN_RCPP + return Rcpp::wrap(Binary__initialize()); +END_RCPP +} +#else +RcppExport SEXP _arrow_Binary__initialize(){ + Rf_error("Cannot call Binary__initialize(). Please use arrow::install_arrow() to install required runtime libraries. "); +} +#endif + // datatype.cpp #if defined(ARROW_R_WITH_ARROW) std::shared_ptr Date32__initialize(); @@ -6027,6 +6041,7 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_Float64__initialize", (DL_FUNC) &_arrow_Float64__initialize, 0}, { "_arrow_Boolean__initialize", (DL_FUNC) &_arrow_Boolean__initialize, 0}, { "_arrow_Utf8__initialize", (DL_FUNC) &_arrow_Utf8__initialize, 0}, + { "_arrow_Binary__initialize", (DL_FUNC) &_arrow_Binary__initialize, 0}, { "_arrow_Date32__initialize", (DL_FUNC) &_arrow_Date32__initialize, 0}, { "_arrow_Date64__initialize", (DL_FUNC) &_arrow_Date64__initialize, 0}, { "_arrow_Null__initialize", (DL_FUNC) &_arrow_Null__initialize, 0}, diff --git a/r/src/datatype.cpp b/r/src/datatype.cpp index a0d60a1766f..573cfee5be3 100644 --- a/r/src/datatype.cpp +++ b/r/src/datatype.cpp @@ -73,6 +73,9 @@ std::shared_ptr Boolean__initialize() { return arrow::boolean() // [[arrow::export]] std::shared_ptr Utf8__initialize() { return arrow::utf8(); } +// [[arrow::export]] +std::shared_ptr Binary__initialize() { return arrow::binary(); } + // [[arrow::export]] std::shared_ptr Date32__initialize() { return arrow::date32(); } @@ -85,11 +88,20 @@ std::shared_ptr Null__initialize() { return arrow::null(); } // [[arrow::export]] std::shared_ptr Decimal128Type__initialize(int32_t precision, int32_t scale) { - return arrow::decimal(precision, scale); + // Use the builder that validates inputs + std::shared_ptr out; + STOP_IF_NOT_OK(arrow::Decimal128Type::Make(precision, scale, &out)); + return out; } // [[arrow::export]] std::shared_ptr FixedSizeBinary__initialize(int32_t byte_width) { + if (byte_width == NA_INTEGER) { + Rcpp::stop("'byte_width' cannot be NA"); + } + if (byte_width < 1) { + Rcpp::stop("'byte_width' must be > 0"); + } return arrow::fixed_size_binary(byte_width); } diff --git a/r/tests/testthat/test-data-type.R b/r/tests/testthat/test-data-type.R index 1ee18fe9839..9d8948a8fef 100644 --- a/r/tests/testthat/test-data-type.R +++ b/r/tests/testthat/test-data-type.R @@ -379,6 +379,8 @@ test_that("DictionaryType validation", { dictionary(utf8(), int32()), "Dictionary index type should be signed integer, got string" ) + expect_error(dictionary(4, utf8()), 'index_type must be a "DataType"') + expect_error(dictionary(int8(), "strings"), 'value_type must be a "DataType"') }) test_that("decimal type and validation", { @@ -386,5 +388,27 @@ test_that("decimal type and validation", { expect_error(decimal("four"), '"precision" must be an integer') expect_error(decimal(4), 'argument "scale" is missing, with no default') expect_error(decimal(4, "two"), '"scale" must be an integer') + expect_error(decimal(NA, 2), '"precision" must be an integer') + expect_error(decimal(0, 2), "Invalid: Decimal precision out of range: 0") + expect_error(decimal(100, 2), "Invalid: Decimal precision out of range: 100") + expect_error(decimal(4, NA), '"scale" must be an integer') + expect_is(decimal(4, 2), "Decimal128Type") + +}) + +test_that("Binary", { + expect_is(binary(), "Binary") + expect_equal(binary()$ToString(), "binary") +}) + +test_that("FixedSizeBinary", { + expect_is(binary(4), "FixedSizeBinary") + expect_equal(binary(4)$ToString(), "fixed_size_binary[4]") + + # input validation + expect_error(binary(NA), "'byte_width' cannot be NA") + expect_error(binary(-1), "'byte_width' must be > 0") + expect_error(binary("four"), class = "Rcpp::not_compatible") + expect_error(binary(c(2, 4)), class = "Rcpp::not_compatible") }) From 197fb92b7ad96a9c0dd7dcc2061d5550422c620e Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Fri, 6 Mar 2020 15:48:25 -0800 Subject: [PATCH 2/4] Update docs --- r/R/type.R | 3 +++ r/man/data-type.Rd | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/r/R/type.R b/r/R/type.R index fb98148bb44..60a1861ba6a 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -204,6 +204,9 @@ NestedType <- R6Class("NestedType", inherit = DataType) #' either "s" or "ms", while `time64()` can be "us" or "ns". `timestamp()` can #' take any of those four values. #' @param timezone For `timestamp()`, an optional time zone string. +#' @param byte_width For `binary()`, an optional integer width to create a +#' `FixedWidthBinary` type. The default `NULL` results in a `BinaryType` with +#' variable width. #' @param precision For `decimal()`, precision #' @param scale For `decimal()`, scale #' @param type For `list_of()`, a data type to make a list-of-type diff --git a/r/man/data-type.Rd b/r/man/data-type.Rd index 4b1bbd8935a..330cd36609f 100644 --- a/r/man/data-type.Rd +++ b/r/man/data-type.Rd @@ -86,6 +86,10 @@ list_of(type) struct(...) } \arguments{ +\item{byte_width}{For \code{binary()}, an optional integer width to create a +\code{FixedWidthBinary} type. The default \code{NULL} results in a \code{BinaryType} with +variable width.} + \item{unit}{For time/timestamp types, the time unit. \code{time32()} can take either "s" or "ms", while \code{time64()} can be "us" or "ns". \code{timestamp()} can take any of those four values.} From e761c4a80429e82c7c88fb4123a32dd03c4f77ad Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Mon, 9 Mar 2020 11:53:02 -0400 Subject: [PATCH 3/4] Fix doclet --- r/R/type.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/type.R b/r/R/type.R index 60a1861ba6a..a64f335b827 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -205,7 +205,7 @@ NestedType <- R6Class("NestedType", inherit = DataType) #' take any of those four values. #' @param timezone For `timestamp()`, an optional time zone string. #' @param byte_width For `binary()`, an optional integer width to create a -#' `FixedWidthBinary` type. The default `NULL` results in a `BinaryType` with +#' `FixedSizeBinary` type. The default `NULL` results in a `BinaryType` with #' variable width. #' @param precision For `decimal()`, precision #' @param scale For `decimal()`, scale From 570b2e6dfa5dc6e9f8f5bf2aac5943e3128bbc30 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Mon, 9 Mar 2020 08:53:55 -0700 Subject: [PATCH 4/4] (manually) update docs --- r/man/data-type.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/man/data-type.Rd b/r/man/data-type.Rd index 330cd36609f..6881b9603b2 100644 --- a/r/man/data-type.Rd +++ b/r/man/data-type.Rd @@ -87,7 +87,7 @@ struct(...) } \arguments{ \item{byte_width}{For \code{binary()}, an optional integer width to create a -\code{FixedWidthBinary} type. The default \code{NULL} results in a \code{BinaryType} with +\code{FixedSizeBinary} type. The default \code{NULL} results in a \code{BinaryType} with variable width.} \item{unit}{For time/timestamp types, the time unit. \code{time32()} can take