diff --git a/r/NAMESPACE b/r/NAMESPACE index fea6813f95c..e3da0461af5 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -142,6 +142,7 @@ export(TimeUnit) export(Type) export(UnionDataset) export(arrow_available) +export(binary) export(bool) export(boolean) export(buffer) diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index 38d6688e8ec..a2f6560a0c6 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -516,6 +516,10 @@ Utf8__initialize <- function(){ .Call(`_arrow_Utf8__initialize` ) } +Binary__initialize <- function(){ + .Call(`_arrow_Binary__initialize` ) +} + Date32__initialize <- function(){ .Call(`_arrow_Date32__initialize` ) } diff --git a/r/R/type.R b/r/R/type.R index 69b6cd9d4d1..a64f335b827 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -140,6 +140,8 @@ Float32 <- R6Class("Float32", inherit = FixedWidthType) Float64 <- R6Class("Float64", inherit = FixedWidthType) Boolean <- R6Class("Boolean", inherit = FixedWidthType) Utf8 <- R6Class("Utf8", inherit = DataType) +Binary <- R6Class("Binary", inherit = DataType) +FixedSizeBinary <- R6Class("FixedSizeBinary", inherit = FixedWidthType) DateType <- R6Class("DateType", inherit = FixedWidthType, @@ -202,6 +204,9 @@ NestedType <- R6Class("NestedType", inherit = DataType) #' either "s" or "ms", while `time64()` can be "us" or "ns". `timestamp()` can #' take any of those four values. #' @param timezone For `timestamp()`, an optional time zone string. +#' @param byte_width For `binary()`, an optional integer width to create a +#' `FixedSizeBinary` type. The default `NULL` results in a `BinaryType` with +#' variable width. #' @param precision For `decimal()`, precision #' @param scale For `decimal()`, scale #' @param type For `list_of()`, a data type to make a list-of-type @@ -280,6 +285,16 @@ bool <- boolean #' @export utf8 <- function() shared_ptr(Utf8, Utf8__initialize()) +#' @rdname data-type +#' @export +binary <- function(byte_width = NULL) { + if (is.null(byte_width)) { + shared_ptr(Binary, Binary__initialize()) + } else { + shared_ptr(FixedSizeBinary, FixedSizeBinary__initialize(byte_width)) + } +} + #' @rdname data-type #' @export string <- utf8 diff --git a/r/man/data-type.Rd b/r/man/data-type.Rd index 43cec048365..6881b9603b2 100644 --- a/r/man/data-type.Rd +++ b/r/man/data-type.Rd @@ -18,6 +18,7 @@ \alias{boolean} \alias{bool} \alias{utf8} +\alias{binary} \alias{string} \alias{date32} \alias{date64} @@ -62,6 +63,8 @@ bool() utf8() +binary(byte_width = NULL) + string() date32() @@ -83,6 +86,10 @@ list_of(type) struct(...) } \arguments{ +\item{byte_width}{For \code{binary()}, an optional integer width to create a +\code{FixedSizeBinary} type. The default \code{NULL} results in a \code{BinaryType} with +variable width.} + \item{unit}{For time/timestamp types, the time unit. \code{time32()} can take either "s" or "ms", while \code{time64()} can be "us" or "ns". \code{timestamp()} can take any of those four values.} diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index c22fb75854a..5aab8da0f48 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -2005,6 +2005,20 @@ RcppExport SEXP _arrow_Utf8__initialize(){ } #endif +// datatype.cpp +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr Binary__initialize(); +RcppExport SEXP _arrow_Binary__initialize(){ +BEGIN_RCPP + return Rcpp::wrap(Binary__initialize()); +END_RCPP +} +#else +RcppExport SEXP _arrow_Binary__initialize(){ + Rf_error("Cannot call Binary__initialize(). Please use arrow::install_arrow() to install required runtime libraries. "); +} +#endif + // datatype.cpp #if defined(ARROW_R_WITH_ARROW) std::shared_ptr Date32__initialize(); @@ -6027,6 +6041,7 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_Float64__initialize", (DL_FUNC) &_arrow_Float64__initialize, 0}, { "_arrow_Boolean__initialize", (DL_FUNC) &_arrow_Boolean__initialize, 0}, { "_arrow_Utf8__initialize", (DL_FUNC) &_arrow_Utf8__initialize, 0}, + { "_arrow_Binary__initialize", (DL_FUNC) &_arrow_Binary__initialize, 0}, { "_arrow_Date32__initialize", (DL_FUNC) &_arrow_Date32__initialize, 0}, { "_arrow_Date64__initialize", (DL_FUNC) &_arrow_Date64__initialize, 0}, { "_arrow_Null__initialize", (DL_FUNC) &_arrow_Null__initialize, 0}, diff --git a/r/src/datatype.cpp b/r/src/datatype.cpp index a0d60a1766f..573cfee5be3 100644 --- a/r/src/datatype.cpp +++ b/r/src/datatype.cpp @@ -73,6 +73,9 @@ std::shared_ptr Boolean__initialize() { return arrow::boolean() // [[arrow::export]] std::shared_ptr Utf8__initialize() { return arrow::utf8(); } +// [[arrow::export]] +std::shared_ptr Binary__initialize() { return arrow::binary(); } + // [[arrow::export]] std::shared_ptr Date32__initialize() { return arrow::date32(); } @@ -85,11 +88,20 @@ std::shared_ptr Null__initialize() { return arrow::null(); } // [[arrow::export]] std::shared_ptr Decimal128Type__initialize(int32_t precision, int32_t scale) { - return arrow::decimal(precision, scale); + // Use the builder that validates inputs + std::shared_ptr out; + STOP_IF_NOT_OK(arrow::Decimal128Type::Make(precision, scale, &out)); + return out; } // [[arrow::export]] std::shared_ptr FixedSizeBinary__initialize(int32_t byte_width) { + if (byte_width == NA_INTEGER) { + Rcpp::stop("'byte_width' cannot be NA"); + } + if (byte_width < 1) { + Rcpp::stop("'byte_width' must be > 0"); + } return arrow::fixed_size_binary(byte_width); } diff --git a/r/tests/testthat/test-data-type.R b/r/tests/testthat/test-data-type.R index 1ee18fe9839..9d8948a8fef 100644 --- a/r/tests/testthat/test-data-type.R +++ b/r/tests/testthat/test-data-type.R @@ -379,6 +379,8 @@ test_that("DictionaryType validation", { dictionary(utf8(), int32()), "Dictionary index type should be signed integer, got string" ) + expect_error(dictionary(4, utf8()), 'index_type must be a "DataType"') + expect_error(dictionary(int8(), "strings"), 'value_type must be a "DataType"') }) test_that("decimal type and validation", { @@ -386,5 +388,27 @@ test_that("decimal type and validation", { expect_error(decimal("four"), '"precision" must be an integer') expect_error(decimal(4), 'argument "scale" is missing, with no default') expect_error(decimal(4, "two"), '"scale" must be an integer') + expect_error(decimal(NA, 2), '"precision" must be an integer') + expect_error(decimal(0, 2), "Invalid: Decimal precision out of range: 0") + expect_error(decimal(100, 2), "Invalid: Decimal precision out of range: 100") + expect_error(decimal(4, NA), '"scale" must be an integer') + expect_is(decimal(4, 2), "Decimal128Type") + +}) + +test_that("Binary", { + expect_is(binary(), "Binary") + expect_equal(binary()$ToString(), "binary") +}) + +test_that("FixedSizeBinary", { + expect_is(binary(4), "FixedSizeBinary") + expect_equal(binary(4)$ToString(), "fixed_size_binary[4]") + + # input validation + expect_error(binary(NA), "'byte_width' cannot be NA") + expect_error(binary(-1), "'byte_width' must be > 0") + expect_error(binary("four"), class = "Rcpp::not_compatible") + expect_error(binary(c(2, 4)), class = "Rcpp::not_compatible") })