From 29576552676b0a57f11f428199c9ee94750bbd3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 7 Dec 2021 10:54:27 +0000 Subject: [PATCH 01/33] update NEWS --- r/NEWS.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/r/NEWS.md b/r/NEWS.md index 5df8c2d5793..22bc8f0472a 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -19,11 +19,12 @@ # arrow 6.0.1.9000 +* Added `decimal256()` * updated `write_csv_arrow()` to follow the signature of `readr::write_csv()`. The following arguments are supported: * `file` identical to `sink` * `col_names` identical to `include_header` * other arguments are currently unsupported, but the function errors with a meaningful message. -* Added `decimal128()` (identical to `decimal()`) as the name is more explicit and updated docs to encourage its use. +* Added `decimal128()` (identical to `decimal()`) as the name is more explicit and updated docs to encourage its use. # arrow 6.0.1 From 387ae07f94569a9cf9a9acf1b4d77cfb2b227657 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 7 Dec 2021 11:46:43 +0000 Subject: [PATCH 02/33] added tests for `decimal256()` and updated older tests to use ` instead of " --- r/tests/testthat/test-data-type.R | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/r/tests/testthat/test-data-type.R b/r/tests/testthat/test-data-type.R index d2795722928..4d943af3853 100644 --- a/r/tests/testthat/test-data-type.R +++ b/r/tests/testthat/test-data-type.R @@ -387,19 +387,27 @@ test_that("DictionaryType validation", { test_that("decimal type and validation", { expect_r6_class(decimal(4, 2), "Decimal128Type") - expect_error(decimal("four"), '"precision" must be an integer') - expect_error(decimal(4, "two"), '"scale" must be an integer') - expect_error(decimal(NA, 2), '"precision" must be an integer') - expect_error(decimal(4, NA), '"scale" must be an integer') + expect_error(decimal("four"), '`precision` must be an integer') + expect_error(decimal(4, "two"), '`scale` must be an integer') + expect_error(decimal(NA, 2), '`precision` must be an integer') + expect_error(decimal(4, NA), '`scale` must be an integer') # decimal() is just an alias for decimal128() for backwards compatibility expect_r6_class(decimal128(4, 2), "Decimal128Type") expect_identical(class(decimal(2, 4)), class(decimal128(2, 4))) - expect_error(decimal128("four"), '"precision" must be an integer') - expect_error(decimal128(4, "two"), '"scale" must be an integer') - expect_error(decimal128(NA, 2), '"precision" must be an integer') - expect_error(decimal128(4, NA), '"scale" must be an integer') + expect_error(decimal128("four"), '`precision` must be an integer') + expect_error(decimal128(4, "two"), '`scale` must be an integer') + expect_error(decimal128(NA, 2), '`precision` must be an integer') + expect_error(decimal128(4, NA), '`scale` must be an integer') + expect_error(decimal128(3:4, NA), "`precision` must have size 1. not size 2") + + expect_r6_class(decimal256(4, 2), "Decimal256Type") + + expect_error(decimal256("four"), '`precision` must be an integer') + expect_error(decimal256(4, "two"), '`scale` must be an integer') + expect_error(decimal256(NA, 2), '`precision` must be an integer') + expect_error(decimal256(4, NA), '`scale` must be an integer') }) test_that("Binary", { From 7ab86fc362351b4eba2a120f255089740bf455ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 7 Dec 2021 11:49:04 +0000 Subject: [PATCH 03/33] added `decimal256()` and `check_decimal_args()` functions --- r/R/type.R | 45 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/r/R/type.R b/r/R/type.R index 60f0045e514..c1e12330407 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -150,8 +150,11 @@ DecimalType <- R6Class("DecimalType", scale = function() DecimalType__scale(self) ) ) + Decimal128Type <- R6Class("Decimal128Type", inherit = DecimalType) +Decimal256Type <- R6Class("Decimal256Type", inherit = DecimalType) + NestedType <- R6Class("NestedType", inherit = DataType) #' Apache Arrow data types @@ -201,6 +204,10 @@ NestedType <- R6Class("NestedType", inherit = DataType) #' Use `decimal128()` as the name is more informative and `decimal()` might be #' deprecated in the future. #' +#' `decimal256()` creates a `decimal256` type, which allows for higher maximum +#' precision. For most use cases, the maximum precision offered by `decimal128` +#' is sufficient, and it will result in a more compact and more efficient encoding. +#' #' @param unit For time/timestamp types, the time unit. `time32()` can take #' either "s" or "ms", while `time64()` can be "us" or "ns". `timestamp()` can #' take any of those four values. @@ -209,7 +216,8 @@ NestedType <- R6Class("NestedType", inherit = DataType) #' @param list_size list size for `FixedSizeList` type. #' @param precision For `decimal()`, `decimal128()` the number of significant #' digits the arrow `decimal` type can represent. The maximum precision for -#' `decimal()` and `decimal128()` is 38 significant digits. +#' `decimal()` and `decimal128()` is 38 significant digits, while for +#' `decimal256()` it is 76 digits. #' @param scale For `decimal()` and `decimal128()`, the number of digits after #' the decimal point. It can be negative. #' @param type For `list_of()`, a data type to make a list-of-type @@ -378,22 +386,38 @@ timestamp <- function(unit = c("s", "ms", "us", "ns"), timezone = "") { #' @rdname data-type #' @export decimal128 <- function(precision, scale) { + args <- check_decimal_args(precision, scale) + Decimal128Type__initialize(args$precision, args$scale) +} + +#' @rdname data-type +#' @export +decimal <- decimal128 + +#' @rdname data-type +#' @export +decimal256 <- function(precision, scale) { + args <- check_decimal_args(precision, scale) + Decimal256Type_initialize(args$precision, args$scale) +} + +check_decimal_args <- function(precision, scale) { if (is.numeric(precision)) { - precision <- as.integer(precision) + precision <- vec_cast(precision, to = integer()) + vec_assert(precision, size = 1L) } else { - stop('"precision" must be an integer', call. = FALSE) + stop('`precision` must be an integer', call. = FALSE) } + if (is.numeric(scale)) { - scale <- as.integer(scale) + scale <- vec_cast(scale, to = integer()) + vec_assert(scale, size = 1L) } else { - stop('"scale" must be an integer', call. = FALSE) + stop('`scale` must be an integer', call. = FALSE) } - Decimal128Type__initialize(precision, scale) -} -#' @rdname data-type -#' @export -decimal <- decimal128 + list(precision = precision, scale = scale) +} StructType <- R6Class("StructType", inherit = NestedType, @@ -496,6 +520,7 @@ canonical_type_str <- function(type_str) { null = "null", timestamp = "timestamp", decimal128 = "decimal128", + decimal256 = "decimal256", struct = "struct", list_of = "list", list = "list", From 1253ba3eee0016ded7fb2906353bf14d7c5361d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 7 Dec 2021 11:49:32 +0000 Subject: [PATCH 04/33] docs --- r/NAMESPACE | 1 + r/man/data-type.Rd | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/r/NAMESPACE b/r/NAMESPACE index df4f36408e3..c8aae1b9346 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -215,6 +215,7 @@ export(date32) export(date64) export(decimal) export(decimal128) +export(decimal256) export(default_memory_pool) export(dictionary) export(ends_with) diff --git a/r/man/data-type.Rd b/r/man/data-type.Rd index 2b1c4bbff25..32bdb64ad4d 100644 --- a/r/man/data-type.Rd +++ b/r/man/data-type.Rd @@ -31,6 +31,7 @@ \alias{timestamp} \alias{decimal128} \alias{decimal} +\alias{decimal256} \alias{struct} \alias{list_of} \alias{large_list_of} @@ -96,6 +97,8 @@ decimal128(precision, scale) decimal(precision, scale) +decimal256(precision, scale) + struct(...) list_of(type) @@ -115,7 +118,8 @@ take any of those four values.} \item{precision}{For \code{decimal()}, \code{decimal128()} the number of significant digits the arrow \code{decimal} type can represent. The maximum precision for -\code{decimal()} and \code{decimal128()} is 38 significant digits.} +\code{decimal()} and \code{decimal128()} is 38 significant digits, while for +\code{decimal256()} it is 76 digits.} \item{scale}{For \code{decimal()} and \code{decimal128()}, the number of digits after the decimal point. It can be negative.} @@ -175,6 +179,10 @@ and power of 10. \code{decimal()} is identical to \code{decimal128()}, defined for backward compatibility. Use \code{decimal128()} as the name is more informative and \code{decimal()} might be deprecated in the future. + +\code{decimal256()} creates a \code{decimal256} type, which allows for higher maximum +precision. For most use cases, the maximum precision offered by \code{decimal128} +is sufficient, and it will result in a more compact and more efficient encoding. } \examples{ \dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} From e17401c689bc883310af9c1384524f33825dcb54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 7 Dec 2021 11:59:49 +0000 Subject: [PATCH 05/33] update datatype + generate `arrowExports` --- r/R/arrowExports.R | 4 ++++ r/src/arrowExports.cpp | 17 +++++++++++++++++ r/src/datatype.cpp | 9 +++++++++ 3 files changed, 30 insertions(+) diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index c1e0fca788d..5eaf1b91c5b 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -696,6 +696,10 @@ Decimal128Type__initialize <- function(precision, scale) { .Call(`_arrow_Decimal128Type__initialize`, precision, scale) } +Decimal256Type__initialize <- function(precision, scale) { + .Call(`_arrow_Decimal256Type__initialize`, precision, scale) +} + FixedSizeBinary__initialize <- function(byte_width) { .Call(`_arrow_FixedSizeBinary__initialize`, byte_width) } diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index ef388b09208..95e7d458929 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -2730,6 +2730,22 @@ extern "C" SEXP _arrow_Decimal128Type__initialize(SEXP precision_sexp, SEXP scal } #endif +// datatype.cpp +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr Decimal256Type__initialize(int32_t precision, int32_t scale); +extern "C" SEXP _arrow_Decimal256Type__initialize(SEXP precision_sexp, SEXP scale_sexp){ +BEGIN_CPP11 + arrow::r::Input::type precision(precision_sexp); + arrow::r::Input::type scale(scale_sexp); + return cpp11::as_sexp(Decimal256Type__initialize(precision, scale)); +END_CPP11 +} +#else +extern "C" SEXP _arrow_Decimal256Type__initialize(SEXP precision_sexp, SEXP scale_sexp){ + Rf_error("Cannot call Decimal256Type__initialize(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // datatype.cpp #if defined(ARROW_R_WITH_ARROW) std::shared_ptr FixedSizeBinary__initialize(R_xlen_t byte_width); @@ -7345,6 +7361,7 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_Date64__initialize", (DL_FUNC) &_arrow_Date64__initialize, 0}, { "_arrow_Null__initialize", (DL_FUNC) &_arrow_Null__initialize, 0}, { "_arrow_Decimal128Type__initialize", (DL_FUNC) &_arrow_Decimal128Type__initialize, 2}, + { "_arrow_Decimal256Type__initialize", (DL_FUNC) &_arrow_Decimal256Type__initialize, 2}, { "_arrow_FixedSizeBinary__initialize", (DL_FUNC) &_arrow_FixedSizeBinary__initialize, 1}, { "_arrow_Timestamp__initialize", (DL_FUNC) &_arrow_Timestamp__initialize, 2}, { "_arrow_Time32__initialize", (DL_FUNC) &_arrow_Time32__initialize, 1}, diff --git a/r/src/datatype.cpp b/r/src/datatype.cpp index bfc6687ed5b..942b6f0133d 100644 --- a/r/src/datatype.cpp +++ b/r/src/datatype.cpp @@ -82,6 +82,8 @@ const char* r6_class_name::get( case Type::DECIMAL128: return "Decimal128Type"; + case Type::DECIMAL256: + return "Decimal256Type"; case Type::LIST: return "ListType"; @@ -180,6 +182,13 @@ std::shared_ptr Decimal128Type__initialize(int32_t precision, return ValueOrStop(arrow::Decimal128Type::Make(precision, scale)); } +// [[arrow::export]] +std::shared_ptr Decimal256Type__initialize(int32_t precision, + int32_t scale) { + // Use the builder that validates inputs + return ValueOrStop(arrow::Decimal256Type::Make(precision, scale)); +} + // [[arrow::export]] std::shared_ptr FixedSizeBinary__initialize(R_xlen_t byte_width) { if (byte_width == NA_INTEGER) { From df96c161ba058d435f190a9b79ed711bdcd8bad6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 7 Dec 2021 12:40:40 +0000 Subject: [PATCH 06/33] typo + namespace `vec_assert()` + tests checking the `precision` and `scale` are size 1 --- r/R/type.R | 6 +++--- r/tests/testthat/test-data-type.R | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/r/R/type.R b/r/R/type.R index c1e12330407..85d70a79e36 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -398,20 +398,20 @@ decimal <- decimal128 #' @export decimal256 <- function(precision, scale) { args <- check_decimal_args(precision, scale) - Decimal256Type_initialize(args$precision, args$scale) + Decimal256Type__initialize(args$precision, args$scale) } check_decimal_args <- function(precision, scale) { if (is.numeric(precision)) { precision <- vec_cast(precision, to = integer()) - vec_assert(precision, size = 1L) + vctrs::vec_assert(precision, size = 1L) } else { stop('`precision` must be an integer', call. = FALSE) } if (is.numeric(scale)) { scale <- vec_cast(scale, to = integer()) - vec_assert(scale, size = 1L) + vctrs::vec_assert(scale, size = 1L) } else { stop('`scale` must be an integer', call. = FALSE) } diff --git a/r/tests/testthat/test-data-type.R b/r/tests/testthat/test-data-type.R index 4d943af3853..98c54cd36d4 100644 --- a/r/tests/testthat/test-data-type.R +++ b/r/tests/testthat/test-data-type.R @@ -401,6 +401,7 @@ test_that("decimal type and validation", { expect_error(decimal128(NA, 2), '`precision` must be an integer') expect_error(decimal128(4, NA), '`scale` must be an integer') expect_error(decimal128(3:4, NA), "`precision` must have size 1. not size 2") + expect_error(decimal128(4, 2:3), "`scale` must have size 1. not size 2") expect_r6_class(decimal256(4, 2), "Decimal256Type") @@ -408,6 +409,8 @@ test_that("decimal type and validation", { expect_error(decimal256(4, "two"), '`scale` must be an integer') expect_error(decimal256(NA, 2), '`precision` must be an integer') expect_error(decimal256(4, NA), '`scale` must be an integer') + expect_error(decimal256(3:4, NA), "`precision` must have size 1. not size 2") + expect_error(decimal256(4, 2:3), "`scale` must have size 1. not size 2") }) test_that("Binary", { From 7cf85db9b0881e9a444dbb0cd427e60ae90574a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 7 Dec 2021 13:59:51 +0000 Subject: [PATCH 07/33] added test on type and dplyr funcs type --- r/tests/testthat/test-dplyr-funcs-type.R | 17 ++++++++++++++--- r/tests/testthat/test-type.R | 4 ++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/r/tests/testthat/test-dplyr-funcs-type.R b/r/tests/testthat/test-dplyr-funcs-type.R index 22696eb2ae6..c5fd83cb0f4 100644 --- a/r/tests/testthat/test-dplyr-funcs-type.R +++ b/r/tests/testthat/test-dplyr-funcs-type.R @@ -209,39 +209,50 @@ test_that("type checks with is() giving Arrow types", { i32 = Array$create(1, int32()), dec = Array$create(pi)$cast(decimal(3, 2)), dec128 = Array$create(pi)$cast(decimal128(3, 2)), + dec256 = Array$create(pi)$cast(decimal256(3, 2)), f64 = Array$create(1.1, float64()), str = Array$create("a", arrow::string()) ) %>% transmute( i32_is_i32 = is(i32, int32()), i32_is_dec = is(i32, decimal(3, 2)), i32_is_dec128 = is(i32, decimal128(3, 2)), + i32_is_dec256 = is(i32, decimal256(3, 2)), i32_is_i64 = is(i32, float64()), i32_is_str = is(i32, arrow::string()), dec_is_i32 = is(dec, int32()), dec_is_dec = is(dec, decimal(3, 2)), dec_is_dec128 = is(dec, decimal128(3, 2)), + dec_is_dec256 = is(dec, decimal256(3, 2)), dec_is_i64 = is(dec, float64()), dec_is_str = is(dec, arrow::string()), dec128_is_i32 = is(dec128, int32()), dec128_is_dec128 = is(dec128, decimal128(3, 2)), + dec128_is_dec256 = is(dec128, decimal256(3, 2)), dec128_is_i64 = is(dec128, float64()), dec128_is_str = is(dec128, arrow::string()), + dec256_is_i32 = is(dec128, int32()), + dec256_is_dec128 = is(dec128, decimal128(3, 2)), + dec256_is_dec256 = is(dec128, decimal256(3, 2)), + dec256_is_i64 = is(dec128, float64()), + dec256_is_str = is(dec128, arrow::string()), f64_is_i32 = is(f64, int32()), f64_is_dec = is(f64, decimal(3, 2)), f64_is_dec128 = is(f64, decimal128(3, 2)), + f64_is_dec256 = is(f64, decimal256(3, 2)), f64_is_i64 = is(f64, float64()), f64_is_str = is(f64, arrow::string()), str_is_i32 = is(str, int32()), str_is_dec128 = is(str, decimal128(3, 2)), + str_is_dec256 = is(str, decimal256(3, 2)), str_is_i64 = is(str, float64()), str_is_str = is(str, arrow::string()) ) %>% collect() %>% t() %>% as.vector(), - c(TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, - TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, - TRUE) + c(TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, + FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, + FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE) ) # with class2=string expect_equal( diff --git a/r/tests/testthat/test-type.R b/r/tests/testthat/test-type.R index 8046ebbaa57..64a305f8eb8 100644 --- a/r/tests/testthat/test-type.R +++ b/r/tests/testthat/test-type.R @@ -164,6 +164,10 @@ test_that("Type strings are correctly canonicalized", { canonical_type_str("decimal128"), sub("^([^([<]+).*$", "\\1", decimal128(3, 2)$ToString()) ) + expect_equal( + canonical_type_str("decimal256"), + sub("^([^([<]+).*$", "\\1", decimal256(3, 2)$ToString()) + ) expect_equal( canonical_type_str("struct"), sub("^([^([<]+).*$", "\\1", struct(foo = int32())$ToString()) From c0bd2793fe7b59975354e509d2e2f2a3ae592021 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 7 Dec 2021 14:22:21 +0000 Subject: [PATCH 08/33] updated `decimal()` to call either `decimal128()` or `decimal256()` based on precision + docs --- r/R/type.R | 27 +++++++++++++++++++-------- r/man/data-type.Rd | 17 ++++++++++------- r/src/array_to_vector.cpp | 11 ++++++++--- r/tests/testthat/test-chunked-array.R | 2 +- 4 files changed, 38 insertions(+), 19 deletions(-) diff --git a/r/R/type.R b/r/R/type.R index 85d70a79e36..f2155f4940a 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -200,7 +200,9 @@ NestedType <- R6Class("NestedType", inherit = DataType) #' negative, `scale` causes the number to be expressed using scientific notation #' and power of 10. #' -#' `decimal()` is identical to `decimal128()`, defined for backward compatibility. +#' `decimal()` creates either a `decimal128` or a `decimal256` depending on the +#' `precision`. If the `precision` is greater than 38 a `decimal256` is returned, +#' otherwise a `decimal128`. #' Use `decimal128()` as the name is more informative and `decimal()` might be #' deprecated in the future. #' @@ -214,12 +216,13 @@ NestedType <- R6Class("NestedType", inherit = DataType) #' @param timezone For `timestamp()`, an optional time zone string. #' @param byte_width byte width for `FixedSizeBinary` type. #' @param list_size list size for `FixedSizeList` type. -#' @param precision For `decimal()`, `decimal128()` the number of significant -#' digits the arrow `decimal` type can represent. The maximum precision for -#' `decimal()` and `decimal128()` is 38 significant digits, while for -#' `decimal256()` it is 76 digits. -#' @param scale For `decimal()` and `decimal128()`, the number of digits after -#' the decimal point. It can be negative. +#' @param precision For `decimal()`, `decimal128()`, and `decimal256()` the +#' number of significant digits the arrow `decimal` type can represent. The +#' maximum precision for `decimal128()` is 38 significant digits, while for +#' `decimal256()` it is 76 digits. `decimal()` will use it to choose which +#' type of decimal to return. +#' @param scale For `decimal()`, `decimal128()`, and `decimal256()` the number +#' of digits after the decimal point. It can be negative. #' @param type For `list_of()`, a data type to make a list-of-type #' @param ... For `struct()`, a named list of types to define the struct columns #' @@ -392,7 +395,15 @@ decimal128 <- function(precision, scale) { #' @rdname data-type #' @export -decimal <- decimal128 +decimal <- function(precision, scale) { + args <- check_decimal_args(precision, scale) + + if (args$precision > 38) { + decimal256(args$precision, args$scale) + } else { + decimal128(args$precision, args$scale) + } +} #' @rdname data-type #' @export diff --git a/r/man/data-type.Rd b/r/man/data-type.Rd index 32bdb64ad4d..7257631fd1b 100644 --- a/r/man/data-type.Rd +++ b/r/man/data-type.Rd @@ -116,13 +116,14 @@ take any of those four values.} \item{timezone}{For \code{timestamp()}, an optional time zone string.} -\item{precision}{For \code{decimal()}, \code{decimal128()} the number of significant -digits the arrow \code{decimal} type can represent. The maximum precision for -\code{decimal()} and \code{decimal128()} is 38 significant digits, while for -\code{decimal256()} it is 76 digits.} +\item{precision}{For \code{decimal()}, \code{decimal128()}, and \code{decimal256()} the +number of significant digits the arrow \code{decimal} type can represent. The +maximum precision for \code{decimal128()} is 38 significant digits, while for +\code{decimal256()} it is 76 digits. \code{decimal()} will use it to choose which +type of decimal to return.} -\item{scale}{For \code{decimal()} and \code{decimal128()}, the number of digits after -the decimal point. It can be negative.} +\item{scale}{For \code{decimal()}, \code{decimal128()}, and \code{decimal256()} the number +of digits after the decimal point. It can be negative.} \item{...}{For \code{struct()}, a named list of types to define the struct columns} @@ -176,7 +177,9 @@ The \code{scale} can be thought of as an argument that controls rounding. When negative, \code{scale} causes the number to be expressed using scientific notation and power of 10. -\code{decimal()} is identical to \code{decimal128()}, defined for backward compatibility. +\code{decimal()} creates either a \code{decimal128} or a \code{decimal256} depending on the +\code{precision}. If the \code{precision} is greater than 38 a \code{decimal256} is returned, +otherwise a \code{decimal128}. Use \code{decimal128()} as the name is more informative and \code{decimal()} might be deprecated in the future. diff --git a/r/src/array_to_vector.cpp b/r/src/array_to_vector.cpp index 3ac32d7c395..cf32bffbc1a 100644 --- a/r/src/array_to_vector.cpp +++ b/r/src/array_to_vector.cpp @@ -905,6 +905,7 @@ class Converter_Timestamp : public Converter_Time { } }; +template class Converter_Decimal : public Converter { public: explicit Converter_Decimal(const std::shared_ptr& chunked_array) @@ -919,8 +920,9 @@ class Converter_Decimal : public Converter { Status Ingest_some_nulls(SEXP data, const std::shared_ptr& array, R_xlen_t start, R_xlen_t n, size_t chunk_index) const { + using DecimalArray = typename TypeTraits::ArrayType; auto p_data = REAL(data) + start; - const auto& decimals_arr = checked_cast(*array); + const auto& decimals_arr = checked_cast(*array); auto ingest_one = [&](R_xlen_t i) { p_data[i] = std::stod(decimals_arr.FormatValue(i).c_str()); @@ -1217,7 +1219,10 @@ std::shared_ptr Converter::Make( } case Type::DECIMAL128: - return std::make_shared(chunked_array); + return std::make_shared>(chunked_array); + + case Type::DECIMAL256: + return std::make_shared>(chunked_array); // nested case Type::STRUCT: @@ -1245,7 +1250,7 @@ std::shared_ptr Converter::Make( break; } - cpp11::stop("cannot handle Array of type ", type->name().c_str()); + cpp11::stop("cannot handle Array of type <%s>", type->name().c_str()); } std::shared_ptr to_chunks(const std::shared_ptr& array) { diff --git a/r/tests/testthat/test-chunked-array.R b/r/tests/testthat/test-chunked-array.R index 73e536514a2..95968b5d4e3 100644 --- a/r/tests/testthat/test-chunked-array.R +++ b/r/tests/testthat/test-chunked-array.R @@ -206,7 +206,7 @@ test_that("ChunkedArray supports empty arrays (ARROW-13761)", { int8(), int16(), int32(), int64(), uint8(), uint16(), uint32(), uint64(), float32(), float64(), timestamp("ns"), binary(), large_binary(), fixed_size_binary(32), date32(), date64(), - decimal128(4, 2), dictionary(), struct(x = int32()) + decimal128(4, 2), decimal256(4, 2), dictionary(), struct(x = int32()) ) empty_filter <- ChunkedArray$create(type = bool()) From 1cc6b20d28d91bb75a94d9fb8cdfd7c3e5ace2de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 7 Dec 2021 14:54:22 +0000 Subject: [PATCH 09/33] updated NEWS with changes to `decimal()` --- r/NEWS.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/r/NEWS.md b/r/NEWS.md index 22bc8f0472a..458993e714d 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -19,12 +19,12 @@ # arrow 6.0.1.9000 -* Added `decimal256()` +* Added `decimal256()`. Updated `decimal()`, which now calls `decimal256()` or `decimal128()` based on the value of the `precision` argument. * updated `write_csv_arrow()` to follow the signature of `readr::write_csv()`. The following arguments are supported: * `file` identical to `sink` * `col_names` identical to `include_header` * other arguments are currently unsupported, but the function errors with a meaningful message. -* Added `decimal128()` (identical to `decimal()`) as the name is more explicit and updated docs to encourage its use. +* Added `decimal128()` (~~identical to `decimal()`~~) as the name is more explicit and updated docs to encourage its use. # arrow 6.0.1 From 619a3d60d9ccc30e84511d826ecfe4a5f04301f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 7 Dec 2021 14:59:53 +0000 Subject: [PATCH 10/33] updated `decimal` docs & tests --- r/R/type.R | 19 ++++++++++--------- r/man/data-type.Rd | 19 ++++++++++--------- r/tests/testthat/test-data-type.R | 7 +++++-- 3 files changed, 25 insertions(+), 20 deletions(-) diff --git a/r/R/type.R b/r/R/type.R index f2155f4940a..7d0d0c92e73 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -184,7 +184,7 @@ NestedType <- R6Class("NestedType", inherit = DataType) #' `bit64::integer64` object) by setting `options(arrow.int64_downcast = #' FALSE)`. #' -#' `decimal128()` creates a `decimal128` type. Arrow decimals are fixed-point +#' `decimal128()` creates a `Decimal128Type`. Arrow decimals are fixed-point #' decimal numbers encoded as a scalar integer. The `precision` is the number of #' significant digits that the decimal type can represent; the `scale` is the #' number of digits after the decimal point. For example, the number 1234.567 @@ -200,16 +200,17 @@ NestedType <- R6Class("NestedType", inherit = DataType) #' negative, `scale` causes the number to be expressed using scientific notation #' and power of 10. #' -#' `decimal()` creates either a `decimal128` or a `decimal256` depending on the -#' `precision`. If the `precision` is greater than 38 a `decimal256` is returned, -#' otherwise a `decimal128`. -#' Use `decimal128()` as the name is more informative and `decimal()` might be -#' deprecated in the future. -#' -#' `decimal256()` creates a `decimal256` type, which allows for higher maximum -#' precision. For most use cases, the maximum precision offered by `decimal128` +#' `decimal256()` creates a `Decimal256Type`, which allows for higher maximum +#' precision. For most use cases, the maximum precision offered by `Decimal128Type` #' is sufficient, and it will result in a more compact and more efficient encoding. #' +#' #' `decimal()` creates either a `Decimal128Type` or a `Decimal256Type` +#' depending on the `precision` value. If the `precision` is greater than 38 a +#' `Decimal256Type` is returned, otherwise a `Decimal128Type`. +#' +#' Use `decimal128()` or `decimal256()` as the names are more informative than +#' `decimal()`. +#' #' @param unit For time/timestamp types, the time unit. `time32()` can take #' either "s" or "ms", while `time64()` can be "us" or "ns". `timestamp()` can #' take any of those four values. diff --git a/r/man/data-type.Rd b/r/man/data-type.Rd index 7257631fd1b..f872c6ddbfd 100644 --- a/r/man/data-type.Rd +++ b/r/man/data-type.Rd @@ -161,7 +161,7 @@ are translated to R objects, \code{uint32} and \code{uint64} are converted to \c types, this conversion can be disabled (so that \code{int64} always yields a \code{bit64::integer64} object) by setting \code{options(arrow.int64_downcast = FALSE)}. -\code{decimal128()} creates a \code{decimal128} type. Arrow decimals are fixed-point +\code{decimal128()} creates a \code{Decimal128Type}. Arrow decimals are fixed-point decimal numbers encoded as a scalar integer. The \code{precision} is the number of significant digits that the decimal type can represent; the \code{scale} is the number of digits after the decimal point. For example, the number 1234.567 @@ -177,15 +177,16 @@ The \code{scale} can be thought of as an argument that controls rounding. When negative, \code{scale} causes the number to be expressed using scientific notation and power of 10. -\code{decimal()} creates either a \code{decimal128} or a \code{decimal256} depending on the -\code{precision}. If the \code{precision} is greater than 38 a \code{decimal256} is returned, -otherwise a \code{decimal128}. -Use \code{decimal128()} as the name is more informative and \code{decimal()} might be -deprecated in the future. - -\code{decimal256()} creates a \code{decimal256} type, which allows for higher maximum -precision. For most use cases, the maximum precision offered by \code{decimal128} +\code{decimal256()} creates a \code{Decimal256Type}, which allows for higher maximum +precision. For most use cases, the maximum precision offered by \code{Decimal128Type} is sufficient, and it will result in a more compact and more efficient encoding. + +#' \code{decimal()} creates either a \code{Decimal128Type} or a \code{Decimal256Type} +depending on the \code{precision}. If the \code{precision} is greater than 38 a +\code{Decimal256Type} is returned, otherwise a \code{Decimal128Type}. + +Use \code{decimal128()} or \code{decimal256()} as the names are more informative than +\code{decimal()}. } \examples{ \dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} diff --git a/r/tests/testthat/test-data-type.R b/r/tests/testthat/test-data-type.R index 98c54cd36d4..a6907fa5494 100644 --- a/r/tests/testthat/test-data-type.R +++ b/r/tests/testthat/test-data-type.R @@ -386,15 +386,18 @@ test_that("DictionaryType validation", { test_that("decimal type and validation", { expect_r6_class(decimal(4, 2), "Decimal128Type") + expect_r6_class(decimal(39, 2), "Decimal256Type") expect_error(decimal("four"), '`precision` must be an integer') expect_error(decimal(4, "two"), '`scale` must be an integer') expect_error(decimal(NA, 2), '`precision` must be an integer') expect_error(decimal(4, NA), '`scale` must be an integer') - # decimal() is just an alias for decimal128() for backwards compatibility + # decimal() creates either decimal128 or decimal256 based on precision + expect_identical(class(decimal(38, 2)), class(decimal128(38, 2))) + expect_identical(class(decimal(39, 2)), class(decimal256(38, 2))) + expect_r6_class(decimal128(4, 2), "Decimal128Type") - expect_identical(class(decimal(2, 4)), class(decimal128(2, 4))) expect_error(decimal128("four"), '`precision` must be an integer') expect_error(decimal128(4, "two"), '`scale` must be an integer') From 6162dee173be48c2d12db076b93e09bc3f7c0959 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 8 Dec 2021 09:49:23 +0000 Subject: [PATCH 11/33] docs + lintr complaints --- r/R/type.R | 6 +++--- r/man/data-type.Rd | 2 +- r/tests/testthat/test-data-type.R | 24 ++++++++++++------------ 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/r/R/type.R b/r/R/type.R index 7d0d0c92e73..06c1c81d05c 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -205,7 +205,7 @@ NestedType <- R6Class("NestedType", inherit = DataType) #' is sufficient, and it will result in a more compact and more efficient encoding. #' #' #' `decimal()` creates either a `Decimal128Type` or a `Decimal256Type` -#' depending on the `precision` value. If the `precision` is greater than 38 a +#' depending on the `precision` value. If `precision` is greater than 38 a #' `Decimal256Type` is returned, otherwise a `Decimal128Type`. #' #' Use `decimal128()` or `decimal256()` as the names are more informative than @@ -418,14 +418,14 @@ check_decimal_args <- function(precision, scale) { precision <- vec_cast(precision, to = integer()) vctrs::vec_assert(precision, size = 1L) } else { - stop('`precision` must be an integer', call. = FALSE) + stop("`precision` must be an integer", call. = FALSE) } if (is.numeric(scale)) { scale <- vec_cast(scale, to = integer()) vctrs::vec_assert(scale, size = 1L) } else { - stop('`scale` must be an integer', call. = FALSE) + stop("`scale` must be an integer", call. = FALSE) } list(precision = precision, scale = scale) diff --git a/r/man/data-type.Rd b/r/man/data-type.Rd index f872c6ddbfd..689970d1b6d 100644 --- a/r/man/data-type.Rd +++ b/r/man/data-type.Rd @@ -182,7 +182,7 @@ precision. For most use cases, the maximum precision offered by \code{Decimal128 is sufficient, and it will result in a more compact and more efficient encoding. #' \code{decimal()} creates either a \code{Decimal128Type} or a \code{Decimal256Type} -depending on the \code{precision}. If the \code{precision} is greater than 38 a +depending on the \code{precision} value. If \code{precision} is greater than 38 a \code{Decimal256Type} is returned, otherwise a \code{Decimal128Type}. Use \code{decimal128()} or \code{decimal256()} as the names are more informative than diff --git a/r/tests/testthat/test-data-type.R b/r/tests/testthat/test-data-type.R index a6907fa5494..d932b81e7bd 100644 --- a/r/tests/testthat/test-data-type.R +++ b/r/tests/testthat/test-data-type.R @@ -388,10 +388,10 @@ test_that("decimal type and validation", { expect_r6_class(decimal(4, 2), "Decimal128Type") expect_r6_class(decimal(39, 2), "Decimal256Type") - expect_error(decimal("four"), '`precision` must be an integer') - expect_error(decimal(4, "two"), '`scale` must be an integer') - expect_error(decimal(NA, 2), '`precision` must be an integer') - expect_error(decimal(4, NA), '`scale` must be an integer') + expect_error(decimal("four"), "`precision` must be an integer") + expect_error(decimal(4, "two"), "`scale` must be an integer") + expect_error(decimal(NA, 2), "`precision` must be an integer") + expect_error(decimal(4, NA), "`scale` must be an integer") # decimal() creates either decimal128 or decimal256 based on precision expect_identical(class(decimal(38, 2)), class(decimal128(38, 2))) @@ -399,19 +399,19 @@ test_that("decimal type and validation", { expect_r6_class(decimal128(4, 2), "Decimal128Type") - expect_error(decimal128("four"), '`precision` must be an integer') - expect_error(decimal128(4, "two"), '`scale` must be an integer') - expect_error(decimal128(NA, 2), '`precision` must be an integer') - expect_error(decimal128(4, NA), '`scale` must be an integer') + expect_error(decimal128("four"), "`precision` must be an integer") + expect_error(decimal128(4, "two"), "`scale` must be an integer") + expect_error(decimal128(NA, 2), "`precision` must be an integer") + expect_error(decimal128(4, NA), "`scale` must be an integer") expect_error(decimal128(3:4, NA), "`precision` must have size 1. not size 2") expect_error(decimal128(4, 2:3), "`scale` must have size 1. not size 2") expect_r6_class(decimal256(4, 2), "Decimal256Type") - expect_error(decimal256("four"), '`precision` must be an integer') - expect_error(decimal256(4, "two"), '`scale` must be an integer') - expect_error(decimal256(NA, 2), '`precision` must be an integer') - expect_error(decimal256(4, NA), '`scale` must be an integer') + expect_error(decimal256("four"), "`precision` must be an integer") + expect_error(decimal256(4, "two"), "`scale` must be an integer") + expect_error(decimal256(NA, 2), "`precision` must be an integer") + expect_error(decimal256(4, NA), "`scale` must be an integer") expect_error(decimal256(3:4, NA), "`precision` must have size 1. not size 2") expect_error(decimal256(4, 2:3), "`scale` must have size 1. not size 2") }) From c623389ca45ff6051049f9e6358ff27782868825 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 8 Dec 2021 09:54:46 +0000 Subject: [PATCH 12/33] news change --- r/NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/NEWS.md b/r/NEWS.md index 458993e714d..49bc79417ea 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -24,7 +24,7 @@ * `file` identical to `sink` * `col_names` identical to `include_header` * other arguments are currently unsupported, but the function errors with a meaningful message. -* Added `decimal128()` (~~identical to `decimal()`~~) as the name is more explicit and updated docs to encourage its use. +* Added `decimal128()` (identical to `decimal()`) as the name is more explicit and updated docs to encourage its use. # arrow 6.0.1 From e9c40bc45e6ac213d5b849c9df4b281dcfb329d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 8 Dec 2021 10:13:51 +0000 Subject: [PATCH 13/33] catch `RPrimitiveConverter` error and then cast --- r/R/array.R | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/r/R/array.R b/r/R/array.R index 65fb450f8de..2a367e70085 100644 --- a/r/R/array.R +++ b/r/R/array.R @@ -187,7 +187,18 @@ Array$create <- function(x, type = NULL) { } return(out) } - vec_to_Array(x, type) + tryCatch( + vec_to_Array(x, type), + error = function(cnd) { + if (!is.null(type)) { + # try again and then cast + vec_to_Array(x, NULL)$cast(type) + } else { + signalCondition(cnd) + } + } + ) + } #' @include arrowExports.R Array$import_from_c <- ImportArray From 36f566f2f9f9afb7b973351ff900455f900988b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 8 Dec 2021 10:59:43 +0000 Subject: [PATCH 14/33] reoder decimal definitions --- r/R/type.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/r/R/type.R b/r/R/type.R index 06c1c81d05c..bb7d823f39c 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -394,6 +394,13 @@ decimal128 <- function(precision, scale) { Decimal128Type__initialize(args$precision, args$scale) } +#' @rdname data-type +#' @export +decimal256 <- function(precision, scale) { + args <- check_decimal_args(precision, scale) + Decimal256Type__initialize(args$precision, args$scale) +} + #' @rdname data-type #' @export decimal <- function(precision, scale) { @@ -406,13 +413,6 @@ decimal <- function(precision, scale) { } } -#' @rdname data-type -#' @export -decimal256 <- function(precision, scale) { - args <- check_decimal_args(precision, scale) - Decimal256Type__initialize(args$precision, args$scale) -} - check_decimal_args <- function(precision, scale) { if (is.numeric(precision)) { precision <- vec_cast(precision, to = integer()) From 9d0b317ada07c9b9ecae39a15b1637a44921cd7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 8 Dec 2021 11:01:39 +0000 Subject: [PATCH 15/33] reoder decimals take 2 --- r/R/type.R | 22 +++++++++++----------- r/man/data-type.Rd | 6 +++--- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/r/R/type.R b/r/R/type.R index bb7d823f39c..6eaa38352c0 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -389,28 +389,28 @@ timestamp <- function(unit = c("s", "ms", "us", "ns"), timezone = "") { #' @rdname data-type #' @export -decimal128 <- function(precision, scale) { +decimal <- function(precision, scale) { args <- check_decimal_args(precision, scale) - Decimal128Type__initialize(args$precision, args$scale) + + if (args$precision > 38) { + decimal256(args$precision, args$scale) + } else { + decimal128(args$precision, args$scale) + } } #' @rdname data-type #' @export -decimal256 <- function(precision, scale) { +decimal128 <- function(precision, scale) { args <- check_decimal_args(precision, scale) - Decimal256Type__initialize(args$precision, args$scale) + Decimal128Type__initialize(args$precision, args$scale) } #' @rdname data-type #' @export -decimal <- function(precision, scale) { +decimal256 <- function(precision, scale) { args <- check_decimal_args(precision, scale) - - if (args$precision > 38) { - decimal256(args$precision, args$scale) - } else { - decimal128(args$precision, args$scale) - } + Decimal256Type__initialize(args$precision, args$scale) } check_decimal_args <- function(precision, scale) { diff --git a/r/man/data-type.Rd b/r/man/data-type.Rd index 689970d1b6d..314f285d5c7 100644 --- a/r/man/data-type.Rd +++ b/r/man/data-type.Rd @@ -29,8 +29,8 @@ \alias{time64} \alias{null} \alias{timestamp} -\alias{decimal128} \alias{decimal} +\alias{decimal128} \alias{decimal256} \alias{struct} \alias{list_of} @@ -93,10 +93,10 @@ null() timestamp(unit = c("s", "ms", "us", "ns"), timezone = "") -decimal128(precision, scale) - decimal(precision, scale) +decimal128(precision, scale) + decimal256(precision, scale) struct(...) From f8bfacf41b5a1856c155328e1bc98d12a7a6e3b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 8 Dec 2021 14:12:00 +0000 Subject: [PATCH 16/33] attempt to return original error if casting fails --- r/R/array.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/r/R/array.R b/r/R/array.R index 2a367e70085..02c7593d164 100644 --- a/r/R/array.R +++ b/r/R/array.R @@ -192,7 +192,12 @@ Array$create <- function(x, type = NULL) { error = function(cnd) { if (!is.null(type)) { # try again and then cast - vec_to_Array(x, NULL)$cast(type) + tryCatch( + vec_to_Array(x, NULL)$cast(type), + error = function(cnd2) { + signalCondition(cnd2) + } + ) } else { signalCondition(cnd) } From d476428e19ecf14e8b7330706f9bd0d09bb5bed0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 8 Dec 2021 14:13:09 +0000 Subject: [PATCH 17/33] revert to `cnd` (not `cnd2`) --- r/R/array.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/array.R b/r/R/array.R index 02c7593d164..456893418ad 100644 --- a/r/R/array.R +++ b/r/R/array.R @@ -195,7 +195,7 @@ Array$create <- function(x, type = NULL) { tryCatch( vec_to_Array(x, NULL)$cast(type), error = function(cnd2) { - signalCondition(cnd2) + signalCondition(cnd) } ) } else { From 307195d4d038f3cc3f98cf51df1376524aed1232 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 8 Dec 2021 15:30:13 +0000 Subject: [PATCH 18/33] update array$create to correctly capture and return the initial error message if the casting fails --- r/R/array.R | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/r/R/array.R b/r/R/array.R index 456893418ad..41e422a8174 100644 --- a/r/R/array.R +++ b/r/R/array.R @@ -187,20 +187,23 @@ Array$create <- function(x, type = NULL) { } return(out) } + + if (is.null(type)) { + return(vec_to_Array(x, type)) + } + + # a type is given, this needs more care tryCatch( vec_to_Array(x, type), error = function(cnd) { - if (!is.null(type)) { - # try again and then cast - tryCatch( - vec_to_Array(x, NULL)$cast(type), - error = function(cnd2) { - signalCondition(cnd) - } - ) - } else { - signalCondition(cnd) - } + tryCatch( + vec_to_Array(x, NULL)$cast(type), + error = function(cnd2) { + # the casting approach failed, + # so just rethrow the original error + stop(conditionMessage(cnd), call. = FALSE) + } + ) } ) From 36b996d92bfbe7eb1a4dfd08f634a18412cc5361 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 9 Dec 2021 12:56:01 +0000 Subject: [PATCH 19/33] docs + skipped the `decimal256()` unit test for empty ChunkedArrays --- r/R/type.R | 2 +- r/man/data-type.Rd | 2 +- r/tests/testthat/test-chunked-array.R | 25 ++++++++++++++++++++++++- 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/r/R/type.R b/r/R/type.R index 6eaa38352c0..eb71cd7432e 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -205,7 +205,7 @@ NestedType <- R6Class("NestedType", inherit = DataType) #' is sufficient, and it will result in a more compact and more efficient encoding. #' #' #' `decimal()` creates either a `Decimal128Type` or a `Decimal256Type` -#' depending on the `precision` value. If `precision` is greater than 38 a +#' depending on the value for `precision`. If `precision` is greater than 38 a #' `Decimal256Type` is returned, otherwise a `Decimal128Type`. #' #' Use `decimal128()` or `decimal256()` as the names are more informative than diff --git a/r/man/data-type.Rd b/r/man/data-type.Rd index 314f285d5c7..a8e653ae819 100644 --- a/r/man/data-type.Rd +++ b/r/man/data-type.Rd @@ -182,7 +182,7 @@ precision. For most use cases, the maximum precision offered by \code{Decimal128 is sufficient, and it will result in a more compact and more efficient encoding. #' \code{decimal()} creates either a \code{Decimal128Type} or a \code{Decimal256Type} -depending on the \code{precision} value. If \code{precision} is greater than 38 a +depending on the value for \code{precision}. If \code{precision} is greater than 38 a \code{Decimal256Type} is returned, otherwise a \code{Decimal128Type}. Use \code{decimal128()} or \code{decimal256()} as the names are more informative than diff --git a/r/tests/testthat/test-chunked-array.R b/r/tests/testthat/test-chunked-array.R index 95968b5d4e3..e359b13f9e5 100644 --- a/r/tests/testthat/test-chunked-array.R +++ b/r/tests/testthat/test-chunked-array.R @@ -206,7 +206,8 @@ test_that("ChunkedArray supports empty arrays (ARROW-13761)", { int8(), int16(), int32(), int64(), uint8(), uint16(), uint32(), uint64(), float32(), float64(), timestamp("ns"), binary(), large_binary(), fixed_size_binary(32), date32(), date64(), - decimal128(4, 2), decimal256(4, 2), dictionary(), struct(x = int32()) + decimal128(4, 2), #decimal256(4, 2), # not yet supported - see below + dictionary(), struct(x = int32()) ) empty_filter <- ChunkedArray$create(type = bool()) @@ -228,6 +229,28 @@ test_that("ChunkedArray supports empty arrays (ARROW-13761)", { expect_identical(length(as.vector(zero_empty_chunks)), 1L) } } + + skip("array_filter has no kernel matching input types `(array[decimal256(4, 2)], array[bool])`") + # once the above Jira is addressed decimal256() can be moved together with the + # other types. this chunk can be deleted once decimal256() is added to types + type <- decimal256(4, 2) + empty_filter <- ChunkedArray$create(type = bool()) + one_empty_chunk <- ChunkedArray$create(type = type) + expect_type_equal(one_empty_chunk$type, type) + if (type != struct(x = int32())) { + expect_identical(length(one_empty_chunk), length(as.vector(one_empty_chunk))) + } else { + # struct -> tbl and length(tbl) is num_columns instead of num_rows + expect_identical(length(as.vector(one_empty_chunk)), 1L) + } + zero_empty_chunks <- one_empty_chunk$Filter(empty_filter) + expect_equal(zero_empty_chunks$num_chunks, 0) + expect_type_equal(zero_empty_chunks$type, type) + if (type != struct(x = int32())) { + expect_identical(length(zero_empty_chunks), length(as.vector(zero_empty_chunks))) + } else { + expect_identical(length(as.vector(zero_empty_chunks)), 1L) + } }) test_that("integer types casts for ChunkedArray (ARROW-3741)", { From ea19fd327abb08391ad0a9eaab9582e5117acd37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 9 Dec 2021 12:56:55 +0000 Subject: [PATCH 20/33] `Array$create()` tries casting and gives the user a hint if it works --- r/R/array.R | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/r/R/array.R b/r/R/array.R index 41e422a8174..0fa7fa2ff00 100644 --- a/r/R/array.R +++ b/r/R/array.R @@ -196,18 +196,15 @@ Array$create <- function(x, type = NULL) { tryCatch( vec_to_Array(x, type), error = function(cnd) { - tryCatch( - vec_to_Array(x, NULL)$cast(type), - error = function(cnd2) { - # the casting approach failed, - # so just rethrow the original error - stop(conditionMessage(cnd), call. = FALSE) - } - ) + attempt <- try(vec_to_Array(x, NULL)$cast(type), silent = TRUE) + abort( + c(conditionMessage(cnd), + i = if (!inherits(attempt, "try-error")) "You might want to try casting manually. `Array$create(...)$cast(...)` seems to work.") + ) } ) - } + #' @include arrowExports.R Array$import_from_c <- ImportArray From 20f054f2c3743e55abb77454dbda407ecd079943 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 9 Dec 2021 13:02:06 +0000 Subject: [PATCH 21/33] slightly improved hint --- r/R/array.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/array.R b/r/R/array.R index 0fa7fa2ff00..cbe4241533d 100644 --- a/r/R/array.R +++ b/r/R/array.R @@ -199,7 +199,7 @@ Array$create <- function(x, type = NULL) { attempt <- try(vec_to_Array(x, NULL)$cast(type), silent = TRUE) abort( c(conditionMessage(cnd), - i = if (!inherits(attempt, "try-error")) "You might want to try casting manually. `Array$create(...)$cast(...)` seems to work.") + i = if (!inherits(attempt, "try-error")) "You might want to try casting manually with `Array$create(...)$cast(...)`.") ) } ) From c4442fbc6eb5fa44db7f8fc8c1daeec953777134 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 9 Dec 2021 13:05:10 +0000 Subject: [PATCH 22/33] lint --- r/R/array.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/r/R/array.R b/r/R/array.R index cbe4241533d..649c119d7ff 100644 --- a/r/R/array.R +++ b/r/R/array.R @@ -199,8 +199,11 @@ Array$create <- function(x, type = NULL) { attempt <- try(vec_to_Array(x, NULL)$cast(type), silent = TRUE) abort( c(conditionMessage(cnd), - i = if (!inherits(attempt, "try-error")) "You might want to try casting manually with `Array$create(...)$cast(...)`.") + i = if (!inherits(attempt, "try-error")) { + "You might want to try casting manually with `Array$create(...)$cast(...)`." + } ) + ) } ) } From 84879afdadc5df9cc526db550bba6b8ddcd14433 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 9 Dec 2021 20:26:07 +0000 Subject: [PATCH 23/33] removing the unused DecimalArray --- r/src/array_to_vector.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/src/array_to_vector.cpp b/r/src/array_to_vector.cpp index cf32bffbc1a..1e39be3553b 100644 --- a/r/src/array_to_vector.cpp +++ b/r/src/array_to_vector.cpp @@ -920,7 +920,7 @@ class Converter_Decimal : public Converter { Status Ingest_some_nulls(SEXP data, const std::shared_ptr& array, R_xlen_t start, R_xlen_t n, size_t chunk_index) const { - using DecimalArray = typename TypeTraits::ArrayType; + //using DecimalArray = typename TypeTraits::ArrayType; auto p_data = REAL(data) + start; const auto& decimals_arr = checked_cast(*array); From 7eb138b154b0794d0a693a4211330766ae08ad5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 9 Dec 2021 20:35:39 +0000 Subject: [PATCH 24/33] removed the line as the C++ linter doesn't like commented code --- r/src/array_to_vector.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/r/src/array_to_vector.cpp b/r/src/array_to_vector.cpp index 1e39be3553b..0f0d988ec80 100644 --- a/r/src/array_to_vector.cpp +++ b/r/src/array_to_vector.cpp @@ -920,7 +920,6 @@ class Converter_Decimal : public Converter { Status Ingest_some_nulls(SEXP data, const std::shared_ptr& array, R_xlen_t start, R_xlen_t n, size_t chunk_index) const { - //using DecimalArray = typename TypeTraits::ArrayType; auto p_data = REAL(data) + start; const auto& decimals_arr = checked_cast(*array); From 068755f50d05a362675f3e2082a717853b76f4fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 10 Dec 2021 10:15:31 +0000 Subject: [PATCH 25/33] add back in the definition for DecimalArray and use it --- r/src/array_to_vector.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/r/src/array_to_vector.cpp b/r/src/array_to_vector.cpp index 0f0d988ec80..f381bf086d8 100644 --- a/r/src/array_to_vector.cpp +++ b/r/src/array_to_vector.cpp @@ -920,8 +920,9 @@ class Converter_Decimal : public Converter { Status Ingest_some_nulls(SEXP data, const std::shared_ptr& array, R_xlen_t start, R_xlen_t n, size_t chunk_index) const { + using DecimalArray = typename TypeTraits::ArrayType; auto p_data = REAL(data) + start; - const auto& decimals_arr = checked_cast(*array); + const auto& decimals_arr = checked_cast(*array); auto ingest_one = [&](R_xlen_t i) { p_data[i] = std::stod(decimals_arr.FormatValue(i).c_str()); From 1f02407029d9fc40ecbc8c0c28ac7a4ee5791341 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 15 Dec 2021 14:51:05 +0000 Subject: [PATCH 26/33] add tests for the `decimal` precision range --- r/tests/testthat/test-data-type.R | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/r/tests/testthat/test-data-type.R b/r/tests/testthat/test-data-type.R index d932b81e7bd..12815ebe16d 100644 --- a/r/tests/testthat/test-data-type.R +++ b/r/tests/testthat/test-data-type.R @@ -392,6 +392,8 @@ test_that("decimal type and validation", { expect_error(decimal(4, "two"), "`scale` must be an integer") expect_error(decimal(NA, 2), "`precision` must be an integer") expect_error(decimal(4, NA), "`scale` must be an integer") + expect_error(decimal(0, 2), "Invalid: Decimal precision out of range [1, 38]: 0", fixed = TRUE) + expect_error(decimal(100, 2), "Invalid: Decimal precision out of range [1, 76]: 100", fixed = TRUE) # decimal() creates either decimal128 or decimal256 based on precision expect_identical(class(decimal(38, 2)), class(decimal128(38, 2))) @@ -405,6 +407,9 @@ test_that("decimal type and validation", { expect_error(decimal128(4, NA), "`scale` must be an integer") expect_error(decimal128(3:4, NA), "`precision` must have size 1. not size 2") expect_error(decimal128(4, 2:3), "`scale` must have size 1. not size 2") + expect_error(decimal128(0, 2), "Invalid: Decimal precision out of range [1, 38]: 0", fixed = TRUE) + expect_error(decimal128(100, 2), "Invalid: Decimal precision out of range [1, 38]: 100", fixed = TRUE) + expect_r6_class(decimal256(4, 2), "Decimal256Type") @@ -414,6 +419,8 @@ test_that("decimal type and validation", { expect_error(decimal256(4, NA), "`scale` must be an integer") expect_error(decimal256(3:4, NA), "`precision` must have size 1. not size 2") expect_error(decimal256(4, 2:3), "`scale` must have size 1. not size 2") + expect_error(decimal256(0, 2), "Invalid: Decimal precision out of range [1, 76]: 0", fixed = TRUE) + expect_error(decimal256(100, 2), "Invalid: Decimal precision out of range [1, 76]: 100", fixed = TRUE) }) test_that("Binary", { From e9a27f9831b998a5c29991566fdf6afea5994a4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 20 Dec 2021 13:47:16 +0000 Subject: [PATCH 27/33] added unit test to check the manual casting hint --- r/tests/testthat/test-Array.R | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R index ce23c260908..383d49e1fe2 100644 --- a/r/tests/testthat/test-Array.R +++ b/r/tests/testthat/test-Array.R @@ -795,6 +795,26 @@ test_that("Array$create() should have helpful error", { expect_error(Array$create(list()), "Requires at least one element to infer") expect_error(Array$create(list(lgl, lgl, int)), "Expecting a logical vector") expect_error(Array$create(list(char, num, char)), "Expecting a character vector") + + # hint at casting if direct fails and casting looks like it might work + expect_error( + Array$create(as.double(1:10), type = decimal(4, 2)), + "You might want to try casting manually" + ) + # until ARROW-15159 is solved casting from integer to decimal requires a + # precision of at least 12 + # precision < 12 => the attempt to cast errors and the original error message + # is shown + expect_error( + Array$create(1:10, type = decimal(11, 2)), + "NotImplemented: Extend" + ) + # precision >= 12 => the attempt succeeds and the error message now includes + # a hint to cast manually + expect_error( + Array$create(1:10, type = decimal(12, 2)), + "You might want to try casting manually" + ) }) test_that("Array$View() (ARROW-6542)", { From fb1c39a194738d85314066416a0ca50ddda2b082 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 20 Dec 2021 14:13:04 +0000 Subject: [PATCH 28/33] improve comment for the `tryCatch()` + `try()` attempts --- r/R/array.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/r/R/array.R b/r/R/array.R index 649c119d7ff..0e472207f54 100644 --- a/r/R/array.R +++ b/r/R/array.R @@ -192,7 +192,10 @@ Array$create <- function(x, type = NULL) { return(vec_to_Array(x, type)) } - # a type is given, this needs more care + # when a type is given, try to create a vector of the desired type. If that + # fails, attempt to cast and if casting is successful, suggest to the user + # to try casting manually. If the casting fails, return the original error + # message. tryCatch( vec_to_Array(x, type), error = function(cnd) { From 4622e226f23c38e32f93e3355a34f68ce33e5664 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 20 Dec 2021 14:52:00 +0000 Subject: [PATCH 29/33] add TODO and reference the C++ unit tests Jira ticket (ARROW-15162) --- r/tests/testthat/test-data-type.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/r/tests/testthat/test-data-type.R b/r/tests/testthat/test-data-type.R index 12815ebe16d..30a54796d98 100644 --- a/r/tests/testthat/test-data-type.R +++ b/r/tests/testthat/test-data-type.R @@ -392,6 +392,7 @@ test_that("decimal type and validation", { expect_error(decimal(4, "two"), "`scale` must be an integer") expect_error(decimal(NA, 2), "`precision` must be an integer") expect_error(decimal(4, NA), "`scale` must be an integer") + # TODO remove precision range tests below once functionality is tested in C++ (ARROW-15162) expect_error(decimal(0, 2), "Invalid: Decimal precision out of range [1, 38]: 0", fixed = TRUE) expect_error(decimal(100, 2), "Invalid: Decimal precision out of range [1, 76]: 100", fixed = TRUE) @@ -407,6 +408,7 @@ test_that("decimal type and validation", { expect_error(decimal128(4, NA), "`scale` must be an integer") expect_error(decimal128(3:4, NA), "`precision` must have size 1. not size 2") expect_error(decimal128(4, 2:3), "`scale` must have size 1. not size 2") + # TODO remove precision range tests below once functionality is tested in C++ (ARROW-15162) expect_error(decimal128(0, 2), "Invalid: Decimal precision out of range [1, 38]: 0", fixed = TRUE) expect_error(decimal128(100, 2), "Invalid: Decimal precision out of range [1, 38]: 100", fixed = TRUE) @@ -419,6 +421,7 @@ test_that("decimal type and validation", { expect_error(decimal256(4, NA), "`scale` must be an integer") expect_error(decimal256(3:4, NA), "`precision` must have size 1. not size 2") expect_error(decimal256(4, 2:3), "`scale` must have size 1. not size 2") + # TODO remove precision range tests below once functionality is tested in C++ (ARROW-15162) expect_error(decimal256(0, 2), "Invalid: Decimal precision out of range [1, 76]: 0", fixed = TRUE) expect_error(decimal256(100, 2), "Invalid: Decimal precision out of range [1, 76]: 100", fixed = TRUE) }) From 8ec06ede2a44d296ec8e7a3f06558f7939f3f913 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 20 Dec 2021 15:01:30 +0000 Subject: [PATCH 30/33] removed superfluous unit tests --- r/tests/testthat/test-Array.R | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R index 383d49e1fe2..a4648912e48 100644 --- a/r/tests/testthat/test-Array.R +++ b/r/tests/testthat/test-Array.R @@ -801,20 +801,13 @@ test_that("Array$create() should have helpful error", { Array$create(as.double(1:10), type = decimal(4, 2)), "You might want to try casting manually" ) - # until ARROW-15159 is solved casting from integer to decimal requires a - # precision of at least 12 - # precision < 12 => the attempt to cast errors and the original error message - # is shown - expect_error( - Array$create(1:10, type = decimal(11, 2)), - "NotImplemented: Extend" - ) - # precision >= 12 => the attempt succeeds and the error message now includes - # a hint to cast manually + expect_error( Array$create(1:10, type = decimal(12, 2)), "You might want to try casting manually" ) + + }) test_that("Array$View() (ARROW-6542)", { From 9f9031ea2cb89a01c8fd2b5b3c91458f9134074f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 20 Dec 2021 16:37:28 +0000 Subject: [PATCH 31/33] added Jira ticket number to empty array filtering tests for `decimal256()` --- r/tests/testthat/test-chunked-array.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/r/tests/testthat/test-chunked-array.R b/r/tests/testthat/test-chunked-array.R index e359b13f9e5..4cd8b5387be 100644 --- a/r/tests/testthat/test-chunked-array.R +++ b/r/tests/testthat/test-chunked-array.R @@ -206,7 +206,7 @@ test_that("ChunkedArray supports empty arrays (ARROW-13761)", { int8(), int16(), int32(), int64(), uint8(), uint16(), uint32(), uint64(), float32(), float64(), timestamp("ns"), binary(), large_binary(), fixed_size_binary(32), date32(), date64(), - decimal128(4, 2), #decimal256(4, 2), # not yet supported - see below + decimal128(4, 2), #decimal256(4, 2), # un-comment once ARROW-15166 is solved dictionary(), struct(x = int32()) ) @@ -231,8 +231,9 @@ test_that("ChunkedArray supports empty arrays (ARROW-13761)", { } skip("array_filter has no kernel matching input types `(array[decimal256(4, 2)], array[bool])`") - # once the above Jira is addressed decimal256() can be moved together with the - # other types. this chunk can be deleted once decimal256() is added to types + # TODO once ARROW-15166 is addressed decimal256() can be moved together with + # the other types (separated to be able to skip). this whole chunk can be + # deleted type <- decimal256(4, 2) empty_filter <- ChunkedArray$create(type = bool()) one_empty_chunk <- ChunkedArray$create(type = type) From 6640e8c23bf7768561cd965fef30f4c7cb719ff9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 20 Dec 2021 19:02:47 +0000 Subject: [PATCH 32/33] test `Array$create()` fallback is as expected when casting doesn't work --- r/tests/testthat/test-Array.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R index a4648912e48..6e30612b61d 100644 --- a/r/tests/testthat/test-Array.R +++ b/r/tests/testthat/test-Array.R @@ -807,7 +807,12 @@ test_that("Array$create() should have helpful error", { "You might want to try casting manually" ) - + a <- expect_error(Array$create("one", int32())) + b <- expect_error(vec_to_Array("one", int32())) + # the captured conditions (errors) are not identical, but their messages should be + expect_s3_class(a, "rlang_error") + expect_s3_class(b, "simpleError") + expect_identical(a$message, b$message) }) test_that("Array$View() (ARROW-6542)", { From db7148deaa847a0a7b862e47523bd3ee606492ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 20 Dec 2021 19:22:38 +0000 Subject: [PATCH 33/33] use `expect_equal` when comparing messages --- r/tests/testthat/test-Array.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R index 6e30612b61d..c4e3174b149 100644 --- a/r/tests/testthat/test-Array.R +++ b/r/tests/testthat/test-Array.R @@ -812,7 +812,7 @@ test_that("Array$create() should have helpful error", { # the captured conditions (errors) are not identical, but their messages should be expect_s3_class(a, "rlang_error") expect_s3_class(b, "simpleError") - expect_identical(a$message, b$message) + expect_equal(a$message, b$message) }) test_that("Array$View() (ARROW-6542)", {