diff --git a/r/NAMESPACE b/r/NAMESPACE index 31fd9cf6a87..df4f36408e3 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -214,6 +214,7 @@ export(dataset_factory) export(date32) export(date64) export(decimal) +export(decimal128) export(default_memory_pool) export(dictionary) export(ends_with) diff --git a/r/NEWS.md b/r/NEWS.md index 7666b49c7a7..67c34b3fcd1 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -19,6 +19,8 @@ # arrow 6.0.1.9000 +* Added `decimal128()` (identical to `decimal()`) as the name is more explicit and updated docs to encourage its use. + # arrow 6.0.1 * Joins now support inclusion of dictionary columns, and multiple crashes have been fixed diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index 56e8810937a..ccd7ded3cca 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -201,7 +201,7 @@ nse_funcs$is.numeric <- function(x) { is.numeric(x) || (inherits(x, "Expression") && x$type_id() %in% Type[c( "UINT8", "INT8", "UINT16", "INT16", "UINT32", "INT32", "UINT64", "INT64", "HALF_FLOAT", "FLOAT", "DOUBLE", - "DECIMAL", "DECIMAL256" + "DECIMAL128", "DECIMAL256" )]) } nse_funcs$is.double <- function(x) { diff --git a/r/R/enums.R b/r/R/enums.R index 4e69b7a190e..17d0484b997 100644 --- a/r/R/enums.R +++ b/r/R/enums.R @@ -65,7 +65,7 @@ Type <- enum("Type::type", TIME64 = 20L, INTERVAL_MONTHS = 21L, INTERVAL_DAY_TIME = 22L, - DECIMAL = 23L, + DECIMAL128 = 23L, DECIMAL256 = 24L, LIST = 25L, STRUCT = 26L, diff --git a/r/R/type.R b/r/R/type.R index ac3dcf3e95f..60f0045e514 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -181,33 +181,37 @@ NestedType <- R6Class("NestedType", inherit = DataType) #' `bit64::integer64` object) by setting `options(arrow.int64_downcast = #' FALSE)`. #' -#' `decimal()` creates a `decimal128` type. Arrow decimals are fixed-point +#' `decimal128()` creates a `decimal128` type. Arrow decimals are fixed-point #' decimal numbers encoded as a scalar integer. 
The `precision` is the number of #' significant digits that the decimal type can represent; the `scale` is the #' number of digits after the decimal point. For example, the number 1234.567 #' has a precision of 7 and a scale of 3. Note that `scale` can be negative. #' -#' As an example, `decimal(7, 3)` can exactly represent the numbers 1234.567 and +#' As an example, `decimal128(7, 3)` can exactly represent the numbers 1234.567 and #' -1234.567 (encoded internally as the 128-bit integers 1234567 and -1234567, #' respectively), but neither 12345.67 nor 123.4567. #' -#' `decimal(5, -3)` can exactly represent the number 12345000 (encoded +#' `decimal128(5, -3)` can exactly represent the number 12345000 (encoded #' internally as the 128-bit integer 12345), but neither 123450000 nor 1234500. #' The `scale` can be thought of as an argument that controls rounding. When #' negative, `scale` causes the number to be expressed using scientific notation #' and power of 10. #' +#' `decimal()` is identical to `decimal128()`, defined for backward compatibility. +#' Use `decimal128()` as the name is more informative and `decimal()` might be +#' deprecated in the future. +#' #' @param unit For time/timestamp types, the time unit. `time32()` can take #' either "s" or "ms", while `time64()` can be "us" or "ns". `timestamp()` can #' take any of those four values. #' @param timezone For `timestamp()`, an optional time zone string. #' @param byte_width byte width for `FixedSizeBinary` type. #' @param list_size list size for `FixedSizeList` type. -#' @param precision For `decimal()`, the number of significant digits -#' the arrow `decimal` type can represent. The maximum precision for -#' `decimal()` is 38 significant digits. -#' @param scale For `decimal()`, the number of digits after the decimal -#' point. It can be negative. +#' @param precision For `decimal()` and `decimal128()`, the number of significant +#' digits the arrow `decimal` type can represent. 
The maximum precision for +#' `decimal()` and `decimal128()` is 38 significant digits. +#' @param scale For `decimal()` and `decimal128()`, the number of digits after +#' the decimal point. It can be negative. #' @param type For `list_of()`, a data type to make a list-of-type #' @param ... For `struct()`, a named list of types to define the struct columns #' @@ -373,7 +377,7 @@ timestamp <- function(unit = c("s", "ms", "us", "ns"), timezone = "") { #' @rdname data-type #' @export -decimal <- function(precision, scale) { +decimal128 <- function(precision, scale) { if (is.numeric(precision)) { precision <- as.integer(precision) } else { @@ -387,6 +391,10 @@ decimal <- function(precision, scale) { Decimal128Type__initialize(precision, scale) } +#' @rdname data-type +#' @export +decimal <- decimal128 + StructType <- R6Class("StructType", inherit = NestedType, public = list( @@ -487,7 +495,7 @@ canonical_type_str <- function(type_str) { time64 = "time64", null = "null", timestamp = "timestamp", - decimal = "decimal128", + decimal128 = "decimal128", struct = "struct", list_of = "list", list = "list", diff --git a/r/man/data-type.Rd b/r/man/data-type.Rd index 2b2313571b2..2b1c4bbff25 100644 --- a/r/man/data-type.Rd +++ b/r/man/data-type.Rd @@ -29,6 +29,7 @@ \alias{time64} \alias{null} \alias{timestamp} +\alias{decimal128} \alias{decimal} \alias{struct} \alias{list_of} @@ -91,6 +92,8 @@ null() timestamp(unit = c("s", "ms", "us", "ns"), timezone = "") +decimal128(precision, scale) + decimal(precision, scale) struct(...) @@ -110,12 +113,12 @@ take any of those four values.} \item{timezone}{For \code{timestamp()}, an optional time zone string.} -\item{precision}{For \code{decimal()}, the number of significant digits -the arrow \code{decimal} type can represent. The maximum precision for -\code{decimal()} is 38 significant digits.} +\item{precision}{For \code{decimal()} and \code{decimal128()}, the number of significant +digits the arrow \code{decimal} type can represent. 
The maximum precision for +\code{decimal()} and \code{decimal128()} is 38 significant digits.} -\item{scale}{For \code{decimal()}, the number of digits after the decimal -point. It can be negative.} +\item{scale}{For \code{decimal()} and \code{decimal128()}, the number of digits after +the decimal point. It can be negative.} \item{...}{For \code{struct()}, a named list of types to define the struct columns} @@ -153,21 +156,25 @@ are translated to R objects, \code{uint32} and \code{uint64} are converted to \c types, this conversion can be disabled (so that \code{int64} always yields a \code{bit64::integer64} object) by setting \code{options(arrow.int64_downcast = FALSE)}. -\code{decimal()} creates a \code{decimal128} type. Arrow decimals are fixed-point +\code{decimal128()} creates a \code{decimal128} type. Arrow decimals are fixed-point decimal numbers encoded as a scalar integer. The \code{precision} is the number of significant digits that the decimal type can represent; the \code{scale} is the number of digits after the decimal point. For example, the number 1234.567 has a precision of 7 and a scale of 3. Note that \code{scale} can be negative. -As an example, \code{decimal(7, 3)} can exactly represent the numbers 1234.567 and +As an example, \code{decimal128(7, 3)} can exactly represent the numbers 1234.567 and -1234.567 (encoded internally as the 128-bit integers 1234567 and -1234567, respectively), but neither 12345.67 nor 123.4567. -\code{decimal(5, -3)} can exactly represent the number 12345000 (encoded +\code{decimal128(5, -3)} can exactly represent the number 12345000 (encoded internally as the 128-bit integer 12345), but neither 123450000 nor 1234500. The \code{scale} can be thought of as an argument that controls rounding. When negative, \code{scale} causes the number to be expressed using scientific notation and power of 10. + +\code{decimal()} is identical to \code{decimal128()}, defined for backward compatibility. 
+Use \code{decimal128()} as the name is more informative and \code{decimal()} might be +deprecated in the future. } \examples{ \dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} diff --git a/r/src/array_to_vector.cpp b/r/src/array_to_vector.cpp index 49f6d987f85..3ac32d7c395 100644 --- a/r/src/array_to_vector.cpp +++ b/r/src/array_to_vector.cpp @@ -1216,7 +1216,7 @@ std::shared_ptr Converter::Make( return std::make_shared(chunked_array); } - case Type::DECIMAL: + case Type::DECIMAL128: return std::make_shared(chunked_array); // nested diff --git a/r/src/datatype.cpp b/r/src/datatype.cpp index ffbe1ecc5e0..bfc6687ed5b 100644 --- a/r/src/datatype.cpp +++ b/r/src/datatype.cpp @@ -80,7 +80,7 @@ const char* r6_class_name::get( case Type::TIME64: return "Time64"; - case Type::DECIMAL: + case Type::DECIMAL128: return "Decimal128Type"; case Type::LIST: diff --git a/r/tests/testthat/test-chunked-array.R b/r/tests/testthat/test-chunked-array.R index c931ddec5a2..73e536514a2 100644 --- a/r/tests/testthat/test-chunked-array.R +++ b/r/tests/testthat/test-chunked-array.R @@ -206,7 +206,7 @@ test_that("ChunkedArray supports empty arrays (ARROW-13761)", { int8(), int16(), int32(), int64(), uint8(), uint16(), uint32(), uint64(), float32(), float64(), timestamp("ns"), binary(), large_binary(), fixed_size_binary(32), date32(), date64(), - decimal(4, 2), dictionary(), struct(x = int32()) + decimal128(4, 2), dictionary(), struct(x = int32()) ) empty_filter <- ChunkedArray$create(type = bool()) diff --git a/r/tests/testthat/test-data-type.R b/r/tests/testthat/test-data-type.R index 98dc9ebfbda..d2795722928 100644 --- a/r/tests/testthat/test-data-type.R +++ b/r/tests/testthat/test-data-type.R @@ -385,16 +385,21 @@ test_that("DictionaryType validation", { }) test_that("decimal type and validation", { - expect_error(decimal()) + expect_r6_class(decimal(4, 2), "Decimal128Type") + expect_error(decimal("four"), '"precision" must be an 
integer') - expect_error(decimal(4)) expect_error(decimal(4, "two"), '"scale" must be an integer') expect_error(decimal(NA, 2), '"precision" must be an integer') - expect_error(decimal(0, 2), "Invalid: Decimal precision out of range [1, 38]: 0", fixed = TRUE) - expect_error(decimal(100, 2), "Invalid: Decimal precision out of range [1, 38]: 100", fixed = TRUE) expect_error(decimal(4, NA), '"scale" must be an integer') - expect_r6_class(decimal(4, 2), "Decimal128Type") + # decimal() is just an alias for decimal128() for backwards compatibility + expect_r6_class(decimal128(4, 2), "Decimal128Type") + expect_identical(class(decimal(2, 4)), class(decimal128(2, 4))) + + expect_error(decimal128("four"), '"precision" must be an integer') + expect_error(decimal128(4, "two"), '"scale" must be an integer') + expect_error(decimal128(NA, 2), '"precision" must be an integer') + expect_error(decimal128(4, NA), '"scale" must be an integer') }) test_that("Binary", { diff --git a/r/tests/testthat/test-dplyr-funcs-type.R b/r/tests/testthat/test-dplyr-funcs-type.R index 31184477b37..22696eb2ae6 100644 --- a/r/tests/testthat/test-dplyr-funcs-type.R +++ b/r/tests/testthat/test-dplyr-funcs-type.R @@ -208,33 +208,40 @@ test_that("type checks with is() giving Arrow types", { Table$create( i32 = Array$create(1, int32()), dec = Array$create(pi)$cast(decimal(3, 2)), + dec128 = Array$create(pi)$cast(decimal128(3, 2)), f64 = Array$create(1.1, float64()), str = Array$create("a", arrow::string()) ) %>% transmute( i32_is_i32 = is(i32, int32()), i32_is_dec = is(i32, decimal(3, 2)), + i32_is_dec128 = is(i32, decimal128(3, 2)), i32_is_i64 = is(i32, float64()), i32_is_str = is(i32, arrow::string()), dec_is_i32 = is(dec, int32()), dec_is_dec = is(dec, decimal(3, 2)), + dec_is_dec128 = is(dec, decimal128(3, 2)), dec_is_i64 = is(dec, float64()), dec_is_str = is(dec, arrow::string()), + dec128_is_i32 = is(dec128, int32()), + dec128_is_dec128 = is(dec128, decimal128(3, 2)), + dec128_is_i64 = is(dec128, 
float64()), + dec128_is_str = is(dec128, arrow::string()), f64_is_i32 = is(f64, int32()), f64_is_dec = is(f64, decimal(3, 2)), + f64_is_dec128 = is(f64, decimal128(3, 2)), f64_is_i64 = is(f64, float64()), f64_is_str = is(f64, arrow::string()), str_is_i32 = is(str, int32()), - str_is_dec = is(str, decimal(3, 2)), + str_is_dec128 = is(str, decimal128(3, 2)), str_is_i64 = is(str, float64()), str_is_str = is(str, arrow::string()) ) %>% collect() %>% t() %>% as.vector(), - c( - TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, - FALSE, FALSE, FALSE, FALSE, TRUE - ) + c(TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, + TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, + TRUE) ) # with class2=string expect_equal( diff --git a/r/tests/testthat/test-type.R b/r/tests/testthat/test-type.R index 3821fb4503a..8046ebbaa57 100644 --- a/r/tests/testthat/test-type.R +++ b/r/tests/testthat/test-type.R @@ -157,9 +157,13 @@ test_that("Type strings are correctly canonicalized", { sub("^([^([<]+).*$", "\\1", timestamp()$ToString()) ) expect_equal( - canonical_type_str("decimal"), + canonical_type_str("decimal128"), sub("^([^([<]+).*$", "\\1", decimal(3, 2)$ToString()) ) + expect_equal( + canonical_type_str("decimal128"), + sub("^([^([<]+).*$", "\\1", decimal128(3, 2)$ToString()) + ) expect_equal( canonical_type_str("struct"), sub("^([^([<]+).*$", "\\1", struct(foo = int32())$ToString())