From 9aa5db8ac64767e3c6c62986c6fab837728995a4 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Sun, 31 May 2020 13:48:48 -0700 Subject: [PATCH 01/11] Add sum methods using call_function --- r/NAMESPACE | 12 ++++++++++++ r/R/array.R | 26 ++++++++++++++++++++++++++ r/R/chunked-array.R | 12 ++++++++++++ r/R/scalar.R | 13 +++++++++++++ r/tests/testthat/test-Array.R | 21 +++++++++++++++++++++ r/tests/testthat/test-chunked-array.R | 8 ++++++++ r/tests/testthat/test-scalar.R | 6 ++++++ 7 files changed, 98 insertions(+) diff --git a/r/NAMESPACE b/r/NAMESPACE index 6a8c0f9b3ea..bb86caabdd3 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -15,8 +15,17 @@ S3method(Ops,ChunkedArray) S3method(Ops,Expression) S3method(Ops,array_expression) S3method(all,equal.ArrowObject) +S3method(as.character,Array) +S3method(as.character,ChunkedArray) +S3method(as.character,Scalar) S3method(as.data.frame,RecordBatch) S3method(as.data.frame,Table) +S3method(as.double,Array) +S3method(as.double,ChunkedArray) +S3method(as.double,Scalar) +S3method(as.integer,Array) +S3method(as.integer,ChunkedArray) +S3method(as.integer,Scalar) S3method(as.list,RecordBatch) S3method(as.list,Table) S3method(as.raw,Buffer) @@ -58,6 +67,9 @@ S3method(read_message,MessageReader) S3method(read_message,default) S3method(row.names,RecordBatch) S3method(row.names,Table) +S3method(sum,Array) +S3method(sum,ChunkedArray) +S3method(sum,Scalar) S3method(tail,Array) S3method(tail,ChunkedArray) S3method(tail,RecordBatch) diff --git a/r/R/array.R b/r/R/array.R index f17ea56cb01..56102581cd7 100644 --- a/r/R/array.R +++ b/r/R/array.R @@ -341,3 +341,29 @@ is.Array <- function(x, type = NULL) { } is_it } + +#' @export +sum.Array <- function(..., na.rm = FALSE) { + args <- list(...) + assert_that(length(args) == 1) # TODO: make chunked array if there are multiple arrays + a <- ..1 + if (!na.rm && a$null_count > 0) { + # Arrow sum function always drops NAs so handle that here + Scalar$create(NA_integer_, type = a$type) + } else { + if (inherits(a$type, "Boolean")) { + # Bool sum not implemented so cast to int + a <- a$cast(int8()) + } + shared_ptr(Scalar, call_function("sum", a)) + } +} + +#' @export +as.double.Array <- function(x, ...) as.double(as.vector(x), ...) + +#' @export +as.integer.Array <- function(x, ...) as.integer(as.vector(x), ...) + +#' @export +as.character.Array <- function(x, ...) as.character(as.vector(x), ...) diff --git a/r/R/chunked-array.R b/r/R/chunked-array.R index f3a3eaec9fb..16fd78dd069 100644 --- a/r/R/chunked-array.R +++ b/r/R/chunked-array.R @@ -151,3 +151,15 @@ head.ChunkedArray <- head.Array #' @export tail.ChunkedArray <- tail.Array + +#' @export +as.double.ChunkedArray <- as.double.Array + +#' @export +as.integer.ChunkedArray <- as.integer.Array + +#' @export +as.character.ChunkedArray <- as.character.Array + +#' @export +sum.ChunkedArray <- sum.Array diff --git a/r/R/scalar.R b/r/R/scalar.R index df06f7b3a38..d2adb6d5599 100644 --- a/r/R/scalar.R +++ b/r/R/scalar.R @@ -38,6 +38,7 @@ Scalar <- R6Class("Scalar", inherit = ArrowObject, ), active = list( is_valid = function() Scalar__is_valid(self), + null_count = function() sum(!self$is_valid), type = function() DataType$create(Scalar__type(self)) ) ) @@ -62,3 +63,15 @@ is.na.Scalar <- function(x) !x$is_valid #' @export as.vector.Scalar <- function(x, mode) x$as_vector() + +#' @export +as.double.Scalar <- as.double.Array + +#' @export +as.integer.Scalar <- as.integer.Array + +#' @export +as.character.Scalar <- as.character.Array + +#' @export +sum.Scalar <- sum.Array diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R index 6d61d9ac5fc..ea6af7c13ad 100644 --- a/r/tests/testthat/test-Array.R +++ b/r/tests/testthat/test-Array.R @@ -599,3 +599,24 @@ test_that("Array$ApproxEquals", { expect_true(a$ApproxEquals(b)) expect_false(a$ApproxEquals(vec)) }) + +test_that("sum.Array", { + ints <- 1:5 + a <- Array$create(ints) + expect_is(sum(a), "Scalar") + expect_identical(as.integer(sum(a)), sum(ints)) + + floats <- c(1.3, 2.4, 3) + f <- Array$create(floats) + expect_identical(as.numeric(sum(f)), sum(floats)) + + floats <- c(floats, NA) + na <- Array$create(floats) + expect_identical(as.numeric(sum(na)), sum(floats)) + expect_is(sum(na, na.rm = TRUE), "Scalar") + expect_identical(as.numeric(sum(na, na.rm = TRUE)), sum(floats, na.rm = TRUE)) + + bools <- c(TRUE, TRUE, FALSE) + b <- Array$create(bools) + expect_identical(as.integer(sum(b)), sum(bools)) +}) diff --git a/r/tests/testthat/test-chunked-array.R b/r/tests/testthat/test-chunked-array.R index c627fff3af1..93ce3b71f0d 100644 --- a/r/tests/testthat/test-chunked-array.R +++ b/r/tests/testthat/test-chunked-array.R @@ -392,3 +392,11 @@ test_that("ChunkedArray$Equals", { expect_true(a$Equals(b)) expect_false(a$Equals(vec)) }) + + +test_that("sum", { + a <- ChunkedArray$create(1:4, c(1:4, NA), 1:5) + expect_is(sum(a), "Scalar") + expect_identical(as.vector(sum(a)), NA) + expect_identical(as.numeric(sum(a, na.rm = TRUE)), 35) +}) diff --git a/r/tests/testthat/test-scalar.R b/r/tests/testthat/test-scalar.R index 570fc802334..d91a4bbe655 100644 --- a/r/tests/testthat/test-scalar.R +++ b/r/tests/testthat/test-scalar.R @@ -48,3 +48,9 @@ test_that("Creating Scalars of a different type and casting them", { expect_type_equal(Scalar$create(4L, int8())$type, int8()) expect_type_equal(Scalar$create(4L)$cast(float32())$type, float32()) }) + +test_that("sum", { + skip("No sum method in arrow for Scalar") + s <- Scalar$create(4) + expect_identical(as.numeric(s), as.numeric(sum(s))) +}) From f9766347d998d20eb9d9ecd689dec73498675371 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Sun, 31 May 2020 15:36:54 -0700 Subject: [PATCH 02/11] Fix test --- r/tests/testthat/test-chunked-array.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/tests/testthat/test-chunked-array.R b/r/tests/testthat/test-chunked-array.R index 93ce3b71f0d..aee45bc171d 100644 --- a/r/tests/testthat/test-chunked-array.R +++ b/r/tests/testthat/test-chunked-array.R @@ -397,6 +397,6 @@ test_that("ChunkedArray$Equals", { test_that("sum", { a <- ChunkedArray$create(1:4, c(1:4, NA), 1:5) expect_is(sum(a), "Scalar") - expect_identical(as.vector(sum(a)), NA) + expect_true(is.na(as.vector(sum(a)))) expect_identical(as.numeric(sum(a, na.rm = TRUE)), 35) }) From bcc138ace52769abc19f4fa7dbdc0b4cfe641147 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Sat, 6 Jun 2020 14:36:59 -0700 Subject: [PATCH 03/11] Move sum methods to compute.R --- r/DESCRIPTION | 2 +- r/R/array.R | 17 ------------- r/R/chunked-array.R | 3 --- r/R/compute.R | 35 ++++++++++++++++++++++----- r/R/scalar.R | 3 --- r/tests/testthat/test-Array.R | 21 ---------------- r/tests/testthat/test-chunked-array.R | 8 ------ r/tests/testthat/test-compute.R | 34 ++++++++++++++++++++++++++ r/tests/testthat/test-scalar.R | 6 ----- 9 files changed, 64 insertions(+), 65 deletions(-) diff --git a/r/DESCRIPTION b/r/DESCRIPTION index ffc473f365d..4678d0a236b 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -60,6 +60,7 @@ Collate: 'chunked-array.R' 'io.R' 'compression.R' + 'scalar.R' 'compute.R' 'config.R' 'csv.R' @@ -85,7 +86,6 @@ Collate: 'record-batch-writer.R' 'reexports-bit64.R' 'reexports-tidyselect.R' - 'scalar.R' 'schema.R' 'struct.R' 'util.R' diff --git a/r/R/array.R b/r/R/array.R index 56102581cd7..194cdeb42f8 100644 --- a/r/R/array.R +++ b/r/R/array.R @@ -342,23 +342,6 @@ is.Array <- function(x, type = NULL) { is_it } -#' @export -sum.Array <- function(..., na.rm = FALSE) { - args <- list(...) - assert_that(length(args) == 1) # TODO: make chunked array if there are multiple arrays - a <- ..1 - if (!na.rm && a$null_count > 0) { - # Arrow sum function always drops NAs so handle that here - Scalar$create(NA_integer_, type = a$type) - } else { - if (inherits(a$type, "Boolean")) { - # Bool sum not implemented so cast to int - a <- a$cast(int8()) - } - shared_ptr(Scalar, call_function("sum", a)) - } -} - #' @export as.double.Array <- function(x, ...) as.double(as.vector(x), ...) diff --git a/r/R/chunked-array.R b/r/R/chunked-array.R index 16fd78dd069..0cd5f3075c1 100644 --- a/r/R/chunked-array.R +++ b/r/R/chunked-array.R @@ -160,6 +160,3 @@ as.integer.ChunkedArray <- as.integer.Array #' @export as.character.ChunkedArray <- as.character.Array - -#' @export -sum.ChunkedArray <- sum.Array diff --git a/r/R/compute.R b/r/R/compute.R index 000c5c86f07..540bbd1ecc4 100644 --- a/r/R/compute.R +++ b/r/R/compute.R @@ -16,6 +16,8 @@ # under the License. #' @include array.R +#' @include chunked-array.R +#' @include scalar.R call_function <- function(function_name, ..., options = list()) { assert_that(is.string(function_name)) @@ -32,13 +34,34 @@ CastOptions <- R6Class("CastOptions", inherit = ArrowObject) #' @param allow_float_truncate allow float truncate, `!safe` by default #' #' @export -cast_options <- function( - safe = TRUE, - allow_int_overflow = !safe, - allow_time_truncate = !safe, - allow_float_truncate = !safe -){ +cast_options <- function(safe = TRUE, + allow_int_overflow = !safe, + allow_time_truncate = !safe, + allow_float_truncate = !safe) { shared_ptr(CastOptions, compute___CastOptions__initialize(allow_int_overflow, allow_time_truncate, allow_float_truncate) ) } + +#' @export +sum.Array <- function(..., na.rm = FALSE) { + args <- list(...) + assert_that(length(args) == 1) # TODO: make chunked array if there are multiple arrays + a <- ..1 + if (!na.rm && a$null_count > 0) { + # Arrow sum function always drops NAs so handle that here + Scalar$create(NA_integer_, type = a$type) + } else { + if (inherits(a$type, "Boolean")) { + # Bool sum not implemented so cast to int + a <- a$cast(int8()) + } + shared_ptr(Scalar, call_function("sum", a)) + } +} + +#' @export +sum.ChunkedArray <- sum.Array + +#' @export +sum.Scalar <- sum.Array diff --git a/r/R/scalar.R b/r/R/scalar.R index d2adb6d5599..f824c2ed8f6 100644 --- a/r/R/scalar.R +++ b/r/R/scalar.R @@ -72,6 +72,3 @@ as.integer.Scalar <- as.integer.Array #' @export as.character.Scalar <- as.character.Array - -#' @export -sum.Scalar <- sum.Array diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R index ea6af7c13ad..6d61d9ac5fc 100644 --- a/r/tests/testthat/test-Array.R +++ b/r/tests/testthat/test-Array.R @@ -599,24 +599,3 @@ test_that("Array$ApproxEquals", { expect_true(a$ApproxEquals(b)) expect_false(a$ApproxEquals(vec)) }) - -test_that("sum.Array", { - ints <- 1:5 - a <- Array$create(ints) - expect_is(sum(a), "Scalar") - expect_identical(as.integer(sum(a)), sum(ints)) - - floats <- c(1.3, 2.4, 3) - f <- Array$create(floats) - expect_identical(as.numeric(sum(f)), sum(floats)) - - floats <- c(floats, NA) - na <- Array$create(floats) - expect_identical(as.numeric(sum(na)), sum(floats)) - expect_is(sum(na, na.rm = TRUE), "Scalar") - expect_identical(as.numeric(sum(na, na.rm = TRUE)), sum(floats, na.rm = TRUE)) - - bools <- c(TRUE, TRUE, FALSE) - b <- Array$create(bools) - expect_identical(as.integer(sum(b)), sum(bools)) -}) diff --git a/r/tests/testthat/test-chunked-array.R b/r/tests/testthat/test-chunked-array.R index aee45bc171d..c627fff3af1 100644 --- a/r/tests/testthat/test-chunked-array.R +++ b/r/tests/testthat/test-chunked-array.R @@ -392,11 +392,3 @@ test_that("ChunkedArray$Equals", { expect_true(a$Equals(b)) expect_false(a$Equals(vec)) }) - - -test_that("sum", { - a <- ChunkedArray$create(1:4, c(1:4, NA), 1:5) - expect_is(sum(a), "Scalar") - expect_true(is.na(as.vector(sum(a)))) - expect_identical(as.numeric(sum(a, na.rm = TRUE)), 35) -}) diff --git a/r/tests/testthat/test-compute.R b/r/tests/testthat/test-compute.R index 1d0d23a788d..49ffcb211cd 100644 --- a/r/tests/testthat/test-compute.R +++ b/r/tests/testthat/test-compute.R @@ -17,6 +17,40 @@ context("compute") +test_that("sum.Array", { + ints <- 1:5 + a <- Array$create(ints) + expect_is(sum(a), "Scalar") + expect_identical(as.integer(sum(a)), sum(ints)) + + floats <- c(1.3, 2.4, 3) + f <- Array$create(floats) + expect_identical(as.numeric(sum(f)), sum(floats)) + + floats <- c(floats, NA) + na <- Array$create(floats) + expect_identical(as.numeric(sum(na)), sum(floats)) + expect_is(sum(na, na.rm = TRUE), "Scalar") + expect_identical(as.numeric(sum(na, na.rm = TRUE)), sum(floats, na.rm = TRUE)) + + bools <- c(TRUE, TRUE, FALSE) + b <- Array$create(bools) + expect_identical(as.integer(sum(b)), sum(bools)) +}) + +test_that("sum.ChunkedArray", { + a <- ChunkedArray$create(1:4, c(1:4, NA), 1:5) + expect_is(sum(a), "Scalar") + expect_true(is.na(as.vector(sum(a)))) + expect_identical(as.numeric(sum(a, na.rm = TRUE)), 35) +}) + +test_that("sum.Scalar", { + skip("No sum method in arrow for Scalar") + s <- Scalar$create(4) + expect_identical(as.numeric(s), as.numeric(sum(s))) +}) + test_that("Bad input handling of call_function", { expect_error(call_function("sum", 2, 3), "to_datum: Not implemented for type double") }) diff --git a/r/tests/testthat/test-scalar.R b/r/tests/testthat/test-scalar.R index d91a4bbe655..570fc802334 100644 --- a/r/tests/testthat/test-scalar.R +++ b/r/tests/testthat/test-scalar.R @@ -48,9 +48,3 @@ test_that("Creating Scalars of a different type and casting them", { expect_type_equal(Scalar$create(4L, int8())$type, int8()) expect_type_equal(Scalar$create(4L)$cast(float32())$type, float32()) }) - -test_that("sum", { - skip("No sum method in arrow for Scalar") - s <- Scalar$create(4) - expect_identical(as.numeric(s), as.numeric(sum(s))) -}) From aa7db36cc8bff22f5b84db77c35ad4993496c440 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Sat, 6 Jun 2020 14:55:43 -0700 Subject: [PATCH 04/11] Add mean bindings --- r/NAMESPACE | 3 +++ r/R/compute.R | 23 ++++++++++++++++++++++ r/tests/testthat/test-compute.R | 34 +++++++++++++++++++++++++++++++++ 3 files changed, 60 insertions(+) diff --git a/r/NAMESPACE b/r/NAMESPACE index bb86caabdd3..01604ab0a9b 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -53,6 +53,9 @@ S3method(length,Array) S3method(length,ChunkedArray) S3method(length,Scalar) S3method(length,Schema) +S3method(mean,Array) +S3method(mean,ChunkedArray) +S3method(mean,Scalar) S3method(names,Dataset) S3method(names,RecordBatch) S3method(names,ScannerBuilder) diff --git a/r/R/compute.R b/r/R/compute.R index 540bbd1ecc4..474908684dc 100644 --- a/r/R/compute.R +++ b/r/R/compute.R @@ -65,3 +65,26 @@ sum.ChunkedArray <- sum.Array #' @export sum.Scalar <- sum.Array + +#' @export +mean.Array <- function(..., na.rm = FALSE) { + args <- list(...) + assert_that(length(args) == 1) # TODO: make chunked array if there are multiple arrays + a <- ..1 + if (!na.rm && a$null_count > 0) { + # Arrow sum/mean function always drops NAs so handle that here + Scalar$create(NA_integer_, type = a$type) + } else { + if (inherits(a$type, "Boolean")) { + # Bool sum/mean not implemented so cast to int + a <- a$cast(int8()) + } + shared_ptr(Scalar, call_function("mean", a)) + } +} + +#' @export +mean.ChunkedArray <- mean.Array + +#' @export +mean.Scalar <- mean.Array diff --git a/r/tests/testthat/test-compute.R b/r/tests/testthat/test-compute.R index 49ffcb211cd..adcc202d810 100644 --- a/r/tests/testthat/test-compute.R +++ b/r/tests/testthat/test-compute.R @@ -51,6 +51,40 @@ test_that("sum.Scalar", { expect_identical(as.numeric(s), as.numeric(sum(s))) }) +test_that("mean.Array", { + ints <- 1:4 + a <- Array$create(ints) + expect_is(mean(a), "Scalar") + expect_identical(as.vector(mean(a)), mean(ints)) + + floats <- c(1.3, 2.4, 3) + f <- Array$create(floats) + expect_identical(as.vector(mean(f)), mean(floats)) + + floats <- c(floats, NA) + na <- Array$create(floats) + expect_identical(as.vector(mean(na)), mean(floats)) + expect_is(mean(na, na.rm = TRUE), "Scalar") + expect_identical(as.vector(mean(na, na.rm = TRUE)), mean(floats, na.rm = TRUE)) + + bools <- c(TRUE, TRUE, FALSE) + b <- Array$create(bools) + expect_identical(as.vector(mean(b)), mean(bools)) +}) + +test_that("mean.ChunkedArray", { + a <- ChunkedArray$create(1:4, c(1:4, NA), 1:5) + expect_is(mean(a), "Scalar") + expect_true(is.na(as.vector(mean(a)))) + expect_identical(as.vector(mean(a, na.rm = TRUE)), 35/13) +}) + +test_that("mean.Scalar", { + skip("No mean method in arrow for Scalar") + s <- Scalar$create(4) + expect_identical(as.vector(s), mean(s)) +}) + test_that("Bad input handling of call_function", { expect_error(call_function("sum", 2, 3), "to_datum: Not implemented for type double") }) From 349272cb78d791d7fb3872f5280f45de516b6965 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Sat, 6 Jun 2020 15:01:35 -0700 Subject: [PATCH 05/11] Remove stale comments from #7318 --- r/R/chunked-array.R | 2 -- r/R/record-batch.R | 2 -- r/R/table.R | 2 -- 3 files changed, 6 deletions(-) diff --git a/r/R/chunked-array.R b/r/R/chunked-array.R index 0cd5f3075c1..5a8df533c8a 100644 --- a/r/R/chunked-array.R +++ b/r/R/chunked-array.R @@ -75,8 +75,6 @@ ChunkedArray <- R6Class("ChunkedArray", inherit = ArrowObject, if (is.integer(i)) { i <- Array$create(i) } - # Invalid: Tried executing function with non-value type: ChunkedArray - # so use old methods shared_ptr(ChunkedArray, call_function("take", self, i)) }, Filter = function(i, keep_na = TRUE) { diff --git a/r/R/record-batch.R b/r/R/record-batch.R index 78d28cf3cb4..2ecde6fb285 100644 --- a/r/R/record-batch.R +++ b/r/R/record-batch.R @@ -119,8 +119,6 @@ RecordBatch <- R6Class("RecordBatch", inherit = ArrowObject, i <- Array$create(i) } assert_is(i, "Array") - # Invalid: Tried executing function with non-value type: RecordBatch - # so use old methods shared_ptr(RecordBatch, call_function("take", self, i)) }, Filter = function(i, keep_na = TRUE) { diff --git a/r/R/table.R b/r/R/table.R index 785a10979c6..73a25e76315 100644 --- a/r/R/table.R +++ b/r/R/table.R @@ -146,8 +146,6 @@ Table <- R6Class("Table", inherit = ArrowObject, if (is.integer(i)) { i <- Array$create(i) } - # Invalid: Tried executing function with non-value type: Table - # so use old methods shared_ptr(Table, call_function("take", self, i)) }, Filter = function(i, keep_na = TRUE) { From a05f2fbe4293b6a0b9c955e5d4d2cdb2490e99c7 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Sat, 6 Jun 2020 15:09:26 -0700 Subject: [PATCH 06/11] Add comments with JIRA todos --- r/R/compute.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/r/R/compute.R b/r/R/compute.R index 474908684dc..0ea64ea73c7 100644 --- a/r/R/compute.R +++ b/r/R/compute.R @@ -50,10 +50,12 @@ sum.Array <- function(..., na.rm = FALSE) { a <- ..1 if (!na.rm && a$null_count > 0) { # Arrow sum function always drops NAs so handle that here + # https://issues.apache.org/jira/browse/ARROW-9054 Scalar$create(NA_integer_, type = a$type) } else { if (inherits(a$type, "Boolean")) { # Bool sum not implemented so cast to int + # https://issues.apache.org/jira/browse/ARROW-9055 a <- a$cast(int8()) } shared_ptr(Scalar, call_function("sum", a)) @@ -73,10 +75,12 @@ mean.Array <- function(..., na.rm = FALSE) { a <- ..1 if (!na.rm && a$null_count > 0) { # Arrow sum/mean function always drops NAs so handle that here + # https://issues.apache.org/jira/browse/ARROW-9054 Scalar$create(NA_integer_, type = a$type) } else { if (inherits(a$type, "Boolean")) { # Bool sum/mean not implemented so cast to int + # https://issues.apache.org/jira/browse/ARROW-9055 a <- a$cast(int8()) } shared_ptr(Scalar, call_function("mean", a)) From f09f4f59dab711c60a03a75a6f62158731398cd8 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Sat, 6 Jun 2020 15:19:33 -0700 Subject: [PATCH 07/11] More JIRA --- r/tests/testthat/test-compute.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/r/tests/testthat/test-compute.R b/r/tests/testthat/test-compute.R index adcc202d810..30f293b9e66 100644 --- a/r/tests/testthat/test-compute.R +++ b/r/tests/testthat/test-compute.R @@ -46,7 +46,7 @@ test_that("sum.ChunkedArray", { }) test_that("sum.Scalar", { - skip("No sum method in arrow for Scalar") + skip("No sum method in arrow for Scalar: ARROW-9056") s <- Scalar$create(4) expect_identical(as.numeric(s), as.numeric(sum(s))) }) @@ -80,7 +80,7 @@ test_that("mean.ChunkedArray", { }) test_that("mean.Scalar", { - skip("No mean method in arrow for Scalar") + skip("No mean method in arrow for Scalar: ARROW-9056") s <- Scalar$create(4) expect_identical(as.vector(s), mean(s)) }) From aecbb21afde337f5e97b82cae87530fae4d72c3e Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Mon, 8 Jun 2020 08:30:09 -0700 Subject: [PATCH 08/11] Refactor --- r/R/compute.R | 66 ++++++++++++++++++++------------------------------- 1 file changed, 26 insertions(+), 40 deletions(-) diff --git a/r/R/compute.R b/r/R/compute.R index 0ea64ea73c7..a2a25d0c300 100644 --- a/r/R/compute.R +++ b/r/R/compute.R @@ -24,52 +24,25 @@ call_function <- function(function_name, ..., options = list()) { compute__CallFunction(function_name, list(...), options) } -CastOptions <- R6Class("CastOptions", inherit = ArrowObject) +#' @export +sum.Array <- function(..., na.rm = FALSE) scalar_aggregate("sum", ..., na.rm = na.rm) -#' Cast options -#' -#' @param safe enforce safe conversion -#' @param allow_int_overflow allow int conversion, `!safe` by default -#' @param allow_time_truncate allow time truncate, `!safe` by default -#' @param allow_float_truncate allow float truncate, `!safe` by default -#' #' @export -cast_options <- function(safe = TRUE, - allow_int_overflow = !safe, - allow_time_truncate = !safe, - allow_float_truncate = !safe) { - shared_ptr(CastOptions, - compute___CastOptions__initialize(allow_int_overflow, allow_time_truncate, allow_float_truncate) - ) -} +sum.ChunkedArray <- sum.Array #' @export -sum.Array <- function(..., na.rm = FALSE) { - args <- list(...) - assert_that(length(args) == 1) # TODO: make chunked array if there are multiple arrays - a <- ..1 - if (!na.rm && a$null_count > 0) { - # Arrow sum function always drops NAs so handle that here - # https://issues.apache.org/jira/browse/ARROW-9054 - Scalar$create(NA_integer_, type = a$type) - } else { - if (inherits(a$type, "Boolean")) { - # Bool sum not implemented so cast to int - # https://issues.apache.org/jira/browse/ARROW-9055 - a <- a$cast(int8()) - } - shared_ptr(Scalar, call_function("sum", a)) - } -} +sum.Scalar <- sum.Array #' @export -sum.ChunkedArray <- sum.Array +mean.Array <- function(..., na.rm = FALSE) scalar_aggregate("mean", ..., na.rm = na.rm) #' @export -sum.Scalar <- sum.Array +mean.ChunkedArray <- mean.Array #' @export -mean.Array <- function(..., na.rm = FALSE) { +mean.Scalar <- mean.Array + +scalar_aggregate <- function(FUN, ..., na.rm = FALSE) { args <- list(...) assert_that(length(args) == 1) # TODO: make chunked array if there are multiple arrays a <- ..1 @@ -83,12 +56,25 @@ mean.Array <- function(..., na.rm = FALSE) { # https://issues.apache.org/jira/browse/ARROW-9055 a <- a$cast(int8()) } - shared_ptr(Scalar, call_function("mean", a)) + shared_ptr(Scalar, call_function(FUN, a)) } } -#' @export -mean.ChunkedArray <- mean.Array +CastOptions <- R6Class("CastOptions", inherit = ArrowObject) +#' Cast options +#' +#' @param safe enforce safe conversion +#' @param allow_int_overflow allow int conversion, `!safe` by default +#' @param allow_time_truncate allow time truncate, `!safe` by default +#' @param allow_float_truncate allow float truncate, `!safe` by default +#' #' @export -mean.Scalar <- mean.Array +cast_options <- function(safe = TRUE, + allow_int_overflow = !safe, + allow_time_truncate = !safe, + allow_float_truncate = !safe) { + shared_ptr(CastOptions, + compute___CastOptions__initialize(allow_int_overflow, allow_time_truncate, allow_float_truncate) + ) +} From e23ff0a9fefe7b79643ea605cc59ac5dabf7d8d8 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Mon, 8 Jun 2020 09:00:32 -0700 Subject: [PATCH 09/11] Failed attempt at min/max methods --- r/R/compute.R | 6 ++++++ r/src/compute.cpp | 6 ++++++ r/tests/testthat/test-compute.R | 21 +++++++++++++++++++++ 3 files changed, 33 insertions(+) diff --git a/r/R/compute.R b/r/R/compute.R index a2a25d0c300..940ec8fe186 100644 --- a/r/R/compute.R +++ b/r/R/compute.R @@ -42,6 +42,12 @@ mean.ChunkedArray <- mean.Array #' @export mean.Scalar <- mean.Array +min.Array <- function(..., na.rm = FALSE) { + extrema <- scalar_aggregate("minmax", ..., na.rm = na.rm) + # TODO: StructScalar needs field accessor methods in C++: ARROW-9070 + Scalar$create(as.vector(extrema)$min) +} + scalar_aggregate <- function(FUN, ..., na.rm = FALSE) { args <- list(...) assert_that(length(args) == 1) # TODO: make chunked array if there are multiple arrays diff --git a/r/src/compute.cpp b/r/src/compute.cpp index b83576d3d26..11ac2aeda69 100644 --- a/r/src/compute.cpp +++ b/r/src/compute.cpp @@ -160,6 +160,12 @@ std::shared_ptr make_compute_options( return out; } + if (func_name == "minmax") { + auto out = std::make_shared( + arrow::compute::MinMaxOptions::Defaults()); + return out; + } + return nullptr; } diff --git a/r/tests/testthat/test-compute.R b/r/tests/testthat/test-compute.R index 30f293b9e66..02d6f997ee4 100644 --- a/r/tests/testthat/test-compute.R +++ b/r/tests/testthat/test-compute.R @@ -88,3 +88,24 @@ test_that("mean.Scalar", { test_that("Bad input handling of call_function", { expect_error(call_function("sum", 2, 3), "to_datum: Not implemented for type double") }) + +test_that("min/max.Array", { + ints <- 1:4 + a <- Array$create(ints) + expect_is(min(a), "Scalar") + expect_identical(as.vector(min(a)), min(ints)) + + floats <- c(1.3, 3, 2.4) + f <- Array$create(floats) + expect_identical(as.vector(min(f)), min(floats)) + + floats <- c(floats, NA) + na <- Array$create(floats) + expect_identical(as.vector(min(na)), min(floats)) + expect_is(min(na, na.rm = TRUE), "Scalar") + expect_identical(as.vector(min(na, na.rm = TRUE)), min(floats, na.rm = TRUE)) + + bools <- c(TRUE, TRUE, FALSE) + b <- Array$create(bools) + expect_identical(as.vector(min(b)), min(bools)) +}) From adb207c8bd503fa50ef0fae844495d16d482c4eb Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Mon, 8 Jun 2020 10:15:44 -0700 Subject: [PATCH 10/11] Skip --- r/tests/testthat/test-compute.R | 1 + 1 file changed, 1 insertion(+) diff --git a/r/tests/testthat/test-compute.R b/r/tests/testthat/test-compute.R index 02d6f997ee4..c3af4ce1388 100644 --- a/r/tests/testthat/test-compute.R +++ b/r/tests/testthat/test-compute.R @@ -90,6 +90,7 @@ test_that("Bad input handling of call_function", { }) test_that("min/max.Array", { + skip("arrow::MakeArrayFromScalar can't handle struct: ARROW-6604") ints <- 1:4 a <- Array$create(ints) expect_is(min(a), "Scalar") From 1151d8fed117d7bc328335e14b35efadfc175055 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Mon, 8 Jun 2020 16:35:41 -0700 Subject: [PATCH 11/11] Handle last todo --- r/R/compute.R | 23 ++++++++++++++++++++--- r/tests/testthat/test-compute.R | 6 ++++++ 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/r/R/compute.R b/r/R/compute.R index 940ec8fe186..a860b0ef91f 100644 --- a/r/R/compute.R +++ b/r/R/compute.R @@ -49,9 +49,7 @@ min.Array <- function(..., na.rm = FALSE) { } scalar_aggregate <- function(FUN, ..., na.rm = FALSE) { - args <- list(...) - assert_that(length(args) == 1) # TODO: make chunked array if there are multiple arrays - a <- ..1 + a <- collect_arrays_from_dots(list(...)) if (!na.rm && a$null_count > 0) { # Arrow sum/mean function always drops NAs so handle that here # https://issues.apache.org/jira/browse/ARROW-9054 @@ -66,6 +64,25 @@ scalar_aggregate <- function(FUN, ..., na.rm = FALSE) { } } +collect_arrays_from_dots <- function(dots) { + # Given a list that may contain both Arrays and ChunkedArrays, + # return a single ChunkedArray containing all of those chunks + # (may return a regular Array if there is only one element in dots) + assert_that(all(map_lgl(dots, is.Array))) + if (length(dots) == 1) { + return(dots[[1]]) + } + + arrays <- unlist(lapply(dots, function(x) { + if (inherits(x, "ChunkedArray")) { + x$chunks + } else { + x + } + })) + ChunkedArray$create(!!!arrays) +} + CastOptions <- R6Class("CastOptions", inherit = ArrowObject) #' Cast options diff --git a/r/tests/testthat/test-compute.R b/r/tests/testthat/test-compute.R index c3af4ce1388..4c2d0f26bd4 100644 --- a/r/tests/testthat/test-compute.R +++ b/r/tests/testthat/test-compute.R @@ -45,6 +45,12 @@ test_that("sum.ChunkedArray", { expect_identical(as.numeric(sum(a, na.rm = TRUE)), 35) }) +test_that("sum dots", { + a1 <- Array$create(1:4) + a2 <- ChunkedArray$create(1:4, c(1:4, NA), 1:5) + expect_identical(as.numeric(sum(a1, a2, na.rm = TRUE)), 45) +}) + test_that("sum.Scalar", { skip("No sum method in arrow for Scalar: ARROW-9056") s <- Scalar$create(4)