From 60158d4863a841e91180f36648b8ef86d3664aba Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Mon, 12 Apr 2021 18:25:36 +0100 Subject: [PATCH 1/6] Add test for any and all with chunked arrays --- r/tests/testthat/test-chunked-array.R | 49 +++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/r/tests/testthat/test-chunked-array.R b/r/tests/testthat/test-chunked-array.R index e72067a6d5f..74b98480f8b 100644 --- a/r/tests/testthat/test-chunked-array.R +++ b/r/tests/testthat/test-chunked-array.R @@ -135,6 +135,55 @@ test_that("ChunkedArray handles NaN", { expect_equal(as.vector(is.nan(x)), c(is.nan(data[[1]]), is.nan(data[[2]]), is.nan(data[[3]]))) }) +test_that("ChunkedArray handles any", { + data <- list(as.numeric(1:10), c(NA, 2:10), c(1:3, NA, 5L)) + x <- chunked_array(!!!data) + + expect_equal(x$type, float64()) + expect_equal(x$num_chunks, 3L) + expect_equal(length(x), 25L) + expect_equal(as.vector(x), c(1:10, c(NaN, 2:10), c(1:3, NaN, 5))) + + chunk <- x$chunks + expect_equal(as.vector(any(chunk[[1]] > 5)), any(data[[1]] > 5)) + expect_equal(as.vector(any(chunk[[2]] < 1)), any(data[[2]] < 1)) + expect_equal(as.vector(any(chunk[[2]] < 1, na.rm =TRUE)), any(data[[2]] < 1, na.rm = TRUE)) + + data_logical <- list(c(TRUE, FALSE, TRUE), c(NA, FALSE)) + x2 <- chunked_array(!!!data_logical) + chunks2 <- x2$chunks + expect_equal(any(chunks2), any(data_logical[[1]])) + expect_equal(any(chunks2[[2]]), any(data_logical[[2]])) + expect_equal(any(chunks2[[2]], na.rm =TRUE), any(data_logical[[2]], na.rm =TRUE)) + + + +}) + +test_that("ChunkedArray handles all", { + data <- list(as.numeric(1:10), c(NaN, 2:10), c(1:3, NaN, 5L)) + x <- chunked_array(!!!data) + + expect_equal(x$type, float64()) + expect_equal(x$num_chunks, 3L) + expect_equal(length(x), 25L) + expect_equal(as.vector(x), c(1:10, c(NaN, 2:10), c(1:3, NaN, 5))) + + chunk <- x$chunks + expect_equal(as.vector(all(chunk[[1]] > 5)), all(data[[1]] > 5)) + expect_equal(as.vector(all(chunk[[2]] < 1)), all(data[[2]] < 1)) + expect_equal(as.vector(all(chunk[[2]] < 1, na.rm =TRUE)), all(data[[2]] < 1, na.rm = TRUE)) + + data_logical <- list(c(TRUE, FALSE, TRUE), c(NA, FALSE)) + x2 <- chunked_array(!!!data_logical) + chunks2 <- x2$chunks + expect_equal(all(chunks2[[1]]), all(data_logical[[1]])) + expect_equal(all(chunks2[[2]]), all(data_logical[[2]])) + expect_equal(all(chunks2[[2]], na.rm =TRUE), all(data_logical[[2]], na.rm =TRUE)) + + +}) + test_that("ChunkedArray supports logical vectors (ARROW-3341)", { # with NA data <- purrr::rerun(3, sample(c(TRUE, FALSE, NA), 100, replace = TRUE)) From 8a237fbd1c41c407a0f99a5edb71991da096dc1f Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Wed, 14 Apr 2021 14:02:10 +0100 Subject: [PATCH 2/6] Add tests for any/all with Arrays, remove redundant tests for ChunkedArrays --- r/tests/testthat/test-Array.R | 31 +++++++++++++++++++++++++++ r/tests/testthat/test-chunked-array.R | 13 +---------- 2 files changed, 32 insertions(+), 12 deletions(-) diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R index b4fa8296d3a..37345053575 100644 --- a/r/tests/testthat/test-Array.R +++ b/r/tests/testthat/test-Array.R @@ -818,3 +818,34 @@ test_that("auto int64 conversion to int can be disabled (ARROW-10093)", { expect_true(inherits(as.data.frame(batch)$x, "integer64")) }) }) + +test_that("Array handles any", { + + data <- c(1:10, NA) + array_data <- Array$create(data) + + expect_equal(any(array_data > 5), any(data > 5)) + expect_equal(any(array_data < 1), any(data < 1)) + expect_equal(any(array_data < 1, na.rm = TRUE), any(data < 1, na.rm = TRUE)) + + data_logical <- c(TRUE, FALSE, TRUE, NA, FALSE) + array_data_logical <- Array$create(data_logical) + + expect_equal(any(array_data_logical), any(data_logical)) + expect_equal(any(array_data_logical, na.rm =TRUE), any(data_logical, na.rm =TRUE)) +}) + +test_that("Array handles all", { + data <- c(1:10, NA) + array_data <- Array$create(data) + + expect_equal(all(array_data > 5), all(data > 5)) + expect_equal(all(array_data < 11), all(data < 11)) + expect_equal(all(array_data < 11, na.rm = TRUE), all(data < 11, na.rm = TRUE)) + + data_logical <- c(TRUE, FALSE, TRUE, NA, FALSE) + array_data_logical <- Array$create(data_logical) + + expect_equal(all(array_data_logical), all(data_logical)) + expect_equal(all(array_data_logical, na.rm =TRUE), all(data_logical, na.rm =TRUE)) +}) \ No newline at end of file diff --git a/r/tests/testthat/test-chunked-array.R b/r/tests/testthat/test-chunked-array.R index 74b98480f8b..e610c178197 100644 --- a/r/tests/testthat/test-chunked-array.R +++ b/r/tests/testthat/test-chunked-array.R @@ -139,11 +139,6 @@ test_that("ChunkedArray handles any", { data <- list(as.numeric(1:10), c(NA, 2:10), c(1:3, NA, 5L)) x <- chunked_array(!!!data) - expect_equal(x$type, float64()) - expect_equal(x$num_chunks, 3L) - expect_equal(length(x), 25L) - expect_equal(as.vector(x), c(1:10, c(NaN, 2:10), c(1:3, NaN, 5))) - chunk <- x$chunks expect_equal(as.vector(any(chunk[[1]] > 5)), any(data[[1]] > 5)) expect_equal(as.vector(any(chunk[[2]] < 1)), any(data[[2]] < 1)) @@ -156,25 +151,19 @@ test_that("ChunkedArray handles any", { expect_equal(any(chunks2[[2]]), any(data_logical[[2]])) expect_equal(any(chunks2[[2]], na.rm =TRUE), any(data_logical[[2]], na.rm =TRUE)) - - }) test_that("ChunkedArray handles all", { data <- list(as.numeric(1:10), c(NaN, 2:10), c(1:3, NaN, 5L)) x <- chunked_array(!!!data) - expect_equal(x$type, float64()) - expect_equal(x$num_chunks, 3L) - expect_equal(length(x), 25L) - expect_equal(as.vector(x), c(1:10, c(NaN, 2:10), c(1:3, NaN, 5))) - chunk <- x$chunks expect_equal(as.vector(all(chunk[[1]] > 5)), all(data[[1]] > 5)) expect_equal(as.vector(all(chunk[[2]] < 1)), all(data[[2]] < 1)) expect_equal(as.vector(all(chunk[[2]] < 1, na.rm =TRUE)), all(data[[2]] < 1, na.rm = TRUE)) data_logical <- list(c(TRUE, FALSE, TRUE), c(NA, FALSE)) + x2 <- chunked_array(!!!data_logical) chunks2 <- x2$chunks expect_equal(all(chunks2[[1]]), all(data_logical[[1]])) From 2a20973fef81a39623b7be9d749398cdd206aa36 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Tue, 20 Apr 2021 09:33:02 +0100 Subject: [PATCH 3/6] Update NAMESPACE --- r/NAMESPACE | 2 ++ 1 file changed, 2 insertions(+) diff --git a/r/NAMESPACE b/r/NAMESPACE index 567353876ca..4cc8df46d33 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -22,7 +22,9 @@ S3method("names<-",ArrowTabular) S3method(Ops,ArrowDatum) S3method(Ops,Expression) S3method(Ops,array_expression) +S3method(all,ArrowDatum) S3method(all,equal.ArrowObject) +S3method(any,ArrowDatum) S3method(as.character,ArrowDatum) S3method(as.character,FileFormat) S3method(as.character,FragmentScanOptions) From 3eaa1f8f9dc480efeb742ec3dc3f2596b5c92ad8 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Tue, 20 Apr 2021 09:33:13 +0100 Subject: [PATCH 4/6] Add any/all bindings --- r/R/compute.R | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/r/R/compute.R b/r/R/compute.R index 1b79d76f037..eddfc3ce2b9 100644 --- a/r/R/compute.R +++ b/r/R/compute.R @@ -186,6 +186,32 @@ unique.ArrowDatum <- function(x, incomparables = FALSE, ...) { call_function("unique", x) } +#' @export +any.ArrowDatum <- function(..., na.rm = FALSE){ + + a <- collect_arrays_from_dots(list(...)) + result <- call_function("any", a) + + if(!na.rm && a$null_count > 0 && !as.vector(result)){ + Scalar$create(NA) + } else { + result + } +} + +#' @export +all.ArrowDatum <- function(..., na.rm = FALSE){ + + a <- collect_arrays_from_dots(list(...)) + result <- call_function("all", a) + + if(!na.rm && a$null_count > 0 && as.vector(result)){ + Scalar$create(NA) + } else { + result + } +} + #' `match` and `%in%` for Arrow objects #' #' `base::match()` is not a generic, so we can't just define Arrow methods for From 12cb6cb6563e20d19c121668d65514c378b3219d Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Tue, 20 Apr 2021 09:33:57 +0100 Subject: [PATCH 5/6] Refactor tests for any and all --- r/tests/testthat/test-Array.R | 31 ------------------ r/tests/testthat/test-chunked-array.R | 38 ----------------------- r/tests/testthat/test-compute-aggregate.R | 30 ++++++++++++++++++ 3 files changed, 30 insertions(+), 69 deletions(-) diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R index 37345053575..b4fa8296d3a 100644 --- a/r/tests/testthat/test-Array.R +++ b/r/tests/testthat/test-Array.R @@ -818,34 +818,3 @@ test_that("auto int64 conversion to int can be disabled (ARROW-10093)", { expect_true(inherits(as.data.frame(batch)$x, "integer64")) }) }) - -test_that("Array handles any", { - - data <- c(1:10, NA) - array_data <- Array$create(data) - - expect_equal(any(array_data > 5), any(data > 5)) - expect_equal(any(array_data < 1), any(data < 1)) - expect_equal(any(array_data < 1, na.rm = TRUE), any(data < 1, na.rm = TRUE)) - - data_logical <- c(TRUE, FALSE, TRUE, NA, FALSE) - array_data_logical <- Array$create(data_logical) - - expect_equal(any(array_data_logical), any(data_logical)) - expect_equal(any(array_data_logical, na.rm =TRUE), any(data_logical, na.rm =TRUE)) -}) - -test_that("Array handles all", { - data <- c(1:10, NA) - array_data <- Array$create(data) - - expect_equal(all(array_data > 5), all(data > 5)) - expect_equal(all(array_data < 11), all(data < 11)) - expect_equal(all(array_data < 11, na.rm = TRUE), all(data < 11, na.rm = TRUE)) - - data_logical <- c(TRUE, FALSE, TRUE, NA, FALSE) - array_data_logical <- Array$create(data_logical) - - expect_equal(all(array_data_logical), all(data_logical)) - expect_equal(all(array_data_logical, na.rm =TRUE), all(data_logical, na.rm =TRUE)) -}) \ No newline at end of file diff --git a/r/tests/testthat/test-chunked-array.R b/r/tests/testthat/test-chunked-array.R index e610c178197..e72067a6d5f 100644 --- a/r/tests/testthat/test-chunked-array.R +++ b/r/tests/testthat/test-chunked-array.R @@ -135,44 +135,6 @@ test_that("ChunkedArray handles NaN", { expect_equal(as.vector(is.nan(x)), c(is.nan(data[[1]]), is.nan(data[[2]]), is.nan(data[[3]]))) }) -test_that("ChunkedArray handles any", { - data <- list(as.numeric(1:10), c(NA, 2:10), c(1:3, NA, 5L)) - x <- chunked_array(!!!data) - - chunk <- x$chunks - expect_equal(as.vector(any(chunk[[1]] > 5)), any(data[[1]] > 5)) - expect_equal(as.vector(any(chunk[[2]] < 1)), any(data[[2]] < 1)) - expect_equal(as.vector(any(chunk[[2]] < 1, na.rm =TRUE)), any(data[[2]] < 1, na.rm = TRUE)) - - data_logical <- list(c(TRUE, FALSE, TRUE), c(NA, FALSE)) - x2 <- chunked_array(!!!data_logical) - chunks2 <- x2$chunks - expect_equal(any(chunks2), any(data_logical[[1]])) - expect_equal(any(chunks2[[2]]), any(data_logical[[2]])) - expect_equal(any(chunks2[[2]], na.rm =TRUE), any(data_logical[[2]], na.rm =TRUE)) - -}) - -test_that("ChunkedArray handles all", { - data <- list(as.numeric(1:10), c(NaN, 2:10), c(1:3, NaN, 5L)) - x <- chunked_array(!!!data) - - chunk <- x$chunks - expect_equal(as.vector(all(chunk[[1]] > 5)), all(data[[1]] > 5)) - expect_equal(as.vector(all(chunk[[2]] < 1)), all(data[[2]] < 1)) - expect_equal(as.vector(all(chunk[[2]] < 1, na.rm =TRUE)), all(data[[2]] < 1, na.rm = TRUE)) - - data_logical <- list(c(TRUE, FALSE, TRUE), c(NA, FALSE)) - - x2 <- chunked_array(!!!data_logical) - chunks2 <- x2$chunks - expect_equal(all(chunks2[[1]]), all(data_logical[[1]])) - expect_equal(all(chunks2[[2]]), all(data_logical[[2]])) - expect_equal(all(chunks2[[2]], na.rm =TRUE), all(data_logical[[2]], na.rm =TRUE)) - - -}) - test_that("ChunkedArray supports logical vectors (ARROW-3341)", { # with NA data <- purrr::rerun(3, sample(c(TRUE, FALSE, NA), 100, replace = TRUE)) diff --git a/r/tests/testthat/test-compute-aggregate.R b/r/tests/testthat/test-compute-aggregate.R index 77010579d78..0621b7779c7 100644 --- a/r/tests/testthat/test-compute-aggregate.R +++ b/r/tests/testthat/test-compute-aggregate.R @@ -351,3 +351,33 @@ test_that("value_counts", { expect_identical(as.data.frame(value_counts(a)), result_df) expect_identical(as.vector(value_counts(a)$counts), result_df$counts) }) + +test_that("any.Array and any.ChunkedArray", { + + data <- c(1:10, NA, NA) + + expect_vector_equal(any(input > 5), data) + expect_vector_equal(any(input < 1), data) + expect_vector_equal(any(input < 1, na.rm = TRUE), data) + + data_logical <- c(TRUE, FALSE, TRUE, NA, FALSE) + + expect_vector_equal(any(input), data_logical) + expect_vector_equal(any(input, na.rm = TRUE), data_logical) + +}) + +test_that("all.Array and all.ChunkedArray", { + + data <- c(1:10, NA, NA) + + expect_vector_equal(all(input > 5), data) + expect_vector_equal(all(input < 11), data) + expect_vector_equal(all(input < 11, na.rm = TRUE), data) + + data_logical <- c(TRUE, TRUE, NA) + + expect_vector_equal(all(input), data_logical) + expect_vector_equal(all(input, na.rm = TRUE), data_logical) + +}) From 8c27964f97ab249bb58528cb9b04236964ba742f Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Wed, 21 Apr 2021 12:55:23 -0700 Subject: [PATCH 6/6] Add comments and tweak whitespace --- r/R/compute.R | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/r/R/compute.R b/r/R/compute.R index eddfc3ce2b9..0641bf1615c 100644 --- a/r/R/compute.R +++ b/r/R/compute.R @@ -192,7 +192,9 @@ any.ArrowDatum <- function(..., na.rm = FALSE){ a <- collect_arrays_from_dots(list(...)) result <- call_function("any", a) - if(!na.rm && a$null_count > 0 && !as.vector(result)){ + if (!na.rm && a$null_count > 0 && !as.vector(result)) { + # Three-valued logic: with na.rm = FALSE, any(c(TRUE, NA)) returns TRUE but any(c(FALSE, NA)) returns NA + # TODO: C++ library should take na.rm for any/all (like ARROW-9054) Scalar$create(NA) } else { result @@ -205,7 +207,8 @@ all.ArrowDatum <- function(..., na.rm = FALSE){ a <- collect_arrays_from_dots(list(...)) result <- call_function("all", a) - if(!na.rm && a$null_count > 0 && as.vector(result)){ + if (!na.rm && a$null_count > 0 && as.vector(result)) { + # See comment above in any() about three-valued logic Scalar$create(NA) } else { result