From 46775b4b43132fb4384c5a5c830eff3c32af2ff2 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Thu, 15 Apr 2021 15:20:15 +0100 Subject: [PATCH 01/26] Import na.* functions from stats --- r/R/arrow-package.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R index 30d59491d79..51f4987484c 100644 --- a/r/R/arrow-package.R +++ b/r/R/arrow-package.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -#' @importFrom stats quantile median +#' @importFrom stats quantile median na.omit na.exclude na.pass na.fail #' @importFrom R6 R6Class #' @importFrom purrr as_mapper map map2 map_chr map_dfr map_int map_lgl keep #' @importFrom assertthat assert_that is.string From f826a247bc58f8e649089ea53287843c12033170 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Thu, 15 Apr 2021 15:22:31 +0100 Subject: [PATCH 02/26] Import na.* functions from stats in namespace --- r/NAMESPACE | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/r/NAMESPACE b/r/NAMESPACE index 567353876ca..e0eed13394a 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -318,6 +318,10 @@ importFrom(rlang,set_names) importFrom(rlang,syms) importFrom(rlang,warn) importFrom(stats,median) +importFrom(stats,na.exclude) +importFrom(stats,na.fail) +importFrom(stats,na.omit) +importFrom(stats,na.pass) importFrom(stats,quantile) importFrom(tidyselect,contains) importFrom(tidyselect,ends_with) From 88de5c5fb088f5edbeabdbdbae63631707d61f85 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Thu, 15 Apr 2021 17:13:18 +0100 Subject: [PATCH 03/26] Add tests for na.omit, na.exclude, na.fail, and na.pass for ChunkedArray objects --- r/tests/testthat/test-chunked-array.R | 42 +++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/r/tests/testthat/test-chunked-array.R b/r/tests/testthat/test-chunked-array.R index e72067a6d5f..033d35c4c1d 100644 --- a/r/tests/testthat/test-chunked-array.R +++ b/r/tests/testthat/test-chunked-array.R @@ -135,6 +135,48 @@ test_that("ChunkedArray handles NaN", { expect_equal(as.vector(is.nan(x)), c(is.nan(data[[1]]), is.nan(data[[2]]), is.nan(data[[3]]))) }) +test_that("ChunkedArray handles na.omit", { + data <- list(as.numeric(1:10), c(NA_real_, 2:10), c(NA_real_)) + x <- chunked_array(!!!data) + chunks <- x$chunks + expect_equal(na.omit(chunks[[1]]), na.omit(data[[1]])) + expect_equal(na.omit(chunks[[2]]), na.omit(data[[2]])) + expect_equal(na.omit(chunks[[3]]), na.omit(data[[3]])) + expect_vector(na.omit(x), na.omit(c(data[[1]], data[[2]], data[[3]]))) +}) + +test_that("ChunkedArray handles na.exclude", { + data <- list(as.numeric(1:10), c(NA_real_, 2:10), c(NA_real_)) + x <- chunked_array(!!!data) + chunks <- x$chunks + expect_equal(na.exclude(chunks[[1]]), na.exclude(data[[1]])) + expect_equal(na.exclude(chunks[[2]]), na.exclude(data[[2]])) + expect_equal(na.exclude(chunks[[3]]), na.exclude(data[[3]])) + expect_vector(na.exclude(x), na.exclude(c(data[[1]], data[[2]], data[[3]]))) +}) + +test_that("ChunkedArray handles na.fail", { + data <- list(as.numeric(1:10), c(NA_real_, 2:10), c(NA_real_)) + x <- chunked_array(!!!data) + chunks <- x$chunks + expect_equal(na.fail(chunks[[1]]), na.fail(data[[1]])) + expect_equal(na.fail(chunks[[2]]), na.fail(data[[2]])) + expect_equal(na.fail(chunks[[3]]), na.fail(data[[3]])) + expect_error(na.fail(x), regexp = "missing values in object") + +}) + +test_that("ChunkedArray handles na.pass", { + data <- list(as.numeric(1:10), c(NA_real_, 2:10), c(NA_real_)) + x <- chunked_array(!!!data) + chunks <- x$chunks + expect_equal(na.pass(chunks[[1]]), na.pass(data[[1]])) + expect_equal(na.pass(chunks[[2]]), na.pass(data[[2]])) + expect_equal(na.pass(chunks[[3]]), na.pass(data[[3]])) + expect_vector(as.vector(na.pass(x)), unlist(na.pass(data))) +}) + + test_that("ChunkedArray supports logical vectors (ARROW-3341)", { # with NA data <- purrr::rerun(3, sample(c(TRUE, FALSE, NA), 100, replace = TRUE)) From 6fc0d5d65162682a17e7e4d7689bf7415d21b2ac Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Fri, 16 Apr 2021 13:19:44 +0100 Subject: [PATCH 04/26] Add na.omit, na.exclude, na.pass, and na.fail functions for ArrowDatum objects --- r/R/arrow-datum.R | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/r/R/arrow-datum.R b/r/R/arrow-datum.R index dd43307c9cc..7d371378f37 100644 --- a/r/R/arrow-datum.R +++ b/r/R/arrow-datum.R @@ -46,6 +46,29 @@ as.vector.ArrowDatum <- function(x, mode) { ) } +#'@export +na.omit.ArrowDatum <- function(x){ + x$Filter(!is.na(x)) +} + +#'@export +na.exclude.ArrowDatum <- function(x){ + x$Filter(!is.na(x)) +} + +#'@export +na.pass.ArrowDatum <- function(x){ + x +} + +#'@export +na.fail.ArrowDatum <- function(x){ + if(x$null_count > 0){ + stop("missing values in object") + } + x +} + filter_rows <- function(x, i, keep_na = TRUE, ...) { # General purpose function for [ row subsetting with R semantics # Based on the input for `i`, calls x$Filter, x$Slice, or x$Take @@ -163,3 +186,4 @@ sort.ArrowDatum <- function(x, decreasing = FALSE, na.last = NA, ...) { tbl$x$Take(tbl$SortIndices(names = c("is_na", "x"), descending = c(TRUE, decreasing))) } } + From c180cc8a7e71e7753e9d51f88d3175f92a17f12f Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Fri, 16 Apr 2021 13:20:53 +0100 Subject: [PATCH 05/26] Add expect_vector_error and expect_vector_equivalent functions for testing --- r/tests/testthat/helper-expectation.R | 114 ++++++++++++++++++++++++-- 1 file changed, 109 insertions(+), 5 deletions(-) diff --git a/r/tests/testthat/helper-expectation.R b/r/tests/testthat/helper-expectation.R index 2ebd44f7bba..0dae2d4449f 100644 --- a/r/tests/testthat/helper-expectation.R +++ b/r/tests/testthat/helper-expectation.R @@ -158,7 +158,6 @@ expect_vector_equal <- function(expr, # A vectorized R expression containing `in ...) { expr <- rlang::enquo(expr) expected <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(input = vec))) - skip_msg <- NULL if (is.null(skip_array)) { @@ -173,12 +172,11 @@ expect_vector_equal <- function(expr, # A vectorized R expression containing `in if (is.null(skip_chunked_array)) { # split input vector into two to exercise ChunkedArray with >1 chunk - vec_split <- length(vec) %/% 2 - vec1 <- vec[seq(from = min(1, length(vec) - 1), to = min(length(vec) - 1, vec_split), by = 1)] - vec2 <- vec[seq(from = min(length(vec), vec_split + 1), to = length(vec), by = 1)] + split_vector <- split_vector_as_list(vec) + via_chunked <- rlang::eval_tidy( expr, - rlang::new_data_mask(rlang::env(input = ChunkedArray$create(vec1, vec2))) + rlang::new_data_mask(rlang::env(input = ChunkedArray$create(split_vector[[1]], split_vector[[2]]))) ) expect_vector(via_chunked, expected, ...) } else { @@ -189,3 +187,109 @@ expect_vector_equal <- function(expr, # A vectorized R expression containing `in skip(paste(skip_msg, collpase = "\n")) } } + +expect_vector_equivalent <- function(expr, # A vectorized R expression containing `input` as its input + vec, # A vector as reference, will make Array/ChunkedArray with + skip_array = NULL, # Msg, if should skip Array test + skip_chunked_array = NULL, # Msg, if should skip ChunkedArray test + ...) { + expr <- rlang::enquo(expr) + expected <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(input = vec))) + skip_msg <- NULL + + if (is.null(skip_array)) { + via_array <- rlang::eval_tidy( + expr, + rlang::new_data_mask(rlang::env(input = Array$create(vec))) + ) + + expect_equivalent(as.vector(via_array), expected, ...) + } else { + skip_msg <- c(skip_msg, skip_array) + } + + if (is.null(skip_chunked_array)) { + # split input vector into two to exercise ChunkedArray with >1 chunk + split_vector <- split_vector_as_list(vec) + + via_chunked <- rlang::eval_tidy( + expr, + rlang::new_data_mask(rlang::env(input = ChunkedArray$create(split_vector[[1]], split_vector[[2]]))) + ) + expect_equivalent(as.vector(via_chunked), expected, ...) + } else { + skip_msg <- c(skip_msg, skip_chunked_array) + } + + if (!is.null(skip_msg)) { + skip(paste(skip_msg, collpase = "\n")) + } +} + +expect_vector_error <- function(expr, # A vectorized R expression containing `input` as its input + vec, # A vector as reference, will make Array/ChunkedArray with + skip_array = NULL, # Msg, if should skip Array test + skip_chunked_array = NULL, # Msg, if should skip ChunkedArray test + ...) { + + expr <- rlang::enquo(expr) + + msg <- tryCatch( + rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(input = vec))), + error = function (e) { + msg <- conditionMessage(e) + + pattern <- i18ize_error_messages() + + if (grepl(pattern, msg)) { + msg <- sub(paste0("^.*(", pattern, ").*$"), "\\1", msg) + } + msg + } + ) + + expect_true(identical(typeof(msg), "character"), label = "vector errored") + + skip_msg <- NULL + + if (is.null(skip_array)) { + + expect_error( + rlang::eval_tidy( + expr, + rlang::new_data_mask(rlang::env(input = Array$create(vec))) + ), + msg, + ... + ) + } else { + skip_msg <- c(skip_msg, skip_array) + } + + if (is.null(skip_chunked_array)) { + # split input vector into two to exercise ChunkedArray with >1 chunk + split_vector <- split_vector_as_list(vec) + + expect_error( + rlang::eval_tidy( + expr, + rlang::new_data_mask(rlang::env(input = ChunkedArray$create(split_vector[[1]], split_vector[[2]]))) + ), + msg, + ... + ) + } else { + skip_msg <- c(skip_msg, skip_chunked_array) + } + + if (!is.null(skip_msg)) { + skip(paste(skip_msg, collpase = "\n")) + } +} + +split_vector_as_list <- function(vec){ + vec_split <- length(vec) %/% 2 + vec1 <- vec[seq(from = min(1, length(vec) - 1), to = min(length(vec) - 1, vec_split), by = 1)] + vec2 <- vec[seq(from = min(length(vec), vec_split + 1), to = length(vec), by = 1)] + list(vec1, vec2) +} From ec75960dcf7cace3f749c802c0015301283faed0 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Fri, 16 Apr 2021 13:21:38 +0100 Subject: [PATCH 06/26] Remove individual tests for na.* functions in favour of using expect_vector_equal etc --- r/tests/testthat/test-chunked-array.R | 42 --------------------------- r/tests/testthat/test-na-omit.R | 34 ++++++++++++++++++++++ 2 files changed, 34 insertions(+), 42 deletions(-) create mode 100644 r/tests/testthat/test-na-omit.R diff --git a/r/tests/testthat/test-chunked-array.R b/r/tests/testthat/test-chunked-array.R index 033d35c4c1d..e72067a6d5f 100644 --- a/r/tests/testthat/test-chunked-array.R +++ b/r/tests/testthat/test-chunked-array.R @@ -135,48 +135,6 @@ test_that("ChunkedArray handles NaN", { expect_equal(as.vector(is.nan(x)), c(is.nan(data[[1]]), is.nan(data[[2]]), is.nan(data[[3]]))) }) -test_that("ChunkedArray handles na.omit", { - data <- list(as.numeric(1:10), c(NA_real_, 2:10), c(NA_real_)) - x <- chunked_array(!!!data) - chunks <- x$chunks - expect_equal(na.omit(chunks[[1]]), na.omit(data[[1]])) - expect_equal(na.omit(chunks[[2]]), na.omit(data[[2]])) - expect_equal(na.omit(chunks[[3]]), na.omit(data[[3]])) - expect_vector(na.omit(x), na.omit(c(data[[1]], data[[2]], data[[3]]))) -}) - -test_that("ChunkedArray handles na.exclude", { - data <- list(as.numeric(1:10), c(NA_real_, 2:10), c(NA_real_)) - x <- chunked_array(!!!data) - chunks <- x$chunks - expect_equal(na.exclude(chunks[[1]]), na.exclude(data[[1]])) - expect_equal(na.exclude(chunks[[2]]), na.exclude(data[[2]])) - expect_equal(na.exclude(chunks[[3]]), na.exclude(data[[3]])) - expect_vector(na.exclude(x), na.exclude(c(data[[1]], data[[2]], data[[3]]))) -}) - -test_that("ChunkedArray handles na.fail", { - data <- list(as.numeric(1:10), c(NA_real_, 2:10), c(NA_real_)) - x <- chunked_array(!!!data) - chunks <- x$chunks - expect_equal(na.fail(chunks[[1]]), na.fail(data[[1]])) - expect_equal(na.fail(chunks[[2]]), na.fail(data[[2]])) - expect_equal(na.fail(chunks[[3]]), na.fail(data[[3]])) - expect_error(na.fail(x), regexp = "missing values in object") - -}) - -test_that("ChunkedArray handles na.pass", { - data <- list(as.numeric(1:10), c(NA_real_, 2:10), c(NA_real_)) - x <- chunked_array(!!!data) - chunks <- x$chunks - expect_equal(na.pass(chunks[[1]]), na.pass(data[[1]])) - expect_equal(na.pass(chunks[[2]]), na.pass(data[[2]])) - expect_equal(na.pass(chunks[[3]]), na.pass(data[[3]])) - expect_vector(as.vector(na.pass(x)), unlist(na.pass(data))) -}) - - test_that("ChunkedArray supports logical vectors (ARROW-3341)", { # with NA data <- purrr::rerun(3, sample(c(TRUE, FALSE, NA), 100, replace = TRUE)) diff --git a/r/tests/testthat/test-na-omit.R b/r/tests/testthat/test-na-omit.R new file mode 100644 index 00000000000..a489dbcbeea --- /dev/null +++ b/r/tests/testthat/test-na-omit.R @@ -0,0 +1,34 @@ +data_no_na <- c(2:10) +data_na <- c(NA_real_, data_no_na) +scalar_na <- Scalar$create(NA) +scalar_one <- Scalar$create(1) + +test_that("na.omit on Array and ChunkedArray", { + expect_vector_equal(na.omit(input), data_no_na) + expect_vector_equivalent(na.omit(input), data_na) +}) + +test_that("na.exclude on Array and ChunkedArray", { + expect_vector_equal(na.exclude(input), data_no_na) + expect_vector_equivalent(na.exclude(input), data_na) +}) + +test_that("na.fail on Array and ChunkedArray", { + expect_vector_equivalent(na.fail(input), data_no_na) + expect_vector_error(na.fail(input), data_na) +}) + +test_that("na.pass on Array and ChunkedArray", { + expect_vector_equivalent(na.pass(input), data_no_na) + expect_vector_equal(na.pass(input), data_na) +}) + +test_that("na.fail on Scalar", { + expect_error(na.fail(scalar_na), regexp = "missing values in object") + expect_vector(na.fail(scalar_one), na.fail(1)) +}) + +test_that("na.pass on Scalar", { + expect_vector(na.pass(scalar_na), na.pass(NA)) + expect_vector(na.pass(scalar_one), na.pass(1)) +}) From 74fb6f7751d1496999eddbbcacd6409d82c4aa7a Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Fri, 16 Apr 2021 14:48:13 +0100 Subject: [PATCH 07/26] Add tests for Table object and na.* functions --- r/tests/testthat/test-na-omit.R | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/r/tests/testthat/test-na-omit.R b/r/tests/testthat/test-na-omit.R index a489dbcbeea..f929d954c5d 100644 --- a/r/tests/testthat/test-na-omit.R +++ b/r/tests/testthat/test-na-omit.R @@ -2,6 +2,7 @@ data_no_na <- c(2:10) data_na <- c(NA_real_, data_no_na) scalar_na <- Scalar$create(NA) scalar_one <- Scalar$create(1) +tbl <- Table$create(example_data) test_that("na.omit on Array and ChunkedArray", { expect_vector_equal(na.omit(input), data_no_na) @@ -32,3 +33,19 @@ test_that("na.pass on Scalar", { expect_vector(na.pass(scalar_na), na.pass(NA)) expect_vector(na.pass(scalar_one), na.pass(1)) }) + +test_that("na.omit on Table", { + expect_data_frame(na.omit(tbl), na.omit(example_data)) +}) + +test_that("na.exclude on Table", { + expect_data_frame(na.exclude(tbl), na.exclude(example_data)) +}) + +test_that("na.fail on Table", { + expect_data_frame(na.fail(tbl), na.fail(example_data)) +}) + +test_that("na.pass on Table", { + expect_data_frame(na.pass(tbl), na.pass(example_data)) +}) From 275e19d2927c28925732242395dd140ad5c98ecd Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Fri, 16 Apr 2021 16:05:33 +0100 Subject: [PATCH 08/26] Remove duplicate identical function and remove na.pass --- r/R/arrow-datum.R | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/r/R/arrow-datum.R b/r/R/arrow-datum.R index 7d371378f37..5cc9cec87f7 100644 --- a/r/R/arrow-datum.R +++ b/r/R/arrow-datum.R @@ -52,14 +52,7 @@ na.omit.ArrowDatum <- function(x){ } #'@export -na.exclude.ArrowDatum <- function(x){ - x$Filter(!is.na(x)) -} - -#'@export -na.pass.ArrowDatum <- function(x){ - x -} +na.exclude.ArrowDatum <- na.omit.ArrowDatum #'@export na.fail.ArrowDatum <- function(x){ From fb22de4783e72b9ca285f7a12594005ec0f4e4c3 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Fri, 16 Apr 2021 16:06:01 +0100 Subject: [PATCH 09/26] Implement na.fail, na.omit, and na.exclude for ArrowTabular objects --- r/R/arrow-tabular.R | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/r/R/arrow-tabular.R b/r/R/arrow-tabular.R index f32111688a2..1a980a75dfd 100644 --- a/r/R/arrow-tabular.R +++ b/r/R/arrow-tabular.R @@ -211,6 +211,31 @@ head.ArrowTabular <- head.ArrowDatum #' @export tail.ArrowTabular <- tail.ArrowDatum +#' @export +na.fail.ArrowTabular <- function(x){ + + na_count <- sum(purrr::map_int(x$columns, ~.x$null_count)) + if(na_count > 0){ + stop("missing values in object") + } + x + +} + +#' @export +na.omit.ArrowTabular <- function(x){ + + na_expr <- paste0("!is.na(", names(x), ")", collapse = ",") + filter_expr <- paste0("dplyr::filter(x,", na_expr, ")") + expression <- rlang::parse_expr(filter_expr) + + rlang::eval_tidy(expression) + +} + +#' @export +na.exclude.ArrowTabular <- na.omit.ArrowTabular + ToString_tabular <- function(x, ...) { # Generic to work with both RecordBatch and Table sch <- unlist(strsplit(x$schema$ToString(), "\n")) From c42a91e4adf78713ef767862922a2955884d652c Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Fri, 16 Apr 2021 16:06:54 +0100 Subject: [PATCH 10/26] Remove redundant na.pass tests, and swap out expect_data_frame for expect_equivalent as failing due to attribute mismatch --- r/tests/testthat/test-na-omit.R | 47 ++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/r/tests/testthat/test-na-omit.R b/r/tests/testthat/test-na-omit.R index f929d954c5d..d537455e88a 100644 --- a/r/tests/testthat/test-na-omit.R +++ b/r/tests/testthat/test-na-omit.R @@ -1,8 +1,26 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + data_no_na <- c(2:10) data_na <- c(NA_real_, data_no_na) scalar_na <- Scalar$create(NA) scalar_one <- Scalar$create(1) tbl <- Table$create(example_data) +batch <- record_batch(example_data) test_that("na.omit on Array and ChunkedArray", { expect_vector_equal(na.omit(input), data_no_na) @@ -19,33 +37,32 @@ test_that("na.fail on Array and ChunkedArray", { expect_vector_error(na.fail(input), data_na) }) -test_that("na.pass on Array and ChunkedArray", { - expect_vector_equivalent(na.pass(input), data_no_na) - expect_vector_equal(na.pass(input), data_na) -}) - test_that("na.fail on Scalar", { expect_error(na.fail(scalar_na), regexp = "missing values in object") expect_vector(na.fail(scalar_one), na.fail(1)) }) -test_that("na.pass on Scalar", { - expect_vector(na.pass(scalar_na), na.pass(NA)) - expect_vector(na.pass(scalar_one), na.pass(1)) -}) - test_that("na.omit on Table", { - expect_data_frame(na.omit(tbl), na.omit(example_data)) + expect_equivalent(as.data.frame(na.omit(tbl)), na.omit(example_data)) }) test_that("na.exclude on Table", { - expect_data_frame(na.exclude(tbl), na.exclude(example_data)) + expect_equivalent(as.data.frame(na.exclude(tbl)), na.exclude(example_data)) }) test_that("na.fail on Table", { - expect_data_frame(na.fail(tbl), na.fail(example_data)) + expect_error(na.fail(tbl), "missing values in object") }) -test_that("na.pass on Table", { - expect_data_frame(na.pass(tbl), na.pass(example_data)) +test_that("na.omit on RecordBatch", { + expect_equivalent(as.data.frame(na.omit(batch)), na.omit(example_data)) }) + +test_that("na.exclude on RecordBatch", { + expect_equivalent(as.data.frame(na.exclude(batch)), na.omit(example_data)) +}) + +test_that("na.fail on RecordBatch", { + expect_error(na.fail(batch), "missing values in object") +}) + From f0716bdd6fa67417f9cccbd5740088417372f549 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Fri, 16 Apr 2021 16:07:57 +0100 Subject: [PATCH 11/26] Update NAMESPACE --- r/NAMESPACE | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/r/NAMESPACE b/r/NAMESPACE index e0eed13394a..1dda6e82b4a 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -62,6 +62,12 @@ S3method(max,ArrowDatum) S3method(mean,ArrowDatum) S3method(median,ArrowDatum) S3method(min,ArrowDatum) +S3method(na.exclude,ArrowDatum) +S3method(na.exclude,ArrowTabular) +S3method(na.fail,ArrowDatum) +S3method(na.fail,ArrowTabular) +S3method(na.omit,ArrowDatum) +S3method(na.omit,ArrowTabular) S3method(names,Dataset) S3method(names,FeatherReader) S3method(names,RecordBatch) From 3786af408f82662b6b6376e13941c49983f3223a Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Fri, 16 Apr 2021 16:30:32 +0100 Subject: [PATCH 12/26] Update function arguments to match generics --- r/R/arrow-datum.R | 4 ++-- r/R/arrow-tabular.R | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/r/R/arrow-datum.R b/r/R/arrow-datum.R index 5cc9cec87f7..5ddc5f03361 100644 --- a/r/R/arrow-datum.R +++ b/r/R/arrow-datum.R @@ -47,7 +47,7 @@ as.vector.ArrowDatum <- function(x, mode) { } #'@export -na.omit.ArrowDatum <- function(x){ +na.omit.ArrowDatum <- function(x, ...){ x$Filter(!is.na(x)) } @@ -55,7 +55,7 @@ na.omit.ArrowDatum <- function(x){ na.exclude.ArrowDatum <- na.omit.ArrowDatum #'@export -na.fail.ArrowDatum <- function(x){ +na.fail.ArrowDatum <- function(x, ...){ if(x$null_count > 0){ stop("missing values in object") } diff --git a/r/R/arrow-tabular.R b/r/R/arrow-tabular.R index 1a980a75dfd..cf25b95dff2 100644 --- a/r/R/arrow-tabular.R +++ b/r/R/arrow-tabular.R @@ -212,7 +212,7 @@ head.ArrowTabular <- head.ArrowDatum tail.ArrowTabular <- tail.ArrowDatum #' @export -na.fail.ArrowTabular <- function(x){ +na.fail.ArrowTabular <- function(x, ...){ na_count <- sum(purrr::map_int(x$columns, ~.x$null_count)) if(na_count > 0){ @@ -223,7 +223,7 @@ na.fail.ArrowTabular <- function(x){ } #' @export -na.omit.ArrowTabular <- function(x){ +na.omit.ArrowTabular <- function(x, ...){ na_expr <- paste0("!is.na(", names(x), ")", collapse = ",") filter_expr <- paste0("dplyr::filter(x,", na_expr, ")") From 1b5f4f57cd3953d8030828d229cc26e10d0852ff Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Fri, 16 Apr 2021 16:34:13 +0100 Subject: [PATCH 13/26] Add in test for na.fail on Scalar --- r/tests/testthat/test-na-omit.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/r/tests/testthat/test-na-omit.R b/r/tests/testthat/test-na-omit.R index d537455e88a..8aa9074a8f3 100644 --- a/r/tests/testthat/test-na-omit.R +++ b/r/tests/testthat/test-na-omit.R @@ -22,6 +22,11 @@ scalar_one <- Scalar$create(1) tbl <- Table$create(example_data) batch <- record_batch(example_data) +test_that("na.fail on Scalar", { + expect_vector(na.fail(scalar_one), 1) + expect_error(na.fail(scalar_na), "missing values in object") +}) + test_that("na.omit on Array and ChunkedArray", { expect_vector_equal(na.omit(input), data_no_na) expect_vector_equivalent(na.omit(input), data_na) From 5c25e51bc39c675a2d9d06dcbff66498b569294b Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Fri, 16 Apr 2021 17:03:06 +0100 Subject: [PATCH 14/26] Update function arguments to match generics --- r/R/arrow-datum.R | 10 +++++----- r/R/arrow-tabular.R | 12 ++++++------ 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/r/R/arrow-datum.R b/r/R/arrow-datum.R index 5ddc5f03361..0adb1c97d4f 100644 --- a/r/R/arrow-datum.R +++ b/r/R/arrow-datum.R @@ -47,19 +47,19 @@ as.vector.ArrowDatum <- function(x, mode) { } #'@export -na.omit.ArrowDatum <- function(x, ...){ - x$Filter(!is.na(x)) +na.omit.ArrowDatum <- function(object, ...){ + object$Filter(!is.na(object)) } #'@export na.exclude.ArrowDatum <- na.omit.ArrowDatum #'@export -na.fail.ArrowDatum <- function(x, ...){ - if(x$null_count > 0){ +na.fail.ArrowDatum <- function(object, ...){ + if(object$null_count > 0){ stop("missing values in object") } - x + object } filter_rows <- function(x, i, keep_na = TRUE, ...) { diff --git a/r/R/arrow-tabular.R b/r/R/arrow-tabular.R index cf25b95dff2..a4042b3c0c7 100644 --- a/r/R/arrow-tabular.R +++ b/r/R/arrow-tabular.R @@ -212,21 +212,21 @@ head.ArrowTabular <- head.ArrowDatum tail.ArrowTabular <- tail.ArrowDatum #' @export -na.fail.ArrowTabular <- function(x, ...){ +na.fail.ArrowTabular <- function(object, ...){ - na_count <- sum(purrr::map_int(x$columns, ~.x$null_count)) + na_count <- sum(purrr::map_int(object$columns, ~.x$null_count)) if(na_count > 0){ stop("missing values in object") } - x + object } #' @export -na.omit.ArrowTabular <- function(x, ...){ +na.omit.ArrowTabular <- function(object, ...){ - na_expr <- paste0("!is.na(", names(x), ")", collapse = ",") - filter_expr <- paste0("dplyr::filter(x,", na_expr, ")") + na_expr <- paste0("!is.na(", names(object), ")", collapse = ",") + filter_expr <- paste0("dplyr::filter(object,", na_expr, ")") expression <- rlang::parse_expr(filter_expr) rlang::eval_tidy(expression) From 9ad23dc6b637c0e41d84e3f6c00994c002e02522 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Tue, 20 Apr 2021 09:41:01 +0100 Subject: [PATCH 15/26] Fix spacing in arrow-datum --- r/R/arrow-datum.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/r/R/arrow-datum.R b/r/R/arrow-datum.R index 0adb1c97d4f..94ccb89d4e4 100644 --- a/r/R/arrow-datum.R +++ b/r/R/arrow-datum.R @@ -46,15 +46,15 @@ as.vector.ArrowDatum <- function(x, mode) { ) } -#'@export +#' @export na.omit.ArrowDatum <- function(object, ...){ object$Filter(!is.na(object)) } -#'@export +#' @export na.exclude.ArrowDatum <- na.omit.ArrowDatum -#'@export +#' @export na.fail.ArrowDatum <- function(object, ...){ if(object$null_count > 0){ stop("missing values in object") From c0d9f3040848381f6492f7fb64652cd72f37ae7d Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Tue, 20 Apr 2021 09:41:44 +0100 Subject: [PATCH 16/26] Remove unnecessary spacing --- r/R/arrow-tabular.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/r/R/arrow-tabular.R b/r/R/arrow-tabular.R index a4042b3c0c7..dd68897c09c 100644 --- a/r/R/arrow-tabular.R +++ b/r/R/arrow-tabular.R @@ -219,7 +219,6 @@ na.fail.ArrowTabular <- function(object, ...){ stop("missing values in object") } object - } #' @export @@ -230,7 +229,6 @@ na.omit.ArrowTabular <- function(object, ...){ expression <- rlang::parse_expr(filter_expr) rlang::eval_tidy(expression) - } #' @export From ffa99325915f4494cc6ffece4a2d6d389a4730ec Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Tue, 20 Apr 2021 09:42:12 +0100 Subject: [PATCH 17/26] More spacing --- r/R/arrow-tabular.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/r/R/arrow-tabular.R b/r/R/arrow-tabular.R index dd68897c09c..bfaf53d118b 100644 --- a/r/R/arrow-tabular.R +++ b/r/R/arrow-tabular.R @@ -213,7 +213,6 @@ tail.ArrowTabular <- tail.ArrowDatum #' @export na.fail.ArrowTabular <- function(object, ...){ - na_count <- sum(purrr::map_int(object$columns, ~.x$null_count)) if(na_count > 0){ stop("missing values in object") @@ -223,7 +222,6 @@ na.fail.ArrowTabular <- function(object, ...){ #' @export na.omit.ArrowTabular <- function(object, ...){ - na_expr <- paste0("!is.na(", names(object), ")", collapse = ",") filter_expr <- paste0("dplyr::filter(object,", na_expr, ")") expression <- rlang::parse_expr(filter_expr) From 5b8d300656c6f2ad230df449f9afe77399adc42e Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Tue, 20 Apr 2021 10:36:33 +0100 Subject: [PATCH 18/26] Update expect_vector_equal have have ignore_attr argument and expect_vector to use this arg --- r/tests/testthat/helper-expectation.R | 48 ++++----------------------- r/tests/testthat/test-na-omit.R | 6 ++-- 2 files changed, 9 insertions(+), 45 deletions(-) diff --git a/r/tests/testthat/helper-expectation.R b/r/tests/testthat/helper-expectation.R index 0dae2d4449f..5c0925e98ca 100644 --- a/r/tests/testthat/helper-expectation.R +++ b/r/tests/testthat/helper-expectation.R @@ -15,8 +15,9 @@ # specific language governing permissions and limitations # under the License. -expect_vector <- function(x, y, ...) { - expect_equal(as.vector(x), y, ...) +expect_vector <- function(x, y, ignore_attr = FALSE, ...) { + expect_fun <- ifelse(ignore_attr, expect_equivalent, expect_equal) + expect_fun(as.vector(x), y, ...) } expect_data_frame <- function(x, y, ...) { @@ -155,6 +156,7 @@ expect_vector_equal <- function(expr, # A vectorized R expression containing `in vec, # A vector as reference, will make Array/ChunkedArray with skip_array = NULL, # Msg, if should skip Array test skip_chunked_array = NULL, # Msg, if should skip ChunkedArray test + ignore_attr = FALSE, # ignore attributes? ...) { expr <- rlang::enquo(expr) expected <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(input = vec))) @@ -165,7 +167,7 @@ expect_vector_equal <- function(expr, # A vectorized R expression containing `in expr, rlang::new_data_mask(rlang::env(input = Array$create(vec))) ) - expect_vector(via_array, expected, ...) + expect_vector(via_array, expected, ignore_attr, ...) } else { skip_msg <- c(skip_msg, skip_array) } @@ -178,7 +180,7 @@ expect_vector_equal <- function(expr, # A vectorized R expression containing `in expr, rlang::new_data_mask(rlang::env(input = ChunkedArray$create(split_vector[[1]], split_vector[[2]]))) ) - expect_vector(via_chunked, expected, ...) + expect_vector(via_chunked, expected, ignore_attr, ...) } else { skip_msg <- c(skip_msg, skip_chunked_array) } @@ -188,44 +190,6 @@ expect_vector_equal <- function(expr, # A vectorized R expression containing `in } } -expect_vector_equivalent <- function(expr, # A vectorized R expression containing `input` as its input - vec, # A vector as reference, will make Array/ChunkedArray with - skip_array = NULL, # Msg, if should skip Array test - skip_chunked_array = NULL, # Msg, if should skip ChunkedArray test - ...) { - expr <- rlang::enquo(expr) - expected <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(input = vec))) - skip_msg <- NULL - - if (is.null(skip_array)) { - via_array <- rlang::eval_tidy( - expr, - rlang::new_data_mask(rlang::env(input = Array$create(vec))) - ) - - expect_equivalent(as.vector(via_array), expected, ...) - } else { - skip_msg <- c(skip_msg, skip_array) - } - - if (is.null(skip_chunked_array)) { - # split input vector into two to exercise ChunkedArray with >1 chunk - split_vector <- split_vector_as_list(vec) - - via_chunked <- rlang::eval_tidy( - expr, - rlang::new_data_mask(rlang::env(input = ChunkedArray$create(split_vector[[1]], split_vector[[2]]))) - ) - expect_equivalent(as.vector(via_chunked), expected, ...) - } else { - skip_msg <- c(skip_msg, skip_chunked_array) - } - - if (!is.null(skip_msg)) { - skip(paste(skip_msg, collpase = "\n")) - } -} - expect_vector_error <- function(expr, # A vectorized R expression containing `input` as its input vec, # A vector as reference, will make Array/ChunkedArray with skip_array = NULL, # Msg, if should skip Array test diff --git a/r/tests/testthat/test-na-omit.R b/r/tests/testthat/test-na-omit.R index 8aa9074a8f3..bbdaae5f511 100644 --- a/r/tests/testthat/test-na-omit.R +++ b/r/tests/testthat/test-na-omit.R @@ -29,16 +29,16 @@ test_that("na.fail on Scalar", { test_that("na.omit on Array and ChunkedArray", { expect_vector_equal(na.omit(input), data_no_na) - expect_vector_equivalent(na.omit(input), data_na) + expect_vector_equal(na.omit(input), data_na, ignore_attr=TRUE) }) test_that("na.exclude on Array and ChunkedArray", { expect_vector_equal(na.exclude(input), data_no_na) - expect_vector_equivalent(na.exclude(input), data_na) + expect_vector_equal(na.exclude(input), data_na, ignore_attr=TRUE) }) test_that("na.fail on Array and ChunkedArray", { - expect_vector_equivalent(na.fail(input), data_no_na) + expect_vector_equal(na.fail(input), data_no_na, ignore_attr=TRUE) expect_vector_error(na.fail(input), data_na) }) From 6c0a699c9c30f735e475deae09810cb1497d0780 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Tue, 20 Apr 2021 10:38:45 +0100 Subject: [PATCH 19/26] Reorder vector with NA at end --- r/tests/testthat/test-na-omit.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/tests/testthat/test-na-omit.R b/r/tests/testthat/test-na-omit.R index bbdaae5f511..6b527b0c2cd 100644 --- a/r/tests/testthat/test-na-omit.R +++ b/r/tests/testthat/test-na-omit.R @@ -16,7 +16,7 @@ # under the License. data_no_na <- c(2:10) -data_na <- c(NA_real_, data_no_na) +data_na <- c(data_no_na, NA_real_) scalar_na <- Scalar$create(NA) scalar_one <- Scalar$create(1) tbl <- Table$create(example_data) From 351bdedc2986770cd609e052420b79d7ab0e0b52 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Tue, 20 Apr 2021 11:41:31 +0100 Subject: [PATCH 20/26] Refactor na.omit to exclude dplyr use --- r/R/arrow-tabular.R | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/r/R/arrow-tabular.R b/r/R/arrow-tabular.R index bfaf53d118b..421b5efb594 100644 --- a/r/R/arrow-tabular.R +++ b/r/R/arrow-tabular.R @@ -222,11 +222,17 @@ na.fail.ArrowTabular <- function(object, ...){ #' @export na.omit.ArrowTabular <- function(object, ...){ - na_expr <- paste0("!is.na(", names(object), ")", collapse = ",") - filter_expr <- paste0("dplyr::filter(object,", na_expr, ")") - expression <- rlang::parse_expr(filter_expr) - rlang::eval_tidy(expression) + filter_text = paste0( + ".data$Filter(", + paste0("!is.na(.data$", names(object), ")", collapse = " & "), + ")" + ) + + filter = rlang::parse_expr(filter_text) + + rlang::eval_tidy(filter, rlang::new_data_mask(rlang::env(.data = object))) + } #' @export From 09844a6cf2dcd66c2d273a7c48e07c3379ff3949 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Tue, 20 Apr 2021 11:43:24 +0100 Subject: [PATCH 21/26] Remove unnecessary whitespace --- r/R/arrow-tabular.R | 4 ---- 1 file changed, 4 deletions(-) diff --git a/r/R/arrow-tabular.R b/r/R/arrow-tabular.R index 421b5efb594..674df4d135f 100644 --- a/r/R/arrow-tabular.R +++ b/r/R/arrow-tabular.R @@ -222,17 +222,13 @@ na.fail.ArrowTabular <- function(object, ...){ #' @export na.omit.ArrowTabular <- function(object, ...){ - filter_text = paste0( ".data$Filter(", paste0("!is.na(.data$", names(object), ")", collapse = " & "), ")" ) - filter = rlang::parse_expr(filter_text) - rlang::eval_tidy(filter, rlang::new_data_mask(rlang::env(.data = object))) - } #' @export From 2712b307c8692978eac506942440614d807196fb Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Tue, 20 Apr 2021 16:09:11 +0100 Subject: [PATCH 22/26] Update na.omit to use purrr::map and purrr::reduce instead of doing freaky evaluation things --- r/R/arrow-tabular.R | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/r/R/arrow-tabular.R b/r/R/arrow-tabular.R index 674df4d135f..81b58b90726 100644 --- a/r/R/arrow-tabular.R +++ b/r/R/arrow-tabular.R @@ -222,13 +222,9 @@ na.fail.ArrowTabular <- function(object, ...){ #' @export na.omit.ArrowTabular <- function(object, ...){ - filter_text = paste0( - ".data$Filter(", - paste0("!is.na(.data$", names(object), ")", collapse = " & "), - ")" - ) - filter = rlang::parse_expr(filter_text) - rlang::eval_tidy(filter, rlang::new_data_mask(rlang::env(.data = object))) + not_na <- purrr::map(object$columns, ~!is.na(.x)) + not_na_agg <- purrr::reduce(not_na, `&`) + object$Filter(not_na_agg) } #' @export From 8451d3dfdf9659b45e2e3006a3396faaa99bc6f3 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Wed, 21 Apr 2021 15:45:38 +0100 Subject: [PATCH 23/26] Rename expect_vector to expect_as_vector --- r/tests/testthat/helper-expectation.R | 6 ++-- r/tests/testthat/test-Array.R | 44 +++++++++++++-------------- r/tests/testthat/test-RecordBatch.R | 22 +++++++------- r/tests/testthat/test-Table.R | 24 +++++++-------- r/tests/testthat/test-chunked-array.R | 38 +++++++++++------------ r/tests/testthat/test-compute-arith.R | 2 +- r/tests/testthat/test-na-omit.R | 4 +-- 7 files changed, 70 insertions(+), 70 deletions(-) diff --git a/r/tests/testthat/helper-expectation.R b/r/tests/testthat/helper-expectation.R index 5c0925e98ca..595b183e555 100644 --- a/r/tests/testthat/helper-expectation.R +++ b/r/tests/testthat/helper-expectation.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -expect_vector <- function(x, y, ignore_attr = FALSE, ...) { +expect_as_vector <- function(x, y, ignore_attr = FALSE, ...) { expect_fun <- ifelse(ignore_attr, expect_equivalent, expect_equal) expect_fun(as.vector(x), y, ...) } @@ -167,7 +167,7 @@ expect_vector_equal <- function(expr, # A vectorized R expression containing `in expr, rlang::new_data_mask(rlang::env(input = Array$create(vec))) ) - expect_vector(via_array, expected, ignore_attr, ...) + expect_as_vector(via_array, expected, ignore_attr, ...) } else { skip_msg <- c(skip_msg, skip_array) } @@ -180,7 +180,7 @@ expect_vector_equal <- function(expr, # A vectorized R expression containing `in expr, rlang::new_data_mask(rlang::env(input = ChunkedArray$create(split_vector[[1]], split_vector[[2]]))) ) - expect_vector(via_chunked, expected, ignore_attr, ...) + expect_as_vector(via_chunked, expected, ignore_attr, ...) } else { skip_msg <- c(skip_msg, skip_chunked_array) } diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R index b4fa8296d3a..e064f81cdfa 100644 --- a/r/tests/testthat/test-Array.R +++ b/r/tests/testthat/test-Array.R @@ -94,13 +94,13 @@ test_that("Slice() and RangeEquals()", { y <- x$Slice(10) expect_equal(y$type, int32()) expect_equal(length(y), 15L) - expect_vector(y, c(101:110, 201:205)) + expect_as_vector(y, c(101:110, 201:205)) expect_true(x$RangeEquals(y, 10, 24)) expect_false(x$RangeEquals(y, 9, 23)) expect_false(x$RangeEquals(y, 11, 24)) z <- x$Slice(10, 5) - expect_vector(z, c(101:105)) + expect_as_vector(z, c(101:105)) expect_true(x$RangeEquals(z, 10, 15, 0)) # Input validation @@ -708,12 +708,12 @@ test_that("Array$Take()", { test_that("[ method on Array", { vec <- 11:20 a <- Array$create(vec) - expect_vector(a[5:9], vec[5:9]) - expect_vector(a[c(9, 3, 5)], vec[c(9, 3, 5)]) - expect_vector(a[rep(c(TRUE, FALSE), 5)], vec[c(1, 3, 5, 7, 9)]) - expect_vector(a[rep(c(TRUE, FALSE, NA, FALSE, TRUE), 2)], c(11, NA, 15, 16, NA, 20)) - expect_vector(a[-4], vec[-4]) - expect_vector(a[-1], vec[-1]) + expect_as_vector(a[5:9], vec[5:9]) + expect_as_vector(a[c(9, 3, 5)], vec[c(9, 3, 5)]) + expect_as_vector(a[rep(c(TRUE, FALSE), 5)], vec[c(1, 3, 5, 7, 9)]) + expect_as_vector(a[rep(c(TRUE, FALSE, NA, FALSE, TRUE), 2)], c(11, NA, 15, 16, NA, 20)) + expect_as_vector(a[-4], vec[-4]) + expect_as_vector(a[-1], vec[-1]) }) test_that("[ accepts Arrays and otherwise handles bad input", { @@ -724,12 +724,12 @@ test_that("[ accepts Arrays and otherwise handles bad input", { a[Array$create(ind)], "Cannot extract rows with an Array of type double" ) - expect_vector(a[Array$create(ind - 1, type = int8())], vec[ind]) - expect_vector(a[Array$create(ind - 1, type = uint8())], vec[ind]) - expect_vector(a[ChunkedArray$create(8, 2, 4, type = uint8())], vec[ind]) + expect_as_vector(a[Array$create(ind - 1, type = int8())], vec[ind]) + expect_as_vector(a[Array$create(ind - 1, type = uint8())], vec[ind]) + expect_as_vector(a[ChunkedArray$create(8, 2, 4, type = uint8())], vec[ind]) filt <- seq_along(vec) %in% ind - expect_vector(a[Array$create(filt)], vec[filt]) + expect_as_vector(a[Array$create(filt)], vec[filt]) expect_error( a["string"], @@ -754,21 +754,21 @@ test_that("[ accepts Expressions", { vec <- 11:20 a <- Array$create(vec) b <- Array$create(1:10) - expect_vector(a[b > 4], vec[5:10]) + expect_as_vector(a[b > 4], vec[5:10]) }) test_that("Array head/tail", { vec <- 11:20 a <- Array$create(vec) - expect_vector(head(a), head(vec)) - expect_vector(head(a, 4), head(vec, 4)) - expect_vector(head(a, 40), head(vec, 40)) - expect_vector(head(a, -4), head(vec, -4)) - expect_vector(head(a, -40), head(vec, -40)) - expect_vector(tail(a), tail(vec)) - expect_vector(tail(a, 4), tail(vec, 4)) - expect_vector(tail(a, 40), tail(vec, 40)) - expect_vector(tail(a, -40), tail(vec, -40)) + expect_as_vector(head(a), head(vec)) + expect_as_vector(head(a, 4), head(vec, 4)) + expect_as_vector(head(a, 40), head(vec, 40)) + expect_as_vector(head(a, -4), head(vec, -4)) + expect_as_vector(head(a, -40), head(vec, -40)) + expect_as_vector(tail(a), tail(vec)) + expect_as_vector(tail(a, 4), tail(vec, 4)) + expect_as_vector(tail(a, 40), tail(vec, 40)) + expect_as_vector(tail(a, -40), tail(vec, -40)) }) test_that("Dictionary array: create from arrays, not factor", { diff --git a/r/tests/testthat/test-RecordBatch.R b/r/tests/testthat/test-RecordBatch.R index ff7f17eca6e..c3797914741 100644 --- a/r/tests/testthat/test-RecordBatch.R +++ b/r/tests/testthat/test-RecordBatch.R @@ -155,9 +155,9 @@ test_that("[ on RecordBatch", { }) test_that("[[ and $ on RecordBatch", { - expect_vector(batch[["int"]], tbl$int) - expect_vector(batch$int, tbl$int) - expect_vector(batch[[4]], tbl$chr) + expect_as_vector(batch[["int"]], tbl$int) + expect_as_vector(batch$int, tbl$int) + expect_as_vector(batch[[4]], tbl$chr) expect_null(batch$qwerty) expect_null(batch[["asdf"]]) expect_error(batch[[c(4, 3)]]) @@ -190,16 +190,16 @@ test_that("[[<- assignment", { # can replace a column by index batch[[2]] <- as.numeric(10:1) - expect_vector(batch[[2]], as.numeric(10:1)) + expect_as_vector(batch[[2]], as.numeric(10:1)) # can add a column by index batch[[5]] <- as.numeric(10:1) - expect_vector(batch[[5]], as.numeric(10:1)) - expect_vector(batch[["5"]], as.numeric(10:1)) + expect_as_vector(batch[[5]], as.numeric(10:1)) + expect_as_vector(batch[["5"]], as.numeric(10:1)) # can replace a column batch[["int"]] <- 10:1 - expect_vector(batch[["int"]], 10:1) + expect_as_vector(batch[["int"]], 10:1) # can use $ batch$new <- NULL @@ -207,11 +207,11 @@ test_that("[[<- assignment", { expect_identical(dim(batch), c(10L, 4L)) batch$int <- 1:10 - expect_vector(batch$int, 1:10) + expect_as_vector(batch$int, 1:10) # recycling batch[["atom"]] <- 1L - expect_vector(batch[["atom"]], rep(1L, 10)) + expect_as_vector(batch[["atom"]], rep(1L, 10)) expect_error( batch[["atom"]] <- 1:6, @@ -221,7 +221,7 @@ test_that("[[<- assignment", { # assign Arrow array array <- Array$create(c(10:1)) batch$array <- array - expect_vector(batch$array, 10:1) + expect_as_vector(batch$array, 10:1) # nonsense indexes expect_error(batch[[NA]] <- letters[10:1], "'i' must be character or numeric, not logical") @@ -498,4 +498,4 @@ test_that("Handling string data with embedded nuls", { fixed = TRUE ) }) -}) \ No newline at end of file +}) diff --git a/r/tests/testthat/test-Table.R b/r/tests/testthat/test-Table.R index 86bda393e2d..3788d416426 100644 --- a/r/tests/testthat/test-Table.R +++ b/r/tests/testthat/test-Table.R @@ -105,7 +105,7 @@ test_that("[, [[, $ for Table", { expect_data_frame(tab[6:7,], tbl[6:7,]) expect_data_frame(tab[6:7, 2:4], tbl[6:7, 2:4]) expect_data_frame(tab[, c("dbl", "fct")], tbl[, c(2, 5)]) - expect_vector(tab[, "chr", drop = TRUE], tbl$chr) + expect_as_vector(tab[, "chr", drop = TRUE], tbl$chr) # Take within a single chunk expect_data_frame(tab[c(7, 3, 5), 2:4], tbl[c(7, 3, 5), 2:4]) expect_data_frame(tab[rep(c(FALSE, TRUE), 5),], tbl[c(2, 4, 6, 8, 10),]) @@ -123,9 +123,9 @@ test_that("[, [[, $ for Table", { # Expression expect_data_frame(tab[tab$int > 6,], tbl[tbl$int > 6,]) - expect_vector(tab[["int"]], tbl$int) - expect_vector(tab$int, tbl$int) - expect_vector(tab[[4]], tbl$chr) + expect_as_vector(tab[["int"]], tbl$int) + expect_as_vector(tab$int, tbl$int) + expect_as_vector(tab[[4]], tbl$chr) expect_null(tab$qwerty) expect_null(tab[["asdf"]]) # List-like column slicing @@ -173,16 +173,16 @@ test_that("[[<- assignment", { # can replace a column by index tab[[2]] <- as.numeric(10:1) - expect_vector(tab[[2]], as.numeric(10:1)) + expect_as_vector(tab[[2]], as.numeric(10:1)) # can add a column by index tab[[5]] <- as.numeric(10:1) - expect_vector(tab[[5]], as.numeric(10:1)) - expect_vector(tab[["5"]], as.numeric(10:1)) + expect_as_vector(tab[[5]], as.numeric(10:1)) + expect_as_vector(tab[["5"]], as.numeric(10:1)) # can replace a column tab[["int"]] <- 10:1 - expect_vector(tab[["int"]], 10:1) + expect_as_vector(tab[["int"]], 10:1) # can use $ tab$new <- NULL @@ -190,11 +190,11 @@ test_that("[[<- assignment", { expect_identical(dim(tab), c(10L, 4L)) tab$int <- 1:10 - expect_vector(tab$int, 1:10) + expect_as_vector(tab$int, 1:10) # recycling tab[["atom"]] <- 1L - expect_vector(tab[["atom"]], rep(1L, 10)) + expect_as_vector(tab[["atom"]], rep(1L, 10)) expect_error( tab[["atom"]] <- 1:6, @@ -204,10 +204,10 @@ test_that("[[<- assignment", { # assign Arrow array and chunked_array array <- Array$create(c(10:1)) tab$array <- array - expect_vector(tab$array, 10:1) + expect_as_vector(tab$array, 10:1) tab$chunked <- chunked_array(1:10) - expect_vector(tab$chunked, 1:10) + expect_as_vector(tab$chunked, 1:10) # nonsense indexes expect_error(tab[[NA]] <- letters[10:1], "'i' must be character or numeric, not logical") diff --git a/r/tests/testthat/test-chunked-array.R b/r/tests/testthat/test-chunked-array.R index e72067a6d5f..f5b2dca2e44 100644 --- a/r/tests/testthat/test-chunked-array.R +++ b/r/tests/testthat/test-chunked-array.R @@ -312,30 +312,30 @@ test_that("[ ChunkedArray", { one_chunk <- chunked_array(2:11) x <- chunked_array(1:10, 31:40, 51:55) # Slice - expect_vector(x[8:12], c(8:10, 31:32)) + expect_as_vector(x[8:12], c(8:10, 31:32)) # Take from same chunk - expect_vector(x[c(11, 15, 12)], c(31, 35, 32)) + expect_as_vector(x[c(11, 15, 12)], c(31, 35, 32)) # Take from multiple chunks (calls Concatenate) - expect_vector(x[c(2, 11, 15, 12, 3)], c(2, 31, 35, 32, 3)) + expect_as_vector(x[c(2, 11, 15, 12, 3)], c(2, 31, 35, 32, 3)) # Take with Array (note these are 0-based) take1 <- Array$create(c(10L, 14L, 11L)) - expect_vector(x[take1], c(31, 35, 32)) + expect_as_vector(x[take1], c(31, 35, 32)) # Take with ChunkedArray take2 <- ChunkedArray$create(c(10L, 14L), 11L) - expect_vector(x[take2], c(31, 35, 32)) + expect_as_vector(x[take2], c(31, 35, 32)) # Filter (with recycling) - expect_vector( + expect_as_vector( one_chunk[c(FALSE, TRUE, FALSE, FALSE, TRUE)], c(3, 6, 8, 11) ) # Filter where both are 1-chunk - expect_vector( + expect_as_vector( one_chunk[ChunkedArray$create(rep(c(FALSE, TRUE, FALSE, FALSE, TRUE), 2))], c(3, 6, 8, 11) ) # Filter multi-chunk with logical (-> Array) - expect_vector( + expect_as_vector( x[c(FALSE, TRUE, FALSE, FALSE, TRUE)], c(2, 5, 7, 10, 32, 35, 37, 40, 52, 55) ) @@ -343,7 +343,7 @@ test_that("[ ChunkedArray", { p1 <- c(FALSE, TRUE, FALSE, FALSE, TRUE) p2 <- c(TRUE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE) filt <- ChunkedArray$create(p1, p2, p2) - expect_vector( + expect_as_vector( x[filt], c(2, 5, 6, 8, 9, 35, 36, 38, 39, 55) ) @@ -352,15 +352,15 @@ test_that("[ ChunkedArray", { test_that("ChunkedArray head/tail", { vec <- 11:20 a <- ChunkedArray$create(11:15, 16:20) - expect_vector(head(a), head(vec)) - expect_vector(head(a, 4), head(vec, 4)) - expect_vector(head(a, 40), head(vec, 40)) - expect_vector(head(a, -4), head(vec, -4)) - expect_vector(head(a, -40), head(vec, -40)) - expect_vector(tail(a), tail(vec)) - expect_vector(tail(a, 4), tail(vec, 4)) - expect_vector(tail(a, 40), tail(vec, 40)) - expect_vector(tail(a, -40), tail(vec, -40)) + expect_as_vector(head(a), head(vec)) + expect_as_vector(head(a, 4), head(vec, 4)) + expect_as_vector(head(a, 40), head(vec, 40)) + expect_as_vector(head(a, -4), head(vec, -4)) + expect_as_vector(head(a, -40), head(vec, -40)) + expect_as_vector(tail(a), tail(vec)) + expect_as_vector(tail(a, 4), tail(vec, 4)) + expect_as_vector(tail(a, 40), tail(vec, 40)) + expect_as_vector(tail(a, -40), tail(vec, -40)) }) test_that("ChunkedArray$Equals", { @@ -410,4 +410,4 @@ test_that("Handling string data with embedded nuls", { fixed = TRUE ) }) -}) \ No newline at end of file +}) diff --git a/r/tests/testthat/test-compute-arith.R b/r/tests/testthat/test-compute-arith.R index 9d146fd04e6..0b6d8e8dd17 100644 --- a/r/tests/testthat/test-compute-arith.R +++ b/r/tests/testthat/test-compute-arith.R @@ -23,7 +23,7 @@ test_that("Addition", { expect_equal(a + 4L, Array$create(c(5:8, NA_integer_))) expect_identical(as.vector(a + 4L), c(5:8, NA_integer_)) expect_equal(a + 4L, Array$create(c(5:8, NA_integer_))) - expect_vector(a + 4L, c(5:8, NA_integer_)) + expect_as_vector(a + 4L, c(5:8, NA_integer_)) expect_equal(a + NA_integer_, Array$create(rep(NA_integer_, 5))) a8 <- a$cast(int8()) diff --git a/r/tests/testthat/test-na-omit.R b/r/tests/testthat/test-na-omit.R index 6b527b0c2cd..fd1372fdc5d 100644 --- a/r/tests/testthat/test-na-omit.R +++ b/r/tests/testthat/test-na-omit.R @@ -23,7 +23,7 @@ tbl <- Table$create(example_data) batch <- record_batch(example_data) test_that("na.fail on Scalar", { - expect_vector(na.fail(scalar_one), 1) + expect_as_vector(na.fail(scalar_one), 1) expect_error(na.fail(scalar_na), "missing values in object") }) @@ -44,7 +44,7 @@ test_that("na.fail on Array and ChunkedArray", { test_that("na.fail on Scalar", { expect_error(na.fail(scalar_na), regexp = "missing values in object") - expect_vector(na.fail(scalar_one), na.fail(1)) + expect_as_vector(na.fail(scalar_one), na.fail(1)) }) test_that("na.omit on Table", { From 9d6ada9bdc28d6f6ec22c76cc91e97a18dfb08c8 Mon Sep 17 00:00:00 2001 From: Nic Date: Wed, 21 Apr 2021 15:49:05 +0100 Subject: [PATCH 24/26] Refactor NA count to for loop for efficiency Co-authored-by: Neal Richardson --- r/R/arrow-tabular.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/r/R/arrow-tabular.R b/r/R/arrow-tabular.R index 81b58b90726..d063fd281e2 100644 --- a/r/R/arrow-tabular.R +++ b/r/R/arrow-tabular.R @@ -213,9 +213,10 @@ tail.ArrowTabular <- tail.ArrowDatum #' @export na.fail.ArrowTabular <- function(object, ...){ - na_count <- sum(purrr::map_int(object$columns, ~.x$null_count)) - if(na_count > 0){ - stop("missing values in object") + for (col in seq_len(object$num_columns)) { + if (object$column(col - 1L)$null_count > 0) { + stop("missing values in object", call. = FALSE) + } } object } From f11328495a8789ab4b5cca3c4dbe9c547c81bd92 Mon Sep 17 00:00:00 2001 From: Nic Date: Wed, 21 Apr 2021 16:14:24 +0100 Subject: [PATCH 25/26] Update na.omit to use a single array expression Co-authored-by: Neal Richardson --- r/R/arrow-tabular.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/r/R/arrow-tabular.R b/r/R/arrow-tabular.R index d063fd281e2..bba5ad5f5e6 100644 --- a/r/R/arrow-tabular.R +++ b/r/R/arrow-tabular.R @@ -223,9 +223,9 @@ na.fail.ArrowTabular <- function(object, ...){ #' @export na.omit.ArrowTabular <- function(object, ...){ - not_na <- purrr::map(object$columns, ~!is.na(.x)) - not_na_agg <- purrr::reduce(not_na, `&`) - object$Filter(not_na_agg) + not_na <- map(object$columns, ~build_array_expression("is_valid", .x)) + not_na_agg <- Reduce("&", not_na) + object$Filter(eval_array_expression(not_na_agg)) } #' @export From ed5236560d5fad3e254e67e85fcba6bdb81fd50a Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Wed, 21 Apr 2021 13:00:22 -0700 Subject: [PATCH 26/26] Update r/R/arrow-datum.R --- r/R/arrow-datum.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/r/R/arrow-datum.R b/r/R/arrow-datum.R index 94ccb89d4e4..4edcb200ea0 100644 --- a/r/R/arrow-datum.R +++ b/r/R/arrow-datum.R @@ -56,8 +56,8 @@ na.exclude.ArrowDatum <- na.omit.ArrowDatum #' @export na.fail.ArrowDatum <- function(object, ...){ - if(object$null_count > 0){ - stop("missing values in object") + if (object$null_count > 0) { + stop("missing values in object", call. = FALSE) } object } @@ -179,4 +179,3 @@ sort.ArrowDatum <- function(x, decreasing = FALSE, na.last = NA, ...) { tbl$x$Take(tbl$SortIndices(names = c("is_na", "x"), descending = c(TRUE, decreasing))) } } -