Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
46775b4
Import na.* functions from stats
thisisnic Apr 15, 2021
f826a24
Import na.* functions from stats in namespace
thisisnic Apr 15, 2021
88de5c5
Add tests for na.omit, na.exclude, na.fail, and na.pass for ChunkedAr…
thisisnic Apr 15, 2021
6fc0d5d
Add na.omit, na.exclude, na.pass, and na.fail functions for ArrowDatu…
thisisnic Apr 16, 2021
c180cc8
Add expect_vector_error and expect_vector_equivalent functions for te…
thisisnic Apr 16, 2021
ec75960
Remove individual tests for na.* functions in favour of using expect_…
thisisnic Apr 16, 2021
74fb6f7
Add tests for Table object and na.* functions
thisisnic Apr 16, 2021
275e19d
Remove duplicate identical function and remove na.pass
thisisnic Apr 16, 2021
fb22de4
Implement na.fail, na.omit, and na.exclude for ArrowTabular objects
thisisnic Apr 16, 2021
c42a91e
Remove redundant na.pass tests, and swap out expect_data_frame for ex…
thisisnic Apr 16, 2021
f0716bd
Update NAMESPACE
thisisnic Apr 16, 2021
3786af4
Update function arguments to match generics
thisisnic Apr 16, 2021
1b5f4f5
Add in test for na.fail on Scalar
thisisnic Apr 16, 2021
5c25e51
Update function arguments to match generics
thisisnic Apr 16, 2021
9ad23dc
Fix spacing in arrow-datum
thisisnic Apr 20, 2021
c0d9f30
Remove unnecessary spacing
thisisnic Apr 20, 2021
ffa9932
More spacing
thisisnic Apr 20, 2021
5b8d300
Update expect_vector_equal have have ignore_attr argument and expect_…
thisisnic Apr 20, 2021
6c0a699
Reorder vector with NA at end
thisisnic Apr 20, 2021
351bded
Refactor na.omit to exclude dplyr use
thisisnic Apr 20, 2021
09844a6
Remove unnecessary whitespace
thisisnic Apr 20, 2021
2712b30
Update na.omit to use purrr::map and purrr::reduce instead of doing f…
thisisnic Apr 20, 2021
8451d3d
Rename expect_vector to expect_as_vector
thisisnic Apr 21, 2021
9d6ada9
Refactor NA count to for loop for efficiency
thisisnic Apr 21, 2021
f113284
Update na.omit to use a single array expression
thisisnic Apr 21, 2021
ed52365
Update r/R/arrow-datum.R
nealrichardson Apr 21, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions r/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,12 @@ S3method(max,ArrowDatum)
S3method(mean,ArrowDatum)
S3method(median,ArrowDatum)
S3method(min,ArrowDatum)
S3method(na.exclude,ArrowDatum)
S3method(na.exclude,ArrowTabular)
S3method(na.fail,ArrowDatum)
S3method(na.fail,ArrowTabular)
S3method(na.omit,ArrowDatum)
S3method(na.omit,ArrowTabular)
S3method(names,Dataset)
S3method(names,FeatherReader)
S3method(names,RecordBatch)
Expand Down Expand Up @@ -318,6 +324,10 @@ importFrom(rlang,set_names)
importFrom(rlang,syms)
importFrom(rlang,warn)
importFrom(stats,median)
importFrom(stats,na.exclude)
importFrom(stats,na.fail)
importFrom(stats,na.omit)
importFrom(stats,na.pass)
importFrom(stats,quantile)
importFrom(tidyselect,contains)
importFrom(tidyselect,ends_with)
Expand Down
16 changes: 16 additions & 0 deletions r/R/arrow-datum.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,22 @@ as.vector.ArrowDatum <- function(x, mode) {
)
}

#' @export
na.omit.ArrowDatum <- function(object, ...){
object$Filter(!is.na(object))
}

#' @export
na.exclude.ArrowDatum <- na.omit.ArrowDatum

#' @export
na.fail.ArrowDatum <- function(object, ...){
if (object$null_count > 0) {
stop("missing values in object", call. = FALSE)
}
object
}

filter_rows <- function(x, i, keep_na = TRUE, ...) {
# General purpose function for [ row subsetting with R semantics
# Based on the input for `i`, calls x$Filter, x$Slice, or x$Take
Expand Down
2 changes: 1 addition & 1 deletion r/R/arrow-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.

#' @importFrom stats quantile median
#' @importFrom stats quantile median na.omit na.exclude na.pass na.fail
#' @importFrom R6 R6Class
#' @importFrom purrr as_mapper map map2 map_chr map_dfr map_int map_lgl keep
#' @importFrom assertthat assert_that is.string
Expand Down
20 changes: 20 additions & 0 deletions r/R/arrow-tabular.R
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,26 @@ head.ArrowTabular <- head.ArrowDatum
#' @export
tail.ArrowTabular <- tail.ArrowDatum

#' @export
na.fail.ArrowTabular <- function(object, ...){
for (col in seq_len(object$num_columns)) {
if (object$column(col - 1L)$null_count > 0) {
stop("missing values in object", call. = FALSE)
}
}
object
}

#' @export
na.omit.ArrowTabular <- function(object, ...){
not_na <- map(object$columns, ~build_array_expression("is_valid", .x))
not_na_agg <- Reduce("&", not_na)
object$Filter(eval_array_expression(not_na_agg))
}

#' @export
na.exclude.ArrowTabular <- na.omit.ArrowTabular

ToString_tabular <- function(x, ...) {
# Generic to work with both RecordBatch and Table
sch <- unlist(strsplit(x$schema$ToString(), "\n"))
Expand Down
86 changes: 77 additions & 9 deletions r/tests/testthat/helper-expectation.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@
# specific language governing permissions and limitations
# under the License.

expect_vector <- function(x, y, ...) {
expect_equal(as.vector(x), y, ...)
expect_as_vector <- function(x, y, ignore_attr = FALSE, ...) {
expect_fun <- ifelse(ignore_attr, expect_equivalent, expect_equal)
expect_fun(as.vector(x), y, ...)
}

expect_data_frame <- function(x, y, ...) {
Expand Down Expand Up @@ -155,32 +156,31 @@ expect_vector_equal <- function(expr, # A vectorized R expression containing `in
vec, # A vector as reference, will make Array/ChunkedArray with
skip_array = NULL, # Msg, if should skip Array test
skip_chunked_array = NULL, # Msg, if should skip ChunkedArray test
ignore_attr = FALSE, # ignore attributes?
...) {
expr <- rlang::enquo(expr)
expected <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(input = vec)))

skip_msg <- NULL

if (is.null(skip_array)) {
via_array <- rlang::eval_tidy(
expr,
rlang::new_data_mask(rlang::env(input = Array$create(vec)))
)
expect_vector(via_array, expected, ...)
expect_as_vector(via_array, expected, ignore_attr, ...)
} else {
skip_msg <- c(skip_msg, skip_array)
}

if (is.null(skip_chunked_array)) {
# split input vector into two to exercise ChunkedArray with >1 chunk
vec_split <- length(vec) %/% 2
vec1 <- vec[seq(from = min(1, length(vec) - 1), to = min(length(vec) - 1, vec_split), by = 1)]
vec2 <- vec[seq(from = min(length(vec), vec_split + 1), to = length(vec), by = 1)]
split_vector <- split_vector_as_list(vec)

via_chunked <- rlang::eval_tidy(
expr,
rlang::new_data_mask(rlang::env(input = ChunkedArray$create(vec1, vec2)))
rlang::new_data_mask(rlang::env(input = ChunkedArray$create(split_vector[[1]], split_vector[[2]])))
)
expect_vector(via_chunked, expected, ...)
expect_as_vector(via_chunked, expected, ignore_attr, ...)
} else {
skip_msg <- c(skip_msg, skip_chunked_array)
}
Expand All @@ -189,3 +189,71 @@ expect_vector_equal <- function(expr, # A vectorized R expression containing `in
skip(paste(skip_msg, collpase = "\n"))
}
}

expect_vector_error <- function(expr, # A vectorized R expression containing `input` as its input
vec, # A vector as reference, will make Array/ChunkedArray with
skip_array = NULL, # Msg, if should skip Array test
skip_chunked_array = NULL, # Msg, if should skip ChunkedArray test
...) {

expr <- rlang::enquo(expr)

msg <- tryCatch(
rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(input = vec))),
error = function (e) {
msg <- conditionMessage(e)

pattern <- i18ize_error_messages()

if (grepl(pattern, msg)) {
msg <- sub(paste0("^.*(", pattern, ").*$"), "\\1", msg)
}
msg
}
)

expect_true(identical(typeof(msg), "character"), label = "vector errored")

skip_msg <- NULL

if (is.null(skip_array)) {

expect_error(
rlang::eval_tidy(
expr,
rlang::new_data_mask(rlang::env(input = Array$create(vec)))
),
msg,
...
)
} else {
skip_msg <- c(skip_msg, skip_array)
}

if (is.null(skip_chunked_array)) {
# split input vector into two to exercise ChunkedArray with >1 chunk
split_vector <- split_vector_as_list(vec)

expect_error(
rlang::eval_tidy(
expr,
rlang::new_data_mask(rlang::env(input = ChunkedArray$create(split_vector[[1]], split_vector[[2]])))
),
msg,
...
)
} else {
skip_msg <- c(skip_msg, skip_chunked_array)
}

if (!is.null(skip_msg)) {
skip(paste(skip_msg, collpase = "\n"))
}
}

split_vector_as_list <- function(vec){
vec_split <- length(vec) %/% 2
vec1 <- vec[seq(from = min(1, length(vec) - 1), to = min(length(vec) - 1, vec_split), by = 1)]
vec2 <- vec[seq(from = min(length(vec), vec_split + 1), to = length(vec), by = 1)]
list(vec1, vec2)
}
44 changes: 22 additions & 22 deletions r/tests/testthat/test-Array.R
Original file line number Diff line number Diff line change
Expand Up @@ -94,13 +94,13 @@ test_that("Slice() and RangeEquals()", {
y <- x$Slice(10)
expect_equal(y$type, int32())
expect_equal(length(y), 15L)
expect_vector(y, c(101:110, 201:205))
expect_as_vector(y, c(101:110, 201:205))
expect_true(x$RangeEquals(y, 10, 24))
expect_false(x$RangeEquals(y, 9, 23))
expect_false(x$RangeEquals(y, 11, 24))

z <- x$Slice(10, 5)
expect_vector(z, c(101:105))
expect_as_vector(z, c(101:105))
expect_true(x$RangeEquals(z, 10, 15, 0))

# Input validation
Expand Down Expand Up @@ -708,12 +708,12 @@ test_that("Array$Take()", {
test_that("[ method on Array", {
vec <- 11:20
a <- Array$create(vec)
expect_vector(a[5:9], vec[5:9])
expect_vector(a[c(9, 3, 5)], vec[c(9, 3, 5)])
expect_vector(a[rep(c(TRUE, FALSE), 5)], vec[c(1, 3, 5, 7, 9)])
expect_vector(a[rep(c(TRUE, FALSE, NA, FALSE, TRUE), 2)], c(11, NA, 15, 16, NA, 20))
expect_vector(a[-4], vec[-4])
expect_vector(a[-1], vec[-1])
expect_as_vector(a[5:9], vec[5:9])
expect_as_vector(a[c(9, 3, 5)], vec[c(9, 3, 5)])
expect_as_vector(a[rep(c(TRUE, FALSE), 5)], vec[c(1, 3, 5, 7, 9)])
expect_as_vector(a[rep(c(TRUE, FALSE, NA, FALSE, TRUE), 2)], c(11, NA, 15, 16, NA, 20))
expect_as_vector(a[-4], vec[-4])
expect_as_vector(a[-1], vec[-1])
})

test_that("[ accepts Arrays and otherwise handles bad input", {
Expand All @@ -724,12 +724,12 @@ test_that("[ accepts Arrays and otherwise handles bad input", {
a[Array$create(ind)],
"Cannot extract rows with an Array of type double"
)
expect_vector(a[Array$create(ind - 1, type = int8())], vec[ind])
expect_vector(a[Array$create(ind - 1, type = uint8())], vec[ind])
expect_vector(a[ChunkedArray$create(8, 2, 4, type = uint8())], vec[ind])
expect_as_vector(a[Array$create(ind - 1, type = int8())], vec[ind])
expect_as_vector(a[Array$create(ind - 1, type = uint8())], vec[ind])
expect_as_vector(a[ChunkedArray$create(8, 2, 4, type = uint8())], vec[ind])

filt <- seq_along(vec) %in% ind
expect_vector(a[Array$create(filt)], vec[filt])
expect_as_vector(a[Array$create(filt)], vec[filt])

expect_error(
a["string"],
Expand All @@ -754,21 +754,21 @@ test_that("[ accepts Expressions", {
vec <- 11:20
a <- Array$create(vec)
b <- Array$create(1:10)
expect_vector(a[b > 4], vec[5:10])
expect_as_vector(a[b > 4], vec[5:10])
})

test_that("Array head/tail", {
vec <- 11:20
a <- Array$create(vec)
expect_vector(head(a), head(vec))
expect_vector(head(a, 4), head(vec, 4))
expect_vector(head(a, 40), head(vec, 40))
expect_vector(head(a, -4), head(vec, -4))
expect_vector(head(a, -40), head(vec, -40))
expect_vector(tail(a), tail(vec))
expect_vector(tail(a, 4), tail(vec, 4))
expect_vector(tail(a, 40), tail(vec, 40))
expect_vector(tail(a, -40), tail(vec, -40))
expect_as_vector(head(a), head(vec))
expect_as_vector(head(a, 4), head(vec, 4))
expect_as_vector(head(a, 40), head(vec, 40))
expect_as_vector(head(a, -4), head(vec, -4))
expect_as_vector(head(a, -40), head(vec, -40))
expect_as_vector(tail(a), tail(vec))
expect_as_vector(tail(a, 4), tail(vec, 4))
expect_as_vector(tail(a, 40), tail(vec, 40))
expect_as_vector(tail(a, -40), tail(vec, -40))
})

test_that("Dictionary array: create from arrays, not factor", {
Expand Down
22 changes: 11 additions & 11 deletions r/tests/testthat/test-RecordBatch.R
Original file line number Diff line number Diff line change
Expand Up @@ -155,9 +155,9 @@ test_that("[ on RecordBatch", {
})

test_that("[[ and $ on RecordBatch", {
expect_vector(batch[["int"]], tbl$int)
expect_vector(batch$int, tbl$int)
expect_vector(batch[[4]], tbl$chr)
expect_as_vector(batch[["int"]], tbl$int)
expect_as_vector(batch$int, tbl$int)
expect_as_vector(batch[[4]], tbl$chr)
expect_null(batch$qwerty)
expect_null(batch[["asdf"]])
expect_error(batch[[c(4, 3)]])
Expand Down Expand Up @@ -190,28 +190,28 @@ test_that("[[<- assignment", {

# can replace a column by index
batch[[2]] <- as.numeric(10:1)
expect_vector(batch[[2]], as.numeric(10:1))
expect_as_vector(batch[[2]], as.numeric(10:1))

# can add a column by index
batch[[5]] <- as.numeric(10:1)
expect_vector(batch[[5]], as.numeric(10:1))
expect_vector(batch[["5"]], as.numeric(10:1))
expect_as_vector(batch[[5]], as.numeric(10:1))
expect_as_vector(batch[["5"]], as.numeric(10:1))

# can replace a column
batch[["int"]] <- 10:1
expect_vector(batch[["int"]], 10:1)
expect_as_vector(batch[["int"]], 10:1)

# can use $
batch$new <- NULL
expect_null(as.vector(batch$new))
expect_identical(dim(batch), c(10L, 4L))

batch$int <- 1:10
expect_vector(batch$int, 1:10)
expect_as_vector(batch$int, 1:10)

# recycling
batch[["atom"]] <- 1L
expect_vector(batch[["atom"]], rep(1L, 10))
expect_as_vector(batch[["atom"]], rep(1L, 10))

expect_error(
batch[["atom"]] <- 1:6,
Expand All @@ -221,7 +221,7 @@ test_that("[[<- assignment", {
# assign Arrow array
array <- Array$create(c(10:1))
batch$array <- array
expect_vector(batch$array, 10:1)
expect_as_vector(batch$array, 10:1)

# nonsense indexes
expect_error(batch[[NA]] <- letters[10:1], "'i' must be character or numeric, not logical")
Expand Down Expand Up @@ -498,4 +498,4 @@ test_that("Handling string data with embedded nuls", {
fixed = TRUE
)
})
})
})
Loading