diff --git a/r/DESCRIPTION b/r/DESCRIPTION index d605fefd4db..0dc44277ccd 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -39,6 +39,7 @@ Imports: vctrs Roxygen: list(markdown = TRUE, r6 = FALSE, load = "source") RoxygenNote: 7.1.2 +Config/testthat/edition: 3 VignetteBuilder: knitr Suggests: DBI, diff --git a/r/NAMESPACE b/r/NAMESPACE index f89a7352ec9..e90fcdf3451 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -185,6 +185,7 @@ export(TimeUnit) export(TimestampParser) export(Type) export(UnionDataset) +export(all_of) export(arrow_available) export(arrow_info) export(arrow_with_dataset) @@ -345,6 +346,7 @@ importFrom(stats,na.fail) importFrom(stats,na.omit) importFrom(stats,na.pass) importFrom(stats,quantile) +importFrom(tidyselect,all_of) importFrom(tidyselect,contains) importFrom(tidyselect,ends_with) importFrom(tidyselect,eval_select) diff --git a/r/R/dataset.R b/r/R/dataset.R index 072a0f3ae96..79ac693f214 100644 --- a/r/R/dataset.R +++ b/r/R/dataset.R @@ -354,7 +354,7 @@ tail.Dataset <- function(x, n = 6L, ...) { return(x[, i]) } if (!missing(j)) { - x <- select.Dataset(x, j) + x <- select.Dataset(x, all_of(j)) } if (!missing(i)) { diff --git a/r/R/dplyr-mutate.R b/r/R/dplyr-mutate.R index 65421cd1e7f..4c58d32c7e7 100644 --- a/r/R/dplyr-mutate.R +++ b/r/R/dplyr-mutate.R @@ -89,7 +89,7 @@ mutate.arrow_dplyr_query <- function(.data, # Respect .before and .after if (!quo_is_null(.before) || !quo_is_null(.after)) { new <- setdiff(new_vars, old_vars) - .data <- dplyr::relocate(.data, !!new, .before = !!.before, .after = !!.after) + .data <- dplyr::relocate(.data, all_of(new), .before = !!.before, .after = !!.after) } # Respect .keep diff --git a/r/R/reexports-tidyselect.R b/r/R/reexports-tidyselect.R index 2566207c52a..cd0de2849b3 100644 --- a/r/R/reexports-tidyselect.R +++ b/r/R/reexports-tidyselect.R @@ -41,3 +41,6 @@ tidyselect::starts_with #' @importFrom tidyselect last_col #' @export tidyselect::last_col +#' @importFrom tidyselect all_of +#' @export +tidyselect::all_of diff --git a/r/man/reexports.Rd b/r/man/reexports.Rd index 73825979030..591158c72f4 100644 --- a/r/man/reexports.Rd +++ b/r/man/reexports.Rd @@ -14,6 +14,7 @@ \alias{one_of} \alias{starts_with} \alias{last_col} +\alias{all_of} \title{Objects exported from other packages} \keyword{internal} \description{ @@ -23,6 +24,6 @@ below to see their documentation. \describe{ \item{bit64}{\code{\link[bit64:bit64-package]{print.integer64}}, \code{\link[bit64:bit64-package]{str.integer64}}} - \item{tidyselect}{\code{\link[tidyselect:starts_with]{contains}}, \code{\link[tidyselect:starts_with]{ends_with}}, \code{\link[tidyselect]{everything}}, \code{\link[tidyselect:everything]{last_col}}, \code{\link[tidyselect:starts_with]{matches}}, \code{\link[tidyselect:starts_with]{num_range}}, \code{\link[tidyselect]{one_of}}, \code{\link[tidyselect]{starts_with}}} + \item{tidyselect}{\code{\link[tidyselect]{all_of}}, \code{\link[tidyselect:starts_with]{contains}}, \code{\link[tidyselect:starts_with]{ends_with}}, \code{\link[tidyselect]{everything}}, \code{\link[tidyselect:everything]{last_col}}, \code{\link[tidyselect:starts_with]{matches}}, \code{\link[tidyselect:starts_with]{num_range}}, \code{\link[tidyselect]{one_of}}, \code{\link[tidyselect]{starts_with}}} }} diff --git a/r/tests/testthat/helper-expectation.R b/r/tests/testthat/helper-expectation.R index e765bd6cf54..5a3e8680074 100644 --- a/r/tests/testthat/helper-expectation.R +++ b/r/tests/testthat/helper-expectation.R @@ -15,13 +15,8 @@ # specific language governing permissions and limitations # under the License. -expect_as_vector <- function(x, y, ignore_attr = FALSE, ...) { - expect_fun <- if (ignore_attr) { - expect_equivalent - } else { - expect_equal - } - expect_fun(as.vector(x), y, ...) +expect_as_vector <- function(x, y, ...) { + expect_equal(as.vector(x), y, ...) } expect_data_frame <- function(x, y, ...) { @@ -33,20 +28,19 @@ expect_r6_class <- function(object, class) { expect_s3_class(object, "R6") } -expect_equivalent <- function(object, expected, ...) { - # HACK: dplyr includes an all.equal.tbl_df method that is causing failures. - # They look spurious, like: - # `Can't join on 'b' x 'b' because of incompatible types (tbl_df/tbl/data.frame / tbl_df/tbl/data.frame)` # nolint - if (tibble::is_tibble(object)) { - class(object) <- "data.frame" - } - if (tibble::is_tibble(expected)) { - class(expected) <- "data.frame" +expect_equal <- function(object, expected, ignore_attr = FALSE, ..., info = NULL, label = NULL) { + if (inherits(object, "ArrowObject") && inherits(expected, "ArrowObject")) { + mc <- match.call() + expect_true( + all.equal(object, expected, check.attributes = !ignore_attr), + info = info, + label = paste(rlang::as_label(mc[["object"]]), "==", rlang::as_label(mc[["expected"]])) + ) + } else { + testthat::expect_equal(object, expected, ignore_attr = ignore_attr, ..., info = info, label = label) } - testthat::expect_equivalent(object, expected, ...) } -# expect_equal but for DataTypes, so the error prints better expect_type_equal <- function(object, expected, ...) { if (is.Array(object)) { object <- object$type @@ -54,7 +48,7 @@ expect_type_equal <- function(object, expected, ...) { if (is.Array(expected)) { expected <- expected$type } - expect_equal(object, expected, ..., label = object$ToString(), expected.label = expected$ToString()) + expect_equal(object, expected, ...) } expect_match_arg_error <- function(object, values = c()) { @@ -79,7 +73,7 @@ verify_output <- function(...) { #' * `NA` (the default) for ensuring no warning message #' * `TRUE` is a special case to mean to check for the #' "not supported in Arrow; pulling data into R" message. -#' @param ... additional arguments, passed to `expect_equivalent()` +#' @param ... additional arguments, passed to `expect_equal()` expect_dplyr_equal <- function(expr, tbl, skip_record_batch = NULL, diff --git a/r/tests/testthat/helper-parquet.R b/r/tests/testthat/helper-parquet.R index e2fd761b7fa..7697d24d39d 100644 --- a/r/tests/testthat/helper-parquet.R +++ b/r/tests/testthat/helper-parquet.R @@ -16,7 +16,7 @@ # under the License. expect_parquet_roundtrip <- function(tab, ...) { - expect_equivalent(parquet_roundtrip(tab, ...), tab) + expect_equal(parquet_roundtrip(tab, ...), tab) } parquet_roundtrip <- function(x, ...) { diff --git a/r/tests/testthat/helper-roundtrip.R b/r/tests/testthat/helper-roundtrip.R index 4aa435cd298..80bcb42f1be 100644 --- a/r/tests/testthat/helper-roundtrip.R +++ b/r/tests/testthat/helper-roundtrip.R @@ -17,27 +17,28 @@ expect_array_roundtrip <- function(x, type, as = NULL) { a <- Array$create(x, type = as) - expect_type_equal(a$type, type) + expect_equal(a$type, type) expect_identical(length(a), length(x)) if (!inherits(type, c("ListType", "LargeListType", "FixedSizeListType"))) { # TODO: revisit how missingness works with ListArrays # R list objects don't handle missingness the same way as other vectors. # Is there some vctrs thing we should do on the roundtrip back to R? - expect_equal(as.vector(is.na(a)), is.na(x)) + expect_as_vector(is.na(a), is.na(x)) } - expect_equivalent(as.vector(a), x) + roundtrip <- as.vector(a) + expect_equal(roundtrip, x, ignore_attr = TRUE) # Make sure the storage mode is the same on roundtrip (esp. integer vs. numeric) - expect_identical(typeof(as.vector(a)), typeof(x)) + expect_identical(typeof(roundtrip), typeof(x)) if (length(x)) { a_sliced <- a$Slice(1) x_sliced <- x[-1] - expect_type_equal(a_sliced$type, type) + expect_equal(a_sliced$type, type) expect_identical(length(a_sliced), length(x_sliced)) if (!inherits(type, c("ListType", "LargeListType", "FixedSizeListType"))) { - expect_equal(as.vector(is.na(a_sliced)), is.na(x_sliced)) + expect_as_vector(is.na(a_sliced), is.na(x_sliced)) } - expect_equivalent(as.vector(a_sliced), x_sliced) + expect_as_vector(a_sliced, x_sliced, ignore_attr = TRUE) } invisible(a) } diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R index a2fd7bfec86..520c50e218b 100644 --- a/r/tests/testthat/test-Array.R +++ b/r/tests/testthat/test-Array.R @@ -15,8 +15,6 @@ # specific language governing permissions and limitations # under the License. -context("Array") - test_that("Integer Array", { ints <- c(1:10, 1:10, 1:5) x <- expect_array_roundtrip(ints, int32()) @@ -157,8 +155,8 @@ test_that("Array supports NA", { expect_true(x_int$IsNull(10L)) expect_true(x_dbl$IsNull(10)) - expect_equal(as.vector(is.na(x_int)), c(rep(FALSE, 10), TRUE)) - expect_equal(as.vector(is.na(x_dbl)), c(rep(FALSE, 10), TRUE)) + expect_as_vector(is.na(x_int), c(rep(FALSE, 10), TRUE)) + expect_as_vector(is.na(x_dbl), c(rep(FALSE, 10), TRUE)) # Input validation expect_error(x_int$IsValid("ten")) @@ -438,12 +436,12 @@ test_that("Array$as_vector() converts to integer (ARROW-3794)", { i8 <- (-128):127 a <- Array$create(i8)$cast(int8()) expect_type_equal(a, int8()) - expect_equal(as.vector(a), i8) + expect_as_vector(a, i8) u8 <- 0:255 a <- Array$create(u8)$cast(uint8()) expect_type_equal(a, uint8()) - expect_equal(as.vector(a), u8) + expect_as_vector(a, u8) }) test_that("Arrays of {,u}int{32,64} convert to integer if they can fit", { @@ -474,7 +472,7 @@ test_that("Array$create() handles data frame -> struct arrays (ARROW-3811)", { df <- tibble::tibble(x = 1:10, y = x / 2, z = letters[1:10]) a <- Array$create(df) expect_type_equal(a$type, struct(x = int32(), y = float64(), z = utf8())) - expect_equivalent(as.vector(a), df) + expect_as_vector(a, df) df <- structure( list(col = structure(list(structure(list(list(structure(1))), class = "inner")), class = "outer")), @@ -482,7 +480,7 @@ test_that("Array$create() handles data frame -> struct arrays (ARROW-3811)", { ) a <- Array$create(df) expect_type_equal(a$type, struct(col = list_of(list_of(list_of(float64()))))) - expect_equivalent(as.vector(a), df) + expect_as_vector(a, df, ignore_attr = TRUE) }) test_that("StructArray methods", { @@ -791,7 +789,7 @@ test_that("is.Array", { test_that("Array$Take()", { a <- Array$create(10:20) - expect_equal(as.vector(a$Take(c(4, 2))), c(14, 12)) + expect_as_vector(a$Take(c(4, 2)), c(14, 12)) }) test_that("[ method on Array", { diff --git a/r/tests/testthat/test-RecordBatch.R b/r/tests/testthat/test-RecordBatch.R index dc327c07981..ff9dd9e6f9e 100644 --- a/r/tests/testthat/test-RecordBatch.R +++ b/r/tests/testthat/test-RecordBatch.R @@ -339,7 +339,7 @@ test_that("record_batch() handles data frame columns", { tib <- tibble::tibble(x = 1:10, y = 1:10) # because tib is named here, this becomes a struct array batch <- record_batch(a = 1:10, b = tib) - expect_equivalent( + expect_equal( batch$schema, schema( a = int32(), @@ -347,7 +347,7 @@ test_that("record_batch() handles data frame columns", { ) ) out <- as.data.frame(batch) - expect_equivalent(out, tibble::tibble(a = 1:10, b = tib)) + expect_equal(out, tibble::tibble(a = 1:10, b = tib)) # if not named, columns from tib are auto spliced batch2 <- record_batch(a = 1:10, tib) @@ -356,7 +356,7 @@ test_that("record_batch() handles data frame columns", { schema(a = int32(), x = int32(), y = int32()) ) out <- as.data.frame(batch2) - expect_equivalent(out, tibble::tibble(a = 1:10, !!!tib)) + expect_equal(out, tibble::tibble(a = 1:10, !!!tib)) }) test_that("record_batch() handles data frame columns with schema spec", { @@ -365,9 +365,9 @@ test_that("record_batch() handles data frame columns with schema spec", { tib_float$y <- as.numeric(tib_float$y) schema <- schema(a = int32(), b = struct(x = int16(), y = float64())) batch <- record_batch(a = 1:10, b = tib, schema = schema) - expect_equivalent(batch$schema, schema) + expect_equal(batch$schema, schema) out <- as.data.frame(batch) - expect_equivalent(out, tibble::tibble(a = 1:10, b = tib_float)) + expect_equal(out, tibble::tibble(a = 1:10, b = tib_float)) schema <- schema(a = int32(), b = struct(x = int16(), y = utf8())) expect_error(record_batch(a = 1:10, b = tib, schema = schema)) @@ -385,21 +385,27 @@ test_that("record_batch() auto splices (ARROW-5718)", { batch4 <- record_batch(!!!df, z = 1:10) expect_equal(batch3, batch4) expect_equal(batch3$schema, schema(x = int32(), y = utf8(), z = int32())) - expect_equivalent(as.data.frame(batch3), cbind(df, data.frame(z = 1:10))) + expect_equal( + as.data.frame(batch3), + tibble::as_tibble(cbind(df, data.frame(z = 1:10))) + ) s <- schema(x = float64(), y = utf8()) batch5 <- record_batch(df, schema = s) batch6 <- record_batch(!!!df, schema = s) expect_equal(batch5, batch6) expect_equal(batch5$schema, s) - expect_equivalent(as.data.frame(batch5), df) + expect_equal(as.data.frame(batch5), df) s2 <- schema(x = float64(), y = utf8(), z = int16()) batch7 <- record_batch(df, z = 1:10, schema = s2) batch8 <- record_batch(!!!df, z = 1:10, schema = s2) expect_equal(batch7, batch8) expect_equal(batch7$schema, s2) - expect_equivalent(as.data.frame(batch7), cbind(df, data.frame(z = 1:10))) + expect_equal( + as.data.frame(batch7), + tibble::as_tibble(cbind(df, data.frame(z = 1:10))) + ) }) test_that("record_batch() only auto splice data frames", { @@ -411,7 +417,11 @@ test_that("record_batch() only auto splice data frames", { test_that("record_batch() handles null type (ARROW-7064)", { batch <- record_batch(a = 1:10, n = vctrs::unspecified(10)) - expect_equivalent(batch$schema, schema(a = int32(), n = null())) + expect_equal( + batch$schema, + schema(a = int32(), n = null()), + ignore_attr = TRUE + ) }) test_that("record_batch() scalar recycling with vectors", { @@ -481,7 +491,7 @@ test_that("RecordBatch$Equals(check_metadata)", { expect_false(rb1$Equals(rb2, check_metadata = TRUE)) expect_failure(expect_equal(rb1, rb2)) # expect_equal has check_metadata=TRUE - expect_equivalent(rb1, rb2) # expect_equivalent has check_metadata=FALSE + expect_equal(rb1, rb2, ignore_attr = TRUE) # this passes check_metadata=FALSE expect_false(rb1$Equals(24)) # Not a RecordBatch }) @@ -526,9 +536,10 @@ test_that("Handling string data with embedded nuls", { withr::with_options(list(arrow.skip_nul = TRUE), { expect_warning( - expect_equivalent( + expect_equal( as.data.frame(batch_with_nul)$b, - c("person", "woman", "man", "camera", "tv") + c("person", "woman", "man", "camera", "tv"), + ignore_attr = TRUE ), "Stripping '\\0' (nul) from character vector", fixed = TRUE diff --git a/r/tests/testthat/test-Table.R b/r/tests/testthat/test-Table.R index 00ba4036164..44144c00baf 100644 --- a/r/tests/testthat/test-Table.R +++ b/r/tests/testthat/test-Table.R @@ -306,7 +306,7 @@ test_that("table() handles record batches with splicing", { tab <- Table$create(batch, batch, batch) expect_equal(tab$schema, batch$schema) expect_equal(tab$num_rows, 6L) - expect_equivalent( + expect_equal( as.data.frame(tab), vctrs::vec_rbind(as.data.frame(batch), as.data.frame(batch), as.data.frame(batch)) ) @@ -315,7 +315,7 @@ test_that("table() handles record batches with splicing", { tab <- Table$create(!!!batches) expect_equal(tab$schema, batch$schema) expect_equal(tab$num_rows, 6L) - expect_equivalent( + expect_equal( as.data.frame(tab), vctrs::vec_rbind(!!!purrr::map(batches, as.data.frame)) ) @@ -347,14 +347,14 @@ test_that("table() auto splices (ARROW-5718)", { tab2 <- Table$create(!!!df) expect_equal(tab1, tab2) expect_equal(tab1$schema, schema(x = int32(), y = utf8())) - expect_equivalent(as.data.frame(tab1), df) + expect_equal(as.data.frame(tab1), df) s <- schema(x = float64(), y = utf8()) tab3 <- Table$create(df, schema = s) tab4 <- Table$create(!!!df, schema = s) expect_equal(tab3, tab4) expect_equal(tab3$schema, s) - expect_equivalent(as.data.frame(tab3), df) + expect_equal(as.data.frame(tab3), df) }) test_that("Validation when creating table with schema (ARROW-10953)", { @@ -412,14 +412,14 @@ test_that("Table$Equals(check_metadata)", { expect_false(tab1$Equals(tab2, check_metadata = TRUE)) expect_failure(expect_equal(tab1, tab2)) # expect_equal has check_metadata=TRUE - expect_equivalent(tab1, tab2) # expect_equivalent has check_metadata=FALSE + expect_equal(tab1, tab2, ignore_attr = TRUE) # this sets check_metadata=FALSE expect_false(tab1$Equals(24)) # Not a Table }) test_that("Table handles null type (ARROW-7064)", { tab <- Table$create(a = 1:10, n = vctrs::unspecified(10)) - expect_equivalent(tab$schema, schema(a = int32(), n = null())) + expect_equal(tab$schema, schema(a = int32(), n = null()), ignore_attr = TRUE) }) test_that("Can create table with specific dictionary types", { diff --git a/r/tests/testthat/test-altrep.R b/r/tests/testthat/test-altrep.R index 8cb989b1d4c..eb811686850 100644 --- a/r/tests/testthat/test-altrep.R +++ b/r/tests/testthat/test-altrep.R @@ -15,8 +15,6 @@ # specific language governing permissions and limitations # under the License. -context("altrep") - skip_if(getRversion() <= "3.5.0") test_that("altrep vectors from int32 and dbl arrays with no nulls", { diff --git a/r/tests/testthat/test-array-data.R b/r/tests/testthat/test-array-data.R index cfa0e629f04..05d070d8a88 100644 --- a/r/tests/testthat/test-array-data.R +++ b/r/tests/testthat/test-array-data.R @@ -15,8 +15,6 @@ # specific language governing permissions and limitations # under the License. -context("ArrayData") - test_that("string vectors with only empty strings and nulls don't allocate a data buffer (ARROW-3693)", { a <- Array$create("") expect_equal(a$length(), 1L) diff --git a/r/tests/testthat/test-arrow.R b/r/tests/testthat/test-arrow.R index 0c296aa3081..48970ab8945 100644 --- a/r/tests/testthat/test-arrow.R +++ b/r/tests/testthat/test-arrow.R @@ -15,8 +15,6 @@ # specific language governing permissions and limitations # under the License. -context("General checks") - if (!identical(tolower(Sys.getenv("TEST_R_WITHOUT_LIBARROW")), "true")) { testthat::test_that("Arrow C++ is available", { skip_on_cran() @@ -69,10 +67,10 @@ test_that("MemoryPool calls gc() to free memory when allocation fails (ARROW-100 skip_on_valgrind() env <- new.env() - trace(gc, print = FALSE, tracer = function() { + suppressMessages(trace(gc, print = FALSE, tracer = function() { env$gc_was_called <- TRUE - }) - on.exit(untrace(gc)) + })) + on.exit(suppressMessages(untrace(gc))) # We expect this should fail because we don't have this much memory, # but it should gc() and retry (and fail again) expect_error(BufferOutputStream$create(2**60)) diff --git a/r/tests/testthat/test-backwards-compatibility.R b/r/tests/testthat/test-backwards-compatibility.R index 145a21de7ff..32e86d5f68c 100644 --- a/r/tests/testthat/test-backwards-compatibility.R +++ b/r/tests/testthat/test-backwards-compatibility.R @@ -114,7 +114,7 @@ for (comp in c("lz4", "uncompressed", "zstd")) { # though classes are always checked, so that must be removed before checking. example_with_metadata_sans_special_class <- example_with_metadata example_with_metadata_sans_special_class$a <- unclass(example_with_metadata_sans_special_class$a) - expect_equal(df, example_with_metadata_sans_special_class, check.attributes = FALSE) + expect_equal(df, example_with_metadata_sans_special_class, ignore_attr = TRUE) }) } diff --git a/r/tests/testthat/test-buffer-reader.R b/r/tests/testthat/test-buffer-reader.R index 865ee7d4e4b..b790ed0dafd 100644 --- a/r/tests/testthat/test-buffer-reader.R +++ b/r/tests/testthat/test-buffer-reader.R @@ -15,8 +15,6 @@ # specific language governing permissions and limitations # under the License. -context("BufferReader") - test_that("BufferReader can be created from R objects", { num <- BufferReader$create(numeric(13)) int <- BufferReader$create(integer(13)) diff --git a/r/tests/testthat/test-buffer.R b/r/tests/testthat/test-buffer.R index 0d24ab02537..9b3ebc6de9c 100644 --- a/r/tests/testthat/test-buffer.R +++ b/r/tests/testthat/test-buffer.R @@ -15,8 +15,6 @@ # specific language governing permissions and limitations # under the License. -context("Buffer") - test_that("Buffer can be created from raw vector", { vec <- raw(123) buf <- buffer(vec) diff --git a/r/tests/testthat/test-chunked-array.R b/r/tests/testthat/test-chunked-array.R index 8ff4e6684c4..4a191ee36ec 100644 --- a/r/tests/testthat/test-chunked-array.R +++ b/r/tests/testthat/test-chunked-array.R @@ -15,7 +15,6 @@ # specific language governing permissions and limitations # under the License. -context("ChunkedArray") expect_chunked_roundtrip <- function(x, type) { a <- ChunkedArray$create(!!!x) @@ -30,8 +29,8 @@ expect_chunked_roundtrip <- function(x, type) { # Is there some vctrs thing we should do on the roundtrip back to R? expect_identical(as.vector(is.na(a)), is.na(flat_x)) } - expect_equal(as.vector(a), flat_x) - expect_equal(as.vector(a$chunk(0)), x[[1]]) + expect_as_vector(a, flat_x) + expect_as_vector(a$chunk(0), x[[1]]) if (length(flat_x)) { a_sliced <- a$Slice(1) @@ -41,7 +40,7 @@ expect_chunked_roundtrip <- function(x, type) { if (!inherits(type, "ListType")) { expect_identical(as.vector(is.na(a_sliced)), is.na(x_sliced)) } - expect_equal(as.vector(a_sliced), x_sliced) + expect_as_vector(a_sliced, x_sliced) } invisible(a) } @@ -53,7 +52,7 @@ test_that("ChunkedArray", { expect_equal(y$type, int32()) expect_equal(y$num_chunks, 3L) expect_equal(length(y), 17L) - expect_equal(as.vector(y), c(9:10, 1:10, 1:5)) + expect_as_vector(y, c(9:10, 1:10, 1:5)) z <- x$Slice(8, 5) expect_equal(z$type, int32()) @@ -114,10 +113,10 @@ test_that("ChunkedArray handles Inf", { expect_equal(x$type, float64()) expect_equal(x$num_chunks, 3L) expect_equal(length(x), 25L) - expect_equal(as.vector(x), c(c(Inf, 2:10), c(1:3, Inf, 5), 1:10)) + expect_as_vector(x, c(c(Inf, 2:10), c(1:3, Inf, 5), 1:10)) chunks <- x$chunks - expect_equal(as.vector(is.infinite(chunks[[2]])), is.infinite(data[[2]])) + expect_as_vector(is.infinite(chunks[[2]]), is.infinite(data[[2]])) expect_equal( as.vector(is.infinite(x)), c(is.infinite(data[[1]]), is.infinite(data[[2]]), is.infinite(data[[3]])) @@ -130,11 +129,11 @@ test_that("ChunkedArray handles NA", { expect_equal(x$type, int32()) expect_equal(x$num_chunks, 3L) expect_equal(length(x), 25L) - expect_equal(as.vector(x), c(1:10, c(NA, 2:10), c(1:3, NA, 5))) + expect_as_vector(x, c(1:10, c(NA, 2:10), c(1:3, NA, 5))) chunks <- x$chunks - expect_equal(as.vector(is.na(chunks[[2]])), is.na(data[[2]])) - expect_equal(as.vector(is.na(x)), c(is.na(data[[1]]), is.na(data[[2]]), is.na(data[[3]]))) + expect_as_vector(is.na(chunks[[2]]), is.na(data[[2]])) + expect_as_vector(is.na(x), c(is.na(data[[1]]), is.na(data[[2]]), is.na(data[[3]]))) }) test_that("ChunkedArray handles NaN", { @@ -144,11 +143,11 @@ test_that("ChunkedArray handles NaN", { expect_equal(x$type, float64()) expect_equal(x$num_chunks, 3L) expect_equal(length(x), 25L) - expect_equal(as.vector(x), c(1:10, c(NaN, 2:10), c(1:3, NaN, 5))) + expect_as_vector(x, c(1:10, c(NaN, 2:10), c(1:3, NaN, 5))) chunks <- x$chunks - expect_equal(as.vector(is.nan(chunks[[2]])), is.nan(data[[2]])) - expect_equal(as.vector(is.nan(x)), c(is.nan(data[[1]]), is.nan(data[[2]]), is.nan(data[[3]]))) + expect_as_vector(is.nan(chunks[[2]]), is.nan(data[[2]])) + expect_as_vector(is.nan(x), c(is.nan(data[[1]]), is.nan(data[[2]]), is.nan(data[[3]]))) }) test_that("ChunkedArray supports logical vectors (ARROW-3341)", { @@ -295,7 +294,7 @@ test_that("chunked_array() uses the first ... to infer type", { test_that("chunked_array() handles downcasting", { a <- chunked_array(10L, 10) expect_type_equal(a$type, int32()) - expect_equal(as.vector(a), c(10L, 10L)) + expect_as_vector(a, c(10L, 10L)) }) test_that("chunked_array() makes chunks of the same type", { @@ -324,7 +323,7 @@ test_that("chunked_array() handles data frame -> struct arrays (ARROW-3811)", { df <- tibble::tibble(x = 1:10, y = x / 2, z = letters[1:10]) a <- chunked_array(df, df) expect_type_equal(a$type, struct(x = int32(), y = float64(), z = utf8())) - expect_equivalent(a$as_vector(), rbind(df, df)) + expect_equal(a$as_vector(), rbind(df, df), ignore_attr = TRUE) }) test_that("ChunkedArray$View() (ARROW-6542)", { diff --git a/r/tests/testthat/test-compressed.R b/r/tests/testthat/test-compressed.R index fbef8eb9314..d796e3e7546 100644 --- a/r/tests/testthat/test-compressed.R +++ b/r/tests/testthat/test-compressed.R @@ -15,8 +15,6 @@ # specific language governing permissions and limitations # under the License. -context("Compressed.*Stream") - test_that("codec_is_available", { expect_true(codec_is_available("uncompressed")) # Always true expect_match_arg_error(codec_is_available("sdfasdf")) diff --git a/r/tests/testthat/test-compute-aggregate.R b/r/tests/testthat/test-compute-aggregate.R index eb1282e6ffb..03d0a775143 100644 --- a/r/tests/testthat/test-compute-aggregate.R +++ b/r/tests/testthat/test-compute-aggregate.R @@ -15,8 +15,6 @@ # specific language governing permissions and limitations # under the License. -context("compute: aggregation") - test_that("list_compute_functions", { allfuncs <- list_compute_functions() expect_false(all(grepl("min", allfuncs))) @@ -205,15 +203,15 @@ test_that("max.ChunkedArray", { test_that("Edge cases", { a <- Array$create(NA) for (type in c(int32(), float64(), bool())) { - expect_equal(as.vector(sum(a$cast(type), na.rm = TRUE)), sum(NA, na.rm = TRUE)) - expect_equal(as.vector(mean(a$cast(type), na.rm = TRUE)), mean(NA, na.rm = TRUE)) - expect_equal( - as.vector(min(a$cast(type), na.rm = TRUE)), + expect_as_vector(sum(a$cast(type), na.rm = TRUE), sum(NA, na.rm = TRUE)) + expect_as_vector(mean(a$cast(type), na.rm = TRUE), mean(NA, na.rm = TRUE)) + expect_as_vector( + min(a$cast(type), na.rm = TRUE), # Suppress the base R warning about no non-missing arguments suppressWarnings(min(NA, na.rm = TRUE)) ) - expect_equal( - as.vector(max(a$cast(type), na.rm = TRUE)), + expect_as_vector( + max(a$cast(type), na.rm = TRUE), suppressWarnings(max(NA, na.rm = TRUE)) ) } diff --git a/r/tests/testthat/test-compute-sort.R b/r/tests/testthat/test-compute-sort.R index 373237ff9a1..f35b74079d0 100644 --- a/r/tests/testthat/test-compute-sort.R +++ b/r/tests/testthat/test-compute-sort.R @@ -15,9 +15,7 @@ # specific language governing permissions and limitations # under the License. -context("compute: sorting") - -library(dplyr) +library(dplyr, warn.conflicts = FALSE) # randomize order of rows in test data tbl <- slice_sample(example_data_for_sorting, prop = 1L) @@ -154,4 +152,4 @@ test_that("RecordBatch$SortIndices()", { as.data.frame(x$Take(x$SortIndices(c("chr", "int", "dbl"), TRUE))), tbl %>% arrange(desc(chr), desc(int), desc(dbl)) ) -}) +}) \ No newline at end of file diff --git a/r/tests/testthat/test-csv.R b/r/tests/testthat/test-csv.R index cfb0cd228b9..c3c9010b857 100644 --- a/r/tests/testthat/test-csv.R +++ b/r/tests/testthat/test-csv.R @@ -44,7 +44,7 @@ test_that("read_csv_arrow(as_data_frame=TRUE)", { write.csv(tbl, tf, row.names = FALSE) tab1 <- read_csv_arrow(tf, as_data_frame = TRUE) - expect_equivalent(tbl, tab1) + expect_equal(tbl, tab1) }) test_that("read_delim_arrow parsing options: delim", { @@ -54,8 +54,8 @@ test_that("read_delim_arrow parsing options: delim", { write.table(tbl, tf, sep = "\t", row.names = FALSE) tab1 <- read_tsv_arrow(tf) tab2 <- read_delim_arrow(tf, delim = "\t") - expect_equivalent(tab1, tab2) - expect_equivalent(tbl, tab1) + expect_equal(tab1, tab2) + expect_equal(tbl, tab1) }) test_that("read_delim_arrow parsing options: quote", { @@ -69,9 +69,9 @@ test_that("read_delim_arrow parsing options: quote", { # Is this a problem? # Component “a”: target is integer64, current is numeric tab1$a <- as.numeric(tab1$a) - expect_equivalent( + expect_equal( tab1, - data.frame(a = c(1, 2), b = c("abc", "def"), stringsAsFactors = FALSE) + tibble::tibble(a = c(1, 2), b = c("abc", "def")) ) }) @@ -89,7 +89,7 @@ test_that("read_csv_arrow parsing options: col_names", { tab1 <- read_csv_arrow(tf, col_names = names(tbl)) expect_identical(names(tab1), names(tbl)) - expect_equivalent(tbl, tab1) + expect_equal(tbl, tab1) # This errors (correctly) because I haven't given enough names # but the error message is "Invalid: Empty CSV file", which is not accurate @@ -113,7 +113,7 @@ test_that("read_csv_arrow parsing options: skip", { tab1 <- read_csv_arrow(tf, skip = 2) expect_identical(names(tab1), names(tbl)) - expect_equivalent(tbl, tab1) + expect_equal(tbl, tab1) }) test_that("read_csv_arrow parsing options: skip_empty_rows", { @@ -175,7 +175,7 @@ test_that("read_csv_arrow() can detect compression from file name", { write.csv(tbl, gzfile(tf), row.names = FALSE, quote = FALSE) tab1 <- read_csv_arrow(tf) - expect_equivalent(tbl, tab1) + expect_equal(tbl, tab1) }) test_that("read_csv_arrow(schema=)", { @@ -223,7 +223,7 @@ test_that("read_csv_arrow() can read timestamps", { expect_equal(tbl, df) df <- read_csv_arrow(tf, col_types = "t", col_names = "time", skip = 1) - expect_equal(tbl, df, check.tzone = FALSE) # col_types = "t" makes timezone-naive timestamp + expect_equal(tbl, df, ignore_attr = "tzone") # col_types = "t" makes timezone-naive timestamp }) test_that("read_csv_arrow(timestamp_parsers=)", { diff --git a/r/tests/testthat/test-data-type.R b/r/tests/testthat/test-data-type.R index 51d73b589c8..a9d0879b8a0 100644 --- a/r/tests/testthat/test-data-type.R +++ b/r/tests/testthat/test-data-type.R @@ -15,8 +15,6 @@ # specific language governing permissions and limitations # under the License. -context("DataType") - test_that("null type works as expected", { x <- null() expect_equal(x$id, 0L) @@ -331,8 +329,8 @@ test_that("list type works as expected", { expect_false(x == null()) expect_equal(x$num_fields, 1L) expect_equal( - x$fields(), - list(field("item", int32())) + x$fields()[[1]], + field("item", int32()) ) expect_equal(x$value_type, int32()) expect_equal(x$value_field, field("item", int32())) @@ -347,8 +345,12 @@ test_that("struct type works as expected", { expect_false(x == null()) expect_equal(x$num_fields, 2L) expect_equal( - x$fields(), - list(field("x", int32()), field("y", boolean())) + x$fields()[[1]], + field("x", int32()) + ) + expect_equal( + x$fields()[[2]], + field("y", boolean()) ) expect_equal(x$GetFieldIndex("x"), 0L) expect_equal(x$GetFieldIndex("y"), 1L) diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R index 837bf8048c5..ebe3bf81511 100644 --- a/r/tests/testthat/test-dataset.R +++ b/r/tests/testthat/test-dataset.R @@ -17,9 +17,7 @@ skip_if_not_available("dataset") -context("Dataset") - -library(dplyr) +library(dplyr, warn.conflicts = FALSE) dataset_dir <- make_temp_dir() hive_dir <- make_temp_dir() @@ -98,7 +96,7 @@ test_that("Simple interface for datasets", { expect_r6_class(ds$format, "ParquetFileFormat") expect_r6_class(ds$filesystem, "LocalFileSystem") expect_r6_class(ds, "Dataset") - expect_equivalent( + expect_equal( ds %>% select(chr, dbl) %>% filter(dbl > 7 & dbl < 53L) %>% # Testing the auto-casting of scalars @@ -110,7 +108,7 @@ test_that("Simple interface for datasets", { ) ) - expect_equivalent( + expect_equal( ds %>% select(string = chr, integer = int, part) %>% filter(integer > 6 & part == 1) %>% # 6 not 6L to test autocasting @@ -165,8 +163,8 @@ test_that("dataset from single local file path", { skip_on_os("windows") skip_if_not_available("parquet") ds <- open_dataset(files[1]) - expect_is(ds, "Dataset") - expect_equivalent( + expect_r6_class(ds, "Dataset") + expect_equal( ds %>% select(chr, dbl) %>% filter(dbl > 7) %>% @@ -180,8 +178,8 @@ test_that("dataset from vector of file paths", { skip_on_os("windows") skip_if_not_available("parquet") ds <- open_dataset(files) - expect_is(ds, "Dataset") - expect_equivalent( + expect_r6_class(ds, "Dataset") + expect_equal( ds %>% select(chr, dbl) %>% filter(dbl > 7 & dbl < 53L) %>% @@ -200,7 +198,7 @@ test_that("dataset from directory URI", { uri <- paste0("file://", dataset_dir) ds <- open_dataset(uri, partitioning = schema(part = uint8())) expect_r6_class(ds, "Dataset") - expect_equivalent( + expect_equal( ds %>% select(chr, dbl) %>% filter(dbl > 7 & dbl < 53L) %>% @@ -218,8 +216,8 @@ test_that("dataset from single file URI", { skip_if_not_available("parquet") uri <- paste0("file://", files[1]) ds <- open_dataset(uri) - expect_is(ds, "Dataset") - expect_equivalent( + expect_r6_class(ds, "Dataset") + expect_equal( ds %>% select(chr, dbl) %>% filter(dbl > 7) %>% @@ -234,8 +232,8 @@ test_that("dataset from vector of file URIs", { skip_if_not_available("parquet") uris <- paste0("file://", files) ds <- open_dataset(uris) - expect_is(ds, "Dataset") - expect_equivalent( + expect_r6_class(ds, "Dataset") + expect_equal( ds %>% select(chr, dbl) %>% filter(dbl > 7 & dbl < 53L) %>% @@ -270,7 +268,7 @@ test_that("Hive partitioning", { skip_if_not_available("parquet") ds <- open_dataset(hive_dir, partitioning = hive_partition(other = utf8(), group = uint8())) expect_r6_class(ds, "Dataset") - expect_equivalent( + expect_equal( ds %>% filter(group == 2) %>% select(chr, dbl) %>% @@ -293,7 +291,7 @@ test_that("Partitioning inference", { # These are the same tests as above, just using the *PartitioningFactory ds1 <- open_dataset(dataset_dir, partitioning = "part") expect_identical(names(ds1), c(names(df1), "part")) - expect_equivalent( + expect_equal( ds1 %>% select(string = chr, integer = int, part) %>% filter(integer > 6 & part == 1) %>% @@ -307,7 +305,7 @@ test_that("Partitioning inference", { ds2 <- open_dataset(hive_dir) expect_identical(names(ds2), c(names(df1), "group", "other")) - expect_equivalent( + expect_equal( ds2 %>% filter(group == 2) %>% select(chr, dbl) %>% @@ -325,7 +323,7 @@ test_that("IPC/Feather format data", { expect_identical(names(ds), c(names(df1), "part")) expect_identical(dim(ds), c(20L, 7L)) - expect_equivalent( + expect_equal( ds %>% select(string = chr, integer = int, part) %>% filter(integer > 6 & part == 3) %>% @@ -353,7 +351,7 @@ test_that("CSV dataset", { # CountRows segfaults on RTools35/R 3.6, so don't test it there expect_identical(dim(ds), c(20L, 7L)) } - expect_equivalent( + expect_equal( ds %>% select(string = chr, integer = int, part) %>% filter(integer > 6 & part == 5) %>% @@ -386,13 +384,13 @@ test_that("CSV scan options", { write.csv(df, dst_file, row.names = FALSE, quote = FALSE) ds <- open_dataset(dst_dir, format = "csv") - expect_equivalent(ds %>% collect(), df) + expect_equal(ds %>% collect(), df) sb <- ds$NewScan() sb$FragmentScanOptions(options) tab <- sb$Finish()$ToTable() - expect_equivalent(as.data.frame(tab), tibble(chr = c("foo", NA))) + expect_equal(as.data.frame(tab), tibble(chr = c("foo", NA))) # Set default convert options in CsvFileFormat csv_format <- CsvFileFormat$create( @@ -400,7 +398,7 @@ test_that("CSV scan options", { strings_can_be_null = TRUE ) ds <- open_dataset(dst_dir, format = csv_format) - expect_equivalent(ds %>% collect(), tibble(chr = c("foo", NA))) + expect_equal(ds %>% collect(), tibble(chr = c("foo", NA))) # Set both parse and convert options df <- tibble(chr = c("foo", "mynull"), chr2 = c("bar", "baz")) @@ -411,7 +409,7 @@ test_that("CSV scan options", { null_values = c("mynull"), strings_can_be_null = TRUE ) - expect_equivalent(ds %>% collect(), tibble( + expect_equal(ds %>% collect(), tibble( chr = c("foo", NA), chr2 = c("bar", "baz") )) @@ -427,7 +425,7 @@ test_that("compressed CSV dataset", { expect_r6_class(ds$format, "CsvFileFormat") expect_r6_class(ds$filesystem, "LocalFileSystem") - expect_equivalent( + expect_equal( ds %>% select(string = chr, integer = int) %>% filter(integer > 6 & integer < 11) %>% @@ -449,7 +447,7 @@ test_that("CSV dataset options", { format <- FileFormat$create("csv", skip_rows = 1) ds <- open_dataset(dst_dir, format = format) - expect_equivalent( + expect_equal( ds %>% select(string = a) %>% collect(), @@ -459,17 +457,17 @@ test_that("CSV dataset options", { ds <- open_dataset(dst_dir, format = "csv", column_names = c("foo")) - expect_equivalent( + expect_equal( ds %>% select(string = foo) %>% collect(), - tibble(foo = c(c("chr"), letters[1:10])) + tibble(string = c(c("chr"), letters[1:10])) ) }) test_that("Other text delimited dataset", { ds1 <- open_dataset(tsv_dir, partitioning = "part", format = "tsv") - expect_equivalent( + expect_equal( ds1 %>% select(string = chr, integer = int, part) %>% filter(integer > 6 & part == 5) %>% @@ -482,7 +480,7 @@ test_that("Other text delimited dataset", { ) ds2 <- open_dataset(tsv_dir, partitioning = "part", format = "text", delimiter = "\t") - expect_equivalent( + expect_equal( ds2 %>% select(string = chr, integer = int, part) %>% filter(integer > 6 & part == 5) %>% @@ -550,7 +548,7 @@ test_that("readr parse options", { # With only readr parse options (and omitting format = "text") ds1 <- open_dataset(tsv_dir, partitioning = "part", delim = "\t") - expect_equivalent( + expect_equal( ds1 %>% select(string = chr, integer = int, part) %>% filter(integer > 6 & part == 5) %>% @@ -571,7 +569,7 @@ test_that("Dataset with multiple file formats", { open_dataset(ipc_dir, format = "arrow", partitioning = "part") )) expect_identical(names(ds), c(names(df1), "part")) - expect_equivalent( + expect_equal( ds %>% filter(int > 6 & part %in% c(1, 3)) %>% select(string = chr, integer = int) %>% @@ -589,7 +587,7 @@ test_that("Creating UnionDataset", { ds2 <- open_dataset(file.path(dataset_dir, 2)) union1 <- open_dataset(list(ds1, ds2)) expect_r6_class(union1, "UnionDataset") - expect_equivalent( + expect_equal( union1 %>% select(chr, dbl) %>% filter(dbl > 7 & dbl < 53L) %>% # Testing the auto-casting of scalars @@ -604,7 +602,7 @@ test_that("Creating UnionDataset", { # Now with the c() method union2 <- c(ds1, ds2) expect_r6_class(union2, "UnionDataset") - expect_equivalent( + expect_equal( union2 %>% select(chr, dbl) %>% filter(dbl > 7 & dbl < 53L) %>% # Testing the auto-casting of scalars @@ -624,7 +622,7 @@ test_that("map_batches", { skip("map_batches() is broken (ARROW-14029)") skip_if_not_available("parquet") ds <- open_dataset(dataset_dir, partitioning = "part") - expect_equivalent( + expect_equal( ds %>% filter(int > 5) %>% select(int, lgl) %>% @@ -642,7 +640,7 @@ test_that("partitioning = NULL to ignore partition information (but why?)", { test_that("filter() with is.nan()", { skip_if_not_available("parquet") ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8())) - expect_equivalent( + expect_equal( ds %>% select(part, dbl) %>% filter(!is.nan(dbl), part == 2) %>% @@ -654,7 +652,7 @@ test_that("filter() with is.nan()", { test_that("filter() with %in%", { skip_if_not_available("parquet") ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8())) - expect_equivalent( + expect_equal( ds %>% select(int, part) %>% filter(int %in% c(6, 4, 3, 103, 107), part == 1) %>% @@ -664,7 +662,7 @@ test_that("filter() with %in%", { # ARROW-9606: bug in %in% filter on partition column with >1 partition columns ds <- open_dataset(hive_dir) - expect_equivalent( + expect_equal( ds %>% filter(group %in% 2) %>% select(names(df2)) %>% @@ -676,7 +674,7 @@ test_that("filter() with %in%", { test_that("filter() on timestamp columns", { skip_if_not_available("parquet") ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8())) - expect_equivalent( + expect_equal( ds %>% filter(ts >= lubridate::ymd_hms("2015-05-04 03:12:39")) %>% filter(part == 1) %>% @@ -686,7 +684,7 @@ test_that("filter() on timestamp columns", { ) # Now with Date - expect_equivalent( + expect_equal( ds %>% filter(ts >= as.Date("2015-05-04")) %>% filter(part == 1) %>% @@ -697,7 +695,7 @@ test_that("filter() on timestamp columns", { # Now with bare string date skip("Implement more aggressive implicit casting for scalars (ARROW-11402)") - expect_equivalent( + expect_equal( ds %>% filter(ts >= "2015-05-04") %>% filter(part == 1) %>% @@ -751,7 +749,7 @@ twice: double (multiply_checked(int, 2)) See $.data for the source Arrow object", fixed = TRUE ) - expect_equivalent( + expect_equal( mutated %>% collect() %>% arrange(dbl), @@ -827,7 +825,7 @@ twice: double (multiply_checked(int, 2)) See $.data for the source Arrow object", fixed = TRUE ) - expect_equivalent( + expect_equal( arranged %>% collect(), rbind( @@ -848,10 +846,10 @@ test_that("compute()/collect(as_data_frame=FALSE)", { ds <- open_dataset(dataset_dir) tab1 <- ds %>% compute() - expect_is(tab1, "Table") + expect_r6_class(tab1, "Table") tab2 <- ds %>% collect(as_data_frame = FALSE) - expect_is(tab2, "Table") + expect_r6_class(tab2, "Table") tab3 <- ds %>% mutate(negint = -int) %>% @@ -860,7 +858,7 @@ test_that("compute()/collect(as_data_frame=FALSE)", { select(negint) %>% compute() - expect_is(tab3, "Table") + expect_r6_class(tab3, "Table") expect_equal( tab3 %>% collect(), @@ -874,7 +872,7 @@ test_that("compute()/collect(as_data_frame=FALSE)", { select(negint) %>% collect(as_data_frame = FALSE) - expect_is(tab3, "Table") + expect_r6_class(tab3, "Table") expect_equal( tab4 %>% collect(), @@ -887,7 +885,7 @@ test_that("compute()/collect(as_data_frame=FALSE)", { compute() # the group_by() prevents compute() from returning a Table... - expect_is(tab5, "arrow_dplyr_query") + expect_s3_class(tab5, "arrow_dplyr_query") # ... but $.data is a Table (InMemoryDataset)... expect_r6_class(tab5$.data, "InMemoryDataset") @@ -1029,14 +1027,14 @@ test_that("Scanner$ScanBatches", { ds <- open_dataset(ipc_dir, format = "feather") batches <- ds$NewScan()$Finish()$ScanBatches() table <- Table$create(!!!batches) - expect_equivalent(as.data.frame(table), rbind(df1, df2)) + expect_equal(as.data.frame(table), rbind(df1, df2)) # use_async will always use the thread pool (even if it only uses # one thread) and RTools 3.5 on Windows doesn't support this skip_on_os("windows") batches <- ds$NewScan()$UseAsync(TRUE)$Finish()$ScanBatches() table <- Table$create(!!!batches) - expect_equivalent(as.data.frame(table), rbind(df1, df2)) + expect_equal(as.data.frame(table), rbind(df1, df2)) }) test_that("Scanner$ToRecordBatchReader()", { @@ -1119,7 +1117,7 @@ expect_scan_result <- function(ds, schm) { tab <- scn$ToTable() expect_r6_class(tab, "Table") - expect_equivalent( + expect_equal( as.data.frame(tab), df1[8, c("chr", "lgl")] ) @@ -1147,7 +1145,7 @@ test_that("Assembling a Dataset manually and getting a Table", { expect_r6_class(child$schema, "Schema") expect_r6_class(child$format, "ParquetFileFormat") expect_equal(names(schm), names(child$schema)) - expect_equivalent(child$files, files) + expect_equal(child$files, files) ds <- Dataset$create(list(child), schm) expect_scan_result(ds, schm) @@ -1297,7 +1295,7 @@ test_that("Assembling multiple DatasetFactories with DatasetFactory", { expect_r6_class(ds, "UnionDataset") expect_r6_class(ds$schema, "Schema") expect_equal(names(schm), names(ds$schema)) - expect_equivalent(map(ds$children, ~ .$files), files) + expect_equal(unlist(map(ds$children, ~ .$files)), files) expect_scan_result(ds, schm) }) @@ -1312,7 +1310,7 @@ test_that("Writing a dataset: CSV->IPC", { new_ds <- open_dataset(dst_dir, format = "feather") - expect_equivalent( + expect_equal( new_ds %>% select(string = chr, integer = int) %>% filter(integer > 6 & integer < 11) %>% @@ -1344,7 +1342,7 @@ test_that("Writing a dataset: Parquet->IPC", { new_ds <- open_dataset(dst_dir, format = "feather") - expect_equivalent( + expect_equal( new_ds %>% select(string = chr, integer = int, group) %>% filter(integer > 6 & group == 1) %>% @@ -1368,7 +1366,7 @@ test_that("Writing a dataset: CSV->Parquet", { new_ds <- open_dataset(dst_dir) - expect_equivalent( + expect_equal( new_ds %>% select(string = chr, integer = int) %>% filter(integer > 6 & integer < 11) %>% @@ -1392,7 +1390,7 @@ test_that("Writing a dataset: Parquet->Parquet (default)", { new_ds <- open_dataset(dst_dir) - expect_equivalent( + expect_equal( new_ds %>% select(string = chr, integer = int, group) %>% filter(integer > 6 & group == 1) %>% @@ -1408,7 +1406,7 @@ test_that("Writing a dataset: Parquet->Parquet (default)", { test_that("Writing a dataset: no format specified", { skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-9651 dst_dir <- make_temp_dir() - write_dataset(mtcars, dst_dir) + write_dataset(example_data, dst_dir) new_ds <- open_dataset(dst_dir) expect_equal( list.files(dst_dir, pattern = "parquet"), @@ -1417,9 +1415,9 @@ test_that("Writing a dataset: no format specified", { expect_true( inherits(new_ds$format, "ParquetFileFormat") ) - expect_equivalent( + expect_equal( new_ds %>% collect(), - mtcars + example_data ) }) @@ -1443,7 +1441,7 @@ test_that("Dataset writing: dplyr methods", { write_dataset(dst_dir2, format = "feather") new_ds <- open_dataset(dst_dir2, format = "feather") - expect_equivalent( + expect_equal( collect(new_ds) %>% arrange(int), rbind(df1[c("chr", "dbl", "int")], df2[c("chr", "dbl", "int")]) %>% rename(dubs = dbl) ) @@ -1455,7 +1453,7 @@ test_that("Dataset writing: dplyr methods", { write_dataset(dst_dir3, format = "feather") new_ds <- open_dataset(dst_dir3, format = "feather") - expect_equivalent( + expect_equal( new_ds %>% select(names(df1)) %>% collect(), df1 %>% filter(int == 4) ) @@ -1468,7 +1466,7 @@ test_that("Dataset writing: dplyr methods", { write_dataset(dst_dir3, format = "feather") new_ds <- open_dataset(dst_dir3, format = "feather") - expect_equivalent( + expect_equal( new_ds %>% select(c(names(df1), "twice")) %>% collect(), df1 %>% filter(int == 4) %>% mutate(twice = int * 2) ) @@ -1536,7 +1534,7 @@ test_that("Dataset writing: from data.frame", { new_ds <- open_dataset(dst_dir, format = "feather") - expect_equivalent( + expect_equal( new_ds %>% select(string = chr, integer = int) %>% filter(integer > 6 & integer < 11) %>% @@ -1561,7 +1559,7 @@ test_that("Dataset writing: from RecordBatch", { new_ds <- open_dataset(dst_dir, format = "feather") - expect_equivalent( + expect_equal( new_ds %>% select(string = chr, integer = int) %>% filter(integer > 6 & integer < 11) %>% @@ -1588,7 +1586,7 @@ test_that("Writing a dataset: Ipc format options & compression", { expect_true(dir.exists(dst_dir)) new_ds <- open_dataset(dst_dir, format = "feather") - expect_equivalent( + expect_equal( new_ds %>% select(string = chr, integer = int) %>% filter(integer > 6 & integer < 11) %>% @@ -1609,12 +1607,12 @@ test_that("Writing a dataset: Parquet format options", { dst_dir_no_truncated_timestamps <- make_temp_dir() # Use trace() to confirm that options are passed in - trace( + suppressMessages(trace( "parquet___ArrowWriterProperties___create", tracer = quote(warning("allow_truncated_timestamps == ", allow_truncated_timestamps)), print = FALSE, where = write_dataset - ) + )) expect_warning( write_dataset(ds, dst_dir_no_truncated_timestamps, format = "parquet", partitioning = "int"), "allow_truncated_timestamps == FALSE" @@ -1623,7 +1621,10 @@ test_that("Writing a dataset: Parquet format options", { write_dataset(ds, dst_dir, format = "parquet", partitioning = "int", allow_truncated_timestamps = TRUE), "allow_truncated_timestamps == TRUE" ) - untrace("parquet___ArrowWriterProperties___create", where = write_dataset) + suppressMessages(untrace( + "parquet___ArrowWriterProperties___create", + where = write_dataset + )) # Now confirm we can read back what we sent expect_true(dir.exists(dst_dir)) @@ -1631,7 +1632,7 @@ test_that("Writing a dataset: Parquet format options", { new_ds <- open_dataset(dst_dir) - expect_equivalent( + expect_equal( new_ds %>% select(string = chr, integer = int) %>% filter(integer > 6 & integer < 11) %>% @@ -1656,7 +1657,7 @@ test_that("Writing a dataset: CSV format options", { write_dataset(df, dst_dir, format = "csv") expect_true(dir.exists(dst_dir)) new_ds <- open_dataset(dst_dir, format = "csv") - expect_equivalent(new_ds %>% collect(), df) + expect_equal(new_ds %>% collect(), df) dst_dir <- make_temp_dir() write_dataset(df, dst_dir, format = "csv", include_header = FALSE) @@ -1665,7 +1666,7 @@ test_that("Writing a dataset: CSV format options", { format = "csv", column_names = c("int", "dbl", "lgl", "chr") ) - expect_equivalent(new_ds %>% collect(), df) + expect_equal(new_ds %>% collect(), df) }) test_that("Dataset writing: unsupported features/input validation", { @@ -1704,10 +1705,8 @@ test_that("Error if no format specified and files are not parquet", { "Did you mean to specify a 'format' other than the default (parquet)?", fixed = TRUE ) - expect_failure( - expect_error( - open_dataset(csv_dir, partitioning = "part", format = "parquet"), - "Did you mean to specify a 'format'" - ) + expect_error( + open_dataset(csv_dir, partitioning = "part", format = "parquet"), + "Parquet magic bytes not found" ) -}) +}) \ No newline at end of file diff --git a/r/tests/testthat/test-dplyr-arrange.R b/r/tests/testthat/test-dplyr-arrange.R index b28d5792880..b23465beb17 100644 --- a/r/tests/testthat/test-dplyr-arrange.R +++ b/r/tests/testthat/test-dplyr-arrange.R @@ -17,7 +17,7 @@ skip_if_not_available("dataset") -library(dplyr) +library(dplyr, warn.conflicts = FALSE) # randomize order of rows in test data tbl <- slice_sample(example_data_for_sorting, prop = 1L) @@ -202,4 +202,4 @@ test_that("arrange() with bad inputs", { "expects only one argument", fixed = TRUE ) -}) +}) \ No newline at end of file diff --git a/r/tests/testthat/test-dplyr-collapse.R b/r/tests/testthat/test-dplyr-collapse.R index b36f70db232..4ca7b2c67b9 100644 --- a/r/tests/testthat/test-dplyr-collapse.R +++ b/r/tests/testthat/test-dplyr-collapse.R @@ -19,7 +19,7 @@ skip_if_not_available("dataset") withr::local_options(list(arrow.summarise.sort = TRUE)) -library(dplyr) +library(dplyr, warn.conflicts = FALSE) library(stringr) tbl <- example_data @@ -217,4 +217,4 @@ test_that("query_on_dataset handles collapse()", { collapse() %>% select(int) )) -}) +}) \ No newline at end of file diff --git a/r/tests/testthat/test-dplyr-distinct.R b/r/tests/testthat/test-dplyr-distinct.R index 0ad35ae0928..052fb5175f1 100644 --- a/r/tests/testthat/test-dplyr-distinct.R +++ b/r/tests/testthat/test-dplyr-distinct.R @@ -17,7 +17,7 @@ skip_if_not_available("dataset") -library(dplyr) +library(dplyr, warn.conflicts = FALSE) tbl <- example_data tbl$some_grouping <- rep(c(1, 2), 5) @@ -101,4 +101,4 @@ test_that("distinct() can return all columns", { arrange(int), tbl ) -}) +}) \ No newline at end of file diff --git a/r/tests/testthat/test-dplyr-filter.R b/r/tests/testthat/test-dplyr-filter.R index e56ee4be462..30db10fa6ab 100644 --- a/r/tests/testthat/test-dplyr-filter.R +++ b/r/tests/testthat/test-dplyr-filter.R @@ -17,7 +17,7 @@ skip_if_not_available("dataset") -library(dplyr) +library(dplyr, warn.conflicts = FALSE) library(stringr) tbl <- example_data @@ -273,11 +273,7 @@ test_that("filter environment scope", { # 'could not find function "isEqualTo"' because we haven't defined it yet expect_dplyr_error(input %>% filter(isEqualTo(int, 4)), tbl) - skip("Need to substitute in user defined function too") - # TODO: fix this: this isEqualTo function is eagerly evaluating; it should - # instead yield Expressions. Probably bc the parent env of the function - # has the Ops.Expression methods defined; we need to move it so that the - # parent env is the data mask we use in the dplyr eval + # This works but only because there are S3 methods for those operations isEqualTo <- function(x, y) x == y & !is.na(x) expect_dplyr_equal( input %>% @@ -286,6 +282,23 @@ test_that("filter environment scope", { collect(), tbl ) + # Try something that needs to call another nse_func + expect_dplyr_equal( + input %>% + select(-fct) %>% + filter(nchar(padded_strings) < 10) %>% + collect(), + tbl + ) + isShortString <- function(x) nchar(x) < 10 + skip("TODO: 14071") + expect_dplyr_equal( + input %>% + select(-fct) %>% + filter(isShortString(padded_strings)) %>% + collect(), + tbl + ) }) test_that("Filtering on a column that doesn't exist errors correctly", { @@ -396,4 +409,4 @@ test_that("filter() with .data pronoun", { collect(), tbl ) -}) +}) \ No newline at end of file diff --git a/r/tests/testthat/test-dplyr-group-by.R b/r/tests/testthat/test-dplyr-group-by.R index d6abb20c01c..fd1a99f8d24 100644 --- a/r/tests/testthat/test-dplyr-group-by.R +++ b/r/tests/testthat/test-dplyr-group-by.R @@ -17,7 +17,7 @@ skip_if_not_available("dataset") -library(dplyr) +library(dplyr, warn.conflicts = FALSE) library(stringr) tbl <- example_data @@ -155,4 +155,4 @@ test_that("group_by with .drop", { group_by_drop_default(), example_with_logical_factors ) -}) +}) \ No newline at end of file diff --git a/r/tests/testthat/test-dplyr-lubridate.R b/r/tests/testthat/test-dplyr-lubridate.R index cf4b0c5a8fd..61b17bc99b4 100644 --- a/r/tests/testthat/test-dplyr-lubridate.R +++ b/r/tests/testthat/test-dplyr-lubridate.R @@ -17,8 +17,8 @@ skip_if_not_available("dataset") -library(lubridate) -library(dplyr) +library(lubridate, warn.conflicts = FALSE) +library(dplyr, warn.conflicts = FALSE) # base::strptime() defaults to local timezone # but arrow's strptime defaults to UTC. @@ -33,6 +33,8 @@ if (tolower(Sys.info()[["sysname"]]) == "windows") { test_date <- as.POSIXct("2017-01-01 00:00:12.3456789", tz = "Pacific/Marquesas") } +skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-13588 + test_df <- tibble::tibble( datetime = c(test_date, NA), date = c(as.Date("2021-09-09"), NA) @@ -295,4 +297,4 @@ test_that("extract yday from date", { collect(), test_df ) -}) +}) \ No newline at end of file diff --git a/r/tests/testthat/test-dplyr-mutate.R b/r/tests/testthat/test-dplyr-mutate.R index 30fc12ccf17..c3086d9b02a 100644 --- a/r/tests/testthat/test-dplyr-mutate.R +++ b/r/tests/testthat/test-dplyr-mutate.R @@ -17,7 +17,7 @@ skip_if_not_available("dataset") -library(dplyr) +library(dplyr, warn.conflicts = FALSE) library(stringr) tbl <- example_data @@ -283,6 +283,7 @@ test_that("dplyr::mutate's examples", { #> x y z #> #> 1 1 2 3 + expect_dplyr_equal( input %>% mutate(z = x + y, .before = 1) %>% collect(), df @@ -477,7 +478,7 @@ test_that("mutate and write_dataset", { new_ds <- open_dataset(dst_dir, format = "feather") - expect_equivalent( + expect_equal( new_ds %>% select(string = chr, integer = int, twice) %>% filter(integer > 6 & integer < 11) %>% @@ -520,4 +521,4 @@ test_that("mutate and pmin/pmax", { collect(), df ) -}) +}) \ No newline at end of file diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R index 6418717a8ab..6b9708c03c8 100644 --- a/r/tests/testthat/test-dplyr-string-functions.R +++ b/r/tests/testthat/test-dplyr-string-functions.R @@ -18,7 +18,7 @@ skip_if_not_available("dataset") skip_if_not_available("utf8proc") -library(dplyr) +library(dplyr, warn.conflicts = FALSE) library(lubridate) library(stringr) library(stringi) @@ -412,67 +412,62 @@ test_that("strsplit and str_split", { mutate(x = strsplit(x, "and")) %>% collect(), df, - # Pass check.attributes = FALSE through to expect_equal - # (which gives you expect_equivalent() behavior). - # This is because the vctr that comes back from arrow (ListArray) + # `ignore_attr = TRUE` because the vctr coming back from arrow (ListArray) # has type information in it, but it's just a bare list from R/dplyr. - # Note also that whenever we bump up to testthat 3rd edition (ARROW-12871), - # the parameter is called `ignore_attr = TRUE` - check.attributes = FALSE + ignore_attr = TRUE ) expect_dplyr_equal( input %>% mutate(x = strsplit(x, "and.*", fixed = TRUE)) %>% collect(), df, - check.attributes = FALSE + ignore_attr = TRUE ) expect_dplyr_equal( input %>% mutate(x = strsplit(x, " +and +")) %>% collect(), df, - check.attributes = FALSE + ignore_attr = TRUE ) expect_dplyr_equal( input %>% mutate(x = str_split(x, "and")) %>% collect(), df, - check.attributes = FALSE + ignore_attr = TRUE ) expect_dplyr_equal( input %>% mutate(x = str_split(x, "and", n = 2)) %>% collect(), df, - check.attributes = FALSE + ignore_attr = TRUE ) expect_dplyr_equal( input %>% mutate(x = str_split(x, fixed("and"), n = 2)) %>% collect(), df, - check.attributes = FALSE + ignore_attr = TRUE ) expect_dplyr_equal( input %>% mutate(x = str_split(x, regex("and"), n = 2)) %>% collect(), df, - check.attributes = FALSE + ignore_attr = TRUE ) expect_dplyr_equal( input %>% mutate(x = str_split(x, "Foo|bar", n = 2)) %>% collect(), df, - check.attributes = FALSE + ignore_attr = TRUE ) }) test_that("arrow_*_split_whitespace functions", { - # use only ASCII whitespace characters df_ascii <- tibble(x = c("Foo\nand bar", "baz\tand qux and quux")) @@ -482,39 +477,43 @@ test_that("arrow_*_split_whitespace functions", { df_split <- tibble(x = list(c("Foo", "and", "bar"), c("baz", "and", "qux", "and", "quux"))) # use default option values - expect_equivalent( + expect_equal( df_ascii %>% Table$create() %>% mutate(x = arrow_ascii_split_whitespace(x)) %>% collect(), - df_split + df_split, + ignore_attr = TRUE ) - expect_equivalent( + expect_equal( df_utf8 %>% Table$create() %>% mutate(x = arrow_utf8_split_whitespace(x)) %>% collect(), - df_split + df_split, + ignore_attr = TRUE ) # specify non-default option values - expect_equivalent( + expect_equal( df_ascii %>% Table$create() %>% mutate( x = arrow_ascii_split_whitespace(x, options = list(max_splits = 1, reverse = TRUE)) ) %>% collect(), - tibble(x = list(c("Foo\nand", "bar"), c("baz\tand qux and", "quux"))) + tibble(x = list(c("Foo\nand", "bar"), c("baz\tand qux and", "quux"))), + ignore_attr = TRUE ) - expect_equivalent( + expect_equal( df_utf8 %>% Table$create() %>% mutate( x = arrow_utf8_split_whitespace(x, options = list(max_splits = 1, reverse = TRUE)) ) %>% collect(), - tibble(x = list(c("Foo\u00A0and", "bar"), c("baz\u2006and\u1680qux\u3000and", "quux"))) + tibble(x = list(c("Foo\u00A0and", "bar"), c("baz\u2006and\u1680qux\u3000and", "quux"))), + ignore_attr = TRUE ) }) @@ -618,7 +617,6 @@ test_that("backreferences (substitutions) in string replacement", { }) test_that("edge cases in string detection and replacement", { - # in case-insensitive fixed match/replace, test that "\\E" in the search # string and backslashes in the replacement string are interpreted literally. # this test does not use expect_dplyr_equal() because base::sub() and @@ -655,7 +653,6 @@ test_that("edge cases in string detection and replacement", { }) test_that("strptime", { - # base::strptime() defaults to local timezone # but arrow's strptime defaults to UTC. # So that tests are consistent, set the local timezone to UTC @@ -673,7 +670,7 @@ test_that("strptime", { ) %>% collect(), t_stamp, - check.tzone = FALSE + ignore_attr = "tzone" ) expect_equal( @@ -684,7 +681,7 @@ test_that("strptime", { ) %>% collect(), t_stamp, - check.tzone = FALSE + ignore_attr = "tzone" ) expect_equal( @@ -695,7 +692,7 @@ test_that("strptime", { ) %>% collect(), t_stamp, - check.tzone = FALSE + ignore_attr = "tzone" ) expect_equal( @@ -706,11 +703,11 @@ test_that("strptime", { ) %>% collect(), t_stamp, - check.tzone = FALSE + ignore_attr = "tzone" ) tstring <- tibble(x = c("08-05-2008", NA)) - tstamp <- tibble(x = c(strptime("08-05-2008", format = "%m-%d-%Y"), NA)) + tstamp <- strptime(c("08-05-2008", NA), format = "%m-%d-%Y") expect_equal( tstring %>% @@ -718,15 +715,15 @@ test_that("strptime", { mutate( x = strptime(x, format = "%m-%d-%Y") ) %>% - collect(), - tstamp, - check.tzone = FALSE + pull(), + # R's strptime returns POSIXlt (list type) + as.POSIXct(tstamp), + ignore_attr = "tzone" ) }) test_that("errors in strptime", { # Error when tz is passed - x <- Expression$field_ref("x") expect_error( nse_funcs$strptime(x, tz = "PDT"), @@ -771,7 +768,8 @@ test_that("strftime", { times ) - withr::with_timezone("Pacific/Marquesas", + withr::with_timezone( + "Pacific/Marquesas", expect_dplyr_equal( input %>% mutate(x = strftime(datetime, format = formats, tz = "EST")) %>% @@ -867,14 +865,14 @@ test_that("format_ISO8601", { test_that("arrow_find_substring and arrow_find_substring_regex", { df <- tibble(x = c("Foo and Bar", "baz and qux and quux")) - expect_equivalent( + expect_equal( df %>% Table$create() %>% mutate(x = arrow_find_substring(x, options = list(pattern = "b"))) %>% collect(), tibble(x = c(-1, 0)) ) - expect_equivalent( + expect_equal( df %>% Table$create() %>% mutate(x = arrow_find_substring( @@ -884,7 +882,7 @@ test_that("arrow_find_substring and arrow_find_substring_regex", { collect(), tibble(x = c(8, 0)) ) - expect_equivalent( + expect_equal( df %>% Table$create() %>% mutate(x = arrow_find_substring_regex( @@ -894,7 +892,7 @@ test_that("arrow_find_substring and arrow_find_substring_regex", { collect(), tibble(x = c(-1, 0)) ) - expect_equivalent( + expect_equal( df %>% Table$create() %>% mutate(x = arrow_find_substring_regex( @@ -925,7 +923,7 @@ test_that("stri_reverse and arrow_ascii_reverse functions", { df_ascii ) - expect_equivalent( + expect_equal( df_ascii %>% Table$create() %>% mutate(x = arrow_ascii_reverse(x)) %>% @@ -949,7 +947,7 @@ test_that("str_like", { # these tests to use expect_dplyr_equal # No match - entire string - expect_equivalent( + expect_equal( df %>% Table$create() %>% mutate(x = str_like(x, "baz")) %>% @@ -958,7 +956,7 @@ test_that("str_like", { ) # Match - entire string - expect_equivalent( + expect_equal( df %>% Table$create() %>% mutate(x = str_like(x, "Foo and bar")) %>% @@ -967,7 +965,7 @@ test_that("str_like", { ) # Wildcard - expect_equivalent( + expect_equal( df %>% Table$create() %>% mutate(x = str_like(x, "f%", ignore_case = TRUE)) %>% @@ -976,7 +974,7 @@ test_that("str_like", { ) # Ignore case - expect_equivalent( + expect_equal( df %>% Table$create() %>% mutate(x = str_like(x, "f%", ignore_case = FALSE)) %>% @@ -985,7 +983,7 @@ test_that("str_like", { ) # Single character - expect_equivalent( + expect_equal( df %>% Table$create() %>% mutate(x = str_like(x, "_a%")) %>% @@ -1213,4 +1211,4 @@ test_that("str_sub", { nse_funcs$str_sub("Apache Arrow", 1, c(2, 3)), "`end` must be length 1 - other lengths are not supported in Arrow" ) -}) +}) \ No newline at end of file diff --git a/r/tests/testthat/test-dplyr-summarize.R b/r/tests/testthat/test-dplyr-summarize.R index d868a3ee96b..e5c0139a8c5 100644 --- a/r/tests/testthat/test-dplyr-summarize.R +++ b/r/tests/testthat/test-dplyr-summarize.R @@ -19,7 +19,7 @@ skip_if_not_available("dataset") withr::local_options(list(arrow.summarise.sort = TRUE)) -library(dplyr) +library(dplyr, warn.conflicts = FALSE) library(stringr) tbl <- example_data @@ -766,4 +766,4 @@ test_that(".groups argument", { ), "NOTVALID" ) -}) +}) \ No newline at end of file diff --git a/r/tests/testthat/test-dplyr.R b/r/tests/testthat/test-dplyr.R index 1023c2060e4..1862f65620c 100644 --- a/r/tests/testthat/test-dplyr.R +++ b/r/tests/testthat/test-dplyr.R @@ -17,7 +17,7 @@ skip_if_not_available("dataset") -library(dplyr) +library(dplyr, warn.conflicts = FALSE) library(stringr) tbl <- example_data @@ -78,9 +78,12 @@ test_that("Empty select returns no columns", { ) }) test_that("Empty select still includes the group_by columns", { - expect_dplyr_equal( - input %>% group_by(chr) %>% select() %>% collect(), - tbl + expect_message( + expect_dplyr_equal( + input %>% group_by(chr) %>% select() %>% collect(), + tbl + ), + "Adding missing grouping variables" ) }) @@ -165,7 +168,7 @@ test_that("collect(as_data_frame=FALSE)", { b1 <- batch %>% collect(as_data_frame = FALSE) - expect_is(b1, "RecordBatch") + expect_r6_class(b1, "RecordBatch") b2 <- batch %>% select(int, chr) %>% @@ -354,6 +357,7 @@ test_that("relocate with selection helpers", { }) test_that("explicit type conversions with cast()", { + suppressPackageStartupMessages(library(bit64)) num_int32 <- 12L num_int64 <- bit64::as.integer64(10) @@ -786,7 +790,7 @@ test_that("type checks with is.*()", { }) test_that("type checks with is_*()", { - library(rlang) + library(rlang, warn.conflicts = FALSE) expect_dplyr_equal( input %>% transmute( @@ -1616,4 +1620,4 @@ test_that("coalesce()", { "At least one argument must be supplied to coalesce()", fixed = TRUE ) -}) +}) \ No newline at end of file diff --git a/r/tests/testthat/test-expression.R b/r/tests/testthat/test-expression.R index 034c4049a34..c4aab718d90 100644 --- a/r/tests/testthat/test-expression.R +++ b/r/tests/testthat/test-expression.R @@ -15,7 +15,6 @@ # specific language governing permissions and limitations # under the License. -context("Expressions") test_that("C++ expressions", { skip_if_not_available("dataset") @@ -48,11 +47,11 @@ test_that("C++ expressions", { "Expression\n(f > 4)", fixed = TRUE ) - expect_type_equal( + expect_equal( f$type(schema(f = float64())), float64() ) - expect_type_equal( + expect_equal( (f > 4)$type(schema(f = float64())), bool() ) diff --git a/r/tests/testthat/test-feather.R b/r/tests/testthat/test-feather.R index 80757b04f05..136474deadb 100644 --- a/r/tests/testthat/test-feather.R +++ b/r/tests/testthat/test-feather.R @@ -222,7 +222,7 @@ test_that("FeatherReader methods", { # print method expect_identical( capture.output(print(reader)), - # TODO: can we get rows/columns? + # TODO: can we get rows/columns? c("FeatherReader:", "Schema", "x: int32", "y: double", "z: string") ) }) @@ -242,7 +242,7 @@ test_that("Error messages are shown when the compression algorithm lz4 is not fo if (codec_is_available("lz4")) { d <- read_feather(ft_file) - expect_is(d, "data.frame") + expect_s3_class(d, "data.frame") } else { expect_error(read_feather(ft_file), msg, fixed = TRUE) } diff --git a/r/tests/testthat/test-field.R b/r/tests/testthat/test-field.R index a9ef5a32e36..a302187d172 100644 --- a/r/tests/testthat/test-field.R +++ b/r/tests/testthat/test-field.R @@ -15,7 +15,6 @@ # specific language governing permissions and limitations # under the License. -context("Field") test_that("field() factory", { x <- field("x", int32()) diff --git a/r/tests/testthat/test-filesystem.R b/r/tests/testthat/test-filesystem.R index 38b6f61269e..5ee096f13b4 100644 --- a/r/tests/testthat/test-filesystem.R +++ b/r/tests/testthat/test-filesystem.R @@ -15,7 +15,6 @@ # specific language governing permissions and limitations # under the License. -context("File system") test_that("LocalFilesystem", { fs <- LocalFileSystem$create() diff --git a/r/tests/testthat/test-install-arrow.R b/r/tests/testthat/test-install-arrow.R index c53ee829829..977f9d77dd2 100644 --- a/r/tests/testthat/test-install-arrow.R +++ b/r/tests/testthat/test-install-arrow.R @@ -15,8 +15,6 @@ # specific language governing permissions and limitations # under the License. -context("install_arrow()") - r_only({ test_that("arrow_repos", { cran <- "https://cloud.r-project.org/" diff --git a/r/tests/testthat/test-json.R b/r/tests/testthat/test-json.R index c39e1b7a423..825511b978e 100644 --- a/r/tests/testthat/test-json.R +++ b/r/tests/testthat/test-json.R @@ -17,8 +17,6 @@ skip_if_not_available("json") -context("JsonTableReader") - test_that("Can read json file with scalars columns (ARROW-5503)", { tf <- tempfile() on.exit(unlink(tf)) @@ -208,7 +206,10 @@ test_that("Can read json file with nested columns (ARROW-5503)", { hello <- Array$create(c(NA, NA, "hi", "bonjour", "ciao", NA)) expect_equal(struct_array$field(0L), ps) expect_equal(struct_array$GetFieldByName("ps"), ps) - expect_equal(struct_array$Flatten(), list(ps, hello)) + struct_cols <- struct_array$Flatten() + expect_identical(length(struct_cols), 2L) + expect_equal(struct_cols[[1]], ps) + expect_equal(struct_cols[[2]], hello) expect_equal( as.vector(struct_array), tibble::tibble(ps = ps$as_vector(), hello = hello$as_vector()) @@ -223,18 +224,20 @@ test_that("Can read json file with nested columns (ARROW-5503)", { c(5, 6) ) list_array <- tab1$column(0) - expect_equivalent( + expect_equal( list_array$as_vector(), - list_array_r + list_array_r, + ignore_attr = TRUE ) tib <- as.data.frame(tab1) - expect_equivalent( + expect_equal( tib, tibble::tibble( arr = list_array_r, nuf = tibble::tibble(ps = ps$as_vector(), hello = hello$as_vector()) - ) + ), + ignore_attr = TRUE ) }) @@ -248,5 +251,5 @@ test_that("Can read json file with list> nested columns (ARROW-7740 one <- tibble::tibble(b = c(1, 2)) expected <- tibble::tibble(a = c(list(one), list(one))) - expect_equivalent(read_json_arrow(tf), expected) + expect_equal(read_json_arrow(tf), expected, ignore_attr = TRUE) }) diff --git a/r/tests/testthat/test-message-reader.R b/r/tests/testthat/test-message-reader.R index 340a3e3ed1e..44f3fe4f7d2 100644 --- a/r/tests/testthat/test-message-reader.R +++ b/r/tests/testthat/test-message-reader.R @@ -15,7 +15,6 @@ # specific language governing permissions and limitations # under the License. -context("MessageReader") test_that("MessageReader can be created from raw vectors", { batch <- record_batch(x = 1:10) diff --git a/r/tests/testthat/test-message.R b/r/tests/testthat/test-message.R index 3fbb038272c..c9ee4cb72cd 100644 --- a/r/tests/testthat/test-message.R +++ b/r/tests/testthat/test-message.R @@ -15,7 +15,6 @@ # specific language governing permissions and limitations # under the License. -context("Message") test_that("read_message can read from input stream", { batch <- record_batch(x = 1:10) diff --git a/r/tests/testthat/test-na-omit.R b/r/tests/testthat/test-na-omit.R index 3cd56cca64f..894dbe3d9de 100644 --- a/r/tests/testthat/test-na-omit.R +++ b/r/tests/testthat/test-na-omit.R @@ -48,12 +48,21 @@ test_that("na.fail on Scalar", { test_that("na.omit on Table", { tbl <- Table$create(example_data) - expect_equivalent(as.data.frame(na.omit(tbl)), na.omit(example_data)) + expect_equal( + as.data.frame(na.omit(tbl)), + na.omit(example_data), + # We don't include an attribute with the rows omitted + ignore_attr = "na.action" + ) }) test_that("na.exclude on Table", { tbl <- Table$create(example_data) - expect_equivalent(as.data.frame(na.exclude(tbl)), na.exclude(example_data)) + expect_equal( + as.data.frame(na.exclude(tbl)), + na.exclude(example_data), + ignore_attr = "na.action" + ) }) test_that("na.fail on Table", { @@ -63,12 +72,20 @@ test_that("na.fail on Table", { test_that("na.omit on RecordBatch", { batch <- record_batch(example_data) - expect_equivalent(as.data.frame(na.omit(batch)), na.omit(example_data)) + expect_equal( + as.data.frame(na.omit(batch)), + na.omit(example_data), + ignore_attr = "na.action" + ) }) test_that("na.exclude on RecordBatch", { batch <- record_batch(example_data) - expect_equivalent(as.data.frame(na.exclude(batch)), na.omit(example_data)) + expect_equal( + as.data.frame(na.exclude(batch)), + na.omit(example_data), + ignore_attr = "na.action" + ) }) test_that("na.fail on RecordBatch", { diff --git a/r/tests/testthat/test-parquet.R b/r/tests/testthat/test-parquet.R index 41dcfe38c94..791c7b61cca 100644 --- a/r/tests/testthat/test-parquet.R +++ b/r/tests/testthat/test-parquet.R @@ -17,8 +17,6 @@ skip_if_not_available("parquet") -context("Parquet file reading/writing") - pq_file <- system.file("v0.7.1.parquet", package = "arrow") test_that("reading a known Parquet file to tibble", { @@ -35,7 +33,7 @@ test_that("simple int column roundtrip", { write_parquet(df, pq_tmp_file) df_read <- read_parquet(pq_tmp_file) - expect_equivalent(df, df_read) + expect_equal(df, df_read) # Make sure file connection is cleaned up expect_error(file.remove(pq_tmp_file), NA) expect_false(file.exists(pq_tmp_file)) @@ -103,7 +101,7 @@ test_that("write_parquet() handles various write_statistics= specs", { test_that("write_parquet() accepts RecordBatch too", { batch <- RecordBatch$create(x1 = 1:5, x2 = 1:5, y = 1:5) tab <- parquet_roundtrip(batch) - expect_equivalent(tab, Table$create(batch)) + expect_equal(tab, Table$create(batch)) }) test_that("write_parquet() with invalid input type", { @@ -124,7 +122,7 @@ test_that("write_parquet() can truncate timestamps", { write_parquet(tab, tf, coerce_timestamps = "ms", allow_truncated_timestamps = TRUE) new <- read_parquet(tf, as_data_frame = FALSE) expect_type_equal(new$x1, timestamp("ms", "UTC")) - expect_equivalent(as.data.frame(tab), as.data.frame(new)) + expect_equal(as.data.frame(tab), as.data.frame(new)) }) test_that("make_valid_version()", { @@ -158,7 +156,7 @@ test_that("Factors are preserved when writing/reading from Parquet", { write_parquet(df, pq_tmp_file) df_read <- read_parquet(pq_tmp_file) - expect_equivalent(df, df_read) + expect_equal(df, df_read) }) test_that("Lists are preserved when writing/reading from Parquet", { @@ -173,7 +171,7 @@ test_that("Lists are preserved when writing/reading from Parquet", { write_parquet(df, pq_tmp_file) df_read <- read_parquet(pq_tmp_file) - expect_equivalent(df, df_read) + expect_equal(df, df_read, ignore_attr = TRUE) }) test_that("write_parquet() to stream", { @@ -191,7 +189,7 @@ test_that("write_parquet() returns its input", { tf <- tempfile() on.exit(unlink(tf)) df_out <- write_parquet(df, tf) - expect_equivalent(df, df_out) + expect_equal(df, df_out) }) test_that("write_parquet() handles version argument", { @@ -255,7 +253,7 @@ test_that("Error messages are shown when the compression algorithm snappy is not if (codec_is_available("snappy")) { d <- read_parquet(pq_file) - expect_is(d, "data.frame") + expect_s3_class(d, "data.frame") } else { expect_error(read_parquet(pq_file), msg, fixed = TRUE) } diff --git a/r/tests/testthat/test-python.R b/r/tests/testthat/test-python.R index d5815247d51..82b3bfc0198 100644 --- a/r/tests/testthat/test-python.R +++ b/r/tests/testthat/test-python.R @@ -15,8 +15,6 @@ # specific language governing permissions and limitations # under the License. -context("To/from Python") - test_that("install_pyarrow", { skip_on_cran() skip_if_not_dev_mode() diff --git a/r/tests/testthat/test-read-record-batch.R b/r/tests/testthat/test-read-record-batch.R index 56f4e8e6e00..ba109da6c6b 100644 --- a/r/tests/testthat/test-read-record-batch.R +++ b/r/tests/testthat/test-read-record-batch.R @@ -15,7 +15,6 @@ # specific language governing permissions and limitations # under the License. -context("reading RecordBatches") test_that("RecordBatchFileWriter / RecordBatchFileReader roundtrips", { tab <- Table$create( diff --git a/r/tests/testthat/test-read-write.R b/r/tests/testthat/test-read-write.R index 3b4205443cd..66f6db56d90 100644 --- a/r/tests/testthat/test-read-write.R +++ b/r/tests/testthat/test-read-write.R @@ -15,7 +15,6 @@ # specific language governing permissions and limitations # under the License. -context("read-write") test_that("table round trip", { tbl <- tibble::tibble( diff --git a/r/tests/testthat/test-record-batch-reader.R b/r/tests/testthat/test-record-batch-reader.R index 483588ab4bb..3992670dce1 100644 --- a/r/tests/testthat/test-record-batch-reader.R +++ b/r/tests/testthat/test-record-batch-reader.R @@ -15,7 +15,6 @@ # specific language governing permissions and limitations # under the License. -context("RecordBatch.*(Reader|Writer)") test_that("RecordBatchStreamReader / Writer", { tbl <- tibble::tibble( diff --git a/r/tests/testthat/test-s3-minio.R b/r/tests/testthat/test-s3-minio.R index a2a13cbf887..e2c1dc2e728 100644 --- a/r/tests/testthat/test-s3-minio.R +++ b/r/tests/testthat/test-s3-minio.R @@ -15,7 +15,6 @@ # specific language governing permissions and limitations # under the License. -context("S3 tests using local minio") if (arrow_with_s3() && process_is_running("minio server")) { # Get minio config, with expected defaults diff --git a/r/tests/testthat/test-s3.R b/r/tests/testthat/test-s3.R index 995730a7977..298b15bb80c 100644 --- a/r/tests/testthat/test-s3.R +++ b/r/tests/testthat/test-s3.R @@ -15,7 +15,6 @@ # specific language governing permissions and limitations # under the License. -context("S3 integration tests") run_these <- tryCatch( expr = { diff --git a/r/tests/testthat/test-scalar.R b/r/tests/testthat/test-scalar.R index 566228cbcaa..87445023e8c 100644 --- a/r/tests/testthat/test-scalar.R +++ b/r/tests/testthat/test-scalar.R @@ -15,12 +15,11 @@ # specific language governing permissions and limitations # under the License. -context("Scalar") expect_scalar_roundtrip <- function(x, type) { s <- Scalar$create(x) expect_r6_class(s, "Scalar") - expect_type_equal(s$type, type) + expect_equal(s$type, type) expect_identical(length(s), 1L) if (inherits(type, "NestedType")) { # Should this be? Missing if all elements are missing? @@ -28,7 +27,7 @@ expect_scalar_roundtrip <- function(x, type) { } else { expect_identical(as.vector(is.na(s)), is.na(x)) # MakeArrayFromScalar not implemented for list types - expect_equal(as.vector(s), x) + expect_as_vector(s, x) } } @@ -45,8 +44,8 @@ test_that("Scalar print", { }) test_that("Creating Scalars of a different type and casting them", { - expect_type_equal(Scalar$create(4L, int8())$type, int8()) - expect_type_equal(Scalar$create(4L)$cast(float32())$type, float32()) + expect_equal(Scalar$create(4L, int8())$type, int8()) + expect_equal(Scalar$create(4L)$cast(float32())$type, float32()) }) test_that("Scalar to Array", { diff --git a/r/tests/testthat/test-schema.R b/r/tests/testthat/test-schema.R index 933ba4785a8..e31ff5c8be4 100644 --- a/r/tests/testthat/test-schema.R +++ b/r/tests/testthat/test-schema.R @@ -160,7 +160,7 @@ test_that("Schema$Equals", { expect_false(a$Equals(b, check_metadata = TRUE)) # Metadata not checked - expect_equivalent(a, b) + expect_equal(a, b, ignore_attr = TRUE) # Non-schema object expect_false(a$Equals(42)) diff --git a/r/tests/testthat/test-thread-pool.R b/r/tests/testthat/test-thread-pool.R index dab46269ca6..baf410368e7 100644 --- a/r/tests/testthat/test-thread-pool.R +++ b/r/tests/testthat/test-thread-pool.R @@ -15,7 +15,6 @@ # specific language governing permissions and limitations # under the License. -context("Global Thread Pool") test_that("can set/get cpu thread pool capacity", { old <- cpu_count() diff --git a/r/tests/testthat/test-type.R b/r/tests/testthat/test-type.R index d17b811974d..3821fb4503a 100644 --- a/r/tests/testthat/test-type.R +++ b/r/tests/testthat/test-type.R @@ -15,37 +15,36 @@ # specific language governing permissions and limitations # under the License. -context("test-type") test_that("type() gets the right type for arrow::Array", { a <- Array$create(1:10) - expect_type_equal(type(a), a$type) + expect_equal(type(a), a$type) }) test_that("type() gets the right type for ChunkedArray", { a <- chunked_array(1:10, 1:10) - expect_type_equal(type(a), a$type) + expect_equal(type(a), a$type) }) test_that("type() infers from R type", { - expect_type_equal(type(1:10), int32()) - expect_type_equal(type(1), float64()) - expect_type_equal(type(TRUE), boolean()) - expect_type_equal(type(raw()), uint8()) - expect_type_equal(type(""), utf8()) - expect_type_equal( + expect_equal(type(1:10), int32()) + expect_equal(type(1), float64()) + expect_equal(type(TRUE), boolean()) + expect_equal(type(raw()), uint8()) + expect_equal(type(""), utf8()) + expect_equal( type(example_data$fct), dictionary(int8(), utf8(), FALSE) ) - expect_type_equal( + expect_equal( type(lubridate::ymd_hms("2019-02-14 13:55:05")), timestamp(TimeUnit$MICRO, "UTC") ) - expect_type_equal( + expect_equal( type(hms::hms(56, 34, 12)), time32(unit = TimeUnit$SECOND) ) - expect_type_equal( + expect_equal( type(bit64::integer64()), int64() ) @@ -53,7 +52,7 @@ test_that("type() infers from R type", { test_that("type() can infer struct types from data frames", { df <- tibble::tibble(x = 1:10, y = rnorm(10), z = letters[1:10]) - expect_type_equal(type(df), struct(x = int32(), y = float64(), z = utf8())) + expect_equal(type(df), struct(x = int32(), y = float64(), z = utf8())) }) test_that("DataType$Equals", { @@ -65,7 +64,7 @@ test_that("DataType$Equals", { expect_false(a == z) expect_equal(a, b) expect_failure(expect_equal(a, z)) - expect_failure(expect_type_equal(a, z), "int32 not equal to double") + expect_failure(expect_equal(a, z)) expect_false(a$Equals(32L)) }) @@ -74,7 +73,7 @@ test_that("Masked data type functions still work", { # Works when type function is masked string <- rlang::string - expect_type_equal( + expect_equal( Array$create("abc", type = string()), arrow::string() ) @@ -83,7 +82,7 @@ test_that("Masked data type functions still work", { # Works when with non-Arrow function that returns an Arrow type # when the non-Arrow function has the same name as a base R function... str <- arrow::string - expect_type_equal( + expect_equal( Array$create("abc", type = str()), arrow::string() ) @@ -91,7 +90,7 @@ test_that("Masked data type functions still work", { # ... and when it has the same name as an Arrow function type <- arrow::string - expect_type_equal( + expect_equal( Array$create("abc", type = type()), arrow::string() ) @@ -99,7 +98,7 @@ test_that("Masked data type functions still work", { # Works with local variable whose value is an Arrow type type <- arrow::string() - expect_type_equal( + expect_equal( Array$create("abc", type = type), arrow::string() ) diff --git a/r/tests/testthat/test-utf.R b/r/tests/testthat/test-utf.R index 75a162ec2de..69d196274a7 100644 --- a/r/tests/testthat/test-utf.R +++ b/r/tests/testthat/test-utf.R @@ -15,7 +15,6 @@ # specific language governing permissions and limitations # under the License. -context("String encoding") test_that("We handle non-UTF strings", { # Move the code with non-UTF strings to a separate file so that we don't