diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml
index c5fc6d819bf..02454a6ee3a 100644
--- a/.github/workflows/r.yml
+++ b/.github/workflows/r.yml
@@ -288,6 +288,19 @@ jobs:
           working-directory: 'r'
           extra-packages: |
             any::rcmdcheck
+      - name: Install MinIO
+        shell: bash
+        run: |
+          mkdir -p "$HOME/.local/bin"
+          curl \
+            --output "$HOME/.local/bin/minio.exe" \
+            https://dl.min.io/server/minio/release/windows-amd64/archive/minio.RELEASE.2022-05-26T05-48-41Z
+          chmod +x "$HOME/.local/bin/minio.exe"
+          echo "$HOME/.local/bin" >> $GITHUB_PATH
+      # TODO(ARROW-17149): figure out why the GCS tests are hanging on Windows
+      # - name: Install Google Cloud Storage Testbench
+      #   shell: bash
+      #   run: ci/scripts/install_gcs_testbench.sh default
       - name: Check
         shell: Rscript {0}
         run: |
diff --git a/ci/scripts/r_test.sh b/ci/scripts/r_test.sh
index 519144ab4c5..f532bc7cf0a 100755
--- a/ci/scripts/r_test.sh
+++ b/ci/scripts/r_test.sh
@@ -100,14 +100,6 @@ SCRIPT="as_cran <- !identical(tolower(Sys.getenv('NOT_CRAN')), 'true')
   } else {
     args <- c('--no-manual', '--ignore-vignettes')
     build_args <- '--no-build-vignettes'
-
-    if (nzchar(Sys.which('minio'))) {
-      message('Running minio for S3 tests (if build supports them)')
-      minio_dir <- tempfile()
-      dir.create(minio_dir)
-      pid_minio <- sys::exec_background('minio', c('server', minio_dir))
-      on.exit(tools::pskill(pid_minio), add = TRUE)
-    }
   }
 
   if (requireNamespace('reticulate', quietly = TRUE) &&
     reticulate::py_module_available('pyarrow')) {
diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index 90e84d34bc2..cf83f563902 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -61,6 +61,7 @@ Suggests:
     rmarkdown,
     stringi,
     stringr,
+    sys,
    testthat (>= 3.1.0),
     tibble,
     tzdb,
diff --git a/r/R/filesystem.R b/r/R/filesystem.R
index 2f0b1cfd585..4ad6aa83e3d 100644
--- a/r/R/filesystem.R
+++ b/r/R/filesystem.R
@@ -497,7 +497,9 @@ gs_bucket <- function(bucket, ...) {
 GcsFileSystem <- R6Class("GcsFileSystem",
   inherit = FileSystem
 )
-GcsFileSystem$create <- function(anonymous = FALSE, ...) {
+GcsFileSystem$create <- function(anonymous = FALSE, retry_limit_seconds = 15, ...) {
+  # The default retry limit in C++ is 15 minutes, which reads as a hang in an
+  # interactive context, so the default is set here to 15 seconds instead.
   options <- list(...)
 
   # Validate options
@@ -525,8 +527,7 @@ GcsFileSystem$create <- function(anonymous = FALSE, ...) {
   valid_opts <- c(
     "access_token", "expiration", "json_credentials", "endpoint_override",
-    "scheme", "default_bucket_location", "retry_limit_seconds",
-    "default_metadata"
+    "scheme", "default_bucket_location", "default_metadata"
   )
 
   invalid_opts <- setdiff(names(options), valid_opts)
@@ -538,6 +539,8 @@ GcsFileSystem$create <- function(anonymous = FALSE, ...) {
     )
   }
 
+  options$retry_limit_seconds <- retry_limit_seconds
+
   fs___GcsFileSystem__Make(anonymous, options)
 }
diff --git a/r/tests/testthat/helper-filesystems.R b/r/tests/testthat/helper-filesystems.R
new file mode 100644
index 00000000000..2ad6d23b010
--- /dev/null
+++ b/r/tests/testthat/helper-filesystems.R
@@ -0,0 +1,190 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' Run the standard suite of integration tests for a filesystem
+#'
+#' @param name Name of the filesystem, used to label the tests
+#' @param fs A `FileSystem` instance to test with
+#' @param path_formatter A function that takes a sequence of path segments and
+#'   returns an absolute path.
+#' @param uri_formatter A function that takes a sequence of path segments and
+#'   returns a URI containing the filesystem scheme (e.g. 's3://', 'gs://'), the
+#'   absolute path, and any necessary connection options as URL query parameters.
+test_filesystem <- function(name, fs, path_formatter, uri_formatter) {
+  if (!("package:dplyr" %in% search())) {
+    abort("library(dplyr) required for test_filesystem()")
+  }
+
+  # NOTE: it's important that we label these tests with the name of the
+  # filesystem so that we can tell the calls for each filesystem apart in the
+  # test output.
+  test_that(sprintf("read/write Feather on %s using URIs", name), {
+    write_feather(example_data, uri_formatter("test.feather"))
+    expect_identical(read_feather(uri_formatter("test.feather")), example_data)
+  })
+
+  test_that(sprintf("read/write Feather on %s using Filesystem", name), {
+    write_feather(example_data, fs$path(path_formatter("test2.feather")))
+    expect_identical(
+      read_feather(fs$path(path_formatter("test2.feather"))),
+      example_data
+    )
+  })
+
+  test_that(sprintf("read/write compressed csv on %s using FileSystem", name), {
+    skip_if_not_available("gzip")
+    dat <- tibble(x = seq(1, 10, by = 0.2))
+    write_csv_arrow(dat, fs$path(path_formatter("test.csv.gz")))
+    expect_identical(
+      read_csv_arrow(fs$path(path_formatter("test.csv.gz"))),
+      dat
+    )
+  })
+
+  test_that(sprintf("read/write csv on %s using FileSystem", name), {
+    dat <- tibble(x = seq(1, 10, by = 0.2))
+    write_csv_arrow(dat, fs$path(path_formatter("test.csv")))
+    expect_identical(
+      read_csv_arrow(fs$path(path_formatter("test.csv"))),
+      dat
+    )
+  })
+
+  test_that(sprintf("read/write IPC stream on %s", name), {
+    write_ipc_stream(example_data, fs$path(path_formatter("test3.ipc")))
+    expect_identical(
+      read_ipc_stream(fs$path(path_formatter("test3.ipc"))),
+      example_data
+    )
+  })
+
+  test_that(sprintf("read/write Parquet on %s", name), {
+    skip_if_not_available("parquet")
+    write_parquet(example_data, fs$path(path_formatter("test.parquet")))
+    expect_identical(read_parquet(uri_formatter("test.parquet")), example_data)
+  })
+
+  if (arrow_with_dataset()) {
+    make_temp_dir <- function() {
+      path <- tempfile()
+      dir.create(path)
+      normalizePath(path, winslash = "/")
+    }
+
+    test_that(sprintf("open_dataset with an %s file (not directory) URI", name), {
+      skip_if_not_available("parquet")
+      expect_identical(
+        open_dataset(uri_formatter("test.parquet")) %>% collect() %>% arrange(int),
+        example_data %>% arrange(int)
+      )
+    })
+
+    test_that(sprintf("open_dataset with vector of %s file URIs", name), {
+      expect_identical(
+        open_dataset(
+          c(uri_formatter("test.feather"), uri_formatter("test2.feather")),
+          format = "feather"
+        ) %>%
+          arrange(int) %>%
+          collect(),
+        rbind(example_data, example_data) %>% arrange(int)
+      )
+    })
+
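+    # open_dataset() requires every URI in a single call to resolve to the
+    # same filesystem; mixing in a local file:// URI should fail cleanly.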
+    test_that(sprintf("open_dataset errors if passed URIs mixing %s and local fs", name), {
+      td <- make_temp_dir()
+      expect_error(
+        open_dataset(
+          c(
+            uri_formatter("test.feather"),
+            paste0("file://", file.path(td, "fake.feather"))
+          ),
+          format = "feather"
+        ),
+        "Vectors of URIs for different file systems are not supported"
+      )
+    })
+
+    # Dataset test setup, cf. test-dataset.R
+    first_date <- lubridate::ymd_hms("2015-04-29 03:12:39")
+    df1 <- tibble(
+      int = 1:10,
+      dbl = as.numeric(1:10),
+      lgl = rep(c(TRUE, FALSE, NA, TRUE, FALSE), 2),
+      chr = letters[1:10],
+      fct = factor(LETTERS[1:10]),
+      ts = first_date + lubridate::days(1:10)
+    )
+
+    second_date <- lubridate::ymd_hms("2017-03-09 07:01:02")
+    df2 <- tibble(
+      int = 101:110,
+      dbl = as.numeric(51:60),
+      lgl = rep(c(TRUE, FALSE, NA, TRUE, FALSE), 2),
+      chr = letters[10:1],
+      fct = factor(LETTERS[10:1]),
+      ts = second_date + lubridate::days(10:1)
+    )
+
+    # This also sets up the dataset tests that follow
+    test_that(sprintf("write_parquet with %s filesystem arg", name), {
+      skip_if_not_available("parquet")
+      fs$CreateDir(path_formatter("hive_dir", "group=1", "other=xxx"))
+      fs$CreateDir(path_formatter("hive_dir", "group=2", "other=yyy"))
+      expect_length(fs$ls(path_formatter("hive_dir")), 2)
+      write_parquet(df1, fs$path(path_formatter("hive_dir", "group=1", "other=xxx", "file1.parquet")))
+      write_parquet(df2, fs$path(path_formatter("hive_dir", "group=2", "other=yyy", "file2.parquet")))
+      expect_identical(
+        read_parquet(fs$path(path_formatter("hive_dir", "group=1", "other=xxx", "file1.parquet"))),
+        df1
+      )
+    })
+
+    test_that(sprintf("open_dataset with %s", name), {
+      ds <- open_dataset(fs$path(path_formatter("hive_dir")))
+      expect_identical(
+        ds %>% select(int, dbl, lgl) %>% collect() %>% arrange(int),
+        rbind(df1[, c("int", "dbl", "lgl")], df2[, c("int", "dbl", "lgl")]) %>% arrange(int)
+      )
+    })
+
+    test_that(sprintf("write_dataset with %s", name), {
+      ds <- open_dataset(fs$path(path_formatter("hive_dir")))
+      write_dataset(ds, fs$path(path_formatter("new_dataset_dir")))
+      expect_length(fs$ls(path_formatter("new_dataset_dir")), 1)
+    })
+
+    test_that(sprintf("copy files with %s", name), {
+      td <- make_temp_dir()
+      copy_files(uri_formatter("hive_dir"), td)
+      expect_length(dir(td), 2)
+      ds <- open_dataset(td)
+      expect_identical(
+        ds %>% select(int, dbl, lgl) %>% collect() %>% arrange(int),
+        rbind(df1[, c("int", "dbl", "lgl")], df2[, c("int", "dbl", "lgl")]) %>% arrange(int)
+      )
+
+      # Copy the other way, using a SubTreeFileSystem rather than a URI
+      copy_files(td, fs$path(path_formatter("hive_dir2")))
+      ds2 <- open_dataset(fs$path(path_formatter("hive_dir2")))
+      expect_identical(
+        ds2 %>% select(int, dbl, lgl) %>% collect() %>% arrange(int),
+        rbind(df1[, c("int", "dbl", "lgl")], df2[, c("int", "dbl", "lgl")]) %>% arrange(int)
+      )
+    })
+  } # if (arrow_with_dataset())
+}
diff --git a/r/tests/testthat/helper-skip.R b/r/tests/testthat/helper-skip.R
index 7a6c2687ed8..7279e245f23 100644
--- a/r/tests/testthat/helper-skip.R
+++ b/r/tests/testthat/helper-skip.R
@@ -109,6 +109,17 @@ process_is_running <- function(x) {
     return(TRUE)
   }
 
-  cmd <- sprintf("ps aux | grep '%s' | grep -v grep", x)
-  tryCatch(system(cmd, ignore.stdout = TRUE) == 0, error = function(e) FALSE)
+  if (tolower(Sys.info()[["sysname"]]) == "windows") {
+    # CMD.exe doesn't provide a command that shows a process's original call
+    # arguments, which we need for the testbench since it's launched from Python.
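+    # WMIC prints the full command line of every process; the first
+    # Select-String keeps lines matching x, and the second drops the
+    # powershell.exe process running this query itself.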
+    inner_cmd <- paste(
+      "WMIC path win32_process get Commandline",
+      sprintf("| Select-String %s", x),
+      "| Select-String powershell.exe -NotMatch"
+    )
+    cmd <- sprintf("powershell -command \"%s\"", inner_cmd)
+    tryCatch(
+      length(system(cmd, intern = TRUE, show.output.on.console = FALSE)) > 0,
+      error = function(e) FALSE
+    )
+  } else {
+    cmd <- sprintf("ps aux | grep '%s' | grep -v grep", x)
+    tryCatch(system(cmd, ignore.stdout = TRUE) == 0, error = function(e) FALSE)
+  }
 }
diff --git a/r/tests/testthat/test-gcs.R b/r/tests/testthat/test-gcs.R
index a823442f30b..c0a02193c55 100644
--- a/r/tests/testthat/test-gcs.R
+++ b/r/tests/testthat/test-gcs.R
@@ -58,3 +58,51 @@ test_that("GcsFileSystem$create() input validation", {
     'Invalid options for GcsFileSystem: "role_arn"'
   )
 })
+
+skip_on_cran()
+skip_if_not(system('python -c "import testbench"') == 0, message = "googleapis-storage-testbench is not installed.")
+library(dplyr)
+
+testbench_port <- Sys.getenv("TESTBENCH_PORT", "9001")
+
+pid_testbench <- sys::exec_background("python", c("-m", "testbench", "--port", testbench_port),
+  std_out = FALSE,
+  std_err = FALSE # TODO: is there a good place to send output?
+)
+withr::defer(tools::pskill(pid_testbench))
+Sys.sleep(1) # Wait for startup
+
+fs <- GcsFileSystem$create(
+  endpoint_override = sprintf("localhost:%s", testbench_port),
+  retry_limit_seconds = 1,
+  scheme = "http",
+  anonymous = TRUE # Will fail to resolve host name if anonymous isn't TRUE
+)
+
+now <- as.character(as.numeric(Sys.time()))
+tryCatch(fs$CreateDir(now), error = function(cond) {
+  if (grepl("Couldn't connect to server", cond, fixed = TRUE)) {
+    abort(
+      c(
+        sprintf("Unable to connect to testbench on port %s.", testbench_port),
+        i = "You can set a custom port with the TESTBENCH_PORT environment variable."
+      ),
+      parent = cond
+    )
+  } else {
+    stop(cond)
+  }
+})
+# Clean up when we're all done
+withr::defer(fs$DeleteDir(now))
+
+gcs_path <- function(...) {
+  paste(now, ..., sep = "/")
+}
+gcs_uri <- function(...) {
+  template <- "gs://anonymous@%s?scheme=http&endpoint_override=localhost%s%s&retry_limit_seconds=1"
+  sprintf(template, gcs_path(...), "%3A", testbench_port)
+}
+
+test_filesystem("gcs", fs, gcs_path, gcs_uri)
+
+withr::deferred_run()
diff --git a/r/tests/testthat/test-s3-minio.R b/r/tests/testthat/test-s3-minio.R
index ad11d04d5e9..4cd11333922 100644
--- a/r/tests/testthat/test-s3-minio.R
+++ b/r/tests/testthat/test-s3-minio.R
@@ -15,247 +15,96 @@
 # specific language governing permissions and limitations
 # under the License.
 
-
-if (arrow_with_s3() && process_is_running("minio server")) {
-  # Get minio config, with expected defaults
-  minio_key <- Sys.getenv("MINIO_ACCESS_KEY", "minioadmin")
-  minio_secret <- Sys.getenv("MINIO_SECRET_KEY", "minioadmin")
-  minio_port <- Sys.getenv("MINIO_PORT", "9000")
-
-  # Helper function for minio URIs
-  minio_uri <- function(...) {
-    template <- "s3://%s:%s@%s?scheme=http&endpoint_override=localhost%s%s"
-    sprintf(template, minio_key, minio_secret, minio_path(...), "%3A", minio_port)
-  }
-  minio_path <- function(...) paste(now, ..., sep = "/")
-
-  # Create a "bucket" on minio for this test run, which we'll delete when done.
-  fs <- S3FileSystem$create(
-    access_key = minio_key,
-    secret_key = minio_secret,
-    scheme = "http",
-    endpoint_override = paste0("localhost:", minio_port),
-    allow_bucket_creation = TRUE,
-    allow_bucket_deletion = TRUE
+skip_if_not(arrow_with_s3(), message = "arrow not built with S3 support.")
+skip_if_not(nzchar(Sys.which("minio")), message = "minio is not installed.")
+
+library(dplyr)
+
+minio_dir <- Sys.getenv("MINIO_DATA_DIR", tempfile())
+minio_key <- "minioadmin"
+minio_secret <- "minioadmin"
+minio_port <- Sys.getenv("MINIO_PORT", "9000")
+
+# Start minio server
+dir.create(minio_dir, showWarnings = FALSE)
+pid_minio <- sys::exec_background("minio", c("server", minio_dir, "--address", sprintf(":%s", minio_port)),
+  std_out = FALSE
+)
+withr::defer(tools::pskill(pid_minio))
+
+# Helper function for minio URIs
+minio_uri <- function(...) {
+  template <- "s3://%s:%s@%s?scheme=http&endpoint_override=localhost%s%s"
+  sprintf(template, minio_key, minio_secret, minio_path(...), "%3A", minio_port)
+}
+minio_path <- function(...) paste(now, ..., sep = "/")
+
+# Create a "bucket" on minio for this test run, which we'll delete when done.
+fs <- S3FileSystem$create(
+  access_key = minio_key,
+  secret_key = minio_secret,
+  scheme = "http",
+  endpoint_override = paste0("localhost:", minio_port),
+  allow_bucket_creation = TRUE,
+  allow_bucket_deletion = TRUE
+)
+limited_fs <- S3FileSystem$create(
+  access_key = minio_key,
+  secret_key = minio_secret,
+  scheme = "http",
+  endpoint_override = paste0("localhost:", minio_port),
+  allow_bucket_creation = FALSE,
+  allow_bucket_deletion = FALSE
+)
+now <- as.character(as.numeric(Sys.time()))
+fs$CreateDir(now)
+# Clean up when we're all done
+withr::defer(fs$DeleteDir(now))
+
+test_filesystem("s3", fs, minio_path, minio_uri)
+
+test_that("CreateDir fails on bucket if allow_bucket_creation=False", {
+  now_tmp <- paste0(now, "-test-fail-delete")
+  fs$CreateDir(now_tmp)
+
+  expect_error(limited_fs$CreateDir("should-fail"))
+  expect_error(limited_fs$DeleteDir(now_tmp))
+})
+
+test_that("S3FileSystem input validation", {
+  expect_error(
+    S3FileSystem$create(access_key = "foo"),
+    "Key authentication requires both access_key and secret_key"
   )
-  limited_fs <- S3FileSystem$create(
-    access_key = minio_key,
-    secret_key = minio_secret,
-    scheme = "http",
-    endpoint_override = paste0("localhost:", minio_port),
-    allow_bucket_creation = FALSE,
-    allow_bucket_deletion = FALSE
+  expect_error(
+    S3FileSystem$create(secret_key = "foo"),
+    "Key authentication requires both access_key and secret_key"
   )
-  now <- as.character(as.numeric(Sys.time()))
-  fs$CreateDir(now)
-  # Clean up when we're all done
-  on.exit(fs$DeleteDir(now))
-
-  test_that("read/write Feather on minio", {
-    write_feather(example_data, minio_uri("test.feather"))
-    expect_identical(read_feather(minio_uri("test.feather")), example_data)
-  })
-
-  test_that("read/write Feather by filesystem, not URI", {
-    write_feather(example_data, fs$path(minio_path("test2.feather")))
-    expect_identical(
-      read_feather(fs$path(minio_path("test2.feather"))),
-      example_data
-    )
-  })
-
-  test_that("read/write compressed csv by filesystem", {
-    skip_if_not_available("gzip")
-    dat <- tibble(x = seq(1, 10, by = 0.2))
-    write_csv_arrow(dat, fs$path(minio_path("test.csv.gz")))
-    expect_identical(
-      read_csv_arrow(fs$path(minio_path("test.csv.gz"))),
-      dat
-    )
-  })
-
-  test_that("read/write csv by filesystem", {
-    skip_if_not_available("gzip")
-    dat <- tibble(x = seq(1, 10, by = 0.2))
-    write_csv_arrow(dat, 
fs$path(minio_path("test.csv"))) - expect_identical( - read_csv_arrow(fs$path(minio_path("test.csv"))), - dat - ) - }) - - test_that("read/write stream", { - write_ipc_stream(example_data, fs$path(minio_path("test3.ipc"))) - expect_identical( - read_ipc_stream(fs$path(minio_path("test3.ipc"))), - example_data - ) - }) - - test_that("read/write Parquet on minio", { - skip_if_not_available("parquet") - write_parquet(example_data, fs$path(minio_uri("test.parquet"))) - expect_identical(read_parquet(minio_uri("test.parquet")), example_data) - }) - - if (arrow_with_dataset()) { - library(dplyr) - - make_temp_dir <- function() { - path <- tempfile() - dir.create(path) - normalizePath(path, winslash = "/") - } - - test_that("open_dataset with an S3 file (not directory) URI", { - skip_if_not_available("parquet") - expect_identical( - open_dataset(minio_uri("test.parquet")) %>% collect() %>% arrange(int), - example_data %>% arrange(int) - ) - }) - - test_that("open_dataset with vector of S3 file URIs", { - expect_identical( - open_dataset( - c(minio_uri("test.feather"), minio_uri("test2.feather")), - format = "feather" - ) %>% - arrange(int) %>% - collect(), - rbind(example_data, example_data) %>% arrange(int) - ) - }) - - test_that("open_dataset errors on URIs for different file systems", { - td <- make_temp_dir() - expect_error( - open_dataset( - c( - minio_uri("test.feather"), - paste0("file://", file.path(td, "fake.feather")) - ), - format = "feather" - ), - "Vectors of URIs for different file systems are not supported" - ) - }) - - # Dataset test setup, cf. test-dataset.R - first_date <- lubridate::ymd_hms("2015-04-29 03:12:39") - df1 <- tibble( - int = 1:10, - dbl = as.numeric(1:10), - lgl = rep(c(TRUE, FALSE, NA, TRUE, FALSE), 2), - chr = letters[1:10], - fct = factor(LETTERS[1:10]), - ts = first_date + lubridate::days(1:10) - ) - - second_date <- lubridate::ymd_hms("2017-03-09 07:01:02") - df2 <- tibble( - int = 101:110, - dbl = as.numeric(51:60), - lgl = rep(c(TRUE, FALSE, NA, TRUE, FALSE), 2), - chr = letters[10:1], - fct = factor(LETTERS[10:1]), - ts = second_date + lubridate::days(10:1) + expect_error( + S3FileSystem$create(session_token = "foo"), + paste0( + "In order to initialize a session with temporary credentials, ", + "both secret_key and access_key must be provided ", + "in addition to session_token." 
) + ) + expect_error( + S3FileSystem$create(access_key = "foo", secret_key = "asdf", anonymous = TRUE), + 'Cannot specify "access_key" and "secret_key" when anonymous = TRUE' + ) + expect_error( + S3FileSystem$create(access_key = "foo", secret_key = "asdf", role_arn = "qwer"), + "Cannot provide both key authentication and role_arn" + ) + expect_error( + S3FileSystem$create(access_key = "foo", secret_key = "asdf", external_id = "qwer"), + 'Cannot specify "external_id" without providing a role_arn string' + ) + expect_error( + S3FileSystem$create(external_id = "foo"), + 'Cannot specify "external_id" without providing a role_arn string' + ) +}) - # This is also to set up the dataset tests - test_that("write_parquet with filesystem arg", { - skip_if_not_available("parquet") - fs$CreateDir(minio_path("hive_dir", "group=1", "other=xxx")) - fs$CreateDir(minio_path("hive_dir", "group=2", "other=yyy")) - expect_length(fs$ls(minio_path("hive_dir")), 2) - write_parquet(df1, fs$path(minio_path("hive_dir", "group=1", "other=xxx", "file1.parquet"))) - write_parquet(df2, fs$path(minio_path("hive_dir", "group=2", "other=yyy", "file2.parquet"))) - expect_identical( - read_parquet(fs$path(minio_path("hive_dir", "group=1", "other=xxx", "file1.parquet"))), - df1 - ) - }) - - test_that("open_dataset with fs", { - ds <- open_dataset(fs$path(minio_path("hive_dir"))) - expect_identical( - ds %>% select(int, dbl, lgl) %>% collect() %>% arrange(int), - rbind(df1[, c("int", "dbl", "lgl")], df2[, c("int", "dbl", "lgl")]) %>% arrange(int) - ) - }) - - test_that("write_dataset with fs", { - ds <- open_dataset(fs$path(minio_path("hive_dir"))) - write_dataset(ds, fs$path(minio_path("new_dataset_dir"))) - expect_length(fs$ls(minio_path("new_dataset_dir")), 1) - }) - - test_that("CreateDir fails on bucket if allow_bucket_creation=False", { - now_tmp <- paste0(now, "-test-fail-delete") - fs$CreateDir(now_tmp) - - expect_error(limited_fs$CreateDir("should-fail")) - expect_error(limited_fs$DeleteDir(now_tmp)) - }) - - test_that("Let's test copy_files too", { - td <- make_temp_dir() - copy_files(minio_uri("hive_dir"), td) - expect_length(dir(td), 2) - ds <- open_dataset(td) - expect_identical( - ds %>% select(int, dbl, lgl) %>% collect() %>% arrange(int), - rbind(df1[, c("int", "dbl", "lgl")], df2[, c("int", "dbl", "lgl")]) %>% arrange(int) - ) - - # Let's copy the other way and use a SubTreeFileSystem rather than URI - copy_files(td, fs$path(minio_path("hive_dir2"))) - ds2 <- open_dataset(fs$path(minio_path("hive_dir2"))) - expect_identical( - ds2 %>% select(int, dbl, lgl) %>% collect() %>% arrange(int), - rbind(df1[, c("int", "dbl", "lgl")], df2[, c("int", "dbl", "lgl")]) %>% arrange(int) - ) - }) - } - - test_that("S3FileSystem input validation", { - expect_error( - S3FileSystem$create(access_key = "foo"), - "Key authentication requires both access_key and secret_key" - ) - expect_error( - S3FileSystem$create(secret_key = "foo"), - "Key authentication requires both access_key and secret_key" - ) - expect_error( - S3FileSystem$create(session_token = "foo"), - paste0( - "In order to initialize a session with temporary credentials, ", - "both secret_key and access_key must be provided ", - "in addition to session_token." 
- ) - ) - expect_error( - S3FileSystem$create(access_key = "foo", secret_key = "asdf", anonymous = TRUE), - 'Cannot specify "access_key" and "secret_key" when anonymous = TRUE' - ) - expect_error( - S3FileSystem$create(access_key = "foo", secret_key = "asdf", role_arn = "qwer"), - "Cannot provide both key authentication and role_arn" - ) - expect_error( - S3FileSystem$create(access_key = "foo", secret_key = "asdf", external_id = "qwer"), - 'Cannot specify "external_id" without providing a role_arn string' - ) - expect_error( - S3FileSystem$create(external_id = "foo"), - 'Cannot specify "external_id" without providing a role_arn string' - ) - }) -} else { - # Kinda hacky, let's put a skipped test here, just so we note that the tests - # didn't run - test_that("S3FileSystem tests with Minio", { - skip("Minio is not running") - }) -} +# Cleanup +withr::deferred_run()