diff --git a/r/R/feather.R b/r/R/feather.R
index 637ce23234a..7c545eeda56 100644
--- a/r/R/feather.R
+++ b/r/R/feather.R
@@ -125,7 +125,7 @@ write_feather <- function(x,
 #'
 #' @inheritParams read_ipc_stream
 #' @inheritParams read_delim_arrow
-#' @param ... additional parameters, passed to [FeatherReader$create()][FeatherReader]
+#' @param ... additional parameters, passed to [make_readable_file()].
 #'
 #' @return A `data.frame` if `as_data_frame` is `TRUE` (the default), or an
 #' Arrow [Table] otherwise
@@ -144,17 +144,20 @@ write_feather <- function(x,
 #' }
 read_feather <- function(file, col_select = NULL, as_data_frame = TRUE, ...) {
   if (!inherits(file, "RandomAccessFile")) {
-    file <- make_readable_file(file)
+    file <- make_readable_file(file, ...)
     on.exit(file$close())
   }
-  reader <- FeatherReader$create(file, ...)
+  reader <- FeatherReader$create(file)
 
   col_select <- enquo(col_select)
   columns <- if (!quo_is_null(col_select)) {
     vars_select(names(reader), !!col_select)
   }
 
-  out <- reader$Read(columns)
+  out <- tryCatch(
+    reader$Read(columns),
+    error = read_compressed_error
+  )
 
   if (isTRUE(as_data_frame)) {
     out <- as.data.frame(out)
diff --git a/r/R/parquet.R b/r/R/parquet.R
index 45751b16170..169d9f57f52 100644
--- a/r/R/parquet.R
+++ b/r/R/parquet.R
@@ -52,10 +52,16 @@ read_parquet <- function(file,
     schema <- reader$GetSchema()
     names <- names(schema)
     indices <- match(vars_select(names, !!col_select), names) - 1L
-    tab <- reader$ReadTable(indices)
+    tab <- tryCatch(
+      reader$ReadTable(indices),
+      error = read_compressed_error
+    )
   } else {
     # read all columns
-    tab <- reader$ReadTable()
+    tab <- tryCatch(
+      reader$ReadTable(),
+      error = read_compressed_error
+    )
   }
 
   if (as_data_frame) {
diff --git a/r/R/util.R b/r/R/util.R
index 6d9c91b74aa..f5b505f352a 100644
--- a/r/R/util.R
+++ b/r/R/util.R
@@ -87,10 +87,32 @@ is_constant <- function(expr) {
   length(all_vars(expr)) == 0
 }
 
+# Error handler for tryCatch() around file reads: when the error message says
+# that support for a compression codec was not built, append instructions for
+# reinstalling arrow with that codec enabled. Always re-signals the error.
+read_compressed_error <- function(e) {
+  msg <- conditionMessage(e)
+  # Only add the hint when the codec name can actually be extracted; any other
+  # message that merely mentions "codec" is re-raised unchanged.
+  codec <- regmatches(msg, regexec("Support for codec '([^']+)'", msg))[[1]]
+  if (length(codec) == 2L) {
+    compression <- codec[2]
+    e$message <- paste0(
+      msg,
+      "\nIn order to read this file, you will need to reinstall arrow with additional features enabled.",
+      "\nSet one of these environment variables before installing:",
+      sprintf("\n\n * LIBARROW_MINIMAL=false (for all optional features, including '%s')", compression),
+      sprintf("\n * ARROW_WITH_%s=ON (for just '%s')", toupper(compression), compression),
+      "\n\nSee https://arrow.apache.org/docs/r/articles/install.html for details"
+    )
+  }
+  stop(e)
+}
+
 handle_embedded_nul_error <- function(e) {
   msg <- conditionMessage(e)
   if (grepl(" nul ", msg)) {
     e$message <- paste0(msg, "; to strip nuls when converting from Arrow to R, set options(arrow.skip_nul = TRUE)")
   }
   stop(e)
-}
\ No newline at end of file
+}
diff --git a/r/man/read_feather.Rd b/r/man/read_feather.Rd
index c5467c3a22f..fe3a7f1e23d 100644
--- a/r/man/read_feather.Rd
+++ b/r/man/read_feather.Rd
@@ -21,7 +21,7 @@ of columns, as used in \code{dplyr::select()}.}
 \item{as_data_frame}{Should the function return a \code{data.frame} (default) or
 an Arrow \link{Table}?}
 
-\item{...}{additional parameters, passed to \link[=FeatherReader]{FeatherReader$create()}}
+\item{...}{additional parameters, passed to \code{\link[=make_readable_file]{make_readable_file()}}.}
 }
 \value{
 A \code{data.frame} if \code{as_data_frame} is \code{TRUE} (the default), or an
diff --git a/r/tests/testthat/test-feather.R b/r/tests/testthat/test-feather.R
index 52325c7f410..e33fda64641 100644
--- a/r/tests/testthat/test-feather.R
+++ b/r/tests/testthat/test-feather.R
@@ -196,3 +196,23 @@ test_that("Character vectors > 2GB can write to feather", {
 })
 
 unlink(feather_file)
+
+ft_file <- test_path("golden-files/data-arrow_2.0.0_lz4.feather")
+
+test_that("Error messages are shown when the compression algorithm lz4 is not found", {
+  msg <- "NotImplemented: Support for codec 'lz4' not built\nIn order to read this file, you will need to reinstall arrow with additional features enabled.\nSet one of these environment variables before installing:\n\n * LIBARROW_MINIMAL=false (for all optional features, including 'lz4')\n * ARROW_WITH_LZ4=ON (for just 'lz4')\n\nSee https://arrow.apache.org/docs/r/articles/install.html for details"
+
+  if (codec_is_available("lz4")) {
+    d <- read_feather(ft_file)
+    expect_is(d, "data.frame")
+  } else {
+    expect_error(read_feather(ft_file), msg, fixed = TRUE)
+  }
+})
+
+test_that("Error is created when feather reads a parquet file", {
+  expect_error(
+    read_feather(system.file("v0.7.1.parquet", package = "arrow")),
+    "Not a Feather V1 or Arrow IPC file"
+  )
+})
diff --git a/r/tests/testthat/test-parquet.R b/r/tests/testthat/test-parquet.R
index 4ac356f004d..14e7aa78e05 100644
--- a/r/tests/testthat/test-parquet.R
+++ b/r/tests/testthat/test-parquet.R
@@ -234,3 +234,21 @@ test_that("ParquetFileReader $ReadRowGroup(s) methods", {
   expect_true(reader$ReadRowGroups(c(0, 1), 0) == Table$create(x = 1:20))
   expect_error(reader$ReadRowGroups(c(0, 1), 1))
 })
+
+test_that("Error messages are shown when the compression algorithm snappy is not found", {
+  msg <- "NotImplemented: Support for codec 'snappy' not built\nIn order to read this file, you will need to reinstall arrow with additional features enabled.\nSet one of these environment variables before installing:\n\n * LIBARROW_MINIMAL=false (for all optional features, including 'snappy')\n * ARROW_WITH_SNAPPY=ON (for just 'snappy')\n\nSee https://arrow.apache.org/docs/r/articles/install.html for details"
+
+  if (codec_is_available("snappy")) {
+    d <- read_parquet(pq_file)
+    expect_is(d, "data.frame")
+  } else {
+    expect_error(read_parquet(pq_file), msg, fixed = TRUE)
+  }
+})
+
+test_that("Error is created when parquet reads a feather file", {
+  expect_error(
+    read_parquet(test_path("golden-files/data-arrow_2.0.0_lz4.feather")),
+    "Parquet magic bytes not found in footer"
+  )
+})