From 942858f8dadc8f44cb1b1fa9e943f8cd0c14901d Mon Sep 17 00:00:00 2001 From: Mauricio Vargas Date: Wed, 17 Mar 2021 18:24:36 -0300 Subject: [PATCH 01/22] first informative error msg for lz4 error --- r/R/feather.R | 32 +++--- r/R/parquet.R | 274 ++++++++++++++++++++++++++------------------------ 2 files changed, 165 insertions(+), 141 deletions(-) diff --git a/r/R/feather.R b/r/R/feather.R index 5aaf340c6db..a3e962941f9 100644 --- a/r/R/feather.R +++ b/r/R/feather.R @@ -96,7 +96,7 @@ write_feather <- function(x, # "lz4" is the convenience if (compression == "lz4") { - compression <- "lz4_frame" + compression <- "lz4_frame" } compression <- compression_from_name(compression) @@ -154,7 +154,15 @@ read_feather <- function(file, col_select = NULL, as_data_frame = TRUE, ...) { vars_select(names(reader), !!col_select) } - out <- reader$Read(columns) + out <- tryCatch( + reader$Read(columns), + error = function (e) { + if (grepl("Support for codec", conditionMessage(e))) { + msg <- "Unsupported compressed format: We suggest either setting the right environment variable to install binaries or setting LIBARROW_MINIMAL=false and then reinstall the package." + stop(msg, call. = FALSE) + } + } + ) if (isTRUE(as_data_frame)) { out <- as.data.frame(out) @@ -191,16 +199,16 @@ read_feather <- function(file, col_select = NULL, as_data_frame = TRUE, ...) { #' @export #' @include arrow-package.R FeatherReader <- R6Class("FeatherReader", inherit = ArrowObject, - public = list( - Read = function(columns) { - ipc___feather___Reader__Read(self, columns) - } - ), - active = list( - # versions are officially 2 for V1 and 3 for V2 :shrug: - version = function() ipc___feather___Reader__version(self) - 1L, - column_names = function() ipc___feather___Reader__column_names(self) - ) + public = list( + Read = function(columns) { + ipc___feather___Reader__Read(self, columns) + } + ), + active = list( + # versions are officially 2 for V1 and 3 for V2 :shrug: + version = function() ipc___feather___Reader__version(self) - 1L, + column_names = function() ipc___feather___Reader__column_names(self) + ) ) #' @export diff --git a/r/R/parquet.R b/r/R/parquet.R index 45751b16170..093704495d6 100644 --- a/r/R/parquet.R +++ b/r/R/parquet.R @@ -52,10 +52,26 @@ read_parquet <- function(file, schema <- reader$GetSchema() names <- names(schema) indices <- match(vars_select(names, !!col_select), names) - 1L - tab <- reader$ReadTable(indices) + tab <- tryCatch( + reader$ReadTable(indices), + error = function (e) { + if (grepl("Support for codec", conditionMessage(e))) { + msg <- "Unsupported compressed format: We suggest either setting the right environment variable to install binaries or setting LIBARROW_MINIMAL=false and then reinstall the package." + stop(msg, call. = FALSE) + } + } + ) } else { # read all columns - tab <- reader$ReadTable() + tab <- tryCatch( + reader$ReadTable(), + error = function (e) { + if (grepl("Support for codec", conditionMessage(e))) { + msg <- "Unsupported compressed format: We suggest either setting the right environment variable to install binaries or setting LIBARROW_MINIMAL=false and then reinstall the package." + stop(msg, call. 
= FALSE) + } + } + ) } if (as_data_frame) { @@ -209,7 +225,7 @@ ParquetArrowWriterProperties$create <- function(use_deprecated_int96_timestamps timestamp_unit <- -1L # null sentinel value } else { timestamp_unit <- make_valid_time_unit(coerce_timestamps, - c("ms" = TimeUnit$MILLI, "us" = TimeUnit$MICRO) + c("ms" = TimeUnit$MILLI, "us" = TimeUnit$MICRO) ) } parquet___ArrowWriterProperties___create( @@ -280,62 +296,62 @@ make_valid_version <- function(version, valid_versions = valid_parquet_version) #' @export ParquetWriterProperties <- R6Class("ParquetWriterProperties", inherit = ArrowObject) ParquetWriterPropertiesBuilder <- R6Class("ParquetWriterPropertiesBuilder", inherit = ArrowObject, - public = list( - set_version = function(version) { - parquet___WriterProperties___Builder__version(self, make_valid_version(version)) - }, - set_compression = function(table, compression) { - compression <- compression_from_name(compression) - assert_that(is.integer(compression)) - private$.set(table, compression, - parquet___ArrowWriterProperties___Builder__set_compressions - ) - }, - set_compression_level = function(table, compression_level){ - # cast to integer but keep names - compression_level <- set_names(as.integer(compression_level), names(compression_level)) - private$.set(table, compression_level, - parquet___ArrowWriterProperties___Builder__set_compression_levels - ) - }, - set_dictionary = function(table, use_dictionary) { - assert_that(is.logical(use_dictionary)) - private$.set(table, use_dictionary, - parquet___ArrowWriterProperties___Builder__set_use_dictionary - ) - }, - set_write_statistics = function(table, write_statistics) { - assert_that(is.logical(write_statistics)) - private$.set(table, write_statistics, - parquet___ArrowWriterProperties___Builder__set_write_statistics - ) - }, - set_data_page_size = function(data_page_size) { - parquet___ArrowWriterProperties___Builder__data_page_size(self, data_page_size) - } - ), + public = list( + set_version = function(version) { + parquet___WriterProperties___Builder__version(self, make_valid_version(version)) + }, + set_compression = function(table, compression) { + compression <- compression_from_name(compression) + assert_that(is.integer(compression)) + private$.set(table, compression, + parquet___ArrowWriterProperties___Builder__set_compressions + ) + }, + set_compression_level = function(table, compression_level){ + # cast to integer but keep names + compression_level <- set_names(as.integer(compression_level), names(compression_level)) + private$.set(table, compression_level, + parquet___ArrowWriterProperties___Builder__set_compression_levels + ) + }, + set_dictionary = function(table, use_dictionary) { + assert_that(is.logical(use_dictionary)) + private$.set(table, use_dictionary, + parquet___ArrowWriterProperties___Builder__set_use_dictionary + ) + }, + set_write_statistics = function(table, write_statistics) { + assert_that(is.logical(write_statistics)) + private$.set(table, write_statistics, + parquet___ArrowWriterProperties___Builder__set_write_statistics + ) + }, + set_data_page_size = function(data_page_size) { + parquet___ArrowWriterProperties___Builder__data_page_size(self, data_page_size) + } + ), - private = list( - .set = function(table, value, FUN) { - msg <- paste0("unsupported ", substitute(value), "= specification") - column_names <- names(table) - given_names <- names(value) - if (is.null(given_names)) { - if (length(value) %in% c(1L, length(column_names))) { - # If there's a single, unnamed value, FUN will set it 
globally - # If there are values for all columns, send them along with the names - FUN(self, column_names, value) - } else { - abort(msg) - } - } else if (all(given_names %in% column_names)) { - # Use the given names - FUN(self, given_names, value) - } else { - abort(msg) - } - } - ) + private = list( + .set = function(table, value, FUN) { + msg <- paste0("unsupported ", substitute(value), "= specification") + column_names <- names(table) + given_names <- names(value) + if (is.null(given_names)) { + if (length(value) %in% c(1L, length(column_names))) { + # If there's a single, unnamed value, FUN will set it globally + # If there are values for all columns, send them along with the names + FUN(self, column_names, value) + } else { + abort(msg) + } + } else if (all(given_names %in% column_names)) { + # Use the given names + FUN(self, given_names, value) + } else { + abort(msg) + } + } + ) ) ParquetWriterProperties$create <- function(table, @@ -395,12 +411,12 @@ ParquetWriterProperties$create <- function(table, #' @export #' @include arrow-package.R ParquetFileWriter <- R6Class("ParquetFileWriter", inherit = ArrowObject, - public = list( - WriteTable = function(table, chunk_size) { - parquet___arrow___FileWriter__WriteTable(self, table, chunk_size) - }, - Close = function() parquet___arrow___FileWriter__Close(self) - ) + public = list( + WriteTable = function(table, chunk_size) { + parquet___arrow___FileWriter__WriteTable(self, table, chunk_size) + }, + Close = function() parquet___arrow___FileWriter__Close(self) + ) ) ParquetFileWriter$create <- function(schema, sink, @@ -461,53 +477,53 @@ ParquetFileWriter$create <- function(schema, #' } #' @include arrow-package.R ParquetFileReader <- R6Class("ParquetFileReader", - inherit = ArrowObject, - active = list( - num_rows = function() { - as.integer(parquet___arrow___FileReader__num_rows(self)) - }, - num_columns = function() { - parquet___arrow___FileReader__num_columns(self) - }, - num_row_groups = function() { - parquet___arrow___FileReader__num_row_groups(self) - } - ), - public = list( - ReadTable = function(column_indices = NULL) { - if (is.null(column_indices)) { - parquet___arrow___FileReader__ReadTable1(self) - } else { - column_indices <- vec_cast(column_indices, integer()) - parquet___arrow___FileReader__ReadTable2(self, column_indices) - } - }, - ReadRowGroup = function(i, column_indices = NULL) { - i <- vec_cast(i, integer()) - if (is.null(column_indices)) { - parquet___arrow___FileReader__ReadRowGroup1(self, i) - } else { - column_indices <- vec_cast(column_indices, integer()) - parquet___arrow___FileReader__ReadRowGroup2(self, i, column_indices) - } - }, - ReadRowGroups = function(row_groups, column_indices = NULL) { - row_groups <- vec_cast(row_groups, integer()) - if (is.null(column_indices)) { - parquet___arrow___FileReader__ReadRowGroups1(self, row_groups) - } else { - column_indices <- vec_cast(column_indices, integer()) - parquet___arrow___FileReader__ReadRowGroups2(self, row_groups, column_indices) - } - }, - ReadColumn = function(i) { - i <- vec_cast(i, integer()) - parquet___arrow___FileReader__ReadColumn(self, i) - }, - GetSchema = function() { - parquet___arrow___FileReader__GetSchema(self) - } - ) + inherit = ArrowObject, + active = list( + num_rows = function() { + as.integer(parquet___arrow___FileReader__num_rows(self)) + }, + num_columns = function() { + parquet___arrow___FileReader__num_columns(self) + }, + num_row_groups = function() { + parquet___arrow___FileReader__num_row_groups(self) + } + ), + public = list( + 
ReadTable = function(column_indices = NULL) { + if (is.null(column_indices)) { + parquet___arrow___FileReader__ReadTable1(self) + } else { + column_indices <- vec_cast(column_indices, integer()) + parquet___arrow___FileReader__ReadTable2(self, column_indices) + } + }, + ReadRowGroup = function(i, column_indices = NULL) { + i <- vec_cast(i, integer()) + if (is.null(column_indices)) { + parquet___arrow___FileReader__ReadRowGroup1(self, i) + } else { + column_indices <- vec_cast(column_indices, integer()) + parquet___arrow___FileReader__ReadRowGroup2(self, i, column_indices) + } + }, + ReadRowGroups = function(row_groups, column_indices = NULL) { + row_groups <- vec_cast(row_groups, integer()) + if (is.null(column_indices)) { + parquet___arrow___FileReader__ReadRowGroups1(self, row_groups) + } else { + column_indices <- vec_cast(column_indices, integer()) + parquet___arrow___FileReader__ReadRowGroups2(self, row_groups, column_indices) + } + }, + ReadColumn = function(i) { + i <- vec_cast(i, integer()) + parquet___arrow___FileReader__ReadColumn(self, i) + }, + GetSchema = function() { + parquet___arrow___FileReader__GetSchema(self) + } + ) ) ParquetFileReader$create <- function(file, @@ -544,24 +560,24 @@ ParquetFileReader$create <- function(file, #' #' @export ParquetArrowReaderProperties <- R6Class("ParquetArrowReaderProperties", - inherit = ArrowObject, - public = list( - read_dictionary = function(column_index) { - parquet___arrow___ArrowReaderProperties__get_read_dictionary(self, column_index) - }, - set_read_dictionary = function(column_index, read_dict) { - parquet___arrow___ArrowReaderProperties__set_read_dictionary(self, column_index, read_dict) - } - ), - active = list( - use_threads = function(use_threads) { - if(missing(use_threads)) { - parquet___arrow___ArrowReaderProperties__get_use_threads(self) - } else { - parquet___arrow___ArrowReaderProperties__set_use_threads(self, use_threads) - } - } - ) + inherit = ArrowObject, + public = list( + read_dictionary = function(column_index) { + parquet___arrow___ArrowReaderProperties__get_read_dictionary(self, column_index) + }, + set_read_dictionary = function(column_index, read_dict) { + parquet___arrow___ArrowReaderProperties__set_read_dictionary(self, column_index, read_dict) + } + ), + active = list( + use_threads = function(use_threads) { + if(missing(use_threads)) { + parquet___arrow___ArrowReaderProperties__get_use_threads(self) + } else { + parquet___arrow___ArrowReaderProperties__set_use_threads(self, use_threads) + } + } + ) ) ParquetArrowReaderProperties$create <- function(use_threads = option_use_threads()) { From 8286451b243cc28516fe511f60f0585afd6e3bab Mon Sep 17 00:00:00 2001 From: Mauricio Vargas Date: Thu, 18 Mar 2021 11:50:50 -0300 Subject: [PATCH 02/22] compression <- lz4_frame test2 --- r/R/feather.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/feather.R b/r/R/feather.R index a3e962941f9..04ab129a312 100644 --- a/r/R/feather.R +++ b/r/R/feather.R @@ -96,7 +96,7 @@ write_feather <- function(x, # "lz4" is the convenience if (compression == "lz4") { - compression <- "lz4_frame" + compression <- "lz4_frame" } compression <- compression_from_name(compression) From beb462067995f8257f62ded951ac5a3bb994524c Mon Sep 17 00:00:00 2001 From: Mauricio Vargas Date: Thu, 18 Mar 2021 11:52:03 -0300 Subject: [PATCH 03/22] compression <- lz4_frame test3 --- r/R/feather.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/feather.R b/r/R/feather.R index 04ab129a312..a3e962941f9 100644 --- 
a/r/R/feather.R +++ b/r/R/feather.R @@ -96,7 +96,7 @@ write_feather <- function(x, # "lz4" is the convenience if (compression == "lz4") { - compression <- "lz4_frame" + compression <- "lz4_frame" } compression <- compression_from_name(compression) From 97f3402aa1cb836ec7c034e037429973eb04303f Mon Sep 17 00:00:00 2001 From: Mauricio Vargas Date: Thu, 18 Mar 2021 12:25:59 -0300 Subject: [PATCH 04/22] compression <- lz4_frame test4 --- r/R/feather.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/feather.R b/r/R/feather.R index a3e962941f9..3fb07f68962 100644 --- a/r/R/feather.R +++ b/r/R/feather.R @@ -96,7 +96,7 @@ write_feather <- function(x, # "lz4" is the convenience if (compression == "lz4") { - compression <- "lz4_frame" + compression <- "lz4_frame" } compression <- compression_from_name(compression) From 654510b888e51fe0b534cb34e1cf93a51bc6ddde Mon Sep 17 00:00:00 2001 From: Mauricio Vargas Date: Thu, 18 Mar 2021 12:27:12 -0300 Subject: [PATCH 05/22] compression <- lz4_frame test5 --- r/R/feather.R | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/r/R/feather.R b/r/R/feather.R index 3fb07f68962..48d87247bfd 100644 --- a/r/R/feather.R +++ b/r/R/feather.R @@ -199,16 +199,16 @@ read_feather <- function(file, col_select = NULL, as_data_frame = TRUE, ...) { #' @export #' @include arrow-package.R FeatherReader <- R6Class("FeatherReader", inherit = ArrowObject, - public = list( - Read = function(columns) { - ipc___feather___Reader__Read(self, columns) - } - ), - active = list( - # versions are officially 2 for V1 and 3 for V2 :shrug: - version = function() ipc___feather___Reader__version(self) - 1L, - column_names = function() ipc___feather___Reader__column_names(self) - ) + public = list( + Read = function(columns) { + ipc___feather___Reader__Read(self, columns) + } + ), + active = list( + # versions are officially 2 for V1 and 3 for V2 :shrug: + version = function() ipc___feather___Reader__version(self) - 1L, + column_names = function() ipc___feather___Reader__column_names(self) + ) ) #' @export From 4a8c9455bb543fde11396f80ee2ffad76086cf53 Mon Sep 17 00:00:00 2001 From: Mauricio Vargas Date: Thu, 18 Mar 2021 12:37:45 -0300 Subject: [PATCH 06/22] compression <- lz4_frame test9 --- r/R/parquet.R | 112 +++++++++++++++++++++++++------------------------- 1 file changed, 56 insertions(+), 56 deletions(-) diff --git a/r/R/parquet.R b/r/R/parquet.R index 093704495d6..b84425fecc4 100644 --- a/r/R/parquet.R +++ b/r/R/parquet.R @@ -225,7 +225,7 @@ ParquetArrowWriterProperties$create <- function(use_deprecated_int96_timestamps timestamp_unit <- -1L # null sentinel value } else { timestamp_unit <- make_valid_time_unit(coerce_timestamps, - c("ms" = TimeUnit$MILLI, "us" = TimeUnit$MICRO) + c("ms" = TimeUnit$MILLI, "us" = TimeUnit$MICRO) ) } parquet___ArrowWriterProperties___create( @@ -296,62 +296,62 @@ make_valid_version <- function(version, valid_versions = valid_parquet_version) #' @export ParquetWriterProperties <- R6Class("ParquetWriterProperties", inherit = ArrowObject) ParquetWriterPropertiesBuilder <- R6Class("ParquetWriterPropertiesBuilder", inherit = ArrowObject, - public = list( - set_version = function(version) { - parquet___WriterProperties___Builder__version(self, make_valid_version(version)) - }, - set_compression = function(table, compression) { - compression <- compression_from_name(compression) - assert_that(is.integer(compression)) - private$.set(table, compression, - 
parquet___ArrowWriterProperties___Builder__set_compressions - ) - }, - set_compression_level = function(table, compression_level){ - # cast to integer but keep names - compression_level <- set_names(as.integer(compression_level), names(compression_level)) - private$.set(table, compression_level, - parquet___ArrowWriterProperties___Builder__set_compression_levels - ) - }, - set_dictionary = function(table, use_dictionary) { - assert_that(is.logical(use_dictionary)) - private$.set(table, use_dictionary, - parquet___ArrowWriterProperties___Builder__set_use_dictionary - ) - }, - set_write_statistics = function(table, write_statistics) { - assert_that(is.logical(write_statistics)) - private$.set(table, write_statistics, - parquet___ArrowWriterProperties___Builder__set_write_statistics - ) - }, - set_data_page_size = function(data_page_size) { - parquet___ArrowWriterProperties___Builder__data_page_size(self, data_page_size) - } - ), + public = list( + set_version = function(version) { + parquet___WriterProperties___Builder__version(self, make_valid_version(version)) + }, + set_compression = function(table, compression) { + compression <- compression_from_name(compression) + assert_that(is.integer(compression)) + private$.set(table, compression, + parquet___ArrowWriterProperties___Builder__set_compressions + ) + }, + set_compression_level = function(table, compression_level){ + # cast to integer but keep names + compression_level <- set_names(as.integer(compression_level), names(compression_level)) + private$.set(table, compression_level, + parquet___ArrowWriterProperties___Builder__set_compression_levels + ) + }, + set_dictionary = function(table, use_dictionary) { + assert_that(is.logical(use_dictionary)) + private$.set(table, use_dictionary, + parquet___ArrowWriterProperties___Builder__set_use_dictionary + ) + }, + set_write_statistics = function(table, write_statistics) { + assert_that(is.logical(write_statistics)) + private$.set(table, write_statistics, + parquet___ArrowWriterProperties___Builder__set_write_statistics + ) + }, + set_data_page_size = function(data_page_size) { + parquet___ArrowWriterProperties___Builder__data_page_size(self, data_page_size) + } + ), - private = list( - .set = function(table, value, FUN) { - msg <- paste0("unsupported ", substitute(value), "= specification") - column_names <- names(table) - given_names <- names(value) - if (is.null(given_names)) { - if (length(value) %in% c(1L, length(column_names))) { - # If there's a single, unnamed value, FUN will set it globally - # If there are values for all columns, send them along with the names - FUN(self, column_names, value) - } else { - abort(msg) - } - } else if (all(given_names %in% column_names)) { - # Use the given names - FUN(self, given_names, value) - } else { - abort(msg) - } - } - ) + private = list( + .set = function(table, value, FUN) { + msg <- paste0("unsupported ", substitute(value), "= specification") + column_names <- names(table) + given_names <- names(value) + if (is.null(given_names)) { + if (length(value) %in% c(1L, length(column_names))) { + # If there's a single, unnamed value, FUN will set it globally + # If there are values for all columns, send them along with the names + FUN(self, column_names, value) + } else { + abort(msg) + } + } else if (all(given_names %in% column_names)) { + # Use the given names + FUN(self, given_names, value) + } else { + abort(msg) + } + } + ) ) ParquetWriterProperties$create <- function(table, From c7a6f1a46fef2ca7c28c52d32bfdf51e1c79a227 Mon Sep 17 00:00:00 2001 
From: Mauricio Vargas Date: Thu, 18 Mar 2021 12:45:53 -0300 Subject: [PATCH 07/22] compression <- lz4_frame test109 --- r/R/parquet.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/r/R/parquet.R b/r/R/parquet.R index b84425fecc4..e493badacb6 100644 --- a/r/R/parquet.R +++ b/r/R/parquet.R @@ -311,19 +311,19 @@ ParquetWriterPropertiesBuilder <- R6Class("ParquetWriterPropertiesBuilder", inhe # cast to integer but keep names compression_level <- set_names(as.integer(compression_level), names(compression_level)) private$.set(table, compression_level, - parquet___ArrowWriterProperties___Builder__set_compression_levels + parquet___ArrowWriterProperties___Builder__set_compression_levels ) }, set_dictionary = function(table, use_dictionary) { assert_that(is.logical(use_dictionary)) private$.set(table, use_dictionary, - parquet___ArrowWriterProperties___Builder__set_use_dictionary + parquet___ArrowWriterProperties___Builder__set_use_dictionary ) }, set_write_statistics = function(table, write_statistics) { assert_that(is.logical(write_statistics)) private$.set(table, write_statistics, - parquet___ArrowWriterProperties___Builder__set_write_statistics + parquet___ArrowWriterProperties___Builder__set_write_statistics ) }, set_data_page_size = function(data_page_size) { From d0406c3aa82de904b3508cdbbc623982f1f99406 Mon Sep 17 00:00:00 2001 From: Mauricio Vargas Date: Thu, 18 Mar 2021 12:47:05 -0300 Subject: [PATCH 08/22] compression <- lz4_frame test11 --- r/R/parquet.R | 108 +++++++++++++++++++++++++------------------------- 1 file changed, 54 insertions(+), 54 deletions(-) diff --git a/r/R/parquet.R b/r/R/parquet.R index e493badacb6..d15e6c16f6b 100644 --- a/r/R/parquet.R +++ b/r/R/parquet.R @@ -304,7 +304,7 @@ ParquetWriterPropertiesBuilder <- R6Class("ParquetWriterPropertiesBuilder", inhe compression <- compression_from_name(compression) assert_that(is.integer(compression)) private$.set(table, compression, - parquet___ArrowWriterProperties___Builder__set_compressions + parquet___ArrowWriterProperties___Builder__set_compressions ) }, set_compression_level = function(table, compression_level){ @@ -411,12 +411,12 @@ ParquetWriterProperties$create <- function(table, #' @export #' @include arrow-package.R ParquetFileWriter <- R6Class("ParquetFileWriter", inherit = ArrowObject, - public = list( - WriteTable = function(table, chunk_size) { - parquet___arrow___FileWriter__WriteTable(self, table, chunk_size) - }, - Close = function() parquet___arrow___FileWriter__Close(self) - ) + public = list( + WriteTable = function(table, chunk_size) { + parquet___arrow___FileWriter__WriteTable(self, table, chunk_size) + }, + Close = function() parquet___arrow___FileWriter__Close(self) + ) ) ParquetFileWriter$create <- function(schema, sink, @@ -477,53 +477,53 @@ ParquetFileWriter$create <- function(schema, #' } #' @include arrow-package.R ParquetFileReader <- R6Class("ParquetFileReader", - inherit = ArrowObject, - active = list( - num_rows = function() { - as.integer(parquet___arrow___FileReader__num_rows(self)) - }, - num_columns = function() { - parquet___arrow___FileReader__num_columns(self) - }, - num_row_groups = function() { - parquet___arrow___FileReader__num_row_groups(self) - } - ), - public = list( - ReadTable = function(column_indices = NULL) { - if (is.null(column_indices)) { - parquet___arrow___FileReader__ReadTable1(self) - } else { - column_indices <- vec_cast(column_indices, integer()) - parquet___arrow___FileReader__ReadTable2(self, column_indices) - } - }, - ReadRowGroup = 
function(i, column_indices = NULL) { - i <- vec_cast(i, integer()) - if (is.null(column_indices)) { - parquet___arrow___FileReader__ReadRowGroup1(self, i) - } else { - column_indices <- vec_cast(column_indices, integer()) - parquet___arrow___FileReader__ReadRowGroup2(self, i, column_indices) - } - }, - ReadRowGroups = function(row_groups, column_indices = NULL) { - row_groups <- vec_cast(row_groups, integer()) - if (is.null(column_indices)) { - parquet___arrow___FileReader__ReadRowGroups1(self, row_groups) - } else { - column_indices <- vec_cast(column_indices, integer()) - parquet___arrow___FileReader__ReadRowGroups2(self, row_groups, column_indices) - } - }, - ReadColumn = function(i) { - i <- vec_cast(i, integer()) - parquet___arrow___FileReader__ReadColumn(self, i) - }, - GetSchema = function() { - parquet___arrow___FileReader__GetSchema(self) - } - ) + inherit = ArrowObject, + active = list( + num_rows = function() { + as.integer(parquet___arrow___FileReader__num_rows(self)) + }, + num_columns = function() { + parquet___arrow___FileReader__num_columns(self) + }, + num_row_groups = function() { + parquet___arrow___FileReader__num_row_groups(self) + } + ), + public = list( + ReadTable = function(column_indices = NULL) { + if (is.null(column_indices)) { + parquet___arrow___FileReader__ReadTable1(self) + } else { + column_indices <- vec_cast(column_indices, integer()) + parquet___arrow___FileReader__ReadTable2(self, column_indices) + } + }, + ReadRowGroup = function(i, column_indices = NULL) { + i <- vec_cast(i, integer()) + if (is.null(column_indices)) { + parquet___arrow___FileReader__ReadRowGroup1(self, i) + } else { + column_indices <- vec_cast(column_indices, integer()) + parquet___arrow___FileReader__ReadRowGroup2(self, i, column_indices) + } + }, + ReadRowGroups = function(row_groups, column_indices = NULL) { + row_groups <- vec_cast(row_groups, integer()) + if (is.null(column_indices)) { + parquet___arrow___FileReader__ReadRowGroups1(self, row_groups) + } else { + column_indices <- vec_cast(column_indices, integer()) + parquet___arrow___FileReader__ReadRowGroups2(self, row_groups, column_indices) + } + }, + ReadColumn = function(i) { + i <- vec_cast(i, integer()) + parquet___arrow___FileReader__ReadColumn(self, i) + }, + GetSchema = function() { + parquet___arrow___FileReader__GetSchema(self) + } + ) ) ParquetFileReader$create <- function(file, From fc79467c54bdc9d526a8fa7b9a7d64e64716cc77 Mon Sep 17 00:00:00 2001 From: Mauricio Vargas Date: Thu, 18 Mar 2021 12:49:48 -0300 Subject: [PATCH 09/22] compression <- lz4_frame test12 --- r/R/parquet.R | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/r/R/parquet.R b/r/R/parquet.R index d15e6c16f6b..4eec32128da 100644 --- a/r/R/parquet.R +++ b/r/R/parquet.R @@ -560,24 +560,24 @@ ParquetFileReader$create <- function(file, #' #' @export ParquetArrowReaderProperties <- R6Class("ParquetArrowReaderProperties", - inherit = ArrowObject, - public = list( - read_dictionary = function(column_index) { - parquet___arrow___ArrowReaderProperties__get_read_dictionary(self, column_index) - }, - set_read_dictionary = function(column_index, read_dict) { - parquet___arrow___ArrowReaderProperties__set_read_dictionary(self, column_index, read_dict) - } - ), - active = list( - use_threads = function(use_threads) { - if(missing(use_threads)) { - parquet___arrow___ArrowReaderProperties__get_use_threads(self) - } else { - parquet___arrow___ArrowReaderProperties__set_use_threads(self, use_threads) - } - } - ) 
+ inherit = ArrowObject, + public = list( + read_dictionary = function(column_index) { + parquet___arrow___ArrowReaderProperties__get_read_dictionary(self, column_index) + }, + set_read_dictionary = function(column_index, read_dict) { + parquet___arrow___ArrowReaderProperties__set_read_dictionary(self, column_index, read_dict) + } + ), + active = list( + use_threads = function(use_threads) { + if(missing(use_threads)) { + parquet___arrow___ArrowReaderProperties__get_use_threads(self) + } else { + parquet___arrow___ArrowReaderProperties__set_use_threads(self, use_threads) + } + } + ) ) ParquetArrowReaderProperties$create <- function(use_threads = option_use_threads()) { From 3446bb7543210412349b7bc2dea05850ffae7239 Mon Sep 17 00:00:00 2001 From: Mauricio Vargas Date: Thu, 18 Mar 2021 14:56:21 -0300 Subject: [PATCH 10/22] initial tests for compression msgs --- r/tests/testthat/test-feather.R | 14 ++++++++++++++ r/tests/testthat/test-parquet.R | 14 ++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/r/tests/testthat/test-feather.R b/r/tests/testthat/test-feather.R index 52325c7f410..59b81b99634 100644 --- a/r/tests/testthat/test-feather.R +++ b/r/tests/testthat/test-feather.R @@ -196,3 +196,17 @@ test_that("Character vectors > 2GB can write to feather", { }) unlink(feather_file) + +test_that("Error messages are shown when the compression algorithm lz4/snappy + is not found", { + skip_on_cran() + if (codec_is_available("lz4")) { + d <- read_feather(system.file("extdata", "pets.feather", package="arrow")) + expect_is(d, "data.frame") + } else { + expect_error( + read_feather(system.file("extdata", "pets.feather", package="arrow")), + "Unsupported compressed format" + ) + } +}) diff --git a/r/tests/testthat/test-parquet.R b/r/tests/testthat/test-parquet.R index 4ac356f004d..423db5a81f6 100644 --- a/r/tests/testthat/test-parquet.R +++ b/r/tests/testthat/test-parquet.R @@ -234,3 +234,17 @@ test_that("ParquetFileReader $ReadRowGroup(s) methods", { expect_true(reader$ReadRowGroups(c(0, 1), 0) == Table$create(x = 1:20)) expect_error(reader$ReadRowGroups(c(0, 1), 1)) }) + +test_that("Error messages are shown when the compression algorithm lz4/snappy + is not found", { + skip_on_cran() + if (codec_is_available("snappy")) { + d <- read_parquet(system.file("extdata", "pets.feather", package = "arrow")) + expect_is(d, "data.frame") + } else { + expect_error( + read_parquet(system.file("extdata", "pets.parquet", package = "arrow")), + "Unsupported compressed format" + ) + } +}) From 086f25e227e70a25794e45cec8a5b5af7fa895c6 Mon Sep 17 00:00:00 2001 From: Mauricio Vargas Date: Thu, 18 Mar 2021 16:41:01 -0300 Subject: [PATCH 11/22] fixed parquet test with files --- r/inst/extdata/pets.feather | Bin 0 -> 1026 bytes r/inst/extdata/pets.parquet | Bin 0 -> 1208 bytes r/tests/testthat/test-parquet.R | 2 +- 3 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 r/inst/extdata/pets.feather create mode 100644 r/inst/extdata/pets.parquet diff --git a/r/inst/extdata/pets.feather b/r/inst/extdata/pets.feather new file mode 100644 index 0000000000000000000000000000000000000000..ff203d7dca6e39ab3ee059785b7952bdfbb780bf GIT binary patch literal 1026 zcmdT@J5Iwu5FICuV~~Zc2niPyQPO~r5Ofq2F`*6$hc<2poYU z3tKs@P)j}gRifd$CN*VV5` z8|W2!X}lsIsDb1bYaFT%#^lAb6ubc1HGH_85kAX`(;+8G9g<;6|$ zoHYo29c%sp;GiXr^x9n+isMW9d*!?br0~84I7!c(Zm5fMos)(Z-q9ke(ho%sC z1CAUy@C1l62gHFV;0ZV)gm?(V%sN&{l?wt!){b}n*_m&56W6g>R8sjWUn3`T0M|E7 z0I0-WcID_?fyybTpls4DW%;*Y)g;w5Nu!KR-k{MI!psJ*g|D(+NrL6M7LNMEXrK(o zc3ukSl?W< Date: Fri, 19 Mar 2021 
16:43:59 -0300 Subject: [PATCH 12/22] all requested changes except skip_if_not_available --- r/R/feather.R | 7 +------ r/R/parquet.R | 7 +------ r/inst/extdata/pets.feather | Bin 1026 -> 0 bytes r/inst/extdata/pets.parquet | Bin 1208 -> 0 bytes r/inst/v0.7.1.feather | Bin 0 -> 3018 bytes r/tests/testthat/test-feather.R | 8 +++++--- r/tests/testthat/test-parquet.R | 4 ++-- 7 files changed, 9 insertions(+), 17 deletions(-) delete mode 100644 r/inst/extdata/pets.feather delete mode 100644 r/inst/extdata/pets.parquet create mode 100644 r/inst/v0.7.1.feather diff --git a/r/R/feather.R b/r/R/feather.R index 48d87247bfd..ef0b57f6ab8 100644 --- a/r/R/feather.R +++ b/r/R/feather.R @@ -156,12 +156,7 @@ read_feather <- function(file, col_select = NULL, as_data_frame = TRUE, ...) { out <- tryCatch( reader$Read(columns), - error = function (e) { - if (grepl("Support for codec", conditionMessage(e))) { - msg <- "Unsupported compressed format: We suggest either setting the right environment variable to install binaries or setting LIBARROW_MINIMAL=false and then reinstall the package." - stop(msg, call. = FALSE) - } - } + error = function(e) { read_compressed_error(e) } ) if (isTRUE(as_data_frame)) { diff --git a/r/R/parquet.R b/r/R/parquet.R index 4eec32128da..e20cdf41619 100644 --- a/r/R/parquet.R +++ b/r/R/parquet.R @@ -54,12 +54,7 @@ read_parquet <- function(file, indices <- match(vars_select(names, !!col_select), names) - 1L tab <- tryCatch( reader$ReadTable(indices), - error = function (e) { - if (grepl("Support for codec", conditionMessage(e))) { - msg <- "Unsupported compressed format: We suggest either setting the right environment variable to install binaries or setting LIBARROW_MINIMAL=false and then reinstall the package." - stop(msg, call. = FALSE) - } - } + error = function(e) { read_compressed_error(e) } ) } else { # read all columns diff --git a/r/inst/extdata/pets.feather b/r/inst/extdata/pets.feather deleted file mode 100644 index ff203d7dca6e39ab3ee059785b7952bdfbb780bf..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1026 zcmdT@J5Iwu5FICuV~~Zc2niPyQPO~r5Ofq2F`*6$hc<2poYU z3tKs@P)j}gRifd$CN*VV5` z8|W2!X}lsIsDb1bYaFT%#^lAb6ubc1HGH_85kAX`(;+8G9g<;6|$ zoHYo29c%sp;GiXr^x9n+isMW9d*!?br0~84I7!c(Zm5fMos)(Z-q9ke(ho%sC z1CAUy@C1l62gHFV;0ZV)gm?(V%sN&{l?wt!){b}n*_m&56W6g>R8sjWUn3`T0M|E7 z0I0-WcID_?fyybTpls4DW%;*Y)g;w5Nu!KR-k{MI!psJ*g|D(+NrL6M7LNMEXrK(o zc3ukSl?W<7%Q6n<;3lTBj98xm0$O6yt?3I!x5sc86#JcT-rOXE0+l=fh7Ozf(b+$IClMe41bkU$)Qicn5fDtgEPp{iUEYOAO{vp)Yy+ z=9~A;o0&Inc71q!eDruP(I(_XfXF45Rti%Sg~&k*V%fk1&U z^dQuR{s`Zf&{^masLl5G%}oRg@EqN`4~9CRg+k>_sXSMxmOm_43yDGjeJ8Lc7%PT{ zr^W_(Y!{T{v0y4%fS!^504?vpn+NYR&0E`nHwWJLn)g`o1;%difz z<508A3!oLlBO%vr$@NBo8^_g)?^StRTyg@G-+@~y&(=?Yo7LQNiW}lQG{$WPo~{;a zmHIqg24`g(9tIn*v#P=vMym+%o=wq z_>a$bbnleUYw%mTe_7ij+OBJBjKPLKzy%1|uMACTtl<_m{AKMwqzS+H_y*^+KY^Sz ziotK{&N-jo;McW()#o=jul-Rz=ms(PUEMe5^BZjPdB^8BIN1=B-xH1J#jgK5Hu<;X zBhe}G$q<(O(0YHc_4b9?>&d$dT^G}Le@eQv;hev^(UU)aGYJR0x00)S7w?U&-f+nM zdKcv`tR-K_AE2dcNi&u=4|z!$((wU3?&0gHE*Q}+w4a4`Ltloz0(}i?N(s$BfsVtx z->gijT&(75<3|gRnIZ6ZcdH}stDwR2UG&7tU2HD6q^YJ_!pW|{KBG67K=i(XYbI=%cOJcgE z=L7U6GJO-7-pL94qtQ1B1MQW4eQ3xLzVm$*oi14@0d#=hgt@B zoR^gmI0{jZGNvy0?8OjH!KPbjm@IS@nzQN37%b~JbW42eNWl^N;&Y2GMZ`WX%)Jrb zm~h4QYZiUwioC4f4zZ68E=ELxmWE|!3hZk8w|%DJUvE>j9}%kd+l7g7 zf9xshb!9?3#KrV(>gFjvR#(AAVpay*RTlfr48HaAz1tu)ASgcKYqt!8_yFTa2ura+v3uXE=FJH}K>+__A=&Q~Le8qHW{%4Q?q 2GB can write to feather", { unlink(feather_file) -test_that("Error messages are shown when the compression algorithm lz4/snappy +ft_file 
<- system.file("v0.7.1.feather", package = "arrow") + +test_that("Error messages are shown when the compression algorithm lz4 is not found", { skip_on_cran() if (codec_is_available("lz4")) { - d <- read_feather(system.file("extdata", "pets.feather", package="arrow")) + d <- read_feather(ft_file) expect_is(d, "data.frame") } else { expect_error( - read_feather(system.file("extdata", "pets.feather", package="arrow")), + read_feather(ft_file), "Unsupported compressed format" ) } diff --git a/r/tests/testthat/test-parquet.R b/r/tests/testthat/test-parquet.R index bf65aee2be4..2df8a057f8f 100644 --- a/r/tests/testthat/test-parquet.R +++ b/r/tests/testthat/test-parquet.R @@ -239,11 +239,11 @@ test_that("Error messages are shown when the compression algorithm lz4/snappy is not found", { skip_on_cran() if (codec_is_available("snappy")) { - d <- read_parquet(system.file("extdata", "pets.parquet", package = "arrow")) + d <- read_parquet(pq_file) expect_is(d, "data.frame") } else { expect_error( - read_parquet(system.file("extdata", "pets.parquet", package = "arrow")), + read_parquet(pq_file), "Unsupported compressed format" ) } From 18c51f2032bfc9bbffcacea11a0a27025ee513f2 Mon Sep 17 00:00:00 2001 From: Mauricio Vargas Date: Fri, 19 Mar 2021 16:45:35 -0300 Subject: [PATCH 13/22] indices else stament with erro function --- r/R/parquet.R | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/r/R/parquet.R b/r/R/parquet.R index e20cdf41619..689324113c2 100644 --- a/r/R/parquet.R +++ b/r/R/parquet.R @@ -60,12 +60,7 @@ read_parquet <- function(file, # read all columns tab <- tryCatch( reader$ReadTable(), - error = function (e) { - if (grepl("Support for codec", conditionMessage(e))) { - msg <- "Unsupported compressed format: We suggest either setting the right environment variable to install binaries or setting LIBARROW_MINIMAL=false and then reinstall the package." - stop(msg, call. = FALSE) - } - } + error = function(e) { read_compressed_error(e) } ) } From ea5949e2e0a11fd0e60d791a1799f1aebc5982d8 Mon Sep 17 00:00:00 2001 From: Mauricio Vargas Date: Wed, 31 Mar 2021 17:52:01 -0300 Subject: [PATCH 14/22] small progress with error msg to send theread_compressed_error function --- r/R/feather.R | 4 ++-- r/R/util.R | 12 ++++++++++++ r/tests/testthat/test-feather.R | 1 - r/tests/testthat/test-parquet.R | 1 - 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/r/R/feather.R b/r/R/feather.R index ef0b57f6ab8..bbf6b9d1e47 100644 --- a/r/R/feather.R +++ b/r/R/feather.R @@ -144,10 +144,10 @@ write_feather <- function(x, #' } read_feather <- function(file, col_select = NULL, as_data_frame = TRUE, ...) { if (!inherits(file, "RandomAccessFile")) { - file <- make_readable_file(file) + file <- arrow:::make_readable_file(file) on.exit(file$close()) } - reader <- FeatherReader$create(file, ...) 
+ reader <- FeatherReader$create(file) col_select <- enquo(col_select) columns <- if (!quo_is_null(col_select)) { diff --git a/r/R/util.R b/r/R/util.R index 3362c0f4fda..99876f65bcd 100644 --- a/r/R/util.R +++ b/r/R/util.R @@ -45,3 +45,15 @@ is_list_of <- function(object, class) { } empty_named_list <- function() structure(list(), .Names = character(0)) + +read_compressed_error <- function(e) { + e <- as.character(e) + msg <- paste("Unsupported compressed format", + regmatches(e, gregexpr("(?<=\')(.*?)(?=\')", e, perl = TRUE))[[1]], + "\nPlease visit https://arrow.apache.org/docs/r/articles/install.html", + "\nfor an explanation about setting LD_LIBRARY_PATH/PKG_CONFIG_PATH or", + "\nsetting LIBARROW_MINIMAL=false and then reinstall the package." + ) + message(msg) + FALSE +} diff --git a/r/tests/testthat/test-feather.R b/r/tests/testthat/test-feather.R index 2f7608f5d37..48001173422 100644 --- a/r/tests/testthat/test-feather.R +++ b/r/tests/testthat/test-feather.R @@ -201,7 +201,6 @@ ft_file <- system.file("v0.7.1.feather", package = "arrow") test_that("Error messages are shown when the compression algorithm lz4 is not found", { - skip_on_cran() if (codec_is_available("lz4")) { d <- read_feather(ft_file) expect_is(d, "data.frame") diff --git a/r/tests/testthat/test-parquet.R b/r/tests/testthat/test-parquet.R index 2df8a057f8f..ff7877a32b7 100644 --- a/r/tests/testthat/test-parquet.R +++ b/r/tests/testthat/test-parquet.R @@ -237,7 +237,6 @@ test_that("ParquetFileReader $ReadRowGroup(s) methods", { test_that("Error messages are shown when the compression algorithm lz4/snappy is not found", { - skip_on_cran() if (codec_is_available("snappy")) { d <- read_parquet(pq_file) expect_is(d, "data.frame") From 1830829a11bd6fe22fa302c4cf13a858ee954b90 Mon Sep 17 00:00:00 2001 From: Mauricio Vargas Date: Thu, 1 Apr 2021 13:02:18 -0300 Subject: [PATCH 15/22] polished end user msg, return error instead of data.frame with FALSE when lz4/snappy is not installed --- r/R/util.R | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/r/R/util.R b/r/R/util.R index 99876f65bcd..0e5b7478973 100644 --- a/r/R/util.R +++ b/r/R/util.R @@ -48,12 +48,13 @@ empty_named_list <- function() structure(list(), .Names = character(0)) read_compressed_error <- function(e) { e <- as.character(e) - msg <- paste("Unsupported compressed format", - regmatches(e, gregexpr("(?<=\')(.*?)(?=\')", e, perl = TRUE))[[1]], + alg <- regmatches(e, gregexpr("(?<=\')(.*?)(?=\')", e, perl = TRUE))[[1]] + msg <- paste("Unsupported compressed format", alg, "\nPlease visit https://arrow.apache.org/docs/r/articles/install.html", - "\nfor an explanation about setting LD_LIBRARY_PATH/PKG_CONFIG_PATH or", - "\nsetting LIBARROW_MINIMAL=false and then reinstall the package." 
+ "\nfor an explanation about optional features such as compression libraries enabled.", + "\nSetting LIBARROW_MINIMAL=false and then building the package from source fixes this,", + sprintf("\nor building libarrow with -DARROW_WITH_%s=ON and reinstalling the package.", + toupper(alg)) ) - message(msg) - FALSE + stop(msg) } From 9169e0c5f97ebf6d993388349ca4882427d94b46 Mon Sep 17 00:00:00 2001 From: Mauricio Vargas Date: Thu, 1 Apr 2021 15:13:07 -0300 Subject: [PATCH 16/22] fix https://github.com/apache/arrow/pull/9743#discussion_r605817747 --- r/R/feather.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/feather.R b/r/R/feather.R index bbf6b9d1e47..f8f3cb39354 100644 --- a/r/R/feather.R +++ b/r/R/feather.R @@ -144,7 +144,7 @@ write_feather <- function(x, #' } read_feather <- function(file, col_select = NULL, as_data_frame = TRUE, ...) { if (!inherits(file, "RandomAccessFile")) { - file <- arrow:::make_readable_file(file) + file <- make_readable_file(file) on.exit(file$close()) } reader <- FeatherReader$create(file) From e78232b7d64cac9cbb9baf71fb551b197ec7cd6d Mon Sep 17 00:00:00 2001 From: Pachamaltese Date: Fri, 2 Apr 2021 13:08:05 -0300 Subject: [PATCH 17/22] Update r/R/util.R Co-authored-by: Neal Richardson --- r/R/util.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/util.R b/r/R/util.R index 0e5b7478973..e926493eaaa 100644 --- a/r/R/util.R +++ b/r/R/util.R @@ -48,7 +48,7 @@ empty_named_list <- function() structure(list(), .Names = character(0)) read_compressed_error <- function(e) { e <- as.character(e) - alg <- regmatches(e, gregexpr("(?<=\')(.*?)(?=\')", e, perl = TRUE))[[1]] + alg <- sub(".*Support for codec '(.*)'.*", "\\1", e) msg <- paste("Unsupported compressed format", alg, "\nPlease visit https://arrow.apache.org/docs/r/articles/install.html", "\nfor an explanation about optional features such as compression libraries enabled.", From 66fa3a197f8cb2e8b5a6f9c1e473af896103ca13 Mon Sep 17 00:00:00 2001 From: Pachamaltese Date: Fri, 2 Apr 2021 13:17:49 -0300 Subject: [PATCH 18/22] Update r/R/util.R Co-authored-by: Jonathan Keane --- r/R/util.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/util.R b/r/R/util.R index e926493eaaa..1a43fa36b23 100644 --- a/r/R/util.R +++ b/r/R/util.R @@ -51,7 +51,7 @@ read_compressed_error <- function(e) { alg <- sub(".*Support for codec '(.*)'.*", "\\1", e) msg <- paste("Unsupported compressed format", alg, "\nPlease visit https://arrow.apache.org/docs/r/articles/install.html", - "\nfor an explanation about optional features such as compression libraries enabled.", + "\nfor information about troubleshooting installation issues.", "\nSetting LIBARROW_MINIMAL=false and then building the package from source fixes this,", sprintf("\nor building libarrow with -DARROW_WITH_%s=ON and reinstalling the package.", toupper(alg)) From 16d29a3779e658b197eeda858355f3d16c5c1f59 Mon Sep 17 00:00:00 2001 From: Mauricio Vargas Date: Fri, 2 Apr 2021 15:31:20 -0300 Subject: [PATCH 19/22] partial progress --- r/R/util.R | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/r/R/util.R b/r/R/util.R index 1a43fa36b23..4d697ad59ac 100644 --- a/r/R/util.R +++ b/r/R/util.R @@ -48,13 +48,14 @@ empty_named_list <- function() structure(list(), .Names = character(0)) read_compressed_error <- function(e) { e <- as.character(e) - alg <- sub(".*Support for codec '(.*)'.*", "\\1", e) - msg <- paste("Unsupported compressed format", alg, - "\nPlease visit 
https://arrow.apache.org/docs/r/articles/install.html", - "\nfor information about troubleshooting installation issues.", - "\nSetting LIBARROW_MINIMAL=false and then building the package from source fixes this,", - sprintf("\nor building libarrow with -DARROW_WITH_%s=ON and reinstalling the package.", - toupper(alg)) + compression <- sub(".*Support for codec '(.*)'.*", "\\1", e) + msg <- c( + sprintf("Unsupported compressed format %s", compression), + "\nTry setting the environment variable LIBARROW_MINIMAL=false and reinstalling", + "\nfor a more complete installation ", + sprintf("(including %s) or setting", compression), + sprintf("\nARROW_WITH_%s=ON", toupper(compression)), + "and reinstalling to enable support for this codec." ) - stop(msg) + stop(msg, call. = FALSE) } From a32d3fc35191a3f6ddd9433f64876611a6e7edc0 Mon Sep 17 00:00:00 2001 From: Mauricio Vargas Date: Fri, 2 Apr 2021 15:34:13 -0300 Subject: [PATCH 20/22] line width --- r/R/util.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/r/R/util.R b/r/R/util.R index 4d697ad59ac..97932cbaf0a 100644 --- a/r/R/util.R +++ b/r/R/util.R @@ -54,8 +54,7 @@ read_compressed_error <- function(e) { "\nTry setting the environment variable LIBARROW_MINIMAL=false and reinstalling", "\nfor a more complete installation ", sprintf("(including %s) or setting", compression), - sprintf("\nARROW_WITH_%s=ON", toupper(compression)), - "and reinstalling to enable support for this codec." + sprintf("\nARROW_WITH_%s=ON and reinstalling to enable support for this codec.", toupper(compression)) ) stop(msg, call. = FALSE) } From 3e429befe81dfdc8b6063cc7b319f6119b951aa6 Mon Sep 17 00:00:00 2001 From: Mauricio Vargas Date: Fri, 2 Apr 2021 16:32:14 -0300 Subject: [PATCH 21/22] remove added files in inst --- r/inst/v0.7.1.feather | Bin 3018 -> 0 bytes r/tests/testthat/test-feather.R | 4 +++- 2 files changed, 3 insertions(+), 1 deletion(-) delete mode 100644 r/inst/v0.7.1.feather diff --git a/r/inst/v0.7.1.feather b/r/inst/v0.7.1.feather deleted file mode 100644 index 5a412c1f77bae3a050608ce5c3f119e9431bf59a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3018 zcmeHJO>7%Q6n<;3lTBj98xm0$O6yt?3I!x5sc86#JcT-rOXE0+l=fh7Ozf(b+$IClMe41bkU$)Qicn5fDtgEPp{iUEYOAO{vp)Yy+ z=9~A;o0&Inc71q!eDruP(I(_XfXF45Rti%Sg~&k*V%fk1&U z^dQuR{s`Zf&{^masLl5G%}oRg@EqN`4~9CRg+k>_sXSMxmOm_43yDGjeJ8Lc7%PT{ zr^W_(Y!{T{v0y4%fS!^504?vpn+NYR&0E`nHwWJLn)g`o1;%difz z<508A3!oLlBO%vr$@NBo8^_g)?^StRTyg@G-+@~y&(=?Yo7LQNiW}lQG{$WPo~{;a zmHIqg24`g(9tIn*v#P=vMym+%o=wq z_>a$bbnleUYw%mTe_7ij+OBJBjKPLKzy%1|uMACTtl<_m{AKMwqzS+H_y*^+KY^Sz ziotK{&N-jo;McW()#o=jul-Rz=ms(PUEMe5^BZjPdB^8BIN1=B-xH1J#jgK5Hu<;X zBhe}G$q<(O(0YHc_4b9?>&d$dT^G}Le@eQv;hev^(UU)aGYJR0x00)S7w?U&-f+nM zdKcv`tR-K_AE2dcNi&u=4|z!$((wU3?&0gHE*Q}+w4a4`Ltloz0(}i?N(s$BfsVtx z->gijT&(75<3|gRnIZ6ZcdH}stDwR2UG&7tU2HD6q^YJ_!pW|{KBG67K=i(XYbI=%cOJcgE z=L7U6GJO-7-pL94qtQ1B1MQW4eQ3xLzVm$*oi14@0d#=hgt@B zoR^gmI0{jZGNvy0?8OjH!KPbjm@IS@nzQN37%b~JbW42eNWl^N;&Y2GMZ`WX%)Jrb zm~h4QYZiUwioC4f4zZ68E=ELxmWE|!3hZk8w|%DJUvE>j9}%kd+l7g7 zf9xshb!9?3#KrV(>gFjvR#(AAVpay*RTlfr48HaAz1tu)ASgcKYqt!8_yFTa2ura+v3uXE=FJH}K>+__A=&Q~Le8qHW{%4Q?q 2GB can write to feather", { unlink(feather_file) -ft_file <- system.file("v0.7.1.feather", package = "arrow") +# lz4 ---- + +ft_file <- test_path("golden-files/data-arrow_2.0.0_lz4.feather") test_that("Error messages are shown when the compression algorithm lz4 is not found", { From 9e04faa128e36767f92d5a837a451e102b79e9bb Mon Sep 17 00:00:00 2001 From: Mauricio Vargas Date: Fri, 
2 Apr 2021 16:37:25 -0300 Subject: [PATCH 22/22] fixes https://github.com/apache/arrow/pull/9743/files#r606252213 --- r/tests/testthat/test-parquet.R | 5 ----- 1 file changed, 5 deletions(-) diff --git a/r/tests/testthat/test-parquet.R b/r/tests/testthat/test-parquet.R index ff7877a32b7..89310d8af22 100644 --- a/r/tests/testthat/test-parquet.R +++ b/r/tests/testthat/test-parquet.R @@ -240,10 +240,5 @@ test_that("Error messages are shown when the compression algorithm lz4/snappy if (codec_is_available("snappy")) { d <- read_parquet(pq_file) expect_is(d, "data.frame") - } else { - expect_error( - read_parquet(pq_file), - "Unsupported compressed format" - ) } })
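
Taken together, the series converges on one pattern: read_feather() and read_parquet() wrap the low-level reader call (reader$Read() / reader$ReadTable()) in tryCatch() and hand codec failures to a shared helper, read_compressed_error() in r/R/util.R, which turns the raw "Support for codec ..." error into an actionable message about LIBARROW_MINIMAL / ARROW_WITH_<CODEC>. The standalone sketch below is not an applyable patch hunk; it only illustrates that pattern. The grepl() guard, the sample error string, and the exact message wording are illustrative additions that paraphrase the final patch rather than quote it.

# Sketch of the error-translation helper (loosely mirroring r/R/util.R in the final patch).
# The guard on the message text and the wording below are illustrative, not the patch's exact code.
read_compressed_error <- function(e) {
  msg <- conditionMessage(e)
  if (!grepl("Support for codec", msg, fixed = TRUE)) {
    stop(e)  # unrelated errors are re-raised untouched
  }
  compression <- sub(".*Support for codec '(.*)'.*", "\\1", msg)
  stop(
    sprintf("Unsupported compressed format %s: ", compression),
    "try setting LIBARROW_MINIMAL=false (or building libarrow with ",
    sprintf("ARROW_WITH_%s=ON) and reinstalling the package.", toupper(compression)),
    call. = FALSE
  )
}

# Usage: the inner stop() is a stand-in for reader$Read()/reader$ReadTable() failing on a
# missing codec; the user sees the actionable message instead of the raw Arrow error.
friendly <- tryCatch(
  tryCatch(
    stop("IOError: Support for codec 'lz4' not built"),
    error = read_compressed_error
  ),
  error = conditionMessage
)
cat(friendly, "\n")

Centralising the translation in one helper is what lets patches 13-14 shrink the duplicated tryCatch() branches in feather.R and parquet.R to a single error = function(e) read_compressed_error(e) line, and keeps the tests able to match on the stable "Unsupported compressed format" prefix.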