From 3727946f5abf3c7c4526318ee317dcb4c48a72e6 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Mon, 9 Sep 2019 13:39:33 +0200 Subject: [PATCH 1/5] codec = NULL means choose the right default for the current platform. --- r/R/compression.R | 23 +++++++++++++---------- r/tests/testthat/helper-arrow.R | 2 +- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/r/R/compression.R b/r/R/compression.R index a58defe640a..1f76e5b01eb 100644 --- a/r/R/compression.R +++ b/r/R/compression.R @@ -27,8 +27,15 @@ Codec <- R6Class("Codec", inherit = Object) #' #' @export compression_codec <- function(type = "GZIP") { - type <- CompressionType[[match.arg(type, names(CompressionType))]] - unique_ptr(Codec, util___Codec__Create(type)) + if (is.character(type)) { + type <- unique_ptr("Codec", util___Codec__Create( + CompressionType[[match.arg(type, names(CompressionType))]] + )) + } else if (!inherits(type, "Codec")) { + abort("Not compatible with requested type") + } + + type } #' @title Compressed stream classes @@ -56,11 +63,8 @@ compression_codec <- function(type = "GZIP") { #' @export #' @include arrow-package.R CompressedOutputStream <- R6Class("CompressedOutputStream", inherit = OutputStream) -CompressedOutputStream$create <- function(stream, codec = compression_codec()){ - if (.Platform$OS.type == "windows") { - stop("'CompressedOutputStream' is unsupported in Windows.") - } - assert_is(codec, "Codec") +CompressedOutputStream$create <- function(stream, codec = "GZIP"){ + codec <- compression_codec(codec) if (is.character(stream)) { stream <- FileOutputStream$create(stream) } @@ -73,9 +77,8 @@ CompressedOutputStream$create <- function(stream, codec = compression_codec()){ #' @format NULL #' @export CompressedInputStream <- R6Class("CompressedInputStream", inherit = InputStream) -CompressedInputStream$create <- function(stream, codec = compression_codec()){ - # TODO (npr): why would CompressedInputStream work on Windows if CompressedOutputStream doesn't? (and is it still the case that it does not?) - assert_is(codec, "Codec") +CompressedInputStream$create <- function(stream, codec = "GZIP"){ + codec <- compression_codec(codec) if (is.character(stream)) { stream <- ReadableFile$create(stream) } diff --git a/r/tests/testthat/helper-arrow.R b/r/tests/testthat/helper-arrow.R index 4882036bfbf..c330f51664c 100644 --- a/r/tests/testthat/helper-arrow.R +++ b/r/tests/testthat/helper-arrow.R @@ -16,7 +16,7 @@ # under the License. # Wrap testthat::test_that with a check for the C++ library -options(..skip.tests = !arrow::arrow_available()) +options(..skip.tests = !arrow:::arrow_available()) test_that <- function(what, code) { testthat::test_that(what, { From 9cc2b43abc1e9bfd29657451331a1ed0b92454f8 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Tue, 10 Sep 2019 09:37:14 +0200 Subject: [PATCH 2/5] using vctrs::vec_[cr]bind instead of dplyr::bind_[rows|cols] --- r/data-raw/codegen.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/r/data-raw/codegen.R b/r/data-raw/codegen.R index ba5d0cbdf9d..b90f8543aa2 100644 --- a/r/data-raw/codegen.R +++ b/r/data-raw/codegen.R @@ -41,6 +41,7 @@ suppressPackageStartupMessages({ library(dplyr) library(purrr) library(glue) + library(vctrs) }) if (packageVersion("decor") < '0.0.0.9001') { @@ -53,7 +54,7 @@ decorations <- cpp_decorations() %>% # more concisely # rap( ~ decor:::parse_cpp_function(context)) mutate(functions = map(context, decor:::parse_cpp_function)) %>% - { bind_cols(., bind_rows(pull(., functions))) } %>% + { vec_cbind(., vec_rbind(!!!pull(., functions))) } %>% select(-functions) message(glue("*** > {n} functions decorated with [[arrow::export]]", n = nrow(decorations))) From c8021a5d976bdf0a13bd29061c24faf9d81e17c6 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Tue, 10 Sep 2019 09:37:34 +0200 Subject: [PATCH 3/5] Making GZIP default everywhere --- r/R/compression.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/compression.R b/r/R/compression.R index 1f76e5b01eb..f6a3f59df70 100644 --- a/r/R/compression.R +++ b/r/R/compression.R @@ -28,7 +28,7 @@ Codec <- R6Class("Codec", inherit = Object) #' @export compression_codec <- function(type = "GZIP") { if (is.character(type)) { - type <- unique_ptr("Codec", util___Codec__Create( + type <- unique_ptr(Codec, util___Codec__Create( CompressionType[[match.arg(type, names(CompressionType))]] )) } else if (!inherits(type, "Codec")) { From 7d83d2bb96a7451c17af66060d8c3659465e66a4 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Tue, 10 Sep 2019 17:26:16 -0700 Subject: [PATCH 4/5] Move compression_codec to Codec; add docs; allow lowercase compression types --- r/NAMESPACE | 2 +- r/R/compression.R | 39 ++++++++++++++++++++++---------------- r/man/Codec.Rd | 21 ++++++++++++++++++++ r/man/compression.Rd | 2 +- r/man/compression_codec.Rd | 14 -------------- 5 files changed, 46 insertions(+), 32 deletions(-) create mode 100644 r/man/Codec.Rd delete mode 100644 r/man/compression_codec.Rd diff --git a/r/NAMESPACE b/r/NAMESPACE index 936d9170a2d..1060bddf8ea 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -42,6 +42,7 @@ export(Buffer) export(BufferOutputStream) export(BufferReader) export(ChunkedArray) +export(Codec) export(CompressedInputStream) export(CompressedOutputStream) export(CompressionType) @@ -75,7 +76,6 @@ export(boolean) export(buffer) export(cast_options) export(chunked_array) -export(compression_codec) export(contains) export(csv_convert_options) export(csv_parse_options) diff --git a/r/R/compression.R b/r/R/compression.R index f6a3f59df70..2b999589d05 100644 --- a/r/R/compression.R +++ b/r/R/compression.R @@ -19,22 +19,29 @@ #' @include arrow-package.R #' @include io.R -Codec <- R6Class("Codec", inherit = Object) - -#' codec -#' -#' @param type type of codec -#' +#' @title Compression Codec class +#' @usage NULL +#' @format NULL +#' @docType class +#' @description Codecs allow you to create [compressed input and output +#' streams][compression]. +#' @section Factory: +#' The `Codec$create()` factory method takes the following argument: +#' * `type`: string name of the compression method. See [CompressionType] for +#' a list of possible values. `type` may be upper- or lower-cased. Support +#' for compression methods depends on build-time flags for the C++ library. +#' Most builds support at least "gzip" and "snappy". +#' @rdname Codec +#' @name Codec #' @export -compression_codec <- function(type = "GZIP") { +Codec <- R6Class("Codec", inherit = Object) +Codec$create <- function(type = "gzip") { if (is.character(type)) { type <- unique_ptr(Codec, util___Codec__Create( - CompressionType[[match.arg(type, names(CompressionType))]] + CompressionType[[match.arg(toupper(type), names(CompressionType))]] )) - } else if (!inherits(type, "Codec")) { - abort("Not compatible with requested type") } - + assert_is(type, "Codec") type } @@ -46,7 +53,7 @@ compression_codec <- function(type = "GZIP") { #' @usage NULL #' @format NULL #' @description `CompressedInputStream` and `CompressedOutputStream` -#' allow you to apply a [compression_codec()] to an +#' allow you to apply a compression [Codec] to an #' input or output stream. #' #' @section Factory: @@ -63,8 +70,8 @@ compression_codec <- function(type = "GZIP") { #' @export #' @include arrow-package.R CompressedOutputStream <- R6Class("CompressedOutputStream", inherit = OutputStream) -CompressedOutputStream$create <- function(stream, codec = "GZIP"){ - codec <- compression_codec(codec) +CompressedOutputStream$create <- function(stream, codec = "gzip"){ + codec <- Codec$create(codec) if (is.character(stream)) { stream <- FileOutputStream$create(stream) } @@ -77,8 +84,8 @@ CompressedOutputStream$create <- function(stream, codec = "GZIP"){ #' @format NULL #' @export CompressedInputStream <- R6Class("CompressedInputStream", inherit = InputStream) -CompressedInputStream$create <- function(stream, codec = "GZIP"){ - codec <- compression_codec(codec) +CompressedInputStream$create <- function(stream, codec = "gzip"){ + codec <- Codec$create(codec) if (is.character(stream)) { stream <- ReadableFile$create(stream) } diff --git a/r/man/Codec.Rd b/r/man/Codec.Rd new file mode 100644 index 00000000000..548c6b89c15 --- /dev/null +++ b/r/man/Codec.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/compression.R +\docType{class} +\name{Codec} +\alias{Codec} +\title{Compression Codec class} +\description{ +Codecs allow you to create \link[=compression]{compressed input and output streams}. +} +\section{Factory}{ + +The \code{Codec$create()} factory method takes the following argument: +\itemize{ +\item \code{type}: string name of the compression method. See \link{CompressionType} for +a list of possible values. \code{type} may be upper- or lower-cased. Support +for compression methods depends on build-time flags for the C++ library. +Most builds support at least "gzip" and "snappy". +} +} + +\keyword{datasets} diff --git a/r/man/compression.Rd b/r/man/compression.Rd index e9d0ca5d493..16130f8321e 100644 --- a/r/man/compression.Rd +++ b/r/man/compression.Rd @@ -8,7 +8,7 @@ \title{Compressed stream classes} \description{ \code{CompressedInputStream} and \code{CompressedOutputStream} -allow you to apply a \code{\link[=compression_codec]{compression_codec()}} to an +allow you to apply a compression \link{Codec} to an input or output stream. } \section{Factory}{ diff --git a/r/man/compression_codec.Rd b/r/man/compression_codec.Rd deleted file mode 100644 index a7db8ab0deb..00000000000 --- a/r/man/compression_codec.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/compression.R -\name{compression_codec} -\alias{compression_codec} -\title{codec} -\usage{ -compression_codec(type = "GZIP") -} -\arguments{ -\item{type}{type of codec} -} -\description{ -codec -} From 88fbf008fcad7f2857d316e9b205cf4a8cf3894e Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Tue, 10 Sep 2019 17:29:57 -0700 Subject: [PATCH 5/5] Remove windows test skip --- r/tests/testthat/test-compressed.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/r/tests/testthat/test-compressed.R b/r/tests/testthat/test-compressed.R index 008f974215b..8bf1092616e 100644 --- a/r/tests/testthat/test-compressed.R +++ b/r/tests/testthat/test-compressed.R @@ -18,8 +18,6 @@ context("Compressed.*Stream") test_that("can write Buffer to CompressedOutputStream and read back in CompressedInputStream", { - skip_on_os("windows") - buf <- buffer(as.raw(sample(0:255, size = 1024, replace = TRUE))) tf1 <- tempfile()