Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions r/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
S3method("!=",ArrowObject)
S3method("$",RecordBatch)
S3method("$",Schema)
S3method("$",SubTreeFileSystem)
S3method("$",Table)
S3method("==",ArrowObject)
S3method("[",Array)
Expand Down Expand Up @@ -188,6 +189,7 @@ export(TimestampParser)
export(Type)
export(UnionDataset)
export(arrow_available)
export(arrow_with_s3)
export(binary)
export(bool)
export(boolean)
Expand All @@ -196,6 +198,7 @@ export(cast_options)
export(chunked_array)
export(codec_is_available)
export(contains)
export(copy_files)
export(cpu_count)
export(dataset_factory)
export(date32)
Expand Down Expand Up @@ -249,6 +252,7 @@ export(read_parquet)
export(read_schema)
export(read_tsv_arrow)
export(record_batch)
export(s3_bucket)
export(schema)
export(set_cpu_count)
export(starts_with)
Expand Down
18 changes: 14 additions & 4 deletions r/R/arrow-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,18 +53,28 @@

#' Is the C++ Arrow library available?
#'
#' You won't generally need to call this function, but it's here in case it
#' helps for development purposes.
#' You won't generally need to call these functions, but they're made available
#' for diagnostic purposes.
#' @return `TRUE` or `FALSE` depending on whether the package was installed
#' with the Arrow C++ library. If `FALSE`, you'll need to install the C++
#' library and then reinstall the R package. See [install_arrow()] for help.
#' with the Arrow C++ library (check with `arrow_available()`) or with S3
#' support enabled (check with `arrow_with_s3()`).
#' @export
#' @examples
#' arrow_available()
#' arrow_with_s3()
#' @seealso If either of these are `FALSE`, see
#' `vignette("install", package = "arrow")` for guidance on reinstalling the
#' package.
arrow_available <- function() {
  # Ask the compiled bindings whether the Arrow C++ library was
  # linked when the package was installed.
  has_cpp_lib <- .Call(`_arrow_available`)
  has_cpp_lib
}

#' @rdname arrow_available
#' @export
arrow_with_s3 <- function() {
  # Ask the compiled bindings whether S3 filesystem support was
  # enabled in the Arrow C++ build this package was installed with.
  has_s3 <- .Call(`_s3_available`)
  has_s3
}

option_use_threads <- function() {
  # Multithreading is on by default: only an explicit
  # options(arrow.use_threads = FALSE) turns it off. An unset
  # (NULL) option therefore means TRUE here.
  use_threads_opt <- getOption("arrow.use_threads")
  !is_false(use_threads_opt)
}
Expand Down
12 changes: 12 additions & 0 deletions r/R/arrowExports.R

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 3 additions & 5 deletions r/R/csv.R
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,8 @@
#' `col_names`, and the CSV file has a header row that would otherwise be used
#' to identify column names, you'll need to add `skip = 1` to skip that row.
#'
#' @param file A character file name or URI, `raw` vector, or an Arrow input stream.
#' @param file A character file name or URI, `raw` vector, an Arrow input stream,
#' or a `FileSystem` with path (`SubTreeFileSystem`).
#' If a file name, a memory-mapped Arrow [InputStream] will be opened and
#' closed when finished; compression will be detected from the file extension
#' and handled automatically. If an input stream is provided, it will be left
Expand Down Expand Up @@ -123,8 +124,6 @@
#' parsing options provided in other arguments (e.g. `delim`, `quote`, etc.).
#' @param convert_options see [file reader options][CsvReadOptions]
#' @param read_options see [file reader options][CsvReadOptions]
#' @param filesystem A [FileSystem] where `file` can be found if it is a
#' string file path; default is the local file system
#' @param as_data_frame Should the function return a `data.frame` (default) or
#' an Arrow [Table]?
#'
Expand Down Expand Up @@ -156,7 +155,6 @@ read_delim_arrow <- function(file,
parse_options = NULL,
convert_options = NULL,
read_options = NULL,
filesystem = NULL,
as_data_frame = TRUE,
timestamp_parsers = NULL) {
if (inherits(schema, "Schema")) {
Expand Down Expand Up @@ -186,7 +184,7 @@ read_delim_arrow <- function(file,
}

if (!inherits(file, "InputStream")) {
file <- make_readable_file(file, filesystem = filesystem)
file <- make_readable_file(file)
on.exit(file$close())
}
reader <- CsvTableReader$create(
Expand Down
3 changes: 0 additions & 3 deletions r/R/dataset-factory.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,6 @@ DatasetFactory$create <- function(x,
if (is_list_of(x, "DatasetFactory")) {
return(shared_ptr(DatasetFactory, dataset___UnionDatasetFactory__Make(x)))
}
if (!is.string(x)) {
stop("'x' must be a string or a list of DatasetFactory", call. = FALSE)
}

path_and_fs <- get_path_and_filesystem(x, filesystem)
selector <- FileSelector$create(path_and_fs$path, allow_not_found = FALSE, recursive = TRUE)
Expand Down
9 changes: 3 additions & 6 deletions r/R/dataset-write.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@
#' and `group_by()` operations done on the dataset. `filter()` queries will be
#' applied to restrict written rows.
#' Note that `select()`-ed columns may not be renamed.
#' @param path string path or URI to a directory to write to (directory will be
#' created if it does not exist)
#' @param path string path, URI, or `SubTreeFileSystem` referencing a directory
#' to write to (directory will be created if it does not exist)
#' @param format file format to write the dataset to. Currently supported
#' formats are "feather" (aka "ipc") and "parquet". Default is to write to the
#' same format as `dataset`.
Expand All @@ -41,8 +41,6 @@
#' will yield `"part-0.feather", ...`.
#' @param hive_style logical: write partition segments as Hive-style
#' (`key1=value1/key2=value2/file.ext`) or as just bare values. Default is `TRUE`.
#' @param filesystem A [FileSystem] where the dataset should be written if it is a
#' string file path; default is the local file system
#' @param ... additional format-specific arguments. For available Parquet
#' options, see [write_parquet()].
#' @return The input `dataset`, invisibly
Expand All @@ -53,7 +51,6 @@ write_dataset <- function(dataset,
partitioning = dplyr::group_vars(dataset),
basename_template = paste0("part-{i}.", as.character(format)),
hive_style = TRUE,
filesystem = NULL,
...) {
if (inherits(dataset, "arrow_dplyr_query")) {
# We can select a subset of columns but we can't rename them
Expand All @@ -79,7 +76,7 @@ write_dataset <- function(dataset,
}
}

path_and_fs <- get_path_and_filesystem(path, filesystem)
path_and_fs <- get_path_and_filesystem(path)
options <- FileWriteOptions$create(format, table = scanner, ...)

dataset___Dataset__Write(options, path_and_fs$fs, path_and_fs$path,
Expand Down
15 changes: 6 additions & 9 deletions r/R/feather.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,8 @@
#' and the version 2 specification, which is the Apache Arrow IPC file format.
#'
#' @param x `data.frame`, [RecordBatch], or [Table]
#' @param sink A string file path, URI, or [OutputStream]
#' @param filesystem A [FileSystem] where `sink` should be written if it is a
#' string file path; default is the local file system
#' @param sink A string file path, URI, or [OutputStream], or path in a file
#' system (`SubTreeFileSystem`)
#' @param version integer Feather file version. Version 2 is the current.
#' Version 1 is the more limited legacy format.
#' @param chunk_size For V2 files, the number of rows that each chunk of data
Expand Down Expand Up @@ -54,7 +53,6 @@
#' @include arrow-package.R
write_feather <- function(x,
sink,
filesystem = NULL,
version = 2,
chunk_size = 65536L,
compression = c("default", "lz4", "uncompressed", "zstd"),
Expand Down Expand Up @@ -108,11 +106,10 @@ write_feather <- function(x,
}
assert_is(x, "Table")

if (is.string(sink)) {
sink <- make_output_stream(sink, filesystem)
if (!inherits(sink, "OutputStream")) {
sink <- make_output_stream(sink)
on.exit(sink$close())
}
assert_is(sink, "OutputStream")
ipc___WriteFeather__Table(sink, x, version, chunk_size, compression, compression_level)
invisible(x_out)
}
Expand Down Expand Up @@ -144,9 +141,9 @@ write_feather <- function(x,
#' # Can select columns
#' df <- read_feather(tf, col_select = starts_with("d"))
#' }
read_feather <- function(file, col_select = NULL, as_data_frame = TRUE, filesystem = NULL, ...) {
read_feather <- function(file, col_select = NULL, as_data_frame = TRUE, ...) {
if (!inherits(file, "RandomAccessFile")) {
file <- make_readable_file(file, filesystem = filesystem)
file <- make_readable_file(file)
on.exit(file$close())
}
reader <- FeatherReader$create(file, ...)
Expand Down
Loading