From 1944c6aae7f9835058b6cb3f67a27cc8c1937528 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Thu, 22 Aug 2019 10:03:23 -0700 Subject: [PATCH 1/4] Use fs::path_real() instead of path_abs() --- r/NAMESPACE | 1 + r/R/RecordBatchReader.R | 2 +- r/R/RecordBatchWriter.R | 10 +++++----- r/R/arrow-package.R | 1 + r/R/compression.R | 4 ++-- r/R/csv.R | 2 +- r/R/feather.R | 4 ++-- r/R/io.R | 6 +++--- r/R/json.R | 2 +- r/R/parquet.R | 2 +- r/R/read_table.R | 4 ++-- r/R/write_arrow.R | 4 ++-- r/man/RecordBatchFileWriter.Rd | 2 +- r/man/RecordBatchStreamWriter.Rd | 4 ++-- r/man/read_table.Rd | 2 +- r/man/write_arrow.Rd | 2 +- 16 files changed, 27 insertions(+), 25 deletions(-) diff --git a/r/NAMESPACE b/r/NAMESPACE index 3a413c0e802..fbdbfb2dcda 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -195,6 +195,7 @@ importFrom(Rcpp,sourceCpp) importFrom(assertthat,assert_that) importFrom(bit64,print.integer64) importFrom(bit64,str.integer64) +importFrom(fs,path_real) importFrom(purrr,map) importFrom(purrr,map2) importFrom(purrr,map_int) diff --git a/r/R/RecordBatchReader.R b/r/R/RecordBatchReader.R index 6dab2d1ff76..12ac3313553 100644 --- a/r/R/RecordBatchReader.R +++ b/r/R/RecordBatchReader.R @@ -124,7 +124,7 @@ RecordBatchFileReader <- function(file) { #' @export `RecordBatchFileReader.character` <- function(file) { assert_that(length(file) == 1L) - RecordBatchFileReader(fs::path_abs(file)) + RecordBatchFileReader(path_real(file)) } #' @export diff --git a/r/R/RecordBatchWriter.R b/r/R/RecordBatchWriter.R index 59aa9847a1f..490ef8b1ced 100644 --- a/r/R/RecordBatchWriter.R +++ b/r/R/RecordBatchWriter.R @@ -94,8 +94,8 @@ #' #' @param sink Where to write. Can either be: #' -#' - A string, meant as a file path, passed to [fs::path_abs()] -#' - a [file path][fs::path_abs()] +#' - A string, meant as a file path, passed to [path_real()] +#' - a [file path][path_real()] #' - [arrow::io::OutputStream][arrow__io__OutputStream] #' #' @param schema The [arrow::Schema][arrow__Schema] for data to be written. @@ -109,7 +109,7 @@ RecordBatchStreamWriter <- function(sink, schema) { #' @export RecordBatchStreamWriter.character <- function(sink, schema){ - RecordBatchStreamWriter(fs::path_abs(sink), schema) + RecordBatchStreamWriter(path_real(sink), schema) } #' @export @@ -161,7 +161,7 @@ RecordBatchStreamWriter.fs_path <- function(sink, schema){ #' @param sink Where to write. Can either be: #' #' - character vector of length one -#' - a [file path][fs::path_abs()] +#' - a [file path][path_real()] #' - [arrow::io::OutputStream][arrow__io__OutputStream] #' #' @param schema The [arrow::Schema][arrow__Schema] for data to be written. @@ -175,7 +175,7 @@ RecordBatchFileWriter <- function(sink, schema) { #' @export RecordBatchFileWriter.character <- function(sink, schema){ - RecordBatchFileWriter(fs::path_abs(sink), schema) + RecordBatchFileWriter(path_real(sink), schema) } #' @export diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R index 86e909e7329..62e652b6c75 100644 --- a/r/R/arrow-package.R +++ b/r/R/arrow-package.R @@ -16,6 +16,7 @@ # under the License. #' @importFrom R6 R6Class +#' @importFrom fs path_real #' @importFrom purrr map map_int map2 #' @importFrom assertthat assert_that #' @importFrom rlang list2 %||% is_false abort dots_n warn enquo quo_is_null enquos diff --git a/r/R/compression.R b/r/R/compression.R index e10fef1bd2e..175c7d215a3 100644 --- a/r/R/compression.R +++ b/r/R/compression.R @@ -50,7 +50,7 @@ CompressedOutputStream <- function(stream, codec = compression_codec("GZIP")){ #' @export CompressedOutputStream.character <- function(stream, codec = compression_codec("GZIP")){ - CompressedOutputStream(fs::path_abs(stream), codec = codec) + CompressedOutputStream(path_real(stream), codec = codec) } #' @export @@ -75,7 +75,7 @@ CompressedInputStream <- function(stream, codec = codec("GZIP")){ #' @export CompressedInputStream.character <- function(stream, codec = compression_codec("GZIP")){ - CompressedInputStream(fs::path_abs(stream), codec = codec) + CompressedInputStream(path_real(stream), codec = codec) } #' @export diff --git a/r/R/csv.R b/r/R/csv.R index 5b5d36cbe0b..1300551c86b 100644 --- a/r/R/csv.R +++ b/r/R/csv.R @@ -348,7 +348,7 @@ csv_table_reader.default <- function(file, convert_options = csv_convert_options(), ... ){ - csv_table_reader(fs::path_abs(file), + csv_table_reader(path_real(file), read_options = read_options, parse_options = parse_options, convert_options = convert_options, diff --git a/r/R/feather.R b/r/R/feather.R index 8bcbe2b80e0..97b33d19b77 100644 --- a/r/R/feather.R +++ b/r/R/feather.R @@ -109,7 +109,7 @@ write_feather_RecordBatch <- function(data, stream) { #' @export #' @method write_feather_RecordBatch character `write_feather_RecordBatch.character` <- function(data, stream) { - `write_feather_RecordBatch.fs_path`(data, fs::path_abs(stream)) + `write_feather_RecordBatch.fs_path`(data, path_real(stream)) } #' @export @@ -139,7 +139,7 @@ FeatherTableReader <- function(file, mmap = TRUE, ...){ #' @export FeatherTableReader.character <- function(file, mmap = TRUE, ...) { - FeatherTableReader(fs::path_abs(file), mmap = mmap, ...) + FeatherTableReader(path_real(file), mmap = mmap, ...) } #' @export diff --git a/r/R/io.R b/r/R/io.R index 5d7d99cb5e8..e3b234d6947 100644 --- a/r/R/io.R +++ b/r/R/io.R @@ -253,7 +253,7 @@ #' #' @export mmap_create <- function(path, size) { - shared_ptr(`arrow::io::MemoryMappedFile`, io___MemoryMappedFile__Create(fs::path_abs(path), size)) + shared_ptr(`arrow::io::MemoryMappedFile`, io___MemoryMappedFile__Create(path_real(path), size)) } #' Open a memory mapped file @@ -264,7 +264,7 @@ mmap_create <- function(path, size) { #' @export mmap_open <- function(path, mode = c("read", "write", "readwrite")) { mode <- match(match.arg(mode), c("read", "write", "readwrite")) - 1L - shared_ptr(`arrow::io::MemoryMappedFile`, io___MemoryMappedFile__Open(fs::path_abs(path), mode)) + shared_ptr(`arrow::io::MemoryMappedFile`, io___MemoryMappedFile__Open(path_real(path), mode)) } #' open a [arrow::io::ReadableFile][arrow__io__ReadableFile] @@ -275,7 +275,7 @@ mmap_open <- function(path, mode = c("read", "write", "readwrite")) { #' #' @export ReadableFile <- function(path) { - shared_ptr(`arrow::io::ReadableFile`, io___ReadableFile__Open(fs::path_abs(path))) + shared_ptr(`arrow::io::ReadableFile`, io___ReadableFile__Open(path_real(path))) } #' Open a [arrow::io::FileOutputStream][arrow__io__FileOutputStream] diff --git a/r/R/json.R b/r/R/json.R index dce130e61a1..f1cd8874096 100644 --- a/r/R/json.R +++ b/r/R/json.R @@ -87,7 +87,7 @@ json_table_reader.default <- function(file, parse_options = json_parse_options(), ... ){ - json_table_reader(fs::path_abs(file), + json_table_reader(path_real(file), read_options = read_options, parse_options = parse_options, ... diff --git a/r/R/parquet.R b/r/R/parquet.R index 4fcff6b7b1b..72e1d01ca5b 100644 --- a/r/R/parquet.R +++ b/r/R/parquet.R @@ -95,7 +95,7 @@ parquet_file_reader.fs_path <- function(file, props = parquet_arrow_reader_prope #' @export parquet_file_reader.character <- function(file, props = parquet_arrow_reader_properties(), memory_map = TRUE, ...) { - parquet_file_reader(fs::path_abs(file), props = parquet_arrow_reader_properties(), memory_map = memory_map, ...) + parquet_file_reader(path_real(file), props = parquet_arrow_reader_properties(), memory_map = memory_map, ...) } #' @export diff --git a/r/R/read_table.R b/r/R/read_table.R index ff2c5dd8c17..8cccafcfa9a 100644 --- a/r/R/read_table.R +++ b/r/R/read_table.R @@ -27,7 +27,7 @@ #' read an [arrow::Table][arrow__Table] from the remaining record batches #' in the reader #' -#' - a string or [file path][fs::path_abs()]: interpret the file as an arrow +#' - a string or [file path][fs::path_real()]: interpret the file as an arrow #' binary file format, and uses a [arrow::ipc::RecordBatchFileReader][arrow__ipc__RecordBatchFileReader] #' to process it. #' @@ -62,7 +62,7 @@ read_table <- function(stream){ #' @export read_table.character <- function(stream){ assert_that(length(stream) == 1L) - read_table(fs::path_abs(stream)) + read_table(path_real(stream)) } #' @export diff --git a/r/R/write_arrow.R b/r/R/write_arrow.R index 435fa82a40f..85399588796 100644 --- a/r/R/write_arrow.R +++ b/r/R/write_arrow.R @@ -36,7 +36,7 @@ to_arrow <- function(x) { #' of `x` is used. The stream is left open. This uses the streaming format #' or the binary file format depending on the type of the writer. #' -#' - A string or [file path][fs::path_abs()]: `x` is serialized with +#' - A string or [file path][path_real()]: `x` is serialized with #' a [arrow::ipc::RecordBatchFileWriter][arrow__ipc__RecordBatchFileWriter], i.e. #' using the binary file format. #' @@ -61,7 +61,7 @@ write_arrow <- function(x, stream, ...) { #' @export `write_arrow.character` <- function(x, stream, ...) { - write_arrow(x, fs::path_abs(stream), ...) + write_arrow(x, path_real(stream), ...) } #' @export diff --git a/r/man/RecordBatchFileWriter.Rd b/r/man/RecordBatchFileWriter.Rd index 90858304b0b..869e63bc0f5 100644 --- a/r/man/RecordBatchFileWriter.Rd +++ b/r/man/RecordBatchFileWriter.Rd @@ -10,7 +10,7 @@ RecordBatchFileWriter(sink, schema) \item{sink}{Where to write. Can either be: \itemize{ \item character vector of length one -\item a \link[fs:path_abs]{file path} +\item a \link[=path_real]{file path} \item \link[=arrow__io__OutputStream]{arrow::io::OutputStream} }} diff --git a/r/man/RecordBatchStreamWriter.Rd b/r/man/RecordBatchStreamWriter.Rd index b9183a80719..2cac5113a86 100644 --- a/r/man/RecordBatchStreamWriter.Rd +++ b/r/man/RecordBatchStreamWriter.Rd @@ -9,8 +9,8 @@ RecordBatchStreamWriter(sink, schema) \arguments{ \item{sink}{Where to write. Can either be: \itemize{ -\item A string, meant as a file path, passed to \code{\link[fs:path_abs]{fs::path_abs()}} -\item a \link[fs:path_abs]{file path} +\item A string, meant as a file path, passed to \code{\link[=path_real]{path_real()}} +\item a \link[=path_real]{file path} \item \link[=arrow__io__OutputStream]{arrow::io::OutputStream} }} diff --git a/r/man/read_table.Rd b/r/man/read_table.Rd index c5863c1d43e..2ee4b30aa86 100644 --- a/r/man/read_table.Rd +++ b/r/man/read_table.Rd @@ -18,7 +18,7 @@ from all the record batches in the reader \item a \link[=arrow__ipc__RecordBatchStreamReader]{arrow::ipc::RecordBatchStreamReader}: read an \link[=arrow__Table]{arrow::Table} from the remaining record batches in the reader -\item a string or \link[fs:path_abs]{file path}: interpret the file as an arrow +\item a string or \link[fs:path_real]{file path}: interpret the file as an arrow binary file format, and uses a \link[=arrow__ipc__RecordBatchFileReader]{arrow::ipc::RecordBatchFileReader} to process it. \item a raw vector: read using a \link[=arrow__ipc__RecordBatchStreamReader]{arrow::ipc::RecordBatchStreamReader} diff --git a/r/man/write_arrow.Rd b/r/man/write_arrow.Rd index 7169ece6e77..c0ff06eb5dd 100644 --- a/r/man/write_arrow.Rd +++ b/r/man/write_arrow.Rd @@ -14,7 +14,7 @@ write_arrow(x, stream, ...) \item A \link[=arrow__ipc__RecordBatchWriter]{arrow::ipc::RecordBatchWriter}: the \code{$write()} of \code{x} is used. The stream is left open. This uses the streaming format or the binary file format depending on the type of the writer. -\item A string or \link[fs:path_abs]{file path}: \code{x} is serialized with +\item A string or \link[=path_real]{file path}: \code{x} is serialized with a \link[=arrow__ipc__RecordBatchFileWriter]{arrow::ipc::RecordBatchFileWriter}, i.e. using the binary file format. \item A raw vector: typically of length zero (its data is ignored, and only used for From 8e003e0a15e380699772c8e3bf12c7fc76ebeab3 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Thu, 22 Aug 2019 10:12:55 -0700 Subject: [PATCH 2/4] Update NEWS --- r/NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/r/NEWS.md b/r/NEWS.md index 5ab4e18794b..3cd2f32b362 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -21,6 +21,7 @@ * `read_csv_arrow()` supports more parsing options, including `col_names` and `skip` * `read_parquet()` and `read_feather()` can ingest data from a `raw` vector ([ARROW-6278](https://issues.apache.org/jira/browse/ARROW-6278)) +* File readers now properly handle paths that need expanding, such as `~/file.parquet` ([ARROW-6323](https://issues.apache.org/jira/browse/ARROW-6323)) # arrow 0.14.1 From 7f51e4c18e21aaa54d7d00844dcfc28552c760fa Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Thu, 22 Aug 2019 13:10:06 -0700 Subject: [PATCH 3/4] Use base::normalizePath instead --- r/NAMESPACE | 2 +- r/R/RecordBatchWriter.R | 6 +++--- r/R/arrow-package.R | 4 +++- r/R/read_table.R | 2 +- r/R/write_arrow.R | 2 +- r/man/RecordBatchFileWriter.Rd | 2 +- r/man/RecordBatchStreamWriter.Rd | 4 ++-- r/man/read_table.Rd | 2 +- r/man/write_arrow.Rd | 2 +- 9 files changed, 14 insertions(+), 12 deletions(-) diff --git a/r/NAMESPACE b/r/NAMESPACE index fbdbfb2dcda..d11d95be91d 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -195,7 +195,7 @@ importFrom(Rcpp,sourceCpp) importFrom(assertthat,assert_that) importFrom(bit64,print.integer64) importFrom(bit64,str.integer64) -importFrom(fs,path_real) +importFrom(fs,path_abs) importFrom(purrr,map) importFrom(purrr,map2) importFrom(purrr,map_int) diff --git a/r/R/RecordBatchWriter.R b/r/R/RecordBatchWriter.R index 490ef8b1ced..050bf3c9e5e 100644 --- a/r/R/RecordBatchWriter.R +++ b/r/R/RecordBatchWriter.R @@ -94,8 +94,8 @@ #' #' @param sink Where to write. Can either be: #' -#' - A string, meant as a file path, passed to [path_real()] -#' - a [file path][path_real()] +#' - A string, meant as a file path, passed to [fs::path_abs()] +#' - a [file path][fs::path_abs()] #' - [arrow::io::OutputStream][arrow__io__OutputStream] #' #' @param schema The [arrow::Schema][arrow__Schema] for data to be written. @@ -161,7 +161,7 @@ RecordBatchStreamWriter.fs_path <- function(sink, schema){ #' @param sink Where to write. Can either be: #' #' - character vector of length one -#' - a [file path][path_real()] +#' - a [file path][fs::path_abs()] #' - [arrow::io::OutputStream][arrow__io__OutputStream] #' #' @param schema The [arrow::Schema][arrow__Schema] for data to be written. diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R index 62e652b6c75..c95bb633a92 100644 --- a/r/R/arrow-package.R +++ b/r/R/arrow-package.R @@ -16,7 +16,7 @@ # under the License. #' @importFrom R6 R6Class -#' @importFrom fs path_real +#' @importFrom fs path_abs #' @importFrom purrr map map_int map2 #' @importFrom assertthat assert_that #' @importFrom rlang list2 %||% is_false abort dots_n warn enquo quo_is_null enquos @@ -43,3 +43,5 @@ arrow_available <- function() { option_use_threads <- function() { !is_false(getOption("arrow.use_threads")) } + +path_real <- function(x) path_abs(normalizePath(x, mustWork = FALSE)) diff --git a/r/R/read_table.R b/r/R/read_table.R index 8cccafcfa9a..3c83e50307a 100644 --- a/r/R/read_table.R +++ b/r/R/read_table.R @@ -27,7 +27,7 @@ #' read an [arrow::Table][arrow__Table] from the remaining record batches #' in the reader #' -#' - a string or [file path][fs::path_real()]: interpret the file as an arrow +#' - a string or [file path][fs::path_abs()]: interpret the file as an arrow #' binary file format, and uses a [arrow::ipc::RecordBatchFileReader][arrow__ipc__RecordBatchFileReader] #' to process it. #' diff --git a/r/R/write_arrow.R b/r/R/write_arrow.R index 85399588796..7d8568f55f1 100644 --- a/r/R/write_arrow.R +++ b/r/R/write_arrow.R @@ -36,7 +36,7 @@ to_arrow <- function(x) { #' of `x` is used. The stream is left open. This uses the streaming format #' or the binary file format depending on the type of the writer. #' -#' - A string or [file path][path_real()]: `x` is serialized with +#' - A string or [file path][fs::path_abs()]: `x` is serialized with #' a [arrow::ipc::RecordBatchFileWriter][arrow__ipc__RecordBatchFileWriter], i.e. #' using the binary file format. #' diff --git a/r/man/RecordBatchFileWriter.Rd b/r/man/RecordBatchFileWriter.Rd index 869e63bc0f5..90858304b0b 100644 --- a/r/man/RecordBatchFileWriter.Rd +++ b/r/man/RecordBatchFileWriter.Rd @@ -10,7 +10,7 @@ RecordBatchFileWriter(sink, schema) \item{sink}{Where to write. Can either be: \itemize{ \item character vector of length one -\item a \link[=path_real]{file path} +\item a \link[fs:path_abs]{file path} \item \link[=arrow__io__OutputStream]{arrow::io::OutputStream} }} diff --git a/r/man/RecordBatchStreamWriter.Rd b/r/man/RecordBatchStreamWriter.Rd index 2cac5113a86..b9183a80719 100644 --- a/r/man/RecordBatchStreamWriter.Rd +++ b/r/man/RecordBatchStreamWriter.Rd @@ -9,8 +9,8 @@ RecordBatchStreamWriter(sink, schema) \arguments{ \item{sink}{Where to write. Can either be: \itemize{ -\item A string, meant as a file path, passed to \code{\link[=path_real]{path_real()}} -\item a \link[=path_real]{file path} +\item A string, meant as a file path, passed to \code{\link[fs:path_abs]{fs::path_abs()}} +\item a \link[fs:path_abs]{file path} \item \link[=arrow__io__OutputStream]{arrow::io::OutputStream} }} diff --git a/r/man/read_table.Rd b/r/man/read_table.Rd index 2ee4b30aa86..c5863c1d43e 100644 --- a/r/man/read_table.Rd +++ b/r/man/read_table.Rd @@ -18,7 +18,7 @@ from all the record batches in the reader \item a \link[=arrow__ipc__RecordBatchStreamReader]{arrow::ipc::RecordBatchStreamReader}: read an \link[=arrow__Table]{arrow::Table} from the remaining record batches in the reader -\item a string or \link[fs:path_real]{file path}: interpret the file as an arrow +\item a string or \link[fs:path_abs]{file path}: interpret the file as an arrow binary file format, and uses a \link[=arrow__ipc__RecordBatchFileReader]{arrow::ipc::RecordBatchFileReader} to process it. \item a raw vector: read using a \link[=arrow__ipc__RecordBatchStreamReader]{arrow::ipc::RecordBatchStreamReader} diff --git a/r/man/write_arrow.Rd b/r/man/write_arrow.Rd index c0ff06eb5dd..7169ece6e77 100644 --- a/r/man/write_arrow.Rd +++ b/r/man/write_arrow.Rd @@ -14,7 +14,7 @@ write_arrow(x, stream, ...) \item A \link[=arrow__ipc__RecordBatchWriter]{arrow::ipc::RecordBatchWriter}: the \code{$write()} of \code{x} is used. The stream is left open. This uses the streaming format or the binary file format depending on the type of the writer. -\item A string or \link[=path_real]{file path}: \code{x} is serialized with +\item A string or \link[fs:path_abs]{file path}: \code{x} is serialized with a \link[=arrow__ipc__RecordBatchFileWriter]{arrow::ipc::RecordBatchFileWriter}, i.e. using the binary file format. \item A raw vector: typically of length zero (its data is ignored, and only used for From 635d954bbc0b729b974402a8c0146cdb860217d8 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Thu, 22 Aug 2019 13:58:06 -0700 Subject: [PATCH 4/4] Consolidate file path massaging logic and remove fs dependency --- r/DESCRIPTION | 2 +- r/NAMESPACE | 13 ------------- r/R/RecordBatchReader.R | 5 ----- r/R/RecordBatchWriter.R | 16 ++-------------- r/R/arrow-package.R | 3 --- r/R/compression.R | 10 ---------- r/R/csv.R | 15 --------------- r/R/feather.R | 15 ++------------- r/R/io.R | 8 ++++---- r/R/json.R | 13 ------------- r/R/parquet.R | 15 +++++++-------- r/R/read_table.R | 9 ++------- r/R/write_arrow.R | 7 +------ r/man/FeatherTableReader.Rd | 2 +- r/man/RecordBatchFileWriter.Rd | 3 +-- r/man/RecordBatchStreamWriter.Rd | 3 +-- r/man/read_table.Rd | 2 +- r/man/write_arrow.Rd | 2 +- 18 files changed, 24 insertions(+), 119 deletions(-) diff --git a/r/DESCRIPTION b/r/DESCRIPTION index 2df07e68744..d2ecde3b1c0 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -28,7 +28,6 @@ LinkingTo: Imports: assertthat, bit64, - fs, purrr, R6, Rcpp (>= 1.0.1), @@ -39,6 +38,7 @@ Roxygen: list(markdown = TRUE) RoxygenNote: 6.1.1 Suggests: covr, + fs, hms, lubridate, rmarkdown, diff --git a/r/NAMESPACE b/r/NAMESPACE index d11d95be91d..d97fa4c2b58 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -11,14 +11,11 @@ S3method(BufferReader,"arrow::Buffer") S3method(BufferReader,default) S3method(CompressedInputStream,"arrow::io::InputStream") S3method(CompressedInputStream,character) -S3method(CompressedInputStream,fs_path) S3method(CompressedOutputStream,"arrow::io::OutputStream") S3method(CompressedOutputStream,character) -S3method(CompressedOutputStream,fs_path) S3method(FeatherTableReader,"arrow::io::RandomAccessFile") S3method(FeatherTableReader,"arrow::ipc::feather::TableReader") S3method(FeatherTableReader,character) -S3method(FeatherTableReader,fs_path) S3method(FeatherTableReader,raw) S3method(FeatherTableWriter,"arrow::io::OutputStream") S3method(FixedSizeBufferWriter,"arrow::Buffer") @@ -28,17 +25,14 @@ S3method(MessageReader,default) S3method(RecordBatchFileReader,"arrow::Buffer") S3method(RecordBatchFileReader,"arrow::io::RandomAccessFile") S3method(RecordBatchFileReader,character) -S3method(RecordBatchFileReader,fs_path) S3method(RecordBatchFileReader,raw) S3method(RecordBatchFileWriter,"arrow::io::OutputStream") S3method(RecordBatchFileWriter,character) -S3method(RecordBatchFileWriter,fs_path) S3method(RecordBatchStreamReader,"arrow::Buffer") S3method(RecordBatchStreamReader,"arrow::io::InputStream") S3method(RecordBatchStreamReader,raw) S3method(RecordBatchStreamWriter,"arrow::io::OutputStream") S3method(RecordBatchStreamWriter,character) -S3method(RecordBatchStreamWriter,fs_path) S3method(as.data.frame,"arrow::RecordBatch") S3method(as.data.frame,"arrow::Table") S3method(as.raw,"arrow::Buffer") @@ -52,19 +46,16 @@ S3method(csv_table_reader,"arrow::csv::TableReader") S3method(csv_table_reader,"arrow::io::InputStream") S3method(csv_table_reader,character) S3method(csv_table_reader,default) -S3method(csv_table_reader,fs_path) S3method(dim,"arrow::RecordBatch") S3method(dim,"arrow::Table") S3method(json_table_reader,"arrow::io::InputStream") S3method(json_table_reader,"arrow::json::TableReader") S3method(json_table_reader,character) S3method(json_table_reader,default) -S3method(json_table_reader,fs_path) S3method(length,"arrow::Array") S3method(names,"arrow::RecordBatch") S3method(parquet_file_reader,"arrow::io::RandomAccessFile") S3method(parquet_file_reader,character) -S3method(parquet_file_reader,fs_path) S3method(parquet_file_reader,raw) S3method(print,"arrow-enum") S3method(read_message,"arrow::io::InputStream") @@ -81,7 +72,6 @@ S3method(read_schema,raw) S3method(read_table,"arrow::ipc::RecordBatchFileReader") S3method(read_table,"arrow::ipc::RecordBatchStreamReader") S3method(read_table,character) -S3method(read_table,fs_path) S3method(read_table,raw) S3method(type,"arrow::Array") S3method(type,"arrow::ChunkedArray") @@ -89,7 +79,6 @@ S3method(type,"arrow::Column") S3method(type,default) S3method(write_arrow,"arrow::ipc::RecordBatchWriter") S3method(write_arrow,character) -S3method(write_arrow,fs_path) S3method(write_arrow,raw) S3method(write_feather,"arrow::RecordBatch") S3method(write_feather,data.frame) @@ -97,7 +86,6 @@ S3method(write_feather,default) S3method(write_feather_RecordBatch,"arrow::io::OutputStream") S3method(write_feather_RecordBatch,character) S3method(write_feather_RecordBatch,default) -S3method(write_feather_RecordBatch,fs_path) export(BufferOutputStream) export(BufferReader) export(CompressedInputStream) @@ -195,7 +183,6 @@ importFrom(Rcpp,sourceCpp) importFrom(assertthat,assert_that) importFrom(bit64,print.integer64) importFrom(bit64,str.integer64) -importFrom(fs,path_abs) importFrom(purrr,map) importFrom(purrr,map2) importFrom(purrr,map_int) diff --git a/r/R/RecordBatchReader.R b/r/R/RecordBatchReader.R index 12ac3313553..ae3bd27a780 100644 --- a/r/R/RecordBatchReader.R +++ b/r/R/RecordBatchReader.R @@ -124,11 +124,6 @@ RecordBatchFileReader <- function(file) { #' @export `RecordBatchFileReader.character` <- function(file) { assert_that(length(file) == 1L) - RecordBatchFileReader(path_real(file)) -} - -#' @export -`RecordBatchFileReader.fs_path` <- function(file) { RecordBatchFileReader(ReadableFile(file)) } diff --git a/r/R/RecordBatchWriter.R b/r/R/RecordBatchWriter.R index 050bf3c9e5e..eb0a9c61d09 100644 --- a/r/R/RecordBatchWriter.R +++ b/r/R/RecordBatchWriter.R @@ -94,8 +94,7 @@ #' #' @param sink Where to write. Can either be: #' -#' - A string, meant as a file path, passed to [fs::path_abs()] -#' - a [file path][fs::path_abs()] +#' - A string file path #' - [arrow::io::OutputStream][arrow__io__OutputStream] #' #' @param schema The [arrow::Schema][arrow__Schema] for data to be written. @@ -109,11 +108,6 @@ RecordBatchStreamWriter <- function(sink, schema) { #' @export RecordBatchStreamWriter.character <- function(sink, schema){ - RecordBatchStreamWriter(path_real(sink), schema) -} - -#' @export -RecordBatchStreamWriter.fs_path <- function(sink, schema){ RecordBatchStreamWriter(FileOutputStream(sink), schema) } @@ -160,8 +154,7 @@ RecordBatchStreamWriter.fs_path <- function(sink, schema){ #' #' @param sink Where to write. Can either be: #' -#' - character vector of length one -#' - a [file path][fs::path_abs()] +#' - a string file path #' - [arrow::io::OutputStream][arrow__io__OutputStream] #' #' @param schema The [arrow::Schema][arrow__Schema] for data to be written. @@ -175,11 +168,6 @@ RecordBatchFileWriter <- function(sink, schema) { #' @export RecordBatchFileWriter.character <- function(sink, schema){ - RecordBatchFileWriter(path_real(sink), schema) -} - -#' @export -RecordBatchFileWriter.fs_path <- function(sink, schema){ RecordBatchFileWriter(FileOutputStream(sink), schema) } diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R index c95bb633a92..86e909e7329 100644 --- a/r/R/arrow-package.R +++ b/r/R/arrow-package.R @@ -16,7 +16,6 @@ # under the License. #' @importFrom R6 R6Class -#' @importFrom fs path_abs #' @importFrom purrr map map_int map2 #' @importFrom assertthat assert_that #' @importFrom rlang list2 %||% is_false abort dots_n warn enquo quo_is_null enquos @@ -43,5 +42,3 @@ arrow_available <- function() { option_use_threads <- function() { !is_false(getOption("arrow.use_threads")) } - -path_real <- function(x) path_abs(normalizePath(x, mustWork = FALSE)) diff --git a/r/R/compression.R b/r/R/compression.R index 175c7d215a3..399fcb81db2 100644 --- a/r/R/compression.R +++ b/r/R/compression.R @@ -50,11 +50,6 @@ CompressedOutputStream <- function(stream, codec = compression_codec("GZIP")){ #' @export CompressedOutputStream.character <- function(stream, codec = compression_codec("GZIP")){ - CompressedOutputStream(path_real(stream), codec = codec) -} - -#' @export -CompressedOutputStream.fs_path <- function(stream, codec = compression_codec("GZIP")){ CompressedOutputStream(FileOutputStream(stream), codec = codec) } @@ -75,11 +70,6 @@ CompressedInputStream <- function(stream, codec = codec("GZIP")){ #' @export CompressedInputStream.character <- function(stream, codec = compression_codec("GZIP")){ - CompressedInputStream(path_real(stream), codec = codec) -} - -#' @export -CompressedInputStream.fs_path <- function(stream, codec = compression_codec("GZIP")){ CompressedInputStream(ReadableFile(stream), codec = codec) } diff --git a/r/R/csv.R b/r/R/csv.R index 1300551c86b..3c5e5b7e8e4 100644 --- a/r/R/csv.R +++ b/r/R/csv.R @@ -347,21 +347,6 @@ csv_table_reader.default <- function(file, parse_options = csv_parse_options(), convert_options = csv_convert_options(), ... -){ - csv_table_reader(path_real(file), - read_options = read_options, - parse_options = parse_options, - convert_options = convert_options, - ... - ) -} - -#' @export -`csv_table_reader.fs_path` <- function(file, - read_options = csv_read_options(), - parse_options = csv_parse_options(), - convert_options = csv_convert_options(), - ... ){ csv_table_reader(mmap_open(file), read_options = read_options, diff --git a/r/R/feather.R b/r/R/feather.R index 97b33d19b77..48123f7cdfd 100644 --- a/r/R/feather.R +++ b/r/R/feather.R @@ -108,13 +108,7 @@ write_feather_RecordBatch <- function(data, stream) { #' @export #' @method write_feather_RecordBatch character -`write_feather_RecordBatch.character` <- function(data, stream) { - `write_feather_RecordBatch.fs_path`(data, path_real(stream)) -} - -#' @export -#' @method write_feather_RecordBatch fs_path -`write_feather_RecordBatch.fs_path` <- function(data, stream) { +write_feather_RecordBatch.character <- function(data, stream) { file_stream <- FileOutputStream(stream) on.exit(file_stream$close()) `write_feather_RecordBatch.arrow::io::OutputStream`(data, file_stream) @@ -129,7 +123,7 @@ write_feather_RecordBatch <- function(data, stream) { #' A `arrow::ipc::feather::TableReader` to read from a file #' #' @param file A file path or `arrow::io::RandomAccessFile` -#' @param mmap Is the file memory mapped (applicable to the `character` and `fs_path` methods) +#' @param mmap Is the file memory mapped (applicable to the `character` method) #' @param ... extra parameters #' #' @export @@ -139,11 +133,6 @@ FeatherTableReader <- function(file, mmap = TRUE, ...){ #' @export FeatherTableReader.character <- function(file, mmap = TRUE, ...) { - FeatherTableReader(path_real(file), mmap = mmap, ...) -} - -#' @export -FeatherTableReader.fs_path <- function(file, mmap = TRUE, ...) { if (isTRUE(mmap)) { stream <- mmap_open(file, ...) } else { diff --git a/r/R/io.R b/r/R/io.R index e3b234d6947..3169a180eb9 100644 --- a/r/R/io.R +++ b/r/R/io.R @@ -253,7 +253,7 @@ #' #' @export mmap_create <- function(path, size) { - shared_ptr(`arrow::io::MemoryMappedFile`, io___MemoryMappedFile__Create(path_real(path), size)) + shared_ptr(`arrow::io::MemoryMappedFile`, io___MemoryMappedFile__Create(normalizePath(path, mustWork = FALSE), size)) } #' Open a memory mapped file @@ -264,7 +264,7 @@ mmap_create <- function(path, size) { #' @export mmap_open <- function(path, mode = c("read", "write", "readwrite")) { mode <- match(match.arg(mode), c("read", "write", "readwrite")) - 1L - shared_ptr(`arrow::io::MemoryMappedFile`, io___MemoryMappedFile__Open(path_real(path), mode)) + shared_ptr(`arrow::io::MemoryMappedFile`, io___MemoryMappedFile__Open(normalizePath(path), mode)) } #' open a [arrow::io::ReadableFile][arrow__io__ReadableFile] @@ -275,7 +275,7 @@ mmap_open <- function(path, mode = c("read", "write", "readwrite")) { #' #' @export ReadableFile <- function(path) { - shared_ptr(`arrow::io::ReadableFile`, io___ReadableFile__Open(path_real(path))) + shared_ptr(`arrow::io::ReadableFile`, io___ReadableFile__Open(normalizePath(path))) } #' Open a [arrow::io::FileOutputStream][arrow__io__FileOutputStream] @@ -286,7 +286,7 @@ ReadableFile <- function(path) { #' #' @export FileOutputStream <- function(path) { - shared_ptr(`arrow::io::FileOutputStream`, io___FileOutputStream__Open(path)) + shared_ptr(`arrow::io::FileOutputStream`, io___FileOutputStream__Open(normalizePath(path, mustWork = FALSE))) } #' Open a [arrow::io::MockOutputStream][arrow__io__MockOutputStream] diff --git a/r/R/json.R b/r/R/json.R index f1cd8874096..9573ff547b3 100644 --- a/r/R/json.R +++ b/r/R/json.R @@ -86,19 +86,6 @@ json_table_reader.default <- function(file, read_options = json_read_options(), parse_options = json_parse_options(), ... -){ - json_table_reader(path_real(file), - read_options = read_options, - parse_options = parse_options, - ... - ) -} - -#' @export -`json_table_reader.fs_path` <- function(file, - read_options = json_read_options(), - parse_options = json_parse_options(), - ... ){ json_table_reader(ReadableFile(file), read_options = read_options, diff --git a/r/R/parquet.R b/r/R/parquet.R index 72e1d01ca5b..c76619c4597 100644 --- a/r/R/parquet.R +++ b/r/R/parquet.R @@ -85,7 +85,11 @@ parquet_file_reader <- function(file, props = parquet_arrow_reader_properties(), } #' @export -parquet_file_reader.fs_path <- function(file, props = parquet_arrow_reader_properties(), memory_map = TRUE, ...) { +parquet_file_reader.character <- function(file, + props = parquet_arrow_reader_properties(), + memory_map = TRUE, + ...) { + file <- normalizePath(file) if (isTRUE(memory_map)) { parquet_file_reader(mmap_open(file), props = props, ...) } else { @@ -94,13 +98,8 @@ parquet_file_reader.fs_path <- function(file, props = parquet_arrow_reader_prope } #' @export -parquet_file_reader.character <- function(file, props = parquet_arrow_reader_properties(), memory_map = TRUE, ...) { - parquet_file_reader(path_real(file), props = parquet_arrow_reader_properties(), memory_map = memory_map, ...) -} - -#' @export -parquet_file_reader.raw <- function(file, props = parquet_arrow_reader_properties(), memory_map = TRUE, ...) { - parquet_file_reader(BufferReader(file), props = parquet_arrow_reader_properties(), memory_map = memory_map, ...) +parquet_file_reader.raw <- function(file, props = parquet_arrow_reader_properties(), ...) { + parquet_file_reader(BufferReader(file), props = props, ...) } #' Read a Parquet file diff --git a/r/R/read_table.R b/r/R/read_table.R index 3c83e50307a..a05d15dff56 100644 --- a/r/R/read_table.R +++ b/r/R/read_table.R @@ -27,7 +27,7 @@ #' read an [arrow::Table][arrow__Table] from the remaining record batches #' in the reader #' -#' - a string or [file path][fs::path_abs()]: interpret the file as an arrow +#' - a string file path: interpret the file as an arrow #' binary file format, and uses a [arrow::ipc::RecordBatchFileReader][arrow__ipc__RecordBatchFileReader] #' to process it. #' @@ -60,13 +60,8 @@ read_table <- function(stream){ } #' @export -read_table.character <- function(stream){ +read_table.character <- function(stream) { assert_that(length(stream) == 1L) - read_table(path_real(stream)) -} - -#' @export -read_table.fs_path <- function(stream) { stream <- ReadableFile(stream) on.exit(stream$close()) batch_reader <- RecordBatchFileReader(stream) diff --git a/r/R/write_arrow.R b/r/R/write_arrow.R index 7d8568f55f1..f57eff36c57 100644 --- a/r/R/write_arrow.R +++ b/r/R/write_arrow.R @@ -36,7 +36,7 @@ to_arrow <- function(x) { #' of `x` is used. The stream is left open. This uses the streaming format #' or the binary file format depending on the type of the writer. #' -#' - A string or [file path][fs::path_abs()]: `x` is serialized with +#' - A string file path: `x` is serialized with #' a [arrow::ipc::RecordBatchFileWriter][arrow__ipc__RecordBatchFileWriter], i.e. #' using the binary file format. #' @@ -61,11 +61,6 @@ write_arrow <- function(x, stream, ...) { #' @export `write_arrow.character` <- function(x, stream, ...) { - write_arrow(x, path_real(stream), ...) -} - -#' @export -`write_arrow.fs_path` <- function(x, stream, ...) { assert_that(length(stream) == 1L) x <- to_arrow(x) file_stream <- FileOutputStream(stream) diff --git a/r/man/FeatherTableReader.Rd b/r/man/FeatherTableReader.Rd index 452291e7e61..3276628d50e 100644 --- a/r/man/FeatherTableReader.Rd +++ b/r/man/FeatherTableReader.Rd @@ -9,7 +9,7 @@ FeatherTableReader(file, mmap = TRUE, ...) \arguments{ \item{file}{A file path or \code{arrow::io::RandomAccessFile}} -\item{mmap}{Is the file memory mapped (applicable to the \code{character} and \code{fs_path} methods)} +\item{mmap}{Is the file memory mapped (applicable to the \code{character} method)} \item{...}{extra parameters} } diff --git a/r/man/RecordBatchFileWriter.Rd b/r/man/RecordBatchFileWriter.Rd index 90858304b0b..d89578f97be 100644 --- a/r/man/RecordBatchFileWriter.Rd +++ b/r/man/RecordBatchFileWriter.Rd @@ -9,8 +9,7 @@ RecordBatchFileWriter(sink, schema) \arguments{ \item{sink}{Where to write. Can either be: \itemize{ -\item character vector of length one -\item a \link[fs:path_abs]{file path} +\item a string file path \item \link[=arrow__io__OutputStream]{arrow::io::OutputStream} }} diff --git a/r/man/RecordBatchStreamWriter.Rd b/r/man/RecordBatchStreamWriter.Rd index b9183a80719..9d9bbc9ceb0 100644 --- a/r/man/RecordBatchStreamWriter.Rd +++ b/r/man/RecordBatchStreamWriter.Rd @@ -9,8 +9,7 @@ RecordBatchStreamWriter(sink, schema) \arguments{ \item{sink}{Where to write. Can either be: \itemize{ -\item A string, meant as a file path, passed to \code{\link[fs:path_abs]{fs::path_abs()}} -\item a \link[fs:path_abs]{file path} +\item A string file path \item \link[=arrow__io__OutputStream]{arrow::io::OutputStream} }} diff --git a/r/man/read_table.Rd b/r/man/read_table.Rd index c5863c1d43e..e556b8b0773 100644 --- a/r/man/read_table.Rd +++ b/r/man/read_table.Rd @@ -18,7 +18,7 @@ from all the record batches in the reader \item a \link[=arrow__ipc__RecordBatchStreamReader]{arrow::ipc::RecordBatchStreamReader}: read an \link[=arrow__Table]{arrow::Table} from the remaining record batches in the reader -\item a string or \link[fs:path_abs]{file path}: interpret the file as an arrow +\item a string file path: interpret the file as an arrow binary file format, and uses a \link[=arrow__ipc__RecordBatchFileReader]{arrow::ipc::RecordBatchFileReader} to process it. \item a raw vector: read using a \link[=arrow__ipc__RecordBatchStreamReader]{arrow::ipc::RecordBatchStreamReader} diff --git a/r/man/write_arrow.Rd b/r/man/write_arrow.Rd index 7169ece6e77..9ba65cb18f3 100644 --- a/r/man/write_arrow.Rd +++ b/r/man/write_arrow.Rd @@ -14,7 +14,7 @@ write_arrow(x, stream, ...) \item A \link[=arrow__ipc__RecordBatchWriter]{arrow::ipc::RecordBatchWriter}: the \code{$write()} of \code{x} is used. The stream is left open. This uses the streaming format or the binary file format depending on the type of the writer. -\item A string or \link[fs:path_abs]{file path}: \code{x} is serialized with +\item A string file path: \code{x} is serialized with a \link[=arrow__ipc__RecordBatchFileWriter]{arrow::ipc::RecordBatchFileWriter}, i.e. using the binary file format. \item A raw vector: typically of length zero (its data is ignored, and only used for