Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions r/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
S3method("!=",ArrowObject)
S3method("$",RecordBatch)
S3method("$",Schema)
S3method("$",SubTreeFileSystem)
S3method("$",Table)
S3method("==",ArrowObject)
S3method("[",Array)
Expand Down Expand Up @@ -188,6 +189,7 @@ export(TimestampParser)
export(Type)
export(UnionDataset)
export(arrow_available)
export(arrow_with_s3)
export(binary)
export(bool)
export(boolean)
Expand All @@ -196,6 +198,7 @@ export(cast_options)
export(chunked_array)
export(codec_is_available)
export(contains)
export(copy_files)
export(cpu_count)
export(dataset_factory)
export(date32)
Expand Down Expand Up @@ -249,6 +252,7 @@ export(read_parquet)
export(read_schema)
export(read_tsv_arrow)
export(record_batch)
export(s3_bucket)
export(schema)
export(set_cpu_count)
export(starts_with)
Expand Down
18 changes: 14 additions & 4 deletions r/R/arrow-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,18 +53,28 @@

#' Is the C++ Arrow library available?
#'
#' You won't generally need to call this function, but it's here in case it
#' helps for development purposes.
#' You won't generally need to call these functions, but they're made available
#' for diagnostic purposes.
#' @return `TRUE` or `FALSE` depending on whether the package was installed
#' with the Arrow C++ library. If `FALSE`, you'll need to install the C++
#' library and then reinstall the R package. See [install_arrow()] for help.
#' with the Arrow C++ library (check with `arrow_available()`) or with S3
#' support enabled (check with `arrow_with_s3()`).
#' @export
#' @examples
#' arrow_available()
#' arrow_with_s3()
#' @seealso If either of these are `FALSE`, see
#' `vignette("install", package = "arrow")` for guidance on reinstalling the
#' package.
arrow_available <- function() {
  # Ask the compiled bindings whether the Arrow C++ library was
  # linked when the package was installed.
  has_cpp_lib <- .Call(`_arrow_available`)
  has_cpp_lib
}

#' @rdname arrow_available
#' @export
arrow_with_s3 <- function() {
  # Ask the compiled bindings whether S3 filesystem support was
  # enabled in the Arrow C++ build this package was installed with.
  has_s3 <- .Call(`_s3_available`)
  has_s3
}

option_use_threads <- function() {
  # Multithreading is on by default: only an explicit
  # options(arrow.use_threads = FALSE) turns it off. An unset
  # (NULL) option therefore means TRUE here.
  use_threads_opt <- getOption("arrow.use_threads")
  !is_false(use_threads_opt)
}
Expand Down
12 changes: 12 additions & 0 deletions r/R/arrowExports.R

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 3 additions & 5 deletions r/R/csv.R
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,8 @@
#' `col_names`, and the CSV file has a header row that would otherwise be used
#' to identify column names, you'll need to add `skip = 1` to skip that row.
#'
#' @param file A character file name or URI, `raw` vector, or an Arrow input stream.
#' @param file A character file name or URI, `raw` vector, an Arrow input stream,
#' or a `FileSystem` with path (`SubTreeFileSystem`).
#' If a file name, a memory-mapped Arrow [InputStream] will be opened and
#' closed when finished; compression will be detected from the file extension
#' and handled automatically. If an input stream is provided, it will be left
Expand Down Expand Up @@ -123,8 +124,6 @@
#' parsing options provided in other arguments (e.g. `delim`, `quote`, etc.).
#' @param convert_options see [file reader options][CsvReadOptions]
#' @param read_options see [file reader options][CsvReadOptions]
#' @param filesystem A [FileSystem] where `file` can be found if it is a
#' string file path; default is the local file system
#' @param as_data_frame Should the function return a `data.frame` (default) or
#' an Arrow [Table]?
#'
Expand Down Expand Up @@ -156,7 +155,6 @@ read_delim_arrow <- function(file,
parse_options = NULL,
convert_options = NULL,
read_options = NULL,
filesystem = NULL,
as_data_frame = TRUE,
timestamp_parsers = NULL) {
if (inherits(schema, "Schema")) {
Expand Down Expand Up @@ -186,7 +184,7 @@ read_delim_arrow <- function(file,
}

if (!inherits(file, "InputStream")) {
file <- make_readable_file(file, filesystem = filesystem)
file <- make_readable_file(file)
on.exit(file$close())
}
reader <- CsvTableReader$create(
Expand Down
3 changes: 0 additions & 3 deletions r/R/dataset-factory.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,6 @@ DatasetFactory$create <- function(x,
if (is_list_of(x, "DatasetFactory")) {
return(shared_ptr(DatasetFactory, dataset___UnionDatasetFactory__Make(x)))
}
if (!is.string(x)) {
stop("'x' must be a string or a list of DatasetFactory", call. = FALSE)
}

path_and_fs <- get_path_and_filesystem(x, filesystem)
selector <- FileSelector$create(path_and_fs$path, allow_not_found = FALSE, recursive = TRUE)
Expand Down
9 changes: 3 additions & 6 deletions r/R/dataset-write.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@
#' and `group_by()` operations done on the dataset. `filter()` queries will be
#' applied to restrict written rows.
#' Note that `select()`-ed columns may not be renamed.
#' @param path string path or URI to a directory to write to (directory will be
#' created if it does not exist)
#' @param path string path, URI, or `SubTreeFileSystem` referencing a directory
#' to write to (directory will be created if it does not exist)
#' @param format file format to write the dataset to. Currently supported
#' formats are "feather" (aka "ipc") and "parquet". Default is to write to the
#' same format as `dataset`.
Expand All @@ -41,8 +41,6 @@
#' will yield `"part-0.feather", ...`.
#' @param hive_style logical: write partition segments as Hive-style
#' (`key1=value1/key2=value2/file.ext`) or as just bare values. Default is `TRUE`.
#' @param filesystem A [FileSystem] where the dataset should be written if it is a
#' string file path; default is the local file system
#' @param ... additional format-specific arguments. For available Parquet
#' options, see [write_parquet()].
#' @return The input `dataset`, invisibly
Expand All @@ -53,7 +51,6 @@ write_dataset <- function(dataset,
partitioning = dplyr::group_vars(dataset),
basename_template = paste0("part-{i}.", as.character(format)),
hive_style = TRUE,
filesystem = NULL,
...) {
if (inherits(dataset, "arrow_dplyr_query")) {
# We can select a subset of columns but we can't rename them
Expand All @@ -79,7 +76,7 @@ write_dataset <- function(dataset,
}
}

path_and_fs <- get_path_and_filesystem(path, filesystem)
path_and_fs <- get_path_and_filesystem(path)
options <- FileWriteOptions$create(format, table = scanner, ...)

dataset___Dataset__Write(options, path_and_fs$fs, path_and_fs$path,
Expand Down
15 changes: 6 additions & 9 deletions r/R/feather.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,8 @@
#' and the version 2 specification, which is the Apache Arrow IPC file format.
#'
#' @param x `data.frame`, [RecordBatch], or [Table]
#' @param sink A string file path, URI, or [OutputStream]
#' @param filesystem A [FileSystem] where `sink` should be written if it is a
#' string file path; default is the local file system
#' @param sink A string file path, URI, or [OutputStream], or path in a file
#' system (`SubTreeFileSystem`)
#' @param version integer Feather file version. Version 2 is the current.
#' Version 1 is the more limited legacy format.
#' @param chunk_size For V2 files, the number of rows that each chunk of data
Expand Down Expand Up @@ -54,7 +53,6 @@
#' @include arrow-package.R
write_feather <- function(x,
sink,
filesystem = NULL,
version = 2,
chunk_size = 65536L,
compression = c("default", "lz4", "uncompressed", "zstd"),
Expand Down Expand Up @@ -108,11 +106,10 @@ write_feather <- function(x,
}
assert_is(x, "Table")

if (is.string(sink)) {
sink <- make_output_stream(sink, filesystem)
if (!inherits(sink, "OutputStream")) {
sink <- make_output_stream(sink)
on.exit(sink$close())
}
assert_is(sink, "OutputStream")
ipc___WriteFeather__Table(sink, x, version, chunk_size, compression, compression_level)
invisible(x_out)
}
Expand Down Expand Up @@ -144,9 +141,9 @@ write_feather <- function(x,
#' # Can select columns
#' df <- read_feather(tf, col_select = starts_with("d"))
#' }
read_feather <- function(file, col_select = NULL, as_data_frame = TRUE, filesystem = NULL, ...) {
read_feather <- function(file, col_select = NULL, as_data_frame = TRUE, ...) {
if (!inherits(file, "RandomAccessFile")) {
file <- make_readable_file(file, filesystem = filesystem)
file <- make_readable_file(file)
on.exit(file$close())
}
reader <- FeatherReader$create(file, ...)
Expand Down
Loading