3 changes: 3 additions & 0 deletions .github/workflows/r.yml
@@ -156,6 +156,9 @@ jobs:
with:
fetch-depth: 0
- name: Make R tests verbose
# If you get a segfault or a mysterious "Execution halted" in the tests,
# set this to `true` to see where it dies.
if: false
shell: cmd
run: |
cd r/tests
4 changes: 4 additions & 0 deletions ci/scripts/cpp_test.sh
@@ -28,6 +28,10 @@ export ARROW_TEST_DATA=${arrow_dir}/testing/data
export PARQUET_TEST_DATA=${source_dir}/submodules/parquet-testing/data
export LD_LIBRARY_PATH=${ARROW_HOME}/${CMAKE_INSTALL_LIBDIR:-lib}:${LD_LIBRARY_PATH}

# By default, the AWS SDK tries to contact a non-existent local IP host
# to retrieve instance metadata. Disable this so that the S3FileSystem tests run faster.
export AWS_EC2_METADATA_DISABLED=TRUE

case "$(uname)" in
Linux)
n_jobs=$(nproc)
3 changes: 3 additions & 0 deletions ci/scripts/r_test.sh
@@ -42,6 +42,9 @@ export _R_CHECK_TESTS_NLINES_=0
export _R_CHECK_CRAN_INCOMING_REMOTE_=FALSE
export _R_CHECK_LIMIT_CORES_=FALSE
export VERSION=$(grep ^Version DESCRIPTION | sed s/Version:\ //)
# By default, the AWS SDK tries to contact a non-existent local IP host
# to retrieve instance metadata. Disable this so that the S3FileSystem tests run faster.
export AWS_EC2_METADATA_DISABLED=TRUE

# Make sure we aren't writing to the home dir (CRAN _hates_ this but there is no official check)
BEFORE=$(ls -alh ~/)
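If you run the R test suite outside of these CI scripts, the same setting can be applied from within R. This is only a local-convenience sketch, not part of the diff:

# Equivalent of `export AWS_EC2_METADATA_DISABLED=TRUE` for an interactive session,
# so the AWS SDK skips its EC2 instance-metadata lookup during the S3 tests.
Sys.setenv(AWS_EC2_METADATA_DISABLED = "TRUE")
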
1 change: 1 addition & 0 deletions r/NAMESPACE
@@ -119,6 +119,7 @@ export(RecordBatchFileReader)
export(RecordBatchFileWriter)
export(RecordBatchStreamReader)
export(RecordBatchStreamWriter)
export(S3FileSystem)
export(ScalarExpression)
export(Scanner)
export(ScannerBuilder)
16 changes: 16 additions & 0 deletions r/R/arrowExports.R

Generated file; diff not rendered.

34 changes: 11 additions & 23 deletions r/R/dataset.R
@@ -268,11 +268,9 @@ DatasetFactory <- R6Class("DatasetFactory", inherit = ArrowObject,
)
)
DatasetFactory$create <- function(x,
filesystem = c("auto", "local"),
filesystem = NULL,
format = c("parquet", "arrow", "ipc", "feather"),
partitioning = NULL,
allow_not_found = FALSE,
recursive = TRUE,
...) {
if (is_list_of(x, "DatasetFactory")) {
return(shared_ptr(DatasetFactory, dataset___UnionDatasetFactory__Make(x)))
@@ -282,21 +280,15 @@ DatasetFactory$create <- function(x,
}

if (!inherits(filesystem, "FileSystem")) {
filesystem <- match.arg(filesystem)
if (filesystem == "auto") {
# When there are other FileSystems supported, detect e.g. S3 from x
filesystem <- "local"
if (grepl("://", x)) {
fs_from_uri <- FileSystem$from_uri(x)
filesystem <- fs_from_uri$fs
x <- fs_from_uri$path
} else {
filesystem <- LocalFileSystem$create()
}
filesystem <- list(
local = LocalFileSystem
# We'll register other file systems here
)[[filesystem]]$create(...)
}
selector <- FileSelector$create(
x,
allow_not_found = allow_not_found,
recursive = recursive
)
selector <- FileSelector$create(x, allow_not_found = FALSE, recursive = TRUE)

if (is.character(format)) {
format <- FileFormat$create(match.arg(format))
@@ -331,8 +323,8 @@ DatasetFactory$create <- function(x,
#' a list of `DatasetFactory` objects whose datasets should be
#' grouped. If this argument is specified it will be used to construct a
#' `UnionDatasetFactory` and other arguments will be ignored.
#' @param filesystem A string identifier for the filesystem corresponding to
#' `x`. Currently only "local" is supported.
#' @param filesystem A [FileSystem] object; if omitted, the `FileSystem` will
#' be detected from `x`.
#' @param format A string identifier of the format of the files in `x`.
#' Currently "parquet" and "ipc"/"arrow"/"feather" (aliases for each other)
#' are supported. For Feather, only version 2 files are supported.
@@ -348,11 +340,7 @@ DatasetFactory$create <- function(x,
#' by [hive_partition()] which parses explicit or autodetected fields from
#' Hive-style path segments
#' * `NULL` for no partitioning
#' @param allow_not_found logical: is `x` allowed to not exist? Default
#' `FALSE`. See [FileSelector].
#' @param recursive logical: should files be discovered in subdirectories of
#' `x`? Default `TRUE`.
#' @param ... Additional arguments passed to the [FileSystem] `$create()` method
#' @param ... Additional arguments, currently ignored
#' @return A `DatasetFactory` object. Pass this to [open_dataset()],
#' in a list potentially with other `DatasetFactory` objects, to create
#' a `Dataset`.
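To make the new dispatch concrete, here is a rough usage sketch of `dataset_factory()` after this change. The bucket and directories are hypothetical, and the S3 cases assume the package was built with S3 support:

# Plain path: no filesystem given, so LocalFileSystem$create() is used.
ds1 <- dataset_factory("/data/my-dataset", format = "parquet")

# URI: the filesystem and the path inside it come from FileSystem$from_uri().
ds2 <- dataset_factory("s3://my-bucket/my-dataset", format = "parquet")

# Explicit FileSystem object: used as given.
ds3 <- dataset_factory("my-bucket/my-dataset", filesystem = S3FileSystem$create(), format = "parquet")
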
33 changes: 33 additions & 0 deletions r/R/filesystem.R
@@ -166,6 +166,18 @@ FileSelector$create <- function(base_dir, allow_not_found = FALSE, recursive = F
#' @export
FileSystem <- R6Class("FileSystem", inherit = ArrowObject,
public = list(
..dispatch = function() {
type_name <- self$type_name
if (type_name == "local") {
shared_ptr(LocalFileSystem, self$pointer())
} else if (type_name == "s3") {
shared_ptr(S3FileSystem, self$pointer())
} else if (type_name == "subtree") {
shared_ptr(SubTreeFileSystem, self$pointer())
} else {
self
}
},
GetFileInfo = function(x) {
if (inherits(x, "FileSelector")) {
map(
@@ -224,8 +236,16 @@ FileSystem <- R6Class("FileSystem", inherit = ArrowObject,
OpenAppendStream = function(path) {
shared_ptr(OutputStream, fs___FileSystem__OpenAppendStream(self, clean_path_rel(path)))
}
),
active = list(
type_name = function() fs___FileSystem__type_name(self)
)
)
FileSystem$from_uri <- function(uri) {
out <- fs___FileSystemFromUri(uri)
out$fs <- shared_ptr(FileSystem, out$fs)$..dispatch()
out
}

#' @usage NULL
#' @format NULL
@@ -236,6 +256,19 @@ LocalFileSystem$create <- function() {
shared_ptr(LocalFileSystem, fs___LocalFileSystem__create())
}

#' @usage NULL
#' @format NULL
#' @rdname FileSystem
#' @export
S3FileSystem <- R6Class("S3FileSystem", inherit = FileSystem)
S3FileSystem$create <- function() {
fs___EnsureS3Initialized()
Comment (Contributor): Should you just hide this in fs___S3FileSystem__create instead?

Reply (Member Author): Could, sure. Wasn't sure how many functions I would need to write and whether they'd all need it.

shared_ptr(S3FileSystem, fs___S3FileSystem__create())
}

arrow_with_s3 <- function() {
.Call(`_s3_available`)
}

#' @usage NULL
#' @format NULL
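A short sketch of how the new pieces fit together from R. The bucket name is made up, and `arrow_with_s3()` is the internal helper added above, used here only as a guard:

if (arrow_with_s3()) {
  # from_uri() returns both the filesystem and the path within it; ..dispatch()
  # ensures we get an S3FileSystem object rather than a bare FileSystem.
  out <- FileSystem$from_uri("s3://my-bucket/data")
  out$fs        # an S3FileSystem
  out$fs$GetFileInfo(out$path)
}
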
6 changes: 6 additions & 0 deletions r/configure
@@ -159,6 +159,12 @@ echo "#include $PKG_TEST_HEADER" | ${TEST_CMD} >/dev/null 2>&1
if [ $? -eq 0 ] || [ "$UNAME" = "Darwin" ]; then
# Always build with arrow on macOS
PKG_CFLAGS="$PKG_CFLAGS -DARROW_R_WITH_ARROW"
# Check for features
LIB_DIR=`echo $PKG_LIBS | sed -e 's/ -l.*//' | sed -e 's/^-L//'`
grep 'set(ARROW_S3 "ON")' $LIB_DIR/cmake/arrow/ArrowOptions.cmake >/dev/null 2>&1
if [ $? -eq 0 ]; then
PKG_CFLAGS="$PKG_CFLAGS -DARROW_R_WITH_S3"
fi
echo "PKG_CFLAGS=$PKG_CFLAGS"
echo "PKG_LIBS=$PKG_LIBS"
else
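The effect of that configure-time check can be verified from R once the package is installed; a minimal sketch (the helper is internal, hence the `:::`):

# TRUE only if ArrowOptions.cmake had ARROW_S3 "ON" and the package was
# therefore compiled with -DARROW_R_WITH_S3.
arrow:::arrow_with_s3()
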
50 changes: 34 additions & 16 deletions r/data-raw/codegen.R
@@ -48,16 +48,19 @@ if (packageVersion("decor") < '0.0.0.9001') {
stop("more recent version of `decor` needed, please install with `remotes::install_github('romainfrancois/decor')`")
}

decorations <- cpp_decorations() %>%
filter(decoration == "arrow::export") %>%
# the three lines below can be expressed with rap()
# more concisely
# rap( ~ decor:::parse_cpp_function(context))
mutate(functions = map(context, decor:::parse_cpp_function)) %>%
{ vec_cbind(., vec_rbind(!!!pull(., functions))) } %>%
select(-functions)

message(glue("*** > {n} functions decorated with [[arrow::export]]", n = nrow(decorations)))
get_exported_functions <- function(decorations, export_tag) {
out <- decorations %>%
filter(decoration %in% paste0(export_tag, "::export")) %>%
# the three lines below can be expressed with rap()
# more concisely
# rap( ~ decor:::parse_cpp_function(context))
mutate(functions = map(context, decor:::parse_cpp_function)) %>%
{ vec_cbind(., vec_rbind(!!!pull(., functions))) } %>%
select(-functions) %>%
mutate(decoration = sub("::export", "", decoration))
message(glue("*** > {n} functions decorated with [[{tags}::export]]", n = nrow(out), tags = paste0(export_tag, collapse = "|")))
out
}

glue_collapse_data <- function(data, ..., sep = ", ", last = "") {
res <- glue_collapse(glue_data(data, ...), sep = sep, last = last)
@@ -73,12 +76,16 @@ wrap_call <- function(name, return_type, args) {
glue::glue("\treturn Rcpp::wrap({call});")
}
}
cpp_functions_definitions <- decorations %>%
select(name, return_type, args, file, line) %>%
pmap_chr(function(name, return_type, args, file, line){

all_decorations <- cpp_decorations()
arrow_exports <- get_exported_functions(all_decorations, c("arrow", "s3"))
Comment (Contributor): Will you want to do this for other components, e.g. json, csv, parquet, dataset...?

Reply (Member Author): With this change we could, if we wanted to, enable building slimmer versions of the package, but I'm not planning on doing that unless there's a compelling reason to.


cpp_functions_definitions <- arrow_exports %>%
select(name, return_type, args, file, line, decoration) %>%
pmap_chr(function(name, return_type, args, file, line, decoration){
glue::glue('
// {basename(file)}
#if defined(ARROW_R_WITH_ARROW)
#if defined(ARROW_R_WITH_{toupper(decoration)})
{return_type} {name}({real_params});
RcppExport SEXP _arrow_{name}({sexp_params}){{
BEGIN_RCPP
@@ -101,7 +108,7 @@ cpp_functions_definitions <- decorations %>%
}) %>%
glue_collapse(sep = "\n")

cpp_functions_registration <- decorations %>%
cpp_functions_registration <- arrow_exports %>%
select(name, return_type, args) %>%
pmap_chr(function(name, return_type, args){
glue('\t\t{{ "_arrow_{name}", (DL_FUNC) &_arrow_{name}, {nrow(args)}}}, ')
@@ -127,8 +134,19 @@ return Rf_ScalarLogical(
);
}}

extern "C" SEXP _s3_available() {{
return Rf_ScalarLogical(
#if defined(ARROW_R_WITH_S3)
TRUE
#else
FALSE
#endif
);
}}

static const R_CallMethodDef CallEntries[] = {{
\t\t{{ "_arrow_available", (DL_FUNC)& _arrow_available, 0 }},
\t\t{{ "_s3_available", (DL_FUNC)& _s3_available, 0 }},
{cpp_functions_registration}
\t\t{{NULL, NULL, 0}}
}};
@@ -142,7 +160,7 @@ RcppExport void R_init_arrow(DllInfo* dll){{

message("*** > generated file `src/arrowExports.cpp`")

r_functions <- decorations %>%
r_functions <- arrow_exports %>%
select(name, return_type, args) %>%
pmap_chr(function(name, return_type, args) {
params <- if (nrow(args)) {
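For context on what this generates (the arrowExports files are not rendered in this diff), the R-side wrapper for an s3-tagged function comes out roughly like the sketch below; the exact formatting of the generated file may differ:

# Sketch of the wrapper codegen.R writes into r/R/arrowExports.R for a function
# decorated with [[s3::export]]; the matching C++ stub is guarded by
# `#if defined(ARROW_R_WITH_S3)`.
fs___S3FileSystem__create <- function() {
  .Call(`_arrow_fs___S3FileSystem__create`)
}
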
1 change: 1 addition & 0 deletions r/man/FileSystem.Rd

Generated file; diff not rendered.

10 changes: 8 additions & 2 deletions r/man/cpu_count.Rd

Generated file; diff not rendered.

16 changes: 4 additions & 12 deletions r/man/dataset_factory.Rd

Generated file; diff not rendered.

14 changes: 0 additions & 14 deletions r/man/set_cpu_count.Rd

This file was deleted.
