Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions r/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,7 @@ importFrom(rlang,"%||%")
importFrom(rlang,.data)
importFrom(rlang,abort)
importFrom(rlang,as_label)
importFrom(rlang,caller_env)
importFrom(rlang,dots_n)
importFrom(rlang,enexpr)
importFrom(rlang,enexprs)
Expand Down
2 changes: 1 addition & 1 deletion r/R/arrow-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
#' @importFrom R6 R6Class
#' @importFrom purrr as_mapper map map2 map_chr map2_chr map_dfr map_int map_lgl keep
#' @importFrom assertthat assert_that is.string
#' @importFrom rlang list2 %||% is_false abort dots_n warn enquo quo_is_null enquos is_integerish quos eval_tidy new_data_mask syms env new_environment env_bind as_label set_names exec is_bare_character quo_get_expr quo_set_expr .data seq2 is_quosure enexpr enexprs expr
#' @importFrom rlang list2 %||% is_false abort dots_n warn enquo quo_is_null enquos is_integerish quos eval_tidy new_data_mask syms env new_environment env_bind as_label set_names exec is_bare_character quo_get_expr quo_set_expr .data seq2 is_quosure enexpr enexprs expr caller_env
#' @importFrom tidyselect vars_pull vars_rename vars_select eval_select
#' @useDynLib arrow, .registration = TRUE
#' @keywords internal
Expand Down
4 changes: 4 additions & 0 deletions r/R/arrowExports.R

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions r/R/chunked-array.R
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@
ChunkedArray <- R6Class("ChunkedArray", inherit = ArrowDatum,
public = list(
length = function() ChunkedArray__length(self),
type_id = function() ChunkedArray__type(self)$id,
chunk = function(i) Array$create(ChunkedArray__chunk(self, i)),
as_vector = function() ChunkedArray__as_vector(self),
Slice = function(offset, length = NULL){
Expand Down
3 changes: 3 additions & 0 deletions r/R/dplyr-eval.R
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,9 @@ arrow_mask <- function(.data) {
f_env[[f]] <- fail
}

# Assign the schema to the expressions
map(.data$selected_columns, ~(.$schema <- .data$.data$schema))

# Add the column references and make the mask
out <- new_data_mask(
new_environment(.data$selected_columns, parent = f_env),
Expand Down
75 changes: 75 additions & 0 deletions r/R/dplyr-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,30 @@ nse_funcs$cast <- function(x, target_type, safe = TRUE, ...) {
Expression$create("cast", x, options = opts)
}

nse_funcs$is <- function(object, class2) {
  # Anything that is not a single string has to be a DataType, which is
  # compared against the complete type of `object`
  if (!is.string(class2)) {
    if (!inherits(class2, "DataType")) {
      stop("Second argument to is() is not a string or DataType", call. = FALSE)
    }
    return(object$type() == as_type(class2))
  }

  # Names of R data types are handed to the matching is.*() binding
  r_classes <- c(
    "character", "numeric", "integer", "integer64", "logical", "factor", "list"
  )
  if (class2 %in% r_classes) {
    is_fun <- nse_funcs[[paste0("is.", class2)]]
    return(is_fun(object))
  }

  # Every other string is treated as the name of an Arrow data type: drop any
  # parameters from object$type()$ToString() so that only the top-level type
  # is left, then compare it with the canonical spelling of class2
  top_level_type <- sub("^([^([<]+).*$", "\\1", object$type()$ToString())
  top_level_type == canonical_type_str(class2)
}

nse_funcs$dictionary_encode <- function(x,
null_encoding_behavior = c("mask", "encode")) {
behavior <- toupper(match.arg(null_encoding_behavior))
Expand Down Expand Up @@ -121,6 +145,57 @@ nse_funcs$as.numeric <- function(x) {
Expression$create("cast", x, options = cast_options(to_type = float64()))
}

# is.* type functions
nse_funcs$is.character <- function(x) {
  # TRUE when the type id of x is Type's STRING or LARGE_STRING entry
  type_id <- x$type_id()
  string_type_names <- c("STRING", "LARGE_STRING")
  type_id %in% Type[string_type_names]
}
nse_funcs$is.numeric <- function(x) {
  # TRUE when the type id of x is any of the integer, floating point, or
  # decimal entries of Type listed below
  type_id <- x$type_id()
  numeric_type_names <- c(
    "UINT8", "INT8", "UINT16", "INT16", "UINT32", "INT32", "UINT64", "INT64",
    "HALF_FLOAT", "FLOAT", "DOUBLE",
    "DECIMAL", "DECIMAL256"
  )
  type_id %in% Type[numeric_type_names]
}
nse_funcs$is.double <- function(x) {
  # Only Type's DOUBLE entry qualifies; the other floating point ids do not
  type_id <- x$type_id()
  double_id <- Type["DOUBLE"]
  type_id == double_id
}
nse_funcs$is.integer <- function(x) {
  # TRUE when the type id of x is one of the signed or unsigned integer
  # entries of Type, at any of the listed bit widths
  type_id <- x$type_id()
  integer_type_names <- c(
    "UINT8", "INT8", "UINT16", "INT16", "UINT32", "INT32", "UINT64", "INT64"
  )
  type_id %in% Type[integer_type_names]
}
nse_funcs$is.integer64 <- function(x) {
  # Compares the type id of x with Type's INT64 entry
  type_id <- x$type_id()
  int64_id <- Type["INT64"]
  type_id == int64_id
}
nse_funcs$is.logical <- function(x) {
  # Compares the type id of x with Type's BOOL entry
  type_id <- x$type_id()
  bool_id <- Type["BOOL"]
  type_id == bool_id
}
nse_funcs$is.factor <- function(x) {
  # Compares the type id of x with Type's DICTIONARY entry
  type_id <- x$type_id()
  dictionary_id <- Type["DICTIONARY"]
  type_id == dictionary_id
}
nse_funcs$is.list <- function(x) {
  # TRUE when the type id of x is any of Type's three list entries
  type_id <- x$type_id()
  list_type_names <- c("LIST", "FIXED_SIZE_LIST", "LARGE_LIST")
  type_id %in% Type[list_type_names]
}

# rlang::is_* type functions
# Binding for rlang::is_character(): forwards to the is.character() binding.
# `n` must be left at NULL; any other value fails the assertion below.
# NOTE(review): `n` is presumably kept only to mirror rlang's signature - confirm.
nse_funcs$is_character <- function(x, n = NULL) {
assert_that(is.null(n))
nse_funcs$is.character(x)
}
# Binding for rlang::is_double(): forwards to the is.double() binding.
# Both `n` and `finite` must be left at NULL; supplying either one fails the
# assertion below.
# NOTE(review): the extra arguments presumably mirror rlang's signature - confirm.
nse_funcs$is_double <- function(x, n = NULL, finite = NULL) {
assert_that(is.null(n) && is.null(finite))
nse_funcs$is.double(x)
}
# Binding for rlang::is_integer(): forwards to the is.integer() binding.
# `n` must be left at NULL; any other value fails the assertion below.
# NOTE(review): `n` is presumably kept only to mirror rlang's signature - confirm.
nse_funcs$is_integer <- function(x, n = NULL) {
assert_that(is.null(n))
nse_funcs$is.integer(x)
}
# Binding for rlang::is_list(): forwards to the is.list() binding.
# `n` must be left at NULL; any other value fails the assertion below.
# NOTE(review): `n` is presumably kept only to mirror rlang's signature - confirm.
nse_funcs$is_list <- function(x, n = NULL) {
assert_that(is.null(n))
nse_funcs$is.list(x)
}
# Binding for rlang::is_logical(): forwards to the is.logical() binding.
# `n` must be left at NULL; any other value fails the assertion below.
# NOTE(review): `n` is presumably kept only to mirror rlang's signature - confirm.
nse_funcs$is_logical <- function(x, n = NULL) {
assert_that(is.null(n))
nse_funcs$is.logical(x)
}

# String functions
nse_funcs$nchar <- function(x, type = "chars", allowNA = FALSE, keepNA = NA) {
if (allowNA) {
Expand Down
15 changes: 10 additions & 5 deletions r/R/dplyr-select.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,16 @@ relocate.arrow_dplyr_query <- function(.data, ..., .before = NULL, .after = NULL
# The code in this function is adapted from the code in dplyr::relocate.data.frame
# at https://github.com/tidyverse/dplyr/blob/master/R/relocate.R
# TODO: revisit this after https://github.com/tidyverse/dplyr/issues/5829
check_select_helpers(c(enexprs(...), enexpr(.before), enexpr(.after)))

.data <- arrow_dplyr_query(.data)

to_move <- eval_select(expr(c(...)), .data$selected_columns)
# Assign the schema to the expressions
map(.data$selected_columns, ~(.$schema <- .data$.data$schema))

# Create a mask for evaluating expressions in tidyselect helpers
mask <- new_environment(.cache$functions, parent = caller_env())

to_move <- eval_select(substitute(c(...)), .data$selected_columns, mask)

.before <- enquo(.before)
.after <- enquo(.after)
Expand All @@ -73,12 +78,12 @@ relocate.arrow_dplyr_query <- function(.data, ..., .before = NULL, .after = NULL
if (has_before && has_after) {
abort("Must supply only one of `.before` and `.after`.")
} else if (has_before) {
where <- min(unname(eval_select(.before, .data$selected_columns)))
where <- min(unname(eval_select(quo_get_expr(.before), .data$selected_columns, mask)))
if (!where %in% to_move) {
to_move <- c(to_move, where)
}
} else if (has_after) {
where <- max(unname(eval_select(.after, .data$selected_columns)))
where <- max(unname(eval_select(quo_get_expr(.after), .data$selected_columns, mask)))
if (!where %in% to_move) {
to_move <- c(where, to_move)
}
Expand Down Expand Up @@ -117,4 +122,4 @@ check_select_helpers <- function(exprs) {
call. = FALSE
)
}
}
}
10 changes: 9 additions & 1 deletion r/R/expression.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,15 @@
Expression <- R6Class("Expression", inherit = ArrowObject,
public = list(
ToString = function() compute___expr__ToString(self),
type = function(schema) compute___expr__type(self, schema),
schema = NULL,
type = function(schema = self$schema) {
assert_that(!is.null(schema))
compute___expr__type(self, schema)
},
type_id = function(schema = self$schema) {
assert_that(!is.null(schema))
compute___expr__type_id(self, schema)
},
cast = function(to_type, safe = TRUE, ...) {
opts <- list(
to_type = to_type,
Expand Down
1 change: 1 addition & 0 deletions r/R/scalar.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ Scalar <- R6Class("Scalar",
# TODO: document the methods
public = list(
ToString = function() Scalar__ToString(self),
type_id = function() Scalar__type(self)$id,
as_vector = function() Scalar__as_vector(self),
as_array = function() MakeArrayFromScalar(self),
Equals = function(other, ...) {
Expand Down
49 changes: 49 additions & 0 deletions r/R/type.R
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,55 @@ as_type <- function(type, name = "type") {
type
}

canonical_type_str <- function(type_str) {
  # Translates the name of a data type function, or one of its aliases, into
  # the spelling that DataType$ToString() uses for the same type. Only bare
  # type names are accepted: a string containing "(", "[" or "<" is rejected
  # because it carries type parameters.
  assert_that(is.string(type_str))
  if (grepl("[([<]", type_str)) {
    stop("Cannot interpret string representations of data types that have parameters", call. = FALSE)
  }

  # accepted name = canonical name
  canonical_names <- c(
    # integers
    int8 = "int8", int16 = "int16", int32 = "int32", int64 = "int64",
    uint8 = "uint8", uint16 = "uint16", uint32 = "uint32", uint64 = "uint64",
    # floating point
    float16 = "halffloat", halffloat = "halffloat",
    float32 = "float", float = "float",
    float64 = "double", double = "double",
    # boolean
    boolean = "bool", bool = "bool",
    # strings and binary
    utf8 = "string", string = "string",
    large_utf8 = "large_string", large_string = "large_string",
    binary = "binary",
    large_binary = "large_binary",
    fixed_size_binary = "fixed_size_binary",
    # temporal
    date32 = "date32", date64 = "date64",
    time32 = "time32", time64 = "time64",
    timestamp = "timestamp",
    # everything else
    null = "null",
    decimal = "decimal128",
    struct = "struct",
    list_of = "list", list = "list",
    large_list_of = "large_list", large_list = "large_list",
    fixed_size_list_of = "fixed_size_list", fixed_size_list = "fixed_size_list"
  )

  if (!(type_str %in% names(canonical_names))) {
    stop("Unrecognized string representation of data type", call. = FALSE)
  }
  canonical_names[[type_str]]
}

# vctrs support -----------------------------------------------------------
str_dup <- function(x, times) {
paste0(rep(x, times = times), collapse = "")
Expand Down
17 changes: 17 additions & 0 deletions r/src/arrowExports.cpp

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions r/src/expression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,4 +76,11 @@ std::shared_ptr<arrow::DataType> compute___expr__type(
return bound.type();
}

// [[arrow::export]]
arrow::Type::type compute___expr__type_id(const std::shared_ptr<compute::Expression>& x,
                                          const std::shared_ptr<arrow::Schema>& schema) {
  // Bind the expression to the schema (ValueOrStop handles a failed bind),
  // then report the id of the bound expression's type.
  auto bound_expr = ValueOrStop(x->Bind(*schema));
  auto resolved_type = bound_expr.type();
  return resolved_type->id();
}

#endif
Loading