Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions r/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ Collate:
'dplyr-funcs-augmented.R'
'dplyr-funcs-conditional.R'
'dplyr-funcs-datetime.R'
'dplyr-funcs-doc.R'
'dplyr-funcs-math.R'
'dplyr-funcs-string.R'
'dplyr-funcs-type.R'
Expand Down
1 change: 1 addition & 0 deletions r/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ style-all:
R -s -e 'styler::style_file(setdiff(dir(pattern = "R$$", recursive = TRUE), source(".styler_excludes.R")$$value))'

doc: style
R -s -f data-raw/docgen.R
R -s -e 'roxygen2::roxygenize()'
-git add --all man/*.Rd

Expand Down
51 changes: 38 additions & 13 deletions r/R/arrow-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,25 +31,50 @@
#' @keywords internal
"_PACKAGE"

# TODO(ARROW-17666): Include notes about features not supported here.
supported_dplyr_methods <- list(
select = NULL,
filter = NULL,
collect = NULL,
summarise = NULL,
group_by = NULL,
groups = NULL,
group_vars = NULL,
group_by_drop_default = NULL,
ungroup = NULL,
mutate = NULL,
transmute = NULL,
arrange = NULL,
rename = NULL,
pull = NULL,
relocate = NULL,
compute = NULL,
collapse = NULL,
distinct = NULL,
left_join = NULL,
right_join = NULL,
inner_join = NULL,
full_join = NULL,
semi_join = NULL,
anti_join = NULL,
count = NULL,
tally = NULL,
rename_with = NULL,
union = NULL,
union_all = NULL,
glimpse = NULL,
show_query = NULL,
explain = NULL
)

#' @importFrom vctrs s3_register vec_size vec_cast vec_unique
.onLoad <- function(...) {
# Make sure C++ knows on which thread it is safe to call the R API
InitializeMainRThread()

dplyr_methods <- paste0(
"dplyr::",
c(
"select", "filter", "collect", "summarise", "group_by", "groups",
"group_vars", "group_by_drop_default", "ungroup", "mutate", "transmute",
"arrange", "rename", "pull", "relocate", "compute", "collapse",
"distinct", "left_join", "right_join", "inner_join", "full_join",
"semi_join", "anti_join", "count", "tally", "rename_with", "union",
"union_all", "glimpse", "show_query", "explain"
)
)
for (cl in c("Dataset", "ArrowTabular", "RecordBatchReader", "arrow_dplyr_query")) {
for (m in dplyr_methods) {
s3_register(m, cl)
for (m in names(supported_dplyr_methods)) {
s3_register(paste0("dplyr::", m), cl)
}
}
s3_register("dplyr::tbl_vars", "arrow_dplyr_query")
Expand Down
19 changes: 16 additions & 3 deletions r/R/dplyr-funcs-augmented.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,21 @@
# specific language governing permissions and limitations
# under the License.

#' Add the data filename as a column
#'
#' This function only exists inside `arrow` `dplyr` queries, and it only is
#' valid when quering on a `FileSystemDataset`.
#'
#' @return A `FieldRef` `Expression` that refers to the filename augmented
#' column.
#' @examples
#' \dontrun{
#' open_dataset("nyc-taxi") %>%
#' mutate(file = add_filename())
#' }
#' @keywords internal
add_filename <- function() Expression$field_ref("__filename")

register_bindings_augmented <- function() {
register_binding("add_filename", function() {
Expression$field_ref("__filename")
})
register_binding("arrow::add_filename", add_filename)
}
53 changes: 26 additions & 27 deletions r/R/dplyr-funcs-datetime.R
Original file line number Diff line number Diff line change
Expand Up @@ -649,55 +649,54 @@ register_bindings_datetime_parsers <- function() {

build_expr("assume_timezone", coalesce_output, options = list(timezone = tz))
})

}

register_bindings_datetime_rounding <- function() {
register_binding(
"round_date",
"lubridate::round_date",
function(x,
unit = "second",
week_start = getOption("lubridate.week.start", 7)) {
opts <- parse_period_unit(unit)
if (opts$unit == 7L) { # weeks (unit = 7L) need to accommodate week_start
return(shift_temporal_to_week("round_temporal", x, week_start, options = opts))
}

opts <- parse_period_unit(unit)
if (opts$unit == 7L) { # weeks (unit = 7L) need to accommodate week_start
return(shift_temporal_to_week("round_temporal", x, week_start, options = opts))
Expression$create("round_temporal", x, options = opts)
}

Expression$create("round_temporal", x, options = opts)
})
)

register_binding(
"floor_date",
"lubridate::floor_date",
function(x,
unit = "second",
week_start = getOption("lubridate.week.start", 7)) {
opts <- parse_period_unit(unit)
if (opts$unit == 7L) { # weeks (unit = 7L) need to accommodate week_start
return(shift_temporal_to_week("floor_temporal", x, week_start, options = opts))
}

opts <- parse_period_unit(unit)
if (opts$unit == 7L) { # weeks (unit = 7L) need to accommodate week_start
return(shift_temporal_to_week("floor_temporal", x, week_start, options = opts))
Expression$create("floor_temporal", x, options = opts)
}

Expression$create("floor_temporal", x, options = opts)
})
)

register_binding(
"ceiling_date",
"lubridate::ceiling_date",
function(x,
unit = "second",
change_on_boundary = NULL,
week_start = getOption("lubridate.week.start", 7)) {
opts <- parse_period_unit(unit)
if (is.null(change_on_boundary)) {
change_on_boundary <- ifelse(call_binding("is.Date", x), TRUE, FALSE)
}
opts$ceil_is_strictly_greater <- change_on_boundary

if (opts$unit == 7L) { # weeks (unit = 7L) need to accommodate week_start
return(shift_temporal_to_week("ceil_temporal", x, week_start, options = opts))
}
opts <- parse_period_unit(unit)
if (is.null(change_on_boundary)) {
change_on_boundary <- ifelse(call_binding("is.Date", x), TRUE, FALSE)
}
opts$ceil_is_strictly_greater <- change_on_boundary

Expression$create("ceil_temporal", x, options = opts)
})
if (opts$unit == 7L) { # weeks (unit = 7L) need to accommodate week_start
return(shift_temporal_to_week("ceil_temporal", x, week_start, options = opts))
}

Expression$create("ceil_temporal", x, options = opts)
}
)
}
Loading