diff --git a/r/R/arrow-datum.R b/r/R/arrow-datum.R index 8632ca3053d..33c67a52854 100644 --- a/r/R/arrow-datum.R +++ b/r/R/arrow-datum.R @@ -113,10 +113,10 @@ Ops.ArrowDatum <- function(e1, e2) { #' @export Math.ArrowDatum <- function(x, ..., base = exp(1), digits = 0) { switch(.Generic, - abs = , + abs = eval_array_expression("abs_checked", x), + ceiling = eval_array_expression("ceil", x), sign = , floor = , - ceiling = , trunc = , acos = , asin = , diff --git a/r/R/dplyr-datetime-helpers.R b/r/R/dplyr-datetime-helpers.R index af2f1deef81..9199ce0dd52 100644 --- a/r/R/dplyr-datetime-helpers.R +++ b/r/R/dplyr-datetime-helpers.R @@ -27,16 +27,16 @@ check_time_locale <- function(locale = Sys.getlocale("LC_TIME")) { } .helpers_function_map <- list( - "dminutes" = list(60, "s"), - "dhours" = list(3600, "s"), - "ddays" = list(86400, "s"), - "dweeks" = list(604800, "s"), - "dmonths" = list(2629800, "s"), - "dyears" = list(31557600, "s"), - "dseconds" = list(1, "s"), - "dmilliseconds" = list(1, "ms"), - "dmicroseconds" = list(1, "us"), - "dnanoseconds" = list(1, "ns") + "lubridate::dminutes" = list(60, "s"), + "lubridate::dhours" = list(3600, "s"), + "lubridate::ddays" = list(86400, "s"), + "lubridate::dweeks" = list(604800, "s"), + "lubridate::dmonths" = list(2629800, "s"), + "lubridate::dyears" = list(31557600, "s"), + "lubridate::dseconds" = list(1, "s"), + "lubridate::dmilliseconds" = list(1, "ms"), + "lubridate::dmicroseconds" = list(1, "us"), + "lubridate::dnanoseconds" = list(1, "ns") ) make_duration <- function(x, unit) { # TODO(ARROW-15862): remove first cast to int64 diff --git a/r/R/dplyr-funcs-conditional.R b/r/R/dplyr-funcs-conditional.R index 493031d2f57..74d19d85903 100644 --- a/r/R/dplyr-funcs-conditional.R +++ b/r/R/dplyr-funcs-conditional.R @@ -16,7 +16,7 @@ # under the License. register_bindings_conditional <- function() { - register_binding("coalesce", function(...) { + register_binding("dplyr::coalesce", function(...) { args <- list2(...) if (length(args) < 1) { abort("At least one argument must be supplied to coalesce()") @@ -60,14 +60,14 @@ register_bindings_conditional <- function() { build_expr("if_else", condition, true, false) } - register_binding("if_else", if_else_binding) + register_binding("dplyr::if_else", if_else_binding) # Although base R ifelse allows `yes` and `no` to be different classes - register_binding("ifelse", function(test, yes, no) { + register_binding("base::ifelse", function(test, yes, no) { if_else_binding(condition = test, true = yes, false = no) }) - register_binding("case_when", function(...) { + register_binding("dplyr::case_when", function(...) { formulas <- list2(...) n <- length(formulas) if (n == 0) { diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index df830a6b66f..7d11cdc1134 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -28,10 +28,10 @@ register_bindings_datetime <- function() { } register_bindings_datetime_utility <- function() { - register_binding("strptime", function(x, - format = "%Y-%m-%d %H:%M:%S", - tz = "", - unit = "ms") { + register_binding("base::strptime", function(x, + format = "%Y-%m-%d %H:%M:%S", + tz = "", + unit = "ms") { # Arrow uses unit for time parsing, strptime() does not. # Arrow has no default option for strptime (format, unit), # we suggest following format = "%Y-%m-%d %H:%M:%S", unit = MILLI/1L/"ms", @@ -75,10 +75,10 @@ register_bindings_datetime_utility <- function() { output }) - register_binding("strftime", function(x, - format = "", - tz = "", - usetz = FALSE) { + register_binding("base::strftime", function(x, + format = "", + tz = "", + usetz = FALSE) { if (usetz) { format <- paste(format, "%Z") } @@ -95,7 +95,7 @@ register_bindings_datetime_utility <- function() { Expression$create("strftime", ts, options = list(format = format, locale = check_time_locale())) }) - register_binding("format_ISO8601", function(x, usetz = FALSE, precision = NULL, ...) { + register_binding("lubridate::format_ISO8601", function(x, usetz = FALSE, precision = NULL, ...) { ISO8601_precision_map <- list( y = "%Y", @@ -126,7 +126,7 @@ register_bindings_datetime_utility <- function() { Expression$create("strftime", x, options = list(format = format, locale = "C")) }) - register_binding("is.Date", function(x) { + register_binding("lubridate::is.Date", function(x) { inherits(x, "Date") || (inherits(x, "Expression") && x$type_id() %in% Type[c("DATE32", "DATE64")]) }) @@ -135,27 +135,29 @@ register_bindings_datetime_utility <- function() { inherits(x, c("POSIXt", "POSIXct", "POSIXlt", "Date")) || (inherits(x, "Expression") && x$type_id() %in% Type[c("TIMESTAMP", "DATE32", "DATE64")]) } - register_binding("is.instant", is_instant_binding) - register_binding("is.timepoint", is_instant_binding) + register_binding("lubridate::is.instant", is_instant_binding) + register_binding("lubridate::is.timepoint", is_instant_binding) - register_binding("is.POSIXct", function(x) { + register_binding("lubridate::is.POSIXct", function(x) { inherits(x, "POSIXct") || (inherits(x, "Expression") && x$type_id() %in% Type[c("TIMESTAMP")]) }) - register_binding("date", function(x) { + register_binding("lubridate::date", function(x) { build_expr("cast", x, options = list(to_type = date32())) }) } register_bindings_datetime_components <- function() { - register_binding("second", function(x) { + register_binding("lubridate::second", function(x) { Expression$create("add", Expression$create("second", x), Expression$create("subsecond", x)) }) - register_binding("wday", function(x, label = FALSE, abbr = TRUE, - week_start = getOption("lubridate.week.start", 7), - locale = Sys.getlocale("LC_TIME")) { + register_binding("lubridate::wday", function(x, + label = FALSE, + abbr = TRUE, + week_start = getOption("lubridate.week.start", 7), + locale = Sys.getlocale("LC_TIME")) { if (label) { if (abbr) { format <- "%a" @@ -168,14 +170,14 @@ register_bindings_datetime_components <- function() { Expression$create("day_of_week", x, options = list(count_from_zero = FALSE, week_start = week_start)) }) - register_binding("week", function(x) { + register_binding("lubridate::week", function(x) { (call_binding("yday", x) - 1) %/% 7 + 1 }) - register_binding("month", function(x, - label = FALSE, - abbr = TRUE, - locale = Sys.getlocale("LC_TIME")) { + register_binding("lubridate::month", function(x, + label = FALSE, + abbr = TRUE, + locale = Sys.getlocale("LC_TIME")) { if (call_binding("is.integer", x)) { x <- call_binding( "if_else", @@ -207,14 +209,14 @@ register_bindings_datetime_components <- function() { build_expr("month", x) }) - register_binding("am", function(x) { + register_binding("lubridate::am", function(x) { hour <- Expression$create("hour", x) hour < 12 }) - register_binding("pm", function(x) { + register_binding("lubridate::pm", function(x) { !call_binding("am", x) }) - register_binding("tz", function(x) { + register_binding("lubridate::tz", function(x) { if (!call_binding("is.POSIXct", x)) { abort( paste0( @@ -227,7 +229,7 @@ register_bindings_datetime_components <- function() { x$type()$timezone() }) - register_binding("semester", function(x, with_year = FALSE) { + register_binding("lubridate::semester", function(x, with_year = FALSE) { month <- call_binding("month", x) semester <- call_binding("if_else", month <= 6, 1L, 2L) if (with_year) { @@ -240,13 +242,13 @@ register_bindings_datetime_components <- function() { } register_bindings_datetime_conversion <- function() { - register_binding("make_datetime", function(year = 1970L, - month = 1L, - day = 1L, - hour = 0L, - min = 0L, - sec = 0, - tz = "UTC") { + register_binding("lubridate::make_datetime", function(year = 1970L, + month = 1L, + day = 1L, + hour = 0L, + min = 0L, + sec = 0, + tz = "UTC") { # ParseTimestampStrptime currently ignores the timezone information (ARROW-12820). # Stop if tz other than 'UTC' is provided. @@ -258,18 +260,20 @@ register_bindings_datetime_conversion <- function() { build_expr("strptime", x, options = list(format = "%Y-%m-%d-%H-%M-%S", unit = 0L)) }) - register_binding("make_date", function(year = 1970L, month = 1L, day = 1L) { + register_binding("lubridate::make_date", function(year = 1970L, + month = 1L, + day = 1L) { x <- call_binding("make_datetime", year, month, day) build_expr("cast", x, options = cast_options(to_type = date32())) }) - register_binding("ISOdatetime", function(year, - month, - day, - hour, - min, - sec, - tz = "UTC") { + register_binding("base::ISOdatetime", function(year, + month, + day, + hour, + min, + sec, + tz = "UTC") { # NAs for seconds aren't propagated (but treated as 0) in the base version sec <- call_binding( @@ -282,21 +286,21 @@ register_bindings_datetime_conversion <- function() { call_binding("make_datetime", year, month, day, hour, min, sec, tz) }) - register_binding("ISOdate", function(year, - month, - day, - hour = 12, - min = 0, - sec = 0, - tz = "UTC") { + register_binding("base::ISOdate", function(year, + month, + day, + hour = 12, + min = 0, + sec = 0, + tz = "UTC") { call_binding("make_datetime", year, month, day, hour, min, sec, tz) }) - register_binding("as.Date", function(x, - format = NULL, - tryFormats = "%Y-%m-%d", - origin = "1970-01-01", - tz = "UTC") { + register_binding("base::as.Date", function(x, + format = NULL, + tryFormats = "%Y-%m-%d", + origin = "1970-01-01", + tz = "UTC") { if (is.null(format) && length(tryFormats) > 1) { abort( paste( @@ -324,10 +328,10 @@ register_bindings_datetime_conversion <- function() { ) }) - register_binding("as_date", function(x, - format = NULL, - origin = "1970-01-01", - tz = NULL) { + register_binding("lubridate::as_date", function(x, + format = NULL, + origin = "1970-01-01", + tz = NULL) { # base::as.Date() and lubridate::as_date() differ in the way they use the # `tz` argument. Both cast to the desired timezone, if present. The # difference appears when the `tz` argument is not set: `as.Date()` uses the @@ -344,10 +348,10 @@ register_bindings_datetime_conversion <- function() { ) }) - register_binding("as_datetime", function(x, - origin = "1970-01-01", - tz = "UTC", - format = NULL) { + register_binding("lubridate::as_datetime", function(x, + origin = "1970-01-01", + tz = "UTC", + format = NULL) { if (call_binding("is.numeric", x)) { delta <- call_binding("difftime", origin, "1970-01-01") delta <- build_expr("cast", delta, options = cast_options(to_type = int64())) @@ -367,7 +371,7 @@ register_bindings_datetime_conversion <- function() { build_expr("assume_timezone", output, options = list(timezone = tz)) }) - register_binding("decimal_date", function(date) { + register_binding("lubridate::decimal_date", function(date) { y <- build_expr("year", date) start <- call_binding("make_datetime", year = y, tz = "UTC") sofar <- call_binding("difftime", date, start, units = "secs") @@ -380,7 +384,7 @@ register_bindings_datetime_conversion <- function() { y + sofar$cast(int64()) / total }) - register_binding("date_decimal", function(decimal, tz = "UTC") { + register_binding("lubridate::date_decimal", function(decimal, tz = "UTC") { y <- build_expr("floor", decimal) start <- call_binding("make_datetime", year = y, tz = tz) @@ -399,10 +403,10 @@ register_bindings_datetime_conversion <- function() { } register_bindings_duration <- function() { - register_binding("difftime", function(time1, - time2, - tz, - units = "secs") { + register_binding("base::difftime", function(time1, + time2, + tz, + units = "secs") { if (units != "secs") { abort("`difftime()` with units other than `secs` not supported in Arrow") } @@ -440,9 +444,9 @@ register_bindings_duration <- function() { subtract_output <- build_expr("-", time1, time2) build_expr("cast", subtract_output, options = cast_options(to_type = duration("s"))) }) - register_binding("as.difftime", function(x, - format = "%X", - units = "secs") { + register_binding("base::as.difftime", function(x, + format = "%X", + units = "secs") { # windows doesn't seem to like "%X" if (format == "%X" & tolower(Sys.info()[["sysname"]]) == "windows") { format <- "%H:%M:%S" @@ -475,9 +479,9 @@ register_bindings_duration <- function() { } register_bindings_duration_constructor <- function() { - register_binding("make_difftime", function(num = NULL, - units = "secs", - ...) { + register_binding("lubridate::make_difftime", function(num = NULL, + units = "secs", + ...) { if (units != "secs") { abort("`make_difftime()` with units other than 'secs' not supported in Arrow") } @@ -520,18 +524,18 @@ register_bindings_duration_helpers <- function() { ) } - register_binding("dpicoseconds", function(x = 1) { + register_binding("lubridate::dpicoseconds", function(x = 1) { abort("Duration in picoseconds not supported in Arrow.") }) } register_bindings_datetime_parsers <- function() { - register_binding("parse_date_time", function(x, - orders, - tz = "UTC", - truncated = 0, - quiet = TRUE, - exact = FALSE) { + register_binding("lubridate::parse_date_time", function(x, + orders, + tz = "UTC", + truncated = 0, + quiet = TRUE, + exact = FALSE) { if (!quiet) { arrow_not_supported("`quiet = FALSE`") } @@ -584,14 +588,17 @@ register_bindings_datetime_parsers <- function() { } for (ymd_order in ymd_parser_vec) { - register_binding(ymd_order, ymd_parser_map_factory(ymd_order)) + register_binding( + paste0("lubridate::", ymd_order), + ymd_parser_map_factory(ymd_order) + ) } - register_binding("fast_strptime", function(x, - format, - tz = "UTC", - lt = FALSE, - cutoff_2000 = 68L) { + register_binding("lubridate::fast_strptime", function(x, + format, + tz = "UTC", + lt = FALSE, + cutoff_2000 = 68L) { # `lt` controls the output `lt = TRUE` returns a POSIXlt (which doesn't play # well with mutate, for example) if (lt) { diff --git a/r/R/dplyr-funcs-math.R b/r/R/dplyr-funcs-math.R index 0ba2ddc856e..e7667532000 100644 --- a/r/R/dplyr-funcs-math.R +++ b/r/R/dplyr-funcs-math.R @@ -49,10 +49,10 @@ register_bindings_math <- function() { Expression$create("logb_checked", x, Expression$scalar(base)) } - register_binding("log", log_binding) - register_binding("logb", log_binding) + register_binding("base::log", log_binding) + register_binding("base::logb", log_binding) - register_binding("pmin", function(..., na.rm = FALSE) { + register_binding("base::pmin", function(..., na.rm = FALSE) { build_expr( "min_element_wise", ..., @@ -60,7 +60,7 @@ register_bindings_math <- function() { ) }) - register_binding("pmax", function(..., na.rm = FALSE) { + register_binding("base::pmax", function(..., na.rm = FALSE) { build_expr( "max_element_wise", ..., @@ -68,12 +68,12 @@ register_bindings_math <- function() { ) }) - register_binding("trunc", function(x, ...) { + register_binding("base::trunc", function(x, ...) { # accepts and ignores ... for consistency with base::trunc() build_expr("trunc", x) }) - register_binding("round", function(x, digits = 0) { + register_binding("base::round", function(x, digits = 0) { build_expr( "round", x, @@ -81,14 +81,14 @@ register_bindings_math <- function() { ) }) - register_binding("sqrt", function(x) { + register_binding("base::sqrt", function(x) { build_expr( "sqrt_checked", x ) }) - register_binding("exp", function(x) { + register_binding("base::exp", function(x) { build_expr( "power_checked", exp(1), diff --git a/r/R/dplyr-funcs-string.R b/r/R/dplyr-funcs-string.R index 892c5175486..b300d7c439e 100644 --- a/r/R/dplyr-funcs-string.R +++ b/r/R/dplyr-funcs-string.R @@ -161,7 +161,7 @@ register_bindings_string_join <- function() { } } - register_binding("paste", function(..., sep = " ", collapse = NULL, recycle0 = FALSE) { + register_binding("base::paste", function(..., sep = " ", collapse = NULL, recycle0 = FALSE) { assert_that( is.null(collapse), msg = "paste() with the collapse argument is not yet supported in Arrow" @@ -172,7 +172,7 @@ register_bindings_string_join <- function() { arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., sep) }) - register_binding("paste0", function(..., collapse = NULL, recycle0 = FALSE) { + register_binding("base::paste0", function(..., collapse = NULL, recycle0 = FALSE) { assert_that( is.null(collapse), msg = "paste0() with the collapse argument is not yet supported in Arrow" @@ -180,7 +180,7 @@ register_bindings_string_join <- function() { arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., "") }) - register_binding("str_c", function(..., sep = "", collapse = NULL) { + register_binding("stringr::str_c", function(..., sep = "", collapse = NULL) { assert_that( is.null(collapse), msg = "str_c() with the collapse argument is not yet supported in Arrow" @@ -198,7 +198,10 @@ register_bindings_string_regex <- function() { ) } - register_binding("grepl", function(pattern, x, ignore.case = FALSE, fixed = FALSE) { + register_binding("base::grepl", function(pattern, + x, + ignore.case = FALSE, + fixed = FALSE) { arrow_fun <- ifelse(fixed, "match_substring", "match_substring_regex") out <- create_string_match_expr( arrow_fun, @@ -210,7 +213,7 @@ register_bindings_string_regex <- function() { }) - register_binding("str_detect", function(string, pattern, negate = FALSE) { + register_binding("stringr::str_detect", function(string, pattern, negate = FALSE) { opts <- get_stringr_pattern_options(enexpr(pattern)) arrow_fun <- ifelse(opts$fixed, "match_substring", "match_substring_regex") out <- create_string_match_expr(arrow_fun, @@ -224,7 +227,9 @@ register_bindings_string_regex <- function() { out }) - register_binding("str_like", function(string, pattern, ignore_case = TRUE) { + register_binding("stringr::str_like", function(string, + pattern, + ignore_case = TRUE) { Expression$create( "match_like", string, @@ -232,7 +237,7 @@ register_bindings_string_regex <- function() { ) }) - register_binding("str_count", function(string, pattern) { + register_binding("stringr::str_count", function(string, pattern) { opts <- get_stringr_pattern_options(enexpr(pattern)) if (!is.string(pattern)) { arrow_not_supported("`pattern` must be a length 1 character vector; other values") @@ -245,7 +250,7 @@ register_bindings_string_regex <- function() { ) }) - register_binding("startsWith", function(x, prefix) { + register_binding("base::startsWith", function(x, prefix) { Expression$create( "starts_with", x, @@ -253,7 +258,7 @@ register_bindings_string_regex <- function() { ) }) - register_binding("endsWith", function(x, suffix) { + register_binding("base::endsWith", function(x, suffix) { Expression$create( "ends_with", x, @@ -261,7 +266,7 @@ register_bindings_string_regex <- function() { ) }) - register_binding("str_starts", function(string, pattern, negate = FALSE) { + register_binding("stringr::str_starts", function(string, pattern, negate = FALSE) { opts <- get_stringr_pattern_options(enexpr(pattern)) if (opts$fixed) { out <- call_binding("startsWith", x = string, prefix = opts$pattern) @@ -279,7 +284,7 @@ register_bindings_string_regex <- function() { out }) - register_binding("str_ends", function(string, pattern, negate = FALSE) { + register_binding("stringr::str_ends", function(string, pattern, negate = FALSE) { opts <- get_stringr_pattern_options(enexpr(pattern)) if (opts$fixed) { out <- call_binding("endsWith", x = string, suffix = opts$pattern) @@ -326,12 +331,12 @@ register_bindings_string_regex <- function() { } } - register_binding("sub", arrow_r_string_replace_function(1L)) - register_binding("gsub", arrow_r_string_replace_function(-1L)) - register_binding("str_replace", arrow_stringr_string_replace_function(1L)) - register_binding("str_replace_all", arrow_stringr_string_replace_function(-1L)) + register_binding("base::sub", arrow_r_string_replace_function(1L)) + register_binding("base::gsub", arrow_r_string_replace_function(-1L)) + register_binding("stringr::str_replace", arrow_stringr_string_replace_function(1L)) + register_binding("stringr::str_replace_all", arrow_stringr_string_replace_function(-1L)) - register_binding("strsplit", function(x, split, fixed = FALSE, perl = FALSE, + register_binding("base::strsplit", function(x, split, fixed = FALSE, perl = FALSE, useBytes = FALSE) { assert_that(is.string(split)) @@ -350,7 +355,10 @@ register_bindings_string_regex <- function() { ) }) - register_binding("str_split", function(string, pattern, n = Inf, simplify = FALSE) { + register_binding("stringr::str_split", function(string, + pattern, + n = Inf, + simplify = FALSE) { opts <- get_stringr_pattern_options(enexpr(pattern)) arrow_fun <- ifelse(opts$fixed, "split_pattern", "split_pattern_regex") if (opts$ignore_case) { @@ -382,7 +390,7 @@ register_bindings_string_regex <- function() { } register_bindings_string_other <- function() { - register_binding("nchar", function(x, type = "chars", allowNA = FALSE, keepNA = NA) { + register_binding("base::nchar", function(x, type = "chars", allowNA = FALSE, keepNA = NA) { if (allowNA) { arrow_not_supported("allowNA = TRUE") } @@ -400,22 +408,22 @@ register_bindings_string_other <- function() { } }) - register_binding("str_to_lower", function(string, locale = "en") { + register_binding("stringr::str_to_lower", function(string, locale = "en") { stop_if_locale_provided(locale) Expression$create("utf8_lower", string) }) - register_binding("str_to_upper", function(string, locale = "en") { + register_binding("stringr::str_to_upper", function(string, locale = "en") { stop_if_locale_provided(locale) Expression$create("utf8_upper", string) }) - register_binding("str_to_title", function(string, locale = "en") { + register_binding("stringr::str_to_title", function(string, locale = "en") { stop_if_locale_provided(locale) Expression$create("utf8_title", string) }) - register_binding("str_trim", function(string, side = c("both", "left", "right")) { + register_binding("stringr::str_trim", function(string, side = c("both", "left", "right")) { side <- match.arg(side) trim_fun <- switch(side, left = "utf8_ltrim_whitespace", @@ -425,7 +433,7 @@ register_bindings_string_other <- function() { Expression$create(trim_fun, string) }) - register_binding("substr", function(x, start, stop) { + register_binding("base::substr", function(x, start, stop) { assert_that( length(start) == 1, msg = "`start` must be length 1 - other lengths are not supported in Arrow" @@ -457,11 +465,11 @@ register_bindings_string_other <- function() { ) }) - register_binding("substring", function(text, first, last) { + register_binding("base::substring", function(text, first, last) { call_binding("substr", x = text, start = first, stop = last) }) - register_binding("str_sub", function(string, start = 1L, end = -1L) { + register_binding("stringr::str_sub", function(string, start = 1L, end = -1L) { assert_that( length(start) == 1, msg = "`start` must be length 1 - other lengths are not supported in Arrow" @@ -498,7 +506,10 @@ register_bindings_string_other <- function() { }) - register_binding("str_pad", function(string, width, side = c("left", "right", "both"), pad = " ") { + register_binding("stringr::str_pad", function(string, + width, + side = c("left", "right", "both"), + pad = " ") { assert_that(is_integerish(width)) side <- match.arg(side) assert_that(is.string(pad)) diff --git a/r/R/dplyr-funcs-type.R b/r/R/dplyr-funcs-type.R index 6c409c6c7e8..9925d0347f7 100644 --- a/r/R/dplyr-funcs-type.R +++ b/r/R/dplyr-funcs-type.R @@ -43,13 +43,13 @@ register_bindings_type_cast <- function() { # as.* type casting functions # as.factor() is mapped in expression.R - register_binding("as.character", function(x) { + register_binding("base::as.character", function(x) { build_expr("cast", x, options = cast_options(to_type = string())) }) - register_binding("as.double", function(x) { + register_binding("base::as.double", function(x) { build_expr("cast", x, options = cast_options(to_type = float64())) }) - register_binding("as.integer", function(x) { + register_binding("base::as.integer", function(x) { build_expr( "cast", x, @@ -60,7 +60,7 @@ register_bindings_type_cast <- function() { ) ) }) - register_binding("as.integer64", function(x) { + register_binding("bit64::as.integer64", function(x) { build_expr( "cast", x, @@ -71,14 +71,14 @@ register_bindings_type_cast <- function() { ) ) }) - register_binding("as.logical", function(x) { + register_binding("base::as.logical", function(x) { build_expr("cast", x, options = cast_options(to_type = boolean())) }) - register_binding("as.numeric", function(x) { + register_binding("base::as.numeric", function(x) { build_expr("cast", x, options = cast_options(to_type = float64())) }) - register_binding("is", function(object, class2) { + register_binding("methods::is", function(object, class2) { if (is.string(class2)) { switch(class2, # for R data types, pass off to is.*() functions @@ -103,7 +103,9 @@ register_bindings_type_cast <- function() { }) # Create a data frame/tibble/struct column - register_binding("tibble", function(..., .rows = NULL, .name_repair = NULL) { + register_binding("tibble::tibble", function(..., + .rows = NULL, + .name_repair = NULL) { if (!is.null(.rows)) arrow_not_supported(".rows") if (!is.null(.name_repair)) arrow_not_supported(".name_repair") @@ -122,9 +124,12 @@ register_bindings_type_cast <- function() { ) }) - register_binding("data.frame", function(..., row.names = NULL, - check.rows = NULL, check.names = TRUE, fix.empty.names = TRUE, - stringsAsFactors = FALSE) { + register_binding("base::data.frame", function(..., + row.names = NULL, + check.rows = NULL, + check.names = TRUE, + fix.empty.names = TRUE, + stringsAsFactors = FALSE) { # we need a specific value of stringsAsFactors because the default was # TRUE in R <= 3.6 if (!identical(stringsAsFactors, FALSE)) { @@ -159,70 +164,70 @@ register_bindings_type_cast <- function() { register_bindings_type_inspect <- function() { # is.* type functions - register_binding("is.character", function(x) { + register_binding("base::is.character", function(x) { is.character(x) || (inherits(x, "Expression") && x$type_id() %in% Type[c("STRING", "LARGE_STRING")]) }) - register_binding("is.numeric", function(x) { + register_binding("base::is.numeric", function(x) { is.numeric(x) || (inherits(x, "Expression") && x$type_id() %in% Type[c( "UINT8", "INT8", "UINT16", "INT16", "UINT32", "INT32", "UINT64", "INT64", "HALF_FLOAT", "FLOAT", "DOUBLE", "DECIMAL128", "DECIMAL256" )]) }) - register_binding("is.double", function(x) { + register_binding("base::is.double", function(x) { is.double(x) || (inherits(x, "Expression") && x$type_id() == Type["DOUBLE"]) }) - register_binding("is.integer", function(x) { + register_binding("base::is.integer", function(x) { is.integer(x) || (inherits(x, "Expression") && x$type_id() %in% Type[c( "UINT8", "INT8", "UINT16", "INT16", "UINT32", "INT32", "UINT64", "INT64" )]) }) - register_binding("is.integer64", function(x) { + register_binding("bit64::is.integer64", function(x) { inherits(x, "integer64") || (inherits(x, "Expression") && x$type_id() == Type["INT64"]) }) - register_binding("is.logical", function(x) { + register_binding("base::is.logical", function(x) { is.logical(x) || (inherits(x, "Expression") && x$type_id() == Type["BOOL"]) }) - register_binding("is.factor", function(x) { + register_binding("base::is.factor", function(x) { is.factor(x) || (inherits(x, "Expression") && x$type_id() == Type["DICTIONARY"]) }) - register_binding("is.list", function(x) { + register_binding("base::is.list", function(x) { is.list(x) || (inherits(x, "Expression") && x$type_id() %in% Type[c( "LIST", "FIXED_SIZE_LIST", "LARGE_LIST" )]) }) # rlang::is_* type functions - register_binding("is_character", function(x, n = NULL) { + register_binding("rlang::is_character", function(x, n = NULL) { assert_that(is.null(n)) call_binding("is.character", x) }) - register_binding("is_double", function(x, n = NULL, finite = NULL) { + register_binding("rlang::is_double", function(x, n = NULL, finite = NULL) { assert_that(is.null(n) && is.null(finite)) call_binding("is.double", x) }) - register_binding("is_integer", function(x, n = NULL) { + register_binding("rlang::is_integer", function(x, n = NULL) { assert_that(is.null(n)) call_binding("is.integer", x) }) - register_binding("is_list", function(x, n = NULL) { + register_binding("rlang::is_list", function(x, n = NULL) { assert_that(is.null(n)) call_binding("is.list", x) }) - register_binding("is_logical", function(x, n = NULL) { + register_binding("rlang::is_logical", function(x, n = NULL) { assert_that(is.null(n)) call_binding("is.logical", x) }) } register_bindings_type_elementwise <- function() { - register_binding("is.na", function(x) { + register_binding("base::is.na", function(x) { build_expr("is_null", x, options = list(nan_is_null = TRUE)) }) - register_binding("is.nan", function(x) { + register_binding("base::is.nan", function(x) { if (is.double(x) || (inherits(x, "Expression") && x$type_id() %in% TYPES_WITH_NAN)) { # TODO: if an option is added to the is_nan kernel to treat NA as NaN, @@ -233,17 +238,17 @@ register_bindings_type_elementwise <- function() { } }) - register_binding("between", function(x, left, right) { + register_binding("dplyr::between", function(x, left, right) { x >= left & x <= right }) - register_binding("is.finite", function(x) { + register_binding("base::is.finite", function(x) { is_fin <- Expression$create("is_finite", x) # for compatibility with base::is.finite(), return FALSE for NA_real_ is_fin & !call_binding("is.na", is_fin) }) - register_binding("is.infinite", function(x) { + register_binding("base::is.infinite", function(x) { is_inf <- Expression$create("is_inf", x) # for compatibility with base::is.infinite(), return FALSE for NA_real_ is_inf & !call_binding("is.na", is_inf) @@ -251,7 +256,7 @@ register_bindings_type_elementwise <- function() { } register_bindings_type_format <- function() { - register_binding("format", function(x, ...) { + register_binding("base::format", function(x, ...) { # We use R's format if we get a single R object here since we don't (yet) # support all of the possible options for casting to string if (!inherits(x, "Expression")) { diff --git a/r/R/dplyr-funcs.R b/r/R/dplyr-funcs.R index 95c1f69f4fb..7c4ed99e2ed 100644 --- a/r/R/dplyr-funcs.R +++ b/r/R/dplyr-funcs.R @@ -58,15 +58,26 @@ NULL #' @keywords internal #' register_binding <- function(fun_name, fun, registry = nse_funcs) { - name <- gsub("^.*?::", "", fun_name) - namespace <- gsub("::.*$", "", fun_name) + unqualified_name <- sub("^.*?:{+}", "", fun_name) - previous_fun <- if (name %in% names(registry)) registry[[name]] else NULL + previous_fun <- registry[[unqualified_name]] - if (is.null(fun) && !is.null(previous_fun)) { - rm(list = name, envir = registry, inherits = FALSE) + # if the unqualified name exists in the registry, warn + if (!is.null(fun) && !is.null(previous_fun)) { + warn( + paste0( + "A \"", + unqualified_name, + "\" binding already exists in the registry and will be overwritten.") + ) + } + + # register both as `pkg::fun` and as `fun` if `qualified_name` is prefixed + if (grepl("::", fun_name)) { + registry[[unqualified_name]] <- fun + registry[[fun_name]] <- fun } else { - registry[[name]] <- fun + registry[[unqualified_name]] <- fun } invisible(previous_fun) @@ -116,3 +127,17 @@ create_binding_cache <- function() { nse_funcs <- new.env(parent = emptyenv()) agg_funcs <- new.env(parent = emptyenv()) .cache <- new.env(parent = emptyenv()) + +# we register 2 versions of the "::" binding - one for use with nse_funcs +# (registered below) and another one for use with agg_funcs (registered in +# dplyr-summarize.R) +nse_funcs[["::"]] <- function(lhs, rhs) { + lhs_name <- as.character(substitute(lhs)) + rhs_name <- as.character(substitute(rhs)) + + fun_name <- paste0(lhs_name, "::", rhs_name) + + # if we do not have a binding for pkg::fun, then fall back on to the + # regular pkg::fun function + nse_funcs[[fun_name]] %||% asNamespace(lhs_name)[[rhs_name]] +} diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R index 9226c476cb9..92587f6c685 100644 --- a/r/R/dplyr-summarize.R +++ b/r/R/dplyr-summarize.R @@ -56,49 +56,49 @@ agg_fun_output_type <- function(fun, input_type, hash) { } register_bindings_aggregate <- function() { - register_binding_agg("sum", function(..., na.rm = FALSE) { + register_binding_agg("base::sum", function(..., na.rm = FALSE) { list( fun = "sum", data = ensure_one_arg(list2(...), "sum"), options = list(skip_nulls = na.rm, min_count = 0L) ) }) - register_binding_agg("any", function(..., na.rm = FALSE) { + register_binding_agg("base::any", function(..., na.rm = FALSE) { list( fun = "any", data = ensure_one_arg(list2(...), "any"), options = list(skip_nulls = na.rm, min_count = 0L) ) }) - register_binding_agg("all", function(..., na.rm = FALSE) { + register_binding_agg("base::all", function(..., na.rm = FALSE) { list( fun = "all", data = ensure_one_arg(list2(...), "all"), options = list(skip_nulls = na.rm, min_count = 0L) ) }) - register_binding_agg("mean", function(x, na.rm = FALSE) { + register_binding_agg("base::mean", function(x, na.rm = FALSE) { list( fun = "mean", data = x, options = list(skip_nulls = na.rm, min_count = 0L) ) }) - register_binding_agg("sd", function(x, na.rm = FALSE, ddof = 1) { + register_binding_agg("stats::sd", function(x, na.rm = FALSE, ddof = 1) { list( fun = "stddev", data = x, options = list(skip_nulls = na.rm, min_count = 0L, ddof = ddof) ) }) - register_binding_agg("var", function(x, na.rm = FALSE, ddof = 1) { + register_binding_agg("stats::var", function(x, na.rm = FALSE, ddof = 1) { list( fun = "variance", data = x, options = list(skip_nulls = na.rm, min_count = 0L, ddof = ddof) ) }) - register_binding_agg("quantile", function(x, probs, na.rm = FALSE) { + register_binding_agg("stats::quantile", function(x, probs, na.rm = FALSE) { if (length(probs) != 1) { arrow_not_supported("quantile() with length(probs) != 1") } @@ -116,7 +116,7 @@ register_bindings_aggregate <- function() { options = list(skip_nulls = na.rm, q = probs) ) }) - register_binding_agg("median", function(x, na.rm = FALSE) { + register_binding_agg("stats::median", function(x, na.rm = FALSE) { # TODO: Bind to the Arrow function that returns an exact median and remove # this warning (ARROW-14021) warn( @@ -131,28 +131,28 @@ register_bindings_aggregate <- function() { options = list(skip_nulls = na.rm) ) }) - register_binding_agg("n_distinct", function(..., na.rm = FALSE) { + register_binding_agg("dplyr::n_distinct", function(..., na.rm = FALSE) { list( fun = "count_distinct", data = ensure_one_arg(list2(...), "n_distinct"), options = list(na.rm = na.rm) ) }) - register_binding_agg("n", function() { + register_binding_agg("dplyr::n", function() { list( fun = "sum", data = Expression$scalar(1L), options = list() ) }) - register_binding_agg("min", function(..., na.rm = FALSE) { + register_binding_agg("base::min", function(..., na.rm = FALSE) { list( fun = "min", data = ensure_one_arg(list2(...), "min"), options = list(skip_nulls = na.rm, min_count = 0L) ) }) - register_binding_agg("max", function(..., na.rm = FALSE) { + register_binding_agg("base::max", function(..., na.rm = FALSE) { list( fun = "max", data = ensure_one_arg(list2(...), "max"), @@ -161,6 +161,22 @@ register_bindings_aggregate <- function() { }) } +# we register 2 versions of the "::" binding - one for use with agg_funcs +# (registered below) and another one for use with nse_funcs +# (registered in dplyr-funcs.R) +agg_funcs[["::"]] <- function(lhs, rhs) { + lhs_name <- as.character(substitute(lhs)) + rhs_name <- as.character(substitute(rhs)) + + fun_name <- paste0(lhs_name, "::", rhs_name) + + # if we do not have a binding for pkg::fun, then fall back on to the + # nse_funcs (useful when we have a regular function inside an aggregating one) + # and then, if searching nse_funcs fails too, fall back to the + # regular `pkg::fun()` function + agg_funcs[[fun_name]] %||% nse_funcs[[fun_name]] %||% asNamespace(lhs_name)[[rhs_name]] +} + # The following S3 methods are registered on load if dplyr is present summarise.arrow_dplyr_query <- function(.data, ...) { @@ -348,7 +364,7 @@ summarize_eval <- function(name, quosure, ctx, hash) { # the list output from the Arrow hash_tdigest kernel to flatten it into a # column of type float64. We do that by modifying the unevaluated expression # to replace quantile(...) with arrow_list_element(quantile(...), 0L) - if (hash && "quantile" %in% funs_in_expr) { + if (hash && any(c("quantile", "stats::quantile") %in% funs_in_expr)) { expr <- wrap_hash_quantile(expr) funs_in_expr <- all_funs(expr) } @@ -464,7 +480,7 @@ wrap_hash_quantile <- function(expr) { if (length(expr) == 1) { return(expr) } else { - if (is.call(expr) && expr[[1]] == quote(quantile)) { + if (is.call(expr) && any(c(quote(quantile), quote(stats::quantile)) == expr[[1]])) { return(str2lang(paste0("arrow_list_element(", deparse1(expr), ", 0L)"))) } else { return(as.call(lapply(expr, wrap_hash_quantile))) diff --git a/r/R/expression.R b/r/R/expression.R index be43de01e1c..6b9eb5e89c5 100644 --- a/r/R/expression.R +++ b/r/R/expression.R @@ -26,59 +26,59 @@ # functions are arranged alphabetically by name within categories # arithmetic functions - "abs" = "abs_checked", - "ceiling" = "ceil", - "floor" = "floor", - "log10" = "log10_checked", - "log1p" = "log1p_checked", - "log2" = "log2_checked", - "sign" = "sign", + "base::abs" = "abs_checked", + "base::ceiling" = "ceil", + "base::floor" = "floor", + "base::log10" = "log10_checked", + "base::log1p" = "log1p_checked", + "base::log2" = "log2_checked", + "base::sign" = "sign", # trunc is defined in dplyr-functions.R # trigonometric functions - "acos" = "acos_checked", - "asin" = "asin_checked", - "cos" = "cos_checked", - "sin" = "sin_checked", - "tan" = "tan_checked", + "base::acos" = "acos_checked", + "base::asin" = "asin_checked", + "base::cos" = "cos_checked", + "base::sin" = "sin_checked", + "base::tan" = "tan_checked", # logical functions "!" = "invert", # string functions # nchar is defined in dplyr-functions.R - "str_length" = "utf8_length", + "stringr::str_length" = "utf8_length", # str_pad is defined in dplyr-functions.R # str_sub is defined in dplyr-functions.R # str_to_lower is defined in dplyr-functions.R # str_to_title is defined in dplyr-functions.R # str_to_upper is defined in dplyr-functions.R # str_trim is defined in dplyr-functions.R - "stri_reverse" = "utf8_reverse", + "stringi::stri_reverse" = "utf8_reverse", # substr is defined in dplyr-functions.R # substring is defined in dplyr-functions.R - "tolower" = "utf8_lower", - "toupper" = "utf8_upper", + "base::tolower" = "utf8_lower", + "base::toupper" = "utf8_upper", # date and time functions - "day" = "day", - "dst" = "is_dst", - "hour" = "hour", - "isoweek" = "iso_week", - "epiweek" = "us_week", - "isoyear" = "iso_year", - "epiyear" = "us_year", - "minute" = "minute", - "quarter" = "quarter", + "lubridate::day" = "day", + "lubridate::dst" = "is_dst", + "lubridate::hour" = "hour", + "lubridate::isoweek" = "iso_week", + "lubridate::epiweek" = "us_week", + "lubridate::isoyear" = "iso_year", + "lubridate::epiyear" = "us_year", + "lubridate::minute" = "minute", + "lubridate::quarter" = "quarter", # second is defined in dplyr-functions.R # wday is defined in dplyr-functions.R - "mday" = "day", - "yday" = "day_of_year", - "year" = "year", - "leap_year" = "is_leap_year", + "lubridate::mday" = "day", + "lubridate::yday" = "day_of_year", + "lubridate::year" = "year", + "lubridate::leap_year" = "is_leap_year", # type conversion functions - "as.factor" = "dictionary_encode" + "base::as.factor" = "dictionary_encode" ) .binary_function_map <- list( @@ -104,8 +104,8 @@ "%%" = "divide_checked", "^" = "power_checked", "%in%" = "is_in_meta_binary", - "strrep" = "binary_repeat", - "str_dup" = "binary_repeat" + "base::strrep" = "binary_repeat", + "stringr::str_dup" = "binary_repeat" ) .array_function_map <- c(.unary_function_map, .binary_function_map) diff --git a/r/R/util.R b/r/R/util.R index a51fde0c2d6..55ff29db73a 100644 --- a/r/R/util.R +++ b/r/R/util.R @@ -93,6 +93,15 @@ all_funs <- function(expr) { expr <- quo_get_expr(expr) } names <- all.names(expr) + # if we have namespace-qualified functions, we rebuild the function name with + # the `pkg::` prefix + if ("::" %in% names) { + for (i in seq_along(names)) { + if (names[i] == "::") { + names[i] <- paste0(names[i + 1], names[i], names[i + 2]) + } + } + } names[map_lgl(names, ~ is_function(expr, .))] } diff --git a/r/tests/testthat/test-dplyr-filter.R b/r/tests/testthat/test-dplyr-filter.R index 60c740a5c1a..aed46d801ce 100644 --- a/r/tests/testthat/test-dplyr-filter.R +++ b/r/tests/testthat/test-dplyr-filter.R @@ -400,3 +400,20 @@ test_that("filter() with .data pronoun", { tbl ) }) + +test_that("filter() with namespaced functions", { + compare_dplyr_binding( + .input %>% + filter(dplyr::between(dbl, 1, 2)) %>% + collect(), + tbl + ) + + skip_if_not_available("utf8proc") + compare_dplyr_binding( + .input %>% + filter(dbl > 2, stringr::str_length(verses) > 25) %>% + collect(), + tbl + ) +}) diff --git a/r/tests/testthat/test-dplyr-funcs-conditional.R b/r/tests/testthat/test-dplyr-funcs-conditional.R index 4f5fdb0af4e..4898d1e9e3e 100644 --- a/r/tests/testthat/test-dplyr-funcs-conditional.R +++ b/r/tests/testthat/test-dplyr-funcs-conditional.R @@ -29,7 +29,8 @@ test_that("if_else and ifelse", { compare_dplyr_binding( .input %>% mutate( - y = if_else(int > 5, 1, 0) + y = if_else(int > 5, 1, 0), + y2 = dplyr::if_else(int > 6, 1, 0) ) %>% collect(), tbl @@ -65,7 +66,8 @@ test_that("if_else and ifelse", { compare_dplyr_binding( .input %>% mutate( - y = ifelse(int > 5, 1, 0) + y = ifelse(int > 5, 1, 0), + y2 = base::ifelse(int > 6, 1, 0) ) %>% collect(), tbl @@ -192,6 +194,18 @@ test_that("case_when()", { tbl ) + # with namespacing + compare_dplyr_binding( + .input %>% + filter(dplyr::case_when( + dbl + int - 1.1 == dbl2 ~ TRUE, + NA ~ NA, + TRUE ~ FALSE + ) & !is.na(dbl2)) %>% + collect(), + tbl + ) + # dplyr::case_when() errors if values on right side of formulas do not have # exactly the same type, but the Arrow case_when kernel allows compatible types expect_equal( @@ -303,6 +317,20 @@ test_that("coalesce()", { df ) + # with namespacing + compare_dplyr_binding( + .input %>% + mutate( + cw = dplyr::coalesce(w), + cz = dplyr::coalesce(z), + cwx = dplyr::coalesce(w, x), + cwxy = dplyr::coalesce(w, x, y), + cwxyz = dplyr::coalesce(w, x, y, z) + ) %>% + collect(), + df + ) + # factor df_fct <- df %>% transmute(across(everything(), ~ factor(.x, levels = c("a", "b", "c")))) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index ce804d1727f..f0543736404 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -70,6 +70,16 @@ test_that("strptime", { collect(), t_stamp_with_pm_tz ) + + expect_equal( + t_string %>% + record_batch() %>% + mutate( + x = base::strptime(x, format = "%Y-%m-%d %H:%M:%S") + ) %>% + collect(), + t_stamp_with_pm_tz + ) }) # adding a timezone to a timezone-naive timestamp works @@ -196,7 +206,10 @@ test_that("strftime", { compare_dplyr_binding( .input %>% - mutate(x = strftime(datetime, format = formats)) %>% + mutate( + x = strftime(datetime, format = formats), + x2 = base::strftime(datetime, format = formats) + ) %>% collect(), times ) @@ -280,7 +293,10 @@ test_that("format_ISO8601", { compare_dplyr_binding( .input %>% - mutate(x = format_ISO8601(x, precision = "ymd", usetz = FALSE)) %>% + mutate( + a = format_ISO8601(x, precision = "ymd", usetz = FALSE), + a2 = lubridate::format_ISO8601(x, precision = "ymd", usetz = FALSE) + ) %>% collect(), times ) @@ -340,14 +356,22 @@ test_that("is.* functions from lubridate", { # make sure all true and at least one false value is considered compare_dplyr_binding( .input %>% - mutate(x = is.POSIXct(datetime), y = is.POSIXct(integer)) %>% + mutate( + x = is.POSIXct(datetime), + y = is.POSIXct(integer), + x2 = lubridate::is.POSIXct(datetime) + ) %>% collect(), test_df ) compare_dplyr_binding( .input %>% - mutate(x = is.Date(date), y = is.Date(integer)) %>% + mutate( + x = is.Date(date), + y = is.Date(integer), + x2 = lubridate::is.Date(date) + ) %>% collect(), test_df ) @@ -368,7 +392,10 @@ test_that("is.* functions from lubridate", { mutate( x = is.timepoint(datetime), y = is.instant(date), - z = is.timepoint(integer) + z = is.timepoint(integer), + x2 = lubridate::is.timepoint(datetime), + y2 = lubridate::is.instant(date), + z2 = lubridate::is.timepoint(integer) ) %>% collect(), test_df @@ -398,7 +425,10 @@ test_that("extract isoyear from timestamp", { test_that("extract epiyear from timestamp", { compare_dplyr_binding( .input %>% - mutate(x = epiyear(datetime)) %>% + mutate( + x = epiyear(datetime), + x2 = lubridate::epiyear(datetime) + ) %>% collect(), test_df ) @@ -416,7 +446,10 @@ test_that("extract quarter from timestamp", { test_that("extract month from timestamp", { compare_dplyr_binding( .input %>% - mutate(x = month(datetime)) %>% + mutate( + x = month(datetime), + x2 = lubridate::month(datetime) + ) %>% collect(), test_df ) @@ -442,7 +475,10 @@ test_that("extract month from timestamp", { test_that("extract isoweek from timestamp", { compare_dplyr_binding( .input %>% - mutate(x = isoweek(datetime)) %>% + mutate( + x = isoweek(datetime), + x2 = lubridate::isoweek(datetime) + ) %>% collect(), test_df ) @@ -460,7 +496,10 @@ test_that("extract epiweek from timestamp", { test_that("extract week from timestamp", { compare_dplyr_binding( .input %>% - mutate(x = week(datetime)) %>% + mutate( + x = week(datetime), + x2 = lubridate::week(datetime) + ) %>% collect(), test_df ) @@ -526,7 +565,10 @@ test_that("extract mday from timestamp", { test_that("extract yday from timestamp", { compare_dplyr_binding( .input %>% - mutate(x = yday(datetime)) %>% + mutate( + x = yday(datetime), + x2 = lubridate::yday(datetime) + ) %>% collect(), test_df ) @@ -535,7 +577,10 @@ test_that("extract yday from timestamp", { test_that("extract hour from timestamp", { compare_dplyr_binding( .input %>% - mutate(x = hour(datetime)) %>% + mutate( + x = hour(datetime), + x2 = lubridate::hour(datetime) + ) %>% collect(), test_df ) @@ -544,7 +589,10 @@ test_that("extract hour from timestamp", { test_that("extract minute from timestamp", { compare_dplyr_binding( .input %>% - mutate(x = minute(datetime)) %>% + mutate( + x = minute(datetime), + x2 = lubridate::minute(datetime) + ) %>% collect(), test_df ) @@ -553,7 +601,10 @@ test_that("extract minute from timestamp", { test_that("extract second from timestamp", { compare_dplyr_binding( .input %>% - mutate(x = second(datetime)) %>% + mutate( + x = second(datetime), + x2 = lubridate::second(datetime) + ) %>% collect(), test_df, # arrow supports nanosecond resolution but lubridate does not @@ -566,7 +617,10 @@ test_that("extract second from timestamp", { test_that("extract year from date", { compare_dplyr_binding( .input %>% - mutate(x = year(date)) %>% + mutate( + x = year(date), + x2 = lubridate::year(date) + ) %>% collect(), test_df ) @@ -575,7 +629,10 @@ test_that("extract year from date", { test_that("extract isoyear from date", { compare_dplyr_binding( .input %>% - mutate(x = isoyear(date)) %>% + mutate( + x = isoyear(date), + x2 = lubridate::isoyear(date) + ) %>% collect(), test_df ) @@ -593,7 +650,10 @@ test_that("extract epiyear from date", { test_that("extract quarter from date", { compare_dplyr_binding( .input %>% - mutate(x = quarter(date)) %>% + mutate( + x = quarter(date), + x2 = lubridate::quarter(date) + ) %>% collect(), test_df ) @@ -611,7 +671,10 @@ test_that("extract isoweek from date", { test_that("extract epiweek from date", { compare_dplyr_binding( .input %>% - mutate(x = epiweek(date)) %>% + mutate( + x = epiweek(date), + x2 = lubridate::epiweek(date) + ) %>% collect(), test_df ) @@ -655,7 +718,10 @@ test_that("extract month from date", { test_that("extract day from date", { compare_dplyr_binding( .input %>% - mutate(x = day(date)) %>% + mutate( + x = day(date), + x2 = lubridate::day(date) + ) %>% collect(), test_df ) @@ -671,7 +737,10 @@ test_that("extract wday from date", { compare_dplyr_binding( .input %>% - mutate(x = wday(date, week_start = 3)) %>% + mutate( + x = wday(date, week_start = 3), + x2 = lubridate::wday(date, week_start = 3) + ) %>% collect(), test_df ) @@ -703,7 +772,10 @@ test_that("extract wday from date", { test_that("extract mday from date", { compare_dplyr_binding( .input %>% - mutate(x = mday(date)) %>% + mutate( + x = mday(date), + x2 = lubridate::mday(date) + ) %>% collect(), test_df ) @@ -721,7 +793,10 @@ test_that("extract yday from date", { test_that("leap_year mirror lubridate", { compare_dplyr_binding( .input %>% - mutate(x = leap_year(date)) %>% + mutate( + x = leap_year(date), + x2 = lubridate::leap_year(date) + ) %>% collect(), test_df ) @@ -753,7 +828,9 @@ test_that("am/pm mirror lubridate", { .input %>% mutate( am = am(test_time), - pm = pm(test_time) + pm = pm(test_time), + am2 = lubridate::am(test_time), + pm2 = lubridate::pm(test_time) ) %>% collect(), data.frame( @@ -776,7 +853,10 @@ test_that("extract tz", { compare_dplyr_binding( .input %>% - mutate(timezone_posixct_date = tz(posixct_date)) %>% + mutate( + timezone_posixct_date = tz(posixct_date), + timezone_posixct_date2 = lubridate::tz(posixct_date) + ) %>% collect(), df ) @@ -819,6 +899,7 @@ test_that("semester works with temporal types and integers", { .input %>% mutate( sem_wo_year = semester(dates), + sem_wo_year2 = lubridate::semester(dates), sem_w_year = semester(dates, with_year = TRUE) ) %>% collect(), @@ -849,7 +930,10 @@ test_that("dst extracts daylight savings time correctly", { compare_dplyr_binding( .input %>% - mutate(dst = dst(dates)) %>% + mutate( + dst = dst(dates), + dst2 = lubridate::dst(dates) + ) %>% collect(), test_df ) @@ -937,15 +1021,9 @@ test_that("date works in arrow", { r_date_object <- lubridate::ymd_hms("2012-03-26 23:12:13") - # we can't (for now) use namespacing, so we need to make sure lubridate::date() - # and not base::date() is being used. This is due to the way testthat runs and - # normal use of arrow would not have to do this explicitly. - # TODO: remove after ARROW-14575 - date <- lubridate::date - compare_dplyr_binding( .input %>% - mutate(a_date = date(posixct_date)) %>% + mutate(a_date = lubridate::date(posixct_date)) %>% collect(), test_df ) @@ -959,7 +1037,7 @@ test_that("date works in arrow", { compare_dplyr_binding( .input %>% - mutate(date_from_r_object = date(r_date_object)) %>% + mutate(date_from_r_object = lubridate::date(r_date_object)) %>% collect(), test_df ) @@ -1026,7 +1104,10 @@ test_that("make_date & make_datetime", { compare_dplyr_binding( .input %>% - mutate(composed_date = make_date(year, month, day)) %>% + mutate( + composed_date = make_date(year, month, day), + composed_date2 = lubridate::make_date(year, month, day) + ) %>% collect(), test_df ) @@ -1040,7 +1121,10 @@ test_that("make_date & make_datetime", { compare_dplyr_binding( .input %>% - mutate(composed_datetime = make_datetime(year, month, day, hour, min, sec)) %>% + mutate( + composed_datetime = make_datetime(year, month, day, hour, min, sec), + composed_datetime2 = lubridate::make_datetime(year, month, day, hour, min, sec) + ) %>% collect(), test_df, # the make_datetime binding uses strptime which does not support tz, hence @@ -1074,7 +1158,10 @@ test_that("ISO_datetime & ISOdate", { compare_dplyr_binding( .input %>% - mutate(composed_date = ISOdate(year, month, day)) %>% + mutate( + composed_date = ISOdate(year, month, day), + composed_date2 = base::ISOdate(year, month, day) + ) %>% collect(), test_df, # the make_datetime binding uses strptime which does not support tz, hence @@ -1096,7 +1183,8 @@ test_that("ISO_datetime & ISOdate", { compare_dplyr_binding( .input %>% mutate( - composed_datetime = ISOdatetime(year, month, day, hour, min, sec, tz = "UTC") + composed_datetime = ISOdatetime(year, month, day, hour, min, sec, tz = "UTC"), + composed_datetime2 = base::ISOdatetime(year, month, day, hour, min, sec, tz = "UTC") ) %>% collect(), test_df, @@ -1118,7 +1206,7 @@ test_that("ISO_datetime & ISOdate", { ) }) -test_that("difftime works correctly", { +test_that("difftime()", { test_df <- tibble( time1 = as.POSIXct( c("2021-02-20", "2021-07-31 0:0:0", "2021-10-30", "2021-01-31 0:0:0") @@ -1132,7 +1220,8 @@ test_that("difftime works correctly", { compare_dplyr_binding( .input %>% mutate( - secs2 = difftime(time1, time2, units = "secs") + secs = difftime(time1, time2, units = "secs"), + secs2 = base::difftime(time1, time2, units = "secs") ) %>% collect(), test_df, @@ -1204,7 +1293,10 @@ test_that("as.difftime()", { compare_dplyr_binding( .input %>% - mutate(hms_difftime = as.difftime(hms_string, units = "secs")) %>% + mutate( + hms_difftime = as.difftime(hms_string, units = "secs"), + hms_difftime2 = base::as.difftime(hms_string, units = "secs") + ) %>% collect(), test_df ) @@ -1275,10 +1367,12 @@ test_that("`decimal_date()` and `date_decimal()`", { .input %>% mutate( decimal_date_from_POSIXct = decimal_date(b), + decimal_date_from_POSIXct2 = lubridate::decimal_date(b), decimal_date_from_r_POSIXct_obj = decimal_date(as.POSIXct("2022-03-25 15:37:01")), decimal_date_from_r_date_obj = decimal_date(as.Date("2022-03-25")), decimal_date_from_date = decimal_date(c), date_from_decimal = date_decimal(a), + date_from_decimal2 = lubridate::date_decimal(a), date_from_decimal_r_obj = date_decimal(2022.178) ) %>% collect(), @@ -1333,7 +1427,13 @@ test_that("dminutes, dhours, ddays, dweeks, dmonths, dyears", { r_obj_ddays = ddays(3), r_obj_dweeks = dweeks(4), r_obj_dmonths = dmonths(5), - r_obj_dyears = dyears(6) + r_obj_dyears = dyears(6), + r_obj_dminutes2 = lubridate::dminutes(1), + r_obj_dhours2 = lubridate::dhours(2), + r_obj_ddays2 = lubridate::ddays(3), + r_obj_dweeks2 = lubridate::dweeks(4), + r_obj_dmonths2 = lubridate::dmonths(5), + r_obj_dyears2 = lubridate::dyears(6) ) %>% collect(), tibble(), @@ -1366,6 +1466,10 @@ test_that("dseconds, dmilliseconds, dmicroseconds, dnanoseconds, dpicoseconds", dmilliseconds = dmilliseconds(x), dmicroseconds = dmicroseconds(x), dnanoseconds = dnanoseconds(x), + dseconds2 = lubridate::dseconds(x), + dmilliseconds2 = lubridate::dmilliseconds(x), + dmicroseconds2 = lubridate::dmicroseconds(x), + dnanoseconds2 = lubridate::dnanoseconds(x), ) %>% collect(), example_d, @@ -1404,6 +1508,11 @@ test_that("dseconds, dmilliseconds, dmicroseconds, dnanoseconds, dpicoseconds", "Duration in picoseconds not supported in Arrow" ) + expect_error( + call_binding("lubridate::dpicoseconds"), + "Duration in picoseconds not supported in Arrow" + ) + # double -> duration not supported in Arrow. # Error is generated in the C++ code expect_error( @@ -1448,6 +1557,14 @@ test_that("make_difftime()", { day = 2, week = 4, units = "secs" + ), + duration_from_parts2 = lubridate::make_difftime( + second = seconds, + minute = minutes, + hour = hours, + day = days, + week = weeks, + units = "secs" ) ) %>% collect(), @@ -1526,6 +1643,7 @@ test_that("`as.Date()` and `as_date()`", { .input %>% mutate( date_dv1 = as.Date(date_var), + date_dv1_nmspc = base::as.Date(date_var), date_pv1 = as.Date(posixct_var), date_pv_tz1 = as.Date(posixct_var, tz = "Pacific/Marquesas"), date_utc1 = as.Date(dt_utc), @@ -1536,6 +1654,7 @@ test_that("`as.Date()` and `as_date()`", { date_int_origin1 = as.Date(integer_var, origin = "1970-01-03"), date_integerish1 = as.Date(integerish_var, origin = "1970-01-01"), date_dv2 = as_date(date_var), + date_dv2_nmspc = lubridate::as_date(date_var), date_pv2 = as_date(posixct_var), date_pv_tz2 = as_date(posixct_var, tz = "Pacific/Marquesas"), date_utc2 = as_date(dt_utc), @@ -1668,22 +1787,11 @@ test_that("`as_datetime()`", { double_date = c(10.1, 25.2, NA) ) - test_df %>% - arrow_table() %>% - mutate( - ddate = as_datetime(date), - dchar_date_no_tz = as_datetime(char_date), - dchar_date_non_iso = as_datetime(char_date_non_iso, format = "%Y-%d-%m %H:%M:%S"), - dint_date = as_datetime(int_date, origin = "1970-01-02"), - dintegerish_date = as_datetime(integerish_date, origin = "1970-01-02"), - dintegerish_date2 = as_datetime(integerish_date, origin = "1970-01-01") - ) %>% - collect() - compare_dplyr_binding( .input %>% mutate( ddate = as_datetime(date), + ddate2 = lubridate::as_datetime(date), dchar_date_no_tz = as_datetime(char_date), dchar_date_with_tz = as_datetime(char_date, tz = "Pacific/Marquesas"), dint_date = as_datetime(int_date, origin = "1970-01-02"), @@ -1715,6 +1823,7 @@ test_that("parse_date_time() works with year, month, and date components", { .input %>% mutate( parsed_date_ymd = parse_date_time(string_ymd, orders = "ymd"), + parsed_date_ymd2 = lubridate::parse_date_time(string_ymd, orders = "ymd"), parsed_date_dmy = parse_date_time(string_dmy, orders = "dmy"), parsed_date_mdy = parse_date_time(string_mdy, orders = "mdy") ) %>% @@ -1807,7 +1916,13 @@ test_that("year, month, day date/time parsers", { mdy_date = mdy(mdy_string), myd_date = myd(myd_string), dmy_date = dmy(dmy_string), - dym_date = dym(dym_string) + dym_date = dym(dym_string), + ymd_date2 = lubridate::ymd(ymd_string), + ydm_date2 = lubridate::ydm(ydm_string), + mdy_date2 = lubridate::mdy(mdy_string), + myd_date2 = lubridate::myd(myd_string), + dmy_date2 = lubridate::dmy(dmy_string), + dym_date2 = lubridate::dym(dym_string) ) %>% collect(), test_df @@ -1849,14 +1964,17 @@ test_that("ym, my & yq parsers", { .input %>% mutate( ym_date = ym(ym_string), + ym_date2 = lubridate::ym(ym_string), ym_datetime = ym(ym_string, tz = "Pacific/Marquesas"), Ym_date = ym(Ym_string), Ym_datetime = ym(Ym_string, tz = "Pacific/Marquesas"), my_date = my(my_string), + my_date2 = lubridate::my(my_string), my_datetime = my(my_string, tz = "Pacific/Marquesas"), mY_date = my(mY_string), mY_datetime = my(mY_string, tz = "Pacific/Marquesas"), yq_date_from_string = yq(yq_string), + yq_date_from_string2 = lubridate::yq(yq_string), yq_datetime_from_string = yq(yq_string, tz = "Pacific/Marquesas"), yq_date_from_numeric = yq(yq_numeric), yq_datetime_from_numeric = yq(yq_numeric, tz = "Pacific/Marquesas"), @@ -1891,12 +2009,8 @@ test_that("lubridate's fast_strptime", { compare_dplyr_binding( .input %>% mutate( - y = - fast_strptime( - x, - format = "%Y-%m-%d %H:%M:%S", - lt = FALSE - ) + y = fast_strptime(x, format = "%Y-%m-%d %H:%M:%S", lt = FALSE), + y2 = lubridate::fast_strptime(x, format = "%Y-%m-%d %H:%M:%S", lt = FALSE) ) %>% collect(), tibble( diff --git a/r/tests/testthat/test-dplyr-funcs-math.R b/r/tests/testthat/test-dplyr-funcs-math.R index 47a9f0b7c02..5f7da452395 100644 --- a/r/tests/testthat/test-dplyr-funcs-math.R +++ b/r/tests/testthat/test-dplyr-funcs-math.R @@ -25,7 +25,9 @@ test_that("abs()", { compare_dplyr_binding( .input %>% - transmute(abs = abs(x)) %>% + transmute( + abs = abs(x), + abs2 = base::abs(x)) %>% collect(), df ) @@ -36,7 +38,10 @@ test_that("sign()", { compare_dplyr_binding( .input %>% - transmute(sign = sign(x)) %>% + transmute( + sign = sign(x), + sign2 = base::sign(x) + ) %>% collect(), df ) @@ -51,7 +56,11 @@ test_that("ceiling(), floor(), trunc(), round()", { c = ceiling(x), f = floor(x), t = trunc(x), - r = round(x) + r = round(x), + c2 = base::ceiling(x), + f2 = base::floor(x), + t2 = base::trunc(x), + r2 = base::round(x) ) %>% collect(), df @@ -141,7 +150,10 @@ test_that("log functions", { compare_dplyr_binding( .input %>% - mutate(y = log(x)) %>% + mutate( + y = log(x), + y2 = base::log(x) + ) %>% collect(), df ) @@ -248,6 +260,19 @@ test_that("log functions", { collect(), df ) + + # with namespacing + compare_dplyr_binding( + .input %>% + mutate( + a = base::logb(x), + b = base::log1p(x), + c = base::log2(x), + d = base::log10(x) + ) %>% + collect(), + df + ) }) test_that("trig functions", { @@ -287,6 +312,20 @@ test_that("trig functions", { collect(), df ) + + # with namespacing + compare_dplyr_binding( + .input %>% + mutate( + a = base::sin(x), + b = base::cos(x), + c = base::tan(x), + d = base::asin(x), + e = base::acos(x) + ) %>% + collect(), + df + ) }) test_that("arith functions ", { @@ -336,7 +375,10 @@ test_that("exp()", { compare_dplyr_binding( .input %>% - mutate(y = exp(x)) %>% + mutate( + y = exp(x), + y2 = base::exp(x) + ) %>% collect(), df ) @@ -347,7 +389,10 @@ test_that("sqrt()", { compare_dplyr_binding( .input %>% - mutate(y = sqrt(x)) %>% + mutate( + y = sqrt(x), + y2 = base::sqrt(x) + ) %>% collect(), df ) diff --git a/r/tests/testthat/test-dplyr-funcs-string.R b/r/tests/testthat/test-dplyr-funcs-string.R index c4d54d325f4..423fe1ccd8e 100644 --- a/r/tests/testthat/test-dplyr-funcs-string.R +++ b/r/tests/testthat/test-dplyr-funcs-string.R @@ -23,6 +23,14 @@ library(lubridate) library(stringr) library(stringi) +tbl <- example_data +# Add some better string data +tbl$verses <- verses[[1]] +# c(" a ", " b ", " c ", ...) increasing padding +# nchar = 3 5 7 9 11 13 15 17 19 21 +tbl$padded_strings <- stringr::str_pad(letters[1:10], width = 2 * (1:10) + 1, side = "both") +tbl$some_grouping <- rep(c(1, 2), 5) + test_that("paste, paste0, and str_c", { df <- tibble( v = c("A", "B", "C"), @@ -37,7 +45,10 @@ test_that("paste, paste0, and str_c", { # no NAs in data compare_dplyr_binding( .input %>% - transmute(paste(v, w)) %>% + transmute( + a = paste(v, w), + a2 = base::paste(v, w) + ) %>% collect(), df ) @@ -49,13 +60,18 @@ test_that("paste, paste0, and str_c", { ) compare_dplyr_binding( .input %>% - transmute(paste0(v, w)) %>% + transmute( + a = paste0(v, w), + a2 = base::paste0(v, w)) %>% collect(), df ) compare_dplyr_binding( .input %>% - transmute(str_c(v, w)) %>% + transmute( + a = str_c(v, w), + a2 = stringr::str_c(v, w) + ) %>% collect(), df ) @@ -236,6 +252,13 @@ test_that("grepl", { collect(), df ) + # with namespacing + compare_dplyr_binding( + .input %>% + filter(base::grepl("Foo", x, fixed = fixed)) %>% + collect(), + df + ) } }) @@ -283,7 +306,10 @@ test_that("str_detect", { ) compare_dplyr_binding( .input %>% - transmute(x = str_detect(x, regex("^f[A-Z]{2}", ignore_case = TRUE))) %>% + transmute( + a = str_detect(x, regex("^f[A-Z]{2}", ignore_case = TRUE)), + a2 = stringr::str_detect(x, regex("^f[A-Z]{2}", ignore_case = TRUE)) + ) %>% collect(), df ) @@ -372,6 +398,22 @@ test_that("sub and gsub with ignore.case = TRUE and fixed = TRUE", { ) }) +test_that("sub and gsub with namespacing", { + compare_dplyr_binding( + .input %>% + mutate(verses_new = base::gsub("o", "u", verses, fixed = TRUE)) %>% + collect(), + tbl + ) + + compare_dplyr_binding( + .input %>% + mutate(verses_new = base::sub("o", "u", verses, fixed = TRUE)) %>% + collect(), + tbl + ) +}) + test_that("str_replace and str_replace_all", { df <- tibble(x = c("Foo", "bar")) @@ -404,13 +446,19 @@ test_that("str_replace and str_replace_all", { ) compare_dplyr_binding( .input %>% - transmute(x = str_replace_all(x, fixed("o"), "u")) %>% + transmute( + x = str_replace_all(x, fixed("o"), "u"), + x2 = stringr::str_replace_all(x, fixed("o"), "u") + ) %>% collect(), df ) compare_dplyr_binding( .input %>% - transmute(x = str_replace(x, fixed("O"), "u")) %>% + transmute( + x = str_replace(x, fixed("O"), "u"), + x2 = stringr::str_replace(x, fixed("O"), "u") + ) %>% collect(), df ) @@ -443,14 +491,20 @@ test_that("strsplit and str_split", { ) compare_dplyr_binding( .input %>% - mutate(x = strsplit(x, " +and +")) %>% + mutate( + a = strsplit(x, " +and +"), + a2 = base::strsplit(x, " +and +") + ) %>% collect(), df, ignore_attr = TRUE ) compare_dplyr_binding( .input %>% - mutate(x = str_split(x, "and")) %>% + mutate( + a = str_split(x, "and"), + a2 = stringr::str_split(x, "and") + ) %>% collect(), df, ignore_attr = TRUE @@ -511,7 +565,10 @@ test_that("str_to_lower, str_to_upper, and str_to_title", { transmute( x_lower = str_to_lower(x), x_upper = str_to_upper(x), - x_title = str_to_title(x) + x_title = str_to_title(x), + x_lower_nmspc = stringr::str_to_lower(x), + x_upper_nmspc = stringr::str_to_upper(x), + x_title_nmspc = stringr::str_to_title(x) ) %>% collect(), df @@ -802,6 +859,14 @@ test_that("str_like", { collect(), tibble(x = c(FALSE, FALSE)) ) + # with namespacing + expect_equal( + df %>% + Table$create() %>% + mutate(x = stringr::str_like(x, "baz")) %>% + collect(), + tibble(x = c(FALSE, FALSE)) + ) # Match - entire string expect_equal( @@ -882,7 +947,10 @@ test_that("str_pad", { compare_dplyr_binding( .input %>% - mutate(x = str_pad(x, width = 31, side = "both")) %>% + mutate( + a = str_pad(x, width = 31, side = "both"), + a2 = stringr::str_pad(x, width = 31, side = "both") + ) %>% collect(), df ) @@ -949,7 +1017,10 @@ test_that("substr", { compare_dplyr_binding( .input %>% - mutate(y = substr(x, -5, -1)) %>% + mutate( + y = substr(x, -5, -1), + y2 = base::substr(x, -5, -1) + ) %>% collect(), df ) @@ -972,7 +1043,10 @@ test_that("substring", { compare_dplyr_binding( .input %>% - mutate(y = substring(x, 1, 6)) %>% + mutate( + y = substring(x, 1, 6), + y2 = base::substring(x, 1, 6) + ) %>% collect(), df ) @@ -1046,7 +1120,10 @@ test_that("str_sub", { compare_dplyr_binding( .input %>% - mutate(y = str_sub(x, -5, -1)) %>% + mutate( + y = str_sub(x, -5, -1), + y2 = stringr::str_sub(x, -5, -1) + ) %>% collect(), df ) @@ -1097,6 +1174,7 @@ test_that("str_starts, str_ends, startsWith, endsWith", { .input %>% transmute( a = str_starts(x, "b.*"), + a2 = stringr::str_starts(x, "b.*"), b = str_starts(x, "b.*", negate = TRUE), c = str_starts(x, fixed("b")), d = str_starts(x, fixed("b"), negate = TRUE) @@ -1137,6 +1215,7 @@ test_that("str_starts, str_ends, startsWith, endsWith", { .input %>% transmute( a = str_ends(x, "r"), + a2 = stringr::str_ends(x, "r"), b = str_ends(x, "r", negate = TRUE), c = str_ends(x, fixed("r")), d = str_ends(x, fixed("r"), negate = TRUE) @@ -1144,6 +1223,7 @@ test_that("str_starts, str_ends, startsWith, endsWith", { collect(), df ) + compare_dplyr_binding( .input %>% filter(startsWith(x, "b")) %>% @@ -1176,7 +1256,9 @@ test_that("str_starts, str_ends, startsWith, endsWith", { .input %>% transmute( a = startsWith(x, "b"), - b = endsWith(x, "r") + b = endsWith(x, "r"), + a2 = base::startsWith(x, "b"), + b2 = base::endsWith(x, "r") ) %>% collect(), df @@ -1191,7 +1273,10 @@ test_that("str_count", { compare_dplyr_binding( .input %>% - mutate(a_count = str_count(cities, pattern = "a")) %>% + mutate( + a_count = str_count(cities, pattern = "a"), + a_count_nmspc = stringr::str_count(cities, pattern = "a") + ) %>% collect(), df ) @@ -1242,3 +1327,73 @@ test_that("str_count", { df ) }) + +test_that("base::tolower and base::toupper", { + compare_dplyr_binding( + .input %>% + mutate( + verse_to_upper = toupper(verses), + verse_to_lower = tolower(verses), + verse_to_upper_nmspc = base::toupper(verses), + verse_to_lower_nmspc = base::tolower(verses) + ) %>% + collect(), + tbl + ) +}) + +test_that("namespaced unary and binary string functions", { + # str_length and stringi::stri_reverse + compare_dplyr_binding( + .input %>% + mutate( + verse_length = stringr::str_length(verses), + reverses_verse = stringi::stri_reverse(verses) + ) %>% + collect(), + tbl + ) + + # stringr::str_dup and base::strrep + df <- tibble(x = c("foo1", " \tB a R\n", "!apACHe aRroW!")) + for (times in 0:8) { + compare_dplyr_binding( + .input %>% + mutate(x = base::strrep(x, times)) %>% + collect(), + df + ) + + compare_dplyr_binding( + .input %>% + mutate(x = stringr::str_dup(x, times)) %>% + collect(), + df + ) + } +}) + +test_that("nchar with namespacing", { + compare_dplyr_binding( + .input %>% + mutate(verses_nchar = base::nchar(verses)) %>% + collect(), + tbl + ) +}) + +test_that("str_trim()", { + compare_dplyr_binding( + .input %>% + mutate( + left_trim_padded_string = str_trim(padded_strings, "left"), + right_trim_padded_string = str_trim(padded_strings, "right"), + both_trim_padded_string = str_trim(padded_strings, "both"), + left_trim_padded_string_nmspc = stringr::str_trim(padded_strings, "left"), + right_trim_padded_string_nmspc = stringr::str_trim(padded_strings, "right"), + both_trim_padded_string_nmspc = stringr::str_trim(padded_strings, "both") + ) %>% + collect(), + tbl + ) +}) diff --git a/r/tests/testthat/test-dplyr-funcs-type.R b/r/tests/testthat/test-dplyr-funcs-type.R index b32fe8f7f88..3f274b97f7f 100644 --- a/r/tests/testthat/test-dplyr-funcs-type.R +++ b/r/tests/testthat/test-dplyr-funcs-type.R @@ -119,6 +119,10 @@ test_that("explicit type conversions with as.*()", { chr2dbl = as.double(chr), chr2int = as.integer(chr), chr2num = as.numeric(chr), + chr2chr2 = base::as.character(chr), + chr2dbl2 = base::as.double(chr), + chr2int2 = base::as.integer(chr), + chr2num2 = base::as.numeric(chr), rchr2chr = as.character("string"), rchr2dbl = as.double("1.5"), rchr2int = as.integer("1"), @@ -131,6 +135,7 @@ test_that("explicit type conversions with as.*()", { .input %>% transmute( chr2i64 = as.integer64(chr), + chr2i64_nmspc = bit64::as.integer64(chr), dbl2i64 = as.integer64(dbl), i642i64 = as.integer64(i64), rchr2i64 = as.integer64("10000000000"), @@ -144,6 +149,7 @@ test_that("explicit type conversions with as.*()", { .input %>% transmute( chr2lgl = as.logical(chr), + chr2lgl2 = base::as.logical(chr), dbl2lgl = as.logical(dbl), int2lgl = as.logical(int), rchr2lgl = as.logical("TRUE"), @@ -208,7 +214,9 @@ test_that("is.finite(), is.infinite(), is.nan()", { .input %>% transmute( is_fin = is.finite(x), - is_inf = is.infinite(x) + is_inf = is.infinite(x), + is_fin2 = base::is.finite(x), + is_inf2 = base::is.infinite(x) ) %>% collect(), df @@ -217,7 +225,8 @@ test_that("is.finite(), is.infinite(), is.nan()", { compare_dplyr_binding( .input %>% transmute( - is_nan = is.nan(x) + is_nan = is.nan(x), + is_nan2 = base::is.nan(x) ) %>% collect(), df @@ -229,7 +238,8 @@ test_that("is.na() evaluates to TRUE on NaN (ARROW-12055)", { compare_dplyr_binding( .input %>% transmute( - is_na = is.na(x) + is_na = is.na(x), + is_na2 = base::is.na(x) ) %>% collect(), df @@ -246,40 +256,41 @@ test_that("type checks with is() giving Arrow types", { dec256 = Array$create(pi)$cast(decimal256(3, 2)), f64 = Array$create(1.1, float64()), str = Array$create("a", arrow::string()) - ) %>% transmute( - i32_is_i32 = is(i32, int32()), - i32_is_dec = is(i32, decimal(3, 2)), - i32_is_dec128 = is(i32, decimal128(3, 2)), - i32_is_dec256 = is(i32, decimal256(3, 2)), - i32_is_i64 = is(i32, float64()), - i32_is_str = is(i32, arrow::string()), - dec_is_i32 = is(dec, int32()), - dec_is_dec = is(dec, decimal(3, 2)), - dec_is_dec128 = is(dec, decimal128(3, 2)), - dec_is_dec256 = is(dec, decimal256(3, 2)), - dec_is_i64 = is(dec, float64()), - dec_is_str = is(dec, arrow::string()), - dec128_is_i32 = is(dec128, int32()), - dec128_is_dec128 = is(dec128, decimal128(3, 2)), - dec128_is_dec256 = is(dec128, decimal256(3, 2)), - dec128_is_i64 = is(dec128, float64()), - dec128_is_str = is(dec128, arrow::string()), - dec256_is_i32 = is(dec128, int32()), - dec256_is_dec128 = is(dec128, decimal128(3, 2)), - dec256_is_dec256 = is(dec128, decimal256(3, 2)), - dec256_is_i64 = is(dec128, float64()), - dec256_is_str = is(dec128, arrow::string()), - f64_is_i32 = is(f64, int32()), - f64_is_dec = is(f64, decimal(3, 2)), - f64_is_dec128 = is(f64, decimal128(3, 2)), - f64_is_dec256 = is(f64, decimal256(3, 2)), - f64_is_i64 = is(f64, float64()), - f64_is_str = is(f64, arrow::string()), - str_is_i32 = is(str, int32()), - str_is_dec128 = is(str, decimal128(3, 2)), - str_is_dec256 = is(str, decimal256(3, 2)), - str_is_i64 = is(str, float64()), - str_is_str = is(str, arrow::string()) + ) %>% + transmute( + i32_is_i32 = is(i32, int32()), + i32_is_dec = is(i32, decimal(3, 2)), + i32_is_dec128 = is(i32, decimal128(3, 2)), + i32_is_dec256 = is(i32, decimal256(3, 2)), + i32_is_f64 = is(i32, float64()), + i32_is_str = is(i32, string()), + dec_is_i32 = is(dec, int32()), + dec_is_dec = is(dec, decimal(3, 2)), + dec_is_dec128 = is(dec, decimal128(3, 2)), + dec_is_dec256 = is(dec, decimal256(3, 2)), + dec_is_f64 = is(dec, float64()), + dec_is_str = is(dec, string()), + dec128_is_i32 = is(dec128, int32()), + dec128_is_dec128 = is(dec128, decimal128(3, 2)), + dec128_is_dec256 = is(dec128, decimal256(3, 2)), + dec128_is_f64 = is(dec128, float64()), + dec128_is_str = is(dec128, string()), + dec256_is_i32 = is(dec128, int32()), + dec256_is_dec128 = is(dec128, decimal128(3, 2)), + dec256_is_dec256 = is(dec128, decimal256(3, 2)), + dec256_is_f64 = is(dec128, float64()), + dec256_is_str = is(dec128, string()), + f64_is_i32 = is(f64, int32()), + f64_is_dec = is(f64, decimal(3, 2)), + f64_is_dec128 = is(f64, decimal128(3, 2)), + f64_is_dec256 = is(f64, decimal256(3, 2)), + f64_is_f64 = is(f64, float64()), + f64_is_str = is(f64, string()), + str_is_i32 = is(str, int32()), + str_is_dec128 = is(str, decimal128(3, 2)), + str_is_dec256 = is(str, decimal256(3, 2)), + str_is_i64 = is(str, float64()), + str_is_str = is(str, string()) ) %>% collect() %>% t() %>% @@ -300,6 +311,9 @@ test_that("type checks with is() giving Arrow types", { i32_is_i32 = is(i32, "int32"), i32_is_i64 = is(i32, "double"), i32_is_str = is(i32, "string"), + i32_is_i32_nmspc = methods::is(i32, "int32"), + i32_is_i64_nmspc = methods::is(i32, "double"), + i32_is_str_nmspc = methods::is(i32, "string"), f64_is_i32 = is(f64, "int32"), f64_is_i64 = is(f64, "double"), f64_is_str = is(f64, "string"), @@ -310,7 +324,7 @@ test_that("type checks with is() giving Arrow types", { collect() %>% t() %>% as.vector(), - c(TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE) + c(TRUE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE) ) # with class2=string alias expect_equal( @@ -443,6 +457,14 @@ test_that("type checks with is.*()", { chr_is_lst = is.list(chr), chr_is_lgl = is.logical(chr), chr_is_num = is.numeric(chr), + chr_is_chr2 = base::is.character(chr), + chr_is_dbl2 = base::is.double(chr), + chr_is_fct2 = base::is.factor(chr), + chr_is_int2 = base::is.integer(chr), + chr_is_i642 = bit64::is.integer64(chr), + chr_is_lst2 = base::is.list(chr), + chr_is_lgl2 = base::is.logical(chr), + chr_is_num2 = base::is.numeric(chr), dbl_is_chr = is.character(dbl), dbl_is_dbl = is.double(dbl), dbl_is_fct = is.factor(dbl), @@ -519,6 +541,11 @@ test_that("type checks with is_*()", { chr_is_int = is_integer(chr), chr_is_lst = is_list(chr), chr_is_lgl = is_logical(chr), + chr_is_chr2 = rlang::is_character(chr), + chr_is_dbl2 = rlang::is_double(chr), + chr_is_int2 = rlang::is_integer(chr), + chr_is_lst2 = rlang::is_list(chr), + chr_is_lgl2 = rlang::is_logical(chr), dbl_is_chr = is_character(dbl), dbl_is_dbl = is_double(dbl), dbl_is_int = is_integer(dbl), @@ -599,7 +626,10 @@ test_that("as.factor()/dictionary_encode()", { compare_dplyr_binding( .input %>% - transmute(x = as.factor(x)) %>% + transmute( + x = as.factor(x), + x2 = base::as.factor(x) + ) %>% collect(), df1 ) @@ -689,6 +719,10 @@ test_that("structs/nested data frames/tibbles can be created", { df_col = tibble( regular_col1 = regular_col1, regular_col2 = regular_col2 + ), + df_col2 = tibble::tibble( + regular_col1 = regular_col1, + regular_col2 = regular_col2 ) ) %>% collect(), @@ -755,10 +789,14 @@ test_that("structs/nested data frames/tibbles can be created", { compare_dplyr_binding( .input %>% transmute( - df_col = data.frame(regular_col1, regular_col1, check.names = FALSE) + df_col = data.frame(regular_col1, regular_col1, check.names = FALSE), + df_col2 = base::data.frame(regular_col1, regular_col1, check.names = FALSE) ) %>% collect() %>% - mutate(df_col = as.data.frame(df_col)), + mutate( + df_col = as.data.frame(df_col), + df_col2 = as.data.frame(df_col2) + ), df ) @@ -822,7 +860,10 @@ test_that("format date/time", { compare_dplyr_binding( .input %>% - mutate(x = format(datetime, format = formats)) %>% + mutate( + x = format(datetime, format = formats), + x2 = base::format(datetime, format = formats) + ) %>% collect(), times ) diff --git a/r/tests/testthat/test-dplyr-funcs.R b/r/tests/testthat/test-dplyr-funcs.R index d96b4b2cf87..2156ad9af06 100644 --- a/r/tests/testthat/test-dplyr-funcs.R +++ b/r/tests/testthat/test-dplyr-funcs.R @@ -18,22 +18,29 @@ test_that("register_binding() works", { fake_registry <- new.env(parent = emptyenv()) fun1 <- function() NULL + fun2 <- function() "Hello" - expect_null(register_binding("some_fun", fun1, fake_registry)) + expect_null(register_binding("some.pkg::some_fun", fun1, fake_registry)) expect_identical(fake_registry$some_fun, fun1) + expect_identical(fake_registry$`some.pkg::some_fun`, fun1) - expect_identical(register_binding("some_fun", NULL, fake_registry), fun1) - expect_false("some_fun" %in% names(fake_registry)) - expect_silent(expect_null(register_binding("some_fun", NULL, fake_registry))) + expect_identical(register_binding("some.pkg::some_fun", NULL, fake_registry), fun1) + expect_silent(expect_null(register_binding("some.pkg::some_fun", NULL, fake_registry))) - expect_null(register_binding("some_pkg::some_fun", fun1, fake_registry)) + expect_null(register_binding("somePkg::some_fun", fun1, fake_registry)) expect_identical(fake_registry$some_fun, fun1) + + expect_warning( + register_binding("some.pkg2::some_fun", fun2, fake_registry), + "A \"some_fun\" binding already exists in the registry and will be overwritten." + ) }) test_that("register_binding_agg() works", { fake_registry <- new.env(parent = emptyenv()) fun1 <- function() NULL - expect_null(register_binding_agg("some_fun", fun1, fake_registry)) + expect_null(register_binding_agg("somePkg::some_fun", fun1, fake_registry)) expect_identical(fake_registry$some_fun, fun1) + expect_identical(fake_registry$`somePkg::some_fun`, fun1) }) diff --git a/r/tests/testthat/test-dplyr-glimpse.R b/r/tests/testthat/test-dplyr-glimpse.R index 9deb9087b17..c93273bdeef 100644 --- a/r/tests/testthat/test-dplyr-glimpse.R +++ b/r/tests/testthat/test-dplyr-glimpse.R @@ -17,7 +17,7 @@ # The glimpse output for tests with `example_data` is different on R < 3.6 # because the `lgl` column is generated with `sample()` and the RNG -# algorithm is different in older R versions. +# algorithm is different in older R versions. skip_on_r_older_than("3.6") library(dplyr, warn.conflicts = FALSE) diff --git a/r/tests/testthat/test-dplyr-group-by.R b/r/tests/testthat/test-dplyr-group-by.R index a4e558a80b8..08d6a77d3d1 100644 --- a/r/tests/testthat/test-dplyr-group-by.R +++ b/r/tests/testthat/test-dplyr-group-by.R @@ -156,3 +156,15 @@ test_that("group_by with .drop", { example_with_logical_factors ) }) + +test_that("group_by() with namespaced functions", { + compare_dplyr_binding( + .input %>% + group_by(int > base::sqrt(25)) %>% + summarise(mean(dbl, na.rm = TRUE)) %>% + # group order is different from dplyr, hence reordering + arrange(`int > base::sqrt(25)`) %>% + collect(), + tbl + ) +}) diff --git a/r/tests/testthat/test-dplyr-mutate.R b/r/tests/testthat/test-dplyr-mutate.R index beb893afec7..66e3b4edf0d 100644 --- a/r/tests/testthat/test-dplyr-mutate.R +++ b/r/tests/testthat/test-dplyr-mutate.R @@ -144,9 +144,12 @@ test_that("transmute() defuses dots arguments (ARROW-13262)", { expect_warning( tbl %>% Table$create() %>% - transmute(stringr::str_c(chr, chr)) %>% + transmute( + a = stringr::str_c(padded_strings, padded_strings), + b = stringr::str_squish(a) + ) %>% collect(), - "Expression stringr::str_c(chr, chr) not supported in Arrow; pulling data into R", + "Expression stringr::str_squish(a) not supported in Arrow; pulling data into R", fixed = TRUE ) }) @@ -528,7 +531,11 @@ test_that("mutate and pmin/pmax", { max_val_1 = pmax(val1, val2, val3), max_val_2 = pmax(val1, val2, val3, na.rm = TRUE), min_val_1 = pmin(val1, val2, val3), - min_val_2 = pmin(val1, val2, val3, na.rm = TRUE) + min_val_2 = pmin(val1, val2, val3, na.rm = TRUE), + max_val_1_nmspc = base::pmax(val1, val2, val3), + max_val_2_nmspc = base::pmax(val1, val2, val3, na.rm = TRUE), + min_val_1_nmspc = base::pmin(val1, val2, val3), + min_val_2_nmspc = base::pmin(val1, val2, val3, na.rm = TRUE) ) %>% collect(), df @@ -544,3 +551,41 @@ test_that("mutate and pmin/pmax", { df ) }) + +test_that("mutate() and transmute() with namespaced functions", { + compare_dplyr_binding( + .input %>% + mutate( + a = base::round(dbl) + base::log(int) + ) %>% + collect(), + tbl + ) + compare_dplyr_binding( + .input %>% + transmute( + a = base::round(dbl) + base::log(int) + ) %>% + collect(), + tbl + ) + + # str_detect binding depends on RE2 + skip_if_not_available("re2") + compare_dplyr_binding( + .input %>% + mutate( + b = stringr::str_detect(verses, "ur") + ) %>% + collect(), + tbl + ) + compare_dplyr_binding( + .input %>% + transmute( + b = stringr::str_detect(verses, "ur") + ) %>% + collect(), + tbl + ) +}) diff --git a/r/tests/testthat/test-dplyr-summarize.R b/r/tests/testthat/test-dplyr-summarize.R index 5ad7425ee87..c2207a1f273 100644 --- a/r/tests/testthat/test-dplyr-summarize.R +++ b/r/tests/testthat/test-dplyr-summarize.R @@ -103,7 +103,10 @@ test_that("Group by mean on dataset", { compare_dplyr_binding( .input %>% group_by(some_grouping) %>% - summarize(mean = mean(int, na.rm = FALSE)) %>% + summarize( + mean = mean(int, na.rm = FALSE), + mean2 = base::mean(int, na.rm = TRUE) + ) %>% collect(), tbl ) @@ -121,7 +124,10 @@ test_that("Group by sd on dataset", { compare_dplyr_binding( .input %>% group_by(some_grouping) %>% - summarize(sd = sd(int, na.rm = FALSE)) %>% + summarize( + sd = sd(int, na.rm = FALSE), + sd2 = stats::sd(int, na.rm = TRUE) + ) %>% collect(), tbl ) @@ -139,7 +145,10 @@ test_that("Group by var on dataset", { compare_dplyr_binding( .input %>% group_by(some_grouping) %>% - summarize(var = var(int, na.rm = FALSE)) %>% + summarize( + var = var(int, na.rm = FALSE), + var2 = stats::var(int, na.rm = TRUE) + ) %>% collect(), tbl ) @@ -156,7 +165,10 @@ test_that("n()", { compare_dplyr_binding( .input %>% group_by(some_grouping) %>% - summarize(counts = n()) %>% + summarize( + counts = n(), + counts2 = dplyr::n() + ) %>% arrange(some_grouping) %>% collect(), tbl @@ -167,14 +179,20 @@ test_that("Group by any/all", { compare_dplyr_binding( .input %>% group_by(some_grouping) %>% - summarize(any(lgl, na.rm = TRUE)) %>% + summarize( + any(lgl, na.rm = TRUE), + base::any(lgl, na.rm = TRUE) + ) %>% collect(), tbl ) compare_dplyr_binding( .input %>% group_by(some_grouping) %>% - summarize(all(lgl, na.rm = TRUE)) %>% + summarize( + all(lgl, na.rm = TRUE), + base::all(lgl, na.rm = TRUE) + ) %>% collect(), tbl ) @@ -219,7 +237,7 @@ test_that("Group by any/all", { }) test_that("n_distinct() on dataset", { - # With groupby + # With group_by compare_dplyr_binding( .input %>% group_by(some_grouping) %>% @@ -243,7 +261,10 @@ test_that("n_distinct() on dataset", { ) compare_dplyr_binding( .input %>% - summarize(distinct = n_distinct(lgl, na.rm = TRUE)) %>% + summarize( + distinct = n_distinct(lgl, na.rm = TRUE), + distinct2 = dplyr::n_distinct(lgl, na.rm = TRUE) + ) %>% collect(), tbl ) @@ -343,6 +364,8 @@ test_that("median()", { summarize( med_dbl = median(dbl), med_int = as.double(median(int)), + med_dbl2 = stats::median(dbl), + med_int2 = base::as.double(stats::median(int)), med_dbl_narmf = median(dbl, FALSE), med_int_narmf = as.double(median(int, na.rm = FALSE)) ) %>% @@ -459,6 +482,35 @@ test_that("quantile()", { ) }) +test_that("quantile() with namespacing", { + suppressWarnings( + expect_warning( + expect_equal( + tbl %>% + group_by(some_grouping) %>% + summarize( + q_dbl = quantile(dbl, probs = 0.5, na.rm = TRUE, names = FALSE), + q_int = as.double( + quantile(int, probs = 0.5, na.rm = TRUE, names = FALSE) + ) + ) %>% + arrange(some_grouping), + Table$create(tbl) %>% + group_by(some_grouping) %>% + summarize( + q_dbl = stats::quantile(dbl, probs = 0.5, na.rm = TRUE), + q_int = as.double(quantile(int, probs = 0.5, na.rm = TRUE)) + ) %>% + arrange(some_grouping) %>% + collect() + ), + "quantile() currently returns an approximate quantile in Arrow", + fixed = TRUE + ), + classes = "arrow.quantile.approximate" + ) +}) + test_that("summarize() with min() and max()", { compare_dplyr_binding( .input %>% @@ -491,7 +543,9 @@ test_that("summarize() with min() and max()", { select(int) %>% summarize( min_int = min(int, na.rm = TRUE), - max_int = max(int, na.rm = TRUE) + max_int = max(int, na.rm = TRUE), + min_int2 = base::min(int, na.rm = TRUE), + max_int2 = base::max(int, na.rm = TRUE) ) %>% collect(), tbl, @@ -999,3 +1053,28 @@ test_that("summarise() can handle scalars and literal values", { tibble(y = 2L) ) }) + +test_that("summarise() supports namespacing", { + compare_dplyr_binding( + .input %>% + summarize(total = base::sum(int, na.rm = TRUE)) %>% + collect(), + tbl + ) + compare_dplyr_binding( + .input %>% + summarise( + log_total = sum(base::log(int) + 1, na.rm = TRUE) + ) %>% + collect(), + tbl + ) + compare_dplyr_binding( + .input %>% + summarise( + log_total = base::round(base::sum(base::log(int) + dbl, na.rm = TRUE)) + ) %>% + collect(), + tbl + ) +}) diff --git a/r/tests/testthat/test-util.R b/r/tests/testthat/test-util.R index 20fdedf3e12..15aece7c3fe 100644 --- a/r/tests/testthat/test-util.R +++ b/r/tests/testthat/test-util.R @@ -39,3 +39,34 @@ test_that("as_writable_table() errors for invalid input", { # make sure other errors make it through expect_snapshot_error(wrapper_fun(data.frame(x = I(list(1, "a"))))) }) + +test_that("all_funs() identifies namespace-qualified and unqualified functions", { + expect_equal( + all_funs(rlang::quo(pkg::fun())), + "pkg::fun" + ) + expect_equal( + all_funs(rlang::quo(pkg::fun(other_pkg::obj))), + "pkg::fun" + ) + expect_equal( + all_funs(rlang::quo(other_fun(pkg::fun()))), + c("other_fun", "pkg::fun") + ) + expect_equal( + all_funs(rlang::quo(other_pkg::other_fun(pkg::fun()))), + c("other_pkg::other_fun", "pkg::fun") + ) + expect_equal( + all_funs(rlang::quo(other_pkg::other_fun(pkg::fun(sum(base::log()))))), + c("other_pkg::other_fun", "pkg::fun", "sum", "base::log") + ) + expect_equal( + all_funs(rlang::quo(other_fun(fun(sum(log()))))), + c("other_fun", "fun", "sum", "log") + ) + expect_equal( + all_funs(rlang::quo(other_fun(fun(sum(base::log()))))), + c("other_fun", "fun", "sum", "base::log") + ) +}) diff --git a/r/vignettes/developers/bindings.Rmd b/r/vignettes/developers/bindings.Rmd index 95dc5c9f61e..efe729c5f5c 100644 --- a/r/vignettes/developers/bindings.Rmd +++ b/r/vignettes/developers/bindings.Rmd @@ -191,11 +191,11 @@ As `startsWith()` requires options, direct mapping is not appropriate. If the function cannot be mapped directly, some extra work may be needed to ensure that calling the arrow version of the function results in the same result as calling the R version of the function. In this case, the function will need -adding to the `nse_funcs` list in `arrow/r/R/dplyr-functions.R`. Here is how -this might look for `startsWith()`: +adding to the `nse_funcs` function registry. Here is how this might look for +`startsWith()`: ```{r, eval = FALSE} -register_binding("startsWith", function(x, prefix) { +register_binding("base::startsWith", function(x, prefix) { Expression$create( "starts_with", x, @@ -211,6 +211,15 @@ closest analog to the function whose binding is being defined and define the new binding in a similar location. For example, the binding for `startsWith()` is registered in `dplyr-funcs-string.R` next to the binding for `endsWith()`. +Note: we use the namespace-qualified name (i.e. `"base::startsWith"`) for a +binding. This will register the same binding both as `startsWith()` and as +`base::startsWith()`, which will allow us to use the `pkg::` prefix in a call. + +```{r} +arrow_table(starwars) %>% + filter(stringr::str_detect(name, "Darth")) +``` + Hint: you can use `call_function()` to call a compute function directly from R. This might be useful if you want to experiment with a compute function while you're writing bindings for it, e.g.