diff --git a/r/NEWS.md b/r/NEWS.md index 98a7e114294..9083249a99b 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -24,6 +24,7 @@ * component extraction functions: `tz()` (timezone), `semester()` (semester), `dst()` (daylight savings time indicator), `date()` (extract date), `epiyear()` (epiyear), improvements to `month()`, which now works with integer inputs. * Added `make_date()` & `make_datetime()` + `ISOdatetime()` & `ISOdate()` to create date-times from numeric representations. * Added `decimal_date()` and `date_decimal()` + * Added `make_difftime()` (duration constructor) * date-time functionality: * Added `difftime` and `as.difftime()` * Added `as.Date()` to convert to date diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 3b03477cc81..7657a79271b 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -353,6 +353,36 @@ register_bindings_duration <- function() { }) } +register_bindings_difftime_constructors <- function() { + register_binding("make_difftime", function(num = NULL, + units = "secs", + ...) { + if (units != "secs") { + abort("`make_difftime()` with units other than 'secs' not supported in Arrow") + } + + chunks <- list(...) + + # lubridate concatenates durations passed via the `num` argument with those + # passed via `...` resulting in a vector of length 2 - which is virtually + # unusable in a dplyr pipeline. Arrow errors in this situation + if (!is.null(num) && length(chunks) > 0) { + abort("`make_difftime()` with both `num` and `...` not supported in Arrow") + } + + if (!is.null(num)) { + # build duration from num if present + duration <- num + } else { + # build duration from chunks when nothing is passed via ... + duration <- duration_from_chunks(chunks) + } + + duration <- build_expr("cast", duration, options = cast_options(to_type = int64())) + duration$cast(duration("s")) + }) +} + make_duration <- function(x, unit) { x <- build_expr("cast", x, options = cast_options(to_type = int64())) x$cast(duration(unit)) @@ -397,3 +427,39 @@ binding_format_datetime <- function(x, format = "", tz = "", usetz = FALSE) { build_expr("strftime", x, options = list(format = format, locale = Sys.getlocale("LC_TIME"))) } + +# this is a helper function used for creating a difftime / duration objects from +# several of the accepted pieces (second, minute, hour, day, week) +duration_from_chunks <- function(chunks) { + accepted_chunks <- c("second", "minute", "hour", "day", "week") + matched_chunks <- accepted_chunks[pmatch(names(chunks), accepted_chunks, duplicates.ok = TRUE)] + + if (any(is.na(matched_chunks))) { + abort( + paste0( + "named `difftime` units other than: ", + oxford_paste(accepted_chunks, quote_symbol = "`"), + " not supported in Arrow. \nInvalid `difftime` parts: ", + oxford_paste(names(chunks[is.na(matched_chunks)]), quote_symbol = "`") + ) + ) + } + + matched_chunks <- matched_chunks[!is.na(matched_chunks)] + + chunks <- chunks[matched_chunks] + chunk_duration <- c( + "second" = 1L, + "minute" = 60L, + "hour" = 3600L, + "day" = 86400L, + "week" = 604800L + ) + + # transform the duration of each chunk in seconds and add everything together + duration <- 0 + for (chunk in names(chunks)) { + duration <- duration + chunks[[chunk]] * chunk_duration[[chunk]] + } + duration +} diff --git a/r/R/dplyr-funcs.R b/r/R/dplyr-funcs.R index eaa6ed9dc67..c66ed04893d 100644 --- a/r/R/dplyr-funcs.R +++ b/r/R/dplyr-funcs.R @@ -106,6 +106,7 @@ create_binding_cache <- function() { register_bindings_aggregate() register_bindings_conditional() register_bindings_datetime() + register_bindings_difftime_constructors() register_bindings_duration() register_bindings_duration_helpers() register_bindings_math() diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index fc030779ecb..b1ff505004b 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -1304,3 +1304,92 @@ test_that("dminutes, dhours, ddays, dweeks, dmonths, dyears", { ignore_attr = TRUE ) }) + +test_that("make_difftime()", { + test_df <- tibble( + seconds = c(3, 4, 5, 6), + minutes = c(1.5, 2.3, 4.5, 6.7), + hours = c(2, 3, 4, 5), + days = c(6, 7, 8, 9), + weeks = c(1, 3, 5, NA), + number = 10:13 + ) + + compare_dplyr_binding( + .input %>% + mutate( + duration_from_parts = make_difftime( + second = seconds, + minute = minutes, + hour = hours, + day = days, + week = weeks, + units = "secs" + ), + duration_from_num = make_difftime( + num = number, + units = "secs" + ), + duration_from_r_num = make_difftime( + num = 154, + units = "secs" + ), + duration_from_r_parts = make_difftime( + minute = 45, + day = 2, + week = 4, + units = "secs" + ) + ) %>% + collect(), + test_df + ) + + # named difftime parts other than `second`, `minute`, `hour`, `day` and `week` + # are not supported + expect_error( + expect_warning( + test_df %>% + arrow_table() %>% + mutate( + err_difftime = make_difftime(month = 2) + ) %>% + collect(), + paste0("named `difftime` units other than: `second`, `minute`, `hour`,", + " `day`, and `week` not supported in Arrow.") + ) + ) + + # units other than "secs" not supported since they are the only ones in common + # between R and Arrow + compare_dplyr_binding( + .input %>% + mutate(error_difftime = make_difftime(num = number, units = "mins")) %>% + collect(), + test_df, + warning = TRUE + ) + + # constructing a difftime from both `num` and parts passed through `...` while + # possible with the lubridate function (resulting in a concatenation of the 2 + # resulting objects), it errors in a dplyr context + expect_error( + expect_warning( + test_df %>% + arrow_table() %>% + mutate( + duration_from_num_and_parts = make_difftime( + num = number, + second = seconds, + minute = minutes, + hour = hours, + day = days, + week = weeks, + units = "secs" + ) + ) %>% + collect(), + "with both `num` and `...` not supported in Arrow" + ) + ) +})