Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions r/NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
* component extraction functions: `tz()` (timezone), `semester()` (semester), `dst()` (daylight savings time indicator), `date()` (extract date), `epiyear()` (epiyear), improvements to `month()`, which now works with integer inputs.
* Added `make_date()` & `make_datetime()` + `ISOdatetime()` & `ISOdate()` to create date-times from numeric representations.
* Added `decimal_date()` and `date_decimal()`
* Added `make_difftime()` (duration constructor)
* date-time functionality:
* Added `difftime` and `as.difftime()`
* Added `as.Date()` to convert to date
Expand Down
66 changes: 66 additions & 0 deletions r/R/dplyr-funcs-datetime.R
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,36 @@ register_bindings_duration <- function() {
})
}

register_bindings_difftime_constructors <- function() {
register_binding("make_difftime", function(num = NULL,
units = "secs",
...) {
if (units != "secs") {
abort("`make_difftime()` with units other than 'secs' not supported in Arrow")
}

chunks <- list(...)

# lubridate concatenates durations passed via the `num` argument with those
# passed via `...` resulting in a vector of length 2 - which is virtually
# unusable in a dplyr pipeline. Arrow errors in this situation
if (!is.null(num) && length(chunks) > 0) {
abort("`make_difftime()` with both `num` and `...` not supported in Arrow")
}

if (!is.null(num)) {
# build duration from num if present
duration <- num
} else {
# build duration from chunks when nothing is passed via ...
duration <- duration_from_chunks(chunks)
}

duration <- build_expr("cast", duration, options = cast_options(to_type = int64()))
duration$cast(duration("s"))
})
}

make_duration <- function(x, unit) {
x <- build_expr("cast", x, options = cast_options(to_type = int64()))
x$cast(duration(unit))
Expand Down Expand Up @@ -397,3 +427,39 @@ binding_format_datetime <- function(x, format = "", tz = "", usetz = FALSE) {

build_expr("strftime", x, options = list(format = format, locale = Sys.getlocale("LC_TIME")))
}

# this is a helper function used for creating a difftime / duration objects from
# several of the accepted pieces (second, minute, hour, day, week)
duration_from_chunks <- function(chunks) {
accepted_chunks <- c("second", "minute", "hour", "day", "week")
matched_chunks <- accepted_chunks[pmatch(names(chunks), accepted_chunks, duplicates.ok = TRUE)]

if (any(is.na(matched_chunks))) {
abort(
paste0(
"named `difftime` units other than: ",
oxford_paste(accepted_chunks, quote_symbol = "`"),
" not supported in Arrow. \nInvalid `difftime` parts: ",
oxford_paste(names(chunks[is.na(matched_chunks)]), quote_symbol = "`")
)
)
}

matched_chunks <- matched_chunks[!is.na(matched_chunks)]

chunks <- chunks[matched_chunks]
chunk_duration <- c(
"second" = 1L,
"minute" = 60L,
"hour" = 3600L,
"day" = 86400L,
"week" = 604800L
)

# transform the duration of each chunk in seconds and add everything together
duration <- 0
for (chunk in names(chunks)) {
duration <- duration + chunks[[chunk]] * chunk_duration[[chunk]]
}
duration
}
1 change: 1 addition & 0 deletions r/R/dplyr-funcs.R
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ create_binding_cache <- function() {
register_bindings_aggregate()
register_bindings_conditional()
register_bindings_datetime()
register_bindings_difftime_constructors()
register_bindings_duration()
register_bindings_duration_helpers()
register_bindings_math()
Expand Down
89 changes: 89 additions & 0 deletions r/tests/testthat/test-dplyr-funcs-datetime.R
Original file line number Diff line number Diff line change
Expand Up @@ -1304,3 +1304,92 @@ test_that("dminutes, dhours, ddays, dweeks, dmonths, dyears", {
ignore_attr = TRUE
)
})

test_that("make_difftime()", {
test_df <- tibble(
seconds = c(3, 4, 5, 6),
minutes = c(1.5, 2.3, 4.5, 6.7),
hours = c(2, 3, 4, 5),
days = c(6, 7, 8, 9),
weeks = c(1, 3, 5, NA),
number = 10:13
)

compare_dplyr_binding(
.input %>%
mutate(
duration_from_parts = make_difftime(
second = seconds,
minute = minutes,
hour = hours,
day = days,
week = weeks,
units = "secs"
),
duration_from_num = make_difftime(
num = number,
units = "secs"
),
duration_from_r_num = make_difftime(
num = 154,
units = "secs"
),
duration_from_r_parts = make_difftime(
minute = 45,
day = 2,
week = 4,
units = "secs"
)
) %>%
collect(),
test_df
)

# named difftime parts other than `second`, `minute`, `hour`, `day` and `week`
# are not supported
expect_error(
expect_warning(
test_df %>%
arrow_table() %>%
mutate(
err_difftime = make_difftime(month = 2)
) %>%
collect(),
paste0("named `difftime` units other than: `second`, `minute`, `hour`,",
" `day`, and `week` not supported in Arrow.")
)
)

# units other than "secs" not supported since they are the only ones in common
# between R and Arrow
compare_dplyr_binding(
.input %>%
mutate(error_difftime = make_difftime(num = number, units = "mins")) %>%
collect(),
test_df,
warning = TRUE
)

# constructing a difftime from both `num` and parts passed through `...` while
# possible with the lubridate function (resulting in a concatenation of the 2
# resulting objects), it errors in a dplyr context
expect_error(
expect_warning(
test_df %>%
arrow_table() %>%
mutate(
duration_from_num_and_parts = make_difftime(
num = number,
second = seconds,
minute = minutes,
hour = hours,
day = days,
week = weeks,
units = "secs"
)
) %>%
collect(),
"with both `num` and `...` not supported in Arrow"
)
)
})