diff --git a/r/NEWS.md b/r/NEWS.md index 0a7d30d2a37..1a1f198e0f3 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -22,10 +22,11 @@ * `read_csv_arrow()`'s readr-style type `T` is now mapped to `timestamp(unit = "ns")` instead of `timestamp(unit = "s")`. * `lubridate`: * component extraction functions: `tz()` (timezone), `semester()` (semester), `dst()` (daylight savings time indicator), `date()` (extract date), `epiyear()` (epiyear), improvements to `month()`, which now works with integer inputs. - * `make_date()` & `make_datetime()` + `ISOdatetime()` & `ISOdate()` to create date-times from numeric representations. + * Added `make_date()` & `make_datetime()` + `ISOdatetime()` & `ISOdate()` to create date-times from numeric representations. + * Added `decimal_date()` and `date_decimal()` * date-time functionality: - * `difftime` and `as.difftime()` - * `as.Date()` to convert to date + * Added `difftime` and `as.difftime()` + * Added `as.Date()` to convert to date # arrow 7.0.0 diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index c583aed5472..586c9d6d3f5 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -270,6 +270,20 @@ register_bindings_duration <- function() { time2 <- build_expr("cast", time2, options = cast_options(to_type = timestamp(timezone = "UTC"))) } + # if time1 or time2 are timestamps they cannot be expressed in "s" /seconds + # otherwise they cannot be added subtracted with durations + # TODO delete the casting to "us" once + # https://issues.apache.org/jira/browse/ARROW-16060 is solved + if (inherits(time1, "Expression") && + time1$type_id() %in% Type[c("TIMESTAMP")] && time1$type()$unit() != 2L) { + time1 <- build_expr("cast", time1, options = cast_options(to_type = timestamp("us"))) + } + + if (inherits(time2, "Expression") && + time2$type_id() %in% Type[c("TIMESTAMP")] && time2$type()$unit() != 2L) { + time2 <- build_expr("cast", time2, options = cast_options(to_type = timestamp("us"))) + } + # we need to go build the subtract expression instead of `time1 - time2` to # prevent complaints when we try to subtract an R object from an Expression subtract_output <- build_expr("-", time1, time2) @@ -309,6 +323,34 @@ register_bindings_duration <- function() { build_expr("cast", x, options = cast_options(to_type = duration(unit = "s"))) }) + register_binding("decimal_date", function(date) { + y <- build_expr("year", date) + start <- call_binding("make_datetime", year = y, tz = "UTC") + sofar <- call_binding("difftime", date, start, units = "secs") + total <- call_binding( + "if_else", + build_expr("is_leap_year", date), + Expression$scalar(31622400L), # number of seconds in a leap year (366 days) + Expression$scalar(31536000L) # number of seconds in a regular year (365 days) + ) + y + sofar$cast(int64()) / total + }) + register_binding("date_decimal", function(decimal, tz = "UTC") { + y <- build_expr("floor", decimal) + + start <- call_binding("make_datetime", year = y, tz = tz) + seconds <- call_binding( + "if_else", + build_expr("is_leap_year", start), + Expression$scalar(31622400L), # number of seconds in a leap year (366 days) + Expression$scalar(31536000L) # number of seconds in a regular year (365 days) + ) + + fraction <- decimal - y + delta <- build_expr("floor", seconds * fraction) + delta <- delta$cast(int64()) + start + delta$cast(duration("s")) + }) } binding_format_datetime <- function(x, format = "", tz = "", usetz = FALSE) { diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 6df40505d1a..2c819b3014c 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -1107,7 +1107,7 @@ test_that("difftime works correctly", { .input %>% mutate( secs2 = difftime( - as.POSIXct("2022-03-07", tz = "Europe/Bucharest"), + as.POSIXct("2022-03-07", tz = "Pacific/Marquesas"), time1, units = "secs" ) @@ -1186,3 +1186,32 @@ test_that("as.difftime()", { collect() ) }) + +test_that("`decimal_date()` and `date_decimal()`", { + test_df <- tibble( + a = c(2007.38998954347, 1970.77732069883, 2020.96061799722, + 2009.43465948477, 1975.71251467871, NA), + b = as.POSIXct( + c("2007-05-23 08:18:30", "1970-10-11 17:19:45", "2020-12-17 14:04:06", + "2009-06-08 15:37:01", "1975-09-18 01:37:42", NA) + ), + c = as.Date( + c("2007-05-23", "1970-10-11", "2020-12-17", "2009-06-08", "1975-09-18", NA) + ) + ) + + compare_dplyr_binding( + .input %>% + mutate( + decimal_date_from_POSIXct = decimal_date(b), + decimal_date_from_r_POSIXct_obj = decimal_date(as.POSIXct("2022-03-25 15:37:01")), + decimal_date_from_r_date_obj = decimal_date(ymd("2022-03-25")), + decimal_date_from_date = decimal_date(c), + date_from_decimal = date_decimal(a), + date_from_decimal_r_obj = date_decimal(2022.178) + ) %>% + collect(), + test_df, + ignore_attr = "tzone" + ) +})