diff --git a/r/NEWS.md b/r/NEWS.md index 43e36a52541..6b2e0532168 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -22,6 +22,7 @@ * `read_csv_arrow()`'s readr-style type `T` is now mapped to `timestamp(unit = "ns")` instead of `timestamp(unit = "s")`. * `lubridate`: * component extraction functions: `tz()` (timezone), `semester()` (semester), `dst()` (daylight savings time indicator), `date()` (extract date), `epiyear()` (epiyear), improvements to `month()`, which now works with integer inputs. + * `make_date()` & `make_datetime()` + `ISOdatetime()` & `ISOdate()` to create date-times from numeric representations. * date-time functionality: * `as.Date()` to convert to date diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index b1ae6a44159..7f8660d74a7 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -187,6 +187,52 @@ register_bindings_datetime <- function() { register_binding("date", function(x) { build_expr("cast", x, options = list(to_type = date32())) }) + register_binding("make_datetime", function(year = 1970L, + month = 1L, + day = 1L, + hour = 0L, + min = 0L, + sec = 0, + tz = "UTC") { + + # ParseTimestampStrptime currently ignores the timezone information (ARROW-12820). + # Stop if tz other than 'UTC' is provided. + if (tz != "UTC") { + arrow_not_supported("Time zone other than 'UTC'") + } + + x <- call_binding("str_c", year, month, day, hour, min, sec, sep = "-") + build_expr("strptime", x, options = list(format = "%Y-%m-%d-%H-%M-%S", unit = 0L)) + }) + register_binding("make_date", function(year = 1970L, month = 1L, day = 1L) { + x <- call_binding("make_datetime", year, month, day) + build_expr("cast", x, options = cast_options(to_type = date32())) + }) + register_binding("ISOdatetime", function(year, + month, + day, + hour, + min, + sec, + tz = "UTC") { + + # NAs for seconds aren't propagated (but treated as 0) in the base version + sec <- call_binding("if_else", + call_binding("is.na", sec), + 0, + sec) + + call_binding("make_datetime", year, month, day, hour, min, sec, tz) + }) + register_binding("ISOdate", function(year, + month, + day, + hour = 12, + min = 0, + sec = 0, + tz = "UTC") { + call_binding("make_datetime", year, month, day, hour, min, sec, tz) + }) } binding_format_datetime <- function(x, format = "", tz = "", usetz = FALSE) { diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index d0afda8912d..62d682a600b 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -974,3 +974,105 @@ test_that("date() errors with unsupported inputs", { regexp = "Unsupported cast from double to date32 using function cast_date32" ) }) + +test_that("make_date & make_datetime", { + test_df <- expand.grid( + year = c(1999, 1969, 2069, NA), + month = c(1, 2, 7, 12, NA), + day = c(1, 9, 13, 28, NA), + hour = c(0, 7, 23, NA), + min = c(0, 59, NA), + sec = c(0, 59, NA) + ) %>% + tibble() + + compare_dplyr_binding( + .input %>% + mutate(composed_date = make_date(year, month, day)) %>% + collect(), + test_df + ) + + compare_dplyr_binding( + .input %>% + mutate(composed_date_r_obj = make_date(1999, 12, 31)) %>% + collect(), + test_df + ) + + compare_dplyr_binding( + .input %>% + mutate(composed_datetime = make_datetime(year, month, day, hour, min, sec)) %>% + collect(), + test_df, + # the make_datetime binding uses strptime which does not support tz, hence + # a mismatch in tzone attribute (ARROW-12820) + ignore_attr = TRUE + ) + + compare_dplyr_binding( + .input %>% + mutate( + composed_datetime_r_obj = make_datetime(1999, 12, 31, 14, 15, 16)) %>% + collect(), + test_df, + # the make_datetime binding uses strptime which does not support tz, hence + # a mismatch in tzone attribute (ARROW-12820) + ignore_attr = TRUE + ) +}) + +test_that("ISO_datetime & ISOdate", { + test_df <- expand.grid( + year = c(1999, 1969, 2069, NA), + month = c(1, 2, 7, 12, NA), + day = c(1, 9, 13, 28, NA), + hour = c(0, 7, 23, NA), + min = c(0, 59, NA), + sec = c(0, 59, NA) + ) %>% + tibble() + + compare_dplyr_binding( + .input %>% + mutate(composed_date = ISOdate(year, month, day)) %>% + collect(), + test_df, + # the make_datetime binding uses strptime which does not support tz, hence + # a mismatch in tzone attribute (ARROW-12820) + ignore_attr = TRUE + ) + + compare_dplyr_binding( + .input %>% + mutate(composed_date_r_obj = ISOdate(1999, 12, 31)) %>% + collect(), + test_df, + # the make_datetime binding uses strptime which does not support tz, hence + # a mismatch in tzone attribute (ARROW-12820) + ignore_attr = TRUE + ) + + # the default `tz` for base::ISOdatetime is "", but in Arrow it's "UTC" + compare_dplyr_binding( + .input %>% + mutate( + composed_datetime = ISOdatetime(year, month, day, hour, min, sec, tz = "UTC")) %>% + collect(), + test_df, + # the make_datetime binding uses strptime which does not support tz, hence + # a mismatch in tzone attribute (ARROW-12820) + ignore_attr = TRUE + ) + + compare_dplyr_binding( + .input %>% + mutate( + composed_datetime_r_obj = ISOdatetime(1999, 12, 31, 14, 15, 16)) %>% + collect(), + test_df, + # the make_datetime binding uses strptime which does not support tz, hence + # a mismatch in tzone attribute (ARROW-12820) + ignore_attr = TRUE + ) +})