From edfa6ed82665ca47660eff2ae4fd95ebdd6c6a64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 11 May 2022 10:32:51 +0100 Subject: [PATCH 1/6] added year, month, and day parsers + unit tests --- r/R/dplyr-funcs-datetime.R | 24 +++++++++++- r/tests/testthat/test-dplyr-funcs-datetime.R | 41 +++++++++++++++++++- 2 files changed, 63 insertions(+), 2 deletions(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 466272db461..aac3b16c61f 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -518,6 +518,28 @@ register_bindings_datetime_parsers <- function() { coalesce_output <- build_expr("coalesce", args = parse_attempt_expressions) - build_expr("assume_timezone", coalesce_output, options = list(timezone = tz)) + if (!is.null(tz)) { + build_expr("assume_timezone", coalesce_output, options = list(timezone = tz)) + } else { + coalesce_output + } + }) + + ymd_parser_vec <- c("ymd", "ydm", "mdy", "myd", "dmy", "dym") + + ymd_parser_map_factory <- function(order) { + force(order) + function(x, tz = NULL) { + parse_x <- call_binding("parse_date_time", x, order, tz) + if(is.null(tz)) { + parse_x <- parse_x$cast(date32()) + } + parse_x + } + } + + for (ymd_order in ymd_parser_vec) { + register_binding(ymd_order, ymd_parser_map_factory(ymd_order)) + } } diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 859c20ce02f..d5b13e3bba1 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -1254,7 +1254,7 @@ test_that("`decimal_date()` and `date_decimal()`", { mutate( decimal_date_from_POSIXct = decimal_date(b), decimal_date_from_r_POSIXct_obj = decimal_date(as.POSIXct("2022-03-25 15:37:01")), - decimal_date_from_r_date_obj = decimal_date(ymd("2022-03-25")), + decimal_date_from_r_date_obj = decimal_date(as.Date("2022-03-25")), decimal_date_from_date = decimal_date(c), date_from_decimal = date_decimal(a), date_from_decimal_r_obj = date_decimal(2022.178) @@ -1732,3 +1732,42 @@ test_that("parse_date_time() doesn't work with hour, minutes, and second compone '"ymd_HMS" `orders` not supported in Arrow' ) }) + +test_that("year, month, day date/time parsers work", { + test_df <- tibble::tibble( + ymd_string = c("2022-05-11", "2022/05/12", "22.05-13"), + ydm_string = c("2022-11-05", "2022/12/05", "22.13-05"), + mdy_string = c("05-11-2022", "05/12/2022", "05.13-22"), + myd_string = c("05-2022-11", "05/2022/12", "05.22-14"), + dmy_string = c("11-05-2022", "12/05/2022", "13.05-22"), + dym_string = c("11-2022-05", "12/2022/05", "13.22-05") + ) + + compare_dplyr_binding( + .input %>% + mutate( + ymd_date = ymd(ymd_string), + ydm_date = ydm(ydm_string), + mdy_date = mdy(mdy_string), + myd_date = myd(myd_string), + dmy_date = dmy(dmy_string), + dym_date = dym(dym_string) + ) %>% + collect(), + test_df + ) + + compare_dplyr_binding( + .input %>% + mutate( + ymd_date = ymd(ymd_string, tz = "Pacific/Marquesas"), + ydm_date = ydm(ydm_string, tz = "Pacific/Marquesas"), + mdy_date = mdy(mdy_string, tz = "Pacific/Marquesas"), + myd_date = myd(myd_string, tz = "Pacific/Marquesas"), + dmy_date = dmy(dmy_string, tz = "Pacific/Marquesas"), + dym_date = dym(dym_string, tz = "Pacific/Marquesas") + ) %>% + collect(), + test_df + ) +}) From 4c5677dc4fc13c9022c81b2a5ee4720629ab901c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 11 May 2022 12:33:01 +0100 Subject: [PATCH 2/6] lint --- r/R/dplyr-funcs-datetime.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index aac3b16c61f..505e8b535bd 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -532,7 +532,7 @@ register_bindings_datetime_parsers <- function() { force(order) function(x, tz = NULL) { parse_x <- call_binding("parse_date_time", x, order, tz) - if(is.null(tz)) { + if (is.null(tz)) { parse_x <- parse_x$cast(date32()) } parse_x From 33463a64745974774a473cd13161958b38b28179 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 11 May 2022 12:34:26 +0100 Subject: [PATCH 3/6] skip tests on win & r 3.6 --- r/tests/testthat/test-dplyr-funcs-datetime.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index d5b13e3bba1..61595ef887a 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -1743,6 +1743,8 @@ test_that("year, month, day date/time parsers work", { dym_string = c("11-2022-05", "12/2022/05", "13.22-05") ) + # string processing requires RE2 library (not available on Windows with R 3.6) + skip_if_not_available("re2") compare_dplyr_binding( .input %>% mutate( From 2172c0b3f067895f83ffba33e95dd40c88f84bd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 12 May 2022 12:01:29 +0100 Subject: [PATCH 4/6] added comment about casting to `date32()` when `tz` is `NULL` --- r/R/dplyr-funcs-datetime.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 505e8b535bd..752c63395df 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -533,6 +533,9 @@ register_bindings_datetime_parsers <- function() { function(x, tz = NULL) { parse_x <- call_binding("parse_date_time", x, order, tz) if (is.null(tz)) { + # we cast so we can mimic the behaviour of the `tz` argument in lubridate + # "If NULL (default), a Date object is returned. Otherwise a POSIXct with + # time zone attribute set to tz." parse_x <- parse_x$cast(date32()) } parse_x From 1d7fce9a7779d3076b7e73b81bd4bfccf422fc2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 12 May 2022 12:04:02 +0100 Subject: [PATCH 5/6] added comment on why we need to handle a situation when `tz` is `NULL` in `parse_date_time()` --- r/R/dplyr-funcs-datetime.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 752c63395df..cec638bce80 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -518,6 +518,9 @@ register_bindings_datetime_parsers <- function() { coalesce_output <- build_expr("coalesce", args = parse_attempt_expressions) + # we need this binding to be able to handle a NULL `tz`, which will then be + # used by bindings such as `ymd` to return, based on whether tz is NULL or + # not, a date or timestamp if (!is.null(tz)) { build_expr("assume_timezone", coalesce_output, options = list(timezone = tz)) } else { From 2f8bbd9a7fc52cfeb4c2746220be728a3d998e91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 12 May 2022 12:40:27 +0100 Subject: [PATCH 6/6] update comment --- r/tests/testthat/test-dplyr-funcs-datetime.R | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 61595ef887a..42448e8243f 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -1640,7 +1640,8 @@ test_that("`as_datetime()`", { }) test_that("parse_date_time() works with year, month, and date components", { - # string processing requires RE2 library (not available on Windows with R 3.6) + # these functions' internals use some string processing which requires the + # RE2 library (not available on Windows with R 3.6) skip_if_not_available("re2") compare_dplyr_binding( .input %>% @@ -1700,7 +1701,8 @@ test_that("parse_date_time() works with year, month, and date components", { }) test_that("parse_date_time() works with a mix of formats and orders", { - # string processing requires RE2 library (not available on Windows with R 3.6) + # these functions' internals use some string processing which requires the + # RE2 library (not available on Windows with R 3.6) skip_if_not_available("re2") test_df <- tibble( string_combi = c("2021-09-1", "2/09//2021", "09.3.2021") @@ -1743,7 +1745,8 @@ test_that("year, month, day date/time parsers work", { dym_string = c("11-2022-05", "12/2022/05", "13.22-05") ) - # string processing requires RE2 library (not available on Windows with R 3.6) + # these functions' internals use some string processing which requires the + # RE2 library (not available on Windows with R 3.6) skip_if_not_available("re2") compare_dplyr_binding( .input %>%