diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 02ec35bda26..501d6b33335 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -28,25 +28,57 @@ register_bindings_datetime <- function() { } register_bindings_datetime_utility <- function() { - register_binding("strptime", function(x, format = "%Y-%m-%d %H:%M:%S", tz = NULL, + register_binding("strptime", function(x, + format = "%Y-%m-%d %H:%M:%S", + tz = "", unit = "ms") { # Arrow uses unit for time parsing, strptime() does not. # Arrow has no default option for strptime (format, unit), # we suggest following format = "%Y-%m-%d %H:%M:%S", unit = MILLI/1L/"ms", # (ARROW-12809) - # ParseTimestampStrptime currently ignores the timezone information (ARROW-12820). - # Stop if tz is provided. - if (is.character(tz)) { - arrow_not_supported("Time zone argument") - } + unit <- make_valid_time_unit( + unit, + c(valid_time64_units, valid_time32_units) + ) + + output <- build_expr( + "strptime", + x, + options = + list( + format = format, + unit = unit, + error_is_null = TRUE + ) + ) - unit <- make_valid_time_unit(unit, c(valid_time64_units, valid_time32_units)) + if (tz == "") { + tz <- Sys.timezone() + } - build_expr("strptime", x, options = list(format = format, unit = unit, error_is_null = TRUE)) + # if a timestamp does not contain timezone information (i.e. it is + # "timezone-naive") we can attach timezone information (i.e. convert it into + # a "timezone-aware" timestamp) with `assume_timezone` + # if we want to cast to a different timezone, we can only do it for + # timezone-aware timestamps, not for timezone-naive ones + if (!is.null(tz)) { + output <- build_expr( + "assume_timezone", + output, + options = + list( + timezone = tz + ) + ) + } + output }) - register_binding("strftime", function(x, format = "", tz = "", usetz = FALSE) { + register_binding("strftime", function(x, + format = "", + tz = "", + usetz = FALSE) { if (usetz) { format <- paste(format, "%Z") } diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index b1223630153..c644a66db55 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -49,50 +49,82 @@ test_df <- tibble::tibble( test_that("strptime", { t_string <- tibble(x = c("2018-10-07 19:04:05", NA)) - t_stamp <- tibble(x = c(lubridate::ymd_hms("2018-10-07 19:04:05"), NA)) + # lubridate defaults to "UTC" as timezone => t_stamp is in "UTC" + t_stamp_with_utc_tz <- tibble(x = c(lubridate::ymd_hms("2018-10-07 19:04:05"), NA)) + t_stamp_with_pm_tz <- tibble( + x = c(lubridate::ymd_hms("2018-10-07 19:04:05", tz = "Pacific/Marquesas"), NA) + ) + + # base::strptime returns a POSIXlt (a list) => we cannot use compare_dplyr_binding + # => we use expect_equal for the tests below + + withr::with_timezone("Pacific/Marquesas", { + # the default value for strptime's `tz` argument is "", which is interpreted + # as the current timezone. we test here if the strptime binding picks up + # correctly the current timezone (similarly to the base R version) + expect_equal( + t_string %>% + record_batch() %>% + mutate( + x = strptime(x, format = "%Y-%m-%d %H:%M:%S") + ) %>% + collect(), + t_stamp_with_pm_tz + ) + }) + + # adding a timezone to a timezone-naive timestamp works + # and since our TZ when running the test is (typically) Pacific/Marquesas + # this also tests that assigning a TZ different from the current session one + # works as expected + expect_equal( + t_string %>% + arrow_table() %>% + mutate( + x = strptime(x, format = "%Y-%m-%d %H:%M:%S", tz = "Pacific/Marquesas") + ) %>% + collect(), + t_stamp_with_pm_tz + ) expect_equal( t_string %>% Table$create() %>% mutate( - x = strptime(x) + x = strptime(x, tz = "UTC") ) %>% collect(), - t_stamp, - ignore_attr = "tzone" + t_stamp_with_utc_tz ) expect_equal( t_string %>% Table$create() %>% mutate( - x = strptime(x, format = "%Y-%m-%d %H:%M:%S") + x = strptime(x, format = "%Y-%m-%d %H:%M:%S", tz = "UTC") ) %>% collect(), - t_stamp, - ignore_attr = "tzone" + t_stamp_with_utc_tz ) expect_equal( t_string %>% Table$create() %>% mutate( - x = strptime(x, format = "%Y-%m-%d %H:%M:%S", unit = "ns") + x = strptime(x, format = "%Y-%m-%d %H:%M:%S", unit = "ns", tz = "UTC") ) %>% collect(), - t_stamp, - ignore_attr = "tzone" + t_stamp_with_utc_tz ) expect_equal( t_string %>% Table$create() %>% mutate( - x = strptime(x, format = "%Y-%m-%d %H:%M:%S", unit = "s") + x = strptime(x, format = "%Y-%m-%d %H:%M:%S", unit = "s", tz = "UTC") ) %>% collect(), - t_stamp, - ignore_attr = "tzone" + t_stamp_with_utc_tz ) tstring <- tibble(x = c("08-05-2008", NA)) @@ -111,15 +143,6 @@ test_that("strptime", { ) }) -test_that("errors in strptime", { - # Error when tz is passed - x <- Expression$field_ref("x") - expect_error( - call_binding("strptime", x, tz = "PDT"), - "Time zone argument not supported in Arrow" - ) -}) - test_that("strptime returns NA when format doesn't match the data", { df <- tibble( str_date = c("2022-02-07", "2012/02-07", "1975/01-02", "1981/01-07", NA)