From 588304e94ea69bb398db31ca2d3c66c95a55cc1e Mon Sep 17 00:00:00 2001 From: Rok Date: Tue, 7 Sep 2021 22:10:49 +0200 Subject: [PATCH 1/8] Adding StrftimeOptions. --- r/R/dplyr-functions.R | 4 ++ r/src/compute.cpp | 13 +++++ .../testthat/test-dplyr-string-functions.R | 49 +++++++++++++++++++ 3 files changed, 66 insertions(+) diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index 808956efe15..3d90b8fa2c4 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -672,6 +672,10 @@ nse_funcs$strptime <- function(x, format = "%Y-%m-%d %H:%M:%S", tz = NULL, unit Expression$create("strptime", x, options = list(format = format, unit = unit)) } +nse_funcs$strftime <- function(x, format = "%Y-%m-%d %H:%M:%S", locale="C") { + Expression$create("strftime", x, options = list(format = format, locale = locale)) +} + nse_funcs$second <- function(x) { Expression$create("add", Expression$create("second", x), Expression$create("subsecond", x)) } diff --git a/r/src/compute.cpp b/r/src/compute.cpp index c6ba0a28046..1e987b2b661 100644 --- a/r/src/compute.cpp +++ b/r/src/compute.cpp @@ -332,6 +332,19 @@ std::shared_ptr make_compute_options( cpp11::as_cpp(options["unit"])); } + if (func_name == "strftime") { + using Options = arrow::compute::StrftimeOptions; + std::string format = "%Y-%m-%dT%H:%M:%S"; + std::string locale = "C"; + if (!Rf_isNull(options["format"])) { + format = cpp11::as_cpp(options["format"]); + } + if (!Rf_isNull(options["locale"])) { + locale = cpp11::as_cpp(options["locale"]); + } + return std::make_shared(Options(format, locale)); + } + if (func_name == "assume_timezone") { using Options = arrow::compute::AssumeTimezoneOptions; enum Options::Ambiguous ambiguous; diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R index b6b8f5a714a..0f2247e57ac 100644 --- a/r/tests/testthat/test-dplyr-string-functions.R +++ b/r/tests/testthat/test-dplyr-string-functions.R @@ -719,6 +719,55 @@ test_that("errors in strptime", { ) }) +test_that("strftime", { + # TODO: consider reevaluating this workaround after ARROW-12980 + withr::local_timezone("UTC") + t_stamp <- tibble(x = c(lubridate::ymd_hms("2018-10-07 19:04:05"), NA)) + t_string <- tibble(x = c("2018-10-07 19:04:05.000000", NA)) + t_string_2 <- tibble(x = c("07 October 2018 19:04:05.000000 UTC", NA)) + + # TODO: remove once we have tz support on windows ARROW-13168 + if (tolower(Sys.info()[["sysname"]]) != "windows") { + expect_equal( + t_stamp %>% + Table$create() %>% + mutate( + x = strftime(x) + ) %>% + collect(), + t_string + ) + + expect_equal( + t_stamp %>% + Table$create() %>% + mutate( + x = strftime(x, format = "%Y-%m-%d %H:%M:%S") + ) %>% + collect(), + t_string + ) + + expect_equal( + t_stamp %>% + Table$create() %>% + mutate( + x = strftime(x, format = "%d %B %Y %H:%M:%S %Z", locale = "C") + ) %>% + collect(), + t_string_2 + ) + } else { + expect_error( + t_stamp %>% + Table$create() %>% + mutate(x = strftime(x)) %>% + collect(), + "Strftime not yet implemented on windows." + ) + } +}) + test_that("arrow_find_substring and arrow_find_substring_regex", { df <- tibble(x = c("Foo and Bar", "baz and qux and quux")) From 463f1425bdf4eada56a650ec0df2f7ad01b6ee6a Mon Sep 17 00:00:00 2001 From: Rok Date: Thu, 16 Sep 2021 21:03:51 +0200 Subject: [PATCH 2/8] Review feedback implementation. --- python/pyarrow/tests/test_compute.py | 2 +- r/R/dplyr-functions.R | 10 +++- r/src/compute.cpp | 12 +--- .../testthat/test-dplyr-string-functions.R | 55 +++++++------------ 4 files changed, 32 insertions(+), 47 deletions(-) diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 46ba78027b5..b5e2fb2a6c2 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -1560,7 +1560,7 @@ def _fix_timestamp(s): formats = ["%a", "%A", "%w", "%d", "%b", "%B", "%m", "%y", "%Y", "%H", "%I", "%p", "%M", "%z", "%Z", "%j", "%U", "%W", "%c", "%x", - "%X", "%%", "%G", "%V", "%u", "%V"] + "%X", "%%", "%G", "%V", "%u"] for timezone in timezones: ts = pd.to_datetime(times).tz_localize(timezone) diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index 3d90b8fa2c4..e1aaaf1e6b8 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -672,8 +672,14 @@ nse_funcs$strptime <- function(x, format = "%Y-%m-%d %H:%M:%S", tz = NULL, unit Expression$create("strptime", x, options = list(format = format, unit = unit)) } -nse_funcs$strftime <- function(x, format = "%Y-%m-%d %H:%M:%S", locale="C") { - Expression$create("strftime", x, options = list(format = format, locale = locale)) +nse_funcs$strftime <- function(x, format = "", tz = "", usetz = FALSE) { + if (tz != "") { + arrow_not_supported("tz argument") + } + if (usetz) { + format = paste(format, "%Z") + } + Expression$create("strftime", x, options = list(format = format, locale = Sys.getlocale("LC_TIME"))) } nse_funcs$second <- function(x) { diff --git a/r/src/compute.cpp b/r/src/compute.cpp index 1e987b2b661..02600566672 100644 --- a/r/src/compute.cpp +++ b/r/src/compute.cpp @@ -334,15 +334,9 @@ std::shared_ptr make_compute_options( if (func_name == "strftime") { using Options = arrow::compute::StrftimeOptions; - std::string format = "%Y-%m-%dT%H:%M:%S"; - std::string locale = "C"; - if (!Rf_isNull(options["format"])) { - format = cpp11::as_cpp(options["format"]); - } - if (!Rf_isNull(options["locale"])) { - locale = cpp11::as_cpp(options["locale"]); - } - return std::make_shared(Options(format, locale)); + return std::make_shared( + Options(cpp11::as_cpp(options["format"]), + cpp11::as_cpp(options["locale"]))); } if (func_name == "assume_timezone") { diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R index 0f2247e57ac..f461282c9de 100644 --- a/r/tests/testthat/test-dplyr-string-functions.R +++ b/r/tests/testthat/test-dplyr-string-functions.R @@ -720,50 +720,35 @@ test_that("errors in strptime", { }) test_that("strftime", { + skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-13168 + # TODO: consider reevaluating this workaround after ARROW-12980 withr::local_timezone("UTC") - t_stamp <- tibble(x = c(lubridate::ymd_hms("2018-10-07 19:04:05"), NA)) - t_string <- tibble(x = c("2018-10-07 19:04:05.000000", NA)) - t_string_2 <- tibble(x = c("07 October 2018 19:04:05.000000 UTC", NA)) - - # TODO: remove once we have tz support on windows ARROW-13168 - if (tolower(Sys.info()[["sysname"]]) != "windows") { - expect_equal( - t_stamp %>% - Table$create() %>% - mutate( - x = strftime(x) - ) %>% - collect(), - t_string - ) + times <- tibble(x = c(lubridate::ymd_hms("2018-10-07 19:04:05"), NA)) + + formats = c("%a", "%A", "%w", "%d", "%b", "%B", "%m", "%y", "%Y", "%H", + "%I", "%p", "%M", "%z", "%Z", "%j", "%U", "%W", "%c", "%x", + "%X", "%%", "%G", "%V", "%u") - expect_equal( - t_stamp %>% - Table$create() %>% - mutate( - x = strftime(x, format = "%Y-%m-%d %H:%M:%S") - ) %>% + for (format in formats) { + expect_dplyr_equal( + input %>% + mutate(x = strftime(x, format = format)) %>% collect(), - t_string + times ) - expect_equal( - t_stamp %>% - Table$create() %>% - mutate( - x = strftime(x, format = "%d %B %Y %H:%M:%S %Z", locale = "C") - ) %>% + expect_dplyr_equal( + input %>% + mutate(x = strftime(x, format = format, usetz = TRUE)) %>% collect(), - t_string_2 + times ) - } else { + + x <- Expression$field_ref("x") expect_error( - t_stamp %>% - Table$create() %>% - mutate(x = strftime(x)) %>% - collect(), - "Strftime not yet implemented on windows." + nse_funcs$strftime(x, format = format, tz="Mars/Mariner_Valley"), + "tz argument not supported by Arrow" ) } }) From 4678968c1103f0d9d4f86894c0d9a6980bda301e Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Fri, 17 Sep 2021 11:07:53 +0200 Subject: [PATCH 3/8] Update r/tests/testthat/test-dplyr-string-functions.R Co-authored-by: Nic --- r/R/dplyr-functions.R | 12 ++-- .../testthat/test-dplyr-string-functions.R | 55 ++++++++++++++----- 2 files changed, 49 insertions(+), 18 deletions(-) diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index e1aaaf1e6b8..e0c92490381 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -673,13 +673,15 @@ nse_funcs$strptime <- function(x, format = "%Y-%m-%d %H:%M:%S", tz = NULL, unit } nse_funcs$strftime <- function(x, format = "", tz = "", usetz = FALSE) { - if (tz != "") { - arrow_not_supported("tz argument") - } if (usetz) { - format = paste(format, "%Z") + format <- paste(format, "%Z") + } + if (tz == "") { + tz <- Sys.timezone() } - Expression$create("strftime", x, options = list(format = format, locale = Sys.getlocale("LC_TIME"))) + unit <- TimestampType__unit(x$type()) + ts <- Expression$create("cast", x, options = list(to_type = timestamp(unit, tz))) + Expression$create("strftime", ts, options = list(format = format, locale = Sys.getlocale("LC_TIME"))) } nse_funcs$second <- function(x) { diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R index f461282c9de..34735595440 100644 --- a/r/tests/testthat/test-dplyr-string-functions.R +++ b/r/tests/testthat/test-dplyr-string-functions.R @@ -725,32 +725,61 @@ test_that("strftime", { # TODO: consider reevaluating this workaround after ARROW-12980 withr::local_timezone("UTC") times <- tibble(x = c(lubridate::ymd_hms("2018-10-07 19:04:05"), NA)) + seconds <- tibble(x = c("05.000000", NA)) + formats_minus_c <- "%a %A %w %d %b %B %m %y %Y %H %I %p %M %z %Z %j %U %W %x %X %% %G %V %u" + formats <- paste(formats_minus_c, "%c") - formats = c("%a", "%A", "%w", "%d", "%b", "%B", "%m", "%y", "%Y", "%H", - "%I", "%p", "%M", "%z", "%Z", "%j", "%U", "%W", "%c", "%x", - "%X", "%%", "%G", "%V", "%u") + expect_dplyr_equal( + input %>% + mutate(x = strftime(x, format = formats)) %>% + collect(), + times + ) - for (format in formats) { - expect_dplyr_equal( + withr::with_timezone("Pacific/Marquesas", + expect_dplyr_equal( input %>% - mutate(x = strftime(x, format = format)) %>% + mutate(x = strftime(x, format = formats)) %>% collect(), times ) + ) + expect_dplyr_equal( + input %>% + mutate(x = strftime(x, format = formats_minus_c, tz = "Pacific/Marquesas")) %>% + collect(), + times + ) + + withr::with_locale(new = c("LC_TIME" = "C"), expect_dplyr_equal( input %>% - mutate(x = strftime(x, format = format, usetz = TRUE)) %>% + mutate(x = strftime(x, format = "%c", tz = "Pacific/Marquesas")) %>% collect(), times ) + ) - x <- Expression$field_ref("x") - expect_error( - nse_funcs$strftime(x, format = format, tz="Mars/Mariner_Valley"), - "tz argument not supported by Arrow" - ) - } + expect_dplyr_equal( + input %>% + mutate(x = strftime(x, format = formats, usetz = TRUE)) %>% + collect(), + times + ) + + # Output precision of %S depends on the input timestamp precision. + # Timestamps with second precision are represented as integers while + # milliseconds, microsecond and nanoseconds are represented as fixed floating + # point numbers with 3, 6 and 9 decimal places respectively. + expect_dplyr_equal( + input %>% + mutate(x = strftime(x, format = "%S")) %>% + transmute(as.double(substr(x, 1, 2))) %>% + collect(), + times, + tolerance = 1e-6 + ) }) test_that("arrow_find_substring and arrow_find_substring_regex", { From a41a7d563a4162a59cd5c965805283e71a7f367c Mon Sep 17 00:00:00 2001 From: Rok Date: Mon, 20 Sep 2021 18:38:09 +0200 Subject: [PATCH 4/8] Review feedback. --- r/R/dplyr-functions.R | 3 +-- r/tests/testthat/test-dplyr-string-functions.R | 14 +++++--------- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index e0c92490381..69f69bae9bb 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -679,8 +679,7 @@ nse_funcs$strftime <- function(x, format = "", tz = "", usetz = FALSE) { if (tz == "") { tz <- Sys.timezone() } - unit <- TimestampType__unit(x$type()) - ts <- Expression$create("cast", x, options = list(to_type = timestamp(unit, tz))) + ts <- Expression$create("cast", x, options = list(to_type = timestamp(x$type()$unit(), tz))) Expression$create("strftime", ts, options = list(format = format, locale = Sys.getlocale("LC_TIME"))) } diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R index 34735595440..3706c29aacc 100644 --- a/r/tests/testthat/test-dplyr-string-functions.R +++ b/r/tests/testthat/test-dplyr-string-functions.R @@ -722,8 +722,6 @@ test_that("errors in strptime", { test_that("strftime", { skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-13168 - # TODO: consider reevaluating this workaround after ARROW-12980 - withr::local_timezone("UTC") times <- tibble(x = c(lubridate::ymd_hms("2018-10-07 19:04:05"), NA)) seconds <- tibble(x = c("05.000000", NA)) formats_minus_c <- "%a %A %w %d %b %B %m %y %Y %H %I %p %M %z %Z %j %U %W %x %X %% %G %V %u" @@ -752,13 +750,11 @@ test_that("strftime", { times ) - withr::with_locale(new = c("LC_TIME" = "C"), - expect_dplyr_equal( - input %>% - mutate(x = strftime(x, format = "%c", tz = "Pacific/Marquesas")) %>% - collect(), - times - ) + expect_dplyr_equal( + input %>% + mutate(x = strftime(x, format = "%c", tz = "Pacific/Marquesas")) %>% + collect(), + times ) expect_dplyr_equal( From 9e633c7972a62d22813ce806726b7fb6be2aa508 Mon Sep 17 00:00:00 2001 From: Rok Date: Tue, 21 Sep 2021 17:57:24 +0200 Subject: [PATCH 5/8] Disabling %c flag. --- r/R/dplyr-functions.R | 7 ++++ .../testthat/test-dplyr-string-functions.R | 38 ++++++++++--------- 2 files changed, 27 insertions(+), 18 deletions(-) diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index 69f69bae9bb..3184c020ca4 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -673,12 +673,19 @@ nse_funcs$strptime <- function(x, format = "%Y-%m-%d %H:%M:%S", tz = NULL, unit } nse_funcs$strftime <- function(x, format = "", tz = "", usetz = FALSE) { + if (grepl("%c", format)) { + # This check is due to differences in the way %c currently works in Arrow and R's strftime. + # We can revisit this after this is resolved: https://github.com/HowardHinnant/date/issues/704 + arrow_not_supported("%c flag currently not supported.") + } if (usetz) { format <- paste(format, "%Z") } if (tz == "") { tz <- Sys.timezone() } + # Arrow's strftime prints in timezone of the timestamp. To match R's strftime behavior we first + # cast the timestamp to desired timezone. This is a metadata only change. ts <- Expression$create("cast", x, options = list(to_type = timestamp(x$type()$unit(), tz))) Expression$create("strftime", ts, options = list(format = format, locale = Sys.getlocale("LC_TIME"))) } diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R index 3706c29aacc..48678e2645b 100644 --- a/r/tests/testthat/test-dplyr-string-functions.R +++ b/r/tests/testthat/test-dplyr-string-functions.R @@ -722,10 +722,9 @@ test_that("errors in strptime", { test_that("strftime", { skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-13168 - times <- tibble(x = c(lubridate::ymd_hms("2018-10-07 19:04:05"), NA)) + times <- tibble(x = c(lubridate::ymd_hms("2018-10-07 19:04:05", tz = "Etc/GMT+6"), NA)) seconds <- tibble(x = c("05.000000", NA)) - formats_minus_c <- "%a %A %w %d %b %B %m %y %Y %H %I %p %M %z %Z %j %U %W %x %X %% %G %V %u" - formats <- paste(formats_minus_c, "%c") + formats <- "%a %A %w %d %b %B %m %y %Y %H %I %p %M %z %Z %j %U %W %x %X %% %G %V %u" expect_dplyr_equal( input %>% @@ -734,34 +733,37 @@ test_that("strftime", { times ) - withr::with_timezone("Pacific/Marquesas", - expect_dplyr_equal( - input %>% - mutate(x = strftime(x, format = formats)) %>% - collect(), - times - ) - ) - expect_dplyr_equal( input %>% - mutate(x = strftime(x, format = formats_minus_c, tz = "Pacific/Marquesas")) %>% + mutate(x = strftime(x, format = formats, tz = "Pacific/Marquesas")) %>% collect(), times ) expect_dplyr_equal( input %>% - mutate(x = strftime(x, format = "%c", tz = "Pacific/Marquesas")) %>% + mutate(x = strftime(x, format = formats, tz = "EST", usetz = TRUE)) %>% collect(), times ) - expect_dplyr_equal( - input %>% - mutate(x = strftime(x, format = formats, usetz = TRUE)) %>% + withr::with_timezone("Pacific/Marquesas", + expect_dplyr_equal( + input %>% + mutate(x = strftime(x, format = formats, tz = "EST")) %>% + collect(), + times + ) + ) + + # This check is due to differences in the way %c currently works in Arrow and R's strftime. + # We can revisit this after this is resolved: https://github.com/HowardHinnant/date/issues/704 + expect_warning( + times %>% + Table$create() %>% + mutate(x = strftime(x, format = "%c")) %>% collect(), - times + "%c flag currently not supported. not supported by Arrow" ) # Output precision of %S depends on the input timestamp precision. From 6af5ad4a00d5cbf15a66d6da33f69c4ab6731473 Mon Sep 17 00:00:00 2001 From: Rok Date: Wed, 22 Sep 2021 20:09:08 +0200 Subject: [PATCH 6/8] Adding format_ISO8601, movint %c check to c++. --- .../arrow/compute/kernels/scalar_temporal.cc | 5 +++ r/R/dplyr-functions.R | 26 ++++++++--- .../testthat/test-dplyr-string-functions.R | 44 +++++++++++++++++-- 3 files changed, 67 insertions(+), 8 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal.cc b/cpp/src/arrow/compute/kernels/scalar_temporal.cc index 59ffee73c97..396eec842ae 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal.cc @@ -529,6 +529,11 @@ struct Strftime { const StrftimeOptions& options = StrftimeState::Get(ctx); auto timezone = GetInputTimezone(type); + // This check is due to surprising %c behavior. + // See https://github.com/HowardHinnant/date/issues/704 + if ((options.format.find("%c") != std::string::npos) && (options.locale != "C")) { + return Status::Invalid("%c flag is not supported in non-C locales."); + } if (timezone.empty()) { if ((options.format.find("%z") != std::string::npos) || (options.format.find("%Z") != std::string::npos)) { diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index 3184c020ca4..8014160ea8c 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -673,11 +673,6 @@ nse_funcs$strptime <- function(x, format = "%Y-%m-%d %H:%M:%S", tz = NULL, unit } nse_funcs$strftime <- function(x, format = "", tz = "", usetz = FALSE) { - if (grepl("%c", format)) { - # This check is due to differences in the way %c currently works in Arrow and R's strftime. - # We can revisit this after this is resolved: https://github.com/HowardHinnant/date/issues/704 - arrow_not_supported("%c flag currently not supported.") - } if (usetz) { format <- paste(format, "%Z") } @@ -690,6 +685,27 @@ nse_funcs$strftime <- function(x, format = "", tz = "", usetz = FALSE) { Expression$create("strftime", ts, options = list(format = format, locale = Sys.getlocale("LC_TIME"))) } +ISO8601_precision_map <- + list(y = "%Y", + ym = "%Y-%m", + ymd = "%Y-%m-%d", + ymdh = "%Y-%m-%dT%H", + ymdhm = "%Y-%m-%dT%H:%M", + ymdhms = "%Y-%m-%dT%H:%M:%S") + +nse_funcs$format_ISO8601 <- function(x, usetz = FALSE, precision = NULL, ...) { + if(is.null(precision)) { + precision <- "ymdhms" + } + if (is.null(format <- ISO8601_precision_map[[precision]])) { + stop("Invalid value for precision provided: ", precision) + } + if (usetz) { + format <- paste0(format, "%z") + } + Expression$create("strftime", x, options = list(format = format, locale = "C")) +} + nse_funcs$second <- function(x) { Expression$create("add", Expression$create("second", x), Expression$create("subsecond", x)) } diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R index 48678e2645b..5153a4fc088 100644 --- a/r/tests/testthat/test-dplyr-string-functions.R +++ b/r/tests/testthat/test-dplyr-string-functions.R @@ -19,6 +19,7 @@ skip_if_not_available("dataset") skip_if_not_available("utf8proc") library(dplyr) +library(lubridate) library(stringr) library(stringi) @@ -757,13 +758,13 @@ test_that("strftime", { ) # This check is due to differences in the way %c currently works in Arrow and R's strftime. - # We can revisit this after this is resolved: https://github.com/HowardHinnant/date/issues/704 - expect_warning( + # We can revisit after https://github.com/HowardHinnant/date/issues/704 is resolved. + expect_error( times %>% Table$create() %>% mutate(x = strftime(x, format = "%c")) %>% collect(), - "%c flag currently not supported. not supported by Arrow" + "%c flag is not supported in non-C locales." ) # Output precision of %S depends on the input timestamp precision. @@ -780,6 +781,43 @@ test_that("strftime", { ) }) +test_that("format_ISO8601", { + skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-13168 + times <- tibble(x = c(lubridate::ymd_hms("2018-10-07 19:04:05", tz = "Etc/GMT+6"), NA)) + + expect_dplyr_equal( + input %>% + mutate(x = format_ISO8601(x, precision = "ymd", usetz = FALSE)) %>% + collect(), + times + ) + + expect_dplyr_equal( + input %>% + mutate(x = format_ISO8601(x, precision = "ymd", usetz = TRUE)) %>% + collect(), + times + ) + + # See comment regarding %S flag in strftime tests + expect_dplyr_equal( + input %>% + mutate(x = format_ISO8601(x, precision = "ymdhms", usetz = FALSE)) %>% + mutate(x = gsub("\\.0*", "", x)) %>% + collect(), + times + ) + + # See comment regarding %S flag in strftime tests + expect_dplyr_equal( + input %>% + mutate(x = format_ISO8601(x, precision = "ymdhms", usetz = TRUE)) %>% + mutate(x = gsub("\\.0*", "", x)) %>% + collect(), + times + ) +}) + test_that("arrow_find_substring and arrow_find_substring_regex", { df <- tibble(x = c("Foo and Bar", "baz and qux and quux")) From 3a74ae6caf3eacab7e40c91e7b2487a2aaa18cbf Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Wed, 22 Sep 2021 22:50:25 +0200 Subject: [PATCH 7/8] Update r/R/dplyr-functions.R Co-authored-by: Jonathan Keane --- r/R/dplyr-functions.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index 8014160ea8c..fb617280298 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -694,7 +694,7 @@ ISO8601_precision_map <- ymdhms = "%Y-%m-%dT%H:%M:%S") nse_funcs$format_ISO8601 <- function(x, usetz = FALSE, precision = NULL, ...) { - if(is.null(precision)) { + if (is.null(precision)) { precision <- "ymdhms" } if (is.null(format <- ISO8601_precision_map[[precision]])) { From d45cd22665854bf3f40c7329bd9b8b7e115829c3 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Thu, 23 Sep 2021 14:00:21 +0200 Subject: [PATCH 8/8] Update r/R/dplyr-functions.R Co-authored-by: Nic --- r/R/dplyr-functions.R | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index fb617280298..a51bf38de23 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -685,21 +685,29 @@ nse_funcs$strftime <- function(x, format = "", tz = "", usetz = FALSE) { Expression$create("strftime", ts, options = list(format = format, locale = Sys.getlocale("LC_TIME"))) } -ISO8601_precision_map <- - list(y = "%Y", - ym = "%Y-%m", - ymd = "%Y-%m-%d", - ymdh = "%Y-%m-%dT%H", - ymdhm = "%Y-%m-%dT%H:%M", - ymdhms = "%Y-%m-%dT%H:%M:%S") - nse_funcs$format_ISO8601 <- function(x, usetz = FALSE, precision = NULL, ...) { + ISO8601_precision_map <- + list(y = "%Y", + ym = "%Y-%m", + ymd = "%Y-%m-%d", + ymdh = "%Y-%m-%dT%H", + ymdhm = "%Y-%m-%dT%H:%M", + ymdhms = "%Y-%m-%dT%H:%M:%S") + if (is.null(precision)) { precision <- "ymdhms" } - if (is.null(format <- ISO8601_precision_map[[precision]])) { - stop("Invalid value for precision provided: ", precision) + if (!precision %in% names(ISO8601_precision_map)) { + abort( + paste( + "`precision` must be one of the following values:", + paste(names(ISO8601_precision_map), collapse = ", "), + "\nValue supplied was: ", + precision + ) + ) } + format <- ISO8601_precision_map[[precision]] if (usetz) { format <- paste0(format, "%z") }