From 07cbd08df188588a8c12d8816b5ee39a21f3804e Mon Sep 17 00:00:00 2001 From: Rok Date: Tue, 21 Sep 2021 19:41:04 +0200 Subject: [PATCH 1/2] Add date32 date64 to strftime. --- .../arrow/compute/kernels/scalar_temporal.cc | 2 +- .../compute/kernels/scalar_temporal_test.cc | 21 ++++++++++ r/R/dplyr-functions.R | 14 ++++--- r/tests/testthat/test-dplyr-lubridate.R | 40 +++++++++++++------ .../testthat/test-dplyr-string-functions.R | 25 ++++++++---- 5 files changed, 75 insertions(+), 27 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal.cc b/cpp/src/arrow/compute/kernels/scalar_temporal.cc index d4e340c5ff1..bb4b44e77fb 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal.cc @@ -1199,7 +1199,7 @@ void RegisterScalarTemporal(FunctionRegistry* registry) { static const auto default_strftime_options = StrftimeOptions(); auto strftime = MakeSimpleUnaryTemporal( - "strftime", {WithTimes, WithTimestamps}, utf8(), &strftime_doc, + "strftime", {WithDates, WithTimestamps}, utf8(), &strftime_doc, &default_strftime_options, StrftimeState::Init); DCHECK_OK(registry->AddFunction(std::move(strftime))); diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index fa9de95a494..f457a4f02b9 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -639,6 +639,7 @@ TEST_F(ScalarTemporalTest, Strftime) { CheckScalarUnary("strftime", timestamp(TimeUnit::NANO, "US/Hawaii"), nanoseconds, utf8(), string_nanoseconds, &options); +<<<<<<< HEAD auto options_hms = StrftimeOptions("%H:%M:%S"); auto options_ymdhms = StrftimeOptions("%Y-%m-%dT%H:%M:%S"); @@ -693,6 +694,26 @@ TEST_F(ScalarTemporalTest, Strftime) { Invalid, testing::HasSubstr("Invalid: Timezone not present, cannot convert to string"), Strftime(arr_ns, StrftimeOptions("%Y-%m-%dT%H:%M:%S%Z"))); + + auto options_ymd = StrftimeOptions("%Y-%m-%d"); + auto options_ymdhms = StrftimeOptions("%Y-%m-%dT%H:%M:%S"); + + const char* date32s = R"([0, 10957, 10958, null])"; + const char* date64s = R"([0, 946684800000, 946771200000, null])"; + const char* dates32_ymd = R"(["1970-01-01", "2000-01-01", "2000-01-02", null])"; + const char* dates64_ymd = R"(["1970-01-01", "2000-01-01", "2000-01-02", null])"; + const char* dates32_ymdhms = + R"(["1970-01-01T00:00:00", "2000-01-01T00:00:00", "2000-01-02T00:00:00", null])"; + const char* dates64_ymdhms = + R"(["1970-01-01T00:00:00.000", "2000-01-01T00:00:00.000", + "2000-01-02T00:00:00.000", null])"; + + CheckScalarUnary("strftime", date32(), date32s, utf8(), dates32_ymd, &options_ymd); + CheckScalarUnary("strftime", date64(), date64s, utf8(), dates64_ymd, &options_ymd); + CheckScalarUnary("strftime", date32(), date32s, utf8(), dates32_ymdhms, + &options_ymdhms); + CheckScalarUnary("strftime", date64(), date64s, utf8(), dates64_ymdhms, + &options_ymdhms); } TEST_F(ScalarTemporalTest, StrftimeNoTimezone) { diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index 4b67e947e4a..1f770d9e288 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -731,13 +731,15 @@ nse_funcs$round <- function(x, digits = 0) { ) } -nse_funcs$wday <- function(x, label = FALSE, abbr = TRUE, week_start = getOption("lubridate.week.start", 7)) { - - # The "day_of_week" compute function returns numeric days of week and not locale-aware strftime - # When the ticket below is resolved, we should be able to support the label argument - # https://issues.apache.org/jira/browse/ARROW-13133 +nse_funcs$wday <- function(x, label = FALSE, abbr = TRUE, week_start = getOption("lubridate.week.start", 7), + locale = Sys.getlocale("LC_TIME")) { if (label) { - arrow_not_supported("Label argument") + if (abbr) ( + format <- "%a" + ) else { + format <- "%A" + } + return(Expression$create("strftime", x, options = list(format = format, locale = locale))) } Expression$create("day_of_week", x, options = list(count_from_zero = FALSE, week_start = week_start)) diff --git a/r/tests/testthat/test-dplyr-lubridate.R b/r/tests/testthat/test-dplyr-lubridate.R index d3b41da58ad..19c867bfc0c 100644 --- a/r/tests/testthat/test-dplyr-lubridate.R +++ b/r/tests/testthat/test-dplyr-lubridate.R @@ -125,12 +125,20 @@ test_that("extract wday from timestamp", { test_df ) - # We should be able to support the label argument after this ticket is resolved: - # https://issues.apache.org/jira/browse/ARROW-13133 - x <- Expression$field_ref("x") - expect_error( - nse_funcs$wday(x, label = TRUE), - "Label argument not supported by Arrow" + expect_dplyr_equal( + input %>% + mutate(x = wday(date, label = TRUE)) %>% + mutate(x = as.character(x)) %>% + collect(), + test_df + ) + + expect_dplyr_equal( + input %>% + mutate(x = wday(datetime, label = TRUE, abbr = TRUE)) %>% + mutate(x = as.character(x)) %>% + collect(), + test_df ) }) @@ -259,12 +267,20 @@ test_that("extract wday from date", { test_df ) - # We should be able to support the label argument after this ticket is resolved: - # https://issues.apache.org/jira/browse/ARROW-13133 - x <- Expression$field_ref("x") - expect_error( - nse_funcs$wday(x, label = TRUE), - "Label argument not supported by Arrow" + expect_dplyr_equal( + input %>% + mutate(x = wday(date, label = TRUE, abbr = TRUE)) %>% + mutate(x = as.character(x)) %>% + collect(), + test_df + ) + + expect_dplyr_equal( + input %>% + mutate(x = wday(date, label = TRUE)) %>% + mutate(x = as.character(x)) %>% + collect(), + test_df ) }) diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R index 13c4415ba5c..fc91995bf62 100644 --- a/r/tests/testthat/test-dplyr-string-functions.R +++ b/r/tests/testthat/test-dplyr-string-functions.R @@ -737,27 +737,36 @@ test_that("errors in strptime", { test_that("strftime", { skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-13168 - times <- tibble(x = c(lubridate::ymd_hms("2018-10-07 19:04:05", tz = "Etc/GMT+6"), NA)) - seconds <- tibble(x = c("05.000000", NA)) + times <- tibble( + datetime = c(lubridate::ymd_hms("2018-10-07 19:04:05", tz = "Etc/GMT+6"), NA), + date = c(as.Date("2021-09-09"), NA) + ) formats <- "%a %A %w %d %b %B %m %y %Y %H %I %p %M %z %Z %j %U %W %x %X %% %G %V %u" expect_dplyr_equal( input %>% - mutate(x = strftime(x, format = formats)) %>% + mutate(x = strftime(datetime, format = formats)) %>% + collect(), + times + ) + + expect_dplyr_equal( + input %>% + mutate(x = strftime(date, format = formats)) %>% collect(), times ) expect_dplyr_equal( input %>% - mutate(x = strftime(x, format = formats, tz = "Pacific/Marquesas")) %>% + mutate(x = strftime(datetime, format = formats, tz = "Pacific/Marquesas")) %>% collect(), times ) expect_dplyr_equal( input %>% - mutate(x = strftime(x, format = formats, tz = "EST", usetz = TRUE)) %>% + mutate(x = strftime(datetime, format = formats, tz = "EST", usetz = TRUE)) %>% collect(), times ) @@ -765,7 +774,7 @@ test_that("strftime", { withr::with_timezone("Pacific/Marquesas", expect_dplyr_equal( input %>% - mutate(x = strftime(x, format = formats, tz = "EST")) %>% + mutate(x = strftime(datetime, format = formats, tz = "EST")) %>% collect(), times ) @@ -776,7 +785,7 @@ test_that("strftime", { expect_error( times %>% Table$create() %>% - mutate(x = strftime(x, format = "%c")) %>% + mutate(x = strftime(datetime, format = "%c")) %>% collect(), "%c flag is not supported in non-C locales." ) @@ -787,7 +796,7 @@ test_that("strftime", { # point numbers with 3, 6 and 9 decimal places respectively. expect_dplyr_equal( input %>% - mutate(x = strftime(x, format = "%S")) %>% + mutate(x = strftime(datetime, format = "%S")) %>% transmute(as.double(substr(x, 1, 2))) %>% collect(), times, From 3624c13e4548dc41a26eebab2b5a71b604c169dc Mon Sep 17 00:00:00 2001 From: Rok Date: Sat, 25 Sep 2021 12:36:08 +0200 Subject: [PATCH 2/2] Harmonize with ARROW-14111. --- cpp/src/arrow/compute/kernels/scalar_temporal.cc | 2 +- cpp/src/arrow/compute/kernels/scalar_temporal_test.cc | 2 -- docs/source/cpp/compute.rst | 2 +- r/tests/testthat/test-dplyr-lubridate.R | 4 ++++ 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal.cc b/cpp/src/arrow/compute/kernels/scalar_temporal.cc index bb4b44e77fb..674be60d42b 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal.cc @@ -1199,7 +1199,7 @@ void RegisterScalarTemporal(FunctionRegistry* registry) { static const auto default_strftime_options = StrftimeOptions(); auto strftime = MakeSimpleUnaryTemporal( - "strftime", {WithDates, WithTimestamps}, utf8(), &strftime_doc, + "strftime", {WithTimes, WithDates, WithTimestamps}, utf8(), &strftime_doc, &default_strftime_options, StrftimeState::Init); DCHECK_OK(registry->AddFunction(std::move(strftime))); diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index f457a4f02b9..fcc54e418ee 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -639,7 +639,6 @@ TEST_F(ScalarTemporalTest, Strftime) { CheckScalarUnary("strftime", timestamp(TimeUnit::NANO, "US/Hawaii"), nanoseconds, utf8(), string_nanoseconds, &options); -<<<<<<< HEAD auto options_hms = StrftimeOptions("%H:%M:%S"); auto options_ymdhms = StrftimeOptions("%Y-%m-%dT%H:%M:%S"); @@ -696,7 +695,6 @@ TEST_F(ScalarTemporalTest, Strftime) { Strftime(arr_ns, StrftimeOptions("%Y-%m-%dT%H:%M:%S%Z"))); auto options_ymd = StrftimeOptions("%Y-%m-%d"); - auto options_ymdhms = StrftimeOptions("%Y-%m-%dT%H:%M:%S"); const char* date32s = R"([0, 10957, 10958, null])"; const char* date64s = R"([0, 946684800000, 946771200000, null])"; diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst index f6988a0b0d1..3c53a255023 100644 --- a/docs/source/cpp/compute.rst +++ b/docs/source/cpp/compute.rst @@ -1215,7 +1215,7 @@ provided by a concrete function :func:`~arrow::compute::Cast`. +=================+============+====================+==================+==============================+=======+ | cast | Unary | Many | Variable | :struct:`CastOptions` | | +-----------------+------------+--------------------+------------------+------------------------------+-------+ -| strftime | Unary | Timestamp, Time | String | :struct:`StrftimeOptions` | \(1) | +| strftime | Unary | Temporal | String | :struct:`StrftimeOptions` | \(1) | +-----------------+------------+--------------------+------------------+------------------------------+-------+ | strptime | Unary | String-like | Timestamp | :struct:`StrptimeOptions` | | +-----------------+------------+--------------------+------------------+------------------------------+-------+ diff --git a/r/tests/testthat/test-dplyr-lubridate.R b/r/tests/testthat/test-dplyr-lubridate.R index 19c867bfc0c..cf4b0c5a8fd 100644 --- a/r/tests/testthat/test-dplyr-lubridate.R +++ b/r/tests/testthat/test-dplyr-lubridate.R @@ -125,6 +125,8 @@ test_that("extract wday from timestamp", { test_df ) + skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-13168 + expect_dplyr_equal( input %>% mutate(x = wday(date, label = TRUE)) %>% @@ -267,6 +269,8 @@ test_that("extract wday from date", { test_df ) + skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-13168 + expect_dplyr_equal( input %>% mutate(x = wday(date, label = TRUE, abbr = TRUE)) %>%