diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index 91d1b21ad88..a068cf09c61 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -442,3 +442,30 @@ nse_funcs$strptime <- function(x, format = "%Y-%m-%d %H:%M:%S", tz = NULL, unit Expression$create("strptime", x, options = list(format = format, unit = unit)) } + +nse_funcs$second <- function(x) { + Expression$create("add", Expression$create("second", x), Expression$create("subsecond", x)) +} + +# After ARROW-13054 is completed, we can refactor this for simplicity +# +# Arrow's `day_of_week` kernel counts from 0 (Monday) to 6 (Sunday), whereas +# `lubridate::wday` counts from 1 to 7, and allows users to specify which day +# of the week is first (Sunday by default). This Expression converts the returned +# day of the week back to the value that would be returned by lubridate by +# providing offset values based on the specified week_start day, and adding 1 +# so the returned value is 1-indexed instead of 0-indexed. +nse_funcs$wday <- function(x, label = FALSE, abbr = TRUE, week_start = getOption("lubridate.week.start", 7)) { + + # The "day_of_week" compute function returns numeric days of week and not locale-aware strftime + # When the ticket below is resolved, we should be able to support the label argument + # https://issues.apache.org/jira/browse/ARROW-13133 + if (label) { + arrow_not_supported("Label argument") + } + + # overall formula to convert from arrow::wday to lubridate::wday is: + # ((wday(day) - start + 8) %% 7) + 1 + ((Expression$create("day_of_week", x) - Expression$scalar(week_start) + 8) %% 7) + 1 + +} diff --git a/r/R/expression.R b/r/R/expression.R index ba542339ff8..b06490d678c 100644 --- a/r/R/expression.R +++ b/r/R/expression.R @@ -29,8 +29,19 @@ # stringr spellings of those "str_length" = "utf8_length", "str_to_lower" = "utf8_lower", - "str_to_upper" = "utf8_upper" - # str_trim is defined in dplyr.R + "str_to_upper" = "utf8_upper", + # str_trim is defined in dplyr-functions.R + "year" = "year", + "isoyear" = "iso_year", + "quarter" = "quarter", + "month" = "month", + "isoweek" = "iso_week", + "day" = "day", + # wday is defined in dplyr-functions.R + "yday" = "day_of_year", + "hour" = "hour", + # second is defined in dplyr-functions.R + "minute" = "minute" ) .binary_function_map <- list( diff --git a/r/tests/testthat/test-dplyr-lubridate.R b/r/tests/testthat/test-dplyr-lubridate.R new file mode 100644 index 00000000000..2ebb6f3b93e --- /dev/null +++ b/r/tests/testthat/test-dplyr-lubridate.R @@ -0,0 +1,178 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +library(lubridate) +library(dplyr) + +test_date <- as.POSIXct("2017-01-01 00:00:12.3456789", tz = "") +test_df <- tibble::tibble(date = test_date) + +# We can support this feature after ARROW-12980 is merged +test_that("timezone aware timestamps are not supported",{ + + tz_aware_date <- as.POSIXct("2017-01-01 00:00:12.3456789", tz = "BST") + tz_aware_df <- tibble::tibble(date = tz_aware_date) + + expect_error( + Table$create(tz_aware_df) %>% + mutate(x = wday(date)) %>% + collect(), + "Cannot extract components from timestamp with specific timezone" + ) +}) + +# We can support this feature when ARROW-13138 is resolved +test_that("date32 objects are not supported",{ + + date <- ymd("2017-01-01") + df <- tibble::tibble(date = date) + + expect_error( + Table$create(df) %>% + mutate(x = year(date)) %>% + collect(), + "Function year has no kernel matching input types" + ) +}) + + +test_that("extract year from date", { + expect_dplyr_equal( + input %>% + mutate(x = year(date)) %>% + collect(), + test_df, + check.tzone = FALSE + ) +}) + +test_that("extract isoyear from date", { + expect_dplyr_equal( + input %>% + mutate(x = isoyear(date)) %>% + collect(), + test_df + ) +}) + +test_that("extract quarter from date", { + expect_dplyr_equal( + input %>% + mutate(x = quarter(date)) %>% + collect(), + test_df + ) +}) + +test_that("extract month from date", { + expect_dplyr_equal( + input %>% + mutate(x = month(date)) %>% + collect(), + test_df + ) +}) + +test_that("extract isoweek from date", { + expect_dplyr_equal( + input %>% + mutate(x = isoweek(date)) %>% + collect(), + test_df + ) +}) + +test_that("extract day from date", { + expect_dplyr_equal( + input %>% + mutate(x = day(date)) %>% + collect(), + test_df + ) +}) + + +test_that("extract wday from date", { + expect_dplyr_equal( + input %>% + mutate(x = wday(date)) %>% + collect(), + test_df + ) + + expect_dplyr_equal( + input %>% + mutate(x = wday(date, week_start = 3)) %>% + collect(), + test_df + ) + + expect_dplyr_equal( + input %>% + mutate(x = wday(date, week_start = 1)) %>% + collect(), + test_df + ) + + # We should be able to support the label argument after this ticket is resolved: + # https://issues.apache.org/jira/browse/ARROW-13133 + x <- Expression$field_ref("x") + expect_error( + nse_funcs$wday(x, label = TRUE), + "Label argument not supported by Arrow" + ) + +}) + +test_that("extract yday from date", { + expect_dplyr_equal( + input %>% + mutate(x = yday(date)) %>% + collect(), + test_df + ) +}) + +test_that("extract hour from date", { + expect_dplyr_equal( + input %>% + mutate(x = hour(date)) %>% + collect(), + test_df + ) +}) + +test_that("extract minute from date", { + expect_dplyr_equal( + input %>% + mutate(x = minute(date)) %>% + collect(), + test_df + ) +}) + +test_that("extract second from date", { + expect_dplyr_equal( + input %>% + mutate(x = second(date)) %>% + collect(), + test_df, + # arrow supports nanosecond resolution but lubridate does not + tolerance = 1e-6 + ) +}) +