diff --git a/r/NEWS.md b/r/NEWS.md index 81a23aa0318..43e36a52541 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -20,12 +20,8 @@ # arrow 7.0.0.9000 * `read_csv_arrow()`'s readr-style type `T` is now mapped to `timestamp(unit = "ns")` instead of `timestamp(unit = "s")`. -* `lubridate`: - * `tz()` to extract/get timezone - * `semester()` to extract/get semester - * `dst()` to get daylight savings time indicator. - * `date()` to extract date - * `epiyear()` to get epiyear +* `lubridate`: + * component extraction functions: `tz()` (timezone), `semester()` (semester), `dst()` (daylight savings time indicator), `date()` (extract date), `epiyear()` (epiyear), improvements to `month()`, which now works with integer inputs. * date-time functionality: * `as.Date()` to convert to date diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 5a22c970965..8f5a7689c07 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -105,17 +105,38 @@ register_bindings_datetime <- function() { (call_binding("yday", x) - 1) %/% 7 + 1 }) - register_binding("month", function(x, label = FALSE, abbr = TRUE, locale = Sys.getlocale("LC_TIME")) { + register_binding("month", function(x, + label = FALSE, + abbr = TRUE, + locale = Sys.getlocale("LC_TIME")) { + + if (call_binding("is.integer", x)) { + x <- call_binding("if_else", + call_binding("between", x, 1, 12), + x, + NA_integer_) + if (!label) { + # if we don't need a label we can return the integer itself (already + # constrained to 1:12) + return(x) + } + # make the integer into a date32() - which interprets integers as + # days from epoch (we multiply by 28 to be able to later extract the + # month with label) - NB this builds a false date (to be used by strftime) + # since we only know and care about the month + x <- build_expr("cast", x * 28L, options = cast_options(to_type = date32())) + } + if (label) { if (abbr) { format <- "%b" } else { format <- "%B" } - return(Expression$create("strftime", x, options = list(format = format, locale = locale))) + return(build_expr("strftime", x, options = list(format = format, locale = locale))) } - Expression$create("month", x) + build_expr("month", x) }) register_binding("is.Date", function(x) { diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 073041a4b87..d0afda8912d 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -783,16 +783,12 @@ test_that("semester works with temporal types and integers", { collect(), test_df ) - # semester extraction from months as integers is not supported yet - # it will be once https://issues.apache.org/jira/browse/ARROW-15701 is done - # TODO change from expect_error to compare_dplyr_bindings - expect_error( - test_df %>% - arrow_table() %>% + + compare_dplyr_binding( + .input %>% mutate(sem_month_as_int = semester(month_as_int)) %>% collect(), - regexp = "NotImplemented: Function 'month' has no kernel matching input types (array[int32])", - fixed = TRUE + test_df ) expect_error( @@ -802,8 +798,8 @@ test_that("semester works with temporal types and integers", { collect(), regexp = "NotImplemented: Function 'month' has no kernel matching input types (array[string])", fixed = TRUE - ) - }) + ) +}) test_that("dst extracts daylight savings time correctly", { test_df <- tibble( @@ -820,6 +816,80 @@ test_that("dst extracts daylight savings time correctly", { ) }) +test_that("month() supports integer input", { + test_df_month <- tibble( + month_as_int = c(1:12, NA) + ) + + compare_dplyr_binding( + .input %>% + mutate(month_int_input = month(month_as_int)) %>% + collect(), + test_df_month + ) + + skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-13168 + + compare_dplyr_binding( + .input %>% + # R returns ordered factor whereas Arrow returns character + mutate( + month_int_input = as.character(month(month_as_int, label = TRUE)) + ) %>% + collect(), + test_df_month + ) + + compare_dplyr_binding( + .input %>% + # R returns ordered factor whereas Arrow returns character + mutate( + month_int_input = as.character( + month(month_as_int, label = TRUE, abbr = FALSE) + ) + ) %>% + collect(), + test_df_month + ) + }) + +test_that("month() errors with double input and returns NA with int outside 1:12", { + test_df_month <- tibble( + month_as_int = c(-1L, 1L, 13L, NA), + month_as_double = month_as_int + 0.1 + ) + + expect_equal( + test_df_month %>% + arrow_table() %>% + select(month_as_int) %>% + mutate(month_int_input = month(month_as_int)) %>% + collect(), + tibble( + month_as_int = c(-1L, 1L, 13L, NA), + month_int_input = c(NA, 1L, NA, NA) + ) + ) + + expect_error( + test_df_month %>% + arrow_table() %>% + mutate(month_dbl_input = month(month_as_double)) %>% + collect(), + regexp = "Function 'month' has no kernel matching input types (array[double])", + fixed = TRUE + ) + + expect_error( + test_df_month %>% + record_batch() %>% + mutate(month_dbl_input = month(month_as_double)) %>% + collect(), + regexp = "Function 'month' has no kernel matching input types (array[double])", + fixed = TRUE + ) +}) + test_that("date works in arrow", { # https://issues.apache.org/jira/browse/ARROW-13168 skip_on_os("windows")