From cf1e4b23fce698bd00e4d75de8bfbbbcf6f90413 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 21 Feb 2022 22:17:55 +0000 Subject: [PATCH 01/17] pass at `month()` with integer inputs + early testing --- r/R/dplyr-funcs-datetime.R | 17 ++++++++++++++ r/tests/testthat/test-dplyr-funcs-datetime.R | 24 ++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 5a22c970965..a27c692580a 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -106,6 +106,23 @@ register_bindings_datetime <- function() { }) register_binding("month", function(x, label = FALSE, abbr = TRUE, locale = Sys.getlocale("LC_TIME")) { + if (call_binding("is.integer", x)) { + if (inherits(x, "Expression")) { + x <- call_binding( + "if_else", + call_binding_agg("all", call_binding("between", x, 1, 12)), + x, + abort("bla1: Values are not in 1:12") + ) + } else { + if (all(1 <= x & x <= 12)) { + x <- x + } else { + abort("bla2: Values are not in 1:12") + } + } + } + if (label) { if (abbr) { format <- "%b" diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 073041a4b87..fb1f5b99798 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -769,6 +769,7 @@ test_that("extract tz", { }) test_that("semester works with temporal types and integers", { +test_that("month() supports integer input",{ test_df <- tibble( month_as_int = c(1:12, NA), month_as_char_pad = sprintf("%02i", month_as_int), @@ -817,6 +818,29 @@ test_that("dst extracts daylight savings time correctly", { mutate(dst = dst(dates)) %>% collect(), test_df + test_df %>% + arrow_table() %>% + mutate(month_int_input = month(month_as_int)) %>% + collect() + + # we need to support both integer and integer + label, similar to below + lubridate::month(1) + lubridate::month(1.1) + lubridate::month(4L) + lubridate::month(4L, label = TRUE) + + + expect_error( + call_binding("month", "not a month") + ) + expect_error( + call_binding("month", 22L) + ) + expect_error( + call_binding("month", -4) + ) + expect_error( + call_binding("month", NA) ) }) From 61f2082831660bdc839caea8d63e9b9180b63ce8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 21 Feb 2022 23:02:58 +0000 Subject: [PATCH 02/17] eod --- r/R/dplyr-funcs-datetime.R | 3 +++ r/tests/testthat/test-dplyr-funcs-datetime.R | 13 ++++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index a27c692580a..b48581c56b5 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -106,6 +106,7 @@ register_bindings_datetime <- function() { }) register_binding("month", function(x, label = FALSE, abbr = TRUE, locale = Sys.getlocale("LC_TIME")) { + browser() if (call_binding("is.integer", x)) { if (inherits(x, "Expression")) { x <- call_binding( @@ -114,8 +115,10 @@ register_bindings_datetime <- function() { x, abort("bla1: Values are not in 1:12") ) + return(x) } else { if (all(1 <= x & x <= 12)) { + # x needs to be an Expression x <- x } else { abort("bla2: Values are not in 1:12") diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index fb1f5b99798..f572012c6d2 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -356,6 +356,11 @@ test_that("extract month from timestamp", { test_df ) + test_df %>% + arrow_table() %>% + mutate(x = month(datetime, label = TRUE)) %>% + collect() + skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-13168 compare_dplyr_binding( @@ -770,7 +775,7 @@ test_that("extract tz", { test_that("semester works with temporal types and integers", { test_that("month() supports integer input",{ - test_df <- tibble( + test_df_month <- tibble( month_as_int = c(1:12, NA), month_as_char_pad = sprintf("%02i", month_as_int), dates = as.Date(paste0("2021-", month_as_char_pad, "-15")) @@ -819,10 +824,16 @@ test_that("dst extracts daylight savings time correctly", { collect(), test_df test_df %>% + test_df_month %>% arrow_table() %>% mutate(month_int_input = month(month_as_int)) %>% collect() + at <- arrow_table(example_with_times) + at$posixlt$as_vector() + at$posixlt$as_vector()$mon + + # we need to support both integer and integer + label, similar to below lubridate::month(1) lubridate::month(1.1) From 52f7acc3c3e4d79053d09c28ad356a8ca78d84a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 2 Mar 2022 08:49:48 +0000 Subject: [PATCH 03/17] updated `month()` to accept integer inputs and return NA if the integer is outside the 1:12 range --- r/R/dplyr-funcs-datetime.R | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index b48581c56b5..8798011e960 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -105,25 +105,20 @@ register_bindings_datetime <- function() { (call_binding("yday", x) - 1) %/% 7 + 1 }) - register_binding("month", function(x, label = FALSE, abbr = TRUE, locale = Sys.getlocale("LC_TIME")) { - browser() + register_binding("month", function(x, + label = FALSE, + abbr = TRUE, + locale = Sys.getlocale("LC_TIME")) { + if (call_binding("is.integer", x)) { - if (inherits(x, "Expression")) { - x <- call_binding( - "if_else", - call_binding_agg("all", call_binding("between", x, 1, 12)), - x, - abort("bla1: Values are not in 1:12") - ) - return(x) - } else { - if (all(1 <= x & x <= 12)) { - # x needs to be an Expression - x <- x - } else { - abort("bla2: Values are not in 1:12") - } + if (is.integer(x)) { + x <- build_expr("cast", x, options = cast_options(to_type = int32())) } + x <- call_binding("if_else", + call_binding("between", x, 1, 12), + x, + NA_integer_) + x <- build_expr("cast", x * 28L, options = cast_options(to_type = date32())) } if (label) { @@ -132,7 +127,8 @@ register_bindings_datetime <- function() { } else { format <- "%B" } - return(Expression$create("strftime", x, options = list(format = format, locale = locale))) + return(Expression$create("strftime", x, + options = list(format = format, locale = locale))) } Expression$create("month", x) From 617e2a94c8b7732ee96d6f65f1626d4cd0d6375d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 2 Mar 2022 09:02:09 +0000 Subject: [PATCH 04/17] added more unit tests --- r/tests/testthat/test-dplyr-funcs-datetime.R | 70 ++++++++++++++++---- 1 file changed, 56 insertions(+), 14 deletions(-) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index f572012c6d2..fef9e43b387 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -776,9 +776,7 @@ test_that("extract tz", { test_that("semester works with temporal types and integers", { test_that("month() supports integer input",{ test_df_month <- tibble( - month_as_int = c(1:12, NA), - month_as_char_pad = sprintf("%02i", month_as_int), - dates = as.Date(paste0("2021-", month_as_char_pad, "-15")) + month_as_int = c(1:12, NA) ) # semester extraction from dates @@ -833,25 +831,69 @@ test_that("dst extracts daylight savings time correctly", { at$posixlt$as_vector() at$posixlt$as_vector()$mon + compare_dplyr_binding( + .input %>% + mutate(month_int_input = month(month_as_int)) %>% + collect(), + test_df_month + ) - # we need to support both integer and integer + label, similar to below - lubridate::month(1) - lubridate::month(1.1) - lubridate::month(4L) - lubridate::month(4L, label = TRUE) + compare_dplyr_binding( + .input %>% + # R returns ordered factor whereas Arrow returns character + mutate( + month_int_input = as.character(month(month_as_int, label = TRUE)) + ) %>% + collect(), + test_df_month + ) + compare_dplyr_binding( + .input %>% + # R returns ordered factor whereas Arrow returns character + mutate( + month_int_input = as.character( + month(month_as_int, label = TRUE, abbr = FALSE) + )) %>% + collect(), + test_df_month + ) +}) - expect_error( - call_binding("month", "not a month") +test_that("month() errors with double input and returns NA with int outside 1:12", { + test_df_month <- tibble( + month_as_int = c(-1L, 1L, 13L, NA), + month_as_double = month_as_int + 0.1 ) + expect_error( - call_binding("month", 22L) + test_df_month %>% + arrow_table() %>% + mutate(month_dbl_input = month(month_as_double)) %>% + collect(), + regexp = "Function 'month' has no kernel matching input types (array[double])", + fixed = TRUE ) + expect_error( - call_binding("month", -4) + test_df_month %>% + record_batch() %>% + mutate(month_dbl_input = month(month_as_double)) %>% + collect(), + regexp = "Function 'month' has no kernel matching input types (array[double])", + fixed = TRUE ) - expect_error( - call_binding("month", NA) + + expect_equal( + test_df_month %>% + arrow_table() %>% + select(month_as_int) %>% + mutate(month_int_input = month(month_as_int)) %>% + collect(), + tibble( + month_as_int = c(-1L, 1L, 13L, NA), + month_int_input = c(NA, 1L, NA, NA) + ) ) }) From 136fcf275e57b0cf998c2ebda43e3225c653177b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 2 Mar 2022 09:19:03 +0000 Subject: [PATCH 05/17] rebase + update semester extraction from integer unit test --- r/tests/testthat/test-dplyr-funcs-datetime.R | 148 +++++++++---------- 1 file changed, 72 insertions(+), 76 deletions(-) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index fef9e43b387..5d86f2b5319 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -774,9 +774,10 @@ test_that("extract tz", { }) test_that("semester works with temporal types and integers", { -test_that("month() supports integer input",{ - test_df_month <- tibble( - month_as_int = c(1:12, NA) + test_df <- tibble( + month_as_int = c(1:12, NA), + month_as_char_pad = sprintf("%02i", month_as_int), + dates = as.Date(paste0("2021-", month_as_char_pad, "-15")) ) # semester extraction from dates @@ -787,16 +788,12 @@ test_that("month() supports integer input",{ collect(), test_df ) - # semester extraction from months as integers is not supported yet - # it will be once https://issues.apache.org/jira/browse/ARROW-15701 is done - # TODO change from expect_error to compare_dplyr_bindings - expect_error( - test_df %>% - arrow_table() %>% + + compare_dplyr_binding( + .input %>% mutate(sem_month_as_int = semester(month_as_int)) %>% collect(), - regexp = "NotImplemented: Function 'month' has no kernel matching input types (array[int32])", - fixed = TRUE + test_df ) expect_error( @@ -807,7 +804,7 @@ test_that("month() supports integer input",{ regexp = "NotImplemented: Function 'month' has no kernel matching input types (array[string])", fixed = TRUE ) - }) +}) test_that("dst extracts daylight savings time correctly", { test_df <- tibble( @@ -821,78 +818,77 @@ test_that("dst extracts daylight savings time correctly", { mutate(dst = dst(dates)) %>% collect(), test_df - test_df %>% - test_df_month %>% - arrow_table() %>% - mutate(month_int_input = month(month_as_int)) %>% - collect() + ) +}) - at <- arrow_table(example_with_times) - at$posixlt$as_vector() - at$posixlt$as_vector()$mon +test_that("month() supports integer input",{ + test_df_month <- tibble( + month_as_int = c(1:12, NA) + ) - compare_dplyr_binding( - .input %>% - mutate(month_int_input = month(month_as_int)) %>% - collect(), - test_df_month - ) + compare_dplyr_binding( + .input %>% + mutate(month_int_input = month(month_as_int)) %>% + collect(), + test_df_month + ) - compare_dplyr_binding( - .input %>% - # R returns ordered factor whereas Arrow returns character - mutate( - month_int_input = as.character(month(month_as_int, label = TRUE)) - ) %>% - collect(), - test_df_month - ) + compare_dplyr_binding( + .input %>% + # R returns ordered factor whereas Arrow returns character + mutate( + month_int_input = as.character(month(month_as_int, label = TRUE)) + ) %>% + collect(), + test_df_month + ) - compare_dplyr_binding( - .input %>% - # R returns ordered factor whereas Arrow returns character - mutate( - month_int_input = as.character( - month(month_as_int, label = TRUE, abbr = FALSE) - )) %>% - collect(), - test_df_month - ) -}) + compare_dplyr_binding( + .input %>% + # R returns ordered factor whereas Arrow returns character + mutate( + month_int_input = as.character( + month(month_as_int, label = TRUE, abbr = FALSE) + )) %>% + collect(), + test_df_month + ) + }) -test_that("month() errors with double input and returns NA with int outside 1:12", { - test_df_month <- tibble( - month_as_int = c(-1L, 1L, 13L, NA), - month_as_double = month_as_int + 0.1 - ) + test_that("month() errors with double input and returns NA with int outside 1:12", { + test_df_month <- tibble( + month_as_int = c(-1L, 1L, 13L, NA), + month_as_double = month_as_int + 0.1 + ) - expect_error( - test_df_month %>% - arrow_table() %>% - mutate(month_dbl_input = month(month_as_double)) %>% - collect(), - regexp = "Function 'month' has no kernel matching input types (array[double])", - fixed = TRUE - ) + expect_error( + test_df_month %>% + arrow_table() %>% + mutate(month_dbl_input = month(month_as_double)) %>% + collect(), + regexp = "Function 'month' has no kernel matching input types (array[double])", + fixed = TRUE + ) - expect_error( - test_df_month %>% - record_batch() %>% - mutate(month_dbl_input = month(month_as_double)) %>% - collect(), - regexp = "Function 'month' has no kernel matching input types (array[double])", - fixed = TRUE - ) + expect_error( + test_df_month %>% + record_batch() %>% + mutate(month_dbl_input = month(month_as_double)) %>% + collect(), + regexp = "Function 'month' has no kernel matching input types (array[double])", + fixed = TRUE + ) - expect_equal( - test_df_month %>% - arrow_table() %>% - select(month_as_int) %>% - mutate(month_int_input = month(month_as_int)) %>% - collect(), - tibble( - month_as_int = c(-1L, 1L, 13L, NA), - month_int_input = c(NA, 1L, NA, NA) + expect_equal( + test_df_month %>% + arrow_table() %>% + select(month_as_int) %>% + mutate(month_int_input = month(month_as_int)) %>% + collect(), + tibble( + month_as_int = c(-1L, 1L, 13L, NA), + month_int_input = c(NA, 1L, NA, NA) + ) ) ) }) From d882683d5fa8d44e30a7ab71c59822b6a7e04e8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 2 Mar 2022 09:21:47 +0000 Subject: [PATCH 06/17] clean-up --- r/tests/testthat/test-dplyr-funcs-datetime.R | 5 ----- 1 file changed, 5 deletions(-) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 5d86f2b5319..690bd7da897 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -356,11 +356,6 @@ test_that("extract month from timestamp", { test_df ) - test_df %>% - arrow_table() %>% - mutate(x = month(datetime, label = TRUE)) %>% - collect() - skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-13168 compare_dplyr_binding( From b882920a544aa94be08385ce0417cd505340544f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 2 Mar 2022 10:15:25 +0000 Subject: [PATCH 07/17] add NEWS and ignore some attributes on Windows (after adding the Win helper skip) --- r/NEWS.md | 3 +++ r/tests/testthat/helper-skip.R | 4 ++++ r/tests/testthat/test-dplyr-funcs-datetime.R | 3 ++- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/r/NEWS.md b/r/NEWS.md index 81a23aa0318..92a5dc5f3a5 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -28,6 +28,9 @@ * `epiyear()` to get epiyear * date-time functionality: * `as.Date()` to convert to date +* `lubridate`: + * component extraction functions: `tz()` (timezone), `semester()` (semester), `dst()` (daylight savings time indicator) + * improvements to `month()`, which now works with integer inputs # arrow 7.0.0 diff --git a/r/tests/testthat/helper-skip.R b/r/tests/testthat/helper-skip.R index df0de777258..f0761d1ce7f 100644 --- a/r/tests/testthat/helper-skip.R +++ b/r/tests/testthat/helper-skip.R @@ -78,3 +78,7 @@ process_is_running <- function(x) { cmd <- sprintf("ps aux | grep '%s' | grep -v grep", x) tryCatch(system(cmd, ignore.stdout = TRUE) == 0, error = function(e) FALSE) } + +on_windows <- function() { + ifelse(tolower(Sys.info()[["sysname"]]) == "windows", TRUE, FALSE) +} diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 690bd7da897..a94a484b2b8 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -835,7 +835,8 @@ test_that("month() supports integer input",{ month_int_input = as.character(month(month_as_int, label = TRUE)) ) %>% collect(), - test_df_month + test_df_month, + ignore_attr = on_windows() ) compare_dplyr_binding( From d4c4d00299920eb999b1fdb6b4c01bd938ab1674 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 2 Mar 2022 11:20:58 +0000 Subject: [PATCH 08/17] skip on windows since `strftime()` is not yet available --- r/R/dplyr-funcs-datetime.R | 3 +-- r/tests/testthat/test-dplyr-funcs-datetime.R | 5 +++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 8798011e960..79fd7374c90 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -127,8 +127,7 @@ register_bindings_datetime <- function() { } else { format <- "%B" } - return(Expression$create("strftime", x, - options = list(format = format, locale = locale))) + return(Expression$create("strftime", x, options = list(format = format, locale = locale))) } Expression$create("month", x) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index a94a484b2b8..5cf5b1c7abf 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -828,6 +828,8 @@ test_that("month() supports integer input",{ test_df_month ) + skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-13168 + compare_dplyr_binding( .input %>% # R returns ordered factor whereas Arrow returns character @@ -835,8 +837,7 @@ test_that("month() supports integer input",{ month_int_input = as.character(month(month_as_int, label = TRUE)) ) %>% collect(), - test_df_month, - ignore_attr = on_windows() + test_df_month ) compare_dplyr_binding( From 10bfe427bf06c604ee586ea9c23eab18cd528b87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 2 Mar 2022 12:05:20 +0000 Subject: [PATCH 09/17] removed skip helper --- r/tests/testthat/helper-skip.R | 4 ---- 1 file changed, 4 deletions(-) diff --git a/r/tests/testthat/helper-skip.R b/r/tests/testthat/helper-skip.R index f0761d1ce7f..df0de777258 100644 --- a/r/tests/testthat/helper-skip.R +++ b/r/tests/testthat/helper-skip.R @@ -78,7 +78,3 @@ process_is_running <- function(x) { cmd <- sprintf("ps aux | grep '%s' | grep -v grep", x) tryCatch(system(cmd, ignore.stdout = TRUE) == 0, error = function(e) FALSE) } - -on_windows <- function() { - ifelse(tolower(Sys.info()[["sysname"]]) == "windows", TRUE, FALSE) -} From 704216b28c7f2ac81daed37811bf912e0ecc1f2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 2 Mar 2022 13:20:49 +0000 Subject: [PATCH 10/17] lint --- r/tests/testthat/test-dplyr-funcs-datetime.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 5cf5b1c7abf..92158811f5d 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -816,7 +816,7 @@ test_that("dst extracts daylight savings time correctly", { ) }) -test_that("month() supports integer input",{ +test_that("month() supports integer input", { test_df_month <- tibble( month_as_int = c(1:12, NA) ) From f1aafa42b6401cc7240c1aa90ac6f3dd5abf9ecb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 3 Mar 2022 21:01:47 +0000 Subject: [PATCH 11/17] rebase typo --- r/tests/testthat/test-dplyr-funcs-datetime.R | 63 ++++++++++---------- 1 file changed, 31 insertions(+), 32 deletions(-) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 92158811f5d..6237880a7f4 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -798,7 +798,7 @@ test_that("semester works with temporal types and integers", { collect(), regexp = "NotImplemented: Function 'month' has no kernel matching input types (array[string])", fixed = TRUE - ) + ) }) test_that("dst extracts daylight savings time correctly", { @@ -852,40 +852,39 @@ test_that("month() supports integer input", { ) }) - test_that("month() errors with double input and returns NA with int outside 1:12", { - test_df_month <- tibble( - month_as_int = c(-1L, 1L, 13L, NA), - month_as_double = month_as_int + 0.1 - ) +test_that("month() errors with double input and returns NA with int outside 1:12", { + test_df_month <- tibble( + month_as_int = c(-1L, 1L, 13L, NA), + month_as_double = month_as_int + 0.1 + ) - expect_error( - test_df_month %>% - arrow_table() %>% - mutate(month_dbl_input = month(month_as_double)) %>% - collect(), - regexp = "Function 'month' has no kernel matching input types (array[double])", - fixed = TRUE - ) + expect_error( + test_df_month %>% + arrow_table() %>% + mutate(month_dbl_input = month(month_as_double)) %>% + collect(), + regexp = "Function 'month' has no kernel matching input types (array[double])", + fixed = TRUE + ) - expect_error( - test_df_month %>% - record_batch() %>% - mutate(month_dbl_input = month(month_as_double)) %>% - collect(), - regexp = "Function 'month' has no kernel matching input types (array[double])", - fixed = TRUE - ) + expect_error( + test_df_month %>% + record_batch() %>% + mutate(month_dbl_input = month(month_as_double)) %>% + collect(), + regexp = "Function 'month' has no kernel matching input types (array[double])", + fixed = TRUE + ) - expect_equal( - test_df_month %>% - arrow_table() %>% - select(month_as_int) %>% - mutate(month_int_input = month(month_as_int)) %>% - collect(), - tibble( - month_as_int = c(-1L, 1L, 13L, NA), - month_int_input = c(NA, 1L, NA, NA) - ) + expect_equal( + test_df_month %>% + arrow_table() %>% + select(month_as_int) %>% + mutate(month_int_input = month(month_as_int)) %>% + collect(), + tibble( + month_as_int = c(-1L, 1L, 13L, NA), + month_int_input = c(NA, 1L, NA, NA) ) ) }) From deb2144a1462da2bf0ed66a1306fc10bd55c877e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 7 Mar 2022 09:44:53 +0000 Subject: [PATCH 12/17] style --- r/tests/testthat/test-dplyr-funcs-datetime.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 6237880a7f4..7d2e988be20 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -846,7 +846,8 @@ test_that("month() supports integer input", { mutate( month_int_input = as.character( month(month_as_int, label = TRUE, abbr = FALSE) - )) %>% + ) + ) %>% collect(), test_df_month ) From 4b9941180e0529cb40604a7d65a03a707bc9f380 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 7 Mar 2022 09:45:38 +0000 Subject: [PATCH 13/17] remove duplicate code and use `build_expr()` --- r/R/dplyr-funcs-datetime.R | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 79fd7374c90..10651001d13 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -111,14 +111,21 @@ register_bindings_datetime <- function() { locale = Sys.getlocale("LC_TIME")) { if (call_binding("is.integer", x)) { - if (is.integer(x)) { - x <- build_expr("cast", x, options = cast_options(to_type = int32())) - } x <- call_binding("if_else", call_binding("between", x, 1, 12), x, NA_integer_) - x <- build_expr("cast", x * 28L, options = cast_options(to_type = date32())) + if (!label) { + # if we don't need a label we can return the integer itself (constrained + # to 1:12) + return(x) + } else { + # make the integer into a date32() - which interprets integers as + # days from epoch (we multiply by 28 to be able to later extract the + # month with label) - NB this builds a false date (to be used by strftime) + # since we only know and care about the month + x <- build_expr("cast", x * 28L, options = cast_options(to_type = date32())) + } } if (label) { @@ -127,10 +134,10 @@ register_bindings_datetime <- function() { } else { format <- "%B" } - return(Expression$create("strftime", x, options = list(format = format, locale = locale))) + return(build_expr("strftime", x, options = list(format = format, locale = locale))) } - Expression$create("month", x) + build_expr("month", x) }) register_binding("is.Date", function(x) { From 75896a4ebb275bf6caf84109954a971eae8d8472 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 7 Mar 2022 10:03:40 +0000 Subject: [PATCH 14/17] updated NEWS --- r/NEWS.md | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/r/NEWS.md b/r/NEWS.md index 92a5dc5f3a5..43e36a52541 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -20,17 +20,10 @@ # arrow 7.0.0.9000 * `read_csv_arrow()`'s readr-style type `T` is now mapped to `timestamp(unit = "ns")` instead of `timestamp(unit = "s")`. -* `lubridate`: - * `tz()` to extract/get timezone - * `semester()` to extract/get semester - * `dst()` to get daylight savings time indicator. - * `date()` to extract date - * `epiyear()` to get epiyear +* `lubridate`: + * component extraction functions: `tz()` (timezone), `semester()` (semester), `dst()` (daylight savings time indicator), `date()` (extract date), `epiyear()` (epiyear), improvements to `month()`, which now works with integer inputs. * date-time functionality: * `as.Date()` to convert to date -* `lubridate`: - * component extraction functions: `tz()` (timezone), `semester()` (semester), `dst()` (daylight savings time indicator) - * improvements to `month()`, which now works with integer inputs # arrow 7.0.0 From a00363083a1fb01c50e42120355b715ea86c1e26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 8 Mar 2022 16:05:57 +0000 Subject: [PATCH 15/17] move the test to top of block --- r/tests/testthat/test-dplyr-funcs-datetime.R | 23 ++++++++++---------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 7d2e988be20..25a367e4f7a 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -859,6 +859,18 @@ test_that("month() errors with double input and returns NA with int outside 1:12 month_as_double = month_as_int + 0.1 ) + expect_equal( + test_df_month %>% + arrow_table() %>% + select(month_as_int) %>% + mutate(month_int_input = month(month_as_int)) %>% + collect(), + tibble( + month_as_int = c(-1L, 1L, 13L, NA), + month_int_input = c(NA, 1L, NA, NA) + ) + ) + expect_error( test_df_month %>% arrow_table() %>% @@ -877,17 +889,6 @@ test_that("month() errors with double input and returns NA with int outside 1:12 fixed = TRUE ) - expect_equal( - test_df_month %>% - arrow_table() %>% - select(month_as_int) %>% - mutate(month_int_input = month(month_as_int)) %>% - collect(), - tibble( - month_as_int = c(-1L, 1L, 13L, NA), - month_int_input = c(NA, 1L, NA, NA) - ) - ) }) test_that("date works in arrow", { From 75e8b0aa468a05c6b9bdde5dfffbac85a09cc852 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 8 Mar 2022 16:06:26 +0000 Subject: [PATCH 16/17] simplify the `if` ... `else` logic --- r/R/dplyr-funcs-datetime.R | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 10651001d13..8f5a7689c07 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -116,16 +116,15 @@ register_bindings_datetime <- function() { x, NA_integer_) if (!label) { - # if we don't need a label we can return the integer itself (constrained - # to 1:12) + # if we don't need a label we can return the integer itself (already + # constrained to 1:12) return(x) - } else { + } # make the integer into a date32() - which interprets integers as # days from epoch (we multiply by 28 to be able to later extract the # month with label) - NB this builds a false date (to be used by strftime) # since we only know and care about the month x <- build_expr("cast", x * 28L, options = cast_options(to_type = date32())) - } } if (label) { From 5316754cfe4161125dd99bf249502dddbbaa89c0 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Wed, 9 Mar 2022 09:22:09 -0600 Subject: [PATCH 17/17] Update r/tests/testthat/test-dplyr-funcs-datetime.R --- r/tests/testthat/test-dplyr-funcs-datetime.R | 1 - 1 file changed, 1 deletion(-) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 25a367e4f7a..d0afda8912d 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -888,7 +888,6 @@ test_that("month() errors with double input and returns NA with int outside 1:12 regexp = "Function 'month' has no kernel matching input types (array[double])", fixed = TRUE ) - }) test_that("date works in arrow", {