From d6b66d2bf6e3977a547328ae72170e68d953fbbd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 7 Feb 2022 15:30:18 +0000 Subject: [PATCH 01/34] first pass at implementing the binding for `lubridate::tz()`. needs more tests --- r/R/dplyr-funcs-datetime.R | 4 +++- r/tests/testthat/test-dplyr-funcs-datetime.R | 11 +++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 04c0214fdfb..35556acb7dc 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -147,5 +147,7 @@ register_bindings_datetime <- function() { register_binding("pm", function(x) { !call_binding("am", x) }) - + register_binding("tz", function(x) { + x$type()$timezone() + }) } diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index a7a705678c1..2cc6e066b14 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -711,3 +711,14 @@ test_that("am/pm mirror lubridate", { ) }) + +test_that("extract tz", { + compare_dplyr_binding( + .input %>% + mutate(timezone = tz(x)) %>% + collect(), + tibble( + x = as.POSIXct(c("2022-02-07", "2022-03-04"), tz = "Pacific/Marquesas") + ) + ) +}) From ebc14431088ee9281ae2e8986b1ef3ec65b22a96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 8 Feb 2022 10:22:06 +0000 Subject: [PATCH 02/34] `tz()` returns `"UTC"` for strings, date and NAs + more tests --- r/R/dplyr-funcs-datetime.R | 7 ++++++- r/tests/testthat/test-dplyr-funcs-datetime.R | 17 +++++++++++++---- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 35556acb7dc..1cc78c04279 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -148,6 +148,11 @@ register_bindings_datetime <- function() { !call_binding("am", x) }) register_binding("tz", function(x) { - x$type()$timezone() + if (call_binding("is.character", x) || + call_binding("is.Date", x)) { + return("UTC") + } else { + return(x$type()$timezone()) + } }) } diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 2cc6e066b14..5fc347b472b 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -713,12 +713,21 @@ test_that("am/pm mirror lubridate", { }) test_that("extract tz", { + df <- tibble( + x = as.POSIXct(c("2022-02-07", "2022-03-04"), tz = "Pacific/Marquesas"), + # tz() returns "UTC" for NAs, strings and dates + y = c("2022-02-07", NA), + z = as.Date(c("2022-02-07", NA)) + ) + compare_dplyr_binding( .input %>% - mutate(timezone = tz(x)) %>% + mutate( + timezone_x = tz(x), + timezone_y = tz(y), + timezone_z = tz(z) + ) %>% collect(), - tibble( - x = as.POSIXct(c("2022-02-07", "2022-03-04"), tz = "Pacific/Marquesas") - ) + df ) }) From 104c779321926febed94e3cb0cae974cf87dadb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 8 Feb 2022 10:26:15 +0000 Subject: [PATCH 03/34] updated NEWS --- r/NEWS.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/r/NEWS.md b/r/NEWS.md index 3ec4fc722d7..de361ab1a85 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -19,6 +19,9 @@ # arrow 7.0.0.9000 +* `lubridate`: + * `tz()` to extract/get timezone + # arrow 7.0.0 ## Enhancements to dplyr and datasets From 9710ffdc937c336a9814c92fbba45a2335c5150b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 10 Feb 2022 10:41:18 +0000 Subject: [PATCH 04/34] fall back to the simpler implementation for `tz()` --- r/R/dplyr-funcs-datetime.R | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 1cc78c04279..35556acb7dc 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -148,11 +148,6 @@ register_bindings_datetime <- function() { !call_binding("am", x) }) register_binding("tz", function(x) { - if (call_binding("is.character", x) || - call_binding("is.Date", x)) { - return("UTC") - } else { - return(x$type()$timezone()) - } + x$type()$timezone() }) } From 38e38e0f094e3f538781cc330d0053048771a886 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 10 Feb 2022 10:42:05 +0000 Subject: [PATCH 05/34] split good and error behaviour in 2 different chunks and use `expect_snapshot()` for the error chunk --- .../testthat/_snaps/dplyr-funcs-datetime.md | 15 ++++++++++ r/tests/testthat/test-dplyr-funcs-datetime.R | 30 ++++++++++++++----- 2 files changed, 37 insertions(+), 8 deletions(-) create mode 100644 r/tests/testthat/_snaps/dplyr-funcs-datetime.md diff --git a/r/tests/testthat/_snaps/dplyr-funcs-datetime.md b/r/tests/testthat/_snaps/dplyr-funcs-datetime.md new file mode 100644 index 00000000000..ac676498253 --- /dev/null +++ b/r/tests/testthat/_snaps/dplyr-funcs-datetime.md @@ -0,0 +1,15 @@ +# extract tz + + Code + compare_dplyr_binding(.input %>% mutate(timezone_y = tz(x), timezone_z = tz(z), + timezone_w = tz(w), timezone_v = tz(v)) %>% collect(), df) + Warning + tz(): Don't know how to compute timezone for object of class integer; returning "UTC". This warning will become an error in the next major version of lubridate. + tz(): Don't know how to compute timezone for object of class numeric; returning "UTC". This warning will become an error in the next major version of lubridate. + tz(): Don't know how to compute timezone for object of class integer; returning "UTC". This warning will become an error in the next major version of lubridate. + tz(): Don't know how to compute timezone for object of class numeric; returning "UTC". This warning will become an error in the next major version of lubridate. + Error + `via_batch <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = record_batch(tbl))))` threw an unexpected warning. + Message: Expression tz(z) not supported in Arrow; pulling data into R + Class: simpleWarning/warning/condition + diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 5fc347b472b..019e9c8e258 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -714,20 +714,34 @@ test_that("am/pm mirror lubridate", { test_that("extract tz", { df <- tibble( - x = as.POSIXct(c("2022-02-07", "2022-03-04"), tz = "Pacific/Marquesas"), - # tz() returns "UTC" for NAs, strings and dates + x = as.POSIXct(c("2022-02-07", NA), tz = "Pacific/Marquesas"), + # lubridate::tz() returns -for the time being - "UTC" for NAs, strings, + # dates and numerics y = c("2022-02-07", NA), - z = as.Date(c("2022-02-07", NA)) + z = as.Date(c("2022-02-07", NA)), + w = c(1L, 5L), + v = c(1.1, 2.47) ) compare_dplyr_binding( .input %>% - mutate( - timezone_x = tz(x), - timezone_y = tz(y), - timezone_z = tz(z) - ) %>% + mutate(timezone_x = tz(x)) %>% collect(), df ) + + expect_snapshot( + compare_dplyr_binding( + .input %>% + mutate( + timezone_y = tz(x), + timezone_z = tz(z), + timezone_w = tz(w), + timezone_v = tz(v) + ) %>% + collect(), + df + ), + error = TRUE + ) }) From d88e4c96b6abb9e445031c37d9001937478c3718 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 10 Feb 2022 21:02:01 +0000 Subject: [PATCH 06/34] update `tz()` binding to return `NA` when x is `NA` --- r/R/dplyr-funcs-datetime.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 35556acb7dc..3a78abafb07 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -148,6 +148,11 @@ register_bindings_datetime <- function() { !call_binding("am", x) }) register_binding("tz", function(x) { - x$type()$timezone() + call_binding( + "if_else", + call_binding("is.na", x), + NA_character_, + x$type()$timezone() + ) }) } From ff23580d9a4918acc1373de4681b0b2e3a0e26d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 10 Feb 2022 21:02:45 +0000 Subject: [PATCH 07/34] update tests to reflect different behaviour for `NA`: arrow returns `NA`, lubridate returns `"UTC"` --- r/tests/testthat/test-dplyr-funcs-datetime.R | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 019e9c8e258..648f6f1464c 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -714,7 +714,7 @@ test_that("am/pm mirror lubridate", { test_that("extract tz", { df <- tibble( - x = as.POSIXct(c("2022-02-07", NA), tz = "Pacific/Marquesas"), + x = as.POSIXct(c("2022-02-07", "2022-02-10"), tz = "Pacific/Marquesas"), # lubridate::tz() returns -for the time being - "UTC" for NAs, strings, # dates and numerics y = c("2022-02-07", NA), @@ -744,4 +744,20 @@ test_that("extract tz", { ), error = TRUE ) + + df2 <- tibble( + x = as.POSIXct(c("2022-02-07", NA), tz = "Pacific/Marquesas") + ) + + # NAs propagate + expect_equal( + df2 %>% + record_batch() %>% + mutate(timezone_x = tz(x)) %>% + collect(), + tibble( + x = df2$x, + timezone_x = c("Pacific/Marquesas", NA) + ) + ) }) From 6014efaf242a9ab33f559507eca8ef7ab0759a65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 10 Feb 2022 22:08:42 +0000 Subject: [PATCH 08/34] revert to first / original implementation and remove NA propagation unit test --- r/R/dplyr-funcs-datetime.R | 5 ----- r/tests/testthat/test-dplyr-funcs-datetime.R | 16 ---------------- 2 files changed, 21 deletions(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 3a78abafb07..249b789e524 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -148,11 +148,6 @@ register_bindings_datetime <- function() { !call_binding("am", x) }) register_binding("tz", function(x) { - call_binding( - "if_else", - call_binding("is.na", x), - NA_character_, x$type()$timezone() - ) }) } diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 648f6f1464c..e7ffb3792aa 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -744,20 +744,4 @@ test_that("extract tz", { ), error = TRUE ) - - df2 <- tibble( - x = as.POSIXct(c("2022-02-07", NA), tz = "Pacific/Marquesas") - ) - - # NAs propagate - expect_equal( - df2 %>% - record_batch() %>% - mutate(timezone_x = tz(x)) %>% - collect(), - tibble( - x = df2$x, - timezone_x = c("Pacific/Marquesas", NA) - ) - ) }) From baaed40559d5a5d79fb5328c518abce675d36a5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 15 Feb 2022 10:13:51 +0000 Subject: [PATCH 09/34] updated unit tests and snapshots for `tz()` --- .../testthat/_snaps/dplyr-funcs-datetime.md | 39 +++++++++++++++-- r/tests/testthat/test-dplyr-funcs-datetime.R | 42 +++++++++++++++++-- 2 files changed, 73 insertions(+), 8 deletions(-) diff --git a/r/tests/testthat/_snaps/dplyr-funcs-datetime.md b/r/tests/testthat/_snaps/dplyr-funcs-datetime.md index ac676498253..84921f3dfaa 100644 --- a/r/tests/testthat/_snaps/dplyr-funcs-datetime.md +++ b/r/tests/testthat/_snaps/dplyr-funcs-datetime.md @@ -1,15 +1,46 @@ # extract tz Code - compare_dplyr_binding(.input %>% mutate(timezone_y = tz(x), timezone_z = tz(z), - timezone_w = tz(w), timezone_v = tz(v)) %>% collect(), df) + compare_dplyr_binding(.input %>% mutate(timezone_y = tz(x), timezone_z = tz(y)) %>% + collect(), df) + Error + `via_batch <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = record_batch(tbl))))` threw an unexpected warning. + Message: In tz(y), timezone extraction for objects of class `character` not supported in Arrow; pulling data into R + Class: simpleWarning/warning/condition + +--- + + Code + compare_dplyr_binding(.input %>% mutate(timezone_y = tz(x), timezone_z = tz(z)) %>% + collect(), df) + Error + `via_batch <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = record_batch(tbl))))` threw an unexpected warning. + Message: In tz(z), timezone extraction for objects of class `date` not supported in Arrow; pulling data into R + Class: simpleWarning/warning/condition + +--- + + Code + compare_dplyr_binding(.input %>% mutate(timezone_y = tz(x), timezone_w = tz(w)) %>% + collect(), df) Warning tz(): Don't know how to compute timezone for object of class integer; returning "UTC". This warning will become an error in the next major version of lubridate. - tz(): Don't know how to compute timezone for object of class numeric; returning "UTC". This warning will become an error in the next major version of lubridate. tz(): Don't know how to compute timezone for object of class integer; returning "UTC". This warning will become an error in the next major version of lubridate. + Error + `via_batch <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = record_batch(tbl))))` threw an unexpected warning. + Message: In tz(w), timezone extraction for objects of class `numeric` not supported in Arrow; pulling data into R + Class: simpleWarning/warning/condition + +--- + + Code + compare_dplyr_binding(.input %>% mutate(timezone_y = tz(x), timezone_v = tz(v)) %>% + collect(), df) + Warning + tz(): Don't know how to compute timezone for object of class numeric; returning "UTC". This warning will become an error in the next major version of lubridate. tz(): Don't know how to compute timezone for object of class numeric; returning "UTC". This warning will become an error in the next major version of lubridate. Error `via_batch <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = record_batch(tbl))))` threw an unexpected warning. - Message: Expression tz(z) not supported in Arrow; pulling data into R + Message: In tz(v), timezone extraction for objects of class `numeric` not supported in Arrow; pulling data into R Class: simpleWarning/warning/condition diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index e7ffb3792aa..fbff64a6938 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -715,8 +715,8 @@ test_that("am/pm mirror lubridate", { test_that("extract tz", { df <- tibble( x = as.POSIXct(c("2022-02-07", "2022-02-10"), tz = "Pacific/Marquesas"), - # lubridate::tz() returns -for the time being - "UTC" for NAs, strings, - # dates and numerics + #lubridate::tz() returns -for the time being - "UTC" for NAs, strings, + #dates and numerics y = c("2022-02-07", NA), z = as.Date(c("2022-02-07", NA)), w = c(1L, 5L), @@ -735,8 +735,42 @@ test_that("extract tz", { .input %>% mutate( timezone_y = tz(x), - timezone_z = tz(z), - timezone_w = tz(w), + timezone_z = tz(y) + ) %>% + collect(), + df + ), + error = TRUE + ) + expect_snapshot( + compare_dplyr_binding( + .input %>% + mutate( + timezone_y = tz(x), + timezone_z = tz(z) + ) %>% + collect(), + df + ), + error = TRUE + ) + expect_snapshot( + compare_dplyr_binding( + .input %>% + mutate( + timezone_y = tz(x), + timezone_w = tz(w) + ) %>% + collect(), + df + ), + error = TRUE + ) + expect_snapshot( + compare_dplyr_binding( + .input %>% + mutate( + timezone_y = tz(x), timezone_v = tz(v) ) %>% collect(), From badcdb6a7bd064e851588690f08cd7ff35227696 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 15 Feb 2022 10:17:38 +0000 Subject: [PATCH 10/34] update `tz()` with more meaningful error messages --- r/R/dplyr-funcs-datetime.R | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 249b789e524..c541cbba87c 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -148,6 +148,14 @@ register_bindings_datetime <- function() { !call_binding("am", x) }) register_binding("tz", function(x) { - x$type()$timezone() + if (call_binding("is.Date", x)) { + abort("timezone extraction for objects of class `date` not supported in Arrow") + } else if (call_binding("is.numeric", x)) { + abort("timezone extraction for objects of class `numeric` not supported in Arrow") + } else if (call_binding("is.character", x)) { + abort("timezone extraction for objects of class `character` not supported in Arrow") + } else { + return(x$type()$timezone()) + } }) } From f03ab7dbf07a77a451627b07a16d0fd93ec6933c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 15 Feb 2022 20:43:31 +0000 Subject: [PATCH 11/34] meaningful names for the test dataframe columns + update snapshots --- .../testthat/_snaps/dplyr-funcs-datetime.md | 24 +++++++------- r/tests/testthat/test-dplyr-funcs-datetime.R | 33 ++++++++++--------- 2 files changed, 29 insertions(+), 28 deletions(-) diff --git a/r/tests/testthat/_snaps/dplyr-funcs-datetime.md b/r/tests/testthat/_snaps/dplyr-funcs-datetime.md index 84921f3dfaa..6d928052c52 100644 --- a/r/tests/testthat/_snaps/dplyr-funcs-datetime.md +++ b/r/tests/testthat/_snaps/dplyr-funcs-datetime.md @@ -1,46 +1,46 @@ # extract tz Code - compare_dplyr_binding(.input %>% mutate(timezone_y = tz(x), timezone_z = tz(y)) %>% - collect(), df) + compare_dplyr_binding(.input %>% mutate(timezone_posixct_date = tz(posixct_date), + timezone_char_date = tz(char_date)) %>% collect(), df) Error `via_batch <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = record_batch(tbl))))` threw an unexpected warning. - Message: In tz(y), timezone extraction for objects of class `character` not supported in Arrow; pulling data into R + Message: In tz(char_date), timezone extraction for objects of class `string` not supported in Arrow; pulling data into R Class: simpleWarning/warning/condition --- Code - compare_dplyr_binding(.input %>% mutate(timezone_y = tz(x), timezone_z = tz(z)) %>% - collect(), df) + compare_dplyr_binding(.input %>% mutate(timezone_posixct_date = tz(posixct_date), + timezone_date_date = tz(date_date)) %>% collect(), df) Error `via_batch <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = record_batch(tbl))))` threw an unexpected warning. - Message: In tz(z), timezone extraction for objects of class `date` not supported in Arrow; pulling data into R + Message: In tz(date_date), timezone extraction for objects of class `date32[day]` not supported in Arrow; pulling data into R Class: simpleWarning/warning/condition --- Code - compare_dplyr_binding(.input %>% mutate(timezone_y = tz(x), timezone_w = tz(w)) %>% - collect(), df) + compare_dplyr_binding(.input %>% mutate(timezone_posixct_date = tz(posixct_date), + timezone_integer_date = tz(integer_date)) %>% collect(), df) Warning tz(): Don't know how to compute timezone for object of class integer; returning "UTC". This warning will become an error in the next major version of lubridate. tz(): Don't know how to compute timezone for object of class integer; returning "UTC". This warning will become an error in the next major version of lubridate. Error `via_batch <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = record_batch(tbl))))` threw an unexpected warning. - Message: In tz(w), timezone extraction for objects of class `numeric` not supported in Arrow; pulling data into R + Message: In tz(integer_date), timezone extraction for objects of class `int32` not supported in Arrow; pulling data into R Class: simpleWarning/warning/condition --- Code - compare_dplyr_binding(.input %>% mutate(timezone_y = tz(x), timezone_v = tz(v)) %>% - collect(), df) + compare_dplyr_binding(.input %>% mutate(timezone_posixct_date = tz(posixct_date), + timezone_double_date = tz(double_date)) %>% collect(), df) Warning tz(): Don't know how to compute timezone for object of class numeric; returning "UTC". This warning will become an error in the next major version of lubridate. tz(): Don't know how to compute timezone for object of class numeric; returning "UTC". This warning will become an error in the next major version of lubridate. Error `via_batch <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = record_batch(tbl))))` threw an unexpected warning. - Message: In tz(v), timezone extraction for objects of class `numeric` not supported in Arrow; pulling data into R + Message: In tz(double_date), timezone extraction for objects of class `double` not supported in Arrow; pulling data into R Class: simpleWarning/warning/condition diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index fbff64a6938..e6951426e2e 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -714,18 +714,18 @@ test_that("am/pm mirror lubridate", { test_that("extract tz", { df <- tibble( - x = as.POSIXct(c("2022-02-07", "2022-02-10"), tz = "Pacific/Marquesas"), - #lubridate::tz() returns -for the time being - "UTC" for NAs, strings, - #dates and numerics - y = c("2022-02-07", NA), - z = as.Date(c("2022-02-07", NA)), - w = c(1L, 5L), - v = c(1.1, 2.47) + posixct_date = as.POSIXct(c("2022-02-07", "2022-02-10"), tz = "Pacific/Marquesas"), + # lubridate::tz() returns -for the time being - "UTC" for NAs, strings, + # dates and numerics + char_date = c("2022-02-07", NA), + date_date = as.Date(c("2022-02-07", NA)), + integer_date = c(1L, 5L), + double_date = c(1.1, 2.47) ) compare_dplyr_binding( .input %>% - mutate(timezone_x = tz(x)) %>% + mutate(timezone_posixct_date = tz(posixct_date)) %>% collect(), df ) @@ -734,20 +734,21 @@ test_that("extract tz", { compare_dplyr_binding( .input %>% mutate( - timezone_y = tz(x), - timezone_z = tz(y) + timezone_posixct_date = tz(posixct_date), + timezone_char_date = tz(char_date) ) %>% collect(), df ), error = TRUE ) + expect_snapshot( compare_dplyr_binding( .input %>% mutate( - timezone_y = tz(x), - timezone_z = tz(z) + timezone_posixct_date = tz(posixct_date), + timezone_date_date = tz(date_date) ) %>% collect(), df @@ -758,8 +759,8 @@ test_that("extract tz", { compare_dplyr_binding( .input %>% mutate( - timezone_y = tz(x), - timezone_w = tz(w) + timezone_posixct_date = tz(posixct_date), + timezone_integer_date = tz(integer_date) ) %>% collect(), df @@ -770,8 +771,8 @@ test_that("extract tz", { compare_dplyr_binding( .input %>% mutate( - timezone_y = tz(x), - timezone_v = tz(v) + timezone_posixct_date = tz(posixct_date), + timezone_double_date = tz(double_date) ) %>% collect(), df From 022070a1a81f88398e46040ab5a160e3b72cbc5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 15 Feb 2022 20:44:23 +0000 Subject: [PATCH 12/34] simplify the implementation (start with timestamp & error for anything else) --- r/R/dplyr-funcs-datetime.R | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index c541cbba87c..1a41d9550b1 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -148,14 +148,10 @@ register_bindings_datetime <- function() { !call_binding("am", x) }) register_binding("tz", function(x) { - if (call_binding("is.Date", x)) { - abort("timezone extraction for objects of class `date` not supported in Arrow") - } else if (call_binding("is.numeric", x)) { - abort("timezone extraction for objects of class `numeric` not supported in Arrow") - } else if (call_binding("is.character", x)) { - abort("timezone extraction for objects of class `character` not supported in Arrow") - } else { + if (inherits(x$type(), "Timestamp")) { return(x$type()$timezone()) + } else { + abort(paste0("timezone extraction for objects of class `", x$type()$ToString(),"` not supported in Arrow")) } }) } From d47fd4d87eebf4af5c1ca6b6ce05b13bc5d3a59b Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Wed, 16 Feb 2022 11:59:43 -0600 Subject: [PATCH 13/34] some clean up --- r/R/dplyr-funcs-datetime.R | 13 ++-- r/tests/testthat/test-dplyr-funcs-datetime.R | 64 ++++---------------- 2 files changed, 22 insertions(+), 55 deletions(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 1a41d9550b1..26d63d406ee 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -148,10 +148,15 @@ register_bindings_datetime <- function() { !call_binding("am", x) }) register_binding("tz", function(x) { - if (inherits(x$type(), "Timestamp")) { - return(x$type()$timezone()) - } else { - abort(paste0("timezone extraction for objects of class `", x$type()$ToString(),"` not supported in Arrow")) + if (!call_binding("is.POSIXct", x)) { + if (inherits(x, "Expression")) { + class <- x$type()$ToString() + } else { + class <- type(x) + } + abort(paste0("timezone extraction for objects of class `", class, "` not supported in Arrow")) } + + x$type()$timezone() }) } diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index e6951426e2e..dd2da1833fb 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -715,12 +715,6 @@ test_that("am/pm mirror lubridate", { test_that("extract tz", { df <- tibble( posixct_date = as.POSIXct(c("2022-02-07", "2022-02-10"), tz = "Pacific/Marquesas"), - # lubridate::tz() returns -for the time being - "UTC" for NAs, strings, - # dates and numerics - char_date = c("2022-02-07", NA), - date_date = as.Date(c("2022-02-07", NA)), - integer_date = c(1L, 5L), - double_date = c(1.1, 2.47) ) compare_dplyr_binding( @@ -730,53 +724,21 @@ test_that("extract tz", { df ) - expect_snapshot( - compare_dplyr_binding( - .input %>% - mutate( - timezone_posixct_date = tz(posixct_date), - timezone_char_date = tz(char_date) - ) %>% - collect(), - df - ), - error = TRUE + expect_error( + call_binding("tz", Expression$scalar("2020-10-01")), + "timezone extraction for objects of class `string` not supported in Arrow" ) - - expect_snapshot( - compare_dplyr_binding( - .input %>% - mutate( - timezone_posixct_date = tz(posixct_date), - timezone_date_date = tz(date_date) - ) %>% - collect(), - df - ), - error = TRUE + expect_error( + call_binding("tz", Expression$scalar(as.Date("2020-10-01"))), + "timezone extraction for objects of class `date32[day]` not supported in Arrow", + fixed = TRUE ) - expect_snapshot( - compare_dplyr_binding( - .input %>% - mutate( - timezone_posixct_date = tz(posixct_date), - timezone_integer_date = tz(integer_date) - ) %>% - collect(), - df - ), - error = TRUE + expect_error( + call_binding("tz", Expression$scalar(1L)), + "timezone extraction for objects of class `int32` not supported in Arrow" ) - expect_snapshot( - compare_dplyr_binding( - .input %>% - mutate( - timezone_posixct_date = tz(posixct_date), - timezone_double_date = tz(double_date) - ) %>% - collect(), - df - ), - error = TRUE + expect_error( + call_binding("tz", Expression$scalar(1.1)), + "timezone extraction for objects of class `double` not supported in Arrow" ) }) From 14d3ac404df9801fbfc36417cd4c8df7f697fd08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 17 Feb 2022 09:26:07 +0000 Subject: [PATCH 14/34] removed snapshot md file --- .../testthat/_snaps/dplyr-funcs-datetime.md | 46 ------------------- 1 file changed, 46 deletions(-) delete mode 100644 r/tests/testthat/_snaps/dplyr-funcs-datetime.md diff --git a/r/tests/testthat/_snaps/dplyr-funcs-datetime.md b/r/tests/testthat/_snaps/dplyr-funcs-datetime.md deleted file mode 100644 index 6d928052c52..00000000000 --- a/r/tests/testthat/_snaps/dplyr-funcs-datetime.md +++ /dev/null @@ -1,46 +0,0 @@ -# extract tz - - Code - compare_dplyr_binding(.input %>% mutate(timezone_posixct_date = tz(posixct_date), - timezone_char_date = tz(char_date)) %>% collect(), df) - Error - `via_batch <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = record_batch(tbl))))` threw an unexpected warning. - Message: In tz(char_date), timezone extraction for objects of class `string` not supported in Arrow; pulling data into R - Class: simpleWarning/warning/condition - ---- - - Code - compare_dplyr_binding(.input %>% mutate(timezone_posixct_date = tz(posixct_date), - timezone_date_date = tz(date_date)) %>% collect(), df) - Error - `via_batch <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = record_batch(tbl))))` threw an unexpected warning. - Message: In tz(date_date), timezone extraction for objects of class `date32[day]` not supported in Arrow; pulling data into R - Class: simpleWarning/warning/condition - ---- - - Code - compare_dplyr_binding(.input %>% mutate(timezone_posixct_date = tz(posixct_date), - timezone_integer_date = tz(integer_date)) %>% collect(), df) - Warning - tz(): Don't know how to compute timezone for object of class integer; returning "UTC". This warning will become an error in the next major version of lubridate. - tz(): Don't know how to compute timezone for object of class integer; returning "UTC". This warning will become an error in the next major version of lubridate. - Error - `via_batch <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = record_batch(tbl))))` threw an unexpected warning. - Message: In tz(integer_date), timezone extraction for objects of class `int32` not supported in Arrow; pulling data into R - Class: simpleWarning/warning/condition - ---- - - Code - compare_dplyr_binding(.input %>% mutate(timezone_posixct_date = tz(posixct_date), - timezone_double_date = tz(double_date)) %>% collect(), df) - Warning - tz(): Don't know how to compute timezone for object of class numeric; returning "UTC". This warning will become an error in the next major version of lubridate. - tz(): Don't know how to compute timezone for object of class numeric; returning "UTC". This warning will become an error in the next major version of lubridate. - Error - `via_batch <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = record_batch(tbl))))` threw an unexpected warning. - Message: In tz(double_date), timezone extraction for objects of class `double` not supported in Arrow; pulling data into R - Class: simpleWarning/warning/condition - From 52067c9e4bb31a3be2fd5f34e855c62e172ecbd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 17 Feb 2022 09:26:27 +0000 Subject: [PATCH 15/34] class extraction --- r/R/dplyr-funcs-datetime.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 26d63d406ee..8bfab8dd829 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -152,7 +152,7 @@ register_bindings_datetime <- function() { if (inherits(x, "Expression")) { class <- x$type()$ToString() } else { - class <- type(x) + class <- type(x)$ToString() } abort(paste0("timezone extraction for objects of class `", class, "` not supported in Arrow")) } From 792f4ae71aba378af33b57fa8751a6b54623da54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 17 Feb 2022 10:21:40 +0000 Subject: [PATCH 16/34] simplify class extraction once #12447 is merged --- r/R/dplyr-funcs-datetime.R | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 8bfab8dd829..45b8851d2fc 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -149,11 +149,7 @@ register_bindings_datetime <- function() { }) register_binding("tz", function(x) { if (!call_binding("is.POSIXct", x)) { - if (inherits(x, "Expression")) { - class <- x$type()$ToString() - } else { - class <- type(x)$ToString() - } + class <- type(x)$ToString() abort(paste0("timezone extraction for objects of class `", class, "` not supported in Arrow")) } From d1d594676bb60bf657f175906071fb42c9a73057 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 7 Feb 2022 15:30:18 +0000 Subject: [PATCH 17/34] first pass at implementing the binding for `lubridate::tz()`. needs more tests --- r/R/dplyr-funcs-datetime.R | 4 +++- r/tests/testthat/test-dplyr-funcs-datetime.R | 11 +++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 04c0214fdfb..35556acb7dc 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -147,5 +147,7 @@ register_bindings_datetime <- function() { register_binding("pm", function(x) { !call_binding("am", x) }) - + register_binding("tz", function(x) { + x$type()$timezone() + }) } diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index a7a705678c1..2cc6e066b14 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -711,3 +711,14 @@ test_that("am/pm mirror lubridate", { ) }) + +test_that("extract tz", { + compare_dplyr_binding( + .input %>% + mutate(timezone = tz(x)) %>% + collect(), + tibble( + x = as.POSIXct(c("2022-02-07", "2022-03-04"), tz = "Pacific/Marquesas") + ) + ) +}) From 91c5e660a7f9f836d416fbb0cc1c2b0f14c8a042 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 8 Feb 2022 10:22:06 +0000 Subject: [PATCH 18/34] `tz()` returns `"UTC"` for strings, date and NAs + more tests --- r/R/dplyr-funcs-datetime.R | 7 ++++++- r/tests/testthat/test-dplyr-funcs-datetime.R | 17 +++++++++++++---- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 35556acb7dc..1cc78c04279 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -148,6 +148,11 @@ register_bindings_datetime <- function() { !call_binding("am", x) }) register_binding("tz", function(x) { - x$type()$timezone() + if (call_binding("is.character", x) || + call_binding("is.Date", x)) { + return("UTC") + } else { + return(x$type()$timezone()) + } }) } diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 2cc6e066b14..5fc347b472b 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -713,12 +713,21 @@ test_that("am/pm mirror lubridate", { }) test_that("extract tz", { + df <- tibble( + x = as.POSIXct(c("2022-02-07", "2022-03-04"), tz = "Pacific/Marquesas"), + # tz() returns "UTC" for NAs, strings and dates + y = c("2022-02-07", NA), + z = as.Date(c("2022-02-07", NA)) + ) + compare_dplyr_binding( .input %>% - mutate(timezone = tz(x)) %>% + mutate( + timezone_x = tz(x), + timezone_y = tz(y), + timezone_z = tz(z) + ) %>% collect(), - tibble( - x = as.POSIXct(c("2022-02-07", "2022-03-04"), tz = "Pacific/Marquesas") - ) + df ) }) From 46080f6d3f54c09229eabcc83d5136eca691a2b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 8 Feb 2022 10:26:15 +0000 Subject: [PATCH 19/34] updated NEWS --- r/NEWS.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/r/NEWS.md b/r/NEWS.md index a7d36e0fd51..e69d46275fe 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -19,6 +19,9 @@ # arrow 7.0.0.9000 +* `lubridate`: + * `tz()` to extract/get timezone + # arrow 7.0.0 ## Enhancements to dplyr and datasets From 0233ec52f92d94f96271893291ffac67b4ac8d7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 10 Feb 2022 10:41:18 +0000 Subject: [PATCH 20/34] fall back to the simpler implementation for `tz()` --- r/R/dplyr-funcs-datetime.R | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 1cc78c04279..35556acb7dc 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -148,11 +148,6 @@ register_bindings_datetime <- function() { !call_binding("am", x) }) register_binding("tz", function(x) { - if (call_binding("is.character", x) || - call_binding("is.Date", x)) { - return("UTC") - } else { - return(x$type()$timezone()) - } + x$type()$timezone() }) } From de7f9923e3e3df406a8f0de9651aa760848e36ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 10 Feb 2022 10:42:05 +0000 Subject: [PATCH 21/34] split good and error behaviour in 2 different chunks and use `expect_snapshot()` for the error chunk --- .../testthat/_snaps/dplyr-funcs-datetime.md | 15 ++++++++++ r/tests/testthat/test-dplyr-funcs-datetime.R | 30 ++++++++++++++----- 2 files changed, 37 insertions(+), 8 deletions(-) create mode 100644 r/tests/testthat/_snaps/dplyr-funcs-datetime.md diff --git a/r/tests/testthat/_snaps/dplyr-funcs-datetime.md b/r/tests/testthat/_snaps/dplyr-funcs-datetime.md new file mode 100644 index 00000000000..ac676498253 --- /dev/null +++ b/r/tests/testthat/_snaps/dplyr-funcs-datetime.md @@ -0,0 +1,15 @@ +# extract tz + + Code + compare_dplyr_binding(.input %>% mutate(timezone_y = tz(x), timezone_z = tz(z), + timezone_w = tz(w), timezone_v = tz(v)) %>% collect(), df) + Warning + tz(): Don't know how to compute timezone for object of class integer; returning "UTC". This warning will become an error in the next major version of lubridate. + tz(): Don't know how to compute timezone for object of class numeric; returning "UTC". This warning will become an error in the next major version of lubridate. + tz(): Don't know how to compute timezone for object of class integer; returning "UTC". This warning will become an error in the next major version of lubridate. + tz(): Don't know how to compute timezone for object of class numeric; returning "UTC". This warning will become an error in the next major version of lubridate. + Error + `via_batch <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = record_batch(tbl))))` threw an unexpected warning. + Message: Expression tz(z) not supported in Arrow; pulling data into R + Class: simpleWarning/warning/condition + diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 5fc347b472b..019e9c8e258 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -714,20 +714,34 @@ test_that("am/pm mirror lubridate", { test_that("extract tz", { df <- tibble( - x = as.POSIXct(c("2022-02-07", "2022-03-04"), tz = "Pacific/Marquesas"), - # tz() returns "UTC" for NAs, strings and dates + x = as.POSIXct(c("2022-02-07", NA), tz = "Pacific/Marquesas"), + # lubridate::tz() returns -for the time being - "UTC" for NAs, strings, + # dates and numerics y = c("2022-02-07", NA), - z = as.Date(c("2022-02-07", NA)) + z = as.Date(c("2022-02-07", NA)), + w = c(1L, 5L), + v = c(1.1, 2.47) ) compare_dplyr_binding( .input %>% - mutate( - timezone_x = tz(x), - timezone_y = tz(y), - timezone_z = tz(z) - ) %>% + mutate(timezone_x = tz(x)) %>% collect(), df ) + + expect_snapshot( + compare_dplyr_binding( + .input %>% + mutate( + timezone_y = tz(x), + timezone_z = tz(z), + timezone_w = tz(w), + timezone_v = tz(v) + ) %>% + collect(), + df + ), + error = TRUE + ) }) From 07c2a65f3b815021d4dd96f983d04be77820010b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 10 Feb 2022 21:02:01 +0000 Subject: [PATCH 22/34] update `tz()` binding to return `NA` when x is `NA` --- r/R/dplyr-funcs-datetime.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 35556acb7dc..3a78abafb07 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -148,6 +148,11 @@ register_bindings_datetime <- function() { !call_binding("am", x) }) register_binding("tz", function(x) { - x$type()$timezone() + call_binding( + "if_else", + call_binding("is.na", x), + NA_character_, + x$type()$timezone() + ) }) } From 0936441af65caa768da0758139ce1ab462ba1b6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 10 Feb 2022 21:02:45 +0000 Subject: [PATCH 23/34] update tests to reflect different behaviour for `NA`: arrow returns `NA`, lubridate returns `"UTC"` --- r/tests/testthat/test-dplyr-funcs-datetime.R | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 019e9c8e258..648f6f1464c 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -714,7 +714,7 @@ test_that("am/pm mirror lubridate", { test_that("extract tz", { df <- tibble( - x = as.POSIXct(c("2022-02-07", NA), tz = "Pacific/Marquesas"), + x = as.POSIXct(c("2022-02-07", "2022-02-10"), tz = "Pacific/Marquesas"), # lubridate::tz() returns -for the time being - "UTC" for NAs, strings, # dates and numerics y = c("2022-02-07", NA), @@ -744,4 +744,20 @@ test_that("extract tz", { ), error = TRUE ) + + df2 <- tibble( + x = as.POSIXct(c("2022-02-07", NA), tz = "Pacific/Marquesas") + ) + + # NAs propagate + expect_equal( + df2 %>% + record_batch() %>% + mutate(timezone_x = tz(x)) %>% + collect(), + tibble( + x = df2$x, + timezone_x = c("Pacific/Marquesas", NA) + ) + ) }) From de648f44da8f4bd67527b1319f921c1df2177834 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 10 Feb 2022 22:08:42 +0000 Subject: [PATCH 24/34] revert to first / original implementation and remove NA propagation unit test --- r/R/dplyr-funcs-datetime.R | 5 ----- r/tests/testthat/test-dplyr-funcs-datetime.R | 16 ---------------- 2 files changed, 21 deletions(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 3a78abafb07..249b789e524 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -148,11 +148,6 @@ register_bindings_datetime <- function() { !call_binding("am", x) }) register_binding("tz", function(x) { - call_binding( - "if_else", - call_binding("is.na", x), - NA_character_, x$type()$timezone() - ) }) } diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 648f6f1464c..e7ffb3792aa 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -744,20 +744,4 @@ test_that("extract tz", { ), error = TRUE ) - - df2 <- tibble( - x = as.POSIXct(c("2022-02-07", NA), tz = "Pacific/Marquesas") - ) - - # NAs propagate - expect_equal( - df2 %>% - record_batch() %>% - mutate(timezone_x = tz(x)) %>% - collect(), - tibble( - x = df2$x, - timezone_x = c("Pacific/Marquesas", NA) - ) - ) }) From 165031c592dfe7e7c658050fad639c2d38915d19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 15 Feb 2022 10:13:51 +0000 Subject: [PATCH 25/34] updated unit tests and snapshots for `tz()` --- .../testthat/_snaps/dplyr-funcs-datetime.md | 39 +++++++++++++++-- r/tests/testthat/test-dplyr-funcs-datetime.R | 42 +++++++++++++++++-- 2 files changed, 73 insertions(+), 8 deletions(-) diff --git a/r/tests/testthat/_snaps/dplyr-funcs-datetime.md b/r/tests/testthat/_snaps/dplyr-funcs-datetime.md index ac676498253..84921f3dfaa 100644 --- a/r/tests/testthat/_snaps/dplyr-funcs-datetime.md +++ b/r/tests/testthat/_snaps/dplyr-funcs-datetime.md @@ -1,15 +1,46 @@ # extract tz Code - compare_dplyr_binding(.input %>% mutate(timezone_y = tz(x), timezone_z = tz(z), - timezone_w = tz(w), timezone_v = tz(v)) %>% collect(), df) + compare_dplyr_binding(.input %>% mutate(timezone_y = tz(x), timezone_z = tz(y)) %>% + collect(), df) + Error + `via_batch <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = record_batch(tbl))))` threw an unexpected warning. + Message: In tz(y), timezone extraction for objects of class `character` not supported in Arrow; pulling data into R + Class: simpleWarning/warning/condition + +--- + + Code + compare_dplyr_binding(.input %>% mutate(timezone_y = tz(x), timezone_z = tz(z)) %>% + collect(), df) + Error + `via_batch <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = record_batch(tbl))))` threw an unexpected warning. + Message: In tz(z), timezone extraction for objects of class `date` not supported in Arrow; pulling data into R + Class: simpleWarning/warning/condition + +--- + + Code + compare_dplyr_binding(.input %>% mutate(timezone_y = tz(x), timezone_w = tz(w)) %>% + collect(), df) Warning tz(): Don't know how to compute timezone for object of class integer; returning "UTC". This warning will become an error in the next major version of lubridate. - tz(): Don't know how to compute timezone for object of class numeric; returning "UTC". This warning will become an error in the next major version of lubridate. tz(): Don't know how to compute timezone for object of class integer; returning "UTC". This warning will become an error in the next major version of lubridate. + Error + `via_batch <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = record_batch(tbl))))` threw an unexpected warning. + Message: In tz(w), timezone extraction for objects of class `numeric` not supported in Arrow; pulling data into R + Class: simpleWarning/warning/condition + +--- + + Code + compare_dplyr_binding(.input %>% mutate(timezone_y = tz(x), timezone_v = tz(v)) %>% + collect(), df) + Warning + tz(): Don't know how to compute timezone for object of class numeric; returning "UTC". This warning will become an error in the next major version of lubridate. tz(): Don't know how to compute timezone for object of class numeric; returning "UTC". This warning will become an error in the next major version of lubridate. Error `via_batch <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = record_batch(tbl))))` threw an unexpected warning. - Message: Expression tz(z) not supported in Arrow; pulling data into R + Message: In tz(v), timezone extraction for objects of class `numeric` not supported in Arrow; pulling data into R Class: simpleWarning/warning/condition diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index e7ffb3792aa..fbff64a6938 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -715,8 +715,8 @@ test_that("am/pm mirror lubridate", { test_that("extract tz", { df <- tibble( x = as.POSIXct(c("2022-02-07", "2022-02-10"), tz = "Pacific/Marquesas"), - # lubridate::tz() returns -for the time being - "UTC" for NAs, strings, - # dates and numerics + #lubridate::tz() returns -for the time being - "UTC" for NAs, strings, + #dates and numerics y = c("2022-02-07", NA), z = as.Date(c("2022-02-07", NA)), w = c(1L, 5L), @@ -735,8 +735,42 @@ test_that("extract tz", { .input %>% mutate( timezone_y = tz(x), - timezone_z = tz(z), - timezone_w = tz(w), + timezone_z = tz(y) + ) %>% + collect(), + df + ), + error = TRUE + ) + expect_snapshot( + compare_dplyr_binding( + .input %>% + mutate( + timezone_y = tz(x), + timezone_z = tz(z) + ) %>% + collect(), + df + ), + error = TRUE + ) + expect_snapshot( + compare_dplyr_binding( + .input %>% + mutate( + timezone_y = tz(x), + timezone_w = tz(w) + ) %>% + collect(), + df + ), + error = TRUE + ) + expect_snapshot( + compare_dplyr_binding( + .input %>% + mutate( + timezone_y = tz(x), timezone_v = tz(v) ) %>% collect(), From 30e2ceab3344da7ddf21ca419fc6f3bb6b3c4b09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 15 Feb 2022 10:17:38 +0000 Subject: [PATCH 26/34] update `tz()` with more meaningful error messages --- r/R/dplyr-funcs-datetime.R | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 249b789e524..c541cbba87c 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -148,6 +148,14 @@ register_bindings_datetime <- function() { !call_binding("am", x) }) register_binding("tz", function(x) { - x$type()$timezone() + if (call_binding("is.Date", x)) { + abort("timezone extraction for objects of class `date` not supported in Arrow") + } else if (call_binding("is.numeric", x)) { + abort("timezone extraction for objects of class `numeric` not supported in Arrow") + } else if (call_binding("is.character", x)) { + abort("timezone extraction for objects of class `character` not supported in Arrow") + } else { + return(x$type()$timezone()) + } }) } From 384fddc0262517ba312eee44ad887eb655bdec9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 15 Feb 2022 20:43:31 +0000 Subject: [PATCH 27/34] meaningful names for the test dataframe columns + update snapshots --- .../testthat/_snaps/dplyr-funcs-datetime.md | 24 +++++++------- r/tests/testthat/test-dplyr-funcs-datetime.R | 33 ++++++++++--------- 2 files changed, 29 insertions(+), 28 deletions(-) diff --git a/r/tests/testthat/_snaps/dplyr-funcs-datetime.md b/r/tests/testthat/_snaps/dplyr-funcs-datetime.md index 84921f3dfaa..6d928052c52 100644 --- a/r/tests/testthat/_snaps/dplyr-funcs-datetime.md +++ b/r/tests/testthat/_snaps/dplyr-funcs-datetime.md @@ -1,46 +1,46 @@ # extract tz Code - compare_dplyr_binding(.input %>% mutate(timezone_y = tz(x), timezone_z = tz(y)) %>% - collect(), df) + compare_dplyr_binding(.input %>% mutate(timezone_posixct_date = tz(posixct_date), + timezone_char_date = tz(char_date)) %>% collect(), df) Error `via_batch <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = record_batch(tbl))))` threw an unexpected warning. - Message: In tz(y), timezone extraction for objects of class `character` not supported in Arrow; pulling data into R + Message: In tz(char_date), timezone extraction for objects of class `string` not supported in Arrow; pulling data into R Class: simpleWarning/warning/condition --- Code - compare_dplyr_binding(.input %>% mutate(timezone_y = tz(x), timezone_z = tz(z)) %>% - collect(), df) + compare_dplyr_binding(.input %>% mutate(timezone_posixct_date = tz(posixct_date), + timezone_date_date = tz(date_date)) %>% collect(), df) Error `via_batch <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = record_batch(tbl))))` threw an unexpected warning. - Message: In tz(z), timezone extraction for objects of class `date` not supported in Arrow; pulling data into R + Message: In tz(date_date), timezone extraction for objects of class `date32[day]` not supported in Arrow; pulling data into R Class: simpleWarning/warning/condition --- Code - compare_dplyr_binding(.input %>% mutate(timezone_y = tz(x), timezone_w = tz(w)) %>% - collect(), df) + compare_dplyr_binding(.input %>% mutate(timezone_posixct_date = tz(posixct_date), + timezone_integer_date = tz(integer_date)) %>% collect(), df) Warning tz(): Don't know how to compute timezone for object of class integer; returning "UTC". This warning will become an error in the next major version of lubridate. tz(): Don't know how to compute timezone for object of class integer; returning "UTC". This warning will become an error in the next major version of lubridate. Error `via_batch <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = record_batch(tbl))))` threw an unexpected warning. - Message: In tz(w), timezone extraction for objects of class `numeric` not supported in Arrow; pulling data into R + Message: In tz(integer_date), timezone extraction for objects of class `int32` not supported in Arrow; pulling data into R Class: simpleWarning/warning/condition --- Code - compare_dplyr_binding(.input %>% mutate(timezone_y = tz(x), timezone_v = tz(v)) %>% - collect(), df) + compare_dplyr_binding(.input %>% mutate(timezone_posixct_date = tz(posixct_date), + timezone_double_date = tz(double_date)) %>% collect(), df) Warning tz(): Don't know how to compute timezone for object of class numeric; returning "UTC". This warning will become an error in the next major version of lubridate. tz(): Don't know how to compute timezone for object of class numeric; returning "UTC". This warning will become an error in the next major version of lubridate. Error `via_batch <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = record_batch(tbl))))` threw an unexpected warning. - Message: In tz(v), timezone extraction for objects of class `numeric` not supported in Arrow; pulling data into R + Message: In tz(double_date), timezone extraction for objects of class `double` not supported in Arrow; pulling data into R Class: simpleWarning/warning/condition diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index fbff64a6938..e6951426e2e 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -714,18 +714,18 @@ test_that("am/pm mirror lubridate", { test_that("extract tz", { df <- tibble( - x = as.POSIXct(c("2022-02-07", "2022-02-10"), tz = "Pacific/Marquesas"), - #lubridate::tz() returns -for the time being - "UTC" for NAs, strings, - #dates and numerics - y = c("2022-02-07", NA), - z = as.Date(c("2022-02-07", NA)), - w = c(1L, 5L), - v = c(1.1, 2.47) + posixct_date = as.POSIXct(c("2022-02-07", "2022-02-10"), tz = "Pacific/Marquesas"), + # lubridate::tz() returns -for the time being - "UTC" for NAs, strings, + # dates and numerics + char_date = c("2022-02-07", NA), + date_date = as.Date(c("2022-02-07", NA)), + integer_date = c(1L, 5L), + double_date = c(1.1, 2.47) ) compare_dplyr_binding( .input %>% - mutate(timezone_x = tz(x)) %>% + mutate(timezone_posixct_date = tz(posixct_date)) %>% collect(), df ) @@ -734,20 +734,21 @@ test_that("extract tz", { compare_dplyr_binding( .input %>% mutate( - timezone_y = tz(x), - timezone_z = tz(y) + timezone_posixct_date = tz(posixct_date), + timezone_char_date = tz(char_date) ) %>% collect(), df ), error = TRUE ) + expect_snapshot( compare_dplyr_binding( .input %>% mutate( - timezone_y = tz(x), - timezone_z = tz(z) + timezone_posixct_date = tz(posixct_date), + timezone_date_date = tz(date_date) ) %>% collect(), df @@ -758,8 +759,8 @@ test_that("extract tz", { compare_dplyr_binding( .input %>% mutate( - timezone_y = tz(x), - timezone_w = tz(w) + timezone_posixct_date = tz(posixct_date), + timezone_integer_date = tz(integer_date) ) %>% collect(), df @@ -770,8 +771,8 @@ test_that("extract tz", { compare_dplyr_binding( .input %>% mutate( - timezone_y = tz(x), - timezone_v = tz(v) + timezone_posixct_date = tz(posixct_date), + timezone_double_date = tz(double_date) ) %>% collect(), df From af96552143cbcb830051849efee785a5653dde5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 15 Feb 2022 20:44:23 +0000 Subject: [PATCH 28/34] simplify the implementation (start with timestamp & error for anything else) --- r/R/dplyr-funcs-datetime.R | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index c541cbba87c..1a41d9550b1 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -148,14 +148,10 @@ register_bindings_datetime <- function() { !call_binding("am", x) }) register_binding("tz", function(x) { - if (call_binding("is.Date", x)) { - abort("timezone extraction for objects of class `date` not supported in Arrow") - } else if (call_binding("is.numeric", x)) { - abort("timezone extraction for objects of class `numeric` not supported in Arrow") - } else if (call_binding("is.character", x)) { - abort("timezone extraction for objects of class `character` not supported in Arrow") - } else { + if (inherits(x$type(), "Timestamp")) { return(x$type()$timezone()) + } else { + abort(paste0("timezone extraction for objects of class `", x$type()$ToString(),"` not supported in Arrow")) } }) } From 3cee34a068db8e6bec90f5bd139f11d199974d6f Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Wed, 16 Feb 2022 11:59:43 -0600 Subject: [PATCH 29/34] some clean up --- r/R/dplyr-funcs-datetime.R | 13 ++-- r/tests/testthat/test-dplyr-funcs-datetime.R | 64 ++++---------------- 2 files changed, 22 insertions(+), 55 deletions(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 1a41d9550b1..26d63d406ee 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -148,10 +148,15 @@ register_bindings_datetime <- function() { !call_binding("am", x) }) register_binding("tz", function(x) { - if (inherits(x$type(), "Timestamp")) { - return(x$type()$timezone()) - } else { - abort(paste0("timezone extraction for objects of class `", x$type()$ToString(),"` not supported in Arrow")) + if (!call_binding("is.POSIXct", x)) { + if (inherits(x, "Expression")) { + class <- x$type()$ToString() + } else { + class <- type(x) + } + abort(paste0("timezone extraction for objects of class `", class, "` not supported in Arrow")) } + + x$type()$timezone() }) } diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index e6951426e2e..dd2da1833fb 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -715,12 +715,6 @@ test_that("am/pm mirror lubridate", { test_that("extract tz", { df <- tibble( posixct_date = as.POSIXct(c("2022-02-07", "2022-02-10"), tz = "Pacific/Marquesas"), - # lubridate::tz() returns -for the time being - "UTC" for NAs, strings, - # dates and numerics - char_date = c("2022-02-07", NA), - date_date = as.Date(c("2022-02-07", NA)), - integer_date = c(1L, 5L), - double_date = c(1.1, 2.47) ) compare_dplyr_binding( @@ -730,53 +724,21 @@ test_that("extract tz", { df ) - expect_snapshot( - compare_dplyr_binding( - .input %>% - mutate( - timezone_posixct_date = tz(posixct_date), - timezone_char_date = tz(char_date) - ) %>% - collect(), - df - ), - error = TRUE + expect_error( + call_binding("tz", Expression$scalar("2020-10-01")), + "timezone extraction for objects of class `string` not supported in Arrow" ) - - expect_snapshot( - compare_dplyr_binding( - .input %>% - mutate( - timezone_posixct_date = tz(posixct_date), - timezone_date_date = tz(date_date) - ) %>% - collect(), - df - ), - error = TRUE + expect_error( + call_binding("tz", Expression$scalar(as.Date("2020-10-01"))), + "timezone extraction for objects of class `date32[day]` not supported in Arrow", + fixed = TRUE ) - expect_snapshot( - compare_dplyr_binding( - .input %>% - mutate( - timezone_posixct_date = tz(posixct_date), - timezone_integer_date = tz(integer_date) - ) %>% - collect(), - df - ), - error = TRUE + expect_error( + call_binding("tz", Expression$scalar(1L)), + "timezone extraction for objects of class `int32` not supported in Arrow" ) - expect_snapshot( - compare_dplyr_binding( - .input %>% - mutate( - timezone_posixct_date = tz(posixct_date), - timezone_double_date = tz(double_date) - ) %>% - collect(), - df - ), - error = TRUE + expect_error( + call_binding("tz", Expression$scalar(1.1)), + "timezone extraction for objects of class `double` not supported in Arrow" ) }) From fa75502ff9174c3bdae0c3c76710cd1c2bab61fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 17 Feb 2022 09:26:07 +0000 Subject: [PATCH 30/34] removed snapshot md file --- .../testthat/_snaps/dplyr-funcs-datetime.md | 46 ------------------- 1 file changed, 46 deletions(-) delete mode 100644 r/tests/testthat/_snaps/dplyr-funcs-datetime.md diff --git a/r/tests/testthat/_snaps/dplyr-funcs-datetime.md b/r/tests/testthat/_snaps/dplyr-funcs-datetime.md deleted file mode 100644 index 6d928052c52..00000000000 --- a/r/tests/testthat/_snaps/dplyr-funcs-datetime.md +++ /dev/null @@ -1,46 +0,0 @@ -# extract tz - - Code - compare_dplyr_binding(.input %>% mutate(timezone_posixct_date = tz(posixct_date), - timezone_char_date = tz(char_date)) %>% collect(), df) - Error - `via_batch <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = record_batch(tbl))))` threw an unexpected warning. - Message: In tz(char_date), timezone extraction for objects of class `string` not supported in Arrow; pulling data into R - Class: simpleWarning/warning/condition - ---- - - Code - compare_dplyr_binding(.input %>% mutate(timezone_posixct_date = tz(posixct_date), - timezone_date_date = tz(date_date)) %>% collect(), df) - Error - `via_batch <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = record_batch(tbl))))` threw an unexpected warning. - Message: In tz(date_date), timezone extraction for objects of class `date32[day]` not supported in Arrow; pulling data into R - Class: simpleWarning/warning/condition - ---- - - Code - compare_dplyr_binding(.input %>% mutate(timezone_posixct_date = tz(posixct_date), - timezone_integer_date = tz(integer_date)) %>% collect(), df) - Warning - tz(): Don't know how to compute timezone for object of class integer; returning "UTC". This warning will become an error in the next major version of lubridate. - tz(): Don't know how to compute timezone for object of class integer; returning "UTC". This warning will become an error in the next major version of lubridate. - Error - `via_batch <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = record_batch(tbl))))` threw an unexpected warning. - Message: In tz(integer_date), timezone extraction for objects of class `int32` not supported in Arrow; pulling data into R - Class: simpleWarning/warning/condition - ---- - - Code - compare_dplyr_binding(.input %>% mutate(timezone_posixct_date = tz(posixct_date), - timezone_double_date = tz(double_date)) %>% collect(), df) - Warning - tz(): Don't know how to compute timezone for object of class numeric; returning "UTC". This warning will become an error in the next major version of lubridate. - tz(): Don't know how to compute timezone for object of class numeric; returning "UTC". This warning will become an error in the next major version of lubridate. - Error - `via_batch <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = record_batch(tbl))))` threw an unexpected warning. - Message: In tz(double_date), timezone extraction for objects of class `double` not supported in Arrow; pulling data into R - Class: simpleWarning/warning/condition - From 4bf48f6d8684dadf73f8a3df069ff5a9d58ce8de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 17 Feb 2022 09:26:27 +0000 Subject: [PATCH 31/34] class extraction --- r/R/dplyr-funcs-datetime.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 26d63d406ee..8bfab8dd829 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -152,7 +152,7 @@ register_bindings_datetime <- function() { if (inherits(x, "Expression")) { class <- x$type()$ToString() } else { - class <- type(x) + class <- type(x)$ToString() } abort(paste0("timezone extraction for objects of class `", class, "` not supported in Arrow")) } From 554345be1e9a728609083ecccd211494fa41cf89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 17 Feb 2022 10:21:40 +0000 Subject: [PATCH 32/34] simplify class extraction once #12447 is merged --- r/R/dplyr-funcs-datetime.R | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 8bfab8dd829..45b8851d2fc 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -149,11 +149,7 @@ register_bindings_datetime <- function() { }) register_binding("tz", function(x) { if (!call_binding("is.POSIXct", x)) { - if (inherits(x, "Expression")) { - class <- x$type()$ToString() - } else { - class <- type(x)$ToString() - } + class <- type(x)$ToString() abort(paste0("timezone extraction for objects of class `", class, "` not supported in Arrow")) } From 3bff651df13e152d321dc2ac95a0f6d39ccd0a79 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Sat, 19 Feb 2022 08:32:43 -0600 Subject: [PATCH 33/34] a few test additions|changes --- r/R/dplyr-funcs-datetime.R | 3 +-- r/tests/testthat/test-dplyr-funcs-datetime.R | 15 +++++++++++---- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 45b8851d2fc..d9bfcbd7e3e 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -149,8 +149,7 @@ register_bindings_datetime <- function() { }) register_binding("tz", function(x) { if (!call_binding("is.POSIXct", x)) { - class <- type(x)$ToString() - abort(paste0("timezone extraction for objects of class `", class, "` not supported in Arrow")) + abort(paste0("timezone extraction for objects of class `", type(x)$ToString(), "` not supported in Arrow")) } x$type()$timezone() diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index dd2da1833fb..e3092f3455c 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -724,21 +724,28 @@ test_that("extract tz", { df ) + # test a few types directly from R objects expect_error( - call_binding("tz", Expression$scalar("2020-10-01")), + call_binding("tz", "2020-10-01"), "timezone extraction for objects of class `string` not supported in Arrow" ) expect_error( - call_binding("tz", Expression$scalar(as.Date("2020-10-01"))), + call_binding("tz", as.Date("2020-10-01")), "timezone extraction for objects of class `date32[day]` not supported in Arrow", fixed = TRUE ) expect_error( - call_binding("tz", Expression$scalar(1L)), + call_binding("tz", 1L), "timezone extraction for objects of class `int32` not supported in Arrow" ) expect_error( - call_binding("tz", Expression$scalar(1.1)), + call_binding("tz", 1.1), "timezone extraction for objects of class `double` not supported in Arrow" ) + + # Test one expression + expect_error( + call_binding("tz", Expression$scalar("2020-10-01")), + "timezone extraction for objects of class `string` not supported in Arrow" + ) }) From 85c079c9db41e074293ee584b3ac02f34c589463 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 21 Feb 2022 16:20:13 +0000 Subject: [PATCH 34/34] update the `type` method for `Expression` to return a function + more unit tests --- r/R/type.R | 2 +- r/tests/testthat/test-type.R | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/r/R/type.R b/r/R/type.R index f8b408e4065..0bcd3e77d91 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -78,7 +78,7 @@ type.default <- function(x) Array__infer_type(x) type.ArrowDatum <- function(x) x$type #' @export -type.Expression <- function(x) x$type +type.Expression <- function(x) x$type() #----- metadata diff --git a/r/tests/testthat/test-type.R b/r/tests/testthat/test-type.R index 168cdcff690..71c2302116a 100644 --- a/r/tests/testthat/test-type.R +++ b/r/tests/testthat/test-type.R @@ -235,7 +235,10 @@ test_that("type() gets the right type for Expression", { y <- Expression$scalar(10) add_xy <- Expression$create("add", x, y) - expect_equal(x$type, type(x)) - expect_equal(y$type, type(y)) - expect_equal(add_xy$type, type(add_xy)) + expect_equal(x$type(), type(x)) + expect_equal(type(x), int32()) + expect_equal(y$type(), type(y)) + expect_equal(type(y), float64()) + expect_equal(add_xy$type(), type(add_xy)) + expect_equal(type(add_xy), float64()) })