From 3b90e25fecc3725d63445662790b6648c6763ae2 Mon Sep 17 00:00:00 2001 From: Rok Date: Mon, 27 Jun 2022 16:42:58 +0200 Subject: [PATCH 1/7] Add qday --- r/NEWS.md | 1 + r/R/dplyr-funcs-datetime.R | 9 ++++++ r/src/compute.cpp | 29 ++++++++++++++++++++ r/tests/testthat/test-dplyr-funcs-datetime.R | 18 ++++++++++++ 4 files changed, 57 insertions(+) diff --git a/r/NEWS.md b/r/NEWS.md index 59245b971d2..560e484c33e 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -31,6 +31,7 @@ Instead of these, use the `read_ipc_file()` and `write_ipc_file()` for IPC files, or, `read_ipc_stream()` and `write_ipc_stream()` for IPC streams. * `write_parquet()` now defaults to writing Parquet format version 2.4 (was 1.0). Previously deprecated arguments `properties` and `arrow_properties` have been removed; if you need to deal with these lower-level properties objects directly, use `ParquetFileWriter`, which `write_parquet()` wraps. +* added `lubridate::qday()` (day of quarter) # arrow 8.0.0 diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 7d11cdc1134..9514a6e1ef3 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -209,6 +209,15 @@ register_bindings_datetime_components <- function() { build_expr("month", x) }) + register_binding("lubridate::qday", function(x) { + floored_x <- Expression$create("floor_temporal", x, options = list(unit = 9L, calendar_based_origin = FALSE)) + days_between <- Expression$create("days_between", floored_x, x) + if (call_binding("is.Date", x)) { + return(Expression$create("add", days_between, Expression$scalar(1L))) + } + days_between + }) + register_binding("lubridate::am", function(x) { hour <- Expression$create("hour", x) hour < 12 diff --git a/r/src/compute.cpp b/r/src/compute.cpp index 0db558972e8..885af3f7ab0 100644 --- a/r/src/compute.cpp +++ b/r/src/compute.cpp @@ -519,6 +519,35 @@ std::shared_ptr make_compute_options( return out; } + if (func_name == "round_temporal" || func_name == "floor_temporal" || + func_name == "ceil_temporal") { + using Options = arrow::compute::RoundTemporalOptions; + + int64_t multiple = 1; + enum arrow::compute::CalendarUnit unit = arrow::compute::CalendarUnit::DAY; + bool week_starts_monday = true; + bool ceil_is_strictly_greater = true; + bool calendar_based_origin = true; + + if (!Rf_isNull(options["multiple"])) { + multiple = cpp11::as_cpp(options["multiple"]); + } + if (!Rf_isNull(options["unit"])) { + unit = cpp11::as_cpp(options["unit"]); + } + if (!Rf_isNull(options["week_starts_monday"])) { + week_starts_monday = cpp11::as_cpp(options["week_starts_monday"]); + } + if (!Rf_isNull(options["ceil_is_strictly_greater"])) { + ceil_is_strictly_greater = cpp11::as_cpp(options["ceil_is_strictly_greater"]); + } + if (!Rf_isNull(options["calendar_based_origin"])) { + calendar_based_origin = cpp11::as_cpp(options["calendar_based_origin"]); + } + return std::make_shared(multiple, unit, week_starts_monday, + ceil_is_strictly_greater, calendar_based_origin); + } + if (func_name == "round_to_multiple") { using Options = arrow::compute::RoundToMultipleOptions; auto out = std::make_shared(Options::Defaults()); diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index f0543736404..b6ae1835e3b 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -574,6 +574,15 @@ test_that("extract yday from timestamp", { ) }) +test_that("extract qday from timestamp", { + compare_dplyr_binding( + .input %>% + mutate(x = qday(datetime)) %>% + collect(), + test_df + ) +}) + test_that("extract hour from timestamp", { compare_dplyr_binding( .input %>% @@ -790,6 +799,15 @@ test_that("extract yday from date", { ) }) +test_that("extract qday from date", { + compare_dplyr_binding( + .input %>% + mutate(x = qday(date)) %>% + collect(), + test_df + ) +}) + test_that("leap_year mirror lubridate", { compare_dplyr_binding( .input %>% From e0676b0dd219c3c1086b29b86ffd3044ee657bfe Mon Sep 17 00:00:00 2001 From: Rok Date: Wed, 29 Jun 2022 02:09:55 +0200 Subject: [PATCH 2/7] Switching away from days_between --- r/R/dplyr-funcs-datetime.R | 14 +++++++------- r/tests/testthat/test-dplyr-funcs-datetime.R | 7 +++++++ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 9514a6e1ef3..0364f759ab6 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -209,13 +209,13 @@ register_bindings_datetime_components <- function() { build_expr("month", x) }) - register_binding("lubridate::qday", function(x) { - floored_x <- Expression$create("floor_temporal", x, options = list(unit = 9L, calendar_based_origin = FALSE)) - days_between <- Expression$create("days_between", floored_x, x) - if (call_binding("is.Date", x)) { - return(Expression$create("add", days_between, Expression$scalar(1L))) - } - days_between + register_binding("qday", function(x) { + x <- build_expr("floor_temporal", x, options = list(unit = 6L, calendar_based_origin = FALSE)) + x <- build_expr("cast", x, options = cast_options(to_type = timestamp("s"))) + floored_x <- build_expr("floor_temporal", x, options = list(unit = 9L, calendar_based_origin = FALSE)) + duration_between <- build_expr("subtract", x, floored_x) + seconds_between <- build_expr("cast", duration_between, options = cast_options(to_type = int64())) + build_expr("floor", seconds_between / Expression$scalar(86400L) + Expression$scalar(1L)) }) register_binding("lubridate::am", function(x) { diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index b6ae1835e3b..1bf833529b8 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -806,6 +806,13 @@ test_that("extract qday from date", { collect(), test_df ) + + compare_dplyr_binding( + .input %>% + mutate(y = qday(as.Date(date))) %>% + collect(), + test_df + ) }) test_that("leap_year mirror lubridate", { From 05c8a241b1fa4659bb3192ebc81330f24a2ab3f0 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Wed, 29 Jun 2022 14:07:00 +0200 Subject: [PATCH 3/7] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Dragoș Moldovan-Grünfeld --- r/tests/testthat/test-dplyr-funcs-datetime.R | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 1bf833529b8..9e743d74a7f 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -581,6 +581,12 @@ test_that("extract qday from timestamp", { collect(), test_df ) + compare_dplyr_binding( + .input %>% + mutate(x = qday(as.POSIXct("2022-06-29 12:35))) %>% + collect(), + test_df + ) }) test_that("extract hour from timestamp", { @@ -809,7 +815,7 @@ test_that("extract qday from date", { compare_dplyr_binding( .input %>% - mutate(y = qday(as.Date(date))) %>% + mutate(y = qday(as.Date("2022-06-29"))) %>% collect(), test_df ) From b018e3c7805c81271c8be116439f7b7fb19c15bf Mon Sep 17 00:00:00 2001 From: Rok Date: Thu, 30 Jun 2022 22:03:07 +0200 Subject: [PATCH 4/7] Fix timezone bug --- r/R/dplyr-funcs-datetime.R | 12 +++++------- r/tests/testthat/test-dplyr-funcs-datetime.R | 2 +- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 0364f759ab6..13ae5668f28 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -209,13 +209,11 @@ register_bindings_datetime_components <- function() { build_expr("month", x) }) - register_binding("qday", function(x) { - x <- build_expr("floor_temporal", x, options = list(unit = 6L, calendar_based_origin = FALSE)) - x <- build_expr("cast", x, options = cast_options(to_type = timestamp("s"))) - floored_x <- build_expr("floor_temporal", x, options = list(unit = 9L, calendar_based_origin = FALSE)) - duration_between <- build_expr("subtract", x, floored_x) - seconds_between <- build_expr("cast", duration_between, options = cast_options(to_type = int64())) - build_expr("floor", seconds_between / Expression$scalar(86400L) + Expression$scalar(1L)) + register_binding("lubridate::qday", function(x) { + # We calculate day of quarter by flooring timestamp to beginning of quarter and + # calculating days between beginning of quarter and timestamp/date in question. + floored_x <- build_expr("floor_temporal", x, options = list(unit = 9L)) + build_expr("days_between", floored_x, x) + Expression$scalar(1L) }) register_binding("lubridate::am", function(x) { diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 9e743d74a7f..37fe3818937 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -583,7 +583,7 @@ test_that("extract qday from timestamp", { ) compare_dplyr_binding( .input %>% - mutate(x = qday(as.POSIXct("2022-06-29 12:35))) %>% + mutate(x = qday(as.POSIXct("2022-06-29 12:35"))) %>% collect(), test_df ) From 3247ce842169ba940fdd735348c26e97b1df0eab Mon Sep 17 00:00:00 2001 From: Rok Date: Wed, 6 Jul 2022 14:57:35 +0200 Subject: [PATCH 5/7] Review feedback - expanding test data range --- r/tests/testthat/test-dplyr-funcs-datetime.R | 23 ++++++++++++++------ 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 37fe3818937..f3ae29f3205 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -575,18 +575,23 @@ test_that("extract yday from timestamp", { }) test_that("extract qday from timestamp", { + test_df <- tibble::tibble( + time = as.POSIXct(seq(as.Date("1999-12-31"), as.Date("2001-01-01"), by = "day")) + ) compare_dplyr_binding( .input %>% - mutate(x = qday(datetime)) %>% + mutate(x = qday(time)) %>% collect(), - test_df + test_df, + ignore_attr = "tzone" ) compare_dplyr_binding( - .input %>% - mutate(x = qday(as.POSIXct("2022-06-29 12:35"))) %>% - collect(), - test_df - ) + .input %>% + mutate(x = qday(as.POSIXct("2022-06-29 12:35"))) %>% + collect(), + test_df, + ignore_attr = "tzone" + ) }) test_that("extract hour from timestamp", { @@ -806,6 +811,10 @@ test_that("extract yday from date", { }) test_that("extract qday from date", { + test_df <- tibble::tibble( + date = seq(as.Date("1999-12-31"), as.Date("2001-01-01"), by = "day") + ) + compare_dplyr_binding( .input %>% mutate(x = qday(date)) %>% From 37ebd87d6741aa1e60634a61dd1c4f81d50eb092 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Mon, 18 Jul 2022 23:24:36 +0200 Subject: [PATCH 6/7] Update r/R/dplyr-funcs-datetime.R --- r/R/dplyr-funcs-datetime.R | 1 + 1 file changed, 1 insertion(+) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 13ae5668f28..1db6c647d53 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -212,6 +212,7 @@ register_bindings_datetime_components <- function() { register_binding("lubridate::qday", function(x) { # We calculate day of quarter by flooring timestamp to beginning of quarter and # calculating days between beginning of quarter and timestamp/date in question. + # Since we use one one-based numbering we add one. floored_x <- build_expr("floor_temporal", x, options = list(unit = 9L)) build_expr("days_between", floored_x, x) + Expression$scalar(1L) }) From ef84a7a2fff035caf5f826c3a4aa92af2c0dcb60 Mon Sep 17 00:00:00 2001 From: Rok Date: Tue, 19 Jul 2022 13:32:26 +0200 Subject: [PATCH 7/7] Review feedback --- r/tests/testthat/test-dplyr-funcs-datetime.R | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index f3ae29f3205..6caf061fc85 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -576,22 +576,22 @@ test_that("extract yday from timestamp", { test_that("extract qday from timestamp", { test_df <- tibble::tibble( - time = as.POSIXct(seq(as.Date("1999-12-31"), as.Date("2001-01-01"), by = "day")) + time = as.POSIXct(seq(as.Date("1999-12-31", tz = "UTC"), as.Date("2001-01-01", tz = "UTC"), by = "day")) ) + compare_dplyr_binding( .input %>% - mutate(x = qday(time)) %>% + transmute(x = qday(time)) %>% collect(), - test_df, - ignore_attr = "tzone" + test_df ) + compare_dplyr_binding( .input %>% - mutate(x = qday(as.POSIXct("2022-06-29 12:35"))) %>% + transmute(x = qday(as.POSIXct("2022-06-29 12:35"))) %>% collect(), - test_df, - ignore_attr = "tzone" - ) + test_df + ) }) test_that("extract hour from timestamp", {