diff --git a/r/NEWS.md b/r/NEWS.md index 59245b971d2..560e484c33e 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -31,6 +31,7 @@ Instead of these, use the `read_ipc_file()` and `write_ipc_file()` for IPC files, or, `read_ipc_stream()` and `write_ipc_stream()` for IPC streams. * `write_parquet()` now defaults to writing Parquet format version 2.4 (was 1.0). Previously deprecated arguments `properties` and `arrow_properties` have been removed; if you need to deal with these lower-level properties objects directly, use `ParquetFileWriter`, which `write_parquet()` wraps. +* added `lubridate::qday()` (day of quarter) # arrow 8.0.0 diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 7d11cdc1134..1db6c647d53 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -209,6 +209,14 @@ register_bindings_datetime_components <- function() { build_expr("month", x) }) + register_binding("lubridate::qday", function(x) { + # We calculate day of quarter by flooring timestamp to beginning of quarter and + # calculating days between beginning of quarter and timestamp/date in question. + # Since we use one one-based numbering we add one. + floored_x <- build_expr("floor_temporal", x, options = list(unit = 9L)) + build_expr("days_between", floored_x, x) + Expression$scalar(1L) + }) + register_binding("lubridate::am", function(x) { hour <- Expression$create("hour", x) hour < 12 diff --git a/r/src/compute.cpp b/r/src/compute.cpp index 0db558972e8..885af3f7ab0 100644 --- a/r/src/compute.cpp +++ b/r/src/compute.cpp @@ -519,6 +519,35 @@ std::shared_ptr make_compute_options( return out; } + if (func_name == "round_temporal" || func_name == "floor_temporal" || + func_name == "ceil_temporal") { + using Options = arrow::compute::RoundTemporalOptions; + + int64_t multiple = 1; + enum arrow::compute::CalendarUnit unit = arrow::compute::CalendarUnit::DAY; + bool week_starts_monday = true; + bool ceil_is_strictly_greater = true; + bool calendar_based_origin = true; + + if (!Rf_isNull(options["multiple"])) { + multiple = cpp11::as_cpp(options["multiple"]); + } + if (!Rf_isNull(options["unit"])) { + unit = cpp11::as_cpp(options["unit"]); + } + if (!Rf_isNull(options["week_starts_monday"])) { + week_starts_monday = cpp11::as_cpp(options["week_starts_monday"]); + } + if (!Rf_isNull(options["ceil_is_strictly_greater"])) { + ceil_is_strictly_greater = cpp11::as_cpp(options["ceil_is_strictly_greater"]); + } + if (!Rf_isNull(options["calendar_based_origin"])) { + calendar_based_origin = cpp11::as_cpp(options["calendar_based_origin"]); + } + return std::make_shared(multiple, unit, week_starts_monday, + ceil_is_strictly_greater, calendar_based_origin); + } + if (func_name == "round_to_multiple") { using Options = arrow::compute::RoundToMultipleOptions; auto out = std::make_shared(Options::Defaults()); diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index f0543736404..6caf061fc85 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -574,6 +574,26 @@ test_that("extract yday from timestamp", { ) }) +test_that("extract qday from timestamp", { + test_df <- tibble::tibble( + time = as.POSIXct(seq(as.Date("1999-12-31", tz = "UTC"), as.Date("2001-01-01", tz = "UTC"), by = "day")) + ) + + compare_dplyr_binding( + .input %>% + transmute(x = qday(time)) %>% + collect(), + test_df + ) + + compare_dplyr_binding( + .input %>% + transmute(x = qday(as.POSIXct("2022-06-29 12:35"))) %>% + collect(), + test_df + ) +}) + test_that("extract hour from timestamp", { compare_dplyr_binding( .input %>% @@ -790,6 +810,26 @@ test_that("extract yday from date", { ) }) +test_that("extract qday from date", { + test_df <- tibble::tibble( + date = seq(as.Date("1999-12-31"), as.Date("2001-01-01"), by = "day") + ) + + compare_dplyr_binding( + .input %>% + mutate(x = qday(date)) %>% + collect(), + test_df + ) + + compare_dplyr_binding( + .input %>% + mutate(y = qday(as.Date("2022-06-29"))) %>% + collect(), + test_df + ) +}) + test_that("leap_year mirror lubridate", { compare_dplyr_binding( .input %>%