Skip to content
Closed
7 changes: 4 additions & 3 deletions r/NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,11 @@
* `read_csv_arrow()`'s readr-style type `T` is now mapped to `timestamp(unit = "ns")` instead of `timestamp(unit = "s")`.
* `lubridate`:
* component extraction functions: `tz()` (timezone), `semester()` (semester), `dst()` (daylight savings time indicator), `date()` (extract date), `epiyear()` (epiyear), improvements to `month()`, which now works with integer inputs.
* `make_date()` & `make_datetime()` + `ISOdatetime()` & `ISOdate()` to create date-times from numeric representations.
* Added `make_date()` & `make_datetime()` + `ISOdatetime()` & `ISOdate()` to create date-times from numeric representations.
* Added `decimal_date()` and `date_decimal()`
* date-time functionality:
* `difftime` and `as.difftime()`
* `as.Date()` to convert to date
* Added `difftime` and `as.difftime()`
* Added `as.Date()` to convert to date

# arrow 7.0.0

Expand Down
42 changes: 42 additions & 0 deletions r/R/dplyr-funcs-datetime.R
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,20 @@ register_bindings_duration <- function() {
time2 <- build_expr("cast", time2, options = cast_options(to_type = timestamp(timezone = "UTC")))
}

# if time1 or time2 are timestamps they cannot be expressed in "s" /seconds
# otherwise they cannot be added subtracted with durations
# TODO delete the casting to "us" once
# https://issues.apache.org/jira/browse/ARROW-16060 is solved
if (inherits(time1, "Expression") &&
time1$type_id() %in% Type[c("TIMESTAMP")] && time1$type()$unit() != 2L) {
time1 <- build_expr("cast", time1, options = cast_options(to_type = timestamp("us")))
}

if (inherits(time2, "Expression") &&
time2$type_id() %in% Type[c("TIMESTAMP")] && time2$type()$unit() != 2L) {
time2 <- build_expr("cast", time2, options = cast_options(to_type = timestamp("us")))
}
Comment on lines +273 to +285
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we update this section since it looks like ARROW-16060 has already been solved?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Absolutely, feel free to open a Jira + PR to do so if we don't already have one


# we need to go build the subtract expression instead of `time1 - time2` to
# prevent complaints when we try to subtract an R object from an Expression
subtract_output <- build_expr("-", time1, time2)
Expand Down Expand Up @@ -309,6 +323,34 @@ register_bindings_duration <- function() {

build_expr("cast", x, options = cast_options(to_type = duration(unit = "s")))
})
register_binding("decimal_date", function(date) {
y <- build_expr("year", date)
start <- call_binding("make_datetime", year = y, tz = "UTC")
sofar <- call_binding("difftime", date, start, units = "secs")
total <- call_binding(
"if_else",
build_expr("is_leap_year", date),
Expression$scalar(31622400L), # number of seconds in a leap year (366 days)
Expression$scalar(31536000L) # number of seconds in a regular year (365 days)
)
y + sofar$cast(int64()) / total
})
register_binding("date_decimal", function(decimal, tz = "UTC") {
y <- build_expr("floor", decimal)

start <- call_binding("make_datetime", year = y, tz = tz)
seconds <- call_binding(
"if_else",
build_expr("is_leap_year", start),
Expression$scalar(31622400L), # number of seconds in a leap year (366 days)
Expression$scalar(31536000L) # number of seconds in a regular year (365 days)
)

fraction <- decimal - y
delta <- build_expr("floor", seconds * fraction)
delta <- delta$cast(int64())
start + delta$cast(duration("s"))
})
}

binding_format_datetime <- function(x, format = "", tz = "", usetz = FALSE) {
Expand Down
31 changes: 30 additions & 1 deletion r/tests/testthat/test-dplyr-funcs-datetime.R
Original file line number Diff line number Diff line change
Expand Up @@ -1107,7 +1107,7 @@ test_that("difftime works correctly", {
.input %>%
mutate(
secs2 = difftime(
as.POSIXct("2022-03-07", tz = "Europe/Bucharest"),
as.POSIXct("2022-03-07", tz = "Pacific/Marquesas"),
time1,
units = "secs"
)
Expand Down Expand Up @@ -1186,3 +1186,32 @@ test_that("as.difftime()", {
collect()
)
})

test_that("`decimal_date()` and `date_decimal()`", {
test_df <- tibble(
a = c(2007.38998954347, 1970.77732069883, 2020.96061799722,
2009.43465948477, 1975.71251467871, NA),
b = as.POSIXct(
c("2007-05-23 08:18:30", "1970-10-11 17:19:45", "2020-12-17 14:04:06",
"2009-06-08 15:37:01", "1975-09-18 01:37:42", NA)
),
c = as.Date(
c("2007-05-23", "1970-10-11", "2020-12-17", "2009-06-08", "1975-09-18", NA)
)
)

compare_dplyr_binding(
.input %>%
mutate(
decimal_date_from_POSIXct = decimal_date(b),
decimal_date_from_r_POSIXct_obj = decimal_date(as.POSIXct("2022-03-25 15:37:01")),
decimal_date_from_r_date_obj = decimal_date(ymd("2022-03-25")),
decimal_date_from_date = decimal_date(c),
date_from_decimal = date_decimal(a),
date_from_decimal_r_obj = date_decimal(2022.178)
) %>%
collect(),
test_df,
ignore_attr = "tzone"
)
})