From 199242a5309bbd9ae6f9fee276e18b597b9df639 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 3 May 2022 13:45:58 +0100 Subject: [PATCH 1/6] use `make_duration()` helper when defining `make_difftime()` --- r/R/dplyr-funcs-datetime.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 4f2517e8efc..73fe926512f 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -422,8 +422,7 @@ register_bindings_difftime_constructors <- function() { duration <- duration_from_chunks(chunks) } - duration <- build_expr("cast", duration, options = cast_options(to_type = int64())) - duration$cast(duration("s")) + make_duration(duration, "s") }) } From b4ccc08125e015b3fdb901ab3f796e3a3ca379fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 3 May 2022 13:55:13 +0100 Subject: [PATCH 2/6] added comment why `make_duration()` is not a good fit here --- r/R/dplyr-funcs-datetime.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 73fe926512f..3dd5f946ba5 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -309,8 +309,9 @@ register_bindings_duration <- function() { if (call_binding("is.character", x)) { x <- build_expr("strptime", x, options = list(format = format, unit = 0L)) - # complex casting only due to cast type restrictions: time64 -> int64 -> duration(us) + # complex casting due to cast type restrictions: time64 -> int64 -> duration(us) # and then we cast to duration ("s") at the end + # we also need the casting chain to get the measurement units right x <- x$cast(time64("us"))$cast(int64())$cast(duration("us")) } From 5ea1351a7c0e99ec6848225fd178cd2859ddeb53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 9 May 2022 09:01:29 +0100 Subject: [PATCH 3/6] used `make_duration()` for `date_decimal()` and simplify casting chain inside `binding_as_date_numeric()` + removed commented referencing ARROW-16253 --- r/R/dplyr-funcs-datetime.R | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 3dd5f946ba5..b53be6af42c 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -355,8 +355,8 @@ register_bindings_duration <- function() { fraction <- decimal - y delta <- build_expr("floor", seconds * fraction) - delta <- delta$cast(int64()) - start + delta$cast(duration("s")) + delta <- make_duration(delta, "s") + start + delta }) } @@ -527,13 +527,9 @@ binding_as_date_numeric <- function(x, origin = "1970-01-01") { if (origin != "1970-01-01") { delta_in_sec <- call_binding("difftime", origin, "1970-01-01") - # TODO: revisit once either of these issues is addressed: - # https://issues.apache.org/jira/browse/ARROW-16253 (helper function for - # casting from double to duration) or - # https://issues.apache.org/jira/browse/ARROW-15862 (casting from int32 - # -> duration or double -> duration) - delta_in_sec <- build_expr("cast", delta_in_sec, options = cast_options(to_type = int64())) - delta_in_days <- (delta_in_sec / 86400L)$cast(int32()) + # TODO revisit once https://issues.apache.org/jira/browse/ARROW-15862 + # (casting from int32 -> duration or double -> duration) is addressed + delta_in_days <- (delta_in_sec$cast(int64()) / 86400L)$cast(int32()) x <- build_expr("+", x, delta_in_days) } From ae8a25325db234e151e9fe1a3323b36eb5c854ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 9 May 2022 09:09:29 +0100 Subject: [PATCH 4/6] redo changes post master merge --- r/R/dplyr-datetime-helpers.R | 10 +++------- r/R/dplyr-funcs-datetime.R | 4 ++-- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/r/R/dplyr-datetime-helpers.R b/r/R/dplyr-datetime-helpers.R index 9acf8b18435..0295dd35409 100644 --- a/r/R/dplyr-datetime-helpers.R +++ b/r/R/dplyr-datetime-helpers.R @@ -142,13 +142,9 @@ binding_as_date_numeric <- function(x, origin = "1970-01-01") { if (origin != "1970-01-01") { delta_in_sec <- call_binding("difftime", origin, "1970-01-01") - # TODO: revisit once either of these issues is addressed: - # https://issues.apache.org/jira/browse/ARROW-16253 (helper function for - # casting from double to duration) or - # https://issues.apache.org/jira/browse/ARROW-15862 (casting from int32 - # -> duration or double -> duration) - delta_in_sec <- build_expr("cast", delta_in_sec, options = cast_options(to_type = int64())) - delta_in_days <- (delta_in_sec / 86400L)$cast(int32()) + # TODO revisit once https://issues.apache.org/jira/browse/ARROW-15862 + # (casting from int32 -> duration or double -> duration) is addressed + delta_in_days <- (delta_in_sec$cast(int64()) / 86400L)$cast(int32()) x <- build_expr("+", x, delta_in_days) } diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index edfe9fe44bb..836f4361d81 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -351,8 +351,8 @@ register_bindings_datetime_conversion <- function() { fraction <- decimal - y delta <- build_expr("floor", seconds * fraction) - delta <- delta$cast(int64()) - start + delta$cast(duration("s")) + delta <- make_duration(delta, "s") + start + delta }) } From c346f60c54b8a08e5c5be3c736eca2af861a863c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 9 May 2022 15:01:09 +0100 Subject: [PATCH 5/6] use `make_duration()` inside `as.difftime()` --- r/R/dplyr-funcs-datetime.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 836f4361d81..7b750b56041 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -415,7 +415,7 @@ register_bindings_duration <- function() { # complex casting due to cast type restrictions: time64 -> int64 -> duration(us) # and then we cast to duration ("s") at the end # we also need the casting chain to get the measurement units right - x <- x$cast(time64("us"))$cast(int64())$cast(duration("us")) + x <- make_duration(x$cast(time64("us")), unit = "us") } # numeric -> duration not supported in Arrow yet so we use int64() as an From 5e786616cb9417a8b798ef5b94062904e290afb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 9 May 2022 20:49:42 +0100 Subject: [PATCH 6/6] update comment to match logic --- r/R/dplyr-funcs-datetime.R | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 7b750b56041..abf9a2056e1 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -412,9 +412,7 @@ register_bindings_duration <- function() { if (call_binding("is.character", x)) { x <- build_expr("strptime", x, options = list(format = format, unit = 0L)) - # complex casting due to cast type restrictions: time64 -> int64 -> duration(us) - # and then we cast to duration ("s") at the end - # we also need the casting chain to get the measurement units right + # we do a final cast to duration ("s") at the end x <- make_duration(x$cast(time64("us")), unit = "us") }