Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
a10b1b2
1st working draft. some finessing needed
dragosmg May 5, 2022
2c7b8d1
use `is.null` instead of missing + comment
dragosmg May 5, 2022
3b4af68
temp
dragosmg May 5, 2022
a64e4e7
clean-up `binding_as_date_character()`
dragosmg May 5, 2022
17ab143
update unit tests
dragosmg May 5, 2022
1983821
small change
dragosmg May 5, 2022
93a7418
use `parse_date_time()` for `as_date()` and `as.Date()` bindings + up…
dragosmg May 10, 2022
26fb334
clean-up and use ellipsis `...`
dragosmg May 10, 2022
8ba5dc7
support R objects as inputs + additional unit tests
dragosmg May 10, 2022
ec0b385
add unit test for `parse_date_time()` with regular R object
dragosmg May 10, 2022
99e0147
separate the unit test involving string processing and skip it when R…
dragosmg May 10, 2022
ab0a843
skip parsing with R object test on R 3.6 on windows
dragosmg May 10, 2022
613bed0
skip tests on r 3.6 & windows (re2 not available)
dragosmg May 10, 2022
d6d45a2
new, failing test to test tryFormats not coalescing
dragosmg May 13, 2022
7a21e92
update `test_df` for `as.Date()`
dragosmg May 20, 2022
0479b69
remove `browser()`
dragosmg May 20, 2022
96c44d3
undo some trials
dragosmg May 25, 2022
2492c00
scaffolding for `tryFormats`
dragosmg May 26, 2022
ec59d1f
revert `binding_as_character()`
dragosmg Jun 30, 2022
a0ad6fa
revert `binding_as_date_numeric()`
dragosmg Jun 30, 2022
be71950
revert `binding_as_date()`
dragosmg Jun 30, 2022
17ea2f5
revert `as.Date` and `as_date` bindings definitions
dragosmg Jun 30, 2022
764f2d0
revert `tryFormats` test
dragosmg Jun 30, 2022
130d426
test_fix
dragosmg Jun 30, 2022
16a5849
revert tests
dragosmg Jun 30, 2022
d929b8b
revert tests
dragosmg Jun 30, 2022
ecb8f99
revert `binding_as_date()`
dragosmg Jun 30, 2022
7e42b13
not skipping since `as.Date()` no longer uses the `parse_date_time()`…
dragosmg Jul 1, 2022
0dcfe8c
re-order
dragosmg Jul 1, 2022
fe64d7b
reorder
dragosmg Jul 1, 2022
948e8d3
improve error message for `as.Date()` with multiple `tryFormats` + un…
dragosmg Jul 4, 2022
a624355
update error message
dragosmg Jul 4, 2022
29c3251
clearer error message
dragosmg Jul 4, 2022
39ee8f2
move the `tryFormats` error message in `as.Date()`
dragosmg Jul 4, 2022
104ebb2
remove `.keep = "used"`
dragosmg Jul 4, 2022
1d96d85
removed `.keep = "used"`
dragosmg Jul 4, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions r/R/dplyr-datetime-helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,6 @@ binding_as_date <- function(x,
format = NULL,
tryFormats = "%Y-%m-%d",
origin = "1970-01-01") {
if (is.null(format) && length(tryFormats) > 1) {
abort("`as.Date()` with multiple `tryFormats` is not supported in Arrow")
}

if (call_binding("is.Date", x)) {
return(x)
Expand Down
10 changes: 10 additions & 0 deletions r/R/dplyr-funcs-datetime.R
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,16 @@ register_bindings_datetime_conversion <- function() {
tryFormats = "%Y-%m-%d",
origin = "1970-01-01",
tz = "UTC") {

if (is.null(format) && length(tryFormats) > 1) {
abort(
paste(
"`as.Date()` with multiple `tryFormats` is not supported in Arrow,",
"consider using the lubridate specialised parsing functions such as, `ymd()`, `ymd()`, etc."
)
)
}

# base::as.Date() and lubridate::as_date() differ in the way they use the
# `tz` argument. Both cast to the desired timezone, if present. The
# difference appears when the `tz` argument is not set: `as.Date()` uses the
Expand Down
117 changes: 83 additions & 34 deletions r/tests/testthat/test-dplyr-funcs-datetime.R
Original file line number Diff line number Diff line change
Expand Up @@ -1508,16 +1508,19 @@ test_that("make_difftime()", {

test_that("`as.Date()` and `as_date()`", {
test_df <- tibble::tibble(
posixct_var = as.POSIXct("2022-02-25 00:00:01", tz = "Pacific/Marquesas"),
dt_europe = ymd_hms("2010-08-03 00:50:50", tz = "Europe/London"),
dt_utc = ymd_hms("2010-08-03 00:50:50"),
date_var = as.Date("2022-02-25"),
difference_date = ymd_hms("2010-08-03 00:50:50", tz = "Pacific/Marquesas"),
character_ymd_var = "2022-02-25 00:00:01",
character_ydm_var = "2022/25/02 00:00:01",
integer_var = 32L,
integerish_var = 32,
double_var = 34.56
posixct_var = as.POSIXct(c("2022-02-25 00:00:01", "1987-11-24 12:34:56", NA), tz = "Pacific/Marquesas"),
dt_europe = ymd_hms("2010-08-03 00:50:50", "1987-11-24 12:34:56", NA, tz = "Europe/London"),
dt_utc = ymd_hms("2010-08-03 00:50:50", "1987-11-24 12:34:56", NA),
date_var = as.Date(c("2022-02-25", "1987-11-24", NA)),
difference_date = ymd_hms("2010-08-03 00:50:50", "1987-11-24 12:34:56", NA, tz = "Pacific/Marquesas"),
try_formats_string = c(NA, "2022-01-01", "2022/01/01"),
character_ymd_hms_var = c("2022-02-25 00:00:01", "1987-11-24 12:34:56", NA),
character_ydm_hms_var = c("2022/25/02 00:00:01", "1987/24/11 12:34:56", NA),
character_ymd_var = c("2022-02-25", "1987-11-24", NA),
character_ydm_var = c("2022/25/02", "1987/24/11", NA),
integer_var = c(21L, 32L, NA),
integerish_var = c(21, 32, NA),
double_var = c(12.34, 56.78, NA)
)

compare_dplyr_binding(
Expand All @@ -1528,8 +1531,8 @@ test_that("`as.Date()` and `as_date()`", {
date_pv_tz1 = as.Date(posixct_var, tz = "Pacific/Marquesas"),
date_utc1 = as.Date(dt_utc),
date_europe1 = as.Date(dt_europe),
date_char_ymd1 = as.Date(character_ymd_var, format = "%Y-%m-%d %H:%M:%S"),
date_char_ydm1 = as.Date(character_ydm_var, format = "%Y/%d/%m %H:%M:%S"),
date_char_ymd_hms1 = as.Date(character_ymd_hms_var, format = "%Y-%m-%d %H:%M:%S"),
date_char_ydm_hms1 = as.Date(character_ydm_hms_var, format = "%Y/%d/%m %H:%M:%S"),
date_int1 = as.Date(integer_var, origin = "1970-01-01"),
date_int_origin1 = as.Date(integer_var, origin = "1970-01-03"),
date_integerish1 = as.Date(integerish_var, origin = "1970-01-01"),
Expand All @@ -1538,8 +1541,8 @@ test_that("`as.Date()` and `as_date()`", {
date_pv_tz2 = as_date(posixct_var, tz = "Pacific/Marquesas"),
date_utc2 = as_date(dt_utc),
date_europe2 = as_date(dt_europe),
date_char_ymd2 = as_date(character_ymd_var, format = "%Y-%m-%d %H:%M:%S"),
date_char_ydm2 = as_date(character_ydm_var, format = "%Y/%d/%m %H:%M:%S"),
date_char_ymd2 = as_date(character_ymd_hms_var, format = "%Y-%m-%d %H:%M:%S"),
date_char_ydm2 = as_date(character_ydm_hms_var, format = "%Y/%d/%m %H:%M:%S"),
date_int2 = as_date(integer_var, origin = "1970-01-01"),
date_int_origin2 = as_date(integer_var, origin = "1970-01-03"),
date_integerish2 = as_date(integerish_var, origin = "1970-01-01")
Expand All @@ -1549,35 +1552,63 @@ test_that("`as.Date()` and `as_date()`", {
)

# we do not support multiple tryFormats
compare_dplyr_binding(
.input %>%
mutate(date_char_ymd = as.Date(character_ymd_var,
tryFormats = c("%Y-%m-%d", "%Y/%m/%d")
)) %>%
# this is not a simple warning, therefore we cannot use compare_dplyr_binding()
# with `warning = TRUE`
# arrow_table test
expect_warning(
test_df %>%
arrow_table() %>%
mutate(
date_char_ymd = as.Date(
character_ymd_var,
tryFormats = c("%Y-%m-%d", "%Y/%m/%d")
)
) %>%
collect(),
test_df,
warning = TRUE
regexp = "consider using the lubridate specialised parsing functions"
)

# strptime does not support a partial format - testing an error surfaced from
# C++ (hence not testing the content of the error message)
# TODO revisit once https://issues.apache.org/jira/browse/ARROW-15813 is resolved
expect_error(
# record batch test
expect_warning(
test_df %>%
arrow_table() %>%
mutate(date_char_ymd = as_date(character_ymd_var)) %>%
collect()
record_batch() %>%
mutate(
date_char_ymd = as.Date(
character_ymd_var,
tryFormats = c("%Y-%m-%d", "%Y/%m/%d")
)
) %>%
collect(),
regexp = "consider using the lubridate specialised parsing functions"
)

# strptime does not support a partial format - Arrow returns NA, while
# lubridate parses correctly
# TODO revisit once https://issues.apache.org/jira/browse/ARROW-15813 is resolved
expect_error(
test_df %>%
arrow_table() %>%
mutate(date_char_ymd = as.Date(character_ymd_var)) %>%
collect(),
regexp = "Failed to parse string: '2022-02-25 00:00:01' as a scalar of type timestamp[s]",
fixed = TRUE
expect_equal(
test_df %>%
arrow_table() %>%
mutate(date_char_ymd_hms = as_date(character_ymd_hms_var)) %>%
collect(),
test_df %>%
mutate(date_char_ymd_hms = as_date(character_ymd_hms_var)) %>%
collect()
)
)

# same as above
expect_error(
expect_equal(
test_df %>%
arrow_table() %>%
mutate(date_char_ymd_hms = as.Date(character_ymd_hms_var)) %>%
collect(),
test_df %>%
mutate(date_char_ymd_hms = as.Date(character_ymd_hms_var)) %>%
collect()
)
)

# we do not support as.Date() with double/ float (error surfaced from C++)
# TODO revisit after https://issues.apache.org/jira/browse/ARROW-15798
Expand Down Expand Up @@ -1613,6 +1644,24 @@ test_that("`as.Date()` and `as_date()`", {
)
})

# The date-conversion bindings are called here with plain R values (string and
# numeric literals) instead of columns of the input table.
test_that("`as_date()` and `as.Date()` work with R objects", {
  # Minimal one-row input; none of the expressions below reference column `a`.
  single_row_tbl <- tibble(a = 1)

  compare_dplyr_binding(
    .input %>%
      mutate(
        # base::as.Date(): string, number with origin, string with one format
        date1 = as.Date("2022-05-10"),
        date2 = as.Date(12, origin = "2022-05-01"),
        date3 = as.Date("2022-10-03", tryFormats = "%Y-%m-%d"),
        # lubridate::as_date(): string, number with origin, string
        date4 = as_date("2022-05-10"),
        date5 = as_date(12, origin = "2022-05-01"),
        date6 = as_date("2022-10-03")
      ) %>%
      collect(),
    single_row_tbl
  )
})

test_that("`as_datetime()`", {
test_df <- tibble(
date = as.Date(c("2022-03-22", "2021-07-30", NA)),
Expand Down