Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 2 additions & 6 deletions r/NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,8 @@
# arrow 7.0.0.9000

* `read_csv_arrow()`'s readr-style type `T` is now mapped to `timestamp(unit = "ns")` instead of `timestamp(unit = "s")`.
* `lubridate`:
* `tz()` to extract/get timezone
* `semester()` to extract/get semester
* `dst()` to get daylight savings time indicator.
* `date()` to extract date
* `epiyear()` to get epiyear
* `lubridate`:
* component extraction functions: `tz()` (timezone), `semester()` (semester), `dst()` (daylight savings time indicator), `date()` (extract date), `epiyear()` (epiyear), improvements to `month()`, which now works with integer inputs.
* date-time functionality:
* `as.Date()` to convert to date

Expand Down
27 changes: 24 additions & 3 deletions r/R/dplyr-funcs-datetime.R
Original file line number Diff line number Diff line change
Expand Up @@ -105,17 +105,38 @@ register_bindings_datetime <- function() {
(call_binding("yday", x) - 1) %/% 7 + 1
})

register_binding("month", function(x, label = FALSE, abbr = TRUE, locale = Sys.getlocale("LC_TIME")) {
register_binding("month", function(x,
label = FALSE,
abbr = TRUE,
locale = Sys.getlocale("LC_TIME")) {

if (call_binding("is.integer", x)) {
x <- call_binding("if_else",
call_binding("between", x, 1, 12),
x,
NA_integer_)
if (!label) {
# if we don't need a label we can return the integer itself (already
# constrained to 1:12)
return(x)
}
# make the integer into a date32() - which interprets integers as
# days from epoch (we multiply by 28 to be able to later extract the
# month with label) - NB this builds a false date (to be used by strftime)
# since we only know and care about the month
x <- build_expr("cast", x * 28L, options = cast_options(to_type = date32()))
}

if (label) {
if (abbr) {
format <- "%b"
} else {
format <- "%B"
}
return(Expression$create("strftime", x, options = list(format = format, locale = locale)))
return(build_expr("strftime", x, options = list(format = format, locale = locale)))
}

Expression$create("month", x)
build_expr("month", x)
})

register_binding("is.Date", function(x) {
Expand Down
90 changes: 80 additions & 10 deletions r/tests/testthat/test-dplyr-funcs-datetime.R
Original file line number Diff line number Diff line change
Expand Up @@ -783,16 +783,12 @@ test_that("semester works with temporal types and integers", {
collect(),
test_df
)
# semester extraction from months as integers is not supported yet
# it will be once https://issues.apache.org/jira/browse/ARROW-15701 is done
# TODO change from expect_error to compare_dplyr_bindings
expect_error(
test_df %>%
arrow_table() %>%

compare_dplyr_binding(
.input %>%
mutate(sem_month_as_int = semester(month_as_int)) %>%
collect(),
regexp = "NotImplemented: Function 'month' has no kernel matching input types (array[int32])",
fixed = TRUE
test_df
)

expect_error(
Expand All @@ -802,8 +798,8 @@ test_that("semester works with temporal types and integers", {
collect(),
regexp = "NotImplemented: Function 'month' has no kernel matching input types (array[string])",
fixed = TRUE
)
})
)
})

test_that("dst extracts daylight savings time correctly", {
test_df <- tibble(
Expand All @@ -820,6 +816,80 @@ test_that("dst extracts daylight savings time correctly", {
)
})

test_that("month() supports integer input", {
test_df_month <- tibble(
month_as_int = c(1:12, NA)
)

compare_dplyr_binding(
.input %>%
mutate(month_int_input = month(month_as_int)) %>%
collect(),
test_df_month
)

skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-13168

compare_dplyr_binding(
.input %>%
# R returns ordered factor whereas Arrow returns character
mutate(
month_int_input = as.character(month(month_as_int, label = TRUE))
) %>%
collect(),
test_df_month
)

compare_dplyr_binding(
.input %>%
# R returns ordered factor whereas Arrow returns character
mutate(
month_int_input = as.character(
month(month_as_int, label = TRUE, abbr = FALSE)
)
) %>%
collect(),
test_df_month
)
})

test_that("month() errors with double input and returns NA with int outside 1:12", {
test_df_month <- tibble(
month_as_int = c(-1L, 1L, 13L, NA),
month_as_double = month_as_int + 0.1
)

expect_equal(
test_df_month %>%
arrow_table() %>%
select(month_as_int) %>%
mutate(month_int_input = month(month_as_int)) %>%
collect(),
tibble(
month_as_int = c(-1L, 1L, 13L, NA),
month_int_input = c(NA, 1L, NA, NA)
)
)

expect_error(
test_df_month %>%
arrow_table() %>%
mutate(month_dbl_input = month(month_as_double)) %>%
collect(),
regexp = "Function 'month' has no kernel matching input types (array[double])",
fixed = TRUE
)

expect_error(
test_df_month %>%
record_batch() %>%
mutate(month_dbl_input = month(month_as_double)) %>%
collect(),
regexp = "Function 'month' has no kernel matching input types (array[double])",
fixed = TRUE
)
})

test_that("date works in arrow", {
# https://issues.apache.org/jira/browse/ARROW-13168
skip_on_os("windows")
Expand Down