From 52f398c48ccaea1db6f0d703bc3d1a16817f66cb Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Fri, 9 Jul 2021 11:26:12 +0100 Subject: [PATCH 01/10] Add log and trig functions --- r/R/compute.R | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ r/R/expression.R | 12 +++++++++++- 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/r/R/compute.R b/r/R/compute.R index 4277ad8d6df..db67085b712 100644 --- a/r/R/compute.R +++ b/r/R/compute.R @@ -116,6 +116,54 @@ max.ArrowDatum <- function(..., na.rm = FALSE) { scalar_aggregate("min_max", ..., na.rm = na.rm)$GetFieldByName("max") } +#' @export +log.ArrowDatum <- function(x) { + call_function("ln_checked", x) +} + +#' @export +logb.ArrowDatum <- log.ArrowDatum + +#' @export +log10.ArrowDatum <- function(x) { + call_function("log10_checked", x) +} + +#' @export +log2.ArrowDatum <- function(x) { + call_function("log2_checked", x) +} + +#' @export +log1p.ArrowDatum <- function(x) { + call_function("log1p_checked", x) +} + +#' @export +sin.ArrowDatum <- function(x) { + call_function("sin_checked", x) +} + +#' @export +cos.ArrowDatum <- function(x) { + call_function("cos_checked", x) +} + +#' @export +tan.ArrowDatum <- function(x) { + call_function("tan_checked", x) +} + +#' @export +asin.ArrowDatum <- function(x) { + call_function("asin_checked", x) +} + +#' @export +acos.ArrowDatum <- function(x) { + call_function("acos_checked", x) +} + scalar_aggregate <- function(FUN, ..., na.rm = FALSE, na.min_count = 0) { a <- collect_arrays_from_dots(list(...)) if (!na.rm) { diff --git a/r/R/expression.R b/r/R/expression.R index be80c9db969..1607e0d24cd 100644 --- a/r/R/expression.R +++ b/r/R/expression.R @@ -43,7 +43,17 @@ "yday" = "day_of_year", "hour" = "hour", # second is defined in dplyr-functions.R - "minute" = "minute" + "minute" = "minute", + "log" = "ln_checked", + "logb" = "ln_checked", + "log10" = "log10_checked", + "log2" = "log2_checked", + "log1p" = "log1p_checked", + "sin" = "sin_checked", + "cos" = "cos_checked", + "tan" = "tan_checked", + "asin" = "asin_checked", + "acos" = "acos_checked" ) .binary_function_map <- list( From c3b36d348087de081465a39c50aa767adb6f98b7 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Fri, 9 Jul 2021 11:26:22 +0100 Subject: [PATCH 02/10] Fix typo --- r/tests/testthat/helper-expectation.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/r/tests/testthat/helper-expectation.R b/r/tests/testthat/helper-expectation.R index d173620398e..359e31ef57d 100644 --- a/r/tests/testthat/helper-expectation.R +++ b/r/tests/testthat/helper-expectation.R @@ -123,7 +123,7 @@ expect_dplyr_equal <- function(expr, } if (!is.null(skip_msg)) { - skip(paste(skip_msg, collpase = "\n")) + skip(paste(skip_msg, collapse = "\n")) } } @@ -212,7 +212,7 @@ expect_vector_equal <- function(expr, # A vectorized R expression containing `in } if (!is.null(skip_msg)) { - skip(paste(skip_msg, collpase = "\n")) + skip(paste(skip_msg, collapse = "\n")) } } @@ -273,7 +273,7 @@ expect_vector_error <- function(expr, # A vectorized R expression containing `in } if (!is.null(skip_msg)) { - skip(paste(skip_msg, collpase = "\n")) + skip(paste(skip_msg, collapse = "\n")) } } From 665c8ed6c5cf414f16447125f0bf8af3e619f2ff Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Fri, 9 Jul 2021 11:26:54 +0100 Subject: [PATCH 03/10] Export new functions --- r/NAMESPACE | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/r/NAMESPACE b/r/NAMESPACE index ab45aa9985e..133eb6aa048 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -21,6 +21,7 @@ S3method("[[<-",Schema) S3method("names<-",ArrowTabular) S3method(Ops,ArrowDatum) S3method(Ops,Expression) +S3method(acos,ArrowDatum) S3method(all,ArrowDatum) S3method(all,equal.ArrowObject) S3method(any,ArrowDatum) @@ -36,7 +37,9 @@ S3method(as.list,ArrowTabular) S3method(as.list,Schema) S3method(as.raw,Buffer) S3method(as.vector,ArrowDatum) +S3method(asin,ArrowDatum) S3method(c,Dataset) +S3method(cos,ArrowDatum) S3method(dim,ArrowTabular) S3method(dim,Dataset) S3method(dim,StructArray) @@ -55,6 +58,8 @@ S3method(length,ArrowDatum) S3method(length,ArrowTabular) S3method(length,Scalar) S3method(length,Schema) +S3method(log,ArrowDatum) +S3method(log1p,ArrowDatum) S3method(max,ArrowDatum) S3method(mean,ArrowDatum) S3method(median,ArrowDatum) @@ -83,6 +88,7 @@ S3method(read_message,InputStream) S3method(read_message,MessageReader) S3method(read_message,default) S3method(row.names,ArrowTabular) +S3method(sin,ArrowDatum) S3method(sort,ArrowDatum) S3method(sort,Scalar) S3method(sum,ArrowDatum) @@ -90,6 +96,7 @@ S3method(tail,ArrowDatum) S3method(tail,ArrowTabular) S3method(tail,Dataset) S3method(tail,arrow_dplyr_query) +S3method(tan,ArrowDatum) S3method(type,ArrowDatum) S3method(type,default) S3method(unique,ArrowDatum) @@ -237,6 +244,9 @@ export(list_compute_functions) export(list_flights) export(list_of) export(load_flight_server) +export(log10.ArrowDatum) +export(log2.ArrowDatum) +export(logb.ArrowDatum) export(map_batches) export(match_arrow) export(matches) From 3d4ee2f07812d7d5b5c1bdf9dc7ba322a2393afa Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Fri, 9 Jul 2021 12:01:56 +0100 Subject: [PATCH 04/10] Add tests for log and trig functions --- r/tests/testthat/test-compute-arith.R | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/r/tests/testthat/test-compute-arith.R b/r/tests/testthat/test-compute-arith.R index 2586ba865b3..f958edb94f0 100644 --- a/r/tests/testthat/test-compute-arith.R +++ b/r/tests/testthat/test-compute-arith.R @@ -115,3 +115,28 @@ test_that("Dates casting", { # Error: NotImplemented: Function add_checked has no kernel matching input types (array[date32[day]], scalar[double]) expect_equal(a + 2, Array$create(c((Sys.Date() + 1:4 ) + 2), NA_integer_)) }) + +test_that("log", { + + data <- c(as.numeric(1:10), NA, NA) + + expect_vector_equal(log(input), data) + expect_vector_equal(logb(input), data) + expect_vector_equal(log1p(input), data) + expect_vector_equal(log2(input), data) + expect_vector_equal(log10(input), data) + +}) + +test_that("trig", { + + data <- c(as.numeric(1:10), NA, NA) + under_1 <- c(seq(from = 0, to = 1, by = 0.1), NA) + + expect_vector_equal(sin(input), data) + expect_vector_equal(cos(input), data) + expect_vector_equal(tan(input), data) + expect_vector_equal(asin(under_1), data) + expect_vector_equal(acos(under_1), data) + +}) From 874793ffcfe78f8fdeff1806c632d8ed2753cb4f Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Fri, 9 Jul 2021 14:28:34 +0100 Subject: [PATCH 05/10] Remove unnecessary S3 functions --- r/NAMESPACE | 10 ---------- r/R/compute.R | 48 ------------------------------------------------ 2 files changed, 58 deletions(-) diff --git a/r/NAMESPACE b/r/NAMESPACE index 133eb6aa048..ab45aa9985e 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -21,7 +21,6 @@ S3method("[[<-",Schema) S3method("names<-",ArrowTabular) S3method(Ops,ArrowDatum) S3method(Ops,Expression) -S3method(acos,ArrowDatum) S3method(all,ArrowDatum) S3method(all,equal.ArrowObject) S3method(any,ArrowDatum) @@ -37,9 +36,7 @@ S3method(as.list,ArrowTabular) S3method(as.list,Schema) S3method(as.raw,Buffer) S3method(as.vector,ArrowDatum) -S3method(asin,ArrowDatum) S3method(c,Dataset) -S3method(cos,ArrowDatum) S3method(dim,ArrowTabular) S3method(dim,Dataset) S3method(dim,StructArray) @@ -58,8 +55,6 @@ S3method(length,ArrowDatum) S3method(length,ArrowTabular) S3method(length,Scalar) S3method(length,Schema) -S3method(log,ArrowDatum) -S3method(log1p,ArrowDatum) S3method(max,ArrowDatum) S3method(mean,ArrowDatum) S3method(median,ArrowDatum) @@ -88,7 +83,6 @@ S3method(read_message,InputStream) S3method(read_message,MessageReader) S3method(read_message,default) S3method(row.names,ArrowTabular) -S3method(sin,ArrowDatum) S3method(sort,ArrowDatum) S3method(sort,Scalar) S3method(sum,ArrowDatum) @@ -96,7 +90,6 @@ S3method(tail,ArrowDatum) S3method(tail,ArrowTabular) S3method(tail,Dataset) S3method(tail,arrow_dplyr_query) -S3method(tan,ArrowDatum) S3method(type,ArrowDatum) S3method(type,default) S3method(unique,ArrowDatum) @@ -244,9 +237,6 @@ export(list_compute_functions) export(list_flights) export(list_of) export(load_flight_server) -export(log10.ArrowDatum) -export(log2.ArrowDatum) -export(logb.ArrowDatum) export(map_batches) export(match_arrow) export(matches) diff --git a/r/R/compute.R b/r/R/compute.R index db67085b712..4277ad8d6df 100644 --- a/r/R/compute.R +++ b/r/R/compute.R @@ -116,54 +116,6 @@ max.ArrowDatum <- function(..., na.rm = FALSE) { scalar_aggregate("min_max", ..., na.rm = na.rm)$GetFieldByName("max") } -#' @export -log.ArrowDatum <- function(x) { - call_function("ln_checked", x) -} - -#' @export -logb.ArrowDatum <- log.ArrowDatum - -#' @export -log10.ArrowDatum <- function(x) { - call_function("log10_checked", x) -} - -#' @export -log2.ArrowDatum <- function(x) { - call_function("log2_checked", x) -} - -#' @export -log1p.ArrowDatum <- function(x) { - call_function("log1p_checked", x) -} - -#' @export -sin.ArrowDatum <- function(x) { - call_function("sin_checked", x) -} - -#' @export -cos.ArrowDatum <- function(x) { - call_function("cos_checked", x) -} - -#' @export -tan.ArrowDatum <- function(x) { - call_function("tan_checked", x) -} - -#' @export -asin.ArrowDatum <- function(x) { - call_function("asin_checked", x) -} - -#' @export -acos.ArrowDatum <- function(x) { - call_function("acos_checked", x) -} - scalar_aggregate <- function(FUN, ..., na.rm = FALSE, na.min_count = 0) { a <- collect_arrays_from_dots(list(...)) if (!na.rm) { From a2835351c19373ad4b6fd893f062a104129d03ee Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Fri, 9 Jul 2021 14:28:47 +0100 Subject: [PATCH 06/10] Add tests for NSE funcs --- r/tests/testthat/test-compute-arith.R | 25 -------- r/tests/testthat/test-dplyr.R | 82 +++++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 25 deletions(-) diff --git a/r/tests/testthat/test-compute-arith.R b/r/tests/testthat/test-compute-arith.R index f958edb94f0..2586ba865b3 100644 --- a/r/tests/testthat/test-compute-arith.R +++ b/r/tests/testthat/test-compute-arith.R @@ -115,28 +115,3 @@ test_that("Dates casting", { # Error: NotImplemented: Function add_checked has no kernel matching input types (array[date32[day]], scalar[double]) expect_equal(a + 2, Array$create(c((Sys.Date() + 1:4 ) + 2), NA_integer_)) }) - -test_that("log", { - - data <- c(as.numeric(1:10), NA, NA) - - expect_vector_equal(log(input), data) - expect_vector_equal(logb(input), data) - expect_vector_equal(log1p(input), data) - expect_vector_equal(log2(input), data) - expect_vector_equal(log10(input), data) - -}) - -test_that("trig", { - - data <- c(as.numeric(1:10), NA, NA) - under_1 <- c(seq(from = 0, to = 1, by = 0.1), NA) - - expect_vector_equal(sin(input), data) - expect_vector_equal(cos(input), data) - expect_vector_equal(tan(input), data) - expect_vector_equal(asin(under_1), data) - expect_vector_equal(acos(under_1), data) - -}) diff --git a/r/tests/testthat/test-dplyr.R b/r/tests/testthat/test-dplyr.R index 459c5ebc441..420730b4f40 100644 --- a/r/tests/testthat/test-dplyr.R +++ b/r/tests/testthat/test-dplyr.R @@ -945,3 +945,85 @@ test_that("abs()", { df ) }) + +test_that("log functions", { + + df <- tibble(x = c(as.numeric(1:10), NA, NA)) + + expect_dplyr_equal( + input %>% + mutate(y = log(x)) %>% + collect(), + df + ) + + expect_dplyr_equal( + input %>% + mutate(y = logb(x)) %>% + collect(), + df + ) + + expect_dplyr_equal( + input %>% + mutate(y = log1p(x)) %>% + collect(), + df + ) + + expect_dplyr_equal( + input %>% + mutate(y = log2(x)) %>% + collect(), + df + ) + + expect_dplyr_equal( + input %>% + mutate(y = log10(x)) %>% + collect(), + df + ) + +}) + +test_that("trig functions", { + + df <- tibble(x = c(seq(from = 0, to = 1, by = 0.1), NA)) + + expect_dplyr_equal( + input %>% + mutate(y = sin(x)) %>% + collect(), + df + ) + + expect_dplyr_equal( + input %>% + mutate(y = cos(x)) %>% + collect(), + df + ) + + expect_dplyr_equal( + input %>% + mutate(y = tan(x)) %>% + collect(), + df + ) + + expect_dplyr_equal( + input %>% + mutate(y = asin(x)) %>% + collect(), + df + ) + + expect_dplyr_equal( + input %>% + mutate(y = acos(x)) %>% + collect(), + df + ) + +}) \ No newline at end of file From 004aa116d1ad13aaaf23f104a0f314e6823580ab Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Wed, 14 Jul 2021 08:20:51 +0100 Subject: [PATCH 07/10] Remove unnecessary conversion to double/float --- r/tests/testthat/test-dplyr.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/tests/testthat/test-dplyr.R b/r/tests/testthat/test-dplyr.R index 420730b4f40..1647961da83 100644 --- a/r/tests/testthat/test-dplyr.R +++ b/r/tests/testthat/test-dplyr.R @@ -948,7 +948,7 @@ test_that("abs()", { test_that("log functions", { - df <- tibble(x = c(as.numeric(1:10), NA, NA)) + df <- tibble(x = c(1:10, NA, NA)) expect_dplyr_equal( input %>% From 82eed1675523d22513671fe0282cb9b6387d8787 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Thu, 15 Jul 2021 13:41:52 +0100 Subject: [PATCH 08/10] Update code for log and add tests for different bases --- r/R/dplyr-functions.R | 20 ++++++++++++++++++++ r/R/expression.R | 2 -- r/tests/testthat/test-dplyr.R | 27 +++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 2 deletions(-) diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index 5ddd6968972..3e054bb74f0 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -559,3 +559,23 @@ nse_funcs$wday <- function(x, label = FALSE, abbr = TRUE, week_start = getOption Expression$create("day_of_week", x, options = list(one_based_numbering = TRUE, week_start = week_start)) } + +nse_funcs$log <- function(x, base = exp(1)){ + + if (base == exp(1)) { + return(Expression$create("ln_checked", x)) + } + + if (base == 2) { + return(Expression$create("log2_checked", x)) + } + + if (base == 10) { + return(Expression$create("log10_checked", x)) + } + + stop("`base` values other than exp(1), 2 and 10 not supported in Arrow") + +} + +nse_funcs$logb <- nse_funcs$log \ No newline at end of file diff --git a/r/R/expression.R b/r/R/expression.R index 1607e0d24cd..16915427292 100644 --- a/r/R/expression.R +++ b/r/R/expression.R @@ -44,8 +44,6 @@ "hour" = "hour", # second is defined in dplyr-functions.R "minute" = "minute", - "log" = "ln_checked", - "logb" = "ln_checked", "log10" = "log10_checked", "log2" = "log2_checked", "log1p" = "log1p_checked", diff --git a/r/tests/testthat/test-dplyr.R b/r/tests/testthat/test-dplyr.R index 1647961da83..63d0433fc23 100644 --- a/r/tests/testthat/test-dplyr.R +++ b/r/tests/testthat/test-dplyr.R @@ -957,6 +957,33 @@ test_that("log functions", { df ) + expect_dplyr_equal( + input %>% + mutate(y = log(x, base = exp(1))) %>% + collect(), + df + ) + + expect_dplyr_equal( + input %>% + mutate(y = log(x, base = 2)) %>% + collect(), + df + ) + + expect_dplyr_equal( + input %>% + mutate(y = log(x, base = 10)) %>% + collect(), + df + ) + + expect_error( + nse_funcs$log(Expression$scalar(x), base = 5), + "`base` values other than exp(1), 2 and 10 not supported in Arrow", + fixed = TRUE + ) + expect_dplyr_equal( input %>% mutate(y = logb(x)) %>% From dc698d93f882fb9cded1557a5c81845fd43b8799 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Thu, 15 Jul 2021 13:44:16 +0100 Subject: [PATCH 09/10] Add spacing fix --- r/R/dplyr-functions.R | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index 3e054bb74f0..bb51681f47c 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -560,7 +560,7 @@ nse_funcs$wday <- function(x, label = FALSE, abbr = TRUE, week_start = getOption } -nse_funcs$log <- function(x, base = exp(1)){ +nse_funcs$log <- function(x, base = exp(1)) { if (base == exp(1)) { return(Expression$create("ln_checked", x)) @@ -573,9 +573,7 @@ nse_funcs$log <- function(x, base = exp(1)){ if (base == 10) { return(Expression$create("log10_checked", x)) } - stop("`base` values other than exp(1), 2 and 10 not supported in Arrow") - } nse_funcs$logb <- nse_funcs$log \ No newline at end of file From 332728ac8c6f3e1c3161781814c7671782a170a1 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Thu, 15 Jul 2021 12:09:43 -0400 Subject: [PATCH 10/10] Update r/R/dplyr-functions.R --- r/R/dplyr-functions.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index bb51681f47c..61046d5e829 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -573,7 +573,8 @@ nse_funcs$log <- function(x, base = exp(1)) { if (base == 10) { return(Expression$create("log10_checked", x)) } - stop("`base` values other than exp(1), 2 and 10 not supported in Arrow") + # ARROW-13345 + stop("`base` values other than exp(1), 2 and 10 not supported in Arrow", call. = FALSE) } -nse_funcs$logb <- nse_funcs$log \ No newline at end of file +nse_funcs$logb <- nse_funcs$log