From 65280800bbee115068d55a99c16519c42d3c7a9f Mon Sep 17 00:00:00 2001 From: Mauricio Vargas Date: Mon, 28 Jun 2021 17:55:49 -0400 Subject: [PATCH 01/15] trying again --- r/R/dplyr-functions.R | 24 ++++++++++++++++++++---- r/src/compute.cpp | 9 +++++++++ r/tests/testthat/test-dplyr-mutate.R | 22 ++++++++++++++++++++++ 3 files changed, 51 insertions(+), 4 deletions(-) diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index 5076fc09847..08e2757e1c0 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -398,6 +398,22 @@ nse_funcs$str_split <- function(string, pattern, n = Inf, simplify = FALSE) { ) } +nse_funcs$pmin <- function(..., na.rm = FALSE) { + Expression$create( + "element_wise_min", + ..., + options = list(skip_nulls = na.rm) + ) +} + +nse_funcs$pmax <- function(..., na.rm = FALSE) { + Expression$create( + "element_wise_max", + ..., + options = list(skip_nulls = na.rm) + ) +} + # String function helpers # format `pattern` as needed for case insensitivity and literal matching by RE2 @@ -511,7 +527,7 @@ nse_funcs$second <- function(x) { } # After ARROW-13054 is completed, we can refactor this for simplicity -# +# # Arrow's `day_of_week` kernel counts from 0 (Monday) to 6 (Sunday), whereas # `lubridate::wday` counts from 1 to 7, and allows users to specify which day # of the week is first (Sunday by default). This Expression converts the returned @@ -519,16 +535,16 @@ nse_funcs$second <- function(x) { # providing offset values based on the specified week_start day, and adding 1 # so the returned value is 1-indexed instead of 0-indexed. nse_funcs$wday <- function(x, label = FALSE, abbr = TRUE, week_start = getOption("lubridate.week.start", 7)) { - + # The "day_of_week" compute function returns numeric days of week and not locale-aware strftime # When the ticket below is resolved, we should be able to support the label argument # https://issues.apache.org/jira/browse/ARROW-13133 if (label) { arrow_not_supported("Label argument") } - + # overall formula to convert from arrow::wday to lubridate::wday is: # ((wday(day) - start + 8) %% 7) + 1 ((Expression$create("day_of_week", x) - Expression$scalar(week_start) + 8) %% 7) + 1 - + } diff --git a/r/src/compute.cpp b/r/src/compute.cpp index 9a05dd02859..486499a92f8 100644 --- a/r/src/compute.cpp +++ b/r/src/compute.cpp @@ -180,6 +180,15 @@ std::shared_ptr make_compute_options( return out; } + if (func_name == "element_wise_min" || func_name == "element_wise_max") { + using Options = arrow::compute::ElementWiseAggregateOptions; + bool skip_nulls = false; + if (!Rf_isNull(options["skip_nulls"])) { + skip_nulls = cpp11::as_cpp(options["skip_nulls"]); + } + return std::make_shared(skip_nulls); + } + if (func_name == "quantile") { using Options = arrow::compute::QuantileOptions; auto out = std::make_shared(Options::Defaults()); diff --git a/r/tests/testthat/test-dplyr-mutate.R b/r/tests/testthat/test-dplyr-mutate.R index 98eb4983d32..012d287e946 100644 --- a/r/tests/testthat/test-dplyr-mutate.R +++ b/r/tests/testthat/test-dplyr-mutate.R @@ -418,3 +418,25 @@ test_that("mutate and write_dataset", { summarize(mean = mean(integer)) ) }) + + +test_that("mutate and pmin/pmax", { + df <- tibble( + city = c("Chillan", "Valdivia", "Osorno"), + val1 = c(200, 300, NA), + val2 = c(100, NA, NA), + val3 = c(0, NA, NA) + ) + + expect_dplyr_equal( + input %>% + mutate( + max_val_1 = pmax(val1, val2, val3), + max_val_2 = pmax(val1, val2, val3, na.rm = T), + min_val_1 = pmin(val1, val2, val3), + min_val_2 = pmin(val1, val2, val3, na.rm = T) + ) %>% + collect(), + df + ) +}) From 5b30a131c97910e1a167de37f75b3b63e7ad52bd Mon Sep 17 00:00:00 2001 From: Mauricio Vargas Date: Mon, 28 Jun 2021 17:58:24 -0400 Subject: [PATCH 02/15] whitespace --- r/R/dplyr-functions.R | 2 +- r/tests/testthat/test-dplyr-mutate.R | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index 08e2757e1c0..3bfd2b4f6bf 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -535,7 +535,7 @@ nse_funcs$second <- function(x) { # providing offset values based on the specified week_start day, and adding 1 # so the returned value is 1-indexed instead of 0-indexed. nse_funcs$wday <- function(x, label = FALSE, abbr = TRUE, week_start = getOption("lubridate.week.start", 7)) { - + # The "day_of_week" compute function returns numeric days of week and not locale-aware strftime # When the ticket below is resolved, we should be able to support the label argument # https://issues.apache.org/jira/browse/ARROW-13133 diff --git a/r/tests/testthat/test-dplyr-mutate.R b/r/tests/testthat/test-dplyr-mutate.R index 012d287e946..a21a95502a4 100644 --- a/r/tests/testthat/test-dplyr-mutate.R +++ b/r/tests/testthat/test-dplyr-mutate.R @@ -419,7 +419,6 @@ test_that("mutate and write_dataset", { ) }) - test_that("mutate and pmin/pmax", { df <- tibble( city = c("Chillan", "Valdivia", "Osorno"), From d66c1ef30058648e26a76f7f2250d5798cd4a856 Mon Sep 17 00:00:00 2001 From: Mauricio Vargas Date: Mon, 28 Jun 2021 18:02:12 -0400 Subject: [PATCH 03/15] space --- r/R/dplyr-functions.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index 3bfd2b4f6bf..11df0857d58 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -527,7 +527,7 @@ nse_funcs$second <- function(x) { } # After ARROW-13054 is completed, we can refactor this for simplicity -# +# # Arrow's `day_of_week` kernel counts from 0 (Monday) to 6 (Sunday), whereas # `lubridate::wday` counts from 1 to 7, and allows users to specify which day # of the week is first (Sunday by default). This Expression converts the returned @@ -535,7 +535,7 @@ nse_funcs$second <- function(x) { # providing offset values based on the specified week_start day, and adding 1 # so the returned value is 1-indexed instead of 0-indexed. nse_funcs$wday <- function(x, label = FALSE, abbr = TRUE, week_start = getOption("lubridate.week.start", 7)) { - + # The "day_of_week" compute function returns numeric days of week and not locale-aware strftime # When the ticket below is resolved, we should be able to support the label argument # https://issues.apache.org/jira/browse/ARROW-13133 From 8471dd3837df808d5d33560b9a04662b6f90168a Mon Sep 17 00:00:00 2001 From: Mauricio Vargas Date: Mon, 28 Jun 2021 17:55:49 -0400 Subject: [PATCH 04/15] trying again --- r/R/dplyr-functions.R | 24 ++++++++++++++++++++---- r/src/compute.cpp | 9 +++++++++ r/tests/testthat/test-dplyr-mutate.R | 22 ++++++++++++++++++++++ 3 files changed, 51 insertions(+), 4 deletions(-) diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index 5076fc09847..08e2757e1c0 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -398,6 +398,22 @@ nse_funcs$str_split <- function(string, pattern, n = Inf, simplify = FALSE) { ) } +nse_funcs$pmin <- function(..., na.rm = FALSE) { + Expression$create( + "element_wise_min", + ..., + options = list(skip_nulls = na.rm) + ) +} + +nse_funcs$pmax <- function(..., na.rm = FALSE) { + Expression$create( + "element_wise_max", + ..., + options = list(skip_nulls = na.rm) + ) +} + # String function helpers # format `pattern` as needed for case insensitivity and literal matching by RE2 @@ -511,7 +527,7 @@ nse_funcs$second <- function(x) { } # After ARROW-13054 is completed, we can refactor this for simplicity -# +# # Arrow's `day_of_week` kernel counts from 0 (Monday) to 6 (Sunday), whereas # `lubridate::wday` counts from 1 to 7, and allows users to specify which day # of the week is first (Sunday by default). This Expression converts the returned @@ -519,16 +535,16 @@ nse_funcs$second <- function(x) { # providing offset values based on the specified week_start day, and adding 1 # so the returned value is 1-indexed instead of 0-indexed. nse_funcs$wday <- function(x, label = FALSE, abbr = TRUE, week_start = getOption("lubridate.week.start", 7)) { - + # The "day_of_week" compute function returns numeric days of week and not locale-aware strftime # When the ticket below is resolved, we should be able to support the label argument # https://issues.apache.org/jira/browse/ARROW-13133 if (label) { arrow_not_supported("Label argument") } - + # overall formula to convert from arrow::wday to lubridate::wday is: # ((wday(day) - start + 8) %% 7) + 1 ((Expression$create("day_of_week", x) - Expression$scalar(week_start) + 8) %% 7) + 1 - + } diff --git a/r/src/compute.cpp b/r/src/compute.cpp index 9a05dd02859..486499a92f8 100644 --- a/r/src/compute.cpp +++ b/r/src/compute.cpp @@ -180,6 +180,15 @@ std::shared_ptr make_compute_options( return out; } + if (func_name == "element_wise_min" || func_name == "element_wise_max") { + using Options = arrow::compute::ElementWiseAggregateOptions; + bool skip_nulls = false; + if (!Rf_isNull(options["skip_nulls"])) { + skip_nulls = cpp11::as_cpp(options["skip_nulls"]); + } + return std::make_shared(skip_nulls); + } + if (func_name == "quantile") { using Options = arrow::compute::QuantileOptions; auto out = std::make_shared(Options::Defaults()); diff --git a/r/tests/testthat/test-dplyr-mutate.R b/r/tests/testthat/test-dplyr-mutate.R index 98eb4983d32..012d287e946 100644 --- a/r/tests/testthat/test-dplyr-mutate.R +++ b/r/tests/testthat/test-dplyr-mutate.R @@ -418,3 +418,25 @@ test_that("mutate and write_dataset", { summarize(mean = mean(integer)) ) }) + + +test_that("mutate and pmin/pmax", { + df <- tibble( + city = c("Chillan", "Valdivia", "Osorno"), + val1 = c(200, 300, NA), + val2 = c(100, NA, NA), + val3 = c(0, NA, NA) + ) + + expect_dplyr_equal( + input %>% + mutate( + max_val_1 = pmax(val1, val2, val3), + max_val_2 = pmax(val1, val2, val3, na.rm = T), + min_val_1 = pmin(val1, val2, val3), + min_val_2 = pmin(val1, val2, val3, na.rm = T) + ) %>% + collect(), + df + ) +}) From f11615659baebb7e036fea0f026abcf4c51e5baa Mon Sep 17 00:00:00 2001 From: Mauricio Vargas Date: Mon, 28 Jun 2021 17:58:24 -0400 Subject: [PATCH 05/15] whitespace --- r/R/dplyr-functions.R | 2 +- r/tests/testthat/test-dplyr-mutate.R | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index 08e2757e1c0..3bfd2b4f6bf 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -535,7 +535,7 @@ nse_funcs$second <- function(x) { # providing offset values based on the specified week_start day, and adding 1 # so the returned value is 1-indexed instead of 0-indexed. nse_funcs$wday <- function(x, label = FALSE, abbr = TRUE, week_start = getOption("lubridate.week.start", 7)) { - + # The "day_of_week" compute function returns numeric days of week and not locale-aware strftime # When the ticket below is resolved, we should be able to support the label argument # https://issues.apache.org/jira/browse/ARROW-13133 diff --git a/r/tests/testthat/test-dplyr-mutate.R b/r/tests/testthat/test-dplyr-mutate.R index 012d287e946..a21a95502a4 100644 --- a/r/tests/testthat/test-dplyr-mutate.R +++ b/r/tests/testthat/test-dplyr-mutate.R @@ -419,7 +419,6 @@ test_that("mutate and write_dataset", { ) }) - test_that("mutate and pmin/pmax", { df <- tibble( city = c("Chillan", "Valdivia", "Osorno"), From b4a2dbaeba5b12190afa71c5cbb097cbc0a4c76c Mon Sep 17 00:00:00 2001 From: Mauricio Vargas Date: Mon, 28 Jun 2021 18:02:12 -0400 Subject: [PATCH 06/15] space --- r/R/dplyr-functions.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index 3bfd2b4f6bf..11df0857d58 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -527,7 +527,7 @@ nse_funcs$second <- function(x) { } # After ARROW-13054 is completed, we can refactor this for simplicity -# +# # Arrow's `day_of_week` kernel counts from 0 (Monday) to 6 (Sunday), whereas # `lubridate::wday` counts from 1 to 7, and allows users to specify which day # of the week is first (Sunday by default). This Expression converts the returned @@ -535,7 +535,7 @@ nse_funcs$second <- function(x) { # providing offset values based on the specified week_start day, and adding 1 # so the returned value is 1-indexed instead of 0-indexed. nse_funcs$wday <- function(x, label = FALSE, abbr = TRUE, week_start = getOption("lubridate.week.start", 7)) { - + # The "day_of_week" compute function returns numeric days of week and not locale-aware strftime # When the ticket below is resolved, we should be able to support the label argument # https://issues.apache.org/jira/browse/ARROW-13133 From db277c6a99db4abe14ed2d491392145d6713546a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pach=C3=A1?= Date: Tue, 29 Jun 2021 14:55:55 -0400 Subject: [PATCH 07/15] Update r/src/compute.cpp Co-authored-by: Ian Cook --- r/src/compute.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/src/compute.cpp b/r/src/compute.cpp index 486499a92f8..99c39cac3fd 100644 --- a/r/src/compute.cpp +++ b/r/src/compute.cpp @@ -182,7 +182,7 @@ std::shared_ptr make_compute_options( if (func_name == "element_wise_min" || func_name == "element_wise_max") { using Options = arrow::compute::ElementWiseAggregateOptions; - bool skip_nulls = false; + bool skip_nulls = true; if (!Rf_isNull(options["skip_nulls"])) { skip_nulls = cpp11::as_cpp(options["skip_nulls"]); } From 5a4c39629d59f96c2979e1f08b32f3a8066bc42f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pach=C3=A1?= Date: Tue, 29 Jun 2021 14:56:11 -0400 Subject: [PATCH 08/15] Update r/R/dplyr-functions.R Co-authored-by: Ian Cook --- r/R/dplyr-functions.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index 11df0857d58..7c5e63afd26 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -407,7 +407,7 @@ nse_funcs$pmin <- function(..., na.rm = FALSE) { } nse_funcs$pmax <- function(..., na.rm = FALSE) { - Expression$create( + build_expr( "element_wise_max", ..., options = list(skip_nulls = na.rm) From 6c4149144aae80f4d7d67121dc69f67e91c7d318 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pach=C3=A1?= Date: Tue, 29 Jun 2021 14:56:17 -0400 Subject: [PATCH 09/15] Update r/R/dplyr-functions.R Co-authored-by: Ian Cook --- r/R/dplyr-functions.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index 7c5e63afd26..56d92ef75b3 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -399,7 +399,7 @@ nse_funcs$str_split <- function(string, pattern, n = Inf, simplify = FALSE) { } nse_funcs$pmin <- function(..., na.rm = FALSE) { - Expression$create( + build_expr( "element_wise_min", ..., options = list(skip_nulls = na.rm) From 92f3f88684b3035efcdde107e994f3ac2f0ee1a2 Mon Sep 17 00:00:00 2001 From: Mauricio Vargas Date: Tue, 29 Jun 2021 15:31:59 -0400 Subject: [PATCH 10/15] renamed c++ pmix/pmax --- r/R/dplyr-functions.R | 4 ++-- r/src/compute.cpp | 2 +- r/tests/testthat/test-dplyr-mutate.R | 10 ++++++++++ 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index 94555e431ec..db235321402 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -400,7 +400,7 @@ nse_funcs$str_split <- function(string, pattern, n = Inf, simplify = FALSE) { nse_funcs$pmin <- function(..., na.rm = FALSE) { build_expr( - "element_wise_min", + "min_element_wise", ..., options = list(skip_nulls = na.rm) ) @@ -408,7 +408,7 @@ nse_funcs$pmin <- function(..., na.rm = FALSE) { nse_funcs$pmax <- function(..., na.rm = FALSE) { build_expr( - "element_wise_max", + "max_element_wise", ..., options = list(skip_nulls = na.rm) ) diff --git a/r/src/compute.cpp b/r/src/compute.cpp index e3bf698e1ee..09381f2fe96 100644 --- a/r/src/compute.cpp +++ b/r/src/compute.cpp @@ -180,7 +180,7 @@ std::shared_ptr make_compute_options( return out; } - if (func_name == "element_wise_min" || func_name == "element_wise_max") { + if (func_name == "min_element_wise" || func_name == "max_element_wise") { using Options = arrow::compute::ElementWiseAggregateOptions; bool skip_nulls = true; if (!Rf_isNull(options["skip_nulls"])) { diff --git a/r/tests/testthat/test-dplyr-mutate.R b/r/tests/testthat/test-dplyr-mutate.R index a21a95502a4..908ada296be 100644 --- a/r/tests/testthat/test-dplyr-mutate.R +++ b/r/tests/testthat/test-dplyr-mutate.R @@ -438,4 +438,14 @@ test_that("mutate and pmin/pmax", { collect(), df ) + + expect_dplyr_equal( + input %>% + mutate( + max_val_1 = pmax(val1 - 100, 200, val1 * 100, na.rm = T), + min_val_1 = pmin(val1 - 100, 100, val1 * 100, na.rm = T), + ) %>% + collect(), + df + ) }) From 9cddc4b3a07d14477d2355ce8753d716a23164f6 Mon Sep 17 00:00:00 2001 From: Mauricio Vargas Date: Tue, 29 Jun 2021 15:39:29 -0400 Subject: [PATCH 11/15] styleee --- r/R/dplyr-functions.R | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index db235321402..27769e6df6f 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -60,19 +60,19 @@ nse_funcs$cast <- function(x, target_type, safe = TRUE, ...) { nse_funcs$is <- function(object, class2) { if (is.string(class2)) { switch(class2, - # for R data types, pass off to is.*() functions - character = nse_funcs$is.character(object), - numeric = nse_funcs$is.numeric(object), - integer = nse_funcs$is.integer(object), - integer64 = nse_funcs$is.integer64(object), - logical = nse_funcs$is.logical(object), - factor = nse_funcs$is.factor(object), - list = nse_funcs$is.list(object), - # for Arrow data types, compare class2 with object$type()$ToString(), - # but first strip off any parameters to only compare the top-level data - # type, and canonicalize class2 - sub("^([^([<]+).*$", "\\1", object$type()$ToString()) == - canonical_type_str(class2) + # for R data types, pass off to is.*() functions + character = nse_funcs$is.character(object), + numeric = nse_funcs$is.numeric(object), + integer = nse_funcs$is.integer(object), + integer64 = nse_funcs$is.integer64(object), + logical = nse_funcs$is.logical(object), + factor = nse_funcs$is.factor(object), + list = nse_funcs$is.list(object), + # for Arrow data types, compare class2 with object$type()$ToString(), + # but first strip off any parameters to only compare the top-level data + # type, and canonicalize class2 + sub("^([^([<]+).*$", "\\1", object$type()$ToString()) == + canonical_type_str(class2) ) } else if (inherits(class2, "DataType")) { object$type() == as_type(class2) From 215a4ba25bb99665ebf7153c205730b8c0bbd536 Mon Sep 17 00:00:00 2001 From: Mauricio Vargas Date: Tue, 29 Jun 2021 15:40:35 -0400 Subject: [PATCH 12/15] styleee --- r/R/dplyr-functions.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index 27769e6df6f..27d6e889199 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -273,9 +273,9 @@ arrow_string_join_function <- function(null_handling, null_replacement = NULL) { nse_funcs$str_trim <- function(string, side = c("both", "left", "right")) { side <- match.arg(side) trim_fun <- switch(side, - left = "utf8_ltrim_whitespace", - right = "utf8_rtrim_whitespace", - both = "utf8_trim_whitespace" + left = "utf8_ltrim_whitespace", + right = "utf8_rtrim_whitespace", + both = "utf8_trim_whitespace" ) Expression$create(trim_fun, string) } @@ -391,7 +391,7 @@ nse_funcs$str_split <- function(string, pattern, n = Inf, simplify = FALSE) { string, options = list( pattern = - opts$pattern, + opts$pattern, reverse = FALSE, max_splits = n - 1L ) From 7058b3f4a9d6617b7a7f7b65e9cf081fe9672c0a Mon Sep 17 00:00:00 2001 From: Mauricio Vargas Date: Tue, 29 Jun 2021 16:03:15 -0400 Subject: [PATCH 13/15] styleee2 --- r/src/compute.cpp | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/r/src/compute.cpp b/r/src/compute.cpp index 09381f2fe96..e5ff4145f50 100644 --- a/r/src/compute.cpp +++ b/r/src/compute.cpp @@ -40,7 +40,7 @@ std::shared_ptr RecordBatch__cast( arrow::ArrayVector columns(nc); for (int i = 0; i < nc; i++) { columns[i] = ValueOrStop( - arrow::compute::Cast(*batch->column(i), schema->field(i)->type(), *opts)); + arrow::compute::Cast(*batch->column(i), schema->field(i)->type(), *opts)); } return arrow::RecordBatch::Make(schema, batch->num_rows(), std::move(columns)); @@ -58,7 +58,7 @@ std::shared_ptr Table__cast(const std::shared_ptr& t for (int i = 0; i < nc; i++) { arrow::Datum value(table->column(i)); arrow::Datum out = - ValueOrStop(arrow::compute::Cast(value, schema->field(i)->type(), *opts)); + ValueOrStop(arrow::compute::Cast(value, schema->field(i)->type(), *opts)); columns[i] = out.chunked_array(); } return arrow::Table::Make(schema, std::move(columns), table->num_rows()); @@ -104,23 +104,23 @@ arrow::Datum as_cpp(SEXP x) { SEXP from_datum(arrow::Datum datum) { switch (datum.kind()) { - case arrow::Datum::SCALAR: - return cpp11::to_r6(datum.scalar()); + case arrow::Datum::SCALAR: + return cpp11::to_r6(datum.scalar()); - case arrow::Datum::ARRAY: - return cpp11::to_r6(datum.make_array()); + case arrow::Datum::ARRAY: + return cpp11::to_r6(datum.make_array()); - case arrow::Datum::CHUNKED_ARRAY: - return cpp11::to_r6(datum.chunked_array()); + case arrow::Datum::CHUNKED_ARRAY: + return cpp11::to_r6(datum.chunked_array()); - case arrow::Datum::RECORD_BATCH: - return cpp11::to_r6(datum.record_batch()); + case arrow::Datum::RECORD_BATCH: + return cpp11::to_r6(datum.record_batch()); - case arrow::Datum::TABLE: - return cpp11::to_r6(datum.table()); + case arrow::Datum::TABLE: + return cpp11::to_r6(datum.table()); - default: - break; + default: + break; } cpp11::stop("from_datum: Not implemented for Datum %s", datum.ToString().c_str()); @@ -150,7 +150,7 @@ std::shared_ptr make_compute_options( // false means descending, true means ascending auto order = cpp11::as_cpp(options["order"]); auto out = - std::make_shared(Options(order ? Order::Descending : Order::Ascending)); + std::make_shared(Options(order ? Order::Descending : Order::Ascending)); return out; } @@ -165,7 +165,7 @@ std::shared_ptr make_compute_options( std::vector keys; for (size_t i = 0; i < names.size(); i++) { keys.push_back( - Key(names[i], (orders[i] > 0) ? Order::Descending : Order::Ascending)); + Key(names[i], (orders[i] > 0) ? Order::Descending : Order::Ascending)); } auto out = std::make_shared(Options(keys)); return out; From 98b27327da5e140b08c0c970cbd3c02017017b4a Mon Sep 17 00:00:00 2001 From: Mauricio Vargas Date: Tue, 29 Jun 2021 16:07:37 -0400 Subject: [PATCH 14/15] styleee3 --- r/src/compute.cpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/r/src/compute.cpp b/r/src/compute.cpp index e5ff4145f50..964dd47d3ac 100644 --- a/r/src/compute.cpp +++ b/r/src/compute.cpp @@ -200,8 +200,8 @@ std::shared_ptr make_compute_options( if (!Rf_isNull(interpolation) && TYPEOF(interpolation) == INTSXP && XLENGTH(interpolation) == 1) { out->interpolation = - cpp11::as_cpp( - interpolation); + cpp11::as_cpp( + interpolation); } return out; } @@ -217,8 +217,8 @@ std::shared_ptr make_compute_options( auto out = std::make_shared(Options::Defaults()); if (!Rf_isNull(options["null_encoding_behavior"])) { out->null_encoding_behavior = cpp11::as_cpp< - enum arrow::compute::DictionaryEncodeOptions::NullEncodingBehavior>( - options["null_encoding_behavior"]); + enum arrow::compute::DictionaryEncodeOptions::NullEncodingBehavior>( + options["null_encoding_behavior"]); } return out; } @@ -232,8 +232,8 @@ std::shared_ptr make_compute_options( auto out = std::make_shared(Options::Defaults()); if (!Rf_isNull(options["null_handling"])) { out->null_handling = - cpp11::as_cpp( - options["null_handling"]); + cpp11::as_cpp( + options["null_handling"]); } if (!Rf_isNull(options["null_replacement"])) { out->null_replacement = cpp11::as_cpp(options["null_replacement"]); @@ -267,8 +267,8 @@ std::shared_ptr make_compute_options( if (func_name == "strptime") { using Options = arrow::compute::StrptimeOptions; return std::make_shared( - cpp11::as_cpp(options["format"]), - cpp11::as_cpp(options["unit"])); + cpp11::as_cpp(options["format"]), + cpp11::as_cpp(options["unit"])); } if (func_name == "split_pattern" || func_name == "split_pattern_regex") { @@ -326,7 +326,7 @@ std::shared_ptr make_cast_options(cpp11::list optio SEXP allow_int_overflow = options["allow_int_overflow"]; if (!Rf_isNull(allow_int_overflow) && cpp11::as_cpp(allow_int_overflow)) { - out->allow_int_overflow = cpp11::as_cpp(allow_int_overflow); + out->allow_int_overflow = cpp11::as_cpp(allow_int_overflow); } return out; } @@ -336,7 +336,7 @@ SEXP compute__CallFunction(std::string func_name, cpp11::list args, cpp11::list auto opts = make_compute_options(func_name, options); auto datum_args = arrow::r::from_r_list(args); auto out = ValueOrStop( - arrow::compute::CallFunction(func_name, datum_args, opts.get(), gc_context())); + arrow::compute::CallFunction(func_name, datum_args, opts.get(), gc_context())); return from_datum(std::move(out)); } @@ -352,7 +352,7 @@ SEXP compute__GroupBy(cpp11::list arguments, cpp11::list keys, cpp11::list optio auto opts = make_compute_options(name, name_opts[1]); aggregates.push_back( - arrow::compute::internal::Aggregate{std::move(name), opts.get()}); + arrow::compute::internal::Aggregate{std::move(name), opts.get()}); keep_alives.push_back(std::move(opts)); } From 57af341e145c4592e05a75423c79ec92d2270048 Mon Sep 17 00:00:00 2001 From: Mauricio Vargas Date: Tue, 29 Jun 2021 16:10:08 -0400 Subject: [PATCH 15/15] styleee4 --- r/src/compute.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/r/src/compute.cpp b/r/src/compute.cpp index 964dd47d3ac..458e0e386e9 100644 --- a/r/src/compute.cpp +++ b/r/src/compute.cpp @@ -218,7 +218,7 @@ std::shared_ptr make_compute_options( if (!Rf_isNull(options["null_encoding_behavior"])) { out->null_encoding_behavior = cpp11::as_cpp< enum arrow::compute::DictionaryEncodeOptions::NullEncodingBehavior>( - options["null_encoding_behavior"]); + options["null_encoding_behavior"]); } return out; } @@ -326,7 +326,7 @@ std::shared_ptr make_cast_options(cpp11::list optio SEXP allow_int_overflow = options["allow_int_overflow"]; if (!Rf_isNull(allow_int_overflow) && cpp11::as_cpp(allow_int_overflow)) { - out->allow_int_overflow = cpp11::as_cpp(allow_int_overflow); + out->allow_int_overflow = cpp11::as_cpp(allow_int_overflow); } return out; }