Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
77 commits
Select commit Hold shift + click to select a range
8d50ac6
Add function which pulls out expressions in across() into separate qu…
thisisnic Aug 2, 2022
666baa5
Add test
thisisnic Aug 2, 2022
6074aac
Update test to (correctly) fail
thisisnic Aug 2, 2022
ba98591
Move suffix code inside block
thisisnic Aug 2, 2022
097eb8a
Fix so it passes in the case of a single function
thisisnic Aug 2, 2022
5dc9a51
Add more tests
thisisnic Aug 2, 2022
d5fd733
Add another comment
thisisnic Aug 2, 2022
2c882c3
Run styler
thisisnic Aug 2, 2022
bb730d6
Import rlang functions
thisisnic Aug 2, 2022
8bc2f6b
Run document() to import more rlang functions
thisisnic Aug 2, 2022
77340e1
Unskip test
thisisnic Aug 2, 2022
1963832
Pfft, whitespace
thisisnic Aug 2, 2022
c6f5e82
Update to work with both mutate and summarise
thisisnic Aug 3, 2022
b196573
Add across tests for summarise
thisisnic Aug 3, 2022
1288a40
Add comment
thisisnic Aug 3, 2022
692e41b
Comment out failing summarise tests and add some more really simple s…
thisisnic Aug 3, 2022
5d7af31
Abstract out column names logic
thisisnic Aug 3, 2022
9ef1c57
Add comment
thisisnic Aug 3, 2022
1f88b8b
Pass through .groups
thisisnic Aug 3, 2022
36706da
Back out changes to summarise()
thisisnic Aug 3, 2022
850dcc4
Refactor to remove a for loop
thisisnic Aug 4, 2022
290d549
Refactor to remove for loops
thisisnic Aug 7, 2022
ad6033b
Move comments around
thisisnic Aug 7, 2022
f5051e1
Refactor to calculate names and quosures together
thisisnic Aug 7, 2022
1dcb5bb
Refactor back into original function
thisisnic Aug 7, 2022
dffb49e
Add a TODO
thisisnic Aug 7, 2022
63bc695
Update failing test and add justification in comment
thisisnic Aug 9, 2022
6285976
Remove whitespace
thisisnic Aug 9, 2022
02d92d3
Add some failing tests
thisisnic Aug 9, 2022
de3ebd0
Add another failing test
thisisnic Aug 9, 2022
3c465b0
Fix typo
thisisnic Aug 9, 2022
5c56bfd
Use match.call so we don't worry about argument order
thisisnic Aug 9, 2022
869081d
Raise error for deprecated argument, add tests for unsupported and de…
thisisnic Aug 9, 2022
20c73bd
Skip failing tests as they're part of .names
thisisnic Aug 9, 2022
5eb3162
Remove redundant calls to as.character
thisisnic Aug 9, 2022
45fce8a
Reorder code and return early if .fns is NULL
thisisnic Aug 9, 2022
f40d76e
Run styler
thisisnic Aug 9, 2022
5c720a7
Add comments, refactor some bits out as functions
thisisnic Aug 10, 2022
32ce550
Add another failing test
thisisnic Aug 10, 2022
bccbe0a
Remove broken workaround, move comments to better location, refactor …
thisisnic Aug 10, 2022
721d420
Uncomment tests
thisisnic Aug 10, 2022
3cb51a4
Add failing test
thisisnic Aug 12, 2022
c34fc78
Update r/R/dplyr-mutate.R
thisisnic Aug 11, 2022
a4ee387
Update r/tests/testthat/test-dplyr-mutate.R
thisisnic Aug 11, 2022
b7b4fa4
Add another failing test
thisisnic Aug 12, 2022
f6114d2
Use user-supplied list name for output column name
thisisnic Aug 15, 2022
7707ce0
Update test for dodgy input
thisisnic Aug 15, 2022
02107f3
Run devtools::document()
thisisnic Aug 15, 2022
f836d2a
Ensure new quos have correct env
thisisnic Aug 15, 2022
8474851
Add explanation of failing test
thisisnic Aug 15, 2022
21cb4be
Add a test containing more expressions before and after across(), and…
thisisnic Aug 15, 2022
aa052e4
Rename unfold_across -> expand_across
thisisnic Aug 22, 2022
c76cd39
Update match.call with extra cols
thisisnic Aug 22, 2022
6ffb9a1
Add failing test for unnamed cols
thisisnic Aug 22, 2022
b78bce8
Add tests and functionality for if .cols argument not supplied
thisisnic Aug 22, 2022
1b45ea2
Uncomment names test
thisisnic Aug 22, 2022
d3a13c0
Handle lambda functions and null list of functions
thisisnic Aug 23, 2022
9f49d2f
Move across tests to a separate file
thisisnic Aug 23, 2022
37d5f0f
Move across to own file
thisisnic Aug 23, 2022
9abddac
Remove redundant line
thisisnic Aug 23, 2022
59e38d0
Add another test of names
thisisnic Aug 23, 2022
e8b065f
Run styler
thisisnic Aug 24, 2022
d3ddb9b
Add Apache header and swap tbl for example_data
thisisnic Aug 24, 2022
7a2603e
Call dplyr in tests
thisisnic Aug 24, 2022
b8f8636
Namespace glue
thisisnic Aug 25, 2022
cad3872
Remove extra code referencing unsupported features
thisisnic Aug 25, 2022
0b2e459
Remove redundant ref
thisisnic Aug 25, 2022
4ffeafb
Add glue as explicit dependency
thisisnic Aug 25, 2022
9926a49
Add test for no functions
thisisnic Aug 30, 2022
c777cad
Add back in code deleted
thisisnic Aug 30, 2022
12cc5eb
Add new helper
thisisnic Aug 30, 2022
8912b38
Properly handle no functions supplied
thisisnic Aug 30, 2022
d94cd23
Separate mutate vs expand_across tests
thisisnic Aug 30, 2022
e321510
Reorder tests and add a load more for combos of .names and .fns not y…
thisisnic Aug 31, 2022
bd6e2b6
Add in test for dodgy names spec
thisisnic Aug 31, 2022
b2fb25a
Simplify across function
thisisnic Aug 31, 2022
29c8fc5
Remove extraneous rlang call
thisisnic Aug 31, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions r/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ Biarch: true
Imports:
assertthat,
bit64 (>= 0.9-7),
glue,
methods,
purrr,
R6,
Expand Down Expand Up @@ -91,6 +92,7 @@ Collate:
'dataset-scan.R'
'dataset-write.R'
'dictionary.R'
'dplyr-across.R'
'dplyr-arrange.R'
'dplyr-collect.R'
'dplyr-count.R'
Expand Down
7 changes: 7 additions & 0 deletions r/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,7 @@ importFrom(assertthat,assert_that)
importFrom(assertthat,is.string)
importFrom(bit64,print.integer64)
importFrom(bit64,str.integer64)
importFrom(glue,glue)
importFrom(methods,as)
importFrom(purrr,as_mapper)
importFrom(purrr,flatten)
Expand All @@ -413,6 +414,7 @@ importFrom(rlang,as_function)
importFrom(rlang,as_label)
importFrom(rlang,as_quosure)
importFrom(rlang,call2)
importFrom(rlang,call_args)
importFrom(rlang,caller_env)
importFrom(rlang,dots_n)
importFrom(rlang,enexpr)
Expand All @@ -425,20 +427,25 @@ importFrom(rlang,eval_tidy)
importFrom(rlang,exec)
importFrom(rlang,expr)
importFrom(rlang,is_bare_character)
importFrom(rlang,is_call)
importFrom(rlang,is_character)
importFrom(rlang,is_empty)
importFrom(rlang,is_false)
importFrom(rlang,is_formula)
importFrom(rlang,is_integerish)
importFrom(rlang,is_interactive)
importFrom(rlang,is_list)
importFrom(rlang,is_quosure)
importFrom(rlang,is_symbol)
importFrom(rlang,list2)
importFrom(rlang,new_data_mask)
importFrom(rlang,new_environment)
importFrom(rlang,quo_get_env)
importFrom(rlang,quo_get_expr)
importFrom(rlang,quo_is_call)
importFrom(rlang,quo_is_null)
importFrom(rlang,quo_name)
importFrom(rlang,quo_set_env)
importFrom(rlang,quo_set_expr)
importFrom(rlang,quos)
importFrom(rlang,seq2)
Expand Down
4 changes: 3 additions & 1 deletion r/R/arrow-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@
#' @importFrom rlang eval_tidy new_data_mask syms env new_environment env_bind set_names exec
#' @importFrom rlang is_bare_character quo_get_expr quo_get_env quo_set_expr .data seq2 is_interactive
#' @importFrom rlang expr caller_env is_character quo_name is_quosure enexpr enexprs as_quosure
#' @importFrom rlang is_list call2 is_empty as_function as_label arg_match
#' @importFrom rlang is_list call2 is_empty as_function as_label arg_match is_symbol is_call call_args
#' @importFrom rlang quo_set_env quo_get_env is_formula quo_is_call
#' @importFrom tidyselect vars_pull vars_rename vars_select eval_select
#' @importFrom glue glue
#' @useDynLib arrow, .registration = TRUE
#' @keywords internal
"_PACKAGE"
Expand Down
177 changes: 177 additions & 0 deletions r/R/dplyr-across.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Expand every `across()` call found in a list of quosures into one quosure
# per generated output column, leaving all other quosures untouched.
#
# .data:   object whose columns `across()` selects from; it is handed to
#          across_setup() as `mask`
# quos_in: list of quosures, e.g. the result of `quos(...)`
#
# Returns a plain list of quosures in the original order, in which each
# `across()` call has been replaced by the quosures generated from it.
# Aborts if `across()` is called with arguments other than `.cols`, `.fns`
# and `.names`.
expand_across <- function(.data, quos_in) {
  quos_out <- list()

  for (quo_i in seq_along(quos_in)) {
    # single-bracket subsetting keeps the element's name, so that quosures
    # other than across() retain their naming when appended to the output
    quo_in <- quos_in[quo_i]
    quo_expr <- quo_get_expr(quo_in[[1]])

    if (!is_call(quo_expr, "across")) {
      quos_out <- append(quos_out, quo_in)
      next
    }

    quo_env <- quo_get_env(quo_in[[1]])

    # match against dplyr's own signature so that the order in which the user
    # supplied the arguments (and whether they were named) doesn't matter
    across_call <- match.call(
      definition = dplyr::across,
      call = quo_expr,
      expand.dots = FALSE,
      envir = quo_env
    )

    if (!all(names(across_call[-1]) %in% c(".cols", ".fns", ".names"))) {
      abort("`...` argument to `across()` is deprecated in dplyr and not supported in Arrow")
    }

    # when `.cols` is not supplied, select every column
    cols <- across_call[[".cols"]]
    if (is.null(cols)) {
      cols <- quote(everything())
    }

    # across_setup() returns `fns` as either NULL or a list and aborts on
    # purrr-style lambdas itself, so no further check of `setup$fns` is needed
    setup <- across_setup(
      cols = !!as_quosure(cols, quo_env),
      fns = across_call[[".fns"]],
      names = across_call[[".names"]],
      .caller_env = quo_env,
      mask = .data,
      inline = TRUE
    )

    quos_out <- append(quos_out, quosures_from_setup(setup, quo_env))
  }

  quos_out
}

# Build the quosures described by the output of across_setup().
#
# setup:   list with elements `vars` (column names), `fns` (list of functions,
#          or NULL) and `names` (names for the resulting quosures)
# quo_env: environment assigned to every quosure that is returned
#
# Returns a list of quosures named by `setup$names`.
quosures_from_setup <- function(setup, quo_env) {
  fns <- setup$fns
  columns <- setup$vars

  if (is.null(fns)) {
    # no functions were supplied, so each quosure is just the column itself
    quo_list <- map(columns, function(col) quo(!!sym(col)))
  } else {
    # pair every column with every function: the columns are repeated
    # element-wise while the whole list of functions is recycled
    n_fns <- length(fns)
    quo_list <- map2(
      rep(fns, length(columns)),
      rep(columns, each = n_fns),
      function(fn, col) quo(!!call2(fn, sym(col)))
    )
  }

  named_quos <- set_names(quo_list, setup$names)
  map(named_quos, function(q) quo_set_env(q, quo_env))
}

# Work out the columns, functions and output names described by the arguments
# of an `across()` call.
#
# cols:        column selection; captured with enquo() and evaluated via
#              dplyr::select() on `mask`
# fns:         the `.fns` argument as passed in by the caller: NULL, a
#              function, a formula, a name, or a call whose arguments are the
#              functions
# names:       the `.names` glue specification, or NULL to use the default
# .caller_env: parent environment for evaluating the glue specification
# mask:        object the columns are selected from
# inline:      if FALSE, each element of `fns` is converted with as_function()
#
# Returns a list with elements `vars` (selected column names), `fns` (NULL or
# a list) and `names` (output names). Aborts if `fns` is not one of the
# accepted forms, if it contains a purrr-style lambda, or if `names` does not
# produce one name per column/function combination.
across_setup <- function(cols, fns, names, .caller_env, mask, inline = FALSE) {
  cols <- enquo(cols)

  vars <- names(dplyr::select(mask, !!cols))

  if (is.null(fns)) {
    if (!is.null(names)) {
      glue_mask <- across_glue_mask(.caller_env, .col = vars, .fn = "1")
      names <- vctrs::vec_as_names(glue::glue(names, .envir = glue_mask), repair = "check_unique")
    } else {
      names <- vars
    }

    value <- list(vars = vars, fns = fns, names = names)
    return(value)
  }

  # apply `.names` smart default
  if (is.function(fns) || is_formula(fns) || is.name(fns)) {
    names <- names %||% "{.col}"
    fns <- list("1" = fns)
  } else if (is_call(fns)) {
    names <- names %||% "{.col}_{.fn}"
    fns <- call_args(fns)
  } else {
    # anything else (e.g. a string) can't be split into functions; validate
    # here because call_args() only accepts calls
    msg <- c("`.fns` must be NULL, a function, a formula, or a list of functions/formulas.")
    abort(msg)
  }

  if (any(map_lgl(fns, is_formula))) {
    abort(
      paste(
        "purrr-style lambda functions as `.fns` argument to `across()`",
        "not yet supported in Arrow"
      )
    )
  }

  # make sure fns has names, use number to replace unnamed
  if (is.null(names(fns))) {
    names_fns <- seq_along(fns)
  } else {
    names_fns <- names(fns)
    empties <- which(names_fns == "")
    if (length(empties)) {
      names_fns[empties] <- empties
    }
  }

  glue_mask <- across_glue_mask(.caller_env,
    .col = rep(vars, each = length(fns)),
    .fn = rep(names_fns, length(vars))
  )
  names <- vctrs::vec_as_names(glue::glue(names, .envir = glue_mask), repair = "check_unique")

  if (!inline) {
    fns <- map(fns, as_function)
  }

  # ensure the `.names` specification has produced exactly one name for each
  # combination of column and function
  if (length(names) != (length(vars) * length(fns))) {
    abort(
      c(
        "`.names` specification must produce (number of columns * number of functions) names.",
        x = paste0(
          length(vars) * length(fns), " names required (", length(vars), " columns * ", length(fns), " functions)\n ",
          length(names), " name(s) produced: ", paste(names, collapse = ",")
        )
      )
    )
  }

  list(vars = vars, fns = fns, names = names)
}

# Create the environment in which a `.names` glue specification is evaluated.
#
# The column and function names are bound both with and without a leading dot
# (`.col`/`col`, `.fn`/`fn`), and `.caller_env` becomes the parent of the new
# environment. Callers in this file name `.col` and `.fn` and pass
# `.caller_env` as the single unnamed argument.
across_glue_mask <- function(.col, .fn, .caller_env) {
  bindings <- list(.col = .col, .fn = .fn, col = .col, fn = .fn)
  new_environment(data = bindings, parent = .caller_env)
}
4 changes: 3 additions & 1 deletion r/R/dplyr-mutate.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ mutate.arrow_dplyr_query <- function(.data,
.before = NULL,
.after = NULL) {
call <- match.call()
exprs <- ensure_named_exprs(quos(...))

expression_list <- expand_across(.data, quos(...))
exprs <- ensure_named_exprs(expression_list)

.keep <- match.arg(.keep)
.before <- enquo(.before)
Expand Down
4 changes: 4 additions & 0 deletions r/tests/testthat/helper-expectation.R
Original file line number Diff line number Diff line change
Expand Up @@ -321,3 +321,7 @@ split_vector_as_list <- function(vec) {
vec2 <- vec[seq(from = min(length(vec), vec_split + 1), to = length(vec), by = 1)]
list(vec1, vec2)
}

# Test helper: expect that running expand_across() on the quosures in `actual`
# against `tbl` gives a result identical to `expected` converted to a plain
# list.
#
# actual:   quosures to expand (passed as `quos_in` to expand_across())
# expected: the quosures expected after expansion
# tbl:      passed as `.data` to expand_across()
expect_across_equal <- function(actual, expected, tbl) {
expect_identical(expand_across(tbl, actual), as.list(expected))
}
Loading