diff --git a/NEWS.md b/NEWS.md index d55a901723..f5f0ab6754 100644 --- a/NEWS.md +++ b/NEWS.md @@ -99,6 +99,8 @@ unit = "s") 9. `fread` improves handling of very small (<1e-300) or very large (>1e+300) floating point numbers on non-x86 architectures (specifically ppc64le and armv7hl). Thanks to @QuLogic for reporting and fixing, [PR#4165](https://github.com/Rdatatable/data.table/pull/4165). +10. When updating by reference, the use of `get` could result in columns being re-ordered silently, [#4089](https://github.com/Rdatatable/data.table/issues/4089). Thanks to @dmongin for reporting and Cole Miller for the fix. + ## NOTES 0. Retrospective license change permission was sought from and granted by 4 contributors who were missed in [PR#2456](https://github.com/Rdatatable/data.table/pull/2456), [#4140](https://github.com/Rdatatable/data.table/pull/4140). We had used [GitHub's contributor page](https://github.com/Rdatatable/data.table/graphs/contributors) which omits 3 of these due to invalid email addresses, unlike GitLab's contributor page which includes the ids. The 4th omission was a PR to a script which should not have been excluded; a script is code too. We are sorry these contributors were not properly credited before. They have now been added to the contributors list as displayed on CRAN. All the contributors of code to data.table hold its copyright jointly; your contributions belong to you. You contributed to data.table when it had a particular license at that time, and you contributed on that basis. This is why in the last license change, all contributors of code were consulted and each had a veto. diff --git a/R/data.table.R b/R/data.table.R index 4d2f33d0bc..a6a2afccf2 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -973,7 +973,35 @@ replace_dot_alias = function(e) { } # fix for long standing FR/bug, #495 and #484 allcols = c(names_x, xdotprefix, names_i, idotprefix) - if ( length(non_sdvars <- setdiff(intersect(av, allcols), c(bynames, ansvars))) ) { + non_sdvars = setdiff(intersect(av, allcols), c(bynames, ansvars)) + + # added 'mget' - fix for #994 + if (any(c("get", "mget") %chin% av)){ + if (verbose) + cat(gettextf("'(m)get' found in j. ansvars being set to all columns. Use .SDcols or a single j=eval(macro) instead. Both will detect the columns used which is important for efficiency.\nOld ansvars: %s \n", brackify(ansvars), domain = "R-data.table")) + # get('varname') is too difficult to detect which columns are used in general + # eval(macro) column names are detected via the if jsub[[1]]==eval switch earlier above. + + # Do not include z in .SD when dt[, z := {.SD; get("x")}, .SDcols = "y"] (#2326, #2338) + if (is.call(jsub) && length(jsub[[1L]]) == 1L && jsub[[1L]] == ":="){ + if (is.symbol(jsub[[2L]])) { + jsub_lhs_symbol = as.character(jsub[[2L]]) + if (jsub_lhs_symbol %chin% non_sdvars) { + sdvars = setdiff(sdvars, jsub_lhs_symbol) + } + } + } + + if (missing(.SDcols)) { + ansvars = setdiff(allcols, bynames) # fix for bug #5443 + } else { + # fixes #4089 - if .SDcols was already evaluated, we do not want the order of the columns to change. + ansvars = union(ansvars, setdiff(setdiff(allcols, ansvars), bynames)) + } + non_sdvars = setdiff(ansvars, sdvars) + ansvals = chmatch(ansvars, names_x) + if (verbose) cat(gettextf("New ansvars: %s \n", brackify(ansvars), domain = "R-data.table")) + } else if (length(non_sdvars)) { # we've a situation like DT[, c(sum(V1), lapply(.SD, mean)), by=., .SDcols=...] or # DT[, lapply(.SD, function(x) x *v1), by=, .SDcols=...] etc., ansvars = union(ansvars, non_sdvars) @@ -993,29 +1021,6 @@ replace_dot_alias = function(e) { } # if (!length(ansvars)) Leave ansvars empty. Important for test 607. - # TODO remove as (m)get is now folded in above. - # added 'mget' - fix for #994 - if (any(c("get", "mget") %chin% av)) { - if (verbose) { - cat("'(m)get' found in j. ansvars being set to all columns. Use .SDcols or a single j=eval(macro) instead. Both will detect the columns used which is important for efficiency.\nOld:", paste(ansvars,collapse=","),"\n") - # get('varname') is too difficult to detect which columns are used in general - # eval(macro) column names are detected via the if jsub[[1]]==eval switch earlier above. - } - - # Do not include z in .SD when dt[, z := {.SD; get("x")}, .SDcols = "y"] (#2326, #2338) - if (jsub %iscall% ':=' && is.symbol(jsub[[2L]])) { - jsub_lhs_symbol = as.character(jsub[[2L]]) - if (jsub_lhs_symbol %chin% non_sdvars) { - sdvars = setdiff(sdvars, jsub_lhs_symbol) - } - } - allcols = c(names_x, xdotprefix, names_i, idotprefix) - ansvars = setdiff(allcols, bynames) # fix for bug #34 - non_sdvars = setdiff(ansvars, sdvars) - ansvals = chmatch(ansvars, names_x) - if (verbose) cat("New:",paste(ansvars,collapse=","),"\n") - } - lhs = NULL newnames = NULL suppPrint = identity diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 0ffe3f5bf5..c0caeba602 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16805,3 +16805,8 @@ test(2135.1, DT[ , .NGRP], 1L) test(2135.2, DT[ , .NGRP, by = grp1]$NGRP, rep(4L, 4L)) test(2135.3, DT[ , .NGRP, by = grp2]$NGRP, rep(2L, 2L)) test(2135.4, DT[ , .NGRP, by = .(grp1, grp2)]$NGRP, rep(8L, 8L)) + +# Use of (m)get does not re-order when using .SDcols, #4089 +dt = data.table(x = 1L, y = 2L, z = 3L) +cols = c('x', 'y') +test(2136.1, dt[, (cols) := lapply(.SD[get("x") == 1],function(x){x + 2L}), .SDcols = cols ,by = z], data.table(x = 1L + 2L, y = 2L + 2L, z = 3L))