From c46d908b9ae9c1a6afd0754a5e1e252f5e0cc601 Mon Sep 17 00:00:00 2001
From: Cole Miller <57992489+ColeMiller1@users.noreply.github.com>
Date: Wed, 29 Jan 2020 23:23:29 -0500
Subject: [PATCH 1/6] Prevent get reordering on assignment

This also includes slight refactoring to eliminate one *TODO* comment
---
 R/data.table.R | 57 +++++++++++++++++++++++++++++---------------------
 1 file changed, 33 insertions(+), 24 deletions(-)

diff --git a/R/data.table.R b/R/data.table.R
index 1fee0396a9..352f8b142f 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -968,7 +968,39 @@ replace_dot_alias = function(e) {
         }
         # fix for long standing FR/bug, #495 and #484
         allcols = c(names_x, xdotprefix, names_i, idotprefix)
-        if ( length(non_sdvars <- setdiff(intersect(av, allcols), c(bynames, ansvars))) ) {
+        non_sdvars <- setdiff(intersect(av, allcols), c(bynames, ansvars))
+        
+        # added 'mget' - fix for #994
+        if (any(c("get", "mget") %chin% av)){
+          # get('varname') is too difficult to detect which columns are used in general
+          # eval(macro) column names are detected via the  if jsub[[1]]==eval switch earlier above.
+          b_warn = FALSE 
+          if (is.call(jsub) && length(jsub[[1L]]) == 1L && jsub[[1L]] == ":="){
+            if (is.symbol(jsub[[2L]])) {
+              jsub_lhs_symbol = as.character(jsub[[2L]])
+              if (jsub_lhs_symbol %chin% non_sdvars) {
+                sdvars = setdiff(sdvars, jsub_lhs_symbol)
+              }
+            } else {
+              # fix for #4089 - if all names are already accounted for, we don't need to re-extract the names for the (m)get call
+              b_warn = is.null(names_i) && all(names_x %chin% c(ansvars, allbyvars))
+            }
+          }
+          if (!b_warn) {
+            if (verbose) 
+              cat("'(m)get' found in j. ansvars being set to all columns. Use .SDcols or a single j=eval(macro) instead. Both will detect the columns used which is important for efficiency.\nOld:", paste(ansvars,collapse=","),"\n")
+            ansvars = setdiff(allcols, bynames) # fix for bug #5443
+            non_sdvars = setdiff(ansvars, sdvars)
+            ansvals = chmatch(ansvars, names_x)
+            if (verbose) 
+              cat("New:",paste(ansvars,collapse=","),"\n")
+          } else { ## TODO remove this else statement in Jan. 2023
+            newvars = setdiff(allcols, bynames) # checking if old cols c("y", "x") became c("x", "y")
+            if (length(newvars) == length(ansvars) && !all(newvars == ansvars)) {
+              warning("(m)get found in j with update assignment (i.e., `(cols) := ...`). j previously was re-ordered. See Issue #4089")
+            }
+          } 
+        } else  if (length(non_sdvars)) {
           # we've a situation like DT[, c(sum(V1), lapply(.SD, mean)), by=., .SDcols=...] or
           # DT[, lapply(.SD, function(x) x *v1), by=, .SDcols=...] etc.,
           ansvars = union(ansvars, non_sdvars)
@@ -988,29 +1020,6 @@ replace_dot_alias = function(e) {
       }
       # if (!length(ansvars)) Leave ansvars empty. Important for test 607.
 
-      # TODO remove as (m)get is now folded in above.
-      # added 'mget' - fix for #994
-      if (any(c("get", "mget") %chin% av)) {
-        if (verbose) {
-          cat("'(m)get' found in j. ansvars being set to all columns. Use .SDcols or a single j=eval(macro) instead. Both will detect the columns used which is important for efficiency.\nOld:", paste(ansvars,collapse=","),"\n")
-          # get('varname') is too difficult to detect which columns are used in general
-          # eval(macro) column names are detected via the  if jsub[[1]]==eval switch earlier above.
-        }
-
-        # Do not include z in .SD when dt[, z := {.SD; get("x")}, .SDcols = "y"] (#2326, #2338)
-        if (is.call(jsub) && length(jsub[[1L]]) == 1L && jsub[[1L]] == ":=" && is.symbol(jsub[[2L]])) {
-          jsub_lhs_symbol = as.character(jsub[[2L]])
-            if (jsub_lhs_symbol %chin% non_sdvars) {
-            sdvars = setdiff(sdvars, jsub_lhs_symbol)
-          }
-        }
-        allcols = c(names_x, xdotprefix, names_i, idotprefix)
-        ansvars = setdiff(allcols, bynames) # fix for bug #5443
-        non_sdvars = setdiff(ansvars, sdvars)
-        ansvals = chmatch(ansvars, names_x)
-        if (verbose) cat("New:",paste(ansvars,collapse=","),"\n")
-      }
-
       lhs = NULL
       newnames = NULL
       suppPrint = identity

From e1ccde01491056847ee13a482e77579e1c338a73 Mon Sep 17 00:00:00 2001
From: Cole Miller <57992489+ColeMiller1@users.noreply.github.com>
Date: Wed, 29 Jan 2020 23:30:30 -0500
Subject: [PATCH 2/6] Update NEWS.md

---
 NEWS.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/NEWS.md b/NEWS.md
index d1b6b90bcd..facde8c113 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -79,6 +79,7 @@ unit = "s")
 
 3. Dispatch of `first` and `last` functions now properly works again for `xts` objects, [#4053](https://github.com/Rdatatable/data.table/issues/4053). Thanks to @ethanbsmith for reporting.
 
+4. When updating by reference, the use of `get` could result in columns being re-ordered silently, [#4089](https://github.com/Rdatatable/data.table/issues/4089). Thanks to @dmongin for reporting and Cole Miller for the fix.
 ## NOTES
 
 1. `as.IDate`, `as.ITime`, `second`, `minute`, and `hour` now recognize UTC equivalents for speed: GMT, GMT-0, GMT+0, GMT0, Etc/GMT, and Etc/UTC, [#4116](https://github.com/Rdatatable/data.table/issues/4116).

From fad5b9713530338b0b905f2903572f5a5b39444a Mon Sep 17 00:00:00 2001
From: Cole Miller <57992489+ColeMiller1@users.noreply.github.com>
Date: Wed, 29 Jan 2020 23:32:39 -0500
Subject: [PATCH 3/6] better warning

---
 R/data.table.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/data.table.R b/R/data.table.R
index 352f8b142f..157192a93d 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -997,7 +997,7 @@ replace_dot_alias = function(e) {
           } else { ## TODO remove this else statement in Jan. 2023
             newvars = setdiff(allcols, bynames) # checking if old cols c("y", "x") became c("x", "y")
             if (length(newvars) == length(ansvars) && !all(newvars == ansvars)) {
-              warning("(m)get found in j with update assignment (i.e., `(cols) := ...`). j previously was re-ordered. See Issue #4089")
+              warning("(m)get found in j with update assignment (i.e., `(cols) := ...`). j column order was re-ordered prior to version 1.12.9. See Issue #4089")
             }
           } 
         } else  if (length(non_sdvars)) {

From 47e53eacb8ef747f30148c7f9c943e08cbd48acf Mon Sep 17 00:00:00 2001
From: Cole Miller <57992489+ColeMiller1@users.noreply.github.com>
Date: Sat, 1 Feb 2020 19:20:49 -0500
Subject: [PATCH 4/6] Update data.table.R

---
 R/data.table.R | 42 +++++++++++++++++++-----------------------
 1 file changed, 19 insertions(+), 23 deletions(-)

diff --git a/R/data.table.R b/R/data.table.R
index 1fc39cc40b..8ea33d28d7 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -968,39 +968,35 @@ replace_dot_alias = function(e) {
         }
         # fix for long standing FR/bug, #495 and #484
         allcols = c(names_x, xdotprefix, names_i, idotprefix)
-        non_sdvars <- setdiff(intersect(av, allcols), c(bynames, ansvars))
-        
+        non_sdvars = setdiff(intersect(av, allcols), c(bynames, ansvars))
+      
         # added 'mget' - fix for #994
         if (any(c("get", "mget") %chin% av)){
-          # get('varname') is too difficult to detect which columns are used in general
-          # eval(macro) column names are detected via the  if jsub[[1]]==eval switch earlier above.
-          b_warn = FALSE 
+          if (verbose) 
+            cat(gettextf("'(m)get' found in j. ansvars being set to all columns. Use .SDcols or a single j=eval(macro) instead. Both will detect the columns used which is important for efficiency.\nOld ansvars: %s \n", paste(ansvars,collapse=","), domain = "R-data.table"))
+            # get('varname') is too difficult to detect which columns are used in general
+            # eval(macro) column names are detected via the  if jsub[[1]]==eval switch earlier above.
+
+          # Do not include z in .SD when dt[, z := {.SD; get("x")}, .SDcols = "y"] (#2326, #2338)
           if (is.call(jsub) && length(jsub[[1L]]) == 1L && jsub[[1L]] == ":="){
             if (is.symbol(jsub[[2L]])) {
               jsub_lhs_symbol = as.character(jsub[[2L]])
               if (jsub_lhs_symbol %chin% non_sdvars) {
                 sdvars = setdiff(sdvars, jsub_lhs_symbol)
               }
-            } else {
-              # fix for #4089 - if all names are already accounted for, we don't need to re-extract the names for the (m)get call
-              b_warn = is.null(names_i) && all(names_x %chin% c(ansvars, allbyvars))
-            }
+            } 
           }
-          if (!b_warn) {
-            if (verbose) 
-              cat("'(m)get' found in j. ansvars being set to all columns. Use .SDcols or a single j=eval(macro) instead. Both will detect the columns used which is important for efficiency.\nOld:", paste(ansvars,collapse=","),"\n")
+          
+          if (missing(.SDcols)) {
             ansvars = setdiff(allcols, bynames) # fix for bug #5443
-            non_sdvars = setdiff(ansvars, sdvars)
-            ansvals = chmatch(ansvars, names_x)
-            if (verbose) 
-              cat("New:",paste(ansvars,collapse=","),"\n")
-          } else { ## TODO remove this else statement in Jan. 2023
-            newvars = setdiff(allcols, bynames) # checking if old cols c("y", "x") became c("x", "y")
-            if (length(newvars) == length(ansvars) && !all(newvars == ansvars)) {
-              warning("(m)get found in j with update assignment (i.e., `(cols) := ...`). j column order was re-ordered prior to version 1.12.9. See Issue #4089")
-            }
-          } 
-        } else  if (length(non_sdvars)) {
+          } else {
+            # fixes #4089 - if .SDcols was already evaluated, we do not want the order of the columns to change.
+            ansvars = union(ansvars, setdiff(setdiff(allcols, ansvars), bynames)) 
+          }
+          non_sdvars = setdiff(ansvars, sdvars)
+          ansvals = chmatch(ansvars, names_x)
+          if (verbose) cat(gettextf("New ansvars: %s \n",paste(ansvars,collapse=","), domain = "R-data.table"))
+        } else if (length(non_sdvars)) {
           # we've a situation like DT[, c(sum(V1), lapply(.SD, mean)), by=., .SDcols=...] or
           # DT[, lapply(.SD, function(x) x *v1), by=, .SDcols=...] etc.,
           ansvars = union(ansvars, non_sdvars)

From 767b20e682d31cadd9494059c6e2f6fc7df591bf Mon Sep 17 00:00:00 2001
From: Cole Miller <57992489+ColeMiller1@users.noreply.github.com>
Date: Sat, 1 Feb 2020 19:32:07 -0500
Subject: [PATCH 5/6] changed to brackify

---
 R/data.table.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/data.table.R b/R/data.table.R
index 8ea33d28d7..4ed2083f55 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -973,7 +973,7 @@ replace_dot_alias = function(e) {
         # added 'mget' - fix for #994
         if (any(c("get", "mget") %chin% av)){
           if (verbose) 
-            cat(gettextf("'(m)get' found in j. ansvars being set to all columns. Use .SDcols or a single j=eval(macro) instead. Both will detect the columns used which is important for efficiency.\nOld ansvars: %s \n", paste(ansvars,collapse=","), domain = "R-data.table"))
+            cat(gettextf("'(m)get' found in j. ansvars being set to all columns. Use .SDcols or a single j=eval(macro) instead. Both will detect the columns used which is important for efficiency.\nOld ansvars: %s \n", brackify(ansvars), domain = "R-data.table"))
             # get('varname') is too difficult to detect which columns are used in general
             # eval(macro) column names are detected via the  if jsub[[1]]==eval switch earlier above.
 
@@ -995,7 +995,7 @@ replace_dot_alias = function(e) {
           }
           non_sdvars = setdiff(ansvars, sdvars)
           ansvals = chmatch(ansvars, names_x)
-          if (verbose) cat(gettextf("New ansvars: %s \n",paste(ansvars,collapse=","), domain = "R-data.table"))
+          if (verbose) cat(gettextf("New ansvars: %s \n", brackify(ansvars), domain = "R-data.table"))
         } else if (length(non_sdvars)) {
           # we've a situation like DT[, c(sum(V1), lapply(.SD, mean)), by=., .SDcols=...] or
           # DT[, lapply(.SD, function(x) x *v1), by=, .SDcols=...] etc.,

From 4d11fe0984b9c380b1dc598a27a4ef616a1cfe5c Mon Sep 17 00:00:00 2001
From: Cole Miller <57992489+ColeMiller1@users.noreply.github.com>
Date: Sun, 2 Feb 2020 07:04:24 -0500
Subject: [PATCH 6/6] Update tests.Rraw

---
 inst/tests/tests.Rraw | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index f6d17a0762..62f7a66433 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -16770,6 +16770,10 @@ test(2132.2, fifelse(TRUE, 1, s2),       error = "S4 class objects (except nanot
 test(2132.3, fcase(TRUE, s1, FALSE, s2), error = "S4 class objects (except nanotime) are not supported. Please see https://github.com/Rdatatable/data.table/issues/4131.")
 rm(s1, s2, class2132)
 
+# Use of (m)get does not re-order when using .SDcols, #4089
+dt = data.table(x = 1L, y = 2L, z = 3L)
+cols = c('x', 'y')
+test(2133.1, dt[, (cols) := lapply(.SD[get("x") == 1],function(x){x + 2L}), .SDcols = cols ,by = z], data.table(x = 1L + 2L, y = 2L + 2L, z = 3L))
 
 ########################
 #  Add new tests here  #