From bbe734fa200ad5dd4e12439b7ac82544b874c4be Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Sat, 24 Aug 2019 05:04:47 +0800
Subject: [PATCH 1/6] Closes #3209 -- g[[

---
 R/data.table.R | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/R/data.table.R b/R/data.table.R
index 47c6729320..0d79ed2bdf 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -1512,7 +1512,7 @@ replace_order = function(isub, verbose, env) {
         jvnames = ansvarsnew
       }
     } else if (length(as.character(jsub[[1L]])) == 1L) {  # Else expect problems with <jsub[[1L]] == >
-      subopt = length(jsub) == 3L && jsub[[1L]] == "[" && (is.numeric(jsub[[3L]]) || jsub[[3L]] == ".N")
+      subopt = length(jsub) == 3L && (jsub[[1L]] == "[" || jsub[[1L]] == "[[") && (is.numeric(jsub[[3L]]) || jsub[[3L]] == ".N")
       headopt = jsub[[1L]] == "head" || jsub[[1L]] == "tail"
       firstopt = jsub[[1L]] == "first" || jsub[[1L]] == "last" # fix for #2030
       if ((length(jsub) >= 2L && jsub[[2L]] == ".SD") &&
@@ -1628,7 +1628,7 @@ replace_order = function(isub, verbose, env) {
         }
       } else {
         # Apply GForce
-        gfuns = c("sum", "prod", "mean", "median", "var", "sd", ".N", "min", "max", "head", "last", "first", "tail", "[") # added .N for #5760
+        gfuns = c("sum", "prod", "mean", "median", "var", "sd", ".N", "min", "max", "head", "last", "first", "tail", "[", "[[") # added .N for #5760
         .ok = function(q) {
           if (dotN(q)) return(TRUE) # For #5760
           # run GForce for simple f(x) calls and f(x, na.rm = TRUE)-like calls where x is a column of .SD
@@ -1640,7 +1640,7 @@ replace_order = function(isub, verbose, env) {
           # otherwise there must be three arguments, and only in two cases:
           #   1) head/tail(x, 1) or 2) x[n], n>0
           length(q)==3L && length(q3 <- q[[3L]])==1L && is.numeric(q3) &&
-            ( (q1c %chin% c("head", "tail") && q3==1L) || (q1c == "[" && q3>0L) )
+            ( (q1c %chin% c("head", "tail") && q3==1L) || ((q1c == "[" || q1c == "[[") && q3>0L) )
         }
         if (jsub[[1L]]=="list") {
           GForce = TRUE
@@ -2790,7 +2790,7 @@ rleidv = function(x, cols=seq_along(x), prefix=NULL) {
 }
 
 # GForce functions
-`g[` = function(x, n) .Call(Cgnthvalue, x, as.integer(n)) # n is of length=1 here.
+`g[` = `g[[` = function(x, n) .Call(Cgnthvalue, x, as.integer(n)) # n is of length=1 here.
 ghead = function(x, n) .Call(Cghead, x, as.integer(n)) # n is not used at the moment
 gtail = function(x, n) .Call(Cgtail, x, as.integer(n)) # n is not used at the moment
 gfirst = function(x) .Call(Cgfirst, x)

From d8dfc0df679d3f5f82b4b8cf57181e672ee38caa Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Sat, 24 Aug 2019 05:11:04 +0800
Subject: [PATCH 2/6] tidy code to simplify adding GForce functions

gforce_ok back to local scope
---
 R/data.table.R | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/R/data.table.R b/R/data.table.R
index 0d79ed2bdf..b540532b68 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -1628,8 +1628,7 @@ replace_order = function(isub, verbose, env) {
         }
       } else {
         # Apply GForce
-        gfuns = c("sum", "prod", "mean", "median", "var", "sd", ".N", "min", "max", "head", "last", "first", "tail", "[", "[[") # added .N for #5760
-        .ok = function(q) {
+        .gforce_ok = function(q) {
           if (dotN(q)) return(TRUE) # For #5760
           # run GForce for simple f(x) calls and f(x, na.rm = TRUE)-like calls where x is a column of .SD
           # is.symbol() is for #1369, #1974 and #2949
@@ -1644,8 +1643,8 @@ replace_order = function(isub, verbose, env) {
         }
         if (jsub[[1L]]=="list") {
           GForce = TRUE
-          for (ii in seq_along(jsub)[-1L]) if (!.ok(jsub[[ii]])) GForce = FALSE
-        } else GForce = .ok(jsub)
+          for (ii in seq_along(jsub)[-1L]) if (!.gforce_ok(jsub[[ii]])) {GForce = FALSE; break}
+        } else GForce = .gforce_ok(jsub)
         if (GForce) {
           if (jsub[[1L]]=="list")
             for (ii in seq_along(jsub)[-1L]) {
@@ -2790,6 +2789,12 @@ rleidv = function(x, cols=seq_along(x), prefix=NULL) {
 }
 
 # GForce functions
+#   to add a new function to GForce (from the R side -- the easy part!):
+#     (1) add it to gfuns
+#     (2) edit .gforce_ok (defined within `[`) to catch which j will apply the new function
+#     (3) define the gfun = function() R wrapper
+gfuns = c("[", "[[", "head", "tail", "first", "last", "sum", "mean", "prod",
+          "median", "min", "max", "var", "sd", ".N") # added .N for #5760
 `g[` = `g[[` = function(x, n) .Call(Cgnthvalue, x, as.integer(n)) # n is of length=1 here.
 ghead = function(x, n) .Call(Cghead, x, as.integer(n)) # n is not used at the moment
 gtail = function(x, n) .Call(Cgtail, x, as.integer(n)) # n is not used at the moment

From 653d9ce3596c1f39b35a8dd23a4ed4d49f09c623 Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Sat, 24 Aug 2019 22:50:24 +0800
Subject: [PATCH 3/6] add test, NEWS

---
 NEWS.md               |  2 ++
 inst/tests/tests.Rraw | 11 ++++++++++-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index 6b6999c7a6..b3e582d2ea 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -232,6 +232,8 @@
 
 29. `integer64` defined on a subset of a new column would leave "gibberish" on the remaining rows, [#3723](https://github.com/Rdatatable/data.table/issues/3723). A bug in `rbindlist` with the same root cause was also fixed, [#1459](https://github.com/Rdatatable/data.table/issues/1459). Thanks @shrektan and @jangorecki for the reports.
 
+30. `[[` calls by group _on vectors_ now optimized with `GForce`, [#3209](https://github.com/Rdatatable/data.table/issues/3209). Thanks @renkun-ken for the suggestion. Optimization for list columns has been skipped for now; please file a feature request with your use case if this would benefit you.
+
 #### NOTES
 
 1. `rbindlist`'s `use.names="check"` now emits its message for automatic column names (`"V[0-9]+"`) too, [#3484](https://github.com/Rdatatable/data.table/pull/3484). See news item 5 of v1.12.2 below.
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index aade8300e6..8709e76ec8 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -7977,6 +7977,16 @@ test(1581.11, ans2 <- dt[x %in% letters[15:20], .SD[2], by=x, verbose=TRUE],
 test(1581.12, ans1, ans2)
 options(datatable.optimize = Inf)
 
+# #3209 g[[
+options(datatable.optimize=1L)
+test(1581.13, ans1 <- dt[x %in% letters[15:20], i1[[2]], by=x, verbose=TRUE],
+              output = "(GForce FALSE)")
+options(datatable.optimize=2L)
+test(1581.14, ans2 <- dt[x %in% letters[15:20], i1[[2]], by=x, verbose=TRUE],
+              output = "GForce optimized j")
+test(1581.15, ans1, ans2)
+options(datatable.optimize = Inf)
+
 # handle NULL value correctly #1429
 test(1582, uniqueN(NULL), 0L)
 
@@ -15738,7 +15748,6 @@ if (test_bit64) {
   test(2083.2, rbind(data.table(a=1:2, b=as.integer64(c(1,NA))), data.table(a=3L), fill=TRUE)$b, as.integer64(c(1, NA, NA)))
 }
 
-
 ###################################
 #  Add new tests above this line  #
 ###################################

From ea47e4bb262296778d5d974cb0f7e8abbc3722b7 Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Sat, 24 Aug 2019 23:15:14 +0800
Subject: [PATCH 4/6] opt=Inf needed

---
 inst/tests/tests.Rraw | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 8709e76ec8..7a6e17b160 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -7981,11 +7981,10 @@ options(datatable.optimize = Inf)
 options(datatable.optimize=1L)
 test(1581.13, ans1 <- dt[x %in% letters[15:20], i1[[2]], by=x, verbose=TRUE],
               output = "(GForce FALSE)")
-options(datatable.optimize=2L)
+options(datatable.optimize=Inf)
 test(1581.14, ans2 <- dt[x %in% letters[15:20], i1[[2]], by=x, verbose=TRUE],
               output = "GForce optimized j")
 test(1581.15, ans1, ans2)
-options(datatable.optimize = Inf)
 
 # handle NULL value correctly #1429
 test(1582, uniqueN(NULL), 0L)

From 77115fc506548446866a20d65b2fa25c8559b75f Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Fri, 30 Aug 2019 11:06:38 -0700
Subject: [PATCH 5/6] d1 not i1 because the i1 was removed from dt on line
 above between test 1580 and 1581.1

---
 inst/tests/tests.Rraw | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index e84ecf7ead..f1e1bf7718 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -7979,10 +7979,10 @@ options(datatable.optimize = Inf)
 
 # #3209 g[[
 options(datatable.optimize=1L)
-test(1581.13, ans1 <- dt[x %in% letters[15:20], i1[[2]], by=x, verbose=TRUE],
+test(1581.13, ans1 <- dt[x %in% letters[15:20], d1[[2]], by=x, verbose=TRUE],
               output = "(GForce FALSE)")
 options(datatable.optimize=Inf)
-test(1581.14, ans2 <- dt[x %in% letters[15:20], i1[[2]], by=x, verbose=TRUE],
+test(1581.14, ans2 <- dt[x %in% letters[15:20], d1[[2]], by=x, verbose=TRUE],
               output = "GForce optimized j")
 test(1581.15, ans1, ans2)
 

From dca85aec1d860885e0d4e1b3b4eeab990b03c2d0 Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Fri, 30 Aug 2019 11:18:56 -0700
Subject: [PATCH 6/6] reinstated request in news item to file FR for this
 optimization on list columns

---
 NEWS.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index 4cc891b682..5300aaa0eb 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -166,7 +166,7 @@
 
 25. `setnames(DT, old, new)` now omits any `old==new` to save redundant key and index name updates, [#3783](https://github.com/Rdatatable/data.table/issues/3783). `setnames(DT, new)` (i.e. not providing `old`) already omitted any column name updates where `names(DT)==new`; e.g. `setnames(DT, gsub('^_', '', names(DT)))` exits early if no columns start with `_`.
 
-26. `[[` by group is now optimized, [#3209](https://github.com/Rdatatable/data.table/issues/3209). Thanks @renkun-ken for the suggestion. `[` by group was already optimized.
+26. `[[` by group is now optimized for regular vectors (not type list), [#3209](https://github.com/Rdatatable/data.table/issues/3209). Thanks @renkun-ken for the suggestion. `[` by group was already optimized. Please file a feature request if you would like this optimization for list columns.
 
 #### BUG FIXES