From 3b3239c4b238c78fd09ad35b2d24a704922a9bea Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Fri, 16 Aug 2019 00:19:44 +0800
Subject: [PATCH 1/6] Closes #3496 and #3766 -- detect & block setkey from
 double-reordering identical columns

vestigial
---
 NEWS.md               |  2 ++
 R/data.table.R        | 18 +++++++++---------
 R/setkey.R            | 13 ++++++++-----
 inst/tests/tests.Rraw | 10 ++++++++++
 4 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index bc6b599c35..867664e00c 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -228,6 +228,8 @@
 
 27. `integer64` sum-by-group is now properly optimized, [#1647](https://github.com/Rdatatable/data.table/issues/1647), [#3464](https://github.com/Rdatatable/data.table/issues/3464). Thanks to @mlandry22-h2o for the report.
 
+28. `setkey`/`setkeyv` would mangle the ordering of tables with memory-duplicate (i.e., sharing the same `address`) columns, [#3496](https://github.com/Rdatatable/data.table/issues/3496) and [#3766](https://github.com/Rdatatable/data.table/issues/3766). This is fixed by forcing copies of duplicated columns before reordering. Relatedly, `[` could also produce such columns when subsetting. Thanks @kirillmayantsev and @alex46015 for reporting and @jaapwalhout and @Atrebas helping to debug&isolate the issue.
+
 #### NOTES
 
 1. `rbindlist`'s `use.names="check"` now emits its message for automatic column names (`"V[0-9]+"`) too, [#3484](https://github.com/Rdatatable/data.table/pull/3484). See news item 5 of v1.12.2 below.
diff --git a/R/data.table.R b/R/data.table.R
index 47c6729320..e926612f4b 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -1266,25 +1266,25 @@ replace_order = function(isub, verbose, env) {
 
     jval = eval(jsub, SDenv, parent.frame())
     # copy 'jval' when required
-    # More speedup - only check + copy if irows is NULL
     # Temp fix for #921 - check address and copy *after* evaluating 'jval'
-    if (is.null(irows)) {
-      if (!is.list(jval)) { # performance improvement when i-arg is S4, but not list, #1438, Thanks @DCEmilberg.
-        jcpy = address(jval) %in% vapply_1c(SDenv$.SD, address) # %chin% errors when RHS is list()
-        if (jcpy) jval = copy(jval)
-      } else if (address(jval) == address(SDenv$.SD)) {
+    # need to check if copy needed regardless of irows status (#3766)
+    if (!is.list(jval)) { # performance improvement when i-arg is S4, but not list, #1438, Thanks @DCEmilberg.
+      jcpy = address(jval) %chin% vapply_1c(SDenv$.SD, address)
+      if (jcpy) jval = copy(jval)
+    } else {
+      if (address(jval) == address(SDenv$.SD)) {
         jval = copy(jval)
-      } else if ( length(jcpy <- which(vapply_1c(jval, address) %in% vapply_1c(SDenv, address))) ) {
+      } else if ( length(jcpy <- which(vapply_1c(jval, address) %chin% vapply_1c(SDenv, address))) ) {
         for (jidx in jcpy) jval[[jidx]] = copy(jval[[jidx]])
-      } else if (is.call(jsub) && jsub[[1L]] == "get" && is.list(jval)) {
+      } else if (is.call(jsub) && jsub[[1L]] == "get") {
         jval = copy(jval) # fix for #1212
       }
-    } else {
       if (is.data.table(jval)) {
         setattr(jval, '.data.table.locked', NULL) # fix for #1341
         if (!truelength(jval)) alloc.col(jval)
       }
     }
+
     if (!is.null(lhs)) {
       # TODO?: use set() here now that it can add new columns. Then remove newnames and alloc logic above.
       .Call(Cassign,x,irows,cols,newnames,jval)
diff --git a/R/setkey.R b/R/setkey.R
index fbf8accbe8..0037a9ddb0 100644
--- a/R/setkey.R
+++ b/R/setkey.R
@@ -102,12 +102,15 @@ setkeyv = function(x, cols, verbose=getOption("datatable.verbose"), physical=TRU
   }
   setattr(x,"index",NULL)   # TO DO: reorder existing indexes likely faster than rebuilding again. Allow optionally. Simpler for now to clear.
   if (length(o)) {
-    if (verbose) {
-      tt = suppressMessages(system.time(.Call(Creorder,x,o)))
-      cat("reorder took", tt["user.self"]+tt["sys.self"], "sec\n")
-    } else {
-      .Call(Creorder,x,o)
+    # table has deep-duplicate columns, e.g. #3496
+    if (any(idx <- duplicated(vapply_1c(x, address)))) {
+      if (verbose) { last.started.at=proc.time(); cat('Deep duplicate columns (identical address in memory) detected... copying', sum(idx), 'columns ...'); flush.console() }
+      for (j in which(idx)) set(x, NULL, j, copy(x[[j]]))
+      if (verbose) { cat("done in", timetaken(last.started.at), "\n"); flush.console() }
     }
+    if (verbose) { last.started.at = proc.time() }
+    .Call(Creorder,x,o)
+    if (verbose) { cat("reorder took", timetaken(last.started.at), "\n"); flush.console() }
   } else {
     if (verbose) cat("x is already ordered by these columns, no need to call reorder\n")
   } # else empty integer() from forderv means x is already ordered by those cols, nothing to do.
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 58c3c63c4a..503bdc8a75 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -15671,6 +15671,16 @@ if (test_bit64) {
   test(2077.06, int64_int32_match(d[, sum(i32, na.rm=TRUE), g], d[, sum(i64, na.rm=TRUE), g]))
 }
 
+# #3766 simple queries can create tables with deep-copy-identical columns
+x = data.table(a=1L, b=c(1L, 4L, 2L, 3L), c=4:1)
+test(2078.1, x[a == 1L, .(b, b2=b)][ , !identical(address(b), address(b2))])
+# #3496 generally need to protect against deep-copy-idential columns in setkey
+x = data.frame(a=paste0(2:1), stringsAsFactors=FALSE)
+x$b = x$a
+setDT(x)
+test(2078.2, setkey(x, a, verbose=TRUE), output='Deep duplicate columns')
+test(2078.3, x, data.table(a=paste0(1:2), b=paste0(1:2), key='a'))
+
 
 ###################################
 #  Add new tests above this line  #

From 9d4e929fb318a488be9d754f77bcfb1872500f69 Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Sun, 18 Aug 2019 23:28:43 +0800
Subject: [PATCH 2/6] also closed #2245; test added

---
 NEWS.md               | 2 +-
 inst/tests/tests.Rraw | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index 867664e00c..f02e967a30 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -228,7 +228,7 @@
 
 27. `integer64` sum-by-group is now properly optimized, [#1647](https://github.com/Rdatatable/data.table/issues/1647), [#3464](https://github.com/Rdatatable/data.table/issues/3464). Thanks to @mlandry22-h2o for the report.
 
-28. `setkey`/`setkeyv` would mangle the ordering of tables with memory-duplicate (i.e., sharing the same `address`) columns, [#3496](https://github.com/Rdatatable/data.table/issues/3496) and [#3766](https://github.com/Rdatatable/data.table/issues/3766). This is fixed by forcing copies of duplicated columns before reordering. Relatedly, `[` could also produce such columns when subsetting. Thanks @kirillmayantsev and @alex46015 for reporting and @jaapwalhout and @Atrebas helping to debug&isolate the issue.
+28. `setkey`/`setkeyv` would mangle the ordering of tables with memory-duplicate (i.e., sharing the same `address`) columns, [#3496](https://github.com/Rdatatable/data.table/issues/3496) and [#3766](https://github.com/Rdatatable/data.table/issues/3766). This is fixed by forcing copies of duplicated columns before reordering. Relatedly, `[` could also produce such columns when subsetting. Finally, `[` might leave its output with `.data.table.locked`, preventing downstream use of `:=`, [#2245](https://github.com/Rdatatable/data.table/issues/2245). Thanks @kirillmayantsev, @alex46015, and @grayskripko for reporting and @jaapwalhout and @Atrebas helping to debug&isolate the issue.
 
 #### NOTES
 
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 503bdc8a75..ace84c3166 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -15680,6 +15680,9 @@ x$b = x$a
 setDT(x)
 test(2078.2, setkey(x, a, verbose=TRUE), output='Deep duplicate columns')
 test(2078.3, x, data.table(a=paste0(1:2), b=paste0(1:2), key='a'))
+## #2245 unset .data.table.locked even if is.null(irows)
+x <- data.table(a=c(0.85, -0.38, 1.19), b=c(0.56, 0.63, -1.30))
+test(2078.4, x[ , round(.SD, 1)][ , c := 8], data.table(a=c(.8, -.4, 1.2), b=c(.6,.6,-1.3), c=8))
 
 
 ###################################

From df3aed31da5e141a686f0df2dfbbe139be080c31 Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Mon, 26 Aug 2019 15:35:48 -0700
Subject: [PATCH 3/6] test number typo

---
 inst/tests/tests.Rraw | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index b412fad704..3f6b68b7f4 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -15778,7 +15778,7 @@ test(2087.1, x[a == 1L, .(b, b2=b)][ , !identical(address(b), address(b2))])
 x = data.frame(a=paste0(2:1), stringsAsFactors=FALSE)
 x$b = x$a
 setDT(x)
-test(2078.2, setkey(x, a, verbose=TRUE), output='Deep duplicate columns')
+test(2087.2, setkey(x, a, verbose=TRUE), output='Deep duplicate columns')
 test(2087.3, x, data.table(a=paste0(1:2), b=paste0(1:2), key='a'))
 ## #2245 unset .data.table.locked even if is.null(irows)
 x <- data.table(a=c(0.85, -0.38, 1.19), b=c(0.56, 0.63, -1.30))

From 7637f5130d52037de54fbc230e7ebbb379efa428 Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Mon, 26 Aug 2019 18:46:59 -0700
Subject: [PATCH 4/6] created copySharedColumns at C level, called from
 reorder.c at root cause. Faster too by avoiding address() at R level which
 creates character strings for addresses and avoiding hash table of
 duplicated(); only noticeable for very many columns.

---
 R/setkey.R            | 12 ++++++------
 inst/tests/tests.Rraw | 10 ++++++++--
 src/data.table.h      |  1 +
 src/reorder.c         |  2 +-
 src/utils.c           | 36 ++++++++++++++++++++++++++++++++++++
 5 files changed, 52 insertions(+), 9 deletions(-)

diff --git a/R/setkey.R b/R/setkey.R
index 0037a9ddb0..0ffdf23a46 100644
--- a/R/setkey.R
+++ b/R/setkey.R
@@ -41,6 +41,12 @@ setkeyv = function(x, cols, verbose=getOption("datatable.verbose"), physical=TRU
     setattr(x,"index",NULL)  # setkey(DT,NULL) also clears secondary keys. setindex(DT,NULL) just clears secondary keys.
     return(invisible(x))
   }
+  if (!missing(verbose)) {
+    stopifnot(isTRUEorFALSE(verbose))
+    # set the global verbose option because that is fetched from C code without having to pass it through
+    oldverbose = options(datatable.verbose=verbose)
+    on.exit(options(oldverbose))
+  }
   if (!is.data.table(x)) stop("x is not a data.table")
   if (!is.character(cols)) stop("cols is not a character vector. Please see further information in ?setkey.")
   if (physical && identical(attr(x, ".data.table.locked", exact=TRUE),TRUE)) stop("Setting a physical key on .SD is reserved for possible future use; to modify the original data's order by group. Try setindex() instead. Or, set*(copy(.SD)) as a (slow) last resort.")
@@ -102,12 +108,6 @@ setkeyv = function(x, cols, verbose=getOption("datatable.verbose"), physical=TRU
   }
   setattr(x,"index",NULL)   # TO DO: reorder existing indexes likely faster than rebuilding again. Allow optionally. Simpler for now to clear.
   if (length(o)) {
-    # table has deep-duplicate columns, e.g. #3496
-    if (any(idx <- duplicated(vapply_1c(x, address)))) {
-      if (verbose) { last.started.at=proc.time(); cat('Deep duplicate columns (identical address in memory) detected... copying', sum(idx), 'columns ...'); flush.console() }
-      for (j in which(idx)) set(x, NULL, j, copy(x[[j]]))
-      if (verbose) { cat("done in", timetaken(last.started.at), "\n"); flush.console() }
-    }
     if (verbose) { last.started.at = proc.time() }
     .Call(Creorder,x,o)
     if (verbose) { cat("reorder took", timetaken(last.started.at), "\n"); flush.console() }
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 3f6b68b7f4..2755fe2205 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -15778,8 +15778,14 @@ test(2087.1, x[a == 1L, .(b, b2=b)][ , !identical(address(b), address(b2))])
 x = data.frame(a=paste0(2:1), stringsAsFactors=FALSE)
 x$b = x$a
 setDT(x)
-test(2087.2, setkey(x, a, verbose=TRUE), output='Deep duplicate columns')
-test(2087.3, x, data.table(a=paste0(1:2), b=paste0(1:2), key='a'))
+test(2087.2, setkey(x, a, verbose=TRUE), data.table(a=paste0(1:2), b=paste0(1:2), key="a"),
+             output='Found and copied 1 column with a shared memory address')
+x = data.frame(a=paste0(2:1), stringsAsFactors=FALSE)
+x$b = x$a
+x$c = x$a
+setDT(x)
+test(2087.3, setkey(x, a, verbose=TRUE), data.table(a=paste0(1:2), b=paste0(1:2), c=paste0(1:2), key="a"),
+             output='Found and copied 2 columns with a shared memory address')
 ## #2245 unset .data.table.locked even if is.null(irows)
 x <- data.table(a=c(0.85, -0.38, 1.19), b=c(0.56, 0.63, -1.30))
 test(2087.4, x[ , round(.SD, 1)][ , c := 8], data.table(a=c(.8, -.4, 1.2), b=c(.6,.6,-1.3), c=8))
diff --git a/src/data.table.h b/src/data.table.h
index 183d90bee8..469b9e76e7 100644
--- a/src/data.table.h
+++ b/src/data.table.h
@@ -216,4 +216,5 @@ void coerceFill(SEXP fill, double *dfill, int32_t *ifill, int64_t *i64fill);
 SEXP coerceFillR(SEXP fill);
 bool INHERITS(SEXP x, SEXP char_);
 bool Rinherits(SEXP x, SEXP char_);
+void copySharedColumns(SEXP x);
 
diff --git a/src/reorder.c b/src/reorder.c
index e65a58f26c..ec3824efa0 100644
--- a/src/reorder.c
+++ b/src/reorder.c
@@ -21,6 +21,7 @@ SEXP reorder(SEXP x, SEXP order)
         maxSize=SIZEOF(v);
       if (ALTREP(v)) SET_VECTOR_ELT(x,i,duplicate(v));  // expand compact vector in place ready for reordering by reference
     }
+    copySharedColumns(x); // otherwise two columns which point to the same vector would be reordered and then re-reordered, issues linked in PR#3768
   } else {
     if (SIZEOF(x)!=4 && SIZEOF(x)!=8 && SIZEOF(x)!=16)
       error("reorder accepts vectors but this non-VECSXP is type '%s' which isn't yet supported (SIZEOF=%d)", type2char(TYPEOF(x)), SIZEOF(x));
@@ -34,7 +35,6 @@ SEXP reorder(SEXP x, SEXP order)
   int nprotect = 0;
   if (ALTREP(order)) { order=PROTECT(duplicate(order)); nprotect++; }  // TODO: how to fetch range of ALTREP compact vector
 
-
   const int *restrict idx = INTEGER(order);
   int i=0;
   while (i<nrow && idx[i] == i+1) i++;
diff --git a/src/utils.c b/src/utils.c
index f17535c318..a369a8f7bc 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -173,3 +173,39 @@ bool Rinherits(SEXP x, SEXP char_) {
  return ans;
 }
 
+void copySharedColumns(SEXP x) {
+  const int ncol = length(x);
+  if (!isNewList(x) || ncol==1) return;
+  bool *shared = (bool *)R_alloc(ncol, sizeof(bool)); // on R heap in case duplicate() fails
+  int *savetl = (int *)R_alloc(ncol, sizeof(int));  // on R heap for convenience but could be a calloc
+  int nShared = 0;
+  const SEXP *xp = VECTOR_PTR(x);
+  for (int i=0; i<ncol; ++i) {
+    SEXP thiscol = xp[i];
+    const int thistl = TRUELENGTH(thiscol);
+    if (thistl<0) {
+      shared[i] = true;
+      nShared++;
+      // do not duplicate() here as the duplicate() might fail. Careful to restore tl first to all columns.
+      // Aside: thistl is which column shares the same address as this one in case that's ever useful in future.
+    } else {
+      shared[i] = false;
+      savetl[i] = thistl;  // these are vectors which are all expected to have tl, unlike CHARSXP which often don't (savetl() has CHARSXP in mind)
+      SET_TRUELENGTH(thiscol, -i-1);
+    }
+  }
+  // now we know nShared and which ones they are (if any), restore original tl back to all columns
+  for (int i=0; i<ncol; ++i) {
+    if (!shared[i]) SET_TRUELENGTH(VECTOR_ELT(x, i), savetl[i]);
+  }
+  // now that truelength has been restored for all columns, we can finally call duplicate()
+  if (nShared) {
+    for (int i=0; i<ncol; ++i) {
+      if (shared[i])
+        SET_VECTOR_ELT(x, i, duplicate(VECTOR_ELT(x, i)));
+    }
+    if (GetVerbose()) Rprintf("Found and copied %d column%s with a shared memory address\n", nShared, nShared>1?"s":"");
+    // GetVerbose() (slightly expensive call of all options) called here only when needed
+  }
+}
+

From 5b59f6e17b8f1cfbcd4fdcefc603eb432d71e61a Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Tue, 27 Aug 2019 16:13:45 +0800
Subject: [PATCH 5/6] remove ancillary issue fix to be filed independently

---
 NEWS.md               |  2 +-
 R/data.table.R        | 13 +++++++------
 inst/tests/tests.Rraw |  8 +-------
 3 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 8b29a90967..7baa5af5ab 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -234,7 +234,7 @@
 
 30. `groupingsets` functions now properly handle alone special symbols when using an empty set to group by, [#3653](https://github.com/Rdatatable/data.table/issues/3653). Thanks to @Henrik-P for the report.
 
-31. `setkey`/`setkeyv` would mangle the ordering of tables with memory-duplicate (i.e., sharing the same `address`) columns, [#3496](https://github.com/Rdatatable/data.table/issues/3496) and [#3766](https://github.com/Rdatatable/data.table/issues/3766). This is fixed by forcing copies of duplicated columns before reordering. Relatedly, `[` could also produce such columns when subsetting. Finally, `[` might leave its output with `.data.table.locked`, preventing downstream use of `:=`, [#2245](https://github.com/Rdatatable/data.table/issues/2245). Thanks @kirillmayantsev, @alex46015, and @grayskripko for reporting and @jaapwalhout and @Atrebas helping to debug&isolate the issue.
+31. `setkey`/`setkeyv` would mangle the ordering of tables with memory-duplicate (i.e., sharing the same `address`) columns, [#3496](https://github.com/Rdatatable/data.table/issues/3496) and [#3766](https://github.com/Rdatatable/data.table/issues/3766). This is fixed by forcing copies of duplicated columns before reordering. Relatedly, `[` could also produce such columns when subsetting. Thanks @kirillmayantsev and @alex46015 for reporting and @jaapwalhout and @Atrebas helping to debug&isolate the issue.
 
 #### NOTES
 
diff --git a/R/data.table.R b/R/data.table.R
index e926612f4b..3885eef2ed 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -1266,19 +1266,20 @@ replace_order = function(isub, verbose, env) {
 
     jval = eval(jsub, SDenv, parent.frame())
     # copy 'jval' when required
+    # More speedup - only check + copy if irows is NULL
     # Temp fix for #921 - check address and copy *after* evaluating 'jval'
-    # need to check if copy needed regardless of irows status (#3766)
-    if (!is.list(jval)) { # performance improvement when i-arg is S4, but not list, #1438, Thanks @DCEmilberg.
-      jcpy = address(jval) %chin% vapply_1c(SDenv$.SD, address)
-      if (jcpy) jval = copy(jval)
-    } else {
-      if (address(jval) == address(SDenv$.SD)) {
+    if (is.null(irows)) {
+      if (!is.list(jval)) { # performance improvement when i-arg is S4, but not list, #1438, Thanks @DCEmilberg.
+        jcpy = address(jval) %in% vapply_1c(SDenv$.SD, address) # %chin% errors when RHS is list()
+        if (jcpy) jval = copy(jval)
+      } else if (address(jval) == address(SDenv$.SD)) {
         jval = copy(jval)
       } else if ( length(jcpy <- which(vapply_1c(jval, address) %chin% vapply_1c(SDenv, address))) ) {
         for (jidx in jcpy) jval[[jidx]] = copy(jval[[jidx]])
       } else if (is.call(jsub) && jsub[[1L]] == "get") {
         jval = copy(jval) # fix for #1212
       }
+    } else {
       if (is.data.table(jval)) {
         setattr(jval, '.data.table.locked', NULL) # fix for #1341
         if (!truelength(jval)) alloc.col(jval)
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 2755fe2205..1e874a2461 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -15771,10 +15771,7 @@ test(2086.04, DT[ , sum(a), keyby = list()], data.table(V1=55L))
 test(2086.05, DT[ , sum(a), by = character()], data.table(V1=55L))
 test(2086.06, DT[ , sum(a), keyby = character()], data.table(V1=55L))
 
-# #3766 simple queries can create tables with deep-copy-identical columns
-x = data.table(a=1L, b=c(1L, 4L, 2L, 3L), c=4:1)
-test(2087.1, x[a == 1L, .(b, b2=b)][ , !identical(address(b), address(b2))])
-# #3496 generally need to protect against deep-copy-idential columns in setkey
+# #3496/#3766 setkey re-reorders columns which are deep-copy-identical
 x = data.frame(a=paste0(2:1), stringsAsFactors=FALSE)
 x$b = x$a
 setDT(x)
@@ -15786,9 +15783,6 @@ x$c = x$a
 setDT(x)
 test(2087.3, setkey(x, a, verbose=TRUE), data.table(a=paste0(1:2), b=paste0(1:2), c=paste0(1:2), key="a"),
              output='Found and copied 2 columns with a shared memory address')
-## #2245 unset .data.table.locked even if is.null(irows)
-x <- data.table(a=c(0.85, -0.38, 1.19), b=c(0.56, 0.63, -1.30))
-test(2087.4, x[ , round(.SD, 1)][ , c := 8], data.table(a=c(.8, -.4, 1.2), b=c(.6,.6,-1.3), c=8))
 
 
 ###################################

From f7031ba848bd4d074090779d45ccf4d6540c703b Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Tue, 27 Aug 2019 12:06:49 -0700
Subject: [PATCH 6/6] tweaks to news item and test

---
 NEWS.md               | 2 +-
 inst/tests/tests.Rraw | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 7baa5af5ab..bbcad04707 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -234,7 +234,7 @@
 
 30. `groupingsets` functions now properly handle alone special symbols when using an empty set to group by, [#3653](https://github.com/Rdatatable/data.table/issues/3653). Thanks to @Henrik-P for the report.
 
-31. `setkey`/`setkeyv` would mangle the ordering of tables with memory-duplicate (i.e., sharing the same `address`) columns, [#3496](https://github.com/Rdatatable/data.table/issues/3496) and [#3766](https://github.com/Rdatatable/data.table/issues/3766). This is fixed by forcing copies of duplicated columns before reordering. Relatedly, `[` could also produce such columns when subsetting. Thanks @kirillmayantsev and @alex46015 for reporting and @jaapwalhout and @Atrebas helping to debug&isolate the issue.
+31. A `data.table` created using `setDT()` on a `data.frame` containing identical columns referencing each other would cause `setkey()` to return incorrect results, [#3496](https://github.com/Rdatatable/data.table/issues/3496) and [#3766](https://github.com/Rdatatable/data.table/issues/3766). Thanks @kirillmayantsev and @alex46015 for reporting, and @jaapwalhout and @Atrebas for helping to debug and isolate the issue.
 
 #### NOTES
 
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 1e874a2461..de3fe0282e 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -15771,7 +15771,10 @@ test(2086.04, DT[ , sum(a), keyby = list()], data.table(V1=55L))
 test(2086.05, DT[ , sum(a), by = character()], data.table(V1=55L))
 test(2086.06, DT[ , sum(a), keyby = character()], data.table(V1=55L))
 
-# #3496/#3766 setkey re-reorders columns which are deep-copy-identical
+# simple queries can create tables with columns sharing the same address, #3766
+x = data.table(a=1L, b=c(1L, 4L, 2L, 3L), c=4:1)
+test(2087.1, x[a == 1L, .(b, b2=b)][ , identical(address(b), address(b2))])
+# setkey detects and copies shared address columns, #3496
 x = data.frame(a=paste0(2:1), stringsAsFactors=FALSE)
 x$b = x$a
 setDT(x)