From f09da12e15e1773299797a5d9a5422f8ba265bfe Mon Sep 17 00:00:00 2001
From: jangorecki <j.gorecki@wit.edu.pl>
Date: Thu, 10 Jan 2019 12:16:26 +0530
Subject: [PATCH 1/2] merge retains class of first argument, closes #1378

---
 NEWS.md               |  2 ++
 R/merge.R             | 11 +++++------
 inst/tests/tests.Rraw |  8 ++++++--
 3 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index d2fb150451..20fc570dbf 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -92,6 +92,8 @@
 
 6. The one and only usage of `UNPROTECT_PTR()` has been removed, [#3232](https://github.com/Rdatatable/data.table/issues/3232). Thanks to Tomas Kalibera's investigation and advice here: https://developer.r-project.org/Blog/public/2018/12/10/unprotecting-by-value/index.html
 
+7. `merge` method now retains class of first argument passed to method, close [#1378](https://github.com/Rdatatable/data.table/issues/1378). Thanks to @michaelquinn32 for reopening.
+
 
 ### Changes in v1.11.8  (30 Sep 2018)
 
diff --git a/R/merge.R b/R/merge.R
index 332a865e07..087d40967c 100644
--- a/R/merge.R
+++ b/R/merge.R
@@ -4,6 +4,7 @@ merge.data.table <- function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FA
     stop("Argument 'sort' should be logical TRUE/FALSE")
   if (!no.dups %in% c(TRUE, FALSE))
     stop("Argument 'no.dups' should be logical TRUE/FALSE")
+  class_x = class(x)
   if (!is.data.table(y)) {
     y = as.data.table(y)
     if (missing(by) && missing(by.x)) {
@@ -60,11 +61,11 @@ merge.data.table <- function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FA
     end[chmatch(dupkeyx, end, 0L)] = paste0(dupkeyx, suffixes[2L])
   }
 
-  dt = y[x,nomatch = if (all.x) NA else 0L,on=by,allow.cartesian=allow.cartesian]   # includes JIS columns (with a i. prefix if conflict with x names)
+  dt = y[x, nomatch = if (all.x) NA else NULL, on=by, allow.cartesian=allow.cartesian]   # includes JIS columns (with a i. prefix if conflict with x names)
 
   if (all.y && nrow(y)) {  # If y does not have any rows, no need to proceed
     # Perhaps not very commonly used, so not a huge deal that the join is redone here.
-    missingyidx = y[!x,which=TRUE,on=by,allow.cartesian=allow.cartesian]
+    missingyidx = y[!x, which=TRUE, on=by, allow.cartesian=allow.cartesian]
     if (length(missingyidx)) {
       yy = y[missingyidx]
       othercolsx = setdiff(names(x), by)
@@ -95,9 +96,7 @@ merge.data.table <- function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FA
             " are duplicated in the result")
   }
 
-  # merge resets class, #1378. X[Y] is quite clear that X is being *subset* by Y,
-  # makes sense to therefore retain X's class, unlike `merge`. Hard to tell what
-  # class to retain for *full join* for example.
-  setattr(dt, 'class', c("data.table", "data.frame"))
+  # merge retain class of first argument that resulted dispatch to this method #1378
+  setattr(dt, "class", class_x)
   dt
 }
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 3cf867c456..3f1d62a0c2 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -7544,12 +7544,16 @@ test(1568, dt[, sum(z), keyby=.(I(x), I(y))], data.table(I=I(ordered(rep(1:3,eac
 
 # Test 1569 is written under melt above.
 
-# fix for #1378, merge resets class
+# fix for #1378, merge retains class
 X = data.table(a=1:3, b=4:6)
 Y = data.table(a=1L, c=5L)
 setattr(Y, 'class', c("custom","data.table","data.frame"))
 test(1570.1, class(merge(X, Y, all=TRUE, by="a")), class(X))
-test(1570.2, class(merge(Y, X, all=TRUE, by="a")), class(X))
+test(1570.2, class(merge(Y, X, all=TRUE, by="a")), class(Y)) # class of result match to class of first argument 'x' to merge method
+A = data.table(x = c(1, 2, 3), y = c(4, 5, 6))
+B = data.table(x = c(1), w = c(5))
+class(A) = c("custom", "data.table", "data.frame")
+test(1570.3, class(merge(A, B, by="x")), class(A))
 
 # #1379, tstrsplit gains names argument
 X = data.table(a=c("ABC", "DEFG"))

From 4b83d3f2aaff18f89ff7696c9259bc76934152fa Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Fri, 31 May 2019 14:24:56 -0700
Subject: [PATCH 2/2] news item moved up

---
 NEWS.md               | 5 ++---
 R/merge.R             | 4 ++--
 inst/tests/tests.Rraw | 4 ++--
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 1f2eeecd3f..3da712bc7f 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -92,7 +92,6 @@
 
 15. `DT[order(col)[1:5], ...]` (i.e. where `i` is a compound expression involving `order()`) is now optimized to use `data.table`'s multithreaded `forder`, [#1921](https://github.com/Rdatatable/data.table/issues/1921). This example is not a fully optimal top-N query since the full ordering is still computed. The improvement is that the call to `order()` is computed faster for any `i` expression using `order`.
 
-
 #### BUG FIXES
 
 1. `first`, `last`, `head` and `tail` by group no longer error in some cases, [#2030](https://github.com/Rdatatable/data.table/issues/2030) [#3462](https://github.com/Rdatatable/data.table/issues/3462). Thanks to @franknarf1 for reporting.
@@ -131,6 +130,8 @@
 
 18. `rbind.data.frame` on `IDate` columns changed the column from `integer` to `double`, [#2008](https://github.com/Rdatatable/data.table/issues/2008). Thanks to @rmcgehee for reporting.
 
+19. `merge.data,table` now retains any custom classes of the first argument, [#1378](https://github.com/Rdatatable/data.table/issues/1378). Thanks to @michaelquinn32 for reopening.
+
 #### NOTES
 
 1. `rbindlist`'s `use.names="check"` now emits its message for automatic column names (`"V[0-9]+"`) too, [#3484](https://github.com/Rdatatable/data.table/pull/3484). See news item 5 of v1.12.2 below.
@@ -357,8 +358,6 @@
 
 6. The one and only usage of `UNPROTECT_PTR()` has been removed, [#3232](https://github.com/Rdatatable/data.table/issues/3232). Thanks to Tomas Kalibera's investigation and advice here: https://developer.r-project.org/Blog/public/2018/12/10/unprotecting-by-value/index.html
 
-7. `merge` method now retains class of first argument passed to method, close [#1378](https://github.com/Rdatatable/data.table/issues/1378). Thanks to @michaelquinn32 for reopening.
-
 
 ### Changes in v1.11.8  (30 Sep 2018)
 
diff --git a/R/merge.R b/R/merge.R
index 70fd40e61f..dd08e713f8 100644
--- a/R/merge.R
+++ b/R/merge.R
@@ -61,7 +61,7 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
     end[chmatch(dupkeyx, end, 0L)] = paste0(dupkeyx, suffixes[2L])
   }
 
-  dt = y[x, nomatch = if (all.x) NA else NULL, on=by, allow.cartesian=allow.cartesian]   # includes JIS columns (with a i. prefix if conflict with x names)
+  dt = y[x, nomatch=if (all.x) NA else NULL, on=by, allow.cartesian=allow.cartesian]   # includes JIS columns (with a i. prefix if conflict with x names)
 
   if (all.y && nrow(y)) {  # If y does not have any rows, no need to proceed
     # Perhaps not very commonly used, so not a huge deal that the join is redone here.
@@ -96,7 +96,7 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
             " are duplicated in the result")
   }
 
-  # merge retain class of first argument that resulted dispatch to this method #1378
+  # retain custom classes of first argument that resulted in dispatch to this method, #1378
   setattr(dt, "class", class_x)
   dt
 }
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index a6b7092d89..ea6107383f 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -7727,12 +7727,12 @@ test(1568, dt[, sum(z), keyby=.(I(x), I(y))], data.table(I=I(ordered(rep(1:3,eac
 
 # Old tests 1569-71 were moved to melt section and are now 1035-37
 
-# fix for #1378, merge retains class
+# fix for #1378, merge retains class of first argument
 X = data.table(a=1:3, b=4:6)
 Y = data.table(a=1L, c=5L)
 setattr(Y, 'class', c("custom","data.table","data.frame"))
 test(1570.1, class(merge(X, Y, all=TRUE, by="a")), class(X))
-test(1570.2, class(merge(Y, X, all=TRUE, by="a")), class(Y)) # class of result match to class of first argument 'x' to merge method
+test(1570.2, class(merge(Y, X, all=TRUE, by="a")), class(Y))
 A = data.table(x = c(1, 2, 3), y = c(4, 5, 6))
 B = data.table(x = c(1), w = c(5))
 class(A) = c("custom", "data.table", "data.frame")