diff --git a/NEWS.md b/NEWS.md index 9b59e25052..3da712bc7f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -92,7 +92,6 @@ 15. `DT[order(col)[1:5], ...]` (i.e. where `i` is a compound expression involving `order()`) is now optimized to use `data.table`'s multithreaded `forder`, [#1921](https://github.com/Rdatatable/data.table/issues/1921). This example is not a fully optimal top-N query since the full ordering is still computed. The improvement is that the call to `order()` is computed faster for any `i` expression using `order`. - #### BUG FIXES 1. `first`, `last`, `head` and `tail` by group no longer error in some cases, [#2030](https://github.com/Rdatatable/data.table/issues/2030) [#3462](https://github.com/Rdatatable/data.table/issues/3462). Thanks to @franknarf1 for reporting. @@ -131,6 +130,8 @@ 18. `rbind.data.frame` on `IDate` columns changed the column from `integer` to `double`, [#2008](https://github.com/Rdatatable/data.table/issues/2008). Thanks to @rmcgehee for reporting. +19. `merge.data.table` now retains any custom classes of the first argument, [#1378](https://github.com/Rdatatable/data.table/issues/1378). Thanks to @michaelquinn32 for reopening. + #### NOTES 1. `rbindlist`'s `use.names="check"` now emits its message for automatic column names (`"V[0-9]+"`) too, [#3484](https://github.com/Rdatatable/data.table/pull/3484). See news item 5 of v1.12.2 below. 
diff --git a/R/merge.R b/R/merge.R index a656f913af..dd08e713f8 100644 --- a/R/merge.R +++ b/R/merge.R @@ -4,6 +4,7 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL stop("Argument 'sort' should be logical TRUE/FALSE") if (!no.dups %in% c(TRUE, FALSE)) stop("Argument 'no.dups' should be logical TRUE/FALSE") + class_x = class(x) if (!is.data.table(y)) { y = as.data.table(y) if (missing(by) && missing(by.x)) { @@ -60,11 +61,11 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL end[chmatch(dupkeyx, end, 0L)] = paste0(dupkeyx, suffixes[2L]) } - dt = y[x,nomatch = if (all.x) NA else 0L,on=by,allow.cartesian=allow.cartesian] # includes JIS columns (with a i. prefix if conflict with x names) + dt = y[x, nomatch=if (all.x) NA else NULL, on=by, allow.cartesian=allow.cartesian] # includes JIS columns (with a i. prefix if conflict with x names) if (all.y && nrow(y)) { # If y does not have any rows, no need to proceed # Perhaps not very commonly used, so not a huge deal that the join is redone here. - missingyidx = y[!x,which=TRUE,on=by,allow.cartesian=allow.cartesian] + missingyidx = y[!x, which=TRUE, on=by, allow.cartesian=allow.cartesian] if (length(missingyidx)) { yy = y[missingyidx] othercolsx = setdiff(names(x), by) @@ -95,9 +96,7 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL " are duplicated in the result") } - # merge resets class, #1378. X[Y] is quite clear that X is being *subset* by Y, - # makes sense to therefore retain X's class, unlike `merge`. Hard to tell what - # class to retain for *full join* for example. 
- setattr(dt, 'class', c("data.table", "data.frame")) + # retain custom classes of first argument that resulted in dispatch to this method, #1378 + setattr(dt, "class", class_x) dt } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index f2564bd163..ea6107383f 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -7727,12 +7727,16 @@ test(1568, dt[, sum(z), keyby=.(I(x), I(y))], data.table(I=I(ordered(rep(1:3,eac # Old tests 1569-71 were moved to melt section and are now 1035-37 -# fix for #1378, merge resets class +# fix for #1378, merge retains class of first argument X = data.table(a=1:3, b=4:6) Y = data.table(a=1L, c=5L) setattr(Y, 'class', c("custom","data.table","data.frame")) test(1570.1, class(merge(X, Y, all=TRUE, by="a")), class(X)) -test(1570.2, class(merge(Y, X, all=TRUE, by="a")), class(X)) +test(1570.2, class(merge(Y, X, all=TRUE, by="a")), class(Y)) +A = data.table(x = c(1, 2, 3), y = c(4, 5, 6)) +B = data.table(x = c(1), w = c(5)) +class(A) = c("custom", "data.table", "data.frame") +test(1570.3, class(merge(A, B, by="x")), class(A)) # #1379, tstrsplit gains names argument X = data.table(a=c("ABC", "DEFG"))