From f09da12e15e1773299797a5d9a5422f8ba265bfe Mon Sep 17 00:00:00 2001 From: jangorecki Date: Thu, 10 Jan 2019 12:16:26 +0530 Subject: [PATCH 1/2] merge retains class of first argument, closes #1378 --- NEWS.md | 2 ++ R/merge.R | 11 +++++------ inst/tests/tests.Rraw | 8 ++++++-- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/NEWS.md b/NEWS.md index d2fb150451..20fc570dbf 100644 --- a/NEWS.md +++ b/NEWS.md @@ -92,6 +92,8 @@ 6. The one and only usage of `UNPROTECT_PTR()` has been removed, [#3232](https://github.com/Rdatatable/data.table/issues/3232). Thanks to Tomas Kalibera's investigation and advice here: https://developer.r-project.org/Blog/public/2018/12/10/unprotecting-by-value/index.html +7. `merge` method now retains class of first argument passed to method, close [#1378](https://github.com/Rdatatable/data.table/issues/1378). Thanks to @michaelquinn32 for reopening. + ### Changes in v1.11.8 (30 Sep 2018) diff --git a/R/merge.R b/R/merge.R index 332a865e07..087d40967c 100644 --- a/R/merge.R +++ b/R/merge.R @@ -4,6 +4,7 @@ merge.data.table <- function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FA stop("Argument 'sort' should be logical TRUE/FALSE") if (!no.dups %in% c(TRUE, FALSE)) stop("Argument 'no.dups' should be logical TRUE/FALSE") + class_x = class(x) if (!is.data.table(y)) { y = as.data.table(y) if (missing(by) && missing(by.x)) { @@ -60,11 +61,11 @@ merge.data.table <- function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FA end[chmatch(dupkeyx, end, 0L)] = paste0(dupkeyx, suffixes[2L]) } - dt = y[x,nomatch = if (all.x) NA else 0L,on=by,allow.cartesian=allow.cartesian] # includes JIS columns (with a i. prefix if conflict with x names) + dt = y[x, nomatch = if (all.x) NA else NULL, on=by, allow.cartesian=allow.cartesian] # includes JIS columns (with a i. prefix if conflict with x names) if (all.y && nrow(y)) { # If y does not have any rows, no need to proceed # Perhaps not very commonly used, so not a huge deal that the join is redone here. - missingyidx = y[!x,which=TRUE,on=by,allow.cartesian=allow.cartesian] + missingyidx = y[!x, which=TRUE, on=by, allow.cartesian=allow.cartesian] if (length(missingyidx)) { yy = y[missingyidx] othercolsx = setdiff(names(x), by) @@ -95,9 +96,7 @@ merge.data.table <- function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FA " are duplicated in the result") } - # merge resets class, #1378. X[Y] is quite clear that X is being *subset* by Y, - # makes sense to therefore retain X's class, unlike `merge`. Hard to tell what - # class to retain for *full join* for example. - setattr(dt, 'class', c("data.table", "data.frame")) + # merge retain class of first argument that resulted dispatch to this method #1378 + setattr(dt, "class", class_x) dt } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 3cf867c456..3f1d62a0c2 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -7544,12 +7544,16 @@ test(1568, dt[, sum(z), keyby=.(I(x), I(y))], data.table(I=I(ordered(rep(1:3,eac # Test 1569 is written under melt above. -# fix for #1378, merge resets class +# fix for #1378, merge retains class X = data.table(a=1:3, b=4:6) Y = data.table(a=1L, c=5L) setattr(Y, 'class', c("custom","data.table","data.frame")) test(1570.1, class(merge(X, Y, all=TRUE, by="a")), class(X)) -test(1570.2, class(merge(Y, X, all=TRUE, by="a")), class(X)) +test(1570.2, class(merge(Y, X, all=TRUE, by="a")), class(Y)) # class of result match to class of first argument 'x' to merge method +A = data.table(x = c(1, 2, 3), y = c(4, 5, 6)) +B = data.table(x = c(1), w = c(5)) +class(A) = c("custom", "data.table", "data.frame") +test(1570.3, class(merge(A, B, by="x")), class(A)) # #1379, tstrsplit gains names argument X = data.table(a=c("ABC", "DEFG")) From 4b83d3f2aaff18f89ff7696c9259bc76934152fa Mon Sep 17 00:00:00 2001 From: mattdowle Date: Fri, 31 May 2019 14:24:56 -0700 Subject: [PATCH 2/2] news item moved up --- NEWS.md | 5 ++--- R/merge.R | 4 ++-- inst/tests/tests.Rraw | 4 ++-- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/NEWS.md b/NEWS.md index 1f2eeecd3f..3da712bc7f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -92,7 +92,6 @@ 15. `DT[order(col)[1:5], ...]` (i.e. where `i` is a compound expression involving `order()`) is now optimized to use `data.table`'s multithreaded `forder`, [#1921](https://github.com/Rdatatable/data.table/issues/1921). This example is not a fully optimal top-N query since the full ordering is still computed. The improvement is that the call to `order()` is computed faster for any `i` expression using `order`. - #### BUG FIXES 1. `first`, `last`, `head` and `tail` by group no longer error in some cases, [#2030](https://github.com/Rdatatable/data.table/issues/2030) [#3462](https://github.com/Rdatatable/data.table/issues/3462). Thanks to @franknarf1 for reporting. @@ -131,6 +130,8 @@ 18. `rbind.data.frame` on `IDate` columns changed the column from `integer` to `double`, [#2008](https://github.com/Rdatatable/data.table/issues/2008). Thanks to @rmcgehee for reporting. +19. `merge.data,table` now retains any custom classes of the first argument, [#1378](https://github.com/Rdatatable/data.table/issues/1378). Thanks to @michaelquinn32 for reopening. + #### NOTES 1. `rbindlist`'s `use.names="check"` now emits its message for automatic column names (`"V[0-9]+"`) too, [#3484](https://github.com/Rdatatable/data.table/pull/3484). See news item 5 of v1.12.2 below. @@ -357,8 +358,6 @@ 6. The one and only usage of `UNPROTECT_PTR()` has been removed, [#3232](https://github.com/Rdatatable/data.table/issues/3232). Thanks to Tomas Kalibera's investigation and advice here: https://developer.r-project.org/Blog/public/2018/12/10/unprotecting-by-value/index.html -7. `merge` method now retains class of first argument passed to method, close [#1378](https://github.com/Rdatatable/data.table/issues/1378). Thanks to @michaelquinn32 for reopening. - ### Changes in v1.11.8 (30 Sep 2018) diff --git a/R/merge.R b/R/merge.R index 70fd40e61f..dd08e713f8 100644 --- a/R/merge.R +++ b/R/merge.R @@ -61,7 +61,7 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL end[chmatch(dupkeyx, end, 0L)] = paste0(dupkeyx, suffixes[2L]) } - dt = y[x, nomatch = if (all.x) NA else NULL, on=by, allow.cartesian=allow.cartesian] # includes JIS columns (with a i. prefix if conflict with x names) + dt = y[x, nomatch=if (all.x) NA else NULL, on=by, allow.cartesian=allow.cartesian] # includes JIS columns (with a i. prefix if conflict with x names) if (all.y && nrow(y)) { # If y does not have any rows, no need to proceed # Perhaps not very commonly used, so not a huge deal that the join is redone here. @@ -96,7 +96,7 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL " are duplicated in the result") } - # merge retain class of first argument that resulted dispatch to this method #1378 + # retain custom classes of first argument that resulted in dispatch to this method, #1378 setattr(dt, "class", class_x) dt } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index a6b7092d89..ea6107383f 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -7727,12 +7727,12 @@ test(1568, dt[, sum(z), keyby=.(I(x), I(y))], data.table(I=I(ordered(rep(1:3,eac # Old tests 1569-71 were moved to melt section and are now 1035-37 -# fix for #1378, merge retains class +# fix for #1378, merge retains class of first argument X = data.table(a=1:3, b=4:6) Y = data.table(a=1L, c=5L) setattr(Y, 'class', c("custom","data.table","data.frame")) test(1570.1, class(merge(X, Y, all=TRUE, by="a")), class(X)) -test(1570.2, class(merge(Y, X, all=TRUE, by="a")), class(Y)) # class of result match to class of first argument 'x' to merge method +test(1570.2, class(merge(Y, X, all=TRUE, by="a")), class(Y)) A = data.table(x = c(1, 2, 3), y = c(4, 5, 6)) B = data.table(x = c(1), w = c(5)) class(A) = c("custom", "data.table", "data.frame")