From f34da591f3957a21a8e54319bae50716e602b04c Mon Sep 17 00:00:00 2001 From: ben-schwen Date: Tue, 29 Sep 2020 00:09:47 +0200 Subject: [PATCH 1/5] fixed order preserving of fintersect --- R/setops.R | 8 ++++---- inst/tests/tests.Rraw | 2 ++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/R/setops.R b/R/setops.R index 1ac949601b..8f11c68a74 100644 --- a/R/setops.R +++ b/R/setops.R @@ -62,11 +62,11 @@ fintersect = function(x, y, all=FALSE) { if (all) { x = shallow(x)[, ".seqn" := rowidv(x)] y = shallow(y)[, ".seqn" := rowidv(y)] - jn.on = c(".seqn",setdiff(names(x),".seqn")) - x[y, .SD, .SDcols=setdiff(names(x),".seqn"), nomatch=NULL, on=jn.on] + jn.on = c(".seqn",setdiff(names(y),".seqn")) + y[x, .SD, .SDcols=setdiff(names(y),".seqn"), nomatch=NULL, on=jn.on] } else { - z = funique(y) # fixes #3034. When .. prefix in i= is implemented (TODO), this can be x[funique(..y), on=, multi=] - x[z, nomatch=NULL, on=names(x), mult="first"] + z = funique(x) # fixes #3034. When .. prefix in i= is implemented (TODO), this can be x[funique(..y), on=, multi=] + y[z, nomatch=NULL, on=names(y), mult="first"] } } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 46b6716cee..51ad259c5b 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17144,3 +17144,5 @@ test(2153.4, address(ans$V1[[1L]]), address(ans$V1[[2L]])) # .NGRP doesn't chan test(2153.5, DT[, .(list(c(0L,.N,0L))), by=x], # c() here will create new object so this is ok anyway; i.e. address(.N) is not present in j's result data.table(x=1:2, V1=list(c(0L,1L,0L), c(0L,2L,0L)))) +# fintersect now also preservers order of first argument like intersect, #4716 +test(2154, fintersect(data.table(x=c("b", "c", "a")), data.table(x=c("a","c")))$x, c("c", "a")) From 701b98661a77b6f6b91d0fe2e3dee7898cfe251f Mon Sep 17 00:00:00 2001 From: ben-schwen Date: Tue, 29 Sep 2020 00:17:12 +0200 Subject: [PATCH 2/5] added code commentary --- R/setops.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/setops.R b/R/setops.R index 8f11c68a74..b6dcd7b0b2 100644 --- a/R/setops.R +++ b/R/setops.R @@ -63,6 +63,7 @@ fintersect = function(x, y, all=FALSE) { x = shallow(x)[, ".seqn" := rowidv(x)] y = shallow(y)[, ".seqn" := rowidv(y)] jn.on = c(".seqn",setdiff(names(y),".seqn")) + # fixes #4716 by preserving order of 1st (uses y[x] join) argument instead of 2nd (uses x[y] join) y[x, .SD, .SDcols=setdiff(names(y),".seqn"), nomatch=NULL, on=jn.on] } else { z = funique(x) # fixes #3034. When .. prefix in i= is implemented (TODO), this can be x[funique(..y), on=, multi=] From 6a3297fec460f5a6facd0a7cfb8af36446651ed4 Mon Sep 17 00:00:00 2001 From: ben-schwen Date: Tue, 29 Sep 2020 00:21:55 +0200 Subject: [PATCH 3/5] resolve tests.Rraw conflict --- inst/tests/tests.Rraw | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 51ad259c5b..0eac2e1b6e 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17144,5 +17144,11 @@ test(2153.4, address(ans$V1[[1L]]), address(ans$V1[[2L]])) # .NGRP doesn't chan test(2153.5, DT[, .(list(c(0L,.N,0L))), by=x], # c() here will create new object so this is ok anyway; i.e. address(.N) is not present in j's result data.table(x=1:2, V1=list(c(0L,1L,0L), c(0L,2L,0L)))) +# warning message segfault when no column names present, #4644 +test(2154.1, fread("0.0\n", colClasses="integer"), data.table(V1=0.0), + warning="Attempt to override column 1 of inherent type 'float64' down to 'int32' ignored.*please") +test(2154.2, fread("A\n0.0\n", colClasses="integer"), data.table(A=0.0), + warning="Attempt to override column 1 <> of inherent type 'float64' down to 'int32' ignored.*please") + # fintersect now also preservers order of first argument like intersect, #4716 -test(2154, fintersect(data.table(x=c("b", "c", "a")), data.table(x=c("a","c")))$x, c("c", "a")) +test(2155, fintersect(data.table(x=c("b", "c", "a")), data.table(x=c("a","c")))$x, c("c", "a")) From 8409881b381e6cac93029a0009817c14d69c7758 Mon Sep 17 00:00:00 2001 From: Matt Dowle Date: Mon, 4 Jan 2021 20:37:15 -0700 Subject: [PATCH 4/5] Added news item --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index df1b96436c..3c62f3c261 100644 --- a/NEWS.md +++ b/NEWS.md @@ -8,6 +8,8 @@ 1. If `fread()` discards a single line footer, the warning message which includes the discarded text now displays any non-ASCII characters correctly on Windows, [#4747](https://github.com/Rdatatable/data.table/issues/4747). Thanks to @shrektan for reporting and the PR. +2. `fintersect()` now retains the order of the first argument as reasonably expected, rather than retaining the order of the second argument, [#4716](https://github.com/Rdatatable/data.table/issues/4716). Thanks to Michel Lang for reporting, and Ben Schwen for the PR. + ## NOTES 1. Compiling from source no longer requires `zlib` header files to be available, [#4844](https://github.com/Rdatatable/data.table/pull/4844). The output suggests installing `zlib` headers, and how (e.g. `zlib1g-dev` on Ubuntu) as before, but now proceeds with `gzip` compression disabled in `fwrite`. Upon calling `fwrite(DT, "file.csv.gz")` at runtime, an error message suggests to reinstall `data.table` with `zlib` headers available. This does not apply to users on Windows or Mac who install the pre-compiled binary package from CRAN. From 17e0916a8315e0c393ad95b0ff49159e00b5b811 Mon Sep 17 00:00:00 2001 From: Matt Dowle Date: Mon, 4 Jan 2021 20:39:20 -0700 Subject: [PATCH 5/5] Added Ben to contributors -- thank you and welcome --- DESCRIPTION | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 3e878e84bd..00ec8c3bb6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -60,7 +60,8 @@ Authors@R: c( person("Jens Peder","Meldgaard", role="ctb"), person("Vaclav","Tlapak", role="ctb"), person("Kevin","Ushey", role="ctb"), - person("Dirk","Eddelbuettel", role="ctb")) + person("Dirk","Eddelbuettel", role="ctb"), + person("Ben","Schwen", role="ctb")) Depends: R (>= 3.1.0) Imports: methods Suggests: bit64 (>= 4.0.0), bit (>= 4.0.4), curl, R.utils, xts, nanotime, zoo (>= 1.8-1), yaml, knitr, rmarkdown