diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index cd9e15c7f4..759b51b23b 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -94,15 +94,15 @@ build: ## build data.table sources as tar.gz archive - mkdir.exe -p cran/bin/windows/contrib/$R_VERSION; mv.exe $(ls.exe -1t data.table_*.zip | head.exe -n 1) cran/bin/windows/contrib/$R_VERSION .test-install-r-rel-win: &install-r-rel-win - - curl.exe -s -o ../R-rel.exe https://cloud.r-project.org/bin/windows/base/R-4.1.3-win.exe; Start-Process -FilePath ..\R-rel.exe -ArgumentList "/VERYSILENT /DIR=C:\R" -NoNewWindow -Wait + - curl.exe -s -o ../R-rel.exe https://cloud.r-project.org/bin/windows/base/R-4.2.1-win.exe; Start-Process -FilePath ..\R-rel.exe -ArgumentList "/VERYSILENT /DIR=C:\R" -NoNewWindow -Wait # see #5198 for discussion about the https link used above; it will break each time R is released and the version number will need to be updated .test-install-r-devel-win: &install-r-devel-win - curl.exe -s -o ../R-devel.exe https://cloud.r-project.org/bin/windows/base/R-devel-win.exe; Start-Process -FilePath ..\R-devel.exe -ArgumentList "/VERYSILENT /DIR=C:\R" -NoNewWindow -Wait .test-install-r-oldrel-win: &install-r-oldrel-win - - curl.exe -s -o ../R-oldrel.exe https://cloud.r-project.org/bin/windows/base/old/4.0.5/R-4.0.5-win.exe; Start-Process -FilePath ..\R-oldrel.exe -ArgumentList "/VERYSILENT /DIR=C:\R" -NoNewWindow -Wait + - curl.exe -s -o ../R-oldrel.exe https://cloud.r-project.org/bin/windows/base/old/4.1.3/R-4.1.3-win.exe; Start-Process -FilePath ..\R-oldrel.exe -ArgumentList "/VERYSILENT /DIR=C:\R" -NoNewWindow -Wait .test-install-rtools-win: &install-rtools-win - - curl.exe -s -o ../rtools.exe https://cloud.r-project.org/bin/windows/Rtools/rtools40-x86_64.exe; Start-Process -FilePath ..\rtools.exe -ArgumentList "/VERYSILENT /DIR=C:\rtools40" -NoNewWindow -Wait + - curl.exe -s -o ../rtools.exe https://cloud.r-project.org/bin/windows/Rtools/rtools42/files/rtools42-5253-5107-signed.exe; Start-Process -FilePath 
..\rtools.exe -ArgumentList "/VERYSILENT /DIR=C:\rtools42" -NoNewWindow -Wait .test-template: &test stage: test @@ -246,7 +246,7 @@ test-rel-win: ## R-release on Windows, test and build binaries before_script: - *install-r-rel-win - *install-rtools-win - - $ENV:PATH = "C:\R\bin;C:\rtools40\usr\bin;$ENV:PATH" + - $ENV:PATH = "C:\R\bin;C:\rtools42\usr\bin;$ENV:PATH" - Rscript.exe -e "source('.ci/ci.R'); install.packages(dcf.dependencies('DESCRIPTION', which='most'), quiet=TRUE)" - *cp-src-win - rm.exe -r bus @@ -264,7 +264,7 @@ test-dev-win: ## R-devel on Windows; see #5294 for changes in Dec 2021 related t R_VERSION: "$R_DEVEL_VERSION" before_script: - *install-r-devel-win - - curl.exe -s -o ../rtools.exe https://www.r-project.org/nosvn/winutf8/ucrt3/rtools42-5038-5046.exe; Start-Process -FilePath ..\rtools.exe -ArgumentList "/VERYSILENT /DIR=C:\rtools42" -NoNewWindow -Wait + - *install-rtools-win - $ENV:PATH = "C:\R\bin;C:\rtools42\usr\bin;$ENV:PATH" - Rscript.exe -e "source('.ci/ci.R'); install.packages(dcf.dependencies('DESCRIPTION', which='most', exclude=c('knitr','rmarkdown')), quiet=TRUE)" ## exclude= for #5294 - *cp-src-win @@ -284,7 +284,7 @@ test-old-win: ## R-oldrel on Windows before_script: - *install-r-oldrel-win - *install-rtools-win - - $ENV:PATH = "C:\R\bin;C:\rtools40\usr\bin;$ENV:PATH" + - $ENV:PATH = "C:\R\bin;C:\rtools42\usr\bin;$ENV:PATH" - Rscript.exe -e "source('.ci/ci.R'); install.packages(dcf.dependencies('DESCRIPTION', which='most', exclude=c('knitr','rmarkdown')), quiet=TRUE)" ## exclude= for #5294 - *cp-src-win - rm.exe -r bus diff --git a/DESCRIPTION b/DESCRIPTION index 924bdeb2dc..586ef0f308 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -71,7 +71,8 @@ Authors@R: c( person("Boniface Christian","Kamgang", role="ctb"), person("Olivier","Delmarcell", role="ctb"), person("Josh","O'Brien", role="ctb"), - person("Dereck","de Mezquita", role="ctb")) + person("Dereck","de Mezquita", role="ctb"), + person("Michael","Czekanski", role="ctb")) 
Depends: R (>= 3.1.0) Imports: methods Suggests: bit64 (>= 4.0.0), bit (>= 4.0.4), curl, R.utils, xts, nanotime, zoo (>= 1.8-1), yaml, knitr, rmarkdown, markdown diff --git a/NAMESPACE b/NAMESPACE index ad306b4ce8..44676f9f5b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,7 +8,7 @@ exportClasses(data.table, IDate, ITime) export(data.table, tables, setkey, setkeyv, key, "key<-", haskey, CJ, SJ, copy) export(setindex, setindexv, indices) export(as.data.table,is.data.table,test.data.table) -export(last,first,like,"%like%","%ilike%","%flike%","%plike%",between,"%between%",inrange,"%inrange%") +export(last,first,like,"%like%","%ilike%","%flike%","%plike%",between,"%between%",inrange,"%inrange%", "%notin%") export(timetaken) export(truelength, setalloccol, alloc.col, ":=", let) export(setattr, setnames, setcolorder, set, setDT, setDF) diff --git a/NEWS.md b/NEWS.md index 19fc575e0d..039eee5ce4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -294,6 +294,8 @@ 40. New functions `yearmon()` and `yearqtr` give a combined representation of `year()` and `month()`/`quarter()`. These and also `yday`, `wday`, `mday`, `week`, `month` and `year` are now optimized for memory and compute efficiency by removing the `POSIXlt` dependency, [#649](https://github.com/Rdatatable/data.table/issues/649). Thanks to Matt Dowle for the request, and Benjamin Schwendinger for the PR. +41. New function `%notin%` provides a convenient alternative to `!(x %in% y)`, [#4152](https://github.com/Rdatatable/data.table/issues/4152). Thanks to Jan Gorecki for suggesting and Michael Czekanski for the PR. `%notin%` uses half the memory because it computes the result directly as opposed to `!` which allocates a new vector to hold the negated result. If `x` is long enough to occupy more than half the remaining free memory, this can make the difference between the operation working, or failing with an out-of-memory error. + ## BUG FIXES 1. 
`by=.EACHI` when `i` is keyed but `on=` different columns than `i`'s key could create an invalidly keyed result, [#4603](https://github.com/Rdatatable/data.table/issues/4603) [#4911](https://github.com/Rdatatable/data.table/issues/4911). Thanks to @myoung3 and @adamaltmejd for reporting, and @ColeMiller1 for the PR. An invalid key is where a `data.table` is marked as sorted by the key columns but the data is not sorted by those columns, leading to incorrect results from subsequent queries. diff --git a/R/notin.R b/R/notin.R new file mode 100644 index 0000000000..ba5cef5025 --- /dev/null +++ b/R/notin.R @@ -0,0 +1,7 @@ +"%notin%" = function(x, table) { + if (is.character(x) && is.character(table)) { + .Call(Cnotchin, x, table) + } else { + match(x, table, nomatch = 0L) == 0L + } +} diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index f453b96208..338620ba0f 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18804,3 +18804,13 @@ test(2237.1, as.data.frame(dt, row.names=c("x", "y")), df) df = data.frame(a=1:2, b=3:4) test(2237.2, as.data.frame(dt, row.names=NULL), df) +# Test new feature %notin%, #4152 +test(2238.1, 11 %notin% 1:10, TRUE) +test(2238.2, "a" %notin% c(), TRUE) +test(2238.3, "a" %notin% c("a", "b", "c"), FALSE) +test(2238.4, c(1, 2) %notin% c(1,2,3), c(FALSE, FALSE)) +test(2238.5, "a" %notin% character(), TRUE) +test(2238.6, "a" %notin% integer(), TRUE) +test(2238.7, "a" %notin% NULL, TRUE) +test(2238.8, NA %notin% 1:5, TRUE) +test(2238.9, NA %notin% c(1:5, NA), FALSE) diff --git a/man/notin.Rd b/man/notin.Rd new file mode 100644 index 0000000000..d84bb2024d --- /dev/null +++ b/man/notin.Rd @@ -0,0 +1,33 @@ +\name{notin} +\alias{\%notin\%} + +\title{ +Convenience operator for checking if an element is not in a set of elements +} + +\description{ +Check whether an object is absent from a table, i.e., the logical inverse of \code{\link[base:match]{\%in\%}}. 
+} + +\usage{ +x \%notin\% table +} + +\arguments{ + \item{x}{ Vector or \code{NULL}: the values to be matched. } + \item{table}{ Vector or \code{NULL}: the values to be matched against. } +} + + +\value{ + Logical vector, \code{TRUE} for each element of \code{x} \emph{absent} from \code{table}, and \code{FALSE} for each element of \code{x} \emph{present} in \code{table}. +} + +\seealso{ \code{\link[base]{match}}, \code{\link[data.table]{chmatch}} } + + +\examples{ + 11 \%notin\% 1:10 # TRUE + "a" \%notin\% c("a", "b") # FALSE +} + diff --git a/src/data.table.h b/src/data.table.h index a7f52b5e09..b966e86c08 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -259,3 +259,6 @@ int dt_win_snprintf(char *dest, size_t n, const char *fmt, ...); // programming.c SEXP substitute_call_arg_namesR(SEXP expr, SEXP env); + +//negate.c +SEXP notchin(SEXP x, SEXP table); diff --git a/src/init.c b/src/init.c index fd43b956e5..284c30b4fd 100644 --- a/src/init.c +++ b/src/init.c @@ -131,6 +131,7 @@ SEXP test_dt_win_snprintf(); SEXP dt_zlib_version(); SEXP startsWithAny(); SEXP convertDate(); +SEXP notchin(); // .Externals SEXP fastmean(); @@ -230,6 +231,7 @@ R_CallMethodDef callMethods[] = { {"Csubstitute_call_arg_namesR", (DL_FUNC) &substitute_call_arg_namesR, -1}, {"CstartsWithAny", (DL_FUNC)&startsWithAny, -1}, {"CconvertDate", (DL_FUNC)&convertDate, -1}, +{"Cnotchin", (DL_FUNC)&notchin, -1}, {NULL, NULL, 0} }; diff --git a/src/negate.c b/src/negate.c new file mode 100644 index 0000000000..4db3767ff8 --- /dev/null +++ b/src/negate.c @@ -0,0 +1,22 @@ +#include "data.table.h" + +void negateByRef(SEXP x) { + if(TYPEOF(x) != LGLSXP) { + error("not logical or integer vector"); // # nocov + } + const int n = length(x); + Rboolean *ansd = (Rboolean *)LOGICAL(x); + for(int i=0; i