diff --git a/NAMESPACE b/NAMESPACE index e565c631c6..cb45cf86fa 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -50,6 +50,7 @@ S3method(rollup, data.table) export(frollmean) export(nafill) export(setnafill) +export(.Last.updated) S3method("[", data.table) S3method("[<-", data.table) diff --git a/NEWS.md b/NEWS.md index 67764c472a..a822d6e71e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -46,6 +46,8 @@ 6. New functions `nafill` and `setnafill`, [#854](https://github.com/Rdatatable/data.table/issues/854). Thanks to Matthieu Gomez for the request and Jan Gorecki for implementing. +7. New variable `.Last.updated` (similar to R's `.Last.value`) contains the number of rows affected by the most recent `:=` or `set()`, [#1885](https://github.com/Rdatatable/data.table/issues/1885). + #### BUG FIXES 1. `first`, `last`, `head` and `tail` by group no longer error in some cases, [#2030](https://github.com/Rdatatable/data.table/issues/2030) [#3462](https://github.com/Rdatatable/data.table/issues/3462). Thanks to @franknarf1 for reporting. diff --git a/R/data.table.R b/R/data.table.R index 8c2b854259..2354ab44eb 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1150,6 +1150,7 @@ replace_dot_alias <- function(e) { cat("No rows match i. No new columns to add so not evaluating RHS of :=\n") cat("Assigning to 0 row subset of",nrow(x),"rows\n") } + .Call(Cassign, x, irows, NULL, NULL, NULL, FALSE) # only purpose is to write 0 to .Last.updated .global$print = address(x) return(invisible(x)) } diff --git a/R/onLoad.R b/R/onLoad.R index 7d0e1a7e21..82fa26d95f 100644 --- a/R/onLoad.R +++ b/R/onLoad.R @@ -1,5 +1,7 @@ # nocov start +.Last.updated <- vector("integer", 1L) # exported variable; number of rows updated by the last := or set(), #1885 + .onLoad <- function(libname, pkgname) { # Runs when loaded but not attached to search() path; e.g., when a package just Imports (not Depends on) data.table if (!exists("test.data.table", .GlobalEnv, inherits=FALSE) && # check when installed package is loaded but skip when developing the package with cc() @@ -102,6 +104,8 @@ # R could feasibly in future not copy DF's vecsxp in this case. If that changes in R, we'd like to know via the warning # because tests will likely break too. The warning will quickly tell R-core and us why, so we can then update. + .Call(CinitLastUpdated, .Last.updated) #1885 + invisible() } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index e3e9b2e0fd..4d37f21e1b 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -14084,6 +14084,51 @@ test(2029.1, fread(txt), data.table(A=1:2, B=4:5, C=7:8), warning="Discarded sin test(2029.2, fread(txt, quote=""), data.table(A=1:2, B=4:5, C=7:8), warning="Discarded single-line footer: <<3,6>>") test(2029.3, fread(txt, quote="", fill=TRUE), data.table(A=1:3, B=4:6, C=c(7:8,NA))) +# .Last.updated #1885 +d = data.table(a=1:4, b=2:5) +d[, z:=5L] +test(2030.01, .Last.updated, 4L) # new column +d[, z:=6L] +test(2030.02, .Last.updated, 4L) # update existing column +d[2:3, z:=7L] +test(2030.03, .Last.updated, 2L) # sub assign +d[integer(), z:=8L] +test(2030.04, .Last.updated, 0L) # empty sub-assign +d[-1L, z:=9L] +test(2030.05, .Last.updated, 3L) # inverse sub-assign +d[-(1:4), z:=10L] +test(2030.06, .Last.updated, 0L) # inverse empty sub-assign +d[, z:=NULL] +test(2030.07, .Last.updated, 4L) # delete column +d[2:3, z:=11L] +test(2030.08, .Last.updated, 2L) # new column during sub-assign +d[, z:=NULL] +d[integer(), z:=12L] +test(2030.09, .Last.updated, 0L) # new columns from empty sub-assign +d[, z:=NULL] +d[-(1:4), z:=13L] +test(2030.10, .Last.updated, 0L) # new columns from empty inverse sub-assign +d[, z:=NULL][, z:=14L] +test(2030.11, .Last.updated, 4L) # new column from chaining +d[, z:=NULL][2:3, z:=14L] +test(2030.12, .Last.updated, 2L) # sub-assign from chaining +d[2:3, z:=14L][, z:=NULL] +test(2030.13, .Last.updated, 4L) # delete column from chaining +set(d, 1:2, "z", 15L) +test(2030.14, .Last.updated, 2L) # set() updates .Last.updated too +g = data.table(a=1:4, z=15L) # join +d[g, on="a", z:=i.z] +test(2030.15, .Last.updated, 4L) # all match of all rows +g = data.table(a=2:4, z=16L) # join +d[, z:=NULL][g, on="a", z:=i.z] +test(2030.16, .Last.updated, 3L) # all match +g = data.table(a=c(2L,4L,6L), z=17L) +d[, z:=NULL][g, on="a", z:=i.z] +test(2030.17, .Last.updated, 2L) # partial match +g = data.table(a=5:6, z=18L) +d[, z:=NULL][g, on="a", z:=i.z] +test(2030.18, .Last.updated, 0L) # zero match + ################################### # Add new tests above this line # diff --git a/src/assign.c b/src/assign.c index 53b9021765..89899d34df 100644 --- a/src/assign.c +++ b/src/assign.c @@ -270,6 +270,8 @@ SEXP selfrefokwrapper(SEXP x, SEXP verbose) { return ScalarInteger(_selfrefok(x,FALSE,LOGICAL(verbose)[0])); } +int *_Last_updated = NULL; + SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values, SEXP verb) { // For internal use only by := in [.data.table, and set() @@ -342,13 +344,17 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values, SEXP v if (verbose) Rprintf("Assigning to %d row subset of %d rows\n", numToDo, nrow); // TODO: include in message if any rows are assigned several times (e.g. by=.EACHI with dups in i) if (numToDo==0) { - if (!length(newcolnames)) return(dt); // all items of rows either 0 or NA. !length(newcolnames) for #759 + if (!length(newcolnames)) { + *_Last_updated = 0; + return(dt); // all items of rows either 0 or NA. !length(newcolnames) for #759 + } if (verbose) Rprintf("Added %d new column%s initialized with all-NA\n", length(newcolnames), (length(newcolnames)>1)?"s":""); } } if (!length(cols)) { warning("length(LHS)==0; no columns to delete or assign RHS to."); // test 1295 covers + *_Last_updated = 0; return(dt); } // FR #2077 - set able to add new cols by reference @@ -624,6 +630,7 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values, SEXP v } memrecycle(targetcol, rows, 0, targetlen, RHS); // also called from dogroups where these arguments are used more } + *_Last_updated = numToDo; // the updates have taken place with no error, so update .Last.updated now PROTECT(assignedNames = allocVector(STRSXP, LENGTH(cols))); protecti++; for (i=0;i