From 9c377fa857641964271a627d2fc6c2dc99c0f427 Mon Sep 17 00:00:00 2001 From: jangorecki Date: Thu, 25 Apr 2019 22:10:16 +0200 Subject: [PATCH 1/3] .Last.nrow at Cassign --- NAMESPACE | 1 + NEWS.md | 2 ++ R/data.table.R | 10 +++++++--- R/onLoad.R | 2 ++ inst/tests/tests.Rraw | 45 +++++++++++++++++++++++++++++++++++++++++++ src/assign.c | 11 +++++++++-- 6 files changed, 66 insertions(+), 5 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index e565c631c6..d42dca1ed6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -50,6 +50,7 @@ S3method(rollup, data.table) export(frollmean) export(nafill) export(setnafill) +export(.Last.nrow) S3method("[", data.table) S3method("[<-", data.table) diff --git a/NEWS.md b/NEWS.md index b905811c7c..4059d49331 100644 --- a/NEWS.md +++ b/NEWS.md @@ -46,6 +46,8 @@ 6. New functions `nafill` and `setnafill`, [#854](https://github.com/Rdatatable/data.table/issues/854). Thanks to Matthieu Gomez for the request and Jan Gorecki for implementing. +7. New technical variable `.Last.nrow` has been introduced to provide information about number of rows affected by the most recent update by reference using `:=`, closes [#1885](https://github.com/Rdatatable/data.table/issues/1885). + #### BUG FIXES 1. `first`, `last`, `head` and `tail` by group no longer error in some cases, [#2030](https://github.com/Rdatatable/data.table/issues/2030) [#3462](https://github.com/Rdatatable/data.table/issues/3462). Thanks to @franknarf1 for reporting. diff --git a/R/data.table.R b/R/data.table.R index 8c2b854259..e9e2cacebc 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -216,6 +216,9 @@ replace_dot_alias <- function(e) { } } +# count of updated rows after `:=` stored in .Last.nrow #1885 +.Last.nrow <- NULL # onLoad init this to copy(NA_integer_) + "[.data.table" <- function (x, i, j, by, keyby, with=TRUE, nomatch=getOption("datatable.nomatch"), mult="all", roll=FALSE, rollends=if (roll=="nearest") c(TRUE,TRUE) else if (roll>=0) c(FALSE,TRUE) else c(TRUE,FALSE), which=FALSE, .SDcols, verbose=getOption("datatable.verbose"), allow.cartesian=getOption("datatable.allow.cartesian"), drop=NULL, on=NULL) { # ..selfcount <<- ..selfcount+1 # in dev, we check no self calls, each of which doubles overhead, or could @@ -1137,6 +1140,7 @@ replace_dot_alias <- function(e) { cols = as.integer(m) newnames=NULL if (identical(irows, integer())) { + .Call(Cassign, NULL, irows, NULL, NULL, NULL, FALSE, TRUE, get(".Last.nrow", envir=topenv())) # call only to write 0 to .Last.nrow # Empty integer() means no rows e.g. logical i with only FALSE and NA # got converted to empty integer() by the which() above # Short circuit and do-nothing since columns already exist. If some don't @@ -1381,7 +1385,7 @@ replace_dot_alias <- function(e) { } if (!is.null(lhs)) { # TODO?: use set() here now that it can add new columns. Then remove newnames and alloc logic above. - .Call(Cassign,x,irows,cols,newnames,jval,verbose) + .Call(Cassign, x, irows, cols, newnames, jval, verbose, TRUE, get(".Last.nrow", envir=topenv())) return(suppPrint(x)) } if ((is.call(jsub) && is.list(jval) && jsub[[1L]] != "get" && !is.object(jval)) || !missingby) { @@ -2095,7 +2099,7 @@ tail.data.table <- function(x, n=6L, ...) { # search for one other .Call to assign in [.data.table to see how it differs } verbose=getOption("datatable.verbose") - x = .Call(Cassign,copy(x),i,cols,newnames,value,verbose) # From 3.1.0, DF[2,"b"] = 7 no longer copies DF$a (so in this [<-.data.table method we need to copy) + x = .Call(Cassign,copy(x),i,cols,newnames,value,verbose,FALSE,NULL) # From 3.1.0, DF[2,"b"] = 7 no longer copies DF$a (so in this [<-.data.table method we need to copy) alloc.col(x) # can maybe avoid this realloc, but this is (slow) [<- anyway, so just be safe. if (length(reinstatekey)) setkeyv(x,reinstatekey) invisible(x) @@ -2638,7 +2642,7 @@ set <- function(x,i=NULL,j,value) # low overhead, loopable .Call(Csetlistelt,l,1L,value) # to avoid the copy by list() in R < 3.1.0 value = l } - .Call(Cassign,x,i,j,NULL,value,FALSE) # verbose=FALSE for speed to avoid getOption() TO DO: somehow read getOption("datatable.verbose") from C level + .Call(Cassign,x,i,j,NULL,value,FALSE,FALSE,NULL) # verbose=FALSE for speed to avoid getOption() TO DO: somehow read getOption("datatable.verbose") from C level invisible(x) } diff --git a/R/onLoad.R b/R/onLoad.R index 7d0e1a7e21..6742d9b48f 100644 --- a/R/onLoad.R +++ b/R/onLoad.R @@ -102,6 +102,8 @@ # R could feasibly in future not copy DF's vecsxp in this case. If that changes in R, we'd like to know via the warning # because tests will likely break too. The warning will quickly tell R-core and us why, so we can then update. + assign(".Last.nrow", copy(NA_integer_), envir=topenv()) #1885 + invisible() } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 8f8f3e71ec..240e2257ed 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -14072,6 +14072,51 @@ test(2027.1, DT[, list(1, ), by=a], error = 'Item 2 of the .() or list() passed test(2027.2, DT[, list(1,2,)], error = 'Item 3 of the .() or list() passed to j is missing') test(2027.3, DT[, .(1,,3,), by=a], error = 'Item 2 of the .() or list() passed to j is missing') +# .Last.nrow #1885 +d = data.table(a=1:4, b=2:5) +d[, z:=5L] +test(2028.01, .Last.nrow, 4L) # new column +d[, z:=6L] +test(2028.02, .Last.nrow, 4L) # update existing column +d[2:3, z:=7L] +test(2028.03, .Last.nrow, 2L) # sub assign +d[integer(), z:=8L] +test(2028.04, .Last.nrow, 0L) # empty sub-assign +d[-1L, z:=9L] +test(2028.05, .Last.nrow, 3L) # inverse sub-assign +d[-(1:4), z:=10L] +test(2028.06, .Last.nrow, 0L) # inverse empty sub-assign +d[, z:=NULL] +test(2028.07, .Last.nrow, 4L) # delete column +d[2:3, z:=11L] +test(2028.08, .Last.nrow, 2L) # new column during sub-assign +d[, z:=NULL] +d[integer(), z:=12L] +test(2028.09, .Last.nrow, 0L) # new columns from empty sub-assign +d[, z:=NULL] +d[-(1:4), z:=13L] +test(2028.10, .Last.nrow, 0L) # new columns from empty inverse sub-assign +d[, z:=NULL][, z:=14L] +test(2028.11, .Last.nrow, 4L) # new column from chaining +d[, z:=NULL][2:3, z:=14L] +test(2028.12, .Last.nrow, 2L) # sub-assign from chaining +d[2:3, z:=14L][, z:=NULL] +test(2028.13, .Last.nrow, 4L) # delete column from chaining +set(d, 1:2, "z", 15L) +test(2028.14, .Last.nrow, 4L) # set does not affect .Last.nrow +g = data.table(a=1:4, z=15L) # join +d[g, on="a", z:=i.z] +test(2028.15, .Last.nrow, 4L) # all match of all rows +g = data.table(a=2:4, z=16L) # join +d[, z:=NULL][g, on="a", z:=i.z] +test(2028.16, .Last.nrow, 3L) # all match +g = data.table(a=c(2L,4L,6L), z=17L) +d[, z:=NULL][g, on="a", z:=i.z] +test(2028.17, .Last.nrow, 2L) # partial match +g = data.table(a=5:6, z=18L) +d[, z:=NULL][g, on="a", z:=i.z] +test(2028.18, .Last.nrow, 0L) # zero match + ################################### # Add new tests above this line # diff --git a/src/assign.c b/src/assign.c index 53b9021765..be6fb321c7 100644 --- a/src/assign.c +++ b/src/assign.c @@ -270,7 +270,7 @@ SEXP selfrefokwrapper(SEXP x, SEXP verbose) { return ScalarInteger(_selfrefok(x,FALSE,LOGICAL(verbose)[0])); } -SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values, SEXP verb) +SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values, SEXP verb, SEXP update, SEXP updated) { // For internal use only by := in [.data.table, and set() // newcolnames : add these columns (if any) @@ -283,7 +283,12 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values, SEXP v const char *c1, *tc1, *tc2; int *buf, k=0, newKeyLength, indexNo; size_t size; // must be size_t otherwise overflow later in memcpy - if (isNull(dt)) error("assign has been passed a NULL dt"); + if (isNull(dt)){ + if (!length(rows) && LOGICAL(update)[0]) { // extra escape to handle case when no rows updated #1885 + INTEGER(updated)[0] = 0; + return(dt); + } else error("assign has been passed a NULL dt"); + } if (TYPEOF(dt) != VECSXP) error("dt passed to assign isn't type VECSXP"); if (length(bindingIsLocked) && LOGICAL(bindingIsLocked)[0]) error(".SD is locked. Updating .SD by reference using := or set are reserved for future use. Use := in j directly. Or use copy(.SD) as a (slow) last resort, until shallow() is exported."); @@ -321,6 +326,7 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values, SEXP v if (isNull(rows)) { numToDo = nrow; targetlen = nrow; + if (LOGICAL(update)[0]) INTEGER(updated)[0] = numToDo; if (verbose) Rprintf("Assigning to all %d rows\n", nrow); // fast way to assign to whole column, without creating 1:nrow(x) vector up in R, or here in C } else { @@ -339,6 +345,7 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values, SEXP v error("i[%d] is %d which is out of range [1,nrow=%d].",i+1,rowsd[i],nrow); // set() reaches here (test 2005.2); := reaches the same error in subset.c first if (rowsd[i]>=1) numToDo++; } + if (LOGICAL(update)[0]) INTEGER(updated)[0] = numToDo; if (verbose) Rprintf("Assigning to %d row subset of %d rows\n", numToDo, nrow); // TODO: include in message if any rows are assigned several times (e.g. by=.EACHI with dups in i) if (numToDo==0) { From 0247d5a3c4cbf6adacae862f4874661d5008e4cf Mon Sep 17 00:00:00 2001 From: jangorecki Date: Thu, 25 Apr 2019 22:35:57 +0200 Subject: [PATCH 2/3] rename to .Last.updated, .Last.nrow will be another var to be added later --- NAMESPACE | 2 +- NEWS.md | 2 +- R/data.table.R | 8 ++++---- R/onLoad.R | 2 +- inst/tests/tests.Rraw | 38 +++++++++++++++++++------------------- 5 files changed, 26 insertions(+), 26 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index d42dca1ed6..cb45cf86fa 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -50,7 +50,7 @@ S3method(rollup, data.table) export(frollmean) export(nafill) export(setnafill) -export(.Last.nrow) +export(.Last.updated) S3method("[", data.table) S3method("[<-", data.table) diff --git a/NEWS.md b/NEWS.md index 4059d49331..d08cec8629 100644 --- a/NEWS.md +++ b/NEWS.md @@ -46,7 +46,7 @@ 6. New functions `nafill` and `setnafill`, [#854](https://github.com/Rdatatable/data.table/issues/854). Thanks to Matthieu Gomez for the request and Jan Gorecki for implementing. -7. New technical variable `.Last.nrow` has been introduced to provide information about number of rows affected by the most recent update by reference using `:=`, closes [#1885](https://github.com/Rdatatable/data.table/issues/1885). +7. New technical variable `.Last.updated` has been introduced to provide information about number of rows affected by the most recent update by reference using `:=`, closes [#1885](https://github.com/Rdatatable/data.table/issues/1885). #### BUG FIXES diff --git a/R/data.table.R b/R/data.table.R index e9e2cacebc..7f1fe98eb0 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -216,8 +216,8 @@ replace_dot_alias <- function(e) { } } -# count of updated rows after `:=` stored in .Last.nrow #1885 -.Last.nrow <- NULL # onLoad init this to copy(NA_integer_) +# count of updated rows after `:=` stored in .Last.updated #1885 +.Last.updated <- NULL # onLoad init this to copy(NA_integer_) "[.data.table" <- function (x, i, j, by, keyby, with=TRUE, nomatch=getOption("datatable.nomatch"), mult="all", roll=FALSE, rollends=if (roll=="nearest") c(TRUE,TRUE) else if (roll>=0) c(FALSE,TRUE) else c(TRUE,FALSE), which=FALSE, .SDcols, verbose=getOption("datatable.verbose"), allow.cartesian=getOption("datatable.allow.cartesian"), drop=NULL, on=NULL) { @@ -1140,7 +1140,7 @@ replace_dot_alias <- function(e) { cols = as.integer(m) newnames=NULL if (identical(irows, integer())) { - .Call(Cassign, NULL, irows, NULL, NULL, NULL, FALSE, TRUE, get(".Last.nrow", envir=topenv())) # call only to write 0 to .Last.nrow + .Call(Cassign, NULL, irows, NULL, NULL, NULL, FALSE, TRUE, get(".Last.updated", envir=topenv())) # call only to write 0 to .Last.updated # Empty integer() means no rows e.g. logical i with only FALSE and NA # got converted to empty integer() by the which() above # Short circuit and do-nothing since columns already exist. If some don't @@ -1385,7 +1385,7 @@ replace_dot_alias <- function(e) { } if (!is.null(lhs)) { # TODO?: use set() here now that it can add new columns. Then remove newnames and alloc logic above. - .Call(Cassign, x, irows, cols, newnames, jval, verbose, TRUE, get(".Last.nrow", envir=topenv())) + .Call(Cassign, x, irows, cols, newnames, jval, verbose, TRUE, get(".Last.updated", envir=topenv())) return(suppPrint(x)) } if ((is.call(jsub) && is.list(jval) && jsub[[1L]] != "get" && !is.object(jval)) || !missingby) { diff --git a/R/onLoad.R b/R/onLoad.R index 6742d9b48f..075f04b396 100644 --- a/R/onLoad.R +++ b/R/onLoad.R @@ -102,7 +102,7 @@ # R could feasibly in future not copy DF's vecsxp in this case. If that changes in R, we'd like to know via the warning # because tests will likely break too. The warning will quickly tell R-core and us why, so we can then update. - assign(".Last.nrow", copy(NA_integer_), envir=topenv()) #1885 + assign(".Last.updated", copy(NA_integer_), envir=topenv()) #1885 invisible() } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 240e2257ed..d139cfd0e4 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -14072,50 +14072,50 @@ test(2027.1, DT[, list(1, ), by=a], error = 'Item 2 of the .() or list() passed test(2027.2, DT[, list(1,2,)], error = 'Item 3 of the .() or list() passed to j is missing') test(2027.3, DT[, .(1,,3,), by=a], error = 'Item 2 of the .() or list() passed to j is missing') -# .Last.nrow #1885 +# .Last.updated #1885 d = data.table(a=1:4, b=2:5) d[, z:=5L] -test(2028.01, .Last.nrow, 4L) # new column +test(2028.01, .Last.updated, 4L) # new column d[, z:=6L] -test(2028.02, .Last.nrow, 4L) # update existing column +test(2028.02, .Last.updated, 4L) # update existing column d[2:3, z:=7L] -test(2028.03, .Last.nrow, 2L) # sub assign +test(2028.03, .Last.updated, 2L) # sub assign d[integer(), z:=8L] -test(2028.04, .Last.nrow, 0L) # empty sub-assign +test(2028.04, .Last.updated, 0L) # empty sub-assign d[-1L, z:=9L] -test(2028.05, .Last.nrow, 3L) # inverse sub-assign +test(2028.05, .Last.updated, 3L) # inverse sub-assign d[-(1:4), z:=10L] -test(2028.06, .Last.nrow, 0L) # inverse empty sub-assign +test(2028.06, .Last.updated, 0L) # inverse empty sub-assign d[, z:=NULL] -test(2028.07, .Last.nrow, 4L) # delete column +test(2028.07, .Last.updated, 4L) # delete column d[2:3, z:=11L] -test(2028.08, .Last.nrow, 2L) # new column during sub-assign +test(2028.08, .Last.updated, 2L) # new column during sub-assign d[, z:=NULL] d[integer(), z:=12L] -test(2028.09, .Last.nrow, 0L) # new columns from empty sub-assign +test(2028.09, .Last.updated, 0L) # new columns from empty sub-assign d[, z:=NULL] d[-(1:4), z:=13L] -test(2028.10, .Last.nrow, 0L) # new columns from empty inverse sub-assign +test(2028.10, .Last.updated, 0L) # new columns from empty inverse sub-assign d[, z:=NULL][, z:=14L] -test(2028.11, .Last.nrow, 4L) # new column from chaining +test(2028.11, .Last.updated, 4L) # new column from chaining d[, z:=NULL][2:3, z:=14L] -test(2028.12, .Last.nrow, 2L) # sub-assign from chaining +test(2028.12, .Last.updated, 2L) # sub-assign from chaining d[2:3, z:=14L][, z:=NULL] -test(2028.13, .Last.nrow, 4L) # delete column from chaining +test(2028.13, .Last.updated, 4L) # delete column from chaining set(d, 1:2, "z", 15L) -test(2028.14, .Last.nrow, 4L) # set does not affect .Last.nrow +test(2028.14, .Last.updated, 4L) # set does not affect .Last.updated g = data.table(a=1:4, z=15L) # join d[g, on="a", z:=i.z] -test(2028.15, .Last.nrow, 4L) # all match of all rows +test(2028.15, .Last.updated, 4L) # all match of all rows g = data.table(a=2:4, z=16L) # join d[, z:=NULL][g, on="a", z:=i.z] -test(2028.16, .Last.nrow, 3L) # all match +test(2028.16, .Last.updated, 3L) # all match g = data.table(a=c(2L,4L,6L), z=17L) d[, z:=NULL][g, on="a", z:=i.z] -test(2028.17, .Last.nrow, 2L) # partial match +test(2028.17, .Last.updated, 2L) # partial match g = data.table(a=5:6, z=18L) d[, z:=NULL][g, on="a", z:=i.z] -test(2028.18, .Last.nrow, 0L) # zero match +test(2028.18, .Last.updated, 0L) # zero match ################################### From f390515886c304032869268a1309e8c40b3e5cb2 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Thu, 25 Apr 2019 18:12:07 -0700 Subject: [PATCH 3/3] tidy --- NEWS.md | 2 +- R/data.table.R | 11 ++++------- R/onLoad.R | 4 +++- inst/tests/tests.Rraw | 2 +- src/assign.c | 20 ++++++++++---------- src/init.c | 10 ++++++++++ 6 files changed, 29 insertions(+), 20 deletions(-) diff --git a/NEWS.md b/NEWS.md index f1b3deaaa7..a822d6e71e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -46,7 +46,7 @@ 6. New functions `nafill` and `setnafill`, [#854](https://github.com/Rdatatable/data.table/issues/854). Thanks to Matthieu Gomez for the request and Jan Gorecki for implementing. -7. New technical variable `.Last.updated` has been introduced to provide information about number of rows affected by the most recent update by reference using `:=`, closes [#1885](https://github.com/Rdatatable/data.table/issues/1885). +7. New variable `.Last.updated` (similar to R's `.Last.value`) contains the number of rows affected by the most recent `:=` or `set()`, [#1885](https://github.com/Rdatatable/data.table/issues/1885). #### BUG FIXES diff --git a/R/data.table.R b/R/data.table.R index 7f1fe98eb0..2354ab44eb 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -216,9 +216,6 @@ replace_dot_alias <- function(e) { } } -# count of updated rows after `:=` stored in .Last.updated #1885 -.Last.updated <- NULL # onLoad init this to copy(NA_integer_) - "[.data.table" <- function (x, i, j, by, keyby, with=TRUE, nomatch=getOption("datatable.nomatch"), mult="all", roll=FALSE, rollends=if (roll=="nearest") c(TRUE,TRUE) else if (roll>=0) c(FALSE,TRUE) else c(TRUE,FALSE), which=FALSE, .SDcols, verbose=getOption("datatable.verbose"), allow.cartesian=getOption("datatable.allow.cartesian"), drop=NULL, on=NULL) { # ..selfcount <<- ..selfcount+1 # in dev, we check no self calls, each of which doubles overhead, or could @@ -1140,7 +1137,6 @@ replace_dot_alias <- function(e) { cols = as.integer(m) newnames=NULL if (identical(irows, integer())) { - .Call(Cassign, NULL, irows, NULL, NULL, NULL, FALSE, TRUE, get(".Last.updated", envir=topenv())) # call only to write 0 to .Last.updated # Empty integer() means no rows e.g. logical i with only FALSE and NA # got converted to empty integer() by the which() above # Short circuit and do-nothing since columns already exist. If some don't @@ -1154,6 +1150,7 @@ replace_dot_alias <- function(e) { cat("No rows match i. No new columns to add so not evaluating RHS of :=\n") cat("Assigning to 0 row subset of",nrow(x),"rows\n") } + .Call(Cassign, x, irows, NULL, NULL, NULL, FALSE) # only purpose is to write 0 to .Last.updated .global$print = address(x) return(invisible(x)) } @@ -1385,7 +1382,7 @@ replace_dot_alias <- function(e) { } if (!is.null(lhs)) { # TODO?: use set() here now that it can add new columns. Then remove newnames and alloc logic above. - .Call(Cassign, x, irows, cols, newnames, jval, verbose, TRUE, get(".Last.updated", envir=topenv())) + .Call(Cassign,x,irows,cols,newnames,jval,verbose) return(suppPrint(x)) } if ((is.call(jsub) && is.list(jval) && jsub[[1L]] != "get" && !is.object(jval)) || !missingby) { @@ -2099,7 +2096,7 @@ tail.data.table <- function(x, n=6L, ...) { # search for one other .Call to assign in [.data.table to see how it differs } verbose=getOption("datatable.verbose") - x = .Call(Cassign,copy(x),i,cols,newnames,value,verbose,FALSE,NULL) # From 3.1.0, DF[2,"b"] = 7 no longer copies DF$a (so in this [<-.data.table method we need to copy) + x = .Call(Cassign,copy(x),i,cols,newnames,value,verbose) # From 3.1.0, DF[2,"b"] = 7 no longer copies DF$a (so in this [<-.data.table method we need to copy) alloc.col(x) # can maybe avoid this realloc, but this is (slow) [<- anyway, so just be safe. if (length(reinstatekey)) setkeyv(x,reinstatekey) invisible(x) @@ -2642,7 +2639,7 @@ set <- function(x,i=NULL,j,value) # low overhead, loopable .Call(Csetlistelt,l,1L,value) # to avoid the copy by list() in R < 3.1.0 value = l } - .Call(Cassign,x,i,j,NULL,value,FALSE,FALSE,NULL) # verbose=FALSE for speed to avoid getOption() TO DO: somehow read getOption("datatable.verbose") from C level + .Call(Cassign,x,i,j,NULL,value,FALSE) # verbose=FALSE for speed to avoid getOption() TO DO: somehow read getOption("datatable.verbose") from C level invisible(x) } diff --git a/R/onLoad.R b/R/onLoad.R index 075f04b396..82fa26d95f 100644 --- a/R/onLoad.R +++ b/R/onLoad.R @@ -1,5 +1,7 @@ # nocov start +.Last.updated <- vector("integer", 1L) # exported variable; number of rows updated by the last := or set(), #1885 + .onLoad <- function(libname, pkgname) { # Runs when loaded but not attached to search() path; e.g., when a package just Imports (not Depends on) data.table if (!exists("test.data.table", .GlobalEnv, inherits=FALSE) && # check when installed package is loaded but skip when developing the package with cc() @@ -102,7 +104,7 @@ # R could feasibly in future not copy DF's vecsxp in this case. If that changes in R, we'd like to know via the warning # because tests will likely break too. The warning will quickly tell R-core and us why, so we can then update. - assign(".Last.updated", copy(NA_integer_), envir=topenv()) #1885 + .Call(CinitLastUpdated, .Last.updated) #1885 invisible() } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index a27570539b..4d37f21e1b 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -14115,7 +14115,7 @@ test(2030.12, .Last.updated, 2L) # sub-assign from chaining d[2:3, z:=14L][, z:=NULL] test(2030.13, .Last.updated, 4L) # delete column from chaining set(d, 1:2, "z", 15L) -test(2030.14, .Last.updated, 4L) # set does not affect .Last.updated +test(2030.14, .Last.updated, 2L) # set() updates .Last.updated too g = data.table(a=1:4, z=15L) # join d[g, on="a", z:=i.z] test(2030.15, .Last.updated, 4L) # all match of all rows diff --git a/src/assign.c b/src/assign.c index be6fb321c7..89899d34df 100644 --- a/src/assign.c +++ b/src/assign.c @@ -270,7 +270,9 @@ SEXP selfrefokwrapper(SEXP x, SEXP verbose) { return ScalarInteger(_selfrefok(x,FALSE,LOGICAL(verbose)[0])); } -SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values, SEXP verb, SEXP update, SEXP updated) +int *_Last_updated = NULL; + +SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values, SEXP verb) { // For internal use only by := in [.data.table, and set() // newcolnames : add these columns (if any) @@ -283,12 +285,7 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values, SEXP v const char *c1, *tc1, *tc2; int *buf, k=0, newKeyLength, indexNo; size_t size; // must be size_t otherwise overflow later in memcpy - if (isNull(dt)){ - if (!length(rows) && LOGICAL(update)[0]) { // extra escape to handle case when no rows updated #1885 - INTEGER(updated)[0] = 0; - return(dt); - } else error("assign has been passed a NULL dt"); - } + if (isNull(dt)) error("assign has been passed a NULL dt"); if (TYPEOF(dt) != VECSXP) error("dt passed to assign isn't type VECSXP"); if (length(bindingIsLocked) && LOGICAL(bindingIsLocked)[0]) error(".SD is locked. Updating .SD by reference using := or set are reserved for future use. Use := in j directly. Or use copy(.SD) as a (slow) last resort, until shallow() is exported."); @@ -326,7 +323,6 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values, SEXP v if (isNull(rows)) { numToDo = nrow; targetlen = nrow; - if (LOGICAL(update)[0]) INTEGER(updated)[0] = numToDo; if (verbose) Rprintf("Assigning to all %d rows\n", nrow); // fast way to assign to whole column, without creating 1:nrow(x) vector up in R, or here in C } else { @@ -345,17 +341,20 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values, SEXP v error("i[%d] is %d which is out of range [1,nrow=%d].",i+1,rowsd[i],nrow); // set() reaches here (test 2005.2); := reaches the same error in subset.c first if (rowsd[i]>=1) numToDo++; } - if (LOGICAL(update)[0]) INTEGER(updated)[0] = numToDo; if (verbose) Rprintf("Assigning to %d row subset of %d rows\n", numToDo, nrow); // TODO: include in message if any rows are assigned several times (e.g. by=.EACHI with dups in i) if (numToDo==0) { - if (!length(newcolnames)) return(dt); // all items of rows either 0 or NA. !length(newcolnames) for #759 + if (!length(newcolnames)) { + *_Last_updated = 0; + return(dt); // all items of rows either 0 or NA. !length(newcolnames) for #759 + } if (verbose) Rprintf("Added %d new column%s initialized with all-NA\n", length(newcolnames), (length(newcolnames)>1)?"s":""); } } if (!length(cols)) { warning("length(LHS)==0; no columns to delete or assign RHS to."); // test 1295 covers + *_Last_updated = 0; return(dt); } // FR #2077 - set able to add new cols by reference @@ -631,6 +630,7 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values, SEXP v } memrecycle(targetcol, rows, 0, targetlen, RHS); // also called from dogroups where these arguments are used more } + *_Last_updated = numToDo; // the updates have taken place with no error, so update .Last.updated now PROTECT(assignedNames = allocVector(STRSXP, LENGTH(cols))); protecti++; for (i=0;i