diff --git a/CRAN_Release.cmd b/CRAN_Release.cmd index 38c19ea24b..a1927b554e 100644 --- a/CRAN_Release.cmd +++ b/CRAN_Release.cmd @@ -106,15 +106,15 @@ test.data.table() install.packages("xml2") # to check the 150 URLs in NEWS.md under --as-cran below q("no") R CMD build . -R CMD check data.table_1.11.9.tar.gz --as-cran -R CMD INSTALL data.table_1.11.9.tar.gz +R CMD check data.table_1.12.1.tar.gz --as-cran +R CMD INSTALL data.table_1.12.1.tar.gz # Test C locale doesn't break test suite (#2771) echo LC_ALL=C > ~/.Renviron R Sys.getlocale()=="C" q("no") -R CMD check data.table_1.11.9.tar.gz +R CMD check data.table_1.12.1.tar.gz rm ~/.Renviron R @@ -145,7 +145,7 @@ alias R310=~/build/R-3.1.0/bin/R ### END ONE TIME BUILD cd ~/GitHub/data.table -R310 CMD INSTALL ./data.table_1.11.9.tar.gz +R310 CMD INSTALL ./data.table_1.12.1.tar.gz R310 require(data.table) test.data.table() @@ -157,7 +157,7 @@ test.data.table() vi ~/.R/Makevars # Make line SHLIB_OPENMP_CFLAGS= active to remove -fopenmp R CMD build . -R CMD INSTALL data.table_1.11.9.tar.gz # ensure that -fopenmp is missing and there are no warnings +R CMD INSTALL data.table_1.12.1.tar.gz # ensure that -fopenmp is missing and there are no warnings R require(data.table) # observe startup message about no OpenMP detected test.data.table() @@ -165,7 +165,7 @@ q("no") vi ~/.R/Makevars # revert change above R CMD build . -R CMD check data.table_1.11.9.tar.gz +R CMD check data.table_1.12.1.tar.gz ##################################################### # R-devel with UBSAN, ASAN and strict-barrier on too @@ -194,7 +194,7 @@ cd R-devel-strict # important to change directory name before building not af make alias Rdevel-strict='~/build/R-devel-strict/bin/R --vanilla' cd ~/GitHub/data.table -Rdevel-strict CMD INSTALL data.table_1.11.9.tar.gz +Rdevel-strict CMD INSTALL data.table_1.12.1.tar.gz # Check UBSAN and ASAN flags appear in compiler output above. Rdevel was compiled with them so should be passed through to here Rdevel-strict install.packages(c("bit64","xts","nanotime","R.utils"), repos="http://cloud.r-project.org") # minimum packages needed to not skip any tests in test.data.table() @@ -230,7 +230,7 @@ cd R-devel make cd ~/GitHub/data.table vi ~/.R/Makevars # make the -O0 -g line active, for info on source lines with any problems -Rdevel CMD INSTALL data.table_1.11.9.tar.gz +Rdevel CMD INSTALL data.table_1.12.1.tar.gz Rdevel -d "valgrind --tool=memcheck --leak-check=full --track-origins=yes --show-leak-kinds=definite" # gctorture(TRUE) # very slow, many days # gctorture2(step=100) @@ -272,7 +272,7 @@ There are some things to overcome to achieve compile without USE_RINTERNALS, tho ## Rdevel ## install.packages("bit64") ## q("no") -## Rdevel CMD INSTALL ~/data.table_1.11.9.tar.gz +## Rdevel CMD INSTALL ~/data.table_1.12.1.tar.gz ## Rdevel ## .Machine$sizeof.longdouble # check 0 ## require(data.table) @@ -287,7 +287,7 @@ cd ~/build/rchk/trunk . ../scripts/config.inc . ../scripts/cmpconfig.inc # edit ~/.R/Makevars to set CFLAGS=-O0 -g so that rchk can provide source line numbers -echo 'install.packages("~/GitHub/data.table/data.table_1.11.9.tar.gz",repos=NULL)' | ./bin/R --slave +echo 'install.packages("~/GitHub/data.table/data.table_1.12.1.tar.gz",repos=NULL)' | ./bin/R --slave ../scripts/check_package.sh data.table cat packages/lib/data.table/libs/*check # keep running and rerunning locally until all problems cease. @@ -413,8 +413,7 @@ BiocManager::valid() avail = available.packages(repos=BiocManager::repositories()) # includes CRAN at the end from getOption("repos"). And ensure latest Bioc version is in repo path here. deps = tools::package_dependencies("data.table", db=avail, which="most", reverse=TRUE, recursive=FALSE)[[1]] -exclude = c("TCGAbiolinks", # too long (>30mins): https://github.com/BioinformaticsFMRP/TCGAbiolinks/issues/240 - "ctsem") # too long (>30mins) see Ttotal on CRAN checks and warnings/errors: https://cran.r-project.org/web/checks/check_results_ctsem.html +exclude = c("TCGAbiolinks") # too long (>30mins): https://github.com/BioinformaticsFMRP/TCGAbiolinks/issues/240 deps = deps[-match(exclude, deps)] table(avail[deps,"Repository"]) length(deps) @@ -552,7 +551,7 @@ run = function(which=c("not.started","cran.fail","bioc.fail","both.fail","rerun. } # ** ensure latest version installed into revdeplib ** -system("R CMD INSTALL ~/GitHub/data.table/data.table_1.11.9.tar.gz") +system("R CMD INSTALL ~/GitHub/data.table/data.table_1.12.1.tar.gz") run("rerun.all") out = function(fnam="~/fail.log") { @@ -561,10 +560,9 @@ out = function(fnam="~/fail.log") { cat(paste(x,collapse=" "), "\n") cat(capture.output(sessionInfo()), "\n", file=fnam, sep="\n") cat("> BiocManager::install()\n", file=fnam, append=TRUE) - capture.output(BiocManager::install(), type="message", file=fnam, append=TRUE) + cat(capture.output(BiocManager::install(), type="message"), file=fnam, sep="\n", append=TRUE) cat("> BiocManager::valid()\n", file=fnam, append=TRUE) - capture.output(ans<-BiocManager::valid(), type="message", file=fnam, append=TRUE) - cat(ans, "\n", file=fnam, append=TRUE, sep="\n") + cat(isTRUE(BiocManager::valid()), "\n\n\n", file=fnam, append=TRUE) for (i in x) { system(paste0("ls | grep '",i,".*tar.gz' >> ",fnam)) system(paste0("grep -H . ./",i,".Rcheck/00check.log >> ",fnam)) diff --git a/NEWS.md b/NEWS.md index afaf423905..3a1036e651 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,8 @@ #### NEW FEATURES +1. `:=` no longer recycles length>1 RHS vectors. There was a warning when recycling left a remainder but no warning when the LHS length was an exact multiple of the RHS length (the same behaviour as base R). Consistent feedback for several years has been that recycling is more often a bug. In rare cases where you need to recycle a length>1 vector, use `rep()` explicitly. Single values are still recycled silently as before. Early warning was given in [this tweet](https://twitter.com/MattDowle/status/1088544083499311104). The 758 CRAN and Bioconductor packages using data.table were tested and the maintainers of the 16 packages affected (2%) were consulted before going ahead, [#3310](https://github.com/Rdatatable/data.table/pull/3310). + #### BUG FIXES 1. `rbindlist()` of a malformed factor missing levels attribute is now a helpful error rather than a cryptic error about `STRING_ELT`, [#3315](https://github.com/Rdatatable/data.table/issues/3315). Thanks to Michael Chirico for reporting. diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 346ce65361..7af23c1d60 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -1367,7 +1367,9 @@ x = list(2,"b",2.718) test(470, DT[,baz:=x], data.table(a=1:3,b=4:6,foo=list(),bar=list(1,"a",3.14),baz=list(2,"b",2.718))) # Test recycling list DT = data.table(a=1:4,b=5:8) -test(471, DT[,foo:=list("a",2:3)], data.table(a=1:4,b=5:8,foo=list("a",2:3,"a",2:3))) +test(471.1, DT[,foo:=list("a",2:3)], error="Supplied 2 items to be assigned to 4 items of column 'foo'.*recycle") +test(471.2, names(DT), c("a","b")) # mismatch length error was caught ok before adding column not after column added +test(471.3, DT[,foo:=.(list("a"))], data.table(a=1:4,b=5:8,foo=list("a","a","a","a"))) # Test recycling singleton list if (ncol(DT)==3L) DT[,foo:=NULL] # else don't warn here under torture with skip= such that test 471 didn't run test(472, DT[,foo:=list(list(2:3))], data.table(a=1:4,b=5:8,foo=list(2:3,2:3,2:3,2:3))) @@ -1375,9 +1377,9 @@ test(472, DT[,foo:=list(list(2:3))], data.table(a=1:4,b=5:8,foo=list(2:3,2:3,2:3 # Test adding new column with a recycled factor, #1691 DT = data.table(a=1:4,b=5:8) DT[,c:=factor("a")] -test(473, DT, data.table(a=1:4,b=5:8,c=factor(c("a","a","a","a")))) -DT[,d:=factor(c("a","b"))] -test(474, DT, data.table(a=1:4,b=5:8,c=factor(c("a","a","a","a")),d=factor(c("a","b","a","b")))) +test(473.1, DT, data.table(a=1:4,b=5:8,c=factor(c("a","a","a","a")))) +test(473.2, DT[,d:=factor(c("a","b"))], error="Supplied 2 items to be assigned to 4 items of column 'd'") +test(474, DT[,d:=factor(c("X"))], data.table(a=1:4,b=5:8,c=factor(c("a","a","a","a")),d=factor(c("X","X","X","X")))) # Test scoping error introduced at 1.6.1, unique(DT) when key column is 'x' DT=data.table(x=c("a", "a", "b", "b"), y=c("a", "a", "b", "b"), key="x") @@ -1404,9 +1406,9 @@ gc() test(477, DT, data.table(a=1:3,foo=c(4L,10L,6L),foo2=c(4L,5L,11L))) test(478, DT[,foo:=foo], DT) # does nothing, with no warning, consistent with base R `a<-a`. -# Test that recycling now works with oversized inputs and % != 0 length, both with warnings. +# Old tests that recycling now works now moved to test error DT = data.table(x=1:4) -test(479, DT[, a:=5:7], data.table(x=1:4,a=c(5:7,5L)), warning="Supplied 3 items to be assigned to 4 items of column 'a' (recycled leaving remainder of 1 items)") +test(479, DT[, a:=5:7], error="Supplied 3 items to be assigned to 4 items of column 'a'") # Test that multiple columns can be added DT = data.table(x=1:4) @@ -2064,8 +2066,8 @@ test(745, DT[,ncol(DT):=1L], data.table(c1=1:3,c2=99L,c3=1L)) DT = data.table(a=letters[c(1:3,3L)],key="a") test(746, DT["a",c("new1","new2"):=list(4L, 5L)], data.table(a=letters[c(1:3,3L)],new1=INT(4,NA,NA,NA),new2=INT(5,NA,NA,NA),key="a")) -test(747, DT[,new1:=4:6], data.table(a=letters[c(1:3,3L)],new1=INT(4L,5L,6L,4L),new2=INT(5,NA,NA,NA),key="a"), warning="recycled leaving remainder of 1 item") -suppressWarnings(DT[,new1:=4:6]) +test(747.1, DT[,new1:=4:6], error="Supplied 3 items to be assigned to 4 items of column 'new1'") +test(747.2, DT[,new1:=INT(4,5,6,4)], data.table(a=letters[c(1:3,3L)],new1=INT(4L,5L,6L,4L),new2=INT(5,NA,NA,NA),key="a")) test(748, DT[c("c","b"),`:=`(new3=.N,new2=sum(new1)+1L),by=.EACHI], data.table(a=letters[c(1:3,3L)],new1=INT(4,5,6,4),new2=INT(5,6,11,11),new3=INT(NA,1,2,2),key="a")) # and multiple LHS by group, #1710 @@ -2136,12 +2138,10 @@ test(770, DT[J(2:3),.BY[[1]]==b,by=.EACHI], data.table(a=INT(2,2,3,3),V1=c(TRUE, # A data.table RHS of := caused a crash, #2311. a = data.table(first=1:6, third=c(1,1,1,3,3,4), key="first") b = data.table(first=c(3,4,4,5,6,7,8), second=1:7, key="first") -test(771, b[,third:=a[b,third,by=.EACHI]], b, warning="Supplied 2 items.*to 7.*recycled leaving remainder of 1 item") -test(772, copy(b)[,third:=as.list(a[b,third,by=.EACHI])], b, warning="Supplied 2 items.*to 7.*recycled leaving remainder of 1 item") -test(773, b[4,third[[1]]], c(1,3,3,3,4,NA,NA)) -test(774.1, b[,third:=a[b,third,mult="first"]], ans<-data.table(first=c(3,4,4,5,6,7,8), second=1:7, third=c(1,3,3,3,4,NA,NA), key="first")) -test(774.2, b[,third:=a[b,third]], ans) # mult="first" no longer needed as from v1.9.3. It now does what was naturally expected. - +test(771, b[,third:=a[b,third,by=.EACHI]], error="Supplied 2 items to be assigned to 7 items of column 'third'") +test(772, b[,third:=as.list(a[b,third,by=.EACHI])], error="Supplied 2 items to be assigned to 7 items of column 'third'") +test(773, b[,third:=a[b,third,mult="first"]], ans<-data.table(first=c(3,4,4,5,6,7,8), second=1:7, third=c(1,3,3,3,4,NA,NA), key="first")) +test(774, b[,third:=a[b,third]], ans) # mult="first" no longer needed as from v1.9.3. It now does what was naturally expected. # That names are dropped. (Names on the column vectors don't display. They increase size and aren't much use.) DT = data.table(a=1:3,b=LETTERS[1:3]) @@ -2296,10 +2296,9 @@ DT = data.table(a=1:3) DT[,a:=scale(a)] # 1 column matrix auto treated as vector test(835, na.omit(DT), DT) test(836, DT[,a:=as.integer(a)], data.table(a=INT(-1,0,1))) -test(837, DT[,a:=cbind(1,2)], data.table(a=c(1L,2L,1L)), - warning=c("2 column matrix RHS of := will be treated as one vector", - "Supplied 2 items to be assigned to 3 items.*recycled", - "Coerced double RHS to integer to match.*column 1 named 'a'.*values contain no fractions")) +test(837, DT[,a:=cbind(1,2)], + warning = "2 column matrix RHS of := will be treated as one vector", + error = "Supplied 2 items to be assigned to 3 items of column 'a'") DT = data.table(a=1:3,b=1:6) test(838, DT[,c:=scale(b), by=a][,c:=as.integer(1000*c)], data.table(a=1:3,b=1:6,c=rep(as.integer(1000*scale(1:2)), each=3))) @@ -2564,7 +2563,9 @@ test(915, fread(txt,sep="\n"), data.table("A;B;C|D,E"=c("1;3;4|5,6","2;4;6|8,10" # Crash bug when RHS is 0 length and := by group, fixed in 1.8.7 DT = data.table(a=1:3,b=1:6) -test(916, DT[,newcol:=logical(0),by=a], data.table(a=1:3,b=1:6,newcol=NA)) +test(916.1, DT[,newcol:=logical(0),by=a], error="Supplied 0 items to be assigned to group 1 of size 2 in column 'newcol'") +test(916.2, DT, data.table(a=1:3,b=1:6)) # that newcol was not added since group 1 failed +test(916.3, DT[,newcol:=NA,by=a], data.table(a=1:3,b=1:6,newcol=NA)) # roll join error when non last join column is factor, #2450 X = data.table(id=2001:2004, uid=c(1001,1002,1001,1001), state=factor(c('CA','CA','CA','MA')), ts=c(51,52,53,54), key='state,uid,ts') @@ -2715,10 +2716,7 @@ setkey(dt2, "Date") test(980, dt1[dt2, `:=`(A=A+i.A, B=B+i.B, C=i.C)][,list(A,B,C)], data.table(A=INT(110,115,120,225,230,235),B=INT(330,325,320,415,410,405),C=rep(1:2,each=3))) DT = data.table(A=1:2,B=3:4,C=5:6) -test(981, DT[,`:=`(D=B+4L,B=0:1,E=A*2L,F=A*3L,C=C+1L,G=C*2L),by=A], - data.table(A=1:2,B=0L,C=6:7,D=7:8,E=c(2L,4L),F=c(3L,6L),G=c(10L,12L)), - warning=c("RHS 2 is length 2.*group 1.*last 1 element.*discarded", - "RHS 2 is length 2.*group 2.*last 1 element.*discarded")) +test(981, DT[,`:=`(D=B+4L,B=0:1,E=A*2L,F=A*3L,C=C+1L,G=C*2L),by=A], error="Supplied 2 items to be assigned to group 1 of size 1 in column 'B'") DT = data.table(A=1:2,B=3:4,C=5:6) test(982, DT[,`:=`(D=B+4L,B=0L,E=A*2L,F=A*3L,C=C+1L,G=C*2L),by=A], data.table(A=1:2,B=0L,C=6:7,D=7:8,E=c(2L,4L),F=c(3L,6L),G=c(10L,12L))) # Also note that G is not yet iterative. In future: c(12,14) @@ -3534,12 +3532,18 @@ test(1131, My_Fun(), DT) # Test for #4957 - where `j` doesn't know `.N` when used with `lapply(.SD, function(x) ...)` test(1132, DT[, lapply(.SD, function(x) .N), by=ID], data.table(ID=c("A", "B", "C"), Value1=2L, Value2=2L)) -# Test for #4990 - `:=` does not generate recycling warning during 'by': -DT <- data.table(x=c(1,1,1,1,1,2,2)) +# Test for #4990 - `:=` recycle error during 'by' +DT <- data.table(x=INT(1,1,1,1,1,2,2)) # on a new column -test(1133.1, DT[, new := c(1,2), by=x], data.table(x=c(1,1,1,1,1,2,2), new=c(1,2,1,2,1,1,2)), warning="Supplied 2 items to be assigned to group 1 of size 5 in column 'new'") -# on an already existing column -test(1133.2, DT[, new := c(1,2), by=x], data.table(x=c(1,1,1,1,1,2,2), new=c(1,2,1,2,1,1,2)), warning="Supplied 2 items to be assigned to group 1 of size 5 in column 'new'") +test(1133.1, DT[, new := c(1,2), by=x], error="Supplied 2 items to be assigned to group 1 of size 5 in column 'new'") +test(1133.2, DT, data.table(x=INT(1,1,1,1,1,2,2))) +# on an already existing column: +DT[, new:=99L] +test(1133.3, DT[, new := c(1,2), by=x], error="Type of RHS ('double') must match LHS ('integer')") +test(1133.4, DT[, new := c(1L,2L), by=x], error="Supplied 2 items to be assigned to group 1 of size 5 in column 'new'") +test(1133.5, DT, data.table(x=INT(1,1,1,1,1,2,2), new=99L)) +test(1133.6, DT[, new := rep(-.GRP, .N), by=x], data.table(x=INT(1,1,1,1,1,2,2), new=INT(-1,-1,-1,-1,-1,-2,-2))) +test(1133.7, DT[, new := .N, by=x], data.table(x=INT(1,1,1,1,1,2,2), new=INT(5,5,5,5,5,2,2))) # Fix for FR #2496 - catch `{` in `:=` expression in `j`: DT <- data.table(x=c("A", "A", "B", "B"), val =1:4) @@ -3977,9 +3981,8 @@ DT = CJ(c("Corp", "CORP"), 1:3) test(1190, setkey(DT), DT) # tests no warning here from setkey, was "key rebuilt" due to inconsistent locale sorting in v1.8.10 # non-exact recycling in j results. Was caught with error in v1.8.10, now recycles with remainder and warning -DT = data.table(a=1:2,b=1:6) -test(1191, DT[, list(b,1:2), by=a], data.table(a=INT(1,1,1,2,2,2),b=INT(1,3,5,2,4,6),V2=INT(1,2,1,1,2,1)), - warning="Recycled leaving remainder of 1 item.*This warning is once only") +DT = data.table(a=1:2, b=1:6) +test(1191, DT[, list(b,1:2), by=a], error="Supplied 2 items for column 2 of group 1 which has 3 rows.") # that twiddle is used consistently, and tolerance has gone. # nice example from : http://stackoverflow.com/questions/21885290/data-table-roll-nearest-returns-multiple-results @@ -4638,13 +4641,11 @@ test(1288.16, rbindlist(ll, fill=TRUE), error="fill=TRUE, but names of input lis # fix for #5647 dt = data.table(x=1L, y=1:10) -cp = copy(dt) -test(1289.1, dt[,z := c(rep(NA, 5), y), by=x], cp[, z := c(rep(NA, 5), y[1:5])], warning="RHS 1 is length 15") -dt = data.table(x=c(1:2), y=1:10) -cp = copy(dt) -test(1289.2, dt[, z := c(rep(NA, 5),y), by=x], cp[, z := rep(NA_integer_, 10)], - warning=c("RHS 1 is length 10.*group 1.*last 5 element.*discarded", - "RHS 1 is length 10.*group 2.*last 5 element.*discarded")) +test(1289.1, dt[, z := c(rep(NA,5), y), by=x], error="Supplied 15 items to be assigned to group 1 of size 10 in column 'z'") +test(1289.2, names(dt), c("x","y")) +dt = data.table(x=1:2, y=1:10) +test(1289.3, dt[, z := c(rep(NA,5), y), by=x], error="Supplied 10 items to be assigned to group 1 of size 5 in column 'z'") +test(1289.4, names(dt), c("x","y")) ######################################## # Extensve testing for "duplicate" names @@ -5284,21 +5285,16 @@ test(1364.24, setdiff_(X, Y[0L], by.x = 'a'), c = c("1", "3", "2"), d = c(1L, 3L, 2L), e = c(1L, 5L, 7L))) # not join along with by=.EACHI, #604 -DT <- data.table(A=c(1,1,1,2,2,2,2,3,3,4,5,5))[, `:=`(B=as.integer(A), C=c("c", "e", "a", "d"), D=factor(c("c", "e", "a", "d")), E=1:12)] +DT <- data.table(A=c(1,1,1,2,2,2,2,3,3,4,5,5))[, `:=`(B=as.integer(A), C=rep(c("c", "e", "a", "d"),3L), D=factor(rep(c("c", "e", "a", "d"),3L)), E=1:12)] setkey(DT, A) -test(1365.1, suppressMessages(DT[!J(c(2,5)), sum(E), by=.EACHI]), - suppressMessages(DT[J(c(1,3,4)), sum(E), by=.EACHI])) +test(1365.1, DT[!J(c(2,5)), sum(E), by=.EACHI], DT[J(c(1,3,4)), sum(E), by=.EACHI]) setkey(DT, B) -test(1365.2, suppressMessages(DT[!J(c(4:5)), list(.N, sum(E)), by=.EACHI]), - suppressMessages(DT[J(1:3), list(.N, sum(E)), by=.EACHI])) +test(1365.2, DT[!J(c(4:5)), list(.N, sum(E)), by=.EACHI], DT[J(1:3), list(.N, sum(E)), by=.EACHI]) setkey(DT, C) -test(1365.3, suppressMessages(copy(DT)[!"c", f := .N, by=.EACHI]), - suppressMessages(copy(DT)[c("a", "d", "e"), f := .N, by=.EACHI])) +test(1365.3, copy(DT)[!"c", f:=.N, by=.EACHI], copy(DT)[c("a","d","e"), f:=.N, by=.EACHI]) setkey(DT, D) -test(1365.4, suppressMessages(DT[!J(factor("c")), .N, by=.EACHI]), - suppressMessages(DT[J(factor(c("a", "d", "e"))), .N, by=.EACHI])) -test(1365.5, suppressMessages(DT[!"c", lapply(.SD, sum), by=.EACHI, .SDcols=c("B", "E")]), - suppressMessages(DT[c("a", "d", "e"), lapply(.SD, sum), by=.EACHI, .SDcols=c("B", "E")])) +test(1365.4, DT[!J(factor("c")), .N, by=.EACHI], DT[J(factor(c("a","d","e"))), .N, by=.EACHI]) +test(1365.5, DT[!"c", lapply(.SD, sum), by=.EACHI, .SDcols=c("B","E")], DT[c("a","d","e"), lapply(.SD, sum), by=.EACHI, .SDcols=c("B", "E")]) # uniqlengths doesn't error on 0-length input test(1366, uniqlengths(integer(0), 0L), integer(0)) @@ -8520,7 +8516,7 @@ setindex(dt, b) dt[, a := as.logical(sum(a)), by = b] test(1632.2, names(attributes(attr(dt, 'index'))), "__b") dt = data.table(a = rep(TFvec, 3), b = c("x", "y", "z")) -test(1632.3, copy(dt)[, c := !a, by=b], copy(dt)[, c := !TFvec]) +test(1632.3, copy(dt)[, c := !a, by=b], copy(dt)[, c := rep(!TFvec,3L)]) # by accepts colA:colB for interactive scenarios, #1395 dt = data.table(x=rep(1,18), y=rep(1:2, each=9), z=rep(1:3,each=6), a=rep(1:6, each=3))[, b := 6] @@ -12337,7 +12333,7 @@ test(1919, as.ITime(c('xxx', '10:43')), structure(c(NA, 38580L), class = "ITime" # wrong bmerge result if character gets coerced to factor, i is keyed, and level order in i is different from x, #2881 iris = data.table(iris) -iris$grp = c('A', 'B') +iris$grp = rep(c('A','B'), 75L) iris[, Species1 := factor(Species, levels=c('setosa','versicolor','virginica'), labels=c('setosa','versicolor','Virginica'))] iSorted = data.table(Species1 = c('setosa','Virginica'), grp='B', key=c("grp","Species1")) i = setkey(copy(iSorted),NULL) diff --git a/src/assign.c b/src/assign.c index 8969d17685..3e323c99f5 100644 --- a/src/assign.c +++ b/src/assign.c @@ -443,11 +443,8 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values, SEXP v if ((coln+1)<=oldncol && isFactor(VECTOR_ELT(dt,coln)) && !isString(thisvalue) && TYPEOF(thisvalue)!=INTSXP && TYPEOF(thisvalue)!=LGLSXP && !isReal(thisvalue) && !isNewList(thisvalue)) // !=INTSXP includes factor error("Can't assign to column '%s' (type 'factor') a value of type '%s' (not character, factor, integer or numeric)", CHAR(STRING_ELT(names,coln)),type2char(TYPEOF(thisvalue))); - if (nrow>0 && targetlen>0) { - if (vlen>targetlen) - warning("Supplied %d items to be assigned to %d items of column '%s' (%d unused)", vlen, targetlen,CHAR(colnam),vlen-targetlen); - else if (vlen>0 && targetlen%vlen != 0) - warning("Supplied %d items to be assigned to %d items of column '%s' (recycled leaving remainder of %d items).",vlen,targetlen,CHAR(colnam),targetlen%vlen); + if (nrow>0 && targetlen>0 && vlen>1 && vlen!=targetlen) { + error("Supplied %d items to be assigned to %d items of column '%s'. The RHS length must either be 1 (single values are ok) or match the LHS length exactly. If you wish to 'recycle' the RHS please use rep() explicitly to make this intent clear to readers of your code.", vlen, targetlen,CHAR(colnam)); } } // having now checked the inputs, from this point there should be no errors so we can now proceed to @@ -810,23 +807,26 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values, SEXP v return(dt); // needed for `*tmp*` mechanism (when := isn't used), and to return the new object after a := for compound syntax. } -static Rboolean anyNamed(SEXP x) { - if (MAYBE_REFERENCED(x)) return TRUE; +static bool anyNamed(SEXP x) { + if (MAYBE_REFERENCED(x)) return true; if (isNewList(x)) for (int i=0; i len ? len : length(source); // fix for 5647. when length(source) > len, slen must be len. - if (slen<1) return; if (TYPEOF(target) != TYPEOF(source)) error("Internal error: TYPEOF(target)['%s']!=TYPEOF(source)['%s']", type2char(TYPEOF(target)),type2char(TYPEOF(source))); // # nocov + int slen = length(source); + if (slen!=1 && slen!=len) error("Internal error: recycle length error not caught earlier. slen=%d len=%d", slen, len); // # nocov + // Internal error because the column has already been added to the DT, so length mismatch should have been caught before adding the column. + // for 5647 this used to limit slen to len, but no longer + + int protecti=0; if (isNewList(source)) { // A list() column; i.e. target is a column of pointers to SEXPs rather than the much more common case // where memrecycle copies the DATAPTR data to the atomic target from the atomic source. @@ -839,86 +839,95 @@ void memrecycle(SEXP target, SEXP where, int start, int len, SEXP source) // SEXP pointed to. // If source is already not named (because j already created a fresh unnamed vector within a list()) we don't want to // duplicate unnecessarily, hence checking for named rather than duplicating always. - // See #481 and #1270 + // See #481, #1270 and tests 1341.* fail without this duplicate(). if (anyNamed(source)) { source = PROTECT(duplicate(source)); protecti++; } } - size_t size = SIZEOF(target); if (!length(where)) { switch (TYPEOF(target)) { - case INTSXP : case REALSXP : case LGLSXP : - break; - case STRSXP : - for (; r0?1:0; r<(len/slen); r++) { // if the first slen were done in the switch above, convert r=slen to r=1 - memcpy((char *)DATAPTR(target) + (start+r*slen)*size, - (char *)DATAPTR(source), - slen * size); + case REALSXP : + if (slen==1) { + double *td = REAL(target); + const double val = REAL(source)[0]; + for (int i=0; i10 it may be worth memcpy, but we'd need to first know if 'where' was a contiguous subset } UNPROTECT(protecti); } diff --git a/src/dogroups.c b/src/dogroups.c index 02033af74e..ad85fe9c96 100644 --- a/src/dogroups.c +++ b/src/dogroups.c @@ -23,11 +23,10 @@ void setSizes() { SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEXP xjiscols, SEXP grporder, SEXP order, SEXP starts, SEXP lens, SEXP jexp, SEXP env, SEXP lhs, SEXP newnames, SEXP on, SEXP verbose) { - R_len_t i, j, k, rownum, ngrp, nrowgroups, njval=0, ngrpcols, ansloc=0, maxn, estn=-1, r, thisansloc, grpn, thislen, igrp, vlen, origIlen=0, origSDnrow=0; + R_len_t i, j, k, rownum, ngrp, nrowgroups, njval=0, ngrpcols, ansloc=0, maxn, estn=-1, thisansloc, grpn, thislen, igrp, origIlen=0, origSDnrow=0; int protecti=0; SEXP names, names2, xknames, bynames, dtnames, ans=NULL, jval, thiscol, BY, N, I, GRP, iSD, xSD, rownames, s, RHS, listwrap, target, source, tmp; - Rboolean wasvector, firstalloc=FALSE, NullWarnDone=FALSE, recycleWarn=TRUE; - size_t size; // must be size_t, otherwise bug #5305 (integer overflow in memcpy) + Rboolean wasvector, firstalloc=FALSE, NullWarnDone=FALSE; clock_t tstart=0, tblock[10]={0}; int nblock[10]={0}; if (!isInteger(order)) error("Internal error: order not integer vector"); // # nocov @@ -132,7 +131,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX INTEGER(GRP)[0] = i+1; // group counter exposed as .GRP for (j=0; j=0 && nrowgroups) for (j=0; j=0) { for (j=0; j0 && vlen>grpn && j0 && grpn%vlen != 0) - warning("Supplied %d items to be assigned to group %d of size %d in column '%s' (recycled leaving remainder of %d items).",vlen,i+1,grpn,CHAR(STRING_ELT(dtnames,INTEGER(lhs)[j]-1)),grpn%vlen); - - memrecycle(target, order, INTEGER(starts)[i]-1, grpn, RHS); - + memrecycle(target, order, INTEGER(starts)[i]-1, grpn, RHS); // length mismatch checked above for all jval columns before starting to add any new columns copyMostAttrib(RHS, target); // not names, otherwise test 778 would fail. /* OLD FIX: commented now. The fix below resulted in segfault on factor columns because I dint set the "levels" Instead of fixing that, I just removed setting class if it's factor. Not appropriate fix. @@ -316,7 +324,6 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX OUTDATED: if (!isFactor(RHS)) setAttrib(target, R_ClassSymbol, getAttrib(RHS, R_ClassSymbol)); OUTDATED: // added !isFactor(RHS) to fix #5104 (side-effect of fixing #2531) See also #155 and #36 */ - } UNPROTECT(1); continue; @@ -390,11 +397,16 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX for (j=0; j