From 415689b847821dcff86b5471a5b2c771caf4cb6b Mon Sep 17 00:00:00 2001 From: mattdowle Date: Tue, 24 Sep 2019 18:32:11 -0700 Subject: [PATCH 01/43] interim --- NEWS.md | 16 +- R/test.data.table.R | 10 +- inst/tests/tests.Rraw | 32 ++-- src/assign.c | 381 ++++++++++++++++++++++++++++++------------ src/data.table.h | 2 +- src/dogroups.c | 4 +- src/rbindlist.c | 2 +- 7 files changed, 302 insertions(+), 145 deletions(-) diff --git a/NEWS.md b/NEWS.md index b477adf93b..03dc60a233 100644 --- a/NEWS.md +++ b/NEWS.md @@ -193,16 +193,16 @@ set.seed(108) x = rnorm(1e6); n = 1e3 rollfun = function(x, n, FUN) { - ans = rep(NA_real_, nx<-length(x)) + nx = nx = length(x) + ans = rep(NA_real_, nx) for (i in n:nx) ans[i] = FUN(x[(i-n+1):i]) ans } - system.time(ans1<-rollfun(x, n, mean)) - system.time(ans2<-zoo::rollapplyr(x, n, function(x) mean(x), fill=NA)) - system.time(ans3<-zoo::rollmeanr(x, n, fill=NA)) - system.time(ans4<-frollapply(x, n, mean)) - system.time(ans5<-frollmean(x, n)) - sapply(list(ans2,ans3,ans4,ans5), all.equal, ans1) + system.time(rollfun(x, n, mean)) + system.time(zoo::rollapplyr(x, n, function(x) mean(x), fill=NA)) + system.time(zoo::rollmeanr(x, n, fill=NA)) + system.time(frollapply(x, n, mean)) + system.time(frollmean(x, n)) ### fun mean sum median # base rollfun 8.815 5.151 60.175 @@ -224,6 +224,8 @@ # [1] "A" "b" "c" ``` +29. `DT[, integerColumn:=0]` and `set(DT,i,j,0)` no longer warns about the `0` being the wrong type (`numeric` instead of `integer`). The long warning was focussed on efficiency to help the user avoid the necessary coercion from `0` to `0L`. Although the time and space for this coercion in a single call is unmeasurably small, when placed in a loop the small overhead of any allocation on R's heap could start to become noticeable (more so for `set()` whose purpose it be low-overhead for looping). The coercion warning and its advice could be too much for new users ("what and why L?"), but advanced users appreciate the warning. Further, when assigning a value across many columns, it could be inconvenient to supply the correct type (where the columns vary in type) and frustating if a simple `0` would be clear. To balance these requirements from different types of users, the coercion is now avoided by doing the coercion ourselves in internal code without allocating an object on R's heap: an internal optimization that we will now refer to as a "punned-coercion". As there is no time or space penalty for a punned-coerce, advanced users don't need to worry about it either. Where a punned-coerce discards fractional data (e.g. 3.14 assigned to an integer column) there will still be a warning. Punned-coerce applies to length>1 vectors as well as length-1 vectors including recycling, in all cases of `:=` and `set()`. + ## BUG FIXES 1. `first`, `last`, `head` and `tail` by group no longer error in some cases, [#2030](https://github.com/Rdatatable/data.table/issues/2030) [#3462](https://github.com/Rdatatable/data.table/issues/3462). Thanks to @franknarf1 for reporting. diff --git a/R/test.data.table.R b/R/test.data.table.R index 47ded3002f..322cd1e613 100644 --- a/R/test.data.table.R +++ b/R/test.data.table.R @@ -282,9 +282,9 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,output=NULL,notOutput=NULL, if (!missing(error) && !missing(y)) stop("Test ",numStr," is invalid: when error= is provided it does not make sense to pass y as well") # nocov - string_match = function(x, y) { - length(grep(x,y,fixed=TRUE)) || # try treating x as literal first; useful for most messages containing ()[]+ characters - length(tryCatch(grep(x,y), error=function(e)NULL)) # otherwise try x as regexp + string_match = function(x, y, ignore.case=FALSE) { + length(grep(x, y, fixed=TRUE)) || # try treating x as literal first; useful for most messages containing ()[]+ characters + length(tryCatch(grep(x, y, ignore.case=ignore.case), error=function(e)NULL)) # otherwise try x as regexp } xsub = substitute(x) @@ -364,10 +364,10 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,output=NULL,notOutput=NULL, fail = TRUE # nocov end } - if (length(notOutput) && string_match(notOutput, out)) { + if (length(notOutput) && string_match(notOutput, out, ignore.case=TRUE)) { # nocov start cat("Test",numStr,"produced output but should not have:\n") - cat("Expected absent: <<",gsub("\n","\\\\n",notOutput),">>\n",sep="") + cat("Expected absent (case insensitive): <<",gsub("\n","\\\\n",notOutput),">>\n",sep="") cat("Observed: <<",gsub("\n","\\\\n",out),">>\n",sep="") fail = TRUE # nocov end diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index b8739c9560..55edfa7c14 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -869,17 +869,17 @@ test(299.03, truelength(DT)>length(DT)) # the := over-allocated, by 100 by def # FR #2551 - old 299.3 and 299.5 are changed to include length(RHS) > 1 to issue the warning DT[,c:=rep(42L,.N)] # plonk test(299.04, DT, data.table(a=1:3, c=42L)) -test(299.05, DT[2:3,c:=c(42, 42)], data.table(a=1:3,c=42L), warning="Coerced double RHS to integer.*column 2 named 'c'.*RHS.*no fractions.*more efficiently.*integer.*Consider.*L") +test(299.05, DT[2:3,c:=c(43, 44)], data.table(a=1:3,c=42:44)) # FR #2551 - length(RHS) = 1 - no warning for type conversion -test(299.06, DT[2,c:=42], data.table(a=1:3,c=42L)) +test(299.06, DT[2,c:=42], data.table(a=1:3,c=INT(42,42,44))) # also see tests 302 and 303. (Ok, new test file for fast assign would be tidier). test(299.07, DT[,c:=rep(FALSE,nrow(DT))], data.table(a=1:3,c=FALSE)) # replace c column with logical -test(299.08, DT[2:3,c:=c(42,0)], data.table(a=1:3,c=c(FALSE,TRUE,FALSE)), warning="Coerced double RHS to logical.*column 2 named 'c'.*If the target column's type logical is correct") +test(299.08, DT[2:3,c:=c(42,0)], error="Cannot assign 42.0.* to a logical column") +test(299.09, DT[2:3,c:=c(1,0)], data.table(a=1:3,c=c(FALSE,TRUE,FALSE))) # FR #2551 is now changed to fit in / fix bug #5442. Stricter warnings are in place now. Check tests 1294.1-34 below. -test(299.09, DT[2,c:=42], data.table(a=1:3,c=c(FALSE,TRUE,FALSE)), warning="Coerced double RHS to logical to match") -test(299.11, DT[2,c:=42L], data.table(a=1:3,c=c(FALSE,TRUE,FALSE)), warning="Coerced integer RHS to logical to match") -test(299.12, DT[2:3,c:=c(0L, 0L)], data.table(a=1:3,c=FALSE), warning="Coerced integer RHS to logical to match the type of the target column.*If the target column's type logical is correct") - +test(299.10, DT[2,c:=42], error="Cannot assign 42.0.* to a logical column") +test(299.11, DT[2,c:=42L], error="Cannot assign 42 to a logical column") +test(299.12, DT[2:3,c:=c(0L, 0L)], data.table(a=1:3,c=FALSE)) # Test bug fix #1468, combining i and by. DT = data.table(a=1:3,b=1:9,v=1:9,key="a,b") @@ -969,8 +969,7 @@ test(335, DT[,2:1]<-NULL, error="Attempt to assign to column") DT = data.table(a=1:2, b=1:6) test(336, DT[,z:=a/b], data.table(a=1:2,b=1:6,z=(1:2)/(1:6))) -test(337, DT[3:4,z:=a*b], data.table(a=1:2,b=1:6,z=c(1,1,3,8,1/5,2/6)), warning="Coerced integer RHS to double to match") - +test(337, DT[3:4,z:=a*b], data.table(a=1:2,b=1:6,z=c(1,1,3,8,1/5,2/6))) # test eval of LHS of := (using with=FALSE gives a warning here from v1.9.3) DT = data.table(a=1:3, b=4:6) @@ -1024,8 +1023,7 @@ test(355, DT[11:2010,f:=newlevels], data.table(f=factor(c(rep("000",10),newlevel DT = data.table(f=c("a","b"),x=1:4) # Test coercing factor to character column test(355.5, DT[3,f:=factor("foo")], data.table(f=c("a","b","foo","b"),x=1:4)) -test(355.6, DT[4,f:=factor("bar"),verbose=TRUE], data.table(f=c("a","b","foo","bar"),x=1:4), output="Coerced factor RHS to character to match the column") - +test(355.6, DT[4,f:=factor("bar"),verbose=TRUE], data.table(f=c("a","b","foo","bar"),x=1:4), notOutput="coerce") # See datatable-help post and NEWS item for 1.6.7 DT = data.table(X=factor(letters[1:10]), Y=1:10) @@ -1218,16 +1216,14 @@ test(423, truelength(DT), 1028L) DT <- data.table(a = factor(c("A", "Z")), b = 1:4) DT[1,1] <- "Z" test(424, DT, data.table(a=factor(c("Z","Z","A","Z")),b=1:4)) -test(425, DT[1,1] <- 1, 1, warning="Coerced 'double' RHS to 'integer'") -test(426, DT, data.table(a=factor(c("A","Z")),b=1:4)) -DT[1,1] <- 2L +test(425, DT[1,1] <- 1, error="Cannot assign 'double' to 'factor'") +test(426, DT[1,1] <- 2L, error="Cannot assign 'integer' to 'factor'") test(427, DT, data.table(a=factor(c("Z","Z","A","Z")),b=1:4)) DT[1,a:="A"] test(428, DT, data.table(a=factor(c("A","Z","A","Z")),b=1:4)) -DT[1,a:=2L] -test(429, DT, data.table(a=factor(c("Z","Z","A","Z")),b=1:4)) -test(430, DT[1,1]<- 3L, 3L, warning="RHS contains 3 which is outside the levels range.*1,2.*of column 1, NAs generated") -test(431, DT[1,1:=4L], data.table(a=factor(c(NA,"Z","A","Z")),b=1:4), warning="RHS contains 4 which is outside the levels range.*1,2.*of column 1, NAs generated") +test(429, DT[1,a:=2L], error="Cannot assign 'integer' to 'factor'. Factors can only be assigned factor or character.") +test(430, DT, data.table(a=factor(c("A","Z","A","Z")),b=1:4)) +test(431, DT[1,1:=NA], data.table(a=factor(c(NA,"Z","A","Z")),b=1:4)) old = getOption("datatable.alloccol") # Test that unsetting datatable.alloccol is caught, #2014 options(datatable.alloccol=NULL) # In this =NULL case, options() in R 3.0.0 returned TRUE rather than the old value. This R bug was fixed in R 3.1.1. diff --git a/src/assign.c b/src/assign.c index 2c9d6e95f2..ff3bb39979 100644 --- a/src/assign.c +++ b/src/assign.c @@ -281,7 +281,7 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values) // cols : column names or numbers corresponding to the values to set // rows : row numbers to assign R_len_t i, j, numToDo, targetlen, vlen, r, oldncol, oldtncol, coln, protecti=0, newcolnum, indexLength; - SEXP targetcol, nullint, thisv, targetlevels, newcol, s, colnam, tmp, colorder, key, index, a, assignedNames, indexNames; + SEXP targetcol, nullint, s, colnam, tmp, colorder, key, index, a, assignedNames, indexNames; bool verbose=GetVerbose(), anytodelete=false; const char *c1, *tc1, *tc2; int *buf, newKeyLength, indexNo; @@ -461,7 +461,7 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values) // truelengths of both already set by alloccol } for (i=0; i oldncol) { // new column - SET_VECTOR_ELT(dt, coln, newcol=allocNAVectorLike(thisvalue, nrow)); + SET_VECTOR_ELT(dt, coln, targetcol=allocNAVectorLike(thisvalue, nrow)); // initialize with NAs for when 'rows' is a subset and it doesn't touch // do not try to save the time to NA fill (contiguous branch free assign anyway) since being // sure all items will be written to (isNull(rows), length(rows), vlen<1, targetlen) is not worth the risk. - if (isVectorAtomic(thisvalue)) copyMostAttrib(thisvalue,newcol); // class etc but not names + if (isVectorAtomic(thisvalue)) copyMostAttrib(thisvalue,targetcol); // class etc but not names // else for lists (such as data.frame and data.table) treat them as raw lists and drop attribs if (vlen<1) continue; // e.g. DT[,newcol:=integer()] (adding new empty column) - targetcol = newcol; - RHS = thisvalue; + // delete ... targetcol = newcol; + // RHS = thisvalue; } else { // existing column targetcol = VECTOR_ELT(dt,coln); + } + memrecycle(targetcol, rows, 0, targetlen, thisvalue, coln+1, CHAR(STRING_ELT(names, coln))); + // memrecycle is also called from dogroups; ^^ last 2 arguments just for messages + + // delete ... UNPROTECT(thisprotecti); // unprotect inside loop through columns to save protection stack + } + /* + // !! START DELETE !! if (isFactor(targetcol)) { // Coerce RHS to appropriate levels of LHS, adding new levels as necessary (unlike base) // If it's the same RHS being assigned to several columns, we have to recoerce for each @@ -557,7 +565,8 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values) for (int j=0; j0) { - savetl(s); - } else if (tl<0) { - // # nocov start - for (int j=0; j0) { + savetl(s); + } else if (tl<0) { + // # nocov start + for (int j=0; j=0) { - if (tl>0) savetl(s); - SET_TRUELENGTH(s, -nTargetLevels-(++nAdd)); + int nAdd = 0; + for (int k=0; k=0) { + if (tl>0) savetl(s); + SET_TRUELENGTH(s, -nTargetLevels-(++nAdd)); + } // else, when sourceIsString, it's normal for there to be duplicates here } - } - const int nSource = length(source); - const int *sourceD = INTEGER(source); - int *newSourceD = INTEGER(newSource); - for (int i=0; i0) { + warning("%f has been truncated to %d because column %d ('%s') is type integer", REAL(source)[w-1], colnum, colname); } - free(temp); - } else { - // all source levels were already in target levels, but not with the same integers; we're done - savetl_end(); } - // now continue, but with the mapped integers in the (new) source + break; + case REALSXP: + break; + case STRSXP: + source = PROTECT(coerceVector(source, STRSXP)); protecti++; + break; + default: + error("Cannot assign type '%s' to '%s' column", type2char(TYPEOF(source)), type2char(TYPEOF(target))); } } if (!length(where)) { // e.g. called from rbindlist with where=R_NilValue @@ -911,16 +971,43 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source) memcpy(RAW(target)+start, RAW(source), slen*SIZEOF(target)); } break; - case LGLSXP: case INTSXP : - if (TYPEOF(source)!=LGLSXP && TYPEOF(source)!=INTSXP) { source = PROTECT(coerceVector(source, TYPEOF(target))); protecti++; } + case LGLSXP: + case INTSXP: { + int *td = INTEGER(target)+start; if (slen==1) { - int *td = INTEGER(target)+start; - const int val = INTEGER(source)[0]; - for (int i=0; i1 case + break; + case REALSXP: { + const double *sourceD = REAL(source); + for (int i=0; i1. Truncation when INTSXP, or invalid LGLSXP was caught earlier above. + } + } + } } - break; + } break; case REALSXP : { bool si64 = Rinherits(source, char_integer64); bool ti64 = Rinherits(target, char_integer64); @@ -982,13 +1069,27 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source) } break; case STRSXP : - if (TYPEOF(source)!=STRSXP) { source = PROTECT(coerceVector(source, STRSXP)); protecti++; } - if (slen==1) { - const SEXP val = STRING_ELT(source, 0); - for (int i=0; i1 && thislen!=maxn && grpn>0) { // grpn>0 for grouping empty tables; test 1986 error("Supplied %d items for column %d of group %d which has %d rows. The RHS length must either be 1 (single values are ok) or match the LHS length exactly. If you wish to 'recycle' the RHS please use rep() explicitly to make this intent clear to readers of your code.", thislen, j+1, i+1, maxn); } - memrecycle(target, R_NilValue, thisansloc, maxn, source); + memrecycle(target, R_NilValue, thisansloc, maxn, source, 0, ""); } } ansloc += maxn; diff --git a/src/rbindlist.c b/src/rbindlist.c index a7b846535e..e4752edded 100644 --- a/src/rbindlist.c +++ b/src/rbindlist.c @@ -501,7 +501,7 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg) thisCol = PROTECT(coerceVector(thisCol, VECSXP)); nprotect++; } // else coerces if needed within memrecycle; possibly with a no-alloc direct coerce - const char *ret = memrecycle(target, R_NilValue, ansloc, thisnrow, thisCol); + const char *ret = memrecycle(target, R_NilValue, ansloc, thisnrow, thisCol, 0, ""); if (ret) warning("Column %d of item %d: %s", w+1, i+1, ret); // currently just one warning when precision is lost; e.g. assigning 3.4 to integer64 } ansloc += thisnrow; From 1b07fffc80dc735c924cf34e4c4696bd62952c62 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Tue, 24 Sep 2019 19:47:13 -0700 Subject: [PATCH 02/43] interim --- inst/tests/tests.Rraw | 2 +- src/assign.c | 45 ++++++++++++++++++++++++++++++++----------- 2 files changed, 35 insertions(+), 12 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 55edfa7c14..e1bd5fb05c 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -1221,7 +1221,7 @@ test(426, DT[1,1] <- 2L, error="Cannot assign 'integer' to 'factor'") test(427, DT, data.table(a=factor(c("Z","Z","A","Z")),b=1:4)) DT[1,a:="A"] test(428, DT, data.table(a=factor(c("A","Z","A","Z")),b=1:4)) -test(429, DT[1,a:=2L], error="Cannot assign 'integer' to 'factor'. Factors can only be assigned factor or character.") +test(429, DT[1,a:=2L], error="Cannot assign 'integer' to 'factor'. Factor columns can only be assigned factor or character values, or NA in any type") test(430, DT, data.table(a=factor(c("A","Z","A","Z")),b=1:4)) test(431, DT[1,1:=NA], data.table(a=factor(c(NA,"Z","A","Z")),b=1:4)) diff --git a/src/assign.c b/src/assign.c index ff3bb39979..d561ca4822 100644 --- a/src/assign.c +++ b/src/assign.c @@ -833,16 +833,34 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, } const bool sourceIsFactor=isFactor(source), targetIsFactor=isFactor(target); if (sourceIsFactor || targetIsFactor) { - if ((!sourceIsFactor && !isString(source)) || - (!targetIsFactor && !isString(target))) { - error("Cannot assign '%s' to '%s'. Factors can only be assigned factor or character.", - sourceIsFactor?"factor":type2char(TYPEOF(source)), - targetIsFactor?"factor":type2char(TYPEOF(target))); - } if (!targetIsFactor) { - // source must be factor - // assigning factor to character is left to later below, avoiding wasteful asCharacterFactor from v1.12.4 + if (!isString(target)) + error("Cannot assign 'factor' to '%s'. Factors can only be assigned to factor or character columns.", type2char(TYPEOF(target))); + // else assigning factor to character is left to later below, avoiding wasteful asCharacterFactor + } else if (!sourceIsFactor && !isString(source)) { + // source is not character or factor. If it's all-NA in another type, let it fall through + // to assign the NA later as-is without any coerce. Otherwise, error. + bool ok = true; + switch(TYPEOF(source)) { + case LGLSXP: case INTSXP: { + const int *sd = INTEGER(source); + for (int k=0; k0) { - warning("%f has been truncated to %d because column %d ('%s') is type integer", REAL(source)[w-1], colnum, colname); + warning("Coerced double RHS to integer to match the type of the target column (column %d named '%s'). One or more RHS values contain fractions which have been lost; e.g. item %d with value %f has been truncated to %d.", colnum, colname, w, REAL(source)[w-1], (int)REAL(source)[w-1]); } } + ok = true; break; case REALSXP: + ok = isLogical(source) || isInteger(source); break; case STRSXP: source = PROTECT(coerceVector(source, STRSXP)); protecti++; + ok = true; break; - default: - error("Cannot assign type '%s' to '%s' column", type2char(TYPEOF(source)), type2char(TYPEOF(target))); } + if (!ok) + error("Cannot assign type '%s' to '%s' column", type2char(TYPEOF(source)), type2char(TYPEOF(target))); } if (!length(where)) { // e.g. called from rbindlist with where=R_NilValue switch (TYPEOF(target)) { From 17d82df3f1a7278c30461f43d0a9b81d320fcafb Mon Sep 17 00:00:00 2001 From: mattdowle Date: Tue, 24 Sep 2019 21:54:48 -0700 Subject: [PATCH 03/43] interim --- src/assign.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/assign.c b/src/assign.c index d561ca4822..f158fdedda 100644 --- a/src/assign.c +++ b/src/assign.c @@ -834,8 +834,8 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, const bool sourceIsFactor=isFactor(source), targetIsFactor=isFactor(target); if (sourceIsFactor || targetIsFactor) { if (!targetIsFactor) { - if (!isString(target)) - error("Cannot assign 'factor' to '%s'. Factors can only be assigned to factor or character columns.", type2char(TYPEOF(target))); + if (!isString(target) && !isNewList(target)) + error("Cannot assign 'factor' to '%s'. Factors can only be assigned to factor, character or list columns.", type2char(TYPEOF(target))); // else assigning factor to character is left to later below, avoiding wasteful asCharacterFactor } else if (!sourceIsFactor && !isString(source)) { // source is not character or factor. If it's all-NA in another type, let it fall through @@ -973,8 +973,13 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, case REALSXP: ok = isLogical(source) || isInteger(source); break; + case RAWSXP: case STRSXP: - source = PROTECT(coerceVector(source, STRSXP)); protecti++; + case CPLXSXP: + source = PROTECT(coerceVector(source, TYPEOF(target))); protecti++; + ok = true; + break; + case VECSXP: ok = true; break; } @@ -984,7 +989,6 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, if (!length(where)) { // e.g. called from rbindlist with where=R_NilValue switch (TYPEOF(target)) { case RAWSXP: - if (TYPEOF(source)!=RAWSXP) { source = PROTECT(coerceVector(source, RAWSXP)); protecti++; } if (slen==1) { // recycle single items Rbyte *td = RAW(target)+start; From 6350d5a342d9761fdd6e408530cd59f0ca4b9d15 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Tue, 24 Sep 2019 23:31:06 -0700 Subject: [PATCH 04/43] interim --- inst/tests/tests.Rraw | 2 +- src/assign.c | 39 ++++++++++++++++++++++++--------------- src/data.table.h | 1 + src/utils.c | 21 +++++++++++++++++++++ 4 files changed, 47 insertions(+), 16 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index e1bd5fb05c..23f39bc0f4 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -4846,7 +4846,7 @@ test(1294.02, dt[, a := 1.5]$a, rep(1L, 3L), warning="Coerced double RHS to inte test(1294.03, dt[, a := NA]$a, rep(NA_integer_, 3L)) test(1294.04, dt[, a := "a"]$a, rep(NA_integer_, 3L), warning=c("NAs introduced by coercion", - "Coerced character RHS to integer.*create the RHS as type integer.*with as.integer.. to avoid this warning.*DT., `a`:=as.character.`a`.*")) + "Coerced character RHS to integer.*column 1 named 'a'")) test(1294.05, dt[, a := list(list(1))]$a, rep(1L, 3L), warning="Coerced list RHS to integer to match.*column 1 named 'a'") test(1294.06, dt[, a := list(1L)]$a, rep(1L, 3L)) test(1294.07, dt[, a := list(1)]$a, rep(1L, 3L)) diff --git a/src/assign.c b/src/assign.c index f158fdedda..1121f2c59f 100644 --- a/src/assign.c +++ b/src/assign.c @@ -943,7 +943,7 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, } } else if (TYPEOF(target)!=TYPEOF(source)) { // checks up front, otherwise we'd need checks twice in the two branches that cater for 'where' or not - bool ok = false; + bool pun = false; switch(TYPEOF(target)) { case LGLSXP: if (isInteger(source)) { @@ -952,14 +952,15 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, const int val = sD[i]; if (val!=0 && val!=1 && val!=NA_INTEGER) error("Cannot assign %d to a logical column.", val); } + pun = true; } else if (isReal(source)) { // TODO: cater for integer64 here const double *sD = REAL(source); for (int i=0; i0) { warning("Coerced double RHS to integer to match the type of the target column (column %d named '%s'). One or more RHS values contain fractions which have been lost; e.g. item %d with value %f has been truncated to %d.", colnum, colname, w, REAL(source)[w-1], (int)REAL(source)[w-1]); } + pun = true; + } else if (isLogical(source)) { + pun = true; } - ok = true; break; case REALSXP: - ok = isLogical(source) || isInteger(source); - break; - case RAWSXP: - case STRSXP: - case CPLXSXP: - source = PROTECT(coerceVector(source, TYPEOF(target))); protecti++; - ok = true; + pun = isLogical(source) || isInteger(source); break; + //case RAWSXP: + //case STRSXP: + //case CPLXSXP: + // source = PROTECT(coerceVector(source, TYPEOF(target))); protecti++; + // ok = true; + // break; case VECSXP: - ok = true; + pun = true; break; } - if (!ok) - error("Cannot assign type '%s' to '%s' column", type2char(TYPEOF(source)), type2char(TYPEOF(target))); + if (!pun) { + SEXP tt = PROTECT(coerceVector(source, TYPEOF(target))); protecti++; + if (!isString(target) && !allNA(source)) { + warning("Coerced %s RHS to %s to match the type of the target column (column %d named '%s').", + type2char(TYPEOF(source)), type2char(TYPEOF(target)), colnum, colname); + } + source = tt; + } + // If the target column's type %s is correct, it's best for efficiency to avoid the coercion and create the RHS as type %s. To achieve that consider R's type postfix: typeof(0L) vs typeof(0), and typeof(NA) vs typeof(NA_integer_) vs typeof(NA_real_). You can wrap the RHS with as.%s() to avoid this warning, but that will still perform the coercion. If the target column's type is not correct, it's best to revisit where the DT was created and fix the column type there; e.g., by using colClasses= in fread(). Otherwise, you can change the column type now by plonking a new column (of the desired type) over the top of it; e.g. DT[, `%s`:=as.%s(`%s`)]. If the RHS of := has nrow(DT) elements then the assignment is called a column plonk and is the way to change a column's type. Column types can be observed with sapply(DT,typeof).", + // error("Cannot assign type '%s' to '%s' column", type2char(TYPEOF(source)), type2char(TYPEOF(target))); } if (!length(where)) { // e.g. called from rbindlist with where=R_NilValue switch (TYPEOF(target)) { @@ -1006,8 +1017,6 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, int val; switch (TYPEOF(source)) { case LGLSXP: - val = LOGICAL(source)[0]; - break; case INTSXP: val = INTEGER(source)[0]; break; diff --git a/src/data.table.h b/src/data.table.h index 55e4d60276..a234e67bce 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -213,6 +213,7 @@ SEXP coalesce(SEXP x, SEXP inplace); // utils.c bool isRealReallyInt(SEXP x); SEXP isReallyReal(SEXP x); +bool allNA(SEXP x); SEXP colnamesInt(SEXP x, SEXP cols, SEXP check_dups); void coerceFill(SEXP fill, double *dfill, int32_t *ifill, int64_t *i64fill); SEXP coerceFillR(SEXP fill); diff --git a/src/utils.c b/src/utils.c index cd25750a7f..d53f848f3a 100644 --- a/src/utils.c +++ b/src/utils.c @@ -32,6 +32,27 @@ SEXP isReallyReal(SEXP x) { return(ans); } +bool allNA(SEXP x) { + const int n = length(x); + switch(TYPEOF(x)) { + case LGLSXP: + case INTSXP: { + const int *xd = INTEGER(x); + for (int i=0; i Date: Wed, 25 Sep 2019 00:43:40 -0700 Subject: [PATCH 05/43] interim --- inst/tests/tests.Rraw | 30 +++++++++++++++--------------- src/assign.c | 30 +++++++++++++++--------------- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 23f39bc0f4..9f8abc0c38 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -4850,19 +4850,19 @@ test(1294.04, dt[, a := "a"]$a, rep(NA_integer_, 3L), test(1294.05, dt[, a := list(list(1))]$a, rep(1L, 3L), warning="Coerced list RHS to integer to match.*column 1 named 'a'") test(1294.06, dt[, a := list(1L)]$a, rep(1L, 3L)) test(1294.07, dt[, a := list(1)]$a, rep(1L, 3L)) -test(1294.08, dt[, a := TRUE]$a, rep(1L, 3L), warning="Coerced logical RHS to integer") +test(1294.08, dt[, a := TRUE]$a, rep(1L, 3L)) test(1294.09, dt[, b := 1L]$b, rep(1,3)) test(1294.10, dt[, b := NA]$b, rep(NA_real_,3)) test(1294.11, dt[, b := "bla"]$b, rep(NA_real_, 3), warning=c("NAs introduced by coercion", - "Coerced character RHS to double to match.*column 2 named 'b'.*DT[, `b`:=as.character(`b`)]")) + "Coerced character RHS to double to match.*column 2 named 'b'")) test(1294.12, dt[, b := list(list(1))]$b, rep(1,3), warning="Coerced list RHS to double") -test(1294.13, dt[, b := TRUE]$b, rep(1,3), warning="Coerced logical RHS to double") +test(1294.13, dt[, b := TRUE]$b, rep(1,3)) test(1294.14, dt[, b := list(1)]$b, rep(1,3)) -test(1294.15, dt[, c := 1]$c, rep(TRUE, 3), warning="Coerced double RHS to logical") -test(1294.16, dt[, c := 1L]$c, rep(TRUE, 3), warning="Coerced integer RHS to logical") +test(1294.15, dt[, c := 1]$c, rep(TRUE, 3)) +test(1294.16, dt[, c := 1L]$c, rep(TRUE, 3)) test(1294.17, dt[, c := NA]$c, rep(NA, 3)) -test(1294.18, dt[, c := list(1)]$c, rep(TRUE, 3), warning="Coerced double RHS to logical") +test(1294.18, dt[, c := list(1)]$c, rep(TRUE, 3)) test(1294.19, dt[, c := list(list(1))]$c, rep(TRUE, 3), warning="Coerced list RHS to logical") test(1294.20, dt[, c := "bla"]$c, rep(NA, 3), warning="Coerced character RHS to logical") test(1294.21, dt[, d := 1]$d, rep(list(1), 3)) @@ -4870,10 +4870,10 @@ test(1294.22, dt[, d := 1L]$d, rep(list(1L), 3)) test(1294.23, dt[, d := TRUE]$d, rep(list(TRUE), 3)) test(1294.24, dt[, d := "bla"]$d, rep(list("bla"), 3)) test(1294.25, dt[, d := list(list(1))]$d, rep(list(1), 3)) -test(1294.26, dt[, e := 1]$e, rep("1", 3), warning="Coerced double RHS to character") -test(1294.27, dt[, e := 1L]$e, rep("1", 3), warning="Coerced integer RHS to character") -test(1294.28, dt[, e := TRUE]$e, rep("TRUE", 3), warning="Coerced logical RHS to character") -test(1294.29, dt[, e := list(list(1))]$e, rep("1", 3), warning="Coerced list RHS to character") +test(1294.26, dt[, e := 1]$e, rep("1", 3)) +test(1294.27, dt[, e := 1L]$e, rep("1", 3)) +test(1294.28, dt[, e := TRUE]$e, rep("TRUE", 3)) +test(1294.29, dt[, e := list(list(1))]$e, rep("1", 3)) test(1294.30, dt[, e := "bla"]$e, rep("bla", 3)) test(1294.31, dt[, e := list("bla2")]$e, rep("bla2", 3)) @@ -9811,7 +9811,7 @@ test(1674, forderv(c(2147483645L, 2147483646L, 2147483647L, 2147483644L), order= A = data.table(foo = c(1, 2, 3), bar = c(4, 5, 6)) A[, bar := factor(bar, levels = c(4, 5), labels = c("Boop", "Beep"), exclude = 6)] B = data.table(foo = c(1, 2, 3, 4, 5, 6), bar = structure(c(3L, 3L, 3L, 1L, 2L, NA), .Label=c("Boop","Beep",NA), class="factor")) -test(1675.1, as.integer(B[A, bar := i.bar, on="foo"]$bar), c(1:3,1:2,NA)) +test(1675.1, as.integer(B[A, bar := i.bar, on="foo"]$bar), c(1:2,NA,1:2,NA)) # remove the NA level is change in v1.12.4 A = data.table(foo = c(1, 2, 3), bar = c(4, 5, 6)) B = data.table(foo = c(1, 2, 3, 4, 5, 6), bar = c(NA, NA, NA, 4, 5, 6)) A[, bar := factor(bar, levels = c(4, 5), labels = c("Boop", "Beep"), exclude = 6)] @@ -12923,9 +12923,9 @@ test(1944.6, DT[flag == 1, sum(x), keyby = group], data.table(group=1:4, V1=INT( # assigning an int greater than length(levels) corruption of int, #2984 DT <- data.table(a = factor(c("A", "Z")), b = 1:4) c <- 3L -test(1945.1, DT[1, a:=c][1,a], factor(NA, levels=c("A","Z")), warning="RHS.*outside the levels range.*NAs generated") +test(1945.1, DT[1, a:=c], error="Cannot assign 'integer' to 'factor'") # [1,a], factor(NA, levels=c("A","Z")), warning="RHS.*outside the levels range.*NAs generated") test(1945.2, c, 3L) -test(1945.3, DT[2,1] <- c, 3L, warning="RHS.*outside the levels range.*NAs generated") +test(1945.3, DT[2,1] <- c, error="Cannot assign 'integer' to 'factor'") # 3L, warning="RHS.*outside the levels range.*NAs generated") test(1945.4, c, 3L) # subset a data.table containing an altrep derived from ]<-, ]]<- etc, #3051 @@ -14116,8 +14116,8 @@ test(2005.3, set(DT, 3L, 8i, NA), error="j is type 'complex'. Must be integer, c test(2005.4, set(DT, 1L, 2L, expression(x+2)), error="cannot be coerced to.*integer") # R's error message same as returned by as.integer(expression(x+2)) DT[,foo:=factor(c("a","b","c"))] test(2005.5, DT[2, foo:=8i], error="Can't assign to column 'foo' (type 'factor') a value of type 'complex' (not character, factor, integer or numeric)") -test(2005.6, DT[2, a:=9, verbose=TRUE], output="Coerced length-1 RHS from double to integer to match column's type. No precision was lost. If this") -test(2005.7, DT[2, a:=NA, verbose=TRUE], output="Coerced length-1 RHS from logical to integer to match column's type. If this") +test(2005.6, DT[2, a:=9, verbose=TRUE], notOutput="Coerced") +test(2005.7, DT[2, a:=NA, verbose=TRUE], notOutput="Coerced") test(2005.8, DT[2, a:=9.9]$a, INT(1,9,3), warning="Coerced double RHS to integer.*One or more RHS values contain fractions which have been lost.*9.9.*has been truncated to 9") # rbindlist raw type, #2819 diff --git a/src/assign.c b/src/assign.c index 1121f2c59f..5461f5c361 100644 --- a/src/assign.c +++ b/src/assign.c @@ -974,28 +974,20 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, } break; case REALSXP: - pun = isLogical(source) || isInteger(source); + pun = isLogical(source) || isInteger(source) || TYPEOF(source)==RAWSXP; break; - //case RAWSXP: - //case STRSXP: - //case CPLXSXP: - // source = PROTECT(coerceVector(source, TYPEOF(target))); protecti++; - // ok = true; - // break; case VECSXP: pun = true; break; } if (!pun) { - SEXP tt = PROTECT(coerceVector(source, TYPEOF(target))); protecti++; + SEXP tt = PROTECT(coerceVector(source, TYPEOF(target))); protecti++; // e.g. RAWSXP, STRSXP and CPLXSXP if (!isString(target) && !allNA(source)) { warning("Coerced %s RHS to %s to match the type of the target column (column %d named '%s').", type2char(TYPEOF(source)), type2char(TYPEOF(target)), colnum, colname); } source = tt; } - // If the target column's type %s is correct, it's best for efficiency to avoid the coercion and create the RHS as type %s. To achieve that consider R's type postfix: typeof(0L) vs typeof(0), and typeof(NA) vs typeof(NA_integer_) vs typeof(NA_real_). You can wrap the RHS with as.%s() to avoid this warning, but that will still perform the coercion. If the target column's type is not correct, it's best to revisit where the DT was created and fix the column type there; e.g., by using colClasses= in fread(). Otherwise, you can change the column type now by plonking a new column (of the desired type) over the top of it; e.g. DT[, `%s`:=as.%s(`%s`)]. If the RHS of := has nrow(DT) elements then the assignment is called a column plonk and is the way to change a column's type. Column types can be observed with sapply(DT,typeof).", - // error("Cannot assign type '%s' to '%s' column", type2char(TYPEOF(source)), type2char(TYPEOF(target))); } if (!length(where)) { // e.g. called from rbindlist with where=R_NilValue switch (TYPEOF(target)) { @@ -1160,13 +1152,13 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, int val = ISNAN(sd[0]) ? NA_INTEGER : (int)sd[0]; for (int i=0; i Date: Wed, 25 Sep 2019 01:04:08 -0700 Subject: [PATCH 06/43] interim --- inst/tests/tests.Rraw | 7 +++ src/assign.c | 132 +----------------------------------------- 2 files changed, 8 insertions(+), 131 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 9f8abc0c38..13f6fbcaef 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16162,6 +16162,13 @@ DT = data.table(x = rep(letters[c(3, 1, 2)], each = 2)) test(2114.7, DT[, `:=`(g=.GRP, f=factor(.GRP)), by = x], data.table(x=rep(c("c","a","b"),each=2), g=rep(1:3,each=2), f=factor(rep(as.character(1:3),each=2)))) +# extra tests from #996 for completeness; no warning no-alloc coerce here of 0 and 1 numerics +DT = data.table(a=1:4, b=c(FALSE, TRUE, NA, FALSE)) +test(2115.1, set(DT,3L,1L,0), data.table(a=INT(1,2,0,4), b=c(FALSE, TRUE, NA, FALSE))) +test(2115.2, set(DT,3L,2L,0), data.table(a=INT(1,2,0,4), b=c(FALSE, TRUE, FALSE, FALSE))) +DT = data.table(code=c("c","b","c","a"), val=10:13) +test(2115.3, DT[code=="c", val := val+1], data.table(code=c("c","b","c","a"), val=INT(11,11,13,13))) + ################################### # Add new tests above this line # diff --git a/src/assign.c b/src/assign.c index 5461f5c361..ba94510082 100644 --- a/src/assign.c +++ b/src/assign.c @@ -461,7 +461,6 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values) // truelengths of both already set by alloccol } for (i=0; i oldncol) { // new column SET_VECTOR_ELT(dt, coln, targetcol=allocNAVectorLike(thisvalue, nrow)); @@ -499,141 +497,13 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values) if (isVectorAtomic(thisvalue)) copyMostAttrib(thisvalue,targetcol); // class etc but not names // else for lists (such as data.frame and data.table) treat them as raw lists and drop attribs if (vlen<1) continue; // e.g. DT[,newcol:=integer()] (adding new empty column) - // delete ... targetcol = newcol; - // RHS = thisvalue; } else { // existing column targetcol = VECTOR_ELT(dt,coln); } memrecycle(targetcol, rows, 0, targetlen, thisvalue, coln+1, CHAR(STRING_ELT(names, coln))); // memrecycle is also called from dogroups; ^^ last 2 arguments just for messages - - // delete ... UNPROTECT(thisprotecti); // unprotect inside loop through columns to save protection stack - } - /* - // !! START DELETE !! - if (isFactor(targetcol)) { - // Coerce RHS to appropriate levels of LHS, adding new levels as necessary (unlike base) - // If it's the same RHS being assigned to several columns, we have to recoerce for each - // one because the levels of each target are likely different - if (isFactor(thisvalue)) { - thisvalue = PROTECT(asCharacterFactor(thisvalue)); thisprotecti++; - } - targetlevels = getAttrib(targetcol, R_LevelsSymbol); - if (isNull(targetlevels)) error("somehow this factor column has no levels"); - if (isString(thisvalue)) { - savetl_init(); // ** TO DO **: remove allocs that could fail between here and _end, or different way - for (j=0; j0) { - savetl(s); // pre-2.14.0 this will save all the uninitialised truelengths - // so 2.14.0+ may be faster, but isn't required. - // as from v1.8.0 we assume R's internal hash is positive, so don't - // save the uninitialised truelengths that by chance are negative - } - SET_TRUELENGTH(s,0); - } - for (j=0; j0) savetl(s); - SET_TRUELENGTH(s,j+1); - } - R_len_t addi = 0; - SEXP addlevels=NULL; - RHS = PROTECT(allocVector(INTSXP, length(thisvalue))); thisprotecti++; - int *iRHS = INTEGER(RHS); - for (j=0; j= length(addlevels)) { - addlevels = PROTECT(growVector(addlevels, length(addlevels)+1000)); thisprotecti++; - } - SET_STRING_ELT(addlevels,addi++,thisv); - // if-else for #1718 fix - SET_TRUELENGTH(thisv, (thisv != NA_STRING) ? (addi+length(targetlevels)) : NA_INTEGER); - } - iRHS[j] = TRUELENGTH(thisv); - } - if (addi > 0) { - R_len_t oldlen = length(targetlevels); - targetlevels = PROTECT(growVector(targetlevels, oldlen+addi)); thisprotecti++; - for (j=0; jLENGTH(targetlevels)) - && iRHS[j] != NA_INTEGER) { - warning("RHS contains %d which is outside the levels range ([1,%d]) of column %d, NAs generated", iRHS[j], LENGTH(targetlevels), i+1); - iRHS[j] = NA_INTEGER; - } - } - } - } else { - if (TYPEOF(targetcol)==TYPEOF(thisvalue) || - TYPEOF(targetcol)==VECSXP || - (isLogical(targetcol) && isIntegerReallyLogical(thisvalue))) - RHS = thisvalue; - else { - // coerce the RHS to match the type of the column, unlike [<-.data.frame, for efficiency. - if (isString(targetcol) && isFactor(thisvalue)) { - RHS = PROTECT(asCharacterFactor(thisvalue)); thisprotecti++; - if (verbose) Rprintf("Coerced factor RHS to character to match the column's type. Avoid this coercion if possible, for efficiency, by creating RHS as type character.\n"); - // TO DO: datatable.pedantic could turn this into warning - } else { - RHS = PROTECT(coerceVector(thisvalue,TYPEOF(targetcol))); thisprotecti++; - char *s1 = (char *)type2char(TYPEOF(targetcol)); - char *s2 = (char *)type2char(TYPEOF(thisvalue)); - // FR #2551, added test for equality between RHS and thisvalue to not provide the warning when length(thisvalue) == 1 - if ( length(thisvalue)==1 && TYPEOF(RHS)!=VECSXP && ( - ( isReal(thisvalue) && isInteger(targetcol) && REAL(thisvalue)[0]==INTEGER(RHS)[0] ) || // DT[,intCol:=4] rather than DT[,intCol:=4L] - ( isLogical(thisvalue) && LOGICAL(thisvalue)[0] == NA_LOGICAL ) || // DT[,intCol:=NA] - ( isInteger(thisvalue) && isReal(targetcol) ) )) { - if (verbose) Rprintf("Coerced length-1 RHS from %s to %s to match column's type.%s If this assign is happening a lot inside a loop, in particular via set(), then it may be worth avoiding this coercion by using R's type postfix on the value being assigned; e.g. typeof(0) vs typeof(0L), and typeof(NA) vs typeof(NA_integer_) vs typeof(NA_real_).\n", s2, s1, - isInteger(targetcol) && isReal(thisvalue) ? " No precision was lost." : ""); - // TO DO: datatable.pedantic could turn this into warning. Or we could catch and avoid the coerceVector allocation ourselves using a single int. - } else { - if (isReal(thisvalue) && isInteger(targetcol)) { - int w = INTEGER(isReallyReal(thisvalue))[0]; // first fraction present (1-based), 0 if none - if (w>0) { - warning("Coerced double RHS to integer to match the type of the target column (column %d named '%s'). One or more RHS values contain fractions which have been lost; e.g. item %d with value %f has been truncated to %d.", - coln+1, CHAR(STRING_ELT(names, coln)), w, REAL(thisvalue)[w-1], INTEGER(RHS)[w-1]); - } else { - warning("Coerced double RHS to integer to match the type of the target column (column %d named '%s'). The RHS values contain no fractions so would be more efficiently created as integer. Consider using R's 'L' postfix (typeof(0L) vs typeof(0)) to create constants as integer and avoid this warning. Wrapping the RHS with as.integer() will avoid this warning too but it's better if possible to create the RHS as integer in the first place so that the cost of the coercion can be avoided.", coln+1, CHAR(STRING_ELT(names, coln))); - } - } else { - warning("Coerced %s RHS to %s to match the type of the target column (column %d named '%s'). If the target column's type %s is correct, it's best for efficiency to avoid the coercion and create the RHS as type %s. To achieve that consider R's type postfix: typeof(0L) vs typeof(0), and typeof(NA) vs typeof(NA_integer_) vs typeof(NA_real_). You can wrap the RHS with as.%s() to avoid this warning, but that will still perform the coercion. If the target column's type is not correct, it's best to revisit where the DT was created and fix the column type there; e.g., by using colClasses= in fread(). Otherwise, you can change the column type now by plonking a new column (of the desired type) over the top of it; e.g. DT[, `%s`:=as.%s(`%s`)]. If the RHS of := has nrow(DT) elements then the assignment is called a column plonk and is the way to change a column's type. Column types can be observed with sapply(DT,typeof).", - s2, s1, coln+1, CHAR(STRING_ELT(names, coln)), s1, s1, s1, CHAR(STRING_ELT(names, coln)), s2, CHAR(STRING_ELT(names, coln))); - } - } - } - } - } - } - memrecycle(targetcol, rows, 0, targetlen, RHS); // also called from dogroups where these arguments are used more - UNPROTECT(thisprotecti); // unprotect inside loop through columns to save protection stack } - // !! END DELETE !! - */ + *_Last_updated = numToDo; // the updates have taken place with no error, so update .Last.updated now assignedNames = PROTECT(allocVector(STRSXP, LENGTH(cols))); protecti++; for (i=0;i Date: Wed, 25 Sep 2019 14:29:06 -0700 Subject: [PATCH 07/43] interim --- inst/tests/tests.Rraw | 6 +- src/assign.c | 169 ++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 169 insertions(+), 6 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 13f6fbcaef..47b61e8e3e 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16166,8 +16166,12 @@ test(2114.7, DT[, `:=`(g=.GRP, f=factor(.GRP)), by = x], DT = data.table(a=1:4, b=c(FALSE, TRUE, NA, FALSE)) test(2115.1, set(DT,3L,1L,0), data.table(a=INT(1,2,0,4), b=c(FALSE, TRUE, NA, FALSE))) test(2115.2, set(DT,3L,2L,0), data.table(a=INT(1,2,0,4), b=c(FALSE, TRUE, FALSE, FALSE))) +test(2115.3, set(DT,3L,2L,-2L), data.table(a=INT(1,2,0,4), b=c(FALSE, TRUE, TRUE, FALSE)), # see also test 299 + warning="Non-zero -2 taken as TRUE.*Please use.*") +test(2115.4, set(DT,4L,2L,3.14), data.table(a=INT(1,2,0,4), b=c(FALSE, TRUE, TRUE, TRUE)), + warning="Non-zero 3.14.* taken as TRUE.*Please use.*") DT = data.table(code=c("c","b","c","a"), val=10:13) -test(2115.3, DT[code=="c", val := val+1], data.table(code=c("c","b","c","a"), val=INT(11,11,13,13))) +test(2115.5, DT[code=="c", val := val+1], data.table(code=c("c","b","c","a"), val=INT(11,11,13,13))) ################################### diff --git a/src/assign.c b/src/assign.c index ba94510082..ae8049e90d 100644 --- a/src/assign.c +++ b/src/assign.c @@ -813,6 +813,8 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, } } else if (TYPEOF(target)!=TYPEOF(source)) { // checks up front, otherwise we'd need checks twice in the two branches that cater for 'where' or not + // TODO: verbose message and/or strict option for advanced users to ensure types match + // only call getOption (small cost in finding the option value) at this point when there is a type mismatch bool pun = false; switch(TYPEOF(target)) { case LGLSXP: @@ -820,14 +822,23 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, const int *sD = INTEGER(source); for (int i=0; i0) { warning("Coerced double RHS to integer to match the type of the target column (column %d named '%s'). One or more RHS values contain fractions which have been lost; e.g. item %d with value %f has been truncated to %d.", colnum, colname, w, REAL(source)[w-1], (int)REAL(source)[w-1]); + // same warning text as v1.12.2 so as to reduce diff in tests. TODO: shorten text in future to put truncated %f at the begnning of the message. } pun = true; } else if (isLogical(source)) { @@ -871,7 +883,46 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, memcpy(RAW(target)+start, RAW(source), slen*SIZEOF(target)); } break; - case LGLSXP: + case LGLSXP: { + int *td = LOGICAL(target)+start; + if (slen==1) { + int val; + switch (TYPEOF(source)) { + case LGLSXP: + val = LOGICAL(source)[0]; + break; + case INTSXP: + val = INTEGER(source)[0]; + val = val==NA_INTEGER ? NA_LOGICAL : val!=0; // zero-alloc recycle length-1 + break; + case REALSXP: { + double d = REAL(source)[0]; + val = ISNAN(d) ? NA_LOGICAL : d!=0.0; + } break; + default: + error("Internal error"); // # nocov + } + for (int i=0; i1 + } + } break; + case REALSXP: { + const double *sd = REAL(source); + for (int i=0; i Date: Wed, 25 Sep 2019 17:02:09 -0700 Subject: [PATCH 08/43] interim --- src/assign.c | 323 ++++++++++++++++++--------------------------------- 1 file changed, 112 insertions(+), 211 deletions(-) diff --git a/src/assign.c b/src/assign.c index ae8049e90d..0b2d76f5cc 100644 --- a/src/assign.c +++ b/src/assign.c @@ -871,6 +871,8 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, source = tt; } } + +/* if (!length(where)) { // e.g. called from rbindlist with where=R_NilValue switch (TYPEOF(target)) { case RAWSXP: @@ -973,7 +975,7 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, } } else if (si64) { error("Internal error: memrecycle source is integer64 but target is real and not integer64; target should be type integer64"); // # nocov - /* + */ /* double *td = REAL(target)+start; if (slen==1) { const double val = (double)(((int64_t *)REAL(source))[0]); @@ -982,6 +984,7 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, const int64_t *val = (int64_t *)REAL(source); for (int i=0; i Date: Wed, 25 Sep 2019 17:16:18 -0700 Subject: [PATCH 09/43] coerce to logical warnings working again --- inst/tests/tests.Rraw | 10 +++++----- src/assign.c | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 47b61e8e3e..74bd5bd08a 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -874,12 +874,12 @@ test(299.05, DT[2:3,c:=c(43, 44)], data.table(a=1:3,c=42:44)) test(299.06, DT[2,c:=42], data.table(a=1:3,c=INT(42,42,44))) # also see tests 302 and 303. (Ok, new test file for fast assign would be tidier). test(299.07, DT[,c:=rep(FALSE,nrow(DT))], data.table(a=1:3,c=FALSE)) # replace c column with logical -test(299.08, DT[2:3,c:=c(42,0)], error="Cannot assign 42.0.* to a logical column") -test(299.09, DT[2:3,c:=c(1,0)], data.table(a=1:3,c=c(FALSE,TRUE,FALSE))) +test(299.08, DT[2:3,c:=c(3.14,0)], data.table(a=1:3, c=c(FALSE,TRUE,FALSE)), warning="Non-zero 3.14.* taken as TRUE.*Please use") +test(299.09, DT[2:3,c:=c(0,1)], data.table(a=1:3,c=c(FALSE,FALSE,TRUE))) # no warning # FR #2551 is now changed to fit in / fix bug #5442. Stricter warnings are in place now. Check tests 1294.1-34 below. -test(299.10, DT[2,c:=42], error="Cannot assign 42.0.* to a logical column") -test(299.11, DT[2,c:=42L], error="Cannot assign 42 to a logical column") -test(299.12, DT[2:3,c:=c(0L, 0L)], data.table(a=1:3,c=FALSE)) +test(299.10, DT[2,c:=42], data.table(a=1:3, c=c(FALSE,TRUE,TRUE)), warning="Non-zero 42.0.* taken as TRUE.*Please use") +test(299.11, DT[1,c:=42L], data.table(a=1:3, c=TRUE), warning="Non-zero 42 taken as TRUE.*Please use") +test(299.12, DT[2:3,c:=c(0L, 0L)], data.table(a=1:3,c=c(TRUE,FALSE,FALSE))) # Test bug fix #1468, combining i and by. DT = data.table(a=1:3,b=1:9,v=1:9,key="a,b") diff --git a/src/assign.c b/src/assign.c index 0b2d76f5cc..0396c73930 100644 --- a/src/assign.c +++ b/src/assign.c @@ -1163,7 +1163,7 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, const SEXP *ld = STRING_PTR(PROTECT(getAttrib(source, R_LevelsSymbol))); protecti++; BODY(int, INTEGER, SEXP, val==NA_INTEGER ? NA_STRING : ld[val-1], SET_STRING_ELT(target, off+i, cval)) } else { - if (!isString(source)) error("Internal error. Not coercedd earlier."); + if (!isString(source)) error("Internal error. Not coerced earlier."); BODY(SEXP, STRING_PTR, SEXP, val, SET_STRING_ELT(target, off+i, cval)) } break; From 45f66cc74af21af84225b6d4846b0a845c64a951 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Wed, 25 Sep 2019 18:41:44 -0700 Subject: [PATCH 10/43] interim --- src/assign.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/src/assign.c b/src/assign.c index 0396c73930..7ff6bdd9cc 100644 --- a/src/assign.c +++ b/src/assign.c @@ -1168,22 +1168,8 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, } break; case VECSXP : - // TODO: use BODY here. - if (!length(where)) { - if (TYPEOF(source)==VECSXP && len==slen) { - for (int i=0; i Date: Wed, 25 Sep 2019 19:23:45 -0700 Subject: [PATCH 11/43] interim --- src/assign.c | 41 ++++++++++++++++------------------------- 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/src/assign.c b/src/assign.c index 7ff6bdd9cc..12a108aa74 100644 --- a/src/assign.c +++ b/src/assign.c @@ -708,27 +708,9 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, error("Cannot assign 'factor' to '%s'. Factors can only be assigned to factor, character or list columns.", type2char(TYPEOF(target))); // else assigning factor to character is left to later below, avoiding wasteful asCharacterFactor } else if (!sourceIsFactor && !isString(source)) { - // source is not character or factor. If it's all-NA in another type, let it fall through - // to assign the NA later as-is without any coerce. Otherwise, error. - bool ok = true; - switch(TYPEOF(source)) { - case LGLSXP: case INTSXP: { - const int *sd = INTEGER(source); - for (int k=0; k255 || val<0) ? 0 : val, td[i]=cval) break; + case REALSXP: BODY(double, REAL, Rbyte, (ISNAN(val)||val>256||val<0) ? 0 : val, td[i]=cval) break; + default: error("Internal error"); + } + } break; case LGLSXP: { int *td = LOGICAL(target) + off; switch (TYPEOF(source)) { - //case RAWSXP: ... + case RAWSXP: BODY(Rbyte, RAW, int, val!=0, td[i]=cval) break; case LGLSXP: BODY(int, LOGICAL, int, val, td[i]=cval) break; case INTSXP: BODY(int, INTEGER, int, val==NA_INTEGER ? NA_LOGICAL : val!=0, td[i]=cval) break; case REALSXP: BODY(double, REAL, int, ISNAN(val) ? NA_LOGICAL : val!=0.0, td[i]=cval) break; @@ -1133,8 +1124,8 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, case INTSXP : { int *td = INTEGER(target) + off; switch (TYPEOF(source)) { - //case RAWSXP: ... - case LGLSXP: // same as INTSXP + case RAWSXP: BODY(Rbyte, RAW, int, (int)val, td[i]=cval) break; + case LGLSXP: // same as INTSXP ... case INTSXP: BODY(int, INTEGER, int, val, td[i]=cval) break; case REALSXP: BODY(double, REAL, int, ISNAN(val) ? NA_INTEGER : (int)val, td[i]=cval) break; default: error("Internal error"); From fc72fb6050e888d64c376e9305a26f8893e19bda Mon Sep 17 00:00:00 2001 From: mattdowle Date: Wed, 25 Sep 2019 19:50:40 -0700 Subject: [PATCH 12/43] interim --- src/assign.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/assign.c b/src/assign.c index 12a108aa74..96ceeb4215 100644 --- a/src/assign.c +++ b/src/assign.c @@ -854,6 +854,7 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, } } + const bool sourceIsI64 = isReal(source) && Rinherits(source, char_integer64); /* if (!length(where)) { // e.g. called from rbindlist with where=R_NilValue switch (TYPEOF(target)) { @@ -864,7 +865,7 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, const Rbyte val = RAW(source)[0]; for (int i=0; i1 case + break; case REALSXP: { const double *sourceD = REAL(source); @@ -953,7 +954,7 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, const double val = REAL(source)[0]; for (int i=0; i1 case + //memcpy(REAL(target)+start, REAL(source), slen*SIZEOF(target)); + //memcpy(COMPLEX(target)+start, COMPLEX(source), slen*SIZEOF(target)); + + + const int off = (length(where)?0:start); // off = target offset; e.g. called from rbindlist with where=R_NilValue and start!=0 switch (TYPEOF(target)) { case RAWSXP: { Rbyte *td = RAW(target) + off; @@ -1132,6 +1141,7 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, } } break; case REALSXP : { + if (Rinherits(target, char_integer64)) { double *td = REAL(target) + off; switch (TYPEOF(source)) { case RAWSXP: BODY(Rbyte, RAW, double, (double)val, td[i]=cval) break; From ef1c6fd6ef21548bc1df77d565f735cac3926034 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Wed, 25 Sep 2019 20:26:08 -0700 Subject: [PATCH 13/43] interim --- src/assign.c | 42 +++++++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/src/assign.c b/src/assign.c index 96ceeb4215..00a10ced1d 100644 --- a/src/assign.c +++ b/src/assign.c @@ -853,8 +853,6 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, source = tt; } } - - const bool sourceIsI64 = isReal(source) && Rinherits(source, char_integer64); /* if (!length(where)) { // e.g. called from rbindlist with where=R_NilValue switch (TYPEOF(target)) { @@ -1063,7 +1061,7 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, #undef BODY #define BODY(STYPE, RFUN, CTYPE, CAST, ASSIGN) \ - { const STYPE *sd = RFUN(source); \ + { const STYPE *sd = (const STYPE *)RFUN(source); \ if (length(where)) { \ const int *wd = INTEGER(where)+start; \ if (slen==1) { \ @@ -1107,7 +1105,6 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, //memcpy(REAL(target)+start, REAL(source), slen*SIZEOF(target)); //memcpy(COMPLEX(target)+start, COMPLEX(source), slen*SIZEOF(target)); - const int off = (length(where)?0:start); // off = target offset; e.g. called from rbindlist with where=R_NilValue and start!=0 switch (TYPEOF(target)) { case RAWSXP: { @@ -1141,15 +1138,34 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, } } break; case REALSXP : { - if (Rinherits(target, char_integer64)) { - double *td = REAL(target) + off; - switch (TYPEOF(source)) { - case RAWSXP: BODY(Rbyte, RAW, double, (double)val, td[i]=cval) break; - case LGLSXP: // same as INTSXP - case INTSXP: BODY(int, INTEGER, double, val==NA_INTEGER ? NA_REAL : (double)val, td[i]=cval) break; - case REALSXP: BODY(double, REAL, double, val, td[i]=cval) break; - // ****** TODO: ******** integer64!! - default: error("Internal error"); + if (!Rinherits(target, char_integer64)) { + double *td = REAL(target) + off; + switch (TYPEOF(source)) { + case RAWSXP: BODY(Rbyte, RAW, double, (double)val, td[i]=cval) break; + case LGLSXP: // same as INTSXP + case INTSXP: BODY(int, INTEGER, double, val==NA_INTEGER ? NA_REAL : val, td[i]=cval) break; + case REALSXP: + if (Rinherits(source, char_integer64)) + BODY(int64_t, REAL, double, val==NA_INTEGER64 ? NA_REAL : val, td[i]=cval) + else + BODY(double, REAL, double, val, td[i]=cval) + break; + default: error("Internal error"); + } + } else { + int64_t *td = (int64_t *)REAL(target) + off; + switch (TYPEOF(source)) { + case RAWSXP: BODY(Rbyte, RAW, int64_t, (int64_t)val, td[i]=cval) break; + case LGLSXP: // same as INTSXP + case INTSXP: BODY(int, INTEGER, int64_t, val==NA_INTEGER ? NA_INTEGER64 : (int64_t)val, td[i]=cval) break; + case REALSXP: + if (Rinherits(source, char_integer64)) + BODY(int64_t, REAL, int64_t, val, td[i]=cval) + else + BODY(double, REAL, int64_t, !R_FINITE(val) ? NA_INTEGER64 : val, td[i]=cval) + break; + default: error("Internal error"); + } } } break; case CPLXSXP: { From 78733d91f22046190a19f6bc81f31470d494849d Mon Sep 17 00:00:00 2001 From: mattdowle Date: Wed, 25 Sep 2019 21:33:46 -0700 Subject: [PATCH 14/43] interim; passing --- src/assign.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/assign.c b/src/assign.c index 00a10ced1d..ca25a55493 100644 --- a/src/assign.c +++ b/src/assign.c @@ -702,6 +702,8 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, } } const bool sourceIsFactor=isFactor(source), targetIsFactor=isFactor(target); + const bool sourceIsI64=isReal(source) && Rinherits(source, char_integer64); + const bool targetIsI64=isReal(target) && Rinherits(target, char_integer64); if (sourceIsFactor || targetIsFactor) { if (!targetIsFactor) { if (!isString(target) && !isNewList(target)) @@ -793,7 +795,7 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, // now continue, but with the mapped integers in the (new) source } } - } else if (TYPEOF(target)!=TYPEOF(source)) { + } else if (TYPEOF(target)!=TYPEOF(source) || targetIsI64!=sourceIsI64) { // checks up front, otherwise we'd need checks twice in the two branches that cater for 'where' or not // TODO: verbose message and/or strict option for advanced users to ensure types match // only call getOption (small cost in finding the option value) at this point when there is a type mismatch @@ -838,7 +840,13 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, } break; case REALSXP: - pun = isLogical(source) || isInteger(source) || TYPEOF(source)==RAWSXP; + if (targetIsI64 && isReal(source) && !sourceIsI64) { + int firstReal=0; + if ((firstReal=INTEGER(isReallyReal(source))[0])) { + sprintf(memrecycle_message, "coerced to integer64 but contains a non-integer value (%f at position %d); precision lost.", REAL(source)[firstReal-1], firstReal); + } + } + pun = isLogical(source) || isInteger(source) || isReal(source) || TYPEOF(source)==RAWSXP; break; case VECSXP: pun = true; @@ -1157,12 +1165,12 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, switch (TYPEOF(source)) { case RAWSXP: BODY(Rbyte, RAW, int64_t, (int64_t)val, td[i]=cval) break; case LGLSXP: // same as INTSXP - case INTSXP: BODY(int, INTEGER, int64_t, val==NA_INTEGER ? NA_INTEGER64 : (int64_t)val, td[i]=cval) break; + case INTSXP: BODY(int, INTEGER, int64_t, val==NA_INTEGER ? NA_INTEGER64 : val, td[i]=cval) break; case REALSXP: if (Rinherits(source, char_integer64)) BODY(int64_t, REAL, int64_t, val, td[i]=cval) else - BODY(double, REAL, int64_t, !R_FINITE(val) ? NA_INTEGER64 : val, td[i]=cval) + BODY(double, REAL, int64_t, R_FINITE(val) ? val : NA_INTEGER64, td[i]=cval) break; default: error("Internal error"); } @@ -1180,7 +1188,7 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, const SEXP *ld = STRING_PTR(PROTECT(getAttrib(source, R_LevelsSymbol))); protecti++; BODY(int, INTEGER, SEXP, val==NA_INTEGER ? NA_STRING : ld[val-1], SET_STRING_ELT(target, off+i, cval)) } else { - if (!isString(source)) error("Internal error. Not coerced earlier."); + if (!isString(source)) { source = PROTECT(coerceVector(source, STRSXP)); protecti++; } BODY(SEXP, STRING_PTR, SEXP, val, SET_STRING_ELT(target, off+i, cval)) } break; From b46e17379b58fea20c1d03f843e10e70f87991b9 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Wed, 25 Sep 2019 22:33:57 -0700 Subject: [PATCH 15/43] interm; passing --- inst/tests/tests.Rraw | 2 +- src/assign.c | 37 ++++++++++++++++++++----------------- 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 74bd5bd08a..cad0ba55d2 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -14113,7 +14113,7 @@ test(2005.1, truelength(NULL), 0L) DT = data.table(a=1:3, b=4:6) test(2005.2, set(DT, 4L, "b", NA), error="i[1] is 4 which is out of range [1,nrow=3]") test(2005.3, set(DT, 3L, 8i, NA), error="j is type 'complex'. Must be integer, character, or numeric is coerced with warning.") -test(2005.4, set(DT, 1L, 2L, expression(x+2)), error="cannot be coerced to.*integer") # R's error message same as returned by as.integer(expression(x+2)) +test(2005.4, set(DT, 1L, 2L, expression(x+2)), error="type 'expression' cannot be coerced to 'integer'") # similar to R's error for as.integer(expression(x+2)) DT[,foo:=factor(c("a","b","c"))] test(2005.5, DT[2, foo:=8i], error="Can't assign to column 'foo' (type 'factor') a value of type 'complex' (not character, factor, integer or numeric)") test(2005.6, DT[2, a:=9, verbose=TRUE], notOutput="Coerced") diff --git a/src/assign.c b/src/assign.c index ca25a55493..d7b19509cc 100644 --- a/src/assign.c +++ b/src/assign.c @@ -710,6 +710,8 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, error("Cannot assign 'factor' to '%s'. Factors can only be assigned to factor, character or list columns.", type2char(TYPEOF(target))); // else assigning factor to character is left to later below, avoiding wasteful asCharacterFactor } else if (!sourceIsFactor && !isString(source)) { + // target is factor + // TODO allow integer in the range [NA,1,nlevel] again, with error outside range if (!allNA(source)) error("Cannot assign '%s' to 'factor'. Factor columns can only be assigned factor or character values, or NA in any type.", type2char(TYPEOF(source))); // else let the all-NA (most likely just one NA) fall through to regular assign below without any coerce @@ -799,7 +801,7 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, // checks up front, otherwise we'd need checks twice in the two branches that cater for 'where' or not // TODO: verbose message and/or strict option for advanced users to ensure types match // only call getOption (small cost in finding the option value) at this point when there is a type mismatch - bool pun = false; + //bool pun = false; switch(TYPEOF(target)) { case LGLSXP: if (isInteger(source)) { @@ -813,7 +815,7 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, break; // just warn on the first } } - pun = true; + // pun = true; } else if (isReal(source)) { // TODO: cater for integer64 here const double *sD = REAL(source); for (int i=0; i1 case @@ -1142,7 +1145,7 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, case LGLSXP: // same as INTSXP ... case INTSXP: BODY(int, INTEGER, int, val, td[i]=cval) break; case REALSXP: BODY(double, REAL, int, ISNAN(val) ? NA_INTEGER : (int)val, td[i]=cval) break; - default: error("Internal error"); + default: error("type '%s' cannot be coerced to 'integer'", type2char(TYPEOF(source))); // test 2005.4 } } break; case REALSXP : { @@ -1170,7 +1173,7 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, if (Rinherits(source, char_integer64)) BODY(int64_t, REAL, int64_t, val, td[i]=cval) else - BODY(double, REAL, int64_t, R_FINITE(val) ? val : NA_INTEGER64, td[i]=cval) + BODY(double, REAL, int64_t, R_FINITE(val) ? val : NA_INTEGER64, td[i]=cval) break; default: error("Internal error"); } @@ -1188,7 +1191,7 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, const SEXP *ld = STRING_PTR(PROTECT(getAttrib(source, R_LevelsSymbol))); protecti++; BODY(int, INTEGER, SEXP, val==NA_INTEGER ? NA_STRING : ld[val-1], SET_STRING_ELT(target, off+i, cval)) } else { - if (!isString(source)) { source = PROTECT(coerceVector(source, STRSXP)); protecti++; } + if (!isString(source)) error("Internal error"); BODY(SEXP, STRING_PTR, SEXP, val, SET_STRING_ELT(target, off+i, cval)) } break; From 8c3e0649e4a589ec8e84a27f99354b0acc723712 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Wed, 25 Sep 2019 23:45:44 -0700 Subject: [PATCH 16/43] tidy --- src/assign.c | 267 ++++++--------------------------------------------- src/utils.c | 23 +++-- 2 files changed, 43 insertions(+), 247 deletions(-) diff --git a/src/assign.c b/src/assign.c index d7b19509cc..22d552c5bf 100644 --- a/src/assign.c +++ b/src/assign.c @@ -501,7 +501,7 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values) targetcol = VECTOR_ELT(dt,coln); } memrecycle(targetcol, rows, 0, targetlen, thisvalue, coln+1, CHAR(STRING_ELT(names, coln))); - // memrecycle is also called from dogroups; ^^ last 2 arguments just for messages + // ^^ last 2 arguments just for messages } *_Last_updated = numToDo; // the updates have taken place with no error, so update .Last.updated now @@ -801,7 +801,6 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, // checks up front, otherwise we'd need checks twice in the two branches that cater for 'where' or not // TODO: verbose message and/or strict option for advanced users to ensure types match // only call getOption (small cost in finding the option value) at this point when there is a type mismatch - //bool pun = false; switch(TYPEOF(target)) { case LGLSXP: if (isInteger(source)) { @@ -815,7 +814,6 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, break; // just warn on the first } } - // pun = true; } else if (isReal(source)) { // TODO: cater for integer64 here const double *sD = REAL(source); for (int i=0; i1 - } - } break; - case REALSXP: { - const double *sd = REAL(source); - for (int i=0; i1. Truncation when INTSXP, or invalid LGLSXP was caught earlier above. - } - } - } - } - } break; - case REALSXP : { - bool si64 = Rinherits(source, char_integer64); - bool ti64 = Rinherits(target, char_integer64); - if (si64 && TYPEOF(source)!=REALSXP) - error("Internal error: source has integer64 attribute but is type '%s' not REALSXP", type2char(TYPEOF(source))); // # nocov - if (si64 == ti64) { - if (TYPEOF(source)!=REALSXP) { source = PROTECT(coerceVector(source, REALSXP)); protecti++; } - if (slen==1) { - double *td = REAL(target)+start; - const double val = REAL(source)[0]; - for (int i=0; i1 case @@ -1125,7 +916,7 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, case LGLSXP: BODY(int, LOGICAL, Rbyte, val==1, td[i]=cval) break; case INTSXP: BODY(int, INTEGER, Rbyte, (val>255 || val<0) ? 0 : val, td[i]=cval) break; case REALSXP: BODY(double, REAL, Rbyte, (ISNAN(val)||val>256||val<0) ? 0 : val, td[i]=cval) break; - default: error("Internal error"); + default: COERCE_ERROR("raw"); } } break; case LGLSXP: { @@ -1135,7 +926,7 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, case LGLSXP: BODY(int, LOGICAL, int, val, td[i]=cval) break; case INTSXP: BODY(int, INTEGER, int, val==NA_INTEGER ? NA_LOGICAL : val!=0, td[i]=cval) break; case REALSXP: BODY(double, REAL, int, ISNAN(val) ? NA_LOGICAL : val!=0.0, td[i]=cval) break; - default: error("Internal error"); + default: COERCE_ERROR("logical"); } } break; case INTSXP : { @@ -1145,37 +936,37 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, case LGLSXP: // same as INTSXP ... case INTSXP: BODY(int, INTEGER, int, val, td[i]=cval) break; case REALSXP: BODY(double, REAL, int, ISNAN(val) ? NA_INTEGER : (int)val, td[i]=cval) break; - default: error("type '%s' cannot be coerced to 'integer'", type2char(TYPEOF(source))); // test 2005.4 + default: COERCE_ERROR("integer"); // test 2005.4 } } break; case REALSXP : { - if (!Rinherits(target, char_integer64)) { - double *td = REAL(target) + off; + if (Rinherits(target, char_integer64)) { + int64_t *td = (int64_t *)REAL(target) + off; switch (TYPEOF(source)) { - case RAWSXP: BODY(Rbyte, RAW, double, (double)val, td[i]=cval) break; + case RAWSXP: BODY(Rbyte, RAW, int64_t, (int64_t)val, td[i]=cval) break; case LGLSXP: // same as INTSXP - case INTSXP: BODY(int, INTEGER, double, val==NA_INTEGER ? NA_REAL : val, td[i]=cval) break; + case INTSXP: BODY(int, INTEGER, int64_t, val==NA_INTEGER ? NA_INTEGER64 : val, td[i]=cval) break; case REALSXP: if (Rinherits(source, char_integer64)) - BODY(int64_t, REAL, double, val==NA_INTEGER64 ? NA_REAL : val, td[i]=cval) + BODY(int64_t, REAL, int64_t, val, td[i]=cval) else - BODY(double, REAL, double, val, td[i]=cval) + BODY(double, REAL, int64_t, R_FINITE(val) ? val : NA_INTEGER64, td[i]=cval) break; - default: error("Internal error"); + default: COERCE_ERROR("integer64"); } } else { - int64_t *td = (int64_t *)REAL(target) + off; + double *td = REAL(target) + off; switch (TYPEOF(source)) { - case RAWSXP: BODY(Rbyte, RAW, int64_t, (int64_t)val, td[i]=cval) break; + case RAWSXP: BODY(Rbyte, RAW, double, (double)val, td[i]=cval) break; case LGLSXP: // same as INTSXP - case INTSXP: BODY(int, INTEGER, int64_t, val==NA_INTEGER ? NA_INTEGER64 : val, td[i]=cval) break; + case INTSXP: BODY(int, INTEGER, double, val==NA_INTEGER ? NA_REAL : val, td[i]=cval) break; case REALSXP: if (Rinherits(source, char_integer64)) - BODY(int64_t, REAL, int64_t, val, td[i]=cval) + BODY(int64_t, REAL, double, val==NA_INTEGER64 ? NA_REAL : val, td[i]=cval) else - BODY(double, REAL, int64_t, R_FINITE(val) ? val : NA_INTEGER64, td[i]=cval) + BODY(double, REAL, double, val, td[i]=cval) break; - default: error("Internal error"); + default: COERCE_ERROR("double"); } } } break; @@ -1183,7 +974,7 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, Rcomplex *td = COMPLEX(target) + off; switch (TYPEOF(source)) { case CPLXSXP: BODY(Rcomplex, COMPLEX, Rcomplex, val, td[i]=cval) break; - default: error("Internal error"); + default: COERCE_ERROR("complex"); } } break; case STRSXP : @@ -1191,7 +982,7 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, const SEXP *ld = STRING_PTR(PROTECT(getAttrib(source, R_LevelsSymbol))); protecti++; BODY(int, INTEGER, SEXP, val==NA_INTEGER ? NA_STRING : ld[val-1], SET_STRING_ELT(target, off+i, cval)) } else { - if (!isString(source)) error("Internal error"); + if (!isString(source)) error("Internal error: type %s should have been coerced to character earlier", type2char(TYPEOF(source))); // # nocov BODY(SEXP, STRING_PTR, SEXP, val, SET_STRING_ELT(target, off+i, cval)) } break; @@ -1200,7 +991,7 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, else BODY(SEXP, VECTOR_PTR, SEXP, val, SET_VECTOR_ELT(target, off+i, cval)) break; default : - error("Unsupported type in assign.c:memrecycle '%s' (where)", type2char(TYPEOF(target))); // # nocov + error("Unsupported column type in assign.c:memrecycle '%s'", type2char(TYPEOF(target))); // # nocov } UNPROTECT(protecti); return memrecycle_message[0] ? memrecycle_message : NULL; @@ -1227,7 +1018,7 @@ void writeNA(SEXP v, const int from, const int n) case REALSXP : { if (Rinherits(v, char_integer64)) { // Rinherits covers nanotime too which inherits from integer64 via S4 extends int64_t *vd = (int64_t *)REAL(v); - for (int i=from; i<=to; ++i) vd[i] = INT64_MIN; + for (int i=from; i<=to; ++i) vd[i] = NA_INTEGER64; } else { double *vd = REAL(v); for (int i=from; i<=to; ++i) vd[i] = NA_REAL; diff --git a/src/utils.c b/src/utils.c index d53f848f3a..836637b82c 100644 --- a/src/utils.c +++ b/src/utils.c @@ -34,21 +34,26 @@ SEXP isReallyReal(SEXP x) { bool allNA(SEXP x) { const int n = length(x); - switch(TYPEOF(x)) { + switch (TYPEOF(x)) { case LGLSXP: case INTSXP: { const int *xd = INTEGER(x); - for (int i=0; i Date: Thu, 26 Sep 2019 00:53:39 -0700 Subject: [PATCH 17/43] extra test; passes all revdeps --- inst/tests/tests.Rraw | 2 ++ src/assign.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index cad0ba55d2..8ccb797972 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16172,6 +16172,8 @@ test(2115.4, set(DT,4L,2L,3.14), data.table(a=INT(1,2,0,4), b=c(FALSE, TRUE, TRU warning="Non-zero 3.14.* taken as TRUE.*Please use.*") DT = data.table(code=c("c","b","c","a"), val=10:13) test(2115.5, DT[code=="c", val := val+1], data.table(code=c("c","b","c","a"), val=INT(11,11,13,13))) +DT = data.table(x=factor(LETTERS[1:3]), y=1:6) +test(2115.6, copy(DT)[4:6, x:=LETTERS[1:3]], DT) # identical(RHS, levels) ################################### diff --git a/src/assign.c b/src/assign.c index 22d552c5bf..75fe353fbd 100644 --- a/src/assign.c +++ b/src/assign.c @@ -720,7 +720,7 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, SEXP targetLevels = PROTECT(getAttrib(target, R_LevelsSymbol)); protecti++; SEXP sourceLevels = source; // character source if (sourceIsFactor) { sourceLevels=PROTECT(getAttrib(source, R_LevelsSymbol)); protecti++; } - if (!R_compute_identical(sourceLevels, targetLevels, 0)) { + if (!sourceIsFactor || !R_compute_identical(sourceLevels, targetLevels, 0)) { // !sourceIsFactor for test 2115.6 const int nTargetLevels=length(targetLevels), nSourceLevels=length(sourceLevels); const SEXP *targetLevelsD=STRING_PTR(targetLevels), *sourceLevelsD=STRING_PTR(sourceLevels); SEXP newSource = PROTECT(allocVector(INTSXP, length(source))); protecti++; From 6a2519d38fd926bd09d1afaf1736691d125592c0 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Thu, 26 Sep 2019 01:24:02 -0700 Subject: [PATCH 18/43] news item tweaked --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 03dc60a233..39273bad0d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -224,7 +224,7 @@ # [1] "A" "b" "c" ``` -29. `DT[, integerColumn:=0]` and `set(DT,i,j,0)` no longer warns about the `0` being the wrong type (`numeric` instead of `integer`). The long warning was focussed on efficiency to help the user avoid the necessary coercion from `0` to `0L`. Although the time and space for this coercion in a single call is unmeasurably small, when placed in a loop the small overhead of any allocation on R's heap could start to become noticeable (more so for `set()` whose purpose it be low-overhead for looping). The coercion warning and its advice could be too much for new users ("what and why L?"), but advanced users appreciate the warning. Further, when assigning a value across many columns, it could be inconvenient to supply the correct type (where the columns vary in type) and frustating if a simple `0` would be clear. To balance these requirements from different types of users, the coercion is now avoided by doing the coercion ourselves in internal code without allocating an object on R's heap: an internal optimization that we will now refer to as a "punned-coercion". As there is no time or space penalty for a punned-coerce, advanced users don't need to worry about it either. Where a punned-coerce discards fractional data (e.g. 3.14 assigned to an integer column) there will still be a warning. Punned-coerce applies to length>1 vectors as well as length-1 vectors including recycling, in all cases of `:=` and `set()`. +29. `:=` and `set()` now use zero-copy type coercion. Accordingly, `DT[..., integerColumn:=0]` and `set(DT,i,j,0)` no longer warn about the `0` ('numeric') needing to be `0L` ('integer') because there is no longer any time or space used for this coercion. The old long warning was off-putting to new users ("what and why L?"), whereas advanced users appreciated the old warning so they could avoid the coercion. Although the time and space for one coercion in a single call is unmeasurably small, when placed in a loop the small overhead of any allocation on R's heap could start to become noticeable (more so for `set()` whose purpose is low-overhead looping). Further, when assigning a value across columns of varying types, it could be inconvenient to supply the correct type for every column. Hence, zero-copy coercion was introduced to satisfy all these requirements. A warning is still issued, as before, when fractional data is discarded; e.g. when 3.14 is assigned to an integer column. Zero-copy coercion applies to length>1 vectors as well as length-1 vectors. ## BUG FIXES From f5779c0f878f3ab03de01cdd8da15e8fed1b8499 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Thu, 26 Sep 2019 02:18:21 -0700 Subject: [PATCH 19/43] allNA coverage via := --- inst/tests/tests.Rraw | 11 +++++++++-- src/assign.c | 6 ++++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 8ccb797972..3915ee4238 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -1213,7 +1213,7 @@ test(422, length(DT)==4) test(423, truelength(DT), 1028L) # Test crash bug fixed, #1656, introduced with the 1.7.0 feature -DT <- data.table(a = factor(c("A", "Z")), b = 1:4) +DT = data.table(a = factor(c("A", "Z")), b = 1:4) DT[1,1] <- "Z" test(424, DT, data.table(a=factor(c("Z","Z","A","Z")),b=1:4)) test(425, DT[1,1] <- 1, error="Cannot assign 'double' to 'factor'") @@ -1223,7 +1223,14 @@ DT[1,a:="A"] test(428, DT, data.table(a=factor(c("A","Z","A","Z")),b=1:4)) test(429, DT[1,a:=2L], error="Cannot assign 'integer' to 'factor'. Factor columns can only be assigned factor or character values, or NA in any type") test(430, DT, data.table(a=factor(c("A","Z","A","Z")),b=1:4)) -test(431, DT[1,1:=NA], data.table(a=factor(c(NA,"Z","A","Z")),b=1:4)) +DT = data.table(a=factor(c("A","B","A","C","B")), b=1:5) +test(431.1, DT[1,1:=NA], data.table(a=factor(c(NA,"B","A","C","B")), b=1:5)) +test(431.2, DT[2,1:=NA_integer_], data.table(a=factor(c(NA,NA,"A","C","B"), levels=LETTERS[1:3]), b=1:5)) +test(431.3, DT[3,1:=NA_real_], data.table(a=factor(c(NA,NA,NA,"C","B"), levels=LETTERS[1:3]), b=1:5)) +test(431.4, DT[4,1:=NA_character_], data.table(a=factor(c(NA,NA,NA,NA,"B"), levels=LETTERS[1:3]), b=1:5)) +if (test_bit64) { + test(431.5, DT[5,1:=as.integer64(NA)], data.table(a=factor(c(NA,NA,NA,NA,NA), levels=LETTERS[1:3]), b=1:5)) +} old = getOption("datatable.alloccol") # Test that unsetting datatable.alloccol is caught, #2014 options(datatable.alloccol=NULL) # In this =NULL case, options() in R 3.0.0 returned TRUE rather than the old value. This R bug was fixed in R 3.1.1. diff --git a/src/assign.c b/src/assign.c index 75fe353fbd..595bc20f8f 100644 --- a/src/assign.c +++ b/src/assign.c @@ -712,9 +712,11 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, } else if (!sourceIsFactor && !isString(source)) { // target is factor // TODO allow integer in the range [NA,1,nlevel] again, with error outside range - if (!allNA(source)) + if (!allNA(source)) { error("Cannot assign '%s' to 'factor'. Factor columns can only be assigned factor or character values, or NA in any type.", type2char(TYPEOF(source))); - // else let the all-NA (most likely just one NA) fall through to regular assign below without any coerce + } else { + source = ScalarLogical(NA_LOGICAL); // a global constant in R and won't allocate; fall through to regular zero-copy coerce of logical to integer + } } else { // either factor or character being assigned to factor column SEXP targetLevels = PROTECT(getAttrib(target, R_LevelsSymbol)); protecti++; From 328cefd83a5c4bac6a52acfd2520458b11940449 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Thu, 26 Sep 2019 03:02:03 -0700 Subject: [PATCH 20/43] allNA used in bmerge, and covered --- R/bmerge.R | 4 ++-- R/fread.R | 2 +- R/test.data.table.R | 2 +- R/utils.R | 1 + inst/tests/tests.Rraw | 14 ++++++++++++++ src/init.c | 2 ++ src/utils.c | 6 ++++++ 7 files changed, 27 insertions(+), 4 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index abb7aec2c8..3d6ab028f3 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -75,12 +75,12 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos if (xclass=="character" || iclass=="character" || xclass=="logical" || iclass=="logical" || xclass=="factor" || iclass=="factor") { - if (anyNA(i[[ic]]) && all(is.na(i[[ic]]))) { # TODO: allNA function in C + if (anyNA(i[[ic]]) && allNA(i[[ic]])) { if (verbose) cat("Coercing all-NA i.",names(i)[ic]," (",iclass,") to type ",xclass," to match type of x.",names(x)[xc],".\n",sep="") set(i, j=ic, value=match.fun(paste0("as.", xclass))(i[[ic]])) next } - else if (anyNA(x[[xc]]) && all(is.na(x[[xc]]))) { + else if (anyNA(x[[xc]]) && allNA(x[[xc]])) { if (verbose) cat("Coercing all-NA x.",names(x)[xc]," (",xclass,") to type ",iclass," to match type of i.",names(i)[ic],".\n",sep="") set(x, j=xc, value=match.fun(paste0("as.", iclass))(x[[xc]])) next diff --git a/R/fread.R b/R/fread.R index 4bc4c4dc00..d7ea4de61b 100644 --- a/R/fread.R +++ b/R/fread.R @@ -118,7 +118,7 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir()) } if (!missing(autostart)) warning("'autostart' is now deprecated and ignored. Consider skip='string' or skip=n"); if (is.logical(colClasses)) { - if (!all(is.na(colClasses))) stop("colClasses is type 'logical' which is ok if all NA but it has some TRUE or FALSE values in it which is not allowed. Please consider the drop= or select= argument instead. See ?fread.") + if (!allNA(colClasses)) stop("colClasses is type 'logical' which is ok if all NA but it has some TRUE or FALSE values in it which is not allowed. Please consider the drop= or select= argument instead. See ?fread.") colClasses = NULL } if (!is.null(colClasses) && is.atomic(colClasses)) { diff --git a/R/test.data.table.R b/R/test.data.table.R index 322cd1e613..59c622c433 100644 --- a/R/test.data.table.R +++ b/R/test.data.table.R @@ -157,7 +157,7 @@ test.data.table = function(verbose=FALSE, pkg="pkg", silent=FALSE, with.other.pa # inittime=PS_rss=GC_used=GC_max_used=NULL # m = fread("memtest.csv")[inittime==.inittime] # if (nrow(m)) { - # ps_na = all(is.na(m[["PS_rss"]])) # OS with no 'ps -o rss R' support + # ps_na = allNA(m[["PS_rss"]]) # OS with no 'ps -o rss R' support # grDevices::png("memtest.png") # p = graphics::par(mfrow=c(if (ps_na) 2 else 3, 2)) # if (!ps_na) { diff --git a/R/utils.R b/R/utils.R index 0426a06654..de35e709c0 100644 --- a/R/utils.R +++ b/R/utils.R @@ -12,6 +12,7 @@ if (base::getRversion() < "3.5.0") { } isTRUEorNA = function(x) is.logical(x) && length(x)==1L && (is.na(x) || x) isTRUEorFALSE = function(x) is.logical(x) && length(x)==1L && !is.na(x) +allNA = function(x) .Call(C_allNAR, x) if (base::getRversion() < "3.2.0") { # Apr 2015 isNamespaceLoaded = function(x) x %chin% loadedNamespaces() diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 3915ee4238..b58c90681c 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17,6 +17,7 @@ if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) { # other than here inside this branch. all.equal.data.table = data.table:::all.equal.data.table + allNA = data.table:::allNA any_na = data.table:::any_na as.data.table.array = data.table:::as.data.table.array as.IDate.default = data.table:::as.IDate.default @@ -16182,6 +16183,19 @@ test(2115.5, DT[code=="c", val := val+1], data.table(code=c("c","b","c","a"), va DT = data.table(x=factor(LETTERS[1:3]), y=1:6) test(2115.6, copy(DT)[4:6, x:=LETTERS[1:3]], DT) # identical(RHS, levels) +# allNA(); an aside in PR#3909 +test(2116.1, allNA(c("",NA)), FALSE) +test(2116.2, allNA(NA_character_), TRUE) +test(2116.3, allNA(as.character(c(NA,NA))), TRUE) +test(2116.4, allNA(c(Inf,NA)), FALSE) +test(2116.5, allNA(as.double(c(NA,NA))), TRUE) +if (test_bit64) { + test(2116.6, allNA(as.integer64(c(NA,0))), FALSE) + test(2116.7, allNA(as.integer64(c(NA,NA))), TRUE) +} +test(2116.8, allNA(as.raw(c(0,0))), FALSE) +test(2116.9, allNA(as.raw(c(0,255))), FALSE) + ################################### # Add new tests above this line # diff --git a/src/init.c b/src/init.c index 1147bff23a..4189db28d0 100644 --- a/src/init.c +++ b/src/init.c @@ -84,6 +84,7 @@ SEXP cj(); SEXP lock(); SEXP unlock(); SEXP islockedR(); +SEXP allNAR(); // .Externals SEXP fastmean(); @@ -174,6 +175,7 @@ R_CallMethodDef callMethods[] = { {"C_islocked", (DL_FUNC) &islockedR, -1}, {"CfrollapplyR", (DL_FUNC) &frollapplyR, -1}, {"CtestMsgR", (DL_FUNC) &testMsgR, -1}, +{"C_allNAR", (DL_FUNC) &allNAR, -1}, {NULL, NULL, 0} }; diff --git a/src/utils.c b/src/utils.c index 836637b82c..2a174489ae 100644 --- a/src/utils.c +++ b/src/utils.c @@ -33,6 +33,8 @@ SEXP isReallyReal(SEXP x) { } bool allNA(SEXP x) { + // less space and time than any(is.na(x)) at R level because that creates full size is.na(x) first before any() + // whereas this allNA can often return early on testing the first value without reading the rest const int n = length(x); switch (TYPEOF(x)) { case LGLSXP: @@ -58,6 +60,10 @@ bool allNA(SEXP x) { return false; } +SEXP allNAR(SEXP x) { + return ScalarLogical(allNA(x)); +} + /* colnamesInt * for provided data.table (or a list-like) and a subset of its columns, it returns integer positions of those columns in DT * handle columns input as: integer, double, character and NULL (handled as seq_along(x)) From 94ffa0e4aa4d0294352419129d82a2b566a9d4dd Mon Sep 17 00:00:00 2001 From: mattdowle Date: Thu, 26 Sep 2019 03:35:23 -0700 Subject: [PATCH 21/43] allNA empty tests --- inst/tests/tests.Rraw | 23 ++++++++++++++--------- src/utils.c | 1 + 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index b58c90681c..c9248ca760 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16184,17 +16184,22 @@ DT = data.table(x=factor(LETTERS[1:3]), y=1:6) test(2115.6, copy(DT)[4:6, x:=LETTERS[1:3]], DT) # identical(RHS, levels) # allNA(); an aside in PR#3909 -test(2116.1, allNA(c("",NA)), FALSE) -test(2116.2, allNA(NA_character_), TRUE) -test(2116.3, allNA(as.character(c(NA,NA))), TRUE) -test(2116.4, allNA(c(Inf,NA)), FALSE) -test(2116.5, allNA(as.double(c(NA,NA))), TRUE) +test(2116.01, !allNA(c("",NA))) +test(2116.02, allNA(NA_character_)) +test(2116.03, allNA(as.character(c(NA,NA)))) +test(2116.04, allNA(character())) # same as all(is.na(character())) +test(2116.05, !allNA(c(Inf,NA))) +test(2116.06, allNA(as.double(c(NA,NA)))) +test(2116.07, allNA(double())) # same as all(is.na(double())) if (test_bit64) { - test(2116.6, allNA(as.integer64(c(NA,0))), FALSE) - test(2116.7, allNA(as.integer64(c(NA,NA))), TRUE) + test(2116.08, !allNA(as.integer64(c(NA,0)))) + test(2116.09, allNA(as.integer64(c(NA,NA)))) + test(2116.10, allNA(integer64())) # same as all(is.na(integer64())) } -test(2116.8, allNA(as.raw(c(0,0))), FALSE) -test(2116.9, allNA(as.raw(c(0,255))), FALSE) +test(2116.11, !allNA(as.raw(c(0,0)))) +test(2116.12, !allNA(as.raw(c(0,255)))) +test(2116.13, allNA(raw())) # same as all(is.na(raw())) +test(2116.14, allNA(NULL)) # same as all(is.na(NULL)) ################################### diff --git a/src/utils.c b/src/utils.c index 2a174489ae..f6e7421672 100644 --- a/src/utils.c +++ b/src/utils.c @@ -36,6 +36,7 @@ bool allNA(SEXP x) { // less space and time than any(is.na(x)) at R level because that creates full size is.na(x) first before any() // whereas this allNA can often return early on testing the first value without reading the rest const int n = length(x); + if (n==0) return true; // for empty raw, and NULL, to match R's all(is.na(raw())) true result, test 2116.13-14 switch (TYPEOF(x)) { case LGLSXP: case INTSXP: { From 2374b324032e5d345e586961aa932a18422b7163 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Thu, 26 Sep 2019 04:35:32 -0700 Subject: [PATCH 22/43] coverage --- inst/tests/tests.Rraw | 29 ++++++++++------ src/assign.c | 79 +++++++++++++++++++++++-------------------- 2 files changed, 61 insertions(+), 47 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index c9248ca760..0e029aa5f5 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -1222,7 +1222,8 @@ test(426, DT[1,1] <- 2L, error="Cannot assign 'integer' to 'factor'") test(427, DT, data.table(a=factor(c("Z","Z","A","Z")),b=1:4)) DT[1,a:="A"] test(428, DT, data.table(a=factor(c("A","Z","A","Z")),b=1:4)) -test(429, DT[1,a:=2L], error="Cannot assign 'integer' to 'factor'. Factor columns can only be assigned factor or character values, or NA in any type") +test(429.1, DT[1,a:=2L], error="Cannot assign 'integer' to 'factor'. Factor columns can only be assigned factor or character values, or NA in any type") +test(429.2, DT[2,b:=factor("A")], error="Cannot assign 'factor' to 'integer'. Factors can only be assigned to factor, character or list columns.") test(430, DT, data.table(a=factor(c("A","Z","A","Z")),b=1:4)) DT = data.table(a=factor(c("A","B","A","C","B")), b=1:5) test(431.1, DT[1,1:=NA], data.table(a=factor(c(NA,"B","A","C","B")), b=1:5)) @@ -14117,16 +14118,24 @@ test(2004.6, chmatch(c("a","b"), c("b",x2)), c(NA,1L)) # the second fallb test(2004.7, c("a","b") %in% c("b",x1,x2), c(FALSE, TRUE)) # the second fallback might be redundnant though; see comments in chmatch.c # more coverage ... -test(2005.1, truelength(NULL), 0L) -DT = data.table(a=1:3, b=4:6) -test(2005.2, set(DT, 4L, "b", NA), error="i[1] is 4 which is out of range [1,nrow=3]") -test(2005.3, set(DT, 3L, 8i, NA), error="j is type 'complex'. Must be integer, character, or numeric is coerced with warning.") -test(2005.4, set(DT, 1L, 2L, expression(x+2)), error="type 'expression' cannot be coerced to 'integer'") # similar to R's error for as.integer(expression(x+2)) +test(2005.01, truelength(NULL), 0L) +DT = data.table(a=1:3, b=4:6, c=as.raw(7:9), d=as.logical(c(1,0,1)), e=pi*1:3, f=as.complex(10:12)) +test(2005.02, set(DT, 4L, "b", NA), error="i[1] is 4 which is out of range [1,nrow=3]") +test(2005.03, set(DT, 3L, 8i, NA), error="j is type 'complex'. Must be integer, character, or numeric is coerced with warning.") +test(2005.04, set(DT, 1L, 2L, expression(x+2)), error="type 'expression' cannot be coerced to 'integer'") # similar to R's error for as.integer(expression(x+2)) DT[,foo:=factor(c("a","b","c"))] -test(2005.5, DT[2, foo:=8i], error="Can't assign to column 'foo' (type 'factor') a value of type 'complex' (not character, factor, integer or numeric)") -test(2005.6, DT[2, a:=9, verbose=TRUE], notOutput="Coerced") -test(2005.7, DT[2, a:=NA, verbose=TRUE], notOutput="Coerced") -test(2005.8, DT[2, a:=9.9]$a, INT(1,9,3), warning="Coerced double RHS to integer.*One or more RHS values contain fractions which have been lost.*9.9.*has been truncated to 9") +test(2005.05, DT[2, foo:=8i], error="Can't assign to column 'foo' (type 'factor') a value of type 'complex' (not character, factor, integer or numeric)") +test(2005.06, DT[2, a:=9, verbose=TRUE], notOutput="Coerced") +test(2005.07, DT[2, a:=NA, verbose=TRUE], notOutput="Coerced") +test(2005.08, DT[2, a:=9.9]$a, INT(1,9,3), warning="Coerced double RHS to integer.*One or more RHS values contain fractions which have been lost.*9.9.*has been truncated to 9") +test(2005.09, set(DT, 1L, "c", expression(x+2)), error="type 'expression' cannot be coerced to 'raw'") +test(2005.10, set(DT, 1L, "d", expression(x+2)), error="type 'expression' cannot be coerced to 'logical'") +test(2005.11, set(DT, 1L, "e", expression(x+2)), error="type 'expression' cannot be coerced to 'double'") +test(2005.12, set(DT, 1L, "f", expression(x+2)), error="type 'expression' cannot be coerced to 'complex'") +if (test_bit64) { + DT[,g:=as.integer64(c(-9,0,NA))] + test(2005.13, set(DT, 1L, "g", expression(x+2)), error="type 'expression' cannot be coerced to 'integer64'") +} # rbindlist raw type, #2819 test(2006.1, rbindlist(list(data.table(x = as.raw(1), y=as.raw(3)), data.table(x = as.raw(2))), fill=TRUE), data.table(x=as.raw(1:2), y=as.raw(c(3,0)))) diff --git a/src/assign.c b/src/assign.c index 595bc20f8f..a9a07c6cff 100644 --- a/src/assign.c +++ b/src/assign.c @@ -846,10 +846,10 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, } break; } - if (isString(target) || isString(source) || isComplex(target) || isComplex(source) || isNewList(source)) { + if (isString(target) || isString(source) || isNewList(source)) { // TODO if (allNA(source)) { // // e.g. to save coercing NA to NA_character_; if types match then leave it to the regular assign and the call to allNA is saved - // point to fixed NA of the target type + // point to fixed ScalarLogical(NA_LOGICAL) and fall through to standard cases below //} else { SEXP tt = PROTECT(coerceVector(source, TYPEOF(target))); protecti++; if (!isString(target) && !isNewList(target) && !allNA(source)) { @@ -862,42 +862,42 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, } #undef BODY -#define BODY(STYPE, RFUN, CTYPE, CAST, ASSIGN) \ +#define BODY(STYPE, RFUN, CTYPE, CAST, ASSIGN) \ { const STYPE *sd = (const STYPE *)RFUN(source); \ - if (length(where)) { \ - const int *wd = INTEGER(where)+start; \ - if (slen==1) { \ - const STYPE val = sd[0]; \ - const CTYPE cval = CAST; \ - for (int wi=0; wi Date: Thu, 26 Sep 2019 05:10:37 -0700 Subject: [PATCH 23/43] tidy --- src/assign.c | 48 +++++++++++++++++++++++------------------------- 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/src/assign.c b/src/assign.c index a9a07c6cff..2baa615c2e 100644 --- a/src/assign.c +++ b/src/assign.c @@ -862,8 +862,8 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, } #undef BODY -#define BODY(STYPE, RFUN, CTYPE, CAST, ASSIGN) \ - { const STYPE *sd = (const STYPE *)RFUN(source); \ +#define BODY(STYPE, RFUN, CTYPE, CAST, ASSIGN) {{ \ + const STYPE *sd = (const STYPE *)RFUN(source); \ if (length(where)) { \ const int *wd = INTEGER(where)+start; \ if (slen==1) { \ @@ -889,8 +889,9 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, if (slen==1) { \ const STYPE val = sd[0]; \ const CTYPE cval = CAST; \ - for (int i=0; i255 || val<0) ? 0 : val, td[i]=cval) break; - case REALSXP: BODY(double, REAL, Rbyte, (ISNAN(val)||val>256||val<0) ? 0 : val, td[i]=cval) break; + case RAWSXP: BODY(Rbyte, RAW, Rbyte, val, td[i]=cval) + case LGLSXP: BODY(int, LOGICAL, Rbyte, val==1, td[i]=cval) + case INTSXP: BODY(int, INTEGER, Rbyte, (val>255 || val<0) ? 0 : val, td[i]=cval) + case REALSXP: BODY(double, REAL, Rbyte, (ISNAN(val)||val>256||val<0) ? 0 : val, td[i]=cval) default: COERCE_ERROR("raw"); } } break; case LGLSXP: { int *td = LOGICAL(target) + off; switch (TYPEOF(source)) { - case RAWSXP: BODY(Rbyte, RAW, int, val!=0, td[i]=cval) break; - case LGLSXP: BODY(int, LOGICAL, int, val, td[i]=cval) break; - case INTSXP: BODY(int, INTEGER, int, val==NA_INTEGER ? NA_LOGICAL : val!=0, td[i]=cval) break; - case REALSXP: BODY(double, REAL, int, ISNAN(val) ? NA_LOGICAL : val!=0.0, td[i]=cval) break; + case RAWSXP: BODY(Rbyte, RAW, int, val!=0, td[i]=cval) + case LGLSXP: BODY(int, LOGICAL, int, val, td[i]=cval) + case INTSXP: BODY(int, INTEGER, int, val==NA_INTEGER ? NA_LOGICAL : val!=0, td[i]=cval) + case REALSXP: BODY(double, REAL, int, ISNAN(val) ? NA_LOGICAL : val!=0.0, td[i]=cval) default: COERCE_ERROR("logical"); } } break; case INTSXP : { int *td = INTEGER(target) + off; switch (TYPEOF(source)) { - case RAWSXP: BODY(Rbyte, RAW, int, (int)val, td[i]=cval) break; + case RAWSXP: BODY(Rbyte, RAW, int, (int)val, td[i]=cval) case LGLSXP: // same as INTSXP ... - case INTSXP: BODY(int, INTEGER, int, val, td[i]=cval) break; - case REALSXP: BODY(double, REAL, int, ISNAN(val) ? NA_INTEGER : (int)val, td[i]=cval) break; + case INTSXP: BODY(int, INTEGER, int, val, td[i]=cval) + case REALSXP: BODY(double, REAL, int, ISNAN(val) ? NA_INTEGER : (int)val, td[i]=cval) default: COERCE_ERROR("integer"); // test 2005.4 } } break; @@ -945,29 +947,27 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, if (Rinherits(target, char_integer64)) { int64_t *td = (int64_t *)REAL(target) + off; switch (TYPEOF(source)) { - case RAWSXP: BODY(Rbyte, RAW, int64_t, (int64_t)val, td[i]=cval) break; + case RAWSXP: BODY(Rbyte, RAW, int64_t, (int64_t)val, td[i]=cval) case LGLSXP: // same as INTSXP - case INTSXP: BODY(int, INTEGER, int64_t, val==NA_INTEGER ? NA_INTEGER64 : val, td[i]=cval) break; + case INTSXP: BODY(int, INTEGER, int64_t, val==NA_INTEGER ? NA_INTEGER64 : val, td[i]=cval) case REALSXP: if (Rinherits(source, char_integer64)) BODY(int64_t, REAL, int64_t, val, td[i]=cval) else BODY(double, REAL, int64_t, R_FINITE(val) ? val : NA_INTEGER64, td[i]=cval) - break; default: COERCE_ERROR("integer64"); } } else { double *td = REAL(target) + off; switch (TYPEOF(source)) { - case RAWSXP: BODY(Rbyte, RAW, double, (double)val, td[i]=cval) break; + case RAWSXP: BODY(Rbyte, RAW, double, (double)val, td[i]=cval) case LGLSXP: // same as INTSXP - case INTSXP: BODY(int, INTEGER, double, val==NA_INTEGER ? NA_REAL : val, td[i]=cval) break; + case INTSXP: BODY(int, INTEGER, double, val==NA_INTEGER ? NA_REAL : val, td[i]=cval) case REALSXP: if (Rinherits(source, char_integer64)) BODY(int64_t, REAL, double, val==NA_INTEGER64 ? NA_REAL : val, td[i]=cval) else BODY(double, REAL, double, val, td[i]=cval) - break; default: COERCE_ERROR("double"); } } @@ -978,9 +978,9 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, switch (TYPEOF(source)) { //TODO case RAWSXP case LGLSXP: // same as INTSXP - case INTSXP: BODY(int, INTEGER, double, val==NA_INTEGER ? (im=NA_REAL,NA_REAL) : (im=0.0,val), {td[i].r=cval; td[i].i=im;} ) break; + case INTSXP: BODY(int, INTEGER, double, val==NA_INTEGER?(im=NA_REAL,NA_REAL):(im=0.0,val), td[i].r=cval;td[i].i=im) //TODO case REALSXP: - case CPLXSXP: BODY(Rcomplex, COMPLEX, Rcomplex, val, td[i]=cval) break; + case CPLXSXP: BODY(Rcomplex, COMPLEX, Rcomplex, val, td[i]=cval) default: COERCE_ERROR("complex"); } } break; @@ -992,11 +992,9 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, if (!isString(source)) error("Internal error: type %s should have been coerced to character earlier", type2char(TYPEOF(source))); // # nocov BODY(SEXP, STRING_PTR, SEXP, val, SET_STRING_ELT(target, off+i, cval)) } - break; case VECSXP : if (TYPEOF(source)!=VECSXP) BODY(SEXP, &, SEXP, val, SET_VECTOR_ELT(target, off+i, cval)) else BODY(SEXP, VECTOR_PTR, SEXP, val, SET_VECTOR_ELT(target, off+i, cval)) - break; default : error("Unsupported column type in assign.c:memrecycle '%s'", type2char(TYPEOF(target))); // # nocov } From 510da291f53b06dbb9426922957aceac7a591cf3 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Thu, 26 Sep 2019 18:26:00 +0000 Subject: [PATCH 24/43] don't create NA level when assigning NA_character_ to factor --- inst/tests/tests.Rraw | 5 +++++ src/assign.c | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 0e029aa5f5..4d0d4048ca 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16210,6 +16210,11 @@ test(2116.12, !allNA(as.raw(c(0,255)))) test(2116.13, allNA(raw())) # same as all(is.na(raw())) test(2116.14, allNA(NULL)) # same as all(is.na(NULL)) +# don't create NA factor level when assigning to factor from character; bug in v1.12.2 noticed in dev part of PR#3909 +DT = data.table(a=factor(LETTERS[1:3])) +test(2117.1, levels(DT[2:3,a:=c("",NA_character_)]$a), c("A","B","C","")) +test(2117.2, DT[1,a:=NA_character_]$a, factor(c(NA,"",NA), levels=c("A","B","C",""))) + ################################### # Add new tests above this line # diff --git a/src/assign.c b/src/assign.c index 2baa615c2e..a6bf86ac12 100644 --- a/src/assign.c +++ b/src/assign.c @@ -746,6 +746,7 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, const SEXP s = sourceLevelsD[k]; const int tl = TRUELENGTH(s); if (tl>=0) { + if (!sourceIsFactor && s==NA_STRING) continue; // don't create NA factor level when assigning character to factor; test 2117 if (tl>0) savetl(s); SET_TRUELENGTH(s, -nTargetLevels-(++nAdd)); } // else, when sourceIsString, it's normal for there to be duplicates here @@ -756,7 +757,7 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, const int *sourceD = INTEGER(source); for (int i=0; i Date: Thu, 26 Sep 2019 19:45:01 +0000 Subject: [PATCH 25/43] rbindlist malformed NA factor level, #3915 --- NEWS.md | 2 ++ inst/tests/tests.Rraw | 10 +++++++++- src/rbindlist.c | 29 +++++++++++++++++++++++------ 3 files changed, 34 insertions(+), 7 deletions(-) diff --git a/NEWS.md b/NEWS.md index 39273bad0d..769ad5fdcd 100644 --- a/NEWS.md +++ b/NEWS.md @@ -329,6 +329,8 @@ 44. Grouping could create a `malformed factor` and/or segfault when the factors returned by each group did not have identical levels, [#2199](https://github.com/Rdatatable/data.table/issues/2199) and [#2522](https://github.com/Rdatatable/data.table/issues/2522). Thanks to Václav Hausenblas, @franknarf1, @ben519, and @Henrik-P for reporting. +45. `rbindlist` (and printing a `data.table` with over 100 rows because that uses `rbindlist(head, tail)`) could error with `malformed factor` for unordered factor columns containing a used `NA_character_` level, [#3915](https://github.com/Rdatatable/data.table/issues/3915). This is an unusual input for unordered factors because NA_integer_ is recommended by default in R. Thanks to @sindribaldur for reporting. + ## NOTES 1. `rbindlist`'s `use.names="check"` now emits its message for automatic column names (`"V[0-9]+"`) too, [#3484](https://github.com/Rdatatable/data.table/pull/3484). See news item 5 of v1.12.2 below. diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 4d0d4048ca..898e66dc56 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -14256,7 +14256,15 @@ test(2014.4, class(fread(test_data_mult, keepLeadingZeros = TRUE)[[1]]), "charac test(2014.5, class(fread(test_data_mult, keepLeadingZeros = FALSE)[[1]]), "integer") # rbindlist should drop NA from levels of source factors, relied on by package emil -test(2015, levels(rbindlist( list( data.frame(a=factor("a",levels=c("a",NA),exclude=NULL)) ))$a), "a") # the NA level should not be retained +test(2015.1, levels(rbindlist( list( data.frame(a=factor("a",levels=c("a",NA),exclude=NULL)) ))$a), "a") # the NA level (unused in this case) should not be retained +# follow-up from #3915; since this was malformed factor (so not relied on); lets just drop the used NA level too in v1.12.4 for these regular (not ordered) factors +DT = data.table(V1 = factor(as.character(c(NA, 1:3, NA)), exclude = NULL)) +test(2015.2, list(levels(DT$V1), as.integer(DT$V1)), list(as.character(c(1:3,NA)), INT(4,1,2,3,4))) # the 4's are now moved to NA_integer_ by rbindlist +test(2015.3, unclass(rbindlist(list(DT), use.names=FALSE)$V1), setattr(INT(NA,1,2,3,NA), "levels", as.character(1:3))) +DT = data.table(V1 = factor(as.character(c(NA, 1:100, NA)), exclude = NULL)) +test(2015.4, print(DT), output="V1.*1:[ ]+.*2:[ ]+1.*101:[ ]+100.*102:[ ]+") +DT = data.table(V1 = factor(as.character(c(NA, 1:3, NA)), exclude = NULL)) +test(2015.5, print(DT), output="V1.*1:[ ]+.*2:[ ]+1.*4:[ ]+3.*5:[ ]+") # better save->load->set() message, #2996 DT = data.table(a=1:3) diff --git a/src/rbindlist.c b/src/rbindlist.c index e4752edded..78efc29fc2 100644 --- a/src/rbindlist.c +++ b/src/rbindlist.c @@ -442,19 +442,36 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg) const int *id = INTEGER(thisCol); if (length(thisCol)<=1) { // recycle length-1, or NA-fill length-0 - const int val = (length(thisCol)==1 && id[0]!=NA_INTEGER) ? -TRUELENGTH(thisColStrD[id[0]-1]) : NA_INTEGER; + SEXP lev; + const int val = (length(thisCol)==1 && id[0]!=NA_INTEGER && (lev=thisColStrD[id[0]-1])!=NA_STRING) ? -TRUELENGTH(lev) : NA_INTEGER; + // ^^ #3915 and tests 2015.2-5 for (int r=0; r Date: Thu, 26 Sep 2019 20:52:15 +0000 Subject: [PATCH 26/43] coverage --- inst/tests/tests.Rraw | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 898e66dc56..5c523eefb1 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -14135,7 +14135,17 @@ test(2005.12, set(DT, 1L, "f", expression(x+2)), error="type 'expression' cannot if (test_bit64) { DT[,g:=as.integer64(c(-9,0,NA))] test(2005.13, set(DT, 1L, "g", expression(x+2)), error="type 'expression' cannot be coerced to 'integer64'") + test(2005.14, DT[1:2,e:=as.integer64(c(NA,-200))]$e, c(NA_real_, -200, pi*3)) } +test(2005.15, DT[2:3,c:=c(TRUE,FALSE)]$c, as.raw(INT(7,1,0))) +test(2005.16, set(DT,1L,"c",NA)$c, as.raw(INT(0,1,0))) +test(2005.17, set(DT,1:2,"c",INT(-1,255))$c, as.raw(INT(0,255,0))) +test(2005.18, DT[2:3,c:=INT(NA,256)]$c, as.raw(INT(0,0,0))) +test(2005.19, set(DT,2:3,"c",c(NA,3.14))$c, as.raw(INT(0,0,3))) +test(2005.20, DT[1:2,c:=c(Inf,0.78)]$c, as.raw(INT(0,0,3))) +test(2005.21, DT[1:2,d:=as.raw(c(0,255))]$d, c(FALSE,TRUE,TRUE)) +test(2005.22, DT[2:3,b:=as.raw(c(0,255))]$b, INT(4,0,255)) +test(2005.23, DT[1:2,e:=as.raw(c(0,255))]$e, c(0,255,pi*3)) # rbindlist raw type, #2819 test(2006.1, rbindlist(list(data.table(x = as.raw(1), y=as.raw(3)), data.table(x = as.raw(2))), fill=TRUE), data.table(x=as.raw(1:2), y=as.raw(c(3,0)))) From 4ec649d29985679ec688be07e5fc0a3278062f94 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Thu, 26 Sep 2019 22:33:22 +0000 Subject: [PATCH 27/43] news item extra nx= fixed --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 769ad5fdcd..e266e1d210 100644 --- a/NEWS.md +++ b/NEWS.md @@ -193,7 +193,7 @@ set.seed(108) x = rnorm(1e6); n = 1e3 rollfun = function(x, n, FUN) { - nx = nx = length(x) + nx = length(x) ans = rep(NA_real_, nx) for (i in n:nx) ans[i] = FUN(x[(i-n+1):i]) ans From c0bad1a23d139853383b0b89950f9b307f3b8a3c Mon Sep 17 00:00:00 2001 From: mattdowle Date: Thu, 26 Sep 2019 22:45:34 +0000 Subject: [PATCH 28/43] snprintf --- src/assign.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/assign.c b/src/assign.c index a6bf86ac12..ad489a3479 100644 --- a/src/assign.c +++ b/src/assign.c @@ -668,7 +668,8 @@ static bool anyNamed(SEXP x) { return false; } -static char memrecycle_message[1000]; +#define MSGSIZE 1000 +static char memrecycle_message[MSGSIZE+1]; const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, int colnum, const char *colname) // like memcpy but recycles single-item source @@ -842,7 +843,7 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, if (targetIsI64 && isReal(source) && !sourceIsI64) { int firstReal=0; if ((firstReal=INTEGER(isReallyReal(source))[0])) { - sprintf(memrecycle_message, "coerced to integer64 but contains a non-integer value (%f at position %d); precision lost.", REAL(source)[firstReal-1], firstReal); + snprintf(memrecycle_message, MSGSIZE, "coerced to integer64 but contains a non-integer value (%f at position %d); precision lost.", REAL(source)[firstReal-1], firstReal); } } break; From b83d133f6188fbd556ec73bf58fd6c81e6875cfe Mon Sep 17 00:00:00 2001 From: mattdowle Date: Thu, 26 Sep 2019 23:46:15 +0000 Subject: [PATCH 29/43] memcpy --- src/assign.c | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/src/assign.c b/src/assign.c index ad489a3479..8c27619413 100644 --- a/src/assign.c +++ b/src/assign.c @@ -906,19 +906,15 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, #define COERCE_ERROR(targetType) error("type '%s' cannot be coerced to '%s'", type2char(TYPEOF(source)), targetType); // 'targetType' for integer64 vs double - // !! TODO !! : reinstate the memcpy for when types match (including i64), are not STR or VEC, length(where)==0, and slen==len; e.g. mainly rbindlist - //memcpy(RAW(target)+start, RAW(source), slen*SIZEOF(target)); - //memcpy(LOGICAL(target)+start, LOGICAL(source), slen*SIZEOF(target)); - //memcpy(INTEGER(target)+start, INTEGER(source), slen*SIZEOF(target)); // correct type ideal length>1 case - //memcpy(REAL(target)+start, REAL(source), slen*SIZEOF(target)); - //memcpy(COMPLEX(target)+start, COMPLEX(source), slen*SIZEOF(target)); - const int off = (length(where)?0:start); // off = target offset; e.g. called from rbindlist with where=R_NilValue and start!=0 + const bool mem = length(where)==0 && slen==len; // only used if types match too switch (TYPEOF(target)) { case RAWSXP: { Rbyte *td = RAW(target) + off; switch (TYPEOF(source)) { - case RAWSXP: BODY(Rbyte, RAW, Rbyte, val, td[i]=cval) + case RAWSXP: if (mem) { + memcpy(td, RAW(source), slen*sizeof(Rbyte)); break; + } else BODY(Rbyte, RAW, Rbyte, val, td[i]=cval) case LGLSXP: BODY(int, LOGICAL, Rbyte, val==1, td[i]=cval) case INTSXP: BODY(int, INTEGER, Rbyte, (val>255 || val<0) ? 0 : val, td[i]=cval) case REALSXP: BODY(double, REAL, Rbyte, (ISNAN(val)||val>256||val<0) ? 0 : val, td[i]=cval) @@ -929,7 +925,9 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, int *td = LOGICAL(target) + off; switch (TYPEOF(source)) { case RAWSXP: BODY(Rbyte, RAW, int, val!=0, td[i]=cval) - case LGLSXP: BODY(int, LOGICAL, int, val, td[i]=cval) + case LGLSXP: if (mem) { + memcpy(td, LOGICAL(source), slen*sizeof(Rboolean)); break; + } else BODY(int, LOGICAL, int, val, td[i]=cval) case INTSXP: BODY(int, INTEGER, int, val==NA_INTEGER ? NA_LOGICAL : val!=0, td[i]=cval) case REALSXP: BODY(double, REAL, int, ISNAN(val) ? NA_LOGICAL : val!=0.0, td[i]=cval) default: COERCE_ERROR("logical"); @@ -940,7 +938,9 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, switch (TYPEOF(source)) { case RAWSXP: BODY(Rbyte, RAW, int, (int)val, td[i]=cval) case LGLSXP: // same as INTSXP ... - case INTSXP: BODY(int, INTEGER, int, val, td[i]=cval) + case INTSXP: if (mem) { + memcpy(td, INTEGER(source), slen*sizeof(int)); break; + } else BODY(int, INTEGER, int, val, td[i]=cval) case REALSXP: BODY(double, REAL, int, ISNAN(val) ? NA_INTEGER : (int)val, td[i]=cval) default: COERCE_ERROR("integer"); // test 2005.4 } @@ -953,10 +953,10 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, case LGLSXP: // same as INTSXP case INTSXP: BODY(int, INTEGER, int64_t, val==NA_INTEGER ? NA_INTEGER64 : val, td[i]=cval) case REALSXP: - if (Rinherits(source, char_integer64)) - BODY(int64_t, REAL, int64_t, val, td[i]=cval) - else - BODY(double, REAL, int64_t, R_FINITE(val) ? val : NA_INTEGER64, td[i]=cval) + if (Rinherits(source, char_integer64)) { + if(mem) { memcpy(td, (int64_t *)REAL(source), slen*sizeof(int64_t)); break; } + else BODY(int64_t, REAL, int64_t, val, td[i]=cval) + } else BODY(double, REAL, int64_t, R_FINITE(val) ? val : NA_INTEGER64, td[i]=cval) default: COERCE_ERROR("integer64"); } } else { @@ -966,10 +966,10 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, case LGLSXP: // same as INTSXP case INTSXP: BODY(int, INTEGER, double, val==NA_INTEGER ? NA_REAL : val, td[i]=cval) case REALSXP: - if (Rinherits(source, char_integer64)) - BODY(int64_t, REAL, double, val==NA_INTEGER64 ? NA_REAL : val, td[i]=cval) - else - BODY(double, REAL, double, val, td[i]=cval) + if (!Rinherits(source, char_integer64)) { + if(mem) { memcpy(td, (double *)REAL(source), slen*sizeof(double)); break; } + else BODY(double, REAL, double, val, td[i]=cval) + } else BODY(int64_t, REAL, double, val==NA_INTEGER64 ? NA_REAL : val, td[i]=cval) default: COERCE_ERROR("double"); } } @@ -982,7 +982,9 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, case LGLSXP: // same as INTSXP case INTSXP: BODY(int, INTEGER, double, val==NA_INTEGER?(im=NA_REAL,NA_REAL):(im=0.0,val), td[i].r=cval;td[i].i=im) //TODO case REALSXP: - case CPLXSXP: BODY(Rcomplex, COMPLEX, Rcomplex, val, td[i]=cval) + case CPLXSXP: if (mem) { + memcpy(td, COMPLEX(source), slen*sizeof(Rcomplex)); break; + } else BODY(Rcomplex, COMPLEX, Rcomplex, val, td[i]=cval) default: COERCE_ERROR("complex"); } } break; From 2525f9b22dfcb627270516042110f9189bad0c59 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Fri, 27 Sep 2019 00:11:48 +0000 Subject: [PATCH 30/43] coverage --- inst/tests/tests.Rraw | 1 + 1 file changed, 1 insertion(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 5c523eefb1..9572ba94c3 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -14146,6 +14146,7 @@ test(2005.20, DT[1:2,c:=c(Inf,0.78)]$c, as.raw(INT(0,0,3))) test(2005.21, DT[1:2,d:=as.raw(c(0,255))]$d, c(FALSE,TRUE,TRUE)) test(2005.22, DT[2:3,b:=as.raw(c(0,255))]$b, INT(4,0,255)) test(2005.23, DT[1:2,e:=as.raw(c(0,255))]$e, c(0,255,pi*3)) +test(2005.24, DT[c(1,3,2), c:=as.raw(c(0,100,255))]$c, as.raw(c(0,255,100))) # rbindlist raw type, #2819 test(2006.1, rbindlist(list(data.table(x = as.raw(1), y=as.raw(3)), data.table(x = as.raw(2))), fill=TRUE), data.table(x=as.raw(1:2), y=as.raw(c(3,0)))) From a415095ca382a1b5520027739541ef19ef0dd1a3 Mon Sep 17 00:00:00 2001 From: jangorecki Date: Fri, 27 Sep 2019 14:35:09 +0200 Subject: [PATCH 31/43] NEWS improvements --- NEWS.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/NEWS.md b/NEWS.md index e266e1d210..1479bedd4d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -192,24 +192,24 @@ ```R set.seed(108) x = rnorm(1e6); n = 1e3 - rollfun = function(x, n, FUN) { + base_rollapply = function(x, n, FUN) { nx = length(x) ans = rep(NA_real_, nx) for (i in n:nx) ans[i] = FUN(x[(i-n+1):i]) ans } - system.time(rollfun(x, n, mean)) + system.time(base_rollapply(x, n, mean)) system.time(zoo::rollapplyr(x, n, function(x) mean(x), fill=NA)) system.time(zoo::rollmeanr(x, n, fill=NA)) system.time(frollapply(x, n, mean)) system.time(frollmean(x, n)) ### fun mean sum median - # base rollfun 8.815 5.151 60.175 + # base_rollapply 8.815 5.151 60.175 # zoo::rollapply 34.373 27.837 88.552 - # zoo::roll[fun] 0.215 0.185 NA + # zoo::roll[fun] 0.215 0.185 NA ## not fully supported # frollapply 5.404 1.419 56.475 - # froll[fun] 0.003 0.002 NA + # froll[fun] 0.003 0.002 NA ## not yet supported ``` 28. `setnames()` now accepts functions in `old=` and `new=`, [#3703](https://github.com/Rdatatable/data.table/issues/3703). Thanks @smingerson for the feature request and @shrektan for the PR. @@ -221,7 +221,7 @@ # [1] "A" "B" "C" setnames(DT, 2:3, tolower) names(DT) - # [1] "A" "b" "c" + # [1] "a" "b" "c" ``` 29. `:=` and `set()` now use zero-copy type coercion. Accordingly, `DT[..., integerColumn:=0]` and `set(DT,i,j,0)` no longer warn about the `0` ('numeric') needing to be `0L` ('integer') because there is no longer any time or space used for this coercion. The old long warning was off-putting to new users ("what and why L?"), whereas advanced users appreciated the old warning so they could avoid the coercion. Although the time and space for one coercion in a single call is unmeasurably small, when placed in a loop the small overhead of any allocation on R's heap could start to become noticeable (more so for `set()` whose purpose is low-overhead looping). Further, when assigning a value across columns of varying types, it could be inconvenient to supply the correct type for every column. Hence, zero-copy coercion was introduced to satisfy all these requirements. A warning is still issued, as before, when fractional data is discarded; e.g. when 3.14 is assigned to an integer column. Zero-copy coercion applies to length>1 vectors as well as length-1 vectors. From ba4b0cb56d5ae2ddb7ca02fd8ac75124daafab71 Mon Sep 17 00:00:00 2001 From: jangorecki Date: Fri, 27 Sep 2019 14:39:24 +0200 Subject: [PATCH 32/43] more verbose --- NEWS.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index 1479bedd4d..30c83b2eb9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -207,9 +207,9 @@ ### fun mean sum median # base_rollapply 8.815 5.151 60.175 # zoo::rollapply 34.373 27.837 88.552 - # zoo::roll[fun] 0.215 0.185 NA ## not fully supported + # zoo::roll[fun] 0.215 0.185 NA ## median not fully supported # frollapply 5.404 1.419 56.475 - # froll[fun] 0.003 0.002 NA ## not yet supported + # froll[fun] 0.003 0.002 NA ## median not yet supported ``` 28. `setnames()` now accepts functions in `old=` and `new=`, [#3703](https://github.com/Rdatatable/data.table/issues/3703). Thanks @smingerson for the feature request and @shrektan for the PR. From c8ce5a70dd500f27c05612c363fc34fad0a8dd78 Mon Sep 17 00:00:00 2001 From: jangorecki Date: Fri, 27 Sep 2019 15:09:21 +0200 Subject: [PATCH 33/43] allNA now does not return false by default --- src/utils.c | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/src/utils.c b/src/utils.c index f6e7421672..c3ef02e083 100644 --- a/src/utils.c +++ b/src/utils.c @@ -36,29 +36,50 @@ bool allNA(SEXP x) { // less space and time than any(is.na(x)) at R level because that creates full size is.na(x) first before any() // whereas this allNA can often return early on testing the first value without reading the rest const int n = length(x); - if (n==0) return true; // for empty raw, and NULL, to match R's all(is.na(raw())) true result, test 2116.13-14 + if (n==0) // for empty raw, and NULL, to match R's all(is.na(raw())) true result, test 2116.13-14 + return true; switch (TYPEOF(x)) { case LGLSXP: case INTSXP: { const int *xd = INTEGER(x); - for (int i=0; i Date: Fri, 27 Sep 2019 09:52:04 -0700 Subject: [PATCH 34/43] setnames news item --- NEWS.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index 30c83b2eb9..b639b2aedf 100644 --- a/NEWS.md +++ b/NEWS.md @@ -219,9 +219,9 @@ setnames(DT, toupper) names(DT) # [1] "A" "B" "C" - setnames(DT, 2:3, tolower) + setnames(DT, c(1,3), tolower) names(DT) - # [1] "a" "b" "c" + # [1] "a" "B" "c" ``` 29. `:=` and `set()` now use zero-copy type coercion. Accordingly, `DT[..., integerColumn:=0]` and `set(DT,i,j,0)` no longer warn about the `0` ('numeric') needing to be `0L` ('integer') because there is no longer any time or space used for this coercion. The old long warning was off-putting to new users ("what and why L?"), whereas advanced users appreciated the old warning so they could avoid the coercion. Although the time and space for one coercion in a single call is unmeasurably small, when placed in a loop the small overhead of any allocation on R's heap could start to become noticeable (more so for `set()` whose purpose is low-overhead looping). Further, when assigning a value across columns of varying types, it could be inconvenient to supply the correct type for every column. Hence, zero-copy coercion was introduced to satisfy all these requirements. A warning is still issued, as before, when fractional data is discarded; e.g. when 3.14 is assigned to an integer column. Zero-copy coercion applies to length>1 vectors as well as length-1 vectors. From 9f78001e4c3c9572827b36b9815bc8f28dfea242 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Fri, 27 Sep 2019 11:39:53 -0700 Subject: [PATCH 35/43] interim --- inst/tests/tests.Rraw | 12 +++++++++--- src/assign.c | 10 +++++++--- src/utils.c | 32 ++++++++++++-------------------- 3 files changed, 28 insertions(+), 26 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 9572ba94c3..706a9dda63 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -4856,7 +4856,7 @@ test(1294.03, dt[, a := NA]$a, rep(NA_integer_, 3L)) test(1294.04, dt[, a := "a"]$a, rep(NA_integer_, 3L), warning=c("NAs introduced by coercion", "Coerced character RHS to integer.*column 1 named 'a'")) -test(1294.05, dt[, a := list(list(1))]$a, rep(1L, 3L), warning="Coerced list RHS to integer to match.*column 1 named 'a'") +test(1294.05, dt[, a := list(list(1))]$a, error="Cannot coerce 'list' RHS to 'integer' to match.*column 1 named 'a'") test(1294.06, dt[, a := list(1L)]$a, rep(1L, 3L)) test(1294.07, dt[, a := list(1)]$a, rep(1L, 3L)) test(1294.08, dt[, a := TRUE]$a, rep(1L, 3L)) @@ -4865,14 +4865,14 @@ test(1294.10, dt[, b := NA]$b, rep(NA_real_,3)) test(1294.11, dt[, b := "bla"]$b, rep(NA_real_, 3), warning=c("NAs introduced by coercion", "Coerced character RHS to double to match.*column 2 named 'b'")) -test(1294.12, dt[, b := list(list(1))]$b, rep(1,3), warning="Coerced list RHS to double") +test(1294.12, dt[, b := list(list(1))]$b, error="Cannot coerce 'list' RHS to 'double' to match.*column 2 named 'b'") test(1294.13, dt[, b := TRUE]$b, rep(1,3)) test(1294.14, dt[, b := list(1)]$b, rep(1,3)) test(1294.15, dt[, c := 1]$c, rep(TRUE, 3)) test(1294.16, dt[, c := 1L]$c, rep(TRUE, 3)) test(1294.17, dt[, c := NA]$c, rep(NA, 3)) test(1294.18, dt[, c := list(1)]$c, rep(TRUE, 3)) -test(1294.19, dt[, c := list(list(1))]$c, rep(TRUE, 3), warning="Coerced list RHS to logical") +test(1294.19, dt[, c := list(list(1))]$c, error="Cannot coerce 'list' RHS to 'logical' to match.*column 3 named 'c'") test(1294.20, dt[, c := "bla"]$c, rep(NA, 3), warning="Coerced character RHS to logical") test(1294.21, dt[, d := 1]$d, rep(list(1), 3)) test(1294.22, dt[, d := 1L]$d, rep(list(1L), 3)) @@ -16228,6 +16228,12 @@ test(2116.11, !allNA(as.raw(c(0,0)))) test(2116.12, !allNA(as.raw(c(0,255)))) test(2116.13, allNA(raw())) # same as all(is.na(raw())) test(2116.14, allNA(NULL)) # same as all(is.na(NULL)) +test(2116.15, allNA(list())) # same as all(is.na(list())) +# turned off allNA list support for now to avoid accidentally using it internally where we did not intend; allNA not yet exported +# https://github.com/Rdatatable/data.table/pull/3909#discussion_r329065950 +test(2116.16, allNA(list(NA, NA_character_)), error="Unsupported type 'list' passed to allNA") # base R returns true +test(2116.17, allNA(list(NA, NA)), error="Unsupported type 'list' passed to allNA") # base R returns true +test(2116.18, allNA(list(NA, c(NA,NA))), error="Unsupported type 'list' passed to allNA") # base R returns false # don't create NA factor level when assigning to factor from character; bug in v1.12.2 noticed in dev part of PR#3909 DT = data.table(a=factor(LETTERS[1:3])) diff --git a/src/assign.c b/src/assign.c index 8c27619413..b4d68703ba 100644 --- a/src/assign.c +++ b/src/assign.c @@ -801,10 +801,14 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, // now continue, but with the mapped integers in the (new) source } } - } else if (TYPEOF(target)!=TYPEOF(source) || targetIsI64!=sourceIsI64) { + } else if ((TYPEOF(target)!=TYPEOF(source) || targetIsI64!=sourceIsI64) && !isNewList(target)) { // checks up front, otherwise we'd need checks twice in the two branches that cater for 'where' or not // TODO: verbose message and/or strict option for advanced users to ensure types match // only call getOption (small cost in finding the option value) at this point when there is a type mismatch + if (isNewList(source)) { + error("Cannot coerce 'list' RHS to '%s' to match the type of the target column (column %d named '%s').", + type2char(TYPEOF(target)), colnum, colname); + } switch(TYPEOF(target)) { case LGLSXP: if (isInteger(source)) { @@ -848,13 +852,13 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, } break; } - if (isString(target) || isString(source) || isNewList(source)) { + if (isString(target) || isString(source)) { // TODO if (allNA(source)) { // // e.g. to save coercing NA to NA_character_; if types match then leave it to the regular assign and the call to allNA is saved // point to fixed ScalarLogical(NA_LOGICAL) and fall through to standard cases below //} else { SEXP tt = PROTECT(coerceVector(source, TYPEOF(target))); protecti++; - if (!isString(target) && !isNewList(target) && !allNA(source)) { + if (!isString(target) /*&& !isNewList(target)*/ && !allNA(source)) { warning("Coerced %s RHS to %s to match the type of the target column (column %d named '%s').", type2char(TYPEOF(source)), type2char(TYPEOF(target)), colnum, colname); } diff --git a/src/utils.c b/src/utils.c index c3ef02e083..20918d6478 100644 --- a/src/utils.c +++ b/src/utils.c @@ -36,49 +36,41 @@ bool allNA(SEXP x) { // less space and time than any(is.na(x)) at R level because that creates full size is.na(x) first before any() // whereas this allNA can often return early on testing the first value without reading the rest const int n = length(x); - if (n==0) // for empty raw, and NULL, to match R's all(is.na(raw())) true result, test 2116.13-14 + if (n==0) // empty vectors (including raw(), NULL, and list()) same as R's all(is.na()) true result; tests 2116.* return true; switch (TYPEOF(x)) { + case RAWSXP: // raw doesn't support NA so always false (other than length 0 case above) + return false; case LGLSXP: case INTSXP: { const int *xd = INTEGER(x); - for (int i=0; i Date: Fri, 27 Sep 2019 13:12:08 -0700 Subject: [PATCH 36/43] saved coerce from NA to NA_character_ --- inst/tests/tests.Rraw | 7 ++++++- src/assign.c | 24 +++++++++++++----------- src/utils.c | 2 ++ 3 files changed, 21 insertions(+), 12 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 706a9dda63..baaa13f13e 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -4882,7 +4882,12 @@ test(1294.25, dt[, d := list(list(1))]$d, rep(list(1), 3)) test(1294.26, dt[, e := 1]$e, rep("1", 3)) test(1294.27, dt[, e := 1L]$e, rep("1", 3)) test(1294.28, dt[, e := TRUE]$e, rep("TRUE", 3)) -test(1294.29, dt[, e := list(list(1))]$e, rep("1", 3)) +### +### TEMPORARILY OFF IN DEV. TO BE ADDRESSED BEFORE v1.12.4 RELEASE +### REVISIT IN #3909 or #3626 +### +### test(1294.29, dt[, e := list(list(1))]$e, rep("1", 3)) +### test(1294.30, dt[, e := "bla"]$e, rep("bla", 3)) test(1294.31, dt[, e := list("bla2")]$e, rep("bla2", 3)) diff --git a/src/assign.c b/src/assign.c index b4d68703ba..f3f0d1cf94 100644 --- a/src/assign.c +++ b/src/assign.c @@ -852,18 +852,11 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, } break; } - if (isString(target) || isString(source)) { - // TODO if (allNA(source)) { - // // e.g. to save coercing NA to NA_character_; if types match then leave it to the regular assign and the call to allNA is saved - // point to fixed ScalarLogical(NA_LOGICAL) and fall through to standard cases below - //} else { + if (isString(source)) { SEXP tt = PROTECT(coerceVector(source, TYPEOF(target))); protecti++; - if (!isString(target) /*&& !isNewList(target)*/ && !allNA(source)) { - warning("Coerced %s RHS to %s to match the type of the target column (column %d named '%s').", - type2char(TYPEOF(source)), type2char(TYPEOF(target)), colnum, colname); - } + warning("Coerced %s RHS to %s to match the type of the target column (column %d named '%s').", + type2char(TYPEOF(source)), type2char(TYPEOF(target)), colnum, colname); source = tt; - // } } } @@ -997,7 +990,16 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, const SEXP *ld = STRING_PTR(PROTECT(getAttrib(source, R_LevelsSymbol))); protecti++; BODY(int, INTEGER, SEXP, val==NA_INTEGER ? NA_STRING : ld[val-1], SET_STRING_ELT(target, off+i, cval)) } else { - if (!isString(source)) error("Internal error: type %s should have been coerced to character earlier", type2char(TYPEOF(source))); // # nocov + if (!isString(source)) { + if (allNA(source)) { // saves common coercion of NA (logical) to NA_character_; if source is 'list' that was an error above + source = ScalarLogical(FALSE); // dummy input that BODY requires; a no alloc internal R constant that is a SEXP + // we're using BODY here for its case that hops via 'where', otherwise we could just do a trivial loop here + BODY(int, LOGICAL, SEXP, NA_STRING+val, SET_STRING_ELT(target, off+i, cval)) + // ^^ dummy +0 on address to avoid C warning about not using val; hence why dummy is FALSE (0) + // BODY has built-in break + } + source = PROTECT(coerceVector(source, STRSXP)); protecti++; + } BODY(SEXP, STRING_PTR, SEXP, val, SET_STRING_ELT(target, off+i, cval)) } case VECSXP : diff --git a/src/utils.c b/src/utils.c index 20918d6478..14e67b49aa 100644 --- a/src/utils.c +++ b/src/utils.c @@ -71,6 +71,8 @@ bool allNA(SEXP x) { } default: error("Unsupported type '%s' passed to allNA()", type2char(TYPEOF(x))); // e.g. VECSXP; tests 2116.16-18 + // turned off allNA list support for now to avoid accidentally using it internally where we did not intend; allNA not yet exported + // https://github.com/Rdatatable/data.table/pull/3909#discussion_r329065950 } } From 8d871cc100e49d9cc447927b3e701caac56c1f24 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Fri, 27 Sep 2019 15:40:56 -0700 Subject: [PATCH 37/43] assigning factor numbers works again --- inst/tests/tests.Rraw | 35 ++++++++++++++++++++--------------- src/assign.c | 41 +++++++++++++++++++++++++++++++---------- src/data.table.h | 2 +- src/utils.c | 11 +++++------ 4 files changed, 57 insertions(+), 32 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index baaa13f13e..4aba3fdd68 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -1215,16 +1215,21 @@ test(423, truelength(DT), 1028L) # Test crash bug fixed, #1656, introduced with the 1.7.0 feature DT = data.table(a = factor(c("A", "Z")), b = 1:4) -DT[1,1] <- "Z" -test(424, DT, data.table(a=factor(c("Z","Z","A","Z")),b=1:4)) -test(425, DT[1,1] <- 1, error="Cannot assign 'double' to 'factor'") -test(426, DT[1,1] <- 2L, error="Cannot assign 'integer' to 'factor'") -test(427, DT, data.table(a=factor(c("Z","Z","A","Z")),b=1:4)) -DT[1,a:="A"] -test(428, DT, data.table(a=factor(c("A","Z","A","Z")),b=1:4)) -test(429.1, DT[1,a:=2L], error="Cannot assign 'integer' to 'factor'. Factor columns can only be assigned factor or character values, or NA in any type") -test(429.2, DT[2,b:=factor("A")], error="Cannot assign 'factor' to 'integer'. Factors can only be assigned to factor, character or list columns.") -test(430, DT, data.table(a=factor(c("A","Z","A","Z")),b=1:4)) +test(424.1, DT[1,1] <- "Z", "Z") +test(424.2, DT, data.table(a=factor(c("Z","Z","A","Z")),b=1:4)) +test(425, DT[1,1] <- 1, 1) +test(426, DT, data.table(a=factor(c("A","Z")),b=1:4)) +test(427.1, DT[1,1] <- 2L, 2L) +test(427.2, DT, data.table(a=factor(c("Z","Z","A","Z")),b=1:4)) +test(428, DT[1,a:="A"], data.table(a=factor(c("A","Z","A","Z")),b=1:4)) +test(429, DT[1,a:=2L], data.table(a=factor(c("Z","Z","A","Z")),b=1:4)) + +test(430.1, DT[1,1]<-3, error="Assigning factor numbers to column 1 named 'a'. But 3.0.* is outside the level range [[]1,2[]], or is not a whole number") +test(430.2, DT[1,1]<-1.3, error="Assigning factor numbers to column 1 named 'a'. But 1.3.* is outside the level range [[]1,2[]], or is not a whole number") +test(430.3, DT[1,1:=4L], error="Assigning factor numbers.*1 named 'a'. But 4 is outside.*1,2.*") +test(430.4, DT[1,a:=TRUE], error="Cannot assign 'logical' to 'factor'. Factor columns can be assigned") +test(430.5, DT[2,b:=factor("A")], error="Cannot assign 'factor' to 'integer'. Factors can only be assigned to") + DT = data.table(a=factor(c("A","B","A","C","B")), b=1:5) test(431.1, DT[1,1:=NA], data.table(a=factor(c(NA,"B","A","C","B")), b=1:5)) test(431.2, DT[2,1:=NA_integer_], data.table(a=factor(c(NA,NA,"A","C","B"), levels=LETTERS[1:3]), b=1:5)) @@ -4855,7 +4860,7 @@ test(1294.02, dt[, a := 1.5]$a, rep(1L, 3L), warning="Coerced double RHS to inte test(1294.03, dt[, a := NA]$a, rep(NA_integer_, 3L)) test(1294.04, dt[, a := "a"]$a, rep(NA_integer_, 3L), warning=c("NAs introduced by coercion", - "Coerced character RHS to integer.*column 1 named 'a'")) + "Coerced 'character' RHS to 'integer'.*column 1 named 'a'")) test(1294.05, dt[, a := list(list(1))]$a, error="Cannot coerce 'list' RHS to 'integer' to match.*column 1 named 'a'") test(1294.06, dt[, a := list(1L)]$a, rep(1L, 3L)) test(1294.07, dt[, a := list(1)]$a, rep(1L, 3L)) @@ -4864,7 +4869,7 @@ test(1294.09, dt[, b := 1L]$b, rep(1,3)) test(1294.10, dt[, b := NA]$b, rep(NA_real_,3)) test(1294.11, dt[, b := "bla"]$b, rep(NA_real_, 3), warning=c("NAs introduced by coercion", - "Coerced character RHS to double to match.*column 2 named 'b'")) + "Coerced 'character' RHS to 'double' to match.*column 2 named 'b'")) test(1294.12, dt[, b := list(list(1))]$b, error="Cannot coerce 'list' RHS to 'double' to match.*column 2 named 'b'") test(1294.13, dt[, b := TRUE]$b, rep(1,3)) test(1294.14, dt[, b := list(1)]$b, rep(1,3)) @@ -4873,7 +4878,7 @@ test(1294.16, dt[, c := 1L]$c, rep(TRUE, 3)) test(1294.17, dt[, c := NA]$c, rep(NA, 3)) test(1294.18, dt[, c := list(1)]$c, rep(TRUE, 3)) test(1294.19, dt[, c := list(list(1))]$c, error="Cannot coerce 'list' RHS to 'logical' to match.*column 3 named 'c'") -test(1294.20, dt[, c := "bla"]$c, rep(NA, 3), warning="Coerced character RHS to logical") +test(1294.20, dt[, c := "bla"]$c, rep(NA, 3), warning="Coerced 'character' RHS to 'logical'") test(1294.21, dt[, d := 1]$d, rep(list(1), 3)) test(1294.22, dt[, d := 1L]$d, rep(list(1L), 3)) test(1294.23, dt[, d := TRUE]$d, rep(list(TRUE), 3)) @@ -12937,9 +12942,9 @@ test(1944.6, DT[flag == 1, sum(x), keyby = group], data.table(group=1:4, V1=INT( # assigning an int greater than length(levels) corruption of int, #2984 DT <- data.table(a = factor(c("A", "Z")), b = 1:4) c <- 3L -test(1945.1, DT[1, a:=c], error="Cannot assign 'integer' to 'factor'") # [1,a], factor(NA, levels=c("A","Z")), warning="RHS.*outside the levels range.*NAs generated") +test(1945.1, DT[1, a:=c], error=err<-"Assigning factor numbers to column 1 named 'a'. But 3 is outside the level range [1,2]") test(1945.2, c, 3L) -test(1945.3, DT[2,1] <- c, error="Cannot assign 'integer' to 'factor'") # 3L, warning="RHS.*outside the levels range.*NAs generated") +test(1945.3, DT[2,1] <- c, error=err) test(1945.4, c, 3L) # subset a data.table containing an altrep derived from ]<-, ]]<- etc, #3051 diff --git a/src/assign.c b/src/assign.c index f3f0d1cf94..e668a15eea 100644 --- a/src/assign.c +++ b/src/assign.c @@ -712,11 +712,31 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, // else assigning factor to character is left to later below, avoiding wasteful asCharacterFactor } else if (!sourceIsFactor && !isString(source)) { // target is factor - // TODO allow integer in the range [NA,1,nlevel] again, with error outside range - if (!allNA(source)) { - error("Cannot assign '%s' to 'factor'. Factor columns can only be assigned factor or character values, or NA in any type.", type2char(TYPEOF(source))); + if (allNA(source, false)) { // return false for list and other types that allNA does not support + source = ScalarLogical(NA_LOGICAL); // a global constant in R and won't allocate; fall through to regular zero-copy coerce + } else if (isInteger(source) || isReal(source)) { + // allow assigning level numbers to factor columns; test 425, 426, 429 and 1945 + const int nlevel = length(getAttrib(target, R_LevelsSymbol)); + if (isInteger(source)) { + const int *sd = INTEGER(source); + for (int i=0; inlevel) { + error("Assigning factor numbers to column %d named '%s'. But %d is outside the level range [1,%d]", colnum, colname, val, nlevel); + } + } + } else { + const double *sd = REAL(source); + for (int i=0; inlevel)) { + error("Assigning factor numbers to column %d named '%s'. But %f is outside the level range [1,%d], or is not a whole number.", colnum, colname, val, nlevel); + } + } + } + // Now just let the valid level numbers fall through to regular assign by BODY below } else { - source = ScalarLogical(NA_LOGICAL); // a global constant in R and won't allocate; fall through to regular zero-copy coerce of logical to integer + error("Cannot assign '%s' to 'factor'. Factor columns can be assigned factor, character, NA in any type, or level numbers.", type2char(TYPEOF(source))); } } else { // either factor or character being assigned to factor column @@ -853,10 +873,9 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, break; } if (isString(source)) { - SEXP tt = PROTECT(coerceVector(source, TYPEOF(target))); protecti++; - warning("Coerced %s RHS to %s to match the type of the target column (column %d named '%s').", - type2char(TYPEOF(source)), type2char(TYPEOF(target)), colnum, colname); - source = tt; + source = PROTECT(coerceVector(source, TYPEOF(target))); protecti++; + warning("Coerced 'character' RHS to '%s' to match the type of the target column (column %d named '%s').", + type2char(TYPEOF(target)), colnum, colname); } } @@ -991,10 +1010,12 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, BODY(int, INTEGER, SEXP, val==NA_INTEGER ? NA_STRING : ld[val-1], SET_STRING_ELT(target, off+i, cval)) } else { if (!isString(source)) { - if (allNA(source)) { // saves common coercion of NA (logical) to NA_character_; if source is 'list' that was an error above + if (allNA(source, true)) { // saves common coercion of NA (logical) to NA_character_ + // ^^ =errorForBadType; if type list, that was already an error earlier so we + // want to be strict now otherwise list would get to coerceVector below source = ScalarLogical(FALSE); // dummy input that BODY requires; a no alloc internal R constant that is a SEXP // we're using BODY here for its case that hops via 'where', otherwise we could just do a trivial loop here - BODY(int, LOGICAL, SEXP, NA_STRING+val, SET_STRING_ELT(target, off+i, cval)) + BODY(int, LOGICAL, SEXP, NA_STRING+val, SET_STRING_ELT(target, off+i, cval)) // ^^ dummy +0 on address to avoid C warning about not using val; hence why dummy is FALSE (0) // BODY has built-in break } diff --git a/src/data.table.h b/src/data.table.h index a234e67bce..62dc0c6735 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -213,7 +213,7 @@ SEXP coalesce(SEXP x, SEXP inplace); // utils.c bool isRealReallyInt(SEXP x); SEXP isReallyReal(SEXP x); -bool allNA(SEXP x); +bool allNA(SEXP x, bool errorForBadType); SEXP colnamesInt(SEXP x, SEXP cols, SEXP check_dups); void coerceFill(SEXP fill, double *dfill, int32_t *ifill, int64_t *i64fill); SEXP coerceFillR(SEXP fill); diff --git a/src/utils.c b/src/utils.c index 14e67b49aa..e64b2408d4 100644 --- a/src/utils.c +++ b/src/utils.c @@ -32,7 +32,7 @@ SEXP isReallyReal(SEXP x) { return(ans); } -bool allNA(SEXP x) { +bool allNA(SEXP x, bool errorForBadType) { // less space and time than any(is.na(x)) at R level because that creates full size is.na(x) first before any() // whereas this allNA can often return early on testing the first value without reading the rest const int n = length(x); @@ -68,16 +68,15 @@ bool allNA(SEXP x) { return false; } return true; - } - default: - error("Unsupported type '%s' passed to allNA()", type2char(TYPEOF(x))); // e.g. VECSXP; tests 2116.16-18 + }} + if (!errorForBadType) return false; + error("Unsupported type '%s' passed to allNA()", type2char(TYPEOF(x))); // e.g. VECSXP; tests 2116.16-18 // turned off allNA list support for now to avoid accidentally using it internally where we did not intend; allNA not yet exported // https://github.com/Rdatatable/data.table/pull/3909#discussion_r329065950 - } } SEXP allNAR(SEXP x) { - return ScalarLogical(allNA(x)); + return ScalarLogical(allNA(x, /*errorForBadType=*/true)); } /* colnamesInt From 6b521e1457e1d27d42912f0ad586377a9a5764cf Mon Sep 17 00:00:00 2001 From: mattdowle Date: Fri, 27 Sep 2019 16:14:12 -0700 Subject: [PATCH 38/43] type complex cases and tests --- inst/tests/tests.Rraw | 14 +++++++++----- src/assign.c | 4 ++-- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 4aba3fdd68..10b3f8d82b 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -14157,6 +14157,10 @@ test(2005.21, DT[1:2,d:=as.raw(c(0,255))]$d, c(FALSE,TRUE,TRUE)) test(2005.22, DT[2:3,b:=as.raw(c(0,255))]$b, INT(4,0,255)) test(2005.23, DT[1:2,e:=as.raw(c(0,255))]$e, c(0,255,pi*3)) test(2005.24, DT[c(1,3,2), c:=as.raw(c(0,100,255))]$c, as.raw(c(0,255,100))) +test(2005.25, DT[c(3,1), f:=as.raw(c(20,42))]$f, c(42+0i, 11, 20+0i)) +test(2005.26, DT[2:3, f:=c(NA,FALSE)]$f, c(42+0i, NA, 0+0i)) +test(2005.27, DT[c(1,3), f:=c(-42L,NA)]$f, c(-42+0i, NA, NA)) +test(2005.28, DT[3:2, f:=c(pi,-Inf)]$f, c(-42+0i, -Inf+0i, pi+0i)) # rbindlist raw type, #2819 test(2006.1, rbindlist(list(data.table(x = as.raw(1), y=as.raw(3)), data.table(x = as.raw(2))), fill=TRUE), data.table(x=as.raw(1:2), y=as.raw(c(3,0)))) @@ -15432,11 +15436,11 @@ test(2066.05, DT[, v[2], by=id], data.table(id = 1:2, V1=c(2i, NA))) DT = data.table(A=1:5, B=-3i, C=2147483647L) test(2066.06, DT[, .(sum(B), mean(B)), by=A%%2L], data.table(A=1:0, V1=c(-9i, -6i), V2=-3i)) test(2066.07, DT[2:4, .(sum(B), mean(B)), by=A%%2L], data.table(A=0:1, V1=c(-6i, -3i), V2=-3i)) -DT[4, B:=NA] -test(2066.08, DT[, .(sum(B), mean(B)), by=A%%2L], data.table(A=1:0, V1=c(-9i, NA), V2=c(-3i, NA))) -test(2066.09, DT[2:4, .(sum(B), mean(B)), by=A%%2L], data.table(A=0:1, V1=c(NA, -3i), V2=c(NA, -3i))) -test(2066.10, DT[, .(sum(B, na.rm=TRUE), mean(B, na.rm=TRUE)), by=A%%2L], data.table(A=1:0, V1=c(-9i, -3i), V2=-3i)) -test(2066.11, DT[2:4, .(sum(B, na.rm=TRUE), mean(B, na.rm=TRUE)), by=A%%2L], data.table(A=0:1, V1=c(-3i, -3i), V2=-3i)) +test(2066.08, DT[4, B:=NA]$B, c(-3i,-3i,-3i,NA,-3i)) +test(2066.09, DT[, .(sum(B), mean(B)), by=A%%2L], data.table(A=1:0, V1=c(-9i, NA), V2=c(-3i, NA))) +test(2066.10, DT[2:4, .(sum(B), mean(B)), by=A%%2L], data.table(A=0:1, V1=c(NA, -3i), V2=c(NA, -3i))) +test(2066.11, DT[, .(sum(B, na.rm=TRUE), mean(B, na.rm=TRUE)), by=A%%2L], data.table(A=1:0, V1=c(-9i, -3i), V2=-3i)) +test(2066.12, DT[2:4, .(sum(B, na.rm=TRUE), mean(B, na.rm=TRUE)), by=A%%2L], data.table(A=0:1, V1=c(-3i, -3i), V2=-3i)) # Shift complex values, part of #3690 z = c(1:3) + c(3:1)*1i diff --git a/src/assign.c b/src/assign.c index e668a15eea..26b71c3dae 100644 --- a/src/assign.c +++ b/src/assign.c @@ -994,10 +994,10 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, Rcomplex *td = COMPLEX(target) + off; double im = 0.0; switch (TYPEOF(source)) { - //TODO case RAWSXP + case RAWSXP: BODY(Rbyte, RAW, double, (im=0.0,val), td[i].r=cval;td[i].i=im) case LGLSXP: // same as INTSXP case INTSXP: BODY(int, INTEGER, double, val==NA_INTEGER?(im=NA_REAL,NA_REAL):(im=0.0,val), td[i].r=cval;td[i].i=im) - //TODO case REALSXP: + case REALSXP: BODY(double, REAL, double, val==ISNAN(val)?(im=NA_REAL,NA_REAL):(im=0.0,val), td[i].r=cval;td[i].i=im) case CPLXSXP: if (mem) { memcpy(td, COMPLEX(source), slen*sizeof(Rcomplex)); break; } else BODY(Rcomplex, COMPLEX, Rcomplex, val, td[i]=cval) From 475414a47c90595d2f4f5e2b6d5b479f7d165301 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Fri, 27 Sep 2019 17:42:49 -0700 Subject: [PATCH 39/43] added verbose message when coerce, more cases and tests --- inst/tests/tests.Rraw | 35 +++++++------ src/assign.c | 117 +++++++++++++++++++++++++----------------- 2 files changed, 88 insertions(+), 64 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 10b3f8d82b..91e1516f0a 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -14142,25 +14142,28 @@ test(2005.09, set(DT, 1L, "c", expression(x+2)), error="type 'expression' cannot test(2005.10, set(DT, 1L, "d", expression(x+2)), error="type 'expression' cannot be coerced to 'logical'") test(2005.11, set(DT, 1L, "e", expression(x+2)), error="type 'expression' cannot be coerced to 'double'") test(2005.12, set(DT, 1L, "f", expression(x+2)), error="type 'expression' cannot be coerced to 'complex'") +test(2005.30, DT[2:3,c:=c(TRUE,FALSE), verbose=TRUE]$c, as.raw(INT(7,1,0)), + output="Zero-copy coerce when assigning 'logical' to 'raw' column 3 named 'c'") +test(2005.31, set(DT,1L,"c",NA)$c, as.raw(INT(0,1,0))) +test(2005.32, set(DT,1:2,"c",INT(-1,255))$c, as.raw(INT(0,255,0))) +test(2005.33, DT[2:3,c:=INT(NA,256)]$c, as.raw(INT(0,0,0))) +test(2005.34, set(DT,2:3,"c",c(NA,3.14))$c, as.raw(INT(0,0,3))) +test(2005.35, DT[1:2,c:=c(Inf,0.78)]$c, as.raw(INT(0,0,3))) +test(2005.36, DT[1:2,d:=as.raw(c(0,255))]$d, c(FALSE,TRUE,TRUE)) +test(2005.37, DT[2:3,b:=as.raw(c(0,255))]$b, INT(4,0,255)) +test(2005.38, DT[1:2,e:=as.raw(c(0,255))]$e, c(0,255,pi*3)) +test(2005.39, DT[c(1,3,2), c:=as.raw(c(0,100,255)), verbose=TRUE]$c, as.raw(c(0,255,100)), + notOutput="coerce") +test(2005.40, DT[c(3,1), f:=as.raw(c(20,42))]$f, c(42+0i, 11, 20+0i)) +test(2005.41, DT[2:3, f:=c(NA,FALSE)]$f, c(42+0i, NA, 0+0i)) +test(2005.42, DT[c(1,3), f:=c(-42L,NA)]$f, c(-42+0i, NA, NA)) +test(2005.43, DT[3:2, f:=c(pi,-Inf)]$f, c(-42+0i, -Inf+0i, pi+0i)) if (test_bit64) { DT[,g:=as.integer64(c(-9,0,NA))] - test(2005.13, set(DT, 1L, "g", expression(x+2)), error="type 'expression' cannot be coerced to 'integer64'") - test(2005.14, DT[1:2,e:=as.integer64(c(NA,-200))]$e, c(NA_real_, -200, pi*3)) + test(2005.60, set(DT, 1L, "g", expression(x+2)), error="type 'expression' cannot be coerced to 'integer64'") + test(2005.61, DT[1:2,e:=as.integer64(c(NA,-200))]$e, c(NA_real_, -200, pi*3)) + test(2005.62, DT[2:3, d:=as.integer64(c(2,NA))]$d, c(FALSE,TRUE,NA), warning="Non-zero 2 taken as TRUE") } -test(2005.15, DT[2:3,c:=c(TRUE,FALSE)]$c, as.raw(INT(7,1,0))) -test(2005.16, set(DT,1L,"c",NA)$c, as.raw(INT(0,1,0))) -test(2005.17, set(DT,1:2,"c",INT(-1,255))$c, as.raw(INT(0,255,0))) -test(2005.18, DT[2:3,c:=INT(NA,256)]$c, as.raw(INT(0,0,0))) -test(2005.19, set(DT,2:3,"c",c(NA,3.14))$c, as.raw(INT(0,0,3))) -test(2005.20, DT[1:2,c:=c(Inf,0.78)]$c, as.raw(INT(0,0,3))) -test(2005.21, DT[1:2,d:=as.raw(c(0,255))]$d, c(FALSE,TRUE,TRUE)) -test(2005.22, DT[2:3,b:=as.raw(c(0,255))]$b, INT(4,0,255)) -test(2005.23, DT[1:2,e:=as.raw(c(0,255))]$e, c(0,255,pi*3)) -test(2005.24, DT[c(1,3,2), c:=as.raw(c(0,100,255))]$c, as.raw(c(0,255,100))) -test(2005.25, DT[c(3,1), f:=as.raw(c(20,42))]$f, c(42+0i, 11, 20+0i)) -test(2005.26, DT[2:3, f:=c(NA,FALSE)]$f, c(42+0i, NA, 0+0i)) -test(2005.27, DT[c(1,3), f:=c(-42L,NA)]$f, c(-42+0i, NA, NA)) -test(2005.28, DT[3:2, f:=c(pi,-Inf)]$f, c(-42+0i, -Inf+0i, pi+0i)) # rbindlist raw type, #2819 test(2006.1, rbindlist(list(data.table(x = as.raw(1), y=as.raw(3)), data.table(x = as.raw(2))), fill=TRUE), data.table(x=as.raw(1:2), y=as.raw(c(3,0)))) diff --git a/src/assign.c b/src/assign.c index 26b71c3dae..3898f1a583 100644 --- a/src/assign.c +++ b/src/assign.c @@ -822,60 +822,79 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, } } } else if ((TYPEOF(target)!=TYPEOF(source) || targetIsI64!=sourceIsI64) && !isNewList(target)) { - // checks up front, otherwise we'd need checks twice in the two branches that cater for 'where' or not - // TODO: verbose message and/or strict option for advanced users to ensure types match - // only call getOption (small cost in finding the option value) at this point when there is a type mismatch if (isNewList(source)) { error("Cannot coerce 'list' RHS to '%s' to match the type of the target column (column %d named '%s').", type2char(TYPEOF(target)), colnum, colname); } - switch(TYPEOF(target)) { - case LGLSXP: - if (isInteger(source)) { - const int *sD = INTEGER(source); - for (int i=0; i0) { - warning("Coerced double RHS to integer to match the type of the target column (column %d named '%s'). One or more RHS values contain fractions which have been lost; e.g. item %d with value %f has been truncated to %d.", colnum, colname, w, REAL(source)[w-1], (int)REAL(source)[w-1]); - // same warning text as v1.12.2 so as to reduce diff in tests. TODO: shorten text in future to put truncated %f at the begnning of the message. + break; + case INTSXP: + if (isReal(source) && !sourceIsI64) { + int w = INTEGER(isReallyReal(source))[0]; // first fraction present (1-based), 0 if none + if (w>0) { + warning("Coerced double RHS to integer to match the type of the target column (column %d named '%s'). One or more RHS values contain fractions which have been lost; e.g. item %d with value %f has been truncated to %d.", colnum, colname, w, REAL(source)[w-1], (int)REAL(source)[w-1]); + // same warning text as v1.12.2 so as to reduce diff in tests. TODO: shorten text in future to put truncated %f at the begnning of the message. + } } - } - break; - case REALSXP: - if (targetIsI64 && isReal(source) && !sourceIsI64) { - int firstReal=0; - if ((firstReal=INTEGER(isReallyReal(source))[0])) { - snprintf(memrecycle_message, MSGSIZE, "coerced to integer64 but contains a non-integer value (%f at position %d); precision lost.", REAL(source)[firstReal-1], firstReal); + break; + case REALSXP: + if (targetIsI64 && isReal(source) && !sourceIsI64) { + int firstReal=0; + if ((firstReal=INTEGER(isReallyReal(source))[0])) { + snprintf(memrecycle_message, MSGSIZE, "coerced to integer64 but contains a non-integer value (%f at position %d); precision lost.", REAL(source)[firstReal-1], firstReal); + } } + break; } - break; - } - if (isString(source)) { - source = PROTECT(coerceVector(source, TYPEOF(target))); protecti++; - warning("Coerced 'character' RHS to '%s' to match the type of the target column (column %d named '%s').", - type2char(TYPEOF(target)), colnum, colname); } } @@ -945,7 +964,9 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, memcpy(td, LOGICAL(source), slen*sizeof(Rboolean)); break; } else BODY(int, LOGICAL, int, val, td[i]=cval) case INTSXP: BODY(int, INTEGER, int, val==NA_INTEGER ? NA_LOGICAL : val!=0, td[i]=cval) - case REALSXP: BODY(double, REAL, int, ISNAN(val) ? NA_LOGICAL : val!=0.0, td[i]=cval) + case REALSXP: if (sourceIsI64) + BODY(int64_t, REAL, int, val==NA_INTEGER64 ? NA_LOGICAL : val!=0, td[i]=cval) + else BODY(double, REAL, int, ISNAN(val) ? NA_LOGICAL : val!=0.0, td[i]=cval) default: COERCE_ERROR("logical"); } } break; @@ -962,14 +983,14 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, } } break; case REALSXP : { - if (Rinherits(target, char_integer64)) { + if (targetIsI64) { int64_t *td = (int64_t *)REAL(target) + off; switch (TYPEOF(source)) { case RAWSXP: BODY(Rbyte, RAW, int64_t, (int64_t)val, td[i]=cval) case LGLSXP: // same as INTSXP case INTSXP: BODY(int, INTEGER, int64_t, val==NA_INTEGER ? NA_INTEGER64 : val, td[i]=cval) case REALSXP: - if (Rinherits(source, char_integer64)) { + if (sourceIsI64) { if(mem) { memcpy(td, (int64_t *)REAL(source), slen*sizeof(int64_t)); break; } else BODY(int64_t, REAL, int64_t, val, td[i]=cval) } else BODY(double, REAL, int64_t, R_FINITE(val) ? val : NA_INTEGER64, td[i]=cval) @@ -982,7 +1003,7 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, case LGLSXP: // same as INTSXP case INTSXP: BODY(int, INTEGER, double, val==NA_INTEGER ? NA_REAL : val, td[i]=cval) case REALSXP: - if (!Rinherits(source, char_integer64)) { + if (!sourceIsI64) { if(mem) { memcpy(td, (double *)REAL(source), slen*sizeof(double)); break; } else BODY(double, REAL, double, val, td[i]=cval) } else BODY(int64_t, REAL, double, val==NA_INTEGER64 ? NA_REAL : val, td[i]=cval) From 47ad56e44d3c25f9f3b36b59a021dc3b641df852 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Fri, 27 Sep 2019 18:37:26 -0700 Subject: [PATCH 40/43] more integer64 cases (expecting some lack of coverage to cover next), and vertical alignment --- src/assign.c | 116 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 69 insertions(+), 47 deletions(-) diff --git a/src/assign.c b/src/assign.c index 3898f1a583..fcfb6e045f 100644 --- a/src/assign.c +++ b/src/assign.c @@ -840,7 +840,10 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, } // The following checks are up front here once, otherwise we'd need them twice in the two branches // inside BODY that cater for 'where' or not. + // The idea is to do them without a coerceVector() which allocates; zero-copy coerce + // TODO: these can be condensed into a macro similar to BODY further below switch(TYPEOF(target)) { + //TODO case RAWSXP: case LGLSXP: if (isInteger(source)) { const int *sd = INTEGER(source); @@ -869,8 +872,8 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, for (int i=0; i0) { - warning("Coerced double RHS to integer to match the type of the target column (column %d named '%s'). One or more RHS values contain fractions which have been lost; e.g. item %d with value %f has been truncated to %d.", colnum, colname, w, REAL(source)[w-1], (int)REAL(source)[w-1]); - // same warning text as v1.12.2 so as to reduce diff in tests. TODO: shorten text in future to put truncated %f at the begnning of the message. + if (isReal(source)) { + if (!sourceIsI64) { + int w = INTEGER(isReallyReal(source))[0]; // first fraction present (1-based), 0 if none + if (w>0) { + warning("Coerced double RHS to integer to match the type of the target column (column %d named '%s'). One or more RHS values contain fractions which have been lost; e.g. item %d with value %f has been truncated to %d.", colnum, colname, w, REAL(source)[w-1], (int)REAL(source)[w-1]); + // same warning text as v1.12.2 so as to reduce diff in tests. TODO: shorten text in future to put truncated %f at the begnning of the message. + } + } else { + const int64_t *sd = (int64_t *)REAL(source); + for (int i=0; iINT_MAX)) { + warning("Assigning integer64 to integer but %lld is outside range of integer and will be converted to NA.", (long long)val); + break; + } + } } } break; @@ -947,66 +961,70 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, case RAWSXP: { Rbyte *td = RAW(target) + off; switch (TYPEOF(source)) { - case RAWSXP: if (mem) { - memcpy(td, RAW(source), slen*sizeof(Rbyte)); break; - } else BODY(Rbyte, RAW, Rbyte, val, td[i]=cval) - case LGLSXP: BODY(int, LOGICAL, Rbyte, val==1, td[i]=cval) - case INTSXP: BODY(int, INTEGER, Rbyte, (val>255 || val<0) ? 0 : val, td[i]=cval) - case REALSXP: BODY(double, REAL, Rbyte, (ISNAN(val)||val>256||val<0) ? 0 : val, td[i]=cval) + case RAWSXP: if (mem) { + memcpy(td, RAW(source), slen*sizeof(Rbyte)); break; + } else BODY(Rbyte, RAW, Rbyte, val, td[i]=cval) + case LGLSXP: BODY(int, LOGICAL, Rbyte, val==1, td[i]=cval) + case INTSXP: BODY(int, INTEGER, Rbyte, (val>255 || val<0) ? 0 : val, td[i]=cval) + case REALSXP: if (sourceIsI64) + BODY(int64_t, REAL, Rbyte, (val>255 || val<0) ? 0 : val, td[i]=cval) + else BODY(double, REAL, Rbyte, (ISNAN(val)||val>255||val<0) ? 0 : val, td[i]=cval) default: COERCE_ERROR("raw"); } } break; case LGLSXP: { int *td = LOGICAL(target) + off; switch (TYPEOF(source)) { - case RAWSXP: BODY(Rbyte, RAW, int, val!=0, td[i]=cval) + case RAWSXP: BODY(Rbyte, RAW, int, val!=0, td[i]=cval) case LGLSXP: if (mem) { - memcpy(td, LOGICAL(source), slen*sizeof(Rboolean)); break; - } else BODY(int, LOGICAL, int, val, td[i]=cval) - case INTSXP: BODY(int, INTEGER, int, val==NA_INTEGER ? NA_LOGICAL : val!=0, td[i]=cval) + memcpy(td, LOGICAL(source), slen*sizeof(Rboolean)); break; + } else BODY(int, LOGICAL, int, val, td[i]=cval) + case INTSXP: BODY(int, INTEGER, int, val==NA_INTEGER ? NA_LOGICAL : val!=0, td[i]=cval) case REALSXP: if (sourceIsI64) - BODY(int64_t, REAL, int, val==NA_INTEGER64 ? NA_LOGICAL : val!=0, td[i]=cval) - else BODY(double, REAL, int, ISNAN(val) ? NA_LOGICAL : val!=0.0, td[i]=cval) + BODY(int64_t, REAL, int, val==NA_INTEGER64 ? NA_LOGICAL : val!=0, td[i]=cval) + else BODY(double, REAL, int, ISNAN(val) ? NA_LOGICAL : val!=0.0, td[i]=cval) default: COERCE_ERROR("logical"); } } break; case INTSXP : { int *td = INTEGER(target) + off; switch (TYPEOF(source)) { - case RAWSXP: BODY(Rbyte, RAW, int, (int)val, td[i]=cval) - case LGLSXP: // same as INTSXP ... - case INTSXP: if (mem) { - memcpy(td, INTEGER(source), slen*sizeof(int)); break; - } else BODY(int, INTEGER, int, val, td[i]=cval) - case REALSXP: BODY(double, REAL, int, ISNAN(val) ? NA_INTEGER : (int)val, td[i]=cval) - default: COERCE_ERROR("integer"); // test 2005.4 + case RAWSXP: BODY(Rbyte, RAW, int, (int)val, td[i]=cval) + case LGLSXP: // same as INTSXP ... + case INTSXP: if (mem) { + memcpy(td, INTEGER(source), slen*sizeof(int)); break; + } else BODY(int, INTEGER, int, val, td[i]=cval) + case REALSXP: if (sourceIsI64) + BODY(int64_t, REAL, int, (val==NA_INTEGER64||val>INT_MAX||val<=NA_INTEGER) ? NA_INTEGER : (int)val, td[i]=cval) + else BODY(double, REAL, int, ISNAN(val) ? NA_INTEGER : (int)val, td[i]=cval) + default: COERCE_ERROR("integer"); // test 2005.4 } } break; case REALSXP : { if (targetIsI64) { int64_t *td = (int64_t *)REAL(target) + off; switch (TYPEOF(source)) { - case RAWSXP: BODY(Rbyte, RAW, int64_t, (int64_t)val, td[i]=cval) + case RAWSXP: BODY(Rbyte, RAW, int64_t, (int64_t)val, td[i]=cval) case LGLSXP: // same as INTSXP - case INTSXP: BODY(int, INTEGER, int64_t, val==NA_INTEGER ? NA_INTEGER64 : val, td[i]=cval) + case INTSXP: BODY(int, INTEGER, int64_t, val==NA_INTEGER ? NA_INTEGER64 : val, td[i]=cval) case REALSXP: if (sourceIsI64) { if(mem) { memcpy(td, (int64_t *)REAL(source), slen*sizeof(int64_t)); break; } - else BODY(int64_t, REAL, int64_t, val, td[i]=cval) - } else BODY(double, REAL, int64_t, R_FINITE(val) ? val : NA_INTEGER64, td[i]=cval) + else BODY(int64_t, REAL, int64_t, val, td[i]=cval) + } else BODY(double, REAL, int64_t, R_FINITE(val) ? val : NA_INTEGER64, td[i]=cval) default: COERCE_ERROR("integer64"); } } else { double *td = REAL(target) + off; switch (TYPEOF(source)) { - case RAWSXP: BODY(Rbyte, RAW, double, (double)val, td[i]=cval) - case LGLSXP: // same as INTSXP - case INTSXP: BODY(int, INTEGER, double, val==NA_INTEGER ? NA_REAL : val, td[i]=cval) + case RAWSXP: BODY(Rbyte, RAW, double, (double)val, td[i]=cval) + case LGLSXP: // same as INTSXP + case INTSXP: BODY(int, INTEGER, double, val==NA_INTEGER ? NA_REAL : val, td[i]=cval) case REALSXP: if (!sourceIsI64) { if(mem) { memcpy(td, (double *)REAL(source), slen*sizeof(double)); break; } - else BODY(double, REAL, double, val, td[i]=cval) - } else BODY(int64_t, REAL, double, val==NA_INTEGER64 ? NA_REAL : val, td[i]=cval) + else BODY(double, REAL, double, val, td[i]=cval) + } else BODY(int64_t, REAL, double, val==NA_INTEGER64 ? NA_REAL : val, td[i]=cval) default: COERCE_ERROR("double"); } } @@ -1015,20 +1033,22 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, Rcomplex *td = COMPLEX(target) + off; double im = 0.0; switch (TYPEOF(source)) { - case RAWSXP: BODY(Rbyte, RAW, double, (im=0.0,val), td[i].r=cval;td[i].i=im) - case LGLSXP: // same as INTSXP - case INTSXP: BODY(int, INTEGER, double, val==NA_INTEGER?(im=NA_REAL,NA_REAL):(im=0.0,val), td[i].r=cval;td[i].i=im) - case REALSXP: BODY(double, REAL, double, val==ISNAN(val)?(im=NA_REAL,NA_REAL):(im=0.0,val), td[i].r=cval;td[i].i=im) - case CPLXSXP: if (mem) { - memcpy(td, COMPLEX(source), slen*sizeof(Rcomplex)); break; - } else BODY(Rcomplex, COMPLEX, Rcomplex, val, td[i]=cval) + case RAWSXP: BODY(Rbyte, RAW, double, (im=0.0,val), td[i].r=cval;td[i].i=im) + case LGLSXP: // same as INTSXP + case INTSXP: BODY(int, INTEGER, double, val==NA_INTEGER?(im=NA_REAL,NA_REAL):(im=0.0,val), td[i].r=cval;td[i].i=im) + case REALSXP: if (sourceIsI64) + BODY(int64_t, REAL, double, val==NA_INTEGER64?(im=NA_REAL,NA_REAL):(im=0.0,val), td[i].r=cval;td[i].i=im) + else BODY(double, REAL, double, ISNAN(val)?(im=NA_REAL,NA_REAL):(im=0.0,val), td[i].r=cval;td[i].i=im) + case CPLXSXP: if (mem) { + memcpy(td, COMPLEX(source), slen*sizeof(Rcomplex)); break; + } else BODY(Rcomplex, COMPLEX, Rcomplex, val, td[i]=cval) default: COERCE_ERROR("complex"); } } break; case STRSXP : if (sourceIsFactor) { const SEXP *ld = STRING_PTR(PROTECT(getAttrib(source, R_LevelsSymbol))); protecti++; - BODY(int, INTEGER, SEXP, val==NA_INTEGER ? NA_STRING : ld[val-1], SET_STRING_ELT(target, off+i, cval)) + BODY(int, INTEGER, SEXP, val==NA_INTEGER ? NA_STRING : ld[val-1], SET_STRING_ELT(target, off+i, cval)) } else { if (!isString(source)) { if (allNA(source, true)) { // saves common coercion of NA (logical) to NA_character_ @@ -1036,17 +1056,19 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, // want to be strict now otherwise list would get to coerceVector below source = ScalarLogical(FALSE); // dummy input that BODY requires; a no alloc internal R constant that is a SEXP // we're using BODY here for its case that hops via 'where', otherwise we could just do a trivial loop here - BODY(int, LOGICAL, SEXP, NA_STRING+val, SET_STRING_ELT(target, off+i, cval)) + BODY(int, LOGICAL, SEXP, NA_STRING+val, SET_STRING_ELT(target, off+i, cval)) // ^^ dummy +0 on address to avoid C warning about not using val; hence why dummy is FALSE (0) // BODY has built-in break } + if (sourceIsI64) + error("Cannot assign integer64 to a character column. Please use as.character at R level."); source = PROTECT(coerceVector(source, STRSXP)); protecti++; } - BODY(SEXP, STRING_PTR, SEXP, val, SET_STRING_ELT(target, off+i, cval)) + BODY(SEXP, STRING_PTR, SEXP, val, SET_STRING_ELT(target, off+i, cval)) } case VECSXP : - if (TYPEOF(source)!=VECSXP) BODY(SEXP, &, SEXP, val, SET_VECTOR_ELT(target, off+i, cval)) - else BODY(SEXP, VECTOR_PTR, SEXP, val, SET_VECTOR_ELT(target, off+i, cval)) + if (TYPEOF(source)!=VECSXP) BODY(SEXP, &, SEXP, val, SET_VECTOR_ELT(target, off+i, cval)) + else BODY(SEXP, VECTOR_PTR, SEXP, val, SET_VECTOR_ELT(target, off+i, cval)) default : error("Unsupported column type in assign.c:memrecycle '%s'", type2char(TYPEOF(target))); // # nocov } From 598752ae1a914a221d7e053b6e9a8305e42e4913 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sat, 28 Sep 2019 10:05:09 +0800 Subject: [PATCH 41/43] any->all in comments --- src/utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils.c b/src/utils.c index e64b2408d4..e54e9aa2c8 100644 --- a/src/utils.c +++ b/src/utils.c @@ -33,7 +33,7 @@ SEXP isReallyReal(SEXP x) { } bool allNA(SEXP x, bool errorForBadType) { - // less space and time than any(is.na(x)) at R level because that creates full size is.na(x) first before any() + // less space and time than all(is.na(x)) at R level because that creates full size is.na(x) first before all() // whereas this allNA can often return early on testing the first value without reading the rest const int n = length(x); if (n==0) // empty vectors (including raw(), NULL, and list()) same as R's all(is.na()) true result; tests 2116.* From 5b799eacce04e3b4a7e4b2b4301c821f5e2139cf Mon Sep 17 00:00:00 2001 From: mattdowle Date: Fri, 27 Sep 2019 22:36:05 -0700 Subject: [PATCH 42/43] Added CHECK_RANGE macro --- inst/tests/tests.Rraw | 44 +++++++++---- src/assign.c | 147 ++++++++++++++++++------------------------ src/rbindlist.c | 12 ++-- 3 files changed, 97 insertions(+), 106 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 91e1516f0a..cdbe482c91 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -875,11 +875,11 @@ test(299.05, DT[2:3,c:=c(43, 44)], data.table(a=1:3,c=42:44)) test(299.06, DT[2,c:=42], data.table(a=1:3,c=INT(42,42,44))) # also see tests 302 and 303. (Ok, new test file for fast assign would be tidier). test(299.07, DT[,c:=rep(FALSE,nrow(DT))], data.table(a=1:3,c=FALSE)) # replace c column with logical -test(299.08, DT[2:3,c:=c(3.14,0)], data.table(a=1:3, c=c(FALSE,TRUE,FALSE)), warning="Non-zero 3.14.* taken as TRUE.*Please use") +test(299.08, DT[2:3,c:=c(3.14,0)], data.table(a=1:3, c=c(FALSE,TRUE,FALSE)), warning="3.14.*double.*at RHS position 1 taken as TRUE.*column 2 named 'c'.*logical") test(299.09, DT[2:3,c:=c(0,1)], data.table(a=1:3,c=c(FALSE,FALSE,TRUE))) # no warning # FR #2551 is now changed to fit in / fix bug #5442. Stricter warnings are in place now. Check tests 1294.1-34 below. -test(299.10, DT[2,c:=42], data.table(a=1:3, c=c(FALSE,TRUE,TRUE)), warning="Non-zero 42.0.* taken as TRUE.*Please use") -test(299.11, DT[1,c:=42L], data.table(a=1:3, c=TRUE), warning="Non-zero 42 taken as TRUE.*Please use") +test(299.10, DT[2,c:=42], data.table(a=1:3, c=c(FALSE,TRUE,TRUE)), warning="42.0.*double.*at RHS position 1.*TRUE") +test(299.11, DT[1,c:=42L], data.table(a=1:3, c=TRUE), warning="42.*integer.*at RHS position 1.*TRUE.*column 2 named 'c'.*logical") test(299.12, DT[2:3,c:=c(0L, 0L)], data.table(a=1:3,c=c(TRUE,FALSE,FALSE))) # Test bug fix #1468, combining i and by. @@ -4856,7 +4856,7 @@ test(1293, ans1, ans2) dt <- data.table(a=1:3, b=c(7,8,9), c=c(TRUE, NA, FALSE), d=as.list(4:6), e=c("a", "b", "c")) test(1294.01, dt[, a := 1]$a, rep(1L, 3L)) -test(1294.02, dt[, a := 1.5]$a, rep(1L, 3L), warning="Coerced double RHS to integer.*column 1 named 'a'.*fractions which have been lost; e.g. item 1 with value 1.5.* truncated to 1.") +test(1294.02, dt[, a := 1.5]$a, rep(1L, 3L), warning="1.5.*double.*position 1 truncated.*column 1 named 'a'.*integer") test(1294.03, dt[, a := NA]$a, rep(NA_integer_, 3L)) test(1294.04, dt[, a := "a"]$a, rep(NA_integer_, 3L), warning=c("NAs introduced by coercion", @@ -14137,7 +14137,7 @@ DT[,foo:=factor(c("a","b","c"))] test(2005.05, DT[2, foo:=8i], error="Can't assign to column 'foo' (type 'factor') a value of type 'complex' (not character, factor, integer or numeric)") test(2005.06, DT[2, a:=9, verbose=TRUE], notOutput="Coerced") test(2005.07, DT[2, a:=NA, verbose=TRUE], notOutput="Coerced") -test(2005.08, DT[2, a:=9.9]$a, INT(1,9,3), warning="Coerced double RHS to integer.*One or more RHS values contain fractions which have been lost.*9.9.*has been truncated to 9") +test(2005.08, DT[2, a:=9.9]$a, INT(1,9,3), warning="9.9.*double.*position 1 truncated.*column 1 named 'a'.*integer") test(2005.09, set(DT, 1L, "c", expression(x+2)), error="type 'expression' cannot be coerced to 'raw'") test(2005.10, set(DT, 1L, "d", expression(x+2)), error="type 'expression' cannot be coerced to 'logical'") test(2005.11, set(DT, 1L, "e", expression(x+2)), error="type 'expression' cannot be coerced to 'double'") @@ -14145,11 +14145,16 @@ test(2005.12, set(DT, 1L, "f", expression(x+2)), error="type 'expression' cannot test(2005.30, DT[2:3,c:=c(TRUE,FALSE), verbose=TRUE]$c, as.raw(INT(7,1,0)), output="Zero-copy coerce when assigning 'logical' to 'raw' column 3 named 'c'") test(2005.31, set(DT,1L,"c",NA)$c, as.raw(INT(0,1,0))) -test(2005.32, set(DT,1:2,"c",INT(-1,255))$c, as.raw(INT(0,255,0))) -test(2005.33, DT[2:3,c:=INT(NA,256)]$c, as.raw(INT(0,0,0))) -test(2005.34, set(DT,2:3,"c",c(NA,3.14))$c, as.raw(INT(0,0,3))) -test(2005.35, DT[1:2,c:=c(Inf,0.78)]$c, as.raw(INT(0,0,3))) -test(2005.36, DT[1:2,d:=as.raw(c(0,255))]$d, c(FALSE,TRUE,TRUE)) +test(2005.32, set(DT,1:2,"c",INT(-1,255))$c, as.raw(INT(0,255,0)), + warning="-1.*integer.*position 1 taken as 0 when assigning to column 3 named 'c'.*raw") +test(2005.33, DT[2:3,c:=INT(NA,256)]$c, as.raw(INT(0,0,0)), + warning="-2147483648.*integer.*position 1 taken as 0 when assigning to column 3 named 'c'.*raw") +test(2005.34, set(DT,2:3,"c",c(NA,3.14))$c, as.raw(INT(0,0,3)), + warning="[nN].*double.*position 1 either truncated.*or taken as 0 when assigning to column 3 named 'c'.*raw") # 'nan' for me but might vary hence [nN] +test(2005.35, DT[1:2,c:=c(Inf,0.78)]$c, as.raw(INT(0,0,3)), + warning="[iI].*double.*position 1 either truncated.*or taken as 0 when assigning to column 3 named 'c'.*raw'") # 'inf' for me but might vary hence [iI] +test(2005.36, DT[1:2,d:=as.raw(c(0,255))]$d, c(FALSE,TRUE,TRUE), + warning="255.*raw.*position 2 taken as TRUE when assigning to column 4 named 'd'.*logical") test(2005.37, DT[2:3,b:=as.raw(c(0,255))]$b, INT(4,0,255)) test(2005.38, DT[1:2,e:=as.raw(c(0,255))]$e, c(0,255,pi*3)) test(2005.39, DT[c(1,3,2), c:=as.raw(c(0,100,255)), verbose=TRUE]$c, as.raw(c(0,255,100)), @@ -14162,7 +14167,18 @@ if (test_bit64) { DT[,g:=as.integer64(c(-9,0,NA))] test(2005.60, set(DT, 1L, "g", expression(x+2)), error="type 'expression' cannot be coerced to 'integer64'") test(2005.61, DT[1:2,e:=as.integer64(c(NA,-200))]$e, c(NA_real_, -200, pi*3)) - test(2005.62, DT[2:3, d:=as.integer64(c(2,NA))]$d, c(FALSE,TRUE,NA), warning="Non-zero 2 taken as TRUE") + test(2005.62, DT[2:3, d:=as.integer64(c(2,NA))]$d, c(FALSE,TRUE,NA), + warning="2.*integer64.*position 1 taken as TRUE when assigning to column 4 named 'd'.*logical") + DT[,b:=4:6] + test(2005.63, DT[2:3, b:=as.integer64(c("2147483647","2147483648"))]$b, INT(4,2147483647,NA), + warning="2147483648.*integer64.*position 2 out-of-range [(]NA[)] when assigning to column 2 named 'b'.*integer") + test(2005.64, DT[2:3, b:=as.integer64(c("-2147483648","-2147483647"))]$b, INT(4,NA,-2147483647), + warning="-2147483648.*integer64.*position 1 out-of-range [(]NA[)].*column 2 named 'b'.*integer") + test(2005.65, DT[c(2,1,3), c:=as.integer64(c(-1,255,256))]$c, as.raw(c(255,0,0)), + warning="-1.*integer64.*position 1 taken as 0 when assigning to column 3 named 'c'.*raw") + test(2005.66, DT[2:3, f:=as.integer64(c(NA,"2147483648"))]$f, as.complex(c(-42,NA,2147483648))) + DT[,h:=LETTERS[1:3]] + test(2005.67, DT[2:3, h:=as.integer64(1:2)], error="To assign integer64 to a character column, please use as.character.") } # rbindlist raw type, #2819 @@ -14173,7 +14189,7 @@ test(2006.2, rbindlist(list(data.table(x = as.raw(1:2), y=as.raw(5:6)), data.tab if (test_bit64) { test(2007.1, rbindlist(list( list(a=as.integer64(1), b=3L), list(a=2L, b=4L) )), data.table(a=as.integer64(1:2), b=3:4)) test(2007.2, rbindlist(list( list(a=3.4, b=5L), list(a=as.integer64(4), b=6L) )), data.table(a=as.integer64(3:4), b=5:6), - warning="Column 1 of item 1: coerced to integer64 but contains a non-integer value [(]3.40.* at position 1[)]; precision lost") + warning="Column 1 of item 1: 3.4.*double.*position 1 truncated.*precision lost.*when assigning to column 1 named 'a'.*integer64'") test(2007.3, rbindlist(list( list(a=3.0, b=5L), list(a=as.integer64(4), b=6L) )), data.table(a=as.integer64(3:4), b=5:6)) test(2007.4, rbindlist(list( list(b=5:6), list(a=as.integer64(4), b=7L)), fill=TRUE), data.table(b=5:7, a=as.integer64(c(NA,NA,4)))) # tests writeNA of integer64 test(2007.5, rbindlist(list( list(a=INT(1,NA,-2)), list(a=as.integer64(c(3,NA))) )), data.table(a=as.integer64(c(1,NA,-2,3,NA)))) # int NAs combined with int64 NA @@ -16220,9 +16236,9 @@ DT = data.table(a=1:4, b=c(FALSE, TRUE, NA, FALSE)) test(2115.1, set(DT,3L,1L,0), data.table(a=INT(1,2,0,4), b=c(FALSE, TRUE, NA, FALSE))) test(2115.2, set(DT,3L,2L,0), data.table(a=INT(1,2,0,4), b=c(FALSE, TRUE, FALSE, FALSE))) test(2115.3, set(DT,3L,2L,-2L), data.table(a=INT(1,2,0,4), b=c(FALSE, TRUE, TRUE, FALSE)), # see also test 299 - warning="Non-zero -2 taken as TRUE.*Please use.*") + warning="-2.*integer.*position 1 taken as TRUE.*column 2 named 'b'.*logical") test(2115.4, set(DT,4L,2L,3.14), data.table(a=INT(1,2,0,4), b=c(FALSE, TRUE, TRUE, TRUE)), - warning="Non-zero 3.14.* taken as TRUE.*Please use.*") + warning="3.14.*double.*position 1 taken as TRUE.*column 2 named 'b'.*logical") DT = data.table(code=c("c","b","c","a"), val=10:13) test(2115.5, DT[code=="c", val := val+1], data.table(code=c("c","b","c","a"), val=INT(11,11,13,13))) DT = data.table(x=factor(LETTERS[1:3]), y=1:6) diff --git a/src/assign.c b/src/assign.c index fcfb6e045f..0e1b1f2c3f 100644 --- a/src/assign.c +++ b/src/assign.c @@ -500,8 +500,8 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values) } else { // existing column targetcol = VECTOR_ELT(dt,coln); } - memrecycle(targetcol, rows, 0, targetlen, thisvalue, coln+1, CHAR(STRING_ELT(names, coln))); - // ^^ last 2 arguments just for messages + const char *ret = memrecycle(targetcol, rows, 0, targetlen, thisvalue, coln+1, CHAR(STRING_ELT(names, coln))); + if (ret) warning(ret); } *_Last_updated = numToDo; // the updates have taken place with no error, so update .Last.updated now @@ -669,7 +669,7 @@ static bool anyNamed(SEXP x) { } #define MSGSIZE 1000 -static char memrecycle_message[MSGSIZE+1]; +static char memrecycle_message[MSGSIZE+1]; // returned to rbindlist so it can prefix with which one of the list of data.table-like objects const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, int colnum, const char *colname) // like memcpy but recycles single-item source @@ -821,93 +821,68 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, // now continue, but with the mapped integers in the (new) source } } + } else if (isString(source) && !isString(target) && !isNewList(target)) { + source = PROTECT(coerceVector(source, TYPEOF(target))); protecti++; + warning("Coerced 'character' RHS to '%s' to match the type of the target column (column %d named '%s').", + type2char(TYPEOF(target)), colnum, colname); } else if ((TYPEOF(target)!=TYPEOF(source) || targetIsI64!=sourceIsI64) && !isNewList(target)) { if (isNewList(source)) { error("Cannot coerce 'list' RHS to '%s' to match the type of the target column (column %d named '%s').", type2char(TYPEOF(target)), colnum, colname); } - if (isString(source)) { - source = PROTECT(coerceVector(source, TYPEOF(target))); protecti++; - warning("Coerced 'character' RHS to '%s' to match the type of the target column (column %d named '%s').", - type2char(TYPEOF(target)), colnum, colname); - } else { - if (GetVerbose()) { - // only take the (small) cost of GetVerbose() (search of options() list) when types don't match - Rprintf("Zero-copy coerce when assigning '%s' to '%s' column %d named '%s'.\n", - sourceIsI64 ? "integer64" : type2char(TYPEOF(source)), - targetIsI64 ? "integer64" : type2char(TYPEOF(target)), - colnum, colname); - } - // The following checks are up front here once, otherwise we'd need them twice in the two branches - // inside BODY that cater for 'where' or not. - // The idea is to do them without a coerceVector() which allocates; zero-copy coerce - // TODO: these can be condensed into a macro similar to BODY further below - switch(TYPEOF(target)) { - //TODO case RAWSXP: - case LGLSXP: - if (isInteger(source)) { - const int *sd = INTEGER(source); - for (int i=0; i0) { - warning("Coerced double RHS to integer to match the type of the target column (column %d named '%s'). One or more RHS values contain fractions which have been lost; e.g. item %d with value %f has been truncated to %d.", colnum, colname, w, REAL(source)[w-1], (int)REAL(source)[w-1]); - // same warning text as v1.12.2 so as to reduce diff in tests. TODO: shorten text in future to put truncated %f at the begnning of the message. - } - } else { - const int64_t *sd = (int64_t *)REAL(source); - for (int i=0; iINT_MAX)) { - warning("Assigning integer64 to integer but %lld is outside range of integer and will be converted to NA.", (long long)val); - break; - } - } - } - } - break; - case REALSXP: - if (targetIsI64 && isReal(source) && !sourceIsI64) { - int firstReal=0; - if ((firstReal=INTEGER(isReallyReal(source))[0])) { - snprintf(memrecycle_message, MSGSIZE, "coerced to integer64 but contains a non-integer value (%f at position %d); precision lost.", REAL(source)[firstReal-1], firstReal); - } - } - break; + + if (GetVerbose()) { + // only take the (small) cost of GetVerbose() (search of options() list) when types don't match + Rprintf("Zero-copy coerce when assigning '%s' to '%s' column %d named '%s'.\n", + sourceIsI64 ? "integer64" : type2char(TYPEOF(source)), + targetIsI64 ? "integer64" : type2char(TYPEOF(target)), + colnum, colname); + } + // The following checks are up front here, otherwise we'd need them twice in the two branches + // inside BODY that cater for 'where' or not. Maybe there's a way to merge the two macros in future. + // The idea is to do these range checks without calling coerceVector() (which allocates) + +#define CHECK_RANGE(STYPE, RFUN, COND, FMT, TO) {{ \ + const STYPE *sd = (const STYPE *)RFUN(source); \ + for (int i=0; i255, "%d", "taken as 0") + case REALSXP: if (sourceIsI64) + CHECK_RANGE(long long, REAL, val<0 || val>255, "%lld", "taken as 0") + else CHECK_RANGE(double, REAL, !R_FINITE(val) || val<0.0 || val>256.0 || (int)val!=val,"%f", "either truncated (precision lost) or taken as 0") + } break; + case INTSXP: + if (TYPEOF(source)==REALSXP) { + if (sourceIsI64) + CHECK_RANGE(long long, REAL, val!=NA_INTEGER64 && (val<=NA_INTEGER || val>INT_MAX), "%lld", "out-of-range (NA)") + else CHECK_RANGE(double, REAL, !ISNAN(val) && (!R_FINITE(val) || (int)val!=val), "%f", "truncated (precision lost)") + } break; + case REALSXP: + if (targetIsI64 && isReal(source) && !sourceIsI64) { + CHECK_RANGE(double, REAL, !ISNAN(val) && (!R_FINITE(val) || (int)val!=val), "%f", "truncated (precision lost)") } } } @@ -1061,7 +1036,7 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, // BODY has built-in break } if (sourceIsI64) - error("Cannot assign integer64 to a character column. Please use as.character at R level."); + error("To assign integer64 to a character column, please use as.character() for clarity."); source = PROTECT(coerceVector(source, STRSXP)); protecti++; } BODY(SEXP, STRING_PTR, SEXP, val, SET_STRING_ELT(target, off+i, cval)) diff --git a/src/rbindlist.c b/src/rbindlist.c index 78efc29fc2..a3e64cb21a 100644 --- a/src/rbindlist.c +++ b/src/rbindlist.c @@ -276,7 +276,7 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg) int longestLen=0, longestW=-1, longestI=-1; // just for ordered factor SEXP longestLevels=R_NilValue; // just for ordered factor bool int64=false; - bool foundName=false; + const char *foundName=NULL; bool anyNotStringOrFactor=false; SEXP firstCol=R_NilValue; int firsti=-1, firstw=-1; @@ -287,7 +287,7 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg) if (w==-1) continue; // column j of final result has no input from this item (fill must be true) if (!foundName) { SEXP cn=PROTECT(getAttrib(li, R_NamesSymbol)); - if (length(cn)) { SET_STRING_ELT(ansNames, idcol+j, STRING_ELT(cn, w)); foundName=true; } + if (length(cn)) { SEXP tt; SET_STRING_ELT(ansNames, idcol+j, tt=STRING_ELT(cn, w)); foundName=CHAR(tt); } UNPROTECT(1); } SEXP thisCol = VECTOR_ELT(li, w); @@ -319,7 +319,7 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg) } } - if (!foundName) { char buff[12]; sprintf(buff,"V%d",j+1), SET_STRING_ELT(ansNames, idcol+j, mkChar(buff)); } + if (!foundName) { static char buff[12]; sprintf(buff,"V%d",j+1), SET_STRING_ELT(ansNames, idcol+j, mkChar(buff)); foundName=buff; } if (factor) maxType=INTSXP; // if any items are factors then a factor is created (could be an option) if (int64 && maxType!=REALSXP) error("Internal error: column %d of result is determined to be integer64 but maxType=='%s' != REALSXP", j+1, type2char(maxType)); // # nocov @@ -517,9 +517,9 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg) // do an as.list() on the atomic column; #3528 thisCol = PROTECT(coerceVector(thisCol, VECSXP)); nprotect++; } - // else coerces if needed within memrecycle; possibly with a no-alloc direct coerce - const char *ret = memrecycle(target, R_NilValue, ansloc, thisnrow, thisCol, 0, ""); - if (ret) warning("Column %d of item %d: %s", w+1, i+1, ret); // currently just one warning when precision is lost; e.g. assigning 3.4 to integer64 + // else coerces if needed within memrecycle; with a no-alloc direct coerce from 1.12.4 (PR #3909) + const char *ret = memrecycle(target, R_NilValue, ansloc, thisnrow, thisCol, idcol+j+1, foundName); + if (ret) warning("Column %d of item %d: %s", w+1, i+1, ret); // e.g. when precision is lost like assigning 3.4 to integer64 } ansloc += thisnrow; } From 7f3468026e6d9c938d866267d760f28563f6cd1a Mon Sep 17 00:00:00 2001 From: mattdowle Date: Fri, 27 Sep 2019 22:41:41 -0700 Subject: [PATCH 43/43] code formatting only --- src/assign.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/assign.c b/src/assign.c index 0e1b1f2c3f..9cddba9fab 100644 --- a/src/assign.c +++ b/src/assign.c @@ -851,7 +851,7 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, const char *tType = targetIsI64 ? "integer64" : type2char(TYPEOF(target)); \ snprintf(memrecycle_message, MSGSIZE, \ FMT" (type '%s') at RHS position %d "TO" when assigning to column %d named '%s' (type '%s')", \ - val, sType, i+1, colnum, colname, tType); \ + val, sType, i+1, colnum, colname, tType); \ /* string returned like this so that rbindlist can prefix it with which item of its list this refers to */ \ break; \ } \ @@ -861,28 +861,28 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, switch(TYPEOF(target)) { case LGLSXP: switch (TYPEOF(source)) { - case RAWSXP: CHECK_RANGE(Rbyte, RAW, val!=0 && val!=1, "%d", "taken as TRUE") - case INTSXP: CHECK_RANGE(int, INTEGER, val!=0 && val!=1 && val!=NA_INTEGER, "%d", "taken as TRUE") + case RAWSXP: CHECK_RANGE(Rbyte, RAW, val!=0 && val!=1, "%d", "taken as TRUE") + case INTSXP: CHECK_RANGE(int, INTEGER, val!=0 && val!=1 && val!=NA_INTEGER, "%d", "taken as TRUE") case REALSXP: if (sourceIsI64) - CHECK_RANGE(long long, REAL, val!=0 && val!=1 && val!=NA_INTEGER64, "%lld", "taken as TRUE") - else CHECK_RANGE(double, REAL, !ISNAN(val) && val!=0.0 && val!=1.0, "%f", "taken as TRUE") + CHECK_RANGE(long long, REAL, val!=0 && val!=1 && val!=NA_INTEGER64, "%lld", "taken as TRUE") + else CHECK_RANGE(double, REAL, !ISNAN(val) && val!=0.0 && val!=1.0, "%f", "taken as TRUE") } break; case RAWSXP: switch (TYPEOF(source)) { - case INTSXP: CHECK_RANGE(int, INTEGER, val<0 || val>255, "%d", "taken as 0") + case INTSXP: CHECK_RANGE(int, INTEGER, val<0 || val>255, "%d", "taken as 0") case REALSXP: if (sourceIsI64) - CHECK_RANGE(long long, REAL, val<0 || val>255, "%lld", "taken as 0") - else CHECK_RANGE(double, REAL, !R_FINITE(val) || val<0.0 || val>256.0 || (int)val!=val,"%f", "either truncated (precision lost) or taken as 0") + CHECK_RANGE(long long, REAL, val<0 || val>255, "%lld", "taken as 0") + else CHECK_RANGE(double, REAL, !R_FINITE(val) || val<0.0 || val>256.0 || (int)val!=val, "%f", "either truncated (precision lost) or taken as 0") } break; case INTSXP: if (TYPEOF(source)==REALSXP) { if (sourceIsI64) - CHECK_RANGE(long long, REAL, val!=NA_INTEGER64 && (val<=NA_INTEGER || val>INT_MAX), "%lld", "out-of-range (NA)") - else CHECK_RANGE(double, REAL, !ISNAN(val) && (!R_FINITE(val) || (int)val!=val), "%f", "truncated (precision lost)") + CHECK_RANGE(long long, REAL, val!=NA_INTEGER64 && (val<=NA_INTEGER || val>INT_MAX), "%lld", "out-of-range (NA)") + else CHECK_RANGE(double, REAL, !ISNAN(val) && (!R_FINITE(val) || (int)val!=val), "%f", "truncated (precision lost)") } break; case REALSXP: if (targetIsI64 && isReal(source) && !sourceIsI64) { - CHECK_RANGE(double, REAL, !ISNAN(val) && (!R_FINITE(val) || (int)val!=val), "%f", "truncated (precision lost)") + CHECK_RANGE(double, REAL, !ISNAN(val) && (!R_FINITE(val) || (int)val!=val), "%f", "truncated (precision lost)") } } }