From 3109a95b51489aaa3ec811839fe0f4d380531d76 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Sun, 29 Sep 2019 17:15:59 -0700 Subject: [PATCH 1/3] non-character-allNA done directly rather than tricking BODY --- src/assign.c | 84 ++++++++++++++++++++++++++++--------------------- src/rbindlist.c | 4 ++- 2 files changed, 51 insertions(+), 37 deletions(-) diff --git a/src/assign.c b/src/assign.c index 9cddba9fab..7cb7352b6d 100644 --- a/src/assign.c +++ b/src/assign.c @@ -891,7 +891,6 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, #define BODY(STYPE, RFUN, CTYPE, CAST, ASSIGN) {{ \ const STYPE *sd = (const STYPE *)RFUN(source); \ if (length(where)) { \ - const int *wd = INTEGER(where)+start; \ if (slen==1) { \ const STYPE val = sd[0]; \ const CTYPE cval = CAST; \ @@ -930,35 +929,40 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, #define COERCE_ERROR(targetType) error("type '%s' cannot be coerced to '%s'", type2char(TYPEOF(source)), targetType); // 'targetType' for integer64 vs double - const int off = (length(where)?0:start); // off = target offset; e.g. called from rbindlist with where=R_NilValue and start!=0 - const bool mem = length(where)==0 && slen==len; // only used if types match too + const int off = length(where) ? 0 : start; // off = target offset; e.g. called from rbindlist with where=R_NilValue and start!=0 + const bool mc = length(where)==0 && slen==len; // mc=memcpy; only used if types match too + const int *wd = length(where) ? INTEGER(where)+start : NULL; switch (TYPEOF(target)) { case RAWSXP: { Rbyte *td = RAW(target) + off; switch (TYPEOF(source)) { - case RAWSXP: if (mem) { + case RAWSXP: + if (mc) { memcpy(td, RAW(source), slen*sizeof(Rbyte)); break; - } else BODY(Rbyte, RAW, Rbyte, val, td[i]=cval) + } else BODY(Rbyte, RAW, Rbyte, val, td[i]=cval) case LGLSXP: BODY(int, LOGICAL, Rbyte, val==1, td[i]=cval) case INTSXP: BODY(int, INTEGER, Rbyte, (val>255 || val<0) ? 0 : val, td[i]=cval) - case REALSXP: if (sourceIsI64) + case REALSXP: + if (sourceIsI64) BODY(int64_t, REAL, Rbyte, (val>255 || val<0) ? 0 : val, td[i]=cval) - else BODY(double, REAL, Rbyte, (ISNAN(val)||val>255||val<0) ? 0 : val, td[i]=cval) - default: COERCE_ERROR("raw"); + else BODY(double, REAL, Rbyte, (ISNAN(val)||val>255||val<0) ? 0 : val, td[i]=cval) + default: COERCE_ERROR("raw"); } } break; case LGLSXP: { int *td = LOGICAL(target) + off; switch (TYPEOF(source)) { case RAWSXP: BODY(Rbyte, RAW, int, val!=0, td[i]=cval) - case LGLSXP: if (mem) { + case LGLSXP: + if (mc) { memcpy(td, LOGICAL(source), slen*sizeof(Rboolean)); break; - } else BODY(int, LOGICAL, int, val, td[i]=cval) + } else BODY(int, LOGICAL, int, val, td[i]=cval) case INTSXP: BODY(int, INTEGER, int, val==NA_INTEGER ? NA_LOGICAL : val!=0, td[i]=cval) - case REALSXP: if (sourceIsI64) + case REALSXP: + if (sourceIsI64) BODY(int64_t, REAL, int, val==NA_INTEGER64 ? NA_LOGICAL : val!=0, td[i]=cval) - else BODY(double, REAL, int, ISNAN(val) ? NA_LOGICAL : val!=0.0, td[i]=cval) - default: COERCE_ERROR("logical"); + else BODY(double, REAL, int, ISNAN(val) ? NA_LOGICAL : val!=0.0, td[i]=cval) + default: COERCE_ERROR("logical"); } } break; case INTSXP : { @@ -966,12 +970,14 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, switch (TYPEOF(source)) { case RAWSXP: BODY(Rbyte, RAW, int, (int)val, td[i]=cval) case LGLSXP: // same as INTSXP ... - case INTSXP: if (mem) { + case INTSXP: + if (mc) { memcpy(td, INTEGER(source), slen*sizeof(int)); break; - } else BODY(int, INTEGER, int, val, td[i]=cval) - case REALSXP: if (sourceIsI64) + } else BODY(int, INTEGER, int, val, td[i]=cval) + case REALSXP: + if (sourceIsI64) BODY(int64_t, REAL, int, (val==NA_INTEGER64||val>INT_MAX||val<=NA_INTEGER) ? NA_INTEGER : (int)val, td[i]=cval) - else BODY(double, REAL, int, ISNAN(val) ? NA_INTEGER : (int)val, td[i]=cval) + else BODY(double, REAL, int, ISNAN(val) ? NA_INTEGER : (int)val, td[i]=cval) default: COERCE_ERROR("integer"); // test 2005.4 } } break; @@ -984,10 +990,11 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, case INTSXP: BODY(int, INTEGER, int64_t, val==NA_INTEGER ? NA_INTEGER64 : val, td[i]=cval) case REALSXP: if (sourceIsI64) { - if(mem) { memcpy(td, (int64_t *)REAL(source), slen*sizeof(int64_t)); break; } - else BODY(int64_t, REAL, int64_t, val, td[i]=cval) + if (mc) { + memcpy(td, (int64_t *)REAL(source), slen*sizeof(int64_t)); break; + } else BODY(int64_t, REAL, int64_t, val, td[i]=cval) } else BODY(double, REAL, int64_t, R_FINITE(val) ? val : NA_INTEGER64, td[i]=cval) - default: COERCE_ERROR("integer64"); + default: COERCE_ERROR("integer64"); } } else { double *td = REAL(target) + off; @@ -997,10 +1004,11 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, case INTSXP: BODY(int, INTEGER, double, val==NA_INTEGER ? NA_REAL : val, td[i]=cval) case REALSXP: if (!sourceIsI64) { - if(mem) { memcpy(td, (double *)REAL(source), slen*sizeof(double)); break; } - else BODY(double, REAL, double, val, td[i]=cval) + if (mc) { + memcpy(td, (double *)REAL(source), slen*sizeof(double)); break; + } else BODY(double, REAL, double, val, td[i]=cval) } else BODY(int64_t, REAL, double, val==NA_INTEGER64 ? NA_REAL : val, td[i]=cval) - default: COERCE_ERROR("double"); + default: COERCE_ERROR("double"); } } } break; @@ -1011,13 +1019,15 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, case RAWSXP: BODY(Rbyte, RAW, double, (im=0.0,val), td[i].r=cval;td[i].i=im) case LGLSXP: // same as INTSXP case INTSXP: BODY(int, INTEGER, double, val==NA_INTEGER?(im=NA_REAL,NA_REAL):(im=0.0,val), td[i].r=cval;td[i].i=im) - case REALSXP: if (sourceIsI64) + case REALSXP: + if (sourceIsI64) BODY(int64_t, REAL, double, val==NA_INTEGER64?(im=NA_REAL,NA_REAL):(im=0.0,val), td[i].r=cval;td[i].i=im) - else BODY(double, REAL, double, ISNAN(val)?(im=NA_REAL,NA_REAL):(im=0.0,val), td[i].r=cval;td[i].i=im) - case CPLXSXP: if (mem) { + else BODY(double, REAL, double, ISNAN(val)?(im=NA_REAL,NA_REAL):(im=0.0,val), td[i].r=cval;td[i].i=im) + case CPLXSXP: + if (mc) { memcpy(td, COMPLEX(source), slen*sizeof(Rcomplex)); break; - } else BODY(Rcomplex, COMPLEX, Rcomplex, val, td[i]=cval) - default: COERCE_ERROR("complex"); + } else BODY(Rcomplex, COMPLEX, Rcomplex, val, td[i]=cval) + default: COERCE_ERROR("complex"); } } break; case STRSXP : @@ -1029,21 +1039,23 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, if (allNA(source, true)) { // saves common coercion of NA (logical) to NA_character_ // ^^ =errorForBadType; if type list, that was already an error earlier so we // want to be strict now otherwise list would get to coerceVector below - source = ScalarLogical(FALSE); // dummy input that BODY requires; a no alloc internal R constant that is a SEXP - // we're using BODY here for its case that hops via 'where', otherwise we could just do a trivial loop here - BODY(int, LOGICAL, SEXP, NA_STRING+val, SET_STRING_ELT(target, off+i, cval)) - // ^^ dummy +0 on address to avoid C warning about not using val; hence why dummy is FALSE (0) - // BODY has built-in break + if (length(where)) + for (int i=0; i0) SET_STRING_ELT(target, start+wd[i]-1, NA_STRING); + else + for (int i=0; i Date: Mon, 30 Sep 2019 00:24:10 -0700 Subject: [PATCH 2/3] extra test to catch the deliberate error (wrong 'start+') in the non-BODY approach in this PR, and dogroups zero-copy-coerce enabled to reach this test --- inst/tests/tests.Rraw | 42 +++++++++++++++++++++++++----------------- src/assign.c | 38 +++++++++++++++++++++----------------- src/dogroups.c | 35 ++++++----------------------------- 3 files changed, 52 insertions(+), 63 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index cdbe482c91..24ea88018c 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -875,11 +875,11 @@ test(299.05, DT[2:3,c:=c(43, 44)], data.table(a=1:3,c=42:44)) test(299.06, DT[2,c:=42], data.table(a=1:3,c=INT(42,42,44))) # also see tests 302 and 303. (Ok, new test file for fast assign would be tidier). test(299.07, DT[,c:=rep(FALSE,nrow(DT))], data.table(a=1:3,c=FALSE)) # replace c column with logical -test(299.08, DT[2:3,c:=c(3.14,0)], data.table(a=1:3, c=c(FALSE,TRUE,FALSE)), warning="3.14.*double.*at RHS position 1 taken as TRUE.*column 2 named 'c'.*logical") +test(299.08, DT[2:3,c:=c(3.14,0)], data.table(a=1:3, c=c(FALSE,TRUE,FALSE)), warning="3.14.*double.*at RHS position 1 taken as TRUE.*logical.*column 2 named 'c'") test(299.09, DT[2:3,c:=c(0,1)], data.table(a=1:3,c=c(FALSE,FALSE,TRUE))) # no warning # FR #2551 is now changed to fit in / fix bug #5442. Stricter warnings are in place now. Check tests 1294.1-34 below. test(299.10, DT[2,c:=42], data.table(a=1:3, c=c(FALSE,TRUE,TRUE)), warning="42.0.*double.*at RHS position 1.*TRUE") -test(299.11, DT[1,c:=42L], data.table(a=1:3, c=TRUE), warning="42.*integer.*at RHS position 1.*TRUE.*column 2 named 'c'.*logical") +test(299.11, DT[1,c:=42L], data.table(a=1:3, c=TRUE), warning="42.*integer.*at RHS position 1.*TRUE.*logical.*column 2 named 'c'") test(299.12, DT[2:3,c:=c(0L, 0L)], data.table(a=1:3,c=c(TRUE,FALSE,FALSE))) # Test bug fix #1468, combining i and by. @@ -3673,11 +3673,15 @@ test(1133.1, DT[, new := c(1,2), by=x], error="Supplied 2 items to be assigned t test(1133.2, DT, data.table(x=INT(1,1,1,1,1,2,2))) # on an already existing column: DT[, new:=99L] -test(1133.3, DT[, new := c(1,2), by=x], error="Type of RHS ('double') must match LHS ('integer')") +test(1133.3, DT[, new := c(1,2), by=x], error="Supplied 2 items to be assigned to group 1 of size 5 in column 'new'") test(1133.4, DT[, new := c(1L,2L), by=x], error="Supplied 2 items to be assigned to group 1 of size 5 in column 'new'") test(1133.5, DT, data.table(x=INT(1,1,1,1,1,2,2), new=99L)) test(1133.6, DT[, new := rep(-.GRP, .N), by=x], data.table(x=INT(1,1,1,1,1,2,2), new=INT(-1,-1,-1,-1,-1,-2,-2))) test(1133.7, DT[, new := .N, by=x], data.table(x=INT(1,1,1,1,1,2,2), new=INT(5,5,5,5,5,2,2))) +# on a new column with warning on 2nd assign +DT[,new:=NULL] +test(1133.8, DT[, new := if (.GRP==1L) 7L else 3.4, by=x], data.table(x=INT(1,1,1,1,1,2,2), new=INT(7,7,7,7,7,3,3)), + warning="Group 2 column 'new': 3.4.*double.*at RHS position 1 truncated.*precision lost.*integer") # Fix for FR #2496 - catch `{` in `:=` expression in `j`: DT <- data.table(x=c("A", "A", "B", "B"), val =1:4) @@ -4856,7 +4860,7 @@ test(1293, ans1, ans2) dt <- data.table(a=1:3, b=c(7,8,9), c=c(TRUE, NA, FALSE), d=as.list(4:6), e=c("a", "b", "c")) test(1294.01, dt[, a := 1]$a, rep(1L, 3L)) -test(1294.02, dt[, a := 1.5]$a, rep(1L, 3L), warning="1.5.*double.*position 1 truncated.*column 1 named 'a'.*integer") +test(1294.02, dt[, a := 1.5]$a, rep(1L, 3L), warning="1.5.*double.*position 1 truncated.*integer.*column 1 named 'a'") test(1294.03, dt[, a := NA]$a, rep(NA_integer_, 3L)) test(1294.04, dt[, a := "a"]$a, rep(NA_integer_, 3L), warning=c("NAs introduced by coercion", @@ -14137,7 +14141,7 @@ DT[,foo:=factor(c("a","b","c"))] test(2005.05, DT[2, foo:=8i], error="Can't assign to column 'foo' (type 'factor') a value of type 'complex' (not character, factor, integer or numeric)") test(2005.06, DT[2, a:=9, verbose=TRUE], notOutput="Coerced") test(2005.07, DT[2, a:=NA, verbose=TRUE], notOutput="Coerced") -test(2005.08, DT[2, a:=9.9]$a, INT(1,9,3), warning="9.9.*double.*position 1 truncated.*column 1 named 'a'.*integer") +test(2005.08, DT[2, a:=9.9]$a, INT(1,9,3), warning="9.9.*double.*position 1 truncated.*integer.*column 1 named 'a'") test(2005.09, set(DT, 1L, "c", expression(x+2)), error="type 'expression' cannot be coerced to 'raw'") test(2005.10, set(DT, 1L, "d", expression(x+2)), error="type 'expression' cannot be coerced to 'logical'") test(2005.11, set(DT, 1L, "e", expression(x+2)), error="type 'expression' cannot be coerced to 'double'") @@ -14146,15 +14150,15 @@ test(2005.30, DT[2:3,c:=c(TRUE,FALSE), verbose=TRUE]$c, as.raw(INT(7,1,0)), output="Zero-copy coerce when assigning 'logical' to 'raw' column 3 named 'c'") test(2005.31, set(DT,1L,"c",NA)$c, as.raw(INT(0,1,0))) test(2005.32, set(DT,1:2,"c",INT(-1,255))$c, as.raw(INT(0,255,0)), - warning="-1.*integer.*position 1 taken as 0 when assigning to column 3 named 'c'.*raw") + warning="-1.*integer.*position 1 taken as 0 when assigning.*raw.*column 3 named 'c'") test(2005.33, DT[2:3,c:=INT(NA,256)]$c, as.raw(INT(0,0,0)), - warning="-2147483648.*integer.*position 1 taken as 0 when assigning to column 3 named 'c'.*raw") + warning="-2147483648.*integer.*position 1 taken as 0 when assigning.*raw.*column 3 named 'c'") test(2005.34, set(DT,2:3,"c",c(NA,3.14))$c, as.raw(INT(0,0,3)), - warning="[nN].*double.*position 1 either truncated.*or taken as 0 when assigning to column 3 named 'c'.*raw") # 'nan' for me but might vary hence [nN] + warning="[nN].*double.*position 1 either truncated.*or taken as 0 when assigning.*raw.*column 3 named 'c'") # 'nan' for me but might vary hence [nN] test(2005.35, DT[1:2,c:=c(Inf,0.78)]$c, as.raw(INT(0,0,3)), - warning="[iI].*double.*position 1 either truncated.*or taken as 0 when assigning to column 3 named 'c'.*raw'") # 'inf' for me but might vary hence [iI] + warning="[iI].*double.*position 1 either truncated.*or taken as 0 when assigning.*raw.*column 3 named 'c'") # 'inf' for me but might vary hence [iI] test(2005.36, DT[1:2,d:=as.raw(c(0,255))]$d, c(FALSE,TRUE,TRUE), - warning="255.*raw.*position 2 taken as TRUE when assigning to column 4 named 'd'.*logical") + warning="255.*raw.*position 2 taken as TRUE when assigning.*logical.*column 4 named 'd'") test(2005.37, DT[2:3,b:=as.raw(c(0,255))]$b, INT(4,0,255)) test(2005.38, DT[1:2,e:=as.raw(c(0,255))]$e, c(0,255,pi*3)) test(2005.39, DT[c(1,3,2), c:=as.raw(c(0,100,255)), verbose=TRUE]$c, as.raw(c(0,255,100)), @@ -14168,14 +14172,14 @@ if (test_bit64) { test(2005.60, set(DT, 1L, "g", expression(x+2)), error="type 'expression' cannot be coerced to 'integer64'") test(2005.61, DT[1:2,e:=as.integer64(c(NA,-200))]$e, c(NA_real_, -200, pi*3)) test(2005.62, DT[2:3, d:=as.integer64(c(2,NA))]$d, c(FALSE,TRUE,NA), - warning="2.*integer64.*position 1 taken as TRUE when assigning to column 4 named 'd'.*logical") + warning="2.*integer64.*position 1 taken as TRUE when assigning.*logical.*column 4 named 'd'") DT[,b:=4:6] test(2005.63, DT[2:3, b:=as.integer64(c("2147483647","2147483648"))]$b, INT(4,2147483647,NA), - warning="2147483648.*integer64.*position 2 out-of-range [(]NA[)] when assigning to column 2 named 'b'.*integer") + warning="2147483648.*integer64.*position 2 out-of-range [(]NA[)] when assigning.*integer.*column 2 named 'b'") test(2005.64, DT[2:3, b:=as.integer64(c("-2147483648","-2147483647"))]$b, INT(4,NA,-2147483647), - warning="-2147483648.*integer64.*position 1 out-of-range [(]NA[)].*column 2 named 'b'.*integer") + warning="-2147483648.*integer64.*position 1 out-of-range [(]NA[)].*integer.*column 2 named 'b'") test(2005.65, DT[c(2,1,3), c:=as.integer64(c(-1,255,256))]$c, as.raw(c(255,0,0)), - warning="-1.*integer64.*position 1 taken as 0 when assigning to column 3 named 'c'.*raw") + warning="-1.*integer64.*position 1 taken as 0 when assigning.*raw.*column 3 named 'c'") test(2005.66, DT[2:3, f:=as.integer64(c(NA,"2147483648"))]$f, as.complex(c(-42,NA,2147483648))) DT[,h:=LETTERS[1:3]] test(2005.67, DT[2:3, h:=as.integer64(1:2)], error="To assign integer64 to a character column, please use as.character.") @@ -14189,7 +14193,7 @@ test(2006.2, rbindlist(list(data.table(x = as.raw(1:2), y=as.raw(5:6)), data.tab if (test_bit64) { test(2007.1, rbindlist(list( list(a=as.integer64(1), b=3L), list(a=2L, b=4L) )), data.table(a=as.integer64(1:2), b=3:4)) test(2007.2, rbindlist(list( list(a=3.4, b=5L), list(a=as.integer64(4), b=6L) )), data.table(a=as.integer64(3:4), b=5:6), - warning="Column 1 of item 1: 3.4.*double.*position 1 truncated.*precision lost.*when assigning to column 1 named 'a'.*integer64'") + warning="Column 1 of item 1: 3.4.*double.*position 1 truncated.*precision lost.*when assigning.*integer64.*column 1 named 'a'") test(2007.3, rbindlist(list( list(a=3.0, b=5L), list(a=as.integer64(4), b=6L) )), data.table(a=as.integer64(3:4), b=5:6)) test(2007.4, rbindlist(list( list(b=5:6), list(a=as.integer64(4), b=7L)), fill=TRUE), data.table(b=5:7, a=as.integer64(c(NA,NA,4)))) # tests writeNA of integer64 test(2007.5, rbindlist(list( list(a=INT(1,NA,-2)), list(a=as.integer64(c(3,NA))) )), data.table(a=as.integer64(c(1,NA,-2,3,NA)))) # int NAs combined with int64 NA @@ -16236,9 +16240,9 @@ DT = data.table(a=1:4, b=c(FALSE, TRUE, NA, FALSE)) test(2115.1, set(DT,3L,1L,0), data.table(a=INT(1,2,0,4), b=c(FALSE, TRUE, NA, FALSE))) test(2115.2, set(DT,3L,2L,0), data.table(a=INT(1,2,0,4), b=c(FALSE, TRUE, FALSE, FALSE))) test(2115.3, set(DT,3L,2L,-2L), data.table(a=INT(1,2,0,4), b=c(FALSE, TRUE, TRUE, FALSE)), # see also test 299 - warning="-2.*integer.*position 1 taken as TRUE.*column 2 named 'b'.*logical") + warning="-2.*integer.*position 1 taken as TRUE.*logical.*column 2 named 'b'") test(2115.4, set(DT,4L,2L,3.14), data.table(a=INT(1,2,0,4), b=c(FALSE, TRUE, TRUE, TRUE)), - warning="3.14.*double.*position 1 taken as TRUE.*column 2 named 'b'.*logical") + warning="3.14.*double.*position 1 taken as TRUE.*logical.*column 2 named 'b'") DT = data.table(code=c("c","b","c","a"), val=10:13) test(2115.5, DT[code=="c", val := val+1], data.table(code=c("c","b","c","a"), val=INT(11,11,13,13))) DT = data.table(x=factor(LETTERS[1:3]), y=1:6) @@ -16273,6 +16277,10 @@ DT = data.table(a=factor(LETTERS[1:3])) test(2117.1, levels(DT[2:3,a:=c("",NA_character_)]$a), c("A","B","C","")) test(2117.2, DT[1,a:=NA_character_]$a, factor(c(NA,"",NA), levels=c("A","B","C",""))) +# assigning NA (non-character) to character column by group to trigger zero-copy-coerce case in memrecycle +DT = data.table(A=rep(1:2,each=3), B=3:4, v=letters[1:6]) +test(2118, DT[B==3L,v:=NA,by=A]$v, c(NA,"b",NA,"d",NA,"f")) + ################################### # Add new tests above this line # diff --git a/src/assign.c b/src/assign.c index 7cb7352b6d..090327c696 100644 --- a/src/assign.c +++ b/src/assign.c @@ -683,6 +683,8 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, error("Internal error: recycle length error not caught earlier. slen=%d len=%d", slen, len); // # nocov // Internal error because the column has already been added to the DT, so length mismatch should have been caught before adding the column. // for 5647 this used to limit slen to len, but no longer + if (colname==NULL) + error("Internal error: memrecycle has received NULL colname"); // # nocov *memrecycle_message = '\0'; int protecti=0; if (isNewList(source)) { @@ -849,10 +851,11 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, if (COND) { \ const char *sType = sourceIsI64 ? "integer64" : type2char(TYPEOF(source)); \ const char *tType = targetIsI64 ? "integer64" : type2char(TYPEOF(target)); \ - snprintf(memrecycle_message, MSGSIZE, \ - FMT" (type '%s') at RHS position %d "TO" when assigning to column %d named '%s' (type '%s')", \ - val, sType, i+1, colnum, colname, tType); \ - /* string returned like this so that rbindlist can prefix it with which item of its list this refers to */ \ + int n = snprintf(memrecycle_message, MSGSIZE, \ + FMT" (type '%s') at RHS position %d "TO" when assigning to type '%s'", val, sType, i+1, tType); \ + if (colnum>0 && n>0 && n0) SET_STRING_ELT(target, start+wd[i]-1, NA_STRING); - else + if (length(where)) { + for (int i=0; i0) SET_STRING_ELT(target, wd[i]-1, NA_STRING); + } else { for (int i=0; i1 && vlen!=grpn) { SEXP colname = isNull(target) ? STRING_ELT(newnames, INTEGER(lhs)[j]-origncol-1) : STRING_ELT(dtnames,INTEGER(lhs)[j]-1); @@ -315,8 +313,10 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX SET_STRING_ELT(dtnames, INTEGER(lhs)[j]-1, STRING_ELT(newnames, INTEGER(lhs)[j]-origncol-1)); copyMostAttrib(RHS, target); // attributes of first group dominate; e.g. initial factor levels come from first group } - memrecycle(target, order, INTEGER(starts)[i]-1, grpn, RHS, 0, ""); + const char *warn = memrecycle(target, order, INTEGER(starts)[i]-1, grpn, RHS, 0, ""); // can't error here because length mismatch already checked for all jval columns before starting to add any new columns + if (warn) + warning("Group %d column '%s': %s", i+1, CHAR(STRING_ELT(dtnames,INTEGER(lhs)[j]-1)), warn); } UNPROTECT(1); // jval continue; @@ -395,8 +395,8 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX if (tsize==4) { int *td = INTEGER(target); int *sd = INTEGER(source); - for (int r=0; r0 if (TYPEOF(source) != TYPEOF(target)) From 40b50e5292ed4ebed5d9b5f8b3fd7de17c7a54c5 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Mon, 30 Sep 2019 00:49:41 -0700 Subject: [PATCH 3/3] coverage --- inst/tests/tests.Rraw | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 24ea88018c..d61cd583cb 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16278,8 +16278,10 @@ test(2117.1, levels(DT[2:3,a:=c("",NA_character_)]$a), c("A","B","C","")) test(2117.2, DT[1,a:=NA_character_]$a, factor(c(NA,"",NA), levels=c("A","B","C",""))) # assigning NA (non-character) to character column by group to trigger zero-copy-coerce case in memrecycle +# used to be inconvenient error ('Type of RHS must match LHS') in v1.12.2 and before, and user had to use NA_character_ DT = data.table(A=rep(1:2,each=3), B=3:4, v=letters[1:6]) -test(2118, DT[B==3L,v:=NA,by=A]$v, c(NA,"b",NA,"d",NA,"f")) +test(2118.1, DT[B==3L,v:=NA,by=A]$v, c(NA,"b",NA,"d",NA,"f")) +test(2118.2, DT[,v:=NA,by=A]$v, rep(NA_character_,6L)) ###################################