diff --git a/NEWS.md b/NEWS.md index fd48fdcade..aae1234553 100644 --- a/NEWS.md +++ b/NEWS.md @@ -127,6 +127,8 @@ 16. `DT[, min(colB), by=colA]` when `colB` is type `character` would miss blank strings (`""`) at the beginning of a group and return the smallest non-blank instead of blank, [#4848](https://github.com/Rdatatable/data.table/issues/4848). Thanks to Vadim Khotilovich for reporting and for the PR fixing it. +17. Assigning a wrong-length or non-list vector to a list column could segfault, [#4166](https://github.com/Rdatatable/data.table/issues/4166) [#4667](https://github.com/Rdatatable/data.table/issues/4667) [#4678](https://github.com/Rdatatable/data.table/issues/4678) [#4729](https://github.com/Rdatatable/data.table/issues/4729). Thanks to @fklirono, Kun Ren, @kevinvzandvoort and @peterlittlejohn for reporting, and to Václav Tlapák for the PR. + ## NOTES 1. New feature 29 in v1.12.4 (Oct 2019) introduced zero-copy coercion. Our thinking is that requiring you to get the type right in the case of `0` (type double) vs `0L` (type integer) is too inconvenient for you the user. So such coercions happen in `data.table` automatically without warning. Thanks to zero-copy coercion there is no speed penalty, even when calling `set()` many times in a loop, so there's no speed penalty to warn you about either. However, we believe that assigning a character value such as `"2"` into an integer column is more likely to be a user mistake that you would like to be warned about. The type difference (character vs integer) may be the only clue that you have selected the wrong column, or typed the wrong variable to be assigned to that column. For this reason we view character to numeric-like coercion differently and will warn about it. If it is correct, then the warning is intended to nudge you to wrap the RHS with `as.()` so that it is clear to readers of your code that a coercion from character to that type is intended. For example : diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 00fba2de01..29a76866cc 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17634,10 +17634,24 @@ test(2188.15, fifelse(TRUE, NA, factor('a'), factor('a', levels = c('a','b'))), test(2188.16, fifelse(c(NA, NA), 1L, 2L, NULL), c(NA_integer_, NA_integer_)) # NULL `na` is treated as NA # rolling join expected output on non-matching join column has been fixed #1913 -dt = data.table(ID=1:5, A=c(1.3, 1.7, 2.4, 0.9, 0.6)) +DT = data.table(ID=1:5, A=c(1.3, 1.7, 2.4, 0.9, 0.6)) buckets = data.table(BucketID=1:4, BinA=1:4) -dt[, A.copy := A] -test(2189.1, buckets[dt, on=c("BinA"="A"), roll=-Inf], data.table(BucketID = c(2L, 2L, 3L, 1L, 1L), BinA = c(1.3, 1.7, 2.4, 0.9, 0.6), ID = 1:5, A.copy = c(1.3, 1.7, 2.4, 0.9, 0.6))) +DT[, A.copy := A] +test(2189.1, buckets[DT, on=c("BinA"="A"), roll=-Inf], data.table(BucketID = c(2L, 2L, 3L, 1L, 1L), BinA = c(1.3, 1.7, 2.4, 0.9, 0.6), ID = 1:5, A.copy = c(1.3, 1.7, 2.4, 0.9, 0.6))) buckets[, BinA := as.numeric(BinA)] -test(2189.2, buckets[dt, on=c("BinA"="A"), roll=-Inf], data.table(BucketID = c(2L, 2L, 3L, 1L, 1L), BinA = c(1.3, 1.7, 2.4, 0.9, 0.6), ID = 1:5, A.copy = c(1.3, 1.7, 2.4, 0.9, 0.6))) +test(2189.2, buckets[DT, on=c("BinA"="A"), roll=-Inf], data.table(BucketID = c(2L, 2L, 3L, 1L, 1L), BinA = c(1.3, 1.7, 2.4, 0.9, 0.6), ID = 1:5, A.copy = c(1.3, 1.7, 2.4, 0.9, 0.6))) + +# segfault subassigning non-list type to list column, #4166 +DT = data.table(a=list(1:2, 3, 4)) +test(2190.1, DT[, a:=1:4], error="Supplied 4 items to be assigned to 3 items of column 'a'.*please use rep") +test(2190.2, DT[1:2, a:=structure(c(1L, 2L), att='t') ]$a, list(structure(1L, att='t'), structure(2L, att='t'), 4)) +test(2190.3, DT[1:2, a:=structure(c(1, 2), att='t') ]$a, list(structure(1, att='t'), structure(2, att='t'), 4)) +test(2190.4, DT[1:2, a:=structure(as.raw(c(1, 2)), att='t') ]$a, list(structure(as.raw(1), att='t'), structure(as.raw(2), att='t'), 4)) +test(2190.5, DT[1:2, a:=structure(as.complex(c(1, 2)), att='t')]$a, list(structure(as.complex(1), att='t'), structure(as.complex(2), att='t'), 4)) +test(2190.6, DT[1:2, a:=structure(c(TRUE, FALSE), att='t') ]$a, list(structure(TRUE, att='t'), structure(FALSE, att='t'), 4)) +test(2190.7, DT[1:2, a:=structure(c('a', 'b'), att='t') ]$a, list(structure('a', att='t'), structure('b', att='t'), 4)) +if (test_bit64) { + test(2190.8, DT[1:2, a:=as.integer64(1:2) ]$a, list(as.integer64(1), as.integer64(2), 4)) +} +test(2190.9, DT[1:2, a:=call('sum', 1)], error="type 'language' cannot be coerced to 'list'") diff --git a/src/assign.c b/src/assign.c index bc1c91188f..c87d99bdb9 100644 --- a/src/assign.c +++ b/src/assign.c @@ -434,7 +434,7 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values) } // RHS of assignment to new column is zero length but we'll use its type to create all-NA column of that type } - { + { int j; if (isMatrix(thisvalue) && (j=INTEGER(getAttrib(thisvalue, R_DimSymbol))[1]) > 1) // matrix passes above (considered atomic vector) warning(_("%d column matrix RHS of := will be treated as one vector"), j); @@ -445,8 +445,9 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values) error(_("Can't assign to column '%s' (type 'factor') a value of type '%s' (not character, factor, integer or numeric)"), CHAR(STRING_ELT(names,coln)),type2char(TYPEOF(thisvalue))); } - if (nrow>0 && targetlen>0 && vlen>1 && vlen!=targetlen && (TYPEOF(existing)!=VECSXP || TYPEOF(thisvalue)==VECSXP)) { - // note that isNewList(R_NilValue) is true so it needs to be TYPEOF(existing)!=VECSXP above + if (nrow>0 && targetlen>0 && vlen>1 && vlen!=targetlen && !(TYPEOF(existing)==VECSXP && targetlen==1)) { + // We allow assigning objects of arbitrary to single items of list columns for convenience. + // Note that isNewList(R_NilValue) is true so it needs to be !(TYPEOF(existing)==VECSXP) above error(_("Supplied %d items to be assigned to %d items of column '%s'. If you wish to 'recycle' the RHS please use rep() to make this intent clear to readers of your code."), vlen, targetlen, CHAR(colnam)); } } @@ -1065,11 +1066,25 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con BODY(SEXP, STRING_PTR, SEXP, val, SET_STRING_ELT(target, off+i, cval)) } case VECSXP : - case EXPRSXP : // #546 - if (TYPEOF(source)!=VECSXP && TYPEOF(source)!=EXPRSXP) - BODY(SEXP, &, SEXP, val, SET_VECTOR_ELT(target, off+i, cval)) - else - BODY(SEXP, SEXPPTR_RO, SEXP, val, SET_VECTOR_ELT(target, off+i, cval)) + case EXPRSXP : { // #546 #4350 + if (len == 1 && TYPEOF(source)!=VECSXP && TYPEOF(source)!=EXPRSXP) { + BODY(SEXP, &, SEXP, val, SET_VECTOR_ELT(target, off+i, cval)) + } else { + switch (TYPEOF(source)) { + // no protect of CAST needed because SET_VECTOR_ELT protects it, and it can't get released by copyMostAttrib or anything else inside BODY + // copyMostAttrib is appended to CAST so as to be outside loop + case RAWSXP: BODY(Rbyte, RAW, SEXP, ScalarRaw(val); copyMostAttrib(source,cval), SET_VECTOR_ELT(target,off+i,cval)) + case LGLSXP: BODY(int, INTEGER, SEXP, ScalarLogical(val);copyMostAttrib(source,cval), SET_VECTOR_ELT(target,off+i,cval)) + case INTSXP: BODY(int, INTEGER, SEXP, ScalarInteger(val);copyMostAttrib(source,cval), SET_VECTOR_ELT(target,off+i,cval)) + case REALSXP: BODY(double, REAL, SEXP, ScalarReal(val); copyMostAttrib(source,cval), SET_VECTOR_ELT(target,off+i,cval)) + case CPLXSXP: BODY(Rcomplex, COMPLEX, SEXP, ScalarComplex(val);copyMostAttrib(source,cval), SET_VECTOR_ELT(target,off+i,cval)) + case STRSXP: BODY(SEXP, STRING_PTR, SEXP, ScalarString(val); copyMostAttrib(source,cval), SET_VECTOR_ELT(target,off+i,cval)) + case VECSXP: + case EXPRSXP: BODY(SEXP, SEXPPTR_RO, SEXP, val, SET_VECTOR_ELT(target,off+i,cval)) + default: COERCE_ERROR("list"); + } + } + } break; default : error(_("Unsupported column type in assign.c:memrecycle '%s'"), type2char(TYPEOF(target))); // # nocov }