Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions CRAN_Release.cmd
Original file line number Diff line number Diff line change
Expand Up @@ -92,14 +92,16 @@ grep mkChar *.c # see comment in bmerge.c about passing this grep. I'
# or ii) passed to setAttrib (which protects, providing leak-seals above are ok)
# ScalarLogical in R now returns R's global TRUE from R 3.1.0; Apr 2014. Before that it allocated.
# Aside: ScalarInteger may return globals for small integers in future version of R.
grep ScalarInteger *.c # Check all Scalar* either PROTECTed, return-ed or passed to setAttrib.
grep ScalarInteger *.c | grep -v PROTECT | grep -v setAttrib | grep -v return # Check all Scalar* either PROTECTed, return-ed or passed to setAttrib.
grep ScalarString *.c | grep -v PROTECT | grep -v setAttrib | grep -v return
grep ScalarLogical *.c # Now we depend on 3.1.0+, check ScalarLogical is NOT PROTECTed.
grep ScalarString *.c

# Inspect missing PROTECTs
# To pass this grep is why we like SET_VECTOR_ELT(,,var=allocVector()) style on one line.
# If a PROTECT is not needed then a comment is added explaining why and including "PROTECT" in the comment to pass this grep
grep allocVector *.c | grep -v PROTECT | grep -v SET_VECTOR_ELT | grep -v setAttrib | grep -v return
grep coerceVector *.c | grep -v PROTECT | grep -v SET_VECTOR_ELT | grep -v setAttrib | grep -v return
grep asCharacter *.c | grep -v PROTECT | grep -v SET_VECTOR_ELT | grep -v setAttrib | grep -v return

cd ..
R
Expand Down
3 changes: 3 additions & 0 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -6685,6 +6685,9 @@ test(1477.07, transpose(strsplit(ll, ",", fixed=TRUE)), tstrsplit(ll, ",", fixed
test(1477.08, transpose(1:5), error="l must be a list")
test(1477.09, transpose(list(as.complex(c(1, 1+5i)))), error="Unsupported column type")
test(1477.10, transpose(list(list(1:5))), error="Item 1 of list input is")
test(1477.11, transpose(as.list(1:5), fill=1:2), error="fill must be NULL or length=1 vector")
test(1477.12, transpose(as.list(1:5), ignore.empty=NA), error="ignore.empty should be logical TRUE/FALSE")
test(1477.13, transpose(list()), list())

# #480 `setDT` and 'lapply'
ll = list(data.frame(a=1), data.frame(x=1, y=2), NULL, list())
Expand Down
65 changes: 30 additions & 35 deletions src/assign.c
Original file line number Diff line number Diff line change
Expand Up @@ -279,37 +279,27 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values)
// cols : column names or numbers corresponding to the values to set
// rows : row numbers to assign
R_len_t i, j, numToDo, targetlen, vlen, r, oldncol, oldtncol, coln, protecti=0, newcolnum, indexLength;
SEXP targetcol, RHS, names, nullint, thisv, targetlevels, newcol, s, colnam, klass, tmp, colorder, key, index, a, assignedNames, indexNames;
SEXP targetcol, names, nullint, thisv, targetlevels, newcol, s, colnam, tmp, colorder, key, index, a, assignedNames, indexNames;
SEXP bindingIsLocked = getAttrib(dt, install(".data.table.locked"));
bool verbose=GetVerbose(), anytodelete=false, isDataTable=false;
bool verbose=GetVerbose(), anytodelete=false;
const char *c1, *tc1, *tc2;
int *buf, k=0, newKeyLength, indexNo;
int *buf, newKeyLength, indexNo;
size_t size; // must be size_t otherwise overflow later in memcpy
if (isNull(dt)) error("assign has been passed a NULL dt");
if (TYPEOF(dt) != VECSXP) error("dt passed to assign isn't type VECSXP");
if (length(bindingIsLocked) && LOGICAL(bindingIsLocked)[0])
error(".SD is locked. Updating .SD by reference using := or set are reserved for future use. Use := in j directly. Or use copy(.SD) as a (slow) last resort, until shallow() is exported.");

klass = getAttrib(dt, R_ClassSymbol);
if (isNull(klass)) error("Input passed to assign has no class attribute. Must be a data.table or data.frame.");
// Check if there is a class "data.table" somewhere (#5115).
// We allow set() on data.frame too; e.g. package Causata uses set() on a data.frame in tests/testTransformationReplay.R
// := is only allowed on a data.table. However, the ":=" = stop(...) message in data.table.R will have already
// detected use on a data.frame before getting to this point.
for (i=0; i<length(klass); i++) { // There doesn't seem to be an R API interface to inherits(), but manually here isn't too bad.
if (strcmp(CHAR(STRING_ELT(klass, i)), "data.table") == 0) break;
}
if (i<length(klass))
isDataTable = true;
else {
for (i=0; i<length(klass); i++) {
if (strcmp(CHAR(STRING_ELT(klass, i)), "data.frame") == 0) break;
}
if (i == length(klass)) error("Input is not a data.table, data.frame or an object that inherits from either.");
isDataTable = false; // meaning data.frame from now on. Can use set() on existing columns but not add new ones because DF aren't over-allocated.
}
// For data.frame, can use set() on existing columns but not add new ones because DF are not over-allocated.
bool isDataTable = INHERITS(dt, char_datatable);
if (!isDataTable && !INHERITS(dt, char_dataframe))
error("Internal error: dt passed to Cassign is not a data.table or data.frame"); // # nocov

oldncol = LENGTH(dt);
names = getAttrib(dt,R_NamesSymbol);
names = getAttrib(dt, R_NamesSymbol);
if (isNull(names)) error("dt passed to assign has no names");
if (length(names)!=oldncol)
error("Internal error in assign: length of names (%d) is not length of dt (%d)",length(names),oldncol); // # nocov
Expand All @@ -327,7 +317,7 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values)
// fast way to assign to whole column, without creating 1:nrow(x) vector up in R, or here in C
} else {
if (isReal(rows)) {
rows = PROTECT(rows = coerceVector(rows, INTSXP)); protecti++;
rows = PROTECT(coerceVector(rows, INTSXP)); protecti++;
warning("Coerced i from numeric to integer. Please pass integer for efficiency; e.g., 2L rather than 2");
}
if (!isInteger(rows))
Expand Down Expand Up @@ -360,12 +350,13 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values)
if (isString(cols)) {
PROTECT(tmp = chmatch(cols, names, 0)); protecti++;
buf = (int *) R_alloc(length(cols), sizeof(int));
int k=0;
for (i=0; i<length(cols); i++) {
if (INTEGER(tmp)[i] == 0) buf[k++] = i;
}
if (k>0) {
if (!isDataTable) error("set() on a data.frame is for changing existing columns, not adding new ones. Please use a data.table for that. data.table's are over-allocated and don't shallow copy.");
PROTECT(newcolnames = allocVector(STRSXP, k)); protecti++;
newcolnames = PROTECT(allocVector(STRSXP, k)); protecti++;
for (i=0; i<k; i++) {
SET_STRING_ELT(newcolnames, i, STRING_ELT(cols, buf[i]));
INTEGER(tmp)[buf[i]] = oldncol+i+1;
Expand Down Expand Up @@ -467,6 +458,7 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values)
// truelengths of both already set by alloccol
}
for (i=0; i<length(cols); i++) {
int thisprotecti = 0; // UNPROTECT(thisprotecti) at the end of this loop to save protection stack
coln = INTEGER(cols)[i]-1;
SEXP thisvalue = RHS_list_of_columns ? VECTOR_ELT(values, i) : values;
if (TYPEOF(thisvalue)==NILSXP) {
Expand All @@ -488,18 +480,19 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values)
} else {
if (verbose) Rprintf("Direct plonk of unnamed RHS, no copy.\n"); // e.g. DT[,a:=as.character(a)] as tested by 754.3
}
SET_VECTOR_ELT(dt,coln,thisvalue); // plonk new column in as it's already the correct length
SET_VECTOR_ELT(dt, coln, thisvalue); // plonk new column in as it's already the correct length
setAttrib(thisvalue, R_NamesSymbol, R_NilValue); // clear names such as DT[,a:=mapvector[a]]
setAttrib(thisvalue, R_DimSymbol, R_NilValue); // so that matrix is treated as vector
setAttrib(thisvalue, R_DimNamesSymbol, R_NilValue); // the 3rd of the 3 attribs not copied by copyMostAttrib, for consistency.
continue;
}
SEXP RHS;

if (coln+1 > oldncol) { // new column
newcol = allocNAVector(TYPEOF(thisvalue),nrow);
SET_VECTOR_ELT(dt, coln, newcol=allocNAVector(TYPEOF(thisvalue), nrow));
// initialize with NAs for when 'rows' is a subset and it doesn't touch
// do not try to save the time to NA fill (contiguous branch free assign anyway) since being
// sure all items will be written to (isNull(rows), length(rows), vlen<1, targetlen) is not worth the risk.
SET_VECTOR_ELT(dt,coln,newcol);
if (isVectorAtomic(thisvalue)) copyMostAttrib(thisvalue,newcol); // class etc but not names
// else for lists (such as data.frame and data.table) treat them as raw lists and drop attribs
if (vlen<1) continue; // e.g. DT[,newcol:=integer()] (adding new empty column)
Expand All @@ -512,7 +505,7 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values)
// If it's the same RHS being assigned to several columns, we have to recoerce for each
// one because the levels of each target are likely different
if (isFactor(thisvalue)) {
PROTECT(thisvalue = asCharacterFactor(thisvalue)); protecti++;
thisvalue = PROTECT(asCharacterFactor(thisvalue)); thisprotecti++;
}
targetlevels = getAttrib(targetcol, R_LevelsSymbol);
if (isNull(targetlevels)) error("somehow this factor column has no levels");
Expand All @@ -535,15 +528,15 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values)
}
R_len_t addi = 0;
SEXP addlevels=NULL;
PROTECT(RHS = allocVector(INTSXP, length(thisvalue))); protecti++;
RHS = PROTECT(allocVector(INTSXP, length(thisvalue))); thisprotecti++;
int *iRHS = INTEGER(RHS);
for (j=0; j<length(thisvalue); j++) {
thisv = STRING_ELT(thisvalue,j);
if (TRUELENGTH(thisv)==0) {
if (addi==0) {
PROTECT(addlevels = allocVector(STRSXP, 100)); protecti++;
addlevels = PROTECT(allocVector(STRSXP, 100)); thisprotecti++;
} else if (addi >= length(addlevels)) {
PROTECT(addlevels = growVector(addlevels, length(addlevels)+1000)); protecti++;
addlevels = PROTECT(growVector(addlevels, length(addlevels)+1000)); thisprotecti++;
}
SET_STRING_ELT(addlevels,addi++,thisv);
// if-else for #1718 fix
Expand All @@ -553,7 +546,7 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values)
}
if (addi > 0) {
R_len_t oldlen = length(targetlevels);
PROTECT(targetlevels = growVector(targetlevels, oldlen+addi)); protecti++;
targetlevels = PROTECT(growVector(targetlevels, oldlen+addi)); thisprotecti++;
for (j=0; j<addi; j++)
SET_STRING_ELT(targetlevels, oldlen+j, STRING_ELT(addlevels, j));
setAttrib(targetcol, R_LevelsSymbol, targetlevels);
Expand All @@ -566,13 +559,13 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values)
error("Internal error: up front checks (before starting to modify DT) didn't catch type of RHS ('%s') assigning to factor column '%s'. please report to data.table issue tracker.", type2char(TYPEOF(thisvalue)), CHAR(STRING_ELT(names,coln))); // # nocov
int *iRHS;
if (isReal(thisvalue) || TYPEOF(thisvalue)==LGLSXP) {
PROTECT(RHS = coerceVector(thisvalue,INTSXP)); protecti++;
RHS = PROTECT(coerceVector(thisvalue,INTSXP)); thisprotecti++;
iRHS = INTEGER(RHS);
// silence warning on singleton NAs
if (iRHS[0] != NA_INTEGER) warning("Coerced '%s' RHS to 'integer' to match the factor column's underlying type. Character columns are now recommended (can be in keys), or coerce RHS to integer or character first.", type2char(TYPEOF(thisvalue)));
} else { // thisvalue is integer
// make sure to copy thisvalue. May be modified below. See #2984
RHS = PROTECT(duplicate(thisvalue)); protecti++;
RHS = PROTECT(duplicate(thisvalue)); thisprotecti++;
iRHS = INTEGER(RHS);
}
for (int j=0; j<length(RHS); j++) {
Expand All @@ -589,11 +582,11 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values)
else {
// coerce the RHS to match the type of the column, unlike [<-.data.frame, for efficiency.
if (isString(targetcol) && isFactor(thisvalue)) {
PROTECT(RHS = asCharacterFactor(thisvalue)); protecti++;
RHS = PROTECT(asCharacterFactor(thisvalue)); thisprotecti++;
if (verbose) Rprintf("Coerced factor RHS to character to match the column's type. Avoid this coercion if possible, for efficiency, by creating RHS as type character.\n");
// TO DO: datatable.pedantic could turn this into warning
} else {
PROTECT(RHS = coerceVector(thisvalue,TYPEOF(targetcol))); protecti++;
RHS = PROTECT(coerceVector(thisvalue,TYPEOF(targetcol))); thisprotecti++;
char *s1 = (char *)type2char(TYPEOF(targetcol));
char *s2 = (char *)type2char(TYPEOF(thisvalue));
// FR #2551, added test for equality between RHS and thisvalue to not provide the warning when length(thisvalue) == 1
Expand Down Expand Up @@ -623,9 +616,10 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values)
}
}
memrecycle(targetcol, rows, 0, targetlen, RHS); // also called from dogroups where these arguments are used more
UNPROTECT(thisprotecti); // unprotect inside loop through columns to save protection stack
}
*_Last_updated = numToDo; // the updates have taken place with no error, so update .Last.updated now
PROTECT(assignedNames = allocVector(STRSXP, LENGTH(cols))); protecti++;
assignedNames = PROTECT(allocVector(STRSXP, LENGTH(cols))); protecti++;
for (i=0;i<LENGTH(cols);i++) SET_STRING_ELT(assignedNames,i,STRING_ELT(names,INTEGER(cols)[i]-1));
key = getAttrib(dt, sym_sorted);
if (length(key)) {
Expand Down Expand Up @@ -1124,7 +1118,8 @@ SEXP setcolorder(SEXP x, SEXP o)
}
Free(seen);

SEXP *tmp = Calloc(ncol, SEXP), *xd = VECTOR_PTR(x), *namesd = STRING_PTR(names);
SEXP *tmp = Calloc(ncol, SEXP);
SEXP *xd = VECTOR_PTR(x), *namesd = STRING_PTR(names);
for (int i=0; i<ncol; ++i) tmp[i] = xd[od[i]-1];
memcpy(xd, tmp, ncol*sizeof(SEXP)); // sizeof is type size_t so no overflow here
for (int i=0; i<ncol; ++i) tmp[i] = namesd[od[i]-1];
Expand Down
Loading