Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions src/assign.c
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,7 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values)
Rprintf("RHS for item %d has been duplicated because NAMED==%d MAYBE_SHARED==%d, but then is being plonked. length(values)==%d; length(cols)==%d)\n",
i+1, NAMED(thisvalue), MAYBE_SHARED(thisvalue), length(values), length(cols));
}
thisvalue = duplicate(thisvalue); // PROTECT not needed as assigned as element to protected list below.
thisvalue = copyAsPlain(thisvalue); // PROTECT not needed as assigned as element to protected list below.
} else {
if (verbose) Rprintf("Direct plonk of unnamed RHS, no copy. NAMED==%d, MAYBE_SHARED==%d\n", NAMED(thisvalue), MAYBE_SHARED(thisvalue)); // e.g. DT[,a:=as.character(a)] as tested by 754.5
}
Expand Down Expand Up @@ -564,7 +564,7 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values)
if (iRHS[0] != NA_INTEGER) warning("Coerced '%s' RHS to 'integer' to match the factor column's underlying type. Character columns are now recommended (can be in keys), or coerce RHS to integer or character first.", type2char(TYPEOF(thisvalue)));
} else { // thisvalue is integer
// make sure to copy thisvalue. May be modified below. See #2984
RHS = PROTECT(duplicate(thisvalue)); thisprotecti++;
RHS = PROTECT(copyAsPlain(thisvalue)); thisprotecti++;
iRHS = INTEGER(RHS);
}
for (int j=0; j<length(RHS); j++) {
Expand Down Expand Up @@ -735,7 +735,7 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values)
if (anytodelete) {
// Delete any columns assigned NULL (there was a 'continue' earlier in loop above)
// In reverse order to make repeated memmove easy. Otherwise cols would need to be updated as well after each delete.
PROTECT(colorder = duplicate(cols)); protecti++;
PROTECT(colorder = copyAsPlain(cols)); protecti++;
R_isort(INTEGER(colorder),LENGTH(cols));
PROTECT(colorder = match(cols, colorder, 0)); protecti++; // actually matches colorder to cols (oddly, arguments are that way around)
// Can't find a visible R entry point to return ordering of cols, above is only way I could find.
Expand Down Expand Up @@ -809,9 +809,9 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source)
// SEXP pointed to.
// If source is already not named (because j already created a fresh unnamed vector within a list()) we don't want to
// duplicate unnecessarily, hence checking for named rather than duplicating always.
// See #481, #1270 and tests 1341.* fail without this duplicate().
// See #481, #1270 and tests 1341.* fail without this copy.
if (anyNamed(source)) {
source = PROTECT(duplicate(source)); protecti++;
source = PROTECT(copyAsPlain(source)); protecti++;
}
}
if (!length(where)) { // e.g. called from rbindlist with where=R_NilValue
Expand Down
2 changes: 1 addition & 1 deletion src/coalesce.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ SEXP coalesce(SEXP x, SEXP inplaceArg) {
error("Item %d is length %d but the first item is length %d. Only singletons are recycled.", i+2, length(item), nrow);
}
if (!inplace) {
first = PROTECT(duplicate(first)); nprotect++;
first = PROTECT(copyAsPlain(first)); nprotect++;
if (verbose) Rprintf("coalesce copied first item (inplace=FALSE)\n");
}
void **valP = (void **)R_alloc(nval, sizeof(void *));
Expand Down
1 change: 1 addition & 0 deletions src/data.table.h
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,7 @@ void coerceFill(SEXP fill, double *dfill, int32_t *ifill, int64_t *i64fill);
SEXP coerceFillR(SEXP fill);
bool INHERITS(SEXP x, SEXP char_);
bool Rinherits(SEXP x, SEXP char_);
SEXP copyAsPlain(SEXP x);
void copySharedColumns(SEXP x);
SEXP lock(SEXP x);
SEXP unlock(SEXP x);
Expand Down
2 changes: 1 addition & 1 deletion src/fmelt.c
Original file line number Diff line number Diff line change
Expand Up @@ -694,7 +694,7 @@ SEXP fmelt(SEXP DT, SEXP id, SEXP measure, SEXP varfactor, SEXP valfactor, SEXP
// edge case no measure.vars
if (!data.lmax) {
ans = shallowwrapper(DT, data.idcols);
ans = PROTECT(duplicate(ans)); protecti++;
ans = PROTECT(copyAsPlain(ans)); protecti++;
} else {
ansvals = PROTECT(getvaluecols(DT, dtnames, LOGICAL(valfactor)[0], verbose, &data)); protecti++;
ansvars = PROTECT(getvarcols(DT, dtnames, LOGICAL(varfactor)[0], verbose, &data)); protecti++;
Expand Down
4 changes: 2 additions & 2 deletions src/reorder.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ SEXP reorder(SEXP x, SEXP order)
error("Column %d is length %d which differs from length of column 1 (%d). Invalid data.table.", i+1, length(v), nrow);
if (SIZEOF(v) > maxSize)
maxSize=SIZEOF(v);
if (ALTREP(v)) SET_VECTOR_ELT(x,i,duplicate(v)); // expand compact vector in place ready for reordering by reference
if (ALTREP(v)) SET_VECTOR_ELT(x, i, copyAsPlain(v));
}
copySharedColumns(x); // otherwise two columns which point to the same vector would be reordered and then re-reordered, issues linked in PR#3768
} else {
Expand All @@ -33,7 +33,7 @@ SEXP reorder(SEXP x, SEXP order)
if (!isInteger(order)) error("order must be an integer vector");
if (length(order) != nrow) error("nrow(x)[%d]!=length(order)[%d]",nrow,length(order));
int nprotect = 0;
if (ALTREP(order)) { order=PROTECT(duplicate(order)); nprotect++; } // TODO: how to fetch range of ALTREP compact vector
if (ALTREP(order)) { order=PROTECT(copyAsPlain(order)); nprotect++; } // TODO: if it's an ALTREP sequence some optimizations are possible rather than expand

const int *restrict idx = INTEGER(order);
int i=0;
Expand Down
2 changes: 1 addition & 1 deletion src/subset.c
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ SEXP subsetDT(SEXP x, SEXP rows, SEXP cols) {
for (int i=0; i<LENGTH(cols); i++) {
SEXP thisCol = VECTOR_ELT(x, colD[i]-1);
checkCol(thisCol, colD[i], nrow, x);
SET_VECTOR_ELT(ans, i, duplicate(thisCol));
SET_VECTOR_ELT(ans, i, copyAsPlain(thisCol));
// materialize the column subset as we have always done for now, until REFCNT is on by default in R (TODO)
}
} else {
Expand Down
2 changes: 1 addition & 1 deletion src/transpose.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ SEXP transpose(SEXP l, SEXP fill, SEXP ignoreArg, SEXP keepNamesArg) {
if (!isNewList(l))
error("l must be a list.");
if (!length(l))
return(duplicate(l));
return(copyAsPlain(l));
if (!isLogical(ignoreArg) || LOGICAL(ignoreArg)[0]==NA_LOGICAL)
error("ignore.empty should be logical TRUE/FALSE.");
bool ignore = LOGICAL(ignoreArg)[0];
Expand Down
68 changes: 60 additions & 8 deletions src/utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -173,36 +173,88 @@ bool Rinherits(SEXP x, SEXP char_) {
return ans;
}

SEXP copyAsPlain(SEXP x) {
// v1.12.2 and before used standard R duplicate() to do this. But that's not guaranteed to not return an ALTREP.
// e.g. ALTREP 'wrapper' on factor column (with materialized INTSXP) in package VIM under example(hotdeck)
// .Internal(inspect(x[[5]]))
// @558adf4d9508 13 INTSXP g0c0 [OBJ,NAM(7),ATT] wrapper [srt=-2147483648,no_na=0]
// 'AsPlain' is intended to convey unALTREP-ing; i.e. materializing and removing any ALTREP attributes too
// For non-ALTREP this should do the same as R's duplicate(); but doesn't quite currently, so has to divert to duplicated() for now
// Intended for use on columns; to either un-ALTREP them or duplicate shared memory columns; see copySharedColumns() below
// Not intended to be called on a DT VECSXP where a concept of 'deep' might refer to whether the columns are copied

if (!ALTREP(x)) return duplicate(x);
// would prefer not to have this line, but without it test 1639.064 fails :
// Running test id 1639.064 Error in `[.data.table`(r, -ii) :
// Item 2 of i is -1 and item 1 is NA. Cannot mix negatives and NA.
// Calls: test.data.table ... FUN -> make.levels -> rbindlist -> [ -> [.data.table
// Perhaps related to row names and the copyMostAttrib() below is not quite sufficient

size_t n = XLENGTH(x);
SEXP ans = PROTECT(allocVector(TYPEOF(x), XLENGTH(x)));
switch (TYPEOF(ans)) {
case RAWSXP:
memcpy(RAW(ans), RAW(x), n*sizeof(Rbyte)); // # nocov; add coverage when ALTREP is turned on for all types
break; // # nocov
case LGLSXP:
memcpy(LOGICAL(ans), LOGICAL(x), n*sizeof(Rboolean)); // # nocov
break; // # nocov
case INTSXP:
memcpy(INTEGER(ans), INTEGER(x), n*sizeof(int)); // covered by 10:1 after test 178
break;
case REALSXP:
memcpy(REAL(ans), REAL(x), n*sizeof(double)); // covered by as.Date("2013-01-01")+seq(1,1000,by=10) after test 1075
break;
case CPLXSXP:
memcpy(COMPLEX(ans), COMPLEX(x), n*sizeof(Rcomplex)); // # nocov
break; // # nocov
case STRSXP: {
const SEXP *xp=STRING_PTR(x); // covered by as.character(as.hexmode(1:500)) after test 642
for (R_xlen_t i=0; i<n; ++i) SET_STRING_ELT(ans, i, xp[i]);
} break;
case VECSXP: {
const SEXP *xp=VECTOR_PTR(x); // # nocov
for (R_xlen_t i=0; i<n; ++i) SET_VECTOR_ELT(ans, i, xp[i]); // # nocov
} break; // # nocov
default:
error("Internal error: unsupported type '%s' passed to copyAsPlain()", type2char(TYPEOF(x))); // # nocov
}
copyMostAttrib(x, ans); // e.g. factor levels, class etc, but not names, dim or dimnames
if (ALTREP(ans))
error("Internal error: type '%s' passed to copyAsPlain() but it seems copyMostAttrib() retains ALTREP attributes", type2char(TYPEOF(x))); // # nocov
UNPROTECT(1);
return ans;
}

void copySharedColumns(SEXP x) {
const int ncol = length(x);
if (!isNewList(x) || ncol==1) return;
bool *shared = (bool *)R_alloc(ncol, sizeof(bool)); // on R heap in case duplicate() fails
bool *shared = (bool *)R_alloc(ncol, sizeof(bool)); // on R heap in case alloc fails
int *savetl = (int *)R_alloc(ncol, sizeof(int)); // on R heap for convenience but could be a calloc
int nShared = 0;
int nShared=0, thistl=0;
const SEXP *xp = VECTOR_PTR(x);
for (int i=0; i<ncol; ++i) {
SEXP thiscol = xp[i];
const int thistl = TRUELENGTH(thiscol);
if (thistl<0) {
if (ALTREP(thiscol) || (thistl=TRUELENGTH(thiscol))<0) {
shared[i] = true;
nShared++;
// do not duplicate() here as the duplicate() might fail. Careful to restore tl first to all columns.
// do not copyAsPlain() here as the alloc might fail: careful to restore tl first to all columns.
// Aside: thistl is which column shares the same address as this one in case that's ever useful in future.
} else {
shared[i] = false;
savetl[i] = thistl; // these are vectors which are all expected to have tl, unlike CHARSXP which often don't (savetl() has CHARSXP in mind)
SET_TRUELENGTH(thiscol, -i-1);
SET_TRUELENGTH(thiscol, -i-1); // just on plain vectors, not on ALTREP
}
}
// now we know nShared and which ones they are (if any), restore original tl back to all columns
for (int i=0; i<ncol; ++i) {
if (!shared[i]) SET_TRUELENGTH(VECTOR_ELT(x, i), savetl[i]);
}
// now that truelength has been restored for all columns, we can finally call duplicate()
// now that truelength has been restored for all columns, we can finally call copyAsPlain()
if (nShared) {
for (int i=0; i<ncol; ++i) {
if (shared[i])
SET_VECTOR_ELT(x, i, duplicate(VECTOR_ELT(x, i)));
SET_VECTOR_ELT(x, i, copyAsPlain(VECTOR_ELT(x, i)));
}
if (GetVerbose()) Rprintf("Found and copied %d column%s with a shared memory address\n", nShared, nShared>1?"s":"");
// GetVerbose() (slightly expensive call of all options) called here only when needed
Expand Down
2 changes: 1 addition & 1 deletion src/wrappers.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ SEXP expandAltRep(SEXP x)
for (int i=0; i<LENGTH(x); i++) {
SEXP col = VECTOR_ELT(x,i);
if (ALTREP(col)) {
SET_VECTOR_ELT(x, i, duplicate(col));
SET_VECTOR_ELT(x, i, copyAsPlain(col));
}
}
return R_NilValue;
Expand Down