Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
135 changes: 62 additions & 73 deletions CRAN_Release.cmd
Original file line number Diff line number Diff line change
Expand Up @@ -142,9 +142,9 @@ require(data.table)
test.data.table()


###############################################
# Compiles from source when OpenMP is disabled
###############################################
############################################
# Check compiles ok when OpenMP is disabled
############################################
vi ~/.R/Makevars
# Make line SHLIB_OPENMP_CFLAGS= active to remove -fopenmp
R CMD build .
Expand All @@ -154,34 +154,6 @@ require(data.table) # observe startup message about no OpenMP detected
test.data.table()


###############################################
# valgrind
###############################################

# TODO: use R-devel instead with --with-valgrind-instrumentation=2 too as per R-exts$4.3.2

vi ~/.R/Makevars # make the -O0 -g line active, for info on source lines with any problems
R --vanilla CMD INSTALL data.table_1.11.1.tar.gz
R -d "valgrind --tool=memcheck --leak-check=full --track-origins=yes --show-leak-kinds=definite" --vanilla
require(data.table)
test.data.table()

gctorture(TRUE) # very very slow, though. Don't run with suggests tests.
gctorture2(step=100)
test.data.table()

# Investigated and ignore :
# Tests 648 and 1262 (see their comments) have single precision issues under valgrind that don't occur on CRAN, even Solaris.
# Ignore all "set address range perms" warnings :
# http://stackoverflow.com/questions/13558067/what-does-this-valgrind-warning-mean-warning-set-address-range-perms
# Ignore heap summaries around test 1705 and 1707/1708 due to the fork() test opening/closing, I guess.
# Tests 1729.4, 1729.8, 1729.11, 1729.13 again have precision issues under valgrind only.
# Leaks for tests 1738.5, 1739.3 but no data.table .c lines are flagged, rather libcairo.so
# and libfontconfig.so via GEMetricInfo and GEStrWidth in libR.so

vi ~/.R/Makevars # make the -O3 line active again


#####################################################
# R-devel with UBSAN, ASAN and strict-barrier on too
#####################################################
Expand All @@ -199,72 +171,89 @@ cd R-devel
# If use later gcc-8, add F77=gfortran-8
# LIBS="-lpthread" otherwise ld error about DSO missing

## Rarely needed: 32bit on 64bit Ubuntu for tracing any 32bit-only problems
dpkg --add-architecture i386
apt-get update
apt-get install libc6:i386 libstdc++6:i386 gcc-multilib g++-multilib gfortran-multilib libbz2-dev:i386 liblzma-dev:i386 libpcre3-dev:i386 libcurl3-dev:i386 libstdc++-7-dev:i386
sudo apt-get purge libcurl4-openssl-dev # cannot coexist, it seems
sudo apt-get install libcurl4-openssl-dev:i386
cd ~/build/32bit/R-devel
./configure --without-recommended-packages --disable-byte-compiled-packages --disable-openmp --without-readline --without-x CC="gcc -m32" CXX="g++ -m32" F77="gfortran -m32" FC=${F77} OBJC=${CC} LDFLAGS="-L/usr/local/lib" LIBnn=lib LIBS="-lpthread" CFLAGS="-O0 -g -Wall -pedantic"
##

make
alias Rdevel='~/build/R-devel/bin/R --vanilla'
cd ~/GitHub/data.table
Rdevel CMD INSTALL data.table_1.11.1.tar.gz
# Check UBSAN and ASAN flags appear in compiler output above. Rdevel was compiled with
# them so should be passed through to here
# Check UBSAN and ASAN flags appear in compiler output above. Rdevel was compiled with them so should be passed through to here
Rdevel
install.packages(c("bit64","xts","nanotime"), repos="http://cloud.r-project.org") # minimum packages needed to not skip any tests in test.data.table()
require(data.table)
test.data.table() # slower than usual, naturally, due to UBSAN and ASAN. Too slow to run R CMD check.
for (i in 1:10) test.data.table() # last resort: try several runs; e.g a few tests generate data with a non-fixed random seed
for (i in 1:10) test.data.table() # try several runs; e.g a few tests generate data with a non-fixed random seed
# Throws /0 errors on R's summary.c (tests 648 and 1185.2) but ignore those: https://bugs.r-project.org/bugzilla3/show_bug.cgi?id=16000
q("no")

# Rebuild without ASAN/UBSAN and test again under torture
## In case want to ever try again with 32bit on 64bit Ubuntu for tracing any 32bit-only problems
## dpkg --add-architecture i386
## apt-get update
## apt-get install libc6:i386 libstdc++6:i386 gcc-multilib g++-multilib gfortran-multilib libbz2-dev:i386 liblzma-dev:i386 libpcre3-dev:i386 libcurl3-dev:i386 libstdc++-7-dev:i386
## sudo apt-get purge libcurl4-openssl-dev # cannot coexist, it seems
## sudo apt-get install libcurl4-openssl-dev:i386
## cd ~/build/32bit/R-devel
## ./configure --without-recommended-packages --disable-byte-compiled-packages --disable-openmp --without-readline --without-x CC="gcc -m32" CXX="g++ -m32" F77="gfortran -m32" FC=${F77} OBJC=${CC} LDFLAGS="-L/usr/local/lib" LIBnn=lib LIBS="-lpthread" CFLAGS="-O0 -g -Wall -pedantic"
##


###############################################
# valgrind (see R-exts$4.3.2)
###############################################

cd ~/build/R-devel
make clean
./configure --without-recommended-packages --disable-byte-compiled-packages --disable-openmp CC="gcc -fno-omit-frame-pointer" CFLAGS="-O0 -g -Wall -pedantic" LIBS="-lpthread"
./configure --without-recommended-packages --disable-byte-compiled-packages --disable-openmp --with-valgrind-instrumentation=1 CC="gcc" CFLAGS="-O0 -g -Wall -pedantic" LIBS="-lpthread"
make

cd ~/GitHub/data.table
vi ~/.R/Makevars # make the -O0 -g line active, for info on source lines with any problems
Rdevel CMD INSTALL data.table_1.11.1.tar.gz
install.packages("bit64")
require(bit64)
require(data.table)
test.data.table() # just quick re-check
gctorture2(step=100) # 1h 26m
print(Sys.time()); started.at<-proc.time(); try(test.data.table()); print(Sys.time()); print(timetaken(started.at))
export LD_PRELOAD=/usr/lib/gcc/x86_64-linux-gnu/7/libasan.so
Rdevel -d "valgrind --tool=memcheck --leak-check=full --track-origins=yes --show-leak-kinds=definite"
# gctorture(TRUE) # very very slow.
# gctorture2(step=100) # 1h 26m
print(Sys.time()); require(data.table); print(Sys.time()); started.at<-proc.time(); try(test.data.table()); print(Sys.time()); print(timetaken(started.at))
# Running test id 1437.0331 Error : protect(): protection stack overflow

# Investigated and ignore :
# Tests 648 and 1262 (see their comments) have single precision issues under valgrind that don't occur on CRAN, even Solaris.
# Ignore all "set address range perms" warnings :
# http://stackoverflow.com/questions/13558067/what-does-this-valgrind-warning-mean-warning-set-address-range-perms
# Ignore heap summaries around test 1705 and 1707/1708 due to the fork() test opening/closing, I guess.
# Tests 1729.4, 1729.8, 1729.11, 1729.13 again have precision issues under valgrind only.
# Leaks for tests 1738.5, 1739.3 but no data.table .c lines are flagged, rather libcairo.so
# and libfontconfig.so via GEMetricInfo and GEStrWidth in libR.so

vi ~/.R/Makevars # make the -O3 line active again


#############################################################################
# TODO: recompile without USE_RINTERNALS and recheck write barrier under ASAN
#############################################################################
There are some things to overcome to achieve compile without USE_RINTERNALS, though.


###############################################
# Single precision e.g. CRAN's Solaris Sparc
###############################################

########################################################################
# Single precision e.g. CRAN's Solaris-Sparc when it was alive on CRAN.
# Not Solaris-x86 which is still on CRAN and easier.
########################################################################
#
# Adding --disable-long-double (see R-exts) in the same configure as ASAN/UBSAN fails. Hence separately.

cd ~/build
rm -rf R-devel # 'make clean' isn't enough: results in link error, oddly.
tar xvf R-devel.tar.gz
cd R-devel
./configure CC="gcc -std=gnu99" CFLAGS="-O0 -g -Wall -pedantic -ffloat-store -fexcess-precision=standard" --without-recommended-packages --disable-byte-compiled-packages --disable-long-double
make
Rdevel
install.packages("bit64")
q("no")
Rdevel CMD INSTALL ~/data.table_1.9.9.tar.gz
Rdevel
.Machine$sizeof.longdouble # check 0
require(data.table)
require(bit64)
test.data.table()
q("no")
## cd ~/build
## rm -rf R-devel # 'make clean' isn't enough: results in link error, oddly.
## tar xvf R-devel.tar.gz
## cd R-devel
## ./configure CC="gcc -std=gnu99" CFLAGS="-O0 -g -Wall -pedantic -ffloat-store -fexcess-precision=standard" --without-recommended-packages --disable-byte-compiled-packages --disable-long-double
## make
## Rdevel
## install.packages("bit64")
## q("no")
## Rdevel CMD INSTALL ~/data.table_1.9.9.tar.gz
## Rdevel
## .Machine$sizeof.longdouble # check 0
## require(data.table)
## require(bit64)
## test.data.table()
## q("no")


###############################################
Expand Down
2 changes: 1 addition & 1 deletion appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ environment:

- R_VERSION: release # the single Windows.zip binary (both 32bit/64bit) that users following dev version of installation instructions should click

# - R_VERSION: devel # temporarily off due to #2767 `memory exhausted` on both 32bit and 64bit, R-devel only
# - R_VERSION: devel # temp off #2767

before_build:
- cmd: ECHO no Revision metadata added to DESCRIPTION
Expand Down
11 changes: 4 additions & 7 deletions src/assign.c
Original file line number Diff line number Diff line change
Expand Up @@ -145,21 +145,18 @@ static SEXP shallow(SEXP dt, SEXP cols, R_len_t n)
// NEW: cols argument to specify the columns to shallow copy on. If NULL, all columns.
// called from alloccol where n is checked carefully, or from shallow() at R level
// where n is set to truelength (i.e. a shallow copy only with no size change)
SEXP newdt, names, newnames;
R_len_t i,l;
int protecti=0;
PROTECT(newdt = allocVector(VECSXP, n)); // to do, use growVector here?
protecti++;
SEXP newdt = PROTECT(allocVector(VECSXP, n)); protecti++; // to do, use growVector here?
//copyMostAttrib(dt, newdt); // including class
DUPLICATE_ATTRIB(newdt, dt);
// TO DO: keepattr() would be faster, but can't because shallow isn't merely a shallow copy. It
// also increases truelength. Perhaps make that distinction, then, and split out, but marked
// so that the next change knows to duplicate.
// Does copyMostAttrib duplicate each attrib or does it point? It seems to point, hence DUPLICATE_ATTRIB
// for now otherwise example(merge.data.table) fails (since attr(d4,"sorted") gets written by setnames).
names = getAttrib(dt, R_NamesSymbol);
PROTECT(newnames = allocVector(STRSXP, n));
protecti++;
SEXP names = PROTECT(getAttrib(dt, R_NamesSymbol)); protecti++;
SEXP newnames = PROTECT(allocVector(STRSXP, n)); protecti++;
if (isNull(cols)) {
l = LENGTH(dt);
for (i=0; i<l; i++) SET_VECTOR_ELT(newdt, i, VECTOR_ELT(dt,i));
Expand Down Expand Up @@ -479,7 +476,7 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values, SEXP v
if ( MAYBE_SHARED(thisvalue) || // set() protects the NAMED of atomic vectors from .Call setting arguments to 2 by wrapping with list
(TYPEOF(values)==VECSXP && i>LENGTH(values)-1) || // recycled RHS would have columns pointing to others, #185.
(TYPEOF(values)!=VECSXP && i>0) // assigning the same values to a second column. Have to ensure a copy #2540
) {
) {
if (verbose) {
if (length(values)==length(cols)) {
// usual branch
Expand Down
18 changes: 9 additions & 9 deletions src/dogroups.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
ngrpcols = length(grpcols);
nrowgroups = length(VECTOR_ELT(groups,0));
// fix for longstanding FR/bug, #495. E.g., DT[, c(sum(v1), lapply(.SD, mean)), by=grp, .SDcols=v2:v3] resulted in error.. the idea is, 1) we create .SDall, which is normally == .SD. But if extra vars are detected in jexp other than .SD, then .SD becomes a shallow copy of .SDall with only .SDcols in .SD. Since internally, we don't make a copy, changing .SDall will reflect in .SD. Hopefully this'll workout :-).
SDall = findVar(install(".SDall"), env);
SDall = PROTECT(findVar(install(".SDall"), env)); protecti++; // PROTECT for rchk

defineVar(sym_BY, BY = allocVector(VECSXP, ngrpcols), env);
defineVar(sym_BY, BY = PROTECT(allocVector(VECSXP, ngrpcols)), env); protecti++; // PROTECT for rchk
bynames = PROTECT(allocVector(STRSXP, ngrpcols)); protecti++; // TO DO: do we really need bynames, can we assign names afterwards in one step?
for (i=0; i<ngrpcols; i++) {
j = INTEGER(grpcols)[i]-1;
Expand All @@ -61,18 +61,18 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
if (isNull(jiscols) && (length(bynames)!=length(groups) || length(bynames)!=length(grpcols))) error("!length(bynames)[%d]==length(groups)[%d]==length(grpcols)[%d]",length(bynames),length(groups),length(grpcols));
// TO DO: check this check above.

N = findVar(install(".N"), env);
GRP = findVar(install(".GRP"), env);
iSD = findVar(install(".iSD"), env); // 1-row and possibly no cols (if no i variables are used via JIS)
xSD = findVar(install(".xSD"), env);
N = PROTECT(findVar(install(".N"), env)); protecti++; // PROTECT for rchk
GRP = PROTECT(findVar(install(".GRP"), env)); protecti++;
iSD = PROTECT(findVar(install(".iSD"), env)); protecti++; // 1-row and possibly no cols (if no i variables are used via JIS)
xSD = PROTECT(findVar(install(".xSD"), env)); protecti++;
R_len_t maxGrpSize = 0;
for (R_len_t i=0; i<LENGTH(lens); i++) {
if (INTEGER(lens)[i] > maxGrpSize) maxGrpSize = INTEGER(lens)[i];
}
defineVar(install(".I"), I = allocVector(INTSXP, maxGrpSize), env);
defineVar(install(".I"), I = PROTECT(allocVector(INTSXP, maxGrpSize)), env); protecti++;
R_LockBinding(install(".I"), env);

dtnames = getAttrib(dt, R_NamesSymbol); // added here to fix #4990 - `:=` did not issue recycling warning during "by"
dtnames = PROTECT(getAttrib(dt, R_NamesSymbol)); protecti++; // added here to fix #4990 - `:=` did not issue recycling warning during "by"
// fetch rownames of .SD. rownames[1] is set to -thislen for each group, in case .SD is passed to
// non data.table aware package that uses rownames
for (s = ATTRIB(SDall); s != R_NilValue && TAG(s)!=R_RowNamesSymbol; s = CDR(s));
Expand All @@ -83,7 +83,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX

// fetch names of .SD and prepare symbols. In case they are copied-on-write by user assigning to those variables
// using <- in j (which is valid, useful and tested), they are repointed to the .SD cols for each group.
names = getAttrib(SDall, R_NamesSymbol);
names = PROTECT(getAttrib(SDall, R_NamesSymbol)); protecti++;
if (length(names) != length(SDall)) error("length(names)!=length(SD)");
SEXP *nameSyms = (SEXP *)R_alloc(length(names), sizeof(SEXP));
for(i = 0; i < length(SDall); i++) {
Expand Down
45 changes: 21 additions & 24 deletions src/fmelt.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,33 +5,30 @@

// generate from 1 to n (a simple fun for melt, vecseq is convenient from R due to SEXP inputs)
SEXP seq_int(int n, int start) {
SEXP ans = R_NilValue;
int i;
if (n <= 0) return(ans);
PROTECT(ans = allocVector(INTSXP, n));
for (i=0; i<n; i++) INTEGER(ans)[i] = start+i;
if (n <= 0) return(R_NilValue);
SEXP ans = PROTECT(allocVector(INTSXP, n));
for (int i=0; i<n; i++) INTEGER(ans)[i] = start+i;
UNPROTECT(1);
return(ans);
}

// very specific "set_diff" for integers
SEXP set_diff(SEXP x, int n) {
SEXP ans, xmatch;
int i, j = 0;
if (TYPEOF(x) != INTSXP) error("'x' must be an integer");
if (n <= 0) error("'n' must be a positive integer");
xmatch = match(x, seq_int(n, 1), 0); // took a while to realise: matches vec against x - thanks to comment from Matthew in assign.c!

SEXP table = PROTECT(seq_int(n, 1)); // TODO: using match to 1:n seems odd here, why use match at all
SEXP xmatch = PROTECT(match(x, table, 0)); // Old comment:took a while to realise: matches vec against x - thanks to comment from Matt in assign.c!
int *buf = (int *) R_alloc(n, sizeof(int));
for (i=0; i<n; i++) {
int j=0;
for (int i=0; i<n; i++) {
if (INTEGER(xmatch)[i] == 0) {
buf[j++] = i+1;
}
}
n = j;
PROTECT(ans = allocVector(INTSXP, n));
SEXP ans = PROTECT(allocVector(INTSXP, n));
if (n) memcpy(INTEGER(ans), buf, sizeof(int) * n); // sizeof is of type size_t - no integer overflow issues
UNPROTECT(1);
UNPROTECT(3);
return(ans);
}

Expand Down Expand Up @@ -166,7 +163,8 @@ static SEXP unlist_(SEXP xint) {
SEXP checkVars(SEXP DT, SEXP id, SEXP measure, Rboolean verbose) {
int i, ncol=LENGTH(DT), targetcols=0, protecti=0, u=0, v=0;
SEXP thiscol, idcols = R_NilValue, valuecols = R_NilValue, tmp, tmp2, booltmp, unqtmp, ans;
SEXP dtnames = getAttrib(DT, R_NamesSymbol);
SEXP dtnames = PROTECT(getAttrib(DT, R_NamesSymbol)); // PROTECT for rchk
protecti++;

if (isNull(id) && isNull(measure)) {
for (i=0; i<ncol; i++) {
Expand Down Expand Up @@ -490,11 +488,11 @@ SEXP getvarcols(SEXP DT, SEXP dtnames, Rboolean varfactor, Rboolean verbose, str
if (data->lvalues == 1) {
thisvaluecols = VECTOR_ELT(data->valuecols, 0);
// tmp fix for #1055
thisnames = PROTECT(allocVector(STRSXP, length(thisvaluecols)));
thisnames = PROTECT(allocVector(STRSXP, length(thisvaluecols))); protecti++;
for (i=0; i<length(thisvaluecols); i++) {
SET_STRING_ELT(thisnames, i, STRING_ELT(dtnames, INTEGER(thisvaluecols)[i]-1));
}
matchvals = PROTECT(match(thisnames, thisnames, 0));
matchvals = PROTECT(match(thisnames, thisnames, 0)); protecti++;
if (data->narm) {
for (j=0; j<data->lmax; j++) {
thislen = length(VECTOR_ELT(data->naidx, j));
Expand All @@ -511,7 +509,6 @@ SEXP getvarcols(SEXP DT, SEXP dtnames, Rboolean varfactor, Rboolean verbose, str
}
nlevels = data->lmax;
}
UNPROTECT(2); // matchvals, thisnames
} else {
if (data->narm) {
for (j=0; j<data->lmax; j++) {
Expand All @@ -529,26 +526,26 @@ SEXP getvarcols(SEXP DT, SEXP dtnames, Rboolean varfactor, Rboolean verbose, str
nlevels = data->lmax;
}
}
SEXP tmp = PROTECT(mkString("factor"));
SEXP tmp = PROTECT(mkString("factor")); protecti++;
setAttrib(target, R_ClassSymbol, tmp);
UNPROTECT(1); // tmp
cnt = 0;
if (data->lvalues == 1) {
levels = PROTECT(allocVector(STRSXP, nlevels));
levels = PROTECT(allocVector(STRSXP, nlevels)); protecti++;
thisvaluecols = VECTOR_ELT(data->valuecols, 0); // levels will be column names
for (i=0; i<data->lmax; i++) {
if (data->narm) {
if (length(VECTOR_ELT(data->naidx, i)) == 0) continue;
}
SET_STRING_ELT(levels, cnt++, STRING_ELT(dtnames, INTEGER(thisvaluecols)[i]-1));
}
} else levels = PROTECT(coerceVector(seq_int(nlevels, 1), STRSXP)); // generate levels = 1:nlevels
} else {
SEXP tt = PROTECT(seq_int(nlevels, 1)); protecti++; // generate levels = 1:nlevels
levels = PROTECT(coerceVector(tt, STRSXP)); protecti++; // tt PROTECTED for rchk
}
// base::unique is fast on vectors, and the levels on variable columns are usually small
SEXP uniqueLangSxp = PROTECT(lang2(install("unique"), levels));
tmp = PROTECT(eval(uniqueLangSxp, R_GlobalEnv));
SEXP uniqueLangSxp = PROTECT(lang2(install("unique"), levels)); protecti++;
tmp = PROTECT(eval(uniqueLangSxp, R_GlobalEnv)); protecti++;
setAttrib(target, R_LevelsSymbol, tmp);
UNPROTECT(1); // tmp
UNPROTECT(2); // levels, uniqueLangSxp
if (!varfactor) SET_VECTOR_ELT(ansvars, 0, asCharacterFactor(target));
UNPROTECT(protecti);
return(ansvars);
Expand Down
Loading