diff --git a/CRAN_Release.cmd b/CRAN_Release.cmd index d6a2591857..3f42f5ae31 100644 --- a/CRAN_Release.cmd +++ b/CRAN_Release.cmd @@ -142,9 +142,9 @@ require(data.table) test.data.table() -############################################### -# Compiles from source when OpenMP is disabled -############################################### +############################################ +# Check compiles ok when OpenMP is disabled +############################################ vi ~/.R/Makevars # Make line SHLIB_OPENMP_CFLAGS= active to remove -fopenmp R CMD build . @@ -154,34 +154,6 @@ require(data.table) # observe startup message about no OpenMP detected test.data.table() -############################################### -# valgrind -############################################### - -# TODO: use R-devel instead with --with-valgrind-instrumentation=2 too as per R-exts$4.3.2 - -vi ~/.R/Makevars # make the -O0 -g line active, for info on source lines with any problems -R --vanilla CMD INSTALL data.table_1.11.1.tar.gz -R -d "valgrind --tool=memcheck --leak-check=full --track-origins=yes --show-leak-kinds=definite" --vanilla -require(data.table) -test.data.table() - -gctorture(TRUE) # very very slow, though. Don't run with suggests tests. -gctorture2(step=100) -test.data.table() - -# Investigated and ignore : -# Tests 648 and 1262 (see their comments) have single precision issues under valgrind that don't occur on CRAN, even Solaris. -# Ignore all "set address range perms" warnings : -# http://stackoverflow.com/questions/13558067/what-does-this-valgrind-warning-mean-warning-set-address-range-perms -# Ignore heap summaries around test 1705 and 1707/1708 due to the fork() test opening/closing, I guess. -# Tests 1729.4, 1729.8, 1729.11, 1729.13 again have precision issues under valgrind only. -# Leaks for tests 1738.5, 1739.3 but no data.table .c lines are flagged, rather libcairo.so -# and libfontconfig.so via GEMetricInfo and GEStrWidth in libR.so - -vi ~/.R/Makevars # make the -O3 line active again - - ##################################################### # R-devel with UBSAN, ASAN and strict-barrier on too ##################################################### @@ -199,43 +171,60 @@ cd R-devel # If use later gcc-8, add F77=gfortran-8 # LIBS="-lpthread" otherwise ld error about DSO missing -## Rarely needed: 32bit on 64bit Ubuntu for tracing any 32bit-only problems -dpkg --add-architecture i386 -apt-get update -apt-get install libc6:i386 libstdc++6:i386 gcc-multilib g++-multilib gfortran-multilib libbz2-dev:i386 liblzma-dev:i386 libpcre3-dev:i386 libcurl3-dev:i386 libstdc++-7-dev:i386 -sudo apt-get purge libcurl4-openssl-dev # cannot coexist, it seems -sudo apt-get install libcurl4-openssl-dev:i386 -cd ~/build/32bit/R-devel -./configure --without-recommended-packages --disable-byte-compiled-packages --disable-openmp --without-readline --without-x CC="gcc -m32" CXX="g++ -m32" F77="gfortran -m32" FC=${F77} OBJC=${CC} LDFLAGS="-L/usr/local/lib" LIBnn=lib LIBS="-lpthread" CFLAGS="-O0 -g -Wall -pedantic" -## - make alias Rdevel='~/build/R-devel/bin/R --vanilla' cd ~/GitHub/data.table Rdevel CMD INSTALL data.table_1.11.1.tar.gz -# Check UBSAN and ASAN flags appear in compiler output above. Rdevel was compiled with -# them so should be passed through to here +# Check UBSAN and ASAN flags appear in compiler output above. Rdevel was compiled with them so should be passed through to here Rdevel install.packages(c("bit64","xts","nanotime"), repos="http://cloud.r-project.org") # minimum packages needed to not skip any tests in test.data.table() require(data.table) test.data.table() # slower than usual, naturally, due to UBSAN and ASAN. Too slow to run R CMD check. -for (i in 1:10) test.data.table() # last resort: try several runs; e.g a few tests generate data with a non-fixed random seed +for (i in 1:10) test.data.table() # try several runs; e.g a few tests generate data with a non-fixed random seed # Throws /0 errors on R's summary.c (tests 648 and 1185.2) but ignore those: https://bugs.r-project.org/bugzilla3/show_bug.cgi?id=16000 q("no") -# Rebuild without ASAN/UBSAN and test again under torture +## In case want to ever try again with 32bit on 64bit Ubuntu for tracing any 32bit-only problems +## dpkg --add-architecture i386 +## apt-get update +## apt-get install libc6:i386 libstdc++6:i386 gcc-multilib g++-multilib gfortran-multilib libbz2-dev:i386 liblzma-dev:i386 libpcre3-dev:i386 libcurl3-dev:i386 libstdc++-7-dev:i386 +## sudo apt-get purge libcurl4-openssl-dev # cannot coexist, it seems +## sudo apt-get install libcurl4-openssl-dev:i386 +## cd ~/build/32bit/R-devel +## ./configure --without-recommended-packages --disable-byte-compiled-packages --disable-openmp --without-readline --without-x CC="gcc -m32" CXX="g++ -m32" F77="gfortran -m32" FC=${F77} OBJC=${CC} LDFLAGS="-L/usr/local/lib" LIBnn=lib LIBS="-lpthread" CFLAGS="-O0 -g -Wall -pedantic" +## + + +############################################### +# valgrind (see R-exts$4.3.2) +############################################### + +cd ~/build/R-devel make clean -./configure --without-recommended-packages --disable-byte-compiled-packages --disable-openmp CC="gcc -fno-omit-frame-pointer" CFLAGS="-O0 -g -Wall -pedantic" LIBS="-lpthread" +./configure --without-recommended-packages --disable-byte-compiled-packages --disable-openmp --with-valgrind-instrumentation=1 CC="gcc" CFLAGS="-O0 -g -Wall -pedantic" LIBS="-lpthread" make + +cd ~/GitHub/data.table +vi ~/.R/Makevars # make the -O0 -g line active, for info on source lines with any problems Rdevel CMD INSTALL data.table_1.11.1.tar.gz -install.packages("bit64") -require(bit64) -require(data.table) -test.data.table() # just quick re-check -gctorture2(step=100) # 1h 26m -print(Sys.time()); started.at<-proc.time(); try(test.data.table()); print(Sys.time()); print(timetaken(started.at)) +export LD_PRELOAD=/usr/lib/gcc/x86_64-linux-gnu/7/libasan.so +Rdevel -d "valgrind --tool=memcheck --leak-check=full --track-origins=yes --show-leak-kinds=definite" +# gctorture(TRUE) # very very slow. +# gctorture2(step=100) # 1h 26m +print(Sys.time()); require(data.table); print(Sys.time()); started.at<-proc.time(); try(test.data.table()); print(Sys.time()); print(timetaken(started.at)) # Running test id 1437.0331 Error : protect(): protection stack overflow +# Investigated and ignore : +# Tests 648 and 1262 (see their comments) have single precision issues under valgrind that don't occur on CRAN, even Solaris. +# Ignore all "set address range perms" warnings : +# http://stackoverflow.com/questions/13558067/what-does-this-valgrind-warning-mean-warning-set-address-range-perms +# Ignore heap summaries around test 1705 and 1707/1708 due to the fork() test opening/closing, I guess. +# Tests 1729.4, 1729.8, 1729.11, 1729.13 again have precision issues under valgrind only. +# Leaks for tests 1738.5, 1739.3 but no data.table .c lines are flagged, rather libcairo.so +# and libfontconfig.so via GEMetricInfo and GEStrWidth in libR.so + +vi ~/.R/Makevars # make the -O3 line active again + ############################################################################# # TODO: recompile without USE_RINTERNALS and recheck write barrier under ASAN @@ -243,28 +232,28 @@ print(Sys.time()); started.at<-proc.time(); try(test.data.table()); print(Sys.ti There are some things to overcome to achieve compile without USE_RINTERNALS, though. -############################################### -# Single precision e.g. CRAN's Solaris Sparc -############################################### - +######################################################################## +# Single precision e.g. CRAN's Solaris-Sparc when it was alive on CRAN. +# Not Solaris-x86 which is still on CRAN and easier. +######################################################################## +# # Adding --disable-long-double (see R-exts) in the same configure as ASAN/UBSAN fails. Hence separately. - -cd ~/build -rm -rf R-devel # 'make clean' isn't enough: results in link error, oddly. -tar xvf R-devel.tar.gz -cd R-devel -./configure CC="gcc -std=gnu99" CFLAGS="-O0 -g -Wall -pedantic -ffloat-store -fexcess-precision=standard" --without-recommended-packages --disable-byte-compiled-packages --disable-long-double -make -Rdevel -install.packages("bit64") -q("no") -Rdevel CMD INSTALL ~/data.table_1.9.9.tar.gz -Rdevel -.Machine$sizeof.longdouble # check 0 -require(data.table) -require(bit64) -test.data.table() -q("no") +## cd ~/build +## rm -rf R-devel # 'make clean' isn't enough: results in link error, oddly. +## tar xvf R-devel.tar.gz +## cd R-devel +## ./configure CC="gcc -std=gnu99" CFLAGS="-O0 -g -Wall -pedantic -ffloat-store -fexcess-precision=standard" --without-recommended-packages --disable-byte-compiled-packages --disable-long-double +## make +## Rdevel +## install.packages("bit64") +## q("no") +## Rdevel CMD INSTALL ~/data.table_1.9.9.tar.gz +## Rdevel +## .Machine$sizeof.longdouble # check 0 +## require(data.table) +## require(bit64) +## test.data.table() +## q("no") ############################################### diff --git a/appveyor.yml b/appveyor.yml index 90852cebd8..dfe85c6c1c 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -28,7 +28,7 @@ environment: - R_VERSION: release # the single Windows.zip binary (both 32bit/64bit) that users following dev version of installation instructions should click -# - R_VERSION: devel # temporarily off due to #2767 `memory exhausted` on both 32bit and 64bit, R-devel only +# - R_VERSION: devel # temp off #2767 before_build: - cmd: ECHO no Revision metadata added to DESCRIPTION diff --git a/src/assign.c b/src/assign.c index 3b78b7adc0..c919294ef7 100644 --- a/src/assign.c +++ b/src/assign.c @@ -145,11 +145,9 @@ static SEXP shallow(SEXP dt, SEXP cols, R_len_t n) // NEW: cols argument to specify the columns to shallow copy on. If NULL, all columns. // called from alloccol where n is checked carefully, or from shallow() at R level // where n is set to truelength (i.e. a shallow copy only with no size change) - SEXP newdt, names, newnames; R_len_t i,l; int protecti=0; - PROTECT(newdt = allocVector(VECSXP, n)); // to do, use growVector here? - protecti++; + SEXP newdt = PROTECT(allocVector(VECSXP, n)); protecti++; // to do, use growVector here? //copyMostAttrib(dt, newdt); // including class DUPLICATE_ATTRIB(newdt, dt); // TO DO: keepattr() would be faster, but can't because shallow isn't merely a shallow copy. It @@ -157,9 +155,8 @@ static SEXP shallow(SEXP dt, SEXP cols, R_len_t n) // so that the next change knows to duplicate. // Does copyMostAttrib duplicate each attrib or does it point? It seems to point, hence DUPLICATE_ATTRIB // for now otherwise example(merge.data.table) fails (since attr(d4,"sorted") gets written by setnames). - names = getAttrib(dt, R_NamesSymbol); - PROTECT(newnames = allocVector(STRSXP, n)); - protecti++; + SEXP names = PROTECT(getAttrib(dt, R_NamesSymbol)); protecti++; + SEXP newnames = PROTECT(allocVector(STRSXP, n)); protecti++; if (isNull(cols)) { l = LENGTH(dt); for (i=0; iLENGTH(values)-1) || // recycled RHS would have columns pointing to others, #185. (TYPEOF(values)!=VECSXP && i>0) // assigning the same values to a second column. Have to ensure a copy #2540 - ) { + ) { if (verbose) { if (length(values)==length(cols)) { // usual branch diff --git a/src/dogroups.c b/src/dogroups.c index eab3e2cf26..57c71bd6c6 100644 --- a/src/dogroups.c +++ b/src/dogroups.c @@ -41,9 +41,9 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX ngrpcols = length(grpcols); nrowgroups = length(VECTOR_ELT(groups,0)); // fix for longstanding FR/bug, #495. E.g., DT[, c(sum(v1), lapply(.SD, mean)), by=grp, .SDcols=v2:v3] resulted in error.. the idea is, 1) we create .SDall, which is normally == .SD. But if extra vars are detected in jexp other than .SD, then .SD becomes a shallow copy of .SDall with only .SDcols in .SD. Since internally, we don't make a copy, changing .SDall will reflect in .SD. Hopefully this'll workout :-). - SDall = findVar(install(".SDall"), env); + SDall = PROTECT(findVar(install(".SDall"), env)); protecti++; // PROTECT for rchk - defineVar(sym_BY, BY = allocVector(VECSXP, ngrpcols), env); + defineVar(sym_BY, BY = PROTECT(allocVector(VECSXP, ngrpcols)), env); protecti++; // PROTECT for rchk bynames = PROTECT(allocVector(STRSXP, ngrpcols)); protecti++; // TO DO: do we really need bynames, can we assign names afterwards in one step? for (i=0; i maxGrpSize) maxGrpSize = INTEGER(lens)[i]; } - defineVar(install(".I"), I = allocVector(INTSXP, maxGrpSize), env); + defineVar(install(".I"), I = PROTECT(allocVector(INTSXP, maxGrpSize)), env); protecti++; R_LockBinding(install(".I"), env); - dtnames = getAttrib(dt, R_NamesSymbol); // added here to fix #4990 - `:=` did not issue recycling warning during "by" + dtnames = PROTECT(getAttrib(dt, R_NamesSymbol)); protecti++; // added here to fix #4990 - `:=` did not issue recycling warning during "by" // fetch rownames of .SD. rownames[1] is set to -thislen for each group, in case .SD is passed to // non data.table aware package that uses rownames for (s = ATTRIB(SDall); s != R_NilValue && TAG(s)!=R_RowNamesSymbol; s = CDR(s)); @@ -83,7 +83,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX // fetch names of .SD and prepare symbols. In case they are copied-on-write by user assigning to those variables // using <- in j (which is valid, useful and tested), they are repointed to the .SD cols for each group. - names = getAttrib(SDall, R_NamesSymbol); + names = PROTECT(getAttrib(SDall, R_NamesSymbol)); protecti++; if (length(names) != length(SDall)) error("length(names)!=length(SD)"); SEXP *nameSyms = (SEXP *)R_alloc(length(names), sizeof(SEXP)); for(i = 0; i < length(SDall); i++) { diff --git a/src/fmelt.c b/src/fmelt.c index 0cdd2cef58..cace3587b8 100644 --- a/src/fmelt.c +++ b/src/fmelt.c @@ -5,33 +5,30 @@ // generate from 1 to n (a simple fun for melt, vecseq is convenient from R due to SEXP inputs) SEXP seq_int(int n, int start) { - SEXP ans = R_NilValue; - int i; - if (n <= 0) return(ans); - PROTECT(ans = allocVector(INTSXP, n)); - for (i=0; ilvalues == 1) { thisvaluecols = VECTOR_ELT(data->valuecols, 0); // tmp fix for #1055 - thisnames = PROTECT(allocVector(STRSXP, length(thisvaluecols))); + thisnames = PROTECT(allocVector(STRSXP, length(thisvaluecols))); protecti++; for (i=0; inarm) { for (j=0; jlmax; j++) { thislen = length(VECTOR_ELT(data->naidx, j)); @@ -511,7 +509,6 @@ SEXP getvarcols(SEXP DT, SEXP dtnames, Rboolean varfactor, Rboolean verbose, str } nlevels = data->lmax; } - UNPROTECT(2); // matchvals, thisnames } else { if (data->narm) { for (j=0; jlmax; j++) { @@ -529,12 +526,11 @@ SEXP getvarcols(SEXP DT, SEXP dtnames, Rboolean varfactor, Rboolean verbose, str nlevels = data->lmax; } } - SEXP tmp = PROTECT(mkString("factor")); + SEXP tmp = PROTECT(mkString("factor")); protecti++; setAttrib(target, R_ClassSymbol, tmp); - UNPROTECT(1); // tmp cnt = 0; if (data->lvalues == 1) { - levels = PROTECT(allocVector(STRSXP, nlevels)); + levels = PROTECT(allocVector(STRSXP, nlevels)); protecti++; thisvaluecols = VECTOR_ELT(data->valuecols, 0); // levels will be column names for (i=0; ilmax; i++) { if (data->narm) { @@ -542,13 +538,14 @@ SEXP getvarcols(SEXP DT, SEXP dtnames, Rboolean varfactor, Rboolean verbose, str } SET_STRING_ELT(levels, cnt++, STRING_ELT(dtnames, INTEGER(thisvaluecols)[i]-1)); } - } else levels = PROTECT(coerceVector(seq_int(nlevels, 1), STRSXP)); // generate levels = 1:nlevels + } else { + SEXP tt = PROTECT(seq_int(nlevels, 1)); protecti++; // generate levels = 1:nlevels + levels = PROTECT(coerceVector(tt, STRSXP)); protecti++; // tt PROTECTED for rchk + } // base::unique is fast on vectors, and the levels on variable columns are usually small - SEXP uniqueLangSxp = PROTECT(lang2(install("unique"), levels)); - tmp = PROTECT(eval(uniqueLangSxp, R_GlobalEnv)); + SEXP uniqueLangSxp = PROTECT(lang2(install("unique"), levels)); protecti++; + tmp = PROTECT(eval(uniqueLangSxp, R_GlobalEnv)); protecti++; setAttrib(target, R_LevelsSymbol, tmp); - UNPROTECT(1); // tmp - UNPROTECT(2); // levels, uniqueLangSxp if (!varfactor) SET_VECTOR_ELT(ansvars, 0, asCharacterFactor(target)); UNPROTECT(protecti); return(ansvars); diff --git a/src/freadR.c b/src/freadR.c index 73c4ed2784..ef5af95261 100644 --- a/src/freadR.c +++ b/src/freadR.c @@ -39,7 +39,6 @@ static int8_t *type; static int8_t *size; static int ncol = 0; static int64_t dtnrows = 0; -static int protecti=0; static _Bool verbose = 0; static _Bool warningsAreErrors = 0; @@ -74,7 +73,6 @@ SEXP freadR( warningsAreErrors = LOGICAL(warnings2errorsArg)[0]; freadMainArgs args; - protecti=0; ncol = 0; dtnrows = 0; const char *ch, *ch2; @@ -178,20 +176,20 @@ SEXP freadR( DT = R_NilValue; // created by callback freadMain(args); - UNPROTECT(protecti); + if (DT!=R_NilValue) UNPROTECT(1); // DT is allocated in allocateDT. See notes there. return DT; } _Bool userOverride(int8_t *type, lenOff *colNames, const char *anchor, int ncol) { + int protecti = 0; // use typeSize superfluously to avoid not-used warning; otherwise could move typeSize from fread.h into fread.c if (typeSize[CT_BOOL8_N]!=1) STOP("Internal error: typeSize[CT_BOOL8_N] != 1"); if (typeSize[CT_STRING]!=8) STOP("Internal error: typeSize[CT_STRING] != 1"); colNamesSxp = R_NilValue; if (colNames!=NULL) { - colNamesSxp = PROTECT(allocVector(STRSXP, ncol)); - protecti++; + colNamesSxp = PROTECT(allocVector(STRSXP, ncol)); protecti++; for (int i=0; i= 0; --i) { - elem = VECTOR_ELT(factorLevels, i); + SEXP elem = VECTOR_ELT(factorLevels, i); n = LENGTH(elem); for (j = n-1; j >= 0; --j) { idx = data.hash(elem, j, &data); @@ -209,7 +208,7 @@ SEXP combineFactorLevels(SEXP factorLevels, int * factorType, Rboolean * isRowOr } } - SEXP finalLevels = PROTECT(allocVector(STRSXP, uniqlen)); + SEXP finalLevels = PROTECT(allocVector(STRSXP, uniqlen)); // UNPROTECTed at the end of this function R_len_t counter = 0; if (*factorType == 2) { int *locs = (int *)R_alloc(len, sizeof(int)); @@ -221,8 +220,7 @@ SEXP combineFactorLevels(SEXP factorLevels, int * factorType, Rboolean * isRowOr SEXP tmp; for (i = 0; i < len; ++i) { if (!isRowOrdered[i]) continue; - - elem = VECTOR_ELT(factorLevels, i); + SEXP elem = VECTOR_ELT(factorLevels, i); n = LENGTH(elem); for (j = locs[i]; j < n; ++j) { idx = data.hash(elem, j, &data); @@ -261,8 +259,7 @@ SEXP combineFactorLevels(SEXP factorLevels, int * factorType, Rboolean * isRowOr Rboolean record; for (i = 0; i < len; ++i) { if (isRowOrdered[i]) continue; - - elem = VECTOR_ELT(factorLevels, i); + SEXP elem = VECTOR_ELT(factorLevels, i); n = LENGTH(elem); for (j = 0; j < n; ++j) { idx = data.hash(elem, j, &data); @@ -296,7 +293,7 @@ SEXP combineFactorLevels(SEXP factorLevels, int * factorType, Rboolean * isRowOr normalFactor: if (*factorType == 1) { for (i = 0; i < len; ++i) { - elem = VECTOR_ELT(factorLevels, i); + SEXP elem = VECTOR_ELT(factorLevels, i); n = LENGTH(elem); for (j = 0; j < n; ++j) { idx = data.hash(elem, j, &data); @@ -318,7 +315,7 @@ SEXP combineFactorLevels(SEXP factorLevels, int * factorType, Rboolean * isRowOr // CleanHashTable(&data); No longer needed now we use R_alloc(). But the hash table approach // will be removed completely at some point. - + UNPROTECT(1); // finalLevels return finalLevels; } @@ -611,7 +608,7 @@ SEXP rbindlist(SEXP l, SEXP sexp_usenames, SEXP sexp_fill, SEXP idcol) { struct preprocessData data; Rboolean usenames, fill, to_copy = FALSE, coerced=FALSE, isidcol = !isNull(idcol); SEXP fnames = R_NilValue, findices = R_NilValue, f_ind = R_NilValue, ans, lf, li, target, thiscol, levels; - SEXP factorLevels = R_NilValue, finalFactorLevels; + SEXP factorLevels = R_NilValue; R_len_t protecti=0; // first level of error checks @@ -755,7 +752,7 @@ SEXP rbindlist(SEXP l, SEXP sexp_usenames, SEXP sexp_fill, SEXP idcol) { } } if (data.is_factor[j]) { - finalFactorLevels = combineFactorLevels(factorLevels, &(data.is_factor[j]), isRowOrdered); + SEXP finalFactorLevels = PROTECT(combineFactorLevels(factorLevels, &(data.is_factor[j]), isRowOrdered)); SEXP factorLangSxp = PROTECT(lang3(install(data.is_factor[j] == 1 ? "factor" : "ordered"), target, finalFactorLevels)); SET_VECTOR_ELT(ans, j+isidcol, eval(factorLangSxp, R_GlobalEnv));