Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions CRAN_Release.cmd
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ grep "class *=" data.table/src/*.c # quite a few but none global
# Failed clang 3.9.1 -O3 due to this, I think.
grep "&REAL" data.table/src/*.c

# No use of long long, instead use int64_t. TODO
# grep "long long" data.table/src/*.c

# seal leak potential where two unprotected API calls are passed to the same
# function call, usually involving install() or mkChar()
# Greppable thanks to single lines and wide screens
Expand Down Expand Up @@ -71,7 +74,7 @@ grep ScalarString *.c

cd
R
cc(clean=TRUE) # to compile with -pedandic
cc(clean=TRUE) # to compile with -pedandic. Also use very latest gcc (currently gcc-7) as CRAN does
q("no")
R CMD build data.table
R CMD check data.table_1.10.1.tar.gz --as-cran
Expand Down Expand Up @@ -146,7 +149,7 @@ tar xvf R-devel.tar.gz
cd R-devel
# Following R-exts#4.3.3
# (clang 3.6.0 works but gcc 4.9.2 fails in R's distance.c:256 error: ‘*.Lubsan_data0’ not specified in enclosing parallel)
./configure CC="clang -std=gnu99 -fsanitize=undefined,address" CFLAGS="-fno-omit-frame-pointer -O0 -g -Wall -pedantic -mtune=native" --without-recommended-packages --disable-byte-compiled-packages
./configure CC="clang -std=gnu99 -fsanitize=undefined,address" CFLAGS="-fno-omit-frame-pointer -O0 -g -Wall -pedantic -mtune=native" --without-recommended-packages --disable-byte-compiled-packages
make
alias Rdevel='~/build/R-devel/bin/R --vanilla'
Rdevel
Expand Down Expand Up @@ -244,7 +247,7 @@ shutdown now # doesn't return you to host prompt properly so just kill the win
sudo apt-get update
sudo apt-get -y install htop
sudo apt-get -y install r-base r-base-dev
sudo apt-get -y build-dep r-base-dev
sudo apt-get -y build-dep r-base-dev
sudo apt-get -y build-dep qpdf
sudo apt-get -y build-dep r-cran-rgl
sudo apt-get -y build-dep r-cran-rmpi
Expand Down Expand Up @@ -302,7 +305,7 @@ old = 0
new = 0
for (p in deps) {
fn = paste0(p, "_", avail[p,"Version"], ".tar.gz")
if (!file.exists(fn) ||
if (!file.exists(fn) ||
identical(tryCatch(packageVersion(p), error=function(e)FALSE), FALSE) ||
packageVersion(p) != avail[p,"Version"]) {
system(paste0("rm -f ", p, "*.tar.gz")) # Remove previous *.tar.gz. -f to be silent if not there (i.e. first time seeing this package)
Expand Down Expand Up @@ -388,7 +391,7 @@ run = function(all=FALSE) {
x = deps[!x]
if (!length(x)) { cat("All package checks have already run. To rerun all: run(all=TRUE).\n"); return(); }
cat("Running checks for",length(x),"packages\n")
cmd = paste0("ls -1 *.tar.gz | grep -E '", paste0(x,collapse="|"),"' | parallel R CMD check")
cmd = paste0("ls -1 *.tar.gz | grep -E '", paste0(x,collapse="|"),"' | parallel R CMD check")
} else {
cmd = "rm -rf *.Rcheck ; ls -1 *.tar.gz | parallel R CMD check"
# apx 2.5 hrs for 313 packages on my 4 cpu laptop with 8 threads
Expand Down
64 changes: 36 additions & 28 deletions NEWS.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion R/data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -2411,7 +2411,7 @@ alloc.col <- function(DT, n=getOption("datatable.alloccol"), verbose=getOption("
name = as.character(name)
assign(name,ans,parent.frame(),inherits=TRUE)
}
.Call(Csetnamed,ans,0L)
.Call(Csetmutable,ans)
}

selfrefok <- function(DT,verbose=getOption("datatable.verbose")) {
Expand Down
40 changes: 23 additions & 17 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ if (!.devtesting) {
# tests can be used but in dev they test the package in .GlobalEnv. If we used ::: throughout tests, that
# would pick up the installed version and in dev you'd have to reinstall every time which slows down dev.
# NB: The string "data.table:::" should exist nowhere else in this file other than here inside this branch.

test = data.table:::test
INT = data.table:::INT
compactprint = data.table:::compactprint
Expand Down Expand Up @@ -78,8 +78,8 @@ if (!.devtesting) {
.shallow = data.table:::.shallow
getdots = data.table:::getdots
binary = data.table:::binary
# Also, for functions that are masked by other packages, we need to map the data.table one. Or else,

# Also, for functions that are masked by other packages, we need to map the data.table one. Or else,
# the other package's function would be picked up. As above, we only need to do this because we desire
# to develop in .GlobalEnv with cc().
# NB: The string "data.table::" should exist nowhere else in this file other than here inside this branch.
Expand Down Expand Up @@ -5785,7 +5785,7 @@ test(1418.2, DT[2,c("foo","baz"):=10L,verbose=TRUE], output=".*Dropping index 'b
DT <- data.table(x1 = c(1,1,1,1,1,2,2,2,2,2),
x2 = c(1,1,2,2,2,1,1,2,2,2),
x3 = c(1,2,1,1,2,1,1,1,1,2),
y = rnorm(10),
y = rnorm(10),
key = c("x1", "x2", "x3"))
thisDT <- copy(DT)[2, x1 := 3]
test(1419.1, key(thisDT), NULL)
Expand Down Expand Up @@ -7041,8 +7041,8 @@ test(1545.137, key(.shallow(x1, cols=c("a1", "a2"), retain.key=TRUE)), "a1")
test(1545.138, key(.shallow(x1, cols=c("a3"), retain.key=TRUE)), NULL)

# tests for #2336. .shallow now retains indices as well
x1 <- data.table(a1 = c('a', 'a', 'a', 'a', 'b', 'c'),
a2 = c(1L, 1L, 1L, 2L, 2L, 2L),
x1 <- data.table(a1 = c('a', 'a', 'a', 'a', 'b', 'c'),
a2 = c(1L, 1L, 1L, 2L, 2L, 2L),
a3 = c(FALSE, TRUE, FALSE, FALSE, FALSE, TRUE))
setindex(x1, a1, a2, a3)
setindex(x1, a1, a3)
Expand Down Expand Up @@ -9505,12 +9505,12 @@ if (.Platform$OS.type=="unix") {
# as used by package popEpi in its tests
test(1704, all.equal(data.table( a=1:3, b=4:6 ), data.table( A=1:3, B=4:6 ), check.attributes=FALSE))

if (.Platform$OS.type!="windows") {
# setDTthreads(2) was at the top of this file (tests.Rraw). Since CRAN's policy is max two.
# However, under UBSAN and ASAN, threads are limited to 1 thread, so only run this test when we have 2 threads.
if (.Platform$OS.type!="windows" && getDTthreads()==2) {
# On Windows: "'mc.cores' > 1 is not supported on Windows".
# parallel package isn't parallel on Windows, but data.table is.
if ("package:parallel" %in% search()) { #1745 and #1727
test(1705, getDTthreads()<=2) # this was set at the top of tests.Rraw. CRAN's policy is max two.
# not 1, to pass on Rdevel clag UBSAN and ASAN without OpenMP
lx <- replicate(4, runif(1e5), simplify=FALSE)
f <- function(mc.cores = 2, threads = 2) {
setDTthreads(threads)
Expand All @@ -9519,9 +9519,11 @@ if (.Platform$OS.type!="windows") {
f(1, 1) # was always ok
f(2, 1) # was always ok
f(1, 2) # was always ok
f(2, 2) # used to hang. Now should not because data.table auto switches to single threaded
# commenting out avoid_openmp_hang_within_fork() confirms this test catches catches the hang
test(1706, getDTthreads()<=2) # Tests that it reverts to previous state after use of mclapply
f(2, 2) # Used to hang. Now should not because data.table auto switches to single threaded
# Commenting out avoid_openmp_hang_within_fork() confirms this test catches catches the hang
test(1705, getDTthreads()==1) # Stays in single-threaded mode after returning from mclapply's fork
setDTthreads(2)
test(1706, getDTthreads()==2) # User returned to multi-threaded after fork.
} else {
cat("Tests 1705 and 1706 not run. If required call library(parallel) first.\n")
}
Expand Down Expand Up @@ -10362,11 +10364,15 @@ test(1759, fread("alluniquechar.csv")[c(1,2,4999,5000)],
# fread should use multiple threads on single column input.
# tests 2 threads; the very reasonable limit on CRAN
# file needs to be reasonably large for threads to kick in (minimum chunkSize is 1MB currently)
N = if (TRUE) 2e6 else 1e9 # offline speed check
fwrite(data.table(A=sample(10,N,replace=TRUE)), f<-tempfile())
test(1760.1, file.info(f)$size > 4*1024*1024)
test(1760.2, fread(f, verbose=TRUE, nThread=2), output="using 2 threads")
unlink(f)
if (getDTthreads() != 2) {
cat("Test 1760 not run because this session either has no OpenMP or has been limited to one thread (e.g. under UBSAN and ASAN)\n")
} else {
N = if (TRUE) 2e6 else 1e9 # offline speed check
fwrite(data.table(A=sample(10,N,replace=TRUE)), f<-tempfile())
test(1760.1, file.info(f)$size > 4*1024*1024)
test(1760.2, fread(f, verbose=TRUE, nThread=2), output="using 2 threads")
unlink(f)
}

# fread single column with superfluous fill=TRUE, #2118
test(1761.1, fread("1\n2\n3", fill=TRUE), data.table(V1=1:3))
Expand Down
37 changes: 9 additions & 28 deletions src/assign.c
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,6 @@ static SEXP shallow(SEXP dt, SEXP cols, R_len_t n)
SETLENGTH(newdt,l);
SET_TRUELENGTH(newdt,n);
setselfref(newdt);
// SET_NAMED(dt,1); // for some reason, R seems to set NAMED=2 via setAttrib? Need NAMED to be 1 for passing to assign via a .C dance before .Call (which sets NAMED to 2), and we can't use .C with DUP=FALSE on lists.
UNPROTECT(protecti);
return(newdt);
}
Expand Down Expand Up @@ -477,11 +476,16 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values, SEXP v
}
vlen = length(thisvalue);
if (length(rows)==0 && targetlen==vlen && (vlen>0 || nrow==0)) {
if ( NAMED(thisvalue)==2 || // set() protects the NAMED of atomic vectors from .Call setting arguments to 2 by wrapping with list
if ( MAYBE_SHARED(thisvalue) || // set() protects the NAMED of atomic vectors from .Call setting arguments to 2 by wrapping with list
(TYPEOF(values)==VECSXP && i>LENGTH(values)-1)) { // recycled RHS would have columns pointing to others, #185.
if (verbose) {
if (NAMED(thisvalue)==2) Rprintf("RHS for item %d has been duplicated because NAMED is %d, but then is being plonked.\n",i+1, NAMED(thisvalue));
else Rprintf("RHS for item %d has been duplicated because the list of RHS values (length %d) is being recycled, but then is being plonked.\n", i+1, length(values));
if (length(values)==length(cols)) {
// usual branch
Rprintf("RHS for item %d has been duplicated because NAMED is %d, but then is being plonked.\n", i+1, NAMED(thisvalue));
} else {
// rare branch where the lhs of := is longer than the items on the rhs of :=
Rprintf("RHS for item %d has been duplicated because the list of RHS values (length %d) is being recycled, but then is being plonked.\n", i+1, length(values));
}
}
thisvalue = duplicate(thisvalue); // PROTECT not needed as assigned as element to protected list below.
} else {
Expand Down Expand Up @@ -778,7 +782,7 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values, SEXP v
}

static Rboolean anyNamed(SEXP x) {
if (NAMED(x)) return TRUE;
if (MAYBE_REFERENCED(x)) return TRUE;
if (isNewList(x)) for (int i=0; i<LENGTH(x); i++)
if (anyNamed(VECTOR_ELT(x,i))) return TRUE;
return FALSE;
Expand Down Expand Up @@ -1017,26 +1021,3 @@ SEXP pointWrapper(SEXP to, SEXP to_idx, SEXP from, SEXP from_idx) {
return(to);
}

/*
SEXP pointer(SEXP x) {
SEXP ans;
PROTECT(ans = allocVector(REALSXP, 1));
REAL(ans)[0] = (double)x;
UNPROTECT(1);
return(ans);
}

SEXP named(SEXP x) {
SEXP y = (SEXP)(REAL(x)[0]);
Rprintf("%d length = %d\n",NAMED(y), LENGTH(y));
return(R_NilValue);
}

void setnamed(double *x, int *v) { // call by .Call(,DUP=FALSE) only.
SEXP y = (SEXP)(*x);
Rprintf("%d length = %d\n",NAMED(y), LENGTH(y));
SET_NAMED(y,*v);
}
*/


8 changes: 8 additions & 0 deletions src/data.table.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,14 @@
#endif
#define MAX(a,b) (((a)>(b))?(a):(b))

// Backport macros added to R in 2017 so we don't need to update dependency from R 3.0.0
#ifndef MAYBE_SHARED
# define MAYBE_SHARED(x) (NAMED(x) > 1)
#endif
#ifndef MAYBE_REFERENCED
# define MAYBE_REFERENCED(x) ( NAMED(x) > 0 )
#endif

// init.c
void setSizes();
SEXP char_integer64;
Expand Down
11 changes: 2 additions & 9 deletions src/frank.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,6 @@

extern SEXP char_integer64;

static union {
double d;
unsigned long long ull;
} u;

SEXP dt_na(SEXP x, SEXP cols) {
int i, j, n=0, this;
double *dv;
Expand Down Expand Up @@ -45,8 +40,7 @@ SEXP dt_na(SEXP x, SEXP cols) {
if (isString(class) && STRING_ELT(class, 0) == char_integer64) {
dv = (double *)REAL(v);
for (j=0; j<n; j++) {
u.d = dv[j];
LOGICAL(ans)[j] |= (u.ull == NA_INT64_LL); // TODO: can be == NA_INT64_D directly
LOGICAL(ans)[j] |= (DtoLL(dv[j]) == NA_INT64_LL); // TODO: can be == NA_INT64_D directly
}
} else {
for (j=0; j<n; j++) LOGICAL(ans)[j] |= ISNAN(REAL(v)[j]);
Expand Down Expand Up @@ -171,8 +165,7 @@ SEXP anyNA(SEXP x, SEXP cols) {
if (isString(class) && STRING_ELT(class, 0) == char_integer64) {
dv = (double *)REAL(v);
for (j=0; j<n; j++) {
u.d = dv[j];
if (u.ull == NA_INT64_LL) {
if (DtoLL(dv[j]) == NA_INT64_LL) {
LOGICAL(ans)[0] = 1;
break;
}
Expand Down
2 changes: 1 addition & 1 deletion src/freadR.c
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ _Bool userOverride(int8_t *type, lenOff *colNames, const char *anchor, int ncol)
for (int i=0; i<ncol; i++) {
SEXP this;
if (colNames[i].len<=0) {
char buff[10];
char buff[12];
sprintf(buff,"V%d",i+1);
this = mkChar(buff);
} else {
Expand Down
37 changes: 16 additions & 21 deletions src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ SEXP rbindlist();
SEXP vecseq();
SEXP copyattr();
SEXP setlistelt();
SEXP setnamed();
SEXP setmutable();
SEXP address();
SEXP copyNamedInList();
SEXP fmelt();
Expand Down Expand Up @@ -101,7 +101,7 @@ R_CallMethodDef callMethods[] = {
{"Cvecseq", (DL_FUNC) &vecseq, -1},
{"Ccopyattr", (DL_FUNC) &copyattr, -1},
{"Csetlistelt", (DL_FUNC) &setlistelt, -1},
{"Csetnamed", (DL_FUNC) &setnamed, -1},
{"Csetmutable", (DL_FUNC) &setmutable, -1},
{"Caddress", (DL_FUNC) &address, -1},
{"CcopyNamedInList", (DL_FUNC) &copyNamedInList, -1},
{"Cfmelt", (DL_FUNC) &fmelt, -1},
Expand Down Expand Up @@ -174,11 +174,13 @@ void attribute_visible R_init_datatable(DllInfo *info)
if ((int)NA_INTEGER != (int)INT_MIN) error("Checking NA_INTEGER [%d] == INT_MIN [%d] %s", NA_INTEGER, INT_MIN, msg);
if ((int)NA_INTEGER != (int)NA_LOGICAL) error("Checking NA_INTEGER [%d] == NA_LOGICAL [%d] %s", NA_INTEGER, NA_LOGICAL, msg);
if (sizeof(int) != 4) error("Checking sizeof(int) [%d] is 4 %s", sizeof(int), msg);
if (sizeof(double) != 8) error("Checking sizeof(double) [%d] is 8 %s", sizeof(double), msg); // 8 on both 32bit and 64bit.
if (sizeof(double) != 8) error("Checking sizeof(double) [%d] is 8 %s", sizeof(double), msg); // 8 on both 32bit and 64bit
// alignof not available in C99: if (alignof(double) != 8) error("Checking alignof(double) [%d] is 8 %s", alignof(double), msg); // 8 on both 32bit and 64bit
if (sizeof(long long) != 8) error("Checking sizeof(long long) [%d] is 8 %s", sizeof(long long), msg);
if (sizeof(char *) != 4 && sizeof(char *) != 8) error("Checking sizeof(pointer) [%d] is 4 or 8 %s", sizeof(char *), msg);
if (sizeof(SEXP) != sizeof(char *)) error("Checking sizeof(SEXP) [%d] == sizeof(pointer) [%d] %s", sizeof(SEXP), sizeof(char *), msg);
if (sizeof(uint64_t) != 8) error("Checking sizeof(uint64_t) [%d] is 8 %s", sizeof(uint64_t), msg);
if (sizeof(int64_t) != 8) error("Checking sizeof(int64_t) [%d] is 8 %s", sizeof(int64_t), msg);
if (sizeof(signed char) != 1) error("Checking sizeof(signed char) [%d] is 1 %s", sizeof(signed char), msg);
if (sizeof(int8_t) != 1) error("Checking sizeof(int8_t) [%d] is 1 %s", sizeof(int8_t), msg);

Expand Down Expand Up @@ -273,30 +275,23 @@ inline Rboolean INHERITS(SEXP x, SEXP char_) {
inline long long DtoLL(double x) {
// Type punning such as
// *(long long *)&REAL(column)[i]
// is undefined by C standards. This was the cause of v1.10.2 failing on 31 Jan 2017
// under clang 3.9.1 -O3 and solaris-sparc but was ok on solaris-x86 and gcc.
// Then the union method :
// union {double d; long long ll;} u;
// u.d = x;
// return u.ll;
// passed on some of those but still failed on MacOS with latest clang from latest
// Xcode 8.2 and -O2. It seems that memcpy is the safest way, is clear, and compilers
// will optimize away the call overhead.
// There is a grep in CRAN_Release.cmd to detect type punning; use this I64 instead.
//
// is undefined by C standards. This may have been the cause of 1.10.2 failing on 31 Jan 2017
// under clang 3.9.1 -O3 and solaris-sparc but not solaris-x86 or gcc.
// There is now a grep in CRAN_Release.cmd; use this union method instead.
// int64_t may help rather than 'long long' (TODO: replace all long long with int64_t)
// The two types must be the same size. That is checked in R_init_datatable (above)
// where sizeof(long long)==sizeof(double)==8 is checked.
// where sizeof(int64_t)==sizeof(double)==8 is checked.
// Endianness should not matter because whether big or little, endianness is the same
// inside this process, and the two types are the same size.
long long ll;
memcpy(&ll, &x, 8);
return ll;
union {double d; int64_t i64;} u; // not static, inline instead
u.d = x;
return (long long)u.i64;
}

inline double LLtoD(long long x) {
double d;
memcpy(&d, &x, 8);
return d;
union {double d; int64_t i64;} u;
u.i64 = (int64_t)x;
return u.d;
}


Expand Down
Loading