Skip to content
Merged

Forder #3124

Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
4f38b45
interim
mattdowle Oct 11, 2018
e6e4ec7
interim
mattdowle Oct 11, 2018
212013d
interim
mattdowle Oct 11, 2018
d0f2706
interim
mattdowle Oct 11, 2018
a2f72b8
bit packing had unintended effect of not always retaining appearance …
mattdowle Oct 11, 2018
d17726e
added appearance-order comments: by-column vs by-row
mattdowle Oct 12, 2018
7f47b34
clamped single thread load to nrow
mattdowle Oct 12, 2018
c70498b
STL reduced to 65536 and counts halved to uint16_t
mattdowle Oct 12, 2018
e88f300
removed if(radix==0) special case for robustness
mattdowle Oct 12, 2018
5df4788
implemented nest and ironed out crashes
mattdowle Oct 13, 2018
f0ca8ce
interim
mattdowle Oct 17, 2018
9a1ccf7
merge master
mattdowle Oct 18, 2018
d745dbc
bin compression
mattdowle Oct 18, 2018
463f644
sort unique bytes
mattdowle Oct 19, 2018
0377657
interim
mattdowle Oct 19, 2018
6ff2610
interim. can't recover ptr from compression, will have to use truelen…
mattdowle Oct 19, 2018
0933398
interim
mattdowle Oct 19, 2018
583abc3
interim
mattdowle Oct 19, 2018
1433d40
grouped now skip
mattdowle Oct 19, 2018
82a0c24
interim
mattdowle Oct 21, 2018
fbe01ca
interim
mattdowle Oct 21, 2018
54586b5
sort direction in sort_ugrp; branch free and low dimension too
mattdowle Oct 21, 2018
5ad8d7b
interim
mattdowle Oct 22, 2018
850b56c
interim
mattdowle Oct 22, 2018
b23bdd6
interim
mattdowle Oct 22, 2018
d6e2bce
interim
mattdowle Oct 22, 2018
31eac04
interim
mattdowle Oct 23, 2018
8d6f303
interim
mattdowle Oct 23, 2018
5394840
interim
mattdowle Oct 23, 2018
7f4031b
more edge cases fixed
mattdowle Oct 24, 2018
080e400
more edge cases fixed
mattdowle Oct 24, 2018
7ac3d26
interim
mattdowle Oct 24, 2018
505c618
utf8 on uniques
mattdowle Oct 24, 2018
fb2b1c6
final edge case (all Inf/NaN); passes tests
mattdowle Oct 24, 2018
ff67092
tidy
mattdowle Oct 24, 2018
5655b74
overflow ngrp fixed
mattdowle Oct 24, 2018
21ea75d
tidy
mattdowle Oct 24, 2018
09cdd2f
tidy
mattdowle Oct 24, 2018
d71cfcb
tidy
mattdowle Oct 24, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CRAN_Release.cmd
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ cd R-devel-strict # important to change directory name before building not af
make
alias Rdevel-strict='~/build/R-devel-strict/bin/R --vanilla'
cd ~/GitHub/data.table
## edit ~/.R/Makevars and activate "CFLAGS=-O0 -g" there to trace
Rdevel-strict CMD INSTALL data.table_1.11.8.tar.gz
# Check UBSAN and ASAN flags appear in compiler output above. Rdevel was compiled with them so should be passed through to here
Rdevel-strict
Expand Down
4 changes: 2 additions & 2 deletions R/data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -1859,10 +1859,10 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) {
if (verbose) {last.started.at=proc.time();cat("setkey() afterwards for keyby=.EACHI ... ");flush.console()}
setkeyv(ans,names(ans)[seq_along(byval)])
if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()}
} else if (!missing(keyby) || (haskey(x) && bysameorder)) {
} else if (!missing(keyby) || (haskey(x) && bysameorder && (byjoin || (length(allbyvars) && identical(allbyvars,head(key(x),length(allbyvars))))))) {
setattr(ans,"sorted",names(ans)[seq_along(grpcols)])
}
alloc.col(ans) # TO DO: overallocate in dogroups in the first place and remove this line
alloc.col(ans) # TODO: overallocate in dogroups in the first place and remove this line
}

.optmean <- function(expr) { # called by optimization of j inside [.data.table only. Outside for a small speed advantage.
Expand Down
10 changes: 5 additions & 5 deletions R/test.data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ test <- function(num,x,y=TRUE,error=NULL,warning=NULL,output=NULL,message=NULL)
}
if (!fail && !length(error) && (!length(output) || !missing(y))) { # TODO test y when output=, too
y = try(y,TRUE)
if (identical(x,y)) return(invisible())
if (identical(x,y)) return(invisible(TRUE))
all.equal.result = TRUE
if (is.data.table(x) && is.data.table(y)) {
if (!selfrefok(x) || !selfrefok(y)) {
Expand All @@ -230,12 +230,12 @@ test <- function(num,x,y=TRUE,error=NULL,warning=NULL,output=NULL,message=NULL)
setattr(yc,"row.names",NULL)
setattr(xc,"index",NULL) # too onerous to create test RHS with the correct index as well, just check result
setattr(yc,"index",NULL)
if (identical(xc,yc) && identical(key(x),key(y))) return(invisible()) # check key on original x and y because := above might have cleared it on xc or yc
if (identical(xc,yc) && identical(key(x),key(y))) return(invisible(TRUE)) # check key on original x and y because := above might have cleared it on xc or yc
if (isTRUE(all.equal.result<-all.equal(xc,yc)) && identical(key(x),key(y)) &&
identical(vapply_1c(xc,typeof), vapply_1c(yc,typeof))) return(invisible())
identical(vapply_1c(xc,typeof), vapply_1c(yc,typeof))) return(invisible(TRUE))
}
}
if (is.atomic(x) && is.atomic(y) && isTRUE(all.equal.result<-all.equal(x,y,check.names=!isTRUE(y))) && typeof(x)==typeof(y)) return(invisible())
if (is.atomic(x) && is.atomic(y) && isTRUE(all.equal.result<-all.equal(x,y,check.names=!isTRUE(y))) && typeof(x)==typeof(y)) return(invisible(TRUE))
# For test 617 on r-prerel-solaris-sparc on 7 Mar 2013
# nocov start
if (!fail) {
Expand All @@ -255,6 +255,6 @@ test <- function(num,x,y=TRUE,error=NULL,warning=NULL,output=NULL,message=NULL)
assign("whichfail", c(whichfail, num), parent.frame(), inherits=TRUE)
# nocov end
}
invisible()
invisible(!fail)
}

105 changes: 56 additions & 49 deletions inst/tests/tests.Rraw

Large diffs are not rendered by default.

27 changes: 17 additions & 10 deletions src/bmerge.c
Original file line number Diff line number Diff line change
Expand Up @@ -209,14 +209,20 @@ static union {
static int mid, tmplow, tmpupp; // global to save them being added to recursive stack. Maybe optimizer would do this anyway.
static SEXP ic, xc;

// Read the i-th 64-bit item of p and toggle its most significant bit,
// returning the result as an unsigned 64-bit key.
// Always ascending and NA first (0) when used by bmerge
static uint64_t i64twiddle(void *p, int i)
{
  const uint64_t *items = (const uint64_t *)p;
  const uint64_t topbit = 0x8000000000000000;
  return items[i] ^ topbit;
}

void bmerge_r(int xlowIn, int xuppIn, int ilowIn, int iuppIn, int col, int thisgrp, int lowmax, int uppmax)
// col is >0 and <=ncol-1 if this range of [xlow,xupp] and [ilow,iupp] match up to but not including that column
// lowmax=1 if xlowIn is the lower bound of this group (needed for roll)
// uppmax=1 if xuppIn is the upper bound of this group (needed for roll)
// new: col starts with -1 for non-equi joins, which gathers rows from nested id group counter 'thisgrp'
{
int xlow=xlowIn, xupp=xuppIn, ilow=ilowIn, iupp=iuppIn, j, k, ir, lir, tmp;
Rboolean isInt64=FALSE;
bool isInt64=false;
ir = lir = ilow + (iupp-ilow)/2; // lir = logical i row.
if (o) ir = o[lir]-1; // ir = the actual i row if i were ordered
if (col>-1) {
Expand Down Expand Up @@ -329,13 +335,14 @@ void bmerge_r(int xlowIn, int xuppIn, int ilowIn, int iuppIn, int col, int thisg
if (xval.s == ival.s) tmpupp=mid; else ilow=mid; // see above re ==
}
break;
case REALSXP :
case REALSXP : {
isInt64 = INHERITS(xc, char_integer64);
twiddle = isInt64 ? &i64twiddle : &dtwiddle;
ival.ull = twiddle(DATAPTR(ic), ir, 1);
uint64_t (*twiddle)(void *, int) = isInt64 ? &i64twiddle : &dtwiddle;
// TODO: remove this last remaining use of i64twiddle. remove DATAPTR too.
ival.ull = twiddle(DATAPTR(ic), ir);
while(xlow < xupp-1) {
mid = xlow + (xupp-xlow)/2;
xval.ull = twiddle(DATAPTR(xc), XIND(mid), 1);
xval.ull = twiddle(DATAPTR(xc), XIND(mid));
if (xval.ull<ival.ull) {
xlow=mid;
} else if (xval.ull>ival.ull) {
Expand All @@ -345,12 +352,12 @@ void bmerge_r(int xlowIn, int xuppIn, int ilowIn, int iuppIn, int col, int thisg
tmpupp = mid;
while(tmplow<xupp-1) {
mid = tmplow + (xupp-tmplow)/2;
xval.ull = twiddle(DATAPTR(xc), XIND(mid), 1);
xval.ull = twiddle(DATAPTR(xc), XIND(mid));
if (xval.ull == ival.ull) tmplow=mid; else xupp=mid;
}
while(xlow<tmpupp-1) {
mid = xlow + (tmpupp-xlow)/2;
xval.ull = twiddle(DATAPTR(xc), XIND(mid), 1);
xval.ull = twiddle(DATAPTR(xc), XIND(mid));
if (xval.ull == ival.ull) tmpupp=mid; else xlow=mid;
}
break;
Expand Down Expand Up @@ -380,17 +387,17 @@ void bmerge_r(int xlowIn, int xuppIn, int ilowIn, int iuppIn, int col, int thisg
if (col>-1) {
while(tmplow<iupp-1) {
mid = tmplow + (iupp-tmplow)/2;
xval.ull = twiddle(DATAPTR(ic), o ? o[mid]-1 : mid, 1 );
xval.ull = twiddle(DATAPTR(ic), o ? o[mid]-1 : mid);
if (xval.ull == ival.ull) tmplow=mid; else iupp=mid;
}
while(ilow<tmpupp-1) {
mid = ilow + (tmpupp-ilow)/2;
xval.ull = twiddle(DATAPTR(ic), o ? o[mid]-1 : mid, 1 );
xval.ull = twiddle(DATAPTR(ic), o ? o[mid]-1 : mid);
if (xval.ull == ival.ull) tmpupp=mid; else ilow=mid;
}
}
// ilow and iupp now surround the group in ic, too
break;
} break;
default:
error("Type '%s' not supported as key column", type2char(TYPEOF(xc)));
}
Expand Down
6 changes: 2 additions & 4 deletions src/data.table.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ SEXP sym_index;
SEXP sym_BY;
SEXP sym_starts, char_starts;
SEXP sym_maxgrpn;
Rboolean INHERITS(SEXP x, SEXP char_);
bool INHERITS(SEXP x, SEXP char_);
long long DtoLL(double x);
double LLtoD(long long x);
double NA_INT64_D;
Expand All @@ -90,9 +90,7 @@ Rboolean isDatatable(SEXP x);

// forder.c
int StrCmp(SEXP x, SEXP y);
unsigned long long dtwiddle(void *p, int i, int order);
unsigned long long i64twiddle(void *p, int i, int order);
unsigned long long (*twiddle)(void *, int, int);
uint64_t dtwiddle(void *p, int i);
SEXP forder(SEXP DT, SEXP by, SEXP retGrp, SEXP sortStrArg, SEXP orderArg, SEXP naArg);
bool need2utf8(SEXP x, int n);
SEXP isReallyReal(SEXP);
Expand Down
Loading