Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -4538,7 +4538,7 @@ ll <- list(data.table(x=1, y=-1, x=-2), data.table(y=10, y=20, y=30, x=-10, a="a
test(1288.1, rbindlist(ll, use.names=TRUE, fill=FALSE), error = "Item 2 has 7 columns, inconsistent with item 1 which has 3 columns")
# modified after fixing #725
test(1288.2, rbindlist(ll, use.names=TRUE, fill=TRUE),
data.table(x=c(1,-10), y=c(-1,10), x=c(-2, NA), y=c(NA,20), y=c(NA,30), a=c(NA, "a"), b=c(NA, Inf), c=factor(c(NA, 1))))
data.table(x=c(1,-10), x=c(-2, NA), y=c(-1,10), y=c(NA,20), y=c(NA,30), a=c(NA, "a"), b=c(NA, Inf), c=factor(c(NA, 1))))

# check the name of output are consistent when binding two empty dts with one empy and other non-empty dt
dt1 <- data.table(x=1:5, y=6:10)
Expand Down Expand Up @@ -12350,6 +12350,11 @@ setDF(DT)
test(1953.4, melt.data.table(DT, id.vars = 'id', measure.vars = 'a'),
error = "must be a data.table")

# appearance order of two low-cardinality columns that were squashed in pr#3124
DT = data.table(A=INT(1,3,2,3,2), B=1:5) # respect groups in 1st column (3's and 2's)
test(1954, forderv(DT, sort=FALSE, retGrp=TRUE), structure(INT(1,2,4,3,5), starts=1:5, maxgrpn=1L))


###################################
# Add new tests above this line #
###################################
Expand Down
71 changes: 41 additions & 30 deletions src/forder.c
Original file line number Diff line number Diff line change
Expand Up @@ -394,18 +394,16 @@ static void count_group(const uint8_t *x, const int n, bool *out_skip, uint16_t
for (int i=0; i<ngrp; i++) counts[ugrp[i]] = 0; // ready for next time to save initializing the 256 vector
}

static int dround = 0; // No rounding by default, for now. Handles #1642, #1728, #1463, #485
static uint64_t dmask1;
static uint64_t dmask2;
static int dround=0; // No rounding by default, for now. Handles #1642, #1728, #1463, #485
static uint64_t dmask=0;

SEXP setNumericRounding(SEXP droundArg)
// init.c has initial call with default of 2
{
if (!isInteger(droundArg) || LENGTH(droundArg)!=1) error("Must an integer or numeric vector length 1");
if (INTEGER(droundArg)[0] < 0 || INTEGER(droundArg)[0] > 2) error("Must be 2, 1 or 0");
dround = INTEGER(droundArg)[0];
dmask1 = dround ? 1 << (8*dround-1) : 0;
dmask2 = 0xffffffffffffffff << dround*8;
dmask = dround ? 1 << (8*dround-1) : 0;
return R_NilValue;
}

Expand All @@ -424,29 +422,20 @@ int getNumericRounding_C()
// for floating point finite you have to flip the other bits too if it was signed: http://stereopsis.com/radix.html
uint64_t dtwiddle(void *p, int i)
{
union { // inside for thread-safety
union {
double d;
uint64_t ull;
} u;
uint64_t u64;
} u; // local for thread safety
u.d = ((double *)p)[i];
if (R_FINITE(u.d)) {
u.ull = (u.d) ? u.ull + ((u.ull & dmask1) << 1) : 0; // handle 0, -0 case. Fix for issues/743.
// tested on vector length 100e6. was the fastest fix (see results at the bottom of page)
} else if (ISNAN(u.d)) {
/* 1. NA twiddled to all bits 0, sorts first. R's value 1954 cleared.
2. NaN twiddled to set just bit 13, sorts immediately after NA. 13th bit to be
consistent with "quiet" na bit but any bit outside last 2 bytes would do.
(ref r-devel post: http://r.789695.n4.nabble.com/Question-re-NA-NaNs-in-R-td4685014.html)
3. This also normalises a difference between NA on 32bit R (bit 13 set) and 64bit R (bit 13 not set)
4. -Inf twiddled to : 0 sign, exponent all 0, mantissa all 1, sorts after NaN
5. +Inf twiddled to : 1 sign, exponent all 1, mantissa all 0, sorts last since finite
numbers are defined by not-all-1 in exponent */
u.ull = (ISNA(u.d) ? 0 : (1ULL << 51));
return u.ull;
//return (nalast == 1 ? ~u.ull : u.ull);
if (u.d==0) u.d=0; // changes -0.0 to 0.0, issue #743
u.u64 ^= (u.u64 & 0x8000000000000000) ? 0xffffffffffffffff : 0x8000000000000000; // always flip sign bit and if negative (sign bit was set) flip other bits too
u.u64 += (u.u64 & dmask) << 1; // when dround==1|2, if 8th|16th bit is set, round up before chopping last 1|2 bytes
return u.u64 >> (dround*8);
}
uint64_t mask = (u.ull & 0x8000000000000000) ? 0xffffffffffffffff : 0x8000000000000000; // always flip sign bit and if negative (sign bit was set) flip other bits too
return (u.ull ^ mask) & dmask2; // TODO: why not shift right to reduce range.
if (ISNAN(u.d)) return ISNA(u.d) ? 0 /*NA*/ : 1 /*NaN*/; // also normalises a difference between NA on 32bit R (bit 13 set) and 64bit R (bit 13 not set)
if (isinf(u.d)) return signbit(u.d) ? 2 /*-Inf*/ : (0xffffffffffffffff>>(dround*8)) /*+Inf*/;
Error("Unknown non-finite value; not NA, NaN, -Inf or +Inf");
}

SEXP forder(SEXP DT, SEXP by, SEXP retGrp, SEXP sortStrArg, SEXP orderArg, SEXP naArg)
Expand Down Expand Up @@ -521,7 +510,7 @@ SEXP forder(SEXP DT, SEXP by, SEXP retGrp, SEXP sortStrArg, SEXP orderArg, SEXP
SEXP x = VECTOR_ELT(DT,INTEGER(by)[col]-1);
uint64_t min, max; // min and max of non-NA values
int na_count=0;
if (sort) sort=INTEGER(orderArg)[col]; // +1 or -1
if (sort) sort=INTEGER(orderArg)[col]; // +1(asc) -1(desc) 0(first-appearance)
//Rprintf("sort = %d\n", sort);
switch(TYPEOF(x)) {
case INTSXP : case LGLSXP : // TODO skip LGL and assume range [0,1]
Expand Down Expand Up @@ -562,9 +551,17 @@ SEXP forder(SEXP DT, SEXP by, SEXP retGrp, SEXP sortStrArg, SEXP orderArg, SEXP
if (spare==0) {
spare = 8-firstBits; // left align to byte boundary to get better first split.
} else {
if (spare >= firstBits) {
spare -= firstBits; // new spare is also how many bits to left shift
} else {
if (sort==0) {
// can't squash into previous spare as that will change first-appearance order within that radix-byte
// found thanks to test 1246.55 with seed 1540402216L and added new direct test 1954
spare = 8-firstBits;
nradix++;
}
else if (spare >= firstBits) {
// easiest case. just left shift a few bits within these nbyte to fill the spare gap
spare -= firstBits; // new spare is also how many bits to now left shift
}
else {
if (nbyte<8) {
spare += 8-firstBits;
nbyte++; // after shift, will need an extra byte
Expand Down Expand Up @@ -592,8 +589,8 @@ SEXP forder(SEXP DT, SEXP by, SEXP retGrp, SEXP sortStrArg, SEXP orderArg, SEXP
}
if (isReal) { min2--; max2++; } // -Inf and +Inf in min-1 and max+1 spots respectively

const uint64_t naval = ((nalast==1) == (sort==1)) ? max+1+isReal*2 : min-1-isReal*2;
const uint64_t nanval = ((nalast==1) == (sort==1)) ? max+2 : min-2; // only used when isReal
const uint64_t naval = ((nalast==1) == (sort>=0)) ? max+1+isReal*2 : min-1-isReal*2;
const uint64_t nanval = ((nalast==1) == (sort>=0)) ? max+2 : min-2; // only used when isReal

// several columns could squash into 1 byte. due to this bit squashing is why we deal
// with asc|desc here, otherwise it could be done in the ugrp sorting by reversing the ugrp insert sort
Expand Down Expand Up @@ -695,6 +692,13 @@ SEXP forder(SEXP DT, SEXP by, SEXP retGrp, SEXP sortStrArg, SEXP orderArg, SEXP
char *TMP = malloc(n*sizeof(int));

for (int radix=0; radix<nradix; radix++) {
/*Rprintf("Before radix %d ...", radix);
for (int i=0; i<n; i++) {
Rprintf("%03d: ",i);
for (int r=0; r<nradix; r++) Rprintf("%03d ",key[r][i]);
Rprintf("\n");
}*/

flipflop();
stackgrps = radix<(nradix-1) || LOGICAL(retGrp)[0]; // if just ordering, we can save allocating and writing the last (and biggest) group size vector
int tmp=gs[1-flip][0]; for (int i=1; i<gsngrp[1-flip]; i++) tmp=(gs[1-flip][i]+=tmp); // cumulate, so we can parallelize
Expand Down Expand Up @@ -892,6 +896,13 @@ SEXP forder(SEXP DT, SEXP by, SEXP retGrp, SEXP sortStrArg, SEXP orderArg, SEXP
}
}
}
/*Rprintf("After last radix ...\n");
for (int i=0; i<n; i++) {
Rprintf("%03d: ",i);
for (int r=0; r<nradix; r++) Rprintf("%03d ", key[r][i]);
Rprintf("\n");
}*/

omp_set_nested(0);
free(TMP); TMP=NULL; // TMP_size=0; // TODO: place in cleanup()
for (int i=0; i<nradix; i++) {free(key[i]); key[i]=NULL;} // TODO: place in cleanup()
Expand Down