Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,8 @@

24. `column not found` could incorrectly occur in rare non-equi-join cases, [#3635](https://github.com/Rdatatable/data.table/issues/3635). Thanks to @UweBlock for the report.

25. Complex columns used in `j` during grouping would get mangled, [#3639](https://github.com/Rdatatable/data.table/issues/3639). A related bug prevented assigning complex values using `:=` except for full-column plonks. We still do not support grouping `by` a complex column. Thanks to @eliocamp for filing the bug report.

#### NOTES

1. `rbindlist`'s `use.names="check"` now emits its message for automatic column names (`"V[0-9]+"`) too, [#3484](https://github.com/Rdatatable/data.table/pull/3484). See news item 5 of v1.12.2 below.
Expand Down
1 change: 0 additions & 1 deletion R/data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -889,7 +889,6 @@ replace_order = function(isub, verbose, env) {
} # else maybe a call to transform or something which returns a list.
av = all.vars(jsub,TRUE) # TRUE fixes bug #1294 which didn't see b in j=fns[[b]](c)
use.I = ".I" %chin% av
# browser()
if (any(c(".SD","eval","get","mget") %chin% av)) {
if (missing(.SDcols)) {
# here we need to use 'dupdiff' instead of 'setdiff'. Ex: setdiff(c("x", "x"), NULL) will give 'x'.
Expand Down
33 changes: 33 additions & 0 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -7346,6 +7346,17 @@ test(1540.35, DT1[DT3, lapply(.SD, function(x) x * mul),
by=.EACHI, on=c(x="q", y="r"), roll=TRUE],
DT1.copy[DT3, lapply(.SD, function(x) x * mul),
by=.EACHI, roll=TRUE])
## more coverage tests for by = .EACHI, on = c(LHS = 'RHS') for numeric type
set.seed(45L)
DT1 = data.table(x=sample(letters[1:3], 15, TRUE), y=sample(6:10, 15, TRUE),
a=sample(100, 15), b=runif(15))
DT2 = CJ(x=letters[1:3], y=6:10)[, mul := sample(20, 15)][sample(15L, 5L)]
DT3 = rbindlist(list(DT2, list(x="d", y=7L, mul=100L)))
DT3 = DT3[sample(nrow(DT3))]
DT1[ , x_num := match(x, letters) + .1]
DT3[ , x_num := match(x, letters) + .1]
test(1540.36, DT1[DT3[1:3], .(y = x_num), by=.EACHI, on=c(x_num="x_num")],
data.table(x_num = c(3.1, 4.1, 3.1), y = c(2.1, NA, NA)))

# to do: add tests for :=

Expand Down Expand Up @@ -15265,6 +15276,28 @@ test(2064.1, x[i, class(date), verbose=TRUE], 'Date',
test(2064.2, i[x, class(date), verbose=TRUE], 'Date',
output="Coerced integer column i.date to type double for join to match type of x.date")

# complex values in grouping, #3639
set.seed(42)
DT = CJ(x = 1:10, a = c("a", "b"), b = 1:2)
DT[ , z := complex(rnorm(1:.N), rnorm(1:.N))]
## can simplify this test after #1444
test(2065.1, all.equal(setkey(copy(DT), NULL), DT[, .(x = x, z = z), by = .(a, b)][order(x, a, b)], ignore.col.order = TRUE))
test(2065.2, DT[ , base::sum(z), by = a], data.table(a = c('a', 'b'), V1 = c(5.0582228485073+0i, -1.8644229822705+0i)))
test(2065.3, DT[ , sum(Mod(z)), by = b], data.table(b = 1:2, V1 = c(16.031422657932, 13.533483145656)))
## mimicking test 171.3 for coverage
x = data.table(A=c(25L,85L,25L,25L,85L), B=c("a","a","b","c","c"), z=0:4 + (4:0)*1i)
test(2065.4, x[ , data.table(A, z)[A==25, z] + data.table(A, z)[A==85, z], by=B],
data.table(B = c('a', 'c'), V1 = c(1, 7) + (c(7, 1))*1i))
## mimicking test 771 for coverage
a = data.table(first=1:6, third=c(1i,1,1i,3,3i,4), key="first")
b = data.table(first=c(3,4,4,5,6,7,8), second=1:7, key="first")
test(2065.5, b[ , third:=a[b, third, by=.EACHI]], error="Supplied 2 items to be assigned to 7 items of")
# also works for assignment, as noted in #3690
DT[ , z_sum := base::sum(z), by = .(a, b)]
test(2065.6, DT[ , z_sum := base::sum(z), by = .(a, b)][1:3, z_sum],
c(1.8791864549242+0i, 3.17903639358309+0i, -4.18868631527035+0i))
test(2065.7, DT[1L, z_sum := 1i][1L, z_sum], 1i)


###################################
# Add new tests above this line #
Expand Down
13 changes: 13 additions & 0 deletions src/assign.c
Original file line number Diff line number Diff line change
Expand Up @@ -943,6 +943,15 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source)
td[w-1] = sd[i&mask];
}
} break;
case CPLXSXP: {
Rcomplex *td = COMPLEX(target);
const Rcomplex *sd = COMPLEX(source);
for (int i=0; i<len; i++) {
const int w = wd[i];
if (w<1) continue;
td[w-1] = sd[i&mask];
}
} break;
case STRSXP : {
const SEXP *sd = STRING_PTR(source);
for (int i=0; i<len; i++) {
Expand Down Expand Up @@ -1002,6 +1011,10 @@ void writeNA(SEXP v, const int from, const int n)
for (int i=from; i<=to; ++i) vd[i] = NA_REAL;
}
} break;
case CPLXSXP: {
Rcomplex *vd = COMPLEX(v);
for (int i=from; i<=to; ++i) vd[i] = NA_CPLX;
} break;
case STRSXP :
// character columns are initialized with blank string (""). So replace the all-"" with all-NA_character_
// Since "" and NA_character_ are global constants in R, it should be ok to not use SET_STRING_ELT here. But use it anyway for safety (revisit if proved slow)
Expand Down
1 change: 1 addition & 0 deletions src/data.table.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ double LLtoD(long long x);
bool GetVerbose();
double NA_INT64_D;
long long NA_INT64_LL;
Rcomplex NA_CPLX; // initialized in init.c; see there for comments

// cj.c
SEXP cj(SEXP base_list);
Expand Down
34 changes: 29 additions & 5 deletions src/dogroups.c
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,9 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
case REALSXP :
REAL(VECTOR_ELT(SDall,j))[0] = NA_REAL;
break;
case CPLXSXP : {
COMPLEX(VECTOR_ELT(SDall, j))[0] = NA_CPLX;
} break;
case STRSXP :
SET_STRING_ELT(VECTOR_ELT(SDall,j),0,NA_STRING);
break;
Expand All @@ -167,14 +170,17 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
case REALSXP :
REAL(VECTOR_ELT(xSD,j))[0] = NA_REAL;
break;
case CPLXSXP : {
COMPLEX(VECTOR_ELT(xSD, j))[0] = NA_CPLX;
} break;
case STRSXP :
SET_STRING_ELT(VECTOR_ELT(xSD,j),0,NA_STRING);
break;
case VECSXP :
SET_VECTOR_ELT(VECTOR_ELT(xSD,j),0,R_NilValue);
break;
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we nocov this branch (or set it to error even?)

IIUC hitting this branch would require first joining on VECSXP column in on, I don't think we have any plan to support list-to-list joins?

Copy link
Copy Markdown
Member

@mattdowle mattdowle Jul 16, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

don't no cov it otherwise it'll drop off our todo list. I'd say don't error either because something somewhere might be using it. I'd say just leave it as-is for now until it can be covered at a later date.

default:
error("Logical error. Type of column should have been checked by now");
error("Logical error. Type of column should have been checked by now"); // #nocov
}
}
} else {
Expand Down Expand Up @@ -214,13 +220,21 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
rownum = iI[k]-1;
td[k] = sd[rownum]; // on 32bit copies pointers too
}
} else { // size 8
} else if (size==8) {
double *td = REAL(target);
const double *sd = REAL(source);
for (int k=0; k<grpn; ++k) {
rownum = iI[k]-1;
td[k] = sd[rownum]; // on 64bit copies pointers too
}
} else { // size 16
// #3634 -- CPLXSXP columns have size 16
Rcomplex *td = COMPLEX(target);
const Rcomplex *sd = COMPLEX(source);
for (int k=0; k<grpn; ++k) {
rownum = iI[k]-1;
td[k] = sd[rownum];
}
}
}
if (LOGICAL(verbose)[0]) { tblock[1] += clock()-tstart; nblock[1]++; }
Expand Down Expand Up @@ -302,7 +316,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
}
memrecycle(target, order, INTEGER(starts)[i]-1, grpn, RHS); // length mismatch checked above for all jval columns before starting to add any new columns
copyMostAttrib(RHS, target); // not names, otherwise test 778 would fail.
/* OLD FIX: commented now. The fix below resulted in segfault on factor columns because I dint set the "levels"
/* OLD FIX: commented now. The fix below resulted in segfault on factor columns because I didn't set the "levels"
Instead of fixing that, I just removed setting class if it's factor. Not appropriate fix.
Correct fix of copying all attributes (except names) added above. Now, everything should be alright.
Test 1144 (#5104) will provide the right output now. Modified accordingly.
Expand Down Expand Up @@ -382,14 +396,20 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
for (int j=0; j<ngrpcols; ++j) {
target = VECTOR_ELT(ans,j);
source = VECTOR_ELT(groups, INTEGER(grpcols)[j]-1); // target and source the same type by construction above
if (SIZEOF(target)==4) {
int tsize = SIZEOF(target);
if (tsize==4) {
int *td = INTEGER(target);
int *sd = INTEGER(source);
for (int r=0; r<maxn; ++r) td[ansloc+r] = sd[igrp];
} else {
} else if (tsize==8) {
double *td = REAL(target);
double *sd = REAL(source);
for (int r=0; r<maxn; ++r) td[ansloc+r] = sd[igrp];
} else {
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this one is in the ngrpcols loop, so covering it requires being able to group by complex, which we can't (yet)

// #3634 -- CPLXSXP columns have size 16
Rcomplex *td = COMPLEX(target);
Rcomplex *sd = COMPLEX(source);
for (int r=0; r<maxn; ++r) td[ansloc+r] = sd[igrp];
Comment thread
MichaelChirico marked this conversation as resolved.
}
// Shouldn't need SET_* to age objects here since groups, TO DO revisit.
}
Comment thread
MichaelChirico marked this conversation as resolved.
Expand All @@ -415,6 +435,10 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
double *td = REAL(target)+thisansloc;
for (int r=0; r<maxn; ++r) td[r] = NA_REAL;
} break;
case CPLXSXP : {
Rcomplex *td = COMPLEX(target) + thisansloc;
for (int r=0; r<maxn; ++r) td[r] = NA_CPLX;
} break;
case STRSXP :
for (int r=0; r<maxn; ++r) SET_STRING_ELT(target,thisansloc+r,NA_STRING);
break;
Expand Down
5 changes: 4 additions & 1 deletion src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ R_CallMethodDef callMethods[] = {
{"CcolnamesInt", (DL_FUNC) &colnamesInt, -1},
{"CcoerceFillR", (DL_FUNC) &coerceFillR, -1},
{"CinitLastUpdated", (DL_FUNC) &initLastUpdated, -1},
{"Ccj", (DL_FUNC) &cj, -1},
{"Ccj", (DL_FUNC) &cj, -1},
{"Ccoalesce", (DL_FUNC) &coalesce, -1},
{NULL, NULL, 0}
};
Expand Down Expand Up @@ -255,6 +255,9 @@ void attribute_visible R_init_datatable(DllInfo *info)
if (ISNAN(NA_INT64_D)) error("ISNAN(NA_INT64_D) is TRUE but should not be");
if (isnan(NA_INT64_D)) error("isnan(NA_INT64_D) is TRUE but should not be");

NA_CPLX.r = NA_REAL; // NA_REAL is defined as R_NaReal which is not a strict constant and thus initializer {NA_REAL, NA_REAL} can't be used in .h
NA_CPLX.i = NA_REAL; // https://github.com/Rdatatable/data.table/pull/3689/files#r304117234

setNumericRounding(PROTECT(ScalarInteger(0))); // #1642, #1728, #1463, #485
UNPROTECT(1);

Expand Down
1 change: 0 additions & 1 deletion src/subset.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ static void subsetVectorRaw(SEXP ans, SEXP source, SEXP idx, const bool anyNA)
case CPLXSXP : {
Rcomplex *sp = COMPLEX(source);
Rcomplex *ap = COMPLEX(ans);
Rcomplex NA_CPLX = { NA_REAL, NA_REAL };
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

looking at it again, would just inlining = (Rcomplex){NA_REAL, NA_REAL} do the trick?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think (Rcomplex){NA_REAL, NA_REAL} will work. Because Rcomplex is a struct. The {NA_REAL, NA_REAL} is special syntax just for initialization of a variable/struct.

PARLOOP(NA_CPLX)
} break;
case RAWSXP : {
Expand Down