diff --git a/NEWS.md b/NEWS.md index baa1d256dd..a54583a3dc 100644 --- a/NEWS.md +++ b/NEWS.md @@ -48,6 +48,8 @@ `rbindlist(l, ignore.attr=TRUE)` and `rbind` also gains argument `ignore.attr` to manually deactivate the safety-net of binding columns with different column classes, [#3911](https://github.com/Rdatatable/data.table/issues/3911), [#5542](https://github.com/Rdatatable/data.table/issues/5542). Thanks to @dcaseykc, @fox34, @adrian-quintario, @berg-michael, @arunsrinivasan, @statquant, @pkress, @jrausch12, @therosko, @OfekShilon, @iMissile, @tdhock for the request and @ben-schwen for the PR. +16. `fcase()` supports scalars in conditions (e.g. supplying just `TRUE`), vectors in `default=` (so the default can vary by row), and `default=` is now lazily evaluated, [#5461](https://github.com/Rdatatable/data.table/issues/5461). Thanks @sindribaldur for the feature request, which has been highly requested, @shrektan for doing most of the implementation, and @MichaelChirico for sewing things up. + ## BUG FIXES 1. `unique()` returns a copy the case when `nrows(x) <= 1` instead of a mutable alias, [#5932](https://github.com/Rdatatable/data.table/pull/5932). This is consistent with existing `unique()` behavior when the input has no duplicates but more than one row. Thanks to @brookslogan for the report and @dshemetov for the fix. diff --git a/R/wrappers.R b/R/wrappers.R index a018b91ae9..a339a919e6 100644 --- a/R/wrappers.R +++ b/R/wrappers.R @@ -6,7 +6,7 @@ fcoalesce = function(...) .Call(Ccoalesce, list(...), FALSE) setcoalesce = function(...) .Call(Ccoalesce, list(...), TRUE) fifelse = function(test, yes, no, na=NA) .Call(CfifelseR, test, yes, no, na) -fcase = function(..., default=NA) .Call(CfcaseR, default, parent.frame(), as.list(substitute(list(...)))[-1L]) +fcase = function(..., default=NA) .Call(CfcaseR, parent.frame(), as.list(substitute(list(..., TRUE, default)))[-1L]) colnamesInt = function(x, cols, check_dups=FALSE, skip_absent=FALSE) .Call(CcolnamesInt, x, cols, check_dups, skip_absent) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 643bd6134c..a614a9e2f3 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16646,9 +16646,9 @@ test(2127.65, fcase(test_vec1, list(1)), list(1,1,1,1,1, NULL, NULL, NULL, NULL, test(2127.66, fcase(test_vec1, as.Date("2019-10-11")), c(rep(as.Date("2019-10-11"),5),rep(NA,6))) test(2127.67, fcase(test_vec1, factor("a", levels=letters[1:3])), factor(c(rep("a",5),rep("NA",6)), levels=letters[1:3])) test(2127.68, fcase(test_vec1, 1L, default = 1:2), error = "Length of 'default' must be 1.") -test(2127.69, fcase(test_vec1, 1L, test_vec_na1, 2L), error = "Argument #3 has a different length than argument #1. Please make sure all logical conditions have the same length.") +test(2127.69, fcase(test_vec1, 1L, test_vec_na1, 2L), error = "Argument #3 has length 12 which differs from that of argument #1 (11). Please make sure all logical conditions have the same length or length 1.") test(2127.70, fcase(test_vec1, as.Date("2019-10-11"), test_vec2, 2), error = "Argument #4 has different class than argument #2, Please make sure all output values have the same class.") -test(2127.71, fcase(test_vec1, 1L, test_vec2, 2:3), error = "Length of output value #4 must either be 1 or length of logical condition.") +test(2127.71, fcase(test_vec1, 1L, test_vec2, 2:3), error = "Length of output value #4 (2) must either be 1 or match the length of the logical condition (11).") test(2127.72, fcase(TRUE, 1L, FALSE, stop("bang!")), 1L) test(2127.73, fcase(test_vec1, 1L, test_vec2, 0:10), as.integer(c( 1, 1, 1, 1, 1, NA, 6, 7, 8, 9, 10))) test(2127.74, fcase(test_vec1, 0:10, test_vec2, 0L), as.integer(c( 0, 1, 2, 3, 4, NA, 0, 0, 0, 0, 0))) @@ -18967,3 +18967,24 @@ test(2275.974, options=c(datatable.verbose=TRUE, datatable.forder.auto.index=TRU test(2275.975, options=c(datatable.verbose=TRUE, datatable.forder.auto.index=TRUE), forderv(d, "x", na.last=TRUE), notOutput="forder.*setting index.*retGrp=0, retStats=1") test(2275.99, forderv(data.table(a=1), reuseSorting=c(TRUE, TRUE)), error="reuseSorting must be") + +# fcase supports vector default values, #4258 +## for default +test(2276.01, fcase(c(TRUE, FALSE, NA, NA), 1:4, default=11:13), error="Length of 'default' must be 1 or 4.") +test(2276.02, fcase(c(TRUE, FALSE, NA, NA), 1:4, default=11:14), c(1L, 12:14)) +test(2276.03, fcase(c(TRUE, FALSE, NA, NA), 1:4 + 0.1, default=11:14 + 0.1), c(1L, 12:14) + 0.1) +test(2276.04, fcase(c(TRUE, FALSE, NA, NA), (1:4)+1i, default=(11:14)+1i), c(1L, 12:14)+1i) +test(2276.05, fcase(c(TRUE, FALSE, NA, NA), as.character(1:4), default=as.character(11:14)), as.character(c(1L, 12:14))) +test(2276.06, fcase(c(TRUE, FALSE, NA, NA), as.list(1:4), default=as.list(11:14)), as.list(c(1L, 12:14))) +## for scalar condition +test(2276.07, fcase(c(TRUE, FALSE, NA, NA), 1:4, TRUE, 11:13), error="Length of output value #4 (3) must either be 1 or match the length of the logical condition (4).") +test(2276.08, fcase(c(TRUE, FALSE, NA, NA), 1:4, TRUE, 11:14), c(1L, 12:14)) +test(2276.09, fcase(c(TRUE, FALSE, NA, NA), 1:4 + 0.1, TRUE, 11:14 + 0.1), c(1L, 12:14) + 0.1) +test(2276.10, fcase(c(TRUE, FALSE, NA, NA), (1:4)+1i, TRUE, (11:14)+1i), c(1L, 12:14)+1i) +test(2276.11, fcase(c(TRUE, FALSE, NA, NA), as.character(1:4), TRUE, as.character(11:14)), as.character(c(1L, 12:14))) +test(2276.12, fcase(c(TRUE, FALSE, NA, NA), as.list(1:4), TRUE, as.list(11:14)), as.list(c(1L, 12:14))) +test(2276.13, fcase(TRUE, 1L, default=stop("lazy eval")), 1L) # default is lazy eval'ed +test(2276.14, fcase(c(TRUE, FALSE), 1L, c(TRUE, TRUE), NA), c(1L, NA_integer_)) # scalar NA will be converted + +# output is missing +test(2276.15, fcase(c(TRUE, FALSE), NA_integer_, c(TRUE, TRUE), 2L), c(NA_integer_, 2L)) diff --git a/src/data.table.h b/src/data.table.h index a848ef0344..ed63978d65 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -267,7 +267,7 @@ SEXP testMsgR(SEXP status, SEXP x, SEXP k); //fifelse.c SEXP fifelseR(SEXP l, SEXP a, SEXP b, SEXP na); -SEXP fcaseR(SEXP na, SEXP rho, SEXP args); +SEXP fcaseR(SEXP rho, SEXP args); //snprintf.c int dt_win_snprintf(char *dest, size_t n, const char *fmt, ...); diff --git a/src/fifelse.c b/src/fifelse.c index 72b7f2c010..d3bc0fdb13 100644 --- a/src/fifelse.c +++ b/src/fifelse.c @@ -201,105 +201,106 @@ SEXP fifelseR(SEXP l, SEXP a, SEXP b, SEXP na) { return ans; } -SEXP fcaseR(SEXP na, SEXP rho, SEXP args) { - const int narg=length(args); +SEXP fcaseR(SEXP rho, SEXP args) { + const int narg=length(args); // `default` will take the last two positions if (narg % 2) { error(_("Received %d inputs; please supply an even number of arguments in ..., " "consisting of logical condition, resulting value pairs (in that order). " - "Note that the default argument must be named explicitly, e.g., default=0"), narg); + "Note that the default argument must be named explicitly, e.g., default=0"), narg - 2); } - if (narg==0) return R_NilValue; - - SEXP cons0 = PROTECT(eval(SEXPPTR_RO(args)[0], rho)); - SEXP value0 = PROTECT(eval(SEXPPTR_RO(args)[1], rho)); // value0 will be compared to from loop so leave it protected throughout - SEXPTYPE type0 = TYPEOF(value0); - int64_t len0=xlength(cons0), len2=len0; - if (isS4(value0) && !INHERITS(value0, char_nanotime)) { - error(_("S4 class objects (except nanotime) are not supported. Please see https://github.com/Rdatatable/data.table/issues/4131.")); - // otherwise 'invalid type/length (S4/1) in vector allocation' from test 2132.3 - } - SEXP ans = PROTECT(allocVector(type0, len0)); - SEXP tracker = PROTECT(allocVector(INTSXP, len0)); - int *restrict p = INTEGER(tracker); - copyMostAttrib(value0, ans); - - bool nonna=!isNull(na); - if (nonna) { - if (xlength(na) != 1) { - error(_("Length of 'default' must be 1.")); - } - SEXPTYPE tn = TYPEOF(na); - if (tn==LGLSXP && LOGICAL(na)[0]==NA_LOGICAL) { - nonna = false; - } else { - if (tn != type0) { - error(_("Resulting value is of type %s but 'default' is of type %s. " - "Please make sure that both arguments have the same type."), type2char(type0), type2char(tn)); - } - if (!R_compute_identical(PROTECT(getAttrib(value0,R_ClassSymbol)), PROTECT(getAttrib(na,R_ClassSymbol)), 0)) { - error(_("Resulting value has different class than 'default'. " - "Please make sure that both arguments have the same class.")); - } - UNPROTECT(2); - if (isFactor(value0)) { - if (!R_compute_identical(PROTECT(getAttrib(value0,R_LevelsSymbol)), PROTECT(getAttrib(na,R_LevelsSymbol)), 0)) { - error(_("Resulting value and 'default' are both type factor but their levels are different.")); - } - UNPROTECT(2); - } - } - } - + int nprotect=0, l; + int64_t len0=0, len1=0, len2=0; + SEXP ans=R_NilValue, value0=R_NilValue, tracker=R_NilValue, whens=R_NilValue, thens=R_NilValue; + PROTECT_INDEX Iwhens, Ithens; + PROTECT_WITH_INDEX(whens, &Iwhens); nprotect++; + PROTECT_WITH_INDEX(thens, &Ithens); nprotect++; + SEXPTYPE type0=NILSXP; + // naout means if the output is scalar logic na + bool imask = true, naout = false, idefault = false; + int *restrict p = NULL; const int n = narg/2; for (int i=0; i0) { - if (xlength(cons) != len0) { - error(_("Argument #%d has a different length than argument #1. " - "Please make sure all logical conditions have the same length."), - i*2+1); + const int *restrict pwhens = LOGICAL(whens); + l = 0; + if (i == 0) { + len0 = xlength(whens); + len2 = len0; + type0 = TYPEOF(thens); + value0 = thens; + ans = PROTECT(allocVector(type0, len0)); nprotect++; + copyMostAttrib(thens, ans); + tracker = PROTECT(allocVector(INTSXP, len0)); nprotect++; + p = INTEGER(tracker); + } else { + imask = false; + naout = xlength(thens) == 1 && TYPEOF(thens) == LGLSXP && LOGICAL(thens)[0]==NA_LOGICAL; + if (xlength(whens) != len0 && xlength(whens) != 1) { + // no need to check `idefault` here because the con for default is always `TRUE` + error(_("Argument #%d has length %lld which differs from that of argument #1 (%lld). " + "Please make sure all logical conditions have the same length or length 1."), + i*2+1, (long long)xlength(whens), (long long)len0); } - if (TYPEOF(outs) != type0) { - error(_("Argument #%d is of type %s, however argument #2 is of type %s. " - "Please make sure all output values have the same type."), - i*2+2, type2char(TYPEOF(outs)), type2char(type0)); + if (!naout && TYPEOF(thens) != type0) { + if (idefault) { + error(_("Resulting value is of type %s but 'default' is of type %s. " + "Please make sure that both arguments have the same type."), type2char(type0), type2char(TYPEOF(thens))); + } else { + error(_("Argument #%d is of type %s, however argument #2 is of type %s. " + "Please make sure all output values have the same type."), + i*2+2, type2char(TYPEOF(thens)), type2char(type0)); + } } - if (!R_compute_identical(PROTECT(getAttrib(value0,R_ClassSymbol)), PROTECT(getAttrib(outs,R_ClassSymbol)), 0)) { - error(_("Argument #%d has different class than argument #2, " - "Please make sure all output values have the same class."), i*2+2); + if (!naout) { + if (!R_compute_identical(PROTECT(getAttrib(value0, R_ClassSymbol)), PROTECT(getAttrib(thens, R_ClassSymbol)), 0)) { + if (idefault) { + error(_("Resulting value has different class than 'default'. " + "Please make sure that both arguments have the same class.")); + } else { + error(_("Argument #%d has different class than argument #2, " + "Please make sure all output values have the same class."), i*2+2); + } + } + UNPROTECT(2); // class(value0), class(thens) } - UNPROTECT(2); - if (isFactor(value0)) { - if (!R_compute_identical(PROTECT(getAttrib(value0,R_LevelsSymbol)), PROTECT(getAttrib(outs,R_LevelsSymbol)), 0)) { - error(_("Argument #2 and argument #%d are both factor but their levels are different."), i*2+2); + if (!naout && isFactor(value0)) { + if (!R_compute_identical(PROTECT(getAttrib(value0, R_LevelsSymbol)), PROTECT(getAttrib(thens, R_LevelsSymbol)), 0)) { + if (idefault) { + error(_("Resulting value and 'default' are both type factor but their levels are different.")); + } else { + error(_("Argument #2 and argument #%d are both factor but their levels are different."), i*2+2); + } } - UNPROTECT(2); + UNPROTECT(2); // levels(value0), levels(thens) } } - int64_t len1 = xlength(outs); - if (len1!=len0 && len1!=1) { - error(_("Length of output value #%d must either be 1 or length of logical condition."), i*2+2); + len1 = xlength(thens); + if (len1 != len0 && len1 != 1) { + if (idefault) { + error(_("Length of 'default' must be 1 or %lld."), (long long)len0); + } else { + error(_("Length of output value #%d (%lld) must either be 1 or match the length of the logical condition (%lld)."), i*2+2, (long long)len1, (long long)len0); + } } - int64_t amask = len1>1 ? INT64_MAX : 0; - const int *restrict pcons = LOGICAL(cons); - const bool imask = i==0; - int64_t l=0; // how many this case didn't satisfy; i.e. left for next case - switch(TYPEOF(outs)) { + int64_t thenMask = len1>1 ? INT64_MAX : 0, whenMask = xlength(whens)>1 ? INT64_MAX : 0; + switch(TYPEOF(ans)) { case LGLSXP: { - const int *restrict pouts = LOGICAL(outs); + const int *restrict pthens; + if (!naout) pthens = LOGICAL(thens); // the content is not useful if out is NA_LOGICAL scalar int *restrict pans = LOGICAL(ans); - const int pna = nonna ? LOGICAL(na)[0] : NA_LOGICAL; + const int pna = NA_LOGICAL; for (int64_t j=0; j