Rdatatable · MichaelChirico · Jul 29, 2024 · Feb 27, 2020 · Feb 27, 2020 · Feb 27, 2020
@@ -48,6 +48,8 @@
 
 `rbindlist(l, ignore.attr=TRUE)` and `rbind` also gains argument `ignore.attr` to manually deactivate the safety-net of binding columns with different column classes, [#3911](https://github.com/Rdatatable/data.table/issues/3911), [#5542](https://github.com/Rdatatable/data.table/issues/5542). Thanks to @dcaseykc, @fox34, @adrian-quintario, @berg-michael, @arunsrinivasan, @statquant, @pkress, @jrausch12, @therosko, @OfekShilon, @iMissile, @tdhock for the request and @ben-schwen for the PR.
 
+16. `fcase()` supports scalars in conditions (e.g. supplying just `TRUE`), vectors in `default=` (so the default can vary by row), and `default=` is now lazily evaluated, [#5461](https://github.com/Rdatatable/data.table/issues/5461). Thanks @sindribaldur for the feature request, which has been highly requested, @shrektan for doing most of the implementation, and @MichaelChirico for sewing things up.
+
 ## BUG FIXES
 
 1. `unique()` returns a copy the case when `nrows(x) <= 1` instead of a mutable alias, [#5932](https://github.com/Rdatatable/data.table/pull/5932). This is consistent with existing `unique()` behavior when the input has no duplicates but more than one row. Thanks to @brookslogan for the report and @dshemetov for the fix.

@@ -6,7 +6,7 @@ fcoalesce   = function(...) .Call(Ccoalesce, list(...), FALSE)
 setcoalesce = function(...) .Call(Ccoalesce, list(...), TRUE)
 
 fifelse = function(test, yes, no, na=NA) .Call(CfifelseR, test, yes, no, na)
-fcase   = function(..., default=NA) .Call(CfcaseR, default, parent.frame(), as.list(substitute(list(...)))[-1L])
+fcase   = function(..., default=NA) .Call(CfcaseR, parent.frame(), as.list(substitute(list(..., TRUE, default)))[-1L])
 
 colnamesInt = function(x, cols, check_dups=FALSE, skip_absent=FALSE) .Call(CcolnamesInt, x, cols, check_dups, skip_absent)
 

@@ -16646,9 +16646,9 @@ test(2127.65, fcase(test_vec1, list(1)), list(1,1,1,1,1, NULL, NULL, NULL, NULL,
 test(2127.66, fcase(test_vec1, as.Date("2019-10-11")), c(rep(as.Date("2019-10-11"),5),rep(NA,6)))
 test(2127.67, fcase(test_vec1, factor("a", levels=letters[1:3])), factor(c(rep("a",5),rep("NA",6)), levels=letters[1:3]))
 test(2127.68, fcase(test_vec1, 1L, default = 1:2), error = "Length of 'default' must be 1.")
-test(2127.69, fcase(test_vec1, 1L, test_vec_na1, 2L), error = "Argument #3 has a different length than argument #1. Please make sure all logical conditions have the same length.")
+test(2127.69, fcase(test_vec1, 1L, test_vec_na1, 2L), error = "Argument #3 has length 12 which differs from that of argument #1 (11). Please make sure all logical conditions have the same length or length 1.")
 test(2127.70, fcase(test_vec1, as.Date("2019-10-11"), test_vec2, 2), error = "Argument #4 has different class than argument #2, Please make sure all output values have the same class.")
-test(2127.71, fcase(test_vec1, 1L, test_vec2, 2:3), error = "Length of output value #4 must either be 1 or length of logical condition.")
+test(2127.71, fcase(test_vec1, 1L, test_vec2, 2:3), error = "Length of output value #4 (2) must either be 1 or match the length of the logical condition (11).")
 test(2127.72, fcase(TRUE, 1L, FALSE, stop("bang!")), 1L)
 test(2127.73, fcase(test_vec1, 1L, test_vec2, 0:10), as.integer(c( 1, 1, 1, 1, 1, NA, 6, 7, 8, 9, 10)))
 test(2127.74, fcase(test_vec1, 0:10, test_vec2, 0L), as.integer(c( 0, 1, 2, 3, 4, NA, 0, 0, 0, 0, 0)))
@@ -18967,3 +18967,24 @@ test(2275.974, options=c(datatable.verbose=TRUE, datatable.forder.auto.index=TRU
 test(2275.975, options=c(datatable.verbose=TRUE, datatable.forder.auto.index=TRUE),
      forderv(d, "x", na.last=TRUE), notOutput="forder.*setting index.*retGrp=0, retStats=1")
 test(2275.99, forderv(data.table(a=1), reuseSorting=c(TRUE, TRUE)), error="reuseSorting must be")
+
+# fcase supports vector default values, #4258
+## for default
+test(2276.01, fcase(c(TRUE, FALSE, NA, NA), 1:4, default=11:13), error="Length of 'default' must be 1 or 4.")
+test(2276.02, fcase(c(TRUE, FALSE, NA, NA), 1:4, default=11:14), c(1L, 12:14))
+test(2276.03, fcase(c(TRUE, FALSE, NA, NA), 1:4 + 0.1, default=11:14 + 0.1), c(1L, 12:14) + 0.1)
+test(2276.04, fcase(c(TRUE, FALSE, NA, NA), (1:4)+1i, default=(11:14)+1i), c(1L, 12:14)+1i)
+test(2276.05, fcase(c(TRUE, FALSE, NA, NA), as.character(1:4), default=as.character(11:14)), as.character(c(1L, 12:14)))
+test(2276.06, fcase(c(TRUE, FALSE, NA, NA), as.list(1:4), default=as.list(11:14)), as.list(c(1L, 12:14)))
+## for scalar condition
+test(2276.07, fcase(c(TRUE, FALSE, NA, NA), 1:4, TRUE, 11:13), error="Length of output value #4 (3) must either be 1 or match the length of the logical condition (4).")
+test(2276.08, fcase(c(TRUE, FALSE, NA, NA), 1:4, TRUE, 11:14), c(1L, 12:14))
+test(2276.09, fcase(c(TRUE, FALSE, NA, NA), 1:4 + 0.1, TRUE, 11:14 + 0.1), c(1L, 12:14) + 0.1)
+test(2276.10, fcase(c(TRUE, FALSE, NA, NA), (1:4)+1i, TRUE, (11:14)+1i), c(1L, 12:14)+1i)
+test(2276.11, fcase(c(TRUE, FALSE, NA, NA), as.character(1:4), TRUE, as.character(11:14)), as.character(c(1L, 12:14)))
+test(2276.12, fcase(c(TRUE, FALSE, NA, NA), as.list(1:4), TRUE, as.list(11:14)), as.list(c(1L, 12:14)))
+test(2276.13, fcase(TRUE, 1L, default=stop("lazy eval")), 1L) # default is lazy eval'ed
+test(2276.14, fcase(c(TRUE, FALSE), 1L, c(TRUE, TRUE), NA), c(1L, NA_integer_)) # scalar NA will be converted
+
+# output is missing
+test(2276.15, fcase(c(TRUE, FALSE), NA_integer_, c(TRUE, TRUE), 2L), c(NA_integer_, 2L))
@@ -267,7 +267,7 @@ SEXP testMsgR(SEXP status, SEXP x, SEXP k);
 
 //fifelse.c
 SEXP fifelseR(SEXP l, SEXP a, SEXP b, SEXP na);
-SEXP fcaseR(SEXP na, SEXP rho, SEXP args);
+SEXP fcaseR(SEXP rho, SEXP args);
 
 //snprintf.c
 int dt_win_snprintf(char *dest, size_t n, const char *fmt, ...);

@@ -201,105 +201,106 @@ SEXP fifelseR(SEXP l, SEXP a, SEXP b, SEXP na) {
   return ans;
 }
 
-SEXP fcaseR(SEXP na, SEXP rho, SEXP args) {
-  const int narg=length(args);
+SEXP fcaseR(SEXP rho, SEXP args) {
+  const int narg=length(args); // `default` will take the last two positions
   if (narg % 2) {
     error(_("Received %d inputs; please supply an even number of arguments in ..., "
             "consisting of logical condition, resulting value pairs (in that order). "
-            "Note that the default argument must be named explicitly, e.g., default=0"), narg);
+            "Note that the default argument must be named explicitly, e.g., default=0"), narg - 2);
   }
-  if (narg==0) return R_NilValue;
-
-  SEXP cons0 = PROTECT(eval(SEXPPTR_RO(args)[0], rho));
-  SEXP value0 = PROTECT(eval(SEXPPTR_RO(args)[1], rho)); // value0 will be compared to from loop so leave it protected throughout
-  SEXPTYPE type0 = TYPEOF(value0);
-  int64_t len0=xlength(cons0), len2=len0;
-  if (isS4(value0) && !INHERITS(value0, char_nanotime)) {
-    error(_("S4 class objects (except nanotime) are not supported. Please see https://github.com/Rdatatable/data.table/issues/4131."));
-    // otherwise 'invalid type/length (S4/1) in vector allocation' from test 2132.3
-  }
-  SEXP ans = PROTECT(allocVector(type0, len0));
-  SEXP tracker = PROTECT(allocVector(INTSXP, len0));
-  int *restrict p = INTEGER(tracker);
-  copyMostAttrib(value0, ans);
-
-  bool nonna=!isNull(na);
-  if (nonna) {
-    if (xlength(na) != 1) {
-      error(_("Length of 'default' must be 1."));
-    }
-    SEXPTYPE tn = TYPEOF(na);
-    if (tn==LGLSXP && LOGICAL(na)[0]==NA_LOGICAL) {
-      nonna = false;
-    } else {
-      if (tn != type0) {
-        error(_("Resulting value is of type %s but 'default' is of type %s. "
-                "Please make sure that both arguments have the same type."), type2char(type0), type2char(tn));
-      }
-      if (!R_compute_identical(PROTECT(getAttrib(value0,R_ClassSymbol)),  PROTECT(getAttrib(na,R_ClassSymbol)), 0)) {
-        error(_("Resulting value has different class than 'default'. "
-                "Please make sure that both arguments have the same class."));
-      }
-      UNPROTECT(2);
-      if (isFactor(value0)) {
-        if (!R_compute_identical(PROTECT(getAttrib(value0,R_LevelsSymbol)),  PROTECT(getAttrib(na,R_LevelsSymbol)), 0)) {
-          error(_("Resulting value and 'default' are both type factor but their levels are different."));
-        }
-        UNPROTECT(2);
-      }
-    }
-  }
-
+  int nprotect=0, l;
+  int64_t len0=0, len1=0, len2=0;
+  SEXP ans=R_NilValue, value0=R_NilValue, tracker=R_NilValue, whens=R_NilValue, thens=R_NilValue;
+  PROTECT_INDEX Iwhens, Ithens;
+  PROTECT_WITH_INDEX(whens, &Iwhens); nprotect++;
+  PROTECT_WITH_INDEX(thens, &Ithens); nprotect++;
+  SEXPTYPE type0=NILSXP;
+  // naout means if the output is scalar logic na
+  bool imask = true, naout = false, idefault = false;
+  int *restrict p = NULL;
   const int n = narg/2;
   for (int i=0; i<n; ++i) {
-    SEXP cons = PROTECT(i==0 ? cons0 : eval(SEXPPTR_RO(args)[2*i], rho)); // protect cons0 again for easy unprotect at the end of this loop
-    SEXP outs = PROTECT(i==0 ? value0 : eval(SEXPPTR_RO(args)[2*i+1], rho));
-    if (isS4(outs) && !INHERITS(outs, char_nanotime)) {
+    idefault = i == (n - 1); // mark if the current eval is the `default` on R side
+    REPROTECT(whens = eval(SEXPPTR_RO(args)[2*i], rho), Iwhens);
+    REPROTECT(thens = eval(SEXPPTR_RO(args)[2*i+1], rho), Ithens);
+    if (isS4(thens) && !INHERITS(thens, char_nanotime)) {
       error(_("S4 class objects (except nanotime) are not supported. Please see https://github.com/Rdatatable/data.table/issues/4131."));
     }
-    if (!isLogical(cons)) {
-      error(_("Argument #%d must be logical."), 2*i+1);
+    if (!isLogical(whens)) {
+      error(_("Argument #%d must be logical but was of type %s."), 2*i+1, type2char(TYPEOF(whens)));
     }
-    if (i>0) {
-      if (xlength(cons) != len0) {
-        error(_("Argument #%d has a different length than argument #1. "
-                "Please make sure all logical conditions have the same length."),
-                i*2+1);
+    const int *restrict pwhens = LOGICAL(whens);
+    l = 0;
+    if (i == 0) {
+      len0 = xlength(whens);
+      len2 = len0;
+      type0 = TYPEOF(thens);
+      value0 = thens;
+      ans = PROTECT(allocVector(type0, len0)); nprotect++;
+      copyMostAttrib(thens, ans);
+      tracker = PROTECT(allocVector(INTSXP, len0)); nprotect++;
+      p = INTEGER(tracker);     
+    } else {
+      imask = false;
+      naout = xlength(thens) == 1 && TYPEOF(thens) == LGLSXP && LOGICAL(thens)[0]==NA_LOGICAL;
+      if (xlength(whens) != len0 && xlength(whens) != 1) {
+        // no need to check `idefault` here because the con for default is always `TRUE`
+        error(_("Argument #%d has length %lld which differs from that of argument #1 (%lld). "
+                "Please make sure all logical conditions have the same length or length 1."),
+                i*2+1, (long long)xlength(whens), (long long)len0);
       }
-      if (TYPEOF(outs) != type0) {
-        error(_("Argument #%d is of type %s, however argument #2 is of type %s. "
-                "Please make sure all output values have the same type."),
-                i*2+2, type2char(TYPEOF(outs)), type2char(type0));
+      if (!naout && TYPEOF(thens) != type0) {
+        if (idefault) {
+          error(_("Resulting value is of type %s but 'default' is of type %s. "
+                  "Please make sure that both arguments have the same type."), type2char(type0), type2char(TYPEOF(thens)));
+        } else {
+          error(_("Argument #%d is of type %s, however argument #2 is of type %s. "
+                  "Please make sure all output values have the same type."),
+                  i*2+2, type2char(TYPEOF(thens)), type2char(type0));
+        }
       }
-      if (!R_compute_identical(PROTECT(getAttrib(value0,R_ClassSymbol)),  PROTECT(getAttrib(outs,R_ClassSymbol)), 0)) {
-        error(_("Argument #%d has different class than argument #2, "
-                "Please make sure all output values have the same class."), i*2+2);
+      if (!naout) {
+        if (!R_compute_identical(PROTECT(getAttrib(value0, R_ClassSymbol)),  PROTECT(getAttrib(thens, R_ClassSymbol)), 0)) {
+          if (idefault) {
+            error(_("Resulting value has different class than 'default'. "
+                    "Please make sure that both arguments have the same class."));
+          } else {
+            error(_("Argument #%d has different class than argument #2, "
+                    "Please make sure all output values have the same class."), i*2+2);
+          }
+        }
+        UNPROTECT(2); // class(value0), class(thens)
       }
-      UNPROTECT(2);
-      if (isFactor(value0)) {
-        if (!R_compute_identical(PROTECT(getAttrib(value0,R_LevelsSymbol)),  PROTECT(getAttrib(outs,R_LevelsSymbol)), 0)) {
-          error(_("Argument #2 and argument #%d are both factor but their levels are different."), i*2+2);
+      if (!naout && isFactor(value0)) {
+        if (!R_compute_identical(PROTECT(getAttrib(value0, R_LevelsSymbol)),  PROTECT(getAttrib(thens, R_LevelsSymbol)), 0)) {
+          if (idefault) {
+            error(_("Resulting value and 'default' are both type factor but their levels are different."));
+          } else {
+            error(_("Argument #2 and argument #%d are both factor but their levels are different."), i*2+2);
+          }
         }
-        UNPROTECT(2);
+        UNPROTECT(2); // levels(value0), levels(thens)
       }
     }
-    int64_t len1 = xlength(outs);
-    if (len1!=len0 && len1!=1) {
-      error(_("Length of output value #%d must either be 1 or length of logical condition."), i*2+2);
+    len1 = xlength(thens);
+    if (len1 != len0 && len1 != 1) {
+      if (idefault) {
+        error(_("Length of 'default' must be 1 or %lld."), (long long)len0);
+      } else {
+        error(_("Length of output value #%d (%lld) must either be 1 or match the length of the logical condition (%lld)."), i*2+2, (long long)len1, (long long)len0);
+      }
     }
-    int64_t amask = len1>1 ? INT64_MAX : 0;
-    const int *restrict pcons = LOGICAL(cons);
-    const bool imask = i==0;
-    int64_t l=0; // how many this case didn't satisfy; i.e. left for next case
-    switch(TYPEOF(outs)) {
+    int64_t thenMask = len1>1 ? INT64_MAX : 0, whenMask = xlength(whens)>1 ? INT64_MAX : 0;
+    switch(TYPEOF(ans)) {
     case LGLSXP: {
-      const int *restrict pouts = LOGICAL(outs);
+      const int *restrict pthens;
+      if (!naout) pthens = LOGICAL(thens); // the content is not useful if out is NA_LOGICAL scalar
       int *restrict pans = LOGICAL(ans);
-      const int pna = nonna ? LOGICAL(na)[0] : NA_LOGICAL;
+      const int pna = NA_LOGICAL;
       for (int64_t j=0; j<len2; ++j) {
         const int64_t idx = imask ? j : p[j];
-        if (pcons[idx]==1) {
-          pans[idx] = pouts[idx & amask];
+        if (pwhens[idx & whenMask]==1) {
+          pans[idx] = naout ? pna : pthens[idx & thenMask];
         } else {
           if (imask) {
             pans[j] = pna;
@@ -309,13 +310,14 @@ SEXP fcaseR(SEXP na, SEXP rho, SEXP args) {
       }
     } break;
     case INTSXP: {
-      const int *restrict pouts = INTEGER(outs);
+      const int *restrict pthens;
+      if (!naout) pthens = INTEGER(thens); // the content is not useful if out is NA_LOGICAL scalar
       int *restrict pans = INTEGER(ans);
-      const int pna = nonna ? INTEGER(na)[0] : NA_INTEGER;
+      const int pna = NA_INTEGER;
       for (int64_t j=0; j<len2; ++j) {
         const int64_t idx = imask ? j : p[j];
-        if (pcons[idx]==1) {
-          pans[idx] = pouts[idx & amask];
+        if (pwhens[idx & whenMask]==1) {
+          pans[idx] = naout ? pna : pthens[idx & thenMask];
         } else {
           if (imask) {
             pans[j] = pna;
@@ -325,14 +327,15 @@ SEXP fcaseR(SEXP na, SEXP rho, SEXP args) {
       }
     } break;
     case REALSXP: {
-      const double *restrict pouts = REAL(outs);
+      const double *restrict pthens;
+      if (!naout) pthens = REAL(thens); // the content is not useful if out is NA_LOGICAL scalar
       double *restrict pans = REAL(ans);
-      const double na_double = INHERITS(outs, char_integer64) ? NA_INT64_D : NA_REAL;
-      const double pna = nonna ? REAL(na)[0] : na_double;
+      const double na_double = INHERITS(ans, char_integer64) ? NA_INT64_D : NA_REAL;
+      const double pna = na_double;
       for (int64_t j=0; j<len2; ++j) {
         const int64_t idx = imask ? j : p[j];
-        if (pcons[idx]==1) {
-          pans[idx] = pouts[idx & amask];
+        if (pwhens[idx & whenMask]==1) {
+          pans[idx] = naout ? pna : pthens[idx & thenMask];
         } else {
           if (imask) {
             pans[j] = pna;
@@ -342,13 +345,14 @@ SEXP fcaseR(SEXP na, SEXP rho, SEXP args) {
       }
     } break;
     case CPLXSXP: {
-      const Rcomplex *restrict pouts = COMPLEX(outs);
+      const Rcomplex *restrict pthens;
+      if (!naout) pthens = COMPLEX(thens); // the content is not useful if out is NA_LOGICAL scalar
       Rcomplex *restrict pans = COMPLEX(ans);
-      const Rcomplex pna = nonna ? COMPLEX(na)[0] : NA_CPLX;
+      const Rcomplex pna = NA_CPLX;
       for (int64_t j=0; j<len2; ++j) {
         const int64_t idx = imask ? j : p[j];
-        if (pcons[idx]==1) {
-          pans[idx] = pouts[idx & amask];
+        if (pwhens[idx & whenMask]==1) {
+          pans[idx] = naout ? pna : pthens[idx & thenMask];
         } else {
           if (imask) {
             pans[j] = pna;
@@ -358,44 +362,43 @@ SEXP fcaseR(SEXP na, SEXP rho, SEXP args) {
       }
     } break;
     case STRSXP: {
-      const SEXP *restrict pouts = STRING_PTR_RO(outs);
-      const SEXP pna = nonna ? STRING_PTR_RO(na)[0] : NA_STRING;
+      const SEXP *restrict pthens;
+      if (!naout) pthens = STRING_PTR_RO(thens); // the content is not useful if out is NA_LOGICAL scalar
+      const SEXP pna = NA_STRING;
       for (int64_t j=0; j<len2; ++j) {
         const int64_t idx = imask ? j : p[j];
-        if (pcons[idx]==1) {
-          SET_STRING_ELT(ans, idx, pouts[idx & amask]);
+        if (pwhens[idx & whenMask]==1) {
+          SET_STRING_ELT(ans, idx, naout ? pna : pthens[idx & thenMask]);
         } else {
           if (imask) {
-            SET_STRING_ELT(ans, idx, pna);
+            SET_STRING_ELT(ans, j, pna);
           }
           p[l++] = idx;
         }
       }
     } break;
     case VECSXP: {
-      const SEXP *restrict pouts = SEXPPTR_RO(outs);
-      const SEXP pna = SEXPPTR_RO(na)[0];
+      // the default value of VECSXP is `NULL` so we don't need to explicitly
+      // assign the NA values as it does for other atomic types
+      const SEXP *restrict pthens;
+      if (!naout) pthens = SEXPPTR_RO(thens); // the content is not useful if out is NA_LOGICAL scalar
       for (int64_t j=0; j<len2; ++j) {
         const int64_t idx = imask ? j : p[j];
-        if (pcons[idx]==1) {
-          SET_VECTOR_ELT(ans, idx, pouts[idx & amask]);
+        if (pwhens[idx & whenMask]==1) {
+          if (!naout) SET_VECTOR_ELT(ans, idx, pthens[idx & thenMask]);
         } else {
-          if (imask && nonna) {
-            SET_VECTOR_ELT(ans, idx, pna);
-          }
           p[l++] = idx;
         }
       }
     } break;
     default:
-      error(_("Type '%s' is not supported"), type2char(TYPEOF(outs)));
+      error(_("Type '%s' is not supported."), type2char(TYPEOF(ans)));
     }
-    UNPROTECT(2); // this cons and outs
     if (l==0) {
       break;  // stop early as nothing left to do
     }
     len2 = l;
   }
-  UNPROTECT(4); // cons0, value0, ans, tracker
+  UNPROTECT(nprotect); // whens, thens, ans, tracker
   return ans;
 }