From a1980334adf8ad99af06373ad6afb60bb5ba470a Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Sat, 2 Oct 2021 12:00:50 +0200 Subject: [PATCH 1/4] improve coverage gfuns --- inst/tests/tests.Rraw | 11 +++++++++++ src/gsumm.c | 6 ++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index bfa901d315..949535a659 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18215,3 +18215,14 @@ DT1 = data.table(id=1:3, grp=c('a', 'a', 'b'), value=4:6) DT2 = data.table(grp = c('a', 'b'), agg = list(c('1' = 4, '2' = 5), c('3' = 6))) test(2217, DT1[, by = grp, .(agg = list(setNames(as.numeric(value), id)))], DT2) +# gforce improve coverage +DT = data.table(g=1:2, i=c(NA, 1:4, NA), f=factor(letters[1:6])) +options(datatable.optimize = 2L) +funs = c("sum", "mean", "min", "max", "median", "var", "sd", "prod") +testnum = 2218 +for (fun in funs) { + testnum = testnum + 0.01 + test(testnum, EVAL("DT[,",fun,"(i, na.rm='a'), g]"), error="na.rm must be TRUE or FALSE") + testnum = testnum + 0.01 + test(testnum, EVAL("DT[,",fun,"(f), g]"), error=sprintf("%s is not meaningful for factors.", fun)) +} diff --git a/src/gsumm.c b/src/gsumm.c index be3b0f7855..fc6eafd580 100644 --- a/src/gsumm.c +++ b/src/gsumm.c @@ -583,7 +583,8 @@ SEXP gmean(SEXP x, SEXP narmArg) double started = wallclock(); const bool verbose=GetVerbose(); if (verbose) Rprintf(_("This gmean took (narm=%s) ... "), narm?"TRUE":"FALSE"); // narm=TRUE only at this point - if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gmean"); + if (nrow != n) + error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gmean"); bool anyNA=false; SEXP ans=R_NilValue; int protecti=0; @@ -918,7 +919,8 @@ static SEXP gfirstlast(SEXP x, const bool first, const int w, const bool headw) const bool nosubset = irowslen == -1; const bool issorted = !isunsorted; // make a const-bool for use inside loops const int n = nosubset ? length(x) : irowslen; - if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, first?"gfirst":"glast"); + if (nrow != n) + error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, first?"gfirst":"glast"); if (w==1 && headw) error(_("Internal error: gfirstlast headw should only be true when w>1")); int anslen = ngrp; if (headw) { From 9d58cf7be383498f7a5a6e1fcc0b41584d4e5037 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Sat, 2 Oct 2021 13:02:51 +0200 Subject: [PATCH 2/4] more coverage --- inst/tests/tests.Rraw | 1 + 1 file changed, 1 insertion(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 949535a659..ed0aeb85f3 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18226,3 +18226,4 @@ for (fun in funs) { testnum = testnum + 0.01 test(testnum, EVAL("DT[,",fun,"(f), g]"), error=sprintf("%s is not meaningful for factors.", fun)) } +test(testnum+0.01, DT[, gprod(.SD), g], error="GForce prod can only be applied to columns, not .SD or similar.") From da8c290b2676a8c15f8d3d2481fe0a5aeba36368 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Sat, 2 Oct 2021 13:14:27 +0200 Subject: [PATCH 3/4] more cov --- inst/tests/tests.Rraw | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index ed0aeb85f3..91c6ad51c3 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18216,7 +18216,7 @@ DT2 = data.table(grp = c('a', 'b'), agg = list(c('1' = 4, '2' = 5), c('3' = 6))) test(2217, DT1[, by = grp, .(agg = list(setNames(as.numeric(value), id)))], DT2) # gforce improve coverage -DT = data.table(g=1:2, i=c(NA, 1:4, NA), f=factor(letters[1:6])) +DT = data.table(g=1:2, i=c(NA, 1:4, NA), f=factor(letters[1:6]), l=as.list(1:6)) options(datatable.optimize = 2L) funs = c("sum", "mean", "min", "max", "median", "var", "sd", "prod") testnum = 2218 @@ -18226,4 +18226,4 @@ for (fun in funs) { testnum = testnum + 0.01 test(testnum, EVAL("DT[,",fun,"(f), g]"), error=sprintf("%s is not meaningful for factors.", fun)) } -test(testnum+0.01, DT[, gprod(.SD), g], error="GForce prod can only be applied to columns, not .SD or similar.") +test(testnum+0.01, DT[, prod(l), g], error="GForce prod can only be applied to columns, not .SD or similar.") From c83f72c79fc8141b4b4c7c2b74bfe75603fbff66 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Mon, 4 Oct 2021 19:19:24 +0200 Subject: [PATCH 4/4] revert newlines (nrow!=n) error --- src/gsumm.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/src/gsumm.c b/src/gsumm.c index fc6eafd580..5bb2620243 100644 --- a/src/gsumm.c +++ b/src/gsumm.c @@ -348,8 +348,7 @@ SEXP gsum(SEXP x, SEXP narmArg) double started = wallclock(); const bool verbose=GetVerbose(); if (verbose) Rprintf(_("This gsum (narm=%s) took ... "), narm?"TRUE":"FALSE"); - if (nrow != n) - error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gsum"); + if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gsum"); bool anyNA=false; SEXP ans; switch(TYPEOF(x)) { @@ -583,8 +582,7 @@ SEXP gmean(SEXP x, SEXP narmArg) double started = wallclock(); const bool verbose=GetVerbose(); if (verbose) Rprintf(_("This gmean took (narm=%s) ... "), narm?"TRUE":"FALSE"); // narm=TRUE only at this point - if (nrow != n) - error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gmean"); + if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gmean"); bool anyNA=false; SEXP ans=R_NilValue; int protecti=0; @@ -730,8 +728,7 @@ static SEXP gminmax(SEXP x, SEXP narm, const bool min) const int n = nosubset ? length(x) : irowslen; //clock_t start = clock(); SEXP ans; - if (nrow != n) - error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gminmax"); + if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gminmax"); // GForce guarantees each group has at least one value; i.e. we don't need to consider length-0 per group here switch(TYPEOF(x)) { case LGLSXP: case INTSXP: { @@ -866,8 +863,7 @@ SEXP gmedian(SEXP x, SEXP narmArg) { error(_("%s is not meaningful for factors."), "median"); const bool isInt64 = INHERITS(x, char_integer64), narm = LOGICAL(narmArg)[0]; const int n = (irowslen == -1) ? length(x) : irowslen; - if (nrow != n) - error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gmedian"); + if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gmedian"); SEXP ans = PROTECT(allocVector(REALSXP, ngrp)); double *ansd = REAL(ans); const bool nosubset = irowslen==-1; @@ -919,8 +915,7 @@ static SEXP gfirstlast(SEXP x, const bool first, const int w, const bool headw) const bool nosubset = irowslen == -1; const bool issorted = !isunsorted; // make a const-bool for use inside loops const int n = nosubset ? length(x) : irowslen; - if (nrow != n) - error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, first?"gfirst":"glast"); + if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, first?"gfirst":"glast"); if (w==1 && headw) error(_("Internal error: gfirstlast headw should only be true when w>1")); int anslen = ngrp; if (headw) { @@ -1023,8 +1018,7 @@ static SEXP gvarsd1(SEXP x, SEXP narmArg, bool isSD) if (inherits(x, "factor")) error(_("%s is not meaningful for factors."), isSD ? "sd" : "var"); const int n = (irowslen == -1) ? length(x) : irowslen; - if (nrow != n) - error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gvar"); + if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gvar"); SEXP sub, ans = PROTECT(allocVector(REALSXP, ngrp)); double *ansd = REAL(ans); const bool nosubset = irowslen==-1; @@ -1121,8 +1115,7 @@ SEXP gprod(SEXP x, SEXP narmArg) { const int n = nosubset ? length(x) : irowslen; //clock_t start = clock(); SEXP ans; - if (nrow != n) - error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gprod"); + if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gprod"); long double *s = malloc(ngrp * sizeof(long double)); if (!s) error(_("Unable to allocate %d * %d bytes for gprod"), ngrp, sizeof(long double)); for (int i=0; i