From eefed86be90b7d9a60015d90bb5d84079e76b22e Mon Sep 17 00:00:00 2001 From: Matt Dowle Date: Wed, 14 Oct 2020 19:58:47 -0600 Subject: [PATCH 1/8] copying parts attempt; failing test 1619.1 --- inst/tests/tests.Rraw | 9 +++++++++ src/dogroups.c | 42 ++++++++++++++++++++++-------------------- 2 files changed, 31 insertions(+), 20 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index dc49a604a5..dd982b7e8d 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17158,3 +17158,12 @@ for (i in 0:4) test(2155+i/10, data.table(A=1:7, V1=42) ) +# dogroups.c eval(j) could create list columns containing altrep references to the specials, #PRNUM +# thanks to revdep testing of 1.13.2 where package tstools revealed this via ts() creating ALTREP, #4758 +# the attr(value,"class")<-"newclass" lines mimics a line at the end of stats::ts(). When the +# length(value)>=64, R creates an ALTREP REF wrapper. Which dogroups.c now catches. +# Hence this test needs to be at least 128 rows, 2 groups of 64 each. +DT = data.table(series=c("ts1","ts2"), value=rnorm(128)) +test(2156, DT[,list(list({attr(value,"class")<-"newclass";value})),by=series]$V1[[1L]][1L], + DT[1,value]) + diff --git a/src/dogroups.c b/src/dogroups.c index 4c243104a5..b1e9baa0f9 100644 --- a/src/dogroups.c +++ b/src/dogroups.c @@ -3,7 +3,7 @@ #include #include -static bool anySpecialStatic(SEXP x) { +static SEXP copySpecialStaticAndAltrep(SEXP x) { // Special refers to special symbols .BY, .I, .N, and .GRP; see special-symbols.Rd // Static because these are like C static arrays which are the same memory for each group; e.g., dogroups // creates .SD for the largest group once up front, overwriting the contents for each group. Their @@ -42,15 +42,16 @@ static bool anySpecialStatic(SEXP x) { const int n = length(x); // use length() not LENGTH() because LENGTH() on NULL is segfault in R<3.5 where we still define USE_RINTERNALS // (see data.table.h), and isNewList() is true for NULL - if (n==0) - return false; - if (isVectorAtomic(x)) - return TRUELENGTH(x)<0; - if (isNewList(x)) for (int i=0; i0 && TRUELENGTH(x)<0) || ALTREP(x)) + return copyAsPlain(x); + if (isNewList(x)) for (int i=0; i1 && thislen!=maxn && grpn>0) { // grpn>0 for grouping empty tables; test 1986 error(_("Supplied %d items for column %d of group %d which has %d rows. The RHS length must either be 1 (single values are ok) or match the LHS length exactly. If you wish to 'recycle' the RHS please use rep() explicitly to make this intent clear to readers of your code."), thislen, j+1, i+1, maxn); } - bool copied = false; - if (isNewList(target) && anySpecialStatic(source)) { // see comments in anySpecialStatic() - source = PROTECT(duplicate(source)); - copied = true; + bool prot = false; + if (isNewList(target)) { + source = PROTECT(copySpecialStaticAndAltrep(source)); + // mostly this protect will be unnecessary as the components will be changed; see its comments + prot = true; } memrecycle(target, R_NilValue, thisansloc, maxn, source, 0, -1, 0, ""); - if (copied) UNPROTECT(1); + if (prot) UNPROTECT(1); } } ansloc += maxn; @@ -427,7 +429,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX } } else ans = R_NilValue; // Now reset length of .SD columns and .I to length of largest group, otherwise leak if the last group is smaller (often is). - // Also reset truelength on specials; see comments in anySpecialStatic(). + // Also reset truelength on specials; see comments in anySpecialStatic(). // TODO update all comments using anySpecialStatic name for (int j=0; j Date: Wed, 14 Oct 2020 20:05:21 -0600 Subject: [PATCH 2/8] working and passing new test --- src/dogroups.c | 42 ++++++++++++++++++++---------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/src/dogroups.c b/src/dogroups.c index b1e9baa0f9..e55144a36b 100644 --- a/src/dogroups.c +++ b/src/dogroups.c @@ -3,7 +3,7 @@ #include #include -static SEXP copySpecialStaticAndAltrep(SEXP x) { +static bool anySpecialStatic(SEXP x) { // Special refers to special symbols .BY, .I, .N, and .GRP; see special-symbols.Rd // Static because these are like C static arrays which are the same memory for each group; e.g., dogroups // creates .SD for the largest group once up front, overwriting the contents for each group. Their @@ -42,16 +42,15 @@ static SEXP copySpecialStaticAndAltrep(SEXP x) { const int n = length(x); // use length() not LENGTH() because LENGTH() on NULL is segfault in R<3.5 where we still define USE_RINTERNALS // (see data.table.h), and isNewList() is true for NULL - if ((isVectorAtomic(x) && n>0 && TRUELENGTH(x)<0) || ALTREP(x)) - return copyAsPlain(x); - if (isNewList(x)) for (int i=0; i1 && thislen!=maxn && grpn>0) { // grpn>0 for grouping empty tables; test 1986 error(_("Supplied %d items for column %d of group %d which has %d rows. The RHS length must either be 1 (single values are ok) or match the LHS length exactly. If you wish to 'recycle' the RHS please use rep() explicitly to make this intent clear to readers of your code."), thislen, j+1, i+1, maxn); } - bool prot = false; - if (isNewList(target)) { - source = PROTECT(copySpecialStaticAndAltrep(source)); - // mostly this protect will be unnecessary as the components will be changed; see its comments - prot = true; + bool copied = false; + if (isNewList(target) && anySpecialStatic(source)) { // see comments in anySpecialStatic() + source = PROTECT(duplicate(source)); + copied = true; } memrecycle(target, R_NilValue, thisansloc, maxn, source, 0, -1, 0, ""); - if (prot) UNPROTECT(1); + if (copied) UNPROTECT(1); } } ansloc += maxn; @@ -429,7 +427,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX } } else ans = R_NilValue; // Now reset length of .SD columns and .I to length of largest group, otherwise leak if the last group is smaller (often is). - // Also reset truelength on specials; see comments in anySpecialStatic(). // TODO update all comments using anySpecialStatic name + // Also reset truelength on specials; see comments in anySpecialStatic(). for (int j=0; j Date: Wed, 14 Oct 2020 20:19:56 -0600 Subject: [PATCH 3/8] negative truelength should not be retained in result; added test which fails currently --- inst/tests/tests.Rraw | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index dd982b7e8d..17084957aa 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17164,6 +17164,7 @@ for (i in 0:4) test(2155+i/10, # length(value)>=64, R creates an ALTREP REF wrapper. Which dogroups.c now catches. # Hence this test needs to be at least 128 rows, 2 groups of 64 each. DT = data.table(series=c("ts1","ts2"), value=rnorm(128)) -test(2156, DT[,list(list({attr(value,"class")<-"newclass";value})),by=series]$V1[[1L]][1L], - DT[1,value]) +test(2156.1, DT[,list(list({attr(value,"class")<-"newclass";value})),by=series]$V1[[1L]][1L], + DT[1,value]) +test(2156.2, truelength(DT[,list(list(value)),by=series]$V1[[1L]])>=0L) # not -64 carried over by duplicate() of the .SD column From a325030a2e8566236818a6d9d316b4193ecf815e Mon Sep 17 00:00:00 2001 From: Matt Dowle Date: Thu, 15 Oct 2020 19:21:40 -0600 Subject: [PATCH 4/8] copyAsPlain's comments addressed, nocovs removed, and used from dogroups.c. Passing all tests including new ones --- src/dogroups.c | 4 ++-- src/utils.c | 50 ++++++++++++++++++++++++-------------------------- 2 files changed, 26 insertions(+), 28 deletions(-) diff --git a/src/dogroups.c b/src/dogroups.c index e55144a36b..6f88c321b6 100644 --- a/src/dogroups.c +++ b/src/dogroups.c @@ -298,7 +298,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX } bool copied = false; if (isNewList(target) && anySpecialStatic(RHS)) { // see comments in anySpecialStatic() - RHS = PROTECT(duplicate(RHS)); + RHS = PROTECT(copyAsPlain(RHS)); copied = true; } const char *warn = memrecycle(target, order, INTEGER(starts)[i]-1, grpn, RHS, 0, -1, 0, ""); @@ -403,7 +403,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX } bool copied = false; if (isNewList(target) && anySpecialStatic(source)) { // see comments in anySpecialStatic() - source = PROTECT(duplicate(source)); + source = PROTECT(copyAsPlain(source)); copied = true; } memrecycle(target, R_NilValue, thisansloc, maxn, source, 0, -1, 0, ""); diff --git a/src/utils.c b/src/utils.c index 87348beb7c..7464419647 100644 --- a/src/utils.c +++ b/src/utils.c @@ -234,31 +234,28 @@ bool Rinherits(SEXP x, SEXP char_) { } SEXP copyAsPlain(SEXP x) { - // v1.12.2 and before used standard R duplicate() to do this. But that's not guaranteed to not return an ALTREP. + // v1.12.2 and before used standard R duplicate() to do this. But duplicate() is not guaranteed to not return an ALTREP. // e.g. ALTREP 'wrapper' on factor column (with materialized INTSXP) in package VIM under example(hotdeck) // .Internal(inspect(x[[5]])) // @558adf4d9508 13 INTSXP g0c0 [OBJ,NAM(7),ATT] wrapper [srt=-2147483648,no_na=0] - // 'AsPlain' is intended to convey unALTREP-ing; i.e. materializing and removing any ALTREP attributes too - // For non-ALTREP this should do the same as R's duplicate(); but doesn't quite currently, so has to divert to duplicated() for now + // 'AsPlain' is intended to convey unALTREP-ing; i.e. materializing and removing any ALTREP wrappers/attributes + // For non-ALTREP this should do the same as R's duplicate(). // Intended for use on columns; to either un-ALTREP them or duplicate shared memory columns; see copySharedColumns() below // Not intended to be called on a DT VECSXP where a concept of 'deep' might refer to whether the columns are copied - - if (!ALTREP(x)) return duplicate(x); - // would prefer not to have this line, but without it test 1639.064 fails : - // Running test id 1639.064 Error in `[.data.table`(r, -ii) : - // Item 2 of i is -1 and item 1 is NA. Cannot mix negatives and NA. - // Calls: test.data.table ... FUN -> make.levels -> rbindlist -> [ -> [.data.table - // Perhaps related to row names and the copyMostAttrib() below is not quite sufficient - - size_t n = XLENGTH(x); - SEXP ans = PROTECT(allocVector(TYPEOF(x), XLENGTH(x))); - switch (TYPEOF(ans)) { + + if (!isVectorAtomic(x) && !isNewList(x)) { + // e.g. defer to R the CLOSXP in test 173.3 where a list column item is the function 'mean' + return duplicate(x); + } + const int64_t n = XLENGTH(x); + SEXP ans = PROTECT(allocVector(TYPEOF(x), n)); + switch (TYPEOF(x)) { case RAWSXP: - memcpy(RAW(ans), RAW(x), n*sizeof(Rbyte)); // # nocov; add coverage when ALTREP is turned on for all types - break; // # nocov + memcpy(RAW(ans), RAW(x), n*sizeof(Rbyte)); + break; case LGLSXP: - memcpy(LOGICAL(ans), LOGICAL(x), n*sizeof(Rboolean)); // # nocov - break; // # nocov + memcpy(LOGICAL(ans), LOGICAL(x), n*sizeof(Rboolean)); + break; case INTSXP: memcpy(INTEGER(ans), INTEGER(x), n*sizeof(int)); // covered by 10:1 after test 178 break; @@ -266,22 +263,23 @@ SEXP copyAsPlain(SEXP x) { memcpy(REAL(ans), REAL(x), n*sizeof(double)); // covered by as.Date("2013-01-01")+seq(1,1000,by=10) after test 1075 break; case CPLXSXP: - memcpy(COMPLEX(ans), COMPLEX(x), n*sizeof(Rcomplex)); // # nocov - break; // # nocov + memcpy(COMPLEX(ans), COMPLEX(x), n*sizeof(Rcomplex)); + break; case STRSXP: { const SEXP *xp=STRING_PTR(x); // covered by as.character(as.hexmode(1:500)) after test 642 - for (R_xlen_t i=0; i Date: Thu, 15 Oct 2020 20:20:03 -0600 Subject: [PATCH 5/8] deal with NULL in copyAsPlain --- src/utils.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/utils.c b/src/utils.c index 7464419647..b44bddf324 100644 --- a/src/utils.c +++ b/src/utils.c @@ -243,6 +243,10 @@ SEXP copyAsPlain(SEXP x) { // Intended for use on columns; to either un-ALTREP them or duplicate shared memory columns; see copySharedColumns() below // Not intended to be called on a DT VECSXP where a concept of 'deep' might refer to whether the columns are copied + if (isNull(x)) { + // deal with up front because isNewList(R_NilValue) is true + return R_NilValue; + } if (!isVectorAtomic(x) && !isNewList(x)) { // e.g. defer to R the CLOSXP in test 173.3 where a list column item is the function 'mean' return duplicate(x); @@ -279,7 +283,7 @@ SEXP copyAsPlain(SEXP x) { DUPLICATE_ATTRIB(ans, x); // aside: unlike R's duplicate we do not copy truelength here; important for dogroups.c which uses negative truelenth to mark its specials if (ALTREP(ans)) - error(_("Internal error: copyAsPlain returning an ALTREP for type '%s'"), type2char(TYPEOF(x))); // # nocov + error(_("Internal error: copyAsPlain returning ALTREP for type '%s'"), type2char(TYPEOF(x))); // # nocov UNPROTECT(1); return ans; } From 1040c4f92c5dab79b889f7304423707db6ec29d3 Mon Sep 17 00:00:00 2001 From: Matt Dowle Date: Thu, 15 Oct 2020 20:47:52 -0600 Subject: [PATCH 6/8] added PR number into test comment --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 17084957aa..03dc80a967 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17158,7 +17158,7 @@ for (i in 0:4) test(2155+i/10, data.table(A=1:7, V1=42) ) -# dogroups.c eval(j) could create list columns containing altrep references to the specials, #PRNUM +# dogroups.c eval(j) could create list columns containing altrep references to the specials, #4759 # thanks to revdep testing of 1.13.2 where package tstools revealed this via ts() creating ALTREP, #4758 # the attr(value,"class")<-"newclass" lines mimics a line at the end of stats::ts(). When the # length(value)>=64, R creates an ALTREP REF wrapper. Which dogroups.c now catches. From 0376ce9ef6433350be99cb8f509f3c6b38512aa2 Mon Sep 17 00:00:00 2001 From: Matt Dowle Date: Thu, 15 Oct 2020 21:19:05 -0600 Subject: [PATCH 7/8] cover null case in copyAsPlain --- inst/tests/tests.Rraw | 3 +++ 1 file changed, 3 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 03dc80a967..78a391e108 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17167,4 +17167,7 @@ DT = data.table(series=c("ts1","ts2"), value=rnorm(128)) test(2156.1, DT[,list(list({attr(value,"class")<-"newclass";value})),by=series]$V1[[1L]][1L], DT[1,value]) test(2156.2, truelength(DT[,list(list(value)),by=series]$V1[[1L]])>=0L) # not -64 carried over by duplicate() of the .SD column +# cover NULL case in copyAsPlain by putting a NULL alongside a dogroups .SD column. The 'if(.GRP==1L)' is just for fun. +test(2156.3, sapply(DT[, list(if (.GRP==1L) list(value,NULL) else list(NULL,value)), by=series]$V1, length), + INT(64,0,0,64)) From 72392edea45445b737091262c291e47b624fe569 Mon Sep 17 00:00:00 2001 From: Matt Dowle Date: Thu, 15 Oct 2020 21:48:43 -0600 Subject: [PATCH 8/8] mark the default: label as nocov too; codecov seems to be highlighting it in red --- src/utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils.c b/src/utils.c index b44bddf324..37e5750b4a 100644 --- a/src/utils.c +++ b/src/utils.c @@ -277,7 +277,7 @@ SEXP copyAsPlain(SEXP x) { const SEXP *xp=SEXPPTR_RO(x); for (int64_t i=0; i