From 4d2b9abf9509e9f7714abd9fd6f6b1b25f9e01d3 Mon Sep 17 00:00:00 2001 From: MarkusBonsch Date: Tue, 25 Sep 2018 23:15:32 +0200 Subject: [PATCH 1/5] Fixed #2984 --- NEWS.md | 1 + inst/tests/tests.Rraw | 7 +++++++ src/assign.c | 10 +++++++++- 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 295b231b80..ff99928942 100644 --- a/NEWS.md +++ b/NEWS.md @@ -9,6 +9,7 @@ 2. `keyby=` on columns for which an index exists now uses the index (new feature 7 in v1.11.6 below) but if an `i` subset is present in the same query then it could segfault, [#3062](https://github.com/Rdatatable/data.table/issues/3062). Again thanks to @renkun-ken for reporting. +3. Assignments of variables to factor columns are now certain to not alter the original variable [#2984](https://github.com/Rdatatable/data.table/issues/2984). Thanks to @radfordneal for reporting and @MarkusBonsch for fixing. ### Changes in v1.11.6 (on CRAN 19 Sep 2018) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 264b4be07e..4137b8a1bb 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -12239,6 +12239,13 @@ test(1944.4, DT[flag == 1 & group == 1, x], 6L) test(1944.5, indices(DT), "group__flag") test(1944.6, DT[flag == 1, sum(x), keyby = group], data.table(group=1:4, V1=INT(6,3,18,17), key="group")) +## tests for issue #2984 +DT <- data.table(a = factor(c("A", "Z")), b = 1:4) +c <- 3L +DT[1, a:=c] +test(1945.1, c, 3L) +DT[2,1]<- c +test(1945.2, c, 3L) ################################### # Add new tests above this line # diff --git a/src/assign.c b/src/assign.c index c7dadbe243..de1005ad86 100644 --- a/src/assign.c +++ b/src/assign.c @@ -575,7 +575,15 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values, SEXP v protecti++; // silence warning on singleton NAs if (INTEGER(RHS)[0] != NA_INTEGER) warning("Coerced '%s' RHS to 'integer' to match the factor column's underlying type. Character columns are now recommended (can be in keys), or coerce RHS to integer or character first.", type2char(TYPEOF(thisvalue))); - } else RHS = thisvalue; + } else { + // make sure to copy thisvalue if it is referenced. May be modified below. See #2984 + if(MAYBE_REFERENCED(thisvalue)) { + RHS = duplicate(thisvalue); + } else { + RHS = thisvalue; + } + + } for (j=0; jLENGTH(targetlevels)) && INTEGER(RHS)[j] != NA_INTEGER) { From a484670329fec4c41207800939b37f1e62b9864a Mon Sep 17 00:00:00 2001 From: MarkusBonsch Date: Tue, 25 Sep 2018 23:37:07 +0200 Subject: [PATCH 2/5] Had to fix existing test --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 4137b8a1bb..497b7ddbed 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -1218,7 +1218,7 @@ DT[1,a:="A"] test(428, DT, data.table(a=factor(c("A","Z","A","Z")),b=1:4)) DT[1,a:=2L] test(429, DT, data.table(a=factor(c("Z","Z","A","Z")),b=1:4)) -test(430, DT[1,1]<- 3L, NA_integer_, warning="RHS contains 3 which is outside the levels range.*1,2.*of column 1, NAs generated") +test(430, DT[1,1]<- 3L, 3L, warning="RHS contains 3 which is outside the levels range.*1,2.*of column 1, NAs generated") test(431, DT[1,1:=4L], data.table(a=factor(c(NA,"Z","A","Z")),b=1:4), warning="RHS contains 4 which is outside the levels range.*1,2.*of column 1, NAs generated") From f26b15ea79b8900863473a31b91356686f965879 Mon Sep 17 00:00:00 2001 From: MarkusBonsch Date: Tue, 25 Sep 2018 23:57:08 +0200 Subject: [PATCH 3/5] Modified code to increase test coverage --- src/assign.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/assign.c b/src/assign.c index de1005ad86..c2441b190b 100644 --- a/src/assign.c +++ b/src/assign.c @@ -576,13 +576,8 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values, SEXP v // silence warning on singleton NAs if (INTEGER(RHS)[0] != NA_INTEGER) warning("Coerced '%s' RHS to 'integer' to match the factor column's underlying type. Character columns are now recommended (can be in keys), or coerce RHS to integer or character first.", type2char(TYPEOF(thisvalue))); } else { - // make sure to copy thisvalue if it is referenced. May be modified below. See #2984 - if(MAYBE_REFERENCED(thisvalue)) { - RHS = duplicate(thisvalue); - } else { - RHS = thisvalue; - } - + // make sure to copy thisvalue. May be modified below. See #2984 + RHS = duplicate(thisvalue); } for (j=0; jLENGTH(targetlevels)) From a3768a848ae773a7cbe1523160bfd29b5a4f54c4 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Tue, 25 Sep 2018 20:30:18 -0700 Subject: [PATCH 4/5] Added protect. Tweaked news item. --- NEWS.md | 3 ++- src/assign.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index ff99928942..4d378a02e4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -9,7 +9,8 @@ 2. `keyby=` on columns for which an index exists now uses the index (new feature 7 in v1.11.6 below) but if an `i` subset is present in the same query then it could segfault, [#3062](https://github.com/Rdatatable/data.table/issues/3062). Again thanks to @renkun-ken for reporting. -3. Assignments of variables to factor columns are now certain to not alter the original variable [#2984](https://github.com/Rdatatable/data.table/issues/2984). Thanks to @radfordneal for reporting and @MarkusBonsch for fixing. +3. Assigning an out-of-range integer to an item in a factor column (a rare operation) correctly created an `NA` in that spot with warning, but now no longer corrupts the variable, [#2984](https://github.com/Rdatatable/data.table/issues/2984). Thanks to @radfordneal for reporting and @MarkusBonsch for fixing. Assigning a string which is missing from the factor levels has always automatically appended the string to the factor levels. + ### Changes in v1.11.6 (on CRAN 19 Sep 2018) diff --git a/src/assign.c b/src/assign.c index c2441b190b..c1e943d82c 100644 --- a/src/assign.c +++ b/src/assign.c @@ -577,7 +577,8 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values, SEXP v if (INTEGER(RHS)[0] != NA_INTEGER) warning("Coerced '%s' RHS to 'integer' to match the factor column's underlying type. Character columns are now recommended (can be in keys), or coerce RHS to integer or character first.", type2char(TYPEOF(thisvalue))); } else { // make sure to copy thisvalue. May be modified below. See #2984 - RHS = duplicate(thisvalue); + RHS = PROTECT(duplicate(thisvalue)); + protecti++; } for (j=0; jLENGTH(targetlevels)) From 66b475ba64443a59de77f975607d82db59325c10 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Tue, 25 Sep 2018 22:38:06 -0700 Subject: [PATCH 5/5] news item tweak --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 4d378a02e4..3c57114fd8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -9,7 +9,7 @@ 2. `keyby=` on columns for which an index exists now uses the index (new feature 7 in v1.11.6 below) but if an `i` subset is present in the same query then it could segfault, [#3062](https://github.com/Rdatatable/data.table/issues/3062). Again thanks to @renkun-ken for reporting. -3. Assigning an out-of-range integer to an item in a factor column (a rare operation) correctly created an `NA` in that spot with warning, but now no longer corrupts the variable, [#2984](https://github.com/Rdatatable/data.table/issues/2984). Thanks to @radfordneal for reporting and @MarkusBonsch for fixing. Assigning a string which is missing from the factor levels has always automatically appended the string to the factor levels. +3. Assigning an out-of-range integer to an item in a factor column (a rare operation) correctly created an `NA` in that spot with warning, but now no longer also corrupts the variable being assigned, [#2984](https://github.com/Rdatatable/data.table/issues/2984). Thanks to @radfordneal for reporting and @MarkusBonsch for fixing. Assigning a string which is missing from the factor levels continues to automatically append the string to the factor levels. ### Changes in v1.11.6 (on CRAN 19 Sep 2018)