diff --git a/NEWS.md b/NEWS.md index 2d5ee8bbf7..34542e8a95 100644 --- a/NEWS.md +++ b/NEWS.md @@ -149,7 +149,7 @@ 26. `melt()` now outputs scalar logical `NA` instead of `NULL` in rows corresponding to missing list columns, for consistency with non-list columns when using `na.rm=TRUE`, [#5053](https://github.com/Rdatatable/data.table/pull/5053). Thanks to Toby Dylan Hocking for the PR. -27. `as.data.frame(DT)` now removes any indices in addition to removing any key, [#5042](https://github.com/Rdatatable/data.table/issues/5042). When indices were left intact, a subsequent subset or reorder of the `data.frame` would not update the indices since they are treated just like any other `data.frame` attribute, causing incorrect results if the result is later converted back to `data.table` again. +27. `as.data.frame(DT)`, `setDF(DT)` and `as.list(DT)` now remove the `"index"` attribute which contains any indices (a.k.a. secondary keys), as they already did for other `data.table`-only attributes such as the primary key stored in the `"sorted"` attribute. When indices were left intact, a subsequent subset or reorder of the `data.frame` by `data.frame`-code in base R or other packages would not update the indices, causing incorrect results if then converted back to `data.table`, [#4889](https://github.com/Rdatatable/data.table/issues/4889) [#5042](https://github.com/Rdatatable/data.table/issues/5042). Thanks @OfekShilon for the report and the PR. ## NOTES diff --git a/R/data.table.R b/R/data.table.R index c15d65f034..0a5a38785b 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -2140,7 +2140,7 @@ as.data.frame.data.table = function(x, ...) 
setattr(ans,"row.names",.set_row_names(nrow(x))) # since R 2.4.0, data.frames can have non-character row names setattr(ans,"class","data.frame") setattr(ans,"sorted",NULL) # remove so if you convert to df, do something, and convert back, it is not sorted - setattr(ans,"index",NULL) #5042 + setattr(ans,"index",NULL) #4889 #5042 setattr(ans,".internal.selfref",NULL) # leave tl intact, no harm, ans @@ -2157,6 +2157,7 @@ as.list.data.table = function(x, ...) { setattr(ans, "class", NULL) setattr(ans, "row.names", NULL) setattr(ans, "sorted", NULL) + setattr(ans, "index", NULL) #4889 #5042 setattr(ans,".internal.selfref", NULL) # needed to pass S4 tests for example ans } @@ -2716,6 +2717,7 @@ setDF = function(x, rownames=NULL) { setattr(x, "row.names", rn) setattr(x, "class", "data.frame") setattr(x, "sorted", NULL) + setattr(x, "index", NULL) #4889 #5042 setattr(x, ".internal.selfref", NULL) } else if (is.data.frame(x)) { if (!is.null(rownames)) { diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index aa654a236c..b6093cf9cd 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17802,3 +17802,11 @@ test(2199.1, as.data.table(as.list(1:2))[, .SD,.SDcols=(-1L)], data.table(V2=2 test(2199.2, as.data.table(as.list(1:2))[, .SD,.SDcols=(-(1L))], data.table(V2=2L)) test(2199.3, as.data.table(as.list(1:3))[, .SD,.SDcols=(-1L)], data.table(V2=2L, V3=3L)) test(2199.4, data.table(V1=-1L, V2=-2L, V3=-3L)[,.SD,.SDcols=-V2:-V1], error="not found") + +# setDF now drops index attributes, #4889 +d = data.table(a=1:100, b=1:100) +setindex(d, a) +setDF(d) +d[1:50, "a"] = d[51:100, "a"] +setDT(d) +test(2200, nrow(d[a==99]), 2L) diff --git a/man/setDF.Rd b/man/setDF.Rd index f50c9ae491..57cba39433 100644 --- a/man/setDF.Rd +++ b/man/setDF.Rd @@ -15,7 +15,7 @@ setDF(x, rownames=NULL) } \details{ - All \code{data.table} attributes including any keys of the input data.table are stripped off. 
+ All \code{data.table} attributes including any keys and indices of the input data.table are stripped off. When using \code{rownames}, recall that the row names of a \code{data.frame} must be unique. By default, the assigned set of row names is simply the sequence 1, \ldots, \code{nrow(x)} (or \code{length(x)} for \code{list}s). } diff --git a/src/utils.c b/src/utils.c index 0c4f04fa39..a1d9093b8d 100644 --- a/src/utils.c +++ b/src/utils.c @@ -381,11 +381,11 @@ SEXP coerceAs(SEXP x, SEXP as, SEXP copyArg) { #include <zlib.h> #endif SEXP dt_zlib_version() { - char out[51]; + char out[71]; #ifndef NOZLIB - snprintf(out, 50, "zlibVersion()==%s ZLIB_VERSION==%s", zlibVersion(), ZLIB_VERSION); + snprintf(out, 70, "zlibVersion()==%s ZLIB_VERSION==%s", zlibVersion(), ZLIB_VERSION); #else - snprintf(out, 50, _("zlib header files were not found when data.table was compiled")); + snprintf(out, 70, _("zlib header files were not found when data.table was compiled")); #endif return ScalarString(mkChar(out)); }