diff --git a/NEWS.md b/NEWS.md index aae1234553..5f2c8210cf 100644 --- a/NEWS.md +++ b/NEWS.md @@ -149,6 +149,8 @@ 5. v1.13.0 (July 2020) fixed a segfault/corruption/error (depending on version of R and circumstances) in `dcast()` when `fun.aggregate` returned `NA` (type `logical`) in an otherwise `character` result, [#2394](https://github.com/Rdatatable/data.table/issues/2394). This fix was the result of other internal rework and there was no news item at the time. A new test to cover this case has now been added. Thanks Vadim Khotilovich for reporting, and Michael Chirico for investigating, pinpointing when the fix occurred and adding the test. +6. `DT[subset]` where `DT[(subset)]` or `DT[subset==TRUE]` was intended; i.e., subsetting by a logical column whose name conflicts with an existing function, now gives a friendlier error message, [#5014](https://github.com/Rdatatable/data.table/issues/5014). Thanks @michaelchirico for the suggestion and PR, and @ColeMiller1 for helping with the fix. + # data.table [v1.14.0](https://github.com/Rdatatable/data.table/milestone/23?closed=1) (21 Feb 2021) diff --git a/R/data.table.R b/R/data.table.R index dbede49855..fa92561489 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -405,14 +405,17 @@ replace_dot_alias = function(e) { } else { # isub is a single symbol name such as B in DT[B] i = try(eval(isub, parent.frame(), parent.frame()), silent=TRUE) - if (inherits(i,"try-error")) { + if (inherits(i,"try-error") || is.function(i)) { # must be "not found" since isub is a mere symbol col = try(eval(isub, x), silent=TRUE) # is it a column name? - msg = if (inherits(col,"try-error")) " and it is not a column name either." - else paste0(" but it is a column of type ", typeof(col),". If you wish to select rows where that column contains TRUE", - ", or perhaps that column contains row numbers of itself to select, try DT[(col)], DT[DT$col], or DT[col==TRUE] is particularly clear and is optimized.") - stop(as.character(isub), " is not found in calling scope", msg, - " When the first argument inside DT[...] is a single symbol (e.g. DT[var]), data.table looks for var in calling scope.") + msg = if (inherits(col, "try-error")) gettextf( + "'%s' is not found in calling scope and it is not a column name either. ", + as.character(isub) + ) else gettextf( + "'%s' is not found in calling scope, but it is a column of type %s. If you wish to select rows where that column contains TRUE, or perhaps that column contains row numbers of itself to select, try DT[(col)], DT[DT$col], or DT[col==TRUE} is particularly clear and is optimized. ", + as.character(isub), typeof(col) + ) + stop(msg, "When the first argument inside DT[...] is a single symbol (e.g. DT[var]), data.table looks for var in calling scope.") } } if (restore.N) { diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 36925a4998..070f7cf379 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11452,16 +11452,18 @@ if (exists("B")) rm(B) if (exists("NOTEXIST")) rm(NOTEXIST) if (exists("MyCol")) rm(MyCol) DT <- data.table(A = c(FALSE, TRUE), B = 2:1, C=c(2,3), MyCol=c(2,2)) -test(1773.01, DT[A], error = "A is not found in calling scope but it is a column of type logical.*==TRUE.*When the first argument") -test(1773.02, DT[B], error = "B is not found in calling scope but it is a column of type integer.*DT\\[\\(col\\)\\].*When the first argument") # 697 -test(1773.03, DT[C], error = "i has evaluated to type closure. Expecting logical, integer or double") # C picks up stats::C in calling scope -test(1773.04, DT[MyCol], error="MyCol is not found in calling scope but it is a column of type double.*DT\\[\\(col\\)\\].*When the first argument") -test(1773.05, DT[NOTEXIST], error = "NOTEXIST is not found in calling scope and it is not a column name either. When the first argument") +test(1773.01, DT[A], error = "'A' is not found in calling scope, but it is a column of type logical.*==TRUE.*When the first argument") +test(1773.02, DT[B], error = "'B' is not found in calling scope, but it is a column of type integer.*DT\\[\\(col\\)\\].*When the first argument") # 697 +test(1773.03, DT[C], error = "'C' is not found in calling scope, but it is a column of type double") # C picks up stats::C in calling scope +test(1773.04, DT[MyCol], error="'MyCol' is not found in calling scope, but it is a column of type double.*DT\\[\\(col\\)\\].*When the first argument") +test(1773.05, DT[NOTEXIST], error = "'NOTEXIST' is not found in calling scope and it is not a column name either. When the first argument") test(1773.06, DT[(A)], DT[2]) test(1773.07, DT[A==TRUE], DT[2]) test(1773.08, DT[(B)], data.table(A=c(TRUE,FALSE), B=1:2, C=c(3,2), MyCol=2)) test(1773.09, DT[(MyCol)], data.table(A=c(TRUE,TRUE), B=INT(1,1), C=c(3,3), MyCol=2)) test(1773.10, DT[(C)], data.table(A=c(TRUE,NA), B=c(1L,NA), C=c(3,NA), MyCol=c(2,NA))) +test(1773.11, data.table(subset=c(TRUE,FALSE))[subset], # i being a function name that's also a column name, #5014 + error="'subset' is not found in calling scope, but") # New as.data.table.array method in v1.10.5 set.seed(1L) @@ -16703,7 +16705,7 @@ set.seed(1) vDT = data.table(i_id = unique(iDT$i_id))[, .(v = runif(5,0,10), p = sample(c(5,5,10,10,10))), by=i_id] test(2120.01, !exists("i_id")) # quick verify in case there's an i_id in .GlobalEnv when testing in dev test(2120.02, iDT[i_id, order(e_date, e_time)], # first of all, the correct error - error="i_id is not found in calling scope but it is a column of type character") + error="'i_id' is not found in calling scope, but it is a column of type character") tmp = vDT[c("B","C","A"), on=.(i_id), .N, by=.EACHI] # split long statement in 2120.05 up as per demo in #3669 test(2120.03, tmp, data.table(i_id=c("B","C","A"), N=5L)) # just make sure the helper tmp is correct test(2120.04, tmp[iDT[i_id, order(e_date, e_time)]], # i_id obtained from tmp; this is what broke in dev 1.12.3