From c52cf730028edde16e5f062d263d4dd17d868561 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sat, 22 May 2021 19:25:34 -0700 Subject: [PATCH] detect a function-cum-column in i --- NEWS.md | 2 ++ R/data.table.R | 19 +++++++++++++------ inst/tests/tests.Rraw | 16 ++++++++++------ 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/NEWS.md b/NEWS.md index aae1234553..6dcb03a3c5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -149,6 +149,8 @@ 5. v1.13.0 (July 2020) fixed a segfault/corruption/error (depending on version of R and circumstances) in `dcast()` when `fun.aggregate` returned `NA` (type `logical`) in an otherwise `character` result, [#2394](https://github.com/Rdatatable/data.table/issues/2394). This fix was the result of other internal rework and there was no news item at the time. A new test to cover this case has now been added. Thanks Vadim Khotilovich for reporting, and Michael Chirico for investigating, pinpointing when the fix occurred and adding the test. +6. Running `DT[subset]` where `DT[(subset)]` was intended, i.e., when attempting to subset a table based on a column whose name happens to conflict with an existing function, gives a friendlier error message, [#5014](https://github.com/Rdatatable/data.table/issues/5014). Thanks @michaelchirico for the suggestion and PR and @ColeMiller1 for helping with the fix. + # data.table [v1.14.0](https://github.com/Rdatatable/data.table/milestone/23?closed=1) (21 Feb 2021) diff --git a/R/data.table.R b/R/data.table.R index dbede49855..b320f0ac17 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -405,14 +405,21 @@ replace_dot_alias = function(e) { } else { # isub is a single symbol name such as B in DT[B] i = try(eval(isub, parent.frame(), parent.frame()), silent=TRUE) - if (inherits(i,"try-error")) { + if (inherits(i,"try-error") || is.function(i)) { # must be "not found" since isub is a mere symbol col = try(eval(isub, x), silent=TRUE) # is it a column name? - msg = if (inherits(col,"try-error")) " and it is not a column name either." - else paste0(" but it is a column of type ", typeof(col),". If you wish to select rows where that column contains TRUE", - ", or perhaps that column contains row numbers of itself to select, try DT[(col)], DT[DT$col], or DT[col==TRUE] is particularly clear and is optimized.") - stop(as.character(isub), " is not found in calling scope", msg, - " When the first argument inside DT[...] is a single symbol (e.g. DT[var]), data.table looks for var in calling scope.") + if (inherits(col, "try-error")) { + msg = gettextf( + "'%s' is not found in calling scope and it is not a column name either. ", + as.character(isub) + ) + } else { + msg = gettextf( + "'%s' is not found in calling scope, but it is a column of type %s. If you wish to select rows where that column contains TRUE, or perhaps that column contains row numbers of itself to select, try DT[(col)], DT[DT$col], or DT[col==TRUE} is particularly clear and is optimized. ", + as.character(isub), typeof(col) + ) + } + stop(msg, "When the first argument inside DT[...] is a single symbol (e.g. DT[var]), data.table looks for var in calling scope.") } } if (restore.N) { diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index ceed569e30..feb379a79f 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11452,11 +11452,11 @@ if (exists("B")) rm(B) if (exists("NOTEXIST")) rm(NOTEXIST) if (exists("MyCol")) rm(MyCol) DT <- data.table(A = c(FALSE, TRUE), B = 2:1, C=c(2,3), MyCol=c(2,2)) -test(1773.01, DT[A], error = "A is not found in calling scope but it is a column of type logical.*==TRUE.*When the first argument") -test(1773.02, DT[B], error = "B is not found in calling scope but it is a column of type integer.*DT\\[\\(col\\)\\].*When the first argument") # 697 -test(1773.03, DT[C], error = "i has evaluated to type closure. Expecting logical, integer or double") # C picks up stats::C in calling scope -test(1773.04, DT[MyCol], error="MyCol is not found in calling scope but it is a column of type double.*DT\\[\\(col\\)\\].*When the first argument") -test(1773.05, DT[NOTEXIST], error = "NOTEXIST is not found in calling scope and it is not a column name either. When the first argument") +test(1773.01, DT[A], error = "'A' is not found in calling scope, but it is a column of type logical.*==TRUE.*When the first argument") +test(1773.02, DT[B], error = "'B' is not found in calling scope, but it is a column of type integer.*DT\\[\\(col\\)\\].*When the first argument") # 697 +test(1773.03, DT[C], error = "'C' is not found in calling scope, but it is a column of type double") # C picks up stats::C in calling scope +test(1773.04, DT[MyCol], error="'MyCol' is not found in calling scope, but it is a column of type double.*DT\\[\\(col\\)\\].*When the first argument") +test(1773.05, DT[NOTEXIST], error = "'NOTEXIST' is not found in calling scope and it is not a column name either. When the first argument") test(1773.06, DT[(A)], DT[2]) test(1773.07, DT[A==TRUE], DT[2]) test(1773.08, DT[(B)], data.table(A=c(TRUE,FALSE), B=1:2, C=c(3,2), MyCol=2)) @@ -16690,7 +16690,7 @@ set.seed(1) vDT = data.table(i_id = unique(iDT$i_id))[, .(v = runif(5,0,10), p = sample(c(5,5,10,10,10))), by=i_id] test(2120.01, !exists("i_id")) # quick verify in case there's an i_id in .GlobalEnv when testing in dev test(2120.02, iDT[i_id, order(e_date, e_time)], # first of all, the correct error - error="i_id is not found in calling scope but it is a column of type character") + error="'i_id' is not found in calling scope, but it is a column of type character") tmp = vDT[c("B","C","A"), on=.(i_id), .N, by=.EACHI] # split long statement in 2120.05 up as per demo in #3669 test(2120.03, tmp, data.table(i_id=c("B","C","A"), N=5L)) # just make sure the helper tmp is correct test(2120.04, tmp[iDT[i_id, order(e_date, e_time)]], # i_id obtained from tmp; this is what broke in dev 1.12.3 @@ -17662,3 +17662,7 @@ test(2190.9, DT[1:2, a:=call('sum', 1)], error="type 'language' cannot be coerce # adding test for (since fixed) 'could not find function "."' when verbose=TRUE, #3196 DT = data.table(i1 = c(234L, 250L, 169L, 234L, 147L, 96L, 96L, 369L, 147L, 96L), i4 = c(79L, 113L, 270L, -121L, 113L, 113L, -121L, 179L, -228L, 113L), v = 0) test(2191, DT[1:5, sum(v), by=.(i5 = 1:5 %% 2L), verbose=TRUE], data.table(i5=1:0, V1=c(0,0)), output="gforce") + +# when i evaluates to a function that's also a column name, give a friendlier message #5014 +DT = data.table(subset = (1:10)>5) +test(2192, DT[subset], error="'subset' is not found in calling scope, but")