From c15f8d7851f2442c05b86f2cc3a34a40fbd632a6 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 3 Feb 2020 23:45:23 +0800 Subject: [PATCH 1/3] friendlier error when := shows up in i --- NEWS.md | 1 + R/data.table.R | 14 +++++++++++++- inst/tests/tests.Rraw | 8 ++++---- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/NEWS.md b/NEWS.md index 2d607e9bad..b9511589c2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -105,6 +105,7 @@ unit = "s") 4. `fifelse` and `fcase` notify users that S4 objects (except `nanotime`) are not supported [#4135](https://github.com/Rdatatable/data.table/issues/4135). Thanks to @torema-ed for bringing it to our attention and Morgan Jacob for the PR. +5. One of the more common mistakes is accidentally using `:=` in `i` instead of `j` (like `DT[new_var := 5]`). The error in this case is now friendlier for getting you back to working code, [#4227](https://github.com/Rdatatable/data.table/issues/4227). # data.table [v1.12.8](https://github.com/Rdatatable/data.table/milestone/15?closed=1) (09 Dec 2019) diff --git a/R/data.table.R b/R/data.table.R index af4f14de0b..745c1425e4 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -2611,7 +2611,19 @@ vecseq = function(x,y,clamp) .Call(Cvecseq,x,y,clamp) # .Call(Caddress, x) increments NAM() when x is vector with NAM(1). Referring object within non-primitive function is enough to increment reference. address = function(x) .Call(Caddress, eval(substitute(x), parent.frame())) -":=" = function(...) stop('Check that is.data.table(DT) == TRUE. Otherwise, := and `:=`(...) are defined for use in j, once only and in particular ways. See help(":=").') +":=" = function(...) { + # detect common mistake -- use := in i after forgetting a comma to leave it blank, #4227 + find_doteq_in_i = function(e) { + if (is.call(e)) { + if (is.symbol(e[[1L]]) && e[[1L]] == '[.data.table' && is.call(e[[3L]]) && is.symbol(e[[3L]][[1L]]) && e[[3L]][[1L]] == ':=') { + stop("Assignment with := is intended for use in j only, but is present in i, the first argument after [. Most often, this happens when forgetting to leave the first argument blank (e.g. DT[newvar := 5] instead of DT[ , new_var := 5]). Please double-check your syntax (see traceback() after this error for another view of this).") + } + else lapply(e[-1L], find_doteq_in_i) + } + } + for (calli in sys.calls()) find_doteq_in_i(calli) + stop('Check that is.data.table(DT) == TRUE. Otherwise, := and `:=`(...) are defined for use in j, once only and in particular ways. See help(":=").') +} setDF = function(x, rownames=NULL) { if (!is.list(x)) stop("setDF only accepts data.table, data.frame or list of equal length as input") diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index f6d17a0762..23540effac 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16770,7 +16770,7 @@ test(2132.2, fifelse(TRUE, 1, s2), error = "S4 class objects (except nanot test(2132.3, fcase(TRUE, s1, FALSE, s2), error = "S4 class objects (except nanotime) are not supported. Please see https://github.com/Rdatatable/data.table/issues/4131.") rm(s1, s2, class2132) - -######################## -# Add new tests here # -######################## +# friendly error for common mistake of using := in i instead of j +DT = data.table(a = 1) +test(2133.1, DT[b := 2], error="Assignment with := is intended for use in j only, but is present in i") +test(2133.2, DT[DT[mpg_per_cl := mpg/cyl], on='ast'], error="Assignment with := is intended for use in j only, but is present in i") From 472642937e5c26a613c45caa5741503e17fe7e1d Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 14 Feb 2020 10:36:16 +0800 Subject: [PATCH 2/3] update for hughs feedback --- R/data.table.R | 2 +- inst/tests/tests.Rraw | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 745c1425e4..4e71186a57 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -2616,7 +2616,7 @@ address = function(x) .Call(Caddress, eval(substitute(x), parent.frame())) find_doteq_in_i = function(e) { if (is.call(e)) { if (is.symbol(e[[1L]]) && e[[1L]] == '[.data.table' && is.call(e[[3L]]) && is.symbol(e[[3L]][[1L]]) && e[[3L]][[1L]] == ':=') { - stop("Assignment with := is intended for use in j only, but is present in i, the first argument after [. Most often, this happens when forgetting to leave the first argument blank (e.g. DT[newvar := 5] instead of DT[ , new_var := 5]). Please double-check your syntax (see traceback() after this error for another view of this).") + stop("Operator := detected in i, the first argument after [, but is only valid in j. Most often, this happens when forgetting to leave the first argument blank (e.g. DT[newvar := 5] instead of DT[ , new_var := 5]). Please double-check your syntax (see traceback() after this error for another view of this).") } else lapply(e[-1L], find_doteq_in_i) } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index fb1308c540..184afa95d5 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16778,5 +16778,5 @@ rm(s1, s2, class2132) # friendly error for common mistake of using := in i instead of j DT = data.table(a = 1) -test(2133.1, DT[b := 2], error="Assignment with := is intended for use in j only, but is present in i") -test(2133.2, DT[DT[mpg_per_cl := mpg/cyl], on='ast'], error="Assignment with := is intended for use in j only, but is present in i") +test(2133.1, DT[b := 2], error="Operator := detected in i, the first argument after [, but is only valid in j") +test(2133.2, DT[DT[mpg_per_cl := mpg/cyl], on='ast'], error="Operator := detected in i, the first argument after [, but is only valid in j") From d5243df0be286a5cca4401393e67d254988456dc Mon Sep 17 00:00:00 2001 From: mattdowle Date: Sat, 15 Feb 2020 13:29:11 -0700 Subject: [PATCH 3/3] tweaked wording and embellished news item --- NEWS.md | 15 ++++++++++++++- R/data.table.R | 4 ++-- inst/tests/tests.Rraw | 7 ++++--- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/NEWS.md b/NEWS.md index d336982aa9..d70f9fcf91 100644 --- a/NEWS.md +++ b/NEWS.md @@ -109,7 +109,20 @@ unit = "s") 4. `fifelse` and `fcase` notify users that S4 objects (except `nanotime`) are not supported [#4135](https://github.com/Rdatatable/data.table/issues/4135). Thanks to @torema-ed for bringing it to our attention and Morgan Jacob for the PR. -5. One of the more common mistakes is accidentally using `:=` in `i` instead of `j` (like `DT[new_var := 5]`). The error in this case is now friendlier for getting you back to working code, [#4227](https://github.com/Rdatatable/data.table/issues/4227). +5. The error message when mistakenly using `:=` in `i` instead of `j` has been much improved, [#4227](https://github.com/Rdatatable/data.table/issues/4227). Thanks to Hugh Parsonage for the detailed suggestion. + + ```R + > DT = data.table(A=1:2) + > DT[B:=3] + Error: Operator := detected in i, the first argument inside DT[...], but is only valid in the second argument, j. Most often, this happens when forgetting the first comma (e.g. DT[newvar := 5] instead of DT[ , new_var := 5]). Please double-check the syntax. Run traceback(), and debugger() to get a line number. + > DT[,B:=3] + > DT + A B + + 1: 1 3 + 2: 2 3 + ``` + # data.table [v1.12.8](https://github.com/Rdatatable/data.table/milestone/15?closed=1) (09 Dec 2019) diff --git a/R/data.table.R b/R/data.table.R index 4e71186a57..cb7fd665ba 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -2612,11 +2612,11 @@ vecseq = function(x,y,clamp) .Call(Cvecseq,x,y,clamp) address = function(x) .Call(Caddress, eval(substitute(x), parent.frame())) ":=" = function(...) { - # detect common mistake -- use := in i after forgetting a comma to leave it blank, #4227 + # detect common mistake -- using := in i due to forgetting a comma to leave it blank, #4227 find_doteq_in_i = function(e) { if (is.call(e)) { if (is.symbol(e[[1L]]) && e[[1L]] == '[.data.table' && is.call(e[[3L]]) && is.symbol(e[[3L]][[1L]]) && e[[3L]][[1L]] == ':=') { - stop("Operator := detected in i, the first argument after [, but is only valid in j. Most often, this happens when forgetting to leave the first argument blank (e.g. DT[newvar := 5] instead of DT[ , new_var := 5]). Please double-check your syntax (see traceback() after this error for another view of this).") + stop("Operator := detected in i, the first argument inside DT[...], but is only valid in the second argument, j. Most often, this happens when forgetting the first comma (e.g. DT[newvar := 5] instead of DT[ , new_var := 5]). Please double-check the syntax. Run traceback(), and debugger() to get a line number.") } else lapply(e[-1L], find_doteq_in_i) } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 184afa95d5..d270daf270 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16776,7 +16776,8 @@ test(2132.2, fifelse(TRUE, 1, s2), error = "S4 class objects (except nanot test(2132.3, fcase(TRUE, s1, FALSE, s2), error = "S4 class objects (except nanotime) are not supported. Please see https://github.com/Rdatatable/data.table/issues/4131.") rm(s1, s2, class2132) -# friendly error for common mistake of using := in i instead of j +# friendlier error for common mistake of using := in i instead of j, #4227 DT = data.table(a = 1) -test(2133.1, DT[b := 2], error="Operator := detected in i, the first argument after [, but is only valid in j") -test(2133.2, DT[DT[mpg_per_cl := mpg/cyl], on='ast'], error="Operator := detected in i, the first argument after [, but is only valid in j") +test(2133.1, DT[b := 2], error="Operator := detected in i, the first argument inside DT[...], but is only valid in the second argument, j.") +test(2133.2, DT[DT[mpg_per_cl := mpg/cyl], on='ast'], error="Operator := detected in i, the first argument inside") +