From 204b125aa9ed8040b737688efe2248a13db6734f Mon Sep 17 00:00:00 2001 From: Cole Miller <57992489+ColeMiller1@users.noreply.github.com> Date: Mon, 25 May 2020 19:15:56 -0400 Subject: [PATCH 1/7] Update data.table.R --- R/data.table.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/R/data.table.R b/R/data.table.R index 98651d6e0b..5ae5485258 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -428,6 +428,12 @@ replace_dot_alias = function(e) { on_ops = .parse_on(substitute(on), isnull_inames) on = on_ops[[1L]] ops = on_ops[[2L]] + if (any(ops > 1L)) { ## fix for #4489; ops = c("==", "<=", "<", ">=", ">", "!=") + ## include new warning? warning seems obnoxious. Tests affected with warning: + #### 1654, 1655.4, 1655.5, 1660.1, 1660.2, 1682.1, 1682.2, 1682.3, 1682.4, 1682.5, 1682.6, 1682.7, 1683.1, 1683.2, 1845.1, 1845.2, 1846.1, 1846.2, 1872.13, 1948.01, 1948.02, 1948.03, 1948.04, 1948.05, 1948.06, 1948.09, 1967.73, 1967.74, 1984.02, 1989.1, 2062.1, 2062.2, 2092.3, 2105.1, 2105.2 + # if (!allow.cartesian) warning("allow.cartesian is FALSE but non-equi join detected. Non-equi joins typically result in more rows. Therefore, allow.cartesian is being changed to TRUE") + allow.cartesian = TRUE + } # TODO: collect all '==' ops first to speeden up Cnestedid rightcols = colnamesInt(x, names(on), check_dups=FALSE) leftcols = colnamesInt(i, unname(on), check_dups=FALSE) From f9935b340d7b435e32bf43db1e5ad444e25d0cb2 Mon Sep 17 00:00:00 2001 From: Cole Miller <57992489+ColeMiller1@users.noreply.github.com> Date: Mon, 25 May 2020 19:16:53 -0400 Subject: [PATCH 2/7] Update NEWS.md --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 7ecd4bc5af..e515ca2808 100644 --- a/NEWS.md +++ b/NEWS.md @@ -109,6 +109,8 @@ unit = "s") 13. A relatively rare case of segfault when combining non-equi joins with `by=.EACHI` is now fixed, closes [#4388](https://github.com/Rdatatable/data.table/issues/4388). +14. Non-equi joins now automatically set allow.cartesian to `TRUE`. Closes [4489](https://github.com/Rdatatable/data.table/issues/4489) + ## NOTES 0. Retrospective license change permission was sought from and granted by 4 contributors who were missed in [PR#2456](https://github.com/Rdatatable/data.table/pull/2456), [#4140](https://github.com/Rdatatable/data.table/pull/4140). We had used [GitHub's contributor page](https://github.com/Rdatatable/data.table/graphs/contributors) which omits 3 of these due to invalid email addresses, unlike GitLab's contributor page which includes the ids. The 4th omission was a PR to a script which should not have been excluded; a script is code too. We are sorry these contributors were not properly credited before. They have now been added to the contributors list as displayed on CRAN. All the contributors of code to data.table hold its copyright jointly; your contributions belong to you. You contributed to data.table when it had a particular license at that time, and you contributed on that basis. This is why in the last license change, all contributors of code were consulted and each had a veto. From d5a192fdef17b41cfdbba8785d9929503c90bf52 Mon Sep 17 00:00:00 2001 From: Cole Miller <57992489+ColeMiller1@users.noreply.github.com> Date: Mon, 25 May 2020 19:23:33 -0400 Subject: [PATCH 3/7] Update tests.Rraw --- inst/tests/tests.Rraw | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 3a6148221d..d39af078b7 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16861,3 +16861,12 @@ A = data.table(A=c(complex(real = 1:3, imaginary=c(0, -1, 1)), NaN)) test(2138.3, rbind(A,B), data.table(A=c(as.character(A$A), B$A))) A = data.table(A=as.complex(rep(NA, 5))) test(2138.4, rbind(A,B), data.table(A=c(as.character(A$A), B$A))) + +# Set allow.cartesian = TRUE when non-equi #4489 + +set.seed(2) +dt = data.table(time = 1:8, v = sample(8)) +dt[time == 2L, v := 2L] +d[time == 7L, v := 7L] + +test(2139.1, dt[dt, on = .(time > time, v > v), .N, by = .EACHI], data.table(time = 1:8, v = INT(5,2,6,1,8,4,7,3), N = INT(3,5,2,4,0,1,0,0))) From 27ef8d692ae78daee509bf496f8a060f2fe11f0e Mon Sep 17 00:00:00 2001 From: Cole Miller <57992489+ColeMiller1@users.noreply.github.com> Date: Mon, 25 May 2020 19:37:14 -0400 Subject: [PATCH 4/7] Give credit for issue --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index e515ca2808..76b881868b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -109,7 +109,7 @@ unit = "s") 13. A relatively rare case of segfault when combining non-equi joins with `by=.EACHI` is now fixed, closes [#4388](https://github.com/Rdatatable/data.table/issues/4388). -14. Non-equi joins now automatically set allow.cartesian to `TRUE`. Closes [4489](https://github.com/Rdatatable/data.table/issues/4489) +14. Non-equi joins now automatically set allow.cartesian to `TRUE`. Closes [4489](https://github.com/Rdatatable/data.table/issues/4489). Thanks to @Henrik-P for reporting. ## NOTES From dea41a6a08e1cdb00e431e41a545cf1efef9e858 Mon Sep 17 00:00:00 2001 From: Cole Miller <57992489+ColeMiller1@users.noreply.github.com> Date: Mon, 25 May 2020 19:38:17 -0400 Subject: [PATCH 5/7] change d to dt. embarrassing typo --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index d39af078b7..264fcf42ab 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16867,6 +16867,6 @@ test(2138.4, rbind(A,B), data.table(A=c(as.character(A$A), B$A))) set.seed(2) dt = data.table(time = 1:8, v = sample(8)) dt[time == 2L, v := 2L] -d[time == 7L, v := 7L] +dt[time == 7L, v := 7L] test(2139.1, dt[dt, on = .(time > time, v > v), .N, by = .EACHI], data.table(time = 1:8, v = INT(5,2,6,1,8,4,7,3), N = INT(3,5,2,4,0,1,0,0))) From 0522cd301df3e521d531fd6d3c060fe9a5b03124 Mon Sep 17 00:00:00 2001 From: Cole Miller <57992489+ColeMiller1@users.noreply.github.com> Date: Mon, 25 May 2020 19:40:10 -0400 Subject: [PATCH 6/7] removed commented out warning --- R/data.table.R | 3 --- 1 file changed, 3 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 5ae5485258..4bc161c728 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -429,9 +429,6 @@ replace_dot_alias = function(e) { on = on_ops[[1L]] ops = on_ops[[2L]] if (any(ops > 1L)) { ## fix for #4489; ops = c("==", "<=", "<", ">=", ">", "!=") - ## include new warning? warning seems obnoxious. Tests affected with warning: - #### 1654, 1655.4, 1655.5, 1660.1, 1660.2, 1682.1, 1682.2, 1682.3, 1682.4, 1682.5, 1682.6, 1682.7, 1683.1, 1683.2, 1845.1, 1845.2, 1846.1, 1846.2, 1872.13, 1948.01, 1948.02, 1948.03, 1948.04, 1948.05, 1948.06, 1948.09, 1967.73, 1967.74, 1984.02, 1989.1, 2062.1, 2062.2, 2092.3, 2105.1, 2105.2 - # if (!allow.cartesian) warning("allow.cartesian is FALSE but non-equi join detected. Non-equi joins typically result in more rows. Therefore, allow.cartesian is being changed to TRUE") allow.cartesian = TRUE } # TODO: collect all '==' ops first to speeden up Cnestedid From 3eb6d07122ef1d43032821af9d3b2fa4b9d053e3 Mon Sep 17 00:00:00 2001 From: Cole Miller <57992489+ColeMiller1@users.noreply.github.com> Date: Tue, 26 May 2020 07:00:34 -0400 Subject: [PATCH 7/7] Removed randomness of test --- inst/tests/tests.Rraw | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 264fcf42ab..cfb979855b 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16864,8 +16864,7 @@ test(2138.4, rbind(A,B), data.table(A=c(as.character(A$A), B$A))) # Set allow.cartesian = TRUE when non-equi #4489 -set.seed(2) -dt = data.table(time = 1:8, v = sample(8)) +dt = data.table(time = 1:8, v = INT(5,7,6,1,8,4,2,3)) dt[time == 2L, v := 2L] dt[time == 7L, v := 7L]