From fcaf5ee474d19b980b86029ad560b9f568ae73d3 Mon Sep 17 00:00:00 2001 From: Ani Date: Wed, 17 Jul 2024 12:39:16 -0700 Subject: [PATCH 1/8] Update tests.R --- .ci/atime/tests.R | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index fddfb1347b..81e4844440 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -119,6 +119,16 @@ test.list <- atime::atime_test_list( data.table:::setDT(L) }, Slow = "c4a2085e35689a108d67dacb2f8261e4964d7e12", # Parent of the first commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/commit/7cc4da4c1c8e568f655ab5167922dcdb75953801) - Fast = "1872f473b20fdcddc5c1b35d79fe9229cd9a1d15") # Last commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/pull/5427/commits) + Fast = "1872f473b20fdcddc5c1b35d79fe9229cd9a1d15"), # Last commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/pull/5427/commits) + + # Issue reported in: https://github.com/Rdatatable/data.table/issues/6286 + "by verbose arg" = atime::atime_test( + N = 10^seq(6, 8), + setup = { + dt = data.table(a = 1:N) + }, + expr = data.table:::copy(dt)[, 1, by = a, verbose = TRUE], + "Slow" = "a01f00f7438daf4612280d6886e6929fa8c8f76e", + "Fast" = "aa75d79376478b3e8f80fd6f31dcf53be8bf3404") ) # nolint end: undesirable_operator_linter. From 586c41230582ccb822d9655a9a8e691ee5dd24a3 Mon Sep 17 00:00:00 2001 From: Ani Date: Wed, 17 Jul 2024 18:05:41 -0700 Subject: [PATCH 2/8] Update tests.R --- .ci/atime/tests.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index 81e4844440..0882b5e8ae 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -122,13 +122,13 @@ test.list <- atime::atime_test_list( Fast = "1872f473b20fdcddc5c1b35d79fe9229cd9a1d15"), # Last commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/pull/5427/commits) # Issue reported in: https://github.com/Rdatatable/data.table/issues/6286 - "by verbose arg" = atime::atime_test( - N = 10^seq(6, 8), + "by with verbose = TRUE" = atime::atime_test( + N = 10^seq(1, 8), setup = { - dt = data.table(a = 1:N) - }, + dt = data.table(a = 1:N) + }, expr = data.table:::copy(dt)[, 1, by = a, verbose = TRUE], - "Slow" = "a01f00f7438daf4612280d6886e6929fa8c8f76e", - "Fast" = "aa75d79376478b3e8f80fd6f31dcf53be8bf3404") + "Slow" = "a01f00f7438daf4612280d6886e6929fa8c8f76e", + "Fast" = "cd497408bb4dc6650d871b5076e738420ff431d7") ) # nolint end: undesirable_operator_linter. From 6dd597ebc4dd6ace3b325cc4cb428465f197c1cf Mon Sep 17 00:00:00 2001 From: Ani Date: Wed, 17 Jul 2024 18:21:17 -0700 Subject: [PATCH 3/8] Introduced the changes from #6296 --- src/dogroups.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/dogroups.c b/src/dogroups.c index da9a7da172..f4e8967573 100644 --- a/src/dogroups.c +++ b/src/dogroups.c @@ -69,7 +69,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX int nprotect=0; SEXP ans=NULL, jval, thiscol, BY, N, I, GRP, iSD, xSD, rownames, s, RHS, target, source; Rboolean wasvector, firstalloc=FALSE, NullWarnDone=FALSE; - clock_t tstart=0, tblock[10]={0}; int nblock[10]={0}; + double tstart=0, tblock[10]={0}; int nblock[10]={0}; const bool verbose = LOGICAL(verboseArg)[0]==1; if (!isInteger(order)) error(_("Internal error: order not integer vector")); // # nocov @@ -227,7 +227,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX writeNA(VECTOR_ELT(xSD, j), 0, 1, false); } } else { - if (verbose) tstart = clock(); + if (verbose) tstart = wallclock(); SETLENGTH(I, grpn); int *iI = INTEGER(I); if (LENGTH(order)==0) { @@ -239,7 +239,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX for (int j=0; j0; Rprintf(_("\n %s took %.3fs for %d groups\n"), w ? "collecting discontiguous groups" : "memcpy contiguous groups", - 1.0*tblock[w]/CLOCKS_PER_SEC, nblock[w]); - Rprintf(_(" eval(j) took %.3fs for %d calls\n"), 1.0*tblock[2]/CLOCKS_PER_SEC, nblock[2]); + 1.0*tblock[w], nblock[w]); + Rprintf(_(" eval(j) took %.3fs for %d calls\n"), 1.0*tblock[2], nblock[2]); } UNPROTECT(nprotect); return(ans); From 1b3e2f3f8332605619cea017783626b67d809944 Mon Sep 17 00:00:00 2001 From: Ani Date: Wed, 17 Jul 2024 18:41:09 -0700 Subject: [PATCH 4/8] Update tests.R --- .ci/atime/tests.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index 0882b5e8ae..8f1d2046df 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -123,7 +123,7 @@ test.list <- atime::atime_test_list( # Issue reported in: https://github.com/Rdatatable/data.table/issues/6286 "by with verbose = TRUE" = atime::atime_test( - N = 10^seq(1, 8), + N = 10^seq(5, 9), setup = { dt = data.table(a = 1:N) }, From 2dc2e49f9406188e6b8ce75219a739fd519831bd Mon Sep 17 00:00:00 2001 From: Ani Date: Wed, 17 Jul 2024 19:01:55 -0700 Subject: [PATCH 5/8] Since by is an internal (not exported function) --- .ci/atime/tests.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index 8f1d2046df..baa016386e 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -123,11 +123,12 @@ test.list <- atime::atime_test_list( # Issue reported in: https://github.com/Rdatatable/data.table/issues/6286 "by with verbose = TRUE" = atime::atime_test( - N = 10^seq(5, 9), + N = 10^seq(1, 9), setup = { dt = data.table(a = 1:N) + dt_mod <- copy(dt) }, - expr = data.table:::copy(dt)[, 1, by = a, verbose = TRUE], + expr = data.table:::`[.data.table`(dt_mod, , 1, by = a, verbose = TRUE), "Slow" = "a01f00f7438daf4612280d6886e6929fa8c8f76e", "Fast" = "cd497408bb4dc6650d871b5076e738420ff431d7") ) From 9854be6f1d4e589405b044321a31032cad7aafcb Mon Sep 17 00:00:00 2001 From: Ani Date: Wed, 17 Jul 2024 20:13:57 -0700 Subject: [PATCH 6/8] rm commit SHAs for Slow/Fast since the comment (https://github.com/Rdatatable/data.table/issues/6286#issuecomment-2231962058) is invalid or OS was the source of difference rather than the version --- .ci/atime/tests.R | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index baa016386e..d8d2a223aa 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -122,14 +122,13 @@ test.list <- atime::atime_test_list( Fast = "1872f473b20fdcddc5c1b35d79fe9229cd9a1d15"), # Last commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/pull/5427/commits) # Issue reported in: https://github.com/Rdatatable/data.table/issues/6286 - "by with verbose = TRUE" = atime::atime_test( + # To be fixed in: https://github.com/Rdatatable/data.table/pull/6296 + "by with verbose = TRUE improved in #6296" = atime::atime_test( N = 10^seq(1, 9), setup = { dt = data.table(a = 1:N) dt_mod <- copy(dt) }, - expr = data.table:::`[.data.table`(dt_mod, , 1, by = a, verbose = TRUE), - "Slow" = "a01f00f7438daf4612280d6886e6929fa8c8f76e", - "Fast" = "cd497408bb4dc6650d871b5076e738420ff431d7") + expr = data.table:::`[.data.table`(dt_mod, , 1, by = a, verbose = TRUE)) ) # nolint end: undesirable_operator_linter. From 9cafc920534c5f6b52d60a09126a733899569863 Mon Sep 17 00:00:00 2001 From: Ani Date: Wed, 17 Jul 2024 20:59:24 -0700 Subject: [PATCH 7/8] Wrote some comments, brought back 'Slow', and removed the other tests to emphasize this one --- .ci/atime/tests.R | 53 ++++------------------------------------------- 1 file changed, 4 insertions(+), 49 deletions(-) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index d8d2a223aa..f70985ab82 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -73,54 +73,6 @@ test.list <- atime::atime_test_list( paste0('useDynLib(', new.Package_)) }, - # Performance regression discussed in: https://github.com/Rdatatable/data.table/issues/4311 - # Fixed in: https://github.com/Rdatatable/data.table/pull/4440 - "shallow regression fixed in #4440" = atime::atime_test( - N = 10^seq(3,8), - setup = { - set.seed(1L) - dt <- data.table(a = sample.int(N)) - setindexv(dt, "a") - }, - expr = data.table:::shallow(dt), - # Before = "", This needs to be updated later as there are two issues here: A) The source of regression (or the particular commit that led to it) is not clear; B) Older versions of data.table are having problems when being installed in this manner (This includes commits from before March 20 2020, when the issue that discovered or first mentioned the regression was created) - Regression = "b1b1832b0d2d4032b46477d9fe6efb15006664f4", # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/0f0e7127b880df8459b0ed064dc841acd22f5b73) in the PR (https://github.com/Rdatatable/data.table/pull/4440/commits) that fixes the regression - Fixed = "9d3b9202fddb980345025a4f6ac451ed26a423be"), # Merge commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/4440) - - # Test based on: https://github.com/Rdatatable/data.table/issues/5424 - # Performance regression introduced from a commit in: https://github.com/Rdatatable/data.table/pull/4491 - # Fixed in: https://github.com/Rdatatable/data.table/pull/5463 - "memrecycle regression fixed in #5463" = atime::atime_test( - N = 10^seq(3, 8), - setup = { - n <- N/100 - set.seed(2L) - dt <- data.table( - g = sample(seq_len(n), N, TRUE), - x = runif(N), - key = "g") - dt_mod <- copy(dt) - }, - expr = data.table:::`[.data.table`(dt_mod, , N := .N, by = g), - Before = "be2f72e6f5c90622fe72e1c315ca05769a9dc854", # Parent of the regression causing commit (https://github.com/Rdatatable/data.table/commit/e793f53466d99f86e70fc2611b708ae8c601a451) in the PR that introduced the issue (https://github.com/Rdatatable/data.table/pull/4491/commits) - Regression = "e793f53466d99f86e70fc2611b708ae8c601a451", # Commit responsible for regression in the PR that introduced the issue (https://github.com/Rdatatable/data.table/pull/4491/commits) - Fixed = "58409197426ced4714af842650b0cc3b9e2cb842"), # Last commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/5463/commits) - - # Issue reported in: https://github.com/Rdatatable/data.table/issues/5426 - # To be fixed in: https://github.com/Rdatatable/data.table/pull/5427 - "setDT improved in #5427" = atime::atime_test( - N = 10^seq(1, 7), - setup = { - L <- replicate(N, 1, simplify = FALSE) - setDT(L) - }, - expr = { - data.table:::setattr(L, "class", NULL) - data.table:::setDT(L) - }, - Slow = "c4a2085e35689a108d67dacb2f8261e4964d7e12", # Parent of the first commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/commit/7cc4da4c1c8e568f655ab5167922dcdb75953801) - Fast = "1872f473b20fdcddc5c1b35d79fe9229cd9a1d15"), # Last commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/pull/5427/commits) - # Issue reported in: https://github.com/Rdatatable/data.table/issues/6286 # To be fixed in: https://github.com/Rdatatable/data.table/pull/6296 "by with verbose = TRUE improved in #6296" = atime::atime_test( @@ -129,6 +81,9 @@ test.list <- atime::atime_test_list( dt = data.table(a = 1:N) dt_mod <- copy(dt) }, - expr = data.table:::`[.data.table`(dt_mod, , 1, by = a, verbose = TRUE)) + expr = data.table:::`[.data.table`(dt_mod, , 1, by = a, verbose = TRUE) + "Slow" = "a01f00f7438daf4612280d6886e6929fa8c8f76e" # Using the latest version of data.table I have in my fork (master branch, does not include the improvement thus slow). Just replace this with the preceding commit in data.table's master or the parent of the initial commit in the PR that fixes the issue. + # Fast = "" <-- Just change it to the mc or last commit after that PR is merged. + ) ) # nolint end: undesirable_operator_linter. From 5c9ce3a653f35a03c1c460744a90e403db5aa6fc Mon Sep 17 00:00:00 2001 From: Ani Date: Wed, 17 Jul 2024 21:18:53 -0700 Subject: [PATCH 8/8] rm unnecessary quotes, fixed the commit SHA, added the missing comma --- .ci/atime/tests.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index f70985ab82..ae3f5a8587 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -81,8 +81,8 @@ test.list <- atime::atime_test_list( dt = data.table(a = 1:N) dt_mod <- copy(dt) }, - expr = data.table:::`[.data.table`(dt_mod, , 1, by = a, verbose = TRUE) - "Slow" = "a01f00f7438daf4612280d6886e6929fa8c8f76e" # Using the latest version of data.table I have in my fork (master branch, does not include the improvement thus slow). Just replace this with the preceding commit in data.table's master or the parent of the initial commit in the PR that fixes the issue. + expr = data.table:::`[.data.table`(dt_mod, , 1, by = a, verbose = TRUE), + Slow = "cd497408bb4dc6650d871b5076e738420ff431d7" # Using the latest version of data.table I have in my fork (master branch, does not include the improvement thus slow). Just replace this with the preceding commit in data.table's master or the parent of the initial commit in the PR that fixes the issue. # Fast = "" <-- Just change it to the mc or last commit after that PR is merged. ) )