From 87cd37e26993890eeb2778015e5f6847057ff619 Mon Sep 17 00:00:00 2001 From: Ani Date: Wed, 13 Mar 2024 22:35:41 -0700 Subject: [PATCH 01/42] Added my updated workflow. --- .github/workflows/autocomment.yml | 75 +++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 .github/workflows/autocomment.yml diff --git a/.github/workflows/autocomment.yml b/.github/workflows/autocomment.yml new file mode 100644 index 0000000000..0a5f38e713 --- /dev/null +++ b/.github/workflows/autocomment.yml @@ -0,0 +1,75 @@ +name: Autocomment atime-based performance regression analysis on PRs + +on: + pull_request: + branches: + - '*' + types: + - opened + - reopened + - synchronize + +jobs: + comment: + runs-on: ubuntu-latest + container: ghcr.io/iterative/cml:0-dvc2-base1 + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: safe-dir-check + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + run: | + git config --global --add safe.directory '*' + + - name: Manage refs + run: | + git switch "${GITHUB_BASE_REF}" + git switch "${GITHUB_HEAD_REF}" + git for-each-ref --format="%(refname)" | while read ref; do + git show-ref --quiet --verify $ref 2>/dev/null || git update-ref -d $ref + done + shell: bash + + - name: R Setup + uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - name: Set up the required system dependency libgit2 + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + run: | + sudo apt-get update -y + sudo apt-get install -y libgit2-dev + + - name: Install atime and initiate magic + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + run: | + start=`date +%s` + R -e 'r = getOption("repos"); r["CRAN"] = "http://cran.us.r-project.org"; options(repos = r); install.packages("atime", dependencies = TRUE); atime::atime_pkg(Sys.getenv("GITHUB_WORKSPACE"))' + end=`date +%s` + runtime=$((end-start)) + echo "ATIME_STEP_RUNTIME=$((end-start))" >> $GITHUB_ENV + + - name: Upload results + id: artifact-upload-step + uses: actions/upload-artifact@v4 + with: + name: atime-results + path: inst/atime/ + + - name: Push generated plot along with relevant information on the PR thread as a GH-bot comment + env: + repo_token: ${{ secrets.GITHUB_TOKEN }} + run: | + echo "![Comparison Plot](./inst/atime/tests_all_facet.png)" >> report.md + echo "\nGenerated via commit ${{ github.event.pull_request.head.sha }}" >> report.md + echo "\nDownload link for the artifact containing the test results: [↓ atime-results.zip](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}/artifacts/${{ steps.artifact-upload-step.outputs.artifact-id }})" >> report.md + echo "\nTime taken to install \`atime\` and run \`atime_pkg\` on the test(s): $((ATIME_STEP_RUNTIME%3600/60)) minutes and $((ATIME_STEP_RUNTIME%60)) seconds" >> report.md + cml comment update report.md From 8716caa26667a5bb96d2b884d3eaa43602a1125d Mon Sep 17 00:00:00 2001 From: Ani Date: Wed, 13 Mar 2024 23:12:21 -0700 Subject: [PATCH 02/42] Changed to a workflow that uses my action from the marketplace --- .github/workflows/autocomment.yml | 64 +++---------------------------- 1 file changed, 5 insertions(+), 59 deletions(-) diff --git a/.github/workflows/autocomment.yml b/.github/workflows/autocomment.yml index 0a5f38e713..0f9a2b32cb 100644 --- a/.github/workflows/autocomment.yml +++ b/.github/workflows/autocomment.yml @@ -13,63 +13,9 @@ jobs: comment: runs-on: ubuntu-latest container: ghcr.io/iterative/cml:0-dvc2-base1 - + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + repo_token: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes steps: - - name: Checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: safe-dir-check - env: - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - run: | - git config --global --add safe.directory '*' - - - name: Manage refs - run: | - git switch "${GITHUB_BASE_REF}" - git switch "${GITHUB_HEAD_REF}" - git for-each-ref --format="%(refname)" | while read ref; do - git show-ref --quiet --verify $ref 2>/dev/null || git update-ref -d $ref - done - shell: bash - - - name: R Setup - uses: r-lib/actions/setup-r@v2 - with: - use-public-rspm: true - - - name: Set up the required system dependency libgit2 - env: - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - run: | - sudo apt-get update -y - sudo apt-get install -y libgit2-dev - - - name: Install atime and initiate magic - env: - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - run: | - start=`date +%s` - R -e 'r = getOption("repos"); r["CRAN"] = "http://cran.us.r-project.org"; options(repos = r); install.packages("atime", dependencies = TRUE); atime::atime_pkg(Sys.getenv("GITHUB_WORKSPACE"))' - end=`date +%s` - runtime=$((end-start)) - echo "ATIME_STEP_RUNTIME=$((end-start))" >> $GITHUB_ENV - - - name: Upload results - id: artifact-upload-step - uses: actions/upload-artifact@v4 - with: - name: atime-results - path: inst/atime/ - - - name: Push generated plot along with relevant information on the PR thread as a GH-bot comment - env: - repo_token: ${{ secrets.GITHUB_TOKEN }} - run: | - echo "![Comparison Plot](./inst/atime/tests_all_facet.png)" >> report.md - echo "\nGenerated via commit ${{ github.event.pull_request.head.sha }}" >> report.md - echo "\nDownload link for the artifact containing the test results: [↓ atime-results.zip](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}/artifacts/${{ steps.artifact-upload-step.outputs.artifact-id }})" >> report.md - echo "\nTime taken to install \`atime\` and run \`atime_pkg\` on the test(s): $((ATIME_STEP_RUNTIME%3600/60)) minutes and $((ATIME_STEP_RUNTIME%60)) seconds" >> report.md - cml comment update report.md + - uses: Anirban166/Autocomment-atime-results@v1.1.0 From 3b17614e50cf298c031806b847e911204aa96638 Mon Sep 17 00:00:00 2001 From: Ani Date: Thu, 11 Apr 2024 18:19:22 -0700 Subject: [PATCH 03/42] Added the current set of tests --- inst/atime/tests.R | 69 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 inst/atime/tests.R diff --git a/inst/atime/tests.R b/inst/atime/tests.R new file mode 100644 index 0000000000..e023eec213 --- /dev/null +++ b/inst/atime/tests.R @@ -0,0 +1,69 @@ +pkg.edit.fun = function(old.Package, new.Package, sha, new.pkg.path) { + pkg_find_replace <- function(glob, FIND, REPLACE) { + atime::glob_find_replace(file.path(new.pkg.path, glob), FIND, REPLACE) + } + Package_regex <- gsub(".", "_?", old.Package, fixed = TRUE) + Package_ <- gsub(".", "_", old.Package, fixed = TRUE) + new.Package_ <- paste0(Package_, "_", sha) + pkg_find_replace( + "DESCRIPTION", + paste0("Package:\\s+", old.Package), + paste("Package:", new.Package)) + pkg_find_replace( + file.path("src", "Makevars.*in"), + Package_regex, + new.Package_) + pkg_find_replace( + file.path("R", "onLoad.R"), + Package_regex, + new.Package_) + pkg_find_replace( + file.path("R", "onLoad.R"), + sprintf('packageVersion\\("%s"\\)', old.Package), + sprintf('packageVersion\\("%s"\\)', new.Package)) + pkg_find_replace( + file.path("src", "init.c"), + paste0("R_init_", Package_regex), + paste0("R_init_", gsub("[.]", "_", new.Package_))) + pkg_find_replace( + "NAMESPACE", + sprintf('useDynLib\\("?%s"?', Package_regex), + paste0('useDynLib(', new.Package_)) + } + +test.list <- list( + # Performance regression discussed in: https://github.com/Rdatatable/data.table/issues/4311 + # Fixed in: https://github.com/Rdatatable/data.table/pull/4440 + "Test regression fixed in #4440" = list( + pkg.edit.fun = pkg.edit.fun, + N = 10^seq(3,8), + setup = quote({ + set.seed(1L) + dt <- data.table(a = sample(N, N)) + setindex(dt, a) + }), + expr = quote(data.table:::shallow(dt)), + Before = "9d3b9202fddb980345025a4f6ac451ed26a423be", # This needs to be changed later. Currently assigned to the merge commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/4440) as the source of regression (or the particular commit that led to it) is not clear. In addition, older versions of data.table are having problems when being installed in this manner. (This includes commits from before Mar 20, 2020 or when the issue that discovered or first mentioned the regression was created) + Regression = "b1b1832b0d2d4032b46477d9fe6efb15006664f4", # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/0f0e7127b880df8459b0ed064dc841acd22f5b73) in the PR (https://github.com/Rdatatable/data.table/pull/4440/commits) that fixes the regression + Fixed = "9d3b9202fddb980345025a4f6ac451ed26a423be"), # Merge commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/4440) + + # Test based on: https://github.com/Rdatatable/data.table/issues/5424 + # Performance regression introduced from a commit in: https://github.com/Rdatatable/data.table/pull/4491 + # Fixed in: https://github.com/Rdatatable/data.table/pull/5463 + "Test regression fixed in #5463" = list( + pkg.edit.fun = pkg.edit.fun, + N = 10^seq(3, 8), + expr = quote(data.table:::`[.data.table`(dt_mod, , N := .N, by = g)), + setup = quote({ + n <- N/100 + set.seed(1L) + dt <- data.table( + g = sample(seq_len(n), N, TRUE), + x = runif(N), + key = "g") + dt_mod <- copy(dt) + }), + Before = "19b7866112614db53eb3e909c097407d91cd6738", # Parent of the regression commit (https://github.com/Rdatatable/data.table/commit/0895fa247afcf6b38044bd5f56c0d209691ddb31), which is the parent of the first commit in the PR that causes the issue (https://github.com/Rdatatable/data.table/pull/5493/commits) + Regression = "0895fa247afcf6b38044bd5f56c0d209691ddb31", # The regression commit is the parent of the first commit in the PR that fixed the issue (https://github.com/Rdatatable/data.table/pull/5493/commits) + Fixed = "1e03fe7b890e63da9651d997ea52548c90b3ae32") # Last commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/5493/commits) +) From ca2ec3c2f1bf46cc4d08b2cddf78c04efb57fc06 Mon Sep 17 00:00:00 2001 From: Ani Date: Thu, 11 Apr 2024 18:47:00 -0700 Subject: [PATCH 04/42] Revert the change on the failing commit for the second test case --- inst/atime/tests.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/atime/tests.R b/inst/atime/tests.R index e023eec213..79e5d0e05e 100644 --- a/inst/atime/tests.R +++ b/inst/atime/tests.R @@ -65,5 +65,5 @@ test.list <- list( }), Before = "19b7866112614db53eb3e909c097407d91cd6738", # Parent of the regression commit (https://github.com/Rdatatable/data.table/commit/0895fa247afcf6b38044bd5f56c0d209691ddb31), which is the parent of the first commit in the PR that causes the issue (https://github.com/Rdatatable/data.table/pull/5493/commits) Regression = "0895fa247afcf6b38044bd5f56c0d209691ddb31", # The regression commit is the parent of the first commit in the PR that fixed the issue (https://github.com/Rdatatable/data.table/pull/5493/commits) - Fixed = "1e03fe7b890e63da9651d997ea52548c90b3ae32") # Last commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/5493/commits) + Fixed = "58409197426ced4714af842650b0cc3b9e2cb842") # Last commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/5493/commits) ) From 30eb1366c10c1a4e296bd81dfc9a1f0b25b4a20f Mon Sep 17 00:00:00 2001 From: Ani Date: Thu, 11 Apr 2024 19:14:07 -0700 Subject: [PATCH 05/42] Update tests.R --- inst/atime/tests.R | 1 + 1 file changed, 1 insertion(+) diff --git a/inst/atime/tests.R b/inst/atime/tests.R index 79e5d0e05e..fd309002cb 100644 --- a/inst/atime/tests.R +++ b/inst/atime/tests.R @@ -67,3 +67,4 @@ test.list <- list( Regression = "0895fa247afcf6b38044bd5f56c0d209691ddb31", # The regression commit is the parent of the first commit in the PR that fixed the issue (https://github.com/Rdatatable/data.table/pull/5493/commits) Fixed = "58409197426ced4714af842650b0cc3b9e2cb842") # Last commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/5493/commits) ) +# Test to see if the workflow is triggered with the path filter spec From 1a797ec81195decace8935b33334d158ad1a7148 Mon Sep 17 00:00:00 2001 From: Ani Date: Thu, 11 Apr 2024 19:22:21 -0700 Subject: [PATCH 06/42] Trigger test (path filter) --- src/negate.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/negate.c b/src/negate.c index 4db3767ff8..2e393d445f 100644 --- a/src/negate.c +++ b/src/negate.c @@ -11,7 +11,6 @@ void negateByRef(SEXP x) { } } - SEXP notchin(SEXP x, SEXP table) { // see discussion in PR#4931 SEXP result = PROTECT(chin(x, table)); From 39f0676d45c5efc832f4221f75a70e1952915872 Mon Sep 17 00:00:00 2001 From: Ani Date: Fri, 12 Apr 2024 18:09:49 -0700 Subject: [PATCH 07/42] Test current state of tests --- inst/atime/tests.R | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/inst/atime/tests.R b/inst/atime/tests.R index fd309002cb..a02f07b52c 100644 --- a/inst/atime/tests.R +++ b/inst/atime/tests.R @@ -1,3 +1,26 @@ +# A function to customize R package metadata and source files to facilitate version-specific installation and testing. +# +# This is specifically tailored for handling data.table which requires specific changes in non-standard files (such as the object file name in `Makevars` and version checking code in `onLoad.R`) +# to support testing across different versions (base and HEAD for PRs, commits associated with historical regressions, etc.) of the package. +# It appends a SHA1 hash to the package name (`PKG.SHA`), ensuring each version can be installed and tested separately. +# +# @param old.Package Current name of the package. +# @param new.Package New name of the package, including a SHA hash. +# @param sha SHA1 hash used for differentiating versions. +# @param new.pkg.path Path to the package files. +# +# @details +# The function modifies: +# - DESCRIPTION, updating the package name. +# - Makevars, customizing the shared object file name and adjusting the build settings. +# - R/onLoad.R, adapting custom version checking for package loading operations. +# - NAMESPACE, changing namespace settings for dynamic linking. +# +# @examples +# pkg.edit.fun("data.table", "data.table.some_40_digit_SHA1_hash", "some_40_digit_SHA1_hash", "/path/to/data.table") +# +# @return None (performs in-place file modifications) +# @note This setup is typically unnecessary for most packages but essential for `data.table` due to its unique configuration. pkg.edit.fun = function(old.Package, new.Package, sha, new.pkg.path) { pkg_find_replace <- function(glob, FIND, REPLACE) { atime::glob_find_replace(file.path(new.pkg.path, glob), FIND, REPLACE) @@ -63,8 +86,7 @@ test.list <- list( key = "g") dt_mod <- copy(dt) }), - Before = "19b7866112614db53eb3e909c097407d91cd6738", # Parent of the regression commit (https://github.com/Rdatatable/data.table/commit/0895fa247afcf6b38044bd5f56c0d209691ddb31), which is the parent of the first commit in the PR that causes the issue (https://github.com/Rdatatable/data.table/pull/5493/commits) - Regression = "0895fa247afcf6b38044bd5f56c0d209691ddb31", # The regression commit is the parent of the first commit in the PR that fixed the issue (https://github.com/Rdatatable/data.table/pull/5493/commits) - Fixed = "58409197426ced4714af842650b0cc3b9e2cb842") # Last commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/5493/commits) + Before = "be2f72e6f5c90622fe72e1c315ca05769a9dc854", # Commit preceding the regression causing commit (https://github.com/Rdatatable/data.table/pull/4491/commits/e793f53466d99f86e70fc2611b708ae8c601a451) in the PR that introduced the issue (https://github.com/Rdatatable/data.table/pull/4491/commits) + Regression = "e793f53466d99f86e70fc2611b708ae8c601a451", # Commit responsible for regression in the PR that introduced the issue (https://github.com/Rdatatable/data.table/pull/4491/commits) + Fixed = "58409197426ced4714af842650b0cc3b9e2cb842") # Last commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/5463/commits) ) -# Test to see if the workflow is triggered with the path filter spec From 24918230ee51ed8999bea2b537dd2cfd3c60ed9e Mon Sep 17 00:00:00 2001 From: Ani Date: Fri, 12 Apr 2024 22:37:22 -0700 Subject: [PATCH 08/42] Test suggested changes --- inst/atime/tests.R | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/inst/atime/tests.R b/inst/atime/tests.R index a02f07b52c..68770347f3 100644 --- a/inst/atime/tests.R +++ b/inst/atime/tests.R @@ -1,8 +1,8 @@ # A function to customize R package metadata and source files to facilitate version-specific installation and testing. # -# This is specifically tailored for handling data.table which requires specific changes in non-standard files (such as the object file name in `Makevars` and version checking code in `onLoad.R`) +# This is specifically tailored for handling data.table which requires specific changes in non-standard files (such as the object file name in Makevars and version checking code in onLoad.R) # to support testing across different versions (base and HEAD for PRs, commits associated with historical regressions, etc.) of the package. -# It appends a SHA1 hash to the package name (`PKG.SHA`), ensuring each version can be installed and tested separately. +# It appends a SHA1 hash to the package name (PKG.SHA), ensuring each version can be installed and tested separately. # # @param old.Package Current name of the package. # @param new.Package New name of the package, including a SHA hash. @@ -17,10 +17,10 @@ # - NAMESPACE, changing namespace settings for dynamic linking. # # @examples -# pkg.edit.fun("data.table", "data.table.some_40_digit_SHA1_hash", "some_40_digit_SHA1_hash", "/path/to/data.table") +# pkg.edit.fun("data.table", "data.table.some_SHA1_hash", "some_SHA1_hash", "/path/to/data.table") # # @return None (performs in-place file modifications) -# @note This setup is typically unnecessary for most packages but essential for `data.table` due to its unique configuration. +# @note This setup is typically unnecessary for most packages but essential for data.table due to its unique configuration. pkg.edit.fun = function(old.Package, new.Package, sha, new.pkg.path) { pkg_find_replace <- function(glob, FIND, REPLACE) { atime::glob_find_replace(file.path(new.pkg.path, glob), FIND, REPLACE) @@ -54,6 +54,22 @@ pkg.edit.fun = function(old.Package, new.Package, sha, new.pkg.path) { paste0('useDynLib(', new.Package_)) } +# A list of performance tests. +# +# Each entry in this list corresponds to a performance test and contains a sublist with three mandatory arguments: +# - N: A numeric sequence of data sizes to vary. +# - setup: An expression evaluated for every data size before measuring time/memory. +# - expr: An expression that will be evaluated for benchmarking performance across different git commit versions. +# This must call a function from data.table using a syntax with double or triple colon prefix. +# The package name before the colons will be replaced by a new package name that uses the commit SHA hash. +# (For instance, data.table:::[.data.table will become data.table.some_40_digit_SHA1_hash:::[.data.table) +# +# Optional parameters that may be useful to configure tests: +# - times: Number of times each expression is evaluated (default is 10). +# - seconds.limit: The maximum median timing (in seconds) of an expression. No timings for larger N are computed past that threshold. +# - sha.vec: Named character vector or a list of vectors that specify data.table-specific commit SHAs for testing across those different git commit versions. +# For historical regressions, use 'Before', 'Regression', and 'Fixed' (otherwise something like 'Slow' or 'Fast' ideally). +# @note Please check https://github.com/tdhock/atime/blob/main/vignettes/data.table.Rmd for more information. test.list <- list( # Performance regression discussed in: https://github.com/Rdatatable/data.table/issues/4311 # Fixed in: https://github.com/Rdatatable/data.table/pull/4440 @@ -62,8 +78,8 @@ test.list <- list( N = 10^seq(3,8), setup = quote({ set.seed(1L) - dt <- data.table(a = sample(N, N)) - setindex(dt, a) + dt <- data.table(a = sample.int(N)) + setindexv(dt, "a") }), expr = quote(data.table:::shallow(dt)), Before = "9d3b9202fddb980345025a4f6ac451ed26a423be", # This needs to be changed later. Currently assigned to the merge commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/4440) as the source of regression (or the particular commit that led to it) is not clear. In addition, older versions of data.table are having problems when being installed in this manner. (This includes commits from before Mar 20, 2020 or when the issue that discovered or first mentioned the regression was created) @@ -76,16 +92,16 @@ test.list <- list( "Test regression fixed in #5463" = list( pkg.edit.fun = pkg.edit.fun, N = 10^seq(3, 8), - expr = quote(data.table:::`[.data.table`(dt_mod, , N := .N, by = g)), setup = quote({ n <- N/100 - set.seed(1L) + set.seed(2L) dt <- data.table( g = sample(seq_len(n), N, TRUE), x = runif(N), key = "g") dt_mod <- copy(dt) }), + expr = quote(data.table:::`[.data.table`(dt_mod, , N := .N, by = g)), Before = "be2f72e6f5c90622fe72e1c315ca05769a9dc854", # Commit preceding the regression causing commit (https://github.com/Rdatatable/data.table/pull/4491/commits/e793f53466d99f86e70fc2611b708ae8c601a451) in the PR that introduced the issue (https://github.com/Rdatatable/data.table/pull/4491/commits) Regression = "e793f53466d99f86e70fc2611b708ae8c601a451", # Commit responsible for regression in the PR that introduced the issue (https://github.com/Rdatatable/data.table/pull/4491/commits) Fixed = "58409197426ced4714af842650b0cc3b9e2cb842") # Last commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/5463/commits) From 55bc2dbbb5683a2eccd800d668ebfa4461680318 Mon Sep 17 00:00:00 2001 From: Ani Date: Tue, 16 Apr 2024 22:10:32 -0700 Subject: [PATCH 09/42] Testing another case --- inst/atime/tests.R | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/inst/atime/tests.R b/inst/atime/tests.R index 68770347f3..f3a108b470 100644 --- a/inst/atime/tests.R +++ b/inst/atime/tests.R @@ -104,5 +104,22 @@ test.list <- list( expr = quote(data.table:::`[.data.table`(dt_mod, , N := .N, by = g)), Before = "be2f72e6f5c90622fe72e1c315ca05769a9dc854", # Commit preceding the regression causing commit (https://github.com/Rdatatable/data.table/pull/4491/commits/e793f53466d99f86e70fc2611b708ae8c601a451) in the PR that introduced the issue (https://github.com/Rdatatable/data.table/pull/4491/commits) Regression = "e793f53466d99f86e70fc2611b708ae8c601a451", # Commit responsible for regression in the PR that introduced the issue (https://github.com/Rdatatable/data.table/pull/4491/commits) - Fixed = "58409197426ced4714af842650b0cc3b9e2cb842") # Last commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/5463/commits) + Fixed = "58409197426ced4714af842650b0cc3b9e2cb842"), # Last commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/5463/commits) + + # Test based on https://github.com/Rdatatable/data.table/issues/4200 + # Performance regression fixed in: https://github.com/Rdatatable/data.table/pull/4558 + "Test regression fixed in #4558" = list( + pkg.edit.fun = pkg.edit.fun, + N = 10^seq(1, 20), + expr = quote(data.table:::`[.data.table`(d, , (max(v1) - min(v2)), by = id3)), + setup = quote({ + set.seed(108) + d <- data.table( + id3 = sample(c(seq.int(N * 0.9), sample(N * 0.9, N * 0.1, TRUE))), + v1 = sample(5L, N, TRUE), + v2 = sample(5L, N, TRUE)) + }), + "Before" = "15f0598b9828d3af2eb8ddc9b38e0356f42afe4f", + "Regression" = "6f360be0b2a6cf425f6df751ca9a99ec5d35ed93", + "Fixed" = "ba32f3cba38ec270587e395f6e6c26a80be36be6") ) From eae7f120520e8ebe5b84e226a11631ebb123aec6 Mon Sep 17 00:00:00 2001 From: Ani Date: Tue, 16 Apr 2024 22:27:57 -0700 Subject: [PATCH 10/42] Change a variable name to be more generic --- inst/atime/tests.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/atime/tests.R b/inst/atime/tests.R index f3a108b470..1575b0a091 100644 --- a/inst/atime/tests.R +++ b/inst/atime/tests.R @@ -111,11 +111,11 @@ test.list <- list( "Test regression fixed in #4558" = list( pkg.edit.fun = pkg.edit.fun, N = 10^seq(1, 20), - expr = quote(data.table:::`[.data.table`(d, , (max(v1) - min(v2)), by = id3)), + expr = quote(data.table:::`[.data.table`(d, , (max(v1) - min(v2)), by = id)), setup = quote({ set.seed(108) d <- data.table( - id3 = sample(c(seq.int(N * 0.9), sample(N * 0.9, N * 0.1, TRUE))), + id = sample(c(seq.int(N * 0.9), sample(N * 0.9, N * 0.1, TRUE))), v1 = sample(5L, N, TRUE), v2 = sample(5L, N, TRUE)) }), From 1fb384c7b2c7e24f6f9767499fe7241f877e9909 Mon Sep 17 00:00:00 2001 From: Ani Date: Tue, 16 Apr 2024 23:01:22 -0700 Subject: [PATCH 11/42] Update the other two test case comments to the current state of data.table --- inst/atime/tests.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/atime/tests.R b/inst/atime/tests.R index 1575b0a091..1f1035eabe 100644 --- a/inst/atime/tests.R +++ b/inst/atime/tests.R @@ -82,7 +82,7 @@ test.list <- list( setindexv(dt, "a") }), expr = quote(data.table:::shallow(dt)), - Before = "9d3b9202fddb980345025a4f6ac451ed26a423be", # This needs to be changed later. Currently assigned to the merge commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/4440) as the source of regression (or the particular commit that led to it) is not clear. In addition, older versions of data.table are having problems when being installed in this manner. (This includes commits from before Mar 20, 2020 or when the issue that discovered or first mentioned the regression was created) + # Before = "", This needs to be updated later as there are two issues here: A) The source of regression (or the particular commit that led to it) is not clear; B) Older versions of data.table are having problems when being installed in this manner (This includes commits from before March 20 2020, when the issue that discovered or first mentioned the regression was created) Regression = "b1b1832b0d2d4032b46477d9fe6efb15006664f4", # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/0f0e7127b880df8459b0ed064dc841acd22f5b73) in the PR (https://github.com/Rdatatable/data.table/pull/4440/commits) that fixes the regression Fixed = "9d3b9202fddb980345025a4f6ac451ed26a423be"), # Merge commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/4440) @@ -102,7 +102,7 @@ test.list <- list( dt_mod <- copy(dt) }), expr = quote(data.table:::`[.data.table`(dt_mod, , N := .N, by = g)), - Before = "be2f72e6f5c90622fe72e1c315ca05769a9dc854", # Commit preceding the regression causing commit (https://github.com/Rdatatable/data.table/pull/4491/commits/e793f53466d99f86e70fc2611b708ae8c601a451) in the PR that introduced the issue (https://github.com/Rdatatable/data.table/pull/4491/commits) + Before = "be2f72e6f5c90622fe72e1c315ca05769a9dc854", # Parent of the regression causing commit (https://github.com/Rdatatable/data.table/commit/e793f53466d99f86e70fc2611b708ae8c601a451) in the PR that introduced the issue (https://github.com/Rdatatable/data.table/pull/4491/commits) Regression = "e793f53466d99f86e70fc2611b708ae8c601a451", # Commit responsible for regression in the PR that introduced the issue (https://github.com/Rdatatable/data.table/pull/4491/commits) Fixed = "58409197426ced4714af842650b0cc3b9e2cb842"), # Last commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/5463/commits) From 0c299fff11fee150404fd2652f6d6ffa44419637 Mon Sep 17 00:00:00 2001 From: Ani Date: Wed, 17 Apr 2024 14:20:02 -0700 Subject: [PATCH 12/42] Trying an old commit for 'Regression' --- inst/atime/tests.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/atime/tests.R b/inst/atime/tests.R index 1f1035eabe..ac7ad8bb37 100644 --- a/inst/atime/tests.R +++ b/inst/atime/tests.R @@ -120,6 +120,6 @@ test.list <- list( v2 = sample(5L, N, TRUE)) }), "Before" = "15f0598b9828d3af2eb8ddc9b38e0356f42afe4f", - "Regression" = "6f360be0b2a6cf425f6df751ca9a99ec5d35ed93", - "Fixed" = "ba32f3cba38ec270587e395f6e6c26a80be36be6") + "Regression" = "c0052964694a4c618ab182aa474f924d40576d94", # Since regression was reported on Jan 26 2020 (https://github.com/Rdatatable/data.table/issues/4200) + "Fixed" = "ba32f3cba38ec270587e395f6e6c26a80be36be6") # Merge commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/4558) ) From b1ecd99f96ca985d2fd31348161dc09076de836a Mon Sep 17 00:00:00 2001 From: Ani Date: Wed, 17 Apr 2024 16:34:26 -0700 Subject: [PATCH 13/42] Switched directory (to accommodate the specific case for data.table) --- .ci/atime/tests.R | 125 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 .ci/atime/tests.R diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R new file mode 100644 index 0000000000..ac7ad8bb37 --- /dev/null +++ b/.ci/atime/tests.R @@ -0,0 +1,125 @@ +# A function to customize R package metadata and source files to facilitate version-specific installation and testing. +# +# This is specifically tailored for handling data.table which requires specific changes in non-standard files (such as the object file name in Makevars and version checking code in onLoad.R) +# to support testing across different versions (base and HEAD for PRs, commits associated with historical regressions, etc.) of the package. +# It appends a SHA1 hash to the package name (PKG.SHA), ensuring each version can be installed and tested separately. +# +# @param old.Package Current name of the package. +# @param new.Package New name of the package, including a SHA hash. +# @param sha SHA1 hash used for differentiating versions. +# @param new.pkg.path Path to the package files. +# +# @details +# The function modifies: +# - DESCRIPTION, updating the package name. +# - Makevars, customizing the shared object file name and adjusting the build settings. +# - R/onLoad.R, adapting custom version checking for package loading operations. +# - NAMESPACE, changing namespace settings for dynamic linking. +# +# @examples +# pkg.edit.fun("data.table", "data.table.some_SHA1_hash", "some_SHA1_hash", "/path/to/data.table") +# +# @return None (performs in-place file modifications) +# @note This setup is typically unnecessary for most packages but essential for data.table due to its unique configuration. +pkg.edit.fun = function(old.Package, new.Package, sha, new.pkg.path) { + pkg_find_replace <- function(glob, FIND, REPLACE) { + atime::glob_find_replace(file.path(new.pkg.path, glob), FIND, REPLACE) + } + Package_regex <- gsub(".", "_?", old.Package, fixed = TRUE) + Package_ <- gsub(".", "_", old.Package, fixed = TRUE) + new.Package_ <- paste0(Package_, "_", sha) + pkg_find_replace( + "DESCRIPTION", + paste0("Package:\\s+", old.Package), + paste("Package:", new.Package)) + pkg_find_replace( + file.path("src", "Makevars.*in"), + Package_regex, + new.Package_) + pkg_find_replace( + file.path("R", "onLoad.R"), + Package_regex, + new.Package_) + pkg_find_replace( + file.path("R", "onLoad.R"), + sprintf('packageVersion\\("%s"\\)', old.Package), + sprintf('packageVersion\\("%s"\\)', new.Package)) + pkg_find_replace( + file.path("src", "init.c"), + paste0("R_init_", Package_regex), + paste0("R_init_", gsub("[.]", "_", new.Package_))) + pkg_find_replace( + "NAMESPACE", + sprintf('useDynLib\\("?%s"?', Package_regex), + paste0('useDynLib(', new.Package_)) + } + +# A list of performance tests. +# +# Each entry in this list corresponds to a performance test and contains a sublist with three mandatory arguments: +# - N: A numeric sequence of data sizes to vary. +# - setup: An expression evaluated for every data size before measuring time/memory. +# - expr: An expression that will be evaluated for benchmarking performance across different git commit versions. +# This must call a function from data.table using a syntax with double or triple colon prefix. +# The package name before the colons will be replaced by a new package name that uses the commit SHA hash. +# (For instance, data.table:::[.data.table will become data.table.some_40_digit_SHA1_hash:::[.data.table) +# +# Optional parameters that may be useful to configure tests: +# - times: Number of times each expression is evaluated (default is 10). +# - seconds.limit: The maximum median timing (in seconds) of an expression. No timings for larger N are computed past that threshold. +# - sha.vec: Named character vector or a list of vectors that specify data.table-specific commit SHAs for testing across those different git commit versions. +# For historical regressions, use 'Before', 'Regression', and 'Fixed' (otherwise something like 'Slow' or 'Fast' ideally). +# @note Please check https://github.com/tdhock/atime/blob/main/vignettes/data.table.Rmd for more information. +test.list <- list( + # Performance regression discussed in: https://github.com/Rdatatable/data.table/issues/4311 + # Fixed in: https://github.com/Rdatatable/data.table/pull/4440 + "Test regression fixed in #4440" = list( + pkg.edit.fun = pkg.edit.fun, + N = 10^seq(3,8), + setup = quote({ + set.seed(1L) + dt <- data.table(a = sample.int(N)) + setindexv(dt, "a") + }), + expr = quote(data.table:::shallow(dt)), + # Before = "", This needs to be updated later as there are two issues here: A) The source of regression (or the particular commit that led to it) is not clear; B) Older versions of data.table are having problems when being installed in this manner (This includes commits from before March 20 2020, when the issue that discovered or first mentioned the regression was created) + Regression = "b1b1832b0d2d4032b46477d9fe6efb15006664f4", # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/0f0e7127b880df8459b0ed064dc841acd22f5b73) in the PR (https://github.com/Rdatatable/data.table/pull/4440/commits) that fixes the regression + Fixed = "9d3b9202fddb980345025a4f6ac451ed26a423be"), # Merge commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/4440) + + # Test based on: https://github.com/Rdatatable/data.table/issues/5424 + # Performance regression introduced from a commit in: https://github.com/Rdatatable/data.table/pull/4491 + # Fixed in: https://github.com/Rdatatable/data.table/pull/5463 + "Test regression fixed in #5463" = list( + pkg.edit.fun = pkg.edit.fun, + N = 10^seq(3, 8), + setup = quote({ + n <- N/100 + set.seed(2L) + dt <- data.table( + g = sample(seq_len(n), N, TRUE), + x = runif(N), + key = "g") + dt_mod <- copy(dt) + }), + expr = quote(data.table:::`[.data.table`(dt_mod, , N := .N, by = g)), + Before = "be2f72e6f5c90622fe72e1c315ca05769a9dc854", # Parent of the regression causing commit (https://github.com/Rdatatable/data.table/commit/e793f53466d99f86e70fc2611b708ae8c601a451) in the PR that introduced the issue (https://github.com/Rdatatable/data.table/pull/4491/commits) + Regression = "e793f53466d99f86e70fc2611b708ae8c601a451", # Commit responsible for regression in the PR that introduced the issue (https://github.com/Rdatatable/data.table/pull/4491/commits) + Fixed = "58409197426ced4714af842650b0cc3b9e2cb842"), # Last commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/5463/commits) + + # Test based on https://github.com/Rdatatable/data.table/issues/4200 + # Performance regression fixed in: https://github.com/Rdatatable/data.table/pull/4558 + "Test regression fixed in #4558" = list( + pkg.edit.fun = pkg.edit.fun, + N = 10^seq(1, 20), + expr = quote(data.table:::`[.data.table`(d, , (max(v1) - min(v2)), by = id)), + setup = quote({ + set.seed(108) + d <- data.table( + id = sample(c(seq.int(N * 0.9), sample(N * 0.9, N * 0.1, TRUE))), + v1 = sample(5L, N, TRUE), + v2 = sample(5L, N, TRUE)) + }), + "Before" = "15f0598b9828d3af2eb8ddc9b38e0356f42afe4f", + "Regression" = "c0052964694a4c618ab182aa474f924d40576d94", # Since regression was reported on Jan 26 2020 (https://github.com/Rdatatable/data.table/issues/4200) + "Fixed" = "ba32f3cba38ec270587e395f6e6c26a80be36be6") # Merge commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/4558) +) From cb8306acbe7d85b766f4b39ab1748ed6d78ccb2c Mon Sep 17 00:00:00 2001 From: Ani Date: Wed, 17 Apr 2024 16:34:56 -0700 Subject: [PATCH 14/42] Delete inst/atime/tests.R --- inst/atime/tests.R | 125 --------------------------------------------- 1 file changed, 125 deletions(-) delete mode 100644 inst/atime/tests.R diff --git a/inst/atime/tests.R b/inst/atime/tests.R deleted file mode 100644 index ac7ad8bb37..0000000000 --- a/inst/atime/tests.R +++ /dev/null @@ -1,125 +0,0 @@ -# A function to customize R package metadata and source files to facilitate version-specific installation and testing. -# -# This is specifically tailored for handling data.table which requires specific changes in non-standard files (such as the object file name in Makevars and version checking code in onLoad.R) -# to support testing across different versions (base and HEAD for PRs, commits associated with historical regressions, etc.) of the package. -# It appends a SHA1 hash to the package name (PKG.SHA), ensuring each version can be installed and tested separately. -# -# @param old.Package Current name of the package. -# @param new.Package New name of the package, including a SHA hash. -# @param sha SHA1 hash used for differentiating versions. -# @param new.pkg.path Path to the package files. -# -# @details -# The function modifies: -# - DESCRIPTION, updating the package name. -# - Makevars, customizing the shared object file name and adjusting the build settings. -# - R/onLoad.R, adapting custom version checking for package loading operations. -# - NAMESPACE, changing namespace settings for dynamic linking. -# -# @examples -# pkg.edit.fun("data.table", "data.table.some_SHA1_hash", "some_SHA1_hash", "/path/to/data.table") -# -# @return None (performs in-place file modifications) -# @note This setup is typically unnecessary for most packages but essential for data.table due to its unique configuration. -pkg.edit.fun = function(old.Package, new.Package, sha, new.pkg.path) { - pkg_find_replace <- function(glob, FIND, REPLACE) { - atime::glob_find_replace(file.path(new.pkg.path, glob), FIND, REPLACE) - } - Package_regex <- gsub(".", "_?", old.Package, fixed = TRUE) - Package_ <- gsub(".", "_", old.Package, fixed = TRUE) - new.Package_ <- paste0(Package_, "_", sha) - pkg_find_replace( - "DESCRIPTION", - paste0("Package:\\s+", old.Package), - paste("Package:", new.Package)) - pkg_find_replace( - file.path("src", "Makevars.*in"), - Package_regex, - new.Package_) - pkg_find_replace( - file.path("R", "onLoad.R"), - Package_regex, - new.Package_) - pkg_find_replace( - file.path("R", "onLoad.R"), - sprintf('packageVersion\\("%s"\\)', old.Package), - sprintf('packageVersion\\("%s"\\)', new.Package)) - pkg_find_replace( - file.path("src", "init.c"), - paste0("R_init_", Package_regex), - paste0("R_init_", gsub("[.]", "_", new.Package_))) - pkg_find_replace( - "NAMESPACE", - sprintf('useDynLib\\("?%s"?', Package_regex), - paste0('useDynLib(', new.Package_)) - } - -# A list of performance tests. -# -# Each entry in this list corresponds to a performance test and contains a sublist with three mandatory arguments: -# - N: A numeric sequence of data sizes to vary. -# - setup: An expression evaluated for every data size before measuring time/memory. -# - expr: An expression that will be evaluated for benchmarking performance across different git commit versions. -# This must call a function from data.table using a syntax with double or triple colon prefix. -# The package name before the colons will be replaced by a new package name that uses the commit SHA hash. -# (For instance, data.table:::[.data.table will become data.table.some_40_digit_SHA1_hash:::[.data.table) -# -# Optional parameters that may be useful to configure tests: -# - times: Number of times each expression is evaluated (default is 10). -# - seconds.limit: The maximum median timing (in seconds) of an expression. No timings for larger N are computed past that threshold. -# - sha.vec: Named character vector or a list of vectors that specify data.table-specific commit SHAs for testing across those different git commit versions. -# For historical regressions, use 'Before', 'Regression', and 'Fixed' (otherwise something like 'Slow' or 'Fast' ideally). -# @note Please check https://github.com/tdhock/atime/blob/main/vignettes/data.table.Rmd for more information. -test.list <- list( - # Performance regression discussed in: https://github.com/Rdatatable/data.table/issues/4311 - # Fixed in: https://github.com/Rdatatable/data.table/pull/4440 - "Test regression fixed in #4440" = list( - pkg.edit.fun = pkg.edit.fun, - N = 10^seq(3,8), - setup = quote({ - set.seed(1L) - dt <- data.table(a = sample.int(N)) - setindexv(dt, "a") - }), - expr = quote(data.table:::shallow(dt)), - # Before = "", This needs to be updated later as there are two issues here: A) The source of regression (or the particular commit that led to it) is not clear; B) Older versions of data.table are having problems when being installed in this manner (This includes commits from before March 20 2020, when the issue that discovered or first mentioned the regression was created) - Regression = "b1b1832b0d2d4032b46477d9fe6efb15006664f4", # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/0f0e7127b880df8459b0ed064dc841acd22f5b73) in the PR (https://github.com/Rdatatable/data.table/pull/4440/commits) that fixes the regression - Fixed = "9d3b9202fddb980345025a4f6ac451ed26a423be"), # Merge commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/4440) - - # Test based on: https://github.com/Rdatatable/data.table/issues/5424 - # Performance regression introduced from a commit in: https://github.com/Rdatatable/data.table/pull/4491 - # Fixed in: https://github.com/Rdatatable/data.table/pull/5463 - "Test regression fixed in #5463" = list( - pkg.edit.fun = pkg.edit.fun, - N = 10^seq(3, 8), - setup = quote({ - n <- N/100 - set.seed(2L) - dt <- data.table( - g = sample(seq_len(n), N, TRUE), - x = runif(N), - key = "g") - dt_mod <- copy(dt) - }), - expr = quote(data.table:::`[.data.table`(dt_mod, , N := .N, by = g)), - Before = "be2f72e6f5c90622fe72e1c315ca05769a9dc854", # Parent of the regression causing commit (https://github.com/Rdatatable/data.table/commit/e793f53466d99f86e70fc2611b708ae8c601a451) in the PR that introduced the issue (https://github.com/Rdatatable/data.table/pull/4491/commits) - Regression = "e793f53466d99f86e70fc2611b708ae8c601a451", # Commit responsible for regression in the PR that introduced the issue (https://github.com/Rdatatable/data.table/pull/4491/commits) - Fixed = "58409197426ced4714af842650b0cc3b9e2cb842"), # Last commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/5463/commits) - - # Test based on https://github.com/Rdatatable/data.table/issues/4200 - # Performance regression fixed in: https://github.com/Rdatatable/data.table/pull/4558 - "Test regression fixed in #4558" = list( - pkg.edit.fun = pkg.edit.fun, - N = 10^seq(1, 20), - expr = quote(data.table:::`[.data.table`(d, , (max(v1) - min(v2)), by = id)), - setup = quote({ - set.seed(108) - d <- data.table( - id = sample(c(seq.int(N * 0.9), sample(N * 0.9, N * 0.1, TRUE))), - v1 = sample(5L, N, TRUE), - v2 = sample(5L, N, TRUE)) - }), - "Before" = "15f0598b9828d3af2eb8ddc9b38e0356f42afe4f", - "Regression" = "c0052964694a4c618ab182aa474f924d40576d94", # Since regression was reported on Jan 26 2020 (https://github.com/Rdatatable/data.table/issues/4200) - "Fixed" = "ba32f3cba38ec270587e395f6e6c26a80be36be6") # Merge commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/4558) -) From 853a74ceab744d86919c7a3a9eb2151c41b88313 Mon Sep 17 00:00:00 2001 From: Ani Date: Wed, 17 Apr 2024 18:24:50 -0700 Subject: [PATCH 15/42] Trigger updated workflow (v1.2.0) From 69845fd04338696f67bd914ca8615df1a54855b4 Mon Sep 17 00:00:00 2001 From: Ani Date: Wed, 17 Apr 2024 21:13:20 -0700 Subject: [PATCH 16/42] Test a Slow/Fast case --- .ci/atime/tests.R | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index ac7ad8bb37..6dd568211e 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -121,5 +121,17 @@ test.list <- list( }), "Before" = "15f0598b9828d3af2eb8ddc9b38e0356f42afe4f", "Regression" = "c0052964694a4c618ab182aa474f924d40576d94", # Since regression was reported on Jan 26 2020 (https://github.com/Rdatatable/data.table/issues/4200) - "Fixed" = "ba32f3cba38ec270587e395f6e6c26a80be36be6") # Merge commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/4558) + "Fixed" = "ba32f3cba38ec270587e395f6e6c26a80be36be6"), # Merge commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/4558) + + # Issue reported in: https://github.com/Rdatatable/data.table/issues/5426 + # Fixed in: https://github.com/Rdatatable/data.table/pull/5427 + "Test regression fixed in #5427" = list( + pkg.edit.fun = pkg.edit.fun, + N = 10^seq(1, 7), + setup = { + DT = replicate(N, 1, simplify = FALSE) + }, + expr = data.table:::setDT(DT), + "Slow"= "c4a2085e35689a108d67dacb2f8261e4964d7e12", # Parent of the first commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/commit/7cc4da4c1c8e568f655ab5167922dcdb75953801) + "Fast"="1872f473b20fdcddc5c1b35d79fe9229cd9a1d15") # Last commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/pull/5427/commits) ) From ebcf5ddb68e1133d35020f505ea126a7f77cccdb Mon Sep 17 00:00:00 2001 From: Ani Date: Thu, 18 Apr 2024 11:25:09 -0700 Subject: [PATCH 17/42] Quote setup and expr for the fourth test case and comment out the third one --- .ci/atime/tests.R | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index 6dd568211e..dece0449a6 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -108,30 +108,30 @@ test.list <- list( # Test based on https://github.com/Rdatatable/data.table/issues/4200 # Performance regression fixed in: https://github.com/Rdatatable/data.table/pull/4558 - "Test regression fixed in #4558" = list( - pkg.edit.fun = pkg.edit.fun, - N = 10^seq(1, 20), - expr = quote(data.table:::`[.data.table`(d, , (max(v1) - min(v2)), by = id)), - setup = quote({ - set.seed(108) - d <- data.table( - id = sample(c(seq.int(N * 0.9), sample(N * 0.9, N * 0.1, TRUE))), - v1 = sample(5L, N, TRUE), - v2 = sample(5L, N, TRUE)) - }), - "Before" = "15f0598b9828d3af2eb8ddc9b38e0356f42afe4f", - "Regression" = "c0052964694a4c618ab182aa474f924d40576d94", # Since regression was reported on Jan 26 2020 (https://github.com/Rdatatable/data.table/issues/4200) - "Fixed" = "ba32f3cba38ec270587e395f6e6c26a80be36be6"), # Merge commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/4558) + #"Test regression fixed in #4558" = list( + #pkg.edit.fun = pkg.edit.fun, + #N = 10^seq(1, 20), + #expr = quote(data.table:::`[.data.table`(d, , (max(v1) - min(v2)), by = id)), + #setup = quote({ + # set.seed(108) + # d <- data.table( + # id = sample(c(seq.int(N * 0.9), sample(N * 0.9, N * 0.1, TRUE))), + # v1 = sample(5L, N, TRUE), + # v2 = sample(5L, N, TRUE)) + # }), + # "Before" = "15f0598b9828d3af2eb8ddc9b38e0356f42afe4f", + # "Regression" = "c0052964694a4c618ab182aa474f924d40576d94", # Since regression was reported on Jan 26 2020 (https://github.com/Rdatatable/data.table/issues/4200) + # "Fixed" = "ba32f3cba38ec270587e395f6e6c26a80be36be6"), # Merge commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/4558) # Issue reported in: https://github.com/Rdatatable/data.table/issues/5426 # Fixed in: https://github.com/Rdatatable/data.table/pull/5427 "Test regression fixed in #5427" = list( pkg.edit.fun = pkg.edit.fun, N = 10^seq(1, 7), - setup = { + setup = quote({ DT = replicate(N, 1, simplify = FALSE) - }, - expr = data.table:::setDT(DT), + }), + expr = quote(data.table:::setDT(DT)), "Slow"= "c4a2085e35689a108d67dacb2f8261e4964d7e12", # Parent of the first commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/commit/7cc4da4c1c8e568f655ab5167922dcdb75953801) "Fast"="1872f473b20fdcddc5c1b35d79fe9229cd9a1d15") # Last commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/pull/5427/commits) ) From 870676a765cd5b1568809de0087fcf88c2f656e9 Mon Sep 17 00:00:00 2001 From: Ani Date: Thu, 18 Apr 2024 12:41:08 -0700 Subject: [PATCH 18/42] Changed to more appropriate label names --- .ci/atime/tests.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index dece0449a6..3fd8fec3b4 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -73,7 +73,7 @@ pkg.edit.fun = function(old.Package, new.Package, sha, new.pkg.path) { test.list <- list( # Performance regression discussed in: https://github.com/Rdatatable/data.table/issues/4311 # Fixed in: https://github.com/Rdatatable/data.table/pull/4440 - "Test regression fixed in #4440" = list( + "Test performance regression fixed in #4440" = list( pkg.edit.fun = pkg.edit.fun, N = 10^seq(3,8), setup = quote({ @@ -89,7 +89,7 @@ test.list <- list( # Test based on: https://github.com/Rdatatable/data.table/issues/5424 # Performance regression introduced from a commit in: https://github.com/Rdatatable/data.table/pull/4491 # Fixed in: https://github.com/Rdatatable/data.table/pull/5463 - "Test regression fixed in #5463" = list( + "Test performance regression fixed in #5463" = list( pkg.edit.fun = pkg.edit.fun, N = 10^seq(3, 8), setup = quote({ @@ -124,14 +124,14 @@ test.list <- list( # "Fixed" = "ba32f3cba38ec270587e395f6e6c26a80be36be6"), # Merge commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/4558) # Issue reported in: https://github.com/Rdatatable/data.table/issues/5426 - # Fixed in: https://github.com/Rdatatable/data.table/pull/5427 - "Test regression fixed in #5427" = list( + # To be fixed in: https://github.com/Rdatatable/data.table/pull/5427 + "Test performance improvement implemented in #5427" = list( pkg.edit.fun = pkg.edit.fun, N = 10^seq(1, 7), setup = quote({ DT = replicate(N, 1, simplify = FALSE) }), expr = quote(data.table:::setDT(DT)), - "Slow"= "c4a2085e35689a108d67dacb2f8261e4964d7e12", # Parent of the first commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/commit/7cc4da4c1c8e568f655ab5167922dcdb75953801) - "Fast"="1872f473b20fdcddc5c1b35d79fe9229cd9a1d15") # Last commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/pull/5427/commits) + "Slow" = "c4a2085e35689a108d67dacb2f8261e4964d7e12", # Parent of the first commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/commit/7cc4da4c1c8e568f655ab5167922dcdb75953801) + "Fast" = "1872f473b20fdcddc5c1b35d79fe9229cd9a1d15") # Last commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/pull/5427/commits) ) From 5467fd33f528cdc098e38f0cb98f7166fb18930a Mon Sep 17 00:00:00 2001 From: Ani Date: Thu, 18 Apr 2024 13:00:38 -0700 Subject: [PATCH 19/42] Removed unnecessary double quotes --- .ci/atime/tests.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index 3fd8fec3b4..b58cd763ae 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -119,9 +119,9 @@ test.list <- list( # v1 = sample(5L, N, TRUE), # v2 = sample(5L, N, TRUE)) # }), - # "Before" = "15f0598b9828d3af2eb8ddc9b38e0356f42afe4f", - # "Regression" = "c0052964694a4c618ab182aa474f924d40576d94", # Since regression was reported on Jan 26 2020 (https://github.com/Rdatatable/data.table/issues/4200) - # "Fixed" = "ba32f3cba38ec270587e395f6e6c26a80be36be6"), # Merge commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/4558) + # Before = "15f0598b9828d3af2eb8ddc9b38e0356f42afe4f", + # Regression = "c0052964694a4c618ab182aa474f924d40576d94", # Since regression was reported on Jan 26 2020 (https://github.com/Rdatatable/data.table/issues/4200) + # Fixed = "ba32f3cba38ec270587e395f6e6c26a80be36be6"), # Merge commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/4558) # Issue reported in: https://github.com/Rdatatable/data.table/issues/5426 # To be fixed in: https://github.com/Rdatatable/data.table/pull/5427 @@ -132,6 +132,6 @@ test.list <- list( DT = replicate(N, 1, simplify = FALSE) }), expr = quote(data.table:::setDT(DT)), - "Slow" = "c4a2085e35689a108d67dacb2f8261e4964d7e12", # Parent of the first commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/commit/7cc4da4c1c8e568f655ab5167922dcdb75953801) - "Fast" = "1872f473b20fdcddc5c1b35d79fe9229cd9a1d15") # Last commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/pull/5427/commits) + Slow = "c4a2085e35689a108d67dacb2f8261e4964d7e12", # Parent of the first commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/commit/7cc4da4c1c8e568f655ab5167922dcdb75953801) + Fast = "1872f473b20fdcddc5c1b35d79fe9229cd9a1d15") # Last commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/pull/5427/commits) ) From 0cd4ac8485af7a3fb40db46d3a9b82f540c28c4f Mon Sep 17 00:00:00 2001 From: Ani Date: Tue, 23 Apr 2024 13:24:36 -0700 Subject: [PATCH 20/42] Test new atime code --- .ci/atime/tests.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index b58cd763ae..0eaaa8f674 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -129,9 +129,10 @@ test.list <- list( pkg.edit.fun = pkg.edit.fun, N = 10^seq(1, 7), setup = quote({ - DT = replicate(N, 1, simplify = FALSE) + L <- replicate(N, 1, simplify = FALSE) + data.table:::setattr(L, "class", NULL) + data.table:::setDT(L) }), expr = quote(data.table:::setDT(DT)), Slow = "c4a2085e35689a108d67dacb2f8261e4964d7e12", # Parent of the first commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/commit/7cc4da4c1c8e568f655ab5167922dcdb75953801) Fast = "1872f473b20fdcddc5c1b35d79fe9229cd9a1d15") # Last commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/pull/5427/commits) -) From 7971d869864e7f8bb4afd4edfd09e1c32c751b8a Mon Sep 17 00:00:00 2001 From: Ani Date: Tue, 23 Apr 2024 13:39:24 -0700 Subject: [PATCH 21/42] Move setattr() and setDT inside expr --- .ci/atime/tests.R | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index 0eaaa8f674..12256381de 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -130,9 +130,7 @@ test.list <- list( N = 10^seq(1, 7), setup = quote({ L <- replicate(N, 1, simplify = FALSE) - data.table:::setattr(L, "class", NULL) - data.table:::setDT(L) }), - expr = quote(data.table:::setDT(DT)), + expr = quote(data.table:::setattr(L, "class", NULL); data.table:::setDT(DT)), Slow = "c4a2085e35689a108d67dacb2f8261e4964d7e12", # Parent of the first commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/commit/7cc4da4c1c8e568f655ab5167922dcdb75953801) Fast = "1872f473b20fdcddc5c1b35d79fe9229cd9a1d15") # Last commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/pull/5427/commits) From f9b11de66394ff67699e4cee04c3fb44ffe6ea47 Mon Sep 17 00:00:00 2001 From: Ani Date: Tue, 23 Apr 2024 13:42:23 -0700 Subject: [PATCH 22/42] Update tests.R --- .ci/atime/tests.R | 1 + 1 file changed, 1 insertion(+) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index 12256381de..73641f1f83 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -130,6 +130,7 @@ test.list <- list( N = 10^seq(1, 7), setup = quote({ L <- replicate(N, 1, simplify = FALSE) + setDT(L) }), expr = quote(data.table:::setattr(L, "class", NULL); data.table:::setDT(DT)), Slow = "c4a2085e35689a108d67dacb2f8261e4964d7e12", # Parent of the first commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/commit/7cc4da4c1c8e568f655ab5167922dcdb75953801) From d0e2a97cb9cee137b8df6beedf06d58b17d75b23 Mon Sep 17 00:00:00 2001 From: Ani Date: Tue, 23 Apr 2024 13:47:10 -0700 Subject: [PATCH 23/42] Update tests.R --- .ci/atime/tests.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index 73641f1f83..a7492130e0 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -132,6 +132,9 @@ test.list <- list( L <- replicate(N, 1, simplify = FALSE) setDT(L) }), - expr = quote(data.table:::setattr(L, "class", NULL); data.table:::setDT(DT)), + expr = quote({ + data.table:::setattr(L, "class", NULL) + data.table:::setDT(L) + }), Slow = "c4a2085e35689a108d67dacb2f8261e4964d7e12", # Parent of the first commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/commit/7cc4da4c1c8e568f655ab5167922dcdb75953801) Fast = "1872f473b20fdcddc5c1b35d79fe9229cd9a1d15") # Last commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/pull/5427/commits) From bc8243f99ab83ee86d8b4bcb317235eaa6fec6cc Mon Sep 17 00:00:00 2001 From: Ani Date: Tue, 23 Apr 2024 14:01:27 -0700 Subject: [PATCH 24/42] Close test.list --- .ci/atime/tests.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index a7492130e0..9e9bcfa1b9 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -129,8 +129,7 @@ test.list <- list( pkg.edit.fun = pkg.edit.fun, N = 10^seq(1, 7), setup = quote({ - L <- replicate(N, 1, simplify = FALSE) - setDT(L) + L <- replicate(N, 1, simplify = FALSE) }), expr = quote({ data.table:::setattr(L, "class", NULL) @@ -138,3 +137,4 @@ test.list <- list( }), Slow = "c4a2085e35689a108d67dacb2f8261e4964d7e12", # Parent of the first commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/commit/7cc4da4c1c8e568f655ab5167922dcdb75953801) Fast = "1872f473b20fdcddc5c1b35d79fe9229cd9a1d15") # Last commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/pull/5427/commits) +) From 50515bb06f3a0087bac6355ec75bfe789205014f Mon Sep 17 00:00:00 2001 From: Ani Date: Tue, 23 Apr 2024 14:28:23 -0700 Subject: [PATCH 25/42] Bring back the setDT call inside setup due to the irregular memory curve --- .ci/atime/tests.R | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index 9e9bcfa1b9..8896606ca8 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -126,13 +126,14 @@ test.list <- list( # Issue reported in: https://github.com/Rdatatable/data.table/issues/5426 # To be fixed in: https://github.com/Rdatatable/data.table/pull/5427 "Test performance improvement implemented in #5427" = list( - pkg.edit.fun = pkg.edit.fun, + pkg.edit.fun = pkg.edit.fun, N = 10^seq(1, 7), - setup = quote({ - L <- replicate(N, 1, simplify = FALSE) + setup = quote({ + L <- replicate(N, 1, simplify = FALSE) + setDT(L) }), expr = quote({ - data.table:::setattr(L, "class", NULL) + data.table:::setattr(L, "class", NULL) data.table:::setDT(L) }), Slow = "c4a2085e35689a108d67dacb2f8261e4964d7e12", # Parent of the first commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/commit/7cc4da4c1c8e568f655ab5167922dcdb75953801) From 5a1234a6a580290eb58ad35ff7380ef29fc39d19 Mon Sep 17 00:00:00 2001 From: Ani Date: Mon, 29 Apr 2024 13:38:13 -0700 Subject: [PATCH 26/42] Made the changes Toby introduced (switching to atime_test primarily) --- .ci/atime/tests.R | 88 +++++++++++++++++++---------------------------- 1 file changed, 36 insertions(+), 52 deletions(-) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index 8896606ca8..39b53992da 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -1,7 +1,7 @@ # A function to customize R package metadata and source files to facilitate version-specific installation and testing. # -# This is specifically tailored for handling data.table which requires specific changes in non-standard files (such as the object file name in Makevars and version checking code in onLoad.R) -# to support testing across different versions (base and HEAD for PRs, commits associated with historical regressions, etc.) of the package. +# This is specifically tailored for handling data.table which requires specific changes in non-standard files (such as the object file name in Makevars and version checking code in onLoad.R) +# to support testing across different versions (base and HEAD for PRs, commits associated with historical regressions, etc.) of the package. # It appends a SHA1 hash to the package name (PKG.SHA), ensuring each version can be installed and tested separately. # # @param old.Package Current name of the package. @@ -29,7 +29,7 @@ pkg.edit.fun = function(old.Package, new.Package, sha, new.pkg.path) { Package_ <- gsub(".", "_", old.Package, fixed = TRUE) new.Package_ <- paste0(Package_, "_", sha) pkg_find_replace( - "DESCRIPTION", + "DESCRIPTION", paste0("Package:\\s+", old.Package), paste("Package:", new.Package)) pkg_find_replace( @@ -55,44 +55,45 @@ pkg.edit.fun = function(old.Package, new.Package, sha, new.pkg.path) { } # A list of performance tests. -# +# # Each entry in this list corresponds to a performance test and contains a sublist with three mandatory arguments: # - N: A numeric sequence of data sizes to vary. # - setup: An expression evaluated for every data size before measuring time/memory. -# - expr: An expression that will be evaluated for benchmarking performance across different git commit versions. +# - expr: An expression that will be evaluated for benchmarking performance across different git commit versions. # This must call a function from data.table using a syntax with double or triple colon prefix. -# The package name before the colons will be replaced by a new package name that uses the commit SHA hash. +# The package name before the colons will be replaced by a new package name that uses the commit SHA hash. # (For instance, data.table:::[.data.table will become data.table.some_40_digit_SHA1_hash:::[.data.table) # # Optional parameters that may be useful to configure tests: # - times: Number of times each expression is evaluated (default is 10). -# - seconds.limit: The maximum median timing (in seconds) of an expression. No timings for larger N are computed past that threshold. -# - sha.vec: Named character vector or a list of vectors that specify data.table-specific commit SHAs for testing across those different git commit versions. -# For historical regressions, use 'Before', 'Regression', and 'Fixed' (otherwise something like 'Slow' or 'Fast' ideally). +# - seconds.limit: The maximum median timing (in seconds) of an expression. No timings for larger N are computed past that threshold. Default of 0.01 seconds should be sufficient for most tests. +# - Historical references (short version name = commit SHA1). +# For historical regressions, use version names 'Before', 'Regression', and 'Fixed' +# When there was no regression, use 'Slow' and 'Fast' # @note Please check https://github.com/tdhock/atime/blob/main/vignettes/data.table.Rmd for more information. -test.list <- list( - # Performance regression discussed in: https://github.com/Rdatatable/data.table/issues/4311 +# nolint start: undesirable_operator_linter. ':::' needed+appropriate here. +test.list <- atime::atime_test_list( + # Performance regression discussed in: https://github.com/Rdatatable/data.table/issues/4311 # Fixed in: https://github.com/Rdatatable/data.table/pull/4440 - "Test performance regression fixed in #4440" = list( - pkg.edit.fun = pkg.edit.fun, + pkg.edit.fun = pkg.edit.fun, + "shallow regression fixed in #4440" = atime::atime_test( N = 10^seq(3,8), - setup = quote({ + setup = { set.seed(1L) dt <- data.table(a = sample.int(N)) setindexv(dt, "a") - }), - expr = quote(data.table:::shallow(dt)), + }, + expr = data.table:::shallow(dt), # Before = "", This needs to be updated later as there are two issues here: A) The source of regression (or the particular commit that led to it) is not clear; B) Older versions of data.table are having problems when being installed in this manner (This includes commits from before March 20 2020, when the issue that discovered or first mentioned the regression was created) Regression = "b1b1832b0d2d4032b46477d9fe6efb15006664f4", # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/0f0e7127b880df8459b0ed064dc841acd22f5b73) in the PR (https://github.com/Rdatatable/data.table/pull/4440/commits) that fixes the regression Fixed = "9d3b9202fddb980345025a4f6ac451ed26a423be"), # Merge commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/4440) # Test based on: https://github.com/Rdatatable/data.table/issues/5424 # Performance regression introduced from a commit in: https://github.com/Rdatatable/data.table/pull/4491 - # Fixed in: https://github.com/Rdatatable/data.table/pull/5463 - "Test performance regression fixed in #5463" = list( - pkg.edit.fun = pkg.edit.fun, + # Fixed in: https://github.com/Rdatatable/data.table/pull/5463 + "memrecycle regression fixed in #5463" = atime::atime_test( N = 10^seq(3, 8), - setup = quote({ + setup = { n <- N/100 set.seed(2L) dt <- data.table( @@ -100,42 +101,25 @@ test.list <- list( x = runif(N), key = "g") dt_mod <- copy(dt) - }), - expr = quote(data.table:::`[.data.table`(dt_mod, , N := .N, by = g)), + }, + expr = data.table:::`[.data.table`(dt_mod, , N := .N, by = g), Before = "be2f72e6f5c90622fe72e1c315ca05769a9dc854", # Parent of the regression causing commit (https://github.com/Rdatatable/data.table/commit/e793f53466d99f86e70fc2611b708ae8c601a451) in the PR that introduced the issue (https://github.com/Rdatatable/data.table/pull/4491/commits) Regression = "e793f53466d99f86e70fc2611b708ae8c601a451", # Commit responsible for regression in the PR that introduced the issue (https://github.com/Rdatatable/data.table/pull/4491/commits) Fixed = "58409197426ced4714af842650b0cc3b9e2cb842"), # Last commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/5463/commits) - # Test based on https://github.com/Rdatatable/data.table/issues/4200 - # Performance regression fixed in: https://github.com/Rdatatable/data.table/pull/4558 - #"Test regression fixed in #4558" = list( - #pkg.edit.fun = pkg.edit.fun, - #N = 10^seq(1, 20), - #expr = quote(data.table:::`[.data.table`(d, , (max(v1) - min(v2)), by = id)), - #setup = quote({ - # set.seed(108) - # d <- data.table( - # id = sample(c(seq.int(N * 0.9), sample(N * 0.9, N * 0.1, TRUE))), - # v1 = sample(5L, N, TRUE), - # v2 = sample(5L, N, TRUE)) - # }), - # Before = "15f0598b9828d3af2eb8ddc9b38e0356f42afe4f", - # Regression = "c0052964694a4c618ab182aa474f924d40576d94", # Since regression was reported on Jan 26 2020 (https://github.com/Rdatatable/data.table/issues/4200) - # Fixed = "ba32f3cba38ec270587e395f6e6c26a80be36be6"), # Merge commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/4558) - # Issue reported in: https://github.com/Rdatatable/data.table/issues/5426 # To be fixed in: https://github.com/Rdatatable/data.table/pull/5427 - "Test performance improvement implemented in #5427" = list( - pkg.edit.fun = pkg.edit.fun, - N = 10^seq(1, 7), - setup = quote({ - L <- replicate(N, 1, simplify = FALSE) - setDT(L) - }), - expr = quote({ - data.table:::setattr(L, "class", NULL) - data.table:::setDT(L) - }), - Slow = "c4a2085e35689a108d67dacb2f8261e4964d7e12", # Parent of the first commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/commit/7cc4da4c1c8e568f655ab5167922dcdb75953801) - Fast = "1872f473b20fdcddc5c1b35d79fe9229cd9a1d15") # Last commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/pull/5427/commits) + "setDT improved in #5427" = atime::atime_test( + N = 10^seq(1, 7), + setup = { + L <- replicate(N, 1, simplify = FALSE) + setDT(L) + }, + expr = { + data.table:::setattr(L, "class", NULL) + data.table:::setDT(L) + }, + Slow = "c4a2085e35689a108d67dacb2f8261e4964d7e12", # Parent of the first commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/commit/7cc4da4c1c8e568f655ab5167922dcdb75953801) + Fast = "1872f473b20fdcddc5c1b35d79fe9229cd9a1d15") # Last commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/pull/5427/commits) ) +# nolint end: undesirable_operator_linter. From fc6ff3362088f05612943bb84c38e2c3b9205f1e Mon Sep 17 00:00:00 2001 From: Ani Date: Mon, 29 Apr 2024 13:58:12 -0700 Subject: [PATCH 27/42] Checking if this works (to test the other changes) --- .ci/atime/tests.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index 39b53992da..5ef1cd8278 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -75,8 +75,8 @@ pkg.edit.fun = function(old.Package, new.Package, sha, new.pkg.path) { test.list <- atime::atime_test_list( # Performance regression discussed in: https://github.com/Rdatatable/data.table/issues/4311 # Fixed in: https://github.com/Rdatatable/data.table/pull/4440 - pkg.edit.fun = pkg.edit.fun, "shallow regression fixed in #4440" = atime::atime_test( + pkg.edit.fun = pkg.edit.fun, N = 10^seq(3,8), setup = { set.seed(1L) @@ -92,6 +92,7 @@ test.list <- atime::atime_test_list( # Performance regression introduced from a commit in: https://github.com/Rdatatable/data.table/pull/4491 # Fixed in: https://github.com/Rdatatable/data.table/pull/5463 "memrecycle regression fixed in #5463" = atime::atime_test( + pkg.edit.fun = pkg.edit.fun, N = 10^seq(3, 8), setup = { n <- N/100 @@ -110,6 +111,7 @@ test.list <- atime::atime_test_list( # Issue reported in: https://github.com/Rdatatable/data.table/issues/5426 # To be fixed in: https://github.com/Rdatatable/data.table/pull/5427 "setDT improved in #5427" = atime::atime_test( + pkg.edit.fun = pkg.edit.fun, N = 10^seq(1, 7), setup = { L <- replicate(N, 1, simplify = FALSE) From 6817a94e098e08d5611a227dc7620a5bf46f0f44 Mon Sep 17 00:00:00 2001 From: Ani Date: Mon, 29 Apr 2024 14:12:52 -0700 Subject: [PATCH 28/42] Update tests.R --- .ci/atime/tests.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index 5ef1cd8278..3a09105f96 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -72,7 +72,7 @@ pkg.edit.fun = function(old.Package, new.Package, sha, new.pkg.path) { # When there was no regression, use 'Slow' and 'Fast' # @note Please check https://github.com/tdhock/atime/blob/main/vignettes/data.table.Rmd for more information. # nolint start: undesirable_operator_linter. ':::' needed+appropriate here. -test.list <- atime::atime_test_list( +test.list <- list( # Performance regression discussed in: https://github.com/Rdatatable/data.table/issues/4311 # Fixed in: https://github.com/Rdatatable/data.table/pull/4440 "shallow regression fixed in #4440" = atime::atime_test( From e02c93516fc8aa42373ce1f6d9dfe14610ea7fe1 Mon Sep 17 00:00:00 2001 From: Ani Date: Mon, 29 Apr 2024 14:39:31 -0700 Subject: [PATCH 29/42] Update tests.R --- .ci/atime/tests.R | 73 ++++++++++++++++------------------------------- 1 file changed, 24 insertions(+), 49 deletions(-) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index 3a09105f96..675e65257a 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -1,27 +1,25 @@ -# A function to customize R package metadata and source files to facilitate version-specific installation and testing. -# -# This is specifically tailored for handling data.table which requires specific changes in non-standard files (such as the object file name in Makevars and version checking code in onLoad.R) -# to support testing across different versions (base and HEAD for PRs, commits associated with historical regressions, etc.) of the package. -# It appends a SHA1 hash to the package name (PKG.SHA), ensuring each version can be installed and tested separately. -# -# @param old.Package Current name of the package. -# @param new.Package New name of the package, including a SHA hash. -# @param sha SHA1 hash used for differentiating versions. -# @param new.pkg.path Path to the package files. -# -# @details -# The function modifies: -# - DESCRIPTION, updating the package name. -# - Makevars, customizing the shared object file name and adjusting the build settings. -# - R/onLoad.R, adapting custom version checking for package loading operations. -# - NAMESPACE, changing namespace settings for dynamic linking. +# A list of performance tests. # -# @examples -# pkg.edit.fun("data.table", "data.table.some_SHA1_hash", "some_SHA1_hash", "/path/to/data.table") +# Each entry in this list corresponds to a performance test and contains a sublist with three mandatory arguments: +# - N: A numeric sequence of data sizes to vary. +# - setup: An expression evaluated for every data size before measuring time/memory. +# - expr: An expression that will be evaluated for benchmarking performance across different git commit versions. +# This must call a function from data.table using a syntax with double or triple colon prefix. +# The package name before the colons will be replaced by a new package name that uses the commit SHA hash. +# (For instance, data.table:::[.data.table will become data.table.some_40_digit_SHA1_hash:::[.data.table) # -# @return None (performs in-place file modifications) -# @note This setup is typically unnecessary for most packages but essential for data.table due to its unique configuration. -pkg.edit.fun = function(old.Package, new.Package, sha, new.pkg.path) { +# Optional parameters that may be useful to configure tests: +# - times: Number of times each expression is evaluated (default is 10). +# - seconds.limit: The maximum median timing (in seconds) of an expression. No timings for larger N are computed past that threshold. Default of 0.01 seconds should be sufficient for most tests. +# - Historical references (short version name = commit SHA1). +# For historical regressions, use version names 'Before', 'Regression', and 'Fixed' +# When there was no regression, use 'Slow' and 'Fast' +# @note Please check https://github.com/tdhock/atime/blob/main/vignettes/data.table.Rmd for more information. +# nolint start: undesirable_operator_linter. ':::' needed+appropriate here. +test.list <- atime::atime_test_list( + # Performance regression discussed in: https://github.com/Rdatatable/data.table/issues/4311 + # Fixed in: https://github.com/Rdatatable/data.table/pull/4440 + pkg.edit.fun = function(old.Package, new.Package, sha, new.pkg.path) { pkg_find_replace <- function(glob, FIND, REPLACE) { atime::glob_find_replace(file.path(new.pkg.path, glob), FIND, REPLACE) } @@ -52,31 +50,9 @@ pkg.edit.fun = function(old.Package, new.Package, sha, new.pkg.path) { "NAMESPACE", sprintf('useDynLib\\("?%s"?', Package_regex), paste0('useDynLib(', new.Package_)) - } - -# A list of performance tests. -# -# Each entry in this list corresponds to a performance test and contains a sublist with three mandatory arguments: -# - N: A numeric sequence of data sizes to vary. -# - setup: An expression evaluated for every data size before measuring time/memory. -# - expr: An expression that will be evaluated for benchmarking performance across different git commit versions. -# This must call a function from data.table using a syntax with double or triple colon prefix. -# The package name before the colons will be replaced by a new package name that uses the commit SHA hash. -# (For instance, data.table:::[.data.table will become data.table.some_40_digit_SHA1_hash:::[.data.table) -# -# Optional parameters that may be useful to configure tests: -# - times: Number of times each expression is evaluated (default is 10). -# - seconds.limit: The maximum median timing (in seconds) of an expression. No timings for larger N are computed past that threshold. Default of 0.01 seconds should be sufficient for most tests. -# - Historical references (short version name = commit SHA1). -# For historical regressions, use version names 'Before', 'Regression', and 'Fixed' -# When there was no regression, use 'Slow' and 'Fast' -# @note Please check https://github.com/tdhock/atime/blob/main/vignettes/data.table.Rmd for more information. -# nolint start: undesirable_operator_linter. ':::' needed+appropriate here. -test.list <- list( - # Performance regression discussed in: https://github.com/Rdatatable/data.table/issues/4311 - # Fixed in: https://github.com/Rdatatable/data.table/pull/4440 + }, "shallow regression fixed in #4440" = atime::atime_test( - pkg.edit.fun = pkg.edit.fun, + pkg.edit.fun = pkg.edit.fun, N = 10^seq(3,8), setup = { set.seed(1L) @@ -92,7 +68,7 @@ test.list <- list( # Performance regression introduced from a commit in: https://github.com/Rdatatable/data.table/pull/4491 # Fixed in: https://github.com/Rdatatable/data.table/pull/5463 "memrecycle regression fixed in #5463" = atime::atime_test( - pkg.edit.fun = pkg.edit.fun, + pkg.edit.fun = pkg.edit.fun, N = 10^seq(3, 8), setup = { n <- N/100 @@ -111,7 +87,7 @@ test.list <- list( # Issue reported in: https://github.com/Rdatatable/data.table/issues/5426 # To be fixed in: https://github.com/Rdatatable/data.table/pull/5427 "setDT improved in #5427" = atime::atime_test( - pkg.edit.fun = pkg.edit.fun, + pkg.edit.fun = pkg.edit.fun, N = 10^seq(1, 7), setup = { L <- replicate(N, 1, simplify = FALSE) @@ -124,4 +100,3 @@ test.list <- list( Slow = "c4a2085e35689a108d67dacb2f8261e4964d7e12", # Parent of the first commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/commit/7cc4da4c1c8e568f655ab5167922dcdb75953801) Fast = "1872f473b20fdcddc5c1b35d79fe9229cd9a1d15") # Last commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/pull/5427/commits) ) -# nolint end: undesirable_operator_linter. From e2171cee4d5fb7507870c8d31681a55adc9b7616 Mon Sep 17 00:00:00 2001 From: Ani Date: Mon, 29 Apr 2024 15:12:04 -0700 Subject: [PATCH 30/42] Update tests.R --- .ci/atime/tests.R | 3 --- 1 file changed, 3 deletions(-) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index 675e65257a..7efc3be847 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -52,7 +52,6 @@ test.list <- atime::atime_test_list( paste0('useDynLib(', new.Package_)) }, "shallow regression fixed in #4440" = atime::atime_test( - pkg.edit.fun = pkg.edit.fun, N = 10^seq(3,8), setup = { set.seed(1L) @@ -68,7 +67,6 @@ test.list <- atime::atime_test_list( # Performance regression introduced from a commit in: https://github.com/Rdatatable/data.table/pull/4491 # Fixed in: https://github.com/Rdatatable/data.table/pull/5463 "memrecycle regression fixed in #5463" = atime::atime_test( - pkg.edit.fun = pkg.edit.fun, N = 10^seq(3, 8), setup = { n <- N/100 @@ -87,7 +85,6 @@ test.list <- atime::atime_test_list( # Issue reported in: https://github.com/Rdatatable/data.table/issues/5426 # To be fixed in: https://github.com/Rdatatable/data.table/pull/5427 "setDT improved in #5427" = atime::atime_test( - pkg.edit.fun = pkg.edit.fun, N = 10^seq(1, 7), setup = { L <- replicate(N, 1, simplify = FALSE) From 27ade595e065af43a6bce9aca4783f265e092b61 Mon Sep 17 00:00:00 2001 From: Ani Date: Mon, 29 Apr 2024 15:35:04 -0700 Subject: [PATCH 31/42] Update tests.R --- .ci/atime/tests.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index 7efc3be847..d62fbd8e6d 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -95,5 +95,5 @@ test.list <- atime::atime_test_list( data.table:::setDT(L) }, Slow = "c4a2085e35689a108d67dacb2f8261e4964d7e12", # Parent of the first commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/commit/7cc4da4c1c8e568f655ab5167922dcdb75953801) - Fast = "1872f473b20fdcddc5c1b35d79fe9229cd9a1d15") # Last commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/pull/5427/commits) + Fast = "af48a805e7a5026a0c2d0a7fd9b587fea5cfa3c4") # Last commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/pull/5427/commits) ) From 2738ce597f4e016a1b59ba08ff9b5cb49cbb160d Mon Sep 17 00:00:00 2001 From: Ani Date: Mon, 29 Apr 2024 15:39:32 -0700 Subject: [PATCH 32/42] Update tests.R --- .ci/atime/tests.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index d62fbd8e6d..35fa5d7524 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -95,5 +95,5 @@ test.list <- atime::atime_test_list( data.table:::setDT(L) }, Slow = "c4a2085e35689a108d67dacb2f8261e4964d7e12", # Parent of the first commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/commit/7cc4da4c1c8e568f655ab5167922dcdb75953801) - Fast = "af48a805e7a5026a0c2d0a7fd9b587fea5cfa3c4") # Last commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/pull/5427/commits) + Fast = "2487c61656335764980e478c323f7e6ce4e6d4ca") # Last commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/pull/5427/commits) ) From c57ca22313546c06df5fca280545715ee8dfdb86 Mon Sep 17 00:00:00 2001 From: Ani Date: Mon, 29 Apr 2024 15:59:42 -0700 Subject: [PATCH 33/42] Update tests.R --- .ci/atime/tests.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index 35fa5d7524..bbbdaa3dee 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -95,5 +95,5 @@ test.list <- atime::atime_test_list( data.table:::setDT(L) }, Slow = "c4a2085e35689a108d67dacb2f8261e4964d7e12", # Parent of the first commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/commit/7cc4da4c1c8e568f655ab5167922dcdb75953801) - Fast = "2487c61656335764980e478c323f7e6ce4e6d4ca") # Last commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/pull/5427/commits) + Fast = "2487c61656335764980e478c323f7e6ce4e6d4ca") # Merge commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/pull/5427) ) From be51db1684bd56dd446d26181720adc4243de4a8 Mon Sep 17 00:00:00 2001 From: Ani Date: Mon, 29 Apr 2024 16:23:43 -0700 Subject: [PATCH 34/42] Makes sense why the previous runs failed (setdt-wide here exists but only uptil the 'correct comment' commit since this is an old fork). Should work now --- .ci/atime/tests.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index bbbdaa3dee..5107cd83c1 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -95,5 +95,6 @@ test.list <- atime::atime_test_list( data.table:::setDT(L) }, Slow = "c4a2085e35689a108d67dacb2f8261e4964d7e12", # Parent of the first commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/commit/7cc4da4c1c8e568f655ab5167922dcdb75953801) - Fast = "2487c61656335764980e478c323f7e6ce4e6d4ca") # Merge commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/pull/5427) + Fast = "1872f473b20fdcddc5c1b35d79fe9229cd9a1d15") # Last commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/pull/5427/commits) for my fork here. + # Merge commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/pull/5427) # <-- This too would probably work but in my fork I don't have any commits beyond 1872f473b20fdcddc5c1b35d79fe9229cd9a1d15 for that branch, ) From 95433cb27fa85489f6ee13647f84969c451a29b7 Mon Sep 17 00:00:00 2001 From: Ani Date: Tue, 28 May 2024 14:16:14 -0700 Subject: [PATCH 35/42] Update tests.R --- .ci/atime/tests.R | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index 5107cd83c1..970bff65b2 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -95,6 +95,22 @@ test.list <- atime::atime_test_list( data.table:::setDT(L) }, Slow = "c4a2085e35689a108d67dacb2f8261e4964d7e12", # Parent of the first commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/commit/7cc4da4c1c8e568f655ab5167922dcdb75953801) - Fast = "1872f473b20fdcddc5c1b35d79fe9229cd9a1d15") # Last commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/pull/5427/commits) for my fork here. + Fast = "1872f473b20fdcddc5c1b35d79fe9229cd9a1d15"), # Last commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/pull/5427/commits) for my fork here. # Merge commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/pull/5427) # <-- This too would probably work but in my fork I don't have any commits beyond 1872f473b20fdcddc5c1b35d79fe9229cd9a1d15 for that branch, + + # Issue reported in: https://github.com/Rdatatable/data.table/issues/5426 + # To be fixed in: https://github.com/Rdatatable/data.table/pull/5427 + "[1, 2]" = atime::atime_test( + N = 10^seq(1, 4), + setup = { + set.seed(123L) + dt <- data.table( + id = seq_len(N), + val = rnorm(N)) + dt + }, + expr = data.table:::`[.data.table`(dt, , .(vs = (sum(val))), by = .(id)), + "Before"="be2f72e6f5c90622fe72e1c315ca05769a9dc854", + "Regression"="e793f53466d99f86e70fc2611b708ae8c601a451", + "Fixed"="58409197426ced4714af842650b0cc3b9e2cb842") ) From 4b386b75ee0dd3e3423dd8a2ca68314c9d573b45 Mon Sep 17 00:00:00 2001 From: Ani Date: Tue, 16 Jul 2024 16:43:51 -0700 Subject: [PATCH 36/42] Update tests.R --- .ci/atime/tests.R | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index 970bff65b2..1f121fe829 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -98,19 +98,13 @@ test.list <- atime::atime_test_list( Fast = "1872f473b20fdcddc5c1b35d79fe9229cd9a1d15"), # Last commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/pull/5427/commits) for my fork here. # Merge commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/pull/5427) # <-- This too would probably work but in my fork I don't have any commits beyond 1872f473b20fdcddc5c1b35d79fe9229cd9a1d15 for that branch, - # Issue reported in: https://github.com/Rdatatable/data.table/issues/5426 - # To be fixed in: https://github.com/Rdatatable/data.table/pull/5427 - "[1, 2]" = atime::atime_test( - N = 10^seq(1, 4), - setup = { - set.seed(123L) - dt <- data.table( - id = seq_len(N), - val = rnorm(N)) - dt + # Issue reported in: https://github.com/Rdatatable/data.table/issues/6286 + "by verbose arg" = atime::atime_test( + N = 10^seq(1, 7), + setup = { + dt = data.table(a = 1:N) }, - expr = data.table:::`[.data.table`(dt, , .(vs = (sum(val))), by = .(id)), - "Before"="be2f72e6f5c90622fe72e1c315ca05769a9dc854", - "Regression"="e793f53466d99f86e70fc2611b708ae8c601a451", - "Fixed"="58409197426ced4714af842650b0cc3b9e2cb842") + expr = system.time(copy(dt)[, 1, data.table:::by = a, verbose = TRUE]), + "Slow" = "a01f00f7438daf4612280d6886e6929fa8c8f76e", + "Fast" = "aa75d79376478b3e8f80fd6f31dcf53be8bf3404") ) From 051f9404f4a212cd8bf6ee2d029db518329987a6 Mon Sep 17 00:00:00 2001 From: Ani Date: Tue, 16 Jul 2024 17:15:24 -0700 Subject: [PATCH 37/42] Update tests.R --- .ci/atime/tests.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index e587ef68c8..b24c37b220 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -103,7 +103,7 @@ test.list <- atime::atime_test_list( setup = { dt = data.table(a = 1:N) }, - expr = system.time(copy(dt)[, 1, data.table:::by = a, verbose = TRUE]), + expr = system.time(copy(dt)[, 1, by = a, verbose = TRUE]), "Slow" = "a01f00f7438daf4612280d6886e6929fa8c8f76e", "Fast" = "aa75d79376478b3e8f80fd6f31dcf53be8bf3404") ) From fb990b013e91509056b974e8279c6282e236c877 Mon Sep 17 00:00:00 2001 From: Ani Date: Tue, 16 Jul 2024 20:14:04 -0700 Subject: [PATCH 38/42] Update tests.R --- .ci/atime/tests.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index b24c37b220..1f733d8f95 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -103,7 +103,7 @@ test.list <- atime::atime_test_list( setup = { dt = data.table(a = 1:N) }, - expr = system.time(copy(dt)[, 1, by = a, verbose = TRUE]), + expr = system.time(data.table:::copy(dt)[, 1, by = a, verbose = TRUE]), "Slow" = "a01f00f7438daf4612280d6886e6929fa8c8f76e", "Fast" = "aa75d79376478b3e8f80fd6f31dcf53be8bf3404") ) From c8c10830879b921f7b8259ba50a3857981816816 Mon Sep 17 00:00:00 2001 From: Ani Date: Wed, 17 Jul 2024 11:49:56 -0700 Subject: [PATCH 39/42] Update tests.R --- .ci/atime/tests.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index 1f733d8f95..bd1772dcce 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -99,11 +99,11 @@ test.list <- atime::atime_test_list( # Issue reported in: https://github.com/Rdatatable/data.table/issues/6286 "by verbose arg" = atime::atime_test( - N = 10^seq(1, 7), + N = 10^seq(6, 7), setup = { dt = data.table(a = 1:N) }, - expr = system.time(data.table:::copy(dt)[, 1, by = a, verbose = TRUE]), + expr = data.table:::copy(dt)[, 1, by = a, verbose = TRUE], "Slow" = "a01f00f7438daf4612280d6886e6929fa8c8f76e", "Fast" = "aa75d79376478b3e8f80fd6f31dcf53be8bf3404") ) From 168389ef8059499574195d2f1b372d2c28f753be Mon Sep 17 00:00:00 2001 From: Ani Date: Wed, 17 Jul 2024 12:11:44 -0700 Subject: [PATCH 40/42] Synced master branch for git revparse to be able to find the commits. From e931f865b37c8ed8feb0470ea683be44ced10ab8 Mon Sep 17 00:00:00 2001 From: Ani Date: Wed, 17 Jul 2024 13:43:14 -0700 Subject: [PATCH 41/42] Update tests.R --- .ci/atime/tests.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index bd1772dcce..893c2b57b4 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -105,5 +105,5 @@ test.list <- atime::atime_test_list( }, expr = data.table:::copy(dt)[, 1, by = a, verbose = TRUE], "Slow" = "a01f00f7438daf4612280d6886e6929fa8c8f76e", - "Fast" = "aa75d79376478b3e8f80fd6f31dcf53be8bf3404") + "Fast" = "cd497408bb4dc6650d871b5076e738420ff431d7") ) From d80b6d71fd4f4747e764bbcc89f44d43619aac7b Mon Sep 17 00:00:00 2001 From: Ani Date: Wed, 17 Jul 2024 17:40:16 -0700 Subject: [PATCH 42/42] Update tests.R --- .ci/atime/tests.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index 893c2b57b4..816a7621eb 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -51,7 +51,7 @@ test.list <- atime::atime_test_list( }, "shallow regression fixed in #4440" = atime::atime_test( - N = 10^seq(3,8), + N = 10^seq(3, 8), setup = { set.seed(1L) dt <- data.table(a = sample.int(N)) @@ -98,8 +98,8 @@ test.list <- atime::atime_test_list( # Merge commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/pull/5427) # <-- This too would probably work but in my fork I don't have any commits beyond 1872f473b20fdcddc5c1b35d79fe9229cd9a1d15 for that branch, # Issue reported in: https://github.com/Rdatatable/data.table/issues/6286 - "by verbose arg" = atime::atime_test( - N = 10^seq(6, 7), + "by with verbose = TRUE" = atime::atime_test( + N = 10^seq(1, 8), setup = { dt = data.table(a = 1:N) },