From 51cdcd5abef72a592f8bb2d86c853d632aaa7d99 Mon Sep 17 00:00:00 2001
From: Brice Dutheil <brice.dutheil@gmail.com>
Date: Wed, 11 Jun 2025 17:41:46 +0200
Subject: [PATCH 1/9] feat(ci): PR performance gate

---
 .gitlab/benchmarks.yml                        | 26 +++++++++++++++++++
 .../bp-runner.fail-on-regression.yml          |  7 +++++
 2 files changed, 33 insertions(+)
 create mode 100644 .gitlab/benchmarks/bp-runner.fail-on-regression.yml

diff --git a/.gitlab/benchmarks.yml b/.gitlab/benchmarks.yml
index 8b621365070..17f149d7fc8 100644
--- a/.gitlab/benchmarks.yml
+++ b/.gitlab/benchmarks.yml
@@ -68,6 +68,32 @@ benchmarks-post-results:
     - job: benchmarks-dacapo
       artifacts: true
 
+check-big-regressions:
+  extends: .benchmarks
+  needs: [ benchmarks-startup, benchmarks-load, benchmarks-dacapo ]
+  when: on_success
+  tags: ["arch:amd64"]
+  # ARTIFACTS_DIR /go/src/github.com/DataDog/apm-reliability/dd-trace-java/reports/
+  # need to convert them
+  script:
+    - !reference [ .benchmarks, script ]
+    - echo $(pwd)
+    - | 
+      for benchmarkType in startup load; do
+          find "$ARTIFACTS_DIR/$benchmarkType" -name "benchmark-baseline.json" -o -name "benchmark-candidate.json" | while read file; do
+            relpath="${file#$ARTIFACTS_DIR/$benchmarkType/}"
+            prefix="${relpath%/benchmark-*}" # Remove the trailing /benchmark-(baseline|candidate).json
+            prefix="${prefix#./}" # Remove any leading ./
+            prefix="${prefix//\//-}" # Replace / with -
+            case "$file" in
+              *benchmark-baseline.json) type="baseline" ;;
+              *benchmark-candidate.json) type="candidate" ;;
+            esac
+            cp "$file" "$ARTIFACTS_DIR/${type}-${prefix}.json"
+          done
+      done
+    - bp-runner $ARTIFACTS_DIR/../.gitlab/benchmarks/bp-runner.fail-on-regression.yml --debug
+
 .dsm-kafka-benchmarks:
   stage: benchmarks
   rules:
diff --git a/.gitlab/benchmarks/bp-runner.fail-on-regression.yml b/.gitlab/benchmarks/bp-runner.fail-on-regression.yml
new file mode 100644
index 00000000000..fc63ef88e96
--- /dev/null
+++ b/.gitlab/benchmarks/bp-runner.fail-on-regression.yml
@@ -0,0 +1,7 @@
+experiments:
+  - name: Run regression check
+    steps:
+      - name: Regression Check
+        run: fail_on_regression
+        # Applies on all scenarios
+        regression_threshold: 20.0 # percents

From 1e75a62c7fa0d41bbe90d603338bb1a76caca24a Mon Sep 17 00:00:00 2001
From: Brice Dutheil <brice.dutheil@gmail.com>
Date: Thu, 12 Jun 2025 12:17:03 +0200
Subject: [PATCH 2/9] chore(ci): Review comments

---
 .gitlab/benchmarks.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.gitlab/benchmarks.yml b/.gitlab/benchmarks.yml
index 17f149d7fc8..3245295ba13 100644
--- a/.gitlab/benchmarks.yml
+++ b/.gitlab/benchmarks.yml
@@ -77,7 +77,6 @@ check-big-regressions:
   # need to convert them
   script:
     - !reference [ .benchmarks, script ]
-    - echo $(pwd)
     - | 
       for benchmarkType in startup load; do
           find "$ARTIFACTS_DIR/$benchmarkType" -name "benchmark-baseline.json" -o -name "benchmark-candidate.json" | while read file; do
@@ -89,10 +88,11 @@ check-big-regressions:
               *benchmark-baseline.json) type="baseline" ;;
               *benchmark-candidate.json) type="candidate" ;;
             esac
-            cp "$file" "$ARTIFACTS_DIR/${type}-${prefix}.json"
+            echo "Copying $file to $ARTIFACTS_DIR/${type}-${prefix}.converted.json"
+            cp "$file" "$ARTIFACTS_DIR/${type}-${prefix}.converted.json"
           done
       done
-    - bp-runner $ARTIFACTS_DIR/../.gitlab/benchmarks/bp-runner.fail-on-regression.yml --debug
+    - bp-runner .gitlab/benchmarks/bp-runner.fail-on-regression.yml --debug
 
 .dsm-kafka-benchmarks:
   stage: benchmarks

From 109573f83ca1616f6c790d6c86dbd6bb6df22421 Mon Sep 17 00:00:00 2001
From: Brice Dutheil <brice.dutheil@gmail.com>
Date: Thu, 12 Jun 2025 12:29:47 +0200
Subject: [PATCH 3/9] feat(ci): Explicitly rely on job artifacts, drop dacapo

---
 .gitlab/benchmarks.yml | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/.gitlab/benchmarks.yml b/.gitlab/benchmarks.yml
index 3245295ba13..9adadcdee36 100644
--- a/.gitlab/benchmarks.yml
+++ b/.gitlab/benchmarks.yml
@@ -70,7 +70,11 @@ benchmarks-post-results:
 
 check-big-regressions:
   extends: .benchmarks
-  needs: [ benchmarks-startup, benchmarks-load, benchmarks-dacapo ]
+  needs:
+    - job: benchmarks-startup
+      artifacts: true
+    - job: benchmarks-load
+      artifacts: true
   when: on_success
   tags: ["arch:amd64"]
   # ARTIFACTS_DIR /go/src/github.com/DataDog/apm-reliability/dd-trace-java/reports/

From 3f56fdfe24a69b0a19fcd8ee9d028ff875242d35 Mon Sep 17 00:00:00 2001
From: Brice Dutheil <brice.dutheil@gmail.com>
Date: Thu, 12 Jun 2025 14:27:01 +0200
Subject: [PATCH 4/9] fix(ci): revert removal of ARTIFACTS_DIR for
 bp-runner.fail-on-regression.yml path

---
 .gitlab/benchmarks.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitlab/benchmarks.yml b/.gitlab/benchmarks.yml
index 9adadcdee36..f1cb259fb26 100644
--- a/.gitlab/benchmarks.yml
+++ b/.gitlab/benchmarks.yml
@@ -96,7 +96,7 @@ check-big-regressions:
             cp "$file" "$ARTIFACTS_DIR/${type}-${prefix}.converted.json"
           done
       done
-    - bp-runner .gitlab/benchmarks/bp-runner.fail-on-regression.yml --debug
+    - bp-runner $ARTIFACTS_DIR/../.gitlab/benchmarks/bp-runner.fail-on-regression.yml --debug
 
 .dsm-kafka-benchmarks:
   stage: benchmarks

From 52b5b58ad62433e0f9b7bcbc5f6d891a84c2da37 Mon Sep 17 00:00:00 2001
From: Brice Dutheil <brice.dutheil@gmail.com>
Date: Thu, 12 Jun 2025 14:50:49 +0200
Subject: [PATCH 5/9] chore(ci): replace ARTIFACTS_DIR by CI_PROJECT_DIR for
 bp-runner.fail-on-regression.yml path

---
 .gitlab/benchmarks.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitlab/benchmarks.yml b/.gitlab/benchmarks.yml
index f1cb259fb26..3f6716fd67b 100644
--- a/.gitlab/benchmarks.yml
+++ b/.gitlab/benchmarks.yml
@@ -96,7 +96,7 @@ check-big-regressions:
             cp "$file" "$ARTIFACTS_DIR/${type}-${prefix}.converted.json"
           done
       done
-    - bp-runner $ARTIFACTS_DIR/../.gitlab/benchmarks/bp-runner.fail-on-regression.yml --debug
+    - bp-runner $CI_PROJECT_DIR/.gitlab/benchmarks/bp-runner.fail-on-regression.yml --debug
 
 .dsm-kafka-benchmarks:
   stage: benchmarks

From 4018257611bdbc037d167934ecaddfd84f9d7b56 Mon Sep 17 00:00:00 2001
From: Brice Dutheil <brice.dutheil@gmail.com>
Date: Thu, 12 Jun 2025 16:43:26 +0200
Subject: [PATCH 6/9] chore(ci): Move threshold to 10%

---
 .gitlab/benchmarks/bp-runner.fail-on-regression.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitlab/benchmarks/bp-runner.fail-on-regression.yml b/.gitlab/benchmarks/bp-runner.fail-on-regression.yml
index fc63ef88e96..e5c43e8194f 100644
--- a/.gitlab/benchmarks/bp-runner.fail-on-regression.yml
+++ b/.gitlab/benchmarks/bp-runner.fail-on-regression.yml
@@ -4,4 +4,4 @@ experiments:
       - name: Regression Check
         run: fail_on_regression
         # Applies on all scenarios
-        regression_threshold: 20.0 # percents
+        regression_threshold: 10.0 # percents

From e6af2699b30e56486b790485f9e682e310c549d0 Mon Sep 17 00:00:00 2001
From: Brice Dutheil <brice.dutheil@gmail.com>
Date: Fri, 13 Jun 2025 17:58:25 +0200
Subject: [PATCH 7/9] chore(ci): Allow failure on PR gate to exercise it

---
 .gitlab/benchmarks.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitlab/benchmarks.yml b/.gitlab/benchmarks.yml
index 3f6716fd67b..c29abff07eb 100644
--- a/.gitlab/benchmarks.yml
+++ b/.gitlab/benchmarks.yml
@@ -77,6 +77,7 @@ check-big-regressions:
       artifacts: true
   when: on_success
   tags: ["arch:amd64"]
+  allow_failure: true # Exercise the job before making it mandatory
   # ARTIFACTS_DIR /go/src/github.com/DataDog/apm-reliability/dd-trace-java/reports/
   # need to convert them
   script:

From 5bf274c47308066a15a3a4c46c0eb4aa9b0215ea Mon Sep 17 00:00:00 2001
From: Brice Dutheil <brice.dutheil@gmail.com>
Date: Wed, 18 Jun 2025 17:08:36 +0200
Subject: [PATCH 8/9] chore(ci): Disable allow_failure but increase regression
 threshold (to be refined)

---
 .gitlab/benchmarks.yml                              | 1 -
 .gitlab/benchmarks/bp-runner.fail-on-regression.yml | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/.gitlab/benchmarks.yml b/.gitlab/benchmarks.yml
index c29abff07eb..3f6716fd67b 100644
--- a/.gitlab/benchmarks.yml
+++ b/.gitlab/benchmarks.yml
@@ -77,7 +77,6 @@ check-big-regressions:
       artifacts: true
   when: on_success
   tags: ["arch:amd64"]
-  allow_failure: true # Exercise the job before making it mandatory
   # ARTIFACTS_DIR /go/src/github.com/DataDog/apm-reliability/dd-trace-java/reports/
   # need to convert them
   script:
diff --git a/.gitlab/benchmarks/bp-runner.fail-on-regression.yml b/.gitlab/benchmarks/bp-runner.fail-on-regression.yml
index e5c43e8194f..fc63ef88e96 100644
--- a/.gitlab/benchmarks/bp-runner.fail-on-regression.yml
+++ b/.gitlab/benchmarks/bp-runner.fail-on-regression.yml
@@ -4,4 +4,4 @@ experiments:
       - name: Regression Check
         run: fail_on_regression
         # Applies on all scenarios
-        regression_threshold: 10.0 # percents
+        regression_threshold: 20.0 # percents

From 333b53aa0407febe06157f6517daff9e9f777bf5 Mon Sep 17 00:00:00 2001
From: Brice Dutheil <brice.dutheil@gmail.com>
Date: Wed, 25 Jun 2025 16:43:15 +0200
Subject: [PATCH 9/9] chore(ci): Choose 10% as threshold.

20% was chosen due to the variability of results on unrelated changes.

After discussion in lp-sync: a 20% threshold won't move us forward or entice us to fix the problems, so choose a tighter threshold of 10% instead.
---
 .gitlab/benchmarks/bp-runner.fail-on-regression.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitlab/benchmarks/bp-runner.fail-on-regression.yml b/.gitlab/benchmarks/bp-runner.fail-on-regression.yml
index fc63ef88e96..e5c43e8194f 100644
--- a/.gitlab/benchmarks/bp-runner.fail-on-regression.yml
+++ b/.gitlab/benchmarks/bp-runner.fail-on-regression.yml
@@ -4,4 +4,4 @@ experiments:
       - name: Regression Check
         run: fail_on_regression
         # Applies on all scenarios
-        regression_threshold: 20.0 # percents
+        regression_threshold: 10.0 # percents