Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions .github/workflows/regression.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
name: regression

# Start the job only for PRs when the 'run-regression' label is added.
on:
  pull_request:
    types: [labeled]

jobs:
  # NOTE: job id typo fixed (was "regresstion").
  regression:
    if: contains(github.event.pull_request.labels.*.name, 'run-regression')
    name: regression
    runs-on: pineko-stbc3

    steps:
      - uses: actions/checkout@v4
        with:
          # tags needed for dynamic versioning
          fetch-depth: 0
      - name: Install and configure Poetry
        uses: snok/install-poetry@v1
        with:
          virtualenvs-create: false
          installer-parallel: true
      - name: Install dependencies 🐍
        run: poetry install --no-interaction --no-root --with test -E nnpdf
      - name: Install project 🐍
        # it is required to repeat extras, otherwise they will be removed from
        # the environment
        run: poetry install --no-interaction -E nnpdf --with test
      - name: Get data files 📦
        id: cache-data-files
        uses: actions/cache@v4
        with:
          path: theory_productions
          key: theory_productions-v6
      - name: Download data files 📦
        # BUG FIX: the step id above uses hyphens ("cache-data-files"); the
        # previous reference "cache-data_files" never resolved, so the
        # download step ran on every build regardless of the cache state.
        if: steps.cache-data-files.outputs.cache-hit != 'true'
        run: |
          sh download_test_data.sh
      - name: Restore cached numba compile code 📮
        id: cache-numba
        uses: actions/cache@v4
        with:
          path: src/pineko/__pycache__
          key: numba-cache-${{ runner.os }}-${{ hashFiles('**/*.py') }}
          restore-keys: numba-cache-${{ runner.os }}-
      - name: Generate FK table predictions and perform regression tests 💣
        run: |
          sh regression_check.sh
      - name: Save updated numba cache 📮
        # Use the explicit save sub-action: a second plain `actions/cache`
        # step only restores and would never write an updated cache here.
        uses: actions/cache/save@v4
        with:
          path: src/pineko/__pycache__
          key: numba-cache-${{ runner.os }}-${{ hashFiles('**/*.py') }}
1 change: 1 addition & 0 deletions download_test_data.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/bin/bash
# Download the regression/benchmark input data from the NNPDF data server.
# wget flags: -r recurse, -np never ascend to the parent directory, -nH drop
# the host name from local paths, --cut-dirs=1 strip the leading remote path
# component, -l 4 limit recursion depth, -e robots=off ignore robots.txt,
# -R index.* skip the autogenerated directory index pages, -P set the local
# download prefix directory.
wget -r -np -nH --cut-dirs=1 -l 4 -e robots=off --no-verbose -R index.* https://data.nnpdf.science/pineko/theory_productions/
wget -r -np -nH --cut-dirs=1 -l 4 -e robots=off --no-verbose -P benchmarks -R index.* https://data.nnpdf.science/pineko/data_files/
wget -r -np -nH --cut-dirs=1 -l 4 -e robots=off --no-verbose -P benchmarks -R index.* https://data.nnpdf.science/pineko/fakepdfs/
15 changes: 15 additions & 0 deletions pineko.ci.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Pineko configuration used by the CI regression workflow
# (.github/workflows/regression.yml); all paths are rooted in the
# `theory_productions` tree fetched by download_test_data.sh.

[general]
nnpdf=true

[paths]
# inputs
grids = "./theory_productions/data/grids"
operator_card_template_name = "_template.ci.yaml"
# outputs
operator_cards = "./theory_productions/operator_cards"
ekos = "./theory_productions/data/ekos"
fktables = "./theory_productions/data/fktables"

[paths.logs]
eko = "./theory_productions/logs/eko"
fk = "./theory_productions/logs/fk"
37 changes: 36 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ pandas = "^2.1"
rich = "^12.5.1"
click = "^8.0.4"
tomli = "^2.0.1"
nnpdf-data = { version = "*", optional = true}
nnpdf-data = { version = ">=0.0.3", optional = true}

[tool.poetry.group.docs]
optional = true
Expand All @@ -56,6 +56,7 @@ pytest-cov = "^4.0.0"
pytest-env = "^0.6.2"
pylint = "^3.1.0"
banana-hep = "^0.6.13"
pineappl-cli = "^0.8.7"

[tool.poetry.group.dev.dependencies]
pdbpp = "^0.10.3"
Expand Down
136 changes: 136 additions & 0 deletions regression_check.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
#!/bin/bash
# Regression driver for the CI workflow: regenerate EKOs and FK tables for a
# fixed selection of datasets and compare them against the grids and the
# stored reference FK tables under ./theory_productions.

set -euo pipefail

# Unpolarized theory and its PDF set.
THEORY_ID=40008005 # NNLO QCD with EXA
PDF_NAME="NNPDF40_nnlo_as_01180"

# Polarized theory; polarized observables convolve a polarized and an
# unpolarized PDF set (see get_pdf_combinations below).
POLARIZED_THEORY_ID=41100010 # NLO QCD⊗EWK with TRN
POLARIZED_POLPDF_NAME="NNPDFpol20_nlo_as_01180"
POLARIZED_UNPOLPDF_NAME="NNPDF40_nlo_pch_as_01180"

# DIS datasets, processed through the FONLL pipeline.
LIST_DIS_DATASETS=(
"HERA_CC_318GEV_EP-SIGMARED"
"NNPDF_POS_2P24GEV_F2D"
)

# Hadronic datasets with a single (unpolarized) convolution.
LIST_HADRONIC_DATASETS=(
"ATLAS_Z0_7TEV_36PB_ETA"
"LHCB_WPWM_8TEV_MUON_Y"
"ATLAS_SINGLETOP_8TEV_T-RAP-NORM"
)

# Polarized hadronic datasets (multiple convolutions).
LIST_POLARIZED_HADRONIC_DATASETS=(
"STAR_WMWP_510GEV_WP-AL"
)

# Print the PDF set name(s) to convolve with, based on the grid/FK table name.
# Arguments: $1 - grid or FK table file name
# Outputs:   space-separated PDF set name(s) on stdout
get_pdf_combinations() {
  local objectname=$1
  local pdfsetnames

  # Polarized observables need both the polarized and the unpolarized set;
  # "-UNPOL" objects only need the unpolarized set.
  if [[ "$objectname" == *"-POL"* ]]; then
    pdfsetnames="$POLARIZED_POLPDF_NAME $POLARIZED_UNPOLPDF_NAME"
  elif [[ "$objectname" == *"-UNPOL"* ]]; then
    pdfsetnames="$POLARIZED_UNPOLPDF_NAME"
  else
    pdfsetnames="$PDF_NAME" # Fall to the NNPDF4.0 unpolarized set
  fi
  printf '%s\n' "$pdfsetnames"
}

# Compare every FK table of a theory against its originating grid via
# `pineko compare`.
# Arguments: $1 - theory ID
compare_fks_with_grids() {
  local theoryid=$1
  local gridpath gridname pdfsetnames

  for gridpath in theory_productions/data/grids/"$theoryid"/*.pineappl.lz4; do
    # Guard against an unmatched glob (the literal pattern would otherwise
    # be handed to pineko as a nonexistent file).
    [[ -e "$gridpath" ]] || continue
    gridname=$(basename "$gridpath")
    pdfsetnames=$(get_pdf_combinations "$gridname")
    # pdfsetnames is intentionally unquoted: each PDF set must be passed as
    # a separate argument to `pineko compare`.
    # shellcheck disable=SC2086
    pineko compare ./theory_productions/data/fktables/"$theoryid"/"$gridname" \
      ./theory_productions/data/grids/"$theoryid"/"$gridname" 3 0 \
      $pdfsetnames --threshold 2 # set threshold to 2 permille
  done
}

# Compare two FK tables bin by bin and fail if any prediction differs by more
# than 1 permille.
# Arguments: $1 - reference FK table path
#            $2 - current FK table path
#            $3 - PDF set specification passed to `pineappl diff`
# Exits 1 (aborting the script) on the first bin above the threshold.
compare_fktables() {
  local referred_fk=$1
  local current_fk=$2
  local pdfsetnames=$3
  local diffs pred_value value abs_diff check_diff bin

  # Extract the predictions - the last column of the `pineappl diff` output
  # (the first two lines are headers).
  # shellcheck disable=SC2207 — values are plain numbers, splitting is safe
  diffs=($(pineappl diff "$referred_fk" "$current_fk" "$pdfsetnames" | awk 'NR>2 {print $NF}'))

  for ((bin = 0; bin < ${#diffs[@]}; bin++)); do
    pred_value=${diffs[bin]}
    value=$(printf "%.16f" "$pred_value") # normalize to plain float representation
    abs_diff=${value#-}                   # strip a leading minus sign -> |value|
    # Threshold of 1 permille; the previous `bc` "if/else" expression relied
    # on a GNU extension — awk does the float comparison portably.
    check_diff=$(awk -v d="$abs_diff" 'BEGIN { print (d > 0.001) ? 1 : 0 }')

    if [[ $check_diff -eq 1 ]]; then
      echo "Bin $bin: ($referred_fk) and ($current_fk) differ more than 1 permille."
      exit 1
    fi
  done
}

# Compare the freshly produced FK tables of a theory against the stored
# reference FK tables, bin by bin.
# Arguments: $1 - theory ID
compare_fks_with_reference() {
  local theoryid=$1
  local fktable_path fkname pdfsetnames fkref fkcur

  for fktable_path in ./theory_productions/data/fktables/"$theoryid"/*.pineappl.lz4; do
    [[ -e "$fktable_path" ]] || continue # unmatched glob -> nothing to compare
    fkname=$(basename "$fktable_path")
    pdfsetnames=$(get_pdf_combinations "$fkname")
    # `pineappl diff` expects the convolution functions as "setA+p,setB".
    pdfsetnames=$(echo "$pdfsetnames" | sed 's/ /+p,/g')
    # BUG FIX: the reference tables live under reference_fks/ and the freshly
    # produced ones under data/fktables/ — the two variables were swapped,
    # which made the failure message label the files the wrong way around.
    fkref="./theory_productions/reference_fks/$theoryid/$fkname"
    fkcur="./theory_productions/data/fktables/$theoryid/$fkname"
    compare_fktables "$fkref" "$fkcur" "$pdfsetnames"
  done
}

# Produce the FONLL FK tables for every DIS dataset of the given theory.
# Arguments: $1 - theory ID
dis_predictions() {
  local thid=$1
  local nfonll_id=$((thid * 100))
  local dataset

  for dataset in "${LIST_DIS_DATASETS[@]}"; do
    pineko fonll -c pineko.ci.toml tcards "$thid"
    pineko fonll -c pineko.ci.toml ekos --overwrite "$thid" "$dataset"
    pineko fonll -c pineko.ci.toml fks --overwrite "$thid" "$dataset"
    # The FONLL combination is assembled from seven sub-theories whose IDs
    # are derived from the parent theory ID.
    pineko fonll -c pineko.ci.toml combine --overwrite "$thid" "$dataset" \
      --FFNS3 "$nfonll_id" \
      --FFN03 "$((nfonll_id + 1))" \
      --FFNS4zeromass "$((nfonll_id + 2))" \
      --FFNS4massive "$((nfonll_id + 3))" \
      --FFN04 "$((nfonll_id + 4))" \
      --FFNS5zeromass "$((nfonll_id + 5))" \
      --FFNS5massive "$((nfonll_id + 6))"
  done
}

# Produce operator cards, EKOs, and FK tables for the given hadronic datasets,
# then check the resulting FK tables against their grids.
# Arguments: $1 - theory ID
#            $2 - '|'-separated list of dataset names
hadronic_predictions() {
  local thid=$1
  local dataset_list=$2
  local -a datasets
  local dataset

  IFS='|' read -r -a datasets <<< "$dataset_list"
  for dataset in "${datasets[@]}"; do
    pineko theory -c pineko.ci.toml opcards --overwrite "$thid" "$dataset"
    pineko theory -c pineko.ci.toml ekos --overwrite "$thid" "$dataset"
    pineko theory -c pineko.ci.toml fks --overwrite "$thid" "$dataset"
  done

  compare_fks_with_grids "$thid"
}

# Expand the hadronic datasets
# Join each dataset array into a single '|'-separated string, since
# hadronic_predictions receives the whole list as one positional argument.
LIST_HADRONIC_DATA=$(IFS='|'; echo "${LIST_HADRONIC_DATASETS[*]}")
LIST_POLARIZED_DATA=$(IFS='|'; echo "${LIST_POLARIZED_HADRONIC_DATASETS[*]}")

# Unpolarized runs
dis_predictions $THEORY_ID
hadronic_predictions $THEORY_ID "$LIST_HADRONIC_DATA"
compare_fks_with_reference $THEORY_ID

# Polarized runs with multiple convolutions
hadronic_predictions $POLARIZED_THEORY_ID "$LIST_POLARIZED_DATA"
compare_fks_with_reference $POLARIZED_THEORY_ID
9 changes: 7 additions & 2 deletions src/pineko/cli/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@
@click.argument("pdfs", type=click.STRING, nargs=-1)
@click.option("--xir", default=1.0, help="renormalization scale variation")
@click.option("--xif", default=1.0, help="factorization scale variation")
def subcommand(fktable_path, grid_path, max_as, max_al, pdfs, xir, xif):
@click.option(
"--threshold", default=5.0, help="threshold in permille to accept Grid -> FK"
)
def subcommand(fktable_path, grid_path, max_as, max_al, pdfs, xir, xif, threshold):
"""Compare process level PineAPPL grid and derived FK Table.

The comparison between the grid stored at PINEAPPL_PATH, and the FK table
Expand All @@ -40,5 +43,7 @@ def subcommand(fktable_path, grid_path, max_as, max_al, pdfs, xir, xif):
pdf2 = pdfs[1] if len(pdfs) == 2 else None
# Note that we need to cast to string before printing to avoid ellipsis ...
rich.print(
comparator.compare(pine, fk, max_as, max_al, pdf1, xir, xif, pdf2).to_string()
comparator.compare(
pine, fk, max_as, max_al, pdf1, xir, xif, threshold, pdf2
).to_string()
)
15 changes: 14 additions & 1 deletion src/pineko/comparator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,11 @@
import rich


def compare(pine, fktable, max_as, max_al, pdf1, xir, xif, pdf2=None):
class GridtoFKError(Exception):
"""Raised when the difference between the Grid and FK table is above some threshold."""


def compare(pine, fktable, max_as, max_al, pdf1, xir, xif, threshold=5.0, pdf2=None):
"""Build comparison table.

Parameters
Expand All @@ -25,6 +29,9 @@ def compare(pine, fktable, max_as, max_al, pdf1, xir, xif, pdf2=None):
renormalization scale variation
xif : float
factorization scale variation
threshold: float
check if the difference between the Grid and FK table is above the
threshold then raise an error
pdf2: str or None
PDF set for the second convolution, if different from the first

Expand Down Expand Up @@ -112,4 +119,10 @@ def compare(pine, fktable, max_as, max_al, pdf1, xir, xif, pdf2=None):
df["PineAPPL"] = before
df["FkTable"] = after
df["permille_error"] = (after / before - 1.0) * 1000.0

if (df["permille_error"].abs() >= threshold).any():
raise GridtoFKError(
f"The difference between the Grid and FK is above {threshold} permille."
)

return df