Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions .github/workflows/regression.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
name: regression

# Start the job only for PRs when the 'run-regression' label is added.
on:
  pull_request:
    types: [labeled]

jobs:
  # NOTE: job id typo fixed (was "regresstion").
  regression:
    if: contains(github.event.pull_request.labels.*.name, 'run-regression')
    name: regression
    runs-on: pineko-stbc3

    steps:
      - uses: actions/checkout@v4
        with:
          # tags needed for dynamic versioning
          fetch-depth: 0
      - name: Install and configure Poetry
        uses: snok/install-poetry@v1
        with:
          virtualenvs-create: false
          installer-parallel: true
      - name: Install dependencies 🐍
        run: poetry install --no-interaction --no-root --with test -E nnpdf
      - name: Install project 🐍
        # it is required to repeat extras, otherwise they will be removed from
        # the environment
        run: poetry install --no-interaction -E nnpdf --with test
      - name: Get data files 📦
        id: cache-data-files
        uses: actions/cache@v4
        with:
          path: theory_productions
          key: theory_productions-v6
      - name: Download data files 📦
        # BUG FIX: the step id above uses hyphens ("cache-data-files"); the
        # previous reference "cache-data_files" never resolved, so the
        # download step ran on every build regardless of the cache state.
        if: steps.cache-data-files.outputs.cache-hit != 'true'
        run: |
          sh download_test_data.sh
      - name: Restore cached numba compile code 📮
        id: cache-numba
        uses: actions/cache@v4
        with:
          path: src/pineko/__pycache__
          key: numba-cache-${{ runner.os }}-${{ hashFiles('**/*.py') }}
          restore-keys: numba-cache-${{ runner.os }}-
      - name: Generate FK table predictions and perform regression tests 💣
        run: |
          sh regression_check.sh
      - name: Save updated numba cache 📮
        # Use the explicit save sub-action: a second plain `actions/cache`
        # step only restores and would never write an updated cache here.
        uses: actions/cache/save@v4
        with:
          path: src/pineko/__pycache__
          key: numba-cache-${{ runner.os }}-${{ hashFiles('**/*.py') }}
1 change: 1 addition & 0 deletions download_test_data.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/bin/bash
# Download the regression/benchmark input data from the NNPDF data server.
# wget flags: -r recurse, -np never ascend to the parent directory, -nH drop
# the host name from local paths, --cut-dirs=1 strip the leading remote path
# component, -l 4 limit recursion depth, -e robots=off ignore robots.txt,
# -R index.* skip the autogenerated directory index pages, -P set the local
# download prefix directory.
wget -r -np -nH --cut-dirs=1 -l 4 -e robots=off --no-verbose -R index.* https://data.nnpdf.science/pineko/theory_productions/
wget -r -np -nH --cut-dirs=1 -l 4 -e robots=off --no-verbose -P benchmarks -R index.* https://data.nnpdf.science/pineko/data_files/
wget -r -np -nH --cut-dirs=1 -l 4 -e robots=off --no-verbose -P benchmarks -R index.* https://data.nnpdf.science/pineko/fakepdfs/
15 changes: 15 additions & 0 deletions pineko.ci.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Pineko configuration used by the CI regression workflow
# (.github/workflows/regression.yml); all paths are rooted in the
# `theory_productions` tree fetched by download_test_data.sh.

[general]
nnpdf=true

[paths]
# inputs
grids = "./theory_productions/data/grids"
operator_card_template_name = "_template.ci.yaml"
# outputs
operator_cards = "./theory_productions/operator_cards"
ekos = "./theory_productions/data/ekos"
fktables = "./theory_productions/data/fktables"

[paths.logs]
eko = "./theory_productions/logs/eko"
fk = "./theory_productions/logs/fk"
37 changes: 36 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ pandas = "^2.1"
rich = "^12.5.1"
click = "^8.0.4"
tomli = "^2.0.1"
nnpdf-data = { version = "*", optional = true}
nnpdf-data = { version = ">=0.0.3", optional = true}

[tool.poetry.group.docs]
optional = true
Expand All @@ -56,6 +56,7 @@ pytest-cov = "^4.0.0"
pytest-env = "^0.6.2"
pylint = "^3.1.0"
banana-hep = "^0.6.13"
pineappl-cli = "^0.8.7"

[tool.poetry.group.dev.dependencies]
pdbpp = "^0.10.3"
Expand Down
136 changes: 136 additions & 0 deletions regression_check.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
#!/bin/bash
# Regression driver for the CI workflow: regenerate EKOs and FK tables for a
# fixed selection of datasets and compare them against the grids and the
# stored reference FK tables under ./theory_productions.

set -euo pipefail

# Unpolarized theory and its PDF set.
THEORY_ID=40008005 # NNLO QCD with EXA
PDF_NAME="NNPDF40_nnlo_as_01180"

# Polarized theory; polarized observables convolve a polarized and an
# unpolarized PDF set (see get_pdf_combinations below).
POLARIZED_THEORY_ID=41100010 # NLO QCD⊗EWK with TRN
POLARIZED_POLPDF_NAME="NNPDFpol20_nlo_as_01180"
POLARIZED_UNPOLPDF_NAME="NNPDF40_nlo_pch_as_01180"

# DIS datasets, processed through the FONLL pipeline.
LIST_DIS_DATASETS=(
"HERA_CC_318GEV_EP-SIGMARED"
"NNPDF_POS_2P24GEV_F2D"
)

# Hadronic datasets with a single (unpolarized) convolution.
LIST_HADRONIC_DATASETS=(
"ATLAS_Z0_7TEV_36PB_ETA"
"LHCB_WPWM_8TEV_MUON_Y"
"ATLAS_SINGLETOP_8TEV_T-RAP-NORM"
)

# Polarized hadronic datasets (multiple convolutions).
LIST_POLARIZED_HADRONIC_DATASETS=(
"STAR_WMWP_510GEV_WP-AL"
)

# Print the PDF set name(s) to convolve with, based on the grid/FK table name.
# Arguments: $1 - grid or FK table file name
# Outputs:   space-separated PDF set name(s) on stdout
get_pdf_combinations() {
  local objectname=$1
  local pdfsetnames

  # Polarized observables need both the polarized and the unpolarized set;
  # "-UNPOL" objects only need the unpolarized set.
  if [[ "$objectname" == *"-POL"* ]]; then
    pdfsetnames="$POLARIZED_POLPDF_NAME $POLARIZED_UNPOLPDF_NAME"
  elif [[ "$objectname" == *"-UNPOL"* ]]; then
    pdfsetnames="$POLARIZED_UNPOLPDF_NAME"
  else
    pdfsetnames="$PDF_NAME" # Fall to the NNPDF4.0 unpolarized set
  fi
  printf '%s\n' "$pdfsetnames"
}

# Compare every FK table of a theory against its originating grid via
# `pineko compare`.
# Arguments: $1 - theory ID
compare_fks_with_grids() {
  local theoryid=$1
  local gridpath gridname pdfsetnames

  for gridpath in theory_productions/data/grids/"$theoryid"/*.pineappl.lz4; do
    # Guard against an unmatched glob (the literal pattern would otherwise
    # be handed to pineko as a nonexistent file).
    [[ -e "$gridpath" ]] || continue
    gridname=$(basename "$gridpath")
    pdfsetnames=$(get_pdf_combinations "$gridname")
    # pdfsetnames is intentionally unquoted: each PDF set must be passed as
    # a separate argument to `pineko compare`.
    # shellcheck disable=SC2086
    pineko compare ./theory_productions/data/fktables/"$theoryid"/"$gridname" \
      ./theory_productions/data/grids/"$theoryid"/"$gridname" 3 0 \
      $pdfsetnames --threshold 2 # set threshold to 2 permille
  done
}

# Compare two FK tables bin by bin and fail if any prediction differs by more
# than 1 permille.
# Arguments: $1 - reference FK table path
#            $2 - current FK table path
#            $3 - PDF set specification passed to `pineappl diff`
# Exits 1 (aborting the script) on the first bin above the threshold.
compare_fktables() {
  local referred_fk=$1
  local current_fk=$2
  local pdfsetnames=$3
  local diffs pred_value value abs_diff check_diff bin

  # Extract the predictions - the last column of the `pineappl diff` output
  # (the first two lines are headers).
  # shellcheck disable=SC2207 — values are plain numbers, splitting is safe
  diffs=($(pineappl diff "$referred_fk" "$current_fk" "$pdfsetnames" | awk 'NR>2 {print $NF}'))

  for ((bin = 0; bin < ${#diffs[@]}; bin++)); do
    pred_value=${diffs[bin]}
    value=$(printf "%.16f" "$pred_value") # normalize to plain float representation
    abs_diff=${value#-}                   # strip a leading minus sign -> |value|
    # Threshold of 1 permille; the previous `bc` "if/else" expression relied
    # on a GNU extension — awk does the float comparison portably.
    check_diff=$(awk -v d="$abs_diff" 'BEGIN { print (d > 0.001) ? 1 : 0 }')

    if [[ $check_diff -eq 1 ]]; then
      echo "Bin $bin: ($referred_fk) and ($current_fk) differ more than 1 permille."
      exit 1
    fi
  done
}

# Compare the freshly produced FK tables of a theory against the stored
# reference FK tables, bin by bin.
# Arguments: $1 - theory ID
compare_fks_with_reference() {
  local theoryid=$1
  local fktable_path fkname pdfsetnames fkref fkcur

  for fktable_path in ./theory_productions/data/fktables/"$theoryid"/*.pineappl.lz4; do
    [[ -e "$fktable_path" ]] || continue # unmatched glob -> nothing to compare
    fkname=$(basename "$fktable_path")
    pdfsetnames=$(get_pdf_combinations "$fkname")
    # `pineappl diff` expects the convolution functions as "setA+p,setB".
    pdfsetnames=$(echo "$pdfsetnames" | sed 's/ /+p,/g')
    # BUG FIX: the reference tables live under reference_fks/ and the freshly
    # produced ones under data/fktables/ — the two variables were swapped,
    # which made the failure message label the files the wrong way around.
    fkref="./theory_productions/reference_fks/$theoryid/$fkname"
    fkcur="./theory_productions/data/fktables/$theoryid/$fkname"
    compare_fktables "$fkref" "$fkcur" "$pdfsetnames"
  done
}

# Produce the FONLL FK tables for every DIS dataset of the given theory.
# Arguments: $1 - theory ID
dis_predictions() {
  local thid=$1
  local nfonll_id=$((thid * 100))
  local dataset

  for dataset in "${LIST_DIS_DATASETS[@]}"; do
    pineko fonll -c pineko.ci.toml tcards "$thid"
    pineko fonll -c pineko.ci.toml ekos --overwrite "$thid" "$dataset"
    pineko fonll -c pineko.ci.toml fks --overwrite "$thid" "$dataset"
    # The FONLL combination is assembled from seven sub-theories whose IDs
    # are derived from the parent theory ID.
    pineko fonll -c pineko.ci.toml combine --overwrite "$thid" "$dataset" \
      --FFNS3 "$nfonll_id" \
      --FFN03 "$((nfonll_id + 1))" \
      --FFNS4zeromass "$((nfonll_id + 2))" \
      --FFNS4massive "$((nfonll_id + 3))" \
      --FFN04 "$((nfonll_id + 4))" \
      --FFNS5zeromass "$((nfonll_id + 5))" \
      --FFNS5massive "$((nfonll_id + 6))"
  done
}

# Produce operator cards, EKOs, and FK tables for the given hadronic datasets,
# then check the resulting FK tables against their grids.
# Arguments: $1 - theory ID
#            $2 - '|'-separated list of dataset names
hadronic_predictions() {
  local thid=$1
  local dataset_list=$2
  local -a datasets
  local dataset

  IFS='|' read -r -a datasets <<< "$dataset_list"
  for dataset in "${datasets[@]}"; do
    pineko theory -c pineko.ci.toml opcards --overwrite "$thid" "$dataset"
    pineko theory -c pineko.ci.toml ekos --overwrite "$thid" "$dataset"
    pineko theory -c pineko.ci.toml fks --overwrite "$thid" "$dataset"
  done

  compare_fks_with_grids "$thid"
}

# Expand the hadronic datasets
# Join each dataset array into a single '|'-separated string, since
# hadronic_predictions receives the whole list as one positional argument.
LIST_HADRONIC_DATA=$(IFS='|'; echo "${LIST_HADRONIC_DATASETS[*]}")
LIST_POLARIZED_DATA=$(IFS='|'; echo "${LIST_POLARIZED_HADRONIC_DATASETS[*]}")

# Unpolarized runs
dis_predictions $THEORY_ID
hadronic_predictions $THEORY_ID "$LIST_HADRONIC_DATA"
compare_fks_with_reference $THEORY_ID

# Polarized runs with multiple convolutions
hadronic_predictions $POLARIZED_THEORY_ID "$LIST_POLARIZED_DATA"
compare_fks_with_reference $POLARIZED_THEORY_ID
9 changes: 7 additions & 2 deletions src/pineko/cli/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@
@click.argument("pdfs", type=click.STRING, nargs=-1)
@click.option("--xir", default=1.0, help="renormalization scale variation")
@click.option("--xif", default=1.0, help="factorization scale variation")
def subcommand(fktable_path, grid_path, max_as, max_al, pdfs, xir, xif):
@click.option(
"--threshold", default=5.0, help="threshold in permille to accept Grid -> FK"
)
def subcommand(fktable_path, grid_path, max_as, max_al, pdfs, xir, xif, threshold):
"""Compare process level PineAPPL grid and derived FK Table.

The comparison between the grid stored at PINEAPPL_PATH, and the FK table
Expand All @@ -40,5 +43,7 @@ def subcommand(fktable_path, grid_path, max_as, max_al, pdfs, xir, xif):
pdf2 = pdfs[1] if len(pdfs) == 2 else None
# Note that we need to cast to string before printing to avoid ellipsis ...
rich.print(
comparator.compare(pine, fk, max_as, max_al, pdf1, xir, xif, pdf2).to_string()
comparator.compare(
pine, fk, max_as, max_al, pdf1, xir, xif, threshold, pdf2
).to_string()
)
15 changes: 14 additions & 1 deletion src/pineko/comparator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,11 @@
import rich


def compare(pine, fktable, max_as, max_al, pdf1, xir, xif, pdf2=None):
class GridtoFKError(Exception):
"""Raised when the difference between the Grid and FK table is above some threshold."""


def compare(pine, fktable, max_as, max_al, pdf1, xir, xif, threshold=5.0, pdf2=None):
"""Build comparison table.

Parameters
Expand All @@ -25,6 +29,9 @@ def compare(pine, fktable, max_as, max_al, pdf1, xir, xif, pdf2=None):
renormalization scale variation
xif : float
factorization scale variation
threshold: float
check if the difference between the Grid and FK table is above the
threshold then raise an error
pdf2: str or None
PDF set for the second convolution, if different from the first

Expand Down Expand Up @@ -112,4 +119,10 @@ def compare(pine, fktable, max_as, max_al, pdf1, xir, xif, pdf2=None):
df["PineAPPL"] = before
df["FkTable"] = after
df["permille_error"] = (after / before - 1.0) * 1000.0

if (df["permille_error"].abs() >= threshold).any():
raise GridtoFKError(
f"The difference between the Grid and FK is above {threshold} permille."
)

return df