diff --git a/.github/workflows/regression.yml b/.github/workflows/regression.yml
new file mode 100644
index 00000000..c90e4363
--- /dev/null
+++ b/.github/workflows/regression.yml
@@ -0,0 +1,56 @@
+name: regression
+
+# start job only for PRs when a label is added.
+on:
+  pull_request:
+    types: [labeled]
+
+jobs:
+  regression:
+    # the `labeled` event fires for any label: only run when the PR carries `run-regression`
+    if: contains(github.event.pull_request.labels.*.name, 'run-regression')
+    name: regression
+    runs-on: pineko-stbc3
+
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          # tags needed for dynamic versioning
+          fetch-depth: 0
+      - name: Install and configure Poetry
+        uses: snok/install-poetry@v1
+        with:
+          virtualenvs-create: false
+          installer-parallel: true
+      - name: Install dependencies 🐍
+        run: poetry install --no-interaction --no-root --with test -E nnpdf
+      - name: Install project 🐍
+        # it is required to repeat extras, otherwise they will be removed from
+        # the environment
+        run: poetry install --no-interaction -E nnpdf --with test
+      - name: Get data files 📦
+        id: cache-data-files
+        uses: actions/cache@v4
+        with:
+          path: theory_productions
+          key: theory_productions-v6
+      - name: Download data files 📦
+        # the step id used here must match the `id` of the cache step above
+        if: steps.cache-data-files.outputs.cache-hit != 'true'
+        run: |
+          sh download_test_data.sh
+      - name: Restore cached numba compile code 📮
+        id: cache-numba
+        uses: actions/cache@v4
+        with:
+          path: src/pineko/__pycache__
+          key: numba-cache-${{ runner.os }}-${{ hashFiles('**/*.py') }}
+          restore-keys: numba-cache-${{ runner.os }}-
+      - name: Generate FK table predictions and perform regression tests 💣
+        run: |
+          sh regression_check.sh
+      - name: Save updated numba cache 📮
+        uses: actions/cache@v4
+        with:
+          path: src/pineko/__pycache__
+          key: numba-cache-${{ runner.os }}-${{ hashFiles('**/*.py') }}
diff --git a/download_test_data.sh b/download_test_data.sh
index 385d5c7e..4621dc0d 100644
--- a/download_test_data.sh
+++ b/download_test_data.sh
@@ -1,3 +1,4 @@
 #!/bin/bash
+wget -r -np -nH --cut-dirs=1 -l 4 -e robots=off --no-verbose -R index.* 
https://data.nnpdf.science/pineko/theory_productions/ wget -r -np -nH --cut-dirs=1 -l 4 -e robots=off --no-verbose -P benchmarks -R index.* https://data.nnpdf.science/pineko/data_files/ wget -r -np -nH --cut-dirs=1 -l 4 -e robots=off --no-verbose -P benchmarks -R index.* https://data.nnpdf.science/pineko/fakepdfs/ diff --git a/pineko.ci.toml b/pineko.ci.toml new file mode 100644 index 00000000..0a202214 --- /dev/null +++ b/pineko.ci.toml @@ -0,0 +1,15 @@ +[general] +nnpdf=true + +[paths] +# inputs +grids = "./theory_productions/data/grids" +operator_card_template_name = "_template.ci.yaml" +# outputs +operator_cards = "./theory_productions/operator_cards" +ekos = "./theory_productions/data/ekos" +fktables = "./theory_productions/data/fktables" + +[paths.logs] +eko = "./theory_productions/logs/eko" +fk = "./theory_productions/logs/fk" diff --git a/poetry.lock b/poetry.lock index cc68482a..b2382632 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1721,6 +1721,41 @@ cli = ["pineappl-cli"] docs = ["nbsphinx (>=0.9.2)", "sphinx (>=6.2.1)", "sphinx-rtd-theme (>=1.2.2)", "sphinxcontrib-bibtex (>=2.5.0)"] test = ["pytest", "pytest-cov"] +[[package]] +name = "pineappl-cli" +version = "0.8.7" +description = "Read, write, and query PineAPPL grids" +optional = false +python-versions = "*" +files = [ + {file = "pineappl_cli-0.8.7-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:9623beb98b3a58da1050848598c41001468f61d43b56b0c6ec0a712c892931b0"}, + {file = "pineappl_cli-0.8.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:434f91fa09d7942b463026d11398d423821046dde4d80b17d6ddf43994c1f9a1"}, + {file = "pineappl_cli-0.8.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8e5bbd6927fce4ecdcd7de327013448f6dfcf9e47067c3f5ee72815eee3443c8"}, + {file = "pineappl_cli-0.8.7-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:f59f9c8f048bf04e92a3ab030892d6093c71b3580ece682bca423c0d26c31a11"}, + {file = "pineappl_cli-0.8.7-cp311-cp311-macosx_11_0_arm64.whl", 
hash = "sha256:26625e80846b8eb0a590e1294eee22b5feda45ba6785131c3f70f00c168da398"}, + {file = "pineappl_cli-0.8.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c78f1a8a77d5c23cdbabb0a0f92e28234a6917ea49725e24ec2d84a4d00151a"}, + {file = "pineappl_cli-0.8.7-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:df56c693a998a1dcf9cf74882f212c6afd51375e90a73683cc16e795a7a48b98"}, + {file = "pineappl_cli-0.8.7-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b3ddeb079aab4741cc7e34e530eca3f6dea00afb1d0b0ea6406fec88b96d217d"}, + {file = "pineappl_cli-0.8.7-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c73222a02afeed8a5999f4a9a7e96aed5bee112b116f11f001994a3bc6f4b13"}, + {file = "pineappl_cli-0.8.7-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:1f2da5b73bd2ac4d7f5226db0d72bc424fde39a0cea42b3dfa266d100c3ceccb"}, + {file = "pineappl_cli-0.8.7-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:033543cd3be5d48f5e95abffd7638a6197f324566d734b21dbca83ce3bea9e10"}, + {file = "pineappl_cli-0.8.7-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:12a6457b5ae97aa392ca249974894e7f1e60cb881f48b3c409db2712f1928320"}, + {file = "pineappl_cli-0.8.7-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f77412b1ad7726e937c361868399fd8923a48ae7de9e768b31fe404337514179"}, + {file = "pineappl_cli-0.8.7-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:27eafd03b1a352f67bb6c0e26a03b027039efae67485be5d0dec7f650a559c33"}, + {file = "pineappl_cli-0.8.7-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d8d1bfcbb1996387d020298bcf7d4b6276b935fcc8e89afb14ed86cc94db0b1f"}, + {file = "pineappl_cli-0.8.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63ae66d14cfe38eb3d31e445a344ff5e672561d88bdf04da359f9b5a1f63def7"}, + {file = "pineappl_cli-0.8.7-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:1172d800b3e4d22c322892b2f7e603028f37d6848690407e759bfb16a320544a"}, + 
{file = "pineappl_cli-0.8.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0756f6ed6bf1bc081fe40647e2dded2c806afdd3ccbd1301d484f6c971977e1f"}, + {file = "pineappl_cli-0.8.7-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8753bcfeac586b084013b6731804a8c178dc9333faaf28d5e806178481f84a86"}, + {file = "pineappl_cli-0.8.7-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:f6857e1add4d1028f0f5171dde30b3e1e5f2b0c4f722cd4f2c21717846d2a7c7"}, + {file = "pineappl_cli-0.8.7-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:0ae8539b0464faa2ab5fd22916fa12acd7b607ea97db3dbda7f189c70dc2402e"}, + {file = "pineappl_cli-0.8.7-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c7c1d6ce3b86ba1baa23eeb44bb7fde15d9d2cd51115d702d81a7965ea79dbf"}, + {file = "pineappl_cli-0.8.7-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:95dc225a1fd507c8979d96cd5195e62efab4449f72f4f4d12713a00a9e90bb25"}, + {file = "pineappl_cli-0.8.7-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:7436b6a048b9a526b19c04ad1577008b1fd089df75a920e4b8bb2dc9942d37da"}, + {file = "pineappl_cli-0.8.7-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:0d43ca94f4fa25848ea95048f1c451227bd68d022afb3c1e5346ab2e6d783e75"}, + {file = "pineappl_cli-0.8.7-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:2b7a9c8265b89cb176cec909003c9106bba5e13de4d6997fea7301111319ff2f"}, +] + [[package]] name = "platformdirs" version = "4.3.6" @@ -2682,4 +2717,4 @@ nnpdf = ["nnpdf-data"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.13" -content-hash = "650387179ae5ea33ecbb1046d689aaee9bbf9be8610194321da027eebb1a8814" +content-hash = "ed38f32b8a314865b33cb54ac0f82d978a440ae9b3dc240edf0da8d65b2782c8" diff --git a/pyproject.toml b/pyproject.toml index b9c07b30..342df04f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,7 @@ pandas = "^2.1" rich = "^12.5.1" click = "^8.0.4" tomli = "^2.0.1" -nnpdf-data = { version = "*", 
optional = true}
+nnpdf-data = { version = ">=0.0.3", optional = true}
 
 [tool.poetry.group.docs]
 optional = true
@@ -56,6 +56,7 @@ pytest-cov = "^4.0.0"
 pytest-env = "^0.6.2"
 pylint = "^3.1.0"
 banana-hep = "^0.6.13"
+pineappl-cli = "^0.8.7"
 
 [tool.poetry.group.dev.dependencies]
 pdbpp = "^0.10.3"
diff --git a/regression_check.sh b/regression_check.sh
new file mode 100755
index 00000000..c01ea7f0
--- /dev/null
+++ b/regression_check.sh
@@ -0,0 +1,147 @@
+#!/bin/bash
+#
+# Regression check for pineko: regenerate FK tables for a few datasets, then
+# compare them against their parent grids and against stored reference FK tables.
+
+set -euo pipefail
+
+THEORY_ID=40008005 # NNLO QCD with EXA
+PDF_NAME="NNPDF40_nnlo_as_01180"
+
+POLARIZED_THEORY_ID=41100010 # NLO QCD⊗EWK with TRN
+POLARIZED_POLPDF_NAME="NNPDFpol20_nlo_as_01180"
+POLARIZED_UNPOLPDF_NAME="NNPDF40_nlo_pch_as_01180"
+
+LIST_DIS_DATASETS=(
+  "HERA_CC_318GEV_EP-SIGMARED"
+  "NNPDF_POS_2P24GEV_F2D"
+)
+
+LIST_HADRONIC_DATASETS=(
+  "ATLAS_Z0_7TEV_36PB_ETA"
+  "LHCB_WPWM_8TEV_MUON_Y"
+  "ATLAS_SINGLETOP_8TEV_T-RAP-NORM"
+)
+
+LIST_POLARIZED_HADRONIC_DATASETS=(
+  "STAR_WMWP_510GEV_WP-AL"
+)
+
+# Print the space-separated PDF set name(s) to use for the object named $1.
+get_pdf_combinations() {
+  OBJECTNAME=$1
+
+  # Define the combination of PDF sets depending on the types
+  if [[ "$OBJECTNAME" == *"-POL"* ]]; then
+    PDFSETNAMES="$POLARIZED_POLPDF_NAME $POLARIZED_UNPOLPDF_NAME"
+  elif [[ "$OBJECTNAME" == *"-UNPOL"* ]]; then
+    PDFSETNAMES="$POLARIZED_UNPOLPDF_NAME"
+  else
+    PDFSETNAMES="$PDF_NAME" # Fall to the NNPDF4.0 unpolarized set
+  fi
+  echo "$PDFSETNAMES"
+}
+
+# Run `pineko compare` for every grid of theory $1 against the FK table of the same name.
+compare_fks_with_grids() {
+  THEORYID=$1
+
+  # Compare the Hadronic FK tables with the Grids
+  grids=(theory_productions/data/grids/"$THEORYID"/*.pineappl.lz4)
+  for gridpath in "${grids[@]}"; do
+    gridname=$(basename "$gridpath")
+    PDFSETNAMES=$(get_pdf_combinations "$gridname")
+    # $PDFSETNAMES is left unquoted on purpose: each PDF name is its own argument
+    pineko compare ./theory_productions/data/fktables/"$THEORYID"/"$gridname" \
+      ./theory_productions/data/grids/"$THEORYID"/"$gridname" 3 0 \
+      $PDFSETNAMES --threshold 2 # set threshold to 2 permille
+  done
+}
+
+# Exit with status 1 if any bin of the FK tables $1 (reference) and $2 (current),
+# diffed with the PDF specification $3, differs by more than 1 permille.
+compare_fktables() {
+  REFERENCE_FK=$1
+  CURRENT_FK=$2
+  PDFSETNAMES=$3
+
+  # Extract the predictions - the last column
+  diffs=($(pineappl diff "$REFERENCE_FK" "$CURRENT_FK" "$PDFSETNAMES" | awk 'NR>2 {print $NF}'))
+
+  preds_length=${#diffs[@]} # Get the length of the predictions
+  for ((bin=0; bin<preds_length; bin++)); do
+    # NOTE(review): the loop header and the two lines below were reconstructed
+    # (the text between "<" and ">" was lost); confirm against upstream.
+    # bc cannot read exponent notation, hence the fixed-point conversion.
+    abs_diff=$(printf '%.10f' "${diffs[bin]}" | tr -d '-')
+    check_diff=$(echo "$abs_diff > 0.001" | bc) # Set threshold to 1 permille
+
+    if [[ $check_diff -eq 1 ]]; then
+      echo "Bin $bin: ($REFERENCE_FK) and ($CURRENT_FK) differ more than 1 permille."
+      exit 1
+    fi
+  done
+}
+
+# Compare every freshly produced FK table of theory $1 with its stored reference.
+compare_fks_with_reference() {
+  THEORYID=$1
+
+  fktables=(./theory_productions/data/fktables/"$THEORYID"/*.pineappl.lz4)
+  for fktable_path in "${fktables[@]}"; do
+    fkname=$(basename "$fktable_path")
+    PDFSETNAMES=$(get_pdf_combinations "$fkname")
+    PDFSETNAMES=$(echo "$PDFSETNAMES" | sed 's/ /+p,/g')
+    fkref="./theory_productions/reference_fks/$THEORYID/$fkname"
+    fkcur="./theory_productions/data/fktables/$THEORYID/$fkname"
+    compare_fktables "$fkref" "$fkcur" "$PDFSETNAMES"
+  done
+}
+
+# Run the pineko FONLL chain for theory $1 on every dataset in LIST_DIS_DATASETS.
+dis_predictions() {
+  THEORYID=$1
+  NFONLL_ID=$(($THEORYID*100))
+
+  for dataset in "${LIST_DIS_DATASETS[@]}"; do
+    pineko fonll -c pineko.ci.toml tcards $THEORYID
+    pineko fonll -c pineko.ci.toml ekos --overwrite $THEORYID $dataset
+    pineko fonll -c pineko.ci.toml fks --overwrite $THEORYID $dataset
+    pineko fonll -c pineko.ci.toml combine --overwrite $THEORYID $dataset \
+      --FFNS3 $NFONLL_ID \
+      --FFN03 $(($NFONLL_ID+1)) \
+      --FFNS4zeromass $(($NFONLL_ID+2)) \
+      --FFNS4massive $(($NFONLL_ID+3)) \
+      --FFN04 $(($NFONLL_ID+4)) \
+      --FFNS5zeromass $(($NFONLL_ID+5)) \
+      --FFNS5massive $(($NFONLL_ID+6))
+  done
+}
+
+# Run opcards, ekos and fks for theory $1 on the '|'-separated datasets in $2, then compare with the grids.
+hadronic_predictions() {
+  THEORYID=$1
+  LIST_DATASETS=$2
+
+  IFS='|' read -r -a ARRAY_DATASETS <<< "$LIST_DATASETS"
+  for dataset in "${ARRAY_DATASETS[@]}"; do
+    pineko theory -c pineko.ci.toml opcards --overwrite $THEORYID $dataset
+    pineko theory -c pineko.ci.toml ekos --overwrite $THEORYID $dataset
+    pineko theory -c pineko.ci.toml fks --overwrite $THEORYID $dataset
+  done
+
+  compare_fks_with_grids $THEORYID
+}
+
+# Expand the hadronic datasets
+LIST_HADRONIC_DATA=$(IFS='|'; echo "${LIST_HADRONIC_DATASETS[*]}")
+LIST_POLARIZED_DATA=$(IFS='|'; echo "${LIST_POLARIZED_HADRONIC_DATASETS[*]}")
+
+# Unpolarized runs
+dis_predictions $THEORY_ID
+hadronic_predictions $THEORY_ID "$LIST_HADRONIC_DATA"
+compare_fks_with_reference $THEORY_ID
+
+# Polarized runs with multiple convolutions
+hadronic_predictions $POLARIZED_THEORY_ID "$LIST_POLARIZED_DATA"
+compare_fks_with_reference $POLARIZED_THEORY_ID
diff --git a/src/pineko/cli/compare.py b/src/pineko/cli/compare.py
index 92e4d986..a6968eda 100644
--- a/src/pineko/cli/compare.py
+++ b/src/pineko/cli/compare.py
@@ -16,7 +16,10 @@
 @click.argument("pdfs", type=click.STRING, nargs=-1)
 @click.option("--xir", default=1.0, help="renormalization scale variation")
 @click.option("--xif", default=1.0, help="factorization scale variation")
-def subcommand(fktable_path, grid_path, max_as, max_al, pdfs, xir, xif):
+@click.option(
+    "--threshold", default=5.0, help="threshold in permille to accept Grid -> FK"
+)
+def subcommand(fktable_path, grid_path, max_as, max_al, pdfs, xir, xif, threshold):
     """Compare process level PineAPPL grid and derived FK Table.
 
     The comparison between the grid stored at PINEAPPL_PATH, and the FK table
@@ -40,5 +43,7 @@ def subcommand(fktable_path, grid_path, max_as, max_al, pdfs, xir, xif):
     pdf2 = pdfs[1] if len(pdfs) == 2 else None
     # Note that we need to cast to string before printing to avoid ellipsis ...
rich.print(
-        comparator.compare(pine, fk, max_as, max_al, pdf1, xir, xif, pdf2).to_string()
+        comparator.compare(
+            pine, fk, max_as, max_al, pdf1, xir, xif, pdf2, threshold=threshold
+        ).to_string()
     )
diff --git a/src/pineko/comparator.py b/src/pineko/comparator.py
index bca2b382..6856e81c 100644
--- a/src/pineko/comparator.py
+++ b/src/pineko/comparator.py
@@ -6,7 +6,11 @@
 import rich
 
 
-def compare(pine, fktable, max_as, max_al, pdf1, xir, xif, pdf2=None):
+class GridtoFKError(Exception):
+    """Raised when the difference between the Grid and FK table is above some threshold."""
+
+
+def compare(pine, fktable, max_as, max_al, pdf1, xir, xif, pdf2=None, threshold=5.0):
     """Build comparison table.
 
     Parameters
@@ -25,6 +29,9 @@ def compare(pine, fktable, max_as, max_al, pdf1, xir, xif, pdf2=None):
         renormalization scale variation
     xif : float
         factorization scale variation
     pdf2: str or None
         PDF set for the second convolution, if different from the first
+    threshold : float
+        largest tolerated |permille_error| between the Grid and the FK table;
+        a `GridtoFKError` is raised if any bin reaches or exceeds it
 
@@ -112,4 +119,10 @@ def compare(pine, fktable, max_as, max_al, pdf1, xir, xif, pdf2=None):
     df["PineAPPL"] = before
     df["FkTable"] = after
     df["permille_error"] = (after / before - 1.0) * 1000.0
+
+    if (df["permille_error"].abs() >= threshold).any():
+        raise GridtoFKError(
+            f"The difference between the Grid and FK is above {threshold} permille."
+        )
+
     return df