diff --git a/FKTable Playground.ipynb b/FKTable Playground.ipynb
new file mode 100644
index 0000000000..29acfc8932
--- /dev/null
+++ b/FKTable Playground.ipynb
@@ -0,0 +1,1140 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "cceb1080",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from pathlib import Path\n",
+ "from validphys.loader import _get_nnpdf_profile\n",
+ "from validphys.api import API\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "from validphys.convolution import central_predictions\n",
+ "\n",
+ "profile = _get_nnpdf_profile()\n",
+ "#yaml_db = Path(profile[\"data_path\"]) / \"yamldb\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6b1eb8f3",
+ "metadata": {},
+ "source": [
+ "The `yaml_db` folder is a temporary thing as it contains files that look like:\n",
+ "\n",
+ "```yaml\n",
+ "conversion_factor: 1.0\n",
+ "operands:\n",
+ "- - NMC_NC_EM_D_F2\n",
+ "- - NMC_NC_EM_P_F2\n",
+ "operation: RATIO\n",
+ "target_dataset: NMCPD\n",
+ "```\n",
+ "\n",
+ "This information will eventually be part of the new commondata format of course.\n",
+ "\n",
+ "The `operation` is applied to the first level of the list while the second level is just concatenated. This is necessary since `pineappl` fktables might contain one layer of concatenation which is already done for the \"classic\" fktables.\n",
+ "\n",
+ "The `pineappl` fktables will live inside the appropiate `theory_xxx` folder `/pineappls`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "051581e1",
+ "metadata": {
+ "scrolled": false
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "NMCPD_dw_ite\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: NMCPD_dw_ite\n",
+ "nData: 260 nSys: 105\n",
+ "-- COMMONDATA Files for NMCPD_dw_ite successfully read.\n",
+ "\n",
+ "NMC\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: NMC\n",
+ "nData: 292 nSys: 16\n",
+ "-- COMMONDATA Files for NMC successfully read.\n",
+ "\n",
+ "SLACP_dwsh\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: SLACP_dwsh\n",
+ "nData: 211 nSys: 3\n",
+ "-- COMMONDATA Files for SLACP_dwsh successfully read.\n",
+ "\n",
+ "SLACD_dw_ite\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: SLACD_dw_ite\n",
+ "nData: 211 nSys: 103\n",
+ "-- COMMONDATA Files for SLACD_dw_ite successfully read.\n",
+ "\n",
+ "BCDMSP_dwsh\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: BCDMSP_dwsh\n",
+ "nData: 351 nSys: 11\n",
+ "-- COMMONDATA Files for BCDMSP_dwsh successfully read.\n",
+ "\n",
+ "BCDMSD_dw_ite\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: BCDMSD_dw_ite\n",
+ "nData: 254 nSys: 108\n",
+ "-- COMMONDATA Files for BCDMSD_dw_ite successfully read.\n",
+ "\n",
+ "CHORUSNUPb_dw_ite\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: CHORUSNUPb_dw_ite\n",
+ "nData: 607 nSys: 1014\n",
+ "-- COMMONDATA Files for CHORUSNUPb_dw_ite successfully read.\n",
+ "\n",
+ "CHORUSNBPb_dw_ite\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: CHORUSNBPb_dw_ite\n",
+ "nData: 607 nSys: 114\n",
+ "-- COMMONDATA Files for CHORUSNBPb_dw_ite successfully read.\n",
+ "\n",
+ "NTVNUDMNFe_dw_ite\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: NTVNUDMNFe_dw_ite\n",
+ "nData: 45 nSys: 1003\n",
+ "-- COMMONDATA Files for NTVNUDMNFe_dw_ite successfully read.\n",
+ "\n",
+ "NTVNBDMNFe_dw_ite\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: NTVNBDMNFe_dw_ite\n",
+ "nData: 45 nSys: 103\n",
+ "-- COMMONDATA Files for NTVNBDMNFe_dw_ite successfully read.\n",
+ "\n",
+ "HERACOMBNCEM\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: HERACOMBNCEM\n",
+ "nData: 159 nSys: 170\n",
+ "-- COMMONDATA Files for HERACOMBNCEM successfully read.\n",
+ "\n",
+ "HERACOMBNCEP460\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: HERACOMBNCEP460\n",
+ "nData: 209 nSys: 170\n",
+ "-- COMMONDATA Files for HERACOMBNCEP460 successfully read.\n",
+ "\n",
+ "HERACOMBNCEP575\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: HERACOMBNCEP575\n",
+ "nData: 260 nSys: 170\n",
+ "-- COMMONDATA Files for HERACOMBNCEP575 successfully read.\n",
+ "\n",
+ "HERACOMBNCEP820\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: HERACOMBNCEP820\n",
+ "nData: 112 nSys: 170\n",
+ "-- COMMONDATA Files for HERACOMBNCEP820 successfully read.\n",
+ "\n",
+ "HERACOMBNCEP920\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: HERACOMBNCEP920\n",
+ "nData: 485 nSys: 170\n",
+ "-- COMMONDATA Files for HERACOMBNCEP920 successfully read.\n",
+ "\n",
+ "HERACOMBCCEM\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: HERACOMBCCEM\n",
+ "nData: 42 nSys: 170\n",
+ "-- COMMONDATA Files for HERACOMBCCEM successfully read.\n",
+ "\n",
+ "HERACOMBCCEP\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: HERACOMBCCEP\n",
+ "nData: 39 nSys: 170\n",
+ "-- COMMONDATA Files for HERACOMBCCEP successfully read.\n",
+ "\n",
+ "HERACOMB_SIGMARED_C\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: HERACOMB_SIGMARED_C\n",
+ "nData: 52 nSys: 167\n",
+ "-- COMMONDATA Files for HERACOMB_SIGMARED_C successfully read.\n",
+ "\n",
+ "HERACOMB_SIGMARED_B\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: HERACOMB_SIGMARED_B\n",
+ "nData: 27 nSys: 167\n",
+ "-- COMMONDATA Files for HERACOMB_SIGMARED_B successfully read.\n",
+ "\n",
+ "CDFZRAP_NEW\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: CDFZRAP_NEW\n",
+ "nData: 28 nSys: 11\n",
+ "-- COMMONDATA Files for CDFZRAP_NEW successfully read.\n",
+ "\n",
+ "D0ZRAP_40\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: D0ZRAP_40\n",
+ "nData: 28 nSys: 1\n",
+ "-- COMMONDATA Files for D0ZRAP_40 successfully read.\n",
+ "\n",
+ "ATLASWZRAP36PB\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: ATLASWZRAP36PB\n",
+ "nData: 30 nSys: 32\n",
+ "-- COMMONDATA Files for ATLASWZRAP36PB successfully read.\n",
+ "\n",
+ "ATLASZHIGHMASS49FB\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: ATLASZHIGHMASS49FB\n",
+ "nData: 13 nSys: 11\n",
+ "-- COMMONDATA Files for ATLASZHIGHMASS49FB successfully read.\n",
+ "\n",
+ "ATLASLOMASSDY11EXT\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: ATLASLOMASSDY11EXT\n",
+ "nData: 6 nSys: 8\n",
+ "-- COMMONDATA Files for ATLASLOMASSDY11EXT successfully read.\n",
+ "\n",
+ "ATLASWZRAP11CC\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: ATLASWZRAP11CC\n",
+ "nData: 46 nSys: 133\n",
+ "-- COMMONDATA Files for ATLASWZRAP11CC successfully read.\n",
+ "\n",
+ "ATLASWZRAP11CF\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: ATLASWZRAP11CF\n",
+ "nData: 15 nSys: 133\n",
+ "-- COMMONDATA Files for ATLASWZRAP11CF successfully read.\n",
+ "\n",
+ "ATLASDY2D8TEV\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: ATLASDY2D8TEV\n",
+ "nData: 48 nSys: 37\n",
+ "-- COMMONDATA Files for ATLASDY2D8TEV successfully read.\n",
+ "\n",
+ "ATLAS_DY_2D_8TEV_LOWMASS\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: ATLAS_DY_2D_8TEV_LOWMASS\n",
+ "nData: 84 nSys: 277\n",
+ "-- COMMONDATA Files for ATLAS_DY_2D_8TEV_LOWMASS successfully read.\n",
+ "\n",
+ "ATLAS_WZ_TOT_13TEV\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: ATLAS_WZ_TOT_13TEV\n",
+ "nData: 3 nSys: 4\n",
+ "-- COMMONDATA Files for ATLAS_WZ_TOT_13TEV successfully read.\n",
+ "\n",
+ "ATLAS_WP_JET_8TEV_PT\n",
+ "ATLAS_WM_JET_8TEV_PT\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: ATLAS_WM_JET_8TEV_PT\n",
+ "nData: 16 nSys: 124\n",
+ "-- COMMONDATA Files for ATLAS_WM_JET_8TEV_PT successfully read.\n",
+ "\n",
+ "ATLASZPT8TEVMDIST\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: ATLASZPT8TEVMDIST\n",
+ "nData: 64 nSys: 102\n",
+ "-- COMMONDATA Files for ATLASZPT8TEVMDIST successfully read.\n",
+ "\n",
+ "ATLASZPT8TEVYDIST\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: ATLASZPT8TEVYDIST\n",
+ "nData: 120 nSys: 102\n",
+ "-- COMMONDATA Files for ATLASZPT8TEVYDIST successfully read.\n",
+ "\n",
+ "ATLASTTBARTOT7TEV\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: ATLASTTBARTOT7TEV\n",
+ "nData: 1 nSys: 3\n",
+ "-- COMMONDATA Files for ATLASTTBARTOT7TEV successfully read.\n",
+ "\n",
+ "ATLASTTBARTOT8TEV\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: ATLASTTBARTOT8TEV\n",
+ "nData: 1 nSys: 3\n",
+ "-- COMMONDATA Files for ATLASTTBARTOT8TEV successfully read.\n",
+ "\n",
+ "ATLAS_TTBARTOT_13TEV_FULLLUMI\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: ATLAS_TTBARTOT_13TEV_FULLLUMI\n",
+ "nData: 1 nSys: 2\n",
+ "-- COMMONDATA Files for ATLAS_TTBARTOT_13TEV_FULLLUMI successfully read.\n",
+ "\n",
+ "ATLAS_TTB_DIFF_8TEV_LJ_TRAPNORM\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: ATLAS_TTB_DIFF_8TEV_LJ_TRAPNORM\n",
+ "nData: 5 nSys: 135\n",
+ "-- COMMONDATA Files for ATLAS_TTB_DIFF_8TEV_LJ_TRAPNORM successfully read.\n",
+ "\n",
+ "ATLAS_TTB_DIFF_8TEV_LJ_TTRAPNORM\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: ATLAS_TTB_DIFF_8TEV_LJ_TTRAPNORM\n",
+ "nData: 5 nSys: 135\n",
+ "-- COMMONDATA Files for ATLAS_TTB_DIFF_8TEV_LJ_TTRAPNORM successfully read.\n",
+ "\n",
+ "ATLAS_TOPDIFF_DILEPT_8TEV_TTRAPNORM\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: ATLAS_TOPDIFF_DILEPT_8TEV_TTRAPNORM\n",
+ "nData: 5 nSys: 5\n",
+ "-- COMMONDATA Files for ATLAS_TOPDIFF_DILEPT_8TEV_TTRAPNORM successfully read.\n",
+ "\n",
+ "ATLAS_1JET_8TEV_R06_DEC\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: ATLAS_1JET_8TEV_R06_DEC\n",
+ "nData: 171 nSys: 677\n",
+ "-- COMMONDATA Files for ATLAS_1JET_8TEV_R06_DEC successfully read.\n",
+ "\n",
+ "ATLAS_2JET_7TEV_R06\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: ATLAS_2JET_7TEV_R06\n",
+ "nData: 90 nSys: 474\n",
+ "-- COMMONDATA Files for ATLAS_2JET_7TEV_R06 successfully read.\n",
+ "\n",
+ "ATLASPHT15_SF\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: ATLASPHT15_SF\n",
+ "nData: 53 nSys: 2\n",
+ "-- COMMONDATA Files for ATLASPHT15_SF successfully read.\n",
+ "\n",
+ "ATLAS_SINGLETOP_TCH_R_7TEV\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: ATLAS_SINGLETOP_TCH_R_7TEV\n",
+ "nData: 1 nSys: 13\n",
+ "-- COMMONDATA Files for ATLAS_SINGLETOP_TCH_R_7TEV successfully read.\n",
+ "\n",
+ "ATLAS_SINGLETOP_TCH_R_13TEV\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: ATLAS_SINGLETOP_TCH_R_13TEV\n",
+ "nData: 1 nSys: 1\n",
+ "-- COMMONDATA Files for ATLAS_SINGLETOP_TCH_R_13TEV successfully read.\n",
+ "\n",
+ "ATLAS_SINGLETOP_TCH_DIFF_7TEV_T_RAP_NORM\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: ATLAS_SINGLETOP_TCH_DIFF_7TEV_T_RAP_NORM\n",
+ "nData: 3 nSys: 17\n",
+ "-- COMMONDATA Files for ATLAS_SINGLETOP_TCH_DIFF_7TEV_T_RAP_NORM successfully read.\n",
+ "\n",
+ "ATLAS_SINGLETOP_TCH_DIFF_7TEV_TBAR_RAP_NORM\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: ATLAS_SINGLETOP_TCH_DIFF_7TEV_TBAR_RAP_NORM\n",
+ "nData: 3 nSys: 15\n",
+ "-- COMMONDATA Files for ATLAS_SINGLETOP_TCH_DIFF_7TEV_TBAR_RAP_NORM successfully read.\n",
+ "\n",
+ "ATLAS_SINGLETOP_TCH_DIFF_8TEV_T_RAP_NORM\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: ATLAS_SINGLETOP_TCH_DIFF_8TEV_T_RAP_NORM\n",
+ "nData: 3 nSys: 31\n",
+ "-- COMMONDATA Files for ATLAS_SINGLETOP_TCH_DIFF_8TEV_T_RAP_NORM successfully read.\n",
+ "\n",
+ "ATLAS_SINGLETOP_TCH_DIFF_8TEV_TBAR_RAP_NORM\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: ATLAS_SINGLETOP_TCH_DIFF_8TEV_TBAR_RAP_NORM\n",
+ "nData: 3 nSys: 31\n",
+ "-- COMMONDATA Files for ATLAS_SINGLETOP_TCH_DIFF_8TEV_TBAR_RAP_NORM successfully read.\n",
+ "\n",
+ "CMSWEASY840PB\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: CMSWEASY840PB\n",
+ "nData: 11 nSys: 11\n",
+ "-- COMMONDATA Files for CMSWEASY840PB successfully read.\n",
+ "\n",
+ "CMSWMASY47FB\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: CMSWMASY47FB\n",
+ "nData: 11 nSys: 11\n",
+ "-- COMMONDATA Files for CMSWMASY47FB successfully read.\n",
+ "\n",
+ "CMSDY2D11\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: CMSDY2D11\n",
+ "nData: 132 nSys: 133\n",
+ "-- COMMONDATA Files for CMSDY2D11 successfully read.\n",
+ "\n",
+ "CMSWMU8TEV\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: CMSWMU8TEV\n",
+ "nData: 22 nSys: 45\n",
+ "-- COMMONDATA Files for CMSWMU8TEV successfully read.\n",
+ "\n",
+ "CMSZDIFF12\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: CMSZDIFF12\n",
+ "nData: 50 nSys: 52\n",
+ "-- COMMONDATA Files for CMSZDIFF12 successfully read.\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "CMS_2JET_7TEV\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: CMS_2JET_7TEV\n",
+ "nData: 54 nSys: 88\n",
+ "-- COMMONDATA Files for CMS_2JET_7TEV successfully read.\n",
+ "\n",
+ "CMS_1JET_8TEV\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: CMS_1JET_8TEV\n",
+ "nData: 239 nSys: 293\n",
+ "-- COMMONDATA Files for CMS_1JET_8TEV successfully read.\n",
+ "\n",
+ "CMSTTBARTOT7TEV\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: CMSTTBARTOT7TEV\n",
+ "nData: 1 nSys: 2\n",
+ "-- COMMONDATA Files for CMSTTBARTOT7TEV successfully read.\n",
+ "\n",
+ "CMSTTBARTOT8TEV\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: CMSTTBARTOT8TEV\n",
+ "nData: 1 nSys: 2\n",
+ "-- COMMONDATA Files for CMSTTBARTOT8TEV successfully read.\n",
+ "\n",
+ "CMSTTBARTOT13TEV\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: CMSTTBARTOT13TEV\n",
+ "nData: 1 nSys: 2\n",
+ "-- COMMONDATA Files for CMSTTBARTOT13TEV successfully read.\n",
+ "\n",
+ "CMSTOPDIFF8TEVTTRAPNORM\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: CMSTOPDIFF8TEVTTRAPNORM\n",
+ "nData: 10 nSys: 21\n",
+ "-- COMMONDATA Files for CMSTOPDIFF8TEVTTRAPNORM successfully read.\n",
+ "\n",
+ "CMSTTBARTOT5TEV\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: CMSTTBARTOT5TEV\n",
+ "nData: 1 nSys: 2\n",
+ "-- COMMONDATA Files for CMSTTBARTOT5TEV successfully read.\n",
+ "\n",
+ "CMS_TTBAR_2D_DIFF_MTT_TRAP_NORM\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: CMS_TTBAR_2D_DIFF_MTT_TRAP_NORM\n",
+ "nData: 16 nSys: 44\n",
+ "-- COMMONDATA Files for CMS_TTBAR_2D_DIFF_MTT_TRAP_NORM successfully read.\n",
+ "\n",
+ "CMS_TTB_DIFF_13TEV_2016_2L_TRAP\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: CMS_TTB_DIFF_13TEV_2016_2L_TRAP\n",
+ "nData: 10 nSys: 10\n",
+ "-- COMMONDATA Files for CMS_TTB_DIFF_13TEV_2016_2L_TRAP successfully read.\n",
+ "\n",
+ "CMS_TTB_DIFF_13TEV_2016_LJ_TRAP\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: CMS_TTB_DIFF_13TEV_2016_LJ_TRAP\n",
+ "nData: 11 nSys: 11\n",
+ "-- COMMONDATA Files for CMS_TTB_DIFF_13TEV_2016_LJ_TRAP successfully read.\n",
+ "\n",
+ "CMS_SINGLETOP_TCH_TOT_7TEV\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: CMS_SINGLETOP_TCH_TOT_7TEV\n",
+ "nData: 1 nSys: 3\n",
+ "-- COMMONDATA Files for CMS_SINGLETOP_TCH_TOT_7TEV successfully read.\n",
+ "\n",
+ "CMS_SINGLETOP_TCH_R_8TEV\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: CMS_SINGLETOP_TCH_R_8TEV\n",
+ "nData: 1 nSys: 1\n",
+ "-- COMMONDATA Files for CMS_SINGLETOP_TCH_R_8TEV successfully read.\n",
+ "\n",
+ "CMS_SINGLETOP_TCH_R_13TEV\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: CMS_SINGLETOP_TCH_R_13TEV\n",
+ "nData: 1 nSys: 1\n",
+ "-- COMMONDATA Files for CMS_SINGLETOP_TCH_R_13TEV successfully read.\n",
+ "\n",
+ "LHCBZ940PB\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: LHCBZ940PB\n",
+ "nData: 9 nSys: 11\n",
+ "-- COMMONDATA Files for LHCBZ940PB successfully read.\n",
+ "\n",
+ "LHCBZEE2FB_40\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: LHCBZEE2FB_40\n",
+ "nData: 17 nSys: 19\n",
+ "-- COMMONDATA Files for LHCBZEE2FB_40 successfully read.\n",
+ "\n",
+ "LHCBWZMU7TEV\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: LHCBWZMU7TEV\n",
+ "nData: 33 nSys: 35\n",
+ "-- COMMONDATA Files for LHCBWZMU7TEV successfully read.\n",
+ "\n",
+ "LHCBWZMU8TEV\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: LHCBWZMU8TEV\n",
+ "nData: 34 nSys: 36\n",
+ "-- COMMONDATA Files for LHCBWZMU8TEV successfully read.\n",
+ "\n",
+ "LHCB_Z_13TEV_DIMUON\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: LHCB_Z_13TEV_DIMUON\n",
+ "nData: 18 nSys: 19\n",
+ "-- COMMONDATA Files for LHCB_Z_13TEV_DIMUON successfully read.\n",
+ "\n",
+ "LHCB_Z_13TEV_DIELECTRON\n",
+ "\n",
+ "-- Reading COMMONDATA for Dataset: LHCB_Z_13TEV_DIELECTRON\n",
+ "nData: 17 nSys: 18\n",
+ "-- COMMONDATA Files for LHCB_Z_13TEV_DIELECTRON successfully read.\n",
+ "\n",
+ " vp pine ratio CMSTTBARTOT5TEV, ['QCD']\n",
+ " 0 0 0\n",
+ "data \n",
+ "0 69.137978 63.366976 1.091073\n",
+ " vp pine ratio CMS_TTBAR_2D_DIFF_MTT_TRAP_NORM, ['QCD']\n",
+ " 0 0 0\n",
+ "data \n",
+ "0 0.002605 0.003382 0.770101\n",
+ "1 0.002313 0.002982 0.775740\n",
+ "2 0.001639 0.002114 0.775299\n",
+ "3 0.000570 0.000741 0.769081\n",
+ "4 0.002545 0.002664 0.955411\n",
+ "5 0.002176 0.002363 0.921011\n",
+ "6 0.001513 0.001637 0.924156\n",
+ "7 0.000536 0.000578 0.927862\n",
+ "8 0.000998 0.001038 0.961663\n",
+ "9 0.000857 0.000930 0.921232\n",
+ "10 0.000624 0.000678 0.920042\n",
+ "11 0.000254 0.000276 0.918816\n",
+ "12 0.000072 0.000069 1.032299\n",
+ "13 0.000064 0.000069 0.923609\n",
+ "14 0.000059 0.000064 0.927297\n",
+ "15 0.000032 0.000035 0.918087\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Test them all\n",
+ "if True:\n",
+ " from yaml import safe_load\n",
+ " pdf = API.pdf(pdf=\"NNPDF40_nnlo_as_01180\")\n",
+ " all_res = []\n",
+ " # Reference here a NNPDF40 runcard to read up all datasets\n",
+ " #nnpdf40_runcard = safe_load(Path(\"/home/juacrumar/NNPDF-testing/nnpdf/n3fit/NNPDF40_with_pineappl.yml\").read_text())\n",
+ " nnpdf40_runcard = safe_load(Path(\"/mount/storage/Academic_Workspace/NNPDF/src/nnpdf/n3fit/NNPDF40_with_pineappl.yml\").read_text())\n",
+ " for d in nnpdf40_runcard[\"dataset_inputs\"]:\n",
+ " target_ds = d[\"dataset\"]\n",
+ " #if any(skipthis in target_ds for skipthis in [\"HERA\", \"NMC\", \"NTV\", \"CHORUS\", \"SLAC\", \"BCD\"]):\n",
+ " # continue\n",
+ " print(target_ds)\n",
+ " cfac = d.get(\"cfac\", [])\n",
+ " old_ds = API.dataset(dataset_input={\"dataset\": target_ds, \"cfac\": cfac}, theoryid=200, use_cuts=\"internal\")\n",
+ " ds = API.dataset(dataset_input={\"dataset\": target_ds, \"cfac\": cfac}, theoryid=400, use_cuts=\"internal\")\n",
+ " new_cp = central_predictions(ds, pdf)\n",
+ " cp = central_predictions(old_ds, pdf)\n",
+ " all_res.append(pd.concat([new_cp, cp, new_cp/cp], axis=1, keys=[\"vp\", \"pine\", f\"ratio {target_ds}, {cfac}\"]))\n",
+ " \n",
+ " for i in all_res:\n",
+ " mean_ratio = i[i.columns[2]].mean()\n",
+ " if not (0.95 < mean_ratio < 1.05):\n",
+ " print(i)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "a4d88771",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "target_ds = \"ATLAS_WP_JET_8TEV_PT\"\n",
+ "cfac = [\"QCD\"] # [\"NRM\"]\n",
+ "old_ds = API.dataset(dataset_input={\"dataset\": target_ds, \"cfac\": cfac}, theoryid=200, use_cuts=\"internal\")\n",
+ "ds = API.dataset(dataset_input={\"dataset\": target_ds, \"cfac\": cfac}, theoryid=400, use_cuts=\"internal\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "316a571e",
+ "metadata": {
+ "scrolled": false
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "-- Reading COMMONDATA for Dataset: ATLAS_WP_JET_8TEV_PT\n",
+ "nData: 16 nSys: 124\n",
+ "-- COMMONDATA Files for ATLAS_WP_JET_8TEV_PT successfully read.\n",
+ "\n",
+ "LHAPDF 6.4.0 loading /usr/share/lhapdf/LHAPDF/NNPDF40_nnlo_as_01180/NNPDF40_nnlo_as_01180_0000.dat\n",
+ "NNPDF40_nnlo_as_01180 PDF set, member #0, version 1; LHAPDF ID = 331100\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " pine | \n",
+ " vp | \n",
+ " ratio vp/ratio | \n",
+ " ratio pine/vp | \n",
+ "
\n",
+ " \n",
+ " | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | data | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 1 | \n",
+ " 5611.439817 | \n",
+ " 5616.248344 | \n",
+ " 1.000857 | \n",
+ " 0.999144 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3154.767573 | \n",
+ " 3157.559479 | \n",
+ " 1.000885 | \n",
+ " 0.999116 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 1194.467638 | \n",
+ " 1195.354244 | \n",
+ " 1.000742 | \n",
+ " 0.999258 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 515.187956 | \n",
+ " 515.574565 | \n",
+ " 1.000750 | \n",
+ " 0.999250 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " 246.767542 | \n",
+ " 246.941654 | \n",
+ " 1.000706 | \n",
+ " 0.999295 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " 126.644243 | \n",
+ " 126.731587 | \n",
+ " 1.000690 | \n",
+ " 0.999311 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " 70.244577 | \n",
+ " 70.295367 | \n",
+ " 1.000723 | \n",
+ " 0.999277 | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " 31.544483 | \n",
+ " 31.566211 | \n",
+ " 1.000689 | \n",
+ " 0.999312 | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " 11.817506 | \n",
+ " 11.825441 | \n",
+ " 1.000671 | \n",
+ " 0.999329 | \n",
+ "
\n",
+ " \n",
+ " | 10 | \n",
+ " 5.029027 | \n",
+ " 5.032150 | \n",
+ " 1.000621 | \n",
+ " 0.999379 | \n",
+ "
\n",
+ " \n",
+ " | 11 | \n",
+ " 2.290481 | \n",
+ " 2.291755 | \n",
+ " 1.000556 | \n",
+ " 0.999444 | \n",
+ "
\n",
+ " \n",
+ " | 12 | \n",
+ " 1.123706 | \n",
+ " 1.124275 | \n",
+ " 1.000506 | \n",
+ " 0.999494 | \n",
+ "
\n",
+ " \n",
+ " | 13 | \n",
+ " 0.565894 | \n",
+ " 0.566158 | \n",
+ " 1.000466 | \n",
+ " 0.999534 | \n",
+ "
\n",
+ " \n",
+ " | 14 | \n",
+ " 0.245723 | \n",
+ " 0.245824 | \n",
+ " 1.000415 | \n",
+ " 0.999586 | \n",
+ "
\n",
+ " \n",
+ " | 15 | \n",
+ " 0.054556 | \n",
+ " 0.054573 | \n",
+ " 1.000327 | \n",
+ " 0.999673 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " pine vp ratio vp/ratio ratio pine/vp\n",
+ " 0 0 0 0\n",
+ "data \n",
+ "1 5611.439817 5616.248344 1.000857 0.999144\n",
+ "2 3154.767573 3157.559479 1.000885 0.999116\n",
+ "3 1194.467638 1195.354244 1.000742 0.999258\n",
+ "4 515.187956 515.574565 1.000750 0.999250\n",
+ "5 246.767542 246.941654 1.000706 0.999295\n",
+ "6 126.644243 126.731587 1.000690 0.999311\n",
+ "7 70.244577 70.295367 1.000723 0.999277\n",
+ "8 31.544483 31.566211 1.000689 0.999312\n",
+ "9 11.817506 11.825441 1.000671 0.999329\n",
+ "10 5.029027 5.032150 1.000621 0.999379\n",
+ "11 2.290481 2.291755 1.000556 0.999444\n",
+ "12 1.123706 1.124275 1.000506 0.999494\n",
+ "13 0.565894 0.566158 1.000466 0.999534\n",
+ "14 0.245723 0.245824 1.000415 0.999586\n",
+ "15 0.054556 0.054573 1.000327 0.999673"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Let's try to get a prediction out of it\n",
+ "pdf = API.pdf(pdf=\"NNPDF40_nnlo_as_01180\")\n",
+ "new_cp = central_predictions(ds, pdf)\n",
+ "cp = central_predictions(old_ds, pdf)\n",
+ "pd.concat([new_cp, cp, cp/new_cp, new_cp/cp], axis=1, keys=[\"pine\", \"vp\", \"ratio vp/ratio\", \"ratio pine/vp\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "0905d0c2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pine_fkspec = ds.fkspecs[0]\n",
+ "old_fkspec = old_ds.fkspecs[0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "acd19243",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "LHAPDF 6.4.0 loading all 101 PDFs in set NNPDF40_nnlo_as_01180\n",
+ "NNPDF40_nnlo_as_01180, version 1; 101 PDF members\n",
+ "[0.5982222 0.53206434 0.37638204 0.13101867 0.59686953 0.51087613\n",
+ " 0.35425162 0.12440635 0.23262168 0.1988827 0.14422644 0.05755101\n",
+ " 0.01654738 0.01469668 0.01335605 0.00692297]\n"
+ ]
+ }
+ ],
+ "source": [
+ "import pineappl\n",
+ "pines = [pineappl.fk_table.FkTable.read(i.as_posix()) for i in pine_fkspec.fkpath]\n",
+ "# Inspect the pineappl prediction\n",
+ "res_pine = []\n",
+ "pp = pines[0]\n",
+ "lpdf = pdf.load()\n",
+ "\n",
+ "for p in pines:\n",
+ " res_pine.append(p.convolute_with_one(2212, lpdf.central_member.xfxQ2))\n",
+ "total_pine = np.concatenate(res_pine)\n",
+ "print(total_pine)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "f3a77d17",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Let's inspect the content of the old fktables, remove the cfactor for now\n",
+ "from validphys.fkparser import load_fktable\n",
+ "old_fkspec.cfactors = False\n",
+ "old_fktabledata = load_fktable(old_fkspec)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "7ab604e1",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "hadronic?: True\n",
+ "Q: 1.65\n",
+ "n: 16\n",
+ "xgrid shape: (40,)\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(f\"hadronic?: {old_fktabledata.hadronic}\")\n",
+ "print(f\"Q: {old_fktabledata.Q0}\")\n",
+ "print(f\"n: {old_fktabledata.ndata}\")\n",
+ "print(f\"xgrid shape: {old_fktabledata.xgrid.shape}\")\n",
+ "#old_fktabledata.sigma"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "62709983",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# First read the metadata that vp `FKTableData` needs and that all subgrids share\n",
+ "Q0 = np.sqrt(pp.muf2())\n",
+ "xgrid = pp.x_grid()\n",
+ "# Hadronic means in practice that not all luminosity combinations are just electron X proton\n",
+ "hadronic = not all(-11 in i for i in pp.lumi())\n",
+ "# Now prepare the concatenation of grids\n",
+ "fktables = []\n",
+ "for p in pines:\n",
+ " tmp = p.table().T/p.bin_normalizations()\n",
+ " fktables.append(tmp.T)\n",
+ "fktable = np.concatenate(fktables, axis=0)\n",
+ "ndata = fktable.shape[0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "d9d5a130",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[(100, 21),\n",
+ " (100, 203),\n",
+ " (100, 208),\n",
+ " (100, 200),\n",
+ " (100, 103),\n",
+ " (100, 108),\n",
+ " (100, 115),\n",
+ " (21, 21),\n",
+ " (21, 203),\n",
+ " (21, 208),\n",
+ " (21, 200),\n",
+ " (21, 103),\n",
+ " (21, 108),\n",
+ " (21, 115),\n",
+ " (21, 100),\n",
+ " (200, 203),\n",
+ " (200, 208),\n",
+ " (203, 203),\n",
+ " (203, 208),\n",
+ " (203, 200),\n",
+ " (203, 103),\n",
+ " (203, 108),\n",
+ " (203, 115),\n",
+ " (203, 100),\n",
+ " (208, 208),\n",
+ " (208, 200),\n",
+ " (208, 103),\n",
+ " (208, 108),\n",
+ " (208, 115),\n",
+ " (208, 100),\n",
+ " (200, 200),\n",
+ " (200, 103),\n",
+ " (200, 108),\n",
+ " (200, 115),\n",
+ " (200, 100),\n",
+ " (103, 103),\n",
+ " (103, 108),\n",
+ " (103, 115),\n",
+ " (103, 100),\n",
+ " (108, 108),\n",
+ " (108, 115),\n",
+ " (108, 100),\n",
+ " (115, 115),\n",
+ " (115, 100),\n",
+ " (100, 100)]"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pp.lumi()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "b95c8549",
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "NameError",
+ "evalue": "name 'df' is not defined",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
+ "Input \u001b[0;32mIn [11]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mdf\u001b[49m\u001b[38;5;241m.\u001b[39mcolumns\n",
+ "\u001b[0;31mNameError\u001b[0m: name 'df' is not defined"
+ ]
+ }
+ ],
+ "source": [
+ "df.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "99782f74",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Now let's try to join the fktable, luminosity and xgrid into a pandas dataframe\n",
+ "# keeping compatibility with validphys and, hopefully, 50% of my own sanity\n",
+ "\n",
+ "# Step 1), make the luminosity into a 14x14 mask for the evolution basis\n",
+ "eko_numbering_scheme = (22, 100, 21, 200, 203, 208, 215, 224, 235, 103, 108, 115, 124, 135)\n",
+ "# note that this is the same ordering that was used in fktables\n",
+ "co = []\n",
+ "for i, j in pp.lumi():\n",
+ " # Ask where this would fall in a 14x14 matrix\n",
+ " idx = eko_numbering_scheme.index(i)\n",
+ " jdx = eko_numbering_scheme.index(j)\n",
+ " co.append(idx*14 + jdx)\n",
+ " \n",
+ "# Step 2) prepare the indices for the dataframe\n",
+ "xi = np.arange(len(xgrid))\n",
+ "ni = np.arange(ndata)\n",
+ "mi = pd.MultiIndex.from_product([ni, xi, xi], names=[\"data\", \"x1\", \"x2\"])\n",
+ "\n",
+ "# Step 3) Now play with the array until we flatten it in the right way?\n",
+ "# The fktables for pineappl have this extra factor of x...\n",
+ "# The output of pineappl is (ndata, flavours, x, x)\n",
+ "lf = len(co)\n",
+ "xfktable = fktable.reshape(ndata, lf, -1)/(xgrid[:,None]*xgrid[None,:]).flatten()\n",
+ "fkmod = np.moveaxis(xfktable, 1, -1)\n",
+ "fkframe = fkmod.reshape(-1, lf)\n",
+ "\n",
+ "# Uff, big\n",
+ "df = pd.DataFrame(fkframe, index=mi, columns=co)\n",
+ "\n",
+ "from validphys.convolution import central_hadron_predictions\n",
+ "from validphys.coredata import FKTableData\n",
+ "fk = FKTableData(sigma=df, ndata=ndata, Q0=Q0, metadata=None, hadronic=True, xgrid=xgrid)\n",
+ "central_hadron_predictions(fk, pdf)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "67e323f7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Create a luminosity tensor and check that the results are correct\n",
+ "from validphys.pdfbases import evolution\n",
+ "\n",
+ "evol_basis = (\n",
+ " \"photon\",\n",
+ " \"singlet\",\n",
+ " \"g\",\n",
+ " \"V\",\n",
+ " \"V3\",\n",
+ " \"V8\",\n",
+ " \"V15\",\n",
+ " \"V24\",\n",
+ " \"V35\",\n",
+ " \"T3\",\n",
+ " \"T8\",\n",
+ " \"T15\",\n",
+ " \"T24\",\n",
+ " \"T35\",\n",
+ ")\n",
+ "total_pdf = evolution.grid_values(pdf, evol_basis, xgrid, [Q0]).squeeze()[0]/xgrid\n",
+ "print(total_pdf.shape)\n",
+ "lumi = np.einsum('ij,kl->ikjl', total_pdf, total_pdf)\n",
+ "lumi_masked = lumi[flavour_map]\n",
+ "print(fktable.shape)\n",
+ "print(lumi_masked.shape)\n",
+ "res = np.einsum('ijkl,jkl->i', fktable, lumi_masked)\n",
+ "#pd.concat([pd.DataFrame(res), cp, pd.DataFrame(res)/cp, ], axis=1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c2ff2522",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "xfktable.reshape(48,91,-1).shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4905c4a9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from validphys.fkparser import open_fkpath, _parse_string, _parse_header, _build_sigma\n",
+ "from validphys.fkparser import _parse_flavour_map, _parse_hadronic_fast_kernel\n",
+ "try:\n",
+ " f.close()\n",
+ "except:\n",
+ " pass\n",
+ "f = open_fkpath(old_fkspec.fkpath)\n",
+ "line_and_stream = enumerate(f, start=1)\n",
+ "lineno, header = next(line_and_stream)\n",
+ "res = {}\n",
+ "while True:\n",
+ " marker, header_name = _parse_header(lineno, header)\n",
+ " if header_name == \"FastKernel\":\n",
+ " break\n",
+ " if header_name == \"FlavourMap\":\n",
+ " out, lineno, header = _parse_flavour_map(line_and_stream)\n",
+ " else:\n",
+ " out, lineno, header = _parse_string(line_and_stream)\n",
+ " res[header_name] = out "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ffd41ff3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "res[\"FlavourMap\"].shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3d00059d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "i_hate_pandas = _parse_hadronic_fast_kernel(f)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "bde47252",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "i_hate_pandas"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c7dcc9c6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "old_fktabledata.sigma"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "910dbb9d",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "nnvortex",
+ "language": "python",
+ "name": "nnvortex"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml
index e84f57d9f6..adc3c6328e 100644
--- a/conda-recipe/meta.yaml
+++ b/conda-recipe/meta.yaml
@@ -52,6 +52,7 @@ requirements:
- sphinxcontrib-bibtex
- docutils =0.16 # This dependency is not explicity needed but https://github.com/NNPDF/nnpdf/issues/1220
- curio >=1.0
+ - pineappl >=0.5.2
test:
requires:
diff --git a/n3fit/src/n3fit/layers/observable.py b/n3fit/src/n3fit/layers/observable.py
index 603804f523..7ac2f08df8 100644
--- a/n3fit/src/n3fit/layers/observable.py
+++ b/n3fit/src/n3fit/layers/observable.py
@@ -5,7 +5,7 @@
def _is_unique(list_of_arrays):
- """ Check whether the list of arrays more than one different arrays """
+    """Check whether the list of arrays contains more than one distinct array"""
the_first = list_of_arrays[0]
for i in list_of_arrays[1:]:
if not np.array_equal(the_first, i):
@@ -15,30 +15,30 @@ def _is_unique(list_of_arrays):
class Observable(MetaLayer, ABC):
"""
- This class is the parent of the DIS and DY convolutions.
- All backend-dependent code necessary for the convolutions
- is (must be) concentrated here
+ This class is the parent of the DIS and DY convolutions.
+ All backend-dependent code necessary for the convolutions
+ is (must be) concentrated here
- The methods gen_mask and call must be overriden by the observables
- where
- - gen_mask: it is called by the initializer and generates the mask between
- fktables and pdfs
- - call: this is what does the actual operation
+    The methods gen_mask and call must be overridden by the observables,
+    where
+    - gen_mask: called by the initializer, it generates the mask between
+      fktables and pdfs
+    - call: performs the actual operation
- Parameters
- ----------
- fktable_dicts: list
- list of fktable_dicts which define basis and xgrid for the fktables in the list
- fktable_arr: list
- list of fktables for this observable
- operation_name: str
- string defining the name of the operation to be applied to the fktables
- nfl: int
- number of flavours in the pdf (default:14)
+ Parameters
+ ----------
+ fktable_data: list[validphys.coredata.FKTableData]
+        list of FKTableData objects which define the basis and xgrid for each fktable in the list
+ fktable_arr: list
+ list of fktables for this observable
+ operation_name: str
+ string defining the name of the operation to be applied to the fktables
+ nfl: int
+ number of flavours in the pdf (default:14)
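+
+    Examples
+    --------
+    A sketch of the intended call pattern, mirroring ``n3fit.tests.test_layers``
+    (``fktable_data``, ``fktable_arr`` as above; ``pdf_tensor`` a batched pdf tensor):
+
+    >>> obs_layer = DIS(fktable_data, fktable_arr, "NULL", nfl=14)
+    >>> observable = obs_layer(pdf_tensor)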
"""
- def __init__(self, fktable_dicts, fktable_arr, operation_name, nfl=14, **kwargs):
+ def __init__(self, fktable_data, fktable_arr, operation_name, nfl=14, **kwargs):
super(MetaLayer, self).__init__(**kwargs)
self.nfl = nfl
@@ -46,9 +46,9 @@ def __init__(self, fktable_dicts, fktable_arr, operation_name, nfl=14, **kwargs)
basis = []
xgrids = []
self.fktables = []
- for fktable, fk in zip(fktable_dicts, fktable_arr):
- xgrids.append(fktable["xgrid"])
- basis.append(fktable["basis"])
+ for fkdata, fk in zip(fktable_data, fktable_arr):
+ xgrids.append(fkdata.xgrid.reshape(1, -1))
+ basis.append(fkdata.luminosity_mapping)
self.fktables.append(op.numpy_to_tensor(fk))
# check how many xgrids this dataset needs
diff --git a/n3fit/src/n3fit/model_gen.py b/n3fit/src/n3fit/model_gen.py
index 9ab6376996..fa0688110d 100644
--- a/n3fit/src/n3fit/model_gen.py
+++ b/n3fit/src/n3fit/model_gen.py
@@ -134,23 +134,23 @@ def observable_generator(
model_obs_ex = []
model_inputs = []
# The first step is to compute the observable for each of the datasets
- for dataset_dict in spec_dict["datasets"]:
+ for dataset in spec_dict["datasets"]:
# Get the generic information of the dataset
- dataset_name = dataset_dict["name"]
+ dataset_name = dataset.name
# Look at what kind of layer do we need for this dataset
- if dataset_dict["hadronic"]:
+ if dataset.hadronic:
Obs_Layer = DY
else:
Obs_Layer = DIS
# Set the operation (if any) to be applied to the fktables of this dataset
- operation_name = dataset_dict["operation"]
+ operation_name = dataset.operation
# Now generate the observable layer, which takes the following information:
# operation name
# dataset name
- # list of fktable_dictionaries
+ # list of validphys.coredata.FKTableData objects
# these will then be used to check how many different pdf inputs are needed
# (and convolutions if given the case)
@@ -158,8 +158,8 @@ def observable_generator(
# Positivity (and integrability, which is a special kind of positivity...)
# enters only at the "training" part of the models
obs_layer_tr = Obs_Layer(
- dataset_dict["fktables"],
- dataset_dict["tr_fktables"],
+ dataset.fktables_data,
+ dataset.training_fktables(),
operation_name,
name=f"dat_{dataset_name}",
)
@@ -167,39 +167,39 @@ def observable_generator(
elif spec_dict.get("data_transformation_tr") is not None:
# Data transformation needs access to the full array of output data
obs_layer_ex = Obs_Layer(
- dataset_dict["fktables"],
- dataset_dict["ex_fktables"],
+ dataset.fktables_data,
+ dataset.fktables(),
operation_name,
name=f"exp_{dataset_name}",
)
obs_layer_tr = obs_layer_vl = obs_layer_ex
else:
obs_layer_tr = Obs_Layer(
- dataset_dict["fktables"],
- dataset_dict["tr_fktables"],
+ dataset.fktables_data,
+ dataset.training_fktables(),
operation_name,
name=f"dat_{dataset_name}",
)
obs_layer_ex = Obs_Layer(
- dataset_dict["fktables"],
- dataset_dict["ex_fktables"],
+ dataset.fktables_data,
+ dataset.fktables(),
operation_name,
name=f"exp_{dataset_name}",
)
obs_layer_vl = Obs_Layer(
- dataset_dict["fktables"],
- dataset_dict["vl_fktables"],
+ dataset.fktables_data,
+ dataset.validation_fktables(),
operation_name,
name=f"val_{dataset_name}",
)
# To know how many xpoints we compute we are duplicating functionality from obs_layer
if obs_layer_tr.splitting is None:
- xgrid = dataset_dict["fktables"][0]["xgrid"]
+ xgrid = dataset.fktables_data[0].xgrid.reshape(1, -1)
model_inputs.append(xgrid)
dataset_xsizes.append(xgrid.shape[1])
else:
- xgrids = [i["xgrid"] for i in dataset_dict["fktables"]]
+ xgrids = [i.xgrid.reshape(1, -1) for i in dataset.fktables_data]
model_inputs += xgrids
dataset_xsizes.append(sum([i.shape[1] for i in xgrids]))
diff --git a/n3fit/src/n3fit/model_trainer.py b/n3fit/src/n3fit/model_trainer.py
index 81caa0d78c..a690cd27a1 100644
--- a/n3fit/src/n3fit/model_trainer.py
+++ b/n3fit/src/n3fit/model_trainer.py
@@ -131,6 +131,8 @@ def __init__(
"""
# Save all input information
self.exp_info = exp_info
+ if pos_info is None:
+ pos_info = []
self.pos_info = pos_info
self.integ_info = integ_info
if self.integ_info is not None:
@@ -275,7 +277,7 @@ def _fill_the_dictionaries(self):
self.experimental["ndata"] += nd_tr + nd_vl
for dataset in exp_dict["datasets"]:
- self.all_datasets.append(dataset["name"])
+ self.all_datasets.append(dataset.name)
self.all_datasets = set(self.all_datasets)
for pos_dict in self.pos_info:
@@ -532,7 +534,7 @@ def _generate_observables(
force_set_smallest = input_arr.min() > 1e-9
if force_set_smallest:
new_xgrid = np.linspace(
- start=1/input_arr_size, stop=1.0, endpoint=False, num=input_arr_size
+ start=1 / input_arr_size, stop=1.0, endpoint=False, num=input_arr_size
)
else:
new_xgrid = np.linspace(start=0, stop=1.0, endpoint=False, num=input_arr_size)
@@ -566,8 +568,7 @@ def _generate_observables(
scaler = PchipInterpolator(map_from, map_to)
except ValueError:
raise ValueError(
- "interpolation_points is larger than the number of unique "
- "input x-values"
+            "interpolation_points is larger than the number of unique input x-values"
)
self._scaler = lambda x: np.concatenate([scaler(np.log(x)), x], axis=-1)
@@ -918,7 +919,9 @@ def hyperparametrizable(self, params):
# by adding it to this dictionary
dict_out = {
"status": passed,
- "loss": self._hyper_loss(fold_losses=l_hyper, n3pdfs=n3pdfs, experimental_models=exp_models),
+ "loss": self._hyper_loss(
+ fold_losses=l_hyper, n3pdfs=n3pdfs, experimental_models=exp_models
+ ),
"validation_loss": np.average(l_valid),
"experimental_loss": np.average(l_exper),
"kfold_meta": {
diff --git a/n3fit/src/n3fit/tests/test_layers.py b/n3fit/src/n3fit/tests/test_layers.py
index d8df1326e3..b1d662450f 100644
--- a/n3fit/src/n3fit/tests/test_layers.py
+++ b/n3fit/src/n3fit/tests/test_layers.py
@@ -2,7 +2,7 @@
Tests for the layers of n3fit
This module checks that the layers do what they would do with numpy
"""
-
+import dataclasses
import numpy as np
from validphys.pdfbases import fitbasis_to_NN31IC
from n3fit.backends import operations as op
@@ -14,9 +14,19 @@
NDATA = 3
THRESHOLD = 1e-6
+
+@dataclasses.dataclass
+class _fake_FKTableData:
+ """Fake validphys.coredata.FKTableData to be used in the tests"""
+
+    fktable: np.ndarray
+    luminosity_mapping: np.ndarray
+    xgrid: np.ndarray
+
+
# Helper functions
def generate_input_had(flavs=3, xsize=2, ndata=4, n_combinations=None):
- """ Generates fake input (fktable and array of combinations) for the hadronic convolution
+ """Generates fake input (fktable and array of combinations) for the hadronic convolution
Parameters
----------
@@ -39,9 +49,7 @@ def generate_input_had(flavs=3, xsize=2, ndata=4, n_combinations=None):
lc = len(combinations)
else:
bchoice = sorted(
- np.random.choice(
- np.arange(flavs * flavs), size=n_combinations, replace=False
- )
+ np.random.choice(np.arange(flavs * flavs), size=n_combinations, replace=False)
)
combinations = [all_combinations[i] for i in bchoice]
lc = n_combinations
@@ -52,7 +60,7 @@ def generate_input_had(flavs=3, xsize=2, ndata=4, n_combinations=None):
# Generate an FK table, PDF and combinations list for DIS
def generate_input_DIS(flavs=3, xsize=2, ndata=5, n_combinations=-1):
- """ Generates fake input (fktable and array of combinations) for the DIS convolution
+ """Generates fake input (fktable and array of combinations) for the DIS convolution
Parameters
----------
@@ -82,34 +90,32 @@ def generate_input_DIS(flavs=3, xsize=2, ndata=5, n_combinations=-1):
def generate_DIS(nfk=1):
- fkdicts = []
- for i in range(nfk):
+ fktables = []
+ for _ in range(nfk):
fk, comb = generate_input_DIS(
flavs=FLAVS, xsize=XSIZE, ndata=NDATA, n_combinations=FLAVS - 1
)
- fkdicts.append({"fktable": fk, "basis": comb, "xgrid": np.ones((1, XSIZE))})
- return fkdicts
+ fktables.append(_fake_FKTableData(fk, comb, np.ones((1, XSIZE))))
+ return fktables
def generate_had(nfk=1):
- fkdicts = []
- for i in range(nfk):
- fk, comb = generate_input_had(
- flavs=FLAVS, xsize=XSIZE, ndata=NDATA, n_combinations=FLAVS
- )
- fkdicts.append({"fktable": fk, "basis": comb, "xgrid": np.ones((1, XSIZE))})
- return fkdicts
+ fktables = []
+ for _ in range(nfk):
+ fk, comb = generate_input_had(flavs=FLAVS, xsize=XSIZE, ndata=NDATA, n_combinations=FLAVS)
+ fktables.append(_fake_FKTableData(fk, comb, np.ones((1, XSIZE))))
+ return fktables
# Tests
def test_DIS_basis():
- fkdicts = generate_DIS(2)
- fks = [i['fktable'] for i in fkdicts]
- obs_layer = layers.DIS(fkdicts, fks, "NULL", nfl=FLAVS)
+ fktables = generate_DIS(2)
+ fks = [i.fktable for i in fktables]
+ obs_layer = layers.DIS(fktables, fks, "NULL", nfl=FLAVS)
# Get the masks from the layer
all_masks = obs_layer.all_masks
- for result, fk in zip(all_masks, fkdicts):
- comb = fk["basis"]
+ for result, fk in zip(all_masks, fktables):
+ comb = fk.luminosity_mapping
# Compute the basis with numpy
reference = np.zeros(FLAVS, dtype=bool)
for i in comb:
@@ -118,13 +124,13 @@ def test_DIS_basis():
def test_DY_basis():
- fkdicts = generate_had(2)
- fks = [i['fktable'] for i in fkdicts]
- obs_layer = layers.DY(fkdicts, fks, "NULL", nfl=FLAVS)
+ fktables = generate_had(2)
+ fks = [i.fktable for i in fktables]
+ obs_layer = layers.DY(fktables, fks, "NULL", nfl=FLAVS)
# Get the mask from the layer
all_masks = obs_layer.all_masks
- for result, fk in zip(all_masks, fkdicts):
- comb = fk["basis"]
+ for result, fk in zip(all_masks, fktables):
+ comb = fk.luminosity_mapping
reference = np.zeros((FLAVS, FLAVS))
for i, j in comb:
reference[i, j] = True
@@ -135,9 +141,9 @@ def test_DIS():
tests = [(2, "ADD"), (1, "NULL")]
for nfk, ope in tests:
# Input values
- fkdicts = generate_DIS(nfk)
- fks = [i['fktable'] for i in fkdicts]
- obs_layer = layers.DIS(fkdicts, fks, ope, nfl=FLAVS)
+ fktables = generate_DIS(nfk)
+ fks = [i.fktable for i in fktables]
+ obs_layer = layers.DIS(fktables, fks, ope, nfl=FLAVS)
pdf = np.random.rand(XSIZE, FLAVS)
kp = op.numpy_to_tensor(np.expand_dims(pdf, 0))
# generate the n3fit results
@@ -148,8 +154,8 @@ def test_DIS():
if len(all_masks) < nfk:
all_masks *= nfk
reference = 0
- for fkdict, mask in zip(fkdicts, all_masks):
- fk = fkdict["fktable"]
+ for fktabledata, mask in zip(fktables, all_masks):
+ fk = fktabledata.fktable
pdf_masked = pdf.T[mask.numpy()].T
reference += np.tensordot(fk, pdf_masked, axes=[[2, 1], [0, 1]])
assert np.allclose(result, reference, THRESHOLD)
@@ -159,12 +165,12 @@ def test_DY():
tests = [(2, "ADD"), (1, "NULL")]
for nfk, ope in tests:
# Input values
- fkdicts = generate_had(nfk)
- fks = [i['fktable'] for i in fkdicts]
- obs_layer = layers.DY(fkdicts, fks, ope, nfl=FLAVS)
+ fktables = generate_had(nfk)
+ fks = [i.fktable for i in fktables]
+ obs_layer = layers.DY(fktables, fks, ope, nfl=FLAVS)
pdf = np.random.rand(XSIZE, FLAVS)
# Add batch dimension (0) and replica dimension (-1)
- kp = op.numpy_to_tensor(np.expand_dims(pdf, [0,-1]))
+ kp = op.numpy_to_tensor(np.expand_dims(pdf, [0, -1]))
# generate the n3fit results
result_tensor = obs_layer(kp)
result = op.evaluate(result_tensor)
@@ -173,8 +179,8 @@ def test_DY():
if len(all_masks) < nfk:
all_masks *= nfk
reference = 0
- for fkdict, mask in zip(fkdicts, all_masks):
- fk = fkdict["fktable"]
+ for fktabledata, mask in zip(fktables, all_masks):
+ fk = fktabledata.fktable
lumi = np.tensordot(pdf, pdf, axes=0)
lumi_perm = np.moveaxis(lumi, [1, 3], [0, 1])
lumi_masked = lumi_perm[mask.numpy()]
@@ -197,12 +203,12 @@ def test_rotation_flavour():
# Apply the rotation using numpy tensordot
x = np.ones(8) # Vector in the flavour basis v_i
x = np.expand_dims(x, axis=[0, 1]) # Give to the input the shape (1,1,8)
- mat = fitbasis_to_NN31IC(flav_info, 'FLAVOUR') # Rotation matrix R_ij, i=flavour, j=evolution
+ mat = fitbasis_to_NN31IC(flav_info, "FLAVOUR") # Rotation matrix R_ij, i=flavour, j=evolution
res_np = np.tensordot(x, mat, (2, 0)) # Vector in the evolution basis u_j=R_ij*vi
# Apply the rotation through the rotation layer
x = op.numpy_to_tensor(x)
- rotmat = layers.FlavourToEvolution(flav_info, 'FLAVOUR')
+ rotmat = layers.FlavourToEvolution(flav_info, "FLAVOUR")
res_layer = rotmat(x)
assert np.alltrue(res_np == res_layer)
@@ -222,33 +228,34 @@ def test_rotation_evol():
# Apply the rotation using numpy tensordot
x = np.ones(8) # Vector in the flavour basis v_i
x = np.expand_dims(x, axis=[0, 1]) # Give to the input the shape (1,1,8)
- mat = fitbasis_to_NN31IC(flav_info, 'EVOL') # Rotation matrix R_ij, i=flavour, j=evolution
+ mat = fitbasis_to_NN31IC(flav_info, "EVOL") # Rotation matrix R_ij, i=flavour, j=evolution
res_np = np.tensordot(x, mat, (2, 0)) # Vector in the evolution basis u_j=R_ij*vi
# Apply the rotation through the rotation layer
x = op.numpy_to_tensor(x)
- rotmat = layers.FlavourToEvolution(flav_info, 'EVOL')
+ rotmat = layers.FlavourToEvolution(flav_info, "EVOL")
res_layer = rotmat(x)
- assert np.alltrue(res_np == res_layer)
-
+ assert np.alltrue(res_np == res_layer)
+
+
def test_mask():
- """ Test the mask layer """
+ """Test the mask layer"""
SIZE = 100
fi = np.random.rand(SIZE)
# Check that the multiplier works
vals = [0.0, 2.0, np.random.rand()]
for val in vals:
- masker = layers.Mask(c = val)
+ masker = layers.Mask(c=val)
ret = masker(fi)
- np.testing.assert_allclose(ret, val*fi, rtol=1e-5)
+ np.testing.assert_allclose(ret, val * fi, rtol=1e-5)
# Check that the boolean works
np_mask = np.random.randint(0, 2, size=SIZE, dtype=bool)
- masker = layers.Mask(bool_mask = np_mask)
+ masker = layers.Mask(bool_mask=np_mask)
ret = masker(fi)
masked_fi = fi[np_mask]
np.testing.assert_allclose(ret, masked_fi, rtol=1e-5)
# Check that the combination works!
rn_val = vals[-1]
- masker = layers.Mask(bool_mask = np_mask, c = rn_val)
+ masker = layers.Mask(bool_mask=np_mask, c=rn_val)
ret = masker(fi)
- np.testing.assert_allclose(ret, masked_fi*rn_val, rtol=1e-5)
+ np.testing.assert_allclose(ret, masked_fi * rn_val, rtol=1e-5)
diff --git a/nnpdfcpp/data/theory.db b/nnpdfcpp/data/theory.db
index 8e95cc4371..6ff99d7a07 100644
Binary files a/nnpdfcpp/data/theory.db and b/nnpdfcpp/data/theory.db differ
diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py
index 0ec4eef36c..70b3c09e00 100644
--- a/validphys2/src/validphys/config.py
+++ b/validphys2/src/validphys/config.py
@@ -941,7 +941,12 @@ def _parse_lagrange_multiplier(self, kind, theoryid, setdict):
raise ConfigError(bad_msg, setdict.keys(), e.args[0]) from e
except ValueError as e:
raise ConfigError(bad_msg) from e
- return self.loader.check_posset(theoryno, name, maxlambda)
+ if kind == "posdataset":
+ return self.loader.check_posset(theoryno, name, maxlambda)
+ elif kind == "integdataset":
+ return self.loader.check_integset(theoryno, name, maxlambda)
+ else:
+ raise ConfigError(f"The lagrange multiplier type {kind} is not understood")
@element_of("posdatasets")
def parse_posdataset(self, posset: dict, *, theoryid):
diff --git a/validphys2/src/validphys/convolution.py b/validphys2/src/validphys/convolution.py
index 5203cb8e77..01795dcdf2 100644
--- a/validphys2/src/validphys/convolution.py
+++ b/validphys2/src/validphys/convolution.py
@@ -120,9 +120,15 @@ def _predictions(dataset, pdf, fkfunc):
"commondata and is not supported."
)
cuts = dataset.cuts.load()
- all_predictions = [
- fkfunc(load_fktable(fk).with_cuts(cuts), pdf) for fk in dataset.fkspecs
- ]
+ all_predictions = []
+ for fk in dataset.fkspecs:
+ fk_w_cuts = load_fktable(fk).with_cuts(cuts)
+ all_predictions.append(fkfunc(fk_w_cuts, pdf))
+    # Old fktables repeated values to make the DEN and NUM sizes match in RATIO operations,
+    # while pineappl tables contain only the one value that is actually used.
+    # The code below works for both situations while keeping `true_div` as the operation
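+    # Taking ``.values`` drops the pandas index from the denominator, so the
+    # division broadcasts positionally instead of aligning mismatched data indices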
+ if dataset.op == "RATIO":
+ all_predictions[-1] = all_predictions[-1].values
return opfunc(*all_predictions)
diff --git a/validphys2/src/validphys/core.py b/validphys2/src/validphys/core.py
index 8681856129..3b68ee4819 100644
--- a/validphys2/src/validphys/core.py
+++ b/validphys2/src/validphys/core.py
@@ -39,6 +39,8 @@
from validphys.hyperoptplot import HyperoptTrial
from validphys.utils import experiments_to_dataset_inputs
from validphys.lhapdfset import LHAPDFSet
+from validphys.fkparser import load_fktable
+from validphys.pineparser import pineappl_reader
log = logging.getLogger(__name__)
@@ -320,7 +322,7 @@ def plot_kinlabels(self):
class DataSetInput(TupleComp):
- """Represents whatever the user enters in the YAML to specidy a
+ """Represents whatever the user enters in the YAML to specify a
dataset."""
def __init__(self, *, name, sys, cfac, frac, weight, custom_group):
self.name=name
@@ -451,7 +453,8 @@ def __init__(self, *, name, commondata, fkspecs, thspec, cuts,
if isinstance(fkspecs, FKTableSpec):
fkspecs = (fkspecs,)
- self.fkspecs = tuple(fkspecs)
+ fkspecs = tuple(fkspecs)
+ self.fkspecs = fkspecs
self.thspec = thspec
self.cuts = cuts
@@ -498,6 +501,16 @@ def load(self):
data = DataSet(data, intmask)
return data
+ def load_commondata(self):
+ """Strips the commondata loading from `load`"""
+ cd = self.commondata.load()
+ if self.cuts is not None:
+ loaded_cuts = self.cuts.load()
+ if not (hasattr(loaded_cuts, '_full') and loaded_cuts._full):
+ intmask = [int(ele) for ele in loaded_cuts]
+ cd = CommonData(cd, intmask)
+ return cd
+
def to_unweighted(self):
"""Return a copy of the dataset with the weight set to one."""
return self.__class__(
@@ -515,28 +528,75 @@ def __str__(self):
return self.name
class FKTableSpec(TupleComp):
- def __init__(self, fkpath, cfactors):
+ """
+    In legacy mode each fktable has one single path.
+    A pineappl FKTable, instead, can be formed by any number of grids;
+    therefore, to check whether we have a new-format or an old-format table,
+    we simply check whether ``fkpath`` is a list.
+    For now the metadata is held as an attribute of this class.
+    This is transitional, since the same metadata is already part of the new CommonData format.
+ """
+
+ def __init__(self, fkpath, cfactors, metadata=None):
+ self.cfactors = cfactors if cfactors is not None else []
+ # NOTE: the legacy interface is expected to be removed by NNPDF5.0
+ # so please don't write code that relies on it
+ self.legacy = True
+ if isinstance(fkpath, (tuple, list)):
+ self.legacy = False
+ fkpath = tuple(fkpath)
self.fkpath = fkpath
- self.cfactors = cfactors
- super().__init__(fkpath, cfactors)
+ self.metadata = metadata
- #NOTE: We cannot do this because Fkset owns the fktable, and trying
- #to reuse the loaded one fails after it gets deleted.
- #@functools.lru_cache()
- def load(self):
+        # If this is a yaml file that loads an applgrid-converted pineappl,
+        # keep also the name of the target dataset,
+        # since the same grid can now easily be reused by several datasets
+ if not self.legacy and self.metadata.get("appl"):
+ super().__init__(fkpath, cfactors, self.metadata.get("target_dataset"))
+ else:
+ super().__init__(fkpath, cfactors)
+
+ def _load_legacy(self):
return FKTable(str(self.fkpath), [str(factor) for factor in self.cfactors])
-class PositivitySetSpec(DataSetSpec):
- """Extends DataSetSpec to work around the particularities of the positivity datasets"""
+ def _load_pineappl(self):
+ log.info("Reading: %s", self.fkpath)
+ return pineappl_reader(self)
+
+ def load_with_cuts(self, cuts):
+        """Load the fktable and apply cuts immediately. Returns a FKTableData"""
+ return load_fktable(self).with_cuts(cuts)
+
+ def load(self):
+ if self.legacy:
+ return self._load_legacy()
+ return self._load_pineappl()
+
+
+class LagrangeSetSpec(DataSetSpec):
+ """Extends DataSetSpec to work around the particularities of the positivity, integrability
+ and other Lagrange Multiplier datasets.
+ Internally (for libNNPDF) they are always PositivitySets
+ """
def __init__(self, name, commondataspec, fkspec, maxlambda, thspec):
cuts = Cuts(commondataspec, None)
self.maxlambda = maxlambda
- super().__init__(name=name, commondata=commondataspec, fkspecs=fkspec, thspec=thspec, cuts=cuts)
- if len(self.fkspecs) > 1:
- # The signature of the function does not accept operations either
- # so more than one fktable cannot be utilized
- raise ValueError("Positivity datasets can only contain one fktable")
+ super().__init__(
+ name=name,
+ commondata=commondataspec,
+ fkspecs=fkspec,
+ thspec=thspec,
+ cuts=cuts,
+ )
+
+ def to_unweighted(self):
+ log.warning(
+ "Trying to unweight %s, %s are always unweighted",
+ self.__class__.__name__,
+ self.name,
+ )
+ return self
@functools.lru_cache()
def load(self):
@@ -544,11 +604,14 @@ def load(self):
fk = self.fkspecs[0].load()
return PositivitySet(cd, fk, self.maxlambda)
- def to_unweighted(self):
- log.warning("Trying to unweight %s, PositivitySetSpec are always unweighted", self.name)
- return self
+
+class PositivitySetSpec(LagrangeSetSpec):
+ pass
+class IntegrabilitySetSpec(LagrangeSetSpec):
+ pass
+
#We allow to expand the experiment as a list of datasets
class DataGroupSpec(TupleComp, namespaces.NSList):
@@ -578,6 +641,10 @@ def load(self):
sets.append(loaded_data)
return Experiment(sets, self.name)
+ @functools.lru_cache(maxsize=32)
+ def load_commondata(self):
+ return [d.load_commondata() for d in self.datasets]
+
@property
def thspec(self):
#TODO: Is this good enough? Should we explicitly pass the theory
@@ -731,6 +798,14 @@ def __repr__(self):
def __str__(self):
return f"Theory {self.id}"
+ @property
+ def yamldb_path(self):
+ return self.path / "yamldb"
+
+ def is_pineappl(self):
+ """Check whether this theory is a pineappl-based theory"""
+ return self.yamldb_path.exists()
+
class ThCovMatSpec:
def __init__(self, path):
self.path = path
diff --git a/validphys2/src/validphys/coredata.py b/validphys2/src/validphys/coredata.py
index c0934fa62d..5e73e3777f 100644
--- a/validphys2/src/validphys/coredata.py
+++ b/validphys2/src/validphys/coredata.py
@@ -42,17 +42,37 @@ class FKTableData:
metadata : dict
Other information contained in the FKTable.
+
+ protected: bool
+        When a fktable is protected, cuts will not be applied.
+        The most common use-case is when a total cross section is used
+        as a normalization table for a differential cross section;
+        in legacy code (<= NNPDF4.0) both fktables would be cut using the differential index.
"""
hadronic: bool
Q0: float
ndata: int
- xgrid: np.array
+ xgrid: np.ndarray
sigma: pd.DataFrame
metadata: dict = dataclasses.field(default_factory=dict, repr=False)
+ protected: bool = False
+
+ def with_cfactor(self, cfactor):
+ """Returns a copy of the FKTableData object with cfactors applied to the fktable"""
+ if all(c == 1.0 for c in cfactor):
+ return self
+ if len(cfactor) != self.ndata:
+ if self.protected:
+ cfactor = cfactor[0]
+ else:
+ name = self.metadata.get("target_dataset")
+ raise ValueError(
+ f"The length of cfactor for {name} differs from the number of datapoints in the grid"
+ )
+ new_sigma = self.sigma.multiply(pd.Series(cfactor), axis=0, level=0)
+ return dataclasses.replace(self, sigma=new_sigma)
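
As a quick illustration of the broadcasting above (a toy sketch, not part of validphys): `multiply(..., axis=0, level=0)` aligns a per-datapoint series with the outermost `data` level of the MultiIndexed `sigma`, so every x node and flavour column of a given data point is scaled by the same factor.

```python
import numpy as np
import pandas as pd

# Two data points, three x nodes, two flavour columns (toy values)
idx = pd.MultiIndex.from_product([[0, 1], [0, 1, 2]], names=["data", "x"])
sigma = pd.DataFrame(np.ones((6, 2)), index=idx, columns=[3, 5])

cfactor = np.array([1.1, 0.9])  # one factor per data point
new_sigma = sigma.multiply(pd.Series(cfactor), axis=0, level=0)

assert (new_sigma.loc[0] == 1.1).all().all()
assert (new_sigma.loc[1] == 0.9).all().all()
```
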
- # TODO: When we move to something other than the current fktable format,
- # we should apply the cuts directly before loading the file.
def with_cuts(self, cuts):
"""Return a copy of the FKTabe with the cuts applied. The data index
of the sigma operator (the outermost level), contains the data point
@@ -87,14 +107,73 @@ def with_cuts(self, cuts):
>>> assert newtable.ndata == 2
>>> assert newtable.metadata['GridInfo'].ndata == 3
"""
- if hasattr(cuts, 'load'):
+ if hasattr(cuts, "load"):
cuts = cuts.load()
- if cuts is None:
+ if cuts is None or self.protected:
return self
newndata = len(cuts)
newsigma = self.sigma.loc[cuts]
return dataclasses.replace(self, ndata=newndata, sigma=newsigma)
+ @property
+ def luminosity_mapping(self):
+ """Return the flavour combinations that contribute to the fktable
+ in the form of a single array
+
+ The return shape is:
+ (nbasis,) for DIS
+ (nbasis*2,) for hadronic
+ """
+ basis = self.sigma.columns.to_numpy()
+ if self.hadronic:
+ ret = np.zeros(14 * 14, dtype=bool)
+ ret[basis] = True
+ basis = np.array(np.where(ret.reshape(14, 14))).T.reshape(-1)
+ return basis
+
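
A worked toy example of the hadronic branch above (channel indices are illustrative): the flattened 14x14 column labels are scattered into a boolean matrix and read back as interleaved (flav1, flav2) pairs.

```python
import numpy as np

basis = np.array([14 * 1 + 2, 14 * 3 + 3])  # channels (1, 2) and (3, 3)
ret = np.zeros(14 * 14, dtype=bool)
ret[basis] = True
mapping = np.array(np.where(ret.reshape(14, 14))).T.reshape(-1)
assert (mapping == [1, 2, 3, 3]).all()  # shape (nbasis*2,), pairs interleaved
```
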
+ def get_np_fktable(self):
+ """Returns the fktable as a dense numpy array that can be directly
+ manipulated with numpy
+
+        The return shape is:
+            (ndata, nbasis, nx) for DIS
+            (ndata, nbasis, nx, nx) for hadronic
+        where nx is the length of the xgrid
+        and nbasis the number of contributing flavour channels
+ """
+ # Read up the shape of the output table
+ ndata = self.ndata
+ nx = len(self.xgrid)
+ nbasis = self.sigma.shape[1]
+
+ if ndata == 0:
+ if self.hadronic:
+ return np.zeros((ndata, nbasis, nx, nx))
+ return np.zeros((ndata, nbasis, nx))
+
+ # Make the dataframe into a dense numpy array
+
+ # First get the data index out of the way
+        # this is necessary because of cuts/shifts and for performance reasons
+        # otherwise we would be filling the numpy array in a very awkward order
+ ns = self.sigma.unstack(level=("data",), fill_value=0)
+ x1 = ns.index.get_level_values(0)
+
+ if self.hadronic:
+ x2 = ns.index.get_level_values(1)
+ fk_raw = np.zeros((nx, nx, ns.shape[1]))
+ fk_raw[x2, x1, :] = ns.values
+
+ # The output is (ndata, basis, x1, x2)
+ fktable = fk_raw.reshape((nx, nx, nbasis, ndata)).T
+ else:
+ fk_raw = np.zeros((nx, ns.shape[1]))
+ fk_raw[x1, :] = ns.values
+
+ # The output is (ndata, basis, x1)
+ fktable = fk_raw.reshape((nx, nbasis, ndata)).T
+
+ return fktable
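
A minimal DIS-shaped check of the densification above (toy numbers, not a real fktable): the sparse (data, x) -> flavour dataframe becomes a dense (ndata, nbasis, nx) array, with `fktable[d, b, x]` equal to the corresponding dataframe entry.

```python
import numpy as np
import pandas as pd

nx, ndata, nbasis = 3, 2, 2
idx = pd.MultiIndex.from_product([range(ndata), range(nx)], names=["data", "x"])
sigma = pd.DataFrame(np.arange(12.0).reshape(6, 2), index=idx, columns=[3, 5])

# Same steps as get_np_fktable, DIS branch
ns = sigma.unstack(level=("data",), fill_value=0)
x1 = ns.index.get_level_values(0)
fk_raw = np.zeros((nx, ns.shape[1]))
fk_raw[x1, :] = ns.values
fktable = fk_raw.reshape((nx, nbasis, ndata)).T  # (ndata, nbasis, nx)

assert fktable[1, 0, 2] == sigma.loc[(1, 2), 3]
```
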
@dataclasses.dataclass(eq=False)
class CFactorData:
@@ -157,6 +236,7 @@ class CommonData:
type (ADD/MULT/RAND) and name
(CORR/UNCORR/THEORYCORR/SKIP)
"""
+
setname: str
ndata: int
commondataproc: str
@@ -185,11 +265,13 @@ def with_cuts(self, cuts):
"""
# Ensure that the cuts we're applying applies to this dataset
# only check, however, if the cuts is of type :py:class:`validphys.core.Cuts`
- if hasattr(cuts, 'name') and self.setname != cuts.name:
- raise ValueError(f"The cuts provided are for {cuts.name} which does not apply "
- f"to this commondata file: {self.setname}")
+ if hasattr(cuts, "name") and self.setname != cuts.name:
+ raise ValueError(
+ f"The cuts provided are for {cuts.name} which does not apply "
+ f"to this commondata file: {self.setname}"
+ )
- if hasattr(cuts, 'load'):
+ if hasattr(cuts, "load"):
cuts = cuts.load()
if cuts is None:
return self
@@ -200,9 +282,7 @@ def with_cuts(self, cuts):
newndata = len(cuts)
new_commondata_table = self.commondata_table.loc[cuts]
- return dataclasses.replace(
- self, ndata=newndata, commondata_table=new_commondata_table
- )
+ return dataclasses.replace(self, ndata=newndata, commondata_table=new_commondata_table)
@property
def central_values(self):
@@ -245,7 +325,6 @@ def additive_errors(self):
add_table.columns = add_systype["name"].to_numpy()
return add_table.loc[:, add_table.columns != "SKIP"]
-
def systematic_errors(self, central_values=None):
"""Returns all systematic errors as absolute uncertainties, with a
single column for each uncertainty. Converts
@@ -272,7 +351,5 @@ def systematic_errors(self, central_values=None):
"""
if central_values is None:
central_values = self.central_values.to_numpy()
- converted_mult_errors = (
- self.multiplicative_errors * central_values[:, np.newaxis] / 100
- )
+ converted_mult_errors = self.multiplicative_errors * central_values[:, np.newaxis] / 100
return pd.concat((self.additive_errors, converted_mult_errors), axis=1)
diff --git a/validphys2/src/validphys/filters.py b/validphys2/src/validphys/filters.py
index 9c930c8f2a..c0b9bb29fe 100644
--- a/validphys2/src/validphys/filters.py
+++ b/validphys2/src/validphys/filters.py
@@ -164,7 +164,7 @@ def _filter_real_data(filter_path, data):
nfull, ncut = _write_ds_cut_data(path, dataset)
total_data_points += nfull
total_cut_data_points += ncut
- dataset.load().Export(str(path))
+ dataset.load_commondata().Export(str(path))
return total_data_points, total_cut_data_points
diff --git a/validphys2/src/validphys/fkparser.py b/validphys2/src/validphys/fkparser.py
index f7d2a53fb9..8213a994f0 100644
--- a/validphys2/src/validphys/fkparser.py
+++ b/validphys2/src/validphys/fkparser.py
@@ -52,25 +52,25 @@ class GridInfo:
@functools.lru_cache()
def load_fktable(spec):
"""Load the data corresponding to a FKSpec object. The cfactors
- will be applied to the grid."""
- with open_fkpath(spec.fkpath) as handle:
- tabledata = parse_fktable(handle)
+ will be applied to the grid.
+    If we have a new-type fktable, call `load()` directly, otherwise
+    fall back to the old parser
+ """
+ if spec.legacy:
+ with open_fkpath(spec.fkpath) as handle:
+ tabledata = parse_fktable(handle)
+ else:
+ tabledata = spec.load()
+
if not spec.cfactors:
return tabledata
- ndata = tabledata.ndata
- cfprod = np.ones(ndata)
+ cfprod = 1.0
for cf in spec.cfactors:
with open(cf, "rb") as f:
cfdata = parse_cfactor(f)
- if len(cfdata.central_value) != ndata:
- raise BadCFactorError(
- "Length of cfactor data does not match the length of the fktable."
- )
cfprod *= cfdata.central_value
- # TODO: Find a way to do this in place
- tabledata.sigma = tabledata.sigma.multiply(pd.Series(cfprod), axis=0, level=0)
- return tabledata
+ return tabledata.with_cfactor(cfprod)
def _get_compressed_buffer(path):
archive = tarfile.open(path)
diff --git a/validphys2/src/validphys/loader.py b/validphys2/src/validphys/loader.py
index f944b95d18..6f287e2321 100644
--- a/validphys2/src/validphys/loader.py
+++ b/validphys2/src/validphys/loader.py
@@ -26,11 +26,12 @@
from reportengine import filefinder
from validphys.core import (CommonDataSpec, FitSpec, TheoryIDSpec, FKTableSpec,
- PositivitySetSpec, DataSetSpec, PDF, Cuts, DataGroupSpec,
- peek_commondata_metadata, CutsPolicy,
+ PositivitySetSpec, IntegrabilitySetSpec, DataSetSpec, PDF, Cuts,
+ DataGroupSpec, peek_commondata_metadata, CutsPolicy,
InternalCutsWrapper, HyperscanSpec)
from validphys.utils import tempfile_cleaner
from validphys import lhaindex
+from validphys import pineparser
DEFAULT_NNPDF_PROFILE_PATH = f"{sys.prefix}/share/NNPDF/nnprofile.yaml"
@@ -358,6 +359,32 @@ def check_fktable(self, theoryID, setname, cfac):
cfactors = self.check_cfactor(theoryID, setname, cfac)
return FKTableSpec(fkpath, cfactors)
+ def check_fkyaml(self, fkpath, theoryID, cfac):
+ """Load a pineappl fktable
+ Receives a yaml file describing the fktables necessary for a given observable
+ the theory ID and the corresponding cfactors
+ """
+ _, theopath = self.check_theoryID(theoryID)
+ metadata, fklist = pineparser.get_yaml_information(fkpath, theopath)
+ op = metadata["operation"]
+
+ # TODO:
+ # at the moment there are no pineappl specific c-factors
+ # so they need to be loaded from the NNPDF names / compounds files
+ cfac_name = metadata["target_dataset"]
+ # check whether there is a compound file
+ cpath = theopath / "compound" / f"FK_{cfac_name}-COMPOUND.dat"
+ if cpath.exists():
+ # Get the target filenames
+ tnames = [i[3:-4] for i in cpath.read_text().split() if i.endswith(".dat")]
+ cfactors = [self.check_cfactor(theoryID, i, cfac) for i in tnames]
+ else:
+ cfactors = [self.check_cfactor(theoryID, cfac_name, cfac)]
+ ###
+
+ fkspecs = [FKTableSpec(i, c, metadata) for i, c in zip(fklist, cfactors)]
+ return fkspecs, op
+
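
For reference, the slicing used above to recover dataset names from a compound file (the entry below is illustrative): compound files list their members as `FK_<name>.dat`, so `[3:-4]` strips the `FK_` prefix and the `.dat` suffix.

```python
line = "FK_DYE886R_D.dat"  # hypothetical compound-file entry
assert line[3:-4] == "DYE886R_D"
```
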
def check_compound(self, theoryID, setname, cfac):
thid, theopath = self.check_theoryID(theoryID)
compound_spec_path = theopath / 'compound' / ('FK_%s-COMPOUND.dat' % setname)
@@ -407,11 +434,19 @@ def check_cfactor(self, theoryID, setname, cfactors):
return tuple(cf)
def check_posset(self, theoryID, setname, postlambda):
+ """Load a positivity dataset"""
cd = self.check_commondata(setname, 'DEFAULT')
fk = self.check_fktable(theoryID, setname, [])
th = self.check_theoryID(theoryID)
return PositivitySetSpec(setname, cd, fk, postlambda, th)
+ def check_integset(self, theoryID, setname, postlambda):
+ """Load an integrability dataset"""
+ cd = self.check_commondata(setname, 'DEFAULT')
+ fk = self.check_fktable(theoryID, setname, [])
+ th = self.check_theoryID(theoryID)
+ return IntegrabilitySetSpec(setname, cd, fk, postlambda, th)
+
def get_posset(self, theoryID, setname, postlambda):
return self.check_posset(theoryID, setname, postlambda).load()
@@ -482,8 +517,12 @@ def check_dataset(self,
cuts=CutsPolicy.INTERNAL,
use_fitcommondata=False,
fit=None,
- weight=1):
-
+ weight=1,
+ ):
+ """Loads a given dataset
+ If the dataset contains new-type fktables, use the
+ pineappl loading function, otherwise fallback to legacy
+ """
if not isinstance(theoryid, TheoryIDSpec):
theoryid = self.check_theoryID(theoryid)
@@ -491,11 +530,17 @@ def check_dataset(self,
commondata = self.check_commondata(
name, sysnum, use_fitcommondata=use_fitcommondata, fit=fit)
- try:
- fkspec, op = self.check_compound(theoryno, name, cfac)
- except CompoundNotFound:
- fkspec = self.check_fktable(theoryno, name, cfac)
- op = None
+
+ if theoryid.is_pineappl():
+ # If it is a pineappl theory, use the pineappl reader
+ fkpath = (theoryid.yamldb_path / name).with_suffix(".yaml")
+ fkspec, op = self.check_fkyaml(fkpath, theoryno, cfac)
+ else:
+ try:
+ fkspec, op = self.check_compound(theoryno, name, cfac)
+ except CompoundNotFound:
+ fkspec = self.check_fktable(theoryno, name, cfac)
+ op = None
#Note this is simply for convenience when scripting. The config will
#construct the actual Cuts object by itself
diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py
index f2ceda5573..c3709293c5 100644
--- a/validphys2/src/validphys/n3fit_data.py
+++ b/validphys2/src/validphys/n3fit_data.py
@@ -6,8 +6,8 @@
functions make calls to libnnpdf C++ library.
"""
+import functools
from collections import defaultdict
-from copy import deepcopy
import hashlib
import logging
@@ -18,12 +18,13 @@
from reportengine.table import table
from validphys.n3fit_data_utils import (
- common_data_reader_experiment,
- positivity_reader,
+ validphys_group_extractor,
)
+from validphys.core import IntegrabilitySetSpec, TupleComp
log = logging.getLogger(__name__)
+
def replica_trvlseed(replica, trvlseed, same_trvl_per_replica=False):
"""Generates the ``trvlseed`` for a ``replica``."""
# TODO: move to the new infrastructure
@@ -35,6 +36,7 @@ def replica_trvlseed(replica, trvlseed, same_trvl_per_replica=False):
res = np.random.randint(0, pow(2, 31))
return res
+
def replica_nnseed(replica, nnseed):
"""Generates the ``nnseed`` for a ``replica``."""
np.random.seed(seed=nnseed)
@@ -42,6 +44,7 @@ def replica_nnseed(replica, nnseed):
res = np.random.randint(0, pow(2, 31))
return res
+
def replica_mcseed(replica, mcseed, genrep):
"""Generates the ``mcseed`` for a ``replica``."""
if not genrep:
@@ -52,6 +55,23 @@ def replica_mcseed(replica, mcseed, genrep):
return res
+class _TrMasks(TupleComp):
+ """Class holding the training validation mask for a group of datasets
+ If the same group of dataset receives the same trvlseed then the mask
+ will be the same.
+ This class holds said information so it can be reused easily, i.e.,
+ ``group_name`` and ``seed`` define the ``masks``.
+ """
+
+ def __init__(self, group_name, seed, masks=None):
+ self.masks = masks
+ super().__init__(group_name, seed)
+
+ def __iter__(self):
+ for m in self.masks:
+ yield m
+
+
def tr_masks(data, replica_trvlseed):
"""Generate the boolean masks used to split data into training and
validation points. Returns a list of 1-D boolean arrays, one for each
@@ -61,7 +81,7 @@ def tr_masks(data, replica_trvlseed):
tr_data = data[tr_mask]
"""
- nameseed = int(hashlib.sha256(str(data).encode()).hexdigest(), 16) % 10 ** 8
+ nameseed = int(hashlib.sha256(str(data).encode()).hexdigest(), 16) % 10**8
nameseed += replica_trvlseed
# TODO: update this to new random infrastructure.
np.random.seed(nameseed)
@@ -78,7 +98,8 @@ def tr_masks(data, replica_trvlseed):
)
np.random.shuffle(mask)
trmask_partial.append(mask)
- return trmask_partial
+ return _TrMasks(str(data), replica_trvlseed, trmask_partial)
+
def kfold_masks(kpartitions, data):
"""Collect the masks (if any) due to kfolding for this data.
@@ -147,50 +168,24 @@ def kfold_masks(kpartitions, data):
return list_folds
-def _mask_fk_tables(dataset_dicts, tr_masks):
- """
- Internal function which masks the fktables for a group of datasets.
-
- Parameters
- ----------
- dataset_dicts: list[dict]
- list of datasets dictionaries returned by
- :py:func:`validphys.n3fit_data_utils.common_data_reader_experiment`.
- tr_masks: list[np.array]
- a tuple containing the lists of training masks for each dataset.
-
- Return
- ------
- data_trmask: np.array
- boolean array resulting from concatenating the training masks of
- each dataset.
-
- Note: the returned masks are only used in order to mask the covmat
+@functools.lru_cache
+def fittable_datasets_masked(data, tr_masks):
+ """Generate a list of :py:class:`validphys.n3fit_data_utils.FittableDataSet`
+    from a group of datasets and the corresponding training/validation masks
"""
- trmask_partial = tr_masks
- for dataset_dict, tr_mask in zip(dataset_dicts, trmask_partial):
- # Generate the training and validation fktables
- tr_fks = []
- vl_fks = []
- ex_fks = []
- vl_mask = ~tr_mask
- for fktable_dict in dataset_dict["fktables"]:
- tr_fks.append(fktable_dict["fktable"][tr_mask])
- vl_fks.append(fktable_dict["fktable"][vl_mask])
- ex_fks.append(fktable_dict.get("fktable"))
- dataset_dict["tr_fktables"] = tr_fks
- dataset_dict["vl_fktables"] = vl_fks
- dataset_dict["ex_fktables"] = ex_fks
-
- return np.concatenate(trmask_partial)
+ # This is separated from fitting_data_dict so that we can cache the result
+ # when the trvlseed is the same for all replicas (great for parallel replicas)
+ return validphys_group_extractor(data.datasets, tr_masks.masks)
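
A minimal sketch of why this caching works (here `_Keyed` stands in for validphys's `TupleComp`, which is assumed to provide equality and hashing based on the constructor arguments): two mask holders built with the same `(group_name, seed)` compare equal, so the `lru_cache` is hit for every replica sharing the seed.

```python
import functools

class _Keyed:
    """Stand-in for TupleComp: hash and compare on the stored key"""
    def __init__(self, *key):
        self._key = key
    def __eq__(self, other):
        return self._key == other._key
    def __hash__(self):
        return hash(self._key)

class Masks(_Keyed):
    def __init__(self, group_name, seed, masks=None):
        self.masks = masks  # not part of the key
        super().__init__(group_name, seed)

@functools.lru_cache
def expensive_extraction(masks):
    print("extracting...")  # printed only on a cache miss
    return masks.masks

expensive_extraction(Masks("exp1", 7, [True, False]))
expensive_extraction(Masks("exp1", 7, [True, False]))  # cache hit, no print
```
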
def fitting_data_dict(
data,
make_replica,
+ dataset_inputs_loaded_cd_with_cuts,
dataset_inputs_t0_covmat_from_systematics,
tr_masks,
kfold_masks,
+ fittable_datasets_masked,
diagonal_basis=None,
):
"""
@@ -235,17 +230,13 @@ def fitting_data_dict(
"""
# TODO: Plug in the python data loading when available. Including but not
# limited to: central values, ndata, replica generation, covmat construction
- spec_c = data.load()
- ndata = spec_c.GetNData()
- expdata_true = spec_c.get_cv().reshape(1, ndata)
+ expdata_true = np.concatenate([d.central_values for d in dataset_inputs_loaded_cd_with_cuts])
expdata = make_replica
-
- datasets = common_data_reader_experiment(spec_c, data)
-
- # t0 covmat
- covmat = dataset_inputs_t0_covmat_from_systematics
+ tr_masks = tr_masks.masks
+ covmat = dataset_inputs_t0_covmat_from_systematics # t0 covmat
inv_true = np.linalg.inv(covmat)
+ fittable_datasets = fittable_datasets_masked
if diagonal_basis:
log.info("working in diagonal basis.")
@@ -256,21 +247,17 @@ def fitting_data_dict(
dt_trans_tr = None
dt_trans_vl = None
-
- # Copy dataset dict because we mutate it.
- datasets_copy = deepcopy(datasets)
-
- tr_mask = _mask_fk_tables(datasets_copy, tr_masks)
+ tr_mask = np.concatenate(tr_masks)
vl_mask = ~tr_mask
if diagonal_basis:
expdata = np.matmul(dt_trans, expdata)
# make a 1d array of the diagonal
covmat_tr = eig[tr_mask]
- invcovmat_tr = 1./covmat_tr
+ invcovmat_tr = 1.0 / covmat_tr
covmat_vl = eig[vl_mask]
- invcovmat_vl = 1./covmat_vl
+ invcovmat_vl = 1.0 / covmat_vl
# prepare a masking rotation
dt_trans_tr = dt_trans[tr_mask]
@@ -285,7 +272,6 @@ def fitting_data_dict(
ndata_tr = np.count_nonzero(tr_mask)
expdata_tr = expdata[tr_mask].reshape(1, ndata_tr)
-
ndata_vl = np.count_nonzero(vl_mask)
expdata_vl = expdata[vl_mask].reshape(1, ndata_vl)
@@ -298,10 +284,14 @@ def fitting_data_dict(
folds["validation"].append(fold[vl_mask])
folds["experimental"].append(~fold)
+ # This dictionary contains a list of fittable datasets
+    # with the instructions on how to generate each observable for the fit
+    # plus the information that glues all of them together (covmat, ndata, etc)
+ # TODO: for consistency with the rest of validphys a FittableGroup should be created
dict_out = {
- "datasets": datasets_copy,
+ "datasets": fittable_datasets,
"name": str(data),
- "expdata_true": expdata_true,
+ "expdata_true": expdata_true.reshape(1, -1),
"invcovmat_true": inv_true,
"covmat": covmat,
"trmask": tr_mask,
@@ -314,14 +304,16 @@ def fitting_data_dict(
"expdata_vl": expdata_vl,
"positivity": False,
"count_chi2": True,
- "folds" : folds,
+ "folds": folds,
"data_transformation_tr": dt_trans_tr,
"data_transformation_vl": dt_trans_vl,
}
return dict_out
+
exps_fitting_data_dict = collect("fitting_data_dict", ("group_dataset_inputs_by_experiment",))
+
def replica_nnseed_fitting_data_dict(replica, exps_fitting_data_dict, replica_nnseed):
"""For a single replica return a tuple of the inputs to this function.
Used with `collect` over replicas to avoid having to perform multiple
@@ -335,8 +327,11 @@ def replica_nnseed_fitting_data_dict(replica, exps_fitting_data_dict, replica_nn
"""
return (replica, exps_fitting_data_dict, replica_nnseed)
+
replicas_nnseed_fitting_data_dict = collect("replica_nnseed_fitting_data_dict", ("replicas",))
-groups_replicas_indexed_make_replica = collect('indexed_make_replica' , ("group_dataset_inputs_by_experiment","replicas"))
+groups_replicas_indexed_make_replica = collect(
+ "indexed_make_replica", ("group_dataset_inputs_by_experiment", "replicas")
+)
@table
@@ -390,10 +385,10 @@ def validation_pseudodata(pseudodata_table, training_mask):
@table
def replica_training_mask_table(replica_training_mask):
- """Same as ``replica_training_mask`` but with a table decorator.
- """
+ """Same as ``replica_training_mask`` but with a table decorator."""
return replica_training_mask
+
def replica_training_mask(exps_tr_masks, replica, experiments_index):
"""Save the boolean mask used to split data into training and validation
for a given replica as a pandas DataFrame, indexed by
@@ -440,25 +435,19 @@ def replica_training_mask(exps_tr_masks, replica, experiments_index):
[345 rows x 1 columns]
"""
- all_masks = np.concatenate([
- ds_mask
- for exp_masks in exps_tr_masks
- for ds_mask in exp_masks
- ])
- return pd.DataFrame(
- all_masks,
- columns=[f"replica {replica}"],
- index=experiments_index
- )
+ all_masks = np.concatenate([ds_mask for exp_masks in exps_tr_masks for ds_mask in exp_masks])
+ return pd.DataFrame(all_masks, columns=[f"replica {replica}"], index=experiments_index)
+
replicas_training_mask = collect("replica_training_mask", ("replicas",))
+
@table
def training_mask_table(training_mask):
- """Same as ``training_mask`` but with a table decorator
- """
+ """Same as ``training_mask`` but with a table decorator"""
return training_mask
+
def training_mask(replicas_training_mask):
"""Save the boolean mask used to split data into training and validation
for each replica as a pandas DataFrame, indexed by
@@ -502,14 +491,14 @@ def training_mask(replicas_training_mask):
return pd.concat(replicas_training_mask, axis=1)
-def fitting_pos_dict(posdataset):
- """Loads a positivity dataset. For more information see
- :py:func:`validphys.n3fit_data_utils.positivity_reader`.
+def _fitting_lagrange_dict(lambdadataset):
+ """Loads a generic lambda dataset, often used for positivity and integrability datasets
+ For more information see :py:func:`validphys.n3fit_data_utils.positivity_reader`.
Parameters
----------
- posdataset: validphys.core.PositivitySetSpec
- Positivity set which is to be loaded.
+ lambdadataset: validphys.core.LagrangeSetSpec
+ Positivity (or integrability) set which is to be loaded.
Examples
--------
@@ -518,23 +507,50 @@ def fitting_pos_dict(posdataset):
>>> pos = API.fitting_pos_dict(posdataset=posdataset, theoryid=162)
>>> len(pos)
9
-
"""
- log.info("Loading positivity dataset %s", posdataset)
- return positivity_reader(posdataset)
+ integrability = isinstance(lambdadataset, IntegrabilitySetSpec)
+ mode = "integrability" if integrability else "positivity"
+ log.info("Loading %s dataset %s", mode, lambdadataset)
+ positivity_datasets = validphys_group_extractor([lambdadataset], [])
+ ndata = positivity_datasets[0].ndata
+ return {
+ "datasets": positivity_datasets,
+ "trmask": np.ones(ndata, dtype=np.bool),
+ "name": lambdadataset.name,
+ "expdata": np.zeros((1, ndata)),
+ "ndata": ndata,
+ "positivity": True,
+ "lambda": lambdadataset.maxlambda,
+ "count_chi2": False,
+ "integrability": integrability,
+ }
-posdatasets_fitting_pos_dict = collect("fitting_pos_dict", ("posdatasets",))
+def posdatasets_fitting_pos_dict(posdatasets=None):
+ """Loads all positivity datasets. It is not allowed to be empty.
-#can't use collect here because integdatasets might not exist.
+ Parameters
+ ----------
+    posdatasets: list[validphys.core.PositivitySetSpec]
+ list containing the settings for the positivity sets. Examples of
+ these can be found in the runcards located in n3fit/runcards. They have
+ a format similar to ``dataset_input``.
+ """
+ if posdatasets is not None:
+ return [_fitting_lagrange_dict(i) for i in posdatasets]
+ log.warning("Not using any positivity datasets.")
+ return None
+
+
+# can't use collect here because integdatasets might not exist.
def integdatasets_fitting_integ_dict(integdatasets=None):
- """Loads a integrability dataset. Calls same function as
+ """Loads the integrability datasets. Calls same function as
    :py:func:`_fitting_lagrange_dict`, except on each element of
``integdatasets`` if ``integdatasets`` is not None.
Parameters
----------
- integdatasets: list[validphys.core.PositivitySetSpec]
+ integdatasets: list[validphys.core.IntegrabilitySetSpec]
list containing the settings for the integrability sets. Examples of
these can be found in the runcards located in n3fit/runcards. They have
a format similar to ``dataset_input``.
@@ -552,12 +568,6 @@ def integdatasets_fitting_integ_dict(integdatasets=None):
"""
if integdatasets is not None:
- integ_info = []
- for integ_set in integdatasets:
- log.info("Loading integrability dataset %s", integ_set)
- # Use the same reader as positivity observables
- integ_dict = positivity_reader(integ_set)
- integ_info.append(integ_dict)
- return integ_info
+ return [_fitting_lagrange_dict(i) for i in integdatasets]
log.warning("Not using any integrability datasets.")
return None
diff --git a/validphys2/src/validphys/n3fit_data_utils.py b/validphys2/src/validphys/n3fit_data_utils.py
index 80fe469756..3b54e29bd1 100644
--- a/validphys2/src/validphys/n3fit_data_utils.py
+++ b/validphys2/src/validphys/n3fit_data_utils.py
@@ -1,168 +1,107 @@
"""
n3fit_data_utils.py
-Library of helper functions to n3fit_data.py for reading libnnpdf objects.
+This module reads validphys :py:class:`validphys.core.DataSetSpec`
+and extracts the relevant information into :py:class:`validphys.n3fit_data_utils.FittableDataSet`
+
+The ``validphys_group_extractor`` will loop over every dataset of a given group
+loading their fktables (and applying any necessary cuts).
"""
+from itertools import zip_longest
+import dataclasses
import numpy as np
-def fk_parser(fk, is_hadronic=False):
- """
- # Arguments:
- - `fk`: fktable object
-
- # Return:
- - `dict_out`: dictionary with all information about the fktable
- - 'xgrid'
- - 'nx'
- - 'ndata'
- - 'basis'
- - 'fktable'
- """
-
- # Dimensional information
- nonzero = fk.GetNonZero()
- ndata = fk.GetNData()
- nx = fk.GetTx()
-
- # Basis of active flavours
- nbasis = nonzero
- basis = fk.get_flmap()
-
- # Read the xgrid and the fktable to numpy arrays
- xgrid_flat = fk.get_xgrid()
- fktable_flat = fk.get_sigma()
-
- # target shape
- if is_hadronic:
- nxsqrt = int(np.sqrt(nx))
- shape_out = (ndata, nbasis, nxsqrt, nxsqrt)
- xgrid = xgrid_flat.reshape(1, nxsqrt)
- else:
- shape_out = (ndata, nbasis, nx)
- xgrid = xgrid_flat.reshape(1, nx)
-
- # remove padding from the fktable (if any)
- l = len(fktable_flat)
- # get the padding
- pad_position = fk.GetDSz()
- pad_size = fk.GetDSz() - nx * nonzero
- # remove the padding
- if pad_size > 0:
- mask = np.ones(l, dtype=bool)
- for i in range(1, int(l / pad_position) + 1):
- marker = pad_position * i
- mask[marker - pad_size : marker] = False
- fktable_array = fktable_flat[mask]
- else:
- fktable_array = fktable_flat
- # reshape
- fktable = fktable_array.reshape(shape_out)
-
- dict_out = {
- "ndata": ndata,
- "nbasis": nbasis,
- "nonzero": nbasis,
- "basis": basis,
- "nx": nx,
- "xgrid": xgrid,
- "fktable": fktable,
- }
- return dict_out
-
-
-def common_data_reader_dataset(dataset_c, dataset_spec):
+@dataclasses.dataclass
+class FittableDataSet:
"""
- Import fktable, common data and experimental data for the given data_name
-
- # Arguments:
- - `dataset_c`: c representation of the dataset object
- - `dataset_spec`: python representation of the dataset object
-
- #Returns:
- - `[dataset_dict]`: a (len 1 list of) dictionary with:
- - dataset: full dataset object from which all data has been extracted
- - hadronic: is hadronic?
- - operation: operation to perform on the fktables below
- - fktables : a list of dictionaries with the following items
- - ndata: number of data points
- - nonzero/nbasis: number of PDF entries which are non zero
- - basis: array containing the information of the non-zero PDF entries
- - nx: number of points in the xgrid (1-D), i.e., for hadronic the total number is nx * nx
- - xgrid: grid of x points
- - fktable: 3/4-D array of shape (ndata, nonzero, nx, (nx))
-
- instead of the dictionary object that model_gen needs
+ Python version of the libNNPDF dataset
+ to be merged with the product of the new CommonData dataset
+
+ Parameters
+ ----------
+ name: str
+ name of the dataset
+ fktables_data: list(:py:class:`validphys.coredata.FKTableData`)
+ list of coredata fktable objects
+
+ operation: str
+ operation to be applied to the fktables in the dataset, default "NULL"
+ frac: float
+ fraction of the data to enter the training set
+        training_mask: np.ndarray
+            boolean training mask to apply to the fktable
"""
- how_many = dataset_c.GetNSigma()
- dict_fktables = []
- for i in range(how_many):
- fktable = dataset_c.GetFK(i)
- dict_fktables.append(fk_parser(fktable, dataset_c.IsHadronic()))
-
- dataset_dict = {
- "fktables": dict_fktables,
- "hadronic": dataset_c.IsHadronic(),
- "operation": dataset_spec.op,
- "name": dataset_c.GetSetName(),
- "frac": dataset_spec.frac,
- "ndata": dataset_c.GetNData(),
- }
-
- return [dataset_dict]
-
-
-def common_data_reader_experiment(experiment_c, experiment_spec):
- """
- Wrapper around the experiments. Loop over all datasets in an experiment,
- calls common_data_reader on them and return a list with the content.
-
- # Arguments:
- - `experiment_c`: c representation of the experiment object
- - `experiment_spec`: python representation of the experiment object
-
- # Returns:
- - `[parsed_datasets]`: a list of dictionaries output from `common_data_reader_dataset`
- """
- parsed_datasets = []
- for dataset_c, dataset_spec in zip(experiment_c.DataSets(), experiment_spec.datasets):
- parsed_datasets += common_data_reader_dataset(dataset_c, dataset_spec)
- return parsed_datasets
-
-def positivity_reader(pos_spec):
+ # NOTE:
+ # This class tries to be compatible with the libNNPDF dataset class
+ # after commondata is also in python, FittableDataSet can inherit from the vp dataset
+ # which knows how to generate its "fittable" version.
+
+ name: str
+ fktables_data: list # of validphys.coredata.FKTableData objects
+
+ # Things that can have default values:
+ operation: str = "NULL"
+ frac: float = 1.0
+ training_mask: np.ndarray = None # boolean array
+
+ def __post_init__(self):
+ self._tr_mask = None
+ self._vl_mask = None
+ if self.training_mask is not None:
+ data_idx = self.fktables_data[0].sigma.index.get_level_values(0).unique()
+ self._tr_mask = data_idx[self.training_mask].values
+ self._vl_mask = data_idx[~self.training_mask].values
+
+ @property
+ def ndata(self):
+ """Number of datapoints in the dataset"""
+ return self.fktables_data[0].ndata
+
+ @property
+ def hadronic(self):
+ """Returns true if this is a hadronic collision dataset"""
+ return self.fktables_data[0].hadronic
+
+ def fktables(self):
+ """Return the list of fktable tensors for the dataset"""
+ return [fk.get_np_fktable() for fk in self.fktables_data]
+
+ def training_fktables(self):
+ """Return the fktable tensors for the trainig data"""
+ if self._tr_mask is not None:
+ return [fk.with_cuts(self._tr_mask).get_np_fktable() for fk in self.fktables_data]
+ return self.fktables()
+
+ def validation_fktables(self):
+ """Return the fktable tensors for the validation data"""
+ if self._vl_mask is not None:
+ return [fk.with_cuts(self._vl_mask).get_np_fktable() for fk in self.fktables_data]
+ return self.fktables()
+
+
+def validphys_group_extractor(datasets, tr_masks):
"""
- Specific reader for positivity sets
+    Receives the datasets of a validphys group (most likely an experiment)
+    and loops over them, extracting and parsing all information required for the fit
+
+ Parameters
+ ----------
+ datasets: list(:py:class:`validphys.core.DataSetSpec`)
+ List of dataset specs in this group
+ tr_masks: list(np.array)
+ List of training masks to be set for each dataset
+
+ Returns
+ -------
+ loaded_obs: list (:py:class:`validphys.n3fit_data_utils.FittableDataSet`)
"""
- pos_c = pos_spec.load()
- ndata = pos_c.GetNData()
-
- parsed_set = [fk_parser(pos_c, pos_c.IsHadronic())]
-
- pos_sets = [
- {
- "fktables": parsed_set,
- "hadronic": pos_c.IsHadronic(),
- "operation": "NULL",
- "name": pos_spec.name,
- "frac": 1.0,
- "ndata": ndata,
- "tr_fktables": [i["fktable"] for i in parsed_set],
- }
- ]
-
- positivity_factor = pos_spec.maxlambda
-
- dict_out = {
- "datasets": pos_sets,
- "trmask": np.ones(ndata, dtype=np.bool),
- "name": pos_spec.name,
- "expdata": np.zeros((1, ndata)),
- "ndata": ndata,
- "positivity": True,
- "lambda": positivity_factor,
- "count_chi2": False,
- "integrability": "INTEG" in pos_spec.name,
- }
-
- return dict_out
+ loaded_obs = []
+    # Use zip_longest since tr_masks can legitimately be an empty list
+ for dspec, mask in zip_longest(datasets, tr_masks):
+        # Load all fktables with the appropriate cuts
+ fktables = [fk.load_with_cuts(dspec.cuts) for fk in dspec.fkspecs]
+        # and put them into a FittableDataSet object
+ loaded_obs.append(FittableDataSet(dspec.name, fktables, dspec.op, dspec.frac, mask))
+ return loaded_obs
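
A small sketch of the mask handling done in `FittableDataSet.__post_init__` (toy index): the boolean training mask is defined over the unique data points of the first fktable, which need not start at zero once cuts have been applied; the resulting index lists are what `FKTableData.with_cuts` consumes.

```python
import numpy as np
import pandas as pd

# Data points 4, 7 and 9 survive the cuts; two x nodes each
idx = pd.MultiIndex.from_product([[4, 7, 9], [0, 1]], names=["data", "x"])
sigma = pd.DataFrame(np.ones((6, 1)), index=idx, columns=[3])

training_mask = np.array([True, False, True])
data_idx = sigma.index.get_level_values(0).unique()
tr_mask = data_idx[training_mask].values
vl_mask = data_idx[~training_mask].values

assert list(tr_mask) == [4, 9] and list(vl_mask) == [7]
```
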
diff --git a/validphys2/src/validphys/pineparser.py b/validphys2/src/validphys/pineparser.py
new file mode 100644
index 0000000000..1df490c8ed
--- /dev/null
+++ b/validphys2/src/validphys/pineparser.py
@@ -0,0 +1,316 @@
+"""
+ Loader for the pineappl-based FKTables
+
+    Pineappl-based FKTables carry the ``pineappl.lz4`` extension and can be utilized
+ directly with the ``pineappl`` cli as well as read with ``pineappl.fk_table``
+"""
+import numpy as np
+import pandas as pd
+
+from reportengine.compat import yaml
+
+from validphys.coredata import FKTableData
+
+########### This part might eventually be part of whatever commondata reader
+EXT = "pineappl.lz4"
+
+
+class YamlFileNotFound(FileNotFoundError):
+ """ymldb file for dataset not found."""
+
+
+class GridFileNotFound(FileNotFoundError):
+ """PineAPPL file for FK table not found."""
+
+
+def _load_yaml(yaml_file):
+ """Load a dataset.yaml file.
+
+ Parameters
+ ----------
+ yaml_file : Path
+ path of the yaml file for the given dataset
+
+ Returns
+ -------
+ dict :
+        normalized parsed file content
+ """
+ if not yaml_file.exists():
+ raise YamlFileNotFound(yaml_file)
+ ret = yaml.safe_load(yaml_file.read_text())
+ # Make sure the operations are upper-cased for compound-compatibility
+ ret["operation"] = "NULL" if ret["operation"] is None else ret["operation"].upper()
+ return ret
+
+
+def pineko_yaml(yaml_file, grids_folder, check_grid_existence=True):
+ """Given a yaml_file, returns the corresponding dictionary and grids.
+
+ The dictionary contains all information and we return an extra field
+ with all the grids to be loaded for the given dataset.
+
+ Parameters
+ ----------
+ yaml_file : pathlib.Path
+ path of the yaml file for the given dataset
+ grids_folder : pathlib.Path
+ path of the grids folder
+ check_grid_existence: bool
+ if True (default) checks whether the grid exists
+
+ Returns
+ -------
+ yaml_content: dict
+ Metadata prepared for the FKTables
+ paths: list(list(path))
+ List (of lists) with all the grids that will need to be loaded
+ """
+ yaml_content = _load_yaml(yaml_file)
+
+ # Turn the operands and the members into paths (and check all of them exist)
+ ret = []
+ for operand in yaml_content["operands"]:
+ tmp = []
+ for member in operand:
+ p = grids_folder / f"{member}.{EXT}"
+ if not p.exists() and check_grid_existence:
+ raise GridFileNotFound(f"Failed to find {p}")
+ tmp.append(p)
+ ret.append(tmp)
+
+ return yaml_content, ret
+
+
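
A usage sketch of the function above (the dataset and grid names are invented, and the grid existence check is disabled since nothing is actually on disk):

```python
from pathlib import Path
from validphys.pineparser import pineko_yaml

yaml_text = """
conversion_factor: 1.0
operands:
- - DS_NUM
- - DS_DEN
operation: ratio
target_dataset: DS
"""
tmp = Path("example_ds.yaml")
tmp.write_text(yaml_text)

metadata, paths = pineko_yaml(tmp, Path("theory/fastkernel"), check_grid_existence=False)
assert metadata["operation"] == "RATIO"  # normalized to upper case
assert paths == [
    [Path("theory/fastkernel/DS_NUM.pineappl.lz4")],
    [Path("theory/fastkernel/DS_DEN.pineappl.lz4")],
]
tmp.unlink()
```
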
+def pineko_apfelcomb_compatibility_flags(gridpaths, metadata):
+ """
+ Prepare the apfelcomb-pineappl compatibility fixes by matching the apfelcomb content
+ of the metadata to the grids that are being loaded.
+
+ These fixes can be of only three types:
+
+ - normalization:
+ normalization per subgrid
+
+ normalization:
+ grid_name: factor
+
+ - repetition_flag:
+ when a grid was actually the same point repeated X times
+ NNPDF cfactors and cuts are waiting for this repetition and so we need to keep track of it
+
+ repetition_flag:
+ grid_name
+
+ - shifts:
+ only for ATLASZPT8TEVMDIST
+ the points in this dataset are not contiguous so the index is shifted
+
+ shifts:
+ grid_name: shift_int
+
+    Returns
+    -------
+    dict or None
+        None when the metadata carries no apfelcomb fixes, otherwise a dictionary
+        with (up to) the following entries:
+        normalization: list(float)
+            per-grid normalization factor to be applied to the grid
+            to be compatible with the data
+        repetition_flag: bool
+            whether the fktable is a single point which gets repeated up to a certain size
+            (for instance to normalize a distribution)
+        shifts: list(int)
+            shift in the data index for each grid that forms the fktable
+ """
+ if metadata.get("apfelcomb") is None:
+ return None
+
+    # pathlib cannot strip a double suffix, so remove the extension by hand
+ operands = [i.name.replace(f".{EXT}", "") for i in gridpaths]
+ ret = {}
+
+ # Check whether we have a normalization active and whether it affects any of the grids
+ if metadata["apfelcomb"].get("normalization") is not None:
+ norm_info = metadata["apfelcomb"]["normalization"]
+ # Now fill the operands that need normalization
+ ret["normalization"] = [norm_info.get(op, 1.0) for op in operands]
+
+ # Check whether the repetition flag is active
+ if metadata["apfelcomb"].get("repetition_flag") is not None:
+ if len(operands) == 1:
+ ret["repetition_flag"] = operands[0] in metadata["apfelcomb"]["repetition_flag"]
+ else:
+            # Sanity check: the repetition flag only makes sense for single-grid fktables
+ if any(op in metadata["apfelcomb"]["repetition_flag"] for op in operands):
+ raise ValueError(f"The yaml info for {metadata['target_dataset']} is broken")
+
+ # Check whether the dataset has shifts
+ # NOTE: this only happens for ATLASZPT8TEVMDIST, if that gets fixed we might as well remove it
+ if metadata["apfelcomb"].get("shifts") is not None:
+ shift_info = metadata["apfelcomb"]["shifts"]
+ ret["shifts"] = [shift_info.get(op, 0) for op in operands]
+
+ return ret
+
+
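
An illustrative call (grid and dataset names are invented): with two grids loaded, the per-grid normalization list is filled with a default of 1.0 for unlisted grids, and the repetition flag is only ever set for single-grid fktables.

```python
from pathlib import Path
from validphys.pineparser import pineko_apfelcomb_compatibility_flags

metadata = {
    "target_dataset": "DS",
    "apfelcomb": {
        "normalization": {"GRID_A": 0.5},
        "repetition_flag": ["GRID_TOT"],
    },
}
paths = [Path("GRID_A.pineappl.lz4"), Path("GRID_B.pineappl.lz4")]

flags = pineko_apfelcomb_compatibility_flags(paths, metadata)
assert flags["normalization"] == [0.5, 1.0]
assert "repetition_flag" not in flags  # only set for single-grid fktables
```
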
+def _pinelumi_to_columns(pine_luminosity, hadronic):
+ """Makes the pineappl luminosity into the column indices of a dataframe
+ These corresponds to the indices of a flattened (14x14) matrix for hadronic observables
+ and the non-zero indices of the 14-flavours for DIS
+
+ Parameters
+ ----------
+ pine_luminosity: list(tuple)
+ list with a pair of flavours per channel
+ hadronic: bool
+ flag for hadronic / DIS observables
+
+ Returns
+ -------
+ list(int): list of labels for the columns
+ """
+ evol_basis_pids = tuple(
+ [22, 100, 21, 200]
+ + [200 + n**2 - 1 for n in range(2, 6 + 1)]
+ + [100 + n**2 - 1 for n in range(2, 6 + 1)]
+ )
+ flav_size = len(evol_basis_pids)
+ columns = []
+ if hadronic:
+ for i, j in pine_luminosity:
+ idx = evol_basis_pids.index(i)
+ jdx = evol_basis_pids.index(j)
+ columns.append(flav_size * idx + jdx)
+ else:
+ # The proton might come from both sides
+ try:
+ columns = [evol_basis_pids.index(i) for _, i in pine_luminosity]
+ except ValueError:
+ columns = [evol_basis_pids.index(i) for i, _ in pine_luminosity]
+ return columns
+
+
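
A worked check of the column arithmetic (a self-contained copy of the pid list above): the evolution-basis list has 14 entries, so a hadronic channel (i, j) lands at column `14 * idx + jdx`.

```python
evol_basis_pids = tuple(
    [22, 100, 21, 200]
    + [200 + n**2 - 1 for n in range(2, 6 + 1)]
    + [100 + n**2 - 1 for n in range(2, 6 + 1)]
)
assert len(evol_basis_pids) == 14

# The gluon-gluon channel (21, 21): 21 sits at index 2, so the column is 14*2 + 2
assert 14 * evol_basis_pids.index(21) + evol_basis_pids.index(21) == 30
```
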
+def get_yaml_information(yaml_file, theorypath):
+ """Reads the yaml information from a yaml compound file
+
+ Transitional function: the call to "pineko" might be to some other commondata reader
+ that will know how to extract the information from the commondata
+ """
+ # The pineappl grids are just stored where the fktables would be, "fastkernel"
+ grids_folder = theorypath / "fastkernel"
+ return pineko_yaml(yaml_file, grids_folder)
+
+
+def pineappl_reader(fkspec):
+ """
+ Receives a fkspec, which contains the path to the grids that are to be read by pineappl
+ as well as metadata that fixes things like conversion factors or apfelcomb flag.
+
+ Note that both conversion factors and apfelcomb flags will be eventually removed.
+
+    For more information on the reading of pineappl tables:
+ https://pineappl.readthedocs.io/en/latest/modules/pineappl/pineappl.html#pineappl.pineappl.PyFkTable
+
+ About the reader:
+ Each pineappl table is a 4-dimensional grid with:
+ (ndata, active channels, x1, x2)
+        for DIS grids x2 will contain a single number.
+ The luminosity channels are given in a (flav1, flav2) format and thus need to be converted
+ to the 1-D index of a (14x14) luminosity tensor in order to put in the form of a dataframe.
+
+ All grids in pineappl are constructed with the exact same xgrid,
+ the active channels can vary and so when grids are concatenated for an observable
+ the gaps are filled with 0s.
+
+ The pineappl grids are such that obs = sum_{bins} fk * f (*f) * bin_w
+ so in order to use them together with old-style grids (obs = sum_{bins} fk * xf (*xf))
+ it is necessary to remove the factor of x and the normalization of the bins.
+
+ About apfelcomb flags:
+ old commondata files and old grids have evolved together
+        and fixes and hacks have been incorporated in one or the other.
+        For the new theory to be compatible with old commondata it is necessary
+ to keep track of said hacks (and to apply conversion factors when required)
+
+ Returns
+ -------
+ validphys.coredata.FKTableData
+ an FKTableData object containing all necessary information to compute predictions
+ """
+ from pineappl.fk_table import FkTable
+
+ pines = [FkTable.read(i) for i in fkspec.fkpath]
+
+ # Extract metadata from the first grid
+ pine_rep = pines[0]
+
+ # Is it hadronic? (at the moment only hadronic and DIS are considered)
+ hadronic = pine_rep.key_values()["initial_state_1"] == pine_rep.key_values()["initial_state_2"]
+ # Sanity check (in case at some point we start fitting things that are not protons)
+ if hadronic and pine_rep.key_values()["initial_state_1"] != "2212":
+ raise ValueError(
+ "pineappl_reader is not prepared to read a hadronic fktable with no protons!"
+ )
+ Q0 = np.sqrt(pine_rep.muf2())
+ xgrid = pine_rep.x_grid()
+ xi = np.arange(len(xgrid))
+ protected = False
+
+ apfelcomb = pineko_apfelcomb_compatibility_flags(fkspec.fkpath, fkspec.metadata)
+
+ # fktables in pineapplgrid are for obs = fk * f while previous fktables were obs = fk * xf
+ # prepare the grid all tables will be divided by
+ if hadronic:
+ xdivision = np.prod(np.meshgrid(xgrid, xgrid), axis=0)
+ else:
+ xdivision = xgrid[:, np.newaxis]
+
+ partial_fktables = []
+ ndata = 0
+ for i, p in enumerate(pines):
+
+ # Remove the bin normalization
+ raw_fktable = (p.table().T / p.bin_normalizations()).T
+ n = raw_fktable.shape[0]
+
+ # Apply the apfelcomb fixes _if_ they are needed
+ if apfelcomb is not None:
+ if apfelcomb.get("normalization") is not None:
+ raw_fktable = raw_fktable * apfelcomb["normalization"][i]
+ if apfelcomb.get("repetition_flag", False):
+ raw_fktable = raw_fktable[0:1]
+ n = 1
+ protected = True
+ if apfelcomb.get("shifts") is not None:
+ ndata += apfelcomb["shifts"][i]
+
+ # Check conversion factors and remove the x* from the fktable
+ raw_fktable *= fkspec.metadata.get("conversion_factor", 1.0) / xdivision
+
+ # Create the multi-index for the dataframe
+ # for optimized pineappls different grids can potentially have different indices
+ # so they need to be indexed separately and then concatenated only at the end
+ lumi_columns = _pinelumi_to_columns(p.lumi(), hadronic)
+ lf = len(lumi_columns)
+ data_idx = np.arange(ndata, ndata + n)
+ if hadronic:
+ idx = pd.MultiIndex.from_product([data_idx, xi, xi], names=["data", "x1", "x2"])
+ else:
+ idx = pd.MultiIndex.from_product([data_idx, xi], names=["data", "x"])
+
+ # Now concatenate (data, x1, x2) and move the flavours to the columns
+ df_fktable = raw_fktable.swapaxes(0, 1).reshape(lf, -1).T
+ partial_fktables.append(pd.DataFrame(df_fktable, columns=lumi_columns, index=idx))
+
+ ndata += n
+
+    # Finally concatenate all fktables, sort by flavours and fill any holes
+ sigma = pd.concat(partial_fktables, sort=True, copy=False).fillna(0.0)
+
+ return FKTableData(
+ sigma=sigma,
+ ndata=ndata,
+ Q0=Q0,
+ metadata=fkspec.metadata,
+ hadronic=hadronic,
+ xgrid=xgrid,
+ protected=protected,
+ )
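
To make the x-division inside the reader concrete (toy xgrid): for hadronic tables the division grid satisfies `xdivision[i, j] = xgrid[i] * xgrid[j]`, matching the (x1, x2) layout of the raw fktable before it is flattened into the dataframe.

```python
import numpy as np

xgrid = np.array([0.1, 0.5, 1.0])
xdivision = np.prod(np.meshgrid(xgrid, xgrid), axis=0)

assert xdivision.shape == (3, 3)
assert xdivision[0, 2] == xgrid[0] * xgrid[2]
```
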
diff --git a/validphys2/src/validphys/results.py b/validphys2/src/validphys/results.py
index e46ce9bf2e..98334103e2 100644
--- a/validphys2/src/validphys/results.py
+++ b/validphys2/src/validphys/results.py
@@ -77,8 +77,14 @@ def __len__(self):
class DataResult(StatsResult):
"""Holds the relevant information from a given dataset"""
- def __init__(self, dataobj, covmat, sqrtcovmat):
- self._central_value = dataobj.get_cv()
+ def __init__(self, dataset, covmat, sqrtcovmat):
+ # The commondata is currently a libNNPDF object
+ loaded_cd = dataset.load_commondata()
+ if isinstance(loaded_cd, list):
+ cv = np.concatenate([cd.get_cv() for cd in loaded_cd])
+ else:
+ cv = loaded_cd.get_cv()
+ self._central_value = cv
stats = Stats(self._central_value)
self._covmat = covmat
self._sqrtcovmat = sqrtcovmat
@@ -462,9 +468,8 @@ def results(dataset: (DataSetSpec), pdf: PDF, covariance_matrix, sqrt_covmat):
The theory is specified as part of the dataset.
A group of datasets is also allowed.
(as a result of the C++ code layout)."""
- data = dataset.load()
return (
- DataResult(data, covariance_matrix, sqrt_covmat),
+ DataResult(dataset, covariance_matrix, sqrt_covmat),
ThPredictionsResult.from_convolution(pdf, dataset),
)
@@ -493,7 +498,7 @@ def pdf_results(
th_results = [ThPredictionsResult.from_convolution(pdf, dataset) for pdf in pdfs]
- return (DataResult(dataset.load(), covariance_matrix, sqrt_covmat), *th_results)
+ return (DataResult(dataset, covariance_matrix, sqrt_covmat), *th_results)
@require_one("pdfs", "pdf")
diff --git a/validphys2/src/validphys/theorycovariance/tests.py b/validphys2/src/validphys/theorycovariance/tests.py
index f9b88596ee..1a90302cac 100644
--- a/validphys2/src/validphys/theorycovariance/tests.py
+++ b/validphys2/src/validphys/theorycovariance/tests.py
@@ -219,7 +219,7 @@ def all_matched_data_lengths(all_matched_datasets):
"""Returns a list of the data sets lengths."""
lens = []
for rlist in all_matched_datasets:
- lens.append(rlist[0].load().GetNData())
+ lens.append(rlist[0].load_commondata().GetNData())
return lens
|