diff --git a/FKTable Playground.ipynb b/FKTable Playground.ipynb new file mode 100644 index 0000000000..29acfc8932 --- /dev/null +++ b/FKTable Playground.ipynb @@ -0,0 +1,1140 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "cceb1080", + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "from validphys.loader import _get_nnpdf_profile\n", + "from validphys.api import API\n", + "import numpy as np\n", + "import pandas as pd\n", + "from validphys.convolution import central_predictions\n", + "\n", + "profile = _get_nnpdf_profile()\n", + "#yaml_db = Path(profile[\"data_path\"]) / \"yamldb\"" + ] + }, + { + "cell_type": "markdown", + "id": "6b1eb8f3", + "metadata": {}, + "source": [ + "The `yaml_db` folder is a temporary thing as it contains files that look like:\n", + "\n", + "```yaml\n", + "conversion_factor: 1.0\n", + "operands:\n", + "- - NMC_NC_EM_D_F2\n", + "- - NMC_NC_EM_P_F2\n", + "operation: RATIO\n", + "target_dataset: NMCPD\n", + "```\n", + "\n", + "This information will eventually be part of the new commondata format of course.\n", + "\n", + "The `operation` is applied to the first level of the list while the second level is just concatenated. This is necessary since `pineappl` fktables might contain one layer of concatenation which is already done for the \"classic\" fktables.\n", + "\n", + "The `pineappl` fktables will live inside the appropiate `theory_xxx` folder `/pineappls`." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "051581e1", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "NMCPD_dw_ite\n", + "\n", + "-- Reading COMMONDATA for Dataset: NMCPD_dw_ite\n", + "nData: 260 nSys: 105\n", + "-- COMMONDATA Files for NMCPD_dw_ite successfully read.\n", + "\n", + "NMC\n", + "\n", + "-- Reading COMMONDATA for Dataset: NMC\n", + "nData: 292 nSys: 16\n", + "-- COMMONDATA Files for NMC successfully read.\n", + "\n", + "SLACP_dwsh\n", + "\n", + "-- Reading COMMONDATA for Dataset: SLACP_dwsh\n", + "nData: 211 nSys: 3\n", + "-- COMMONDATA Files for SLACP_dwsh successfully read.\n", + "\n", + "SLACD_dw_ite\n", + "\n", + "-- Reading COMMONDATA for Dataset: SLACD_dw_ite\n", + "nData: 211 nSys: 103\n", + "-- COMMONDATA Files for SLACD_dw_ite successfully read.\n", + "\n", + "BCDMSP_dwsh\n", + "\n", + "-- Reading COMMONDATA for Dataset: BCDMSP_dwsh\n", + "nData: 351 nSys: 11\n", + "-- COMMONDATA Files for BCDMSP_dwsh successfully read.\n", + "\n", + "BCDMSD_dw_ite\n", + "\n", + "-- Reading COMMONDATA for Dataset: BCDMSD_dw_ite\n", + "nData: 254 nSys: 108\n", + "-- COMMONDATA Files for BCDMSD_dw_ite successfully read.\n", + "\n", + "CHORUSNUPb_dw_ite\n", + "\n", + "-- Reading COMMONDATA for Dataset: CHORUSNUPb_dw_ite\n", + "nData: 607 nSys: 1014\n", + "-- COMMONDATA Files for CHORUSNUPb_dw_ite successfully read.\n", + "\n", + "CHORUSNBPb_dw_ite\n", + "\n", + "-- Reading COMMONDATA for Dataset: CHORUSNBPb_dw_ite\n", + "nData: 607 nSys: 114\n", + "-- COMMONDATA Files for CHORUSNBPb_dw_ite successfully read.\n", + "\n", + "NTVNUDMNFe_dw_ite\n", + "\n", + "-- Reading COMMONDATA for Dataset: NTVNUDMNFe_dw_ite\n", + "nData: 45 nSys: 1003\n", + "-- COMMONDATA Files for NTVNUDMNFe_dw_ite successfully read.\n", + "\n", + "NTVNBDMNFe_dw_ite\n", + "\n", + "-- Reading COMMONDATA for Dataset: NTVNBDMNFe_dw_ite\n", + "nData: 45 nSys: 103\n", + "-- COMMONDATA Files for NTVNBDMNFe_dw_ite successfully read.\n", + "\n", + "HERACOMBNCEM\n", + "\n", + "-- 
Reading COMMONDATA for Dataset: HERACOMBNCEM\n", + "nData: 159 nSys: 170\n", + "-- COMMONDATA Files for HERACOMBNCEM successfully read.\n", + "\n", + "HERACOMBNCEP460\n", + "\n", + "-- Reading COMMONDATA for Dataset: HERACOMBNCEP460\n", + "nData: 209 nSys: 170\n", + "-- COMMONDATA Files for HERACOMBNCEP460 successfully read.\n", + "\n", + "HERACOMBNCEP575\n", + "\n", + "-- Reading COMMONDATA for Dataset: HERACOMBNCEP575\n", + "nData: 260 nSys: 170\n", + "-- COMMONDATA Files for HERACOMBNCEP575 successfully read.\n", + "\n", + "HERACOMBNCEP820\n", + "\n", + "-- Reading COMMONDATA for Dataset: HERACOMBNCEP820\n", + "nData: 112 nSys: 170\n", + "-- COMMONDATA Files for HERACOMBNCEP820 successfully read.\n", + "\n", + "HERACOMBNCEP920\n", + "\n", + "-- Reading COMMONDATA for Dataset: HERACOMBNCEP920\n", + "nData: 485 nSys: 170\n", + "-- COMMONDATA Files for HERACOMBNCEP920 successfully read.\n", + "\n", + "HERACOMBCCEM\n", + "\n", + "-- Reading COMMONDATA for Dataset: HERACOMBCCEM\n", + "nData: 42 nSys: 170\n", + "-- COMMONDATA Files for HERACOMBCCEM successfully read.\n", + "\n", + "HERACOMBCCEP\n", + "\n", + "-- Reading COMMONDATA for Dataset: HERACOMBCCEP\n", + "nData: 39 nSys: 170\n", + "-- COMMONDATA Files for HERACOMBCCEP successfully read.\n", + "\n", + "HERACOMB_SIGMARED_C\n", + "\n", + "-- Reading COMMONDATA for Dataset: HERACOMB_SIGMARED_C\n", + "nData: 52 nSys: 167\n", + "-- COMMONDATA Files for HERACOMB_SIGMARED_C successfully read.\n", + "\n", + "HERACOMB_SIGMARED_B\n", + "\n", + "-- Reading COMMONDATA for Dataset: HERACOMB_SIGMARED_B\n", + "nData: 27 nSys: 167\n", + "-- COMMONDATA Files for HERACOMB_SIGMARED_B successfully read.\n", + "\n", + "CDFZRAP_NEW\n", + "\n", + "-- Reading COMMONDATA for Dataset: CDFZRAP_NEW\n", + "nData: 28 nSys: 11\n", + "-- COMMONDATA Files for CDFZRAP_NEW successfully read.\n", + "\n", + "D0ZRAP_40\n", + "\n", + "-- Reading COMMONDATA for Dataset: D0ZRAP_40\n", + "nData: 28 nSys: 1\n", + "-- COMMONDATA Files for D0ZRAP_40 successfully read.\n", + "\n", + "ATLASWZRAP36PB\n", + "\n", + "-- Reading COMMONDATA for Dataset: ATLASWZRAP36PB\n", + "nData: 30 nSys: 32\n", + "-- COMMONDATA Files for ATLASWZRAP36PB successfully read.\n", + "\n", + "ATLASZHIGHMASS49FB\n", + "\n", + "-- Reading COMMONDATA for Dataset: ATLASZHIGHMASS49FB\n", + "nData: 13 nSys: 11\n", + "-- COMMONDATA Files for ATLASZHIGHMASS49FB successfully read.\n", + "\n", + "ATLASLOMASSDY11EXT\n", + "\n", + "-- Reading COMMONDATA for Dataset: ATLASLOMASSDY11EXT\n", + "nData: 6 nSys: 8\n", + "-- COMMONDATA Files for ATLASLOMASSDY11EXT successfully read.\n", + "\n", + "ATLASWZRAP11CC\n", + "\n", + "-- Reading COMMONDATA for Dataset: ATLASWZRAP11CC\n", + "nData: 46 nSys: 133\n", + "-- COMMONDATA Files for ATLASWZRAP11CC successfully read.\n", + "\n", + "ATLASWZRAP11CF\n", + "\n", + "-- Reading COMMONDATA for Dataset: ATLASWZRAP11CF\n", + "nData: 15 nSys: 133\n", + "-- COMMONDATA Files for ATLASWZRAP11CF successfully read.\n", + "\n", + "ATLASDY2D8TEV\n", + "\n", + "-- Reading COMMONDATA for Dataset: ATLASDY2D8TEV\n", + "nData: 48 nSys: 37\n", + "-- COMMONDATA Files for ATLASDY2D8TEV successfully read.\n", + "\n", + "ATLAS_DY_2D_8TEV_LOWMASS\n", + "\n", + "-- Reading COMMONDATA for Dataset: ATLAS_DY_2D_8TEV_LOWMASS\n", + "nData: 84 nSys: 277\n", + "-- COMMONDATA Files for ATLAS_DY_2D_8TEV_LOWMASS successfully read.\n", + "\n", + "ATLAS_WZ_TOT_13TEV\n", + "\n", + "-- Reading COMMONDATA for Dataset: ATLAS_WZ_TOT_13TEV\n", + "nData: 3 nSys: 4\n", + "-- COMMONDATA Files for ATLAS_WZ_TOT_13TEV 
successfully read.\n", + "\n", + "ATLAS_WP_JET_8TEV_PT\n", + "ATLAS_WM_JET_8TEV_PT\n", + "\n", + "-- Reading COMMONDATA for Dataset: ATLAS_WM_JET_8TEV_PT\n", + "nData: 16 nSys: 124\n", + "-- COMMONDATA Files for ATLAS_WM_JET_8TEV_PT successfully read.\n", + "\n", + "ATLASZPT8TEVMDIST\n", + "\n", + "-- Reading COMMONDATA for Dataset: ATLASZPT8TEVMDIST\n", + "nData: 64 nSys: 102\n", + "-- COMMONDATA Files for ATLASZPT8TEVMDIST successfully read.\n", + "\n", + "ATLASZPT8TEVYDIST\n", + "\n", + "-- Reading COMMONDATA for Dataset: ATLASZPT8TEVYDIST\n", + "nData: 120 nSys: 102\n", + "-- COMMONDATA Files for ATLASZPT8TEVYDIST successfully read.\n", + "\n", + "ATLASTTBARTOT7TEV\n", + "\n", + "-- Reading COMMONDATA for Dataset: ATLASTTBARTOT7TEV\n", + "nData: 1 nSys: 3\n", + "-- COMMONDATA Files for ATLASTTBARTOT7TEV successfully read.\n", + "\n", + "ATLASTTBARTOT8TEV\n", + "\n", + "-- Reading COMMONDATA for Dataset: ATLASTTBARTOT8TEV\n", + "nData: 1 nSys: 3\n", + "-- COMMONDATA Files for ATLASTTBARTOT8TEV successfully read.\n", + "\n", + "ATLAS_TTBARTOT_13TEV_FULLLUMI\n", + "\n", + "-- Reading COMMONDATA for Dataset: ATLAS_TTBARTOT_13TEV_FULLLUMI\n", + "nData: 1 nSys: 2\n", + "-- COMMONDATA Files for ATLAS_TTBARTOT_13TEV_FULLLUMI successfully read.\n", + "\n", + "ATLAS_TTB_DIFF_8TEV_LJ_TRAPNORM\n", + "\n", + "-- Reading COMMONDATA for Dataset: ATLAS_TTB_DIFF_8TEV_LJ_TRAPNORM\n", + "nData: 5 nSys: 135\n", + "-- COMMONDATA Files for ATLAS_TTB_DIFF_8TEV_LJ_TRAPNORM successfully read.\n", + "\n", + "ATLAS_TTB_DIFF_8TEV_LJ_TTRAPNORM\n", + "\n", + "-- Reading COMMONDATA for Dataset: ATLAS_TTB_DIFF_8TEV_LJ_TTRAPNORM\n", + "nData: 5 nSys: 135\n", + "-- COMMONDATA Files for ATLAS_TTB_DIFF_8TEV_LJ_TTRAPNORM successfully read.\n", + "\n", + "ATLAS_TOPDIFF_DILEPT_8TEV_TTRAPNORM\n", + "\n", + "-- Reading COMMONDATA for Dataset: ATLAS_TOPDIFF_DILEPT_8TEV_TTRAPNORM\n", + "nData: 5 nSys: 5\n", + "-- COMMONDATA Files for ATLAS_TOPDIFF_DILEPT_8TEV_TTRAPNORM successfully read.\n", + "\n", + "ATLAS_1JET_8TEV_R06_DEC\n", + "\n", + "-- Reading COMMONDATA for Dataset: ATLAS_1JET_8TEV_R06_DEC\n", + "nData: 171 nSys: 677\n", + "-- COMMONDATA Files for ATLAS_1JET_8TEV_R06_DEC successfully read.\n", + "\n", + "ATLAS_2JET_7TEV_R06\n", + "\n", + "-- Reading COMMONDATA for Dataset: ATLAS_2JET_7TEV_R06\n", + "nData: 90 nSys: 474\n", + "-- COMMONDATA Files for ATLAS_2JET_7TEV_R06 successfully read.\n", + "\n", + "ATLASPHT15_SF\n", + "\n", + "-- Reading COMMONDATA for Dataset: ATLASPHT15_SF\n", + "nData: 53 nSys: 2\n", + "-- COMMONDATA Files for ATLASPHT15_SF successfully read.\n", + "\n", + "ATLAS_SINGLETOP_TCH_R_7TEV\n", + "\n", + "-- Reading COMMONDATA for Dataset: ATLAS_SINGLETOP_TCH_R_7TEV\n", + "nData: 1 nSys: 13\n", + "-- COMMONDATA Files for ATLAS_SINGLETOP_TCH_R_7TEV successfully read.\n", + "\n", + "ATLAS_SINGLETOP_TCH_R_13TEV\n", + "\n", + "-- Reading COMMONDATA for Dataset: ATLAS_SINGLETOP_TCH_R_13TEV\n", + "nData: 1 nSys: 1\n", + "-- COMMONDATA Files for ATLAS_SINGLETOP_TCH_R_13TEV successfully read.\n", + "\n", + "ATLAS_SINGLETOP_TCH_DIFF_7TEV_T_RAP_NORM\n", + "\n", + "-- Reading COMMONDATA for Dataset: ATLAS_SINGLETOP_TCH_DIFF_7TEV_T_RAP_NORM\n", + "nData: 3 nSys: 17\n", + "-- COMMONDATA Files for ATLAS_SINGLETOP_TCH_DIFF_7TEV_T_RAP_NORM successfully read.\n", + "\n", + "ATLAS_SINGLETOP_TCH_DIFF_7TEV_TBAR_RAP_NORM\n", + "\n", + "-- Reading COMMONDATA for Dataset: ATLAS_SINGLETOP_TCH_DIFF_7TEV_TBAR_RAP_NORM\n", + "nData: 3 nSys: 15\n", + "-- COMMONDATA Files for ATLAS_SINGLETOP_TCH_DIFF_7TEV_TBAR_RAP_NORM 
successfully read.\n", + "\n", + "ATLAS_SINGLETOP_TCH_DIFF_8TEV_T_RAP_NORM\n", + "\n", + "-- Reading COMMONDATA for Dataset: ATLAS_SINGLETOP_TCH_DIFF_8TEV_T_RAP_NORM\n", + "nData: 3 nSys: 31\n", + "-- COMMONDATA Files for ATLAS_SINGLETOP_TCH_DIFF_8TEV_T_RAP_NORM successfully read.\n", + "\n", + "ATLAS_SINGLETOP_TCH_DIFF_8TEV_TBAR_RAP_NORM\n", + "\n", + "-- Reading COMMONDATA for Dataset: ATLAS_SINGLETOP_TCH_DIFF_8TEV_TBAR_RAP_NORM\n", + "nData: 3 nSys: 31\n", + "-- COMMONDATA Files for ATLAS_SINGLETOP_TCH_DIFF_8TEV_TBAR_RAP_NORM successfully read.\n", + "\n", + "CMSWEASY840PB\n", + "\n", + "-- Reading COMMONDATA for Dataset: CMSWEASY840PB\n", + "nData: 11 nSys: 11\n", + "-- COMMONDATA Files for CMSWEASY840PB successfully read.\n", + "\n", + "CMSWMASY47FB\n", + "\n", + "-- Reading COMMONDATA for Dataset: CMSWMASY47FB\n", + "nData: 11 nSys: 11\n", + "-- COMMONDATA Files for CMSWMASY47FB successfully read.\n", + "\n", + "CMSDY2D11\n", + "\n", + "-- Reading COMMONDATA for Dataset: CMSDY2D11\n", + "nData: 132 nSys: 133\n", + "-- COMMONDATA Files for CMSDY2D11 successfully read.\n", + "\n", + "CMSWMU8TEV\n", + "\n", + "-- Reading COMMONDATA for Dataset: CMSWMU8TEV\n", + "nData: 22 nSys: 45\n", + "-- COMMONDATA Files for CMSWMU8TEV successfully read.\n", + "\n", + "CMSZDIFF12\n", + "\n", + "-- Reading COMMONDATA for Dataset: CMSZDIFF12\n", + "nData: 50 nSys: 52\n", + "-- COMMONDATA Files for CMSZDIFF12 successfully read.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CMS_2JET_7TEV\n", + "\n", + "-- Reading COMMONDATA for Dataset: CMS_2JET_7TEV\n", + "nData: 54 nSys: 88\n", + "-- COMMONDATA Files for CMS_2JET_7TEV successfully read.\n", + "\n", + "CMS_1JET_8TEV\n", + "\n", + "-- Reading COMMONDATA for Dataset: CMS_1JET_8TEV\n", + "nData: 239 nSys: 293\n", + "-- COMMONDATA Files for CMS_1JET_8TEV successfully read.\n", + "\n", + "CMSTTBARTOT7TEV\n", + "\n", + "-- Reading COMMONDATA for Dataset: CMSTTBARTOT7TEV\n", + "nData: 1 nSys: 2\n", + "-- COMMONDATA Files for CMSTTBARTOT7TEV successfully read.\n", + "\n", + "CMSTTBARTOT8TEV\n", + "\n", + "-- Reading COMMONDATA for Dataset: CMSTTBARTOT8TEV\n", + "nData: 1 nSys: 2\n", + "-- COMMONDATA Files for CMSTTBARTOT8TEV successfully read.\n", + "\n", + "CMSTTBARTOT13TEV\n", + "\n", + "-- Reading COMMONDATA for Dataset: CMSTTBARTOT13TEV\n", + "nData: 1 nSys: 2\n", + "-- COMMONDATA Files for CMSTTBARTOT13TEV successfully read.\n", + "\n", + "CMSTOPDIFF8TEVTTRAPNORM\n", + "\n", + "-- Reading COMMONDATA for Dataset: CMSTOPDIFF8TEVTTRAPNORM\n", + "nData: 10 nSys: 21\n", + "-- COMMONDATA Files for CMSTOPDIFF8TEVTTRAPNORM successfully read.\n", + "\n", + "CMSTTBARTOT5TEV\n", + "\n", + "-- Reading COMMONDATA for Dataset: CMSTTBARTOT5TEV\n", + "nData: 1 nSys: 2\n", + "-- COMMONDATA Files for CMSTTBARTOT5TEV successfully read.\n", + "\n", + "CMS_TTBAR_2D_DIFF_MTT_TRAP_NORM\n", + "\n", + "-- Reading COMMONDATA for Dataset: CMS_TTBAR_2D_DIFF_MTT_TRAP_NORM\n", + "nData: 16 nSys: 44\n", + "-- COMMONDATA Files for CMS_TTBAR_2D_DIFF_MTT_TRAP_NORM successfully read.\n", + "\n", + "CMS_TTB_DIFF_13TEV_2016_2L_TRAP\n", + "\n", + "-- Reading COMMONDATA for Dataset: CMS_TTB_DIFF_13TEV_2016_2L_TRAP\n", + "nData: 10 nSys: 10\n", + "-- COMMONDATA Files for CMS_TTB_DIFF_13TEV_2016_2L_TRAP successfully read.\n", + "\n", + "CMS_TTB_DIFF_13TEV_2016_LJ_TRAP\n", + "\n", + "-- Reading COMMONDATA for Dataset: CMS_TTB_DIFF_13TEV_2016_LJ_TRAP\n", + "nData: 11 nSys: 11\n", + "-- COMMONDATA Files for CMS_TTB_DIFF_13TEV_2016_LJ_TRAP successfully 
read.\n", + "\n", + "CMS_SINGLETOP_TCH_TOT_7TEV\n", + "\n", + "-- Reading COMMONDATA for Dataset: CMS_SINGLETOP_TCH_TOT_7TEV\n", + "nData: 1 nSys: 3\n", + "-- COMMONDATA Files for CMS_SINGLETOP_TCH_TOT_7TEV successfully read.\n", + "\n", + "CMS_SINGLETOP_TCH_R_8TEV\n", + "\n", + "-- Reading COMMONDATA for Dataset: CMS_SINGLETOP_TCH_R_8TEV\n", + "nData: 1 nSys: 1\n", + "-- COMMONDATA Files for CMS_SINGLETOP_TCH_R_8TEV successfully read.\n", + "\n", + "CMS_SINGLETOP_TCH_R_13TEV\n", + "\n", + "-- Reading COMMONDATA for Dataset: CMS_SINGLETOP_TCH_R_13TEV\n", + "nData: 1 nSys: 1\n", + "-- COMMONDATA Files for CMS_SINGLETOP_TCH_R_13TEV successfully read.\n", + "\n", + "LHCBZ940PB\n", + "\n", + "-- Reading COMMONDATA for Dataset: LHCBZ940PB\n", + "nData: 9 nSys: 11\n", + "-- COMMONDATA Files for LHCBZ940PB successfully read.\n", + "\n", + "LHCBZEE2FB_40\n", + "\n", + "-- Reading COMMONDATA for Dataset: LHCBZEE2FB_40\n", + "nData: 17 nSys: 19\n", + "-- COMMONDATA Files for LHCBZEE2FB_40 successfully read.\n", + "\n", + "LHCBWZMU7TEV\n", + "\n", + "-- Reading COMMONDATA for Dataset: LHCBWZMU7TEV\n", + "nData: 33 nSys: 35\n", + "-- COMMONDATA Files for LHCBWZMU7TEV successfully read.\n", + "\n", + "LHCBWZMU8TEV\n", + "\n", + "-- Reading COMMONDATA for Dataset: LHCBWZMU8TEV\n", + "nData: 34 nSys: 36\n", + "-- COMMONDATA Files for LHCBWZMU8TEV successfully read.\n", + "\n", + "LHCB_Z_13TEV_DIMUON\n", + "\n", + "-- Reading COMMONDATA for Dataset: LHCB_Z_13TEV_DIMUON\n", + "nData: 18 nSys: 19\n", + "-- COMMONDATA Files for LHCB_Z_13TEV_DIMUON successfully read.\n", + "\n", + "LHCB_Z_13TEV_DIELECTRON\n", + "\n", + "-- Reading COMMONDATA for Dataset: LHCB_Z_13TEV_DIELECTRON\n", + "nData: 17 nSys: 18\n", + "-- COMMONDATA Files for LHCB_Z_13TEV_DIELECTRON successfully read.\n", + "\n", + " vp pine ratio CMSTTBARTOT5TEV, ['QCD']\n", + " 0 0 0\n", + "data \n", + "0 69.137978 63.366976 1.091073\n", + " vp pine ratio CMS_TTBAR_2D_DIFF_MTT_TRAP_NORM, ['QCD']\n", + " 0 0 0\n", + "data \n", + "0 0.002605 0.003382 0.770101\n", + "1 0.002313 0.002982 0.775740\n", + "2 0.001639 0.002114 0.775299\n", + "3 0.000570 0.000741 0.769081\n", + "4 0.002545 0.002664 0.955411\n", + "5 0.002176 0.002363 0.921011\n", + "6 0.001513 0.001637 0.924156\n", + "7 0.000536 0.000578 0.927862\n", + "8 0.000998 0.001038 0.961663\n", + "9 0.000857 0.000930 0.921232\n", + "10 0.000624 0.000678 0.920042\n", + "11 0.000254 0.000276 0.918816\n", + "12 0.000072 0.000069 1.032299\n", + "13 0.000064 0.000069 0.923609\n", + "14 0.000059 0.000064 0.927297\n", + "15 0.000032 0.000035 0.918087\n" + ] + } + ], + "source": [ + "# Test them all\n", + "if True:\n", + " from yaml import safe_load\n", + " pdf = API.pdf(pdf=\"NNPDF40_nnlo_as_01180\")\n", + " all_res = []\n", + " # Reference here a NNPDF40 runcard to read up all datasets\n", + " #nnpdf40_runcard = safe_load(Path(\"/home/juacrumar/NNPDF-testing/nnpdf/n3fit/NNPDF40_with_pineappl.yml\").read_text())\n", + " nnpdf40_runcard = safe_load(Path(\"/mount/storage/Academic_Workspace/NNPDF/src/nnpdf/n3fit/NNPDF40_with_pineappl.yml\").read_text())\n", + " for d in nnpdf40_runcard[\"dataset_inputs\"]:\n", + " target_ds = d[\"dataset\"]\n", + " #if any(skipthis in target_ds for skipthis in [\"HERA\", \"NMC\", \"NTV\", \"CHORUS\", \"SLAC\", \"BCD\"]):\n", + " # continue\n", + " print(target_ds)\n", + " cfac = d.get(\"cfac\", [])\n", + " old_ds = API.dataset(dataset_input={\"dataset\": target_ds, \"cfac\": cfac}, theoryid=200, use_cuts=\"internal\")\n", + " ds = API.dataset(dataset_input={\"dataset\": 
target_ds, \"cfac\": cfac}, theoryid=400, use_cuts=\"internal\")\n", + " new_cp = central_predictions(ds, pdf)\n", + " cp = central_predictions(old_ds, pdf)\n", + " all_res.append(pd.concat([new_cp, cp, new_cp/cp], axis=1, keys=[\"vp\", \"pine\", f\"ratio {target_ds}, {cfac}\"]))\n", + " \n", + " for i in all_res:\n", + " mean_ratio = i[i.columns[2]].mean()\n", + " if not (0.95 < mean_ratio < 1.05):\n", + " print(i)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "a4d88771", + "metadata": {}, + "outputs": [], + "source": [ + "target_ds = \"ATLAS_WP_JET_8TEV_PT\"\n", + "cfac = [\"QCD\"] # [\"NRM\"]\n", + "old_ds = API.dataset(dataset_input={\"dataset\": target_ds, \"cfac\": cfac}, theoryid=200, use_cuts=\"internal\")\n", + "ds = API.dataset(dataset_input={\"dataset\": target_ds, \"cfac\": cfac}, theoryid=400, use_cuts=\"internal\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "316a571e", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "-- Reading COMMONDATA for Dataset: ATLAS_WP_JET_8TEV_PT\n", + "nData: 16 nSys: 124\n", + "-- COMMONDATA Files for ATLAS_WP_JET_8TEV_PT successfully read.\n", + "\n", + "LHAPDF 6.4.0 loading /usr/share/lhapdf/LHAPDF/NNPDF40_nnlo_as_01180/NNPDF40_nnlo_as_01180_0000.dat\n", + "NNPDF40_nnlo_as_01180 PDF set, member #0, version 1; LHAPDF ID = 331100\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pinevpratio vp/ratioratio pine/vp
0000
data
15611.4398175616.2483441.0008570.999144
23154.7675733157.5594791.0008850.999116
31194.4676381195.3542441.0007420.999258
4515.187956515.5745651.0007500.999250
5246.767542246.9416541.0007060.999295
6126.644243126.7315871.0006900.999311
770.24457770.2953671.0007230.999277
831.54448331.5662111.0006890.999312
911.81750611.8254411.0006710.999329
105.0290275.0321501.0006210.999379
112.2904812.2917551.0005560.999444
121.1237061.1242751.0005060.999494
130.5658940.5661581.0004660.999534
140.2457230.2458241.0004150.999586
150.0545560.0545731.0003270.999673
\n", + "
" + ], + "text/plain": [ + " pine vp ratio vp/ratio ratio pine/vp\n", + " 0 0 0 0\n", + "data \n", + "1 5611.439817 5616.248344 1.000857 0.999144\n", + "2 3154.767573 3157.559479 1.000885 0.999116\n", + "3 1194.467638 1195.354244 1.000742 0.999258\n", + "4 515.187956 515.574565 1.000750 0.999250\n", + "5 246.767542 246.941654 1.000706 0.999295\n", + "6 126.644243 126.731587 1.000690 0.999311\n", + "7 70.244577 70.295367 1.000723 0.999277\n", + "8 31.544483 31.566211 1.000689 0.999312\n", + "9 11.817506 11.825441 1.000671 0.999329\n", + "10 5.029027 5.032150 1.000621 0.999379\n", + "11 2.290481 2.291755 1.000556 0.999444\n", + "12 1.123706 1.124275 1.000506 0.999494\n", + "13 0.565894 0.566158 1.000466 0.999534\n", + "14 0.245723 0.245824 1.000415 0.999586\n", + "15 0.054556 0.054573 1.000327 0.999673" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Let's try to get a prediction out of it\n", + "pdf = API.pdf(pdf=\"NNPDF40_nnlo_as_01180\")\n", + "new_cp = central_predictions(ds, pdf)\n", + "cp = central_predictions(old_ds, pdf)\n", + "pd.concat([new_cp, cp, cp/new_cp, new_cp/cp], axis=1, keys=[\"pine\", \"vp\", \"ratio vp/ratio\", \"ratio pine/vp\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "0905d0c2", + "metadata": {}, + "outputs": [], + "source": [ + "pine_fkspec = ds.fkspecs[0]\n", + "old_fkspec = old_ds.fkspecs[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "acd19243", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LHAPDF 6.4.0 loading all 101 PDFs in set NNPDF40_nnlo_as_01180\n", + "NNPDF40_nnlo_as_01180, version 1; 101 PDF members\n", + "[0.5982222 0.53206434 0.37638204 0.13101867 0.59686953 0.51087613\n", + " 0.35425162 0.12440635 0.23262168 0.1988827 0.14422644 0.05755101\n", + " 0.01654738 0.01469668 0.01335605 0.00692297]\n" + ] + } + ], + "source": [ + "import pineappl\n", + "pines = [pineappl.fk_table.FkTable.read(i.as_posix()) for i in pine_fkspec.fkpath]\n", + "# Inspect the pineappl prediction\n", + "res_pine = []\n", + "pp = pines[0]\n", + "lpdf = pdf.load()\n", + "\n", + "for p in pines:\n", + " res_pine.append(p.convolute_with_one(2212, lpdf.central_member.xfxQ2))\n", + "total_pine = np.concatenate(res_pine)\n", + "print(total_pine)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "f3a77d17", + "metadata": {}, + "outputs": [], + "source": [ + "# Let's inspect the content of the old fktables, remove the cfactor for now\n", + "from validphys.fkparser import load_fktable\n", + "old_fkspec.cfactors = False\n", + "old_fktabledata = load_fktable(old_fkspec)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "7ab604e1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "hadronic?: True\n", + "Q: 1.65\n", + "n: 16\n", + "xgrid shape: (40,)\n" + ] + } + ], + "source": [ + "print(f\"hadronic?: {old_fktabledata.hadronic}\")\n", + "print(f\"Q: {old_fktabledata.Q0}\")\n", + "print(f\"n: {old_fktabledata.ndata}\")\n", + "print(f\"xgrid shape: {old_fktabledata.xgrid.shape}\")\n", + "#old_fktabledata.sigma" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "62709983", + "metadata": {}, + "outputs": [], + "source": [ + "# First read the metadata that vp `FKTableData` needs and that all subgrids share\n", + "Q0 = np.sqrt(pp.muf2())\n", + "xgrid = pp.x_grid()\n", + "# Hadronic means in practice that not all luminosity 
combinations are just electron X proton\n", + "hadronic = not all(-11 in i for i in pp.lumi())\n", + "# Now prepare the concatenation of grids\n", + "fktables = []\n", + "for p in pines:\n", + " tmp = p.table().T/p.bin_normalizations()\n", + " fktables.append(tmp.T)\n", + "fktable = np.concatenate(fktables, axis=0)\n", + "ndata = fktable.shape[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "d9d5a130", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[(100, 21),\n", + " (100, 203),\n", + " (100, 208),\n", + " (100, 200),\n", + " (100, 103),\n", + " (100, 108),\n", + " (100, 115),\n", + " (21, 21),\n", + " (21, 203),\n", + " (21, 208),\n", + " (21, 200),\n", + " (21, 103),\n", + " (21, 108),\n", + " (21, 115),\n", + " (21, 100),\n", + " (200, 203),\n", + " (200, 208),\n", + " (203, 203),\n", + " (203, 208),\n", + " (203, 200),\n", + " (203, 103),\n", + " (203, 108),\n", + " (203, 115),\n", + " (203, 100),\n", + " (208, 208),\n", + " (208, 200),\n", + " (208, 103),\n", + " (208, 108),\n", + " (208, 115),\n", + " (208, 100),\n", + " (200, 200),\n", + " (200, 103),\n", + " (200, 108),\n", + " (200, 115),\n", + " (200, 100),\n", + " (103, 103),\n", + " (103, 108),\n", + " (103, 115),\n", + " (103, 100),\n", + " (108, 108),\n", + " (108, 115),\n", + " (108, 100),\n", + " (115, 115),\n", + " (115, 100),\n", + " (100, 100)]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pp.lumi()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "b95c8549", + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'df' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Input \u001b[0;32mIn [11]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mdf\u001b[49m\u001b[38;5;241m.\u001b[39mcolumns\n", + "\u001b[0;31mNameError\u001b[0m: name 'df' is not defined" + ] + } + ], + "source": [ + "df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "99782f74", + "metadata": {}, + "outputs": [], + "source": [ + "# Now let's try to join the fktable, luminosity and xgrid into a pandas dataframe\n", + "# keeping compatibility with validphys and, hopefully, 50% of my own sanity\n", + "\n", + "# Step 1), make the luminosity into a 14x14 mask for the evolution basis\n", + "eko_numbering_scheme = (22, 100, 21, 200, 203, 208, 215, 224, 235, 103, 108, 115, 124, 135)\n", + "# note that this is the same ordering that was used in fktables\n", + "co = []\n", + "for i, j in pp.lumi():\n", + " # Ask where this would fall in a 14x14 matrix\n", + " idx = eko_numbering_scheme.index(i)\n", + " jdx = eko_numbering_scheme.index(j)\n", + " co.append(idx*14 + jdx)\n", + " \n", + "# Step 2) prepare the indices for the dataframe\n", + "xi = np.arange(len(xgrid))\n", + "ni = np.arange(ndata)\n", + "mi = pd.MultiIndex.from_product([ni, xi, xi], names=[\"data\", \"x1\", \"x2\"])\n", + "\n", + "# Step 3) Now play with the array until we flatten it in the right way?\n", + "# The fktables for pineappl have this extra factor of x...\n", + "# The output of pineappl is (ndata, flavours, x, x)\n", + "lf = len(co)\n", + "xfktable = fktable.reshape(ndata, lf, -1)/(xgrid[:,None]*xgrid[None,:]).flatten()\n", + "fkmod = np.moveaxis(xfktable, 1, -1)\n", + "fkframe = 
fkmod.reshape(-1, lf)\n", + "\n", + "# Uff, big\n", + "df = pd.DataFrame(fkframe, index=mi, columns=co)\n", + "\n", + "from validphys.convolution import central_hadron_predictions\n", + "from validphys.coredata import FKTableData\n", + "fk = FKTableData(sigma=df, ndata=ndata, Q0=Q0, metadata=None, hadronic=True, xgrid=xgrid)\n", + "central_hadron_predictions(fk, pdf)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "67e323f7", + "metadata": {}, + "outputs": [], + "source": [ + "# Create a luminosity tensor and check that the results are correct\n", + "from validphys.pdfbases import evolution\n", + "\n", + "evol_basis = (\n", + " \"photon\",\n", + " \"singlet\",\n", + " \"g\",\n", + " \"V\",\n", + " \"V3\",\n", + " \"V8\",\n", + " \"V15\",\n", + " \"V24\",\n", + " \"V35\",\n", + " \"T3\",\n", + " \"T8\",\n", + " \"T15\",\n", + " \"T24\",\n", + " \"T35\",\n", + ")\n", + "total_pdf = evolution.grid_values(pdf, evol_basis, xgrid, [Q0]).squeeze()[0]/xgrid\n", + "print(total_pdf.shape)\n", + "lumi = np.einsum('ij,kl->ikjl', total_pdf, total_pdf)\n", + "lumi_masked = lumi[flavour_map]\n", + "print(fktable.shape)\n", + "print(lumi_masked.shape)\n", + "res = np.einsum('ijkl,jkl->i', fktable, lumi_masked)\n", + "#pd.concat([pd.DataFrame(res), cp, pd.DataFrame(res)/cp, ], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c2ff2522", + "metadata": {}, + "outputs": [], + "source": [ + "xfktable.reshape(48,91,-1).shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4905c4a9", + "metadata": {}, + "outputs": [], + "source": [ + "from validphys.fkparser import open_fkpath, _parse_string, _parse_header, _build_sigma\n", + "from validphys.fkparser import _parse_flavour_map, _parse_hadronic_fast_kernel\n", + "try:\n", + " f.close()\n", + "except:\n", + " pass\n", + "f = open_fkpath(old_fkspec.fkpath)\n", + "line_and_stream = enumerate(f, start=1)\n", + "lineno, header = next(line_and_stream)\n", + "res = {}\n", + "while True:\n", + " marker, header_name = _parse_header(lineno, header)\n", + " if header_name == \"FastKernel\":\n", + " break\n", + " if header_name == \"FlavourMap\":\n", + " out, lineno, header = _parse_flavour_map(line_and_stream)\n", + " else:\n", + " out, lineno, header = _parse_string(line_and_stream)\n", + " res[header_name] = out " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ffd41ff3", + "metadata": {}, + "outputs": [], + "source": [ + "res[\"FlavourMap\"].shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3d00059d", + "metadata": {}, + "outputs": [], + "source": [ + "i_hate_pandas = _parse_hadronic_fast_kernel(f)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bde47252", + "metadata": {}, + "outputs": [], + "source": [ + "i_hate_pandas" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c7dcc9c6", + "metadata": {}, + "outputs": [], + "source": [ + "old_fktabledata.sigma" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "910dbb9d", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "nnvortex", + "language": "python", + "name": "nnvortex" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.4" + } + }, + "nbformat": 4, + 
"nbformat_minor": 5 +} diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index e84f57d9f6..adc3c6328e 100644 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -52,6 +52,7 @@ requirements: - sphinxcontrib-bibtex - docutils =0.16 # This dependency is not explicity needed but https://github.com/NNPDF/nnpdf/issues/1220 - curio >=1.0 + - pineappl >=0.5.2 test: requires: diff --git a/n3fit/src/n3fit/layers/observable.py b/n3fit/src/n3fit/layers/observable.py index 603804f523..7ac2f08df8 100644 --- a/n3fit/src/n3fit/layers/observable.py +++ b/n3fit/src/n3fit/layers/observable.py @@ -5,7 +5,7 @@ def _is_unique(list_of_arrays): - """ Check whether the list of arrays more than one different arrays """ + """Check whether the list of arrays more than one different arrays""" the_first = list_of_arrays[0] for i in list_of_arrays[1:]: if not np.array_equal(the_first, i): @@ -15,30 +15,30 @@ def _is_unique(list_of_arrays): class Observable(MetaLayer, ABC): """ - This class is the parent of the DIS and DY convolutions. - All backend-dependent code necessary for the convolutions - is (must be) concentrated here + This class is the parent of the DIS and DY convolutions. + All backend-dependent code necessary for the convolutions + is (must be) concentrated here - The methods gen_mask and call must be overriden by the observables - where - - gen_mask: it is called by the initializer and generates the mask between - fktables and pdfs - - call: this is what does the actual operation + The methods gen_mask and call must be overriden by the observables + where + - gen_mask: it is called by the initializer and generates the mask between + fktables and pdfs + - call: this is what does the actual operation - Parameters - ---------- - fktable_dicts: list - list of fktable_dicts which define basis and xgrid for the fktables in the list - fktable_arr: list - list of fktables for this observable - operation_name: str - string defining the name of the operation to be applied to the fktables - nfl: int - number of flavours in the pdf (default:14) + Parameters + ---------- + fktable_data: list[validphys.coredata.FKTableData] + list of FK which define basis and xgrid for the fktables in the list + fktable_arr: list + list of fktables for this observable + operation_name: str + string defining the name of the operation to be applied to the fktables + nfl: int + number of flavours in the pdf (default:14) """ - def __init__(self, fktable_dicts, fktable_arr, operation_name, nfl=14, **kwargs): + def __init__(self, fktable_data, fktable_arr, operation_name, nfl=14, **kwargs): super(MetaLayer, self).__init__(**kwargs) self.nfl = nfl @@ -46,9 +46,9 @@ def __init__(self, fktable_dicts, fktable_arr, operation_name, nfl=14, **kwargs) basis = [] xgrids = [] self.fktables = [] - for fktable, fk in zip(fktable_dicts, fktable_arr): - xgrids.append(fktable["xgrid"]) - basis.append(fktable["basis"]) + for fkdata, fk in zip(fktable_data, fktable_arr): + xgrids.append(fkdata.xgrid.reshape(1, -1)) + basis.append(fkdata.luminosity_mapping) self.fktables.append(op.numpy_to_tensor(fk)) # check how many xgrids this dataset needs diff --git a/n3fit/src/n3fit/model_gen.py b/n3fit/src/n3fit/model_gen.py index 9ab6376996..fa0688110d 100644 --- a/n3fit/src/n3fit/model_gen.py +++ b/n3fit/src/n3fit/model_gen.py @@ -134,23 +134,23 @@ def observable_generator( model_obs_ex = [] model_inputs = [] # The first step is to compute the observable for each of the datasets - for dataset_dict in spec_dict["datasets"]: + for dataset in 
spec_dict["datasets"]: # Get the generic information of the dataset - dataset_name = dataset_dict["name"] + dataset_name = dataset.name # Look at what kind of layer do we need for this dataset - if dataset_dict["hadronic"]: + if dataset.hadronic: Obs_Layer = DY else: Obs_Layer = DIS # Set the operation (if any) to be applied to the fktables of this dataset - operation_name = dataset_dict["operation"] + operation_name = dataset.operation # Now generate the observable layer, which takes the following information: # operation name # dataset name - # list of fktable_dictionaries + # list of validphys.coredata.FKTableData objects # these will then be used to check how many different pdf inputs are needed # (and convolutions if given the case) @@ -158,8 +158,8 @@ def observable_generator( # Positivity (and integrability, which is a special kind of positivity...) # enters only at the "training" part of the models obs_layer_tr = Obs_Layer( - dataset_dict["fktables"], - dataset_dict["tr_fktables"], + dataset.fktables_data, + dataset.training_fktables(), operation_name, name=f"dat_{dataset_name}", ) @@ -167,39 +167,39 @@ def observable_generator( elif spec_dict.get("data_transformation_tr") is not None: # Data transformation needs access to the full array of output data obs_layer_ex = Obs_Layer( - dataset_dict["fktables"], - dataset_dict["ex_fktables"], + dataset.fktables_data, + dataset.fktables(), operation_name, name=f"exp_{dataset_name}", ) obs_layer_tr = obs_layer_vl = obs_layer_ex else: obs_layer_tr = Obs_Layer( - dataset_dict["fktables"], - dataset_dict["tr_fktables"], + dataset.fktables_data, + dataset.training_fktables(), operation_name, name=f"dat_{dataset_name}", ) obs_layer_ex = Obs_Layer( - dataset_dict["fktables"], - dataset_dict["ex_fktables"], + dataset.fktables_data, + dataset.fktables(), operation_name, name=f"exp_{dataset_name}", ) obs_layer_vl = Obs_Layer( - dataset_dict["fktables"], - dataset_dict["vl_fktables"], + dataset.fktables_data, + dataset.validation_fktables(), operation_name, name=f"val_{dataset_name}", ) # To know how many xpoints we compute we are duplicating functionality from obs_layer if obs_layer_tr.splitting is None: - xgrid = dataset_dict["fktables"][0]["xgrid"] + xgrid = dataset.fktables_data[0].xgrid.reshape(1, -1) model_inputs.append(xgrid) dataset_xsizes.append(xgrid.shape[1]) else: - xgrids = [i["xgrid"] for i in dataset_dict["fktables"]] + xgrids = [i.xgrid.reshape(1, -1) for i in dataset.fktables_data] model_inputs += xgrids dataset_xsizes.append(sum([i.shape[1] for i in xgrids])) diff --git a/n3fit/src/n3fit/model_trainer.py b/n3fit/src/n3fit/model_trainer.py index 81caa0d78c..a690cd27a1 100644 --- a/n3fit/src/n3fit/model_trainer.py +++ b/n3fit/src/n3fit/model_trainer.py @@ -131,6 +131,8 @@ def __init__( """ # Save all input information self.exp_info = exp_info + if pos_info is None: + pos_info = [] self.pos_info = pos_info self.integ_info = integ_info if self.integ_info is not None: @@ -275,7 +277,7 @@ def _fill_the_dictionaries(self): self.experimental["ndata"] += nd_tr + nd_vl for dataset in exp_dict["datasets"]: - self.all_datasets.append(dataset["name"]) + self.all_datasets.append(dataset.name) self.all_datasets = set(self.all_datasets) for pos_dict in self.pos_info: @@ -532,7 +534,7 @@ def _generate_observables( force_set_smallest = input_arr.min() > 1e-9 if force_set_smallest: new_xgrid = np.linspace( - start=1/input_arr_size, stop=1.0, endpoint=False, num=input_arr_size + start=1 / input_arr_size, stop=1.0, endpoint=False, num=input_arr_size ) 
else: new_xgrid = np.linspace(start=0, stop=1.0, endpoint=False, num=input_arr_size) @@ -566,8 +568,7 @@ def _generate_observables( scaler = PchipInterpolator(map_from, map_to) except ValueError: raise ValueError( - "interpolation_points is larger than the number of unique " - "input x-values" + "interpolation_points is larger than the number of unique " "input x-values" ) self._scaler = lambda x: np.concatenate([scaler(np.log(x)), x], axis=-1) @@ -918,7 +919,9 @@ def hyperparametrizable(self, params): # by adding it to this dictionary dict_out = { "status": passed, - "loss": self._hyper_loss(fold_losses=l_hyper, n3pdfs=n3pdfs, experimental_models=exp_models), + "loss": self._hyper_loss( + fold_losses=l_hyper, n3pdfs=n3pdfs, experimental_models=exp_models + ), "validation_loss": np.average(l_valid), "experimental_loss": np.average(l_exper), "kfold_meta": { diff --git a/n3fit/src/n3fit/tests/test_layers.py b/n3fit/src/n3fit/tests/test_layers.py index d8df1326e3..b1d662450f 100644 --- a/n3fit/src/n3fit/tests/test_layers.py +++ b/n3fit/src/n3fit/tests/test_layers.py @@ -2,7 +2,7 @@ Tests for the layers of n3fit This module checks that the layers do what they would do with numpy """ - +import dataclasses import numpy as np from validphys.pdfbases import fitbasis_to_NN31IC from n3fit.backends import operations as op @@ -14,9 +14,19 @@ NDATA = 3 THRESHOLD = 1e-6 + +@dataclasses.dataclass +class _fake_FKTableData: + """Fake validphys.coredata.FKTableData to be used in the tests""" + + fktable: np.array + luminosity_mapping: np.array + xgrid: np.array + + # Helper functions def generate_input_had(flavs=3, xsize=2, ndata=4, n_combinations=None): - """ Generates fake input (fktable and array of combinations) for the hadronic convolution + """Generates fake input (fktable and array of combinations) for the hadronic convolution Parameters ---------- @@ -39,9 +49,7 @@ def generate_input_had(flavs=3, xsize=2, ndata=4, n_combinations=None): lc = len(combinations) else: bchoice = sorted( - np.random.choice( - np.arange(flavs * flavs), size=n_combinations, replace=False - ) + np.random.choice(np.arange(flavs * flavs), size=n_combinations, replace=False) ) combinations = [all_combinations[i] for i in bchoice] lc = n_combinations @@ -52,7 +60,7 @@ def generate_input_had(flavs=3, xsize=2, ndata=4, n_combinations=None): # Generate an FK table, PDF and combinations list for DIS def generate_input_DIS(flavs=3, xsize=2, ndata=5, n_combinations=-1): - """ Generates fake input (fktable and array of combinations) for the DIS convolution + """Generates fake input (fktable and array of combinations) for the DIS convolution Parameters ---------- @@ -82,34 +90,32 @@ def generate_input_DIS(flavs=3, xsize=2, ndata=5, n_combinations=-1): def generate_DIS(nfk=1): - fkdicts = [] - for i in range(nfk): + fktables = [] + for _ in range(nfk): fk, comb = generate_input_DIS( flavs=FLAVS, xsize=XSIZE, ndata=NDATA, n_combinations=FLAVS - 1 ) - fkdicts.append({"fktable": fk, "basis": comb, "xgrid": np.ones((1, XSIZE))}) - return fkdicts + fktables.append(_fake_FKTableData(fk, comb, np.ones((1, XSIZE)))) + return fktables def generate_had(nfk=1): - fkdicts = [] - for i in range(nfk): - fk, comb = generate_input_had( - flavs=FLAVS, xsize=XSIZE, ndata=NDATA, n_combinations=FLAVS - ) - fkdicts.append({"fktable": fk, "basis": comb, "xgrid": np.ones((1, XSIZE))}) - return fkdicts + fktables = [] + for _ in range(nfk): + fk, comb = generate_input_had(flavs=FLAVS, xsize=XSIZE, ndata=NDATA, n_combinations=FLAVS) + 
fktables.append(_fake_FKTableData(fk, comb, np.ones((1, XSIZE)))) + return fktables # Tests def test_DIS_basis(): - fkdicts = generate_DIS(2) - fks = [i['fktable'] for i in fkdicts] - obs_layer = layers.DIS(fkdicts, fks, "NULL", nfl=FLAVS) + fktables = generate_DIS(2) + fks = [i.fktable for i in fktables] + obs_layer = layers.DIS(fktables, fks, "NULL", nfl=FLAVS) # Get the masks from the layer all_masks = obs_layer.all_masks - for result, fk in zip(all_masks, fkdicts): - comb = fk["basis"] + for result, fk in zip(all_masks, fktables): + comb = fk.luminosity_mapping # Compute the basis with numpy reference = np.zeros(FLAVS, dtype=bool) for i in comb: @@ -118,13 +124,13 @@ def test_DIS_basis(): def test_DY_basis(): - fkdicts = generate_had(2) - fks = [i['fktable'] for i in fkdicts] - obs_layer = layers.DY(fkdicts, fks, "NULL", nfl=FLAVS) + fktables = generate_had(2) + fks = [i.fktable for i in fktables] + obs_layer = layers.DY(fktables, fks, "NULL", nfl=FLAVS) # Get the mask from the layer all_masks = obs_layer.all_masks - for result, fk in zip(all_masks, fkdicts): - comb = fk["basis"] + for result, fk in zip(all_masks, fktables): + comb = fk.luminosity_mapping reference = np.zeros((FLAVS, FLAVS)) for i, j in comb: reference[i, j] = True @@ -135,9 +141,9 @@ def test_DIS(): tests = [(2, "ADD"), (1, "NULL")] for nfk, ope in tests: # Input values - fkdicts = generate_DIS(nfk) - fks = [i['fktable'] for i in fkdicts] - obs_layer = layers.DIS(fkdicts, fks, ope, nfl=FLAVS) + fktables = generate_DIS(nfk) + fks = [i.fktable for i in fktables] + obs_layer = layers.DIS(fktables, fks, ope, nfl=FLAVS) pdf = np.random.rand(XSIZE, FLAVS) kp = op.numpy_to_tensor(np.expand_dims(pdf, 0)) # generate the n3fit results @@ -148,8 +154,8 @@ def test_DIS(): if len(all_masks) < nfk: all_masks *= nfk reference = 0 - for fkdict, mask in zip(fkdicts, all_masks): - fk = fkdict["fktable"] + for fktabledata, mask in zip(fktables, all_masks): + fk = fktabledata.fktable pdf_masked = pdf.T[mask.numpy()].T reference += np.tensordot(fk, pdf_masked, axes=[[2, 1], [0, 1]]) assert np.allclose(result, reference, THRESHOLD) @@ -159,12 +165,12 @@ def test_DY(): tests = [(2, "ADD"), (1, "NULL")] for nfk, ope in tests: # Input values - fkdicts = generate_had(nfk) - fks = [i['fktable'] for i in fkdicts] - obs_layer = layers.DY(fkdicts, fks, ope, nfl=FLAVS) + fktables = generate_had(nfk) + fks = [i.fktable for i in fktables] + obs_layer = layers.DY(fktables, fks, ope, nfl=FLAVS) pdf = np.random.rand(XSIZE, FLAVS) # Add batch dimension (0) and replica dimension (-1) - kp = op.numpy_to_tensor(np.expand_dims(pdf, [0,-1])) + kp = op.numpy_to_tensor(np.expand_dims(pdf, [0, -1])) # generate the n3fit results result_tensor = obs_layer(kp) result = op.evaluate(result_tensor) @@ -173,8 +179,8 @@ def test_DY(): if len(all_masks) < nfk: all_masks *= nfk reference = 0 - for fkdict, mask in zip(fkdicts, all_masks): - fk = fkdict["fktable"] + for fktabledata, mask in zip(fktables, all_masks): + fk = fktabledata.fktable lumi = np.tensordot(pdf, pdf, axes=0) lumi_perm = np.moveaxis(lumi, [1, 3], [0, 1]) lumi_masked = lumi_perm[mask.numpy()] @@ -197,12 +203,12 @@ def test_rotation_flavour(): # Apply the rotation using numpy tensordot x = np.ones(8) # Vector in the flavour basis v_i x = np.expand_dims(x, axis=[0, 1]) # Give to the input the shape (1,1,8) - mat = fitbasis_to_NN31IC(flav_info, 'FLAVOUR') # Rotation matrix R_ij, i=flavour, j=evolution + mat = fitbasis_to_NN31IC(flav_info, "FLAVOUR") # Rotation matrix R_ij, i=flavour, j=evolution res_np = 
np.tensordot(x, mat, (2, 0)) # Vector in the evolution basis u_j=R_ij*vi # Apply the rotation through the rotation layer x = op.numpy_to_tensor(x) - rotmat = layers.FlavourToEvolution(flav_info, 'FLAVOUR') + rotmat = layers.FlavourToEvolution(flav_info, "FLAVOUR") res_layer = rotmat(x) assert np.alltrue(res_np == res_layer) @@ -222,33 +228,34 @@ def test_rotation_evol(): # Apply the rotation using numpy tensordot x = np.ones(8) # Vector in the flavour basis v_i x = np.expand_dims(x, axis=[0, 1]) # Give to the input the shape (1,1,8) - mat = fitbasis_to_NN31IC(flav_info, 'EVOL') # Rotation matrix R_ij, i=flavour, j=evolution + mat = fitbasis_to_NN31IC(flav_info, "EVOL") # Rotation matrix R_ij, i=flavour, j=evolution res_np = np.tensordot(x, mat, (2, 0)) # Vector in the evolution basis u_j=R_ij*vi # Apply the rotation through the rotation layer x = op.numpy_to_tensor(x) - rotmat = layers.FlavourToEvolution(flav_info, 'EVOL') + rotmat = layers.FlavourToEvolution(flav_info, "EVOL") res_layer = rotmat(x) - assert np.alltrue(res_np == res_layer) - + assert np.alltrue(res_np == res_layer) + + def test_mask(): - """ Test the mask layer """ + """Test the mask layer""" SIZE = 100 fi = np.random.rand(SIZE) # Check that the multiplier works vals = [0.0, 2.0, np.random.rand()] for val in vals: - masker = layers.Mask(c = val) + masker = layers.Mask(c=val) ret = masker(fi) - np.testing.assert_allclose(ret, val*fi, rtol=1e-5) + np.testing.assert_allclose(ret, val * fi, rtol=1e-5) # Check that the boolean works np_mask = np.random.randint(0, 2, size=SIZE, dtype=bool) - masker = layers.Mask(bool_mask = np_mask) + masker = layers.Mask(bool_mask=np_mask) ret = masker(fi) masked_fi = fi[np_mask] np.testing.assert_allclose(ret, masked_fi, rtol=1e-5) # Check that the combination works! rn_val = vals[-1] - masker = layers.Mask(bool_mask = np_mask, c = rn_val) + masker = layers.Mask(bool_mask=np_mask, c=rn_val) ret = masker(fi) - np.testing.assert_allclose(ret, masked_fi*rn_val, rtol=1e-5) + np.testing.assert_allclose(ret, masked_fi * rn_val, rtol=1e-5) diff --git a/nnpdfcpp/data/theory.db b/nnpdfcpp/data/theory.db index 8e95cc4371..6ff99d7a07 100644 Binary files a/nnpdfcpp/data/theory.db and b/nnpdfcpp/data/theory.db differ diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 0ec4eef36c..70b3c09e00 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -941,7 +941,12 @@ def _parse_lagrange_multiplier(self, kind, theoryid, setdict): raise ConfigError(bad_msg, setdict.keys(), e.args[0]) from e except ValueError as e: raise ConfigError(bad_msg) from e - return self.loader.check_posset(theoryno, name, maxlambda) + if kind == "posdataset": + return self.loader.check_posset(theoryno, name, maxlambda) + elif kind == "integdataset": + return self.loader.check_integset(theoryno, name, maxlambda) + else: + raise ConfigError(f"The lagrange multiplier type {kind} is not understood") @element_of("posdatasets") def parse_posdataset(self, posset: dict, *, theoryid): diff --git a/validphys2/src/validphys/convolution.py b/validphys2/src/validphys/convolution.py index 5203cb8e77..01795dcdf2 100644 --- a/validphys2/src/validphys/convolution.py +++ b/validphys2/src/validphys/convolution.py @@ -120,9 +120,15 @@ def _predictions(dataset, pdf, fkfunc): "commondata and is not supported." 
) cuts = dataset.cuts.load() - all_predictions = [ - fkfunc(load_fktable(fk).with_cuts(cuts), pdf) for fk in dataset.fkspecs - ] + all_predictions = [] + for fk in dataset.fkspecs: + fk_w_cuts = load_fktable(fk).with_cuts(cuts) + all_predictions.append(fkfunc(fk_w_cuts, pdf)) + # Old fktables repeated values to make DEN and NUM sizes match in RATIO operations + # pineappl tables instead just contain the one value used + # The code below works for both situation while keeping `true_div` as the operation + if dataset.op == "RATIO": + all_predictions[-1] = all_predictions[-1].values return opfunc(*all_predictions) diff --git a/validphys2/src/validphys/core.py b/validphys2/src/validphys/core.py index 8681856129..3b68ee4819 100644 --- a/validphys2/src/validphys/core.py +++ b/validphys2/src/validphys/core.py @@ -39,6 +39,8 @@ from validphys.hyperoptplot import HyperoptTrial from validphys.utils import experiments_to_dataset_inputs from validphys.lhapdfset import LHAPDFSet +from validphys.fkparser import load_fktable +from validphys.pineparser import pineappl_reader log = logging.getLogger(__name__) @@ -320,7 +322,7 @@ def plot_kinlabels(self): class DataSetInput(TupleComp): - """Represents whatever the user enters in the YAML to specidy a + """Represents whatever the user enters in the YAML to specify a dataset.""" def __init__(self, *, name, sys, cfac, frac, weight, custom_group): self.name=name @@ -451,7 +453,8 @@ def __init__(self, *, name, commondata, fkspecs, thspec, cuts, if isinstance(fkspecs, FKTableSpec): fkspecs = (fkspecs,) - self.fkspecs = tuple(fkspecs) + fkspecs = tuple(fkspecs) + self.fkspecs = fkspecs self.thspec = thspec self.cuts = cuts @@ -498,6 +501,16 @@ def load(self): data = DataSet(data, intmask) return data + def load_commondata(self): + """Strips the commondata loading from `load`""" + cd = self.commondata.load() + if self.cuts is not None: + loaded_cuts = self.cuts.load() + if not (hasattr(loaded_cuts, '_full') and loaded_cuts._full): + intmask = [int(ele) for ele in loaded_cuts] + cd = CommonData(cd, intmask) + return cd + def to_unweighted(self): """Return a copy of the dataset with the weight set to one.""" return self.__class__( @@ -515,28 +528,75 @@ def __str__(self): return self.name class FKTableSpec(TupleComp): - def __init__(self, fkpath, cfactors): + """ + In Legacy Mode each fktable has one single path. + For pineappl tables instead a FKTable is formed by any number of grids + therefore in order to check whether we have a new-format or old-format table + we will just check whether fkpath is a list + For now holds the metadata as an attribute to this function. + This is useless/transitional since this metadata is already in the new CommonData format + """ + + def __init__(self, fkpath, cfactors, metadata=None): + self.cfactors = cfactors if cfactors is not None else [] + # NOTE: the legacy interface is expected to be removed by NNPDF5.0 + # so please don't write code that relies on it + self.legacy = True + if isinstance(fkpath, (tuple, list)): + self.legacy = False + fkpath = tuple(fkpath) self.fkpath = fkpath - self.cfactors = cfactors - super().__init__(fkpath, cfactors) + self.metadata = metadata - #NOTE: We cannot do this because Fkset owns the fktable, and trying - #to reuse the loaded one fails after it gets deleted. 
- #@functools.lru_cache() - def load(self): + # If this is a yaml file that loads an applgrid-converted pineappl, + # keep also the name of the target + # this is needed since we can now easily reutilize grids + if not self.legacy and self.metadata.get("appl"): + super().__init__(fkpath, cfactors, self.metadata.get("target_dataset")) + else: + super().__init__(fkpath, cfactors) + + def _load_legacy(self): return FKTable(str(self.fkpath), [str(factor) for factor in self.cfactors]) -class PositivitySetSpec(DataSetSpec): - """Extends DataSetSpec to work around the particularities of the positivity datasets""" + def _load_pineappl(self): + log.info("Reading: %s", self.fkpath) + return pineappl_reader(self) + + def load_with_cuts(self, cuts): + """Load the fktable and apply cuts inmediately. Returns a FKTableData""" + return load_fktable(self).with_cuts(cuts) + + def load(self): + if self.legacy: + return self._load_legacy() + return self._load_pineappl() + + +class LagrangeSetSpec(DataSetSpec): + """Extends DataSetSpec to work around the particularities of the positivity, integrability + and other Lagrange Multiplier datasets. + Internally (for libNNPDF) they are always PositivitySets + """ def __init__(self, name, commondataspec, fkspec, maxlambda, thspec): cuts = Cuts(commondataspec, None) self.maxlambda = maxlambda - super().__init__(name=name, commondata=commondataspec, fkspecs=fkspec, thspec=thspec, cuts=cuts) - if len(self.fkspecs) > 1: - # The signature of the function does not accept operations either - # so more than one fktable cannot be utilized - raise ValueError("Positivity datasets can only contain one fktable") + super().__init__( + name=name, + commondata=commondataspec, + fkspecs=fkspec, + thspec=thspec, + cuts=cuts, + ) + + def to_unweighted(self): + log.warning( + "Trying to unweight %s, %s are always unweighted", + self.__class__.__name__, + self.name, + ) + return self @functools.lru_cache() def load(self): @@ -544,11 +604,14 @@ def load(self): fk = self.fkspecs[0].load() return PositivitySet(cd, fk, self.maxlambda) - def to_unweighted(self): - log.warning("Trying to unweight %s, PositivitySetSpec are always unweighted", self.name) - return self + +class PositivitySetSpec(LagrangeSetSpec): + pass +class IntegrabilitySetSpec(LagrangeSetSpec): + pass + #We allow to expand the experiment as a list of datasets class DataGroupSpec(TupleComp, namespaces.NSList): @@ -578,6 +641,10 @@ def load(self): sets.append(loaded_data) return Experiment(sets, self.name) + @functools.lru_cache(maxsize=32) + def load_commondata(self): + return [d.load_commondata() for d in self.datasets] + @property def thspec(self): #TODO: Is this good enough? Should we explicitly pass the theory @@ -731,6 +798,14 @@ def __repr__(self): def __str__(self): return f"Theory {self.id}" + @property + def yamldb_path(self): + return self.path / "yamldb" + + def is_pineappl(self): + """Check whether this theory is a pineappl-based theory""" + return self.yamldb_path.exists() + class ThCovMatSpec: def __init__(self, path): self.path = path diff --git a/validphys2/src/validphys/coredata.py b/validphys2/src/validphys/coredata.py index c0934fa62d..5e73e3777f 100644 --- a/validphys2/src/validphys/coredata.py +++ b/validphys2/src/validphys/coredata.py @@ -42,17 +42,37 @@ class FKTableData: metadata : dict Other information contained in the FKTable. + + protected: bool + When a fktable is protected cuts will not be applied. 
+ The most common use-case is when a total cross section is used + as a normalization table for a differential cross section, + in legacy code (<= NNPDF4.0) both fktables would be cut using the differential index. """ hadronic: bool Q0: float ndata: int - xgrid: np.array + xgrid: np.ndarray sigma: pd.DataFrame metadata: dict = dataclasses.field(default_factory=dict, repr=False) + protected: bool = False + + def with_cfactor(self, cfactor): + """Returns a copy of the FKTableData object with cfactors applied to the fktable""" + if all(c == 1.0 for c in cfactor): + return self + if len(cfactor) != self.ndata: + if self.protected: + cfactor = cfactor[0] + else: + name = self.metadata.get("target_dataset") + raise ValueError( + f"The length of cfactor for {name} differs from the number of datapoints in the grid" + ) + new_sigma = self.sigma.multiply(pd.Series(cfactor), axis=0, level=0) + return dataclasses.replace(self, sigma=new_sigma) - # TODO: When we move to something other than the current fktable format, - # we should apply the cuts directly before loading the file. def with_cuts(self, cuts): """Return a copy of the FKTabe with the cuts applied. The data index of the sigma operator (the outermost level), contains the data point @@ -87,14 +107,73 @@ def with_cuts(self, cuts): >>> assert newtable.ndata == 2 >>> assert newtable.metadata['GridInfo'].ndata == 3 """ - if hasattr(cuts, 'load'): + if hasattr(cuts, "load"): cuts = cuts.load() - if cuts is None: + if cuts is None or self.protected: return self newndata = len(cuts) newsigma = self.sigma.loc[cuts] return dataclasses.replace(self, ndata=newndata, sigma=newsigma) + @property + def luminosity_mapping(self): + """Return the flavour combinations that contribute to the fktable + in the form of a single array + + The return shape is: + (nbasis,) for DIS + (nbasis*2,) for hadronic + """ + basis = self.sigma.columns.to_numpy() + if self.hadronic: + ret = np.zeros(14 * 14, dtype=bool) + ret[basis] = True + basis = np.array(np.where(ret.reshape(14, 14))).T.reshape(-1) + return basis + + def get_np_fktable(self): + """Returns the fktable as a dense numpy array that can be directly + manipulated with numpy + + The return shape is: + (ndata, nx, nbasis) for DIS + (ndata, nx, nx, nbasis) for hadronic + where nx is the length of the xgrid + and nbasis the number of flavour contributions that contribute + """ + # Read up the shape of the output table + ndata = self.ndata + nx = len(self.xgrid) + nbasis = self.sigma.shape[1] + + if ndata == 0: + if self.hadronic: + return np.zeros((ndata, nbasis, nx, nx)) + return np.zeros((ndata, nbasis, nx)) + + # Make the dataframe into a dense numpy array + + # First get the data index out of the way + # this is necessary because cuts/shifts and for performance reasons + # otherwise we will be putting things in a numpy array in very awkward orders + ns = self.sigma.unstack(level=("data",), fill_value=0) + x1 = ns.index.get_level_values(0) + + if self.hadronic: + x2 = ns.index.get_level_values(1) + fk_raw = np.zeros((nx, nx, ns.shape[1])) + fk_raw[x2, x1, :] = ns.values + + # The output is (ndata, basis, x1, x2) + fktable = fk_raw.reshape((nx, nx, nbasis, ndata)).T + else: + fk_raw = np.zeros((nx, ns.shape[1])) + fk_raw[x1, :] = ns.values + + # The output is (ndata, basis, x1) + fktable = fk_raw.reshape((nx, nbasis, ndata)).T + + return fktable @dataclasses.dataclass(eq=False) class CFactorData: @@ -157,6 +236,7 @@ class CommonData: type (ADD/MULT/RAND) and name (CORR/UNCORR/THEORYCORR/SKIP) """ + setname: str 
ndata: int commondataproc: str @@ -185,11 +265,13 @@ def with_cuts(self, cuts): """ # Ensure that the cuts we're applying applies to this dataset # only check, however, if the cuts is of type :py:class:`validphys.core.Cuts` - if hasattr(cuts, 'name') and self.setname != cuts.name: - raise ValueError(f"The cuts provided are for {cuts.name} which does not apply " - f"to this commondata file: {self.setname}") + if hasattr(cuts, "name") and self.setname != cuts.name: + raise ValueError( + f"The cuts provided are for {cuts.name} which does not apply " + f"to this commondata file: {self.setname}" + ) - if hasattr(cuts, 'load'): + if hasattr(cuts, "load"): cuts = cuts.load() if cuts is None: return self @@ -200,9 +282,7 @@ def with_cuts(self, cuts): newndata = len(cuts) new_commondata_table = self.commondata_table.loc[cuts] - return dataclasses.replace( - self, ndata=newndata, commondata_table=new_commondata_table - ) + return dataclasses.replace(self, ndata=newndata, commondata_table=new_commondata_table) @property def central_values(self): @@ -245,7 +325,6 @@ def additive_errors(self): add_table.columns = add_systype["name"].to_numpy() return add_table.loc[:, add_table.columns != "SKIP"] - def systematic_errors(self, central_values=None): """Returns all systematic errors as absolute uncertainties, with a single column for each uncertainty. Converts @@ -272,7 +351,5 @@ def systematic_errors(self, central_values=None): """ if central_values is None: central_values = self.central_values.to_numpy() - converted_mult_errors = ( - self.multiplicative_errors * central_values[:, np.newaxis] / 100 - ) + converted_mult_errors = self.multiplicative_errors * central_values[:, np.newaxis] / 100 return pd.concat((self.additive_errors, converted_mult_errors), axis=1) diff --git a/validphys2/src/validphys/filters.py b/validphys2/src/validphys/filters.py index 9c930c8f2a..c0b9bb29fe 100644 --- a/validphys2/src/validphys/filters.py +++ b/validphys2/src/validphys/filters.py @@ -164,7 +164,7 @@ def _filter_real_data(filter_path, data): nfull, ncut = _write_ds_cut_data(path, dataset) total_data_points += nfull total_cut_data_points += ncut - dataset.load().Export(str(path)) + dataset.load_commondata().Export(str(path)) return total_data_points, total_cut_data_points diff --git a/validphys2/src/validphys/fkparser.py b/validphys2/src/validphys/fkparser.py index f7d2a53fb9..8213a994f0 100644 --- a/validphys2/src/validphys/fkparser.py +++ b/validphys2/src/validphys/fkparser.py @@ -52,25 +52,25 @@ class GridInfo: @functools.lru_cache() def load_fktable(spec): """Load the data corresponding to a FKSpec object. The cfactors - will be applied to the grid.""" - with open_fkpath(spec.fkpath) as handle: - tabledata = parse_fktable(handle) + will be applied to the grid. + If we have a new-type fktable, call directly `load()`, otherwise + fallback to the old parser + """ + if spec.legacy: + with open_fkpath(spec.fkpath) as handle: + tabledata = parse_fktable(handle) + else: + tabledata = spec.load() + if not spec.cfactors: return tabledata - ndata = tabledata.ndata - cfprod = np.ones(ndata) + cfprod = 1.0 for cf in spec.cfactors: with open(cf, "rb") as f: cfdata = parse_cfactor(f) - if len(cfdata.central_value) != ndata: - raise BadCFactorError( - "Length of cfactor data does not match the length of the fktable." 
- ) cfprod *= cfdata.central_value - # TODO: Find a way to do this in place - tabledata.sigma = tabledata.sigma.multiply(pd.Series(cfprod), axis=0, level=0) - return tabledata + return tabledata.with_cfactor(cfprod) def _get_compressed_buffer(path): archive = tarfile.open(path) diff --git a/validphys2/src/validphys/loader.py b/validphys2/src/validphys/loader.py index f944b95d18..6f287e2321 100644 --- a/validphys2/src/validphys/loader.py +++ b/validphys2/src/validphys/loader.py @@ -26,11 +26,12 @@ from reportengine import filefinder from validphys.core import (CommonDataSpec, FitSpec, TheoryIDSpec, FKTableSpec, - PositivitySetSpec, DataSetSpec, PDF, Cuts, DataGroupSpec, - peek_commondata_metadata, CutsPolicy, + PositivitySetSpec, IntegrabilitySetSpec, DataSetSpec, PDF, Cuts, + DataGroupSpec, peek_commondata_metadata, CutsPolicy, InternalCutsWrapper, HyperscanSpec) from validphys.utils import tempfile_cleaner from validphys import lhaindex +from validphys import pineparser DEFAULT_NNPDF_PROFILE_PATH = f"{sys.prefix}/share/NNPDF/nnprofile.yaml" @@ -358,6 +359,32 @@ def check_fktable(self, theoryID, setname, cfac): cfactors = self.check_cfactor(theoryID, setname, cfac) return FKTableSpec(fkpath, cfactors) + def check_fkyaml(self, fkpath, theoryID, cfac): + """Load a pineappl fktable + Receives a yaml file describing the fktables necessary for a given observable + the theory ID and the corresponding cfactors + """ + _, theopath = self.check_theoryID(theoryID) + metadata, fklist = pineparser.get_yaml_information(fkpath, theopath) + op = metadata["operation"] + + # TODO: + # at the moment there are no pineappl specific c-factors + # so they need to be loaded from the NNPDF names / compounds files + cfac_name = metadata["target_dataset"] + # check whether there is a compound file + cpath = theopath / "compound" / f"FK_{cfac_name}-COMPOUND.dat" + if cpath.exists(): + # Get the target filenames + tnames = [i[3:-4] for i in cpath.read_text().split() if i.endswith(".dat")] + cfactors = [self.check_cfactor(theoryID, i, cfac) for i in tnames] + else: + cfactors = [self.check_cfactor(theoryID, cfac_name, cfac)] + ### + + fkspecs = [FKTableSpec(i, c, metadata) for i, c in zip(fklist, cfactors)] + return fkspecs, op + def check_compound(self, theoryID, setname, cfac): thid, theopath = self.check_theoryID(theoryID) compound_spec_path = theopath / 'compound' / ('FK_%s-COMPOUND.dat' % setname) @@ -407,11 +434,19 @@ def check_cfactor(self, theoryID, setname, cfactors): return tuple(cf) def check_posset(self, theoryID, setname, postlambda): + """Load a positivity dataset""" cd = self.check_commondata(setname, 'DEFAULT') fk = self.check_fktable(theoryID, setname, []) th = self.check_theoryID(theoryID) return PositivitySetSpec(setname, cd, fk, postlambda, th) + def check_integset(self, theoryID, setname, postlambda): + """Load an integrability dataset""" + cd = self.check_commondata(setname, 'DEFAULT') + fk = self.check_fktable(theoryID, setname, []) + th = self.check_theoryID(theoryID) + return IntegrabilitySetSpec(setname, cd, fk, postlambda, th) + def get_posset(self, theoryID, setname, postlambda): return self.check_posset(theoryID, setname, postlambda).load() @@ -482,8 +517,12 @@ def check_dataset(self, cuts=CutsPolicy.INTERNAL, use_fitcommondata=False, fit=None, - weight=1): - + weight=1, + ): + """Loads a given dataset + If the dataset contains new-type fktables, use the + pineappl loading function, otherwise fallback to legacy + """ if not isinstance(theoryid, TheoryIDSpec): theoryid = 
self.check_theoryID(theoryid) @@ -491,11 +530,17 @@ def check_dataset(self, commondata = self.check_commondata( name, sysnum, use_fitcommondata=use_fitcommondata, fit=fit) - try: - fkspec, op = self.check_compound(theoryno, name, cfac) - except CompoundNotFound: - fkspec = self.check_fktable(theoryno, name, cfac) - op = None + + if theoryid.is_pineappl(): + # If it is a pineappl theory, use the pineappl reader + fkpath = (theoryid.yamldb_path / name).with_suffix(".yaml") + fkspec, op = self.check_fkyaml(fkpath, theoryno, cfac) + else: + try: + fkspec, op = self.check_compound(theoryno, name, cfac) + except CompoundNotFound: + fkspec = self.check_fktable(theoryno, name, cfac) + op = None #Note this is simply for convenience when scripting. The config will #construct the actual Cuts object by itself diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py index f2ceda5573..c3709293c5 100644 --- a/validphys2/src/validphys/n3fit_data.py +++ b/validphys2/src/validphys/n3fit_data.py @@ -6,8 +6,8 @@ functions make calls to libnnpdf C++ library. """ +import functools from collections import defaultdict -from copy import deepcopy import hashlib import logging @@ -18,12 +18,13 @@ from reportengine.table import table from validphys.n3fit_data_utils import ( - common_data_reader_experiment, - positivity_reader, + validphys_group_extractor, ) +from validphys.core import IntegrabilitySetSpec, TupleComp log = logging.getLogger(__name__) + def replica_trvlseed(replica, trvlseed, same_trvl_per_replica=False): """Generates the ``trvlseed`` for a ``replica``.""" # TODO: move to the new infrastructure @@ -35,6 +36,7 @@ def replica_trvlseed(replica, trvlseed, same_trvl_per_replica=False): res = np.random.randint(0, pow(2, 31)) return res + def replica_nnseed(replica, nnseed): """Generates the ``nnseed`` for a ``replica``.""" np.random.seed(seed=nnseed) @@ -42,6 +44,7 @@ def replica_nnseed(replica, nnseed): res = np.random.randint(0, pow(2, 31)) return res + def replica_mcseed(replica, mcseed, genrep): """Generates the ``mcseed`` for a ``replica``.""" if not genrep: @@ -52,6 +55,23 @@ def replica_mcseed(replica, mcseed, genrep): return res +class _TrMasks(TupleComp): + """Class holding the training validation mask for a group of datasets + If the same group of dataset receives the same trvlseed then the mask + will be the same. + This class holds said information so it can be reused easily, i.e., + ``group_name`` and ``seed`` define the ``masks``. + """ + + def __init__(self, group_name, seed, masks=None): + self.masks = masks + super().__init__(group_name, seed) + + def __iter__(self): + for m in self.masks: + yield m + + def tr_masks(data, replica_trvlseed): """Generate the boolean masks used to split data into training and validation points. Returns a list of 1-D boolean arrays, one for each @@ -61,7 +81,7 @@ def tr_masks(data, replica_trvlseed): tr_data = data[tr_mask] """ - nameseed = int(hashlib.sha256(str(data).encode()).hexdigest(), 16) % 10 ** 8 + nameseed = int(hashlib.sha256(str(data).encode()).hexdigest(), 16) % 10**8 nameseed += replica_trvlseed # TODO: update this to new random infrastructure. np.random.seed(nameseed) @@ -78,7 +98,8 @@ def tr_masks(data, replica_trvlseed): ) np.random.shuffle(mask) trmask_partial.append(mask) - return trmask_partial + return _TrMasks(str(data), replica_trvlseed, trmask_partial) + def kfold_masks(kpartitions, data): """Collect the masks (if any) due to kfolding for this data. 
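A sketch, with a made-up group name and sizes, of the mask construction used by `tr_masks` above: the RNG seed combines a hash of the group name with the replica `trvlseed`, and each dataset gets a shuffled boolean mask whose number of `True` entries follows its training fraction.

```python
import hashlib
import numpy as np

group_name, trvlseed = "NMC group", 4            # hypothetical inputs
nameseed = int(hashlib.sha256(group_name.encode()).hexdigest(), 16) % 10**8
nameseed += trvlseed
np.random.seed(nameseed)                         # legacy RNG, as in tr_masks

ndata, frac = 10, 0.75                           # toy dataset size and training fraction
ntr = int(frac * ndata)
mask = np.concatenate([np.ones(ntr, dtype=bool), np.zeros(ndata - ntr, dtype=bool)])
np.random.shuffle(mask)
print(mask.sum(), (~mask).sum())                 # 7 training points, 3 validation points
```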
@@ -147,50 +168,24 @@ def kfold_masks(kpartitions, data): return list_folds -def _mask_fk_tables(dataset_dicts, tr_masks): - """ - Internal function which masks the fktables for a group of datasets. - - Parameters - ---------- - dataset_dicts: list[dict] - list of datasets dictionaries returned by - :py:func:`validphys.n3fit_data_utils.common_data_reader_experiment`. - tr_masks: list[np.array] - a tuple containing the lists of training masks for each dataset. - - Return - ------ - data_trmask: np.array - boolean array resulting from concatenating the training masks of - each dataset. - - Note: the returned masks are only used in order to mask the covmat +@functools.lru_cache +def fittable_datasets_masked(data, tr_masks): + """Generate a list of :py:class:`validphys.n3fit_data_utils.FittableDataSet` + from a group of dataset and the corresponding training/validation masks """ - trmask_partial = tr_masks - for dataset_dict, tr_mask in zip(dataset_dicts, trmask_partial): - # Generate the training and validation fktables - tr_fks = [] - vl_fks = [] - ex_fks = [] - vl_mask = ~tr_mask - for fktable_dict in dataset_dict["fktables"]: - tr_fks.append(fktable_dict["fktable"][tr_mask]) - vl_fks.append(fktable_dict["fktable"][vl_mask]) - ex_fks.append(fktable_dict.get("fktable")) - dataset_dict["tr_fktables"] = tr_fks - dataset_dict["vl_fktables"] = vl_fks - dataset_dict["ex_fktables"] = ex_fks - - return np.concatenate(trmask_partial) + # This is separated from fitting_data_dict so that we can cache the result + # when the trvlseed is the same for all replicas (great for parallel replicas) + return validphys_group_extractor(data.datasets, tr_masks.masks) def fitting_data_dict( data, make_replica, + dataset_inputs_loaded_cd_with_cuts, dataset_inputs_t0_covmat_from_systematics, tr_masks, kfold_masks, + fittable_datasets_masked, diagonal_basis=None, ): """ @@ -235,17 +230,13 @@ def fitting_data_dict( """ # TODO: Plug in the python data loading when available. Including but not # limited to: central values, ndata, replica generation, covmat construction - spec_c = data.load() - ndata = spec_c.GetNData() - expdata_true = spec_c.get_cv().reshape(1, ndata) + expdata_true = np.concatenate([d.central_values for d in dataset_inputs_loaded_cd_with_cuts]) expdata = make_replica - - datasets = common_data_reader_experiment(spec_c, data) - - # t0 covmat - covmat = dataset_inputs_t0_covmat_from_systematics + tr_masks = tr_masks.masks + covmat = dataset_inputs_t0_covmat_from_systematics # t0 covmat inv_true = np.linalg.inv(covmat) + fittable_datasets = fittable_datasets_masked if diagonal_basis: log.info("working in diagonal basis.") @@ -256,21 +247,17 @@ def fitting_data_dict( dt_trans_tr = None dt_trans_vl = None - - # Copy dataset dict because we mutate it. 
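The caching of `fittable_datasets_masked` above relies on its arguments being hashable and comparing equal across replicas. The toy class below is a stand-in (not the real `_TrMasks`) showing why keying equality on `(name, seed)` lets `functools.lru_cache` reuse the extraction when several replicas share the same seed.

```python
import functools

class MaskKey:
    # Stand-in for _TrMasks: hash/equality depend only on (name, seed),
    # so identical groups with the same trvlseed hit the same cache entry.
    def __init__(self, name, seed, masks=None):
        self.name, self.seed, self.masks = name, seed, masks

    def __eq__(self, other):
        return (self.name, self.seed) == (other.name, other.seed)

    def __hash__(self):
        return hash((self.name, self.seed))

@functools.lru_cache()
def extract(data_name, masks):
    print("extracting", data_name)  # runs only once per equal (data_name, masks) pair
    return object()

a = extract("NMC group", MaskKey("NMC group", 7, masks=[True, False]))
b = extract("NMC group", MaskKey("NMC group", 7, masks=[True, False]))
assert a is b  # the second call is served from the cache
```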
- datasets_copy = deepcopy(datasets) - - tr_mask = _mask_fk_tables(datasets_copy, tr_masks) + tr_mask = np.concatenate(tr_masks) vl_mask = ~tr_mask if diagonal_basis: expdata = np.matmul(dt_trans, expdata) # make a 1d array of the diagonal covmat_tr = eig[tr_mask] - invcovmat_tr = 1./covmat_tr + invcovmat_tr = 1.0 / covmat_tr covmat_vl = eig[vl_mask] - invcovmat_vl = 1./covmat_vl + invcovmat_vl = 1.0 / covmat_vl # prepare a masking rotation dt_trans_tr = dt_trans[tr_mask] @@ -285,7 +272,6 @@ def fitting_data_dict( ndata_tr = np.count_nonzero(tr_mask) expdata_tr = expdata[tr_mask].reshape(1, ndata_tr) - ndata_vl = np.count_nonzero(vl_mask) expdata_vl = expdata[vl_mask].reshape(1, ndata_vl) @@ -298,10 +284,14 @@ def fitting_data_dict( folds["validation"].append(fold[vl_mask]) folds["experimental"].append(~fold) + # This dictionary contains a list of fittable datasets + # which contains the instructions on how to generaet each observable for the fit + # plus the information that glue all of them together (covmat, ndata, etc) + # TODO: for consistency with the rest of validphys a FittableGroup should be created dict_out = { - "datasets": datasets_copy, + "datasets": fittable_datasets, "name": str(data), - "expdata_true": expdata_true, + "expdata_true": expdata_true.reshape(1, -1), "invcovmat_true": inv_true, "covmat": covmat, "trmask": tr_mask, @@ -314,14 +304,16 @@ def fitting_data_dict( "expdata_vl": expdata_vl, "positivity": False, "count_chi2": True, - "folds" : folds, + "folds": folds, "data_transformation_tr": dt_trans_tr, "data_transformation_vl": dt_trans_vl, } return dict_out + exps_fitting_data_dict = collect("fitting_data_dict", ("group_dataset_inputs_by_experiment",)) + def replica_nnseed_fitting_data_dict(replica, exps_fitting_data_dict, replica_nnseed): """For a single replica return a tuple of the inputs to this function. Used with `collect` over replicas to avoid having to perform multiple @@ -335,8 +327,11 @@ def replica_nnseed_fitting_data_dict(replica, exps_fitting_data_dict, replica_nn """ return (replica, exps_fitting_data_dict, replica_nnseed) + replicas_nnseed_fitting_data_dict = collect("replica_nnseed_fitting_data_dict", ("replicas",)) -groups_replicas_indexed_make_replica = collect('indexed_make_replica' , ("group_dataset_inputs_by_experiment","replicas")) +groups_replicas_indexed_make_replica = collect( + "indexed_make_replica", ("group_dataset_inputs_by_experiment", "replicas") +) @table @@ -390,10 +385,10 @@ def validation_pseudodata(pseudodata_table, training_mask): @table def replica_training_mask_table(replica_training_mask): - """Same as ``replica_training_mask`` but with a table decorator. 
- """ + """Same as ``replica_training_mask`` but with a table decorator.""" return replica_training_mask + def replica_training_mask(exps_tr_masks, replica, experiments_index): """Save the boolean mask used to split data into training and validation for a given replica as a pandas DataFrame, indexed by @@ -440,25 +435,19 @@ def replica_training_mask(exps_tr_masks, replica, experiments_index): [345 rows x 1 columns] """ - all_masks = np.concatenate([ - ds_mask - for exp_masks in exps_tr_masks - for ds_mask in exp_masks - ]) - return pd.DataFrame( - all_masks, - columns=[f"replica {replica}"], - index=experiments_index - ) + all_masks = np.concatenate([ds_mask for exp_masks in exps_tr_masks for ds_mask in exp_masks]) + return pd.DataFrame(all_masks, columns=[f"replica {replica}"], index=experiments_index) + replicas_training_mask = collect("replica_training_mask", ("replicas",)) + @table def training_mask_table(training_mask): - """Same as ``training_mask`` but with a table decorator - """ + """Same as ``training_mask`` but with a table decorator""" return training_mask + def training_mask(replicas_training_mask): """Save the boolean mask used to split data into training and validation for each replica as a pandas DataFrame, indexed by @@ -502,14 +491,14 @@ def training_mask(replicas_training_mask): return pd.concat(replicas_training_mask, axis=1) -def fitting_pos_dict(posdataset): - """Loads a positivity dataset. For more information see - :py:func:`validphys.n3fit_data_utils.positivity_reader`. +def _fitting_lagrange_dict(lambdadataset): + """Loads a generic lambda dataset, often used for positivity and integrability datasets + For more information see :py:func:`validphys.n3fit_data_utils.positivity_reader`. Parameters ---------- - posdataset: validphys.core.PositivitySetSpec - Positivity set which is to be loaded. + lambdadataset: validphys.core.LagrangeSetSpec + Positivity (or integrability) set which is to be loaded. Examples -------- @@ -518,23 +507,50 @@ def fitting_pos_dict(posdataset): >>> pos = API.fitting_pos_dict(posdataset=posdataset, theoryid=162) >>> len(pos) 9 - """ - log.info("Loading positivity dataset %s", posdataset) - return positivity_reader(posdataset) + integrability = isinstance(lambdadataset, IntegrabilitySetSpec) + mode = "integrability" if integrability else "positivity" + log.info("Loading %s dataset %s", mode, lambdadataset) + positivity_datasets = validphys_group_extractor([lambdadataset], []) + ndata = positivity_datasets[0].ndata + return { + "datasets": positivity_datasets, + "trmask": np.ones(ndata, dtype=np.bool), + "name": lambdadataset.name, + "expdata": np.zeros((1, ndata)), + "ndata": ndata, + "positivity": True, + "lambda": lambdadataset.maxlambda, + "count_chi2": False, + "integrability": integrability, + } -posdatasets_fitting_pos_dict = collect("fitting_pos_dict", ("posdatasets",)) +def posdatasets_fitting_pos_dict(posdatasets=None): + """Loads all positivity datasets. It is not allowed to be empty. -#can't use collect here because integdatasets might not exist. + Parameters + ---------- + integdatasets: list[validphys.core.PositivitySetSpec] + list containing the settings for the positivity sets. Examples of + these can be found in the runcards located in n3fit/runcards. They have + a format similar to ``dataset_input``. + """ + if posdatasets is not None: + return [_fitting_lagrange_dict(i) for i in posdatasets] + log.warning("Not using any positivity datasets.") + return None + + +# can't use collect here because integdatasets might not exist. 
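For orientation, this is roughly the dictionary shape produced by `_fitting_lagrange_dict` above for a hypothetical positivity set with 20 points and `maxlambda = 1e6`; the set name is made up and the `datasets` entry is left empty here.

```python
import numpy as np

ndata, maxlambda = 20, 1e6
lagrange_dict = {
    "datasets": [],                        # FittableDataSet objects in the real code
    "trmask": np.ones(ndata, dtype=bool),  # every point counts as "training"
    "name": "POSF2U",                      # hypothetical set name
    "expdata": np.zeros((1, ndata)),       # no experimental data to reproduce
    "ndata": ndata,
    "positivity": True,
    "lambda": maxlambda,
    "count_chi2": False,
    "integrability": False,                # True when an IntegrabilitySetSpec is passed
}
```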
def integdatasets_fitting_integ_dict(integdatasets=None): - """Loads a integrability dataset. Calls same function as + """Loads the integrability datasets. Calls same function as :py:func:`fitting_pos_dict`, except on each element of ``integdatasets`` if ``integdatasets`` is not None. Parameters ---------- - integdatasets: list[validphys.core.PositivitySetSpec] + integdatasets: list[validphys.core.IntegrabilitySetSpec] list containing the settings for the integrability sets. Examples of these can be found in the runcards located in n3fit/runcards. They have a format similar to ``dataset_input``. @@ -552,12 +568,6 @@ def integdatasets_fitting_integ_dict(integdatasets=None): """ if integdatasets is not None: - integ_info = [] - for integ_set in integdatasets: - log.info("Loading integrability dataset %s", integ_set) - # Use the same reader as positivity observables - integ_dict = positivity_reader(integ_set) - integ_info.append(integ_dict) - return integ_info + return [_fitting_lagrange_dict(i) for i in integdatasets] log.warning("Not using any integrability datasets.") return None diff --git a/validphys2/src/validphys/n3fit_data_utils.py b/validphys2/src/validphys/n3fit_data_utils.py index 80fe469756..3b54e29bd1 100644 --- a/validphys2/src/validphys/n3fit_data_utils.py +++ b/validphys2/src/validphys/n3fit_data_utils.py @@ -1,168 +1,107 @@ """ n3fit_data_utils.py -Library of helper functions to n3fit_data.py for reading libnnpdf objects. +This module reads validphys :py:class:`validphys.core.DataSetSpec` +and extracts the relevant information into :py:class:`validphys.n3fit_data_utils.FittableDataSet` + +The ``validphys_group_extractor`` will loop over every dataset of a given group +loading their fktables (and applying any necessary cuts). """ +from itertools import zip_longest +import dataclasses import numpy as np -def fk_parser(fk, is_hadronic=False): - """ - # Arguments: - - `fk`: fktable object - - # Return: - - `dict_out`: dictionary with all information about the fktable - - 'xgrid' - - 'nx' - - 'ndata' - - 'basis' - - 'fktable' - """ - - # Dimensional information - nonzero = fk.GetNonZero() - ndata = fk.GetNData() - nx = fk.GetTx() - - # Basis of active flavours - nbasis = nonzero - basis = fk.get_flmap() - - # Read the xgrid and the fktable to numpy arrays - xgrid_flat = fk.get_xgrid() - fktable_flat = fk.get_sigma() - - # target shape - if is_hadronic: - nxsqrt = int(np.sqrt(nx)) - shape_out = (ndata, nbasis, nxsqrt, nxsqrt) - xgrid = xgrid_flat.reshape(1, nxsqrt) - else: - shape_out = (ndata, nbasis, nx) - xgrid = xgrid_flat.reshape(1, nx) - - # remove padding from the fktable (if any) - l = len(fktable_flat) - # get the padding - pad_position = fk.GetDSz() - pad_size = fk.GetDSz() - nx * nonzero - # remove the padding - if pad_size > 0: - mask = np.ones(l, dtype=bool) - for i in range(1, int(l / pad_position) + 1): - marker = pad_position * i - mask[marker - pad_size : marker] = False - fktable_array = fktable_flat[mask] - else: - fktable_array = fktable_flat - # reshape - fktable = fktable_array.reshape(shape_out) - - dict_out = { - "ndata": ndata, - "nbasis": nbasis, - "nonzero": nbasis, - "basis": basis, - "nx": nx, - "xgrid": xgrid, - "fktable": fktable, - } - return dict_out - - -def common_data_reader_dataset(dataset_c, dataset_spec): +@dataclasses.dataclass +class FittableDataSet: """ - Import fktable, common data and experimental data for the given data_name - - # Arguments: - - `dataset_c`: c representation of the dataset object - - `dataset_spec`: python representation 
of the dataset object - - #Returns: - - `[dataset_dict]`: a (len 1 list of) dictionary with: - - dataset: full dataset object from which all data has been extracted - - hadronic: is hadronic? - - operation: operation to perform on the fktables below - - fktables : a list of dictionaries with the following items - - ndata: number of data points - - nonzero/nbasis: number of PDF entries which are non zero - - basis: array containing the information of the non-zero PDF entries - - nx: number of points in the xgrid (1-D), i.e., for hadronic the total number is nx * nx - - xgrid: grid of x points - - fktable: 3/4-D array of shape (ndata, nonzero, nx, (nx)) - - instead of the dictionary object that model_gen needs + Python version of the libNNPDF dataset + to be merged with the product of the new CommonData dataset + + Parameters + ---------- + name: str + name of the dataset + fktables_data: list(:py:class:`validphys.coredata.FKTableData`) + list of coredata fktable objects + + operation: str + operation to be applied to the fktables in the dataset, default "NULL" + frac: float + fraction of the data to enter the training set + training_mask: bool + training mask to apply to the fktable """ - how_many = dataset_c.GetNSigma() - dict_fktables = [] - for i in range(how_many): - fktable = dataset_c.GetFK(i) - dict_fktables.append(fk_parser(fktable, dataset_c.IsHadronic())) - - dataset_dict = { - "fktables": dict_fktables, - "hadronic": dataset_c.IsHadronic(), - "operation": dataset_spec.op, - "name": dataset_c.GetSetName(), - "frac": dataset_spec.frac, - "ndata": dataset_c.GetNData(), - } - - return [dataset_dict] - - -def common_data_reader_experiment(experiment_c, experiment_spec): - """ - Wrapper around the experiments. Loop over all datasets in an experiment, - calls common_data_reader on them and return a list with the content. - - # Arguments: - - `experiment_c`: c representation of the experiment object - - `experiment_spec`: python representation of the experiment object - - # Returns: - - `[parsed_datasets]`: a list of dictionaries output from `common_data_reader_dataset` - """ - parsed_datasets = [] - for dataset_c, dataset_spec in zip(experiment_c.DataSets(), experiment_spec.datasets): - parsed_datasets += common_data_reader_dataset(dataset_c, dataset_spec) - return parsed_datasets - -def positivity_reader(pos_spec): + # NOTE: + # This class tries to be compatible with the libNNPDF dataset class + # after commondata is also in python, FittableDataSet can inherit from the vp dataset + # which knows how to generate its "fittable" version. 
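A toy illustration, with made-up indices, of how a per-dataset boolean `training_mask` is translated into the integer `data` indices that the fktable cuts machinery understands:

```python
import numpy as np
import pandas as pd

# The "data" level of the sigma index identifies each point of the dataset
data_idx = pd.Index([0, 1, 2, 3, 4], name="data")
training_mask = np.array([True, False, True, True, False])

tr_idx = data_idx[training_mask].values   # points entering the training fktables
vl_idx = data_idx[~training_mask].values  # points entering the validation fktables
print(tr_idx, vl_idx)                     # [0 2 3] [1 4]
```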
+ + name: str + fktables_data: list # of validphys.coredata.FKTableData objects + + # Things that can have default values: + operation: str = "NULL" + frac: float = 1.0 + training_mask: np.ndarray = None # boolean array + + def __post_init__(self): + self._tr_mask = None + self._vl_mask = None + if self.training_mask is not None: + data_idx = self.fktables_data[0].sigma.index.get_level_values(0).unique() + self._tr_mask = data_idx[self.training_mask].values + self._vl_mask = data_idx[~self.training_mask].values + + @property + def ndata(self): + """Number of datapoints in the dataset""" + return self.fktables_data[0].ndata + + @property + def hadronic(self): + """Returns true if this is a hadronic collision dataset""" + return self.fktables_data[0].hadronic + + def fktables(self): + """Return the list of fktable tensors for the dataset""" + return [fk.get_np_fktable() for fk in self.fktables_data] + + def training_fktables(self): + """Return the fktable tensors for the trainig data""" + if self._tr_mask is not None: + return [fk.with_cuts(self._tr_mask).get_np_fktable() for fk in self.fktables_data] + return self.fktables() + + def validation_fktables(self): + """Return the fktable tensors for the validation data""" + if self._vl_mask is not None: + return [fk.with_cuts(self._vl_mask).get_np_fktable() for fk in self.fktables_data] + return self.fktables() + + +def validphys_group_extractor(datasets, tr_masks): """ - Specific reader for positivity sets + Receives a grouping spec from validphys (most likely an experiment) + and loops over its content extracting and parsing all information required for the fit + + Parameters + ---------- + datasets: list(:py:class:`validphys.core.DataSetSpec`) + List of dataset specs in this group + tr_masks: list(np.array) + List of training masks to be set for each dataset + + Returns + ------- + loaded_obs: list (:py:class:`validphys.n3fit_data_utils.FittableDataSet`) """ - pos_c = pos_spec.load() - ndata = pos_c.GetNData() - - parsed_set = [fk_parser(pos_c, pos_c.IsHadronic())] - - pos_sets = [ - { - "fktables": parsed_set, - "hadronic": pos_c.IsHadronic(), - "operation": "NULL", - "name": pos_spec.name, - "frac": 1.0, - "ndata": ndata, - "tr_fktables": [i["fktable"] for i in parsed_set], - } - ] - - positivity_factor = pos_spec.maxlambda - - dict_out = { - "datasets": pos_sets, - "trmask": np.ones(ndata, dtype=np.bool), - "name": pos_spec.name, - "expdata": np.zeros((1, ndata)), - "ndata": ndata, - "positivity": True, - "lambda": positivity_factor, - "count_chi2": False, - "integrability": "INTEG" in pos_spec.name, - } - - return dict_out + loaded_obs = [] + # Use zip_longest since tr_mask can be (and it is fine) an empty list + for dspec, mask in zip_longest(datasets, tr_masks): + # Load all fktables with the appropiate cuts + fktables = [fk.load_with_cuts(dspec.cuts) for fk in dspec.fkspecs] + # And now put them in a FittableDataSet object which + loaded_obs.append(FittableDataSet(dspec.name, fktables, dspec.op, dspec.frac, mask)) + return loaded_obs diff --git a/validphys2/src/validphys/pineparser.py b/validphys2/src/validphys/pineparser.py new file mode 100644 index 0000000000..1df490c8ed --- /dev/null +++ b/validphys2/src/validphys/pineparser.py @@ -0,0 +1,316 @@ +""" + Loader for the pineappl-based FKTables + + The FKTables for pineappl have ``pineappl.lz4`` and can be utilized + directly with the ``pineappl`` cli as well as read with ``pineappl.fk_table`` +""" +import numpy as np +import pandas as pd + +from reportengine.compat import yaml + +from 
validphys.coredata import FKTableData + +########### This part might eventually be part of whatever commondata reader +EXT = "pineappl.lz4" + + +class YamlFileNotFound(FileNotFoundError): + """ymldb file for dataset not found.""" + + +class GridFileNotFound(FileNotFoundError): + """PineAPPL file for FK table not found.""" + + +def _load_yaml(yaml_file): + """Load a dataset.yaml file. + + Parameters + ---------- + yaml_file : Path + path of the yaml file for the given dataset + + Returns + ------- + dict : + noramlized parsed file content + """ + if not yaml_file.exists(): + raise YamlFileNotFound(yaml_file) + ret = yaml.safe_load(yaml_file.read_text()) + # Make sure the operations are upper-cased for compound-compatibility + ret["operation"] = "NULL" if ret["operation"] is None else ret["operation"].upper() + return ret + + +def pineko_yaml(yaml_file, grids_folder, check_grid_existence=True): + """Given a yaml_file, returns the corresponding dictionary and grids. + + The dictionary contains all information and we return an extra field + with all the grids to be loaded for the given dataset. + + Parameters + ---------- + yaml_file : pathlib.Path + path of the yaml file for the given dataset + grids_folder : pathlib.Path + path of the grids folder + check_grid_existence: bool + if True (default) checks whether the grid exists + + Returns + ------- + yaml_content: dict + Metadata prepared for the FKTables + paths: list(list(path)) + List (of lists) with all the grids that will need to be loaded + """ + yaml_content = _load_yaml(yaml_file) + + # Turn the operands and the members into paths (and check all of them exist) + ret = [] + for operand in yaml_content["operands"]: + tmp = [] + for member in operand: + p = grids_folder / f"{member}.{EXT}" + if not p.exists() and check_grid_existence: + raise GridFileNotFound(f"Failed to find {p}") + tmp.append(p) + ret.append(tmp) + + return yaml_content, ret + + +def pineko_apfelcomb_compatibility_flags(gridpaths, metadata): + """ + Prepare the apfelcomb-pineappl compatibility fixes by matching the apfelcomb content + of the metadata to the grids that are being loaded. + + These fixes can be of only three types: + + - normalization: + normalization per subgrid + + normalization: + grid_name: factor + + - repetition_flag: + when a grid was actually the same point repeated X times + NNPDF cfactors and cuts are waiting for this repetition and so we need to keep track of it + + repetition_flag: + grid_name + + - shifts: + only for ATLASZPT8TEVMDIST + the points in this dataset are not contiguous so the index is shifted + + shifts: + grid_name: shift_int + + Returns + ------- + apfelcomb_norm: np.array + Per-point normalization factor to be applied to the grid + to be compatible with the data + apfelcomb_repetition_flag: bool + Whether the fktable is a single point which gets repeated up to a certain size + (for instance to normalize a distribution) + shift: list(int) + Shift in the data index for each grid that forms the fktable + """ + if metadata.get("apfelcomb") is None: + return None + + # Can't pathlib understand double suffixes? 
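On the question in the comment above: `pathlib` only strips one suffix at a time, which is why the grid names are recovered with a plain string replace. The grid name below is hypothetical, reusing the dataset mentioned in the shifts note.

```python
from pathlib import Path

p = Path("ATLASZPT8TEVMDIST.pineappl.lz4")

print(p.stem)                               # 'ATLASZPT8TEVMDIST.pineappl' (only .lz4 dropped)
print(p.suffixes)                           # ['.pineappl', '.lz4']
print(p.name.replace(".pineappl.lz4", ""))  # 'ATLASZPT8TEVMDIST'
```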
+ operands = [i.name.replace(f".{EXT}", "") for i in gridpaths] + ret = {} + + # Check whether we have a normalization active and whether it affects any of the grids + if metadata["apfelcomb"].get("normalization") is not None: + norm_info = metadata["apfelcomb"]["normalization"] + # Now fill the operands that need normalization + ret["normalization"] = [norm_info.get(op, 1.0) for op in operands] + + # Check whether the repetition flag is active + if metadata["apfelcomb"].get("repetition_flag") is not None: + if len(operands) == 1: + ret["repetition_flag"] = operands[0] in metadata["apfelcomb"]["repetition_flag"] + else: + # Just for the sake of it, let's check whether we did something stupid + if any(op in metadata["apfelcomb"]["repetition_flag"] for op in operands): + raise ValueError(f"The yaml info for {metadata['target_dataset']} is broken") + + # Check whether the dataset has shifts + # NOTE: this only happens for ATLASZPT8TEVMDIST, if that gets fixed we might as well remove it + if metadata["apfelcomb"].get("shifts") is not None: + shift_info = metadata["apfelcomb"]["shifts"] + ret["shifts"] = [shift_info.get(op, 0) for op in operands] + + return ret + + +def _pinelumi_to_columns(pine_luminosity, hadronic): + """Makes the pineappl luminosity into the column indices of a dataframe + These corresponds to the indices of a flattened (14x14) matrix for hadronic observables + and the non-zero indices of the 14-flavours for DIS + + Parameters + ---------- + pine_luminosity: list(tuple) + list with a pair of flavours per channel + hadronic: bool + flag for hadronic / DIS observables + + Returns + ------- + list(int): list of labels for the columns + """ + evol_basis_pids = tuple( + [22, 100, 21, 200] + + [200 + n**2 - 1 for n in range(2, 6 + 1)] + + [100 + n**2 - 1 for n in range(2, 6 + 1)] + ) + flav_size = len(evol_basis_pids) + columns = [] + if hadronic: + for i, j in pine_luminosity: + idx = evol_basis_pids.index(i) + jdx = evol_basis_pids.index(j) + columns.append(flav_size * idx + jdx) + else: + # The proton might come from both sides + try: + columns = [evol_basis_pids.index(i) for _, i in pine_luminosity] + except ValueError: + columns = [evol_basis_pids.index(i) for i, _ in pine_luminosity] + return columns + + +def get_yaml_information(yaml_file, theorypath): + """Reads the yaml information from a yaml compound file + + Transitional function: the call to "pineko" might be to some other commondata reader + that will know how to extract the information from the commondata + """ + # The pineappl grids are just stored where the fktables would be, "fastkernel" + grids_folder = theorypath / "fastkernel" + return pineko_yaml(yaml_file, grids_folder) + + +def pineappl_reader(fkspec): + """ + Receives a fkspec, which contains the path to the grids that are to be read by pineappl + as well as metadata that fixes things like conversion factors or apfelcomb flag. + + Note that both conversion factors and apfelcomb flags will be eventually removed. + + For more information on the reading of pienappl tables: + https://pineappl.readthedocs.io/en/latest/modules/pineappl/pineappl.html#pineappl.pineappl.PyFkTable + + About the reader: + Each pineappl table is a 4-dimensional grid with: + (ndata, active channels, x1, x2) + for DIS grids x2 will contain one single number. + The luminosity channels are given in a (flav1, flav2) format and thus need to be converted + to the 1-D index of a (14x14) luminosity tensor in order to put in the form of a dataframe. 
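A worked example of the luminosity-channel mapping described above: each `(flav1, flav2)` pid pair from the pineappl grid is converted into a single column index of the flattened 14x14 evolution-basis tensor, as `_pinelumi_to_columns` does for hadronic observables.

```python
evol_basis_pids = tuple(
    [22, 100, 21, 200]
    + [200 + n**2 - 1 for n in range(2, 6 + 1)]
    + [100 + n**2 - 1 for n in range(2, 6 + 1)]
)
flav_size = len(evol_basis_pids)  # 14 evolution-basis flavours

def pair_to_column(pid1, pid2):
    # Row-major index into the flattened (14 x 14) luminosity tensor
    return flav_size * evol_basis_pids.index(pid1) + evol_basis_pids.index(pid2)

print(pair_to_column(21, 21))   # gluon-gluon channel  -> column 30
print(pair_to_column(22, 100))  # photon-singlet channel -> column 1
```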
+ + All grids in pineappl are constructed with the exact same xgrid, + the active channels can vary and so when grids are concatenated for an observable + the gaps are filled with 0s. + + The pineappl grids are such that obs = sum_{bins} fk * f (*f) * bin_w + so in order to use them together with old-style grids (obs = sum_{bins} fk * xf (*xf)) + it is necessary to remove the factor of x and the normalization of the bins. + + About apfelcomb flags: + old commondata files and old grids have evolved together + and fixes and hacks have been incorporated in one or another + for the new theory to be compatible with old commpondata it is necessary + to keep track of said hacks (and to apply conversion factors when required) + + Returns + ------- + validphys.coredata.FKTableData + an FKTableData object containing all necessary information to compute predictions + """ + from pineappl.fk_table import FkTable + + pines = [FkTable.read(i) for i in fkspec.fkpath] + + # Extract metadata from the first grid + pine_rep = pines[0] + + # Is it hadronic? (at the moment only hadronic and DIS are considered) + hadronic = pine_rep.key_values()["initial_state_1"] == pine_rep.key_values()["initial_state_2"] + # Sanity check (in case at some point we start fitting things that are not protons) + if hadronic and pine_rep.key_values()["initial_state_1"] != "2212": + raise ValueError( + "pineappl_reader is not prepared to read a hadronic fktable with no protons!" + ) + Q0 = np.sqrt(pine_rep.muf2()) + xgrid = pine_rep.x_grid() + xi = np.arange(len(xgrid)) + protected = False + + apfelcomb = pineko_apfelcomb_compatibility_flags(fkspec.fkpath, fkspec.metadata) + + # fktables in pineapplgrid are for obs = fk * f while previous fktables were obs = fk * xf + # prepare the grid all tables will be divided by + if hadronic: + xdivision = np.prod(np.meshgrid(xgrid, xgrid), axis=0) + else: + xdivision = xgrid[:, np.newaxis] + + partial_fktables = [] + ndata = 0 + for i, p in enumerate(pines): + + # Remove the bin normalization + raw_fktable = (p.table().T / p.bin_normalizations()).T + n = raw_fktable.shape[0] + + # Apply the apfelcomb fixes _if_ they are needed + if apfelcomb is not None: + if apfelcomb.get("normalization") is not None: + raw_fktable = raw_fktable * apfelcomb["normalization"][i] + if apfelcomb.get("repetition_flag", False): + raw_fktable = raw_fktable[0:1] + n = 1 + protected = True + if apfelcomb.get("shifts") is not None: + ndata += apfelcomb["shifts"][i] + + # Check conversion factors and remove the x* from the fktable + raw_fktable *= fkspec.metadata.get("conversion_factor", 1.0) / xdivision + + # Create the multi-index for the dataframe + # for optimized pineappls different grids can potentially have different indices + # so they need to be indexed separately and then concatenated only at the end + lumi_columns = _pinelumi_to_columns(p.lumi(), hadronic) + lf = len(lumi_columns) + data_idx = np.arange(ndata, ndata + n) + if hadronic: + idx = pd.MultiIndex.from_product([data_idx, xi, xi], names=["data", "x1", "x2"]) + else: + idx = pd.MultiIndex.from_product([data_idx, xi], names=["data", "x"]) + + # Now concatenate (data, x1, x2) and move the flavours to the columns + df_fktable = raw_fktable.swapaxes(0, 1).reshape(lf, -1).T + partial_fktables.append(pd.DataFrame(df_fktable, columns=lumi_columns, index=idx)) + + ndata += n + + # Finallly concatenate all fktables, sort by flavours and fill any holes + sigma = pd.concat(partial_fktables, sort=True, copy=False).fillna(0.0) + + return FKTableData( + 
sigma=sigma, + ndata=ndata, + Q0=Q0, + metadata=fkspec.metadata, + hadronic=hadronic, + xgrid=xgrid, + protected=protected, + ) diff --git a/validphys2/src/validphys/results.py b/validphys2/src/validphys/results.py index e46ce9bf2e..98334103e2 100644 --- a/validphys2/src/validphys/results.py +++ b/validphys2/src/validphys/results.py @@ -77,8 +77,14 @@ def __len__(self): class DataResult(StatsResult): """Holds the relevant information from a given dataset""" - def __init__(self, dataobj, covmat, sqrtcovmat): - self._central_value = dataobj.get_cv() + def __init__(self, dataset, covmat, sqrtcovmat): + # The commondata is currently a libNNPDF object + loaded_cd = dataset.load_commondata() + if isinstance(loaded_cd, list): + cv = np.concatenate([cd.get_cv() for cd in loaded_cd]) + else: + cv = loaded_cd.get_cv() + self._central_value = cv stats = Stats(self._central_value) self._covmat = covmat self._sqrtcovmat = sqrtcovmat @@ -462,9 +468,8 @@ def results(dataset: (DataSetSpec), pdf: PDF, covariance_matrix, sqrt_covmat): The theory is specified as part of the dataset. A group of datasets is also allowed. (as a result of the C++ code layout).""" - data = dataset.load() return ( - DataResult(data, covariance_matrix, sqrt_covmat), + DataResult(dataset, covariance_matrix, sqrt_covmat), ThPredictionsResult.from_convolution(pdf, dataset), ) @@ -493,7 +498,7 @@ def pdf_results( th_results = [ThPredictionsResult.from_convolution(pdf, dataset) for pdf in pdfs] - return (DataResult(dataset.load(), covariance_matrix, sqrt_covmat), *th_results) + return (DataResult(dataset, covariance_matrix, sqrt_covmat), *th_results) @require_one("pdfs", "pdf") diff --git a/validphys2/src/validphys/theorycovariance/tests.py b/validphys2/src/validphys/theorycovariance/tests.py index f9b88596ee..1a90302cac 100644 --- a/validphys2/src/validphys/theorycovariance/tests.py +++ b/validphys2/src/validphys/theorycovariance/tests.py @@ -219,7 +219,7 @@ def all_matched_data_lengths(all_matched_datasets): """Returns a list of the data sets lengths.""" lens = [] for rlist in all_matched_datasets: - lens.append(rlist[0].load().GetNData()) + lens.append(rlist[0].load_commondata().GetNData()) return lens
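A minimal sketch of the central-value handling that `DataResult` now performs: `load_commondata()` returns a list for a data group and a single object for a dataset, so the central values are concatenated only in the former case. `_FakeCD` below is a stand-in for the loaded commondata object, used purely for illustration.

```python
import numpy as np

class _FakeCD:
    # Stand-in exposing the get_cv() interface used by DataResult
    def __init__(self, cv):
        self._cv = np.asarray(cv, dtype=float)

    def get_cv(self):
        return self._cv

def central_values(loaded_cd):
    # A group returns a list of loaded commondata, a dataset a single object
    if isinstance(loaded_cd, list):
        return np.concatenate([cd.get_cv() for cd in loaded_cd])
    return loaded_cd.get_cv()

print(central_values([_FakeCD([1.0, 2.0]), _FakeCD([3.0])]))  # [1. 2. 3.]
print(central_values(_FakeCD([4.0, 5.0])))                    # [4. 5.]
```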