diff --git a/doc/sphinx/source/vp/examples.rst b/doc/sphinx/source/vp/examples.rst index 9940022567..47991a2cf0 100644 --- a/doc/sphinx/source/vp/examples.rst +++ b/doc/sphinx/source/vp/examples.rst @@ -69,6 +69,6 @@ NNLO pdf NNPDF40_nnlo_as_01180 NNPDF4. NNLO pdf hessian NNPDF40_nnlo_as_01180_hessian NNPDF4.0 NNLO hessian PDF set generated from replicas NLO fit NNPDF40_nlo_as_01180 NNPDF4.0 NLO fit with 100 replicas (+ central replica) NNLO fit NNPDF40_nnlo_lowprecision NNPDF4.0 NNLO low precision fit (theory 162) with 50 replicas (+ central replica) -NNLO fit (iterated) NNPDF40_nnlo_lowprecision_iterated Iteration of NNPDF40_nnlo_lowprecision +NNLO fit (iterated) NNPDF40_nnlo_low_precision_iterated Iteration of NNPDF40_nnlo_lowprecision fit NNPDF40_example_closure_test ``n3fit`` closure test fit with 30 replicas before and after postfit =================================== =================================== ================================================================== diff --git a/doc/sphinx/source/vp/pydataobjs.rst b/doc/sphinx/source/vp/pydataobjs.rst index 7164322b27..329cd92be8 100644 --- a/doc/sphinx/source/vp/pydataobjs.rst +++ b/doc/sphinx/source/vp/pydataobjs.rst @@ -255,3 +255,20 @@ from the API:: "theoryid": 162 } total_cov = API.dataset_inputs_covmat_from_systematics(**inp) + +Loading LHAPDF PDFs +------------------- + +A wrapper class for LHAPDF PDFs is implemented in the :py:mod:`validphys.lhapdfset` module. +An instance of this module will provide with a handful of useful wrappers to the underlying +LHAPDF python interface. This is also the output of the ``pdf.load()`` method. + +For example, the following will return the values for all 100 members of NNPDF4.0 for +the gluon and the d-quark, at three values of ``x`` at ``Q=91.2``. + +.. code-block:: python + from validphys.api import API + pdf = API.pdf(pdf="NNPDF40_nnlo_as_01180") + l_pdf = pdf.load() + alpha_s = l_pdf.central_member.alphasQ(91.2) + results = l_pdf.grid_values([21,1], [0.1, 0.2, 0.3], [91.2]) \ No newline at end of file diff --git a/validphys2/src/validphys/app.py b/validphys2/src/validphys/app.py index 62fb529536..614022a9c7 100644 --- a/validphys2/src/validphys/app.py +++ b/validphys2/src/validphys/app.py @@ -15,6 +15,7 @@ import contextlib +import lhapdf from reportengine import app from validphys.config import Config, Environment @@ -131,7 +132,7 @@ def init(self): import NNPDF NNPDF.SetVerbosity(0) - NNPDF.SetLHAPDFVerbosity(0) + lhapdf.setVerbosity(0) @staticmethod def upload_context(do_upload, output): diff --git a/validphys2/src/validphys/checks.py b/validphys2/src/validphys/checks.py index d8427a0ed5..cfec940712 100644 --- a/validphys2/src/validphys/checks.py +++ b/validphys2/src/validphys/checks.py @@ -39,7 +39,7 @@ def check_pdf_is_montecarlo(ns, **kwargs): def check_know_errors(ns, **kwargs): pdf = ns['pdf'] try: - pdf.nnpdf_error + pdf.stats_class except NotImplementedError as e: raise CheckError(e) from e diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 8897376ef8..c44191611a 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -161,7 +161,7 @@ def parse_pdf(self, name: str): # Check that we know how to compute errors try: - pdf.nnpdf_error + pdf.stats_class except NotImplementedError as e: raise ConfigError(str(e)) return pdf diff --git a/validphys2/src/validphys/core.py b/validphys2/src/validphys/core.py index 1c45342c58..2988985722 100644 --- a/validphys2/src/validphys/core.py +++ b/validphys2/src/validphys/core.py @@ -23,7 +23,7 @@ from reportengine.baseexceptions import AsInputError from reportengine.compat import yaml -from NNPDF import (LHAPDFSet, +from NNPDF import (LHAPDFSet as libNNPDF_LHAPDFSet, CommonData, FKTable, FKSet, @@ -38,23 +38,10 @@ from validphys.theorydbutils import fetch_theory from validphys.hyperoptplot import HyperoptTrial from validphys.utils import experiments_to_dataset_inputs +from validphys.lhapdfset import LHAPDFSet log = logging.getLogger(__name__) - -#TODO: Remove this eventually -#Bacward compatibility error type names -#Swig renamed these for no reason whatsoever. -try: - LHAPDFSet.erType_ER_EIG -except AttributeError: - import warnings - warnings.warn("libnnpdf out of date. Setting backwards compatible names") - LHAPDFSet.erType_ER_MC = LHAPDFSet.ER_MC - LHAPDFSet.erType_ER_EIG = LHAPDFSet.ER_EIG - LHAPDFSet.erType_ER_EIG90 = LHAPDFSet.ER_EIG90 - LHAPDFSet.erType_ER_SYMEIG = LHAPDFSet.ER_SYMEIG - class TupleComp: @classmethod @@ -97,12 +84,25 @@ class PDF(TupleComp): Statistical estimators which depends on the PDF type (MC, Hessian...) are exposed as a :py:class:`Stats` object through the :py:attr:`stats_class` attribute The LHAPDF metadata can directly be accessed through the :py:attr:`info` attribute + + + Examples + -------- + >>> from validphys.api import API + >>> from validphys.convolution import predictions + >>> args = {"dataset_input":{"dataset": "ATLASTTBARTOT"}, "theoryid":162, "use_cuts":"internal"} + >>> ds = API.dataset(**args) + >>> pdf = API.pdf(pdf="NNPDF40_nnlo_as_01180") + >>> preds = predictions(ds, pdf) + >>> preds.shape + (3, 100) """ def __init__(self, name): self.name = name self._plotname = name self._info = None + self._stats_class = None super().__init__(name) @property @@ -116,11 +116,15 @@ def label(self, label): @property def stats_class(self): """Return the stats calculator for this error type""" - error = self.error_type - klass = STAT_TYPES[error] - if self.error_conf_level is not None: - klass = functools.partial(klass, rescale_factor=self._rescale_factor()) - return klass + if self._stats_class is None: + try: + klass = STAT_TYPES[self.error_type] + except KeyError: + raise NotImplementedError(f"No Stats class for error type {self.error_type}") + if self.error_conf_level is not None: + klass = functools.partial(klass, rescale_factor=self._rescale_factor()) + self._stats_class = klass + return self._stats_class @property def infopath(self): @@ -181,12 +185,12 @@ def _rescale_factor(self): @functools.lru_cache(maxsize=16) def load(self): - return LHAPDFSet(self.name, self.nnpdf_error) + return LHAPDFSet(self.name, self.error_type) @functools.lru_cache(maxsize=2) def load_t0(self): """Load the PDF as a t0 set""" - return LHAPDFSet(self.name, LHAPDFSet.erType_ER_MCT0) + return LHAPDFSet(self.name, "t0") def __str__(self): return self.label @@ -194,30 +198,33 @@ def __str__(self): def __len__(self): return self.info["NumMembers"] - @property - def nnpdf_error(self): - """Return the NNPDF error tag, used to build the `LHAPDFSet` objeect""" + def legacy_load(self): + """Returns an libNNPDF LHAPDFSet object + Deprecated function used only in the `filter.py` module + """ error = self.error_type - if error == "replicas": - return LHAPDFSet.erType_ER_MC - cl = self.error_conf_level - if error == "hessian": + et = None + if error == "replicas": + et = libNNPDF_LHAPDFSet.erType_ER_MC + elif error == "hessian": if cl == 90: - return LHAPDFSet.erType_ER_EIG90 + et = libNNPDF_LHAPDFSet.erType_ER_EIG90 elif cl == 68: - return LHAPDFSet.erType_ER_EIG + et = libNNPDF_LHAPDFSet.erType_ER_EIG else: raise NotImplementedError(f"No hessian errors with confidence interval {cl}") - if error == "symmhessian": + elif error == "symmhessian": if cl == 68: - return LHAPDFSet.erType_ER_SYMEIG + et = libNNPDF_LHAPDFSet.erType_ER_SYMEIG else: raise NotImplementedError( f"No symmetric hessian errors with confidence interval {cl}" ) + else: + raise NotImplementedError(f"Error type for {self}: '{error}' is not implemented") - raise NotImplementedError(f"Error type for {self}: '{error}' is not implemented") + return libNNPDF_LHAPDFSet(self.name, et) @property def grid_values_index(self): @@ -236,17 +243,12 @@ def grid_values_index(self): ----- The range object can be used efficiently as a Pandas index. """ - err = self.nnpdf_error - if err is LHAPDFSet.erType_ER_MC: + if self.error_type == "replicas": return range(1, len(self)) - elif err in ( - LHAPDFSet.erType_ER_SYMEIG, - LHAPDFSet.erType_ER_EIG, - LHAPDFSet.erType_ER_EIG90, - ): + elif self.error_type in ("hessian", "symmhessian"): return range(0, len(self)) else: - raise RuntimeError("Unknown error type") + raise RuntimeError(f"Unknown error type: {self.stats_class}") def get_members(self): """Return the number of members selected in ``pdf.load().grid_values`` @@ -822,7 +824,7 @@ def error_members(self): class SymmHessianStats(Stats): - """Compute stats in the 'assymetric' hessian format: The first index (0) + """Compute stats in the 'symetric' hessian format: The first index (0) is the central value. The rest of the indexes are results for each eigenvector. A 'rescale_factor is allowed in case the eigenvector confidence interval diff --git a/validphys2/src/validphys/filters.py b/validphys2/src/validphys/filters.py index ee61137f3d..9c930c8f2a 100644 --- a/validphys2/src/validphys/filters.py +++ b/validphys2/src/validphys/filters.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ Filters for NNPDF fits """ @@ -173,7 +172,7 @@ def _filter_closure_data(filter_path, data, fakepdfset, fakenoise, errorsize): """Filter closure test data.""" total_data_points = 0 total_cut_data_points = 0 - fakeset = fakepdfset.load() + fakeset = fakepdfset.legacy_load() # Load data, don't cache result loaded_data = data.load.__wrapped__(data) # generate level 1 shift if fakenoise diff --git a/validphys2/src/validphys/gridvalues.py b/validphys2/src/validphys/gridvalues.py index 7a6b7bcd2d..3b77f8bd3a 100644 --- a/validphys2/src/validphys/gridvalues.py +++ b/validphys2/src/validphys/gridvalues.py @@ -10,14 +10,8 @@ import numpy as np -from NNPDF import REALDOUBLE, LHAPDFSet - from validphys.core import PDF - -#Numpy is unhappy about downcasting double to float implicitly, so we have -#to manually make sure all input arrays correspond to NNPDF::real. -FLTYPE = np.int32 -REALTYPE = np.double if REALDOUBLE else np.float32 +from validphys.lhapdfset import LHAPDFSet # Canonical ordering of PDG quark flavour codes QUARK_FLAVOURS = (-6, -5, -4, -3, -2, -1, 1, 2, 3, 4, 5, 6) @@ -54,9 +48,9 @@ def _grid_values(lpdf, flmat, xmat, qmat): """Compute lpdf.grid_values with more forgiving argument types""" - flmat = np.atleast_1d(np.asanyarray(flmat, dtype=FLTYPE)) - xmat = np.atleast_1d(np.asarray(xmat, dtype=REALTYPE)) - qmat = np.atleast_1d(np.asarray(qmat, dtype=REALTYPE)) + flmat = np.atleast_1d(np.asanyarray(flmat)) + xmat = np.atleast_1d(np.asarray(xmat)) + qmat = np.atleast_1d(np.asarray(qmat)) return lpdf.grid_values(flmat, xmat, qmat) def grid_values(pdf:PDF, flmat, xmat, qmat): diff --git a/validphys2/src/validphys/lhapdfset.py b/validphys2/src/validphys/lhapdfset.py new file mode 100644 index 0000000000..ff029dec7a --- /dev/null +++ b/validphys2/src/validphys/lhapdfset.py @@ -0,0 +1,129 @@ +""" + Module containing an LHAPDF class compatible with validphys + using the official lhapdf python interface. + + It exposes an interface mostly compatible with libNNPDF::LHAPDFSet + so it can be used as a drop-in replacement. + + The ``.members`` and ``.central_member`` of the ``LHAPDFSet`` are + LHAPDF objects (the typical output from ``mkPDFs``) and can be used normally. + For MC PDFs the ``central_member`` is the average of all replicas (members 1-100) + while for Hessian PDFfs the ``central_member`` is also ``members[0]`` + + Examples + -------- + >>> from validphys.lhapdfset import LHAPDFSet + >>> pdf = LHAPDFSet("NNPDF40_nnlo_as_01180", "replicas") + >>> len(pdf.members) + 100 + >>> pdf.central_member.alphasQ(91.19) + 0.11800 + >>> pdf.members[0].xfxQ2(0.5, 15625) + {-5: 6.983360500601136e-05, + -4: 0.0021818063617227604, + -3: 0.00172453472243952, + -2: 0.0010906577230485718, + -1: 0.0022049272225017286, + 1: 0.020051104853608722, + 2: 0.0954139944889494, + 3: 0.004116641378803191, + 4: 0.002180124185625795, + 5: 6.922722705177504e-05, + 21: 0.007604124516892057} +""" +import logging +import numpy as np +import lhapdf + +log = logging.getLogger(__name__) + + +class LHAPDFSet: + """Wrapper for the lhapdf python interface. + + Once instantiated this class will load the PDF set from LHAPDF. + If it is a T0 set only the CV will be loaded. + + For Monte Carlo sets the central value (member 0) is by default not included when taking + the results for all members (i.e., when using ``grid_values``). + """ + + def __init__(self, name, error_type): + log.info("PDF: %s ErrorType: %s", name, error_type) + self._name = name + self._error_type = error_type + if self.is_t0: + # If at this point we already know this is a T0 set, load only the CV + self._lhapdf_set = [lhapdf.mkPDF(name)] + else: + self._lhapdf_set = lhapdf.mkPDFs(name) + self._flavors = None + + @property + def is_t0(self): + """Check whether we are in t0 mode""" + return self._error_type == "t0" + + @property + def n_members(self): + """Return the number of active members in the PDF set""" + return len(self.members) + + @property + def members(self): + """Return the members of the set, this depends on the error type: + t0: returns only member 0 + MC: skip member 0 + Hessian: return all + """ + if self.is_t0: + return self._lhapdf_set[0:1] + if self._error_type == "replicas": + return self._lhapdf_set[1:] + return self._lhapdf_set + + @property + def central_member(self): + """Returns a reference to member 0 of the PDF list""" + return self._lhapdf_set[0] + + def xfxQ(self, x, Q, n, fl): + """Return the PDF value for one single point for one single member + If the flavour is not included in the PDF (for instance top/antitop) return 0.0 + """ + member_pdf = self.members[n] + res = member_pdf.xfxQ(x, Q) + return res.get(fl, 0.0) + + @property + def flavors(self): + """Returns the list of accepted flavors by the LHAPDF set""" + if self._flavors is None: + self._flavors = self.members[0].flavors() + return self._flavors + + def grid_values(self, flavors: np.ndarray, xgrid: np.ndarray, qgrid: np.ndarray): + """Returns the PDF values for every member for the required + flavours, points in x and pointx in q + The return shape is + (members, flavors, xgrid, qgrid) + Return + ------ + ndarray of shape (members, flavors, xgrid, qgrid) + Examples + -------- + >>> import numpy as np + >>> from validphys.lhapdfset import LHAPDFSet + >>> pdf = LHAPDFSet("NNPDF40_nnlo_as_01180", "replicas") + >>> xgrid = np.random.rand(10) + >>> qgrid = np.random.rand(3) + >>> flavs = np.arange(-4,4) + >>> flavs[4] = 21 + >>> results = pdf.grid_values(flavs, xgrid, qgrid) + """ + # Create an array of x and q of equal length for LHAPDF + xarr, qarr = (g.ravel() for g in np.meshgrid(xgrid, qgrid)) + # Ask LHAPDF for the values and swap the flavours and xgrid-qgrid axes + raw = np.array([member.xfxQ(flavors, xarr, qarr) for member in self.members]).swapaxes(1, 2) + # Unroll the xgrid-qgrid axes + return raw.reshape(self.n_members, len(flavors), len(xgrid), len(qgrid)) diff --git a/validphys2/src/validphys/results.py b/validphys2/src/validphys/results.py index 0848ea6c1b..2a5cc8f541 100644 --- a/validphys2/src/validphys/results.py +++ b/validphys2/src/validphys/results.py @@ -171,7 +171,7 @@ def from_convolution(cls, pdf, dataset): class PositivityResult(StatsResult): @classmethod def from_convolution(cls, pdf, posset): - loaded_pdf = pdf.load() + loaded_pdf = pdf.legacy_load() loaded_pos = posset.load() data = loaded_pos.GetPredictions(loaded_pdf) stats = pdf.stats_class(data.T) diff --git a/validphys2/src/validphys/sumrules.py b/validphys2/src/validphys/sumrules.py index 2d036c862a..702ef350fd 100644 --- a/validphys2/src/validphys/sumrules.py +++ b/validphys2/src/validphys/sumrules.py @@ -13,13 +13,13 @@ import pandas as pd import scipy.integrate as integrate -from NNPDF import LHAPDFSet from reportengine.table import table from reportengine.checks import check_positive from reportengine.floatformatting import format_error_value_columns from validphys.core import PDF from validphys.pdfbases import ALL_FLAVOURS, parse_flarr +from validphys.lhapdfset import LHAPDFSet def _momentum_sum_rule_integrand(x, lpdf:LHAPDFSet, irep, Q): @@ -125,7 +125,7 @@ def f(x, lpdf, irep, Q): def _sum_rules(rules_dict, lpdf, Q): """Compute a SumRulesGrid from the loaded PDF, at Q""" - nmembers = lpdf.GetMembers() + nmembers = lpdf.n_members #TODO: Write this in something fast #If nothing else, at least allocate and store the result contiguously res = np.zeros((len(rules_dict), nmembers)) diff --git a/validphys2/src/validphys/tests/conftest.py b/validphys2/src/validphys/tests/conftest.py index 2553c61a21..2d1e76da07 100644 --- a/validphys2/src/validphys/tests/conftest.py +++ b/validphys2/src/validphys/tests/conftest.py @@ -48,7 +48,7 @@ def tmp(tmpdir): HESSIAN_PDF = "NNPDF40_nnlo_as_01180_hessian" THEORYID = 162 FIT = "NNPDF40_nnlo_lowprecision" -FIT_ITERATED = "NNPDF40_nnlo_lowprecision_iterated" +FIT_ITERATED = "NNPDF40_nnlo_low_precision_iterated" base_config = dict( pdf=PDF,