diff --git a/doc/sphinx/source/tutorials/bundle-pdfs.rst b/doc/sphinx/source/tutorials/bundle-pdfs.rst new file mode 100644 index 0000000000..10b3510ec4 --- /dev/null +++ b/doc/sphinx/source/tutorials/bundle-pdfs.rst @@ -0,0 +1,78 @@ +Bundle PDFs with :math:`\alpha_s` replicas +========================================== + +Using ``validphys`` it is possible to produce a bundled +PDF set which accounts for a combined PDF + :math:`\alpha_s` +uncertainty. The procedure to generate such an LHAPDF set +is to simply take a baseline PDF set and append the central +replica from a list of :math:`\alpha_s` variation fits. + +The action to leverage this is +:py:func:`validphys.replica_selector.alpha_s_bundle_pdf`. We +specify the baseline PDF as the ``pdf`` key within the runcard +and the ``pdfs`` list specifies the :math:`\alpha_s` fits that +are to be used. In the following example, the ``NNPDF31_nnlo_as_0118`` +PDF set is used as baseline and we append the central replica from +``NNPDF31_nnlo_as_0117`` and ``NNPDF31_nnlo_as_0119``. + +.. code-block :: yaml + + pdf: NNPDF31_nnlo_as_0118 + + pdfs: + - NNPDF31_nnlo_as_0117 + - NNPDF31_nnlo_as_0119 + + actions_: + - alpha_s_bundle_pdf + +Executing this runcard with ``validphys`` produces the bundled PDF set +in the output folder, which by default will be name the same as the baseline, +except for the ``_pdfas`` suffix being appended: + +.. code-block :: + + output/ + ├── NNPDF31_nnlo_as_0118_pdfas + │   ├── NNPDF31_nnlo_as_0118_pdfas.info + │   ├── NNPDF31_nnlo_as_0118_pdfas_0000.dat + │   ├── NNPDF31_nnlo_as_0118_pdfas_0001.dat + │   ├── NNPDF31_nnlo_as_0118_pdfas_0002.dat + +The ``.info`` file now has ``replicas+as`` as the ``ErrorType`` and the +``NumMembers`` key has now been updated to reflect the additional replicas +(in this example it has been incremented by two). The additional replicas +also have the ``AlphaS_MZ`` key prepended at the top of the file so as to +keep track of what PDF set they originated from. + +The optional ``target_name`` key can be provided in the runcard so as to +specify the name of the resulting PDF. The following runcard will generate +the same bundled PDF as before, but with the name ``bundled_pdf``: + +.. code-block :: yaml + + pdf: NNPDF31_nnlo_as_0118 + + pdfs: + - NNPDF31_nnlo_as_0117 + - NNPDF31_nnlo_as_0119 + + target_name: bundled_pdf + + actions_: + - alpha_s_bundle_pdf + + +.. code-block :: + + output/ + ├── bundled_pdf + │   ├── bundled_pdf.info + │   ├── bundled_pdf_0000.dat + │   ├── bundled_pdf_0001.dat + │   ├── bundled_pdf_0002.dat + +.. note :: + + Despite adding additional replicas, the central replica in the bundled + PDF set is **not** recomputed: it is identical to that of the baseline. diff --git a/doc/sphinx/source/tutorials/index.rst b/doc/sphinx/source/tutorials/index.rst index 533d37ddb8..0a6899741d 100644 --- a/doc/sphinx/source/tutorials/index.rst +++ b/doc/sphinx/source/tutorials/index.rst @@ -55,4 +55,5 @@ Miscellaneous ./pseudodata.md ./newplottingfn.rst ./addspecialgrouping.rst + ./bundle-pdfs.rst ./conda.md diff --git a/validphys2/src/validphys/replica_selector.py b/validphys2/src/validphys/replica_selector.py index 0ab8d66c82..c68ffecb01 100644 --- a/validphys2/src/validphys/replica_selector.py +++ b/validphys2/src/validphys/replica_selector.py @@ -5,12 +5,16 @@ """ import logging import numbers +import pathlib +import shutil import warnings +import re import numpy as np import pandas as pd -from reportengine.checks import check_positive +from reportengine.checks import check_positive, make_argcheck, check +from reportengine.compat import yaml from reportengine.table import table from reportengine.figure import figuregen @@ -18,10 +22,115 @@ from validphys.pdfoutput import pdfset from validphys.lhio import new_pdf_from_indexes from validphys.checks import check_pdf_is_montecarlo, check_scale +from validphys.core import PDF from validphys.pdfplots import ReplicaPDFPlotter +from validphys.renametools import rename_pdf +from validphys.utils import tempfile_cleaner log = logging.getLogger(__name__) +def _fixup_new_replica(alphas_pdf: PDF, new_replica_file): + """Helper function that takes in a + :py:class:`validphys.core.PDF` object as well as + the path to the central replica corresponding to the + PDF and handles the writing of the alphas values + to the header file. + """ + AlphaS_MZ = alphas_pdf.AlphaS_MZ + AlphaS_Vals = alphas_pdf.AlphaS_Vals + with open(new_replica_file, 'rb') as in_stream: + data = in_stream.read() + with open(new_replica_file, 'wb') as out_stream: + # Add the AlphaS_MZ and AlphaS_Vals keys + out_stream.write(f"AlphaS_MZ: {AlphaS_MZ}\n AlphaS_Vals: {AlphaS_Vals}\n".encode()) + out_stream.write(data) + +@make_argcheck +def _check_target_name(target_name): + """Make sure this specifies a name and not some kid of path""" + if target_name is None: + return + check( + re.fullmatch(r'[\w]+', target_name), + "`target_name` must contain alphnumeric characters and underscores only", + ) + +@_check_target_name +def alpha_s_bundle_pdf(pdf, pdfs, output_path, target_name: (str, type(None)) = None): + """Action that bundles PDFs for distributing to the LHAPDF + format. The baseline pdf is declared as the ``pdf`` key + and the PDFs from which the replica 0s are to be added is + declared as the ``pdfs`` list. + + The bundled PDF set is stored inside the ``output`` directory. + + Parameters + ---------- + pdf: :py:class:`validphys.core.PDF` + The baseline PDF to which the new replicas will be added + pdfs: list of :py:class:`validphys.core.PDF` + The list of PDFs from which replica0 will be appended + target_name: str or None + Optional argument specifying the name of the output PDF. + If ``None``, then the name of the original pdf is used + but with ``_pdfas`` appended + """ + base_pdf_path = pathlib.Path(pdf.infopath).parent + nrep = len(pdf) + + target_name = target_name or pdf.name + '_pdfas' + target_path = output_path / target_name + + alphas_paths = [pathlib.Path(i.infopath).parent for i in pdfs] + alphas_replica0s = [path / f'{p}_0000.dat' for path, p in zip(alphas_paths, pdfs)] + new_nrep = nrep + len(alphas_replica0s) + alphas_values = [str(p.AlphaS_MZ) for p in pdfs] + + if target_path.exists(): + log.warning(f"{target_path} already exists. Deleting contents.") + shutil.rmtree(target_path) + + # We create a temporary directory to handle the manipulations inside. + # We move the files to the new directory at the end. + with tempfile_cleaner( + root=output_path, exit_func=shutil.rmtree, exc=KeyboardInterrupt + ) as tempdir: + # Copy the base pdf into the temporary directory + temp_pdf = shutil.copytree(base_pdf_path, tempdir / pdf.name) + + # Copy the alphas PDF replica0s into the new PDF + for i, (alphas_pdf, rep) in enumerate(zip(pdfs, alphas_replica0s)): + to = temp_pdf / f'{pdf.name}_{str(i + nrep).zfill(4)}.dat' + shutil.copy(rep, to) + _fixup_new_replica(alphas_pdf, to) + + #  Fixup the info file + info_file = (temp_pdf / temp_pdf.name).with_suffix('.info') + + with open(info_file, 'r') as stream: + yaml_obj = yaml.YAML() + info_yaml = yaml_obj.load(stream) + info_yaml['NumMembers'] = new_nrep + info_yaml['ErrorType'] += '+as' + extra_desc = '; '.join( + f"mem={i} => alphas(MZ)={val}" + for val, i in zip(alphas_values, range(nrep, new_nrep)) + ) + info_yaml['SetDesc'] += f"; {extra_desc}" + with open(info_file, 'w') as stream: + yaml_obj.dump(info_yaml, stream) + + # Rename the base pdf to the final name + rename_pdf(temp_pdf, pdf.name, target_name) + # This is the pdf path after the above renaming + # i.e new_pdf.exists() == True + new_pdf = temp_pdf.with_name(target_name) + # Move the final pdf outside the temporary directory + new_pdf = new_pdf.rename(target_path) + log.info(f"alpha_s bundle written at {new_pdf}") + return target_name + + @check_positive('Q') def gluon_values(pdf, Q, xgrid):