Skip to content
78 changes: 78 additions & 0 deletions doc/sphinx/source/tutorials/bundle-pdfs.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
Bundle PDFs with :math:`\alpha_s` replicas
==========================================

Using ``validphys`` it is possible to produce a bundled
PDF set which accounts for a combined PDF + :math:`\alpha_s`
uncertainty. The procedure to generate such an LHAPDF set
is to simply take a baseline PDF set and append the central
replica from a list of :math:`\alpha_s` variation fits.

The action to leverage this is
:py:func:`validphys.replica_selector.alpha_s_bundle_pdf`. We
specify the baseline PDF as the ``pdf`` key within the runcard
and the ``pdfs`` list specifies the :math:`\alpha_s` fits that
are to be used. In the following example, the ``NNPDF31_nnlo_as_0118``
PDF set is used as baseline and we append the central replica from
``NNPDF31_nnlo_as_0117`` and ``NNPDF31_nnlo_as_0119``.

.. code-block :: yaml

pdf: NNPDF31_nnlo_as_0118

pdfs:
- NNPDF31_nnlo_as_0117
- NNPDF31_nnlo_as_0119

actions_:
- alpha_s_bundle_pdf

Executing this runcard with ``validphys`` produces the bundled PDF set
in the output folder, which by default will be name the same as the baseline,
except for the ``_pdfas`` suffix being appended:

.. code-block ::

output/
├── NNPDF31_nnlo_as_0118_pdfas
│   ├── NNPDF31_nnlo_as_0118_pdfas.info
│   ├── NNPDF31_nnlo_as_0118_pdfas_0000.dat
│   ├── NNPDF31_nnlo_as_0118_pdfas_0001.dat
│   ├── NNPDF31_nnlo_as_0118_pdfas_0002.dat

The ``.info`` file now has ``replicas+as`` as the ``ErrorType`` and the
``NumMembers`` key has now been updated to reflect the additional replicas
(in this example it has been incremented by two). The additional replicas
also have the ``AlphaS_MZ`` key prepended at the top of the file so as to
keep track of what PDF set they originated from.

The optional ``target_name`` key can be provided in the runcard so as to
specify the name of the resulting PDF. The following runcard will generate
the same bundled PDF as before, but with the name ``bundled_pdf``:

.. code-block :: yaml

pdf: NNPDF31_nnlo_as_0118

pdfs:
- NNPDF31_nnlo_as_0117
- NNPDF31_nnlo_as_0119

target_name: bundled_pdf

actions_:
- alpha_s_bundle_pdf


.. code-block ::

output/
├── bundled_pdf
│   ├── bundled_pdf.info
│   ├── bundled_pdf_0000.dat
│   ├── bundled_pdf_0001.dat
│   ├── bundled_pdf_0002.dat

.. note ::

Despite adding additional replicas, the central replica in the bundled
PDF set is **not** recomputed: it is identical to that of the baseline.
1 change: 1 addition & 0 deletions doc/sphinx/source/tutorials/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,5 @@ Miscellaneous
./pseudodata.md
./newplottingfn.rst
./addspecialgrouping.rst
./bundle-pdfs.rst
./conda.md
111 changes: 110 additions & 1 deletion validphys2/src/validphys/replica_selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,132 @@
"""
import logging
import numbers
import pathlib
import shutil
import warnings
import re

import numpy as np
import pandas as pd

from reportengine.checks import check_positive
from reportengine.checks import check_positive, make_argcheck, check
from reportengine.compat import yaml
from reportengine.table import table
from reportengine.figure import figuregen

from validphys.pdfbases import flavour
from validphys.pdfoutput import pdfset
from validphys.lhio import new_pdf_from_indexes
from validphys.checks import check_pdf_is_montecarlo, check_scale
from validphys.core import PDF
from validphys.pdfplots import ReplicaPDFPlotter
from validphys.renametools import rename_pdf
from validphys.utils import tempfile_cleaner

log = logging.getLogger(__name__)

def _fixup_new_replica(alphas_pdf: PDF, new_replica_file):
"""Helper function that takes in a
:py:class:`validphys.core.PDF` object as well as
the path to the central replica corresponding to the
PDF and handles the writing of the alphas values
to the header file.
"""
AlphaS_MZ = alphas_pdf.AlphaS_MZ
AlphaS_Vals = alphas_pdf.AlphaS_Vals
with open(new_replica_file, 'rb') as in_stream:
data = in_stream.read()
with open(new_replica_file, 'wb') as out_stream:
# Add the AlphaS_MZ and AlphaS_Vals keys
out_stream.write(f"AlphaS_MZ: {AlphaS_MZ}\n AlphaS_Vals: {AlphaS_Vals}\n".encode())
out_stream.write(data)

@make_argcheck
def _check_target_name(target_name):
"""Make sure this specifies a name and not some kid of path"""
if target_name is None:
return
check(
re.fullmatch(r'[\w]+', target_name),
"`target_name` must contain alphnumeric characters and underscores only",
)

@_check_target_name
def alpha_s_bundle_pdf(pdf, pdfs, output_path, target_name: (str, type(None)) = None):
"""Action that bundles PDFs for distributing to the LHAPDF
format. The baseline pdf is declared as the ``pdf`` key
and the PDFs from which the replica 0s are to be added is
declared as the ``pdfs`` list.

The bundled PDF set is stored inside the ``output`` directory.

Parameters
----------
pdf: :py:class:`validphys.core.PDF`
The baseline PDF to which the new replicas will be added
pdfs: list of :py:class:`validphys.core.PDF`
The list of PDFs from which replica0 will be appended
target_name: str or None
Optional argument specifying the name of the output PDF.
If ``None``, then the name of the original pdf is used
but with ``_pdfas`` appended
"""
base_pdf_path = pathlib.Path(pdf.infopath).parent
nrep = len(pdf)

target_name = target_name or pdf.name + '_pdfas'
target_path = output_path / target_name

alphas_paths = [pathlib.Path(i.infopath).parent for i in pdfs]
alphas_replica0s = [path / f'{p}_0000.dat' for path, p in zip(alphas_paths, pdfs)]
new_nrep = nrep + len(alphas_replica0s)
alphas_values = [str(p.AlphaS_MZ) for p in pdfs]

if target_path.exists():
log.warning(f"{target_path} already exists. Deleting contents.")
shutil.rmtree(target_path)

# We create a temporary directory to handle the manipulations inside.
# We move the files to the new directory at the end.
with tempfile_cleaner(
root=output_path, exit_func=shutil.rmtree, exc=KeyboardInterrupt
) as tempdir:
# Copy the base pdf into the temporary directory
temp_pdf = shutil.copytree(base_pdf_path, tempdir / pdf.name)

# Copy the alphas PDF replica0s into the new PDF
for i, (alphas_pdf, rep) in enumerate(zip(pdfs, alphas_replica0s)):
to = temp_pdf / f'{pdf.name}_{str(i + nrep).zfill(4)}.dat'
shutil.copy(rep, to)
_fixup_new_replica(alphas_pdf, to)

#  Fixup the info file
info_file = (temp_pdf / temp_pdf.name).with_suffix('.info')

with open(info_file, 'r') as stream:
yaml_obj = yaml.YAML()
info_yaml = yaml_obj.load(stream)
info_yaml['NumMembers'] = new_nrep
info_yaml['ErrorType'] += '+as'
extra_desc = '; '.join(
f"mem={i} => alphas(MZ)={val}"
for val, i in zip(alphas_values, range(nrep, new_nrep))
)
info_yaml['SetDesc'] += f"; {extra_desc}"
with open(info_file, 'w') as stream:
yaml_obj.dump(info_yaml, stream)

# Rename the base pdf to the final name
rename_pdf(temp_pdf, pdf.name, target_name)
# This is the pdf path after the above renaming
# i.e new_pdf.exists() == True
new_pdf = temp_pdf.with_name(target_name)
# Move the final pdf outside the temporary directory
new_pdf = new_pdf.rename(target_path)
log.info(f"alpha_s bundle written at {new_pdf}")
return target_name



@check_positive('Q')
def gluon_values(pdf, Q, xgrid):
Expand Down