From 525255a41bcae284e15d94560ef538ad6a2cd978 Mon Sep 17 00:00:00 2001 From: siranipour Date: Tue, 27 Jul 2021 12:50:35 +0100 Subject: [PATCH 01/13] Adding action to bundle PDFs --- validphys2/src/validphys/paramfits/dataops.py | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/validphys2/src/validphys/paramfits/dataops.py b/validphys2/src/validphys/paramfits/dataops.py index fed61f8b2f..49fc3c749f 100644 --- a/validphys2/src/validphys/paramfits/dataops.py +++ b/validphys2/src/validphys/paramfits/dataops.py @@ -7,6 +7,9 @@ be consumed by plotting functions. """ import logging +import os +import pathlib +import shutil import warnings import functools from collections import defaultdict @@ -18,8 +21,11 @@ from reportengine import collect from reportengine.floatformatting import format_error_value_columns, ValueErrorTuple, format_number from reportengine.checks import make_argcheck, CheckError, check_positive, check_not_empty +from reportengine.compat import yaml from reportengine.table import table +from validphys.renametools import rename_pdf +from validphys.utils import tempfile_cleaner from validphys.checks import ( check_fits_different, check_dataspecs_fits_different, @@ -903,6 +909,57 @@ def as_parabolic_coefficient_table( final_table = pd.concat(tb_polys, axis=1, keys=by_dataset_suptitle) return final_table +def bundle_pdfs(pdf, pdfs, output_path, target_name=None): + base_pdf_path = pathlib.Path(pdf.infopath).parent + nrep = len(pdf) + + target_name = target_name or pdf.name + '_pdfas' + + alphas_paths = [pathlib.Path(i.infopath).parent for i in pdfs] + alphas_replica0s = [path / f'{p}_0000.dat' for path, p in zip(alphas_paths, pdfs)] + new_nrep = nrep + len(alphas_replica0s) + + # We create a temporary directory to handle the manipulations inside. + # We move the files to the new directory at the end. 
+ with tempfile_cleaner(root='./', exit_func=shutil.rmtree, exc=KeyboardInterrupt) as tempdir: + # Copy the base pdf into the temporary directory + temp_pdf = shutil.copytree(base_pdf_path, tempdir / pdf.name) + + # Copy the alphas PDF replica0s into the new PDF + for i, (alphas_pdf, rep) in enumerate(zip(pdfs, alphas_replica0s)): + to = temp_pdf / f'{pdf.name}_{str(i + nrep).zfill(4)}.dat' + shutil.copy(rep, to) + _fixup_new_replica(alphas_pdf, to) + + # Fixup the info file + info_file = (temp_pdf/temp_pdf.name).with_suffix('.info') + os.system(f"sed -i -e 's/NumMembers.*/NumMembers: {new_nrep}/g' {info_file}") + os.system(f"sed -i -e 's/ErrorType.*/ErrorType: replicas+as/g' {info_file}") + + # Rename the base pdf to the final name + rename_pdf(temp_pdf, pdf.name, target_name) + # This is the pdf path after the above renaming + # i.e new_pdf.exists() == True + new_pdf = temp_pdf.with_name(target_name) + # Move the final pdf outside the temporary directory + new_pdf.rename(output_path / target_name) + + +def _fixup_new_replica(alphas_pdf: "validphys.core.PDF", new_replica_file): + info_file = pathlib.Path(alphas_pdf.infopath) + + with open(info_file, 'r') as stream: + info = yaml.safe_load(stream) + + AlphaS_MZ = info['AlphaS_MZ'] + AlphaS_Vals = info['AlphaS_Vals'] + # Replace the AlphaS_MZ and AlphaS_Vals key + os.system(fr"sed -i -e '1s/^/AlphaS_MZ: {str(AlphaS_MZ)}\n/' {new_replica_file}") + os.system(fr"sed -i -e '1s/^/AlphaS_Vals: {str(AlphaS_Vals)}\n/' {new_replica_file}") + # Delete the from replica key + os.system(f"sed -i -e '/FromMCReplica.*/d' {new_replica_file}") + + # Define aliases for functions with spelling mistakes in their names which have now been corrected # Do this so that old runcards still work fits_matched_pseudorreplicas_chi2_table = fits_matched_pseudoreplicas_chi2_table From 72aafe84466692b4736d9b79bde4d62aa948dd70 Mon Sep 17 00:00:00 2001 From: siranipour Date: Tue, 27 Jul 2021 15:24:34 +0100 Subject: [PATCH 02/13] Moving function --- 
validphys2/src/validphys/paramfits/dataops.py | 57 ------------------ validphys2/src/validphys/replica_selector.py | 58 +++++++++++++++++++ 2 files changed, 58 insertions(+), 57 deletions(-) diff --git a/validphys2/src/validphys/paramfits/dataops.py b/validphys2/src/validphys/paramfits/dataops.py index 49fc3c749f..fed61f8b2f 100644 --- a/validphys2/src/validphys/paramfits/dataops.py +++ b/validphys2/src/validphys/paramfits/dataops.py @@ -7,9 +7,6 @@ be consumed by plotting functions. """ import logging -import os -import pathlib -import shutil import warnings import functools from collections import defaultdict @@ -21,11 +18,8 @@ from reportengine import collect from reportengine.floatformatting import format_error_value_columns, ValueErrorTuple, format_number from reportengine.checks import make_argcheck, CheckError, check_positive, check_not_empty -from reportengine.compat import yaml from reportengine.table import table -from validphys.renametools import rename_pdf -from validphys.utils import tempfile_cleaner from validphys.checks import ( check_fits_different, check_dataspecs_fits_different, @@ -909,57 +903,6 @@ def as_parabolic_coefficient_table( final_table = pd.concat(tb_polys, axis=1, keys=by_dataset_suptitle) return final_table -def bundle_pdfs(pdf, pdfs, output_path, target_name=None): - base_pdf_path = pathlib.Path(pdf.infopath).parent - nrep = len(pdf) - - target_name = target_name or pdf.name + '_pdfas' - - alphas_paths = [pathlib.Path(i.infopath).parent for i in pdfs] - alphas_replica0s = [path / f'{p}_0000.dat' for path, p in zip(alphas_paths, pdfs)] - new_nrep = nrep + len(alphas_replica0s) - - # We create a temporary directory to handle the manipulations inside. - # We move the files to the new directory at the end. 
- with tempfile_cleaner(root='./', exit_func=shutil.rmtree, exc=KeyboardInterrupt) as tempdir: - # Copy the base pdf into the temporary directory - temp_pdf = shutil.copytree(base_pdf_path, tempdir / pdf.name) - - # Copy the alphas PDF replica0s into the new PDF - for i, (alphas_pdf, rep) in enumerate(zip(pdfs, alphas_replica0s)): - to = temp_pdf / f'{pdf.name}_{str(i + nrep).zfill(4)}.dat' - shutil.copy(rep, to) - _fixup_new_replica(alphas_pdf, to) - - # Fixup the info file - info_file = (temp_pdf/temp_pdf.name).with_suffix('.info') - os.system(f"sed -i -e 's/NumMembers.*/NumMembers: {new_nrep}/g' {info_file}") - os.system(f"sed -i -e 's/ErrorType.*/ErrorType: replicas+as/g' {info_file}") - - # Rename the base pdf to the final name - rename_pdf(temp_pdf, pdf.name, target_name) - # This is the pdf path after the above renaming - # i.e new_pdf.exists() == True - new_pdf = temp_pdf.with_name(target_name) - # Move the final pdf outside the temporary directory - new_pdf.rename(output_path / target_name) - - -def _fixup_new_replica(alphas_pdf: "validphys.core.PDF", new_replica_file): - info_file = pathlib.Path(alphas_pdf.infopath) - - with open(info_file, 'r') as stream: - info = yaml.safe_load(stream) - - AlphaS_MZ = info['AlphaS_MZ'] - AlphaS_Vals = info['AlphaS_Vals'] - # Replace the AlphaS_MZ and AlphaS_Vals key - os.system(fr"sed -i -e '1s/^/AlphaS_MZ: {str(AlphaS_MZ)}\n/' {new_replica_file}") - os.system(fr"sed -i -e '1s/^/AlphaS_Vals: {str(AlphaS_Vals)}\n/' {new_replica_file}") - # Delete the from replica key - os.system(f"sed -i -e '/FromMCReplica.*/d' {new_replica_file}") - - # Define aliases for functions with spelling mistakes in their names which have now been corrected # Do this so that old runcards still work fits_matched_pseudorreplicas_chi2_table = fits_matched_pseudoreplicas_chi2_table diff --git a/validphys2/src/validphys/replica_selector.py b/validphys2/src/validphys/replica_selector.py index 0ab8d66c82..e0d4f09649 100644 --- 
a/validphys2/src/validphys/replica_selector.py +++ b/validphys2/src/validphys/replica_selector.py @@ -5,12 +5,16 @@ """ import logging import numbers +import os +import pathlib +import shutil import warnings import numpy as np import pandas as pd from reportengine.checks import check_positive +from reportengine.compat import yaml from reportengine.table import table from reportengine.figure import figuregen @@ -18,10 +22,64 @@ from validphys.pdfoutput import pdfset from validphys.lhio import new_pdf_from_indexes from validphys.checks import check_pdf_is_montecarlo, check_scale +from validphys.core import PDF from validphys.pdfplots import ReplicaPDFPlotter +from validphys.renametools import rename_pdf +from validphys.utils import tempfile_cleaner log = logging.getLogger(__name__) +def bundle_pdfs(pdf, pdfs, output_path, target_name=None): + base_pdf_path = pathlib.Path(pdf.infopath).parent + nrep = len(pdf) + + target_name = target_name or pdf.name + '_pdfas' + + alphas_paths = [pathlib.Path(i.infopath).parent for i in pdfs] + alphas_replica0s = [path / f'{p}_0000.dat' for path, p in zip(alphas_paths, pdfs)] + new_nrep = nrep + len(alphas_replica0s) + + # We create a temporary directory to handle the manipulations inside. + # We move the files to the new directory at the end. 
+ with tempfile_cleaner(root='./', exit_func=shutil.rmtree, exc=KeyboardInterrupt) as tempdir: + # Copy the base pdf into the temporary directory + temp_pdf = shutil.copytree(base_pdf_path, tempdir / pdf.name) + + # Copy the alphas PDF replica0s into the new PDF + for i, (alphas_pdf, rep) in enumerate(zip(pdfs, alphas_replica0s)): + to = temp_pdf / f'{pdf.name}_{str(i + nrep).zfill(4)}.dat' + shutil.copy(rep, to) + _fixup_new_replica(alphas_pdf, to) + + # Fixup the info file + info_file = (temp_pdf/temp_pdf.name).with_suffix('.info') + os.system(f"sed -i -e 's/NumMembers.*/NumMembers: {new_nrep}/g' {info_file}") + os.system(f"sed -i -e 's/ErrorType.*/ErrorType: replicas+as/g' {info_file}") + + # Rename the base pdf to the final name + rename_pdf(temp_pdf, pdf.name, target_name) + # This is the pdf path after the above renaming + # i.e new_pdf.exists() == True + new_pdf = temp_pdf.with_name(target_name) + # Move the final pdf outside the temporary directory + new_pdf.rename(output_path / target_name) + + +def _fixup_new_replica(alphas_pdf: PDF, new_replica_file): + info_file = pathlib.Path(alphas_pdf.infopath) + + with open(info_file, 'r') as stream: + info = yaml.safe_load(stream) + + AlphaS_MZ = info['AlphaS_MZ'] + AlphaS_Vals = info['AlphaS_Vals'] + # Replace the AlphaS_MZ and AlphaS_Vals key + os.system(fr"sed -i -e '1s/^/AlphaS_MZ: {str(AlphaS_MZ)}\n/' {new_replica_file}") + os.system(fr"sed -i -e '1s/^/AlphaS_Vals: {str(AlphaS_Vals)}\n/' {new_replica_file}") + # Delete the from replica key + os.system(f"sed -i -e '/FromMCReplica.*/d' {new_replica_file}") + + @check_positive('Q') def gluon_values(pdf, Q, xgrid): From 6a70a7b84d5c8753c93627611d7dc966ed282826 Mon Sep 17 00:00:00 2001 From: siranipour Date: Tue, 3 Aug 2021 11:21:37 +0100 Subject: [PATCH 03/13] Removing reliance on sed --- validphys2/src/validphys/replica_selector.py | 21 ++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/validphys2/src/validphys/replica_selector.py 
b/validphys2/src/validphys/replica_selector.py index e0d4f09649..a9ae6e1caf 100644 --- a/validphys2/src/validphys/replica_selector.py +++ b/validphys2/src/validphys/replica_selector.py @@ -5,7 +5,6 @@ """ import logging import numbers -import os import pathlib import shutil import warnings @@ -53,8 +52,13 @@ def bundle_pdfs(pdf, pdfs, output_path, target_name=None): # Fixup the info file info_file = (temp_pdf/temp_pdf.name).with_suffix('.info') - os.system(f"sed -i -e 's/NumMembers.*/NumMembers: {new_nrep}/g' {info_file}") - os.system(f"sed -i -e 's/ErrorType.*/ErrorType: replicas+as/g' {info_file}") + + with open(info_file, 'r') as stream: + info_yaml = yaml.safe_load(stream) + info_yaml['NumMembers'] = new_nrep + info_yaml['ErrorType'] += '+as' + with open(info_file, 'w') as stream: + yaml.dump(info_yaml, stream, Dumper=yaml.RoundTripDumper) # Rename the base pdf to the final name rename_pdf(temp_pdf, pdf.name, target_name) @@ -73,11 +77,12 @@ def _fixup_new_replica(alphas_pdf: PDF, new_replica_file): AlphaS_MZ = info['AlphaS_MZ'] AlphaS_Vals = info['AlphaS_Vals'] - # Replace the AlphaS_MZ and AlphaS_Vals key - os.system(fr"sed -i -e '1s/^/AlphaS_MZ: {str(AlphaS_MZ)}\n/' {new_replica_file}") - os.system(fr"sed -i -e '1s/^/AlphaS_Vals: {str(AlphaS_Vals)}\n/' {new_replica_file}") - # Delete the from replica key - os.system(f"sed -i -e '/FromMCReplica.*/d' {new_replica_file}") + with open(new_replica_file, 'r') as in_stream: + data = in_stream.read() + with open(new_replica_file, 'w') as out_stream: + # Replace the AlphaS_MZ and AlphaS_Vals key + new_data = f"AlphaS_MZ: {AlphaS_MZ}\n" + f"AlphaS_Vals: {AlphaS_Vals}\n" + data + out_stream.write(new_data) From 01b034cfb1596bba83877b8279d86ee211e74990 Mon Sep 17 00:00:00 2001 From: siranipour Date: Tue, 3 Aug 2021 11:30:53 +0100 Subject: [PATCH 04/13] Adding doc strings --- validphys2/src/validphys/replica_selector.py | 24 ++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git 
a/validphys2/src/validphys/replica_selector.py b/validphys2/src/validphys/replica_selector.py index a9ae6e1caf..50b6e53df0 100644 --- a/validphys2/src/validphys/replica_selector.py +++ b/validphys2/src/validphys/replica_selector.py @@ -29,6 +29,24 @@ log = logging.getLogger(__name__) def bundle_pdfs(pdf, pdfs, output_path, target_name=None): + """Action that bundles PDFs for distributing to the LHAPDF + format. The baseline pdf is declared as the ``pdf`` key + and the PDFs from which the replica 0s are to be added is + declared as the ``pdfs`` list. + + The bundled PDF set is stored inside the ``output`` directory. + + Parameters + ---------- + pdf: :py:class:`validphys.core.PDF` + The baseline PDF to which the new replicas will be added + pdfs: list of :py:class:`validphys.core.PDF` + The list of PDFs from which replica0 will be appended + target_name: str or None + Optional argument specifying the name of the output PDF. + If ``None``, then the name of the original pdf is used + but with ``_pdfas`` appended + """ base_pdf_path = pathlib.Path(pdf.infopath).parent nrep = len(pdf) @@ -70,6 +88,12 @@ def bundle_pdfs(pdf, pdfs, output_path, target_name=None): def _fixup_new_replica(alphas_pdf: PDF, new_replica_file): + """Helper function that takes in a + :py:class:`validphys.core.PDF` object as well as + the path to the central replica corresponding to the + PDF and handles the writing of the alphas values + to the header file. 
+ """ info_file = pathlib.Path(alphas_pdf.infopath) with open(info_file, 'r') as stream: From 0f65e5d26bf22db26701d727b0b12e085710bc5f Mon Sep 17 00:00:00 2001 From: siranipour <43517072+siranipour@users.noreply.github.com> Date: Tue, 3 Aug 2021 11:49:29 +0100 Subject: [PATCH 05/13] Update validphys2/src/validphys/replica_selector.py Co-authored-by: Zaharid --- validphys2/src/validphys/replica_selector.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/validphys2/src/validphys/replica_selector.py b/validphys2/src/validphys/replica_selector.py index 50b6e53df0..7dc29666b5 100644 --- a/validphys2/src/validphys/replica_selector.py +++ b/validphys2/src/validphys/replica_selector.py @@ -104,9 +104,9 @@ def _fixup_new_replica(alphas_pdf: PDF, new_replica_file): with open(new_replica_file, 'r') as in_stream: data = in_stream.read() with open(new_replica_file, 'w') as out_stream: - # Replace the AlphaS_MZ and AlphaS_Vals key - new_data = f"AlphaS_MZ: {AlphaS_MZ}\n" + f"AlphaS_Vals: {AlphaS_Vals}\n" + data - out_stream.write(new_data) + # Add the AlphaS_MZ and AlphaS_Vals keys + out_stream.write(f"AlphaS_MZ: {AlphaS_MZ}\n" + f"AlphaS_Vals: {AlphaS_Vals}\n") + out_stream.write(data) From 116fe17a83cdb0f337dbc7e17c01c2cea77cf7ec Mon Sep 17 00:00:00 2001 From: siranipour Date: Tue, 3 Aug 2021 15:51:29 +0100 Subject: [PATCH 06/13] Renaming function and operate on bytes --- validphys2/src/validphys/replica_selector.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/validphys2/src/validphys/replica_selector.py b/validphys2/src/validphys/replica_selector.py index 7dc29666b5..74c7582ae4 100644 --- a/validphys2/src/validphys/replica_selector.py +++ b/validphys2/src/validphys/replica_selector.py @@ -28,7 +28,7 @@ log = logging.getLogger(__name__) -def bundle_pdfs(pdf, pdfs, output_path, target_name=None): +def alpha_s_bundle_pdf(pdf, pdfs, output_path, target_name=None): """Action that bundles PDFs for distributing to the LHAPDF 
format. The baseline pdf is declared as the ``pdf`` key and the PDFs from which the replica 0s are to be added is @@ -71,11 +71,11 @@ def bundle_pdfs(pdf, pdfs, output_path, target_name=None): # Fixup the info file info_file = (temp_pdf/temp_pdf.name).with_suffix('.info') - with open(info_file, 'r') as stream: + with open(info_file, 'rb') as stream: info_yaml = yaml.safe_load(stream) info_yaml['NumMembers'] = new_nrep info_yaml['ErrorType'] += '+as' - with open(info_file, 'w') as stream: + with open(info_file, 'wb') as stream: yaml.dump(info_yaml, stream, Dumper=yaml.RoundTripDumper) # Rename the base pdf to the final name @@ -96,7 +96,7 @@ def _fixup_new_replica(alphas_pdf: PDF, new_replica_file): """ info_file = pathlib.Path(alphas_pdf.infopath) - with open(info_file, 'r') as stream: + with open(info_file, 'rb') as stream: info = yaml.safe_load(stream) AlphaS_MZ = info['AlphaS_MZ'] From e754e8580999a8cb2b729b2971f4f10691c2acdc Mon Sep 17 00:00:00 2001 From: Zahari Date: Tue, 10 Aug 2021 17:41:52 +0100 Subject: [PATCH 07/13] Simplify logic in replica fixup Add type checks to target name. --- validphys2/src/validphys/replica_selector.py | 40 +++++++++----------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/validphys2/src/validphys/replica_selector.py b/validphys2/src/validphys/replica_selector.py index 74c7582ae4..b6cfd50d55 100644 --- a/validphys2/src/validphys/replica_selector.py +++ b/validphys2/src/validphys/replica_selector.py @@ -28,7 +28,24 @@ log = logging.getLogger(__name__) -def alpha_s_bundle_pdf(pdf, pdfs, output_path, target_name=None): +def _fixup_new_replica(alphas_pdf: PDF, new_replica_file): + """Helper function that takes in a + :py:class:`validphys.core.PDF` object as well as + the path to the central replica corresponding to the + PDF and handles the writing of the alphas values + to the header file. 
+ """ + AlphaS_MZ = alphas_pdf.AlphaS_MZ + AlphaS_Vals = alphas_pdf.AlphaS_Vals + with open(new_replica_file, 'r') as in_stream: + data = in_stream.read() + with open(new_replica_file, 'w') as out_stream: + # Add the AlphaS_MZ and AlphaS_Vals keys + out_stream.write(f"AlphaS_MZ: {AlphaS_MZ}\n" + f"AlphaS_Vals: {AlphaS_Vals}\n") + out_stream.write(data) + + +def alpha_s_bundle_pdf(pdf, pdfs, output_path, target_name: (str, type(None))=None): """Action that bundles PDFs for distributing to the LHAPDF format. The baseline pdf is declared as the ``pdf`` key and the PDFs from which the replica 0s are to be added is @@ -87,27 +104,6 @@ def alpha_s_bundle_pdf(pdf, pdfs, output_path, target_name=None): new_pdf.rename(output_path / target_name) -def _fixup_new_replica(alphas_pdf: PDF, new_replica_file): - """Helper function that takes in a - :py:class:`validphys.core.PDF` object as well as - the path to the central replica corresponding to the - PDF and handles the writing of the alphas values - to the header file. - """ - info_file = pathlib.Path(alphas_pdf.infopath) - - with open(info_file, 'rb') as stream: - info = yaml.safe_load(stream) - - AlphaS_MZ = info['AlphaS_MZ'] - AlphaS_Vals = info['AlphaS_Vals'] - with open(new_replica_file, 'r') as in_stream: - data = in_stream.read() - with open(new_replica_file, 'w') as out_stream: - # Add the AlphaS_MZ and AlphaS_Vals keys - out_stream.write(f"AlphaS_MZ: {AlphaS_MZ}\n" + f"AlphaS_Vals: {AlphaS_Vals}\n") - out_stream.write(data) - @check_positive('Q') From fa419b31a27e63aac955890fcb2a7155035f51f7 Mon Sep 17 00:00:00 2001 From: Zahari Date: Tue, 10 Aug 2021 17:43:16 +0100 Subject: [PATCH 08/13] Store temporary folder in output_path For all we know the current path may not be writable or accessible. 
--- validphys2/src/validphys/replica_selector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validphys2/src/validphys/replica_selector.py b/validphys2/src/validphys/replica_selector.py index b6cfd50d55..1cf97f0ba0 100644 --- a/validphys2/src/validphys/replica_selector.py +++ b/validphys2/src/validphys/replica_selector.py @@ -75,7 +75,7 @@ def alpha_s_bundle_pdf(pdf, pdfs, output_path, target_name: (str, type(None))=No # We create a temporary directory to handle the manipulations inside. # We move the files to the new directory at the end. - with tempfile_cleaner(root='./', exit_func=shutil.rmtree, exc=KeyboardInterrupt) as tempdir: + with tempfile_cleaner(root=output_path, exit_func=shutil.rmtree, exc=KeyboardInterrupt) as tempdir: # Copy the base pdf into the temporary directory temp_pdf = shutil.copytree(base_pdf_path, tempdir / pdf.name) From c911a8eb3a5776b2a2238408d9b770fc4871d3b9 Mon Sep 17 00:00:00 2001 From: Zahari Date: Tue, 10 Aug 2021 18:46:02 +0100 Subject: [PATCH 09/13] Add target name check --- validphys2/src/validphys/replica_selector.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/validphys2/src/validphys/replica_selector.py b/validphys2/src/validphys/replica_selector.py index 1cf97f0ba0..fc1fadbd8a 100644 --- a/validphys2/src/validphys/replica_selector.py +++ b/validphys2/src/validphys/replica_selector.py @@ -8,11 +8,12 @@ import pathlib import shutil import warnings +import re import numpy as np import pandas as pd -from reportengine.checks import check_positive +from reportengine.checks import check_positive, make_argcheck, check from reportengine.compat import yaml from reportengine.table import table from reportengine.figure import figuregen @@ -44,8 +45,18 @@ def _fixup_new_replica(alphas_pdf: PDF, new_replica_file): out_stream.write(f"AlphaS_MZ: {AlphaS_MZ}\n" + f"AlphaS_Vals: {AlphaS_Vals}\n") out_stream.write(data) - -def alpha_s_bundle_pdf(pdf, pdfs, output_path, target_name: 
(str, type(None))=None): +@make_argcheck +def _check_target_name(target_name): + """Make sure this specifies a name and not some kid of path""" + if target_name is None: + return + check( + re.fullmatch(r'[\w]+', target_name), + "`target_name` must contain alphnumeric characters and underscores only", + ) + +@_check_target_name +def alpha_s_bundle_pdf(pdf, pdfs, output_path, target_name: (str, type(None)) = None): """Action that bundles PDFs for distributing to the LHAPDF format. The baseline pdf is declared as the ``pdf`` key and the PDFs from which the replica 0s are to be added is From 17075a0e524019a1aedebd29a72e2489f17b6990 Mon Sep 17 00:00:00 2001 From: Zahari Date: Tue, 10 Aug 2021 18:46:39 +0100 Subject: [PATCH 10/13] Delete old contents if they exist --- validphys2/src/validphys/replica_selector.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/validphys2/src/validphys/replica_selector.py b/validphys2/src/validphys/replica_selector.py index fc1fadbd8a..417b0788e8 100644 --- a/validphys2/src/validphys/replica_selector.py +++ b/validphys2/src/validphys/replica_selector.py @@ -84,9 +84,15 @@ def alpha_s_bundle_pdf(pdf, pdfs, output_path, target_name: (str, type(None)) = alphas_replica0s = [path / f'{p}_0000.dat' for path, p in zip(alphas_paths, pdfs)] new_nrep = nrep + len(alphas_replica0s) + if target_path.exists(): + log.warning(f"{target_path} already exists. Deleting contents.") + shutil.rmtree(target_path) + # We create a temporary directory to handle the manipulations inside. # We move the files to the new directory at the end. 
- with tempfile_cleaner(root=output_path, exit_func=shutil.rmtree, exc=KeyboardInterrupt) as tempdir: + with tempfile_cleaner( + root=output_path, exit_func=shutil.rmtree, exc=KeyboardInterrupt + ) as tempdir: # Copy the base pdf into the temporary directory temp_pdf = shutil.copytree(base_pdf_path, tempdir / pdf.name) From 0b856ee6944f56088d71fd129ccc903371c3f0e7 Mon Sep 17 00:00:00 2001 From: Zahari Date: Tue, 10 Aug 2021 18:49:08 +0100 Subject: [PATCH 11/13] Improve info writing Add comment to description on alpha_s variations. Also use the newer interface to make sure it round trips. --- validphys2/src/validphys/replica_selector.py | 25 +++++++++++++------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/validphys2/src/validphys/replica_selector.py b/validphys2/src/validphys/replica_selector.py index 417b0788e8..b792be6f93 100644 --- a/validphys2/src/validphys/replica_selector.py +++ b/validphys2/src/validphys/replica_selector.py @@ -79,10 +79,12 @@ def alpha_s_bundle_pdf(pdf, pdfs, output_path, target_name: (str, type(None)) = nrep = len(pdf) target_name = target_name or pdf.name + '_pdfas' + target_path = output_path / target_name alphas_paths = [pathlib.Path(i.infopath).parent for i in pdfs] alphas_replica0s = [path / f'{p}_0000.dat' for path, p in zip(alphas_paths, pdfs)] new_nrep = nrep + len(alphas_replica0s) + alphas_values = [str(p.AlphaS_MZ) for p in pdfs] if target_path.exists(): log.warning(f"{target_path} already exists. 
Deleting contents.") @@ -102,15 +104,21 @@ def alpha_s_bundle_pdf(pdf, pdfs, output_path, target_name: (str, type(None)) = shutil.copy(rep, to) _fixup_new_replica(alphas_pdf, to) - # Fixup the info file - info_file = (temp_pdf/temp_pdf.name).with_suffix('.info') + #  Fixup the info file + info_file = (temp_pdf / temp_pdf.name).with_suffix('.info') - with open(info_file, 'rb') as stream: - info_yaml = yaml.safe_load(stream) + with open(info_file, 'r') as stream: + yaml_obj = yaml.YAML() + info_yaml = yaml_obj.load(stream) info_yaml['NumMembers'] = new_nrep info_yaml['ErrorType'] += '+as' - with open(info_file, 'wb') as stream: - yaml.dump(info_yaml, stream, Dumper=yaml.RoundTripDumper) + extra_desc = '; '.join( + f"mem={i} => alphas(MZ)={val}" + for val, i in zip(alphas_values, range(nrep, new_nrep)) + ) + info_yaml['SetDesc'] += f"; {extra_desc}" + with open(info_file, 'w') as stream: + yaml_obj.dump(info_yaml, stream) # Rename the base pdf to the final name rename_pdf(temp_pdf, pdf.name, target_name) @@ -118,8 +126,9 @@ def alpha_s_bundle_pdf(pdf, pdfs, output_path, target_name: (str, type(None)) = # i.e new_pdf.exists() == True new_pdf = temp_pdf.with_name(target_name) # Move the final pdf outside the temporary directory - new_pdf.rename(output_path / target_name) - + new_pdf = new_pdf.rename(target_path) + log.info(f"alpha_s bundle written at {new_pdf}") + return target_name From 927ac08c48c6e6e3df906170eca6ad9dcc077ad2 Mon Sep 17 00:00:00 2001 From: Zahari Date: Tue, 10 Aug 2021 18:54:17 +0100 Subject: [PATCH 12/13] Use bytes as a minor optimization --- validphys2/src/validphys/replica_selector.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/validphys2/src/validphys/replica_selector.py b/validphys2/src/validphys/replica_selector.py index b792be6f93..c68ffecb01 100644 --- a/validphys2/src/validphys/replica_selector.py +++ b/validphys2/src/validphys/replica_selector.py @@ -38,11 +38,11 @@ def _fixup_new_replica(alphas_pdf: PDF, 
new_replica_file): """ AlphaS_MZ = alphas_pdf.AlphaS_MZ AlphaS_Vals = alphas_pdf.AlphaS_Vals - with open(new_replica_file, 'r') as in_stream: + with open(new_replica_file, 'rb') as in_stream: data = in_stream.read() - with open(new_replica_file, 'w') as out_stream: + with open(new_replica_file, 'wb') as out_stream: # Add the AlphaS_MZ and AlphaS_Vals keys - out_stream.write(f"AlphaS_MZ: {AlphaS_MZ}\n" + f"AlphaS_Vals: {AlphaS_Vals}\n") + out_stream.write(f"AlphaS_MZ: {AlphaS_MZ}\n AlphaS_Vals: {AlphaS_Vals}\n".encode()) out_stream.write(data) @make_argcheck From a5ab318bbb1b2407be9f99d98343355883524e97 Mon Sep 17 00:00:00 2001 From: siranipour Date: Thu, 12 Aug 2021 13:06:48 +0100 Subject: [PATCH 13/13] Adding docs entry --- doc/sphinx/source/tutorials/bundle-pdfs.rst | 78 +++++++++++++++++++++ doc/sphinx/source/tutorials/index.rst | 1 + 2 files changed, 79 insertions(+) create mode 100644 doc/sphinx/source/tutorials/bundle-pdfs.rst diff --git a/doc/sphinx/source/tutorials/bundle-pdfs.rst b/doc/sphinx/source/tutorials/bundle-pdfs.rst new file mode 100644 index 0000000000..10b3510ec4 --- /dev/null +++ b/doc/sphinx/source/tutorials/bundle-pdfs.rst @@ -0,0 +1,78 @@ +Bundle PDFs with :math:`\alpha_s` replicas +========================================== + +Using ``validphys`` it is possible to produce a bundled +PDF set which accounts for a combined PDF + :math:`\alpha_s` +uncertainty. The procedure to generate such an LHAPDF set +is to simply take a baseline PDF set and append the central +replica from a list of :math:`\alpha_s` variation fits. + +The action to leverage this is +:py:func:`validphys.replica_selector.alpha_s_bundle_pdf`. We +specify the baseline PDF as the ``pdf`` key within the runcard +and the ``pdfs`` list specifies the :math:`\alpha_s` fits that +are to be used. In the following example, the ``NNPDF31_nnlo_as_0118`` +PDF set is used as baseline and we append the central replica from +``NNPDF31_nnlo_as_0117`` and ``NNPDF31_nnlo_as_0119``. + +.. 
code-block :: yaml + + pdf: NNPDF31_nnlo_as_0118 + + pdfs: + - NNPDF31_nnlo_as_0117 + - NNPDF31_nnlo_as_0119 + + actions_: + - alpha_s_bundle_pdf + +Executing this runcard with ``validphys`` produces the bundled PDF set +in the output folder, which by default will be named the same as the baseline, +except for the ``_pdfas`` suffix being appended: + +.. code-block :: + + output/ + ├── NNPDF31_nnlo_as_0118_pdfas + │   ├── NNPDF31_nnlo_as_0118_pdfas.info + │   ├── NNPDF31_nnlo_as_0118_pdfas_0000.dat + │   ├── NNPDF31_nnlo_as_0118_pdfas_0001.dat + │   ├── NNPDF31_nnlo_as_0118_pdfas_0002.dat + +The ``.info`` file now has ``replicas+as`` as the ``ErrorType`` and the +``NumMembers`` key has now been updated to reflect the additional replicas +(in this example it has been incremented by two). The additional replicas +also have the ``AlphaS_MZ`` key prepended at the top of the file so as to +keep track of what PDF set they originated from. + +The optional ``target_name`` key can be provided in the runcard so as to +specify the name of the resulting PDF. The following runcard will generate +the same bundled PDF as before, but with the name ``bundled_pdf``: + +.. code-block :: yaml + + pdf: NNPDF31_nnlo_as_0118 + + pdfs: + - NNPDF31_nnlo_as_0117 + - NNPDF31_nnlo_as_0119 + + target_name: bundled_pdf + + actions_: + - alpha_s_bundle_pdf + + +.. code-block :: + + output/ + ├── bundled_pdf + │   ├── bundled_pdf.info + │   ├── bundled_pdf_0000.dat + │   ├── bundled_pdf_0001.dat + │   ├── bundled_pdf_0002.dat + +.. note :: + + Despite adding additional replicas, the central replica in the bundled + PDF set is **not** recomputed: it is identical to that of the baseline. 
diff --git a/doc/sphinx/source/tutorials/index.rst b/doc/sphinx/source/tutorials/index.rst index 533d37ddb8..0a6899741d 100644 --- a/doc/sphinx/source/tutorials/index.rst +++ b/doc/sphinx/source/tutorials/index.rst @@ -55,4 +55,5 @@ Miscellaneous ./pseudodata.md ./newplottingfn.rst ./addspecialgrouping.rst + ./bundle-pdfs.rst ./conda.md