From 3b73e91ea4c60005f263092df7b479099a661595 Mon Sep 17 00:00:00 2001 From: RoyStegeman Date: Fri, 19 Nov 2021 08:47:27 +0100 Subject: [PATCH 1/4] read process from PLOTTING_*.yml files instead of hardcoding --- .../theorycovariance/theorycovarianceutils.py | 68 +++---------------- 1 file changed, 8 insertions(+), 60 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/theorycovarianceutils.py b/validphys2/src/validphys/theorycovariance/theorycovarianceutils.py index 8060391010..5981f2d64d 100644 --- a/validphys2/src/validphys/theorycovariance/theorycovarianceutils.py +++ b/validphys2/src/validphys/theorycovariance/theorycovarianceutils.py @@ -4,8 +4,10 @@ Low level utilities for theorycovariance module """ import logging +import yaml from reportengine.checks import make_argcheck, check +from validphys.loader import Loader log = logging.getLogger(__name__) @@ -90,72 +92,18 @@ def check_fit_dataset_order_matches_grouped( "errors when running fits with theory covmat. Datasets should " f"be ordered by {processed_metadata_group} in the runcard." ) - + def process_lookup(name): """Produces a dictionary with keys corresponding to dataset names and values corresponding to process types. Process types are regrouped into the five categories 'Drell-Yan', 'Top', Jets', 'DIS NC' and 'DIS CC'. - - - The implementation relies on hardcoding the process type for - each dataset internally. If a dataset is not registered, - 'UNKNOWN' is returned. 
""" - process_dictionary = { "ATLASZPT8TEVMDIST": "DY", - "ATLASZPT8TEVYDIST": "DY", - "CMSZDIFF12": "DY", - "ATLAS1JET11": "JETS", - "CMSJETS11": "JETS", - "CDFR2KT": "JETS", - "CMSTOPDIFF8TEVTTRAPNORM": "TOP", - "ATLASTOPDIFF8TEVTRAPNORM": "TOP", - "ATLASTTBARTOT": "TOP", - "CMSTTBARTOT": "TOP", - "DYE605": "DY", - "DYE886P": "DY", - "DYE886R": "DY", - "ATLASWZRAP36PB": "DY", - "ATLASZHIGHMASS49FB": "DY", - "ATLASLOMASSDY11EXT": "DY", - "ATLASWZRAP11": "DY", - "CMSWEASY840PB": "DY", - "CMSWMASY47FB": "DY", - "CMSDY2D11": "DY", - "CMSWMU8TEV": "DY", - "CMSWCHARMRAT": "DY", - "LHCBZ940PB": "DY", - "LHCBZEE2FB": "DY", - "LHCBWZMU7TEV": "DY", - "LHCBWZMU8TEV": "DY", - "D0WEASY": "DY", - "D0WMASY": "DY", - "D0ZRAP": "DY", - "CDFZRAP": "DY", - "H1HERAF2B": "DIS NC", - "HERACOMBCCEM": "DIS CC", - "HERACOMBCCEP": "DIS CC", - "HERACOMBNCEM": "DIS NC", - "HERACOMBNCEP460": "DIS NC", - "HERACOMBNCEP575": "DIS NC", - "HERACOMBNCEP820": "DIS NC", - "HERACOMBNCEP920": "DIS NC", - "HERAF2CHARM": "DIS NC", - "ZEUSHERAF2B": "DIS NC", - "NMCPD": "DIS NC", - "NMC": "DIS NC", - "SLACP": "DIS NC", - "SLACD": "DIS NC", - "BCDMSP": "DIS NC", - "BCDMSD": "DIS NC", - "CHORUSNU": "DIS CC", - "CHORUSNB": "DIS CC", - "NTVNUDMN": "DIS CC", - "NTVNBDMN": "DIS CC" } + cd = Loader().check_commondata(setname=name) + data_plotting = open(cd.plotfiles[1]) + with open(data_plotting.name, 'r') as stream: + data_loaded = yaml.safe_load(stream) + proc = data_loaded['nnpdf31_process'] - proc = process_dictionary.get(name) - if not proc: - log.warn(f'Unknown process type for dataset {name}') - return 'UNKNOWN' return proc From eccaba118b118e99c766ec8e77e0aa643d934061 Mon Sep 17 00:00:00 2001 From: RoyStegeman Date: Fri, 19 Nov 2021 09:30:33 +0100 Subject: [PATCH 2/4] update process_lookup docstring --- .../theorycovariance/theorycovarianceutils.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/theorycovarianceutils.py 
b/validphys2/src/validphys/theorycovariance/theorycovarianceutils.py index 5981f2d64d..d1dfee8a97 100644 --- a/validphys2/src/validphys/theorycovariance/theorycovarianceutils.py +++ b/validphys2/src/validphys/theorycovariance/theorycovarianceutils.py @@ -94,16 +94,15 @@ def check_fit_dataset_order_matches_grouped( ) def process_lookup(name): - """Produces a dictionary with keys corresponding to dataset names - and values corresponding to process types. Process types are - regrouped into the five categories 'Drell-Yan', 'Top', Jets', - 'DIS NC' and 'DIS CC'. + """ + Returns the `nnpdf31_process` as stored in the the PLOTTING_*.yaml file + corresponding to the input `name`. """ cd = Loader().check_commondata(setname=name) data_plotting = open(cd.plotfiles[1]) - with open(data_plotting.name, 'r') as stream: - data_loaded = yaml.safe_load(stream) + with open(data_plotting.name, 'r') as f: + data_loaded = yaml.safe_load(f) proc = data_loaded['nnpdf31_process'] return proc From a434a3def406ea7f714a5b694709556e655a59bc Mon Sep 17 00:00:00 2001 From: RoyStegeman Date: Fri, 19 Nov 2021 23:01:00 +0100 Subject: [PATCH 3/4] replace process_lookup with get_info in config.py --- validphys2/src/validphys/config.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index f0fcd5bba7..d2ffeaf0c4 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -52,7 +52,6 @@ from validphys.paramfits.config import ParamfitsConfig -from validphys.theorycovariance.theorycovarianceutils import process_lookup from validphys.plotoptions import get_info import validphys.scalevariations @@ -763,9 +762,13 @@ def produce_matched_datasets_from_dataspecs(self, dataspecs): with self.set_context(ns=self._curr_ns.new_child(spec)): _, data_input = self.parse_from_(None, "data_input", write=False) - names = { - (process_lookup(dsin.name), dsin.name): dsin for dsin in 
data_input - } + names = {} + for dsin in data_input: + cd = self.produce_commondata(dataset_input=dsin) + proc = get_info(cd).nnpdf31_process + ds = dsin.name + names[(proc, ds)] = dsin all_names.append(names) used_set = set.intersection(*(set(d) for d in all_names)) From ed8e00fef11bbcdc125de18952c2dd179127b738 Mon Sep 17 00:00:00 2001 From: RoyStegeman Date: Tue, 30 Nov 2021 08:56:55 +0100 Subject: [PATCH 4/4] use get_info to obtain nnpdf31_process in process_lookup() --- .../theorycovariance/theorycovarianceutils.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/theorycovarianceutils.py b/validphys2/src/validphys/theorycovariance/theorycovarianceutils.py index d1dfee8a97..a31d8bbea5 100644 --- a/validphys2/src/validphys/theorycovariance/theorycovarianceutils.py +++ b/validphys2/src/validphys/theorycovariance/theorycovarianceutils.py @@ -4,10 +4,11 @@ Low level utilities for theorycovariance module """ import logging -import yaml from reportengine.checks import make_argcheck, check from validphys.loader import Loader +from validphys.plotoptions import get_info + log = logging.getLogger(__name__) @@ -95,14 +96,9 @@ def check_fit_dataset_order_matches_grouped( def process_lookup(name): """ - Returns the `nnpdf31_process` as stored in the the PLOTTING_*.yaml file - corresponding to the input `name`. + Returns the `nnpdf31_process` of the corresponding dataset. """ cd = Loader().check_commondata(setname=name) - data_plotting = open(cd.plotfiles[1]) - with open(data_plotting.name, 'r') as f: - data_loaded = yaml.safe_load(f) - proc = data_loaded['nnpdf31_process'] - + proc = get_info(cd).nnpdf31_process return proc