From a331e049ac9a5b8a1072c8e19c8e0a01b475cae4 Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Tue, 13 Dec 2022 14:04:43 +0000 Subject: [PATCH 01/49] added method that returns a new instance with modified central values (Z.K.) --- validphys2/src/validphys/coredata.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/validphys2/src/validphys/coredata.py b/validphys2/src/validphys/coredata.py index 5e73e3777f..e3819d7b2c 100644 --- a/validphys2/src/validphys/coredata.py +++ b/validphys2/src/validphys/coredata.py @@ -288,6 +288,11 @@ def with_cuts(self, cuts): def central_values(self): return self.commondata_table["data"] + def with_central_value(self, cv): + tb = self.commondata_table.copy() + tb["data"] = cv + return dataclasses.replace(self, commondata_table=tb) + @property def stat_errors(self): return self.commondata_table["stat"] From beca8371534690aa643be4194dc79878c18491a3 Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Tue, 13 Dec 2022 14:06:50 +0000 Subject: [PATCH 02/49] added method to load validphys.core.CommonData from validphys.core.CommonDataSpec --- validphys2/src/validphys/core.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/validphys2/src/validphys/core.py b/validphys2/src/validphys/core.py index 694135e3ae..ce6f652c60 100644 --- a/validphys2/src/validphys/core.py +++ b/validphys2/src/validphys/core.py @@ -316,6 +316,13 @@ def load(self)->CommonData: #TODO: Use better path handling in python 3.6 return CommonData.ReadFile(str(self.datafile), str(self.sysfile)) + def load_commondata_instance(self): + """ + load a validphys.core.CommonDataSpec to validphys.core.CommonData + """ + from validphys.commondataparser import load_commondata + return load_commondata(self) + @property def plot_kinlabels(self): return get_plot_kinlabels(self) From e192f809c56a9b337efc5832ad24b8719b7c98de Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Tue, 13 Dec 2022 14:09:17 +0000 Subject: [PATCH 03/49] added functions to write commondata and systype files and function to create systype dir --- validphys2/src/validphys/commondataparser.py | 82 ++++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/validphys2/src/validphys/commondataparser.py b/validphys2/src/validphys/commondataparser.py index ab2cbaf8c5..06f081e1e6 100644 --- a/validphys2/src/validphys/commondataparser.py +++ b/validphys2/src/validphys/commondataparser.py @@ -89,3 +89,85 @@ def parse_systypes(systypefile): systypetable.set_index("sys_index", inplace=True) return systypetable + +def write_commondata(commondata_list, filter_path): + """ + GENERAL DESCRIPTION: + + writes dataset data in filter folder using the commondata file format + + Parameters + ---------- + + commondata_list : list + commondata object (cuts should be already applied) + + path : str + path to filter folder + + + """ + + for commondata_instance in commondata_list: + # path + path = str(filter_path) + f'/{commondata_instance.setname}' + path_data = str(path) + f"/DATA_{commondata_instance.setname}.dat" + commondata_tab = commondata_instance.commondata_table + header = f"{commondata_instance.setname} {commondata_instance.nsys} {commondata_instance.ndata}\n" + + #==== write DATA =====# + with open(path_data, "w+") as f: + f.write(header) + commondata_tab.to_csv(f, sep="\t", header=None) + + + +def make_systype_dir(path): + """ + GENERAL DESCRIPTION: + + creates directory named systypes + + Parameters + ---------- + + path : str + path to systypes filter/dataset_name/systypes folder + + + """ + if path.exists(): + log.warning(f"systypes folder exists: {path} Overwriting contents") + else: + path.mkdir(exist_ok=True) + + +def write_systype(commondata_list, filter_path): + """ + GENERAL DESCRIPTION: + + writes systype data in filter folder using the systype file format + + Parameters + ---------- + + commondata_list : list + commondata object (cuts should be already applied) + + filter_path : str + path to filter folder + + + """ + + for commondata_instance in commondata_list: + path = filter_path / commondata_instance.setname / 'systypes' + make_systype_dir(path) + path_data = str(path) + f"/SYSTYPES_{commondata_instance.setname}_DEFAULT.dat" + systype_tab = commondata_instance.systype_table + header = f"{len(systype_tab.index)}\n" + + #==== write DATA =====# + with open(path_data, "w+") as f: + f.write(header) + systype_tab.to_csv(f, sep="\t", header=None) \ No newline at end of file From 7e465c5ff097035e70b83802f9a474132f8d352a Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Tue, 13 Dec 2022 14:11:50 +0000 Subject: [PATCH 04/49] added functions used to generate pseudo data for closure tests --- validphys2/src/validphys/pseudodata.py | 125 +++++++++++++++++++++++++ 1 file changed, 125 insertions(+) diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index 142072e0d6..ab9ee4f2d1 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -236,7 +236,132 @@ def indexed_make_replica(groups_index, make_replica): return pd.DataFrame(make_replica, index=groups_index, columns=["data"]) +def make_level0_data(data,fakepdf): + """ + GENERAL DESCRIPTION: + + Given a validphys.core.DataGroupSpec object, load commondata and + generate a new commondata instance with central values replaced + by fakepdf prediction + + Parameters + ---------- + + data : validphys.core.DataGroupSpec + + fakepdf: validphys.core.PDF + + Returns + ------- + list + list of validphys.coredata.CommonData instances corresponding to + all datasets within one experiment. The central value is replaced + by Level 0 fake data. + + Example + ------- + >>> from validphys.api import API + >>> API.make_level0_data(dataset_inputs = [{"dataset":"NMC"}], use_cuts="internal", theoryid=200,fakepdf = "NNPDF40_nnlo_as_01180") + + [CommonData(setname='NMC', ndata=204, commondataproc='DIS_NCE', nkin=3, nsys=16)] + """ + level0_commondata_instances_wc = [] + + #==== Load validphys.coredata.CommonData instance with cuts ====# + + for j, dataset in enumerate(data.datasets): + if dataset.cuts is None: + commondata_wc = dataset.commondata.load_commondata_instance() + else: + cuts = dataset.cuts.load() + commondata_wc = dataset.commondata.load_commondata_instance().with_cuts(cuts) + + + #== Generate a new CommonData instance with central value given by Level 0 data generated with fakepdf ==# + from validphys.covmats import dataset_t0_predictions + t0_prediction = dataset_t0_predictions(dataset=dataset, t0set=fakepdf) # N.B. cuts already applied to th. pred. + level0_commondata_instances_wc.append(commondata_wc.with_central_value(t0_prediction)) + + return level0_commondata_instances_wc + + +def make_level1_data(data,commondata_wc,level0_commondata_wc,filterseed): + """ + GENERAL DESCRIPTION: + + Given + + + Parameters + ---------- + + data : validphys.core.DataGroupSpec + + commondata_wc : list + list of validphys.coredata.CommonData instances corresponding to + all datasets within one experiment. Cuts already applied. + + level0_commondata_wc : list + list of validphys.coredata.CommonData instances corresponding to + all datasets within one experiment. The central value is replaced + by Level 0 fake data. Cuts already applied. + + filterseed: int + random seed used for the generation of Level 1 data + + + Returns + ------- + list + list of validphys.coredata.CommonData instances corresponding to + all datasets within one experiment. The central value is replaced + by Level 1 fake data. + + Example + ------- + + >>> from validphys.api import API + >>> from validphys.loader import Loader + >>> dataset='NMC' + >>> l=Loader() + >>> cuts = l.check_dataset(dataset,theoryid=200).cuts.load() + >>> cd = l.check_commondata(dataset).load_commondata_instance().with_cuts(cuts) + >>> l0_cd = API.make_level0_data(dataset_inputs = [{"dataset":dataset}],use_cuts="internal", theoryid=200,fakepdf = "NNPDF40_nnlo_as_01180") + >>> l1_cd = API.make_level1_data(level0_commondata_wc=l0_cd, commondata_wc=[cd], dataset_inputs = [{"dataset":dataset}], use_cuts="internal",filterseed=1, theoryid=200) + >>> l1_cd + [CommonData(setname='NMC', ndata=204, commondataproc='DIS_NCE', nkin=3, nsys=16)] + """ + + #=============== generate experimental covariance matrix ===============# + from validphys.covmats import dataset_inputs_covmat_from_systematics + dataset_input_list = list(data.dsinputs) + + covmat = dataset_inputs_covmat_from_systematics(commondata_wc, dataset_input_list, + use_weights_in_covmat=False, norm_threshold=None, + _list_of_central_values=None, _only_additive=False,) + + #================== generation of pseudo data ======================# + from validphys.pseudodata import indexed_make_replica, make_replica + from validphys.results import groups_index + #= generate pseudo data starting from theory predictions + level1_data = make_replica(level0_commondata_wc, filterseed, covmat, + sep_mult=False, genrep=True) + + group_index = groups_index([data]) # already set cuts + + indexed_level1_data = indexed_make_replica(group_index, level1_data) + + #===== create commondata instances with central values given by pseudo_data =====# + level1_commondata_dict = {c.setname:c for c in level0_commondata_wc} + level1_commondata_instances_wc = [] + + for xx, grp in indexed_level1_data.groupby('dataset'): + level1_commondata_instances_wc.append( + level1_commondata_dict[xx].with_central_value(grp.values)) + + return level1_commondata_instances_wc + _group_recreate_pseudodata = collect('indexed_make_replica', ('group_dataset_inputs_by_experiment',)) _recreate_fit_pseudodata = collect('_group_recreate_pseudodata', ('fitreplicas', 'fitenvironment')) _recreate_pdf_pseudodata = collect('_group_recreate_pseudodata', ('pdfreplicas', 'fitenvironment')) From e4c809745140d28fe6340be0be1d124b5946ceee Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Tue, 13 Dec 2022 14:15:21 +0000 Subject: [PATCH 05/49] added test for validphys.pseudodata.make_level0_data function --- .../src/validphys/tests/test_pseudodata.py | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/validphys2/src/validphys/tests/test_pseudodata.py b/validphys2/src/validphys/tests/test_pseudodata.py index 413975af7c..6d75374a7e 100644 --- a/validphys2/src/validphys/tests/test_pseudodata.py +++ b/validphys2/src/validphys/tests/test_pseudodata.py @@ -8,6 +8,7 @@ recreation. """ import pandas as pd +import numpy as np import pytest from validphys.api import API @@ -80,3 +81,22 @@ def test_read_matches_recreate(): ) pd.testing.assert_index_equal(read.tr_idx, recreate.tr_idx, check_order=False) pd.testing.assert_index_equal(read.val_idx, recreate.val_idx, check_order=False) + + +def test_make_level0_data(): + from validphys.loader import Loader + from validphys.covmats import dataset_t0_predictions + dataset='NMC' + pdfname='NNPDF40_nnlo_as_01180' + theoryid=200 + + l=Loader() + datasetspec = l.check_dataset(dataset,theoryid=theoryid) + t0set = l.check_pdf(pdfname) + + l0_cd = API.make_level0_data(dataset_inputs = [{"dataset":dataset}], + use_cuts="internal", theoryid=theoryid, fakepdf = pdfname) + + l0_vals = l0_cd[0].central_values + + assert(np.abs(np.sum(dataset_t0_predictions(dataset = datasetspec, t0set = t0set) / l0_vals) / len(l0_vals) - 1) <= 1e-15) \ No newline at end of file From 61a9889f20ae52b18c5a90c0576b74ef1f63c82d Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Tue, 13 Dec 2022 14:21:57 +0000 Subject: [PATCH 06/49] generation of level1 data done by make_replica function. Random seed used for the generation of level1 noise: filterseed. rngalgo and seed are not needed anymore to run a closure test --- validphys2/src/validphys/filters.py | 99 +++++++++++++++++++++++++---- 1 file changed, 87 insertions(+), 12 deletions(-) diff --git a/validphys2/src/validphys/filters.py b/validphys2/src/validphys/filters.py index 913f883e8b..6dd5137005 100644 --- a/validphys2/src/validphys/filters.py +++ b/validphys2/src/validphys/filters.py @@ -94,7 +94,7 @@ def prepare_nnpdf_rng(filterseed:int, rngalgo:int, seed:int): RandomGenerator.GetRNG().SetSeed(filterseed) @check_positive('errorsize') -def filter_closure_data(filter_path, data, fakepdf, fakenoise, errorsize, prepare_nnpdf_rng): +def filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed, errorsize): """Filter closure data. In addition to cutting data points, the data is generated from an underlying ``fakepdf``, applying a shift to the data if ``fakenoise`` is ``True``, which emulates the experimental central values @@ -103,12 +103,13 @@ def filter_closure_data(filter_path, data, fakepdf, fakenoise, errorsize, prepar """ log.info('Filtering closure-test data.') return _filter_closure_data( - filter_path, data, fakepdf, fakenoise, errorsize) + filter_path, data, fakepdf, fakenoise, filterseed, errorsize) @check_positive("errorsize") def filter_closure_data_by_experiment( - filter_path, experiments_data, fakepdf, fakenoise, errorsize, prepare_nnpdf_rng, + filter_path, experiments_data, fakepdf, fakenoise, filterseed + , errorsize, ): """ Like :py:func:`filter_closure_data` except filters data by experiment. @@ -120,7 +121,7 @@ def filter_closure_data_by_experiment( """ return [ - _filter_closure_data(filter_path, exp, fakepdf, fakenoise, errorsize) + _filter_closure_data(filter_path, exp, fakepdf, fakenoise, filterseed, errorsize) for exp in experiments_data ] @@ -157,6 +158,8 @@ def _write_ds_cut_data(path, dataset): def _filter_real_data(filter_path, data): """Filter real experimental data.""" + + total_data_points = 0 total_cut_data_points = 0 for dataset in data.datasets: @@ -168,24 +171,96 @@ def _filter_real_data(filter_path, data): return total_data_points, total_cut_data_points -def _filter_closure_data(filter_path, data, fakepdf, fakenoise, errorsize): - """Filter closure test data.""" +def _filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed, errorsize): + """ + GENERAL DESCRIPTION: + + This function is accessed within a closure test only, that is, the fakedata + namespace has to be True (If fakedata = False, the _filter_real_data function + will be used to write the commondata files). + + The function writes commondata and systypes files within the + name_closure_test/filter folder. + If fakenoise is True, Level 1 type data is written to the filter folder, otherwise + Level 0 data is written. + + Level 1 data is generated from the Level 0 data by adding noise sampled from + the experimental covariance matrix using the validphys.pseudodata.make_replica + function. + + Parameters + ---------- + + filter_path : str + path to filter folder + + data : validphys.core.DataGroupSpec + + fakepdf : validphys.core.PDF + + fakenoise : bool + if fakenoise perform level1 shift of central data values + + filterseed : int + random seed used for the generation of + random noise added to Level 0 data + + errorsize : float + (defined in runcard) + + + Returns + ------- + tuple + total data points and points passing the cuts + + """ + total_data_points = 0 total_cut_data_points = 0 fakeset = fakepdf.legacy_load() # Load data, don't cache result loaded_data = data.load.__wrapped__(data) - # generate level 1 shift if fakenoise - loaded_data.MakeClosure(fakeset, fakenoise) + + from validphys.pseudodata import make_level0_data + level0_commondata_instances_wc = make_level0_data(data,fakepdf) + commondata_instances_wc = [] # used to generate experimental covariance matrix + + #======= Load CommonData instances ========# for j, dataset in enumerate(data.datasets): + #==== Load validphys.coredata.CommonData instance with cuts ====# + commondata = dataset.commondata.load_commondata_instance() + cuts = dataset.cuts.load() + commondata_wc = commondata.with_cuts(cuts) + commondata_instances_wc.append(commondata_wc) + #==== print number of points passing cuts, make dataset directory and write FKMASK ===+# + log.info(f"{len(cuts)}/{len(commondata.central_values)} datapoints in {dataset.name} passed kinematic cuts.") + total_cut_data_points += len(cuts) + total_data_points += len(commondata.central_values) path = filter_path / dataset.name - nfull, ncut = _write_ds_cut_data(path, dataset) - total_data_points += nfull - total_cut_data_points += ncut + make_dataset_dir(path) + export_mask(path / f'FKMASK_{dataset.name}.dat', cuts) + # Rescale errors loaded_ds = loaded_data.GetSet(j) if errorsize != 1.0: loaded_ds.RescaleErrors(errorsize) - loaded_ds.Export(str(path)) + + from validphys.commondataparser import write_commondata, write_systype + if not fakenoise: + #======= Level 0 closure test =======# + log.info("Writing Level0 data") + write_commondata(level0_commondata_instances_wc,filter_path) + write_systype(level0_commondata_instances_wc,filter_path) + + else: + #======= Level 1 closure test =======# + from validphys.pseudodata import make_level1_data + level1_commondata_instances_wc = make_level1_data(data,commondata_instances_wc,level0_commondata_instances_wc,filterseed) + #====== write commondata and systype files ======# + log.info("Writing Level1 data") + write_commondata(level1_commondata_instances_wc,filter_path) + write_systype(level1_commondata_instances_wc,filter_path) + return total_data_points, total_cut_data_points From 7bf9018b204ed8980fe079bbe5b0dc4a352ec61a Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Tue, 13 Dec 2022 14:25:04 +0000 Subject: [PATCH 07/49] import logging module within commondataparser --- validphys2/src/validphys/commondataparser.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/validphys2/src/validphys/commondataparser.py b/validphys2/src/validphys/commondataparser.py index 06f081e1e6..54ca7b56de 100644 --- a/validphys2/src/validphys/commondataparser.py +++ b/validphys2/src/validphys/commondataparser.py @@ -12,6 +12,9 @@ from validphys.core import peek_commondata_metadata from validphys.coredata import CommonData +import logging + +log = logging.getLogger(__name__) def load_commondata(spec): """ From d9c2499263db0311229d0159027bd1984efc967f Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Tue, 13 Dec 2022 15:17:32 +0000 Subject: [PATCH 08/49] test_make_level0 data updated --- .../src/validphys/tests/test_pseudodata.py | 55 ++++++++++++++++--- 1 file changed, 46 insertions(+), 9 deletions(-) diff --git a/validphys2/src/validphys/tests/test_pseudodata.py b/validphys2/src/validphys/tests/test_pseudodata.py index 6d75374a7e..a797d4cdb6 100644 --- a/validphys2/src/validphys/tests/test_pseudodata.py +++ b/validphys2/src/validphys/tests/test_pseudodata.py @@ -84,19 +84,56 @@ def test_read_matches_recreate(): def test_make_level0_data(): - from validphys.loader import Loader - from validphys.covmats import dataset_t0_predictions + dataset='NMC' pdfname='NNPDF40_nnlo_as_01180' theoryid=200 - l=Loader() - datasetspec = l.check_dataset(dataset,theoryid=theoryid) - t0set = l.check_pdf(pdfname) - l0_cd = API.make_level0_data(dataset_inputs = [{"dataset":dataset}], use_cuts="internal", theoryid=theoryid, fakepdf = pdfname) - + l0_vals = l0_cd[0].central_values - - assert(np.abs(np.sum(dataset_t0_predictions(dataset = datasetspec, t0set = t0set) / l0_vals) / len(l0_vals) - 1) <= 1e-15) \ No newline at end of file + t0_pred = np.array([0.3545763 , 0.36826616, 0.36873821, 0.37127424, 0.37290897, + 0.37316775, 0.37055304, 0.3722504 , 0.37302464, 0.36924048, + 0.37034822, 0.37095343, 0.36640187, 0.36637822, 0.36574894, + 0.35825978, 0.35653348, 0.35473318, 0.35365563, 0.34134523, + 0.33823255, 0.33386657, 0.33338396, 0.31109264, 0.30324802, + 0.30229506, 0.25416182, 0.2523507 , 0.37196013, 0.37330645, + 0.37654302, 0.37939536, 0.38006708, 0.36966435, 0.37476681, + 0.37801795, 0.38023252, 0.38028246, 0.36852294, 0.37221565, + 0.37471784, 0.37672207, 0.37771963, 0.37009838, 0.37165855, + 0.37282795, 0.37345963, 0.37297594, 0.36651327, 0.36682826, + 0.36658552, 0.36602139, 0.36449717, 0.35922251, 0.35652948, + 0.35558624, 0.3538604 , 0.35223064, 0.34976281, 0.33977084, + 0.33597368, 0.33219177, 0.33006355, 0.32609279, 0.30810523, + 0.29984762, 0.2973673 , 0.29337914, 0.28835707, 0.26067163, + 0.2463048 , 0.23991642, 0.2346202 , 0.15752154, 0.14484311, + 0.37357085, 0.37989003, 0.38811387, 0.39269338, 0.39623239, + 0.38779016, 0.39516254, 0.39979762, 0.40095537, 0.37504614, + 0.38304588, 0.38946276, 0.3957882 , 0.39952722, 0.39975423, + 0.37745691, 0.38209647, 0.3865788 , 0.39086245, 0.39332198, + 0.39323533, 0.37680676, 0.37996398, 0.38281809, 0.38475522, + 0.38561046, 0.38429377, 0.37467448, 0.37630849, 0.37726671, + 0.37769016, 0.37684543, 0.36781676, 0.36746375, 0.36715733, + 0.36632717, 0.36472775, 0.36155728, 0.35540615, 0.35320503, + 0.35029904, 0.34840341, 0.34560717, 0.34203748, 0.33253254, + 0.32686904, 0.32281255, 0.31914235, 0.31415925, 0.29386229, + 0.29031662, 0.28439739, 0.27832586, 0.27371331, 0.24110857, + 0.23133557, 0.22451033, 0.21709159, 0.14911905, 0.13010754, + 0.12310468, 0.39012269, 0.39390918, 0.39924528, 0.40293211, + 0.4056547 , 0.39786495, 0.40730062, 0.41398714, 0.41582426, + 0.41570175, 0.39194794, 0.40105368, 0.40975066, 0.41707675, + 0.41820279, 0.41567735, 0.39337477, 0.39929797, 0.40673509, + 0.4117446 , 0.41284718, 0.40930253, 0.39081395, 0.3929952 , + 0.39747039, 0.40079244, 0.40128669, 0.39749262, 0.38580429, + 0.38676072, 0.38910638, 0.39044752, 0.38910755, 0.37872247, + 0.37961559, 0.38016427, 0.3793288 , 0.37651091, 0.36785649, + 0.36804973, 0.36655191, 0.36481613, 0.3620474 , 0.34931946, + 0.3463118 , 0.34349012, 0.34000501, 0.33611505, 0.3238292 , + 0.31964367, 0.31530522, 0.31121644, 0.30637345, 0.29586067, + 0.28526531, 0.27994941, 0.27426498, 0.26944325, 0.26316807, + 0.24346269, 0.22612249, 0.21976363, 0.21375908, 0.20546207, + 0.15822622, 0.13056665, 0.12244491, 0.11192735]) + + + assert(np.abs(np.sum(t0_pred / l0_vals) / len(l0_vals) - 1) <= 1e-9) \ No newline at end of file From c2af92b9226d48766cad57849a12565585c8507a Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Wed, 14 Dec 2022 11:31:30 +0000 Subject: [PATCH 09/49] theory 162 added --- validphys2/src/validphys/tests/test_pseudodata.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/validphys2/src/validphys/tests/test_pseudodata.py b/validphys2/src/validphys/tests/test_pseudodata.py index 6d75374a7e..7b54965d9e 100644 --- a/validphys2/src/validphys/tests/test_pseudodata.py +++ b/validphys2/src/validphys/tests/test_pseudodata.py @@ -88,7 +88,7 @@ def test_make_level0_data(): from validphys.covmats import dataset_t0_predictions dataset='NMC' pdfname='NNPDF40_nnlo_as_01180' - theoryid=200 + theoryid=162 l=Loader() datasetspec = l.check_dataset(dataset,theoryid=theoryid) @@ -99,4 +99,4 @@ def test_make_level0_data(): l0_vals = l0_cd[0].central_values - assert(np.abs(np.sum(dataset_t0_predictions(dataset = datasetspec, t0set = t0set) / l0_vals) / len(l0_vals) - 1) <= 1e-15) \ No newline at end of file + assert(np.abs(np.sum(dataset_t0_predictions(dataset = datasetspec, t0set = t0set) / l0_vals) / len(l0_vals) - 1) <= 1e-12) \ No newline at end of file From 0a7442d4e60592300547979cd1e6a26eaf7fa639 Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Wed, 14 Dec 2022 11:40:30 +0000 Subject: [PATCH 10/49] make_level0_data test done with theoryid 162 --- .../src/validphys/tests/test_pseudodata.py | 57 +++---------------- 1 file changed, 8 insertions(+), 49 deletions(-) diff --git a/validphys2/src/validphys/tests/test_pseudodata.py b/validphys2/src/validphys/tests/test_pseudodata.py index 7af68ee945..7b4ecdd027 100644 --- a/validphys2/src/validphys/tests/test_pseudodata.py +++ b/validphys2/src/validphys/tests/test_pseudodata.py @@ -84,60 +84,19 @@ def test_read_matches_recreate(): def test_make_level0_data(): - + from validphys.loader import Loader + from validphys.covmats import dataset_t0_predictions + dataset='NMC' pdfname='NNPDF40_nnlo_as_01180' theoryid=162 - + l = Loader() + datasetspec = l.check_dataset(dataset,theoryid=theoryid) + t0set = l.check_pdf(pdfname) + l0_cd = API.make_level0_data(dataset_inputs = [{"dataset":dataset}], use_cuts="internal", theoryid=theoryid, fakepdf = pdfname) l0_vals = l0_cd[0].central_values - t0_pred = np.array([0.3545763 , 0.36826616, 0.36873821, 0.37127424, 0.37290897, - 0.37316775, 0.37055304, 0.3722504 , 0.37302464, 0.36924048, - 0.37034822, 0.37095343, 0.36640187, 0.36637822, 0.36574894, - 0.35825978, 0.35653348, 0.35473318, 0.35365563, 0.34134523, - 0.33823255, 0.33386657, 0.33338396, 0.31109264, 0.30324802, - 0.30229506, 0.25416182, 0.2523507 , 0.37196013, 0.37330645, - 0.37654302, 0.37939536, 0.38006708, 0.36966435, 0.37476681, - 0.37801795, 0.38023252, 0.38028246, 0.36852294, 0.37221565, - 0.37471784, 0.37672207, 0.37771963, 0.37009838, 0.37165855, - 0.37282795, 0.37345963, 0.37297594, 0.36651327, 0.36682826, - 0.36658552, 0.36602139, 0.36449717, 0.35922251, 0.35652948, - 0.35558624, 0.3538604 , 0.35223064, 0.34976281, 0.33977084, - 0.33597368, 0.33219177, 0.33006355, 0.32609279, 0.30810523, - 0.29984762, 0.2973673 , 0.29337914, 0.28835707, 0.26067163, - 0.2463048 , 0.23991642, 0.2346202 , 0.15752154, 0.14484311, - 0.37357085, 0.37989003, 0.38811387, 0.39269338, 0.39623239, - 0.38779016, 0.39516254, 0.39979762, 0.40095537, 0.37504614, - 0.38304588, 0.38946276, 0.3957882 , 0.39952722, 0.39975423, - 0.37745691, 0.38209647, 0.3865788 , 0.39086245, 0.39332198, - 0.39323533, 0.37680676, 0.37996398, 0.38281809, 0.38475522, - 0.38561046, 0.38429377, 0.37467448, 0.37630849, 0.37726671, - 0.37769016, 0.37684543, 0.36781676, 0.36746375, 0.36715733, - 0.36632717, 0.36472775, 0.36155728, 0.35540615, 0.35320503, - 0.35029904, 0.34840341, 0.34560717, 0.34203748, 0.33253254, - 0.32686904, 0.32281255, 0.31914235, 0.31415925, 0.29386229, - 0.29031662, 0.28439739, 0.27832586, 0.27371331, 0.24110857, - 0.23133557, 0.22451033, 0.21709159, 0.14911905, 0.13010754, - 0.12310468, 0.39012269, 0.39390918, 0.39924528, 0.40293211, - 0.4056547 , 0.39786495, 0.40730062, 0.41398714, 0.41582426, - 0.41570175, 0.39194794, 0.40105368, 0.40975066, 0.41707675, - 0.41820279, 0.41567735, 0.39337477, 0.39929797, 0.40673509, - 0.4117446 , 0.41284718, 0.40930253, 0.39081395, 0.3929952 , - 0.39747039, 0.40079244, 0.40128669, 0.39749262, 0.38580429, - 0.38676072, 0.38910638, 0.39044752, 0.38910755, 0.37872247, - 0.37961559, 0.38016427, 0.3793288 , 0.37651091, 0.36785649, - 0.36804973, 0.36655191, 0.36481613, 0.3620474 , 0.34931946, - 0.3463118 , 0.34349012, 0.34000501, 0.33611505, 0.3238292 , - 0.31964367, 0.31530522, 0.31121644, 0.30637345, 0.29586067, - 0.28526531, 0.27994941, 0.27426498, 0.26944325, 0.26316807, - 0.24346269, 0.22612249, 0.21976363, 0.21375908, 0.20546207, - 0.15822622, 0.13056665, 0.12244491, 0.11192735]) - - -<<<<<<< HEAD - assert(np.abs(np.sum(t0_pred / l0_vals) / len(l0_vals) - 1) <= 1e-9) -======= + assert(np.abs(np.sum(dataset_t0_predictions(dataset = datasetspec, t0set = t0set) / l0_vals) / len(l0_vals) - 1) <= 1e-12) ->>>>>>> tmp From c1e5a1663a0326848c334952436471b4cca5c36b Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Wed, 14 Dec 2022 11:59:59 +0000 Subject: [PATCH 11/49] added description to make_level1_data --- validphys2/src/validphys/pseudodata.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index ab9ee4f2d1..ec3c60b4fb 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -290,7 +290,8 @@ def make_level1_data(data,commondata_wc,level0_commondata_wc,filterseed): """ GENERAL DESCRIPTION: - Given + Given a list of level0 commondata instances, return the same list + with central values replaced by level1 data Parameters @@ -342,7 +343,6 @@ def make_level1_data(data,commondata_wc,level0_commondata_wc,filterseed): _list_of_central_values=None, _only_additive=False,) #================== generation of pseudo data ======================# - from validphys.pseudodata import indexed_make_replica, make_replica from validphys.results import groups_index #= generate pseudo data starting from theory predictions level1_data = make_replica(level0_commondata_wc, filterseed, covmat, From b2e3a2cd97eca112799c26f335dc625d8d111ee4 Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Wed, 14 Dec 2022 15:44:29 +0000 Subject: [PATCH 12/49] added method to load list of validphys.coredata.CommonData instances with cuts already applied --- validphys2/src/validphys/core.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/validphys2/src/validphys/core.py b/validphys2/src/validphys/core.py index ce6f652c60..7d8a049ddc 100644 --- a/validphys2/src/validphys/core.py +++ b/validphys2/src/validphys/core.py @@ -663,6 +663,13 @@ def load(self): def load_commondata(self): return [d.load_commondata() for d in self.datasets] + def load_commondata_instances_wc(self): + """ + Given Experiment load list of validphys.coredata.CommonData + objects with cuts already applied + """ + return [dataset.commondata.load_commondata_instance().with_cuts(dataset.cuts.load()) for dataset in self.datasets] + @property def thspec(self): #TODO: Is this good enough? Should we explicitly pass the theory From ab0ca4ce1180b803fade6945897e7a997c499fce Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Wed, 14 Dec 2022 16:08:13 +0000 Subject: [PATCH 13/49] list of commondata loaded with new DataGroupSpec method --- validphys2/src/validphys/filters.py | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/validphys2/src/validphys/filters.py b/validphys2/src/validphys/filters.py index 6dd5137005..72b5d539b0 100644 --- a/validphys2/src/validphys/filters.py +++ b/validphys2/src/validphys/filters.py @@ -224,22 +224,14 @@ def _filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed, erro from validphys.pseudodata import make_level0_data level0_commondata_instances_wc = make_level0_data(data,fakepdf) - commondata_instances_wc = [] # used to generate experimental covariance matrix + commondata_instances_wc = data.load_commondata_instances_wc() # used to generate experimental covariance matrix - #======= Load CommonData instances ========# for j, dataset in enumerate(data.datasets): - #==== Load validphys.coredata.CommonData instance with cuts ====# - commondata = dataset.commondata.load_commondata_instance() - cuts = dataset.cuts.load() - commondata_wc = commondata.with_cuts(cuts) - commondata_instances_wc.append(commondata_wc) - #==== print number of points passing cuts, make dataset directory and write FKMASK ===+# - log.info(f"{len(cuts)}/{len(commondata.central_values)} datapoints in {dataset.name} passed kinematic cuts.") - total_cut_data_points += len(cuts) - total_data_points += len(commondata.central_values) + #== print number of points passing cuts, make dataset directory and write FKMASK ==# path = filter_path / dataset.name - make_dataset_dir(path) - export_mask(path / f'FKMASK_{dataset.name}.dat', cuts) + nfull, ncut = _write_ds_cut_data(path, dataset) + total_data_points += nfull + total_cut_data_points += ncut # Rescale errors loaded_ds = loaded_data.GetSet(j) if errorsize != 1.0: From c4dc48aa45c8ddad447ecf70ec80086cdc1346af Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Wed, 14 Dec 2022 16:21:48 +0000 Subject: [PATCH 14/49] method name changed --- validphys2/src/validphys/core.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/validphys2/src/validphys/core.py b/validphys2/src/validphys/core.py index 7d8a049ddc..161a3b0bb9 100644 --- a/validphys2/src/validphys/core.py +++ b/validphys2/src/validphys/core.py @@ -663,13 +663,19 @@ def load(self): def load_commondata(self): return [d.load_commondata() for d in self.datasets] - def load_commondata_instances_wc(self): + def load_commondata_instance(self): """ Given Experiment load list of validphys.coredata.CommonData objects with cuts already applied """ - return [dataset.commondata.load_commondata_instance().with_cuts(dataset.cuts.load()) for dataset in self.datasets] - + commodata_list = [] + for dataset in self.datasets: + if dataset.cuts is None: + commodata_list.append(dataset.commondata.load_commondata_instance()) + else: + commodata_list.append(dataset.commondata.load_commondata_instance().with_cuts(dataset.cuts.load())) + return commodata_list + @property def thspec(self): #TODO: Is this good enough? Should we explicitly pass the theory From cd75cc7270257e9d931a21bbd24f0b5c9115df84 Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Wed, 14 Dec 2022 16:23:47 +0000 Subject: [PATCH 15/49] name of DataGroupSpec method changed --- validphys2/src/validphys/filters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validphys2/src/validphys/filters.py b/validphys2/src/validphys/filters.py index 72b5d539b0..7d28bd16eb 100644 --- a/validphys2/src/validphys/filters.py +++ b/validphys2/src/validphys/filters.py @@ -224,7 +224,7 @@ def _filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed, erro from validphys.pseudodata import make_level0_data level0_commondata_instances_wc = make_level0_data(data,fakepdf) - commondata_instances_wc = data.load_commondata_instances_wc() # used to generate experimental covariance matrix + commondata_instances_wc = data.load_commondata_instance() # used to generate experimental covariance matrix for j, dataset in enumerate(data.datasets): #== print number of points passing cuts, make dataset directory and write FKMASK ==# From 33d2ce9d9b787b96a0a9f0c4eb862cdc2d94f504 Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Thu, 15 Dec 2022 15:31:27 +0000 Subject: [PATCH 16/49] reset_index of commondata tables --- validphys2/src/validphys/commondataparser.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/validphys2/src/validphys/commondataparser.py b/validphys2/src/validphys/commondataparser.py index 54ca7b56de..2a02a2b470 100644 --- a/validphys2/src/validphys/commondataparser.py +++ b/validphys2/src/validphys/commondataparser.py @@ -115,7 +115,9 @@ def write_commondata(commondata_list, filter_path): # path path = str(filter_path) + f'/{commondata_instance.setname}' path_data = str(path) + f"/DATA_{commondata_instance.setname}.dat" - commondata_tab = commondata_instance.commondata_table + commondata_tab = commondata_instance.commondata_table.reset_index(drop = True) # do not use maskcut index + commondata_tab.index += 1 # index starting from 1 + header = f"{commondata_instance.setname} {commondata_instance.nsys} {commondata_instance.ndata}\n" #==== write DATA =====# From 9a859b502833c8cf3726c6e7a8c3b85d948acfa1 Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Thu, 15 Dec 2022 15:43:24 +0000 Subject: [PATCH 17/49] deleted test_filter_rebuild_closure_data.csv --- .../test_filter_rebuild_closure_data.csv | 236 ------------------ 1 file changed, 236 deletions(-) delete mode 100644 validphys2/src/validphys/tests/regressions/test_filter_rebuild_closure_data.csv diff --git a/validphys2/src/validphys/tests/regressions/test_filter_rebuild_closure_data.csv b/validphys2/src/validphys/tests/regressions/test_filter_rebuild_closure_data.csv deleted file mode 100644 index f7ce521f40..0000000000 --- a/validphys2/src/validphys/tests/regressions/test_filter_rebuild_closure_data.csv +++ /dev/null @@ -1,236 +0,0 @@ -group dataset id data_central -NMC NMC 16 0.4007353489505 -NMC NMC 21 0.3596156369461 -NMC NMC 22 0.3696772738631 -NMC NMC 27 0.3662960538326 -NMC NMC 28 0.3671016068397 -NMC NMC 29 0.3911462081599 -NMC NMC 34 0.3873596614146 -NMC NMC 35 0.3685951249355 -NMC NMC 36 0.3652752448135 -NMC NMC 40 0.3782916051482 -NMC NMC 41 0.3910438628795 -NMC NMC 42 0.3733418699219 -NMC NMC 46 0.3810537704863 -NMC NMC 47 0.3708532044501 -NMC NMC 48 0.3593684720153 -NMC NMC 51 0.3693367172184 -NMC NMC 52 0.3737791523646 -NMC NMC 53 0.3386288622738 -NMC NMC 54 0.3500948571991 -NMC NMC 57 0.3418514086764 -NMC NMC 58 0.3489404958264 -NMC NMC 59 0.3488612312165 -NMC NMC 60 0.3505041034015 -NMC NMC 63 0.3333040875332 -NMC NMC 64 0.2944010260022 -NMC NMC 65 0.3099040341106 -NMC NMC 68 0.2811103711856 -NMC NMC 69 0.2595731013581 -NMC NMC 83 0.332503113838 -NMC NMC 84 0.3062223521694 -NMC NMC 87 0.3477451853918 -NMC NMC 88 0.352597827366 -NMC NMC 89 0.3689837642521 -NMC NMC 91 0.3410075219517 -NMC NMC 92 0.3380026268081 -NMC NMC 93 0.3630165854001 -NMC NMC 94 0.3692037594232 -NMC NMC 95 0.3379900856468 -NMC NMC 97 0.3498975977998 -NMC NMC 98 0.3455821206785 -NMC NMC 99 0.3561806744993 -NMC NMC 100 0.3732490901176 -NMC NMC 101 0.3500032818012 -NMC NMC 104 0.3619815034828 -NMC NMC 105 0.3577747301693 -NMC NMC 106 0.3448546937843 -NMC NMC 107 0.3970272458214 -NMC NMC 108 0.3194277040841 -NMC NMC 110 0.3205419458354 -NMC NMC 111 0.3367281886952 -NMC NMC 112 0.3386950891887 -NMC NMC 113 0.3225662456658 -NMC NMC 114 0.29975210255 -NMC NMC 115 0.3258513025439 -NMC NMC 116 0.3369058963301 -NMC NMC 117 0.3302714626432 -NMC NMC 118 0.3149870988695 -NMC NMC 119 0.2993100089626 -NMC NMC 120 0.3368228206002 -NMC NMC 121 0.3102225912073 -NMC NMC 122 0.3180821969045 -NMC NMC 123 0.2933430928775 -NMC NMC 124 0.3098840948832 -NMC NMC 125 0.3231255065332 -NMC NMC 126 0.2902405904389 -NMC NMC 127 0.2669059748255 -NMC NMC 128 0.2689476065289 -NMC NMC 129 0.2956116242333 -NMC NMC 130 0.288082548032 -NMC NMC 131 0.2226505072116 -NMC NMC 132 0.2198391302795 -NMC NMC 133 0.204048963916 -NMC NMC 134 0.2127696389248 -NMC NMC 136 0.1375618648213 -NMC NMC 137 0.1339894685257 -NMC NMC 147 0.3489969251822 -NMC NMC 148 0.3552399410999 -NMC NMC 152 0.3483599212619 -NMC NMC 153 0.3560033903034 -NMC NMC 154 0.3796442730621 -NMC NMC 157 0.3429642181623 -NMC NMC 158 0.3706142444086 -NMC NMC 159 0.355255409241 -NMC NMC 160 0.352842258266 -NMC NMC 161 0.3539020551091 -NMC NMC 162 0.3521469993182 -NMC NMC 163 0.3632537732513 -NMC NMC 164 0.3725308840612 -NMC NMC 165 0.3576604586946 -NMC NMC 166 0.3816526263355 -NMC NMC 167 0.3688809286074 -NMC NMC 168 0.3385882966462 -NMC NMC 169 0.3387628659638 -NMC NMC 170 0.3760634504695 -NMC NMC 171 0.3785323104752 -NMC NMC 172 0.3632421316272 -NMC NMC 173 0.3623753029918 -NMC NMC 174 0.3362183838135 -NMC NMC 175 0.3432266150754 -NMC NMC 176 0.3640134644566 -NMC NMC 177 0.3576634295374 -NMC NMC 178 0.3597463154209 -NMC NMC 179 0.3603372275518 -NMC NMC 180 0.3177940693752 -NMC NMC 181 0.3620568994759 -NMC NMC 182 0.3701314089822 -NMC NMC 183 0.2978652392955 -NMC NMC 184 0.3541449769575 -NMC NMC 185 0.3448722906877 -NMC NMC 186 0.3412588395999 -NMC NMC 187 0.3570684620173 -NMC NMC 188 0.3286891216355 -NMC NMC 189 0.3477526408617 -NMC NMC 190 0.335703925785 -NMC NMC 191 0.3444529842929 -NMC NMC 192 0.3403578691808 -NMC NMC 193 0.3347353981548 -NMC NMC 194 0.3203688496486 -NMC NMC 195 0.3159487761597 -NMC NMC 196 0.3443688712277 -NMC NMC 197 0.3124097261611 -NMC NMC 198 0.3154190766316 -NMC NMC 199 0.2942857811848 -NMC NMC 200 0.2799806891526 -NMC NMC 201 0.2775840928341 -NMC NMC 202 0.2532591445827 -NMC NMC 203 0.2708438289781 -NMC NMC 204 0.254383734349 -NMC NMC 205 0.2584578349333 -NMC NMC 206 0.2288369641399 -NMC NMC 207 0.2168810713002 -NMC NMC 208 0.2203718114199 -NMC NMC 209 0.1847450793115 -NMC NMC 210 0.1459140382753 -NMC NMC 211 0.1143166792701 -NMC NMC 212 0.1267472998114 -NMC NMC 221 0.377311655587 -NMC NMC 222 0.3786167921399 -NMC NMC 225 0.3880648558735 -NMC NMC 226 0.3835579098506 -NMC NMC 227 0.3700815811285 -NMC NMC 229 0.3784519070427 -NMC NMC 230 0.3982216265502 -NMC NMC 231 0.4017000200127 -NMC NMC 232 0.3922919247205 -NMC NMC 233 0.4124193564822 -NMC NMC 234 0.3590905662548 -NMC NMC 235 0.3967950366239 -NMC NMC 236 0.3971726043601 -NMC NMC 237 0.4020683891529 -NMC NMC 238 0.3814902989618 -NMC NMC 239 0.386741867313 -NMC NMC 240 0.369385519281 -NMC NMC 241 0.3814990929537 -NMC NMC 242 0.381564927955 -NMC NMC 243 0.3948804610387 -NMC NMC 244 0.3839173773074 -NMC NMC 245 0.3802441689135 -NMC NMC 246 0.3752499091384 -NMC NMC 247 0.3974188959257 -NMC NMC 248 0.3836237312771 -NMC NMC 249 0.3911270447289 -NMC NMC 250 0.3877800272259 -NMC NMC 251 0.3629868559682 -NMC NMC 252 0.3754088188714 -NMC NMC 253 0.380648396326 -NMC NMC 254 0.379643330629 -NMC NMC 255 0.3717431381859 -NMC NMC 256 0.3850277979529 -NMC NMC 257 0.357666057149 -NMC NMC 258 0.3843712169454 -NMC NMC 259 0.3677026936208 -NMC NMC 260 0.3655736280598 -NMC NMC 261 0.3441364575008 -NMC NMC 262 0.3972401996537 -NMC NMC 263 0.3645143376461 -NMC NMC 264 0.3510977851517 -NMC NMC 265 0.3539138992202 -NMC NMC 266 0.3493708856316 -NMC NMC 267 0.3320087427879 -NMC NMC 268 0.3284299620477 -NMC NMC 269 0.3321367638445 -NMC NMC 270 0.3209261313501 -NMC NMC 271 0.3152880392914 -NMC NMC 272 0.3067196941747 -NMC NMC 273 0.3051998380667 -NMC NMC 274 0.3111118466168 -NMC NMC 275 0.2900155628937 -NMC NMC 276 0.2883840542816 -NMC NMC 277 0.2756559342559 -NMC NMC 278 0.2855005116247 -NMC NMC 279 0.2662589115441 -NMC NMC 280 0.2559570780724 -NMC NMC 281 0.2504174862552 -NMC NMC 282 0.2307957738123 -NMC NMC 283 0.241680108918 -NMC NMC 284 0.2146215617884 -NMC NMC 285 0.211746450436 -NMC NMC 286 0.2128987035927 -NMC NMC 287 0.1935284098196 -NMC NMC 288 0.131946290253 -NMC NMC 289 0.1180789438391 -NMC NMC 290 0.1049547912561 -NMC NMC 291 0.1106025920512 -ATLAS ATLASTTBARTOT 0 162.5686864865 -ATLAS ATLASTTBARTOT 1 236.1859490199 -ATLAS ATLASTTBARTOT 2 830.4482576268 -CMS CMSZDIFF12 1 2914.124612961 -CMS CMSZDIFF12 2 1080.410756147 -CMS CMSZDIFF12 3 466.257652438 -CMS CMSZDIFF12 4 227.8725970988 -CMS CMSZDIFF12 5 112.9922431625 -CMS CMSZDIFF12 6 60.11148107363 -CMS CMSZDIFF12 7 31.01641512591 -CMS CMSZDIFF12 11 2841.234006315 -CMS CMSZDIFF12 12 1045.712255313 -CMS CMSZDIFF12 13 455.7116015988 -CMS CMSZDIFF12 14 208.6895742617 -CMS CMSZDIFF12 15 111.8661789107 -CMS CMSZDIFF12 16 61.01526352725 -CMS CMSZDIFF12 17 30.58163323228 -CMS CMSZDIFF12 21 2534.025669336 -CMS CMSZDIFF12 22 911.9673411774 -CMS CMSZDIFF12 23 415.3681972017 -CMS CMSZDIFF12 24 198.7630470755 -CMS CMSZDIFF12 25 100.2899391664 -CMS CMSZDIFF12 26 56.45171623313 -CMS CMSZDIFF12 27 28.6103128109 -CMS CMSZDIFF12 31 1990.217512849 -CMS CMSZDIFF12 32 728.4681533061 -CMS CMSZDIFF12 33 332.4189366359 -CMS CMSZDIFF12 34 165.4302576598 -CMS CMSZDIFF12 35 88.19728756934 -CMS CMSZDIFF12 36 47.85196748986 -CMS CMSZDIFF12 37 24.06201644101 From eb438f978e4bc266b5af5f0544eaa2484ff0c59d Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Thu, 15 Dec 2022 16:08:11 +0000 Subject: [PATCH 18/49] regressions/test_filter_rebuild_closure_data.csv file updated --- .../test_filter_rebuild_closure_data.csv | 236 ++++++++++++++++++ 1 file changed, 236 insertions(+) create mode 100644 validphys2/src/validphys/tests/regressions/test_filter_rebuild_closure_data.csv diff --git a/validphys2/src/validphys/tests/regressions/test_filter_rebuild_closure_data.csv b/validphys2/src/validphys/tests/regressions/test_filter_rebuild_closure_data.csv new file mode 100644 index 0000000000..a46ad353a5 --- /dev/null +++ b/validphys2/src/validphys/tests/regressions/test_filter_rebuild_closure_data.csv @@ -0,0 +1,236 @@ +group dataset id data_central +NMC NMC 16 0.28019164359912174 +NMC NMC 21 0.35499349354805787 +NMC NMC 22 0.36699960383943325 +NMC NMC 27 0.36514731639520354 +NMC NMC 28 0.3796019817168817 +NMC NMC 29 0.3557283684527493 +NMC NMC 34 0.358213498117281 +NMC NMC 35 0.38344229108403 +NMC NMC 36 0.3906176180402464 +NMC NMC 40 0.3565478540570389 +NMC NMC 41 0.35449556242092534 +NMC NMC 42 0.3477428857498088 +NMC NMC 46 0.34058587588303324 +NMC NMC 47 0.37528607055424945 +NMC NMC 48 0.35810702495585167 +NMC NMC 51 0.35948420833368 +NMC NMC 52 0.3252481995431795 +NMC NMC 53 0.34839699344491915 +NMC NMC 54 0.3305599715630647 +NMC NMC 57 0.33854839805110815 +NMC NMC 58 0.3475248192617279 +NMC NMC 59 0.3224033407693148 +NMC NMC 60 0.3557230755468301 +NMC NMC 63 0.3076864562936749 +NMC NMC 64 0.3249652996062988 +NMC NMC 65 0.3225373982130234 +NMC NMC 68 0.2698593749956943 +NMC NMC 69 0.28697589129899725 +NMC NMC 83 0.3314942368553224 +NMC NMC 84 0.3497035726336489 +NMC NMC 87 0.3561247503065496 +NMC NMC 88 0.36048612554048426 +NMC NMC 89 0.342867775009473 +NMC NMC 91 0.3462149531948183 +NMC NMC 92 0.33322645114254057 +NMC NMC 93 0.3496968402988778 +NMC NMC 94 0.37224312344590277 +NMC NMC 95 0.3501229381653187 +NMC NMC 97 0.34862218791470695 +NMC NMC 98 0.3513864997869911 +NMC NMC 99 0.3463189635741315 +NMC NMC 100 0.34999374714067055 +NMC NMC 101 0.3219866280166407 +NMC NMC 104 0.3136568526784361 +NMC NMC 105 0.3238255813447048 +NMC NMC 106 0.34905991139783304 +NMC NMC 107 0.34365171884104645 +NMC NMC 108 0.2700951102701827 +NMC NMC 110 0.3516749264080651 +NMC NMC 111 0.3420426414588959 +NMC NMC 112 0.3368571659898279 +NMC NMC 113 0.32766489680568944 +NMC NMC 114 0.3309229352262125 +NMC NMC 115 0.3087559452541232 +NMC NMC 116 0.30434091887231657 +NMC NMC 117 0.31985278442485376 +NMC NMC 118 0.3081369779237656 +NMC NMC 119 0.32428991893790204 +NMC NMC 120 0.29177949202546344 +NMC NMC 121 0.2986166917278112 +NMC NMC 122 0.28530172048925173 +NMC NMC 123 0.30960790490633666 +NMC NMC 124 0.2838914377920225 +NMC NMC 125 0.327108232453106 +NMC NMC 126 0.30800213354866085 +NMC NMC 127 0.27505741362840286 +NMC NMC 128 0.3057396895330445 +NMC NMC 129 0.2948720021522578 +NMC NMC 130 0.28654594665723143 +NMC NMC 131 0.24998019481392156 +NMC NMC 132 0.23305710852110711 +NMC NMC 133 0.2285884324760879 +NMC NMC 134 0.21022049472555798 +NMC NMC 136 0.14289441509543208 +NMC NMC 137 0.1467309459465361 +NMC NMC 147 0.3783814687867924 +NMC NMC 148 0.3660143252628356 +NMC NMC 152 0.37316750851562697 +NMC NMC 153 0.38611716465278784 +NMC NMC 154 0.3698410284359639 +NMC NMC 157 0.36961892116181694 +NMC NMC 158 0.40918582365959344 +NMC NMC 159 0.3772237079249307 +NMC NMC 160 0.36695342000619174 +NMC NMC 161 0.37837880076113106 +NMC NMC 162 0.3574940348440908 +NMC NMC 163 0.38209182150349047 +NMC NMC 164 0.38555197565482335 +NMC NMC 165 0.38435365356895423 +NMC NMC 166 0.39523350385236933 +NMC NMC 167 0.352962329300064 +NMC NMC 168 0.33498853883029145 +NMC NMC 169 0.3799963472642203 +NMC NMC 170 0.3626711192860542 +NMC NMC 171 0.3726539470995713 +NMC NMC 172 0.38205670828934024 +NMC NMC 173 0.3624890112249862 +NMC NMC 174 0.351766590223616 +NMC NMC 175 0.38120620112890435 +NMC NMC 176 0.376704707091722 +NMC NMC 177 0.35767966843982835 +NMC NMC 178 0.3739917821379859 +NMC NMC 179 0.3779713226850912 +NMC NMC 180 0.3540178478852946 +NMC NMC 181 0.3563337922341492 +NMC NMC 182 0.3495895799725437 +NMC NMC 183 0.40513857686448185 +NMC NMC 184 0.3737246199455833 +NMC NMC 185 0.34224373934105096 +NMC NMC 186 0.3572968130894208 +NMC NMC 187 0.3513259873499916 +NMC NMC 188 0.36191625620248563 +NMC NMC 189 0.3757225472452201 +NMC NMC 190 0.34496599482267015 +NMC NMC 191 0.3671454519682669 +NMC NMC 192 0.3355832294500817 +NMC NMC 193 0.32611756260750746 +NMC NMC 194 0.3264723091842311 +NMC NMC 195 0.31358299893261243 +NMC NMC 196 0.3063679612675748 +NMC NMC 197 0.3243783766742867 +NMC NMC 198 0.2951522224003636 +NMC NMC 199 0.30478606823022336 +NMC NMC 200 0.3162026195490957 +NMC NMC 201 0.24410226609924124 +NMC NMC 202 0.29437714007416727 +NMC NMC 203 0.29086153651126373 +NMC NMC 204 0.2618537028601226 +NMC NMC 205 0.24746460684350655 +NMC NMC 206 0.22851397088173026 +NMC NMC 207 0.23143418017058778 +NMC NMC 208 0.2083333904728336 +NMC NMC 209 0.1871220049907259 +NMC NMC 210 0.13694037697890943 +NMC NMC 211 0.1410254122612783 +NMC NMC 212 0.10913012127955796 +NMC NMC 221 0.38635187132002613 +NMC NMC 222 0.4142522089627604 +NMC NMC 225 0.40345790673282933 +NMC NMC 226 0.40425695161223724 +NMC NMC 227 0.39659246253718794 +NMC NMC 229 0.4111692386727388 +NMC NMC 230 0.40555697596449714 +NMC NMC 231 0.4035342194456604 +NMC NMC 232 0.4151050115450948 +NMC NMC 233 0.3925537185116469 +NMC NMC 234 0.3991798063407916 +NMC NMC 235 0.42128296044596675 +NMC NMC 236 0.39594826617785617 +NMC NMC 237 0.4132863843007187 +NMC NMC 238 0.4063071052878472 +NMC NMC 239 0.39412536731089337 +NMC NMC 240 0.38212850421302924 +NMC NMC 241 0.3914294452161547 +NMC NMC 242 0.40047857374987483 +NMC NMC 243 0.4008070275555427 +NMC NMC 244 0.40262694743529254 +NMC NMC 245 0.39621958826618797 +NMC NMC 246 0.39102221801161896 +NMC NMC 247 0.3886398830939979 +NMC NMC 248 0.38188378549251667 +NMC NMC 249 0.3988887017211007 +NMC NMC 250 0.39585021194541986 +NMC NMC 251 0.40126477628252327 +NMC NMC 252 0.3964150971271786 +NMC NMC 253 0.3902202435316766 +NMC NMC 254 0.38160232623513723 +NMC NMC 255 0.3839102218492622 +NMC NMC 256 0.39033449259461933 +NMC NMC 257 0.37931861168091624 +NMC NMC 258 0.3709117293446901 +NMC NMC 259 0.3732315660927265 +NMC NMC 260 0.3720221661101343 +NMC NMC 261 0.3517503669287229 +NMC NMC 262 0.36807144164962735 +NMC NMC 263 0.3693458573629922 +NMC NMC 264 0.3592451130180051 +NMC NMC 265 0.35632376909940633 +NMC NMC 266 0.35150220374085334 +NMC NMC 267 0.34496329443474294 +NMC NMC 268 0.33956152225234476 +NMC NMC 269 0.3485667202754675 +NMC NMC 270 0.3391216940975037 +NMC NMC 271 0.33736702477837127 +NMC NMC 272 0.3133373105891819 +NMC NMC 273 0.32982788223049275 +NMC NMC 274 0.3211288416924518 +NMC NMC 275 0.3060547284123289 +NMC NMC 276 0.335436416428492 +NMC NMC 277 0.3073804246714358 +NMC NMC 278 0.294093691234672 +NMC NMC 279 0.2884616638265187 +NMC NMC 280 0.26397391314307916 +NMC NMC 281 0.2444528193940454 +NMC NMC 282 0.27832660956025357 +NMC NMC 283 0.25803828744513574 +NMC NMC 284 0.23389825136222092 +NMC NMC 285 0.2146733551898643 +NMC NMC 286 0.21813290674472682 +NMC NMC 287 0.212151341826686 +NMC NMC 288 0.16141376202106558 +NMC NMC 289 0.13433721471473095 +NMC NMC 290 0.11551212545868192 +NMC NMC 291 0.10901466846254028 +ATLAS ATLASTTBARTOT 0 165.5652425198355 +ATLAS ATLASTTBARTOT 1 245.16706727829992 +ATLAS ATLASTTBARTOT 2 801.771432400817 +CMS CMSZDIFF12 1 2973.4725103674823 +CMS CMSZDIFF12 2 1056.3849990121557 +CMS CMSZDIFF12 3 456.57202617345774 +CMS CMSZDIFF12 4 221.42094319673168 +CMS CMSZDIFF12 5 107.23839580843014 +CMS CMSZDIFF12 6 58.24831168720775 +CMS CMSZDIFF12 7 29.594897586398673 +CMS CMSZDIFF12 11 2828.9994410871873 +CMS CMSZDIFF12 12 1042.0799324021702 +CMS CMSZDIFF12 13 443.3584194436015 +CMS CMSZDIFF12 14 204.46185503728015 +CMS CMSZDIFF12 15 105.84479937812591 +CMS CMSZDIFF12 16 59.79512008357787 +CMS CMSZDIFF12 17 30.620091935371985 +CMS CMSZDIFF12 21 2590.081496607627 +CMS CMSZDIFF12 22 912.4546874257419 +CMS CMSZDIFF12 23 410.2638687475507 +CMS CMSZDIFF12 24 203.38813364945474 +CMS CMSZDIFF12 25 103.36390836336159 +CMS CMSZDIFF12 26 54.7135715358862 +CMS CMSZDIFF12 27 28.40706067537687 +CMS CMSZDIFF12 31 2029.9154133864206 +CMS CMSZDIFF12 32 737.238837202754 +CMS CMSZDIFF12 33 331.45213196343286 +CMS CMSZDIFF12 34 172.74641496097794 +CMS CMSZDIFF12 35 88.99325953676879 +CMS CMSZDIFF12 36 49.525868163359064 +CMS CMSZDIFF12 37 24.187338718035164 From 2c1abaa6a84adb799144c876883bf1c69f58e5b9 Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Fri, 16 Dec 2022 14:19:57 +0000 Subject: [PATCH 19/49] bug in sytypes file name fixed --- validphys2/src/validphys/commondataparser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/validphys2/src/validphys/commondataparser.py b/validphys2/src/validphys/commondataparser.py index 2a02a2b470..1751eb22f1 100644 --- a/validphys2/src/validphys/commondataparser.py +++ b/validphys2/src/validphys/commondataparser.py @@ -168,9 +168,9 @@ def write_systype(commondata_list, filter_path): for commondata_instance in commondata_list: path = filter_path / commondata_instance.setname / 'systypes' make_systype_dir(path) - path_data = str(path) + f"/SYSTYPES_{commondata_instance.setname}_DEFAULT.dat" + path_data = str(path) + f"/SYSTYPE_{commondata_instance.setname}_DEFAULT.dat" systype_tab = commondata_instance.systype_table - header = f"{len(systype_tab.index)}\n" + header = f"{commondata_instance}\n" #==== write DATA =====# with open(path_data, "w+") as f: From 2c7f128e2552307986c884c937689c14a7e96af1 Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Fri, 16 Dec 2022 15:21:11 +0000 Subject: [PATCH 20/49] added functions to write commondata tables to files --- validphys2/src/validphys/commondataparser.py | 120 ++++++++++--------- 1 file changed, 65 insertions(+), 55 deletions(-) diff --git a/validphys2/src/validphys/commondataparser.py b/validphys2/src/validphys/commondataparser.py index 1751eb22f1..70475c19ac 100644 --- a/validphys2/src/validphys/commondataparser.py +++ b/validphys2/src/validphys/commondataparser.py @@ -93,86 +93,96 @@ def parse_systypes(systypefile): return systypetable -def write_commondata(commondata_list, filter_path): + +def write_commondata_table_to_string(commondata,sio,table="commondata_table"): """ GENERAL DESCRIPTION: - - writes dataset data in filter folder using the commondata file format - + + Given a validphys.coredata.CommonData instance and a StringIO, + update value of StringIO to contents of specified CommonData table. + Parameters ---------- - commondata_list : list - commondata object (cuts should be already applied) - - path : str - path to filter folder + commondata: validphys.coredata.CommonData + + sio: StringIO + + table: str, default 'commondata_table' + Example + ------- - """ + >>> from validphys.loader import Loader + >>> from io import StringIO - for commondata_instance in commondata_list: - # path - path = str(filter_path) + f'/{commondata_instance.setname}' - path_data = str(path) + f"/DATA_{commondata_instance.setname}.dat" - commondata_tab = commondata_instance.commondata_table.reset_index(drop = True) # do not use maskcut index - commondata_tab.index += 1 # index starting from 1 - - header = f"{commondata_instance.setname} {commondata_instance.nsys} {commondata_instance.ndata}\n" - - #==== write DATA =====# - with open(path_data, "w+") as f: - f.write(header) - commondata_tab.to_csv(f, sep="\t", header=None) + >>> l = Loader() + >>> obs = "NMC" + >>> commondata = l.check_commondata(obs).load_commondata_instance() + >>> sio = StringIO() + >>> print(sio.getvalue()) + >>> write_commondata_table_to_string(commondata,sio,table="systype_table") + >>> sio.getvalue() -def make_systype_dir(path): """ - GENERAL DESCRIPTION: + + data_frame = getattr(commondata,table) + data_frame.to_csv(sio, sep = "\t", header = None) + + - creates directory named systypes +def write_systypes_to_file(commondata,path): + """ + GENERAL DESCRIPTION: + + write a systype file to a specified path + (e.g. path = path_to_fit_folder/filter/name_observable/systypes/SYSTYPE_name_observable_DEFAULT.dat) Parameters ---------- - path : str - path to systypes filter/dataset_name/systypes folder + commondata: validphys.coredata.CommonData + + path: str + - """ - if path.exists(): - log.warning(f"systypes folder exists: {path} Overwriting contents") - else: - path.mkdir(exist_ok=True) - + from io import StringIO + sio = StringIO() + + write_commondata_table_to_string(commondata,sio,table="systype_table") + header = f"{commondata.nsys}\n" + + with open(path,"w") as file: + file.write(header) + file.write(sio.getvalue()) -def write_systype(commondata_list, filter_path): + + +def write_commondata_to_file(commondata,path): """ GENERAL DESCRIPTION: - - writes systype data in filter folder using the systype file format + + write a commondata table as file to a specified path Parameters ---------- - commondata_list : list - commondata object (cuts should be already applied) - - filter_path : str - path to filter folder - + commondata: validphys.coredata.CommonData + path: str + + """ - - for commondata_instance in commondata_list: - path = filter_path / commondata_instance.setname / 'systypes' - make_systype_dir(path) - path_data = str(path) + f"/SYSTYPE_{commondata_instance.setname}_DEFAULT.dat" - systype_tab = commondata_instance.systype_table - header = f"{commondata_instance}\n" - - #==== write DATA =====# - with open(path_data, "w+") as f: - f.write(header) - systype_tab.to_csv(f, sep="\t", header=None) \ No newline at end of file + from io import StringIO + + sio = StringIO() + write_commondata_table_to_string(commondata,sio) + + header = f"{commondata.setname} {commondata.nsys} {commondata.ndata}\n" + + with open(path,"w") as file: + file.write(header) + file.write(sio.getvalue()) From 16760bfd9097f29efbdbb6fbca715e34d1f400bf Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Fri, 16 Dec 2022 15:22:01 +0000 Subject: [PATCH 21/49] import new validphys.commondataparser functions to write commondata tables --- validphys2/src/validphys/filters.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/validphys2/src/validphys/filters.py b/validphys2/src/validphys/filters.py index 7d28bd16eb..898adaceee 100644 --- a/validphys2/src/validphys/filters.py +++ b/validphys2/src/validphys/filters.py @@ -230,6 +230,7 @@ def _filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed, erro #== print number of points passing cuts, make dataset directory and write FKMASK ==# path = filter_path / dataset.name nfull, ncut = _write_ds_cut_data(path, dataset) + make_dataset_dir(path / "systypes") total_data_points += nfull total_cut_data_points += ncut # Rescale errors @@ -237,12 +238,13 @@ def _filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed, erro if errorsize != 1.0: loaded_ds.RescaleErrors(errorsize) - from validphys.commondataparser import write_commondata, write_systype + from validphys.commondataparser import write_commondata_to_file, write_systypes_to_file if not fakenoise: #======= Level 0 closure test =======# log.info("Writing Level0 data") - write_commondata(level0_commondata_instances_wc,filter_path) - write_systype(level0_commondata_instances_wc,filter_path) + for l0_cd in level0_commondata_instances_wc: + write_commondata_to_file(l0_cd, path = filter_path / l0_cd.setname / f"DATA_{l0_cd.setname}.dat") + write_systypes_to_file(l0_cd, path = filter_path / l0_cd.setname / "systypes" / f"SYSTYPE_{l0_cd.setname}.dat") else: #======= Level 1 closure test =======# @@ -250,8 +252,9 @@ def _filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed, erro level1_commondata_instances_wc = make_level1_data(data,commondata_instances_wc,level0_commondata_instances_wc,filterseed) #====== write commondata and systype files ======# log.info("Writing Level1 data") - write_commondata(level1_commondata_instances_wc,filter_path) - write_systype(level1_commondata_instances_wc,filter_path) + for l1_cd in level1_commondata_instances_wc: + write_commondata_to_file(l1_cd, path = filter_path / l1_cd.setname / f"DATA_{l1_cd.setname}.dat") + write_systypes_to_file(l1_cd, path = filter_path / l1_cd.setname / "systypes" / f"SYSTYPE_{l1_cd.setname}.dat") return total_data_points, total_cut_data_points From 9b8f5bc656273743fd58887637c0ab5f8475c6eb Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Fri, 16 Dec 2022 15:45:22 +0000 Subject: [PATCH 22/49] added single_dataset --- validphys2/src/validphys/tests/conftest.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/validphys2/src/validphys/tests/conftest.py b/validphys2/src/validphys/tests/conftest.py index 74eb4315c3..b76f5073b7 100644 --- a/validphys2/src/validphys/tests/conftest.py +++ b/validphys2/src/validphys/tests/conftest.py @@ -23,6 +23,8 @@ def tmp(tmpdir): # Here define the default config items like the PDF, theory and experiment specs SINGLE_DATAPOINT = {'dataset': 'ATLASTTBARTOT8TEV', 'cfac': ['QCD']} +SINGLE_DATASET = {'dataset': 'NMC'} + DATA = [ {'dataset': 'NMC'}, {'dataset': 'ATLASTTBARTOT', 'cfac':['QCD']}, From a1f9689709967d9de4a20645cc64e4879a24c9d7 Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Fri, 16 Dec 2022 15:46:01 +0000 Subject: [PATCH 23/49] import info from conftest.py --- validphys2/src/validphys/tests/test_pseudodata.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/validphys2/src/validphys/tests/test_pseudodata.py b/validphys2/src/validphys/tests/test_pseudodata.py index 7b4ecdd027..a0de673b85 100644 --- a/validphys2/src/validphys/tests/test_pseudodata.py +++ b/validphys2/src/validphys/tests/test_pseudodata.py @@ -12,7 +12,7 @@ import pytest from validphys.api import API -from validphys.tests.conftest import FIT, PSEUDODATA_FIT +from validphys.tests.conftest import FIT, PSEUDODATA_FIT, THEORYID, SINGLE_DATASET, PDF def test_read_fit_pseudodata(): @@ -87,15 +87,16 @@ def test_make_level0_data(): from validphys.loader import Loader from validphys.covmats import dataset_t0_predictions - dataset='NMC' - pdfname='NNPDF40_nnlo_as_01180' - theoryid=162 + dataset=SINGLE_DATASET + pdfname=PDF + + l = Loader() - datasetspec = l.check_dataset(dataset,theoryid=theoryid) + datasetspec = l.check_dataset(list(dataset.values())[0],theoryid=THEORYID) t0set = l.check_pdf(pdfname) - l0_cd = API.make_level0_data(dataset_inputs = [{"dataset":dataset}], - use_cuts="internal", theoryid=theoryid, fakepdf = pdfname) + l0_cd = API.make_level0_data(dataset_inputs = [dataset], + use_cuts="internal", theoryid=THEORYID, fakepdf = pdfname) l0_vals = l0_cd[0].central_values From d782d7a7752ff36afe59045b9434e2abffbbefd2 Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Wed, 28 Dec 2022 10:28:45 +0000 Subject: [PATCH 24/49] unusued fakeset loaded with c++ removed --- validphys2/src/validphys/filters.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/validphys2/src/validphys/filters.py b/validphys2/src/validphys/filters.py index 898adaceee..40f5d035e0 100644 --- a/validphys2/src/validphys/filters.py +++ b/validphys2/src/validphys/filters.py @@ -173,8 +173,6 @@ def _filter_real_data(filter_path, data): def _filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed, errorsize): """ - GENERAL DESCRIPTION: - This function is accessed within a closure test only, that is, the fakedata namespace has to be True (If fakedata = False, the _filter_real_data function will be used to write the commondata files). @@ -218,7 +216,6 @@ def _filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed, erro total_data_points = 0 total_cut_data_points = 0 - fakeset = fakepdf.legacy_load() # Load data, don't cache result loaded_data = data.load.__wrapped__(data) From 0ce9470b52533e5776b9c01c3ef7b3d7e5fdb6ce Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Wed, 28 Dec 2022 13:46:27 +0000 Subject: [PATCH 25/49] added functions to write commondata and systype data to buffer --- validphys2/src/validphys/commondataparser.py | 108 ++++++++----------- 1 file changed, 46 insertions(+), 62 deletions(-) diff --git a/validphys2/src/validphys/commondataparser.py b/validphys2/src/validphys/commondataparser.py index 70475c19ac..78394436e0 100644 --- a/validphys2/src/validphys/commondataparser.py +++ b/validphys2/src/validphys/commondataparser.py @@ -94,95 +94,79 @@ def parse_systypes(systypefile): return systypetable -def write_commondata_table_to_string(commondata,sio,table="commondata_table"): + +def write_commondata_data(commondata, buffer): """ - GENERAL DESCRIPTION: + write commondata table to buffer, this can be a memory map, + compressed archive or strings (using for instance StringIO) - Given a validphys.coredata.CommonData instance and a StringIO, - update value of StringIO to contents of specified CommonData table. Parameters ---------- - - commondata: validphys.coredata.CommonData - sio: StringIO + commondata : validphys.coredata.CommonData - table: str, default 'commondata_table' - + buffer : memory map, compressed archive or strings + example: StringIO object + + Example ------- - >>> from validphys.loader import Loader >>> from io import StringIO - + >>> l = Loader() - >>> obs = "NMC" - >>> commondata = l.check_commondata(obs).load_commondata_instance() - + >>> cd = l.check_commondata("NMC").load_commondata_instance() >>> sio = StringIO() + >>> write_commondata_data(cd,sio) >>> print(sio.getvalue()) - >>> write_commondata_table_to_string(commondata,sio,table="systype_table") - >>> sio.getvalue() - - - """ - - data_frame = getattr(commondata,table) - data_frame.to_csv(sio, sep = "\t", header = None) - - -def write_systypes_to_file(commondata,path): """ - GENERAL DESCRIPTION: - - write a systype file to a specified path - (e.g. path = path_to_fit_folder/filter/name_observable/systypes/SYSTYPE_name_observable_DEFAULT.dat) - - Parameters - ---------- + header = f"{commondata.setname} {commondata.nsys} {commondata.ndata}\n" + buffer.write(header) + commondata.commondata_table.to_csv(buffer, sep="\t", header=None) - commondata: validphys.coredata.CommonData - - path: str - - +def write_commondata_to_file(commondata,path): + """ + write commondata table to file """ - from io import StringIO - sio = StringIO() - - write_commondata_table_to_string(commondata,sio,table="systype_table") - header = f"{commondata.nsys}\n" - with open(path,"w") as file: - file.write(header) - file.write(sio.getvalue()) + write_commondata_data(commondata,file) - - -def write_commondata_to_file(commondata,path): +def write_systype_data(commondata, buffer): """ - GENERAL DESCRIPTION: + write systype table to buffer, this can be a memory map, + compressed archive or strings (using for instance StringIO) - write a commondata table as file to a specified path Parameters ---------- - - commondata: validphys.coredata.CommonData - path: str - - - """ - from io import StringIO + commondata : validphys.coredata.CommonData - sio = StringIO() - write_commondata_table_to_string(commondata,sio) + buffer : memory map, compressed archive or strings + example: StringIO object + + + Example + ------- + >>> from validphys.loader import Loader + >>> from io import StringIO - header = f"{commondata.setname} {commondata.nsys} {commondata.ndata}\n" + >>> l = Loader() + >>> cd = l.check_commondata("NMC").load_commondata_instance() + >>> sio = StringIO() + >>> write_systype_data(cd,sio) + >>> print(sio.getvalue()) + """ + header = f"{commondata.nsys}\n" + buffer.write(header) + commondata.systype_table.to_csv(buffer, sep="\t", header=None) + +def write_systype_to_file(commondata,path): + """ + write systype table to file + """ with open(path,"w") as file: - file.write(header) - file.write(sio.getvalue()) + write_systype_data(commondata,file) From 66821be8842c1940269b952442437361783740e8 Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Wed, 28 Dec 2022 13:47:33 +0000 Subject: [PATCH 26/49] write commondata and systype using commondataparser functions --- validphys2/src/validphys/filters.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/validphys2/src/validphys/filters.py b/validphys2/src/validphys/filters.py index 40f5d035e0..8864413eb7 100644 --- a/validphys2/src/validphys/filters.py +++ b/validphys2/src/validphys/filters.py @@ -235,14 +235,16 @@ def _filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed, erro if errorsize != 1.0: loaded_ds.RescaleErrors(errorsize) - from validphys.commondataparser import write_commondata_to_file, write_systypes_to_file + from validphys.commondataparser import write_commondata_to_file, write_systype_to_file if not fakenoise: #======= Level 0 closure test =======# log.info("Writing Level0 data") for l0_cd in level0_commondata_instances_wc: - write_commondata_to_file(l0_cd, path = filter_path / l0_cd.setname / f"DATA_{l0_cd.setname}.dat") - write_systypes_to_file(l0_cd, path = filter_path / l0_cd.setname / "systypes" / f"SYSTYPE_{l0_cd.setname}.dat") - + path_cd = filter_path / l0_cd.setname / f"DATA_{l0_cd.setname}.dat" + path_sys = filter_path / l0_cd.setname / "systypes" / f"SYSTYPE_{l0_cd.setname}_DEFAULT.dat" + write_commondata_to_file(commondata=l0_cd,path=path_cd) + write_systype_to_file(commondata=l0_cd,path=path_sys) + else: #======= Level 1 closure test =======# from validphys.pseudodata import make_level1_data @@ -250,8 +252,10 @@ def _filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed, erro #====== write commondata and systype files ======# log.info("Writing Level1 data") for l1_cd in level1_commondata_instances_wc: - write_commondata_to_file(l1_cd, path = filter_path / l1_cd.setname / f"DATA_{l1_cd.setname}.dat") - write_systypes_to_file(l1_cd, path = filter_path / l1_cd.setname / "systypes" / f"SYSTYPE_{l1_cd.setname}.dat") + path_cd = filter_path / l1_cd.setname / f"DATA_{l1_cd.setname}.dat" + path_sys = filter_path / l1_cd.setname / "systypes" / f"SYSTYPE_{l1_cd.setname}_DEFAULT.dat" + write_commondata_to_file(commondata=l1_cd,path=path_cd) + write_systype_to_file(commondata=l1_cd,path=path_sys) return total_data_points, total_cut_data_points From 6e86c5a163dfe5bccb48fb8290be567363c46e11 Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Thu, 29 Dec 2022 09:18:13 +0000 Subject: [PATCH 27/49] comment using numpy doc style --- validphys2/src/validphys/pseudodata.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index ec3c60b4fb..31f6388dd4 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -238,8 +238,6 @@ def indexed_make_replica(groups_index, make_replica): def make_level0_data(data,fakepdf): """ - GENERAL DESCRIPTION: - Given a validphys.core.DataGroupSpec object, load commondata and generate a new commondata instance with central values replaced by fakepdf prediction @@ -288,8 +286,6 @@ def make_level0_data(data,fakepdf): def make_level1_data(data,commondata_wc,level0_commondata_wc,filterseed): """ - GENERAL DESCRIPTION: - Given a list of level0 commondata instances, return the same list with central values replaced by level1 data From bd6be97557bd13f7452da12afaabac914c952d35 Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Sat, 31 Dec 2022 12:42:31 +0000 Subject: [PATCH 28/49] use assert_allclose from numpy.testing for arrays --- validphys2/src/validphys/tests/test_pseudodata.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/validphys2/src/validphys/tests/test_pseudodata.py b/validphys2/src/validphys/tests/test_pseudodata.py index a0de673b85..94f385930f 100644 --- a/validphys2/src/validphys/tests/test_pseudodata.py +++ b/validphys2/src/validphys/tests/test_pseudodata.py @@ -9,6 +9,7 @@ """ import pandas as pd import numpy as np +from numpy.testing import assert_allclose import pytest from validphys.api import API @@ -99,5 +100,5 @@ def test_make_level0_data(): use_cuts="internal", theoryid=THEORYID, fakepdf = pdfname) l0_vals = l0_cd[0].central_values - - assert(np.abs(np.sum(dataset_t0_predictions(dataset = datasetspec, t0set = t0set) / l0_vals) / len(l0_vals) - 1) <= 1e-12) + assert_allclose(dataset_t0_predictions(dataset = datasetspec, t0set = t0set), + l0_vals,rtol=1e-07, atol=0) \ No newline at end of file From a8d2055fce215846c67fa199c9e8d5fdcccd7ba6 Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Wed, 4 Jan 2023 10:40:42 +0000 Subject: [PATCH 29/49] use experiments_index to index level1 data in make_level1_data --- validphys2/src/validphys/filters.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/validphys2/src/validphys/filters.py b/validphys2/src/validphys/filters.py index 8864413eb7..c3f68a4d9a 100644 --- a/validphys2/src/validphys/filters.py +++ b/validphys2/src/validphys/filters.py @@ -109,7 +109,7 @@ def filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed, error @check_positive("errorsize") def filter_closure_data_by_experiment( filter_path, experiments_data, fakepdf, fakenoise, filterseed - , errorsize, + , errorsize, experiments_index ): """ Like :py:func:`filter_closure_data` except filters data by experiment. @@ -120,10 +120,14 @@ def filter_closure_data_by_experiment( not reproducible. """ - return [ - _filter_closure_data(filter_path, exp, fakepdf, fakenoise, filterseed, errorsize) - for exp in experiments_data - ] + + res = [] + for exp in experiments_data: + experiment_index = experiments_index[experiments_index.isin([exp.name],level=0)] + res.append(_filter_closure_data(filter_path, exp, fakepdf, fakenoise, + filterseed, errorsize, experiment_index)) + + return res def filter_real_data(filter_path, data): @@ -171,7 +175,7 @@ def _filter_real_data(filter_path, data): return total_data_points, total_cut_data_points -def _filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed, errorsize): +def _filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed, errorsize, experiments_index): """ This function is accessed within a closure test only, that is, the fakedata namespace has to be True (If fakedata = False, the _filter_real_data function @@ -248,7 +252,8 @@ def _filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed, erro else: #======= Level 1 closure test =======# from validphys.pseudodata import make_level1_data - level1_commondata_instances_wc = make_level1_data(data,commondata_instances_wc,level0_commondata_instances_wc,filterseed) + level1_commondata_instances_wc = make_level1_data(data,commondata_instances_wc,level0_commondata_instances_wc, + filterseed, experiments_index) #====== write commondata and systype files ======# log.info("Writing Level1 data") for l1_cd in level1_commondata_instances_wc: From f2a9624e9cf1ea014ea0a08628c0eff52715298a Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Wed, 4 Jan 2023 10:41:22 +0000 Subject: [PATCH 30/49] use experiments_index provider --- validphys2/src/validphys/pseudodata.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index 31f6388dd4..54994d95ae 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -284,7 +284,7 @@ def make_level0_data(data,fakepdf): -def make_level1_data(data,commondata_wc,level0_commondata_wc,filterseed): +def make_level1_data(data,commondata_wc,level0_commondata_wc,filterseed,experiments_index): """ Given a list of level0 commondata instances, return the same list with central values replaced by level1 data @@ -339,14 +339,11 @@ def make_level1_data(data,commondata_wc,level0_commondata_wc,filterseed): _list_of_central_values=None, _only_additive=False,) #================== generation of pseudo data ======================# - from validphys.results import groups_index #= generate pseudo data starting from theory predictions level1_data = make_replica(level0_commondata_wc, filterseed, covmat, sep_mult=False, genrep=True) - group_index = groups_index([data]) # already set cuts - - indexed_level1_data = indexed_make_replica(group_index, level1_data) + indexed_level1_data = indexed_make_replica(experiments_index, level1_data) #===== create commondata instances with central values given by pseudo_data =====# level1_commondata_dict = {c.setname:c for c in level0_commondata_wc} From 7e827c687809d515d51a0db6afed5657cf7e3db6 Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Wed, 4 Jan 2023 10:42:25 +0000 Subject: [PATCH 31/49] added commondata_wc provider to get commondata with cuts list given DataGroupSpec --- validphys2/src/validphys/results.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/validphys2/src/validphys/results.py b/validphys2/src/validphys/results.py index ad76c043ad..531ed599c2 100644 --- a/validphys2/src/validphys/results.py +++ b/validphys2/src/validphys/results.py @@ -224,6 +224,25 @@ def procs_data_values(proc_result_table): data_central_values = proc_result_table["data_central"] return data_central_values +def commondata_wc(data): + """ + commondata with cuts: + given a DataGroupSpec load all the DataSetInput + instances into CommonData instances with cuts + already applied to + + Parameters + ---------- + + data : validphys.core.DataGroupSpec + + Returns + ------- + list containing commondata instances with cuts + + """ + return data.load_commondata_instance() + groups_results = collect( "dataset_inputs_results", ("group_dataset_inputs_by_metadata",) ) From 3795663f84f084beed112e1732f81ecfdc3ad151 Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Wed, 4 Jan 2023 10:59:30 +0000 Subject: [PATCH 32/49] make_level0_data renamed to level0_commondata_wc --- validphys2/src/validphys/filters.py | 6 ++++-- validphys2/src/validphys/pseudodata.py | 12 ++++-------- validphys2/src/validphys/tests/test_pseudodata.py | 9 ++------- 3 files changed, 10 insertions(+), 17 deletions(-) diff --git a/validphys2/src/validphys/filters.py b/validphys2/src/validphys/filters.py index c3f68a4d9a..949352ff63 100644 --- a/validphys2/src/validphys/filters.py +++ b/validphys2/src/validphys/filters.py @@ -210,6 +210,8 @@ def _filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed, erro errorsize : float (defined in runcard) + experiments_index : pandas.MultiIndex + Returns ------- @@ -223,8 +225,8 @@ def _filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed, erro # Load data, don't cache result loaded_data = data.load.__wrapped__(data) - from validphys.pseudodata import make_level0_data - level0_commondata_instances_wc = make_level0_data(data,fakepdf) + from validphys.pseudodata import level0_commondata_wc + level0_commondata_instances_wc = level0_commondata_wc(data,fakepdf) commondata_instances_wc = data.load_commondata_instance() # used to generate experimental covariance matrix for j, dataset in enumerate(data.datasets): diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index 54994d95ae..f9feb0977d 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -236,7 +236,7 @@ def indexed_make_replica(groups_index, make_replica): return pd.DataFrame(make_replica, index=groups_index, columns=["data"]) -def make_level0_data(data,fakepdf): +def level0_commondata_wc(data,fakepdf): """ Given a validphys.core.DataGroupSpec object, load commondata and generate a new commondata instance with central values replaced @@ -259,7 +259,7 @@ def make_level0_data(data,fakepdf): Example ------- >>> from validphys.api import API - >>> API.make_level0_data(dataset_inputs = [{"dataset":"NMC"}], use_cuts="internal", theoryid=200,fakepdf = "NNPDF40_nnlo_as_01180") + >>> API.level0_commondata_wc(dataset_inputs = [{"dataset":"NMC"}], use_cuts="internal", theoryid=200,fakepdf = "NNPDF40_nnlo_as_01180") [CommonData(setname='NMC', ndata=204, commondataproc='DIS_NCE', nkin=3, nsys=16)] """ @@ -319,13 +319,9 @@ def make_level1_data(data,commondata_wc,level0_commondata_wc,filterseed,experime ------- >>> from validphys.api import API - >>> from validphys.loader import Loader >>> dataset='NMC' - >>> l=Loader() - >>> cuts = l.check_dataset(dataset,theoryid=200).cuts.load() - >>> cd = l.check_commondata(dataset).load_commondata_instance().with_cuts(cuts) - >>> l0_cd = API.make_level0_data(dataset_inputs = [{"dataset":dataset}],use_cuts="internal", theoryid=200,fakepdf = "NNPDF40_nnlo_as_01180") - >>> l1_cd = API.make_level1_data(level0_commondata_wc=l0_cd, commondata_wc=[cd], dataset_inputs = [{"dataset":dataset}], use_cuts="internal",filterseed=1, theoryid=200) + >>> l1_cd = API.make_level1_data(dataset_inputs = [{"dataset":dataset}],use_cuts="internal", theoryid=200, + fakepdf = "NNPDF40_nnlo_as_01180",filterseed=1) >>> l1_cd [CommonData(setname='NMC', ndata=204, commondataproc='DIS_NCE', nkin=3, nsys=16)] """ diff --git a/validphys2/src/validphys/tests/test_pseudodata.py b/validphys2/src/validphys/tests/test_pseudodata.py index 94f385930f..285be67bd9 100644 --- a/validphys2/src/validphys/tests/test_pseudodata.py +++ b/validphys2/src/validphys/tests/test_pseudodata.py @@ -84,21 +84,16 @@ def test_read_matches_recreate(): pd.testing.assert_index_equal(read.val_idx, recreate.val_idx, check_order=False) -def test_make_level0_data(): +def test_level0_commondata_wc(): from validphys.loader import Loader from validphys.covmats import dataset_t0_predictions - dataset=SINGLE_DATASET pdfname=PDF - - l = Loader() datasetspec = l.check_dataset(list(dataset.values())[0],theoryid=THEORYID) t0set = l.check_pdf(pdfname) - - l0_cd = API.make_level0_data(dataset_inputs = [dataset], + l0_cd = API.level0_commondata_wc(dataset_inputs = [dataset], use_cuts="internal", theoryid=THEORYID, fakepdf = pdfname) - l0_vals = l0_cd[0].central_values assert_allclose(dataset_t0_predictions(dataset = datasetspec, t0set = t0set), l0_vals,rtol=1e-07, atol=0) \ No newline at end of file From cc868fb00786ee70a1c563e015f3bb94fce2012a Mon Sep 17 00:00:00 2001 From: juacrumar Date: Thu, 12 Jan 2023 10:45:19 +0100 Subject: [PATCH 33/49] use the new export functions --- validphys2/src/validphys/coredata.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/validphys2/src/validphys/coredata.py b/validphys2/src/validphys/coredata.py index 55e9c433ae..5d6544c988 100644 --- a/validphys2/src/validphys/coredata.py +++ b/validphys2/src/validphys/coredata.py @@ -371,19 +371,14 @@ def systematic_errors(self, central_values=None): def export(self, path): """Export the data, and error types - Use the same format as libNNPDF: - A DATA_.dat file with the dataframe of accepted points - A systypes/STYPES_.dat file with the error types """ + from validphys.commondataparser import write_systype_to_file, write_commondata_to_file dat_path = path / f"DATA_{self.setname}.dat" sys_path = path / "systypes" / f"SYSTYPE_{self.setname}_DEFAULT.dat" sys_path.parent.mkdir(exist_ok=True) - dat_string_raw = self.commondata_table.to_string(index=False, header=False, float_format="{:.8e}".format) - header = f"{self.setname} {self.nsys} {self.ndata}" - dat_string = "\n".join([f" {i+1} {r}" for i, r in enumerate(dat_string_raw.split("\n"))]) - dat_path.write_text(f"{header}\n{dat_string}\n") - - sys_raw = self.systype_table.to_string(index=True, header=False, index_names=False) - sys_path.write_text(f"{self.nsys}\n{sys_raw}\n") + write_systype_to_file(self, sys_path) + write_commondata_to_file(self, dat_path) From 2af5f39a6251595ed22ddb53d24f13f430951073 Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Tue, 31 Jan 2023 16:45:36 +0000 Subject: [PATCH 34/49] Removed error rescaling within _filter_closure_data as unused. eliminates closure test runcard dependence on errorsize --- validphys2/src/validphys/filters.py | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/validphys2/src/validphys/filters.py b/validphys2/src/validphys/filters.py index d791afe660..81b6b96dde 100644 --- a/validphys2/src/validphys/filters.py +++ b/validphys2/src/validphys/filters.py @@ -119,8 +119,8 @@ def prepare_nnpdf_rng(filterseed:int, rngalgo:int, seed:int): RandomGenerator.InitRNG(rngalgo, seed) RandomGenerator.GetRNG().SetSeed(filterseed) -@check_positive('errorsize') -def filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed, errorsize): + +def filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed): """Filter closure data. In addition to cutting data points, the data is generated from an underlying ``fakepdf``, applying a shift to the data if ``fakenoise`` is ``True``, which emulates the experimental central values @@ -129,13 +129,12 @@ def filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed, error """ log.info('Filtering closure-test data.') return _filter_closure_data( - filter_path, data, fakepdf, fakenoise, filterseed, errorsize) + filter_path, data, fakepdf, fakenoise, filterseed) -@check_positive("errorsize") def filter_closure_data_by_experiment( filter_path, experiments_data, fakepdf, fakenoise, filterseed - , errorsize, experiments_index + , experiments_index ): """ Like :py:func:`filter_closure_data` except filters data by experiment. @@ -151,7 +150,7 @@ def filter_closure_data_by_experiment( for exp in experiments_data: experiment_index = experiments_index[experiments_index.isin([exp.name],level=0)] res.append(_filter_closure_data(filter_path, exp, fakepdf, fakenoise, - filterseed, errorsize, experiment_index)) + filterseed, experiment_index)) return res @@ -201,7 +200,7 @@ def _filter_real_data(filter_path, data): return total_data_points, total_cut_data_points -def _filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed, errorsize, experiments_index): +def _filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed, experiments_index): """ This function is accessed within a closure test only, that is, the fakedata namespace has to be True (If fakedata = False, the _filter_real_data function @@ -233,8 +232,6 @@ def _filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed, erro random seed used for the generation of random noise added to Level 0 data - errorsize : float - (defined in runcard) experiments_index : pandas.MultiIndex @@ -248,8 +245,6 @@ def _filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed, erro total_data_points = 0 total_cut_data_points = 0 - # Load data, don't cache result - loaded_data = data.load.__wrapped__(data) from validphys.pseudodata import level0_commondata_wc level0_commondata_instances_wc = level0_commondata_wc(data,fakepdf) @@ -262,10 +257,6 @@ def _filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed, erro make_dataset_dir(path / "systypes") total_data_points += nfull total_cut_data_points += ncut - # Rescale errors - loaded_ds = loaded_data.GetSet(j) - if errorsize != 1.0: - loaded_ds.RescaleErrors(errorsize) from validphys.commondataparser import write_commondata_to_file, write_systype_to_file if not fakenoise: From 68f389cae345e9aae28454bdf6c9fd46c3f753a2 Mon Sep 17 00:00:00 2001 From: "Juan M. Cruz-Martinez" Date: Wed, 8 Feb 2023 16:51:08 +0100 Subject: [PATCH 35/49] Update developing.yml --- n3fit/runcards/examples/developing.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/n3fit/runcards/examples/developing.yml b/n3fit/runcards/examples/developing.yml index 57c10236bc..2db98eacd5 100644 --- a/n3fit/runcards/examples/developing.yml +++ b/n3fit/runcards/examples/developing.yml @@ -92,6 +92,7 @@ parameters: # This defines the parameter dictionary that is passed to the Model fitting: fitbasis: EVOL # EVOL (7), EVOLQED (8), etc. + savepseudodata: False basis: - {fl: sng, trainable: false, smallx: [1.093, 1.121], largex: [1.486, 3.287]} - {fl: g, trainable: false, smallx: [0.8329, 1.071], largex: [3.084, 6.767]} From 3fb3454940a9bb5a0d9053c889af61c8f0f2858d Mon Sep 17 00:00:00 2001 From: Zahari Kassabov Date: Thu, 9 Feb 2023 09:28:36 +0000 Subject: [PATCH 36/49] Improve formatting --- validphys2/src/validphys/commondataparser.py | 53 ++++----- validphys2/src/validphys/core.py | 12 +- validphys2/src/validphys/coredata.py | 7 +- validphys2/src/validphys/filters.py | 74 ++++++++---- validphys2/src/validphys/pseudodata.py | 108 +++++++++++------- .../src/validphys/tests/test_pseudodata.py | 23 ++-- 6 files changed, 174 insertions(+), 103 deletions(-) diff --git a/validphys2/src/validphys/commondataparser.py b/validphys2/src/validphys/commondataparser.py index 724508fc96..55a470762b 100644 --- a/validphys2/src/validphys/commondataparser.py +++ b/validphys2/src/validphys/commondataparser.py @@ -125,93 +125,94 @@ def parse_systypes(systypefile): def write_commondata_data(commondata, buffer): """ - write commondata table to buffer, this can be a memory map, + write commondata table to buffer, this can be a memory map, compressed archive or strings (using for instance StringIO) - - + + Parameters ---------- - + commondata : validphys.coredata.CommonData - + buffer : memory map, compressed archive or strings example: StringIO object - - + + Example ------- >>> from validphys.loader import Loader >>> from io import StringIO - + >>> l = Loader() >>> cd = l.check_commondata("NMC").load_commondata_instance() >>> sio = StringIO() >>> write_commondata_data(cd,sio) >>> print(sio.getvalue()) - + """ header = f"{commondata.setname} {commondata.nsys} {commondata.ndata}\n" buffer.write(header) commondata.commondata_table.to_csv(buffer, sep="\t", header=None) -def write_commondata_to_file(commondata,path): +def write_commondata_to_file(commondata, path): """ write commondata table to file """ - with open(path,"w") as file: - write_commondata_data(commondata,file) + with open(path, "w") as file: + write_commondata_data(commondata, file) def write_systype_data(commondata, buffer): """ - write systype table to buffer, this can be a memory map, + write systype table to buffer, this can be a memory map, compressed archive or strings (using for instance StringIO) - - + + Parameters ---------- - + commondata : validphys.coredata.CommonData - + buffer : memory map, compressed archive or strings example: StringIO object - - + + Example ------- >>> from validphys.loader import Loader >>> from io import StringIO - + >>> l = Loader() >>> cd = l.check_commondata("NMC").load_commondata_instance() >>> sio = StringIO() >>> write_systype_data(cd,sio) >>> print(sio.getvalue()) - + """ header = f"{commondata.nsys}\n" buffer.write(header) commondata.systype_table.to_csv(buffer, sep="\t", header=None) -def write_systype_to_file(commondata,path): + +def write_systype_to_file(commondata, path): """ write systype table to file """ - with open(path,"w") as file: - write_systype_data(commondata,file) + with open(path, "w") as file: + write_systype_data(commondata, file) + @dataclasses.dataclass(frozen=True) class CommonDataMetadata: """Contains metadata information about the data being read""" + name: str nsys: int ndata: int process_type: str - - def peek_commondata_metadata(commondatafilename): """Read some of the properties of the commondata object as a CommonData Metadata """ diff --git a/validphys2/src/validphys/core.py b/validphys2/src/validphys/core.py index 5f6fe29f1e..854b1c0c1a 100644 --- a/validphys2/src/validphys/core.py +++ b/validphys2/src/validphys/core.py @@ -282,8 +282,9 @@ def load_commondata_instance(self): load a validphys.core.CommonDataSpec to validphys.core.CommonData """ from validphys.commondataparser import load_commondata + return load_commondata(self) - + @property def plot_kinlabels(self): return get_plot_kinlabels(self) @@ -625,6 +626,7 @@ def load(self): def load_commondata(self): return [d.load_commondata() for d in self.datasets] + def load_commondata_instance(self): """ Given Experiment load list of validphys.coredata.CommonData @@ -635,9 +637,13 @@ def load_commondata_instance(self): if dataset.cuts is None: commodata_list.append(dataset.commondata.load_commondata_instance()) else: - commodata_list.append(dataset.commondata.load_commondata_instance().with_cuts(dataset.cuts.load())) + commodata_list.append( + dataset.commondata.load_commondata_instance().with_cuts( + dataset.cuts.load() + ) + ) return commodata_list - + @property def thspec(self): #TODO: Is this good enough? Should we explicitly pass the theory diff --git a/validphys2/src/validphys/coredata.py b/validphys2/src/validphys/coredata.py index b494a31199..7e60ba5897 100644 --- a/validphys2/src/validphys/coredata.py +++ b/validphys2/src/validphys/coredata.py @@ -369,13 +369,18 @@ def systematic_errors(self, central_values=None): converted_mult_errors = self.multiplicative_errors * central_values[:, np.newaxis] / 100 return pd.concat((self.additive_errors, converted_mult_errors), axis=1) + def export(self, path): """Export the data, and error types - A DATA_.dat file with the dataframe of accepted points - A systypes/STYPES_.dat file with the error types """ - from validphys.commondataparser import write_systype_to_file, write_commondata_to_file + from validphys.commondataparser import ( + write_systype_to_file, + write_commondata_to_file, + ) + dat_path = path / f"DATA_{self.setname}.dat" sys_path = path / "systypes" / f"SYSTYPE_{self.setname}_DEFAULT.dat" sys_path.parent.mkdir(exist_ok=True) diff --git a/validphys2/src/validphys/filters.py b/validphys2/src/validphys/filters.py index 705dbf26cf..4111571ed9 100644 --- a/validphys2/src/validphys/filters.py +++ b/validphys2/src/validphys/filters.py @@ -128,13 +128,11 @@ def filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed): """ log.info('Filtering closure-test data.') - return _filter_closure_data( - filter_path, data, fakepdf, fakenoise, filterseed) + return _filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed) def filter_closure_data_by_experiment( - filter_path, experiments_data, fakepdf, fakenoise, filterseed - , experiments_index + filter_path, experiments_data, fakepdf, fakenoise, filterseed, experiments_index ): """ Like :py:func:`filter_closure_data` except filters data by experiment. @@ -148,9 +146,14 @@ def filter_closure_data_by_experiment( res = [] for exp in experiments_data: - experiment_index = experiments_index[experiments_index.isin([exp.name],level=0)] - res.append(_filter_closure_data(filter_path, exp, fakepdf, fakenoise, - filterseed, experiment_index)) + experiment_index = experiments_index[ + experiments_index.isin([exp.name], level=0) + ] + res.append( + _filter_closure_data( + filter_path, exp, fakepdf, fakenoise, filterseed, experiment_index + ) + ) return res @@ -200,7 +203,9 @@ def _filter_real_data(filter_path, data): return total_data_points, total_cut_data_points -def _filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed, experiments_index): +def _filter_closure_data( + filter_path, data, fakepdf, fakenoise, filterseed, experiments_index +): """ This function is accessed within a closure test only, that is, the fakedata namespace has to be True (If fakedata = False, the _filter_real_data function @@ -247,39 +252,62 @@ def _filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed, expe total_cut_data_points = 0 from validphys.pseudodata import level0_commondata_wc - level0_commondata_instances_wc = level0_commondata_wc(data,fakepdf) - commondata_instances_wc = data.load_commondata_instance() # used to generate experimental covariance matrix + + level0_commondata_instances_wc = level0_commondata_wc(data, fakepdf) + commondata_instances_wc = ( + data.load_commondata_instance() + ) # used to generate experimental covariance matrix for j, dataset in enumerate(data.datasets): - #== print number of points passing cuts, make dataset directory and write FKMASK ==# + # == print number of points passing cuts, make dataset directory and write FKMASK ==# path = filter_path / dataset.name nfull, ncut = _write_ds_cut_data(path, dataset) make_dataset_dir(path / "systypes") total_data_points += nfull total_cut_data_points += ncut - from validphys.commondataparser import write_commondata_to_file, write_systype_to_file + from validphys.commondataparser import ( + write_commondata_to_file, + write_systype_to_file, + ) + if not fakenoise: - #======= Level 0 closure test =======# + # ======= Level 0 closure test =======# log.info("Writing Level0 data") for l0_cd in level0_commondata_instances_wc: path_cd = filter_path / l0_cd.setname / f"DATA_{l0_cd.setname}.dat" - path_sys = filter_path / l0_cd.setname / "systypes" / f"SYSTYPE_{l0_cd.setname}_DEFAULT.dat" - write_commondata_to_file(commondata=l0_cd,path=path_cd) - write_systype_to_file(commondata=l0_cd,path=path_sys) + path_sys = ( + filter_path + / l0_cd.setname + / "systypes" + / f"SYSTYPE_{l0_cd.setname}_DEFAULT.dat" + ) + write_commondata_to_file(commondata=l0_cd, path=path_cd) + write_systype_to_file(commondata=l0_cd, path=path_sys) else: - #======= Level 1 closure test =======# + # ======= Level 1 closure test =======# from validphys.pseudodata import make_level1_data - level1_commondata_instances_wc = make_level1_data(data,commondata_instances_wc,level0_commondata_instances_wc, - filterseed, experiments_index) - #====== write commondata and systype files ======# + + level1_commondata_instances_wc = make_level1_data( + data, + commondata_instances_wc, + level0_commondata_instances_wc, + filterseed, + experiments_index, + ) + # ====== write commondata and systype files ======# log.info("Writing Level1 data") for l1_cd in level1_commondata_instances_wc: path_cd = filter_path / l1_cd.setname / f"DATA_{l1_cd.setname}.dat" - path_sys = filter_path / l1_cd.setname / "systypes" / f"SYSTYPE_{l1_cd.setname}_DEFAULT.dat" - write_commondata_to_file(commondata=l1_cd,path=path_cd) - write_systype_to_file(commondata=l1_cd,path=path_sys) + path_sys = ( + filter_path + / l1_cd.setname + / "systypes" + / f"SYSTYPE_{l1_cd.setname}_DEFAULT.dat" + ) + write_commondata_to_file(commondata=l1_cd, path=path_cd) + write_systype_to_file(commondata=l1_cd, path=path_sys) return total_data_points, total_cut_data_points diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index f9feb0977d..088ca2fe51 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -236,88 +236,95 @@ def indexed_make_replica(groups_index, make_replica): return pd.DataFrame(make_replica, index=groups_index, columns=["data"]) -def level0_commondata_wc(data,fakepdf): +def level0_commondata_wc(data, fakepdf): """ - Given a validphys.core.DataGroupSpec object, load commondata and + Given a validphys.core.DataGroupSpec object, load commondata and generate a new commondata instance with central values replaced - by fakepdf prediction + by fakepdf prediction Parameters ---------- - data : validphys.core.DataGroupSpec - + data : validphys.core.DataGroupSpec + fakepdf: validphys.core.PDF Returns ------- list list of validphys.coredata.CommonData instances corresponding to - all datasets within one experiment. The central value is replaced + all datasets within one experiment. The central value is replaced by Level 0 fake data. Example ------- >>> from validphys.api import API >>> API.level0_commondata_wc(dataset_inputs = [{"dataset":"NMC"}], use_cuts="internal", theoryid=200,fakepdf = "NNPDF40_nnlo_as_01180") - + [CommonData(setname='NMC', ndata=204, commondataproc='DIS_NCE', nkin=3, nsys=16)] """ level0_commondata_instances_wc = [] - #==== Load validphys.coredata.CommonData instance with cuts ====# - + # ==== Load validphys.coredata.CommonData instance with cuts ====# + for j, dataset in enumerate(data.datasets): if dataset.cuts is None: commondata_wc = dataset.commondata.load_commondata_instance() else: cuts = dataset.cuts.load() - commondata_wc = dataset.commondata.load_commondata_instance().with_cuts(cuts) - - - #== Generate a new CommonData instance with central value given by Level 0 data generated with fakepdf ==# + commondata_wc = dataset.commondata.load_commondata_instance().with_cuts( + cuts + ) + + # == Generate a new CommonData instance with central value given by Level 0 data generated with fakepdf ==# from validphys.covmats import dataset_t0_predictions - t0_prediction = dataset_t0_predictions(dataset=dataset, t0set=fakepdf) # N.B. cuts already applied to th. pred. - level0_commondata_instances_wc.append(commondata_wc.with_central_value(t0_prediction)) - return level0_commondata_instances_wc + t0_prediction = dataset_t0_predictions( + dataset=dataset, t0set=fakepdf + ) # N.B. cuts already applied to th. pred. + level0_commondata_instances_wc.append( + commondata_wc.with_central_value(t0_prediction) + ) + return level0_commondata_instances_wc -def make_level1_data(data,commondata_wc,level0_commondata_wc,filterseed,experiments_index): +def make_level1_data( + data, commondata_wc, level0_commondata_wc, filterseed, experiments_index +): """ Given a list of level0 commondata instances, return the same list - with central values replaced by level1 data + with central values replaced by level1 data Parameters ---------- - data : validphys.core.DataGroupSpec + data : validphys.core.DataGroupSpec commondata_wc : list list of validphys.coredata.CommonData instances corresponding to all datasets within one experiment. Cuts already applied. - + level0_commondata_wc : list list of validphys.coredata.CommonData instances corresponding to - all datasets within one experiment. The central value is replaced + all datasets within one experiment. The central value is replaced by Level 0 fake data. Cuts already applied. - + filterseed: int random seed used for the generation of Level 1 data - + Returns ------- list list of validphys.coredata.CommonData instances corresponding to - all datasets within one experiment. The central value is replaced + all datasets within one experiment. The central value is replaced by Level 1 fake data. Example ------- - + >>> from validphys.api import API >>> dataset='NMC' >>> l1_cd = API.make_level1_data(dataset_inputs = [{"dataset":dataset}],use_cuts="internal", theoryid=200, @@ -326,34 +333,49 @@ def make_level1_data(data,commondata_wc,level0_commondata_wc,filterseed,experime [CommonData(setname='NMC', ndata=204, commondataproc='DIS_NCE', nkin=3, nsys=16)] """ - #=============== generate experimental covariance matrix ===============# + # =============== generate experimental covariance matrix ===============# from validphys.covmats import dataset_inputs_covmat_from_systematics - dataset_input_list = list(data.dsinputs) - covmat = dataset_inputs_covmat_from_systematics(commondata_wc, dataset_input_list, - use_weights_in_covmat=False, norm_threshold=None, - _list_of_central_values=None, _only_additive=False,) + dataset_input_list = list(data.dsinputs) - #================== generation of pseudo data ======================# - #= generate pseudo data starting from theory predictions - level1_data = make_replica(level0_commondata_wc, filterseed, covmat, - sep_mult=False, genrep=True) + covmat = dataset_inputs_covmat_from_systematics( + commondata_wc, + dataset_input_list, + use_weights_in_covmat=False, + norm_threshold=None, + _list_of_central_values=None, + _only_additive=False, + ) + + # ================== generation of pseudo data ======================# + # = generate pseudo data starting from theory predictions + level1_data = make_replica( + level0_commondata_wc, filterseed, covmat, sep_mult=False, genrep=True + ) indexed_level1_data = indexed_make_replica(experiments_index, level1_data) - - #===== create commondata instances with central values given by pseudo_data =====# - level1_commondata_dict = {c.setname:c for c in level0_commondata_wc} + + # ===== create commondata instances with central values given by pseudo_data =====# + level1_commondata_dict = {c.setname: c for c in level0_commondata_wc} level1_commondata_instances_wc = [] for xx, grp in indexed_level1_data.groupby('dataset'): level1_commondata_instances_wc.append( - level1_commondata_dict[xx].with_central_value(grp.values)) + level1_commondata_dict[xx].with_central_value(grp.values) + ) return level1_commondata_instances_wc - -_group_recreate_pseudodata = collect('indexed_make_replica', ('group_dataset_inputs_by_experiment',)) -_recreate_fit_pseudodata = collect('_group_recreate_pseudodata', ('fitreplicas', 'fitenvironment')) -_recreate_pdf_pseudodata = collect('_group_recreate_pseudodata', ('pdfreplicas', 'fitenvironment')) + + +_group_recreate_pseudodata = collect( + 'indexed_make_replica', ('group_dataset_inputs_by_experiment',) +) +_recreate_fit_pseudodata = collect( + '_group_recreate_pseudodata', ('fitreplicas', 'fitenvironment') +) +_recreate_pdf_pseudodata = collect( + '_group_recreate_pseudodata', ('pdfreplicas', 'fitenvironment') +) fit_tr_masks = collect('replica_training_mask_table', ('fitreplicas', 'fitenvironment')) pdf_tr_masks = collect('replica_training_mask_table', ('pdfreplicas', 'fitenvironment')) @@ -418,4 +440,4 @@ def recreate_pdf_pseudodata(_recreate_pdf_pseudodata, pdfreplicas, pdf_tr_masks) pdf_tr_masks_no_table = collect('replica_training_mask', ('pdfreplicas', 'fitenvironment')) def recreate_pdf_pseudodata_no_table(_recreate_pdf_pseudodata, pdfreplicas, pdf_tr_masks_no_table): - return recreate_pdf_pseudodata(_recreate_pdf_pseudodata, pdfreplicas, pdf_tr_masks_no_table) \ No newline at end of file + return recreate_pdf_pseudodata(_recreate_pdf_pseudodata, pdfreplicas, pdf_tr_masks_no_table) diff --git a/validphys2/src/validphys/tests/test_pseudodata.py b/validphys2/src/validphys/tests/test_pseudodata.py index 285be67bd9..21e4c8edac 100644 --- a/validphys2/src/validphys/tests/test_pseudodata.py +++ b/validphys2/src/validphys/tests/test_pseudodata.py @@ -87,13 +87,22 @@ def test_read_matches_recreate(): def test_level0_commondata_wc(): from validphys.loader import Loader from validphys.covmats import dataset_t0_predictions - dataset=SINGLE_DATASET - pdfname=PDF + + dataset = SINGLE_DATASET + pdfname = PDF l = Loader() - datasetspec = l.check_dataset(list(dataset.values())[0],theoryid=THEORYID) + datasetspec = l.check_dataset(list(dataset.values())[0], theoryid=THEORYID) t0set = l.check_pdf(pdfname) - l0_cd = API.level0_commondata_wc(dataset_inputs = [dataset], - use_cuts="internal", theoryid=THEORYID, fakepdf = pdfname) + l0_cd = API.level0_commondata_wc( + dataset_inputs=[dataset], + use_cuts="internal", + theoryid=THEORYID, + fakepdf=pdfname, + ) l0_vals = l0_cd[0].central_values - assert_allclose(dataset_t0_predictions(dataset = datasetspec, t0set = t0set), - l0_vals,rtol=1e-07, atol=0) \ No newline at end of file + assert_allclose( + dataset_t0_predictions(dataset=datasetspec, t0set=t0set), + l0_vals, + rtol=1e-07, + atol=0, + ) From ab60f9d733f808ed5f663dc338e228d1bf48ec66 Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Thu, 9 Feb 2023 13:01:01 +0000 Subject: [PATCH 37/49] load cd before if conditions --- validphys2/src/validphys/core.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/validphys2/src/validphys/core.py b/validphys2/src/validphys/core.py index 854b1c0c1a..944056a6d7 100644 --- a/validphys2/src/validphys/core.py +++ b/validphys2/src/validphys/core.py @@ -634,14 +634,11 @@ def load_commondata_instance(self): """ commodata_list = [] for dataset in self.datasets: + cd = dataset.commondata.load_commondata_instance() if dataset.cuts is None: - commodata_list.append(dataset.commondata.load_commondata_instance()) + commodata_list.append(cd) else: - commodata_list.append( - dataset.commondata.load_commondata_instance().with_cuts( - dataset.cuts.load() - ) - ) + commodata_list.append(cd.with_cuts(dataset.cuts.load())) return commodata_list @property From b4177bb74628e53be778e01e887eb72c59ced866 Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Thu, 9 Feb 2023 13:01:46 +0000 Subject: [PATCH 38/49] import dataset_t0_predictions outside of loop --- validphys2/src/validphys/pseudodata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index 088ca2fe51..65e00f6f53 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -263,6 +263,7 @@ def level0_commondata_wc(data, fakepdf): [CommonData(setname='NMC', ndata=204, commondataproc='DIS_NCE', nkin=3, nsys=16)] """ + from validphys.covmats import dataset_t0_predictions level0_commondata_instances_wc = [] # ==== Load validphys.coredata.CommonData instance with cuts ====# @@ -277,7 +278,6 @@ def level0_commondata_wc(data, fakepdf): ) # == Generate a new CommonData instance with central value given by Level 0 data generated with fakepdf ==# - from validphys.covmats import dataset_t0_predictions t0_prediction = dataset_t0_predictions( dataset=dataset, t0set=fakepdf From af4b97d4e079f15fe5602271ffc995e223a79dcb Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Thu, 9 Feb 2023 13:08:48 +0000 Subject: [PATCH 39/49] import write commondata functions at the top of module --- validphys2/src/validphys/filters.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/validphys2/src/validphys/filters.py b/validphys2/src/validphys/filters.py index 4111571ed9..fde9e860c8 100644 --- a/validphys2/src/validphys/filters.py +++ b/validphys2/src/validphys/filters.py @@ -12,7 +12,10 @@ from reportengine.checks import make_argcheck, check, check_positive, make_check from reportengine.compat import yaml import validphys.cuts - +from validphys.commondataparser import ( + write_commondata_to_file, + write_systype_to_file, + ) log = logging.getLogger(__name__) KIN_LABEL = { @@ -247,7 +250,6 @@ def _filter_closure_data( total data points and points passing the cuts """ - total_data_points = 0 total_cut_data_points = 0 @@ -266,11 +268,6 @@ def _filter_closure_data( total_data_points += nfull total_cut_data_points += ncut - from validphys.commondataparser import ( - write_commondata_to_file, - write_systype_to_file, - ) - if not fakenoise: # ======= Level 0 closure test =======# log.info("Writing Level0 data") @@ -308,7 +305,7 @@ def _filter_closure_data( ) write_commondata_to_file(commondata=l1_cd, path=path_cd) write_systype_to_file(commondata=l1_cd, path=path_sys) - + return total_data_points, total_cut_data_points From 3e8161e84c95946d4d4dda7d8209eec8908fae11 Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Thu, 9 Feb 2023 13:09:33 +0000 Subject: [PATCH 40/49] import at top of module --- validphys2/src/validphys/tests/test_pseudodata.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/validphys2/src/validphys/tests/test_pseudodata.py b/validphys2/src/validphys/tests/test_pseudodata.py index 21e4c8edac..b6149006ff 100644 --- a/validphys2/src/validphys/tests/test_pseudodata.py +++ b/validphys2/src/validphys/tests/test_pseudodata.py @@ -14,7 +14,8 @@ from validphys.api import API from validphys.tests.conftest import FIT, PSEUDODATA_FIT, THEORYID, SINGLE_DATASET, PDF - +from validphys.loader import Loader +from validphys.covmats import dataset_t0_predictions def test_read_fit_pseudodata(): fit_pseudodata = API.read_fit_pseudodata(fit=PSEUDODATA_FIT) @@ -85,14 +86,16 @@ def test_read_matches_recreate(): def test_level0_commondata_wc(): - from validphys.loader import Loader - from validphys.covmats import dataset_t0_predictions - + """ + check whether level0_commondata_wc and dataset_t0_predictions + coincide + """ dataset = SINGLE_DATASET pdfname = PDF l = Loader() datasetspec = l.check_dataset(list(dataset.values())[0], theoryid=THEORYID) t0set = l.check_pdf(pdfname) + l0_cd = API.level0_commondata_wc( dataset_inputs=[dataset], use_cuts="internal", From bc056bf7190d61732e57f45ddc0d3e39b906ce7c Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Fri, 10 Feb 2023 16:03:41 +0000 Subject: [PATCH 41/49] simplification of logic of _filter_closure_data function --- validphys2/src/validphys/filters.py | 74 ++++++++++++----------------- 1 file changed, 30 insertions(+), 44 deletions(-) diff --git a/validphys2/src/validphys/filters.py b/validphys2/src/validphys/filters.py index fde9e860c8..9f30da2375 100644 --- a/validphys2/src/validphys/filters.py +++ b/validphys2/src/validphys/filters.py @@ -9,7 +9,7 @@ import numpy as np -from reportengine.checks import make_argcheck, check, check_positive, make_check +from reportengine.checks import make_argcheck, check, make_check from reportengine.compat import yaml import validphys.cuts from validphys.commondataparser import ( @@ -194,7 +194,6 @@ def _write_ds_cut_data(path, dataset): def _filter_real_data(filter_path, data): """Filter real experimental data.""" - total_data_points = 0 total_cut_data_points = 0 for dataset in data.datasets: @@ -253,59 +252,46 @@ def _filter_closure_data( total_data_points = 0 total_cut_data_points = 0 - from validphys.pseudodata import level0_commondata_wc + # circular import generated @ core.py + from validphys.pseudodata import level0_commondata_wc, make_level1_data - level0_commondata_instances_wc = level0_commondata_wc(data, fakepdf) - commondata_instances_wc = ( - data.load_commondata_instance() - ) # used to generate experimental covariance matrix + closure_data = level0_commondata_wc(data, fakepdf) - for j, dataset in enumerate(data.datasets): - # == print number of points passing cuts, make dataset directory and write FKMASK ==# + for dataset in data.datasets: + #== print number of points passing cuts, make dataset directory and write FKMASK ==# path = filter_path / dataset.name nfull, ncut = _write_ds_cut_data(path, dataset) make_dataset_dir(path / "systypes") total_data_points += nfull total_cut_data_points += ncut - - if not fakenoise: - # ======= Level 0 closure test =======# - log.info("Writing Level0 data") - for l0_cd in level0_commondata_instances_wc: - path_cd = filter_path / l0_cd.setname / f"DATA_{l0_cd.setname}.dat" - path_sys = ( - filter_path - / l0_cd.setname - / "systypes" - / f"SYSTYPE_{l0_cd.setname}_DEFAULT.dat" + + if fakenoise: + #======= Level 1 closure test =======# + + closure_data = make_level1_data( + data, + closure_data, + filterseed, + experiments_index, ) - write_commondata_to_file(commondata=l0_cd, path=path_cd) - write_systype_to_file(commondata=l0_cd, path=path_sys) + #====== write commondata and systype files ======# + if fakenoise: + log.info("Writing Level1 data") else: - # ======= Level 1 closure test =======# - from validphys.pseudodata import make_level1_data - - level1_commondata_instances_wc = make_level1_data( - data, - commondata_instances_wc, - level0_commondata_instances_wc, - filterseed, - experiments_index, + log.info("Writing Level0 data") + + for cd in closure_data: + path_cd = filter_path / cd.setname / f"DATA_{cd.setname}.dat" + path_sys = ( + filter_path + / cd.setname + / "systypes" + / f"SYSTYPE_{cd.setname}_DEFAULT.dat" ) - # ====== write commondata and systype files ======# - log.info("Writing Level1 data") - for l1_cd in level1_commondata_instances_wc: - path_cd = filter_path / l1_cd.setname / f"DATA_{l1_cd.setname}.dat" - path_sys = ( - filter_path - / l1_cd.setname - / "systypes" - / f"SYSTYPE_{l1_cd.setname}_DEFAULT.dat" - ) - write_commondata_to_file(commondata=l1_cd, path=path_cd) - write_systype_to_file(commondata=l1_cd, path=path_sys) - + write_commondata_to_file(commondata=cd, path=path_cd) + write_systype_to_file(commondata=cd, path=path_sys) + return total_data_points, total_cut_data_points From 1f61dfc69d19f22cac21517e87bc64ddde342520 Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Fri, 10 Feb 2023 16:05:35 +0000 Subject: [PATCH 42/49] eliminated dependence of make_level1_data function on commondata_wc provider as unnecessary --- validphys2/src/validphys/pseudodata.py | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index 65e00f6f53..67d0144a7b 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -10,7 +10,7 @@ import numpy as np import pandas as pd -from validphys.covmats import INTRA_DATASET_SYS_NAME, sqrt_covmat +from validphys.covmats import INTRA_DATASET_SYS_NAME, sqrt_covmat, dataset_inputs_covmat_from_systematics from reportengine import collect @@ -268,14 +268,11 @@ def level0_commondata_wc(data, fakepdf): # ==== Load validphys.coredata.CommonData instance with cuts ====# - for j, dataset in enumerate(data.datasets): - if dataset.cuts is None: - commondata_wc = dataset.commondata.load_commondata_instance() - else: + for dataset in data.datasets: + commondata_wc = dataset.commondata.load_commondata_instance() + if dataset.cuts is not None: cuts = dataset.cuts.load() - commondata_wc = dataset.commondata.load_commondata_instance().with_cuts( - cuts - ) + commondata_wc = commondata_wc.with_cuts(cuts) # == Generate a new CommonData instance with central value given by Level 0 data generated with fakepdf ==# @@ -290,7 +287,7 @@ def level0_commondata_wc(data, fakepdf): def make_level1_data( - data, commondata_wc, level0_commondata_wc, filterseed, experiments_index + data, level0_commondata_wc, filterseed, experiments_index ): """ Given a list of level0 commondata instances, return the same list @@ -302,10 +299,6 @@ def make_level1_data( data : validphys.core.DataGroupSpec - commondata_wc : list - list of validphys.coredata.CommonData instances corresponding to - all datasets within one experiment. Cuts already applied. - level0_commondata_wc : list list of validphys.coredata.CommonData instances corresponding to all datasets within one experiment. The central value is replaced @@ -332,11 +325,11 @@ def make_level1_data( >>> l1_cd [CommonData(setname='NMC', ndata=204, commondataproc='DIS_NCE', nkin=3, nsys=16)] """ - # =============== generate experimental covariance matrix ===============# - from validphys.covmats import dataset_inputs_covmat_from_systematics dataset_input_list = list(data.dsinputs) + + commondata_wc = data.load_commondata_instance() covmat = dataset_inputs_covmat_from_systematics( commondata_wc, From 167cbe22965193cc23e33a77d46a9b821e41a4d1 Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Fri, 10 Feb 2023 16:06:03 +0000 Subject: [PATCH 43/49] commondata_wc provider deleted as superfluos --- validphys2/src/validphys/results.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/validphys2/src/validphys/results.py b/validphys2/src/validphys/results.py index 531ed599c2..ad76c043ad 100644 --- a/validphys2/src/validphys/results.py +++ b/validphys2/src/validphys/results.py @@ -224,25 +224,6 @@ def procs_data_values(proc_result_table): data_central_values = proc_result_table["data_central"] return data_central_values -def commondata_wc(data): - """ - commondata with cuts: - given a DataGroupSpec load all the DataSetInput - instances into CommonData instances with cuts - already applied to - - Parameters - ---------- - - data : validphys.core.DataGroupSpec - - Returns - ------- - list containing commondata instances with cuts - - """ - return data.load_commondata_instance() - groups_results = collect( "dataset_inputs_results", ("group_dataset_inputs_by_metadata",) ) From 804ebcabbf175e94b0f64e8ec96f92c5677e3e2a Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Mon, 13 Feb 2023 09:22:16 +0000 Subject: [PATCH 44/49] _filter_closure_data no longer depends on prepare_nnpdf_rng and check_rngalgo --- validphys2/src/validphys/filters.py | 27 +-------------------------- 1 file changed, 1 insertion(+), 26 deletions(-) diff --git a/validphys2/src/validphys/filters.py b/validphys2/src/validphys/filters.py index 9f30da2375..cdd3b689cb 100644 --- a/validphys2/src/validphys/filters.py +++ b/validphys2/src/validphys/filters.py @@ -9,7 +9,7 @@ import numpy as np -from reportengine.checks import make_argcheck, check, make_check +from reportengine.checks import check, make_check from reportengine.compat import yaml import validphys.cuts from validphys.commondataparser import ( @@ -75,12 +75,6 @@ def default_filter_rules_input(): return yaml.safe_load(read_text(validphys.cuts, "filters.yaml")) -@make_argcheck -def check_rngalgo(rngalgo: int): - """Check rngalgo content""" - check(0 <= rngalgo < 17, - "Invalid rngalgo. Must be int between [0, 16].") - def check_nonnegative(var: str): """Ensure that `var` is positive""" @@ -103,25 +97,6 @@ def export_mask(path, mask): """Dump mask to file""" np.savetxt(path, mask, fmt='%d') -@check_rngalgo -@check_nonnegative('filterseed') -@check_nonnegative('seed') -def prepare_nnpdf_rng(filterseed:int, rngalgo:int, seed:int): - """Initialise the internal NNPDF RNG, specified by ``rngalgo`` which must - be an integer between 0 and 16, seeded with ``filterseed``. - The RNG can then be subsequently used to i.e generate pseudodata. - """ - try: - from NNPDF import RandomGenerator - except ImportError as e: - logging.error("Generating closure data needs a valid installation of libNNPDF") - raise e - - log.warning("Importing libNNPDF") - log.info("Initialising RNG") - RandomGenerator.InitRNG(rngalgo, seed) - RandomGenerator.GetRNG().SetSeed(filterseed) - def filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed): """Filter closure data. In addition to cutting data points, the data is From 9025c957972044fda5a288812dbc9fa0ad506e7d Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Tue, 21 Feb 2023 15:46:31 +0000 Subject: [PATCH 45/49] import write functions from commondatawriter to avoid circular import error --- validphys2/src/validphys/coredata.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/validphys2/src/validphys/coredata.py b/validphys2/src/validphys/coredata.py index 7e60ba5897..335f6d0a49 100644 --- a/validphys2/src/validphys/coredata.py +++ b/validphys2/src/validphys/coredata.py @@ -7,7 +7,10 @@ import dataclasses import numpy as np import pandas as pd - +from validphys.commondatawriter import ( + write_systype_to_file, + write_commondata_to_file, + ) KIN_NAMES = ["kin1", "kin2", "kin3"] @@ -372,14 +375,11 @@ def systematic_errors(self, central_values=None): def export(self, path): """Export the data, and error types + Use the same format as libNNPDF: - A DATA_.dat file with the dataframe of accepted points - A systypes/STYPES_.dat file with the error types """ - from validphys.commondataparser import ( - write_systype_to_file, - write_commondata_to_file, - ) dat_path = path / f"DATA_{self.setname}.dat" sys_path = path / "systypes" / f"SYSTYPE_{self.setname}_DEFAULT.dat" From a4e6b399b5e92210b38d9df2788b09c299553505 Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Tue, 21 Feb 2023 15:47:21 +0000 Subject: [PATCH 46/49] module for writing commondata and systype table to file --- validphys2/src/validphys/commondatawriter.py | 83 ++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 validphys2/src/validphys/commondatawriter.py diff --git a/validphys2/src/validphys/commondatawriter.py b/validphys2/src/validphys/commondatawriter.py new file mode 100644 index 0000000000..07b06b4585 --- /dev/null +++ b/validphys2/src/validphys/commondatawriter.py @@ -0,0 +1,83 @@ +""" +This module contains functions to write commondata and systypes +tables to files +""" + +def write_commondata_data(commondata, buffer): + """ + write commondata table to buffer, this can be a memory map, + compressed archive or strings (using for instance StringIO) + + + Parameters + ---------- + + commondata : validphys.coredata.CommonData + + buffer : memory map, compressed archive or strings + example: StringIO object + + + Example + ------- + >>> from validphys.loader import Loader + >>> from io import StringIO + + >>> l = Loader() + >>> cd = l.check_commondata("NMC").load_commondata_instance() + >>> sio = StringIO() + >>> write_commondata_data(cd,sio) + >>> print(sio.getvalue()) + + """ + header = f"{commondata.setname} {commondata.nsys} {commondata.ndata}\n" + buffer.write(header) + commondata.commondata_table.to_csv(buffer, sep="\t", header=None) + + +def write_commondata_to_file(commondata, path): + """ + write commondata table to file + """ + with open(path, "w") as file: + write_commondata_data(commondata, file) + + +def write_systype_data(commondata, buffer): + """ + write systype table to buffer, this can be a memory map, + compressed archive or strings (using for instance StringIO) + + + Parameters + ---------- + + commondata : validphys.coredata.CommonData + + buffer : memory map, compressed archive or strings + example: StringIO object + + + Example + ------- + >>> from validphys.loader import Loader + >>> from io import StringIO + + >>> l = Loader() + >>> cd = l.check_commondata("NMC").load_commondata_instance() + >>> sio = StringIO() + >>> write_systype_data(cd,sio) + >>> print(sio.getvalue()) + + """ + header = f"{commondata.nsys}\n" + buffer.write(header) + commondata.systype_table.to_csv(buffer, sep="\t", header=None) + + +def write_systype_to_file(commondata, path): + """ + write systype table to file + """ + with open(path, "w") as file: + write_systype_data(commondata, file) \ No newline at end of file From d5fb7a0f3f8c5298f8faf57eba29ed47b12e6d7a Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Tue, 21 Feb 2023 17:42:15 +0000 Subject: [PATCH 47/49] write function in commondatawrite --- validphys2/src/validphys/commondataparser.py | 80 -------------------- 1 file changed, 80 deletions(-) diff --git a/validphys2/src/validphys/commondataparser.py b/validphys2/src/validphys/commondataparser.py index 55a470762b..987170fee1 100644 --- a/validphys2/src/validphys/commondataparser.py +++ b/validphys2/src/validphys/commondataparser.py @@ -123,86 +123,6 @@ def parse_systypes(systypefile): return systypetable -def write_commondata_data(commondata, buffer): - """ - write commondata table to buffer, this can be a memory map, - compressed archive or strings (using for instance StringIO) - - - Parameters - ---------- - - commondata : validphys.coredata.CommonData - - buffer : memory map, compressed archive or strings - example: StringIO object - - - Example - ------- - >>> from validphys.loader import Loader - >>> from io import StringIO - - >>> l = Loader() - >>> cd = l.check_commondata("NMC").load_commondata_instance() - >>> sio = StringIO() - >>> write_commondata_data(cd,sio) - >>> print(sio.getvalue()) - - """ - header = f"{commondata.setname} {commondata.nsys} {commondata.ndata}\n" - buffer.write(header) - commondata.commondata_table.to_csv(buffer, sep="\t", header=None) - - -def write_commondata_to_file(commondata, path): - """ - write commondata table to file - """ - with open(path, "w") as file: - write_commondata_data(commondata, file) - - -def write_systype_data(commondata, buffer): - """ - write systype table to buffer, this can be a memory map, - compressed archive or strings (using for instance StringIO) - - - Parameters - ---------- - - commondata : validphys.coredata.CommonData - - buffer : memory map, compressed archive or strings - example: StringIO object - - - Example - ------- - >>> from validphys.loader import Loader - >>> from io import StringIO - - >>> l = Loader() - >>> cd = l.check_commondata("NMC").load_commondata_instance() - >>> sio = StringIO() - >>> write_systype_data(cd,sio) - >>> print(sio.getvalue()) - - """ - header = f"{commondata.nsys}\n" - buffer.write(header) - commondata.systype_table.to_csv(buffer, sep="\t", header=None) - - -def write_systype_to_file(commondata, path): - """ - write systype table to file - """ - with open(path, "w") as file: - write_systype_data(commondata, file) - - @dataclasses.dataclass(frozen=True) class CommonDataMetadata: """Contains metadata information about the data being read""" From 5c2aaec940ebb501c755db83a2f789fe71ce2b8b Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Tue, 21 Feb 2023 21:47:28 +0000 Subject: [PATCH 48/49] . --- validphys2/src/validphys/filters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validphys2/src/validphys/filters.py b/validphys2/src/validphys/filters.py index cdd3b689cb..2532ca2c00 100644 --- a/validphys2/src/validphys/filters.py +++ b/validphys2/src/validphys/filters.py @@ -12,7 +12,7 @@ from reportengine.checks import check, make_check from reportengine.compat import yaml import validphys.cuts -from validphys.commondataparser import ( +from validphys.commondatawriter import ( write_commondata_to_file, write_systype_to_file, ) From 0da9735620c1ad297a11b2aa87ed1756ab919cdf Mon Sep 17 00:00:00 2001 From: "Juan M. Cruz-Martinez" Date: Wed, 22 Feb 2023 10:39:16 +0100 Subject: [PATCH 49/49] Apply suggestions from code review --- validphys2/src/validphys/commondataparser.py | 2 -- validphys2/src/validphys/coredata.py | 5 +---- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/validphys2/src/validphys/commondataparser.py b/validphys2/src/validphys/commondataparser.py index 987170fee1..80d55c0390 100644 --- a/validphys2/src/validphys/commondataparser.py +++ b/validphys2/src/validphys/commondataparser.py @@ -43,8 +43,6 @@ } - - def load_commondata(spec): """ Load the data corresponding to a CommonDataSpec object. diff --git a/validphys2/src/validphys/coredata.py b/validphys2/src/validphys/coredata.py index 335f6d0a49..ce0a580cc4 100644 --- a/validphys2/src/validphys/coredata.py +++ b/validphys2/src/validphys/coredata.py @@ -7,10 +7,7 @@ import dataclasses import numpy as np import pandas as pd -from validphys.commondatawriter import ( - write_systype_to_file, - write_commondata_to_file, - ) +from validphys.commondatawriter import write_systype_to_file, write_commondata_to_file KIN_NAMES = ["kin1", "kin2", "kin3"]