From f1108cda8e00cd2585df54d79b08484e0adc3918 Mon Sep 17 00:00:00 2001 From: wilsonm Date: Thu, 14 Jan 2021 13:04:58 +0000 Subject: [PATCH 1/9] add python t0 covmat and some actions for ease of testing, add basic t0 test but more required. --- validphys2/src/validphys/core.py | 8 ++ validphys2/src/validphys/coredata.py | 5 +- validphys2/src/validphys/covmats.py | 104 +++++++++++++++++- .../src/validphys/tests/test_covmats.py | 65 +++++------ 4 files changed, 138 insertions(+), 44 deletions(-) diff --git a/validphys2/src/validphys/core.py b/validphys2/src/validphys/core.py index f862cd0627..217cb59bb9 100644 --- a/validphys2/src/validphys/core.py +++ b/validphys2/src/validphys/core.py @@ -323,6 +323,14 @@ def load(self)->CommonData: #TODO: Use better path handling in python 3.6 return CommonData.ReadFile(str(self.datafile), str(self.sysfile)) + @functools.lru_cache() + def pyload(self): + """load the validphys.coredata.CommonData object.""" + #TODO: resolve circulate imports and move this. + from validphys.commondataparser import load_commondata + #TODO: replace load with this function? + return load_commondata(self) + @property def plot_kinlabels(self): return get_plot_kinlabels(self) diff --git a/validphys2/src/validphys/coredata.py b/validphys2/src/validphys/coredata.py index 08bc762216..2e6ab10db8 100644 --- a/validphys2/src/validphys/coredata.py +++ b/validphys2/src/validphys/coredata.py @@ -246,7 +246,7 @@ def additive_errors(self): return add_table.loc[:, add_table.columns != "SKIP"] - def systematic_errors(self): + def systematic_errors(self, central_values=None): """Returns all systematic errors as absolute uncertainties, with a single column for each uncertainty. Converts :py:attr:`multiplicative_errors` to units of data and then appends @@ -254,7 +254,8 @@ def systematic_errors(self): """ # NOTE: in the future can take t0 predictions here. - central_values = self.central_values.to_numpy() + if central_values is None: + central_values = self.central_values.to_numpy() converted_mult_errors = ( self.multiplicative_errors * central_values[:, np.newaxis] / 100 ) diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index 530ced5815..25e8cb02d4 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -11,7 +11,6 @@ from reportengine.table import table from validphys.calcutils import regularize_covmat, get_df_block -from validphys.core import PDF, DataGroupSpec, DataSetSpec from validphys.checks import ( check_dataset_cuts_match_theorycovmat, check_norm_threshold, @@ -19,6 +18,8 @@ check_speclabels_different, check_data_cuts_match_theorycovmat, ) +from validphys.convolution import central_predictions +from validphys.core import PDF, DataGroupSpec, DataSetSpec from validphys.results import ThPredictionsResult log = logging.getLogger(__name__) @@ -26,7 +27,7 @@ INTRA_DATASET_SYS_NAME = ("UNCORR", "CORR", "THEORYUNCORR", "THEORYCORR") -def covmat_from_systematics(commondata): +def covmat_from_systematics(commondata, central_values=None): """Take the statistical uncertainty and systematics table from a :py:class:`validphys.coredata.CommonData` object and construct the covariance matrix accounting for correlations between @@ -67,6 +68,12 @@ def covmat_from_systematics(commondata): commondata : validphys.coredata.CommonData CommonData which stores information about systematic errors, their treatment and description. + central_values: None, np.array + 1-D array containing alternative central values to combine with the + multiplicative errors to calculate their absolute contributions. By + default this is None, and the experimental central values are used but + can be used to calculate, for example, the t0 covariance matrix by + using the predictions from the central member of the t0 pdf. Returns ------- @@ -98,10 +105,14 @@ def covmat_from_systematics(commondata): 4.14126235e-05, 4.15843357e-05, 1.43824457e-04]]) """ return construct_covmat( - commondata.stat_errors.to_numpy(), commondata.systematic_errors()) + commondata.stat_errors.to_numpy(), + commondata.systematic_errors(central_values) + ) -def datasets_covmat_from_systematics(list_of_commondata): +def datasets_covmat_from_systematics( + list_of_commondata, list_of_central_values=None +): """Given a list containing :py:class:`validphys.coredata.CommonData` s, construct the full covariance matrix. @@ -115,6 +126,11 @@ def datasets_covmat_from_systematics(list_of_commondata): ---------- list_of_commondata : list[validphys.coredata.CommonData] list of CommonData objects. + list_of_central_values: None, list[np.array] + list of 1-D arrays which contain alternative central values which are + combined with the multiplicative errors to calculate their absolute + contribution. By default this is None and the experimental central + values are used. Returns ------- @@ -139,8 +155,12 @@ def datasets_covmat_from_systematics(list_of_commondata): special_corrs = [] block_diags = [] - for cd in list_of_commondata: - errors = cd.systematic_errors() + if list_of_central_values is None: + # want to just pass None to systematic_errors method + list_of_central_values = [None for _ in list_of_commondata] + + for cd, central_values in zip(list_of_commondata, list_of_central_values): + errors = cd.systematic_errors(central_values) # separate out the special uncertainties which can be correlated across # datasets is_intra_dataset_error = errors.columns.isin(INTRA_DATASET_SYS_NAME) @@ -193,6 +213,78 @@ def construct_covmat(stat_errors: np.array, sys_errors: pd.DataFrame): return np.diag(diagonal) + corr_sys_mat @ corr_sys_mat.T +def experimental_covmat(commondata, cuts): + """Returns the experimental covariance matrix. Details of how + the covmat is constructed can be found in :py:func:`covmat_from_systematics`. + The experimental covariance matrix uses the experimental central values + to calculate the absolute uncertainties from the multiplicative systematics. + + Parameters + ---------- + commondata: validphys.core.CommonDataSpec + + Returns + ------- + covmat: np.array + + """ + #TODO: make a cut commondata action to avoid mistakes + lcd = commondata.pyload() + cut_lcd = lcd.with_cuts(cuts) + return covmat_from_systematics(cut_lcd) + + +def t0_covmat(t0set, dataset, cuts): + """Like :py:func:`experimental_covmat` except uses the ``t0pdf`` predictions + to calculate the absolute constributions to the covmat from multiplicative + uncertainties. The t0 predictions are the predictions generated from the + central pdf, note that in the case that the ``t0pdf`` has replicas + errortype this is not the same as the central value of the replica + predictions in the case of hadronic observables. + + Parameters + ---------- + t0set: validphys.core.PDF + pdf set used to generate t0 predictions + commondata: validphys.core.CommonDataSpec + commondata object for which to generate the covmat. + + Returns + ------- + covmat: np.array + + """ + # flatten values since t0_pred is 2D with shape n_data * 1 + t0_pred = central_predictions(dataset, t0set).to_numpy().squeeze() + lcd = dataset.commondata.pyload() + cut_lcd = lcd.with_cuts(cuts) + return covmat_from_systematics(cut_lcd, t0_pred) + +dataset_inputs_cuts = collect("cuts", ("data_input",)) + +def dataset_inputs_experimental_covmat( + dataset_inputs_commondata, dataset_inputs_cuts): + """Like :py:func:`experimental_covmat` except for all data""" + #TODO: simply collect cut commondata instead of this + cut_ld_cd = [ + cd.pyload().with_cuts(cuts) + for cd, cuts in zip(dataset_inputs_commondata, dataset_inputs_cuts) + ] + return datasets_covmat_from_systematics(cut_ld_cd) + +def dataset_inputs_t0_covmat(t0set, data, dataset_inputs_cuts): + """Like :py:func:`t0_covmat` except for all data""" + t0_preds = [ + central_predictions(dataset, t0set).to_numpy().squeeze() + for dataset in data.datasets + ] + cut_ld_cd = [ + ds.commondata.pyload().with_cuts(cuts) + for ds, cuts in zip(data.datasets, dataset_inputs_cuts) + ] + return datasets_covmat_from_systematics(cut_ld_cd, t0_preds) + + def sqrt_covmat(covariance_matrix): """Function that computes the square root of the covariance matrix. diff --git a/validphys2/src/validphys/tests/test_covmats.py b/validphys2/src/validphys/tests/test_covmats.py index 9cef8eb370..b4f149b764 100644 --- a/validphys2/src/validphys/tests/test_covmats.py +++ b/validphys2/src/validphys/tests/test_covmats.py @@ -11,11 +11,8 @@ from validphys.api import API -from validphys.commondataparser import load_commondata from validphys.covmats import ( sqrt_covmat, - datasets_covmat_from_systematics, - covmat_from_systematics ) from validphys.loader import Loader from validphys.tests.conftest import THEORYID @@ -25,15 +22,8 @@ def test_covmat_from_systematics_correlated(data_with_correlations_config): """Test the covariance matrix generation from a set of correlated datasets given their systematic errors """ - data = API.data(**data_with_correlations_config) - cds = [ds.commondata for ds in data.datasets] - - ld_cds = list(map(load_commondata, cds)) - - covmat = datasets_covmat_from_systematics(ld_cds) - + covmat = API.dataset_inputs_experimental_covmat(**data_with_correlations_config) cpp_covmat = API.groups_covmat(**data_with_correlations_config) - np.testing.assert_allclose(cpp_covmat, covmat) @@ -43,17 +33,16 @@ def test_self_consistent_covmat_from_systematics(data_internal_cuts_config): when the latter is given a list containing a single dataset. """ - data = API.data(**data_internal_cuts_config) - cds = [ds.commondata for ds in data.datasets] - - ld_cds = list(map(load_commondata, cds)) - - internal_cuts = [ds.cuts for ds in data.datasets] - cut_ld_cds = list(map(lambda x: x[0].with_cuts(x[1]), zip(ld_cds, internal_cuts))) - - for cut_ld_cd in cut_ld_cds: - covmat_a = covmat_from_systematics(cut_ld_cd) - covmat_b = datasets_covmat_from_systematics([cut_ld_cd]) + base_config = dict(data_internal_cuts_config) + #TODO: update tests to use new input. + exps = base_config.pop("experiments") + dataset_inputs = [dsinp for exp in exps for dsinp in exp["datasets"]] + + for dsinp in dataset_inputs: + covmat_a = API.experimental_covmat( + **base_config, dataset_input=dsinp) + covmat_b = API.dataset_inputs_experimental_covmat( + **base_config, dataset_inputs=[dsinp]) np.testing.assert_allclose(covmat_a, covmat_b) @@ -62,16 +51,7 @@ def test_covmat_from_systematics(data_internal_cuts_config): collection of datasets matches that of the C++ computation. Note that the datasets are cut using the internal rules, but the datasets are not correlated. """ - data = API.data(**data_internal_cuts_config) - cds = [ds.commondata for ds in data.datasets] - - ld_cds = list(map(load_commondata, cds)) - - internal_cuts = [ds.cuts for ds in data.datasets] - cut_ld_cds = list(map(lambda x: x[0].with_cuts(x[1]), zip(ld_cds, internal_cuts))) - - covmat = datasets_covmat_from_systematics(cut_ld_cds) - + covmat = API.dataset_inputs_experimental_covmat(**data_internal_cuts_config) cpp_covmat = API.groups_covmat(**data_internal_cuts_config) np.testing.assert_allclose(cpp_covmat, covmat) @@ -83,11 +63,10 @@ def test_covmat_with_one_systematic(): """ dsinput = {"dataset": "D0ZRAP", "frac": 1.0, "cfac": ["QCD"]} - cd = API.commondata(dataset_input=dsinput) - l_cd = load_commondata(cd) - covmat = covmat_from_systematics(l_cd) + config = dict(dataset_input=dsinput, theoryid=THEORYID, use_cuts="nocuts") - ds = API.dataset(dataset_input=dsinput, theoryid=THEORYID, use_cuts="nocuts") + covmat = API.experimental_covmat(**config) + ds = API.dataset(**config) cpp_covmat = ds.load().get_covmat() np.testing.assert_allclose(cpp_covmat, covmat) @@ -142,3 +121,17 @@ def test_sqrt_covmat(data_config): covmat = ld_exp.get_covmat() cholesky_cov = sqrt_covmat(covmat) np.testing.assert_allclose(cholesky_cov @ cholesky_cov.T, covmat) + +def test_t0_covmat(data_witht0_config): + #TODO: expand t0 tests + base_config = dict(data_witht0_config) + # just compare cut data + base_config["use_cuts"] = "internal" + covmat = API.dataset_inputs_t0_covmat(**base_config) + cpp_covmat = API.groups_covmat(**base_config) + # use allclose defaults or it fails + np.testing.assert_allclose(cpp_covmat, covmat, rtol=1e-05, atol=1e-08) + with pytest.raises(AssertionError): + np.testing.assert_allclose( + covmat, API.dataset_inputs_experimental_covmat(**base_config) + ) From 57945703e264fbbcae2feab69911379fe9d15ad2 Mon Sep 17 00:00:00 2001 From: wilsonm Date: Fri, 15 Jan 2021 14:36:30 +0000 Subject: [PATCH 2/9] added new module with results providers, keep better tab on loading python commondata and cutting it --- validphys2/src/validphys/app.py | 1 + validphys2/src/validphys/core.py | 8 --- validphys2/src/validphys/coredata.py | 19 +++++- validphys2/src/validphys/covmats.py | 46 ++++----------- validphys2/src/validphys/results_providers.py | 59 +++++++++++++++++++ 5 files changed, 90 insertions(+), 43 deletions(-) create mode 100644 validphys2/src/validphys/results_providers.py diff --git a/validphys2/src/validphys/app.py b/validphys2/src/validphys/app.py index 1cda2cc651..a37cde8271 100644 --- a/validphys2/src/validphys/app.py +++ b/validphys2/src/validphys/app.py @@ -24,6 +24,7 @@ providers = [ 'validphys.results', + 'validphys.results_providers', 'validphys.pdfgrids', 'validphys.pdfplots', 'validphys.dataplots', diff --git a/validphys2/src/validphys/core.py b/validphys2/src/validphys/core.py index 217cb59bb9..f862cd0627 100644 --- a/validphys2/src/validphys/core.py +++ b/validphys2/src/validphys/core.py @@ -323,14 +323,6 @@ def load(self)->CommonData: #TODO: Use better path handling in python 3.6 return CommonData.ReadFile(str(self.datafile), str(self.sysfile)) - @functools.lru_cache() - def pyload(self): - """load the validphys.coredata.CommonData object.""" - #TODO: resolve circulate imports and move this. - from validphys.commondataparser import load_commondata - #TODO: replace load with this function? - return load_commondata(self) - @property def plot_kinlabels(self): return get_plot_kinlabels(self) diff --git a/validphys2/src/validphys/coredata.py b/validphys2/src/validphys/coredata.py index 2e6ab10db8..828dfad3a0 100644 --- a/validphys2/src/validphys/coredata.py +++ b/validphys2/src/validphys/coredata.py @@ -250,10 +250,25 @@ def systematic_errors(self, central_values=None): """Returns all systematic errors as absolute uncertainties, with a single column for each uncertainty. Converts :py:attr:`multiplicative_errors` to units of data and then appends - onto :py:attr:`additive_errors` + onto :py:attr:`additive_errors`. By default uses the experimental + central values to perform convertion, but the user can supply a + 1-D array of central values, with length :py:attr:`self.ndata`, to use + instead of the experimental central values to calculate the absolute + contribution of the multiplicative systematics. + + Parameters + ---------- + central_values: None, np.array + 1-D array containing alternative central values combine with + multiplicative uncertainties. This array must have length equal + to :py:attr:`self.ndata` + + Returns + ------- + systematic_errors: pd.DataFrame + Dataframe containing systematic errors. """ - # NOTE: in the future can take t0 predictions here. if central_values is None: central_values = self.central_values.to_numpy() converted_mult_errors = ( diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index 25e8cb02d4..73f5ad0ef2 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -18,7 +18,6 @@ check_speclabels_different, check_data_cuts_match_theorycovmat, ) -from validphys.convolution import central_predictions from validphys.core import PDF, DataGroupSpec, DataSetSpec from validphys.results import ThPredictionsResult @@ -157,7 +156,7 @@ def datasets_covmat_from_systematics( if list_of_central_values is None: # want to just pass None to systematic_errors method - list_of_central_values = [None for _ in list_of_commondata] + list_of_central_values = [None] * len(list_of_commondata) for cd, central_values in zip(list_of_commondata, list_of_central_values): errors = cd.systematic_errors(central_values) @@ -213,7 +212,7 @@ def construct_covmat(stat_errors: np.array, sys_errors: pd.DataFrame): return np.diag(diagonal) + corr_sys_mat @ corr_sys_mat.T -def experimental_covmat(commondata, cuts): +def experimental_covmat(loaded_commondata_with_cuts): """Returns the experimental covariance matrix. Details of how the covmat is constructed can be found in :py:func:`covmat_from_systematics`. The experimental covariance matrix uses the experimental central values @@ -228,13 +227,10 @@ def experimental_covmat(commondata, cuts): covmat: np.array """ - #TODO: make a cut commondata action to avoid mistakes - lcd = commondata.pyload() - cut_lcd = lcd.with_cuts(cuts) - return covmat_from_systematics(cut_lcd) + return covmat_from_systematics(loaded_commondata_with_cuts) -def t0_covmat(t0set, dataset, cuts): +def t0_covmat(loaded_commondata_with_cuts, dataset_t0_predictions): """Like :py:func:`experimental_covmat` except uses the ``t0pdf`` predictions to calculate the absolute constributions to the covmat from multiplicative uncertainties. The t0 predictions are the predictions generated from the @@ -254,35 +250,19 @@ def t0_covmat(t0set, dataset, cuts): covmat: np.array """ - # flatten values since t0_pred is 2D with shape n_data * 1 - t0_pred = central_predictions(dataset, t0set).to_numpy().squeeze() - lcd = dataset.commondata.pyload() - cut_lcd = lcd.with_cuts(cuts) - return covmat_from_systematics(cut_lcd, t0_pred) + return covmat_from_systematics( + loaded_commondata_with_cuts, dataset_t0_predictions) -dataset_inputs_cuts = collect("cuts", ("data_input",)) -def dataset_inputs_experimental_covmat( - dataset_inputs_commondata, dataset_inputs_cuts): +def dataset_inputs_experimental_covmat(datasets_loaded_cd_with_cuts): """Like :py:func:`experimental_covmat` except for all data""" - #TODO: simply collect cut commondata instead of this - cut_ld_cd = [ - cd.pyload().with_cuts(cuts) - for cd, cuts in zip(dataset_inputs_commondata, dataset_inputs_cuts) - ] - return datasets_covmat_from_systematics(cut_ld_cd) - -def dataset_inputs_t0_covmat(t0set, data, dataset_inputs_cuts): + return datasets_covmat_from_systematics(datasets_loaded_cd_with_cuts) + +def dataset_inputs_t0_covmat( + datasets_loaded_cd_with_cuts, datasets_t0_predictions): """Like :py:func:`t0_covmat` except for all data""" - t0_preds = [ - central_predictions(dataset, t0set).to_numpy().squeeze() - for dataset in data.datasets - ] - cut_ld_cd = [ - ds.commondata.pyload().with_cuts(cuts) - for ds, cuts in zip(data.datasets, dataset_inputs_cuts) - ] - return datasets_covmat_from_systematics(cut_ld_cd, t0_preds) + return datasets_covmat_from_systematics( + datasets_loaded_cd_with_cuts, datasets_t0_predictions) def sqrt_covmat(covariance_matrix): diff --git a/validphys2/src/validphys/results_providers.py b/validphys2/src/validphys/results_providers.py new file mode 100644 index 0000000000..1fe2844fea --- /dev/null +++ b/validphys2/src/validphys/results_providers.py @@ -0,0 +1,59 @@ +""" +results_providers.py + +module which bridges between underlying functions concerned with loading +theory predictions and data and exposing them as actions which can be accessed +by other actions/providers. + +""" +from reportengine import collect + +from validphys.commondataparser import load_commondata +from validphys.convolution import central_predictions + +def loaded_commondata_with_cuts(commondata, cuts): + """Load the commondata and apply cuts. + + Parameters + ---------- + commondata: validphys.core.CommonDataSpec + commondata to load and cut. + cuts: validphys.core.cuts, None + valid cuts, used to cut loaded commondata. + + Returns + ------- + loaded_cut_commondata: validphys.coredata.CommonData + + """ + lcd = load_commondata(commondata) + return lcd.with_cuts(cuts) + +datasets_loaded_cd_with_cuts = collect( + "loaded_commondata_with_cuts", ("data_input",)) + + +def dataset_t0_predictions(dataset, t0set): + """Returns the t0 predictions for a ``dataset`` which are the predictions + calculated using the central member of ``pdf``. Note that if ``pdf`` has + errortype MC, and the dataset is a hadronic observable then the predictions + of the central member are subtley different to the central value of the + replica predictions. + + Parameters + ---------- + dataset: validphys.core.DataSetSpec + dataset for which to calculate t0 predictions + t0set: validphys.core.PDF + pdf used to calculate the predictions + + Returns + ------- + t0_predictions: np.array + 1-D numpy array with predictions for each of the cut datapoints. + + """ + # Squeeze values since t0_pred is DataFrame with shape n_data * 1 + return central_predictions(dataset, t0set).to_numpy().squeeze() + +datasets_t0_predictions = collect("dataset_t0_predictions", ("data",)) From e2df31042fcb0ffcde041978137815c1e001434c Mon Sep 17 00:00:00 2001 From: wilsonm Date: Fri, 15 Jan 2021 14:55:31 +0000 Subject: [PATCH 3/9] update tests to data keyword, where appropriate --- validphys2/src/validphys/tests/test_covmats.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/validphys2/src/validphys/tests/test_covmats.py b/validphys2/src/validphys/tests/test_covmats.py index b4f149b764..a8315851f9 100644 --- a/validphys2/src/validphys/tests/test_covmats.py +++ b/validphys2/src/validphys/tests/test_covmats.py @@ -34,9 +34,7 @@ def test_self_consistent_covmat_from_systematics(data_internal_cuts_config): """ base_config = dict(data_internal_cuts_config) - #TODO: update tests to use new input. - exps = base_config.pop("experiments") - dataset_inputs = [dsinp for exp in exps for dsinp in exp["datasets"]] + dataset_inputs = base_config.pop("dataset_inputs") for dsinp in dataset_inputs: covmat_a = API.experimental_covmat( From 2108cb9723cf20e1128de7bc0ff00ae126f1afff Mon Sep 17 00:00:00 2001 From: wilsonm Date: Fri, 15 Jan 2021 15:00:38 +0000 Subject: [PATCH 4/9] expand python t0 tests --- validphys2/src/validphys/tests/test_covmats.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/validphys2/src/validphys/tests/test_covmats.py b/validphys2/src/validphys/tests/test_covmats.py index a8315851f9..df50747abd 100644 --- a/validphys2/src/validphys/tests/test_covmats.py +++ b/validphys2/src/validphys/tests/test_covmats.py @@ -121,7 +121,7 @@ def test_sqrt_covmat(data_config): np.testing.assert_allclose(cholesky_cov @ cholesky_cov.T, covmat) def test_t0_covmat(data_witht0_config): - #TODO: expand t0 tests + """Test that t0 covmat matches between cpp and python implementation.""" base_config = dict(data_witht0_config) # just compare cut data base_config["use_cuts"] = "internal" @@ -133,3 +133,19 @@ def test_t0_covmat(data_witht0_config): np.testing.assert_allclose( covmat, API.dataset_inputs_experimental_covmat(**base_config) ) + +def test_t0_correlated_covmat(data_with_correlations_internal_cuts_config): + """Test that the correlated t0 covmat matches between cpp and python""" + base_config = dict(data_with_correlations_internal_cuts_config) + # use t0 + base_config["t0pdfset"] = base_config["pdf"] + base_config["use_t0"] = True + + covmat = API.dataset_inputs_t0_covmat(**base_config) + cpp_covmat = API.groups_covmat(**base_config) + # use allclose defaults or it fails + np.testing.assert_allclose(cpp_covmat, covmat, rtol=1e-05, atol=1e-08) + with pytest.raises(AssertionError): + np.testing.assert_allclose( + covmat, API.dataset_inputs_experimental_covmat(**base_config) + ) From fcc47eda813ec68968968c8e0023c152b0f4ca63 Mon Sep 17 00:00:00 2001 From: wilsonmr <33907451+wilsonmr@users.noreply.github.com> Date: Fri, 15 Jan 2021 17:01:15 +0000 Subject: [PATCH 5/9] Apply suggestions from code review Co-authored-by: Rosalyn Pearson <33020850+RosalynLP@users.noreply.github.com> --- validphys2/src/validphys/coredata.py | 4 ++-- validphys2/src/validphys/covmats.py | 4 ++-- validphys2/src/validphys/results_providers.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/validphys2/src/validphys/coredata.py b/validphys2/src/validphys/coredata.py index 828dfad3a0..82a26e6bdf 100644 --- a/validphys2/src/validphys/coredata.py +++ b/validphys2/src/validphys/coredata.py @@ -251,7 +251,7 @@ def systematic_errors(self, central_values=None): single column for each uncertainty. Converts :py:attr:`multiplicative_errors` to units of data and then appends onto :py:attr:`additive_errors`. By default uses the experimental - central values to perform convertion, but the user can supply a + central values to perform conversion, but the user can supply a 1-D array of central values, with length :py:attr:`self.ndata`, to use instead of the experimental central values to calculate the absolute contribution of the multiplicative systematics. @@ -259,7 +259,7 @@ def systematic_errors(self, central_values=None): Parameters ---------- central_values: None, np.array - 1-D array containing alternative central values combine with + 1-D array containing alternative central values to combine with multiplicative uncertainties. This array must have length equal to :py:attr:`self.ndata` diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index 73f5ad0ef2..34f5914678 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -70,7 +70,7 @@ def covmat_from_systematics(commondata, central_values=None): central_values: None, np.array 1-D array containing alternative central values to combine with the multiplicative errors to calculate their absolute contributions. By - default this is None, and the experimental central values are used but + default this is None, and the experimental central values are used. However, this can be used to calculate, for example, the t0 covariance matrix by using the predictions from the central member of the t0 pdf. @@ -234,7 +234,7 @@ def t0_covmat(loaded_commondata_with_cuts, dataset_t0_predictions): """Like :py:func:`experimental_covmat` except uses the ``t0pdf`` predictions to calculate the absolute constributions to the covmat from multiplicative uncertainties. The t0 predictions are the predictions generated from the - central pdf, note that in the case that the ``t0pdf`` has replicas + central pdf. Note that in the case that the ``t0pdf`` has replicas errortype this is not the same as the central value of the replica predictions in the case of hadronic observables. diff --git a/validphys2/src/validphys/results_providers.py b/validphys2/src/validphys/results_providers.py index 1fe2844fea..cd6598b232 100644 --- a/validphys2/src/validphys/results_providers.py +++ b/validphys2/src/validphys/results_providers.py @@ -2,7 +2,7 @@ results_providers.py module which bridges between underlying functions concerned with loading -theory predictions and data and exposing them as actions which can be accessed +theory predictions and data and actions which can be accessed by other actions/providers. """ @@ -37,7 +37,7 @@ def dataset_t0_predictions(dataset, t0set): """Returns the t0 predictions for a ``dataset`` which are the predictions calculated using the central member of ``pdf``. Note that if ``pdf`` has errortype MC, and the dataset is a hadronic observable then the predictions - of the central member are subtley different to the central value of the + of the central member are subtly different to the central value of the replica predictions. Parameters From a0227dc8ac1a9f65b251168e8d6350257d80218c Mon Sep 17 00:00:00 2001 From: wilsonm Date: Fri, 15 Jan 2021 17:12:01 +0000 Subject: [PATCH 6/9] fix up docstrings, removing repetitions and correcting paramters --- validphys2/src/validphys/covmats.py | 47 ++++++++++++++----- validphys2/src/validphys/results_providers.py | 6 +-- 2 files changed, 38 insertions(+), 15 deletions(-) diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index 34f5914678..5ffa5346c2 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -220,7 +220,7 @@ def experimental_covmat(loaded_commondata_with_cuts): Parameters ---------- - commondata: validphys.core.CommonDataSpec + loaded_commondata_with_cuts: validphys.coredata.CommonData Returns ------- @@ -231,23 +231,22 @@ def experimental_covmat(loaded_commondata_with_cuts): def t0_covmat(loaded_commondata_with_cuts, dataset_t0_predictions): - """Like :py:func:`experimental_covmat` except uses the ``t0pdf`` predictions + """Like :py:func:`experimental_covmat` except uses the t0 predictions to calculate the absolute constributions to the covmat from multiplicative - uncertainties. The t0 predictions are the predictions generated from the - central pdf. Note that in the case that the ``t0pdf`` has replicas - errortype this is not the same as the central value of the replica - predictions in the case of hadronic observables. + uncertainties. For more info on the t0 predictions see + :py:func:`validphys.results_providers.dataset_t0_predictions`. Parameters ---------- - t0set: validphys.core.PDF - pdf set used to generate t0 predictions - commondata: validphys.core.CommonDataSpec + loaded_commondata_with_cuts: validphys.coredata.CommonData commondata object for which to generate the covmat. + dataset_t0_predictions: np.array + 1-D array with t0 predictions. Returns ------- - covmat: np.array + t0_covmat: np.array + t0 covariance matrix """ return covmat_from_systematics( @@ -255,12 +254,36 @@ def t0_covmat(loaded_commondata_with_cuts, dataset_t0_predictions): def dataset_inputs_experimental_covmat(datasets_loaded_cd_with_cuts): - """Like :py:func:`experimental_covmat` except for all data""" + """Like :py:func:`experimental_covmat` except for all data + + Parameters + ---------- + datasets_loaded_cd_with_cuts: list[validphys.coredata.CommonData] + The CommonData for all datasets defined in ``dataset_inputs``. + + Returns + ------- + covmat: np.array + Covariance matrix for list of datasets. + """ return datasets_covmat_from_systematics(datasets_loaded_cd_with_cuts) def dataset_inputs_t0_covmat( datasets_loaded_cd_with_cuts, datasets_t0_predictions): - """Like :py:func:`t0_covmat` except for all data""" + """Like :py:func:`t0_covmat` except for all data + + Parameters + ---------- + datasets_loaded_cd_with_cuts: list[validphys.coredata.CommonData] + The CommonData for all datasets defined in ``dataset_inputs``. + datasets_t0_predictions: list[np.array] + The t0 predictions for all datasets. + + Returns + ------- + t0_covmat: np.array + t0 covariance matrix matrix for list of datasets. + """ return datasets_covmat_from_systematics( datasets_loaded_cd_with_cuts, datasets_t0_predictions) diff --git a/validphys2/src/validphys/results_providers.py b/validphys2/src/validphys/results_providers.py index cd6598b232..529eb81be8 100644 --- a/validphys2/src/validphys/results_providers.py +++ b/validphys2/src/validphys/results_providers.py @@ -36,9 +36,9 @@ def loaded_commondata_with_cuts(commondata, cuts): def dataset_t0_predictions(dataset, t0set): """Returns the t0 predictions for a ``dataset`` which are the predictions calculated using the central member of ``pdf``. Note that if ``pdf`` has - errortype MC, and the dataset is a hadronic observable then the predictions - of the central member are subtly different to the central value of the - replica predictions. + errortype ``replicas``, and the dataset is a hadronic observable then the + predictions of the central member are subtly different to the central + value of the replica predictions. Parameters ---------- From dfea82ce01b5313cac5961eb91660be40338f7c0 Mon Sep 17 00:00:00 2001 From: wilsonm Date: Fri, 15 Jan 2021 18:10:32 +0000 Subject: [PATCH 7/9] use params in pytest fixtures to loop over same test with different settings --- validphys2/src/validphys/tests/conftest.py | 21 ----- .../src/validphys/tests/test_covmats.py | 78 ++++++++++--------- 2 files changed, 40 insertions(+), 59 deletions(-) diff --git a/validphys2/src/validphys/tests/conftest.py b/validphys2/src/validphys/tests/conftest.py index c0f5a85338..7bc4876f54 100644 --- a/validphys2/src/validphys/tests/conftest.py +++ b/validphys2/src/validphys/tests/conftest.py @@ -28,15 +28,6 @@ def tmp(tmpdir): {'dataset': 'CMSZDIFF12', 'cfac':('QCD', 'NRM'), 'sys':10} ] -# Experiments which have non trivial correlations between their datasets -CORR_DATA = [ - {'dataset': 'ATLASWZRAP36PB', 'cfac': ['QCD']}, - {'dataset': 'ATLASZHIGHMASS49FB', 'cfac': ['QCD']}, - {'dataset': 'ATLASLOMASSDY11EXT', 'cfac': ['QCD']}, - {'dataset': 'ATLASWZRAP11', 'frac': 0.5, 'cfac': ['QCD']}, - {'dataset': 'CMSZDIFF12', 'cfac': ('QCD', 'NRM'), 'sys': 10}, - {'dataset': 'CMSJETS11', 'frac': 0.5, 'sys': 10}, -] SINGLE_EXP = [ { @@ -90,18 +81,6 @@ def data_singleexp_witht0_config(data_witht0_config): config_dict.update({'experiments': SINGLE_EXP}) return config_dict -@pytest.fixture(scope='module') -def data_with_correlations_config(): - corr_dict = dict(base_config) - corr_dict.update(dataset_inputs=CORR_DATA) - return corr_dict - -@pytest.fixture(scope='module') -def data_with_correlations_internal_cuts_config(data_with_correlations_config): - config_dict = dict(data_with_correlations_config) - config_dict.update(use_cuts='internal') - return config_dict - @pytest.fixture(scope='module') def weighted_data_witht0_config(data_witht0_config): config_dict = dict(data_witht0_config) diff --git a/validphys2/src/validphys/tests/test_covmats.py b/validphys2/src/validphys/tests/test_covmats.py index df50747abd..e548855e66 100644 --- a/validphys2/src/validphys/tests/test_covmats.py +++ b/validphys2/src/validphys/tests/test_covmats.py @@ -11,20 +11,20 @@ from validphys.api import API -from validphys.covmats import ( - sqrt_covmat, -) +from validphys.covmats import sqrt_covmat from validphys.loader import Loader -from validphys.tests.conftest import THEORYID +from validphys.tests.conftest import THEORYID, PDF, HESSIAN_PDF, DATA -def test_covmat_from_systematics_correlated(data_with_correlations_config): - """Test the covariance matrix generation from a set of correlated datasets - given their systematic errors - """ - covmat = API.dataset_inputs_experimental_covmat(**data_with_correlations_config) - cpp_covmat = API.groups_covmat(**data_with_correlations_config) - np.testing.assert_allclose(cpp_covmat, covmat) +# Experiments which have non trivial correlations between their datasets +CORR_DATA = [ + {'dataset': 'ATLASWZRAP36PB', 'cfac': ['QCD']}, + {'dataset': 'ATLASZHIGHMASS49FB', 'cfac': ['QCD']}, + {'dataset': 'ATLASLOMASSDY11EXT', 'cfac': ['QCD']}, + {'dataset': 'ATLASWZRAP11', 'frac': 0.5, 'cfac': ['QCD']}, + {'dataset': 'CMSZDIFF12', 'cfac': ('QCD', 'NRM'), 'sys': 10}, + {'dataset': 'CMSJETS11', 'frac': 0.5, 'sys': 10}, +] def test_self_consistent_covmat_from_systematics(data_internal_cuts_config): @@ -44,13 +44,21 @@ def test_self_consistent_covmat_from_systematics(data_internal_cuts_config): np.testing.assert_allclose(covmat_a, covmat_b) -def test_covmat_from_systematics(data_internal_cuts_config): +@pytest.mark.parametrize("use_cuts", ["nocuts", "internal"]) +@pytest.mark.parametrize("dataset_inputs", [DATA, CORR_DATA]) +def test_covmat_from_systematics(data_config, use_cuts, dataset_inputs): """Test which checks the python computation of the covmat relating to a - collection of datasets matches that of the C++ computation. Note that the - datasets are cut using the internal rules, but the datasets are not correlated. + collection of datasets matches that of the C++ computation. + + Tests all combinations of internal/no cuts and correlated/uncorrelated data. + """ - covmat = API.dataset_inputs_experimental_covmat(**data_internal_cuts_config) - cpp_covmat = API.groups_covmat(**data_internal_cuts_config) + config = dict(data_config) + config["use_cuts"] = use_cuts + config["dataset_inputs"] = dataset_inputs + + covmat = API.dataset_inputs_experimental_covmat(**config) + cpp_covmat = API.groups_covmat(**config) np.testing.assert_allclose(cpp_covmat, covmat) @@ -120,32 +128,26 @@ def test_sqrt_covmat(data_config): cholesky_cov = sqrt_covmat(covmat) np.testing.assert_allclose(cholesky_cov @ cholesky_cov.T, covmat) -def test_t0_covmat(data_witht0_config): - """Test that t0 covmat matches between cpp and python implementation.""" - base_config = dict(data_witht0_config) - # just compare cut data - base_config["use_cuts"] = "internal" - covmat = API.dataset_inputs_t0_covmat(**base_config) - cpp_covmat = API.groups_covmat(**base_config) - # use allclose defaults or it fails - np.testing.assert_allclose(cpp_covmat, covmat, rtol=1e-05, atol=1e-08) - with pytest.raises(AssertionError): - np.testing.assert_allclose( - covmat, API.dataset_inputs_experimental_covmat(**base_config) - ) +@pytest.mark.parametrize("t0pdfset", [PDF, HESSIAN_PDF]) +@pytest.mark.parametrize("dataset_inputs", [DATA, CORR_DATA]) +def test_python_t0_covmat_matches_cpp( + data_internal_cuts_config, t0pdfset, dataset_inputs): + """Test which checks the python computation of the t0 covmat relating to a + collection of datasets matches that of the C++ computation. -def test_t0_correlated_covmat(data_with_correlations_internal_cuts_config): - """Test that the correlated t0 covmat matches between cpp and python""" - base_config = dict(data_with_correlations_internal_cuts_config) - # use t0 - base_config["t0pdfset"] = base_config["pdf"] - base_config["use_t0"] = True + Tests all combinations of hessian/MC t0pdfset and correlated/uncorrelated + data. - covmat = API.dataset_inputs_t0_covmat(**base_config) - cpp_covmat = API.groups_covmat(**base_config) + """ + config = dict(data_internal_cuts_config) + config["dataset_inputs"] = dataset_inputs + config["t0pdfset"] = t0pdfset + config["use_t0"] = True + covmat = API.dataset_inputs_t0_covmat(**config) + cpp_covmat = API.groups_covmat(**config) # use allclose defaults or it fails np.testing.assert_allclose(cpp_covmat, covmat, rtol=1e-05, atol=1e-08) with pytest.raises(AssertionError): np.testing.assert_allclose( - covmat, API.dataset_inputs_experimental_covmat(**base_config) + covmat, API.dataset_inputs_experimental_covmat(**config) ) From 649d9c0d30e03eaac17e09be9c8985b126059d18 Mon Sep 17 00:00:00 2001 From: wilsonm Date: Mon, 25 Jan 2021 16:25:16 +0000 Subject: [PATCH 8/9] update collects to follow convention, add to docstring of systematic errors --- validphys2/src/validphys/coredata.py | 3 ++- validphys2/src/validphys/covmats.py | 14 +++++++------- validphys2/src/validphys/results_providers.py | 4 ++-- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/validphys2/src/validphys/coredata.py b/validphys2/src/validphys/coredata.py index 82a26e6bdf..f600601402 100644 --- a/validphys2/src/validphys/coredata.py +++ b/validphys2/src/validphys/coredata.py @@ -261,7 +261,8 @@ def systematic_errors(self, central_values=None): central_values: None, np.array 1-D array containing alternative central values to combine with multiplicative uncertainties. This array must have length equal - to :py:attr:`self.ndata` + to :py:attr:`self.ndata`. By default ``central_values`` is None, and + the central values of the commondata are used. Returns ------- diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index 5ffa5346c2..d99093750d 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -253,12 +253,12 @@ def t0_covmat(loaded_commondata_with_cuts, dataset_t0_predictions): loaded_commondata_with_cuts, dataset_t0_predictions) -def dataset_inputs_experimental_covmat(datasets_loaded_cd_with_cuts): +def dataset_inputs_experimental_covmat(dataset_inputs_loaded_cd_with_cuts): """Like :py:func:`experimental_covmat` except for all data Parameters ---------- - datasets_loaded_cd_with_cuts: list[validphys.coredata.CommonData] + dataset_inputs_loaded_cd_with_cuts: list[validphys.coredata.CommonData] The CommonData for all datasets defined in ``dataset_inputs``. Returns @@ -266,17 +266,17 @@ def dataset_inputs_experimental_covmat(datasets_loaded_cd_with_cuts): covmat: np.array Covariance matrix for list of datasets. """ - return datasets_covmat_from_systematics(datasets_loaded_cd_with_cuts) + return datasets_covmat_from_systematics(dataset_inputs_loaded_cd_with_cuts) def dataset_inputs_t0_covmat( - datasets_loaded_cd_with_cuts, datasets_t0_predictions): + dataset_inputs_loaded_cd_with_cuts, dataset_inputs_t0_predictions): """Like :py:func:`t0_covmat` except for all data Parameters ---------- - datasets_loaded_cd_with_cuts: list[validphys.coredata.CommonData] + dataset_inputs_loaded_cd_with_cuts: list[validphys.coredata.CommonData] The CommonData for all datasets defined in ``dataset_inputs``. - datasets_t0_predictions: list[np.array] + dataset_inputs_t0_predictions: list[np.array] The t0 predictions for all datasets. Returns @@ -285,7 +285,7 @@ def dataset_inputs_t0_covmat( t0 covariance matrix matrix for list of datasets. """ return datasets_covmat_from_systematics( - datasets_loaded_cd_with_cuts, datasets_t0_predictions) + dataset_inputs_loaded_cd_with_cuts, dataset_inputs_t0_predictions) def sqrt_covmat(covariance_matrix): diff --git a/validphys2/src/validphys/results_providers.py b/validphys2/src/validphys/results_providers.py index 529eb81be8..4e7332514d 100644 --- a/validphys2/src/validphys/results_providers.py +++ b/validphys2/src/validphys/results_providers.py @@ -29,7 +29,7 @@ def loaded_commondata_with_cuts(commondata, cuts): lcd = load_commondata(commondata) return lcd.with_cuts(cuts) -datasets_loaded_cd_with_cuts = collect( +dataset_inputs_loaded_cd_with_cuts = collect( "loaded_commondata_with_cuts", ("data_input",)) @@ -56,4 +56,4 @@ def dataset_t0_predictions(dataset, t0set): # Squeeze values since t0_pred is DataFrame with shape n_data * 1 return central_predictions(dataset, t0set).to_numpy().squeeze() -datasets_t0_predictions = collect("dataset_t0_predictions", ("data",)) +dataset_inputs_t0_predictions = collect("dataset_t0_predictions", ("data",)) From 85c4fe471763b666aee1888a3ecd7889f5caf415 Mon Sep 17 00:00:00 2001 From: wilsonm Date: Mon, 25 Jan 2021 16:34:14 +0000 Subject: [PATCH 9/9] update test to ensure dataset which is assumed to have single systematic does, small fix of docstring --- validphys2/src/validphys/covmats.py | 4 ++-- validphys2/src/validphys/tests/test_covmats.py | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index d99093750d..74989025a0 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -62,12 +62,12 @@ def covmat_from_systematics(commondata, central_values=None): `paper `_ outlining the procedure, specifically equation 2 and surrounding text. - Paramaters + Parameters ---------- commondata : validphys.coredata.CommonData CommonData which stores information about systematic errors, their treatment and description. - central_values: None, np.array + central_values : None, np.array 1-D array containing alternative central values to combine with the multiplicative errors to calculate their absolute contributions. By default this is None, and the experimental central values are used. However, this diff --git a/validphys2/src/validphys/tests/test_covmats.py b/validphys2/src/validphys/tests/test_covmats.py index e548855e66..a481475147 100644 --- a/validphys2/src/validphys/tests/test_covmats.py +++ b/validphys2/src/validphys/tests/test_covmats.py @@ -73,6 +73,8 @@ def test_covmat_with_one_systematic(): covmat = API.experimental_covmat(**config) ds = API.dataset(**config) + # double check that the dataset does indeed only have 1 systematic. + assert ds.commondata.nsys == 1 cpp_covmat = ds.load().get_covmat() np.testing.assert_allclose(cpp_covmat, covmat)