diff --git a/validphys2/src/validphys/app.py b/validphys2/src/validphys/app.py index 1cda2cc651..a37cde8271 100644 --- a/validphys2/src/validphys/app.py +++ b/validphys2/src/validphys/app.py @@ -24,6 +24,7 @@ providers = [ 'validphys.results', + 'validphys.results_providers', 'validphys.pdfgrids', 'validphys.pdfplots', 'validphys.dataplots', diff --git a/validphys2/src/validphys/coredata.py b/validphys2/src/validphys/coredata.py index 08bc762216..f600601402 100644 --- a/validphys2/src/validphys/coredata.py +++ b/validphys2/src/validphys/coredata.py @@ -246,15 +246,32 @@ def additive_errors(self): return add_table.loc[:, add_table.columns != "SKIP"] - def systematic_errors(self): + def systematic_errors(self, central_values=None): """Returns all systematic errors as absolute uncertainties, with a single column for each uncertainty. Converts :py:attr:`multiplicative_errors` to units of data and then appends - onto :py:attr:`additive_errors` + onto :py:attr:`additive_errors`. By default uses the experimental + central values to perform conversion, but the user can supply a + 1-D array of central values, with length :py:attr:`self.ndata`, to use + instead of the experimental central values to calculate the absolute + contribution of the multiplicative systematics. + + Parameters + ---------- + central_values: None, np.array + 1-D array containing alternative central values to combine with + multiplicative uncertainties. This array must have length equal + to :py:attr:`self.ndata`. By default ``central_values`` is None, and + the central values of the commondata are used. + + Returns + ------- + systematic_errors: pd.DataFrame + Dataframe containing systematic errors. """ - # NOTE: in the future can take t0 predictions here. - central_values = self.central_values.to_numpy() + if central_values is None: + central_values = self.central_values.to_numpy() converted_mult_errors = ( self.multiplicative_errors * central_values[:, np.newaxis] / 100 ) diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index 530ced5815..74989025a0 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -11,7 +11,6 @@ from reportengine.table import table from validphys.calcutils import regularize_covmat, get_df_block -from validphys.core import PDF, DataGroupSpec, DataSetSpec from validphys.checks import ( check_dataset_cuts_match_theorycovmat, check_norm_threshold, @@ -19,6 +18,7 @@ check_speclabels_different, check_data_cuts_match_theorycovmat, ) +from validphys.core import PDF, DataGroupSpec, DataSetSpec from validphys.results import ThPredictionsResult log = logging.getLogger(__name__) @@ -26,7 +26,7 @@ INTRA_DATASET_SYS_NAME = ("UNCORR", "CORR", "THEORYUNCORR", "THEORYCORR") -def covmat_from_systematics(commondata): +def covmat_from_systematics(commondata, central_values=None): """Take the statistical uncertainty and systematics table from a :py:class:`validphys.coredata.CommonData` object and construct the covariance matrix accounting for correlations between @@ -62,11 +62,17 @@ def covmat_from_systematics(commondata): `paper `_ outlining the procedure, specifically equation 2 and surrounding text. - Paramaters + Parameters ---------- commondata : validphys.coredata.CommonData CommonData which stores information about systematic errors, their treatment and description. + central_values : None, np.array + 1-D array containing alternative central values to combine with the + multiplicative errors to calculate their absolute contributions. By + default this is None, and the experimental central values are used. However, this + can be used to calculate, for example, the t0 covariance matrix by + using the predictions from the central member of the t0 pdf. Returns ------- @@ -98,10 +104,14 @@ def covmat_from_systematics(commondata): 4.14126235e-05, 4.15843357e-05, 1.43824457e-04]]) """ return construct_covmat( - commondata.stat_errors.to_numpy(), commondata.systematic_errors()) + commondata.stat_errors.to_numpy(), + commondata.systematic_errors(central_values) + ) -def datasets_covmat_from_systematics(list_of_commondata): +def datasets_covmat_from_systematics( + list_of_commondata, list_of_central_values=None +): """Given a list containing :py:class:`validphys.coredata.CommonData` s, construct the full covariance matrix. @@ -115,6 +125,11 @@ def datasets_covmat_from_systematics(list_of_commondata): ---------- list_of_commondata : list[validphys.coredata.CommonData] list of CommonData objects. + list_of_central_values: None, list[np.array] + list of 1-D arrays which contain alternative central values which are + combined with the multiplicative errors to calculate their absolute + contribution. By default this is None and the experimental central + values are used. Returns ------- @@ -139,8 +154,12 @@ def datasets_covmat_from_systematics(list_of_commondata): special_corrs = [] block_diags = [] - for cd in list_of_commondata: - errors = cd.systematic_errors() + if list_of_central_values is None: + # want to just pass None to systematic_errors method + list_of_central_values = [None] * len(list_of_commondata) + + for cd, central_values in zip(list_of_commondata, list_of_central_values): + errors = cd.systematic_errors(central_values) # separate out the special uncertainties which can be correlated across # datasets is_intra_dataset_error = errors.columns.isin(INTRA_DATASET_SYS_NAME) @@ -193,6 +212,82 @@ def construct_covmat(stat_errors: np.array, sys_errors: pd.DataFrame): return np.diag(diagonal) + corr_sys_mat @ corr_sys_mat.T +def experimental_covmat(loaded_commondata_with_cuts): + """Returns the experimental covariance matrix. Details of how + the covmat is constructed can be found in :py:func:`covmat_from_systematics`. + The experimental covariance matrix uses the experimental central values + to calculate the absolute uncertainties from the multiplicative systematics. + + Parameters + ---------- + loaded_commondata_with_cuts: validphys.coredata.CommonData + + Returns + ------- + covmat: np.array + + """ + return covmat_from_systematics(loaded_commondata_with_cuts) + + +def t0_covmat(loaded_commondata_with_cuts, dataset_t0_predictions): + """Like :py:func:`experimental_covmat` except uses the t0 predictions + to calculate the absolute constributions to the covmat from multiplicative + uncertainties. For more info on the t0 predictions see + :py:func:`validphys.results_providers.dataset_t0_predictions`. + + Parameters + ---------- + loaded_commondata_with_cuts: validphys.coredata.CommonData + commondata object for which to generate the covmat. + dataset_t0_predictions: np.array + 1-D array with t0 predictions. + + Returns + ------- + t0_covmat: np.array + t0 covariance matrix + + """ + return covmat_from_systematics( + loaded_commondata_with_cuts, dataset_t0_predictions) + + +def dataset_inputs_experimental_covmat(dataset_inputs_loaded_cd_with_cuts): + """Like :py:func:`experimental_covmat` except for all data + + Parameters + ---------- + dataset_inputs_loaded_cd_with_cuts: list[validphys.coredata.CommonData] + The CommonData for all datasets defined in ``dataset_inputs``. + + Returns + ------- + covmat: np.array + Covariance matrix for list of datasets. + """ + return datasets_covmat_from_systematics(dataset_inputs_loaded_cd_with_cuts) + +def dataset_inputs_t0_covmat( + dataset_inputs_loaded_cd_with_cuts, dataset_inputs_t0_predictions): + """Like :py:func:`t0_covmat` except for all data + + Parameters + ---------- + dataset_inputs_loaded_cd_with_cuts: list[validphys.coredata.CommonData] + The CommonData for all datasets defined in ``dataset_inputs``. + dataset_inputs_t0_predictions: list[np.array] + The t0 predictions for all datasets. + + Returns + ------- + t0_covmat: np.array + t0 covariance matrix matrix for list of datasets. + """ + return datasets_covmat_from_systematics( + dataset_inputs_loaded_cd_with_cuts, dataset_inputs_t0_predictions) + + def sqrt_covmat(covariance_matrix): """Function that computes the square root of the covariance matrix. diff --git a/validphys2/src/validphys/results_providers.py b/validphys2/src/validphys/results_providers.py new file mode 100644 index 0000000000..4e7332514d --- /dev/null +++ b/validphys2/src/validphys/results_providers.py @@ -0,0 +1,59 @@ +""" +results_providers.py + +module which bridges between underlying functions concerned with loading +theory predictions and data and actions which can be accessed +by other actions/providers. + +""" +from reportengine import collect + +from validphys.commondataparser import load_commondata +from validphys.convolution import central_predictions + +def loaded_commondata_with_cuts(commondata, cuts): + """Load the commondata and apply cuts. + + Parameters + ---------- + commondata: validphys.core.CommonDataSpec + commondata to load and cut. + cuts: validphys.core.cuts, None + valid cuts, used to cut loaded commondata. + + Returns + ------- + loaded_cut_commondata: validphys.coredata.CommonData + + """ + lcd = load_commondata(commondata) + return lcd.with_cuts(cuts) + +dataset_inputs_loaded_cd_with_cuts = collect( + "loaded_commondata_with_cuts", ("data_input",)) + + +def dataset_t0_predictions(dataset, t0set): + """Returns the t0 predictions for a ``dataset`` which are the predictions + calculated using the central member of ``pdf``. Note that if ``pdf`` has + errortype ``replicas``, and the dataset is a hadronic observable then the + predictions of the central member are subtly different to the central + value of the replica predictions. + + Parameters + ---------- + dataset: validphys.core.DataSetSpec + dataset for which to calculate t0 predictions + t0set: validphys.core.PDF + pdf used to calculate the predictions + + Returns + ------- + t0_predictions: np.array + 1-D numpy array with predictions for each of the cut datapoints. + + """ + # Squeeze values since t0_pred is DataFrame with shape n_data * 1 + return central_predictions(dataset, t0set).to_numpy().squeeze() + +dataset_inputs_t0_predictions = collect("dataset_t0_predictions", ("data",)) diff --git a/validphys2/src/validphys/tests/conftest.py b/validphys2/src/validphys/tests/conftest.py index c0f5a85338..7bc4876f54 100644 --- a/validphys2/src/validphys/tests/conftest.py +++ b/validphys2/src/validphys/tests/conftest.py @@ -28,15 +28,6 @@ def tmp(tmpdir): {'dataset': 'CMSZDIFF12', 'cfac':('QCD', 'NRM'), 'sys':10} ] -# Experiments which have non trivial correlations between their datasets -CORR_DATA = [ - {'dataset': 'ATLASWZRAP36PB', 'cfac': ['QCD']}, - {'dataset': 'ATLASZHIGHMASS49FB', 'cfac': ['QCD']}, - {'dataset': 'ATLASLOMASSDY11EXT', 'cfac': ['QCD']}, - {'dataset': 'ATLASWZRAP11', 'frac': 0.5, 'cfac': ['QCD']}, - {'dataset': 'CMSZDIFF12', 'cfac': ('QCD', 'NRM'), 'sys': 10}, - {'dataset': 'CMSJETS11', 'frac': 0.5, 'sys': 10}, -] SINGLE_EXP = [ { @@ -90,18 +81,6 @@ def data_singleexp_witht0_config(data_witht0_config): config_dict.update({'experiments': SINGLE_EXP}) return config_dict -@pytest.fixture(scope='module') -def data_with_correlations_config(): - corr_dict = dict(base_config) - corr_dict.update(dataset_inputs=CORR_DATA) - return corr_dict - -@pytest.fixture(scope='module') -def data_with_correlations_internal_cuts_config(data_with_correlations_config): - config_dict = dict(data_with_correlations_config) - config_dict.update(use_cuts='internal') - return config_dict - @pytest.fixture(scope='module') def weighted_data_witht0_config(data_witht0_config): config_dict = dict(data_witht0_config) diff --git a/validphys2/src/validphys/tests/test_covmats.py b/validphys2/src/validphys/tests/test_covmats.py index 9cef8eb370..a481475147 100644 --- a/validphys2/src/validphys/tests/test_covmats.py +++ b/validphys2/src/validphys/tests/test_covmats.py @@ -11,30 +11,20 @@ from validphys.api import API -from validphys.commondataparser import load_commondata -from validphys.covmats import ( - sqrt_covmat, - datasets_covmat_from_systematics, - covmat_from_systematics -) +from validphys.covmats import sqrt_covmat from validphys.loader import Loader -from validphys.tests.conftest import THEORYID +from validphys.tests.conftest import THEORYID, PDF, HESSIAN_PDF, DATA -def test_covmat_from_systematics_correlated(data_with_correlations_config): - """Test the covariance matrix generation from a set of correlated datasets - given their systematic errors - """ - data = API.data(**data_with_correlations_config) - cds = [ds.commondata for ds in data.datasets] - - ld_cds = list(map(load_commondata, cds)) - - covmat = datasets_covmat_from_systematics(ld_cds) - - cpp_covmat = API.groups_covmat(**data_with_correlations_config) - - np.testing.assert_allclose(cpp_covmat, covmat) +# Experiments which have non trivial correlations between their datasets +CORR_DATA = [ + {'dataset': 'ATLASWZRAP36PB', 'cfac': ['QCD']}, + {'dataset': 'ATLASZHIGHMASS49FB', 'cfac': ['QCD']}, + {'dataset': 'ATLASLOMASSDY11EXT', 'cfac': ['QCD']}, + {'dataset': 'ATLASWZRAP11', 'frac': 0.5, 'cfac': ['QCD']}, + {'dataset': 'CMSZDIFF12', 'cfac': ('QCD', 'NRM'), 'sys': 10}, + {'dataset': 'CMSJETS11', 'frac': 0.5, 'sys': 10}, +] def test_self_consistent_covmat_from_systematics(data_internal_cuts_config): @@ -43,36 +33,32 @@ def test_self_consistent_covmat_from_systematics(data_internal_cuts_config): when the latter is given a list containing a single dataset. """ - data = API.data(**data_internal_cuts_config) - cds = [ds.commondata for ds in data.datasets] - - ld_cds = list(map(load_commondata, cds)) - - internal_cuts = [ds.cuts for ds in data.datasets] - cut_ld_cds = list(map(lambda x: x[0].with_cuts(x[1]), zip(ld_cds, internal_cuts))) - - for cut_ld_cd in cut_ld_cds: - covmat_a = covmat_from_systematics(cut_ld_cd) - covmat_b = datasets_covmat_from_systematics([cut_ld_cd]) + base_config = dict(data_internal_cuts_config) + dataset_inputs = base_config.pop("dataset_inputs") + + for dsinp in dataset_inputs: + covmat_a = API.experimental_covmat( + **base_config, dataset_input=dsinp) + covmat_b = API.dataset_inputs_experimental_covmat( + **base_config, dataset_inputs=[dsinp]) np.testing.assert_allclose(covmat_a, covmat_b) -def test_covmat_from_systematics(data_internal_cuts_config): +@pytest.mark.parametrize("use_cuts", ["nocuts", "internal"]) +@pytest.mark.parametrize("dataset_inputs", [DATA, CORR_DATA]) +def test_covmat_from_systematics(data_config, use_cuts, dataset_inputs): """Test which checks the python computation of the covmat relating to a - collection of datasets matches that of the C++ computation. Note that the - datasets are cut using the internal rules, but the datasets are not correlated. - """ - data = API.data(**data_internal_cuts_config) - cds = [ds.commondata for ds in data.datasets] + collection of datasets matches that of the C++ computation. - ld_cds = list(map(load_commondata, cds)) + Tests all combinations of internal/no cuts and correlated/uncorrelated data. - internal_cuts = [ds.cuts for ds in data.datasets] - cut_ld_cds = list(map(lambda x: x[0].with_cuts(x[1]), zip(ld_cds, internal_cuts))) - - covmat = datasets_covmat_from_systematics(cut_ld_cds) + """ + config = dict(data_config) + config["use_cuts"] = use_cuts + config["dataset_inputs"] = dataset_inputs - cpp_covmat = API.groups_covmat(**data_internal_cuts_config) + covmat = API.dataset_inputs_experimental_covmat(**config) + cpp_covmat = API.groups_covmat(**config) np.testing.assert_allclose(cpp_covmat, covmat) @@ -83,11 +69,12 @@ def test_covmat_with_one_systematic(): """ dsinput = {"dataset": "D0ZRAP", "frac": 1.0, "cfac": ["QCD"]} - cd = API.commondata(dataset_input=dsinput) - l_cd = load_commondata(cd) - covmat = covmat_from_systematics(l_cd) + config = dict(dataset_input=dsinput, theoryid=THEORYID, use_cuts="nocuts") - ds = API.dataset(dataset_input=dsinput, theoryid=THEORYID, use_cuts="nocuts") + covmat = API.experimental_covmat(**config) + ds = API.dataset(**config) + # double check that the dataset does indeed only have 1 systematic. + assert ds.commondata.nsys == 1 cpp_covmat = ds.load().get_covmat() np.testing.assert_allclose(cpp_covmat, covmat) @@ -142,3 +129,27 @@ def test_sqrt_covmat(data_config): covmat = ld_exp.get_covmat() cholesky_cov = sqrt_covmat(covmat) np.testing.assert_allclose(cholesky_cov @ cholesky_cov.T, covmat) + +@pytest.mark.parametrize("t0pdfset", [PDF, HESSIAN_PDF]) +@pytest.mark.parametrize("dataset_inputs", [DATA, CORR_DATA]) +def test_python_t0_covmat_matches_cpp( + data_internal_cuts_config, t0pdfset, dataset_inputs): + """Test which checks the python computation of the t0 covmat relating to a + collection of datasets matches that of the C++ computation. + + Tests all combinations of hessian/MC t0pdfset and correlated/uncorrelated + data. + + """ + config = dict(data_internal_cuts_config) + config["dataset_inputs"] = dataset_inputs + config["t0pdfset"] = t0pdfset + config["use_t0"] = True + covmat = API.dataset_inputs_t0_covmat(**config) + cpp_covmat = API.groups_covmat(**config) + # use allclose defaults or it fails + np.testing.assert_allclose(cpp_covmat, covmat, rtol=1e-05, atol=1e-08) + with pytest.raises(AssertionError): + np.testing.assert_allclose( + covmat, API.dataset_inputs_experimental_covmat(**config) + )