From 7e3d8c567dc2d926d5e63b648e9c991485366bc2 Mon Sep 17 00:00:00 2001 From: wilsonm Date: Wed, 3 Feb 2021 11:20:31 +0000 Subject: [PATCH 1/2] move python data utils and actions to sub folder --- validphys2/src/validphys/covmats.py | 266 +----------------- .../validphys/results_providers/__init__.py | 15 + .../validphys/results_providers/commondata.py | 30 ++ .../commondata_parser.py} | 0 .../src/validphys/results_providers/covmat.py | 87 ++++++ .../results_providers/covmat_construction.py | 197 +++++++++++++ .../theory_prediction.py} | 30 +- .../validphys/tests/test_commondataparser.py | 2 +- 8 files changed, 334 insertions(+), 293 deletions(-) create mode 100644 validphys2/src/validphys/results_providers/__init__.py create mode 100644 validphys2/src/validphys/results_providers/commondata.py rename validphys2/src/validphys/{commondataparser.py => results_providers/commondata_parser.py} (100%) create mode 100644 validphys2/src/validphys/results_providers/covmat.py create mode 100644 validphys2/src/validphys/results_providers/covmat_construction.py rename validphys2/src/validphys/{results_providers.py => results_providers/theory_prediction.py} (56%) diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index 74989025a0..4c5c0fe4e9 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -23,270 +23,6 @@ log = logging.getLogger(__name__) -INTRA_DATASET_SYS_NAME = ("UNCORR", "CORR", "THEORYUNCORR", "THEORYCORR") - - -def covmat_from_systematics(commondata, central_values=None): - """Take the statistical uncertainty and systematics table from - a :py:class:`validphys.coredata.CommonData` object and - construct the covariance matrix accounting for correlations between - systematics. - - If the systematic has the name ``SKIP`` then it is ignored in the - construction of the covariance matrix. 
- - ADDitive or MULTiplicative systypes are handled by either multiplying - the additive or multiplicative uncertainties respectively. We convert - uncertainties so that they are all in the same units as the data: - - Additive (ADD) systematics are left unchanged - - multiplicative (MULT) systematics need to be converted from a - percentage by multiplying by the central value - and dividing by 100. - - Finally, the systematics are split into the five possible archetypes - of systematic uncertainties: uncorrelated (UNCORR), correlated (CORR), - theory uncorrelated (THEORYUNCORR), theory correlated (THEORYCORR) and - special correlated (SPECIALCORR) systematics. - - Uncorrelated contributions from statistical error, uncorrelated and - theory uncorrelated are added in quadrature to the diagonal of the covmat. - - The contribution to the covariance matrix arising due to - correlated systematics is schematically ``A_correlated @ A_correlated.T``, - where A_correlated is a matrix N_dat by N_sys. The total contribution - from correlated systematics is found by adding together the result of - mutiplying each correlated systematic matrix by its transpose - (correlated, theory_correlated and special_correlated). - - For more information on the generation of the covariance matrix see the - `paper `_ - outlining the procedure, specifically equation 2 and surrounding text. - - Parameters - ---------- - commondata : validphys.coredata.CommonData - CommonData which stores information about systematic errors, - their treatment and description. - central_values : None, np.array - 1-D array containing alternative central values to combine with the - multiplicative errors to calculate their absolute contributions. By - default this is None, and the experimental central values are used. However, this - can be used to calculate, for example, the t0 covariance matrix by - using the predictions from the central member of the t0 pdf. 
- - Returns - ------- - cov_mat : np.array - Numpy array which is N_dat x N_dat (where N_dat is the number of data - points after cuts) containing uncertainty and correlation information. - - Example - ------- - >>> from validphys.commondataparser import load_commondata - >>> from validphys.loader import Loader - >>> from validphys.calcutils import covmat_from_systematics - >>> l = Loader() - >>> cd = l.check_commondata("NMC") - >>> cd = load_commondata(cd) - >>> covmat_from_systematics(cd) - array([[8.64031971e-05, 8.19971921e-05, 6.27396915e-05, ..., - 2.40747732e-05, 2.79614418e-05, 3.46727332e-05], - [8.19971921e-05, 1.41907442e-04, 6.52360141e-05, ..., - 2.36624379e-05, 2.72605623e-05, 3.45492831e-05], - [6.27396915e-05, 6.52360141e-05, 9.41928691e-05, ..., - 1.79244824e-05, 2.08603130e-05, 2.56283708e-05], - ..., - [2.40747732e-05, 2.36624379e-05, 1.79244824e-05, ..., - 5.67822050e-05, 4.09077450e-05, 4.14126235e-05], - [2.79614418e-05, 2.72605623e-05, 2.08603130e-05, ..., - 4.09077450e-05, 5.55150870e-05, 4.15843357e-05], - [3.46727332e-05, 3.45492831e-05, 2.56283708e-05, ..., - 4.14126235e-05, 4.15843357e-05, 1.43824457e-04]]) - """ - return construct_covmat( - commondata.stat_errors.to_numpy(), - commondata.systematic_errors(central_values) - ) - - -def datasets_covmat_from_systematics( - list_of_commondata, list_of_central_values=None -): - """Given a list containing :py:class:`validphys.coredata.CommonData` s, - construct the full covariance matrix. - - This is similar to :py:meth:`covmat_from_systematics` - except that special corr systematics are concatenated across all datasets - before being multiplied by their transpose to give off block-diagonal - contributions. The other systematics contribute to the block diagonal in the - same way as :py:meth:`covmat_from_systematics`. - - Parameters - ---------- - list_of_commondata : list[validphys.coredata.CommonData] - list of CommonData objects. 
- list_of_central_values: None, list[np.array] - list of 1-D arrays which contain alternative central values which are - combined with the multiplicative errors to calculate their absolute - contribution. By default this is None and the experimental central - values are used. - - Returns - ------- - cov_mat : np.array - Numpy array which is N_dat x N_dat (where N_dat is the number of data points after cuts) - containing uncertainty and correlation information. - - Example - ------- - >>> from validphys.commondataparser import load_commondata - >>> from validphys.covmats import datasets_covmat_from_systematics - >>> from validphys.loader import Loader - >>> l = Loader() - >>> cd1 = l.check_commondata("ATLASLOMASSDY11EXT") - >>> cd2 = l.check_commondata("ATLASZHIGHMASS49FB") - >>> ld1, ld2 = map(load_commondata, (cd1, cd2)) - >>> datasets_covmat_from_systematics((ld1, ld2)) - array([[2.91814548e+06, 4.66692123e+06, 2.36823008e+06, 8.62587330e+05, - 2.78209614e+05, 1.11790645e+05, 1.75129920e+03, 7.97466600e+02, - 4.00296960e+02, 2.22039720e+02, 1.46202210e+02, 8.36558100e+01, - """ - special_corrs = [] - block_diags = [] - - if list_of_central_values is None: - # want to just pass None to systematic_errors method - list_of_central_values = [None] * len(list_of_commondata) - - for cd, central_values in zip(list_of_commondata, list_of_central_values): - errors = cd.systematic_errors(central_values) - # separate out the special uncertainties which can be correlated across - # datasets - is_intra_dataset_error = errors.columns.isin(INTRA_DATASET_SYS_NAME) - block_diags.append(construct_covmat( - cd.stat_errors.to_numpy(), errors.loc[:, is_intra_dataset_error])) - special_corrs.append(errors.loc[:, ~is_intra_dataset_error]) - - # concat systematics across datasets - special_sys = pd.concat(special_corrs, axis=0, sort=False) - # non-overlapping systematics are set to NaN by concat, fill with 0 instead. 
- special_sys.fillna(0, inplace=True) - - diag = la.block_diag(*block_diags) - return diag + special_sys.to_numpy() @ special_sys.to_numpy().T - - -def construct_covmat(stat_errors: np.array, sys_errors: pd.DataFrame): - """Basic function to construct a covariance matrix (covmat), given the - statistical error and a dataframe of systematics. - - Errors with name UNCORR or THEORYUNCORR are added in quadrature with - the statistical error to the diagonal of the covmat. - - Other systematics are treated as correlated; their covmat contribution is - found by multiplying them by their transpose. - - Parameters - ---------- - stat_errors: np.array - a 1-D array of statistical uncertainties - sys_errors: pd.DataFrame - a dataframe with shape (N_data * N_sys) and systematic name as the - column headers. The uncertainties should be in the same units as the - data. - - Notes - ----- - This function doesn't contain any logic to ignore certain contributions to - the covmat, if you wanted to not include a particular systematic/set of - systematics i.e all uncertainties with MULT errors, then filter those out - of ``sys_errors`` before passing that to this function. - - """ - diagonal = stat_errors ** 2 - - is_uncorr = sys_errors.columns.isin(("UNCORR", "THEORYUNCORR")) - diagonal += (sys_errors.loc[:, is_uncorr].to_numpy() ** 2).sum(axis=1) - - corr_sys_mat = sys_errors.loc[:, ~is_uncorr].to_numpy() - return np.diag(diagonal) + corr_sys_mat @ corr_sys_mat.T - - -def experimental_covmat(loaded_commondata_with_cuts): - """Returns the experimental covariance matrix. Details of how - the covmat is constructed can be found in :py:func:`covmat_from_systematics`. - The experimental covariance matrix uses the experimental central values - to calculate the absolute uncertainties from the multiplicative systematics. 
- - Parameters - ---------- - loaded_commondata_with_cuts: validphys.coredata.CommonData - - Returns - ------- - covmat: np.array - - """ - return covmat_from_systematics(loaded_commondata_with_cuts) - - -def t0_covmat(loaded_commondata_with_cuts, dataset_t0_predictions): - """Like :py:func:`experimental_covmat` except uses the t0 predictions - to calculate the absolute constributions to the covmat from multiplicative - uncertainties. For more info on the t0 predictions see - :py:func:`validphys.results_providers.dataset_t0_predictions`. - - Parameters - ---------- - loaded_commondata_with_cuts: validphys.coredata.CommonData - commondata object for which to generate the covmat. - dataset_t0_predictions: np.array - 1-D array with t0 predictions. - - Returns - ------- - t0_covmat: np.array - t0 covariance matrix - - """ - return covmat_from_systematics( - loaded_commondata_with_cuts, dataset_t0_predictions) - - -def dataset_inputs_experimental_covmat(dataset_inputs_loaded_cd_with_cuts): - """Like :py:func:`experimental_covmat` except for all data - - Parameters - ---------- - dataset_inputs_loaded_cd_with_cuts: list[validphys.coredata.CommonData] - The CommonData for all datasets defined in ``dataset_inputs``. - - Returns - ------- - covmat: np.array - Covariance matrix for list of datasets. - """ - return datasets_covmat_from_systematics(dataset_inputs_loaded_cd_with_cuts) - -def dataset_inputs_t0_covmat( - dataset_inputs_loaded_cd_with_cuts, dataset_inputs_t0_predictions): - """Like :py:func:`t0_covmat` except for all data - - Parameters - ---------- - dataset_inputs_loaded_cd_with_cuts: list[validphys.coredata.CommonData] - The CommonData for all datasets defined in ``dataset_inputs``. - dataset_inputs_t0_predictions: list[np.array] - The t0 predictions for all datasets. - - Returns - ------- - t0_covmat: np.array - t0 covariance matrix matrix for list of datasets. 
- """ - return datasets_covmat_from_systematics( - dataset_inputs_loaded_cd_with_cuts, dataset_inputs_t0_predictions) - def sqrt_covmat(covariance_matrix): """Function that computes the square root of the covariance matrix. @@ -323,7 +59,7 @@ def sqrt_covmat(covariance_matrix): Example ------- - >>> from validphys.commondataparser import load_commondata + >>> from validphys.results_providers.commondata_parser.py import load_commondata >>> from validphys.loader import Loader >>> from validphys.calcutils import covmat_from_systematics >>> from validphys.results import sqrt_covmat diff --git a/validphys2/src/validphys/results_providers/__init__.py b/validphys2/src/validphys/results_providers/__init__.py new file mode 100644 index 0000000000..ae7f8f30a5 --- /dev/null +++ b/validphys2/src/validphys/results_providers/__init__.py @@ -0,0 +1,15 @@ +""" +results_providers.py + +Bridges between underlying functions concerned with: + + - loading theory predictions and data + - constructing covariance matrices + - generating pseudodata + +and actions which can be accessed by other actions/providers. + +""" +from validphys.results_providers.commondata import * +from validphys.results_providers.theory_prediction import * +from validphys.results_providers.covmat import * diff --git a/validphys2/src/validphys/results_providers/commondata.py b/validphys2/src/validphys/results_providers/commondata.py new file mode 100644 index 0000000000..fa7dcf79c0 --- /dev/null +++ b/validphys2/src/validphys/results_providers/commondata.py @@ -0,0 +1,30 @@ +""" +commondata.py + +Actions which return loaded commondata with cuts applied. + +""" +from reportengine import collect + +from validphys.results_providers.commondata_parser import load_commondata + +def loaded_commondata_with_cuts(commondata, cuts): + """Load the commondata and apply cuts. + + Parameters + ---------- + commondata: validphys.core.CommonDataSpec + commondata to load and cut. 
+ cuts: validphys.core.cuts, None + valid cuts, used to cut loaded commondata. + + Returns + ------- + loaded_cut_commondata: validphys.coredata.CommonData + + """ + lcd = load_commondata(commondata) + return lcd.with_cuts(cuts) + +dataset_inputs_loaded_cd_with_cuts = collect( + "loaded_commondata_with_cuts", ("data_input",)) diff --git a/validphys2/src/validphys/commondataparser.py b/validphys2/src/validphys/results_providers/commondata_parser.py similarity index 100% rename from validphys2/src/validphys/commondataparser.py rename to validphys2/src/validphys/results_providers/commondata_parser.py diff --git a/validphys2/src/validphys/results_providers/covmat.py b/validphys2/src/validphys/results_providers/covmat.py new file mode 100644 index 0000000000..02517adad1 --- /dev/null +++ b/validphys2/src/validphys/results_providers/covmat.py @@ -0,0 +1,87 @@ +""" +covmat.py + +Module containing actions which return constructed covariance matrices for +datasets/groups of datasets. + +""" +from validphys.results_providers.covmat_construction import ( + covmat_from_systematics, + datasets_covmat_from_systematics, +) + + +def experimental_covmat(loaded_commondata_with_cuts): + """Returns the experimental covariance matrix. Details of how + the covmat is constructed can be found in :py:func:`covmat_from_systematics`. + The experimental covariance matrix uses the experimental central values + to calculate the absolute uncertainties from the multiplicative systematics. + + Parameters + ---------- + loaded_commondata_with_cuts: validphys.coredata.CommonData + + Returns + ------- + covmat: np.array + + """ + return covmat_from_systematics(loaded_commondata_with_cuts) + + +def t0_covmat(loaded_commondata_with_cuts, dataset_t0_predictions): + """Like :py:func:`experimental_covmat` except uses the t0 predictions + to calculate the absolute contributions to the covmat from multiplicative + uncertainties. 
For more info on the t0 predictions see + :py:func:`validphys.results_providers.dataset_t0_predictions`. + + Parameters + ---------- + loaded_commondata_with_cuts: validphys.coredata.CommonData + commondata object for which to generate the covmat. + dataset_t0_predictions: np.array + 1-D array with t0 predictions. + + Returns + ------- + t0_covmat: np.array + t0 covariance matrix + + """ + return covmat_from_systematics( + loaded_commondata_with_cuts, dataset_t0_predictions) + + +def dataset_inputs_experimental_covmat(dataset_inputs_loaded_cd_with_cuts): + """Like :py:func:`experimental_covmat` except for all data + + Parameters + ---------- + dataset_inputs_loaded_cd_with_cuts: list[validphys.coredata.CommonData] + The CommonData for all datasets defined in ``dataset_inputs``. + + Returns + ------- + covmat: np.array + Covariance matrix for list of datasets. + """ + return datasets_covmat_from_systematics(dataset_inputs_loaded_cd_with_cuts) + +def dataset_inputs_t0_covmat( + dataset_inputs_loaded_cd_with_cuts, dataset_inputs_t0_predictions): + """Like :py:func:`t0_covmat` except for all data + + Parameters + ---------- + dataset_inputs_loaded_cd_with_cuts: list[validphys.coredata.CommonData] + The CommonData for all datasets defined in ``dataset_inputs``. + dataset_inputs_t0_predictions: list[np.array] + The t0 predictions for all datasets. + + Returns + ------- + t0_covmat: np.array + t0 covariance matrix for list of datasets. 
+ """ + return datasets_covmat_from_systematics( + dataset_inputs_loaded_cd_with_cuts, dataset_inputs_t0_predictions) diff --git a/validphys2/src/validphys/results_providers/covmat_construction.py b/validphys2/src/validphys/results_providers/covmat_construction.py new file mode 100644 index 0000000000..9d45f07c7e --- /dev/null +++ b/validphys2/src/validphys/results_providers/covmat_construction.py @@ -0,0 +1,197 @@ +""" +covmat_construction.py + +Module containing underlying functions which construct the covariance matrices + +""" +import numpy as np +import pandas as pd +import scipy.linalg as la + +INTRA_DATASET_SYS_NAME = ("UNCORR", "CORR", "THEORYUNCORR", "THEORYCORR") + + +def construct_covmat(stat_errors: np.array, sys_errors: pd.DataFrame): + """Basic function to construct a covariance matrix (covmat), given the + statistical error and a dataframe of systematics. + + Errors with name UNCORR or THEORYUNCORR are added in quadrature with + the statistical error to the diagonal of the covmat. + + Other systematics are treated as correlated; their covmat contribution is + found by multiplying them by their transpose. + + Parameters + ---------- + stat_errors: np.array + a 1-D array of statistical uncertainties + sys_errors: pd.DataFrame + a dataframe with shape (N_data * N_sys) and systematic name as the + column headers. The uncertainties should be in the same units as the + data. + + Notes + ----- + This function doesn't contain any logic to ignore certain contributions to + the covmat, if you wanted to not include a particular systematic/set of + systematics i.e all uncertainties with MULT errors, then filter those out + of ``sys_errors`` before passing that to this function. 
+ + """ + diagonal = stat_errors ** 2 + + is_uncorr = sys_errors.columns.isin(("UNCORR", "THEORYUNCORR")) + diagonal += (sys_errors.loc[:, is_uncorr].to_numpy() ** 2).sum(axis=1) + + corr_sys_mat = sys_errors.loc[:, ~is_uncorr].to_numpy() + return np.diag(diagonal) + corr_sys_mat @ corr_sys_mat.T + + +def covmat_from_systematics(commondata, central_values=None): + """Take the statistical uncertainty and systematics table from + a :py:class:`validphys.coredata.CommonData` object and + construct the covariance matrix accounting for correlations between + systematics. + + If the systematic has the name ``SKIP`` then it is ignored in the + construction of the covariance matrix. + + ADDitive or MULTiplicative systypes are handled by either multiplying + the additive or multiplicative uncertainties respectively. We convert + uncertainties so that they are all in the same units as the data: + - Additive (ADD) systematics are left unchanged + - multiplicative (MULT) systematics need to be converted from a + percentage by multiplying by the central value + and dividing by 100. + + Finally, the systematics are split into the five possible archetypes + of systematic uncertainties: uncorrelated (UNCORR), correlated (CORR), + theory uncorrelated (THEORYUNCORR), theory correlated (THEORYCORR) and + special correlated (SPECIALCORR) systematics. + + Uncorrelated contributions from statistical error, uncorrelated and + theory uncorrelated are added in quadrature to the diagonal of the covmat. + + The contribution to the covariance matrix arising due to + correlated systematics is schematically ``A_correlated @ A_correlated.T``, + where A_correlated is a matrix N_dat by N_sys. The total contribution + from correlated systematics is found by adding together the result of + mutiplying each correlated systematic matrix by its transpose + (correlated, theory_correlated and special_correlated). 
+ + For more information on the generation of the covariance matrix see the + `paper `_ + outlining the procedure, specifically equation 2 and surrounding text. + + Parameters + ---------- + commondata : validphys.coredata.CommonData + CommonData which stores information about systematic errors, + their treatment and description. + central_values : None, np.array + 1-D array containing alternative central values to combine with the + multiplicative errors to calculate their absolute contributions. By + default this is None, and the experimental central values are used. However, this + can be used to calculate, for example, the t0 covariance matrix by + using the predictions from the central member of the t0 pdf. + + Returns + ------- + cov_mat : np.array + Numpy array which is N_dat x N_dat (where N_dat is the number of data + points after cuts) containing uncertainty and correlation information. + + Example + ------- + >>> from validphys.results_providers.commondata_parser import load_commondata + >>> from validphys.loader import Loader + >>> from validphys.results_providers.covmat_construction import covmat_from_systematics + >>> l = Loader() + >>> cd = l.check_commondata("NMC") + >>> cd = load_commondata(cd) + >>> covmat_from_systematics(cd) + array([[8.64031971e-05, 8.19971921e-05, 6.27396915e-05, ..., + 2.40747732e-05, 2.79614418e-05, 3.46727332e-05], + [8.19971921e-05, 1.41907442e-04, 6.52360141e-05, ..., + 2.36624379e-05, 2.72605623e-05, 3.45492831e-05], + [6.27396915e-05, 6.52360141e-05, 9.41928691e-05, ..., + 1.79244824e-05, 2.08603130e-05, 2.56283708e-05], + ..., + [2.40747732e-05, 2.36624379e-05, 1.79244824e-05, ..., + 5.67822050e-05, 4.09077450e-05, 4.14126235e-05], + [2.79614418e-05, 2.72605623e-05, 2.08603130e-05, ..., + 4.09077450e-05, 5.55150870e-05, 4.15843357e-05], + [3.46727332e-05, 3.45492831e-05, 2.56283708e-05, ..., + 4.14126235e-05, 4.15843357e-05, 1.43824457e-04]]) + """ + return construct_covmat( + commondata.stat_errors.to_numpy(), + 
commondata.systematic_errors(central_values) + ) + + +def datasets_covmat_from_systematics( + list_of_commondata, list_of_central_values=None +): + """Given a list containing :py:class:`validphys.coredata.CommonData` s, + construct the full covariance matrix. + + This is similar to :py:meth:`covmat_from_systematics` + except that special corr systematics are concatenated across all datasets + before being multiplied by their transpose to give off block-diagonal + contributions. The other systematics contribute to the block diagonal in the + same way as :py:meth:`covmat_from_systematics`. + + Parameters + ---------- + list_of_commondata : list[validphys.coredata.CommonData] + list of CommonData objects. + list_of_central_values: None, list[np.array] + list of 1-D arrays which contain alternative central values which are + combined with the multiplicative errors to calculate their absolute + contribution. By default this is None and the experimental central + values are used. + + Returns + ------- + cov_mat : np.array + Numpy array which is N_dat x N_dat (where N_dat is the number of data points after cuts) + containing uncertainty and correlation information. 
+ + Example + ------- + >>> from validphys.results_providers.commondata_parser import load_commondata + >>> from validphys.results_providers.covmat_construction import datasets_covmat_from_systematics + >>> from validphys.loader import Loader + >>> l = Loader() + >>> cd1 = l.check_commondata("ATLASLOMASSDY11EXT") + >>> cd2 = l.check_commondata("ATLASZHIGHMASS49FB") + >>> ld1, ld2 = map(load_commondata, (cd1, cd2)) + >>> datasets_covmat_from_systematics((ld1, ld2)) + array([[2.91814548e+06, 4.66692123e+06, 2.36823008e+06, 8.62587330e+05, + 2.78209614e+05, 1.11790645e+05, 1.75129920e+03, 7.97466600e+02, + 4.00296960e+02, 2.22039720e+02, 1.46202210e+02, 8.36558100e+01, + """ + special_corrs = [] + block_diags = [] + + if list_of_central_values is None: + # want to just pass None to systematic_errors method + list_of_central_values = [None] * len(list_of_commondata) + + for cd, central_values in zip(list_of_commondata, list_of_central_values): + errors = cd.systematic_errors(central_values) + # separate out the special uncertainties which can be correlated across + # datasets + is_intra_dataset_error = errors.columns.isin(INTRA_DATASET_SYS_NAME) + block_diags.append(construct_covmat( + cd.stat_errors.to_numpy(), errors.loc[:, is_intra_dataset_error])) + special_corrs.append(errors.loc[:, ~is_intra_dataset_error]) + + # concat systematics across datasets + special_sys = pd.concat(special_corrs, axis=0, sort=False) + # non-overlapping systematics are set to NaN by concat, fill with 0 instead. 
+ special_sys.fillna(0, inplace=True) + + diag = la.block_diag(*block_diags) + return diag + special_sys.to_numpy() @ special_sys.to_numpy().T diff --git a/validphys2/src/validphys/results_providers.py b/validphys2/src/validphys/results_providers/theory_prediction.py similarity index 56% rename from validphys2/src/validphys/results_providers.py rename to validphys2/src/validphys/results_providers/theory_prediction.py index 4e7332514d..74af49cee9 100644 --- a/validphys2/src/validphys/results_providers.py +++ b/validphys2/src/validphys/results_providers/theory_prediction.py @@ -1,38 +1,14 @@ """ -results_providers.py +theory_prediction.py -module which bridges between underlying functions concerned with loading -theory predictions and data and actions which can be accessed -by other actions/providers. +Module containing actions which return theory predictions associated with +datasets. """ from reportengine import collect -from validphys.commondataparser import load_commondata from validphys.convolution import central_predictions -def loaded_commondata_with_cuts(commondata, cuts): - """Load the commondata and apply cuts. - - Parameters - ---------- - commondata: validphys.core.CommonDataSpec - commondata to load and cut. - cuts: validphys.core.cuts, None - valid cuts, used to cut loaded commondata. - - Returns - ------- - loaded_cut_commondata: validphys.coredata.CommonData - - """ - lcd = load_commondata(commondata) - return lcd.with_cuts(cuts) - -dataset_inputs_loaded_cd_with_cuts = collect( - "loaded_commondata_with_cuts", ("data_input",)) - - def dataset_t0_predictions(dataset, t0set): """Returns the t0 predictions for a ``dataset`` which are the predictions calculated using the central member of ``pdf``. 
Note that if ``pdf`` has diff --git a/validphys2/src/validphys/tests/test_commondataparser.py b/validphys2/src/validphys/tests/test_commondataparser.py index 5712625090..ba86cab68a 100644 --- a/validphys2/src/validphys/tests/test_commondataparser.py +++ b/validphys2/src/validphys/tests/test_commondataparser.py @@ -2,7 +2,7 @@ import pandas as pd from validphys.api import API -from validphys.commondataparser import load_commondata +from validphys.results_providers.commondata_parser import load_commondata from validphys.loader import FallbackLoader as Loader from validphys.tests.conftest import THEORYID, FIT From 94fdaea4a8d4439e6913e7c64a2f206fff24c50e Mon Sep 17 00:00:00 2001 From: wilsonm Date: Wed, 3 Feb 2021 12:10:33 +0000 Subject: [PATCH 2/2] add to py data obj docs --- doc/sphinx/source/vp/pydataobjs.rst | 110 +++++++++++++++++++++++++++- 1 file changed, 109 insertions(+), 1 deletion(-) diff --git a/doc/sphinx/source/vp/pydataobjs.rst b/doc/sphinx/source/vp/pydataobjs.rst index 3aeab6153e..094e40385a 100644 --- a/doc/sphinx/source/vp/pydataobjs.rst +++ b/doc/sphinx/source/vp/pydataobjs.rst @@ -15,7 +15,7 @@ computation and storage strategies. Loading FKTables ---------------- -Currently only FKTables can be directly without C++ code. This is implemented +Loading of FKTables is implemented in the :py:mod:`validphys.fkparser` module. For example:: from validphys.fkparser import load_fktable @@ -143,3 +143,111 @@ central replica is the same as the mean of the replica predictions:: # Compute the size of the differences between approximate and true predictions # over the PDF uncertainty. Take the maximum over the three ttbar data points. print(((p - lp).std() / p.std()).max()) + +Loading CommonData +------------------ + +The underlying functions for loading CommonData can be found in +:py:mod:`validphys.results_providers.commondata_parser`. 
The data is loaded +as :py:class:`validphys.coredata.CommonData`, which uses the +`dataclasses <https://docs.python.org/3/library/dataclasses.html>`_ module +which automatically generates some special methods for the class. The +underlying data is stored as DataFrames, and so can be used +with the standard pandas machinery:: + + import pandas as pd + + from validphys.api import API + from validphys.results_providers.commondata_parser import load_commondata + # first get the CommonDataSpec + cd = API.commondata(dataset_input={"dataset":"NMC"}) + lcd = load_commondata(cd) + assert isinstance(lcd.central_values, pd.Series) + assert isinstance(lcd.systematics_table, pd.DataFrame) + +The :py:class:`validphys.coredata.CommonData` class has a method which returns +a new instance of the class with cuts applied:: + + from validphys.api import API + from validphys.results_providers.commondata_parser import load_commondata + inp = { + "dataset_input": {"dataset":"NMC"}, + "use_cuts": "internal", + "theoryid": 162 + } + # first get the CommonDataSpec + cd = API.commondata(**inp) + lcd = load_commondata(cd) + # CommonDataSpec object ndata is always total data points uncut + assert lcd.ndata == cd.ndata + cuts = API.cuts(**inp) + lcd_cut = lcd.with_cuts(cuts) + # data has been cut, ndata should have changed. + assert lcd_cut.ndata != cd.ndata + +An action already exists which returns the loaded and cut commondata, which is +more convenient than calling the underlying functions:: + + api_lcd_cut = API.loaded_commondata_with_cuts(**inp) + assert api_lcd_cut.ndata == lcd_cut.ndata + +Loading Covariance Matrices +--------------------------- + +Functions which take :py:class:`validphys.coredata.CommonData` s and return +covariance matrices can be found in +:py:mod:`validphys.results_providers.covmat_construction`. 
As with the commondata +the underlying functions can be accessed directly:: + + import numpy as np + from validphys.api import API + from validphys.results_providers.covmat_construction import covmat_from_systematics + + inp = { + "dataset_input": {"dataset":"NMC"}, + "use_cuts": "internal", + "theoryid": 162 + } + lcd = API.loaded_commondata_with_cuts(**inp) + cov = covmat_from_systematics(lcd) + assert isinstance(cov, np.ndarray) + assert cov.shape == (lcd.ndata, lcd.ndata) + +There exists a similar function which acts upon a list of multiple commondatas +and takes into account correlations between datasets:: + + from validphys.results_providers.covmat_construction import datasets_covmat_from_systematics + inp = { + "dataset_inputs": [ + {"dataset":"NMC"}, + {"dataset":"NMCPD"}, + ], + "use_cuts": "internal", + "theoryid": 162 + } + lcds = API.dataset_inputs_loaded_cd_with_cuts(**inp) + total_ndata = np.sum([lcd.ndata for lcd in lcds]) + total_cov = datasets_covmat_from_systematics(lcds) + assert total_cov.shape == (total_ndata, total_ndata) + +These functions are already leveraged by actions, which can be accessed directly +from the API:: + + from validphys.api import API + + inp = { + "dataset_input": {"dataset":"NMC"}, + "use_cuts": "internal", + "theoryid": 162 + } + # single dataset covmat + cov = API.experimental_covmat(**inp) + inp = { + "dataset_inputs": [ + {"dataset":"NMC"}, + {"dataset":"NMCPD"}, + ], + "use_cuts": "internal", + "theoryid": 162 + } + total_cov = API.dataset_inputs_experimental_covmat(**inp)