From 0abfced1ae575dcd35d7794e624fb7178f9cea6a Mon Sep 17 00:00:00 2001 From: wilsonm Date: Wed, 3 Feb 2021 12:10:33 +0000 Subject: [PATCH 1/5] add to py data obj docs --- doc/sphinx/source/vp/pydataobjs.rst | 110 +++++++++++++++++++++++++++- 1 file changed, 109 insertions(+), 1 deletion(-) diff --git a/doc/sphinx/source/vp/pydataobjs.rst b/doc/sphinx/source/vp/pydataobjs.rst index 3aeab6153e..094e40385a 100644 --- a/doc/sphinx/source/vp/pydataobjs.rst +++ b/doc/sphinx/source/vp/pydataobjs.rst @@ -15,7 +15,7 @@ computation and storage strategies. Loading FKTables ---------------- -Currently only FKTables can be directly without C++ code. This is implemented +This is implemented in the :py:mod:`validphys.fkparser` module. For example:: from validphys.fkparser import load_fktable @@ -143,3 +143,111 @@ central replica is the same as the mean of the replica predictions:: # Compute the size of the differences between approximate and true predictions # over the PDF uncertainty. Take the maximum over the three ttbar data points. print(((p - lp).std() / p.std()).max()) + +Loading CommonData +------------------ + +The underlying functions for loading CommonData can be found in +:py:mod:`validphys.results_providers.commondata_parser`. The data is loaded +as :py:class:`validphys.coredata.CommonData`, which uses the +`dataclasses `_ module +which automatically generates some special methods for the class. 
The +underlying data is stored as DataFrames, and so can be used +with the standard pandas machinery:: + + import pandas as pd + + from validphys.api import API + from validphys.results_providers.commondata_parser import load_commondata + # first get the CommonDataSpec + cd = API.commondata(dataset_input={"dataset":"NMC"}) + lcd = load_commondata(cd) + assert isinstance(lcd.central_values, pd.Series) + assert isinstance(lcd.systematics_table, pd.DataFrame) + +The :py:class:`validphys.coredata.CommonData` class has a method which returns +a new instance of the class with cuts applied:: + + from validphys.api import API + from validphys.results_providers.commondata_parser import load_commondata + inp = { + "dataset_input": {"dataset":"NMC"}, + "use_cuts": "internal", + "theoryid": 162 + } + # first get the CommonDataSpec + cd = API.commondata(**inp) + lcd = load_commondata(cd) + # CommonDataSpec object ndata is always total data points uncut + assert lcd.ndata == cd.ndata + cuts = API.cuts(**inp) + lcd_cut = lcd.with_cuts(cuts) + # data has been cut, ndata should have changed. + assert lcd_cut.ndata != cd.ndata + +An action already exists which returns the loaded and cut commondata, which is +more convenient than calling the underlying functions:: + + api_lcd_cut = API.loaded_commondata_with_cuts(**inp) + assert api_lcd_cut.ndata == lcd_cut.ndata + +Loading Covariance Matrices +--------------------------- + +Functions which take :py:class:`validphys.coredata.CommonData` s and return +covariance matrices can be found in +:py:mod:`validphys.results_providers.covmat_construction`. 
As with the commondata +the underlying functions can be accessed directly:: + + import numpy as np + from validphys.api import API + from validphys.results_providers.covmat_construction import covmat_from_systematics + + inp = { + "dataset_input": {"dataset":"NMC"}, + "use_cuts": "internal", + "theoryid": 162 + } + lcd = API.loaded_commondata_with_cuts(**inp) + cov = covmat_from_systematics(lcd) + assert isinstance(cov, np.ndarray) + assert cov.shape == (lcd.ndata, lcd.ndata) + +There exists a similar function which acts upon a list of multiple commondatas +and takes into account correlations between datasets:: + + from validphys.results_providers.covmat_construction import datasets_covmat_from_systematics + inp = { + "dataset_inputs": [ + {"dataset":"NMC"}, + {"dataset":"NMCPD"}, + ], + "use_cuts": "internal", + "theoryid": 162 + } + lcds = API.dataset_inputs_loaded_cd_with_cuts(**inp) + total_ndata = np.sum([lcd.ndata for lcd in lcds]) + total_cov = datasets_covmat_from_systematics(lcds) + assert total_cov.shape == (total_ndata, total_ndata) + +These functions are already leveraged by actions, which can be accessed directly +from the API:: + + from validphys.api import API + + inp = { + "dataset_input": {"dataset":"NMC"}, + "use_cuts": "internal", + "theoryid": 162 + } + # single dataset covmat + cov = API.experimental_covmat(**inp) + inp = { + "dataset_inputs": [ + {"dataset":"NMC"}, + {"dataset":"NMCPD"}, + ], + "use_cuts": "internal", + "theoryid": 162 + } + total_cov = API.dataset_inputs_experimental_covmat(**inp) From 77f28a28e7ee14e208620d89ce5d356f2b02b8b1 Mon Sep 17 00:00:00 2001 From: wilsonm Date: Wed, 17 Feb 2021 10:40:25 +0000 Subject: [PATCH 2/5] split out util function from covmats and then remove uneccessary actions, promoting preexisting functions to actions --- validphys2/src/validphys/covmats.py | 112 ++++++------------ validphys2/src/validphys/covmats_utils.py | 45 +++++++ validphys2/src/validphys/results_providers.py | 33 +----- 
.../src/validphys/tests/test_covmats.py | 14 +-- 4 files changed, 91 insertions(+), 113 deletions(-) create mode 100644 validphys2/src/validphys/covmats_utils.py diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index 74989025a0..232a157630 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -18,7 +18,9 @@ check_speclabels_different, check_data_cuts_match_theorycovmat, ) +from validphys.convolution import central_predictions from validphys.core import PDF, DataGroupSpec, DataSetSpec +from validphys.covmats_utils import construct_covmat from validphys.results import ThPredictionsResult log = logging.getLogger(__name__) @@ -26,7 +28,7 @@ INTRA_DATASET_SYS_NAME = ("UNCORR", "CORR", "THEORYUNCORR", "THEORYCORR") -def covmat_from_systematics(commondata, central_values=None): +def covmat_from_systematics(loaded_commondata_with_cuts, _central_values=None): """Take the statistical uncertainty and systematics table from a :py:class:`validphys.coredata.CommonData` object and construct the covariance matrix accounting for correlations between @@ -64,10 +66,10 @@ def covmat_from_systematics(commondata, central_values=None): Parameters ---------- - commondata : validphys.coredata.CommonData + loaded_commondata_with_cuts : validphys.coredata.CommonData CommonData which stores information about systematic errors, their treatment and description. - central_values : None, np.array + _central_values : None, np.array 1-D array containing alternative central values to combine with the multiplicative errors to calculate their absolute contributions. By default this is None, and the experimental central values are used. 
However, this @@ -104,13 +106,13 @@ def covmat_from_systematics(commondata, central_values=None): 4.14126235e-05, 4.15843357e-05, 1.43824457e-04]]) """ return construct_covmat( - commondata.stat_errors.to_numpy(), - commondata.systematic_errors(central_values) + loaded_commondata_with_cuts.stat_errors.to_numpy(), + loaded_commondata_with_cuts.systematic_errors(_central_values) ) -def datasets_covmat_from_systematics( - list_of_commondata, list_of_central_values=None +def dataset_inputs_covmat_from_systematics( + dataset_inputs_loaded_cd_with_cuts, _list_of_central_values=None ): """Given a list containing :py:class:`validphys.coredata.CommonData` s, construct the full covariance matrix. @@ -123,9 +125,9 @@ def datasets_covmat_from_systematics( Parameters ---------- - list_of_commondata : list[validphys.coredata.CommonData] + dataset_inputs_loaded_cd_with_cuts : list[validphys.coredata.CommonData] list of CommonData objects. - list_of_central_values: None, list[np.array] + _list_of_central_values: None, list[np.array] list of 1-D arrays which contain alternative central values which are combined with the multiplicative errors to calculate their absolute contribution. 
By default this is None and the experimental central @@ -140,13 +142,13 @@ def datasets_covmat_from_systematics( Example ------- >>> from validphys.commondataparser import load_commondata - >>> from validphys.covmats import datasets_covmat_from_systematics + >>> from validphys.covmats import dataset_inputs_covmat_from_systematics >>> from validphys.loader import Loader >>> l = Loader() >>> cd1 = l.check_commondata("ATLASLOMASSDY11EXT") >>> cd2 = l.check_commondata("ATLASZHIGHMASS49FB") >>> ld1, ld2 = map(load_commondata, (cd1, cd2)) - >>> datasets_covmat_from_systematics((ld1, ld2)) + >>> dataset_inputs_covmat_from_systematics((ld1, ld2)) array([[2.91814548e+06, 4.66692123e+06, 2.36823008e+06, 8.62587330e+05, 2.78209614e+05, 1.11790645e+05, 1.75129920e+03, 7.97466600e+02, 4.00296960e+02, 2.22039720e+02, 1.46202210e+02, 8.36558100e+01, @@ -154,11 +156,11 @@ def datasets_covmat_from_systematics( special_corrs = [] block_diags = [] - if list_of_central_values is None: + if _list_of_central_values is None: # want to just pass None to systematic_errors method - list_of_central_values = [None] * len(list_of_commondata) + _list_of_central_values = [None] * len(dataset_inputs_loaded_cd_with_cuts) - for cd, central_values in zip(list_of_commondata, list_of_central_values): + for cd, central_values in zip(dataset_inputs_loaded_cd_with_cuts, _list_of_central_values): errors = cd.systematic_errors(central_values) # separate out the special uncertainties which can be correlated across # datasets @@ -176,62 +178,32 @@ def datasets_covmat_from_systematics( return diag + special_sys.to_numpy() @ special_sys.to_numpy().T -def construct_covmat(stat_errors: np.array, sys_errors: pd.DataFrame): - """Basic function to construct a covariance matrix (covmat), given the - statistical error and a dataframe of systematics. - - Errors with name UNCORR or THEORYUNCORR are added in quadrature with - the statistical error to the diagonal of the covmat. 
- - Other systematics are treated as correlated; their covmat contribution is - found by multiplying them by their transpose. +def dataset_t0_predictions(dataset, t0set): + """Returns the t0 predictions for a ``dataset`` which are the predictions + calculated using the central member of ``pdf``. Note that if ``pdf`` has + errortype ``replicas``, and the dataset is a hadronic observable then the + predictions of the central member are subtly different to the central + value of the replica predictions. Parameters ---------- - stat_errors: np.array - a 1-D array of statistical uncertainties - sys_errors: pd.DataFrame - a dataframe with shape (N_data * N_sys) and systematic name as the - column headers. The uncertainties should be in the same units as the - data. - - Notes - ----- - This function doesn't contain any logic to ignore certain contributions to - the covmat, if you wanted to not include a particular systematic/set of - systematics i.e all uncertainties with MULT errors, then filter those out - of ``sys_errors`` before passing that to this function. - - """ - diagonal = stat_errors ** 2 - - is_uncorr = sys_errors.columns.isin(("UNCORR", "THEORYUNCORR")) - diagonal += (sys_errors.loc[:, is_uncorr].to_numpy() ** 2).sum(axis=1) - - corr_sys_mat = sys_errors.loc[:, ~is_uncorr].to_numpy() - return np.diag(diagonal) + corr_sys_mat @ corr_sys_mat.T - - -def experimental_covmat(loaded_commondata_with_cuts): - """Returns the experimental covariance matrix. Details of how - the covmat is constructed can be found in :py:func:`covmat_from_systematics`. - The experimental covariance matrix uses the experimental central values - to calculate the absolute uncertainties from the multiplicative systematics. 
- - Parameters - ---------- - loaded_commondata_with_cuts: validphys.coredata.CommonData + dataset: validphys.core.DataSetSpec + dataset for which to calculate t0 predictions + t0set: validphys.core.PDF + pdf used to calculate the predictions Returns ------- - covmat: np.array + t0_predictions: np.array + 1-D numpy array with predictions for each of the cut datapoints. """ - return covmat_from_systematics(loaded_commondata_with_cuts) + # Squeeze values since t0_pred is DataFrame with shape n_data * 1 + return central_predictions(dataset, t0set).to_numpy().squeeze() -def t0_covmat(loaded_commondata_with_cuts, dataset_t0_predictions): - """Like :py:func:`experimental_covmat` except uses the t0 predictions +def t0_covmat_from_systematics(loaded_commondata_with_cuts, dataset_t0_predictions): + """Like :py:func:`covmat_from_systematics` except uses the t0 predictions to calculate the absolute constributions to the covmat from multiplicative uncertainties. For more info on the t0 predictions see :py:func:`validphys.results_providers.dataset_t0_predictions`. @@ -253,24 +225,12 @@ def t0_covmat(loaded_commondata_with_cuts, dataset_t0_predictions): loaded_commondata_with_cuts, dataset_t0_predictions) -def dataset_inputs_experimental_covmat(dataset_inputs_loaded_cd_with_cuts): - """Like :py:func:`experimental_covmat` except for all data - - Parameters - ---------- - dataset_inputs_loaded_cd_with_cuts: list[validphys.coredata.CommonData] - The CommonData for all datasets defined in ``dataset_inputs``. +dataset_inputs_t0_predictions = collect("dataset_t0_predictions", ("data",)) - Returns - ------- - covmat: np.array - Covariance matrix for list of datasets. 
- """ - return datasets_covmat_from_systematics(dataset_inputs_loaded_cd_with_cuts) -def dataset_inputs_t0_covmat( +def dataset_inputs_t0_covmat_from_systematics( dataset_inputs_loaded_cd_with_cuts, dataset_inputs_t0_predictions): - """Like :py:func:`t0_covmat` except for all data + """Like :py:func:`t0_covmat_from_systematics` except for all data Parameters ---------- @@ -284,7 +244,7 @@ def dataset_inputs_t0_covmat( t0_covmat: np.array t0 covariance matrix matrix for list of datasets. """ - return datasets_covmat_from_systematics( + return dataset_inputs_covmat_from_systematics( dataset_inputs_loaded_cd_with_cuts, dataset_inputs_t0_predictions) diff --git a/validphys2/src/validphys/covmats_utils.py b/validphys2/src/validphys/covmats_utils.py new file mode 100644 index 0000000000..51225a1806 --- /dev/null +++ b/validphys2/src/validphys/covmats_utils.py @@ -0,0 +1,45 @@ +""" +covmat_utils.py + +Utils functions for constructing covariance matrices from systematics. +Leveraged by :py:mod:`validphys.covmats` which contains relevant +actions/providers. + +""" +import numpy as np +import pandas as pd + +def construct_covmat(stat_errors: np.array, sys_errors: pd.DataFrame): + """Basic function to construct a covariance matrix (covmat), given the + statistical error and a dataframe of systematics. + + Errors with name UNCORR or THEORYUNCORR are added in quadrature with + the statistical error to the diagonal of the covmat. + + Other systematics are treated as correlated; their covmat contribution is + found by multiplying them by their transpose. + + Parameters + ---------- + stat_errors: np.array + a 1-D array of statistical uncertainties + sys_errors: pd.DataFrame + a dataframe with shape (N_data * N_sys) and systematic name as the + column headers. The uncertainties should be in the same units as the + data. 
+ + Notes + ----- + This function doesn't contain any logic to ignore certain contributions to + the covmat, if you wanted to not include a particular systematic/set of + systematics i.e all uncertainties with MULT errors, then filter those out + of ``sys_errors`` before passing that to this function. + + """ + diagonal = stat_errors ** 2 + + is_uncorr = sys_errors.columns.isin(("UNCORR", "THEORYUNCORR")) + diagonal += (sys_errors.loc[:, is_uncorr].to_numpy() ** 2).sum(axis=1) + + corr_sys_mat = sys_errors.loc[:, ~is_uncorr].to_numpy() + return np.diag(diagonal) + corr_sys_mat @ corr_sys_mat.T diff --git a/validphys2/src/validphys/results_providers.py b/validphys2/src/validphys/results_providers.py index 4e7332514d..a8f618f8ad 100644 --- a/validphys2/src/validphys/results_providers.py +++ b/validphys2/src/validphys/results_providers.py @@ -1,15 +1,14 @@ """ results_providers.py -module which bridges between underlying functions concerned with loading -theory predictions and data and actions which can be accessed -by other actions/providers. +Module containing actions which return loaded commondata, leverages utils +found in :py:mod:`validphys.commondataparser`, returning objects from +:py:mod:`validphys.coredata` """ from reportengine import collect from validphys.commondataparser import load_commondata -from validphys.convolution import central_predictions def loaded_commondata_with_cuts(commondata, cuts): """Load the commondata and apply cuts. @@ -31,29 +30,3 @@ def loaded_commondata_with_cuts(commondata, cuts): dataset_inputs_loaded_cd_with_cuts = collect( "loaded_commondata_with_cuts", ("data_input",)) - - -def dataset_t0_predictions(dataset, t0set): - """Returns the t0 predictions for a ``dataset`` which are the predictions - calculated using the central member of ``pdf``. 
Note that if ``pdf`` has - errortype ``replicas``, and the dataset is a hadronic observable then the - predictions of the central member are subtly different to the central - value of the replica predictions. - - Parameters - ---------- - dataset: validphys.core.DataSetSpec - dataset for which to calculate t0 predictions - t0set: validphys.core.PDF - pdf used to calculate the predictions - - Returns - ------- - t0_predictions: np.array - 1-D numpy array with predictions for each of the cut datapoints. - - """ - # Squeeze values since t0_pred is DataFrame with shape n_data * 1 - return central_predictions(dataset, t0set).to_numpy().squeeze() - -dataset_inputs_t0_predictions = collect("dataset_t0_predictions", ("data",)) diff --git a/validphys2/src/validphys/tests/test_covmats.py b/validphys2/src/validphys/tests/test_covmats.py index a481475147..2e2dd21deb 100644 --- a/validphys2/src/validphys/tests/test_covmats.py +++ b/validphys2/src/validphys/tests/test_covmats.py @@ -29,7 +29,7 @@ def test_self_consistent_covmat_from_systematics(data_internal_cuts_config): """Test which checks that the single dataset implementation of - ``covmat_from_systematics`` matches ``datasets_covmat_from_systematics`` + ``covmat_from_systematics`` matches ``dataset_inputs_covmat_from_systematics`` when the latter is given a list containing a single dataset. 
""" @@ -37,9 +37,9 @@ def test_self_consistent_covmat_from_systematics(data_internal_cuts_config): dataset_inputs = base_config.pop("dataset_inputs") for dsinp in dataset_inputs: - covmat_a = API.experimental_covmat( + covmat_a = API.covmat_from_systematics( **base_config, dataset_input=dsinp) - covmat_b = API.dataset_inputs_experimental_covmat( + covmat_b = API.dataset_inputs_covmat_from_systematics( **base_config, dataset_inputs=[dsinp]) np.testing.assert_allclose(covmat_a, covmat_b) @@ -57,7 +57,7 @@ def test_covmat_from_systematics(data_config, use_cuts, dataset_inputs): config["use_cuts"] = use_cuts config["dataset_inputs"] = dataset_inputs - covmat = API.dataset_inputs_experimental_covmat(**config) + covmat = API.dataset_inputs_covmat_from_systematics(**config) cpp_covmat = API.groups_covmat(**config) np.testing.assert_allclose(cpp_covmat, covmat) @@ -71,7 +71,7 @@ def test_covmat_with_one_systematic(): dsinput = {"dataset": "D0ZRAP", "frac": 1.0, "cfac": ["QCD"]} config = dict(dataset_input=dsinput, theoryid=THEORYID, use_cuts="nocuts") - covmat = API.experimental_covmat(**config) + covmat = API.covmat_from_systematics(**config) ds = API.dataset(**config) # double check that the dataset does indeed only have 1 systematic. 
assert ds.commondata.nsys == 1 @@ -145,11 +145,11 @@ def test_python_t0_covmat_matches_cpp( config["dataset_inputs"] = dataset_inputs config["t0pdfset"] = t0pdfset config["use_t0"] = True - covmat = API.dataset_inputs_t0_covmat(**config) + covmat = API.dataset_inputs_t0_covmat_from_systematics(**config) cpp_covmat = API.groups_covmat(**config) # use allclose defaults or it fails np.testing.assert_allclose(cpp_covmat, covmat, rtol=1e-05, atol=1e-08) with pytest.raises(AssertionError): np.testing.assert_allclose( - covmat, API.dataset_inputs_experimental_covmat(**config) + covmat, API.dataset_inputs_covmat_from_systematics(**config) ) From 8788af7ee27e814bd609392dfb9dde7abd3f6f5f Mon Sep 17 00:00:00 2001 From: wilsonm Date: Wed, 17 Feb 2021 10:49:27 +0000 Subject: [PATCH 3/5] rename results_providers to reflect its reduced scope and update docs --- doc/sphinx/source/vp/pydataobjs.rst | 22 +++++++++---------- validphys2/src/validphys/app.py | 2 +- .../{results_providers.py => commondata.py} | 2 +- validphys2/src/validphys/covmats.py | 2 +- 4 files changed, 14 insertions(+), 14 deletions(-) rename validphys2/src/validphys/{results_providers.py => commondata.py} (97%) diff --git a/doc/sphinx/source/vp/pydataobjs.rst b/doc/sphinx/source/vp/pydataobjs.rst index 094e40385a..1134e6ac05 100644 --- a/doc/sphinx/source/vp/pydataobjs.rst +++ b/doc/sphinx/source/vp/pydataobjs.rst @@ -148,7 +148,7 @@ Loading CommonData ------------------ The underlying functions for loading CommonData can be found in -:py:mod:`validphys.results_providers.commondata_parser`. The data is loaded +:py:mod:`validphys.commondataparser`. The data is loaded as :py:class:`validphys.coredata.CommonData`, which uses the `dataclasses `_ module which automatically generates some special methods for the class. 
The @@ -158,7 +158,7 @@ with the standard pandas machinery:: import pandas as pd from validphys.api import API - from validphys.results_providers.commondata_parser import load_commondata + from validphys.commondataparser import load_commondata # first get the CommonDataSpec cd = API.commondata(dataset_input={"dataset":"NMC"}) lcd = load_commondata(cd) @@ -169,7 +169,7 @@ The :py:class:`validphys.coredata.CommonData` class has a method which returns a new instance of the class with cuts applied:: from validphys.api import API - from validphys.results_providers.commondata_parser import load_commondata + from validphys.commondataparser import load_commondata inp = { "dataset_input": {"dataset":"NMC"}, "use_cuts": "internal", @@ -196,12 +196,12 @@ Loading Covariance Matrices Functions which take :py:class:`validphys.coredata.CommonData` s and return covariance matrices can be found in -:py:mod:`validphys.results_providers.covmat_construction`. As with the commondata -the underlying functions can be accessed directly:: +:py:mod:`validphys.covmats`. 
As with the commondata +the functions can be called in scripts directly:: import numpy as np from validphys.api import API - from validphys.results_providers.covmat_construction import covmat_from_systematics + from validphys.covmats import covmat_from_systematics inp = { "dataset_input": {"dataset":"NMC"}, @@ -216,7 +216,7 @@ the underlying functions can be accessed directly:: There exists a similar function which acts upon a list of multiple commondatas and takes into account correlations between datasets:: - from validphys.results_providers.covmat_construction import datasets_covmat_from_systematics + from validphys.covmats import dataset_inputs_covmat_from_systematics inp = { "dataset_inputs": [ {"dataset":"NMC"}, @@ -227,10 +227,10 @@ and takes into account correlations between datasets:: } lcds = API.dataset_inputs_loaded_cd_with_cuts(**inp) total_ndata = np.sum([lcd.ndata for lcd in lcds]) - total_cov = datasets_covmat_from_systematics(lcds) + total_cov = dataset_inputs_covmat_from_systematics(lcds) assert total_cov.shape == (total_ndata, total_ndata) -These functions are already leveraged by actions, which can be accessed directly +These functions are also actions, which can be accessed directly from the API:: from validphys.api import API @@ -241,7 +241,7 @@ from the API:: "theoryid": 162 } # single dataset covmat - cov = API.experimental_covmat(**inp) + cov = API.covmat_from_systematics(**inp) inp = { "dataset_inputs": [ {"dataset":"NMC"}, @@ -250,4 +250,4 @@ from the API:: "use_cuts": "internal", "theoryid": 162 } - total_cov = API.dataset_inputs_experimental_covmat(**inp) + total_cov = API.dataset_inputs_covmat_from_systematics(**inp) diff --git a/validphys2/src/validphys/app.py b/validphys2/src/validphys/app.py index a37cde8271..0de5827f6e 100644 --- a/validphys2/src/validphys/app.py +++ b/validphys2/src/validphys/app.py @@ -24,7 +24,7 @@ providers = [ 'validphys.results', - 'validphys.results_providers', + 'validphys.commondata', 'validphys.pdfgrids', 
'validphys.pdfplots', 'validphys.dataplots', diff --git a/validphys2/src/validphys/results_providers.py b/validphys2/src/validphys/commondata.py similarity index 97% rename from validphys2/src/validphys/results_providers.py rename to validphys2/src/validphys/commondata.py index a8f618f8ad..167636e01a 100644 --- a/validphys2/src/validphys/results_providers.py +++ b/validphys2/src/validphys/commondata.py @@ -1,5 +1,5 @@ """ -results_providers.py +commondata.py Module containing actions which return loaded commondata, leverages utils found in :py:mod:`validphys.commondataparser`, returning objects from diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index 232a157630..934151ce73 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -206,7 +206,7 @@ def t0_covmat_from_systematics(loaded_commondata_with_cuts, dataset_t0_predictio """Like :py:func:`covmat_from_systematics` except uses the t0 predictions to calculate the absolute constributions to the covmat from multiplicative uncertainties. For more info on the t0 predictions see - :py:func:`validphys.results_providers.dataset_t0_predictions`. + :py:func:`validphys.commondata.dataset_t0_predictions`. 
Parameters ---------- From f82e97113ada1aa65ee43bbd3242a7980d1cf2f8 Mon Sep 17 00:00:00 2001 From: wilsonmr <33907451+wilsonmr@users.noreply.github.com> Date: Tue, 23 Feb 2021 17:18:51 +0000 Subject: [PATCH 4/5] Apply suggestions from code review Co-authored-by: Rosalyn Pearson <33020850+RosalynLP@users.noreply.github.com> --- validphys2/src/validphys/commondata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validphys2/src/validphys/commondata.py b/validphys2/src/validphys/commondata.py index 167636e01a..256c0e3830 100644 --- a/validphys2/src/validphys/commondata.py +++ b/validphys2/src/validphys/commondata.py @@ -2,7 +2,7 @@ commondata.py Module containing actions which return loaded commondata, leverages utils -found in :py:mod:`validphys.commondataparser`, returning objects from +found in :py:mod:`validphys.commondataparser`, and returns objects from :py:mod:`validphys.coredata` """ From f2484c67d7709d28fac6b1503186ad3c4f536410 Mon Sep 17 00:00:00 2001 From: wilsonm Date: Tue, 23 Feb 2021 17:40:40 +0000 Subject: [PATCH 5/5] update docs to include an example with more complicated dataset specification and updated docstrings to recommend API --- doc/sphinx/source/vp/pydataobjs.rst | 8 +++- validphys2/src/validphys/covmats.py | 57 ++++++++++++++++++++++------- 2 files changed, 49 insertions(+), 16 deletions(-) diff --git a/doc/sphinx/source/vp/pydataobjs.rst b/doc/sphinx/source/vp/pydataobjs.rst index 1134e6ac05..d7a5e41452 100644 --- a/doc/sphinx/source/vp/pydataobjs.rst +++ b/doc/sphinx/source/vp/pydataobjs.rst @@ -159,8 +159,10 @@ with the standard pandas machinery:: from validphys.api import API from validphys.commondataparser import load_commondata + # define dataset settings + ds_input={'dataset': 'CMSZDIFF12', 'cfac':('QCD', 'NRM'), 'sys':10} # first get the CommonDataSpec - cd = API.commondata(dataset_input={"dataset":"NMC"}) + cd = API.commondata(dataset_input=ds_input) lcd = load_commondata(cd) assert isinstance(lcd.central_values, 
pd.Series) assert isinstance(lcd.systematics_table, pd.DataFrame) @@ -170,8 +172,10 @@ a new instance of the class with cuts applied:: from validphys.api import API from validphys.commondataparser import load_commondata + # define dataset and additional settings + ds_input={'dataset': 'CMSZDIFF12', 'cfac':('QCD', 'NRM'), 'sys':10} inp = { - "dataset_input": {"dataset":"NMC"}, + "dataset_input": ds_input, "use_cuts": "internal", "theoryid": 162 } diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index 934151ce73..2beeb514a9 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -84,26 +84,34 @@ def covmat_from_systematics(loaded_commondata_with_cuts, _central_values=None): Example ------- + In order to use this function, simply call it from the API + + >>> from validphys.api import API + >>> inp = dict( + ... dataset_input={'dataset': 'CMSZDIFF12', 'cfac':('QCD', 'NRM'), 'sys':10}, + ... theoryid=162, + ... use_cuts="internal" + ... ) + >>> cov = API.covmat_from_systematics(**inp) + >>> cov.shape + (28, 28) + + To better understand the inputs for the function we can also explicitly + load the commondata and pass it to this function. 
Here we do that with + NMC: + >>> from validphys.commondataparser import load_commondata >>> from validphys.loader import Loader >>> from validphys.calcutils import covmat_from_systematics >>> l = Loader() >>> cd = l.check_commondata("NMC") >>> cd = load_commondata(cd) - >>> covmat_from_systematics(cd) - array([[8.64031971e-05, 8.19971921e-05, 6.27396915e-05, ..., - 2.40747732e-05, 2.79614418e-05, 3.46727332e-05], - [8.19971921e-05, 1.41907442e-04, 6.52360141e-05, ..., - 2.36624379e-05, 2.72605623e-05, 3.45492831e-05], - [6.27396915e-05, 6.52360141e-05, 9.41928691e-05, ..., - 1.79244824e-05, 2.08603130e-05, 2.56283708e-05], - ..., - [2.40747732e-05, 2.36624379e-05, 1.79244824e-05, ..., - 5.67822050e-05, 4.09077450e-05, 4.14126235e-05], - [2.79614418e-05, 2.72605623e-05, 2.08603130e-05, ..., - 4.09077450e-05, 5.55150870e-05, 4.15843357e-05], - [3.46727332e-05, 3.45492831e-05, 2.56283708e-05, ..., - 4.14126235e-05, 4.15843357e-05, 1.43824457e-04]]) + >>> cov = covmat_from_systematics(cd) + + However, it is clearly more effort than using the API. This is only worsened + when trying to apply cuts and more complicated dataset settings as in the + first example. + """ return construct_covmat( loaded_commondata_with_cuts.stat_errors.to_numpy(), @@ -141,6 +149,22 @@ def dataset_inputs_covmat_from_systematics( Example ------- + This function can be called directly from the API: + + >>> dsinps = [ + ... {'dataset': 'NMC'}, + ... {'dataset': 'ATLASTTBARTOT', 'cfac':['QCD']}, + ... {'dataset': 'CMSZDIFF12', 'cfac':('QCD', 'NRM'), 'sys':10} + ... ] + >>> inp = dict(dataset_inputs=dsinps, theoryid=162, use_cuts="internal") + >>> cov = API.dataset_inputs_covmat_from_systematics(**inp) + >>> cov.shape + (235, 235) + + Which properly accounts for all dataset settings and cuts. 
As with any other
+    action we can demonstrate explicitly the inputs to this action by explicitly
+    loading them and calling the action directly:
+
     >>> from validphys.commondataparser import load_commondata
     >>> from validphys.covmats import dataset_inputs_covmat_from_systematics
     >>> from validphys.loader import Loader
     >>> l = Loader()
     >>> cd1 = l.check_commondata("ATLASLOMASSDY11EXT")
     >>> cd2 = l.check_commondata("ATLASZHIGHMASS49FB")
     >>> ld1, ld2 = map(load_commondata, (cd1, cd2))
     >>> dataset_inputs_covmat_from_systematics((ld1, ld2))
     array([[2.91814548e+06, 4.66692123e+06, 2.36823008e+06, 8.62587330e+05,
             2.78209614e+05, 1.11790645e+05, 1.75129920e+03, 7.97466600e+02,
             4.00296960e+02, 2.22039720e+02, 1.46202210e+02, 8.36558100e+01,
+
+    However, note that it is much more difficult to apply the correct settings
+    to datasets and apply cuts so it is strongly encouraged to use the API for
+    all scripting purposes.
+
     """
     special_corrs = []
     block_diags = []