Skip to content
1 change: 1 addition & 0 deletions validphys2/src/validphys/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

providers = [
'validphys.results',
'validphys.results_providers',
'validphys.pdfgrids',
'validphys.pdfplots',
'validphys.dataplots',
Expand Down
25 changes: 21 additions & 4 deletions validphys2/src/validphys/coredata.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,15 +246,32 @@ def additive_errors(self):
return add_table.loc[:, add_table.columns != "SKIP"]


def systematic_errors(self):
def systematic_errors(self, central_values=None):
"""Returns all systematic errors as absolute uncertainties, with a
single column for each uncertainty. Converts
:py:attr:`multiplicative_errors` to units of data and then appends
onto :py:attr:`additive_errors`
onto :py:attr:`additive_errors`. By default uses the experimental
central values to perform conversion, but the user can supply a
1-D array of central values, with length :py:attr:`self.ndata`, to use
instead of the experimental central values to calculate the absolute
contribution of the multiplicative systematics.

Parameters
----------
central_values: None, np.array
1-D array containing alternative central values to combine with
multiplicative uncertainties. This array must have length equal
to :py:attr:`self.ndata`. By default ``central_values`` is None, and
the central values of the commondata are used.

Returns
-------
systematic_errors: pd.DataFrame
Dataframe containing systematic errors.

"""
# NOTE: in the future can take t0 predictions here.
central_values = self.central_values.to_numpy()
if central_values is None:
central_values = self.central_values.to_numpy()
converted_mult_errors = (
self.multiplicative_errors * central_values[:, np.newaxis] / 100
)
Expand Down
109 changes: 102 additions & 7 deletions validphys2/src/validphys/covmats.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,22 +11,22 @@
from reportengine.table import table

from validphys.calcutils import regularize_covmat, get_df_block
from validphys.core import PDF, DataGroupSpec, DataSetSpec
from validphys.checks import (
check_dataset_cuts_match_theorycovmat,
check_norm_threshold,
check_pdf_is_montecarlo,
check_speclabels_different,
check_data_cuts_match_theorycovmat,
)
from validphys.core import PDF, DataGroupSpec, DataSetSpec
from validphys.results import ThPredictionsResult

log = logging.getLogger(__name__)

INTRA_DATASET_SYS_NAME = ("UNCORR", "CORR", "THEORYUNCORR", "THEORYCORR")


def covmat_from_systematics(commondata):
def covmat_from_systematics(commondata, central_values=None):
"""Take the statistical uncertainty and systematics table from
a :py:class:`validphys.coredata.CommonData` object and
construct the covariance matrix accounting for correlations between
Expand Down Expand Up @@ -62,11 +62,17 @@ def covmat_from_systematics(commondata):
`paper <https://arxiv.org/pdf/hep-ph/0501067.pdf>`_
outlining the procedure, specifically equation 2 and surrounding text.

Paramaters
Parameters
----------
commondata : validphys.coredata.CommonData
CommonData which stores information about systematic errors,
their treatment and description.
central_values : None, np.array
1-D array containing alternative central values to combine with the
multiplicative errors to calculate their absolute contributions. By
default this is None, and the experimental central values are used. However, this
can be used to calculate, for example, the t0 covariance matrix by
using the predictions from the central member of the t0 pdf.

Returns
-------
Expand Down Expand Up @@ -98,10 +104,14 @@ def covmat_from_systematics(commondata):
4.14126235e-05, 4.15843357e-05, 1.43824457e-04]])
"""
return construct_covmat(
commondata.stat_errors.to_numpy(), commondata.systematic_errors())
commondata.stat_errors.to_numpy(),
commondata.systematic_errors(central_values)
)


def datasets_covmat_from_systematics(list_of_commondata):
def datasets_covmat_from_systematics(
list_of_commondata, list_of_central_values=None
):
"""Given a list containing :py:class:`validphys.coredata.CommonData` s,
construct the full covariance matrix.

Expand All @@ -115,6 +125,11 @@ def datasets_covmat_from_systematics(list_of_commondata):
----------
list_of_commondata : list[validphys.coredata.CommonData]
list of CommonData objects.
list_of_central_values: None, list[np.array]
list of 1-D arrays which contain alternative central values which are
combined with the multiplicative errors to calculate their absolute
contribution. By default this is None and the experimental central
values are used.

Returns
-------
Expand All @@ -139,8 +154,12 @@ def datasets_covmat_from_systematics(list_of_commondata):
special_corrs = []
block_diags = []

for cd in list_of_commondata:
errors = cd.systematic_errors()
if list_of_central_values is None:
# want to just pass None to systematic_errors method
list_of_central_values = [None] * len(list_of_commondata)

for cd, central_values in zip(list_of_commondata, list_of_central_values):
errors = cd.systematic_errors(central_values)
# separate out the special uncertainties which can be correlated across
# datasets
is_intra_dataset_error = errors.columns.isin(INTRA_DATASET_SYS_NAME)
Expand Down Expand Up @@ -193,6 +212,82 @@ def construct_covmat(stat_errors: np.array, sys_errors: pd.DataFrame):
return np.diag(diagonal) + corr_sys_mat @ corr_sys_mat.T


def experimental_covmat(loaded_commondata_with_cuts):
    """Build the experimental covariance matrix for a single dataset.

    Thin provider around :py:func:`covmat_from_systematics`, where the
    construction of the matrix is described. No alternative central values
    are supplied, so the experimental central values are used to turn the
    multiplicative systematics into absolute uncertainties.

    Parameters
    ----------
    loaded_commondata_with_cuts: validphys.coredata.CommonData
        commondata object for which to generate the covmat.

    Returns
    -------
    covmat: np.array
        experimental covariance matrix

    """
    # ``central_values=None`` is the default of covmat_from_systematics and
    # selects the experimental central values.
    covmat = covmat_from_systematics(
        loaded_commondata_with_cuts, central_values=None
    )
    return covmat


def t0_covmat(loaded_commondata_with_cuts, dataset_t0_predictions):
    """Build the t0 covariance matrix for a single dataset.

    Same construction as :py:func:`experimental_covmat`, except that the
    absolute contributions of the multiplicative uncertainties are computed
    from the t0 predictions rather than from the experimental central values.
    For more info on the t0 predictions see
    :py:func:`validphys.results_providers.dataset_t0_predictions`.

    Parameters
    ----------
    loaded_commondata_with_cuts: validphys.coredata.CommonData
        commondata object for which to generate the covmat.
    dataset_t0_predictions: np.array
        1-D array with t0 predictions.

    Returns
    -------
    t0_covmat: np.array
        t0 covariance matrix

    """
    t0_matrix = covmat_from_systematics(
        loaded_commondata_with_cuts,
        central_values=dataset_t0_predictions,
    )
    return t0_matrix


def dataset_inputs_experimental_covmat(dataset_inputs_loaded_cd_with_cuts):
    """Build the experimental covariance matrix for a list of datasets.

    Counterpart of :py:func:`experimental_covmat` for all data: the list of
    commondata is handed to :py:func:`datasets_covmat_from_systematics`
    without alternative central values.

    Parameters
    ----------
    dataset_inputs_loaded_cd_with_cuts: list[validphys.coredata.CommonData]
        The CommonData for all datasets defined in ``dataset_inputs``.

    Returns
    -------
    covmat: np.array
        Covariance matrix for list of datasets.
    """
    # ``list_of_central_values=None`` is the callee's default.
    full_covmat = datasets_covmat_from_systematics(
        dataset_inputs_loaded_cd_with_cuts, list_of_central_values=None
    )
    return full_covmat

def dataset_inputs_t0_covmat(
    dataset_inputs_loaded_cd_with_cuts, dataset_inputs_t0_predictions
):
    """Build the t0 covariance matrix for a list of datasets.

    Counterpart of :py:func:`t0_covmat` for all data: each commondata is
    paired with its t0 predictions inside
    :py:func:`datasets_covmat_from_systematics`.

    Parameters
    ----------
    dataset_inputs_loaded_cd_with_cuts: list[validphys.coredata.CommonData]
        The CommonData for all datasets defined in ``dataset_inputs``.
    dataset_inputs_t0_predictions: list[np.array]
        The t0 predictions for all datasets.

    Returns
    -------
    t0_covmat: np.array
        t0 covariance matrix for list of datasets.
    """
    full_t0_covmat = datasets_covmat_from_systematics(
        dataset_inputs_loaded_cd_with_cuts,
        list_of_central_values=dataset_inputs_t0_predictions,
    )
    return full_t0_covmat


def sqrt_covmat(covariance_matrix):
"""Function that computes the square root of the covariance matrix.

Expand Down
59 changes: 59 additions & 0 deletions validphys2/src/validphys/results_providers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
"""
results_providers.py

module which bridges between underlying functions concerned with loading
theory predictions and data and actions which can be accessed
by other actions/providers.

"""
from reportengine import collect

from validphys.commondataparser import load_commondata
from validphys.convolution import central_predictions

def loaded_commondata_with_cuts(commondata, cuts):
    """Load a commondata specification and return it with cuts applied.

    Parameters
    ----------
    commondata: validphys.core.CommonDataSpec
        commondata to load and cut.
    cuts: validphys.core.cuts, None
        valid cuts, used to cut loaded commondata.

    Returns
    -------
    loaded_cut_commondata: validphys.coredata.CommonData

    """
    # Load first, then let the loaded object apply the cuts itself.
    return load_commondata(commondata).with_cuts(cuts)

# Collects the output of ``loaded_commondata_with_cuts`` over ``data_input``
# (presumably one loaded, cut CommonData per dataset input -- confirm against
# the reportengine ``collect`` semantics).
dataset_inputs_loaded_cd_with_cuts = collect(
"loaded_commondata_with_cuts", ("data_input",))


def dataset_t0_predictions(dataset, t0set):
    """Return the t0 predictions for ``dataset``: the predictions calculated
    using the central member of ``t0set``.

    Note that if ``t0set`` has errortype ``replicas`` and the dataset is a
    hadronic observable, then the predictions of the central member are subtly
    different to the central value of the replica predictions.

    Parameters
    ----------
    dataset: validphys.core.DataSetSpec
        dataset for which to calculate t0 predictions
    t0set: validphys.core.PDF
        pdf used to calculate the predictions

    Returns
    -------
    t0_predictions: np.array
        1-D numpy array with predictions for each of the cut datapoints.

    """
    # central_predictions gives a DataFrame with shape n_data * 1. Drop only
    # the column axis: a bare ``squeeze()`` would also drop the data axis when
    # n_data == 1 and return a 0-d array instead of the documented 1-D one.
    return central_predictions(dataset, t0set).to_numpy().squeeze(axis=1)

# Collects the output of ``dataset_t0_predictions`` over ``data`` (presumably
# one 1-D array of t0 predictions per dataset -- confirm against the
# reportengine ``collect`` semantics).
dataset_inputs_t0_predictions = collect("dataset_t0_predictions", ("data",))
21 changes: 0 additions & 21 deletions validphys2/src/validphys/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,6 @@ def tmp(tmpdir):
{'dataset': 'CMSZDIFF12', 'cfac':('QCD', 'NRM'), 'sys':10}
]

# Dataset inputs for experiments which have non-trivial correlations between
# their datasets. Every entry names a ``dataset``; some entries additionally
# set ``cfac``, ``frac`` and/or ``sys``.
CORR_DATA = [
{'dataset': 'ATLASWZRAP36PB', 'cfac': ['QCD']},
{'dataset': 'ATLASZHIGHMASS49FB', 'cfac': ['QCD']},
{'dataset': 'ATLASLOMASSDY11EXT', 'cfac': ['QCD']},
{'dataset': 'ATLASWZRAP11', 'frac': 0.5, 'cfac': ['QCD']},
{'dataset': 'CMSZDIFF12', 'cfac': ('QCD', 'NRM'), 'sys': 10},
{'dataset': 'CMSJETS11', 'frac': 0.5, 'sys': 10},
]

SINGLE_EXP = [
{
Expand Down Expand Up @@ -90,18 +81,6 @@ def data_singleexp_witht0_config(data_witht0_config):
config_dict.update({'experiments': SINGLE_EXP})
return config_dict

@pytest.fixture(scope='module')
def data_with_correlations_config():
    """Module-scoped fixture: a copy of ``base_config`` whose
    ``dataset_inputs`` entry is set to ``CORR_DATA``."""
    # dict(mapping, **overrides) copies ``base_config`` and applies the
    # override in one step, leaving ``base_config`` itself untouched.
    return dict(base_config, dataset_inputs=CORR_DATA)

@pytest.fixture(scope='module')
def data_with_correlations_internal_cuts_config(data_with_correlations_config):
    """Module-scoped fixture: a copy of the ``data_with_correlations_config``
    fixture value with ``use_cuts`` set to ``'internal'``."""
    # Copy rather than mutate, so the upstream fixture value is left as is.
    return dict(data_with_correlations_config, use_cuts='internal')

@pytest.fixture(scope='module')
def weighted_data_witht0_config(data_witht0_config):
config_dict = dict(data_witht0_config)
Expand Down
Loading