From b4aabdd6862c2de805feea2ed244faafa9cab591 Mon Sep 17 00:00:00 2001 From: wilsonm Date: Tue, 3 Sep 2019 12:39:08 +0100 Subject: [PATCH 1/8] add simple corrmat reg to calcutils --- validphys2/src/validphys/calcutils.py | 67 +++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/validphys2/src/validphys/calcutils.py b/validphys2/src/validphys/calcutils.py index 44ed05442e..21d5ae73f3 100644 --- a/validphys2/src/validphys/calcutils.py +++ b/validphys2/src/validphys/calcutils.py @@ -149,3 +149,70 @@ def get_df_block(matrix: pd.DataFrame, key: str, level): key, level=level, axis=1).values return block +def regularize_covmat(covmat: np.array, cond_num_threshold=500): + """Given a covariance matrix, performs a regularization on + the corresponding correlation matrix and uses that to return a new + regularized covariance matrix: + + corr_ij = cov_ij / (sigma_i * sigma_j) + + where sigma_i = sqrt(diag(cov)_i). The correlation + matrix is then regularized by clipping the smallest eigenvalues to a + minimum acceptable values given by + + max(eigenvalues of corr_ij)/cond_num_threshold + + finally the process to get a correlation matrix from a covariance matrix + is inverted + + new_cov_ij = (regularized corr)_ij * sigma_i * sigma_j + + Parameters + ---------- + covmat : array + a covariance matrix which is to be regularized. + cond_num_threshold : float + The acceptable condition number of the correlation matrix, by default + set to 500. + + Returns + ------- + new_covmat : array + A new covariance matrix which has been regularized according to + prescription above. + + Notes + ----- + (regularized corr)_ij is not technically a correlation matrix since it might + not have 1s on the diagonal. + + TODO: is `regularized corr` the nearest matrix with condition number of + `cond_num_threshold` according to frob dist? + + Examples + -------- + + >>> from validphys.calcutils import regularize_covmat + >>> import numpy as np + >>> import scipy.linalg as la + >>> np.random.seed(0) + >>> s = np.random.rand(3,3) + >>> cov = s@s.T + >>> cov + array([[1.17601578, 0.99135397, 1.45880096], + [0.99135397, 0.89356028, 1.23866193], + [1.45880096, 1.23866193, 1.91538757]]) + >>> new_cov = regularize_covmat(cov, 100) + >>> new_cov + array([[1.18115619, 0.98945605, 1.45496881], + [0.98945605, 0.89426102, 1.24007682], + [1.45496881, 1.24007682, 1.9182444 ]]) + >>> print(np.linalg.norm(new_cov-cov)) + 0.008697992044783345 + """ + d = np.sqrt(np.diag(covmat)) + corr = (covmat/d)/d[:, np.newaxis] + e_val, e_vec = la.eigh(corr) + new_e_val = np.clip(e_val, a_min=max(e_val)/cond_num_threshold, a_max=None) + new_corr = e_vec@(np.diag(new_e_val)@e_vec.T) + return (new_corr*d)*d[:, np.newaxis] From d0fbf5e2f79925115a1b7b82563a4200855189a7 Mon Sep 17 00:00:00 2001 From: wilsonm Date: Tue, 3 Sep 2019 12:49:58 +0100 Subject: [PATCH 2/8] regularize covmats at the level of n3fit --- n3fit/src/n3fit/io/reader.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/n3fit/src/n3fit/io/reader.py b/n3fit/src/n3fit/io/reader.py index a155e7959c..cabad1b284 100644 --- a/n3fit/src/n3fit/io/reader.py +++ b/n3fit/src/n3fit/io/reader.py @@ -8,6 +8,7 @@ from NNPDF import RandomGenerator from validphys.core import ExperimentSpec as vp_Exp from validphys.core import DataSetSpec as vp_Dataset +from validphys.calcutils import regularize_covmat def make_tr_val_mask(datasets, exp_name, seed): @@ -247,6 +248,8 @@ def common_data_reader(spec, t0pdfset, replica_seeds=None, trval_seeds=None): exp_name = spec.name covmat = spec_c.get_covmat() + # Regularize the covmat with cond_num_threshold=500 + covmat = regularize_covmat(covmat, cond_num_threshold=500) # Now it is time to build the masks for the training validation split all_dict_out = [] From 66020f8f0dfc22ab0f9e0b42dd815b92f52e5b63 Mon Sep 17 00:00:00 2001 From: wilsonm Date: Wed, 4 Sep 2019 10:29:03 +0100 Subject: [PATCH 3/8] restructured covmat functions to just return covmats and added sqrt covmat functions in rsults, improved documentation --- validphys2/src/validphys/results.py | 161 +++++++++++++++++++++------- 1 file changed, 124 insertions(+), 37 deletions(-) diff --git a/validphys2/src/validphys/results.py b/validphys2/src/validphys/results.py index a6db26f128..7813702904 100644 --- a/validphys2/src/validphys/results.py +++ b/validphys2/src/validphys/results.py @@ -235,8 +235,7 @@ def experiments_covmat_no_table( for experiment, experiment_covmat in zip( experiments, experiments_covariance_matrix): name = experiment.name - mat, _ = experiment_covmat - df.loc[[name],[name]] = mat + df.loc[[name],[name]] = experiment_covmat return df @table @@ -244,21 +243,25 @@ def experiments_covmat(experiments_covmat_no_table): """Duplicate of experiments_covmat_no_table but with a table decorator.""" return experiments_covmat_no_table +exps_sqrt_covmat = collect( + 'experiment_sqrt_covariance_matrix', + ('experiments',) +) + @table def experiments_sqrtcovmat( - experiments, experiments_index, experiments_covariance_matrix): + experiments, experiments_index, exps_sqrt_covmat): """Like experiments_covmat, but dump the lower triangular part of the Cholesky decomposition as used in the fit. The upper part indices are set to zero. """ data = np.zeros((len(experiments_index),len(experiments_index))) df = pd.DataFrame(data, index=experiments_index, columns=experiments_index) - for experiment, experiment_covmat in zip( - experiments, experiments_covariance_matrix): + for experiment, exp_sqrt_covmat in zip( + experiments, exps_sqrt_covmat): name = experiment.name - _, mat = experiment_covmat - mat[np.triu_indices_from(mat, k=1)] = 0 - df.loc[[name],[name]] = mat + exp_sqrt_covmat[np.triu_indices_from(exp_sqrt_covmat, k=1)] = 0 + df.loc[[name],[name]] = exp_sqrt_covmat return df @table @@ -272,9 +275,8 @@ def experiments_invcovmat( for experiment, experiment_covmat in zip( experiments, experiments_covariance_matrix): name = experiment.name - cov, _ = experiment_covmat #Improve this inversion if this method tuns out to be important - invcov = la.inv(cov) + invcov = la.inv(experiment_covmat) df.loc[[name],[name]] = invcov return df @@ -360,11 +362,49 @@ def closure_pseudodata_replicas(experiments, pdf, nclosure:int, @check_dataset_cuts_match_theorycovmat def covariance_matrix(dataset:DataSetSpec, fitthcovmat, t0set:(PDF, type(None)) = None): - """Returns a tuple of Covariance matrix and sqrt covariance matrix for the given dataset - which includes theory contribution from scale variations if `use_theorycovmat` is True and - an appropriate fit from which the covariance matrix will be loaded is given - - if t0set is specified then t0 predictions will be used to construct the covariance matrix. + """Returns the covariance matrix for a given `dataset`. By default the + data central values will be used to calculate the multiplicative contributions + to the covariance matrix. + + The matrix can instead be constructed with + the t0 proceedure if the user sets `use_t0` to True and gives a + `t0pdfset`. In this case the central predictions from the `t0pdfset` will be + used to calculate the multiplicative contributions to the covariance matrix. + More information on the t0 procedure can be found here: + https://arxiv.org/abs/0912.2276 + + Additionally the user can specify `use_fit_thcovmat_if_present` to be True + and provide a corresponding `fit`. If the theory covmat was used in the + corresponding `fit` and the specfied `dataset` was used in the fit then + the theory covariance matrix for this `dataset` will be added in quadrature + to the experimental covariance matrix. + + Parameters + ---------- + dataset : DataSetSpec + object parsed from the `dataset_input` runcard key + fitthcovmat: None or ThCovMatSpec + None if either `use_thcovmat_if_present` is False or if no theory + covariance matrix was used in the corresponding fit + t0set: None or PDF + None if `use_t0` is False or a PDF parsed from `t0pdfset` runcard key + + Returns + ------- + covariance_matrix : array + a covariance matrix as a numpy array + + Examples + -------- + + >>> from validphys.api import API + >>> inp = { + 'dataset_input': {'ATLASTTBARTOT'}, + 'theoryid': 52, + 'use_cuts': 'no_cuts' + } + >>> cov = API.covariance_matrix(**inp) + TODO: complete example """ loaded_data = dataset.load() @@ -374,14 +414,40 @@ def covariance_matrix(dataset:DataSetSpec, fitthcovmat, t0set:(PDF, type(None)) log.debug("Setting T0 predictions for %s" % dataset) loaded_data.SetT0(t0set.load_t0()) + covmat = loaded_data.get_covmat() if fitthcovmat: loaded_thcov = fitthcovmat.load() - covmat = get_df_block(loaded_thcov, dataset.name, level=1) + loaded_data.get_covmat() - sqrtcovmat = np.linalg.cholesky(covmat) - else: - covmat = loaded_data.get_covmat() - sqrtcovmat = loaded_data.get_sqrtcovmat() - return covmat, sqrtcovmat + covmat += get_df_block(loaded_thcov, dataset.name, level=1) + return covmat + +def sqrt_covariance_matrix(covariance_matrix: np.array): + """Returns the lower-triangular Cholesky factor `covariance_matrix` + + Parameters + ---------- + covariance_matrix: array + a positive definite covariance matrix + + Returns + ------- + sqrt_covariance_matrix : array + lower triangular Cholesky factor of covariance_matrix + + Notes + ----- + The lower triangular is useful for efficient calculation of the chi^2 + + Examples + -------- + + >>> import numpy as np + >>> from validphys.results import sqrt_covariance_matrix + >>> a = np.array([[1, 0.5], [0.5, 1]]) + >>> sqrt_covariance_matrix(a) + array([[1. , 0. ], + [0.5 , 0.8660254]]) + """ + return la.cholesky(covariance_matrix, lower=True) @check_experiment_cuts_match_theorycovmat def experiment_covariance_matrix( @@ -395,18 +461,25 @@ def experiment_covariance_matrix( log.debug("Setting T0 predictions for %s" % experiment) loaded_data.SetT0(t0set.load_t0()) + covmat = loaded_data.get_covmat() + if fitthcovmat: loaded_thcov = fitthcovmat.load() ds_names = loaded_thcov.index.get_level_values(1) indices = np.in1d(ds_names, [ds.name for ds in experiment.datasets]).nonzero()[0] - covmat = loaded_thcov.iloc[indices, indices].values + loaded_data.get_covmat() - sqrtcovmat = np.linalg.cholesky(covmat) - else: - covmat = loaded_data.get_covmat() - sqrtcovmat = loaded_data.get_sqrtcovmat() - return covmat, sqrtcovmat - -def results(dataset:(DataSetSpec), pdf:PDF, covariance_matrix): + covmat += loaded_thcov.iloc[indices, indices].values + return covmat + +def experiment_sqrt_covariance_matrix(experiment_covariance_matrix): + """Like `sqrt_covariance_matrix` but for an experiment""" + return sqrt_covariance_matrix(experiment_covariance_matrix) + +def results( + dataset:(DataSetSpec), + pdf:PDF, + covariance_matrix, + sqrt_covariance_matrix + ): """Tuple of data and theory results for a single pdf. The data will have an associated covariance matrix, which can include a contribution from the theory covariance matrix which is constructed from scale variation. The inclusion of this covariance matrix by default is used @@ -416,17 +489,30 @@ def results(dataset:(DataSetSpec), pdf:PDF, covariance_matrix): An experiment is also allowed. (as a result of the C++ code layout).""" data = dataset.load() - return (DataResult(data, *covariance_matrix), + return (DataResult(data, covariance_matrix, sqrt_covariance_matrix), ThPredictionsResult.from_convolution(pdf, dataset, loaded_data=data)) -def experiment_results(experiment, pdf:PDF, experiment_covariance_matrix): +def experiment_results( + experiment, + pdf:PDF, + experiment_covariance_matrix, + experiment_sqrt_covariance_matrix): """Like `results` but for a whole experiment""" - return results(experiment, pdf, experiment_covariance_matrix) + return results( + experiment, + pdf, + experiment_covariance_matrix, + experiment_sqrt_covariance_matrix + ) #It's better to duplicate a few lines than to complicate the logic of #``results`` to support this. #TODO: The above comment doesn't make sense after adding T0. Deprecate this -def pdf_results(dataset:(DataSetSpec, ExperimentSpec), pdfs:Sequence, covariance_matrix:tuple): +def pdf_results( + dataset:(DataSetSpec, ExperimentSpec), + pdfs:Sequence, + covariance_matrix, + sqrt_covariance_matrix): """Return a list of results, the first for the data and the rest for each of the PDFs.""" @@ -438,12 +524,13 @@ def pdf_results(dataset:(DataSetSpec, ExperimentSpec), pdfs:Sequence, covarianc th_results.append(th_result) - return (DataResult(data, *covariance_matrix), *th_results) + return (DataResult(data, covariance_matrix, sqrt_covariance_matrix), *th_results) @require_one('pdfs', 'pdf') @remove_outer('pdfs', 'pdf') def one_or_more_results(dataset:(DataSetSpec, ExperimentSpec), - covariance_matrix: tuple, + covariance_matrix, + sqrt_covariance_matrix, pdfs:(type(None), Sequence)=None, pdf:(type(None), PDF)=None): """Generate a list of results, where the first element is the data values, @@ -451,9 +538,9 @@ def one_or_more_results(dataset:(DataSetSpec, ExperimentSpec), Which of the two is selected intelligently depending on the namespace, when executing as an action.""" if pdf: - return results(dataset, pdf, covariance_matrix) + return results(dataset, pdf, covariance_matrix, sqrt_covariance_matrix) else: - return pdf_results(dataset, pdfs, covariance_matrix) + return pdf_results(dataset, pdfs, covariance_matrix, sqrt_covariance_matrix) raise ValueError("Either 'pdf' or 'pdfs' is required") From 955879fc5666a5aac17491b6a8673e85e8f6c6a6 Mon Sep 17 00:00:00 2001 From: wilsonm Date: Wed, 4 Sep 2019 11:43:21 +0100 Subject: [PATCH 4/8] added covmat reg to covmat providers, use these covmat functions in n3fit --- n3fit/src/n3fit/fit.py | 5 ++-- n3fit/src/n3fit/io/reader.py | 5 +--- n3fit/src/n3fit/n3fit.py | 2 +- validphys2/src/validphys/config.py | 4 +++ validphys2/src/validphys/results.py | 43 +++++++++++++++++++++++++---- 5 files changed, 47 insertions(+), 12 deletions(-) diff --git a/n3fit/src/n3fit/fit.py b/n3fit/src/n3fit/fit.py index c3cb84375c..ebb26a8c60 100644 --- a/n3fit/src/n3fit/fit.py +++ b/n3fit/src/n3fit/fit.py @@ -15,6 +15,7 @@ def fit( fitting, experiments, + experiments_covariance_matrix, t0set, replica, replica_path, @@ -132,9 +133,9 @@ def fit( ############################################################################## all_exp_infos = [[] for _ in replica] # First loop over the experiments - for exp in experiments: + for exp, exp_cov in zip(experiments, experiments_covariance_matrix): log.info("Loading experiment: {0}".format(exp)) - all_exp_dicts = reader.common_data_reader(exp, t0pdfset, replica_seeds=mcseeds, trval_seeds=trvalseeds) + all_exp_dicts = reader.common_data_reader(exp, exp_cov, t0pdfset, replica_seeds=mcseeds, trval_seeds=trvalseeds) for i, exp_dict in enumerate(all_exp_dicts): all_exp_infos[i].append(exp_dict) diff --git a/n3fit/src/n3fit/io/reader.py b/n3fit/src/n3fit/io/reader.py index cabad1b284..3c3ad046a1 100644 --- a/n3fit/src/n3fit/io/reader.py +++ b/n3fit/src/n3fit/io/reader.py @@ -173,7 +173,7 @@ def common_data_reader_experiment(experiment_c, experiment_spec): return parsed_datasets -def common_data_reader(spec, t0pdfset, replica_seeds=None, trval_seeds=None): +def common_data_reader(spec, covmat, t0pdfset, replica_seeds=None, trval_seeds=None): """ Wrapper to read the information from validphys object This function receives either a validphyis experiment or dataset objects @@ -247,9 +247,6 @@ def common_data_reader(spec, t0pdfset, replica_seeds=None, trval_seeds=None): ) exp_name = spec.name - covmat = spec_c.get_covmat() - # Regularize the covmat with cond_num_threshold=500 - covmat = regularize_covmat(covmat, cond_num_threshold=500) # Now it is time to build the masks for the training validation split all_dict_out = [] diff --git a/n3fit/src/n3fit/n3fit.py b/n3fit/src/n3fit/n3fit.py index b9b8a12a2a..a04f11d71d 100755 --- a/n3fit/src/n3fit/n3fit.py +++ b/n3fit/src/n3fit/n3fit.py @@ -24,7 +24,7 @@ actions_ = ['datacuts::theory fit'] ) -N3FIT_PROVIDERS = ["n3fit.fit"] +N3FIT_PROVIDERS = ["n3fit.fit", "validphys.results"] log = logging.getLogger(__name__) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index f0830d6bab..8829858357 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -868,6 +868,10 @@ def parse_groupby(self, grouping: str): "correctly?") return grouping + def parse_perform_covmat_reg(self, do_reg: bool): + """Parse the `regularize_covmat` key from runcard""" + log.info("Regularizing covariance matrices") + return do_reg class Config(report.Config, CoreConfig, ParamfitsConfig): diff --git a/validphys2/src/validphys/results.py b/validphys2/src/validphys/results.py index 7813702904..9e933c270f 100644 --- a/validphys2/src/validphys/results.py +++ b/validphys2/src/validphys/results.py @@ -24,8 +24,9 @@ check_dataset_cuts_match_theorycovmat, check_experiment_cuts_match_theorycovmat) from validphys.core import DataSetSpec, PDF, ExperimentSpec -from validphys.calcutils import (all_chi2, central_chi2, calc_chi2, calc_phi, bootstrap_values, - get_df_block) +from validphys.calcutils import ( + all_chi2, central_chi2, calc_chi2, calc_phi, bootstrap_values, + get_df_block, regularize_covmat) log = logging.getLogger(__name__) @@ -361,7 +362,12 @@ def closure_pseudodata_replicas(experiments, pdf, nclosure:int, @check_dataset_cuts_match_theorycovmat -def covariance_matrix(dataset:DataSetSpec, fitthcovmat, t0set:(PDF, type(None)) = None): +def covariance_matrix( + dataset:DataSetSpec, + fitthcovmat, + t0set:(PDF, type(None)) = None, + perform_covmat_reg=False, + condition_number_threshold=500): """Returns the covariance matrix for a given `dataset`. By default the data central values will be used to calculate the multiplicative contributions to the covariance matrix. @@ -373,12 +379,21 @@ def covariance_matrix(dataset:DataSetSpec, fitthcovmat, t0set:(PDF, type(None)) More information on the t0 procedure can be found here: https://arxiv.org/abs/0912.2276 - Additionally the user can specify `use_fit_thcovmat_if_present` to be True + The user can specify `use_fit_thcovmat_if_present` to be True and provide a corresponding `fit`. If the theory covmat was used in the corresponding `fit` and the specfied `dataset` was used in the fit then the theory covariance matrix for this `dataset` will be added in quadrature to the experimental covariance matrix. + Covariance matrix can be regularized according to + `calcutils.regularize_covmat` if the user specifies `perform_covmat_reg` to + be true. This algorithm sets a minimum threshold for eigenvalues that the + corresponding correlation matrix can have to be: + + max(eigenvalue)/condition_number_threshold + + by default condition_number_threshold is set to 500 + Parameters ---------- dataset : DataSetSpec @@ -388,6 +403,10 @@ def covariance_matrix(dataset:DataSetSpec, fitthcovmat, t0set:(PDF, type(None)) covariance matrix was used in the corresponding fit t0set: None or PDF None if `use_t0` is False or a PDF parsed from `t0pdfset` runcard key + perform_covmat_reg: bool + whether or not to regularize the covariance matrix + condition_number_threshold: number + threshold used to regularize covariance matrix Returns ------- @@ -418,6 +437,11 @@ def covariance_matrix(dataset:DataSetSpec, fitthcovmat, t0set:(PDF, type(None)) if fitthcovmat: loaded_thcov = fitthcovmat.load() covmat += get_df_block(loaded_thcov, dataset.name, level=1) + if perform_covmat_reg: + covmat = regularize_covmat( + covmat, + cond_num_threshold=condition_number_threshold + ) return covmat def sqrt_covariance_matrix(covariance_matrix: np.array): @@ -451,7 +475,11 @@ def sqrt_covariance_matrix(covariance_matrix: np.array): @check_experiment_cuts_match_theorycovmat def experiment_covariance_matrix( - experiment: ExperimentSpec, fitthcovmat, t0set:(PDF, type(None)) = None): + experiment: ExperimentSpec, + fitthcovmat, + t0set:(PDF, type(None)) = None, + perform_covmat_reg=False, + condition_number_threshold=500): """Like `covariance_matrix` except for an experiment""" loaded_data = experiment.load() @@ -468,6 +496,11 @@ def experiment_covariance_matrix( ds_names = loaded_thcov.index.get_level_values(1) indices = np.in1d(ds_names, [ds.name for ds in experiment.datasets]).nonzero()[0] covmat += loaded_thcov.iloc[indices, indices].values + if perform_covmat_reg: + covmat = regularize_covmat( + covmat, + cond_num_threshold=condition_number_threshold + ) return covmat def experiment_sqrt_covariance_matrix(experiment_covariance_matrix): From ffb45c14313f8d019519676aa522b66c634fa822 Mon Sep 17 00:00:00 2001 From: wilsonm Date: Wed, 4 Sep 2019 11:58:11 +0100 Subject: [PATCH 5/8] since using collected experiments_covariance_matrices, t0 not required in as many functions, added comment in n3fit to help others understand why it is hardcoded to be true --- n3fit/src/n3fit/fit.py | 12 ++++-------- n3fit/src/n3fit/io/reader.py | 3 +-- n3fit/src/n3fit/n3fit.py | 3 ++- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/n3fit/src/n3fit/fit.py b/n3fit/src/n3fit/fit.py index ebb26a8c60..99ec6a1403 100644 --- a/n3fit/src/n3fit/fit.py +++ b/n3fit/src/n3fit/fit.py @@ -16,7 +16,6 @@ def fit( fitting, experiments, experiments_covariance_matrix, - t0set, replica, replica_path, output_path, @@ -47,8 +46,9 @@ def fit( # Arguments: - `fitting`: dictionary with the hyperparameters of the fit - `experiments`: vp list of experiments to be included in the fit - - `t0set`: t0set name - - `replica`: a list of replica numbers to run over (tipycally just one) + - `experiments_covariance_matrix`: covariance matrices collected + over experiments + - `replica`: a list of replica numbers to run over (typically just one) - `replica_path`: path to the output of this run - `output_path`: name of the fit - `theorid`: theory id number @@ -91,10 +91,6 @@ def fit( else: status_ok = "ok" - # Loading t0set from LHAPDF - if t0set is not None: - t0pdfset = t0set.load_t0() - # First set the seed variables for # - Tr/Vl split # - Neural Network initialization @@ -135,7 +131,7 @@ def fit( # First loop over the experiments for exp, exp_cov in zip(experiments, experiments_covariance_matrix): log.info("Loading experiment: {0}".format(exp)) - all_exp_dicts = reader.common_data_reader(exp, exp_cov, t0pdfset, replica_seeds=mcseeds, trval_seeds=trvalseeds) + all_exp_dicts = reader.common_data_reader(exp, exp_cov, replica_seeds=mcseeds, trval_seeds=trvalseeds) for i, exp_dict in enumerate(all_exp_dicts): all_exp_infos[i].append(exp_dict) diff --git a/n3fit/src/n3fit/io/reader.py b/n3fit/src/n3fit/io/reader.py index 3c3ad046a1..d7a11ae3db 100644 --- a/n3fit/src/n3fit/io/reader.py +++ b/n3fit/src/n3fit/io/reader.py @@ -173,7 +173,7 @@ def common_data_reader_experiment(experiment_c, experiment_spec): return parsed_datasets -def common_data_reader(spec, covmat, t0pdfset, replica_seeds=None, trval_seeds=None): +def common_data_reader(spec, covmat, replica_seeds=None, trval_seeds=None): """ Wrapper to read the information from validphys object This function receives either a validphyis experiment or dataset objects @@ -215,7 +215,6 @@ def common_data_reader(spec, covmat, t0pdfset, replica_seeds=None, trval_seeds=N spec_c = spec.load() ndata = spec_c.GetNData() expdata_true = spec_c.get_cv().reshape(1, ndata) - spec_c.SetT0(t0pdfset) base_mcseed = int(hashlib.sha256(str(spec).encode()).hexdigest(), 16) % 10 ** 8 if replica_seeds: diff --git a/n3fit/src/n3fit/n3fit.py b/n3fit/src/n3fit/n3fit.py index a04f11d71d..f22a1ec327 100755 --- a/n3fit/src/n3fit/n3fit.py +++ b/n3fit/src/n3fit/n3fit.py @@ -17,7 +17,8 @@ from reportengine import colors from reportengine.compat import yaml - +# Set use_t0 true here for backwards compatibility. Causes fit covmats to +# use t0 prescription N3FIT_FIXED_CONFIG = dict( use_cuts = 'internal', use_t0 = True, From 04015bdc74a297ceaf70ea242bd69936d5d11991 Mon Sep 17 00:00:00 2001 From: wilsonm Date: Wed, 4 Sep 2019 14:35:37 +0100 Subject: [PATCH 6/8] slightly improved log message about regularizing covmats (to be correct at least) added collect over datasets covmats --- validphys2/src/validphys/config.py | 5 ++++- validphys2/src/validphys/results.py | 5 +++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 8829858357..c0677ddd36 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -870,7 +870,10 @@ def parse_groupby(self, grouping: str): def parse_perform_covmat_reg(self, do_reg: bool): """Parse the `regularize_covmat` key from runcard""" - log.info("Regularizing covariance matrices") + if do_reg: + log.info("Regularizing covariance matrices") + else: + log.info("Not regularizing covariance matrices") return do_reg diff --git a/validphys2/src/validphys/results.py b/validphys2/src/validphys/results.py index 9e933c270f..36a1ed9b84 100644 --- a/validphys2/src/validphys/results.py +++ b/validphys2/src/validphys/results.py @@ -444,6 +444,11 @@ def covariance_matrix( ) return covmat +datasets_covariance_matrix = collect( + 'covariance_matrix', + ('experiments', 'experiment',) +) + def sqrt_covariance_matrix(covariance_matrix: np.array): """Returns the lower-triangular Cholesky factor `covariance_matrix` From 17cf16c22cd301694ef51c8b3a8d2b0201b580d4 Mon Sep 17 00:00:00 2001 From: wilsonm Date: Thu, 19 Sep 2019 12:14:44 +0100 Subject: [PATCH 7/8] make regularize_covmat nicer and remove log message I added for debugging purposes --- validphys2/src/validphys/calcutils.py | 2 +- validphys2/src/validphys/config.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/validphys2/src/validphys/calcutils.py b/validphys2/src/validphys/calcutils.py index 21d5ae73f3..e7f8298c4c 100644 --- a/validphys2/src/validphys/calcutils.py +++ b/validphys2/src/validphys/calcutils.py @@ -214,5 +214,5 @@ def regularize_covmat(covmat: np.array, cond_num_threshold=500): corr = (covmat/d)/d[:, np.newaxis] e_val, e_vec = la.eigh(corr) new_e_val = np.clip(e_val, a_min=max(e_val)/cond_num_threshold, a_max=None) - new_corr = e_vec@(np.diag(new_e_val)@e_vec.T) + new_corr = (new_e_val*e_vec)@e_vec.T return (new_corr*d)*d[:, np.newaxis] diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index c0677ddd36..8f8b5ada7d 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -872,8 +872,6 @@ def parse_perform_covmat_reg(self, do_reg: bool): """Parse the `regularize_covmat` key from runcard""" if do_reg: log.info("Regularizing covariance matrices") - else: - log.info("Not regularizing covariance matrices") return do_reg From f47f4de7ffc14a3b4e8f6e46e5fead75e8c85f20 Mon Sep 17 00:00:00 2001 From: wilsonm Date: Thu, 19 Sep 2019 12:16:12 +0100 Subject: [PATCH 8/8] removed changes to n3fit --- n3fit/src/n3fit/fit.py | 15 +++++++++------ n3fit/src/n3fit/io/reader.py | 5 +++-- n3fit/src/n3fit/n3fit.py | 5 ++--- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/n3fit/src/n3fit/fit.py b/n3fit/src/n3fit/fit.py index 99ec6a1403..c3cb84375c 100644 --- a/n3fit/src/n3fit/fit.py +++ b/n3fit/src/n3fit/fit.py @@ -15,7 +15,7 @@ def fit( fitting, experiments, - experiments_covariance_matrix, + t0set, replica, replica_path, output_path, @@ -46,9 +46,8 @@ def fit( # Arguments: - `fitting`: dictionary with the hyperparameters of the fit - `experiments`: vp list of experiments to be included in the fit - - `experiments_covariance_matrix`: covariance matrices collected - over experiments - - `replica`: a list of replica numbers to run over (typically just one) + - `t0set`: t0set name + - `replica`: a list of replica numbers to run over (tipycally just one) - `replica_path`: path to the output of this run - `output_path`: name of the fit - `theorid`: theory id number @@ -91,6 +90,10 @@ def fit( else: status_ok = "ok" + # Loading t0set from LHAPDF + if t0set is not None: + t0pdfset = t0set.load_t0() + # First set the seed variables for # - Tr/Vl split # - Neural Network initialization @@ -129,9 +132,9 @@ def fit( ############################################################################## all_exp_infos = [[] for _ in replica] # First loop over the experiments - for exp, exp_cov in zip(experiments, experiments_covariance_matrix): + for exp in experiments: log.info("Loading experiment: {0}".format(exp)) - all_exp_dicts = reader.common_data_reader(exp, exp_cov, replica_seeds=mcseeds, trval_seeds=trvalseeds) + all_exp_dicts = reader.common_data_reader(exp, t0pdfset, replica_seeds=mcseeds, trval_seeds=trvalseeds) for i, exp_dict in enumerate(all_exp_dicts): all_exp_infos[i].append(exp_dict) diff --git a/n3fit/src/n3fit/io/reader.py b/n3fit/src/n3fit/io/reader.py index d7a11ae3db..a155e7959c 100644 --- a/n3fit/src/n3fit/io/reader.py +++ b/n3fit/src/n3fit/io/reader.py @@ -8,7 +8,6 @@ from NNPDF import RandomGenerator from validphys.core import ExperimentSpec as vp_Exp from validphys.core import DataSetSpec as vp_Dataset -from validphys.calcutils import regularize_covmat def make_tr_val_mask(datasets, exp_name, seed): @@ -173,7 +172,7 @@ def common_data_reader_experiment(experiment_c, experiment_spec): return parsed_datasets -def common_data_reader(spec, covmat, replica_seeds=None, trval_seeds=None): +def common_data_reader(spec, t0pdfset, replica_seeds=None, trval_seeds=None): """ Wrapper to read the information from validphys object This function receives either a validphyis experiment or dataset objects @@ -215,6 +214,7 @@ def common_data_reader(spec, covmat, replica_seeds=None, trval_seeds=None): spec_c = spec.load() ndata = spec_c.GetNData() expdata_true = spec_c.get_cv().reshape(1, ndata) + spec_c.SetT0(t0pdfset) base_mcseed = int(hashlib.sha256(str(spec).encode()).hexdigest(), 16) % 10 ** 8 if replica_seeds: @@ -246,6 +246,7 @@ def common_data_reader(spec, covmat, replica_seeds=None, trval_seeds=None): ) exp_name = spec.name + covmat = spec_c.get_covmat() # Now it is time to build the masks for the training validation split all_dict_out = [] diff --git a/n3fit/src/n3fit/n3fit.py b/n3fit/src/n3fit/n3fit.py index f22a1ec327..b9b8a12a2a 100755 --- a/n3fit/src/n3fit/n3fit.py +++ b/n3fit/src/n3fit/n3fit.py @@ -17,15 +17,14 @@ from reportengine import colors from reportengine.compat import yaml -# Set use_t0 true here for backwards compatibility. Causes fit covmats to -# use t0 prescription + N3FIT_FIXED_CONFIG = dict( use_cuts = 'internal', use_t0 = True, actions_ = ['datacuts::theory fit'] ) -N3FIT_PROVIDERS = ["n3fit.fit", "validphys.results"] +N3FIT_PROVIDERS = ["n3fit.fit"] log = logging.getLogger(__name__)