From 33cc091943d1ed018cf90026ff0e84172d7d7db4 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Thu, 21 Dec 2023 12:38:31 +0100 Subject: [PATCH 01/17] Readd needed functions --- .../theorycovariance/construction.py | 31 ++++++++++++++ .../src/validphys/theorycovariance/tests.py | 41 +++++++++++++++++++ 2 files changed, 72 insertions(+) diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py index 78e02cf99f..1e8c6610c5 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -171,6 +171,37 @@ def combine_by_type(each_dataset_results_central_bytheory): ) return process_info +def process_starting_points(combine_by_type): + """Returns a dictionary of indices in the covariance matrix corresponding + to the starting point of each process.""" + process_info = combine_by_type + running_index = 0 + start_proc = defaultdict(list) + for name in process_info.theory: + size = len(process_info.theory[name][0]) + start_proc[name] = running_index + running_index += size + return start_proc + + +def covmap(combine_by_type, dataset_names): + """Creates a map between the covmat indices from matrices ordered by + process to matrices ordered by experiment as listed in the runcard""" + mapping = defaultdict(list) + start_exp = defaultdict(list) + process_info = combine_by_type + running_index = 0 + for dataset in dataset_names: + size = process_info.sizes[dataset] + start_exp[dataset] = running_index + running_index += size + start = 0 + names_by_proc_list = [item for sublist in process_info.namelist.values() for item in sublist] + for dataset in names_by_proc_list: + for i in range(process_info.sizes[dataset]): + mapping[start + i] = start_exp[dataset] + i + start += process_info.sizes[dataset] + return mapping def covmat_3fpt(name1, name2, deltas1, deltas2): """Returns theory covariance sub-matrix for 3pt factorisation diff --git a/validphys2/src/validphys/theorycovariance/tests.py b/validphys2/src/validphys/theorycovariance/tests.py index 2e3d4d12ba..09052299c2 100644 --- a/validphys2/src/validphys/theorycovariance/tests.py +++ b/validphys2/src/validphys/theorycovariance/tests.py @@ -22,6 +22,10 @@ from validphys.theorycovariance.construction import ( combine_by_type, theory_corrmat_singleprocess, + theory_covmat_custom, + covmap, + covs_pt_prescrip, + process_starting_points, ) from validphys.theorycovariance.output import _get_key, matrix_plot_labels from validphys.theorycovariance.theorycovarianceutils import ( @@ -206,6 +210,43 @@ def theory_corrmat_custom_dataspecs(theory_covmat_custom_dataspecs): mat = theory_corrmat_singleprocess(theory_covmat_custom_dataspecs) return mat +def process_starting_points_dataspecs(combine_by_type_dataspecs): + """Like process_starting_points but for matched dataspecs.""" + return process_starting_points(combine_by_type_dataspecs) + + +@check_correct_theory_combination_dataspecs +def covs_pt_prescrip_dataspecs( + combine_by_type_dataspecs, + process_starting_points_dataspecs, + dataspecs_theoryids, + point_prescription, + fivetheories, + seventheories, +): + """Like covs_pt_prescrip but for matched dataspecs.""" + return covs_pt_prescrip( + combine_by_type_dataspecs, + process_starting_points_dataspecs, + dataspecs_theoryids, + point_prescription, + fivetheories, + seventheories, + ) + + +def covmap_dataspecs(combine_by_type_dataspecs, matched_dataspecs_dataset_name): + """Like covmap but for matched dataspecs.""" + return covmap(combine_by_type_dataspecs, matched_dataspecs_dataset_name) + +@table +def theory_covmat_custom_dataspecs( + covs_pt_prescrip_dataspecs, covmap_dataspecs, matched_experiments_index +): + """Like theory_covmat_custom but for matched dataspecs.""" + return theory_covmat_custom( + covs_pt_prescrip_dataspecs, covmap_dataspecs, matched_experiments_index + ) def _shuffle_list(l, shift): """Function that moves list elements left by 'shift' entries""" From e5f177f71f8ffa8392b54bceb279bbd3f4505293 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Wed, 3 Jan 2024 13:54:14 +0100 Subject: [PATCH 02/17] First draft --- validphys2/src/validphys/config.py | 16 ++-- .../src/validphys/theorycovariance/tests.py | 75 ++++++++++++------- 2 files changed, 56 insertions(+), 35 deletions(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 861d0f7756..0add44c70c 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -909,20 +909,16 @@ def produce_theory_database(self): """Produces path to the theory.db file""" return self.loader.theorydb_file - def produce_combined_shift_and_theory_dataspecs(self, theoryconfig, shiftconfig): - total_dataspecs = theoryconfig["dataspecs"] + shiftconfig["dataspecs"] - matched_datasets = self.produce_matched_datasets_from_dataspecs(total_dataspecs) + def produce_combined_shift_and_theory_dataspecs(self, dataspecs): + matched_datasets = self.produce_matched_datasets_from_dataspecs(dataspecs) for ns in matched_datasets: ns["dataspecs"] = self.produce_dataspecs_with_matched_cuts(ns["dataspecs"]) - new_theoryconfig = [] - new_shiftconfig = [] - len_th = len(theoryconfig["dataspecs"]) + new_dataspecs = [] + len_th = len(dataspecs) for s in matched_datasets: - new_theoryconfig.append(ChainMap({"dataspecs": s["dataspecs"][:len_th]}, s)) - new_shiftconfig.append(ChainMap({"dataspecs": s["dataspecs"][len_th:]}, s)) + new_dataspecs.append(ChainMap({"dataspecs": s["dataspecs"][len_th:]}, s)) return { - "shiftconfig": {"dataspecs": new_shiftconfig, "original": shiftconfig}, - "theoryconfig": {"dataspecs": new_theoryconfig, "original": theoryconfig}, + "dataspecs": {"dataspecs": new_dataspecs, "original": dataspecs }, } # TODO: Worth it to do some black magic to not pass params explicitly? diff --git a/validphys2/src/validphys/theorycovariance/tests.py b/validphys2/src/validphys/theorycovariance/tests.py index 09052299c2..f9e309f4e6 100644 --- a/validphys2/src/validphys/theorycovariance/tests.py +++ b/validphys2/src/validphys/theorycovariance/tests.py @@ -31,6 +31,7 @@ from validphys.theorycovariance.theorycovarianceutils import ( check_correct_theory_combination_theoryconfig, process_lookup, + check_correct_theory_combination_dataspecs ) log = logging.getLogger(__name__) @@ -57,22 +58,17 @@ def dataspecs_dataset_prediction_shift(matched_dataspecs_results, process, datas ) -def shift_vector(matched_dataspecs_dataset_prediction_shift, matched_dataspecs_dataset_theory): - """Returns a DataFrame of normalised shift vectors for matched dataspecs.""" - all_shifts = np.concatenate([val.shifts for val in matched_dataspecs_dataset_prediction_shift]) - all_theory = np.concatenate([val.shifts for val in matched_dataspecs_dataset_theory]) - norm_shifts = all_shifts / all_theory - dsnames = np.concatenate( - [ - np.full(len(val.shifts), val.dataset_name, dtype=object) - for val in matched_dataspecs_dataset_prediction_shift - ] - ) - point_indexes = np.concatenate( - [np.arange(len(val.shifts)) for val in matched_dataspecs_dataset_prediction_shift] - ) - index = pd.MultiIndex.from_arrays([dsnames, point_indexes], names=["Dataset name", "Point"]) - return pd.DataFrame(norm_shifts, index=index) +def shift_vector(matched_dataspecs_results, process, dataset_name): + """Returns a DataFrame of normalised shift vectors and normalization vector for matched dataspecs.""" + r1, r2 = matched_dataspecs_results + shifts = r1[1].central_value - r2[1].central_value + norm = r2[1].central_value + norm_shifts = shifts / norm + dsnames = np.full(len(shifts), dataset_name, dtype=object) + processnames = np.full(len(shifts), process, dtype=object) + point_indexes = np.arange(len(shifts)) + index = pd.MultiIndex.from_arrays([processnames,dsnames, point_indexes], names=["group", "dataset", "id"]) + return pd.DataFrame({'shifts': norm_shifts, 'norm':norm}, index=index) def dataspecs_dataset_theory(matched_dataspecs_results, process, dataset_name): @@ -138,9 +134,9 @@ def alltheory_vector(matched_dataspecs_dataset_alltheory, matched_dataspecs_data all_matched_results = collect("matched_dataspecs_results", ["dataspecs"]) -def combine_by_type_dataspecs(all_matched_results, matched_dataspecs_dataset_name): +def combine_by_type_dataspecs(all_matched_results): """Like combine_by_type but for matched dataspecs""" - return combine_by_type(all_matched_results, matched_dataspecs_dataset_name) + return combine_by_type(all_matched_results) dataspecs_theoryids = collect("theoryid", ["theoryconfig", "original", "dataspecs"]) @@ -190,7 +186,7 @@ def matched_experiments_index(matched_dataspecs_dataset_name, all_matched_data_l "all_matched_results", ["combined_shift_and_theory_dataspecs", "theoryconfig"] ) -shx_vector = collect("shift_vector", ["combined_shift_and_theory_dataspecs", "shiftconfig"]) +shx_vector = collect("shift_vector", ["matched_datasets_from_dataspecs"]) thx_vector = collect("theory_vector", ["combined_shift_and_theory_dataspecs", "theoryconfig"]) @@ -218,7 +214,6 @@ def process_starting_points_dataspecs(combine_by_type_dataspecs): @check_correct_theory_combination_dataspecs def covs_pt_prescrip_dataspecs( combine_by_type_dataspecs, - process_starting_points_dataspecs, dataspecs_theoryids, point_prescription, fivetheories, @@ -227,7 +222,6 @@ def covs_pt_prescrip_dataspecs( """Like covs_pt_prescrip but for matched dataspecs.""" return covs_pt_prescrip( combine_by_type_dataspecs, - process_starting_points_dataspecs, dataspecs_theoryids, point_prescription, fivetheories, @@ -235,9 +229,9 @@ def covs_pt_prescrip_dataspecs( ) -def covmap_dataspecs(combine_by_type_dataspecs, matched_dataspecs_dataset_name): +def covmap_dataspecs(combine_by_type_dataspecs): """Like covmap but for matched dataspecs.""" - return covmap(combine_by_type_dataspecs, matched_dataspecs_dataset_name) + return covmap(combine_by_type_dataspecs) @table def theory_covmat_custom_dataspecs( @@ -758,10 +752,9 @@ def deltamiss_plot(theory_shift_test, allthx_vector, evals_nonzero_basis, shx_ve @figure -def shift_diag_cov_comparison(allthx_vector, shx_vector, thx_covmat, thx_vector): +def shift_diag_cov_comparison(shx_vector, thx_covmat, thx_vector): """Produces a plot of a comparison between the NNLO-NLO shift and the envelope given by the diagonal elements of the theory covariance matrix.""" - l = len(allthx_vector[0]) + 1 matrix = thx_covmat[0] / (np.outer(thx_vector[0], thx_vector[0])) fnorm = -shx_vector[0] indexlist = list(matrix.index.values) @@ -807,3 +800,35 @@ def shift_diag_cov_comparison(allthx_vector, shx_vector, thx_covmat, thx_vector) ax.legend(fontsize=20) ax.yaxis.set_tick_params(labelsize=20) return fig + +@figure +def shift_diag_cov_comparison_test(theory_covmat_custom, shx_vector, point_prescription): + matrix = theory_covmat_custom + diagdf = pd.DataFrame(data=np.diag(matrix.values), index=matrix.index) + concatenated_shx_vector = pd.concat(shx_vector) + tripleindex = diagdf.index.values + tripleindex_set = set([(index[0], index[1]) for index in tripleindex]) + sqrtdiags = [] + fnorm = [] + for index in tripleindex_set: + sqrtdiags.append(list(np.sqrt(diagdf.loc[index[0]].loc[index[1]].values.transpose()[0]/concatenated_shx_vector.loc[index[0]].loc[index[1]].norm.values))) + fnorm.append(list(concatenated_shx_vector.loc[index[0]].loc[index[1]].shifts.values)) + fnorm_concat = [j for i in fnorm for j in i] + sqrtdiags_concat = [j for i in sqrtdiags for j in i] + fig, ax = plotutils.subplots(figsize=(20, 10)) + import ipdb; ipdb.set_trace() + ax.plot(np.array(sqrtdiags_concat) * 100, ".-", label=f"MHOU ({point_prescription})", color="red") + ax.plot(-np.array(sqrtdiags_concat) * 100, ".-", color="red") + ax.plot(-np.array(fnorm_concat) * 100, ".-", label="NNLO-NLO Shift", color="black") + ticklocs, ticklabels, startlocs = matrix_plot_labels(matrix) + ax.set_xticks(ticklocs) + ax.set_xticklabels(ticklabels, rotation=45, fontsize=20) + # Shift startlocs elements 0.5 to left so lines are between indexes + startlocs_lines = [x - 0.5 for x in startlocs] + ax.vlines(startlocs_lines, -70, 70, linestyles="dashed") + ax.margins(x=0, y=0) + ax.set_ylabel(r"% wrt central theory $T_i^{(0)}$", fontsize=20) + ax.set_ylim(-35, 35) + ax.legend(fontsize=20) + ax.yaxis.set_tick_params(labelsize=20) + return fig \ No newline at end of file From 273e48799fd01d577a6ea2366d40dcabe0ee0b33 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Mon, 8 Jan 2024 14:29:05 +0100 Subject: [PATCH 03/17] Second draft --- .../src/validphys/theorycovariance/tests.py | 43 +++++++++++++------ 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/tests.py b/validphys2/src/validphys/theorycovariance/tests.py index f9e309f4e6..9624daacb0 100644 --- a/validphys2/src/validphys/theorycovariance/tests.py +++ b/validphys2/src/validphys/theorycovariance/tests.py @@ -62,7 +62,7 @@ def shift_vector(matched_dataspecs_results, process, dataset_name): """Returns a DataFrame of normalised shift vectors and normalization vector for matched dataspecs.""" r1, r2 = matched_dataspecs_results shifts = r1[1].central_value - r2[1].central_value - norm = r2[1].central_value + norm = r1[1].central_value norm_shifts = shifts / norm dsnames = np.full(len(shifts), dataset_name, dtype=object) processnames = np.full(len(shifts), process, dtype=object) @@ -801,26 +801,43 @@ def shift_diag_cov_comparison(shx_vector, thx_covmat, thx_vector): ax.yaxis.set_tick_params(labelsize=20) return fig -@figure -def shift_diag_cov_comparison_test(theory_covmat_custom, shx_vector, point_prescription): - matrix = theory_covmat_custom - diagdf = pd.DataFrame(data=np.diag(matrix.values), index=matrix.index) - concatenated_shx_vector = pd.concat(shx_vector) - tripleindex = diagdf.index.values - tripleindex_set = set([(index[0], index[1]) for index in tripleindex]) +def diagdf_theory_covmat(theory_covmat_custom): + return pd.DataFrame(data=np.diag(theory_covmat_custom.values), index=theory_covmat_custom.index) + +def tripleindex_set(group_dataset_inputs_by_process): + tripleindex = [] + for process in group_dataset_inputs_by_process: + for dataset in process['data_input']: + tripleindex.append((process['group_name'], dataset.name)) + return tripleindex + +def concatenated_shx_vector(shx_vector): + return pd.concat(shx_vector) + +def sqrtdiags_thcovmat(tripleindex_set, diagdf_theory_covmat, concatenated_shx_vector): sqrtdiags = [] + for index in tripleindex_set: + sqrtdiags.append(list(np.sqrt(diagdf_theory_covmat.loc[index[0]].loc[index[1]].values.transpose()[0]/concatenated_shx_vector.loc[index[0]].loc[index[1]].norm.values))) + return sqrtdiags + +def fnorm_shifts(concatenated_shx_vector, tripleindex_set): fnorm = [] for index in tripleindex_set: - sqrtdiags.append(list(np.sqrt(diagdf.loc[index[0]].loc[index[1]].values.transpose()[0]/concatenated_shx_vector.loc[index[0]].loc[index[1]].norm.values))) fnorm.append(list(concatenated_shx_vector.loc[index[0]].loc[index[1]].shifts.values)) - fnorm_concat = [j for i in fnorm for j in i] - sqrtdiags_concat = [j for i in sqrtdiags for j in i] + return fnorm + +def ticklocs_thcovmat(theory_covmat_custom): + return matrix_plot_labels(theory_covmat_custom) + +@figure +def shift_diag_cov_comparison_test(sqrtdiags_thcovmat, fnorm_shifts, point_prescription, ticklocs_thcovmat): + fnorm_concat = [j for i in fnorm_shifts for j in i] + sqrtdiags_concat = [j for i in sqrtdiags_thcovmat for j in i] fig, ax = plotutils.subplots(figsize=(20, 10)) - import ipdb; ipdb.set_trace() ax.plot(np.array(sqrtdiags_concat) * 100, ".-", label=f"MHOU ({point_prescription})", color="red") ax.plot(-np.array(sqrtdiags_concat) * 100, ".-", color="red") ax.plot(-np.array(fnorm_concat) * 100, ".-", label="NNLO-NLO Shift", color="black") - ticklocs, ticklabels, startlocs = matrix_plot_labels(matrix) + ticklocs, ticklabels, startlocs = ticklocs_thcovmat ax.set_xticks(ticklocs) ax.set_xticklabels(ticklabels, rotation=45, fontsize=20) # Shift startlocs elements 0.5 to left so lines are between indexes From 52f32621b41f2769badafb36e9c322cb5d670cbf Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Mon, 8 Jan 2024 14:57:00 +0100 Subject: [PATCH 04/17] Correct wrong sqrt --- validphys2/src/validphys/theorycovariance/tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validphys2/src/validphys/theorycovariance/tests.py b/validphys2/src/validphys/theorycovariance/tests.py index 9624daacb0..3ddc9fcd11 100644 --- a/validphys2/src/validphys/theorycovariance/tests.py +++ b/validphys2/src/validphys/theorycovariance/tests.py @@ -817,7 +817,7 @@ def concatenated_shx_vector(shx_vector): def sqrtdiags_thcovmat(tripleindex_set, diagdf_theory_covmat, concatenated_shx_vector): sqrtdiags = [] for index in tripleindex_set: - sqrtdiags.append(list(np.sqrt(diagdf_theory_covmat.loc[index[0]].loc[index[1]].values.transpose()[0]/concatenated_shx_vector.loc[index[0]].loc[index[1]].norm.values))) + sqrtdiags.append(list(np.sqrt(diagdf_theory_covmat.loc[index[0]].loc[index[1]].values.transpose()[0])/concatenated_shx_vector.loc[index[0]].loc[index[1]].norm.values)) return sqrtdiags def fnorm_shifts(concatenated_shx_vector, tripleindex_set): From c5c4d75d79cb4224a4869be34c21e4d4d0b1885a Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Mon, 8 Jan 2024 18:33:32 +0100 Subject: [PATCH 05/17] Start fixing the other tests functions --- .../src/validphys/theorycovariance/tests.py | 154 ++++++------------ 1 file changed, 51 insertions(+), 103 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/tests.py b/validphys2/src/validphys/theorycovariance/tests.py index 3ddc9fcd11..29ef030c44 100644 --- a/validphys2/src/validphys/theorycovariance/tests.py +++ b/validphys2/src/validphys/theorycovariance/tests.py @@ -82,7 +82,7 @@ def dataspecs_dataset_theory(matched_dataspecs_results, process, dataset_name): matched_dataspecs_dataset_theory = collect("dataspecs_dataset_theory", ["dataspecs"]) -def theory_vector(matched_dataspecs_dataset_theory): +def theory_vector(matched_dataspecs_results, process, dataset_name): """Returns a DataFrame of the central theory vector for matched dataspecs.""" all_theory = np.concatenate([val.shifts for val in matched_dataspecs_dataset_theory]) @@ -99,32 +99,35 @@ def theory_vector(matched_dataspecs_dataset_theory): return pd.DataFrame(all_theory, index=index) -def dataspecs_dataset_alltheory(matched_dataspecs_results, process, dataset_name): +def dataset_alltheory(each_dataset_results_central_bytheory): """Returns a LabeledShifts tuple corresponding to the theory - vectors for all the scale varied theories (not the central one), - processed by data set and experiment for matched dataspecs.""" - others = matched_dataspecs_results[1:] - res = [other[1].central_value for other in others] - return LabeledShifts(dataset_name=dataset_name, process=process, shifts=res) + vectors for all the scale varied theories.""" + labeled_shifts_list = [] + for dataset in each_dataset_results_central_bytheory: + res = [theoryid[1].central_value for theoryid in dataset] + labeled_shifts_list.append(LabeledShifts(dataset_name=dataset[0][0].name, process=process_lookup(dataset[0][0].name), shifts=res)) + return labeled_shifts_list matched_dataspecs_dataset_alltheory = collect("dataspecs_dataset_alltheory", ["dataspecs"]) - -def alltheory_vector(matched_dataspecs_dataset_alltheory, matched_dataspecs_dataset_theory): +def alltheory_vector(dataset_alltheory): """Returns a DataFrame with the theory vectors for matched - dataspecs for the scale-varied theories (not the central one).""" - all_theory = np.concatenate([val.shifts for val in matched_dataspecs_dataset_alltheory], axis=1) + dataspecs for the scale-varied theories""" + all_theory = np.concatenate([val.shifts for val in dataset_alltheory], axis=1) dsnames = np.concatenate( [ - np.full(len(val.shifts), val.dataset_name, dtype=object) - for val in matched_dataspecs_dataset_theory + np.full(len(val.shifts[0]), val.dataset_name, dtype=object) + for val in dataset_alltheory ] ) - point_indexes = np.concatenate( - [np.arange(len(val.shifts)) for val in matched_dataspecs_dataset_theory] + groupnames = np.concatenate( + [ + np.full(len(val.shifts[0]), val.process, dtype=object) + for val in dataset_alltheory + ] ) - index = pd.MultiIndex.from_arrays([dsnames, point_indexes], names=["Dataset name", "Point"]) + index = pd.MultiIndex.from_arrays([groupnames, dsnames], names=["group", "dataset"]) theory_vectors = [] for theoryvector in all_theory: theory_vectors.append(pd.DataFrame(theoryvector, index=index)) @@ -188,9 +191,9 @@ def matched_experiments_index(matched_dataspecs_dataset_name, all_matched_data_l shx_vector = collect("shift_vector", ["matched_datasets_from_dataspecs"]) -thx_vector = collect("theory_vector", ["combined_shift_and_theory_dataspecs", "theoryconfig"]) +thx_vector = collect("theory_vector", ["matched_datasets_from_dataspecs"]) -allthx_vector = collect("alltheory_vector", ["combined_shift_and_theory_dataspecs", "theoryconfig"]) +allthx_vector = collect("alltheory_vector", ["matched_datasets_from_dataspecs"]) def theory_matrix_threshold(theory_threshold: (int, float) = 0): @@ -434,41 +437,33 @@ def vectors_9pt(splitdiffs): xs.append(newvec) return xs - -@check_correct_theory_combination_theoryconfig -def evals_nonzero_basis( - allthx_vector, - thx_covmat, - thx_vector, - collected_theoryids, - fivetheories, - seventheories: (str, type(None)) = None, - orthonormalisation: (str, type(None)) = None, -): - """Projects the theory covariance matrix from the data space into - the basis of non-zero eigenvalues, dependent on point-prescription. - Then returns the eigenvalues (w) and eigenvectors (v) - in the data space. There are three methods to linearly - orthonormalise the basis vectors for the covariance matrix, - and the choice must be specified using the "orthonormalisation" - flag in the runcard. The choices are: gs, the Gram-Schmidt - method; qr, QR decomposition; svd, singular value decomposition. - QR is the method which should be used as standard; the others - exist for testing purposes.""" - - covmat = thx_covmat[0] / (np.outer(thx_vector[0], thx_vector[0])) +def doubleindex_thcovmat(theory_covmat_custom): + tripleindex = theory_covmat_custom.index + return list(dict.fromkeys([(ind[0], ind[1]) for ind in tripleindex])) + +def ordered_alltheory_vector(alltheory_vector, doubleindex_thcovmat): + ord_alltheory_vector = [] + for theoryid_vector in alltheory_vector: + list_df = [] + for index in doubleindex_thcovmat: + list_df.append(theoryid_vector.loc[index[0]].loc[index[1]]) + ord_alltheory_vector.append(pd.concat(list_df).values.T[0]) + return ord_alltheory_vector + +def evals_nonzero_basis(ordered_alltheory_vector, theory_covmat_custom,fivetheories, orthonormalisation: (str, type(None)) = None, seventheories: (str, type(None)) = None): + covmat = theory_covmat_custom / (np.outer(ordered_alltheory_vector[0], ordered_alltheory_vector[0])) # constructing vectors of shifts due to scale variation diffs = [ - ((thx_vector[0] - scalevarvector) / thx_vector[0]) for scalevarvector in allthx_vector[0] + pd.DataFrame(((ordered_alltheory_vector[0] - scalevarvector) / ordered_alltheory_vector[0]), index=theory_covmat_custom.index.droplevel(0)) for scalevarvector in ordered_alltheory_vector[1:] ] # number of points in point prescription - num_pts = len(diffs) + 1 + num_pts = len(ordered_alltheory_vector) # constructing dictionary of datasets in each process type - indexlist = list(diffs[0].index.values) + doubleindex = doubleindex_thcovmat(theory_covmat_custom) procdict = {} - for index in indexlist: - name = index[0] - proc = process_lookup(name) + for index in doubleindex: + name = index[1] + proc = index[0] if proc not in list(procdict.keys()): procdict[proc] = [name] elif name not in procdict[proc]: @@ -526,23 +521,27 @@ def evals_nonzero_basis( w, v_projected = la.eigh(projected_matrix) # Finding eigenvectors in data space v = p.dot(v_projected) - return w, v, cond_num - + return w, v, cond_num + def projected_condition_num(evals_nonzero_basis): cond_num = evals_nonzero_basis[2] return cond_num -def theory_shift_test(shx_vector, evals_nonzero_basis): +def theory_shift_test(concatenated_shx_vector, evals_nonzero_basis, doubleindex_thcovmat ): """Compares the NNLO-NLO shift, f, with the eigenvectors and eigenvalues of the theory covariance matrix, and returns the component of the NNLO-NLO shift space which is missed by the covariance matrix space: fmiss, as well as the projections of the shift vector onto each of the eigenvectors: projectors.""" w, v = evals_nonzero_basis[:2] v = np.real(v) + # Order the concatenated_shx_vector and concatenate again + fnorm_vector = fnorm_shifts(concatenated_shx_vector, doubleindex_thcovmat) + fnorm_concat = np.array([j for i in fnorm_vector for j in i]) # NNLO-NLO shift vector - f = -shx_vector[0].values.T[0] + #f = -fnorm_concat.T + f = -fnorm_concat # Projecting the shift vector onto each of the eigenvectors projectors = np.sum(f * v.T, axis=1) # Initialise array of zeros and set precision to same as FK tables @@ -750,57 +749,6 @@ def deltamiss_plot(theory_shift_test, allthx_vector, evals_nonzero_basis, shx_ve ax.yaxis.set_tick_params(labelsize=20) return fig - -@figure -def shift_diag_cov_comparison(shx_vector, thx_covmat, thx_vector): - """Produces a plot of a comparison between the NNLO-NLO shift and the - envelope given by the diagonal elements of the theory covariance matrix.""" - matrix = thx_covmat[0] / (np.outer(thx_vector[0], thx_vector[0])) - fnorm = -shx_vector[0] - indexlist = list(matrix.index.values) - # adding process index for plotting, and reindexing matrices and vectors - dsnames = [] - processnames = [] - ids = [] - for index in indexlist: - name = index[0] - i = index[1] - dsnames.append(name) - ids.append(i) - proc = process_lookup(name) - processnames.append(proc) - tripleindex = pd.MultiIndex.from_arrays( - [processnames, dsnames, ids], names=("process", "dataset", "id") - ) - matrix = pd.DataFrame(matrix.values, index=tripleindex, columns=tripleindex) - matrix.sort_index(axis=0, inplace=True) - matrix.sort_index(axis=1, inplace=True) - oldindex = matrix.index.tolist() - newindex = sorted(oldindex, key=_get_key) - matrix = matrix.reindex(newindex) - matrix = (matrix.T.reindex(newindex)).T - sqrtdiags = np.sqrt(np.diag(matrix)) - fnorm = pd.DataFrame(fnorm.values, index=tripleindex) - fnorm.sort_index(axis=0, inplace=True) - fnorm = fnorm.reindex(newindex) - # Plotting - fig, ax = plotutils.subplots(figsize=(20, 10)) - ax.plot(sqrtdiags * 100, ".-", label=f"MHOU ({l} pt)", color="red") - ax.plot(-sqrtdiags * 100, ".-", color="red") - ax.plot(fnorm.values * 100, ".-", label="NNLO-NLO Shift", color="black") - ticklocs, ticklabels, startlocs = matrix_plot_labels(matrix) - ax.set_xticks(ticklocs) - ax.set_xticklabels(ticklabels, rotation=45, fontsize=20) - # Shift startlocs elements 0.5 to left so lines are between indexes - startlocs_lines = [x - 0.5 for x in startlocs] - ax.vlines(startlocs_lines, -70, 70, linestyles="dashed") - ax.margins(x=0, y=0) - ax.set_ylabel(r"% wrt central theory $T_i^{(0)}$", fontsize=20) - ax.set_ylim(-35, 35) - ax.legend(fontsize=20) - ax.yaxis.set_tick_params(labelsize=20) - return fig - def diagdf_theory_covmat(theory_covmat_custom): return pd.DataFrame(data=np.diag(theory_covmat_custom.values), index=theory_covmat_custom.index) @@ -830,7 +778,7 @@ def ticklocs_thcovmat(theory_covmat_custom): return matrix_plot_labels(theory_covmat_custom) @figure -def shift_diag_cov_comparison_test(sqrtdiags_thcovmat, fnorm_shifts, point_prescription, ticklocs_thcovmat): +def shift_diag_cov_comparison(sqrtdiags_thcovmat, fnorm_shifts, point_prescription, ticklocs_thcovmat): fnorm_concat = [j for i in fnorm_shifts for j in i] sqrtdiags_concat = [j for i in sqrtdiags_thcovmat for j in i] fig, ax = plotutils.subplots(figsize=(20, 10)) From ac2401d5639719906a3382e32e50f4d4b9c32554 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Tue, 9 Jan 2024 11:50:51 +0100 Subject: [PATCH 06/17] Fixing eigenvector plot and deltamiss --- .../src/validphys/theorycovariance/tests.py | 62 +++++-------------- 1 file changed, 17 insertions(+), 45 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/tests.py b/validphys2/src/validphys/theorycovariance/tests.py index 29ef030c44..abf5eb9843 100644 --- a/validphys2/src/validphys/theorycovariance/tests.py +++ b/validphys2/src/validphys/theorycovariance/tests.py @@ -437,6 +437,9 @@ def vectors_9pt(splitdiffs): xs.append(newvec) return xs +def tripleindex_thcovmat_complete(theory_covmat_custom): + return theory_covmat_custom.index + def doubleindex_thcovmat(theory_covmat_custom): tripleindex = theory_covmat_custom.index return list(dict.fromkeys([(ind[0], ind[1]) for ind in tripleindex])) @@ -648,40 +651,24 @@ def projector_eigenvalue_ratio(theory_shift_test): @figure -def eigenvector_plot(evals_nonzero_basis, shx_vector): +def eigenvector_plot(evals_nonzero_basis, concatenated_shx_vector): """Produces a plot of the eigenvectors for the projected matrix, transformed back to the data space.""" evals = evals_nonzero_basis[0][::-1] evecs = evals_nonzero_basis[1].T[::-1] - f = shx_vector[0] - indexlist = list(f.index.values) - # adding process index for plotting, and reindexing matrices and vectors - dsnames = [] - processnames = [] - ids = [] - for index in indexlist: - name = index[0] - i = index[1] - dsnames.append(name) - ids.append(i) - proc = process_lookup(name) - processnames.append(proc) - tripleindex = pd.MultiIndex.from_arrays( - [processnames, dsnames, ids], names=("process", "dataset", "id") - ) - f = pd.DataFrame(f.values, index=tripleindex) + f = concatenated_shx_vector + tripleindex = f.index f.sort_index(axis=0, inplace=True) oldindex = f.index.tolist() newindex = sorted(oldindex, key=_get_key) f = f.reindex(newindex) fig, axes = plotutils.subplots(figsize=(10, 2 * len(evecs)), nrows=len(evecs)) - fig.subplots_adjust(hspace=0.8) for ax, evec, eval in zip(axes.flatten(), evecs, evals): eval_3sf = floatformatting.significant_digits(eval.item(), 3) evec = pd.DataFrame(evec, index=tripleindex) evec = evec.reindex(newindex) - ax.plot(-f.values, color="k", label="NNLO-NLO shift") + ax.plot(-f.shifts.values, color="k", label="NNLO-NLO shift") ax.plot(evec.values, label="Eigenvector") ticklocs, ticklabels, startlocs = matrix_plot_labels(evec) # Shift startlocs elements 0.5 to left so lines are between indexes @@ -699,42 +686,27 @@ def eigenvector_plot(evals_nonzero_basis, shx_vector): @figure -def deltamiss_plot(theory_shift_test, allthx_vector, evals_nonzero_basis, shx_vector): +def deltamiss_plot(theory_shift_test, ordered_alltheory_vector, concatenated_shx_vector, doubleindex_thcovmat,tripleindex_thcovmat_complete): """Produces a plot of the missing component of the shift vector, transformed back to the data space.""" # Define l, which is the number of points in the point prescription being used - l = len(allthx_vector[0]) + 1 + l = len(ordered_alltheory_vector) + 1 + # Order the concatenated_shx_vector and concatenate again + fnorm_vector = fnorm_shifts(concatenated_shx_vector, doubleindex_thcovmat) + fnorm_concat = [j for i in fnorm_vector for j in i] # Minus sign changes it from NLO-NNLO shift to NNLO-NLO shift (convention) - f = -shx_vector[0] - fmiss = theory_shift_test[4] - indexlist = list(f.index.values) - # adding process index for plotting, and reindexing matrices and vectors - dsnames = [] - processnames = [] - ids = [] - for index in indexlist: - name = index[0] - i = index[1] - dsnames.append(name) - ids.append(i) - proc = process_lookup(name) - processnames.append(proc) - # Index and reindex f and fmiss - tripleindex = pd.MultiIndex.from_arrays( - [processnames, dsnames, ids], names=("process", "dataset", "id") - ) - f = pd.DataFrame(f.values, index=tripleindex) + f = -pd.DataFrame(fnorm_concat, index=tripleindex_thcovmat_complete) + tripleindex = f.index f.sort_index(axis=0, inplace=True) oldindex = f.index.tolist() newindex = sorted(oldindex, key=_get_key) f = f.reindex(newindex) - fmiss = pd.DataFrame(fmiss, index=tripleindex) - fmiss.sort_index(axis=0, inplace=True) - fmiss = fmiss.reindex(newindex) + fmiss = pd.DataFrame(theory_shift_test[4], index=tripleindex) + fmiss_reordered = fmiss.reindex(f.index) # Plotting fig, ax = plotutils.subplots(figsize=(20, 10)) ax.plot(f.values * 100, ".-", label="NNLO-NLO Shift", color="black") - ax.plot(fmiss.values * 100, ".-", label=r"$\delta_{miss}$" + f" ({l} pt)", color="blue") + ax.plot(fmiss_reordered.values * 100, ".-", label=r"$\delta_{miss}$" + f" ({l} pt)", color="blue") ticklocs, ticklabels, startlocs = matrix_plot_labels(f) ax.set_xticks(ticklocs) ax.set_xticklabels(ticklabels, rotation=45, fontsize=20) From 6491368251e05cb7bff99b4b1343be3bb0290839 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Tue, 9 Jan 2024 12:10:50 +0100 Subject: [PATCH 07/17] Use speclabels to write NNLO-NLO shift --- validphys2/src/validphys/theorycovariance/tests.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/tests.py b/validphys2/src/validphys/theorycovariance/tests.py index abf5eb9843..b1f21a2856 100644 --- a/validphys2/src/validphys/theorycovariance/tests.py +++ b/validphys2/src/validphys/theorycovariance/tests.py @@ -750,13 +750,13 @@ def ticklocs_thcovmat(theory_covmat_custom): return matrix_plot_labels(theory_covmat_custom) @figure -def shift_diag_cov_comparison(sqrtdiags_thcovmat, fnorm_shifts, point_prescription, ticklocs_thcovmat): +def shift_diag_cov_comparison(sqrtdiags_thcovmat, fnorm_shifts, point_prescription, ticklocs_thcovmat, dataspecs): fnorm_concat = [j for i in fnorm_shifts for j in i] sqrtdiags_concat = [j for i in sqrtdiags_thcovmat for j in i] fig, ax = plotutils.subplots(figsize=(20, 10)) ax.plot(np.array(sqrtdiags_concat) * 100, ".-", label=f"MHOU ({point_prescription})", color="red") ax.plot(-np.array(sqrtdiags_concat) * 100, ".-", color="red") - ax.plot(-np.array(fnorm_concat) * 100, ".-", label="NNLO-NLO Shift", color="black") + ax.plot(-np.array(fnorm_concat) * 100, ".-", label=f"{dataspecs[1]['speclabel']}-{dataspecs[0]['speclabel']} Shift", color="black") ticklocs, ticklabels, startlocs = ticklocs_thcovmat ax.set_xticks(ticklocs) ax.set_xticklabels(ticklabels, rotation=45, fontsize=20) From 5db9ae053853fc5fe9dc92876d06f1abbcbc2654 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Tue, 9 Jan 2024 13:25:17 +0100 Subject: [PATCH 08/17] Polish the thcovariance module --- .../theorycovariance/construction.py | 223 ------------------ .../src/validphys/theorycovariance/output.py | 125 ---------- .../src/validphys/theorycovariance/tests.py | 174 +------------- .../theorycovariance/theorycovarianceutils.py | 12 - 4 files changed, 2 insertions(+), 532 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py index 1e8c6610c5..69073ba672 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -31,37 +31,6 @@ log = logging.getLogger(__name__) -theoryids_procs_central_values = collect(procs_central_values, ("theoryids",)) - -theoryids_procs_central_values_no_table = collect(procs_central_values_no_table, ("theoryids",)) - -collected_theoryids = collect("theoryids", ["theoryconfig"]) - - -def make_scale_var_covmat(predictions): - raise Exception( - "If you are seeing this error, please open an issue. This function should not be used" - ) - - -@check_correct_theory_combination -def theory_covmat_singleprocess_no_table( - theoryids_procs_central_values_no_table, procs_index, theoryids, fivetheories -): - """Calculates the theory covariance matrix for scale variations. - The matrix is a dataframe indexed by procs_index.""" - s = make_scale_var_covmat(theoryids_procs_central_values_no_table) - df = pd.DataFrame(s, index=procs_index, columns=procs_index) - return df - - -@table -@check_correct_theory_combination -def theory_covmat_singleprocess(theory_covmat_singleprocess_no_table, fivetheories): - """Duplicate of theory_covmat_singleprocess_no_table but with a table decorator.""" - return theory_covmat_singleprocess_no_table - - results_central_bytheoryids = collect(results_central, ("theoryids",)) each_dataset_results_central_bytheory = collect("results_central_bytheoryids", ("data",)) @@ -98,44 +67,6 @@ def theory_covmat_dataset( return thcovmat -@table -def theory_block_diag_covmat(theory_covmat_datasets, procs_index): - """Takes the theory covariance matrices for individual datasets and - returns a data frame with a block diagonal theory covariance matrix - by dataset""" - s = la.block_diag(*theory_covmat_datasets) - df = pd.DataFrame(s, index=procs_index, columns=procs_index) - return df - - -@table -def theory_diagonal_covmat(theory_covmat_singleprocess_no_table, procs_index): - """Returns theory covmat with only diagonal values""" - s = theory_covmat_singleprocess_no_table.values - # Initialise array of zeros and set precision to same as FK tables - s_diag = np.zeros((len(s), len(s)), dtype=np.float32) - np.fill_diagonal(s_diag, np.diag(s)) - df = pd.DataFrame(s_diag, index=procs_index, columns=procs_index) - return df - - -procs_results_theory = collect("procs_results", ("theoryids",)) - - -@check_correct_theory_combination -def total_covmat_procs(procs_results_theory, fivetheories): - """Same as total_covmat_datasets but per experiment rather than - per dataset. Needed for calculation of chi2 per experiment.""" - proc_result_covmats = [] - for proc_result in zip(*procs_results_theory): - theory_centrals = [x[1].central_value for x in proc_result] - s = make_scale_var_covmat(theory_centrals) - sigma = proc_result[0][0].covmat - cov = s + sigma - proc_result_covmats.append(cov) - return proc_result_covmats - - ProcessInfo = namedtuple("ProcessInfo", ("preds", "namelist", "sizes")) @@ -171,38 +102,6 @@ def combine_by_type(each_dataset_results_central_bytheory): ) return process_info -def process_starting_points(combine_by_type): - """Returns a dictionary of indices in the covariance matrix corresponding - to the starting point of each process.""" - process_info = combine_by_type - running_index = 0 - start_proc = defaultdict(list) - for name in process_info.theory: - size = len(process_info.theory[name][0]) - start_proc[name] = running_index - running_index += size - return start_proc - - -def covmap(combine_by_type, dataset_names): - """Creates a map between the covmat indices from matrices ordered by - process to matrices ordered by experiment as listed in the runcard""" - mapping = defaultdict(list) - start_exp = defaultdict(list) - process_info = combine_by_type - running_index = 0 - for dataset in dataset_names: - size = process_info.sizes[dataset] - start_exp[dataset] = running_index - running_index += size - start = 0 - names_by_proc_list = [item for sublist in process_info.namelist.values() for item in sublist] - for dataset in names_by_proc_list: - for i in range(process_info.sizes[dataset]): - mapping[start + i] = start_exp[dataset] + i - start += process_info.sizes[dataset] - return mapping - def covmat_3fpt(name1, name2, deltas1, deltas2): """Returns theory covariance sub-matrix for 3pt factorisation scale variation *only*, given two dataset names and collections @@ -662,24 +561,6 @@ def procs_index_matched(groups_index, procs_index): return pd.MultiIndex.from_tuples(tups, names=("process", "dataset", "id")) - -@check_correct_theory_combination -def total_covmat_diagtheory_procs(procs_results_theory, fivetheories): - """Same as total_covmat_datasets but per proc rather than - per dataset. Needed for calculation of chi2 per proc.""" - exp_result_covmats = [] - for exp_result in zip(*procs_results_theory): - theory_centrals = [x[1].central_value for x in exp_result] - s = make_scale_var_covmat(theory_centrals) - # Initialise array of zeros and set precision to same as FK tables - s_diag = np.zeros((len(s), len(s)), dtype=np.float32) - np.fill_diagonal(s_diag, np.diag(s)) - sigma = exp_result[0][0].covmat - cov = s_diag + sigma - exp_result_covmats.append(cov) - return exp_result_covmats - - @table def theory_corrmat_singleprocess(theory_covmat_singleprocess): """Calculates the theory correlation matrix for scale variations.""" @@ -690,14 +571,6 @@ def theory_corrmat_singleprocess(theory_covmat_singleprocess): return mat -@table -def theory_blockcorrmat(theory_block_diag_covmat): - """Calculates the theory correlation matrix for scale variations - with block diagonal entries by dataset only""" - mat = theory_corrmat_singleprocess(theory_block_diag_covmat) - return mat - - @table def theory_corrmat_custom(theory_covmat_custom): """Calculates the theory correlation matrix for scale variations @@ -706,24 +579,6 @@ def theory_corrmat_custom(theory_covmat_custom): return mat -@table -def theory_normcovmat_singleprocess(theory_covmat_singleprocess, procs_data_values): - """Calculates the theory covariance matrix for scale variations normalised - to data.""" - df = theory_covmat_singleprocess - mat = df / np.outer(procs_data_values, procs_data_values) - return mat - - -@table -def theory_normblockcovmat(theory_block_diag_covmat, procs_data_values): - """Calculates the theory covariance matrix for scale variations - normalised to data, block diagonal by dataset.""" - df = theory_block_diag_covmat - mat = df / np.outer(procs_data_values, procs_data_values) - return mat - - @table def theory_normcovmat_custom(theory_covmat_custom, procs_data_values): """Calculates the theory covariance matrix for scale variations normalised @@ -734,65 +589,6 @@ def theory_normcovmat_custom(theory_covmat_custom, procs_data_values): return mat -@table -def experimentplustheory_covmat_singleprocess( - procs_covmat_no_table, theory_covmat_singleprocess_no_table -): - """Calculates the experiment + theory covariance matrix for - scale variations.""" - df = procs_covmat_no_table + theory_covmat_singleprocess_no_table - return df - - -@table -def experimentplusblocktheory_covmat(procs_covmat, theory_block_diag_covmat): - """Calculates the experiment + theory covariance - matrix for scale variations.""" - df = procs_covmat + theory_block_diag_covmat - return df - - -@table -def experimentplustheory_covmat_custom(procs_covmat, theory_covmat_custom): - """Calculates the experiment + theory covariance matrix for - scale variations correlated according to the relevant prescription.""" - df = procs_covmat + theory_covmat_custom - return df - - -@table -def experimentplustheory_normcovmat_singleprocess( - procs_covmat, theory_covmat_singleprocess, procs_data -): - """Calculates the experiment + theory covariance matrix for scale - variations normalised to data.""" - df = procs_covmat + theory_covmat_singleprocess - procs_data_array = np.array(procs_data) - mat = df / np.outer(procs_data_array, procs_data_array) - return mat - - -@table -def experimentplusblocktheory_normcovmat( - procs_covmat, theory_block_diag_covmat, procs_data_values, experimentplustheory_normcovmat -): - """Calculates the experiment + theory covariance matrix for scale - variations normalised to data, block diagonal by data set.""" - mat = experimentplustheory_normcovmat(procs_covmat, theory_block_diag_covmat, procs_data_values) - return mat - - -@table -def experimentplustheory_normcovmat_custom( - procs_covmat, theory_covmat_custom, procs_data_values, experimentplustheory_normcovmat -): - """Calculates the experiment + theory covariance matrix for scale - variations normalised to data, correlations by process type.""" - mat = experimentplustheory_normcovmat(procs_covmat, theory_covmat_custom, procs_data_values) - - return mat - - @table def experimentplustheory_corrmat_singleprocess(procs_covmat, theory_covmat_singleprocess): """Calculates the correlation matrix for the experimental @@ -804,14 +600,6 @@ def experimentplustheory_corrmat_singleprocess(procs_covmat, theory_covmat_singl return corrmat -@table -def experimentplusblocktheory_corrmat(procs_covmat, theory_block_diag_covmat): - """Calculates the correlation matrix for the experimental - plus theory covariance matrices, block diagonal by dataset.""" - corrmat = experimentplustheory_corrmat_singleprocess(procs_covmat, theory_block_diag_covmat) - return corrmat - - @table def experimentplustheory_corrmat_custom(procs_covmat, theory_covmat_custom): """Calculates the correlation matrix for the experimental @@ -843,12 +631,6 @@ def data_theory_diff(procs_results): return central_diff -def chi2_block_impact(theory_block_diag_covmat, procs_covmat, procs_results): - """Returns total chi2 including theory cov mat""" - chi2 = chi2_impact(theory_block_diag_covmat, procs_covmat, procs_results) - return chi2 - - def chi2_impact_custom(theory_covmat_custom, procs_covmat, procs_results): """Returns total chi2 including theory cov mat""" chi2 = chi2_impact(theory_covmat_custom, procs_covmat, procs_results) @@ -902,11 +684,6 @@ def abs_chi2_data_theory_proc(procs_results, total_covmat_procs): return chi2data_array -def abs_chi2_data_diagtheory_proc(procs_results, total_covmat_diagtheory_procs): - """For a diagonal theory covmat""" - return abs_chi2_data_theory_proc(procs_results, total_covmat_diagtheory_procs) - - def abs_chi2_data_diagtheory_dataset(each_dataset_results, total_covmat_diagtheory_datasets): """For a diagonal theory covmat""" return abs_chi2_data_theory_dataset(each_dataset_results, total_covmat_diagtheory_datasets) diff --git a/validphys2/src/validphys/theorycovariance/output.py b/validphys2/src/validphys/theorycovariance/output.py index 0487d81215..6c56ac37c9 100644 --- a/validphys2/src/validphys/theorycovariance/output.py +++ b/validphys2/src/validphys/theorycovariance/output.py @@ -38,17 +38,6 @@ def procs_chi2_table_theory( ) -@table -def procs_chi2_table_diagtheory( - procs_data, pdf, abs_chi2_data_diagtheory_proc, abs_chi2_data_theory_dataset_by_process -): - """Same as groups_chi2_table but including diagonal theory covariance matrix. - Note: we use groups_chi2_table here but provide data grouped by process.""" - return groups_chi2_table( - procs_data, pdf, abs_chi2_data_diagtheory_proc, abs_chi2_data_theory_dataset_by_process - ) - - def matrix_plot_labels(df): """Returns the tick locations and labels, and the starting point values for each category, based on a dataframe @@ -223,15 +212,6 @@ def plot_expcorrmat_heatmap(procs_corrmat): return fig -@figure -def plot_normthblockcovmat_heatmap(theory_normblockcovmat): - """Matrix plot for block diagonal theory covariance matrix""" - fig = plot_covmat_heatmap( - theory_normblockcovmat, "Block diagonal theory covariance matrix by dataset" - ) - return fig - - @figure def plot_normthcovmat_heatmap_custom(theory_normcovmat_custom, theoryids, fivetheories): """Matrix plot for block diagonal theory covariance matrix by process type""" @@ -243,15 +223,6 @@ def plot_normthcovmat_heatmap_custom(theory_normcovmat_custom, theoryids, fiveth return fig -@figure -def plot_thblockcorrmat_heatmap(theory_blockcorrmat): - """Matrix plot of the theory correlation matrix""" - fig = plot_corrmat_heatmap( - theory_blockcorrmat, "Theory correlation matrix block diagonal by dataset" - ) - return fig - - @figure def plot_thcorrmat_heatmap_custom(theory_corrmat_custom, theoryids, fivetheories): """Matrix plot of the theory correlation matrix, correlations by process type""" @@ -263,41 +234,6 @@ def plot_thcorrmat_heatmap_custom(theory_corrmat_custom, theoryids, fivetheories return fig -@figure -def plot_normexpplusblockthcovmat_heatmap(experimentplusblocktheory_normcovmat): - """Matrix plot of the exp + theory covariance matrix normalised to data""" - fig = plot_covmat_heatmap( - experimentplusblocktheory_normcovmat, - "Experiment + theory (block diagonal by dataset) covariance matrix", - ) - return fig - - -@figure -def plot_normexpplusthcovmat_heatmap_custom( - experimentplustheory_normcovmat_custom, theoryids, fivetheories -): - """Matrix plot of the exp + theory covariance matrix normalised to data""" - l = len(theoryids) - if l == 5: - if fivetheories == "bar": - l = r"$\bar{5}$" - fig = plot_covmat_heatmap( - experimentplustheory_normcovmat_custom, f"Experimental + Theory Covariance Matrix ({l} pt)" - ) - return fig - - -@figure -def plot_expplusblockthcorrmat_heatmap(experimentplusblocktheory_corrmat): - """Matrix plot of the exp + theory correlation matrix""" - fig = plot_corrmat_heatmap( - experimentplusblocktheory_corrmat, - "Experiment + theory (block diagonal by dataset) correlation matrix", - ) - return fig - - @figure def plot_expplusthcorrmat_heatmap_custom( experimentplustheory_corrmat_custom, theoryids, fivetheories @@ -313,30 +249,6 @@ def plot_expplusthcorrmat_heatmap_custom( return fig -@figure -def plot_blockcovdiff_heatmap(theory_block_diag_covmat, procs_covmat): - """Matrix plot (thcov + expcov)/expcov""" - df = (theory_block_diag_covmat.as_matrix() + procs_covmat.values) / np.mean(procs_covmat.values) - fig = plot_covmat_heatmap( - df, "(Theory + experiment)/mean(experiment)" + "for block diagonal theory covmat by dataset" - ) - return fig - - -@figure -def plot_covdiff_heatmap_custom(theory_covmat_custom, procs_covmat, theoryids, fivetheories): - """Matrix plot (thcov + expcov)/expcov""" - l = len(theoryids) - if l == 5: - if fivetheories == "bar": - l = r"$\bar{5}$" - df = (theory_covmat_custom + procs_covmat) / np.mean(procs_covmat.values) - fig = plot_covmat_heatmap( - df, f"(Theory + experiment)/mean(experiment) covariance matrices for {l} points" - ) - return fig - - @figure def plot_diag_cov_comparison( theory_covmat_custom, procs_covmat, procs_data_values, theoryids, fivetheories @@ -386,43 +298,6 @@ def plot_diag_cov_comparison( return fig -@figure -def plot_diag_cov_impact( - theory_covmat_custom, procs_covmat, procs_data_values, theoryids, fivetheories -): - """Plot ((expcov)^-1_ii)^-0.5 versus ((expcov + thcov)^-1_ii)^-0.5""" - l = len(theoryids) - if l == 5: - if fivetheories == "bar": - l = r"$\bar{5}$" - matrix_theory = theory_covmat_custom.values - matrix_experiment = procs_covmat.values - inv_exp = (np.diag(la.inv(matrix_experiment))) ** (-0.5) / procs_data_values - inv_tot = (np.diag(la.inv(matrix_theory + matrix_experiment))) ** (-0.5) / procs_data_values - plot_index = theory_covmat_custom.index - df_inv_exp = pd.DataFrame(inv_exp, index=plot_index) - df_inv_exp.sort_index(axis=0, inplace=True) - oldindex = df_inv_exp.index.tolist() - newindex = sorted(oldindex, key=_get_key) - df_inv_exp = df_inv_exp.reindex(newindex) - df_inv_tot = pd.DataFrame(inv_tot, index=plot_index) - df_inv_tot.sort_index(axis=0, inplace=True) - df_inv_tot = df_inv_tot.reindex(newindex) - fig, ax = plotutils.subplots() - ax.plot(df_inv_exp.values, ".", label="Experiment", color="orange") - ax.plot(df_inv_tot.values, ".", label="Experiment + Theory", color="mediumseagreen") - ticklocs, ticklabels, startlocs = matrix_plot_labels(df_inv_exp) - ax.set_xticks(ticklocs) - ax.set_xticklabels(ticklabels, rotation="vertical", fontsize=20) - ax.vlines(startlocs, 0, len(matrix_theory), linestyles="dashed") - ax.set_ylabel(r"$\frac{1}{D_i}\frac{1}{\sqrt{[cov^{-1}_]{ii}}}$", fontsize=30) - ax.yaxis.set_tick_params(labelsize=20) - ax.set_title(f"Diagonal impact of adding theory covariance matrix for {l} points", fontsize=20) - ax.legend(fontsize=20) - ax.margins(x=0) - return fig - - @figure def plot_datasets_chi2_theory(procs_data, each_dataset_chi2, abs_chi2_data_theory_dataset): """Plot the chi² of all datasets, before and after adding theory errors, with bars.""" diff --git a/validphys2/src/validphys/theorycovariance/tests.py b/validphys2/src/validphys/theorycovariance/tests.py index b1f21a2856..dfd9fcc35f 100644 --- a/validphys2/src/validphys/theorycovariance/tests.py +++ b/validphys2/src/validphys/theorycovariance/tests.py @@ -18,20 +18,9 @@ from reportengine.figure import figure from reportengine.table import table from validphys import plotutils -from validphys.checks import check_two_dataspecs -from validphys.theorycovariance.construction import ( - combine_by_type, - theory_corrmat_singleprocess, - theory_covmat_custom, - covmap, - covs_pt_prescrip, - process_starting_points, -) from validphys.theorycovariance.output import _get_key, matrix_plot_labels from validphys.theorycovariance.theorycovarianceutils import ( - check_correct_theory_combination_theoryconfig, process_lookup, - check_correct_theory_combination_dataspecs ) log = logging.getLogger(__name__) @@ -41,23 +30,6 @@ LabeledShifts = namedtuple("LabeledShifts", ("process", "dataset_name", "shifts")) -@check_two_dataspecs -def dataspecs_dataset_prediction_shift(matched_dataspecs_results, process, dataset_name): - """Compute the difference in theory predictions between two dataspecs. - This can be used in combination with `matched_datasets_from_dataspecs` - It returns a ``LabeledShifts`` containing ``dataset_name``, - ``process`` and ``shifts``. - """ - r1, r2 = matched_dataspecs_results - res = r1[1].central_value - r2[1].central_value - return LabeledShifts(dataset_name=dataset_name, process=process, shifts=res) - - -matched_dataspecs_dataset_prediction_shift = collect( - "dataspecs_dataset_prediction_shift", ["dataspecs"] -) - - def shift_vector(matched_dataspecs_results, process, dataset_name): """Returns a DataFrame of normalised shift vectors and normalization vector for matched dataspecs.""" r1, r2 = matched_dataspecs_results @@ -70,35 +42,6 @@ def shift_vector(matched_dataspecs_results, process, dataset_name): index = pd.MultiIndex.from_arrays([processnames,dsnames, point_indexes], names=["group", "dataset", "id"]) return pd.DataFrame({'shifts': norm_shifts, 'norm':norm}, index=index) - -def dataspecs_dataset_theory(matched_dataspecs_results, process, dataset_name): - """Returns a tuple of shifts processed by data set and experiment - for matched dataspecs.""" - central = matched_dataspecs_results[0] - res = central[1].central_value - return LabeledShifts(dataset_name=dataset_name, process=process, shifts=res) - - -matched_dataspecs_dataset_theory = collect("dataspecs_dataset_theory", ["dataspecs"]) - - -def theory_vector(matched_dataspecs_results, process, dataset_name): - """Returns a DataFrame of the central theory vector for - matched dataspecs.""" - all_theory = np.concatenate([val.shifts for val in matched_dataspecs_dataset_theory]) - dsnames = np.concatenate( - [ - np.full(len(val.shifts), val.dataset_name, dtype=object) - for val in matched_dataspecs_dataset_theory - ] - ) - point_indexes = np.concatenate( - [np.arange(len(val.shifts)) for val in matched_dataspecs_dataset_theory] - ) - index = pd.MultiIndex.from_arrays([dsnames, point_indexes], names=["Dataset name", "Point"]) - return pd.DataFrame(all_theory, index=index) - - def dataset_alltheory(each_dataset_results_central_bytheory): """Returns a LabeledShifts tuple corresponding to the theory vectors for all the scale varied theories.""" @@ -109,8 +52,6 @@ def dataset_alltheory(each_dataset_results_central_bytheory): return labeled_shifts_list -matched_dataspecs_dataset_alltheory = collect("dataspecs_dataset_alltheory", ["dataspecs"]) - def alltheory_vector(dataset_alltheory): """Returns a DataFrame with the theory vectors for matched dataspecs for the scale-varied theories""" @@ -133,118 +74,8 @@ def alltheory_vector(dataset_alltheory): theory_vectors.append(pd.DataFrame(theoryvector, index=index)) return theory_vectors - -all_matched_results = collect("matched_dataspecs_results", ["dataspecs"]) - - -def combine_by_type_dataspecs(all_matched_results): - """Like combine_by_type but for matched dataspecs""" - return combine_by_type(all_matched_results) - - -dataspecs_theoryids = collect("theoryid", ["theoryconfig", "original", "dataspecs"]) - - -matched_dataspecs_process = collect("process", ["dataspecs"]) -matched_dataspecs_dataset_name = collect("dataset_name", ["dataspecs"]) -matched_cuts_datasets = collect("dataset", ["dataspecs"]) -all_matched_datasets = collect("matched_cuts_datasets", ["dataspecs"]) - - -def all_matched_data_lengths(all_matched_datasets): - """Returns a list of the data sets lengths.""" - lens = [] - for rlist in all_matched_datasets: - lens.append(rlist[0].load_commondata().ndata) - return lens - - -def matched_experiments_index(matched_dataspecs_dataset_name, all_matched_data_lengths): - """Returns MultiIndex composed of data set name and - starting point of data set.""" - dsnames = matched_dataspecs_dataset_name - lens = all_matched_data_lengths - dsnames = np.concatenate( - [np.full(l, dsname, dtype=object) for (l, dsname) in zip(lens, dsnames)] - ) - point_indexes = np.concatenate([np.arange(l) for l in lens]) - index = pd.MultiIndex.from_arrays([dsnames, point_indexes], names=["Dataset name", "Point"]) - return index - - -thx_corrmat = collect( - "theory_corrmat_custom_dataspecs", ["combined_shift_and_theory_dataspecs", "theoryconfig"] -) - -shx_corrmat = collect( - "matched_datasets_shift_matrix_correlations", - ["combined_shift_and_theory_dataspecs", "shiftconfig"], -) - -thx_covmat = collect( - "theory_covmat_custom_dataspecs", ["combined_shift_and_theory_dataspecs", "theoryconfig"] -) - -combined_dataspecs_results = collect( - "all_matched_results", ["combined_shift_and_theory_dataspecs", "theoryconfig"] -) - shx_vector = collect("shift_vector", ["matched_datasets_from_dataspecs"]) -thx_vector = collect("theory_vector", ["matched_datasets_from_dataspecs"]) - -allthx_vector = collect("alltheory_vector", ["matched_datasets_from_dataspecs"]) - - -def theory_matrix_threshold(theory_threshold: (int, float) = 0): - """Returns the threshold below which theory correlation elements are set to - zero when comparing to shift correlation matrix""" - return theory_threshold - - -@table -def theory_corrmat_custom_dataspecs(theory_covmat_custom_dataspecs): - """Calculates the theory correlation matrix for scale variations - with variations by process type""" - mat = theory_corrmat_singleprocess(theory_covmat_custom_dataspecs) - return mat - -def process_starting_points_dataspecs(combine_by_type_dataspecs): - """Like process_starting_points but for matched dataspecs.""" - return process_starting_points(combine_by_type_dataspecs) - - -@check_correct_theory_combination_dataspecs -def covs_pt_prescrip_dataspecs( - combine_by_type_dataspecs, - dataspecs_theoryids, - point_prescription, - fivetheories, - seventheories, -): - """Like covs_pt_prescrip but for matched dataspecs.""" - return covs_pt_prescrip( - combine_by_type_dataspecs, - dataspecs_theoryids, - point_prescription, - fivetheories, - seventheories, - ) - - -def covmap_dataspecs(combine_by_type_dataspecs): - """Like covmap but for matched dataspecs.""" - return covmap(combine_by_type_dataspecs) - -@table -def theory_covmat_custom_dataspecs( - covs_pt_prescrip_dataspecs, covmap_dataspecs, matched_experiments_index -): - """Like theory_covmat_custom but for matched dataspecs.""" - return theory_covmat_custom( - covs_pt_prescrip_dataspecs, covmap_dataspecs, matched_experiments_index - ) - def _shuffle_list(l, shift): """Function that moves list elements left by 'shift' entries""" i = 0 @@ -543,8 +374,7 @@ def theory_shift_test(concatenated_shx_vector, evals_nonzero_basis, doubleindex_ fnorm_vector = fnorm_shifts(concatenated_shx_vector, doubleindex_thcovmat) fnorm_concat = np.array([j for i in fnorm_vector for j in i]) # NNLO-NLO shift vector - #f = -fnorm_concat.T - f = -fnorm_concat + f = -fnorm_concat.T # Projecting the shift vector onto each of the eigenvectors projectors = np.sum(f * v.T, axis=1) # Initialise array of zeros and set precision to same as FK tables @@ -607,7 +437,7 @@ def theta(theory_shift_test): fs_mod = np.sqrt(np.sum(fs**2)) costheta = f @ fs / (fmod * fs_mod) th = np.arccos(costheta) - return th + return np.degrees(th) @figure diff --git a/validphys2/src/validphys/theorycovariance/theorycovarianceutils.py b/validphys2/src/validphys/theorycovariance/theorycovarianceutils.py index 0e04dccc4f..5bfec07d94 100644 --- a/validphys2/src/validphys/theorycovariance/theorycovarianceutils.py +++ b/validphys2/src/validphys/theorycovariance/theorycovarianceutils.py @@ -121,18 +121,6 @@ def check_correct_theory_combination_internal( check_correct_theory_combination = make_argcheck(check_correct_theory_combination_internal) - -@make_argcheck -def check_correct_theory_combination_theoryconfig(collected_theoryids, fivetheories): - check_correct_theory_combination_internal(collected_theoryids[0], fivetheories) - - -@make_argcheck -def check_correct_theory_combination_dataspecs(dataspecs_theoryids, fivetheories): - """Like check_correct_theory_combination but for matched dataspecs.""" - return check_correct_theory_combination.__wrapped__(dataspecs_theoryids, fivetheories) - - @make_argcheck def check_fit_dataset_order_matches_grouped( group_dataset_inputs_by_metadata, data_input, processed_metadata_group From e0952bcc9bf4db8f272ade51c14dbdfa6c309755 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Tue, 9 Jan 2024 13:38:52 +0100 Subject: [PATCH 09/17] Run black --- .../src/validphys/theorycovariance/tests.py | 102 +++++++++++++----- 1 file changed, 75 insertions(+), 27 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/tests.py b/validphys2/src/validphys/theorycovariance/tests.py index dfd9fcc35f..3347af654e 100644 --- a/validphys2/src/validphys/theorycovariance/tests.py +++ b/validphys2/src/validphys/theorycovariance/tests.py @@ -19,9 +19,7 @@ from reportengine.table import table from validphys import plotutils from validphys.theorycovariance.output import _get_key, matrix_plot_labels -from validphys.theorycovariance.theorycovarianceutils import ( - process_lookup, -) +from validphys.theorycovariance.theorycovarianceutils import process_lookup log = logging.getLogger(__name__) @@ -39,8 +37,11 @@ def shift_vector(matched_dataspecs_results, process, dataset_name): dsnames = np.full(len(shifts), dataset_name, dtype=object) processnames = np.full(len(shifts), process, dtype=object) point_indexes = np.arange(len(shifts)) - index = pd.MultiIndex.from_arrays([processnames,dsnames, point_indexes], names=["group", "dataset", "id"]) - return pd.DataFrame({'shifts': norm_shifts, 'norm':norm}, index=index) + index = pd.MultiIndex.from_arrays( + [processnames, dsnames, point_indexes], names=["group", "dataset", "id"] + ) + return pd.DataFrame({'shifts': norm_shifts, 'norm': norm}, index=index) + def dataset_alltheory(each_dataset_results_central_bytheory): """Returns a LabeledShifts tuple corresponding to the theory @@ -48,7 +49,13 @@ def dataset_alltheory(each_dataset_results_central_bytheory): labeled_shifts_list = [] for dataset in each_dataset_results_central_bytheory: res = [theoryid[1].central_value for theoryid in dataset] - labeled_shifts_list.append(LabeledShifts(dataset_name=dataset[0][0].name, process=process_lookup(dataset[0][0].name), shifts=res)) + labeled_shifts_list.append( + LabeledShifts( + dataset_name=dataset[0][0].name, + process=process_lookup(dataset[0][0].name), + shifts=res, + ) + ) return labeled_shifts_list @@ -57,16 +64,10 @@ def alltheory_vector(dataset_alltheory): dataspecs for the scale-varied theories""" all_theory = np.concatenate([val.shifts for val in dataset_alltheory], axis=1) dsnames = np.concatenate( - [ - np.full(len(val.shifts[0]), val.dataset_name, dtype=object) - for val in dataset_alltheory - ] + [np.full(len(val.shifts[0]), val.dataset_name, dtype=object) for val in dataset_alltheory] ) groupnames = np.concatenate( - [ - np.full(len(val.shifts[0]), val.process, dtype=object) - for val in dataset_alltheory - ] + [np.full(len(val.shifts[0]), val.process, dtype=object) for val in dataset_alltheory] ) index = pd.MultiIndex.from_arrays([groupnames, dsnames], names=["group", "dataset"]) theory_vectors = [] @@ -74,8 +75,10 @@ def alltheory_vector(dataset_alltheory): theory_vectors.append(pd.DataFrame(theoryvector, index=index)) return theory_vectors + shx_vector = collect("shift_vector", ["matched_datasets_from_dataspecs"]) + def _shuffle_list(l, shift): """Function that moves list elements left by 'shift' entries""" i = 0 @@ -268,13 +271,16 @@ def vectors_9pt(splitdiffs): xs.append(newvec) return xs + def tripleindex_thcovmat_complete(theory_covmat_custom): return theory_covmat_custom.index + def doubleindex_thcovmat(theory_covmat_custom): tripleindex = theory_covmat_custom.index return list(dict.fromkeys([(ind[0], ind[1]) for ind in tripleindex])) + def ordered_alltheory_vector(alltheory_vector, doubleindex_thcovmat): ord_alltheory_vector = [] for theoryid_vector in alltheory_vector: @@ -284,11 +290,24 @@ def ordered_alltheory_vector(alltheory_vector, doubleindex_thcovmat): ord_alltheory_vector.append(pd.concat(list_df).values.T[0]) return ord_alltheory_vector -def evals_nonzero_basis(ordered_alltheory_vector, theory_covmat_custom,fivetheories, orthonormalisation: (str, type(None)) = None, seventheories: (str, type(None)) = None): - covmat = theory_covmat_custom / (np.outer(ordered_alltheory_vector[0], ordered_alltheory_vector[0])) + +def evals_nonzero_basis( + ordered_alltheory_vector, + theory_covmat_custom, + fivetheories, + orthonormalisation: (str, type(None)) = None, + seventheories: (str, type(None)) = None, +): + covmat = theory_covmat_custom / ( + np.outer(ordered_alltheory_vector[0], ordered_alltheory_vector[0]) + ) # constructing vectors of shifts due to scale variation diffs = [ - pd.DataFrame(((ordered_alltheory_vector[0] - scalevarvector) / ordered_alltheory_vector[0]), index=theory_covmat_custom.index.droplevel(0)) for scalevarvector in ordered_alltheory_vector[1:] + pd.DataFrame( + ((ordered_alltheory_vector[0] - scalevarvector) / ordered_alltheory_vector[0]), + index=theory_covmat_custom.index.droplevel(0), + ) + for scalevarvector in ordered_alltheory_vector[1:] ] # number of points in point prescription num_pts = len(ordered_alltheory_vector) @@ -355,15 +374,15 @@ def evals_nonzero_basis(ordered_alltheory_vector, theory_covmat_custom,fivetheor w, v_projected = la.eigh(projected_matrix) # Finding eigenvectors in data space v = p.dot(v_projected) - return w, v, cond_num - + return w, v, cond_num + def projected_condition_num(evals_nonzero_basis): cond_num = evals_nonzero_basis[2] return cond_num -def theory_shift_test(concatenated_shx_vector, evals_nonzero_basis, doubleindex_thcovmat ): +def theory_shift_test(concatenated_shx_vector, evals_nonzero_basis, doubleindex_thcovmat): """Compares the NNLO-NLO shift, f, with the eigenvectors and eigenvalues of the theory covariance matrix, and returns the component of the NNLO-NLO shift space which is missed by the covariance matrix space: fmiss, as well as the @@ -516,7 +535,13 @@ def eigenvector_plot(evals_nonzero_basis, concatenated_shx_vector): @figure -def deltamiss_plot(theory_shift_test, ordered_alltheory_vector, concatenated_shx_vector, doubleindex_thcovmat,tripleindex_thcovmat_complete): +def deltamiss_plot( + theory_shift_test, + ordered_alltheory_vector, + concatenated_shx_vector, + doubleindex_thcovmat, + tripleindex_thcovmat_complete, +): """Produces a plot of the missing component of the shift vector, transformed back to the data space.""" # Define l, which is the number of points in the point prescription being used @@ -536,7 +561,9 @@ def deltamiss_plot(theory_shift_test, ordered_alltheory_vector, concatenated_shx # Plotting fig, ax = plotutils.subplots(figsize=(20, 10)) ax.plot(f.values * 100, ".-", label="NNLO-NLO Shift", color="black") - ax.plot(fmiss_reordered.values * 100, ".-", label=r"$\delta_{miss}$" + f" ({l} pt)", color="blue") + ax.plot( + fmiss_reordered.values * 100, ".-", label=r"$\delta_{miss}$" + f" ({l} pt)", color="blue" + ) ticklocs, ticklabels, startlocs = matrix_plot_labels(f) ax.set_xticks(ticklocs) ax.set_xticklabels(ticklabels, rotation=45, fontsize=20) @@ -551,9 +578,11 @@ def deltamiss_plot(theory_shift_test, ordered_alltheory_vector, concatenated_shx ax.yaxis.set_tick_params(labelsize=20) return fig + def diagdf_theory_covmat(theory_covmat_custom): return pd.DataFrame(data=np.diag(theory_covmat_custom.values), index=theory_covmat_custom.index) + def tripleindex_set(group_dataset_inputs_by_process): tripleindex = [] for process in group_dataset_inputs_by_process: @@ -561,32 +590,51 @@ def tripleindex_set(group_dataset_inputs_by_process): tripleindex.append((process['group_name'], dataset.name)) return tripleindex + def concatenated_shx_vector(shx_vector): return pd.concat(shx_vector) + def sqrtdiags_thcovmat(tripleindex_set, diagdf_theory_covmat, concatenated_shx_vector): sqrtdiags = [] for index in tripleindex_set: - sqrtdiags.append(list(np.sqrt(diagdf_theory_covmat.loc[index[0]].loc[index[1]].values.transpose()[0])/concatenated_shx_vector.loc[index[0]].loc[index[1]].norm.values)) + sqrtdiags.append( + list( + np.sqrt(diagdf_theory_covmat.loc[index[0]].loc[index[1]].values.transpose()[0]) + / concatenated_shx_vector.loc[index[0]].loc[index[1]].norm.values + ) + ) return sqrtdiags + def fnorm_shifts(concatenated_shx_vector, tripleindex_set): fnorm = [] for index in tripleindex_set: fnorm.append(list(concatenated_shx_vector.loc[index[0]].loc[index[1]].shifts.values)) return fnorm + def ticklocs_thcovmat(theory_covmat_custom): return matrix_plot_labels(theory_covmat_custom) + @figure -def shift_diag_cov_comparison(sqrtdiags_thcovmat, fnorm_shifts, point_prescription, ticklocs_thcovmat, dataspecs): +def shift_diag_cov_comparison( + sqrtdiags_thcovmat, fnorm_shifts, point_prescription, ticklocs_thcovmat, dataspecs +): fnorm_concat = [j for i in fnorm_shifts for j in i] sqrtdiags_concat = [j for i in sqrtdiags_thcovmat for j in i] fig, ax = plotutils.subplots(figsize=(20, 10)) - ax.plot(np.array(sqrtdiags_concat) * 100, ".-", label=f"MHOU ({point_prescription})", color="red") + ax.plot( + np.array(sqrtdiags_concat) * 100, ".-", label=f"MHOU ({point_prescription})", color="red" + ) ax.plot(-np.array(sqrtdiags_concat) * 100, ".-", color="red") - ax.plot(-np.array(fnorm_concat) * 100, ".-", label=f"{dataspecs[1]['speclabel']}-{dataspecs[0]['speclabel']} Shift", color="black") + ax.plot( + -np.array(fnorm_concat) * 100, + ".-", + label=f"{dataspecs[1]['speclabel']}-{dataspecs[0]['speclabel']} Shift", + color="black", + ) ticklocs, ticklabels, startlocs = ticklocs_thcovmat ax.set_xticks(ticklocs) ax.set_xticklabels(ticklabels, rotation=45, fontsize=20) @@ -598,4 +646,4 @@ def shift_diag_cov_comparison(sqrtdiags_thcovmat, fnorm_shifts, point_prescripti ax.set_ylim(-35, 35) ax.legend(fontsize=20) ax.yaxis.set_tick_params(labelsize=20) - return fig \ No newline at end of file + return fig From 38020788004cd2014c48f694c2bc2d3bda267375 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Tue, 9 Jan 2024 13:41:01 +0100 Subject: [PATCH 10/17] Address comments --- validphys2/src/validphys/theorycovariance/tests.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/tests.py b/validphys2/src/validphys/theorycovariance/tests.py index 3347af654e..9f8db6a891 100644 --- a/validphys2/src/validphys/theorycovariance/tests.py +++ b/validphys2/src/validphys/theorycovariance/tests.py @@ -314,9 +314,7 @@ def evals_nonzero_basis( # constructing dictionary of datasets in each process type doubleindex = doubleindex_thcovmat(theory_covmat_custom) procdict = {} - for index in doubleindex: - name = index[1] - proc = index[0] + for proc, name in doubleindex: if proc not in list(procdict.keys()): procdict[proc] = [name] elif name not in procdict[proc]: @@ -550,7 +548,7 @@ def deltamiss_plot( fnorm_vector = fnorm_shifts(concatenated_shx_vector, doubleindex_thcovmat) fnorm_concat = [j for i in fnorm_vector for j in i] # Minus sign changes it from NLO-NNLO shift to NNLO-NLO shift (convention) - f = -pd.DataFrame(fnorm_concat, index=tripleindex_thcovmat_complete) + f = -1.0 * pd.DataFrame(fnorm_concat, index=tripleindex_thcovmat_complete) tripleindex = f.index f.sort_index(axis=0, inplace=True) oldindex = f.index.tolist() From 17777e4782707d92f65e1ead5292d63f7ca4a766 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Tue, 9 Jan 2024 17:03:43 +0100 Subject: [PATCH 11/17] Remove index on id --- .../src/validphys/theorycovariance/tests.py | 98 +++++++++++-------- 1 file changed, 58 insertions(+), 40 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/tests.py b/validphys2/src/validphys/theorycovariance/tests.py index 9f8db6a891..40e65e6845 100644 --- a/validphys2/src/validphys/theorycovariance/tests.py +++ b/validphys2/src/validphys/theorycovariance/tests.py @@ -36,16 +36,15 @@ def shift_vector(matched_dataspecs_results, process, dataset_name): norm_shifts = shifts / norm dsnames = np.full(len(shifts), dataset_name, dtype=object) processnames = np.full(len(shifts), process, dtype=object) - point_indexes = np.arange(len(shifts)) index = pd.MultiIndex.from_arrays( - [processnames, dsnames, point_indexes], names=["group", "dataset", "id"] + [processnames, dsnames], names=["group", "dataset"] ) return pd.DataFrame({'shifts': norm_shifts, 'norm': norm}, index=index) def dataset_alltheory(each_dataset_results_central_bytheory): """Returns a LabeledShifts tuple corresponding to the theory - vectors for all the scale varied theories.""" + vectors for all the scale varied theories, including the central.""" labeled_shifts_list = [] for dataset in each_dataset_results_central_bytheory: res = [theoryid[1].central_value for theoryid in dataset] @@ -61,7 +60,7 @@ def dataset_alltheory(each_dataset_results_central_bytheory): def alltheory_vector(dataset_alltheory): """Returns a DataFrame with the theory vectors for matched - dataspecs for the scale-varied theories""" + dataspecs for the scale-varied theories, including the central""" all_theory = np.concatenate([val.shifts for val in dataset_alltheory], axis=1) dsnames = np.concatenate( [np.full(len(val.shifts[0]), val.dataset_name, dtype=object) for val in dataset_alltheory] @@ -273,15 +272,18 @@ def vectors_9pt(splitdiffs): def tripleindex_thcovmat_complete(theory_covmat_custom): + """The complete tripleindex of the theory covmat.""" return theory_covmat_custom.index def doubleindex_thcovmat(theory_covmat_custom): + """The unique set of (group, dataset) index of the theory covmat.""" tripleindex = theory_covmat_custom.index return list(dict.fromkeys([(ind[0], ind[1]) for ind in tripleindex])) def ordered_alltheory_vector(alltheory_vector, doubleindex_thcovmat): + """Order the group and the dataset of all the theory vectors of the scale-varied predictions in the same way the theory covmat is ordered.""" ord_alltheory_vector = [] for theoryid_vector in alltheory_vector: list_df = [] @@ -305,7 +307,7 @@ def evals_nonzero_basis( diffs = [ pd.DataFrame( ((ordered_alltheory_vector[0] - scalevarvector) / ordered_alltheory_vector[0]), - index=theory_covmat_custom.index.droplevel(0), + index=theory_covmat_custom.index.droplevel(0).droplevel(1), ) for scalevarvector in ordered_alltheory_vector[1:] ] @@ -379,19 +381,20 @@ def projected_condition_num(evals_nonzero_basis): cond_num = evals_nonzero_basis[2] return cond_num +def fnorm_shifts_ordered(concatenated_shx_vector, doubleindex_thcovmat): + """Shifts vectors ordered as the theory covmat.""" + fnorm_vector = fnorm_shifts_byprocess(concatenated_shx_vector, doubleindex_thcovmat) + return np.array([j for i in fnorm_vector for j in i]) -def theory_shift_test(concatenated_shx_vector, evals_nonzero_basis, doubleindex_thcovmat): +def theory_shift_test(fnorm_shifts_ordered, evals_nonzero_basis): """Compares the NNLO-NLO shift, f, with the eigenvectors and eigenvalues of the theory covariance matrix, and returns the component of the NNLO-NLO shift space which is missed by the covariance matrix space: fmiss, as well as the projections of the shift vector onto each of the eigenvectors: projectors.""" w, v = evals_nonzero_basis[:2] v = np.real(v) - # Order the concatenated_shx_vector and concatenate again - fnorm_vector = fnorm_shifts(concatenated_shx_vector, doubleindex_thcovmat) - fnorm_concat = np.array([j for i in fnorm_vector for j in i]) # NNLO-NLO shift vector - f = -fnorm_concat.T + f = -fnorm_shifts_ordered.T # Projecting the shift vector onto each of the eigenvectors projectors = np.sum(f * v.T, axis=1) # Initialise array of zeros and set precision to same as FK tables @@ -498,13 +501,13 @@ def projector_eigenvalue_ratio(theory_shift_test): @figure -def eigenvector_plot(evals_nonzero_basis, concatenated_shx_vector): +def eigenvector_plot(evals_nonzero_basis, fnorm_shifts_ordered, tripleindex_thcovmat_complete, dataspecs): """Produces a plot of the eigenvectors for the projected matrix, transformed back to the data space.""" evals = evals_nonzero_basis[0][::-1] evecs = evals_nonzero_basis[1].T[::-1] - f = concatenated_shx_vector - tripleindex = f.index + f = pd.DataFrame(fnorm_shifts_ordered, index=tripleindex_thcovmat_complete) + # Sort index f.sort_index(axis=0, inplace=True) oldindex = f.index.tolist() newindex = sorted(oldindex, key=_get_key) @@ -513,9 +516,9 @@ def eigenvector_plot(evals_nonzero_basis, concatenated_shx_vector): fig.subplots_adjust(hspace=0.8) for ax, evec, eval in zip(axes.flatten(), evecs, evals): eval_3sf = floatformatting.significant_digits(eval.item(), 3) - evec = pd.DataFrame(evec, index=tripleindex) + evec = pd.DataFrame(evec, index=tripleindex_thcovmat_complete) evec = evec.reindex(newindex) - ax.plot(-f.shifts.values, color="k", label="NNLO-NLO shift") + ax.plot(-1.0 * (f.values), color="k", label=f"{dataspecs[1]['speclabel']}-{dataspecs[0]['speclabel']} shift") ax.plot(evec.values, label="Eigenvector") ticklocs, ticklabels, startlocs = matrix_plot_labels(evec) # Shift startlocs elements 0.5 to left so lines are between indexes @@ -536,29 +539,26 @@ def eigenvector_plot(evals_nonzero_basis, concatenated_shx_vector): def deltamiss_plot( theory_shift_test, ordered_alltheory_vector, - concatenated_shx_vector, - doubleindex_thcovmat, + fnorm_shifts_ordered, tripleindex_thcovmat_complete, + dataspecs, ): """Produces a plot of the missing component of the shift vector, transformed back to the data space.""" # Define l, which is the number of points in the point prescription being used l = len(ordered_alltheory_vector) + 1 - # Order the concatenated_shx_vector and concatenate again - fnorm_vector = fnorm_shifts(concatenated_shx_vector, doubleindex_thcovmat) - fnorm_concat = [j for i in fnorm_vector for j in i] # Minus sign changes it from NLO-NNLO shift to NNLO-NLO shift (convention) - f = -1.0 * pd.DataFrame(fnorm_concat, index=tripleindex_thcovmat_complete) - tripleindex = f.index + f = -1.0 * pd.DataFrame(fnorm_shifts_ordered, index=tripleindex_thcovmat_complete) + # Sort index f.sort_index(axis=0, inplace=True) oldindex = f.index.tolist() newindex = sorted(oldindex, key=_get_key) f = f.reindex(newindex) - fmiss = pd.DataFrame(theory_shift_test[4], index=tripleindex) + fmiss = pd.DataFrame(theory_shift_test[4], index=tripleindex_thcovmat_complete) fmiss_reordered = fmiss.reindex(f.index) # Plotting fig, ax = plotutils.subplots(figsize=(20, 10)) - ax.plot(f.values * 100, ".-", label="NNLO-NLO Shift", color="black") + ax.plot(f.values * 100, ".-", label=f"{dataspecs[1]['speclabel']}-{dataspecs[0]['speclabel']} Shift", color="black") ax.plot( fmiss_reordered.values * 100, ".-", label=r"$\delta_{miss}$" + f" ({l} pt)", color="blue" ) @@ -578,37 +578,53 @@ def deltamiss_plot( def diagdf_theory_covmat(theory_covmat_custom): - return pd.DataFrame(data=np.diag(theory_covmat_custom.values), index=theory_covmat_custom.index) + """Return a Dataframe indexed with groups and dataset of the diagonal entry of the theory covmat.""" + return pd.DataFrame(data=np.diag(theory_covmat_custom.values), index=theory_covmat_custom.index.droplevel(2)) -def tripleindex_set(group_dataset_inputs_by_process): - tripleindex = [] +def doubleindex_set_byprocess(group_dataset_inputs_by_process): + """The (group, dataset) index ordered by process.""" + doubleindex = [] for process in group_dataset_inputs_by_process: for dataset in process['data_input']: - tripleindex.append((process['group_name'], dataset.name)) - return tripleindex + doubleindex.append((process['group_name'], dataset.name)) + return doubleindex def concatenated_shx_vector(shx_vector): + """Single DataFrame for all the datasets of the shift vectors.""" return pd.concat(shx_vector) -def sqrtdiags_thcovmat(tripleindex_set, diagdf_theory_covmat, concatenated_shx_vector): +def sqrtdiags_thcovmat_byprocess(doubleindex_set_byprocess, diagdf_theory_covmat, concatenated_shx_vector): + """Ratio of the sqrts of the diagonal entries of the theory covmat to the normalization entries of the shift vectors, ordered by process.""" sqrtdiags = [] - for index in tripleindex_set: - sqrtdiags.append( + for index in doubleindex_set_byprocess: + if isinstance(concatenated_shx_vector.loc[index[0]].loc[index[1]].norm, np.float64): + sqrtdiags.append( list( - np.sqrt(diagdf_theory_covmat.loc[index[0]].loc[index[1]].values.transpose()[0]) - / concatenated_shx_vector.loc[index[0]].loc[index[1]].norm.values + np.sqrt(diagdf_theory_covmat.loc[index[0]].loc[index[1]].values) + / concatenated_shx_vector.loc[index[0]].loc[index[1]].norm ) ) + else: + sqrtdiags.append( + list( + np.sqrt(diagdf_theory_covmat.loc[index[0]].loc[index[1]].values.transpose()[0]) + / concatenated_shx_vector.loc[index[0]].loc[index[1]].norm.values + ) + ) return sqrtdiags -def fnorm_shifts(concatenated_shx_vector, tripleindex_set): +def fnorm_shifts_byprocess(concatenated_shx_vector, doubleindex_set_byprocess): + """Shift vector ordered by process.""" fnorm = [] - for index in tripleindex_set: - fnorm.append(list(concatenated_shx_vector.loc[index[0]].loc[index[1]].shifts.values)) + for index in doubleindex_set_byprocess: + if isinstance(concatenated_shx_vector.loc[index[0]].loc[index[1]].shifts, np.float64): + fnorm.append([concatenated_shx_vector.loc[index[0]].loc[index[1]].shifts]) + else: + fnorm.append(list(concatenated_shx_vector.loc[index[0]].loc[index[1]].shifts.values)) return fnorm @@ -618,10 +634,12 @@ def ticklocs_thcovmat(theory_covmat_custom): @figure def shift_diag_cov_comparison( - sqrtdiags_thcovmat, fnorm_shifts, point_prescription, ticklocs_thcovmat, dataspecs + sqrtdiags_thcovmat_byprocess, fnorm_shifts_byprocess, point_prescription, ticklocs_thcovmat, dataspecs ): - fnorm_concat = [j for i in fnorm_shifts for j in i] - sqrtdiags_concat = [j for i in sqrtdiags_thcovmat for j in i] + """Plot of the comparison of a shift between two pertubative order and the diagonal entries of the theory covmat, both normalized to the first of the two perturbative orders.""" + # Concatenation of the arrays + fnorm_concat = [j for i in fnorm_shifts_byprocess for j in i] + sqrtdiags_concat = [j for i in sqrtdiags_thcovmat_byprocess for j in i] fig, ax = plotutils.subplots(figsize=(20, 10)) ax.plot( np.array(sqrtdiags_concat) * 100, ".-", label=f"MHOU ({point_prescription})", color="red" From 392f176040921fa1ca4b078ee974f64d06390bf5 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Thu, 11 Jan 2024 16:40:01 +0100 Subject: [PATCH 12/17] Remove dead code --- doc/sphinx/source/vp/theorycov/examples.rst | 32 ------- .../template_matrix_plots.md | 34 +------- .../theorycovariance/construction.py | 84 ++----------------- .../src/validphys/theorycovariance/output.py | 11 --- 4 files changed, 9 insertions(+), 152 deletions(-) diff --git a/doc/sphinx/source/vp/theorycov/examples.rst b/doc/sphinx/source/vp/theorycov/examples.rst index 3e3aa7a341..eea0111c10 100644 --- a/doc/sphinx/source/vp/theorycov/examples.rst +++ b/doc/sphinx/source/vp/theorycov/examples.rst @@ -154,38 +154,6 @@ a comprehensive set of plots and tables describing the covariance matrices. {@plot_diag_cov_comparison@} {@endwith@} - Experimental $\chi^2$ - --------------------- - {@with default_theory@} - {@total_experiments_chi2@} - - Total (exp. + th.) $\chi^2$ - --------------------------- - {@chi2_impact_custom@} - - Experimental $\chi^2$ by dataset - -------------------------------- - {@experiments_chi2_table@} - - Total (exp. + th.) $\chi^2$ by dataset - -------------------------------------- - {@experiments_chi2_table_theory@} - - $\chi^2$ including only diagonal theory elements - ------------------------------------------------ - {@chi2_diag_only@} - - Impact of theory covariance matrix on $\chi^2$s - ----------------------------------------------- - {@plot_datasets_chi2_theory@} - {@endwith@} - - Scale variations as a function of the kinematics - ------------------------------------------------ - {@with matched_datasets_from_dataspecs@} - [Plots for {@dataset_name@}]({@dataset_report report@}) - {@endwith@} - Validation report ----------------- diff --git a/validphys2/examples/theory_covariance/template_matrix_plots.md b/validphys2/examples/theory_covariance/template_matrix_plots.md index 8e200fb46f..6c156524fd 100644 --- a/validphys2/examples/theory_covariance/template_matrix_plots.md +++ b/validphys2/examples/theory_covariance/template_matrix_plots.md @@ -17,36 +17,4 @@ Diagonal elements of covariance matrices ---------------------------------------- {@with default_theory@} {@plot_diag_cov_comparison@} -{@endwith@} - -Experimental $\chi^2$ ---------------------- -{@with default_theory@} - {@total_chi2_per_point_data@} - -Total (exp. + th.) $\chi^2$ ---------------------------- - {@chi2_impact_custom@} - -Experimental $\chi^2$ by dataset --------------------------------- - {@procs_chi2_table@} - -Total (exp. + th.) $\chi^2$ by dataset --------------------------------------- - {@procs_chi2_table_theory@} - -$\chi^2$ including only diagonal theory elements ------------------------------------------------- - {@chi2_diag_only@} - -Impact of theory covariance matrix on $\chi^2$s ------------------------------------------------ - {@plot_datasets_chi2_theory@} -{@endwith@} - -Scale variations as a function of the kinematics ------------------------------------------------- -{@with matched_datasets_from_dataspecs@} - [Plots for {@dataset_name@}]({@dataset_report report@}) -{@endwith@} +{@endwith@} \ No newline at end of file diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py index 69073ba672..9fc5e9ab93 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -18,8 +18,6 @@ from validphys.checks import check_using_theory_covmat from validphys.results import ( Chi2Data, - procs_central_values, - procs_central_values_no_table, results, results_central, ) @@ -34,7 +32,6 @@ results_central_bytheoryids = collect(results_central, ("theoryids",)) each_dataset_results_central_bytheory = collect("results_central_bytheoryids", ("data",)) - @check_using_theory_covmat def theory_covmat_dataset( results, @@ -561,21 +558,14 @@ def procs_index_matched(groups_index, procs_index): return pd.MultiIndex.from_tuples(tups, names=("process", "dataset", "id")) -@table -def theory_corrmat_singleprocess(theory_covmat_singleprocess): - """Calculates the theory correlation matrix for scale variations.""" - df = theory_covmat_singleprocess - covmat = df.values - diag_minus_half = (np.diagonal(covmat)) ** (-0.5) - mat = diag_minus_half[:, np.newaxis] * df * diag_minus_half - return mat - - @table def theory_corrmat_custom(theory_covmat_custom): """Calculates the theory correlation matrix for scale variations with variations by process type""" - mat = theory_corrmat_singleprocess(theory_covmat_custom) + df = theory_covmat_custom + covmat = df.values + diag_minus_half = (np.diagonal(covmat)) ** (-0.5) + mat = diag_minus_half[:, np.newaxis] * df * diag_minus_half return mat @@ -589,37 +579,16 @@ def theory_normcovmat_custom(theory_covmat_custom, procs_data_values): return mat -@table -def experimentplustheory_corrmat_singleprocess(procs_covmat, theory_covmat_singleprocess): - """Calculates the correlation matrix for the experimental - plus theory covariance matrices.""" - total_df = procs_covmat + theory_covmat_singleprocess - total_cov = (procs_covmat + theory_covmat_singleprocess).values - diag_minus_half = (np.diagonal(total_cov)) ** (-0.5) - corrmat = diag_minus_half[:, np.newaxis] * total_df * diag_minus_half - return corrmat - - @table def experimentplustheory_corrmat_custom(procs_covmat, theory_covmat_custom): """Calculates the correlation matrix for the experimental plus theory covariance matrices, correlations by prescription.""" - corrmat = experimentplustheory_corrmat_singleprocess(procs_covmat, theory_covmat_custom) + total_df = procs_covmat + theory_covmat_custom + total_cov = (procs_covmat + theory_covmat_custom).values + diag_minus_half = (np.diagonal(total_cov)) ** (-0.5) + corrmat = diag_minus_half[:, np.newaxis] * total_df * diag_minus_half return corrmat - -def chi2_impact(theory_covmat_singleprocess, procs_covmat, procs_results): - """Returns total chi2 including theory cov mat""" - dataresults, theoryresults = zip(*procs_results) - dat_central_list = [x.central_value for x in dataresults] - th_central_list = [x.central_value for x in theoryresults] - dat_central = np.concatenate(dat_central_list) - th_central = np.concatenate([x for x in th_central_list]) - central_diff = dat_central - th_central - cov = theory_covmat_singleprocess.values + procs_covmat.values - return calc_chi2(la.cholesky(cov, lower=True), central_diff) / len(central_diff) - - def data_theory_diff(procs_results): """Returns (D-T) for central theory, for use in chi2 calculations""" dataresults, theoryresults = zip(*procs_results) @@ -630,30 +599,6 @@ def data_theory_diff(procs_results): central_diff = dat_central - th_central return central_diff - -def chi2_impact_custom(theory_covmat_custom, procs_covmat, procs_results): - """Returns total chi2 including theory cov mat""" - chi2 = chi2_impact(theory_covmat_custom, procs_covmat, procs_results) - return chi2 - - -def theory_diagcovmat(theory_covmat_singleprocess): - """Returns theory covmat with only diagonal values""" - s = theory_covmat_singleprocess.values - # Initialise array of zeros and set precision to same as FK tables - s_diag = np.zeros((len(s), len(s)), dtype=np.float32) - np.fill_diagonal(s_diag, np.diag(s)) - return s_diag - - -def chi2_diag_only(theory_diagcovmat, procs_covmat, data_theory_diff): - """Returns total chi2 including only diags of theory cov mat""" - cov = theory_diagcovmat + procs_covmat.values - elements = np.dot(data_theory_diff.T, np.dot(la.inv(cov), data_theory_diff)) - chi2 = (1 / len(data_theory_diff)) * np.sum(elements) - return chi2 - - each_dataset_results = collect(results, ("group_dataset_inputs_by_process", "data")) @@ -671,19 +616,6 @@ def abs_chi2_data_theory_dataset(each_dataset_results, total_covmat_datasets): return chi2data_array -def abs_chi2_data_theory_proc(procs_results, total_covmat_procs): - """Like abs_chi2_data_theory_dataset but for procs not datasets""" - chi2data_array = [] - for expresults, covmat in zip(procs_results, total_covmat_procs): - data_result, th_result = expresults - chi2s = all_chi2_theory(expresults, covmat) - central_result = central_chi2_theory(expresults, covmat) - chi2data_array.append( - Chi2Data(th_result.stats_class(chi2s[:, np.newaxis]), central_result, len(data_result)) - ) - return chi2data_array - - def abs_chi2_data_diagtheory_dataset(each_dataset_results, total_covmat_diagtheory_datasets): """For a diagonal theory covmat""" return abs_chi2_data_theory_dataset(each_dataset_results, total_covmat_diagtheory_datasets) diff --git a/validphys2/src/validphys/theorycovariance/output.py b/validphys2/src/validphys/theorycovariance/output.py index 6c56ac37c9..45ef1c2bb0 100644 --- a/validphys2/src/validphys/theorycovariance/output.py +++ b/validphys2/src/validphys/theorycovariance/output.py @@ -27,17 +27,6 @@ ) -@table -def procs_chi2_table_theory( - procs_data, pdf, abs_chi2_data_theory_proc, abs_chi2_data_theory_dataset_by_process -): - """Same as groups_chi2_table but including theory covariance matrix. - Note: we use groups_chi2_table here but provide data grouped by process.""" - return groups_chi2_table( - procs_data, pdf, abs_chi2_data_theory_proc, abs_chi2_data_theory_dataset_by_process - ) - - def matrix_plot_labels(df): """Returns the tick locations and labels, and the starting point values for each category, based on a dataframe From a6d1a09c4f1a0af1d486a5ad7d2a04caee1b023f Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Thu, 11 Jan 2024 16:47:44 +0100 Subject: [PATCH 13/17] Keep removing dead code --- .../theorycovariance/construction.py | 29 ------------------ .../src/validphys/theorycovariance/output.py | 30 +------------------ 2 files changed, 1 insertion(+), 58 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py index 9fc5e9ab93..92f9586eb4 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -589,33 +589,4 @@ def experimentplustheory_corrmat_custom(procs_covmat, theory_covmat_custom): corrmat = diag_minus_half[:, np.newaxis] * total_df * diag_minus_half return corrmat -def data_theory_diff(procs_results): - """Returns (D-T) for central theory, for use in chi2 calculations""" - dataresults, theoryresults = zip(*procs_results) - dat_central_list = [x.central_value for x in dataresults] - th_central_list = [x.central_value for x in theoryresults] - dat_central = np.concatenate(dat_central_list) - th_central = np.concatenate(th_central_list) - central_diff = dat_central - th_central - return central_diff - each_dataset_results = collect(results, ("group_dataset_inputs_by_process", "data")) - - -def abs_chi2_data_theory_dataset(each_dataset_results, total_covmat_datasets): - """Returns an array of tuples (member_chi², central_chi², numpoints) - corresponding to each data set, where theory errors are included""" - chi2data_array = [] - for datresults, covmat in zip(each_dataset_results, total_covmat_datasets): - data_result, th_result = datresults - chi2s = all_chi2_theory(datresults, covmat) - central_result = central_chi2_theory(datresults, covmat) - chi2data_array.append( - Chi2Data(th_result.stats_class(chi2s[:, np.newaxis]), central_result, len(data_result)) - ) - return chi2data_array - - -def abs_chi2_data_diagtheory_dataset(each_dataset_results, total_covmat_diagtheory_datasets): - """For a diagonal theory covmat""" - return abs_chi2_data_theory_dataset(each_dataset_results, total_covmat_diagtheory_datasets) diff --git a/validphys2/src/validphys/theorycovariance/output.py b/validphys2/src/validphys/theorycovariance/output.py index 45ef1c2bb0..dc63e0e0f0 100644 --- a/validphys2/src/validphys/theorycovariance/output.py +++ b/validphys2/src/validphys/theorycovariance/output.py @@ -22,10 +22,6 @@ log = logging.getLogger(__name__) -abs_chi2_data_theory_dataset_by_process = collect( - "abs_chi2_data_theory_dataset", ("group_dataset_inputs_by_process",) -) - def matrix_plot_labels(df): """Returns the tick locations and labels, and the starting @@ -284,28 +280,4 @@ def plot_diag_cov_comparison( ) ax.legend(fontsize=20) ax.margins(x=0) - return fig - - -@figure -def plot_datasets_chi2_theory(procs_data, each_dataset_chi2, abs_chi2_data_theory_dataset): - """Plot the chi² of all datasets, before and after adding theory errors, with bars.""" - ds = iter(each_dataset_chi2) - dstheory = iter(abs_chi2_data_theory_dataset) - dschi2 = [] - dschi2theory = [] - xticks = [] - for proc in procs_data: - for dataset, dsres in zip(proc, ds): - dschi2.append(dsres.central_result / dsres.ndata) - xticks.append(dataset.name) - for proc in procs_data: - for dataset, dsres in zip(proc, dstheory): - dschi2theory.append(dsres.central_result / dsres.ndata) - plotvalues = np.stack((dschi2theory, dschi2)) - fig, ax = plotutils.barplot( - plotvalues, collabels=xticks, datalabels=["experiment + theory", "experiment"] - ) - ax.set_title(r"$\chi^2$ distribution for datasets") - ax.legend(fontsize=14) - return fig + return fig \ No newline at end of file From 3a428bf55e4142b1eece4cd86f0d0a947b8d54eb Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Fri, 12 Jan 2024 11:18:40 +0100 Subject: [PATCH 14/17] Cosmetic changes --- .../src/validphys/theorycovariance/tests.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/tests.py b/validphys2/src/validphys/theorycovariance/tests.py index 40e65e6845..ae6d1112ce 100644 --- a/validphys2/src/validphys/theorycovariance/tests.py +++ b/validphys2/src/validphys/theorycovariance/tests.py @@ -384,7 +384,7 @@ def projected_condition_num(evals_nonzero_basis): def fnorm_shifts_ordered(concatenated_shx_vector, doubleindex_thcovmat): """Shifts vectors ordered as the theory covmat.""" fnorm_vector = fnorm_shifts_byprocess(concatenated_shx_vector, doubleindex_thcovmat) - return np.array([j for i in fnorm_vector for j in i]) + return np.array(sum(fnorm_vector, [])) def theory_shift_test(fnorm_shifts_ordered, evals_nonzero_basis): """Compares the NNLO-NLO shift, f, with the eigenvectors and eigenvalues of the @@ -638,15 +638,15 @@ def shift_diag_cov_comparison( ): """Plot of the comparison of a shift between two pertubative order and the diagonal entries of the theory covmat, both normalized to the first of the two perturbative orders.""" # Concatenation of the arrays - fnorm_concat = [j for i in fnorm_shifts_byprocess for j in i] - sqrtdiags_concat = [j for i in sqrtdiags_thcovmat_byprocess for j in i] + fnorm_concat = sum(fnorm_shifts_byprocess, []) + sqrtdiags_concat = sum(sqrtdiags_thcovmat_byprocess, []) fig, ax = plotutils.subplots(figsize=(20, 10)) ax.plot( - np.array(sqrtdiags_concat) * 100, ".-", label=f"MHOU ({point_prescription})", color="red" + np.array(sqrtdiags_concat), ".-", label=f"MHOU ({point_prescription})", color="red" ) - ax.plot(-np.array(sqrtdiags_concat) * 100, ".-", color="red") + ax.plot(-np.array(sqrtdiags_concat), ".-", color="red") ax.plot( - -np.array(fnorm_concat) * 100, + -np.array(fnorm_concat), ".-", label=f"{dataspecs[1]['speclabel']}-{dataspecs[0]['speclabel']} Shift", color="black", @@ -658,8 +658,8 @@ def shift_diag_cov_comparison( startlocs_lines = [x - 0.5 for x in startlocs] ax.vlines(startlocs_lines, -70, 70, linestyles="dashed") ax.margins(x=0, y=0) - ax.set_ylabel(r"% wrt central theory $T_i^{(0)}$", fontsize=20) - ax.set_ylim(-35, 35) + ax.set_ylabel(r"$\pm \sqrt{\hat{S_{ii}}}$, $\delta_{i}$", fontsize=20) + ax.set_ylim(-0.35, 0.35) ax.legend(fontsize=20) ax.yaxis.set_tick_params(labelsize=20) return fig From f6263c2df5ba571ce53b4c621e240e17464d65ee Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Fri, 12 Jan 2024 12:30:39 +0100 Subject: [PATCH 15/17] Address comment --- .../src/validphys/theorycovariance/construction.py | 3 +-- validphys2/src/validphys/theorycovariance/output.py | 9 +++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py index 92f9586eb4..bf6a676148 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -584,8 +584,7 @@ def experimentplustheory_corrmat_custom(procs_covmat, theory_covmat_custom): """Calculates the correlation matrix for the experimental plus theory covariance matrices, correlations by prescription.""" total_df = procs_covmat + theory_covmat_custom - total_cov = (procs_covmat + theory_covmat_custom).values - diag_minus_half = (np.diagonal(total_cov)) ** (-0.5) + diag_minus_half = (np.diagonal(total_df.values)) ** (-0.5) corrmat = diag_minus_half[:, np.newaxis] * total_df * diag_minus_half return corrmat diff --git a/validphys2/src/validphys/theorycovariance/output.py b/validphys2/src/validphys/theorycovariance/output.py index dc63e0e0f0..6d5936cfd2 100644 --- a/validphys2/src/validphys/theorycovariance/output.py +++ b/validphys2/src/validphys/theorycovariance/output.py @@ -66,9 +66,10 @@ def plot_covmat_heatmap(covmat, title): matrixplot = ax.matshow( 100 * matrix, cmap=cm.Spectral_r, - norm=mcolors.SymLogNorm( - linthresh=0.01, linscale=10, vmin=-100 * matrix.max(), vmax=100 * matrix.max() - ), + norm = mcolors.Normalize(vmin=-100 * matrix.max(), vmax=100 * matrix.max()) + #norm=mcolors.SymLogNorm( + # linthresh=0.00001, linscale=1, vmin=-100 * matrix.max(), vmax=100 * matrix.max() + #), ) cbar = fig.colorbar(matrixplot, fraction=0.046, pad=0.04) cbar.set_label(label="% of data", fontsize=20) @@ -270,7 +271,7 @@ def plot_diag_cov_comparison( # Shift startlocs elements 0.5 to left so lines are between indexes startlocs_lines = [x - 0.5 for x in startlocs] ax.vlines(startlocs_lines, 0, len(data), linestyles="dashed") - ax.set_ylabel(r"$\frac{\sqrt{cov_{ii}}}{|D_i|}$", fontsize=30) + ax.set_ylabel(r"$\frac{\sqrt{S_{ii}}}{|D_i|}$", fontsize=30) ax.yaxis.set_tick_params(labelsize=20) ax.set_ylim([0, 0.5]) ax.set_title( From 34cfa620fea5b6aa984d5d55d89923bd99bd5b6c Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Fri, 12 Jan 2024 14:14:38 +0100 Subject: [PATCH 16/17] Cosmetic changes to covmat plots --- validphys2/src/validphys/theorycovariance/output.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/output.py b/validphys2/src/validphys/theorycovariance/output.py index 6d5936cfd2..26320f7eb0 100644 --- a/validphys2/src/validphys/theorycovariance/output.py +++ b/validphys2/src/validphys/theorycovariance/output.py @@ -66,10 +66,9 @@ def plot_covmat_heatmap(covmat, title): matrixplot = ax.matshow( 100 * matrix, cmap=cm.Spectral_r, - norm = mcolors.Normalize(vmin=-100 * matrix.max(), vmax=100 * matrix.max()) - #norm=mcolors.SymLogNorm( - # linthresh=0.00001, linscale=1, vmin=-100 * matrix.max(), vmax=100 * matrix.max() - #), + norm=mcolors.SymLogNorm( + linthresh=0.00001, linscale=0.1, vmin=-100 * matrix.max(), vmax=100 * matrix.max() + ), ) cbar = fig.colorbar(matrixplot, fraction=0.046, pad=0.04) cbar.set_label(label="% of data", fontsize=20) From 8c7158530ed78976638416f1c9084dc871fbf2b6 Mon Sep 17 00:00:00 2001 From: andreab1997 Date: Fri, 12 Jan 2024 14:23:36 +0100 Subject: [PATCH 17/17] Correct linscale --- validphys2/src/validphys/theorycovariance/output.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validphys2/src/validphys/theorycovariance/output.py b/validphys2/src/validphys/theorycovariance/output.py index 26320f7eb0..f6279bd222 100644 --- a/validphys2/src/validphys/theorycovariance/output.py +++ b/validphys2/src/validphys/theorycovariance/output.py @@ -67,7 +67,7 @@ def plot_covmat_heatmap(covmat, title): 100 * matrix, cmap=cm.Spectral_r, norm=mcolors.SymLogNorm( - linthresh=0.00001, linscale=0.1, vmin=-100 * matrix.max(), vmax=100 * matrix.max() + linthresh=0.00001, linscale=1, vmin=-100 * matrix.max(), vmax=100 * matrix.max() ), ) cbar = fig.colorbar(matrixplot, fraction=0.046, pad=0.04)