From 261caeaf08f304265c94dbd146f70a125dbc07cd Mon Sep 17 00:00:00 2001
From: RoyStegeman <roystegeman@live.nl>
Date: Tue, 19 Dec 2023 13:16:42 +0000
Subject: [PATCH 01/14] clean up combine_by_type

---
 validphys2/src/validphys/results.py           |  13 +-
 .../theorycovariance/construction.py          | 144 ++++++++++--------
 2 files changed, 89 insertions(+), 68 deletions(-)

diff --git a/validphys2/src/validphys/results.py b/validphys2/src/validphys/results.py
index 9444b7fc1d..02dc9bb014 100644
--- a/validphys2/src/validphys/results.py
+++ b/validphys2/src/validphys/results.py
@@ -75,6 +75,7 @@ def __init__(self, dataset, covmat, sqrtcovmat):
         stats = Stats(self._central_value)
         self._covmat = covmat
         self._sqrtcovmat = sqrtcovmat
+        self._name = dataset.name
         super().__init__(stats)
 
     @property
@@ -98,15 +99,19 @@ def sqrtcovmat(self):
         """Lower part of the Cholesky decomposition"""
         return self._sqrtcovmat
 
+    @property
+    def name(self):
+        return self._name
 
 class ThPredictionsResult(StatsResult):
     """Class holding theory prediction, inherits from StatsResult
     When created with `from_convolution`, it keeps tracks of the PDF for which it was computed
     """
 
-    def __init__(self, dataobj, stats_class, label=None, pdf=None, theoryid=None):
+    def __init__(self, dataobj, stats_class, datasetnames=None, label=None, pdf=None, theoryid=None):
         self.stats_class = stats_class
         self.label = label
+        self._datasetnames = datasetnames
         statsobj = stats_class(dataobj.T)
         self._pdf = pdf
         self._theoryid = theoryid
@@ -149,8 +154,12 @@ def from_convolution(cls, pdf, dataset, central_only=False):
 
         label = cls.make_label(pdf, dataset)
         thid = dataset.thspec.id
+        datasetnames = [i.name for i in datasets]
+        return cls(th_predictions, pdf.stats_class, datasetnames, label, pdf=pdf, theoryid=thid)
 
-        return cls(th_predictions, pdf.stats_class, label, pdf=pdf, theoryid=thid)
+    @property
+    def datasetnames(self):
+        return self._datasetnames
 
 
 class ThUncertaintiesResult(StatsResult):
diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py
index 147568daea..791752f54b 100644
--- a/validphys2/src/validphys/theorycovariance/construction.py
+++ b/validphys2/src/validphys/theorycovariance/construction.py
@@ -64,7 +64,7 @@ def theory_covmat_singleprocess(theory_covmat_singleprocess_no_table, fivetheori
 
 results_central_bytheoryids = collect(results_central, ("theoryids",))
 each_dataset_results_central_bytheory = collect(
-    "results_central_bytheoryids", ("group_dataset_inputs_by_process", "data")
+    "results_central_bytheoryids", ("data",)
 )
 
 
@@ -100,48 +100,48 @@ def theory_covmat_dataset(
     return thcovmat
 
 
-@check_correct_theory_combination
-def theory_covmat_datasets(each_dataset_results_central_bytheory, fivetheories):
-    """Produces an array of theory covariance matrices. Each matrix corresponds
-    to a different dataset, which must be specified in the runcard."""
-    dataset_covmats = []
-    for dataset in each_dataset_results_central_bytheory:
-        theory_centrals = [x[1].central_value for x in dataset]
-        s = make_scale_var_covmat(theory_centrals)
-        dataset_covmats.append(s)
-    return dataset_covmats
-
-
-@check_correct_theory_combination
-def total_covmat_datasets(each_dataset_results_central_bytheory, fivetheories):
-    """Produces an array of total covariance matrices; the sum of experimental
-    and scale-varied theory covariance matrices. Each matrix corresponds
-    to a different dataset, which must be specified in the runcard.
-    These are needed for calculation of chi2 per dataset."""
-    dataset_covmats = []
-    for dataset in each_dataset_results_central_bytheory:
-        theory_centrals = [x[1].central_value for x in dataset]
-        s = make_scale_var_covmat(theory_centrals)
-        sigma = dataset[0][0].covmat
-        cov = s + sigma
-        dataset_covmats.append(cov)
-    return dataset_covmats
-
-
-@check_correct_theory_combination
-def total_covmat_diagtheory_datasets(each_dataset_results_central_bytheory, fivetheories):
-    """Same as total_covmat_theory_datasets but for diagonal theory only"""
-    dataset_covmats = []
-    for dataset in each_dataset_results_central_bytheory:
-        theory_centrals = [x[1].central_value for x in dataset]
-        s = make_scale_var_covmat(theory_centrals)
-        # Initialise array of zeros and set precision to same as FK tables
-        s_diag = np.zeros((len(s), len(s)), dtype=np.float32)
-        np.fill_diagonal(s_diag, np.diag(s))
-        sigma = dataset[0][0].covmat
-        cov = s_diag + sigma
-        dataset_covmats.append(cov)
-    return dataset_covmats
+# @check_correct_theory_combination
+# def theory_covmat_datasets(each_dataset_results_central_bytheory, fivetheories):
+#     """Produces an array of theory covariance matrices. Each matrix corresponds
+#     to a different dataset, which must be specified in the runcard."""
+#     dataset_covmats = []
+#     for dataset in each_dataset_results_central_bytheory:
+#         theory_centrals = [x[1].central_value for x in dataset]
+#         s = make_scale_var_covmat(theory_centrals)
+#         dataset_covmats.append(s)
+#     return dataset_covmats
+
+
+# @check_correct_theory_combination
+# def total_covmat_datasets(each_dataset_results_central_bytheory, fivetheories):
+#     """Produces an array of total covariance matrices; the sum of experimental
+#     and scale-varied theory covariance matrices. Each matrix corresponds
+#     to a different dataset, which must be specified in the runcard.
+#     These are needed for calculation of chi2 per dataset."""
+#     dataset_covmats = []
+#     for dataset in each_dataset_results_central_bytheory:
+#         theory_centrals = [x[1].central_value for x in dataset]
+#         s = make_scale_var_covmat(theory_centrals)
+#         sigma = dataset[0][0].covmat
+#         cov = s + sigma
+#         dataset_covmats.append(cov)
+#     return dataset_covmats
+
+
+# @check_correct_theory_combination
+# def total_covmat_diagtheory_datasets(each_dataset_results_central_bytheory, fivetheories):
+#     """Same as total_covmat_theory_datasets but for diagonal theory only"""
+#     dataset_covmats = []
+#     for dataset in each_dataset_results_central_bytheory:
+#         theory_centrals = [x[1].central_value for x in dataset]
+#         s = make_scale_var_covmat(theory_centrals)
+#         # Initialise array of zeros and set precision to same as FK tables
+#         s_diag = np.zeros((len(s), len(s)), dtype=np.float32)
+#         np.fill_diagonal(s_diag, np.diag(s))
+#         sigma = dataset[0][0].covmat
+#         cov = s_diag + sigma
+#         dataset_covmats.append(cov)
+#     return dataset_covmats
 
 
 @table
@@ -182,27 +182,38 @@ def total_covmat_procs(procs_results_theory, fivetheories):
     return proc_result_covmats
 
 
-def dataset_names(data_input):
-    """Returns a list of the names of the datasets, in the same order as
-    they are inputted in the runcard"""
-    return [el.name for el in data_input]
-
-
 ProcessInfo = namedtuple("ProcessInfo", ("theory", "namelist", "sizes"))
 
 
-def combine_by_type(each_dataset_results_central_bytheory, dataset_names):
-    """Groups the datasets according to processes and returns three objects:
-    theories_by_process: the relevant theories grouped by process type
-    ordered_names: dictionary with keys of process type and values being the
-                   corresponding list of names of datasets, in the order they
-                   are appended to theories_by_process
-    dataset_size:  dictionary with keys of dataset name and values being the
-                   number of points in that dataset"""
+def combine_by_type(each_dataset_results_central_bytheory, data_input):
+    """Groups the datasets according to processes returns an instance of the ProcessInfo class
+
+    Parameters
+    ----------
+    each_dataset_results_central_bytheory: list(list((DataResult,ThPredictionsResult)))
+        
+    data_input: list(DataSetInput)
+        List with DatasetInput as order in the runcard
+
+    Returns
+    -------
+    ProcesInfo
+        Class with info needed to construct the theory covmat.
+
+    Raises
+    ------
+    ValueError
+        If the order is of the inputs are not the same
+    """
     dataset_size = defaultdict(list)
     theories_by_process = defaultdict(list)
     ordered_names = defaultdict(list)
-    for dataset, name in zip(each_dataset_results_central_bytheory, dataset_names):
+    for dataset, datain in zip(each_dataset_results_central_bytheory, data_input):
+        name = datain.name
+        # A difference in ordering of each_dataset_results_central_bytheory and 
+        # data_input has caused problems before, so let's explicitly check
+        if name != dataset[0][0].name:
+            raise ValueError
         theory_centrals = [x[1].central_value for x in dataset]
         dataset_size[name] = len(theory_centrals[0])
         proc_type = process_lookup(name)
@@ -229,23 +240,24 @@ def process_starting_points(combine_by_type):
     return start_proc
 
 
-def covmap(combine_by_type, dataset_names):
+def covmap(combine_by_type, data_input):
     """Creates a map between the covmat indices from matrices ordered by
     process to matrices ordered by experiment as listed in the runcard"""
     mapping = defaultdict(list)
     start_exp = defaultdict(list)
     process_info = combine_by_type
     running_index = 0
-    for dataset in dataset_names:
-        size = process_info.sizes[dataset]
-        start_exp[dataset] = running_index
+    for dataset in data_input:
+        name = dataset.name
+        size = process_info.sizes[name]
+        start_exp[name] = running_index
         running_index += size
     start = 0
     names_by_proc_list = [item for sublist in process_info.namelist.values() for item in sublist]
-    for dataset in names_by_proc_list:
-        for i in range(process_info.sizes[dataset]):
-            mapping[start + i] = start_exp[dataset] + i
-        start += process_info.sizes[dataset]
+    for name in names_by_proc_list:
+        for i in range(process_info.sizes[name]):
+            mapping[start + i] = start_exp[name] + i
+        start += process_info.sizes[name]
     return mapping
 
 

From 3b22042ac6d132133bdd4a7971ff1d113e98a89f Mon Sep 17 00:00:00 2001
From: RoyStegeman <roystegeman@live.nl>
Date: Tue, 19 Dec 2023 13:39:48 +0000
Subject: [PATCH 02/14] rename theory to preds

---
 .../theorycovariance/construction.py          | 141 ++++++++----------
 1 file changed, 65 insertions(+), 76 deletions(-)

diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py
index 791752f54b..5524cb80f0 100644
--- a/validphys2/src/validphys/theorycovariance/construction.py
+++ b/validphys2/src/validphys/theorycovariance/construction.py
@@ -63,9 +63,7 @@ def theory_covmat_singleprocess(theory_covmat_singleprocess_no_table, fivetheori
 
 
 results_central_bytheoryids = collect(results_central, ("theoryids",))
-each_dataset_results_central_bytheory = collect(
-    "results_central_bytheoryids", ("data",)
-)
+each_dataset_results_central_bytheory = collect("results_central_bytheoryids", ("data",))
 
 
 @check_using_theory_covmat
@@ -182,7 +180,7 @@ def total_covmat_procs(procs_results_theory, fivetheories):
     return proc_result_covmats
 
 
-ProcessInfo = namedtuple("ProcessInfo", ("theory", "namelist", "sizes"))
+ProcessInfo = namedtuple("ProcessInfo", ("preds", "namelist", "sizes"))
 
 
 def combine_by_type(each_dataset_results_central_bytheory, data_input):
@@ -191,7 +189,7 @@ def combine_by_type(each_dataset_results_central_bytheory, data_input):
     Parameters
     ----------
     each_dataset_results_central_bytheory: list(list((DataResult,ThPredictionsResult)))
-        
+
     data_input: list(DataSetInput)
         List with DatasetInput as order in the runcard
 
@@ -210,7 +208,7 @@ def combine_by_type(each_dataset_results_central_bytheory, data_input):
     ordered_names = defaultdict(list)
     for dataset, datain in zip(each_dataset_results_central_bytheory, data_input):
         name = datain.name
-        # A difference in ordering of each_dataset_results_central_bytheory and 
+        # A difference in ordering of each_dataset_results_central_bytheory and
         # data_input has caused problems before, so let's explicitly check
         if name != dataset[0][0].name:
             raise ValueError
@@ -222,24 +220,11 @@ def combine_by_type(each_dataset_results_central_bytheory, data_input):
     for key, item in theories_by_process.items():
         theories_by_process[key] = np.concatenate(item, axis=1)
     process_info = ProcessInfo(
-        theory=theories_by_process, namelist=ordered_names, sizes=dataset_size
+        preds=theories_by_process, namelist=ordered_names, sizes=dataset_size
     )
     return process_info
 
 
-def process_starting_points(combine_by_type):
-    """Returns a dictionary of indices in the covariance matrix corresponding
-    to the starting point of each process."""
-    process_info = combine_by_type
-    running_index = 0
-    start_proc = defaultdict(list)
-    for name in process_info.theory:
-        size = len(process_info.theory[name][0])
-        start_proc[name] = running_index
-        running_index += size
-    return start_proc
-
-
 def covmap(combine_by_type, data_input):
     """Creates a map between the covmat indices from matrices ordered by
     process to matrices ordered by experiment as listed in the runcard"""
@@ -261,6 +246,66 @@ def covmap(combine_by_type, data_input):
     return mapping
 
 
+@check_correct_theory_combination
+def covs_pt_prescrip(combine_by_type, theoryids, point_prescription, fivetheories, seventheories):
+    """Produces the sub-matrices of the theory covariance matrix according
+    to a point prescription which matches the number of input theories.
+    If 5 theories are provided, a scheme 'bar' or 'nobar' must be
+    chosen in the runcard in order to specify the prescription. Sub-matrices
+    correspond to applying the scale variation prescription to each pair of
+    processes in turn, using a different procedure for the case where the
+    processes are the same relative to when they are different."""
+
+    process_info = combine_by_type
+    running_index = 0
+    start_proc = defaultdict(list)
+    for name in process_info.preds:
+        size = len(process_info.preds[name][0])
+        start_proc[name] = running_index
+        running_index += size
+    import ipdb
+
+    ipdb.set_trace()
+
+    l = len(theoryids)
+    process_info = combine_by_type
+    covmats = defaultdict(list)
+    for name1 in process_info.preds:
+        for name2 in process_info.preds:
+            central1, *others1 = process_info.preds[name1]
+            deltas1 = list((other - central1 for other in others1))
+            central2, *others2 = process_info.preds[name2]
+            deltas2 = list((other - central2 for other in others2))
+            s = compute_covs_pt_prescrip(
+                point_prescription, l, name1, deltas1, name2, deltas2, fivetheories, seventheories
+            )
+            start_locs = (start_proc[name1], start_proc[name2])
+            covmats[start_locs] = s
+    return covmats
+
+
+@table
+def theory_covmat_custom(covs_pt_prescrip, covmap, procs_index):
+    """Takes the individual sub-covmats between each two processes and assembles
+    them into a full covmat. Then reshuffles the order from ordering by process
+    to ordering by experiment as listed in the runcard"""
+    matlength = int(
+        sum([len(covmat) for covmat in covs_pt_prescrip.values()])
+        / int(np.sqrt(len(covs_pt_prescrip)))
+    )
+    # Initialise arrays of zeros and set precision to same as FK tables
+    mat = np.zeros((matlength, matlength), dtype=np.float32)
+    cov_by_exp = np.zeros((matlength, matlength), dtype=np.float32)
+    for locs in covs_pt_prescrip:
+        cov = covs_pt_prescrip[locs]
+        mat[locs[0] : (len(cov) + locs[0]), locs[1] : (len(cov.T) + locs[1])] = cov
+    for i in range(matlength):
+        for j in range(matlength):
+            cov_by_exp[covmap[i]][covmap[j]] = mat[i][j]
+    df = pd.DataFrame(cov_by_exp, index=procs_index, columns=procs_index)
+    return df
+
+
 def covmat_3fpt(name1, name2, deltas1, deltas2):
     """Returns theory covariance sub-matrix for 3pt factorisation
     scale variation *only*, given two dataset names and collections
@@ -501,62 +546,6 @@ def compute_covs_pt_prescrip(
     return s
 
 
-@check_correct_theory_combination
-def covs_pt_prescrip(
-    combine_by_type,
-    process_starting_points,
-    theoryids,
-    point_prescription,
-    fivetheories,
-    seventheories,
-):
-    """Produces the sub-matrices of the theory covariance matrix according
-    to a point prescription which matches the number of input theories.
-    If 5 theories are provided, a scheme 'bar' or 'nobar' must be
-    chosen in the runcard in order to specify the prescription. Sub-matrices
-    correspond to applying the scale variation prescription to each pair of
-    processes in turn, using a different procedure for the case where the
-    processes are the same relative to when they are different."""
-    l = len(theoryids)
-    start_proc = process_starting_points
-    process_info = combine_by_type
-    covmats = defaultdict(list)
-    for name1 in process_info.theory:
-        for name2 in process_info.theory:
-            central1, *others1 = process_info.theory[name1]
-            deltas1 = list((other - central1 for other in others1))
-            central2, *others2 = process_info.theory[name2]
-            deltas2 = list((other - central2 for other in others2))
-            s = compute_covs_pt_prescrip(
-                point_prescription, l, name1, deltas1, name2, deltas2, fivetheories, seventheories
-            )
-            start_locs = (start_proc[name1], start_proc[name2])
-            covmats[start_locs] = s
-    return covmats
-
-
-@table
-def theory_covmat_custom(covs_pt_prescrip, covmap, procs_index):
-    """Takes the individual sub-covmats between each two processes and assembles
-    them into a full covmat. Then reshuffles the order from ordering by process
-    to ordering by experiment as listed in the runcard"""
-    matlength = int(
-        sum([len(covmat) for covmat in covs_pt_prescrip.values()])
-        / int(np.sqrt(len(covs_pt_prescrip)))
-    )
-    # Initialise arrays of zeros and set precision to same as FK tables
-    mat = np.zeros((matlength, matlength), dtype=np.float32)
-    cov_by_exp = np.zeros((matlength, matlength), dtype=np.float32)
-    for locs in covs_pt_prescrip:
-        cov = covs_pt_prescrip[locs]
-        mat[locs[0] : (len(cov) + locs[0]), locs[1] : (len(cov.T) + locs[1])] = cov
-    for i in range(matlength):
-        for j in range(matlength):
-            cov_by_exp[covmap[i]][covmap[j]] = mat[i][j]
-    df = pd.DataFrame(cov_by_exp, index=procs_index, columns=procs_index)
-    return df
-
-
 @table
 def fromfile_covmat(covmatpath, procs_data, procs_index):
     """Reads a general theory covariance matrix from file. Then

From df2172831519d3f7b98e1c3fc87c5752fd276443 Mon Sep 17 00:00:00 2001
From: RoyStegeman <roystegeman@live.nl>
Date: Tue, 19 Dec 2023 14:37:19 +0000
Subject: [PATCH 03/14] update theory_covmat_custom

---
 .../theorycovariance/construction.py          | 60 ++++++++-----------
 1 file changed, 25 insertions(+), 35 deletions(-)

diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py
index 5524cb80f0..f826f8c37a 100644
--- a/validphys2/src/validphys/theorycovariance/construction.py
+++ b/validphys2/src/validphys/theorycovariance/construction.py
@@ -225,27 +225,6 @@ def combine_by_type(each_dataset_results_central_bytheory, data_input):
     return process_info
 
 
-def covmap(combine_by_type, data_input):
-    """Creates a map between the covmat indices from matrices ordered by
-    process to matrices ordered by experiment as listed in the runcard"""
-    mapping = defaultdict(list)
-    start_exp = defaultdict(list)
-    process_info = combine_by_type
-    running_index = 0
-    for dataset in data_input:
-        name = dataset.name
-        size = process_info.sizes[name]
-        start_exp[name] = running_index
-        running_index += size
-    start = 0
-    names_by_proc_list = [item for sublist in process_info.namelist.values() for item in sublist]
-    for name in names_by_proc_list:
-        for i in range(process_info.sizes[name]):
-            mapping[start + i] = start_exp[name] + i
-        start += process_info.sizes[name]
-    return mapping
-
-
 @check_correct_theory_combination
 def covs_pt_prescrip(combine_by_type, theoryids, point_prescription, fivetheories, seventheories):
     """Produces the sub-matrices of the theory covariance matrix according
@@ -263,9 +242,6 @@ def covs_pt_prescrip(combine_by_type, theoryids, point_prescription, fivetheorie
         size = len(process_info.preds[name][0])
         start_proc[name] = running_index
         running_index += size
-    import ipdb
-
-    ipdb.set_trace()
 
     l = len(theoryids)
     process_info = combine_by_type
@@ -285,24 +261,38 @@ def covs_pt_prescrip(combine_by_type, theoryids, point_prescription, fivetheorie
 
 
 @table
-def theory_covmat_custom(covs_pt_prescrip, covmap, procs_index):
+def theory_covmat_custom(covs_pt_prescrip, procs_index, combine_by_type):
     """Takes the individual sub-covmats between each two processes and assembles
     them into a full covmat. Then reshuffles the order from ordering by process
     to ordering by experiment as listed in the runcard"""
-    matlength = int(
-        sum([len(covmat) for covmat in covs_pt_prescrip.values()])
-        / int(np.sqrt(len(covs_pt_prescrip)))
-    )
+    process_info = combine_by_type
+
+    # the order is important for the construction of comvat_index below
+    if procs_index.names != ['group', 'dataset', 'id']:
+        raise ValueError
+
+    # construct covmat_index based on the order of experiments as they are in combine_by_type
+    indexlist = []
+    for procname in process_info.preds:
+        for expname in process_info.namelist[procname]:
+            for ind in procs_index:
+                # we need procs_index for the datapoint ids of the datapoints that survived the cuts
+                # or do we just assume they're the same as for the exp covmat? Perhaps this
+                # additional layer is a bit pointless.
+                if ind[0] == procname and ind[1] == expname:
+                    data_id = ind[2]
+                    indexlist.append((procname, expname, data_id))
+    # Is this always the exact same as procs index? This depends on how procs_index orders datasets
+    # within a process. 
+    covmat_index = pd.MultiIndex.from_tuples(indexlist, names=procs_index.names)
+
     # Initialise arrays of zeros and set precision to same as FK tables
-    mat = np.zeros((matlength, matlength), dtype=np.float32)
-    cov_by_exp = np.zeros((matlength, matlength), dtype=np.float32)
+    total_datapoints = sum(combine_by_type.sizes.values())
+    mat = np.zeros((total_datapoints, total_datapoints), dtype=np.float32)
     for locs in covs_pt_prescrip:
         cov = covs_pt_prescrip[locs]
         mat[locs[0] : (len(cov) + locs[0]), locs[1] : (len(cov.T) + locs[1])] = cov
-    for i in range(matlength):
-        for j in range(matlength):
-            cov_by_exp[covmap[i]][covmap[j]] = mat[i][j]
-    df = pd.DataFrame(cov_by_exp, index=procs_index, columns=procs_index)
+    df = pd.DataFrame(mat, index=covmat_index, columns=covmat_index)
     return df
 
 

From a10f84736f89f46668029ac34913009e92d4deb5 Mon Sep 17 00:00:00 2001
From: RoyStegeman <roystegeman@live.nl>
Date: Tue, 19 Dec 2023 14:39:17 +0000
Subject: [PATCH 04/14] move back functions to clarify diffs

---
 .../theorycovariance/construction.py          | 142 +++++++++---------
 1 file changed, 71 insertions(+), 71 deletions(-)

diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py
index f826f8c37a..362b5e005e 100644
--- a/validphys2/src/validphys/theorycovariance/construction.py
+++ b/validphys2/src/validphys/theorycovariance/construction.py
@@ -225,77 +225,6 @@ def combine_by_type(each_dataset_results_central_bytheory, data_input):
     return process_info
 
 
-@check_correct_theory_combination
-def covs_pt_prescrip(combine_by_type, theoryids, point_prescription, fivetheories, seventheories):
-    """Produces the sub-matrices of the theory covariance matrix according
-    to a point prescription which matches the number of input theories.
-    If 5 theories are provided, a scheme 'bar' or 'nobar' must be
-    chosen in the runcard in order to specify the prescription. Sub-matrices
-    correspond to applying the scale variation prescription to each pair of
-    processes in turn, using a different procedure for the case where the
-    processes are the same relative to when they are different."""
-
-    process_info = combine_by_type
-    running_index = 0
-    start_proc = defaultdict(list)
-    for name in process_info.preds:
-        size = len(process_info.preds[name][0])
-        start_proc[name] = running_index
-        running_index += size
-
-    l = len(theoryids)
-    process_info = combine_by_type
-    covmats = defaultdict(list)
-    for name1 in process_info.preds:
-        for name2 in process_info.preds:
-            central1, *others1 = process_info.preds[name1]
-            deltas1 = list((other - central1 for other in others1))
-            central2, *others2 = process_info.preds[name2]
-            deltas2 = list((other - central2 for other in others2))
-            s = compute_covs_pt_prescrip(
-                point_prescription, l, name1, deltas1, name2, deltas2, fivetheories, seventheories
-            )
-            start_locs = (start_proc[name1], start_proc[name2])
-            covmats[start_locs] = s
-    return covmats
-
-
-@table
-def theory_covmat_custom(covs_pt_prescrip, procs_index, combine_by_type):
-    """Takes the individual sub-covmats between each two processes and assembles
-    them into a full covmat. Then reshuffles the order from ordering by process
-    to ordering by experiment as listed in the runcard"""
-    process_info = combine_by_type
-
-    # the order is important for the construction of comvat_index below
-    if procs_index.names != ['group', 'dataset', 'id']:
-        raise ValueError
-
-    # construct covmat_index based on the order of experiments as they are in combine_by_type
-    indexlist = []
-    for procname in process_info.preds:
-        for expname in process_info.namelist[procname]:
-            for ind in procs_index:
-                # we need procs_index for the datapoint ids of the datapoints that survived the cuts
-                # or do we just assume they're the same as for the exp covmat? Perhaps this
-                # additional layer is a bit pointless.
-                if ind[0] == procname and ind[1] == expname:
-                    data_id = ind[2]
-                    indexlist.append((procname, expname, data_id))
-    # Is this always the exact same as procs index? This depends on how procs_index orders datasets
-    # within a process. 
-    covmat_index = pd.MultiIndex.from_tuples(indexlist, names=procs_index.names)
-
-    # Initialise arrays of zeros and set precision to same as FK tables
-    total_datapoints = sum(combine_by_type.sizes.values())
-    mat = np.zeros((total_datapoints, total_datapoints), dtype=np.float32)
-    for locs in covs_pt_prescrip:
-        cov = covs_pt_prescrip[locs]
-        mat[locs[0] : (len(cov) + locs[0]), locs[1] : (len(cov.T) + locs[1])] = cov
-    df = pd.DataFrame(mat, index=covmat_index, columns=covmat_index)
-    return df
-
-
 def covmat_3fpt(name1, name2, deltas1, deltas2):
     """Returns theory covariance sub-matrix for 3pt factorisation
     scale variation *only*, given two dataset names and collections
@@ -536,6 +465,77 @@ def compute_covs_pt_prescrip(
     return s
 
 
+@check_correct_theory_combination
+def covs_pt_prescrip(combine_by_type, theoryids, point_prescription, fivetheories, seventheories):
+    """Produces the sub-matrices of the theory covariance matrix according
+    to a point prescription which matches the number of input theories.
+    If 5 theories are provided, a scheme 'bar' or 'nobar' must be
+    chosen in the runcard in order to specify the prescription. Sub-matrices
+    correspond to applying the scale variation prescription to each pair of
+    processes in turn, using a different procedure for the case where the
+    processes are the same relative to when they are different."""
+
+    process_info = combine_by_type
+    running_index = 0
+    start_proc = defaultdict(list)
+    for name in process_info.preds:
+        size = len(process_info.preds[name][0])
+        start_proc[name] = running_index
+        running_index += size
+
+    l = len(theoryids)
+    process_info = combine_by_type
+    covmats = defaultdict(list)
+    for name1 in process_info.preds:
+        for name2 in process_info.preds:
+            central1, *others1 = process_info.preds[name1]
+            deltas1 = list((other - central1 for other in others1))
+            central2, *others2 = process_info.preds[name2]
+            deltas2 = list((other - central2 for other in others2))
+            s = compute_covs_pt_prescrip(
+                point_prescription, l, name1, deltas1, name2, deltas2, fivetheories, seventheories
+            )
+            start_locs = (start_proc[name1], start_proc[name2])
+            covmats[start_locs] = s
+    return covmats
+
+
+@table
+def theory_covmat_custom(covs_pt_prescrip, procs_index, combine_by_type):
+    """Takes the individual sub-covmats between each two processes and assembles
+    them into a full covmat. The result is ordered by process and, within each
+    process, by dataset in the order in which combine_by_type lists them"""
+    process_info = combine_by_type
+
+    # the order is important for the construction of covmat_index below
+    if procs_index.names != ['group', 'dataset', 'id']:
+        raise ValueError
+
+    # construct covmat_index based on the order of experiments as they are in combine_by_type
+    indexlist = []
+    for procname in process_info.preds:
+        for expname in process_info.namelist[procname]:
+            for ind in procs_index:
+                # we need procs_index for the datapoint ids of the datapoints that survived the cuts
+                # or do we just assume they're the same as for the exp covmat? Perhaps this
+                # additional layer is a bit pointless.
+                if ind[0] == procname and ind[1] == expname:
+                    data_id = ind[2]
+                    indexlist.append((procname, expname, data_id))
+    # Is this always the exact same as procs index? This depends on how procs_index orders datasets
+    # within a process. 
+    covmat_index = pd.MultiIndex.from_tuples(indexlist, names=procs_index.names)
+
+    # Initialise arrays of zeros and set precision to same as FK tables
+    total_datapoints = sum(combine_by_type.sizes.values())
+    mat = np.zeros((total_datapoints, total_datapoints), dtype=np.float32)
+    for locs in covs_pt_prescrip:
+        cov = covs_pt_prescrip[locs]
+        mat[locs[0] : (len(cov) + locs[0]), locs[1] : (len(cov.T) + locs[1])] = cov
+    df = pd.DataFrame(mat, index=covmat_index, columns=covmat_index)
+    return df
+
+
 @table
 def fromfile_covmat(covmatpath, procs_data, procs_index):
     """Reads a general theory covariance matrix from file. Then

From d4a3e5935a10651606f66218016a25ec74d7850a Mon Sep 17 00:00:00 2001
From: RoyStegeman <roystegeman@live.nl>
Date: Tue, 19 Dec 2023 15:07:21 +0000
Subject: [PATCH 05/14] tiny cleanup

---
 validphys2/src/validphys/theorycovariance/construction.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py
index 362b5e005e..0e0891097a 100644
--- a/validphys2/src/validphys/theorycovariance/construction.py
+++ b/validphys2/src/validphys/theorycovariance/construction.py
@@ -483,8 +483,6 @@ def covs_pt_prescrip(combine_by_type, theoryids, point_prescription, fivetheorie
         start_proc[name] = running_index
         running_index += size
 
-    l = len(theoryids)
-    process_info = combine_by_type
     covmats = defaultdict(list)
     for name1 in process_info.preds:
         for name2 in process_info.preds:
@@ -493,7 +491,7 @@ def covs_pt_prescrip(combine_by_type, theoryids, point_prescription, fivetheorie
             central2, *others2 = process_info.preds[name2]
             deltas2 = list((other - central2 for other in others2))
             s = compute_covs_pt_prescrip(
-                point_prescription, l, name1, deltas1, name2, deltas2, fivetheories, seventheories
+                point_prescription, len(theoryids), name1, deltas1, name2, deltas2, fivetheories, seventheories
             )
             start_locs = (start_proc[name1], start_proc[name2])
             covmats[start_locs] = s

From e6d674d1f9e6848794f16fd1db6443fc91070858 Mon Sep 17 00:00:00 2001
From: RoyStegeman <roystegeman@live.nl>
Date: Tue, 19 Dec 2023 15:21:07 +0000
Subject: [PATCH 06/14] remove data_input as input to combine_by_type

---
 validphys2/src/validphys/app.py                   |  2 +-
 .../validphys/theorycovariance/construction.py    | 15 ++++-----------
 2 files changed, 5 insertions(+), 12 deletions(-)

diff --git a/validphys2/src/validphys/app.py b/validphys2/src/validphys/app.py
index 19f9f1d41f..0d47897bc1 100644
--- a/validphys2/src/validphys/app.py
+++ b/validphys2/src/validphys/app.py
@@ -39,7 +39,7 @@
     "validphys.paramfits.plots",
     "validphys.theorycovariance.construction",
     "validphys.theorycovariance.output",
-    "validphys.theorycovariance.tests",
+    # "validphys.theorycovariance.tests",
     "validphys.replica_selector",
     "validphys.closuretest",
     "validphys.mc_gen",
diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py
index 0e0891097a..444137dad3 100644
--- a/validphys2/src/validphys/theorycovariance/construction.py
+++ b/validphys2/src/validphys/theorycovariance/construction.py
@@ -183,16 +183,13 @@ def total_covmat_procs(procs_results_theory, fivetheories):
 ProcessInfo = namedtuple("ProcessInfo", ("preds", "namelist", "sizes"))
 
 
-def combine_by_type(each_dataset_results_central_bytheory, data_input):
+def combine_by_type(each_dataset_results_central_bytheory):
     """Groups the datasets according to processes returns an instance of the ProcessInfo class
 
     Parameters
     ----------
     each_dataset_results_central_bytheory: list(list((DataResult,ThPredictionsResult)))
 
-    data_input: list(DataSetInput)
-        List with DatasetInput as order in the runcard
-
     Returns
     -------
     ProcesInfo
@@ -206,12 +203,8 @@ def combine_by_type(each_dataset_results_central_bytheory, data_input):
     dataset_size = defaultdict(list)
     theories_by_process = defaultdict(list)
     ordered_names = defaultdict(list)
-    for dataset, datain in zip(each_dataset_results_central_bytheory, data_input):
-        name = datain.name
-        # A difference in ordering of each_dataset_results_central_bytheory and
-        # data_input has caused problems before, so let's explicitly check
-        if name != dataset[0][0].name:
-            raise ValueError
+    for dataset in each_dataset_results_central_bytheory:
+        name = dataset[0][0].name
         theory_centrals = [x[1].central_value for x in dataset]
         dataset_size[name] = len(theory_centrals[0])
         proc_type = process_lookup(name)
@@ -521,7 +514,7 @@ def theory_covmat_custom(covs_pt_prescrip, procs_index, combine_by_type):
                     data_id = ind[2]
                     indexlist.append((procname, expname, data_id))
     # Is this always the exact same as procs index? This depends on how procs_index orders datasets
-    # within a process. 
+    # within a process.
     covmat_index = pd.MultiIndex.from_tuples(indexlist, names=procs_index.names)
 
     # Initialise arrays of zeros and set precision to same as FK tables

From 206881f5b0e19cbcbc8b5db60a90d5db9a341806 Mon Sep 17 00:00:00 2001
From: RoyStegeman <roystegeman@live.nl>
Date: Tue, 19 Dec 2023 15:28:06 +0000
Subject: [PATCH 07/14] reindex covmat_custom normalization

---
 validphys2/src/validphys/theorycovariance/construction.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py
index 444137dad3..4c13793e42 100644
--- a/validphys2/src/validphys/theorycovariance/construction.py
+++ b/validphys2/src/validphys/theorycovariance/construction.py
@@ -745,7 +745,8 @@ def theory_normcovmat_custom(theory_covmat_custom, procs_data_values):
     """Calculates the theory covariance matrix for scale variations normalised
     to data, with variations according to the relevant prescription."""
     df = theory_covmat_custom
-    mat = df / np.outer(procs_data_values, procs_data_values)
+    vals = procs_data_values.reindex(df.index)
+    mat = df / np.outer(vals, vals)
     return mat
 
 

From 6ae00701b3b3536db71d430e4432236e338b9ef1 Mon Sep 17 00:00:00 2001
From: RoyStegeman <roystegeman@live.nl>
Date: Tue, 19 Dec 2023 15:33:26 +0000
Subject: [PATCH 08/14] clean up loading dataset name to Results

---
 validphys2/src/validphys/results.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/validphys2/src/validphys/results.py b/validphys2/src/validphys/results.py
index 02dc9bb014..0bb9685b44 100644
--- a/validphys2/src/validphys/results.py
+++ b/validphys2/src/validphys/results.py
@@ -75,7 +75,7 @@ def __init__(self, dataset, covmat, sqrtcovmat):
         stats = Stats(self._central_value)
         self._covmat = covmat
         self._sqrtcovmat = sqrtcovmat
-        self._name = dataset.name
+        self._dataset = dataset
         super().__init__(stats)
 
     @property
@@ -101,7 +101,7 @@ def sqrtcovmat(self):
 
     @property
     def name(self):
-        return self._name
+        return self._dataset.name
 
 class ThPredictionsResult(StatsResult):
     """Class holding theory prediction, inherits from StatsResult

From 5df26ce3a8bb0be3a7fe067ca7e3e92ad63355ad Mon Sep 17 00:00:00 2001
From: RoyStegeman <roystegeman@live.nl>
Date: Tue, 19 Dec 2023 15:34:15 +0000
Subject: [PATCH 09/14] correct expname to datasetname

---
 validphys2/src/validphys/theorycovariance/construction.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py
index 4c13793e42..08deabae6e 100644
--- a/validphys2/src/validphys/theorycovariance/construction.py
+++ b/validphys2/src/validphys/theorycovariance/construction.py
@@ -505,14 +505,14 @@ def theory_covmat_custom(covs_pt_prescrip, procs_index, combine_by_type):
     # construct covmat_index based on the order of experiments as they are in combine_by_type
     indexlist = []
     for procname in process_info.preds:
-        for expname in process_info.namelist[procname]:
+        for datasetname in process_info.namelist[procname]:
             for ind in procs_index:
                 # we need procs_index for the datapoint ids of the datapoints that survived the cuts
                 # or do we just assume they're the same as for the exp covmat? Perhaps this
                 # additional layer is a bit pointless.
-                if ind[0] == procname and ind[1] == expname:
+                if ind[0] == procname and ind[1] == datasetname:
                     data_id = ind[2]
-                    indexlist.append((procname, expname, data_id))
+                    indexlist.append((procname, datasetname, data_id))
     # Is this always the exact same as procs index? This depends on how procs_index orders datasets
     # within a process.
     covmat_index = pd.MultiIndex.from_tuples(indexlist, names=procs_index.names)

From bc7fe567dc0778dc892e7811b93f7e9eb513b02a Mon Sep 17 00:00:00 2001
From: RoyStegeman <roystegeman@live.nl>
Date: Tue, 19 Dec 2023 18:31:51 +0000
Subject: [PATCH 10/14] apply suggestion

---
 .../theorycovariance/construction.py          | 23 ++++++-------------
 1 file changed, 7 insertions(+), 16 deletions(-)

diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py
index 08deabae6e..5d86f62f3f 100644
--- a/validphys2/src/validphys/theorycovariance/construction.py
+++ b/validphys2/src/validphys/theorycovariance/construction.py
@@ -498,31 +498,22 @@ def theory_covmat_custom(covs_pt_prescrip, procs_index, combine_by_type):
     to ordering by experiment as listed in the runcard"""
     process_info = combine_by_type
 
-    # the order is important for the construction of comvat_index below
-    if procs_index.names != ['group', 'dataset', 'id']:
-        raise ValueError
-
     # construct covmat_index based on the order of experiments as they are in combine_by_type
     indexlist = []
     for procname in process_info.preds:
         for datasetname in process_info.namelist[procname]:
-            for ind in procs_index:
-                # we need procs_index for the datapoint ids of the datapoints that survived the cuts
-                # or do we just assume they're the same as for the exp covmat? Perhaps this
-                # additional layer is a bit pointless.
-                if ind[0] == procname and ind[1] == datasetname:
-                    data_id = ind[2]
-                    indexlist.append((procname, datasetname, data_id))
-    # Is this always the exact same as procs index? This depends on how procs_index orders datasets
-    # within a process.
+            slicer = procs_index.get_locs((procname, datasetname))
+            indexlist += procs_index[slicer].to_list()
+    # Is this always the exact same as procs_index? In that case we could just use that one, but
+    # that depends on how procs_index orders datasets within a process.
     covmat_index = pd.MultiIndex.from_tuples(indexlist, names=procs_index.names)
 
     # Initialise arrays of zeros and set precision to same as FK tables
     total_datapoints = sum(combine_by_type.sizes.values())
     mat = np.zeros((total_datapoints, total_datapoints), dtype=np.float32)
-    for locs in covs_pt_prescrip:
-        cov = covs_pt_prescrip[locs]
-        mat[locs[0] : (len(cov) + locs[0]), locs[1] : (len(cov.T) + locs[1])] = cov
+    for locs, cov in covs_pt_prescrip.items():
+        xsize, ysize = cov.shape
+        mat[locs[0] : locs[0] + xsize, locs[1] : locs[1] + ysize] = cov
     df = pd.DataFrame(mat, index=covmat_index, columns=covmat_index)
     return df
 

From 4a7f1c691e1f47fdcca070ecc30000d52d3f887f Mon Sep 17 00:00:00 2001
From: RoyStegeman <roystegeman@live.nl>
Date: Wed, 20 Dec 2023 09:51:04 +0000
Subject: [PATCH 11/14] clean up theorycovariance.tests

---
 validphys2/src/validphys/app.py               |  2 +-
 .../src/validphys/theorycovariance/tests.py   | 45 -------------------
 2 files changed, 1 insertion(+), 46 deletions(-)

diff --git a/validphys2/src/validphys/app.py b/validphys2/src/validphys/app.py
index 0d47897bc1..19f9f1d41f 100644
--- a/validphys2/src/validphys/app.py
+++ b/validphys2/src/validphys/app.py
@@ -39,7 +39,7 @@
     "validphys.paramfits.plots",
     "validphys.theorycovariance.construction",
     "validphys.theorycovariance.output",
-    # "validphys.theorycovariance.tests",
+    "validphys.theorycovariance.tests",
     "validphys.replica_selector",
     "validphys.closuretest",
     "validphys.mc_gen",
diff --git a/validphys2/src/validphys/theorycovariance/tests.py b/validphys2/src/validphys/theorycovariance/tests.py
index b5b302ddd1..2e3d4d12ba 100644
--- a/validphys2/src/validphys/theorycovariance/tests.py
+++ b/validphys2/src/validphys/theorycovariance/tests.py
@@ -21,15 +21,10 @@
 from validphys.checks import check_two_dataspecs
 from validphys.theorycovariance.construction import (
     combine_by_type,
-    covmap,
-    covs_pt_prescrip,
-    process_starting_points,
     theory_corrmat_singleprocess,
-    theory_covmat_custom,
 )
 from validphys.theorycovariance.output import _get_key, matrix_plot_labels
 from validphys.theorycovariance.theorycovarianceutils import (
-    check_correct_theory_combination_dataspecs,
     check_correct_theory_combination_theoryconfig,
     process_lookup,
 )
@@ -147,36 +142,6 @@ def combine_by_type_dataspecs(all_matched_results, matched_dataspecs_dataset_nam
 dataspecs_theoryids = collect("theoryid", ["theoryconfig", "original", "dataspecs"])
 
 
-def process_starting_points_dataspecs(combine_by_type_dataspecs):
-    """Like process_starting_points but for matched dataspecs."""
-    return process_starting_points(combine_by_type_dataspecs)
-
-
-@check_correct_theory_combination_dataspecs
-def covs_pt_prescrip_dataspecs(
-    combine_by_type_dataspecs,
-    process_starting_points_dataspecs,
-    dataspecs_theoryids,
-    point_prescription,
-    fivetheories,
-    seventheories,
-):
-    """Like covs_pt_prescrip but for matched dataspecs."""
-    return covs_pt_prescrip(
-        combine_by_type_dataspecs,
-        process_starting_points_dataspecs,
-        dataspecs_theoryids,
-        point_prescription,
-        fivetheories,
-        seventheories,
-    )
-
-
-def covmap_dataspecs(combine_by_type_dataspecs, matched_dataspecs_dataset_name):
-    """Like covmap but for matched dataspecs."""
-    return covmap(combine_by_type_dataspecs, matched_dataspecs_dataset_name)
-
-
 matched_dataspecs_process = collect("process", ["dataspecs"])
 matched_dataspecs_dataset_name = collect("dataset_name", ["dataspecs"])
 matched_cuts_datasets = collect("dataset", ["dataspecs"])
@@ -204,16 +169,6 @@ def matched_experiments_index(matched_dataspecs_dataset_name, all_matched_data_l
     return index
 
 
-@table
-def theory_covmat_custom_dataspecs(
-    covs_pt_prescrip_dataspecs, covmap_dataspecs, matched_experiments_index
-):
-    """Like theory_covmat_custom but for matched dataspecs."""
-    return theory_covmat_custom(
-        covs_pt_prescrip_dataspecs, covmap_dataspecs, matched_experiments_index
-    )
-
-
 thx_corrmat = collect(
     "theory_corrmat_custom_dataspecs", ["combined_shift_and_theory_dataspecs", "theoryconfig"]
 )

From 48632aaae790648680fd20e6d72a2c851c8dd72e Mon Sep 17 00:00:00 2001
From: RoyStegeman <roystegeman@live.nl>
Date: Wed, 20 Dec 2023 11:02:08 +0000
Subject: [PATCH 12/14] update combine_by_type docstring

---
 .../theorycovariance/construction.py          | 26 ++++++++++++-------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py
index 5d86f62f3f..76bcd798e4 100644
--- a/validphys2/src/validphys/theorycovariance/construction.py
+++ b/validphys2/src/validphys/theorycovariance/construction.py
@@ -184,21 +184,19 @@ def total_covmat_procs(procs_results_theory, fivetheories):
 
 
 def combine_by_type(each_dataset_results_central_bytheory):
-    """Groups the datasets according to processes returns an instance of the ProcessInfo class
+    """Groups the datasets by process and returns an instance of the ProcessInfo class
 
     Parameters
     ----------
-    each_dataset_results_central_bytheory: list(list((DataResult,ThPredictionsResult)))
+    each_dataset_results_central_bytheory: list[list[(DataResult,ThPredictionsResult)]]
+        Tuples of DataResult and ThPredictionsResult (where only the second is used for the
+        construction of the theory covariance matrix), wrapped in a list such that there is a tuple
+        per theoryid, wrapped in another list per dataset.
 
     Returns
     -------
-    ProcesInfo
+    process_info: :py:class:`validphys.theorycovariance.construction.ProcessInfo`
         Class with info needed to construct the theory covmat.
-
-    Raises
-    ------
-    ValueError
-        If the order is of the inputs are not the same
     """
     dataset_size = defaultdict(list)
     theories_by_process = defaultdict(list)
@@ -215,6 +213,9 @@ def combine_by_type(each_dataset_results_central_bytheory):
     process_info = ProcessInfo(
         preds=theories_by_process, namelist=ordered_names, sizes=dataset_size
     )
+    import ipdb
+
+    ipdb.set_trace()
     return process_info
 
 
@@ -484,7 +485,14 @@ def covs_pt_prescrip(combine_by_type, theoryids, point_prescription, fivetheorie
             central2, *others2 = process_info.preds[name2]
             deltas2 = list((other - central2 for other in others2))
             s = compute_covs_pt_prescrip(
-                point_prescription, len(theoryids), name1, deltas1, name2, deltas2, fivetheories, seventheories
+                point_prescription,
+                len(theoryids),
+                name1,
+                deltas1,
+                name2,
+                deltas2,
+                fivetheories,
+                seventheories,
             )
             start_locs = (start_proc[name1], start_proc[name2])
             covmats[start_locs] = s

From 0758bb63c7502687f3486fc9f8702631674cbe18 Mon Sep 17 00:00:00 2001
From: RoyStegeman <roystegeman@live.nl>
Date: Wed, 20 Dec 2023 11:02:40 +0000
Subject: [PATCH 13/14] remove debugger statement

---
 validphys2/src/validphys/theorycovariance/construction.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py
index 76bcd798e4..382d12063c 100644
--- a/validphys2/src/validphys/theorycovariance/construction.py
+++ b/validphys2/src/validphys/theorycovariance/construction.py
@@ -213,9 +213,6 @@ def combine_by_type(each_dataset_results_central_bytheory):
     process_info = ProcessInfo(
         preds=theories_by_process, namelist=ordered_names, sizes=dataset_size
     )
-    import ipdb
-
-    ipdb.set_trace()
     return process_info
 
 

From 570047f711e1ce92a052f1bfd18b83afea60684d Mon Sep 17 00:00:00 2001
From: RoyStegeman <roystegeman@live.nl>
Date: Wed, 20 Dec 2023 11:15:46 +0000
Subject: [PATCH 14/14] add some comments to function and remove deprecated
 functions

---
 .../theorycovariance/construction.py          | 53 ++-----------------
 1 file changed, 5 insertions(+), 48 deletions(-)

diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py
index 382d12063c..78e02cf99f 100644
--- a/validphys2/src/validphys/theorycovariance/construction.py
+++ b/validphys2/src/validphys/theorycovariance/construction.py
@@ -98,50 +98,6 @@ def theory_covmat_dataset(
     return thcovmat
 
 
-# @check_correct_theory_combination
-# def theory_covmat_datasets(each_dataset_results_central_bytheory, fivetheories):
-#     """Produces an array of theory covariance matrices. Each matrix corresponds
-#     to a different dataset, which must be specified in the runcard."""
-#     dataset_covmats = []
-#     for dataset in each_dataset_results_central_bytheory:
-#         theory_centrals = [x[1].central_value for x in dataset]
-#         s = make_scale_var_covmat(theory_centrals)
-#         dataset_covmats.append(s)
-#     return dataset_covmats
-
-
-# @check_correct_theory_combination
-# def total_covmat_datasets(each_dataset_results_central_bytheory, fivetheories):
-#     """Produces an array of total covariance matrices; the sum of experimental
-#     and scale-varied theory covariance matrices. Each matrix corresponds
-#     to a different dataset, which must be specified in the runcard.
-#     These are needed for calculation of chi2 per dataset."""
-#     dataset_covmats = []
-#     for dataset in each_dataset_results_central_bytheory:
-#         theory_centrals = [x[1].central_value for x in dataset]
-#         s = make_scale_var_covmat(theory_centrals)
-#         sigma = dataset[0][0].covmat
-#         cov = s + sigma
-#         dataset_covmats.append(cov)
-#     return dataset_covmats
-
-
-# @check_correct_theory_combination
-# def total_covmat_diagtheory_datasets(each_dataset_results_central_bytheory, fivetheories):
-#     """Same as total_covmat_theory_datasets but for diagonal theory only"""
-#     dataset_covmats = []
-#     for dataset in each_dataset_results_central_bytheory:
-#         theory_centrals = [x[1].central_value for x in dataset]
-#         s = make_scale_var_covmat(theory_centrals)
-#         # Initialise array of zeros and set precision to same as FK tables
-#         s_diag = np.zeros((len(s), len(s)), dtype=np.float32)
-#         np.fill_diagonal(s_diag, np.diag(s))
-#         sigma = dataset[0][0].covmat
-#         cov = s_diag + sigma
-#         dataset_covmats.append(cov)
-#     return dataset_covmats
-
-
 @table
 def theory_block_diag_covmat(theory_covmat_datasets, procs_index):
     """Takes the theory covariance matrices for individual datasets and
@@ -503,17 +459,18 @@ def theory_covmat_custom(covs_pt_prescrip, procs_index, combine_by_type):
     to ordering by experiment as listed in the runcard"""
     process_info = combine_by_type
 
-    # construct covmat_index based on the order of experiments as they are in combine_by_type
+    # Construct a covmat_index based on the order of datasets as they are in combine_by_type
+    # NOTE: maybe the ordering of covmat_index is always the same as that of procs_index?
+    # Regardless, we don't want to open ourselves up to the risk of the ordering of procs_index
+    # changing and breaking this function
     indexlist = []
     for procname in process_info.preds:
         for datasetname in process_info.namelist[procname]:
             slicer = procs_index.get_locs((procname, datasetname))
             indexlist += procs_index[slicer].to_list()
-    # Is this always the exact same as procs_index? In that case we could just use that one, but
-    # that depends on how procs_index orders datasets within a process.
     covmat_index = pd.MultiIndex.from_tuples(indexlist, names=procs_index.names)
 
-    # Initialise arrays of zeros and set precision to same as FK tables
+    # Put the covariance matrices between two processes into a single covariance matrix
     total_datapoints = sum(combine_by_type.sizes.values())
     mat = np.zeros((total_datapoints, total_datapoints), dtype=np.float32)
     for locs, cov in covs_pt_prescrip.items():