From e38e54aa4a7c15e2b6c2d64292fe867652ea1607 Mon Sep 17 00:00:00 2001 From: jacoterh <54140851+jacoterh@users.noreply.github.com> Date: Tue, 14 Apr 2026 14:20:12 +0100 Subject: [PATCH 01/29] writing pseudodata in diagonal basis and saving eigvecs and eigvals as part of vp-setupfit --- doc/sphinx/source/n3fit/runcard_detailed.rst | 6 + n3fit/src/n3fit/scripts/vp_setupfit.py | 4 +- validphys2/src/validphys/n3fit_data.py | 62 +++++++++- validphys2/src/validphys/overfit_metric.py | 44 +++++-- validphys2/src/validphys/pseudodata.py | 115 ++++++++++++++---- .../src/validphys/tests/test_pseudodata.py | 7 ++ 6 files changed, 197 insertions(+), 41 deletions(-) diff --git a/doc/sphinx/source/n3fit/runcard_detailed.rst b/doc/sphinx/source/n3fit/runcard_detailed.rst index f5ea1643eb..0db00dfaad 100644 --- a/doc/sphinx/source/n3fit/runcard_detailed.rst +++ b/doc/sphinx/source/n3fit/runcard_detailed.rst @@ -429,6 +429,12 @@ according to their experiment. Additionally, the union of these two is saved in ``/replica_/datacuts_theory_fitting_pseudodata_table.csv`` if one is not interested in the exact nature of the splitting. +When ``diagonal_basis: true`` is used (by default), the saved pseudodata indices are labeled as +``eigenmode `` corresponding to the diagonal basis used in the fit. ``vp-setupfit`` writes one +file with the diagonal-basis elements under: +``/tables/diagonal_basis_rotation.csv``. +This file stores both the eigenvalues and eigenvectors used for the rotation. 
+ Imposing sum rules ^^^^^^^^^^^^^^^^^^ diff --git a/n3fit/src/n3fit/scripts/vp_setupfit.py b/n3fit/src/n3fit/scripts/vp_setupfit.py index 8ed0d8766a..abfda732bb 100644 --- a/n3fit/src/n3fit/scripts/vp_setupfit.py +++ b/n3fit/src/n3fit/scripts/vp_setupfit.py @@ -51,6 +51,7 @@ 'validphys.commondata', 'validphys.covmats', 'validphys.filters', + 'validphys.n3fit_data', 'validphys.results', 'validphys.theorycovariance.construction', 'validphys.photon.compute', @@ -169,10 +170,11 @@ def from_yaml(cls, o, *args, **kwargs): check_n3fit_action = 'datacuts::theory::closuretest::fitting n3fit_checks_action' else: filter_action = 'datacuts::theory::fitting filter' + rotation_action = 'datacuts::theory::fitting save_diagonal_basis_rotation' check_n3fit_action = 'datacuts::theory::fitting n3fit_checks_action' # The settings for these actions depend on the presence of closuretest - SETUPFIT_FIXED_CONFIG['actions_'] += [check_n3fit_action, filter_action] + SETUPFIT_FIXED_CONFIG['actions_'] += [check_n3fit_action, filter_action, rotation_action] # Check theory covariance matrix configuration thconfig = file_content.get('theorycovmatconfig', {}) diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py index b5c5e6e239..3588f76446 100644 --- a/validphys2/src/validphys/n3fit_data.py +++ b/validphys2/src/validphys/n3fit_data.py @@ -10,6 +10,7 @@ import functools import hashlib import logging +import pathlib import numpy as np import pandas as pd @@ -21,6 +22,8 @@ log = logging.getLogger(__name__) +DIAGONAL_BASIS_ROTATION_FILENAME = "diagonal_basis_rotation.csv" + class Hashrray(TupleComp): """Wrapper class to hash a numpy array so it can be cached.""" @@ -546,6 +549,35 @@ def fitting_data_dict( exps_fitting_data_dict = collect("fitting_data_dict", ("group_dataset_inputs_by_metadata",)) +groups_dataset_inputs_fitting_covmat = collect( + "dataset_inputs_fitting_covmat", ("group_dataset_inputs_by_metadata",) +) + + +def 
save_diagonal_basis_rotation(output_path, _inv_covmat_prepared, diagonal_basis=True): + """Store a single fit-level table with eigenvalues and eigenvectors. + + The rows are globally indexed as ``eigenmode N`` to line up with diagonal-basis pseudodata. + """ + if not diagonal_basis: + return None + + table_path = pathlib.Path(output_path) / "tables" + table_path.mkdir(parents=True, exist_ok=True) + + _, _, diagonal_rotation, eig_vals = _inv_covmat_prepared + if diagonal_rotation is None or eig_vals is None: + log.warning("No diagonal-basis rotation available; skipping table export.") + return None + + df_rotation = pd.DataFrame(diagonal_rotation) + df_rotation.insert(0, "eig_val", eig_vals) + df_rotation.index = pd.Index([f"eigenmode {i}" for i in range(len(eig_vals))]) + + output_file = table_path / DIAGONAL_BASIS_ROTATION_FILENAME + df_rotation.to_csv(output_file, sep="\t", index=True) + log.info("Saved diagonal-basis rotation table to %s", output_file) + return output_file def replica_nnseed_fitting_data_dict(replica, exps_fitting_data_dict, replica_nnseed): @@ -575,7 +607,9 @@ def replica_nnseed_fitting_data_dict(replica, exps_fitting_data_dict, replica_nn ) -def replica_pseudodata(experiment_indexed_make_replica, replica): +def replica_pseudodata( + experiment_indexed_make_replica, exps_fitting_data_dict, replica, diagonal_basis=True +): """Creates a pandas DataFrame containing the generated pseudodata. The index is :py:func:`validphys.results.experiments_index` and the columns is the replica numbers. @@ -586,9 +620,29 @@ def replica_pseudodata(experiment_indexed_make_replica, replica): `fitting::savepseudodata` is `true` (as per the default setting) The table can be found in the replica folder i.e. 
/nnfit/replica_*/ """ - df = pd.concat(experiment_indexed_make_replica) - df.columns = [f"replica {replica}"] - return df + replica_column = f"replica {replica}" + + if not diagonal_basis: + df = pd.concat(experiment_indexed_make_replica) + df.columns = [replica_column] + return df + + diagonal_frames = [] + mode_counter = 0 + for indexed_replica, fit_data in zip(experiment_indexed_make_replica, exps_fitting_data_dict): + values = indexed_replica.iloc[:, 0].to_numpy() + diag_rot = fit_data.get("data_transformation") + rotated_values = values if diag_rot is None else diag_rot @ values + next_counter = mode_counter + len(rotated_values) + eigenmode_index = pd.Index( + [f"eigenmode {i}" for i in range(mode_counter, next_counter)], name="eigenmode" + ) + diagonal_frames.append( + pd.DataFrame(rotated_values, index=eigenmode_index, columns=[replica_column]) + ) + mode_counter = next_counter + + return pd.concat(diagonal_frames) @_per_replica diff --git a/validphys2/src/validphys/overfit_metric.py b/validphys2/src/validphys/overfit_metric.py index 2d8c1d1c8f..9101e0a4ff 100644 --- a/validphys2/src/validphys/overfit_metric.py +++ b/validphys2/src/validphys/overfit_metric.py @@ -17,6 +17,7 @@ from reportengine.table import table from validphys import plotutils from validphys.checks import check_at_least_two_replicas +from validphys.pseudodata import load_diagonal_rotation_and_eigenvalues log = logging.getLogger(__name__) @@ -39,6 +40,12 @@ def _create_new_val_pseudodata(pdf_data_index, fit_data_indices_list): return np.array(vl_data_fitrep)[:, :, 0] +def _eigenmode_ids(eigenmode_index): + return np.array( + [int(str(label).replace("eigenmode ", "")) for label in eigenmode_index], dtype=int + ) + + @check_at_least_two_replicas def calculate_chi2s_per_replica( pdf, # for the check @@ -47,6 +54,7 @@ def calculate_chi2s_per_replica( preds, dataset_inputs, groups_covmat_no_table, + fit, ): """Calculates, for each PDF replica, the chi2 of the validation with the pseudodata 
generated for all other replicas in the fit @@ -86,28 +94,46 @@ def calculate_chi2s_per_replica( PDF_predictions = pd.concat(pp) + diagonal_mode = isinstance( + recreate_pdf_pseudodata_no_table[0].val_idx, pd.Index + ) and not isinstance(recreate_pdf_pseudodata_no_table[0].val_idx, pd.MultiIndex) + rotation = None + eigenvalues = None + if diagonal_mode: + rotation, eigenvalues = load_diagonal_rotation_and_eigenvalues(fit) + chi2s_per_replica = [] for enum, pdf_data_index in enumerate(recreate_pdf_pseudodata_no_table): - prediction_filter = pdf_data_index.val_idx.droplevel(level=0) - prediction_filter.rename(["dataset", "data"], inplace=True) - PDF_predictions_val = PDF_predictions.loc[prediction_filter] - PDF_predictions_val = PDF_predictions_val.values[:, enum] + if diagonal_mode and rotation is not None: + # Build full prediction vector in data space for this PDF replica and rotate it. + full_prediction = PDF_predictions.values[:, enum] + rotated_prediction = rotation @ full_prediction + mode_ids = _eigenmode_ids(pdf_data_index.val_idx) + PDF_predictions_val = rotated_prediction[mode_ids] + else: + prediction_filter = pdf_data_index.val_idx.droplevel(level=0) + prediction_filter.rename(["dataset", "data"], inplace=True) + PDF_predictions_val = PDF_predictions.loc[prediction_filter] + PDF_predictions_val = PDF_predictions_val.values[:, enum] new_val_pseudodata_list = _create_new_val_pseudodata( pdf_data_index, recreate_pdf_pseudodata_no_table ) - invcovmat_vl = np.linalg.inv( - groups_covmat_no_table[pdf_data_index.val_idx].T[pdf_data_index.val_idx] - ) + if diagonal_mode and eigenvalues is not None: + mode_ids = _eigenmode_ids(pdf_data_index.val_idx) + invcovmat_vl = np.diag(1.0 / eigenvalues[mode_ids]) + else: + invcovmat_vl = np.linalg.inv( + groups_covmat_no_table[pdf_data_index.val_idx].T[pdf_data_index.val_idx] + ) tmp = PDF_predictions_val - new_val_pseudodata_list chi2 = np.einsum("ij,jk,ik->i", tmp, invcovmat_vl, tmp) / tmp.shape[1] 
chi2s_per_replica.append(chi2) - ret = np.array(chi2s_per_replica) - return ret + return np.array(chi2s_per_replica) def array_expected_overfitting( diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index de7dd36c69..c10db996a3 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -20,6 +20,8 @@ log = logging.getLogger(__name__) +DIAGONAL_BASIS_ROTATION_FILENAME = "diagonal_basis_rotation.csv" + DataTrValSpec = namedtuple('DataTrValSpec', ['pseudodata', 'tr_idx', 'val_idx']) context_index = collect("groups_index", ("fitcontext",)) @@ -31,7 +33,7 @@ class ReplicaGenerationError(Exception): pass -def read_replica_pseudodata(fit, context_index, replica): +def read_replica_pseudodata(fit, context_index, replica, diagonal_basis=True): """Function to handle the reading of training and validation splits for a fit that has been produced with the ``savepseudodata`` flag set to ``True``. @@ -70,11 +72,6 @@ def read_replica_pseudodata(fit, context_index, replica): 5 3.117819 6 0.771079 """ - # List of length 1 due to the collect - context_index = context_index[0] - # The [0] is because of how pandas handles sorting a MultiIndex - sorted_index = context_index.sortlevel(level=range(1, 3))[0] - log.debug(f"Reading pseudodata & training/validation splits from {fit.name}.") replica_path = fit.path / "nnfit" / f"replica_{replica}" @@ -88,8 +85,16 @@ def read_replica_pseudodata(fit, context_index, replica): vl_pseudodatafile = "datacuts_theory_fitting_validation_pseudodata.csv" try: - tr = pd.read_csv(replica_path / tr_pseudodatafile, index_col=[0, 1, 2], sep="\t", header=0) - val = pd.read_csv(replica_path / vl_pseudodatafile, index_col=[0, 1, 2], sep="\t", header=0) + if diagonal_basis: + tr = pd.read_csv(replica_path / tr_pseudodatafile, index_col=[0], sep="\t", header=0) + val = pd.read_csv(replica_path / vl_pseudodatafile, index_col=[0], sep="\t", header=0) + else: + tr = pd.read_csv( + 
replica_path / tr_pseudodatafile, index_col=[0, 1, 2], sep="\t", header=0 + ) + val = pd.read_csv( + replica_path / vl_pseudodatafile, index_col=[0, 1, 2], sep="\t", header=0 + ) except FileNotFoundError as e: raise FileNotFoundError( "Could not find saved training and validation data files. " @@ -100,19 +105,23 @@ def read_replica_pseudodata(fit, context_index, replica): pseudodata = pd.concat((tr, val)) - # In order for this function to work also with old fit, it is necessary to remap the names - # being read (since the names in the context have already been remapped) - # The following checks whether a given name is in both the context and the fit, and if not - # tries to get it from the old_to_new mapping. - mapping = {} - context_datasets = context_index.get_level_values("dataset").unique() - for dsname in pseudodata.index.get_level_values("dataset").unique(): - if dsname not in context_datasets: - new_name, _ = legacy_to_new_map(dsname) - mapping[dsname] = new_name + if not diagonal_basis: + # List of length 1 due to the collect + context_index = context_index[0] + # The [0] is because of how pandas handles sorting a MultiIndex + sorted_index = context_index.sortlevel(level=range(1, 3))[0] + + # In order for this function to work also with old fit, it is necessary to remap the names + # being read (since the names in the context have already been remapped) + mapping = {} + context_datasets = context_index.get_level_values("dataset").unique() + for dsname in pseudodata.index.get_level_values("dataset").unique(): + if dsname not in context_datasets: + new_name, _ = legacy_to_new_map(dsname) + mapping[dsname] = new_name - pseudodata = pseudodata.rename(mapping, level=1).sort_index(level=range(1, 3)) - pseudodata.index = sorted_index + pseudodata = pseudodata.rename(mapping, level=1).sort_index(level=range(1, 3)) + pseudodata.index = sorted_index tr = pseudodata[pseudodata["type"] == "training"] val = pseudodata[pseudodata["type"] == "validation"] @@ -314,6 
+323,28 @@ def indexed_make_replica(groups_index, make_replica): return pd.DataFrame(make_replica, index=groups_index, columns=["data"]) +def _load_diagonal_rotation(fit): + """Load the fixed diagonal-basis rotation table written during vp-setupfit.""" + rotation_path = fit.path / "tables" / DIAGONAL_BASIS_ROTATION_FILENAME + rotation_table = pd.read_csv(rotation_path, sep="\t", index_col=0) + rotation_table = rotation_table.sort_index( + key=lambda idx: idx.str.replace("eigenmode ", "").astype(int) + ) + eigenvector_cols = [c for c in rotation_table.columns if c.startswith("evec_")] + return rotation_table[eigenvector_cols].to_numpy() + + +def load_diagonal_rotation_and_eigenvalues(fit): + """Return the global rotation matrix and eigenvalues for diagonal-basis fits.""" + rotation_path = fit.path / "tables" / DIAGONAL_BASIS_ROTATION_FILENAME + rotation_table = pd.read_csv(rotation_path, sep="\t", index_col=0) + rotation_table = rotation_table.sort_index( + key=lambda idx: idx.str.replace("eigenmode ", "").astype(int) + ) + eigenvector_cols = [c for c in rotation_table.columns if c.startswith("evec_")] + return rotation_table[eigenvector_cols].to_numpy(), rotation_table["eigenvalue"].to_numpy() + + def level0_commondata_wc(data, fakepdf): """ Given a validphys.core.DataGroupSpec object, load commondata and @@ -432,11 +463,17 @@ def make_level1_data(data, level0_commondata_wc, filterseed, data_index, sep_mul ) # ================== generation of Level1 data ======================# - central_vals= central_values_array(level0_commondata_wc) + central_vals = central_values_array(level0_commondata_wc) group_mult_errs = group_multiplicative_errors(level0_commondata_wc, sep_mult=sep_mult) group_pos_mask = group_positivity_mask(level0_commondata_wc) level1_data = make_replica( - central_vals, filterseed, covmat, group_multiplicative_errors=group_mult_errs, group_positivity_mask=group_pos_mask, sep_mult=sep_mult, genrep=True, + central_vals, + filterseed, + covmat, + 
group_multiplicative_errors=group_mult_errs, + group_positivity_mask=group_pos_mask, + sep_mult=sep_mult, + genrep=True, ) indexed_level1_data = indexed_make_replica(data_index, level1_data) @@ -468,7 +505,9 @@ def make_level1_data(data, level0_commondata_wc, filterseed, data_index, sep_mul indexed_make_replicas = collect('indexed_make_replica', ('replicas',)) -def recreate_fit_pseudodata(_recreate_fit_pseudodata, fitreplicas, fit_masks): +def recreate_fit_pseudodata( + _recreate_fit_pseudodata, fitreplicas, fit_masks, fit, diagonal_basis=False +): """Function used to reconstruct the pseudodata seen by each of the Monte Carlo fit replicas. @@ -493,16 +532,28 @@ def recreate_fit_pseudodata(_recreate_fit_pseudodata, fitreplicas, fit_masks): :py:func:`validphys.pseudodata.recreate_pdf_pseudodata` """ res = [] + diagonal_rotation = _load_diagonal_rotation(fit) if diagonal_basis else None + for pseudodata, mask, rep in zip(_recreate_fit_pseudodata, fit_masks, fitreplicas): df = pd.concat(pseudodata) df.columns = [f"replica {rep}"] + + if diagonal_rotation is not None: + rotated_values = diagonal_rotation @ df.iloc[:, 0].to_numpy() + eigenmode_index = pd.Index( + [f"eigenmode {i}" for i in range(len(rotated_values))], name="eigenmode" + ) + df = pd.DataFrame(rotated_values, index=eigenmode_index, columns=[f"replica {rep}"]) + tr_idx = df.loc[mask[0].values].index val_idx = df.loc[mask[1].values].index res.append(DataTrValSpec(df, tr_idx, val_idx)) return res -def recreate_pdf_pseudodata(_recreate_pdf_pseudodata, pdfreplicas, pdf_masks): +def recreate_pdf_pseudodata( + _recreate_pdf_pseudodata, pdfreplicas, pdf_masks, fit, diagonal_basis=False +): """Like :py:func:`validphys.pseudodata.recreate_fit_pseudodata` but accounts for the postfit reshuffling of replicas. 
@@ -522,11 +573,21 @@ def recreate_pdf_pseudodata(_recreate_pdf_pseudodata, pdfreplicas, pdf_masks): -------- :py:func:`validphys.pseudodata.recreate_fit_pseudodata` """ - return recreate_fit_pseudodata(_recreate_pdf_pseudodata, pdfreplicas, pdf_masks) + return recreate_fit_pseudodata( + _recreate_pdf_pseudodata, pdfreplicas, pdf_masks, fit, diagonal_basis=diagonal_basis + ) pdf_masks_no_table = collect('replica_mask', ('pdfreplicas', 'fitenvironment')) -def recreate_pdf_pseudodata_no_table(_recreate_pdf_pseudodata, pdfreplicas, pdf_masks_no_table): - return recreate_pdf_pseudodata(_recreate_pdf_pseudodata, pdfreplicas, pdf_masks_no_table) +def recreate_pdf_pseudodata_no_table( + _recreate_pdf_pseudodata, pdfreplicas, pdf_masks_no_table, fit, diagonal_basis=False +): + return recreate_pdf_pseudodata( + _recreate_pdf_pseudodata, + pdfreplicas, + pdf_masks_no_table, + fit, + diagonal_basis=diagonal_basis, + ) diff --git a/validphys2/src/validphys/tests/test_pseudodata.py b/validphys2/src/validphys/tests/test_pseudodata.py index cbb1cc8b8d..29b9f37aa4 100644 --- a/validphys2/src/validphys/tests/test_pseudodata.py +++ b/validphys2/src/validphys/tests/test_pseudodata.py @@ -8,6 +8,8 @@ recreation. 
""" +from pathlib import Path + import numpy as np from numpy.testing import assert_allclose import pandas as pd @@ -85,6 +87,11 @@ def test_read_matches_recreate(): for fit in [PSEUDODATA_FIT, PSEUDODATA_FIT_DIAG]: diagonal_basis = True if fit == PSEUDODATA_FIT_DIAG else False + if diagonal_basis: + fit_spec = API.fit(fit=fit) + rotation_file = Path(fit_spec.path) / "tables" / "diagonal_basis_rotation.csv" + if not rotation_file.exists(): + pytest.fail(f"Diagonal rotation file not available for {fit}") reads = API.read_fit_pseudodata(fit=fit, diagonal_basis=diagonal_basis) recreates = API.recreate_fit_pseudodata(fit=fit, diagonal_basis=diagonal_basis) for read, recreate in zip(reads, recreates): From a9f621e2e684a684336e5b07e6653a8a9378431e Mon Sep 17 00:00:00 2001 From: jacoterh <54140851+jacoterh@users.noreply.github.com> Date: Tue, 21 Apr 2026 11:36:44 +0100 Subject: [PATCH 02/29] using table decorator --- n3fit/src/n3fit/scripts/vp_setupfit.py | 2 +- validphys2/src/validphys/n3fit_data.py | 15 ++++++--------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/n3fit/src/n3fit/scripts/vp_setupfit.py b/n3fit/src/n3fit/scripts/vp_setupfit.py index abfda732bb..f9e05c707b 100644 --- a/n3fit/src/n3fit/scripts/vp_setupfit.py +++ b/n3fit/src/n3fit/scripts/vp_setupfit.py @@ -170,7 +170,7 @@ def from_yaml(cls, o, *args, **kwargs): check_n3fit_action = 'datacuts::theory::closuretest::fitting n3fit_checks_action' else: filter_action = 'datacuts::theory::fitting filter' - rotation_action = 'datacuts::theory::fitting save_diagonal_basis_rotation' + rotation_action = 'datacuts::theory::fitting diagonal_basis_rotation_table' check_n3fit_action = 'datacuts::theory::fitting n3fit_checks_action' # The settings for these actions depend on the presence of closuretest diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py index 3588f76446..e2db7e124c 100644 --- a/validphys2/src/validphys/n3fit_data.py +++ 
b/validphys2/src/validphys/n3fit_data.py @@ -554,10 +554,10 @@ def fitting_data_dict( ) -def save_diagonal_basis_rotation(output_path, _inv_covmat_prepared, diagonal_basis=True): - """Store a single fit-level table with eigenvalues and eigenvectors. - - The rows are globally indexed as ``eigenmode N`` to line up with diagonal-basis pseudodata. +@table +def diagonal_basis_rotation_table(output_path, _inv_covmat_prepared, diagonal_basis=True): + """ + Save the diagonal basis rotation. """ if not diagonal_basis: return None @@ -573,11 +573,8 @@ def save_diagonal_basis_rotation(output_path, _inv_covmat_prepared, diagonal_bas df_rotation = pd.DataFrame(diagonal_rotation) df_rotation.insert(0, "eig_val", eig_vals) df_rotation.index = pd.Index([f"eigenmode {i}" for i in range(len(eig_vals))]) - - output_file = table_path / DIAGONAL_BASIS_ROTATION_FILENAME - df_rotation.to_csv(output_file, sep="\t", index=True) - log.info("Saved diagonal-basis rotation table to %s", output_file) - return output_file + log.info("Saving diagonal-basis rotation table") + return df_rotation def replica_nnseed_fitting_data_dict(replica, exps_fitting_data_dict, replica_nnseed): From 88620455292d978d05a072eefeb08fa1ec8dcc1c Mon Sep 17 00:00:00 2001 From: jacoterh <54140851+jacoterh@users.noreply.github.com> Date: Tue, 21 Apr 2026 11:41:24 +0100 Subject: [PATCH 03/29] more cleaning --- validphys2/src/validphys/n3fit_data.py | 39 ++++++++++--------- .../src/validphys/tests/test_pseudodata.py | 7 ---- 2 files changed, 20 insertions(+), 26 deletions(-) diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py index e2db7e124c..8cc149f105 100644 --- a/validphys2/src/validphys/n3fit_data.py +++ b/validphys2/src/validphys/n3fit_data.py @@ -549,9 +549,6 @@ def fitting_data_dict( exps_fitting_data_dict = collect("fitting_data_dict", ("group_dataset_inputs_by_metadata",)) -groups_dataset_inputs_fitting_covmat = collect( - "dataset_inputs_fitting_covmat", 
("group_dataset_inputs_by_metadata",) -) @table @@ -604,8 +601,19 @@ def replica_nnseed_fitting_data_dict(replica, exps_fitting_data_dict, replica_nn ) +def diagonal_indexed_make_replica(indexed_make_replica, fitting_data_dict): + """Rotate one group pseudodata block to the diagonal basis.""" + values = indexed_make_replica.iloc[:, 0].to_numpy() + diag_rot = fitting_data_dict.get("data_transformation") + rotated_values = values if diag_rot is None else diag_rot @ values + return pd.DataFrame(rotated_values, columns=["data"]) + + def replica_pseudodata( - experiment_indexed_make_replica, exps_fitting_data_dict, replica, diagonal_basis=True + experiment_indexed_make_replica, + experiment_diagonal_indexed_make_replica, + replica, + diagonal_basis=True, ): """Creates a pandas DataFrame containing the generated pseudodata. The index is :py:func:`validphys.results.experiments_index` and the columns @@ -624,22 +632,15 @@ def replica_pseudodata( df.columns = [replica_column] return df - diagonal_frames = [] - mode_counter = 0 - for indexed_replica, fit_data in zip(experiment_indexed_make_replica, exps_fitting_data_dict): - values = indexed_replica.iloc[:, 0].to_numpy() - diag_rot = fit_data.get("data_transformation") - rotated_values = values if diag_rot is None else diag_rot @ values - next_counter = mode_counter + len(rotated_values) - eigenmode_index = pd.Index( - [f"eigenmode {i}" for i in range(mode_counter, next_counter)], name="eigenmode" - ) - diagonal_frames.append( - pd.DataFrame(rotated_values, index=eigenmode_index, columns=[replica_column]) - ) - mode_counter = next_counter + df = pd.concat(experiment_diagonal_indexed_make_replica, ignore_index=True) + df.columns = [replica_column] + df.index = pd.Index([f"eigenmode {i}" for i in range(len(df))]) + return df + - return pd.concat(diagonal_frames) +experiment_diagonal_indexed_make_replica = collect( + "diagonal_indexed_make_replica", ("group_dataset_inputs_by_metadata",) +) @_per_replica diff --git 
a/validphys2/src/validphys/tests/test_pseudodata.py b/validphys2/src/validphys/tests/test_pseudodata.py index 29b9f37aa4..cbb1cc8b8d 100644 --- a/validphys2/src/validphys/tests/test_pseudodata.py +++ b/validphys2/src/validphys/tests/test_pseudodata.py @@ -8,8 +8,6 @@ recreation. """ -from pathlib import Path - import numpy as np from numpy.testing import assert_allclose import pandas as pd @@ -87,11 +85,6 @@ def test_read_matches_recreate(): for fit in [PSEUDODATA_FIT, PSEUDODATA_FIT_DIAG]: diagonal_basis = True if fit == PSEUDODATA_FIT_DIAG else False - if diagonal_basis: - fit_spec = API.fit(fit=fit) - rotation_file = Path(fit_spec.path) / "tables" / "diagonal_basis_rotation.csv" - if not rotation_file.exists(): - pytest.fail(f"Diagonal rotation file not available for {fit}") reads = API.read_fit_pseudodata(fit=fit, diagonal_basis=diagonal_basis) recreates = API.recreate_fit_pseudodata(fit=fit, diagonal_basis=diagonal_basis) for read, recreate in zip(reads, recreates): From 8fd4d1f7072a505da30cd3b23f35a8185c6a99c4 Mon Sep 17 00:00:00 2001 From: jacoterh <54140851+jacoterh@users.noreply.github.com> Date: Tue, 21 Apr 2026 12:06:59 +0100 Subject: [PATCH 04/29] more cleaning --- validphys2/src/validphys/n3fit_data.py | 6 -- validphys2/src/validphys/overfit_metric.py | 44 ++------- validphys2/src/validphys/pseudodata.py | 105 +++++---------------- 3 files changed, 34 insertions(+), 121 deletions(-) diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py index 8cc149f105..f54e35ccb9 100644 --- a/validphys2/src/validphys/n3fit_data.py +++ b/validphys2/src/validphys/n3fit_data.py @@ -10,7 +10,6 @@ import functools import hashlib import logging -import pathlib import numpy as np import pandas as pd @@ -22,8 +21,6 @@ log = logging.getLogger(__name__) -DIAGONAL_BASIS_ROTATION_FILENAME = "diagonal_basis_rotation.csv" - class Hashrray(TupleComp): """Wrapper class to hash a numpy array so it can be cached.""" @@ -559,9 +556,6 @@ def 
diagonal_basis_rotation_table(output_path, _inv_covmat_prepared, diagonal_ba if not diagonal_basis: return None - table_path = pathlib.Path(output_path) / "tables" - table_path.mkdir(parents=True, exist_ok=True) - _, _, diagonal_rotation, eig_vals = _inv_covmat_prepared if diagonal_rotation is None or eig_vals is None: log.warning("No diagonal-basis rotation available; skipping table export.") diff --git a/validphys2/src/validphys/overfit_metric.py b/validphys2/src/validphys/overfit_metric.py index 9101e0a4ff..2d8c1d1c8f 100644 --- a/validphys2/src/validphys/overfit_metric.py +++ b/validphys2/src/validphys/overfit_metric.py @@ -17,7 +17,6 @@ from reportengine.table import table from validphys import plotutils from validphys.checks import check_at_least_two_replicas -from validphys.pseudodata import load_diagonal_rotation_and_eigenvalues log = logging.getLogger(__name__) @@ -40,12 +39,6 @@ def _create_new_val_pseudodata(pdf_data_index, fit_data_indices_list): return np.array(vl_data_fitrep)[:, :, 0] -def _eigenmode_ids(eigenmode_index): - return np.array( - [int(str(label).replace("eigenmode ", "")) for label in eigenmode_index], dtype=int - ) - - @check_at_least_two_replicas def calculate_chi2s_per_replica( pdf, # for the check @@ -54,7 +47,6 @@ def calculate_chi2s_per_replica( preds, dataset_inputs, groups_covmat_no_table, - fit, ): """Calculates, for each PDF replica, the chi2 of the validation with the pseudodata generated for all other replicas in the fit @@ -94,46 +86,28 @@ def calculate_chi2s_per_replica( PDF_predictions = pd.concat(pp) - diagonal_mode = isinstance( - recreate_pdf_pseudodata_no_table[0].val_idx, pd.Index - ) and not isinstance(recreate_pdf_pseudodata_no_table[0].val_idx, pd.MultiIndex) - rotation = None - eigenvalues = None - if diagonal_mode: - rotation, eigenvalues = load_diagonal_rotation_and_eigenvalues(fit) - chi2s_per_replica = [] for enum, pdf_data_index in enumerate(recreate_pdf_pseudodata_no_table): - if diagonal_mode and rotation is 
not None: - # Build full prediction vector in data space for this PDF replica and rotate it. - full_prediction = PDF_predictions.values[:, enum] - rotated_prediction = rotation @ full_prediction - mode_ids = _eigenmode_ids(pdf_data_index.val_idx) - PDF_predictions_val = rotated_prediction[mode_ids] - else: - prediction_filter = pdf_data_index.val_idx.droplevel(level=0) - prediction_filter.rename(["dataset", "data"], inplace=True) - PDF_predictions_val = PDF_predictions.loc[prediction_filter] - PDF_predictions_val = PDF_predictions_val.values[:, enum] + prediction_filter = pdf_data_index.val_idx.droplevel(level=0) + prediction_filter.rename(["dataset", "data"], inplace=True) + PDF_predictions_val = PDF_predictions.loc[prediction_filter] + PDF_predictions_val = PDF_predictions_val.values[:, enum] new_val_pseudodata_list = _create_new_val_pseudodata( pdf_data_index, recreate_pdf_pseudodata_no_table ) - if diagonal_mode and eigenvalues is not None: - mode_ids = _eigenmode_ids(pdf_data_index.val_idx) - invcovmat_vl = np.diag(1.0 / eigenvalues[mode_ids]) - else: - invcovmat_vl = np.linalg.inv( - groups_covmat_no_table[pdf_data_index.val_idx].T[pdf_data_index.val_idx] - ) + invcovmat_vl = np.linalg.inv( + groups_covmat_no_table[pdf_data_index.val_idx].T[pdf_data_index.val_idx] + ) tmp = PDF_predictions_val - new_val_pseudodata_list chi2 = np.einsum("ij,jk,ik->i", tmp, invcovmat_vl, tmp) / tmp.shape[1] chi2s_per_replica.append(chi2) + ret = np.array(chi2s_per_replica) - return np.array(chi2s_per_replica) + return ret def array_expected_overfitting( diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index c10db996a3..8f1eca10e8 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -20,8 +20,6 @@ log = logging.getLogger(__name__) -DIAGONAL_BASIS_ROTATION_FILENAME = "diagonal_basis_rotation.csv" - DataTrValSpec = namedtuple('DataTrValSpec', ['pseudodata', 'tr_idx', 'val_idx']) 
context_index = collect("groups_index", ("fitcontext",)) @@ -33,7 +31,7 @@ class ReplicaGenerationError(Exception): pass -def read_replica_pseudodata(fit, context_index, replica, diagonal_basis=True): +def read_replica_pseudodata(fit, context_index, replica): """Function to handle the reading of training and validation splits for a fit that has been produced with the ``savepseudodata`` flag set to ``True``. @@ -72,6 +70,11 @@ def read_replica_pseudodata(fit, context_index, replica, diagonal_basis=True): 5 3.117819 6 0.771079 """ + # List of length 1 due to the collect + context_index = context_index[0] + # The [0] is because of how pandas handles sorting a MultiIndex + sorted_index = context_index.sortlevel(level=range(1, 3))[0] + log.debug(f"Reading pseudodata & training/validation splits from {fit.name}.") replica_path = fit.path / "nnfit" / f"replica_{replica}" @@ -85,16 +88,8 @@ def read_replica_pseudodata(fit, context_index, replica, diagonal_basis=True): vl_pseudodatafile = "datacuts_theory_fitting_validation_pseudodata.csv" try: - if diagonal_basis: - tr = pd.read_csv(replica_path / tr_pseudodatafile, index_col=[0], sep="\t", header=0) - val = pd.read_csv(replica_path / vl_pseudodatafile, index_col=[0], sep="\t", header=0) - else: - tr = pd.read_csv( - replica_path / tr_pseudodatafile, index_col=[0, 1, 2], sep="\t", header=0 - ) - val = pd.read_csv( - replica_path / vl_pseudodatafile, index_col=[0, 1, 2], sep="\t", header=0 - ) + tr = pd.read_csv(replica_path / tr_pseudodatafile, index_col=[0, 1, 2], sep="\t", header=0) + val = pd.read_csv(replica_path / vl_pseudodatafile, index_col=[0, 1, 2], sep="\t", header=0) except FileNotFoundError as e: raise FileNotFoundError( "Could not find saved training and validation data files. 
" @@ -105,23 +100,19 @@ def read_replica_pseudodata(fit, context_index, replica, diagonal_basis=True): pseudodata = pd.concat((tr, val)) - if not diagonal_basis: - # List of length 1 due to the collect - context_index = context_index[0] - # The [0] is because of how pandas handles sorting a MultiIndex - sorted_index = context_index.sortlevel(level=range(1, 3))[0] - - # In order for this function to work also with old fit, it is necessary to remap the names - # being read (since the names in the context have already been remapped) - mapping = {} - context_datasets = context_index.get_level_values("dataset").unique() - for dsname in pseudodata.index.get_level_values("dataset").unique(): - if dsname not in context_datasets: - new_name, _ = legacy_to_new_map(dsname) - mapping[dsname] = new_name + # In order for this function to work also with old fit, it is necessary to remap the names + # being read (since the names in the context have already been remapped) + # The following checks whether a given name is in both the context and the fit, and if not + # tries to get it from the old_to_new mapping. 
+ mapping = {} + context_datasets = context_index.get_level_values("dataset").unique() + for dsname in pseudodata.index.get_level_values("dataset").unique(): + if dsname not in context_datasets: + new_name, _ = legacy_to_new_map(dsname) + mapping[dsname] = new_name - pseudodata = pseudodata.rename(mapping, level=1).sort_index(level=range(1, 3)) - pseudodata.index = sorted_index + pseudodata = pseudodata.rename(mapping, level=1).sort_index(level=range(1, 3)) + pseudodata.index = sorted_index tr = pseudodata[pseudodata["type"] == "training"] val = pseudodata[pseudodata["type"] == "validation"] @@ -323,28 +314,6 @@ def indexed_make_replica(groups_index, make_replica): return pd.DataFrame(make_replica, index=groups_index, columns=["data"]) -def _load_diagonal_rotation(fit): - """Load the fixed diagonal-basis rotation table written during vp-setupfit.""" - rotation_path = fit.path / "tables" / DIAGONAL_BASIS_ROTATION_FILENAME - rotation_table = pd.read_csv(rotation_path, sep="\t", index_col=0) - rotation_table = rotation_table.sort_index( - key=lambda idx: idx.str.replace("eigenmode ", "").astype(int) - ) - eigenvector_cols = [c for c in rotation_table.columns if c.startswith("evec_")] - return rotation_table[eigenvector_cols].to_numpy() - - -def load_diagonal_rotation_and_eigenvalues(fit): - """Return the global rotation matrix and eigenvalues for diagonal-basis fits.""" - rotation_path = fit.path / "tables" / DIAGONAL_BASIS_ROTATION_FILENAME - rotation_table = pd.read_csv(rotation_path, sep="\t", index_col=0) - rotation_table = rotation_table.sort_index( - key=lambda idx: idx.str.replace("eigenmode ", "").astype(int) - ) - eigenvector_cols = [c for c in rotation_table.columns if c.startswith("evec_")] - return rotation_table[eigenvector_cols].to_numpy(), rotation_table["eigenvalue"].to_numpy() - - def level0_commondata_wc(data, fakepdf): """ Given a validphys.core.DataGroupSpec object, load commondata and @@ -505,9 +474,7 @@ def make_level1_data(data, 
level0_commondata_wc, filterseed, data_index, sep_mul indexed_make_replicas = collect('indexed_make_replica', ('replicas',)) -def recreate_fit_pseudodata( - _recreate_fit_pseudodata, fitreplicas, fit_masks, fit, diagonal_basis=False -): +def recreate_fit_pseudodata(_recreate_fit_pseudodata, fitreplicas, fit_masks): """Function used to reconstruct the pseudodata seen by each of the Monte Carlo fit replicas. @@ -532,28 +499,16 @@ def recreate_fit_pseudodata( :py:func:`validphys.pseudodata.recreate_pdf_pseudodata` """ res = [] - diagonal_rotation = _load_diagonal_rotation(fit) if diagonal_basis else None - for pseudodata, mask, rep in zip(_recreate_fit_pseudodata, fit_masks, fitreplicas): df = pd.concat(pseudodata) df.columns = [f"replica {rep}"] - - if diagonal_rotation is not None: - rotated_values = diagonal_rotation @ df.iloc[:, 0].to_numpy() - eigenmode_index = pd.Index( - [f"eigenmode {i}" for i in range(len(rotated_values))], name="eigenmode" - ) - df = pd.DataFrame(rotated_values, index=eigenmode_index, columns=[f"replica {rep}"]) - tr_idx = df.loc[mask[0].values].index val_idx = df.loc[mask[1].values].index res.append(DataTrValSpec(df, tr_idx, val_idx)) return res -def recreate_pdf_pseudodata( - _recreate_pdf_pseudodata, pdfreplicas, pdf_masks, fit, diagonal_basis=False -): +def recreate_pdf_pseudodata(_recreate_pdf_pseudodata, pdfreplicas, pdf_masks): """Like :py:func:`validphys.pseudodata.recreate_fit_pseudodata` but accounts for the postfit reshuffling of replicas. 
@@ -573,21 +528,11 @@ def recreate_pdf_pseudodata( -------- :py:func:`validphys.pseudodata.recreate_fit_pseudodata` """ - return recreate_fit_pseudodata( - _recreate_pdf_pseudodata, pdfreplicas, pdf_masks, fit, diagonal_basis=diagonal_basis - ) + return recreate_fit_pseudodata(_recreate_pdf_pseudodata, pdfreplicas, pdf_masks) pdf_masks_no_table = collect('replica_mask', ('pdfreplicas', 'fitenvironment')) -def recreate_pdf_pseudodata_no_table( - _recreate_pdf_pseudodata, pdfreplicas, pdf_masks_no_table, fit, diagonal_basis=False -): - return recreate_pdf_pseudodata( - _recreate_pdf_pseudodata, - pdfreplicas, - pdf_masks_no_table, - fit, - diagonal_basis=diagonal_basis, - ) +def recreate_pdf_pseudodata_no_table(_recreate_pdf_pseudodata, pdfreplicas, pdf_masks_no_table): + return recreate_pdf_pseudodata(_recreate_pdf_pseudodata, pdfreplicas, pdf_masks_no_table) From af1f2415dbc96c229262d0784d5b07eff32f9ded Mon Sep 17 00:00:00 2001 From: jacoterh <54140851+jacoterh@users.noreply.github.com> Date: Fri, 24 Apr 2026 10:47:40 +0100 Subject: [PATCH 05/29] setting use_t0 = True in vp-setupfit Needed to compute the covmat for diagonalisation durin vp-setupfit --- n3fit/src/n3fit/scripts/vp_setupfit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/n3fit/src/n3fit/scripts/vp_setupfit.py b/n3fit/src/n3fit/scripts/vp_setupfit.py index f9e05c707b..dbc41100ef 100644 --- a/n3fit/src/n3fit/scripts/vp_setupfit.py +++ b/n3fit/src/n3fit/scripts/vp_setupfit.py @@ -57,7 +57,7 @@ 'validphys.photon.compute', ] -SETUPFIT_DEFAULTS = dict(use_cuts='internal') +SETUPFIT_DEFAULTS = dict(use_cuts='internal', use_t0=True) log = logging.getLogger(__name__) From ae4b0ebcdba0a409d0a1f9ab38ae2ff4ae685de8 Mon Sep 17 00:00:00 2001 From: jacoterh <54140851+jacoterh@users.noreply.github.com> Date: Fri, 24 Apr 2026 13:17:52 +0100 Subject: [PATCH 06/29] wip on loading diag rot from table dir in vp-setupfit --- n3fit/src/n3fit/scripts/vp_setupfit.py | 2 +- 
validphys2/src/validphys/config.py | 8 ++++++-- validphys2/src/validphys/n3fit_data.py | 22 ++++++++++++---------- 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/n3fit/src/n3fit/scripts/vp_setupfit.py b/n3fit/src/n3fit/scripts/vp_setupfit.py index dbc41100ef..11dfa94e9d 100644 --- a/n3fit/src/n3fit/scripts/vp_setupfit.py +++ b/n3fit/src/n3fit/scripts/vp_setupfit.py @@ -51,10 +51,10 @@ 'validphys.commondata', 'validphys.covmats', 'validphys.filters', - 'validphys.n3fit_data', 'validphys.results', 'validphys.theorycovariance.construction', 'validphys.photon.compute', + 'validphys.n3fit_data', ] SETUPFIT_DEFAULTS = dict(use_cuts='internal', use_t0=True) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 910fc1fd7b..8e57e4b39e 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -805,7 +805,7 @@ def produce_experiment_from_input(self, experiment_input, theoryid, use_cuts, fi } @configparser.explicit_node - def produce_dataset_inputs_fitting_covmat(self, use_thcovmat_in_fitting=False): + def produce_dataset_inputs_fitting_covmat(self, use_thcovmat_in_fitting=True): """ Produces the correct covmat to be used in fitting_data_dict according to some options: whether to include the theory covmat, whether to @@ -880,12 +880,16 @@ def produce_loaded_theory_covmat( point_prescriptions=None, use_thcovmat_in_sampling=False, use_thcovmat_in_fitting=False, + use_thcovmat_in_diag_basis=True, ): """ Loads the theory covmat from the correct file according to how it was generated by vp-setupfit. 
""" - if not use_thcovmat_in_sampling and not use_thcovmat_in_fitting: + + if not use_thcovmat_in_diag_basis and not ( + use_thcovmat_in_sampling or use_thcovmat_in_fitting + ): return 0.0 # Load correct file according to how the thcovmat was generated by vp-setupfit generic_path = "datacuts_theory_theorycovmatconfig_theory_covmat_custom.csv" diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py index f54e35ccb9..186297a27e 100644 --- a/validphys2/src/validphys/n3fit_data.py +++ b/validphys2/src/validphys/n3fit_data.py @@ -371,6 +371,7 @@ def fitting_data_dict( _inv_covmat_prepared, kfold_masks, fittable_datasets_masked, + output_path, threshold=0.0, ): """ @@ -421,6 +422,15 @@ def fitting_data_dict( fittable_datasets = fittable_datasets_masked # all covmat manipulation is shared across the replicas for memory purposes + + # TODO: load vp-setupfit table + diagonal_basis_saved = "datacuts_theory_fitting_diagonal_basis_rotation_table.csv" + path_diagonal_basis = output_path / "tables" / diagonal_basis_saved + eigensystem = pd.read_csv( + path_diagonal_basis, index_col=[0], header=[0], sep="\t|,", engine="python" + ) + diag_rot2 = eigensystem.iloc[:, 1:].values + eig_vals2 = eigensystem["eig_val"].values covmat, inv_true, diag_rot, eig_vals = _inv_covmat_prepared # get the masks - different for each replica so fine to call here @@ -604,10 +614,7 @@ def diagonal_indexed_make_replica(indexed_make_replica, fitting_data_dict): def replica_pseudodata( - experiment_indexed_make_replica, - experiment_diagonal_indexed_make_replica, - replica, - diagonal_basis=True, + experiment_indexed_make_replica, diagonal_indexed_make_replica, replica, diagonal_basis=True ): """Creates a pandas DataFrame containing the generated pseudodata. 
The index is :py:func:`validphys.results.experiments_index` and the columns @@ -626,17 +633,12 @@ def replica_pseudodata( df.columns = [replica_column] return df - df = pd.concat(experiment_diagonal_indexed_make_replica, ignore_index=True) + df = pd.concat(diagonal_indexed_make_replica, ignore_index=True) df.columns = [replica_column] df.index = pd.Index([f"eigenmode {i}" for i in range(len(df))]) return df -experiment_diagonal_indexed_make_replica = collect( - "diagonal_indexed_make_replica", ("group_dataset_inputs_by_metadata",) -) - - @_per_replica @table def pseudodata_table(replica_pseudodata): From ba641ee2da6a4a408f314f6e851dddd933770c9e Mon Sep 17 00:00:00 2001 From: jacoterh <54140851+jacoterh@users.noreply.github.com> Date: Mon, 27 Apr 2026 12:10:10 +0100 Subject: [PATCH 07/29] wip on loading diag rot from table dir in vp-setupfit --- n3fit/src/n3fit/scripts/vp_setupfit.py | 1 + validphys2/src/validphys/config.py | 7 +++---- validphys2/src/validphys/n3fit_data.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/n3fit/src/n3fit/scripts/vp_setupfit.py b/n3fit/src/n3fit/scripts/vp_setupfit.py index 11dfa94e9d..254911584d 100644 --- a/n3fit/src/n3fit/scripts/vp_setupfit.py +++ b/n3fit/src/n3fit/scripts/vp_setupfit.py @@ -170,6 +170,7 @@ def from_yaml(cls, o, *args, **kwargs): check_n3fit_action = 'datacuts::theory::closuretest::fitting n3fit_checks_action' else: filter_action = 'datacuts::theory::fitting filter' + # TODO: correct namespace?, the rotation needs access to the theory covmat but it's not yet available.. 
rotation_action = 'datacuts::theory::fitting diagonal_basis_rotation_table' check_n3fit_action = 'datacuts::theory::fitting n3fit_checks_action' diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 8e57e4b39e..c4c654ac1d 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -805,7 +805,7 @@ def produce_experiment_from_input(self, experiment_input, theoryid, use_cuts, fi } @configparser.explicit_node - def produce_dataset_inputs_fitting_covmat(self, use_thcovmat_in_fitting=True): + def produce_dataset_inputs_fitting_covmat(self, use_thcovmat_in_fitting=False): """ Produces the correct covmat to be used in fitting_data_dict according to some options: whether to include the theory covmat, whether to @@ -887,9 +887,8 @@ def produce_loaded_theory_covmat( was generated by vp-setupfit. """ - if not use_thcovmat_in_diag_basis and not ( - use_thcovmat_in_sampling or use_thcovmat_in_fitting - ): + # TODO: this return a zero theory covmat which means that the diagonal basis does not include the theory covmat + if not use_thcovmat_in_sampling and not use_thcovmat_in_fitting: return 0.0 # Load correct file according to how the thcovmat was generated by vp-setupfit generic_path = "datacuts_theory_theorycovmatconfig_theory_covmat_custom.csv" diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py index 186297a27e..63386c8416 100644 --- a/validphys2/src/validphys/n3fit_data.py +++ b/validphys2/src/validphys/n3fit_data.py @@ -633,7 +633,7 @@ def replica_pseudodata( df.columns = [replica_column] return df - df = pd.concat(diagonal_indexed_make_replica, ignore_index=True) + df = diagonal_indexed_make_replica.copy() df.columns = [replica_column] df.index = pd.Index([f"eigenmode {i}" for i in range(len(df))]) return df From fe3abca89aec0f801853b47fab44b54d0d65f165 Mon Sep 17 00:00:00 2001 From: jacoterh <54140851+jacoterh@users.noreply.github.com> Date: Mon, 27 Apr 2026 
12:19:44 +0100 Subject: [PATCH 08/29] updating theory cov defaults --- validphys2/src/validphys/config.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index c4c654ac1d..41f3e09ba1 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -805,7 +805,7 @@ def produce_experiment_from_input(self, experiment_input, theoryid, use_cuts, fi } @configparser.explicit_node - def produce_dataset_inputs_fitting_covmat(self, use_thcovmat_in_fitting=False): + def produce_dataset_inputs_fitting_covmat(self, use_thcovmat_in_fitting=True): """ Produces the correct covmat to be used in fitting_data_dict according to some options: whether to include the theory covmat, whether to @@ -878,16 +878,14 @@ def produce_loaded_theory_covmat( data_input, user_covmat_path=None, point_prescriptions=None, - use_thcovmat_in_sampling=False, - use_thcovmat_in_fitting=False, - use_thcovmat_in_diag_basis=True, + use_thcovmat_in_sampling=True, + use_thcovmat_in_fitting=True, ): """ Loads the theory covmat from the correct file according to how it was generated by vp-setupfit. 
""" - # TODO: this return a zero theory covmat which means that the diagonal basis does not include the theory covmat if not use_thcovmat_in_sampling and not use_thcovmat_in_fitting: return 0.0 # Load correct file according to how the thcovmat was generated by vp-setupfit From 097b9276d60963e9509c911be63f6de711638f4c Mon Sep 17 00:00:00 2001 From: jacoterh <54140851+jacoterh@users.noreply.github.com> Date: Mon, 27 Apr 2026 13:02:45 +0100 Subject: [PATCH 09/29] swapping order rotation and theory covmat in vp-setupfit --- n3fit/src/n3fit/scripts/vp_setupfit.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/n3fit/src/n3fit/scripts/vp_setupfit.py b/n3fit/src/n3fit/scripts/vp_setupfit.py index 254911584d..239a7e494c 100644 --- a/n3fit/src/n3fit/scripts/vp_setupfit.py +++ b/n3fit/src/n3fit/scripts/vp_setupfit.py @@ -175,7 +175,7 @@ def from_yaml(cls, o, *args, **kwargs): check_n3fit_action = 'datacuts::theory::fitting n3fit_checks_action' # The settings for these actions depend on the presence of closuretest - SETUPFIT_FIXED_CONFIG['actions_'] += [check_n3fit_action, filter_action, rotation_action] + SETUPFIT_FIXED_CONFIG['actions_'] += [check_n3fit_action, filter_action] # Check theory covariance matrix configuration thconfig = file_content.get('theorycovmatconfig', {}) @@ -191,6 +191,8 @@ def from_yaml(cls, o, *args, **kwargs): 'datacuts::theory::theorycovmatconfig nnfit_theory_covmat' ) + SETUPFIT_FIXED_CONFIG['actions_'] += [rotation_action] + # Check fiatlux configuration fiatlux = file_content.get('fiatlux') if fiatlux is not None: From 320163c32a41ea7a249d29a338efa37732a4c824 Mon Sep 17 00:00:00 2001 From: jacoterh <54140851+jacoterh@users.noreply.github.com> Date: Mon, 27 Apr 2026 14:42:40 +0100 Subject: [PATCH 10/29] vp-setupfit now runs --- n3fit/src/n3fit/scripts/vp_setupfit.py | 2 +- validphys2/src/validphys/n3fit_data.py | 26 +++++++++++++------------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git 
a/n3fit/src/n3fit/scripts/vp_setupfit.py b/n3fit/src/n3fit/scripts/vp_setupfit.py index 239a7e494c..10fe7a691d 100644 --- a/n3fit/src/n3fit/scripts/vp_setupfit.py +++ b/n3fit/src/n3fit/scripts/vp_setupfit.py @@ -171,7 +171,7 @@ def from_yaml(cls, o, *args, **kwargs): else: filter_action = 'datacuts::theory::fitting filter' # TODO: correct namespace?, the rotation needs access to the theory covmat but it's not yet available.. - rotation_action = 'datacuts::theory::fitting diagonal_basis_rotation_table' + rotation_action = 'datacuts::theory::theorycovmatconfig diagonal_basis_rotation_table' check_n3fit_action = 'datacuts::theory::fitting n3fit_checks_action' # The settings for these actions depend on the presence of closuretest diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py index 63386c8416..462797e1f2 100644 --- a/validphys2/src/validphys/n3fit_data.py +++ b/validphys2/src/validphys/n3fit_data.py @@ -313,24 +313,24 @@ def fittable_datasets_masked(data): return validphys_group_extractor(data.datasets) -def _hashed_dataset_inputs_fitting_covmat(dataset_inputs_fitting_covmat) -> Hashrray: - """Wrap the covmat into a Hashrray for caches to work""" - return Hashrray(dataset_inputs_fitting_covmat) +# def _hashed_dataset_inputs_fitting_covmat(dataset_inputs_fitting_covmat) -> Hashrray: +# """Wrap the covmat into a Hashrray for caches to work""" +# return Hashrray(dataset_inputs_fitting_covmat) -@functools.lru_cache -def _inv_covmat_prepared(_hashed_dataset_inputs_fitting_covmat, diagonal_basis=True): +# @functools.lru_cache +def _inv_covmat_prepared(dataset_inputs_fitting_covmat, diagonal_basis=True): """Returns the inverse covmats for training, validation and total - attending to the right masks and whether it is diagonal or not. 
- - Since the masks and number of datapoints need to be treated for 1-point datasets - it also returns the right ndata and masks for training and validation: + attending to the right masks and whether it is diagonal or not. + s + Since the masks and number of datapoints need to be treated for 1-point datasets + it also returns the right ndata and masks for training and validation: """ - log.info( - f"_inv_covmat_prepared called with covmat hash={hash(_hashed_dataset_inputs_fitting_covmat)}, diagonal_basis={diagonal_basis}" - ) - covmat = _hashed_dataset_inputs_fitting_covmat.array + # log.info( + # f"_inv_covmat_prepared called with covmat hash={hash(_hashed_dataset_inputs_fitting_covmat)}, diagonal_basis={diagonal_basis}" + # ) + covmat = dataset_inputs_fitting_covmat diagonal_rotation = None eig_vals = None From 140bdcf555d648c17c1305c411b1889f0a074458 Mon Sep 17 00:00:00 2001 From: jacoterh <54140851+jacoterh@users.noreply.github.com> Date: Mon, 27 Apr 2026 15:25:34 +0100 Subject: [PATCH 11/29] calling nnfit_theory_covmat instead of reading the csv --- validphys2/src/validphys/config.py | 5 ++--- validphys2/src/validphys/n3fit_data.py | 27 ++++++++++++++++---------- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 41f3e09ba1..299a6f5093 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -805,17 +805,16 @@ def produce_experiment_from_input(self, experiment_input, theoryid, use_cuts, fi } @configparser.explicit_node - def produce_dataset_inputs_fitting_covmat(self, use_thcovmat_in_fitting=True): + def produce_dataset_inputs_fitting_covmat(self): """ Produces the correct covmat to be used in fitting_data_dict according to some options: whether to include the theory covmat, whether to separate the multiplcative errors and whether to compute the experimental covmat using the t0 prescription. 
""" + # TODO update docstring from validphys import covmats - if use_thcovmat_in_fitting: - return covmats.dataset_inputs_t0_total_covmat return covmats.dataset_inputs_t0_exp_covmat def produce_sep_mult(self, separate_multiplicative=False): diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py index 462797e1f2..4492e245f9 100644 --- a/validphys2/src/validphys/n3fit_data.py +++ b/validphys2/src/validphys/n3fit_data.py @@ -319,7 +319,7 @@ def fittable_datasets_masked(data): # @functools.lru_cache -def _inv_covmat_prepared(dataset_inputs_fitting_covmat, diagonal_basis=True): +def _inv_covmat_prepared(dataset_inputs_fitting_covmat, nnfit_theory_covmat, diagonal_basis=True): """Returns the inverse covmats for training, validation and total attending to the right masks and whether it is diagonal or not. s @@ -330,7 +330,13 @@ def _inv_covmat_prepared(dataset_inputs_fitting_covmat, diagonal_basis=True): # log.info( # f"_inv_covmat_prepared called with covmat hash={hash(_hashed_dataset_inputs_fitting_covmat)}, diagonal_basis={diagonal_basis}" # ) + + # TODO: JtH, note to self: inv_covmat_prepared can no longer be called during n3fit because nnfit_theory_covmat is in a different namespace + covmat = dataset_inputs_fitting_covmat + if nnfit_theory_covmat is not None: + covmat += nnfit_theory_covmat + diagonal_rotation = None eig_vals = None @@ -368,7 +374,6 @@ def fitting_data_dict( make_replica, dataset_inputs_loaded_cd_with_cuts, masks, - _inv_covmat_prepared, kfold_masks, fittable_datasets_masked, output_path, @@ -421,17 +426,17 @@ def fitting_data_dict( expdata = make_replica fittable_datasets = fittable_datasets_masked - # all covmat manipulation is shared across the replicas for memory purposes - - # TODO: load vp-setupfit table - diagonal_basis_saved = "datacuts_theory_fitting_diagonal_basis_rotation_table.csv" + # load covmat stored at the time of vp-setupfit + diagonal_basis_saved = 
"datacuts_theory_theorycovmatconfig_diagonal_basis_rotation_table.csv" path_diagonal_basis = output_path / "tables" / diagonal_basis_saved eigensystem = pd.read_csv( path_diagonal_basis, index_col=[0], header=[0], sep="\t|,", engine="python" ) - diag_rot2 = eigensystem.iloc[:, 1:].values - eig_vals2 = eigensystem["eig_val"].values - covmat, inv_true, diag_rot, eig_vals = _inv_covmat_prepared + diag_rot = eigensystem.iloc[:, 1:].values + eig_vals = eigensystem["eig_val"].values + inv_true = np.diag(1 / eig_vals) + covmat = np.diag(eig_vals) + # covmat, inv_true, diag_rot, eig_vals = _inv_covmat_prepared # get the masks - different for each replica so fine to call here tr_mask, vl_mask = masks.tr_masks[0], masks.vl_masks[0] @@ -563,10 +568,12 @@ def diagonal_basis_rotation_table(output_path, _inv_covmat_prepared, diagonal_ba """ Save the diagonal basis rotation. """ + covmat, _, diagonal_rotation, eig_vals = _inv_covmat_prepared + if not diagonal_basis: + return None - _, _, diagonal_rotation, eig_vals = _inv_covmat_prepared if diagonal_rotation is None or eig_vals is None: log.warning("No diagonal-basis rotation available; skipping table export.") return None From ba59edfe5e04c7ab7b341be5d6b2dd9554ed4fc4 Mon Sep 17 00:00:00 2001 From: jacoterh <54140851+jacoterh@users.noreply.github.com> Date: Mon, 27 Apr 2026 15:49:14 +0100 Subject: [PATCH 12/29] vp-setupfit stores covmat in diagonal and non-diagonal case --- n3fit/src/n3fit/scripts/vp_setupfit.py | 3 +- validphys2/src/validphys/n3fit_data.py | 51 +++++++++++++++++--------- 2 files changed, 34 insertions(+), 20 deletions(-) diff --git a/n3fit/src/n3fit/scripts/vp_setupfit.py b/n3fit/src/n3fit/scripts/vp_setupfit.py index 10fe7a691d..6c54139653 100644 --- a/n3fit/src/n3fit/scripts/vp_setupfit.py +++ b/n3fit/src/n3fit/scripts/vp_setupfit.py @@ -170,8 +170,7 @@ def from_yaml(cls, o, *args, **kwargs): check_n3fit_action = 'datacuts::theory::closuretest::fitting n3fit_checks_action' else: filter_action = 
'datacuts::theory::fitting filter' - # TODO: correct namespace?, the rotation needs access to the theory covmat but it's not yet available.. - rotation_action = 'datacuts::theory::theorycovmatconfig diagonal_basis_rotation_table' + rotation_action = 'datacuts::theory::theorycovmatconfig fitting_covmat_table' check_n3fit_action = 'datacuts::theory::fitting n3fit_checks_action' # The settings for these actions depend on the presence of closuretest diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py index 4492e245f9..acb4b40ff5 100644 --- a/validphys2/src/validphys/n3fit_data.py +++ b/validphys2/src/validphys/n3fit_data.py @@ -378,6 +378,7 @@ def fitting_data_dict( fittable_datasets_masked, output_path, threshold=0.0, + diagonal_basis=True, ): """ Provider which takes the information from validphys ``data``. @@ -427,16 +428,33 @@ def fitting_data_dict( fittable_datasets = fittable_datasets_masked # load covmat stored at the time of vp-setupfit - diagonal_basis_saved = "datacuts_theory_theorycovmatconfig_diagonal_basis_rotation_table.csv" - path_diagonal_basis = output_path / "tables" / diagonal_basis_saved - eigensystem = pd.read_csv( - path_diagonal_basis, index_col=[0], header=[0], sep="\t|,", engine="python" - ) - diag_rot = eigensystem.iloc[:, 1:].values - eig_vals = eigensystem["eig_val"].values - inv_true = np.diag(1 / eig_vals) - covmat = np.diag(eig_vals) - # covmat, inv_true, diag_rot, eig_vals = _inv_covmat_prepared + if diagonal_basis: + diagonal_basis_saved = "datacuts_theory_theorycovmatconfig_fitting_covmat_table.csv" + path_diagonal_basis = output_path / "tables" / diagonal_basis_saved + eigensystem = pd.read_csv( + path_diagonal_basis, index_col=[0], header=[0], sep="\t|,", engine="python" + ) + diag_rot = eigensystem.iloc[:, 1:].values + eig_vals = eigensystem["eig_val"].values + inv_true = np.diag(1 / eig_vals) + covmat = np.diag(eig_vals) + else: + covmat_saved = 
"datacuts_theory_theorycovmatconfig_fitting_covmat_table.csv" + path_covmat = output_path / "tables" / covmat_saved + # TODO: check if it is save to convert to numpy already at this stage, is the indexing consistent? + covmat = pd.read_csv( + path_covmat, index_col=[0, 1, 2], header=[0, 1, 2], sep="\t|,", engine="python" + ).values + diag_rot = None + eig_vals = None + # TODO: cache this so that is reused + diag_inv_sqrt_total = 1 / np.sqrt(np.diag(covmat)) + cormat_total = np.einsum("i, ij, j -> ij", diag_inv_sqrt_total, covmat, diag_inv_sqrt_total) + inv_true = ( + np.diag(diag_inv_sqrt_total) + @ np.linalg.inv(cormat_total) + @ np.diag(diag_inv_sqrt_total) + ) # get the masks - different for each replica so fine to call here tr_mask, vl_mask = masks.tr_masks[0], masks.vl_masks[0] @@ -564,19 +582,16 @@ def fitting_data_dict( @table -def diagonal_basis_rotation_table(output_path, _inv_covmat_prepared, diagonal_basis=True): +def fitting_covmat_table(output_path, _inv_covmat_prepared, diagonal_basis=True): """ - Save the diagonal basis rotation. 
+ Stores the fitting covariance matrix if diagonal_basis is False, else store the rotation matrix and eigenvalues """ + # TODO: JtH, check if this includes the theory covmat covmat, _, diagonal_rotation, eig_vals = _inv_covmat_prepared if not diagonal_basis: - - return None - - if diagonal_rotation is None or eig_vals is None: - log.warning("No diagonal-basis rotation available; skipping table export.") - return None + log.info("Saving fitting covmat") + return covmat df_rotation = pd.DataFrame(diagonal_rotation) df_rotation.insert(0, "eig_val", eig_vals) From b987466ec07a79037371e77c051c3925a6a7ebb1 Mon Sep 17 00:00:00 2001 From: jacoterh <54140851+jacoterh@users.noreply.github.com> Date: Mon, 27 Apr 2026 16:47:55 +0100 Subject: [PATCH 13/29] caching inverse covmat in non diagonal basis --- validphys2/src/validphys/config.py | 16 ++++++++ validphys2/src/validphys/n3fit_data.py | 57 ++++++++++++-------------- 2 files changed, 43 insertions(+), 30 deletions(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 299a6f5093..ff7404e2b5 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -871,6 +871,22 @@ def produce_dataset_inputs_sampling_covmat( else: return covmats.dataset_inputs_exp_covmat + def produce_loaded_fit_covmat(self, output_path, data_input): + """ + Loads the theory covmat from the correct file according to how it + was generated by vp-setupfit. 
+ """ + + generic_path = "datacuts_theory_theorycovmatconfig_fitting_covmat_table.csv" + fit_covmat_path = output_path / "tables" / generic_path + fit_covmat = pd.read_csv( + fit_covmat_path, index_col=[0, 1, 2], header=[0, 1, 2], sep="\t|,", engine="python" + ).fillna(0) + # change ordering according to exp_covmat (so according to runcard order) + tmp = fit_covmat.droplevel(0, axis=0).droplevel(0, axis=1) + bb = [str(i) for i in data_input] + return tmp.reindex(index=bb, columns=bb, level=0).values + def produce_loaded_theory_covmat( self, output_path, diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py index acb4b40ff5..ec21cdfd90 100644 --- a/validphys2/src/validphys/n3fit_data.py +++ b/validphys2/src/validphys/n3fit_data.py @@ -313,23 +313,36 @@ def fittable_datasets_masked(data): return validphys_group_extractor(data.datasets) -# def _hashed_dataset_inputs_fitting_covmat(dataset_inputs_fitting_covmat) -> Hashrray: -# """Wrap the covmat into a Hashrray for caches to work""" -# return Hashrray(dataset_inputs_fitting_covmat) +def _hashed_dataset_inputs_fitting_covmat(loaded_fit_covmat) -> Hashrray: + """Wrap the covmat into a Hashrray for caches to work""" + return Hashrray(loaded_fit_covmat) -# @functools.lru_cache -def _inv_covmat_prepared(dataset_inputs_fitting_covmat, nnfit_theory_covmat, diagonal_basis=True): - """Returns the inverse covmats for training, validation and total +@functools.lru_cache +def _inv_covmat_prepared(_hashed_dataset_inputs_fitting_covmat): + """Returns the inverse covmats for training, validation and total when diagonal_basis = False""" + log.info( + f"_inv_covmat_prepared called with covmat hash={hash(_hashed_dataset_inputs_fitting_covmat)}" + ) + covmat = _hashed_dataset_inputs_fitting_covmat.array + + diag_inv_sqrt_total = 1 / np.sqrt(np.diag(covmat)) + cormat_total = np.einsum("i, ij, j -> ij", diag_inv_sqrt_total, covmat, diag_inv_sqrt_total) + inv_total = ( + np.diag(diag_inv_sqrt_total) @ 
np.linalg.inv(cormat_total) @ np.diag(diag_inv_sqrt_total) + ) + + return inv_total + + +def _covmat_prepared(dataset_inputs_fitting_covmat, nnfit_theory_covmat, diagonal_basis=True): + """Returns the covmats for training, validation and total attending to the right masks and whether it is diagonal or not. s Since the masks and number of datapoints need to be treated for 1-point datasets it also returns the right ndata and masks for training and validation: """ - # log.info( - # f"_inv_covmat_prepared called with covmat hash={hash(_hashed_dataset_inputs_fitting_covmat)}, diagonal_basis={diagonal_basis}" - # ) # TODO: JtH, note to self: inv_covmat_prepared can no longer be called during n3fit because nnfit_theory_covmat is in a different namespace @@ -356,17 +369,7 @@ def _inv_covmat_prepared(dataset_inputs_fitting_covmat, nnfit_theory_covmat, dia inv_total = np.diag(1 / eig_vals) - else: - - diag_inv_sqrt_total = 1 / np.sqrt(np.diag(covmat)) - cormat_total = np.einsum("i, ij, j -> ij", diag_inv_sqrt_total, covmat, diag_inv_sqrt_total) - inv_total = ( - np.diag(diag_inv_sqrt_total) - @ np.linalg.inv(cormat_total) - @ np.diag(diag_inv_sqrt_total) - ) - - return (covmat, inv_total, diagonal_rotation, eig_vals) + return covmat, diagonal_rotation, eig_vals def fitting_data_dict( @@ -374,6 +377,7 @@ def fitting_data_dict( make_replica, dataset_inputs_loaded_cd_with_cuts, masks, + _inv_covmat_prepared, kfold_masks, fittable_datasets_masked, output_path, @@ -447,14 +451,7 @@ def fitting_data_dict( ).values diag_rot = None eig_vals = None - # TODO: cache this so that is reused - diag_inv_sqrt_total = 1 / np.sqrt(np.diag(covmat)) - cormat_total = np.einsum("i, ij, j -> ij", diag_inv_sqrt_total, covmat, diag_inv_sqrt_total) - inv_true = ( - np.diag(diag_inv_sqrt_total) - @ np.linalg.inv(cormat_total) - @ np.diag(diag_inv_sqrt_total) - ) + inv_true = _inv_covmat_prepared # get the masks - different for each replica so fine to call here tr_mask, vl_mask = masks.tr_masks[0], 
masks.vl_masks[0] @@ -582,12 +579,12 @@ def fitting_data_dict( @table -def fitting_covmat_table(output_path, _inv_covmat_prepared, diagonal_basis=True): +def fitting_covmat_table(output_path, _covmat_prepared, diagonal_basis=True): """ Stores the fitting covariance matrix if diagonal_basis is False, else store the rotation matrix and eigenvalues """ # TODO: JtH, check if this includes the theory covmat - covmat, _, diagonal_rotation, eig_vals = _inv_covmat_prepared + covmat, diagonal_rotation, eig_vals = _covmat_prepared if not diagonal_basis: log.info("Saving fitting covmat") From c55f697eab1be79bbd4b3a3ed9b6a173e21c52a0 Mon Sep 17 00:00:00 2001 From: jacoterh <54140851+jacoterh@users.noreply.github.com> Date: Mon, 27 Apr 2026 22:32:58 +0100 Subject: [PATCH 14/29] cleaning --- validphys2/src/validphys/config.py | 24 ++++--- validphys2/src/validphys/n3fit_data.py | 88 ++++++++++++++------------ 2 files changed, 65 insertions(+), 47 deletions(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index ff7404e2b5..9210433255 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -8,6 +8,7 @@ import numbers import pathlib +import numpy as np import pandas as pd from nnpdf_data import legacy_to_new_map @@ -871,7 +872,7 @@ def produce_dataset_inputs_sampling_covmat( else: return covmats.dataset_inputs_exp_covmat - def produce_loaded_fit_covmat(self, output_path, data_input): + def produce_loaded_fit_covmat(self, output_path, data_input, diagonal_basis=True): """ Loads the theory covmat from the correct file according to how it was generated by vp-setupfit. 
@@ -879,13 +880,20 @@ def produce_loaded_fit_covmat(self, output_path, data_input): generic_path = "datacuts_theory_theorycovmatconfig_fitting_covmat_table.csv" fit_covmat_path = output_path / "tables" / generic_path - fit_covmat = pd.read_csv( - fit_covmat_path, index_col=[0, 1, 2], header=[0, 1, 2], sep="\t|,", engine="python" - ).fillna(0) - # change ordering according to exp_covmat (so according to runcard order) - tmp = fit_covmat.droplevel(0, axis=0).droplevel(0, axis=1) - bb = [str(i) for i in data_input] - return tmp.reindex(index=bb, columns=bb, level=0).values + if not diagonal_basis: + fit_covmat = pd.read_csv( + fit_covmat_path, index_col=[0, 1, 2], header=[0, 1, 2], sep="\t|,", engine="python" + ).fillna(0) + # change ordering according to exp_covmat (so according to runcard order) + tmp = fit_covmat.droplevel(0, axis=0).droplevel(0, axis=1) + bb = [str(i) for i in data_input] + return tmp.reindex(index=bb, columns=bb, level=0).values + else: + eigensystem = pd.read_csv( + fit_covmat_path, index_col=[0], header=[0], sep="\t|,", engine="python" + ) + + return np.diag(eigensystem["eig_val"].values) def produce_loaded_theory_covmat( self, diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py index ec21cdfd90..232e68355e 100644 --- a/validphys2/src/validphys/n3fit_data.py +++ b/validphys2/src/validphys/n3fit_data.py @@ -319,29 +319,62 @@ def _hashed_dataset_inputs_fitting_covmat(loaded_fit_covmat) -> Hashrray: @functools.lru_cache -def _inv_covmat_prepared(_hashed_dataset_inputs_fitting_covmat): +def _inv_covmat_prepared(_hashed_dataset_inputs_fitting_covmat, output_path, diagonal_basis=True): """Returns the inverse covmats for training, validation and total when diagonal_basis = False""" log.info( f"_inv_covmat_prepared called with covmat hash={hash(_hashed_dataset_inputs_fitting_covmat)}" ) covmat = _hashed_dataset_inputs_fitting_covmat.array + diag_rot = None + eig_vals = None - diag_inv_sqrt_total = 1 / 
np.sqrt(np.diag(covmat)) - cormat_total = np.einsum("i, ij, j -> ij", diag_inv_sqrt_total, covmat, diag_inv_sqrt_total) - inv_total = ( - np.diag(diag_inv_sqrt_total) @ np.linalg.inv(cormat_total) @ np.diag(diag_inv_sqrt_total) - ) + if diagonal_basis: + diagonal_basis_saved = "datacuts_theory_theorycovmatconfig_fitting_covmat_table.csv" + path_diagonal_basis = output_path / "tables" / diagonal_basis_saved + eigensystem = pd.read_csv( + path_diagonal_basis, index_col=[0], header=[0], sep="\t|,", engine="python" + ) + diag_rot = eigensystem.iloc[:, 1:].values + eig_vals = eigensystem["eig_val"].values + inv_total = np.diag(1 / eig_vals) + + else: + diag_inv_sqrt_total = 1 / np.sqrt(np.diag(covmat)) + cormat_total = np.einsum("i, ij, j -> ij", diag_inv_sqrt_total, covmat, diag_inv_sqrt_total) + inv_total = ( + np.diag(diag_inv_sqrt_total) + @ np.linalg.inv(cormat_total) + @ np.diag(diag_inv_sqrt_total) + ) - return inv_total + return covmat, inv_total, diag_rot, eig_vals -def _covmat_prepared(dataset_inputs_fitting_covmat, nnfit_theory_covmat, diagonal_basis=True): - """Returns the covmats for training, validation and total - attending to the right masks and whether it is diagonal or not. - s - Since the masks and number of datapoints need to be treated for 1-point datasets - it also returns the right ndata and masks for training and validation: +def _fiting_covmat(dataset_inputs_fitting_covmat, nnfit_theory_covmat, diagonal_basis=True): + """Prepare the fitting covariance matrix by optionally adding theory contributions + and transforming to diagonal basis. + Parameters + ---------- + dataset_inputs_fitting_covmat : np.ndarray + The experimental covariance matrix from the datasets. + nnfit_theory_covmat : np.ndarray or None + The theory covariance matrix to add to the experimental covmat. + If None, only the experimental covmat is used. 
+ diagonal_basis : bool, optional + If True, transform the covariance matrix to diagonal basis by extracting + eigenvalues and eigenvectors of the correlation matrix. Default is True. + + Returns + ------- + covmat : np.ndarray + The prepared covariance matrix (sum of experimental and theory covmats). + diagonal_rotation : np.ndarray or None + The rotation matrix (transposed eigenvectors) to transform data to diagonal basis. + Only returned if diagonal_basis=True, otherwise None. + eig_vals : np.ndarray or None + The eigenvalues of the correlation matrix in diagonal basis. + Only returned if diagonal_basis=True, otherwise None. """ # TODO: JtH, note to self: inv_covmat_prepared can no longer be called during n3fit because nnfit_theory_covmat is in a different namespace @@ -365,10 +398,6 @@ def _covmat_prepared(dataset_inputs_fitting_covmat, nnfit_theory_covmat, diagona uT = np.einsum("i, ik -> ik", diag_inv_sqrt, uT) diagonal_rotation = uT.T - ndata = len(eig_vals) - - inv_total = np.diag(1 / eig_vals) - return covmat, diagonal_rotation, eig_vals @@ -432,26 +461,7 @@ def fitting_data_dict( fittable_datasets = fittable_datasets_masked # load covmat stored at the time of vp-setupfit - if diagonal_basis: - diagonal_basis_saved = "datacuts_theory_theorycovmatconfig_fitting_covmat_table.csv" - path_diagonal_basis = output_path / "tables" / diagonal_basis_saved - eigensystem = pd.read_csv( - path_diagonal_basis, index_col=[0], header=[0], sep="\t|,", engine="python" - ) - diag_rot = eigensystem.iloc[:, 1:].values - eig_vals = eigensystem["eig_val"].values - inv_true = np.diag(1 / eig_vals) - covmat = np.diag(eig_vals) - else: - covmat_saved = "datacuts_theory_theorycovmatconfig_fitting_covmat_table.csv" - path_covmat = output_path / "tables" / covmat_saved - # TODO: check if it is save to convert to numpy already at this stage, is the indexing consistent? 
- covmat = pd.read_csv( - path_covmat, index_col=[0, 1, 2], header=[0, 1, 2], sep="\t|,", engine="python" - ).values - diag_rot = None - eig_vals = None - inv_true = _inv_covmat_prepared + covmat, inv_true, diag_rot, eig_vals = _inv_covmat_prepared # get the masks - different for each replica so fine to call here tr_mask, vl_mask = masks.tr_masks[0], masks.vl_masks[0] @@ -579,12 +589,12 @@ def fitting_data_dict( @table -def fitting_covmat_table(output_path, _covmat_prepared, diagonal_basis=True): +def fitting_covmat_table(output_path, _fiting_covmat, diagonal_basis=True): """ Stores the fitting covariance matrix if diagonal_basis is False, else store the rotation matrix and eigenvalues """ # TODO: JtH, check if this includes the theory covmat - covmat, diagonal_rotation, eig_vals = _covmat_prepared + covmat, diagonal_rotation, eig_vals = _fiting_covmat if not diagonal_basis: log.info("Saving fitting covmat") From df49a5bf21392ea42aef901f2cd3f688921702e4 Mon Sep 17 00:00:00 2001 From: jacoterh <54140851+jacoterh@users.noreply.github.com> Date: Wed, 29 Apr 2026 20:49:40 +0100 Subject: [PATCH 15/29] fixing covmat by reference issue --- n3fit/src/n3fit/scripts/vp_setupfit.py | 4 +--- validphys2/src/validphys/config.py | 11 +++++++---- validphys2/src/validphys/covmats.py | 12 ++++++++++-- validphys2/src/validphys/n3fit_data.py | 8 +------- 4 files changed, 19 insertions(+), 16 deletions(-) diff --git a/n3fit/src/n3fit/scripts/vp_setupfit.py b/n3fit/src/n3fit/scripts/vp_setupfit.py index 6c54139653..987a2de368 100644 --- a/n3fit/src/n3fit/scripts/vp_setupfit.py +++ b/n3fit/src/n3fit/scripts/vp_setupfit.py @@ -174,7 +174,7 @@ def from_yaml(cls, o, *args, **kwargs): check_n3fit_action = 'datacuts::theory::fitting n3fit_checks_action' # The settings for these actions depend on the presence of closuretest - SETUPFIT_FIXED_CONFIG['actions_'] += [check_n3fit_action, filter_action] + SETUPFIT_FIXED_CONFIG['actions_'] += [check_n3fit_action, filter_action, rotation_action] # 
Check theory covariance matrix configuration thconfig = file_content.get('theorycovmatconfig', {}) @@ -190,8 +190,6 @@ def from_yaml(cls, o, *args, **kwargs): 'datacuts::theory::theorycovmatconfig nnfit_theory_covmat' ) - SETUPFIT_FIXED_CONFIG['actions_'] += [rotation_action] - # Check fiatlux configuration fiatlux = file_content.get('fiatlux') if fiatlux is not None: diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 9210433255..a967697314 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -806,16 +806,18 @@ def produce_experiment_from_input(self, experiment_input, theoryid, use_cuts, fi } @configparser.explicit_node - def produce_dataset_inputs_fitting_covmat(self): + def produce_dataset_inputs_fitting_covmat(self, use_thcovmat_in_fitting=False): """ Produces the correct covmat to be used in fitting_data_dict according to some options: whether to include the theory covmat, whether to separate the multiplcative errors and whether to compute the experimental covmat using the t0 prescription. """ - # TODO update docstring + from validphys import covmats + if use_thcovmat_in_fitting: + return covmats.dataset_inputs_t0_total_covmat return covmats.dataset_inputs_t0_exp_covmat def produce_sep_mult(self, separate_multiplicative=False): @@ -901,8 +903,8 @@ def produce_loaded_theory_covmat( data_input, user_covmat_path=None, point_prescriptions=None, - use_thcovmat_in_sampling=True, - use_thcovmat_in_fitting=True, + use_thcovmat_in_sampling=False, + use_thcovmat_in_fitting=False, ): """ Loads the theory covmat from the correct file according to how it @@ -1352,6 +1354,7 @@ def produce_nnfit_theory_covmat( This function is only used in vp-setupfit to store the necessary covmats as .csv files in the tables directory. 
""" + if point_prescriptions is not None: if user_covmat_path is not None: # Both scalevar and user uncertainties diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index 195371ad5d..c8a06d9163 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -472,13 +472,21 @@ def dataset_inputs_exp_covmat_separate( return covmat -def dataset_inputs_t0_total_covmat(dataset_inputs_t0_exp_covmat, loaded_theory_covmat): +def dataset_inputs_t0_total_covmat(dataset_inputs_t0_exp_covmat, nnfit_theory_covmat, data): """ Function to compute the covmat to be used for the sampling by make_replica and for the chi2 by fitting_data_dict. In this case the t0 prescription is used for the experimental covmat and the multiplicative errors are included in it. Moreover, the theory covmat is added to experimental covmat. """ - return dataset_inputs_t0_exp_covmat + loaded_theory_covmat + tmp = nnfit_theory_covmat.droplevel(0, axis=0).droplevel(0, axis=1) + # old to new names mapping + new_names = {d[0]: legacy_to_new_map(d[0])[0] for d in tmp.index} + tmp = tmp.rename(columns=new_names, index=new_names, level=0) + # reorder + bb = [str(i) for i in data] + theory_covmat = tmp.reindex(index=bb, columns=bb, level=0).values + + return dataset_inputs_t0_exp_covmat + theory_covmat def dataset_inputs_t0_exp_covmat( diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py index 232e68355e..f6e43f3337 100644 --- a/validphys2/src/validphys/n3fit_data.py +++ b/validphys2/src/validphys/n3fit_data.py @@ -350,7 +350,7 @@ def _inv_covmat_prepared(_hashed_dataset_inputs_fitting_covmat, output_path, dia return covmat, inv_total, diag_rot, eig_vals -def _fiting_covmat(dataset_inputs_fitting_covmat, nnfit_theory_covmat, diagonal_basis=True): +def _fiting_covmat(dataset_inputs_fitting_covmat, diagonal_basis=True): """Prepare the fitting covariance matrix by optionally adding theory contributions and 
transforming to diagonal basis. @@ -377,15 +377,9 @@ def _fiting_covmat(dataset_inputs_fitting_covmat, nnfit_theory_covmat, diagonal_ Only returned if diagonal_basis=True, otherwise None. """ - # TODO: JtH, note to self: inv_covmat_prepared can no longer be called during n3fit because nnfit_theory_covmat is in a different namespace - covmat = dataset_inputs_fitting_covmat - if nnfit_theory_covmat is not None: - covmat += nnfit_theory_covmat - diagonal_rotation = None eig_vals = None - if diagonal_basis: log.info("working in diagonal basis.") From 300963865fedb66dd34c48cc17debed272b8b5ab Mon Sep 17 00:00:00 2001 From: jacoterh <54140851+jacoterh@users.noreply.github.com> Date: Thu, 30 Apr 2026 16:42:01 +0100 Subject: [PATCH 16/29] n3fit runs again --- validphys2/src/validphys/config.py | 60 ++++++++++++++------------ validphys2/src/validphys/covmats.py | 27 +++++------- validphys2/src/validphys/n3fit_data.py | 12 +++--- validphys2/src/validphys/pseudodata.py | 7 ++- 4 files changed, 54 insertions(+), 52 deletions(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index a967697314..a8027dce62 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -806,7 +806,7 @@ def produce_experiment_from_input(self, experiment_input, theoryid, use_cuts, fi } @configparser.explicit_node - def produce_dataset_inputs_fitting_covmat(self, use_thcovmat_in_fitting=False): + def produce_dataset_inputs_fitting_covmat(self, use_thcovmat_in_fitting=True): """ Produces the correct covmat to be used in fitting_data_dict according to some options: whether to include the theory covmat, whether to @@ -827,7 +827,11 @@ def produce_sep_mult(self, separate_multiplicative=False): @configparser.explicit_node def produce_dataset_inputs_sampling_covmat( - self, sep_mult=False, use_thcovmat_in_sampling=False, use_t0_sampling=True + self, + loaded_fit_covmat, + sep_mult=False, + use_thcovmat_in_sampling=False, + 
use_t0_sampling=True, ): """ Produces the correct MC replica method sampling covmat to be used in @@ -850,36 +854,38 @@ def produce_dataset_inputs_sampling_covmat( """ from validphys import covmats - if use_t0_sampling: - if use_thcovmat_in_sampling: - if sep_mult: - return covmats.dataset_inputs_t0_total_covmat_separate - else: - return covmats.dataset_inputs_t0_total_covmat - else: - if sep_mult: - return covmats.dataset_inputs_t0_exp_covmat_separate - else: - return covmats.dataset_inputs_t0_exp_covmat - - else: - if use_thcovmat_in_sampling: - if sep_mult: - return covmats.dataset_inputs_total_covmat_separate - else: - return covmats.dataset_inputs_total_covmat - else: - if sep_mult: - return covmats.dataset_inputs_exp_covmat_separate - else: - return covmats.dataset_inputs_exp_covmat + return loaded_fit_covmat + + # if use_t0_sampling: + # if use_thcovmat_in_sampling: + # if sep_mult: + # return covmats.dataset_inputs_t0_total_covmat_separate + # else: + # return covmats.dataset_inputs_t0_total_covmat + # else: + # if sep_mult: + # return covmats.dataset_inputs_t0_exp_covmat_separate + # else: + # return covmats.dataset_inputs_t0_exp_covmat + # + # else: + # if use_thcovmat_in_sampling: + # if sep_mult: + # return covmats.dataset_inputs_total_covmat_separate + # else: + # return covmats.dataset_inputs_total_covmat + # else: + # if sep_mult: + # return covmats.dataset_inputs_exp_covmat_separate + # else: + # return covmats.dataset_inputs_exp_covmat def produce_loaded_fit_covmat(self, output_path, data_input, diagonal_basis=True): """ - Loads the theory covmat from the correct file according to how it - was generated by vp-setupfit. 
+ Loads the fit covmat (including the theory covmat if present) as saved during vp-setupfit """ + # the csv includes both the experimental and theory covmat (if requested in the runcard) generic_path = "datacuts_theory_theorycovmatconfig_fitting_covmat_table.csv" fit_covmat_path = output_path / "tables" / generic_path if not diagonal_basis: diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index c8a06d9163..fdbbc7315a 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -225,6 +225,7 @@ def dataset_inputs_covmat_from_systematics( covmat = (covmat / sqrt_weights).T / sqrt_weights if norm_threshold is not None: covmat = regularize_covmat(covmat, norm_threshold=norm_threshold) + return covmat @@ -406,14 +407,14 @@ def dataset_inputs_t0_covmat_from_systematics( def dataset_inputs_t0_total_covmat_separate( - dataset_inputs_t0_exp_covmat_separate, loaded_theory_covmat + dataset_inputs_t0_exp_covmat_separate, nnfit_theory_covmat ): """ Function to compute the covmat to be used for the sampling by make_replica. In this case the t0 prescription is used for the experimental covmat and the multiplicative errors are separated. Moreover, the theory covmat is added to experimental covmat. """ - return dataset_inputs_t0_exp_covmat_separate + loaded_theory_covmat + return dataset_inputs_t0_exp_covmat_separate + nnfit_theory_covmat def dataset_inputs_t0_exp_covmat_separate( @@ -440,13 +441,13 @@ def dataset_inputs_t0_exp_covmat_separate( return covmat -def dataset_inputs_total_covmat_separate(dataset_inputs_exp_covmat_separate, loaded_theory_covmat): +def dataset_inputs_total_covmat_separate(dataset_inputs_exp_covmat_separate, nnfit_theory_covmat): """ Function to compute the covmat to be used for the sampling by make_replica. In this case the t0 prescription is not used for the experimental covmat and the multiplicative errors are separated. Moreover, the theory covmat is added to experimental covmat. 
""" - return dataset_inputs_exp_covmat_separate + loaded_theory_covmat + return dataset_inputs_exp_covmat_separate + nnfit_theory_covmat def dataset_inputs_exp_covmat_separate( @@ -472,21 +473,14 @@ def dataset_inputs_exp_covmat_separate( return covmat -def dataset_inputs_t0_total_covmat(dataset_inputs_t0_exp_covmat, nnfit_theory_covmat, data): +def dataset_inputs_t0_total_covmat(dataset_inputs_t0_exp_covmat, nnfit_theory_covmat): """ Function to compute the covmat to be used for the sampling by make_replica and for the chi2 by fitting_data_dict. In this case the t0 prescription is used for the experimental covmat and the multiplicative errors are included in it. Moreover, the theory covmat is added to experimental covmat. """ - tmp = nnfit_theory_covmat.droplevel(0, axis=0).droplevel(0, axis=1) - # old to new names mapping - new_names = {d[0]: legacy_to_new_map(d[0])[0] for d in tmp.index} - tmp = tmp.rename(columns=new_names, index=new_names, level=0) - # reorder - bb = [str(i) for i in data] - theory_covmat = tmp.reindex(index=bb, columns=bb, level=0).values - return dataset_inputs_t0_exp_covmat + theory_covmat + return dataset_inputs_t0_exp_covmat + nnfit_theory_covmat def dataset_inputs_t0_exp_covmat( @@ -496,6 +490,7 @@ def dataset_inputs_t0_exp_covmat( use_weights_in_covmat=True, norm_threshold=None, dataset_inputs_t0_predictions, + data_index, ): """ Function to compute the covmat to be used for the sampling by make_replica and for the chi2 @@ -510,16 +505,16 @@ def dataset_inputs_t0_exp_covmat( dataset_inputs_t0_predictions, False, ) - return covmat + return pd.DataFrame(covmat, index=data_index, columns=data_index) -def dataset_inputs_total_covmat(dataset_inputs_exp_covmat, loaded_theory_covmat): +def dataset_inputs_total_covmat(dataset_inputs_exp_covmat, nnfit_theory_covmat): """ Function to compute the covmat to be used for the sampling by make_replica and for the chi2 by fitting_data_dict. 
In this case the t0 prescription is not used for the experimental covmat and the multiplicative errors are included in it. Moreover, the theory covmat is added to experimental covmat. """ - return dataset_inputs_exp_covmat + loaded_theory_covmat + return dataset_inputs_exp_covmat + nnfit_theory_covmat def dataset_inputs_exp_covmat( diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py index f6e43f3337..a1611c179a 100644 --- a/validphys2/src/validphys/n3fit_data.py +++ b/validphys2/src/validphys/n3fit_data.py @@ -171,12 +171,12 @@ def __init__(self, group_name, seed, tr_masks, vl_masks): super().__init__(group_name, seed) -def diagonal_masks(data, replica_trvlseed, dataset_inputs_fitting_covmat, diagonal_frac=0.75): +def diagonal_masks(data, replica_trvlseed, loaded_fit_covmat, diagonal_frac=0.75): nameseed = int(hashlib.sha256(str(data).encode()).hexdigest(), 16) % 10**8 nameseed += replica_trvlseed rng = np.random.Generator(np.random.PCG64(nameseed)) - ndata = len(dataset_inputs_fitting_covmat) + ndata = len(loaded_fit_covmat) # construct training mask by selecting a fraction of the eigenvalues trmax = int(ndata * diagonal_frac) @@ -350,7 +350,9 @@ def _inv_covmat_prepared(_hashed_dataset_inputs_fitting_covmat, output_path, dia return covmat, inv_total, diag_rot, eig_vals -def _fiting_covmat(dataset_inputs_fitting_covmat, diagonal_basis=True): +def _fiting_covmat( + dataset_inputs_fitting_covmat, nnfit_theory_covmat, data_input, diagonal_basis=True +): """Prepare the fitting covariance matrix by optionally adding theory contributions and transforming to diagonal basis. 
@@ -380,6 +382,7 @@ def _fiting_covmat(dataset_inputs_fitting_covmat, diagonal_basis=True): covmat = dataset_inputs_fitting_covmat diagonal_rotation = None eig_vals = None + if diagonal_basis: log.info("working in diagonal basis.") @@ -583,11 +586,10 @@ def fitting_data_dict( @table -def fitting_covmat_table(output_path, _fiting_covmat, diagonal_basis=True): +def fitting_covmat_table(output_path, _fiting_covmat, data_index, diagonal_basis=True): """ Stores the fitting covariance matrix if diagonal_basis is False, else store the rotation matrix and eigenvalues """ - # TODO: JtH, check if this includes the theory covmat covmat, diagonal_rotation, eig_vals = _fiting_covmat if not diagonal_basis: diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index 8f1eca10e8..4870fa0afe 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -123,7 +123,7 @@ def read_replica_pseudodata(fit, context_index, replica): def make_replica( central_values_array, group_replica_mcseed, - dataset_inputs_sampling_covmat, + loaded_fit_covmat, group_multiplicative_errors=None, group_positivity_mask=None, sep_mult=False, @@ -195,8 +195,7 @@ def make_replica( # Set random seed rng = np.random.default_rng(seed=group_replica_mcseed) # construct covmat - covmat = dataset_inputs_sampling_covmat - covmat_sqrt = sqrt_covmat(covmat) + covmat_sqrt = sqrt_covmat(loaded_fit_covmat) full_mask = ( group_positivity_mask @@ -224,7 +223,7 @@ def make_replica( mult_shifts.append(mult_shift) # If sep_mult is true then the multiplicative shifts were not included in the covmat - shifts = covmat_sqrt @ rng.normal(size=covmat.shape[1]) + shifts = covmat_sqrt @ rng.normal(size=loaded_fit_covmat.shape[1]) mult_part = 1.0 if sep_mult: special_mult_errors = group_multiplicative_errors["special_mult"] From 14c4b8ee5d67a01286e3679ddcfcb908af79e4dd Mon Sep 17 00:00:00 2001 From: jacoterh <54140851+jacoterh@users.noreply.github.com> Date: 
Thu, 30 Apr 2026 17:01:26 +0100 Subject: [PATCH 17/29] extending number of headers --- validphys2/src/validphys/config.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index a8027dce62..79c14787c1 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -890,7 +890,11 @@ def produce_loaded_fit_covmat(self, output_path, data_input, diagonal_basis=True fit_covmat_path = output_path / "tables" / generic_path if not diagonal_basis: fit_covmat = pd.read_csv( - fit_covmat_path, index_col=[0, 1, 2], header=[0, 1, 2], sep="\t|,", engine="python" + fit_covmat_path, + index_col=[0, 1, 2, 3], + header=[0, 1, 2, 3], + sep="\t|,", + engine="python", ).fillna(0) # change ordering according to exp_covmat (so according to runcard order) tmp = fit_covmat.droplevel(0, axis=0).droplevel(0, axis=1) From 2559764b14374a45d6bc347d594430298ae0ecdd Mon Sep 17 00:00:00 2001 From: jacoterh <54140851+jacoterh@users.noreply.github.com> Date: Fri, 1 May 2026 08:52:11 +0100 Subject: [PATCH 18/29] passing covmat in data basis to make_replica for correct sampling --- validphys2/src/validphys/config.py | 82 -------------------------- validphys2/src/validphys/covmats.py | 1 - validphys2/src/validphys/n3fit_data.py | 13 ++-- validphys2/src/validphys/pseudodata.py | 6 +- 4 files changed, 10 insertions(+), 92 deletions(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 79c14787c1..632b477b73 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -825,88 +825,6 @@ def produce_sep_mult(self, separate_multiplicative=False): return False return True - @configparser.explicit_node - def produce_dataset_inputs_sampling_covmat( - self, - loaded_fit_covmat, - sep_mult=False, - use_thcovmat_in_sampling=False, - use_t0_sampling=True, - ): - """ - Produces the correct MC replica method sampling covmat to 
be used in - make_replica according to some options: whether to sample using a t0 - covariance matrix, include the theory covmat and whether to - separate the multiplcative errors. - - Parameters - ---------- - sep_mult : bool, default=False - Whether to separate the multiplicative errors. - use_thcovmat_in_sampling : bool, default=False - Whether to include the theory covariance matrix. - use_t0_sampling : bool, default=True - Whether to sample using a t0 covariance matrix. - - Returns - ------- - Callable - """ - from validphys import covmats - - return loaded_fit_covmat - - # if use_t0_sampling: - # if use_thcovmat_in_sampling: - # if sep_mult: - # return covmats.dataset_inputs_t0_total_covmat_separate - # else: - # return covmats.dataset_inputs_t0_total_covmat - # else: - # if sep_mult: - # return covmats.dataset_inputs_t0_exp_covmat_separate - # else: - # return covmats.dataset_inputs_t0_exp_covmat - # - # else: - # if use_thcovmat_in_sampling: - # if sep_mult: - # return covmats.dataset_inputs_total_covmat_separate - # else: - # return covmats.dataset_inputs_total_covmat - # else: - # if sep_mult: - # return covmats.dataset_inputs_exp_covmat_separate - # else: - # return covmats.dataset_inputs_exp_covmat - - def produce_loaded_fit_covmat(self, output_path, data_input, diagonal_basis=True): - """ - Loads the fit covmat (including the theory covmat if present) as saved during vp-setupfit - """ - - # the csv includes both the experimental and theory covmat (if requested in the runcard) - generic_path = "datacuts_theory_theorycovmatconfig_fitting_covmat_table.csv" - fit_covmat_path = output_path / "tables" / generic_path - if not diagonal_basis: - fit_covmat = pd.read_csv( - fit_covmat_path, - index_col=[0, 1, 2, 3], - header=[0, 1, 2, 3], - sep="\t|,", - engine="python", - ).fillna(0) - # change ordering according to exp_covmat (so according to runcard order) - tmp = fit_covmat.droplevel(0, axis=0).droplevel(0, axis=1) - bb = [str(i) for i in data_input] - 
return tmp.reindex(index=bb, columns=bb, level=0).values - else: - eigensystem = pd.read_csv( - fit_covmat_path, index_col=[0], header=[0], sep="\t|,", engine="python" - ) - - return np.diag(eigensystem["eig_val"].values) - def produce_loaded_theory_covmat( self, output_path, diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index fdbbc7315a..4a802be215 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -479,7 +479,6 @@ def dataset_inputs_t0_total_covmat(dataset_inputs_t0_exp_covmat, nnfit_theory_co by fitting_data_dict. In this case the t0 prescription is used for the experimental covmat and the multiplicative errors are included in it. Moreover, the theory covmat is added to experimental covmat. """ - return dataset_inputs_t0_exp_covmat + nnfit_theory_covmat diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py index a1611c179a..de04aaa43c 100644 --- a/validphys2/src/validphys/n3fit_data.py +++ b/validphys2/src/validphys/n3fit_data.py @@ -171,12 +171,12 @@ def __init__(self, group_name, seed, tr_masks, vl_masks): super().__init__(group_name, seed) -def diagonal_masks(data, replica_trvlseed, loaded_fit_covmat, diagonal_frac=0.75): +def diagonal_masks(data, replica_trvlseed, dataset_inputs_covmat_t0_considered, diagonal_frac=0.75): nameseed = int(hashlib.sha256(str(data).encode()).hexdigest(), 16) % 10**8 nameseed += replica_trvlseed rng = np.random.Generator(np.random.PCG64(nameseed)) - ndata = len(loaded_fit_covmat) + ndata = len(dataset_inputs_covmat_t0_considered) # construct training mask by selecting a fraction of the eigenvalues trmax = int(ndata * diagonal_frac) @@ -313,9 +313,9 @@ def fittable_datasets_masked(data): return validphys_group_extractor(data.datasets) -def _hashed_dataset_inputs_fitting_covmat(loaded_fit_covmat) -> Hashrray: +def _hashed_dataset_inputs_fitting_covmat(dataset_inputs_covmat_t0_considered) -> Hashrray: """Wrap the 
covmat into a Hashrray for caches to work""" - return Hashrray(loaded_fit_covmat) + return Hashrray(dataset_inputs_covmat_t0_considered) @functools.lru_cache @@ -324,7 +324,7 @@ def _inv_covmat_prepared(_hashed_dataset_inputs_fitting_covmat, output_path, dia log.info( f"_inv_covmat_prepared called with covmat hash={hash(_hashed_dataset_inputs_fitting_covmat)}" ) - covmat = _hashed_dataset_inputs_fitting_covmat.array + diag_rot = None eig_vals = None @@ -336,9 +336,10 @@ def _inv_covmat_prepared(_hashed_dataset_inputs_fitting_covmat, output_path, dia ) diag_rot = eigensystem.iloc[:, 1:].values eig_vals = eigensystem["eig_val"].values + covmat = np.diag(eig_vals) inv_total = np.diag(1 / eig_vals) - else: + covmat = _hashed_dataset_inputs_fitting_covmat.array diag_inv_sqrt_total = 1 / np.sqrt(np.diag(covmat)) cormat_total = np.einsum("i, ij, j -> ij", diag_inv_sqrt_total, covmat, diag_inv_sqrt_total) inv_total = ( diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index 4870fa0afe..79fd4a27c5 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -123,7 +123,7 @@ def read_replica_pseudodata(fit, context_index, replica): def make_replica( central_values_array, group_replica_mcseed, - loaded_fit_covmat, + dataset_inputs_covmat_t0_considered, group_multiplicative_errors=None, group_positivity_mask=None, sep_mult=False, @@ -195,7 +195,7 @@ def make_replica( # Set random seed rng = np.random.default_rng(seed=group_replica_mcseed) # construct covmat - covmat_sqrt = sqrt_covmat(loaded_fit_covmat) + covmat_sqrt = sqrt_covmat(dataset_inputs_covmat_t0_considered) full_mask = ( group_positivity_mask @@ -223,7 +223,7 @@ def make_replica( mult_shifts.append(mult_shift) # If sep_mult is true then the multiplicative shifts were not included in the covmat - shifts = covmat_sqrt @ rng.normal(size=loaded_fit_covmat.shape[1]) + shifts = covmat_sqrt @ 
rng.normal(size=dataset_inputs_covmat_t0_considered.shape[1]) mult_part = 1.0 if sep_mult: special_mult_errors = group_multiplicative_errors["special_mult"] From 1e800c0d4a37108ecb2e73435d223397b14b69e1 Mon Sep 17 00:00:00 2001 From: jacoterh <54140851+jacoterh@users.noreply.github.com> Date: Fri, 1 May 2026 11:31:40 +0100 Subject: [PATCH 19/29] indexing by process group --- validphys2/src/validphys/covmats.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index 4a802be215..5443b00b0a 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -479,6 +479,7 @@ def dataset_inputs_t0_total_covmat(dataset_inputs_t0_exp_covmat, nnfit_theory_co by fitting_data_dict. In this case the t0 prescription is used for the experimental covmat and the multiplicative errors are included in it. Moreover, the theory covmat is added to experimental covmat. """ + return dataset_inputs_t0_exp_covmat + nnfit_theory_covmat @@ -489,7 +490,7 @@ def dataset_inputs_t0_exp_covmat( use_weights_in_covmat=True, norm_threshold=None, dataset_inputs_t0_predictions, - data_index, + procs_index_matched, ): """ Function to compute the covmat to be used for the sampling by make_replica and for the chi2 @@ -504,7 +505,8 @@ def dataset_inputs_t0_exp_covmat( dataset_inputs_t0_predictions, False, ) - return pd.DataFrame(covmat, index=data_index, columns=data_index) + # TODO: how to know for sure if the index matches the covmat value ordering? 
+ return pd.DataFrame(covmat, index=procs_index_matched, columns=procs_index_matched) def dataset_inputs_total_covmat(dataset_inputs_exp_covmat, nnfit_theory_covmat): From 7123c15a6fae660986e93ab819a7a3f0cbed243e Mon Sep 17 00:00:00 2001 From: jacoterh <54140851+jacoterh@users.noreply.github.com> Date: Fri, 1 May 2026 11:48:15 +0100 Subject: [PATCH 20/29] make rotation action condition on presence of theorycovmatconfig in runcard --- n3fit/src/n3fit/scripts/vp_setupfit.py | 5 +++- validphys2/src/validphys/config.py | 2 +- validphys2/src/validphys/n3fit_data.py | 34 ++++++++++++++++++++------ 3 files changed, 32 insertions(+), 9 deletions(-) diff --git a/n3fit/src/n3fit/scripts/vp_setupfit.py b/n3fit/src/n3fit/scripts/vp_setupfit.py index 987a2de368..5ffad64e60 100644 --- a/n3fit/src/n3fit/scripts/vp_setupfit.py +++ b/n3fit/src/n3fit/scripts/vp_setupfit.py @@ -170,7 +170,10 @@ def from_yaml(cls, o, *args, **kwargs): check_n3fit_action = 'datacuts::theory::closuretest::fitting n3fit_checks_action' else: filter_action = 'datacuts::theory::fitting filter' - rotation_action = 'datacuts::theory::theorycovmatconfig fitting_covmat_table' + if file_content.get('theorycovmatconfig') is not None: + rotation_action = 'datacuts::theory::theorycovmatconfig fitting_covmat_table' + else: + rotation_action = 'datacuts::theory fitting_covmat_table' check_n3fit_action = 'datacuts::theory::fitting n3fit_checks_action' # The settings for these actions depend on the presence of closuretest diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 632b477b73..858cfb024f 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -806,7 +806,7 @@ def produce_experiment_from_input(self, experiment_input, theoryid, use_cuts, fi } @configparser.explicit_node - def produce_dataset_inputs_fitting_covmat(self, use_thcovmat_in_fitting=True): + def produce_dataset_inputs_fitting_covmat(self, use_thcovmat_in_fitting=False): """ 
Produces the correct covmat to be used in fitting_data_dict according to some options: whether to include the theory covmat, whether to diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py index de04aaa43c..74aecab899 100644 --- a/validphys2/src/validphys/n3fit_data.py +++ b/validphys2/src/validphys/n3fit_data.py @@ -320,7 +320,32 @@ def _hashed_dataset_inputs_fitting_covmat(dataset_inputs_covmat_t0_considered) - @functools.lru_cache def _inv_covmat_prepared(_hashed_dataset_inputs_fitting_covmat, output_path, diagonal_basis=True): - """Returns the inverse covmats for training, validation and total when diagonal_basis = False""" + """Prepare the covariance matrix and its inverse, optionally transforming to diagonal basis. + + Parameters + ---------- + _hashed_dataset_inputs_fitting_covmat : Hashrray + Wrapped covariance matrix for caching purposes. + output_path : Path + Path to output directory containing diagonal basis data if needed. + diagonal_basis : bool, optional + If True, load pre-computed diagonal basis from tables. If False, use the + covariance matrix directly and compute the inverse via correlation matrix + inversion. Default is True. + + Returns + ------- + covmat : np.ndarray + The covariance matrix (diagonal if diagonal_basis=True, full otherwise). + inv_total : np.ndarray + The inverse of the covariance matrix. + diag_rot : np.ndarray or None + The rotation matrix (eigenvectors transposed) for diagonal basis transformation. + Only returned when diagonal_basis=True, otherwise None. + eig_vals : np.ndarray or None + The eigenvalues of the correlation matrix. Only returned when + diagonal_basis=True, otherwise None. 
+ """ log.info( f"_inv_covmat_prepared called with covmat hash={hash(_hashed_dataset_inputs_fitting_covmat)}" ) @@ -351,9 +376,7 @@ def _inv_covmat_prepared(_hashed_dataset_inputs_fitting_covmat, output_path, dia return covmat, inv_total, diag_rot, eig_vals -def _fiting_covmat( - dataset_inputs_fitting_covmat, nnfit_theory_covmat, data_input, diagonal_basis=True -): +def _fiting_covmat(dataset_inputs_fitting_covmat, data_input, diagonal_basis=True): """Prepare the fitting covariance matrix by optionally adding theory contributions and transforming to diagonal basis. @@ -361,9 +384,6 @@ def _fiting_covmat( ---------- dataset_inputs_fitting_covmat : np.ndarray The experimental covariance matrix from the datasets. - nnfit_theory_covmat : np.ndarray or None - The theory covariance matrix to add to the experimental covmat. - If None, only the experimental covmat is used. diagonal_basis : bool, optional If True, transform the covariance matrix to diagonal basis by extracting eigenvalues and eigenvectors of the correlation matrix. Default is True. 
From 6b02b685528684f7751477e66e95cc7eaa9e66cc Mon Sep 17 00:00:00 2001 From: Jaco ter Hoeve <54140851+jacoterh@users.noreply.github.com> Date: Fri, 1 May 2026 12:31:28 +0100 Subject: [PATCH 21/29] Update validphys2/src/validphys/config.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- validphys2/src/validphys/config.py | 1 - 1 file changed, 1 deletion(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 858cfb024f..d131e442e4 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -8,7 +8,6 @@ import numbers import pathlib -import numpy as np import pandas as pd from nnpdf_data import legacy_to_new_map From 0597efe73df73f22a80d4ba7e2156ced55ccb07e Mon Sep 17 00:00:00 2001 From: jacoterh <54140851+jacoterh@users.noreply.github.com> Date: Fri, 1 May 2026 13:08:23 +0100 Subject: [PATCH 22/29] making csv file name theorycovmat dependent + no longer modify global setupfitconfig --- n3fit/src/n3fit/scripts/vp_setupfit.py | 24 ++++++++++++++---------- validphys2/src/validphys/n3fit_data.py | 12 ++++++++++-- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/n3fit/src/n3fit/scripts/vp_setupfit.py b/n3fit/src/n3fit/scripts/vp_setupfit.py index 5ffad64e60..0e4e83cd27 100644 --- a/n3fit/src/n3fit/scripts/vp_setupfit.py +++ b/n3fit/src/n3fit/scripts/vp_setupfit.py @@ -25,6 +25,7 @@ # top. 
+import copy import hashlib import logging import pathlib @@ -142,6 +143,9 @@ class SetupFitConfig(Config): @classmethod def from_yaml(cls, o, *args, **kwargs): + # Create a fresh copy of the fixed config to avoid in-place modifications + fixed_config = copy.deepcopy(SETUPFIT_FIXED_CONFIG) + try: file_content = yaml_safe.load(o) except error.YAMLError as e: @@ -157,10 +161,10 @@ def from_yaml(cls, o, *args, **kwargs): # Use faketheoryid to create the L0 data to be stored into the filter folder # (L1 data is stored if fakedata is True) if 'faketheoryid' in closuredict: - # make sure theory key exists in SETUPFIT_FIXED_CONFIG - SETUPFIT_FIXED_CONFIG.setdefault('theory', {}) + # make sure theory key exists in fixed_config + fixed_config.setdefault('theory', {}) # overwrite theoryid with the faketheoryid - SETUPFIT_FIXED_CONFIG['theory']['theoryid'] = closuredict['faketheoryid'] + fixed_config['theory']['theoryid'] = closuredict['faketheoryid'] # download theoryid since it will be used in the fit try: loader.check_theoryID(file_content['theory']['theoryid']) @@ -177,7 +181,7 @@ def from_yaml(cls, o, *args, **kwargs): check_n3fit_action = 'datacuts::theory::fitting n3fit_checks_action' # The settings for these actions depend on the presence of closuretest - SETUPFIT_FIXED_CONFIG['actions_'] += [check_n3fit_action, filter_action, rotation_action] + fixed_config['actions_'] += [check_n3fit_action, filter_action, rotation_action] # Check theory covariance matrix configuration thconfig = file_content.get('theorycovmatconfig', {}) @@ -189,7 +193,7 @@ def from_yaml(cls, o, *args, **kwargs): "`point_prescriptions: ['9 point', '3 point']`" ) if thconfig: - SETUPFIT_FIXED_CONFIG['actions_'].append( + fixed_config['actions_'].append( 'datacuts::theory::theorycovmatconfig nnfit_theory_covmat' ) @@ -219,14 +223,14 @@ def from_yaml(cls, o, *args, **kwargs): if compute_in_setupfit: log.info("Forcing photon computation with FiatLux during setupfit.") # Since the photon will be 
computed, check that the luxset and additional_errors exist - SETUPFIT_FIXED_CONFIG['actions_'].append('fiatlux check_luxset_exists') + fixed_config['actions_'].append('fiatlux check_luxset_exists') if fiatlux.get("additional_errors"): - SETUPFIT_FIXED_CONFIG['actions_'].append('fiatlux check_additional_errors') - SETUPFIT_FIXED_CONFIG['actions_'].append('fiatlux::theory compute_photon_to_disk') + fixed_config['actions_'].append('fiatlux check_additional_errors') + fixed_config['actions_'].append('fiatlux::theory compute_photon_to_disk') # Check positivity bound if file_content.get('positivity_bound') is not None: - SETUPFIT_FIXED_CONFIG['actions_'].append('positivity_bound check_unpolarized_bc') + fixed_config['actions_'].append('positivity_bound check_unpolarized_bc') # Check hyperscan trials_config = file_content.get('trial_specs', {}) @@ -238,7 +242,7 @@ def from_yaml(cls, o, *args, **kwargs): file_content.setdefault(k, v) # Update file content with fixed configuration - file_content.update(SETUPFIT_FIXED_CONFIG) + file_content.update(fixed_config) return cls(file_content, *args, **kwargs) diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py index 74aecab899..94bf19785f 100644 --- a/validphys2/src/validphys/n3fit_data.py +++ b/validphys2/src/validphys/n3fit_data.py @@ -319,7 +319,12 @@ def _hashed_dataset_inputs_fitting_covmat(dataset_inputs_covmat_t0_considered) - @functools.lru_cache -def _inv_covmat_prepared(_hashed_dataset_inputs_fitting_covmat, output_path, diagonal_basis=True): +def _inv_covmat_prepared( + _hashed_dataset_inputs_fitting_covmat, + output_path, + use_thcovmat_in_fitting=False, + diagonal_basis=True, +): """Prepare the covariance matrix and its inverse, optionally transforming to diagonal basis. 
Parameters @@ -354,7 +359,10 @@ def _inv_covmat_prepared(_hashed_dataset_inputs_fitting_covmat, output_path, dia eig_vals = None if diagonal_basis: - diagonal_basis_saved = "datacuts_theory_theorycovmatconfig_fitting_covmat_table.csv" + if use_thcovmat_in_fitting: + diagonal_basis_saved = "datacuts_theory_theorycovmatconfig_fitting_covmat_table.csv" + else: + diagonal_basis_saved = "datacuts_theory_fitting_covmat_table.csv" path_diagonal_basis = output_path / "tables" / diagonal_basis_saved eigensystem = pd.read_csv( path_diagonal_basis, index_col=[0], header=[0], sep="\t|,", engine="python" From 47c21fe68f41d0015c96ac2fb2356e970d7632fd Mon Sep 17 00:00:00 2001 From: jacoterh <54140851+jacoterh@users.noreply.github.com> Date: Thu, 14 May 2026 13:05:30 +0200 Subject: [PATCH 23/29] make_replica uses loaded theory covmat --- validphys2/src/validphys/config.py | 50 ++++++++++++++++++++++++++ validphys2/src/validphys/covmats.py | 14 ++++++-- validphys2/src/validphys/n3fit_data.py | 2 +- validphys2/src/validphys/pseudodata.py | 6 ++-- 4 files changed, 65 insertions(+), 7 deletions(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index d131e442e4..77b86a0d22 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -824,6 +824,56 @@ def produce_experiment_from_input(self, experiment_input, theoryid, use_cuts, fi } + @configparser.explicit_node + def produce_dataset_inputs_sampling_covmat( + self, sep_mult=False, use_thcovmat_in_sampling=False, use_t0_sampling=True + ): + """ + Produces the correct MC replica method sampling covmat to be used in + make_replica according to some options: whether to sample using a t0 + covariance matrix, include the theory covmat and whether to + separate the multiplicative errors. + + Parameters + ---------- + sep_mult : bool, default=False + Whether to separate the multiplicative errors. 
+ use_thcovmat_in_sampling : bool, default=False + Whether to include the theory covariance matrix. + use_t0_sampling : bool, default=True + Whether to sample using a t0 covariance matrix. + + Returns + ------- + Callable + """ + from validphys import covmats + + # TODO: JtH also add loaders for the remain covmats + if use_t0_sampling: + if use_thcovmat_in_sampling: + if sep_mult: + return covmats.dataset_inputs_t0_total_covmat_separate + else: + return covmats.dataset_load_inputs_t0_total_covmat + else: + if sep_mult: + return covmats.dataset_inputs_t0_exp_covmat_separate + else: + return covmats.dataset_inputs_t0_exp_covmat + + else: + if use_thcovmat_in_sampling: + if sep_mult: + return covmats.dataset_inputs_total_covmat_separate + else: + return covmats.dataset_inputs_total_covmat + else: + if sep_mult: + return covmats.dataset_inputs_exp_covmat_separate + else: + return covmats.dataset_inputs_exp_covmat + def produce_loaded_theory_covmat( self, output_path, diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index 5443b00b0a..c3808cd5d5 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -483,6 +483,15 @@ def dataset_inputs_t0_total_covmat(dataset_inputs_t0_exp_covmat, nnfit_theory_co return dataset_inputs_t0_exp_covmat + nnfit_theory_covmat +def dataset_load_inputs_t0_total_covmat(dataset_inputs_t0_exp_covmat, loaded_theory_covmat): + """ + Function to compute the covmat to be used for the sampling by make_replica and for the chi2 + by fitting_data_dict. In this case the t0 prescription is used for the experimental covmat + and the multiplicative errors are included in it. Moreover, the theory covmat is added to experimental covmat. 
+ """ + return dataset_inputs_t0_exp_covmat + loaded_theory_covmat + + def dataset_inputs_t0_exp_covmat( dataset_inputs_loaded_cd_with_cuts, *, @@ -490,7 +499,6 @@ def dataset_inputs_t0_exp_covmat( use_weights_in_covmat=True, norm_threshold=None, dataset_inputs_t0_predictions, - procs_index_matched, ): """ Function to compute the covmat to be used for the sampling by make_replica and for the chi2 @@ -505,8 +513,8 @@ def dataset_inputs_t0_exp_covmat( dataset_inputs_t0_predictions, False, ) - # TODO: how to know for sure if the index matches the covmat value ordering? - return pd.DataFrame(covmat, index=procs_index_matched, columns=procs_index_matched) + + return covmat def dataset_inputs_total_covmat(dataset_inputs_exp_covmat, nnfit_theory_covmat): diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py index 94bf19785f..b86f956331 100644 --- a/validphys2/src/validphys/n3fit_data.py +++ b/validphys2/src/validphys/n3fit_data.py @@ -623,7 +623,7 @@ def fitting_covmat_table(output_path, _fiting_covmat, data_index, diagonal_basis if not diagonal_basis: log.info("Saving fitting covmat") - return covmat + return pd.DataFrame(covmat, index=data_index, columns=data_index) df_rotation = pd.DataFrame(diagonal_rotation) df_rotation.insert(0, "eig_val", eig_vals) diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index 79fd4a27c5..96126f9974 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -123,7 +123,7 @@ def read_replica_pseudodata(fit, context_index, replica): def make_replica( central_values_array, group_replica_mcseed, - dataset_inputs_covmat_t0_considered, + dataset_inputs_sampling_covmat, group_multiplicative_errors=None, group_positivity_mask=None, sep_mult=False, @@ -195,7 +195,7 @@ def make_replica( # Set random seed rng = np.random.default_rng(seed=group_replica_mcseed) # construct covmat - covmat_sqrt = 
sqrt_covmat(dataset_inputs_covmat_t0_considered) + covmat_sqrt = sqrt_covmat(dataset_inputs_sampling_covmat) full_mask = ( group_positivity_mask @@ -223,7 +223,7 @@ def make_replica( mult_shifts.append(mult_shift) # If sep_mult is true then the multiplicative shifts were not included in the covmat - shifts = covmat_sqrt @ rng.normal(size=dataset_inputs_covmat_t0_considered.shape[1]) + shifts = covmat_sqrt @ rng.normal(size=dataset_inputs_sampling_covmat.shape[1]) mult_part = 1.0 if sep_mult: special_mult_errors = group_multiplicative_errors["special_mult"] From da14aff455f819704fa38e83ce353cc580c6a0a0 Mon Sep 17 00:00:00 2001 From: jacoterh <54140851+jacoterh@users.noreply.github.com> Date: Thu, 14 May 2026 13:12:32 +0200 Subject: [PATCH 24/29] adding loaded theory covmat variants --- validphys2/src/validphys/config.py | 7 +++---- validphys2/src/validphys/covmats.py | 31 +++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 77b86a0d22..d84ec11b6a 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -849,11 +849,10 @@ def produce_dataset_inputs_sampling_covmat( """ from validphys import covmats - # TODO: JtH also add loaders for the remain covmats if use_t0_sampling: if use_thcovmat_in_sampling: if sep_mult: - return covmats.dataset_inputs_t0_total_covmat_separate + return covmats.dataset_load_inputs_t0_total_covmat_separate else: return covmats.dataset_load_inputs_t0_total_covmat else: @@ -865,9 +864,9 @@ def produce_dataset_inputs_sampling_covmat( else: if use_thcovmat_in_sampling: if sep_mult: - return covmats.dataset_inputs_total_covmat_separate + return covmats.dataset_load_inputs_total_covmat_separate else: - return covmats.dataset_inputs_total_covmat + return covmats.dataset_load_inputs_total_covmat else: if sep_mult: return covmats.dataset_inputs_exp_covmat_separate diff --git 
a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index c3808cd5d5..7de450dcc0 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -406,6 +406,17 @@ def dataset_inputs_t0_covmat_from_systematics( ) +def dataset_load_inputs_t0_total_covmat_separate( + dataset_inputs_t0_exp_covmat_separate, loaded_theory_covmat +): + """ + Function to compute the covmat to be used for the sampling by make_replica. + In this case the t0 prescription is used for the experimental covmat and the multiplicative + errors are separated. Moreover, the theory covmat is added to experimental covmat. + """ + return dataset_inputs_t0_exp_covmat_separate + loaded_theory_covmat + + def dataset_inputs_t0_total_covmat_separate( dataset_inputs_t0_exp_covmat_separate, nnfit_theory_covmat ): @@ -441,6 +452,17 @@ def dataset_inputs_t0_exp_covmat_separate( return covmat +def dataset_load_inputs_total_covmat_separate( + dataset_inputs_exp_covmat_separate, loaded_theory_covmat +): + """ + Function to compute the covmat to be used for the sampling by make_replica. + In this case the t0 prescription is not used for the experimental covmat and the multiplicative + errors are separated. Moreover, the theory covmat is added to experimental covmat. + """ + return dataset_inputs_exp_covmat_separate + loaded_theory_covmat + + def dataset_inputs_total_covmat_separate(dataset_inputs_exp_covmat_separate, nnfit_theory_covmat): """ Function to compute the covmat to be used for the sampling by make_replica. @@ -517,6 +539,15 @@ def dataset_inputs_t0_exp_covmat( return covmat +def dataset_load_inputs_total_covmat(dataset_inputs_exp_covmat, loaded_theory_covmat): + """ + Function to compute the covmat to be used for the sampling by make_replica and for the chi2 + by fitting_data_dict. In this case the t0 prescription is not used for the experimental covmat + and the multiplicative errors are included in it. 
Moreover, the theory covmat is added to experimental covmat. + """ + return dataset_inputs_exp_covmat + loaded_theory_covmat + + def dataset_inputs_total_covmat(dataset_inputs_exp_covmat, nnfit_theory_covmat): """ Function to compute the covmat to be used for the sampling by make_replica and for the chi2 From a6dc904cdc08b2d43f83c13fbf93a62d1d51311a Mon Sep 17 00:00:00 2001 From: jacoterh <54140851+jacoterh@users.noreply.github.com> Date: Thu, 14 May 2026 14:47:06 +0200 Subject: [PATCH 25/29] removing t0_considered from hashed covmat array --- validphys2/src/validphys/config.py | 2 +- validphys2/src/validphys/n3fit_data.py | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index d84ec11b6a..0d841bd6ad 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -816,7 +816,7 @@ def produce_dataset_inputs_fitting_covmat(self, use_thcovmat_in_fitting=False): from validphys import covmats if use_thcovmat_in_fitting: - return covmats.dataset_inputs_t0_total_covmat + return covmats.dataset_load_inputs_t0_total_covmat return covmats.dataset_inputs_t0_exp_covmat def produce_sep_mult(self, separate_multiplicative=False): diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py index b86f956331..0e1016d8fc 100644 --- a/validphys2/src/validphys/n3fit_data.py +++ b/validphys2/src/validphys/n3fit_data.py @@ -313,9 +313,9 @@ def fittable_datasets_masked(data): return validphys_group_extractor(data.datasets) -def _hashed_dataset_inputs_fitting_covmat(dataset_inputs_covmat_t0_considered) -> Hashrray: +def _hashed_dataset_inputs_fitting_covmat(dataset_inputs_fitting_covmat) -> Hashrray: """Wrap the covmat into a Hashrray for caches to work""" - return Hashrray(dataset_inputs_covmat_t0_considered) + return Hashrray(dataset_inputs_fitting_covmat) @functools.lru_cache @@ -384,7 +384,7 @@ def _inv_covmat_prepared( return 
covmat, inv_total, diag_rot, eig_vals -def _fiting_covmat(dataset_inputs_fitting_covmat, data_input, diagonal_basis=True): +def _fiting_covmat(dataset_inputs_fitting_covmat, diagonal_basis=True): """Prepare the fitting covariance matrix by optionally adding theory contributions and transforming to diagonal basis. @@ -623,7 +623,10 @@ def fitting_covmat_table(output_path, _fiting_covmat, data_index, diagonal_basis if not diagonal_basis: log.info("Saving fitting covmat") - return pd.DataFrame(covmat, index=data_index, columns=data_index) + if not hasattr(covmat, "index"): + return pd.DataFrame(covmat, index=data_index, columns=data_index) + else: + return covmat df_rotation = pd.DataFrame(diagonal_rotation) df_rotation.insert(0, "eig_val", eig_vals) From 91fabeac42ea052f756da4f69a24ffa250f204fb Mon Sep 17 00:00:00 2001 From: achiefa Date: Fri, 15 May 2026 16:26:29 +0100 Subject: [PATCH 26/29] Fixng errors: moving rotation_action out of closure block --- n3fit/src/n3fit/scripts/vp_setupfit.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/n3fit/src/n3fit/scripts/vp_setupfit.py b/n3fit/src/n3fit/scripts/vp_setupfit.py index 0e4e83cd27..a5ba367d10 100644 --- a/n3fit/src/n3fit/scripts/vp_setupfit.py +++ b/n3fit/src/n3fit/scripts/vp_setupfit.py @@ -174,12 +174,14 @@ def from_yaml(cls, o, *args, **kwargs): check_n3fit_action = 'datacuts::theory::closuretest::fitting n3fit_checks_action' else: filter_action = 'datacuts::theory::fitting filter' - if file_content.get('theorycovmatconfig') is not None: - rotation_action = 'datacuts::theory::theorycovmatconfig fitting_covmat_table' - else: - rotation_action = 'datacuts::theory fitting_covmat_table' check_n3fit_action = 'datacuts::theory::fitting n3fit_checks_action' + # Add rotation action for the total covariance matrix + if file_content.get('theorycovmatconfig') is not None: + rotation_action = 'datacuts::theory::theorycovmatconfig fitting_covmat_table' + else: + rotation_action = 
'datacuts::theory fitting_covmat_table' + # The settings for these actions depend on the presence of closuretest fixed_config['actions_'] += [check_n3fit_action, filter_action, rotation_action] From 85a7049b57bafe3575e00b44ef90bbad65c02b96 Mon Sep 17 00:00:00 2001 From: achiefa Date: Sat, 16 May 2026 15:31:30 +0100 Subject: [PATCH 27/29] Using single-column csv when reading diag-basis results --- n3fit/src/n3fit/tests/test_fit.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/n3fit/src/n3fit/tests/test_fit.py b/n3fit/src/n3fit/tests/test_fit.py index c7f10e3faf..088c797000 100644 --- a/n3fit/src/n3fit/tests/test_fit.py +++ b/n3fit/src/n3fit/tests/test_fit.py @@ -283,8 +283,11 @@ def test_parallel_against_sequential(tmp_path, rep_from=6, rep_to=8): "ATLAS_TTBAR_8TEV_TOT_X-SEC", "CMS_SINGLETOP_13TEV_TCHANNEL-XSEC", ] - dataset_inputs = [{"dataset": d, "frac": 0.6, "variant": "legacy"} for d in datasets] + dataset_inputs = [{"dataset": d, "variant": "legacy"} for d in datasets] n3fit_input["dataset_inputs"] = dataset_inputs + # Using diagonal basis + n3fit_input["diagonal_basis"] = True + n3fit_input["diagonal_frac"] = 0.5 # Exit inmediately n3fit_input["parameters"]["epochs"] = 1 # Save pseudodata @@ -311,8 +314,9 @@ def test_parallel_against_sequential(tmp_path, rep_from=6, rep_to=8): for csvfile_seq in folder_seq.glob("*/*.csv"): csvfile_par = folder_par / csvfile_seq.relative_to(folder_seq) - result_seq = pd.read_csv(csvfile_seq, sep="\t", index_col=[0, 1, 2], header=0) - result_par = pd.read_csv(csvfile_par, sep="\t", index_col=[0, 1, 2], header=0) + # Diagonal basis writes single-index csv files + result_seq = pd.read_csv(csvfile_seq, sep="\t", index_col=[0], header=0) + result_par = pd.read_csv(csvfile_par, sep="\t", index_col=[0], header=0) pd.testing.assert_frame_equal(result_seq, result_par) # Check the rest of the fit, while numerical differences are expected between sequential From 82cdad53388dff7bbecb1ab51fdd1969fd4fa0c4 Mon
Sep 17 00:00:00 2001 From: achiefa Date: Sun, 17 May 2026 10:22:21 +0100 Subject: [PATCH 28/29] Pin torch 2.12.0 to avoid triton segfault --- .github/workflows/all_tests_nnpdf.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/all_tests_nnpdf.yml b/.github/workflows/all_tests_nnpdf.yml index f61f4764c1..ea0f882457 100644 --- a/.github/workflows/all_tests_nnpdf.yml +++ b/.github/workflows/all_tests_nnpdf.yml @@ -103,7 +103,11 @@ jobs: sudo rm -rf /opt/hostedtoolcache/Python/{3.9*} sudo rm -rf /opt/hostedtoolcache/CodeQL sudo rm -rf /usr/local/lib/android/sdk - pip install .[nolha,torch] + # torch 2.12.0 pulls triton 3.7.0, whose libtriton C extension segfaults + # at import on CPU-only Linux + CPython 3.13. Pinning torch<2.12 also + # pins triton to 3.6.x. Remove this pin once the upstream issue is + # resolved. + pip install ".[nolha,torch]" "torch<2.12" # Since there is no LHAPDF in the system, initialize the folder and download pdfsets.index lhapdf-management update --init - name: Test we can run one runcard From e67b1c66e0d32bd2b64e881a0e2fe0f63b9387b1 Mon Sep 17 00:00:00 2001 From: achiefa Date: Sun, 17 May 2026 20:57:28 +0100 Subject: [PATCH 29/29] Add reference to issue --- .github/workflows/all_tests_nnpdf.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/all_tests_nnpdf.yml b/.github/workflows/all_tests_nnpdf.yml index ea0f882457..8735f2b9bd 100644 --- a/.github/workflows/all_tests_nnpdf.yml +++ b/.github/workflows/all_tests_nnpdf.yml @@ -106,7 +106,7 @@ jobs: # torch 2.12.0 pulls triton 3.7.0, whose libtriton C extension segfaults # at import on CPU-only Linux + CPython 3.13. Pinning torch<2.12 also # pins triton to 3.6.x. Remove this pin once the upstream issue is - # resolved. + # resolved (see https://github.com/nascheme/triton/issues/43). 
pip install ".[nolha,torch]" "torch<2.12" # Since there is no LHAPDF in the system, initialize the folder and download pdfsets.index lhapdf-management update --init