diff --git a/doc/validphys2/guide.md b/doc/validphys2/guide.md
index b19109d8ea..dcc71b821f 100644
--- a/doc/validphys2/guide.md
+++ b/doc/validphys2/guide.md
@@ -1942,6 +1942,239 @@ In addition, errors will be raised if the input directory is not a valid fit (fo
 If the user wishes to add their own, non-standard files, then it is advisable
 to avoid using the fit name in these files as the `fitrename` command will also
 rename these files.
+### Fits with a Theory Covariance Matrix
+
+Fits can be run with a contribution to the covariance matrix obtained from
+performing scale variations. `validphys` also has flags which control whether
+the covariance matrix used to calculate statistical estimators should
+include a contribution from the theory covariance matrix. Getting the various
+flags that control these behaviours correct in both fit and `validphys` runcards
+is critical to obtaining sensible results. Examples with explanations are
+provided here to demonstrate how to run a fit with a theory covariance matrix
+and then use the various `validphys` analysis tools on the fit.
+
+#### Running a fit with a Theory Covariance Matrix
+
+In order to run a fit with a theory covariance matrix, the user must first
+specify that the `datasets` are all part of the same `experiment`.
An example of how this is done
+in practice is provided with the `experiments` section of a DIS-only fit
+runcard:
+
+```yaml
+experiments:
+- experiment: BIGEXP
+  datasets:
+  - {dataset: NMCPD, frac: 0.5}
+  - {dataset: NMC, frac: 0.5}
+  - {dataset: SLACP, frac: 0.5}
+  - {dataset: SLACD, frac: 0.5}
+  - {dataset: BCDMSP, frac: 0.5}
+  - {dataset: BCDMSD, frac: 0.5}
+  - {dataset: CHORUSNU, frac: 0.5}
+  - {dataset: CHORUSNB, frac: 0.5}
+  - {dataset: NTVNUDMN, frac: 0.5}
+  - {dataset: NTVNBDMN, frac: 0.5}
+  - {dataset: HERACOMBNCEM, frac: 0.5}
+  - {dataset: HERACOMBNCEP460, frac: 0.5}
+  - {dataset: HERACOMBNCEP575, frac: 0.5}
+  - {dataset: HERACOMBNCEP820, frac: 0.5}
+  - {dataset: HERACOMBNCEP920, frac: 0.5}
+  - {dataset: HERACOMBCCEM, frac: 0.5}
+  - {dataset: HERACOMBCCEP, frac: 0.5}
+  - {dataset: HERAF2CHARM, frac: 0.5}
+```
+
+The `datasets` must all be part of a single `experiment` so that the theory
+covariance matrix, which is generated when the user runs `vp-setupfit`, is
+compatible with the fitting infrastructure.
+
+The next step is to specify the `theorycovmatconfig`. This namespace controls
+which point prescription is used to generate the theory covariance matrix, and
+whether the theory covariance matrix will be used in the sampling of the
+pseudodata, the fitting of the data, or both.
+
+The different prescriptions for scale variation are 3-point, 5-point,
+5bar-point, 7-point and 9-point. Depending on which prescription the user
+decides to use, they must provide the correct number and combination of
+`theoryids`. In addition, if the 5-point or 5bar-point prescription is being
+used, then the user must specify which of these prescriptions to use with the
+`fivetheories` flag. There are also two options for the 7-point prescription:
+the default is 'Gavin's' prescription, but the user can also specify
+`seventheories: original`.
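As a quick sanity check before setting up a fit, the relation between prescription and the required number of `theoryids` can be sketched in a few lines of Python. This helper is purely illustrative and is not part of `validphys`; the names `EXPECTED_NUM_THEORYIDS` and `check_num_theoryids` are assumptions made for this example.

```python
# Illustrative helper (not part of validphys): map each point prescription to
# the number of theoryids it requires, and check a runcard fragment against it.
EXPECTED_NUM_THEORYIDS = {
    "3 point": 3,
    "5 point": 5,     # also requires `fivetheories: nobar`
    "5bar point": 5,  # also requires `fivetheories: bar`
    "7 point": 7,     # `seventheories: original` selects the non-default variant
    "9 point": 9,
}

def check_num_theoryids(prescription, theoryids):
    """Return True if the number of theoryids matches the chosen prescription."""
    return len(theoryids) == EXPECTED_NUM_THEORYIDS[prescription]
```

A mismatch between the prescription and the list of `theoryids` is one of the easiest mistakes to make when assembling these runcards, so a check along these lines is worth running before starting an expensive fit.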
+
+The various configuration options might seem overwhelming, so for each of the
+prescriptions the appropriate `theoryids` and additional flags required are
+provided below, ready to be pasted into a fit runcard.
+
+---------------------------------------------------------------------
+
+##### 3-point
+
+```yaml
+theorycovmatconfig:
+  theoryids:
+  - 163
+  - 180
+  - 173
+
+```
+
+##### 5-point
+
+```yaml
+theorycovmatconfig:
+  theoryids:
+  - 163
+  - 177
+  - 176
+  - 179
+  - 174
+  fivetheories: nobar
+
+```
+
+##### 5bar-point
+
+```yaml
+theorycovmatconfig:
+  theoryids:
+  - 163
+  - 180
+  - 173
+  - 175
+  - 178
+  fivetheories: bar
+
+```
+
+##### 7-point original
+
+```yaml
+theorycovmatconfig:
+  theoryids:
+  - 163
+  - 177
+  - 176
+  - 179
+  - 174
+  - 180
+  - 173
+  seventheories: original
+```
+
+##### 7-point Gavin (default)
+
+```yaml
+theorycovmatconfig:
+  theoryids:
+  - 163
+  - 177
+  - 176
+  - 179
+  - 174
+  - 180
+  - 173
+
+```
+
+##### 9-point
+
+```yaml
+theorycovmatconfig:
+  theoryids:
+  - 163
+  - 177
+  - 176
+  - 179
+  - 174
+  - 180
+  - 173
+  - 175
+  - 178
+```
+
+---------------------------------------------------------------------
+
+Once the user has correctly specified the `theoryids` and additional flags for
+their chosen prescription, they must then specify which PDF will be used to
+generate the theory 'points' required to construct the theory covariance
+matrix. The user must additionally specify where the theory covariance matrix
+is to be used. The theory covariance matrix can be used to sample the
+pseudodata by setting `use_thcovmat_in_sampling: true`; likewise, it can be
+included in the covariance matrix used in the fit by specifying
+`use_thcovmat_in_fitting: true`.
+
+Combining all of the above information, if one wanted to run a fit using the
+theory covariance matrix, calculated using the 9-point prescription, in both
+the fitting and sampling, with `NNPDF31_nlo_as_0118` used to generate the
+covariance matrix, then the complete `theorycovmatconfig` would be:
+
+```yaml
+theorycovmatconfig:
+  theoryids:
+  - 163
+  - 177
+  - 176
+  - 179
+  - 174
+  - 180
+  - 173
+  - 175
+  - 178
+  pdf: NNPDF31_nlo_as_0118
+  use_thcovmat_in_fitting: true
+  use_thcovmat_in_sampling: true
+```
+
+#### Using `validphys` statistical estimators with theory covariance
+
+Once a fit has been run with the theory covariance matrix, it is necessary to
+include the theory covariance matrix in estimators such as the chi² in order
+to obtain meaningful values. This behaviour is controlled by the flag
+`use_thcovmat_if_present`, which is set to `False` by default.
+
+If the user specifies `use_thcovmat_if_present: True` then they must also
+specify a corresponding `fit`. The configuration file for that `fit` will be
+read. If `use_thcovmat_in_fitting: True` then validphys will locate the theory
+covariance matrix used during the fit and add it to the experimental
+covariance matrix, for use in statistical estimators such as the chi². A
+simple example of this would be:
+
+```yaml
+dataset_input: {dataset: HERAF2CHARM}
+
+use_thcovmat_if_present: True
+
+fit: 190310-tg-nlo-global-7pts
+
+use_cuts: "fromfit"
+
+pdf:
+  from_: fit
+
+theory:
+  from_: fit
+
+theoryid:
+  from_: theory
+
+actions_:
+  - dataset_chi2_table
+```
+
+It should be noted that any `dataset_input` specified in the same runcard as
+`use_thcovmat_if_present: True` must have been fitted in the corresponding
+`fit`. If the corresponding fit was run with `use_thcovmat_in_fitting: False`,
+then the user will be warned and there will be no contribution from the
+theory covariance matrix in the statistical estimators calculated for that
+runcard.
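Conceptually, what happens to the estimators when the theory covariance matrix is used can be sketched with a toy example: the theory covariance matrix is added to the experimental one, and the chi² is computed from the combined matrix via its Cholesky decomposition. The 2x2 matrices below are made-up numbers for illustration only; this is not `validphys` code.

```python
import numpy as np

# Toy inputs (assumptions for illustration only)
C = np.array([[2.0, 0.5],
              [0.5, 1.0]])     # experimental covariance matrix
S = np.array([[0.4, 0.1],
              [0.1, 0.2]])     # theory covariance matrix from scale variations
diffs = np.array([0.3, -0.2])  # data minus theory predictions

# The combined matrix is what enters the statistical estimators
total_covmat = C + S

# chi2 computed via the Cholesky decomposition of the combined matrix,
# mirroring how a sqrt covariance matrix is used in practice
L = np.linalg.cholesky(total_covmat)
x = np.linalg.solve(L, diffs)
chi2 = float(x @ x)
```

Since the theory contribution enlarges the covariance, the chi² with `C + S` is never larger than the chi² computed with `C` alone for the same residuals, which is the typical effect one sees when switching the flag on.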
+
+When using the `vp-comparefits` application, the user **must** specify either
+the command-line flag `--thcovmat_if_present` or `--no-thcovmat_if_present`,
+which set `use_thcovmat_if_present` to `True` or `False` respectively.
+
+If the user uses the interactive mode, `vp-comparefits -i`, then they will be
+prompted to select whether or not to use the theory covariance matrix, if
+available, in the report, if they have not already specified it on the
+command line.
+
 Parallel mode
 -------------
diff --git a/validphys2/src/validphys/checks.py b/validphys2/src/validphys/checks.py
index e6ce3eb531..d2719ec690 100644
--- a/validphys2/src/validphys/checks.py
+++ b/validphys2/src/validphys/checks.py
@@ -103,6 +103,37 @@ def check_cuts_considered(use_cuts):
         raise CheckError(f"Cuts must be computed for this action, but they are set to {use_cuts.value}")
 
+@make_argcheck
+def check_dataset_cuts_match_theorycovmat(dataset, fitthcovmat):
+    if fitthcovmat:
+        ds_index = fitthcovmat.load().index.get_level_values(1)
+        ncovmat = (ds_index == dataset.name).sum()
+
+        cuts = dataset.cuts
+        if cuts:
+            ndata = len(dataset.cuts.load())
+        else:
+            ndata = dataset.commondata.ndata
+        check(ndata == ncovmat)
+
+
+@make_argcheck
+def check_experiment_cuts_match_theorycovmat(
+        experiment, fitthcovmat):
+    for dataset in experiment.datasets:
+        if fitthcovmat:
+            ds_index = fitthcovmat.load().index.get_level_values(1)
+            ncovmat = (ds_index == dataset.name).sum()
+
+            cuts = dataset.cuts
+            if cuts:
+                ndata = len(dataset.cuts.load())
+            else:
+                ndata = dataset.commondata.ndata
+            check(ndata == ncovmat)
+
+
+
 @make_argcheck
 def check_have_two_pdfs(pdfs):
     check(len(pdfs) == 2,'Expecting exactly two pdfs.')
diff --git a/validphys2/src/validphys/comparefittemplates/comparecard.yaml b/validphys2/src/validphys/comparefittemplates/comparecard.yaml
index 3678b0acab..adf2b71be1 100644
--- a/validphys2/src/validphys/comparefittemplates/comparecard.yaml
+++ 
b/validphys2/src/validphys/comparefittemplates/comparecard.yaml @@ -74,14 +74,14 @@ pdf_report: template: report.md -experiments: - from_: fit positivity: from_: fit dataspecs: - - theoryid: + - experiments: + from_: fit + theoryid: from_: current pdf: from_: current @@ -91,7 +91,9 @@ dataspecs: from_: current - - theoryid: + - experiments: + from_: fit + theoryid: from_: reference pdf: from_: reference diff --git a/validphys2/src/validphys/comparefittemplates/report.md b/validphys2/src/validphys/comparefittemplates/report.md index f1c3cc72ed..d879eb9f93 100644 --- a/validphys2/src/validphys/comparefittemplates/report.md +++ b/validphys2/src/validphys/comparefittemplates/report.md @@ -4,6 +4,10 @@ Fit summary ------------------ {@ summarise_fits @} +Theory Covariance Summary +------------------------- +{@summarise_theory_covmat_fits@} + Dataset properties ------------------ {@current datasets_properties_table@} @@ -44,10 +48,7 @@ $\chi^2$ by dataset comparisons $\phi$ by experiment -------------------- -{@with dataspecs@} -### {@fit@} -{@plot_phi@} -{@endwith@} +{@plot_fits_experiments_phi@} Experiment plots --------------- diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 9a21bf9de4..b9f6ce2887 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -29,6 +29,8 @@ from validphys.paramfits.config import ParamfitsConfig +from validphys.plotoptions import get_info + log = logging.getLogger(__name__) @@ -694,21 +696,20 @@ def res(*args, **kwargs): return res def produce_fitthcovmat( - self, use_thcovmat_if_present: bool = True, fit: (str, type(None)) = None): + self, use_thcovmat_if_present: bool = False, fit: (str, type(None)) = None): """If a `fit` is specified and `use_thcovmat_if_present` is `True` then returns the corresponding covariance matrix for the given fit if it exists. If the fit doesn't have a - theory covariance matrix then returns `False`. 
If no fit is specified then returns the user
-    must manually set `use_thcovmat_if_present` to be False, or provide an appropriate fit.
+    theory covariance matrix then returns `None`.
     """
     if not isinstance(use_thcovmat_if_present, bool):
-        raise ConfigError("use_thcovmat_if_present should be a boolean, by default it is True")
+        raise ConfigError("use_thcovmat_if_present should be a boolean, by default it is False")
     if use_thcovmat_if_present and not fit:
         raise ConfigError("`use_thcovmat_if_present` was true but no `fit` was specified.")
     if use_thcovmat_if_present and fit:
         try:
-            use_thcovmat_if_present = fit.as_input()[
+            thcovmat_present = fit.as_input()[
                 'theorycovmatconfig']['use_thcovmat_in_fitting']
         except KeyError:
             #assume covmat wasn't used and fill in key accordingly but warn user
@@ -716,17 +717,21 @@ def produce_fitthcovmat(
                 "`use_thcovmat_in_fitting` didn't exist in the runcard for "
                 f"{fit.name}. Theory covariance matrix will not be used "
                 "in any statistical estimators.")
-            use_thcovmat_if_present = False
-        #Now set as expected path and check it exists
-        if use_thcovmat_if_present:
-            use_thcovmat_if_present = (
-                fit.path/'tables'/'datacuts_theory_theorycovmatconfig_theory_covmat.csv')
-            if not os.path.exists(use_thcovmat_if_present):
-                raise ConfigError(
-                    "Fit appeared to use theory covmat in fit but the file was not at the "
-                    f"usual location: {use_thcovmat_if_present}.")
-            use_thcovmat_if_present = ThCovMatSpec(use_thcovmat_if_present)
-        return use_thcovmat_if_present
+            thcovmat_present = False
+
+
+    if use_thcovmat_if_present and thcovmat_present:
+        # Expected path of covmat hardcoded
+        covmat_path = (
+            fit.path/'tables'/'datacuts_theory_theorycovmatconfig_theory_covmat.csv')
+        if not os.path.exists(covmat_path):
+            raise ConfigError(
+                "Fit appeared to use theory covmat in fit but the file was not at the "
+                f"usual location: {covmat_path}.")
+        fit_theory_covmat = ThCovMatSpec(covmat_path)
+    else:
+        fit_theory_covmat = None
+    return
fit_theory_covmat
 
     def parse_speclabel(self, label:(str, type(None))):
         """A label for a dataspec. To be used in some plots"""
@@ -744,6 +749,48 @@ def parse_fitdeclaration(self, label:str):
         """
         return label
 
+    def produce_fit_data_groupby_experiment(self, fit):
+        """Used to produce data from the fit grouped into experiments,
+        where each experiment is a group of datasets according to the experiment
+        key in the plotting info file.
+        """
+        #TODO: consider this an implementation detail
+        from reportengine.namespaces import NSList
+
+        with self.set_context(ns=self._curr_ns.new_child({'fit':fit})):
+            _, experiments = self.parse_from_('fit', 'experiments', write=False)
+
+        flat = (ds for exp in experiments for ds in exp.datasets)
+        metaexps = [get_info(ds).experiment for ds in flat]
+        res = {}
+        for exp in experiments:
+            for dsinput, ds in zip(exp.dsinputs, exp.datasets):
+                metaexp = get_info(ds).experiment
+                if metaexp in res:
+                    res[metaexp].append(ds)
+                else:
+                    res[metaexp] = [ds]
+        exps = []
+        for exp in res:
+            exps.append(ExperimentSpec(exp, res[exp]))
+
+        experiments = NSList(exps, nskey='experiment')
+        return {'experiments': experiments}
+
+    def produce_fit_context_groupby_experiment(self, fit):
+        """produces experiments similarly to `fit_data_groupby_experiment`
+        but also sets fitcontext (pdf and theoryid)
+        """
+        _, pdf = self.parse_from_('fit', 'pdf', write=False)
+        _, theory = self.parse_from_('fit', 'theory', write=False)
+        thid = theory['theoryid']
+        with self.set_context(ns=self._curr_ns.new_child({'theoryid':thid})):
+            experiments = self.produce_fit_data_groupby_experiment(
+                fit)['experiments']
+        return {'pdf': pdf, 'theoryid':thid, 'experiments': experiments}
+
+
+
 class Config(report.Config, CoreConfig, ParamfitsConfig):
     """The effective configuration parser class."""
     pass
diff --git a/validphys2/src/validphys/dataplots.py b/validphys2/src/validphys/dataplots.py
index c930772fb6..6ecdb2adb8 100644
--- a/validphys2/src/validphys/dataplots.py
+++ 
b/validphys2/src/validphys/dataplots.py @@ -54,21 +54,19 @@ def plot_phi(experiments, experiments_phi): See `phi_data` for information on how phi is calculated """ - phi = experiments_phi + phi = [exp_phi for (exp_phi, npoints) in experiments_phi] xticks = [experiment.name for experiment in experiments] fig, ax = plotutils.barplot(phi, collabels=xticks, datalabels=[r'$\phi$']) ax.set_title(r"$\phi$ for each experiment") return fig @figure -def plot_phi_pdfs(experiments, pdfs, experiments_pdfs_phi): - """Like `plot_phi` but plots a set of bars for each PDF input""" - phi = experiments_pdfs_phi - phi_labels = [pdf.name for pdf in pdfs] - xticks = [experiment.name for experiment in experiments] - fig, ax = plotutils.barplot(phi, collabels=xticks, datalabels=phi_labels) +def plot_fits_experiments_phi(fits_experiments_phi_table): + """Plots a set of bars for each fit, each bar represents the value of phi for the corresponding + experiment, where the experiment is a group of datasets according to the `experiment` key in + the PLOTTING info file""" + fig, ax = _plot_chis_df(fits_experiments_phi_table) ax.set_title(r"$\phi$ for each experiment") - ax.legend() return fig @figure diff --git a/validphys2/src/validphys/fitdata.py b/validphys2/src/validphys/fitdata.py index 98fd20e8c5..dded113ae3 100644 --- a/validphys2/src/validphys/fitdata.py +++ b/validphys2/src/validphys/fitdata.py @@ -120,7 +120,7 @@ def replica_data(fit, replica_paths): @table -def fit_summary(fit, replica_data, total_experiments_chi2data): +def fit_summary(fit_name_with_covmat_label, replica_data, total_experiments_chi2data): """ Summary table of fit properties - Central chi-squared - Average chi-squared @@ -146,7 +146,7 @@ def fit_summary(fit, replica_data, total_experiments_chi2data): etrain = [x.training for x in replica_data] evalid = [x.validation for x in replica_data] - phi = phi_data(total_experiments_chi2data) + phi, _ = phi_data(total_experiments_chi2data) phi_err = 
np.std(member_chi2)/(2.0*phi*np.sqrt(nrep)) VET = ValueErrorTuple @@ -157,7 +157,7 @@ def fit_summary(fit, replica_data, total_experiments_chi2data): (r"$<\chi^2>$", f"{VET(np.mean(member_chi2), np.std(member_chi2))}"), (r"$\phi$", f"{VET(phi, phi_err)}"))) - return pd.Series(data, index=data.keys(), name=fit.name) + return pd.Series(data, index=data.keys(), name=fit_name_with_covmat_label) collected_fit_summaries = collect('fit_summary', ('fits', 'fitcontext')) @@ -295,6 +295,32 @@ def print_different_cuts(fits, test_for_same_cuts): return res.getvalue() +def fit_theory_covmat_summary(fit, fitthcovmat): + """returns a table with a single column for the `fit`, with three rows + indicating if the theory covariance matrix was used in the 'sampling' of the pseudodata, + the 'fitting', and the 'validphys statistical estimators' in the current namespace for that fit. + """ + try: + config = fit.as_input()['theorycovmatconfig'] + except KeyError: + config = {'use_thcovmat_in_sampling': False, 'use_thcovmat_in_fitting': False} + sampling = config.get('use_thcovmat_in_sampling', False) + fitting = config.get('use_thcovmat_in_fitting', False) + report = bool(fitthcovmat) + df = pd.DataFrame( + [sampling, fitting, report], + columns=[fit.name], + index=['sampling', 'fitting', 'validphys statistical estimators']) + return df + +fits_theory_covmat_summary = collect('fit_theory_covmat_summary', ('fits',)) + +@table +def summarise_theory_covmat_fits(fits_theory_covmat_summary): + """Collects the theory covmat summary for all fits and concatenates them into a single table""" + return pd.concat(fits_theory_covmat_summary, axis=1) + + def _get_fitted_index(pdf, i): """Return the nnfit index for the replcia i""" p = pathlib.Path(pdf.infopath).with_name(f'{pdf.name}_{i:04d}.dat') diff --git a/validphys2/src/validphys/results.py b/validphys2/src/validphys/results.py index 49429a6602..23c3a8d8aa 100644 --- a/validphys2/src/validphys/results.py +++ b/validphys2/src/validphys/results.py 
@@ -20,7 +20,9 @@ from reportengine import collect from validphys.checks import (check_cuts_considered, check_pdf_is_montecarlo, - check_speclabels_different, check_two_dataspecs) + check_speclabels_different, check_two_dataspecs, + check_dataset_cuts_match_theorycovmat, + check_experiment_cuts_match_theorycovmat) from validphys.core import DataSetSpec, PDF, ExperimentSpec, ThCovMatSpec from validphys.calcutils import (all_chi2, central_chi2, calc_chi2, calc_phi, bootstrap_values, get_df_block) @@ -59,16 +61,10 @@ def std_error(self): class DataResult(NNPDFDataResult): - def __init__(self, dataobj, thcovmat=False): + def __init__(self, dataobj, covmat, sqrtcovmat): super().__init__(dataobj) - self._covmat = dataobj.get_covmat() - if isinstance(thcovmat, ThCovMatSpec): - self._sqrtcovmat = dataobj.GetSqrtFitCovMat(str(thcovmat), []) - elif isinstance(thcovmat, pd.DataFrame): - thcovslice = get_df_block(thcovmat, dataobj.GetSetName(), level=1) - self._sqrtcovmat = np.linalg.cholesky(self._covmat + thcovslice) - else: - self._sqrtcovmat = dataobj.get_sqrtcovmat() + self._covmat = covmat + self._sqrtcovmat = sqrtcovmat @property @@ -364,46 +360,79 @@ def closure_pseudodata_replicas(experiments, pdf, nclosure:int, return df -def results(dataset:(DataSetSpec), pdf:PDF, t0set:(PDF, type(None))=None, fitthcovmat=False): - """Tuple of data and theory results for a single pdf. - The theory is specified as part of the dataset. - An experiment is also allowed. 
- (as a result of the C++ code layout)."""
+@check_dataset_cuts_match_theorycovmat
+def covariance_matrix(dataset:DataSetSpec, fitthcovmat, t0set:(PDF, type(None)) = None):
+    """Returns a tuple of the covariance matrix and sqrt covariance matrix for the given
+    dataset, which includes the theory contribution from scale variations if
+    `use_thcovmat_if_present` is True and an appropriate fit to load the covariance matrix from is given
 
-    data = dataset.load()
-
-    # only need to load thcov if specified and if dataset
-    if fitthcovmat and isinstance(dataset, DataSetSpec):
-        fitthcovmat = fitthcovmat.load()
+    if t0set is specified then t0 predictions will be used to construct the covariance matrix.
+    """
+    loaded_data = dataset.load()
 
     if t0set:
         #Copy data to avoid chaos
-        data = type(data)(data)
+        loaded_data = type(loaded_data)(loaded_data)
         log.debug("Setting T0 predictions for %s" % dataset)
-        data.SetT0(t0set.load_t0())
+        loaded_data.SetT0(t0set.load_t0())
+
+    if fitthcovmat:
+        loaded_thcov = fitthcovmat.load()
+        covmat = get_df_block(loaded_thcov, dataset.name, level=1) + loaded_data.get_covmat()
+        sqrtcovmat = np.linalg.cholesky(covmat)
+    else:
+        covmat = loaded_data.get_covmat()
+        sqrtcovmat = loaded_data.get_sqrtcovmat()
+    return covmat, sqrtcovmat
+
+@check_experiment_cuts_match_theorycovmat
+def experiment_covariance_matrix(
+        experiment: ExperimentSpec, fitthcovmat, t0set:(PDF, type(None)) = None):
+    """Like `covariance_matrix` except for an experiment"""
+    loaded_data = experiment.load()
+
+    if t0set:
+        #Copy data to avoid chaos
+        loaded_data = type(loaded_data)(loaded_data)
+        log.debug("Setting T0 predictions for %s" % experiment)
+        loaded_data.SetT0(t0set.load_t0())
+
+    if fitthcovmat:
+        loaded_thcov = fitthcovmat.load()
+        ds_names = loaded_thcov.index.get_level_values(1)
+        indices = np.in1d(ds_names, [ds.name for ds in experiment.datasets]).nonzero()[0]
+        covmat = loaded_thcov.iloc[indices, indices].values + loaded_data.get_covmat()
+        sqrtcovmat = 
np.linalg.cholesky(covmat)
+    else:
+        covmat = loaded_data.get_covmat()
+        sqrtcovmat = loaded_data.get_sqrtcovmat()
+    return covmat, sqrtcovmat
 
-    return (DataResult(data, thcovmat=fitthcovmat),
+def results(dataset:(DataSetSpec), pdf:PDF, covariance_matrix):
+    """Tuple of data and theory results for a single pdf. The data will have an associated
+    covariance matrix, which can include a contribution from the theory covariance matrix
+    constructed from scale variations. Whether this contribution is included is controlled
+    by the flag `use_thcovmat_if_present`.
+
+    The theory is specified as part of the dataset.
+    An experiment is also allowed.
+    (as a result of the C++ code layout)."""
+    data = dataset.load()
+    return (DataResult(data, *covariance_matrix),
             ThPredictionsResult.from_convolution(pdf, dataset, loaded_data=data))
 
-def experiment_results(experiment, pdf:PDF, t0set:(PDF, type(None))=None, fitthcovmat=False):
+def experiment_results(experiment, pdf:PDF, experiment_covariance_matrix):
     """Like `results` but for a whole experiment"""
-    return results(experiment, pdf, t0set, fitthcovmat=fitthcovmat)
+    return results(experiment, pdf, experiment_covariance_matrix)
 
 #It's better to duplicate a few lines than to complicate the logic of
 #``results`` to support this.
 #TODO: The above comment doesn't make sense after adding T0.
Deprecate this -def pdf_results(dataset:(DataSetSpec, ExperimentSpec), pdfs:Sequence, t0set:(PDF, type(None))): +def pdf_results(dataset:(DataSetSpec, ExperimentSpec), pdfs:Sequence, covariance_matrix:tuple): """Return a list of results, the first for the data and the rest for each of the PDFs.""" data = dataset.load() - - if t0set: - #Copy data to avoid chaos - data = type(data)(data) - log.debug("Setting T0 predictions for %s" % dataset) - data.SetT0(t0set.load_t0()) - th_results = [] for pdf in pdfs: th_result = ThPredictionsResult.from_convolution(pdf, dataset, @@ -411,22 +440,22 @@ def pdf_results(dataset:(DataSetSpec, ExperimentSpec), pdfs:Sequence, t0set:(PD th_results.append(th_result) - return (DataResult(data), *th_results) + return (DataResult(data, *covariance_matrix), *th_results) @require_one('pdfs', 'pdf') @remove_outer('pdfs', 'pdf') def one_or_more_results(dataset:(DataSetSpec, ExperimentSpec), + covariance_matrix: tuple, pdfs:(type(None), Sequence)=None, - pdf:(type(None), PDF)=None, - t0set:(PDF, type(None))=None): + pdf:(type(None), PDF)=None): """Generate a list of results, where the first element is the data values, and the next is either the prediction for pdf or for each of the pdfs. Which of the two is selected intelligently depending on the namespace, when executing as an action.""" if pdf: - return results(dataset, pdf, t0set) + return results(dataset, pdf, covariance_matrix) else: - return pdf_results(dataset, pdfs, t0set) + return pdf_results(dataset, pdfs, covariance_matrix) raise ValueError("Either 'pdf' or 'pdfs' is required") @@ -452,11 +481,13 @@ def abs_chi2_data_experiment(experiment_results): def phi_data(abs_chi2_data): """Calculate phi using values returned by `abs_chi2_data`. 
+ Returns tuple of (phi, numpoints) + For more information on how phi is calculated see Eq.(24) in 1410.8849 """ alldata, central, npoints = abs_chi2_data - return np.sqrt((alldata.data.mean() - central)/npoints) + return (np.sqrt((alldata.data.mean() - central)/npoints), npoints) def phi_data_experiment(abs_chi2_data_experiment): """Like `phi_data` but for whole experiment""" @@ -653,20 +684,38 @@ def dataset_chi2_table(chi2_stats, dataset): """Show the chi² estimators for a given dataset""" return pd.DataFrame(chi2_stats, index=[dataset.name]) +fits_experiment_chi2_data = collect( + 'experiments_chi2', ('fits', 'fit_context_groupby_experiment',)) +fits_experiments = collect( + 'experiments', ('fits', 'fit_context_groupby_experiment',)) + +def fit_name_with_covmat_label(fit, fitthcovmat): + """If theory covariance matrix is being used to calculate statistical estimators for the `fit` + then appends (exp + th) onto the fit name for use in legends and column headers to help the user + see what covariance matrix was used to produce the plot or table they are looking at. + """ + if fitthcovmat: + label = str(fit) + " (exp + th)" + else: + label = str(fit) + return label + +fits_name_with_covmat_label = collect('fit_name_with_covmat_label', ('fits',)) #TODO: Possibly get rid of the per_point_data parameter and have separate #actions for absolute and relative tables. @table -def fits_experiments_chi2_table(fits, fits_experiments, fits_experiment_chi2_data, - per_point_data:bool=True): +def fits_experiments_chi2_table(fits_name_with_covmat_label, fits_experiments, + fits_experiment_chi2_data, per_point_data:bool=True): """A table with the chi2 for each included experiment in the fits, computed with the theory corresponding to each fit. If points_per_data is True, the chi² will be shown divided by ndata. 
Otherwise they will be absolute.""" dfs = [] cols = ('ndata', r'$\chi^2/ndata$') if per_point_data else ('ndata', r'$\chi^2$') - for fit, experiments, exps_chi2 in zip(fits, fits_experiments, fits_experiment_chi2_data): + for label, experiments, exps_chi2 in zip( + fits_name_with_covmat_label, fits_experiments, fits_experiment_chi2_data): records = [] for experiment, exp_chi2 in zip(experiments, exps_chi2): mean_chi2 = exp_chi2.central_result.mean() @@ -683,7 +732,37 @@ def fits_experiments_chi2_table(fits, fits_experiments, fits_experiment_chi2_dat ) if per_point_data: df['mean_chi2'] /= df['npoints'] - df.columns = pd.MultiIndex.from_product(([str(fit)], cols)) + df.columns = pd.MultiIndex.from_product(([label], cols)) + dfs.append(df) + res = pd.concat(dfs, axis=1) + return res + +fits_experiments_phi = collect( + 'experiments_phi', ('fits', 'fit_context_groupby_experiment')) + +@table +def fits_experiments_phi_table(fits_name_with_covmat_label, fits_experiments, fits_experiments_phi): + """For every fit, returns phi and number of data points per experiment, where experiment is + a collection of datasets grouped according to the experiment key in the PLOTTING info file + """ + dfs = [] + cols = ('ndata', r'$\phi$') + for label, experiments, exps_phi in zip( + fits_name_with_covmat_label, fits_experiments, fits_experiments_phi): + records = [] + for experiment, (exp_phi, npoints) in zip(experiments, exps_phi): + npoints = npoints + records.append(dict( + experiment=str(experiment), + npoints=npoints, + phi = exp_phi + + )) + df = pd.DataFrame.from_records(records, + columns=('experiment', 'npoints', 'phi'), + index = ('experiment', ) + ) + df.columns = pd.MultiIndex.from_product(([label], cols)) dfs.append(df) res = pd.concat(dfs, axis=1) return res @@ -701,20 +780,20 @@ def dataspecs_experiments_chi2_table(dataspecs_speclabel, dataspecs_experiments, @table -def fits_datasets_chi2_table(fits, fits_experiments, fits_chi2_data, +def 
fits_datasets_chi2_table(fits_name_with_covmat_label, fits_experiments, fits_chi2_data, per_point_data:bool=True): """A table with the chi2 for each included dataset in the fits, computed with the theory corresponding to the fit. The result are indexed in two - levels by experiment and dataset. If points_per_data is True, - the chi² will be shown divided by ndata. - Otherwise they will be absolute.""" + levels by experiment and dataset, where experiment is the grouping of datasets according to the + `experiment` key in the PLOTTING info file. If points_per_data is True, the chi² will be shown + divided by ndata. Otherwise they will be absolute.""" chi2_it = iter(fits_chi2_data) cols = ('ndata', r'$\chi^2/ndata$') if per_point_data else ('ndata', r'$\chi^2$') dfs = [] - for fit, experiments in zip(fits, fits_experiments): + for label, experiments in zip(fits_name_with_covmat_label, fits_experiments): records = [] for experiment in experiments: for dataset, chi2 in zip(experiment.datasets, chi2_it): @@ -734,7 +813,7 @@ def fits_datasets_chi2_table(fits, fits_experiments, fits_chi2_data, ) if per_point_data: df['mean_chi2'] /= df['npoints'] - df.columns = pd.MultiIndex.from_product(([str(fit)], cols)) + df.columns = pd.MultiIndex.from_product(([label], cols)) dfs.append(df) return pd.concat(dfs, axis=1) @@ -747,17 +826,21 @@ def dataspecs_datasets_chi2_table(dataspecs_speclabel, dataspecs_experiments, dataspecs_chi2_data, per_point_data=per_point_data) +#NOTE: This will need to be changed to work with `data` when that gets added +fits_total_chi2_data = collect('total_experiments_chi2data', ('fits', 'fitcontext')) + #TODO: Decide what to do with the horrible totals code. 
@table def fits_chi2_table( - fits_experiments_chi2_table, + fits_total_chi2_data, fits_datasets_chi2_table, + fits_experiments_chi2_table, show_total:bool=False): """Show the chi² of each and number of points of each dataset and experiment - of each fit, - computed with the theory corresponding to the fit. Dataset that are not - included in some fit appear as "Not Fitted". This is itended for display - purposes.""" + of each fit, where experiment is a group of datasets according to the `experiment` key in + the PLOTTING info file, computed with the theory corresponding to the fit. Dataset that are not + included in some fit appear as `NaN` + """ lvs = fits_experiments_chi2_table.index expanded_index = pd.MultiIndex.from_product((lvs, ["Total"])) edf = fits_experiments_chi2_table.set_index(expanded_index) @@ -767,9 +850,10 @@ def fits_chi2_table( for lv in lvs: dfs.append(pd.concat((edf.loc[lv],ddf.loc[lv]), copy=False, axis=0)) if show_total: - total_points = fits_experiments_chi2_table.iloc[:, 0::2].sum().values - total_chi = (fits_experiments_chi2_table.iloc[:, 0::2].values * - fits_experiments_chi2_table.iloc[:,1::2].values).sum(axis=0) + total_points = np.array( + [total_chi2_data.ndata for total_chi2_data in fits_total_chi2_data]) + total_chi = np.array( + [total_chi2_data.central_result for total_chi2_data in fits_total_chi2_data]) total_chi /= total_points row = np.zeros(len(total_points)*2) row[::2] = total_points @@ -880,7 +964,6 @@ def experiments_central_values(experiment_result_table): experiments_results = collect(experiment_results, ('experiments',)) experiments_phi = collect(phi_data_experiment, ('experiments',)) -experiments_pdfs_phi = collect('experiments_phi', ('pdfs',)) pdfs_total_chi2 = collect(total_experiments_chi2, ('pdfs',)) @@ -891,15 +974,13 @@ def experiments_central_values(experiment_result_table): #These are convenient ways to iterate and extract varios data from fits fits_chi2_data = collect(abs_chi2_data, ('fits', 'fitcontext', 
                                          'experiments', 'experiment'))
-fits_experiment_chi2_data = collect('experiments_chi2', ('fits', 'fitcontext'))
+
 fits_total_chi2 = collect('total_experiments_chi2', ('fits', 'fitcontext'))

 fits_total_chi2_for_experiments = collect('total_experiment_chi2',
         ('fits', 'fittheoryandpdf', 'expspec', 'experiment'))
-
-fits_experiments = collect('experiments', ('fits', 'fitcontext'))

 fits_pdf = collect('pdf', ('fits', 'fitpdf'))

 #Dataspec is so
diff --git a/validphys2/src/validphys/scripts/vp_comparefits.py b/validphys2/src/validphys/scripts/vp_comparefits.py
index 07462da8d0..0ff1671a06 100644
--- a/validphys2/src/validphys/scripts/vp_comparefits.py
+++ b/validphys2/src/validphys/scripts/vp_comparefits.py
@@ -67,14 +67,31 @@ def add_positional_arguments(self, parser):
             '--closure',
             help="Use the closure comparison template.",
             action='store_true')
+#        parser.add_argument(
+#            '--use_thcovmat_if_present',
+#            help="Use theory cov mat for calculating statistical estimators.",
+#            action='store_true')
         parser.add_argument(
-            '--theory_cov',
-            help="Use theory cov mat for calculating statistical estimators.")
+            '--thcovmat_if_present',
+            dest='thcovmat_if_present',
+            action='store_true',
+            help="Use theory cov mat for calculating statistical estimators if available.")
+        parser.add_argument(
+            '--no-thcovmat_if_present',
+            dest='thcovmat_if_present',
+            action='store_false',
+            help="Do not use theory cov mat for calculating statistical estimators.")
+        parser.set_defaults(thcovmat_if_present=None)

     def try_complete_args(self):
         args = self.args
-        argnames = ('base_fit', 'reference_fit', 'theory_cov', 'title', 'author', 'keywords')
-        bad = [argname for argname in argnames if not args[argname]]
+        argnames = (
+            'base_fit', 'reference_fit', 'title', 'author', 'keywords')
+        boolnames = (
+            'thcovmat_if_present',)
+        badargs = [argname for argname in argnames if not args[argname]]
+        badbools = [bname for bname in boolnames if args[bname] is None]
+        bad = badargs + badbools
         if bad and not
args['interactive']:
             sys.exit(f"The following arguments are required: {bad}")
         try:
@@ -84,7 +101,7 @@ def try_complete_args(self):
             raise KeyboardInterrupt()
         texts = '\n'.join(
             f' {argname.replace("_", " ").capitalize()}: {args[argname]}'
-            for argname in argnames)
+            for argname in [*argnames, *boolnames])
         log.info(f"Starting NNPDF fit comparison:\n{texts}")

     def interactive_base_fit(self):
@@ -126,11 +143,11 @@ def interactive_keywords(self):
             complete_in_thread=True)
         return [k.strip() for k in kwinp.split(',') if k]

-    def interactive_theory_cov(self):
+    def interactive_thcovmat_if_present(self):
         """Interactively fill in the `use_thcovmat_if_present` runcard flag.
         Which is True by default
         """
-        message = ("Do you want to use the fitted covariance matrix (including theory covariance\n"
-                   "matrix) to calculate the statistical estimators? ")
+        message = ("Do you want to use the theory covariance matrix, if available,\n"
+                   "to calculate the statistical estimators? ")
         return confirm(message, default=True)

     def get_commandline_arguments(self, cmdline=None):
@@ -178,7 +195,7 @@ def complete_mapping(self):
             },
             'speclabel': 'Reference Fit'
         }
-        autosettings['use_thcovmat_if_present'] = args['theory_cov']
+        autosettings['use_thcovmat_if_present'] = args['thcovmat_if_present']
         return autosettings

diff --git a/validphys2/src/validphys/tests/conftest.py b/validphys2/src/validphys/tests/conftest.py
index 90d9720494..3a84a34314 100644
--- a/validphys2/src/validphys/tests/conftest.py
+++ b/validphys2/src/validphys/tests/conftest.py
@@ -36,7 +36,9 @@ def data():
 def convolution_results_implement(data):
     pdf, exps = data
-    return [results.experiment_results(exp, pdf, pdf) for exp in exps]
+    #no theory covmat here
+    covs = [results.experiment_covariance_matrix(exp, False, pdf) for exp in exps]
+    return [results.experiment_results(exp, pdf, cov) for exp, cov in zip(exps, covs)]

 @pytest.fixture(scope='module')
 def convolution_results(data):
@@ -46,7 +48,8 @@
 def dataset_t0_convolution_results(data):
     pdf, exps = data
     ds = [x.datasets[0] for x in exps]
-    return [results.results(x, pdf, t0set=pdf) for x in ds]
+    covs = [results.covariance_matrix(x, False, pdf) for x in ds]
+    return [results.results(x, pdf, cov) for x, cov in zip(ds, covs)]

 @pytest.fixture(scope='module')
 def single_exp_data():
@@ -62,7 +65,8 @@ def single_exp_data():

 @pytest.fixture(scope='module')
 def dataset_convolution_results(single_exp_data):
     pdf, exp = single_exp_data
-    return [results.results(ds, pdf, pdf) for ds in exp.datasets]
+    covs = [results.covariance_matrix(ds, False, pdf) for ds in exp.datasets]
+    return [results.results(ds, pdf, cov) for ds, cov in zip(exp.datasets, covs)]

 @pytest.fixture(scope='module')
 def dataset_chi2data(dataset_convolution_results):
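The `vp_comparefits.py` changes above replace a single string option with a paired `--thcovmat_if_present`/`--no-thcovmat_if_present` flag. The key trick is that both options write to the same `dest` and the default is `None`, so the script can distinguish "the user said False" from "the user said nothing" and fall back to the interactive prompt. A standalone sketch of that tri-state pattern, using only the standard library:

```python
import argparse

def make_parser():
    """Sketch of the tri-state boolean pattern used for thcovmat_if_present:
    two options sharing one dest, with None marking 'not given'."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--thcovmat_if_present', dest='thcovmat_if_present',
        action='store_true',
        help="Use theory cov mat for calculating statistical estimators if available.")
    parser.add_argument(
        '--no-thcovmat_if_present', dest='thcovmat_if_present',
        action='store_false',
        help="Do not use theory cov mat for calculating statistical estimators.")
    # None (rather than False) means "neither flag was passed", which is
    # what lets try_complete_args decide to prompt the user interactively.
    parser.set_defaults(thcovmat_if_present=None)
    return parser

parser = make_parser()
unset = parser.parse_args([]).thcovmat_if_present            # None
on = parser.parse_args(['--thcovmat_if_present']).thcovmat_if_present
off = parser.parse_args(['--no-thcovmat_if_present']).thcovmat_if_present
```

This is why `try_complete_args` checks `args[bname] is None` for the boolean names instead of the `not args[argname]` truthiness test used for the string arguments.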