NNPDF · scarlehoff · Jan 27, 2022 · Jan 27, 2022 · Jan 27, 2022 · Jan 27, 2022
diff --git a/n3fit/src/n3fit/tests/test_vpinterface.py b/n3fit/src/n3fit/tests/test_vpinterface.py
@@ -40,7 +40,7 @@ def test_N3PDF(members, layers):
     xsize = np.random.randint(2, 20)
     xx = np.random.rand(xsize)
     n3pdf = generate_n3pdf(layers=layers, members=members)
-    assert len(n3pdf) == members + 1
+    assert len(n3pdf) == members
     assert n3pdf.stats_class == MCStats
     assert n3pdf.load() is n3pdf
     w = n3pdf.get_nn_weights()

diff --git a/n3fit/src/n3fit/vpinterface.py b/n3fit/src/n3fit/vpinterface.py
@@ -74,7 +74,7 @@ def __init__(self, pdf_models, fit_basis=None, name="n3fit"):
         # The number of members will be minimum 2, needed for legacy compatibility
         # Note that the member 0 does not exist as an actual model as it corresponds
         # to the average of all others
-        self._info = {"ErrorType": "replicas", "NumMembers": len(self._models) + 1}
+        self._info = {"ErrorType": "replicas", "NumMembers": len(self._models)}
 
     @property
     def stats_class(self):

diff --git a/validphys2/src/validphys/calcutils.py b/validphys2/src/validphys/calcutils.py
@@ -70,7 +70,7 @@ def all_chi2(results):
     """Return the chi² for all elements in the result. Note that the
     interpretation of the result will depend on the PDF error type"""
     data_result, th_result = results
-    diffs = th_result._rawdata - data_result.central_value[:,np.newaxis]
+    diffs = th_result.data - data_result.central_value[:,np.newaxis]
     return calc_chi2(sqrtcov=data_result.sqrtcovmat, diffs=diffs)
 
 def central_chi2(results):
@@ -85,7 +85,7 @@ def all_chi2_theory(results, totcov):
     """Like all_chi2 but here the chi² are calculated using a covariance matrix
     that is the sum of the experimental covmat and the theory covmat."""
     data_result, th_result = results
-    diffs = th_result._rawdata - data_result.central_value[:,np.newaxis]
+    diffs = th_result.data - data_result.central_value[:,np.newaxis]
     total_covmat = np.array(totcov)
     return calc_chi2(sqrtcov=la.cholesky(total_covmat, lower=True), diffs=diffs)
 

diff --git a/validphys2/src/validphys/closuretest/closure_results.py b/validphys2/src/validphys/closuretest/closure_results.py
@@ -122,7 +122,7 @@ def bootstrap_bias_experiment(
     """
     dt_ct, th_ct = dataset_inputs_results
     ((_, th_ul),) = underlying_dataset_inputs_results
-    th_ct_boot_cv = bootstrap_values(th_ct._rawdata, bootstrap_samples)
+    th_ct_boot_cv = bootstrap_values(th_ct.data, bootstrap_samples)
     boot_diffs = th_ct_boot_cv - th_ul.central_value[:, np.newaxis]
     boot_bias = calc_chi2(dt_ct.sqrtcovmat, boot_diffs) / len(dt_ct)
     return boot_bias
@@ -191,7 +191,7 @@ def variance_dataset(results, fit, use_fitcommondata):
 
     """
     dt, th = results
-    diff = th.central_value[:, np.newaxis] - th._rawdata
+    diff = th.central_value[:, np.newaxis] - th.data
     var_unnorm = calc_chi2(dt.sqrtcovmat, diff).mean()
     return VarianceData(var_unnorm, len(th))
 
@@ -210,7 +210,7 @@ def bootstrap_variance_experiment(dataset_inputs_results, bootstrap_samples=500)
     normalised to the number of data in the experiment.
     """
     dt_ct, th_ct = dataset_inputs_results
-    diff = th_ct.central_value[:, np.newaxis] - th_ct._rawdata
+    diff = th_ct.central_value[:, np.newaxis] - th_ct.data
     var_unnorm_boot = bootstrap_values(
         diff,
         bootstrap_samples,

diff --git a/validphys2/src/validphys/closuretest/multiclosure.py b/validphys2/src/validphys/closuretest/multiclosure.py
@@ -130,7 +130,7 @@ def fits_dataset_bias_variance(
     """
     closures_th, law_th, _, sqrtcov = internal_multiclosure_dataset_loader
     # The dimentions here are (fit, data point, replica)
-    reps = np.asarray([th._rawdata[:, :_internal_max_reps] for th in closures_th])
+    reps = np.asarray([th.data[:, :_internal_max_reps] for th in closures_th])
     # take mean across replicas - since we might have changed no. of reps
     centrals = reps.mean(axis=2)
     # place bins on first axis
@@ -226,7 +226,7 @@ def dataset_xi(internal_multiclosure_dataset_loader):
 
     """
     closures_th, law_th, covmat, _ = internal_multiclosure_dataset_loader
-    replicas = np.asarray([th._rawdata for th in closures_th])
+    replicas = np.asarray([th.data for th in closures_th])
     centrals = np.mean(replicas, axis=-1)
     underlying = law_th.central_value
 
@@ -297,7 +297,7 @@ class BootstrappedTheoryResult:
     """
 
     def __init__(self, data):
-        self._rawdata = data
+        self.data = data
         self.central_value = data.mean(axis=1)
 
 
@@ -341,7 +341,7 @@ def _bootstrap_multiclosure_fits(
     # construct proxy fits theory predictions
     for fit_th in fit_boot_th:
         rep_boot_index = rng.choice(n_rep_max, size=n_rep, replace=use_repeats)
-        boot_ths.append(BootstrappedTheoryResult(fit_th._rawdata[:, rep_boot_index]))
+        boot_ths.append(BootstrappedTheoryResult(fit_th.data[:, rep_boot_index]))
     return (boot_ths, *input_tuple)
 
 
@@ -741,7 +741,7 @@ def dataset_fits_bias_replicas_variance_samples(
     """
     closures_th, law_th, _, sqrtcov = internal_multiclosure_dataset_loader
     # The dimentions here are (fit, data point, replica)
-    reps = np.asarray([th._rawdata[:, :_internal_max_reps] for th in closures_th])
+    reps = np.asarray([th.data[:, :_internal_max_reps] for th in closures_th])
     # take mean across replicas - since we might have changed no. of reps
     centrals = reps.mean(axis=2)
     # place bins on first axis

diff --git a/validphys2/src/validphys/closuretest/multiclosure_pseudodata.py b/validphys2/src/validphys/closuretest/multiclosure_pseudodata.py
@@ -126,7 +126,7 @@ def experiments_closure_pseudodata_estimators_table(
         fits_erep_delta_chi2 = []
         for i_fit, fit_th in enumerate(closures_th):
             # some of these could be done outside of loop, but easier to do here.
-            th_replicas = fit_th._rawdata
+            th_replicas = fit_th.data
             th_central = np.mean(th_replicas, axis=-1)
             dt_replicas = fits_reps_pseudo[i_fit].xs(exp_name, axis=0, level=0).to_numpy()
             dt_central = fits_cv.xs(exp_name, axis=0, level=0).iloc[:, i_fit].to_numpy()

diff --git a/validphys2/src/validphys/core.py b/validphys2/src/validphys/core.py
@@ -229,10 +229,8 @@ def legacy_load(self):
     @property
     def grid_values_index(self):
         """A range object describing which members are selected in a
-        ``pdf.load().grid_values`` operation.  This is ``range(1,
-        len(pdf))`` for Monte Carlo sets, because replica 0 is not selected
-        and ``range(0, len(pdf))`` for hessian sets.
-
+        ``pdf.load().grid_values`` operation.
+        Equal to range(0, len(pdf))
 
         Returns
         -------
@@ -243,12 +241,7 @@ def grid_values_index(self):
         -----
         The range object can be used efficiently as a Pandas index.
         """
-        if self.error_type == "replicas":
-            return range(1, len(self))
-        elif self.error_type in ("hessian", "symmhessian"):
-            return range(0, len(self))
-        else:
-            raise RuntimeError(f"Unknown error type: {self.stats_class}")
+        return range(0, len(self))
 
     def get_members(self):
         """Return the number of members selected in ``pdf.load().grid_values``
@@ -760,18 +753,27 @@ def load(self):
     def __str__(self):
         return str(self.path)
 
-#TODO: Decide if we want methods or properties
 class Stats:
+    """The stats class holds the results of computations that involve PDFs
+    and knows how to compute statistical estimators for each kind of PDF.
+    The first index is always the central value.
+
+
+    Parameters
+    ----------
+        data: np.array
+            Result of the computation, should always be N_pdf * N_bins * ...
+    """
 
     def __init__(self, data):
         """`data `should be N_pdf*N_bins"""
         self.data = np.atleast_2d(data)
 
     def central_value(self):
-        raise NotImplementedError()
+        return self.data[0]
 
     def error_members(self):
-        raise NotImplementedError()
+        return self.data[1:]
 
     def std_error(self):
         raise NotImplementedError()
@@ -798,16 +800,12 @@ def errorbarstd(self):
 
 class MCStats(Stats):
     """Result obtained from a Monte Carlo sample"""
-
-    def central_value(self):
-        return np.mean(self.data, axis=0)
-
     def std_error(self):
         # ddof == 1 to match libNNPDF behaviour
-        return np.std(self.data, ddof=1, axis=0)
+        return np.std(self.error_members(), ddof=1, axis=0)
 
     def moment(self, order):
-        return np.mean(np.power(self.data-self.central_value(),order), axis=0)
+        return np.mean(np.power(self.error_members()-self.central_value(),order), axis=0)
 
     def errorbar68(self):
         #Use nanpercentile here because we can have e.g. 0/0==nan normalization
@@ -819,9 +817,6 @@ def errorbar68(self):
     def sample_values(self, size):
         return np.random.choice(self, size=size)
 
-    def error_members(self):
-        return self.data
-
 
 class SymmHessianStats(Stats):
     """Compute stats in the 'symetric' hessian format: The first index (0)
@@ -833,15 +828,9 @@ def __init__(self, data, rescale_factor=1):
         super().__init__(data)
         self.rescale_factor = rescale_factor
 
-    def central_value(self):
-        return self.data[0]
-
     def errorbar68(self):
         return self.errorbarstd()
 
-    def error_members(self):
-        return self.data[1:]
-
     def std_error(self):
         data = self.data
         diffsq = (data[0] - data[1:])**2

diff --git a/validphys2/src/validphys/correlations.py b/validphys2/src/validphys/correlations.py
@@ -60,14 +60,14 @@ def _basic_obs_obs_correlation(obsarr1, obsarr2):
 
     return x@y/np.outer(la.norm(x,axis=1),la.norm(y,axis=0))
 
-#TODO: Implement for other error types. Do not use the _rawdata.
 @check_pdf_is_montecarlo
 def obs_pdf_correlations(pdf, results, xplotting_grid):
     """Return the correlations between each point in a dataset and the PDF
     values on a grid of (x,f) points in a format similar to `xplotting_grid`."""
     _ , th = results
-    corrs = _basic_obs_pdf_correlation(xplotting_grid.grid_values, th._rawdata)
-    return xplotting_grid._replace(grid_values=corrs)
+    pdf_replicas = pdf.stats_class(xplotting_grid.grid_values).error_members()
+    corrs = _basic_obs_pdf_correlation(pdf_replicas, th.error_members)
+    return xplotting_grid.copy_grid(grid_values=corrs)
 
 
 corrpair_results = collect('results', ['corrpair'])
@@ -77,4 +77,4 @@ def obs_pdf_correlations(pdf, results, xplotting_grid):
 def obs_obs_correlations(pdf, corrpair_results):
     """Return the theoretical correlation matrix between a pair of observables."""
     (_,th1), (_,th2) = corrpair_results
-    return _basic_obs_obs_correlation(th1._rawdata, th2._rawdata)
+    return _basic_obs_obs_correlation(th1.error_members, th2.error_members)
diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py
@@ -531,7 +531,7 @@ def pdferr_plus_covmat(dataset, pdf, covmat_t0_considered):
     True
     """
     th = ThPredictionsResult.from_convolution(pdf, dataset)
-    pdf_cov = np.cov(th._rawdata, rowvar=True)
+    pdf_cov = np.cov(th.error_members, rowvar=True)
     return pdf_cov + covmat_t0_considered
 
 

diff --git a/validphys2/src/validphys/deltachi2.py b/validphys2/src/validphys/deltachi2.py
@@ -152,8 +152,8 @@ def pos_neg_xplotting_grids(delta_chi2_hessian, xplotting_grid):
     pos_grid = xplotting_grid.grid_values[pos_mask]
     neg_grid = xplotting_grid.grid_values[neg_mask]
 
-    pos_xplotting_grid = xplotting_grid._replace(grid_values=pos_grid)
-    neg_xplotting_grid = xplotting_grid._replace(grid_values=neg_grid)
+    pos_xplotting_grid = xplotting_grid.copy_grid(grid_values=pos_grid)
+    neg_xplotting_grid = xplotting_grid.copy_grid(grid_values=neg_grid)
 
     return [xplotting_grid, pos_xplotting_grid, neg_xplotting_grid]
 

diff --git a/validphys2/src/validphys/eff_exponents.py b/validphys2/src/validphys/eff_exponents.py
@@ -73,7 +73,7 @@ def alpha_eff(pdf: PDF, *,
         warnings.simplefilter('ignore', RuntimeWarning)
         alphaGrid_values = -np.log(abs(pdfGrid_values/xGrid))/np.log(xGrid)
         alphaGrid_values[alphaGrid_values == - np.inf] = np.nan  # when PDF_i =0
-    alphaGrid = pdfGrid._replace(grid_values=alphaGrid_values)
+    alphaGrid = pdfGrid.copy_grid(grid_values=alphaGrid_values)
     return alphaGrid
 
 @check_positive('Q')
@@ -117,7 +117,7 @@ def beta_eff(pdf, *,
         warnings.simplefilter('ignore', RuntimeWarning)
         betaGrid_values = np.log(abs(pdfGrid_values/xGrid))/np.log(1-xGrid)
         betaGrid_values[betaGrid_values == -np.inf] = np.nan  # when PDF_i =0
-    betaGrid = pdfGrid._replace(grid_values=betaGrid_values)
+    betaGrid = pdfGrid.copy_grid(grid_values=betaGrid_values)
 
     return betaGrid  # .grid_values
 
@@ -348,17 +348,14 @@ def next_effective_exponents_table(
 
     eff_exp_data = []
 
-    alphagrid = alpha_effs.grid_values
-    betagrid = beta_effs.grid_values
-
-    alphastats = pdf.stats_class(alphagrid)
-    betastats = pdf.stats_class(betagrid)
+    alphastats = alpha_effs.stats_gv
+    betastats = beta_effs.stats_gv
 
     with warnings.catch_warnings():
         warnings.simplefilter('ignore', RuntimeWarning)
 
-        alpha_cv = np.nanmean(alphagrid, axis=0)
-        beta_cv = np.nanmean(betagrid, axis=0)
+        alpha_cv = np.nanmean(alphastats.error_members(), axis=0)
+        beta_cv = np.nanmean(betastats.error_members(), axis=0)
         #tuple of low and high values repectively
         alpha68 = alphastats.errorbar68()
         beta68 = betastats.errorbar68()

diff --git a/validphys2/src/validphys/lhapdfset.py b/validphys2/src/validphys/lhapdfset.py
@@ -7,15 +7,14 @@
 
     The ``.members`` and ``.central_member`` of the ``LHAPDFSet`` are
     LHAPDF objects (the typical output from ``mkPDFs``) and can be used normally.
-    For MC PDFs the ``central_member`` is the average of all replicas (members 1-100)
-    while for Hessian PDFfs the ``central_member`` is also ``members[0]``
+    The ``central_member`` PDF is also ``member[0]``
 
     Examples
     --------
     >>> from validphys.lhapdfset import LHAPDFSet
     >>> pdf = LHAPDFSet("NNPDF40_nnlo_as_01180", "replicas")
     >>> len(pdf.members)
-    100
+    101
     >>> pdf.central_member.alphasQ(91.19)
     0.11800
     >>> pdf.members[0].xfxQ2(0.5, 15625)
@@ -59,11 +58,6 @@ def __init__(self, name, error_type):
             self._lhapdf_set = lhapdf.mkPDFs(name)
         self._flavors = None
 
-    @property
-    def is_monte_carlo(self):
-        """Check whether the error type is MC"""
-        return self._error_type == "replicas"
-
     @property
     def is_t0(self):
         """Check whether we are in t0 mode"""
@@ -78,25 +72,30 @@ def n_members(self):
     def members(self):
         """Return the members of the set, this depends on the error type:
         t0: returns only member 0
-        MC: skip member 0
-        Hessian: return all
+        MC & Hessian: return all
         """
         if self.is_t0:
             return self._lhapdf_set[0:1]
-        if self.is_monte_carlo:
-            return self._lhapdf_set[1:]
         return self._lhapdf_set
 
     @property
     def central_member(self):
         """Returns a reference to member 0 of the PDF list"""
         return self._lhapdf_set[0]
 
-    def xfxQ(self, x, q, member_idx, flavour):
-        """Return the PDF value for one single point for one single member"""
-        member_pdf = self.members[member_idx]
-        res = member_pdf.xfxQ(x, q)
-        return res[flavour]
+    def xfxQ(self, x, Q, n, fl):
+        """Return the PDF value for one single point for one single member
+        Parameters
+        ---------
+            x: float
+            Q: float
+            n: int
+            fl: str
+        """
+        member_pdf = self.members[n]
+        res = member_pdf.xfxQ(x, Q)
+        # Use `.get` so that non-existing flavours (top / antitop) just return 0
+        return res.get(fl, 0.0)
 
     @property
     def flavors(self):

diff --git a/validphys2/src/validphys/mc2hessian.py b/validphys2/src/validphys/mc2hessian.py
@@ -108,9 +108,8 @@ def _create_mc2hessian(pdf, Q, xgrid, Neig, output_path, name=None):
 
 def _get_X(pdf, Q, xgrid, reshape=False):
     pdf_grid = xplotting_grid(pdf, Q, xgrid=xgrid)
-    pdf_grid_values = pdf_grid.grid_values
-    replicas = pdf_grid_values
-    mean = pdf_grid_values.mean(axis=0)
+    replicas = pdf_grid.stats_gv.error_members()
+    mean = pdf_grid.stats_gv.central_value()
     Xt = replicas - mean
     if reshape:
         Xt = Xt.reshape(Xt.shape[0], Xt.shape[1] * Xt.shape[2])