Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion n3fit/src/n3fit/tests/test_vpinterface.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def test_N3PDF(members, layers):
xsize = np.random.randint(2, 20)
xx = np.random.rand(xsize)
n3pdf = generate_n3pdf(layers=layers, members=members)
assert len(n3pdf) == members + 1
assert len(n3pdf) == members
assert n3pdf.stats_class == MCStats
assert n3pdf.load() is n3pdf
w = n3pdf.get_nn_weights()
Expand Down
2 changes: 1 addition & 1 deletion n3fit/src/n3fit/vpinterface.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def __init__(self, pdf_models, fit_basis=None, name="n3fit"):
# The number of members will be minimum 2, needed for legacy compatibility
# Note that the member 0 does not exist as an actual model as it corresponds
# to the average of all others
self._info = {"ErrorType": "replicas", "NumMembers": len(self._models) + 1}
self._info = {"ErrorType": "replicas", "NumMembers": len(self._models)}

@property
def stats_class(self):
Expand Down
4 changes: 2 additions & 2 deletions validphys2/src/validphys/calcutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def all_chi2(results):
"""Return the chi² for all elements in the result. Note that the
interpretation of the result will depend on the PDF error type"""
data_result, th_result = results
diffs = th_result._rawdata - data_result.central_value[:,np.newaxis]
diffs = th_result.data - data_result.central_value[:,np.newaxis]
return calc_chi2(sqrtcov=data_result.sqrtcovmat, diffs=diffs)

def central_chi2(results):
Expand All @@ -85,7 +85,7 @@ def all_chi2_theory(results, totcov):
"""Like all_chi2 but here the chi² are calculated using a covariance matrix
that is the sum of the experimental covmat and the theory covmat."""
data_result, th_result = results
diffs = th_result._rawdata - data_result.central_value[:,np.newaxis]
diffs = th_result.data - data_result.central_value[:,np.newaxis]
total_covmat = np.array(totcov)
return calc_chi2(sqrtcov=la.cholesky(total_covmat, lower=True), diffs=diffs)

Expand Down
6 changes: 3 additions & 3 deletions validphys2/src/validphys/closuretest/closure_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def bootstrap_bias_experiment(
"""
dt_ct, th_ct = dataset_inputs_results
((_, th_ul),) = underlying_dataset_inputs_results
th_ct_boot_cv = bootstrap_values(th_ct._rawdata, bootstrap_samples)
th_ct_boot_cv = bootstrap_values(th_ct.data, bootstrap_samples)
boot_diffs = th_ct_boot_cv - th_ul.central_value[:, np.newaxis]
boot_bias = calc_chi2(dt_ct.sqrtcovmat, boot_diffs) / len(dt_ct)
return boot_bias
Expand Down Expand Up @@ -191,7 +191,7 @@ def variance_dataset(results, fit, use_fitcommondata):

"""
dt, th = results
diff = th.central_value[:, np.newaxis] - th._rawdata
diff = th.central_value[:, np.newaxis] - th.data
var_unnorm = calc_chi2(dt.sqrtcovmat, diff).mean()
return VarianceData(var_unnorm, len(th))

Expand All @@ -210,7 +210,7 @@ def bootstrap_variance_experiment(dataset_inputs_results, bootstrap_samples=500)
normalised to the number of data in the experiment.
"""
dt_ct, th_ct = dataset_inputs_results
diff = th_ct.central_value[:, np.newaxis] - th_ct._rawdata
diff = th_ct.central_value[:, np.newaxis] - th_ct.data
var_unnorm_boot = bootstrap_values(
diff,
bootstrap_samples,
Expand Down
10 changes: 5 additions & 5 deletions validphys2/src/validphys/closuretest/multiclosure.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def fits_dataset_bias_variance(
"""
closures_th, law_th, _, sqrtcov = internal_multiclosure_dataset_loader
# The dimentions here are (fit, data point, replica)
reps = np.asarray([th._rawdata[:, :_internal_max_reps] for th in closures_th])
reps = np.asarray([th.data[:, :_internal_max_reps] for th in closures_th])
# take mean across replicas - since we might have changed no. of reps
centrals = reps.mean(axis=2)
# place bins on first axis
Expand Down Expand Up @@ -226,7 +226,7 @@ def dataset_xi(internal_multiclosure_dataset_loader):

"""
closures_th, law_th, covmat, _ = internal_multiclosure_dataset_loader
replicas = np.asarray([th._rawdata for th in closures_th])
replicas = np.asarray([th.data for th in closures_th])
centrals = np.mean(replicas, axis=-1)
underlying = law_th.central_value

Expand Down Expand Up @@ -297,7 +297,7 @@ class BootstrappedTheoryResult:
"""

def __init__(self, data):
self._rawdata = data
self.data = data
self.central_value = data.mean(axis=1)


Expand Down Expand Up @@ -341,7 +341,7 @@ def _bootstrap_multiclosure_fits(
# construct proxy fits theory predictions
for fit_th in fit_boot_th:
rep_boot_index = rng.choice(n_rep_max, size=n_rep, replace=use_repeats)
boot_ths.append(BootstrappedTheoryResult(fit_th._rawdata[:, rep_boot_index]))
boot_ths.append(BootstrappedTheoryResult(fit_th.data[:, rep_boot_index]))
return (boot_ths, *input_tuple)


Expand Down Expand Up @@ -741,7 +741,7 @@ def dataset_fits_bias_replicas_variance_samples(
"""
closures_th, law_th, _, sqrtcov = internal_multiclosure_dataset_loader
# The dimentions here are (fit, data point, replica)
reps = np.asarray([th._rawdata[:, :_internal_max_reps] for th in closures_th])
reps = np.asarray([th.data[:, :_internal_max_reps] for th in closures_th])
# take mean across replicas - since we might have changed no. of reps
centrals = reps.mean(axis=2)
# place bins on first axis
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def experiments_closure_pseudodata_estimators_table(
fits_erep_delta_chi2 = []
for i_fit, fit_th in enumerate(closures_th):
# some of these could be done outside of loop, but easier to do here.
th_replicas = fit_th._rawdata
th_replicas = fit_th.data
th_central = np.mean(th_replicas, axis=-1)
dt_replicas = fits_reps_pseudo[i_fit].xs(exp_name, axis=0, level=0).to_numpy()
dt_central = fits_cv.xs(exp_name, axis=0, level=0).iloc[:, i_fit].to_numpy()
Expand Down
45 changes: 17 additions & 28 deletions validphys2/src/validphys/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,10 +229,8 @@ def legacy_load(self):
@property
def grid_values_index(self):
"""A range object describing which members are selected in a
``pdf.load().grid_values`` operation. This is ``range(1,
len(pdf))`` for Monte Carlo sets, because replica 0 is not selected
and ``range(0, len(pdf))`` for hessian sets.

``pdf.load().grid_values`` operation.
Equal to range(0, len(pdf))

Returns
-------
Expand All @@ -243,12 +241,7 @@ def grid_values_index(self):
-----
The range object can be used efficiently as a Pandas index.
"""
if self.error_type == "replicas":
return range(1, len(self))
elif self.error_type in ("hessian", "symmhessian"):
return range(0, len(self))
else:
raise RuntimeError(f"Unknown error type: {self.stats_class}")
return range(0, len(self))

def get_members(self):
"""Return the number of members selected in ``pdf.load().grid_values``
Expand Down Expand Up @@ -760,18 +753,27 @@ def load(self):
def __str__(self):
return str(self.path)

#TODO: Decide if we want methods or properties
class Stats:
"""The stats class holds the results of computations that involve PDFs
and knows how to compute statistical estimators for each kind of PDF.
The first index is always the central value.


Parameters
----------
data: np.array
Result of the computation, should always be N_pdf * N_bins * ...
"""

def __init__(self, data):
"""`data `should be N_pdf*N_bins"""
self.data = np.atleast_2d(data)

def central_value(self):
raise NotImplementedError()
return self.data[0]

def error_members(self):
raise NotImplementedError()
return self.data[1:]

def std_error(self):
raise NotImplementedError()
Expand All @@ -798,16 +800,12 @@ def errorbarstd(self):

class MCStats(Stats):
"""Result obtained from a Monte Carlo sample"""

def central_value(self):
return np.mean(self.data, axis=0)

def std_error(self):
# ddof == 1 to match libNNPDF behaviour
return np.std(self.data, ddof=1, axis=0)
return np.std(self.error_members(), ddof=1, axis=0)

def moment(self, order):
return np.mean(np.power(self.data-self.central_value(),order), axis=0)
return np.mean(np.power(self.error_members()-self.central_value(),order), axis=0)

def errorbar68(self):
#Use nanpercentile here because we can have e.g. 0/0==nan normalization
Expand All @@ -819,9 +817,6 @@ def errorbar68(self):
def sample_values(self, size):
return np.random.choice(self, size=size)

def error_members(self):
return self.data


class SymmHessianStats(Stats):
"""Compute stats in the 'symetric' hessian format: The first index (0)
Expand All @@ -833,15 +828,9 @@ def __init__(self, data, rescale_factor=1):
super().__init__(data)
self.rescale_factor = rescale_factor

def central_value(self):
return self.data[0]

def errorbar68(self):
return self.errorbarstd()

def error_members(self):
return self.data[1:]

def std_error(self):
data = self.data
diffsq = (data[0] - data[1:])**2
Expand Down
8 changes: 4 additions & 4 deletions validphys2/src/validphys/correlations.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,14 +60,14 @@ def _basic_obs_obs_correlation(obsarr1, obsarr2):

return x@y/np.outer(la.norm(x,axis=1),la.norm(y,axis=0))

#TODO: Implement for other error types. Do not use the _rawdata.
@check_pdf_is_montecarlo
def obs_pdf_correlations(pdf, results, xplotting_grid):
"""Return the correlations between each point in a dataset and the PDF
values on a grid of (x,f) points in a format similar to `xplotting_grid`."""
_ , th = results
corrs = _basic_obs_pdf_correlation(xplotting_grid.grid_values, th._rawdata)
return xplotting_grid._replace(grid_values=corrs)
pdf_replicas = pdf.stats_class(xplotting_grid.grid_values).error_members()
corrs = _basic_obs_pdf_correlation(pdf_replicas, th.error_members)
return xplotting_grid.copy_grid(grid_values=corrs)


corrpair_results = collect('results', ['corrpair'])
Expand All @@ -77,4 +77,4 @@ def obs_pdf_correlations(pdf, results, xplotting_grid):
def obs_obs_correlations(pdf, corrpair_results):
"""Return the theoretical correlation matrix between a pair of observables."""
(_,th1), (_,th2) = corrpair_results
return _basic_obs_obs_correlation(th1._rawdata, th2._rawdata)
return _basic_obs_obs_correlation(th1.error_members, th2.error_members)
2 changes: 1 addition & 1 deletion validphys2/src/validphys/covmats.py
Original file line number Diff line number Diff line change
Expand Up @@ -531,7 +531,7 @@ def pdferr_plus_covmat(dataset, pdf, covmat_t0_considered):
True
"""
th = ThPredictionsResult.from_convolution(pdf, dataset)
pdf_cov = np.cov(th._rawdata, rowvar=True)
pdf_cov = np.cov(th.error_members, rowvar=True)
return pdf_cov + covmat_t0_considered


Expand Down
4 changes: 2 additions & 2 deletions validphys2/src/validphys/deltachi2.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,8 @@ def pos_neg_xplotting_grids(delta_chi2_hessian, xplotting_grid):
pos_grid = xplotting_grid.grid_values[pos_mask]
neg_grid = xplotting_grid.grid_values[neg_mask]

pos_xplotting_grid = xplotting_grid._replace(grid_values=pos_grid)
neg_xplotting_grid = xplotting_grid._replace(grid_values=neg_grid)
pos_xplotting_grid = xplotting_grid.copy_grid(grid_values=pos_grid)
neg_xplotting_grid = xplotting_grid.copy_grid(grid_values=neg_grid)

return [xplotting_grid, pos_xplotting_grid, neg_xplotting_grid]

Expand Down
15 changes: 6 additions & 9 deletions validphys2/src/validphys/eff_exponents.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def alpha_eff(pdf: PDF, *,
warnings.simplefilter('ignore', RuntimeWarning)
alphaGrid_values = -np.log(abs(pdfGrid_values/xGrid))/np.log(xGrid)
alphaGrid_values[alphaGrid_values == - np.inf] = np.nan # when PDF_i =0
alphaGrid = pdfGrid._replace(grid_values=alphaGrid_values)
alphaGrid = pdfGrid.copy_grid(grid_values=alphaGrid_values)
return alphaGrid

@check_positive('Q')
Expand Down Expand Up @@ -117,7 +117,7 @@ def beta_eff(pdf, *,
warnings.simplefilter('ignore', RuntimeWarning)
betaGrid_values = np.log(abs(pdfGrid_values/xGrid))/np.log(1-xGrid)
betaGrid_values[betaGrid_values == -np.inf] = np.nan # when PDF_i =0
betaGrid = pdfGrid._replace(grid_values=betaGrid_values)
betaGrid = pdfGrid.copy_grid(grid_values=betaGrid_values)

return betaGrid # .grid_values

Expand Down Expand Up @@ -348,17 +348,14 @@ def next_effective_exponents_table(

eff_exp_data = []

alphagrid = alpha_effs.grid_values
betagrid = beta_effs.grid_values

alphastats = pdf.stats_class(alphagrid)
betastats = pdf.stats_class(betagrid)
alphastats = alpha_effs.stats_gv
betastats = beta_effs.stats_gv

with warnings.catch_warnings():
warnings.simplefilter('ignore', RuntimeWarning)

alpha_cv = np.nanmean(alphagrid, axis=0)
beta_cv = np.nanmean(betagrid, axis=0)
alpha_cv = np.nanmean(alphastats.error_members(), axis=0)
beta_cv = np.nanmean(betastats.error_members(), axis=0)
#tuple of low and high values repectively
alpha68 = alphastats.errorbar68()
beta68 = betastats.errorbar68()
Expand Down
33 changes: 16 additions & 17 deletions validphys2/src/validphys/lhapdfset.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,14 @@

The ``.members`` and ``.central_member`` of the ``LHAPDFSet`` are
LHAPDF objects (the typical output from ``mkPDFs``) and can be used normally.
For MC PDFs the ``central_member`` is the average of all replicas (members 1-100)
while for Hessian PDFfs the ``central_member`` is also ``members[0]``
The ``central_member`` PDF is also ``member[0]``

Examples
--------
>>> from validphys.lhapdfset import LHAPDFSet
>>> pdf = LHAPDFSet("NNPDF40_nnlo_as_01180", "replicas")
>>> len(pdf.members)
100
101
>>> pdf.central_member.alphasQ(91.19)
0.11800
>>> pdf.members[0].xfxQ2(0.5, 15625)
Expand Down Expand Up @@ -59,11 +58,6 @@ def __init__(self, name, error_type):
self._lhapdf_set = lhapdf.mkPDFs(name)
self._flavors = None

@property
def is_monte_carlo(self):
"""Check whether the error type is MC"""
return self._error_type == "replicas"

@property
def is_t0(self):
"""Check whether we are in t0 mode"""
Expand All @@ -78,25 +72,30 @@ def n_members(self):
def members(self):
"""Return the members of the set, this depends on the error type:
t0: returns only member 0
MC: skip member 0
Hessian: return all
MC & Hessian: return all
"""
if self.is_t0:
return self._lhapdf_set[0:1]
if self.is_monte_carlo:
return self._lhapdf_set[1:]
return self._lhapdf_set

@property
def central_member(self):
"""Returns a reference to member 0 of the PDF list"""
return self._lhapdf_set[0]

def xfxQ(self, x, q, member_idx, flavour):
"""Return the PDF value for one single point for one single member"""
member_pdf = self.members[member_idx]
res = member_pdf.xfxQ(x, q)
return res[flavour]
def xfxQ(self, x, Q, n, fl):
"""Return the PDF value for one single point for one single member
Parameters
---------
x: float
Q: float
n: int
fl: str
"""
member_pdf = self.members[n]
res = member_pdf.xfxQ(x, Q)
# Use `.get` so that non-existing flavours (top / antitop) just return 0
return res.get(fl, 0.0)

@property
def flavors(self):
Expand Down
5 changes: 2 additions & 3 deletions validphys2/src/validphys/mc2hessian.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,8 @@ def _create_mc2hessian(pdf, Q, xgrid, Neig, output_path, name=None):

def _get_X(pdf, Q, xgrid, reshape=False):
pdf_grid = xplotting_grid(pdf, Q, xgrid=xgrid)
pdf_grid_values = pdf_grid.grid_values
replicas = pdf_grid_values
mean = pdf_grid_values.mean(axis=0)
replicas = pdf_grid.stats_gv.error_members()
mean = pdf_grid.stats_gv.central_value()
Xt = replicas - mean
if reshape:
Xt = Xt.reshape(Xt.shape[0], Xt.shape[1] * Xt.shape[2])
Expand Down
Loading