Skip to content
Merged
6 changes: 3 additions & 3 deletions validphys2/examples/mc_gen_example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@ meta:
keywords: [pseudodata, test, artificial replica]

fit: NNPDF40_nlo_as_01180

theoryid: 208
mcseed: 4
genrep: True

template: mc_gen_report.md

actions_:
- report(main=true)

dataset_inputs:
- { dataset: CHORUSNBPb_dw_ite, frac: 0.5}

- { dataset: CHORUSNBPb_dw_ite, frac: 0.5}
130 changes: 46 additions & 84 deletions validphys2/src/validphys/chi2grids.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,109 +4,71 @@
Compute and store χ² data from replicas, possibly keeping the correlations
between pseudorreplica fluctuations between different fits. This is applied
here to parameter determinations such as those of αs.

This module is severely thwarted by the poor adequacy of libnnpdf for this use
case. Several pieces of functionality need to be implemented there.
"""
import logging
from collections import namedtuple

import numpy as np
import pandas as pd

from reportengine import collect
from reportengine.table import table
from NNPDF import pseudodata, single_replica, RandomGenerator

from validphys.core import PDF
from validphys.results import ThPredictionsResult, DataResult, chi2_breakdown_by_dataset
from validphys.calcutils import calc_chi2

# Row record for the per-replica chi² results: one entry per (group, replica).
PseudoReplicaExpChi2Data = namedtuple(
    "PseudoReplicaChi2Data", ["group", "ndata", "chi2", "nnfit_index"]
)


log = logging.getLogger(__name__)


def computed_pseudoreplicas_chi2(
    fitted_make_replicas,
    group_result_table_no_table,  # to get the results already in the form of a dataframe
    groups_sqrtcovmat,
):
    """Return a dataframe with the chi² of each replica with its corresponding
    pseudodata (i.e. the one it was fitted with). The chi² is computed by group.

    Parameters
    ----------
    fitted_make_replicas
        Sequence of per-replica pseudodata arrays; stacked along axis 1 so that
        columns match the replica-prediction columns.
    group_result_table_no_table
        Dataframe of per-replica predictions indexed (at least) by a ``group``
        level, with ``data_central`` and ``theory_central`` columns that are
        dropped here.
    groups_sqrtcovmat
        Dataframe holding, per group, the (sqrt) covariance matrix used by
        ``calc_chi2``.

    Returns
    -------
    pd.DataFrame
        Indexed by ``['group', 'ndata', 'nnfit_index']`` with the chi² values,
        where ``nnfit_index`` is the index of the corresponding replica.
    """
    # Stack the replica pseudodata to have the prediction shape
    r_data = np.stack(fitted_make_replicas, axis=1)

    # Drop data central and theory central, which are not useful here
    r_prediction = group_result_table_no_table.drop(columns=["data_central", "theory_central"])

    # Now compute the chi2 on a per-group basis
    diff = r_prediction - r_data
    group_level = r_prediction.index.get_level_values("group")

    # Save the results in a dataframe similar (but not equal) to the old
    # per-experiment/per-dataset one
    df_output = []
    for group in group_level.unique():
        group_diff = diff.loc[group_level == group]
        # Select the rows of this group, then the column block for this group
        its_covmat = groups_sqrtcovmat[group_level == group][group]
        chi2_per_replica = calc_chi2(its_covmat, group_diff)
        ndata = len(group_diff)
        for i, chi2 in enumerate(chi2_per_replica):
            df_output.append(PseudoReplicaExpChi2Data(group, ndata, chi2, i))

    df = pd.DataFrame(df_output, columns=PseudoReplicaExpChi2Data._fields)
    df.set_index(["group", "ndata", "nnfit_index"], inplace=True)
    df.sort_index(inplace=True)
    return df

# TODO: Probably fitcontext should set all of the variables required to compute
# this. But better setting them explicitly than setting some, so we require the
# user to do that.
fits_computed_pseudoreplicas_chi2 = collect(computed_pseudoreplicas_chi2, ("fits",))

dataspecs_computed_pseudorreplicas_chi2 = collect(computed_pseudoreplicas_chi2, ("dataspecs",))


@table
def export_fits_computed_pseudoreplicas_chi2(fits_computed_pseudoreplicas_chi2):
    """Hack to force writing the CSV output: returns the collected per-fit
    chi² dataframes unchanged so the ``@table`` decorator persists them."""
    return fits_computed_pseudoreplicas_chi2
4 changes: 4 additions & 0 deletions validphys2/src/validphys/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,10 @@ def parse_use_cuts(self, use_cuts: (bool, str)):

return res

def produce_replicas(self, nreplica: int):
    """Produce a replicas array: an ``NSList`` of the replica indices
    ``1..nreplica`` (inclusive) under the ``replica`` namespace key."""
    return NSList(range(1, nreplica + 1), nskey="replica")

def produce_inclusive_use_scalevar_uncertainties(self, use_scalevar_uncertainties: bool = False,
point_prescription: (str, None) = None):
"""Whether to use a scale variation uncertainty theory covmat.
Expand Down
9 changes: 8 additions & 1 deletion validphys2/src/validphys/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

import numpy as np

from NNPDF import CommonData, RandomGenerator
from NNPDF import CommonData
from reportengine.checks import make_argcheck, check, check_positive, make_check
from reportengine.compat import yaml
import validphys.cuts
Expand Down Expand Up @@ -83,6 +83,13 @@ def prepare_nnpdf_rng(filterseed:int, rngalgo:int, seed:int):
be an integer between 0 and 16, seeded with ``filterseed``.
The RNG can then be subsequently used to i.e generate pseudodata.
"""
try:
Comment thread
siranipour marked this conversation as resolved.
from NNPDF import RandomGenerator
except ImportError as e:
logging.error("Generating closure data needs a valid installation of libNNPDF")
raise e

log.warning("Importing libNNPDF")
log.info("Initialising RNG")
RandomGenerator.InitRNG(rngalgo, seed)
RandomGenerator.GetRNG().SetSeed(filterseed)
Expand Down
Loading