From 8584111bebdd091d4679060da713cb24d6cc137f Mon Sep 17 00:00:00 2001
From: juacrumar <juacrumar@gmail.com>
Date: Tue, 5 Oct 2021 16:02:10 +0200
Subject: [PATCH 01/12] removing unused n3fit code

---
 validphys2/src/validphys/n3fit_data.py       |  24 +-
 validphys2/src/validphys/n3fit_data_utils.py | 261 +------------------
 2 files changed, 11 insertions(+), 274 deletions(-)

diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py
index ed9206c044..43d822e2de 100644
--- a/validphys2/src/validphys/n3fit_data.py
+++ b/validphys2/src/validphys/n3fit_data.py
@@ -14,7 +14,6 @@
 import numpy as np
 import pandas as pd
 
-from NNPDF import RandomGenerator
 from reportengine import collect
 from reportengine.table import table
 
@@ -186,23 +185,6 @@ def _mask_fk_tables(dataset_dicts, tr_masks):
     return np.concatenate(trmask_partial)
 
 
-def generate_data_replica(data, replica_mcseed):
-    """Generate a pseudodata replica for ``data`` given the ``replica_seed``"""
-    spec_c = data.load()
-    base_mcseed = int(hashlib.sha256(str(data).encode()).hexdigest(), 16) % 10 ** 8
-    # copy C++ object to avoid mutation
-    # t0 not required for replica generation, since libnnpdf uses experimental
-    # covmat to generate replicas.
-    spec_replica_c = type(spec_c)(spec_c)
-
-    # Replica generation
-    if replica_mcseed is not None:
-        mcseed = base_mcseed + replica_mcseed
-        RandomGenerator.InitRNG(0, mcseed)
-        spec_replica_c.MakeReplica()
-    return spec_replica_c.get_cv()
-
-
 def fitting_data_dict(
     data,
     make_replica,
@@ -350,10 +332,6 @@ def replica_nnseed_fitting_data_dict(replica, exps_fitting_data_dict, replica_nn
     return (replica, exps_fitting_data_dict, replica_nnseed)
 
 replicas_nnseed_fitting_data_dict = collect("replica_nnseed_fitting_data_dict", ("replicas",))
-
-exps_pseudodata = collect("generate_data_replica", ("group_dataset_inputs_by_experiment",))
-replicas_exps_pseudodata = collect("exps_pseudodata", ("replicas",))
-
 replicas_indexed_make_replica = collect('indexed_make_replica', ('replicas',))
 
 
@@ -500,7 +478,7 @@ def training_mask(replicas_training_mask):
     ... ]
     >>> API.training_mask(dataset_inputs=ds_inp, replicas=reps, trvlseed=123, theoryid=162, use_cuts="nocuts", mcseed=None, genrep=False)
                         replica 1  replica 2  replica 3
-    group dataset    id                                 
+    group dataset    id
     NMC   NMC        0        True      False      False
                     1        True       True       True
                     2       False       True       True
diff --git a/validphys2/src/validphys/n3fit_data_utils.py b/validphys2/src/validphys/n3fit_data_utils.py
index 547b9ab2f0..80fe469756 100644
--- a/validphys2/src/validphys/n3fit_data_utils.py
+++ b/validphys2/src/validphys/n3fit_data_utils.py
@@ -1,77 +1,10 @@
 """
 n3fit_data_utils.py
 
-Library of function for reading libnnpdf objects.
-
+Library of helper functions to n3fit_data.py for reading libnnpdf objects.
 """
-import hashlib
-from copy import deepcopy
-from collections import defaultdict
 import numpy as np
 
-from NNPDF import RandomGenerator
-from validphys.core import DataGroupSpec as vp_Exp
-from validphys.core import DataSetSpec as vp_Dataset
-
-
-def make_tr_val_mask(datasets, exp_name, seed):
-    """
-    Masks the fktables for a given experiment
-
-    Parameters
-    ----------
-        datasets: list[validphys.core.DataSetSpec]
-            list of datasets specs for a given experiment
-        exp_name: str
-            name of the experiment, it is used for the generation of the random number
-        seed: int
-            seed for the random tr/vl split
-
-    Return
-    ------
-        trmask: np.array
-            boolean array with the mask corresponding to the training set
-        vlmask: np.array
-            boolean array with the mask corresponding to the validation set
-
-    Note: the returned masks are only used in order to mask the covmat
-    """
-    # Set the seed for the experiment
-    nameseed = int(hashlib.sha256(exp_name.encode()).hexdigest(), 16) % 10 ** 8
-    nameseed += seed
-    np.random.seed(nameseed)
-
-    trmask_partial = []
-    vlmask_partial = []
-    for dataset_dict in datasets:
-        ndata = dataset_dict["ndata"]
-        frac = dataset_dict["frac"]
-        trmax = int(frac * ndata)
-        mask = np.concatenate(
-            [np.ones(trmax, dtype=np.bool), np.zeros(ndata - trmax, dtype=np.bool)]
-        )
-        np.random.shuffle(mask)
-        vl_mask = mask == False
-        # Generate the training and validation fktables
-        tr_fks = []
-        vl_fks = []
-        ex_fks = []
-        for fktable_dict in dataset_dict["fktables"]:
-            tr_fks.append(fktable_dict["fktable"][mask])
-            vl_fks.append(fktable_dict["fktable"][vl_mask])
-            ex_fks.append(fktable_dict.get("fktable"))
-        dataset_dict["tr_fktables"] = tr_fks
-        dataset_dict["vl_fktables"] = vl_fks
-        dataset_dict["ex_fktables"] = ex_fks
-
-        trmask_partial.append(mask)
-        vlmask_partial.append(vl_mask)
-
-    trmask = np.concatenate(trmask_partial)
-    vlmask = np.concatenate(vlmask_partial)
-
-    return trmask, vlmask
-
 
 def fk_parser(fk, is_hadronic=False):
     """
@@ -181,196 +114,22 @@ def common_data_reader_dataset(dataset_c, dataset_spec):
 
 def common_data_reader_experiment(experiment_c, experiment_spec):
     """
-        Wrapper around the experiments. Loop over all datasets in an experiment,
-        calls common_data_reader on them and return a list with the content.
+    Wrapper around the experiments. Loop over all datasets in an experiment,
+    calls common_data_reader on them and return a list with the content.
 
-        # Arguments:
-            - `experiment_c`: c representation of the experiment object
-            - `experiment_spec`: python representation of the experiment object
+    # Arguments:
+        - `experiment_c`: c representation of the experiment object
+        - `experiment_spec`: python representation of the experiment object
 
-        # Returns:
-            - `[parsed_datasets]`: a list of dictionaries output from `common_data_reader_dataset`
+    # Returns:
+        - `[parsed_datasets]`: a list of dictionaries output from `common_data_reader_dataset`
     """
     parsed_datasets = []
-    for dataset_c, dataset_spec in zip(
-        experiment_c.DataSets(), experiment_spec.datasets
-    ):
+    for dataset_c, dataset_spec in zip(experiment_c.DataSets(), experiment_spec.datasets):
         parsed_datasets += common_data_reader_dataset(dataset_c, dataset_spec)
     return parsed_datasets
 
 
-def common_data_reader(
-    spec,
-    t0pdfset,
-    replica_seeds=None,
-    trval_seeds=None,
-    kpartitions=None,
-    rotate_diagonal=False
-):
-    """
-    Wrapper to read the information from validphys object
-    This function receives either a validphyis experiment or dataset objects
-
-    # Returns:
-        - `all_dict_out`: a dictionary containing all the information of the experiment/dataset
-        for training, validation and experimental
-                'datasets' : list of the datasets contained in the experiment
-                'name' : name of the experiment
-                'expdata_true' : non-replica data
-                'invcovmat_true' : inverse of the covmat (non-replica)
-
-                'trmask' : mask for the training data
-                'invcovmat' : inverse of the covmat for the training data
-                'ndata' : number of datapoints for the training data
-                'expdata' : experimental data (replica'd) for training
-
-                'vlmask' : (same as above for validation)
-                'invcovmat_vl' : (same as above for validation)
-                'ndata_vl' : (same as above for validation)
-                'expdata_vl' :  (same as above for validation)
-
-                'positivity' : bool - is this a positivity set?
-                'count_chi2' : should this be counted towards the chi2
-    """
-    if replica_seeds is None:
-        replica_seeds = []
-    if trval_seeds is None:
-        trval_seeds = [0]
-    if kpartitions is None:
-        kpartitions = []
-    # TODO
-    # This whole thing would be much more clear / streamlined if
-    #   - The c experiment/dataset object had all the required information for the fit
-    #       (i.e., there is a swig conversion for everything, right now missing the operator)
-    #   - The python object stored the load within the spec when doing spec.load()
-    #                                   this way it would not be necessary to load twice
-    #   - The python object had all necessary information (same as point 1 but inverted)
-
-    spec_c = spec.load()
-    ndata = spec_c.GetNData()
-    expdata_true = spec_c.get_cv().reshape(1, ndata)
-    if t0pdfset is not None:
-        spec_c.SetT0(t0pdfset)
-    base_mcseed = int(hashlib.sha256(str(spec).encode()).hexdigest(), 16) % 10 ** 8
-
-    if replica_seeds:
-        all_expdatas = []
-    else:
-        all_expdatas = [expdata_true.reshape(ndata)]
-
-    for replica_seed in replica_seeds:
-        spec_replica_c = type(spec_c)(spec_c) # I might need the t0 set here as well
-
-        # Replica generation
-        mcseed = base_mcseed + replica_seed
-        RandomGenerator.InitRNG(0, mcseed)
-        spec_replica_c.MakeReplica()
-        all_expdatas.append(spec_replica_c.get_cv())
-
-    if isinstance(spec, vp_Exp):
-        datasets = common_data_reader_experiment(spec_c, spec)
-    elif isinstance(spec, vp_Dataset):
-        datasets = common_data_reader_dataset(spec_c, spec)
-    else:
-        raise ValueError(
-            "reader.py: common_data_reader, didn't understood spec type: {0}".format(
-                type(spec)
-            )
-        )
-
-    # Collect the masks (if any) due to kfolding for this experiment
-    # These will be applied to the experimental data before starting
-    # the training of each fold
-    list_folds = []
-    for partition in kpartitions:
-        data_fold = partition.get("datasets", [])
-        mask = []
-        for dataset in datasets:
-            # If the dataset is in the fold, its mask is full of 0s
-            if dataset['name'] in data_fold:
-                mask.append(np.zeros(dataset['ndata'], dtype=np.bool))
-            # otherwise of ones
-            else:
-                mask.append(np.ones(dataset['ndata'], dtype=np.bool))
-        list_folds.append(np.concatenate(mask))
-
-    exp_name = spec.name
-    covmat = spec_c.get_covmat()
-    inv_true = np.linalg.inv(covmat)
-
-    if rotate_diagonal:
-        eig, v = np.linalg.eigh(covmat)
-        dt_trans = v.T
-    else:
-        dt_trans = None
-        dt_trans_tr = None
-        dt_trans_vl = None
-
-    # Now it is time to build the masks for the training validation split
-    all_dict_out = []
-    for expdata, trval_seed in zip(all_expdatas, trval_seeds):
-        # Each replica has its own dataset
-        datasets_copy = deepcopy(datasets)
-        tr_mask, vl_mask = make_tr_val_mask(datasets_copy, exp_name, seed=trval_seed)
-
-        if rotate_diagonal:
-            expdata = np.matmul(dt_trans, expdata)
-            # make a 1d array of the diagonal
-            covmat_tr = eig[tr_mask]
-            invcovmat_tr = 1./covmat_tr
-
-            covmat_vl = eig[vl_mask]
-            invcovmat_vl = 1./covmat_vl
-
-            # prepare a masking rotation
-            dt_trans_tr = dt_trans[tr_mask]
-            dt_trans_vl = dt_trans[vl_mask]
-        else:
-            covmat_tr = covmat[tr_mask].T[tr_mask]
-            invcovmat_tr = np.linalg.inv(covmat_tr)
-
-            covmat_vl = covmat[vl_mask].T[vl_mask]
-            invcovmat_vl = np.linalg.inv(covmat_vl)
-
-        ndata_tr = np.count_nonzero(tr_mask)
-        expdata_tr = expdata[tr_mask].reshape(1, ndata_tr)
-
-        ndata_vl = np.count_nonzero(vl_mask)
-        expdata_vl = expdata[vl_mask].reshape(1, ndata_vl)
-
-        # Now save a dictionary of training/validation/experimental folds
-        # for training and validation we need to apply the tr/vl masks
-        # for experimental we need to negate the mask
-        folds = defaultdict(list)
-        for fold in list_folds:
-            folds["training"].append(fold[tr_mask])
-            folds["validation"].append(fold[vl_mask])
-            folds["experimental"].append(~fold)
-
-        dict_out = {
-            "datasets": datasets_copy,
-            "name": exp_name,
-            "expdata_true": expdata_true,
-            "invcovmat_true": inv_true,
-            "trmask": tr_mask,
-            "invcovmat": invcovmat_tr,
-            "ndata": ndata_tr,
-            "expdata": expdata_tr,
-            "vlmask": vl_mask,
-            "invcovmat_vl": invcovmat_vl,
-            "ndata_vl": ndata_vl,
-            "expdata_vl": expdata_vl,
-            "positivity": False,
-            "count_chi2": True,
-            "folds" : folds,
-            "data_transformation_tr": dt_trans_tr,
-            "data_transformation_vl": dt_trans_vl,
-        }
-        all_dict_out.append(dict_out)
-
-    return all_dict_out
-
-
 def positivity_reader(pos_spec):
     """
     Specific reader for positivity sets
@@ -388,7 +147,7 @@ def positivity_reader(pos_spec):
             "name": pos_spec.name,
             "frac": 1.0,
             "ndata": ndata,
-            "tr_fktables": [i["fktable"] for i in parsed_set]
+            "tr_fktables": [i["fktable"] for i in parsed_set],
         }
     ]
 

From f5ab2cd785bb840b0206fd100b3c7369b996b4d4 Mon Sep 17 00:00:00 2001
From: juacrumar <juacrumar@gmail.com>
Date: Tue, 5 Oct 2021 20:42:14 +0200
Subject: [PATCH 02/12] remove closure_pseudodata_replicas

---
 validphys2/src/validphys/results.py | 59 -----------------------------
 1 file changed, 59 deletions(-)

diff --git a/validphys2/src/validphys/results.py b/validphys2/src/validphys/results.py
index c5de368ae7..4f07646f7b 100644
--- a/validphys2/src/validphys/results.py
+++ b/validphys2/src/validphys/results.py
@@ -441,65 +441,6 @@ def groups_corrmat(groups_covmat):
 def procs_corrmat(procs_covmat):
     return groups_corrmat(procs_covmat)
 
-@table
-def closure_pseudodata_replicas(
-    experiments, pdf, nclosure: int, experiments_index, nnoisy: int = 0
-):
-    """Generate closure pseudodata replicas from the given pdf.
-
-    nclosure: Number of Level 1 pseudodata replicas.
-
-    nnoisy:   Number of Level 2 replicas generated out of each pseudodata replica.
-
-    The columns of the table are of the form (clos_0, noise_0_n0 ..., clos_1, ...)
-    """
-
-    # TODO: Do this somewhere else
-    from NNPDF import RandomGenerator
-
-    RandomGenerator.InitRNG(0, 0)
-    data = np.zeros((len(experiments_index), nclosure * (1 + nnoisy)))
-
-    cols = []
-    for i in range(nclosure):
-        cols += ["clos_%04d" % i, *["noise_%04d_%04d" % (i, j) for j in range(nnoisy)]]
-
-    loaded_pdf = pdf.load()
-
-    for exp in experiments:
-        # Since we are going to modify the experiments, we copy them
-        # (and work on the copies) to avoid all
-        # sorts of weirdness with other providers. We don't want this to interact
-        # with DataGroupSpec at all, because it could do funny things with the
-        # cache when calling load(). We need to copy this yet again, for each
-        # of the noisy replicas.
-        closure_exp = Experiment(exp.load())
-
-        # TODO: This is probably computed somewhere else... All this code is
-        # very error prone.
-        # The predictions are for the unmodified experiment.
-        predictions = [ThPredictions(loaded_pdf, d.load()) for d in exp]
-
-        exp_location = experiments_index.get_loc(closure_exp.GetExpName())
-
-        index = itertools.count()
-        for i in range(nclosure):
-            # Generate predictions with experimental noise, a different for
-            # each closure set.
-            closure_exp.MakeClosure(predictions, True)
-            data[exp_location, next(index)] = closure_exp.get_cv()
-            for j in range(nnoisy):
-                # If we don't copy, we generate noise on top of the noise,
-                # which is not what we want.
-                replica_exp = Experiment(closure_exp)
-                replica_exp.MakeReplica()
-
-                data[exp_location, next(index)] = replica_exp.get_cv()
-
-    df = pd.DataFrame(data, index=experiments_index, columns=cols)
-
-    return df
-
 
 def results(dataset: (DataSetSpec), pdf: PDF, covariance_matrix, sqrt_covmat):
     """Tuple of data and theory results for a single pdf. The data will have an associated

From 4af0923102e129bc553dfedad70d9a45cd6a62b7 Mon Sep 17 00:00:00 2001
From: juacrumar <juacrumar@gmail.com>
Date: Wed, 6 Oct 2021 14:03:58 +0200
Subject: [PATCH 03/12] free computed_pseudorreplicas_chi2 from NNPDF

---
 validphys2/src/validphys/chi2grids.py | 150 ++++++++++++--------------
 validphys2/src/validphys/results.py   |   1 +
 2 files changed, 69 insertions(+), 82 deletions(-)

diff --git a/validphys2/src/validphys/chi2grids.py b/validphys2/src/validphys/chi2grids.py
index 665c53c45f..2768d34793 100644
--- a/validphys2/src/validphys/chi2grids.py
+++ b/validphys2/src/validphys/chi2grids.py
@@ -4,109 +4,95 @@
 Compute and store χ² data from replicas, possibly keeping the correlations
 between pseudorreplica fluctuations between different fits. This is applied
 here to parameter determinations such as those of αs.
-
-This module is severly thwarted by the poor adecuacy of libnnpdf for this use
-case. Several pieces of functionality need to be implemented there.
 """
 import logging
 from collections import namedtuple
 
+import numpy as np
 import pandas as pd
 
 from reportengine import collect
 from reportengine.table import table
-from NNPDF import pseudodata, single_replica, RandomGenerator
 
 from validphys.core import PDF
-from validphys.results import ThPredictionsResult, DataResult, chi2_breakdown_by_dataset
+from validphys.calcutils import calc_chi2
 
-PseudoReplicaExpChi2Data = namedtuple('PseudoReplicaChi2Data',
-    ['experiment', 'dataset', 'ndata', 'chi2', 'nnfit_index'])
+PseudoReplicaExpChi2Data = namedtuple(
+    "PseudoReplicaChi2Data", ["group", "ndata", "chi2", "nnfit_index"]
+)
 
 
 log = logging.getLogger(__name__)
 
-def computed_psedorreplicas_chi2(
-        experiments, dataseed, pdf, fitted_replica_indexes,
-        t0set:(PDF, type(None))):
-    """Return a dataframe with the chi² of each replica wirh its corrsponding
-    pseudodata (i.e. the one it was fitted with). The chi² is computed for
-    both each experiment and each dataset in the experiment. The index of the
-    dataframe is
-
-    ['experiment', 'dataset', 'ndata' , 'nnfit_index']
 
-    where 'experiment' is the name of the experiment, 'dataset' is the name of
-    the dataset, or "Total" for the total value, 'ndata' is the corresponding
-    number of points and 'nnfit_index' is the index specifying the
-    pseudorreplica fluctuation.
+def computed_pseudoreplicas_chi2(
+    # TODO: these three are just so I can call make_replica?
+    mcseed,
+    dataset_inputs_loaded_cd_with_cuts,
+    fitted_replica_indexes,
+    group_result_table_no_table,  # to get the results already in the form of a dataframe
+    groups_sqrtcovmat,
+    t0set: (PDF, type(None)),
+):
+    """Return a dataframe with the chi² of each replica with its corresponding
+    pseudodata (i.e. the one it was fitted with). The chi² is computed by group.
+    The index of the output dataframe is
+        ``['group',  'ndata' , 'nnfit_index']``
+    where ``nnfit_index`` is the name of the corresponding replica
     """
-
-    #TODO: Everythning about this function is horrible. We need to rewrite
-    #experiments.cc from scratch.
-
-    #TODO: Do this somewhere else
-    RandomGenerator.InitRNG(0,0)
-    if t0set is not None:
-        lt0 = t0set.load_t0()
-    pdfname = pdf.name
-    datas = []
-
-    #No need to save these in the cache, so we call __wrapped__
-    original_experiments = [e.load.__wrapped__(e) for e in experiments]
-    sqrtcovmat_table = []
-    log.debug("Generating dataset covmats")
-    for exp in original_experiments:
-        if t0set is not None:
-            exp.SetT0(lt0)
-        #The covariance matrices are currently very expensive to recompute.
-        #Store them after computing T0
-        sqrtcovmat_table.append([ds.get_sqrtcovmat() for ds in exp.DataSets()])
-
-    for lhapdf_index, nnfit_index in enumerate(fitted_replica_indexes, 1):
-
-        flutuated_experiments = pseudodata(original_experiments, dataseed, nnfit_index)
-        lpdf = single_replica(pdfname, lhapdf_index)
-        for expspec, exp, mats in zip(experiments, flutuated_experiments, sqrtcovmat_table):
-            #We need to manage the memory
-            exp.thisown = True
-
-            th = ThPredictionsResult.from_convolution(pdf, expspec,
-                loaded_data=exp, loaded_pdf=lpdf)
-
-
-            results = DataResult(exp, exp.get_covmat(), exp.get_sqrtcovmat()), th
-            #The experiment already has T0. No need to set it again.
-            #TODO: This is a hack. Get rid of this.
-            chi2 = chi2_breakdown_by_dataset(results, exp, t0set=None,
-                                             datasets_sqrtcovmat=mats)
-
-            for i, (dsname,(value, ndata)) in enumerate(chi2.items()):
-                data = PseudoReplicaExpChi2Data(
-                    nnfit_index=nnfit_index,
-                    experiment=expspec.name,
-                    #We set the i so that the sort order is maintaned here.
-                    dataset = (i, dsname),
-                    ndata = ndata,
-                    chi2=value
-                    )
-                datas.append(data)
-
-    df =  pd.DataFrame(datas, columns=PseudoReplicaExpChi2Data._fields)
-    df.set_index(['experiment', 'dataset', 'ndata' , 'nnfit_index'], inplace=True)
+    #######
+    # Get the replica pseudodata
+    # TODO: it looks like it should be possible to request make_replica directly
+    # as an argument of this function, but it is not clear how to do so
+    from validphys.n3fit_data import replica_mcseed
+    from validphys.pseudodata import make_replica
+
+    all_data_replicas = []
+    for replica in fitted_replica_indexes:
+        value_of_mcseed = replica_mcseed(replica, mcseed, True)
+        all_data_replicas.append(
+            make_replica(dataset_inputs_loaded_cd_with_cuts, value_of_mcseed)
+        )
+    r_data = np.array(all_data_replicas).T
+    ########################
+
+    # Drop data central and theory central which is not useful here
+    r_prediction = group_result_table_no_table.drop(
+        columns=["data_central", "theory_central"]
+    )
+
+    # Now compute the chi2 in a per-group basis
+    diff = r_prediction - r_data
+    group_level = r_prediction.index.get_level_values("group")
+    groups = group_level.drop_duplicates().to_list()
+
+    # Save the results in a dataframe similar (but not equal) to the old one
+    df_output = []
+    for group in groups:
+        group_diff = diff.loc[group_level == group]
+        its_covmat = groups_sqrtcovmat[group_level == group][group]
+        chi2_per_replica = calc_chi2(its_covmat, group_diff)
+        ndata = len(group_diff)
+        for i, chi2 in enumerate(chi2_per_replica):
+            df_output.append(PseudoReplicaExpChi2Data(group, ndata, chi2, i))
+
+    df = pd.DataFrame(df_output, columns=PseudoReplicaExpChi2Data._fields)
+    df.set_index(["group", "ndata", "nnfit_index"], inplace=True)
     df.sort_index(inplace=True)
-    #Now that we have the order we like, we remove the i
-    df.index.set_levels([x[1] for x in df.index.levels[1]], level=1, inplace=True)
     return df
 
-#TODO: Probably fitcontext should set all of the variables required to compute
-#this. But better setting
-#them explicitly than setting some, se we require the user to do that.
-fits_computed_psedorreplicas_chi2 = collect(computed_psedorreplicas_chi2, ('fits',))
 
-dataspecs_computed_pseudorreplicas_chi2 = collect(computed_psedorreplicas_chi2, ('dataspecs',))
+# TODO: Probably fitcontext should set all of the variables required to compute
+# this. But it is better to set them all explicitly than to set only some,
+# so we require the user to do that.
+fits_computed_pseudoreplicas_chi2 = collect(computed_pseudoreplicas_chi2, ("fits",))
+
+dataspecs_computed_pseudorreplicas_chi2 = collect(
+    computed_pseudoreplicas_chi2, ("dataspecs",)
+)
+
 
 @table
-def export_fits_computed_psedorreplicas_chi2(fits_computed_psedorreplicas_chi2):
+def export_fits_computed_pseudoreplicas_chi2(fits_computed_pseudoreplicas_chi2):
     """Hack to force writting the CSV output"""
-    return fits_computed_psedorreplicas_chi2
+    return fits_computed_pseudoreplicas_chi2
diff --git a/validphys2/src/validphys/results.py b/validphys2/src/validphys/results.py
index 4f07646f7b..1fee8124af 100644
--- a/validphys2/src/validphys/results.py
+++ b/validphys2/src/validphys/results.py
@@ -628,6 +628,7 @@ def chi2_breakdown_by_dataset(
     """Return a dict with the central chi² of each dataset in the experiment,
     by breaking down the experiment results. If ``prepend_total`` is True.
     """
+    raise Exception
     dt, th = experiment_results
     sqrtcovmat = dt.sqrtcovmat
     central_diff = th.central_value - dt.central_value

From 7480d7f74dca88a342464f8e4d8f6f64d091d2f4 Mon Sep 17 00:00:00 2001
From: juacrumar <juacrumar@gmail.com>
Date: Wed, 6 Oct 2021 14:05:21 +0200
Subject: [PATCH 04/12] remove deprecated function in results.py

---
 validphys2/src/validphys/results.py | 43 -----------------------------
 1 file changed, 43 deletions(-)

diff --git a/validphys2/src/validphys/results.py b/validphys2/src/validphys/results.py
index 1fee8124af..222fda88f0 100644
--- a/validphys2/src/validphys/results.py
+++ b/validphys2/src/validphys/results.py
@@ -617,49 +617,6 @@ def dataset_inputs_bootstrap_chi2_central(
     return chi2_central_resample
 
 
-# TODO: deprecate this function?
-def chi2_breakdown_by_dataset(
-    experiment_results,
-    experiment,
-    t0set,
-    prepend_total: bool = True,
-    datasets_sqrtcovmat=None,
-) -> dict:
-    """Return a dict with the central chi² of each dataset in the experiment,
-    by breaking down the experiment results. If ``prepend_total`` is True.
-    """
-    raise Exception
-    dt, th = experiment_results
-    sqrtcovmat = dt.sqrtcovmat
-    central_diff = th.central_value - dt.central_value
-    d = {}
-    if prepend_total:
-        d["Total"] = (calc_chi2(sqrtcovmat, central_diff), len(sqrtcovmat))
-
-    # Allow lower level access useful for pseudodata and such.
-    # TODO: This is a hack and we should get rid of it.
-    if isinstance(experiment, Experiment):
-        loaded_exp = experiment
-    else:
-        loaded_exp = experiment.load()
-
-    # TODO: This is horrible. find a better way to do it.
-    if t0set:
-        loaded_exp = type(loaded_exp)(loaded_exp)
-        loaded_exp.SetT0(t0set.load_T0())
-
-    indmin = indmax = 0
-
-    if datasets_sqrtcovmat is None:
-        datasets_sqrtcovmat = (ds.get_sqrtcovmat() for ds in loaded_exp.DataSets())
-
-    for ds, mat in zip(loaded_exp.DataSets(), datasets_sqrtcovmat):
-        indmax += len(ds)
-        d[ds.GetSetName()] = (calc_chi2(mat, central_diff[indmin:indmax]), len(mat))
-        indmin = indmax
-    return d
-
-
 def _chs_per_replica(chs):
     th, _, l = chs
     return th.data.ravel() / l

From 51e1c0c6f5e755ba548fc142333219e6ac7257d3 Mon Sep 17 00:00:00 2001
From: juacrumar <juacrumar@gmail.com>
Date: Wed, 6 Oct 2021 17:43:24 +0200
Subject: [PATCH 05/12] liberate mc_gen.py from libNNPDF

---
 validphys2/examples/mc_gen_example.yaml       |  14 +-
 validphys2/src/validphys/config.py            |   6 +
 validphys2/src/validphys/mc_gen.py            | 224 +++-----
 validphys2/src/validphys/pseudodata.py        |   2 +
 .../regressions/test_art_rep_generation.csv   | 532 +++++++++---------
 .../src/validphys/tests/test_regressions.py   |   3 +-
 6 files changed, 363 insertions(+), 418 deletions(-)

diff --git a/validphys2/examples/mc_gen_example.yaml b/validphys2/examples/mc_gen_example.yaml
index ff38aa993f..e094ee770c 100644
--- a/validphys2/examples/mc_gen_example.yaml
+++ b/validphys2/examples/mc_gen_example.yaml
@@ -6,17 +6,17 @@ meta:
     author: Rosalyn Pearson
     keywords: [pseudodata, test, artificial replica]
 
-fit: NNPDF31_nlo_as_0118
+fit: 210629-n3fit-001
 
-theoryid: 52
+theoryid: 200
+mcseed: 4
+genrep: True
 
 template: mc_gen_report.md
 
 actions_:
   - report(main=true)
 
-experiments:
-  - experiment: BIGEXP
-    datasets:
-      - { dataset: CHORUSNB, frac: 0.5}
-
+dataset_inputs:
+  - { dataset: NMC, frac: 0.5}
+  - { dataset: D0WMASY, frac: 0.5}
diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py
index 897d072eb2..967a6803e4 100644
--- a/validphys2/src/validphys/config.py
+++ b/validphys2/src/validphys/config.py
@@ -16,6 +16,8 @@
 from collections import ChainMap, defaultdict
 from collections.abc import Mapping, Sequence
 
+import numpy as np
+
 from reportengine import configparser
 from reportengine.environment import Environment, EnvironmentError_
 from reportengine.configparser import (
@@ -219,6 +221,10 @@ def parse_use_cuts(self, use_cuts: (bool, str)):
 
         return res
 
+    def produce_replicas(self, nreplica: int):
+        """Produce a replicas array"""
+        return NSList(np.arange(1, nreplica+1), nskey="replica")
+
     def produce_inclusive_use_scalevar_uncertainties(self, use_scalevar_uncertainties: bool = False,
                                         point_prescription: (str, None) = None):
         """Whether to use a scale variation uncertainty theory covmat.
diff --git a/validphys2/src/validphys/mc_gen.py b/validphys2/src/validphys/mc_gen.py
index 397aff3ccb..ff68d59c88 100644
--- a/validphys2/src/validphys/mc_gen.py
+++ b/validphys2/src/validphys/mc_gen.py
@@ -4,8 +4,9 @@
 
 Tools to check the pseudo-data MC generation.
 """
-from __future__ import generator_stop
-
+# The functions in this module have been ported so that they no longer use
+# libNNPDF. This was a direct port of the libNNPDF-dependent structure,
+# so they should not be used as an example.
 import logging
 import matplotlib.patches as mpatches
 import matplotlib.pyplot as plt
@@ -13,183 +14,136 @@
 import pandas as pd
 from scipy.stats import moment as mom
 
-from NNPDF import Experiment, RandomGenerator
 from reportengine.table import table
 from reportengine.figure import figure
-log = logging.getLogger(__name__)
-
 
+log = logging.getLogger(__name__)
 
 
-def art_rep_generation(groups_data, nreplica:int):
+def art_rep_generation(groups_data, make_replicas):
     """Generates the nreplica pseudodata replicas"""
-
-    RandomGenerator.InitRNG(0,0)
-
     real_data_list = []
-    art_replicas_list = []
-    normart_replicas_list = []
-    art_data_list = []
 
     for group in groups_data:
         real_group = group.load()
-
-        art_replicas = []
-        normart_replicas = []
         real_data = real_group.get_cv()
-
-        # producing replicas
-        for _ in range(nreplica):
-            replica_group = Experiment(real_group)
-            replica_group.MakeReplica()
-            artrep = replica_group.get_cv()
-            normartrep = artrep/real_data
-            art_replicas.append(artrep)
-            normart_replicas.append(normartrep)
-
-        art_data = np.mean(art_replicas, axis=0)
-        art_data_list.append(art_data)
         real_data_list.append(real_data)
-        art_replicas_list.append(art_replicas)
-        normart_replicas_list.append(normart_replicas)
 
-    art_replicas = np.concatenate(art_replicas_list, axis=1)
-    normart_replicas = np.concatenate(normart_replicas_list, axis=1)
-    art_data = np.concatenate(art_data_list)
     real_data = np.concatenate(real_data_list)
 
-    return real_data, art_replicas, normart_replicas, art_data
-
+    art_replicas = np.stack(make_replicas)
+    art_data = np.mean(art_replicas, axis=0)
+    normart_replicas = art_replicas / real_data
 
-def per_point_art_rep_generation(groups_data, nreplica:int):
-    """Generates the nreplica pseudodata replicas for a given group"""
-
-    RandomGenerator.InitRNG(0,0)
-
-    for group in groups_data:
-        real_group = group.load()
-
-        art_replicas = []
-        normart_replicas = []
-        real_data = real_group.get_cv()
-
-        # producing replicas
-        for i in range(nreplica):
-            replica_group = Experiment(real_group)
-            for point in range(len(replica_group.get_cv())):
-                replica_group.MakePerPointReplica(point)
-            artrep = replica_group.get_cv()
-            normartrep = artrep/real_data
-            art_replicas.append(artrep)
-            normart_replicas.append(normartrep)
-
-        art_data = np.mean(art_replicas, axis=0)
+    return real_data, art_replicas, normart_replicas, art_data
 
-        return real_data, art_replicas, normart_replicas, art_data
 
 @figure
 def art_data_residuals(art_rep_generation, color="green"):
-
-    #pass
     """
     Plot the residuals distribution of pseudodata compared to experiment.
     """
     real_data, _, _, art_data = art_rep_generation
 
-    residuals=real_data-art_data
-    normresiduals = residuals/real_data
+    residuals = real_data - art_data
+    normresiduals = residuals / real_data
     fig, ax = plt.subplots()
 
-    ax.hist(normresiduals,bins=50,histtype='step', stacked=True, fill=False, color=color)
+    ax.hist(
+        normresiduals, bins=50, histtype="step", stacked=True, fill=False, color=color
+    )
 
-    ax.set_ylabel(r'Data points')
-    ax.set_xlabel(r'$(D^0-<D^{(r)}>)/D^0$')
-    ax.set_title(r'Residuals distribution')
+    ax.set_ylabel(r"Data points")
+    ax.set_xlabel(r"$(D^0-<D^{(r)}>)/D^0$")
+    ax.set_title(r"Residuals distribution")
 
     return fig
 
-@figure
-def per_point_art_data_residuals(per_point_art_rep_generation):
-    return art_data_residuals(per_point_art_rep_generation, color="orange")
-
 
 @figure
-def art_data_distribution(art_rep_generation, title='Artificial Data Distribution', color="green"):
+def art_data_distribution(
+    art_rep_generation, title="Artificial Data Distribution", color="green"
+):
     """
     Plot of the distribution of pseudodata.
     """
     real_data, _, _, art_data = art_rep_generation
 
-    normart_data = art_data/real_data
+    normart_data = art_data / real_data
     fig, ax = plt.subplots()
 
-    ax.hist(normart_data, bins=50, histtype='step', stacked=True, fill=False, color=color)
+    ax.hist(
+        normart_data, bins=50, histtype="step", stacked=True, fill=False, color=color
+    )
 
-    ax.set_ylabel(r'Data points')
-    ax.set_xlabel(r'$<D^{(r)}>/D^0$')
+    ax.set_ylabel(r"Data points")
+    ax.set_xlabel(r"$<D^{(r)}>/D^0$")
     ax.set_title(title)
 
     return fig
 
-@figure
-def per_point_art_data_distribution(art_data_distribution,per_point_art_rep_generation, nreplica:int):
-    return art_data_distribution(per_point_art_rep_generation, nreplica, title='Uncorrelated Artificial Data Distribution', color="orange")
-
 
 @figure
 def art_data_moments(art_rep_generation, color="green"):
     """
     Returns the moments of the distributions per data point, as a histogram.
     """
-    real_data, _, normart_replicas, art_data = art_rep_generation
+    _, _, normart_replicas, _ = art_rep_generation
 
     artrep_array = np.asarray(normart_replicas)
-    normart_data = art_data/real_data
 
-
-    fig, axes = plt.subplots(nrows=3, figsize=(10,12))
+    fig, axes = plt.subplots(nrows=3, figsize=(10, 12))
     # Plot histogram of moments
-    for momno, ax in zip(range(1,4), axes.flatten()):
+    for momno, ax in zip(range(1, 4), axes.flatten()):
         # Calculate moments
         moms = []
-        for i, datapoint in zip(range(len(artrep_array.T)), artrep_array.T):
+        for _, datapoint in zip(range(len(artrep_array.T)), artrep_array.T):
             moment = mom(datapoint, moment=momno)
             moms.append(moment)
-        ax.hist(moms, bins=50, histtype='step', stacked=True, fill=False, color=color)
+        ax.hist(moms, bins=50, histtype="step", stacked=True, fill=False, color=color)
         ax.set_ylabel("Data points")
         ax.set_xlabel(f"Moment {momno}")
 
     return fig
 
-@figure
-def per_point_art_data_moments(per_point_art_rep_generation, nreplica:int):
-    return art_data_moments(per_point_art_rep_generation, nreplica, color="orange")
 
 @figure
-def art_data_comparison(art_rep_generation, nreplica:int):
-
-    #pass
+def art_data_comparison(art_rep_generation, nreplica: int):
     """
     Plots per datapoint of the distribution of replica values.
     """
     real_data, _, normart_replicas, art_data = art_rep_generation
 
     artrep_array = np.asarray(normart_replicas)
-    normart_data = art_data/real_data
+    normart_data = art_data / real_data
 
-    fig, axes = plt.subplots(nrows=len(artrep_array.T), figsize=(4,2*len(artrep_array.T)))
+    fig, axes = plt.subplots(
+        nrows=len(artrep_array.T), figsize=(4, 2 * len(artrep_array.T))
+    )
 
-    for i, ax, datapoint, normartdatapoint in zip(range(len(artrep_array.T)), axes.flatten(), artrep_array.T, normart_data):
+    for i, ax, datapoint, normartdatapoint in zip(
+        range(len(artrep_array.T)), axes.flatten(), artrep_array.T, normart_data
+    ):
         ax.hist(datapoint, bins=10, histtype="step", stacked=True, fill=False)
         extraString = f"Datapoint number = {i}"
-        handles, labels = ax.get_legend_handles_labels()
+        handles, _ = ax.get_legend_handles_labels()
         handles.append(mpatches.Patch(color="none", label=extraString))
-        ax.set_xlim(-0.5,2.5)
-        ax.set_ylim(0,0.5*nreplica)
+        ax.set_xlim(-0.5, 2.5)
+        ax.set_ylim(0, 0.5 * nreplica)
         ax.vlines(1, ax.get_ylim()[0], ax.get_ylim()[1])
-        ax.vlines(normartdatapoint, ax.get_ylim()[0], ax.get_ylim()[1], linestyle="-", color="darkorchid")
-        ax.vlines(0, ax.get_ylim()[0], ax.get_ylim()[1], linestyle="-", color="dodgerblue")
-        ax.vlines(2, ax.get_ylim()[0], ax.get_ylim()[1], linestyle="-", color="dodgerblue")
+        ax.vlines(
+            normartdatapoint,
+            ax.get_ylim()[0],
+            ax.get_ylim()[1],
+            linestyle="-",
+            color="darkorchid",
+        )
+        ax.vlines(
+            0, ax.get_ylim()[0], ax.get_ylim()[1], linestyle="-", color="dodgerblue"
+        )
+        ax.vlines(
+            2, ax.get_ylim()[0], ax.get_ylim()[1], linestyle="-", color="dodgerblue"
+        )
         ax.legend(handles=handles)
         ax.set_xlabel(r"$D^{(r)}/D^0$")
         ax.set_ylabel("Frequency")
@@ -198,74 +152,56 @@ def art_data_comparison(art_rep_generation, nreplica:int):
 
 
 @figure
-def one_art_data_residuals(nreplica:int, groups_data):
-
-    #pass
+def one_art_data_residuals(groups_data, indexed_make_replicas):
     """
     Residuals plot for the first datapoint.
     """
-    RandomGenerator.InitRNG(0,0)
+    one_data_index = 0
+    all_replicas = pd.concat(indexed_make_replicas, axis=1)
+    group_level = all_replicas.index.get_level_values("group")
+
+    group_level = indexed_make_replicas[0].index.get_level_values("group")
+
+    all_normresidual = []
     for group in groups_data:
 
         real_group = group.load()
         real_data = real_group.get_cv()
-        one_art_data = np.zeros(nreplica)
-        one_data_index=0
+        one_art_data = all_replicas[group_level == group.name].iloc[one_data_index]
 
-        #producing replicas
-        for i in range(nreplica):
-            replica_group = Experiment(real_group)
-            replica_group.MakeReplica()
-            one_art_data[i]=replica_group.get_cv()[one_data_index]
+        residual = one_art_data - real_data[one_data_index]
+        all_normresidual.append(residual / real_data[one_data_index])
 
     fig, ax = plt.subplots()
 
-    residual = one_art_data-real_data[one_data_index]
-    normresidual = residual/real_data[one_data_index]
-    ax.hist(normresidual,bins=50,histtype='step', stacked=True, fill=False)
+    ax.hist(all_normresidual, bins=50, histtype="step", stacked=True, fill=False)
 
-    ax.set_ylabel(r'replicas')
-    ax.set_xlabel(r'$(D^{(r)}_{0} - D^0_{0})/D^0_{0}$')
-    ax.set_title(r'Residual for Data Point 0')
+    ax.set_ylabel(r"replicas")
+    ax.set_xlabel(r"$(D^{(r)}_{0} - D^0_{0})/D^0_{0}$")
+    ax.set_title(r"Residual for Data Point 0")
 
     return fig
 
-@figure
-def plot_deviation_from_mean(art_rep_generation, per_point_art_rep_generation):
-    real_data, art_replicas, normart_replicas, art_data = art_rep_generation
-    ppreal_data, ppart_replicas, ppnormart_replicas, ppart_data = per_point_art_rep_generation
-
-    fig, ax = plt.subplots()
-
-    residuals = (real_data - art_data)/np.std(art_replicas, axis=0)
-    ppresiduals = (real_data - ppart_data)/np.std(ppart_replicas, axis=0)
-
-    ax.plot(ppresiduals, color="orange", label="Uncorrelated")
-    ax.plot(residuals, color="green", label="Correlated")
-    ax.legend()
-    ax.set_xlabel("Datapoint index")
-    ax.set_ylabel(r"$(<D> - D_0)/\sigma$")
-    ax.set_title("Deviation from the mean")
-    ax.hlines(0, ax.get_xlim()[0], ax.get_xlim()[1], linestyle="-", color="black")
-
-    return fig
 
 @table
 def art_data_mean_table(art_rep_generation, groups_data):
-    """Generate table for artdata mean values
-    """
-    real_data, art_replicas, normart_replicas, art_data = art_rep_generation
+    """Generate table for artdata mean values"""
+    real_data, _, _, art_data = art_rep_generation
 
-    #residuals=real_data-art_data
-    data=[]
+    data = []
     for group in groups_data:
         for dataset in group.datasets:
             ds = dataset.load()
             Ndata = ds.GetNData()
             for i in range(Ndata):
-                line=[dataset.name,art_data[i],real_data[i],abs(art_data[i]-real_data[i])]
+                line = [
+                    dataset.name,
+                    art_data[i],
+                    real_data[i],
+                    abs(art_data[i] - real_data[i]),
+                ]
                 data.append(line)
 
-    df =  pd.DataFrame(data,columns=["DataSet","ArtData","ExpData","abs(residual)"])
+    df = pd.DataFrame(data, columns=["DataSet", "ArtData", "ExpData", "abs(residual)"])
 
     return df
diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py
index 82692400be..eca9cf2858 100644
--- a/validphys2/src/validphys/pseudodata.py
+++ b/validphys2/src/validphys/pseudodata.py
@@ -229,6 +229,8 @@ def indexed_make_replica(groups_index, make_replica):
 
 fit_tr_masks = collect('replica_training_mask_table', ('fitreplicas', 'fitenvironment'))
 pdf_tr_masks = collect('replica_training_mask_table', ('pdfreplicas', 'fitenvironment'))
+make_replicas = collect('make_replica', ('replicas',))
+indexed_make_replicas = collect('indexed_make_replica', ('replicas',))
 
 def recreate_fit_pseudodata(_recreate_fit_pseudodata, fitreplicas, fit_tr_masks):
     """Function used to reconstruct the pseudodata seen by each of the
diff --git a/validphys2/src/validphys/tests/regressions/test_art_rep_generation.csv b/validphys2/src/validphys/tests/regressions/test_art_rep_generation.csv
index 17461162f5..01cb65b7fe 100644
--- a/validphys2/src/validphys/tests/regressions/test_art_rep_generation.csv
+++ b/validphys2/src/validphys/tests/regressions/test_art_rep_generation.csv
@@ -1,267 +1,267 @@
 	rep0
-0	605004.0229655241
-1	602440.6042534113
-2	620858.2593790491
-3	613230.2445669749
-4	639952.7905205782
-5	655949.8790361139
-6	636850.6054514458
-7	628994.3543439173
-8	657301.323486842
-9	627892.0285999136
-10	598536.2932218027
-11	450048.5200059017
-12	431216.7505910402
-13	447044.9288255663
-14	440004.6820123964
-15	429728.93818449834
-16	421295.20118263003
-17	384454.24090431526
-18	370980.411169651
-19	382612.0148703672
-20	354678.0131086338
-21	334603.4158950332
-22	128562.17973857898
-23	127342.18412537871
-24	124185.70091052185
-25	117188.71279217266
-26	110308.86504021735
-27	101783.07229286227
-28	84984.6415717532
-29	50391.89736764995
-30	217.31574309245264
-31	99.256686892602
-32	50.70196796309883
-33	28.166627969438913
-34	18.750385343394814
-35	10.637986338057157
-36	8.458165998514103
-37	5.161374135077057
-38	1.7440116151145275
-39	0.5166681241057582
-40	0.14344058134523924
-41	0.024785429275386427
-42	0.0025163509905126555
-43	11667.782124893894
-44	21223.526886450283
-45	13462.35543683801
-46	6138.873985151094
-47	2708.1167755085808
-48	1226.9779856735504
-49	575818.4164028777
-50	574257.0910354555
-51	580128.0112796086
-52	585059.2199059085
-53	584763.314801447
-54	596571.1334267028
-55	596936.3727577593
-56	600384.1891912837
-57	607933.3346544248
-58	588246.38141835
-59	556859.2623407103
-60	436504.26730428456
-61	432303.05461665214
-62	429878.81481606193
-63	423512.7804381009
-64	414307.7885251502
-65	405518.16477606195
-66	388224.2735100763
-67	375485.63061635545
-68	363704.71285150666
-69	342011.205090951
-70	319474.23506993934
-71	134206.51216829068
-72	133256.30384223114
-73	133281.55486784823
-74	132054.46387549827
-75	130885.56238010839
-76	128780.9376893244
-77	118628.79847887601
-78	106435.44676191603
-79	89055.8013789791
-80	68102.79690334377
-81	45021.34978934775
-82	21975.39579667182
-83	9930.95091649565
-84	2933.8462314989843
-85	1086.2279347212962
-86	465.0529875023162
-87	226.30802040953344
-88	111.79690230475649
-89	62.17381426664207
-90	30.845472233433497
-91	14.362338415025647
-92	0.613668421348077
-93	9932.826070454066
-94	2884.6121408528056
-95	1067.4232418905062
-96	450.52936603855994
-97	214.9415800499354
-98	111.97977024219476
-99	59.02227114568631
-100	30.124745159192784
-101	14.001335018551327
-102	0.6432016353212359
-103	9291.864652700102
-104	2612.102669987266
-105	952.876335785387
-106	422.62773612597147
-107	202.29727198956087
-108	104.80403433709903
-109	55.52211814833202
-110	29.47827303942286
-111	14.10284210742019
-112	0.5484354153200658
-113	6988.302774545696
-114	1953.070125553211
-115	720.4718120407176
-116	326.1560157083265
-117	163.06443813461684
-118	86.09929672384352
-119	46.6760518324352
-120	23.70075105915677
-121	11.811860841607936
-122	0.46671218710115586
-123	3773.1302184015512
-124	1038.010867040705
-125	389.0162743842787
-126	180.38234398606457
-127	92.85633549871146
-128	48.04747813864363
-129	29.151521424980952
-130	14.66798047732213
-131	6.972923144523964
-132	0.27501819107281494
-133	2685.283395705168
-134	1255.614216863319
-135	623.4123408118692
-136	300.9025858506389
-137	154.9496016877635
-138	85.6238960317311
-139	45.697395051818916
-140	24.643819750619578
-141	14.154083386403244
-142	8.083627826692325
-143	4.615053327117134
-144	2.6930698606373755
-145	1.6039820756213605
-146	0.9190543906329204
-147	0.5420879986612372
-148	0.3238060211053549
-149	0.19164359765485747
-150	0.11454603590748376
-151	0.06766963214283701
-152	0.039295554906154065
-153	0.023717740337089382
-154	0.013766586907955021
-155	0.007925087637831662
-156	0.004510292638598202
-157	0.0025257234625036358
-158	0.0015742723776101173
-159	0.0009174769620039548
-160	0.00036071719540926286
-161	0.00017939172781702523
-162	0.00010719970920129199
-163	5.10754171638108e-05
-164	9.494161579905887e-06
-165	4.097867641772405e-06
-166	2496.543317341297
-167	1155.2596235411136
-168	526.9753542249528
-169	273.3672352496259
-170	144.09730724029419
-171	73.8283798920788
-172	40.15431825945132
-173	22.253841536320582
-174	12.876862009947805
-175	7.147197673282873
-176	4.1000363261395325
-177	2.3541540270520005
-178	1.3730432793225176
-179	0.766449205276789
-180	0.4316549850474859
-181	0.2570742612596235
-182	0.15083018094262035
-183	0.08778978138524669
-184	0.05001032462453892
-185	0.029430375664826213
-186	0.01611415361553344
-187	0.008973177721778117
-188	0.004747317124419756
-189	0.0024668103139778887
-190	0.0015283063419005878
-191	0.0007787419053764184
-192	0.0003113213196017707
-193	0.00018410837384159157
-194	9.891363626568309e-05
-195	1.3627519290893994e-05
-196	2049.595793988463
-197	937.9099866627081
-198	440.85996141143124
-199	222.39625542891935
-200	112.54650664349728
-201	58.05800257833228
-202	32.120045403096555
-203	16.32868970476539
-204	9.173988069096177
-205	5.183254049935798
-206	2.8208283943003707
-207	1.6101568970220699
-208	0.8518829065147376
-209	0.484567635991665
-210	0.2695394668785933
-211	0.14428600385730475
-212	0.07791175268089195
-213	0.04329623461829344
-214	0.02197485278250843
-215	0.011369326958557987
-216	0.0056973672800596205
-217	0.002712782984102383
-218	0.0013469711694377787
-219	0.0006248060260332912
-220	0.0002558575769424933
-221	0.00016316339858413995
-222	8.002430545648022e-06
-223	1575.5050472067082
-224	728.6008273425452
-225	321.69440777243614
-226	151.0771234502
-227	75.52867010203853
-228	38.982186662924384
-229	20.356929633419362
-230	9.39420524561827
-231	5.188908325330907
-232	2.6002888128368964
-233	1.3368274693702318
-234	0.6800019692538632
-235	0.34715311449716724
-236	0.17893906702328258
-237	0.0794910332147346
-238	0.03442652404659983
-239	0.016486812605305875
-240	0.007590628806292284
-241	0.003170146326010336
-242	0.0012235655803027057
-243	0.000430484039887203
-244	0.00012033484918649306
-245	3.012808100117648e-05
-246	6.555863621871152e-06
-247	957.1097986160261
-248	406.8294452550777
-249	190.23178545891966
-250	89.90518748652033
-251	39.2527330848522
-252	17.795728828437618
-253	8.172773534716532
-254	3.258833345760595
-255	1.5580913277549024
-256	0.6302044867930973
-257	0.2604667628574771
-258	0.10035630960385046
-259	0.04013064955098406
-260	0.011580736083436731
-261	0.0043259887879153825
-262	0.0012483709943641582
-263	0.0002272026342898553
-264	4.60377440249738e-05
-265	5.936781013576351e-06
+0	608657.9135469823
+1	598587.3652054642
+2	611562.0252168534
+3	619677.7052309483
+4	642785.265120052
+5	642261.2658612704
+6	628243.4944682346
+7	621797.7928727296
+8	646668.3796428698
+9	620041.17823229
+10	569703.6851953906
+11	445252.06377498154
+12	433217.1299099684
+13	454895.544675859
+14	438443.27305097826
+15	428354.037587638
+16	412373.92134376976
+17	382730.96323754865
+18	370486.1070181092
+19	371251.83542146476
+20	352614.1469707638
+21	319878.5814996741
+22	126104.04182198147
+23	129482.92279127579
+24	122265.77521146572
+25	118215.86761904058
+26	111905.56327679456
+27	106057.64300073356
+28	84471.02696419714
+29	46058.76052194961
+30	223.0880887021286
+31	101.7080436215751
+32	50.05099409165558
+33	27.2840499015026
+34	17.358763759922454
+35	10.768383831528809
+36	8.423672723288496
+37	4.712504694201969
+38	1.5138261182805381
+39	0.49769738167029703
+40	0.1295556561530443
+41	0.03367851702098606
+42	0.003887224224144138
+43	12050.43483274158
+44	22007.427084059655
+45	13860.845118319343
+46	6952.626129213045
+47	2493.8762765297342
+48	1188.4120241108963
+49	576140.5999430714
+50	578022.3386753529
+51	580733.8077433861
+52	586192.2026342814
+53	588040.3827016304
+54	602122.7925561155
+55	598513.1117078832
+56	604206.1111315321
+57	608032.1353270585
+58	599065.8054973363
+59	561675.3246431323
+60	436309.5641518851
+61	435169.3190210109
+62	432840.2999748325
+63	426057.28748407355
+64	415468.94069709786
+65	407698.033884484
+66	392193.317176145
+67	379397.06660555885
+68	366623.9125816332
+69	347347.66191160737
+70	321412.94188504893
+71	135343.0110473137
+72	134531.88403984267
+73	134759.2157639614
+74	133325.7970860389
+75	132702.5257633686
+76	129482.5209183002
+77	119774.23720658766
+78	107289.62091219533
+79	89772.2977010038
+80	68328.56829492863
+81	45818.28999683553
+82	22346.066330948524
+83	10265.363273728935
+84	3021.849007967259
+85	1110.5319164446
+86	477.1686488930037
+87	229.60443000231606
+88	114.45592395540837
+89	66.50415863912994
+90	31.257582984767136
+91	14.253305068724345
+92	0.6338586544574546
+93	10238.544058336323
+94	2950.593912076866
+95	1098.653060336677
+96	465.64883192427885
+97	223.54763063649088
+98	112.05046427326687
+99	60.559347342772845
+100	30.872814873913853
+101	14.598524587856607
+102	0.6626369853782214
+103	9578.6201743399
+104	2695.4830162060716
+105	974.2800900436935
+106	437.53418224114614
+107	209.0650264246247
+108	106.17837352237177
+109	58.04742422583697
+110	30.9234983857357
+111	14.798369960319663
+112	0.5962036539847795
+113	7189.372131942591
+114	2017.1929620705396
+115	748.3472610995996
+116	338.9568042227697
+117	169.8083190441882
+118	88.64950988312131
+119	50.33945789566441
+120	26.356398848750707
+121	11.704201528033636
+122	0.48800215047231615
+123	3881.804334560772
+124	1068.6836620286151
+125	402.37372973000157
+126	184.48312027832836
+127	93.56750400068807
+128	49.08827411186096
+129	28.54992409632175
+130	15.220734779318418
+131	7.732060269350959
+132	0.24796399437851982
+133	2582.823949589463
+134	1219.6700981480328
+135	562.6970488020901
+136	296.6540204698964
+137	151.6014422459652
+138	82.95506950524599
+139	43.953347346013246
+140	23.696010888781988
+141	13.47097821621378
+142	7.682451365786761
+143	4.484452713776863
+144	2.630964702812761
+145	1.5144671823473257
+146	0.8675522645191137
+147	0.5158644388387773
+148	0.3083050248969133
+149	0.18717501641932896
+150	0.10745365830194374
+151	0.06525551747432541
+152	0.03887417968661548
+153	0.023286761079460292
+154	0.013563672047534123
+155	0.008091167881199526
+156	0.004239604573509456
+157	0.002285670071784399
+158	0.0016738700605893712
+159	0.0009455967164921172
+160	0.000376820343570252
+161	0.0001558954593991525
+162	0.00010442528435105715
+163	3.788551689822724e-05
+164	1.7966950232535625e-05
+165	2.7229135854733197e-06
+166	2499.3885561190236
+167	1132.8893483371378
+168	526.8933572344981
+169	264.71436854531737
+170	138.14542577786287
+171	72.33044978898185
+172	40.04071253328114
+173	21.768359328338864
+174	11.917367219719257
+175	6.698034228672331
+176	3.913958254392709
+177	2.2282131124925617
+178	1.292365708372697
+179	0.7625086198240366
+180	0.4331013458509874
+181	0.246571027632725
+182	0.14370170562596926
+183	0.08142464147549065
+184	0.04823753035322607
+185	0.027938397489859577
+186	0.015709544796063544
+187	0.008508290073324641
+188	0.004511925949145812
+189	0.002302132866173188
+190	0.0014107777862027463
+191	0.0008231239099922279
+192	0.0004178775006287523
+193	0.00013004472960242183
+194	6.0645724462171906e-05
+195	1.5557530881066646e-05
+196	2032.912370896899
+197	896.7364092880864
+198	440.7855701253145
+199	207.50042619077504
+200	112.6220886264891
+201	58.99161417271952
+202	30.30639449944423
+203	16.0789460972195
+204	8.714974017846798
+205	4.886453088954149
+206	2.636494541948655
+207	1.4981225900591975
+208	0.811644466793108
+209	0.45833257698325564
+210	0.2559462209203253
+211	0.14138654986375482
+212	0.07495200013229694
+213	0.04081304331100372
+214	0.021481280949934258
+215	0.011091329914843809
+216	0.006071970010279882
+217	0.0028445793123748854
+218	0.0013413756999869262
+219	0.0005422502208781788
+220	0.00028248008335300174
+221	0.0001695925828574373
+222	1.2138155852973056e-05
+223	1496.0382397800063
+224	689.1397408974253
+225	299.7461868510539
+226	140.9938903538942
+227	71.53817839451993
+228	34.826076161850736
+229	18.01455288800762
+230	9.524071161739812
+231	4.750556980693393
+232	2.361477436377448
+233	1.2095703525931023
+234	0.6542915363073232
+235	0.31443290799916007
+236	0.1596535280620461
+237	0.07449001412473201
+238	0.034605894118337616
+239	0.015340417182374298
+240	0.006941657353635326
+241	0.0031988119265885548
+242	0.00116522241427977
+243	0.0004012790522180683
+244	0.00012934227098271916
+245	7.432570953394288e-05
+246	6.774497373370989e-06
+247	916.8810000428937
+248	389.52434734273345
+249	171.28695068403653
+250	79.43400687066703
+251	34.72946867196473
+252	15.669548698042346
+253	7.607777109241127
+254	3.3409696428930036
+255	1.4142482676811416
+256	0.6185826911891682
+257	0.24880860791096274
+258	0.09203362721960688
+259	0.03436476966786862
+260	0.012136885406185849
+261	0.003821606428989114
+262	0.0011948273638939866
+263	0.0002895912467153434
+264	6.966695899976558e-05
+265	4.5300214179112075e-06
diff --git a/validphys2/src/validphys/tests/test_regressions.py b/validphys2/src/validphys/tests/test_regressions.py
index 08a6a459ee..955a4da556 100644
--- a/validphys2/src/validphys/tests/test_regressions.py
+++ b/validphys2/src/validphys/tests/test_regressions.py
@@ -131,7 +131,8 @@ def test_datasetchi2(data_singleexp_witht0_config):
 def test_art_rep_generation(data_config):
     config = dict(data_config)
     config["dataset_inputs"] = CORR_DATA
-    config["fitting"] = {"seed": 123456}
+    config["mcseed"] = 123456
+    config["genrep"] = True
     config["nreplica"] = 1
     _, art_replicas, _,_ = API.art_rep_generation(**config)
     return pd.DataFrame(art_replicas.T, columns=['rep0'])

From a3b59a8c0e9d3548d4ea6c8169741b5b09c9b47b Mon Sep 17 00:00:00 2001
From: juacrumar <juacrumar@gmail.com>
Date: Thu, 7 Oct 2021 12:45:36 +0200
Subject: [PATCH 06/12] add a warning in filter

---
 validphys2/src/validphys/chi2grids.py | 23 +++++------------------
 validphys2/src/validphys/filters.py   | 10 +++++++++-
 2 files changed, 14 insertions(+), 19 deletions(-)

diff --git a/validphys2/src/validphys/chi2grids.py b/validphys2/src/validphys/chi2grids.py
index 2768d34793..de3d5fcda8 100644
--- a/validphys2/src/validphys/chi2grids.py
+++ b/validphys2/src/validphys/chi2grids.py
@@ -14,8 +14,9 @@
 from reportengine import collect
 from reportengine.table import table
 
-from validphys.core import PDF
 from validphys.calcutils import calc_chi2
+from validphys.n3fit_data import replica_mcseed
+from validphys.pseudodata import make_replica
 
 PseudoReplicaExpChi2Data = namedtuple(
     "PseudoReplicaChi2Data", ["group", "ndata", "chi2", "nnfit_index"]
@@ -26,13 +27,11 @@
 
 
 def computed_pseudoreplicas_chi2(
-    # TODO: these three are just so I can call make_replica?
     mcseed,
     dataset_inputs_loaded_cd_with_cuts,
     fitted_replica_indexes,
     group_result_table_no_table,  # to get the results already in the form of a dataframe
     groups_sqrtcovmat,
-    t0set: (PDF, type(None)),
 ):
     """Return a dataframe with the chi² of each replica with its corresponding
     pseudodata (i.e. the one it was fitted with). The chi² is computed by group.
@@ -40,26 +39,16 @@ def computed_pseudoreplicas_chi2(
         ``['group',  'ndata' , 'nnfit_index']``
     where ``nnftix_index`` is the name of the corresponding replica
     """
-    #######
     # Get the replica pseudodata
-    # TODO: it looks like I should be able to have directly make_replica in the arguments
-    # but don't really see how
-    from validphys.n3fit_data import replica_mcseed
-    from validphys.pseudodata import make_replica
-
     all_data_replicas = []
     for replica in fitted_replica_indexes:
         value_of_mcseed = replica_mcseed(replica, mcseed, True)
-        all_data_replicas.append(
-            make_replica(dataset_inputs_loaded_cd_with_cuts, value_of_mcseed)
-        )
+        all_data_replicas.append(make_replica(dataset_inputs_loaded_cd_with_cuts, value_of_mcseed))
     r_data = np.array(all_data_replicas).T
     ########################
 
     # Drop data central and theory central which is not useful here
-    r_prediction = group_result_table_no_table.drop(
-        columns=["data_central", "theory_central"]
-    )
+    r_prediction = group_result_table_no_table.drop(columns=["data_central", "theory_central"])
 
     # Now compute the chi2 in a per-group basis
     diff = r_prediction - r_data
@@ -87,9 +76,7 @@ def computed_pseudoreplicas_chi2(
 # them explicitly than setting some, so we require the user to do that.
 fits_computed_pseudoreplicas_chi2 = collect(computed_pseudoreplicas_chi2, ("fits",))
 
-dataspecs_computed_pseudorreplicas_chi2 = collect(
-    computed_pseudoreplicas_chi2, ("dataspecs",)
-)
+dataspecs_computed_pseudorreplicas_chi2 = collect(computed_pseudoreplicas_chi2, ("dataspecs",))
 
 
 @table
diff --git a/validphys2/src/validphys/filters.py b/validphys2/src/validphys/filters.py
index d364764b85..955ea00a2b 100644
--- a/validphys2/src/validphys/filters.py
+++ b/validphys2/src/validphys/filters.py
@@ -10,7 +10,7 @@
 
 import numpy as np
 
-from NNPDF import CommonData, RandomGenerator
+from NNPDF import CommonData
 from reportengine.checks import make_argcheck, check, check_positive, make_check
 from reportengine.compat import yaml
 import validphys.cuts
@@ -83,6 +83,13 @@ def prepare_nnpdf_rng(filterseed:int, rngalgo:int, seed:int):
     be an integer between 0 and 16, seeded with ``filterseed``.
     The RNG can then be subsequently used to i.e generate pseudodata.
     """
+    try:
+        from NNPDF import RandomGenerator
+    except ImportError as e:
+        logging.error("Generating closure data needs a valid installation of libNNPDF")
+        raise e
+
+    log.warning("Importing libNNPDF")
     log.info("Initialising RNG")
     RandomGenerator.InitRNG(rngalgo, seed)
     RandomGenerator.GetRNG().SetSeed(filterseed)
@@ -164,6 +171,7 @@ def _filter_real_data(filter_path, data):
 
 def _filter_closure_data(filter_path, data, fakepdfset, fakenoise, errorsize):
     """Filter closure test data."""
+    import ipdb; ipdb.set_trace()
     total_data_points = 0
     total_cut_data_points = 0
     fakeset = fakepdfset.load()

From 08d0eb747bf39d0c0948a1e4e43e0eadfee3231f Mon Sep 17 00:00:00 2001
From: juacrumar <juacrumar@gmail.com>
Date: Thu, 7 Oct 2021 14:37:13 +0200
Subject: [PATCH 07/12] add fitted make replicas

---
 validphys2/src/validphys/chi2grids.py  | 16 +++-------------
 validphys2/src/validphys/pseudodata.py |  1 +
 2 files changed, 4 insertions(+), 13 deletions(-)

diff --git a/validphys2/src/validphys/chi2grids.py b/validphys2/src/validphys/chi2grids.py
index de3d5fcda8..b1f95c1d16 100644
--- a/validphys2/src/validphys/chi2grids.py
+++ b/validphys2/src/validphys/chi2grids.py
@@ -13,10 +13,7 @@
 
 from reportengine import collect
 from reportengine.table import table
-
 from validphys.calcutils import calc_chi2
-from validphys.n3fit_data import replica_mcseed
-from validphys.pseudodata import make_replica
 
 PseudoReplicaExpChi2Data = namedtuple(
     "PseudoReplicaChi2Data", ["group", "ndata", "chi2", "nnfit_index"]
@@ -27,9 +24,7 @@
 
 
 def computed_pseudoreplicas_chi2(
-    mcseed,
-    dataset_inputs_loaded_cd_with_cuts,
-    fitted_replica_indexes,
+    fitted_make_replicas,
     group_result_table_no_table,  # to get the results already in the form of a dataframe
     groups_sqrtcovmat,
 ):
@@ -39,13 +34,8 @@ def computed_pseudoreplicas_chi2(
         ``['group',  'ndata' , 'nnfit_index']``
     where ``nnftix_index`` is the name of the corresponding replica
     """
-    # Get the replica pseudodata
-    all_data_replicas = []
-    for replica in fitted_replica_indexes:
-        value_of_mcseed = replica_mcseed(replica, mcseed, True)
-        all_data_replicas.append(make_replica(dataset_inputs_loaded_cd_with_cuts, value_of_mcseed))
-    r_data = np.array(all_data_replicas).T
-    ########################
+    # Stack the replica pseudodata to have the prediction shape
+    r_data = np.stack(fitted_make_replicas, axis=1)
 
     # Drop data central and theory central which is not useful here
     r_prediction = group_result_table_no_table.drop(columns=["data_central", "theory_central"])
diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py
index eca9cf2858..fd15d6f7cb 100644
--- a/validphys2/src/validphys/pseudodata.py
+++ b/validphys2/src/validphys/pseudodata.py
@@ -230,6 +230,7 @@ def indexed_make_replica(groups_index, make_replica):
 fit_tr_masks = collect('replica_training_mask_table', ('fitreplicas', 'fitenvironment'))
 pdf_tr_masks = collect('replica_training_mask_table', ('pdfreplicas', 'fitenvironment'))
 make_replicas = collect('make_replica', ('replicas',))
+fitted_make_replicas = collect('make_replica', ('pdfreplicas',))
 indexed_make_replicas = collect('indexed_make_replica', ('replicas',))
 
 def recreate_fit_pseudodata(_recreate_fit_pseudodata, fitreplicas, fit_tr_masks):

From 623a21fc247f4c8f44a5a2da6e1e917f98984c77 Mon Sep 17 00:00:00 2001
From: "Juan M. Cruz-Martinez" <juacrumar@lairen.eu>
Date: Thu, 7 Oct 2021 15:10:02 +0200
Subject: [PATCH 08/12] Update validphys2/src/validphys/filters.py

Co-authored-by: siranipour <43517072+siranipour@users.noreply.github.com>
---
 validphys2/src/validphys/filters.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/validphys2/src/validphys/filters.py b/validphys2/src/validphys/filters.py
index 955ea00a2b..ee61137f3d 100644
--- a/validphys2/src/validphys/filters.py
+++ b/validphys2/src/validphys/filters.py
@@ -171,7 +171,6 @@ def _filter_real_data(filter_path, data):
 
 def _filter_closure_data(filter_path, data, fakepdfset, fakenoise, errorsize):
     """Filter closure test data."""
-    import ipdb; ipdb.set_trace()
     total_data_points = 0
     total_cut_data_points = 0
     fakeset = fakepdfset.load()

From 8dd917c9a3e2704991b0adc8b40caefef5c7847a Mon Sep 17 00:00:00 2001
From: juacrumar <juacrumar@gmail.com>
Date: Fri, 8 Oct 2021 10:14:33 +0200
Subject: [PATCH 09/12] use example resource

---
 validphys2/examples/mc_gen_example.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/validphys2/examples/mc_gen_example.yaml b/validphys2/examples/mc_gen_example.yaml
index e094ee770c..d661ca6e07 100644
--- a/validphys2/examples/mc_gen_example.yaml
+++ b/validphys2/examples/mc_gen_example.yaml
@@ -8,7 +8,7 @@ meta:
 
 fit: 210629-n3fit-001
 
-theoryid: 200
+theoryid: 162
 mcseed: 4
 genrep: True
 

From b15dd54f5baf7cfb4a0b5d7c87f49643bf94f619 Mon Sep 17 00:00:00 2001
From: juacrumar <juacrumar@gmail.com>
Date: Fri, 22 Oct 2021 13:56:11 +0200
Subject: [PATCH 10/12] remove unnecessary variable

---
 validphys2/src/validphys/chi2grids.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/validphys2/src/validphys/chi2grids.py b/validphys2/src/validphys/chi2grids.py
index b1f95c1d16..c6126677b3 100644
--- a/validphys2/src/validphys/chi2grids.py
+++ b/validphys2/src/validphys/chi2grids.py
@@ -43,11 +43,10 @@ def computed_pseudoreplicas_chi2(
     # Now compute the chi2 in a per-group basis
     diff = r_prediction - r_data
     group_level = r_prediction.index.get_level_values("group")
-    groups = group_level.drop_duplicates().to_list()
 
     # Save the results in a dataframe similar (but not equal) to the old one
     df_output = []
-    for group in groups:
+    for group in group_level.unique():
         group_diff = diff.loc[group_level == group]
         its_covmat = groups_sqrtcovmat[group_level == group][group]
         chi2_per_replica = calc_chi2(its_covmat, group_diff)

From bac91da6769d6e1b91533a5ddfc3b2c6e4b2b985 Mon Sep 17 00:00:00 2001
From: juacrumar <juacrumar@gmail.com>
Date: Wed, 10 Nov 2021 19:29:13 +0100
Subject: [PATCH 11/12] update test

---
 .../regressions/test_art_rep_generation.csv   | 532 +++++++++---------
 1 file changed, 266 insertions(+), 266 deletions(-)

diff --git a/validphys2/src/validphys/tests/regressions/test_art_rep_generation.csv b/validphys2/src/validphys/tests/regressions/test_art_rep_generation.csv
index ebdbb99f3d..50fa7c7404 100644
--- a/validphys2/src/validphys/tests/regressions/test_art_rep_generation.csv
+++ b/validphys2/src/validphys/tests/regressions/test_art_rep_generation.csv
@@ -1,267 +1,267 @@
 	rep0
-0	605004.0229655241
-1	602440.6042534113
-2	620858.2593790491
-3	613230.2445669749
-4	639952.7905205782
-5	655949.8790361139
-6	636850.6054514458
-7	628994.3543439173
-8	657301.323486842
-9	627892.0285999136
-10	598536.2932218027
-11	450048.5200059017
-12	431216.7505910402
-13	447044.9288255663
-14	440004.6820123964
-15	429728.93818449834
-16	421295.20118263003
-17	384454.24090431526
-18	370980.411169651
-19	382612.0148703672
-20	354678.0131086338
-21	334603.4158950332
-22	128562.17973857898
-23	127342.18412537871
-24	124185.70091052185
-25	117188.71279217266
-26	110308.86504021735
-27	101783.07229286227
-28	84984.6415717532
-29	50391.89736764995
-30	217.31574309245264
-31	99.256686892602
-32	50.70196796309883
-33	28.166627969438913
-34	18.750385343394814
-35	10.637986338057157
-36	8.458165998514103
-37	5.161374135077057
-38	1.7440116151145275
-39	0.5166681241057582
-40	0.14344058134523924
-41	0.024785429275386427
-42	0.0025163509905126555
-43	11667.782124893894
-44	21223.526886450283
-45	13462.35543683801
-46	6138.873985151094
-47	2708.1167755085808
-48	1226.9779856735504
-49	575818.4164028777
-50	574257.0910354555
-51	580128.0112796086
-52	585059.2199059085
-53	584763.314801447
-54	596571.1334267028
-55	596936.3727577593
-56	600384.1891912837
-57	607933.3346544248
-58	588246.38141835
-59	556859.2623407103
-60	436504.26730428456
-61	432303.05461665214
-62	429878.81481606193
-63	423512.7804381009
-64	414307.7885251502
-65	405518.16477606195
-66	388224.2735100763
-67	375485.63061635545
-68	363704.71285150666
-69	342011.205090951
-70	319474.23506993934
-71	134206.51216829068
-72	133256.30384223114
-73	133281.55486784823
-74	132054.46387549827
-75	130885.56238010839
-76	128780.9376893244
-77	118628.79847887601
-78	106435.44676191603
-79	89055.8013789791
-80	68102.79690334377
-81	45021.34978934775
-82	21975.39579667182
-83	9930.95091649565
-84	2933.8462314989843
-85	1086.2279347212962
-86	465.0529875023162
-87	226.30802040953344
-88	111.79690230475649
-89	62.17381426664207
-90	30.845472233433497
-91	14.362338415025647
-92	0.613668421348077
-93	9932.826070454066
-94	2884.6121408528056
-95	1067.4232418905062
-96	450.52936603855994
-97	214.9415800499354
-98	111.97977024219476
-99	59.02227114568631
-100	30.124745159192784
-101	14.001335018551329
-102	0.6432016353212359
-103	9291.864652700102
-104	2612.102669987266
-105	952.876335785387
-106	422.62773612597147
-107	202.29727198956087
-108	104.80403433709903
-109	55.52211814833203
-110	29.47827303942286
-111	14.102842107420193
-112	0.5484354153200658
-113	6988.302774545697
-114	1953.0701255532113
-115	720.4718120407177
-116	326.1560157083265
-117	163.0644381346169
-118	86.09929672384352
-119	46.67605183243521
-120	23.700751059156776
-121	11.811860841607936
-122	0.46671218710115575
-123	3773.1302184015517
-124	1038.0108670407053
-125	389.0162743842788
-126	180.3823439860646
-127	92.85633549871146
-128	48.04747813864365
-129	29.15152142498096
-130	14.667980477322132
-131	6.972923144523965
-132	0.275018191072815
-133	2685.283395705168
-134	1255.614216863319
-135	623.4123408118692
-136	300.9025858506389
-137	154.9496016877635
-138	85.6238960317311
-139	45.697395051818916
-140	24.643819750619578
-141	14.154083386403244
-142	8.083627826692325
-143	4.615053327117134
-144	2.6930698606373755
-145	1.6039820756213605
-146	0.9190543906329204
-147	0.5420879986612372
-148	0.3238060211053549
-149	0.19164359765485747
-150	0.11454603590748376
-151	0.06766963214283701
-152	0.039295554906154065
-153	0.023717740337089382
-154	0.013766586907955021
-155	0.007925087637831662
-156	0.004510292638598202
-157	0.0025257234625036358
-158	0.0015742723776101173
-159	0.0009174769620039548
-160	0.00036071719540926286
-161	0.00017939172781702523
-162	0.00010719970920129199
-163	5.10754171638108e-05
-164	9.494161579905887e-06
-165	4.097867641772405e-06
-166	2496.543317341297
-167	1155.2596235411136
-168	526.9753542249528
-169	273.3672352496259
-170	144.09730724029419
-171	73.8283798920788
-172	40.15431825945132
-173	22.253841536320582
-174	12.876862009947805
-175	7.147197673282873
-176	4.1000363261395325
-177	2.3541540270520005
-178	1.3730432793225176
-179	0.766449205276789
-180	0.4316549850474859
-181	0.2570742612596235
-182	0.15083018094262035
-183	0.08778978138524669
-184	0.05001032462453892
-185	0.029430375664826213
-186	0.01611415361553344
-187	0.008973177721778117
-188	0.004747317124419756
-189	0.0024668103139778887
-190	0.0015283063419005878
-191	0.0007787419053764184
-192	0.0003113213196017707
-193	0.00018410837384159157
-194	9.891363626568309e-05
-195	1.3627519290893994e-05
-196	2049.595793988463
-197	937.9099866627081
-198	440.85996141143124
-199	222.39625542891935
-200	112.54650664349728
-201	58.05800257833228
-202	32.120045403096555
-203	16.32868970476539
-204	9.173988069096177
-205	5.183254049935798
-206	2.8208283943003707
-207	1.6101568970220699
-208	0.8518829065147376
-209	0.484567635991665
-210	0.2695394668785933
-211	0.14428600385730475
-212	0.07791175268089195
-213	0.04329623461829344
-214	0.02197485278250843
-215	0.011369326958557987
-216	0.0056973672800596205
-217	0.002712782984102383
-218	0.0013469711694377787
-219	0.0006248060260332912
-220	0.0002558575769424933
-221	0.00016316339858413995
-222	8.002430545648022e-06
-223	1575.5050472067082
-224	728.6008273425452
-225	321.69440777243614
-226	151.0771234502
-227	75.52867010203853
-228	38.982186662924384
-229	20.356929633419362
-230	9.39420524561827
-231	5.188908325330907
-232	2.6002888128368964
-233	1.3368274693702318
-234	0.6800019692538632
-235	0.34715311449716724
-236	0.17893906702328258
-237	0.0794910332147346
-238	0.03442652404659983
-239	0.016486812605305875
-240	0.007590628806292284
-241	0.003170146326010336
-242	0.0012235655803027057
-243	0.000430484039887203
-244	0.00012033484918649306
-245	3.012808100117648e-05
-246	6.555863621871152e-06
-247	957.1097986160261
-248	406.8294452550777
-249	190.23178545891966
-250	89.90518748652033
-251	39.2527330848522
-252	17.795728828437618
-253	8.172773534716532
-254	3.258833345760595
-255	1.5580913277549024
-256	0.6302044867930973
-257	0.2604667628574771
-258	0.10035630960385046
-259	0.04013064955098406
-260	0.011580736083436731
-261	0.0043259887879153825
-262	0.0012483709943641582
-263	0.0002272026342898553
-264	4.60377440249738e-05
-265	5.936781013576351e-06
\ No newline at end of file
+0	608657.9135469823
+1	598587.3652054642
+2	611562.0252168534
+3	619677.7052309483
+4	642785.265120052
+5	642261.2658612704
+6	628243.4944682346
+7	621797.7928727296
+8	646668.3796428698
+9	620041.17823229
+10	569703.6851953906
+11	445252.06377498154
+12	433217.1299099684
+13	454895.544675859
+14	438443.27305097826
+15	428354.037587638
+16	412373.92134376976
+17	382730.96323754865
+18	370486.1070181092
+19	371251.83542146476
+20	352614.1469707638
+21	319878.5814996741
+22	126104.04182198147
+23	129482.92279127579
+24	122265.77521146572
+25	118215.86761904058
+26	111905.56327679456
+27	106057.64300073356
+28	84471.02696419714
+29	46058.76052194961
+30	223.0880887021286
+31	101.7080436215751
+32	50.05099409165558
+33	27.2840499015026
+34	17.358763759922454
+35	10.768383831528809
+36	8.423672723288496
+37	4.712504694201969
+38	1.5138261182805381
+39	0.49769738167029703
+40	0.1295556561530443
+41	0.03367851702098606
+42	0.003887224224144138
+43	12050.43483274158
+44	22007.427084059655
+45	13860.845118319343
+46	6952.626129213045
+47	2493.8762765297342
+48	1188.4120241108963
+49	576140.5999430714
+50	578022.3386753529
+51	580733.8077433861
+52	586192.2026342814
+53	588040.3827016304
+54	602122.7925561155
+55	598513.1117078832
+56	604206.1111315321
+57	608032.1353270585
+58	599065.8054973363
+59	561675.3246431323
+60	436309.5641518851
+61	435169.3190210109
+62	432840.2999748325
+63	426057.28748407355
+64	415468.94069709786
+65	407698.033884484
+66	392193.317176145
+67	379397.06660555885
+68	366623.9125816332
+69	347347.66191160737
+70	321412.94188504893
+71	135343.0110473137
+72	134531.88403984267
+73	134759.2157639614
+74	133325.7970860389
+75	132702.5257633686
+76	129482.5209183002
+77	119774.23720658766
+78	107289.62091219533
+79	89772.2977010038
+80	68328.56829492863
+81	45818.28999683553
+82	22346.066330948524
+83	10265.363273728935
+84	3021.849007967259
+85	1110.5319164446
+86	477.1686488930037
+87	229.6044300023161
+88	114.45592395540837
+89	66.50415863912994
+90	31.257582984767136
+91	14.253305068724345
+92	0.6338586544574546
+93	10238.544058336323
+94	2950.593912076866
+95	1098.653060336677
+96	465.64883192427885
+97	223.54763063649088
+98	112.05046427326685
+99	60.559347342772845
+100	30.872814873913853
+101	14.598524587856607
+102	0.6626369853782214
+103	9578.6201743399
+104	2695.4830162060716
+105	974.2800900436935
+106	437.5341822411462
+107	209.0650264246247
+108	106.17837352237177
+109	58.04742422583696
+110	30.9234983857357
+111	14.798369960319663
+112	0.5962036539847795
+113	7189.372131942591
+114	2017.1929620705396
+115	748.3472610995996
+116	338.9568042227697
+117	169.8083190441882
+118	88.64950988312131
+119	50.33945789566441
+120	26.356398848750704
+121	11.704201528033636
+122	0.48800215047231615
+123	3881.804334560772
+124	1068.6836620286151
+125	402.3737297300015
+126	184.48312027832836
+127	93.56750400068807
+128	49.08827411186096
+129	28.54992409632175
+130	15.220734779318418
+131	7.732060269350959
+132	0.24796399437851982
+133	2582.823949589463
+134	1219.6700981480328
+135	562.6970488020901
+136	296.6540204698964
+137	151.6014422459652
+138	82.95506950524599
+139	43.953347346013246
+140	23.696010888781988
+141	13.47097821621378
+142	7.682451365786761
+143	4.484452713776863
+144	2.630964702812761
+145	1.5144671823473257
+146	0.8675522645191137
+147	0.5158644388387773
+148	0.3083050248969133
+149	0.18717501641932896
+150	0.10745365830194374
+151	0.06525551747432541
+152	0.03887417968661548
+153	0.023286761079460292
+154	0.013563672047534123
+155	0.008091167881199526
+156	0.004239604573509456
+157	0.002285670071784399
+158	0.0016738700605893712
+159	0.0009455967164921172
+160	0.000376820343570252
+161	0.0001558954593991525
+162	0.00010442528435105715
+163	3.788551689822724e-05
+164	1.7966950232535625e-05
+165	2.7229135854733197e-06
+166	2499.3885561190236
+167	1132.8893483371378
+168	526.8933572344981
+169	264.71436854531737
+170	138.14542577786287
+171	72.33044978898185
+172	40.04071253328114
+173	21.768359328338864
+174	11.917367219719257
+175	6.698034228672331
+176	3.913958254392709
+177	2.2282131124925617
+178	1.292365708372697
+179	0.7625086198240366
+180	0.4331013458509874
+181	0.246571027632725
+182	0.14370170562596926
+183	0.08142464147549065
+184	0.04823753035322607
+185	0.027938397489859577
+186	0.015709544796063544
+187	0.008508290073324641
+188	0.004511925949145812
+189	0.002302132866173188
+190	0.0014107777862027463
+191	0.0008231239099922279
+192	0.0004178775006287523
+193	0.00013004472960242183
+194	6.0645724462171906e-05
+195	1.5557530881066646e-05
+196	2032.912370896899
+197	896.7364092880864
+198	440.7855701253145
+199	207.50042619077504
+200	112.6220886264891
+201	58.99161417271952
+202	30.30639449944423
+203	16.0789460972195
+204	8.714974017846798
+205	4.886453088954149
+206	2.636494541948655
+207	1.4981225900591975
+208	0.811644466793108
+209	0.45833257698325564
+210	0.2559462209203253
+211	0.14138654986375482
+212	0.07495200013229694
+213	0.04081304331100372
+214	0.021481280949934258
+215	0.011091329914843809
+216	0.006071970010279882
+217	0.0028445793123748854
+218	0.0013413756999869262
+219	0.0005422502208781788
+220	0.00028248008335300174
+221	0.0001695925828574373
+222	1.2138155852973056e-05
+223	1496.0382397800063
+224	689.1397408974253
+225	299.7461868510539
+226	140.9938903538942
+227	71.53817839451993
+228	34.826076161850736
+229	18.01455288800762
+230	9.524071161739812
+231	4.750556980693393
+232	2.361477436377448
+233	1.2095703525931023
+234	0.6542915363073232
+235	0.31443290799916007
+236	0.1596535280620461
+237	0.07449001412473201
+238	0.034605894118337616
+239	0.015340417182374298
+240	0.006941657353635326
+241	0.0031988119265885548
+242	0.00116522241427977
+243	0.0004012790522180683
+244	0.00012934227098271916
+245	7.432570953394288e-05
+246	6.774497373370989e-06
+247	916.8810000428937
+248	389.52434734273345
+249	171.28695068403653
+250	79.43400687066703
+251	34.72946867196473
+252	15.669548698042346
+253	7.607777109241127
+254	3.3409696428930036
+255	1.4142482676811416
+256	0.6185826911891682
+257	0.24880860791096274
+258	0.09203362721960688
+259	0.03436476966786862
+260	0.012136885406185849
+261	0.003821606428989114
+262	0.0011948273638939866
+263	0.0002895912467153434
+264	6.966695899976558e-05
+265	4.5300214179112075e-06

From 38bd7f47e293a5b7c7d4ff4647c329a7608d2574 Mon Sep 17 00:00:00 2001
From: juacrumar <juacrumar@gmail.com>
Date: Fri, 12 Nov 2021 12:27:50 +0100
Subject: [PATCH 12/12] remove arange

---
 validphys2/src/validphys/config.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py
index 488f65f48e..f0fcd5bba7 100644
--- a/validphys2/src/validphys/config.py
+++ b/validphys2/src/validphys/config.py
@@ -16,8 +16,6 @@
 from collections import ChainMap, defaultdict
 from collections.abc import Mapping, Sequence
 
-import numpy as np
-
 from reportengine import configparser
 from reportengine.environment import Environment, EnvironmentError_
 from reportengine.configparser import (
@@ -220,7 +218,7 @@ def parse_use_cuts(self, use_cuts: (bool, str)):
 
     def produce_replicas(self, nreplica: int):
         """Produce a replicas array"""
-        return NSList(np.arange(1, nreplica+1), nskey="replica")
+        return NSList(range(1, nreplica+1), nskey="replica")
 
     def produce_inclusive_use_scalevar_uncertainties(self, use_scalevar_uncertainties: bool = False,
                                         point_prescription: (str, None) = None):