From 0e86af6d4bde5f3b394ea92fd15b28fbeb9a4c30 Mon Sep 17 00:00:00 2001 From: siranipour Date: Mon, 19 Jul 2021 12:16:51 +0100 Subject: [PATCH 1/3] Saving new python pseudodata --- n3fit/src/n3fit/scripts/n3fit_exec.py | 5 ++- validphys2/src/validphys/n3fit_data.py | 51 ++++++++++++++++++++------ 2 files changed, 42 insertions(+), 14 deletions(-) diff --git a/n3fit/src/n3fit/scripts/n3fit_exec.py b/n3fit/src/n3fit/scripts/n3fit_exec.py index 1fd591d4e2..d266004495 100755 --- a/n3fit/src/n3fit/scripts/n3fit_exec.py +++ b/n3fit/src/n3fit/scripts/n3fit_exec.py @@ -137,8 +137,9 @@ def from_yaml(cls, o, *args, **kwargs): "to `false` or fit replicas one at a time." ) # take same namespace configuration on the pseudodata_table action. - table_action = fit_action.replace('performfit', 'pseudodata_table') - N3FIT_FIXED_CONFIG['actions_'].append(table_action) + training_action = fit_action.replace('performfit', 'training_pseudodata') + validation_action = fit_action.replace('performfit', 'validation_pseudodata') + N3FIT_FIXED_CONFIG['actions_'].extend([training_action, validation_action]) file_content.update(N3FIT_FIXED_CONFIG) return cls(file_content, *args, **kwargs) diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py index 0fa33c7ec0..ca7fed9959 100644 --- a/validphys2/src/validphys/n3fit_data.py +++ b/validphys2/src/validphys/n3fit_data.py @@ -354,8 +354,11 @@ def replica_nnseed_fitting_data_dict(replica, exps_fitting_data_dict, replica_nn exps_pseudodata = collect("generate_data_replica", ("group_dataset_inputs_by_experiment",)) replicas_exps_pseudodata = collect("exps_pseudodata", ("replicas",)) +replicas_indexed_make_replica = collect('indexed_make_replica', ('replicas',)) + + @table -def pseudodata_table(replicas_exps_pseudodata, replicas, experiments_index): +def pseudodata_table(replicas_indexed_make_replica, replicas): """Creates a pandas DataFrame containing the generated pseudodata. 
The index is :py:func:`validphys.results.experiments_index` and the columns are the replica numbers. @@ -367,23 +370,42 @@ def pseudodata_table(replicas_exps_pseudodata, replicas, experiments_index): The table can be found in the replica folder i.e. /nnfit/replica_*/ """ - rep_dfs = [] - for rep_exps_pseudodata, rep in zip(replicas_exps_pseudodata, replicas): - all_pseudodata = np.concatenate(rep_exps_pseudodata) - rep_dfs.append(pd.DataFrame( - all_pseudodata, - columns=[f"replica {rep}"], - index=experiments_index - )) - return pd.concat(rep_dfs, axis=1) + df = pd.concat(replicas_indexed_make_replica) + df.columns = [f"replica {rep}" for rep in replicas] + return df + + +@table +def training_pseudodata(pseudodata_table, training_mask): + """Save the training data for the given replica. + Activate by setting ``fitting::savepseudodata: True`` + from within the fit runcard. + + See Also + -------- + :py:func:`validphys.n3fit_data.validation_pseudodata` + """ + return pseudodata_table.loc[training_mask.values] + + +@table +def validation_pseudodata(pseudodata_table, training_mask): + """Save the validation data for the given replica. + Activate by setting ``fitting::savepseudodata: True`` + from within the fit runcard. + + See Also + -------- + :py:func:`validphys.n3fit_data.training_pseudodata` + """ + return pseudodata_table.loc[~training_mask.values] exps_tr_masks = collect("tr_masks", ("group_dataset_inputs_by_experiment",)) replicas_exps_tr_masks = collect("exps_tr_masks", ("replicas",)) -@table -def training_mask_table(replicas_exps_tr_masks, replicas, experiments_index): +def training_mask(replicas_exps_tr_masks, replicas, experiments_index): """Save the boolean mask used to split data into training and validation for each replica as a pandas DataFrame, indexed by :py:func:`validphys.results.experiments_index`. 
Can be used to reconstruct @@ -451,6 +473,11 @@ def training_mask_table(replicas_exps_tr_masks, replicas, experiments_index): )) return pd.concat(rep_dfs, axis=1) + +@table +def training_mask_table(training_mask): + return training_mask + def fitting_pos_dict(posdataset): """Loads a positivity dataset. For more information see :py:func:`validphys.n3fit_data_utils.positivity_reader`. From 90e5fc579f85aebdebb3a1e3cfe9a0c5dc348a41 Mon Sep 17 00:00:00 2001 From: siranipour Date: Wed, 28 Jul 2021 12:19:37 +0100 Subject: [PATCH 2/3] Using variable for namespace --- n3fit/src/n3fit/scripts/n3fit_exec.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/n3fit/src/n3fit/scripts/n3fit_exec.py b/n3fit/src/n3fit/scripts/n3fit_exec.py index d266004495..71d34fe7f7 100755 --- a/n3fit/src/n3fit/scripts/n3fit_exec.py +++ b/n3fit/src/n3fit/scripts/n3fit_exec.py @@ -26,6 +26,9 @@ actions_ = [] ) +FIT_NAMESPACE = "datacuts::theory::fitting " +CLOSURE_NAMESPACE = "datacuts::theory::closuretest::fitting " + N3FIT_PROVIDERS = [ "n3fit.performfit", "n3fit.n3fit_checks_provider", @@ -124,10 +127,11 @@ def from_yaml(cls, o, *args, **kwargs): raise ConfigError(f"Expecting input runcard to be a mapping, " f"not '{type(file_content)}'.") if file_content.get('closuretest') is not None: - fit_action = 'datacuts::theory::closuretest::fitting performfit' + namespace = CLOSURE_NAMESPACE else: - fit_action = 'datacuts::theory::fitting performfit' - N3FIT_FIXED_CONFIG['actions_'].append(fit_action) + namespace = FIT_NAMESPACE + + N3FIT_FIXED_CONFIG['actions_'].append(namespace + "performfit") if file_content["fitting"].get("savepseudodata"): if len(kwargs["environment"].replicas) != 1: @@ -137,9 +141,10 @@ def from_yaml(cls, o, *args, **kwargs): "to `false` or fit replicas one at a time." ) # take same namespace configuration on the pseudodata_table action. 
- training_action = fit_action.replace('performfit', 'training_pseudodata') - validation_action = fit_action.replace('performfit', 'validation_pseudodata') - N3FIT_FIXED_CONFIG['actions_'].extend([training_action, validation_action]) + training_action = namespace + "training_pseudodata" + validation_action = namespace + "validation_pseudodata" + + N3FIT_FIXED_CONFIG['actions_'].extend((training_action, validation_action)) file_content.update(N3FIT_FIXED_CONFIG) return cls(file_content, *args, **kwargs) From dced555d421bc8962d608b2e02c3eed89c0a08ad Mon Sep 17 00:00:00 2001 From: siranipour Date: Tue, 31 Aug 2021 14:42:09 +0100 Subject: [PATCH 3/3] Adding entry to detailed runcard Explaining where to add the flag and what it does --- doc/sphinx/source/n3fit/runcard_detailed.rst | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/doc/sphinx/source/n3fit/runcard_detailed.rst b/doc/sphinx/source/n3fit/runcard_detailed.rst index 8a6e5a2828..a809a82cd5 100644 --- a/doc/sphinx/source/n3fit/runcard_detailed.rst +++ b/doc/sphinx/source/n3fit/runcard_detailed.rst @@ -361,3 +361,22 @@ and load it in a different runcard and continue the training from there. While the load file is read as an absolute path, the file to save to will be found inside the replica folder. + +Saving and loading fit pseudodata +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If the user wishes to save the Monte Carlo pseudodata used for each replica within a fit, +they can do so using the ``savepseudodata`` flag under the ``fitting`` top-level namespace: + +.. code-block:: yaml + + fitting: + savepseudodata: true + +This will cause two ``csv`` files to be saved for each replica, +``<fit_folder>/nnfit/replica_<replica_number>/datacuts_theory_fitting_training_pseudodata.csv`` and +``<fit_folder>/nnfit/replica_<replica_number>/datacuts_theory_fitting_validation_pseudodata.csv``, +for the training and validation splits respectively. The data points are indexed +according to their experiment. 
Additionally, the union of these two is saved in +``<fit_folder>/nnfit/replica_<replica_number>/datacuts_theory_fitting_pseudodata_table.csv`` +for users who do not need the training/validation split.