Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions doc/sphinx/source/n3fit/runcard_detailed.rst
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,25 @@ and load it in a different runcard and continue the training from there.
While the load file is read as an absolute path, the file to save to will be found
inside the replica folder.

Saving and loading fit pseudodata
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

If the user wishes to save the Monte Carlo pseudodata used for each replica within a fit,
they can do so using the ``savepseudodata`` flag under the ``fitting`` top-level namespace:

.. code-block:: yaml

fitting:
savepseudodata: true

This will cause a ``csv`` file to be saved for each replica under
``<fit_directory>/replica_<number>/datacuts_theory_fitting_training_pseudodata.csv`` and
``<fit_directory>/replica_<number>/datacuts_theory_fitting_validation_pseudodata.csv``
for the training and validation splits respectively. The data points are indexed
according to their experiment. Additionally, the union of these two is saved in
``<fit_directory>/replica_<number>/datacuts_theory_fitting_pseudodata_table.csv``
if one is not interested in the exact nature of the splitting.


Imposing sum rules
^^^^^^^^^^^^^^^^^^
Expand Down
16 changes: 11 additions & 5 deletions n3fit/src/n3fit/scripts/n3fit_exec.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@
actions_ = []
)

FIT_NAMESPACE = "datacuts::theory::fitting "
CLOSURE_NAMESPACE = "datacuts::theory::closuretest::fitting "

N3FIT_PROVIDERS = [
"n3fit.performfit",
"n3fit.n3fit_checks_provider",
Expand Down Expand Up @@ -124,10 +127,11 @@ def from_yaml(cls, o, *args, **kwargs):
raise ConfigError(f"Expecting input runcard to be a mapping, " f"not '{type(file_content)}'.")

if file_content.get('closuretest') is not None:
fit_action = 'datacuts::theory::closuretest::fitting performfit'
namespace = CLOSURE_NAMESPACE
else:
fit_action = 'datacuts::theory::fitting performfit'
N3FIT_FIXED_CONFIG['actions_'].append(fit_action)
namespace = FIT_NAMESPACE

N3FIT_FIXED_CONFIG['actions_'].append(namespace + "performfit")

if file_content["fitting"].get("savepseudodata"):
if len(kwargs["environment"].replicas) != 1:
Expand All @@ -137,8 +141,10 @@ def from_yaml(cls, o, *args, **kwargs):
"to `false` or fit replicas one at a time."
)
# take same namespace configuration on the pseudodata_table action.
table_action = fit_action.replace('performfit', 'pseudodata_table')
N3FIT_FIXED_CONFIG['actions_'].append(table_action)
training_action = namespace + "training_pseudodata"
validation_action = namespace + "validation_pseudodata"

N3FIT_FIXED_CONFIG['actions_'].extend((training_action, validation_action))

file_content.update(N3FIT_FIXED_CONFIG)
return cls(file_content, *args, **kwargs)
Expand Down
51 changes: 39 additions & 12 deletions validphys2/src/validphys/n3fit_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,8 +354,11 @@ def replica_nnseed_fitting_data_dict(replica, exps_fitting_data_dict, replica_nn
exps_pseudodata = collect("generate_data_replica", ("group_dataset_inputs_by_experiment",))
replicas_exps_pseudodata = collect("exps_pseudodata", ("replicas",))

replicas_indexed_make_replica = collect('indexed_make_replica', ('replicas',))


@table
def pseudodata_table(replicas_exps_pseudodata, replicas, experiments_index):
def pseudodata_table(replicas_indexed_make_replica, replicas):
"""Creates a pandas DataFrame containing the generated pseudodata. The
index is :py:func:`validphys.results.experiments_index` and the columns
are the replica numbers.
Expand All @@ -367,23 +370,42 @@ def pseudodata_table(replicas_exps_pseudodata, replicas, experiments_index):
The table can be found in the replica folder i.e. <fit dir>/nnfit/replica_*/

"""
rep_dfs = []
for rep_exps_pseudodata, rep in zip(replicas_exps_pseudodata, replicas):
all_pseudodata = np.concatenate(rep_exps_pseudodata)
rep_dfs.append(pd.DataFrame(
all_pseudodata,
columns=[f"replica {rep}"],
index=experiments_index
))
return pd.concat(rep_dfs, axis=1)
df = pd.concat(replicas_indexed_make_replica)
df.columns = [f"replica {rep}" for rep in replicas]
return df


@table
def training_pseudodata(pseudodata_table, training_mask):
"""Save the training data for the given replica.
Activate by setting ``fitting::savepseudodata: True``
from within the fit runcard.

See Also
--------
:py:func:`validphys.n3fit_data.validation_pseudodata`
"""
return pseudodata_table.loc[training_mask.values]


@table
def validation_pseudodata(pseudodata_table, training_mask):
"""Save the training data for the given replica.
Activate by setting ``fitting::savepseudodata: True``
from within the fit runcard.

See Also
--------
:py:func:`validphys.n3fit_data.training_pseudodata`
"""
return pseudodata_table.loc[~training_mask.values]


exps_tr_masks = collect("tr_masks", ("group_dataset_inputs_by_experiment",))
replicas_exps_tr_masks = collect("exps_tr_masks", ("replicas",))


@table
def training_mask_table(replicas_exps_tr_masks, replicas, experiments_index):
def training_mask(replicas_exps_tr_masks, replicas, experiments_index):
"""Save the boolean mask used to split data into training and validation
for each replica as a pandas DataFrame, indexed by
:py:func:`validphys.results.experiments_index`. Can be used to reconstruct
Expand Down Expand Up @@ -451,6 +473,11 @@ def training_mask_table(replicas_exps_tr_masks, replicas, experiments_index):
))
return pd.concat(rep_dfs, axis=1)


@table
def training_mask_table(training_mask):
return training_mask

def fitting_pos_dict(posdataset):
"""Loads a positivity dataset. For more information see
:py:func:`validphys.n3fit_data_utils.positivity_reader`.
Expand Down