From 5432e682fefb664a257f24775bc70beaea27331d Mon Sep 17 00:00:00 2001 From: RoyStegeman Date: Wed, 3 Apr 2024 14:46:50 +0100 Subject: [PATCH 1/5] make pseudodata_table correctly deal with multiple replicas --- validphys2/src/validphys/n3fit_data.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py index ced700f98f..673fab2706 100644 --- a/validphys2/src/validphys/n3fit_data.py +++ b/validphys2/src/validphys/n3fit_data.py @@ -4,6 +4,7 @@ Providers which prepare the data ready for :py:func:`n3fit.performfit.performfit`. """ + from collections import defaultdict import functools import hashlib @@ -70,8 +71,7 @@ def __init__(self, group_name, seed, masks=None): super().__init__(group_name, seed) def __iter__(self): - for m in self.masks: - yield m + yield from self.masks def tr_masks(data, replica_trvlseed, parallel_models=False, replica=1, replicas=(1,)): @@ -343,7 +343,7 @@ def replica_nnseed_fitting_data_dict(replica, exps_fitting_data_dict, replica_nn replicas_nnseed_fitting_data_dict = collect("replica_nnseed_fitting_data_dict", ("replicas",)) groups_replicas_indexed_make_replica = collect( - "indexed_make_replica", ("group_dataset_inputs_by_experiment", "replicas") + "indexed_make_replica", ("replicas", "group_dataset_inputs_by_experiment") ) @@ -359,10 +359,20 @@ def pseudodata_table(groups_replicas_indexed_make_replica, replicas): `fitting::savepseudodata` is `true` (as per the default setting) and replicas are fitted one at a time. The table can be found in the replica folder i.e. 
/nnfit/replica_*/ - """ - # Concatenate over replicas - df = pd.concat(groups_replicas_indexed_make_replica) + # groups_replicas_indexed_make_replica is collected over both replicas and dataset_input groups + # to correctly put this into a single dataframe, we first need to know the number of + # dataset_input groups there are for each replica + groups_per_replica = int(len(groups_replicas_indexed_make_replica) / len(replicas)) + # then we make a list of pandas dataframes, each containing the pseudodata of all datasets + # generated for a singel replica + df = [ + pd.concat(groups_replicas_indexed_make_replica[i : i + groups_per_replica]) + for i in range(0, len(groups_replicas_indexed_make_replica), groups_per_replica) + ] + # then we concatenate the pseudodata of all replicas into a single dataframe + df = pd.concat(df, axis=1) + # and finally we add as column titles the replica name df.columns = [f"replica {rep}" for rep in replicas] return df From 4040eb323a3a9d129ea3b287790b74ad1242132e Mon Sep 17 00:00:00 2001 From: Roy Stegeman Date: Thu, 11 Apr 2024 09:58:25 +0100 Subject: [PATCH 2/5] Update validphys2/src/validphys/n3fit_data.py Co-authored-by: Juan M. 
Cruz-Martinez --- validphys2/src/validphys/n3fit_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py index 673fab2706..b571f4f269 100644 --- a/validphys2/src/validphys/n3fit_data.py +++ b/validphys2/src/validphys/n3fit_data.py @@ -365,7 +365,7 @@ def pseudodata_table(groups_replicas_indexed_make_replica, replicas): # dataset_input groups there are for each replica groups_per_replica = int(len(groups_replicas_indexed_make_replica) / len(replicas)) # then we make a list of pandas dataframes, each containing the pseudodata of all datasets - # generated for a singel replica + # generated for a single replica df = [ pd.concat(groups_replicas_indexed_make_replica[i : i + groups_per_replica]) for i in range(0, len(groups_replicas_indexed_make_replica), groups_per_replica) From 1b46e3b5e5804de9f77160a88afb57e96ae53e92 Mon Sep 17 00:00:00 2001 From: RoyStegeman Date: Thu, 11 Apr 2024 10:05:04 +0100 Subject: [PATCH 3/5] clarify inline comment in pseudodata_table --- validphys2/src/validphys/n3fit_data.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py index b571f4f269..9d13436cbc 100644 --- a/validphys2/src/validphys/n3fit_data.py +++ b/validphys2/src/validphys/n3fit_data.py @@ -360,8 +360,12 @@ def pseudodata_table(groups_replicas_indexed_make_replica, replicas): replicas are fitted one at a time. The table can be found in the replica folder i.e. /nnfit/replica_*/ """ - # groups_replicas_indexed_make_replica is collected over both replicas and dataset_input groups - # to correctly put this into a single dataframe, we first need to know the number of + # groups_replicas_indexed_make_replica is collected over both replicas and dataset_input groups, + # in that order. 
What this means is that groups_replicas_indexed_make_replica is a list of size + # number_of_replicas x number_of_data_groups. Where the ordering inside the list is as follows: + # [data1_rep1, data2_rep1, ..., datan_rep1, ..., data1_repn, data2_repn, ..., datan_repn]. + # + # To correctly put this into a single dataframe, we first need to know the number of # dataset_input groups there are for each replica groups_per_replica = int(len(groups_replicas_indexed_make_replica) / len(replicas)) # then we make a list of pandas dataframes, each containing the pseudodata of all datasets From 2722aa7168b589b4f249bd75d6645abab4d5ba31 Mon Sep 17 00:00:00 2001 From: Roy Stegeman Date: Thu, 11 Apr 2024 10:14:49 +0100 Subject: [PATCH 4/5] Update n3fit_data.py --- validphys2/src/validphys/n3fit_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py index 9d13436cbc..6212edb285 100644 --- a/validphys2/src/validphys/n3fit_data.py +++ b/validphys2/src/validphys/n3fit_data.py @@ -364,7 +364,7 @@ def pseudodata_table(groups_replicas_indexed_make_replica, replicas): # in that order. What this means is that groups_replicas_indexed_make_replica is a list of size # number_of_replicas x number_of_data_groups. Where the ordering inside the list is as follows: # [data1_rep1, data2_rep1, ..., datan_rep1, ..., data1_repn, data2_repn, ..., datan_repn]. - # + # To correctly put this into a single dataframe, we first need to know the number of # dataset_input groups there are for each replica groups_per_replica = int(len(groups_replicas_indexed_make_replica) / len(replicas)) From ddf770238e844b468d9896d549e06811ed3e943c Mon Sep 17 00:00:00 2001 From: Roy Stegeman Date: Thu, 11 Apr 2024 10:17:26 +0100 Subject: [PATCH 5/5] Update validphys2/src/validphys/n3fit_data.py Co-authored-by: Juan M. 
Cruz-Martinez --- validphys2/src/validphys/n3fit_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py index 6212edb285..46d112e12c 100644 --- a/validphys2/src/validphys/n3fit_data.py +++ b/validphys2/src/validphys/n3fit_data.py @@ -367,7 +367,7 @@ def pseudodata_table(groups_replicas_indexed_make_replica, replicas): # To correctly put this into a single dataframe, we first need to know the number of # dataset_input groups there are for each replica - groups_per_replica = int(len(groups_replicas_indexed_make_replica) / len(replicas)) + groups_per_replica = len(groups_replicas_indexed_make_replica) // len(replicas) # then we make a list of pandas dataframes, each containing the pseudodata of all datasets # generated for a single replica df = [