From 5432e682fefb664a257f24775bc70beaea27331d Mon Sep 17 00:00:00 2001
From: RoyStegeman <roystegeman@live.nl>
Date: Wed, 3 Apr 2024 14:46:50 +0100
Subject: [PATCH 1/5] make pseudodata_table correctly deal with multiple
 replicas

---
 validphys2/src/validphys/n3fit_data.py | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)
diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py
index ced700f98f..673fab2706 100644
--- a/validphys2/src/validphys/n3fit_data.py
+++ b/validphys2/src/validphys/n3fit_data.py
@@ -4,6 +4,7 @@
 Providers which prepare the data ready for
 :py:func:`n3fit.performfit.performfit`.
 """
+
 from collections import defaultdict
 import functools
 import hashlib
@@ -70,8 +71,7 @@ def __init__(self, group_name, seed, masks=None):
         super().__init__(group_name, seed)
 
     def __iter__(self):
-        for m in self.masks:
-            yield m
+        yield from self.masks
 
 
 def tr_masks(data, replica_trvlseed, parallel_models=False, replica=1, replicas=(1,)):
@@ -343,7 +343,7 @@ def replica_nnseed_fitting_data_dict(replica, exps_fitting_data_dict, replica_nn
 
 replicas_nnseed_fitting_data_dict = collect("replica_nnseed_fitting_data_dict", ("replicas",))
 groups_replicas_indexed_make_replica = collect(
-    "indexed_make_replica", ("group_dataset_inputs_by_experiment", "replicas")
+    "indexed_make_replica", ("replicas", "group_dataset_inputs_by_experiment")
 )
 
 
@@ -359,10 +359,20 @@ def pseudodata_table(groups_replicas_indexed_make_replica, replicas):
     `fitting::savepseudodata` is `true` (as per the default setting) and
     replicas are fitted one at a time. The table can be found in the replica
     folder i.e. <fit dir>/nnfit/replica_*/
-
     """
-    # Concatenate over replicas
-    df = pd.concat(groups_replicas_indexed_make_replica)
+    # groups_replicas_indexed_make_replica is collected over both replicas and dataset_input groups
+    # to correctly put this into a single dataframe, we first need to know the number of
+    # dataset_input groups there are for each replica
+    groups_per_replica = int(len(groups_replicas_indexed_make_replica) / len(replicas))
+    # then we make a list of pandas dataframes, each containing the pseudodata of all datasets
+    # generated for a singel replica
+    df = [
+        pd.concat(groups_replicas_indexed_make_replica[i : i + groups_per_replica])
+        for i in range(0, len(groups_replicas_indexed_make_replica), groups_per_replica)
+    ]
+    # then we concatenate the pseudodata of all replicas into a single dataframe
+    df = pd.concat(df, axis=1)
+    # and finally we add as column titles the replica name
     df.columns = [f"replica {rep}" for rep in replicas]
     return df
 

From 4040eb323a3a9d129ea3b287790b74ad1242132e Mon Sep 17 00:00:00 2001
From: Roy Stegeman <roystegeman@live.nl>
Date: Thu, 11 Apr 2024 09:58:25 +0100
Subject: [PATCH 2/5] Update validphys2/src/validphys/n3fit_data.py

Co-authored-by: Juan M. Cruz-Martinez <juacrumar@lairen.eu>
---
 validphys2/src/validphys/n3fit_data.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py
index 673fab2706..b571f4f269 100644
--- a/validphys2/src/validphys/n3fit_data.py
+++ b/validphys2/src/validphys/n3fit_data.py
@@ -365,7 +365,7 @@ def pseudodata_table(groups_replicas_indexed_make_replica, replicas):
     # dataset_input groups there are for each replica
     groups_per_replica = int(len(groups_replicas_indexed_make_replica) / len(replicas))
     # then we make a list of pandas dataframes, each containing the pseudodata of all datasets
-    # generated for a singel replica
+    # generated for a single replica
     df = [
         pd.concat(groups_replicas_indexed_make_replica[i : i + groups_per_replica])
         for i in range(0, len(groups_replicas_indexed_make_replica), groups_per_replica)

From 1b46e3b5e5804de9f77160a88afb57e96ae53e92 Mon Sep 17 00:00:00 2001
From: RoyStegeman <roystegeman@live.nl>
Date: Thu, 11 Apr 2024 10:05:04 +0100
Subject: [PATCH 3/5] clarify inline comment in pseudodata_table

---
 validphys2/src/validphys/n3fit_data.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py
index b571f4f269..9d13436cbc 100644
--- a/validphys2/src/validphys/n3fit_data.py
+++ b/validphys2/src/validphys/n3fit_data.py
@@ -360,8 +360,12 @@ def pseudodata_table(groups_replicas_indexed_make_replica, replicas):
     replicas are fitted one at a time. The table can be found in the replica
     folder i.e. <fit dir>/nnfit/replica_*/
     """
-    # groups_replicas_indexed_make_replica is collected over both replicas and dataset_input groups
-    # to correctly put this into a single dataframe, we first need to know the number of
+    # groups_replicas_indexed_make_replica is collected over both replicas and dataset_input groups,
+    # in that order. What this means is that groups_replicas_indexed_make_replica is a list of size
+    # number_of_replicas x number_of_data_groups. Where the ordering inside the list is as follows:
+    # [data1_rep1, data2_rep1, ..., datan_rep1, ..., data1_repn, data2_repn, ..., datan_repn].
+    #
+    # To correctly put this into a single dataframe, we first need to know the number of
     # dataset_input groups there are for each replica
     groups_per_replica = int(len(groups_replicas_indexed_make_replica) / len(replicas))
     # then we make a list of pandas dataframes, each containing the pseudodata of all datasets

From 2722aa7168b589b4f249bd75d6645abab4d5ba31 Mon Sep 17 00:00:00 2001
From: Roy Stegeman <roystegeman@live.nl>
Date: Thu, 11 Apr 2024 10:14:49 +0100
Subject: [PATCH 4/5] Update n3fit_data.py

---
 validphys2/src/validphys/n3fit_data.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py
index 9d13436cbc..6212edb285 100644
--- a/validphys2/src/validphys/n3fit_data.py
+++ b/validphys2/src/validphys/n3fit_data.py
@@ -364,7 +364,7 @@ def pseudodata_table(groups_replicas_indexed_make_replica, replicas):
     # in that order. What this means is that groups_replicas_indexed_make_replica is a list of size
     # number_of_replicas x number_of_data_groups. Where the ordering inside the list is as follows:
     # [data1_rep1, data2_rep1, ..., datan_rep1, ..., data1_repn, data2_repn, ..., datan_repn].
-    #
+
     # To correctly put this into a single dataframe, we first need to know the number of
     # dataset_input groups there are for each replica
     groups_per_replica = int(len(groups_replicas_indexed_make_replica) / len(replicas))

From ddf770238e844b468d9896d549e06811ed3e943c Mon Sep 17 00:00:00 2001
From: Roy Stegeman <roystegeman@live.nl>
Date: Thu, 11 Apr 2024 10:17:26 +0100
Subject: [PATCH 5/5] Update validphys2/src/validphys/n3fit_data.py

Co-authored-by: Juan M. Cruz-Martinez <juacrumar@lairen.eu>
---
 validphys2/src/validphys/n3fit_data.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py
index 6212edb285..46d112e12c 100644
--- a/validphys2/src/validphys/n3fit_data.py
+++ b/validphys2/src/validphys/n3fit_data.py
@@ -367,7 +367,7 @@ def pseudodata_table(groups_replicas_indexed_make_replica, replicas):
 
     # To correctly put this into a single dataframe, we first need to know the number of
     # dataset_input groups there are for each replica
-    groups_per_replica = int(len(groups_replicas_indexed_make_replica) / len(replicas))
+    groups_per_replica = len(groups_replicas_indexed_make_replica) // len(replicas)
     # then we make a list of pandas dataframes, each containing the pseudodata of all datasets
     # generated for a single replica
     df = [