From 5c2563008e1d2676a21d07f98951785b56dcf6a9 Mon Sep 17 00:00:00 2001 From: Andrea Barontini Date: Wed, 10 Apr 2024 16:40:27 +0200 Subject: [PATCH 1/5] Fix bugs reordering definitions --- validphys2/src/validphys/coredata.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/validphys2/src/validphys/coredata.py b/validphys2/src/validphys/coredata.py index b3e8c21978..6eaaca6778 100644 --- a/validphys2/src/validphys/coredata.py +++ b/validphys2/src/validphys/coredata.py @@ -393,28 +393,37 @@ def export_data(self, buffer): ret = {"data_central": self.central_values.tolist()} yaml.safe_dump(ret, buffer) + def sort_definitions_by_treatment(definitions, orderkey): + sorted_definitions={} + for orderkey in orderkey: + for key in definitions: + if definitions[key]["treatment"] == orderkey: + sorted_definitions[key] = definitions[key] + return sorted_definitions + def export_uncertainties(self, buffer): """Exports the uncertainties defined by this commondata instance to the given buffer""" definitions = {} for idx, row in self.systype_table.iterrows(): - definitions[f"sys_{idx}"] = {"treatment": row["treatment"], "type": row["name"]} - + if row["name"] != "SKIP": + definitions[f"sys_{idx}"] = {"treatment": row["treatment"], "type": row["name"]} + orderkey = ["ADD", "MULT"] + sorted_definitions = sort_definitions_by_treatment(definitions, orderkey) bins = [] for idx, row in self.systematic_errors().iterrows(): tmp = {"stat": float(self.stat_errors[idx])} # Hope things come in the right order... 
- for key_name, val in zip(definitions, row): + for key_name, val in zip(sorted_definitions, row): tmp[key_name] = float(val) bins.append(tmp) - definitions["stat"] = { + sorted_definitions["stat"] = { "description": "Uncorrelated statistical uncertainties", "treatment": "ADD", "type": "UNCORR", } - - ret = {"definitions": definitions, "bins": bins} + ret = {"definitions": sorted_definitions, "bins": bins} yaml.safe_dump(ret, buffer) def export(self, folder_path): From 202f702c63a54c4b1f0093ba6d2a9f2b9bba69fe Mon Sep 17 00:00:00 2001 From: Andrea Barontini Date: Wed, 10 Apr 2024 16:44:57 +0200 Subject: [PATCH 2/5] Forgot self. --- validphys2/src/validphys/coredata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validphys2/src/validphys/coredata.py b/validphys2/src/validphys/coredata.py index 6eaaca6778..2c1a32fa2b 100644 --- a/validphys2/src/validphys/coredata.py +++ b/validphys2/src/validphys/coredata.py @@ -408,7 +408,7 @@ def export_uncertainties(self, buffer): if row["name"] != "SKIP": definitions[f"sys_{idx}"] = {"treatment": row["treatment"], "type": row["name"]} orderkey = ["ADD", "MULT"] - sorted_definitions = sort_definitions_by_treatment(definitions, orderkey) + sorted_definitions = self.sort_definitions_by_treatment(definitions, orderkey) bins = [] for idx, row in self.systematic_errors().iterrows(): tmp = {"stat": float(self.stat_errors[idx])} From 1f3cd5ee8dec04a1e2e4a6e019039a79fe1c3101 Mon Sep 17 00:00:00 2001 From: Andrea Barontini Date: Wed, 10 Apr 2024 16:46:32 +0200 Subject: [PATCH 3/5] Forgot self again --- validphys2/src/validphys/coredata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validphys2/src/validphys/coredata.py b/validphys2/src/validphys/coredata.py index 2c1a32fa2b..ed5a0d8b18 100644 --- a/validphys2/src/validphys/coredata.py +++ b/validphys2/src/validphys/coredata.py @@ -393,7 +393,7 @@ def export_data(self, buffer): ret = {"data_central": self.central_values.tolist()} yaml.safe_dump(ret, 
buffer) - def sort_definitions_by_treatment(definitions, orderkey): + def sort_definitions_by_treatment(self, definitions, orderkey): sorted_definitions={} for orderkey in orderkey: for key in definitions: From d489adfca4bd51931b600405bd91778983f694d8 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Wed, 10 Apr 2024 16:57:35 +0200 Subject: [PATCH 4/5] rebase and apply comments --- validphys2/src/validphys/coredata.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/validphys2/src/validphys/coredata.py b/validphys2/src/validphys/coredata.py index ed5a0d8b18..479d0cc381 100644 --- a/validphys2/src/validphys/coredata.py +++ b/validphys2/src/validphys/coredata.py @@ -2,6 +2,7 @@ Data containers backed by Python managed memory (Numpy arrays and Pandas dataframes). """ + import dataclasses import logging from typing import Optional @@ -393,22 +394,18 @@ def export_data(self, buffer): ret = {"data_central": self.central_values.tolist()} yaml.safe_dump(ret, buffer) - def sort_definitions_by_treatment(self, definitions, orderkey): - sorted_definitions={} - for orderkey in orderkey: - for key in definitions: - if definitions[key]["treatment"] == orderkey: - sorted_definitions[key] = definitions[key] - return sorted_definitions - def export_uncertainties(self, buffer): """Exports the uncertainties defined by this commondata instance to the given buffer""" definitions = {} for idx, row in self.systype_table.iterrows(): if row["name"] != "SKIP": definitions[f"sys_{idx}"] = {"treatment": row["treatment"], "type": row["name"]} - orderkey = ["ADD", "MULT"] - sorted_definitions = self.sort_definitions_by_treatment(definitions, orderkey) + + # Order the definitions by treatment as ADD, MULT + # TODO: make it so that it corresponds to the original order exactly + sorted_definitions = { + k: v for k, v in sorted(definitions.items(), key=lambda item: item[1]["treatment"]) + } bins = [] for idx, row in self.systematic_errors().iterrows(): tmp = {"stat": 
float(self.stat_errors[idx])} From db8c45dca32899e16ed50dc1da49aec477ac6268 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Wed, 10 Apr 2024 17:57:08 +0200 Subject: [PATCH 5/5] add a test for loading-saving the data/uncertainties into the same covmat --- validphys2/src/validphys/coredata.py | 1 + .../validphys/tests/test_commondataparser.py | 54 ++++++++++++++++--- 2 files changed, 49 insertions(+), 6 deletions(-) diff --git a/validphys2/src/validphys/coredata.py b/validphys2/src/validphys/coredata.py index 479d0cc381..aac4e6fe2d 100644 --- a/validphys2/src/validphys/coredata.py +++ b/validphys2/src/validphys/coredata.py @@ -436,3 +436,4 @@ def export(self, folder_path): # Export data and uncertainties self.export_data(data_path.open("w", encoding="utf-8")) self.export_uncertainties(unc_path.open("w", encoding="utf-8")) + return data_path, unc_path diff --git a/validphys2/src/validphys/tests/test_commondataparser.py b/validphys2/src/validphys/tests/test_commondataparser.py index 7bf492ea76..f6453885a5 100644 --- a/validphys2/src/validphys/tests/test_commondataparser.py +++ b/validphys2/src/validphys/tests/test_commondataparser.py @@ -1,10 +1,12 @@ -import pytest +import numpy as np import pandas as pd +import pytest from validphys.api import API from validphys.commondataparser import load_commondata +from validphys.covmats_utils import construct_covmat from validphys.loader import FallbackLoader as Loader -from validphys.tests.conftest import THEORYID_NEW, FIT +from validphys.tests.conftest import FIT, THEORYID_NEW def test_basic_commondata_loading(): @@ -19,7 +21,9 @@ def test_basic_commondata_loading(): assert isinstance(res.systype_table, pd.DataFrame) # Test a dataset with no systematics - emptysyscd = l.check_posset(theoryID=THEORYID_NEW, setname='NNPDF_POS_2P24GEV_XDQ', postlambda=1e-10) + emptysyscd = l.check_posset( + theoryID=THEORYID_NEW, setname='NNPDF_POS_2P24GEV_XDQ', postlambda=1e-10 + ) emptysysres = load_commondata(emptysyscd.commondata) assert 
emptysysres.nsys == 0 assert emptysysres.systype_table.empty is True @@ -33,9 +37,7 @@ def test_commondata_with_cuts(): loaded_cd = load_commondata(cd) fit_cuts = l.check_fit_cuts(fit=FIT, commondata=cd) - internal_cuts = l.check_internal_cuts( - cd, API.rules(theoryid=THEORYID_NEW, use_cuts="internal") - ) + internal_cuts = l.check_internal_cuts(cd, API.rules(theoryid=THEORYID_NEW, use_cuts="internal")) loaded_cd_fit_cuts = loaded_cd.with_cuts(fit_cuts) # We must do these - 1 subtractions due to the fact that cuts indexing @@ -61,3 +63,43 @@ def test_commondata_with_cuts(): bad_cuts = l.check_fit_cuts(fit=FIT, commondata=cd_bad) with pytest.raises(ValueError): loaded_cd.with_cuts(bad_cuts) + + +def test_commondata_load_write_load(tmp): + """Test that we can load a commondata, write it down, and load it again""" + l = Loader() + + # Select a dataset that we know mixes ADD and MULT (so that the ordering is checked) + setname = "ATLAS_2JET_7TEV_R06_M12Y" + # And a complicated variant + variant = "legacy" + + # Get a reference to the commondata + cd = l.check_commondata(setname=setname, variant=variant) + + # Load it up, save the covmat, and write it down + original_data = cd.load() + data_path, unc_path = original_data.export(tmp) + + # Now, reload it with the new data/unc paths + new_data = cd.with_modified_data(data_path, uncertainties_file=unc_path).load() + + # central value! + original_cv = original_data.central_values.to_numpy() + new_cv = new_data.central_values.to_numpy() + np.testing.assert_allclose(original_cv, new_cv) + + # stats! 
+ original_stats = original_data.stat_errors.to_numpy() + new_stats = new_data.stat_errors.to_numpy() + np.testing.assert_allclose(original_stats, new_stats) + + # Create fake data in order to check whether the covmats are truly the same + # the fake data ensures that the MULT and ADD are treated in the same way in both + # otherwise, since the data is saved wrt the original central value, the test will always pass + fake_unc = np.diag(construct_covmat(original_stats, original_data.systematic_errors())) + fake_data = np.random.rand(len(original_stats)) * fake_unc + original_cv + + new_covmat = construct_covmat(new_stats, new_data.systematic_errors(fake_data)) + original_covmat = construct_covmat(original_stats, original_data.systematic_errors(fake_data)) + np.testing.assert_allclose(new_covmat, original_covmat)