From 5c2563008e1d2676a21d07f98951785b56dcf6a9 Mon Sep 17 00:00:00 2001 From: Andrea Barontini Date: Wed, 10 Apr 2024 16:40:27 +0200 Subject: [PATCH 1/5] Fix bugs reordering definitions --- validphys2/src/validphys/coredata.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/validphys2/src/validphys/coredata.py b/validphys2/src/validphys/coredata.py index b3e8c21978..6eaaca6778 100644 --- a/validphys2/src/validphys/coredata.py +++ b/validphys2/src/validphys/coredata.py @@ -393,28 +393,37 @@ def export_data(self, buffer): ret = {"data_central": self.central_values.tolist()} yaml.safe_dump(ret, buffer) + def sort_definitions_by_treatment(definitions, orderkey): + sorted_definitions={} + for orderkey in orderkey: + for key in definitions: + if definitions[key]["treatment"] == orderkey: + sorted_definitions[key] = definitions[key] + return sorted_definitions + def export_uncertainties(self, buffer): """Exports the uncertainties defined by this commondata instance to the given buffer""" definitions = {} for idx, row in self.systype_table.iterrows(): - definitions[f"sys_{idx}"] = {"treatment": row["treatment"], "type": row["name"]} - + if row["name"] != "SKIP": + definitions[f"sys_{idx}"] = {"treatment": row["treatment"], "type": row["name"]} + orderkey = ["ADD", "MULT"] + sorted_definitions = sort_definitions_by_treatment(definitions, orderkey) bins = [] for idx, row in self.systematic_errors().iterrows(): tmp = {"stat": float(self.stat_errors[idx])} # Hope things come in the right order... 
- for key_name, val in zip(definitions, row): + for key_name, val in zip(sorted_definitions, row): tmp[key_name] = float(val) bins.append(tmp) - definitions["stat"] = { + sorted_definitions["stat"] = { "description": "Uncorrelated statistical uncertainties", "treatment": "ADD", "type": "UNCORR", } - - ret = {"definitions": definitions, "bins": bins} + ret = {"definitions": sorted_definitions, "bins": bins} yaml.safe_dump(ret, buffer) def export(self, folder_path): From 202f702c63a54c4b1f0093ba6d2a9f2b9bba69fe Mon Sep 17 00:00:00 2001 From: Andrea Barontini Date: Wed, 10 Apr 2024 16:44:57 +0200 Subject: [PATCH 2/5] Forgot self. --- validphys2/src/validphys/coredata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validphys2/src/validphys/coredata.py b/validphys2/src/validphys/coredata.py index 6eaaca6778..2c1a32fa2b 100644 --- a/validphys2/src/validphys/coredata.py +++ b/validphys2/src/validphys/coredata.py @@ -408,7 +408,7 @@ def export_uncertainties(self, buffer): if row["name"] != "SKIP": definitions[f"sys_{idx}"] = {"treatment": row["treatment"], "type": row["name"]} orderkey = ["ADD", "MULT"] - sorted_definitions = sort_definitions_by_treatment(definitions, orderkey) + sorted_definitions = self.sort_definitions_by_treatment(definitions, orderkey) bins = [] for idx, row in self.systematic_errors().iterrows(): tmp = {"stat": float(self.stat_errors[idx])} From 1f3cd5ee8dec04a1e2e4a6e019039a79fe1c3101 Mon Sep 17 00:00:00 2001 From: Andrea Barontini Date: Wed, 10 Apr 2024 16:46:32 +0200 Subject: [PATCH 3/5] Forgot self again --- validphys2/src/validphys/coredata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validphys2/src/validphys/coredata.py b/validphys2/src/validphys/coredata.py index 2c1a32fa2b..ed5a0d8b18 100644 --- a/validphys2/src/validphys/coredata.py +++ b/validphys2/src/validphys/coredata.py @@ -393,7 +393,7 @@ def export_data(self, buffer): ret = {"data_central": self.central_values.tolist()} yaml.safe_dump(ret, 
buffer) - def sort_definitions_by_treatment(definitions, orderkey): + def sort_definitions_by_treatment(self, definitions, orderkey): sorted_definitions={} for orderkey in orderkey: for key in definitions: From d489adfca4bd51931b600405bd91778983f694d8 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Wed, 10 Apr 2024 16:57:35 +0200 Subject: [PATCH 4/5] rebase and apply comments --- validphys2/src/validphys/coredata.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/validphys2/src/validphys/coredata.py b/validphys2/src/validphys/coredata.py index ed5a0d8b18..479d0cc381 100644 --- a/validphys2/src/validphys/coredata.py +++ b/validphys2/src/validphys/coredata.py @@ -2,6 +2,7 @@ Data containers backed by Python managed memory (Numpy arrays and Pandas dataframes). """ + import dataclasses import logging from typing import Optional @@ -393,22 +394,18 @@ def export_data(self, buffer): ret = {"data_central": self.central_values.tolist()} yaml.safe_dump(ret, buffer) - def sort_definitions_by_treatment(self, definitions, orderkey): - sorted_definitions={} - for orderkey in orderkey: - for key in definitions: - if definitions[key]["treatment"] == orderkey: - sorted_definitions[key] = definitions[key] - return sorted_definitions - def export_uncertainties(self, buffer): """Exports the uncertainties defined by this commondata instance to the given buffer""" definitions = {} for idx, row in self.systype_table.iterrows(): if row["name"] != "SKIP": definitions[f"sys_{idx}"] = {"treatment": row["treatment"], "type": row["name"]} - orderkey = ["ADD", "MULT"] - sorted_definitions = self.sort_definitions_by_treatment(definitions, orderkey) + + # Order the definitions by treatment as ADD, MULT + # TODO: make it so that it corresponds to the original order exactly + sorted_definitions = { + k: v for k, v in sorted(definitions.items(), key=lambda item: item[1]["treatment"]) + } bins = [] for idx, row in self.systematic_errors().iterrows(): tmp = {"stat": 
float(self.stat_errors[idx])} From db8c45dca32899e16ed50dc1da49aec477ac6268 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Wed, 10 Apr 2024 17:57:08 +0200 Subject: [PATCH 5/5] add a test for loading-saving the data/uncertainties into the same covmat --- validphys2/src/validphys/coredata.py | 1 + .../validphys/tests/test_commondataparser.py | 54 ++++++++++++++++--- 2 files changed, 49 insertions(+), 6 deletions(-) diff --git a/validphys2/src/validphys/coredata.py b/validphys2/src/validphys/coredata.py index 479d0cc381..aac4e6fe2d 100644 --- a/validphys2/src/validphys/coredata.py +++ b/validphys2/src/validphys/coredata.py @@ -436,3 +436,4 @@ def export(self, folder_path): # Export data and uncertainties self.export_data(data_path.open("w", encoding="utf-8")) self.export_uncertainties(unc_path.open("w", encoding="utf-8")) + return data_path, unc_path diff --git a/validphys2/src/validphys/tests/test_commondataparser.py b/validphys2/src/validphys/tests/test_commondataparser.py index 7bf492ea76..f6453885a5 100644 --- a/validphys2/src/validphys/tests/test_commondataparser.py +++ b/validphys2/src/validphys/tests/test_commondataparser.py @@ -1,10 +1,12 @@ -import pytest +import numpy as np import pandas as pd +import pytest from validphys.api import API from validphys.commondataparser import load_commondata +from validphys.covmats_utils import construct_covmat from validphys.loader import FallbackLoader as Loader -from validphys.tests.conftest import THEORYID_NEW, FIT +from validphys.tests.conftest import FIT, THEORYID_NEW def test_basic_commondata_loading(): @@ -19,7 +21,9 @@ def test_basic_commondata_loading(): assert isinstance(res.systype_table, pd.DataFrame) # Test a dataset with no systematics - emptysyscd = l.check_posset(theoryID=THEORYID_NEW, setname='NNPDF_POS_2P24GEV_XDQ', postlambda=1e-10) + emptysyscd = l.check_posset( + theoryID=THEORYID_NEW, setname='NNPDF_POS_2P24GEV_XDQ', postlambda=1e-10 + ) emptysysres = load_commondata(emptysyscd.commondata) assert 
emptysysres.nsys == 0 assert emptysysres.systype_table.empty is True @@ -33,9 +37,7 @@ def test_commondata_with_cuts(): loaded_cd = load_commondata(cd) fit_cuts = l.check_fit_cuts(fit=FIT, commondata=cd) - internal_cuts = l.check_internal_cuts( - cd, API.rules(theoryid=THEORYID_NEW, use_cuts="internal") - ) + internal_cuts = l.check_internal_cuts(cd, API.rules(theoryid=THEORYID_NEW, use_cuts="internal")) loaded_cd_fit_cuts = loaded_cd.with_cuts(fit_cuts) # We must do these - 1 subtractions due to the fact that cuts indexing @@ -61,3 +63,43 @@ def test_commondata_with_cuts(): bad_cuts = l.check_fit_cuts(fit=FIT, commondata=cd_bad) with pytest.raises(ValueError): loaded_cd.with_cuts(bad_cuts) + + +def test_commondata_load_write_load(tmp): + """Test that we can load a commondata, write it down, and load it again""" + l = Loader() + + # Select a dataset that we know mixes ADD and MULT (so that the ordering is checked) + setname = "ATLAS_2JET_7TEV_R06_M12Y" + # And a complicated variant + variant = "legacy" + + # Get a reference to the commondata + cd = l.check_commondata(setname=setname, variant=variant) + + # Load it up, save the covmat, and write it down + original_data = cd.load() + data_path, unc_path = original_data.export(tmp) + + # Now, reload it with the new data/unc paths + new_data = cd.with_modified_data(data_path, uncertainties_file=unc_path).load() + + # central value! + original_cv = original_data.central_values.to_numpy() + new_cv = new_data.central_values.to_numpy() + np.testing.assert_allclose(original_cv, new_cv) + + # stats! 
+ original_stats = original_data.stat_errors.to_numpy() + new_stats = new_data.stat_errors.to_numpy() + np.testing.assert_allclose(original_stats, new_stats) + + # Create fake data in order to check whether the covmats are truly the same + # the fake data ensures that the MULT and ADD are treated in the same way in both + # otherwise, since the data is saved wrt the original central value, the test will always pass + fake_unc = np.diag(construct_covmat(original_stats, original_data.systematic_errors())) + fake_data = np.random.rand(len(original_stats)) * fake_unc + original_cv + + new_covmat = construct_covmat(new_stats, new_data.systematic_errors(fake_data)) + original_covmat = construct_covmat(original_stats, original_data.systematic_errors(fake_data)) + np.testing.assert_allclose(new_covmat, original_covmat)