From d2cf7419f528e965e672518d4e7aee846b8fbf5f Mon Sep 17 00:00:00 2001
From: Francho Nerin Fonz
 <francho.nerin-fonz@compchem-3d-24.compchem.heptares.co.uk>
Date: Thu, 4 Jul 2024 11:40:50 +0100
Subject: [PATCH 1/7] RBFE protocol changes to use MBAR error for single PU
 repeat

---
 .../protocols/openmm_rfe/equil_rfe_methods.py | 21 ++++++++++++-------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/openfe/protocols/openmm_rfe/equil_rfe_methods.py b/openfe/protocols/openmm_rfe/equil_rfe_methods.py
index 545dc02f5..09349b1d3 100644
--- a/openfe/protocols/openmm_rfe/equil_rfe_methods.py
+++ b/openfe/protocols/openmm_rfe/equil_rfe_methods.py
@@ -278,15 +278,20 @@ def get_estimate(self) -> unit.Quantity:
 
     def get_uncertainty(self) -> unit.Quantity:
         """The uncertainty/error in the dG value: The std of the estimates of
-        each independent repeat
+        each independent repeat or the MBAR estimate error for a single repeat
         """
-        dGs = [pus[0].outputs['unit_estimate'] for pus in self.data.values()]
-        u = dGs[0].u
-        # convert all values to units of the first value, then take average of magnitude
-        # this would avoid a screwy case where each value was in different units
-        vals = [dG.to(u).m for dG in dGs]
-
-        return np.std(vals) * u
+        if len(self.data.values()) > 1:
+            dGs = [pus[0].outputs['unit_estimate'] for pus in self.data.values()]
+            u = dGs[0].u
+            # convert all values to units of the first value, then take average of magnitude
+            # this would avoid a screwy case where each value was in different units
+            vals = [dG.to(u).m for dG in dGs]
+            unc = np.std(vals) * u
+        else:
+            uncs = [pus[0].outputs['unit_estimate_error'] for pus in self.data.values()]
+            assert len(uncs) == 1
+            unc = uncs[0]
+        return unc
 
     def get_individual_estimates(self) -> list[tuple[unit.Quantity, unit.Quantity]]:
         """Return a list of tuples containing the individual free energy

From 61eec4d5bc596feb6356e3beb7e91dc05142d0cc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Francho=20Ner=C3=ADn=20Fonz?= <fnerin@bioacademy.gr>
Date: Thu, 4 Jul 2024 14:36:22 +0100
Subject: [PATCH 2/7] added comments

---
 openfe/protocols/openmm_rfe/equil_rfe_methods.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/openfe/protocols/openmm_rfe/equil_rfe_methods.py b/openfe/protocols/openmm_rfe/equil_rfe_methods.py
index 09349b1d3..464cbccb6 100644
--- a/openfe/protocols/openmm_rfe/equil_rfe_methods.py
+++ b/openfe/protocols/openmm_rfe/equil_rfe_methods.py
@@ -288,8 +288,9 @@ def get_uncertainty(self) -> unit.Quantity:
             vals = [dG.to(u).m for dG in dGs]
             unc = np.std(vals) * u
         else:
+            # use MBAR estimate error directly for a single repeat
             uncs = [pus[0].outputs['unit_estimate_error'] for pus in self.data.values()]
-            assert len(uncs) == 1
+            assert len(uncs) == 1, "Protocol unit estimates and errors number mismatch"
             unc = uncs[0]
         return unc
 

From 8ec28a7a1610d8220402ffde876d2dd3c9a688a2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Francho=20Ner=C3=ADn=20Fonz?= <fnerin@bioacademy.gr>
Date: Thu, 4 Jul 2024 14:51:25 +0100
Subject: [PATCH 3/7] fixed pep8 compliant line lengths in touched lines

---
 openfe/protocols/openmm_rfe/equil_rfe_methods.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/openfe/protocols/openmm_rfe/equil_rfe_methods.py b/openfe/protocols/openmm_rfe/equil_rfe_methods.py
index 464cbccb6..f50a5f60b 100644
--- a/openfe/protocols/openmm_rfe/equil_rfe_methods.py
+++ b/openfe/protocols/openmm_rfe/equil_rfe_methods.py
@@ -281,16 +281,22 @@ def get_uncertainty(self) -> unit.Quantity:
         each independent repeat or the MBAR estimate error for a single repeat
         """
         if len(self.data.values()) > 1:
-            dGs = [pus[0].outputs['unit_estimate'] for pus in self.data.values()]
+            dGs = [
+                pus[0].outputs['unit_estimate'] for pus in self.data.values()
+            ]
             u = dGs[0].u
-            # convert all values to units of the first value, then take average of magnitude
-            # this would avoid a screwy case where each value was in different units
+            # convert all values to units of the first value, then take 
+            # average of magnitude. this would avoid a screwy case where each
+            # value was in different units
             vals = [dG.to(u).m for dG in dGs]
             unc = np.std(vals) * u
         else:
             # use MBAR estimate error directly for a single repeat
-            uncs = [pus[0].outputs['unit_estimate_error'] for pus in self.data.values()]
-            assert len(uncs) == 1, "Protocol unit estimates and errors number mismatch"
+            uncs = [
+                pus[0].outputs['unit_estimate_error'] 
+                for pus in self.data.values()
+            ]
+            assert len(uncs) == 1, "Protocols and number of errors mismatch"
             unc = uncs[0]
         return unc
 

From c2aefd4d72609740e8eee8d7f93907be1de8365a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Francho=20Ner=C3=ADn=20Fonz?= <fnerin@bioacademy.gr>
Date: Thu, 4 Jul 2024 16:07:00 +0100
Subject: [PATCH 4/7] Revert changes

This reverts commit 8ec28a7a1610d8220402ffde876d2dd3c9a688a2.
---
 openfe/protocols/openmm_rfe/equil_rfe_methods.py | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/openfe/protocols/openmm_rfe/equil_rfe_methods.py b/openfe/protocols/openmm_rfe/equil_rfe_methods.py
index f50a5f60b..464cbccb6 100644
--- a/openfe/protocols/openmm_rfe/equil_rfe_methods.py
+++ b/openfe/protocols/openmm_rfe/equil_rfe_methods.py
@@ -281,22 +281,16 @@ def get_uncertainty(self) -> unit.Quantity:
         each independent repeat or the MBAR estimate error for a single repeat
         """
         if len(self.data.values()) > 1:
-            dGs = [
-                pus[0].outputs['unit_estimate'] for pus in self.data.values()
-            ]
+            dGs = [pus[0].outputs['unit_estimate'] for pus in self.data.values()]
             u = dGs[0].u
-            # convert all values to units of the first value, then take 
-            # average of magnitude. this would avoid a screwy case where each
-            # value was in different units
+            # convert all values to units of the first value, then take average of magnitude
+            # this would avoid a screwy case where each value was in different units
             vals = [dG.to(u).m for dG in dGs]
             unc = np.std(vals) * u
         else:
             # use MBAR estimate error directly for a single repeat
-            uncs = [
-                pus[0].outputs['unit_estimate_error'] 
-                for pus in self.data.values()
-            ]
-            assert len(uncs) == 1, "Protocols and number of errors mismatch"
+            uncs = [pus[0].outputs['unit_estimate_error'] for pus in self.data.values()]
+            assert len(uncs) == 1, "Protocol unit estimates and errors number mismatch"
             unc = uncs[0]
         return unc
 

From 2dc5c036e30f5b0de094e99186c072e4bc8cec0f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Francho=20Ner=C3=ADn=20Fonz?= <fnerin@bioacademy.gr>
Date: Thu, 4 Jul 2024 16:07:26 +0100
Subject: [PATCH 5/7] Revert "added comments"

This reverts commit 61eec4d5bc596feb6356e3beb7e91dc05142d0cc.
---
 openfe/protocols/openmm_rfe/equil_rfe_methods.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/openfe/protocols/openmm_rfe/equil_rfe_methods.py b/openfe/protocols/openmm_rfe/equil_rfe_methods.py
index 464cbccb6..09349b1d3 100644
--- a/openfe/protocols/openmm_rfe/equil_rfe_methods.py
+++ b/openfe/protocols/openmm_rfe/equil_rfe_methods.py
@@ -288,9 +288,8 @@ def get_uncertainty(self) -> unit.Quantity:
             vals = [dG.to(u).m for dG in dGs]
             unc = np.std(vals) * u
         else:
-            # use MBAR estimate error directly for a single repeat
             uncs = [pus[0].outputs['unit_estimate_error'] for pus in self.data.values()]
-            assert len(uncs) == 1, "Protocol unit estimates and errors number mismatch"
+            assert len(uncs) == 1
             unc = uncs[0]
         return unc
 

From 13e8acb472cd1799abe3956816d304dd0cd01621 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Francho=20Ner=C3=ADn=20Fonz?= <fnerin@bioacademy.gr>
Date: Thu, 4 Jul 2024 16:09:52 +0100
Subject: [PATCH 6/7] Revert "RBFE protocol changes to use MBAR error for
 single PU repeat"

This reverts commit d2cf7419f528e965e672518d4e7aee846b8fbf5f.
---
 .../protocols/openmm_rfe/equil_rfe_methods.py | 21 +++++++------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/openfe/protocols/openmm_rfe/equil_rfe_methods.py b/openfe/protocols/openmm_rfe/equil_rfe_methods.py
index 09349b1d3..545dc02f5 100644
--- a/openfe/protocols/openmm_rfe/equil_rfe_methods.py
+++ b/openfe/protocols/openmm_rfe/equil_rfe_methods.py
@@ -278,20 +278,15 @@ def get_estimate(self) -> unit.Quantity:
 
     def get_uncertainty(self) -> unit.Quantity:
         """The uncertainty/error in the dG value: The std of the estimates of
-        each independent repeat or the MBAR estimate error for a single repeat
+        each independent repeat
         """
-        if len(self.data.values()) > 1:
-            dGs = [pus[0].outputs['unit_estimate'] for pus in self.data.values()]
-            u = dGs[0].u
-            # convert all values to units of the first value, then take average of magnitude
-            # this would avoid a screwy case where each value was in different units
-            vals = [dG.to(u).m for dG in dGs]
-            unc = np.std(vals) * u
-        else:
-            uncs = [pus[0].outputs['unit_estimate_error'] for pus in self.data.values()]
-            assert len(uncs) == 1
-            unc = uncs[0]
-        return unc
+        dGs = [pus[0].outputs['unit_estimate'] for pus in self.data.values()]
+        u = dGs[0].u
+        # convert all values to units of the first value, then take average of magnitude
+        # this would avoid a screwy case where each value was in different units
+        vals = [dG.to(u).m for dG in dGs]
+
+        return np.std(vals) * u
 
     def get_individual_estimates(self) -> list[tuple[unit.Quantity, unit.Quantity]]:
         """Return a list of tuples containing the individual free energy

From 3323df5f65d46b5237ec432f4b0d08c5e7488933 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Francho=20Ner=C3=ADn=20Fonz?= <fnerin@bioacademy.gr>
Date: Fri, 5 Jul 2024 10:29:16 +0100
Subject: [PATCH 7/7] Use of MBAR estim. errors in gather for single protocol
 repeats and corrected raw report

---
 openfecli/commands/gather.py | 43 ++++++++++++++++++++++++------------
 1 file changed, 29 insertions(+), 14 deletions(-)

diff --git a/openfecli/commands/gather.py b/openfecli/commands/gather.py
index 22bef88ea..e835f26cf 100644
--- a/openfecli/commands/gather.py
+++ b/openfecli/commands/gather.py
@@ -131,10 +131,11 @@ def _generate_bad_legs_error_message(set_vals, ligpair):
 def _parse_raw_units(results: dict) -> list[tuple]:
     # grab individual unit results from master results dict
     # returns list of (estimate, uncertainty) tuples
-    list_of_pur = list(results['protocol_result']['data'].values())[0]
+    list_of_pur = list(results['protocol_result']['data'].values())
 
-    return [(pu['outputs']['unit_estimate'],
-             pu['outputs']['unit_estimate_error'])
+    # could add to each tuple pu[0]["source_key"] for ID
+    return [(pu[0]['outputs']['unit_estimate'],
+             pu[0]['outputs']['unit_estimate_error'])
             for pu in list_of_pur]
 
 
@@ -178,10 +179,10 @@ def _get_ddgs(legs, error_on_missing=True):
     return DDGs
 
 
-def _write_ddg(legs, writer, allow_partial):
+def _write_ddg(legs, writer, allow_partial): # unc
     DDGs = _get_ddgs(legs, error_on_missing=not allow_partial)
     writer.writerow(["ligand_i", "ligand_j", "DDG(i->j) (kcal/mol)",
-                     "uncertainty (kcal/mol)"])
+                     "uncertainty (kcal/mol)"]) # unc])
     for ligA, ligB, DDGbind, bind_unc, DDGhyd, hyd_unc in DDGs:
         if DDGbind is not None:
             DDGbind, bind_unc = format_estimate_uncertainty(DDGbind, bind_unc)
@@ -191,19 +192,19 @@ def _write_ddg(legs, writer, allow_partial):
             writer.writerow([ligA, ligB, DDGhyd, hyd_unc])
 
 
-def _write_raw(legs, writer, allow_partial=True):
-    writer.writerow(["leg", "ligand_i", "ligand_j", "DG(i->j) (kcal/mol)",
-                     "MBAR uncertainty (kcal/mol)"])
+def _write_raw(legs, writer, allow_partial=True): # *args?
+    writer.writerow(["leg", "repeat", "ligand_i", "ligand_j", 
+                     "DG(i->j) (kcal/mol)", "MBAR uncertainty (kcal/mol)"])
 
     for ligpair, vals in sorted(legs.items()):
         for simtype, repeats in sorted(vals.items()):
-            for m, u in repeats:
+            for rep, (m, u) in enumerate(repeats, 1):
                 if m is None:
                     m, u = 'NaN', 'NaN'
                 else:
                     m, u = format_estimate_uncertainty(m.m, u.m)
 
-                writer.writerow([simtype, *ligpair, m, u])
+                writer.writerow([simtype, rep, *ligpair, m, u])
 
 
 def _write_dg_raw(legs, writer, allow_partial):  # pragma: no-cover
@@ -218,7 +219,7 @@ def _write_dg_raw(legs, writer, allow_partial):  # pragma: no-cover
             writer.writerow([simtype, *ligpair, m, u])
 
 
-def _write_dg_mle(legs, writer, allow_partial):
+def _write_dg_mle(legs, writer, allow_partial): # unc
     import networkx as nx
     import numpy as np
     from cinnabar.stats import mle
@@ -264,7 +265,7 @@ def _write_dg_mle(legs, writer, allow_partial):
             MLEs.append((ligname, f, df))
 
     writer.writerow(["ligand", "DG(MLE) (kcal/mol)",
-                     "uncertainty (kcal/mol)"])
+                     "uncertainty (kcal/mol)"]) # unc])
     for ligA, DG, unc_DG in MLEs:
         DG, unc_DG = format_estimate_uncertainty(DG, unc_DG)
         writer.writerow([ligA, DG, unc_DG])
@@ -336,6 +337,9 @@ def gather(rootdir, output, report, allow_partial):
     # 3) pair legs of simulations together into dict of dicts
     legs = defaultdict(dict)
 
+    ######## CHECK IF ALL RESULTS HAVE SAME # OF PROTOCOLUNITS?
+    # MBAR_errors = True
+
     for result_fn in result_fns:
         result = load_results(result_fn)
         if result is None:
@@ -344,6 +348,8 @@ def gather(rootdir, output, report, allow_partial):
             click.echo(f"WARNING: Calculations for {result_fn} did not finish successfully!",
                        err=True)
 
+        
+
         try:
             names = get_names(result)
         except KeyError:
@@ -353,8 +359,15 @@ def gather(rootdir, output, report, allow_partial):
         except KeyError:
             simtype = legacy_get_type(result_fn)
 
+        raw_units = _parse_raw_units(result)
+        ######## CHECK IF ALL RESULTS HAVE SAME # OF PROTOCOLUNITS?
+        # if MBAR_errors and len(raw_units) > 1:
+        #     MBAR_errors = False
+        
         if report.lower() == 'raw':
-            legs[names][simtype] = _parse_raw_units(result)
+            legs[names][simtype] = raw_units
+        elif len(raw_units) == 1:
+            legs[names][simtype] = raw_units[0]
         else:
             legs[names][simtype] = result['estimate'], result['uncertainty']
 
@@ -364,6 +377,8 @@ def gather(rootdir, output, report, allow_partial):
         lineterminator="\n",  # to exactly reproduce previous, prefer "\r\n"
     )
 
+    # unc = "MBAR uncertainty (kcal/mol)" if MBAR_errors else "uncertainty (kcal/mol)"
+    
     # 5a) write out MLE values
     # 5b) write out DDG values
     # 5c) write out each leg
@@ -373,7 +388,7 @@ def gather(rootdir, output, report, allow_partial):
         #  'dg-raw': _write_dg_raw,
         'raw': _write_raw,
     }[report.lower()]
-    writing_func(legs, writer, allow_partial)
+    writing_func(legs, writer, allow_partial) # , unc)
 
 
 PLUGIN = OFECommandPlugin(