From d2cf7419f528e965e672518d4e7aee846b8fbf5f Mon Sep 17 00:00:00 2001 From: Francho Nerin Fonz Date: Thu, 4 Jul 2024 11:40:50 +0100 Subject: [PATCH 1/7] RBFE protocol changes to use MBAR error for single PU repeat --- .../protocols/openmm_rfe/equil_rfe_methods.py | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/openfe/protocols/openmm_rfe/equil_rfe_methods.py b/openfe/protocols/openmm_rfe/equil_rfe_methods.py index 545dc02f5..09349b1d3 100644 --- a/openfe/protocols/openmm_rfe/equil_rfe_methods.py +++ b/openfe/protocols/openmm_rfe/equil_rfe_methods.py @@ -278,15 +278,20 @@ def get_estimate(self) -> unit.Quantity: def get_uncertainty(self) -> unit.Quantity: """The uncertainty/error in the dG value: The std of the estimates of - each independent repeat + each independent repeat or the MBAR estimate error for a single repeat """ - dGs = [pus[0].outputs['unit_estimate'] for pus in self.data.values()] - u = dGs[0].u - # convert all values to units of the first value, then take average of magnitude - # this would avoid a screwy case where each value was in different units - vals = [dG.to(u).m for dG in dGs] - - return np.std(vals) * u + if len(self.data.values()) > 1: + dGs = [pus[0].outputs['unit_estimate'] for pus in self.data.values()] + u = dGs[0].u + # convert all values to units of the first value, then take average of magnitude + # this would avoid a screwy case where each value was in different units + vals = [dG.to(u).m for dG in dGs] + unc = np.std(vals) * u + else: + uncs = [pus[0].outputs['unit_estimate_error'] for pus in self.data.values()] + assert len(uncs) == 1 + unc = uncs[0] + return unc def get_individual_estimates(self) -> list[tuple[unit.Quantity, unit.Quantity]]: """Return a list of tuples containing the individual free energy From 61eec4d5bc596feb6356e3beb7e91dc05142d0cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francho=20Ner=C3=ADn=20Fonz?= Date: Thu, 4 Jul 2024 14:36:22 +0100 Subject: [PATCH 2/7] added comments --- openfe/protocols/openmm_rfe/equil_rfe_methods.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/openfe/protocols/openmm_rfe/equil_rfe_methods.py b/openfe/protocols/openmm_rfe/equil_rfe_methods.py index 09349b1d3..464cbccb6 100644 --- a/openfe/protocols/openmm_rfe/equil_rfe_methods.py +++ b/openfe/protocols/openmm_rfe/equil_rfe_methods.py @@ -288,8 +288,9 @@ def get_uncertainty(self) -> unit.Quantity: vals = [dG.to(u).m for dG in dGs] unc = np.std(vals) * u else: + # use MBAR estimate error directly for a single repeat uncs = [pus[0].outputs['unit_estimate_error'] for pus in self.data.values()] - assert len(uncs) == 1 + assert len(uncs) == 1, "Protocol unit estimates and errors number mismatch" unc = uncs[0] return unc From 8ec28a7a1610d8220402ffde876d2dd3c9a688a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francho=20Ner=C3=ADn=20Fonz?= Date: Thu, 4 Jul 2024 14:51:25 +0100 Subject: [PATCH 3/7] fixed pep8 compliant line lengths in touched lines --- openfe/protocols/openmm_rfe/equil_rfe_methods.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/openfe/protocols/openmm_rfe/equil_rfe_methods.py b/openfe/protocols/openmm_rfe/equil_rfe_methods.py index 464cbccb6..f50a5f60b 100644 --- a/openfe/protocols/openmm_rfe/equil_rfe_methods.py +++ b/openfe/protocols/openmm_rfe/equil_rfe_methods.py @@ -281,16 +281,22 @@ def get_uncertainty(self) -> unit.Quantity: each independent repeat or the MBAR estimate error for a single repeat """ if len(self.data.values()) > 1: - dGs = [pus[0].outputs['unit_estimate'] for pus in self.data.values()] + dGs = [ + pus[0].outputs['unit_estimate'] for pus in self.data.values() + ] u = dGs[0].u - # convert all values to units of the first value, then take average of magnitude - # this would avoid a screwy case where each value was in different units + # convert all values to units of the first value, then take + # average of magnitude. this would avoid a screwy case where each + # value was in different units vals = [dG.to(u).m for dG in dGs] unc = np.std(vals) * u else: # use MBAR estimate error directly for a single repeat - uncs = [pus[0].outputs['unit_estimate_error'] for pus in self.data.values()] - assert len(uncs) == 1, "Protocol unit estimates and errors number mismatch" + uncs = [ + pus[0].outputs['unit_estimate_error'] + for pus in self.data.values() + ] + assert len(uncs) == 1, "Protocols and number of errors mismatch" unc = uncs[0] return unc From c2aefd4d72609740e8eee8d7f93907be1de8365a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francho=20Ner=C3=ADn=20Fonz?= Date: Thu, 4 Jul 2024 16:07:00 +0100 Subject: [PATCH 4/7] Revert changes This reverts commit 8ec28a7a1610d8220402ffde876d2dd3c9a688a2. --- openfe/protocols/openmm_rfe/equil_rfe_methods.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/openfe/protocols/openmm_rfe/equil_rfe_methods.py b/openfe/protocols/openmm_rfe/equil_rfe_methods.py index f50a5f60b..464cbccb6 100644 --- a/openfe/protocols/openmm_rfe/equil_rfe_methods.py +++ b/openfe/protocols/openmm_rfe/equil_rfe_methods.py @@ -281,22 +281,16 @@ def get_uncertainty(self) -> unit.Quantity: each independent repeat or the MBAR estimate error for a single repeat """ if len(self.data.values()) > 1: - dGs = [ - pus[0].outputs['unit_estimate'] for pus in self.data.values() - ] + dGs = [pus[0].outputs['unit_estimate'] for pus in self.data.values()] u = dGs[0].u - # convert all values to units of the first value, then take - # average of magnitude. this would avoid a screwy case where each - # value was in different units + # convert all values to units of the first value, then take average of magnitude + # this would avoid a screwy case where each value was in different units vals = [dG.to(u).m for dG in dGs] unc = np.std(vals) * u else: # use MBAR estimate error directly for a single repeat - uncs = [ - pus[0].outputs['unit_estimate_error'] - for pus in self.data.values() - ] - assert len(uncs) == 1, "Protocols and number of errors mismatch" + uncs = [pus[0].outputs['unit_estimate_error'] for pus in self.data.values()] + assert len(uncs) == 1, "Protocol unit estimates and errors number mismatch" unc = uncs[0] return unc From 2dc5c036e30f5b0de094e99186c072e4bc8cec0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francho=20Ner=C3=ADn=20Fonz?= Date: Thu, 4 Jul 2024 16:07:26 +0100 Subject: [PATCH 5/7] Revert "added comments" This reverts commit 61eec4d5bc596feb6356e3beb7e91dc05142d0cc. --- openfe/protocols/openmm_rfe/equil_rfe_methods.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/openfe/protocols/openmm_rfe/equil_rfe_methods.py b/openfe/protocols/openmm_rfe/equil_rfe_methods.py index 464cbccb6..09349b1d3 100644 --- a/openfe/protocols/openmm_rfe/equil_rfe_methods.py +++ b/openfe/protocols/openmm_rfe/equil_rfe_methods.py @@ -288,9 +288,8 @@ def get_uncertainty(self) -> unit.Quantity: vals = [dG.to(u).m for dG in dGs] unc = np.std(vals) * u else: - # use MBAR estimate error directly for a single repeat uncs = [pus[0].outputs['unit_estimate_error'] for pus in self.data.values()] - assert len(uncs) == 1, "Protocol unit estimates and errors number mismatch" + assert len(uncs) == 1 unc = uncs[0] return unc From 13e8acb472cd1799abe3956816d304dd0cd01621 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francho=20Ner=C3=ADn=20Fonz?= Date: Thu, 4 Jul 2024 16:09:52 +0100 Subject: [PATCH 6/7] Revert "RBFE protocol changes to use MBAR error for single PU repeat" This reverts commit d2cf7419f528e965e672518d4e7aee846b8fbf5f. --- .../protocols/openmm_rfe/equil_rfe_methods.py | 21 +++++++------------ 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/openfe/protocols/openmm_rfe/equil_rfe_methods.py b/openfe/protocols/openmm_rfe/equil_rfe_methods.py index 09349b1d3..545dc02f5 100644 --- a/openfe/protocols/openmm_rfe/equil_rfe_methods.py +++ b/openfe/protocols/openmm_rfe/equil_rfe_methods.py @@ -278,20 +278,15 @@ def get_estimate(self) -> unit.Quantity: def get_uncertainty(self) -> unit.Quantity: """The uncertainty/error in the dG value: The std of the estimates of - each independent repeat or the MBAR estimate error for a single repeat + each independent repeat """ - if len(self.data.values()) > 1: - dGs = [pus[0].outputs['unit_estimate'] for pus in self.data.values()] - u = dGs[0].u - # convert all values to units of the first value, then take average of magnitude - # this would avoid a screwy case where each value was in different units - vals = [dG.to(u).m for dG in dGs] - unc = np.std(vals) * u - else: - uncs = [pus[0].outputs['unit_estimate_error'] for pus in self.data.values()] - assert len(uncs) == 1 - unc = uncs[0] - return unc + dGs = [pus[0].outputs['unit_estimate'] for pus in self.data.values()] + u = dGs[0].u + # convert all values to units of the first value, then take average of magnitude + # this would avoid a screwy case where each value was in different units + vals = [dG.to(u).m for dG in dGs] + + return np.std(vals) * u def get_individual_estimates(self) -> list[tuple[unit.Quantity, unit.Quantity]]: """Return a list of tuples containing the individual free energy From 3323df5f65d46b5237ec432f4b0d08c5e7488933 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francho=20Ner=C3=ADn=20Fonz?= Date: Fri, 5 Jul 2024 10:29:16 +0100 Subject: [PATCH 7/7] Use of MBAR estim. errors in gather for single protocol repeats and corrected raw report --- openfecli/commands/gather.py | 43 ++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/openfecli/commands/gather.py b/openfecli/commands/gather.py index 22bef88ea..e835f26cf 100644 --- a/openfecli/commands/gather.py +++ b/openfecli/commands/gather.py @@ -131,10 +131,11 @@ def _generate_bad_legs_error_message(set_vals, ligpair): def _parse_raw_units(results: dict) -> list[tuple]: # grab individual unit results from master results dict # returns list of (estimate, uncertainty) tuples - list_of_pur = list(results['protocol_result']['data'].values())[0] + list_of_pur = list(results['protocol_result']['data'].values()) - return [(pu['outputs']['unit_estimate'], - pu['outputs']['unit_estimate_error']) + # could add to each tuple pu[0]["source_key"] for ID + return [(pu[0]['outputs']['unit_estimate'], + pu[0]['outputs']['unit_estimate_error']) for pu in list_of_pur] @@ -178,10 +179,10 @@ def _get_ddgs(legs, error_on_missing=True): return DDGs -def _write_ddg(legs, writer, allow_partial): +def _write_ddg(legs, writer, allow_partial): # unc DDGs = _get_ddgs(legs, error_on_missing=not allow_partial) writer.writerow(["ligand_i", "ligand_j", "DDG(i->j) (kcal/mol)", - "uncertainty (kcal/mol)"]) + "uncertainty (kcal/mol)"]) # unc]) for ligA, ligB, DDGbind, bind_unc, DDGhyd, hyd_unc in DDGs: if DDGbind is not None: DDGbind, bind_unc = format_estimate_uncertainty(DDGbind, bind_unc) @@ -191,19 +192,19 @@ def _write_ddg(legs, writer, allow_partial): writer.writerow([ligA, ligB, DDGhyd, hyd_unc]) -def _write_raw(legs, writer, allow_partial=True): - writer.writerow(["leg", "ligand_i", "ligand_j", "DG(i->j) (kcal/mol)", - "MBAR uncertainty (kcal/mol)"]) +def _write_raw(legs, writer, allow_partial=True): # *args? + writer.writerow(["leg", "repeat", "ligand_i", "ligand_j", + "DG(i->j) (kcal/mol)", "MBAR uncertainty (kcal/mol)"]) for ligpair, vals in sorted(legs.items()): for simtype, repeats in sorted(vals.items()): - for m, u in repeats: + for rep, (m, u) in enumerate(repeats, 1): if m is None: m, u = 'NaN', 'NaN' else: m, u = format_estimate_uncertainty(m.m, u.m) - writer.writerow([simtype, *ligpair, m, u]) + writer.writerow([simtype, rep, *ligpair, m, u]) def _write_dg_raw(legs, writer, allow_partial): # pragma: no-cover @@ -218,7 +219,7 @@ def _write_dg_raw(legs, writer, allow_partial): # pragma: no-cover writer.writerow([simtype, *ligpair, m, u]) -def _write_dg_mle(legs, writer, allow_partial): +def _write_dg_mle(legs, writer, allow_partial): # unc import networkx as nx import numpy as np from cinnabar.stats import mle @@ -264,7 +265,7 @@ def _write_dg_mle(legs, writer, allow_partial): MLEs.append((ligname, f, df)) writer.writerow(["ligand", "DG(MLE) (kcal/mol)", - "uncertainty (kcal/mol)"]) + "uncertainty (kcal/mol)"]) # unc]) for ligA, DG, unc_DG in MLEs: DG, unc_DG = format_estimate_uncertainty(DG, unc_DG) writer.writerow([ligA, DG, unc_DG]) @@ -336,6 +337,9 @@ def gather(rootdir, output, report, allow_partial): # 3) pair legs of simulations together into dict of dicts legs = defaultdict(dict) + ######## CHECK IF ALL RESULTS HAVE SAME # OF PROTOCOLUNITS? + # MBAR_errors = True + for result_fn in result_fns: result = load_results(result_fn) if result is None: @@ -344,6 +348,8 @@ def gather(rootdir, output, report, allow_partial): click.echo(f"WARNING: Calculations for {result_fn} did not finish successfully!", err=True) + + try: names = get_names(result) except KeyError: @@ -353,8 +359,15 @@ def gather(rootdir, output, report, allow_partial): except KeyError: simtype = legacy_get_type(result_fn) + raw_units = _parse_raw_units(result) + ######## CHECK IF ALL RESULTS HAVE SAME # OF PROTOCOLUNITS? + # if MBAR_errors and len(raw_units) > 1: + # MBAR_errors = False + if report.lower() == 'raw': - legs[names][simtype] = _parse_raw_units(result) + legs[names][simtype] = raw_units + elif len(raw_units) == 1: + legs[names][simtype] = raw_units[0] else: legs[names][simtype] = result['estimate'], result['uncertainty'] @@ -364,6 +377,8 @@ def gather(rootdir, output, report, allow_partial): lineterminator="\n", # to exactly reproduce previous, prefer "\r\n" ) + # unc = "MBAR uncertainty (kcal/mol)" if MBAR_errors else "uncertainty (kcal/mol)" + # 5a) write out MLE values # 5b) write out DDG values # 5c) write out each leg @@ -373,7 +388,7 @@ def gather(rootdir, output, report, allow_partial): # 'dg-raw': _write_dg_raw, 'raw': _write_raw, }[report.lower()] - writing_func(legs, writer, allow_partial) + writing_func(legs, writer, allow_partial) # , unc) PLUGIN = OFECommandPlugin(