From 88e3fa92be33388ddfbd81bb12754a21b2e765e4 Mon Sep 17 00:00:00 2001 From: IAlibay Date: Sat, 19 Jul 2025 08:32:37 +0100 Subject: [PATCH 1/5] Filter based on the number of found host idxs --- .../geometry/boresch/geometry.py | 6 +- .../restraint_utils/geometry/boresch/host.py | 136 +++++++++++++----- .../restraints/test_geometry_boresch_host.py | 39 +++-- 3 files changed, 131 insertions(+), 50 deletions(-) diff --git a/openfe/protocols/restraint_utils/geometry/boresch/geometry.py b/openfe/protocols/restraint_utils/geometry/boresch/geometry.py index 7613e4b06..c5194cfa8 100644 --- a/openfe/protocols/restraint_utils/geometry/boresch/geometry.py +++ b/openfe/protocols/restraint_utils/geometry/boresch/geometry.py @@ -251,12 +251,12 @@ def find_boresch_restraint( host_pool = find_host_atom_candidates( universe=universe, host_idxs=host_idxs, - l1_idx=guest_anchor[0], + guest_anchor_idx=guest_anchor[0], host_selection=host_selection, dssp_filter=dssp_filter, rmsf_cutoff=rmsf_cutoff, - min_distance=host_min_distance, - max_distance=host_max_distance, + min_search_distance=host_min_distance, + max_search_distance=host_max_distance, ) host_anchor = find_host_anchor( diff --git a/openfe/protocols/restraint_utils/geometry/boresch/host.py b/openfe/protocols/restraint_utils/geometry/boresch/host.py index 52e9cdf28..2a5d2d75e 100644 --- a/openfe/protocols/restraint_utils/geometry/boresch/host.py +++ b/openfe/protocols/restraint_utils/geometry/boresch/host.py @@ -29,15 +29,64 @@ from openff.units import Quantity, unit +def _host_atoms_search( + atomgroup: mda.AtomGroup, + guest_anchor_idx: int, + rmsf_cutoff: Quantity, + min_search_distance: Quantity, + max_search_distance: Quantity, +) -> npt.NDArray: + """ + Helper method to get a set of host atoms with minimal RMSF + within a given distance of a guest anchor. + + Parameters + ---------- + atomgroup : mda.AtomGroup + An AtomGroup to find host atoms in. + guest_anchor_idx : int + The index of the proposed guest anchor binding atom. + rmsf_cutoff : Quantity + The maximum allowed RMSF value for any candidate host atom. + min_search_distance : Quantity + The minimum host atom search distance around the guest anchor. + max_search_distance : Quantity + The maximum host atom search distance around the guest anchor. + + Return + ------ + NDArray + Array of host atom indexes + """ + # 0 Deal with the empty case + if len(atomgroup) == 0: + return np.array([], dtype=int) + + # 1 Get the RMSF & filter to create a new AtomGroup + rmsf = get_local_rmsf(atomgroup) + filtered_atomgroup = atomgroup.atoms[rmsf < rmsf_cutoff] + + # 2. Search for atoms within the min/max cutoff of the guest anchor + atom_finder = FindHostAtoms( + host_atoms=filtered_atomgroup, + guest_atoms=atomgroup.universe.atoms[guest_anchor_idx], + min_search_distance=min_search_distance, + max_search_distance=max_search_distance, + ) + atom_finder.run() + + return atom_finder.results.host_idxs + + def find_host_atom_candidates( universe: mda.Universe, host_idxs: list[int], - l1_idx: int, + guest_anchor_idx: int, host_selection: str, dssp_filter: bool = False, rmsf_cutoff: Quantity = 0.1 * unit.nanometer, - min_distance: Quantity = 1 * unit.nanometer, - max_distance: Quantity = 3 * unit.nanometer, + min_search_distance: Quantity = 1 * unit.nanometer, + max_search_distance: Quantity = 3 * unit.nanometer, ) -> npt.NDArray: """ Get a list of suitable host atoms. @@ -48,17 +97,17 @@ def find_host_atom_candidates( An MDAnalysis Universe defining the system and its coordinates. host_idxs : list[int] A list of the host indices in the system topology. - l1_idx : int + guest_anchor_idx : int The index of the proposed l1 binding atom. host_selection : str An MDAnalysis selection string to filter the host by. dssp_filter : bool Whether or not to apply a DSSP filter on the host selection. rmsf_cutoff : openff.units.Quantity - The maximum RMSF value allowwed for any candidate host atom. - min_distance : openff.units.Quantity + The maximum RMSF value allowed for any candidate host atom. + min_search_distance : openff.units.Quantity The minimum search distance around l1 for suitable candidate atoms. - max_distance : openff.units.Quantity + max_search_distance : openff.units.Quantity The maximum search distance around l1 for suitable candidate atoms. Return @@ -80,55 +129,66 @@ def find_host_atom_candidates( ) raise ValueError(errmsg) + # None host_ixs for condition checking later + host_idxs = None + # If requested, filter the host atoms based on if their residues exist # within stable secondary structures. if dssp_filter: - # TODO: allow user-supplied kwargs here - stable_ag = stable_secondary_structure_selection(selected_host_ag) + # TODO: allow more user-supplied kwargs here + host_idxs = _host_atoms_search( + atomgroup=stable_secondary_structure_selection(selected_host_ag), + guest_anchor_idx=guest_anchor_idx, + rmsf_cutoff=rmsf_cutoff, + min_search_distance=min_search_distance, + max_search_distance=max_search_distance, + ) - if len(stable_ag) < 20: + if len(host_idxs) < 20: wmsg = ( - "Secondary structure filtering: " - "Too few atoms found via secondary structure filtering will " - "try to only select all residues in protein chains instead." + "Restraint generation: DSSP filter found too few host atoms " + "will attempt to use all protein chains." ) warnings.warn(wmsg) - stable_ag = protein_chain_selection(selected_host_ag) + print(host_idxs) + host_idxs = _host_atoms_search( + atomgroup=protein_chain_selection(selected_host_ag), + guest_anchor_idx=guest_anchor_idx, + rmsf_cutoff=rmsf_cutoff, + min_search_distance=min_search_distance, + max_search_distance=max_search_distance, + ) - if len(stable_ag) < 20: + if len(host_idxs) < 20: wmsg = ( - "Secondary structure filtering: " - "Too few atoms found in protein residue chains, will just " - "use all atoms." + "Restraint generation: protein chain filter found too few " + "host atoms. Will attempt to use all host atoms in " + f"selection: {host_selection}." ) warnings.warn(wmsg) - else: - selected_host_ag = stable_ag - - # 1. Get the RMSF & filter to create a new AtomGroup - rmsf = get_local_rmsf(selected_host_ag) - filtered_host_ag = selected_host_ag.atoms[rmsf < rmsf_cutoff] - - # 2. Search of atoms within the min/max cutoff - atom_finder = FindHostAtoms( - host_atoms=filtered_host_ag, - guest_atoms=universe.atoms[l1_idx], - min_search_distance=min_distance, - max_search_distance=max_distance, - ) - atom_finder.run() + host_idxs = None + + if host_idxs is None: + host_idxs = _host_atoms_search( + atomgroup=selected_host_ag, + guest_anchor_idx=guest_anchor_idx, + rmsf_cutoff=rmsf_cutoff, + min_search_distance=min_search_distance, + max_search_distance=max_search_distance, + ) - if not atom_finder.results.host_idxs.any(): + # Crash out if no atoms were found + if len(host_idxs) == 0: errmsg = ( f"No host atoms found within the search distance " - f"{min_distance}-{max_distance} consider widening the search window." + f"{min_search_distance}-{max_search_distance} consider widening the search window." ) raise ValueError(errmsg) - # Now we sort them! + # Now we sort them from their distance from the guest anchor atom_sorter = CentroidDistanceSort( - sortable_atoms=universe.atoms[atom_finder.results.host_idxs], - reference_atoms=universe.atoms[l1_idx], + sortable_atoms=universe.atoms[host_idxs], + reference_atoms=universe.atoms[guest_anchor_idx], ) atom_sorter.run() diff --git a/openfe/tests/protocols/restraints/test_geometry_boresch_host.py b/openfe/tests/protocols/restraints/test_geometry_boresch_host.py index a1528c493..0cc22e8d5 100644 --- a/openfe/tests/protocols/restraints/test_geometry_boresch_host.py +++ b/openfe/tests/protocols/restraints/test_geometry_boresch_host.py @@ -3,6 +3,7 @@ import MDAnalysis as mda import numpy as np +from numpy.testing import assert_equal import pytest from openfe.protocols.restraint_utils.geometry.boresch.host import ( EvaluateHostAtoms1, @@ -23,22 +24,42 @@ def eg5_protein_ligand_universe(eg5_protein_pdb, eg5_ligands): def test_host_atom_candidates_dssp(eg5_protein_ligand_universe): + """ + Run DSSP search normally + """ + host_atoms = eg5_protein_ligand_universe.select_atoms("protein") + + idxs = find_host_atom_candidates( + universe=eg5_protein_ligand_universe, + host_idxs=host_atoms.ix, + # hand picked + guest_anchor_idx=5508, + host_selection="backbone and resnum 15:25", + dssp_filter=True, + ) + expected = np.array( + [136, 134, 135, 133, 104, 159, 160, 119, 157, 103, 120, 158, 71, + 118, 102, 101, 117, 70, 87, 86, 85, 69, 68, 88, 51, 50, + 52, 49, 38, 37, 16, 36, 15, 35, 14, 13] + ) + assert_equal(idxs, expected) + + +def test_host_atom_candidates_dssp_too_few_atoms(eg5_protein_ligand_universe): """ Make sure both dssp warnings are triggered """ host_atoms = eg5_protein_ligand_universe.select_atoms("protein") with ( - pytest.warns( - match="Too few atoms found via secondary structure filtering will" - ), - pytest.warns(match="Too few atoms found in protein residue chains,"), + pytest.warns(match="DSSP filter found"), + pytest.warns(match="protein chain filter found"), ): _ = find_host_atom_candidates( universe=eg5_protein_ligand_universe, - host_idxs=[a.ix for a in host_atoms], + host_idxs=host_atoms.ix, # hand picked - l1_idx=5508, + guest_anchor_idx=5508, host_selection="backbone and resnum 15:25", dssp_filter=True, ) @@ -52,12 +73,12 @@ def test_host_atom_candidate_small_search(eg5_protein_ligand_universe): ): _ = find_host_atom_candidates( universe=eg5_protein_ligand_universe, - host_idxs=[a.ix for a in host_atoms], + host_idxs=host_atoms.ix, # hand picked - l1_idx=5508, + guest_anchor_idx=5508, host_selection="backbone", dssp_filter=False, - max_distance=0.1 * unit.angstrom, + max_search_distance=0.1 * unit.angstrom, ) From 68b517aa529600f9d9cc1909be73695c0e5dc5cf Mon Sep 17 00:00:00 2001 From: IAlibay Date: Sat, 19 Jul 2025 08:40:54 +0100 Subject: [PATCH 2/5] fix name overlap --- .../restraint_utils/geometry/boresch/host.py | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/openfe/protocols/restraint_utils/geometry/boresch/host.py b/openfe/protocols/restraint_utils/geometry/boresch/host.py index 2a5d2d75e..9feebc474 100644 --- a/openfe/protocols/restraint_utils/geometry/boresch/host.py +++ b/openfe/protocols/restraint_utils/geometry/boresch/host.py @@ -129,14 +129,14 @@ def find_host_atom_candidates( ) raise ValueError(errmsg) - # None host_ixs for condition checking later - host_idxs = None + # None filtered_host_ixs for condition checking later + filtered_host_idxs = None # If requested, filter the host atoms based on if their residues exist # within stable secondary structures. if dssp_filter: # TODO: allow more user-supplied kwargs here - host_idxs = _host_atoms_search( + filtered_host_idxs = _host_atoms_search( atomgroup=stable_secondary_structure_selection(selected_host_ag), guest_anchor_idx=guest_anchor_idx, rmsf_cutoff=rmsf_cutoff, @@ -144,14 +144,13 @@ def find_host_atom_candidates( max_search_distance=max_search_distance, ) - if len(host_idxs) < 20: + if len(filtered_host_idxs) < 20: wmsg = ( "Restraint generation: DSSP filter found too few host atoms " "will attempt to use all protein chains." ) warnings.warn(wmsg) - print(host_idxs) - host_idxs = _host_atoms_search( + filtered_host_idxs = _host_atoms_search( atomgroup=protein_chain_selection(selected_host_ag), guest_anchor_idx=guest_anchor_idx, rmsf_cutoff=rmsf_cutoff, @@ -159,17 +158,17 @@ def find_host_atom_candidates( max_search_distance=max_search_distance, ) - if len(host_idxs) < 20: + if len(filtered_host_idxs) < 20: wmsg = ( "Restraint generation: protein chain filter found too few " "host atoms. Will attempt to use all host atoms in " f"selection: {host_selection}." ) warnings.warn(wmsg) - host_idxs = None + filtered_host_idxs = None - if host_idxs is None: - host_idxs = _host_atoms_search( + if filtered_host_idxs is None: + filtered_host_idxs = _host_atoms_search( atomgroup=selected_host_ag, guest_anchor_idx=guest_anchor_idx, rmsf_cutoff=rmsf_cutoff, @@ -178,7 +177,7 @@ def find_host_atom_candidates( ) # Crash out if no atoms were found - if len(host_idxs) == 0: + if len(filtered_host_idxs) == 0: errmsg = ( f"No host atoms found within the search distance " f"{min_search_distance}-{max_search_distance} consider widening the search window." @@ -187,7 +186,7 @@ def find_host_atom_candidates( # Now we sort them from their distance from the guest anchor atom_sorter = CentroidDistanceSort( - sortable_atoms=universe.atoms[host_idxs], + sortable_atoms=universe.atoms[filtered_host_idxs], reference_atoms=universe.atoms[guest_anchor_idx], ) atom_sorter.run() From 2cdbada1f3cbbd15e50ab30a859b681667f6ca57 Mon Sep 17 00:00:00 2001 From: Irfan Alibay Date: Mon, 21 Jul 2025 19:07:27 +0100 Subject: [PATCH 3/5] Apply suggestions from code review Co-authored-by: Alyssa Travitz <31974495+atravitz@users.noreply.github.com> --- openfe/protocols/restraint_utils/geometry/boresch/host.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/openfe/protocols/restraint_utils/geometry/boresch/host.py b/openfe/protocols/restraint_utils/geometry/boresch/host.py index 9feebc474..5f21a5a9c 100644 --- a/openfe/protocols/restraint_utils/geometry/boresch/host.py +++ b/openfe/protocols/restraint_utils/geometry/boresch/host.py @@ -147,7 +147,7 @@ def find_host_atom_candidates( if len(filtered_host_idxs) < 20: wmsg = ( "Restraint generation: DSSP filter found too few host atoms " - "will attempt to use all protein chains." + f"({len(filtered_host_idxs)} found). Will attempt to use all protein chains." ) warnings.warn(wmsg) filtered_host_idxs = _host_atoms_search( @@ -161,7 +161,7 @@ def find_host_atom_candidates( if len(filtered_host_idxs) < 20: wmsg = ( "Restraint generation: protein chain filter found too few " - "host atoms. Will attempt to use all host atoms in " + f"host atoms ({len(filtered_host_idxs)} found). Will attempt to use all host atoms in " f"selection: {host_selection}." ) warnings.warn(wmsg) @@ -180,11 +180,11 @@ def find_host_atom_candidates( if len(filtered_host_idxs) == 0: errmsg = ( f"No host atoms found within the search distance " - f"{min_search_distance}-{max_search_distance} consider widening the search window." + f"{min_search_distance}-{max_search_distance}. Consider widening the search window." ) raise ValueError(errmsg) - # Now we sort them from their distance from the guest anchor + # Now we sort them by their distance from the guest anchor atom_sorter = CentroidDistanceSort( sortable_atoms=universe.atoms[filtered_host_idxs], reference_atoms=universe.atoms[guest_anchor_idx], From 6161b97b241536d5cc4dba78ba65e12e91eee7b0 Mon Sep 17 00:00:00 2001 From: Irfan Alibay Date: Thu, 31 Jul 2025 12:21:53 +0100 Subject: [PATCH 4/5] Update openfe/protocols/restraint_utils/geometry/boresch/host.py --- openfe/protocols/restraint_utils/geometry/boresch/host.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openfe/protocols/restraint_utils/geometry/boresch/host.py b/openfe/protocols/restraint_utils/geometry/boresch/host.py index 5f21a5a9c..1a7cc4e13 100644 --- a/openfe/protocols/restraint_utils/geometry/boresch/host.py +++ b/openfe/protocols/restraint_utils/geometry/boresch/host.py @@ -85,8 +85,8 @@ def find_host_atom_candidates( host_selection: str, dssp_filter: bool = False, rmsf_cutoff: Quantity = 0.1 * unit.nanometer, - min_search_distance: Quantity = 1 * unit.nanometer, - max_search_distance: Quantity = 3 * unit.nanometer, + min_search_distance: Quantity = 0.5 * unit.nanometer, + max_search_distance: Quantity = 1.5 * unit.nanometer, ) -> npt.NDArray: """ Get a list of suitable host atoms. From da30e1a7c5cdcefe6b8b3bea9066abbe9d6c4c48 Mon Sep 17 00:00:00 2001 From: IAlibay Date: Thu, 31 Jul 2025 13:37:53 +0100 Subject: [PATCH 5/5] fix tests --- .../protocols/restraints/test_geometry_boresch_host.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/openfe/tests/protocols/restraints/test_geometry_boresch_host.py b/openfe/tests/protocols/restraints/test_geometry_boresch_host.py index c7aa3bead..249710fbc 100644 --- a/openfe/tests/protocols/restraints/test_geometry_boresch_host.py +++ b/openfe/tests/protocols/restraints/test_geometry_boresch_host.py @@ -34,13 +34,14 @@ def test_host_atom_candidates_dssp(eg5_protein_ligand_universe): host_idxs=host_atoms.ix, # hand picked guest_anchor_idx=5508, - host_selection="backbone and resnum 15:25", + host_selection="backbone and resnum 212:221", dssp_filter=True, ) expected = np.array( - [136, 134, 135, 133, 104, 159, 160, 119, 157, 103, 120, 158, 71, - 118, 102, 101, 117, 70, 87, 86, 85, 69, 68, 88, 51, 50, - 52, 49, 38, 37, 16, 36, 15, 35, 14, 13] + [3144, 3146, 3145, 3143, 3162, 3206, 3200, 3207, 3126, 3201, 3127, + 3163, 3199, 3202, 3164, 3125, 3165, 3177, 3208, 3179, 3124, 3216, + 3209, 3109, 3107, 3178, 3110, 3180, 3108, 3248, 3217, 3249, 3226, + 3218, 3228, 3227, 3250, 3219, 3251, 3229] ) assert_equal(idxs, expected) @@ -62,6 +63,7 @@ def test_host_atom_candidates_dssp_too_few_atoms(eg5_protein_ligand_universe): guest_anchor_idx=5508, host_selection="backbone and resnum 15:25", dssp_filter=True, + max_search_distance=2*unit.nanometer )