From 635ef3243b56782a4f28028c6ebdec8ef4eaa90a Mon Sep 17 00:00:00 2001 From: ayush Date: Sat, 21 Jul 2018 23:12:24 -0700 Subject: [PATCH 1/5] added an option to check for pairs within coordinate array using bruteforce_capped_self, pkdtree_capped_self + tests --- package/MDAnalysis/lib/distances.py | 169 ++++++++++++++++-- .../MDAnalysisTests/lib/test_distances.py | 32 ++++ 2 files changed, 190 insertions(+), 11 deletions(-) diff --git a/package/MDAnalysis/lib/distances.py b/package/MDAnalysis/lib/distances.py index 57a5625b074..b1b77977cf9 100644 --- a/package/MDAnalysis/lib/distances.py +++ b/package/MDAnalysis/lib/distances.py @@ -400,8 +400,9 @@ def self_distance_array(reference, box=None, result=None, backend="serial"): return distances -def capped_distance(reference, configuration, max_cutoff, min_cutoff=None, box=None, method=None): - """Calculates the pairs and distance within a specified distance +def capped_distance(reference, configuration, max_cutoff, min_cutoff=None, + box=None, method=None, equal=False): + """Calculates the pairs and distances within a specified distance If a *box* is supplied, then a minimum image convention is used to evaluate the distances. @@ -433,6 +434,10 @@ def capped_distance(reference, configuration, max_cutoff, min_cutoff=None, box=N method : (optional) 'bruteforce' or 'pkdtree' or 'None' Keyword to override the automatic guessing of method built-in in the function [None] + equal : (optional) 'True' or 'False' + bool value to choose optimized solution within ``method``, + if ``reference`` and ``configuration`` are equal arrays. + [False] Returns ------- @@ -471,7 +476,8 @@ def capped_distance(reference, configuration, max_cutoff, min_cutoff=None, box=N max_cutoff, min_cutoff=min_cutoff, box=box, method=method) pairs, dist = method(reference, configuration, max_cutoff, - min_cutoff=min_cutoff, box=box) + min_cutoff=min_cutoff, box=box, + equal=equal) return np.asarray(pairs), np.asarray(dist) @@ -483,12 +489,38 @@ def _determine_method(reference, configuration, max_cutoff, min_cutoff=None, All the rules to select the method based on the input can be incorporated here. + Parameters + ---------- + reference : array + reference coordinates array with shape ``reference.shape = (3,)`` + or ``reference.shape = (len(reference), 3)`` + configuration : array + Configuration coordinate array with shape ``reference.shape = (3,)`` + or ``reference.shape = (len(reference), 3)`` + max_cutoff : float + Maximum cutoff distance between the reference and configuration + min_cutoff : (optional) float + Minimum cutoff distance between reference and configuration [None] + box : (optional) array or None + The dimensions, if provided, must be provided in the same + The unitcell dimesions for this system format as returned + by :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`: + ``[lx,ly, lz, alpha, beta, gamma]``. Minimum image convention + is applied if the box is provided [None] + method : (optional) 'bruteforce' or 'pkdtree' or 'None' + Keyword to override the automatic guessing of method built-in + in the function [None] + Returns ------- - Function object based on the rules and specified method - Currently implemented methods are - bruteforce : returns ``_bruteforce_capped`` - PKDtree : return ``_pkdtree_capped` + Method : Function object + Returns function object based on the rules and specified method + + Note + ---- + Currently implemented methods are present in the ``methods`` dictionary + bruteforce : returns ``_bruteforce_capped`` + PKDtree : return ``_pkdtree_capped` """ methods = {'bruteforce': _bruteforce_capped, @@ -520,15 +552,42 @@ def _determine_method(reference, configuration, max_cutoff, min_cutoff=None, return methods['bruteforce'] -def _bruteforce_capped(reference, configuration, max_cutoff, min_cutoff=None, box=None): - """ - Using naive distance calulations, returns a list +def _bruteforce_capped(reference, configuration, max_cutoff, + min_cutoff=None, box=None, equal=False): + """Internal method for bruteforce calculations + + Uses naive distance calulations and returns a list containing the indices with one from each reference and configuration arrays, such that the distance between them is less than the specified cutoff distance + + Returns + ------- + pairs : list + List of ``[(i, j)]`` pairs such that atom-index ``i`` is + from reference and ``j`` from configuration array + distance: list + Distance between ``reference[i]`` and ``configuration[j]`` + atom coordinate + + Note + ---- + if ``equal`` parameter is set to ``True``, then + same particle is never selected to be a pair of itself + + SeeAlso + ------- + MDAnalysis.lib.distances._bruteforce_capped_self + """ pairs, distance = [], [] + if equal: + pairs, distance = _bruteforce_capped_self(reference, max_cutoff, + min_cutoff=min_cutoff, + box=box) + return pairs, distance + reference = np.asarray(reference, dtype=np.float32) configuration = np.asarray(configuration, dtype=np.float32) @@ -552,9 +611,49 @@ def _bruteforce_capped(reference, configuration, max_cutoff, min_cutoff=None, bo return pairs, distance -def _pkdtree_capped(reference, configuration, max_cutoff, min_cutoff=None, box=None): +def _bruteforce_capped_self(reference, max_cutoff, min_cutoff=None, + box=None): + """Finds all the pairs among the coordinates within a fixed distance + + Internal method using brute force method to evaluate all the pairs + of atoms within a fixed distance. + + Returns + ------- + pairs : list + List of ``[(i, j)]`` pairs such that atom-index ``i`` + and ``j`` from reference array are within the fixed distance + distance: list + Distance between ``reference[i]`` and ``reference[j]`` + atom coordinate + + """ + pairs, distance = [], [] + + reference = np.asarray(reference, dtype=np.float32) + if reference.shape == (3, ): + reference = reference[None, :] + for i, coords in enumerate(reference): + dist = distance_array(coords[None, :], reference[i+1:], + box=box)[0] + + if min_cutoff is not None: + idx = np.where((dist <= max_cutoff) & (dist > min_cutoff))[0] + else: + idx = np.where((dist <= max_cutoff))[0] + for other_idx in idx: + j = other_idx + 1 + i + pairs.append((i, j)) + distance.append(dist[other_idx]) + return pairs, distance + + +def _pkdtree_capped(reference, configuration, max_cutoff, + min_cutoff=None, box=None, equal=False): """ Capped Distance evaluations using KDtree. + Uses minimum image convention if *box* is specified + Returns: -------- pairs : list @@ -564,11 +663,22 @@ def _pkdtree_capped(reference, configuration, max_cutoff, min_cutoff=None, box=N distance : list Distance between two atoms corresponding to the (i, j) indices in pairs. + + SeeAlso + ------- + MDAnalysis.lib.distances._pkdtree_capped_self + """ from .pkdtree import PeriodicKDTree pairs, distances = [], [] + if equal: + pairs, distance = _pkdtree_capped_self(reference, max_cutoff, + min_cutoff=min_cutoff, + box=box) + return pairs, distance + reference = np.asarray(reference, dtype=np.float32) configuration = np.asarray(configuration, dtype=np.float32) @@ -600,6 +710,43 @@ def _pkdtree_capped(reference, configuration, max_cutoff, min_cutoff=None, box=N return pairs, distances +def _pkdtree_capped_self(reference, max_cutoff, min_cutoff=None, + box=None): + """Finds all the pairs among the coordinates within a fixed distance + + Internal method using PeriodicKDTree method to evaluate all the pairs + of atoms within a fixed distance. + + Returns + ------- + pairs : list + List of ``[(i, j)]`` pairs such that atom-index ``i`` + and ``j`` from reference array are within the fixed distance + distance: list + Distance between ``reference[i]`` and ``reference[j]`` + atom coordinate + + """ + from .pkdtree import PeriodicKDTree + + pairs, distances = [], [] + reference = np.asarray(reference, dtype=np.float32) + if reference.shape == (3, ): + reference = reference[None, :] + + kdtree = PeriodicKDTree(box=box) + cut = max_cutoff if box is not None else None + kdtree.set_coords(reference, cutoff=cut) + max_pairs = kdtree.search_pairs(max_cutoff) + min_crit = min_cutoff is not None + for (i, j) in max_pairs: + dist = calc_distance(reference[i], reference[j], box=box) + if not min_crit or dist >= min_cutoff: + pairs.append((i, j)) + distances.append(dist) + return pairs, distances + + def transform_RtoS(inputcoords, box, backend="serial"): """Transform an array of coordinates from real space to S space (aka lambda space) diff --git a/testsuite/MDAnalysisTests/lib/test_distances.py b/testsuite/MDAnalysisTests/lib/test_distances.py index 37133cb5abc..84b0b5d9318 100644 --- a/testsuite/MDAnalysisTests/lib/test_distances.py +++ b/testsuite/MDAnalysisTests/lib/test_distances.py @@ -106,6 +106,38 @@ def test_capped_distance_checkbrute(npoints, box, query, method, min_cutoff): assert_equal(np.sort(found_pairs, axis=0), np.sort(indices[1], axis=0)) +@pytest.mark.parametrize('npoints', npoints_1) +@pytest.mark.parametrize('box', boxes_1) +@pytest.mark.parametrize('method', method_1) +@pytest.mark.parametrize('min_cutoff', min_cutoff_1) +def test_capped_distance_equal(npoints, box, method, min_cutoff): + np.random.seed(90003) + points = (np.random.uniform(low=0, high=1.0, + size=(npoints, 3))*(boxes_1[0][:3])).astype(np.float32) + max_cutoff = 0.1 + pairs, distance = mda.lib.distances.capped_distance(points, + points, + max_cutoff, + min_cutoff=min_cutoff, + box=box, + method=method, + equal=True) + found_pairs, found_distance = [], [] + + for i, coord in enumerate(points): + dist = mda.lib.distances.distance_array(coord[None, :], + points[i+1:], + box=box) + if min_cutoff is not None: + idx = np.where((dist <= max_cutoff) & (dist > min_cutoff))[0] + else: + idx = np.where((dist <=max_cutoff))[0] + for other_idx in idx: + j = other_idx + 1 + i + found_pairs.append((i, j)) + found_distance.append(dist[other_idx]) + assert_equal(len(pairs), len(found_pairs)) + @pytest.mark.parametrize('npoints,cutoff,meth', [(1, 0.02, '_bruteforce_capped'), From 64160210d6d1c8a8f3b923bcc1111b4a019b5ace Mon Sep 17 00:00:00 2001 From: ayush Date: Sun, 22 Jul 2018 00:28:01 -0700 Subject: [PATCH 2/5] Modified guess bonds to use capped function --- package/MDAnalysis/topology/guessers.py | 28 ++++++------------- .../MDAnalysisTests/topology/test_guessers.py | 11 ++++++++ 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/package/MDAnalysis/topology/guessers.py b/package/MDAnalysis/topology/guessers.py index 06689168079..940552177f0 100644 --- a/package/MDAnalysis/topology/guessers.py +++ b/package/MDAnalysis/topology/guessers.py @@ -232,25 +232,15 @@ def guess_bonds(atoms, coords, box=None, **kwargs): bonds = [] - for i, atom in enumerate(atoms[:-1]): - vdw_i = vdwradii[atomtypes[i]] - max_d = (vdw_i + max_vdw) * fudge_factor - - # using self_distance_array scales O(n^2) - # 20,000 atoms = 1.6 Gb memory - dist = distances.distance_array(coords[i][None, :], coords[i + 1:], - box=box)[0] - idx = np.where((dist > lower_bound) & (dist <= max_d))[0] - - for a in idx: - j = i + 1 + a - atom_j = atoms[j] - - if dist[a] < (vdw_i + vdwradii[atomtypes[j]]) * fudge_factor: - # because of method used, same bond won't be seen twice, - # so don't need to worry about duplicates - bonds.append((atom.index, atom_j.index)) - + pairs, dist = distances.capped_distance(coords, coords, + max_cutoff=max_vdw, + min_cutoff=lower_bound, + box=box, equal=True + ) + for idx, (i, j) in enumerate(pairs): + d = (vdwradii[atomtypes[i]] + vdwradii[atomtypes[j]])*fudge_factor + if (dist[idx] < d): + bonds.append((atoms[i].index, atoms[j].index)) return tuple(bonds) diff --git a/testsuite/MDAnalysisTests/topology/test_guessers.py b/testsuite/MDAnalysisTests/topology/test_guessers.py index 574d8bd7a45..7bbb585bf22 100644 --- a/testsuite/MDAnalysisTests/topology/test_guessers.py +++ b/testsuite/MDAnalysisTests/topology/test_guessers.py @@ -25,11 +25,13 @@ from numpy.testing import assert_equal import numpy as np +import MDAnalysis as mda from MDAnalysis.topology import guessers from MDAnalysis.core.topologyattrs import Angles from MDAnalysisTests import make_Universe from MDAnalysisTests.core.test_fragments import make_starshape +from MDAnalysisTests.datafiles import two_water_gro class TestGuessMasses(object): @@ -99,3 +101,12 @@ def test_guess_impropers(): vals = guessers.guess_improper_dihedrals(ag.angles) assert_equal(len(vals), 12) + + +def test_guess_bonds(): + u = mda.Universe(two_water_gro) + bonds = guessers.guess_bonds(u.atoms, u.atoms.positions, u.dimensions) + assert_equal(bonds, ((0, 1), + (0, 2), + (3, 4), + (3, 5))) From 172c7f1806106da1a6fb66d3a50b799cb46a9351 Mon Sep 17 00:00:00 2001 From: ayush Date: Thu, 26 Jul 2018 00:58:08 -0700 Subject: [PATCH 3/5] seperated the self_capped_function, added tests, modified the guess_bonds function --- package/MDAnalysis/lib/distances.py | 241 ++++++++++++------ package/MDAnalysis/topology/guessers.py | 9 +- .../MDAnalysisTests/lib/test_distances.py | 45 +++- 3 files changed, 193 insertions(+), 102 deletions(-) diff --git a/package/MDAnalysis/lib/distances.py b/package/MDAnalysis/lib/distances.py index b1b77977cf9..c844a69b072 100644 --- a/package/MDAnalysis/lib/distances.py +++ b/package/MDAnalysis/lib/distances.py @@ -401,7 +401,7 @@ def self_distance_array(reference, box=None, result=None, backend="serial"): def capped_distance(reference, configuration, max_cutoff, min_cutoff=None, - box=None, method=None, equal=False): + box=None, method=None): """Calculates the pairs and distances within a specified distance If a *box* is supplied, then a minimum image convention is used @@ -434,10 +434,6 @@ def capped_distance(reference, configuration, max_cutoff, min_cutoff=None, method : (optional) 'bruteforce' or 'pkdtree' or 'None' Keyword to override the automatic guessing of method built-in in the function [None] - equal : (optional) 'True' or 'False' - bool value to choose optimized solution within ``method``, - if ``reference`` and ``configuration`` are equal arrays. - [False] Returns ------- @@ -476,8 +472,7 @@ def capped_distance(reference, configuration, max_cutoff, min_cutoff=None, max_cutoff, min_cutoff=min_cutoff, box=box, method=method) pairs, dist = method(reference, configuration, max_cutoff, - min_cutoff=min_cutoff, box=box, - equal=equal) + min_cutoff=min_cutoff, box=box) return np.asarray(pairs), np.asarray(dist) @@ -553,7 +548,7 @@ def _determine_method(reference, configuration, max_cutoff, min_cutoff=None, def _bruteforce_capped(reference, configuration, max_cutoff, - min_cutoff=None, box=None, equal=False): + min_cutoff=None, box=None): """Internal method for bruteforce calculations Uses naive distance calulations and returns a list @@ -570,23 +565,14 @@ def _bruteforce_capped(reference, configuration, max_cutoff, Distance between ``reference[i]`` and ``configuration[j]`` atom coordinate - Note - ---- - if ``equal`` parameter is set to ``True``, then - same particle is never selected to be a pair of itself - - SeeAlso - ------- - MDAnalysis.lib.distances._bruteforce_capped_self - """ pairs, distance = [], [] - if equal: - pairs, distance = _bruteforce_capped_self(reference, max_cutoff, - min_cutoff=min_cutoff, - box=box) - return pairs, distance + #if equal: + # pairs, distance = _bruteforce_capped_self(reference, max_cutoff, + # min_cutoff=min_cutoff, + # box=box) + # return pairs, distance reference = np.asarray(reference, dtype=np.float32) configuration = np.asarray(configuration, dtype=np.float32) @@ -602,54 +588,17 @@ def _bruteforce_capped(reference, configuration, max_cutoff, for i, coords in enumerate(reference): dist = distance_array(coords[None, :], configuration, box=box)[0] if min_cutoff is not None: - idx = np.where((dist <= max_cutoff) & (dist > min_cutoff))[0] + idx = np.where((dist < max_cutoff) & (dist > min_cutoff))[0] else: - idx = np.where((dist <= max_cutoff))[0] + idx = np.where((dist < max_cutoff))[0] for j in idx: pairs.append((i, j)) distance.append(dist[j]) return pairs, distance -def _bruteforce_capped_self(reference, max_cutoff, min_cutoff=None, - box=None): - """Finds all the pairs among the coordinates within a fixed distance - - Internal method using brute force method to evaluate all the pairs - of atoms within a fixed distance. - - Returns - ------- - pairs : list - List of ``[(i, j)]`` pairs such that atom-index ``i`` - and ``j`` from reference array are within the fixed distance - distance: list - Distance between ``reference[i]`` and ``reference[j]`` - atom coordinate - - """ - pairs, distance = [], [] - - reference = np.asarray(reference, dtype=np.float32) - if reference.shape == (3, ): - reference = reference[None, :] - for i, coords in enumerate(reference): - dist = distance_array(coords[None, :], reference[i+1:], - box=box)[0] - - if min_cutoff is not None: - idx = np.where((dist <= max_cutoff) & (dist > min_cutoff))[0] - else: - idx = np.where((dist <= max_cutoff))[0] - for other_idx in idx: - j = other_idx + 1 + i - pairs.append((i, j)) - distance.append(dist[other_idx]) - return pairs, distance - - def _pkdtree_capped(reference, configuration, max_cutoff, - min_cutoff=None, box=None, equal=False): + min_cutoff=None, box=None): """ Capped Distance evaluations using KDtree. Uses minimum image convention if *box* is specified @@ -664,20 +613,16 @@ def _pkdtree_capped(reference, configuration, max_cutoff, Distance between two atoms corresponding to the (i, j) indices in pairs. - SeeAlso - ------- - MDAnalysis.lib.distances._pkdtree_capped_self - """ from .pkdtree import PeriodicKDTree pairs, distances = [], [] - if equal: - pairs, distance = _pkdtree_capped_self(reference, max_cutoff, - min_cutoff=min_cutoff, - box=box) - return pairs, distance + #if equal: + # pairs, distance = _pkdtree_capped_self(reference, max_cutoff, + # min_cutoff=min_cutoff, + # box=box) + # return pairs, distance reference = np.asarray(reference, dtype=np.float32) configuration = np.asarray(configuration, dtype=np.float32) @@ -709,42 +654,170 @@ def _pkdtree_capped(reference, configuration, max_cutoff, distances.append(dist[num]) return pairs, distances +def self_capped_distance(reference, max_cutoff, min_cutoff=None, + box=None, method=None): + """Finds all the pairs and respective distances within a specified cutoff + for a configuration *reference* + + If a *box* is supplied, then a minimum image convention is used + to evaluate the distances. + + An automatic guessing of optimized method to calculate the distances is + included in the function. An optional keyword for the method is also + provided. Users can override the method with this functionality. + Currently pkdtree and bruteforce are implemented. + + """ + if box is not None: + if box.shape[0] != 6: + raise ValueError('Box Argument is of incompatible type. The dimension' + 'should be either None or ' + 'of the type [lx, ly, lz, alpha, beta, gamma]') + method = _determine_method_self(reference, max_cutoff, + min_cutoff=min_cutoff, + box=box, method=method) + pairs, dist = method(reference, max_cutoff, + min_cutoff=min_cutoff, box=box) + + return np.asarray(pairs), np.asarray(dist) + +def _determine_method_self(reference, max_cutoff, min_cutoff=None, + box=None, method=None): + """ + Switch between different methods based on the the optimized time. + All the rules to select the method based on the input can be + incorporated here. + + Parameters + ---------- + reference : array + reference coordinates array with shape ``reference.shape = (3,)`` + or ``reference.shape = (len(reference), 3)`` + max_cutoff : float + Maximum cutoff distance + min_cutoff : (optional) float + Minimum cutoff distance [None] + box : (optional) array or None + The dimensions, if provided, must be provided in the same + The unitcell dimesions for this system format as returned + by :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`: + ``[lx,ly, lz, alpha, beta, gamma]``. Minimum image convention + is applied if the box is provided [None] + method : (optional) 'bruteforce' or 'pkdtree' or 'None' + Keyword to override the automatic guessing of method built-in + in the function [None] + + Returns + ------- + Method : Function object + Returns function object based on the rules and specified method + + Note + ---- + Currently implemented methods are present in the ``methods`` dictionary + bruteforce : returns ``_bruteforce_capped_self`` + PKDtree : return ``_pkdtree_capped_self`` + + """ + methods = {'bruteforce': _bruteforce_capped_self, + 'pkdtree': _pkdtree_capped_self} + + if method is not None: + return methods[method] + + if len(reference) > 5000: + if box is None and reference.shape[0] != 3: + min_dim = np.array([reference.min(axis=0)]) + max_dim = np.array([reference.max(axis=0)]) + size = max_dim.max(axis=0) - min_dim.min(axis=0) + elif box is not None: + if np.allclose(box[3:], 90): + size = box[:3] + else: + tribox = triclinic_vectors(box) + size = tribox.max(axis=0) - tribox.min(axis=0) + + if (np.any(size < 10.0*max_cutoff) and + len(reference) > 100000): + return methods['bruteforce'] + else: + return methods['pkdtree'] + return methods['bruteforce'] + +def _bruteforce_capped_self(reference, max_cutoff, min_cutoff=None, + box=None): + """Finds all the pairs among the coordinates within a fixed distance + using brute force method + + Internal method using brute force method to evaluate all the pairs + of atoms within a fixed distance. + + Returns + ------- + pairs : array + Arrray of ``[i, j]`` pairs such that atom-index ``i`` + and ``j`` from reference array are within the fixed distance + distance: array + Distance between ``reference[i]`` and ``reference[j]`` + atom coordinate + + """ + pairs, distance = [], [] + + reference = np.asarray(reference, dtype=np.float32) + if reference.shape == (3, ): + reference = reference[None, :] + for i, coords in enumerate(reference): + dist = distance_array(coords[None, :], reference[i+1:], + box=box)[0] + + if min_cutoff is not None: + idx = np.where((dist < max_cutoff) & (dist > min_cutoff))[0] + else: + idx = np.where((dist < max_cutoff))[0] + for other_idx in idx: + j = other_idx + 1 + i + pairs.append((i, j)) + distance.append(dist[other_idx]) + return np.asarray(pairs), np.asarray(distance) + def _pkdtree_capped_self(reference, max_cutoff, min_cutoff=None, box=None): """Finds all the pairs among the coordinates within a fixed distance + using PeriodicKDTree Internal method using PeriodicKDTree method to evaluate all the pairs of atoms within a fixed distance. Returns ------- - pairs : list - List of ``[(i, j)]`` pairs such that atom-index ``i`` + pairs : array + Array of ``[(i, j)]`` pairs such that atom-index ``i`` and ``j`` from reference array are within the fixed distance - distance: list + distance: array Distance between ``reference[i]`` and ``reference[j]`` atom coordinate """ from .pkdtree import PeriodicKDTree - pairs, distances = [], [] reference = np.asarray(reference, dtype=np.float32) if reference.shape == (3, ): reference = reference[None, :] - + + pairs, distance = [], [] kdtree = PeriodicKDTree(box=box) cut = max_cutoff if box is not None else None kdtree.set_coords(reference, cutoff=cut) - max_pairs = kdtree.search_pairs(max_cutoff) - min_crit = min_cutoff is not None - for (i, j) in max_pairs: - dist = calc_distance(reference[i], reference[j], box=box) - if not min_crit or dist >= min_cutoff: - pairs.append((i, j)) - distances.append(dist) - return pairs, distances + pairs = kdtree.search_pairs(max_cutoff) + if pairs.size > 0: + refA, refB = pairs[:, 0], pairs[:, 1] + distance = calc_bonds(reference[refA], reference[refB], box=box) + if min_cutoff is not None: + mask = np.where(distance > min_cutoff)[0] + pairs, distance = pairs[mask], distance[mask] + return np.asarray(pairs), np.asarray(distance) def transform_RtoS(inputcoords, box, backend="serial"): diff --git a/package/MDAnalysis/topology/guessers.py b/package/MDAnalysis/topology/guessers.py index 940552177f0..bed9cca8b05 100644 --- a/package/MDAnalysis/topology/guessers.py +++ b/package/MDAnalysis/topology/guessers.py @@ -232,11 +232,10 @@ def guess_bonds(atoms, coords, box=None, **kwargs): bonds = [] - pairs, dist = distances.capped_distance(coords, coords, - max_cutoff=max_vdw, - min_cutoff=lower_bound, - box=box, equal=True - ) + pairs, dist = distances.self_capped_distance(coords, + max_cutoff=2.0*max_vdw, + min_cutoff=lower_bound, + box=box) for idx, (i, j) in enumerate(pairs): d = (vdwradii[atomtypes[i]] + vdwradii[atomtypes[j]])*fudge_factor if (dist[idx] < d): diff --git a/testsuite/MDAnalysisTests/lib/test_distances.py b/testsuite/MDAnalysisTests/lib/test_distances.py index 84b0b5d9318..9d46b83563c 100644 --- a/testsuite/MDAnalysisTests/lib/test_distances.py +++ b/testsuite/MDAnalysisTests/lib/test_distances.py @@ -113,31 +113,50 @@ def test_capped_distance_checkbrute(npoints, box, query, method, min_cutoff): def test_capped_distance_equal(npoints, box, method, min_cutoff): np.random.seed(90003) points = (np.random.uniform(low=0, high=1.0, - size=(npoints, 3))*(boxes_1[0][:3])).astype(np.float32) + size=(npoints, 3))*(boxes_1[0][:3])).astype(np.float32) max_cutoff = 0.1 - pairs, distance = mda.lib.distances.capped_distance(points, - points, - max_cutoff, - min_cutoff=min_cutoff, - box=box, - method=method, - equal=True) + pairs, distance = mda.lib.distances.self_capped_distance(points, + max_cutoff, + min_cutoff=min_cutoff, + box=box, + method=method) found_pairs, found_distance = [], [] - for i, coord in enumerate(points): dist = mda.lib.distances.distance_array(coord[None, :], - points[i+1:], - box=box) + points[i+1:], + box=box) if min_cutoff is not None: - idx = np.where((dist <= max_cutoff) & (dist > min_cutoff))[0] + idx = np.where((dist < max_cutoff) & (dist > min_cutoff))[0] else: - idx = np.where((dist <=max_cutoff))[0] + idx = np.where((dist < max_cutoff))[0] for other_idx in idx: j = other_idx + 1 + i found_pairs.append((i, j)) found_distance.append(dist[other_idx]) assert_equal(len(pairs), len(found_pairs)) +@pytest.mark.parametrize('npoints,cutoff,meth', + [(1, 0.02, '_bruteforce_capped_self'), + (1, 0.2, '_bruteforce_capped_self'), + (6000, 0.02, '_pkdtree_capped_self'), + (6000, 0.2, '_pkdtree_capped_self'), + (200000, 0.02, '_pkdtree_capped_self'), + (200000, 0.2, '_bruteforce_capped_self')]) +def test_method_selfselection(npoints, cutoff, meth): + np.random.seed(90003) + box = np.array([1, 1, 1, 90, 90, 90], dtype=np.float32) + points = (np.random.uniform(low=0, high=1.0, + size=(npoints, 3)) * (box[:3])).astype(np.float32) + if box is not None: + boxtype = mda.lib.distances._box_check(box) + # Convert [A,B,C,alpha,beta,gamma] to [[A],[B],[C]] + if (boxtype == 'tri_box'): + box = triclinic_vectors(box) + if (boxtype == 'tri_vecs_bad'): + box = triclinic_vectors(triclinic_box(box[0], box[1], box[2])) + method = mda.lib.distances._determine_method_self(points, cutoff, box=box) + assert_equal(method.__name__, meth) + @pytest.mark.parametrize('npoints,cutoff,meth', [(1, 0.02, '_bruteforce_capped'), From 0edfb7c3faef3956ee47a02d68fa50407316b815 Mon Sep 17 00:00:00 2001 From: ayush Date: Sat, 4 Aug 2018 02:15:34 -0700 Subject: [PATCH 4/5] modified CHANGELOG, and minor modifications --- package/CHANGELOG | 3 + package/MDAnalysis/lib/distances.py | 62 ++++++++++++++----- .../MDAnalysisTests/lib/test_distances.py | 2 +- 3 files changed, 51 insertions(+), 16 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index 05e9b078d1c..52a10a856e9 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -20,6 +20,9 @@ The rules for this file: Enhancements + * Added lib.distances.self_capped_distance to internally select the + optimized method for distance evaluations. Modified topology.guessers.guess_bonds + for fast bonds guessing. (PR #2006) * Added augment functionality to create relevant images of particles in the vicinity of central cell to handle periodic boundary conditions (PR #1977) diff --git a/package/MDAnalysis/lib/distances.py b/package/MDAnalysis/lib/distances.py index c844a69b072..0664865ca69 100644 --- a/package/MDAnalysis/lib/distances.py +++ b/package/MDAnalysis/lib/distances.py @@ -59,6 +59,8 @@ .. autofunction:: calc_angles(atom1, atom2, atom3 [,box [, result [, backend]]]) .. autofunction:: calc_dihedrals(atom1, atom2, atom3, atom4 [,box [, result [, backend]]]) .. autofunction:: apply_PBC(coordinates, box [, backend]) +.. autofunction:: capped_distance(reference, configuration, max_cutoff [, min_cutoff [, box [, method]]]) +.. autofunction:: self_capped_distance(reference, max_cutoff, [, min_cutoff [, box [, method]]]) .. autofunction:: transform_RtoS(coordinates, box [, backend]) .. autofunction:: transform_StoR(coordinates, box [,backend]) .. autofunction:: MDAnalysis.lib._augment.augment_coordinates(coordinates, box, radius) @@ -461,7 +463,6 @@ def capped_distance(reference, configuration, max_cutoff, min_cutoff=None, .. SeeAlso:: :func:'MDAnalysis.lib.distances.distance_array' .. SeeAlso:: :func:'MDAnalysis.lib.pkdtree.PeriodicKDTree' - """ if box is not None: if box.shape[0] != 6: @@ -568,11 +569,6 @@ def _bruteforce_capped(reference, configuration, max_cutoff, """ pairs, distance = [], [] - #if equal: - # pairs, distance = _bruteforce_capped_self(reference, max_cutoff, - # min_cutoff=min_cutoff, - # box=box) - # return pairs, distance reference = np.asarray(reference, dtype=np.float32) configuration = np.asarray(configuration, dtype=np.float32) @@ -618,12 +614,6 @@ def _pkdtree_capped(reference, configuration, max_cutoff, pairs, distances = [], [] - #if equal: - # pairs, distance = _pkdtree_capped_self(reference, max_cutoff, - # min_cutoff=min_cutoff, - # box=box) - # return pairs, distance - reference = np.asarray(reference, dtype=np.float32) configuration = np.asarray(configuration, dtype=np.float32) @@ -667,6 +657,48 @@ def self_capped_distance(reference, max_cutoff, min_cutoff=None, provided. Users can override the method with this functionality. Currently pkdtree and bruteforce are implemented. + Parameters + ----------- + reference : array + reference coordinates array with shape ``reference.shape = (3,)`` + or ``reference.shape = (len(reference), 3)`` + max_cutoff : float + Maximum cutoff distance to check the neighbors with itself + min_cutoff : (optional) float + Minimum cutoff distance [None] + box : (optional) array or None + The dimensions, if provided, must be provided in the same + The unitcell dimesions for this system format as returned + by :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`: + ``[lx,ly, lz, alpha, beta, gamma]``. Minimum image convention + is applied if the box is provided [None] + method : (optional) 'bruteforce' or 'pkdtree' or 'None' + Keyword to override the automatic guessing of method built-in + in the function [None] + + Returns + ------- + pairs : array + Pair of indices such that distance between them is + within the ``max_cutoff`` and ``min_cutoff`` + distances : array + Distances corresponding to each pair of indices. + d[k] corresponding to the pairs[i,j] gives the distance between + i-th and j-th coordinate in reference + + .. code-block:: python + + pairs, distances = self_capped_distances(reference, max_cutoff) + for indx, [a,b] in enumerate(pairs): + coord1, coords2 = reference[a], reference[b] + distance = distances[indx] + + Note + ----- + Currently only supports brute force and Periodic KDtree + + .. SeeAlso:: :func:'MDAnalysis.lib.distances.distance_array' + .. SeeAlso:: :func:'MDAnalysis.lib.pkdtree.PeriodicKDTree' """ if box is not None: if box.shape[0] != 6: @@ -682,7 +714,7 @@ def self_capped_distance(reference, max_cutoff, min_cutoff=None, return np.asarray(pairs), np.asarray(dist) def _determine_method_self(reference, max_cutoff, min_cutoff=None, - box=None, method=None): + box=None, method=None): """ Switch between different methods based on the the optimized time. All the rules to select the method based on the input can be @@ -746,8 +778,8 @@ def _determine_method_self(reference, max_cutoff, min_cutoff=None, def _bruteforce_capped_self(reference, max_cutoff, min_cutoff=None, box=None): - """Finds all the pairs among the coordinates within a fixed distance - using brute force method + """Finds all the pairs among the *reference* coordinates within + a fixed distance using brute force method Internal method using brute force method to evaluate all the pairs of atoms within a fixed distance. diff --git a/testsuite/MDAnalysisTests/lib/test_distances.py b/testsuite/MDAnalysisTests/lib/test_distances.py index 9d46b83563c..b8d70471f03 100644 --- a/testsuite/MDAnalysisTests/lib/test_distances.py +++ b/testsuite/MDAnalysisTests/lib/test_distances.py @@ -110,7 +110,7 @@ def test_capped_distance_checkbrute(npoints, box, query, method, min_cutoff): @pytest.mark.parametrize('box', boxes_1) @pytest.mark.parametrize('method', method_1) @pytest.mark.parametrize('min_cutoff', min_cutoff_1) -def test_capped_distance_equal(npoints, box, method, min_cutoff): +def test_self_capped_distance(npoints, box, method, min_cutoff): np.random.seed(90003) points = (np.random.uniform(low=0, high=1.0, size=(npoints, 3))*(boxes_1[0][:3])).astype(np.float32) From 7e7ab24c4e1849ad81e572f579cf353ee9c49e68 Mon Sep 17 00:00:00 2001 From: ayush Date: Sat, 4 Aug 2018 18:52:02 -0700 Subject: [PATCH 5/5] modified test for increased coverage, minor modifications to the self_capped_distance --- package/CHANGELOG | 7 ++- package/MDAnalysis/lib/distances.py | 52 +++++++++++-------- .../MDAnalysisTests/lib/test_distances.py | 33 ++++-------- 3 files changed, 45 insertions(+), 47 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index 52a10a856e9..08234a6cb6e 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -20,9 +20,12 @@ The rules for this file: Enhancements + + * Modified topology.guessers.guess_bonds to automatically select the + fastest method for guessing bonds using + lib.distance.self_capped_distance (PR # 2006) * Added lib.distances.self_capped_distance to internally select the - optimized method for distance evaluations. Modified topology.guessers.guess_bonds - for fast bonds guessing. (PR #2006) + optimized method for distance evaluations of coordinates with itself. (PR # 2006) * Added augment functionality to create relevant images of particles in the vicinity of central cell to handle periodic boundary conditions (PR #1977) diff --git a/package/MDAnalysis/lib/distances.py b/package/MDAnalysis/lib/distances.py index 0664865ca69..1cb86147235 100644 --- a/package/MDAnalysis/lib/distances.py +++ b/package/MDAnalysis/lib/distances.py @@ -526,22 +526,21 @@ def _determine_method(reference, configuration, max_cutoff, min_cutoff=None, return methods[method] if len(reference) > 5000 and len(configuration) > 5000: - if box is None and reference.shape[0] != 3 and configuration.shape[0] != 3: + if box is None: min_dim = np.array([reference.min(axis=0), configuration.min(axis=0)]) max_dim = np.array([reference.max(axis=0), configuration.max(axis=0)]) size = max_dim.max(axis=0) - min_dim.min(axis=0) - elif box is not None: - if np.allclose(box[3:], 90): - size = box[:3] - else: - tribox = triclinic_vectors(box) - size = tribox.max(axis=0) - tribox.min(axis=0) - - if (np.any(size < 10.0*max_cutoff) and + elif np.allclose(box[3:], 90): + size = box[:3] + else: + tribox = triclinic_vectors(box) + size = tribox.max(axis=0) - tribox.min(axis=0) + + if ((np.any(size < 10.0*max_cutoff) and len(reference) > 100000 and - len(configuration) > 100000): + len(configuration) > 100000)): return methods['bruteforce'] else: return methods['pkdtree'] @@ -644,6 +643,7 @@ def _pkdtree_capped(reference, configuration, max_cutoff, distances.append(dist[num]) return pairs, distances + def self_capped_distance(reference, max_cutoff, min_cutoff=None, box=None, method=None): """Finds all the pairs and respective distances within a specified cutoff @@ -679,7 +679,7 @@ def self_capped_distance(reference, max_cutoff, min_cutoff=None, Returns ------- pairs : array - Pair of indices such that distance between them is + Pair of indices such that distance between them is within the ``max_cutoff`` and ``min_cutoff`` distances : array Distances corresponding to each pair of indices. @@ -697,7 +697,7 @@ def self_capped_distance(reference, max_cutoff, min_cutoff=None, ----- Currently only supports brute force and Periodic KDtree - .. SeeAlso:: :func:'MDAnalysis.lib.distances.distance_array' + .. SeeAlso:: :func:'MDAnalysis.lib.distances.self_distance_array' .. SeeAlso:: :func:'MDAnalysis.lib.pkdtree.PeriodicKDTree' """ if box is not None: @@ -713,6 +713,7 @@ def self_capped_distance(reference, max_cutoff, min_cutoff=None, return np.asarray(pairs), np.asarray(dist) + def _determine_method_self(reference, max_cutoff, min_cutoff=None, box=None, method=None): """ @@ -758,24 +759,24 @@ def _determine_method_self(reference, max_cutoff, min_cutoff=None, return methods[method] if len(reference) > 5000: - if box is None and reference.shape[0] != 3: + if box is None: min_dim = np.array([reference.min(axis=0)]) max_dim = np.array([reference.max(axis=0)]) size = max_dim.max(axis=0) - min_dim.min(axis=0) - elif box is not None: - if np.allclose(box[3:], 90): - size = box[:3] - else: - tribox = triclinic_vectors(box) - size = tribox.max(axis=0) - tribox.min(axis=0) - - if (np.any(size < 10.0*max_cutoff) and - len(reference) > 100000): + elif np.allclose(box[3:], 90): + size = box[:3] + else: + tribox = triclinic_vectors(box) + size = tribox.max(axis=0) - tribox.min(axis=0) + + if ((np.any(size < 10.0*max_cutoff) and + (len(reference) > 100000))): return methods['bruteforce'] else: return methods['pkdtree'] return methods['bruteforce'] + def _bruteforce_capped_self(reference, max_cutoff, min_cutoff=None, box=None): """Finds all the pairs among the *reference* coordinates within @@ -800,6 +801,9 @@ def _bruteforce_capped_self(reference, max_cutoff, min_cutoff=None, if reference.shape == (3, ): reference = reference[None, :] for i, coords in enumerate(reference): + # Each pair of atoms needs to be checked only once. + # Only calculate distance for atomA and atomB + # if atomidA < atomidB dist = distance_array(coords[None, :], reference[i+1:], box=box)[0] @@ -808,6 +812,8 @@ def _bruteforce_capped_self(reference, max_cutoff, min_cutoff=None, else: idx = np.where((dist < max_cutoff))[0] for other_idx in idx: + # Actual atomid for atomB + # can be direclty obtained in this way j = other_idx + 1 + i pairs.append((i, j)) distance.append(dist[other_idx]) @@ -837,7 +843,7 @@ def _pkdtree_capped_self(reference, max_cutoff, min_cutoff=None, reference = np.asarray(reference, dtype=np.float32) if reference.shape == (3, ): reference = reference[None, :] - + pairs, distance = [], [] kdtree = PeriodicKDTree(box=box) cut = max_cutoff if box is not None else None diff --git a/testsuite/MDAnalysisTests/lib/test_distances.py b/testsuite/MDAnalysisTests/lib/test_distances.py index b8d70471f03..38f634844e3 100644 --- a/testsuite/MDAnalysisTests/lib/test_distances.py +++ b/testsuite/MDAnalysisTests/lib/test_distances.py @@ -135,6 +135,9 @@ def test_self_capped_distance(npoints, box, method, min_cutoff): found_distance.append(dist[other_idx]) assert_equal(len(pairs), len(found_pairs)) +@pytest.mark.parametrize('box', (None, + np.array([1, 1, 1, 90, 90, 90], dtype=np.float32), + np.array([1, 1, 1, 60, 75, 80], dtype=np.float32))) @pytest.mark.parametrize('npoints,cutoff,meth', [(1, 0.02, '_bruteforce_capped_self'), (1, 0.2, '_bruteforce_capped_self'), @@ -142,22 +145,17 @@ def test_self_capped_distance(npoints, box, method, min_cutoff): (6000, 0.2, '_pkdtree_capped_self'), (200000, 0.02, '_pkdtree_capped_self'), (200000, 0.2, '_bruteforce_capped_self')]) -def test_method_selfselection(npoints, cutoff, meth): +def test_method_selfselection(box, npoints, cutoff, meth): np.random.seed(90003) - box = np.array([1, 1, 1, 90, 90, 90], dtype=np.float32) points = (np.random.uniform(low=0, high=1.0, - size=(npoints, 3)) * (box[:3])).astype(np.float32) - if box is not None: - boxtype = mda.lib.distances._box_check(box) - # Convert [A,B,C,alpha,beta,gamma] to [[A],[B],[C]] - if (boxtype == 'tri_box'): - box = triclinic_vectors(box) - if (boxtype == 'tri_vecs_bad'): - box = triclinic_vectors(triclinic_box(box[0], box[1], box[2])) + size=(npoints, 3))).astype(np.float32) method = mda.lib.distances._determine_method_self(points, cutoff, box=box) assert_equal(method.__name__, meth) +@pytest.mark.parametrize('box', (None, + np.array([1, 1, 1, 90, 90, 90], dtype=np.float32), + np.array([1, 1, 1, 60, 75, 80], dtype=np.float32))) @pytest.mark.parametrize('npoints,cutoff,meth', [(1, 0.02, '_bruteforce_capped'), (1, 0.2, '_bruteforce_capped'), @@ -165,20 +163,11 @@ def test_method_selfselection(npoints, cutoff, meth): (6000, 0.2, '_pkdtree_capped'), (200000, 0.02, '_pkdtree_capped'), (200000, 0.2, '_bruteforce_capped')]) -def test_method_selection(npoints, cutoff, meth): +def test_method_selection(box, npoints, cutoff, meth): np.random.seed(90003) - box = np.array([1, 1, 1, 90, 90, 90], dtype=np.float32) points = (np.random.uniform(low=0, high=1.0, - size=(npoints, 3)) * (box[:3])).astype(np.float32) - if box is not None: - boxtype = mda.lib.distances._box_check(box) - # Convert [A,B,C,alpha,beta,gamma] to [[A],[B],[C]] - if (boxtype == 'tri_box'): - box = triclinic_vectors(box) - if (boxtype == 'tri_vecs_bad'): - box = triclinic_vectors(triclinic_box(box[0], box[1], box[2])) - method = mda.lib.distances._determine_method(points, points, - cutoff, box=box) + size=(npoints, 3)).astype(np.float32)) + method = mda.lib.distances._determine_method(points, points, cutoff, box=box) assert_equal(method.__name__, meth)