diff --git a/package/CHANGELOG b/package/CHANGELOG index 05e9b078d1c..08234a6cb6e 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -20,6 +20,12 @@ The rules for this file: Enhancements + + * Modified topology.guessers.guess_bonds to automatically select the + fastest method for guessing bonds using + lib.distance.self_capped_distance (PR # 2006) + * Added lib.distances.self_capped_distance to internally select the + optimized method for distance evaluations of coordinates with itself. (PR # 2006) * Added augment functionality to create relevant images of particles in the vicinity of central cell to handle periodic boundary conditions (PR #1977) diff --git a/package/MDAnalysis/lib/distances.py b/package/MDAnalysis/lib/distances.py index 57a5625b074..1cb86147235 100644 --- a/package/MDAnalysis/lib/distances.py +++ b/package/MDAnalysis/lib/distances.py @@ -59,6 +59,8 @@ .. autofunction:: calc_angles(atom1, atom2, atom3 [,box [, result [, backend]]]) .. autofunction:: calc_dihedrals(atom1, atom2, atom3, atom4 [,box [, result [, backend]]]) .. autofunction:: apply_PBC(coordinates, box [, backend]) +.. autofunction:: capped_distance(reference, configuration, max_cutoff [, min_cutoff [, box [, method]]]) +.. autofunction:: self_capped_distance(reference, max_cutoff, [, min_cutoff [, box [, method]]]) .. autofunction:: transform_RtoS(coordinates, box [, backend]) .. autofunction:: transform_StoR(coordinates, box [,backend]) .. autofunction:: MDAnalysis.lib._augment.augment_coordinates(coordinates, box, radius) @@ -400,8 +402,9 @@ def self_distance_array(reference, box=None, result=None, backend="serial"): return distances -def capped_distance(reference, configuration, max_cutoff, min_cutoff=None, box=None, method=None): - """Calculates the pairs and distance within a specified distance +def capped_distance(reference, configuration, max_cutoff, min_cutoff=None, + box=None, method=None): + """Calculates the pairs and distances within a specified distance If a *box* is supplied, then a minimum image convention is used to evaluate the distances. @@ -460,7 +463,6 @@ def capped_distance(reference, configuration, max_cutoff, min_cutoff=None, box=N .. SeeAlso:: :func:'MDAnalysis.lib.distances.distance_array' .. SeeAlso:: :func:'MDAnalysis.lib.pkdtree.PeriodicKDTree' - """ if box is not None: if box.shape[0] != 6: @@ -483,12 +485,38 @@ def _determine_method(reference, configuration, max_cutoff, min_cutoff=None, All the rules to select the method based on the input can be incorporated here. + Parameters + ---------- + reference : array + reference coordinates array with shape ``reference.shape = (3,)`` + or ``reference.shape = (len(reference), 3)`` + configuration : array + Configuration coordinate array with shape ``reference.shape = (3,)`` + or ``reference.shape = (len(reference), 3)`` + max_cutoff : float + Maximum cutoff distance between the reference and configuration + min_cutoff : (optional) float + Minimum cutoff distance between reference and configuration [None] + box : (optional) array or None + The dimensions, if provided, must be provided in the same + The unitcell dimesions for this system format as returned + by :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`: + ``[lx,ly, lz, alpha, beta, gamma]``. Minimum image convention + is applied if the box is provided [None] + method : (optional) 'bruteforce' or 'pkdtree' or 'None' + Keyword to override the automatic guessing of method built-in + in the function [None] + Returns ------- - Function object based on the rules and specified method - Currently implemented methods are - bruteforce : returns ``_bruteforce_capped`` - PKDtree : return ``_pkdtree_capped` + Method : Function object + Returns function object based on the rules and specified method + + Note + ---- + Currently implemented methods are present in the ``methods`` dictionary + bruteforce : returns ``_bruteforce_capped`` + PKDtree : return ``_pkdtree_capped` """ methods = {'bruteforce': _bruteforce_capped, @@ -498,37 +526,49 @@ def _determine_method(reference, configuration, max_cutoff, min_cutoff=None, return methods[method] if len(reference) > 5000 and len(configuration) > 5000: - if box is None and reference.shape[0] != 3 and configuration.shape[0] != 3: + if box is None: min_dim = np.array([reference.min(axis=0), configuration.min(axis=0)]) max_dim = np.array([reference.max(axis=0), configuration.max(axis=0)]) size = max_dim.max(axis=0) - min_dim.min(axis=0) - elif box is not None: - if np.allclose(box[3:], 90): - size = box[:3] - else: - tribox = triclinic_vectors(box) - size = tribox.max(axis=0) - tribox.min(axis=0) - - if (np.any(size < 10.0*max_cutoff) and + elif np.allclose(box[3:], 90): + size = box[:3] + else: + tribox = triclinic_vectors(box) + size = tribox.max(axis=0) - tribox.min(axis=0) + + if ((np.any(size < 10.0*max_cutoff) and len(reference) > 100000 and - len(configuration) > 100000): + len(configuration) > 100000)): return methods['bruteforce'] else: return methods['pkdtree'] return methods['bruteforce'] -def _bruteforce_capped(reference, configuration, max_cutoff, min_cutoff=None, box=None): - """ - Using naive distance calulations, returns a list +def _bruteforce_capped(reference, configuration, max_cutoff, + min_cutoff=None, box=None): + """Internal method for bruteforce calculations + + Uses naive distance calulations and returns a list containing the indices with one from each reference and configuration arrays, such that the distance between them is less than the specified cutoff distance + + Returns + ------- + pairs : list + List of ``[(i, j)]`` pairs such that atom-index ``i`` is + from reference and ``j`` from configuration array + distance: list + Distance between ``reference[i]`` and ``configuration[j]`` + atom coordinate + """ pairs, distance = [], [] + reference = np.asarray(reference, dtype=np.float32) configuration = np.asarray(configuration, dtype=np.float32) @@ -543,18 +583,21 @@ def _bruteforce_capped(reference, configuration, max_cutoff, min_cutoff=None, bo for i, coords in enumerate(reference): dist = distance_array(coords[None, :], configuration, box=box)[0] if min_cutoff is not None: - idx = np.where((dist <= max_cutoff) & (dist > min_cutoff))[0] + idx = np.where((dist < max_cutoff) & (dist > min_cutoff))[0] else: - idx = np.where((dist <= max_cutoff))[0] + idx = np.where((dist < max_cutoff))[0] for j in idx: pairs.append((i, j)) distance.append(dist[j]) return pairs, distance -def _pkdtree_capped(reference, configuration, max_cutoff, min_cutoff=None, box=None): +def _pkdtree_capped(reference, configuration, max_cutoff, + min_cutoff=None, box=None): """ Capped Distance evaluations using KDtree. + Uses minimum image convention if *box* is specified + Returns: -------- pairs : list @@ -564,6 +607,7 @@ def _pkdtree_capped(reference, configuration, max_cutoff, min_cutoff=None, box=N distance : list Distance between two atoms corresponding to the (i, j) indices in pairs. + """ from .pkdtree import PeriodicKDTree @@ -600,6 +644,220 @@ def _pkdtree_capped(reference, configuration, max_cutoff, min_cutoff=None, box=N return pairs, distances +def self_capped_distance(reference, max_cutoff, min_cutoff=None, + box=None, method=None): + """Finds all the pairs and respective distances within a specified cutoff + for a configuration *reference* + + If a *box* is supplied, then a minimum image convention is used + to evaluate the distances. + + An automatic guessing of optimized method to calculate the distances is + included in the function. An optional keyword for the method is also + provided. Users can override the method with this functionality. + Currently pkdtree and bruteforce are implemented. + + Parameters + ----------- + reference : array + reference coordinates array with shape ``reference.shape = (3,)`` + or ``reference.shape = (len(reference), 3)`` + max_cutoff : float + Maximum cutoff distance to check the neighbors with itself + min_cutoff : (optional) float + Minimum cutoff distance [None] + box : (optional) array or None + The dimensions, if provided, must be provided in the same + The unitcell dimesions for this system format as returned + by :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`: + ``[lx,ly, lz, alpha, beta, gamma]``. Minimum image convention + is applied if the box is provided [None] + method : (optional) 'bruteforce' or 'pkdtree' or 'None' + Keyword to override the automatic guessing of method built-in + in the function [None] + + Returns + ------- + pairs : array + Pair of indices such that distance between them is + within the ``max_cutoff`` and ``min_cutoff`` + distances : array + Distances corresponding to each pair of indices. + d[k] corresponding to the pairs[i,j] gives the distance between + i-th and j-th coordinate in reference + + .. code-block:: python + + pairs, distances = self_capped_distances(reference, max_cutoff) + for indx, [a,b] in enumerate(pairs): + coord1, coords2 = reference[a], reference[b] + distance = distances[indx] + + Note + ----- + Currently only supports brute force and Periodic KDtree + + .. SeeAlso:: :func:'MDAnalysis.lib.distances.self_distance_array' + .. SeeAlso:: :func:'MDAnalysis.lib.pkdtree.PeriodicKDTree' + """ + if box is not None: + if box.shape[0] != 6: + raise ValueError('Box Argument is of incompatible type. The dimension' + 'should be either None or ' + 'of the type [lx, ly, lz, alpha, beta, gamma]') + method = _determine_method_self(reference, max_cutoff, + min_cutoff=min_cutoff, + box=box, method=method) + pairs, dist = method(reference, max_cutoff, + min_cutoff=min_cutoff, box=box) + + return np.asarray(pairs), np.asarray(dist) + + +def _determine_method_self(reference, max_cutoff, min_cutoff=None, + box=None, method=None): + """ + Switch between different methods based on the the optimized time. + All the rules to select the method based on the input can be + incorporated here. + + Parameters + ---------- + reference : array + reference coordinates array with shape ``reference.shape = (3,)`` + or ``reference.shape = (len(reference), 3)`` + max_cutoff : float + Maximum cutoff distance + min_cutoff : (optional) float + Minimum cutoff distance [None] + box : (optional) array or None + The dimensions, if provided, must be provided in the same + The unitcell dimesions for this system format as returned + by :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`: + ``[lx,ly, lz, alpha, beta, gamma]``. Minimum image convention + is applied if the box is provided [None] + method : (optional) 'bruteforce' or 'pkdtree' or 'None' + Keyword to override the automatic guessing of method built-in + in the function [None] + + Returns + ------- + Method : Function object + Returns function object based on the rules and specified method + + Note + ---- + Currently implemented methods are present in the ``methods`` dictionary + bruteforce : returns ``_bruteforce_capped_self`` + PKDtree : return ``_pkdtree_capped_self`` + + """ + methods = {'bruteforce': _bruteforce_capped_self, + 'pkdtree': _pkdtree_capped_self} + + if method is not None: + return methods[method] + + if len(reference) > 5000: + if box is None: + min_dim = np.array([reference.min(axis=0)]) + max_dim = np.array([reference.max(axis=0)]) + size = max_dim.max(axis=0) - min_dim.min(axis=0) + elif np.allclose(box[3:], 90): + size = box[:3] + else: + tribox = triclinic_vectors(box) + size = tribox.max(axis=0) - tribox.min(axis=0) + + if ((np.any(size < 10.0*max_cutoff) and + (len(reference) > 100000))): + return methods['bruteforce'] + else: + return methods['pkdtree'] + return methods['bruteforce'] + + +def _bruteforce_capped_self(reference, max_cutoff, min_cutoff=None, + box=None): + """Finds all the pairs among the *reference* coordinates within + a fixed distance using brute force method + + Internal method using brute force method to evaluate all the pairs + of atoms within a fixed distance. + + Returns + ------- + pairs : array + Arrray of ``[i, j]`` pairs such that atom-index ``i`` + and ``j`` from reference array are within the fixed distance + distance: array + Distance between ``reference[i]`` and ``reference[j]`` + atom coordinate + + """ + pairs, distance = [], [] + + reference = np.asarray(reference, dtype=np.float32) + if reference.shape == (3, ): + reference = reference[None, :] + for i, coords in enumerate(reference): + # Each pair of atoms needs to be checked only once. + # Only calculate distance for atomA and atomB + # if atomidA < atomidB + dist = distance_array(coords[None, :], reference[i+1:], + box=box)[0] + + if min_cutoff is not None: + idx = np.where((dist < max_cutoff) & (dist > min_cutoff))[0] + else: + idx = np.where((dist < max_cutoff))[0] + for other_idx in idx: + # Actual atomid for atomB + # can be direclty obtained in this way + j = other_idx + 1 + i + pairs.append((i, j)) + distance.append(dist[other_idx]) + return np.asarray(pairs), np.asarray(distance) + + +def _pkdtree_capped_self(reference, max_cutoff, min_cutoff=None, + box=None): + """Finds all the pairs among the coordinates within a fixed distance + using PeriodicKDTree + + Internal method using PeriodicKDTree method to evaluate all the pairs + of atoms within a fixed distance. + + Returns + ------- + pairs : array + Array of ``[(i, j)]`` pairs such that atom-index ``i`` + and ``j`` from reference array are within the fixed distance + distance: array + Distance between ``reference[i]`` and ``reference[j]`` + atom coordinate + + """ + from .pkdtree import PeriodicKDTree + + reference = np.asarray(reference, dtype=np.float32) + if reference.shape == (3, ): + reference = reference[None, :] + + pairs, distance = [], [] + kdtree = PeriodicKDTree(box=box) + cut = max_cutoff if box is not None else None + kdtree.set_coords(reference, cutoff=cut) + pairs = kdtree.search_pairs(max_cutoff) + if pairs.size > 0: + refA, refB = pairs[:, 0], pairs[:, 1] + distance = calc_bonds(reference[refA], reference[refB], box=box) + if min_cutoff is not None: + mask = np.where(distance > min_cutoff)[0] + pairs, distance = pairs[mask], distance[mask] + return np.asarray(pairs), np.asarray(distance) + + def transform_RtoS(inputcoords, box, backend="serial"): """Transform an array of coordinates from real space to S space (aka lambda space) diff --git a/package/MDAnalysis/topology/guessers.py b/package/MDAnalysis/topology/guessers.py index 06689168079..bed9cca8b05 100644 --- a/package/MDAnalysis/topology/guessers.py +++ b/package/MDAnalysis/topology/guessers.py @@ -232,25 +232,14 @@ def guess_bonds(atoms, coords, box=None, **kwargs): bonds = [] - for i, atom in enumerate(atoms[:-1]): - vdw_i = vdwradii[atomtypes[i]] - max_d = (vdw_i + max_vdw) * fudge_factor - - # using self_distance_array scales O(n^2) - # 20,000 atoms = 1.6 Gb memory - dist = distances.distance_array(coords[i][None, :], coords[i + 1:], - box=box)[0] - idx = np.where((dist > lower_bound) & (dist <= max_d))[0] - - for a in idx: - j = i + 1 + a - atom_j = atoms[j] - - if dist[a] < (vdw_i + vdwradii[atomtypes[j]]) * fudge_factor: - # because of method used, same bond won't be seen twice, - # so don't need to worry about duplicates - bonds.append((atom.index, atom_j.index)) - + pairs, dist = distances.self_capped_distance(coords, + max_cutoff=2.0*max_vdw, + min_cutoff=lower_bound, + box=box) + for idx, (i, j) in enumerate(pairs): + d = (vdwradii[atomtypes[i]] + vdwradii[atomtypes[j]])*fudge_factor + if (dist[idx] < d): + bonds.append((atoms[i].index, atoms[j].index)) return tuple(bonds) diff --git a/testsuite/MDAnalysisTests/lib/test_distances.py b/testsuite/MDAnalysisTests/lib/test_distances.py index 37133cb5abc..38f634844e3 100644 --- a/testsuite/MDAnalysisTests/lib/test_distances.py +++ b/testsuite/MDAnalysisTests/lib/test_distances.py @@ -106,7 +106,56 @@ def test_capped_distance_checkbrute(npoints, box, query, method, min_cutoff): assert_equal(np.sort(found_pairs, axis=0), np.sort(indices[1], axis=0)) +@pytest.mark.parametrize('npoints', npoints_1) +@pytest.mark.parametrize('box', boxes_1) +@pytest.mark.parametrize('method', method_1) +@pytest.mark.parametrize('min_cutoff', min_cutoff_1) +def test_self_capped_distance(npoints, box, method, min_cutoff): + np.random.seed(90003) + points = (np.random.uniform(low=0, high=1.0, + size=(npoints, 3))*(boxes_1[0][:3])).astype(np.float32) + max_cutoff = 0.1 + pairs, distance = mda.lib.distances.self_capped_distance(points, + max_cutoff, + min_cutoff=min_cutoff, + box=box, + method=method) + found_pairs, found_distance = [], [] + for i, coord in enumerate(points): + dist = mda.lib.distances.distance_array(coord[None, :], + points[i+1:], + box=box) + if min_cutoff is not None: + idx = np.where((dist < max_cutoff) & (dist > min_cutoff))[0] + else: + idx = np.where((dist < max_cutoff))[0] + for other_idx in idx: + j = other_idx + 1 + i + found_pairs.append((i, j)) + found_distance.append(dist[other_idx]) + assert_equal(len(pairs), len(found_pairs)) + +@pytest.mark.parametrize('box', (None, + np.array([1, 1, 1, 90, 90, 90], dtype=np.float32), + np.array([1, 1, 1, 60, 75, 80], dtype=np.float32))) +@pytest.mark.parametrize('npoints,cutoff,meth', + [(1, 0.02, '_bruteforce_capped_self'), + (1, 0.2, '_bruteforce_capped_self'), + (6000, 0.02, '_pkdtree_capped_self'), + (6000, 0.2, '_pkdtree_capped_self'), + (200000, 0.02, '_pkdtree_capped_self'), + (200000, 0.2, '_bruteforce_capped_self')]) +def test_method_selfselection(box, npoints, cutoff, meth): + np.random.seed(90003) + points = (np.random.uniform(low=0, high=1.0, + size=(npoints, 3))).astype(np.float32) + method = mda.lib.distances._determine_method_self(points, cutoff, box=box) + assert_equal(method.__name__, meth) + +@pytest.mark.parametrize('box', (None, + np.array([1, 1, 1, 90, 90, 90], dtype=np.float32), + np.array([1, 1, 1, 60, 75, 80], dtype=np.float32))) @pytest.mark.parametrize('npoints,cutoff,meth', [(1, 0.02, '_bruteforce_capped'), (1, 0.2, '_bruteforce_capped'), @@ -114,20 +163,11 @@ def test_capped_distance_checkbrute(npoints, box, query, method, min_cutoff): (6000, 0.2, '_pkdtree_capped'), (200000, 0.02, '_pkdtree_capped'), (200000, 0.2, '_bruteforce_capped')]) -def test_method_selection(npoints, cutoff, meth): +def test_method_selection(box, npoints, cutoff, meth): np.random.seed(90003) - box = np.array([1, 1, 1, 90, 90, 90], dtype=np.float32) points = (np.random.uniform(low=0, high=1.0, - size=(npoints, 3)) * (box[:3])).astype(np.float32) - if box is not None: - boxtype = mda.lib.distances._box_check(box) - # Convert [A,B,C,alpha,beta,gamma] to [[A],[B],[C]] - if (boxtype == 'tri_box'): - box = triclinic_vectors(box) - if (boxtype == 'tri_vecs_bad'): - box = triclinic_vectors(triclinic_box(box[0], box[1], box[2])) - method = mda.lib.distances._determine_method(points, points, - cutoff, box=box) + size=(npoints, 3)).astype(np.float32)) + method = mda.lib.distances._determine_method(points, points, cutoff, box=box) assert_equal(method.__name__, meth) diff --git a/testsuite/MDAnalysisTests/topology/test_guessers.py b/testsuite/MDAnalysisTests/topology/test_guessers.py index 574d8bd7a45..7bbb585bf22 100644 --- a/testsuite/MDAnalysisTests/topology/test_guessers.py +++ b/testsuite/MDAnalysisTests/topology/test_guessers.py @@ -25,11 +25,13 @@ from numpy.testing import assert_equal import numpy as np +import MDAnalysis as mda from MDAnalysis.topology import guessers from MDAnalysis.core.topologyattrs import Angles from MDAnalysisTests import make_Universe from MDAnalysisTests.core.test_fragments import make_starshape +from MDAnalysisTests.datafiles import two_water_gro class TestGuessMasses(object): @@ -99,3 +101,12 @@ def test_guess_impropers(): vals = guessers.guess_improper_dihedrals(ag.angles) assert_equal(len(vals), 12) + + +def test_guess_bonds(): + u = mda.Universe(two_water_gro) + bonds = guessers.guess_bonds(u.atoms, u.atoms.positions, u.dimensions) + assert_equal(bonds, ((0, 1), + (0, 2), + (3, 4), + (3, 5)))