diff --git a/.appveyor.yml b/.appveyor.yml index b21008db6c9..7669c2e4009 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -7,13 +7,6 @@ max_jobs: 100 cache: - '%LOCALAPPDATA%\pip\Cache' -matrix: - # FIXME: allow all Windows builds - # to fail for now; remove this - # when we have full Windows compatibility - allow_failures: - - PYTHON_ARCH: 64 - image: - Visual Studio 2015 diff --git a/package/.coveragerc b/package/.coveragerc new file mode 100644 index 00000000000..37388f00129 --- /dev/null +++ b/package/.coveragerc @@ -0,0 +1,2 @@ +[run] +plugins = Cython.Coverage \ No newline at end of file diff --git a/package/CHANGELOG b/package/CHANGELOG index 1fb8fee83a1..be3d5df0743 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -14,12 +14,18 @@ The rules for this file: ------------------------------------------------------------------------------ ??/??/18 tylerjereddy, richardjgowers, palnabarun, orbeckst, kain88-de, zemanj, - VOD555, davidercruz, jbarnoud, ayushsuhane, hfmull, micaela-matta + VOD555, davidercruz, jbarnoud, ayushsuhane, hfmull, micaela-matta, + sebastien.buchoux * 0.18.1 Enhancements + * Added a wrapper of lib.nsgrid in lib.distances.self_capped_distance + and lib.distances.capped_distanceto automatically chose the fastest + method for distance based calculations. (PR #2008) + * Added Grid search functionality in lib.nsgrid for faster distance based + calculations. (PR #2008) * Modified around selections to work with KDTree and periodic boundary conditions. Should reduce memory usage (#974 PR #2022) * Modified topology.guessers.guess_bonds to automatically select the diff --git a/package/MDAnalysis/lib/__init__.py b/package/MDAnalysis/lib/__init__.py index 51c148dde65..37ebc2f811c 100644 --- a/package/MDAnalysis/lib/__init__.py +++ b/package/MDAnalysis/lib/__init__.py @@ -29,7 +29,7 @@ from __future__ import absolute_import __all__ = ['log', 'transformations', 'util', 'mdamath', 'distances', - 'NeighborSearch', 'formats', 'pkdtree'] + 'NeighborSearch', 'formats', 'pkdtree', 'nsgrid'] from . import log from . import transformations @@ -39,3 +39,4 @@ from . import NeighborSearch from . import formats from . import pkdtree +from . import nsgrid \ No newline at end of file diff --git a/package/MDAnalysis/lib/distances.py b/package/MDAnalysis/lib/distances.py index 1cb86147235..71608a72fca 100644 --- a/package/MDAnalysis/lib/distances.py +++ b/package/MDAnalysis/lib/distances.py @@ -462,7 +462,8 @@ def capped_distance(reference, configuration, max_cutoff, min_cutoff=None, Currently only supports brute force and Periodic KDtree .. SeeAlso:: :func:'MDAnalysis.lib.distances.distance_array' - .. SeeAlso:: :func:'MDAnalysis.lib.pkdtree.PeriodicKDTree' + .. SeeAlso:: :func:'MDAnalysis.lib.pkdtree.PeriodicKDTree.search' + .. SeeAlso:: :class:'MDAnalysis.lib.nsgrid.FastNS.search' """ if box is not None: if box.shape[0] != 6: @@ -517,10 +518,12 @@ def _determine_method(reference, configuration, max_cutoff, min_cutoff=None, Currently implemented methods are present in the ``methods`` dictionary bruteforce : returns ``_bruteforce_capped`` PKDtree : return ``_pkdtree_capped` + NSGrid : return ``_nsgrid_capped` """ methods = {'bruteforce': _bruteforce_capped, - 'pkdtree': _pkdtree_capped} + 'pkdtree': _pkdtree_capped, + 'nsgrid': _nsgrid_capped} if method is not None: return methods[method] @@ -568,7 +571,6 @@ def _bruteforce_capped(reference, configuration, max_cutoff, """ pairs, distance = [], [] - reference = np.asarray(reference, dtype=np.float32) configuration = np.asarray(configuration, dtype=np.float32) @@ -644,6 +646,77 @@ def _pkdtree_capped(reference, configuration, max_cutoff, return pairs, distances +def _nsgrid_capped(reference, configuration, max_cutoff, min_cutoff=None, + box=None): + """Search all the pairs in *reference* and *configuration* within + a specified distance using Grid Search + + + Parameters + ----------- + reference : array + reference coordinates array with shape ``reference.shape = (3,)`` + or ``reference.shape = (len(reference), 3)``. + configuration : array + Configuration coordinate array with shape ``reference.shape = (3,)`` + or ``reference.shape = (len(reference), 3)`` + max_cutoff : float + Maximum cutoff distance between the reference and configuration + min_cutoff : (optional) float + Minimum cutoff distance between reference and configuration [None] + box : array + The dimensions, if provided, must be provided in the same + The unitcell dimesions for this system format as returned + by :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`: + ``[lx,ly, lz, alpha, beta, gamma]``. Minimum image convention + is applied if the box is provided + + """ + from .nsgrid import FastNS + if reference.shape == (3, ): + reference = reference[None, :] + if configuration.shape == (3, ): + configuration = configuration[None, :] + + if box is None: + # create a pseudobox + # define the max range + # and supply the pseudobox + # along with only one set of coordinates + pseudobox = np.zeros(6, dtype=np.float32) + all_coords = np.concatenate([reference, configuration]) + lmax = all_coords.max(axis=0) + lmin = all_coords.min(axis=0) + # Using maximum dimension as the box size + boxsize = (lmax-lmin).max() + # to avoid failures of very close particles + # but with larger cutoff + if boxsize < 2*max_cutoff: + # just enough box size so that NSGrid doesnot fails + sizefactor = 2.2*max_cutoff/boxsize + else: + sizefactor = 1.2 + pseudobox[:3] = sizefactor*boxsize + pseudobox[3:] = 90. + shiftref, shiftconf = reference.copy(), configuration.copy() + # Extra padding near the origin + shiftref -= lmin - 0.1*boxsize + shiftconf -= lmin - 0.1*boxsize + gridsearch = FastNS(max_cutoff, shiftconf, box=pseudobox, pbc=False) + results = gridsearch.search(shiftref) + else: + gridsearch = FastNS(max_cutoff, configuration, box=box) + results = gridsearch.search(reference) + + pairs = results.get_pairs() + pair_distance = results.get_pair_distances() + + if min_cutoff is not None: + idx = pair_distance > min_cutoff + pairs, pair_distance = pairs[idx], pair_distance[idx] + return pairs, pair_distance + + def self_capped_distance(reference, max_cutoff, min_cutoff=None, box=None, method=None): """Finds all the pairs and respective distances within a specified cutoff @@ -695,10 +768,11 @@ def self_capped_distance(reference, max_cutoff, min_cutoff=None, Note ----- - Currently only supports brute force and Periodic KDtree + Currently only supports brute force, Periodic KDtree and Grid Search .. SeeAlso:: :func:'MDAnalysis.lib.distances.self_distance_array' - .. SeeAlso:: :func:'MDAnalysis.lib.pkdtree.PeriodicKDTree' + .. SeeAlso:: :func:'MDAnalysis.lib.pkdtree.PeriodicKDTree.search' + .. SeeAlso:: :func:'MDAnalysis.lib.nsgrid.FastNS.self_search' """ if box is not None: if box.shape[0] != 6: @@ -750,10 +824,12 @@ def _determine_method_self(reference, max_cutoff, min_cutoff=None, Currently implemented methods are present in the ``methods`` dictionary bruteforce : returns ``_bruteforce_capped_self`` PKDtree : return ``_pkdtree_capped_self`` + NSGrid : return ``_nsgrid_capped_self`` """ methods = {'bruteforce': _bruteforce_capped_self, - 'pkdtree': _pkdtree_capped_self} + 'pkdtree': _pkdtree_capped_self, + 'nsgrid': _nsgrid_capped_self} if method is not None: return methods[method] @@ -858,6 +934,64 @@ def _pkdtree_capped_self(reference, max_cutoff, min_cutoff=None, return np.asarray(pairs), np.asarray(distance) +def _nsgrid_capped_self(reference, max_cutoff, min_cutoff=None, + box=None): + """Finds all the pairs among the *reference* coordinates within + a fixed distance using gridsearch + + Returns + ------- + pairs : array + Arrray of ``[i, j]`` pairs such that atom-index ``i`` + and ``j`` from reference array are within a fixed distance + distance: array + Distance between ``reference[i]`` and ``reference[j]`` + atom coordinate + + """ + from .nsgrid import FastNS + + reference = np.asarray(reference, dtype=np.float32) + if reference.shape == (3, ) or len(reference) == 1: + return [], [] + + if box is None: + # create a pseudobox + # define the max range + # and supply the pseudobox + # along with only one set of coordinates + pseudobox = np.zeros(6, dtype=np.float32) + lmax = reference.max(axis=0) + lmin = reference.min(axis=0) + # Using maximum dimension as the box size + boxsize = (lmax-lmin).max() + # to avoid failures of very close particles + # but with larger cutoff + if boxsize < 2*max_cutoff: + # just enough box size so that NSGrid doesnot fails + sizefactor = 2.2*max_cutoff/boxsize + else: + sizefactor = 1.2 + pseudobox[:3] = sizefactor*boxsize + pseudobox[3:] = 90. + shiftref = reference.copy() + # Extra padding near the origin + shiftref -= lmin - 0.1*boxsize + gridsearch = FastNS(max_cutoff, shiftref, box=pseudobox, pbc=False) + results = gridsearch.self_search() + else: + gridsearch = FastNS(max_cutoff, reference, box=box) + results = gridsearch.self_search() + + pairs = results.get_pairs()[::2, :] + pair_distance = results.get_pair_distances()[::2] + + if min_cutoff is not None: + idx = pair_distance > min_cutoff + pairs, pair_distance = pairs[idx], pair_distance[idx] + return pairs, pair_distance + + def transform_RtoS(inputcoords, box, backend="serial"): """Transform an array of coordinates from real space to S space (aka lambda space) diff --git a/package/MDAnalysis/lib/nsgrid.pyx b/package/MDAnalysis/lib/nsgrid.pyx new file mode 100644 index 00000000000..97222e2d9aa --- /dev/null +++ b/package/MDAnalysis/lib/nsgrid.pyx @@ -0,0 +1,996 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- https://www.mdanalysis.org +# +# Copyright (C) 2013-2018 Sébastien Buchoux +# Copyright (c) 2018 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v3 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# + + +# cython: cdivision=True +# cython: boundscheck=False +# cython: initializedcheck=False +# cython: embedsignature=True + +""" +Neighbor search library --- :mod:`MDAnalysis.lib.nsgrid` +======================================================== + + +About the code +-------------- + +This Neighbor search library is a serialized Cython version greatly +inspired by the NS grid search implemented in +`GROMACS `_ . + +GROMACS 4.x code (more precisely +`nsgrid.c `_ +and `ns.c `_ ) +was used as reference to write this file. + +GROMACS 4.x code is released under the GNU Public Licence v2. + +About the algorithm +------------------- + +The neighbor search implemented here is based on +`cell lists `_ which allow +computation of pairs [#]_ with a cost of :math:`O(N)`, instead +of :math:`O(N^2)`. The basic algorithm is described in +Appendix F, Page 552 of +``Understanding Molecular Dynamics: From Algorithm to Applications`` by Frenkel and Smit. + +In brief, the algorithm divides the domain into smaller subdomains called `cells` +and distributes every particle to these cells based on their positions. Subsequently, +any distance based query first identifies the corresponding cell position in the +domain followed by distance evaluations within the identified cell and +neighboring cells only. Care must be taken to ensure that `cellsize` is +greater than the desired search distance, otherwise all of the neighbours might +not reflect in the results. + + +.. [#] a pair correspond to two particles that are considered as neighbors . + + +.. versionadded:: 0.19.0 +""" + +from MDAnalysis.lib.distances import _check_array +# Used to handle memory allocation +from cpython.mem cimport PyMem_Malloc, PyMem_Realloc, PyMem_Free +from libc.math cimport sqrt +import numpy as np +from libcpp.vector cimport vector +cimport numpy as np + +# Preprocessor DEFs +DEF DIM = 3 +DEF XX = 0 +DEF YY = 1 +DEF ZZ = 2 +DEF EPSILON = 1e-5 + +ctypedef np.int_t ns_int +ctypedef np.float32_t real +ctypedef real rvec[DIM] +ctypedef ns_int ivec[DIM] +ctypedef real matrix[DIM][DIM] + +ctypedef vector[ns_int] intvec +ctypedef vector[real] realvec + +# Useful Functions +cdef real rvec_norm2(const rvec a) nogil: + return a[XX]*a[XX] + a[YY]*a[YY] + a[ZZ]*a[ZZ] + +cdef void rvec_clear(rvec a) nogil: + a[XX] = 0.0 + a[YY] = 0.0 + a[ZZ] = 0.0 + +############################### +# Utility class to handle PBC # +############################### +cdef struct cPBCBox_t: + matrix box + rvec fbox_diag + rvec hbox_diag + rvec mhbox_diag + real max_cutoff2 + + +# Class to handle PBC calculations +cdef class PBCBox(object): + """ + Cython implementation of + `PBC-related `_ + operations. This class is used by classes :class:`FastNS` + and :class:`NSGrid` to put all particles inside a brick-shaped box + and to compute PBC-aware distance. The class can also handle + non-PBC aware distance evaluations through ``periodic`` argument. + + .. warning:: + This class is not meant to be used by end users. + + .. warning:: + Even if MD triclinic boxes can be handled by this class, + internal optimization is made based on the assumption that + particles are inside a brick-shaped box. When this is not + the case, calculated distances are not + warranted to be exact. + """ + + cdef cPBCBox_t c_pbcbox + cdef bint is_triclinic + cdef bint periodic + + def __init__(self, real[:, ::1] box, bint periodic): + """ + Parameters + ---------- + box : numpy.ndarray + box vectors of shape ``(3, 3)`` or + as returned by ``MDAnalysis.lib.mdamath.triclinic_vectors`` + ``dtype`` must be ``numpy.float32`` + periodic : boolean + ``True`` for PBC-aware calculations + ``False`` for non PBC aware calculations + """ + + self.periodic = periodic + self.update(box) + + cdef void fast_update(self, real[:, ::1] box) nogil: + """ + Updates the internal box parameters for + PBC-aware distance calculations. The internal + box parameters are used to define the brick-shaped + box which is eventually used for distance calculations. + + """ + cdef ns_int i, j + cdef real min_hv2, min_ss, tmp + + # Update matrix + self.is_triclinic = False + for i in range(DIM): + for j in range(DIM): + self.c_pbcbox.box[i][j] = box[i, j] + + if i != j: + if box[i, j] > EPSILON: + self.is_triclinic = True + + # Update diagonals + for i in range(DIM): + self.c_pbcbox.fbox_diag[i] = box[i, i] + self.c_pbcbox.hbox_diag[i] = self.c_pbcbox.fbox_diag[i] * 0.5 + self.c_pbcbox.mhbox_diag[i] = - self.c_pbcbox.hbox_diag[i] + + # Update maximum cutoff + + # Physical limitation of the cut-off + # by half the length of the shortest box vector. + min_hv2 = min(0.25 * rvec_norm2(&box[XX, XX]), 0.25 * rvec_norm2(&box[YY, XX])) + min_hv2 = min(min_hv2, 0.25 * rvec_norm2(&box[ZZ, XX])) + + # Limitation to the smallest diagonal element due to optimizations: + # checking only linear combinations of single box-vectors (2 in x) + # in the grid search and pbc_dx is a lot faster + # than checking all possible combinations. + tmp = box[YY, YY] + if box[ZZ, YY] < 0: + tmp -= box[ZZ, YY] + else: + tmp += box[ZZ, YY] + + min_ss = min(box[XX, XX], min(tmp, box[ZZ, ZZ])) + self.c_pbcbox.max_cutoff2 = min(min_hv2, min_ss * min_ss) + + def update(self, real[:, ::1] box): + """ + Updates internal MD box representation and parameters used for calculations. + + Parameters + ---------- + box : numpy.ndarray + Describes the MD box vectors as returned by + :func:`MDAnalysis.lib.mdamath.triclinic_vectors`. + `dtype` must be :class:`numpy.float32` + + Note + ---- + Call to this method is only needed when the MD box is changed + as it always called when class is instantiated. + + """ + + if box.shape[0] != DIM or box.shape[1] != DIM: + raise ValueError("Box must be a {} x {} matrix. Got: {} x {})".format( + DIM, DIM, box.shape[0], box.shape[1])) + if (box[XX, XX] == 0) or (box[YY, YY] == 0) or (box[ZZ, ZZ] == 0): + raise ValueError("Box does not correspond to PBC=xyz") + self.fast_update(box) + + cdef void fast_pbc_dx(self, rvec ref, rvec other, rvec dx) nogil: + """Dislacement between two points for both + PBC and non-PBC conditions + + Modifies the displacement vector between two points based + on the minimum image convention for PBC aware calculations. + + For non-PBC aware distance evaluations, calculates the + displacement vector without any modifications + """ + + cdef ns_int i, j + + for i in range(DIM): + dx[i] = other[i] - ref[i] + + if self.periodic: + for i in range(DIM-1, -1, -1): + while dx[i] > self.c_pbcbox.hbox_diag[i]: + for j in range(i, -1, -1): + dx[j] -= self.c_pbcbox.box[i][j] + + while dx[i] <= self.c_pbcbox.mhbox_diag[i]: + for j in range(i, -1, -1): + dx[j] += self.c_pbcbox.box[i][j] + + cdef real fast_distance2(self, rvec a, rvec b) nogil: + """Distance calculation between two points + for both PBC and non-PBC aware calculations + + Returns the distance obeying minimum + image convention if periodic is set to ``True`` while + instantiating the ``PBCBox`` object. + """ + + cdef rvec dx + self.fast_pbc_dx(a, b, dx) + return rvec_norm2(dx) + + cdef real[:, ::1]fast_put_atoms_in_bbox(self, real[:, ::1] coords) nogil: + """Shifts all ``coords`` to an orthogonal brick shaped box + + All the coordinates are brought into an orthogonal + box. The box vectors for the brick-shaped box + are defined in ``fast_update`` method. + + """ + + cdef ns_int i, m, d, natoms + cdef real[:, ::1] bbox_coords + + natoms = coords.shape[0] + with gil: + bbox_coords = coords.copy() + + if self.periodic: + if self.is_triclinic: + for i in range(natoms): + for m in range(DIM - 1, -1, -1): + while bbox_coords[i, m] < 0: + for d in range(m+1): + bbox_coords[i, d] += self.c_pbcbox.box[m][d] + while bbox_coords[i, m] >= self.c_pbcbox.box[m][m]: + for d in range(m+1): + bbox_coords[i, d] -= self.c_pbcbox.box[m][d] + else: + for i in range(natoms): + for m in range(DIM): + while bbox_coords[i, m] < 0: + bbox_coords[i, m] += self.c_pbcbox.box[m][m] + while bbox_coords[i, m] >= self.c_pbcbox.box[m][m]: + bbox_coords[i, m] -= self.c_pbcbox.box[m][m] + + return bbox_coords + +######################### +# Neighbor Search Stuff # +######################### + +cdef class NSResults(object): + """Class to store the results + + All the required outputs from :class:`FastNS` is stored in the + instance of this class. All the methods of :class:`FastNS` returns + an instance of this class, which can be used to generate the desired + results on demand. + """ + + cdef readonly real cutoff + cdef ns_int npairs + + cdef real[:, ::1] coords # shape: size, DIM + cdef real[:, ::1] searchcoords + + cdef vector[intvec] indices_buffer + cdef vector[realvec] distances_buffer + cdef vector[ns_int] pairs_buffer + cdef vector[real] pair_distances_buffer + cdef vector[real] pair_distances2_buffer + + def __init__(self, real cutoff, real[:, ::1]coords, real[:, ::1]searchcoords): + """ + Parameters + ---------- + cutoff : float + Specified cutoff distance + coords : numpy.ndarray + Array with coordinates of atoms of shape ``(N, 3)`` for + ``N`` particles. ``dtype`` must be ``numpy.float32`` + searchcoords : numpy.ndarray + Array with query coordinates. Shape must be ``(M, 3)`` + for ``M`` queries. ``dtype`` must be ``numpy.float32`` + """ + + self.cutoff = cutoff + self.coords = coords + self.searchcoords = searchcoords + + self.npairs = 0 + + cdef void add_neighbors(self, ns_int beadid_i, ns_int beadid_j, real distance2) nogil: + """Internal function to add pairs and distances to buffers + + The buffers populated using this method are used by + other methods of this class. This is the + primary function used by :class:`FastNS` to save all + the pair of atoms, + which are considered as neighbors. + """ + + self.pairs_buffer.push_back(beadid_i) + self.pairs_buffer.push_back(beadid_j) + self.pair_distances2_buffer.push_back(distance2) + self.npairs += 1 + + def get_pairs(self): + """Returns all the pairs within the desired cutoff distance + + Returns an array of shape ``(N, 2)``, where N is the number of pairs + between ``reference`` and ``configuration`` within the specified distance. + For every pair ``(i, j)``, ``reference[i]`` and ``configuration[j]`` are + atom positions such that ``reference`` is the position of query + atoms while ``configuration`` coontains the position of group of + atoms used to search against the query atoms. + + Returns + ------- + pairs : numpy.ndarray + pairs of atom indices of neighbors from query + and initial atom coordinates of shape ``(N, 2)`` + """ + + return np.asarray(self.pairs_buffer).reshape(self.npairs, 2) + + def get_pair_distances(self): + """Returns all the distances corresponding to each pair of neighbors + + Returns an array of shape ``N`` where N is the number of pairs + among the query atoms and initial atoms within a specified distance. + Every element ``[i]`` corresponds to the distance between + ``pairs[i, 0]`` and ``pairs[i, 1]``, where pairs is the array + obtained from ``get_pairs()`` + + Returns + ------- + distances : numpy.ndarray + distances between pairs of query and initial + atom coordinates of shape ``N`` + + See Also + -------- + MDAnalysis.lib.nsgrid.NSResults.get_pairs + + """ + + self.pair_distances_buffer = np.sqrt(self.pair_distances2_buffer) + return np.asarray(self.pair_distances_buffer) + + cdef void create_buffers(self) nogil: + """ + Creates buffers to get individual neighbour list and distances + of the query atoms. + + """ + + cdef ns_int i, beadid_i, beadid_j + cdef ns_int idx, nsearch + cdef real dist2, dist + + nsearch = len(self.searchcoords) + + self.indices_buffer = vector[intvec]() + self.distances_buffer = vector[realvec]() + + # initialize rows corresponding to search + for i in range(nsearch): + self.indices_buffer.push_back(intvec()) + self.distances_buffer.push_back(realvec()) + + for i in range(0, 2*self.npairs, 2): + beadid_i = self.pairs_buffer[i] + beadid_j = self.pairs_buffer[i + 1] + + dist2 = self.pair_distances2_buffer[i//2] + + self.indices_buffer[beadid_i].push_back(beadid_j) + + dist = sqrt(dist2) + + self.distances_buffer[beadid_i].push_back(dist) + + def get_indices(self): + """Individual neighbours of query atom + + For every queried atom ``i``, an array of all its neighbors + indices can be obtained from ``get_indices()[i]`` + + Returns + ------- + indices : list + Indices of neighboring atoms. + Every element i.e. ``indices[i]`` will be a list of + size ``m`` where m is the number of neighbours of + query atom[i]. + + .. code-block:: python + + results = NSResults() + indices = results.get_indices() + + ``indices[i]`` will output a list of neighboring + atoms of ``atom[i]`` from query atoms ``atom``. + ``indices[i][j]`` will give the atom-id of initial coordinates + such that ``initial_atom[indices[i][j]]`` is a neighbor of ``atom[i]`` + + """ + + if self.indices_buffer.empty(): + self.create_buffers() + return list(self.indices_buffer) + + def get_distances(self): + """Distance corresponding to individual neighbors of query atom + + For every queried atom ``i``, a list of all the distances + from its neighboring atoms can be obtained from ``get_distances()[i]``. + Every ``distance[i][j]`` will correspond + to the distance between atoms ``atom[i]`` from the query + atoms and ``atom[indices[j]]`` from the initialized + set of coordinates, where ``indices`` can be obtained + by ``get_indices()`` + + Returns + ------- + distances : np.ndarray + Every element i.e. ``distances[i]`` will be an array of + shape ``m`` where m is the number of neighbours of + query atom[i]. + + .. code-block:: python + + results = NSResults() + distances = results.get_distances() + + + atoms of ``atom[i]`` and query atoms ``atom``. + ``indices[i][j]`` will give the atom-id of initial coordinates + such that ``initial_atom[indices[i][j]]`` is a neighbor of ``atom[i]`` + + See Also + -------- + MDAnalysis.lib.nsgrid.NSResults.get_indices + + """ + + if self.distances_buffer.empty(): + self.create_buffers() + return list(self.distances_buffer) + +cdef class NSGrid(object): + """Constructs a uniform cuboidal grid for a brick-shaped box + + This class uses :class:`PBCBox` to define the brick shaped box + It is essential to initialize the box with :class:`PBCBox` + inorder to form the grid. + + The domain is subdivided into number of cells based on the desired search + radius. Ideally cellsize should be equal to the search radius, but small + search radius leads to large cell-list data strucutres. + An optimization of cutoff is imposed to limit the size of data + structure such that the cellsize is always greater than or + equal to cutoff distance. + + Note + ---- + This class assumes that all the coordinates are already + inside the brick shaped box. Care must be taken to ensure + all the particles are within the brick shaped box as + defined by :class:`PBCBox`. This can be ensured by using + :func:`~MDAnalysis.lib.nsgrid.PBCBox.fast_put_atoms_in_bbox` + + .. warning:: + This class is not meant to be used by end users. + + """ + + cdef readonly real cutoff # cutoff + cdef ns_int size # total cells + cdef ns_int ncoords # number of coordinates + cdef ns_int[DIM] ncells # individual cells in every dimension + cdef ns_int[DIM] cell_offsets # Cell Multipliers + cdef real[DIM] cellsize # cell size in every dimension + cdef ns_int nbeads_per_cell # maximum beads + cdef ns_int *nbeads # size (Number of beads in every cell) + cdef ns_int *beadids # size * nbeads_per_cell (Beadids in every cell) + cdef ns_int *cellids # ncoords (Cell occupation id for every atom) + cdef bint force # To negate the effects of optimized cutoff + + def __init__(self, ncoords, cutoff, PBCBox box, max_size, force=False): + """ + Parameters + ---------- + ncoords : int + Number of coordinates to fill inside the brick shaped box + cutoff : float + Desired cutoff radius + box : PBCBox + Instance of :class:`PBCBox` + max_size : int + Maximum total number of cells + force : boolean + Optimizes cutoff if set to ``False`` [False] + """ + + cdef ns_int i + cdef ns_int ncellx, ncelly, ncellz, size, nbeadspercell + cdef ns_int xi, yi, zi + cdef real bbox_vol + + self.ncoords = ncoords + + # Calculate best cutoff + self.cutoff = cutoff + if not force: + bbox_vol = box.c_pbcbox.box[XX][XX] * box.c_pbcbox.box[YY][YY] * box.c_pbcbox.box[YY][YY] + size = bbox_vol/cutoff**3 + nbeadspercell = ncoords/size + while bbox_vol/self.cutoff**3 > max_size: + self.cutoff *= 1.2 + + for i in range(DIM): + self.ncells[i] = (box.c_pbcbox.box[i][i] / self.cutoff) + self.cellsize[i] = box.c_pbcbox.box[i][i] / self.ncells[i] + self.size = self.ncells[XX] * self.ncells[YY] * self.ncells[ZZ] + + self.cell_offsets[XX] = 0 + self.cell_offsets[YY] = self.ncells[XX] + self.cell_offsets[ZZ] = self.ncells[XX] * self.ncells[YY] + + # Allocate memory + # Number of beads in every cell + self.nbeads = PyMem_Malloc(sizeof(ns_int) * self.size) + if not self.nbeads: + raise MemoryError("Could not allocate memory from NSGrid.nbeads ({} bits requested)".format(sizeof(ns_int) * self.size)) + self.beadids = NULL + # Cellindex of every bead + self.cellids = PyMem_Malloc(sizeof(ns_int) * self.ncoords) + if not self.cellids: + raise MemoryError("Could not allocate memory from NSGrid.cellids ({} bits requested)".format(sizeof(ns_int) * self.ncoords)) + self.nbeads_per_cell = 0 + + for i in range(self.size): + self.nbeads[i] = 0 + + def __dealloc__(self): + PyMem_Free(self.nbeads) + PyMem_Free(self.beadids) + PyMem_Free(self.cellids) + + cdef ns_int coord2cellid(self, rvec coord) nogil: + """Finds the cell-id for the given coordinate inside the brick shaped box + + Note + ---- + Assumes the coordinate is already inside the brick shaped box. + Return wrong cell-id if this is not the case + """ + return (coord[ZZ] / self.cellsize[ZZ]) * (self.cell_offsets[ZZ]) +\ + (coord[YY] / self.cellsize[YY]) * self.cell_offsets[YY] + \ + (coord[XX] / self.cellsize[XX]) + + cdef bint cellid2cellxyz(self, ns_int cellid, ivec cellxyz) nogil: + """Finds actual cell position `(x, y, z)` from a cell-id + """ + + if cellid < 0: + return False + if cellid >= self.size: + return False + + cellxyz[ZZ] = (cellid / self.cell_offsets[ZZ]) + cellid -= cellxyz[ZZ] * self.cell_offsets[ZZ] + + cellxyz[YY] = (cellid / self.cell_offsets[YY]) + cellxyz[XX] = cellid - cellxyz[YY] * self.cell_offsets[YY] + + return True + + cdef fill_grid(self, real[:, ::1] coords): + """Sorts atoms into cells based on their position in the brick shaped box + + Every atom inside the brick shaped box is assigned a + cell-id based on its position. Another list ``beadids`` + sort the atom-ids in each cell. + + Note + ---- + The method fails if any coordinate is outside the brick shaped box. + + """ + + cdef ns_int i, cellindex = -1 + cdef ns_int ncoords = coords.shape[0] + cdef ns_int[:] beadcounts = np.empty(self.size, dtype=np.int) + + with nogil: + # Initialize buffers + for i in range(self.size): + beadcounts[i] = 0 + + # First loop: find cellindex for each bead + for i in range(ncoords): + cellindex = self.coord2cellid(&coords[i, 0]) + + self.nbeads[cellindex] += 1 + self.cellids[i] = cellindex + + if self.nbeads[cellindex] > self.nbeads_per_cell: + self.nbeads_per_cell = self.nbeads[cellindex] + + # Allocate memory + self.beadids = PyMem_Malloc(sizeof(ns_int) * self.size * self.nbeads_per_cell) # np.empty((self.size, nbeads_max), dtype=np.int) + if not self.beadids: + raise MemoryError("Could not allocate memory for NSGrid.beadids ({} bits requested)".format(sizeof(ns_int) * self.size * self.nbeads_per_cell)) + + with nogil: + # Second loop: fill grid + for i in range(ncoords): + + # Add bead to grid cell + cellindex = self.cellids[i] + self.beadids[cellindex * self.nbeads_per_cell + beadcounts[cellindex]] = i + beadcounts[cellindex] += 1 + + +cdef class FastNS(object): + """Grid based search between two group of atoms + + Instantiates a class object which uses :class:`PBCBox` and + :class:`NSGrid` to construct a cuboidal + grid in an orthogonal brick shaped box. + + Minimum image convention is used for distance evaluations + if pbc is set to ``True``. + """ + cdef PBCBox box + cdef real[:, ::1] coords + cdef real[:, ::1] coords_bbox + cdef readonly real cutoff + cdef NSGrid grid + cdef ns_int max_gridsize + cdef bint periodic + + def __init__(self, cutoff, coords, box, max_gridsize=5000, pbc=True): + """ + Initialize the grid and sort the coordinates in respective + cells by shifting the coordinates in a brick shaped box. + The brick shaped box is defined by :class:`PBCBox` + and cuboidal grid is initialize by :class:`NSGrid`. + If box is supplied, periodic shifts along box vectors are used + to contain all the coordinates inside the brick shaped box. + If box is not supplied, the range of coordinates i.e. + ``[xmax, ymax, zmax] - [xmin, ymin, zmin]`` should be used + to construct a pseudo box. Subsequently, the origin should also be + shifted to ``[xmin, ymin, zmin]``. These arguments must be provided + to the function. + + Parameters + ---------- + cutoff : float + Desired cutoff distance + coords : numpy.ndarray + atom coordinates of shape ``(N, 3)`` for ``N`` atoms. + ``dtype=numpy.float32``. For Non-PBC calculations, + all the coords must be within the bounding box specified + by ``box`` + box : numpy.ndarray + Box dimension of shape (6, ). The dimensions must be + provided in the same format as returned + by :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`: + ``[lx, ly, lz, alpha, beta, gamma]``. For non-PBC + evaluations, provide an orthogonal bounding box + (dtype = numpy.float32) + max_gridsize : int + maximum number of cells in the grid. This parameter + can be tuned for superior performance. + pbc : boolean + Handle to switch periodic boundary conditions on/off [True] + + Note + ---- + * ``pbc=False`` Only works for orthogonal boxes. + * Care must be taken such that all particles are inside + the bounding box as defined by the box argument for non-PBC + calculations. + * In case of Non-PBC calculations, a bounding box must be provided + to encompass all the coordinates as well as the search coordinates. + The dimension should be similar to ``box`` argument but for + an orthogonal box. For instance, one valid set of argument + for ``box`` for the case of no PBC could be + ``[10, 10, 10, 90, 90, 90]`` + * Following operations are advisable for non-PBC calculations + + ..code-block:: python + + lmax = all_coords.max(axis=0) + lmin = all_coords.min(axis=0) + pseudobox[:3] = 1.1*(lmax - lmin) + pseudobox[3:] = 90. + shift = all_coords.copy() + shift -= lmin + gridsearch = FastNS(max_cutoff, shift, box=pseudobox, pbc=False) + + """ + + from MDAnalysis.lib.mdamath import triclinic_vectors + + + _check_array(coords, 'coords') + + if np.allclose(box[:3], 0.0): + raise ValueError("Any of the box dimensions cannot be 0") + + self.periodic = pbc + self.coords = coords.copy() + + if box.shape != (3, 3): + box = triclinic_vectors(box) + + self.box = PBCBox(box, self.periodic) + + if cutoff < 0: + raise ValueError("Cutoff must be positive!") + if cutoff * cutoff > self.box.c_pbcbox.max_cutoff2: + raise ValueError("Cutoff greater than maximum cutoff ({:.3f}) given the PBC") + + self.coords_bbox = self.box.fast_put_atoms_in_bbox(self.coords) + + self.cutoff = cutoff + self.max_gridsize = max_gridsize + # Note that self.cutoff might be different from self.grid.cutoff + # due to optimization + self.grid = NSGrid(self.coords_bbox.shape[0], self.cutoff, self.box, self.max_gridsize) + + self.grid.fill_grid(self.coords_bbox) + + def search(self, search_coords): + """Search a group of atoms against initialized coordinates + + Creates a new grid with the query atoms and searches + against the initialized coordinates. The search is exclusive + i.e. only the pairs ``(i, j)`` such that ``atom[i]`` from query atoms + and ``atom[j]`` from the initialized set of coordinates is stored as + neighbors. + + PBC-aware/non PBC-aware calculations are automatically enabled during + the instantiation of :class:FastNS. + + Parameters + ---------- + search_coords : numpy.ndarray + Query coordinates of shape ``(N, 3)`` where + ``N`` is the number of queries + + Returns + ------- + results : NSResults object + The object from :class:NSResults + contains ``get_indices``, ``get_distances``. + ``get_pairs``, ``get_pair_distances`` + + Note + ---- + For non-PBC aware calculations, the current implementation doesn't work + if any of the query coordinates is beyond the range specified in + ``box`` in :func:`MDAnalysis.lib.nsgrid.FastNS`. + + See Also + -------- + MDAnalysis.lib.nsgrid.NSResults + + """ + + cdef ns_int i, j, size_search + cdef ns_int d, m + cdef ns_int current_beadid, bid + cdef ns_int cellindex, cellindex_probe + cdef ns_int xi, yi, zi + + cdef NSResults results + + cdef real d2 + cdef rvec probe + + cdef real[:, ::1] searchcoords + cdef real[:, ::1] searchcoords_bbox + cdef NSGrid searchgrid + cdef bint check + + cdef real cutoff2 = self.cutoff * self.cutoff + cdef ns_int npairs = 0 + _check_array(search_coords, 'search_coords') + + # Generate another grid to search + searchcoords = np.ascontiguousarray(search_coords, dtype=np.float32) + searchcoords_bbox = self.box.fast_put_atoms_in_bbox(searchcoords) + searchgrid = NSGrid(searchcoords_bbox.shape[0], self.grid.cutoff, self.box, self.max_gridsize, force=True) + searchgrid.fill_grid(searchcoords_bbox) + + size_search = searchcoords.shape[0] + + results = NSResults(self.cutoff, self.coords, searchcoords) + + with nogil: + for i in range(size_search): + # Start with first search coordinate + current_beadid = i + # find the cellindex of the coordinate + cellindex = searchgrid.cellids[current_beadid] + for xi in range(DIM): + for yi in range(DIM): + for zi in range(DIM): + check = True + #Probe the search coordinates in a brick shaped box + probe[XX] = searchcoords_bbox[current_beadid, XX] + (xi - 1) * searchgrid.cellsize[XX] + probe[YY] = searchcoords_bbox[current_beadid, YY] + (yi - 1) * searchgrid.cellsize[YY] + probe[ZZ] = searchcoords_bbox[current_beadid, ZZ] + (zi - 1) * searchgrid.cellsize[ZZ] + # Make sure the probe coordinates is inside the brick-shaped box + if self.periodic: + for m in range(DIM - 1, -1, -1): + while probe[m] < 0: + for d in range(m+1): + probe[d] += self.box.c_pbcbox.box[m][d] + while probe[m] >= self.box.c_pbcbox.box[m][m]: + for d in range(m+1): + probe[d] -= self.box.c_pbcbox.box[m][d] + else: + for m in range(DIM -1, -1, -1): + if probe[m] < 0: + check = False + break + if probe[m] > self.box.c_pbcbox.box[m][m]: + check = False + break + if not check: + continue + # Get the cell index corresponding to the probe + cellindex_probe = self.grid.coord2cellid(probe) + # for this cellindex search in grid + for j in range(self.grid.nbeads[cellindex_probe]): + bid = self.grid.beadids[cellindex_probe * self.grid.nbeads_per_cell + j] + # find distance between search coords[i] and coords[bid] + d2 = self.box.fast_distance2(&searchcoords_bbox[current_beadid, XX], &self.coords_bbox[bid, XX]) + if d2 < cutoff2: + results.add_neighbors(current_beadid, bid, d2) + npairs += 1 + return results + + def self_search(self): + """Searches all the pairs within the initialized coordinates + + All the pairs among the initialized coordinates are registered + in hald the time. Although the algorithm is still the same, but + the distance checks can be reduced to half in this particular case + as every pair need not be evaluated twice. + + Returns + ------- + results : NSResults object + The object from :class:NSResults + contains ``get_indices``, ``get_distances``. + ``get_pairs``, ``get_pair_distances`` + + See Also + -------- + MDAnalysis.lib.nsgrid.NSResults + + """ + + cdef ns_int i, j, size_search + cdef ns_int d, m + cdef ns_int current_beadid, bid + cdef ns_int cellindex, cellindex_probe + cdef ns_int xi, yi, zi + + cdef NSResults results + cdef real d2 + cdef rvec probe + + cdef real cutoff2 = self.cutoff * self.cutoff + cdef ns_int npairs = 0 + cdef bint check + + size_search = self.coords.shape[0] + + results = NSResults(self.cutoff, self.coords, self.coords) + + with nogil: + for i in range(size_search): + # Start with first search coordinate + current_beadid = i + # find the cellindex of the coordinate + cellindex = self.grid.cellids[current_beadid] + for xi in range(DIM): + for yi in range(DIM): + for zi in range(DIM): + check = True + # Calculate and/or reinitialize shifted coordinates + # Probe the search coordinates in a brick shaped box + probe[XX] = self.coords_bbox[current_beadid, XX] + (xi - 1) * self.grid.cellsize[XX] + probe[YY] = self.coords_bbox[current_beadid, YY] + (yi - 1) * self.grid.cellsize[YY] + probe[ZZ] = self.coords_bbox[current_beadid, ZZ] + (zi - 1) * self.grid.cellsize[ZZ] + # Make sure the shifted coordinates is inside the brick-shaped box + if self.periodic: + for m in range(DIM - 1, -1, -1): + while probe[m] < 0: + for d in range(m+1): + probe[d] += self.box.c_pbcbox.box[m][d] + while probe[m] >= self.box.c_pbcbox.box[m][m]: + for d in range(m+1): + probe[d] -= self.box.c_pbcbox.box[m][d] + else: + for m in range(DIM -1, -1, -1): + if probe[m] < 0: + check = False + break + elif probe[m] >= self.box.c_pbcbox.box[m][m]: + check = False + break + if not check: + continue + # Get the cell index corresponding to the probe + cellindex_probe = self.grid.coord2cellid(probe) + # for this cellindex search in grid + for j in range(self.grid.nbeads[cellindex_probe]): + bid = self.grid.beadids[cellindex_probe * self.grid.nbeads_per_cell + j] + if bid < current_beadid: + continue + # find distance between search coords[i] and coords[bid] + d2 = self.box.fast_distance2(&self.coords_bbox[current_beadid, XX], &self.coords_bbox[bid, XX]) + if d2 < cutoff2 and d2 > EPSILON: + results.add_neighbors(current_beadid, bid, d2) + results.add_neighbors(bid, current_beadid, d2) + npairs += 2 + return results diff --git a/package/doc/sphinx/source/documentation_pages/lib/nsgrid.rst b/package/doc/sphinx/source/documentation_pages/lib/nsgrid.rst new file mode 100644 index 00000000000..f36f2480b0b --- /dev/null +++ b/package/doc/sphinx/source/documentation_pages/lib/nsgrid.rst @@ -0,0 +1,2 @@ +.. automodule:: MDAnalysis.lib.nsgrid + :members: \ No newline at end of file diff --git a/package/doc/sphinx/source/documentation_pages/lib_modules.rst b/package/doc/sphinx/source/documentation_pages/lib_modules.rst index 5bbe3d041ed..f918f37137e 100644 --- a/package/doc/sphinx/source/documentation_pages/lib_modules.rst +++ b/package/doc/sphinx/source/documentation_pages/lib_modules.rst @@ -36,6 +36,7 @@ functions whereas mathematical functions are to be found in :mod:`MDAnalysis.lib.NeighborSearch` contains classes to do neighbor searches with MDAnalysis objects. +:mod:`MDAnalysis.lib.nsgrid` contains a fast implementation of grid neighbor search. List of modules --------------- @@ -50,6 +51,7 @@ List of modules ./lib/transformations ./lib/qcprot ./lib/util + ./lib/nsgrid Low level file formats ---------------------- diff --git a/package/doc/sphinx/source/documentation_pages/selections.rst b/package/doc/sphinx/source/documentation_pages/selections.rst index a3ed79c98d8..6868acee19c 100644 --- a/package/doc/sphinx/source/documentation_pages/selections.rst +++ b/package/doc/sphinx/source/documentation_pages/selections.rst @@ -92,6 +92,16 @@ atom *seg-name* *residue-number* *atom-name* e.g. ``DMPC 1 C2`` selects the C2 carbon of the first residue of the DMPC segment +altloc *alternative-location* + a selection for atoms where alternative locations are available, which is + often the case with high-resolution crystal structures + e.g. `resid 4 and resname ALA and altloc B` selects only the atoms of ALA-4 + that have an altloc B record. + +moltype *molecule-type* + select by molecule type, e.g. ``moltype Protein_A``. At the moment, only + the TPR format defines the molecule type. + Boolean ------- @@ -189,6 +199,11 @@ byres *selection* e.g. specify the subselection after the byres keyword. ``byres`` is a shortcut to ``same residue as`` +bonded *selection* + selects all atoms that are bonded to selection + eg: ``select name H and bonded name O`` selects only hydrogens bonded to + oxygens + Index ----- diff --git a/package/setup.py b/package/setup.py index 95cb6176670..d8e4c4b3713 100755 --- a/package/setup.py +++ b/package/setup.py @@ -88,6 +88,9 @@ "parallelization module".format( Cython.__version__, required_version)) cython_found = False + + cython_linetrace = os.getenv("CYTHON_TRACE_NOGIL", False) + except ImportError: cython_found = False if not is_release: @@ -300,6 +303,12 @@ def extensions(config): else: mathlib = ['m'] + # Add cython linetrace define if needed + if cython_linetrace: + extra_compile_args.append("-DCYTHON_TRACE_NOGIL") + cpp_extra_compile_args.append("-DCYTHON_TRACE_NOGIL") + + libdcd = MDAExtension('MDAnalysis.lib.formats.libdcd', ['MDAnalysis/lib/formats/libdcd' + source_suffix], include_dirs=include_dirs + ['MDAnalysis/lib/formats/include'], @@ -380,16 +389,26 @@ def extensions(config): libraries=mathlib, define_macros=define_macros, extra_compile_args=extra_compile_args) + nsgrid = MDAExtension('MDAnalysis.lib.nsgrid', + ['MDAnalysis/lib/nsgrid' + source_suffix], + include_dirs=include_dirs, + language='c++', + define_macros=define_macros, + extra_compile_args=cpp_extra_compile_args) pre_exts = [libdcd, distances, distances_omp, qcprot, transformation, libmdaxdr, util, encore_utils, - ap_clustering, spe_dimred, cutil, augment] + ap_clustering, spe_dimred, cutil, augment, nsgrid] + cython_generated = [] if use_cython: extensions = cythonize( pre_exts, - compiler_directives={'linetrace': os.environ.get('CYTHON_TRACE_NOGIL', False)}, + compiler_directives={'linetrace' : cython_linetrace, + 'embedsignature' : cython_linetrace}, ) + if cython_linetrace: + print("Cython coverage will be enabled") for pre_ext, post_ext in zip(pre_exts, extensions): for source in post_ext.sources: if source not in pre_ext.sources: @@ -573,7 +592,7 @@ def dynamic_author_list(): ) # Releases keep their cythonized stuff for shipping. - if not config.get('keep_cythonized', default=is_release): + if not config.get('keep_cythonized', default=is_release) and not cython_linetrace: for cythonized in cythonfiles: try: os.unlink(cythonized) diff --git a/testsuite/MDAnalysisTests/analysis/test_encore.py b/testsuite/MDAnalysisTests/analysis/test_encore.py index 1d0d6c23a48..2dfa26361b0 100644 --- a/testsuite/MDAnalysisTests/analysis/test_encore.py +++ b/testsuite/MDAnalysisTests/analysis/test_encore.py @@ -28,6 +28,7 @@ import tempfile import numpy as np import sys +import os import warnings import pytest @@ -114,6 +115,9 @@ def test_triangular_matrix(self): err_msg="Error in TriangularMatrix: multiplication by scalar gave\ inconsistent results") + @pytest.mark.xfail(os.name == 'nt', + strict=True, + reason="Not yet supported on Windows.") def test_parallel_calculation(self): def function(x): diff --git a/testsuite/MDAnalysisTests/coordinates/test_pdb.py b/testsuite/MDAnalysisTests/coordinates/test_pdb.py index 6a9201dd52a..0cd1ca4d953 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_pdb.py +++ b/testsuite/MDAnalysisTests/coordinates/test_pdb.py @@ -333,6 +333,10 @@ def get_MODEL_lines(filename): assert int(line[10:14]) == model % 10000 +@pytest.mark.xfail(os.name == 'nt', + strict=True, + reason="PDB multiframe reading not yet supported " + "on Windows.") class TestMultiPDBReader(TestCase): def setUp(self): self.multiverse = mda.Universe(PDB_multiframe, @@ -479,6 +483,10 @@ def test_conect_bonds_all(tmpdir): # assert_equal(len([b for b in conect.bonds if not b.is_guessed]), 1922) +@pytest.mark.xfail(os.name == 'nt', + strict=True, + reason="PDB multiframe reading not yet supported " + "on Windows.") class TestMultiPDBWriter(TestCase): def setUp(self): self.universe = mda.Universe(PSF, PDB_small) diff --git a/testsuite/MDAnalysisTests/core/test_atomselections.py b/testsuite/MDAnalysisTests/core/test_atomselections.py index 89c64d18765..33178097ca0 100644 --- a/testsuite/MDAnalysisTests/core/test_atomselections.py +++ b/testsuite/MDAnalysisTests/core/test_atomselections.py @@ -23,6 +23,7 @@ from six.moves import range +import os import itertools import numpy as np from numpy.testing import( @@ -343,6 +344,9 @@ def test_type(self, universe): assert_equal(sel.names, ['HH31', 'HH32', 'HH33', 'HB1', 'HB2', 'HB3']) +@pytest.mark.xfail(os.name == 'nt', + strict=True, + reason="Not supported on Windows yet.") class TestSelectionsNAMD(object): @pytest.fixture() def universe(self): diff --git a/testsuite/MDAnalysisTests/lib/test_distances.py b/testsuite/MDAnalysisTests/lib/test_distances.py index 38f634844e3..38ae57384c8 100644 --- a/testsuite/MDAnalysisTests/lib/test_distances.py +++ b/testsuite/MDAnalysisTests/lib/test_distances.py @@ -25,12 +25,8 @@ import numpy as np from numpy.testing import assert_equal, assert_almost_equal -import itertools - import MDAnalysis as mda -from MDAnalysis.lib.mdamath import triclinic_vectors, triclinic_box - @pytest.mark.parametrize('coord_dtype', (np.float32, np.float64)) def test_transform_StoR_pass(coord_dtype): box = np.array([10, 7, 3, 45, 60, 90], dtype=np.float32) @@ -66,7 +62,7 @@ def test_capped_distance_noresults(): np.array([[0.1, 0.1, 0.1], [0.2, 0.1, 0.1]], dtype=np.float32)) -method_1 = ('bruteforce', 'pkdtree') +method_1 = ('bruteforce', 'pkdtree', 'nsgrid') min_cutoff_1 = (None, 0.1) @@ -89,6 +85,7 @@ def test_capped_distance_checkbrute(npoints, box, query, method, min_cutoff): min_cutoff=min_cutoff, box=box, method=method) + if pairs.shape != (0, ): found_pairs = pairs[:, 1] else: @@ -114,7 +111,7 @@ def test_self_capped_distance(npoints, box, method, min_cutoff): np.random.seed(90003) points = (np.random.uniform(low=0, high=1.0, size=(npoints, 3))*(boxes_1[0][:3])).astype(np.float32) - max_cutoff = 0.1 + max_cutoff = 0.2 pairs, distance = mda.lib.distances.self_capped_distance(points, max_cutoff, min_cutoff=min_cutoff, @@ -126,16 +123,17 @@ def test_self_capped_distance(npoints, box, method, min_cutoff): points[i+1:], box=box) if min_cutoff is not None: - idx = np.where((dist < max_cutoff) & (dist > min_cutoff))[0] + idx = np.where((dist < max_cutoff) & (dist > min_cutoff))[1] else: - idx = np.where((dist < max_cutoff))[0] + idx = np.where((dist < max_cutoff))[1] for other_idx in idx: j = other_idx + 1 + i found_pairs.append((i, j)) - found_distance.append(dist[other_idx]) + found_distance.append(dist[0, other_idx]) assert_equal(len(pairs), len(found_pairs)) -@pytest.mark.parametrize('box', (None, + +@pytest.mark.parametrize('box', (None, np.array([1, 1, 1, 90, 90, 90], dtype=np.float32), np.array([1, 1, 1, 60, 75, 80], dtype=np.float32))) @pytest.mark.parametrize('npoints,cutoff,meth', @@ -153,7 +151,7 @@ def test_method_selfselection(box, npoints, cutoff, meth): assert_equal(method.__name__, meth) -@pytest.mark.parametrize('box', (None, +@pytest.mark.parametrize('box', (None, np.array([1, 1, 1, 90, 90, 90], dtype=np.float32), np.array([1, 1, 1, 60, 75, 80], dtype=np.float32))) @pytest.mark.parametrize('npoints,cutoff,meth', @@ -173,9 +171,9 @@ def test_method_selection(box, npoints, cutoff, meth): # different boxlengths to shift a coordinate shifts = [ - ((0, 0, 0), (0, 0, 0), (0, 0, 0), (0, 0, 0)), # no shifting - ((1, 0, 0), (0, 1, 1), (0, 0, 1), (1, 1, 0)), # single box lengths - ((-1, 0, 1), (0, -1, 0), (1, 0, 1), (-1, -1, -1)), # negative single + ((0, 0, 0), (0, 0, 0), (0, 0, 0), (0, 0, 0)), # no shifting + ((1, 0, 0), (0, 1, 1), (0, 0, 1), (1, 1, 0)), # single box lengths + ((-1, 0, 1), (0, -1, 0), (1, 0, 1), (-1, -1, -1)), # negative single ((4, 3, -2), (-2, 2, 2), (-5, 2, 2), (0, 2, 2)), # multiple boxlengths ] diff --git a/testsuite/MDAnalysisTests/lib/test_nsgrid.py b/testsuite/MDAnalysisTests/lib/test_nsgrid.py new file mode 100644 index 00000000000..03b3e24c7d3 --- /dev/null +++ b/testsuite/MDAnalysisTests/lib/test_nsgrid.py @@ -0,0 +1,216 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fileencoding=utf-8 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2018 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# + +from __future__ import print_function, absolute_import + +import pytest + +from numpy.testing import assert_equal, assert_allclose +import numpy as np + +import MDAnalysis as mda +from MDAnalysisTests.datafiles import GRO, Martini_membrane_gro +from MDAnalysis.lib import nsgrid + + +@pytest.fixture +def universe(): + u = mda.Universe(GRO) + return u + +def run_grid_search(u, ref_id, cutoff=3): + coords = u.atoms.positions + searchcoords = u.atoms.positions[ref_id] + if searchcoords.shape == (3, ): + searchcoords = searchcoords[None, :] + # Run grid search + searcher = nsgrid.FastNS(cutoff, coords, box=u.dimensions) + + return searcher.search(searchcoords) + + +def test_pbc_box(): + """Check that PBC box accepts only well-formated boxes""" + pbc = True + with pytest.raises(TypeError): + nsgrid.PBCBox([]) + + with pytest.raises(ValueError): + nsgrid.PBCBox(np.zeros((3)), pbc) # Bad shape + nsgrid.PBCBox(np.zeros((3, 3)), pbc) # Collapsed box + nsgrid.PBCBOX(np.array([[0, 0, 0], [0, 1, 0], [0, 0, 1]]), pbc) # 2D box + nsgrid.PBCBOX(np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]), pbc) # Box provided as array of integers + nsgrid.PBCBOX(np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=float), pbc) # Box provided as array of double + + +def test_nsgrid_badcutoff(universe): + with pytest.raises(ValueError): + run_grid_search(universe, 0, -4) + run_grid_search(universe, 0, 100000) + + +def test_ns_grid_noneighbor(universe): + """Check that grid search returns empty lists/arrays when there is no neighbors""" + ref_id = 0 + cutoff = 0.5 + + results_grid = run_grid_search(universe, ref_id, cutoff) + + # same indices will be selected as neighbour here + assert len(results_grid.get_distances()[0]) == 1 + assert len(results_grid.get_indices()[0]) == 1 + assert len(results_grid.get_pairs()) == 1 + assert len(results_grid.get_pair_distances()) == 1 + + +def test_nsgrid_PBC_rect(): + """Check that nsgrid works with rect boxes and PBC""" + ref_id = 191 + results = np.array([191, 192, 672, 682, 683, 684, 995, 996, 2060, 2808, 3300, 3791, + 3792]) - 1 # Atomid are from gmx select so there start from 1 and not 0. hence -1! + + universe = mda.Universe(Martini_membrane_gro) + cutoff = 7 + + # FastNS is called differently to max coverage + searcher = nsgrid.FastNS(cutoff, universe.atoms.positions, box=universe.dimensions) + + results_grid = searcher.search(universe.atoms.positions[ref_id][None, :]).get_indices()[0] + + results_grid2 = searcher.search(universe.atoms.positions).get_indices() # call without specifying any ids, should do NS for all beads + + assert_equal(np.sort(results), np.sort(results_grid)) + assert_equal(len(universe.atoms), len(results_grid2)) + assert searcher.cutoff == 7 + assert_equal(np.sort(results_grid), np.sort(results_grid2[ref_id])) + + +def test_nsgrid_PBC(universe): + """Check that grid search works when PBC is needed""" + + ref_id = 13937 + results = np.array([4398, 4401, 13938, 13939, 13940, 13941, 17987, 23518, 23519, 23521, 23734, + 47451]) - 1 # Atomid are from gmx select so there start from 1 and not 0. hence -1! + + results_grid = run_grid_search(universe, ref_id).get_indices()[0] + + assert_equal(np.sort(results), np.sort(results_grid)) + + +def test_nsgrid_pairs(universe): + """Check that grid search returns the proper pairs""" + + ref_id = 13937 + neighbors = np.array([4398, 4401, 13938, 13939, 13940, 13941, 17987, 23518, 23519, 23521, 23734, + 47451]) - 1 # Atomid are from gmx select so there start from 1 and not 0. hence -1! + results = [] + + results = np.array(results) + + results_grid = run_grid_search(universe, ref_id).get_pairs() + + assert_equal(np.sort(neighbors, axis=0), np.sort(results_grid[:, 1], axis=0)) + + +def test_nsgrid_pair_distances(universe): + """Check that grid search returns the proper pair distances""" + + ref_id = 13937 + results = np.array([0.0, 0.270, 0.285, 0.096, 0.096, 0.015, 0.278, 0.268, 0.179, 0.259, 0.290, + 0.270]) * 10 # These distances where obtained by gmx distance so they are in nm + + results_grid = run_grid_search(universe, ref_id).get_pair_distances() + + assert_allclose(np.sort(results), np.sort(results_grid), atol=1e-2) + + +def test_nsgrid_distances(universe): + """Check that grid search returns the proper distances""" + + ref_id = 13937 + results = np.array([0.0, 0.270, 0.285, 0.096, 0.096, 0.015, 0.278, 0.268, 0.179, 0.259, 0.290, + 0.270]) * 10 # These distances where obtained by gmx distance so they are in nm + + results_grid = run_grid_search(universe, ref_id).get_distances()[0] + + assert_allclose(np.sort(results), np.sort(results_grid), atol=1e-2) + + +@pytest.mark.parametrize('box, results', + ((None, [3, 13, 24]), + (np.array([10., 10., 10., 90., 90., 90.]), [3, 13, 24, 39, 67]), + (np.array([10., 10., 10., 60., 75., 90.]), [3, 13, 24, 39, 60, 79]))) +def test_nsgrid_search(box, results): + np.random.seed(90003) + points = (np.random.uniform(low=0, high=1.0, + size=(100, 3))*(10.)).astype(np.float32) + cutoff = 2.0 + query = np.array([1., 1., 1.], dtype=np.float32).reshape((1, 3)) + + if box is None: + pseudobox = np.zeros(6, dtype=np.float32) + all_coords = np.concatenate([points, query]) + lmax = all_coords.max(axis=0) + lmin = all_coords.min(axis=0) + pseudobox[:3] = 1.1*(lmax - lmin) + pseudobox[3:] = 90. + shiftpoints, shiftquery = points.copy(), query.copy() + shiftpoints -= lmin + shiftquery -= lmin + searcher = nsgrid.FastNS(cutoff, shiftpoints, box=pseudobox, pbc=False) + searchresults = searcher.search(shiftquery) + else: + searcher = nsgrid.FastNS(cutoff, points, box) + searchresults = searcher.search(query) + indices = searchresults.get_indices()[0] + assert_equal(np.sort(indices), results) + + +@pytest.mark.parametrize('box, result', + ((None, 21), + (np.array([0., 0., 0., 90., 90., 90.]), 21), + (np.array([10., 10., 10., 90., 90., 90.]), 26), + (np.array([10., 10., 10., 60., 75., 90.]), 33))) +def test_nsgrid_selfsearch(box, result): + np.random.seed(90003) + points = (np.random.uniform(low=0, high=1.0, + size=(100, 3))*(10.)).astype(np.float32) + cutoff = 1.0 + if box is None or np.allclose(box[:3], 0): + # create a pseudobox + # define the max range + # and supply the pseudobox + # along with only one set of coordinates + pseudobox = np.zeros(6, dtype=np.float32) + lmax = points.max(axis=0) + lmin = points.min(axis=0) + pseudobox[:3] = 1.1*(lmax - lmin) + pseudobox[3:] = 90. + shiftref = points.copy() + shiftref -= lmin + searcher = nsgrid.FastNS(cutoff, shiftref, box=pseudobox, pbc=False) + searchresults = searcher.self_search() + else: + searcher = nsgrid.FastNS(cutoff, points, box=box) + searchresults = searcher.self_search() + pairs = searchresults.get_pairs() + assert_equal(len(pairs)//2, result) diff --git a/testsuite/MDAnalysisTests/lib/test_pkdtree.py b/testsuite/MDAnalysisTests/lib/test_pkdtree.py index a073057cf77..c161949f9d4 100644 --- a/testsuite/MDAnalysisTests/lib/test_pkdtree.py +++ b/testsuite/MDAnalysisTests/lib/test_pkdtree.py @@ -119,7 +119,6 @@ def test_searchpairs(b, radius, result): indices = tree.search_pairs(radius) assert_equal(len(indices), len(result)) - @pytest.mark.parametrize('radius, result', ((0.1, []), (0.3, [[0, 2]]))) def test_ckd_searchpairs_nopbc(radius, result): diff --git a/testsuite/MDAnalysisTests/utils/test_modelling.py b/testsuite/MDAnalysisTests/utils/test_modelling.py index 376e1baa473..5dcdd2bcfca 100644 --- a/testsuite/MDAnalysisTests/utils/test_modelling.py +++ b/testsuite/MDAnalysisTests/utils/test_modelling.py @@ -21,6 +21,7 @@ # from __future__ import absolute_import, print_function +import os import MDAnalysis import pytest from MDAnalysis.tests.datafiles import ( @@ -140,6 +141,9 @@ def u_water(): class TestMerge(object): + @pytest.mark.xfail(os.name == 'nt', + strict=True, + reason="Setup fixtures fail on Windows.") def test_merge(self, u_protein, u_ligand, u_water, tmpdir): ids_before = [a.index for u in [u_protein, u_ligand, u_water] for a in u.atoms] # Do the merge @@ -178,17 +182,26 @@ def test_merge(self, u_protein, u_ligand, u_water, tmpdir): ids_new2 = [a.index for a in u.atoms] assert_equal(ids_new, ids_new2) + @pytest.mark.xfail(os.name == 'nt', + strict=True, + reason="Setup fixtures fail on Windows.") def test_merge_same_universe(self, u_protein): u0 = MDAnalysis.Merge(u_protein.atoms, u_protein.atoms, u_protein.atoms) assert_equal(len(u0.atoms), 3 * len(u_protein.atoms)) assert_equal(len(u0.residues), 3 * len(u_protein.residues)) assert_equal(len(u0.segments), 3 * len(u_protein.segments)) + @pytest.mark.xfail(os.name == 'nt', + strict=True, + reason="Setup fixtures fail on Windows.") def test_residue_references(self, u_protein, u_ligand): m = Merge(u_protein.atoms, u_ligand.atoms) assert_equal(m.atoms.residues[0].universe, m, "wrong universe reference for residues after Merge()") + @pytest.mark.xfail(os.name == 'nt', + strict=True, + reason="Setup fixtures fail on Windows.") def test_segment_references(self, u_protein, u_ligand): m = Merge(u_protein.atoms, u_ligand.atoms) assert_equal(m.atoms.segments[0].universe, m, @@ -202,6 +215,9 @@ def test_nonsense_TypeError(self): with pytest.raises(TypeError): Merge(['1', 2]) + @pytest.mark.xfail(os.name == 'nt', + strict=True, + reason="Setup fixtures fail on Windows.") def test_emptyAG_ValueError(self, u_protein): a = AtomGroup([], u_protein) b = AtomGroup([], u_protein) @@ -235,6 +251,9 @@ def test_merge_with_topology(self, u): # One of these bonds isn't in the merged Universe assert(len(ag2[0].bonds) - 1 == len(u_merge.atoms[20].bonds)) + @pytest.mark.xfail(os.name == 'nt', + strict=True, + reason="Setup fixtures fail on Windows.") def test_merge_with_topology_from_different_universes(self, u, u_ligand): u_merge = MDAnalysis.Merge(u.atoms[:110], u_ligand.atoms)