diff --git a/maintainer/change_release.sh b/maintainer/change_release.sh index 7d441b5c2c0..ae317be3fb4 100755 --- a/maintainer/change_release.sh +++ b/maintainer/change_release.sh @@ -19,11 +19,11 @@ die () { findcommand() { for name in $*; do - path=$(which $name) - if [ -n "$path" ]; then - echo $path - return 0 - fi + path=$(which $name) + if [ -n "$path" ]; then + echo $path + return 0 + fi done die "None of the commands $* found." 2 } @@ -74,12 +74,12 @@ echo "Using sed = $SED" echo "Setting RELEASE/__version__ in MDAnalysis to $RELEASE" git grep -E -l 'RELEASE.*[0-9]+\.[0-9]+\.[0-9]+(\.[0-9]+)?(-dev[0-9]*|-rc[0-9]*)?' $FILES \ - | xargs -I FILE $SED '/RELEASE/s/[0-9]+\.[0-9]+\.[0-9]+(\.[0-9]+)?(-dev[0-9]*|-rc[0-9]*)?/'${RELEASE}'/' -i.bak FILE + | xargs -I FILE $SED -e '/RELEASE/s/[0-9]+\.[0-9]+\.[0-9]+(\.[0-9]+)?(-dev[0-9]*|-rc[0-9]*)?/'${RELEASE}'/' -i .bak FILE git grep -E -l '__version__ *=.*[0-9]+\.[0-9]+\.[0-9]+(\.[0-9]+)?(-dev[0-9]*|-rc[0-9]*)?' $FILES \ - | xargs -I FILE $SED '/__version__/s/[0-9]+\.[0-9]+\.[0-9]+(\.[0-9]+)?(-dev[0-9]*|-rc[0-9]*)?/'${RELEASE}'/' -i.bak FILE + | xargs -I FILE $SED -e '/__version__/s/[0-9]+\.[0-9]+\.[0-9]+(\.[0-9]+)?(-dev[0-9]*|-rc[0-9]*)?/'${RELEASE}'/' -i .bak FILE git grep -E -l 'version:.*[0-9]+\.[0-9]+\.[0-9]+(\.[0-9]+)?(-dev[0-9]*|-rc[0-9]*)?' $FILES \ - | xargs -I FILE $SED '/version:/s/[0-9]+\.[0-9]+\.[0-9]+(\.[0-9]+)?(-dev[0-9]*|-rc[0-9]*)?/'${RELEASE}'/' -i.bak FILE + | xargs -I FILE $SED -e '/version:/s/[0-9]+\.[0-9]+\.[0-9]+(\.[0-9]+)?(-dev[0-9]*|-rc[0-9]*)?/'${RELEASE}'/' -i .bak FILE git status diff --git a/maintainer/conda/MDAnalysis/meta.yaml b/maintainer/conda/MDAnalysis/meta.yaml index 7951ec8734e..14c7eafb692 100644 --- a/maintainer/conda/MDAnalysis/meta.yaml +++ b/maintainer/conda/MDAnalysis/meta.yaml @@ -1,12 +1,12 @@ package: name: mdanalysis # This has to be changed after a release - version: "0.19.1dev" + version: "2.0.0-dev0" source: git_url: https://github.com/MDAnalysis/mdanalysis - git_branch: develop - # git_tag: release-0.15.0 + # git_branch: develop + git_tag: release-0.19.1 # specify the subversion of the conda package. Defaults to 0. If you make # changes to the conda package alone with out an increase in the source package diff --git a/package/AUTHORS b/package/AUTHORS index 27e7a155623..1ca4210bf76 100644 --- a/package/AUTHORS +++ b/package/AUTHORS @@ -150,6 +150,7 @@ Chronological list of authors - Edis Jakupovic - Nicholas Craven - Mieczyslaw Torchala + - Ramon Crehuet - Haochuan Chen - Karthikeyan Singaravelan 2021 @@ -167,6 +168,7 @@ Chronological list of authors - Sulay Shah - Alexander Yang + External code ------------- diff --git a/package/CHANGELOG b/package/CHANGELOG index bad516022dc..89e73f56b22 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -330,6 +330,115 @@ Deprecations read it from the input TRZ trajectory. +05/01/21 IAlibay + + * 1.1.1 + +Fixes + * Remove absolute paths from package upload to pypi. + + +04/28/21 richardjgowers, IAlibay, tylerjereddy, xiki-tempula, lilyminium, + zemanj, PicoCentauri + + * 1.1.0 + +Fixes + * Removes use of absolute paths in setup.py to avoid Windows installation + failures (Issue #3129) + * Adds test for crashes caused by small box NSGrid searches (Issue #2670) + * Replaces decreated NumPy type aliases and fixes NEP 34 explicit ragged + array warnings (PR #3139, backports PRs #2845 and #2834) + * Fixed several issues with NSGrid and triclinic boxes not finding some pairs. + (Issues #2229 #2345 #2919, PR #2937). + * Removed deprecation warning from numpy in hbond_autocorrel + (Issue #2987 PR #3242) + * Fixed bug in exclusion matrix of hbond_autocorrel (PR #3242) + +Changes + * Maximum pinned versions in setup.py removed for python 3.6+ (PR #3139) + +Deprecations + * ParmEdConverter no longer accepts Timestep objects at all + (Issue #3031, PR #3172) + * NCDFWriter `scale_factor` writing will change in version 2.0 to + better match AMBER outputs (Issue #2327) + * Deprecated using the last letter of the segid as the + chainID when writing PDB files (Issue #3144) + * Deprecated tempfactors and bfactors being separate + TopologyAttrs, with a warning (PR #3161) + * hbonds.WaterBridgeAnalysis will be removed in 2.0.0 and + replaced with hydrogenbonds.WaterBridgeAnalysis (#3111) + * TPRParser indexing resids from 0 by default is deprecated. + From 2.0 TPRParser will index resids from 1 by default. + + +Enhancements + * Added `get_connections` method to get bonds, angles, dihedrals, etc. + with or without atoms outside the group (Issues #1264, #2821, PR #3160) + * Added `tpr_resid_from_one` keyword to select whether TPRParser + indexes resids from 0 or 1 (Issue #2364, PR #3153) + + +01/17/21 richardjgowers, IAlibay, orbeckst, tylerjereddy, jbarnoud, + yuxuanzhuang, lilyminium, VOD555, p-j-smith, bieniekmateusz, + calcraven, ianmkenney, rcrehuet, manuel.nuno.melo, hanatok + + * 1.0.1 + +Fixes + * Due to issues with the reliability/accuracy of `nsgrid`, this method is + currently not recommended for use. It has also been removed as an option + from lib.capped_distance and lib.self_capped_distance. Please use PKDTree + instead (Issue #2930) + * Development status changed from beta to mature (Issue #2773) + * pip installation only requests Python 2.7-compatible packages (#2736) + * Testsuite does not use any more matplotlib.use('agg') (#2191) + * The methods provided by topology attributes now appear in the + documentation (Issue #1845) + * AtomGroup.center now works correctly for compounds + unwrapping + (Issue #2984) + * In ChainReader, read_frame does not trigger change of iterating position. + (Issue #2723, PR #2815) + * empty_atomgroup.select_atoms('name *') now returns an empty + AtomGroup instead of TypeError (Issue #2765) + * TRZReader now checks `n_atoms` on reading. (Issue #2817, PR #2820) + * TRZWriter now writes `n_atoms` to the file. (Issue #2817, PR #2820) + * rdf.InterRDF_s density keyword documented and now gives correct results for + density=True; the keyword was available since 0.19.0 but with incorrect + semantics and not documented and did not produce correct results (Issue + #2811, PR #2812) + * In hydrogenbonds.hbond_analysis.HydrogenbondAnalysis an AttributeError + was thrown when finding D-H pairs via the topology if `hydrogens` was an + empty AtomGroup (Issue #2848) + * Fixed reading in masses and charges from a hoomdxml file + (Issue #2888, PR #2889) + * Fixed performance regression on select_atoms for string selections (#2751) + * Fixed the DMSParser, allowing the creation of multiple segids sharing + residues with identical resids (Issue #1387, PR #2872) + * Fixed missing space between floats in helanal output files (PR #2733) + * ensure that unistd.h is included on macOS when compiling ENCORE's spe.c + (Issue #2934) + +Enhancements + * Improved performance of the ParmEd converter (Issue #3028, PR #3029) + * Improved performances when parsing TPR files (PR #2804) + +Changes (not affecting users) + * Continuous integration uses mamba rather than conda to install the + dependencies (PR #2983) + +Deprecations + * waterdynamics.HydrogenBondLifetimes will be removed in 2.0.0 and + replaced with hydrogenbonds.HydrogenBondAnalysis.lifetime() (#2547) + * lib.util.echo() will be removed in 2.0.0 + * core.universe.as_Universe() will be removed in 2.0.0 + * analysis.leaflets.LeafletFinder() will not accept a filename any more, + only a Universe, in 2.0.0 + * analysis.helanal will be removed in 2.0.0 and replaced by + analysis.helix_analysis (PR #2622) + + 06/09/20 richardjgowers, kain88-de, lilyminium, p-j-smith, bdice, joaomcteixeira, PicoCentauri, davidercruz, jbarnoud, RMeli, IAlibay, mtiberti, CCook96, Yuan-Yu, xiki-tempula, HTian1997, Iv-Hristov, hmacdope, AnshulAngaria, @@ -356,7 +465,7 @@ Fixes * n_components correctly selects PCA components (Issue #2623) * Use internal residue indices instead of resids for similarity and connectivity selections (Issues #2669 and #2672, PR #2673) - * Checks for cryo-em 1 A^3 default CRYST1 record, + * Checks for cryo-em 1 A^3 default CRYST1 record, disabling setting of box dimensions if found (Issue #2599) * mdamath.angles now returns np.pi instead of -np.pi in cases of lower bound roundoff errors. (Issue #2632, PR #2634) @@ -381,7 +490,7 @@ Fixes * Clarifies density_from_Universe docs and adds user warning (Issue #2372) * Fix upstream deprecation of `matplotlib.axis.Tick` attributes in `MDAnalysis.analysis.psa` - * PDBWriter now uses first character of segid as ChainID (Issue #2224) + * PDBWriter now uses last character of segid as ChainID (Issue #2224) * Adds a more detailed warning when attempting to read chamber-style parm7 files (Issue #2475) * ClusterCollection.get_ids now returns correctly (Issue #2464) @@ -395,6 +504,7 @@ Fixes than one format (Issue #2353) * PDBQTParser now gives icode TopologyAttrs (Issue #2361) * GROReader and GROWriter now can handle boxes with box vectors >1000nm + (Issue #2371) * Guess atom element with uppercase name * TopologyGroup no longer reshapes the type, guessed, and order properties (Issue #2392) @@ -417,7 +527,7 @@ Fixes Enhancements * Added support for FHI-AIMS input files (PR #2705) * vastly improved support for 32-bit Windows (PR #2696) - * Added methods to compute the root-mean-square-inner-product of subspaces + * Added methods to compute the root-mean-square-inner-product of subspaces and the cumulative overlap of a vector in a subspace for PCA (PR #2613) * Added .frames and .times arrays to AnalysisBase (Issue #2661) * Added elements attribute to PDBParser (Issue #2553, #2647) @@ -464,7 +574,7 @@ Enhancements Changes * Unused `MDAnalysis.lib.mdamath._angle` has been removed (Issue #2650) - * Refactored dihedral selections and Ramachandran.__init__ to speed up + * Refactored dihedral selections and Ramachandran.__init__ to speed up dihedral selections for faster tests (Issue #2671, PR #2706) * Removes the deprecated `t0`, `tf`, and `dtmax` from :class:Waterdynamics.SurvivalProbability. Instead the `start`, `stop` and @@ -527,7 +637,6 @@ Changes function calls. (Issue #782) Deprecations - * analysis.helanal is deprecated in 1.0 (remove in 2.0) * analysis.hole is deprecated in 1.0 (remove in 2.0) * analysis.hbonds.HydrogenBondAnalysis is deprecated in 1.0 (remove in 2.0) * analysis.density.density_from_Universe() (remove in 2.0) @@ -536,6 +645,7 @@ Deprecations * Writer.write_next_timestep is deprecated, use write() instead (remove in 2.0) * Writer.write(Timestep) is deprecated, use either a Universe or AtomGroup + 09/05/19 IAlibay, richardjgowers * 0.20.1 @@ -908,7 +1018,6 @@ Testsuite * Unit tests for unwanted side effects when importing MDAnalysis * MDAnalysis.visualization is now tested - 01/24/18 richardjgowers, rathann, orbeckst, tylerjereddy, mtiberti, kain88-de, jbarnoud, nestorwendt, mmattaNU, jmborr, sobuelow, sseyler, rcortini, xiki-tempula, manuel.nuno.melo @@ -1018,6 +1127,9 @@ Fixes * Fixed dtype of numpy arrays to accomodate 32 bit architectures (Issue #1362) * Groups are hashable on python 3 (Issue #1397) +Changes + * scipy and matplotlib are now required dependencies (Issue #1159) + Changes * scipy and matplotlib are now required dependencies (Issue #1159) diff --git a/package/MANIFEST.in b/package/MANIFEST.in index 2e2154293a5..960ce3960e6 100644 --- a/package/MANIFEST.in +++ b/package/MANIFEST.in @@ -12,4 +12,4 @@ global-exclude *.pyc global-exclude *.pyo global-exclude *.pyd global-exclude *~ -global-exclude .git \ No newline at end of file +global-exclude .git diff --git a/package/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py b/package/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py index 17d907aa309..0aaca637224 100644 --- a/package/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py +++ b/package/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py @@ -768,7 +768,8 @@ def count_by_time(self): indices /= self.step counts = np.zeros_like(self.frames) - counts[indices.astype(int)] = tmp_counts + counts[indices.astype(np.intp)] = tmp_counts + return counts def count_by_type(self): @@ -786,8 +787,8 @@ def count_by_type(self): acceptor atoms in a hydrogen bond. """ - d = self.u.atoms[self.results.hbonds[:, 1].astype(int)] - a = self.u.atoms[self.results.hbonds[:, 3].astype(int)] + d = self.u.atoms[self.hbonds[:, 1].astype(np.intp)] + a = self.u.atoms[self.hbonds[:, 3].astype(np.intp)] tmp_hbonds = np.array([d.resnames, d.types, a.resnames, a.types], dtype=str).T @@ -815,9 +816,9 @@ def count_by_ids(self): in a hydrogen bond. """ - d = self.u.atoms[self.results.hbonds[:, 1].astype(int)] - h = self.u.atoms[self.results.hbonds[:, 2].astype(int)] - a = self.u.atoms[self.results.hbonds[:, 3].astype(int)] + d = self.u.atoms[self.hbonds[:, 1].astype(np.intp)] + h = self.u.atoms[self.hbonds[:, 2].astype(np.intp)] + a = self.u.atoms[self.hbonds[:, 3].astype(np.intp)] tmp_hbonds = np.array([d.ids, h.ids, a.ids]).T hbond_ids, ids_counts = np.unique(tmp_hbonds, axis=0, diff --git a/package/MDAnalysis/analysis/leaflet.py b/package/MDAnalysis/analysis/leaflet.py index 1749faf593d..e74f429dc3d 100644 --- a/package/MDAnalysis/analysis/leaflet.py +++ b/package/MDAnalysis/analysis/leaflet.py @@ -118,8 +118,8 @@ class LeafletFinder(object): consecutively, starting at 0. To obtain the atoms in the input structure use :meth:`LeafletFinder.groups`:: - u_PDB = mda.Universe(PDB) - L = LeafletFinder(u_PDB, 'name P*') + u = mda.Universe(PDB) + L = LeafletFinder(u, 'name P*') leaflet0 = L.groups(0) leaflet1 = L.groups(1) diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py index 75e83ac7b4e..0203cf32c51 100644 --- a/package/MDAnalysis/analysis/waterdynamics.py +++ b/package/MDAnalysis/analysis/waterdynamics.py @@ -942,7 +942,7 @@ class SurvivalProbability(object): Using the MDAnalysis.lib.correlations.py to carry out the intermittency and autocorrelation calculations. Changed `selection` keyword to `select`. - Removed support for the deprecated `t0`, `tf`, and `dtmax` keywords. + Removed support for the deprecated `t0`, `tf`, and `dtmax` keywords. These should instead be passed to :meth:`SurvivalProbability.run` as the `start`, `stop`, and `tau_max` keywords respectively. The `stop` keyword as passed to :meth:`SurvivalProbability.run` has now diff --git a/package/MDAnalysis/coordinates/PDB.py b/package/MDAnalysis/coordinates/PDB.py index badf47e8e9d..9b4e8feab55 100644 --- a/package/MDAnalysis/coordinates/PDB.py +++ b/package/MDAnalysis/coordinates/PDB.py @@ -539,6 +539,9 @@ class PDBWriter(base.WriterBase): .. versionchanged:: 2.0.0 Add the `redindex` argument. Setting this keyword to ``True`` (the default) preserves the behavior in earlier versions of MDAnalysis. + The PDB writer checks for a valid chainID entry instead of using the + last character of segid. Should a chainID not be present, or not + conform to the PDB standard, the default value of 'X' is used. """ fmt = { diff --git a/package/MDAnalysis/coordinates/TRJ.py b/package/MDAnalysis/coordinates/TRJ.py index 78bfa3983a0..da7c5ae50e5 100644 --- a/package/MDAnalysis/coordinates/TRJ.py +++ b/package/MDAnalysis/coordinates/TRJ.py @@ -823,6 +823,18 @@ class NCDFWriter(base.WriterBase): .. _`Issue #506`: https://github.com/MDAnalysis/mdanalysis/issues/506#issuecomment-225081416 + Raises + ------ + FutureWarning + When writing. The :class:`NCDFWriter` currently does not write any + `scale_factor` values for the data variables. Whilst not in breach + of the AMBER NetCDF standard, this behaviour differs from that of + most AMBER writers, especially for velocities which usually have a + `scale_factor` of 20.455. In MDAnalysis 2.0, the :class:`NCDFWriter` + will enforce `scale_factor` writing to either match user inputs (either + manually defined or via the :class:`NCDFReader`) or those as written by + AmberTools's :program:`sander` under default operation. + See Also -------- :class:`NCDFReader` @@ -882,6 +894,13 @@ def __init__(self, self.has_forces = kwargs.get('forces', False) self.curr_frame = 0 + # warn users of upcoming changes + wmsg = ("In MDAnalysis v2.0, `scale_factor` writing will change (" + "currently these are not written), to better match the way " + "they are written by AMBER programs. See NCDFWriter docstring " + "for more details.") + warnings.warn(wmsg, FutureWarning) + def _init_netcdf(self, periodic=True): """Initialize netcdf AMBER 1.0 trajectory. diff --git a/package/MDAnalysis/coordinates/TRZ.py b/package/MDAnalysis/coordinates/TRZ.py index 2048d2acdab..084b595fbe3 100644 --- a/package/MDAnalysis/coordinates/TRZ.py +++ b/package/MDAnalysis/coordinates/TRZ.py @@ -140,7 +140,7 @@ class TRZReader(base.ReaderBase): Extra data (Temperature, Energies, Pressures, etc) now read into ts.data dictionary. Now passes a weakref of self to ts (ts._reader). - .. versionchanged:: 2.0.0 + .. versionchanged:: 1.0.1 Now checks for the correct `n_atoms` on reading and can raise :exc:`ValueError`. """ diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py index d72ec4f6c03..4cb6582385d 100644 --- a/package/MDAnalysis/core/selection.py +++ b/package/MDAnalysis/core/selection.py @@ -600,6 +600,7 @@ def apply(self, group): return group[np.in1d(nmidx, matches)].unique + class AromaticSelection(Selection): """Select aromatic atoms. @@ -933,7 +934,7 @@ class ProteinSelection(Selection): :func:`MDAnalysis.lib.util.convert_aa_code` - .. versionchanged:: 2.0.0 + .. versionchanged:: 1.0.1 prot_res changed to set (from numpy array) performance improved by ~100x on larger systems """ @@ -990,7 +991,7 @@ class NucleicSelection(Selection): .. versionchanged:: 0.8 additional Gromacs selections - .. versionchanged:: 2.0.0 + .. versionchanged:: 1.0.1 nucl_res changed to set (from numpy array) performance improved by ~100x on larger systems """ @@ -1026,7 +1027,7 @@ class BackboneSelection(ProteinSelection): (which are included by, eg VMD's backbone selection). - .. versionchanged:: 2.0.0 + .. versionchanged:: 1.0.1 bb_atoms changed to set (from numpy array) performance improved by ~100x on larger systems """ @@ -1059,7 +1060,7 @@ class NucleicBackboneSelection(NucleicSelection): by the :class:`NucleicSelection`. - .. versionchanged:: 2.0.0 + .. versionchanged:: 1.0.1 bb_atoms changed to set (from numpy array) performance improved by ~100x on larger systems """ @@ -1094,7 +1095,7 @@ class BaseSelection(NucleicSelection): 'O6','N2','N6', 'O2','N4','O4','C5M' - .. versionchanged:: 2.0.0 + .. versionchanged:: 1.0.1 base_atoms changed to set (from numpy array) performance improved by ~100x on larger systems """ @@ -1127,7 +1128,7 @@ class NucleicSugarSelection(NucleicSelection): """Contains all atoms with name C1', C2', C3', C4', O2', O4', O3'. - .. versionchanged:: 2.0.0 + .. versionchanged:: 1.0.1 sug_atoms changed to set (from numpy array) performance improved by ~100x on larger systems """ diff --git a/package/MDAnalysis/core/topologyattrs.py b/package/MDAnalysis/core/topologyattrs.py index a3d0940544c..b32a433ac5b 100644 --- a/package/MDAnalysis/core/topologyattrs.py +++ b/package/MDAnalysis/core/topologyattrs.py @@ -327,7 +327,6 @@ def __init__(cls, name, bases, classdict): dtype = classdict.get("dtype") if dtype is not None: per_obj = classdict.get("per_object", bases[0].per_object) - try: selection.gen_selection_class(singular, attrname, dtype, per_obj) @@ -1113,7 +1112,6 @@ def chi1_selection(residue, n_name='N', ca_name='CA', cb_name='CB', :class:`MDAnalysis.analysis.dihedrals.Janin` class does not incorporate amino acids where the gamma atom is not carbon, into its chi1 selections. - Parameters ---------- c_name: str (optional) diff --git a/package/MDAnalysis/core/universe.py b/package/MDAnalysis/core/universe.py index 9ddef51bf3f..46c458ab5d2 100644 --- a/package/MDAnalysis/core/universe.py +++ b/package/MDAnalysis/core/universe.py @@ -99,6 +99,7 @@ from .topologyattrs import AtomAttr, ResidueAttr, SegmentAttr from .topologyobjects import TopologyObject + logger = logging.getLogger("MDAnalysis.core.universe") @@ -764,6 +765,14 @@ def add_TopologyAttr(self, topologyattr, values=None): Can now also add TopologyAttrs with a string of the name of the attribute to add (eg 'charges'), can also supply initial values using values keyword. + + .. versionchanged:: 1.1.0 + Now warns when adding bfactors to a Universe with + existing tempfactors, or adding tempfactors to a + Universe with existing bfactors. + In version 2.0, MDAnalysis will stop treating + tempfactors and bfactors as separate attributes. Instead, + they will be aliases of the same attribute. """ if isinstance(topologyattr, str): try: @@ -784,6 +793,16 @@ def add_TopologyAttr(self, topologyattr, values=None): n_residues=self._topology.n_residues, n_segments=self._topology.n_segments, values=values) + alias_pairs = [("bfactors", "tempfactors"), + ("tempfactors", "bfactors")] + for a, b in alias_pairs: + if topologyattr.attrname == a and hasattr(self._topology, b): + err = ("You are adding {a} to a Universe that " + "has {b}. From MDAnalysis version 2.0, {a} " + "and {b} will no longer be separate " + "TopologyAttrs. Instead, they will be aliases " + "of the same attribute.").format(a=a, b=b) + warnings.warn(err, DeprecationWarning) self._topology.add_TopologyAttr(topologyattr) self._process_attr(topologyattr) @@ -1318,24 +1337,18 @@ def from_smiles(cls, smiles, sanitize=True, addHs=True, ---------- smiles : str SMILES string - sanitize : bool (optional, default True) Toggle the sanitization of the molecule - addHs : bool (optional, default True) Add all necessary hydrogens to the molecule - generate_coordinates : bool (optional, default True) Generate 3D coordinates using RDKit's `AllChem.EmbedMultipleConfs` function. Requires adding hydrogens with the `addHs` parameter - numConfs : int (optional, default 1) Number of frames to generate coordinates for. Ignored if `generate_coordinates=False` - rdkit_kwargs : dict (optional) Other arguments passed to the RDKit `EmbedMultipleConfs` function - kwargs : dict Parameters passed on Universe creation diff --git a/package/MDAnalysis/lib/_cutil.pyx b/package/MDAnalysis/lib/_cutil.pyx index 81d19d35894..4e11b498f44 100644 --- a/package/MDAnalysis/lib/_cutil.pyx +++ b/package/MDAnalysis/lib/_cutil.pyx @@ -32,6 +32,7 @@ from MDAnalysis import NoDataError from libcpp.set cimport set as cset from libcpp.map cimport map as cmap from libcpp.vector cimport vector +from libcpp.utility cimport pair from cython.operator cimport dereference as deref @@ -90,6 +91,60 @@ def unique_int_1d(np.intp_t[:] values): return np.array(result) +@cython.boundscheck(False) +def _in2d(np.intp_t[:, :] arr1, np.intp_t[:, :] arr2): + """Similar to np.in1d except works on 2d arrays + + Parameters + ---------- + arr1, arr2 : numpy.ndarray, shape (n,2) and (m, 2) + arrays of integers + + Returns + ------- + in1d : bool array + if an element of arr1 was in arr2 + + .. versionadded:: 1.1.0 + """ + cdef object out + cdef ssize_t i + cdef cset[pair[np.intp_t, np.intp_t]] hits + cdef pair[np.intp_t, np.intp_t] p + + """ + Construct a set from arr2 called hits + then for each entry in arr1, check if there's a hit in this set + + python would look like: + + hits = {(i, j) for (i, j) in arr2} + results = np.empty(arr1.shape[0]) + for i, (x, y) in enumerate(arr1): + results[i] = (x, y) in hits + + return results + """ + if not arr1.shape[1] == 2 or not arr2.shape[1] == 2: + raise ValueError("Both arrays must be (n, 2) arrays") + + for i in range(arr2.shape[0]): + p = pair[np.intp_t, np.intp_t](arr2[i, 0], arr2[i, 1]) + hits.insert(p) + + out = np.empty(arr1.shape[0], dtype=np.uint8) + cdef unsigned char[::1] results = out + for i in range(arr1.shape[0]): + p = pair[np.intp_t, np.intp_t](arr1[i, 0], arr1[i, 1]) + + if hits.count(p): + results[i] = True + else: + results[i] = False + + return out.astype(bool) + + cdef intset difference(intset a, intset b): """a.difference(b) diff --git a/package/MDAnalysis/lib/distances.py b/package/MDAnalysis/lib/distances.py index 74782a67797..9b6337e26bd 100644 --- a/package/MDAnalysis/lib/distances.py +++ b/package/MDAnalysis/lib/distances.py @@ -379,16 +379,19 @@ def capped_distance(reference, configuration, max_cutoff, min_cutoff=None, coord2 = configuration[j] distance = distances[k] - Note - ----- - Currently supports brute force, grid-based, and periodic KDtree search - methods. - See Also -------- distance_array MDAnalysis.lib.pkdtree.PeriodicKDTree.search MDAnalysis.lib.nsgrid.FastNS.search + + + .. versionchanged:: 1.0.1 + nsgrid was temporarily removed and replaced with pkdtree due to issues + relating to its reliability and accuracy (Issues #2919, #2229, #2345, + #2670, #2930) + .. versionchanged:: 1.0.2 + nsgrid enabled again """ if box is not None: box = np.asarray(box, dtype=np.float32) @@ -432,10 +435,19 @@ def _determine_method(reference, configuration, max_cutoff, min_cutoff=None, ------- function : callable The function implementing the guessed (or deliberatly chosen) method. + + + .. versionchanged:: 1.0.1 + nsgrid was temporarily removed and replaced with pkdtree due to issues + relating to its reliability and accuracy (Issues #2919, #2229, #2345, + #2670, #2930) + .. versionchanged:: 1.1.0 + enabled nsgrid again """ methods = {'bruteforce': _bruteforce_capped, 'pkdtree': _pkdtree_capped, - 'nsgrid': _nsgrid_capped} + 'nsgrid': _nsgrid_capped, + } if method is not None: return methods[method.lower()] @@ -792,8 +804,15 @@ def self_capped_distance(reference, max_cutoff, min_cutoff=None, box=None, MDAnalysis.lib.pkdtree.PeriodicKDTree.search MDAnalysis.lib.nsgrid.FastNS.self_search + .. versionchanged:: 0.20.0 Added `return_distances` keyword. + .. versionchanged:: 1.0.1 + nsgrid was temporarily removed and replaced with pkdtree due to issues + relating to its reliability and accuracy (Issues #2919, #2229, #2345, + #2670, #2930) + .. versionchanged:: 1.0.2 + enabled nsgrid again """ if box is not None: box = np.asarray(box, dtype=np.float32) @@ -835,10 +854,19 @@ def _determine_method_self(reference, max_cutoff, min_cutoff=None, box=None, ------- function : callable The function implementing the guessed (or deliberatly chosen) method. + + + .. versionchanged:: 1.0.1 + nsgrid was temporarily removed and replaced with pkdtree due to issues + relating to its reliability and accuracy (Issues #2919, #2229, #2345, + #2670, #2930) + .. versionchanged:: 1.0.2 + enabled nsgrid again """ methods = {'bruteforce': _bruteforce_capped_self, 'pkdtree': _pkdtree_capped_self, - 'nsgrid': _nsgrid_capped_self} + 'nsgrid': _nsgrid_capped_self, + } if method is not None: return methods[method.lower()] @@ -1096,9 +1124,9 @@ def _nsgrid_capped_self(reference, max_cutoff, min_cutoff=None, box=None, gridsearch = FastNS(max_cutoff, reference, box=box) results = gridsearch.self_search() - pairs = results.get_pairs()[::2, :] + pairs = results.get_pairs() if return_distances or (min_cutoff is not None): - distances = results.get_pair_distances()[::2] + distances = results.get_pair_distances() if min_cutoff is not None: idx = distances > min_cutoff pairs, distances = pairs[idx], distances[idx] diff --git a/package/MDAnalysis/lib/include/calc_distances.h b/package/MDAnalysis/lib/include/calc_distances.h index 86d6b4941df..c28ac3afddd 100644 --- a/package/MDAnalysis/lib/include/calc_distances.h +++ b/package/MDAnalysis/lib/include/calc_distances.h @@ -31,7 +31,7 @@ typedef float coordinate[3]; #define USED_OPENMP 0 #endif -static void minimum_image(double* x, float* box, float* inverse_box) +void minimum_image(double* x, float* box, float* inverse_box) { int i; double s; @@ -43,7 +43,28 @@ static void minimum_image(double* x, float* box, float* inverse_box) } } -static void minimum_image_triclinic(double* dx, float* box) +inline void _minimum_image_ortho_lazy(double* x, float* box, float* half_box) +{ + /* + * Lazy minimum image convention for orthorhombic boxes. + * + * Assumes that the maximum separation is less than 1.5 times the box length. + */ + for (int i = 0; i < 3; ++i) { + if (x[i] > half_box[i]) { + x[i] -= box[i]; + } + else + { + if (x[i] <= -half_box[i]) + { + x[i] += box[i]; + } + } + } +} + +void minimum_image_triclinic(double* dx, float* box) { /* * Minimum image convention for triclinic systems, modelled after domain.cpp diff --git a/package/MDAnalysis/lib/nsgrid.pyx b/package/MDAnalysis/lib/nsgrid.pyx index 688f41645ae..3941731b0a0 100644 --- a/package/MDAnalysis/lib/nsgrid.pyx +++ b/package/MDAnalysis/lib/nsgrid.pyx @@ -26,6 +26,7 @@ # cython: cdivision=True # cython: boundscheck=False +# cython: wraparound=False # cython: initializedcheck=False # cython: embedsignature=True @@ -33,7 +34,6 @@ Neighbor search library --- :mod:`MDAnalysis.lib.nsgrid` ======================================================== - About the code -------------- @@ -69,249 +69,54 @@ not reflect in the results. .. [#] a pair correspond to two particles that are considered as neighbors . .. versionadded:: 0.19.0 +.. versionchanged:: 1.0.2 + Rewrote module + Classes ------- """ - -# Used to handle memory allocation -from cpython.mem cimport PyMem_Malloc, PyMem_Realloc, PyMem_Free -from libc.math cimport sqrt import numpy as np -from libcpp.vector cimport vector -cimport numpy as np - -# Preprocessor DEFs -DEF DIM = 3 -DEF XX = 0 -DEF YY = 1 -DEF ZZ = 2 -DEF EPSILON = 1e-5 - -ctypedef np.int_t ns_int -ctypedef np.float32_t real -ctypedef np.float64_t dreal -ctypedef real rvec[DIM] -ctypedef dreal drvec[DIM] -ctypedef ns_int ivec[DIM] -ctypedef real matrix[DIM][DIM] - -ctypedef vector[ns_int] intvec -ctypedef vector[real] realvec -ctypedef vector[dreal] drealvec - -# Useful Functions -cdef real rvec_norm2(const rvec a) nogil: - return a[XX]*a[XX] + a[YY]*a[YY] + a[ZZ]*a[ZZ] - -cdef dreal drvec_norm2(const drvec a) nogil: - return a[XX]*a[XX] + a[YY]*a[YY] + a[ZZ]*a[ZZ] - -############################### -# Utility class to handle PBC # -############################### -cdef struct cPBCBox_t: - matrix box - rvec fbox_diag - rvec hbox_diag - rvec mhbox_diag - dreal max_cutoff2 - - -# Class to handle PBC calculations -cdef class _PBCBox(object): - """ - Cython implementation of - `PBC-related `_ - operations. This class is used by classes :class:`FastNS` - and :class:`_NSGrid` to put all particles inside a brick-shaped box - and to compute PBC-aware distance. The class can also handle - non-PBC aware distance evaluations through ``periodic`` argument. - - .. warning:: - This class is not meant to be used by end users. - - .. warning:: - Even if MD triclinic boxes can be handled by this class, - internal optimization is made based on the assumption that - particles are inside a brick-shaped box. When this is not - the case, calculated distances are not - warranted to be exact. - """ - - cdef cPBCBox_t c_pbcbox - cdef bint is_triclinic - cdef bint periodic - - def __init__(self, real[:, ::1] box, bint periodic): - """ - Parameters - ---------- - box : numpy.ndarray - box vectors of shape ``(3, 3)`` or - as returned by ``MDAnalysis.lib.mdamath.triclinic_vectors`` - ``dtype`` must be ``numpy.float32`` - periodic : boolean - ``True`` for PBC-aware calculations - ``False`` for non PBC aware calculations - """ - self.periodic = periodic - self.update(box) - - cdef void fast_update(self, real[:, ::1] box) nogil: - """ - Updates the internal box parameters for - PBC-aware distance calculations. The internal - box parameters are used to define the brick-shaped - box which is eventually used for distance calculations. - - """ - cdef ns_int i, j - cdef dreal min_hv2, min_ss, tmp - - # Update matrix - self.is_triclinic = False - for i in range(DIM): - for j in range(DIM): - self.c_pbcbox.box[i][j] = box[i, j] - - if i != j: - # mdamath.triclinic_vectors explicitly sets the off-diagonal - # elements to zero if the box is orthogonal, so we can - # safely check floating point values for equality here - if box[i, j] != 0.0: - self.is_triclinic = True - - # Update diagonals - for i in range(DIM): - self.c_pbcbox.fbox_diag[i] = box[i, i] - self.c_pbcbox.hbox_diag[i] = self.c_pbcbox.fbox_diag[i] * 0.5 - self.c_pbcbox.mhbox_diag[i] = - self.c_pbcbox.hbox_diag[i] - - # Update maximum cutoff - - # Physical limitation of the cut-off - # by half the length of the shortest box vector. - min_hv2 = min(0.25 * rvec_norm2(&box[XX, XX]), 0.25 * rvec_norm2(&box[YY, XX])) - min_hv2 = min(min_hv2, 0.25 * rvec_norm2(&box[ZZ, XX])) - - # Limitation to the smallest diagonal element due to optimizations: - # checking only linear combinations of single box-vectors (2 in x) - # in the grid search and pbc_dx is a lot faster - # than checking all possible combinations. - tmp = box[YY, YY] - if box[ZZ, YY] < 0: - tmp -= box[ZZ, YY] - else: - tmp += box[ZZ, YY] - - min_ss = min(box[XX, XX], min(tmp, box[ZZ, ZZ])) - self.c_pbcbox.max_cutoff2 = min(min_hv2, min_ss * min_ss) - - def update(self, real[:, ::1] box): - """ - Updates internal MD box representation and parameters used for calculations. - - Parameters - ---------- - box : numpy.ndarray - Describes the MD box vectors as returned by - :func:`MDAnalysis.lib.mdamath.triclinic_vectors`. - `dtype` must be :class:`numpy.float32` - - Note - ---- - Call to this method is only needed when the MD box is changed - as it always called when class is instantiated. - - """ - - if box.shape[0] != DIM or box.shape[1] != DIM: - raise ValueError("Box must be a {} x {} matrix. Got: {} x {})".format( - DIM, DIM, box.shape[0], box.shape[1])) - if (box[XX, XX] == 0) or (box[YY, YY] == 0) or (box[ZZ, ZZ] == 0): - raise ValueError("Box does not correspond to PBC=xyz") - self.fast_update(box) - - cdef void fast_pbc_dx(self, rvec ref, rvec other, drvec dx) nogil: - """Dislacement between two points for both - PBC and non-PBC conditions - - Modifies the displacement vector between two points based - on the minimum image convention for PBC aware calculations. - - For non-PBC aware distance evaluations, calculates the - displacement vector without any modifications - """ - - cdef ns_int i, j - - for i in range(DIM): - dx[i] = other[i] - ref[i] - - if self.periodic: - for i in range(DIM-1, -1, -1): - while dx[i] > self.c_pbcbox.hbox_diag[i]: - for j in range(i, -1, -1): - dx[j] -= self.c_pbcbox.box[i][j] +from libcpp.vector cimport vector +from libc cimport math - while dx[i] <= self.c_pbcbox.mhbox_diag[i]: - for j in range(i, -1, -1): - dx[j] += self.c_pbcbox.box[i][j] +DEF END = -1 - cdef dreal fast_distance2(self, rvec a, rvec b) nogil: - """Distance calculation between two points - for both PBC and non-PBC aware calculations +DEF XX = 0 +DEF XY = 3 +DEF YY = 4 +DEF XZ = 6 +DEF YZ = 7 +DEF ZZ = 8 - Returns the distance obeying minimum - image convention if periodic is set to ``True`` while - instantiating the :class:`_PBCBox` object. - """ +ctypedef float coordinate[3]; - cdef drvec dx - self.fast_pbc_dx(a, b, dx) - return drvec_norm2(dx) +cdef extern from "calc_distances.h" nogil: + void _minimum_image_ortho_lazy(double* x, float* box, float* half_box) + void minimum_image_triclinic(double* dx, float* box) + void _ortho_pbc(coordinate* coords, int numcoords, float* box) + void _triclinic_pbc(coordinate* coords, int numcoords, float* box) - cdef real[:, ::1] fast_put_atoms_in_bbox(self, real[:, ::1] coords) nogil: - """Shifts all ``coords`` to an orthogonal brick shaped box - All the coordinates are brought into an orthogonal - box. The box vectors for the brick-shaped box - are defined in ``fast_update`` method. +cdef inline float fmax(float a, float b): + if a > b: + return a + else: + return b - """ - cdef ns_int i, m, d, natoms - cdef real[:, ::1] bbox_coords - - natoms = coords.shape[0] - with gil: - bbox_coords = coords.copy() - - if self.periodic: - if self.is_triclinic: - for i in range(natoms): - for m in range(DIM - 1, -1, -1): - while bbox_coords[i, m] < 0: - for d in range(m+1): - bbox_coords[i, d] += self.c_pbcbox.box[m][d] - while bbox_coords[i, m] >= self.c_pbcbox.box[m][m]: - for d in range(m+1): - bbox_coords[i, d] -= self.c_pbcbox.box[m][d] - else: - for i in range(natoms): - for m in range(DIM): - while bbox_coords[i, m] < 0: - bbox_coords[i, m] += self.c_pbcbox.box[m][m] - while bbox_coords[i, m] >= self.c_pbcbox.box[m][m]: - bbox_coords[i, m] -= self.c_pbcbox.box[m][m] +cdef inline float fmin(float a, float b): + if a < b: + return a + else: + return b - return bbox_coords +cdef inline float degsin(float deg): + # sin in degrees + deg *= math.M_PI / 180. + return math.sin(deg) -######################### -# Neighbor Search Stuff # -######################### cdef class NSResults(object): """Class to store the results @@ -321,39 +126,10 @@ cdef class NSResults(object): be used to generate the desired results on demand. """ - cdef readonly real cutoff - cdef ns_int npairs + cdef vector[int] pairs + cdef vector[double] distances2 - cdef real[:, ::1] coords # shape: size, DIM - cdef real[:, ::1] searchcoords - - cdef vector[intvec] indices_buffer - cdef vector[drealvec] distances_buffer - cdef vector[ns_int] pairs_buffer - cdef vector[dreal] pair_distances_buffer - cdef vector[dreal] pair_distances2_buffer - - def __init__(self, dreal cutoff, real[:, ::1]coords, real[:, ::1]searchcoords): - """ - Parameters - ---------- - cutoff : float - Specified cutoff distance - coords : numpy.ndarray - Array with coordinates of atoms of shape ``(N, 3)`` for - ``N`` particles. ``dtype`` must be ``numpy.float32`` - searchcoords : numpy.ndarray - Array with query coordinates. Shape must be ``(M, 3)`` - for ``M`` queries. ``dtype`` must be ``numpy.float32`` - """ - - self.cutoff = cutoff - self.coords = coords - self.searchcoords = searchcoords - - self.npairs = 0 - - cdef void add_neighbors(self, ns_int beadid_i, ns_int beadid_j, dreal distance2) nogil: + cdef void add_neighbors(self, int beadid_i, int beadid_j, double distance2) nogil: """Internal function to add pairs and distances to buffers The buffers populated using this method are used by @@ -363,10 +139,9 @@ cdef class NSResults(object): which are considered as neighbors. """ - self.pairs_buffer.push_back(beadid_i) - self.pairs_buffer.push_back(beadid_j) - self.pair_distances2_buffer.push_back(distance2) - self.npairs += 1 + self.pairs.push_back(beadid_i) + self.pairs.push_back(beadid_j) + self.distances2.push_back(distance2) def get_pairs(self): """Returns all the pairs within the desired cutoff distance @@ -384,8 +159,7 @@ cdef class NSResults(object): pairs of atom indices of neighbors from query and initial atom coordinates of shape ``(N, 2)`` """ - - return np.asarray(self.pairs_buffer, dtype=np.intp).reshape(self.npairs, 2) + return np.asarray(self.pairs, dtype=np.intp).reshape(-1, 2) def get_pair_distances(self): """Returns all the distances corresponding to each pair of neighbors @@ -407,321 +181,44 @@ cdef class NSResults(object): :meth:`~NSResults.get_pairs` """ + dist2 = np.asarray(self.distances2) - self.pair_distances_buffer = np.sqrt(self.pair_distances2_buffer) - return np.asarray(self.pair_distances_buffer) - - cdef void create_buffers(self) nogil: - """ - Creates buffers to get individual neighbour list and distances - of the query atoms. - - """ - - cdef ns_int i, beadid_i, beadid_j - cdef ns_int idx, nsearch - cdef dreal dist2, dist - - nsearch = self.searchcoords.shape[0] - - self.indices_buffer = vector[intvec]() - self.distances_buffer = vector[drealvec]() - - # initialize rows corresponding to search - for i in range(nsearch): - self.indices_buffer.push_back(intvec()) - self.distances_buffer.push_back(drealvec()) - - for i in range(0, 2*self.npairs, 2): - beadid_i = self.pairs_buffer[i] - beadid_j = self.pairs_buffer[i + 1] - - dist2 = self.pair_distances2_buffer[i//2] - - self.indices_buffer[beadid_i].push_back(beadid_j) - - dist = sqrt(dist2) - - self.distances_buffer[beadid_i].push_back(dist) - - def get_indices(self): - """Individual neighbours of query atom - - For every queried atom ``i``, an array of all its neighbors - indices can be obtained from ``get_indices()[i]`` - - Returns - ------- - indices : list - Indices of neighboring atoms. - Every element i.e. ``indices[i]`` will be a list of - size ``m`` where m is the number of neighbours of - query atom[i]. - - .. code-block:: python - - results = NSResults() - indices = results.get_indices() - - ``indices[i]`` will be a list of neighboring atoms of - ``atom[i]`` from query atoms ``atom``. ``indices[i][j]`` will give - the atom-id of initial coordinates such that - ``initial_atom[indices[i][j]]`` is a neighbor of ``atom[i]``. - - """ - - if self.indices_buffer.empty(): - self.create_buffers() - return list(self.indices_buffer) - - def get_distances(self): - """Distance corresponding to individual neighbors of query atom - - For every queried atom ``i``, a list of all the distances - from its neighboring atoms can be obtained from ``get_distances()[i]``. - Every ``distance[i][j]`` will correspond - to the distance between atoms ``atom[i]`` from the query - atoms and ``atom[indices[j]]`` from the initialized - set of coordinates, where ``indices`` can be obtained - by ``get_indices()`` - - Returns - ------- - distances : list - Every element i.e. ``distances[i]`` will be an array of - shape ``m`` where m is the number of neighbours of - query atom[i]. - - .. code-block:: python - - results = NSResults() - distances = results.get_distances() - - See Also - -------- - :meth:`~NSResults.get_indices` - - """ - - if self.distances_buffer.empty(): - self.create_buffers() - return list(self.distances_buffer) - - -cdef class _NSGrid(object): - """Constructs a uniform cuboidal grid for a brick-shaped box - - This class uses :class:`_PBCBox` to define the brick shaped box - It is essential to initialize the box with :class:`_PBCBox` - inorder to form the grid. - - The domain is subdivided into number of cells based on the desired search - radius. Ideally cellsize should be equal to the search radius, but small - search radius leads to large cell-list data strucutres. - An optimization of cutoff is imposed to limit the size of data - structure such that the cellsize is always greater than or - equal to cutoff distance. - - Note - ---- - This class assumes that all the coordinates are already - inside the brick shaped box. Care must be taken to ensure - all the particles are within the brick shaped box as - defined by :class:`_PBCBox`. This can be ensured by using - :func:`~MDAnalysis.lib.nsgrid._PBCBox.fast_put_atoms_in_bbox` - - .. warning:: - This class is not meant to be used by end users. - - """ - - cdef readonly dreal used_cutoff # cutoff used for cell creation - cdef ns_int size # total cells - cdef ns_int ncoords # number of coordinates - cdef ns_int[DIM] ncells # individual cells in every dimension - cdef ns_int[DIM] cell_offsets # Cell Multipliers - # cellsize MUST be double precision, otherwise coord2cellid() may fail for - # coordinates very close to the upper box boundaries! See Issue #2132 - cdef dreal[DIM] cellsize # cell size in every dimension - cdef ns_int nbeads_per_cell # maximum beads - cdef ns_int *nbeads # size (Number of beads in every cell) - cdef ns_int *beadids # size * nbeads_per_cell (Beadids in every cell) - cdef ns_int *cellids # ncoords (Cell occupation id for every atom) - cdef bint force # To negate the effects of optimized cutoff - - def __init__(self, ncoords, cutoff, _PBCBox box, max_size, force=False): - """ - Parameters - ---------- - ncoords : int - Number of coordinates to fill inside the brick shaped box - cutoff : float - Desired cutoff radius - box : _PBCBox - Instance of :class:`_PBCBox` - max_size : int - Maximum total number of cells - force : boolean - Optimizes cutoff if set to ``False`` [False] - """ - - cdef ns_int i - cdef ns_int ncellx, ncelly, ncellz - cdef ns_int xi, yi, zi - cdef real bbox_vol - cdef dreal relative_cutoff_margin - cdef dreal original_cutoff - - self.ncoords = ncoords - - if not force: - # Calculate best cutoff, with 0.01A minimum - cutoff = max(cutoff, 0.01) - original_cutoff = cutoff - # First, we add a small margin to the cell size so that we can safely - # use the condition d <= cutoff (instead of d < cutoff) for neighbor - # search. - relative_cutoff_margin = 1.0e-8 - while cutoff == original_cutoff: - cutoff = cutoff * (1.0 + relative_cutoff_margin) - relative_cutoff_margin *= 10.0 - bbox_vol = box.c_pbcbox.box[XX][XX] * box.c_pbcbox.box[YY][YY] * box.c_pbcbox.box[YY][YY] - while bbox_vol / cutoff**3 > max_size: - cutoff *= 1.2 - - for i in range(DIM): - self.ncells[i] = (box.c_pbcbox.box[i][i] / cutoff) - self.cellsize[i] = box.c_pbcbox.box[i][i] / self.ncells[i] - self.size = self.ncells[XX] * self.ncells[YY] * self.ncells[ZZ] - self.used_cutoff = cutoff - - self.cell_offsets[XX] = 0 - self.cell_offsets[YY] = self.ncells[XX] - self.cell_offsets[ZZ] = self.ncells[XX] * self.ncells[YY] - - # Allocate memory - # Number of beads in every cell - self.nbeads = PyMem_Malloc(sizeof(ns_int) * self.size) - if not self.nbeads: - raise MemoryError("Could not allocate memory from _NSGrid.nbeads ({} bits requested)".format(sizeof(ns_int) * self.size)) - self.beadids = NULL - # Cellindex of every bead - self.cellids = PyMem_Malloc(sizeof(ns_int) * self.ncoords) - if not self.cellids: - raise MemoryError("Could not allocate memory from _NSGrid.cellids ({} bits requested)".format(sizeof(ns_int) * self.ncoords)) - self.nbeads_per_cell = 0 - - for i in range(self.size): - self.nbeads[i] = 0 - - def __dealloc__(self): - PyMem_Free(self.nbeads) - PyMem_Free(self.beadids) - PyMem_Free(self.cellids) - - cdef ns_int coord2cellid(self, rvec coord) nogil: - """Finds the cell-id for the given coordinate inside the brick shaped box - - Note - ---- - Assumes the coordinate is already inside the brick shaped box. - Return wrong cell-id if this is not the case - """ - return (coord[ZZ] / self.cellsize[ZZ]) * (self.cell_offsets[ZZ]) +\ - (coord[YY] / self.cellsize[YY]) * self.cell_offsets[YY] + \ - (coord[XX] / self.cellsize[XX]) - - cdef bint cellid2cellxyz(self, ns_int cellid, ivec cellxyz) nogil: - """Finds actual cell position `(x, y, z)` from a cell-id - """ - - if cellid < 0: - return False - if cellid >= self.size: - return False - - cellxyz[ZZ] = (cellid / self.cell_offsets[ZZ]) - cellid -= cellxyz[ZZ] * self.cell_offsets[ZZ] - - cellxyz[YY] = (cellid / self.cell_offsets[YY]) - cellxyz[XX] = cellid - cellxyz[YY] * self.cell_offsets[YY] - - return True - - cdef fill_grid(self, real[:, ::1] coords): - """Sorts atoms into cells based on their position in the brick shaped box - - Every atom inside the brick shaped box is assigned a - cell-id based on its position. Another list ``beadids`` - sort the atom-ids in each cell. - - Note - ---- - The method fails if any coordinate is outside the brick shaped box. - - """ - - cdef ns_int i, cellindex = -1 - cdef ns_int ncoords = coords.shape[0] - cdef ns_int[:] beadcounts = np.empty(self.size, dtype=int) - - with nogil: - # Initialize buffers - for i in range(self.size): - beadcounts[i] = 0 - - # First loop: find cellindex for each bead - for i in range(ncoords): - cellindex = self.coord2cellid(&coords[i, 0]) - - self.nbeads[cellindex] += 1 - self.cellids[i] = cellindex - - if self.nbeads[cellindex] > self.nbeads_per_cell: - self.nbeads_per_cell = self.nbeads[cellindex] - - # Allocate memory - self.beadids = PyMem_Malloc(sizeof(ns_int) * self.size * self.nbeads_per_cell) # np.empty((self.size, nbeads_max), dtype=np.int) - if not self.beadids: - raise MemoryError("Could not allocate memory for _NSGrid.beadids ({} bits requested)".format(sizeof(ns_int) * self.size * self.nbeads_per_cell)) - - with nogil: - # Second loop: fill grid - for i in range(ncoords): - - # Add bead to grid cell - cellindex = self.cellids[i] - self.beadids[cellindex * self.nbeads_per_cell + beadcounts[cellindex]] = i - beadcounts[cellindex] += 1 + return np.sqrt(dist2) cdef class FastNS(object): - """Grid based search between two group of atoms - - Instantiates a class object which uses :class:`_PBCBox` and - :class:`_NSGrid` to construct a cuboidal - grid in an orthogonal brick shaped box. + """Grid based search between positions Minimum image convention is used for distance evaluations if pbc is set to ``True``. - """ - cdef _PBCBox box - cdef real[:, ::1] coords - cdef real[:, ::1] coords_bbox - cdef readonly dreal cutoff - cdef _NSGrid grid - cdef ns_int max_gridsize - cdef bint periodic + .. versionchanged:: 1.0.2 + Rewrote to fix bugs with triclinic boxes + """ + cdef readonly double cutoff + cdef float[:, ::1] coords_bbox - def __init__(self, cutoff, coords, box, max_gridsize=5000, pbc=True): + cdef int[3] ncells # individual cells in every dimension + cdef int[3] cell_offsets # Cell Multipliers + # cellsize MUST be double precision, otherwise coord2cellid() may fail for + # coordinates very close to the upper box boundaries! See Issue #2132 + # diagonal stores the cell width, off diagonal elements are the "tilt" + # i.e. cellsize[3] is the dxdy tilt (box[XY] / box[YY]) + cdef double[9] cellsize + + cdef int[::1] head_id # first coord id for a given cell + cdef int[::1] next_id # next coord id after a given cell + cdef bint triclinic + cdef float[6] dimensions + cdef float[3] half_dimensions + cdef float[3] inverse_dimensions + cdef float[9] triclinic_dimensions + # are we periodic in the X, Y and Z dimension? + cdef bint pbc # periodic at all? + cdef bint periodic[3] + + def __init__(self, cutoff, coords, box, pbc=True): """ - Initialize the grid and sort the coordinates in respective - cells by shifting the coordinates in a brick shaped box. - The brick shaped box is defined by :class:`_PBCBox` - and cuboidal grid is initialize by :class:`_NSGrid`. - If box is supplied, periodic shifts along box vectors are used - to contain all the coordinates inside the brick shaped box. If box is not supplied, the range of coordinates i.e. ``[xmax, ymax, zmax] - [xmin, ymin, zmin]`` should be used to construct a pseudo box. Subsequently, the origin should also be @@ -744,9 +241,6 @@ cdef class FastNS(object): ``[lx, ly, lz, alpha, beta, gamma]``. For non-PBC evaluations, provide an orthogonal bounding box (dtype = numpy.float32) - max_gridsize : int - maximum number of cells in the grid. This parameter - can be tuned for superior performance. pbc : boolean Handle to switch periodic boundary conditions on/off [True] @@ -775,40 +269,226 @@ cdef class FastNS(object): gridsearch = FastNS(max_cutoff, shift, box=pseudobox, pbc=False) """ - - from MDAnalysis.lib.mdamath import triclinic_vectors - if (coords.ndim != 2 or coords.shape[1] != 3): raise ValueError("coords must have a shape of (n, 3), got {}." "".format(coords.shape)) - - if np.allclose(box[:3], 0.0): + if box.shape != (6,): + raise ValueError("Box must be a numpy array of [lx, ly, lz, alpha, beta, gamma], got {}" + "".format(box)) + if (box[:3] == 0.0).any(): raise ValueError("Any of the box dimensions cannot be 0") + if cutoff < 0: + raise ValueError("Cutoff must be positive") + self.cutoff = cutoff + max_cutoff = self._prepare_box(box, pbc) + if cutoff > max_cutoff: + raise ValueError("Cutoff {} too large for box (max {})".format(cutoff, max_cutoff)) + self._pack_grid(coords) - self.periodic = pbc - self.coords = coords.astype(np.float32, order='C', copy=True) + cdef float _prepare_box(self, box, bint pbc): + """ + Parameters + ---------- + box : numpy ndarray shape=(6,) + Box info, [lx, ly, lz, alpha, beta, gamma] + pbc : bool + is this NSGrid periodic at all? - if box.shape != (3, 3): - box = triclinic_vectors(box) + Returns + ------- + max_cutoff : float + the maximum allowable cutoff given the box shape and size + """ + cdef float cutoff, min_cellsize, max_cutoff, new_cellsize + cdef int i - self.box = _PBCBox(box, self.periodic) + from MDAnalysis.lib.mdamath import triclinic_vectors - if cutoff < 0: - raise ValueError("Cutoff must be positive!") - if cutoff * cutoff > self.box.c_pbcbox.max_cutoff2: - raise ValueError("Cutoff greater than maximum cutoff ({:.3f}) given the PBC") + for i in range(3): + self.dimensions[i] = box[i] + self.half_dimensions[i] = 0.5 * box[i]; + self.inverse_dimensions[i] = 1.0 / box[i] + self.dimensions[3] = box[3] + self.dimensions[4] = box[4] + self.dimensions[5] = box[5] + + self.triclinic_dimensions = triclinic_vectors(box).reshape((9,)) + self.triclinic = (self.triclinic_dimensions[XY] != 0 or + self.triclinic_dimensions[XZ] != 0 or + self.triclinic_dimensions[YZ] != 0) + cutoff = max(self.cutoff, 1.0) # TODO: Figure out max ncells and stick to that + # Find maximum allowable cutoff + # For orthogonal cell this is just half the shortest boxlength (sine values all 1.0) + # For triclinic the box tilt creates a shorter path across the box we must account for + # alpha + max_cutoff = self.triclinic_dimensions[YY] * degsin(self.dimensions[3]) + max_cutoff = fmin(max_cutoff, self.triclinic_dimensions[ZZ] * degsin(self.dimensions[3])) + # beta + max_cutoff = fmin(max_cutoff, self.triclinic_dimensions[XX] * degsin(self.dimensions[4])) + max_cutoff = fmin(max_cutoff, self.triclinic_dimensions[ZZ] * degsin(self.dimensions[4])) + # gamma + max_cutoff = fmin(max_cutoff, self.triclinic_dimensions[XX] * degsin(self.dimensions[5])) + max_cutoff = fmin(max_cutoff, self.triclinic_dimensions[YY] * degsin(self.dimensions[5])) + max_cutoff /= 2 + + # for triclinic cells, we need to worry about the shortest path across the cells + min_cellsize = cutoff + if self.triclinic: + for i in range(3, 6): + # cutoff/sin(theta) to elongate the XX/YY/ZZ dimension to make smallest diagonal large enough + new_cellsize = cutoff / degsin(self.dimensions[i]) + min_cellsize = fmax(new_cellsize, min_cellsize) + + # add 0.001 here to avoid floating point errors + # will make cells slightly too large as a result, ah well + min_cellsize += 0.001 + self.ncells[0] = math.floor(self.triclinic_dimensions[XX] / min_cellsize) + self.ncells[1] = math.floor(self.triclinic_dimensions[YY] / min_cellsize) + self.ncells[2] = math.floor(self.triclinic_dimensions[ZZ] / min_cellsize) + + self.pbc = pbc + # If there aren't enough cells in a given dimension it's equivalent to one + # this prevents double counting of results if a cell would have the same + # neighbour above and below + if pbc: + for i in range(3): + if self.ncells[i] <= 3: + self.ncells[i] = 1 + self.periodic[i] = False + else: + self.periodic[i] = True + else: + for i in range(3): + if self.ncells[i] <= 2: + self.ncells[i] = 1 + self.periodic[i] = False + # Off diagonal cellsizes are actually the tilt, i.e. dy/dz or similar + self.cellsize[XX] = self.triclinic_dimensions[XX] / self.ncells[0] + # [YX] and [ZX] are 0 + self.cellsize[XY] = self.triclinic_dimensions[XY] / self.triclinic_dimensions[YY] + self.cellsize[YY] = self.triclinic_dimensions[YY] / self.ncells[1] + # [ZY] is zero + self.cellsize[XZ] = self.triclinic_dimensions[XZ] / self.triclinic_dimensions[ZZ] + self.cellsize[YZ] = self.triclinic_dimensions[YZ] / self.triclinic_dimensions[ZZ] + self.cellsize[ZZ] = self.triclinic_dimensions[ZZ] / self.ncells[2] + + self.cell_offsets[0] = 0 + self.cell_offsets[1] = self.ncells[0] + self.cell_offsets[2] = self.ncells[0] * self.ncells[1] + + return max_cutoff + + cdef void _pack_grid(self, float[:, :] coords): + """Assigns coordinates into cells - self.coords_bbox = self.box.fast_put_atoms_in_bbox(self.coords) + Parameters + ---------- + coords : np.ndarray float of shape (ncoords, 3) + Coordinates to populate the box + """ + cdef int i, j - self.cutoff = cutoff - self.max_gridsize = max_gridsize - # Note that self.cutoff might be different from self.grid.cutoff - # due to optimization - self.grid = _NSGrid(self.coords_bbox.shape[0], self.cutoff, self.box, self.max_gridsize) + # Linked list for each cell + # Starting coordinate index for each cell (END if empty cell) + self.head_id = np.full(self.cell_offsets[2] * self.ncells[2], END, dtype=np.int32, order='C') + # Next coordinate index in cell for each coordinate (END if end of sequence) + self.next_id = np.full(coords.shape[0], END, dtype=np.int32, order='C') + + self.coords_bbox = coords.copy() + with nogil: + if self.triclinic: + _triclinic_pbc(&self.coords_bbox[0][0], + self.coords_bbox.shape[0], + &self.triclinic_dimensions[0]) + else: + _ortho_pbc(&self.coords_bbox[0][0], + self.coords_bbox.shape[0], + &self.dimensions[0]) + + for i in range(self.coords_bbox.shape[0]): + j = self.coord2cellid(&self.coords_bbox[i][0]) + self.next_id[i] = self.head_id[j] + self.head_id[j] = i + + cdef int coord2cellid(self, const float* coord) nogil: + """Finds the cell-id for the given coordinate + + Note + ---- + Assumes the coordinate is already inside the primary unit cell. + Return wrong cell-id if this is not the case + """ + cdef int xyz[3] + + self.coord2cellxyz(coord, xyz) + + return xyz[0] + xyz[1] * self.cell_offsets[1] + xyz[2] * self.cell_offsets[2] + + cdef void coord2cellxyz(self, const float* coord, int* xyz) nogil: + """Calculate cell coordinate for coord""" + # This assumes coordinate is inside the primary unit cell + xyz[2] = (coord[2] / self.cellsize[ZZ]) + xyz[1] = ((coord[1] - coord[2] * self.cellsize[YZ]) / self.cellsize[YY]) + xyz[0] = ((coord[0] - coord[1] * self.cellsize[XY] + - coord[2] * self.cellsize[XZ]) / self.cellsize[XX]) + # Make sure cell coordinate indices are within the primary unit cell + # (better safe than sorry): + xyz[0] %= self.ncells[0] + xyz[1] %= self.ncells[1] + xyz[2] %= self.ncells[2] + + cdef int cellxyz2cellid(self, int cx, int cy, int cz) nogil: + """Convert cell coordinate to cell id, END for out of bounds""" + if cx < 0: + if self.periodic[0]: + cx = self.ncells[0] - 1 + else: + return END + elif cx == self.ncells[0]: + if self.periodic[0]: + cx = 0 + else: + return END + if cy < 0: + if self.periodic[1]: + cy = self.ncells[1] - 1 + else: + return END + elif cy == self.ncells[1]: + if self.periodic[1]: + cy = 0 + else: + return END + if cz < 0: + if self.periodic[2]: + cz = self.ncells[2] - 1 + else: + return END + elif cz == self.ncells[2]: + if self.periodic[2]: + cz = 0 + else: + return END - self.grid.fill_grid(self.coords_bbox) + return cx + cy * self.cell_offsets[1] + cz * self.cell_offsets[2] - def search(self, search_coords): + cdef double calc_distsq(self, const float* a, const float* b) nogil: + cdef double dx[3] + + dx[0] = a[0] - b[0] + dx[1] = a[1] - b[1] + dx[2] = a[2] - b[2] + + if self.pbc: + if self.triclinic: + minimum_image_triclinic(dx, &self.triclinic_dimensions[0]) + else: + _minimum_image_ortho_lazy(dx, &self.dimensions[0], + &self.half_dimensions[0]) + + return dx[0]*dx[0] + dx[1]*dx[1] + dx[2]*dx[2] + + def search(self, float[:, :] search_coords): """Search a group of atoms against initialized coordinates Creates a new grid with the query atoms and searches @@ -830,8 +510,7 @@ cdef class FastNS(object): ------- results : NSResults An :class:`NSResults` object holding neighbor search results, which - can be accessed by its methods :meth:`~NSResults.get_indices`, - :meth:`~NSResults.get_distances`, :meth:`~NSResults.get_pairs`, and + can be accessed by its methods :meth:`~NSResults.get_pairs` and :meth:`~NSResults.get_pair_distances`. Note @@ -840,82 +519,56 @@ cdef class FastNS(object): if any of the query coordinates lies outside the `box` supplied to :class:`~MDAnalysis.lib.nsgrid.FastNS`. """ - - cdef ns_int i, j, size_search - cdef ns_int d, m - cdef ns_int current_beadid, bid - cdef ns_int cellindex, cellindex_probe - cdef ns_int xi, yi, zi - - cdef NSResults results - - cdef dreal d2 - cdef rvec probe - - cdef real[:, ::1] searchcoords - cdef real[:, ::1] searchcoords_bbox - cdef _NSGrid searchgrid - cdef bint check - - cdef dreal cutoff2 = self.cutoff * self.cutoff - cdef ns_int npairs = 0 + cdef int i, j, size_search + cdef int cx, cy, cz + cdef int cellid + cdef int xi, yi, zi + cdef int cellcoord[3] + cdef float tmpcoord[3] + + cdef NSResults results = NSResults() + cdef double d2, cutoff2 + + cutoff2 = self.cutoff * self.cutoff if (search_coords.ndim != 2 or search_coords.shape[1] != 3): raise ValueError("search_coords must have a shape of (n, 3), got " "{}.".format(search_coords.shape)) - # Generate another grid to search - searchcoords = search_coords.astype(np.float32, order='C', copy=False) - searchcoords_bbox = self.box.fast_put_atoms_in_bbox(searchcoords) - searchgrid = _NSGrid(searchcoords_bbox.shape[0], self.grid.used_cutoff, self.box, self.max_gridsize, force=True) - - size_search = searchcoords.shape[0] - - results = NSResults(self.cutoff, self.coords, searchcoords) - with nogil: + size_search = search_coords.shape[0] for i in range(size_search): - # Start with first search coordinate - current_beadid = i - # find the cellindex of the coordinate - cellindex = searchgrid.cellids[current_beadid] - for xi in range(DIM): - for yi in range(DIM): - for zi in range(DIM): - check = True - #Probe the search coordinates in a brick shaped box - probe[XX] = searchcoords_bbox[current_beadid, XX] + (xi - 1) * searchgrid.cellsize[XX] - probe[YY] = searchcoords_bbox[current_beadid, YY] + (yi - 1) * searchgrid.cellsize[YY] - probe[ZZ] = searchcoords_bbox[current_beadid, ZZ] + (zi - 1) * searchgrid.cellsize[ZZ] - # Make sure the probe coordinates is inside the brick-shaped box - if self.periodic: - for m in range(DIM - 1, -1, -1): - while probe[m] < 0: - for d in range(m+1): - probe[d] += self.box.c_pbcbox.box[m][d] - while probe[m] >= self.box.c_pbcbox.box[m][m]: - for d in range(m+1): - probe[d] -= self.box.c_pbcbox.box[m][d] - else: - for m in range(DIM -1, -1, -1): - if probe[m] < 0: - check = False - break - if probe[m] > self.box.c_pbcbox.box[m][m]: - check = False - break - if not check: + tmpcoord[0] = search_coords[i][0] + tmpcoord[1] = search_coords[i][1] + tmpcoord[2] = search_coords[i][2] + if self.triclinic: + _triclinic_pbc(&tmpcoord[0], 1, + &self.triclinic_dimensions[0]) + else: + _ortho_pbc(&tmpcoord[0], 1, + &self.dimensions[0]) + # which cell is atom *i* in + self.coord2cellxyz(&tmpcoord[0], cellcoord) + # loop over all 27 neighbouring cells + for xi in range(3): + for yi in range(3): + for zi in range(3): + cx = cellcoord[0] - 1 + xi + cy = cellcoord[1] - 1 + yi + cz = cellcoord[2] - 1 + zi + cellid = self.cellxyz2cellid(cx, cy, cz) + + if cellid == END: # out of bounds continue - # Get the cell index corresponding to the probe - cellindex_probe = self.grid.coord2cellid(probe) - # for this cellindex search in grid - for j in range(self.grid.nbeads[cellindex_probe]): - bid = self.grid.beadids[cellindex_probe * self.grid.nbeads_per_cell + j] - # find distance between search coords[i] and coords[bid] - d2 = self.box.fast_distance2(&searchcoords_bbox[current_beadid, XX], &self.coords_bbox[bid, XX]) + # for loop over atoms in searchcoord + j = self.head_id[cellid] + while (j != END): + d2 = self.calc_distsq(&tmpcoord[0], + &self.coords_bbox[j][0]) if d2 <= cutoff2: - results.add_neighbors(current_beadid, bid, d2) - npairs += 1 + # place search_coords then self.bbox_coords + results.add_neighbors(i, j, d2) + j = self.next_id[j] return results def self_search(self): @@ -930,74 +583,56 @@ cdef class FastNS(object): ------- results : NSResults An :class:`NSResults` object holding neighbor search results, which - can be accessed by its methods :meth:`~NSResults.get_indices`, - :meth:`~NSResults.get_distances`, :meth:`~NSResults.get_pairs`, and + can be accessed by its methods :meth:`~NSResults.get_pairs` and :meth:`~NSResults.get_pair_distances`. """ + cdef int cx, cy, cz, ox, oy, oz + cdef int ci, cj, i, j, nj + cdef int cellindex, cellindex_probe + cdef int xi, yi, zi + cdef NSResults results = NSResults() + cdef double d2 + cdef double cutoff2 = self.cutoff * self.cutoff + # route over 13 neighbouring cells + cdef int[13][3] route = [[1, 0, 0], [1, 1, 0], [0, 1, 0], [-1, 1, 0], + [1, 0, -1], [1, 1, -1], [0, 1, -1], [-1, 1, -1], + [1, 0, 1], [1, 1, 1], [0, 1, 1], [-1, 1, 1], [0, 0, 1]] + + for cx in range(self.ncells[0]): + for cy in range(self.ncells[1]): + for cz in range(self.ncells[2]): + ci = self.cellxyz2cellid(cx, cy, cz) + + i = self.head_id[ci] + while (i != END): + # pairwise within this cell + j = self.next_id[i] + while (j != END): + d2 = self.calc_distsq(&self.coords_bbox[i][0], + &self.coords_bbox[j][0]) + if d2 <= cutoff2: + results.add_neighbors(i, j, d2) + j = self.next_id[j] + + # loop over 13 neighbouring cells + for nj in range(13): + ox = cx + route[nj][0] + oy = cy + route[nj][1] + oz = cz + route[nj][2] + + cj = self.cellxyz2cellid(ox, oy, oz) + if cj == END: + continue - cdef ns_int i, j, size_search - cdef ns_int d, m - cdef ns_int current_beadid, bid - cdef ns_int cellindex, cellindex_probe - cdef ns_int xi, yi, zi - - cdef NSResults results - cdef dreal d2 - cdef rvec probe - - cdef dreal cutoff2 = self.cutoff * self.cutoff - cdef ns_int npairs = 0 - cdef bint check - - size_search = self.coords.shape[0] + j = self.head_id[cj] + while (j != END): + d2 = self.calc_distsq(&self.coords_bbox[i][0], + &self.coords_bbox[j][0]) + if d2 <= cutoff2: + results.add_neighbors(i, j, d2) + j = self.next_id[j] - results = NSResults(self.cutoff, self.coords, self.coords) + # move to next position in cell *ci* + i = self.next_id[i] - with nogil: - for i in range(size_search): - # Start with first search coordinate - current_beadid = i - # find the cellindex of the coordinate - cellindex = self.grid.cellids[current_beadid] - for xi in range(DIM): - for yi in range(DIM): - for zi in range(DIM): - check = True - # Calculate and/or reinitialize shifted coordinates - # Probe the search coordinates in a brick shaped box - probe[XX] = self.coords_bbox[current_beadid, XX] + (xi - 1) * self.grid.cellsize[XX] - probe[YY] = self.coords_bbox[current_beadid, YY] + (yi - 1) * self.grid.cellsize[YY] - probe[ZZ] = self.coords_bbox[current_beadid, ZZ] + (zi - 1) * self.grid.cellsize[ZZ] - # Make sure the shifted coordinates is inside the brick-shaped box - if self.periodic: - for m in range(DIM - 1, -1, -1): - while probe[m] < 0: - for d in range(m+1): - probe[d] += self.box.c_pbcbox.box[m][d] - while probe[m] >= self.box.c_pbcbox.box[m][m]: - for d in range(m+1): - probe[d] -= self.box.c_pbcbox.box[m][d] - else: - for m in range(DIM -1, -1, -1): - if probe[m] < 0: - check = False - break - elif probe[m] > self.box.c_pbcbox.box[m][m]: - check = False - break - if not check: - continue - # Get the cell index corresponding to the probe - cellindex_probe = self.grid.coord2cellid(probe) - # for this cellindex search in grid - for j in range(self.grid.nbeads[cellindex_probe]): - bid = self.grid.beadids[cellindex_probe * self.grid.nbeads_per_cell + j] - if bid < current_beadid: - continue - # find distance between search coords[i] and coords[bid] - d2 = self.box.fast_distance2(&self.coords_bbox[current_beadid, XX], &self.coords_bbox[bid, XX]) - if d2 <= cutoff2 and d2 > EPSILON: - results.add_neighbors(current_beadid, bid, d2) - results.add_neighbors(bid, current_beadid, d2) - npairs += 2 return results diff --git a/package/MDAnalysis/topology/TPRParser.py b/package/MDAnalysis/topology/TPRParser.py index 9b7365bd231..6490a67362d 100644 --- a/package/MDAnalysis/topology/TPRParser.py +++ b/package/MDAnalysis/topology/TPRParser.py @@ -195,7 +195,6 @@ def parse(self, tpr_resid_from_one=True, **kwargs): .. versionchanged:: 1.1.0 Added the ``tpr_resid_from_one`` keyword to control if resids are indexed from 0 or 1. Default ``False``. - .. versionchanged:: 2.0.0 Changed to ``tpr_resid_from_one=True`` by default. """ diff --git a/package/pypi-description.rst b/package/pypi-description.rst new file mode 100644 index 00000000000..e35df75bfa9 --- /dev/null +++ b/package/pypi-description.rst @@ -0,0 +1,183 @@ +================================ + MDAnalysis Repository README +================================ + +|numfocus| |build| |cov| [*]_ + +|docs| |devdocs| |usergroup| |developergroup| |anaconda| |mybinder| + +MDAnalysis_ is a Python library for the analysis of computer simulations of many-body systems at the molecular scale, spanning use cases from interactions of drugs with proteins to novel materials. It is widely used in the scientific community and is written by scientists for scientists. + +It works with a wide range of popular simulation packages including Gromacs, Amber, NAMD, CHARMM, DL_Poly, HooMD, LAMMPS and many others — see the lists of supported `trajectory formats`_ and `topology formats`_. +MDAnalysis also includes widely used analysis algorithms in the `MDAnalysis.analysis`_ module. + +.. _numfocus-fiscal-sponsor-attribution: + +The MDAnalysis project uses an `open governance model`_ and is fiscally sponsored by `NumFOCUS`_. Consider making +a `tax-deductible donation`_ to help the project pay for developer time, professional services, travel, workshops, and a variety of other needs. + +.. image:: https://www.mdanalysis.org/public/images/numfocus-sponsored-small.png + :alt: NumFOCUS (Fiscally Sponsored Project) + :target: https://numfocus.org/project/mdanalysis + :align: center + +This project is bound by a `Code of Conduct`_. + + +Example analysis script +======================= + +.. code:: python + + import MDAnalysis as mda + + # Load simulation results with a single line + u = mda.Universe('topol.tpr','traj.trr') + + # Select atoms + ag = u.select_atoms('name OH') + + # Atom data made available as Numpy arrays + ag.positions + ag.velocities + ag.forces + + # Iterate through trajectories + for ts in u.trajectory: + print(ag.center_of_mass()) + + +Documentation +============= + +**New users** should read the `Quickstart Guide`_ and might want to +look at our videos_, in which core developers explain various aspects +of MDAnalysis. + +**All users** should read the `User Guide`_. + +**Developers** may also want to refer to the `MDAnalysis API docs`_. + +A growing number of **tutorials_** are available that explain how to +conduct RMSD calculations, structural alignment, distance and contact +analysis, and many more. + + +Installation and availability +============================= + +The latest release can be **installed via ``pip`` or ``conda``** as +described in the `Installation Quick Start`_. + +**Source code** is hosted in a git repository at +https://github.com/MDAnalysis/mdanalysis and is available under the +GNU General Public License, version 2 (see the file LICENSE_). + + +Contributing +============ + +Please report **bugs** or **enhancement requests** through the `Issue +Tracker`_. Questions can also be asked on the `user mailing list`_. + +If you are a **new developer** who would like to start contributing to +MDAnalysis get in touch on the `developer mailing list`_. To set up a +development environment and run the test suite read the `developer +guide`_. + + +Citation +======== + +When using MDAnalysis in published work, please cite the following +two papers: + +* R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, + M. N. Melo, S. L. Seyler, D. L. Dotson, J. Domanski, + S. Buchoux, I. M. Kenney, and O. Beckstein. MDAnalysis: + A Python package for the rapid analysis of molecular + dynamics simulations. In S. Benthall and S. Rostrup, + editors, Proceedings of the 15th Python in Science + Conference, pages 102-109, Austin, TX, 2016. SciPy. + doi:`10.25080/Majora-629e541a-00e`_ + +* N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, + and O. Beckstein. MDAnalysis: A Toolkit for the Analysis of Molecular + Dynamics Simulations. *J. Comput. Chem.* **32** (2011), 2319--2327. + doi:`10.1002/jcc.21787`_ + +For citations of included algorithms and sub-modules please see the references_. + + + +.. Footnotes + +.. [*] **build**: Unit testing is for the whole package; **coverage** is + shown for the core library modules and the analysis modules. + +.. _NumFOCUS: https://numfocus.org/ +.. _open governance model: https://www.mdanalysis.org/about/#governance +.. _tax-deductible donation: https://numfocus.org/donate-to-mdanalysis +.. _`Code of Conduct`: https://www.mdanalysis.org/pages/conduct/ +.. _trajectory formats: https://docs.mdanalysis.org/documentation_pages/coordinates/init.html#id1 +.. _topology formats: https://docs.mdanalysis.org/documentation_pages/topology/init.html#supported-topology-formats +.. _MDAnalysis: https://www.mdanalysis.org +.. _LICENSE: + https://github.com/MDAnalysis/mdanalysis/blob/master/LICENSE +.. _`Installation Quick Start`: + https://www.mdanalysis.org/pages/installation_quick_start/ +.. _`MDAnalysis.analysis`: https://docs.mdanalysis.org/documentation_pages/analysis_modules.html +.. _`tutorials`: https://userguide.mdanalysis.org/examples/README.html +.. _`videos`: https://www.mdanalysis.org/pages/learning_MDAnalysis/#videos +.. _`Quickstart Guide`: + https://userguide.mdanalysis.org/examples/quickstart.html +.. _`User Guide`: https://userguide.mdanalysis.org +.. _`MDAnalysis API docs`: + https://docs.mdanalysis.org +.. _`Issue Tracker`: https://github.com/mdanalysis/mdanalysis/issues +.. _`user mailing list`: + https://groups.google.com/group/mdnalysis-discussion +.. _`developer guide`: + https://userguide.mdanalysis.org/contributing.html +.. _`developer mailing list`: + https://groups.google.com/group/mdnalysis-devel +.. _`10.1002/jcc.21787`: https://dx.doi.org/10.1002/jcc.21787 +.. _`10.25080/Majora-629e541a-00e`: https://doi.org/10.25080/Majora-629e541a-00e +.. _references: https://docs.mdanalysis.org/documentation_pages/references.html + + +.. |usergroup| image:: https://img.shields.io/badge/Google%20Group-Users-lightgrey.svg + :alt: User Google Group + :target: https://groups.google.com/group/mdnalysis-discussion + +.. |developergroup| image:: https://img.shields.io/badge/Google%20Group-Developers-lightgrey.svg + :alt: Developer Google Group + :target: https://groups.google.com/group/mdnalysis-devel + +.. |docs| image:: https://img.shields.io/badge/docs-latest-brightgreen.svg + :alt: Documentation (latest release) + :target: https://docs.mdanalysis.org + +.. |devdocs| image:: https://img.shields.io/badge/docs-development-yellow.svg + :alt: Documentation (development version) + :target: https://docs.mdanalysis.org/dev + +.. |numfocus| image:: https://img.shields.io/badge/powered%20by-NumFOCUS-orange.svg?style=flat&colorA=E1523D&colorB=007D8A + :alt: Powered by NumFOCUS + :target: https://www.numfocus.org/ + +.. |build| image:: https://travis-ci.com/MDAnalysis/mdanalysis.svg?branch=develop + :alt: Build Status + :target: https://travis-ci.com/MDAnalysis/mdanalysis + +.. |cov| image:: https://codecov.io/gh/MDAnalysis/mdanalysis/branch/develop/graph/badge.svg + :alt: Coverage Status + :target: https://codecov.io/gh/MDAnalysis/mdanalysis + +.. |anaconda| image:: https://anaconda.org/conda-forge/mdanalysis/badges/version.svg + :alt: Anaconda + :target: https://anaconda.org/conda-forge/mdanalysis + +.. |mybinder| image:: https://mybinder.org/badge.svg + :alt: My Binder + :target: https://mybinder.org/v2/gh/MDAnalysis/binder-notebook/master diff --git a/package/setup.py b/package/setup.py index 004394c2449..400f8fbf42d 100755 --- a/package/setup.py +++ b/package/setup.py @@ -158,7 +158,7 @@ class MDAExtension(Extension, object): # care of calling it when needed. def __init__(self, name, sources, *args, **kwargs): self._mda_include_dirs = [] - sources = [abspath(s) for s in sources] + # don't abspath sources else packaging fails on Windows (issue #3129) super(MDAExtension, self).__init__(name, sources, *args, **kwargs) @property @@ -269,12 +269,12 @@ def using_clang(): def extensions(config): - # dev installs must build their own cythonized files. - use_cython = config.get('use_cython', default=not is_release) + # usually (except coming from release tarball) cython files must be generated + use_cython = config.get('use_cython', default=cython_found) use_openmp = config.get('use_openmp', default=True) extra_compile_args = ['-std=c99', '-ffast-math', '-O3', '-funroll-loops', - '-fsigned-zeros'] # see #2722 + '-fsigned-zeros'] # see #2722 define_macros = [] if config.get('debug_cflags', default=False): extra_compile_args.extend(['-Wall', '-pedantic']) @@ -432,7 +432,7 @@ def extensions(config): extra_compile_args=encore_compile_args) nsgrid = MDAExtension('MDAnalysis.lib.nsgrid', ['MDAnalysis/lib/nsgrid' + cpp_source_suffix], - include_dirs=include_dirs, + include_dirs=include_dirs + ['MDAnalysis/lib/include'], language='c++', define_macros=define_macros, extra_compile_args=cpp_extra_compile_args, @@ -599,6 +599,7 @@ def long_description(readme): 'tqdm>=4.43.0', 'threadpoolctl', ] + if not os.name == 'nt': install_requires.append('gsd>=1.4.0') else: diff --git a/testsuite/MDAnalysisTests/analysis/test_hole2.py b/testsuite/MDAnalysisTests/analysis/test_hole2.py index d23b30b5d57..1f8a74e400f 100644 --- a/testsuite/MDAnalysisTests/analysis/test_hole2.py +++ b/testsuite/MDAnalysisTests/analysis/test_hole2.py @@ -486,6 +486,7 @@ def test_over_order_parameters(self, hole): idx = np.argsort(op) arr = np.array(list(hole.results.profiles.values()), dtype=object) + for op_prof, arr_prof in zip(profiles.values(), arr[idx]): assert op_prof is arr_prof @@ -502,6 +503,7 @@ def test_over_order_parameters_file(self, hole, tmpdir): idx = np.argsort(op) arr = np.array(list(hole.results.profiles.values()), dtype=object) + for op_prof, arr_prof in zip(profiles.values(), arr[idx]): assert op_prof is arr_prof @@ -525,6 +527,7 @@ def test_over_order_parameters_frames(self, hole): idx = np.argsort(op[:n_frames]) values = list(hole.results.profiles.values())[:n_frames] + arr = np.array(values, dtype=object) for op_prof, arr_prof in zip(profiles.values(), arr[idx]): assert op_prof is arr_prof diff --git a/testsuite/MDAnalysisTests/analysis/test_hydrogenbonds_analysis.py b/testsuite/MDAnalysisTests/analysis/test_hydrogenbonds_analysis.py index 0d27429bcd7..c3ddb728bba 100644 --- a/testsuite/MDAnalysisTests/analysis/test_hydrogenbonds_analysis.py +++ b/testsuite/MDAnalysisTests/analysis/test_hydrogenbonds_analysis.py @@ -206,7 +206,6 @@ def test_no_bond_info_exception(self, universe): def test_first_hbond(self, hydrogen_bonds): assert len(hydrogen_bonds.results.hbonds) == 2 - frame_no, donor_index, hydrogen_index, acceptor_index, da_dst, angle =\ hydrogen_bonds.results.hbonds[0] assert_equal(donor_index, 0) @@ -410,6 +409,7 @@ def test_no_hydrogens(self, universe): assert h._donors.n_atoms == 0 assert h.results.hbonds.size == 0 + class TestHydrogenBondAnalysisTIP3P_GuessDonors_NoTopology(object): """Guess the donor atoms involved in hydrogen bonds using the partial charges of the atoms. """ diff --git a/testsuite/MDAnalysisTests/coordinates/test_netcdf.py b/testsuite/MDAnalysisTests/coordinates/test_netcdf.py index 02edefc541a..9b28a261ddb 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_netcdf.py +++ b/testsuite/MDAnalysisTests/coordinates/test_netcdf.py @@ -918,3 +918,11 @@ def test_wrong_n_atoms(self, outfile): u = make_Universe(trajectory=True) with pytest.raises(IOError): w.write(u) + + def test_scale_factor_future(self, outfile): + u = mda.Universe(GRO) + wmsg = "`scale_factor` writing will change" + with pytest.warns(FutureWarning, match=wmsg): + with NCDFWriter(outfile, u.trajectory.n_atoms) as w: + w.write(u.atoms) + diff --git a/testsuite/MDAnalysisTests/core/test_universe.py b/testsuite/MDAnalysisTests/core/test_universe.py index 319d08c8011..0557d5c4417 100644 --- a/testsuite/MDAnalysisTests/core/test_universe.py +++ b/testsuite/MDAnalysisTests/core/test_universe.py @@ -46,6 +46,7 @@ GRO, TRR, two_water_gro, two_water_gro_nonames, TRZ, TRZ_psf, + PDB, MMTF, ) import MDAnalysis as mda @@ -678,7 +679,7 @@ def test_add_charges(self, universe, toadd, attrname, default): assert hasattr(universe.atoms, attrname) assert getattr(universe.atoms, attrname)[0] == default - + @pytest.mark.parametrize( 'attr,values', ( ('bonds', [(1, 0), (1, 2)]), @@ -854,7 +855,7 @@ def _check_invalid_addition(self, u, attr, to_add, err_msg): with pytest.raises(ValueError) as excinfo: _add_func(to_add) assert err_msg in str(excinfo.value) - + @pytest.mark.parametrize( 'attr,values', small_atom_indices ) @@ -912,7 +913,7 @@ def test_add_topologyobjects_wrong_universe_error(self, universe, empty, attr, v 'attr,values', large_atom_indices ) def test_add_topologygroups_to_populated(self, universe, attr, values): - topologygroup = mda.core.topologyobjects.TopologyGroup(np.array(values), + topologygroup = mda.core.topologyobjects.TopologyGroup(np.array(values), universe) self._check_valid_added_to_populated(universe, attr, values, topologygroup) @@ -920,15 +921,15 @@ def test_add_topologygroups_to_populated(self, universe, attr, values): 'attr,values', small_atom_indices ) def test_add_topologygroup_wrong_universe_error(self, universe, empty, attr, values): - tg = mda.core.topologyobjects.TopologyGroup(np.array(values), + tg = mda.core.topologyobjects.TopologyGroup(np.array(values), universe) self._check_invalid_addition(empty, attr, tg, 'different Universes') - + @pytest.mark.parametrize( 'attr,values', small_atom_indices ) def test_add_topologygroup_different_universe(self, universe, empty, attr, values): - tg = mda.core.topologyobjects.TopologyGroup(np.array(values), + tg = mda.core.topologyobjects.TopologyGroup(np.array(values), universe) self._check_valid_added_to_empty(empty, attr, values, tg.to_indices()) @@ -969,7 +970,7 @@ def test_add_wrong_number_of_atoms_error(self, universe, attr, n): 'tuples with {} atom indices').format(attr, n) idx = [(0, 1), (0, 1, 2), (8, 22, 1, 3), (5, 3, 4, 2)] self._check_invalid_addition(universe, attr, idx, errmsg) - + def test_add_bonds_refresh_fragments(self, empty): with pytest.raises(NoDataError): getattr(empty.atoms, 'fragments') @@ -993,7 +994,7 @@ def test_roundtrip(self, empty, attr, values): _delete_func(values) u_attr = getattr(empty, attr) assert len(u_attr) == 0 - + class TestDeleteTopologyObjects(object): TOP = {'bonds': [(0, 1), (2, 3), (3, 4), (4, 5), (7, 8)], @@ -1039,7 +1040,7 @@ def _check_valid_deleted(self, u, attr, values, to_delete): assert len(u_attr) == original_length-len(values) not_deleted = [x for x in self.TOP[attr] if list(x) not in values] - assert all([x in u_attr.indices or x[::-1] in u_attr.indices + assert all([x in u_attr.indices or x[::-1] in u_attr.indices for x in not_deleted]) def _check_invalid_deleted(self, u, attr, to_delete, err_msg): @@ -1083,7 +1084,7 @@ def test_delete_atomgroup_wrong_universe_error(self, universe, universe2, attr, def test_delete_missing_atomgroup(self, universe, attr, values): ag = [universe.atoms[x] for x in values] self._check_invalid_deleted(universe, attr, ag, 'Cannot delete nonexistent') - + @pytest.mark.parametrize( 'attr,values', existing_atom_indices ) @@ -1129,7 +1130,7 @@ def test_delete_topologygroup_different_universe(self, universe, universe2, attr arr = np.array(values) tg = mda.core.topologyobjects.TopologyGroup(arr, universe2) self._check_valid_deleted(universe, attr, values, tg.to_indices()) - + @pytest.mark.parametrize( 'attr,n', ( ('bonds', 2), @@ -1142,8 +1143,8 @@ def test_delete_wrong_number_of_atoms_error(self, universe, attr, n): idx = [(0, 1), (0, 1, 2), (8, 22, 1, 3), (5, 3, 4, 2)] errmsg = ('{} must be an iterable of ' 'tuples with {} atom indices').format(attr, n) - self._check_invalid_deleted(universe, attr, idx, errmsg) - + self._check_invalid_deleted(universe, attr, idx, errmsg) + @pytest.mark.parametrize( 'attr,values', existing_atom_indices ) @@ -1180,7 +1181,7 @@ def test_roundtrip(self, universe, attr, values): assert_array_equal(u_attr.indices, nu_attr.indices) - + class TestAllCoordinatesKwarg(object): @pytest.fixture(scope='class') def u_GRO_TRR(self): @@ -1291,3 +1292,15 @@ def test_empty_creation_raises_error(self): with pytest.raises(TypeError) as exc: u = mda.Universe() assert 'Universe.empty' in str(exc.value) + + +def test_deprecate_b_tempfactors(): + u = mda.Universe(PDB) + with pytest.warns(DeprecationWarning, match="alias"): + u.add_TopologyAttr("bfactors") + + +def test_deprecate_temp_bfactors(): + u = mda.Universe(MMTF) + with pytest.warns(DeprecationWarning, match="alias"): + u.add_TopologyAttr("tempfactors") diff --git a/testsuite/MDAnalysisTests/data/surface.pdb.bz2 b/testsuite/MDAnalysisTests/data/surface.pdb.bz2 new file mode 100644 index 00000000000..3cd40151455 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/surface.pdb.bz2 differ diff --git a/testsuite/MDAnalysisTests/data/surface.trr b/testsuite/MDAnalysisTests/data/surface.trr new file mode 100644 index 00000000000..bdb8d9a33e3 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/surface.trr differ diff --git a/testsuite/MDAnalysisTests/datafiles.py b/testsuite/MDAnalysisTests/datafiles.py index 879c9ee664d..2daa778f61c 100644 --- a/testsuite/MDAnalysisTests/datafiles.py +++ b/testsuite/MDAnalysisTests/datafiles.py @@ -203,6 +203,9 @@ "SDF_molecule", # MDL SDFile for rdkit "PDBX", # PDBxfile "PDB_elements", # PDB file with elements + "PDB_elements", # PDB file with elements + "SURFACE_PDB", # 111 FCC lattice topology for NSGrid bug #2345 + "SURFACE_TRR", # full precision coordinates for NSGrid bug #2345 ] from pkg_resources import resource_filename @@ -560,7 +563,12 @@ PDB_elements = resource_filename(__name__, 'data/elements.pdb') + PDBX = resource_filename(__name__, "data/4x8u.pdbx") +SURFACE_PDB = resource_filename(__name__, 'data/surface.pdb.bz2') +SURFACE_TRR = resource_filename(__name__, 'data/surface.trr') + + # This should be the last line: clean up namespace del resource_filename diff --git a/testsuite/MDAnalysisTests/lib/test_cutil.py b/testsuite/MDAnalysisTests/lib/test_cutil.py index 4dc33265495..c00e68eedd6 100644 --- a/testsuite/MDAnalysisTests/lib/test_cutil.py +++ b/testsuite/MDAnalysisTests/lib/test_cutil.py @@ -24,7 +24,9 @@ import numpy as np from numpy.testing import assert_equal -from MDAnalysis.lib._cutil import unique_int_1d, find_fragments +from MDAnalysis.lib._cutil import ( + unique_int_1d, find_fragments, _in2d, +) @pytest.mark.parametrize('values', ( @@ -62,3 +64,24 @@ def test_find_fragments(edges, ref): assert len(fragments) == len(ref) for frag, r in zip(fragments, ref): assert_equal(frag, r) + + +def test_in2d(): + arr1 = np.array([[1, 2], [3, 4], [5, 6]], dtype=np.intp) + arr2 = np.array([[3, 4], [2, 1], [5, 5], [6, 6]], dtype=np.intp) + + result = _in2d(arr1, arr2) + + assert_equal(result, np.array([False, True, False])) + + +@pytest.mark.parametrize('arr1,arr2', [ + (np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=np.intp), + np.array([[1, 2], [3, 4]], dtype=np.intp)), + (np.array([[1, 2], [3, 4]], dtype=np.intp), + np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=np.intp)), +]) +def test_in2d_VE(arr1, arr2): + with pytest.raises(ValueError, + match=r'Both arrays must be \(n, 2\) arrays'): + _in2d(arr1, arr2) diff --git a/testsuite/MDAnalysisTests/lib/test_distances.py b/testsuite/MDAnalysisTests/lib/test_distances.py index b033fcf8572..7df0727dffe 100644 --- a/testsuite/MDAnalysisTests/lib/test_distances.py +++ b/testsuite/MDAnalysisTests/lib/test_distances.py @@ -84,8 +84,8 @@ def test_capped_distance_noresults(): npoints_1 = (1, 100) -boxes_1 = (np.array([1, 2, 3, 90, 90, 90], dtype=np.float32), # ortho - np.array([1, 2, 3, 30, 45, 60], dtype=np.float32), # tri_box +boxes_1 = (np.array([10, 20, 30, 90, 90, 90], dtype=np.float32), # ortho + np.array([10, 20, 30, 30, 45, 60], dtype=np.float32), # tri_box None, # Non Periodic ) @@ -108,7 +108,7 @@ def test_capped_distance_checkbrute(npoints, box, query, method, min_cutoff): np.random.seed(90003) points = (np.random.uniform(low=0, high=1.0, size=(npoints, 3))*(boxes_1[0][:3])).astype(np.float32) - max_cutoff = 0.3 + max_cutoff = 2.5 # capped distance should be able to handle array of vectors # as well as single vectors. pairs, dist = distances.capped_distance(query, points, max_cutoff, @@ -183,32 +183,29 @@ def test_self_capped_distance(npoints, box, method, min_cutoff, ret_dist): pairs, cdists = result else: pairs = result - found_pairs, found_distance = [], [] - for i, coord in enumerate(points): - dist = distances.distance_array(coord, points[i+1:], box=box) - if min_cutoff is not None: - idx = np.where((dist <= max_cutoff) & (dist > min_cutoff))[1] - else: - idx = np.where((dist < max_cutoff))[1] - for other_idx in idx: - j = other_idx + 1 + i - found_pairs.append((i, j)) - found_distance.append(dist[0, other_idx]) - # check number of found pairs: - assert_equal(len(pairs), len(found_pairs)) - # check pair/distance correspondence: - if ret_dist and len(pairs) > 0: - # get result pairs and distances in one array: - res = np.hstack((pairs.astype(cdists.dtype), cdists[:, None])) - # get reference pairs and distances in one array: - ref = np.hstack((np.array(found_pairs, dtype=np.float64), - np.array(found_distance, dtype=np.float64)[:, None])) - # sort both arrays by column 1 and 0: - res = res[res[:, 1].argsort()] # no stable sort needed. - res = res[res[:, 0].argsort(kind='mergesort')] # sort must be stable! - ref = ref[ref[:, 1].argsort()] - ref = ref[ref[:, 0].argsort(kind='mergesort')] - assert_almost_equal(res, ref, decimal=5) + + # Check we found all hits + ref = distances.self_distance_array(points, box) + ref_d = ref[ref < 0.2] + if not min_cutoff is None: + ref_d = ref_d[ref_d > min_cutoff] + assert len(ref_d) == len(pairs) + + # Go through hit by hit and check we got the indices correct too + ref = distances.distance_array(points, points, box) + if ret_dist: + for (i, j), d in zip(pairs, cdists): + d_ref = ref[i, j] + assert d_ref < 0.2 + if not min_cutoff is None: + assert d_ref > min_cutoff + assert_almost_equal(d, d_ref, decimal=6) + else: + for i, j in pairs: + d_ref = ref[i, j] + assert d_ref < 0.2 + if not min_cutoff is None: + assert d_ref > min_cutoff @pytest.mark.parametrize('box', (None, diff --git a/testsuite/MDAnalysisTests/lib/test_nsgrid.py b/testsuite/MDAnalysisTests/lib/test_nsgrid.py index c7bb8fa8492..975bf175e2e 100644 --- a/testsuite/MDAnalysisTests/lib/test_nsgrid.py +++ b/testsuite/MDAnalysisTests/lib/test_nsgrid.py @@ -23,12 +23,16 @@ import pytest +from collections import defaultdict, Counter from numpy.testing import assert_equal, assert_allclose import numpy as np import MDAnalysis as mda -from MDAnalysisTests.datafiles import GRO, Martini_membrane_gro, PDB +from MDAnalysisTests.datafiles import ( + GRO, Martini_membrane_gro, PDB, PDB_xvf, SURFACE_PDB, SURFACE_TRR +) from MDAnalysis.lib import nsgrid +from MDAnalysis.transformations.translate import center_in_box @pytest.fixture @@ -46,24 +50,25 @@ def run_grid_search(u, ref_id, cutoff=3): return searcher.search(searchcoords) -def test_pbc_box(): +@pytest.mark.parametrize('box', [ + np.zeros(3), # Bad shape + np.zeros((3, 3)), # Collapsed box + np.array([[0, 0, 0], [0, 1, 0], [0, 0, 1]]), # 2D box + np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]), # Box provided as array of integers + np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.float64), # Box provided as array of double +]) +def test_pbc_box(box): """Check that PBC box accepts only well-formated boxes""" - pbc = True - with pytest.raises(TypeError): - nsgrid._PBCBox([]) + coords = np.array([[1.0, 1.0, 1.0]], dtype=np.float32) with pytest.raises(ValueError): - nsgrid._PBCBox(np.zeros((3)), pbc) # Bad shape - nsgrid._PBCBox(np.zeros((3, 3)), pbc) # Collapsed box - nsgrid._PBCBox(np.array([[0, 0, 0], [0, 1, 0], [0, 0, 1]]), pbc) # 2D box - nsgrid._PBCBox(np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]), pbc) # Box provided as array of integers - nsgrid._PBCBox(np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=float), pbc) # Box provided as array of double + nsgrid.FastNS(4.0, coords, box=box) -def test_nsgrid_badcutoff(universe): +@pytest.mark.parametrize('cutoff', [-4, 100000]) +def test_nsgrid_badcutoff(universe, cutoff): with pytest.raises(ValueError): - run_grid_search(universe, 0, -4) - run_grid_search(universe, 0, 100000) + run_grid_search(universe, 0, cutoff) def test_ns_grid_noneighbor(universe): @@ -74,8 +79,6 @@ def test_ns_grid_noneighbor(universe): results_grid = run_grid_search(universe, ref_id, cutoff) # same indices will be selected as neighbour here - assert len(results_grid.get_distances()[0]) == 1 - assert len(results_grid.get_indices()[0]) == 1 assert len(results_grid.get_pairs()) == 1 assert len(results_grid.get_pair_distances()) == 1 @@ -83,8 +86,9 @@ def test_ns_grid_noneighbor(universe): def test_nsgrid_PBC_rect(): """Check that nsgrid works with rect boxes and PBC""" ref_id = 191 + # Atomid are from gmx select so there start from 1 and not 0. hence -1! results = np.array([191, 192, 672, 682, 683, 684, 995, 996, 2060, 2808, 3300, 3791, - 3792]) - 1 # Atomid are from gmx select so there start from 1 and not 0. hence -1! + 3792]) - 1 universe = mda.Universe(Martini_membrane_gro) cutoff = 7 @@ -92,26 +96,26 @@ def test_nsgrid_PBC_rect(): # FastNS is called differently to max coverage searcher = nsgrid.FastNS(cutoff, universe.atoms.positions, box=universe.dimensions) - results_grid = searcher.search(universe.atoms.positions[ref_id][None, :]).get_indices()[0] + results_grid = searcher.search(universe.atoms.positions[ref_id][None, :]).get_pairs() + other_ix = sorted(i for (_, i) in results_grid) - results_grid2 = searcher.search(universe.atoms.positions).get_indices() # call without specifying any ids, should do NS for all beads - - assert_equal(np.sort(results), np.sort(results_grid)) - assert_equal(len(universe.atoms), len(results_grid2)) - assert searcher.cutoff == 7 - assert_equal(np.sort(results_grid), np.sort(results_grid2[ref_id])) + assert len(results) == len(results_grid) + assert other_ix == sorted(results) def test_nsgrid_PBC(universe): """Check that grid search works when PBC is needed""" - + # Atomid are from gmx select so there start from 1 and not 0. hence -1! ref_id = 13937 results = np.array([4398, 4401, 13938, 13939, 13940, 13941, 17987, 23518, 23519, 23521, 23734, - 47451]) - 1 # Atomid are from gmx select so there start from 1 and not 0. hence -1! + 47451]) - 1 - results_grid = run_grid_search(universe, ref_id).get_indices()[0] + results_grid = run_grid_search(universe, ref_id).get_pairs() - assert_equal(np.sort(results), np.sort(results_grid)) + other_ix = sorted(i for (_, i) in results_grid) + + assert len(results) == len(other_ix) + assert other_ix == sorted(results) def test_nsgrid_pairs(universe): @@ -143,12 +147,12 @@ def test_nsgrid_pair_distances(universe): def test_nsgrid_distances(universe): """Check that grid search returns the proper distances""" - + # These distances where obtained by gmx distance so they are in nm ref_id = 13937 results = np.array([0.0, 0.270, 0.285, 0.096, 0.096, 0.015, 0.278, 0.268, 0.179, 0.259, 0.290, - 0.270]) * 10 # These distances where obtained by gmx distance so they are in nm + 0.270]) * 10 - results_grid = run_grid_search(universe, ref_id).get_distances()[0] + results_grid = run_grid_search(universe, ref_id).get_pair_distances() assert_allclose(np.sort(results), np.sort(results_grid), atol=1e-2) @@ -179,7 +183,7 @@ def test_nsgrid_search(box, results): else: searcher = nsgrid.FastNS(cutoff, points, box) searchresults = searcher.search(query) - indices = searchresults.get_indices()[0] + indices = searchresults.get_pairs()[:, 1] assert_equal(np.sort(indices), results) @@ -211,7 +215,7 @@ def test_nsgrid_selfsearch(box, result): searcher = nsgrid.FastNS(cutoff, points, box=box) searchresults = searcher.self_search() pairs = searchresults.get_pairs() - assert_equal(len(pairs)//2, result) + assert_equal(len(pairs), result) def test_nsgrid_probe_close_to_box_boundary(): # FastNS.search used to segfault with this box, cutoff and reference @@ -272,3 +276,98 @@ def test_around_overlapping(): # box=u.dimensions) # assert np.count_nonzero(np.any(dist <= 0.0, axis=0)) == 48 assert u.select_atoms('around 0.0 index 0:11').n_atoms == 48 + + +def test_issue_2229_part1(): + # reproducing first case in GH issue 2229 + u = mda.Universe.empty(2, trajectory=True) + + u.dimensions = [57.45585, 50.0000, 50.0000, 90, 90, 90] + + u.atoms[0].position = [0, 0, 0] + u.atoms[1].position = [55.00, 0, 0] + + g = mda.lib.nsgrid.FastNS(3.0, u.atoms[[0]].positions, box=u.dimensions) + assert len(g.search(u.atoms[[1]].positions).get_pairs()) == 1 + + g = mda.lib.nsgrid.FastNS(3.0, u.atoms[[1]].positions, box=u.dimensions) + assert len(g.search(u.atoms[[0]].positions).get_pairs()) == 1 + + +def test_issue_2229_part2(): + u = mda.Universe.empty(2, trajectory=True) + + u.dimensions = [45.0000, 55.0000, 109.8375, 90, 90, 90] + + u.atoms[0].position = [0, 0, 29.29] + u.atoms[1].position = [0, 0, 28.23] + + g = mda.lib.nsgrid.FastNS(3.0, u.atoms[[0]].positions, box=u.dimensions, pbc=False) + assert len(g.search(u.atoms[[1]].positions).get_pairs()) == 1 + + g = mda.lib.nsgrid.FastNS(3.0, u.atoms[[1]].positions, box=u.dimensions) + assert len(g.search(u.atoms[[0]].positions).get_pairs()) == 1 + + +def test_issue_2919(): + # regression test reported in issue 2919 + # other methods will also give 1115 or 2479 results + u = mda.Universe(PDB_xvf) + ag = u.select_atoms('index 0') + u.trajectory.ts = center_in_box(ag)(u.trajectory.ts) + + box = u.dimensions + reference = u.select_atoms('protein') + configuration = u.select_atoms('not protein') + + for cutoff, expected in [(2.8, 1115), (3.2, 2497)]: + pairs, distances = mda.lib.distances.capped_distance( + reference.positions, + configuration.positions, + max_cutoff=cutoff, + box=box, + method='nsgrid', + return_distances=True, + ) + assert len(pairs) == expected + + +def test_issue_2345(): + # another example of NSGrid being wrong + # this is a 111 FCC slab + # coordination numbers for atoms should be either 9 or 12, 50 of each + u = mda.Universe(SURFACE_PDB, SURFACE_TRR) + + g = mda.lib.nsgrid.FastNS(2.9, u.atoms.positions, box=u.dimensions) + + cn = defaultdict(list) + + idx = g.self_search().get_pairs() + # count number of contacts for each atom + for (i, j) in idx: + cn[i].append(j) + cn[j].append(i) + c = Counter(len(v) for v in cn.values()) + + assert c == {9: 50, 12: 50} + + +def test_issue_2670(): + # Tests that NSGrid no longer crashes when using small box sizes + u = mda.Universe(PDB) + u.dimensions = [1e-3, 1e-3, 1e-3, 90, 90, 90] + + # PDB files only have a coordinate precision of 1.0e-3, so we need to scale + # the coordinates for this test to make any sense: + u.atoms.positions = u.atoms.positions * 1.0e-3 + + ag1 = u.select_atoms('resid 2 3') + # should return nothing as nothing except resid 3 is within 0.0 or resid 3 + assert len(ag1.select_atoms('around 0.0 resid 3')) == 0 + + # force atom 0 of resid 1 to overlap with atom 0 of resid 3 + u.residues[0].atoms[0].position = u.residues[2].atoms[0].position + ag2 = u.select_atoms('resid 1 3') + + # should return the one atom overlap + assert len(ag2.select_atoms('around 0.0 resid 3')) == 1 diff --git a/testsuite/MDAnalysisTests/topology/test_guessers.py b/testsuite/MDAnalysisTests/topology/test_guessers.py index 8135fe8a276..be6310bef77 100644 --- a/testsuite/MDAnalysisTests/topology/test_guessers.py +++ b/testsuite/MDAnalysisTests/topology/test_guessers.py @@ -131,9 +131,19 @@ def test_guess_impropers(): assert_equal(len(vals), 12) +def bond_sort(arr): + # sort from low to high, also within a tuple + # e.g. ([5, 4], [0, 1], [0, 3]) -> ([0, 1], [0, 3], [4, 5]) + out = [] + for (i, j) in arr: + if i > j: + i, j = j, i + out.append((i, j)) + return sorted(out) + def test_guess_bonds_water(): u = mda.Universe(datafiles.two_water_gro) - bonds = guessers.guess_bonds(u.atoms, u.atoms.positions, u.dimensions) + bonds = bond_sort(guessers.guess_bonds(u.atoms, u.atoms.positions, u.dimensions)) assert_equal(bonds, ((0, 1), (0, 2), (3, 4), @@ -142,14 +152,14 @@ def test_guess_bonds_water(): def test_guess_bonds_adk(): u = mda.Universe(datafiles.PSF, datafiles.DCD) u.atoms.types = guessers.guess_types(u.atoms.names) - bonds = guessers.guess_bonds(u.atoms, u.atoms.positions) + bonds = bond_sort(guessers.guess_bonds(u.atoms, u.atoms.positions)) assert_equal(np.sort(u.bonds.indices, axis=0), np.sort(bonds, axis=0)) def test_guess_bonds_peptide(): u = mda.Universe(datafiles.PSF_NAMD, datafiles.PDB_NAMD) u.atoms.types = guessers.guess_types(u.atoms.names) - bonds = guessers.guess_bonds(u.atoms, u.atoms.positions) + bonds = bond_sort(guessers.guess_bonds(u.atoms, u.atoms.positions)) assert_equal(np.sort(u.bonds.indices, axis=0), np.sort(bonds, axis=0))