Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions package/CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ Enhancements
default True. (Issue #1938)
* ChainReader can correctly handle continuous trajectories split into multiple files,
generated with gromacs -noappend (PR #1728)
* MDAnalysis.lib.mdamath now supports triclinic boxes and rewrote in Cython (PR #1965)

Fixes
* rewind in the SingleFrameReader now reads the frame from the file (Issue #1929)
Expand Down
210 changes: 202 additions & 8 deletions package/MDAnalysis/lib/_cutil.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -25,20 +25,33 @@ import cython
import numpy as np
cimport numpy as np

__all__ = ['unique_int_1d', ]
from MDAnalysis import NoDataError

from libcpp.set cimport set as cset
from libcpp.map cimport map as cmap

__all__ = ['unique_int_1d', 'make_whole']

cdef extern from "calc_distances.h":
ctypedef float coordinate[3]
void minimum_image(double *x, float *box, float *inverse_box)
void minimum_image_triclinic(double *dx, coordinate *box)

ctypedef cset[int] intset
ctypedef cmap[int, intset] intmap


@cython.boundscheck(False) # turn off bounds-checking for entire function
@cython.wraparound(False) # turn off negative index wrapping for entire function
def unique_int_1d(np.ndarray[np.int64_t, ndim=1] values):
def unique_int_1d(np.int64_t[:] values):
"""
Find the unique elements of a 1D array of integers.

This function is optimal on sorted arrays.

Parameters
----------
values: np.ndarray
values: np.ndarray of type int64
1D array of int in which to find the unique values.

Returns
Expand All @@ -51,11 +64,11 @@ def unique_int_1d(np.ndarray[np.int64_t, ndim=1] values):
cdef int i = 0
cdef int j = 0
cdef int n_values = values.shape[0]
cdef np.ndarray[np.int64_t, ndim=1] result = np.empty(n_values, dtype=np.int64)
cdef np.int64_t[:] result = np.empty(n_values, dtype=np.int64)

if n_values == 0:
return result

result[0] = values[0]
for i in range(1, n_values):
if values[i] != result[j]:
Expand All @@ -65,6 +78,187 @@ def unique_int_1d(np.ndarray[np.int64_t, ndim=1] values):
is_monotonic = False
result = result[:j + 1]
if not is_monotonic:
result.sort()
result = unique_int_1d(result)
return result
result = unique_int_1d(np.sort(result))

return np.array(result)


cdef intset difference(intset a, intset b):
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does not look the most efficient way to do, see https://en.cppreference.com/w/cpp/algorithm/set_difference. But it should do for now.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think @kain88-de tried to use this but it's not available on osx or something

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can rewrite the algorithm ourselves. They assume that the containers are sorted (which they are for c++11 and later). Using the std algorithm is difficult on linux. I did not get the compiler to resolve the template. This could get better when we update to use the new conda compilers.

"""a.difference(b)

Returns set of values in a which are not in b
"""
cdef intset output
for val in a:
if b.count(val) != 1:
output.insert(val)
return output


@cython.boundscheck(False)
@cython.wraparound(False)
def make_whole(atomgroup, reference_atom=None):
"""Move all atoms in a single molecule so that bonds don't split over images

Atom positions are modified in place.

This function is most useful when atoms have been packed into the primary
unit cell, causing breaks mid molecule, with the molecule then appearing
on either side of the unit cell. This is problematic for operations
such as calculating the center of mass of the molecule. ::

+-----------+ +-----------+
| | | |
| 6 3 | | 3 | 6
| ! ! | | ! | !
|-5-8 1-2-| -> | 1-2-|-5-8
| ! ! | | ! | !
| 7 4 | | 4 | 7
| | | |
+-----------+ +-----------+


Parameters
----------
atomgroup : AtomGroup
The :class:`MDAnalysis.core.groups.AtomGroup` to work with.
The positions of this are modified in place. All these atoms
must belong in the same molecule or fragment.
reference_atom : :class:`~MDAnalysis.core.groups.Atom`
The atom around which all other atoms will be moved.
Defaults to atom 0 in the atomgroup.

Raises
------
NoDataError
There are no bonds present.
(See :func:`~MDAnalysis.topology.core.guess_bonds`)

ValueError
The algorithm fails to work. This is usually
caused by the atomgroup not being a single fragment.
(ie the molecule can't be traversed by following bonds)


Example
-------
Make fragments whole::

from MDAnalysis.lib.mdamath import make_whole

# This algorithm requires bonds, these can be guessed!
u = mda.Universe(......, guess_bonds=True)

# MDAnalysis can split molecules into their fragments
# based on bonding information.
# Note that this function will only handle a single fragment
# at a time, necessitating a loop.
for frag in u.fragments:
make_whole(frag)

Alternatively, to keep a single atom in place as the anchor::

# This will mean that atomgroup[10] will NOT get moved,
# and all other atoms will move (if necessary).
make_whole(atomgroup, reference_atom=atomgroup[10])


.. versionadded:: 0.11.0
"""
cdef intset agset, refpoints, todo, done
cdef int i, nloops, ref, atom, other
cdef intmap bonding
cdef int[:, :] bonds
cdef float[:, :] oldpos, newpos
cdef bint ortho
cdef float[:] box
cdef float tri_box[3][3]
cdef float inverse_box[3]
cdef double vec[3]

if reference_atom is None:
ref = atomgroup[0].index
else:
# Sanity check
if not reference_atom in atomgroup:
raise ValueError("Reference atom not in atomgroup")
ref = reference_atom.index

box = atomgroup.dimensions

for i in range(3):
if box[i] == 0.0:
raise ValueError("One or more dimensions was zero. "
"You can set dimensions using 'atomgroup.dimensions='")

ortho = True
for i in range(3, 6):
if box[i] != 90.0:
ortho = False

if ortho:
for i in range(3):
inverse_box[i] = 1.0 / box[i]
else:
from .mdamath import triclinic_vectors
tri_box = triclinic_vectors(box)

# set of indices in AtomGroup
agset = intset()
for i in atomgroup.indices.astype(np.int32):
agset.insert(i)
# C++ dict of bonds
try:
bonds = atomgroup.bonds.to_indices()
except (AttributeError, NoDataError):
raise NoDataError("The atomgroup is required to have bonds")
for i in range(bonds.shape[0]):
atom = bonds[i, 0]
other = bonds[i, 1]
# only add bonds if both atoms are in atoms set
if agset.count(atom) and agset.count(other):
bonding[atom].insert(other)
bonding[other].insert(atom)

oldpos = atomgroup.positions
newpos = np.zeros((oldpos.shape[0], 3), dtype=np.float32)

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I checked the C code made for this, between here and the return at the bottom is pure C :)

done = intset() # Who have I already done?
refpoints = intset() # Who is safe to use as reference point?
# initially we have one starting atom whose position we trust
refpoints.insert(ref)
for i in range(3):
newpos[ref, i] = oldpos[ref, i]

nloops = 0
while refpoints.size() < agset.size() and nloops < agset.size():
nloops += 1

# We want to iterate over atoms that are good to use as reference
# points, but haven't been done yet.
todo = difference(refpoints, done)
for atom in todo:
for other in bonding[atom]:
# If other is already a refpoint, leave alone
if refpoints.count(other):
continue
# Draw vector from atom to other
for i in range(3):
vec[i] = oldpos[other, i] - oldpos[atom, i]
# Apply periodic boundary conditions to this vector
if ortho:
minimum_image(&vec[0], &box[0], &inverse_box[0])
else:
minimum_image_triclinic(&vec[0], <coordinate*>&tri_box[0])
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Couldn't tri_box be casted only once before the loops?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does casting take any time? I thought it was more an instruction to the compiler rather than a copy of data?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do not expect any cost for the cast; though, I do not see why tri_box could not be directly the right type. Nevertheless, it is really not a big deal.

# Then define position of other based on this vector
for i in range(3):
newpos[other, i] = newpos[atom, i] + vec[i]

# This other atom can now be used as a reference point
refpoints.insert(other)
done.insert(atom)

if refpoints.size() < agset.size():
raise ValueError("AtomGroup was not contiguous from bonds, process failed")
else:
atomgroup.positions = newpos
18 changes: 9 additions & 9 deletions package/MDAnalysis/lib/include/calc_distances.h
Original file line number Diff line number Diff line change
Expand Up @@ -310,25 +310,25 @@ static void _calc_self_distance_array_triclinic(coordinate* ref, int numref,
void _coord_transform(float coords[][3], int numCoords, float box[][3])
{
int i, j, k;
float new[3];
float newpos[3];
// Matrix multiplication inCoords * box = outCoords
// Multiplication done in place using temp array 'new'
// Used to transform coordinates to/from S/R space in trilinic boxes
#ifdef PARALLEL
#pragma omp parallel for private(i, j, k, new) shared(coords)
#pragma omp parallel for private(i, j, k, newpos) shared(coords)
#endif
for (i=0; i < numCoords; i++){
new[0] = 0.0;
new[1] = 0.0;
new[2] = 0.0;
newpos[0] = 0.0;
newpos[1] = 0.0;
newpos[2] = 0.0;
for (j=0; j<3; j++){
for (k=0; k<3; k++){
new[j] += coords[i][k] * box[k][j];
newpos[j] += coords[i][k] * box[k][j];
}
}
coords[i][0] = new[0];
coords[i][1] = new[1];
coords[i][2] = new[2];
coords[i][0] = newpos[0];
coords[i][1] = newpos[1];
coords[i][2] = newpos[2];
}
}

Expand Down
Loading