diff --git a/package/CHANGELOG b/package/CHANGELOG index 9aae6e3db92..a789f5c07ae 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -203,6 +203,7 @@ Enhancements checking if it can be used in parallel analysis. (Issue #2996, PR #2950) * Improved performance of H5MDReader by using h5py.Dataset.read_direct() to transfer data from HDF5 file into timestep. (PR #3293) + * Added `H5MDWriter` to coordinate writers (Issue #2866, PR #3189) Changes * NCDFWriter now allows for the writing of `scale_factor` attributes for @@ -314,10 +315,10 @@ Deprecations * Deprecated the `bfactors` topology attribute in favour of `tempfactors`. In 2.0 `bfactors` is simply an alias of `tempfactors`; in 3.0 `bfactors` will be removed. (Issue #1901, PR #3345) - * The attributes `p_components`, `variance`, `cumulated_variance` and - `mean_atoms` in `analysis.pca.PCA` are now deprecated in favour of - `results.p_components`, `results.variance`, `results.cumulated_variance` - and `results.mean_atoms`. They will be removed in 3.0.0 + * The attributes `p_components`, `variance`, `cumulated_variance` and + `mean_atoms` in `analysis.pca.PCA` are now deprecated in favour of + `results.p_components`, `results.variance`, `results.cumulated_variance` + and `results.mean_atoms`. They will be removed in 3.0.0 (Issues #3275 #3285) * The `bins`, `edges`, `count`, `rdf` attributes for `analysis.rdf.InterRDF` and `analysis.rdf.InterRDF_s`, and `cdf` attributes for diff --git a/package/MDAnalysis/coordinates/H5MD.py b/package/MDAnalysis/coordinates/H5MD.py index e08ff032b57..3b8baf1c101 100644 --- a/package/MDAnalysis/coordinates/H5MD.py +++ b/package/MDAnalysis/coordinates/H5MD.py @@ -49,8 +49,14 @@ provided, a :exc:`RuntimeError` is raised. If no units are provided, MDAnalysis stores a value of ``None`` for each unit. If the H5MD file does not contain units and ``convert_units=True``, MDAnalysis will raise a -:exc`ValueError`. 
To load a universe from an H5MD file with no units, set -``convert_units=False``. +:exc:`ValueError`. To load a universe from an H5MD file with no units defined, +set ``convert_units=False``. + +:class:`H5MDWriter` detects the native units of the parent trajectory and +writes the trajectory with those units, unless one of `timeunit`, +`lengthunit`, `velocityunit`, `forceunit` arugments are supplied. In +this case, the writer will write the corresponding dataset with the selected +unit only if it is recognized by `MDAnalysis units`_. Example: Loading an H5MD simulation @@ -70,9 +76,40 @@ with h5py.File("trajectory.h5md", 'r') as f: u = mda.Universe("topology.tpr", f) + .. Note:: Directly using a `h5py.File` does not work yet. See issue `#2884 `_. + +Example: Writing an H5MD file +----------------------------- + +To write to an H5MD file from a trajectory loaded with MDAnalysis, do: + +.. code-block:: python + + import MDAnalysis as mda + u = mda.Universe("topology.tpr", "trajectory.h5md") + with mda.Writer("output.h5md", n_atoms=u.trajectory.n_atoms) as W: + for ts in u.trajectory: + W.write(u) + +To write an H5MD file with contiguous datasets, you must specify the +number of frames to be written and set ``chunks=False``: + +.. code-block:: python + + with mda.Writer("output_contigous.h5md", n_atoms=u.trajectory.n_atoms, + n_frames=3, chunks=False) as W: + for ts in u.trajectory[:3]: + W.write(u) + +The writer also supports writing directly from an :class:`~MDAnalysis.core.groups.AtomGroup`:: + + ag = u.select_atoms("protein and name CA") + ag.write("alpha_carbons.h5md", frames='all') + + Example: Opening an H5MD file in parallel ----------------------------------------- @@ -87,6 +124,7 @@ u = mda.Universe("topology.tpr", "trajectory.h5md", driver="mpio", comm=MPI.COMM_WORLD) + .. Note:: :mod:`h5py` must be built with parallel features enabled on top of a parallel HDF5 build, and HDF5 and :mod:`mpi4py` must be built with a working MPI @@ -148,38 +186,23 @@ .. 
_`Build h5py from sources`: https://docs.h5py.org/en/stable/mpi.html#building-against-parallel-hdf5 .. _`H5MD notation`: https://nongnu.org/h5md/modules/units.html .. _`MDAnalysis notation`: https://userguide.mdanalysis.org/units.html +.. _`MDAnalysis units`: https://userguide.mdanalysis.org/units.html .. _`MDAnalysis forums`: https://www.mdanalysis.org/#participating Classes ------- -.. autoclass:: Timestep - :members: - - .. attribute:: positions - - coordinates of the atoms as a :class:`numpy.ndarray` of shape - `(n_atoms, 3)` - - .. attribute:: velocities - - velocities of the atoms as a :class:`numpy.ndarray` of shape - `(n_atoms, 3)`; only available if the trajectory contains velocities - or if the `velocities` = ``True`` keyword has been supplied. - - .. attribute:: forces - - forces of the atoms as a :class:`numpy.ndarray` of shape - `(n_atoms, 3)`; only available if the trajectory contains forces - or if the `forces` = ``True`` keyword has been supplied. - - .. autoclass:: H5MDReader :members: + :inherited-members: .. automethod:: H5MDReader._reopen +.. autoclass:: H5MDWriter + :members: + :inherited-members: + .. autoclass:: H5PYPicklable :members: @@ -188,8 +211,8 @@ import numpy as np import MDAnalysis as mda from . import base, core -from .base import Timestep from ..exceptions import NoDataError +from ..due import due, Doi try: import h5py except ImportError: @@ -218,10 +241,12 @@ class H5MDReader(base.ReaderBase): `convert_units` is set to ``True``. Additional data in the *observables* group of the H5MD file are - loaded into the :attr:`Timestep.data` dictionary. + loaded into the :attr:`Timestep.data + ` dictionary. Only 3D-periodic boxes or no periodicity are supported; for no - periodicity, :attr:`Timestep.dimensions` will return ``None``. + periodicity, :attr:`Timestep.dimensions + ` will return ``None``. 
Although H5MD can store varying numbers of particles per time step as produced by, e.g., GCMC simulations, MDAnalysis can currently @@ -352,6 +377,12 @@ class H5MDReader(base.ReaderBase): } } + @due.dcite(Doi("10.25080/majora-1b6fd038-005"), + description="MDAnalysis trajectory reader/writer of the H5MD" + "format", path=__name__) + @due.dcite(Doi("10.1016/j.cpc.2014.01.018"), + description="Specifications of the H5MD standard", + path=__name__, version='1.1') def __init__(self, filename, convert_units=True, driver=None, @@ -416,10 +447,14 @@ def __init__(self, filename, self._has = {name: name in self._particle_group for name in ('position', 'velocity', 'force')} - # Gets n_atoms from first available group + # Gets some info about what settings the datasets were created with + # from first available group for name, value in self._has.items(): if value: - self.n_atoms = self._particle_group[name]['value'].shape[1] + dset = self._particle_group[f'{name}/value'] + self.n_atoms = dset.shape[1] + self.compression = dset.compression + self.compression_opts = dset.compression_opts break else: raise NoDataError("Provide at least a position, velocity" @@ -544,11 +579,11 @@ def open_trajectory(self): mode='r', driver=self._driver, comm=self._comm) - elif self._driver is not None: - self._file = H5PYPicklable(name=self.filename, mode='r', - driver=self._driver) else: - self._file = H5PYPicklable(name=self.filename, mode='r') + self._file = H5PYPicklable(name=self.filename, + mode='r', + driver=self._driver) + # pulls first key out of 'particles' # allows for arbitrary name of group1 in 'particles' self._particle_group = self._file['particles'][ @@ -613,13 +648,19 @@ def _copy_to_data(self): self.ts.data[key] = self._file['observables'][key][ 'value'][self._frame] - # pulls 'time' out of first available parent group + # pulls 'time' and 'step' out of first available parent group for name, value in self._has.items(): if value: if 'time' in self._particle_group[name]: - 
self.ts.data['time'] = self._particle_group[name][ + self.ts.time = self._particle_group[name][ 'time'][self._frame] break + for name, value in self._has.items(): + if value: + if 'step' in self._particle_group[name]: + self.ts.data['step'] = self._particle_group[name][ + 'step'][self._frame] + break def _read_dataset_into_ts(self, dataset, attribute): """reads position, velocity, or force dataset array at current frame @@ -687,6 +728,40 @@ def _reopen(self): self.close() self.open_trajectory() + def Writer(self, filename, n_atoms=None, **kwargs): + """Return writer for trajectory format + + Note + ---- + The chunk shape of the input file will not be copied to the output + file, as :class:`H5MDWriter` uses a chunk shape of ``(1, n_atoms, 3)`` + by default. To use a custom chunk shape, you must specify the + `chunks` argument. If you would like to copy an existing chunk + format from a dataset (positions, velocities, or forces), do + the following:: + + chunks = u.trajectory._particle_group['position/value'].chunks + + Note that the writer will set the same layout for all particle groups. + + See Also + -------- + :class:`H5MDWriter` Output class for the H5MD format + + + .. versionadded:: 2.0.0 + + """ + if n_atoms is None: + n_atoms = self.n_atoms + kwargs.setdefault('driver', self._driver) + kwargs.setdefault('compression', self.compression) + kwargs.setdefault('compression_opts', self.compression_opts) + kwargs.setdefault('positions', self.has_positions) + kwargs.setdefault('velocities', self.has_velocities) + kwargs.setdefault('forces', self.has_forces) + return H5MDWriter(filename, n_atoms, **kwargs) + @property def has_positions(self): """``True`` if 'position' group is in trajectory.""" @@ -726,6 +801,642 @@ def __setstate__(self, state): self[self.ts.frame] +class H5MDWriter(base.WriterBase): + """Writer for `H5MD`_ format (version 1.1). + + H5MD trajectories are automatically recognised by the + file extension ".h5md". 
+ + All data from the input :class:`~MDAnalysis.coordinates.base.Timestep` is + written by default. For detailed information on how :class:`H5MDWriter` + handles units, compression, and chunking, see the Notes section below. + + Note + ---- + Parallel writing with the use of an MPI communicator and the ``'mpio'`` + HDF5 driver is currently not supported. + + Note + ---- + :exc:`NoDataError` is raised if no positions, velocities, or forces are + found in the input trajectory. While the H5MD standard allows for this + case, :class:`H5MDReader` cannot currently read files without at least + one of these three groups. + + Note + ---- + Writing H5MD files with fancy trajectory slicing where the Timestep + does not increase monotonically such as ``u.trajectory[[2,1,0]]`` + or ``u.trajectory[[0,1,2,0,1,2]]`` raises a :exc:`ValueError` as this + violates the rules of the step dataset in the H5MD standard. + + Parameters + ---------- + filename : str or :class:`h5py.File` + trajectory filename or open h5py file + n_atoms : int + number of atoms in trajectory + n_frames : int (optional) + number of frames to be written in trajectory + driver : str (optional) + H5PY file driver used to open H5MD file. See `H5PY drivers`_ for + list of available drivers. + convert_units : bool (optional) + Convert units from MDAnalysis to desired units + chunks : tuple (optional) + Custom chunk layout to be applied to the position, + velocity, and force datasets. By default, these datasets + are chunked in ``(1, n_atoms, 3)`` blocks + compression : str or int (optional) + HDF5 dataset compression setting to be applied + to position, velocity, and force datasets. Allowed + settings are 'gzip', 'szip', 'lzf'. If an integer + in range(10), this indicates gzip compression level. + Otherwise, an integer indicates the number of a + dynamically loaded compression filter. + compression_opts : int or tup (optional) + Compression settings. This is an integer for gzip, 2-tuple for + szip, etc. 
If specifying a dynamically loaded compression filter + number, this must be a tuple of values. For gzip, 1 indicates + the lowest level of compression and 9 indicates maximum compression. + positions : bool (optional) + Write positions into the trajectory [``True``] + velocities : bool (optional) + Write velocities into the trajectory [``True``] + forces : bool (optional) + Write forces into the trajectory [``True``] + timeunit : str (optional) + Option to convert values in the 'time' dataset to a custom unit, + must be recognizable by MDAnalysis + lengthunit : str (optional) + Option to convert values in the 'position/value' dataset to a + custom unit, must be recognizable by MDAnalysis + velocityunit : str (optional) + Option to convert values in the 'velocity/value' dataset to a + custom unit, must be recognizable by MDAnalysis + forceunit : str (optional) + Option to convert values in the 'force/value' dataset to a + custom unit, must be recognizable by MDAnalysis + author : str (optional) + Name of the author of the file + author_email : str (optional) + Email of the author of the file + creator : str (optional) + Software that wrote the file [``MDAnalysis``] + creator_version : str (optional) + Version of software that wrote the file + [:attr:`MDAnalysis.__version__`] + + Raises + ------ + RuntimeError + when `H5PY`_ is not installed + ValueError + when `n_atoms` is 0 + ValueError + when ``chunks=False`` but the user did not specify `n_frames` + ValueError + when `positions`, `velocities`, and `forces` are all + set to ``False`` + TypeError + when the input object is not a :class:`Universe` or + :class:`AtomGroup` + IOError + when `n_atoms` of the :class:`Universe` or :class:`AtomGroup` + being written does not match `n_atoms` passed as an argument + to the writer + ValueError + when any of the optional `timeunit`, `lengthunit`, + `velocityunit`, or `forceunit` keyword arguments are + not recognized by MDAnalysis + + Notes + ----- + + By default, the writer 
will write all available data (positions, + velocities, and forces) if detected in the input + :class:`~MDAnalysis.coordinates.base.Timestep`. In addition, the settings + for `compression` and `compression_opts` will be read from + the first available group of positions, velocities, or forces and used as + the default value. To write a file without any one of these datasets, + set `positions`, `velocities`, or `forces` to ``False``. + + .. rubric:: Units + + The H5MD format is very flexible with regards to units, as there is no + standard defined unit for the format. For this reason, :class:`H5MDWriter` + does not enforce any units. The units of the written trajectory can be set + explicitly with the keyword arguments `lengthunit`, `velocityunit`, + and `forceunit`. If units are not explicitly specified, they are set to + the native units of the trajectory that is the source of the coordinates. + For example, if one converts a DCD trajectory, then positions are written + in ångstrom and time in AKMA units. A GROMACS XTC will be written in nm and + ps. The units are stored in the metadata of the H5MD file so when + MDAnalysis loads the H5MD trajectory, the units will be automatically + set correctly. + + .. rubric:: Compression + + HDF5 natively supports various compression modes. To write the trajectory + with compressed datasets, set ``compression='gzip'``, ``compression='lzf'`` + , etc. See `H5PY compression options`_ for all supported modes of + compression. An additional argument, `compression_opts`, can be used to + fine tune the level of compression. For example, for GZIP compression, + `compression_opts` can be set to 1 for minimum compression and 9 for + maximum compression. + + .. rubric:: HDF5 Chunking + + HDF5 datasets can be *chunked*, meaning the dataset can be split into equal + sized pieces and stored in different, noncontiguous places on disk. 
+ If HDF5 tries to read an element from a chunked dataset, the *entire* + dataset must be read, therefore an ill-thought-out chunking scheme can + drastically affect file I/O performance. In the case of all MDAnalysis + writers, in general, the number of frames being written is not known + a priori by the writer, therefore the HDF5 must be extendable. However, the + allocation of diskspace is defined when the dataset is created, therefore + extendable HDF5 datasets *must* be chunked so as to allow dynamic storage + on disk of any incoming data to the writer. In such cases where chunking + isn't explicitly defined by the user, H5PY automatically selects a chunk + shape via an algorithm that attempts to make mostly square chunks between + 1 KiB - 1 MiB, however this can lead to suboptimal I/O performance. + :class:`H5MDWriter` uses a default chunk shape of ``(1, n_atoms, 3)`` so + as to mimic the typical access pattern of a trajectory by MDAnalysis. In + our tests ([Jakupovic2021]_), this chunk shape led to a speedup on the + order of 10x versus H5PY's auto-chunked shape. Users can set a custom + chunk shape with the `chunks` argument. Additionally, the datasets in a + file can be written with a contiguous layout by setting ``chunks=False``, + however this must be accompanied by setting `n_frames` equal to the + number of frames being written, as HDF5 must know how much space to + allocate on disk when creating the dataset. + + .. _`H5PY compression options`: https://docs.h5py.org/en/stable/high/dataset.html#filter-pipeline + .. _`H5PY drivers`: https://docs.h5py.org/en/stable/high/file.html#file-drivers + + + .. 
versionadded:: 2.0.0 + + """ + + format = 'H5MD' + multiframe = True + #: These variables are not written from :attr:`Timestep.data` + #: dictionary to the observables group in the H5MD file + data_blacklist = ['step', 'time', 'dt'] + + #: currently written version of the file format + H5MD_VERSION = (1, 1) + + # This dictionary is used to translate MDAnalysis units to H5MD units. + # (https://nongnu.org/h5md/modules/units.html) + _unit_translation_dict = { + 'time': { + 'ps': 'ps', + 'fs': 'fs', + 'ns': 'ns', + 'second': 's', + 'sec': 's', + 's': 's', + 'AKMA': 'AKMA'}, + 'length': { + 'Angstrom': 'Angstrom', + 'angstrom': 'Angstrom', + 'A': 'Angstrom', + 'nm': 'nm', + 'pm': 'pm', + 'fm': 'fm'}, + 'velocity': { + 'Angstrom/ps': 'Angstrom ps-1', + 'A/ps': 'Angstrom ps-1', + 'Angstrom/fs': 'Angstrom fs-1', + 'A/fs': 'Angstrom fs-1', + 'Angstrom/AKMA': 'Angstrom AKMA-1', + 'A/AKMA': 'Angstrom AKMA-1', + 'nm/ps': 'nm ps-1', + 'nm/ns': 'nm ns-1', + 'pm/ps': 'pm ps-1', + 'm/s': 'm s-1'}, + 'force': { + 'kJ/(mol*Angstrom)': 'kJ mol-1 Angstrom-1', + 'kJ/(mol*nm)': 'kJ mol-1 nm-1', + 'Newton': 'Newton', + 'N': 'N', + 'J/m': 'J m-1', + 'kcal/(mol*Angstrom)': 'kcal mol-1 Angstrom-1', + 'kcal/(mol*A)': 'kcal mol-1 Angstrom-1'}} + + @due.dcite(Doi("10.25080/majora-1b6fd038-005"), + description="MDAnalysis trajectory reader/writer of the H5MD" + "format", path=__name__) + @due.dcite(Doi("10.1016/j.cpc.2014.01.018"), + description="Specifications of the H5MD standard", + path=__name__, version='1.1') + def __init__(self, filename, n_atoms, n_frames=None, driver=None, + convert_units=True, chunks=None, compression=None, + compression_opts=None, positions=True, velocities=True, + forces=True, timeunit=None, lengthunit=None, + velocityunit=None, forceunit=None, author='N/A', + author_email=None, creator='MDAnalysis', + creator_version=mda.__version__, **kwargs): + + if not HAS_H5PY: + raise RuntimeError("H5MDWriter: Please install h5py") + self.filename = filename + if n_atoms == 
0: + raise ValueError("H5MDWriter: no atoms in output trajectory") + self._driver = driver + if self._driver == 'mpio': + raise ValueError("H5MDWriter: parallel writing with MPI I/O " + "is not currently supported.") + self.n_atoms = n_atoms + self.n_frames = n_frames + self.chunks = (1, n_atoms, 3) if chunks is None else chunks + if self.chunks is False and self.n_frames is None: + raise ValueError("H5MDWriter must know how many frames will be " + "written if ``chunks=False``.") + self.contiguous = self.chunks is False and self.n_frames is not None + self.compression = compression + self.compression_opts = compression_opts + self.convert_units = convert_units + self.h5md_file = None + + # The writer defaults to writing all data from the parent Timestep if + # it exists. If these are True, the writer will check each + # Timestep.has_* value and fill the self._has dictionary accordingly + # in _initialize_hdf5_datasets() + self._write_positions = positions + self._write_velocities = velocities + self._write_forces = forces + if not any([self._write_positions, + self._write_velocities, + self._write_forces]): + raise ValueError("At least one of positions, velocities, or " + "forces must be set to ``True``.") + + self._new_units = {'time': timeunit, + 'length': lengthunit, + 'velocity': velocityunit, + 'force': forceunit} + + # Pull out various keywords to store metadata in 'h5md' group + self.author = author + self.author_email = author_email + self.creator = creator + self.creator_version = creator_version + + def _write_next_frame(self, ag): + """Write information associated with ``ag`` at current frame + into trajectory + + Parameters + ---------- + ag : AtomGroup or Universe + + """ + try: + # Atomgroup? + ts = ag.ts + except AttributeError: + try: + # Universe? 
+ ts = ag.trajectory.ts + except AttributeError: + errmsg = "Input obj is neither an AtomGroup or Universe" + raise TypeError(errmsg) from None + + if ts.n_atoms != self.n_atoms: + raise IOError("H5MDWriter: Timestep does not have" + " the correct number of atoms") + + # This should only be called once when first timestep is read. + if self.h5md_file is None: + self._determine_units(ag) + self._open_file() + self._initialize_hdf5_datasets(ts) + + return self._write_next_timestep(ts) + + def _determine_units(self, ag): + """determine which units the file will be written with + + By default, it fills the :attr:`self.units` dictionary by copying the + units dictionary of the parent file. Because H5MD files have no + standard unit restrictions, users may pass a kwarg in ``(timeunit, + lengthunit, velocityunit, forceunit)`` to the writer so long as + MDAnalysis has a conversion factor for it (:exc:`ValueError` raised if + it does not). These custom unit arguments must be in + `MDAnalysis notation`_. If custom units are supplied from the user, + :attr`self.units[unit]` is replaced with the corresponding + `unit` argument. + + """ + + self.units = ag.universe.trajectory.units.copy() + + # set user input units + for key, value in self._new_units.items(): + if value is not None: + if value not in self._unit_translation_dict[key]: + raise ValueError(f"{value} is not a unit recognized by" + " MDAnalysis. Allowed units are:" + f" {self._unit_translation_dict.keys()}" + " For more information on units, see" + " `MDAnalysis units`_.") + else: + self.units[key] = self._new_units[key] + + if self.convert_units: + # check if all units are None + if not any(self.units.values()): + raise ValueError("The trajectory has no units, but " + "`convert_units` is set to ``True`` by " + "default in MDAnalysis. To write the file " + "with no units, set ``convert_units=False``.") + + def _open_file(self): + """Opens file with `H5PY`_ library and fills in metadata from kwargs. 
+ + :attr:`self.h5md_file` becomes file handle that links to root level. + + """ + + self.h5md_file = h5py.File(name=self.filename, + mode='w', + driver=self._driver) + + # fill in H5MD metadata from kwargs + self.h5md_file.require_group('h5md') + self.h5md_file['h5md'].attrs['version'] = np.array(self.H5MD_VERSION) + self.h5md_file['h5md'].require_group('author') + self.h5md_file['h5md/author'].attrs['name'] = self.author + if self.author_email is not None: + self.h5md_file['h5md/author'].attrs['email'] = self.author_email + self.h5md_file['h5md'].require_group('creator') + self.h5md_file['h5md/creator'].attrs['name'] = self.creator + self.h5md_file['h5md/creator'].attrs['version'] = self.creator_version + + def _initialize_hdf5_datasets(self, ts): + """initializes all datasets that will be written to by + :meth:`_write_next_timestep` + + Note + ---- + :exc:`NoDataError` is raised if no positions, velocities, or forces are + found in the input trajectory. While the H5MD standard allows for this + case, :class:`H5MDReader` cannot currently read files without at least + one of these three groups. A future change to both the reader and + writer will allow this case. 
+ + + """ + + # for keeping track of where to write in the dataset + self._counter = 0 + + # ask the parent file if it has positions, velocities, and forces + # if prompted by the writer with the self._write_* attributes + self._has = {group: getattr(ts, f'has_{attr}') + if getattr(self, f'_write_{attr}') + else False for group, attr in zip( + ('position', 'velocity', 'force'), + ('positions', 'velocities', 'forces'))} + + # initialize trajectory group + self.h5md_file.require_group('particles').require_group('trajectory') + self._traj = self.h5md_file['particles/trajectory'] + self.data_keys = [ + key for key in ts.data.keys() if key not in self.data_blacklist] + if self.data_keys: + self._obsv = self.h5md_file.require_group('observables') + + # box group is required for every group in 'particles' + self._traj.require_group('box') + self._traj['box'].attrs['dimension'] = 3 + if ts.dimensions is not None and np.all(ts.dimensions > 0): + self._traj['box'].attrs['boundary'] = 3*['periodic'] + self._traj['box'].require_group('edges') + self._edges = self._traj.require_dataset('box/edges/value', + shape=(0, 3, 3), + maxshape=(None, 3, 3), + dtype=np.float32) + self._step = self._traj.require_dataset('box/edges/step', + shape=(0,), + maxshape=(None,), + dtype=np.int32) + self._time = self._traj.require_dataset('box/edges/time', + shape=(0,), + maxshape=(None,), + dtype=np.float32) + self._set_attr_unit(self._edges, 'length') + self._set_attr_unit(self._time, 'time') + else: + # if no box, boundary attr must be "none" according to H5MD + self._traj['box'].attrs['boundary'] = 3*['none'] + self._create_step_and_time_datasets() + + if self.has_positions: + self._create_trajectory_dataset('position') + self._pos = self._traj['position/value'] + self._set_attr_unit(self._pos, 'length') + if self.has_velocities: + self._create_trajectory_dataset('velocity') + self._vel = self._traj['velocity/value'] + self._set_attr_unit(self._vel, 'velocity') + if self.has_forces: + 
self._create_trajectory_dataset('force') + self._force = self._traj['force/value'] + self._set_attr_unit(self._force, 'force') + + # intialize observable datasets from ts.data dictionary that + # are NOT in self.data_blacklist + if self.data_keys: + for key in self.data_keys: + self._create_observables_dataset(key, ts.data[key]) + + def _create_step_and_time_datasets(self): + """helper function to initialize a dataset for step and time + + Hunts down first available location to create the step and time + datasets. This should only be called if the trajectory has no + dimension, otherwise the 'box/edges' group creates step and time + datasets since 'box' is the only required group in 'particles'. + + :attr:`self._step` and :attr`self._time` serve as links to the created + datasets that other datasets can also point to for their step and time. + This serves two purposes: + 1. Avoid redundant writing of multiple datasets that share the + same step and time data. + 2. In HDF5, each chunked dataset has a cache (default 1 MiB), + so only 1 read is required to access step and time data + for all datasets that share the same step and time. 
+ + """ + + for group, value in self._has.items(): + if value: + self._step = self._traj.require_dataset(f'{group}/step', + shape=(0,), + maxshape=(None,), + dtype=np.int32) + self._time = self._traj.require_dataset(f'{group}/time', + shape=(0,), + maxshape=(None,), + dtype=np.float32) + self._set_attr_unit(self._time, 'time') + break + + def _create_trajectory_dataset(self, group): + """helper function to initialize a dataset for + position, velocity, and force""" + + if self.n_frames is None: + shape = (0, self.n_atoms, 3) + maxshape = (None, self.n_atoms, 3) + else: + shape = (self.n_frames, self.n_atoms, 3) + maxshape = None + + chunks = None if self.contiguous else self.chunks + + self._traj.require_group(group) + self._traj.require_dataset(f'{group}/value', + shape=shape, + maxshape=maxshape, + dtype=np.float32, + chunks=chunks, + compression=self.compression, + compression_opts=self.compression_opts) + if 'step' not in self._traj[group]: + self._traj[f'{group}/step'] = self._step + if 'time' not in self._traj[group]: + self._traj[f'{group}/time'] = self._time + + def _create_observables_dataset(self, group, data): + """helper function to initialize a dataset for each observable""" + + self._obsv.require_group(group) + # guarantee ints and floats have a shape () + data = np.asarray(data) + self._obsv.require_dataset(f'{group}/value', + shape=(0,) + data.shape, + maxshape=(None,) + data.shape, + dtype=data.dtype) + if 'step' not in self._obsv[group]: + self._obsv[f'{group}/step'] = self._step + if 'time' not in self._obsv[group]: + self._obsv[f'{group}/time'] = self._time + + def _set_attr_unit(self, dset, unit): + """helper function to set a 'unit' attribute for an HDF5 dataset""" + + if self.units[unit] is None: + return + + dset.attrs['unit'] = self._unit_translation_dict[unit][self.units[unit]] + + def _write_next_timestep(self, ts): + """Write coordinates and unitcell information to H5MD file. 
+ + Do not call this method directly; instead use + :meth:`write` because some essential setup is done + there before writing the first frame. + + The first dimension of each dataset is extended by +1 and + then the data is written to the new slot. + + Note + ---- + Writing H5MD files with fancy trajectory slicing where the Timestep + does not increase monotonically such as ``u.trajectory[[2,1,0]]`` + or ``u.trajectory[[0,1,2,0,1,2]]`` raises a :exc:`ValueError` as this + violates the rules of the step dataset in the H5MD standard. + + """ + + i = self._counter + + # H5MD step refers to the integration step at which the data were + # sampled, therefore ts.data['step'] is the most appropriate value + # to use. However, step is also necessary in H5MD to allow + # temporal matching of the data, so ts.frame is used as an alternative + self._step.resize(self._step.shape[0]+1, axis=0) + try: + self._step[i] = ts.data['step'] + except(KeyError): + self._step[i] = ts.frame + if len(self._step) > 1 and self._step[i] < self._step[i-1]: + raise ValueError("The H5MD standard dictates that the step " + "dataset must increase monotonically in value.") + + # the dataset.resize() method should work with any chunk shape + self._time.resize(self._time.shape[0]+1, axis=0) + self._time[i] = ts.time + + if 'edges' in self._traj['box']: + self._edges.resize(self._edges.shape[0]+1, axis=0) + self._edges.write_direct(ts.triclinic_dimensions, + dest_sel=np.s_[i, :]) + # These datasets are not resized if n_frames was provided as an + # argument, as they were initialized with their full size. 
+ if self.has_positions: + if self.n_frames is None: + self._pos.resize(self._pos.shape[0]+1, axis=0) + self._pos.write_direct(ts.positions, dest_sel=np.s_[i, :]) + if self.has_velocities: + if self.n_frames is None: + self._vel.resize(self._vel.shape[0]+1, axis=0) + self._vel.write_direct(ts.velocities, dest_sel=np.s_[i, :]) + if self.has_forces: + if self.n_frames is None: + self._force.resize(self._force.shape[0]+1, axis=0) + self._force.write_direct(ts.forces, dest_sel=np.s_[i, :]) + + if self.data_keys: + for key in self.data_keys: + obs = self._obsv[f'{key}/value'] + obs.resize(obs.shape[0]+1, axis=0) + obs[i] = ts.data[key] + + if self.convert_units: + self._convert_dataset_with_units(i) + + self._counter += 1 + + def _convert_dataset_with_units(self, i): + """convert values in the dataset arrays with self.units dictionary""" + + # Note: simply doing convert_pos_to_native(self._pos[-1]) does not + # actually change the values in the dataset, so assignment required + if self.units['time'] is not None: + self._time[i] = self.convert_time_to_native(self._time[i]) + if self.units['length'] is not None: + if self._has['position']: + self._pos[i] = self.convert_pos_to_native(self._pos[i]) + if 'edges' in self._traj['box']: + self._edges[i] = self.convert_pos_to_native(self._edges[i]) + if self._has['velocity']: + if self.units['velocity'] is not None: + self._vel[i] = self.convert_velocities_to_native(self._vel[i]) + if self._has['force']: + if self.units['force'] is not None: + self._force[i] = self.convert_forces_to_native(self._force[i]) + + @property + def has_positions(self): + """``True`` if writer is writing positions from Timestep.""" + return self._has['position'] + + @property + def has_velocities(self): + """``True`` if writer is writing velocities from Timestep.""" + return self._has['velocity'] + + @property + def has_forces(self): + """``True`` if writer is writing forces from Timestep.""" + return self._has['force'] + + class 
H5PYPicklable(h5py.File): """H5PY file object (read-only) that can be pickled. diff --git a/package/MDAnalysis/coordinates/base.py b/package/MDAnalysis/coordinates/base.py index 2a2207a474a..f0f6adf4fc1 100644 --- a/package/MDAnalysis/coordinates/base.py +++ b/package/MDAnalysis/coordinates/base.py @@ -107,6 +107,12 @@ .. autoattribute:: dimensions .. autoattribute:: triclinic_dimensions .. autoattribute:: volume + .. attribute:: data + + :class:`dict` that holds arbitrary per Timestep data + + .. versionadded:: 0.11.0 + .. automethod:: __getitem__ .. automethod:: __eq__ .. automethod:: __iter__ diff --git a/package/doc/sphinx/source/documentation_pages/references.rst b/package/doc/sphinx/source/documentation_pages/references.rst index 8536445294d..1aab5479dd2 100644 --- a/package/doc/sphinx/source/documentation_pages/references.rst +++ b/package/doc/sphinx/source/documentation_pages/references.rst @@ -56,7 +56,7 @@ supersede these two citations.) .. _`MDAnalysis: A Python package for the rapid analysis of molecular dynamics simulations`: http://conference.scipy.org/proceedings/scipy2016/oliver_beckstein.html - + .. _references-components: @@ -140,28 +140,28 @@ If you use the hydrogen bond analysis code in .. _`10.1039/C9CP01532A`: http://dx.doi.org/10.1039/C9CP01532A -If you use :meth:`~MDAnalysis.analysis.pca.PCA.rmsip` or -:func:`~MDAnalysis.analysis.pca.rmsip` please cite [Amadei1999]_ and +If you use :meth:`~MDAnalysis.analysis.pca.PCA.rmsip` or +:func:`~MDAnalysis.analysis.pca.rmsip` please cite [Amadei1999]_ and [Leo-Macias2004]_ . -.. [Amadei1999] Amadei, A., Ceruso, M. A. & Nola, A. D. - On the convergence of the conformational coordinates basis set obtained by the essential dynamics analysis of proteins’ molecular dynamics simulations. +.. [Amadei1999] Amadei, A., Ceruso, M. A. & Nola, A. D. + On the convergence of the conformational coordinates basis set obtained by the essential dynamics analysis of proteins’ molecular dynamics simulations. 
*Proteins: Structure, Function, and Bioinformatics* **36**, 419–424 (1999). doi: `10.1002/(SICI)1097-0134(19990901)36:4<419::AID-PROT5>3.0.CO;2-U`_ .. _`10.1002/(SICI)1097-0134(19990901)36:4<419::AID-PROT5>3.0.CO;2-U`: https://doi.org/10.1002/(SICI)1097-0134(19990901)36:4%3C419::AID-PROT5%3E3.0.CO;2-U -.. [Leo-Macias2004] Leo-Macias, A., Lopez-Romero, P., Lupyan, D., Zerbino, D. & Ortiz, A. R. - An Analysis of Core Deformations in Protein Superfamilies. +.. [Leo-Macias2004] Leo-Macias, A., Lopez-Romero, P., Lupyan, D., Zerbino, D. & Ortiz, A. R. + An Analysis of Core Deformations in Protein Superfamilies. *Biophys J* **88**, 1291–1299 (2005). doi: `10.1529/biophysj.104.052449`_ .. _`10.1529/biophysj.104.052449`: https://dx.doi.org/10.1529%2Fbiophysj.104.052449 -If you use :meth:`~MDAnalysis.analysis.pca.PCA.cumulative_overlap` or +If you use :meth:`~MDAnalysis.analysis.pca.PCA.cumulative_overlap` or :func:`~MDAnalysis.analysis.pca.cumulative_overlap` please cite [Yang2008]_ . -.. [Yang2008] Yang, L., Song, G., Carriquiry, A. & Jernigan, R. L. - Close Correspondence between the Motions from Principal Component Analysis of Multiple HIV-1 Protease Structures and Elastic Network Modes. +.. [Yang2008] Yang, L., Song, G., Carriquiry, A. & Jernigan, R. L. + Close Correspondence between the Motions from Principal Component Analysis of Multiple HIV-1 Protease Structures and Elastic Network Modes. *Structure* **16**, 321–330 (2008). doi: `10.1016/j.str.2007.12.011`_ .. _`10.1016/j.str.2007.12.011`: https://dx.doi.org/10.1016/j.str.2007.12.011 @@ -169,10 +169,10 @@ If you use :meth:`~MDAnalysis.analysis.pca.PCA.cumulative_overlap` or If you use the Mean Squared Displacement analysis code in :mod:`MDAnalysis.analysis.msd` please cite [Calandri2011]_ and [Buyl2018]_. -.. [Calandri2011] Calandrini, V., Pellegrini, E., Calligari, P., Hinsen, K., Kneller, G. R. 
- NMoldyn-Interfacing Spectroscopic Experiments, Molecular Dynamics Simulations and Models for Time Correlation Functions. +.. [Calandri2011] Calandrini, V., Pellegrini, E., Calligari, P., Hinsen, K., Kneller, G. R. + NMoldyn-Interfacing Spectroscopic Experiments, Molecular Dynamics Simulations and Models for Time Correlation Functions. *Collect. SFN*, **12**, 201–232 (2011). doi: `10.1051/sfn/201112010`_ - + .. _`10.1051/sfn/201112010`: https://doi.org/10.1051/sfn/201112010 .. [Buyl2018] Buyl, P. tidynamics: A tiny package to compute the dynamics of stochastic and molecular simulations. Journal of Open Source Software, @@ -204,6 +204,22 @@ please cite [Dima2004b]_. 6564-6570. doi:`10.1021/jp037128y `_ +If you use H5MD files using +:mod:`MDAnalysis.coordinates.H5MD.py`, please cite [Buyl2013]_ and +[Jakupovic2021]_. + +.. [Buyl2013] Buyl P., Colberg P., and Höfling F.(2013). + H5MD: A structured, efficient, and portable file format for molecular data. + *Computer Physics Communications*, 185. doi:`10.1016/j.cpc.2014.01.018. + `_ + +.. [Jakupovic2021] Jakupovic E. and Beckstein O., MPI-parallel Molecular + Dynamics Trajectory Analysis with the H5MD Format in the MDAnalysis + Python Package, in *Proceedings of the 20th Python in Science Conference*, + (Meghann Agarwal, Chris Calloway, Dillon Niederhut, and David Shupe, eds.), + pp. 18 – 26, 2021. doi:`10.25080/majora-1b6fd038-005. + `_ + .. _citations-using-duecredit: @@ -241,8 +257,8 @@ export them to different formats. For example, one can display them in BibTeX format, using: .. code-block:: bash - - duecredit summary --format=bibtex + + duecredit summary --format=bibtex **Please cite your use of MDAnalysis and the packages and algorithms @@ -250,4 +266,3 @@ that it uses. Thanks!** .. 
_duecredit: https://github.com/duecredit/duecredit - diff --git a/testsuite/MDAnalysisTests/coordinates/test_h5md.py b/testsuite/MDAnalysisTests/coordinates/test_h5md.py index bbd660aa979..5614433c83d 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_h5md.py +++ b/testsuite/MDAnalysisTests/coordinates/test_h5md.py @@ -1,12 +1,14 @@ import pytest -from numpy.testing import assert_almost_equal, assert_array_equal +from numpy.testing import assert_almost_equal, assert_equal import numpy as np +import sys import MDAnalysis as mda from MDAnalysis.coordinates.H5MD import HAS_H5PY if HAS_H5PY: import h5py from MDAnalysis.exceptions import NoDataError -from MDAnalysisTests.datafiles import (H5MD_xvf, TPR_xvf, +from MDAnalysisTests import make_Universe +from MDAnalysisTests.datafiles import (H5MD_xvf, TPR_xvf, TRR_xvf, COORDINATES_TOPOLOGY, COORDINATES_H5MD) from MDAnalysisTests.coordinates.base import (MultiframeReaderTest, @@ -24,6 +26,7 @@ def __init__(self): self.trajectory = COORDINATES_H5MD self.topology = COORDINATES_TOPOLOGY self.reader = mda.coordinates.H5MD.H5MDReader + self.writer = mda.coordinates.H5MD.H5MDWriter self.ext = 'h5md' self.prec = 3 self.changing_dimensions = True @@ -51,16 +54,13 @@ def iter_ts(self, i): @pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") -class TestH5MDReader(MultiframeReaderTest): - """Tests H5MDReader with MultiframeReaderTest. - get_writer tests are marked with expected to fail since - H5MDWriter is not implemented yet.""" +class TestH5MDReaderBaseAPI(MultiframeReaderTest): + """Tests H5MDReader with synthetic trajectory.""" @staticmethod @pytest.fixture() def ref(): return H5MDReference() - @pytest.mark.xfail(reason='H5MD writer not implemented yet') def test_get_writer_1(self, ref, reader, tmpdir): with tmpdir.as_cwd(): outfile = 'test-writer.' 
+ ref.ext @@ -68,7 +68,6 @@ def test_get_writer_1(self, ref, reader, tmpdir): assert_equal(isinstance(W, ref.writer), True) assert_equal(W.n_atoms, reader.n_atoms) - @pytest.mark.xfail(reason='H5MD writer not implemented yet') def test_get_writer_2(self, ref, reader, tmpdir): with tmpdir.as_cwd(): outfile = 'test-writer.' + ref.ext @@ -77,336 +76,229 @@ def test_get_writer_2(self, ref, reader, tmpdir): assert_equal(W.n_atoms, 100) -# The tests below test an example trajectory H5MD_xvf -@pytest.fixture @pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") -def h5md_universe(): - return mda.Universe(TPR_xvf, H5MD_xvf) - - -@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") -def test_h5md_n_frames(h5md_universe): - assert len(h5md_universe.trajectory) == 3 - - -@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") -def test_h5md_positions(h5md_universe): - # first timestep tests - h5md_universe.trajectory[0] - assert_almost_equal(h5md_universe.atoms.positions[0], - [32.309906, 13.77798, 14.372463], - decimal=6) - assert_almost_equal(h5md_universe.atoms.positions[42], - [28.116928, 19.405945, 19.647358], - decimal=6) - assert_almost_equal(h5md_universe.atoms.positions[10000], - [44.117805, 50.442093, 23.299038], - decimal=6) - # second timestep tests - h5md_universe.trajectory[1] - assert_almost_equal(h5md_universe.atoms.positions[0], - [30.891968, 13.678971, 13.6000595], - decimal=6) - assert_almost_equal(h5md_universe.atoms.positions[42], - [27.163246, 19.846561, 19.3582], - decimal=6) - assert_almost_equal(h5md_universe.atoms.positions[10000], - [45.869278, 5.0342298, 25.460655], - decimal=6) - # third timestep tests - h5md_universe.trajectory[2] - assert_almost_equal(h5md_universe.atoms.positions[0], - [31.276512, 13.89617, 15.015897], - decimal=6) - assert_almost_equal(h5md_universe.atoms.positions[42], - [28.567991, 20.56532, 19.40814], - decimal=6) - assert_almost_equal(h5md_universe.atoms.positions[10000], - [39.713223, 6.127234, 
18.284992], - decimal=6) - - -@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") -def test_h5md_velocities(h5md_universe): - h5md_universe.trajectory[0] - assert_almost_equal(h5md_universe.atoms.velocities[0], - [-2.697732, 0.613568, 0.14334752], - decimal=6) - h5md_universe.trajectory[1] - assert_almost_equal(h5md_universe.atoms.velocities[42], - [-6.8698354, 7.834235, -8.114698], - decimal=6) - h5md_universe.trajectory[2] - assert_almost_equal(h5md_universe.atoms.velocities[10000], - [9.799492, 5.631466, 6.852126], - decimal=6) - - -@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") -def test_h5md_forces(h5md_universe): - h5md_universe.trajectory[0] - assert_almost_equal(h5md_universe.atoms.forces[0], - [20.071287, -155.2285, -96.72112], - decimal=5) - h5md_universe.trajectory[1] - assert_almost_equal(h5md_universe.atoms.forces[42], - [-4.1959066, -31.31548, 22.663044], - decimal=6) - h5md_universe.trajectory[2] - assert_almost_equal(h5md_universe.atoms.forces[10000], - [-41.43743, 83.35207, 62.94751], - decimal=5) - - -@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") -def test_h5md_dimensions(h5md_universe): - # first timestep - h5md_universe.trajectory[0] - assert_almost_equal(h5md_universe.trajectory.ts.dimensions, - [52.763, 52.763, 52.763, 90., 90., 90.], - decimal=6) - # second timestep - h5md_universe.trajectory[1] - assert_almost_equal(h5md_universe.trajectory.ts.dimensions, - [52.807877, 52.807877, 52.807877, 90., 90., 90.], - decimal=6) - # third timestep - h5md_universe.trajectory[2] - assert_almost_equal(h5md_universe.trajectory.ts.dimensions, - [52.839806, 52.839806, 52.839806, 90., 90., 90.], - decimal=6) - - -@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") -def test_h5md_data_step(h5md_universe): - h5md_universe.trajectory[0] - assert h5md_universe.trajectory.ts.data['step'] == 0 - h5md_universe.trajectory[1] - assert h5md_universe.trajectory.ts.data['step'] == 25000 - h5md_universe.trajectory[2] 
- assert h5md_universe.trajectory.ts.data['step'] == 50000 - - -@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") -def test_rewind(h5md_universe): - h5md_universe.trajectory.rewind() - assert h5md_universe.trajectory.ts.frame == 0 - - -@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") -def test_next(h5md_universe): - h5md_universe.trajectory.rewind() - h5md_universe.trajectory.next() - assert h5md_universe.trajectory.ts.frame == 1 - - -@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") -def test_jump_last_frame(h5md_universe): - h5md_universe.trajectory[-1] - assert h5md_universe.trajectory.ts.frame == 2 - - -@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") -@pytest.mark.parametrize("start, stop, step", ((0, 2, 1), - (1, 2, 1))) -def test_slice(h5md_universe, start, stop, step): - frames = [h5md_universe.trajectory.ts.frame - for ts in h5md_universe.trajectory[start:stop:step]] - assert_array_equal(frames, np.arange(start, stop, step)) - - -@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") -@pytest.mark.parametrize("array_like", [list, np.array]) -def test_array_like(h5md_universe, array_like): - array = array_like([0, 2]) - frames = [h5md_universe.trajectory.ts.frame - for ts in h5md_universe.trajectory[array]] - assert_array_equal(frames, array) - - -@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") -@pytest.mark.parametrize("indices", ([0, 1, 2, 1, 2, 2, 0])) -def test_list_indices(h5md_universe, indices): - frames = [h5md_universe.trajectory.ts.frame - for ts in h5md_universe.trajectory[indices]] - assert_array_equal(frames, indices) - +class TestH5MDWriterBaseAPI(BaseWriterTest): + """Tests H5MDWriter base API with synthetic trajectory""" + @staticmethod + @pytest.fixture() + def ref(): + return H5MDReference() -@pytest.fixture -@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") -def ref(): - return H5MDReference() + def test_write_trajectory_atomgroup(self, ref, reader, 
universe, tmpdir): + outfile = 'write-atoms-test.' + ref.ext + with tmpdir.as_cwd(): + with ref.writer(outfile, universe.atoms.n_atoms, + velocities=True, forces=True) as w: + for ts in universe.trajectory: + w.write(universe.atoms) + self._check_copy(outfile, ref, reader) + + def test_write_trajectory_universe(self, ref, reader, universe, tmpdir): + outfile = 'write-uni-test.' + ref.ext + with tmpdir.as_cwd(): + with ref.writer(outfile, universe.atoms.n_atoms, + velocities=True, forces=True) as w: + for ts in universe.trajectory: + w.write(universe) + self._check_copy(outfile, ref, reader) -@pytest.fixture @pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") -def h5md_file(): - return h5py.File(H5MD_xvf, 'r') +class TestH5MDReaderWithRealTrajectory(object): + prec = 3 + ext = 'h5md' -@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") -def test_no_positions(h5md_file, ref, tmpdir): - outfile = 'test_no_positions' + ref.ext - with tmpdir.as_cwd(): - with h5md_file as f: - with h5py.File(outfile, 'w') as g: - f.copy(source='particles', dest=g) - f.copy(source='h5md', dest=g) - del g['particles/trajectory/position'] - - u = mda.Universe(TPR_xvf, outfile, format='H5MD') - with pytest.raises(NoDataError): - u.trajectory.ts.positions - - -@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") -def test_no_velocities(h5md_file, ref, tmpdir): - outfile = 'test_no_velocities' + ref.ext - with tmpdir.as_cwd(): - with h5md_file as f: - with h5py.File(outfile, 'w') as g: - f.copy(source='particles', dest=g) - f.copy(source='h5md', dest=g) - del g['particles/trajectory/velocity'] - u = mda.Universe(TPR_xvf, outfile, format='H5MD') - with pytest.raises(NoDataError): - u.trajectory.ts.velocities + @pytest.fixture(scope='class') + def universe(self): + return mda.Universe(TPR_xvf, H5MD_xvf) + @pytest.fixture() + def h5md_file(self): + return h5py.File(H5MD_xvf, 'r') -@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") -def 
test_no_forces(h5md_file, ref, tmpdir): - outfile = 'test_no_forces' + ref.ext - with tmpdir.as_cwd(): + @pytest.fixture() + def outfile(self, tmpdir): + return str(tmpdir.join('h5md-reader-test.' + self.ext)) + + def test_n_frames(self, universe): + assert len(universe.trajectory) == 3 + + def test_positions(self, universe): + universe.trajectory[0] + assert_almost_equal(universe.atoms.positions[0], + [32.309906, 13.77798, 14.372463], + decimal=self.prec) + assert_almost_equal(universe.atoms.positions[42], + [28.116928, 19.405945, 19.647358], + decimal=self.prec) + assert_almost_equal(universe.atoms.positions[10000], + [44.117805, 50.442093, 23.299038], + decimal=self.prec) + + universe.trajectory[1] + assert_almost_equal(universe.atoms.positions[0], + [30.891968, 13.678971, 13.6000595], + decimal=self.prec) + assert_almost_equal(universe.atoms.positions[42], + [27.163246, 19.846561, 19.3582], + decimal=self.prec) + assert_almost_equal(universe.atoms.positions[10000], + [45.869278, 5.0342298, 25.460655], + decimal=self.prec) + universe.trajectory[2] + assert_almost_equal(universe.atoms.positions[0], + [31.276512, 13.89617, 15.015897], + decimal=self.prec) + assert_almost_equal(universe.atoms.positions[42], + [28.567991, 20.56532, 19.40814], + decimal=self.prec) + assert_almost_equal(universe.atoms.positions[10000], + [39.713223, 6.127234, 18.284992], + decimal=self.prec) + + def test_h5md_velocities(self, universe): + universe.trajectory[0] + assert_almost_equal(universe.atoms.velocities[0], + [-2.697732, 0.613568, 0.14334752], + decimal=self.prec) + universe.trajectory[1] + assert_almost_equal(universe.atoms.velocities[42], + [-6.8698354, 7.834235, -8.114698], + decimal=self.prec) + universe.trajectory[2] + assert_almost_equal(universe.atoms.velocities[10000], + [9.799492, 5.631466, 6.852126], + decimal=self.prec) + + def test_h5md_forces(self, universe): + universe.trajectory[0] + assert_almost_equal(universe.atoms.forces[0], + [20.071287, -155.2285, -96.72112], 
+ decimal=self.prec) + universe.trajectory[1] + assert_almost_equal(universe.atoms.forces[42], + [-4.1959066, -31.31548, 22.663044], + decimal=self.prec) + universe.trajectory[2] + assert_almost_equal(universe.atoms.forces[10000], + [-41.43743, 83.35207, 62.94751], + decimal=self.prec) + + def test_h5md_dimensions(self, universe): + universe.trajectory[0] + assert_almost_equal(universe.trajectory.ts.dimensions, + [52.763, 52.763, 52.763, 90., 90., 90.], + decimal=self.prec) + universe.trajectory[1] + assert_almost_equal(universe.trajectory.ts.dimensions, + [52.807877, 52.807877, 52.807877, 90., 90., 90.], + decimal=self.prec) + universe.trajectory[2] + assert_almost_equal(universe.trajectory.ts.dimensions, + [52.839806, 52.839806, 52.839806, 90., 90., 90.], + decimal=self.prec) + + def test_h5md_data_step(self, universe): + for ts, step in zip(universe.trajectory, (0, 25000, 50000)): + assert_equal(ts.data['step'], step) + + def test_rewind(self, universe): + universe.trajectory[1] + universe.trajectory.rewind() + assert universe.trajectory.ts.frame == 0 + + def test_next(self, universe): + universe.trajectory.rewind() + universe.trajectory.next() + assert universe.trajectory.ts.frame == 1 + + def test_jump_last_frame(self, universe): + universe.trajectory[-1] + assert universe.trajectory.ts.frame == 2 + + @pytest.mark.parametrize("start, stop, step", ((0, 2, 1), + (1, 2, 1))) + def test_slice(self, universe, start, stop, step): + frames = [universe.trajectory.ts.frame + for ts in universe.trajectory[start:stop:step]] + assert_equal(frames, np.arange(start, stop, step)) + + @pytest.mark.parametrize("start, stop, step", ((0, 2, 1), + (1, 2, 1))) + def test_slice(self, universe, start, stop, step): + frames = [universe.trajectory.ts.frame + for ts in universe.trajectory[start:stop:step]] + assert_equal(frames, np.arange(start, stop, step)) + + @pytest.mark.parametrize("array_like", [list, np.array]) + def test_array_like(self, universe, array_like): + array = 
array_like([0, 2]) + frames = [universe.trajectory.ts.frame + for ts in universe.trajectory[array]] + assert_equal(frames, array) + + def test_list_indices(self, universe): + indices = [0, 1, 2, 1, 2, 2, 0] + frames = [universe.trajectory.ts.frame + for ts in universe.trajectory[indices]] + assert_equal(frames, indices) + + @pytest.mark.parametrize('group, attr', (('position', 'positions'), + ('velocity', 'velocities'), + ('force', 'forces'))) + def test_no_group(self, h5md_file, outfile, attr, group): with h5md_file as f: with h5py.File(outfile, 'w') as g: f.copy(source='particles', dest=g) f.copy(source='h5md', dest=g) - del g['particles/trajectory/force'] - u = mda.Universe(TPR_xvf, outfile, format='H5MD') - with pytest.raises(NoDataError): - u.trajectory.ts.forces - - -@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") -def test_unknown_position_unit(h5md_file, ref, tmpdir): - outfile = 'test_unknown_position_unit' + ref.ext - with tmpdir.as_cwd(): + del g[f'particles/trajectory/{group}'] + u = mda.Universe(TPR_xvf, outfile) + with pytest.raises(NoDataError, + match="This Timestep has no"): + getattr(u.trajectory.ts, attr) + + @pytest.mark.parametrize('dset', + ('position/value', 'position/time', 'velocity/value', 'force/value')) + def test_unknown_unit(self, h5md_file, outfile, dset): with h5md_file as f: with h5py.File(outfile, 'w') as g: f.copy(source='particles', dest=g) f.copy(source='h5md', dest=g) g['particles' '/trajectory' - '/position' - '/value'].attrs['unit'] = 'random string' - with pytest.raises(RuntimeError): - u = mda.Universe(TPR_xvf, outfile, format='H5MD') - + f'/{dset}'].attrs['unit'] = 'random string' + with pytest.raises(RuntimeError, + match=" is not recognized by H5MDReader."): + u = mda.Universe(TPR_xvf, outfile) -@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") -def test_length_unit_from_box(h5md_file, ref, tmpdir): - outfile = 'test_length_unit_from_box' + ref.ext - with tmpdir.as_cwd(): + def 
test_length_unit_from_box(self, h5md_file, universe, outfile): with h5md_file as f: with h5py.File(outfile, 'w') as g: f.copy(source='particles', dest=g) f.copy(source='h5md', dest=g) del g['particles/trajectory/position'] - u = mda.Universe(TPR_xvf, outfile, format='H5MD') - assert u.trajectory.units['length'] == 'Angstrom' - - -@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") -def test_unknown_time_unit(h5md_file, ref, tmpdir): - outfile = 'test_unknown_time_unit' + ref.ext - with tmpdir.as_cwd(): + ref_u = universe + uw = mda.Universe(TPR_xvf, outfile) + assert_equal(ref_u.trajectory.units['length'], + uw.trajectory.units['length']) + for ref_ts, new_ts in zip(ref_u.trajectory, uw.trajectory): + assert_equal(ref_ts.dimensions, new_ts.dimensions) + assert_equal(ref_ts.triclinic_dimensions, + new_ts.triclinic_dimensions) + + @pytest.mark.parametrize('group', ('position', 'velocity', 'force')) + def test_changing_n_atoms(self, h5md_file, outfile, group): with h5md_file as f: with h5py.File(outfile, 'w') as g: f.copy(source='particles', dest=g) f.copy(source='h5md', dest=g) - g['particles' - '/trajectory' - '/position' - '/time'].attrs['unit'] = 'random string' - with pytest.raises(RuntimeError): - u = mda.Universe(TPR_xvf, outfile, format='H5MD') - + g[f'particles/trajectory/{group}/value'].resize((3, 10000, 3)) + with pytest.raises(ValueError, + match=" of either the postion, velocity, or force"): + u = mda.Universe(TPR_xvf, outfile) -@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") -def test_unknown_velocity_unit(h5md_file, ref, tmpdir): - outfile = 'test_unknown_velocity_unit' + ref.ext - with tmpdir.as_cwd(): - with h5md_file as f: - with h5py.File(outfile, 'w') as g: - f.copy(source='particles', dest=g) - f.copy(source='h5md', dest=g) - g['particles' - '/trajectory' - '/velocity' - '/value'].attrs['unit'] = 'random string' - with pytest.raises(RuntimeError): - u = mda.Universe(TPR_xvf, outfile, format='H5MD') - - 
-@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") -def test_unknown_force_unit(h5md_file, ref, tmpdir): - outfile = 'test_unknown_force_unit' + ref.ext - with tmpdir.as_cwd(): - with h5md_file as f: - with h5py.File(outfile, 'w') as g: - f.copy(source='particles', dest=g) - f.copy(source='h5md', dest=g) - g['particles' - '/trajectory' - '/force' - '/value'].attrs['unit'] = 'random string' - with pytest.raises(RuntimeError): - u = mda.Universe(TPR_xvf, outfile, format='H5MD') - - -@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") -def test_changing_n_atoms1(h5md_file, ref, tmpdir): - outfile = 'test_changing_n_atoms1' + ref.ext - with tmpdir.as_cwd(): - with h5md_file as f: - with h5py.File(outfile, 'w') as g: - f.copy(source='particles', dest=g) - f.copy(source='h5md', dest=g) - g['particles/trajectory/position/value'].resize((3, 10000, 3)) - with pytest.raises(ValueError): - u = mda.Universe(TPR_xvf, outfile, format='H5MD') - - -@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") -def test_changing_n_atoms2(h5md_file, ref, tmpdir): - outfile = 'test_changing_n_atoms2' + ref.ext - with tmpdir.as_cwd(): - with h5md_file as f: - with h5py.File(outfile, 'w') as g: - f.copy(source='particles', dest=g) - f.copy(source='h5md', dest=g) - g['particles/trajectory/velocity/value'].resize((3, 10000, 3)) - with pytest.raises(ValueError): - u = mda.Universe(TPR_xvf, outfile, format='H5MD') - - -@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") -def test_changing_n_atoms3(h5md_file, ref, tmpdir): - outfile = 'test_changing_n_atoms3' + ref.ext - with tmpdir.as_cwd(): - with h5md_file as f: - with h5py.File(outfile, 'w') as g: - f.copy(source='particles', dest=g) - f.copy(source='h5md', dest=g) - g['particles/trajectory/force/value'].resize((3, 10000, 3)) - with pytest.raises(ValueError): - u = mda.Universe(TPR_xvf, outfile, format='H5MD') - - -@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") -def 
test_2D_box(h5md_file, ref, tmpdir): - outfile = 'test_2D_box' + ref.ext - with tmpdir.as_cwd(): + def test_2D_box(self, h5md_file, outfile): with h5md_file as f: with h5py.File(outfile, 'w') as g: f.copy(source='particles', dest=g) @@ -416,27 +308,20 @@ def test_2D_box(h5md_file, ref, tmpdir): del g['particles/trajectory/box/edges/value'] g['particles/trajectory' '/box/edges'].create_dataset('value', data=new_box) - with pytest.raises(ValueError): - u = mda.Universe(TPR_xvf, outfile, format='H5MD') + with pytest.raises(ValueError, + match="MDAnalysis only supports 3-dimensional"): + u = mda.Universe(TPR_xvf, outfile) - -@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") -def test_no_box(h5md_file, ref, tmpdir): - outfile = 'test_no_box' + ref.ext - with tmpdir.as_cwd(): + def test_no_box(self, h5md_file, outfile): with h5md_file as f: with h5py.File(outfile, 'w') as g: f.copy(source='particles', dest=g) f.copy(source='h5md', dest=g) del g['particles/trajectory/box/edges'] - u = mda.Universe(TPR_xvf, outfile, format='H5MD') - assert u.trajectory.ts.dimensions is None + u = mda.Universe(TPR_xvf, outfile) + assert_equal(u.trajectory.ts.dimensions, None) - -@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") -def test_no_groups(h5md_file, ref, tmpdir): - outfile = 'test_no_groups' + ref.ext - with tmpdir.as_cwd(): + def test_no_groups(self, h5md_file, outfile): with h5md_file as f: with h5py.File(outfile, 'w') as g: f.copy(source='particles', dest=g) @@ -444,28 +329,11 @@ def test_no_groups(h5md_file, ref, tmpdir): del g['particles/trajectory/position'] del g['particles/trajectory/velocity'] del g['particles/trajectory/force'] - with pytest.raises(NoDataError): - u = mda.Universe(TPR_xvf, outfile, format='H5MD') - - -@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") -@pytest.mark.xfail(reason='Issue #2884') -def test_open_filestream(h5md_file): - with h5md_file as f: - u = mda.Universe(TPR_xvf, h5md_file) - - 
-@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") -def test_wrong_driver(): - with pytest.raises(ValueError): - u = mda.Universe(TPR_xvf, H5MD_xvf, - driver='wrong_driver', comm="MPI.COMM_WORLD") - + with pytest.raises(NoDataError, + match="Provide at least a position, velocity"): + u = mda.Universe(TPR_xvf, outfile) -@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") -def test_no_convert_units(h5md_file, ref, tmpdir): - outfile = 'test_no_convert_units' + ref.ext - with tmpdir.as_cwd(): + def test_no_convert_units(self, h5md_file, outfile): with h5md_file as f: with h5py.File(outfile, 'w') as g: f.copy(source='particles', dest=g) @@ -474,15 +342,11 @@ def test_no_convert_units(h5md_file, ref, tmpdir): for name in groups: del g['particles/trajectory'][name]['value'].attrs['unit'] del g['particles/trajectory/position/time'].attrs['unit'] - u = mda.Universe(TPR_xvf, outfile, convert_units=False, format="H5MD") + u = mda.Universe(TPR_xvf, outfile, convert_units=False) for unit in u.trajectory.units: - assert u.trajectory.units[unit] is None - + assert_equal(u.trajectory.units[unit], None) -@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") -def test_no_units(h5md_file, ref, tmpdir): - outfile = 'test_no_units' + ref.ext - with tmpdir.as_cwd(): + def test_no_units_but_convert_units_true_error(self, h5md_file, outfile): with h5md_file as f: with h5py.File(outfile, 'w') as g: f.copy(source='particles', dest=g) @@ -492,10 +356,462 @@ def test_no_units(h5md_file, ref, tmpdir): del g['particles/trajectory'][name]['value'].attrs['unit'] del g['particles/trajectory/position/time'].attrs['unit'] del g['particles/trajectory/box/edges/value'].attrs['unit'] - with pytest.raises(ValueError): - u = mda.Universe(TPR_xvf, outfile, format="H5MD") + with pytest.raises(ValueError, + match="H5MD file must have readable units if"): + u = mda.Universe(TPR_xvf, outfile, convert_units=True) + + @pytest.mark.xfail(reason='Issue #2884') + def 
test_open_filestream(self, h5md_file, universe): + with h5md_file as f: + u = mda.Universe(TPR_xvf, h5md_file) + for ts1, ts2 in zip(universe.trajectory, u.trajectory): + assert_equal(ts1.positions, ts2.positions) + assert_equal(ts1.velocities, ts2.velocities) + assert_equal(ts1.forces, ts2.forces) + + def test_wrong_driver(self): + with pytest.raises(ValueError, + match="If MPI communicator object is used to open"): + u = mda.Universe(TPR_xvf, H5MD_xvf, + driver='wrong_driver', + comm="mock MPI.COMM_WORLD") + + def test_open_with_driver(self): + u = mda.Universe(TPR_xvf, H5MD_xvf, driver="core") + assert_equal(u.trajectory._file.driver, "core") @pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") -def test_open_with_driver(): - u = mda.Universe(TPR_xvf, H5MD_xvf, driver="core") +class TestH5MDWriterWithRealTrajectory(object): + + prec = 3 + + @pytest.fixture() + def universe(self): + return mda.Universe(TPR_xvf, H5MD_xvf) + + @pytest.fixture() + def universe_no_units(self): + u = mda.Universe(TPR_xvf, H5MD_xvf, convert_units=False) + u.trajectory.units['time'] = None + u.trajectory.units['length'] = None + u.trajectory.units['velocity'] = None + u.trajectory.units['force'] = None + return u + + @pytest.fixture() + def Writer(self): + return mda.coordinates.H5MD.H5MDWriter + + @pytest.fixture() + def outfile(self, tmpdir): + return str(tmpdir) + 'h5md-writer-test.h5md' + + @pytest.fixture() + def outtop(self, tmpdir): + return str(tmpdir) + 'h5md-writer-top.pdb' + + @pytest.mark.parametrize('scalar, error, match', + ((0, ValueError, "H5MDWriter: no atoms in output trajectory"), + (0.5, IOError, "H5MDWriter: Timestep does not have"))) + def test_n_atoms_errors(self, universe, Writer, outfile, + scalar, error, match): + n_atoms = universe.atoms.n_atoms * scalar + with pytest.raises(error, match=match): + with Writer(outfile, n_atoms) as W: + W.write(universe) + + def test_chunk_error(self, universe, Writer, outfile): + n_atoms = universe.atoms.n_atoms + err 
= "H5MDWriter must know how many frames will be " + with pytest.raises(ValueError, match=err): + with Writer(outfile, n_atoms, chunks=False) as W: + for ts in universe.trajectory: + W.write(universe) + + @pytest.mark.parametrize('dimensions', (None, 0)) + def test_no_dimensions(self, universe, Writer, outfile, dimensions): + with Writer(outfile, universe.atoms.n_atoms) as W: + for ts in universe.trajectory: + ts.dimensions = dimensions + W.write(universe) + + uw = mda.Universe(TPR_xvf, outfile) + box = uw.trajectory._particle_group['box'] + assert 'edges' not in box + assert_equal(3*['none'], box.attrs['boundary']) + assert_equal(3, box.attrs['dimension']) + + def test_step_not_monotonic(self, universe, Writer, outfile): + with pytest.raises(ValueError, + match="The H5MD standard dictates that the step "): + with Writer(outfile, universe.atoms.n_atoms) as W: + for ts in universe.trajectory[[0, 1, 2, 1]]: + W.write(universe) + + with pytest.raises(ValueError, + match="The H5MD standard dictates that the step "): + with Writer(outfile, universe.atoms.n_atoms) as W: + for ts in universe.trajectory: + if ts.frame == 2: + ts.data['step'] = 0 + W.write(universe) + + def test_step_from_frame(self, universe, Writer, outfile): + with Writer(outfile, universe.atoms.n_atoms) as W: + for ts in universe.trajectory: + del ts.data['step'] + W.write(universe) + + uw = mda.Universe(TPR_xvf, outfile) + steps = [ts.data['step'] for ts in uw.trajectory] + frames = [ts.frame for ts in universe.trajectory] + for step, frame in zip(steps, frames): + assert_equal(step, frame) + + def test_has_property(self, universe, Writer, outfile): + with Writer(outfile, universe.atoms.n_atoms) as W: + W.write(universe) + # make sure property is pulled from _has dict + assert W.has_positions == W._has['position'] + assert W.has_velocities == W._has['velocity'] + assert W.has_forces == W._has['force'] + # make sure the values are correct + assert W.has_positions is True + assert W.has_velocities is True 
+ assert W.has_forces is True + + @pytest.mark.parametrize('pos, vel, force', ( + (True, False, False), + (True, True, False), + (True, False, True), + (True, True, True), + (False, True, True), + (False, False, True), + (False, False, False))) + def test_write_trajectory(self, universe, Writer, outfile, + pos, vel, force): + try: + with Writer(outfile, + universe.atoms.n_atoms, + positions=pos, + velocities=vel, + forces=force, + author='My Name', + author_email='my_email@asu.edu') as W: + for ts in universe.trajectory: + W.write(universe) + + uw = mda.Universe(TPR_xvf, outfile) + + # check the trajectory contents match reference universes + for ts, ref_ts in zip(uw.trajectory, universe.trajectory): + assert_almost_equal(ts.dimensions, ref_ts.dimensions, self.prec) + if pos: + assert_almost_equal(ts._pos, ref_ts._pos, self.prec) + else: + with pytest.raises(NoDataError, + match="This Timestep has no"): + getattr(ts, 'positions') + if vel: + assert_almost_equal(ts._velocities, ref_ts._velocities, + self.prec) + else: + with pytest.raises(NoDataError, + match="This Timestep has no"): + getattr(ts, 'velocities') + if force: + assert_almost_equal(ts._forces, ref_ts._forces, self.prec) + else: + with pytest.raises(NoDataError, + match="This Timestep has no"): + getattr(ts, 'forces') + + # when (False, False, False) + except(ValueError): + with pytest.raises(ValueError, + match="At least one of positions, velocities"): + with Writer(outfile, + universe.atoms.n_atoms, + positions=pos, + velocities=vel, + forces=force, + author='My Name', + author_email='my_email@asu.edu') as W: + for ts in universe.trajectory: + W.write(universe) + + def test_write_AtomGroup_with(self, universe, outfile, outtop, Writer): + """test to write H5MD from AtomGroup""" + ca = universe.select_atoms("protein and name CA") + ca.write(outtop) + with Writer(outfile, n_atoms=ca.n_atoms) as W: + for ts in universe.trajectory: + W.write(ca) + + uw = mda.Universe(outtop, outfile) + caw = uw.atoms + + 
for orig_ts, written_ts in zip(universe.trajectory, + uw.trajectory): + assert_almost_equal(ca.positions, caw.positions, self.prec) + assert_almost_equal(orig_ts.time, written_ts.time, self.prec) + assert_almost_equal(written_ts.dimensions, + orig_ts.dimensions, + self.prec) + + @pytest.mark.parametrize('frames, n_frames', ((None, 1), + ('all', 3))) + def test_ag_write(self, universe, outfile, outtop, + Writer, frames, n_frames): + """test to write with ag.write()""" + ca = universe.select_atoms("protein and name CA") + ca.write(outtop) + + ca.write(outfile, frames=frames, format='h5md') + + uw = mda.Universe(outtop, outfile) + caw = uw.atoms + + assert_equal(n_frames, len(uw.trajectory)) + for orig_ts, written_ts in zip(universe.trajectory, + uw.trajectory): + assert_almost_equal(ca.positions, caw.positions, self.prec) + assert_almost_equal(orig_ts.time, written_ts.time, self.prec) + assert_almost_equal(written_ts.dimensions, + orig_ts.dimensions, + self.prec) + + @pytest.mark.parametrize('timeunit, lengthunit, velocityunit, forceunit', ( + ('fs', 'Angstrom', 'Angstrom/ps', 'kJ/(mol*Angstrom)'), + ('s', 'pm', 'm/s', 'Newton'), + ('ps', 'fm', 'Angstrom/fs', 'kcal/(mol*Angstrom)',), + ('AKMA', 'nm', 'Angstrom/AKMA', 'kcal/(mol*Angstrom)'))) + def test_write_custom_units(self, universe, outfile, Writer, + timeunit, lengthunit, + velocityunit, forceunit): + with Writer(outfile, + universe.atoms.n_atoms, + lengthunit=lengthunit, + velocityunit=velocityunit, + forceunit=forceunit, + timeunit=timeunit) as W: + for ts in universe.trajectory: + W.write(universe) + + u = mda.Universe(TPR_xvf, outfile) + for u_unit, custom_unit in zip(u.trajectory.units.values(), + (timeunit, lengthunit, + velocityunit, forceunit)): + assert_equal(u_unit, custom_unit) + + @pytest.mark.parametrize('timeunit, lengthunit, velocityunit, forceunit', ( + ('imaginary time', None, None, None), + (None, None, 'c', None), + (None, None, None, 'HUGE FORCE',), + (None, 'lightyear', None, None),)) + def 
test_write_bad_units(self, universe, outfile, Writer, + timeunit, lengthunit, + velocityunit, forceunit): + with pytest.raises(ValueError, match=" is not a unit recognized by"): + with Writer(outfile, + universe.atoms.n_atoms, + lengthunit=lengthunit, + velocityunit=velocityunit, + forceunit=forceunit, + timeunit=timeunit) as W: + for ts in universe.trajectory: + W.write(universe) + + def test_no_units_w_convert_true(self, universe_no_units, outfile, Writer): + # no units + convert_units = ValueError + with pytest.raises(ValueError, match="The trajectory has no units,"): + with Writer(outfile, + universe_no_units.atoms.n_atoms) as W: + for ts in universe_no_units.trajectory: + W.write(universe_no_units) + + def test_no_units_w_convert_false(self, universe_no_units, + outfile, Writer): + with Writer(outfile, + universe_no_units.atoms.n_atoms, + convert_units=False) as W: + for ts in universe_no_units.trajectory: + W.write(universe_no_units) + + uw = mda.Universe(TPR_xvf, outfile, convert_units=False) + for unit in uw.trajectory.units.values(): + assert_equal(unit, None) + + @pytest.mark.parametrize('convert_units', (True, False)) + def test_convert_units(self, universe, outfile, Writer, + convert_units): + with Writer(outfile, + universe.atoms.n_atoms, + convert_units=convert_units) as W: + for ts in universe.trajectory: + W.write(universe) + + ref_units = universe.trajectory.units.items() + uw = mda.Universe(TPR_xvf, outfile) + uw_units = uw.trajectory.units.items() + for u1, u2 in zip(ref_units, uw_units): + assert_equal(u1, u2) + + @pytest.mark.parametrize('chunks', ((3, 1000, 1), + (1, 1000, 3), + (100, 100, 3))) + def test_write_chunks(self, universe, outfile, Writer, chunks): + with Writer(outfile, + universe.atoms.n_atoms, + chunks=chunks) as W: + for ts in universe.trajectory: + W.write(universe) + + uw = mda.Universe(TPR_xvf, outfile) + for dset in (uw.trajectory._particle_group['position/value'], + uw.trajectory._particle_group['velocity/value'], + 
uw.trajectory._particle_group['force/value']): + assert_equal(dset.chunks, chunks) + + for ts1, ts2 in zip(universe.trajectory, uw.trajectory): + assert_equal(ts1.positions, ts2.positions) + assert_equal(ts1.velocities, ts2.velocities) + assert_equal(ts1.forces, ts2.forces) + + def test_write_chunks_with_nframes(self, universe, outfile, Writer): + n_atoms = universe.atoms.n_atoms + n_frames = universe.trajectory.n_frames + with Writer(outfile, + n_atoms=n_atoms, + n_frames=n_frames) as W: + for ts in universe.trajectory: + W.write(universe) + + uw = mda.Universe(TPR_xvf, outfile) + for dset in (uw.trajectory._particle_group['position/value'], + uw.trajectory._particle_group['velocity/value'], + uw.trajectory._particle_group['force/value']): + assert_equal(dset.chunks, (1, n_atoms, 3)) + + for ts1, ts2 in zip(universe.trajectory, uw.trajectory): + assert_equal(ts1.positions, ts2.positions) + assert_equal(ts1.velocities, ts2.velocities) + assert_equal(ts1.forces, ts2.forces) + + def test_write_contiguous1(self, universe, Writer, outfile): + n_atoms = universe.atoms.n_atoms + n_frames = len(universe.trajectory) + with Writer(outfile, + n_atoms=n_atoms, + n_frames=n_frames, + chunks=False) as W: + for ts in universe.trajectory: + W.write(universe) + + uw = mda.Universe(TPR_xvf, outfile) + for dset in (uw.trajectory._particle_group['position/value'], + uw.trajectory._particle_group['velocity/value'], + uw.trajectory._particle_group['force/value']): + assert_equal(dset.chunks, None) + + def test_write_contiguous2(self, universe, Writer, outfile): + ag = universe.select_atoms('all') + n_frames = len(ag.universe.trajectory) + ag.write(outfile, frames='all', n_frames=n_frames, chunks=False) + + uw = mda.Universe(TPR_xvf, outfile) + for dset in (uw.trajectory._particle_group['position/value'], + uw.trajectory._particle_group['velocity/value'], + uw.trajectory._particle_group['force/value']): + assert_equal(dset.chunks, None) + + @pytest.mark.parametrize('filter, opts', 
(('gzip', 1), + ('gzip', 9), + ('lzf', None))) + def test_write_with_compression(self, universe, + outfile, Writer, + filter, opts): + with Writer(outfile, + universe.atoms.n_atoms, + compression=filter, + compression_opts=opts) as W: + for ts in universe.trajectory: + W.write(universe) + + uw = mda.Universe(TPR_xvf, outfile) + dset = uw.trajectory._particle_group['position/value'] + assert_equal(dset.compression, filter) + assert_equal(dset.compression_opts, opts) + + @pytest.mark.parametrize('driver', ('core', 'stdio')) + def test_write_with_drivers(self, universe, outfile, Writer, driver): + with Writer(outfile, + universe.atoms.n_atoms, + driver=driver) as W: + for ts in universe.trajectory: + W.write(universe) + + uw = mda.Universe(TPR_xvf, outfile, driver=driver) + file = uw.trajectory._file + assert_equal(file.driver, driver) + + def test_parallel_disabled(self, universe, Writer, outfile, + driver='mpio'): + with pytest.raises(ValueError, + match="H5MDWriter: parallel writing with MPI I/O "): + with Writer(outfile, + universe.atoms.n_atoms, + driver=driver) as W: + for ts in universe.trajectory: + W.write(universe) + + def test_timestep_not_modified_by_writer(self, universe, Writer, outfile): + trj = universe.trajectory + ts = trj.ts + + trj[-1] # last timestep (so that time != 0) + x = ts._pos.copy() + time = ts.time + + with Writer(outfile, trj.n_atoms) as W: + # last timestep (so that time != 0) (say it again, just in case...) 
+ trj[-1] + W.write(universe) + + assert_equal( + ts._pos, + x, + err_msg="Positions in Timestep were modified by writer.") + assert_equal( + ts.time, time, err_msg="Time in Timestep was modified by writer.") + + +class TestH5PYNotInstalled(object): + """Tests RuntimeErrors when h5py not installed""" + + @pytest.fixture(autouse=True) + def block_h5py(self, monkeypatch): + monkeypatch.setattr( + sys.modules['MDAnalysis.coordinates.H5MD'], 'HAS_H5PY', False) + + @pytest.fixture() + def Writer(self): + return mda.coordinates.H5MD.H5MDWriter + + @pytest.fixture() + def outfile(self, tmpdir): + return str(tmpdir) + 'h5md-writer-test.h5md' + + def test_reader_no_h5py(self): + with pytest.raises(RuntimeError, match="Please install h5py"): + u = mda.Universe(TPR_xvf, H5MD_xvf) + + def test_writer_no_h5py(self, Writer, outfile): + u = mda.Universe(TPR_xvf, TRR_xvf) + with pytest.raises(RuntimeError, + match="H5MDWriter: Please install h5py"): + with Writer(outfile, + u.atoms.n_atoms) as W: + for ts in u.trajectory: + W.write(u) diff --git a/testsuite/MDAnalysisTests/coordinates/test_writer_api.py b/testsuite/MDAnalysisTests/coordinates/test_writer_api.py index 47625e94456..230f2b0cf47 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_writer_api.py +++ b/testsuite/MDAnalysisTests/coordinates/test_writer_api.py @@ -52,6 +52,8 @@ def test_ts_error(writer, tmpdir): fn = str(tmpdir.join('out.xtc')) elif writer == mda.coordinates.LAMMPS.DATAWriter: pytest.skip("DATAWriter requires integer atom types") + elif writer == mda.coordinates.H5MD.H5MDWriter and not mda.coordinates.H5MD.HAS_H5PY: + pytest.skip("skipping H5MDWriter test because h5py is not installed") else: fn = str(tmpdir.join('out.traj')) @@ -79,10 +81,16 @@ def test_write_with_atomgroup(writer, tmpdir): pytest.skip("MOL2 only writes MOL2 back out") elif writer == mda.coordinates.LAMMPS.DATAWriter: pytest.skip("DATAWriter requires integer atom types") + elif writer == mda.coordinates.H5MD.H5MDWriter and not
mda.coordinates.H5MD.HAS_H5PY: + pytest.skip("skipping H5MDWriter test because h5py is not installed") else: fn = str(tmpdir.join('out.traj')) - with writer(fn, n_atoms=u.atoms.n_atoms) as w: + # H5MDWriter raises ValueError if the trajectory has no units and + # convert_units is True + convert_units = (writer != mda.coordinates.H5MD.H5MDWriter) + + with writer(fn, n_atoms=u.atoms.n_atoms, convert_units=convert_units) as w: w.write(u.atoms) @@ -99,8 +107,14 @@ def test_write_with_universe(writer, tmpdir): pytest.skip("MOL2 only writes MOL2 back out") elif writer == mda.coordinates.LAMMPS.DATAWriter: pytest.skip("DATAWriter requires integer atom types") + elif writer == mda.coordinates.H5MD.H5MDWriter and not mda.coordinates.H5MD.HAS_H5PY: + pytest.skip("skipping H5MDWriter test because h5py is not installed") else: fn = str(tmpdir.join('out.traj')) - with writer(fn, n_atoms=10) as w: - w.write(u) + # H5MDWriter raises ValueError if the trajectory has no units and + # convert_units is True + convert_units = (writer != mda.coordinates.H5MD.H5MDWriter) + + with writer(fn, n_atoms=u.atoms.n_atoms, convert_units=convert_units) as w: + w.write(u) diff --git a/testsuite/MDAnalysisTests/data/cobrotoxin.h5md b/testsuite/MDAnalysisTests/data/cobrotoxin.h5md index 723b4bf52a7..d95a55147b9 100644 Binary files a/testsuite/MDAnalysisTests/data/cobrotoxin.h5md and b/testsuite/MDAnalysisTests/data/cobrotoxin.h5md differ diff --git a/testsuite/MDAnalysisTests/data/coordinates/README.md b/testsuite/MDAnalysisTests/data/coordinates/README.md index 3276bb88fb1..b05160d7e6a 100644 --- a/testsuite/MDAnalysisTests/data/coordinates/README.md +++ b/testsuite/MDAnalysisTests/data/coordinates/README.md @@ -44,10 +44,8 @@ plain text using Gromacs utilities. test.h5md --------- ## Creation -Converted test.trr to test.h5md with MDAnalysis and pyh5md -using the 'create_h5md_data.py' script. -See https://github.com/pdebuyl/pyh5md for pyh5md repository.
+Written with MDAnalysis using the 'create_h5md_data.py' script ## Validation -Manually examined contents of test.trr with MDAnalysis and test.h5md -with h5py and checked that the contents were the same. +Manually examined contents of the datasets in test.h5md with h5py and compared +with MDAnalysis. diff --git a/testsuite/MDAnalysisTests/data/coordinates/create_h5md_data.py b/testsuite/MDAnalysisTests/data/coordinates/create_h5md_data.py index 2ac287756e5..090ac163717 100644 --- a/testsuite/MDAnalysisTests/data/coordinates/create_h5md_data.py +++ b/testsuite/MDAnalysisTests/data/coordinates/create_h5md_data.py @@ -1,81 +1,41 @@ import MDAnalysis as mda -from MDAnalysis.tests.datafiles import COORDINATES_TRR, COORDINATES_TOPOLOGY -# To create the test trajectory, install pyh5md -# from https://github.com/pdebuyl/pyh5md -# with 'pip install pyh5md' -import pyh5md +import numpy as np + + +def create_test_trj(uni, fname): + n_atoms = uni.atoms.n_atoms + pos = np.arange(3 * n_atoms).reshape(n_atoms, 3) + uni.trajectory.ts.dt = 1 + orig_box = np.array([81.1, 82.2, 83.3, 75, 80, 85], dtype=np.float32) + uni.trajectory.ts.dimensions = orig_box + uni.trajectory.units = {'time': 'ps', + 'length': 'Angstrom', + 'velocity': 'Angstrom/ps', + 'force': 'kJ/(mol*Angstrom)'} + print(uni.trajectory) + print(uni.trajectory.ts.__class__) + with mda.Writer(fname, n_atoms, + positions=True, + velocities=True, + forces=True, + convert_units=False) as w: + for i in range(5): + uni.atoms.positions = 2 ** i * pos + uni.trajectory.ts.time = i + uni.trajectory.ts.velocities = uni.atoms.positions / 10 + uni.trajectory.ts.forces = uni.atoms.positions / 100 + uni.trajectory.ts.frame = i + uni.trajectory.ts.dimensions[:3] = orig_box[:3] + i + uni.trajectory.ts.dimensions[3:] = orig_box[3:] + i * 0.1 + print(uni.trajectory.ts.dimensions) + w.write(uni) -""" -This script converts the file test.trr to test.h5md, where -test.h5md will be used to as the reference trajectory for -tests using 
MDAnalysisTests.coordinates.base.BaseReference -""" - - -def create_test_trj(uni, filename): - """uses pyh5md library to write h5md file""" - # will eventually change this to use h5py instead - - with pyh5md.File(filename, 'w', creator='create_h5md_data.py') as f: - trajectory = f.particles_group('trajectory') - obsv = f.require_group('observables') - - positions = pyh5md.element(trajectory, 'position', store='time', - data=uni.trajectory.ts.positions, - time=True) - f['particles/trajectory/position/value'].attrs['unit'] = 'Angstrom' - f['particles/trajectory/position/time'].attrs['unit'] = 'ps' - - velocities = pyh5md.element(trajectory, 'velocity', - data=uni.trajectory.ts.velocities, - step_from=positions, - store='time', time=True) - f['particles/trajectory/velocity/value'].attrs['unit'] = 'Angstrom ps-1' - f['particles/trajectory/velocity/time'].attrs['unit'] = 'ps' - - forces = pyh5md.element(trajectory, 'force', - data=uni.trajectory.ts.forces, - step_from=positions, - store='time', time=True) - f['particles/trajectory/force/value'].attrs['unit'] = 'kJ mol-1 Angstrom-1' - f['particles/trajectory/force/time'].attrs['unit'] = 'ps' - - data_step = pyh5md.element(obsv, 'step', - data=uni.trajectory.ts.data['step'], - step_from=positions, - store='time', time=True) - data_lambda = pyh5md.element(obsv, 'lambda', - data=uni.trajectory.ts.data['lambda'], - step_from=positions, - store='time', time=True) - - trajectory.create_box(dimension=3, - boundary=['periodic', 'periodic', 'periodic'], - store='time', - data=uni.trajectory.ts.triclinic_dimensions, - step_from=positions) - f['particles/trajectory/box/edges/value'].attrs['unit'] = 'Angstrom' - - for ts in uni.trajectory: - trajectory.box.edges.append(uni.trajectory.ts.triclinic_dimensions, - ts.frame, time=ts.time) - positions.append(uni.trajectory.ts.positions, - ts.frame, time=ts.time) - velocities.append(uni.trajectory.ts.velocities, - ts.frame, time=ts.time) - forces.append(uni.trajectory.ts.forces, - 
ts.frame, time=ts.time) - data_step.append(uni.trajectory.ts.data['step'], - ts.frame, time=ts.time) - data_lambda.append(uni.trajectory.ts.data['lambda'], - ts.frame, time=ts.time) def main(): - pdb = COORDINATES_TOPOLOGY - trr = COORDINATES_TRR - u = mda.Universe(pdb, trr) - create_test_trj(u, 'test.h5md') + pdb = 'test_topology.pdb' + u = mda.Universe(pdb) + create_test_trj(u, 'test.h5md') if __name__ == '__main__': main() diff --git a/testsuite/MDAnalysisTests/data/coordinates/test.h5md b/testsuite/MDAnalysisTests/data/coordinates/test.h5md index a8965689303..5a71fb9e165 100644 Binary files a/testsuite/MDAnalysisTests/data/coordinates/test.h5md and b/testsuite/MDAnalysisTests/data/coordinates/test.h5md differ diff --git a/testsuite/MDAnalysisTests/utils/test_duecredit.py b/testsuite/MDAnalysisTests/utils/test_duecredit.py index 1a7152f9c20..e68ec403300 100644 --- a/testsuite/MDAnalysisTests/utils/test_duecredit.py +++ b/testsuite/MDAnalysisTests/utils/test_duecredit.py @@ -29,7 +29,7 @@ # (if set to 'no', the tests will be SKIPPED; has to be yes, true, or 1 for duecredit # to work; duecredit must also be installed) import MDAnalysis as mda -from MDAnalysisTests.datafiles import MMTF +from MDAnalysisTests.datafiles import MMTF, TPR_xvf, H5MD_xvf # duecredit itself is not needed in the name space but this is a # convenient way to skip all tests if duecredit is not installed @@ -80,7 +80,7 @@ def test_duecredit_collector_primary(self, module, path, citekey): "qcprot2"), ("MDAnalysis.analysis.encore", "MDAnalysis.analysis.encore", - "10.1371/journal.pcbi.1004415"), + "10.1371/journal.pcbi.1004415") ]) def test_duecredit_collector_analysis_modules(self, module, path, citekey): importlib.import_module(module) @@ -94,3 +94,12 @@ def test_duecredit_mmtf(self): '10.1371/journal.pcbi.1005575')].cites_module assert mda.due.citations[('MDAnalysis.topology.MMTFParser', '10.1371/journal.pcbi.1005575')].cites_module + + def test_duecredit_h5md(self): + # doesn't trigger on 
import but on use of either reader or writer + u = mda.Universe(TPR_xvf, H5MD_xvf) + + assert mda.due.citations[('MDAnalysis.coordinates.H5MD', + '10.25080/majora-1b6fd038-005')].cites_module + assert mda.due.citations[('MDAnalysis.coordinates.H5MD', + '10.1016/j.cpc.2014.01.018')].cites_module