diff --git a/.appveyor.yml b/.appveyor.yml index 5176556fe97..37cbd9db0d8 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -10,7 +10,7 @@ cache: environment: global: CONDA_CHANNELS: conda-forge - CONDA_DEPENDENCIES: pip setuptools wheel cython biopython networkx joblib matplotlib scipy vs2015_runtime pytest mmtf-python GridDataFormats hypothesis pytest-cov codecov chemfiles tqdm tidynamics>=1.0.0 rdkit>=2020.03.1 + CONDA_DEPENDENCIES: pip setuptools wheel cython biopython networkx joblib matplotlib scipy vs2015_runtime pytest mmtf-python GridDataFormats hypothesis pytest-cov codecov chemfiles tqdm tidynamics>=1.0.0 rdkit>=2020.03.1 h5py PIP_DEPENDENCIES: gsd==1.9.3 duecredit parmed DEBUG: "False" MINGW_64: C:\mingw-w64\x86_64-6.3.0-posix-seh-rt_v5-rev1\mingw64\bin @@ -22,7 +22,7 @@ environment: # is fixed in ci-helpers https://github.com/astropy/ci-helpers/issues/406 # MINICONDA_VERSION: "latest" MPLBACKEND: "agg" - + matrix: - PYTHON_VERSION: 3.6 PYTHON_ARCH: 64 diff --git a/.travis.yml b/.travis.yml index 54ab78a61a3..d94dc955f0a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -30,7 +30,7 @@ env: - SETUP_CMD="${PYTEST_FLAGS}" - BUILD_CMD="pip install -e package/ && (cd testsuite/ && python setup.py build)" - CONDA_MIN_DEPENDENCIES="mmtf-python biopython networkx cython matplotlib scipy griddataformats hypothesis gsd codecov" - - CONDA_DEPENDENCIES="${CONDA_MIN_DEPENDENCIES} seaborn>=0.7.0 clustalw=2.1 netcdf4 scikit-learn joblib>=0.12 chemfiles tqdm>=4.43.0 tidynamics>=1.0.0 rdkit>=2020.03.1" + - CONDA_DEPENDENCIES="${CONDA_MIN_DEPENDENCIES} seaborn>=0.7.0 clustalw=2.1 netcdf4 scikit-learn joblib>=0.12 chemfiles tqdm>=4.43.0 tidynamics>=1.0.0 rdkit>=2020.03.1 h5py" - CONDA_CHANNELS='biobuilds conda-forge' - CONDA_CHANNEL_PRIORITY=True - PIP_DEPENDENCIES="duecredit parmed" @@ -151,7 +151,7 @@ after_success: fi # can't use test here since this leads to travis fails even though the build passes - # turn off blocking as it causes large writes to stdout to fail + # turn off blocking as it causes large writes to stdout to fail # (see https://github.com/travis-ci/travis-ci/issues/4704) - | if [[ ${TRAVIS_PULL_REQUEST} == "false" ]] && [[ ${BUILD_DOCS} == "true" ]] ; then @@ -162,4 +162,3 @@ after_success: cd - bash ${TRAVIS_BUILD_DIR}/maintainer/deploy_docs_via_travis.sh; fi - diff --git a/azure-pipelines.yml b/azure-pipelines.yml index a22501cab99..0560cc04718 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -20,7 +20,7 @@ jobs: pool: vmImage: 'VS2017-Win2016' variables: - MPLBACKEND: agg + MPLBACKEND: agg strategy: matrix: Python37-32bit-full: @@ -54,6 +54,7 @@ jobs: pytest-xdist scikit-learn scipy + h5py tqdm displayName: 'Install dependencies' # TODO: recent rdkit is not on PyPI @@ -64,7 +65,7 @@ jobs: duecredit gsd==1.9.3 joblib - GridDataFormats + GridDataFormats mmtf-python networkx parmed diff --git a/package/AUTHORS b/package/AUTHORS index 0d729be658a..69cebb5e184 100644 --- a/package/AUTHORS +++ b/package/AUTHORS @@ -147,6 +147,7 @@ Chronological list of authors - Andrea Rizzi - William Glass - Marcello Sega + - Edis Jakupovic External code ------------- diff --git a/package/CHANGELOG b/package/CHANGELOG index e5e506d82a1..3694082eed2 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -21,7 +21,7 @@ The rules for this file: Fixes * Bond attribute is no longer set if PDB file contains no CONECT records (Issue #2832) - * ResidueAttrs now have Atom as a target class by default; ICodes and + * ResidueAttrs now have Atom as a target class by default; ICodes and Molnums now have default target_classes (#2803, PR #2805) * Selections on emtpy AtomGroups now return an empty AtomGroup instead of an error (Issue #2765) @@ -36,8 +36,8 @@ Fixes density=True; the keyword was available since 0.19.0 but with incorrect semantics and not documented and did not produce correct results (Issue #2811, PR #2812) - * In hydrogenbonds.hbond_analysis.HydrogenbondAnalysis an AttributeError - was thrown when finding D-H pairs via the topology if `hydrogens` was an + * In hydrogenbonds.hbond_analysis.HydrogenbondAnalysis an AttributeError + was thrown when finding D-H pairs via the topology if `hydrogens` was an empty AtomGroup (Issue #2848) Enhancements @@ -58,6 +58,7 @@ Enhancements * Added Hydrogen Bond Lifetime keyword "between" (PR #2791) * Dead code removed from the TPR parser and increased test coverage (PR #2840) * TPR parser exposes the elements topology attribute (PR #2858, see Issue #2553) + * Added `H5MDReader` to coordinate readers (Issue #762, PR #2787) Changes * deprecated NumPy type aliases have been replaced with their actual types diff --git a/package/MDAnalysis/coordinates/H5MD.py b/package/MDAnalysis/coordinates/H5MD.py new file mode 100644 index 00000000000..bf33eec169b --- /dev/null +++ b/package/MDAnalysis/coordinates/H5MD.py @@ -0,0 +1,729 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# +""" +H5MD trajectories --- :mod:`MDAnalysis.coordinates.H5MD` +======================================================== + +The `H5MD`_ trajectory file format is based upon the general, high performance +`HDF5`_ file format. +HDF5 files are self documenting and can be accessed with the `h5py`_ library. +HDF5 can make use of parallel file system features through the MPI-IO +interface of the HDF5 library to improve parallel reads and writes. + +The HDF5 library and `h5py`_ must be installed; otherwise, H5MD files +cannot be read by MDAnalysis. If `h5py`_ is not installed, a +:exc:`RuntimeError` is raised. + +Units +----- + +H5MD files are very flexible and can store data in a wide range of physical +units. The :class:`H5MDReader` will attempt to match the units in order to +convert all data to the standard MDAnalysis units (see +:mod:`MDAnalysis.units`). + +Units are read from the attributes of the position, velocity, force, and time +datasets provided by the H5MD file. The unit string is translated from `H5MD +notation`_ to `MDAnalysis notation`_. If MDAnalysis does not recognize the unit +(likely because that unit string is not defined in :mod:`MDAnalysis.units`) +provided, a :exc:`RuntimeError` is raised. If no units are provided, +MDAnalysis stores a value of ``None`` for each unit. If the H5MD file does not +contain units and ``convert_units=True``, MDAnalysis will raise a +:exc`ValueError`. To load a universe from an H5MD file with no units, set +``convert_units=False``. + + +Example: Loading an H5MD simulation +----------------------------------- + +To load an H5MD simulation from an H5MD trajectory data file (using the +:class:`~MDAnalysis.coordinates.H5MD.H5MDReader`), pass the topology +and trajectory files to :class:`~MDAnalysis.core.universe.Universe`:: + + import MDAnalysis as mda + u = mda.Universe("topology.tpr", "trajectory.h5md") + +It is also possible to pass an open :class:`h5py.File` file stream +into the reader:: + + import MDAnalysis as mda + with h5py.File("trajectory.h5md", 'r') as f: + u = mda.Universe("topology.tpr", f) + +.. Note:: Directly using a `h5py.File` does not work yet. + See issue `#2884 `_. + +Example: Opening an H5MD file in parallel +----------------------------------------- + +The parallel features of HDF5 can be accessed through h5py +(see `parallel h5py docs`_ for more detail) by using the `mpi4py`_ Python +package with a Parallel build of HDF5. To load a an H5MD simulation with +parallel HDF5, pass `driver` and `comm` arguments to +:class:`~MDAnalysis.core.universe.Universe`:: + + import MDAnalysis as mda + from mpi4py import MPI + u = mda.Universe("topology.tpr", "trajectory.h5md", + driver="mpio", comm=MPI.COMM_WORLD) + +.. Note:: + :mod:`h5py` must be built with parallel features enabled on top of a parallel + HDF5 build, and HDF5 and :mod:`mpi4py` must be built with a working MPI + implementation. See instructions below. + +Building parallel h5py and HDF5 on Linux +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Building a working parallel HDF5/h5py/mpi4py environment can be +challenging and is often specific to your local computing resources, +e.g., the supercomputer that you're running on typically already has +its preferred MPI installation. As a starting point we provide +instructions that worked in a specific, fairly generic environment. + +These instructions successfully built parallel HDF5/h5py +with *OpenMPI 4.0.4*, *HDF5 1.10.6*, *h5py 2.9.0*, and *mpi4py 3.0.3* +on *Ubuntu 16.0.6*. You may have to play around with different combinations of +versions of h5py/HDF5 to get a working parallel build. + + 1. `Build MPI from sources`_ + 2. `Build HDF5 from sources`_ with parallel settings enabled: + + .. code-block:: bash + + ./configure --enable-parallel --enable-shared + make + make install + + 3. `Install mpi4py`_, making sure to point `mpicc` to where you've + installed your MPI implemenation: + + .. code-block:: bash + + env MPICC=/path/to/mpicc pip install mpi4py + + 4. `Build h5py from sources`_, making sure to enable mpi and to point + to your parallel build of HDF5: + + .. code-block:: bash + + export HDF5_PATH=path-to-parallel-hdf5 + python setup.py clean --all + python setup.py configure -r --hdf5-version=X.Y.Z --mpi --hdf5=$HDF5_PATH + export gcc=gcc + CC=mpicc HDF5_DIR=$HDF5_PATH python setup.py build + python setup.py install + +If you have questions or want to share how you managed to build +parallel hdf5/h5py/mpi4py please let everyone know on the +`MDAnalysis forums`_. + +.. _`H5MD`: https://nongnu.org/h5md/index.html +.. _`HDF5`: https://www.hdfgroup.org/solutions/hdf5/ +.. _`H5PY`: http://docs.h5py.org/ +.. _`parallel h5py docs`: https://docs.h5py.org/en/stable/mpi.html +.. _`mpi4py`: https://mpi4py.readthedocs.io/en/stable/index.html +.. _`Build MPI from sources`: https://mpi4py.readthedocs.io/en/stable/appendix.html#building-mpi-from-sources +.. _`Build HDF5 from sources`: https://support.hdfgroup.org/ftp/HDF5/current/src/unpacked/release_docs/INSTALL_parallel +.. _`Install mpi4py`: https://mpi4py.readthedocs.io/en/stable/install.html#requirements +.. _`Build h5py from sources`: https://docs.h5py.org/en/stable/mpi.html#building-against-parallel-hdf5 +.. _`H5MD notation`: https://nongnu.org/h5md/modules/units.html +.. _`MDAnalysis notation`: https://userguide.mdanalysis.org/units.html +.. _`MDAnalysis forums`: https://www.mdanalysis.org/#participating + + +Classes +------- + +.. autoclass:: Timestep + :members: + + .. attribute:: positions + + coordinates of the atoms as a :class:`numpy.ndarray` of shape + `(n_atoms, 3)` + + .. attribute:: velocities + + velocities of the atoms as a :class:`numpy.ndarray` of shape + `(n_atoms, 3)`; only available if the trajectory contains velocities + or if the `velocities` = ``True`` keyword has been supplied. + + .. attribute:: forces + + forces of the atoms as a :class:`numpy.ndarray` of shape + `(n_atoms, 3)`; only available if the trajectory contains forces + or if the `forces` = ``True`` keyword has been supplied. + + +.. autoclass:: H5MDReader + :members: + + .. automethod:: H5MDReader._reopen + +""" + +import numpy as np +import MDAnalysis as mda +from . import base, core +from ..exceptions import NoDataError +try: + import h5py +except ImportError: + HAS_H5PY = False +else: + HAS_H5PY = True + + +class Timestep(base.Timestep): + """H5MD Timestep + """ + order = 'C' + + def _init_unitcell(self): + return np.zeros((3, 3), dtype=np.float32) + + @property + def dimensions(self): + """unitcell dimensions (*A*, *B*, *C*, *alpha*, *beta*, *gamma*) + + lengths *A*, *B*, *C* are in the MDAnalysis length unit (Å), and + angles are in degrees. + + Setting dimensions will populate the underlying native format + description (triclinic box vectors). If `edges + `_ is a matrix, + the box is of triclinic shape with the edge vectors given by + the rows of the matrix. + """ + if self._unitcell is not None: + return core.triclinic_box(*self._unitcell) + + @dimensions.setter + def dimensions(self, box): + self._unitcell[:] = core.triclinic_vectors(box) + + +class H5MDReader(base.ReaderBase): + """Reader for the H5MD format. + + See `h5md documentation `_ + for a detailed overview of the H5MD file format. + + The reader attempts to convert units in the trajectory file to + the standard MDAnalysis units (:mod:`MDAnalysis.units`) if + `convert_units` is set to ``True``. + + Additional data in the *observables* group of the H5MD file are + loaded into the :attr:`Timestep.data` dictionary. + + Only 3D-periodic boxes or no periodicity are supported; for no + periodicity, :attr:`Timestep.dimensions` will return ``None``. + + Although H5MD can store varying numbers of particles per time step + as produced by, e.g., GCMC simulations, MDAnalysis can currently + only process a fixed number of particles per step. If the number + of particles changes a :exc:`ValueError` is raised. + + The :class:`H5MDReader` reads .h5md files with the following + HDF5 hierarchy: + + .. code-block:: text + + Notation: + (name) is an HDF5 group that the reader recognizes + {name} is an HDF5 group with arbitrary name + [variable] is an HDF5 dataset + is dataset datatype + +-- is an attribute of a group or dataset + + H5MD root + \-- (h5md) + +-- version + \-- author + +-- name , author's name + +-- email , optional email address + \-- creator + +-- name , file that created .h5md file + +-- version + \-- (particles) + \-- {group1} + \-- (box) + +-- dimension : , number of spatial dimensions + +-- boundary : , boundary conditions of unit cell + \-- (edges) + \-- [step] , gives frame + \-- [value] , gives box dimensions + +-- unit + \-- (position) + \-- [step] , gives frame + \-- [time] , gives time + +-- units + \-- [value] , gives numpy arrary of positions + with shape (n_atoms, 3) + +-- unit + \-- (velocity) + \-- [step] , gives frame + \-- [time] , gives time + +-- units + \-- [value] , gives numpy arrary of velocities + with shape (n_atoms, 3) + +-- unit + \-- (force) + \-- [step] , gives frame + \-- [time] , gives time + +-- units + \-- [value] , gives numpy arrary of forces + with shape (n_atoms, 3) + +-- unit + \-- (observables) + \-- (lambda) + \-- [step] , gives frame + \-- [time] , gives time + \-- [value] + \-- (step) + \-- [step] , gives frame + \-- [time] , gives time + \-- [value] , gives integration step + + + .. note:: + The reader does not currently read mass or charge data. + + .. note:: + If the `driver` and `comm` arguments were used to open the + hdf5 file (specifically, ``driver="mpio"``) then the :meth:`_reopen` + method does *not* close and open the file like most readers because + the information about the MPI communicator would be lost; instead + it rewinds the trajectory back to the first timestep. + + + .. versionadded:: 2.0.0 + + """ + + format = 'H5MD' + # units is defined as instance-level variable and set from the + # H5MD file in __init__() below + + # This dictionary is used to translate H5MD units to MDAnalysis units. + # (https://nongnu.org/h5md/modules/units.html) + _unit_translation = { + 'time': { + 'ps': 'ps', + 'fs': 'fs', + 'ns': 'ns', + 'second': 's', + 'sec': 's', + 's': 's', + 'AKMA': 'AKMA', + }, + 'length': { + 'Angstrom': 'Angstrom', + 'angstrom': 'Angstrom', + 'A': 'Angstrom', + 'nm': 'nm', + 'pm': 'pm', + 'fm': 'fm', + }, + 'velocity': { + 'Angstrom ps-1': 'Angstrom/ps', + 'A ps-1': 'Angstrom/ps', + 'Angstrom fs-1': 'Angstrom/fs', + 'A fs-1': 'Angstrom/fs', + 'Angstrom AKMA-1': 'Angstrom/AKMA', + 'A AKMA-1': 'Angstrom/AKMA', + 'nm ps-1': 'nm/ps', + 'nm ns-1': 'nm/ns', + 'pm ps-1': 'pm/ps', + 'm s-1': 'm/s' + }, + 'force': { + 'kJ mol-1 Angstrom-1': 'kJ/(mol*Angstrom)', + 'kJ mol-1 nm-1': 'kJ/(mol*nm)', + 'Newton': 'Newton', + 'N': 'N', + 'J m-1': 'J/m', + 'kcal mol-1 Angstrom-1': 'kcal/(mol*Angstrom)', + 'kcal mol-1 A-1': 'kcal/(mol*Angstrom)' + } + } + _Timestep = Timestep + + def __init__(self, filename, + convert_units=True, + driver=None, + comm=None, + **kwargs): + """ + Parameters + ---------- + filename : str or :class:`h5py.File` + trajectory filename or open h5py file + convert_units : bool (optional) + convert units to MDAnalysis units + driver : str (optional) + H5PY file driver used to open H5MD file + comm : :class:`MPI.Comm` (optional) + MPI communicator used to open H5MD file + Must be passed with `'mpio'` file driver + **kwargs : dict + General reader arguments. + + Raises + ------ + RuntimeError + when `H5PY`_ is not installed + RuntimeError + when a unit is not recognized by MDAnalysis + ValueError + when ``n_atoms`` changes values between timesteps + ValueError + when ``convert_units=True`` but the H5MD file contains no units + ValueError + when dimension of unitcell is not 3 + ValueError + when an MPI communicator object is passed to the reader + but ``driver != 'mpio'`` + NoDataError + when the H5MD file has no 'position', 'velocity', or + 'force' group + + """ + if not HAS_H5PY: + raise RuntimeError("Please install h5py") + super(H5MDReader, self).__init__(filename, **kwargs) + self.filename = filename + self.convert_units = convert_units + + # if comm is provided, driver must be 'mpio' and file will be + # opened with parallel h5py/hdf5 enabled + self._driver = driver + self._comm = comm + if (self._comm is not None) and (self._driver != 'mpio'): + raise ValueError("If MPI communicator object is used to open" + " h5md file, ``driver='mpio'`` must be passed.") + + self.open_trajectory() + if self._particle_group['box'].attrs['dimension'] != 3: + raise ValueError("MDAnalysis only supports 3-dimensional" + " simulation boxes") + + # _has dictionary used for checking whether h5md file has + # 'position', 'velocity', or 'force' groups in the file + self._has = {name: name in self._particle_group for + name in ('position', 'velocity', 'force')} + + # Gets n_atoms from first available group + for name, value in self._has.items(): + if value: + self.n_atoms = self._particle_group[name]['value'].shape[1] + break + else: + raise NoDataError("Provide at least a position, velocity" + " or force group in the h5md file.") + + self.ts = self._Timestep(self.n_atoms, + positions=self.has_positions, + velocities=self.has_velocities, + forces=self.has_forces, + **self._ts_kwargs) + + self.units = {'time': None, + 'length': None, + 'velocity': None, + 'force': None} + self._set_translated_units() # fills units dictionary + self._read_next_timestep() + + def _set_translated_units(self): + """converts units from H5MD to MDAnalysis notation + and fills units dictionary""" + + # need this dictionary to associate 'position': 'length' + _group_unit_dict = {'time': 'time', + 'position': 'length', + 'velocity': 'velocity', + 'force': 'force' + } + + for group, unit in _group_unit_dict.items(): + self._translate_h5md_units(group, unit) + self._check_units(group, unit) + + def _translate_h5md_units(self, group, unit): + """stores the translated unit string into the units dictionary""" + + errmsg = "{} unit '{}' is not recognized by H5MDReader. Please raise" + " an issue in https://github.com/MDAnalysis/mdanalysis/issues" + + # doing time unit separately because time has to fish for + # first available parent group - either position, velocity, or force + if unit == 'time': + for name, value in self._has.items(): + if value: + if 'unit' in self._particle_group[name]['time'].attrs: + try: + self.units['time'] = self._unit_translation[ + 'time'][self._particle_group[name][ + 'time'].attrs['unit']] + break + except KeyError: + raise RuntimeError(errmsg.format( + unit, self._particle_group[ + name]['time'].attrs['unit']) + ) from None + + else: + if self._has[group]: + if 'unit' in self._particle_group[group]['value'].attrs: + try: + self.units[unit] = self._unit_translation[unit][ + self._particle_group[group]['value'].attrs['unit']] + except KeyError: + raise RuntimeError(errmsg.format( + unit, self._particle_group[group][ + 'value'].attrs['unit']) + ) from None + + # if position group is not provided, can still get 'length' unit + # from unitcell box + if (not self._has['position']) and ('edges' in self._particle_group['box']): + if 'unit' in self._particle_group['box/edges/value'].attrs: + try: + self.units['length'] = self._unit_translation[ + 'length'][self._particle_group[ + 'box/edges/value' + ].attrs['unit']] + except KeyError: + raise RuntimeError(errmsg.format(unit, + self._particle_group[ + 'box/edges/value'].attrs[ + 'unit'])) from None + + def _check_units(self, group, unit): + """Raises error if no units are provided from H5MD file + and convert_units=True""" + + if not self.convert_units: + return + + errmsg = "H5MD file must have readable units if ``convert_units`` is" + " set to ``True``. MDAnalysis sets ``convert_units=True`` by default." + " Set ``convert_units=False`` to load Universe without units." + + if unit == 'time': + if self.units['time'] is None: + raise ValueError(errmsg) + + else: + if self._has[group]: + if self.units[unit] is None: + raise ValueError(errmsg) + + @staticmethod + def _format_hint(thing): + """Can this Reader read *thing*""" + # nb, filename strings can still get passed through if + # format='H5MD' is used + return HAS_H5PY and isinstance(thing, h5py.File) + + def open_trajectory(self): + """opens the trajectory file using h5py library""" + self._frame = -1 + if isinstance(self.filename, h5py.File): + self._file = self.filename + self._driver = self._file.driver + else: + if self._comm is not None: + # can only pass comm argument to h5py.File if driver='mpio' + assert self._driver == 'mpio' + self._file = h5py.File(self.filename, 'r', # pragma: no cover + driver=self._driver, + comm=self._comm) + elif self._driver is not None: + self._file = h5py.File(self.filename, 'r', driver=self._driver) + else: + self._file = h5py.File(self.filename, 'r') + # pulls first key out of 'particles' + # allows for arbitrary name of group1 in 'particles' + self._particle_group = self._file['particles'][ + list(self._file['particles'])[0]] + + @property + def n_frames(self): + """number of frames in trajectory""" + for name, value in self._has.items(): + if value: + return self._particle_group[name]['value'].shape[0] + + def _read_frame(self, frame): + """reads data from h5md file and copies to current timestep""" + try: + for name, value in self._has.items(): + if value: + _ = self._particle_group[name]['step'][frame] + break + else: + raise NoDataError("Provide at least a position, velocity" + " or force group in the h5md file.") + except ValueError: + raise IOError from None + + self._frame = frame + ts = self.ts + particle_group = self._particle_group + ts.frame = frame + + # fills data dictionary from 'observables' group + # Note: dt is not read into data as it is not decided whether + # Timestep should have a dt attribute (see Issue #2825) + self._copy_to_data() + + # Sets frame box dimensions + # Note: H5MD files must contain 'box' group in each 'particles' group + if 'edges' in particle_group['box'] and ts._unitcell is not None: + ts._unitcell[:] = particle_group['box/edges/value'][frame, :] + else: + # sets ts.dimensions = None + ts._unitcell = None + + # set the timestep positions, velocities, and forces with + # current frame dataset + if self._has['position']: + ts.positions = self._get_frame_dataset('position') + if self._has['velocity']: + ts.velocities = self._get_frame_dataset('velocity') + if self._has['force']: + ts.forces = self._get_frame_dataset('force') + + if self.convert_units: + self._convert_units() + + return ts + + def _copy_to_data(self): + """assigns values to keys in data dictionary""" + + if 'observables' in self._file: + for key in self._file['observables'].keys(): + self.ts.data[key] = self._file['observables'][key][ + 'value'][self._frame] + + # pulls 'time' out of first available parent group + for name, value in self._has.items(): + if value: + if 'time' in self._particle_group[name]: + self.ts.data['time'] = self._particle_group[name][ + 'time'][self._frame] + break + + def _get_frame_dataset(self, dataset): + """retrieves dataset array at current frame""" + + frame_dataset = self._particle_group[ + dataset]['value'][self._frame, :] + n_atoms_now = frame_dataset.shape[0] + if n_atoms_now != self.n_atoms: + raise ValueError("Frame {} has {} atoms but the initial frame" + " has {} atoms. MDAnalysis is unable to deal" + " with variable topology!" + "".format(self._frame, + n_atoms_now, + self.n_atoms)) + return frame_dataset + + def _convert_units(self): + """converts time, position, velocity, and force values if they + are not given in MDAnalysis standard units + + See https://userguide.mdanalysis.org/1.0.0/units.html + """ + + self.ts.time = self.convert_time_from_native(self.ts.time) + + if 'edges' in self._particle_group['box']: + self.convert_pos_from_native(self.ts.dimensions[:3]) + + if self._has['position']: + self.convert_pos_from_native(self.ts.positions) + + if self._has['velocity']: + self.convert_velocities_from_native(self.ts.velocities) + + if self._has['force']: + self.convert_forces_from_native(self.ts.forces) + + def _read_next_timestep(self): + """read next frame in trajectory""" + return self._read_frame(self._frame + 1) + + def close(self): + """close reader""" + self._file.close() + + def _reopen(self): + """reopen trajectory + + Note + ---- + + If the `driver` and `comm` arguments were used to open the + hdf5 file (specifically, ``driver="mpio"``) then this method + does *not* close and open the file like most readers because + the information about the MPI communicator would be lost; instead + it rewinds the trajectory back to the first timstep. + + """ + if self._driver == "mpio": # pragma: no cover + self._read_frame(-1) + return + + self.close() + self.open_trajectory() + + @property + def has_positions(self): + """``True`` if 'position' group is in trajectory.""" + return self._has['position'] + + @has_positions.setter + def has_positions(self, value: bool): + self._has['position'] = value + + @property + def has_velocities(self): + """``True`` if 'velocity' group is in trajectory.""" + return self._has['velocity'] + + @has_velocities.setter + def has_velocities(self, value: bool): + self._has['velocity'] = value + + @property + def has_forces(self): + """``True`` if 'force' group is in trajectory.""" + return self._has['force'] + + @has_forces.setter + def has_forces(self, value: bool): + self._has['force'] = value diff --git a/package/MDAnalysis/coordinates/__init__.py b/package/MDAnalysis/coordinates/__init__.py index ad3d5b53cb8..b7ca28be9f7 100644 --- a/package/MDAnalysis/coordinates/__init__.py +++ b/package/MDAnalysis/coordinates/__init__.py @@ -257,6 +257,9 @@ class can choose an appropriate reader automatically. | FHIAIMS | in | r/w | FHI-AIMS file format for coordinates | | | | | :mod:`MDAnalysis.coordinates.FHIAIMS` | +---------------+-----------+-------+------------------------------------------------------+ + | H5MD | h5md | r | H5MD_ file format for coordinates | + | | | | :mod:`MDAnalysis.coordinates.H5MD` | + +---------------+-----------+-------+------------------------------------------------------+ .. [#a] This format can also be used to provide basic *topology* information (i.e. the list of atoms); it is possible to create a @@ -264,6 +267,7 @@ class can choose an appropriate reader automatically. providing a file of this format: ``u = Universe(filename)`` .. _`netcdf4-python`: https://github.com/Unidata/netcdf4-python +.. _`H5MD`: https://nongnu.org/h5md/index.html .. _Trajectory API: @@ -728,6 +732,7 @@ class can choose an appropriate reader automatically. from . import PQR from . import TRJ from . import TRR +from . import H5MD from . import TRZ from . import XTC from . import XYZ diff --git a/package/doc/sphinx/source/conf.py b/package/doc/sphinx/source/conf.py index 1a813e28657..296b479d6f9 100644 --- a/package/doc/sphinx/source/conf.py +++ b/package/doc/sphinx/source/conf.py @@ -345,4 +345,5 @@ 'https://www.mdanalysis.org/GridDataFormats/': None, 'https://gsd.readthedocs.io/en/stable/': None, 'https://parmed.github.io/ParmEd/html/': None, + 'https://docs.h5py.org/en/stable': None, } diff --git a/package/doc/sphinx/source/documentation_pages/coordinates/H5MD.rst b/package/doc/sphinx/source/documentation_pages/coordinates/H5MD.rst new file mode 100644 index 00000000000..3757e061488 --- /dev/null +++ b/package/doc/sphinx/source/documentation_pages/coordinates/H5MD.rst @@ -0,0 +1 @@ +.. automodule:: MDAnalysis.coordinates.H5MD diff --git a/package/doc/sphinx/source/documentation_pages/coordinates_modules.rst b/package/doc/sphinx/source/documentation_pages/coordinates_modules.rst index c0e7c3f3467..95983359a31 100644 --- a/package/doc/sphinx/source/documentation_pages/coordinates_modules.rst +++ b/package/doc/sphinx/source/documentation_pages/coordinates_modules.rst @@ -26,6 +26,7 @@ provide the format in the keyword argument *format* to coordinates/GMS coordinates/GSD coordinates/GRO + coordinates/H5MD coordinates/INPCRD coordinates/LAMMPS coordinates/MMTF @@ -36,7 +37,7 @@ provide the format in the keyword argument *format* to coordinates/PQR coordinates/TRJ coordinates/TRR - coordinates/TRZ + coordinates/TRZ coordinates/TXYZ coordinates/XTC coordinates/XYZ diff --git a/testsuite/MDAnalysisTests/coordinates/test_h5md.py b/testsuite/MDAnalysisTests/coordinates/test_h5md.py new file mode 100644 index 00000000000..bbd660aa979 --- /dev/null +++ b/testsuite/MDAnalysisTests/coordinates/test_h5md.py @@ -0,0 +1,501 @@ +import pytest +from numpy.testing import assert_almost_equal, assert_array_equal +import numpy as np +import MDAnalysis as mda +from MDAnalysis.coordinates.H5MD import HAS_H5PY +if HAS_H5PY: + import h5py +from MDAnalysis.exceptions import NoDataError +from MDAnalysisTests.datafiles import (H5MD_xvf, TPR_xvf, + COORDINATES_TOPOLOGY, + COORDINATES_H5MD) +from MDAnalysisTests.coordinates.base import (MultiframeReaderTest, + BaseReference, BaseWriterTest, + assert_timestep_almost_equal) + + +@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") +class H5MDReference(BaseReference): + """Reference synthetic trajectory that was + copied from test_xdr.TRRReference""" + + def __init__(self): + super(H5MDReference, self).__init__() + self.trajectory = COORDINATES_H5MD + self.topology = COORDINATES_TOPOLOGY + self.reader = mda.coordinates.H5MD.H5MDReader + self.ext = 'h5md' + self.prec = 3 + self.changing_dimensions = True + + self.first_frame.velocities = self.first_frame.positions / 10 + self.first_frame.forces = self.first_frame.positions / 100 + + self.second_frame.velocities = self.second_frame.positions / 10 + self.second_frame.forces = self.second_frame.positions / 100 + + self.last_frame.velocities = self.last_frame.positions / 10 + self.last_frame.forces = self.last_frame.positions / 100 + + self.jump_to_frame.velocities = self.jump_to_frame.positions / 10 + self.jump_to_frame.forces = self.jump_to_frame.positions / 100 + + def iter_ts(self, i): + ts = self.first_frame.copy() + ts.positions = 2**i * self.first_frame.positions + ts.velocities = ts.positions / 10 + ts.forces = ts.positions / 100 + ts.time = i + ts.frame = i + return ts + + +@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") +class TestH5MDReader(MultiframeReaderTest): + """Tests H5MDReader with MultiframeReaderTest. + get_writer tests are marked with expected to fail since + H5MDWriter is not implemented yet.""" + @staticmethod + @pytest.fixture() + def ref(): + return H5MDReference() + + @pytest.mark.xfail(reason='H5MD writer not implemented yet') + def test_get_writer_1(self, ref, reader, tmpdir): + with tmpdir.as_cwd(): + outfile = 'test-writer.' + ref.ext + with reader.Writer(outfile) as W: + assert_equal(isinstance(W, ref.writer), True) + assert_equal(W.n_atoms, reader.n_atoms) + + @pytest.mark.xfail(reason='H5MD writer not implemented yet') + def test_get_writer_2(self, ref, reader, tmpdir): + with tmpdir.as_cwd(): + outfile = 'test-writer.' + ref.ext + with reader.Writer(outfile, n_atoms=100) as W: + assert_equal(isinstance(W, ref.writer), True) + assert_equal(W.n_atoms, 100) + + +# The tests below test an example trajectory H5MD_xvf +@pytest.fixture +@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") +def h5md_universe(): + return mda.Universe(TPR_xvf, H5MD_xvf) + + +@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") +def test_h5md_n_frames(h5md_universe): + assert len(h5md_universe.trajectory) == 3 + + +@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") +def test_h5md_positions(h5md_universe): + # first timestep tests + h5md_universe.trajectory[0] + assert_almost_equal(h5md_universe.atoms.positions[0], + [32.309906, 13.77798, 14.372463], + decimal=6) + assert_almost_equal(h5md_universe.atoms.positions[42], + [28.116928, 19.405945, 19.647358], + decimal=6) + assert_almost_equal(h5md_universe.atoms.positions[10000], + [44.117805, 50.442093, 23.299038], + decimal=6) + # second timestep tests + h5md_universe.trajectory[1] + assert_almost_equal(h5md_universe.atoms.positions[0], + [30.891968, 13.678971, 13.6000595], + decimal=6) + assert_almost_equal(h5md_universe.atoms.positions[42], + [27.163246, 19.846561, 19.3582], + decimal=6) + assert_almost_equal(h5md_universe.atoms.positions[10000], + [45.869278, 5.0342298, 25.460655], + decimal=6) + # third timestep tests + h5md_universe.trajectory[2] + assert_almost_equal(h5md_universe.atoms.positions[0], + [31.276512, 13.89617, 15.015897], + decimal=6) + assert_almost_equal(h5md_universe.atoms.positions[42], + [28.567991, 20.56532, 19.40814], + decimal=6) + assert_almost_equal(h5md_universe.atoms.positions[10000], + [39.713223, 6.127234, 18.284992], + decimal=6) + + +@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") +def test_h5md_velocities(h5md_universe): + h5md_universe.trajectory[0] + assert_almost_equal(h5md_universe.atoms.velocities[0], + [-2.697732, 0.613568, 0.14334752], + decimal=6) + h5md_universe.trajectory[1] + assert_almost_equal(h5md_universe.atoms.velocities[42], + [-6.8698354, 7.834235, -8.114698], + decimal=6) + h5md_universe.trajectory[2] + assert_almost_equal(h5md_universe.atoms.velocities[10000], + [9.799492, 5.631466, 6.852126], + decimal=6) + + +@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") +def test_h5md_forces(h5md_universe): + h5md_universe.trajectory[0] + assert_almost_equal(h5md_universe.atoms.forces[0], + [20.071287, -155.2285, -96.72112], + decimal=5) + h5md_universe.trajectory[1] + assert_almost_equal(h5md_universe.atoms.forces[42], + [-4.1959066, -31.31548, 22.663044], + decimal=6) + h5md_universe.trajectory[2] + assert_almost_equal(h5md_universe.atoms.forces[10000], + [-41.43743, 83.35207, 62.94751], + decimal=5) + + +@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") +def test_h5md_dimensions(h5md_universe): + # first timestep + h5md_universe.trajectory[0] + assert_almost_equal(h5md_universe.trajectory.ts.dimensions, + [52.763, 52.763, 52.763, 90., 90., 90.], + decimal=6) + # second timestep + h5md_universe.trajectory[1] + assert_almost_equal(h5md_universe.trajectory.ts.dimensions, + [52.807877, 52.807877, 52.807877, 90., 90., 90.], + decimal=6) + # third timestep + h5md_universe.trajectory[2] + assert_almost_equal(h5md_universe.trajectory.ts.dimensions, + [52.839806, 52.839806, 52.839806, 90., 90., 90.], + decimal=6) + + +@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") +def test_h5md_data_step(h5md_universe): + h5md_universe.trajectory[0] + assert h5md_universe.trajectory.ts.data['step'] == 0 + h5md_universe.trajectory[1] + assert h5md_universe.trajectory.ts.data['step'] == 25000 + h5md_universe.trajectory[2] + assert h5md_universe.trajectory.ts.data['step'] == 50000 + + +@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") +def test_rewind(h5md_universe): + h5md_universe.trajectory.rewind() + assert h5md_universe.trajectory.ts.frame == 0 + + +@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") +def test_next(h5md_universe): + h5md_universe.trajectory.rewind() + h5md_universe.trajectory.next() + assert h5md_universe.trajectory.ts.frame == 1 + + +@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") +def test_jump_last_frame(h5md_universe): + h5md_universe.trajectory[-1] + assert h5md_universe.trajectory.ts.frame == 2 + + +@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") +@pytest.mark.parametrize("start, stop, step", ((0, 2, 1), + (1, 2, 1))) +def test_slice(h5md_universe, start, stop, step): + frames = [h5md_universe.trajectory.ts.frame + for ts in h5md_universe.trajectory[start:stop:step]] + assert_array_equal(frames, np.arange(start, stop, step)) + + +@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") +@pytest.mark.parametrize("array_like", [list, np.array]) +def test_array_like(h5md_universe, array_like): + array = array_like([0, 2]) + frames = [h5md_universe.trajectory.ts.frame + for ts in h5md_universe.trajectory[array]] + assert_array_equal(frames, array) + + +@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") +@pytest.mark.parametrize("indices", ([0, 1, 2, 1, 2, 2, 0])) +def test_list_indices(h5md_universe, indices): + frames = [h5md_universe.trajectory.ts.frame + for ts in h5md_universe.trajectory[indices]] + assert_array_equal(frames, indices) + + +@pytest.fixture +@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") +def ref(): + return H5MDReference() + + +@pytest.fixture +@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") +def h5md_file(): + return h5py.File(H5MD_xvf, 'r') + + +@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") +def test_no_positions(h5md_file, ref, tmpdir): + outfile = 'test_no_positions' + ref.ext + with tmpdir.as_cwd(): + with h5md_file as f: + with h5py.File(outfile, 'w') as g: + f.copy(source='particles', dest=g) + f.copy(source='h5md', dest=g) + del g['particles/trajectory/position'] + + u = mda.Universe(TPR_xvf, outfile, format='H5MD') + with pytest.raises(NoDataError): + u.trajectory.ts.positions + + +@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") +def test_no_velocities(h5md_file, ref, tmpdir): + outfile = 'test_no_velocities' + ref.ext + with tmpdir.as_cwd(): + with h5md_file as f: + with h5py.File(outfile, 'w') as g: + f.copy(source='particles', dest=g) + f.copy(source='h5md', dest=g) + del g['particles/trajectory/velocity'] + u = mda.Universe(TPR_xvf, outfile, format='H5MD') + with pytest.raises(NoDataError): + u.trajectory.ts.velocities + + +@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") +def test_no_forces(h5md_file, ref, tmpdir): + outfile = 'test_no_forces' + ref.ext + with tmpdir.as_cwd(): + with h5md_file as f: + with h5py.File(outfile, 'w') as g: + f.copy(source='particles', dest=g) + f.copy(source='h5md', dest=g) + del g['particles/trajectory/force'] + u = mda.Universe(TPR_xvf, outfile, format='H5MD') + with pytest.raises(NoDataError): + u.trajectory.ts.forces + + +@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") +def test_unknown_position_unit(h5md_file, ref, tmpdir): + outfile = 'test_unknown_position_unit' + ref.ext + with tmpdir.as_cwd(): + with h5md_file as f: + with h5py.File(outfile, 'w') as g: + f.copy(source='particles', dest=g) + f.copy(source='h5md', dest=g) + g['particles' + '/trajectory' + '/position' + '/value'].attrs['unit'] = 'random string' + with pytest.raises(RuntimeError): + u = mda.Universe(TPR_xvf, outfile, format='H5MD') + + +@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") +def test_length_unit_from_box(h5md_file, ref, tmpdir): + outfile = 'test_length_unit_from_box' + ref.ext + with tmpdir.as_cwd(): + with h5md_file as f: + with h5py.File(outfile, 'w') as g: + f.copy(source='particles', dest=g) + f.copy(source='h5md', dest=g) + del g['particles/trajectory/position'] + u = mda.Universe(TPR_xvf, outfile, format='H5MD') + assert u.trajectory.units['length'] == 'Angstrom' + + +@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") +def test_unknown_time_unit(h5md_file, ref, tmpdir): + outfile = 'test_unknown_time_unit' + ref.ext + with tmpdir.as_cwd(): + with h5md_file as f: + with h5py.File(outfile, 'w') as g: + f.copy(source='particles', dest=g) + f.copy(source='h5md', dest=g) + g['particles' + '/trajectory' + '/position' + '/time'].attrs['unit'] = 'random string' + with pytest.raises(RuntimeError): + u = mda.Universe(TPR_xvf, outfile, format='H5MD') + + +@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") +def test_unknown_velocity_unit(h5md_file, ref, tmpdir): + outfile = 'test_unknown_velocity_unit' + ref.ext + with tmpdir.as_cwd(): + with h5md_file as f: + with h5py.File(outfile, 'w') as g: + f.copy(source='particles', dest=g) + f.copy(source='h5md', dest=g) + g['particles' + '/trajectory' + '/velocity' + '/value'].attrs['unit'] = 'random string' + with pytest.raises(RuntimeError): + u = mda.Universe(TPR_xvf, outfile, format='H5MD') + + +@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") +def test_unknown_force_unit(h5md_file, ref, tmpdir): + outfile = 'test_unknown_force_unit' + ref.ext + with tmpdir.as_cwd(): + with h5md_file as f: + with h5py.File(outfile, 'w') as g: + f.copy(source='particles', dest=g) + f.copy(source='h5md', dest=g) + g['particles' + '/trajectory' + '/force' + '/value'].attrs['unit'] = 'random string' + with pytest.raises(RuntimeError): + u = mda.Universe(TPR_xvf, outfile, format='H5MD') + + +@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") +def test_changing_n_atoms1(h5md_file, ref, tmpdir): + outfile = 'test_changing_n_atoms1' + ref.ext + with tmpdir.as_cwd(): + with h5md_file as f: + with h5py.File(outfile, 'w') as g: + f.copy(source='particles', dest=g) + f.copy(source='h5md', dest=g) + g['particles/trajectory/position/value'].resize((3, 10000, 3)) + with pytest.raises(ValueError): + u = mda.Universe(TPR_xvf, outfile, format='H5MD') + + +@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") +def test_changing_n_atoms2(h5md_file, ref, tmpdir): + outfile = 'test_changing_n_atoms2' + ref.ext + with tmpdir.as_cwd(): + with h5md_file as f: + with h5py.File(outfile, 'w') as g: + f.copy(source='particles', dest=g) + f.copy(source='h5md', dest=g) + g['particles/trajectory/velocity/value'].resize((3, 10000, 3)) + with pytest.raises(ValueError): + u = mda.Universe(TPR_xvf, outfile, format='H5MD') + + +@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") +def test_changing_n_atoms3(h5md_file, ref, tmpdir): + outfile = 'test_changing_n_atoms3' + ref.ext + with tmpdir.as_cwd(): + with h5md_file as f: + with h5py.File(outfile, 'w') as g: + f.copy(source='particles', dest=g) + f.copy(source='h5md', dest=g) + g['particles/trajectory/force/value'].resize((3, 10000, 3)) + with pytest.raises(ValueError): + u = mda.Universe(TPR_xvf, outfile, format='H5MD') + + +@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") +def test_2D_box(h5md_file, ref, tmpdir): + outfile = 'test_2D_box' + ref.ext + with tmpdir.as_cwd(): + with h5md_file as f: + with h5py.File(outfile, 'w') as g: + f.copy(source='particles', dest=g) + f.copy(source='h5md', dest=g) + new_box = np.ones(shape=(3, 2, 2)) + g['particles/trajectory/box'].attrs['dimension'] = 2 + del g['particles/trajectory/box/edges/value'] + g['particles/trajectory' + '/box/edges'].create_dataset('value', data=new_box) + with pytest.raises(ValueError): + u = mda.Universe(TPR_xvf, outfile, format='H5MD') + + +@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") +def test_no_box(h5md_file, ref, tmpdir): + outfile = 'test_no_box' + ref.ext + with tmpdir.as_cwd(): + with h5md_file as f: + with h5py.File(outfile, 'w') as g: + f.copy(source='particles', dest=g) + f.copy(source='h5md', dest=g) + del g['particles/trajectory/box/edges'] + u = mda.Universe(TPR_xvf, outfile, format='H5MD') + assert u.trajectory.ts.dimensions is None + + +@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") +def test_no_groups(h5md_file, ref, tmpdir): + outfile = 'test_no_groups' + ref.ext + with tmpdir.as_cwd(): + with h5md_file as f: + with h5py.File(outfile, 'w') as g: + f.copy(source='particles', dest=g) + f.copy(source='h5md', dest=g) + del g['particles/trajectory/position'] + del g['particles/trajectory/velocity'] + del g['particles/trajectory/force'] + with pytest.raises(NoDataError): + u = mda.Universe(TPR_xvf, outfile, format='H5MD') + + +@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") +@pytest.mark.xfail(reason='Issue #2884') +def test_open_filestream(h5md_file): + with h5md_file as f: + u = mda.Universe(TPR_xvf, h5md_file) + + +@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") +def test_wrong_driver(): + with pytest.raises(ValueError): + u = mda.Universe(TPR_xvf, H5MD_xvf, + driver='wrong_driver', comm="MPI.COMM_WORLD") + + +@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") +def test_no_convert_units(h5md_file, ref, tmpdir): + outfile = 'test_no_convert_units' + ref.ext + with tmpdir.as_cwd(): + with h5md_file as f: + with h5py.File(outfile, 'w') as g: + f.copy(source='particles', dest=g) + f.copy(source='h5md', dest=g) + groups = ['position', 'velocity', 'force'] + for name in groups: + del g['particles/trajectory'][name]['value'].attrs['unit'] + del g['particles/trajectory/position/time'].attrs['unit'] + u = mda.Universe(TPR_xvf, outfile, convert_units=False, format="H5MD") + for unit in u.trajectory.units: + assert u.trajectory.units[unit] is None + + +@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") +def test_no_units(h5md_file, ref, tmpdir): + outfile = 'test_no_units' + ref.ext + with tmpdir.as_cwd(): + with h5md_file as f: + with h5py.File(outfile, 'w') as g: + f.copy(source='particles', dest=g) + f.copy(source='h5md', dest=g) + groups = ['position', 'velocity', 'force'] + for name in groups: + del g['particles/trajectory'][name]['value'].attrs['unit'] + del g['particles/trajectory/position/time'].attrs['unit'] + del g['particles/trajectory/box/edges/value'].attrs['unit'] + with pytest.raises(ValueError): + u = mda.Universe(TPR_xvf, outfile, format="H5MD") + + +@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") +def test_open_with_driver(): + u = mda.Universe(TPR_xvf, H5MD_xvf, driver="core") diff --git a/testsuite/MDAnalysisTests/data/cobrotoxin.h5md b/testsuite/MDAnalysisTests/data/cobrotoxin.h5md new file mode 100644 index 00000000000..723b4bf52a7 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/cobrotoxin.h5md differ diff --git a/testsuite/MDAnalysisTests/data/coordinates/README.md b/testsuite/MDAnalysisTests/data/coordinates/README.md index 2699f4e7d97..3276bb88fb1 100644 --- a/testsuite/MDAnalysisTests/data/coordinates/README.md +++ b/testsuite/MDAnalysisTests/data/coordinates/README.md @@ -26,7 +26,7 @@ unpack and check that the content is the same as test.xyz test.xtc -------- ## Creation -Written with MDAnalysis using the 'create_data.py script +Written with MDAnalysis using the 'create_data.py' script ## Validation With `gmx dump -f test.xtc` you can look at the content of the file in @@ -35,8 +35,19 @@ plain text using Gromacs utilities. test.trr -------- ## Creation -Written with MDAnalysis using the 'create_data.py script +Written with MDAnalysis using the 'create_data.py' script ## Validation With `gmx dump -f test.trr` you can look at the content of the file in plain text using Gromacs utilities. + +test.h5md +--------- +## Creation +Converted test.trr to test.h5md with MDAnalysis and pyh5md +using the 'create_h5md_data.py' script. +See https://github.com/pdebuyl/pyh5md for pyh5md repository. + +## Validation +Manually examined contents of test.trr with MDAnalysis and test.h5md +with h5py and checked that the contents were the same. diff --git a/testsuite/MDAnalysisTests/data/coordinates/create_h5md_data.py b/testsuite/MDAnalysisTests/data/coordinates/create_h5md_data.py new file mode 100644 index 00000000000..2ac287756e5 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/coordinates/create_h5md_data.py @@ -0,0 +1,81 @@ +import MDAnalysis as mda +from MDAnalysis.tests.datafiles import COORDINATES_TRR, COORDINATES_TOPOLOGY +# To create the test trajectory, install pyh5md +# from https://github.com/pdebuyl/pyh5md +# with 'pip install pyh5md' +import pyh5md + +""" +This script converts the file test.trr to test.h5md, where +test.h5md will be used to as the reference trajectory for +tests using MDAnalysisTests.coordinates.base.BaseReference +""" + + +def create_test_trj(uni, filename): + """uses pyh5md library to write h5md file""" + # will eventually change this to use h5py instead + + with pyh5md.File(filename, 'w', creator='create_h5md_data.py') as f: + trajectory = f.particles_group('trajectory') + obsv = f.require_group('observables') + + positions = pyh5md.element(trajectory, 'position', store='time', + data=uni.trajectory.ts.positions, + time=True) + f['particles/trajectory/position/value'].attrs['unit'] = 'Angstrom' + f['particles/trajectory/position/time'].attrs['unit'] = 'ps' + + velocities = pyh5md.element(trajectory, 'velocity', + data=uni.trajectory.ts.velocities, + step_from=positions, + store='time', time=True) + f['particles/trajectory/velocity/value'].attrs['unit'] = 'Angstrom ps-1' + f['particles/trajectory/velocity/time'].attrs['unit'] = 'ps' + + forces = pyh5md.element(trajectory, 'force', + data=uni.trajectory.ts.forces, + step_from=positions, + store='time', time=True) + f['particles/trajectory/force/value'].attrs['unit'] = 'kJ mol-1 Angstrom-1' + f['particles/trajectory/force/time'].attrs['unit'] = 'ps' + + data_step = pyh5md.element(obsv, 'step', + data=uni.trajectory.ts.data['step'], + step_from=positions, + store='time', time=True) + data_lambda = pyh5md.element(obsv, 'lambda', + data=uni.trajectory.ts.data['lambda'], + step_from=positions, + store='time', time=True) + + trajectory.create_box(dimension=3, + boundary=['periodic', 'periodic', 'periodic'], + store='time', + data=uni.trajectory.ts.triclinic_dimensions, + step_from=positions) + f['particles/trajectory/box/edges/value'].attrs['unit'] = 'Angstrom' + + for ts in uni.trajectory: + trajectory.box.edges.append(uni.trajectory.ts.triclinic_dimensions, + ts.frame, time=ts.time) + positions.append(uni.trajectory.ts.positions, + ts.frame, time=ts.time) + velocities.append(uni.trajectory.ts.velocities, + ts.frame, time=ts.time) + forces.append(uni.trajectory.ts.forces, + ts.frame, time=ts.time) + data_step.append(uni.trajectory.ts.data['step'], + ts.frame, time=ts.time) + data_lambda.append(uni.trajectory.ts.data['lambda'], + ts.frame, time=ts.time) + +def main(): + pdb = COORDINATES_TOPOLOGY + trr = COORDINATES_TRR + u = mda.Universe(pdb, trr) + create_test_trj(u, 'test.h5md') + + +if __name__ == '__main__': + main() diff --git a/testsuite/MDAnalysisTests/data/coordinates/test.h5md b/testsuite/MDAnalysisTests/data/coordinates/test.h5md new file mode 100644 index 00000000000..a8965689303 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/coordinates/test.h5md differ diff --git a/testsuite/MDAnalysisTests/datafiles.py b/testsuite/MDAnalysisTests/datafiles.py index 90a2e1b8e24..6ec9e26fe44 100644 --- a/testsuite/MDAnalysisTests/datafiles.py +++ b/testsuite/MDAnalysisTests/datafiles.py @@ -74,6 +74,7 @@ "GRO_residwrap_0base", # corner case of #728 with resid=0 for first atom "GRO_sameresid_diffresname", # Case where two residues share the same resid "PDB_xvf", "TPR_xvf", "TRR_xvf", # Gromacs coords/veloc/forces (cobrotoxin, OPLS-AA, Gromacs 4.5.5 tpr) + "H5MD_xvf", # TPR_xvf + TRR_xvf converted to h5md format "XVG_BZ2", # Compressed xvg file about cobrotoxin "PDB_xlserial", "TPR400", "TPR402", "TPR403", "TPR404", "TPR405", "TPR406", "TPR407", @@ -152,6 +153,7 @@ "Martini_membrane_gro", # for testing the leaflet finder "COORDINATES_XTC", "COORDINATES_TRR", + "COORDINATES_H5MD", "COORDINATES_DCD", "COORDINATES_TOPOLOGY", "NUCLsel", @@ -193,7 +195,7 @@ "PDB_CHECK_RIGHTHAND_PA", # for testing right handedness of principal_axes "MMTF_NOCRYST", # File with meaningless CRYST1 record (Issue #2679, PR #2685) "FHIAIMS", # to test FHIAIMS coordinate files - "SDF_molecule" # MDL SDFile for rdkit + "SDF_molecule" # MDL SDFile for rdkit ] from pkg_resources import resource_filename @@ -232,6 +234,7 @@ __name__, 'data/coordinates/test.xyz.bz2') COORDINATES_XTC = resource_filename(__name__, 'data/coordinates/test.xtc') COORDINATES_TRR = resource_filename(__name__, 'data/coordinates/test.trr') +COORDINATES_H5MD = resource_filename(__name__, 'data/coordinates/test.h5md') COORDINATES_DCD = resource_filename(__name__, 'data/coordinates/test.dcd') COORDINATES_TOPOLOGY = resource_filename(__name__, 'data/coordinates/test_topology.pdb') @@ -311,6 +314,7 @@ PDB_xvf = resource_filename(__name__, 'data/cobrotoxin.pdb') TPR_xvf = resource_filename(__name__, 'data/cobrotoxin.tpr') TRR_xvf = resource_filename(__name__, 'data/cobrotoxin.trr') +H5MD_xvf = resource_filename(__name__, 'data/cobrotoxin.h5md') XVG_BZ2 = resource_filename(__name__, 'data/cobrotoxin_protein_forces.xvg.bz2') XPDB_small = resource_filename(__name__, 'data/5digitResid.pdb') diff --git a/testsuite/setup.py b/testsuite/setup.py index 5b9f80c2864..f454ba9e9cd 100755 --- a/testsuite/setup.py +++ b/testsuite/setup.py @@ -174,6 +174,7 @@ def run(self): 'data/windows/*', 'data/*.itp', 'data/*.coor', + 'data/*.h5md', ], }, install_requires=[