Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions package/CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ Enhancements
* add high order water bridge support to water bridge analysis. (PR #2087)

Changes
* added support for scale_factor in NCDFReader (Issue #2323)
* added official support for Python 3.7 (PR #1963)
* stopped official support of Python 3.4 (#2066, PR #2174)
* In lib.mdamath, calculations within triclinic_box(), triclinic_vectors(),
Expand All @@ -76,6 +77,7 @@ Changes
* changed the water bridge analysis output format (PR #2087)

Fixes
* fixed lack of check for scaling of NCDFReader velocities (Issue #2323)
* fixed PDBReader and PDBWriter newlines for PDB header (Issue #2324)
* fixes ProgressMeter issues with older Jupyter Lab versions (Issue #2078)
* fixes ProgressMeter behaviour for non-AnalysisBase methods (Issue #2084)
Expand Down
216 changes: 170 additions & 46 deletions package/MDAnalysis/coordinates/TRJ.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@
"""
from __future__ import (absolute_import, division, print_function,
unicode_literals)

import scipy.io.netcdf
import numpy as np
import warnings
import errno
Expand All @@ -167,14 +167,14 @@

logger = logging.getLogger("MDAnalysis.coordinates.AMBER")

import scipy.io.netcdf

try:
import netCDF4
except ImportError:
netCDF4 = None
logger.warning("netCDF4 is not available. Writing AMBER ncdf files will be slow.")


class Timestep(base.Timestep):
"""AMBER trajectory Timestep.

Expand Down Expand Up @@ -417,12 +417,13 @@ class NCDFReader(base.ReaderBase):
Periodic unit cell information is detected and used to populate the
:attr:`Timestep.dimensions` attribute. (If no unit cell is available in
the trajectory, then :attr:`Timestep.dimensions` will return
``[0,0,0,0,0,0]``.)
``[0,0,0,0,0,0]``).

Current limitations:

* only trajectories with time in ps and lengths in Angstroem are processed
* scale_factors are not supported (and not checked)
* scale_factors are supported on read but are currently not kept/used when
writing

The NCDF reader uses :mod:`scipy.io.netcdf` and therefore :mod:`scipy` must
be installed. It supports the *mmap* keyword argument (when reading):
Expand All @@ -448,6 +449,17 @@ class NCDFReader(base.ReaderBase):
kwarg `delta` renamed to `dt`, for uniformity with other Readers.
.. versionchanged:: 0.17.0
Uses :mod:`scipy.io.netcdf` and supports the *mmap* kwarg.
.. versionchanged:: 0.20.0
Now reads scale_factors for all expected AMBER convention variables.
Timestep variables now adhere standard MDAnalysis units, with lengths
of angstrom, time of ps, velocity of angstrom/ps and force of
kJ/(mol*Angstrom). It is noted that with 0.19.2 and earlier versions,
velocities would have often been reported in values of angstrom/AKMA
time units instead (Issue #2323).

.. TODO:
* Remove support for `degrees` units in MDAnalysis version > 1.0
(Issue #2327, PR #2326).

"""

Expand All @@ -458,74 +470,168 @@ class NCDFReader(base.ReaderBase):
'length': 'Angstrom',
'velocity': 'Angstrom/ps',
'force': 'kcal/(mol*Angstrom)'}

_Timestep = Timestep

def __init__(self, filename, n_atoms=None, mmap=None, **kwargs):

self._mmap = mmap

super(NCDFReader, self).__init__(filename, **kwargs)

self.trjfile = scipy.io.netcdf.netcdf_file(self.filename,
mmap=self._mmap)

if not ('AMBER' in self.trjfile.Conventions.decode('utf-8').split(',') or
'AMBER' in self.trjfile.Conventions.decode('utf-8').split()):
# AMBER NetCDF files should always have a convention
try:
conventions = self.trjfile.Conventions
if not ('AMBER' in conventions.decode('utf-8').split(',') or
'AMBER' in conventions.decode('utf-8').split()):
errmsg = ("NCDF trajectory {0} does not conform to AMBER "
"specifications, "
"http://ambermd.org/netcdf/nctraj.xhtml "
"('AMBER' must be one of the token in attribute "
"Conventions)".format(self.filename))
logger.fatal(errmsg)
raise TypeError(errmsg)
except AttributeError:
errmsg = "NCDF trajectory {0} is missing Conventions".format(
self.filename)
logger.fatal(errmsg)
raise ValueError(errmsg)

# AMBER NetCDF files should also have a ConventionVersion
try:
ConventionVersion = self.trjfile.ConventionVersion.decode('utf-8')
if not ConventionVersion == self.version:
wmsg = ("NCDF trajectory format is {0!s} but the reader "
"implements format {1!s}".format(
ConventionVersion, self.version))
warnings.warn(wmsg)
logger.warning(wmsg)
except AttributeError:
errmsg = "NCDF trajectory {0} is missing ConventionVersion".format(
self.filename)
raise ValueError(errmsg)

# The AMBER NetCDF standard enforces 64 bit offsets
if not self.trjfile.version_byte == 2:
errmsg = ("NCDF trajectory {0} does not conform to AMBER "
"specifications, http://ambermd.org/netcdf/nctraj.xhtml "
"('AMBER' must be one of the tokens in attribute "
"Conventions)".format(self.filename))
"specifications, as detailed in "
"https://ambermd.org/netcdf/nctraj.xhtml "
"(NetCDF file does not use 64 bit offsets "
"[version_byte = 2])".format(self.filename))
logger.fatal(errmsg)
raise TypeError(errmsg)
if not self.trjfile.ConventionVersion.decode('utf-8') == self.version:
wmsg = ("NCDF trajectory format is {0!s} but the reader "
"implements format {1!s}".format(
self.trjfile.ConventionVersion, self.version))

# The AMBER NetCDF standard enforces 3D coordinates
try:
if not self.trjfile.dimensions['spatial'] == 3:
errmsg = "Incorrect spatial value for NCDF trajectory file"
raise TypeError(errmsg)
except KeyError:
errmsg = "NCDF trajectory does not contain spatial dimension"
raise ValueError(errmsg)

# AMBER NetCDF specs require program and programVersion. Warn users
# if those attributes do not exist
if not (hasattr(self.trjfile, 'program') and
hasattr(self.trjfile, 'programVersion')):
wmsg = ("NCDF trajectory {0} may not fully adhere to AMBER "
"standards as either the `program` or `programVersion` "
"attributes are missing".format(self.filename))
warnings.warn(wmsg)
logger.warning(wmsg)

self.n_atoms = self.trjfile.dimensions['atom']
self.n_frames = self.trjfile.dimensions['frame']
# example trajectory when read with scipy.io.netcdf has
# dimensions['frame'] == None (indicating a record dimension that can
# grow) whereas if read with netCDF4 I get len(dimensions['frame']) ==
# 10: in any case, we need to get the number of frames from somewhere
# such as the time variable:
if self.n_frames is None:
self.n_frames = self.trjfile.variables['time'].shape[0]
try:
self.n_atoms = self.trjfile.dimensions['atom']
if n_atoms is not None and n_atoms != self.n_atoms:
errmsg = ("Supplied n_atoms ({0}) != natom from ncdf ({1}). "
"Note: n_atoms can be None and then the ncdf value "
"is used!".format(n_atoms, self.n_atoms))
raise ValueError(errmsg)
except KeyError:
errmsg = ("NCDF trajectory {0} does not contain atom "
"information".format(self.filename))
raise ValueError(errmsg)

try:
self.n_frames = self.trjfile.dimensions['frame']
# example trajectory when read with scipy.io.netcdf has
# dimensions['frame'] == None (indicating a record dimension that
# can grow) whereas if read with netCDF4 I get
# len(dimensions['frame']) == 10: in any case, we need to get
# the number of frames from somewhere such as the time variable:
if self.n_frames is None:
self.n_frames = self.trjfile.variables['time'].shape[0]
except KeyError:
errmsg = ("NCDF trajectory {0} does not contain frame "
"information".format(self.filename))
raise ValueError(errmsg)

try:
self.remarks = self.trjfile.title
except AttributeError:
self.remarks = ""
# other metadata (*= requd):
# - program* sander
# - programVersion* 9.0
# - application AMBER
#

# checks for not-implemented features (other units would need to be
# hacked into MDAnalysis.units)
if self.trjfile.variables['time'].units.decode('utf-8') != "picosecond":
raise NotImplementedError(
"NETCDFReader currently assumes that the trajectory was written "
"with a time unit of picoseconds and not {0}.".format(
self.trjfile.variables['time'].units))
if self.trjfile.variables['coordinates'].units.decode('utf-8') != "angstrom":
raise NotImplementedError(
"NETCDFReader currently assumes that the trajectory was written "
"with a length unit of Angstroem and not {0}.".format(
self.trjfile.variables['coordinates'].units))
if hasattr(self.trjfile.variables['coordinates'], 'scale_factor'):
raise NotImplementedError("scale_factors are not implemented")
if n_atoms is not None and n_atoms != self.n_atoms:
raise ValueError("Supplied n_atoms ({0}) != natom from ncdf ({1}). "
"Note: n_atoms can be None and then the ncdf value "
"is used!".format(n_atoms, self.n_atoms))
self._verify_units(self.trjfile.variables['time'].units, 'picosecond')
self._verify_units(self.trjfile.variables['coordinates'].units,
'angstrom')

# Check for scale_factor attributes for all data variables and
# store this to multiply through later (Issue #2323)
self.scale_factors = {'time': 1.0,
'cell_lengths': 1.0,
'cell_angles': 1.0,
'coordinates': 1.0,
'velocities': 1.0,
'forces': 1.0}

for variable in self.trjfile.variables:
if hasattr(self.trjfile.variables[variable], 'scale_factor'):
if variable in self.scale_factors:
scale_factor = self.trjfile.variables[variable].scale_factor
self.scale_factors[variable] = scale_factor
else:
errmsg = ("scale_factors for variable {0} are "
"not implemented".format(variable))
raise NotImplementedError(errmsg)

self.has_velocities = 'velocities' in self.trjfile.variables
if self.has_velocities:
self._verify_units(self.trjfile.variables['velocities'].units,
'angstrom/picosecond')

self.has_forces = 'forces' in self.trjfile.variables
if self.has_forces:
self._verify_units(self.trjfile.variables['forces'].units,
'kilocalorie/mole/angstrom')

self.periodic = 'cell_lengths' in self.trjfile.variables
if self.periodic:
self._verify_units(self.trjfile.variables['cell_lengths'].units,
'angstrom')
# Currently the MDA writer outputs 'degrees' rather than the
# singular form 'degree' agreed in the conventions. As discussed
# in PR #2326 from MDA 1.0 only and 'degree' will be accepted.
cell_angle_units = self.trjfile.variables['cell_angles'].units
if (cell_angle_units.decode('utf-8') == 'degrees'):
wmsg = ("DEPRECATED (1.0): NCDF trajectory {0} uses units of "
"`degrees` for the `cell_angles` variable instead of "
"`degree`. Support for non-AMBER convention units is "
"now deprecated and will end in MDAnalysis version "
"1.0. Afterwards, reading this file will raise an "
"error.")
warnings.warn(wmsg, category=DeprecationWarning)
logger.warning(wmsg)
else:
self._verify_units(cell_angle_units, 'degree')

self._current_frame = 0

self.ts = self._Timestep(self.n_atoms,
Expand All @@ -537,6 +643,14 @@ def __init__(self, filename, n_atoms=None, mmap=None, **kwargs):
# load first data frame
self._read_frame(0)

@staticmethod
def _verify_units(eval_unit, expected_units):
if eval_unit.decode('utf-8') != expected_units:
errmsg = ("NETCDFReader currently assumes that the trajectory "
"was written in units of {0} instead of {1}".format(
eval_unit.decode('utf-8'), expected_units))
raise NotImplementedError(errmsg)

@staticmethod
def parse_n_atoms(filename, **kwargs):
with scipy.io.netcdf.netcdf_file(filename, mmap=None) as f:
Expand All @@ -555,15 +669,21 @@ def _read_frame(self, frame):
raise IndexError("frame index must be 0 <= frame < {0}".format(
self.n_frames))
# note: self.trjfile.variables['coordinates'].shape == (frames, n_atoms, 3)
ts._pos[:] = self.trjfile.variables['coordinates'][frame]
ts.time = self.trjfile.variables['time'][frame]
ts._pos[:] = (self.trjfile.variables['coordinates'][frame] *
self.scale_factors['coordinates'])
ts.time = (self.trjfile.variables['time'][frame] *
self.scale_factors['time'])
if self.has_velocities:
ts._velocities[:] = self.trjfile.variables['velocities'][frame]
ts._velocities[:] = (self.trjfile.variables['velocities'][frame] *
self.scale_factors['velocities'])
if self.has_forces:
ts._forces[:] = self.trjfile.variables['forces'][frame]
ts._forces[:] = (self.trjfile.variables['forces'][frame] *
self.scale_factors['forces'])
if self.periodic:
ts._unitcell[:3] = self.trjfile.variables['cell_lengths'][frame]
ts._unitcell[3:] = self.trjfile.variables['cell_angles'][frame]
ts._unitcell[:3] = (self.trjfile.variables['cell_lengths'][frame] *
self.scale_factors['cell_lengths'])
ts._unitcell[3:] = (self.trjfile.variables['cell_angles'][frame] *
self.scale_factors['cell_angles'])
if self.convert_units:
self.convert_pos_from_native(ts._pos) # in-place !
self.convert_time_from_native(
Expand Down Expand Up @@ -728,6 +848,10 @@ class NCDFWriter(base.WriterBase):
Use fast :mod:`netCDF4` for writing but fall back to slow
:mod:`scipy.io.netcdf` if :mod:`netCDF4` is not available.

.. TODO:
* Change the `cell_angles` units to `degree`, implement `scale_factor`
handling (Issue #2327).

"""

format = 'NCDF'
Expand Down
Loading