diff --git a/package/CHANGELOG b/package/CHANGELOG index 05e9b078d1c..3742ad94e2f 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -14,7 +14,7 @@ The rules for this file: ------------------------------------------------------------------------------ ??/??/18 tylerjereddy, richardjgowers, palnabarun, orbeckst, kain88-de, zemanj, - VOD555, davidercruz, jbarnoud, ayushsuhane, hfmull + VOD555, davidercruz, jbarnoud, ayushsuhane, hfmull, micaela-matta * 0.18.1 @@ -68,6 +68,7 @@ Fixes * PDBWriter now properly sets start value * Zero length TopologyGroup now return proper shape array via to_indices (Issue #1974) + * Added periodic boundary box support to the Tinker file reader (Issue #2001) Changes * TopologyAttrs are now statically typed (Issue #1876) diff --git a/package/MDAnalysis/coordinates/TXYZ.py b/package/MDAnalysis/coordinates/TXYZ.py index 260d11d6ead..9b0d6003276 100644 --- a/package/MDAnalysis/coordinates/TXYZ.py +++ b/package/MDAnalysis/coordinates/TXYZ.py @@ -26,7 +26,8 @@ Coordinate reader for Tinker_ xyz files .txyz and trajectory .arc files. Differences between Tinker format_ and normal xyz files: -- there is only one header line containing both the number of atoms and a comment +- the 1st header line contains both the number of atoms and a comment +- the 2nd header line (optional) has periodic boundary conditions information - column 1 contains atom numbers (starting from 1) - column 6 contains atoms types - the following columns indicate connectivity (atoms to which that particular atom is @@ -39,6 +40,10 @@ Classes ------- +.. autoclass:: TXYZWriter + :members: + :inherited-members: + .. autoclass:: TXYZReader :members: :inherited-members: @@ -51,9 +56,110 @@ import os import errno -from ..lib import util from . import base +from ..core import flags +from ..lib import util from ..lib.util import openany, cached +from ..exceptions import NoDataError +from ..version import __version__ + + +class TXYZWriter(base.WriterBase): + """Writes a TXYZ file (single frame) + or an ARC file (multiple frames) + """ + + format = 'TXYZ', 'ARC' + multiframe = True + # these are assumed! + units = {'time': 'ps', 'length': 'Angstrom'} + + def __init__(self, filename, n_atoms=None, atoms=None, convert_units=None, + remark=None, **kwargs): + """Initialize the TXYZ trajectory writer + + Parameters + ---------- + filename: str + filename of trajectory file. If it ends with "gz" then the file + will be gzip-compressed; if it ends with "bz2" it will be bzip2 + compressed. + + remark: str (optional) + text following n_atoms ("molecule name"). By default writes MDAnalysis + version + """ + self.filename = filename + self.n_atoms = n_atoms + if convert_units is not None: + self.convert_units = convert_units + else: + self.convert_units = flags['convert_lengths'] + + default_remark = "Written by {0} (release {1})".format( + self.__class__.__name__, __version__) + self.remark = default_remark if remark is None else remark + # can also be gz, bz2 + self._txyz = util.anyopen(self.filename, 'wt') + + def close(self): + """Close the trajectory file and finalize the writing""" + if self._txyz is not None: + self._txyz.write("\n") + self._txyz.close() + self._txyz = None + + def write(self, obj): + """Write object `obj` at current trajectory frame to file. + + Atom names in the output are taken from the `obj` or default + to the value of the `atoms` keyword supplied to the + :class:`TXYZWriter` constructor. + + Parameters + ---------- + obj : Universe or AtomGroup + The :class:`~MDAnalysis.core.groups.AtomGroup` or + :class:`~MDAnalysis.core.universe.Universe` to write. + """ + # information needed: + # timestep if writing a trajectory + # atom numbers, names, positions, types, connectivity + + atoms = obj.atoms + ts = atoms.ts + if not hasattr(atoms, 'bonds'): + raise NoDataError('TXYZWriter requires bond information! ' + 'provide a topology file or use guess_bonds ' + 'before writing the current trajectory in ' + 'Tinker format.') + + if self.n_atoms is None: + self.n_atoms = atoms.n_atoms + else: + if self.n_atoms != atoms.n_atoms: + raise ValueError('n_atoms keyword was specified indicating ' + 'that this should be a trajectory of the ' + 'same model. But the provided TimeStep has a ' + 'different number ({}) then expected ({})' + ''.format(atoms.n_atoms, self.n_atoms)) + + + if self.convert_units: + coordinates = self.convert_pos_to_native( + ts.positions, inplace=False) + else: + coordinates = ts.positions + + self._txyz.write("{0:d} frame {1} {2}\n".format( + self.n_atoms, ts.frame, self.remark)) + self._txyz.write("{0:10.5f} {1:10.5f} {2:10.5f} " + "{3:10.5f} {4:10.5f} {5:10.5f}\n".format(*ts.dimensions)) + + for atom, (x, y, z) in zip(atoms, coordinates): + self._txyz.write("{0} {1!s:>8} {2:10.5f} {3:10.5f} {4:10.5f} {5} {b}\n" + .format(atom.ix+1, atom.name, x, y, z, atom.type, + b = " ".join(str(item) for item in (atom.bonded_atoms).indices + 1))) class TXYZReader(base.ReaderBase): @@ -72,9 +178,17 @@ def __init__(self, filename, **kwargs): # coordinates::core.py so the last file extension will tell us if it is # bzipped or not root, ext = os.path.splitext(self.filename) - self.xyzfile = util.anyopen(self.filename) + self.txyzfile = util.anyopen(self.filename) self._cache = dict() - + with util.openany(self.filename) as inp: + inp.readline() + line=inp.readline() + try: + float(line.split()[1]) + except ValueError: + self.periodic=False + else: + self.periodic=True self.ts = self._Timestep(self.n_atoms, **self._ts_kwargs) # Haven't quite figured out where to start with all the self._reopen() # etc. @@ -95,14 +209,16 @@ def n_atoms(self): @cached('n_frames') def n_frames(self): try: - return self._read_xyz_n_frames() + return self._read_txyz_n_frames() except IOError: return 0 - def _read_xyz_n_frames(self): + def _read_txyz_n_frames(self): # the number of lines in the XYZ file will be 1 greater than the # number of atoms linesPerFrame = self.n_atoms + 1 + if self.periodic: + linesPerFrame +=1 counter = 0 offsets = [] @@ -120,7 +236,7 @@ def _read_xyz_n_frames(self): return n_frames def _read_frame(self, frame): - self.xyzfile.seek(self._offsets[frame]) + self.txyzfile.seek(self._offsets[frame]) self.ts.frame = frame - 1 # gets +1'd in next return self._read_next_timestep() @@ -129,11 +245,13 @@ def _read_next_timestep(self, ts=None): if ts is None: ts = self.ts - f = self.xyzfile + f = self.txyzfile try: # we assume that there is only one header line per frame f.readline() + if self.periodic: + ts.dimensions=f.readline().split() # convert all entries at the end once for optimal speed tmp_buf = [] for i in range(self.n_atoms): @@ -149,21 +267,21 @@ def _reopen(self): self.open_trajectory() def open_trajectory(self): - if self.xyzfile is not None: + if self.txyzfile is not None: raise IOError( errno.EALREADY, 'TXYZ file already opened', self.filename) - self.xyzfile = util.anyopen(self.filename) + self.txyzfile = util.anyopen(self.filename) # reset ts ts = self.ts ts.frame = -1 - return self.xyzfile + return self.txyzfile def close(self): """Close arc trajectory file if it was open.""" - if self.xyzfile is None: + if self.txyzfile is None: return - self.xyzfile.close() - self.xyzfile = None + self.txyzfile.close() + self.txyzfile = None diff --git a/package/MDAnalysis/topology/TXYZParser.py b/package/MDAnalysis/topology/TXYZParser.py index 63698602d06..7cd8f93f902 100644 --- a/package/MDAnalysis/topology/TXYZParser.py +++ b/package/MDAnalysis/topology/TXYZParser.py @@ -43,6 +43,7 @@ """ from __future__ import absolute_import +import itertools import numpy as np from . import guessers @@ -88,9 +89,17 @@ def parse(self, **kwargs): names = np.zeros(natoms, dtype=object) types = np.zeros(natoms, dtype=np.int) bonds = [] + #Checks whether file contains periodic boundary box info + fline = inf.readline() + try: + float(fline.split()[1]) + except: + pass + else: + fline = inf.readline() # Can't infinitely read as XYZ files can be multiframe - for i in range(natoms): - line = inf.readline().split() + for i, line in zip(range(natoms),itertools.chain([fline],inf)): + line = line.split() atomids[i]= line[0] names[i] = line[1] types[i] = line[5] diff --git a/testsuite/MDAnalysisTests/coordinates/test_txyz.py b/testsuite/MDAnalysisTests/coordinates/test_txyz.py index 6334352645d..e37131e16e4 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_txyz.py +++ b/testsuite/MDAnalysisTests/coordinates/test_txyz.py @@ -20,11 +20,14 @@ # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # from __future__ import absolute_import - +import os import pytest -from numpy.testing import assert_almost_equal +from numpy.testing import (assert_equal, + assert_array_almost_equal, + assert_almost_equal) -from MDAnalysisTests.datafiles import TXYZ, ARC +from MDAnalysisTests import tempdir +from MDAnalysisTests.datafiles import TXYZ, ARC, ARC_PBC import MDAnalysis as mda @@ -39,6 +42,11 @@ def ARC_U(): return mda.Universe(ARC) +@pytest.fixture +def ARC_PBC_U(): + return mda.Universe(ARC_PBC) + + def test_txyz_positions(TXYZ_U): assert_almost_equal(TXYZ_U.atoms.positions[0], [-6.553398, -1.854369, 0.000000]) @@ -56,5 +64,35 @@ def test_arc_positions_frame_2(ARC_U): [-0.231579, -0.350841, -0.037475]) -def test_arc_traj_legnth(ARC_U): +def test_arc_traj_length(ARC_U): assert len(ARC_U.trajectory) == 2 + + +def test_arcpbc_traj_length(ARC_PBC_U): + assert len(ARC_PBC_U.trajectory) == 3 + + +def test_pbc_boxsize(ARC_PBC_U): + ref_dimensions=[[ 38.860761, 38.860761, 38.860761, 90.000000, 90.000000, 90.000000], + [ 39.860761, 39.860761, 39.860761, 90.000000, 90.000000, 90.000000], + [ 40.860761, 40.860761, 40.860761, 90.000000, 90.000000, 90.000000]] + + for ref_box, ts in zip(ref_dimensions, ARC_PBC_U.trajectory): + assert_almost_equal(ref_box, ts.dimensions, decimal=5) + + +def test_txyz_writer(TXYZ_U): + with tempdir.TempDir() as tmpdir: + outfile = tmpdir + 'test_write.txyz' + with mda.coordinates.TXYZ.TXYZWriter(outfile, len(TXYZ_U.atoms)) as W: + W.write(TXYZ_U.atoms) + + utest = mda.Universe(outfile) + assert_equal(TXYZ_U.atoms.indices, + utest.atoms.indices) + assert_almost_equal(TXYZ_U.atoms.positions, + utest.atoms.positions, 3) + assert_equal(TXYZ_U.atoms.types, + utest.atoms.types) + assert_equal(TXYZ_U.atoms.bonds.to_indices(), + utest.atoms.bonds.to_indices()) diff --git a/testsuite/MDAnalysisTests/data/coordinates/new_hexane.arc b/testsuite/MDAnalysisTests/data/coordinates/new_hexane.arc new file mode 100644 index 00000000000..6705ae636a9 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/coordinates/new_hexane.arc @@ -0,0 +1,24 @@ + 6 Hexane (267 OPLS-UA in 38.8 Ang Box; JPC, 100, 14511 '96) + 38.860761 38.860761 38.860761 90.000000 90.000000 90.000000 + 1 CH3 -0.533809 8.893843 -17.718901 83 2 + 2 CH2 -0.492066 8.734009 -16.201869 86 1 3 + 3 CH2 0.609320 7.723360 -15.894925 86 2 4 + 4 CH2 0.723163 7.301395 -14.432851 86 3 5 + 5 CH2 1.923300 6.401842 -14.151512 86 4 6 + 6 CH3 1.576645 4.928146 -14.343148 83 5 + 6 Hexane (test file for MDA) + 39.860761 39.860761 39.860761 90.000000 90.000000 90.000000 + 1 CH3 0.039519 9.187867 -17.899888 83 2 + 2 CH2 -0.171280 8.486514 -16.561104 86 1 3 + 3 CH2 0.988507 7.632303 -16.057222 86 2 4 + 4 CH2 0.630146 7.086837 -14.677831 86 3 5 + 5 CH2 1.729866 6.240985 -14.042356 86 4 6 + 6 CH3 2.065117 4.899261 -14.687383 83 5 + 6 Hexane + 40.860761 40.860761 40.860761 90.000000 90.000000 90.000000 + 1 CH3 -0.533809 8.893843 -17.718901 83 2 + 2 CH2 -0.492066 8.734009 -16.201869 86 1 3 + 3 CH2 0.609320 7.723360 -15.894925 86 2 4 + 4 CH2 0.723163 7.301395 -14.432851 86 3 5 + 5 CH2 1.923300 6.401842 -14.151512 86 4 6 + 6 CH3 1.576645 4.928146 -14.343148 83 5 diff --git a/testsuite/MDAnalysisTests/datafiles.py b/testsuite/MDAnalysisTests/datafiles.py index 3109f100527..c0b79224a08 100644 --- a/testsuite/MDAnalysisTests/datafiles.py +++ b/testsuite/MDAnalysisTests/datafiles.py @@ -85,7 +85,7 @@ "XTC_sub_sol", "XYZ", "XYZ_psf", "XYZ_bz2", "XYZ_mini", "XYZ_five", # 3 and 5 atoms xyzs for an easy topology - "TXYZ", "ARC", # Tinker files + "TXYZ", "ARC", "ARC_PDB", # Tinker files "PRM", "TRJ", "TRJ_bz2", # Amber (no periodic box) "INPCRD", "PRMpbc", "TRJpbc_bz2", # Amber (periodic box) @@ -294,6 +294,7 @@ XYZ_five = resource_filename(__name__, 'data/five.xyz') TXYZ = resource_filename(__name__, 'data/coordinates/test.txyz') ARC = resource_filename(__name__, 'data/coordinates/test.arc') +ARC_PBC = resource_filename(__name__, 'data/coordinates/new_hexane.arc') PRM = resource_filename(__name__, 'data/Amber/ache.prmtop') TRJ = resource_filename(__name__, 'data/Amber/ache.mdcrd')