From e293b76574e81cd492308c9aa5c26d8809cadd78 Mon Sep 17 00:00:00 2001 From: IAlibay Date: Sat, 8 May 2021 10:10:01 +0100 Subject: [PATCH 01/13] Add scale_factor write support, refactor some of the read --- package/MDAnalysis/coordinates/TRJ.py | 201 ++++++++++++++---- .../coordinates/test_netcdf.py | 42 ++++ 2 files changed, 197 insertions(+), 46 deletions(-) diff --git a/package/MDAnalysis/coordinates/TRJ.py b/package/MDAnalysis/coordinates/TRJ.py index 78bfa3983a0..77573cdc1a8 100644 --- a/package/MDAnalysis/coordinates/TRJ.py +++ b/package/MDAnalysis/coordinates/TRJ.py @@ -414,7 +414,6 @@ class NCDFReader(base.ReaderBase): Current limitations: * only trajectories with time in ps and lengths in Angstroem are processed - * scale_factors are supported on read but are currently not kept/used when writing The NCDF reader uses :mod:`scipy.io.netcdf` and therefore :mod:`scipy` must @@ -456,6 +455,8 @@ class NCDFReader(base.ReaderBase): :class:`NCDFPicklable`. Reading of `dt` now defaults to 1.0 ps if `dt` cannot be extracted from the first two frames of the trajectory. + :meth:`Writer` now also sets `convert_units`, `velocities`, `forces` and + `scale_factor` information for the :class:`NCDFWriter`. """ @@ -581,17 +582,19 @@ def __init__(self, filename, n_atoms=None, mmap=None, **kwargs): # Check for scale_factor attributes for all data variables and # store this to multiply through later (Issue #2323) - self.scale_factors = {'time': 1.0, - 'cell_lengths': 1.0, - 'cell_angles': 1.0, - 'coordinates': 1.0, - 'velocities': 1.0, - 'forces': 1.0} + self.scale_factors = {'time': None, + 'cell_lengths': None, + 'cell_angles': None, + 'coordinates': None, + 'velocities': None, + 'forces': None} for variable in self.trjfile.variables: if hasattr(self.trjfile.variables[variable], 'scale_factor'): if variable in self.scale_factors: scale_factor = self.trjfile.variables[variable].scale_factor + if not isinstance(scale_factor, (float, np.floating)): + raise TypeError(f"{scale_factor} is not a float") self.scale_factors[variable] = scale_factor else: errmsg = ("scale_factors for variable {0} are " @@ -641,6 +644,13 @@ def parse_n_atoms(filename, **kwargs): n_atoms = f.dimensions['atom'] return n_atoms + @staticmethod + def _scale(variable, scale_factor): + if scale_factor is None or scale_factor == 1: + return variable + else: + return scale_factor * variable + def _read_frame(self, frame): ts = self.ts @@ -653,21 +663,25 @@ def _read_frame(self, frame): raise IndexError("frame index must be 0 <= frame < {0}".format( self.n_frames)) # note: self.trjfile.variables['coordinates'].shape == (frames, n_atoms, 3) - ts._pos[:] = (self.trjfile.variables['coordinates'][frame] * - self.scale_factors['coordinates']) - ts.time = (self.trjfile.variables['time'][frame] * - self.scale_factors['time']) + ts._pos[:] = self._scale(self.trjfile.variables['coordinates'][frame], + self.scale_factors['coordinates']) + ts.time = self._scale(self.trjfile.variables['time'][frame], + self.scale_factors['time']) if self.has_velocities: - ts._velocities[:] = (self.trjfile.variables['velocities'][frame] * - self.scale_factors['velocities']) + ts._velocities[:] = self._scale( + self.trjfile.variables['velocities'][frame], + self.scale_factors['velocities']) if self.has_forces: - ts._forces[:] = (self.trjfile.variables['forces'][frame] * - self.scale_factors['forces']) + ts._forces[:] = self._scale( + self.trjfile.variables['forces'][frame], + self.scale_factors['forces']) if self.periodic: - ts._unitcell[:3] = (self.trjfile.variables['cell_lengths'][frame] * - self.scale_factors['cell_lengths']) - ts._unitcell[3:] = (self.trjfile.variables['cell_angles'][frame] * - self.scale_factors['cell_angles']) + ts._unitcell[:3] = self._scale( + self.trjfile.variables['cell_lengths'][frame], + self.scale_factors['cell_lengths']) + ts._unitcell[3:] = self._scale( + self.trjfile.variables['cell_angles'][frame], + self.scale_factors['cell_angles']) if self.convert_units: self.convert_pos_from_native(ts._pos) # in-place ! self.convert_time_from_native( @@ -735,6 +749,24 @@ def Writer(self, filename, **kwargs): length of one timestep in picoseconds remarks : str (optional) string that is stored in the title field + convert_units : bool (optional) + ``True``: units are converted to the AMBER base format + velocities : bool (optional) + Write velocities into the trajectory + forces : bool (optional) + Write forces into the trajectory + scale_time : float (optional) + Scale factor for time units + scale_cell_lengths : float (optional) + Scale factor for cell lengths + scale_cell_angles : float (optional) + Scale factor for cell angles + scale_coordinates : float (optional) + Scale factor for coordinates + scale_velocities : float (optional) + Scale factor for velocities + scale_forces : float (optional) + Scale factor for forces Returns ------- @@ -743,6 +775,11 @@ def Writer(self, filename, **kwargs): n_atoms = kwargs.pop('n_atoms', self.n_atoms) kwargs.setdefault('remarks', self.remarks) kwargs.setdefault('dt', self.dt) + kwargs.setdefault('convert_units', self.convert_units) + kwargs.setdefault('velocities', self.has_velocities) + kwargs.setdefault('forces', self.has_forces) + for key in self.scale_factors: + kwargs.setdefault(f'scale_{key}', self.scale_factors[key]) return NCDFWriter(filename, n_atoms, **kwargs) @@ -755,7 +792,14 @@ class NCDFWriter(base.WriterBase): Velocities are written out if they are detected in the input :class:`Timestep`. The trajectories are always written with ångström for the lengths and picoseconds for the time (and hence Å/ps for - velocities). + velocities and kilocalorie/mole/Å for forces). + + If passed, `scale_factor` variables for time / velocities / cell lengths / + cell angles / coordinates / velocities / forces will be written to the + NetCDF file. In this case, the trajectory data will be written to the + NetCDF file divided by the scale_factor value. By default, `scale_factor` + variables are not written, except for velocities where it is set to 20.455 + replicating the default behaviour of AMBER. Unit cell information is written if available. @@ -768,10 +812,6 @@ class NCDFWriter(base.WriterBase): name of output file n_atoms : int number of atoms in trajectory file - start : int (optional) - starting timestep - step : int (optional) - skip between subsequent timesteps dt : float (optional) timestep convert_units : bool (optional) @@ -780,6 +820,18 @@ class NCDFWriter(base.WriterBase): Write velocities into the trajectory [``False``] forces : bool (optional) Write forces into the trajectory [``False``] + scale_time : float (optional) + Scale factor for time units [`None`] + scale_cell_lengths : float (optional) + Scale factor for cell lengths [`None`] + scale_cell_angles : float (optional) + Scale factor for cell angles [`None`] + scale_coordinates : float (optional) + Scale factor for coordinates [`None`] + scale_velocities : float (optional) + Scale factor for velocities [20.455] + scale_forces : float (optional) + Scale factor for forces [`None`] Note @@ -840,9 +892,8 @@ class NCDFWriter(base.WriterBase): Changes the `cell_angles` unit to the AMBER NetCDF convention standard of `degree` instead of the `degrees` written in previous version of MDAnalysis (Issue #2327). - - .. TODO: - * Implement `scale_factor` handling (Issue #2327). + .. versionchanged:: 2.0.0 + Writing of `scale_factor` values has now been implemented. """ @@ -854,15 +905,11 @@ class NCDFWriter(base.WriterBase): 'velocity': 'Angstrom/ps', 'force': 'kcal/(mol*Angstrom)'} - def __init__(self, - filename, - n_atoms, - start=0, - step=1, - dt=1.0, - remarks=None, - convert_units=True, - **kwargs): + def __init__(self, filename, n_atoms, dt=1.0, remarks=None, + convert_units=True, velocities=False, forces=False, + scale_time=None, scale_cell_lengths=None, + scale_cell_angles=None, scale_coordinates=None, + scale_velocities=None, scale_forces=None, **kwargs): self.filename = filename if n_atoms == 0: raise ValueError("NCDFWriter: no atoms in output trajectory") @@ -870,16 +917,29 @@ def __init__(self, # convert length and time to base units on the fly? self.convert_units = convert_units - self.start = start # do we use those? - self.step = step # do we use those? self.dt = dt self.remarks = remarks or "AMBER NetCDF format (MDAnalysis.coordinates.trj.NCDFWriter)" self._first_frame = True # signals to open trajectory self.trjfile = None # open on first write with _init_netcdf() self.periodic = None # detect on first write - self.has_velocities = kwargs.get('velocities', False) - self.has_forces = kwargs.get('forces', False) + self.has_velocities = velocities + self.has_forces = forces + + self.scale_factors = { + 'time': scale_time, + 'cell_lengths': scale_cell_lengths, + 'cell_angles': scale_cell_angles, + 'coordinates': scale_coordinates, + 'velocities': scale_velocities, + 'forces': scale_forces} + # NCDF standard enforces float scale_factors + for value in self.scale_factors.values(): + if (value is not None) and ( + not isinstance(value, (float, np.floating))): + errmsg = f"scale_factor {value} is not a float" + raise TypeError(errmsg) + self.curr_frame = 0 def _init_netcdf(self, periodic=True): @@ -937,18 +997,25 @@ def _init_netcdf(self, periodic=True): coords = ncfile.createVariable('coordinates', 'f4', ('frame', 'atom', 'spatial')) setattr(coords, 'units', 'angstrom') + if self.scale_factors['coordinates']: + setattr(coords, 'scale_factor', self.scale_factors['coordinates']) spatial = ncfile.createVariable('spatial', 'c', ('spatial', )) spatial[:] = np.asarray(list('xyz')) time = ncfile.createVariable('time', 'f4', ('frame',)) setattr(time, 'units', 'picosecond') + if self.scale_factors['time']: + setattr(time, 'scale_factor', self.scale_factors['time']) self.periodic = periodic if self.periodic: cell_lengths = ncfile.createVariable('cell_lengths', 'f8', ('frame', 'cell_spatial')) setattr(cell_lengths, 'units', 'angstrom') + if self.scale_factors['cell_lengths']: + setattr(cell_lengths, 'scale_factor', + self.scale_factors['cell_lengths']) cell_spatial = ncfile.createVariable('cell_spatial', 'c', ('cell_spatial', )) @@ -957,6 +1024,9 @@ def _init_netcdf(self, periodic=True): cell_angles = ncfile.createVariable('cell_angles', 'f8', ('frame', 'cell_angular')) setattr(cell_angles, 'units', 'degree') + if self.scale_factors['cell_angles']: + setattr(cell_angles, 'scale_factor', + self.scale_factors['cell_angles']) cell_angular = ncfile.createVariable('cell_angular', 'c', ('cell_angular', 'label')) @@ -968,10 +1038,16 @@ def _init_netcdf(self, periodic=True): velocs = ncfile.createVariable('velocities', 'f4', ('frame', 'atom', 'spatial')) setattr(velocs, 'units', 'angstrom/picosecond') + if self.scale_factors['velocities']: + setattr(velocs, 'scale_factor', + self.scale_factors['velocities']) if self.has_forces: forces = ncfile.createVariable('forces', 'f4', ('frame', 'atom', 'spatial')) setattr(forces, 'units', 'kilocalorie/mole/angstrom') + if self.scale_factors['forces']: + setattr(forces, 'scale_factor', + self.scale_factors['forces']) ncfile.sync() self._first_frame = False @@ -1029,6 +1105,13 @@ def _write_next_frame(self, ag): return self._write_next_timestep(ts) + @staticmethod + def _scale(variable, scale_factor): + if scale_factor is None or scale_factor == 1: + return variable + else: + return variable / scale_factor + def _write_next_timestep(self, ts): """Write coordinates and unitcell information to NCDF file. @@ -1042,6 +1125,10 @@ def _write_next_timestep(self, ts): https://github.com/MDAnalysis/mdanalysis/issues/109 .. _`netcdf4storage.py`: https://storage.googleapis.com/google-code-attachments/mdanalysis/issue-109/comment-2/netcdf4storage.py + + + .. versionchanged:: 2.0.0 + Can now write scale_factors, and scale variables accordingly. """ pos = ts._pos time = ts.time @@ -1059,27 +1146,49 @@ def _write_next_timestep(self, ts): unitcell = self.convert_dimensions_to_unitcell(ts) # write step - self.trjfile.variables['coordinates'][self.curr_frame, :, :] = pos - self.trjfile.variables['time'][self.curr_frame] = time + # coordinates + self.trjfile.variables['coordinates'][ + self.curr_frame, :, :] = self._scale( + pos, self.scale_factors['coordinates']) + + # time + self.trjfile.variables['time'][self.curr_frame] = self._scale( + time, self.scale_factors['time']) + + # unitcell if self.periodic: + # cell lengths self.trjfile.variables['cell_lengths'][ - self.curr_frame, :] = unitcell[:3] + self.curr_frame, :] = self._scale( + unitcell[:3], + self.scale_factors['cell_lengths']) + self.trjfile.variables['cell_angles'][ - self.curr_frame, :] = unitcell[3:] + self.curr_frame, :] = self._scale( + unitcell[3:], + self.scale_factors['cell_angles']) + # velocities if self.has_velocities: velocities = ts._velocities if self.convert_units: velocities = self.convert_velocities_to_native( velocities, inplace=False) - self.trjfile.variables['velocities'][self.curr_frame, :, :] = velocities + self.trjfile.variables['velocities'][ + self.curr_frame, :, :] = self._scale( + velocities, self.scale_factors['velocities']) + + # forces if self.has_forces: forces = ts._forces if self.convert_units: forces = self.convert_forces_to_native( forces, inplace=False) - self.trjfile.variables['forces'][self.curr_frame, :, :] = forces + + self.trjfile.variables['forces'][ + self.curr_frame, :, :] = self._scale( + forces, self.scale_factors['forces']) self.trjfile.sync() self.curr_frame += 1 diff --git a/testsuite/MDAnalysisTests/coordinates/test_netcdf.py b/testsuite/MDAnalysisTests/coordinates/test_netcdf.py index 02edefc541a..1dc218762a4 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_netcdf.py +++ b/testsuite/MDAnalysisTests/coordinates/test_netcdf.py @@ -501,6 +501,16 @@ def test_scale_factor_box(self, tmpdir, mutation, expected): for ts in u.trajectory: assert_almost_equal(ts.dimensions, expected, self.prec) + def test_scale_factor_not_float(self, tmpdir): + mutation = {'scale_factor': 'coordinates', + 'scale_factor_value': 'parsnips'} + params = self.gen_params(keypair=mutation, restart=False) + with tmpdir.as_cwd(): + self.create_ncdf(params) + errmsg = "b'parsnips' is not a float" + with pytest.raises(TypeError, match=errmsg): + u = mda.Universe(params['filename']) + class TestNCDFReaderExceptionsWarnings(_NCDFGenerator): @@ -877,6 +887,38 @@ def test_write_u(self, pos, vel, force, tmpdir, u1, u2): u.trajectory.close() +class TestNCDFWriterScaleFactors: + """Class to check that the NCDF Writer properly handles the + application of scale factors""" + + @pytest.fixture() + def outfile(self, tmpdir): + return str(tmpdir) + 'ncdf-write-scale.ncdf' + + @pytest.fixture() + def universe(self): + return mda.Universe(PRM_NCBOX, TRJ_NCBOX) + + def get_scale_factors(self, ncdfile): + """Get a dictionary of scale factors stored in netcdf file""" + sfactors = {} + with netcdf.netcdf_file(ncdfile) as f: + for var in f.variables: + if hasattr(f.variables[var], 'scale_factor'): + sfactors[var] = f.variables[var].scale_factor + + return sfactors + + def test_write_read_factors_default(self, outfile, universe): + with universe.trajectory.Writer(outfile) as W: + W.write(universe.atoms) + + # check scale_factors + sfactors = self.get_scale_factors(outfile) + assert len(sfactors) == 1 + assert sfactors['velocities'] == 20.455 + + class TestNCDFWriterUnits(object): """Tests that the writer adheres to AMBER convention units""" @pytest.fixture() From 3246fe7ee85e9b66db403900273e6d710b1cd789 Mon Sep 17 00:00:00 2001 From: IAlibay Date: Sat, 8 May 2021 12:01:34 +0100 Subject: [PATCH 02/13] Completes test, adds changelog entry --- package/CHANGELOG | 3 + package/MDAnalysis/coordinates/TRJ.py | 21 +++-- .../coordinates/test_netcdf.py | 80 +++++++++++++++++++ 3 files changed, 92 insertions(+), 12 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index c63631fb5aa..4c6986ea9a8 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -175,6 +175,9 @@ Enhancements checking if it can be used in parallel analysis. (Issue #2996, PR #2950) Changes + * NCDFWriter now allows for the writing of `scale_factor` attributes for + trajectory variables. Default writing is no `scale_factor` except for + velocities where it is 20.455 (Issue #2327) * `analysis.rms.RMSD` and `analysis.rms.RMSF` classes now store `rmsd` and `rmsf` data using the `analysis.base.Results` class (Issues #3274 #3261) * `analysis.dihedrals` classes now store angle data using the diff --git a/package/MDAnalysis/coordinates/TRJ.py b/package/MDAnalysis/coordinates/TRJ.py index 77573cdc1a8..cc61148f6d1 100644 --- a/package/MDAnalysis/coordinates/TRJ.py +++ b/package/MDAnalysis/coordinates/TRJ.py @@ -745,8 +745,6 @@ def Writer(self, filename, **kwargs): filename of the output NCDF trajectory n_atoms : int (optional) number of atoms - dt : float (optional) - length of one timestep in picoseconds remarks : str (optional) string that is stored in the title field convert_units : bool (optional) @@ -774,7 +772,6 @@ def Writer(self, filename, **kwargs): """ n_atoms = kwargs.pop('n_atoms', self.n_atoms) kwargs.setdefault('remarks', self.remarks) - kwargs.setdefault('dt', self.dt) kwargs.setdefault('convert_units', self.convert_units) kwargs.setdefault('velocities', self.has_velocities) kwargs.setdefault('forces', self.has_forces) @@ -812,8 +809,6 @@ class NCDFWriter(base.WriterBase): name of output file n_atoms : int number of atoms in trajectory file - dt : float (optional) - timestep convert_units : bool (optional) ``True``: units are converted to the AMBER base format; [``True``] velocities : bool (optional) @@ -893,7 +888,10 @@ class NCDFWriter(base.WriterBase): of `degree` instead of the `degrees` written in previous version of MDAnalysis (Issue #2327). .. versionchanged:: 2.0.0 - Writing of `scale_factor` values has now been implemented. + `dt`, `start`, and `step` keywords were unused and are no longer set. + Writing of `scale_factor` values has now been implemented. By default + only velocities write a scale_factor of 20.455 (echoing the behaviour + seen from AMBER) """ @@ -905,11 +903,11 @@ class NCDFWriter(base.WriterBase): 'velocity': 'Angstrom/ps', 'force': 'kcal/(mol*Angstrom)'} - def __init__(self, filename, n_atoms, dt=1.0, remarks=None, - convert_units=True, velocities=False, forces=False, - scale_time=None, scale_cell_lengths=None, - scale_cell_angles=None, scale_coordinates=None, - scale_velocities=None, scale_forces=None, **kwargs): + def __init__(self, filename, n_atoms, remarks=None, convert_units=True, + velocities=False, forces=False, scale_time=None, + scale_cell_lengths=None, scale_cell_angles=None, + scale_coordinates=None, scale_velocities=None, + scale_forces=None, **kwargs): self.filename = filename if n_atoms == 0: raise ValueError("NCDFWriter: no atoms in output trajectory") @@ -917,7 +915,6 @@ def __init__(self, filename, n_atoms, dt=1.0, remarks=None, # convert length and time to base units on the fly? self.convert_units = convert_units - self.dt = dt self.remarks = remarks or "AMBER NetCDF format (MDAnalysis.coordinates.trj.NCDFWriter)" self._first_frame = True # signals to open trajectory diff --git a/testsuite/MDAnalysisTests/coordinates/test_netcdf.py b/testsuite/MDAnalysisTests/coordinates/test_netcdf.py index 1dc218762a4..861da953e30 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_netcdf.py +++ b/testsuite/MDAnalysisTests/coordinates/test_netcdf.py @@ -895,6 +895,10 @@ class TestNCDFWriterScaleFactors: def outfile(self, tmpdir): return str(tmpdir) + 'ncdf-write-scale.ncdf' + @pytest.fixture() + def outfile2(self, tmpdir): + return str(tmpdir) + 'ncdf-write-scale2.ncdf' + @pytest.fixture() def universe(self): return mda.Universe(PRM_NCBOX, TRJ_NCBOX) @@ -909,6 +913,11 @@ def get_scale_factors(self, ncdfile): return sfactors + def get_variable(self, ncdfile, variable, frame): + """Return a varible array from netcdf file""" + with netcdf.netcdf_file(ncdfile) as f: + return f.variables[variable][frame] + def test_write_read_factors_default(self, outfile, universe): with universe.trajectory.Writer(outfile) as W: W.write(universe.atoms) @@ -918,6 +927,77 @@ def test_write_read_factors_default(self, outfile, universe): assert len(sfactors) == 1 assert sfactors['velocities'] == 20.455 + def test_write_bad_scale_factor(self, outfile, universe): + errmsg = "scale_factor parsnips is not a float" + with pytest.raises(TypeError, match=errmsg): + NCDFWriter(outfile, n_atoms=len(universe.atoms), + scale_velocities="parsnips") + + @pytest.mark.parametrize( + 'stime, slengths, sangles, scoords, svels, sfrcs', + [[1.0, 1.0, 1.0, 1.0, 1.0, 1.0], [-2.0, -2.0, -2.0, -2.0, -2.0, -2.0], + [2.0, 4.0, 8.0, 16.0, 32.0, 64.0]]) + def test_write_read_write(self, outfile, outfile2, universe, stime, + slengths, sangles, scoords, svels, sfrcs): + """Write out a file with assorted scale_factors, then + read it back it, then write it out to make sure that the + new assorted scale_factors have been retrained by Write""" + + with NCDFWriter(outfile, n_atoms=len(universe.atoms), velocities=True, + forces=True, scale_time=stime, + scale_cell_lengths=slengths, scale_cell_angles=sangles, + scale_coordinates=scoords, scale_velocities=svels, + scale_forces=sfrcs) as W: + for ts in universe.trajectory: + W.write(universe.atoms) + + universe2 = mda.Universe(PRM_NCBOX, outfile) + + # Write back checking that the same scale_factors as used + with universe2.trajectory.Writer(outfile2) as OWriter: + OWriter.write(universe2.atoms) + + universe3 = mda.Universe(PRM_NCBOX, outfile2) + + # check stored scale_factors + sfactors1 = self.get_scale_factors(outfile) + sfactors2 = self.get_scale_factors(outfile2) + + assert sfactors1 == sfactors2 + assert len(sfactors1) == 6 + assert sfactors1['time'] == stime + assert sfactors1['cell_lengths'] == slengths + assert sfactors1['cell_angles'] == sangles + assert sfactors1['coordinates'] == scoords + assert sfactors1['velocities'] == svels + assert sfactors1['forces'] == sfrcs + + # check that the stored values are indeed scaled + assert_almost_equal(universe.trajectory.time / stime, + self.get_variable(outfile, 'time', 0), 4) + assert_almost_equal(universe.dimensions[:3] / slengths, + self.get_variable(outfile, 'cell_lengths', 0), 4) + assert_almost_equal(universe.dimensions[3:] / sangles, + self.get_variable(outfile, 'cell_angles', 0), 4) + assert_almost_equal(universe.atoms.positions / scoords, + self.get_variable(outfile, 'coordinates', 0), 4) + assert_almost_equal(universe.atoms.velocities / svels, + self.get_variable(outfile, 'velocities', 0), 4) + # note: kJ/mol -> kcal/mol = 4.184 conversion + assert_almost_equal(universe.atoms.forces / (sfrcs * 4.184), + self.get_variable(outfile, 'forces', 0), 4) + + # check that the individual components were saved/read properly + for ts1, ts3 in zip(universe.trajectory, universe3.trajectory): + assert_almost_equal(ts1.time, ts3.time) + assert_almost_equal(ts1.dimensions, ts3.dimensions) + assert_almost_equal(universe.atoms.positions, + universe3.atoms.positions, 4) + assert_almost_equal(universe.atoms.velocities, + universe3.atoms.velocities, 4) + assert_almost_equal(universe.atoms.forces, + universe3.atoms.forces, 4) + class TestNCDFWriterUnits(object): """Tests that the writer adheres to AMBER convention units""" From 744ca165cce6cced5a3eeb136e2a668e4a8b50a1 Mon Sep 17 00:00:00 2001 From: IAlibay Date: Sat, 8 May 2021 12:03:42 +0100 Subject: [PATCH 03/13] fix pep8 --- testsuite/MDAnalysisTests/coordinates/test_netcdf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testsuite/MDAnalysisTests/coordinates/test_netcdf.py b/testsuite/MDAnalysisTests/coordinates/test_netcdf.py index 861da953e30..68232ca8701 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_netcdf.py +++ b/testsuite/MDAnalysisTests/coordinates/test_netcdf.py @@ -938,7 +938,7 @@ def test_write_bad_scale_factor(self, outfile, universe): [[1.0, 1.0, 1.0, 1.0, 1.0, 1.0], [-2.0, -2.0, -2.0, -2.0, -2.0, -2.0], [2.0, 4.0, 8.0, 16.0, 32.0, 64.0]]) def test_write_read_write(self, outfile, outfile2, universe, stime, - slengths, sangles, scoords, svels, sfrcs): + slengths, sangles, scoords, svels, sfrcs): """Write out a file with assorted scale_factors, then read it back it, then write it out to make sure that the new assorted scale_factors have been retrained by Write""" From 1f4ad8cdd09bee61bbe32362f17f4c114a140d57 Mon Sep 17 00:00:00 2001 From: IAlibay Date: Sat, 8 May 2021 12:43:33 +0100 Subject: [PATCH 04/13] turn off mmap for fixtures --- testsuite/MDAnalysisTests/coordinates/test_netcdf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testsuite/MDAnalysisTests/coordinates/test_netcdf.py b/testsuite/MDAnalysisTests/coordinates/test_netcdf.py index 68232ca8701..c1b3e8fe771 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_netcdf.py +++ b/testsuite/MDAnalysisTests/coordinates/test_netcdf.py @@ -906,7 +906,7 @@ def universe(self): def get_scale_factors(self, ncdfile): """Get a dictionary of scale factors stored in netcdf file""" sfactors = {} - with netcdf.netcdf_file(ncdfile) as f: + with netcdf.netcdf_file(ncdfile, mmap=False) as f: for var in f.variables: if hasattr(f.variables[var], 'scale_factor'): sfactors[var] = f.variables[var].scale_factor @@ -915,7 +915,7 @@ def get_scale_factors(self, ncdfile): def get_variable(self, ncdfile, variable, frame): """Return a varible array from netcdf file""" - with netcdf.netcdf_file(ncdfile) as f: + with netcdf.netcdf_file(ncdfile, mmap=False) as f: return f.variables[variable][frame] def test_write_read_factors_default(self, outfile, universe): From c7e8a5fb84f2dabc00939a88b74511db2a634b8d Mon Sep 17 00:00:00 2001 From: IAlibay Date: Sat, 8 May 2021 13:04:15 +0100 Subject: [PATCH 05/13] Switch order --- testsuite/MDAnalysisTests/coordinates/test_netcdf.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/testsuite/MDAnalysisTests/coordinates/test_netcdf.py b/testsuite/MDAnalysisTests/coordinates/test_netcdf.py index c1b3e8fe771..b9cb2e96de7 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_netcdf.py +++ b/testsuite/MDAnalysisTests/coordinates/test_netcdf.py @@ -933,10 +933,11 @@ def test_write_bad_scale_factor(self, outfile, universe): NCDFWriter(outfile, n_atoms=len(universe.atoms), scale_velocities="parsnips") - @pytest.mark.parametrize( - 'stime, slengths, sangles, scoords, svels, sfrcs', - [[1.0, 1.0, 1.0, 1.0, 1.0, 1.0], [-2.0, -2.0, -2.0, -2.0, -2.0, -2.0], - [2.0, 4.0, 8.0, 16.0, 32.0, 64.0]]) + @pytest.mark.parametrize('stime, slengths, sangles, scoords, svels, sfrcs', ( + (-2.0, -2.0, -2.0, -2.0, -2.0, -2.0), + (1.0, 1.0, 1.0, 1.0, 1.0, 1.0), + (2.0, 4.0, 8.0, 16.0, 32.0, 64.0) + )) def test_write_read_write(self, outfile, outfile2, universe, stime, slengths, sangles, scoords, svels, sfrcs): """Write out a file with assorted scale_factors, then From 7c4a374323aea576c5cf40f5cbfe67ac2359c026 Mon Sep 17 00:00:00 2001 From: Irfan Alibay Date: Mon, 10 May 2021 01:00:17 +0100 Subject: [PATCH 06/13] Apply suggestions from code review Co-authored-by: Lily Wang <31115101+lilyminium@users.noreply.github.com> --- package/MDAnalysis/coordinates/TRJ.py | 7 +++---- testsuite/MDAnalysisTests/coordinates/test_netcdf.py | 6 +++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/package/MDAnalysis/coordinates/TRJ.py b/package/MDAnalysis/coordinates/TRJ.py index cc61148f6d1..ff51230a9e0 100644 --- a/package/MDAnalysis/coordinates/TRJ.py +++ b/package/MDAnalysis/coordinates/TRJ.py @@ -791,11 +791,10 @@ class NCDFWriter(base.WriterBase): for the lengths and picoseconds for the time (and hence Å/ps for velocities and kilocalorie/mole/Å for forces). - If passed, `scale_factor` variables for time / velocities / cell lengths / - cell angles / coordinates / velocities / forces will be written to the + Scale factor variables for time, velocities, cell lengths, cell angles, coordinates, velocities, or forces can be passed into the writer. If so, they will be written to the NetCDF file. In this case, the trajectory data will be written to the - NetCDF file divided by the scale_factor value. By default, `scale_factor` - variables are not written, except for velocities where it is set to 20.455 + NetCDF file divided by the scale factor value. By default, scale factor + variables are not written. The only exception is for velocities, where it is set to 20.455, replicating the default behaviour of AMBER. Unit cell information is written if available. diff --git a/testsuite/MDAnalysisTests/coordinates/test_netcdf.py b/testsuite/MDAnalysisTests/coordinates/test_netcdf.py index b9cb2e96de7..c554ac2a62b 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_netcdf.py +++ b/testsuite/MDAnalysisTests/coordinates/test_netcdf.py @@ -914,7 +914,7 @@ def get_scale_factors(self, ncdfile): return sfactors def get_variable(self, ncdfile, variable, frame): - """Return a varible array from netcdf file""" + """Return a variable array from netcdf file""" with netcdf.netcdf_file(ncdfile, mmap=False) as f: return f.variables[variable][frame] @@ -941,8 +941,8 @@ def test_write_bad_scale_factor(self, outfile, universe): def test_write_read_write(self, outfile, outfile2, universe, stime, slengths, sangles, scoords, svels, sfrcs): """Write out a file with assorted scale_factors, then - read it back it, then write it out to make sure that the - new assorted scale_factors have been retrained by Write""" + read it back in, then write it out to make sure that the + new assorted scale_factors have been retained by Write""" with NCDFWriter(outfile, n_atoms=len(universe.atoms), velocities=True, forces=True, scale_time=stime, From 66be2a0957c60eaa00a5895194e1f18c1209e30c Mon Sep 17 00:00:00 2001 From: IAlibay Date: Tue, 17 Aug 2021 13:21:13 +0100 Subject: [PATCH 07/13] Fix maskandscale issues --- package/MDAnalysis/coordinates/TRJ.py | 93 +++++++++++++++++---------- 1 file changed, 58 insertions(+), 35 deletions(-) diff --git a/package/MDAnalysis/coordinates/TRJ.py b/package/MDAnalysis/coordinates/TRJ.py index ff51230a9e0..4e344f56cb9 100644 --- a/package/MDAnalysis/coordinates/TRJ.py +++ b/package/MDAnalysis/coordinates/TRJ.py @@ -476,8 +476,10 @@ def __init__(self, filename, n_atoms=None, mmap=None, **kwargs): super(NCDFReader, self).__init__(filename, **kwargs) + # ensure maskandscale is off so we don't end up double scaling self.trjfile = NCDFPicklable(self.filename, - mmap=self._mmap) + mmap=self._mmap, + maskandscale=False) # AMBER NetCDF files should always have a convention try: @@ -644,12 +646,22 @@ def parse_n_atoms(filename, **kwargs): n_atoms = f.dimensions['atom'] return n_atoms - @staticmethod - def _scale(variable, scale_factor): - if scale_factor is None or scale_factor == 1: - return variable + def _get_var_and_scale(self, variable, frame): + """Helper function to get variable at given frame from NETCDF file and + scale if necessary. + + TODO + ---- + For now we always scale if a scale_factor exists as AMBER doesn't tend + to add scale_factors for anything else but velocities. Should other + engines add scale_factors of 1.0, then we should skip scaling in those + cases to avoid overheads. + """ + scale_factor = self.scale_factors[variable] + if scale_factor is None: + return self.trjfile.variables[variable][frame] else: - return scale_factor * variable + return self.trjfile.variables[variable][frame] * scale_factor def _read_frame(self, frame): ts = self.ts @@ -663,25 +675,15 @@ def _read_frame(self, frame): raise IndexError("frame index must be 0 <= frame < {0}".format( self.n_frames)) # note: self.trjfile.variables['coordinates'].shape == (frames, n_atoms, 3) - ts._pos[:] = self._scale(self.trjfile.variables['coordinates'][frame], - self.scale_factors['coordinates']) - ts.time = self._scale(self.trjfile.variables['time'][frame], - self.scale_factors['time']) + ts._pos[:] = self._get_var_and_scale('coordinates', frame) + ts.time = self._get_var_and_scale('time', frame) if self.has_velocities: - ts._velocities[:] = self._scale( - self.trjfile.variables['velocities'][frame], - self.scale_factors['velocities']) + ts._velocities[:] = self._get_var_and_scale('velocities', frame) if self.has_forces: - ts._forces[:] = self._scale( - self.trjfile.variables['forces'][frame], - self.scale_factors['forces']) + ts._forces[:] = self._get_var_and_scale('forces', frame) if self.periodic: - ts._unitcell[:3] = self._scale( - self.trjfile.variables['cell_lengths'][frame], - self.scale_factors['cell_lengths']) - ts._unitcell[3:] = self._scale( - self.trjfile.variables['cell_angles'][frame], - self.scale_factors['cell_angles']) + ts._unitcell[:3] = self._get_var_and_scale('cell_lengths', frame) + ts._unitcell[3:] = self._get_var_and_scale('cell_angles', frame) if self.convert_units: self.convert_pos_from_native(ts._pos) # in-place ! self.convert_time_from_native( @@ -817,15 +819,15 @@ class NCDFWriter(base.WriterBase): scale_time : float (optional) Scale factor for time units [`None`] scale_cell_lengths : float (optional) - Scale factor for cell lengths [`None`] + Scale factor for cell lengths [``None``] scale_cell_angles : float (optional) - Scale factor for cell angles [`None`] + Scale factor for cell angles [``None``] scale_coordinates : float (optional) - Scale factor for coordinates [`None`] + Scale factor for coordinates [``None``] scale_velocities : float (optional) Scale factor for velocities [20.455] scale_forces : float (optional) - Scale factor for forces [`None`] + Scale factor for forces [``None``] Note @@ -887,10 +889,11 @@ class NCDFWriter(base.WriterBase): of `degree` instead of the `degrees` written in previous version of MDAnalysis (Issue #2327). .. versionchanged:: 2.0.0 - `dt`, `start`, and `step` keywords were unused and are no longer set. - Writing of `scale_factor` values has now been implemented. By default + ``dt``, ``start``, and ``step`` keywords were unused and are no longer + set. + Writing of ``scale_factor`` values has now been implemented. By default only velocities write a scale_factor of 20.455 (echoing the behaviour - seen from AMBER) + seen from AMBER). """ @@ -938,6 +941,12 @@ def __init__(self, filename, n_atoms, remarks=None, convert_units=True, self.curr_frame = 0 + @staticmethod + def _create_var(): + """Helper function to create ncdfile variable and disable maskandscale + if necessary (i.e. using netCDF4)""" + pass + def _init_netcdf(self, periodic=True): """Initialize netcdf AMBER 1.0 trajectory. @@ -963,12 +972,14 @@ def _init_netcdf(self, periodic=True): "Attempt to write to closed file {0}".format(self.filename)) if netCDF4: - ncfile = netCDF4.Dataset(self.filename, 'w', format='NETCDF3_64BIT') + ncfile = netCDF4.Dataset(self.filename, 'w', + format='NETCDF3_64BIT') else: ncfile = scipy.io.netcdf.netcdf_file(self.filename, - mode='w', version=2) - wmsg = "Could not find netCDF4 module. Falling back to MUCH slower "\ - "scipy.io.netcdf implementation for writing." + mode='w', version=2, + maskandscale=False) + wmsg = ("Could not find netCDF4 module. Falling back to MUCH " + "slower scipy.io.netcdf implementation for writing.") logger.warning(wmsg) warnings.warn(wmsg) @@ -1045,6 +1056,11 @@ def _init_netcdf(self, periodic=True): setattr(forces, 'scale_factor', self.scale_factors['forces']) + # Important for netCDF4! Disable maskandscale for created variables! + # Won't work if called before variable creation! + if netCDF4: + ncfile.set_auto_maskandscale(False) + ncfile.sync() self._first_frame = False self.trjfile = ncfile @@ -1216,6 +1232,11 @@ class NCDFPicklable(scipy.io.netcdf.netcdf_file): mmap : None or bool, optional Whether to mmap `filename` when reading. True when `filename` is a file name, False when `filename` is a file-like object. + version : {1, 2}, optional + Sets the netcdf file version to read / write. 1 is classic, 2 is + 65-bit offset format. Default is 1. + maskandscale : bool, optional + Whether to automatically scale and mask data. Default is False. Example ------- @@ -1242,7 +1263,9 @@ class NCDFPicklable(scipy.io.netcdf.netcdf_file): .. versionadded:: 2.0.0 """ def __getstate__(self): - return self.filename, self.use_mmap + return (self.filename, self.use_mmap, self.version_byte, + self.maskandscale) def __setstate__(self, args): - self.__init__(args[0], mmap=args[1]) + self.__init__(args[0], mmap=args[1], version=args[2], + maskandscale=args[3]) From 8f244fe3e38b18060212d4e673de83c7a584fdd6 Mon Sep 17 00:00:00 2001 From: IAlibay Date: Tue, 17 Aug 2021 13:27:27 +0100 Subject: [PATCH 08/13] Remove FutureWarning about scale_factors --- testsuite/MDAnalysisTests/coordinates/test_netcdf.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/testsuite/MDAnalysisTests/coordinates/test_netcdf.py b/testsuite/MDAnalysisTests/coordinates/test_netcdf.py index 72bb7738e6b..50bd1ebe5cc 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_netcdf.py +++ b/testsuite/MDAnalysisTests/coordinates/test_netcdf.py @@ -1040,11 +1040,3 @@ def test_wrong_n_atoms(self, outfile): u = make_Universe(trajectory=True) with pytest.raises(IOError): w.write(u) - - def test_scale_factor_future(self, outfile): - u = mda.Universe(GRO) - wmsg = "`scale_factor` writing will change" - with pytest.warns(FutureWarning, match=wmsg): - with NCDFWriter(outfile, u.trajectory.n_atoms) as w: - w.write(u.atoms) - From a6952a69e28b5dd8eaedfcbfc75896dc653e5d79 Mon Sep 17 00:00:00 2001 From: IAlibay Date: Tue, 17 Aug 2021 16:14:46 +0100 Subject: [PATCH 09/13] Simplifies scaled writing --- package/MDAnalysis/coordinates/TRJ.py | 81 ++++++++++++--------------- 1 file changed, 36 insertions(+), 45 deletions(-) diff --git a/package/MDAnalysis/coordinates/TRJ.py b/package/MDAnalysis/coordinates/TRJ.py index 1c72b4fa91f..a88937f671d 100644 --- a/package/MDAnalysis/coordinates/TRJ.py +++ b/package/MDAnalysis/coordinates/TRJ.py @@ -156,6 +156,7 @@ import warnings import errno import logging +from math import isclose import MDAnalysis from . import base @@ -647,15 +648,13 @@ def _get_var_and_scale(self, variable, frame): """Helper function to get variable at given frame from NETCDF file and scale if necessary. - TODO + Note ---- - For now we always scale if a scale_factor exists as AMBER doesn't tend - to add scale_factors for anything else but velocities. Should other - engines add scale_factors of 1.0, then we should skip scaling in those - cases to avoid overheads. + If scale_factor is 1.0 within numerical precision then we don't apply + the scaling. """ scale_factor = self.scale_factors[variable] - if scale_factor is None: + if scale_factor is None or isclose(scale_factor, 1): return self.trjfile.variables[variable][frame] else: return self.trjfile.variables[variable][frame] * scale_factor @@ -792,11 +791,13 @@ class NCDFWriter(base.WriterBase): for the lengths and picoseconds for the time (and hence Å/ps for velocities and kilocalorie/mole/Å for forces). - Scale factor variables for time, velocities, cell lengths, cell angles, coordinates, velocities, or forces can be passed into the writer. If so, they will be written to the - NetCDF file. In this case, the trajectory data will be written to the - NetCDF file divided by the scale factor value. By default, scale factor - variables are not written. The only exception is for velocities, where it is set to 20.455, - replicating the default behaviour of AMBER. + Scale factor variables for time, velocities, cell lengths, cell angles, + coordinates, velocities, or forces can be passed into the writer. If so, + they will be written to the NetCDF file. In this case, the trajectory data + will be written to the NetCDF file divided by the scale factor value. By + default, scale factor variables are not written. The only exception is for + velocities, where it is set to 20.455, replicating the default behaviour of + AMBER. Unit cell information is written if available. @@ -1010,7 +1011,7 @@ def _init_netcdf(self, periodic=True): ('frame', 'atom', 'spatial')) setattr(coords, 'units', 'angstrom') if self.scale_factors['coordinates']: - setattr(coords, 'scale_factor', self.scale_factors['coordinates']) + coords.scale_factor = self.scale_factors['coordinates'] spatial = ncfile.createVariable('spatial', 'c', ('spatial', )) spatial[:] = np.asarray(list('xyz')) @@ -1018,7 +1019,7 @@ def _init_netcdf(self, periodic=True): time = ncfile.createVariable('time', 'f4', ('frame',)) setattr(time, 'units', 'picosecond') if self.scale_factors['time']: - setattr(time, 'scale_factor', self.scale_factors['time']) + time.scale_factor = self.scale_factors['time'] self.periodic = periodic if self.periodic: @@ -1026,8 +1027,7 @@ def _init_netcdf(self, periodic=True): ('frame', 'cell_spatial')) setattr(cell_lengths, 'units', 'angstrom') if self.scale_factors['cell_lengths']: - setattr(cell_lengths, 'scale_factor', - self.scale_factors['cell_lengths']) + cell_lengths.scale_factor = self.scale_factors['cell_lengths'] cell_spatial = ncfile.createVariable('cell_spatial', 'c', ('cell_spatial', )) @@ -1037,8 +1037,7 @@ def _init_netcdf(self, periodic=True): ('frame', 'cell_angular')) setattr(cell_angles, 'units', 'degree') if self.scale_factors['cell_angles']: - setattr(cell_angles, 'scale_factor', - self.scale_factors['cell_angles']) + cell_angles.scale_factor = self.scale_factors['cell_angles'] cell_angular = ncfile.createVariable('cell_angular', 'c', ('cell_angular', 'label')) @@ -1051,15 +1050,13 @@ def _init_netcdf(self, periodic=True): ('frame', 'atom', 'spatial')) setattr(velocs, 'units', 'angstrom/picosecond') if self.scale_factors['velocities']: - setattr(velocs, 'scale_factor', - self.scale_factors['velocities']) + velocs.scale_factor = self.scale_factors['velocities'] if self.has_forces: forces = ncfile.createVariable('forces', 'f4', ('frame', 'atom', 'spatial')) setattr(forces, 'units', 'kilocalorie/mole/angstrom') if self.scale_factors['forces']: - setattr(forces, 'scale_factor', - self.scale_factors['forces']) + forces.scale_factor = self.scale_factors['forces'] # Important for netCDF4! Disable maskandscale for created variables! # Won't work if called before variable creation! @@ -1122,12 +1119,19 @@ def _write_next_frame(self, ag): return self._write_next_timestep(ts) - @staticmethod - def _scale(variable, scale_factor): - if scale_factor is None or scale_factor == 1: - return variable + def _set_frame_var_and_scale(self, varname, data): + """Helper function to set variables and scale them if necessary. + + Note + ---- + If scale_factor is numerically close to 1.0, the variable data is not + scaled. + """ + sfactor = self.scale_factors[varname] + if sfactor is None or isclose(sfactor, 1): + self.trjfile.variables[varname][self.curr_frame] = data else: - return variable / scale_factor + self.trjfile.variables[varname][self.curr_frame] = data / sfactor def _write_next_timestep(self, ts): """Write coordinates and unitcell information to NCDF file. @@ -1164,26 +1168,17 @@ def _write_next_timestep(self, ts): # write step # coordinates - self.trjfile.variables['coordinates'][ - self.curr_frame, :, :] = self._scale( - pos, self.scale_factors['coordinates']) + self._set_frame_var_and_scale('coordinates', pos) # time - self.trjfile.variables['time'][self.curr_frame] = self._scale( - time, self.scale_factors['time']) + self._set_frame_var_and_scale('time', time) # unitcell if self.periodic: # cell lengths - self.trjfile.variables['cell_lengths'][ - self.curr_frame, :] = self._scale( - unitcell[:3], - self.scale_factors['cell_lengths']) + self._set_frame_var_and_scale('cell_lengths', unitcell[:3]) - self.trjfile.variables['cell_angles'][ - self.curr_frame, :] = self._scale( - unitcell[3:], - self.scale_factors['cell_angles']) + self._set_frame_var_and_scale('cell_angles', unitcell[3:]) # velocities if self.has_velocities: @@ -1192,9 +1187,7 @@ def _write_next_timestep(self, ts): velocities = self.convert_velocities_to_native( velocities, inplace=False) - self.trjfile.variables['velocities'][ - self.curr_frame, :, :] = self._scale( - velocities, self.scale_factors['velocities']) + self._set_frame_var_and_scale('velocities', velocities) # forces if self.has_forces: @@ -1203,9 +1196,7 @@ def _write_next_timestep(self, ts): forces = self.convert_forces_to_native( forces, inplace=False) - self.trjfile.variables['forces'][ - self.curr_frame, :, :] = self._scale( - forces, self.scale_factors['forces']) + self._set_frame_var_and_scale('forces', forces) self.trjfile.sync() self.curr_frame += 1 From 6ad2089c3887110600accda492e9c6f5a4b6253f Mon Sep 17 00:00:00 2001 From: IAlibay Date: Tue, 17 Aug 2021 18:51:43 +0100 Subject: [PATCH 10/13] Adds scipy.io.netcdf only test --- .../MDAnalysisTests/coordinates/test_netcdf.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/testsuite/MDAnalysisTests/coordinates/test_netcdf.py b/testsuite/MDAnalysisTests/coordinates/test_netcdf.py index 50bd1ebe5cc..02408936698 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_netcdf.py +++ b/testsuite/MDAnalysisTests/coordinates/test_netcdf.py @@ -39,6 +39,7 @@ from MDAnalysisTests.coordinates.test_trj import _TRJReaderTest from MDAnalysisTests.coordinates.reference import (RefVGV, RefTZ2) from MDAnalysisTests import make_Universe +from MDAnalysisTests.util import block_import class _NCDFReaderTest(_TRJReaderTest): @@ -999,6 +1000,20 @@ def test_write_read_write(self, outfile, outfile2, universe, stime, universe3.atoms.forces, 4) +class TestScipyScaleFactors(TestNCDFWriterScaleFactors): + """As above, but netCDF4 is disabled since scaleandmask is different + between the two libraries""" + @pytest.fixture(autouse=True) + def block_netcdf4(self, monkeypatch): + monkeypatch.setattr(sys.modules['MDAnalysis.coordinates.TRJ'], 'netCDF4', None) + + def test_ncdf4_not_present(self, outfile, universe): + # whilst we're here, let's also test this warning + with pytest.warns(UserWarning, match="Could not find netCDF4 module"): + with universe.trajectory.Writer(outfile) as W: + W.write(universe.atoms) + + class TestNCDFWriterUnits(object): """Tests that the writer adheres to AMBER convention units""" @pytest.fixture() From 749886d1643e6b7bd7e537d6d9414e83d5947e72 Mon Sep 17 00:00:00 2001 From: Irfan Alibay Date: Thu, 19 Aug 2021 09:40:09 +0100 Subject: [PATCH 11/13] remove extra docs line Co-authored-by: Oliver Beckstein --- package/MDAnalysis/coordinates/TRJ.py | 1 - 1 file changed, 1 deletion(-) diff --git a/package/MDAnalysis/coordinates/TRJ.py b/package/MDAnalysis/coordinates/TRJ.py index a88937f671d..878a55b2d49 100644 --- a/package/MDAnalysis/coordinates/TRJ.py +++ b/package/MDAnalysis/coordinates/TRJ.py @@ -412,7 +412,6 @@ class NCDFReader(base.ReaderBase): Current limitations: * only trajectories with time in ps and lengths in Angstroem are processed - writing The NCDF reader uses :mod:`scipy.io.netcdf` and therefore :mod:`scipy` must be installed. It supports the *mmap* keyword argument (when reading): From a83dd6ca8cfdd0b40717c2b4241b8117b63de8c4 Mon Sep 17 00:00:00 2001 From: IAlibay Date: Fri, 20 Aug 2021 20:52:24 +0100 Subject: [PATCH 12/13] Address review comments --- package/MDAnalysis/coordinates/TRJ.py | 10 ++++++++++ testsuite/MDAnalysisTests/coordinates/test_netcdf.py | 2 ++ 2 files changed, 12 insertions(+) diff --git a/package/MDAnalysis/coordinates/TRJ.py b/package/MDAnalysis/coordinates/TRJ.py index a88937f671d..dff8398b5db 100644 --- a/package/MDAnalysis/coordinates/TRJ.py +++ b/package/MDAnalysis/coordinates/TRJ.py @@ -1221,6 +1221,13 @@ class NCDFPicklable(scipy.io.netcdf.netcdf_file): This means that for a successful unpickle, the original file still has to be accessible with its filename. + + .. note:: + This class subclasses :class:`scipy.io.netcdf.netcdf_file`, please + see the `scipy netcdf API documentation`_ for more information on + the parameters and how the class behaviour. + + Parameters ---------- filename : str or file-like @@ -1257,6 +1264,9 @@ class NCDFPicklable(scipy.io.netcdf.netcdf_file): .. versionadded:: 2.0.0 + + + .. _`scipy netcdf API documentation`: https://docs.scipy.org/doc/scipy/reference/generated/scipy.io.netcdf_file.html """ def __getstate__(self): return (self.filename, self.use_mmap, self.version_byte, diff --git a/testsuite/MDAnalysisTests/coordinates/test_netcdf.py b/testsuite/MDAnalysisTests/coordinates/test_netcdf.py index 02408936698..0341506b2ae 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_netcdf.py +++ b/testsuite/MDAnalysisTests/coordinates/test_netcdf.py @@ -906,6 +906,8 @@ def universe(self): def get_scale_factors(self, ncdfile): """Get a dictionary of scale factors stored in netcdf file""" sfactors = {} + # being overly cautious by setting mmap to False, probably would + # be faster & ok to set it to True with netcdf.netcdf_file(ncdfile, mmap=False) as f: for var in f.variables: if hasattr(f.variables[var], 'scale_factor'): From 46f63ef0150da4b2ca91c0653e1f7c11fa1b6994 Mon Sep 17 00:00:00 2001 From: Irfan Alibay Date: Fri, 20 Aug 2021 21:06:05 +0100 Subject: [PATCH 13/13] Update package/MDAnalysis/coordinates/TRJ.py Co-authored-by: Oliver Beckstein --- package/MDAnalysis/coordinates/TRJ.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/MDAnalysis/coordinates/TRJ.py b/package/MDAnalysis/coordinates/TRJ.py index 5f215df8f61..3587458cad1 100644 --- a/package/MDAnalysis/coordinates/TRJ.py +++ b/package/MDAnalysis/coordinates/TRJ.py @@ -1236,7 +1236,7 @@ class NCDFPicklable(scipy.io.netcdf.netcdf_file): is a file name, False when `filename` is a file-like object. version : {1, 2}, optional Sets the netcdf file version to read / write. 1 is classic, 2 is - 65-bit offset format. Default is 1. + 64-bit offset format. Default is 1 (but note AMBER ncdf *requires* 2). maskandscale : bool, optional Whether to automatically scale and mask data. Default is False.