From 89c73638cc3d2e5b2a6faa400870e8b3639416e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20St=C3=A4rk?= Date: Mon, 7 Feb 2022 18:49:00 +0100 Subject: [PATCH 01/30] Extended mdanalysis to accept other attributes as well. --- package/MDAnalysis/coordinates/LAMMPS.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/coordinates/LAMMPS.py b/package/MDAnalysis/coordinates/LAMMPS.py index 0cd3874997b..ef6dd695fcd 100644 --- a/package/MDAnalysis/coordinates/LAMMPS.py +++ b/package/MDAnalysis/coordinates/LAMMPS.py @@ -509,7 +509,8 @@ class DumpReader(base.ReaderBase): """ format = 'LAMMPSDUMP' _conventions = ["auto", "unscaled", "scaled", "unwrapped", - "scaled_unwrapped"] + "charge", "scaled_unwrapped"] + _coordtype_column_names = { "unscaled": ["x", "y", "z"], "scaled": ["xs", "ys", "zs"], @@ -518,7 +519,7 @@ class DumpReader(base.ReaderBase): } @store_init_arguments - def __init__(self, filename, + def __init__(self, filename, lammps_coordinate_convention="auto", unwrap_images=False, **kwargs): @@ -681,6 +682,14 @@ def _read_next_timestep(self): coord_cols.extend(image_cols) ids = "id" in attr_to_col_ix + + # Create the data arrays for additional attributes which will be saved + # under ts.data + if len(attrs) > 3: + for attribute_key in attrs[3:]: + ts.data[attribute_key] = np.empty(self.n_atoms) + + # Parse all the atoms for i in range(self.n_atoms): fields = f.readline().split() if ids: @@ -701,6 +710,11 @@ def _read_next_timestep(self): if self._has_forces: ts.forces[i] = [fields[dim] for dim in force_cols] + # Add the capability to also collect other data + if len(attrs) > 3: # Then there is also more than just the positional data + for attribute_key in attrs[3:]: + ts.data[attribute_key][i] = fields[attr_to_col_ix[attribute_key]] + order = np.argsort(indices) ts.positions = ts.positions[order] if self._has_vels: From 99b7875dc72c80aee5166afa48e0a05b2c2ad91c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20St=C3=A4rk?= Date: Wed, 6 Apr 2022 14:24:34 +0200 Subject: [PATCH 02/30] Able to parse arbitrary columns now. --- package/MDAnalysis/coordinates/LAMMPS.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/package/MDAnalysis/coordinates/LAMMPS.py b/package/MDAnalysis/coordinates/LAMMPS.py index ef6dd695fcd..291073a6d72 100644 --- a/package/MDAnalysis/coordinates/LAMMPS.py +++ b/package/MDAnalysis/coordinates/LAMMPS.py @@ -685,9 +685,15 @@ def _read_next_timestep(self): # Create the data arrays for additional attributes which will be saved # under ts.data + additional_keys = [] if len(attrs) > 3: - for attribute_key in attrs[3:]: + for attribute_key in attrs: + # Skip the normal columns + if attribute_key == "id" or attribute_key in self._coordtype_column_names[self.lammps_coordinate_convention]: + continue + # Else this is an additional field ts.data[attribute_key] = np.empty(self.n_atoms) + additional_keys.append(attribute_key) # Parse all the atoms for i in range(self.n_atoms): @@ -711,8 +717,8 @@ def _read_next_timestep(self): ts.forces[i] = [fields[dim] for dim in force_cols] # Add the capability to also collect other data - if len(attrs) > 3: # Then there is also more than just the positional data - for attribute_key in attrs[3:]: + if len(additional_keys) != 0: # Then there is also more than just the positional data + for attribute_key in additional_keys: ts.data[attribute_key][i] = fields[attr_to_col_ix[attribute_key]] order = np.argsort(indices) @@ -721,6 +727,11 @@ def _read_next_timestep(self): ts.velocities = ts.velocities[order] if self._has_forces: ts.forces = ts.forces[order] + + # Also need to sort the additional keys + for attribute_key in additional_keys: + ts.data[attribute_key] = ts.data[attribute_key][order] + if (self.lammps_coordinate_convention.startswith("scaled")): # if coordinates are given in scaled format, undo that ts.positions = distances.transform_StoR(ts.positions, From 36e9bb6bdf70777913929ab1c7d118c0a8797737 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20St=C3=A4rk?= Date: Fri, 6 May 2022 11:57:25 +0200 Subject: [PATCH 03/30] Tried to fix most of the pep8 problems. --- package/MDAnalysis/coordinates/LAMMPS.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/package/MDAnalysis/coordinates/LAMMPS.py b/package/MDAnalysis/coordinates/LAMMPS.py index 291073a6d72..ca914d464fa 100644 --- a/package/MDAnalysis/coordinates/LAMMPS.py +++ b/package/MDAnalysis/coordinates/LAMMPS.py @@ -508,8 +508,8 @@ class DumpReader(base.ReaderBase): .. versionadded:: 0.19.0 """ format = 'LAMMPSDUMP' - _conventions = ["auto", "unscaled", "scaled", "unwrapped", - "charge", "scaled_unwrapped"] + _conventions = ["auto", "unscaled", "scaled", "unwrapped", + "scaled_unwrapped"] _coordtype_column_names = { "unscaled": ["x", "y", "z"], @@ -522,7 +522,7 @@ class DumpReader(base.ReaderBase): def __init__(self, filename, lammps_coordinate_convention="auto", unwrap_images=False, - **kwargs): + additional_columns=False, **kwargs): super(DumpReader, self).__init__(filename, **kwargs) root, ext = os.path.splitext(self.filename) @@ -537,6 +537,9 @@ def __init__(self, filename, self._unwrap = unwrap_images + if additional_columns: + self._additional_columns = additional_columns + self._cache = {} self._reopen() @@ -683,13 +686,17 @@ def _read_next_timestep(self): ids = "id" in attr_to_col_ix - # Create the data arrays for additional attributes which will be saved + # Create the data arrays for additional attributes which will be saved # under ts.data additional_keys = [] if len(attrs) > 3: for attribute_key in attrs: # Skip the normal columns - if attribute_key == "id" or attribute_key in self._coordtype_column_names[self.lammps_coordinate_convention]: + if attribute_key == "id" or \ + attribute_key in \ + self._coordtype_column_names[ + self.lammps_coordinate_convention] \ + or attribute_key not in self._additional_columns: continue # Else this is an additional field ts.data[attribute_key] = np.empty(self.n_atoms) @@ -717,9 +724,11 @@ def _read_next_timestep(self): ts.forces[i] = [fields[dim] for dim in force_cols] # Add the capability to also collect other data - if len(additional_keys) != 0: # Then there is also more than just the positional data + # Then there is also more than just the positional data + if len(additional_keys) != 0: for attribute_key in additional_keys: - ts.data[attribute_key][i] = fields[attr_to_col_ix[attribute_key]] + ts.data[attribute_key][i] = \ + fields[attr_to_col_ix[attribute_key]] order = np.argsort(indices) ts.positions = ts.positions[order] From 3406e5b691a7dd01463900912e7235906b59eed6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20St=C3=A4rk?= Date: Fri, 6 May 2022 12:47:06 +0200 Subject: [PATCH 04/30] First try at testing the additional column part. --- .../MDAnalysisTests/coordinates/reference.py | 10 +++++-- .../coordinates/test_lammps.py | 24 ++++++++++++++- .../data/lammps/additional_columns.data | 29 +++++++++++++++++++ .../data/lammps/additional_columns.lammpstrj | 19 ++++++++++++ testsuite/MDAnalysisTests/datafiles.py | 4 +++ 5 files changed, 82 insertions(+), 4 deletions(-) create mode 100644 testsuite/MDAnalysisTests/data/lammps/additional_columns.data create mode 100644 testsuite/MDAnalysisTests/data/lammps/additional_columns.lammpstrj diff --git a/testsuite/MDAnalysisTests/coordinates/reference.py b/testsuite/MDAnalysisTests/coordinates/reference.py index 2d2ec036bb5..15306769142 100644 --- a/testsuite/MDAnalysisTests/coordinates/reference.py +++ b/testsuite/MDAnalysisTests/coordinates/reference.py @@ -25,9 +25,9 @@ from MDAnalysisTests import datafiles from MDAnalysisTests.datafiles import (PDB_small, PDB, LAMMPSdata, LAMMPSdata2, LAMMPSdcd2, - LAMMPSdata_mini, PSF_TRICLINIC, - DCD_TRICLINIC, PSF_NAMD_TRICLINIC, - DCD_NAMD_TRICLINIC) + LAMMPSdata_mini, LAMMPSdata_additional_columns, + PSF_TRICLINIC, DCD_TRICLINIC, + PSF_NAMD_TRICLINIC, DCD_NAMD_TRICLINIC) class RefAdKSmall(object): @@ -227,3 +227,7 @@ class RefLAMMPSDataMini(object): dtype=np.float32) dimensions = np.array([60., 50., 30., 90., 90., 90.], dtype=np.float32) + +class RefLAMMPSDataAdditionalColumns(object): + filename = LAMMPSdata_additional_columns + n_atoms = 10 diff --git a/testsuite/MDAnalysisTests/coordinates/test_lammps.py b/testsuite/MDAnalysisTests/coordinates/test_lammps.py index c8afb286fb7..0b561ba05ff 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_lammps.py +++ b/testsuite/MDAnalysisTests/coordinates/test_lammps.py @@ -34,11 +34,13 @@ from MDAnalysisTests import make_Universe from MDAnalysisTests.coordinates.reference import ( RefLAMMPSData, RefLAMMPSDataMini, RefLAMMPSDataDCD, + RefLAMMPSDataAdditionalColumns ) from MDAnalysisTests.datafiles import ( LAMMPScnt, LAMMPShyd, LAMMPSdata, LAMMPSdata_mini, LAMMPSdata_triclinic, LAMMPSDUMP, LAMMPSDUMP_allcoords, LAMMPSDUMP_nocoords, LAMMPSDUMP_triclinic, - LAMMPSDUMP_image_vf, LAMMPS_image_vf + LAMMPSDUMP_image_vf, LAMMPS_image_vf, LAMMPSdata_additional_columns, + LAMMPSDUMP_additional_columns ) @@ -498,6 +500,7 @@ def u(self, tmpdir, request): # no conversion needed f = LAMMPSDUMP else: + # Select if one wants to use the additional column format f = str(tmpdir.join('lammps.' + trjtype)) with bz2.BZ2File(LAMMPSDUMP, 'rb') as datain: data = datain.read() @@ -511,6 +514,21 @@ def u(self, tmpdir, request): yield mda.Universe(f, format='LAMMPSDUMP', lammps_coordinate_convention="auto") + @pytest.fixture() + def u_add(self): + f = LAMMPSDUMP_additional_columns + top = LAMMPSdata_additional_columns + yield mda.Universe(top, f, format='LAMMPSDUMP', + lammps_coordinate_convention="auto", + additional_columns=['q']) + + @pytest.fixture() + def reference_additional_columns(self): + data['charges'] = np.array([2.58855e-03, 6.91952e-05, 1.05548e-02, 4.20319e-03, + 9.19172e-03, 4.79777e-03, 6.36864e-04, 5.87125e-03, + -2.18125e-03, 6.88910e-03]) + return data + @pytest.fixture() def reference_positions(self): # manually copied from traj file @@ -592,6 +610,10 @@ def test_atom_reordering(self, u, reference_positions): assert_allclose(atom1.position, atom1_pos-bmin, atol=1e-5) assert_allclose(atom13.position, atom13_pos-bmin, atol=1e-5) + def test_additional_colunmns(self, u_add, reference_additional_columns): + charges = u.trajectory[0].data['q'] + assert_almost_equal(charges, reference_additional_columns['charges']) + @pytest.mark.parametrize("convention", ["unscaled", "unwrapped", "scaled_unwrapped"]) diff --git a/testsuite/MDAnalysisTests/data/lammps/additional_columns.data b/testsuite/MDAnalysisTests/data/lammps/additional_columns.data new file mode 100644 index 00000000000..80b9a57bb7d --- /dev/null +++ b/testsuite/MDAnalysisTests/data/lammps/additional_columns.data @@ -0,0 +1,29 @@ +LAMMPS data file via write_data, version 24 Mar 2022, timestep = 500 + +10 atoms +1 atom types + +0 42.6 xlo xhi +0 44.2712 ylo yhi +-25.1 25.1 zlo zhi + +Masses + +1 12.011 + +Pair Coeffs # lj/cut/coul/long/omp + +1 0.0663 3.5812 + +Atoms # full + +1 2 1 -0.00706800004577013 2.84 8.17 -25 0 0 0 +2 2 1 0.004078816788554217 7.1 8.17 -25 0 0 0 +3 2 1 -0.005824512619752745 2.13 6.94 -25 0 0 0 +4 2 1 0.002812345167059992 6.39 6.94 -25 0 0 0 +5 2 1 -0.004070019151543417 2.84 5.71 -25 0 0 0 +6 2 1 0.004796116641855679 7.1 5.71 -25 0 0 0 +7 2 1 -0.003217742434809291 2.13 4.48 -25 0 0 0 +8 2 1 0.0008273956785370801 6.39 4.48 -25 0 0 0 +9 2 1 -0.0003942558636157474 2.84 3.25 -25 0 0 0 +10 2 1 0.001288716009147968 7.1 3.25 -25 0 0 0 diff --git a/testsuite/MDAnalysisTests/data/lammps/additional_columns.lammpstrj b/testsuite/MDAnalysisTests/data/lammps/additional_columns.lammpstrj new file mode 100644 index 00000000000..703e5d248db --- /dev/null +++ b/testsuite/MDAnalysisTests/data/lammps/additional_columns.lammpstrj @@ -0,0 +1,19 @@ +ITEM: TIMESTEP +0 +ITEM: NUMBER OF ATOMS +10 +ITEM: BOX BOUNDS pp pp ff +0.0000000000000000e+00 4.2600000000000001e+01 +0.0000000000000000e+00 4.4271200000000000e+01 +-2.5100000000000001e+01 2.5100000000000001e+01 +ITEM: ATOMS id x y z q +1 2.84 8.17 -25 0.00258855 +2 7.1 8.17 -25 6.91952e-05 +3 2.13 6.94 -25 0.0105548 +4 6.39 6.94 -25 0.00420319 +5 2.84 5.71 -25 0.00919172 +6 7.1 5.71 -25 0.00479777 +7 2.13 4.48 -25 0.000636864 +8 6.39 4.48 -25 0.00587125 +9 2.84 3.25 -25 -0.00218125 +10 7.1 3.25 -25 0.0068891 diff --git a/testsuite/MDAnalysisTests/datafiles.py b/testsuite/MDAnalysisTests/datafiles.py index 25e85618afe..223d52fa6bb 100644 --- a/testsuite/MDAnalysisTests/datafiles.py +++ b/testsuite/MDAnalysisTests/datafiles.py @@ -155,6 +155,7 @@ "LAMMPSdata_deletedatoms", # with deleted atoms "LAMMPSdata_triclinic", # lammpsdata file to test triclinic dimension parsing, albite with most atoms deleted "LAMMPSdata_PairIJ", # lammps datafile with a PairIJ Coeffs section + "LAMMPSdata_additional_columns", # structure for the additional column lammpstrj "LAMMPSDUMP", "LAMMPSDUMP_long", # lammpsdump file with a few zeros sprinkled in the first column first frame "LAMMPSDUMP_allcoords", # lammpsdump file with all coordinate conventions (x,xs,xu,xsu) present, from LAMMPS rdf example @@ -165,6 +166,7 @@ "LAMMPSDUMP_chain1", # Lammps dump file with chain reader "LAMMPSDUMP_chain2", # Lammps dump file with chain reader "LAMMPS_chain", # Lammps data file with chain reader + "LAMMPSDUMP_additional_columns", # lammpsdump file with additional data (an additional charge column) "unordered_res", # pdb file with resids non sequential "GMS_ASYMOPT", # GAMESS C1 optimization "GMS_SYMOPT", # GAMESS D4h optimization @@ -544,6 +546,8 @@ LAMMPSDUMP_chain2 = (_data_ref / "lammps/chain_dump_2.lammpstrj").as_posix() LAMMPS_chain = (_data_ref / "lammps/chain_initial.data").as_posix() LAMMPSdata_many_bonds = (_data_ref / "lammps/a_lot_of_bond_types.data").as_posix() +LAMMPSdata_additional_columns = (_data_ref / "lammps/additional_columns.data").as_posix() +LAMMPSDUMP_additional_columns = (_data_ref / "lammps/additional_columns.lammpstrj").as_posix() unordered_res = (_data_ref / "unordered_res.pdb").as_posix() From 87ed9cea5660a8c03debf9b968734b14e0d95b30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20St=C3=A4rk?= Date: Fri, 13 May 2022 11:08:11 +0200 Subject: [PATCH 05/30] Testing multi read columns as well. --- .../coordinates/test_lammps.py | 12 +++++++--- .../data/lammps/additional_columns.lammpstrj | 22 +++++++++---------- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/testsuite/MDAnalysisTests/coordinates/test_lammps.py b/testsuite/MDAnalysisTests/coordinates/test_lammps.py index 0b561ba05ff..f57e2a9ad60 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_lammps.py +++ b/testsuite/MDAnalysisTests/coordinates/test_lammps.py @@ -518,15 +518,19 @@ def u(self, tmpdir, request): def u_add(self): f = LAMMPSDUMP_additional_columns top = LAMMPSdata_additional_columns - yield mda.Universe(top, f, format='LAMMPSDUMP', + yield (mda.Universe(top, f, format='LAMMPSDUMP', lammps_coordinate_convention="auto", - additional_columns=['q']) + additional_columns=['q']), + mda.Universe(top, f, format='LAMMPSDUMP', + lammps_coordinate_convention="auto", + additional_columns=['q', 'l'])) @pytest.fixture() def reference_additional_columns(self): data['charges'] = np.array([2.58855e-03, 6.91952e-05, 1.05548e-02, 4.20319e-03, 9.19172e-03, 4.79777e-03, 6.36864e-04, 5.87125e-03, -2.18125e-03, 6.88910e-03]) + data['l'] = np.array([1.1, 1.2]*5) # random test data return data @pytest.fixture() @@ -611,8 +615,10 @@ def test_atom_reordering(self, u, reference_positions): assert_allclose(atom13.position, atom13_pos-bmin, atol=1e-5) def test_additional_colunmns(self, u_add, reference_additional_columns): - charges = u.trajectory[0].data['q'] + charges = u_add[0].trajectory[0].data['q'] # this is the universe with just q + second = u_add[1].trajectory[0].data['l'] # this is the universe with both assert_almost_equal(charges, reference_additional_columns['charges']) + assert_almost_equal(second, reference_additional_columns['l']) @pytest.mark.parametrize("convention", diff --git a/testsuite/MDAnalysisTests/data/lammps/additional_columns.lammpstrj b/testsuite/MDAnalysisTests/data/lammps/additional_columns.lammpstrj index 703e5d248db..b2d59267968 100644 --- a/testsuite/MDAnalysisTests/data/lammps/additional_columns.lammpstrj +++ b/testsuite/MDAnalysisTests/data/lammps/additional_columns.lammpstrj @@ -6,14 +6,14 @@ ITEM: BOX BOUNDS pp pp ff 0.0000000000000000e+00 4.2600000000000001e+01 0.0000000000000000e+00 4.4271200000000000e+01 -2.5100000000000001e+01 2.5100000000000001e+01 -ITEM: ATOMS id x y z q -1 2.84 8.17 -25 0.00258855 -2 7.1 8.17 -25 6.91952e-05 -3 2.13 6.94 -25 0.0105548 -4 6.39 6.94 -25 0.00420319 -5 2.84 5.71 -25 0.00919172 -6 7.1 5.71 -25 0.00479777 -7 2.13 4.48 -25 0.000636864 -8 6.39 4.48 -25 0.00587125 -9 2.84 3.25 -25 -0.00218125 -10 7.1 3.25 -25 0.0068891 +ITEM: ATOMS id x y z q l +1 2.84 8.17 -25 0.00258855 1.1 +2 7.1 8.17 -25 6.91952e-05 1.2 +3 2.13 6.94 -25 0.0105548 1.1 +4 6.39 6.94 -25 0.00420319 1.2 +5 2.84 5.71 -25 0.00919172 1.1 +6 7.1 5.71 -25 0.00479777 1.2 +7 2.13 4.48 -25 0.000636864 1.1 +8 6.39 4.48 -25 0.00587125 1.2 +9 2.84 3.25 -25 -0.00218125 1.1 +10 7.1 3.25 -25 0.0068891 1.2 From efca264b68e2238fd8a207dbc25f97e0afefdca8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20St=C3=A4rk?= Date: Fri, 20 May 2022 10:21:15 +0200 Subject: [PATCH 06/30] Fix the no additional columns case. --- package/MDAnalysis/coordinates/LAMMPS.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/package/MDAnalysis/coordinates/LAMMPS.py b/package/MDAnalysis/coordinates/LAMMPS.py index ca914d464fa..3ed4477ce70 100644 --- a/package/MDAnalysis/coordinates/LAMMPS.py +++ b/package/MDAnalysis/coordinates/LAMMPS.py @@ -539,6 +539,8 @@ def __init__(self, filename, if additional_columns: self._additional_columns = additional_columns + else: + self._additional_columns = [] self._cache = {} From 2f023953cc9ae841495451ce9b8edf48ac0946ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20St=C3=A4rk?= Date: Tue, 28 Jun 2022 16:47:25 +0200 Subject: [PATCH 07/30] Implemented requested changes to docs. --- package/MDAnalysis/coordinates/LAMMPS.py | 38 ++++++++++++++++++++---- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/package/MDAnalysis/coordinates/LAMMPS.py b/package/MDAnalysis/coordinates/LAMMPS.py index 3ed4477ce70..342869befa5 100644 --- a/package/MDAnalysis/coordinates/LAMMPS.py +++ b/package/MDAnalysis/coordinates/LAMMPS.py @@ -465,6 +465,35 @@ class DumpReader(base.ReaderBase): *gamma*)`` to represent the unit cell. Lengths *A*, *B*, *C* are in the MDAnalysis length unit (Å), and angles are in degrees. + By using the keyword `additional_columns`, you can specify arbitrary data to be + read alongside the coordinates. If specified, the keyword expects a list of the + names of the columns that you want to have read. The results of the parsing are + saved to the time step `data` dictionary alongside the name of the data column. + For instance, if you have time-dependent charges saved in a LAMMPS dump such as + + ``` + ITEM: ATOMS id x y z q l + 1 2.84 8.17 -25 0.00258855 1.1 + 2 7.1 8.17 -25 6.91952e-05 1.2 + ``` + + Then you may parse the additional columns `q` and `l` via. + + ``` + u = mda.Universe('path_to_data', 'path_to_lammpsdump', + additional_columns=['q', 'l']) + ``` + + The additional data is then available for each time step via (as the value of + the `data` dictionary, sorted by the ids of the atoms). + + ``` + for ts in u.trajectory: + charges = ts.data['q'] # Access the additional data, sorted by the id + ls = ts.data['l'] + ... + ``` + Parameters ---------- filename : str @@ -508,8 +537,7 @@ class DumpReader(base.ReaderBase): .. versionadded:: 0.19.0 """ format = 'LAMMPSDUMP' - _conventions = ["auto", "unscaled", "scaled", "unwrapped", - "scaled_unwrapped"] + _conventions = ["auto", "unscaled", "scaled", "unwrapped", "scaled_unwrapped"] _coordtype_column_names = { "unscaled": ["x", "y", "z"], @@ -694,10 +722,10 @@ def _read_next_timestep(self): if len(attrs) > 3: for attribute_key in attrs: # Skip the normal columns - if attribute_key == "id" or \ - attribute_key in \ + if attribute_key == "id" or + attribute_key in self._coordtype_column_names[ - self.lammps_coordinate_convention] \ + self.lammps_coordinate_convention] or attribute_key not in self._additional_columns: continue # Else this is an additional field From c8f61dbd0f3abdf066e849f966bbc782ce070e8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20St=C3=A4rk?= Date: Tue, 28 Jun 2022 16:48:12 +0200 Subject: [PATCH 08/30] Implemented the requested changes to the tests. --- .../MDAnalysisTests/coordinates/reference.py | 6 ++++-- .../MDAnalysisTests/coordinates/test_lammps.py | 16 ++++------------ 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/testsuite/MDAnalysisTests/coordinates/reference.py b/testsuite/MDAnalysisTests/coordinates/reference.py index 15306769142..5792aeaa129 100644 --- a/testsuite/MDAnalysisTests/coordinates/reference.py +++ b/testsuite/MDAnalysisTests/coordinates/reference.py @@ -229,5 +229,7 @@ class RefLAMMPSDataMini(object): class RefLAMMPSDataAdditionalColumns(object): - filename = LAMMPSdata_additional_columns - n_atoms = 10 + charges = np.array([2.58855e-03, 6.91952e-05, 1.05548e-02, 4.20319e-03, + 9.19172e-03, 4.79777e-03, 6.36864e-04, 5.87125e-03, + -2.18125e-03, 6.88910e-03]) + additional_data = np.array(5 * [1.1, 1.2]) diff --git a/testsuite/MDAnalysisTests/coordinates/test_lammps.py b/testsuite/MDAnalysisTests/coordinates/test_lammps.py index f57e2a9ad60..bf47efa96ed 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_lammps.py +++ b/testsuite/MDAnalysisTests/coordinates/test_lammps.py @@ -515,7 +515,7 @@ def u(self, tmpdir, request): lammps_coordinate_convention="auto") @pytest.fixture() - def u_add(self): + def u_additional_columns(self): f = LAMMPSDUMP_additional_columns top = LAMMPSdata_additional_columns yield (mda.Universe(top, f, format='LAMMPSDUMP', @@ -525,14 +525,6 @@ def u_add(self): lammps_coordinate_convention="auto", additional_columns=['q', 'l'])) - @pytest.fixture() - def reference_additional_columns(self): - data['charges'] = np.array([2.58855e-03, 6.91952e-05, 1.05548e-02, 4.20319e-03, - 9.19172e-03, 4.79777e-03, 6.36864e-04, 5.87125e-03, - -2.18125e-03, 6.88910e-03]) - data['l'] = np.array([1.1, 1.2]*5) # random test data - return data - @pytest.fixture() def reference_positions(self): # manually copied from traj file @@ -614,11 +606,11 @@ def test_atom_reordering(self, u, reference_positions): assert_allclose(atom1.position, atom1_pos-bmin, atol=1e-5) assert_allclose(atom13.position, atom13_pos-bmin, atol=1e-5) - def test_additional_colunmns(self, u_add, reference_additional_columns): + def test_additional_columns(self, u_add, reference_additional_columns): charges = u_add[0].trajectory[0].data['q'] # this is the universe with just q second = u_add[1].trajectory[0].data['l'] # this is the universe with both - assert_almost_equal(charges, reference_additional_columns['charges']) - assert_almost_equal(second, reference_additional_columns['l']) + assert_almost_equal(charges, RefLAMMPSDataAdditionalColumns.charges) + assert_almost_equal(second, RefLAMMPSDataAdditionalColumns.additional_data) @pytest.mark.parametrize("convention", From 140b0627ebdae905b0088dc7f906ec21f7995643 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20St=C3=A4rk?= Date: Tue, 28 Jun 2022 17:14:51 +0200 Subject: [PATCH 09/30] Incorporated the PEP8 comments. --- package/MDAnalysis/coordinates/LAMMPS.py | 50 ++++++++++++------- .../MDAnalysisTests/coordinates/reference.py | 7 +-- .../coordinates/test_lammps.py | 22 ++++---- 3 files changed, 48 insertions(+), 31 deletions(-) diff --git a/package/MDAnalysis/coordinates/LAMMPS.py b/package/MDAnalysis/coordinates/LAMMPS.py index 342869befa5..41e6dcbecfb 100644 --- a/package/MDAnalysis/coordinates/LAMMPS.py +++ b/package/MDAnalysis/coordinates/LAMMPS.py @@ -458,18 +458,30 @@ class DumpReader(base.ReaderBase): """Reads the default `LAMMPS dump format `__ - Supports coordinates in various LAMMPS coordinate conventions and both - orthogonal and triclinic simulation box dimensions (for more details see - `documentation `__). In - either case, MDAnalysis will always use ``(*A*, *B*, *C*, *alpha*, *beta*, - *gamma*)`` to represent the unit cell. Lengths *A*, *B*, *C* are in the - MDAnalysis length unit (Å), and angles are in degrees. - - By using the keyword `additional_columns`, you can specify arbitrary data to be - read alongside the coordinates. If specified, the keyword expects a list of the - names of the columns that you want to have read. The results of the parsing are - saved to the time step `data` dictionary alongside the name of the data column. - For instance, if you have time-dependent charges saved in a LAMMPS dump such as + Supports coordinates in the LAMMPS "unscaled" (x,y,z), "scaled" (xs,ys,zs), + "unwrapped" (xu,yu,zu) and "scaled_unwrapped" (xsu,ysu,zsu) coordinate + conventions (see https://docs.lammps.org/dump.html for more details). + If `lammps_coordinate_convention='auto'` (default), + one will be guessed. Guessing checks whether the coordinates fit each + convention in the order "unscaled", "scaled", "unwrapped", + "scaled_unwrapped" and whichever set of coordinates is detected first will + be used. If coordinates are given in the scaled coordinate convention + (xs,ys,zs) or scaled unwrapped coordinate convention (xsu,ysu,zsu) they + will automatically be converted from their scaled/fractional representation + to their real values. + + Supports both orthogonal and triclinic simulation box dimensions (for more + details see https://docs.lammps.org/Howto_triclinic.html). In either case, + MDAnalysis will always use ``(*A*, *B*, *C*, *alpha*, *beta*, *gamma*)`` + to represent the unit cell. Lengths *A*, *B*, *C* are in the MDAnalysis + length unit (Å), and angles are in degrees. + + By using the keyword `additional_columns`, you can specify arbitrary data + to be read alongside the coordinates. If specified, the keyword expects a + list of the names of the columns that you want to have read. The results + of the parsing are saved to the time step `data` dictionary alongside the + name of the data column. For instance, if you have time-dependent charges + saved in a LAMMPS dump such as ``` ITEM: ATOMS id x y z q l @@ -484,8 +496,8 @@ class DumpReader(base.ReaderBase): additional_columns=['q', 'l']) ``` - The additional data is then available for each time step via (as the value of - the `data` dictionary, sorted by the ids of the atoms). + The additional data is then available for each time step via + (as the value of the `data` dictionary, sorted by the ids of the atoms). ``` for ts in u.trajectory: @@ -722,11 +734,11 @@ def _read_next_timestep(self): if len(attrs) > 3: for attribute_key in attrs: # Skip the normal columns - if attribute_key == "id" or - attribute_key in - self._coordtype_column_names[ - self.lammps_coordinate_convention] - or attribute_key not in self._additional_columns: + if (attribute_key == "id" or + attribute_key in + self._coordtype_column_names[ + self.lammps_coordinate_convention] + or attribute_key not in self._additional_columns): continue # Else this is an additional field ts.data[attribute_key] = np.empty(self.n_atoms) diff --git a/testsuite/MDAnalysisTests/coordinates/reference.py b/testsuite/MDAnalysisTests/coordinates/reference.py index 5792aeaa129..dc65d8c2af2 100644 --- a/testsuite/MDAnalysisTests/coordinates/reference.py +++ b/testsuite/MDAnalysisTests/coordinates/reference.py @@ -25,8 +25,9 @@ from MDAnalysisTests import datafiles from MDAnalysisTests.datafiles import (PDB_small, PDB, LAMMPSdata, LAMMPSdata2, LAMMPSdcd2, - LAMMPSdata_mini, LAMMPSdata_additional_columns, - PSF_TRICLINIC, DCD_TRICLINIC, + LAMMPSdata_mini, + LAMMPSdata_additional_columns, + PSF_TRICLINIC, DCD_TRICLINIC, PSF_NAMD_TRICLINIC, DCD_NAMD_TRICLINIC) @@ -230,6 +231,6 @@ class RefLAMMPSDataMini(object): class RefLAMMPSDataAdditionalColumns(object): charges = np.array([2.58855e-03, 6.91952e-05, 1.05548e-02, 4.20319e-03, - 9.19172e-03, 4.79777e-03, 6.36864e-04, 5.87125e-03, + 9.19172e-03, 4.79777e-03, 6.36864e-04, 5.87125e-03, -2.18125e-03, 6.88910e-03]) additional_data = np.array(5 * [1.1, 1.2]) diff --git a/testsuite/MDAnalysisTests/coordinates/test_lammps.py b/testsuite/MDAnalysisTests/coordinates/test_lammps.py index bf47efa96ed..b33a1c6f587 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_lammps.py +++ b/testsuite/MDAnalysisTests/coordinates/test_lammps.py @@ -519,11 +519,11 @@ def u_additional_columns(self): f = LAMMPSDUMP_additional_columns top = LAMMPSdata_additional_columns yield (mda.Universe(top, f, format='LAMMPSDUMP', - lammps_coordinate_convention="auto", - additional_columns=['q']), - mda.Universe(top, f, format='LAMMPSDUMP', - lammps_coordinate_convention="auto", - additional_columns=['q', 'l'])) + lammps_coordinate_convention="auto", + additional_columns=['q']), + mda.Universe(top, f, format='LAMMPSDUMP', + lammps_coordinate_convention="auto", + additional_columns=['q', 'l'])) @pytest.fixture() def reference_positions(self): @@ -607,10 +607,14 @@ def test_atom_reordering(self, u, reference_positions): assert_allclose(atom13.position, atom13_pos-bmin, atol=1e-5) def test_additional_columns(self, u_add, reference_additional_columns): - charges = u_add[0].trajectory[0].data['q'] # this is the universe with just q - second = u_add[1].trajectory[0].data['l'] # this is the universe with both - assert_almost_equal(charges, RefLAMMPSDataAdditionalColumns.charges) - assert_almost_equal(second, RefLAMMPSDataAdditionalColumns.additional_data) + # this is the universe with just q + charges = u_add[0].trajectory[0].data['q'] + # this is the universe with both + second = u_add[1].trajectory[0].data['l'] + assert_almost_equal(charges, + RefLAMMPSDataAdditionalColumns.charges) + assert_almost_equal(second, + RefLAMMPSDataAdditionalColumns.additional_data) @pytest.mark.parametrize("convention", From 03654040c94134920134bdc19705589564a9253b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20St=C3=A4rk?= Date: Tue, 28 Jun 2022 17:19:56 +0200 Subject: [PATCH 10/30] Third round of PEP... --- package/MDAnalysis/coordinates/LAMMPS.py | 10 ++++++---- testsuite/MDAnalysisTests/coordinates/test_lammps.py | 6 +++--- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/package/MDAnalysis/coordinates/LAMMPS.py b/package/MDAnalysis/coordinates/LAMMPS.py index 41e6dcbecfb..4264b00e379 100644 --- a/package/MDAnalysis/coordinates/LAMMPS.py +++ b/package/MDAnalysis/coordinates/LAMMPS.py @@ -492,7 +492,7 @@ class DumpReader(base.ReaderBase): Then you may parse the additional columns `q` and `l` via. ``` - u = mda.Universe('path_to_data', 'path_to_lammpsdump', + u = mda.Universe('path_to_data', 'path_to_lammpsdump', additional_columns=['q', 'l']) ``` @@ -549,7 +549,8 @@ class DumpReader(base.ReaderBase): .. versionadded:: 0.19.0 """ format = 'LAMMPSDUMP' - _conventions = ["auto", "unscaled", "scaled", "unwrapped", "scaled_unwrapped"] + _conventions = ["auto", "unscaled", "scaled", "unwrapped", + "scaled_unwrapped"] _coordtype_column_names = { "unscaled": ["x", "y", "z"], @@ -737,8 +738,9 @@ def _read_next_timestep(self): if (attribute_key == "id" or attribute_key in self._coordtype_column_names[ - self.lammps_coordinate_convention] - or attribute_key not in self._additional_columns): + self.lammps_coordinate_convention + ] or + attribute_key not in self._additional_columns): continue # Else this is an additional field ts.data[attribute_key] = np.empty(self.n_atoms) diff --git a/testsuite/MDAnalysisTests/coordinates/test_lammps.py b/testsuite/MDAnalysisTests/coordinates/test_lammps.py index b33a1c6f587..187c2513c97 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_lammps.py +++ b/testsuite/MDAnalysisTests/coordinates/test_lammps.py @@ -608,12 +608,12 @@ def test_atom_reordering(self, u, reference_positions): def test_additional_columns(self, u_add, reference_additional_columns): # this is the universe with just q - charges = u_add[0].trajectory[0].data['q'] + charges = u_add[0].trajectory[0].data['q'] # this is the universe with both second = u_add[1].trajectory[0].data['l'] - assert_almost_equal(charges, + assert_almost_equal(charges, RefLAMMPSDataAdditionalColumns.charges) - assert_almost_equal(second, + assert_almost_equal(second, RefLAMMPSDataAdditionalColumns.additional_data) From 138d5694e286abadaac42a7f3234fd00013c0f70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20St=C3=A4rk?= Date: Tue, 28 Jun 2022 17:24:25 +0200 Subject: [PATCH 11/30] PEP8 ... --- package/MDAnalysis/coordinates/LAMMPS.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/package/MDAnalysis/coordinates/LAMMPS.py b/package/MDAnalysis/coordinates/LAMMPS.py index 4264b00e379..38aafe51e91 100644 --- a/package/MDAnalysis/coordinates/LAMMPS.py +++ b/package/MDAnalysis/coordinates/LAMMPS.py @@ -736,11 +736,10 @@ def _read_next_timestep(self): for attribute_key in attrs: # Skip the normal columns if (attribute_key == "id" or - attribute_key in - self._coordtype_column_names[ - self.lammps_coordinate_convention - ] or - attribute_key not in self._additional_columns): + attribute_key in + self._coordtype_column_names[ + self.lammps_coordinate_convention + ] or attribute_key not in self._additional_columns): continue # Else this is an additional field ts.data[attribute_key] = np.empty(self.n_atoms) From 5ab661a80a451958e8c3447790ccf03a7064dd00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20St=C3=A4rk?= Date: Tue, 28 Jun 2022 17:30:37 +0200 Subject: [PATCH 12/30] Authors and changelog. --- package/AUTHORS | 1 + package/CHANGELOG | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/package/AUTHORS b/package/AUTHORS index c413c90462d..e53fd78cafe 100644 --- a/package/AUTHORS +++ b/package/AUTHORS @@ -222,6 +222,7 @@ Chronological list of authors - Shubham Kumar - Zaheer Timol - Geongi Moon + - Philipp Stärk External code ------------- diff --git a/package/CHANGELOG b/package/CHANGELOG index bca8cc647ad..0fe870bc440 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -15,7 +15,7 @@ The rules for this file: ------------------------------------------------------------------------------- ??/??/?? IAlibay, ianmkenney, PicoCentauri, pgbarletta, p-j-smith, - richardjgowers, lilyminium + richardjgowers, lilyminium, pstaerk * 2.7.0 @@ -29,6 +29,7 @@ Fixes * Fixes hydrogenbonds tutorial path to point to hbonds (Issue #4285, PR #4286) Enhancements + * Added parsing of arbitrary columns of the LAMMPS dump parser. * Adds external sidebar links (Issue #4296) * Updated lib.qcprot.CalcRMSDRotationalMatrix to accept either float32 or float64 inputs (PR #4273, part of #3927) From a785ceccdac7ba91238098b04dced165f514349a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20St=C3=A4rk?= Date: Wed, 29 Jun 2022 11:00:02 +0200 Subject: [PATCH 13/30] Variable renaming issue. --- testsuite/MDAnalysisTests/coordinates/test_lammps.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/testsuite/MDAnalysisTests/coordinates/test_lammps.py b/testsuite/MDAnalysisTests/coordinates/test_lammps.py index 187c2513c97..ee4361fe203 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_lammps.py +++ b/testsuite/MDAnalysisTests/coordinates/test_lammps.py @@ -606,11 +606,11 @@ def test_atom_reordering(self, u, reference_positions): assert_allclose(atom1.position, atom1_pos-bmin, atol=1e-5) assert_allclose(atom13.position, atom13_pos-bmin, atol=1e-5) - def test_additional_columns(self, u_add, reference_additional_columns): + def test_additional_columns(self, u_additional_columns, reference_additional_columns): # this is the universe with just q - charges = u_add[0].trajectory[0].data['q'] + charges = u_additional_columns[0].trajectory[0].data['q'] # this is the universe with both - second = u_add[1].trajectory[0].data['l'] + second = u_additional_columns[1].trajectory[0].data['l'] assert_almost_equal(charges, RefLAMMPSDataAdditionalColumns.charges) assert_almost_equal(second, From ade564780061a4f851087a78386240b9687fac19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20St=C3=A4rk?= Date: Wed, 29 Jun 2022 14:17:36 +0200 Subject: [PATCH 14/30] Hopefully fixed documentation. --- package/MDAnalysis/coordinates/LAMMPS.py | 25 +++++++++++------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/package/MDAnalysis/coordinates/LAMMPS.py b/package/MDAnalysis/coordinates/LAMMPS.py index 38aafe51e91..05867732f01 100644 --- a/package/MDAnalysis/coordinates/LAMMPS.py +++ b/package/MDAnalysis/coordinates/LAMMPS.py @@ -483,28 +483,25 @@ class DumpReader(base.ReaderBase): name of the data column. For instance, if you have time-dependent charges saved in a LAMMPS dump such as - ``` - ITEM: ATOMS id x y z q l - 1 2.84 8.17 -25 0.00258855 1.1 - 2 7.1 8.17 -25 6.91952e-05 1.2 - ``` + .. code-block:: python + ITEM: ATOMS id x y z q l + 1 2.84 8.17 -25 0.00258855 1.1 + 2 7.1 8.17 -25 6.91952e-05 1.2 Then you may parse the additional columns `q` and `l` via. - ``` - u = mda.Universe('path_to_data', 'path_to_lammpsdump', - additional_columns=['q', 'l']) - ``` + .. code-block:: python + u = mda.Universe('path_to_data', 'path_to_lammpsdump', + additional_columns=['q', 'l']) The additional data is then available for each time step via (as the value of the `data` dictionary, sorted by the ids of the atoms). - ``` - for ts in u.trajectory: - charges = ts.data['q'] # Access the additional data, sorted by the id - ls = ts.data['l'] + .. code-block:: python + for ts in u.trajectory: + charges = ts.data['q'] # Access the additional data, sorted by the id + ls = ts.data['l'] ... - ``` Parameters ---------- From 603116a708d129736fbd2aefff23369d26e1579d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20St=C3=A4rk?= Date: Wed, 29 Jun 2022 14:50:01 +0200 Subject: [PATCH 15/30] Sphinx --- package/MDAnalysis/coordinates/LAMMPS.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/package/MDAnalysis/coordinates/LAMMPS.py b/package/MDAnalysis/coordinates/LAMMPS.py index 05867732f01..e637289f7d9 100644 --- a/package/MDAnalysis/coordinates/LAMMPS.py +++ b/package/MDAnalysis/coordinates/LAMMPS.py @@ -484,6 +484,7 @@ class DumpReader(base.ReaderBase): saved in a LAMMPS dump such as .. code-block:: python + ITEM: ATOMS id x y z q l 1 2.84 8.17 -25 0.00258855 1.1 2 7.1 8.17 -25 6.91952e-05 1.2 @@ -491,6 +492,7 @@ class DumpReader(base.ReaderBase): Then you may parse the additional columns `q` and `l` via. .. code-block:: python + u = mda.Universe('path_to_data', 'path_to_lammpsdump', additional_columns=['q', 'l']) @@ -498,6 +500,7 @@ class DumpReader(base.ReaderBase): (as the value of the `data` dictionary, sorted by the ids of the atoms). .. code-block:: python + for ts in u.trajectory: charges = ts.data['q'] # Access the additional data, sorted by the id ls = ts.data['l'] From bc9296ce6b8d63d5ffdf2d86089d7b9a242e4760 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20St=C3=A4rk?= Date: Tue, 8 Nov 2022 10:39:10 +0100 Subject: [PATCH 16/30] Hopefully fixed the tests --- testsuite/MDAnalysisTests/coordinates/test_lammps.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testsuite/MDAnalysisTests/coordinates/test_lammps.py b/testsuite/MDAnalysisTests/coordinates/test_lammps.py index ee4361fe203..ca533aa937b 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_lammps.py +++ b/testsuite/MDAnalysisTests/coordinates/test_lammps.py @@ -606,7 +606,7 @@ def test_atom_reordering(self, u, reference_positions): assert_allclose(atom1.position, atom1_pos-bmin, atol=1e-5) assert_allclose(atom13.position, atom13_pos-bmin, atol=1e-5) - def test_additional_columns(self, u_additional_columns, reference_additional_columns): + def test_additional_columns(self, u_additional_columns): # this is the universe with just q charges = u_additional_columns[0].trajectory[0].data['q'] # this is the universe with both From d6d54263259d6968b0d03d3f488126de7d59df21 Mon Sep 17 00:00:00 2001 From: hejamu Date: Fri, 29 Sep 2023 18:22:11 +0100 Subject: [PATCH 17/30] Implement input from UGM23 --- package/MDAnalysis/coordinates/LAMMPS.py | 61 +++++++++++++----------- 1 file changed, 34 insertions(+), 27 deletions(-) diff --git a/package/MDAnalysis/coordinates/LAMMPS.py b/package/MDAnalysis/coordinates/LAMMPS.py index e637289f7d9..14781229158 100644 --- a/package/MDAnalysis/coordinates/LAMMPS.py +++ b/package/MDAnalysis/coordinates/LAMMPS.py @@ -479,11 +479,11 @@ class DumpReader(base.ReaderBase): By using the keyword `additional_columns`, you can specify arbitrary data to be read alongside the coordinates. If specified, the keyword expects a list of the names of the columns that you want to have read. The results - of the parsing are saved to the time step `data` dictionary alongside the - name of the data column. For instance, if you have time-dependent charges - saved in a LAMMPS dump such as + of the parsing are saved to the time step :attr:`Timestep.data` dictionary + alongside the name of the data column. For instance, if you have time-dependent + charges saved in a LAMMPS dump such as - .. code-block:: python + .. code-block:: ITEM: ATOMS id x y z q l 1 2.84 8.17 -25 0.00258855 1.1 @@ -497,7 +497,8 @@ class DumpReader(base.ReaderBase): additional_columns=['q', 'l']) The additional data is then available for each time step via - (as the value of the `data` dictionary, sorted by the ids of the atoms). + (as the value of the :attr:`Timestep.data` dictionary, sorted by the ids of the + atoms). .. code-block:: python @@ -538,6 +539,9 @@ class DumpReader(base.ReaderBase): **kwargs Other keyword arguments used in :class:`~MDAnalysis.coordinates.base.ReaderBase` + .. versionchanged:: 2.7.0 + Reading of arbitrary, additional columns is now supported. + (Issue `#3608 `__) .. versionchanged:: 2.4.0 Now imports velocities and forces, translates the box to the origin, and optionally unwraps trajectories with image flags upon loading. @@ -559,11 +563,15 @@ class DumpReader(base.ReaderBase): "scaled_unwrapped": ["xsu", "ysu", "zsu"] } + _parsable_columns = ["id", "vx", "vy", "vz", "fx", "fy", "fz"] + for key in _coordtype_column_names.keys(): + _parsable_columns += _coordtype_column_names[key] + @store_init_arguments def __init__(self, filename, lammps_coordinate_convention="auto", unwrap_images=False, - additional_columns=False, **kwargs): + additional_columns=None, **kwargs): super(DumpReader, self).__init__(filename, **kwargs) root, ext = os.path.splitext(self.filename) @@ -578,10 +586,15 @@ def __init__(self, filename, self._unwrap = unwrap_images - if additional_columns: + if (util.iterable(additional_columns) + or additional_columns is None + or additional_columns is True): self._additional_columns = additional_columns else: - self._additional_columns = [] + raise ValueError(f"additional_columns={additional_columns} " + "is not a valid option. Pleae provide an" + "iterable containing the additional" + "coloum headers.") self._cache = {} @@ -731,19 +744,15 @@ def _read_next_timestep(self): # Create the data arrays for additional attributes which will be saved # under ts.data - additional_keys = [] - if len(attrs) > 3: - for attribute_key in attrs: - # Skip the normal columns - if (attribute_key == "id" or - attribute_key in - self._coordtype_column_names[ - self.lammps_coordinate_convention - ] or attribute_key not in self._additional_columns): - continue - # Else this is an additional field - ts.data[attribute_key] = np.empty(self.n_atoms) - additional_keys.append(attribute_key) + if self._additional_columns is True: + # Parse every column that is not already parsed elsewhere (total \ parsable) + additional_keys = set(attrs).difference(self._parsable_columns) + elif self._additional_columns: + additional_keys = [key for key in self._additional_columns if key in attrs] + else: + additional_keys = [] + for key in additional_keys: + ts.data[key] = np.empty(self.n_atoms) # Parse all the atoms for i in range(self.n_atoms): @@ -766,12 +775,10 @@ def _read_next_timestep(self): if self._has_forces: ts.forces[i] = [fields[dim] for dim in force_cols] - # Add the capability to also collect other data - # Then there is also more than just the positional data - if len(additional_keys) != 0: - for attribute_key in additional_keys: - ts.data[attribute_key][i] = \ - fields[attr_to_col_ix[attribute_key]] + # Collect additional cols + for attribute_key in additional_keys: + ts.data[attribute_key][i] = \ + fields[attr_to_col_ix[attribute_key]] order = np.argsort(indices) ts.positions = ts.positions[order] From 27cb7beec246447f776ea57b5d1d2ecb2a9c768e Mon Sep 17 00:00:00 2001 From: hejamu Date: Fri, 29 Sep 2023 18:22:39 +0100 Subject: [PATCH 18/30] refine tests --- package/CHANGELOG | 2 +- package/MDAnalysis/coordinates/LAMMPS.py | 22 +++++---- .../MDAnalysisTests/coordinates/reference.py | 8 ++-- .../coordinates/test_lammps.py | 45 ++++++++++++------- 4 files changed, 45 insertions(+), 32 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index 0fe870bc440..0401c85f794 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -29,7 +29,7 @@ Fixes * Fixes hydrogenbonds tutorial path to point to hbonds (Issue #4285, PR #4286) Enhancements - * Added parsing of arbitrary columns of the LAMMPS dump parser. + * Added parsing of arbitrary columns of the LAMMPS dump parser. (Issue #3504) * Adds external sidebar links (Issue #4296) * Updated lib.qcprot.CalcRMSDRotationalMatrix to accept either float32 or float64 inputs (PR #4273, part of #3927) diff --git a/package/MDAnalysis/coordinates/LAMMPS.py b/package/MDAnalysis/coordinates/LAMMPS.py index 14781229158..6de4a8833d2 100644 --- a/package/MDAnalysis/coordinates/LAMMPS.py +++ b/package/MDAnalysis/coordinates/LAMMPS.py @@ -477,11 +477,9 @@ class DumpReader(base.ReaderBase): length unit (Å), and angles are in degrees. By using the keyword `additional_columns`, you can specify arbitrary data - to be read alongside the coordinates. If specified, the keyword expects a - list of the names of the columns that you want to have read. The results - of the parsing are saved to the time step :attr:`Timestep.data` dictionary - alongside the name of the data column. For instance, if you have time-dependent - charges saved in a LAMMPS dump such as + to be read. The keyword expects a list of the names of the columns or `True` to read + all additional columns. The results are saved to :attr:`Timestep.data`. + For example, if your LAMMPS dump looks like this .. code-block:: @@ -489,16 +487,14 @@ class DumpReader(base.ReaderBase): 1 2.84 8.17 -25 0.00258855 1.1 2 7.1 8.17 -25 6.91952e-05 1.2 - Then you may parse the additional columns `q` and `l` via. + Then you may parse the additional columns `q` and `l` via: .. code-block:: python u = mda.Universe('path_to_data', 'path_to_lammpsdump', additional_columns=['q', 'l']) - The additional data is then available for each time step via - (as the value of the :attr:`Timestep.data` dictionary, sorted by the ids of the - atoms). + The additional data is then available for each time step via: .. code-block:: python @@ -587,7 +583,7 @@ def __init__(self, filename, self._unwrap = unwrap_images if (util.iterable(additional_columns) - or additional_columns is None + or additional_columns is None or additional_columns is True): self._additional_columns = additional_columns else: @@ -745,10 +741,12 @@ def _read_next_timestep(self): # Create the data arrays for additional attributes which will be saved # under ts.data if self._additional_columns is True: - # Parse every column that is not already parsed elsewhere (total \ parsable) + # Parse every column that is not already parsed + # elsewhere (total \ parsable) additional_keys = set(attrs).difference(self._parsable_columns) elif self._additional_columns: - additional_keys = [key for key in self._additional_columns if key in attrs] + additional_keys = \ + [key for key in self._additional_columns if key in attrs] else: additional_keys = [] for key in additional_keys: diff --git a/testsuite/MDAnalysisTests/coordinates/reference.py b/testsuite/MDAnalysisTests/coordinates/reference.py index dc65d8c2af2..5f18cf23166 100644 --- a/testsuite/MDAnalysisTests/coordinates/reference.py +++ b/testsuite/MDAnalysisTests/coordinates/reference.py @@ -230,7 +230,7 @@ class RefLAMMPSDataMini(object): class RefLAMMPSDataAdditionalColumns(object): - charges = np.array([2.58855e-03, 6.91952e-05, 1.05548e-02, 4.20319e-03, - 9.19172e-03, 4.79777e-03, 6.36864e-04, 5.87125e-03, - -2.18125e-03, 6.88910e-03]) - additional_data = np.array(5 * [1.1, 1.2]) + q = np.array([2.58855e-03, 6.91952e-05, 1.05548e-02, 4.20319e-03, + 9.19172e-03, 4.79777e-03, 6.36864e-04, 5.87125e-03, + -2.18125e-03, 6.88910e-03]) + l = np.array(5 * [1.1, 1.2]) diff --git a/testsuite/MDAnalysisTests/coordinates/test_lammps.py b/testsuite/MDAnalysisTests/coordinates/test_lammps.py index ca533aa937b..8a2a3b8bf93 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_lammps.py +++ b/testsuite/MDAnalysisTests/coordinates/test_lammps.py @@ -29,7 +29,7 @@ import MDAnalysis as mda from MDAnalysis import NoDataError -from numpy.testing import (assert_equal, assert_allclose, assert_allclose) +from numpy.testing import (assert_equal, assert_allclose) from MDAnalysisTests import make_Universe from MDAnalysisTests.coordinates.reference import ( @@ -515,15 +515,28 @@ def u(self, tmpdir, request): lammps_coordinate_convention="auto") @pytest.fixture() - def u_additional_columns(self): + def u_additional_columns_true(self): f = LAMMPSDUMP_additional_columns top = LAMMPSdata_additional_columns - yield (mda.Universe(top, f, format='LAMMPSDUMP', + return mda.Universe(top, f, format='LAMMPSDUMP', lammps_coordinate_convention="auto", - additional_columns=['q']), - mda.Universe(top, f, format='LAMMPSDUMP', + additional_columns=True) + + @pytest.fixture() + def u_additional_columns_single(self): + f = LAMMPSDUMP_additional_columns + top = LAMMPSdata_additional_columns + return mda.Universe(top, f, format='LAMMPSDUMP', + lammps_coordinate_convention="auto", + additional_columns=['q']) + + @pytest.fixture() + def u_additional_columns_multiple(self): + f = LAMMPSDUMP_additional_columns + top = LAMMPSdata_additional_columns + return mda.Universe(top, f, format='LAMMPSDUMP', lammps_coordinate_convention="auto", - additional_columns=['q', 'l'])) + additional_columns=['q', 'l']) @pytest.fixture() def reference_positions(self): @@ -606,15 +619,17 @@ def test_atom_reordering(self, u, reference_positions): assert_allclose(atom1.position, atom1_pos-bmin, atol=1e-5) assert_allclose(atom13.position, atom13_pos-bmin, atol=1e-5) - def test_additional_columns(self, u_additional_columns): - # this is the universe with just q - charges = u_additional_columns[0].trajectory[0].data['q'] - # this is the universe with both - second = u_additional_columns[1].trajectory[0].data['l'] - assert_almost_equal(charges, - RefLAMMPSDataAdditionalColumns.charges) - assert_almost_equal(second, - RefLAMMPSDataAdditionalColumns.additional_data) + @pytest.mark.parametrize("system, fields", [ + ('u_additional_columns_true', ['q', 'l']), + ('u_additional_columns_single', ['q']), + ('u_additional_columns_multiple', ['q', 'l']), + ]) + def test_additional_columns(self, system, fields, request): + u = request.getfixturevalue(system) + for field in fields: + data = u.trajectory[0].data[field] + assert_allclose(data, + getattr(RefLAMMPSDataAdditionalColumns, field)) @pytest.mark.parametrize("convention", From 1e8557bfea318b531af45ae6a6c35cb0b81acbde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?PSt=C3=A4rk?= Date: Mon, 26 Feb 2024 14:42:38 +0100 Subject: [PATCH 19/30] Small typo Co-authored-by: Hugo MacDermott-Opeskin --- package/MDAnalysis/coordinates/LAMMPS.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/MDAnalysis/coordinates/LAMMPS.py b/package/MDAnalysis/coordinates/LAMMPS.py index 6de4a8833d2..f16c0da780d 100644 --- a/package/MDAnalysis/coordinates/LAMMPS.py +++ b/package/MDAnalysis/coordinates/LAMMPS.py @@ -590,7 +590,7 @@ def __init__(self, filename, raise ValueError(f"additional_columns={additional_columns} " "is not a valid option. Pleae provide an" "iterable containing the additional" - "coloum headers.") + "column headers.") self._cache = {} From 07d0c18c950d50941ece87d33da8f3c9994b68b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?PSt=C3=A4rk?= Date: Mon, 26 Feb 2024 14:45:18 +0100 Subject: [PATCH 20/30] Removed comment --- testsuite/MDAnalysisTests/coordinates/test_lammps.py | 1 - 1 file changed, 1 deletion(-) diff --git a/testsuite/MDAnalysisTests/coordinates/test_lammps.py b/testsuite/MDAnalysisTests/coordinates/test_lammps.py index 8a2a3b8bf93..83a212590a7 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_lammps.py +++ b/testsuite/MDAnalysisTests/coordinates/test_lammps.py @@ -500,7 +500,6 @@ def u(self, tmpdir, request): # no conversion needed f = LAMMPSDUMP else: - # Select if one wants to use the additional column format f = str(tmpdir.join('lammps.' + trjtype)) with bz2.BZ2File(LAMMPSDUMP, 'rb') as datain: data = datain.read() From 5f36ff49becac589de27ffd097af0e4f601878e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20St=C3=A4rk?= Date: Mon, 26 Feb 2024 15:04:41 +0100 Subject: [PATCH 21/30] Addressed hmacdope's comments regarding issue link --- package/MDAnalysis/coordinates/LAMMPS.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/MDAnalysis/coordinates/LAMMPS.py b/package/MDAnalysis/coordinates/LAMMPS.py index f16c0da780d..4476c7a4582 100644 --- a/package/MDAnalysis/coordinates/LAMMPS.py +++ b/package/MDAnalysis/coordinates/LAMMPS.py @@ -537,7 +537,7 @@ class DumpReader(base.ReaderBase): .. versionchanged:: 2.7.0 Reading of arbitrary, additional columns is now supported. - (Issue `#3608 `__) + (Issue #3608) .. versionchanged:: 2.4.0 Now imports velocities and forces, translates the box to the origin, and optionally unwraps trajectories with image flags upon loading. From e45e764527c1a95a452b5a09c211819a43ad2fd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20St=C3=A4rk?= Date: Mon, 26 Feb 2024 15:07:14 +0100 Subject: [PATCH 22/30] Addressed hmacdope's comments regarding file paths. --- package/MDAnalysis/coordinates/LAMMPS.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/MDAnalysis/coordinates/LAMMPS.py b/package/MDAnalysis/coordinates/LAMMPS.py index 4476c7a4582..facf6c17038 100644 --- a/package/MDAnalysis/coordinates/LAMMPS.py +++ b/package/MDAnalysis/coordinates/LAMMPS.py @@ -491,7 +491,7 @@ class DumpReader(base.ReaderBase): .. code-block:: python - u = mda.Universe('path_to_data', 'path_to_lammpsdump', + u = mda.Universe('structure.data', 'traj.lammpsdump', additional_columns=['q', 'l']) The additional data is then available for each time step via: From 546dd43bfc5d29aa25e67fb37465bbe1cfce5f62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20St=C3=A4rk?= Date: Mon, 26 Feb 2024 15:15:13 +0100 Subject: [PATCH 23/30] Added warning if keys are not in lammpsdump file. --- package/MDAnalysis/coordinates/LAMMPS.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/package/MDAnalysis/coordinates/LAMMPS.py b/package/MDAnalysis/coordinates/LAMMPS.py index facf6c17038..bc125232e12 100644 --- a/package/MDAnalysis/coordinates/LAMMPS.py +++ b/package/MDAnalysis/coordinates/LAMMPS.py @@ -135,6 +135,7 @@ from ..topology.LAMMPSParser import DATAParser from ..exceptions import NoDataError from . import base +import warnings btype_sections = {'bond':'Bonds', 'angle':'Angles', 'dihedral':'Dihedrals', 'improper':'Impropers'} @@ -745,6 +746,9 @@ def _read_next_timestep(self): # elsewhere (total \ parsable) additional_keys = set(attrs).difference(self._parsable_columns) elif self._additional_columns: + if not all([key in attrs for key in self._additional_columns]): + warnings.warn("Some of the additional columns are not present " + "in the file, they will be ignored") additional_keys = \ [key for key in self._additional_columns if key in attrs] else: From adec7943a620d40cc7beea5657b619c389f69751 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20St=C3=A4rk?= Date: Mon, 26 Feb 2024 15:30:59 +0100 Subject: [PATCH 24/30] Tested the formatting error of additional_columns. --- package/MDAnalysis/coordinates/LAMMPS.py | 2 +- .../MDAnalysisTests/coordinates/test_lammps.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/package/MDAnalysis/coordinates/LAMMPS.py b/package/MDAnalysis/coordinates/LAMMPS.py index bc125232e12..516ebb96062 100644 --- a/package/MDAnalysis/coordinates/LAMMPS.py +++ b/package/MDAnalysis/coordinates/LAMMPS.py @@ -589,7 +589,7 @@ def __init__(self, filename, self._additional_columns = additional_columns else: raise ValueError(f"additional_columns={additional_columns} " - "is not a valid option. Pleae provide an" + "is not a valid option. Please provide an " "iterable containing the additional" "column headers.") diff --git a/testsuite/MDAnalysisTests/coordinates/test_lammps.py b/testsuite/MDAnalysisTests/coordinates/test_lammps.py index 83a212590a7..5aef48e6aa4 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_lammps.py +++ b/testsuite/MDAnalysisTests/coordinates/test_lammps.py @@ -537,6 +537,14 @@ def u_additional_columns_multiple(self): lammps_coordinate_convention="auto", additional_columns=['q', 'l']) + @pytest.fixture() + def u_additional_columns_wrong_format(self): + f = LAMMPSDUMP_additional_columns + top = LAMMPSdata_additional_columns + return mda.Universe(top, f, format='LAMMPSDUMP', + lammps_coordinate_convention="auto", + additional_columns='q') + @pytest.fixture() def reference_positions(self): # manually copied from traj file @@ -630,6 +638,13 @@ def test_additional_columns(self, system, fields, request): assert_allclose(data, getattr(RefLAMMPSDataAdditionalColumns, field)) + @pytest.mark.parametrize("system", [ + ('u_additional_columns_wrong_format'), + ]) + def test_wrong_format_additional_colums(self, system, request): + with pytest.raises(ValueError, + match="Please provide an iterable containing"): + request.getfixturevalue(system) @pytest.mark.parametrize("convention", ["unscaled", "unwrapped", "scaled_unwrapped"]) From 57b895d6d882e1e2e4cd7aada745e7db59176db6 Mon Sep 17 00:00:00 2001 From: hejamu Date: Tue, 27 Feb 2024 22:27:06 +0100 Subject: [PATCH 25/30] Make changed lines comply with pep8 --- package/MDAnalysis/coordinates/LAMMPS.py | 8 ++++---- testsuite/MDAnalysisTests/coordinates/reference.py | 2 +- .../MDAnalysisTests/coordinates/test_lammps.py | 4 ++-- testsuite/MDAnalysisTests/datafiles.py | 14 ++++++++++---- 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/package/MDAnalysis/coordinates/LAMMPS.py b/package/MDAnalysis/coordinates/LAMMPS.py index 516ebb96062..77040033414 100644 --- a/package/MDAnalysis/coordinates/LAMMPS.py +++ b/package/MDAnalysis/coordinates/LAMMPS.py @@ -478,9 +478,9 @@ class DumpReader(base.ReaderBase): length unit (Å), and angles are in degrees. By using the keyword `additional_columns`, you can specify arbitrary data - to be read. The keyword expects a list of the names of the columns or `True` to read - all additional columns. The results are saved to :attr:`Timestep.data`. - For example, if your LAMMPS dump looks like this + to be read. The keyword expects a list of the names of the columns or `True` + to read all additional columns. The results are saved to + :attr:`Timestep.data`. For example, if your LAMMPS dump looks like this .. code-block:: @@ -500,7 +500,7 @@ class DumpReader(base.ReaderBase): .. code-block:: python for ts in u.trajectory: - charges = ts.data['q'] # Access the additional data, sorted by the id + charges = ts.data['q'] # Access additional data, sorted by the id ls = ts.data['l'] ... diff --git a/testsuite/MDAnalysisTests/coordinates/reference.py b/testsuite/MDAnalysisTests/coordinates/reference.py index 5f18cf23166..8c523c639e1 100644 --- a/testsuite/MDAnalysisTests/coordinates/reference.py +++ b/testsuite/MDAnalysisTests/coordinates/reference.py @@ -233,4 +233,4 @@ class RefLAMMPSDataAdditionalColumns(object): q = np.array([2.58855e-03, 6.91952e-05, 1.05548e-02, 4.20319e-03, 9.19172e-03, 4.79777e-03, 6.36864e-04, 5.87125e-03, -2.18125e-03, 6.88910e-03]) - l = np.array(5 * [1.1, 1.2]) + p = np.array(5 * [1.1, 1.2]) diff --git a/testsuite/MDAnalysisTests/coordinates/test_lammps.py b/testsuite/MDAnalysisTests/coordinates/test_lammps.py index 5aef48e6aa4..424c179f75f 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_lammps.py +++ b/testsuite/MDAnalysisTests/coordinates/test_lammps.py @@ -627,9 +627,9 @@ def test_atom_reordering(self, u, reference_positions): assert_allclose(atom13.position, atom13_pos-bmin, atol=1e-5) @pytest.mark.parametrize("system, fields", [ - ('u_additional_columns_true', ['q', 'l']), + ('u_additional_columns_true', ['q', 'p']), ('u_additional_columns_single', ['q']), - ('u_additional_columns_multiple', ['q', 'l']), + ('u_additional_columns_multiple', ['q', 'p']), ]) def test_additional_columns(self, system, fields, request): u = request.getfixturevalue(system) diff --git a/testsuite/MDAnalysisTests/datafiles.py b/testsuite/MDAnalysisTests/datafiles.py index 223d52fa6bb..e060ce66f69 100644 --- a/testsuite/MDAnalysisTests/datafiles.py +++ b/testsuite/MDAnalysisTests/datafiles.py @@ -155,7 +155,8 @@ "LAMMPSdata_deletedatoms", # with deleted atoms "LAMMPSdata_triclinic", # lammpsdata file to test triclinic dimension parsing, albite with most atoms deleted "LAMMPSdata_PairIJ", # lammps datafile with a PairIJ Coeffs section - "LAMMPSdata_additional_columns", # structure for the additional column lammpstrj + # structure for the additional column lammpstrj + "LAMMPSdata_additional_columns", "LAMMPSDUMP", "LAMMPSDUMP_long", # lammpsdump file with a few zeros sprinkled in the first column first frame "LAMMPSDUMP_allcoords", # lammpsdump file with all coordinate conventions (x,xs,xu,xsu) present, from LAMMPS rdf example @@ -166,7 +167,8 @@ "LAMMPSDUMP_chain1", # Lammps dump file with chain reader "LAMMPSDUMP_chain2", # Lammps dump file with chain reader "LAMMPS_chain", # Lammps data file with chain reader - "LAMMPSDUMP_additional_columns", # lammpsdump file with additional data (an additional charge column) + # lammpsdump file with additional data (an additional charge column) + "LAMMPSDUMP_additional_columns", "unordered_res", # pdb file with resids non sequential "GMS_ASYMOPT", # GAMESS C1 optimization "GMS_SYMOPT", # GAMESS D4h optimization @@ -546,8 +548,12 @@ LAMMPSDUMP_chain2 = (_data_ref / "lammps/chain_dump_2.lammpstrj").as_posix() LAMMPS_chain = (_data_ref / "lammps/chain_initial.data").as_posix() LAMMPSdata_many_bonds = (_data_ref / "lammps/a_lot_of_bond_types.data").as_posix() -LAMMPSdata_additional_columns = (_data_ref / "lammps/additional_columns.data").as_posix() -LAMMPSDUMP_additional_columns = (_data_ref / "lammps/additional_columns.lammpstrj").as_posix() +LAMMPSdata_additional_columns = ( + _data_ref / "lammps/additional_columns.data" +).as_posix() +LAMMPSDUMP_additional_columns = ( + _data_ref / "lammps/additional_columns.lammpstrj" +).as_posix() unordered_res = (_data_ref / "unordered_res.pdb").as_posix() From d054946342ab0124d79eacb5e52f6fdb47125d24 Mon Sep 17 00:00:00 2001 From: hejamu Date: Tue, 27 Feb 2024 22:28:51 +0100 Subject: [PATCH 26/30] 80 is indeed longer than 79... --- package/MDAnalysis/coordinates/LAMMPS.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/coordinates/LAMMPS.py b/package/MDAnalysis/coordinates/LAMMPS.py index 77040033414..0227ed742cd 100644 --- a/package/MDAnalysis/coordinates/LAMMPS.py +++ b/package/MDAnalysis/coordinates/LAMMPS.py @@ -478,8 +478,8 @@ class DumpReader(base.ReaderBase): length unit (Å), and angles are in degrees. By using the keyword `additional_columns`, you can specify arbitrary data - to be read. The keyword expects a list of the names of the columns or `True` - to read all additional columns. The results are saved to + to be read. The keyword expects a list of the names of the columns or + `True` to read all additional columns. The results are saved to :attr:`Timestep.data`. For example, if your LAMMPS dump looks like this .. code-block:: From 9f14a8a50ada2e044e4835d1d39b3a4d63cea86a Mon Sep 17 00:00:00 2001 From: hejamu Date: Tue, 27 Feb 2024 23:00:18 +0100 Subject: [PATCH 27/30] Fix test --- .../MDAnalysisTests/data/lammps/additional_columns.lammpstrj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testsuite/MDAnalysisTests/data/lammps/additional_columns.lammpstrj b/testsuite/MDAnalysisTests/data/lammps/additional_columns.lammpstrj index b2d59267968..79ad9a6fb74 100644 --- a/testsuite/MDAnalysisTests/data/lammps/additional_columns.lammpstrj +++ b/testsuite/MDAnalysisTests/data/lammps/additional_columns.lammpstrj @@ -6,7 +6,7 @@ ITEM: BOX BOUNDS pp pp ff 0.0000000000000000e+00 4.2600000000000001e+01 0.0000000000000000e+00 4.4271200000000000e+01 -2.5100000000000001e+01 2.5100000000000001e+01 -ITEM: ATOMS id x y z q l +ITEM: ATOMS id x y z q p 1 2.84 8.17 -25 0.00258855 1.1 2 7.1 8.17 -25 6.91952e-05 1.2 3 2.13 6.94 -25 0.0105548 1.1 From bcb044fd4870a30535fadaa15791fc1207140b26 Mon Sep 17 00:00:00 2001 From: hejamu Date: Tue, 27 Feb 2024 23:18:17 +0100 Subject: [PATCH 28/30] Fix test --- testsuite/MDAnalysisTests/coordinates/test_lammps.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testsuite/MDAnalysisTests/coordinates/test_lammps.py b/testsuite/MDAnalysisTests/coordinates/test_lammps.py index 424c179f75f..7c1da50f4ae 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_lammps.py +++ b/testsuite/MDAnalysisTests/coordinates/test_lammps.py @@ -535,7 +535,7 @@ def u_additional_columns_multiple(self): top = LAMMPSdata_additional_columns return mda.Universe(top, f, format='LAMMPSDUMP', lammps_coordinate_convention="auto", - additional_columns=['q', 'l']) + additional_columns=['q', 'p']) @pytest.fixture() def u_additional_columns_wrong_format(self): From 8ef649c524a57bd760eac583c83b23ad098aac0f Mon Sep 17 00:00:00 2001 From: hejamu Date: Tue, 27 Feb 2024 23:55:52 +0100 Subject: [PATCH 29/30] Don't format `datafiles.py` --- testsuite/MDAnalysisTests/datafiles.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/testsuite/MDAnalysisTests/datafiles.py b/testsuite/MDAnalysisTests/datafiles.py index e1a51f39510..3fb17695de6 100644 --- a/testsuite/MDAnalysisTests/datafiles.py +++ b/testsuite/MDAnalysisTests/datafiles.py @@ -156,8 +156,7 @@ "LAMMPSdata_deletedatoms", # with deleted atoms "LAMMPSdata_triclinic", # lammpsdata file to test triclinic dimension parsing, albite with most atoms deleted "LAMMPSdata_PairIJ", # lammps datafile with a PairIJ Coeffs section - # structure for the additional column lammpstrj - "LAMMPSdata_additional_columns", + "LAMMPSdata_additional_columns", # structure for the additional column lammpstrj "LAMMPSDUMP", "LAMMPSDUMP_long", # lammpsdump file with a few zeros sprinkled in the first column first frame "LAMMPSDUMP_allcoords", # lammpsdump file with all coordinate conventions (x,xs,xu,xsu) present, from LAMMPS rdf example @@ -168,8 +167,7 @@ "LAMMPSDUMP_chain1", # Lammps dump file with chain reader "LAMMPSDUMP_chain2", # Lammps dump file with chain reader "LAMMPS_chain", # Lammps data file with chain reader - # lammpsdump file with additional data (an additional charge column) - "LAMMPSDUMP_additional_columns", + "LAMMPSDUMP_additional_columns", # lammpsdump file with additional data (an additional charge column) "unordered_res", # pdb file with resids non sequential "GMS_ASYMOPT", # GAMESS C1 optimization "GMS_SYMOPT", # GAMESS D4h optimization @@ -556,12 +554,8 @@ LAMMPSDUMP_chain2 = (_data_ref / "lammps/chain_dump_2.lammpstrj").as_posix() LAMMPS_chain = (_data_ref / "lammps/chain_initial.data").as_posix() LAMMPSdata_many_bonds = (_data_ref / "lammps/a_lot_of_bond_types.data").as_posix() -LAMMPSdata_additional_columns = ( - _data_ref / "lammps/additional_columns.data" -).as_posix() -LAMMPSDUMP_additional_columns = ( - _data_ref / "lammps/additional_columns.lammpstrj" -).as_posix() +LAMMPSdata_additional_columns = (_data_ref / "lammps/additional_columns.data").as_posix() +LAMMPSDUMP_additional_columns = (_data_ref / "lammps/additional_columns.lammpstrj").as_posix() unordered_res = (_data_ref / "unordered_res.pdb").as_posix() From bac1f4c85099d7b8173f77972c64075920f60cae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20St=C3=A4rk?= Date: Thu, 29 Feb 2024 14:51:45 +0100 Subject: [PATCH 30/30] Added test of warning. --- .../MDAnalysisTests/coordinates/test_lammps.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/testsuite/MDAnalysisTests/coordinates/test_lammps.py b/testsuite/MDAnalysisTests/coordinates/test_lammps.py index 7c1da50f4ae..88b4b8c35ee 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_lammps.py +++ b/testsuite/MDAnalysisTests/coordinates/test_lammps.py @@ -545,6 +545,14 @@ def u_additional_columns_wrong_format(self): lammps_coordinate_convention="auto", additional_columns='q') + @pytest.fixture() + def u_additional_columns_not_present(self): + f = LAMMPSDUMP_additional_columns + top = LAMMPSdata_additional_columns + return mda.Universe(top, f, format='LAMMPSDUMP', + lammps_coordinate_convention="auto", + additional_columns=['q', 'w']) + @pytest.fixture() def reference_positions(self): # manually copied from traj file @@ -646,6 +654,13 @@ def test_wrong_format_additional_colums(self, system, request): match="Please provide an iterable containing"): request.getfixturevalue(system) + @pytest.mark.parametrize("system", [ + ('u_additional_columns_not_present'), + ]) + def test_warning(self, system, request): + with pytest.warns(match="Some of the additional"): + request.getfixturevalue(system) + @pytest.mark.parametrize("convention", ["unscaled", "unwrapped", "scaled_unwrapped"]) def test_open_absent_convention_fails(convention):