diff --git a/.github/workflows/gh-ci.yaml b/.github/workflows/gh-ci.yaml index f1219b6..e823073 100644 --- a/.github/workflows/gh-ci.yaml +++ b/.github/workflows/gh-ci.yaml @@ -49,7 +49,7 @@ jobs: - name: install package deps run: | - mamba install numpy scipy pytest pytest-cov codecov six + mamba install numpy scipy mrcfile six pytest pytest-cov codecov - name: check install run: | diff --git a/CHANGELOG b/CHANGELOG index e3a7805..2198a36 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -13,6 +13,25 @@ The rules for this file: * accompany each entry with github issue/PR number (Issue #xyz) ------------------------------------------------------------------------------ +??/??/2022 orbeckst, tluchko, IAlibay + + * 0.7.0 + + Enhancements + + * use mrcfile library to parse MRC files (including CCP4) using the + new mrc.MRC class (issue #83) + + Fixes + + * The new mrc module correctly reorients the coordinate system based + on mapc, mapr, maps and correctly calculates the origin (issue #76) + + Deprecations + + * The CCP4 module (replaced by mrc) will be removed in 0.8.0. + + 10/10/2021 eloyfelix, renehamburger1993, lilyminium, jvermaas, xiki-tempula, IAlibay, orbeckst diff --git a/doc/source/gridData/formats.rst b/doc/source/gridData/formats.rst index baa642e..f6de7d0 100644 --- a/doc/source/gridData/formats.rst +++ b/doc/source/gridData/formats.rst @@ -27,7 +27,7 @@ small number of file formats is directly supported. ============================ ========== ========= ===== ===== ========================================= :mod:`~gridData.OpenDX` OpenDX_ dx x x subset of OpenDX implemented :mod:`~gridData.gOpenMol` gOpenMol_ plt x - :mod:`~gridData.CCP4` CCP4_ ccp4 x subset implemented + :mod:`~gridData.mrc` CCP4_ ccp4,mrc x subset implemented :class:`~gridData.core.Grid` pickle pickle x x standard Python pickle of the Grid class ============================ ========== ========= ===== ===== ========================================= @@ -38,7 +38,7 @@ small number of file formats is directly supported. .. _Chimera: https://www.cgl.ucsf.edu/chimera/ .. _OpenDX: http://www.opendx.org/ .. _gOpenMol: http://www.csc.fi/gopenmol/ -.. _CCP4: http://www.ccp4.ac.uk/html/maplib.html#description +.. _CCP4: http://www.ccpem.ac.uk/mrc_format/mrc2014.php Format-specific modules @@ -49,4 +49,5 @@ Format-specific modules formats/OpenDX formats/gOpenMol + formats/mrc formats/CCP4 diff --git a/doc/source/gridData/formats/mrc.rst b/doc/source/gridData/formats/mrc.rst new file mode 100644 index 0000000..352f040 --- /dev/null +++ b/doc/source/gridData/formats/mrc.rst @@ -0,0 +1,2 @@ +.. automodule:: gridData.mrc + :members: diff --git a/gridData/CCP4.py b/gridData/CCP4.py index ff9a0ae..aa16b6f 100644 --- a/gridData/CCP4.py +++ b/gridData/CCP4.py @@ -6,11 +6,14 @@ # Copyright Science and Technologies Facilities Council, 2015. """ -:mod:`CCP4` --- the CCP4 volumetric data format -=============================================== +:mod:`CCP4` --- the CCP4 volumetric data format (DEPRECATED) +============================================================ .. versionadded:: 0.3.0 +.. deprecated:: 0.7.0 The CCP4 module is replaced by :mod:`gridData.mrc` and + will be removed in 0.8.0. + .. _CCP4: http://www.ccp4.ac.uk/html/maplib.html#description The module provides a simple implementation of a reader for CCP4_ @@ -160,6 +163,10 @@ class CCP4(object): * symmetry records * index ordering besides standard column-major and row-major * non-standard fields, such any in filed in future use block + + + .. deprecated:: 0.7.0 + Use :class:`gridData.mrc.MRC` instead. This class will be removed in 0.8.0. """ _axis_map = {1: 'x', 2: 'y', 3: 'z'} @@ -196,6 +203,10 @@ def __init__(self, filename=None): if filename is not None: self.read(filename) + warnings.warn("CCP4.CCP4 is being replaced by mrc.MRC and will be removed " + "in release 0.8.0", + category=DeprecationWarning) + def read(self, filename): """Populate the instance from the ccp4 file *filename*.""" if filename is not None: diff --git a/gridData/__init__.py b/gridData/__init__.py index cdc5f48..b4e064a 100644 --- a/gridData/__init__.py +++ b/gridData/__init__.py @@ -93,16 +93,8 @@ Formats ------- -The following formats are available (:ref:`supported-file-formats`): - - :mod:`~gridData.OpenDX` - IBM's Data Explorer, http://www.opendx.org/ - :mod:`~gridData.gOpenMol` - http://www.csc.fi/gopenmol/ - :mod:`~gridData.CCP4` - CCP4 format http://www.ccp4.ac.uk/html/maplib.html#description - pickle - python pickle file (:mod:`pickle`) +For the available file formats see :ref:`supported-file-formats`. + """ diff --git a/gridData/core.py b/gridData/core.py index a9dbc54..b038779 100644 --- a/gridData/core.py +++ b/gridData/core.py @@ -40,7 +40,7 @@ from . import OpenDX from . import gOpenMol -from . import CCP4 +from . import mrc def _grid(x): @@ -114,6 +114,10 @@ def __init__(self, grid=None, edges=None, origin=None, delta=None, .. versionchanged:: 0.5.0 New *file_format* keyword argument. + .. versionchanged:: 0.7.0 + CCP4 files are now read with :class:`gridData.mrc.MRC` and not anymore + with the deprecated/buggy `ccp4.CCP4` + """ # file formats are guess from extension == lower case key self._exporters = { @@ -123,7 +127,8 @@ def __init__(self, grid=None, edges=None, origin=None, delta=None, 'PYTHON': self._export_python, # compatibility } self._loaders = { - 'CCP4': self._load_cpp4, + 'CCP4': self._load_mrc, + 'MRC': self._load_mrc, 'DX': self._load_dx, 'PLT': self._load_plt, 'PKL': self._load_python, @@ -471,12 +476,14 @@ def _load_python(self, filename): edges=saved['edges'], metadata=saved['metadata']) - def _load_cpp4(self, filename): - """Initializes Grid from a CCP4 file.""" - ccp4 = CCP4.CCP4() - ccp4.read(filename) - grid, edges = ccp4.histogramdd() + def _load_mrc(self, filename): + """Initializes Grid from a MRC/CCP4 file.""" + mrcfile = mrc.MRC(filename) + grid, edges = mrcfile.histogramdd() self._load(grid=grid, edges=edges, metadata=self.metadata) + # Store header for access from Grid object (undocumented) + # https://github.com/MDAnalysis/GridDataFormats/pull/100#discussion_r782604833 + self._mrc_header = mrcfile.header.copy() def _load_dx(self, filename): """Initializes Grid from a OpenDX file.""" diff --git a/gridData/mrc.py b/gridData/mrc.py new file mode 100644 index 0000000..f814ee6 --- /dev/null +++ b/gridData/mrc.py @@ -0,0 +1,149 @@ +# gridDataFormats --- python modules to read and write gridded data +# Copyright (c) 2009-2021 Oliver Beckstein +# Released under the GNU Lesser General Public License, version 3 or later. +# + +""":mod:`mrc` --- the mrc/CCP4 volumetric data format +=================================================== + +.. versionadded:: 0.7.0 + +Reading of MRC/CCP4 volumetric files (`MRC2014 file format`_) using +the mrcfile_ library [Burnley2017]_. + +This implementation replaces the :mod:`CCP4` module. + +.. _mrcfile: https://mrcfile.readthedocs.io/ +.. _`MRC2014 file format`: http://www.ccpem.ac.uk/mrc_format/mrc2014.php + + +References +---------- + +.. [Burnley2017] Burnley T, Palmer C and Winn M (2017) Recent + developments in the CCP-EM software suite. *Acta + Cryst.* D73:469-477. doi: `10.1107/S2059798317007859`_ + + +.. _`10.1107/S2059798317007859`: https://doi.org/10.1107/S2059798317007859 + +Classes +------- + +""" +from __future__ import absolute_import +from six.moves import range + +import numpy as np +import mrcfile + + +class MRC(object): + """Represent a MRC/CCP4 file. + + Load `MRC/CCP4 2014 `_ 3D volumetric data with + the mrcfile_ library. + + Parameters + ---------- + filename : str (optional) + input file (or stream), can be compressed + + + Attributes + ---------- + header : numpy.recarray + Header data from the MRC file as a numpy record array. + + array : numpy.ndarray + Data as a 3-dimensional array where axis 0 corresponds to X, + axis 1 to Y, and axis 2 to Z. This order is always enforced, + regardless of the order in the mrc file. + + delta : numpy.ndarray + Diagonal matrix with the voxel size in X, Y, and Z direction + (taken from the :attr:`mrcfile.mrcfile.voxel_size` attribute) + + origin : numpy.ndarray + numpy array with coordinates of the coordinate system origin + (taken from :attr:`header.origin`) + + rank : int + The integer 3, denoting that only 3D maps are read. + + + Notes + ----- + * Only volumetric (3D) densities are read. + * Only orthorhombic unitcells supported (other raise :exc:`ValueError`) + * Only reading is currently supported. + + + .. versionadded:: 0.7.0 + + """ + + def __init__(self, filename=None): + self.filename = filename + if filename is not None: + self.read(filename) + + def read(self, filename): + """Populate the instance from the MRC/CCP4 file *filename*.""" + if filename is not None: + self.filename = filename + with mrcfile.open(filename) as mrc: + if not mrc.is_volume(): #pragma: no cover + raise ValueError( + "MRC file {} is not a volumetric density.".format(filename)) + self.header = h = mrc.header.copy() + # check for being orthorhombic + if not np.allclose([h.cellb.alpha, h.cellb.beta, h.cellb.gamma], + [90, 90, 90]): + raise ValueError("Only orthorhombic unitcells are currently " + "supported, not " + "alpha={0}, beta={1}, gamma={2}".format( + h.cellb.alpha, h.cellb.beta, h.cellb.gamma)) + # mrc.data[z, y, x] indexed: convert to x,y,z as used in GridDataFormats + # together with the axes orientation information in mapc/mapr/maps. + # mapc, mapr, maps = 1, 2, 3 for Fortran-ordering and 3, 2, 1 for C-ordering. + # Other combinations are possible. We reorder the data for the general case + # by sorting mapc, mapr, maps in ascending order, i.e., to obtain x,y,z. + # mrcfile provides the data in zyx shape (without regard to map*) so we first + # transpose it to xyz and then reorient with axes_c_order. + # + # All other "xyz" quantitities are also reordered. + axes_order = np.hstack([h.mapc, h.mapr, h.maps]) + axes_c_order = np.argsort(axes_order) + transpose_order = np.argsort(axes_order[::-1]) + self.array = np.transpose(mrc.data, axes=transpose_order) + self.delta = np.diag(np.array([mrc.voxel_size.x, mrc.voxel_size.y, mrc.voxel_size.z])) + # the grid is shifted to the MRC origin by offset + # (assume orthorhombic) + offsets = np.hstack([h.nxstart, h.nystart, h.nzstart])[axes_c_order] * np.diag(self.delta) + # GridData origin is centre of cell at x=col=0, y=row=0 z=seg=0 + self.origin = np.hstack([h.origin.x, h.origin.y, h.origin.z]) + offsets + self.rank = 3 + + @property + def shape(self): + """Shape of the :attr:`array`""" + return self.array.shape + + @property + def edges(self): + """Edges of the grid cells, origin at centre of 0,0,0 grid cell. + + Only works for regular, orthonormal grids. + """ + # TODO: Add triclinic cell support. + return [self.delta[d, d] * np.arange(self.shape[d] + 1) + + self.origin[d] - 0.5 * self.delta[d, d] + for d in range(self.rank)] + + def histogramdd(self): + """Return array data as (edges,grid), i.e. a numpy nD histogram.""" + return (self.array, self.edges) + + + diff --git a/gridData/tests/datafiles/EMD-3001.map.bz2 b/gridData/tests/datafiles/EMD-3001.map.bz2 new file mode 100644 index 0000000..76e07e6 Binary files /dev/null and b/gridData/tests/datafiles/EMD-3001.map.bz2 differ diff --git a/gridData/tests/datafiles/__init__.py b/gridData/tests/datafiles/__init__.py index e38acf1..d759d01 100644 --- a/gridData/tests/datafiles/__init__.py +++ b/gridData/tests/datafiles/__init__.py @@ -10,6 +10,8 @@ # from http://www.ebi.ac.uk/pdbe/coordinates/files/1jzv.ccp4 # (see issue #57) CCP4_1JZV = resource_filename(__name__, '1jzv.ccp4') +# from https://github.com/ccpem/mrcfile/blob/master/tests/test_data/EMD-3001.map.bz2 +MRC_EMD3001 = resource_filename(__name__, 'EMD-3001.map.bz2') # water density around M2 TM helices of nAChR from MD simulations # [O. Beckstein and M. S. P. Sansom. Physical Biology 3(2):147-159, 2006] gOpenMol = resource_filename(__name__, 'nAChR_M2_water.plt') diff --git a/gridData/tests/test_ccp4.py b/gridData/tests/test_ccp4.py index 5bebf28..9a9e614 100644 --- a/gridData/tests/test_ccp4.py +++ b/gridData/tests/test_ccp4.py @@ -12,7 +12,12 @@ @pytest.fixture(scope="module") def g(): - return Grid(datafiles.CCP4) + with pytest.warns(DeprecationWarning, + match="CCP4.CCP4 is being replaced by mrc.MRC and will be removed"): + ccp4 = CCP4.CCP4() + ccp4.read(datafiles.CCP4) + grid, edges = ccp4.histogramdd() + return Grid(grid=grid, edges=edges) def test_ccp4(g): POINTS = 192 @@ -24,7 +29,10 @@ def test_ccp4(g): @pytest.fixture(scope="module") def ccp4data(): - return CCP4.CCP4(datafiles.CCP4_1JZV) + with pytest.warns(DeprecationWarning, + match="CCP4.CCP4 is being replaced by mrc.MRC and will be removed"): + ccp4 = CCP4.CCP4(datafiles.CCP4_1JZV) + return ccp4 @pytest.mark.parametrize('name,value', [ ('nc', 96), diff --git a/gridData/tests/test_mrc.py b/gridData/tests/test_mrc.py new file mode 100644 index 0000000..a746426 --- /dev/null +++ b/gridData/tests/test_mrc.py @@ -0,0 +1,137 @@ +from __future__ import absolute_import, division + +import pytest + +import numpy as np +from numpy.testing import (assert_allclose, + assert_equal) + +from gridData import Grid, mrc + +from . import datafiles + +@pytest.fixture(scope="module") +def g1(): + return Grid(datafiles.CCP4, file_format="MRC") + +@pytest.fixture(scope="module") +def g2(): + data = mrc.MRC() + data.read(datafiles.CCP4) + grid, edges = data.histogramdd() + return Grid(grid=grid, edges=edges) + +def test_ccp4_Grid(g1): + _test_ccp4(g1) + +def test_ccp4_mrc(g2): + _test_ccp4(g2) + +def _test_ccp4(g): + POINTS = 192 + assert_equal(g.grid.flat, np.arange(1, POINTS+1)) + assert_equal(g.grid.size, POINTS) + assert_allclose(g.delta, [3./4, .5, 2./3]) + assert_equal(g.origin, np.zeros(3)) + + + +@pytest.fixture(scope="module") +def ccp4data(): + return mrc.MRC(datafiles.CCP4_1JZV) + +@pytest.mark.parametrize('name,value', [ + # nx, ny, nz are named nc, nr, ns in the CCP4 module + ('nx', 96), + ('ny', 76), + ('nz', 70), + ('mode', 2), + ('nxstart', -4), + ('nystart', -23), + ('nzstart', 102), + ('mx', 84), + ('my', 84), + ('mz', 160), + ('cella', np.rec.array((45.8, 45.8, 89.65), + dtype=[('x', '=1.0.3', 'six', 'scipy'], + install_requires=['numpy>=1.0.3', 'six', 'scipy', 'mrcfile'], tests_require=['pytest', 'numpy'], zip_safe=True, )