Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/gh-ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ jobs:

- name: install package deps
run: |
mamba install numpy scipy pytest pytest-cov codecov six
mamba install numpy scipy mrcfile six pytest pytest-cov codecov

- name: check install
run: |
Expand Down
19 changes: 19 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,25 @@ The rules for this file:
* accompany each entry with github issue/PR number (Issue #xyz)

------------------------------------------------------------------------------
??/??/2022 orbeckst, tluchko, IAlibay

* 0.7.0

Enhancements

* use mrcfile library to parse MRC files (including CCP4) using the
new mrc.MRC class (issue #83)

Fixes

* The new mrc module correctly reorients the coordinate system based
on mapc, mapr, maps and correctly calculates the origin (issue #76)

Deprecations

* The CCP4 module (replaced by mrc) will be removed in 0.8.0.


10/10/2021 eloyfelix, renehamburger1993, lilyminium, jvermaas, xiki-tempula,
IAlibay, orbeckst

Expand Down
5 changes: 3 additions & 2 deletions doc/source/gridData/formats.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ small number of file formats is directly supported.
============================ ========== ========= ===== ===== =========================================
:mod:`~gridData.OpenDX` OpenDX_ dx x x subset of OpenDX implemented
:mod:`~gridData.gOpenMol` gOpenMol_ plt x
:mod:`~gridData.CCP4` CCP4_ ccp4 x subset implemented
:mod:`~gridData.mrc` CCP4_ ccp4,mrc x subset implemented
:class:`~gridData.core.Grid` pickle pickle x x standard Python pickle of the Grid class
============================ ========== ========= ===== ===== =========================================

Expand All @@ -38,7 +38,7 @@ small number of file formats is directly supported.
.. _Chimera: https://www.cgl.ucsf.edu/chimera/
.. _OpenDX: http://www.opendx.org/
.. _gOpenMol: http://www.csc.fi/gopenmol/
.. _CCP4: http://www.ccp4.ac.uk/html/maplib.html#description
.. _CCP4: http://www.ccpem.ac.uk/mrc_format/mrc2014.php


Format-specific modules
Expand All @@ -49,4 +49,5 @@ Format-specific modules

formats/OpenDX
formats/gOpenMol
formats/mrc
formats/CCP4
2 changes: 2 additions & 0 deletions doc/source/gridData/formats/mrc.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
.. automodule:: gridData.mrc
:members:
15 changes: 13 additions & 2 deletions gridData/CCP4.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,14 @@
# Copyright Science and Technologies Facilities Council, 2015.

"""
:mod:`CCP4` --- the CCP4 volumetric data format
===============================================
:mod:`CCP4` --- the CCP4 volumetric data format (DEPRECATED)
============================================================

.. versionadded:: 0.3.0

.. deprecated:: 0.7.0 The CCP4 module is replaced by :mod:`gridData.mrc` and
will be removed in 0.8.0.

.. _CCP4: http://www.ccp4.ac.uk/html/maplib.html#description

The module provides a simple implementation of a reader for CCP4_
Expand Down Expand Up @@ -160,6 +163,10 @@ class CCP4(object):
* symmetry records
* index ordering besides standard column-major and row-major
* non-standard fields, such any in filed in future use block


.. deprecated:: 0.7.0
Use :class:`gridData.mrc.MRC` instead. This class will be removed in 0.8.0.
"""

_axis_map = {1: 'x', 2: 'y', 3: 'z'}
Expand Down Expand Up @@ -196,6 +203,10 @@ def __init__(self, filename=None):
if filename is not None:
self.read(filename)

warnings.warn("CCP4.CCP4 is being replaced by mrc.MRC and will be removed "
"in release 0.8.0",
category=DeprecationWarning)

def read(self, filename):
"""Populate the instance from the ccp4 file *filename*."""
if filename is not None:
Expand Down
12 changes: 2 additions & 10 deletions gridData/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,16 +93,8 @@
Formats
-------

The following formats are available (:ref:`supported-file-formats`):

:mod:`~gridData.OpenDX`
IBM's Data Explorer, http://www.opendx.org/
:mod:`~gridData.gOpenMol`
http://www.csc.fi/gopenmol/
:mod:`~gridData.CCP4`
CCP4 format http://www.ccp4.ac.uk/html/maplib.html#description
pickle
python pickle file (:mod:`pickle`)
For the available file formats see :ref:`supported-file-formats`.



"""
Expand Down
21 changes: 14 additions & 7 deletions gridData/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@

from . import OpenDX
from . import gOpenMol
from . import CCP4
from . import mrc


def _grid(x):
Expand Down Expand Up @@ -114,6 +114,10 @@ def __init__(self, grid=None, edges=None, origin=None, delta=None,
.. versionchanged:: 0.5.0
New *file_format* keyword argument.

.. versionchanged:: 0.7.0
CCP4 files are now read with :class:`gridData.mrc.MRC` and not anymore
with the deprecated/buggy `ccp4.CCP4`

"""
# file formats are guess from extension == lower case key
self._exporters = {
Expand All @@ -123,7 +127,8 @@ def __init__(self, grid=None, edges=None, origin=None, delta=None,
'PYTHON': self._export_python, # compatibility
}
self._loaders = {
'CCP4': self._load_cpp4,
'CCP4': self._load_mrc,
'MRC': self._load_mrc,
'DX': self._load_dx,
'PLT': self._load_plt,
'PKL': self._load_python,
Expand Down Expand Up @@ -471,12 +476,14 @@ def _load_python(self, filename):
edges=saved['edges'],
metadata=saved['metadata'])

def _load_cpp4(self, filename):
"""Initializes Grid from a CCP4 file."""
ccp4 = CCP4.CCP4()
ccp4.read(filename)
grid, edges = ccp4.histogramdd()
def _load_mrc(self, filename):
"""Initializes Grid from a MRC/CCP4 file."""
mrcfile = mrc.MRC(filename)
grid, edges = mrcfile.histogramdd()
self._load(grid=grid, edges=edges, metadata=self.metadata)
# Store header for access from Grid object (undocumented)
# https://github.com/MDAnalysis/GridDataFormats/pull/100#discussion_r782604833
self._mrc_header = mrcfile.header.copy()

def _load_dx(self, filename):
"""Initializes Grid from a OpenDX file."""
Expand Down
149 changes: 149 additions & 0 deletions gridData/mrc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
# gridDataFormats --- python modules to read and write gridded data
# Copyright (c) 2009-2021 Oliver Beckstein <orbeckst@gmail.com>
# Released under the GNU Lesser General Public License, version 3 or later.
#

""":mod:`mrc` --- the mrc/CCP4 volumetric data format
===================================================

.. versionadded:: 0.7.0

Reading of MRC/CCP4 volumetric files (`MRC2014 file format`_) using
the mrcfile_ library [Burnley2017]_.

This implementation replaces the :mod:`CCP4` module.

.. _mrcfile: https://mrcfile.readthedocs.io/
.. _`MRC2014 file format`: http://www.ccpem.ac.uk/mrc_format/mrc2014.php


References
----------

.. [Burnley2017] Burnley T, Palmer C and Winn M (2017) Recent
developments in the CCP-EM software suite. *Acta
Cryst.* D73:469-477. doi: `10.1107/S2059798317007859`_


.. _`10.1107/S2059798317007859`: https://doi.org/10.1107/S2059798317007859

Classes
-------

"""
from __future__ import absolute_import
from six.moves import range

import numpy as np
import mrcfile


class MRC(object):
"""Represent a MRC/CCP4 file.

Load `MRC/CCP4 2014 <MRC2014 file format>`_ 3D volumetric data with
the mrcfile_ library.

Parameters
----------
filename : str (optional)
input file (or stream), can be compressed


Attributes
----------
header : numpy.recarray
Header data from the MRC file as a numpy record array.

array : numpy.ndarray
Data as a 3-dimensional array where axis 0 corresponds to X,
axis 1 to Y, and axis 2 to Z. This order is always enforced,
regardless of the order in the mrc file.

delta : numpy.ndarray
Diagonal matrix with the voxel size in X, Y, and Z direction
(taken from the :attr:`mrcfile.mrcfile.voxel_size` attribute)

origin : numpy.ndarray
numpy array with coordinates of the coordinate system origin
(taken from :attr:`header.origin`)

rank : int
The integer 3, denoting that only 3D maps are read.


Notes
-----
* Only volumetric (3D) densities are read.
* Only orthorhombic unitcells supported (other raise :exc:`ValueError`)
* Only reading is currently supported.


.. versionadded:: 0.7.0

"""

def __init__(self, filename=None):
self.filename = filename
if filename is not None:
self.read(filename)

def read(self, filename):
"""Populate the instance from the MRC/CCP4 file *filename*."""
if filename is not None:
self.filename = filename
with mrcfile.open(filename) as mrc:
if not mrc.is_volume(): #pragma: no cover
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is calling validate worth it too? (not sure how much of an issue a non-valid mrc file is)

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You might want to read a file with an iffy header and just get the data. Not sure, TBH.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No implementing for now because I don't understand the implication/standard use.

raise ValueError(
"MRC file {} is not a volumetric density.".format(filename))
self.header = h = mrc.header.copy()
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

any difference between header and extended_header here?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

don't know — experts??

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From https://www.ccpem.ac.uk/mrc_format/mrc2014.php,

The extended header is now used by different software to hold various additional metadata instead, as indicated by the EXTTYP tag.

The EXTTYP flag takes seven different values. You could make a copy incase the user wants it. Note that self.header does not seem to be accessible from Grid or, at least, it is not obvious to me how to access it.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you want to have access to header ?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't have a personal-use case in mind. But, if it isn't difficult, it could be beneficial in cases we haven't thought of yet. Not everything in the header ends up in a Grid object.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am adding the header as undocumented Grid._mrc_header. If it is used then we can think of a better way to keep format-specific data.

# check for being orthorhombic
if not np.allclose([h.cellb.alpha, h.cellb.beta, h.cellb.gamma],
[90, 90, 90]):
raise ValueError("Only orthorhombic unitcells are currently "
"supported, not "
"alpha={0}, beta={1}, gamma={2}".format(
h.cellb.alpha, h.cellb.beta, h.cellb.gamma))
# mrc.data[z, y, x] indexed: convert to x,y,z as used in GridDataFormats
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to do further manipulations here?

Note that the MRC format allows the data axes to be swapped using the header’s mapc, mapr and maps fields. This library does not attempt to swap the axes and simply assigns the columns to X, rows to Y and sections to Z. (The data array is indexed in C style, so data values can be accessed using mrc.data[z][y][x].) In general, EM data is written using the default axes, but crystallographic data files might use swapped axes in certain space groups – if this might matter to you, you should check the mapc, mapr and maps fields after opening the file and consider transposing the data array if necessary.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree. If the file has mapc, mapr, maps = 3, 2, 1, then the data should already be in C-order and np.transpose() would be unnecessary. mapc, mapr, maps = 1, 2, 3 is Fortran-order. It might be best to check for these two cases and disallow the rest. np.transpose() should be able to handle the other cases but it they are probably rare.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point, this needs to be fixed then. Perhaps that fixes #76, too.

Code welcome!!!

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ultimately, this and calculating the origin correctly did fix #76

# together with the axes orientation information in mapc/mapr/maps.
# mapc, mapr, maps = 1, 2, 3 for Fortran-ordering and 3, 2, 1 for C-ordering.
# Other combinations are possible. We reorder the data for the general case
# by sorting mapc, mapr, maps in ascending order, i.e., to obtain x,y,z.
# mrcfile provides the data in zyx shape (without regard to map*) so we first
# transpose it to xyz and then reorient with axes_c_order.
#
# All other "xyz" quantitities are also reordered.
axes_order = np.hstack([h.mapc, h.mapr, h.maps])
axes_c_order = np.argsort(axes_order)
transpose_order = np.argsort(axes_order[::-1])
self.array = np.transpose(mrc.data, axes=transpose_order)
self.delta = np.diag(np.array([mrc.voxel_size.x, mrc.voxel_size.y, mrc.voxel_size.z]))
# the grid is shifted to the MRC origin by offset
# (assume orthorhombic)
offsets = np.hstack([h.nxstart, h.nystart, h.nzstart])[axes_c_order] * np.diag(self.delta)
# GridData origin is centre of cell at x=col=0, y=row=0 z=seg=0
self.origin = np.hstack([h.origin.x, h.origin.y, h.origin.z]) + offsets
self.rank = 3

@property
def shape(self):
"""Shape of the :attr:`array`"""
return self.array.shape

@property
def edges(self):
"""Edges of the grid cells, origin at centre of 0,0,0 grid cell.

Only works for regular, orthonormal grids.
"""
# TODO: Add triclinic cell support.
return [self.delta[d, d] * np.arange(self.shape[d] + 1) +
self.origin[d] - 0.5 * self.delta[d, d]
for d in range(self.rank)]

def histogramdd(self):
"""Return array data as (edges,grid), i.e. a numpy nD histogram."""
return (self.array, self.edges)



Binary file added gridData/tests/datafiles/EMD-3001.map.bz2
Binary file not shown.
2 changes: 2 additions & 0 deletions gridData/tests/datafiles/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
# from http://www.ebi.ac.uk/pdbe/coordinates/files/1jzv.ccp4
# (see issue #57)
CCP4_1JZV = resource_filename(__name__, '1jzv.ccp4')
# from https://github.com/ccpem/mrcfile/blob/master/tests/test_data/EMD-3001.map.bz2
MRC_EMD3001 = resource_filename(__name__, 'EMD-3001.map.bz2')
# water density around M2 TM helices of nAChR from MD simulations
# [O. Beckstein and M. S. P. Sansom. Physical Biology 3(2):147-159, 2006]
gOpenMol = resource_filename(__name__, 'nAChR_M2_water.plt')
12 changes: 10 additions & 2 deletions gridData/tests/test_ccp4.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,12 @@

@pytest.fixture(scope="module")
def g():
return Grid(datafiles.CCP4)
with pytest.warns(DeprecationWarning,
match="CCP4.CCP4 is being replaced by mrc.MRC and will be removed"):
ccp4 = CCP4.CCP4()
ccp4.read(datafiles.CCP4)
grid, edges = ccp4.histogramdd()
return Grid(grid=grid, edges=edges)

def test_ccp4(g):
POINTS = 192
Expand All @@ -24,7 +29,10 @@ def test_ccp4(g):

@pytest.fixture(scope="module")
def ccp4data():
return CCP4.CCP4(datafiles.CCP4_1JZV)
with pytest.warns(DeprecationWarning,
match="CCP4.CCP4 is being replaced by mrc.MRC and will be removed"):
ccp4 = CCP4.CCP4(datafiles.CCP4_1JZV)
return ccp4

@pytest.mark.parametrize('name,value', [
('nc', 96),
Expand Down
Loading