Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
149 changes: 149 additions & 0 deletions dpdata/deepmd/hdf5.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
"""Utils for deepmd/hdf5 format."""
import h5py
import numpy as np

from wcmatch.glob import globfilter


__all__ = ['to_system_data', 'dump']

def to_system_data(f: h5py.File,
                   folder: str,
                   type_map: list = None,
                   labels: bool = True):
    """Load a system from a HDF5 file.

    Parameters
    ----------
    f : h5py.File
        HDF5 file object
    folder : str
        path of the group inside the HDF5 file; a falsy value (``""`` or
        ``None``) selects the root of the file
    type_map : list
        atom names indexed by atom type; only used when the group does not
        contain a ``type_map.raw`` dataset
    labels : bool
        whether to load the labeled properties (energies, forces, virials);
        when False only cells and coords are read

    Returns
    -------
    dict
        system data with the keys ``atom_types``, ``atom_numbs``,
        ``atom_names``, ``orig``, optionally ``nopbc``, and one array per
        frame property found in the ``set.*`` groups

    Raises
    ------
    RuntimeError
        if a required dataset (coords, or box for periodic systems) is
        missing from one of the ``set.*`` groups
    """
    g = f[folder] if folder else f

    data = {}
    data['atom_types'] = g['type.raw'][:]
    ntypes = np.max(data['atom_types']) + 1
    natoms = data['atom_types'].size
    data['atom_numbs'] = [
        np.count_nonzero(data['atom_types'] == ii) for ii in range(ntypes)
    ]

    # Atom names, by priority: type_map.raw stored in the file, then the
    # type_map argument, then artificial names.
    if 'type_map.raw' in g.keys():
        my_type_map = list(np.char.decode(g['type_map.raw'][:]))
    elif type_map is not None:
        my_type_map = type_map
    else:
        my_type_map = ['Type_%d' % ii for ii in range(ntypes)]
    assert len(my_type_map) >= len(data['atom_numbs']), \
        "type map is shorter than the number of atom types"
    data['atom_names'] = [my_type_map[ii] for ii in range(len(data['atom_numbs']))]

    data['orig'] = np.zeros([3])
    # The mere presence of a "nopbc" entry marks a non-periodic system.
    if 'nopbc' in g.keys():
        data['nopbc'] = True
    sets = globfilter(g.keys(), 'set.*')

    data_types = {
        'cells': {'fn': 'box', 'labeled': False, 'shape': (3, 3), 'required': 'nopbc' not in data},
        'coords': {'fn': 'coord', 'labeled': False, 'shape': (natoms, 3), 'required': True},
        'energies': {'fn': 'energy', 'labeled': True, 'shape': tuple(), 'required': False},
        'forces': {'fn': 'force', 'labeled': True, 'shape': (natoms, 3), 'required': False},
        'virials': {'fn': 'virial', 'labeled': True, 'shape': (3, 3), 'required': False},
    }

    for dt, prop in data_types.items():
        # Honor the ``labels`` switch: unlabeled loads skip labeled properties.
        if prop['labeled'] and not labels:
            continue

        fn = '%s.npy' % prop['fn']
        all_data = []
        for set_name in sets:
            set_group = g[set_name]
            if fn in set_group.keys():
                dd = set_group[fn][:]
                # Frames are stored flattened; restore the per-frame shape.
                all_data.append(np.reshape(dd, (dd.shape[0], *prop['shape'])))
            elif prop['required']:
                raise RuntimeError("%s/%s/%s not found" % (folder, set_name, fn))

        if all_data:
            data[dt] = np.concatenate(all_data, axis=0)
    return data

def dump(f: h5py.File,
         folder: str,
         data: dict,
         set_size = 5000,
         comp_prec = np.float32,
         ) -> None:
    """Dump data to a HDF5 file.

    Parameters
    ----------
    f : h5py.File
        HDF5 file object, opened in a writable mode
    folder : str
        path of the group inside the HDF5 file; a falsy value (``""`` or
        ``None``) writes to the root of the file. An existing group with
        the same path is deleted first.
    data : dict
        System or LabeledSystem data
    set_size : int, default: 5000
        maximum number of frames stored in one ``set.XXX`` group
    comp_prec : np.dtype, default: np.float32
        precision the frame properties are converted to

    Raises
    ------
    ValueError
        if `set_size` is smaller than 1
    """
    if set_size < 1:
        raise ValueError("set_size must be a positive integer, got %r" % (set_size,))

    # if folder is empty, use the root of the file
    if folder:
        if folder in f:
            del f[folder]
        g = f.create_group(folder)
    else:
        g = f

    # dump raw (array in fact)
    g.create_dataset('type.raw', data=data['atom_types'])
    g.create_dataset('type_map.raw', data=np.array(data['atom_names'], dtype='S'))
    # BondOrder System
    if "bonds" in data:
        g.create_dataset("bonds.raw", data=data['bonds'])
    if "formal_charges" in data:
        g.create_dataset("formal_charges.raw", data=data['formal_charges'])

    nframes = data['cells'].shape[0]
    nopbc = data.get("nopbc", False)

    data_types = {
        'cells': {'fn': 'box', 'shape': (nframes, 9), 'dump': not nopbc},
        'coords': {'fn': 'coord', 'shape': (nframes, -1), 'dump': True},
        'energies': {'fn': 'energy', 'shape': (nframes,), 'dump': True},
        'forces': {'fn': 'force', 'shape': (nframes, -1), 'dump': True},
        'virials': {'fn': 'virial', 'shape': (nframes, 9), 'dump': True},
    }

    # flatten every frame property to one row per frame and convert precision
    reshaped_data = {
        dt: np.reshape(data[dt], prop['shape']).astype(comp_prec)
        for dt, prop in data_types.items()
        if dt in data and prop['dump']
    }

    # dump frame properties: cell, coord, energy, force and virial,
    # split into ceil(nframes / set_size) sets
    nsets = (nframes + set_size - 1) // set_size
    for ii in range(nsets):
        set_stt = ii * set_size
        set_end = set_stt + set_size
        set_folder = g.create_group('set.%03d' % ii)
        for dt, arr in reshaped_data.items():
            set_folder.create_dataset('%s.npy' % data_types[dt]['fn'],
                                      data=arr[set_stt:set_end])

    if nopbc:
        # Pass the flag as ``data``: the second positional argument of
        # create_dataset is ``shape``, not the value to store.
        g.create_dataset("nopbc", data=True)
48 changes: 48 additions & 0 deletions dpdata/plugins/deepmd.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import dpdata.deepmd.raw
import dpdata.deepmd.comp
import dpdata.deepmd.hdf5
import numpy as np
import h5py
from dpdata.format import Format


Expand Down Expand Up @@ -54,3 +56,49 @@ def from_labeled_system(self, file_name, type_map=None, **kwargs):
return dpdata.deepmd.comp.to_system_data(file_name, type_map=type_map, labels=True)

MultiMode = Format.MultiModes.Directory

@Format.register("deepmd/hdf5")
class DeePMDCompFormat(Format):
    """HDF5 format for DeePMD-kit.

    A system is addressed as ``<file>#<group>``; without ``#`` the root of
    the HDF5 file is used.

    Examples
    --------
    Dump a MultiSystems to a HDF5 file:
    >>> import dpdata
    >>> dpdata.MultiSystems().from_deepmd_npy("data").to_deepmd_hdf5("data.hdf5")
    """
    # NOTE(review): the class name says "Comp" although it registers
    # "deepmd/hdf5" -- confirm it does not shadow another class of the same
    # name defined earlier in this module.

    def from_system(self, file_name, type_map=None, **kwargs):
        """Load an unlabeled system from ``file_name`` (``<file>[#<group>]``)."""
        s = file_name.split("#")
        name = s[1] if len(s) > 1 else ""
        with h5py.File(s[0], 'r') as f:
            return dpdata.deepmd.hdf5.to_system_data(f, name, type_map=type_map, labels=False)

    def from_labeled_system(self, file_name, type_map=None, **kwargs):
        """Load a labeled system from ``file_name`` (``<file>[#<group>]``)."""
        s = file_name.split("#")
        name = s[1] if len(s) > 1 else ""
        with h5py.File(s[0], 'r') as f:
            return dpdata.deepmd.hdf5.to_system_data(f, name, type_map=type_map, labels=True)

    def to_system(self,
                  data : dict,
                  file_name : str,
                  set_size : int = 5000,
                  comp_prec : np.dtype = np.float32,
                  prec : np.dtype = None,
                  **kwargs):
        """Dump a system to ``file_name`` (``<file>[#<group>]``).

        Parameters
        ----------
        data : dict
            System or LabeledSystem data
        file_name : str
            HDF5 file name, optionally followed by ``#`` and the group path
        set_size : int, default: 5000
            size of a set
        comp_prec : np.dtype, default: np.float32
            precision of data
        prec : np.dtype, optional
            alias of `comp_prec`; takes precedence when given, so that a
            ``prec=...`` keyword is no longer silently swallowed by
            ``**kwargs``
        """
        if prec is not None:
            comp_prec = prec
        s = file_name.split("#")
        name = s[1] if len(s) > 1 else ""
        # A group inside a file is appended to the file; writing to the
        # root truncates and recreates the file.
        mode = 'a' if name else 'w'
        with h5py.File(s[0], mode) as f:
            dpdata.deepmd.hdf5.dump(f, name, data, set_size = set_size, comp_prec = comp_prec)

    def from_multi_systems(self,
                           directory,
                           **kwargs):
        """Return one ``<file>#<group>`` name per top-level key of the HDF5 file."""
        with h5py.File(directory, 'r') as f:
            return ["%s#%s" % (directory, ff) for ff in f.keys()]

    def to_multi_systems(self,
                         formulas,
                         directory,
                         **kwargs):
        """Return one ``<file>#<formula>`` name per formula."""
        return ["%s#%s" % (directory, ff) for ff in formulas]
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
readme = f.read()

# install_requires = ['xml']
install_requires=['numpy>=1.14.3', 'monty', 'scipy']
install_requires=['numpy>=1.14.3', 'monty', 'scipy', 'h5py', 'wcmatch']

setuptools.setup(
name="dpdata",
Expand Down
45 changes: 45 additions & 0 deletions tests/test_deepmd_hdf5.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import os
import numpy as np
import unittest
from context import dpdata
from comp_sys import CompLabeledSys, CompSys, IsPBC

class TestDeepmdLoadDumpComp(unittest.TestCase, CompLabeledSys, IsPBC):
    """Round-trip a labeled system through a deepmd/hdf5 file."""

    def setUp(self):
        """Dump a VASP OUTCAR system to HDF5 and load it back."""
        h5_file = 'tmp.deepmd.hdf5'
        dump_options = dict(prec=np.float64, set_size=2)

        self.system_1 = dpdata.LabeledSystem('poscars/OUTCAR.h2o.md', fmt='vasp/outcar')
        self.system_1.to_deepmd_hdf5(h5_file, **dump_options)

        self.system_2 = dpdata.LabeledSystem(h5_file, fmt='deepmd/hdf5', type_map=['O', 'H'])
        # presumably the decimal places read by the comparison mixins -- verify
        self.places = self.e_places = self.f_places = self.v_places = 6

    def tearDown(self):
        """Remove the temporary HDF5 file if it was created."""
        h5_file = 'tmp.deepmd.hdf5'
        if not os.path.exists(h5_file):
            return
        os.remove(h5_file)


class TestDeepmdCompNoLabels(unittest.TestCase, CompSys, IsPBC):
    """Round-trip an unlabeled system through a deepmd/hdf5 file."""

    def setUp(self):
        """Dump a VASP POSCAR system to HDF5 and load it back."""
        h5_file = 'tmp.deepmd.hdf5'
        dump_options = dict(prec=np.float64, set_size=2)

        self.system_1 = dpdata.System('poscars/POSCAR.h2o.md', fmt='vasp/poscar')
        self.system_1.to_deepmd_hdf5(h5_file, **dump_options)

        self.system_2 = dpdata.System(h5_file, fmt='deepmd/hdf5', type_map=['O', 'H'])
        # presumably the decimal places read by the comparison mixins -- verify
        self.places = self.e_places = self.f_places = self.v_places = 6

    def tearDown(self):
        """Remove the temporary HDF5 file if it was created."""
        h5_file = 'tmp.deepmd.hdf5'
        if not os.path.exists(h5_file):
            return
        os.remove(h5_file)