Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
159 changes: 85 additions & 74 deletions dpdata/deepmd/mixed.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,60 +54,80 @@ def to_system_data(folder, type_map=None, labels=True):
if os.path.isfile(os.path.join(folder, "nopbc")):
data["nopbc"] = True
sets = sorted(glob.glob(os.path.join(folder, "set.*")))
assert len(sets) == 1, "Mixed type must have only one set!"
cells, coords, eners, forces, virs, real_atom_types = _load_set(
sets[0], data.get("nopbc", False)
)
nframes = np.reshape(cells, [-1, 3, 3]).shape[0]
cells = np.reshape(cells, [nframes, 3, 3])
coords = np.reshape(coords, [nframes, -1, 3])
real_atom_types = np.reshape(real_atom_types, [nframes, -1])
natom = real_atom_types.shape[1]
if labels:
if eners is not None and eners.size > 0:
all_cells = []
all_coords = []
all_eners = []
all_forces = []
all_virs = []
all_real_atom_types = []
for ii in sets:
cells, coords, eners, forces, virs, real_atom_types = _load_set(
ii, data.get("nopbc", False)
)
nframes = np.reshape(cells, [-1, 3, 3]).shape[0]
all_cells.append(np.reshape(cells, [nframes, 3, 3]))
all_coords.append(np.reshape(coords, [nframes, -1, 3]))
all_real_atom_types.append(np.reshape(real_atom_types, [nframes, -1]))
if eners is not None:
eners = np.reshape(eners, [nframes])
if forces is not None and forces.size > 0:
forces = np.reshape(forces, [nframes, -1, 3])
if virs is not None and virs.size > 0:
virs = np.reshape(virs, [nframes, 3, 3])
if labels:
if eners is not None and eners.size > 0:
all_eners.append(np.reshape(eners, [nframes]))
if forces is not None and forces.size > 0:
all_forces.append(np.reshape(forces, [nframes, -1, 3]))
if virs is not None and virs.size > 0:
all_virs.append(np.reshape(virs, [nframes, 3, 3]))
all_cells_concat = np.concatenate(all_cells, axis=0)
all_coords_concat = np.concatenate(all_coords, axis=0)
all_real_atom_types_concat = np.concatenate(all_real_atom_types, axis=0)
all_eners_concat = None
all_forces_concat = None
all_virs_concat = None
if len(all_eners) > 0:
all_eners_concat = np.concatenate(all_eners, axis=0)
if len(all_forces) > 0:
all_forces_concat = np.concatenate(all_forces, axis=0)
if len(all_virs) > 0:
all_virs_concat = np.concatenate(all_virs, axis=0)
data_list = []
while True:
if real_atom_types.size == 0:
if all_real_atom_types_concat.size == 0:
break
temp_atom_numbs = [
np.count_nonzero(real_atom_types[0] == i)
np.count_nonzero(all_real_atom_types_concat[0] == i)
for i in range(len(data["atom_names"]))
]
# temp_formula = formula(data['atom_names'], temp_atom_numbs)
temp_idx = np.arange(real_atom_types.shape[0])[
(real_atom_types == real_atom_types[0]).all(-1)
temp_idx = np.arange(all_real_atom_types_concat.shape[0])[
(all_real_atom_types_concat == all_real_atom_types_concat[0]).all(-1)
]
rest_idx = np.arange(real_atom_types.shape[0])[
(real_atom_types != real_atom_types[0]).any(-1)
rest_idx = np.arange(all_real_atom_types_concat.shape[0])[
(all_real_atom_types_concat != all_real_atom_types_concat[0]).any(-1)
]
temp_data = data.copy()
temp_data["atom_names"] = data["atom_names"].copy()
temp_data["atom_numbs"] = temp_atom_numbs
temp_data["atom_types"] = real_atom_types[0]
real_atom_types = real_atom_types[rest_idx]
temp_data["cells"] = cells[temp_idx]
cells = cells[rest_idx]
temp_data["coords"] = coords[temp_idx]
coords = coords[rest_idx]
temp_data["atom_types"] = all_real_atom_types_concat[0]
all_real_atom_types_concat = all_real_atom_types_concat[rest_idx]
temp_data["cells"] = all_cells_concat[temp_idx]
all_cells_concat = all_cells_concat[rest_idx]
temp_data["coords"] = all_coords_concat[temp_idx]
all_coords_concat = all_coords_concat[rest_idx]
if labels:
if eners is not None and eners.size > 0:
temp_data["energies"] = eners[temp_idx]
eners = eners[rest_idx]
if forces is not None and forces.size > 0:
temp_data["forces"] = forces[temp_idx]
forces = forces[rest_idx]
if virs is not None and virs.size > 0:
temp_data["virials"] = virs[temp_idx]
virs = virs[rest_idx]
if all_eners_concat is not None and all_eners_concat.size > 0:
temp_data["energies"] = all_eners_concat[temp_idx]
all_eners_concat = all_eners_concat[rest_idx]
if all_forces_concat is not None and all_forces_concat.size > 0:
temp_data["forces"] = all_forces_concat[temp_idx]
all_forces_concat = all_forces_concat[rest_idx]
if all_virs_concat is not None and all_virs_concat.size > 0:
temp_data["virials"] = all_virs_concat[temp_idx]
all_virs_concat = all_virs_concat[rest_idx]
data_list.append(temp_data)
return data_list


def dump(folder, data, comp_prec=np.float32, remove_sets=True):
def dump(folder, data, set_size=2000, comp_prec=np.float32, remove_sets=True):
os.makedirs(folder, exist_ok=True)
sets = sorted(glob.glob(os.path.join(folder, "set.*")))
if len(sets) > 0:
Expand Down Expand Up @@ -164,20 +184,29 @@ def dump(folder, data, comp_prec=np.float32, remove_sets=True):
np.int64
)
# dump frame properties: cell, coord, energy, force and virial
set_folder = os.path.join(folder, "set.%03d" % 0)
os.makedirs(set_folder)
np.save(os.path.join(set_folder, "box"), cells)
np.save(os.path.join(set_folder, "coord"), coords)
if eners is not None:
np.save(os.path.join(set_folder, "energy"), eners)
if forces is not None:
np.save(os.path.join(set_folder, "force"), forces)
if virials is not None:
np.save(os.path.join(set_folder, "virial"), virials)
if real_atom_types is not None:
np.save(os.path.join(set_folder, "real_atom_types"), real_atom_types)
if "atom_pref" in data:
np.save(os.path.join(set_folder, "atom_pref"), atom_pref)
nsets = nframes // set_size
if set_size * nsets < nframes:
nsets += 1
for ii in range(nsets):
set_stt = ii * set_size
set_end = (ii + 1) * set_size
set_folder = os.path.join(folder, "set.%06d" % ii)
os.makedirs(set_folder)
np.save(os.path.join(set_folder, "box"), cells[set_stt:set_end])
np.save(os.path.join(set_folder, "coord"), coords[set_stt:set_end])
if eners is not None:
np.save(os.path.join(set_folder, "energy"), eners[set_stt:set_end])
if forces is not None:
np.save(os.path.join(set_folder, "force"), forces[set_stt:set_end])
if virials is not None:
np.save(os.path.join(set_folder, "virial"), virials[set_stt:set_end])
if real_atom_types is not None:
np.save(
os.path.join(set_folder, "real_atom_types"),
real_atom_types[set_stt:set_end],
)
if "atom_pref" in data:
np.save(os.path.join(set_folder, "atom_pref"), atom_pref[set_stt:set_end])
try:
os.remove(os.path.join(folder, "nopbc"))
except OSError:
Expand All @@ -187,61 +216,43 @@ def dump(folder, data, comp_prec=np.float32, remove_sets=True):
pass


def mix_system(*system, type_map, **kwargs):
    """Mix the systems into mixed_type ones according to the unified given type_map.

    Each input system is copied, converted to the mixed-type representation
    with ``type_map``, and appended to the other converted systems that have
    the same number of atoms. The inputs themselves are not modified.

    Parameters
    ----------
    *system : System
        The systems to mix
    type_map : list of str
        Maps atom type to name
    **kwargs : dict
        Accepted and ignored. This also absorbs the former ``split_num``
        keyword, so callers that still pass it keep working.

    Returns
    -------
    mixed_systems: dict
        dict of mixed system with key 'atom_numbs'
    """
    merged = {}  # str(natoms) -> system collecting every frame with that atom count
    frame_counts = {}  # str(natoms) -> number of frames gathered so far
    for one_system in system:
        converted = one_system.copy()
        # The key is taken before the conversion, as in the original code.
        key = str(converted.get_natoms())
        converted.convert_to_mixed_type(type_map=type_map)
        frame_counts[key] = frame_counts.get(key, 0) + converted.get_nframes()
        if not merged.get(key):
            # First system for this atom count (or the stored one is falsy).
            merged[key] = converted
        else:
            merged[key].append(converted)
    # Only groups that actually contain frames are reported.
    return {key: merged[key] for key in merged if frame_counts[key] > 0}


def split_system(sys, split_num=100):
def split_system(sys, split_num=10000):
rest = sys.get_nframes() - split_num
if rest <= 0:
return sys, None, 0
Expand Down
6 changes: 2 additions & 4 deletions dpdata/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def to_multi_systems(self, formulas, directory, **kwargs):
"%s doesn't support MultiSystems.to" % (self.__class__.__name__)
)

def mix_system(self, *system, type_map, split_num=200, **kwargs):
def mix_system(self, *system, type_map, **kwargs):
"""Mix the systems into mixed_type ones according to the unified given type_map.

Parameters
Expand All @@ -141,13 +141,11 @@ def mix_system(self, *system, type_map, split_num=200, **kwargs):
The systems to mix
type_map : list of str
Maps atom type to name
split_num : int
Number of frames in each system

Returns
-------
mixed_systems: dict
dict of mixed system with key '{atom_numbs}/sys.xxx'
dict of mixed system with key 'atom_numbs'
"""
raise NotImplementedError(
"%s doesn't support System.from" % (self.__class__.__name__)
Expand Down
47 changes: 10 additions & 37 deletions dpdata/plugins/deepmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def from_labeled_system_mix(self, file_name, type_map=None, **kwargs):
file_name, type_map=type_map, labels=True
)

def mix_system(self, *system, type_map, split_num=200, **kwargs):
def mix_system(self, *system, type_map, **kwargs):
"""Mix the systems into mixed_type ones according to the unified given type_map.

Parameters
Expand All @@ -126,49 +126,22 @@ def mix_system(self, *system, type_map, split_num=200, **kwargs):
The systems to mix
type_map : list of str
Maps atom type to name
split_num : int
Number of frames in each system

Returns
-------
mixed_systems: dict
dict of mixed system with key '{atom_numbs}/sys.xxx'
dict of mixed system with key 'atom_numbs'
"""
return dpdata.deepmd.mixed.mix_system(
*system, type_map=type_map, split_num=split_num, **kwargs
)
return dpdata.deepmd.mixed.mix_system(*system, type_map=type_map, **kwargs)

def from_multi_systems(self, directory, **kwargs):
    """MultiSystems.from

    Walk ``directory`` recursively (``directory`` itself included) and
    collect every folder that directly contains a ``type_map.raw`` file.

    Parameters
    ----------
    directory : str
        directory of system
    **kwargs : dict
        Accepted and ignored.

    Returns
    -------
    filenames: list[str]
        paths of the folders found, in ``os.walk`` order
    """
    # mixed_type format systems must have type_map.raw
    return [
        root
        for root, _dirs, files in os.walk(directory)
        if "type_map.raw" in files
    ]

MultiMode = Format.MultiModes.Directory

Expand Down
10 changes: 4 additions & 6 deletions dpdata/system.py
Original file line number Diff line number Diff line change
Expand Up @@ -1307,15 +1307,13 @@ def from_fmt_obj(self, fmtobj, directory, labeled=True, **kwargs):
if labeled:
data_list = fmtobj.from_labeled_system_mix(dd, **kwargs)
for data_item in data_list:
system_list.append(LabeledSystem(data=data_item))
system_list.append(LabeledSystem(data=data_item, **kwargs))
else:
data_list = fmtobj.from_system_mix(dd, **kwargs)
for data_item in data_list:
system_list.append(System(data=data_item))
return self.__class__(
*system_list,
type_map=kwargs["type_map"] if "type_map" in kwargs else None,
)
system_list.append(System(data=data_item, **kwargs))
self.append(*system_list)
return self

def to_fmt_obj(self, fmtobj, directory, *args, **kwargs):
if not isinstance(fmtobj, dpdata.plugins.deepmd.DeePMDMixedFormat):
Expand Down
Loading