diff --git a/deepmd/utils/data.py b/deepmd/utils/data.py index 537c5b4868..66870e85b2 100644 --- a/deepmd/utils/data.py +++ b/deepmd/utils/data.py @@ -91,6 +91,10 @@ def __init__( self.type_idx_map = np.array( sorter[np.searchsorted(type_map, self.type_map, sorter=sorter)] ) + # padding for virtual atom + self.type_idx_map = np.append( + self.type_idx_map, np.array([-1], dtype=np.int32) + ) self.type_map = type_map if type_map is None and self.type_map is None and self.mixed_type: raise RuntimeError("mixed_type format must have type_map!") @@ -489,8 +493,12 @@ def _load_set(self, set_name: DPPath): [(real_type == i).sum(axis=-1) for i in range(self.get_ntypes())], dtype=np.int32, ).T + ghost_nums = np.array( + [(real_type == -1).sum(axis=-1)], + dtype=np.int32, + ).T assert ( - atom_type_nums.sum(axis=-1) == natoms + atom_type_nums.sum(axis=-1) + ghost_nums.sum(axis=-1) == natoms ).all(), "some types in 'real_atom_types.npy' of set {} are not contained in {} types!".format( set_name, self.get_ntypes() ) diff --git a/deepmd/utils/neighbor_stat.py b/deepmd/utils/neighbor_stat.py index b07614fd57..eebe33d7a0 100644 --- a/deepmd/utils/neighbor_stat.py +++ b/deepmd/utils/neighbor_stat.py @@ -69,8 +69,8 @@ def builder(): t_natoms = place_holders["natoms_vec"] if self.one_type: # all types = 0, natoms_vec = [natoms, natoms, natoms] - t_type = tf.zeros_like(t_type, dtype=tf.int32) - t_natoms = tf.repeat(t_natoms[0], 3) + t_type = tf.clip_by_value(t_type, -1, 0) + t_natoms = tf.tile(t_natoms[0:1], [3]) _max_nbor_size, _min_nbor_dist = op_module.neighbor_stat( place_holders["coord"], diff --git a/doc/model/train-se-atten.md b/doc/model/train-se-atten.md index e91ae646c4..8b006346a9 100644 --- a/doc/model/train-se-atten.md +++ b/doc/model/train-se-atten.md @@ -103,11 +103,13 @@ ID | Property | File | Requir ---------- | -------------------------------- | ------------------- | -------------------- | ----------------------- | ----------- / | Atom type indexes (place holder) | 
type.raw | Required | Natoms | All zeros to fake the type input type_map | Atom type names | type_map.raw | Required | Ntypes | Atom names that map to atom type contained in all the frames, which is unnecessart to be contained in the periodic table -type | Atom type indexes of each frame | real_atom_types.npy | Required | Nframes \* Natoms | Integers that describe atom types in each frame, corresponding to indexes in type_map +type | Atom type indexes of each frame | real_atom_types.npy | Required | Nframes \* Natoms | Integers that describe atom types in each frame, corresponding to indexes in type_map. `-1` means virtual atoms. With these edited files, one can put together frames with the same `Natoms`, instead of the same formula (like `H2O`). Note that this `mixed_type` format only supports `se_atten` descriptor. -The API to generate or transfer to `mixed_type` format will be uploaded on [dpdata](https://github.com/deepmodeling/dpdata) soon for a more convenient experience. +To put frames with different `Natoms` into the same system, one can pad the smaller frames by adding virtual atoms whose type is `-1`. Virtual atoms do not contribute to any fitted property, so the atomic properties of virtual atoms (e.g. forces) should be set to zero. + +The API to generate or convert data to the `mixed_type` format is available in [dpdata](https://github.com/deepmodeling/dpdata) for a more convenient experience. 
## Training example Here we upload the AlMgCu example shown in the paper, you can download it here: diff --git a/source/op/neighbor_stat.cc b/source/op/neighbor_stat.cc index 6c58096824..ba067786f9 100644 --- a/source/op/neighbor_stat.cc +++ b/source/op/neighbor_stat.cc @@ -180,8 +180,10 @@ class NeighborStatOp : public OpKernel { #pragma omp parallel for for (int ii = 0; ii < nloc; ii++) { + if (d_type[ii] < 0) continue; // virtual atom for (int jj = 0; jj < d_nlist_r[ii].size(); jj++) { int type = d_type[d_nlist_r[ii][jj]]; + if (type < 0) continue; // virtual atom max_nbor_size[ii * ntypes + type] += 1; compute_t rij[3] = { d_coord3[d_nlist_r[ii][jj] * 3 + 0] - d_coord3[ii * 3 + 0], diff --git a/source/tests/common.py b/source/tests/common.py index d0dd2d2608..8fc303c1d7 100644 --- a/source/tests/common.py +++ b/source/tests/common.py @@ -85,9 +85,62 @@ def gen_data_mixed_type(nframes=1): ) -def gen_data(nframes=1, mixed_type=False): +def gen_data_virtual_type(nframes=1, nghost=4): + tmpdata = Data(rand_pert=0.1, seed=1, nframes=nframes) + sys = dpdata.LabeledSystem() + real_type_map = ["foo", "bar"] + sys.data["atom_names"] = ["X"] + sys.data["coords"] = tmpdata.coord + sys.data["atom_types"] = np.concatenate( + [ + np.zeros_like(tmpdata.atype), + np.zeros([nghost], dtype=np.int32), + ], + axis=0, + ) + sys.data["cells"] = tmpdata.cell + nframes = tmpdata.nframes + natoms = tmpdata.natoms + sys.data["coords"] = np.concatenate( + [ + sys.data["coords"].reshape([nframes, natoms, 3]), + np.zeros([nframes, nghost, 3]), + ], + axis=1, + ) + sys.data["cells"] = sys.data["cells"].reshape([nframes, 3, 3]) + sys.data["energies"] = np.zeros([nframes, 1]) + sys.data["forces"] = np.zeros([nframes, natoms + nghost, 3]) + sys.to_deepmd_npy("system_mixed_type", prec=np.float64) + np.savetxt("system_mixed_type/type_map.raw", real_type_map, fmt="%s") + np.save( + "system_mixed_type/set.000/real_atom_types.npy", + np.concatenate( + [ + tmpdata.atype.reshape(1, 
-1).repeat(nframes, 0), + np.full([nframes, nghost], -1, dtype=np.int32), + ], + axis=1, + ), + ) + np.save("system_mixed_type/set.000/fparam.npy", tmpdata.fparam) + np.save( + "system_mixed_type/set.000/aparam.npy", + np.concatenate( + [ + tmpdata.aparam.reshape([nframes, natoms, 2]), + np.zeros([nframes, nghost, 2]), + ], + axis=1, + ), + ) + + +def gen_data(nframes=1, mixed_type=False, virtual_type=False): if not mixed_type: gen_data_type_specific(nframes) + elif virtual_type: + gen_data_virtual_type(nframes) else: gen_data_mixed_type(nframes) diff --git a/source/tests/test_virtual_type.py b/source/tests/test_virtual_type.py index 575f6dc768..98ab84cf93 100644 --- a/source/tests/test_virtual_type.py +++ b/source/tests/test_virtual_type.py @@ -4,15 +4,26 @@ import numpy as np from common import ( + gen_data, + j_loader, tests_path, ) +from deepmd.common import ( + j_must_have, +) from deepmd.infer import ( DeepPot, ) from deepmd.utils.convert import ( convert_pbtxt_to_pb, ) +from deepmd.utils.data_system import ( + DeepmdDataSystem, +) +from deepmd.utils.neighbor_stat import ( + NeighborStat, +) class TestVirtualType(unittest.TestCase): @@ -106,3 +117,24 @@ def test_infer_mixed_type(self): np.testing.assert_almost_equal(v1, v2) np.testing.assert_almost_equal(ae1[:nloc], ae2[nghost:]) np.testing.assert_almost_equal(av1[:nloc], av2[nghost:]) + + +class TestTrainVirtualType(unittest.TestCase): + def setUp(self) -> None: + gen_data(mixed_type=True, virtual_type=True) + + def test_data_mixed_type(self): + jfile = "water_se_atten_mixed_type.json" + jdata = j_loader(jfile) + + systems = j_must_have(jdata, "systems") + batch_size = 1 + test_size = 1 + rcut = j_must_have(jdata["model"]["descriptor"], "rcut") + type_map = j_must_have(jdata["model"], "type_map") + + data = DeepmdDataSystem(systems, batch_size, test_size, rcut, type_map=type_map) + data.get_batch() + # neighbor stat + nei_stat = NeighborStat(len(type_map), rcut, one_type=True) + min_nbor_dist, max_nbor_size 
= nei_stat.get_stat(data)