From a7c4c6219cc06a6e236be19d52b88a4036b90a73 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 3 Mar 2023 19:00:40 -0500 Subject: [PATCH 1/5] support mixed natoms training Signed-off-by: Jinzhe Zeng --- deepmd/utils/data.py | 10 +++++++++- deepmd/utils/neighbor_stat.py | 2 +- source/op/neighbor_stat.cc | 2 ++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/deepmd/utils/data.py b/deepmd/utils/data.py index 537c5b4868..66870e85b2 100644 --- a/deepmd/utils/data.py +++ b/deepmd/utils/data.py @@ -91,6 +91,10 @@ def __init__( self.type_idx_map = np.array( sorter[np.searchsorted(type_map, self.type_map, sorter=sorter)] ) + # padding for virtual atom + self.type_idx_map = np.append( + self.type_idx_map, np.array([-1], dtype=np.int32) + ) self.type_map = type_map if type_map is None and self.type_map is None and self.mixed_type: raise RuntimeError("mixed_type format must have type_map!") @@ -489,8 +493,12 @@ def _load_set(self, set_name: DPPath): [(real_type == i).sum(axis=-1) for i in range(self.get_ntypes())], dtype=np.int32, ).T + ghost_nums = np.array( + [(real_type == -1).sum(axis=-1)], + dtype=np.int32, + ).T assert ( - atom_type_nums.sum(axis=-1) == natoms + atom_type_nums.sum(axis=-1) + ghost_nums.sum(axis=-1) == natoms ).all(), "some types in 'real_atom_types.npy' of set {} are not contained in {} types!".format( set_name, self.get_ntypes() ) diff --git a/deepmd/utils/neighbor_stat.py b/deepmd/utils/neighbor_stat.py index b07614fd57..9d815dda72 100644 --- a/deepmd/utils/neighbor_stat.py +++ b/deepmd/utils/neighbor_stat.py @@ -69,7 +69,7 @@ def builder(): t_natoms = place_holders["natoms_vec"] if self.one_type: # all types = 0, natoms_vec = [natoms, natoms, natoms] - t_type = tf.zeros_like(t_type, dtype=tf.int32) + t_type = tf.clip_by_value(t_type, -1, 0) t_natoms = tf.repeat(t_natoms[0], 3) _max_nbor_size, _min_nbor_dist = op_module.neighbor_stat( diff --git a/source/op/neighbor_stat.cc b/source/op/neighbor_stat.cc index 6c58096824..ba067786f9 
100644 --- a/source/op/neighbor_stat.cc +++ b/source/op/neighbor_stat.cc @@ -180,8 +180,10 @@ class NeighborStatOp : public OpKernel { #pragma omp parallel for for (int ii = 0; ii < nloc; ii++) { + if (d_type[ii] < 0) continue; // virtual atom for (int jj = 0; jj < d_nlist_r[ii].size(); jj++) { int type = d_type[d_nlist_r[ii][jj]]; + if (type < 0) continue; // virtual atom max_nbor_size[ii * ntypes + type] += 1; compute_t rij[3] = { d_coord3[d_nlist_r[ii][jj] * 3 + 0] - d_coord3[ii * 3 + 0], From ce93da473902cd6ff3ee0fce86c4ca1f67f6a7b0 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 3 Mar 2023 19:43:15 -0500 Subject: [PATCH 2/5] add doc Signed-off-by: Jinzhe Zeng --- doc/model/train-se-atten.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/model/train-se-atten.md b/doc/model/train-se-atten.md index e91ae646c4..8b006346a9 100644 --- a/doc/model/train-se-atten.md +++ b/doc/model/train-se-atten.md @@ -103,11 +103,13 @@ ID | Property | File | Requir ---------- | -------------------------------- | ------------------- | -------------------- | ----------------------- | ----------- / | Atom type indexes (place holder) | type.raw | Required | Natoms | All zeros to fake the type input type_map | Atom type names | type_map.raw | Required | Ntypes | Atom names that map to atom type contained in all the frames, which is unnecessart to be contained in the periodic table -type | Atom type indexes of each frame | real_atom_types.npy | Required | Nframes \* Natoms | Integers that describe atom types in each frame, corresponding to indexes in type_map +type | Atom type indexes of each frame | real_atom_types.npy | Required | Nframes \* Natoms | Integers that describe atom types in each frame, corresponding to indexes in type_map. `-1` means virtual atoms. With these edited files, one can put together frames with the same `Natoms`, instead of the same formula (like `H2O`). Note that this `mixed_type` format only supports `se_atten` descriptor. 
-The API to generate or transfer to `mixed_type` format will be uploaded on [dpdata](https://github.com/deepmodeling/dpdata) soon for a more convenient experience. +To put frames with different `Natoms` into the same system, one can pad systems by adding virtual atoms whose type is `-1`. Virtual atoms do not contribute to any fitting property, so the atomic properties of virtual atoms (e.g. forces) should be set to zero. + +The API to generate or transfer to `mixed_type` format is available on [dpdata](https://github.com/deepmodeling/dpdata) for a more convenient experience. ## Training example Here we upload the AlMgCu example shown in the paper, you can download it here: From 6605dae02bc8ac64e17ec8b49daebc6ca0d7e20c Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sat, 4 Mar 2023 23:26:14 -0500 Subject: [PATCH 3/5] add tests Signed-off-by: Jinzhe Zeng --- source/tests/common.py | 55 ++++++++++++++++++++++++++++++- source/tests/test_virtual_type.py | 32 ++++++++++++++++++ 2 files changed, 86 insertions(+), 1 deletion(-) diff --git a/source/tests/common.py b/source/tests/common.py index d0dd2d2608..8fc303c1d7 100644 --- a/source/tests/common.py +++ b/source/tests/common.py @@ -85,9 +85,62 @@ def gen_data_mixed_type(nframes=1): ) -def gen_data(nframes=1, mixed_type=False): +def gen_data_virtual_type(nframes=1, nghost=4): + tmpdata = Data(rand_pert=0.1, seed=1, nframes=nframes) + sys = dpdata.LabeledSystem() + real_type_map = ["foo", "bar"] + sys.data["atom_names"] = ["X"] + sys.data["coords"] = tmpdata.coord + sys.data["atom_types"] = np.concatenate( + [ + np.zeros_like(tmpdata.atype), + np.zeros([nghost], dtype=np.int32), + ], + axis=0, + ) + sys.data["cells"] = tmpdata.cell + nframes = tmpdata.nframes + natoms = tmpdata.natoms + sys.data["coords"] = np.concatenate( + [ + sys.data["coords"].reshape([nframes, natoms, 3]), + np.zeros([nframes, nghost, 3]), + ], + axis=1, + ) + sys.data["cells"] = sys.data["cells"].reshape([nframes, 3, 3]) + sys.data["energies"] = 
np.zeros([nframes, 1]) + sys.data["forces"] = np.zeros([nframes, natoms + nghost, 3]) + sys.to_deepmd_npy("system_mixed_type", prec=np.float64) + np.savetxt("system_mixed_type/type_map.raw", real_type_map, fmt="%s") + np.save( + "system_mixed_type/set.000/real_atom_types.npy", + np.concatenate( + [ + tmpdata.atype.reshape(1, -1).repeat(nframes, 0), + np.full([nframes, nghost], -1, dtype=np.int32), + ], + axis=1, + ), + ) + np.save("system_mixed_type/set.000/fparam.npy", tmpdata.fparam) + np.save( + "system_mixed_type/set.000/aparam.npy", + np.concatenate( + [ + tmpdata.aparam.reshape([nframes, natoms, 2]), + np.zeros([nframes, nghost, 2]), + ], + axis=1, + ), + ) + + +def gen_data(nframes=1, mixed_type=False, virtual_type=False): if not mixed_type: gen_data_type_specific(nframes) + elif virtual_type: + gen_data_virtual_type(nframes) else: gen_data_mixed_type(nframes) diff --git a/source/tests/test_virtual_type.py b/source/tests/test_virtual_type.py index 575f6dc768..69ba87885c 100644 --- a/source/tests/test_virtual_type.py +++ b/source/tests/test_virtual_type.py @@ -4,15 +4,26 @@ import numpy as np from common import ( + gen_data, tests_path, ) +from deepmd.common import ( + j_loader, + j_must_have, +) from deepmd.infer import ( DeepPot, ) from deepmd.utils.convert import ( convert_pbtxt_to_pb, ) +from deepmd.utils.data_system import ( + DeepmdDataSystem, +) +from deepmd.utils.neighbor_stat import ( + NeighborStat, +) class TestVirtualType(unittest.TestCase): @@ -106,3 +117,24 @@ def test_infer_mixed_type(self): np.testing.assert_almost_equal(v1, v2) np.testing.assert_almost_equal(ae1[:nloc], ae2[nghost:]) np.testing.assert_almost_equal(av1[:nloc], av2[nghost:]) + + +class TestTrainVirtualType(unittest.TestCase): + def setUp(self) -> None: + gen_data(mixed_type=True, virtual_type=True) + + def test_data_mixed_type(self): + jfile = "water_se_atten_mixed_type.json" + jdata = j_loader(jfile) + + systems = j_must_have(jdata, "systems") + batch_size = 1 + test_size = 1 
+ rcut = j_must_have(jdata["model"]["descriptor"], "rcut") + type_map = j_must_have(jdata["model"], "type_map") + + data = DeepmdDataSystem(systems, batch_size, test_size, rcut, type_map=type_map) + data.get_batch() + # neighbor stat + nei_stat = NeighborStat(len(type_map), rcut, one_type=True) + min_nbor_dist, max_nbor_size = nei_stat.get_stat(data) From 75f71056fd8a26c3b71a885dc2c3d83d6ae8d9af Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sat, 4 Mar 2023 23:42:03 -0500 Subject: [PATCH 4/5] use tests.common.j_loader Signed-off-by: Jinzhe Zeng --- source/tests/test_virtual_type.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/tests/test_virtual_type.py b/source/tests/test_virtual_type.py index 69ba87885c..98ab84cf93 100644 --- a/source/tests/test_virtual_type.py +++ b/source/tests/test_virtual_type.py @@ -5,11 +5,11 @@ import numpy as np from common import ( gen_data, + j_loader, tests_path, ) from deepmd.common import ( - j_loader, j_must_have, ) from deepmd.infer import ( From 70350173117bf38e71a090a8e698525f73bbff24 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sun, 5 Mar 2023 00:53:37 -0500 Subject: [PATCH 5/5] support old TF Signed-off-by: Jinzhe Zeng --- deepmd/utils/neighbor_stat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepmd/utils/neighbor_stat.py b/deepmd/utils/neighbor_stat.py index 9d815dda72..eebe33d7a0 100644 --- a/deepmd/utils/neighbor_stat.py +++ b/deepmd/utils/neighbor_stat.py @@ -70,7 +70,7 @@ def builder(): if self.one_type: # all types = 0, natoms_vec = [natoms, natoms, natoms] t_type = tf.clip_by_value(t_type, -1, 0) - t_natoms = tf.repeat(t_natoms[0], 3) + t_natoms = tf.tile(t_natoms[0:1], [3]) _max_nbor_size, _min_nbor_dist = op_module.neighbor_stat( place_holders["coord"],