deepmodeling · wanghan-iapcm · Mar 16, 2023 · Mar 4, 2023 · Mar 4, 2023 · Mar 5, 2023
diff --git a/deepmd/utils/data.py b/deepmd/utils/data.py
@@ -91,6 +91,10 @@ def __init__(
                 self.type_idx_map = np.array(
                     sorter[np.searchsorted(type_map, self.type_map, sorter=sorter)]
                 )
+                # padding for virtual atom
+                self.type_idx_map = np.append(
+                    self.type_idx_map, np.array([-1], dtype=np.int32)
+                )
             self.type_map = type_map
         if type_map is None and self.type_map is None and self.mixed_type:
             raise RuntimeError("mixed_type format must have type_map!")
@@ -489,8 +493,12 @@ def _load_set(self, set_name: DPPath):
                 [(real_type == i).sum(axis=-1) for i in range(self.get_ntypes())],
                 dtype=np.int32,
             ).T
+            ghost_nums = np.array(
+                [(real_type == -1).sum(axis=-1)],
+                dtype=np.int32,
+            ).T
             assert (
-                atom_type_nums.sum(axis=-1) == natoms
+                atom_type_nums.sum(axis=-1) + ghost_nums.sum(axis=-1) == natoms
             ).all(), "some types in 'real_atom_types.npy' of set {} are not contained in {} types!".format(
                 set_name, self.get_ntypes()
             )

diff --git a/deepmd/utils/neighbor_stat.py b/deepmd/utils/neighbor_stat.py
@@ -69,8 +69,8 @@ def builder():
             t_natoms = place_holders["natoms_vec"]
             if self.one_type:
                 # all types = 0, natoms_vec = [natoms, natoms, natoms]
-                t_type = tf.zeros_like(t_type, dtype=tf.int32)
-                t_natoms = tf.repeat(t_natoms[0], 3)
+                t_type = tf.clip_by_value(t_type, -1, 0)
+                t_natoms = tf.tile(t_natoms[0:1], [3])
 
             _max_nbor_size, _min_nbor_dist = op_module.neighbor_stat(
                 place_holders["coord"],

diff --git a/doc/model/train-se-atten.md b/doc/model/train-se-atten.md
@@ -103,11 +103,13 @@ ID             | Property                         | File                | Requir
 ----------     | -------------------------------- | ------------------- | -------------------- | -----------------------  | -----------
 /              | Atom type indexes (place holder) | type.raw            | Required             | Natoms                   | All zeros to fake the type input
 type_map       | Atom type names                  | type_map.raw        | Required             | Ntypes                   | Atom names that map to atom type contained in all the frames, which is unnecessart to be contained in the periodic table
-type           | Atom type indexes of each frame  | real_atom_types.npy | Required             | Nframes \* Natoms        | Integers that describe atom types in each frame, corresponding to indexes in type_map
+type           | Atom type indexes of each frame  | real_atom_types.npy | Required             | Nframes \* Natoms        | Integers that describe atom types in each frame, corresponding to indexes in type_map. `-1` means virtual atoms.
 
 With these edited files, one can put together frames with the same `Natoms`, instead of the same formula (like `H2O`). Note that this `mixed_type` format only supports `se_atten` descriptor.
 
-The API to generate or transfer to `mixed_type` format will be uploaded on [dpdata](https://github.com/deepmodeling/dpdata) soon for a more convenient experience.
+To put frames with different `Natoms` into the same system, one can pad systems by adding virtual atoms whose type is `-1`. Virtual atoms do not contribute to any fitting property, so the atomic properties of virtual atoms (e.g. forces) should be set to zero.
+
+The API to generate data in, or convert data to, the `mixed_type` format is available in [dpdata](https://github.com/deepmodeling/dpdata) for a more convenient experience.
 
 ## Training example
 Here we upload the AlMgCu example shown in the paper, you can download it here:

diff --git a/source/op/neighbor_stat.cc b/source/op/neighbor_stat.cc
@@ -180,8 +180,10 @@ class NeighborStatOp : public OpKernel {
 
 #pragma omp parallel for
     for (int ii = 0; ii < nloc; ii++) {
+      if (d_type[ii] < 0) continue;  // virtual atom
       for (int jj = 0; jj < d_nlist_r[ii].size(); jj++) {
         int type = d_type[d_nlist_r[ii][jj]];
+        if (type < 0) continue;  // virtual atom
         max_nbor_size[ii * ntypes + type] += 1;
         compute_t rij[3] = {
             d_coord3[d_nlist_r[ii][jj] * 3 + 0] - d_coord3[ii * 3 + 0],

diff --git a/source/tests/common.py b/source/tests/common.py
@@ -85,9 +85,62 @@ def gen_data_mixed_type(nframes=1):
     )
 
 
-def gen_data(nframes=1, mixed_type=False):
+def gen_data_virtual_type(nframes=1, nghost=4):  # test fixture: writes "system_mixed_type" with nghost trailing atoms of real type -1
+    tmpdata = Data(rand_pert=0.1, seed=1, nframes=nframes)
+    sys = dpdata.LabeledSystem()
+    real_type_map = ["foo", "bar"]
+    sys.data["atom_names"] = ["X"]  # single placeholder name; the real names are written to type_map.raw below
+    sys.data["coords"] = tmpdata.coord
+    sys.data["atom_types"] = np.concatenate(
+        [
+            np.zeros_like(tmpdata.atype),  # every real atom gets placeholder type 0 here
+            np.zeros([nghost], dtype=np.int32),  # the padding atoms get placeholder type 0 as well
+        ],
+        axis=0,
+    )
+    sys.data["cells"] = tmpdata.cell
+    nframes = tmpdata.nframes  # NOTE: rebinds the nframes argument to the value stored on tmpdata
+    natoms = tmpdata.natoms
+    sys.data["coords"] = np.concatenate(
+        [
+            sys.data["coords"].reshape([nframes, natoms, 3]),
+            np.zeros([nframes, nghost, 3]),  # padding atoms are placed at the origin
+        ],
+        axis=1,
+    )
+    sys.data["cells"] = sys.data["cells"].reshape([nframes, 3, 3])
+    sys.data["energies"] = np.zeros([nframes, 1])  # labels are all zeros
+    sys.data["forces"] = np.zeros([nframes, natoms + nghost, 3])  # zero forces for real and padding atoms alike
+    sys.to_deepmd_npy("system_mixed_type", prec=np.float64)
+    np.savetxt("system_mixed_type/type_map.raw", real_type_map, fmt="%s")
+    np.save(
+        "system_mixed_type/set.000/real_atom_types.npy",
+        np.concatenate(
+            [
+                tmpdata.atype.reshape(1, -1).repeat(nframes, 0),  # same real types repeated for every frame
+                np.full([nframes, nghost], -1, dtype=np.int32),  # -1 marks the padding atoms as virtual
+            ],
+            axis=1,
+        ),
+    )
+    np.save("system_mixed_type/set.000/fparam.npy", tmpdata.fparam)
+    np.save(
+        "system_mixed_type/set.000/aparam.npy",
+        np.concatenate(
+            [
+                tmpdata.aparam.reshape([nframes, natoms, 2]),
+                np.zeros([nframes, nghost, 2]),  # zero aparam entries for the padding atoms
+            ],
+            axis=1,
+        ),
+    )
+
+
+def gen_data(nframes=1, mixed_type=False, virtual_type=False):  # virtual_type is only consulted when mixed_type is True
     if not mixed_type:
         gen_data_type_specific(nframes)
+    elif virtual_type:  # mixed-type data padded with virtual atoms
+        gen_data_virtual_type(nframes)  # nghost is left at its default
     else:
         gen_data_mixed_type(nframes)
 

diff --git a/source/tests/test_virtual_type.py b/source/tests/test_virtual_type.py
@@ -4,15 +4,26 @@
 
 import numpy as np
 from common import (
+    gen_data,
+    j_loader,
     tests_path,
 )
 
+from deepmd.common import (
+    j_must_have,
+)
 from deepmd.infer import (
     DeepPot,
 )
 from deepmd.utils.convert import (
     convert_pbtxt_to_pb,
 )
+from deepmd.utils.data_system import (
+    DeepmdDataSystem,
+)
+from deepmd.utils.neighbor_stat import (
+    NeighborStat,
+)
 
 
 class TestVirtualType(unittest.TestCase):
@@ -106,3 +117,24 @@ def test_infer_mixed_type(self):
         np.testing.assert_almost_equal(v1, v2)
         np.testing.assert_almost_equal(ae1[:nloc], ae2[nghost:])
         np.testing.assert_almost_equal(av1[:nloc], av2[nghost:])
+
+
+class TestTrainVirtualType(unittest.TestCase):  # data loading + neighbor stat on a system padded with virtual atoms
+    def setUp(self) -> None:
+        gen_data(mixed_type=True, virtual_type=True)  # dispatches to gen_data_virtual_type in common.py
+
+    def test_data_mixed_type(self):  # smoke test: contains no assertions, so it passes if nothing raises
+        jfile = "water_se_atten_mixed_type.json"
+        jdata = j_loader(jfile)
+
+        systems = j_must_have(jdata, "systems")
+        batch_size = 1
+        test_size = 1
+        rcut = j_must_have(jdata["model"]["descriptor"], "rcut")
+        type_map = j_must_have(jdata["model"], "type_map")
+
+        data = DeepmdDataSystem(systems, batch_size, test_size, rcut, type_map=type_map)
+        data.get_batch()  # the returned batch is discarded
+        # neighbor stat
+        nei_stat = NeighborStat(len(type_map), rcut, one_type=True)
+        min_nbor_dist, max_nbor_size = nei_stat.get_stat(data)  # results are not checked