diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml index bb34fe0bf6..c497e86e1d 100644 --- a/.github/workflows/build_wheel.yml +++ b/.github/workflows/build_wheel.yml @@ -33,7 +33,7 @@ jobs: platform_id: manylinux_aarch64 dp_variant: cpu steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: submodules: true # https://github.com/pypa/setuptools_scm/issues/480 @@ -42,13 +42,13 @@ jobs: name: Setup QEMU if: matrix.platform_id == 'manylinux_aarch64' - name: Build wheels - uses: pypa/cibuildwheel@v2.11.3 + uses: pypa/cibuildwheel@v2.12.1 env: CIBW_BUILD_VERBOSITY: 1 CIBW_ARCHS: all CIBW_BUILD: cp${{ matrix.python }}-${{ matrix.platform_id }} DP_VARIANT: ${{ matrix.dp_variant }} - - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v3 with: path: ./wheelhouse/*.whl build_sdist: @@ -79,7 +79,7 @@ jobs: with: name: artifact path: dist - - uses: pypa/gh-action-pypi-publish@v4 + - uses: pypa/gh-action-pypi-publish@release/v1 with: user: __token__ password: ${{ secrets.pypi_password }} @@ -103,12 +103,12 @@ jobs: - name: Extract metadata (tags, labels) for Docker id: meta - uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38 + uses: docker/metadata-action@507c2f2dc502c992ad446e3d7a5dfbe311567a96 with: images: ghcr.io/deepmodeling/deepmd-kit - name: Build and push Docker image - uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc + uses: docker/build-push-action@3b5e8027fcad23fda98b2e3ac259d8d67585f671 with: context: source/install/docker push: ${{ github.repository_owner == 'deepmodeling' && github.event_name == 'push' }} diff --git a/.github/workflows/package_c.yml b/.github/workflows/package_c.yml index e57c0a30e1..6cc5ed8dc8 100644 --- a/.github/workflows/package_c.yml +++ b/.github/workflows/package_c.yml @@ -9,14 +9,14 @@ jobs: name: Build C library runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Package C library run: 
./source/install/docker_package_c.sh - name: Test C library run: ./source/install/docker_test_package_c.sh # for download and debug - name: Upload artifact - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: path: ./libdeepmd_c.tar.gz - name: Release diff --git a/deepmd/descriptor/se_a.py b/deepmd/descriptor/se_a.py index d49c329353..2bff523608 100644 --- a/deepmd/descriptor/se_a.py +++ b/deepmd/descriptor/se_a.py @@ -96,8 +96,8 @@ class DescrptSeA(DescrptSe): .. math:: (\mathcal{G}^i)_j = \mathcal{N}(s(r_{ji})) - :math:`\mathcal{G}^i_< \in \mathbb{R}^{N \times M_2}` takes first :math:`M_2`$` columns of - :math:`\mathcal{G}^i`$`. The equation of embedding network :math:`\mathcal{N}` can be found at + :math:`\mathcal{G}^i_< \in \mathbb{R}^{N \times M_2}` takes first :math:`M_2` columns of + :math:`\mathcal{G}^i`. The equation of embedding network :math:`\mathcal{N}` can be found at :meth:`deepmd.utils.network.embedding_net`. Parameters diff --git a/deepmd/descriptor/se_a_mask.py b/deepmd/descriptor/se_a_mask.py index 00b23232f1..93256e3910 100644 --- a/deepmd/descriptor/se_a_mask.py +++ b/deepmd/descriptor/se_a_mask.py @@ -70,8 +70,8 @@ class DescrptSeAMask(DescrptSeA): .. math:: (\mathcal{G}^i)_j = \mathcal{N}(s(r_{ji})) - :math:`\mathcal{G}^i_< \in \mathbb{R}^{N \times M_2}` takes first :math:`M_2`$` columns of - :math:`\mathcal{G}^i`$`. The equation of embedding network :math:`\mathcal{N}` can be found at + :math:`\mathcal{G}^i_< \in \mathbb{R}^{N \times M_2}` takes first :math:`M_2` columns of + :math:`\mathcal{G}^i`. The equation of embedding network :math:`\mathcal{N}` can be found at :meth:`deepmd.utils.network.embedding_net`. Specially for descriptor se_a_mask is a concise implementation of se_a. The difference is that se_a_mask only considered a non-pbc system. 
diff --git a/deepmd/descriptor/se_atten.py b/deepmd/descriptor/se_atten.py index 356c1fae2b..ba8fa9adfa 100644 --- a/deepmd/descriptor/se_atten.py +++ b/deepmd/descriptor/se_atten.py @@ -1,3 +1,4 @@ +import warnings from typing import ( List, Optional, @@ -67,6 +68,8 @@ class DescrptSeAtten(DescrptSeA): exclude_types : List[List[int]] The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1. + set_davg_zero + Set the shift of embedding net input to zero. activation_function The activation function in the embedding net. Supported options are |ACTIVATION_FN| precision @@ -97,6 +100,7 @@ def __init__( trainable: bool = True, seed: Optional[int] = None, type_one_side: bool = True, + set_davg_zero: bool = True, exclude_types: List[List[int]] = [], activation_function: str = "tanh", precision: str = "default", @@ -107,6 +111,11 @@ def __init__( attn_mask: bool = False, multi_task: bool = False, ) -> None: + if not set_davg_zero: + warnings.warn( + "Set 'set_davg_zero' False in descriptor 'se_atten' " + "may cause unexpected discontinuity during model inference!" + ) DescrptSeA.__init__( self, rcut, @@ -119,7 +128,7 @@ def __init__( seed=seed, type_one_side=type_one_side, exclude_types=exclude_types, - set_davg_zero=True, + set_davg_zero=set_davg_zero, activation_function=activation_function, precision=precision, uniform_seed=uniform_seed, diff --git a/deepmd/fit/ener.py b/deepmd/fit/ener.py index d65a3a984b..c9f70117f7 100644 --- a/deepmd/fit/ener.py +++ b/deepmd/fit/ener.py @@ -59,9 +59,9 @@ class EnerFitting(Fitting): \mathbf{y}=\mathcal{L}(\mathbf{x};\mathbf{w},\mathbf{b})= \boldsymbol{\phi}(\mathbf{x}^T\mathbf{w}+\mathbf{b}) - where :math:`\mathbf{x} \in \mathbb{R}^{N_1}`$` is the input vector and :math:`\mathbf{y} \in \mathbb{R}^{N_2}` + where :math:`\mathbf{x} \in \mathbb{R}^{N_1}` is the input vector and :math:`\mathbf{y} \in \mathbb{R}^{N_2}` is the output vector. 
:math:`\mathbf{w} \in \mathbb{R}^{N_1 \times N_2}` and - :math:`\mathbf{b} \in \mathbb{R}^{N_2}`$` are weights and biases, respectively, + :math:`\mathbf{b} \in \mathbb{R}^{N_2}` are weights and biases, respectively, both of which are trainable if `trainable[i]` is `True`. :math:`\boldsymbol{\phi}` is the activation function. @@ -71,9 +71,9 @@ class EnerFitting(Fitting): \mathbf{y}=\mathcal{L}^{(n)}(\mathbf{x};\mathbf{w},\mathbf{b})= \mathbf{x}^T\mathbf{w}+\mathbf{b} - where :math:`\mathbf{x} \in \mathbb{R}^{N_{n-1}}`$` is the input vector and :math:`\mathbf{y} \in \mathbb{R}` + where :math:`\mathbf{x} \in \mathbb{R}^{N_{n-1}}` is the input vector and :math:`\mathbf{y} \in \mathbb{R}` is the output scalar. :math:`\mathbf{w} \in \mathbb{R}^{N_{n-1}}` and - :math:`\mathbf{b} \in \mathbb{R}`$` are weights and bias, respectively, + :math:`\mathbf{b} \in \mathbb{R}` are weights and bias, respectively, both of which are trainable if `trainable[n]` is `True`. Parameters @@ -549,13 +549,14 @@ def build( aparam = tf.reshape(aparam, [-1, self.numb_aparam * natoms[0]]) atype_nall = tf.reshape(atype, [-1, natoms[1]]) - atype_filter = tf.cast(atype_nall >= 0, GLOBAL_TF_FLOAT_PRECISION) + self.atype_nloc = tf.slice( + atype_nall, [0, 0], [-1, natoms[0]] + ) ## lammps will make error + atype_filter = tf.cast(self.atype_nloc >= 0, GLOBAL_TF_FLOAT_PRECISION) + self.atype_nloc = tf.reshape(self.atype_nloc, [-1]) # prevent embedding_lookup error, # but the filter will be applied anyway - atype_nall = tf.clip_by_value(atype_nall, 0, self.ntypes - 1) - self.atype_nloc = tf.reshape( - tf.slice(atype_nall, [0, 0], [-1, natoms[0]]), [-1] - ) ## lammps will make error + self.atype_nloc = tf.clip_by_value(self.atype_nloc, 0, self.ntypes - 1) if type_embedding is not None: atype_embed = tf.nn.embedding_lookup(type_embedding, self.atype_nloc) else: diff --git a/deepmd/loggers/loggers.py b/deepmd/loggers/loggers.py index bb2e5f2dad..4249d7fb3d 100644 --- a/deepmd/loggers/loggers.py +++ 
b/deepmd/loggers/loggers.py @@ -229,6 +229,8 @@ def set_log_handles( ch.setLevel(level) ch.addFilter(_AppFilter()) + # clean old handlers before adding new one + root_log.handlers.clear() root_log.addHandler(ch) # * add file handler *************************************************************** diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py index 93135304a9..9fb977e5eb 100644 --- a/deepmd/utils/argcheck.py +++ b/deepmd/utils/argcheck.py @@ -325,6 +325,7 @@ def descrpt_se_atten_args(): doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." doc_trainable = "If the parameters in the embedding net is trainable" doc_seed = "Random seed for parameter initialization" + doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `se_atten` descriptor or `atom_ener` in the energy fitting is used" doc_exclude_types = "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1." doc_attn = "The length of hidden vectors in attention layers" doc_attn_layer = "The number of attention layers" @@ -361,6 +362,9 @@ def descrpt_se_atten_args(): Argument( "exclude_types", list, optional=True, default=[], doc=doc_exclude_types ), + Argument( + "set_davg_zero", bool, optional=True, default=True, doc=doc_set_davg_zero + ), Argument("attn", int, optional=True, default=128, doc=doc_attn), Argument("attn_layer", int, optional=True, default=2, doc=doc_attn_layer), Argument("attn_dotr", bool, optional=True, default=True, doc=doc_attn_dotr), @@ -972,7 +976,8 @@ def training_data_args(): # ! added by Ziyao: new specification style for data - list: the length of which is the same as the {link_sys}. 
The batch size of each system is given by the elements of the list.\n\n\ - int: all {link_sys} use the same batch size.\n\n\ - string "auto": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than 32.\n\n\ -- string "auto:N": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than N.' +- string "auto:N": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than N.\n\n\ +- string "mixed:N": the batch data will be sampled from all systems and merged into a mixed system with the batch size N. Only support the se_atten descriptor.' doc_auto_prob_style = 'Determine the probability of systems automatically. The method is assigned by this key and can be\n\n\ - "prob_uniform" : the probability all the systems are equal, namely 1.0/self.get_nsystems()\n\n\ - "prob_sys_size" : the probability of a system is proportional to the number of batches in the system\n\n\ diff --git a/deepmd/utils/data.py b/deepmd/utils/data.py index 537c5b4868..66870e85b2 100644 --- a/deepmd/utils/data.py +++ b/deepmd/utils/data.py @@ -91,6 +91,10 @@ def __init__( self.type_idx_map = np.array( sorter[np.searchsorted(type_map, self.type_map, sorter=sorter)] ) + # padding for virtual atom + self.type_idx_map = np.append( + self.type_idx_map, np.array([-1], dtype=np.int32) + ) self.type_map = type_map if type_map is None and self.type_map is None and self.mixed_type: raise RuntimeError("mixed_type format must have type_map!") @@ -489,8 +493,12 @@ def _load_set(self, set_name: DPPath): [(real_type == i).sum(axis=-1) for i in range(self.get_ntypes())], dtype=np.int32, ).T + ghost_nums = np.array( + [(real_type == -1).sum(axis=-1)], + dtype=np.int32, + ).T assert ( - atom_type_nums.sum(axis=-1) == natoms + atom_type_nums.sum(axis=-1) + ghost_nums.sum(axis=-1) == natoms ).all(), "some types in 'real_atom_types.npy' of set 
{} are not contained in {} types!".format( set_name, self.get_ntypes() ) diff --git a/deepmd/utils/data_system.py b/deepmd/utils/data_system.py index 28dc799bf8..d87219fcc9 100644 --- a/deepmd/utils/data_system.py +++ b/deepmd/utils/data_system.py @@ -112,6 +112,7 @@ def __init__( # batch size self.batch_size = batch_size is_auto_bs = False + self.mixed_systems = False if isinstance(self.batch_size, int): self.batch_size = self.batch_size * np.ones(self.nsystems, dtype=int) elif isinstance(self.batch_size, str): @@ -121,9 +122,16 @@ def __init__( rule = 32 if len(words) == 2: rule = int(words[1]) + self.batch_size = self._make_auto_bs(rule) + elif "mixed" == words[0]: + self.mixed_systems = True + if len(words) == 2: + rule = int(words[1]) + else: + raise RuntimeError("batch size must be specified for mixed systems") + self.batch_size = rule * np.ones(self.nsystems, dtype=int) else: raise RuntimeError("unknown batch_size rule " + words[0]) - self.batch_size = self._make_auto_bs(rule) elif isinstance(self.batch_size, list): pass else: @@ -361,7 +369,7 @@ def _get_sys_probs(self, sys_probs, auto_prob_style): # depreciated prob = self._process_sys_probs(sys_probs) return prob - def get_batch(self, sys_idx: Optional[int] = None): + def get_batch(self, sys_idx: Optional[int] = None) -> dict: # batch generation style altered by Ziyao Li: # one should specify the "sys_prob" and "auto_prob_style" params # via set_sys_prob() function. The sys_probs this function uses is @@ -375,9 +383,36 @@ def get_batch(self, sys_idx: Optional[int] = None): The index of system from which the batch is get. If sys_idx is not None, `sys_probs` and `auto_prob_style` are ignored If sys_idx is None, automatically determine the system according to `sys_probs` or `auto_prob_style`, see the following. + This option does not work for mixed systems. 
+ + Returns + ------- + dict + The batch data """ if not hasattr(self, "default_mesh"): self._make_default_mesh() + if not self.mixed_systems: + b_data = self.get_batch_standard(sys_idx) + else: + b_data = self.get_batch_mixed() + return b_data + + def get_batch_standard(self, sys_idx: Optional[int] = None) -> dict: + """Get a batch of data from the data systems in the standard way. + + Parameters + ---------- + sys_idx : int + The index of system from which the batch is get. + If sys_idx is not None, `sys_probs` and `auto_prob_style` are ignored + If sys_idx is None, automatically determine the system according to `sys_probs` or `auto_prob_style`, see the following. + + Returns + ------- + dict + The batch data + """ if sys_idx is not None: self.pick_idx = sys_idx else: @@ -390,6 +425,73 @@ def get_batch(self, sys_idx: Optional[int] = None): b_data["default_mesh"] = self.default_mesh[self.pick_idx] return b_data + def get_batch_mixed(self) -> dict: + """Get a batch of data from the data systems in the mixed way. + + Returns + ------- + dict + The batch data + """ + # mixed systems have a global batch size + batch_size = self.batch_size[0] + batch_data = [] + for _ in range(batch_size): + self.pick_idx = dp_random.choice(np.arange(self.nsystems), p=self.sys_probs) + bb_data = self.data_systems[self.pick_idx].get_batch(1) + bb_data["natoms_vec"] = self.natoms_vec[self.pick_idx] + bb_data["default_mesh"] = self.default_mesh[self.pick_idx] + batch_data.append(bb_data) + b_data = self._merge_batch_data(batch_data) + return b_data + + def _merge_batch_data(self, batch_data: List[dict]) -> dict: + """Merge batch data from different systems. + + Parameters + ---------- + batch_data : list of dict + A list of batch data from different systems. + + Returns + ------- + dict + The merged batch data. 
+ """ + b_data = {} + max_natoms = max(bb["natoms_vec"][0] for bb in batch_data) + # natoms_vec + natoms_vec = np.zeros(2 + self.get_ntypes(), dtype=int) + natoms_vec[0:3] = max_natoms + b_data["natoms_vec"] = natoms_vec + # real_natoms_vec + real_natoms_vec = np.vstack([bb["natoms_vec"] for bb in batch_data]) + b_data["real_natoms_vec"] = real_natoms_vec + # type + type_vec = np.full((len(batch_data), max_natoms), -1, dtype=int) + for ii, bb in enumerate(batch_data): + type_vec[ii, : bb["type"].shape[1]] = bb["type"][0] + b_data["type"] = type_vec + # default_mesh + default_mesh = np.mean([bb["default_mesh"] for bb in batch_data], axis=0) + b_data["default_mesh"] = default_mesh + # other data + data_dict = self.get_data_dict(0) + for kk, vv in data_dict.items(): + if kk not in batch_data[0]: + continue + b_data["find_" + kk] = batch_data[0]["find_" + kk] + if not vv["atomic"]: + b_data[kk] = np.concatenate([bb[kk] for bb in batch_data], axis=0) + else: + b_data[kk] = np.zeros( + (len(batch_data), max_natoms * vv["ndof"] * vv["repeat"]), + dtype=batch_data[0][kk].dtype, + ) + for ii, bb in enumerate(batch_data): + b_data[kk][ii, : bb[kk].shape[1]] = bb[kk][0] + return b_data + # ! altered by Marián Rynik def get_test(self, sys_idx: Optional[int] = None, n_test: int = -1): # depreciated """Get test data from the the data systems. 
diff --git a/deepmd/utils/neighbor_stat.py b/deepmd/utils/neighbor_stat.py index b07614fd57..eebe33d7a0 100644 --- a/deepmd/utils/neighbor_stat.py +++ b/deepmd/utils/neighbor_stat.py @@ -69,8 +69,8 @@ def builder(): t_natoms = place_holders["natoms_vec"] if self.one_type: # all types = 0, natoms_vec = [natoms, natoms, natoms] - t_type = tf.zeros_like(t_type, dtype=tf.int32) - t_natoms = tf.repeat(t_natoms[0], 3) + t_type = tf.clip_by_value(t_type, -1, 0) + t_natoms = tf.tile(t_natoms[0:1], [3]) _max_nbor_size, _min_nbor_dist = op_module.neighbor_stat( place_holders["coord"], diff --git a/deepmd/utils/network.py b/deepmd/utils/network.py index e5ce249f45..a718da0b26 100644 --- a/deepmd/utils/network.py +++ b/deepmd/utils/network.py @@ -144,9 +144,9 @@ def embedding_net( \boldsymbol{\phi}(\mathbf{x}^T\mathbf{w}+\mathbf{b}), & \text{otherwise} \\ \end{cases} - where :math:`\mathbf{x} \in \mathbb{R}^{N_1}`$` is the input vector and :math:`\mathbf{y} \in \mathbb{R}^{N_2}` + where :math:`\mathbf{x} \in \mathbb{R}^{N_1}` is the input vector and :math:`\mathbf{y} \in \mathbb{R}^{N_2}` is the output vector. :math:`\mathbf{w} \in \mathbb{R}^{N_1 \times N_2}` and - :math:`\mathbf{b} \in \mathbb{R}^{N_2}`$` are weights and biases, respectively, + :math:`\mathbf{b} \in \mathbb{R}^{N_2}` are weights and biases, respectively, both of which are trainable if `trainable` is `True`. :math:`\boldsymbol{\phi}` is the activation function. 
diff --git a/doc/conf.py b/doc/conf.py index e338274f0e..2fd0dcdc33 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -305,6 +305,10 @@ def setup(app): "fontpkg": r""" \usepackage{fontspec} \setmainfont{Symbola} +""", + "preamble": r""" +\usepackage{enumitem} +\setlistdepth{99} """, } diff --git a/doc/model/train-se-atten.md b/doc/model/train-se-atten.md index e91ae646c4..8b006346a9 100644 --- a/doc/model/train-se-atten.md +++ b/doc/model/train-se-atten.md @@ -103,11 +103,13 @@ ID | Property | File | Requir ---------- | -------------------------------- | ------------------- | -------------------- | ----------------------- | ----------- / | Atom type indexes (place holder) | type.raw | Required | Natoms | All zeros to fake the type input type_map | Atom type names | type_map.raw | Required | Ntypes | Atom names that map to atom type contained in all the frames, which is unnecessart to be contained in the periodic table -type | Atom type indexes of each frame | real_atom_types.npy | Required | Nframes \* Natoms | Integers that describe atom types in each frame, corresponding to indexes in type_map +type | Atom type indexes of each frame | real_atom_types.npy | Required | Nframes \* Natoms | Integers that describe atom types in each frame, corresponding to indexes in type_map. `-1` means virtual atoms. With these edited files, one can put together frames with the same `Natoms`, instead of the same formula (like `H2O`). Note that this `mixed_type` format only supports `se_atten` descriptor. -The API to generate or transfer to `mixed_type` format will be uploaded on [dpdata](https://github.com/deepmodeling/dpdata) soon for a more convenient experience. +To put frames with different `Natoms` into the same system, one can pad systems by adding virtual atoms whose type is `-1`. Virtual atoms do not contribute to any fitting property, so the atomic property of virtual atoms (e.g. forces) should be given zero. 
+ +The API to generate or transfer to `mixed_type` format is available on [dpdata](https://github.com/deepmodeling/dpdata) for a more convenient experience. ## Training example Here we upload the AlMgCu example shown in the paper, you can download it here: diff --git a/examples/nopbc/mixed/input.json b/examples/nopbc/mixed/input.json new file mode 100644 index 0000000000..ab49b4e442 --- /dev/null +++ b/examples/nopbc/mixed/input.json @@ -0,0 +1,71 @@ +{ + "_comment": " model parameters", + "model": { + "type_map": [ + "C", + "H", + "O" + ], + "descriptor": { + "type": "se_atten", + "sel": 120, + "rcut_smth": 1.00, + "rcut": 6.00, + "neuron": [ + 25, + 50, + 100 + ], + "resnet_dt": false, + "axis_neuron": 12, + "seed": 1, + "_comment": " that's all" + }, + "fitting_net": { + "neuron": [ + 240, + 240, + 240 + ], + "resnet_dt": true, + "seed": 1, + "_comment": " that's all" + }, + "_comment": " that's all" + }, + + "learning_rate": { + "type": "exp", + "decay_steps": 4000, + "start_lr": 0.001, + "stop_lr": 3.51e-8, + "_comment": "that's all" + }, + + "loss": { + "type": "ener", + "start_pref_e": 0.02, + "limit_pref_e": 1, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0, + "limit_pref_v": 0, + "_comment": " that's all" + }, + + "training": { + "training_data": { + "systems": "../data/", + "batch_size": "mixed:4", + "_comment": "that's all" + }, + "numb_steps": 4000000, + "seed": 10, + "disp_file": "lcurve.out", + "disp_freq": 100, + "save_freq": 1000, + "_comment": "that's all" + }, + + "_comment": "that's all" +} diff --git a/source/op/CMakeLists.txt b/source/op/CMakeLists.txt index fdc7fa7ea2..059e72b9a8 100644 --- a/source/op/CMakeLists.txt +++ b/source/op/CMakeLists.txt @@ -56,6 +56,9 @@ add_library(${LIB_DEEPMD_OP} MODULE ${OP_SRC} ${OP_REMAPPER_SRC}) # link: libdeepmd libtensorflow_cc libtensorflow_framework target_link_libraries(${LIB_DEEPMD_OP} PRIVATE TensorFlow::tensorflow_framework) target_link_libraries(${LIB_DEEPMD_OP} PRIVATE 
${LIB_DEEPMD}) +if(Protobuf_LIBRARY) + target_link_libraries(${LIB_DEEPMD_OP} PRIVATE ${Protobuf_LIBRARY}) +endif() if(APPLE) set_target_properties( ${LIB_DEEPMD_OP} PROPERTIES INSTALL_RPATH diff --git a/source/op/neighbor_stat.cc b/source/op/neighbor_stat.cc index 6c58096824..ba067786f9 100644 --- a/source/op/neighbor_stat.cc +++ b/source/op/neighbor_stat.cc @@ -180,8 +180,10 @@ class NeighborStatOp : public OpKernel { #pragma omp parallel for for (int ii = 0; ii < nloc; ii++) { + if (d_type[ii] < 0) continue; // virtual atom for (int jj = 0; jj < d_nlist_r[ii].size(); jj++) { int type = d_type[d_nlist_r[ii][jj]]; + if (type < 0) continue; // virtual atom max_nbor_size[ii * ntypes + type] += 1; compute_t rij[3] = { d_coord3[d_nlist_r[ii][jj] * 3 + 0] - d_coord3[ii * 3 + 0], diff --git a/source/tests/common.py b/source/tests/common.py index d0dd2d2608..8fc303c1d7 100644 --- a/source/tests/common.py +++ b/source/tests/common.py @@ -85,9 +85,62 @@ def gen_data_mixed_type(nframes=1): ) -def gen_data(nframes=1, mixed_type=False): +def gen_data_virtual_type(nframes=1, nghost=4): + tmpdata = Data(rand_pert=0.1, seed=1, nframes=nframes) + sys = dpdata.LabeledSystem() + real_type_map = ["foo", "bar"] + sys.data["atom_names"] = ["X"] + sys.data["coords"] = tmpdata.coord + sys.data["atom_types"] = np.concatenate( + [ + np.zeros_like(tmpdata.atype), + np.zeros([nghost], dtype=np.int32), + ], + axis=0, + ) + sys.data["cells"] = tmpdata.cell + nframes = tmpdata.nframes + natoms = tmpdata.natoms + sys.data["coords"] = np.concatenate( + [ + sys.data["coords"].reshape([nframes, natoms, 3]), + np.zeros([nframes, nghost, 3]), + ], + axis=1, + ) + sys.data["cells"] = sys.data["cells"].reshape([nframes, 3, 3]) + sys.data["energies"] = np.zeros([nframes, 1]) + sys.data["forces"] = np.zeros([nframes, natoms + nghost, 3]) + sys.to_deepmd_npy("system_mixed_type", prec=np.float64) + np.savetxt("system_mixed_type/type_map.raw", real_type_map, fmt="%s") + np.save( + 
"system_mixed_type/set.000/real_atom_types.npy", + np.concatenate( + [ + tmpdata.atype.reshape(1, -1).repeat(nframes, 0), + np.full([nframes, nghost], -1, dtype=np.int32), + ], + axis=1, + ), + ) + np.save("system_mixed_type/set.000/fparam.npy", tmpdata.fparam) + np.save( + "system_mixed_type/set.000/aparam.npy", + np.concatenate( + [ + tmpdata.aparam.reshape([nframes, natoms, 2]), + np.zeros([nframes, nghost, 2]), + ], + axis=1, + ), + ) + + +def gen_data(nframes=1, mixed_type=False, virtual_type=False): if not mixed_type: gen_data_type_specific(nframes) + elif virtual_type: + gen_data_virtual_type(nframes) else: gen_data_mixed_type(nframes) diff --git a/source/tests/test_deepmd_data_sys.py b/source/tests/test_deepmd_data_sys.py index d24291114d..9df48c3f57 100644 --- a/source/tests/test_deepmd_data_sys.py +++ b/source/tests/test_deepmd_data_sys.py @@ -7,6 +7,9 @@ from deepmd.env import ( GLOBAL_NP_FLOAT_PRECISION, ) +from deepmd.utils import ( + random, +) from deepmd.utils.data_system import ( DeepmdDataSystem, ) @@ -398,3 +401,24 @@ def test_sys_prob_floating_point_error(self): ] ds = DeepmdDataSystem(self.sys_name, 3, 2, 2.0, sys_probs=sys_probs) self.assertEqual(ds.sys_probs.size, len(sys_probs)) + + def test_get_mixed_batch(self): + """test get_batch with mixed system.""" + batch_size = "mixed:3" + test_size = 2 + + ds = DeepmdDataSystem(self.sys_name, batch_size, test_size, 2.0) + ds.add("test", self.test_ndof, atomic=True, must=True) + ds.add("null", self.test_ndof, atomic=True, must=False) + random.seed(114514) + # with this seed, the batch is fixed, with natoms 3, 6, 6 + data = ds.get_batch() + np.testing.assert_equal(data["natoms_vec"], np.array([6, 6, 6, 0, 0])) + np.testing.assert_equal(data["real_natoms_vec"][:, 0], np.array([3, 6, 6])) + np.testing.assert_equal(data["type"][0, 3:6], np.array([-1, -1, -1])) + np.testing.assert_equal(data["coord"][0, 9:18], np.zeros(9)) + for kk in ("test", "null"): + np.testing.assert_equal( + data[kk][0, 3 * 
self.test_ndof : 6 * self.test_ndof], + np.zeros(3 * self.test_ndof), + ) diff --git a/source/tests/test_virtual_type.py b/source/tests/test_virtual_type.py index 575f6dc768..98ab84cf93 100644 --- a/source/tests/test_virtual_type.py +++ b/source/tests/test_virtual_type.py @@ -4,15 +4,26 @@ import numpy as np from common import ( + gen_data, + j_loader, tests_path, ) +from deepmd.common import ( + j_must_have, +) from deepmd.infer import ( DeepPot, ) from deepmd.utils.convert import ( convert_pbtxt_to_pb, ) +from deepmd.utils.data_system import ( + DeepmdDataSystem, +) +from deepmd.utils.neighbor_stat import ( + NeighborStat, +) class TestVirtualType(unittest.TestCase): @@ -106,3 +117,24 @@ def test_infer_mixed_type(self): np.testing.assert_almost_equal(v1, v2) np.testing.assert_almost_equal(ae1[:nloc], ae2[nghost:]) np.testing.assert_almost_equal(av1[:nloc], av2[nghost:]) + + +class TestTrainVirtualType(unittest.TestCase): + def setUp(self) -> None: + gen_data(mixed_type=True, virtual_type=True) + + def test_data_mixed_type(self): + jfile = "water_se_atten_mixed_type.json" + jdata = j_loader(jfile) + + systems = j_must_have(jdata, "systems") + batch_size = 1 + test_size = 1 + rcut = j_must_have(jdata["model"]["descriptor"], "rcut") + type_map = j_must_have(jdata["model"], "type_map") + + data = DeepmdDataSystem(systems, batch_size, test_size, rcut, type_map=type_map) + data.get_batch() + # neighbor stat + nei_stat = NeighborStat(len(type_map), rcut, one_type=True) + min_nbor_dist, max_nbor_size = nei_stat.get_stat(data)