diff --git a/deepmd/entrypoints/__init__.py b/deepmd/entrypoints/__init__.py
index 2f8bae9bbd..3beceace3a 100644
--- a/deepmd/entrypoints/__init__.py
+++ b/deepmd/entrypoints/__init__.py
@@ -7,6 +7,7 @@
 from .test import test
 from .train import train
 from .transfer import transfer
+from ..infer.model_devi import make_model_devi
 
 __all__ = [
     "config",
@@ -17,4 +18,5 @@
     "transfer",
     "compress",
     "doc_train_input",
+    "make_model_devi"
 ]
diff --git a/deepmd/entrypoints/main.py b/deepmd/entrypoints/main.py
index 23bb30ccde..e0c1d8d4af 100644
--- a/deepmd/entrypoints/main.py
+++ b/deepmd/entrypoints/main.py
@@ -13,6 +13,7 @@
     test,
     train,
     transfer,
+    make_model_devi,
 )
 from deepmd.loggers import set_log_handles
 
@@ -311,6 +312,46 @@ def parse_args(args: Optional[List[str]] = None):
         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
     )
 
+    # * make model deviation ***********************************************************
+    parser_model_devi = subparsers.add_parser(
+        "model-devi",
+        parents=[parser_log],
+        help="calculate model deviation",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser_model_devi.add_argument(
+        "-m",
+        "--models",
+        default=["graph.000.pb", "graph.001.pb", "graph.002.pb", "graph.003.pb"],
+        nargs="+",
+        type=str,
+        help="Frozen models file to import",
+    )
+    parser_model_devi.add_argument(
+        "-s",
+        "--system",
+        default=".",
+        type=str,
+        help="The system directory, not support recursive detection.",
+    )
+    parser_model_devi.add_argument(
+        "-S", "--set-prefix", default="set", type=str, help="The set prefix"
+    )
+    parser_model_devi.add_argument(
+        "-o",
+        "--output", 
+        default="model_devi.out", 
+        type=str, 
+        help="The output file for results of model deviation"
+    )
+    parser_model_devi.add_argument(
+        "-f",
+        "--frequency",
+        default=1,
+        type=int,
+        help="The trajectory frequency of the system"
+    )
+
     parsed_args = parser.parse_args(args=args)
     if parsed_args.command is None:
         parser.print_help()
@@ -352,6 +393,8 @@ def main():
         compress(**dict_args)
     elif args.command == "doc-train-input":
         doc_train_input()
+    elif args.command == "model-devi":
+        make_model_devi(**dict_args)
     elif args.command is None:
         pass
     else:
diff --git a/deepmd/infer/__init__.py b/deepmd/infer/__init__.py
index c0c7c73182..0eb3bddcea 100644
--- a/deepmd/infer/__init__.py
+++ b/deepmd/infer/__init__.py
@@ -10,6 +10,7 @@
 from .deep_pot import DeepPot
 from .deep_wfc import DeepWFC
 from .ewald_recp import EwaldRecp
+from .model_devi import calc_model_devi
 
 __all__ = [
     "DeepPotential",
@@ -21,6 +22,7 @@
     "DeepWFC",
     "DipoleChargeModifier",
     "EwaldRecp",
+    "calc_model_devi"
 ]
 
 
diff --git a/deepmd/infer/model_devi.py b/deepmd/infer/model_devi.py
new file mode 100644
index 0000000000..0fb4faa326
--- /dev/null
+++ b/deepmd/infer/model_devi.py
@@ -0,0 +1,184 @@
+import numpy as np
+from .deep_pot import DeepPot
+from ..utils.data import DeepmdData
+        
+
+def calc_model_devi_f(fs):
+    '''
+    fs : numpy.ndarray, size of `n_models x n_frames x n_atoms x 3`
+    '''
+    fs_devi = np.linalg.norm(np.std(fs, axis=0), axis=-1)
+    max_devi_f = np.max(fs_devi, axis=-1)
+    min_devi_f = np.min(fs_devi, axis=-1)
+    avg_devi_f = np.mean(fs_devi, axis=-1)
+    return max_devi_f, min_devi_f, avg_devi_f
+
+def calc_model_devi_e(es):
+    '''
+    es : numpy.ndarray, size of `n_models x n_frames x n_atoms
+    '''
+    es_devi = np.std(es, axis=0)
+    max_devi_e = np.max(es_devi, axis=1)
+    min_devi_e = np.min(es_devi, axis=1)
+    avg_devi_e = np.mean(es_devi, axis=1)
+    return max_devi_e, min_devi_e, avg_devi_e
+
+def calc_model_devi_v(vs):
+    '''
+    vs : numpy.ndarray, size of `n_models x n_frames x 9`
+    '''
+    vs_devi = np.std(vs, axis=0)
+    max_devi_v = np.max(vs_devi, axis=-1)
+    min_devi_v = np.min(vs_devi, axis=-1)
+    avg_devi_v = np.linalg.norm(vs_devi, axis=-1) / 3
+    return max_devi_v, min_devi_v, avg_devi_v
+
+def write_model_devi_out(devi, fname):
+    '''
+    devi : numpy.ndarray, the first column is the steps index
+    fname : str, the file name to dump
+    '''
+    assert devi.shape[1] == 7
+    header = "%10s" % "step"
+    for item in 'vf':
+        header += "%19s%19s%19s" % (f"max_devi_{item}", f"min_devi_{item}", f"avg_devi_{item}")
+    np.savetxt(fname,
+               devi,
+               fmt=['%12d'] + ['%19.6e' for _ in range(6)],
+               delimiter='',
+               header=header)
+    return devi
+
+def _check_tmaps(tmaps, ref_tmap=None):
+    '''
+    Check whether type maps are identical
+    '''
+    assert isinstance(tmaps, list)
+    if ref_tmap is None:
+        ref_tmap = tmaps[0]
+    assert isinstance(ref_tmap, list)
+
+    flag = True
+    for tmap in tmaps:
+        if tmap != ref_tmap:
+            flag = False
+            break
+    return flag
+
+def calc_model_devi(coord,
+                    box,
+                    atype,
+                    models,
+                    fname=None,
+                    frequency=1, 
+                    nopbc=True):
+    '''
+    Python interface to calculate model deviation
+
+    Parameters:
+    -----------
+    coord : numpy.ndarray, `n_frames x n_atoms x 3`
+        Coordinates of system to calculate
+    box : numpy.ndarray or None, `n_frames x 3 x 3`
+        Box to specify periodic boundary condition. If None, no pbc will be used
+    atype : numpy.ndarray, `n_atoms x 1`
+        Atom types
+    models : list of DeepPot models
+        Models used to evaluate deviation
+    fname : str or None
+        File to dump results, default None
+    frequency : int
+        Steps between frames (if the system is given by molecular dynamics engine), default 1
+    nopbc : bool
+        Whether to use pbc conditions
+    
+    Return:
+    -------
+    model_devi : numpy.ndarray, `n_frames x 7`
+        Model deviation results. The first column is index of steps, the other 6 columns are
+        max_devi_v, min_devi_v, avg_devi_v, max_devi_f, min_devi_f, avg_devi_f.
+    '''
+    if nopbc:
+        box = None
+
+    forces = []
+    virials = []
+    for dp in models:
+        ret = dp.eval(
+            coord,
+            box,
+            atype,
+        )
+        forces.append(ret[1])
+        virials.append(ret[2] / len(atype))
+    
+    forces = np.array(forces)
+    virials = np.array(virials)
+    
+    devi = [np.arange(coord.shape[0]) * frequency]
+    devi += list(calc_model_devi_v(virials))
+    devi += list(calc_model_devi_f(forces))
+    devi = np.vstack(devi).T
+    if fname:
+        write_model_devi_out(devi, fname)
+    return devi
+    
+def make_model_devi(
+    *,
+    models: list,
+    system: str,
+    set_prefix: str,
+    output: str,
+    frequency: int,
+    **kwargs
+):
+    '''
+    Make model deviation calculation
+
+    Parameters
+    ----------
+
+    models: list
+        A list of paths of models to use for making model deviation
+    system: str
+        The path of system to make model deviation calculation
+    set_prefix: str
+        The set prefix of the system
+    output: str
+        The output file for model deviation results
+    frequency: int
+        The number of steps that elapse between writing coordinates 
+        in a trajectory by a MD engine (such as Gromacs / Lammps).
+        This paramter is used to determine the index in the output file.
+    '''
+    # init models
+    dp_models = [DeepPot(model) for model in models]
+
+    # check type maps
+    tmaps = [dp.get_type_map() for dp in dp_models]
+    if _check_tmaps(tmaps):
+        tmap = tmaps[0]
+    else:
+        raise RuntimeError("The models does not have the same type map.")
+    
+    # create data-system
+    dp_data = DeepmdData(system, set_prefix, shuffle_test=False, type_map=tmap)
+    if dp_data.pbc:
+        nopbc = False
+    else:
+        nopbc = True
+
+    data_sets = [dp_data._load_set(set_name) for set_name in dp_data.dirs]
+    nframes_tot = 0
+    devis = []
+    for data in data_sets:
+        coord = data["coord"]
+        box = data["box"]
+        atype = data["type"][0] 
+        devi = calc_model_devi(coord, box, atype, dp_models, nopbc=nopbc)
+        nframes_tot += coord.shape[0]
+        devis.append(devi)
+    devis = np.vstack(devis)
+    devis[:, 0] = np.arange(nframes_tot) * frequency
+    write_model_devi_out(devis, output)
+    return devis
diff --git a/doc/getting-started.md b/doc/getting-started.md
index 12948f0775..a355fc9836 100644
--- a/doc/getting-started.md
+++ b/doc/getting-started.md
@@ -182,6 +182,45 @@ optional arguments:
                         accuracy
 ```
 
+### Calculate Model Deviation
+
+One can also use a subcommand to calculate deviation of prediced forces or virials for a bunch of models in the following way:
+```bash
+dp model-devi -m graph.000.pb graph.001.pb graph.002.pb graph.003.pb -s ./data -o model_devi.out
+```
+where `-m` specifies graph files to be calculated, `-s` gives the data to be evaluated, `-o` the file to which model deviation results is dumped. Here is more information on this sub-command:
+```bash
+usage: dp model-devi [-h] [-v {DEBUG,3,INFO,2,WARNING,1,ERROR,0}]
+                     [-l LOG_PATH] [-m MODELS [MODELS ...]] [-s SYSTEM]
+                     [-S SET_PREFIX] [-o OUTPUT] [-f FREQUENCY] [-i ITEMS]
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -v {DEBUG,3,INFO,2,WARNING,1,ERROR,0}, --log-level {DEBUG,3,INFO,2,WARNING,1,ERROR,0}
+                        set verbosity level by string or number, 0=ERROR,
+                        1=WARNING, 2=INFO and 3=DEBUG (default: INFO)
+  -l LOG_PATH, --log-path LOG_PATH
+                        set log file to log messages to disk, if not
+                        specified, the logs will only be output to console
+                        (default: None)
+  -m MODELS [MODELS ...], --models MODELS [MODELS ...]
+                        Frozen models file to import (default:
+                        ['graph.000.pb', 'graph.001.pb', 'graph.002.pb',
+                        'graph.003.pb'])
+  -s SYSTEM, --system SYSTEM
+                        The system directory, not support recursive detection.
+                        (default: .)
+  -S SET_PREFIX, --set-prefix SET_PREFIX
+                        The set prefix (default: set)
+  -o OUTPUT, --output OUTPUT
+                        The output file for results of model deviation
+                        (default: model_devi.out)
+  -f FREQUENCY, --frequency FREQUENCY
+                        The trajectory frequency of the system (default: 1)
+```
+
+For more details with respect to definition of model deviation and its application, please refer to `Yuzhi Zhang, Haidi Wang, Weijie Chen, Jinzhe Zeng, Linfeng Zhang, Han Wang, and Weinan E, DP-GEN: A concurrent learning platform for the generation of reliable deep learning based potential energy models, Computer Physics Communications, 2020, 253, 107206.`
+
 ## Compress a model
 
 Once the frozen model is obtained from deepmd-kit, we can get the neural network structure and its parameters (weights, biases, etc.) from the trained model, and compress it in the following way:
@@ -253,6 +292,19 @@ e, f, v = dp.eval(coord, cell, atype)
 ```
 where `e`, `f` and `v` are predicted energy, force and virial of the system, respectively.
 
+Furthermore, one can use the python interface to calulate model deviation.
+```python
+from deepmd.infer import calc_model_devi
+from deepmd.infer import DeepPot as DP
+import numpy as np
+
+coord = np.array([[1,0,0], [0,0,1.5], [1,0,3]]).reshape([1, -1])
+cell = np.diag(10 * np.ones(3)).reshape([1, -1])
+atype = [1,0,1]
+graphs = [DP("graph.000.pb"), DP("graph.001.pb")]
+model_devi = calc_model_devi(coord, cell, atype, graphs)
+```
+
 ### C++ interface
 The C++ interface of DeePMD-kit is also avaiable for model interface, which is considered faster than Python interface. An example `infer_water.cpp` is given below:
 ```cpp
diff --git a/source/tests/test_argument_parser.py b/source/tests/test_argument_parser.py
index a7e42ad81e..1c85728e40 100644
--- a/source/tests/test_argument_parser.py
+++ b/source/tests/test_argument_parser.py
@@ -44,7 +44,7 @@ def build_args(args: "TEST_DICT", command: str) -> List[str]:
         # arguments without value are passed as such, typically these are where action
         # is 'count' or 'store_true'
         if "value" in test_data:
-            args_list.append(str(test_data["value"]))
+            args_list += str(test_data["value"]).split()
 
     return args_list
 
@@ -285,6 +285,19 @@ def test_parser_doc(self):
         ARGS = {}
 
         self.run_test(command="doc-train-input", mapping=ARGS)
+    
+    def test_parser_model_devi(self):
+        """Test model-devi subparser"""
+        ARGS = {
+            "--models": dict(type=list, value="GRAPH.000.pb GRAPH.001.pb",
+                             expected=["GRAPH.000.pb", "GRAPH.001.pb"]),
+            "--system": dict(type=str, value="SYSTEM_DIR"),
+            "--set-prefix": dict(type=str, value="SET_PREFIX"),
+            "--output": dict(type=str, value="OUTFILE"),
+            "--frequency": dict(type=int, value=1)
+        }
+
+        self.run_test(command="model-devi", mapping=ARGS)
 
     def test_get_log_level(self):
         MAPPING = {
diff --git a/source/tests/test_model_devi.py b/source/tests/test_model_devi.py
new file mode 100644
index 0000000000..3e5161b44d
--- /dev/null
+++ b/source/tests/test_model_devi.py
@@ -0,0 +1,50 @@
+from deepmd.infer import DeepPotential
+import unittest
+import os, sys, shutil
+import numpy as np
+from deepmd.infer import calc_model_devi
+sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)))
+from infer.convert2pb import convert_pbtxt_to_pb
+from common import gen_data, tests_path, del_data
+
+
+class TestMakeModelDevi(unittest.TestCase):
+    def setUp(self):
+        gen_data()
+        self.data_dir = "system"
+        coord = np.load(os.path.join(self.data_dir, "set.000/coord.npy"))
+        box = np.load(os.path.join(self.data_dir, "set.000/box.npy"))
+        self.atype = np.loadtxt(os.path.join(self.data_dir, "type.raw"))
+        self.coord = np.vstack([coord, coord])
+        self.box = np.vstack([box, box])
+        self.freq = 10
+
+        self.pbtxts = [os.path.join(tests_path, "infer/deeppot.pbtxt"),
+                       os.path.join(tests_path, "infer/deeppot-1.pbtxt")]
+        self.graph_dirs = [pbtxt.replace("pbtxt", "pb") for pbtxt in self.pbtxts]
+        for pbtxt, pb in zip(self.pbtxts, self.graph_dirs):
+            convert_pbtxt_to_pb(pbtxt, pb)
+        self.graphs = [DeepPotential(pb) for pb in self.graph_dirs]
+        self.output = os.path.join(tests_path, "model_devi.out")
+        self.expect = np.array([0, 1.670048e-01, 4.182279e-04, 8.048649e-02, 5.095047e-01, 4.584241e-01, 4.819783e-01])
+    
+    def test_calc_model_devi(self):
+        model_devi = calc_model_devi(self.coord,
+                                     self.box, 
+                                     self.atype, 
+                                     self.graphs,
+                                     frequency=self.freq,
+                                     nopbc=True,
+                                     fname=self.output)
+        self.assertEqual(model_devi[0][0], 0)
+        self.assertEqual(model_devi[1][0], self.freq)
+        for ii in range(1, 7):
+            self.assertAlmostEqual(model_devi[0][ii], self.expect[ii])
+            self.assertEqual(model_devi[0][ii], model_devi[1][ii])
+        self.assertTrue(os.path.isfile(self.output))
+    
+    def tearDown(self):
+        for pb in self.graph_dirs:
+            os.remove(pb)
+        os.remove(self.output)
+        del_data()