From 05086f5877e14d344a648ad472326af16f47c13e Mon Sep 17 00:00:00 2001
From: denghuilu <denghuilu@pku.edu.cn>
Date: Tue, 16 Nov 2021 01:34:59 +0800
Subject: [PATCH 01/16] enable mixed precision support for dp

---
 deepmd/descriptor/se_a.py |  3 ++
 deepmd/env.py             | 79 +++++++++++++++++++++++++++++++++++++++
 deepmd/train/trainer.py   |  6 +++
 deepmd/utils/network.py   | 24 +++++++++++-
 4 files changed, 111 insertions(+), 1 deletion(-)

diff --git a/deepmd/descriptor/se_a.py b/deepmd/descriptor/se_a.py
index 74b12a412a..5ee3aa70f1 100644
--- a/deepmd/descriptor/se_a.py
+++ b/deepmd/descriptor/se_a.py
@@ -9,6 +9,7 @@
 from deepmd.env import GLOBAL_NP_FLOAT_PRECISION
 from deepmd.env import op_module
 from deepmd.env import default_tf_session_config
+from deepmd.env import DP_ENABLE_MIXED_PRECISION, cast_to_compute
 from deepmd.utils.network import embedding_net, embedding_net_rand_seed_shift
 from deepmd.utils.tabulate import DPTabulate
 from deepmd.utils.type_embed import embed_atom_type
@@ -735,6 +736,8 @@ def _filter(
             name='linear', 
             reuse=None,
             trainable = True):
+        if DP_ENABLE_MIXED_PRECISION:
+            inputs = cast_to_compute(inputs)
         nframes = tf.shape(tf.reshape(inputs, [-1, natoms[0], self.ndescrpt]))[0]
         # natom x (nei x 4)
         shape = inputs.get_shape().as_list()
diff --git a/deepmd/env.py b/deepmd/env.py
index 6e6543697e..c593e9e3b6 100644
--- a/deepmd/env.py
+++ b/deepmd/env.py
@@ -28,6 +28,9 @@
     "GLOBAL_NP_FLOAT_PRECISION",
     "GLOBAL_ENER_FLOAT_PRECISION",
     "global_float_prec",
+    "DP_ENABLE_MIXED_PRECISION",
+    "DP_MIXED_OUTPUT_PRECISION",
+    "DP_MIXED_COMPUTE_PRECISION",
     "global_cvt_2_tf_float",
     "global_cvt_2_ener_float",
     "MODEL_VERSION",
@@ -310,6 +313,82 @@ def _get_package_constants(
         "DP_INTERFACE_PREC." % dp_float_prec
     )
 
+# MIXED_PREC
+# only support tf.float16 mixed precision training.
+dp_mixed_prec = os.environ.get("DP_ENABLE_MIXED_PREC", "").lower()
+if dp_mixed_prec is "fp16":
+    # default setting of the global precision
+    GLOBAL_TF_FLOAT_PRECISION = tf.float32
+    GLOBAL_NP_FLOAT_PRECISION = np.float32
+    GLOBAL_ENER_FLOAT_PRECISION = np.float64
+    global_float_prec = "half"
+    #
+    DP_ENABLE_MIXED_PRECISION = True
+    DP_MIXED_OUTPUT_PRECISION = tf.float32
+    DP_MIXED_COMPUTE_PRECISION = tf.float16
+elif dp_mixed_prec is "":
+    DP_ENABLE_MIXED_PRECISION = False
+    DP_MIXED_OUTPUT_PRECISION = None
+    DP_MIXED_COMPUTE_PRECISION = None
+else:
+    raise RuntimeError(
+        "Unsupported mixed precision option: %s. Supported: fp16. "
+        "Please set mixed precision training with environmental variable "
+        "DP_ENABLE_MIXED_PREC." % dp_mixed_prec
+    )
+
+
+def cast_to_compute(xx: tf.Tensor) -> tf.Tensor:
+    """Cast tensor to compute precision.
+
+    Parameters
+    ----------
+    xx : tf.Tensor
+        input tensor
+
+    Returns
+    -------
+    tf.Tensor
+        output tensor cast to compute precision
+    
+    Raises
+    ------
+    RuntimeError
+        if mixed precision training mode is on
+    """
+    if DP_MIXED_COMPUTE_PRECISION is None:
+        raise RuntimeError(
+            "'cast_to_compute' function only support the mixed precision mode."
+            "Please set mixed precision training with environmental variable "
+            "DP_ENABLE_MIXED_PREC."
+        )
+    return tf.cast(xx, DP_MIXED_COMPUTE_PRECISION)
+
+def cast_to_output(xx: tf.Tensor) -> tf.Tensor:
+    """Cast tensor to output precision.
+
+    Parameters
+    ----------
+    xx : tf.Tensor
+        input tensor
+
+    Returns
+    -------
+    tf.Tensor
+        output tensor cast to output precision
+    
+    Raises
+    ------
+    RuntimeError
+        if mixed precision training mode is on
+    """
+    if DP_MIXED_COMPUTE_PRECISION is None:
+        raise RuntimeError(
+            "'cast_to_output' function only support the mixed precision mode."
+            "Please set mixed precision training with environmental variable "
+            "DP_ENABLE_MIXED_PREC."
+        )
+    return tf.cast(xx, DP_MIXED_OUTPUT_PRECISION)
 
 def global_cvt_2_tf_float(xx: tf.Tensor) -> tf.Tensor:
     """Cast tensor to globally set TF precision.
diff --git a/deepmd/train/trainer.py b/deepmd/train/trainer.py
index 16d1234112..d8751a506f 100644
--- a/deepmd/train/trainer.py
+++ b/deepmd/train/trainer.py
@@ -11,6 +11,7 @@
 from deepmd.env import get_tf_session_config
 from deepmd.env import GLOBAL_TF_FLOAT_PRECISION
 from deepmd.env import GLOBAL_ENER_FLOAT_PRECISION
+from deepmd.env import DP_ENABLE_MIXED_PRECISION
 from deepmd.fit import EnerFitting, WFCFitting, PolarFittingLocFrame, PolarFittingSeA, GlobalPolarFittingSeA, DipoleFittingSeA
 from deepmd.descriptor import Descriptor
 from deepmd.model import EnerModel, WFCModel, DipoleModel, PolarModel, GlobalPolarModel
@@ -332,6 +333,8 @@ def _build_network(self, data):
                                self.place_holders,
                                suffix = "test")
 
+        if DP_ENABLE_MIXED_PRECISION:
+            self.l2_l = tf.cast(self.l2_l, GLOBAL_TF_FLOAT_PRECISION)
         log.info("built network")
 
     def _build_training(self):
@@ -345,6 +348,9 @@ def _build_training(self):
             optimizer = self.run_opt._HVD.DistributedOptimizer(optimizer)
         else:
             optimizer = tf.train.AdamOptimizer(learning_rate = self.learning_rate)
+        if DP_ENABLE_MIXED_PRECISION:
+            # enable dynamic loss scale of the gradients
+            optimizer = tf.train.experimental.enable_mixed_precision_graph_rewrite(optimizer)
         apply_op = optimizer.minimize(loss=self.l2_l,
                                       global_step=self.global_step,
                                       var_list=trainable_variables,
diff --git a/deepmd/utils/network.py b/deepmd/utils/network.py
index 5c78031167..3ded5db063 100644
--- a/deepmd/utils/network.py
+++ b/deepmd/utils/network.py
@@ -2,6 +2,7 @@
 
 from deepmd.env import tf
 from deepmd.env import GLOBAL_TF_FLOAT_PRECISION
+from deepmd.env import DP_ENABLE_MIXED_PRECISION, cast_to_compute, cast_to_output
 
 def one_layer_rand_seed_shift():
     return 3
@@ -20,6 +21,12 @@ def one_layer(inputs,
               useBN = False, 
               uniform_seed = False,
               initial_variables = None):
+    # Do mixed precision training check
+    if DP_ENABLE_MIXED_PRECISION and precision is not tf.float32:
+        raise RuntimeError("The network precision %s does not match the mixed precision training settting! Please check the input training script. " % (precision))
+    # For good accuracy, the last layer of the fitting network uses a single-precision neuron network.
+    if DP_ENABLE_MIXED_PRECISION and outputs_size is 1:
+        inputs = cast_to_output(inputs)
     with tf.variable_scope(name, reuse=reuse):
         shape = inputs.get_shape().as_list()
         w_initializer  = tf.random_normal_initializer(
@@ -37,13 +44,17 @@ def one_layer(inputs,
                             precision,
                             w_initializer, 
                             trainable = trainable)
-        variable_summaries(w, 'matrix')
+        variable_summaries(w, 'matrix')      
         b = tf.get_variable('bias', 
                             [outputs_size], 
                             precision,
                             b_initializer, 
                             trainable = trainable)
         variable_summaries(b, 'bias')
+        if DP_ENABLE_MIXED_PRECISION and outputs_size is not 1:
+            inputs = cast_to_compute(inputs)
+            w = cast_to_compute(w)
+            b = cast_to_compute(b)
         hidden = tf.matmul(inputs, w) + b
         if activation_fn != None and use_timestep :
             idt_initializer = tf.random_normal_initializer(
@@ -65,6 +76,8 @@ def one_layer(inputs,
                 # return activation_fn(hidden_bn)
             else:
                 if use_timestep :
+                    if DP_ENABLE_MIXED_PRECISION and outputs_size is not 1:
+                       idt = cast_to_compute(idt)
                     return tf.reshape(activation_fn(hidden), [-1, outputs_size]) * idt
                 else :
                     return tf.reshape(activation_fn(hidden), [-1, outputs_size])                    
@@ -154,6 +167,9 @@ def embedding_net(xx,
        in deep residual networks. InComputer Vision – ECCV 2016,pages 630–645. Springer
        International Publishing, 2016.
     """
+    # Do mixed precision training check
+    if DP_ENABLE_MIXED_PRECISION and precision is not tf.float32:
+        raise RuntimeError("The network precision %s does not match the mixed precision training settting! Please check the input training script. " % (precision))
     input_shape = xx.get_shape().as_list()
     outputs_size = [input_shape[1]] + network_size
 
@@ -185,6 +201,10 @@ def embedding_net(xx,
                             trainable = trainable)
         variable_summaries(b, 'bias_'+str(ii)+name_suffix)
 
+        if DP_ENABLE_MIXED_PRECISION:
+            xx = cast_to_compute(xx)
+            w  = cast_to_compute(w)
+            b  = cast_to_compute(b)
         hidden = tf.reshape(activation_fn(tf.matmul(xx, w) + b), [-1, outputs_size[ii]])
         if resnet_dt :
             idt_initializer = tf.random_normal_initializer(
@@ -201,6 +221,8 @@ def embedding_net(xx,
                                   idt_initializer, 
                                   trainable = trainable)
             variable_summaries(idt, 'idt_'+str(ii)+name_suffix)
+            if DP_ENABLE_MIXED_PRECISION:
+                idt = cast_to_compute(idt)
 
         if outputs_size[ii] == outputs_size[ii-1]:
             if resnet_dt :

From e1cc674f9ece2d273d31c225a35542bd130c4146 Mon Sep 17 00:00:00 2001
From: denghuilu <denghuilu@pku.edu.cn>
Date: Tue, 16 Nov 2021 02:12:29 +0800
Subject: [PATCH 02/16] set the default embedding net & fitting net precision

---
 deepmd/env.py            |  4 ++++
 deepmd/utils/argcheck.py | 15 ++++++++-------
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/deepmd/env.py b/deepmd/env.py
index c593e9e3b6..5494f10a19 100644
--- a/deepmd/env.py
+++ b/deepmd/env.py
@@ -24,6 +24,7 @@
 
 __all__ = [
     "GLOBAL_CONFIG",
+    "GLOBAL_FLOAT_PRECISION",
     "GLOBAL_TF_FLOAT_PRECISION",
     "GLOBAL_NP_FLOAT_PRECISION",
     "GLOBAL_ENER_FLOAT_PRECISION",
@@ -297,11 +298,13 @@ def _get_package_constants(
 dp_float_prec = os.environ.get("DP_INTERFACE_PREC", "high").lower()
 if dp_float_prec in ("high", ""):
     # default is high
+    GLOBAL_FLOAT_PRECISION = "float64"
     GLOBAL_TF_FLOAT_PRECISION = tf.float64
     GLOBAL_NP_FLOAT_PRECISION = np.float64
     GLOBAL_ENER_FLOAT_PRECISION = np.float64
     global_float_prec = "double"
 elif dp_float_prec == "low":
+    GLOBAL_FLOAT_PRECISION = "float32"
     GLOBAL_TF_FLOAT_PRECISION = tf.float32
     GLOBAL_NP_FLOAT_PRECISION = np.float32
     GLOBAL_ENER_FLOAT_PRECISION = np.float64
@@ -318,6 +321,7 @@ def _get_package_constants(
 dp_mixed_prec = os.environ.get("DP_ENABLE_MIXED_PREC", "").lower()
 if dp_mixed_prec is "fp16":
     # default setting of the global precision
+    GLOBAL_FLOAT_PRECISION = "float32"
     GLOBAL_TF_FLOAT_PRECISION = tf.float32
     GLOBAL_NP_FLOAT_PRECISION = np.float32
     GLOBAL_ENER_FLOAT_PRECISION = np.float64
diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py
index 36e9eb2ee6..736d2f894f 100644
--- a/deepmd/utils/argcheck.py
+++ b/deepmd/utils/argcheck.py
@@ -4,6 +4,7 @@
 from deepmd import descriptor
 from deepmd.common import ACTIVATION_FN_DICT, PRECISION_DICT
 from deepmd.utils.plugin import Plugin
+from deepmd.env import GLOBAL_FLOAT_PRECISION
 import json
 
 
@@ -35,7 +36,7 @@ def type_embedding_args():
         Argument("neuron", list, optional = True, default = [2, 4, 8], doc = doc_neuron),
         Argument("activation_function", str, optional = True, default = 'tanh', doc = doc_activation_function),
         Argument("resnet_dt", bool, optional = True, default = False, doc = doc_resnet_dt),
-        Argument("precision", str, optional = True, default = "float64", doc = doc_precision),
+        Argument("precision", str, optional = True, default = GLOBAL_FLOAT_PRECISION, doc = doc_precision),
         Argument("trainable", bool, optional = True, default = True, doc = doc_trainable),
         Argument("seed", [int,None], optional = True, doc = doc_seed),
     ]        
@@ -138,7 +139,7 @@ def descrpt_se_a_args():
         Argument("activation_function", str, optional = True, default = 'tanh', doc = doc_activation_function),
         Argument("resnet_dt", bool, optional = True, default = False, doc = doc_resnet_dt),
         Argument("type_one_side", bool, optional = True, default = False, doc = doc_type_one_side),
-        Argument("precision", str, optional = True, default = "float64", doc = doc_precision),
+        Argument("precision", str, optional = True, default = GLOBAL_FLOAT_PRECISION, doc = doc_precision),
         Argument("trainable", bool, optional = True, default = True, doc = doc_trainable),
         Argument("seed", [int,None], optional = True, doc = doc_seed),
         Argument("exclude_types", list, optional = True, default = [], doc = doc_exclude_types),
@@ -168,7 +169,7 @@ def descrpt_se_t_args():
         Argument("neuron", list, optional = True, default = [10,20,40], doc = doc_neuron),
         Argument("activation_function", str, optional = True, default = 'tanh', doc = doc_activation_function),
         Argument("resnet_dt", bool, optional = True, default = False, doc = doc_resnet_dt),
-        Argument("precision", str, optional = True, default = "float64", doc = doc_precision),
+        Argument("precision", str, optional = True, default = GLOBAL_FLOAT_PRECISION, doc = doc_precision),
         Argument("trainable", bool, optional = True, default = True, doc = doc_trainable),
         Argument("seed", [int,None], optional = True, doc = doc_seed),
         Argument("set_davg_zero", bool, optional = True, default = False, doc = doc_set_davg_zero)
@@ -214,7 +215,7 @@ def descrpt_se_r_args():
         Argument("activation_function", str, optional = True, default = 'tanh', doc = doc_activation_function),
         Argument("resnet_dt", bool, optional = True, default = False, doc = doc_resnet_dt),
         Argument("type_one_side", bool, optional = True, default = False, doc = doc_type_one_side),
-        Argument("precision", str, optional = True, default = "float64", doc = doc_precision),
+        Argument("precision", str, optional = True, default = GLOBAL_FLOAT_PRECISION, doc = doc_precision),
         Argument("trainable", bool, optional = True, default = True, doc = doc_trainable),
         Argument("seed", [int,None], optional = True, doc = doc_seed),
         Argument("exclude_types", list, optional = True, default = [], doc = doc_exclude_types),
@@ -269,7 +270,7 @@ def fitting_ener():
         Argument("numb_aparam", int, optional = True, default = 0, doc = doc_numb_aparam),
         Argument("neuron", list, optional = True, default = [120,120,120], alias = ['n_neuron'], doc = doc_neuron),
         Argument("activation_function", str, optional = True, default = 'tanh', doc = doc_activation_function),
-        Argument("precision", str, optional = True, default = 'float64', doc = doc_precision),
+        Argument("precision", str, optional = True, default = GLOBAL_FLOAT_PRECISION, doc = doc_precision),
         Argument("resnet_dt", bool, optional = True, default = True, doc = doc_resnet_dt),
         Argument("trainable", [list,bool], optional = True, default = True, doc = doc_trainable),
         Argument("rcond", float, optional = True, default = 1e-3, doc = doc_rcond),
@@ -296,7 +297,7 @@ def fitting_polar():
         Argument("neuron", list, optional = True, default = [120,120,120], alias = ['n_neuron'], doc = doc_neuron),
         Argument("activation_function", str, optional = True, default = 'tanh', doc = doc_activation_function),
         Argument("resnet_dt", bool, optional = True, default = True, doc = doc_resnet_dt),
-        Argument("precision", str, optional = True, default = 'float64', doc = doc_precision),
+        Argument("precision", str, optional = True, default = GLOBAL_FLOAT_PRECISION, doc = doc_precision),
         Argument("fit_diag", bool, optional = True, default = True, doc = doc_fit_diag),
         Argument("scale", [list,float], optional = True, default = 1.0, doc = doc_scale),
         #Argument("diag_shift", [list,float], optional = True, default = 0.0, doc = doc_diag_shift),
@@ -321,7 +322,7 @@ def fitting_dipole():
         Argument("neuron", list, optional = True, default = [120,120,120], alias = ['n_neuron'], doc = doc_neuron),
         Argument("activation_function", str, optional = True, default = 'tanh', doc = doc_activation_function),
         Argument("resnet_dt", bool, optional = True, default = True, doc = doc_resnet_dt),
-        Argument("precision", str, optional = True, default = 'float64', doc = doc_precision),
+        Argument("precision", str, optional = True, default = GLOBAL_FLOAT_PRECISION, doc = doc_precision),
         Argument("sel_type", [list,int,None], optional = True, alias = ['dipole_type'], doc = doc_sel_type),
         Argument("seed", [int,None], optional = True, doc = doc_seed)
     ]    

From 1589b12a513b604d320e0006adb8d1fb798cc96f Mon Sep 17 00:00:00 2001
From: Denghui Lu <denghuilu@pku.edu.cn>
Date: Tue, 16 Nov 2021 02:28:46 +0800
Subject: [PATCH 03/16] add doc for mixed precision

---
 doc/train/training-advanced.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/train/training-advanced.md b/doc/train/training-advanced.md
index ea9e1e8075..184a61f1d7 100644
--- a/doc/train/training-advanced.md
+++ b/doc/train/training-advanced.md
@@ -126,3 +126,4 @@ One can set other environmental variables:
 | Environment variables | Allowed value          | Default value | Usage                      |
 | --------------------- | ---------------------- | ------------- | -------------------------- |
 | DP_INTERFACE_PREC     | `high`, `low`          | `high`        | Control high (double) or low (float) precision of training. |
+| DP_ENABLE_MIXED_PREC  | `fp16`                 |               | Control mixed precision(fp16) of training and inference.    |

From fb48b01398c59b2b59689664d553b0a903d1d611 Mon Sep 17 00:00:00 2001
From: denghuilu <denghuilu@pku.edu.cn>
Date: Tue, 16 Nov 2021 02:32:07 +0800
Subject: [PATCH 04/16] fix typo

---
 deepmd/utils/network.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deepmd/utils/network.py b/deepmd/utils/network.py
index 3ded5db063..45df6357a1 100644
--- a/deepmd/utils/network.py
+++ b/deepmd/utils/network.py
@@ -44,7 +44,7 @@ def one_layer(inputs,
                             precision,
                             w_initializer, 
                             trainable = trainable)
-        variable_summaries(w, 'matrix')      
+        variable_summaries(w, 'matrix')
         b = tf.get_variable('bias', 
                             [outputs_size], 
                             precision,

From 4aae04b08f149258fec4b97b59a8e5db97be317e Mon Sep 17 00:00:00 2001
From: denghuilu <denghuilu@pku.edu.cn>
Date: Tue, 16 Nov 2021 03:29:20 +0800
Subject: [PATCH 05/16] fix UT bug

---
 deepmd/env.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/deepmd/env.py b/deepmd/env.py
index 5494f10a19..26cbde003a 100644
--- a/deepmd/env.py
+++ b/deepmd/env.py
@@ -319,7 +319,7 @@ def _get_package_constants(
 # MIXED_PREC
 # only support tf.float16 mixed precision training.
 dp_mixed_prec = os.environ.get("DP_ENABLE_MIXED_PREC", "").lower()
-if dp_mixed_prec is "fp16":
+if dp_mixed_prec == "fp16":
     # default setting of the global precision
     GLOBAL_FLOAT_PRECISION = "float32"
     GLOBAL_TF_FLOAT_PRECISION = tf.float32
@@ -330,7 +330,7 @@ def _get_package_constants(
     DP_ENABLE_MIXED_PRECISION = True
     DP_MIXED_OUTPUT_PRECISION = tf.float32
     DP_MIXED_COMPUTE_PRECISION = tf.float16
-elif dp_mixed_prec is "":
+elif dp_mixed_prec == "":
     DP_ENABLE_MIXED_PRECISION = False
     DP_MIXED_OUTPUT_PRECISION = None
     DP_MIXED_COMPUTE_PRECISION = None

From 5b633a813281004bd7a5c9361613e7b7cdfbef38 Mon Sep 17 00:00:00 2001
From: denghuilu <denghuilu@pku.edu.cn>
Date: Sun, 21 Nov 2021 21:12:18 +0800
Subject: [PATCH 06/16] use input script to control the mixed precision
 workflow

---
 deepmd/descriptor/descriptor.py | 18 +++++++
 deepmd/descriptor/se_a.py       | 21 +++++++--
 deepmd/env.py                   | 83 ---------------------------------
 deepmd/fit/dipole.py            | 22 +++++++--
 deepmd/fit/ener.py              | 25 ++++++++--
 deepmd/fit/polar.py             | 33 +++++++++++--
 deepmd/train/trainer.py         | 20 ++++++--
 deepmd/utils/argcheck.py        | 35 ++++++++++----
 deepmd/utils/network.py         | 51 ++++++++++----------
 9 files changed, 170 insertions(+), 138 deletions(-)

diff --git a/deepmd/descriptor/descriptor.py b/deepmd/descriptor/descriptor.py
index d179660a9d..0642779985 100644
--- a/deepmd/descriptor/descriptor.py
+++ b/deepmd/descriptor/descriptor.py
@@ -262,6 +262,24 @@ def enable_compression(self,
         raise NotImplementedError(
             "Descriptor %s doesn't support compression!" % type(self).__name__)
 
+
+    def enable_mixed_precision(self, mixed_prec : dict = None) -> None:
+        """
+        Receive the mixed precision setting.
+
+        Parameters
+        ----------
+        mixed_prec
+                The mixed precision setting used in the embedding net
+        
+        Notes
+        -----
+        This method is called by others when the descriptor supports mixed precision training.
+        """
+        raise NotImplementedError(
+            "Descriptor %s doesn't support mixed precision training!" % type(self).__name__)
+
+
     @abstractmethod
     def prod_force_virial(self,
                           atom_ener: tf.Tensor,
diff --git a/deepmd/descriptor/se_a.py b/deepmd/descriptor/se_a.py
index 5ee3aa70f1..91843a47d3 100644
--- a/deepmd/descriptor/se_a.py
+++ b/deepmd/descriptor/se_a.py
@@ -9,7 +9,6 @@
 from deepmd.env import GLOBAL_NP_FLOAT_PRECISION
 from deepmd.env import op_module
 from deepmd.env import default_tf_session_config
-from deepmd.env import DP_ENABLE_MIXED_PRECISION, cast_to_compute
 from deepmd.utils.network import embedding_net, embedding_net_rand_seed_shift
 from deepmd.utils.tabulate import DPTabulate
 from deepmd.utils.type_embed import embed_atom_type
@@ -161,6 +160,7 @@ def __init__ (self,
         self.davg = None
         self.compress = False
         self.embedding_net_variables = None
+        self.mixed_prec = None
         self.place_holders = {}
         nei_type = np.array([])
         for ii in range(self.ntypes):
@@ -349,6 +349,18 @@ def enable_compression(self,
         self.dstd = get_tensor_by_name_from_graph(graph, 'descrpt_attr%s/t_std' % suffix)
 
 
+    def enable_mixed_precision(self, mixed_prec : dict = None) -> None:
+        """
+        Receive the mixed precision setting.
+
+        Parameters
+        ----------
+        mixed_prec
+                The mixed precision setting used in the embedding net
+        """
+        self.mixed_prec = mixed_prec
+        self.filter_precision = get_precision(mixed_prec['output_prec'])
+
 
     def build (self, 
                coord_ : tf.Tensor, 
@@ -709,7 +721,8 @@ def _filter_lower(
                   seed = self.seed,
                   trainable = trainable, 
                   uniform_seed = self.uniform_seed,
-                  initial_variables = self.embedding_net_variables)
+                  initial_variables = self.embedding_net_variables,
+                  mixed_prec = self.mixed_prec)
               if (not self.uniform_seed) and (self.seed is not None): self.seed += self.seed_shift
           else:
             # we can safely return the final xyz_scatter filled with zero directly
@@ -736,8 +749,8 @@ def _filter(
             name='linear', 
             reuse=None,
             trainable = True):
-        if DP_ENABLE_MIXED_PRECISION:
-            inputs = cast_to_compute(inputs)
+        if self.mixed_prec is not None:
+            inputs = tf.cast(inputs, get_precision(self.mixed_prec['compute_prec']))
         nframes = tf.shape(tf.reshape(inputs, [-1, natoms[0], self.ndescrpt]))[0]
         # natom x (nei x 4)
         shape = inputs.get_shape().as_list()
diff --git a/deepmd/env.py b/deepmd/env.py
index 26cbde003a..6e6543697e 100644
--- a/deepmd/env.py
+++ b/deepmd/env.py
@@ -24,14 +24,10 @@
 
 __all__ = [
     "GLOBAL_CONFIG",
-    "GLOBAL_FLOAT_PRECISION",
     "GLOBAL_TF_FLOAT_PRECISION",
     "GLOBAL_NP_FLOAT_PRECISION",
     "GLOBAL_ENER_FLOAT_PRECISION",
     "global_float_prec",
-    "DP_ENABLE_MIXED_PRECISION",
-    "DP_MIXED_OUTPUT_PRECISION",
-    "DP_MIXED_COMPUTE_PRECISION",
     "global_cvt_2_tf_float",
     "global_cvt_2_ener_float",
     "MODEL_VERSION",
@@ -298,13 +294,11 @@ def _get_package_constants(
 dp_float_prec = os.environ.get("DP_INTERFACE_PREC", "high").lower()
 if dp_float_prec in ("high", ""):
     # default is high
-    GLOBAL_FLOAT_PRECISION = "float64"
     GLOBAL_TF_FLOAT_PRECISION = tf.float64
     GLOBAL_NP_FLOAT_PRECISION = np.float64
     GLOBAL_ENER_FLOAT_PRECISION = np.float64
     global_float_prec = "double"
 elif dp_float_prec == "low":
-    GLOBAL_FLOAT_PRECISION = "float32"
     GLOBAL_TF_FLOAT_PRECISION = tf.float32
     GLOBAL_NP_FLOAT_PRECISION = np.float32
     GLOBAL_ENER_FLOAT_PRECISION = np.float64
@@ -316,83 +310,6 @@ def _get_package_constants(
         "DP_INTERFACE_PREC." % dp_float_prec
     )
 
-# MIXED_PREC
-# only support tf.float16 mixed precision training.
-dp_mixed_prec = os.environ.get("DP_ENABLE_MIXED_PREC", "").lower()
-if dp_mixed_prec == "fp16":
-    # default setting of the global precision
-    GLOBAL_FLOAT_PRECISION = "float32"
-    GLOBAL_TF_FLOAT_PRECISION = tf.float32
-    GLOBAL_NP_FLOAT_PRECISION = np.float32
-    GLOBAL_ENER_FLOAT_PRECISION = np.float64
-    global_float_prec = "half"
-    #
-    DP_ENABLE_MIXED_PRECISION = True
-    DP_MIXED_OUTPUT_PRECISION = tf.float32
-    DP_MIXED_COMPUTE_PRECISION = tf.float16
-elif dp_mixed_prec == "":
-    DP_ENABLE_MIXED_PRECISION = False
-    DP_MIXED_OUTPUT_PRECISION = None
-    DP_MIXED_COMPUTE_PRECISION = None
-else:
-    raise RuntimeError(
-        "Unsupported mixed precision option: %s. Supported: fp16. "
-        "Please set mixed precision training with environmental variable "
-        "DP_ENABLE_MIXED_PREC." % dp_mixed_prec
-    )
-
-
-def cast_to_compute(xx: tf.Tensor) -> tf.Tensor:
-    """Cast tensor to compute precision.
-
-    Parameters
-    ----------
-    xx : tf.Tensor
-        input tensor
-
-    Returns
-    -------
-    tf.Tensor
-        output tensor cast to compute precision
-    
-    Raises
-    ------
-    RuntimeError
-        if mixed precision training mode is on
-    """
-    if DP_MIXED_COMPUTE_PRECISION is None:
-        raise RuntimeError(
-            "'cast_to_compute' function only support the mixed precision mode."
-            "Please set mixed precision training with environmental variable "
-            "DP_ENABLE_MIXED_PREC."
-        )
-    return tf.cast(xx, DP_MIXED_COMPUTE_PRECISION)
-
-def cast_to_output(xx: tf.Tensor) -> tf.Tensor:
-    """Cast tensor to output precision.
-
-    Parameters
-    ----------
-    xx : tf.Tensor
-        input tensor
-
-    Returns
-    -------
-    tf.Tensor
-        output tensor cast to output precision
-    
-    Raises
-    ------
-    RuntimeError
-        if mixed precision training mode is on
-    """
-    if DP_MIXED_COMPUTE_PRECISION is None:
-        raise RuntimeError(
-            "'cast_to_output' function only support the mixed precision mode."
-            "Please set mixed precision training with environmental variable "
-            "DP_ENABLE_MIXED_PREC."
-        )
-    return tf.cast(xx, DP_MIXED_OUTPUT_PRECISION)
 
 def global_cvt_2_tf_float(xx: tf.Tensor) -> tf.Tensor:
     """Cast tensor to globally set TF precision.
diff --git a/deepmd/fit/dipole.py b/deepmd/fit/dipole.py
index 6c115e3fb3..5dfa5fab0f 100644
--- a/deepmd/fit/dipole.py
+++ b/deepmd/fit/dipole.py
@@ -77,6 +77,7 @@ def __init__ (self,
         self.dim_rot_mat = self.dim_rot_mat_1 * 3
         self.useBN = False
         self.fitting_net_variables = None
+        self.mixed_prec = None
 
     def get_sel_type(self) -> int:
         """
@@ -141,12 +142,12 @@ def build (self,
             layer = inputs_i
             for ii in range(0,len(self.n_neuron)) :
                 if ii >= 1 and self.n_neuron[ii] == self.n_neuron[ii-1] :
-                    layer+= one_layer(layer, self.n_neuron[ii], name='layer_'+str(ii)+'_type_'+str(type_i)+suffix, reuse=reuse, seed = self.seed, use_timestep = self.resnet_dt, activation_fn = self.fitting_activation_fn, precision = self.fitting_precision, uniform_seed = self.uniform_seed, initial_variables = self.fitting_net_variables)
+                    layer+= one_layer(layer, self.n_neuron[ii], name='layer_'+str(ii)+'_type_'+str(type_i)+suffix, reuse=reuse, seed = self.seed, use_timestep = self.resnet_dt, activation_fn = self.fitting_activation_fn, precision = self.fitting_precision, uniform_seed = self.uniform_seed, initial_variables = self.fitting_net_variables, mixed_prec = self.mixed_prec)
                 else :
-                    layer = one_layer(layer, self.n_neuron[ii], name='layer_'+str(ii)+'_type_'+str(type_i)+suffix, reuse=reuse, seed = self.seed, activation_fn = self.fitting_activation_fn, precision = self.fitting_precision, uniform_seed = self.uniform_seed, initial_variables = self.fitting_net_variables)
+                    layer = one_layer(layer, self.n_neuron[ii], name='layer_'+str(ii)+'_type_'+str(type_i)+suffix, reuse=reuse, seed = self.seed, activation_fn = self.fitting_activation_fn, precision = self.fitting_precision, uniform_seed = self.uniform_seed, initial_variables = self.fitting_net_variables, mixed_prec = self.mixed_prec)
                 if (not self.uniform_seed) and (self.seed is not None): self.seed += self.seed_shift
             # (nframes x natoms) x naxis
-            final_layer = one_layer(layer, self.dim_rot_mat_1, activation_fn = None, name='final_layer_type_'+str(type_i)+suffix, reuse=reuse, seed = self.seed, precision = self.fitting_precision, uniform_seed = self.uniform_seed, initial_variables = self.fitting_net_variables)
+            final_layer = one_layer(layer, self.dim_rot_mat_1, activation_fn = None, name='final_layer_type_'+str(type_i)+suffix, reuse=reuse, seed = self.seed, precision = self.fitting_precision, uniform_seed = self.uniform_seed, initial_variables = self.fitting_net_variables, mixed_prec = self.mixed_prec)
             if (not self.uniform_seed) and (self.seed is not None): self.seed += self.seed_shift
             # (nframes x natoms) x 1 * naxis
             final_layer = tf.reshape(final_layer, [tf.shape(inputs)[0] * natoms[2+type_i], 1, self.dim_rot_mat_1])
@@ -177,4 +178,17 @@ def init_variables(self,
         model_file : str
             The input frozen model file
         """
-        self.fitting_net_variables = get_fitting_net_variables(model_file)
\ No newline at end of file
+        self.fitting_net_variables = get_fitting_net_variables(model_file)
+
+
+    def enable_mixed_precision(self, mixed_prec : dict = None) -> None:
+        """
+        Receive the mixed precision setting.
+
+        Parameters
+        ----------
+        mixed_prec
+                The mixed precision setting used in the fitting net
+        """
+        self.mixed_prec = mixed_prec
+        self.fitting_precision = get_precision(mixed_prec['output_prec'])
\ No newline at end of file
diff --git a/deepmd/fit/ener.py b/deepmd/fit/ener.py
index 0afcf26de2..3fe4013616 100644
--- a/deepmd/fit/ener.py
+++ b/deepmd/fit/ener.py
@@ -150,6 +150,7 @@ def __init__ (self,
             self.aparam_inv_std = None
 
         self.fitting_net_variables = None
+        self.mixed_prec = None
 
     def get_numb_fparam(self) -> int:
         """
@@ -293,7 +294,8 @@ def _build_lower(
                     precision = self.fitting_precision,
                     trainable = self.trainable[ii],
                     uniform_seed = self.uniform_seed,
-                    initial_variables = self.fitting_net_variables)
+                    initial_variables = self.fitting_net_variables,
+                    mixed_prec = self.mixed_prec)
             else :
                 layer = one_layer(
                     layer,
@@ -305,7 +307,8 @@ def _build_lower(
                     precision = self.fitting_precision,
                     trainable = self.trainable[ii],
                     uniform_seed = self.uniform_seed,
-                    initial_variables = self.fitting_net_variables)
+                    initial_variables = self.fitting_net_variables,
+                    mixed_prec = self.mixed_prec)
             if (not self.uniform_seed) and (self.seed is not None): self.seed += self.seed_shift
         final_layer = one_layer(
             layer, 
@@ -318,7 +321,8 @@ def _build_lower(
             precision = self.fitting_precision, 
             trainable = self.trainable[-1],
             uniform_seed = self.uniform_seed,
-            initial_variables = self.fitting_net_variables)
+            initial_variables = self.fitting_net_variables,
+            mixed_prec = self.mixed_prec)
         if (not self.uniform_seed) and (self.seed is not None): self.seed += self.seed_shift
 
         return final_layer
@@ -494,4 +498,17 @@ def init_variables(self,
         model_file : str
             The input frozen model file
         """
-        self.fitting_net_variables = get_fitting_net_variables(model_file)
\ No newline at end of file
+        self.fitting_net_variables = get_fitting_net_variables(model_file)
+
+
+    def enable_mixed_precision(self, mixed_prec : dict = None) -> None:
+        """
+        Receive the mixed precision setting.
+
+        Parameters
+        ----------
+        mixed_prec
+                The mixed precision setting; its 'output_prec' entry becomes the fitting net precision
+        """
+        self.mixed_prec = mixed_prec
+        self.fitting_precision = get_precision(mixed_prec['output_prec'])
\ No newline at end of file
diff --git a/deepmd/fit/polar.py b/deepmd/fit/polar.py
index 65b1ff6aef..d858d37ac1 100644
--- a/deepmd/fit/polar.py
+++ b/deepmd/fit/polar.py
@@ -194,6 +194,7 @@ def __init__ (self,
         self.dim_rot_mat = self.dim_rot_mat_1 * 3
         self.useBN = False
         self.fitting_net_variables = None
+        self.mixed_prec = None
 
     def get_sel_type(self) -> List[int]:
         """
@@ -324,9 +325,9 @@ def build (self,
             layer = inputs_i
             for ii in range(0,len(self.n_neuron)) :
                 if ii >= 1 and self.n_neuron[ii] == self.n_neuron[ii-1] :
-                    layer+= one_layer(layer, self.n_neuron[ii], name='layer_'+str(ii)+'_type_'+str(type_i)+suffix, reuse=reuse, seed = self.seed, use_timestep = self.resnet_dt, activation_fn = self.fitting_activation_fn, precision = self.fitting_precision, uniform_seed = self.uniform_seed, initial_variables = self.fitting_net_variables)
+                    layer+= one_layer(layer, self.n_neuron[ii], name='layer_'+str(ii)+'_type_'+str(type_i)+suffix, reuse=reuse, seed = self.seed, use_timestep = self.resnet_dt, activation_fn = self.fitting_activation_fn, precision = self.fitting_precision, uniform_seed = self.uniform_seed, initial_variables = self.fitting_net_variables, mixed_prec = self.mixed_prec)
                 else :
-                    layer = one_layer(layer, self.n_neuron[ii], name='layer_'+str(ii)+'_type_'+str(type_i)+suffix, reuse=reuse, seed = self.seed, activation_fn = self.fitting_activation_fn, precision = self.fitting_precision, uniform_seed = self.uniform_seed, initial_variables = self.fitting_net_variables)
+                    layer = one_layer(layer, self.n_neuron[ii], name='layer_'+str(ii)+'_type_'+str(type_i)+suffix, reuse=reuse, seed = self.seed, activation_fn = self.fitting_activation_fn, precision = self.fitting_precision, uniform_seed = self.uniform_seed, initial_variables = self.fitting_net_variables, mixed_prec = self.mixed_prec)
                 if (not self.uniform_seed) and (self.seed is not None): self.seed += self.seed_shift
             if self.fit_diag :
                 bavg = np.zeros(self.dim_rot_mat_1)
@@ -334,7 +335,7 @@ def build (self,
                 # bavg[1] = self.avgeig[1]
                 # bavg[2] = self.avgeig[2]
                 # (nframes x natoms) x naxis
-                final_layer = one_layer(layer, self.dim_rot_mat_1, activation_fn = None, name='final_layer_type_'+str(type_i)+suffix, reuse=reuse, seed = self.seed, bavg = bavg, precision = self.fitting_precision, uniform_seed = self.uniform_seed, initial_variables = self.fitting_net_variables)
+                final_layer = one_layer(layer, self.dim_rot_mat_1, activation_fn = None, name='final_layer_type_'+str(type_i)+suffix, reuse=reuse, seed = self.seed, bavg = bavg, precision = self.fitting_precision, uniform_seed = self.uniform_seed, initial_variables = self.fitting_net_variables, mixed_prec = self.mixed_prec)
                 if (not self.uniform_seed) and (self.seed is not None): self.seed += self.seed_shift
                 # (nframes x natoms) x naxis
                 final_layer = tf.reshape(final_layer, [tf.shape(inputs)[0] * natoms[2+type_i], self.dim_rot_mat_1])
@@ -346,7 +347,7 @@ def build (self,
                 # bavg[1*self.dim_rot_mat_1+1] = self.avgeig[1]
                 # bavg[2*self.dim_rot_mat_1+2] = self.avgeig[2]
                 # (nframes x natoms) x (naxis x naxis)
-                final_layer = one_layer(layer, self.dim_rot_mat_1*self.dim_rot_mat_1, activation_fn = None, name='final_layer_type_'+str(type_i)+suffix, reuse=reuse, seed = self.seed, bavg = bavg, precision = self.fitting_precision, uniform_seed = self.uniform_seed, initial_variables = self.fitting_net_variables)
+                final_layer = one_layer(layer, self.dim_rot_mat_1*self.dim_rot_mat_1, activation_fn = None, name='final_layer_type_'+str(type_i)+suffix, reuse=reuse, seed = self.seed, bavg = bavg, precision = self.fitting_precision, uniform_seed = self.uniform_seed, initial_variables = self.fitting_net_variables, mixed_prec = self.mixed_prec)
                 if (not self.uniform_seed) and (self.seed is not None): self.seed += self.seed_shift
                 # (nframes x natoms) x naxis x naxis
                 final_layer = tf.reshape(final_layer, [tf.shape(inputs)[0] * natoms[2+type_i], self.dim_rot_mat_1, self.dim_rot_mat_1])
@@ -387,6 +388,19 @@ def init_variables(self,
         self.fitting_net_variables = get_fitting_net_variables(model_file)
 
 
+    def enable_mixed_precision(self, mixed_prec : dict = None) -> None:
+        """
+        Receive the mixed precision setting.
+
+        Parameters
+        ----------
+        mixed_prec
+                The mixed precision setting; its 'output_prec' entry becomes the fitting net precision
+        """
+        self.mixed_prec = mixed_prec
+        self.fitting_precision = get_precision(mixed_prec['output_prec'])
+
+
 class GlobalPolarFittingSeA () :
     """
     Fit the system polarizability with descriptor se_a
@@ -509,3 +523,14 @@ def init_variables(self,
         """
         self.polar_fitting.init_variables(model_file)
 
+
+    def enable_mixed_precision(self, mixed_prec : dict = None) -> None:
+        """
+        Receive the mixed precision setting.
+
+        Parameters
+        ----------
+        mixed_prec
+                The mixed precision setting, forwarded unchanged to the wrapped polar fitting
+        """
+        self.polar_fitting.enable_mixed_precision(mixed_prec)
diff --git a/deepmd/train/trainer.py b/deepmd/train/trainer.py
index d8751a506f..c1ab19ed05 100644
--- a/deepmd/train/trainer.py
+++ b/deepmd/train/trainer.py
@@ -11,7 +11,6 @@
 from deepmd.env import get_tf_session_config
 from deepmd.env import GLOBAL_TF_FLOAT_PRECISION
 from deepmd.env import GLOBAL_ENER_FLOAT_PRECISION
-from deepmd.env import DP_ENABLE_MIXED_PRECISION
 from deepmd.fit import EnerFitting, WFCFitting, PolarFittingLocFrame, PolarFittingSeA, GlobalPolarFittingSeA, DipoleFittingSeA
 from deepmd.descriptor import Descriptor
 from deepmd.model import EnerModel, WFCModel, DipoleModel, PolarModel, GlobalPolarModel
@@ -30,7 +29,7 @@
 # load grad of force module
 import deepmd.op
 
-from deepmd.common import j_must_have, ClassArg, data_requirement
+from deepmd.common import j_must_have, ClassArg, data_requirement, get_precision
 
 log = logging.getLogger(__name__)
 
@@ -228,6 +227,13 @@ def _init_param(self, jdata):
         self.tensorboard = self.run_opt.is_chief and tr_data.get('tensorboard', False)
         self.tensorboard_log_dir = tr_data.get('tensorboard_log_dir', 'log')
         self.tensorboard_freq = tr_data.get('tensorboard_freq', 1)
+        self.mixed_prec = tr_data.get('mixed_precision', None)
+        if self.mixed_prec is not None:
+            if (self.mixed_prec['compute_prec'] != 'float16' or self.mixed_prec['output_prec'] != 'float32'):
+                raise RuntimeError(
+                    "Unsupported mixed precision option [output_prec, compute_prec]: [%s, %s], "
+                    " Supported: [float32, float16], Please set mixed precision option correctly!"
+                     % (self.mixed_prec['output_prec'], self.mixed_prec['compute_prec']))
         # self.sys_probs = tr_data['sys_probs']
         # self.auto_prob_style = tr_data['auto_prob']
         self.useBN = False
@@ -290,6 +296,10 @@ def build (self,
             tf.constant("compressed_model", name = 'model_type', dtype = tf.string)
         else:
             tf.constant("original_model", name = 'model_type', dtype = tf.string)
+        
+        if self.mixed_prec is not None:
+            self.descrpt.enable_mixed_precision(self.mixed_prec)
+            self.fitting.enable_mixed_precision(self.mixed_prec)
 
         self._build_lr()
         self._build_network(data)
@@ -333,8 +343,8 @@ def _build_network(self, data):
                                self.place_holders,
                                suffix = "test")
 
-        if DP_ENABLE_MIXED_PRECISION:
-            self.l2_l = tf.cast(self.l2_l, GLOBAL_TF_FLOAT_PRECISION)
+        if self.mixed_prec is not None:
+            self.l2_l = tf.cast(self.l2_l, get_precision(self.mixed_prec['output_prec']))
         log.info("built network")
 
     def _build_training(self):
@@ -348,7 +358,7 @@ def _build_training(self):
             optimizer = self.run_opt._HVD.DistributedOptimizer(optimizer)
         else:
             optimizer = tf.train.AdamOptimizer(learning_rate = self.learning_rate)
-        if DP_ENABLE_MIXED_PRECISION:
+        if self.mixed_prec is not None:
             # enable dynamic loss scale of the gradients
             optimizer = tf.train.experimental.enable_mixed_precision_graph_rewrite(optimizer)
         apply_op = optimizer.minimize(loss=self.l2_l,
diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py
index 736d2f894f..847eccc52e 100644
--- a/deepmd/utils/argcheck.py
+++ b/deepmd/utils/argcheck.py
@@ -4,7 +4,6 @@
 from deepmd import descriptor
 from deepmd.common import ACTIVATION_FN_DICT, PRECISION_DICT
 from deepmd.utils.plugin import Plugin
-from deepmd.env import GLOBAL_FLOAT_PRECISION
 import json
 
 
@@ -36,7 +35,7 @@ def type_embedding_args():
         Argument("neuron", list, optional = True, default = [2, 4, 8], doc = doc_neuron),
         Argument("activation_function", str, optional = True, default = 'tanh', doc = doc_activation_function),
         Argument("resnet_dt", bool, optional = True, default = False, doc = doc_resnet_dt),
-        Argument("precision", str, optional = True, default = GLOBAL_FLOAT_PRECISION, doc = doc_precision),
+        Argument("precision", str, optional = True, default = "float64", doc = doc_precision),
         Argument("trainable", bool, optional = True, default = True, doc = doc_trainable),
         Argument("seed", [int,None], optional = True, doc = doc_seed),
     ]        
@@ -139,7 +138,7 @@ def descrpt_se_a_args():
         Argument("activation_function", str, optional = True, default = 'tanh', doc = doc_activation_function),
         Argument("resnet_dt", bool, optional = True, default = False, doc = doc_resnet_dt),
         Argument("type_one_side", bool, optional = True, default = False, doc = doc_type_one_side),
-        Argument("precision", str, optional = True, default = GLOBAL_FLOAT_PRECISION, doc = doc_precision),
+        Argument("precision", str, optional = True, default = "float64", doc = doc_precision),
         Argument("trainable", bool, optional = True, default = True, doc = doc_trainable),
         Argument("seed", [int,None], optional = True, doc = doc_seed),
         Argument("exclude_types", list, optional = True, default = [], doc = doc_exclude_types),
@@ -169,7 +168,7 @@ def descrpt_se_t_args():
         Argument("neuron", list, optional = True, default = [10,20,40], doc = doc_neuron),
         Argument("activation_function", str, optional = True, default = 'tanh', doc = doc_activation_function),
         Argument("resnet_dt", bool, optional = True, default = False, doc = doc_resnet_dt),
-        Argument("precision", str, optional = True, default = GLOBAL_FLOAT_PRECISION, doc = doc_precision),
+        Argument("precision", str, optional = True, default = "float64", doc = doc_precision),
         Argument("trainable", bool, optional = True, default = True, doc = doc_trainable),
         Argument("seed", [int,None], optional = True, doc = doc_seed),
         Argument("set_davg_zero", bool, optional = True, default = False, doc = doc_set_davg_zero)
@@ -215,7 +214,7 @@ def descrpt_se_r_args():
         Argument("activation_function", str, optional = True, default = 'tanh', doc = doc_activation_function),
         Argument("resnet_dt", bool, optional = True, default = False, doc = doc_resnet_dt),
         Argument("type_one_side", bool, optional = True, default = False, doc = doc_type_one_side),
-        Argument("precision", str, optional = True, default = GLOBAL_FLOAT_PRECISION, doc = doc_precision),
+        Argument("precision", str, optional = True, default = "float64", doc = doc_precision),
         Argument("trainable", bool, optional = True, default = True, doc = doc_trainable),
         Argument("seed", [int,None], optional = True, doc = doc_seed),
         Argument("exclude_types", list, optional = True, default = [], doc = doc_exclude_types),
@@ -270,7 +269,7 @@ def fitting_ener():
         Argument("numb_aparam", int, optional = True, default = 0, doc = doc_numb_aparam),
         Argument("neuron", list, optional = True, default = [120,120,120], alias = ['n_neuron'], doc = doc_neuron),
         Argument("activation_function", str, optional = True, default = 'tanh', doc = doc_activation_function),
-        Argument("precision", str, optional = True, default = GLOBAL_FLOAT_PRECISION, doc = doc_precision),
+        Argument("precision", str, optional = True, default = 'float64', doc = doc_precision),
         Argument("resnet_dt", bool, optional = True, default = True, doc = doc_resnet_dt),
         Argument("trainable", [list,bool], optional = True, default = True, doc = doc_trainable),
         Argument("rcond", float, optional = True, default = 1e-3, doc = doc_rcond),
@@ -297,7 +296,7 @@ def fitting_polar():
         Argument("neuron", list, optional = True, default = [120,120,120], alias = ['n_neuron'], doc = doc_neuron),
         Argument("activation_function", str, optional = True, default = 'tanh', doc = doc_activation_function),
         Argument("resnet_dt", bool, optional = True, default = True, doc = doc_resnet_dt),
-        Argument("precision", str, optional = True, default = GLOBAL_FLOAT_PRECISION, doc = doc_precision),
+        Argument("precision", str, optional = True, default = 'float64', doc = doc_precision),
         Argument("fit_diag", bool, optional = True, default = True, doc = doc_fit_diag),
         Argument("scale", [list,float], optional = True, default = 1.0, doc = doc_scale),
         #Argument("diag_shift", [list,float], optional = True, default = 0.0, doc = doc_diag_shift),
@@ -322,7 +321,7 @@ def fitting_dipole():
         Argument("neuron", list, optional = True, default = [120,120,120], alias = ['n_neuron'], doc = doc_neuron),
         Argument("activation_function", str, optional = True, default = 'tanh', doc = doc_activation_function),
         Argument("resnet_dt", bool, optional = True, default = True, doc = doc_resnet_dt),
-        Argument("precision", str, optional = True, default = GLOBAL_FLOAT_PRECISION, doc = doc_precision),
+        Argument("precision", str, optional = True, default = 'float64', doc = doc_precision),
         Argument("sel_type", [list,int,None], optional = True, alias = ['dipole_type'], doc = doc_sel_type),
         Argument("seed", [int,None], optional = True, doc = doc_seed)
     ]    
@@ -601,6 +600,24 @@ def validation_data_args():  # ! added by Ziyao: new specification style for dat
                     sub_fields=args, sub_variants=[], doc=doc_validation_data)
 
 
+def mixed_precision_args():  # ! added by Denghui.
+    """Build the optional `mixed_precision` dict argument with sub-fields `output_prec` and `compute_prec`."""
+    doc_output_prec  = 'The precision for mixed precision params. ' \
+        'The trainable variables precision during the mixed precision training process, ' \
+        'supported options are float32 only currently.'
+    doc_compute_prec  = 'The precision for mixed precision compute. ' \
+        'The compute precision during the mixed precision training process, ' \
+        'supported options are float16 only currently.'
+    args = [
+        Argument("output_prec", str, optional=True, default="float32", doc=doc_output_prec),
+        Argument("compute_prec", str, optional=False, default="float16", doc=doc_compute_prec),
+    ]
+
+    doc_mixed_precision = "Configurations of mixed precision."
+    return Argument("mixed_precision", dict, optional=True,
+                    sub_fields=args, sub_variants=[], doc=doc_mixed_precision)
+
+
 def training_args():  # ! modified by Ziyao: data configuration isolated.
     doc_numb_steps = 'Number of training batch. Each training uses one batch of data.'
     doc_seed = 'The random seed for getting frames from the training data set.'
@@ -618,10 +635,12 @@ def training_args():  # ! modified by Ziyao: data configuration isolated.
 
     arg_training_data = training_data_args()
     arg_validation_data = validation_data_args()
+    mixed_precision_data = mixed_precision_args()
 
     args = [
         arg_training_data,
         arg_validation_data,
+        mixed_precision_data,
         Argument("numb_steps", int, optional=False, doc=doc_numb_steps, alias=["stop_batch"]),
         Argument("seed", [int,None], optional=True, doc=doc_seed),
         Argument("disp_file", str, optional=True, default='lcurve.out', doc=doc_disp_file),
diff --git a/deepmd/utils/network.py b/deepmd/utils/network.py
index 45df6357a1..cadcc7964b 100644
--- a/deepmd/utils/network.py
+++ b/deepmd/utils/network.py
@@ -2,7 +2,7 @@
 
 from deepmd.env import tf
 from deepmd.env import GLOBAL_TF_FLOAT_PRECISION
-from deepmd.env import DP_ENABLE_MIXED_PRECISION, cast_to_compute, cast_to_output
+from deepmd.common import get_precision
 
 def one_layer_rand_seed_shift():
     return 3
@@ -17,16 +17,14 @@ def one_layer(inputs,
               reuse=None,
               seed=None, 
               use_timestep = False, 
-              trainable = True,
+              trainable = False,
               useBN = False, 
               uniform_seed = False,
-              initial_variables = None):
-    # Do mixed precision training check
-    if DP_ENABLE_MIXED_PRECISION and precision is not tf.float32:
-        raise RuntimeError("The network precision %s does not match the mixed precision training settting! Please check the input training script. " % (precision))
-    # For good accuracy, the last layer of the fitting network uses a single-precision neuron network.
-    if DP_ENABLE_MIXED_PRECISION and outputs_size is 1:
-        inputs = cast_to_output(inputs)
+              initial_variables = None,
+              mixed_prec = None):
+    # For good accuracy, the last layer of the fitting network uses a higher precision neuron network.
+    if mixed_prec is not None and outputs_size == 1:
+        inputs = tf.cast(inputs, get_precision(mixed_prec['output_prec']))
     with tf.variable_scope(name, reuse=reuse):
         shape = inputs.get_shape().as_list()
         w_initializer  = tf.random_normal_initializer(
@@ -51,10 +49,12 @@ def one_layer(inputs,
                             b_initializer, 
                             trainable = trainable)
         variable_summaries(b, 'bias')
-        if DP_ENABLE_MIXED_PRECISION and outputs_size is not 1:
-            inputs = cast_to_compute(inputs)
-            w = cast_to_compute(w)
-            b = cast_to_compute(b)
+
+        if mixed_prec is not None and outputs_size != 1:
+            inputs = tf.cast(inputs, get_precision(mixed_prec['compute_prec']))
+            w = tf.cast(w, get_precision(mixed_prec['compute_prec']))
+            b = tf.cast(b, get_precision(mixed_prec['compute_prec']))
+
         hidden = tf.matmul(inputs, w) + b
         if activation_fn != None and use_timestep :
             idt_initializer = tf.random_normal_initializer(
@@ -76,8 +76,8 @@ def one_layer(inputs,
                 # return activation_fn(hidden_bn)
             else:
                 if use_timestep :
-                    if DP_ENABLE_MIXED_PRECISION and outputs_size is not 1:
-                       idt = cast_to_compute(idt)
+                    if mixed_prec is not None and outputs_size != 1:
+                       idt = tf.cast(idt, get_precision(mixed_prec['compute_prec']))
                     return tf.reshape(activation_fn(hidden), [-1, outputs_size]) * idt
                 else :
                     return tf.reshape(activation_fn(hidden), [-1, outputs_size])                    
@@ -106,7 +106,8 @@ def embedding_net(xx,
                   seed = None,
                   trainable = True, 
                   uniform_seed = False,
-                  initial_variables = None):
+                  initial_variables = None,
+                  mixed_prec = None):
     r"""The embedding network.
 
     The embedding network function :math:`\mathcal{N}` is constructed by is the
@@ -159,6 +160,8 @@ def embedding_net(xx,
         Only for the purpose of backward compatibility, retrieves the old behavior of using the random seed
     initial_variables : dict
         The input dict which stores the embedding net variables
+    mixed_prec
+        The input dict which stores the mixed precision setting for the embedding net
 
 
     References
@@ -167,9 +170,6 @@ def embedding_net(xx,
        in deep residual networks. InComputer Vision – ECCV 2016,pages 630–645. Springer
        International Publishing, 2016.
     """
-    # Do mixed precision training check
-    if DP_ENABLE_MIXED_PRECISION and precision is not tf.float32:
-        raise RuntimeError("The network precision %s does not match the mixed precision training settting! Please check the input training script. " % (precision))
     input_shape = xx.get_shape().as_list()
     outputs_size = [input_shape[1]] + network_size
 
@@ -201,10 +201,10 @@ def embedding_net(xx,
                             trainable = trainable)
         variable_summaries(b, 'bias_'+str(ii)+name_suffix)
 
-        if DP_ENABLE_MIXED_PRECISION:
-            xx = cast_to_compute(xx)
-            w  = cast_to_compute(w)
-            b  = cast_to_compute(b)
+        if mixed_prec is not None:
+            xx = tf.cast(xx, get_precision(mixed_prec['compute_prec']))
+            w  = tf.cast(w,  get_precision(mixed_prec['compute_prec']))
+            b  = tf.cast(b,  get_precision(mixed_prec['compute_prec']))
         hidden = tf.reshape(activation_fn(tf.matmul(xx, w) + b), [-1, outputs_size[ii]])
         if resnet_dt :
             idt_initializer = tf.random_normal_initializer(
@@ -221,8 +221,8 @@ def embedding_net(xx,
                                   idt_initializer, 
                                   trainable = trainable)
             variable_summaries(idt, 'idt_'+str(ii)+name_suffix)
-            if DP_ENABLE_MIXED_PRECISION:
-                idt = cast_to_compute(idt)
+            if mixed_prec is not None:
+                idt = tf.cast(idt, get_precision(mixed_prec['compute_prec']))
 
         if outputs_size[ii] == outputs_size[ii-1]:
             if resnet_dt :
@@ -236,7 +236,6 @@ def embedding_net(xx,
                 xx = tf.concat([xx,xx], 1) + hidden
         else:
             xx = hidden
-
     return xx
 
 def variable_summaries(var: tf.Variable, name: str):

From b47c56dd97757dd64f04ea5dc3013a36e88496eb Mon Sep 17 00:00:00 2001
From: denghuilu <denghuilu@pku.edu.cn>
Date: Sun, 21 Nov 2021 21:35:44 +0800
Subject: [PATCH 07/16] add tf version check for mixed precision

---
 deepmd/env.py           | 1 +
 deepmd/train/trainer.py | 5 ++++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/deepmd/env.py b/deepmd/env.py
index 6e6543697e..577da78ed5 100644
--- a/deepmd/env.py
+++ b/deepmd/env.py
@@ -39,6 +39,7 @@
     "TRANSFER_PATTERN",
     "FITTING_NET_PATTERN",
     "EMBEDDING_NET_PATTERN",
+    "TF_VERSION"
 ]
 
 SHARED_LIB_MODULE = "op"
diff --git a/deepmd/train/trainer.py b/deepmd/train/trainer.py
index c1ab19ed05..75d36cf246 100644
--- a/deepmd/train/trainer.py
+++ b/deepmd/train/trainer.py
@@ -23,7 +23,7 @@
 from deepmd.utils.graph import get_tensor_by_name
 
 from tensorflow.python.client import timeline
-from deepmd.env import op_module
+from deepmd.env import op_module, TF_VERSION
 from deepmd.utils.errors import GraphWithoutTensorError
 
 # load grad of force module
@@ -359,6 +359,9 @@ def _build_training(self):
         else:
             optimizer = tf.train.AdamOptimizer(learning_rate = self.learning_rate)
         if self.mixed_prec is not None:
+            # check the TF_VERSION, when TF < 1.12, mixed precision is not allowed 
+            if TF_VERSION < "1.12":
+                raise RuntimeError("TensorFlow version %s is not compatible with the mixed precision setting. Please consider upgrade your TF version!" % TF_VERSION)
             # enable dynamic loss scale of the gradients
             optimizer = tf.train.experimental.enable_mixed_precision_graph_rewrite(optimizer)
         apply_op = optimizer.minimize(loss=self.l2_l,

From af3fcfb0872ef6d3da9da409ea9eea74e57d354f Mon Sep 17 00:00:00 2001
From: Denghui Lu <denghuilu@pku.edu.cn>
Date: Sun, 21 Nov 2021 21:54:12 +0800
Subject: [PATCH 08/16] Update training-advanced.md

---
 doc/train/training-advanced.md | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/doc/train/training-advanced.md b/doc/train/training-advanced.md
index 184a61f1d7..7226b57847 100644
--- a/doc/train/training-advanced.md
+++ b/doc/train/training-advanced.md
@@ -36,6 +36,10 @@ Other training parameters are given in the `training` section.
 	    "batch_size":	1,
 	    "numb_btch":	3
 	},
+	"mixed_precision": {
+	    "output_prec":      "float32",
+	    "compute_prec":     "float16"
+	},
 
 	"numb_step":	1000000,
 	"seed":		1,
@@ -75,6 +79,13 @@ The sections `"training_data"` and `"validation_data"` give the training dataset
     * `"auto:N"`: automatically determines the batch size so that the `batch_size` times the number of atoms in the system is no less than `N`.
 * The key `numb_batch` in `validate_data` gives the number of batches of model validation. Note that the batches may not be from the same system
 
+The section `mixed_precision` specifies the mixed precision settings, which will enable the mixed precision training workflow for deepmd-kit. The keys are explained below:
+* `output_prec`  precision used in the output tensors, only `float32` is supported currently.
+* `compute_prec` precision used in the computing tensors, only `float16` is supported currently.
+Note that there are several limitations on mixed precision training:
+* Only 'se_e2_a' type descriptor is supported by the mixed precision training workflow.
+* The precision of embedding net and fitting net are forced to be set to `float32`.
+
 Other keys in the `training` section are explained below:
 * `numb_step` The number of training steps.
 * `seed` The random seed for getting frames from the training data set.
@@ -126,4 +137,3 @@ One can set other environmental variables:
 | Environment variables | Allowed value          | Default value | Usage                      |
 | --------------------- | ---------------------- | ------------- | -------------------------- |
 | DP_INTERFACE_PREC     | `high`, `low`          | `high`        | Control high (double) or low (float) precision of training. |
-| DP_ENABLE_MIXED_PREC  | `fp16`                 |               | Control mixed precision(fp16) of training and inference.    |

From 646233ea39d129a2acc6e593c97eb60d132e68f2 Mon Sep 17 00:00:00 2001
From: denghuilu <denghuilu@pku.edu.cn>
Date: Sun, 21 Nov 2021 21:58:48 +0800
Subject: [PATCH 09/16] fix typo

---
 deepmd/train/trainer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deepmd/train/trainer.py b/deepmd/train/trainer.py
index 75d36cf246..78ef1e3b9a 100644
--- a/deepmd/train/trainer.py
+++ b/deepmd/train/trainer.py
@@ -361,7 +361,7 @@ def _build_training(self):
         if self.mixed_prec is not None:
             # check the TF_VERSION, when TF < 1.12, mixed precision is not allowed 
             if TF_VERSION < "1.12":
-                raise RuntimeError("TensorFlow version %s is not compatible with the mixed precision setting. Please consider upgrade your TF version!" % TF_VERSION)
+                raise RuntimeError("TensorFlow version %s is not compatible with the mixed precision setting. Please consider upgrading your TF version!" % TF_VERSION)
             # enable dynamic loss scale of the gradients
             optimizer = tf.train.experimental.enable_mixed_precision_graph_rewrite(optimizer)
         apply_op = optimizer.minimize(loss=self.l2_l,

From e945ed01f7f2bb602ba9c7f4eb461a6d925a4778 Mon Sep 17 00:00:00 2001
From: denghuilu <denghuilu@pku.edu.cn>
Date: Sun, 21 Nov 2021 23:22:42 +0800
Subject: [PATCH 10/16] fix TF_VERSION control

---
 deepmd/train/trainer.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/deepmd/train/trainer.py b/deepmd/train/trainer.py
index 78ef1e3b9a..ae9a8a62a5 100644
--- a/deepmd/train/trainer.py
+++ b/deepmd/train/trainer.py
@@ -362,8 +362,10 @@ def _build_training(self):
             # check the TF_VERSION, when TF < 1.12, mixed precision is not allowed 
             if TF_VERSION < "1.12":
                 raise RuntimeError("TensorFlow version %s is not compatible with the mixed precision setting. Please consider upgrading your TF version!" % TF_VERSION)
-            # enable dynamic loss scale of the gradients
-            optimizer = tf.train.experimental.enable_mixed_precision_graph_rewrite(optimizer)
+            elif TF_VERSION < "2.4":
+                optimizer = tf.train.experimental.enable_mixed_precision_graph_rewrite(optimizer)
+            else:
+                optimizer = tf.mixed_precision.enable_mixed_precision_graph_rewrite(optimizer)
         apply_op = optimizer.minimize(loss=self.l2_l,
                                       global_step=self.global_step,
                                       var_list=trainable_variables,

From 972a5b19fa746295d7716a9e958792e6832483c7 Mon Sep 17 00:00:00 2001
From: denghuilu <denghuilu@pku.edu.cn>
Date: Sun, 21 Nov 2021 23:44:23 +0800
Subject: [PATCH 11/16] fix TF_VERSION comparison

---
 deepmd/train/trainer.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/deepmd/train/trainer.py b/deepmd/train/trainer.py
index ae9a8a62a5..6c38dff376 100644
--- a/deepmd/train/trainer.py
+++ b/deepmd/train/trainer.py
@@ -359,10 +359,11 @@ def _build_training(self):
         else:
             optimizer = tf.train.AdamOptimizer(learning_rate = self.learning_rate)
         if self.mixed_prec is not None:
+            TF_VERSION_LIST = [int(item) for item in TF_VERSION.split('.')]
             # check the TF_VERSION, when TF < 1.12, mixed precision is not allowed 
-            if TF_VERSION < "1.12":
+            if TF_VERSION_LIST < [1, 12, 0]:
                 raise RuntimeError("TensorFlow version %s is not compatible with the mixed precision setting. Please consider upgrading your TF version!" % TF_VERSION)
-            elif TF_VERSION < "2.4":
+            elif TF_VERSION_LIST < [2, 4, 0]:
                 optimizer = tf.train.experimental.enable_mixed_precision_graph_rewrite(optimizer)
             else:
                 optimizer = tf.mixed_precision.enable_mixed_precision_graph_rewrite(optimizer)

From fcdfb31832badcff1c31fa20a131ed74ffc8cca5 Mon Sep 17 00:00:00 2001
From: denghuilu <denghuilu@pku.edu.cn>
Date: Mon, 22 Nov 2021 09:02:57 +0800
Subject: [PATCH 12/16] enable mixed precision for hybrid descriptor

---
 deepmd/descriptor/hybrid.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/deepmd/descriptor/hybrid.py b/deepmd/descriptor/hybrid.py
index 9d8967faee..39cb9fed0e 100644
--- a/deepmd/descriptor/hybrid.py
+++ b/deepmd/descriptor/hybrid.py
@@ -264,6 +264,20 @@ def enable_compression(self,
         for idx, ii in enumerate(self.descrpt_list):
             ii.enable_compression(min_nbor_dist, model_file, table_extrapolate, table_stride_1, table_stride_2, check_frequency, suffix=f"{suffix}_{idx}")
 
+
+    def enable_mixed_precision(self, mixed_prec : dict = None) -> None:
+        """
+        Receive the mixed precision setting.
+
+        Parameters
+        ----------
+        mixed_prec
+                The mixed precision setting used in the embedding net
+        """
+        for idx, ii in enumerate(self.descrpt_list):
+            ii.enable_mixed_precision(mixed_prec)
+
+
     def init_variables(self,
                        model_file : str,
                        suffix : str = "",

From b868ea3ffe7faf4727aae1795b1b760a53693a04 Mon Sep 17 00:00:00 2001
From: denghuilu <denghuilu@pku.edu.cn>
Date: Mon, 22 Nov 2021 09:28:54 +0800
Subject: [PATCH 13/16] Update network.py

---
 deepmd/utils/network.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deepmd/utils/network.py b/deepmd/utils/network.py
index cadcc7964b..824af455d4 100644
--- a/deepmd/utils/network.py
+++ b/deepmd/utils/network.py
@@ -17,7 +17,7 @@ def one_layer(inputs,
               reuse=None,
               seed=None, 
               use_timestep = False, 
-              trainable = False,
+              trainable = True,
               useBN = False, 
               uniform_seed = False,
               initial_variables = None,

From 6d517edaaf76de6233149191c05e5c07e03d747b Mon Sep 17 00:00:00 2001
From: denghuilu <denghuilu@pku.edu.cn>
Date: Mon, 22 Nov 2021 10:01:11 +0800
Subject: [PATCH 14/16] use parameter to control the network mixed precision
 output precision

---
 deepmd/fit/dipole.py    | 2 +-
 deepmd/fit/ener.py      | 3 ++-
 deepmd/fit/polar.py     | 6 +++---
 deepmd/utils/network.py | 9 +++++----
 4 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/deepmd/fit/dipole.py b/deepmd/fit/dipole.py
index 5dfa5fab0f..7c2d5dea86 100644
--- a/deepmd/fit/dipole.py
+++ b/deepmd/fit/dipole.py
@@ -147,7 +147,7 @@ def build (self,
                     layer = one_layer(layer, self.n_neuron[ii], name='layer_'+str(ii)+'_type_'+str(type_i)+suffix, reuse=reuse, seed = self.seed, activation_fn = self.fitting_activation_fn, precision = self.fitting_precision, uniform_seed = self.uniform_seed, initial_variables = self.fitting_net_variables, mixed_prec = self.mixed_prec)
                 if (not self.uniform_seed) and (self.seed is not None): self.seed += self.seed_shift
             # (nframes x natoms) x naxis
-            final_layer = one_layer(layer, self.dim_rot_mat_1, activation_fn = None, name='final_layer_type_'+str(type_i)+suffix, reuse=reuse, seed = self.seed, precision = self.fitting_precision, uniform_seed = self.uniform_seed, initial_variables = self.fitting_net_variables, mixed_prec = self.mixed_prec)
+            final_layer = one_layer(layer, self.dim_rot_mat_1, activation_fn = None, name='final_layer_type_'+str(type_i)+suffix, reuse=reuse, seed = self.seed, precision = self.fitting_precision, uniform_seed = self.uniform_seed, initial_variables = self.fitting_net_variables, mixed_prec = self.mixed_prec, final_layer = True)
             if (not self.uniform_seed) and (self.seed is not None): self.seed += self.seed_shift
             # (nframes x natoms) x 1 * naxis
             final_layer = tf.reshape(final_layer, [tf.shape(inputs)[0] * natoms[2+type_i], 1, self.dim_rot_mat_1])
diff --git a/deepmd/fit/ener.py b/deepmd/fit/ener.py
index 3fe4013616..e6b0d0a763 100644
--- a/deepmd/fit/ener.py
+++ b/deepmd/fit/ener.py
@@ -322,7 +322,8 @@ def _build_lower(
             trainable = self.trainable[-1],
             uniform_seed = self.uniform_seed,
             initial_variables = self.fitting_net_variables,
-            mixed_prec = self.mixed_prec)
+            mixed_prec = self.mixed_prec,
+            final_layer = True)
         if (not self.uniform_seed) and (self.seed is not None): self.seed += self.seed_shift
 
         return final_layer
diff --git a/deepmd/fit/polar.py b/deepmd/fit/polar.py
index d858d37ac1..5f6ddd7525 100644
--- a/deepmd/fit/polar.py
+++ b/deepmd/fit/polar.py
@@ -79,7 +79,7 @@ def build (self,
                 else :
                     layer = one_layer(layer, self.n_neuron[ii], name='layer_'+str(ii)+'_type_'+str(type_i)+suffix, reuse=reuse, seed = self.seed, activation_fn = self.fitting_activation_fn, precision = self.fitting_precision)
             # (nframes x natoms) x 9
-            final_layer = one_layer(layer, 9, activation_fn = None, name='final_layer_type_'+str(type_i)+suffix, reuse=reuse, seed = self.seed, precision = self.fitting_precision)
+            final_layer = one_layer(layer, 9, activation_fn = None, name='final_layer_type_'+str(type_i)+suffix, reuse=reuse, seed = self.seed, precision = self.fitting_precision, final_layer = True)
             # (nframes x natoms) x 3 x 3
             final_layer = tf.reshape(final_layer, [tf.shape(inputs)[0] * natoms[2+type_i], 3, 3])
             # (nframes x natoms) x 3 x 3
@@ -335,7 +335,7 @@ def build (self,
                 # bavg[1] = self.avgeig[1]
                 # bavg[2] = self.avgeig[2]
                 # (nframes x natoms) x naxis
-                final_layer = one_layer(layer, self.dim_rot_mat_1, activation_fn = None, name='final_layer_type_'+str(type_i)+suffix, reuse=reuse, seed = self.seed, bavg = bavg, precision = self.fitting_precision, uniform_seed = self.uniform_seed, initial_variables = self.fitting_net_variables, mixed_prec = self.mixed_prec)
+                final_layer = one_layer(layer, self.dim_rot_mat_1, activation_fn = None, name='final_layer_type_'+str(type_i)+suffix, reuse=reuse, seed = self.seed, bavg = bavg, precision = self.fitting_precision, uniform_seed = self.uniform_seed, initial_variables = self.fitting_net_variables, mixed_prec = self.mixed_prec, final_layer = True)
                 if (not self.uniform_seed) and (self.seed is not None): self.seed += self.seed_shift
                 # (nframes x natoms) x naxis
                 final_layer = tf.reshape(final_layer, [tf.shape(inputs)[0] * natoms[2+type_i], self.dim_rot_mat_1])
@@ -347,7 +347,7 @@ def build (self,
                 # bavg[1*self.dim_rot_mat_1+1] = self.avgeig[1]
                 # bavg[2*self.dim_rot_mat_1+2] = self.avgeig[2]
                 # (nframes x natoms) x (naxis x naxis)
-                final_layer = one_layer(layer, self.dim_rot_mat_1*self.dim_rot_mat_1, activation_fn = None, name='final_layer_type_'+str(type_i)+suffix, reuse=reuse, seed = self.seed, bavg = bavg, precision = self.fitting_precision, uniform_seed = self.uniform_seed, initial_variables = self.fitting_net_variables, mixed_prec = self.mixed_prec)
+                final_layer = one_layer(layer, self.dim_rot_mat_1*self.dim_rot_mat_1, activation_fn = None, name='final_layer_type_'+str(type_i)+suffix, reuse=reuse, seed = self.seed, bavg = bavg, precision = self.fitting_precision, uniform_seed = self.uniform_seed, initial_variables = self.fitting_net_variables, mixed_prec = self.mixed_prec, final_layer = True)
                 if (not self.uniform_seed) and (self.seed is not None): self.seed += self.seed_shift
                 # (nframes x natoms) x naxis x naxis
                 final_layer = tf.reshape(final_layer, [tf.shape(inputs)[0] * natoms[2+type_i], self.dim_rot_mat_1, self.dim_rot_mat_1])
diff --git a/deepmd/utils/network.py b/deepmd/utils/network.py
index 824af455d4..c82721becb 100644
--- a/deepmd/utils/network.py
+++ b/deepmd/utils/network.py
@@ -21,9 +21,10 @@ def one_layer(inputs,
               useBN = False, 
               uniform_seed = False,
               initial_variables = None,
-              mixed_prec = None):
+              mixed_prec = None,
+              final_layer = False):
     # For good accuracy, the last layer of the fitting network uses a higher precision neuron network.
-    if mixed_prec is not None and outputs_size == 1:
+    if mixed_prec is not None and final_layer:
         inputs = tf.cast(inputs, get_precision(mixed_prec['output_prec']))
     with tf.variable_scope(name, reuse=reuse):
         shape = inputs.get_shape().as_list()
@@ -50,7 +51,7 @@ def one_layer(inputs,
                             trainable = trainable)
         variable_summaries(b, 'bias')
 
-        if mixed_prec is not None and outputs_size != 1:
+        if mixed_prec is not None and not final_layer:
             inputs = tf.cast(inputs, get_precision(mixed_prec['compute_prec']))
             w = tf.cast(w, get_precision(mixed_prec['compute_prec']))
             b = tf.cast(b, get_precision(mixed_prec['compute_prec']))
@@ -76,7 +77,7 @@ def one_layer(inputs,
                 # return activation_fn(hidden_bn)
             else:
                 if use_timestep :
-                    if mixed_prec is not None and outputs_size != 1:
+                    if mixed_prec is not None and not final_layer:
                        idt = tf.cast(idt, get_precision(mixed_prec['compute_prec']))
                     return tf.reshape(activation_fn(hidden), [-1, outputs_size]) * idt
                 else :

From e447ab5b75b3927fb9e5840c2d05d2e6510b8a49 Mon Sep 17 00:00:00 2001
From: denghuilu <denghuilu@pku.edu.cn>
Date: Mon, 22 Nov 2021 13:52:58 +0800
Subject: [PATCH 15/16] add example for mixed precision training workflow

---
 deepmd/descriptor/descriptor.py              |  4 +-
 deepmd/train/trainer.py                      |  8 ++-
 examples/water/se_e2_a_mixed_prec/input.json | 70 ++++++++++++++++++++
 requirements.txt                             |  1 +
 4 files changed, 79 insertions(+), 4 deletions(-)
 create mode 100644 examples/water/se_e2_a_mixed_prec/input.json

diff --git a/deepmd/descriptor/descriptor.py b/deepmd/descriptor/descriptor.py
index 0642779985..a500826808 100644
--- a/deepmd/descriptor/descriptor.py
+++ b/deepmd/descriptor/descriptor.py
@@ -277,7 +277,9 @@ def enable_mixed_precision(self, mixed_prec : dict = None) -> None:
         This method is called by others when the descriptor supported compression.
         """
         raise NotImplementedError(
-            "Descriptor %s doesn't support mixed precision training!" % type(self).__name__)
+            "Descriptor %s doesn't support mixed precision training!"
+            % type(self).__name__
+        )
 
 
     @abstractmethod
diff --git a/deepmd/train/trainer.py b/deepmd/train/trainer.py
index 6c38dff376..0e1ed6d3e8 100644
--- a/deepmd/train/trainer.py
+++ b/deepmd/train/trainer.py
@@ -7,6 +7,8 @@
 import shutil
 import google.protobuf.message
 import numpy as np
+from packaging.version import Version
+
 from deepmd.env import tf
 from deepmd.env import get_tf_session_config
 from deepmd.env import GLOBAL_TF_FLOAT_PRECISION
@@ -359,11 +361,11 @@ def _build_training(self):
         else:
             optimizer = tf.train.AdamOptimizer(learning_rate = self.learning_rate)
         if self.mixed_prec is not None:
-            TF_VERSION_LIST = [int(item) for item in TF_VERSION.split('.')]
+            _TF_VERSION = Version(TF_VERSION)
             # check the TF_VERSION, when TF < 1.12, mixed precision is not allowed 
-            if TF_VERSION_LIST < [1, 12, 0]:
+            if _TF_VERSION < Version('1.12.0'):
                 raise RuntimeError("TensorFlow version %s is not compatible with the mixed precision setting. Please consider upgrading your TF version!" % TF_VERSION)
-            elif TF_VERSION_LIST < [2, 4, 0]:
+            elif _TF_VERSION < Version('2.4.0'):
                 optimizer = tf.train.experimental.enable_mixed_precision_graph_rewrite(optimizer)
             else:
                 optimizer = tf.mixed_precision.enable_mixed_precision_graph_rewrite(optimizer)
diff --git a/examples/water/se_e2_a_mixed_prec/input.json b/examples/water/se_e2_a_mixed_prec/input.json
new file mode 100644
index 0000000000..889abedabf
--- /dev/null
+++ b/examples/water/se_e2_a_mixed_prec/input.json
@@ -0,0 +1,70 @@
+{
+    "_comment": " model parameters",
+    "model": {
+	"type_map":	["O", "H"],
+	"descriptor" :{
+	    "type":		"se_e2_a",
+	    "sel":		[46, 92],
+	    "rcut_smth":	0.50,
+	    "rcut":		6.00,
+	    "neuron":		[25, 50, 100],
+	    "resnet_dt":	false,
+	    "axis_neuron":	16,
+	    "seed":		1,
+	    "_comment":		" that's all"
+	},
+	"fitting_net" : {
+	    "neuron":		[240, 240, 240],
+	    "resnet_dt":	true,
+	    "seed":		1,
+	    "_comment":		" that's all"
+	},
+	"_comment":	" that's all"
+    },
+
+    "learning_rate" :{
+	"type":		"exp",
+	"decay_steps":	5000,
+	"start_lr":	0.001,	
+	"stop_lr":	3.51e-8,
+	"_comment":	"that's all"
+    },
+
+    "loss" :{
+	"type":		"ener",
+	"start_pref_e":	0.02,
+	"limit_pref_e":	1,
+	"start_pref_f":	1000,
+	"limit_pref_f":	1,
+	"start_pref_v":	0,
+	"limit_pref_v":	0,
+	"_comment":	" that's all"
+    },
+
+    "training" : {
+	"training_data": {
+	    "systems":		["../data/data_0/", "../data/data_1/", "../data/data_2/"],
+	    "batch_size":	"auto",
+	    "_comment":		"that's all"
+	},
+	"validation_data":{
+	    "systems":		["../data/data_3"],
+	    "batch_size":	1,
+	    "numb_btch":	3,
+	    "_comment":		"that's all"
+	},
+	"mixed_precision": {
+            "compute_prec": "float16",
+            "output_prec":  "float32"
+        },
+	"numb_steps":	1000000,
+	"seed":		10,
+	"disp_file":	"lcurve.out",
+	"disp_freq":	100,
+	"save_freq":	1000,
+	"_comment":	"that's all"
+    },    
+
+    "_comment":		"that's all"
+}
+
diff --git a/requirements.txt b/requirements.txt
index f3ead805b8..06b71f825c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,3 +6,4 @@ python-hostlist >= 1.21
 typing_extensions; python_version < "3.7"
 h5py
 wcmatch
+packaging

From 6fa19c9404e01c8f0b5396af6e9c8220d01eee98 Mon Sep 17 00:00:00 2001
From: denghuilu <denghuilu@pku.edu.cn>
Date: Mon, 22 Nov 2021 15:23:52 +0800
Subject: [PATCH 16/16] fix lint errors

---
 deepmd/descriptor/descriptor.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/deepmd/descriptor/descriptor.py b/deepmd/descriptor/descriptor.py
index a500826808..231f3abe1e 100644
--- a/deepmd/descriptor/descriptor.py
+++ b/deepmd/descriptor/descriptor.py
@@ -262,8 +262,7 @@ def enable_compression(self,
         raise NotImplementedError(
             "Descriptor %s doesn't support compression!" % type(self).__name__)
 
-
-    def enable_mixed_precision(self, mixed_prec : dict = None) -> None:
+    def enable_mixed_precision(self, mixed_prec: dict = None) -> None:
         """
         Reveive the mixed precision setting.
 
@@ -271,7 +270,7 @@ def enable_mixed_precision(self, mixed_prec : dict = None) -> None:
         ----------
         mixed_prec
                 The mixed precision setting used in the embedding net
-        
+
         Notes
         -----
         This method is called by others when the descriptor supported compression.
@@ -281,7 +280,6 @@ def enable_mixed_precision(self, mixed_prec : dict = None) -> None:
             % type(self).__name__
         )
 
-
     @abstractmethod
     def prod_force_virial(self,
                           atom_ener: tf.Tensor,