From ebdb4043fbd0ee7d716921828ffd0dc74f7b747c Mon Sep 17 00:00:00 2001 From: denghuilu Date: Fri, 19 Mar 2021 04:26:31 +0800 Subject: [PATCH 1/2] fix bugs of single precision training and transfer --- deepmd/entrypoints/transfer.py | 30 ++++++++++++------------------ deepmd/loss/ener.py | 2 +- 2 files changed, 13 insertions(+), 19 deletions(-) diff --git a/deepmd/entrypoints/transfer.py b/deepmd/entrypoints/transfer.py index 3e1aa1b5f2..0af45a4244 100644 --- a/deepmd/entrypoints/transfer.py +++ b/deepmd/entrypoints/transfer.py @@ -72,7 +72,7 @@ def transfer(*, old_model: str, raw_model: str, output: str, **kwargs): new_graph_def = transform_graph(raw_graph, old_graph) with tf.gfile.GFile(output, mode="wb") as f: f.write(new_graph_def.SerializeToString()) - log.info("the output model is saved in {output:s}") + log.info("the output model is saved in " + output) def load_graph(graph_name: str) -> tf.Graph: @@ -136,22 +136,20 @@ def transform_graph(raw_graph: tf.Graph, old_graph: tf.Graph) -> tf.Graph: if raw_graph_dtype == np.float16: if old_graph_dtype == np.float64 or old_graph_dtype == np.float32: if (len(tensor_shape) != 1) or (tensor_shape[0] != 1): - tensor = np.frombuffer(old_node.tensor_content, dtype=np.float16) - cp_attr.from_array(tensor, tf.float16, shape=tensor_shape) + tensor = np.frombuffer(old_node.tensor_content).astype(raw_graph_dtype) + cp_attr.from_array(tensor, tf.float16, shape = tensor_shape) else: - tensor = load_tensor(old_node, old_graph_dtype, np.float16) + tensor = load_tensor(old_node, old_graph_dtype, raw_graph_dtype) cp_attr.from_array(tensor, tf.float16, [1]) - elif old_graph_dtype == np.float16: - tensor = convert_matrix(np.array(old_node.half_val), tensor_shape) - cp_attr.from_array(tensor, tf.float16) + elif old_graph_dtype[1] == "float16": + tensor = convertMatrix(np.array(old_node.half_val), tensor_shape) + cp_attr.from_array(tensor, raw_graph_dtype) elif raw_graph_dtype == np.float64 or raw_graph_dtype == np.float32: if old_graph_dtype == np.float64 or old_graph_dtype == np.float32: if (len(tensor_shape) != 1) or (tensor_shape[0] != 1): - tensor = np.frombuffer( - old_node.tensor_content, dtype=raw_graph_dtype - ) + tensor = np.frombuffer(old_node.tensor_content).astype(raw_graph_dtype) cp_attr.from_str(tensor) else: tensor = load_tensor(old_node, old_graph_dtype, raw_graph_dtype) @@ -159,14 +157,10 @@ def transform_graph(raw_graph: tf.Graph, old_graph: tf.Graph) -> tf.Graph: elif old_graph_dtype == np.float16: if (len(tensor_shape) != 1) or (tensor_shape[0] != 1): - tensor = convert_matrix( - np.array(old_node.half_val), tensor_shape, dtype=raw_graph_dtype - ) + tensor = convertMatrix(np.array(old_node.half_val), tensor_shape).astype(raw_graph_dtype) cp_attr.from_str(tensor) else: - tensor = convert_matrix( - np.array(old_node.half_val), tensor_shape, dtype=raw_graph_dtype - ) + tensor = convertMatrix(np.array(old_node.half_val), tensor_shape).astype(raw_graph_dtype) cp_attr.from_array(tensor, raw_graph_dtype) return raw_graph_def @@ -191,9 +185,9 @@ def from_str(self, tensor: np.ndarray): def load_tensor(node: tf.Tensor, dtype_old: type, dtype_new: type) -> np.ndarray: if dtype_old == np.float64: - tensor = np.array(node.double_val, dtype=dtype_new) + tensor = np.array(node.double_val).astype(dtype_new) elif dtype_old == np.float32: - tensor = np.array(node.float_val, dtype=dtype_new) + tensor = np.array(node.float_val).astype(dtype_new) return tensor diff --git a/deepmd/loss/ener.py b/deepmd/loss/ener.py index 08e631bc0b..89d15b3add 100644 --- a/deepmd/loss/ener.py +++ b/deepmd/loss/ener.py @@ -119,7 +119,7 @@ def build (self, # only used when tensorboard was set as true self.l2_loss_summary = tf.summary.scalar('l2_loss', tf.sqrt(l2_loss)) - self.l2_loss_ener_summary = tf.summary.scalar('l2_ener_loss', tf.sqrt(l2_ener_loss) / global_cvt_2_tf_float(natoms[0])) + self.l2_loss_ener_summary = tf.summary.scalar('l2_ener_loss', global_cvt_2_tf_float(tf.sqrt(l2_ener_loss)) / global_cvt_2_tf_float(natoms[0])) self.l2_loss_force_summary = tf.summary.scalar('l2_force_loss', tf.sqrt(l2_force_loss)) self.l2_loss_virial_summary = tf.summary.scalar('l2_virial_loss', tf.sqrt(l2_virial_loss) / global_cvt_2_tf_float(natoms[0])) From 3dc97689feca7a3e74c8acefe5c16a38eac66daf Mon Sep 17 00:00:00 2001 From: denghuilu Date: Fri, 19 Mar 2021 07:50:23 +0800 Subject: [PATCH 2/2] fix bug of nbor sorting --- source/lib/src/cuda/prod_env_mat.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/lib/src/cuda/prod_env_mat.cu b/source/lib/src/cuda/prod_env_mat.cu index d26e089efa..1e38d45f4b 100644 --- a/source/lib/src/cuda/prod_env_mat.cu +++ b/source/lib/src/cuda/prod_env_mat.cu @@ -114,7 +114,7 @@ __global__ void format_nlist_fill_a( } FPTYPE rr = sqrt(dev_dot(diff, diff)); if (rr <= rcut) { - key_in[idy] = type[j_idx] * 1E15+ (int_64)(rr * 1.0E13) / 100000 * 100000 + j_idx; + key_in[idy] = type[j_idx] * 1E15+ (int_64)(rr * 1.0E13) / 10000000 * 10000000 + j_idx; } } @@ -144,7 +144,7 @@ __global__ void format_nlist_fill_b( for (unsigned int kk = 0; key_out[kk] != key_out[max_nbor_size - 1]; kk++) { const int & nei_type = key_out[kk] / 1E15; if (nei_iter[nei_type] < sec[nei_type + 1]) { - row_nlist[nei_iter[nei_type]++] = key_out[kk] % 100000; + row_nlist[nei_iter[nei_type]++] = key_out[kk] % 10000000; } } }