From 7c34c5862fc2e6f6c8b63805de0a2e6bd11cb342 Mon Sep 17 00:00:00 2001
From: Lu <Eric@LuDhMacBook-Pro.local>
Date: Sat, 9 May 2020 01:27:36 +0800
Subject: [PATCH 1/4] fix bugs in the gelu activation function

---
 source/CMakeLists.txt         |  5 +++++
 source/lmp/env.sh.in          |  2 +-
 source/op/CMakeLists.txt      |  4 +++-
 source/op/_gelu.py            |  6 +++---
 source/op/cuda/CMakeLists.txt |  2 +-
 source/op/gelu.cc             | 18 +++---------------
 source/op/gelu_gpu.cc         | 18 +++---------------
 source/train/DescrptSeA.py    | 22 ++++++++++++----------
 source/train/DescrptSeR.py    | 11 ++++++-----
 source/train/Network.py       |  4 ++--
 10 files changed, 39 insertions(+), 53 deletions(-)

diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
index 0066e032a7..84c3d326da 100644
--- a/source/CMakeLists.txt
+++ b/source/CMakeLists.txt
@@ -184,6 +184,11 @@ include_directories(${TensorFlow_INCLUDE_DIRS})
 if (BUILD_CPP_IF)
   set (LIB_DEEPMD		"deepmd")
   set (LIB_DEEPMD_OP		"deepmd_op")
+  if (USE_CUDA_TOOLKIT)
+    set (LIB_DEEPMD_OP_CUDA		"deepmd_op_cuda")
+  else ()
+    set (LIB_DEEPMD_OP_CUDA		"deepmd_op")
+  endif()
   if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 4.9)
     set (LIB_DEEPMD_NATIVE	"deepmd_native_md")
     set (LIB_DEEPMD_IPI		"deepmd_ipi")
diff --git a/source/lmp/env.sh.in b/source/lmp/env.sh.in
index 7f58018b79..2e091432f6 100644
--- a/source/lmp/env.sh.in
+++ b/source/lmp/env.sh.in
@@ -8,4 +8,4 @@ TF_RPATH=`echo $TENSORFLOW_LIBRARY_PATH | sed "s/;/ -Wl,-rpath=/g"`
 
 NNP_INC=" -std=c++11 @PREC_DEF@ @TTM_DEF@ @OLD_LMP_PPPM_DEF@ -I$TF_INCLUDE_DIRS -I$DEEPMD_ROOT/include/deepmd "
 NNP_PATH=" -L$TF_LIBRARY_PATH -L$DEEPMD_ROOT/lib"
-NNP_LIB=" -Wl,--no-as-needed -l@LIB_DEEPMD_OP@ -l@LIB_DEEPMD@ -ltensorflow_cc -ltensorflow_framework -Wl,-rpath=$TF_RPATH -Wl,-rpath=$DEEPMD_ROOT/lib"
+NNP_LIB=" -Wl,--no-as-needed -l@LIB_DEEPMD_OP_CUDA@ -l@LIB_DEEPMD_OP@ -l@LIB_DEEPMD@ -ltensorflow_cc -ltensorflow_framework -Wl,-rpath=$TF_RPATH -Wl,-rpath=$DEEPMD_ROOT/lib"
diff --git a/source/op/CMakeLists.txt b/source/op/CMakeLists.txt
index d0dc200236..39f77e6788 100644
--- a/source/op/CMakeLists.txt
+++ b/source/op/CMakeLists.txt
@@ -24,6 +24,8 @@ if (BUILD_CPP_IF)
 endif (BUILD_CPP_IF)
 
 if (BUILD_PY_IF)
+  set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
+  set(CMAKE_INSTALL_RPATH DESTINATION ${CMAKE_BINARY_DIR}/op/cuda)
   if (USE_CUDA_TOOLKIT)
     add_library(op_abi SHARED ${OP_PY_CUDA_SRC} ${OP_LIB})
     add_library(op_grads SHARED ${OP_GRADS_SRC})
@@ -33,11 +35,11 @@ if (BUILD_PY_IF)
     set (EXTRA_LIBS ${EXTRA_LIBS} deepmd_op_cuda)
     target_link_libraries (op_abi ${EXTRA_LIBS})
     target_link_libraries (op_grads ${EXTRA_LIBS})
-    message(STATUS ${TensorFlowFramework_LIBRARY})
   else (USE_CUDA_TOOLKIT)
     add_library(op_abi SHARED ${OP_SRC} ${OP_LIB})
     add_library(op_grads SHARED ${OP_GRADS_SRC})
   endif(USE_CUDA_TOOLKIT)
+  message(STATUS ${TensorFlowFramework_LIBRARY})
   target_link_libraries(
     op_abi ${TensorFlowFramework_LIBRARY}
     )
diff --git a/source/op/_gelu.py b/source/op/_gelu.py
index 9af8d3cbb0..ac0585da78 100644
--- a/source/op/_gelu.py
+++ b/source/op/_gelu.py
@@ -7,9 +7,9 @@
 from deepmd.env import op_module
 
 @ops.RegisterGradient("Gelu")
-def gelu_cc (op, dy) :
+def _gelu_cc (op, dy) :
     return op_module.gelu_grad(dy, op.inputs[0])     
 
 @ops.RegisterGradient("GeluGrad")
-def gelu_grad_cc (op, dy) :
-    return [None, op_module.gelu_grad_grad(dy, op.inputs[0], op.inputs[1])]
+def _gelu_grad_cc (op, dy) :
+    return [op_module.gelu_grad(dy, op.inputs[1]), op_module.gelu_grad_grad(dy, op.inputs[0], op.inputs[1])]
diff --git a/source/op/cuda/CMakeLists.txt b/source/op/cuda/CMakeLists.txt
index d3edc6e98e..89dd0b5922 100644
--- a/source/op/cuda/CMakeLists.txt
+++ b/source/op/cuda/CMakeLists.txt
@@ -83,7 +83,7 @@ set (SOURCE_FILES
     descrpt_se_a.cu descrpt_se_r.cu prod_force_se_a.cu prod_force_se_r.cu prod_virial_se_a.cu prod_virial_se_r.cu gelu.cu 
 )
 
-cuda_add_library(deepmd_op_cuda STATIC ${SOURCE_FILES})
+cuda_add_library(deepmd_op_cuda SHARED ${SOURCE_FILES})
 
 if (BUILD_CPP_IF)
     install(TARGETS deepmd_op_cuda DESTINATION lib/)
diff --git a/source/op/gelu.cc b/source/op/gelu.cc
index 2e59de9e34..26c53c8511 100644
--- a/source/op/gelu.cc
+++ b/source/op/gelu.cc
@@ -11,32 +11,20 @@ using GPUDevice = Eigen::GpuDevice;
 REGISTER_OP("Gelu")
     .Attr("T: {float, double}")
     .Input("x: T")
-    .Output("output: T")
-    .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
-        c->set_output(0, c->input(0));
-        return Status::OK();
-    });
+    .Output("output: T");
 
 REGISTER_OP("GeluGrad")
     .Attr("T: {float, double}")
     .Input("dy: T")
     .Input("x: T")
-    .Output("output: T")
-    .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
-        c->set_output(0, c->input(1));
-        return Status::OK();
-    });
+    .Output("output: T");
 
 REGISTER_OP("GeluGradGrad")
     .Attr("T: {float, double}")
     .Input("dy: T")
     .Input("dy_: T")
     .Input("x: T")
-    .Output("output: T")
-    .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
-        c->set_output(0, c->input(2));
-        return Status::OK();
-    });
+    .Output("output: T");
 
 template <typename Device, typename T>
 struct GeluFunctor {
diff --git a/source/op/gelu_gpu.cc b/source/op/gelu_gpu.cc
index d41c438882..34d4183f98 100644
--- a/source/op/gelu_gpu.cc
+++ b/source/op/gelu_gpu.cc
@@ -10,32 +10,20 @@ using GPUDevice = Eigen::GpuDevice;
 REGISTER_OP("Gelu")
     .Attr("T: {float, double}")
     .Input("x: T")
-    .Output("output: T")
-    .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
-        c->set_output(0, c->input(0));
-        return Status::OK();
-    });
+    .Output("output: T");
 
 REGISTER_OP("GeluGrad")
     .Attr("T: {float, double}")
     .Input("dy: T")
     .Input("x: T")
-    .Output("output: T")
-    .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
-        c->set_output(0, c->input(1));
-        return Status::OK();
-    });
+    .Output("output: T");
 
 REGISTER_OP("GeluGradGrad")
     .Attr("T: {float, double}")
     .Input("dy: T")
     .Input("dy_: T")
     .Input("x: T")
-    .Output("output: T")
-    .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
-        c->set_output(0, c->input(2));
-        return Status::OK();
-    });
+    .Output("output: T");
 
 // maybe instead use cudnn activation forward 
 void GeluLauncher(const float * in, float * out, int const size);
diff --git a/source/train/DescrptSeA.py b/source/train/DescrptSeA.py
index d409f7134f..890fe24672 100644
--- a/source/train/DescrptSeA.py
+++ b/source/train/DescrptSeA.py
@@ -353,6 +353,7 @@ def _filter(self,
                                   self.filter_precision,
                                   tf.random_normal_initializer(stddev=stddev, mean = bavg, seed = seed), 
                                     trainable = trainable)
+                hidden = tf.reshape(activation_fn(tf.matmul(xyz_scatter, w) + b), [-1, outputs_size[ii]])
                 if self.filter_resnet_dt :
                     idt = tf.get_variable('idt_'+str(ii)+'_'+str(type_i), 
                                           [1, outputs_size[ii]], 
@@ -361,16 +362,16 @@ def _filter(self,
                                           trainable = trainable)
                 if outputs_size[ii] == outputs_size[ii-1]:
                     if self.filter_resnet_dt :
-                        xyz_scatter += activation_fn(tf.matmul(xyz_scatter, w) + b) * idt
+                        xyz_scatter += hidden * idt
                     else :
-                        xyz_scatter += activation_fn(tf.matmul(xyz_scatter, w) + b)
+                        xyz_scatter += hidden
                 elif outputs_size[ii] == outputs_size[ii-1] * 2: 
                     if self.filter_resnet_dt :
-                        xyz_scatter = tf.concat([xyz_scatter,xyz_scatter], 1) + activation_fn(tf.matmul(xyz_scatter, w) + b) * idt
+                        xyz_scatter = tf.concat([xyz_scatter,xyz_scatter], 1) + hidden * idt
                     else :
-                        xyz_scatter = tf.concat([xyz_scatter,xyz_scatter], 1) + activation_fn(tf.matmul(xyz_scatter, w) + b)
+                        xyz_scatter = tf.concat([xyz_scatter,xyz_scatter], 1) + hidden
                 else:
-                    xyz_scatter = activation_fn(tf.matmul(xyz_scatter, w) + b)
+                    xyz_scatter = hidden
             else:
               w = tf.zeros((outputs_size[0], outputs_size[-1]), dtype=global_tf_float_precision)
               xyz_scatter = tf.matmul(xyz_scatter, w)
@@ -440,6 +441,7 @@ def _filter_type_ext(self,
                                 self.filter_precision,
                                 tf.random_normal_initializer(stddev=stddev, mean = bavg, seed = seed),
                                   trainable = trainable)
+              hidden = tf.reshape(activation_fn(tf.matmul(xyz_scatter, w) + b), [-1, outputs_size[ii]])
               if self.filter_resnet_dt :
                   idt = tf.get_variable('idt_'+str(ii)+'_'+str(type_i), 
                                         [1, outputs_size[ii]], 
@@ -448,16 +450,16 @@ def _filter_type_ext(self,
                                         trainable = trainable)
               if outputs_size[ii] == outputs_size[ii-1]:
                   if self.filter_resnet_dt :
-                      xyz_scatter += activation_fn(tf.matmul(xyz_scatter, w) + b) * idt
+                      xyz_scatter += hidden * idt
                   else :
-                      xyz_scatter += activation_fn(tf.matmul(xyz_scatter, w) + b)
+                      xyz_scatter += hidden
               elif outputs_size[ii] == outputs_size[ii-1] * 2: 
                   if self.filter_resnet_dt :
-                      xyz_scatter = tf.concat([xyz_scatter,xyz_scatter], 1) + activation_fn(tf.matmul(xyz_scatter, w) + b) * idt
+                      xyz_scatter = tf.concat([xyz_scatter,xyz_scatter], 1) + hidden * idt
                   else :
-                      xyz_scatter = tf.concat([xyz_scatter,xyz_scatter], 1) + activation_fn(tf.matmul(xyz_scatter, w) + b)
+                      xyz_scatter = tf.concat([xyz_scatter,xyz_scatter], 1) + hidden
               else:
-                  xyz_scatter = activation_fn(tf.matmul(xyz_scatter, w) + b)
+                  xyz_scatter = hidden
             # natom x nei_type_i x out_size
             xyz_scatter = tf.reshape(xyz_scatter, (-1, shape_i[1]//4, outputs_size[-1]))
             # natom x nei_type_i x 4  
diff --git a/source/train/DescrptSeR.py b/source/train/DescrptSeR.py
index ed53a3bcd4..5c4af52d4d 100644
--- a/source/train/DescrptSeR.py
+++ b/source/train/DescrptSeR.py
@@ -298,6 +298,7 @@ def _filter_r(self,
                                             self.filter_precision,
                                             tf.random_normal_initializer(stddev=stddev, mean = bavg, seed = seed), 
                                             trainable = trainable)
+                        hidden = tf.reshape(activation_fn(tf.matmul(xyz_scatter, w) + b), [-1, outputs_size[ii]])
                         if self.filter_resnet_dt :
                             idt = tf.get_variable('idt_'+str(ii)+'_'+str(type_i), 
                                                   [1, outputs_size[ii]], 
@@ -306,16 +307,16 @@ def _filter_r(self,
                                                   trainable = trainable)
                         if outputs_size[ii] == outputs_size[ii-1]:
                             if self.filter_resnet_dt :
-                                xyz_scatter += activation_fn(tf.matmul(xyz_scatter, w) + b) * idt
+                                xyz_scatter += hidden * idt
                             else :
-                                xyz_scatter += activation_fn(tf.matmul(xyz_scatter, w) + b)
+                                xyz_scatter += hidden
                         elif outputs_size[ii] == outputs_size[ii-1] * 2: 
                             if self.filter_resnet_dt :
-                                xyz_scatter = tf.concat([xyz_scatter,xyz_scatter], 1) + activation_fn(tf.matmul(xyz_scatter, w) + b) * idt
+                                xyz_scatter = tf.concat([xyz_scatter,xyz_scatter], 1) + hidden * idt
                             else :
-                                xyz_scatter = tf.concat([xyz_scatter,xyz_scatter], 1) + activation_fn(tf.matmul(xyz_scatter, w) + b)
+                                xyz_scatter = tf.concat([xyz_scatter,xyz_scatter], 1) + hidden
                         else:
-                            xyz_scatter = activation_fn(tf.matmul(xyz_scatter, w) + b)
+                            xyz_scatter = hidden
                 else:
                     w = tf.zeros((outputs_size[0], outputs_size[-1]), dtype=global_tf_float_precision)
                     xyz_scatter = tf.matmul(xyz_scatter, w)
diff --git a/source/train/Network.py b/source/train/Network.py
index ed188c085d..1afa83bd1c 100644
--- a/source/train/Network.py
+++ b/source/train/Network.py
@@ -41,9 +41,9 @@ def one_layer(inputs,
                 # return activation_fn(hidden_bn)
             else:
                 if use_timestep :
-                    return activation_fn(hidden) * idt
+                    return tf.reshape(activation_fn(hidden), [-1, outputs_size]) * idt
                 else :
-                    return activation_fn(hidden)                    
+                    return tf.reshape(activation_fn(hidden), [-1, outputs_size])                    
         else:
             if useBN:
                 None

From 8ef6453eea22a21030921972d613ae913259914e Mon Sep 17 00:00:00 2001
From: Lu <Eric@LuDhMacBook-Pro.local>
Date: Tue, 12 May 2020 23:12:05 +0800
Subject: [PATCH 2/4] set rpath for libdeepmd_op_cuda.so

---
 pyproject.toml           |  2 +-
 setup.py                 | 16 +++++++++++++++-
 source/op/CMakeLists.txt |  2 +-
 3 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index b1251c5890..2e9e3239ab 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,3 +1,3 @@
 [build-system]
-requires = ["setuptools", "wheel", "scikit-build", "cmake", "ninja", "m2r"]
+requires = ["setuptools", "setuptools_scm", "wheel", "scikit-build", "cmake", "ninja", "m2r"]
 
diff --git a/setup.py b/setup.py
index 8c6a335ad4..3f658b3383 100644
--- a/setup.py
+++ b/setup.py
@@ -1,9 +1,20 @@
 from skbuild import setup
 from skbuild.exceptions import SKBuildError
 from skbuild.cmaker import get_cmake_version
+from setuptools_scm import get_version
 from packaging.version import LegacyVersion
 from os import path, makedirs
-import imp
+import imp, sys, platform
+
+def get_dp_install_path() :
+    site_packages_path = path.join(path.dirname(path.__file__), 'site-packages')
+    dp_scm_version     = get_version(root="./", relative_to=__file__)
+    python_version     = 'py' + str(sys.version_info.major + sys.version_info.minor * 0.1)
+    os_info            = sys.platform
+    machine_info       = platform.machine()
+    dp_install_path    = site_packages_path + '/deepmd_kit-' + dp_scm_version + '-' + python_version + '-' + os_info + '-' + machine_info + '.egg/deepmd'
+    
+    return dp_install_path
 
 readme_file = path.join(path.dirname(path.abspath(__file__)), 'README.md')
 try:
@@ -34,6 +45,8 @@
 except OSError:
     pass
 
+dp_install_path = get_dp_install_path()
+
 setup(
     name="deepmd-kit",
     setup_requires=setup_requires,
@@ -56,6 +69,7 @@
                 '-DBUILD_PY_IF:BOOL=TRUE', 
                 '-DBUILD_CPP_IF:BOOL=FALSE',
                 '-DFLOAT_PREC:STRING=high',
+                '-DDP_INSTALL_PATH=%s' % dp_install_path,
     ],
     cmake_source_dir='source',
     cmake_minimum_required_version='3.0',
diff --git a/source/op/CMakeLists.txt b/source/op/CMakeLists.txt
index 39f77e6788..2552269783 100644
--- a/source/op/CMakeLists.txt
+++ b/source/op/CMakeLists.txt
@@ -25,7 +25,7 @@ endif (BUILD_CPP_IF)
 
 if (BUILD_PY_IF)
   set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
-  set(CMAKE_INSTALL_RPATH DESTINATION ${CMAKE_BINARY_DIR}/op/cuda)
+  set(CMAKE_INSTALL_RPATH DESTINATION ${DP_INSTALL_PATH})
   if (USE_CUDA_TOOLKIT)
     add_library(op_abi SHARED ${OP_PY_CUDA_SRC} ${OP_LIB})
     add_library(op_grads SHARED ${OP_GRADS_SRC})

From c3cafcf062d8dff117481231c162206e9b85ed22 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Tue, 12 May 2020 23:44:08 -0400
Subject: [PATCH 3/4] install setuptools_scm before sdist

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 4db9e48dd4..dd4b0eed9e 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -83,7 +83,7 @@ jobs:
         - CXX=g++-7
         - TENSORFLOW_VERSION=2.1
       install:
-        - python -m pip install twine cibuildwheel==1.1.0 scikit-build
+        - python -m pip install twine cibuildwheel==1.1.0 scikit-build setuptools_scm
       script:
         - python -m cibuildwheel --output-dir wheelhouse
         - python setup.py sdist

From 2d2ff4b88fd200dddad5374d401731f04286e187 Mon Sep 17 00:00:00 2001
From: Lu <Eric@LuDhMacBook-Pro.local>
Date: Wed, 13 May 2020 11:44:16 +0800
Subject: [PATCH 4/4] set library rpath for both 'pip install .' and 'python setup.py install'

---
 setup.py                 | 10 ++++++----
 source/op/CMakeLists.txt |  2 +-
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/setup.py b/setup.py
index 3f658b3383..99a86d3da8 100644
--- a/setup.py
+++ b/setup.py
@@ -12,9 +12,10 @@ def get_dp_install_path() :
     python_version     = 'py' + str(sys.version_info.major + sys.version_info.minor * 0.1)
     os_info            = sys.platform
     machine_info       = platform.machine()
-    dp_install_path    = site_packages_path + '/deepmd_kit-' + dp_scm_version + '-' + python_version + '-' + os_info + '-' + machine_info + '.egg/deepmd'
+    dp_pip_install_path    = site_packages_path + '/deepmd'
+    dp_setup_install_path    = site_packages_path + '/deepmd_kit-' + dp_scm_version + '-' + python_version + '-' + os_info + '-' + machine_info + '.egg/deepmd'
     
-    return dp_install_path
+    return dp_pip_install_path, dp_setup_install_path
 
 readme_file = path.join(path.dirname(path.abspath(__file__)), 'README.md')
 try:
@@ -45,7 +46,7 @@ def get_dp_install_path() :
 except OSError:
     pass
 
-dp_install_path = get_dp_install_path()
+dp_pip_install_path, dp_setup_install_path = get_dp_install_path()
 
 setup(
     name="deepmd-kit",
@@ -69,7 +70,8 @@ def get_dp_install_path() :
                 '-DBUILD_PY_IF:BOOL=TRUE', 
                 '-DBUILD_CPP_IF:BOOL=FALSE',
                 '-DFLOAT_PREC:STRING=high',
-                '-DDP_INSTALL_PATH=%s' % dp_install_path,
+                '-DDP_PIP_INSTALL_PATH=%s' % dp_pip_install_path,
+                '-DDP_SETUP_INSTALL_PATH=%s' % dp_setup_install_path,
     ],
     cmake_source_dir='source',
     cmake_minimum_required_version='3.0',
diff --git a/source/op/CMakeLists.txt b/source/op/CMakeLists.txt
index 2552269783..993a1b6fd4 100644
--- a/source/op/CMakeLists.txt
+++ b/source/op/CMakeLists.txt
@@ -25,7 +25,7 @@ endif (BUILD_CPP_IF)
 
 if (BUILD_PY_IF)
   set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
-  set(CMAKE_INSTALL_RPATH DESTINATION ${DP_INSTALL_PATH})
+  set(CMAKE_INSTALL_RPATH ${DP_PIP_INSTALL_PATH} ${DP_SETUP_INSTALL_PATH} ${CMAKE_BINARY_DIR}/op/cuda) # set() has no DESTINATION keyword; the word was being stored as a literal RPATH entry
   if (USE_CUDA_TOOLKIT)
     add_library(op_abi SHARED ${OP_PY_CUDA_SRC} ${OP_LIB})
     add_library(op_grads SHARED ${OP_GRADS_SRC})